/* * OpenPBS (Portable Batch System) v2.3 Software License * * Copyright (c) 1999-2000 Veridian Information Solutions, Inc. * All rights reserved. * * --------------------------------------------------------------------------- * For a license to use or redistribute the OpenPBS software under conditions * other than those described below, or to purchase support for this software, * please contact Veridian Systems, PBS Products Department ("Licensor") at: * * www.OpenPBS.org +1 650 967-4675 sales@OpenPBS.org * 877 902-4PBS (US toll-free) * --------------------------------------------------------------------------- * * This license covers use of the OpenPBS v2.3 software (the "Software") at * your site or location, and, for certain users, redistribution of the * Software to other sites and locations. Use and redistribution of * OpenPBS v2.3 in source and binary forms, with or without modification, * are permitted provided that all of the following conditions are met. * After December 31, 2001, only conditions 3-6 must be met: * * 1. Commercial and/or non-commercial use of the Software is permitted * provided a current software registration is on file at www.OpenPBS.org. * If use of this software contributes to a publication, product, or * service, proper attribution must be given; see www.OpenPBS.org/credit.html * * 2. Redistribution in any form is only permitted for non-commercial, * non-profit purposes. There can be no charge for the Software or any * software incorporating the Software. Further, there can be no * expectation of revenue generated as a consequence of redistributing * the Software. * * 3. Any Redistribution of source code must retain the above copyright notice * and the acknowledgment contained in paragraph 6, this list of conditions * and the disclaimer contained in paragraph 7. * * 4. Any Redistribution in binary form must reproduce the above copyright * notice and the acknowledgment contained in paragraph 6, this list of * conditions and the disclaimer contained in paragraph 7 in the * documentation and/or other materials provided with the distribution. * * 5. Redistributions in any form must be accompanied by information on how to * obtain complete source code for the OpenPBS software and any * modifications and/or additions to the OpenPBS software. The source code * must either be included in the distribution or be available for no more * than the cost of distribution plus a nominal fee, and all modifications * and additions to the Software must be freely redistributable by any party * (including Licensor) without restriction. * * 6. All advertising materials mentioning features or use of the Software must * display the following acknowledgment: * * "This product includes software developed by NASA Ames Research Center, * Lawrence Livermore National Laboratory, and Veridian Information * Solutions, Inc. * Visit www.OpenPBS.org for OpenPBS software support, * products, and information." * * 7. DISCLAIMER OF WARRANTY * * THIS SOFTWARE IS PROVIDED "AS IS" WITHOUT WARRANTY OF ANY KIND. ANY EXPRESS * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, AND NON-INFRINGEMENT * ARE EXPRESSLY DISCLAIMED. * * IN NO EVENT SHALL VERIDIAN CORPORATION, ITS AFFILIATED COMPANIES, OR THE * U.S. GOVERNMENT OR ANY OF ITS AGENCIES BE LIABLE FOR ANY DIRECT OR INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, * OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * This license will be governed by the laws of the Commonwealth of Virginia, * without reference to its choice of law rules. */ #include /* the master config generated by configure */ #include #include #include #include #include #include #include #include #include #include "portability.h" #include "server_limits.h" #include "net_connect.h" #include "mcom.h" #include "log.h" #include #include #include #ifdef _AIX #include #endif #ifdef __APPLE__ /* this is a hack for the missing bindresvport declaration on OS X the function works fine but its use will generate a compiler warning if -Wall is used with gcc */ int bindresvport(int sd, struct sockaddr_in *sin); #endif /** * Returns the max number of possible file descriptors (as * per the OS limits). * */ int get_max_num_descriptors(void) { static int max_num_descriptors = 0; if (max_num_descriptors <= 0) max_num_descriptors = getdtablesize(); return(max_num_descriptors); } /* END get_num_max_descriptors() */ /** * Returns the number of bytes needed to allocate * a fd_set array that can hold all of the possible * socket descriptors. */ int get_fdset_size(void) { unsigned int MaxNumDescriptors = 0; int NumFDSetsNeeded = 0; int NumBytesInFDSet = 0; int Result = 0; MaxNumDescriptors = get_max_num_descriptors(); NumBytesInFDSet = sizeof(fd_set); NumFDSetsNeeded = MaxNumDescriptors / FD_SETSIZE; if (MaxNumDescriptors < FD_SETSIZE) { /* the default size already provides sufficient space */ Result = NumBytesInFDSet; } else if ((MaxNumDescriptors % FD_SETSIZE) > 0) { /* we need to allocate more memory to cover extra * bits--add an extra FDSet worth of memory to the size */ Result = (NumFDSetsNeeded + 1) * NumBytesInFDSet; } else { /* division was exact--we know exactly how many bytes we need */ Result = NumFDSetsNeeded * NumBytesInFDSet; } return(Result); } /* END get_fdset_size() */ /* ** wait for connect to complete. We use non-blocking sockets, ** so have to wait for completion this way. */ static int await_connect( long timeout, /* I */ int sockd) /* I */ { int n; int val; int rc; fd_set *BigFDSet = NULL; struct timeval tv; torque_socklen_t len; /* * some operating systems (like FreeBSD) cannot have a value for tv.tv_usec * larger than 1,000,000 so we need to split up the timeout duration between * seconds and microseconds */ tv.tv_sec = timeout / 1000000; tv.tv_usec = timeout % 1000000; /* calculate needed size for fd_set in select() */ BigFDSet = (fd_set *)calloc(1,sizeof(char) * get_fdset_size()); if (!BigFDSet) { log_err(ENOMEM,__func__,"Could not allocate memory to set file descriptor"); return -1; } FD_SET(sockd, BigFDSet); if ((n = select(sockd+1,0,BigFDSet,0,&tv)) != 1) { /* FAILURE: socket not ready for write */ free(BigFDSet); return(-1); } len = sizeof(val); rc = getsockopt(sockd, SOL_SOCKET, SO_ERROR, &val, &len); if ((rc == 0) && (val == 0)) { /* SUCCESS: no failures detected */ free(BigFDSet); return(0); } errno = val; /* FAILURE: socket error detected */ free(BigFDSet); return(-1); } /* END await_connect() */ /* global */ long MaxConnectTimeout = 5000000; /* in microseconds */ /* * client_to_svr - connect to a server * * Perform socket/tcp/ip stuff to connect to a server. * * Returns: >=0 the socket obtained, or * PBS_NET_RC_FATAL (-1) if fatal error, just quit, or * PBS_NET_RC_RETRY (-2) if temp error, should retry * * NOTE: the server's host address and port were chosen as parameters * rather than their names to possibly save extra look-ups. It seems likely * that the caller "might" make several calls to the same host or different * hosts with the same port. Let the caller keep the addresses around * rather than look it up each time. * * NOTE: will wait up to MaxConnectTimeout microseconds for transient network failures */ /* NOTE: create new connection on reserved port to validate root/trusted authority */ int client_to_svr( pbs_net_t hostaddr, /* I - internet addr of host */ unsigned int port, /* I - port to which to connect */ int local_port, /* I - BOOLEAN: not 0 to use local reserved port */ char *EMsg) /* O (optional,minsize=1024) */ { const char id[] = "client_to_svr"; struct sockaddr_in local; struct sockaddr_in remote; int sock; unsigned short tryport = 777; #ifndef NOPRIVPORTS int errorsock; int bind_retry; int flags; #endif int one = 1; int trycount = 0; struct timespec rem; #define STARTPORT 144 #define ENDPORT (IPPORT_RESERVED - 1) #define NPORTS (ENDPORT - STARTPORT + 1) #define SHUFFLE_COUNT 3 if (EMsg != NULL) EMsg[0] = '\0'; errno = 0; /* In case we can't connect go to sleep for 1 millisecond and try again */ rem.tv_sec = 0; rem.tv_nsec = 1000000; memset(&local, 0, sizeof(local)); memset(&remote, 0, sizeof(remote)); local.sin_family = AF_INET; local.sin_addr.s_addr = 0; local.sin_port = 0; retry: /* retry goto added (rentec) */ /* get socket */ sock = socket(AF_INET, SOCK_STREAM, 0); if (sock < 0) { if (EMsg != NULL) sprintf(EMsg, "cannot create socket in %s - errno: %d %s", id, errno, strerror(errno)); return(PBS_NET_RC_FATAL); } if (sock >= PBS_NET_MAX_CONNECTIONS) { if (EMsg != NULL) sprintf(EMsg, "PBS_NET_MAX_CONNECTIONS exceeded in %s", id); close(sock); /* too many connections */ return(PBS_NET_RC_RETRY); } #ifndef NOPRIVPORTS flags = fcntl(sock, F_GETFL); flags |= O_NONBLOCK; fcntl(sock, F_SETFL, flags); #endif /* !NOPRIVPORTS */ /* If local privilege port requested, bind to one */ /* must be root privileged to do this */ if (local_port != FALSE) { /* set REUSEADDR (rentec) */ setsockopt( sock, SOL_SOCKET, SO_REUSEADDR, (void *)&one, sizeof(one)); #ifndef NOPRIVPORTS if (trycount < SHUFFLE_COUNT) { errorsock = -1; #ifdef HAVE_BINDRESVPORT /* * bindresvport seems to cause connect() failures in some odd corner case when * talking to a local daemon. So we'll only try this once and fallback to * the slow loop around bind() if connect() fails with EADDRINUSE * or EADDRNOTAVAIL. * http://www.supercluster.org/pipermail/torqueusers/2006-June/003740.html */ for (bind_retry = 0; bind_retry < 3; bind_retry++) { errorsock = bindresvport(sock, &local); if (errorsock == 0) break; usleep(1000); } if (errorsock != 0) { /* bindresvport could not get a privileged port */ if (EMsg != NULL) sprintf(EMsg, "cannot bind to reserved port in %s - errno: %d %s", id, errno, strerror(errno)); close(sock); return(PBS_NET_RC_FATAL); } tryport = ntohs(local.sin_port); goto jump_to_check; #else /* HAVE_BINDRESVPORT */ /* Pseudo-casual shuffling of tryport */ tryport = (rand() % NPORTS) + STARTPORT; #endif /* HAVE_BINDRESVPORT */ } else { /* A simple port search after SHUFFLE_COUNT shuffling */ if (tryport > ENDPORT) { tryport = STARTPORT; } } retry_bind: local.sin_port = htons(tryport); errorsock = bind(sock, (struct sockaddr *)&local, sizeof(local)); #ifdef HAVE_BINDRESVPORT jump_to_check: #endif /* HAVE_BINDRESVPORT */ if (errorsock < 0) { #ifdef NDEBUG2 fprintf(stderr, "INFO: cannot bind to port %d, errno: %d - %s\n", tryport, errno, strerror(errno)); #endif /* NDEBUG2 */ /* Terminate on errors, except "address already in use" */ if ((errno == EADDRINUSE) || (errno == EINVAL) || (errno == EADDRNOTAVAIL)) { if (tryport++ < ENDPORT) goto retry_bind; } if (EMsg != NULL) sprintf(EMsg, "cannot bind to reserved port in %s - errno: %d %s", id, errno, strerror(errno)); close(sock); return(PBS_NET_RC_RETRY); } #endif /* !NOPRIVPORTS */ } /* END if (local_port != FALSE) */ /* bind successful!!! */ /* connect to specified server host and port */ remote.sin_addr.s_addr = htonl(hostaddr); remote.sin_port = htons((unsigned short)port); remote.sin_family = AF_INET; if (connect(sock, (struct sockaddr *)&remote, sizeof(remote)) >= 0) { /* SUCCESS */ return(sock); } /* process failure */ if (errno == EINPROGRESS) if (await_connect(MaxConnectTimeout, sock) == 0) { return(sock); } #ifdef NDEBUG2 fprintf(stderr, "INFO: cannot connect to port %d, errno=%d - %s\n", tryport, errno, strerror(errno)); #endif /* NDEBUG2 */ switch (errno) { case ECONNREFUSED: /* Connection refused */ if (EMsg != NULL) sprintf(EMsg, "cannot connect to port %d in %s - connection refused.\n Check if trqauthd should be running\n", tryport, id); close(sock); return(PBS_NET_RC_RETRY); /*NOTREACHED*/ case EINPROGRESS: /* Operation now in progress */ case EALREADY: /* Operation already in progress */ case EISCONN: /* Transport endpoint is already connected */ case ETIMEDOUT: /* Connection timed out */ case EAGAIN: /* Operation would block */ case EINTR: /* Interrupted system call */ if (await_connect(MaxConnectTimeout, sock) == 0) { /* socket not ready for writing after MaxConnectTimeout microseconds timeout */ /* no network failures detected */ break; }/* Interrupted system call */ /* fall through to next case */ case EINVAL: /* Invalid argument */ case EADDRINUSE: /* Address already in use */ case EADDRNOTAVAIL: /* Cannot assign requested address */ /* TCP is not ready for us. Sleep for a millisecond and see if that will change anything before the next retry */ nanosleep(&rem,&rem); if (local_port != FALSE) { if (trycount++ > (NPORTS)) { close(sock); return(PBS_NET_RC_RETRY); } #if TCP_RETRY_LIMIT != 0 else if (trycount > TCP_RETRY_LIMIT) { if (EMsg != NULL) sprintf(EMsg, "cannot connect to port %d in %s - errno:%d %s", tryport, id, errno, strerror(errno)); close(sock); return(PBS_NET_RC_FATAL); } #endif /* def TCP_RETRY_LIMIT */ /* continue port search (rentec) */ tryport++; close(sock); goto retry; } default: if (EMsg != NULL) sprintf(EMsg, "cannot connect to port %d in %s - errno:%d %s", tryport, id, errno, strerror(errno)); close(sock); return(PBS_NET_RC_FATAL); } /* END switch (errno) */ return(sock); } /* END client_to_svr() */ /* END net_client.c */