/* $FreeBSD$ */ /* * rpcclnt.c * * Copyright (c) 2004 The Regents of the University of Michigan. * All rights reserved. * * Copyright (c) 2004 Weston Andros Adamson . * Copyright (c) 2004 Marius Aamodt Eriksen . * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its * contributors may be used to endorse or promote products derived * from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * $Id: rpcclnt.c,v 1.13 2004/06/14 19:38:56 marius Exp $ */ /* * Copyright (c) 1989, 1991, 1993, 1995 The Regents of the University of * California. All rights reserved. * * This code is derived from software contributed to Berkeley by Rick Macklem at * The University of Guelph. 
* * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are * met: 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. 2. * Redistributions in binary form must reproduce the above copyright notice, * this list of conditions and the following disclaimer in the documentation * and/or other materials provided with the distribution. 3. All advertising * materials mentioning features or use of this software must display the * following acknowledgement: This product includes software developed by the * University of California, Berkeley and its contributors. 4. Neither the * name of the University nor the names of its contributors may be used to * endorse or promote products derived from this software without specific * prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND ANY * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* * @(#)nfs_socket.c 8.5 (Berkeley) 3/30/95 */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* For proc_rwmem() */ #if defined(__APPLE__) #include #include #endif #if defined(__FreeBSD__) #include #endif #include #include #include #include #include #include #if defined(__FreeBSD__) #include #endif #if defined(__OpenBSD__) #include #endif #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* memory management */ #ifdef __OpenBSD__ struct pool rpctask_pool; struct pool rpcclnt_pool; #define RPCTASKPOOL_LWM 10 #define RPCTASKPOOL_HWM 40 #elif defined(__FreeBSD__) static MALLOC_DEFINE(M_RPC, "rpcclnt", "rpc state"); #endif #define RPC_RETURN(X) do { RPCDEBUG("returning %d", X); return X; }while(0) /* * Estimate rto for an nfs rpc sent via. an unreliable datagram. Use the mean * and mean deviation of rtt for the appropriate type of rpc for the frequent * rpcs and a default for the others. The justification for doing "other" * this way is that these rpcs happen so infrequently that timer est. would * probably be stale. Also, since many of these rpcs are non-idempotent, a * conservative timeout is desired. getattr, lookup - A+2D read, write - * A+4D other - nm_timeo */ #define RPC_RTO(n, t) \ ((t) == 0 ? (n)->rc_timeo : \ ((t) < 3 ? \ (((((n)->rc_srtt[t-1] + 3) >> 2) + (n)->rc_sdrtt[t-1] + 1) >> 1) : \ ((((n)->rc_srtt[t-1] + 7) >> 3) + (n)->rc_sdrtt[t-1] + 1))) #define RPC_SRTT(s,r) (r)->r_rpcclnt->rc_srtt[rpcclnt_proct((s),\ (r)->r_procnum) - 1] #define RPC_SDRTT(s,r) (r)->r_rpcclnt->rc_sdrtt[rpcclnt_proct((s),\ (r)->r_procnum) - 1] /* * There is a congestion window for outstanding rpcs maintained per mount * point. The cwnd size is adjusted in roughly the way that: Van Jacobson, * Congestion avoidance and Control, In "Proceedings of SIGCOMM '88". 
ACM, * August 1988. describes for TCP. The cwnd size is chopped in half on a * retransmit timeout and incremented by 1/cwnd when each rpc reply is * received and a full cwnd of rpcs is in progress. (The sent count and cwnd * are scaled for integer arith.) Variants of "slow start" were tried and * were found to be too much of a performance hit (ave. rtt 3 times larger), * I suspect due to the large rtt that nfs rpcs have. */ #define RPC_CWNDSCALE 256 #define RPC_MAXCWND (RPC_CWNDSCALE * 32) static int rpcclnt_backoff[8] = {2, 4, 8, 16, 32, 64, 128, 256,}; #if 0 #define RPC_ERRSTR_ACCEPTED_SIZE 6 char *rpc_errstr_accepted[RPC_ERRSTR_ACCEPTED_SIZE] = { "", /* no good message... */ "remote server hasn't exported program.", "remote server can't support version number.", "program can't support procedure.", "procedure can't decode params.", "remote error. remote side memory allocation failure?" }; char *rpc_errstr_denied[2] = { "remote server doesnt support rpc version 2!", "remote server authentication error." 
}; #define RPC_ERRSTR_AUTH_SIZE 6 char *rpc_errstr_auth[RPC_ERRSTR_AUTH_SIZE] = { "", "auth error: bad credential (seal broken).", "auth error: client must begin new session.", "auth error: bad verifier (seal broken).", "auth error: verifier expired or replayed.", "auth error: rejected for security reasons.", }; #endif /* * Static data, mostly RPC constants in XDR form */ static u_int32_t rpc_reply, rpc_call, rpc_vers; /* * rpc_msgdenied, rpc_mismatch, rpc_auth_unix, rpc_msgaccepted, * rpc_autherr, rpc_auth_kerb; */ static u_int32_t rpcclnt_xid = 0; static u_int32_t rpcclnt_xid_touched = 0; struct rpcstats rpcstats; int rpcclnt_ticks; SYSCTL_NODE(_kern, OID_AUTO, rpc, CTLFLAG_RD, 0, "RPC Subsystem"); SYSCTL_UINT(_kern_rpc, OID_AUTO, retries, CTLFLAG_RD, &rpcstats.rpcretries, 0, "retries"); SYSCTL_UINT(_kern_rpc, OID_AUTO, request, CTLFLAG_RD, &rpcstats.rpcrequests, 0, "request"); SYSCTL_UINT(_kern_rpc, OID_AUTO, timeouts, CTLFLAG_RD, &rpcstats.rpctimeouts, 0, "timeouts"); SYSCTL_UINT(_kern_rpc, OID_AUTO, unexpected, CTLFLAG_RD, &rpcstats.rpcunexpected, 0, "unexpected"); SYSCTL_UINT(_kern_rpc, OID_AUTO, invalid, CTLFLAG_RD, &rpcstats.rpcinvalid, 0, "invalid"); #ifdef RPCCLNT_DEBUG int rpcdebugon = 0; SYSCTL_UINT(_kern_rpc, OID_AUTO, debug_on, CTLFLAG_RW, &rpcdebugon, 0, "RPC Debug messages"); #endif /* * Queue head for rpctask's */ static TAILQ_HEAD(, rpctask) rpctask_q; #if defined(__FreeBSD__) struct callout_handle rpcclnt_timer_handle; #endif #if defined(__OpenBSD__) static int rpcclnt_send(struct socket *, struct mbuf *, struct mbuf *, struct rpctask *); static int rpcclnt_receive(struct rpctask *, struct mbuf **, struct mbuf **, cthread_t *); #else static int rpcclnt_send(struct socket *, struct sockaddr *, struct mbuf *, struct rpctask *); static int rpcclnt_receive(struct rpctask *, struct sockaddr **, struct mbuf **, cthread_t *); #endif static int rpcclnt_msg(cthread_t *, const char *, char *); static int rpcclnt_reply(struct rpctask *, cthread_t *); static 
void rpcclnt_timer(void *); static int rpcclnt_sndlock(int *, struct rpctask *); static void rpcclnt_sndunlock(int *); static int rpcclnt_rcvlock(struct rpctask *); static void rpcclnt_rcvunlock(int *); static void rpcclnt_realign(struct mbuf **, int); static int rpc_adv(struct mbuf **, caddr_t *, int, int); static int rpcm_disct(struct mbuf **, caddr_t *, int, int, caddr_t *); static void rpcclnt_softterm(struct rpctask * task); static int rpcclnt_sigintr(struct rpcclnt *, struct rpctask *, cthread_t *); static u_int32_t rpcclnt_proct(struct rpcclnt *, u_int32_t); static struct mbuf *rpcclnt_buildheader(struct rpcclnt *, int, struct mbuf *, int *, struct mbuf **, struct ucred *); static int rpcclnt_request_redirect(struct rpcclnt *rpc, struct mbuf *mrest, int procnum, cthread_t *caller_td, struct ucred *cred, struct rpc_reply *reply); static int _uiotombuf(struct uio *, struct mbuf **, int, cthread_t *); #ifdef ULTRADEBUG static void print_hexl(const u_char *, int, int); #endif #ifdef __APPLE__ static void rpcclnt_timer_funnel(void *); #endif void rpcclnt_init(void) { #if defined(__OpenBSD__) static struct timeout rpcclnt_timer_to; #endif rpcclnt_ticks = (hz * RPC_TICKINTVL + 500) / 1000; if (rpcclnt_ticks < 1) rpcclnt_ticks = 1; rpcstats.rpcretries = 0; rpcstats.rpcrequests = 0; rpcstats.rpctimeouts = 0; rpcstats.rpcunexpected = 0; rpcstats.rpcinvalid = 0; /* * rpc constants how about actually using more than one of these! 
*/ rpc_reply = txdr_unsigned(RPC_REPLY); rpc_vers = txdr_unsigned(RPC_VER2); rpc_call = txdr_unsigned(RPC_CALL); #if 0 rpc_msgdenied = txdr_unsigned(RPC_MSGDENIED); rpc_msgaccepted = txdr_unsigned(RPC_MSGACCEPTED); rpc_mismatch = txdr_unsigned(RPC_MISMATCH); rpc_autherr = txdr_unsigned(RPC_AUTHERR); rpc_auth_unix = txdr_unsigned(RPCAUTH_UNIX); rpc_auth_kerb = txdr_unsigned(RPCAUTH_KERB4); #endif /* initialize rpctask queue */ TAILQ_INIT(&rpctask_q); #if defined(__OpenBSD__) /* initialize pools */ pool_init(&rpctask_pool, sizeof(struct rpctask), 0, 0, RPCTASKPOOL_LWM, "rpctask_p", NULL); pool_setlowat(&rpctask_pool, RPCTASKPOOL_LWM); pool_sethiwat(&rpctask_pool, RPCTASKPOOL_HWM); pool_init(&rpcclnt_pool, sizeof(struct rpcclnt), 0, 0, 1, "rpcclnt_p", NULL); #endif #if defined(__OpenBSD__) /* initialize timers */ timeout_set(&rpcclnt_timer_to, rpcclnt_timer, &rpcclnt_timer_to); rpcclnt_timer(&rpcclnt_timer_to); #elif defined(__APPLE__) rpcclnt_timer_funnel(NULL); #else rpcclnt_timer(NULL); #endif rpcdev_init(); RPCDEBUG("rpc initialed"); return; } void rpcclnt_uninit(void) { RPCDEBUG("uninit"); #if defined(__FreeBSD__) untimeout(rpcclnt_timer, (void *)NULL, rpcclnt_timer_handle); #elif defined(__APPLE__) untimeout(rpcclnt_timer_funnel, (void *)NULL); #endif /* XXX delete sysctl variables */ rpcdev_uninit(); } /* * Initialize sockets and congestion for a new RPC connection. We do not free * the sockaddr if error. */ int rpcclnt_connect(struct rpcclnt *rpc, cthread_t *td) { struct socket *so; int s, error, rcvreserve, sndreserve; struct sockaddr *saddr; #if defined(__FreeBSD__) GIANT_REQUIRED; #elif defined(__APPLE__) thread_funnel_switch(KERNEL_FUNNEL, NETWORK_FUNNEL); #endif /* * XXXREDIRECT: perhaps we should actually ping the * destination here. 
*/ if (ISSET(rpc->rc_flag, RPCCLNT_REDIRECT)) return (0); /* create the socket */ rpc->rc_so = NULL; saddr = rpc->rc_name; error = SOCREATE(saddr->sa_family, &rpc->rc_so, rpc->rc_sotype, rpc->rc_soproto, td); if (error != 0) { RPCDEBUG("error %d in socreate()", error); RPC_RETURN(error); } so = rpc->rc_so; rpc->rc_soflags = so->so_proto->pr_flags; /* * Some servers require that the client port be a reserved port * number. We always allocate a reserved port, as this prevents * filehandle disclosure through UDP port capture. */ if (saddr->sa_family == AF_INET) { SOSETOPT(error, so, IPPROTO_IP, IP_PORTRANGE, int, IP_PORTRANGE_LOW); if (error) goto bad; SOBIND(error, so, AF_INET, INADDR_ANY, 0, td); if (error) goto bad; SOSETOPT(error, so, IPPROTO_IP, IP_PORTRANGE, int, IP_PORTRANGE_DEFAULT); if (error) goto bad; } /* * Protocols that do not require connections may be optionally left * unconnected for servers that reply from a port other than * NFS_PORT. */ if (ISSET(rpc->rc_flag, RPCCLNT_NOCONN)) { if (ISSET(rpc->rc_soflags, PR_CONNREQUIRED)) { error = ENOTCONN; goto bad; } } else { error = SOCONNECT(so, saddr, td); if (error) { RPCDEBUG("soconnect returns %d", error); goto bad; } /* * Wait for the connection to complete. Cribbed from the * connect system call but with the wait timing out so that * interruptible mounts don't hang here for a long time. */ s = SPLNET(); while (ISSET(so->so_state, SS_ISCONNECTING) && so->so_error == 0) { (void)tsleep((caddr_t) & so->so_timeo, PSOCK, "rpc", 2 * hz); /* * XXX needs to catch interrupt signals. 
something * like this: if ((so->so_state & SS_ISCONNECTING) && * so->so_error == 0 && rep && (error = * nfs_sigintr(nmp, rep, rep->r_td)) != 0) { * so->so_state &= ~SS_ISCONNECTING; splx(s); goto * bad; } */ } if (so->so_error) { error = so->so_error; so->so_error = 0; splx(s); goto bad; } splx(s); } if (rpc->rc_flag & (RPCCLNT_SOFT | RPCCLNT_INT)) { so->so_rcv.sb_timeo = (5 * hz); so->so_snd.sb_timeo = (5 * hz); } else { so->so_rcv.sb_timeo = 0; so->so_snd.sb_timeo = 0; } if (rpc->rc_sotype == SOCK_DGRAM) { sndreserve = rpc->rc_wsize + RPC_MAXPKTHDR; rcvreserve = rpc->rc_rsize + RPC_MAXPKTHDR; } else if (rpc->rc_sotype == SOCK_SEQPACKET) { sndreserve = (rpc->rc_wsize + RPC_MAXPKTHDR) * 2; rcvreserve = (rpc->rc_rsize + RPC_MAXPKTHDR) * 2; } else { if (rpc->rc_sotype != SOCK_STREAM) panic("rpcclnt_connect() bad sotype"); if (so->so_proto->pr_flags & PR_CONNREQUIRED) { SOSETOPT(error, so, SOL_SOCKET, SO_KEEPALIVE, int, 1); } if (so->so_proto->pr_protocol == IPPROTO_TCP) { SOSETOPT(error, so, IPPROTO_TCP, TCP_NODELAY, int, 1); if (error) goto bad; } sndreserve = (rpc->rc_wsize + RPC_MAXPKTHDR + sizeof(u_int32_t)) * 2; rcvreserve = (rpc->rc_rsize + RPC_MAXPKTHDR + sizeof(u_int32_t)) * 2; } error = soreserve(so, sndreserve, rcvreserve); if (error) goto bad; so->so_rcv.sb_flags |= SB_NOINTR; so->so_snd.sb_flags |= SB_NOINTR; #ifdef __APPLE__ thread_funnel_switch(NETWORK_FUNNEL, KERNEL_FUNNEL); #endif /* Initialize other non-zero congestion variables */ rpc->rc_srtt[0] = rpc->rc_srtt[1] = rpc->rc_srtt[2] = rpc->rc_srtt[3] = rpc->rc_srtt[4] = (RPC_TIMEO << 3); rpc->rc_sdrtt[0] = rpc->rc_sdrtt[1] = rpc->rc_sdrtt[2] = rpc->rc_sdrtt[3] = rpc->rc_sdrtt[4] = 0; rpc->rc_cwnd = RPC_MAXCWND / 2; /* Initial send window */ rpc->rc_sent = 0; rpc->rc_timeouts = 0; /* XXX - pass in pseudflavor from mount options. 
*/ error = rpcauth_create(rpc, RPCAUTH_PFLAVOR_KERB5); if (error != 0) goto bad; RPC_RETURN(0); bad: #ifdef __APPLE__ thread_funnel_switch(NETWORK_FUNNEL, KERNEL_FUNNEL); #endif rpcclnt_disconnect(rpc); RPC_RETURN(error); } /* * Reconnect routine: Called when a connection is broken on a reliable * protocol. - clean up the old socket - nfs_connect() again - set * TASK_MUSTRESEND for all outstanding requests on mount point If this * fails the mount point is DEAD! nb: Must be called with the * nfs_sndlock() set on the mount point. */ int rpcclnt_reconnect(struct rpctask *rep, cthread_t *td) { struct rpctask *rp; struct rpcclnt *rpc = rep->r_rpcclnt; int error; if (ISSET(rpc->rc_flag, RPCCLNT_REDIRECT)) panic("rpcclnt_reconnect() called while in redirecting requests"); rpcclnt_disconnect(rpc); while ((error = rpcclnt_connect(rpc, td)) != 0) { if (error == EINTR || error == ERESTART) RPC_RETURN(EINTR); tsleep(&lbolt, PSOCK, "rpccon", 0); } /* * Loop through outstanding request list and fix up all * requests on old socket. */ for (rp = TAILQ_FIRST(&rpctask_q); rp != NULL; rp = TAILQ_NEXT(rp, r_chain)) if (rp->r_rpcclnt == rpc) SET(rp->r_flags, TASK_MUSTRESEND); RPC_RETURN(0); } void rpcclnt_disconnect(struct rpcclnt *rpc) { struct socket *so; if (ISSET(rpc->rc_flag, RPCCLNT_REDIRECT)) return; #if defined(__FreeBSD__) GIANT_REQUIRED; /* XXX until socket locking done */ #elif defined(__APPLE__) thread_funnel_switch(KERNEL_FUNNEL, NETWORK_FUNNEL); #endif if (rpc->rc_so != NULL) { so = rpc->rc_so; rpc->rc_so = NULL; soshutdown(so, 2); soclose(so); } #ifdef __APPLE__ thread_funnel_switch(NETWORK_FUNNEL, KERNEL_FUNNEL); #endif } void rpcclnt_safedisconnect(struct rpcclnt *rpc) { struct rpctask dummytask; if (ISSET(rpc->rc_flag, RPCCLNT_REDIRECT)) return; bzero(&dummytask, sizeof(dummytask)); dummytask.r_rpcclnt = rpc; rpcclnt_rcvlock(&dummytask); rpcclnt_disconnect(rpc); rpcclnt_rcvunlock(&rpc->rc_flag); } /* * This is the nfs send routine. 
For connection based socket types, it must
 * be called with an nfs_sndlock() on the socket. "rep == NULL" indicates
 * that it has been called from a server. For the client side: - return EINTR
 * if the RPC is terminated, 0 otherwise - set TASK_MUSTRESEND if the send fails
 * for any reason - do any cleanup required by recoverable socket errors
 * (???) For the server side: - return EINTR or ERESTART if interrupted by a
 * signal - return EPIPE if a connection is lost for connection based sockets
 * (TCP...) - do any cleanup required by recoverable socket errors (???)
 */
/*
 * rpcclnt_send: hand the mbuf chain 'top' to the socket layer.
 *
 * so  - socket to send on; when rep != NULL it is replaced by the socket
 *       currently recorded in rep->r_rpcclnt->rc_so.
 * nam - destination address; only passed down when the protocol does not
 *       require a connection and the socket is not connected.
 * top - request to send.  It is freed here on the two early-return paths.
 * rep - task this send belongs to, or NULL.
 *
 * Returns 0, EINTR, ERESTART, EWOULDBLOCK or EPIPE; every other send error
 * is logged and then reported as 0.
 */
static int
rpcclnt_send(struct socket *so,
#if defined(__OpenBSD__)
    struct mbuf *nam,
#else
    struct sockaddr *nam,
#endif
    struct mbuf *top, struct rpctask *rep)
{
#if defined(__OpenBSD__)
	struct mbuf *sendnam;
#elif defined(__APPLE__)
	struct sockaddr *sendnam;
#else
	struct sockaddr *sendnam;
	struct thread *td = curthread;
#endif
	/*
	 * NOTE(review): td is only declared in the last branch above, yet it
	 * is named in the SOSEND() call below on every platform; presumably
	 * the SOSEND macro drops that argument elsewhere -- confirm.
	 */
	int error, soflags, flags;

#if defined(__FreeBSD__)
	GIANT_REQUIRED;		/* XXX until socket locking done */
#endif

	if (rep != NULL) {
		/* Task was terminated: drop the request. */
		if (ISSET(rep->r_flags, TASK_SOFTTERM)) {
			m_freem(top);
			RPC_RETURN(EINTR);
		}
		/* No socket right now: mark for resend and report success. */
		if ((so = rep->r_rpcclnt->rc_so) == NULL) {
			SET(rep->r_flags, TASK_MUSTRESEND);
			m_freem(top);
			RPC_RETURN(0);
		}
		CLR(rep->r_flags, TASK_MUSTRESEND);
		soflags = rep->r_rpcclnt->rc_soflags;
	} else
		soflags = so->so_proto->pr_flags;

	/* An explicit destination is only needed on unconnected sockets. */
	if ((soflags & PR_CONNREQUIRED) || (so->so_state & SS_ISCONNECTED))
		sendnam = NULL;
	else
		sendnam = nam;

	/* Each request is sent as one record on SOCK_SEQPACKET. */
	if (so->so_type == SOCK_SEQPACKET)
		flags = MSG_EOR;
	else
		flags = 0;

#ifdef __APPLE__
	thread_funnel_switch(KERNEL_FUNNEL, NETWORK_FUNNEL);
#endif
	error = SOSEND(so, sendnam, NULL, top, NULL, flags, td);
#ifdef __APPLE__
	thread_funnel_switch(NETWORK_FUNNEL, KERNEL_FUNNEL);
#endif

	if (error != 0) {
		if (rep != NULL) {
			log(LOG_INFO, "rpc send error %d for service %s\n",
			    error, rep->r_rpcclnt->rc_prog->prog_name);
			/*
			 * Deal with errors for the client side.
			 */
			if (ISSET(rep->r_flags, TASK_SOFTTERM))
				error = EINTR;
			else
				SET(rep->r_flags, TASK_MUSTRESEND);
		} else
			log(LOG_INFO, "rpc service send error %d\n", error);

		/*
		 * Handle any recoverable (soft) socket errors here.
		 * Only these four values are passed back to the caller.
		 */
		if (error != EINTR && error != ERESTART &&
		    error != EWOULDBLOCK && error != EPIPE)
			error = 0;
	}

	RPC_RETURN(error);
}

/*
 * Receive a Sun RPC Request/Reply. For SOCK_DGRAM, the work is all
 * done by soreceive(), but for SOCK_STREAM we must deal with the
 * Record Mark and consolidate the data into a new mbuf list. nb:
 * Sometimes TCP passes the data up to soreceive() in long lists of
 * small mbufs. For SOCK_STREAM we must be very careful to read an
 * entire record once we have read any of it, even if the system call
 * has been interrupted.
 *
 * rep   - task waiting for the reply; supplies the client and its socket.
 * aname - set to NULL on entry; handed to soreceive() to receive the sender
 *         address on an unconnected datagram socket.
 * mp    - set to NULL on entry; receives the mbuf chain that was read.
 * td    - calling thread, stored in the uio.
 */
static int
rpcclnt_receive(struct rpctask *rep,
#if defined(__OpenBSD__)
    struct mbuf **aname,
#else
    struct sockaddr **aname,
#endif
    struct mbuf **mp, cthread_t *td)
{
	struct socket *so;
	struct uio auio;
	struct iovec aio;
	struct mbuf *m;
	struct mbuf *control;
	u_int32_t len;
#if defined(__OpenBSD__)
	struct mbuf **getnam;
#else
	struct sockaddr **getnam;
#endif
	int error, sotype, rcvflg;

#if defined(__FreeBSD__)
	GIANT_REQUIRED;		/* XXX until socket locking done */
#endif

	/*
	 * Set up arguments for soreceive()
	 */
	*mp = NULL;
	*aname = NULL;
	sotype = rep->r_rpcclnt->rc_sotype;

	/*
	 * For reliable protocols, lock against other senders/receivers in
	 * case a reconnect is necessary. For SOCK_STREAM, first get the
	 * Record Mark to find out how much more there is to get. We must
	 * lock the socket against other receivers until we have an entire
	 * rpc request/reply.
	 */
	if (sotype != SOCK_DGRAM) {
		error = rpcclnt_sndlock(&rep->r_rpcclnt->rc_flag, rep);
		if (error != 0)
			RPC_RETURN(error);
tryagain:
		/*
		 * Check for fatal errors and resending request.
		 */
		/*
		 * Ugh: If a reconnect attempt just happened, rc_so would
		 * have changed. NULL indicates a failed attempt that has
		 * essentially shut down this mount point. 
*/
		/* Reply already delivered, or task terminated: give up. */
		if (rep->r_mrep || ISSET(rep->r_flags, TASK_SOFTTERM)) {
			rpcclnt_sndunlock(&rep->r_rpcclnt->rc_flag);
			RPC_RETURN(EINTR);
		}
		so = rep->r_rpcclnt->rc_so;
		if (so == NULL) {
			error = rpcclnt_reconnect(rep, td);
			if (error) {
				rpcclnt_sndunlock(&rep->r_rpcclnt->rc_flag);
				RPC_RETURN(error);
			}
			goto tryagain;
		}
		/* Resend a copy of the request for as long as it is flagged. */
		while (ISSET(rep->r_flags, TASK_MUSTRESEND)) {
			m = m_copym(rep->r_mreq, 0, M_COPYALL, M_TRYWAIT);
			rpcstats.rpcretries++;
			error = rpcclnt_send(so, rep->r_rpcclnt->rc_name, m, rep);
			if (error) {
				/*
				 * Interrupted sends are returned as-is; any
				 * other error triggers a reconnect, and only a
				 * failed reconnect is returned.
				 */
				if (error == EINTR || error == ERESTART ||
				    (error = rpcclnt_reconnect(rep, td)) != 0) {
					rpcclnt_sndunlock(
					    &rep->r_rpcclnt->rc_flag);
					RPC_RETURN(error);
				}
				goto tryagain;
			}
		}
		rpcclnt_sndunlock(&rep->r_rpcclnt->rc_flag);
		if (sotype == SOCK_STREAM) {
			/* First read exactly the 4-byte record mark into len. */
			aio.iov_base = (caddr_t) & len;
			aio.iov_len = sizeof(u_int32_t);
			auio.uio_iov = &aio;
			auio.uio_iovcnt = 1;
			auio.uio_segflg = UIO_SYSSPACE;
			auio.uio_rw = UIO_READ;
			auio.uio_offset = 0;
			auio.uio_resid = sizeof(u_int32_t);
#if defined(__OpenBSD__) || defined(__APPLE__)
			auio.uio_procp = td;
#else
			auio.uio_td = td;
#endif
			do {
				rcvflg = MSG_WAITALL;
#ifdef __APPLE__
				thread_funnel_switch(KERNEL_FUNNEL,
				    NETWORK_FUNNEL);
#endif
				error = soreceive(so, NULL, &auio, NULL, NULL,
				    &rcvflg);
#ifdef __APPLE__
				thread_funnel_switch(NETWORK_FUNNEL,
				    KERNEL_FUNNEL);
#endif
				/* A timed-out wait is the point to notice termination. */
				if (error == EWOULDBLOCK && rep &&
				    ISSET(rep->r_flags, TASK_SOFTTERM))
					RPC_RETURN(EINTR);
			} while (error == EWOULDBLOCK);
			if (error == 0 && auio.uio_resid > 0) {
				log(LOG_INFO,
				    "short receive (%zu/%zu) from rpc server %s\n",
				    sizeof(u_int32_t) - auio.uio_resid,
				    sizeof(u_int32_t),
				    rep->r_rpcclnt->rc_prog->prog_name);
				error = EPIPE;
			}
			if (error != 0)
				goto errout;
			/* Strip the top bit of the record mark; the rest is the length. */
			len = ntohl(len) & ~0x80000000;
			/*
			 * This is SERIOUS! We are out of sync with the
			 * sender and forcing a disconnect/reconnect is all I
			 * can do.
			 */
			if (len > RPC_MAXPACKET) {
				/* NOTE(review): len is u_int32_t but printed with %d. */
				log(LOG_ERR, "%s (%d) from rpc server %s\n",
				    "impossible packet length", len,
				    rep->r_rpcclnt->rc_prog->prog_name);
				error = EFBIG;
				goto errout;
			}
			/* Now read the record body; interruptions are retried. */
			auio.uio_resid = len;
#ifdef __APPLE__
			thread_funnel_switch(KERNEL_FUNNEL, NETWORK_FUNNEL);
#endif
			do {
				rcvflg = MSG_WAITALL;
				error = soreceive(so, NULL, &auio, mp, NULL,
				    &rcvflg);
			} while (error == EWOULDBLOCK || error == EINTR ||
			    error == ERESTART);
#ifdef __APPLE__
			thread_funnel_switch(NETWORK_FUNNEL, KERNEL_FUNNEL);
#endif
			if (error == 0 && auio.uio_resid > 0) {
				log(LOG_INFO,
				    "short receive (%d/%d) from rpc server %s\n",
				    len - auio.uio_resid, len,
				    rep->r_rpcclnt->rc_prog->prog_name);
				error = EPIPE;
			}
		} else {
			/*
			 * NB: Since uio_resid is big, MSG_WAITALL is ignored
			 * and soreceive() will return when it has either a
			 * control msg or a data msg. We have no use for
			 * control msg., but must grab them and then throw
			 * them away so we know what is going on.
			 */
			auio.uio_resid = len = 100000000;	/* Anything Big */
#if defined(__OpenBSD__) || defined(__APPLE__)
			auio.uio_procp = td;
#else
			auio.uio_td = td;
#endif
#ifdef __APPLE__
			thread_funnel_switch(KERNEL_FUNNEL, NETWORK_FUNNEL);
#endif
			do {
				rcvflg = 0;
				error = soreceive(so, NULL, &auio, mp, &control,
				    &rcvflg);
				if (control)
					m_freem(control);
				if (error == EWOULDBLOCK && rep) {
					if (ISSET(rep->r_flags, TASK_SOFTTERM)) {
#ifdef __APPLE__
						thread_funnel_switch(
						    NETWORK_FUNNEL,
						    KERNEL_FUNNEL);
#endif
						RPC_RETURN(EINTR);
					}
				}
				/* Keep going while only control messages arrive. */
			} while (error == EWOULDBLOCK ||
			    (!error && *mp == NULL && control));
#ifdef __APPLE__
			thread_funnel_switch(NETWORK_FUNNEL, KERNEL_FUNNEL);
#endif
			if ((rcvflg & MSG_EOR) == 0)
				printf("Egad!!\n");
			if (error == 0 && *mp == NULL)
				error = EPIPE;
			/* len becomes the number of bytes actually received. */
			len -= auio.uio_resid;
		}
errout:
		/*
		 * Any error other than an interruption: discard what was
		 * read, retake the send lock, reconnect and start over.
		 */
		if (error != 0 && error != EINTR && error != ERESTART) {
			m_freem(*mp);
			*mp = (struct mbuf *)NULL;
			if (error != EPIPE)
				log(LOG_INFO,
				    "receive error %d from rpc server %s\n",
				    error,
				    rep->r_rpcclnt->rc_prog->prog_name);
			error = rpcclnt_sndlock(&rep->r_rpcclnt->rc_flag,
			    rep);
			if (error == 0)
				error = rpcclnt_reconnect(rep, td);
			if (error == 0)
				goto tryagain;
		}
	} else {
		/* Datagram socket: no locking, no record mark, no reconnect. */
		if ((so = rep->r_rpcclnt->rc_so) == NULL)
			RPC_RETURN(EACCES);
		/* Only ask for the sender address on an unconnected socket. */
		if (so->so_state & SS_ISCONNECTED)
			getnam = NULL;
		else
			getnam = aname;
		auio.uio_resid = len = 1000000;
#if defined(__OpenBSD__) || defined(__APPLE__)
		auio.uio_procp = td;
#else
		auio.uio_td = td;
#endif

#ifdef __APPLE__
		thread_funnel_switch(KERNEL_FUNNEL, NETWORK_FUNNEL);
#endif
		do {
			rcvflg = 0;
			error = soreceive(so, getnam, &auio, mp, NULL, &rcvflg);
			RPCDEBUG("soreceivce returns %d", error);
			if (error == EWOULDBLOCK &&
			    ISSET(rep->r_flags, TASK_SOFTTERM)) {
				RPCDEBUG("wouldblock && softerm -> EINTR");
#ifdef __APPLE__
				thread_funnel_switch(NETWORK_FUNNEL,
				    KERNEL_FUNNEL);
#endif
				RPC_RETURN(EINTR);
			}
		} while (error == EWOULDBLOCK);
#ifdef __APPLE__
		thread_funnel_switch(NETWORK_FUNNEL, KERNEL_FUNNEL);
#endif
		len -= auio.uio_resid;
	}
	/* On any error the caller gets no mbufs back. */
	if (error != 0) {
		m_freem(*mp);
		*mp = NULL;
	}

	/*
	 * Search for any mbufs that are not a multiple of 4 bytes
	 * long or with m_data not longword aligned. These could
	 * cause pointer alignment problems, so copy them to well
	 * aligned mbufs.
	 */
	rpcclnt_realign(mp, 5 * RPCX_UNSIGNED);
	RPC_RETURN(error);
}

/*
 * Implement receipt of reply on a socket. We must search through the list of
 * received datagrams matching them with outstanding requests using the xid,
 * until ours is found.
 *
 * myrep - the task whose reply the caller is waiting for.
 * td    - calling thread, passed to rpcclnt_receive().
 */
/* ARGSUSED */
static int
rpcclnt_reply(struct rpctask *myrep, cthread_t *td)
{
	struct rpctask *rep;
	struct rpcclnt *rpc = myrep->r_rpcclnt;
	int32_t t1;
	struct mbuf *mrep, *md;
#if defined(__OpenBSD__)
	struct mbuf *nam;
#else
	struct sockaddr *nam;
#endif
	u_int32_t rxid, *tl;
	caddr_t dpos, cp2;
	int error;

	/*
	 * Loop around until we get our own reply
	 */
	for (;;) {
		/*
		 * Lock against other receivers so that I don't get stuck in
		 * sbwait() after someone else has received my reply for me.
		 * Also necessary for connection based protocols to avoid
		 * race conditions during a reconnect. 
*/ error = rpcclnt_rcvlock(myrep); if (error) RPC_RETURN(error); /* Already received, bye bye */ if (myrep->r_mrep != NULL) { rpcclnt_rcvunlock(&rpc->rc_flag); RPC_RETURN(0); } /* * Get the next Rpc reply off the socket */ error = rpcclnt_receive(myrep, &nam, &mrep, td); rpcclnt_rcvunlock(&rpc->rc_flag); if (error != 0) { /* * Ignore routing errors on connectionless * protocols?? */ if (RPCIGNORE_SOERROR(rpc->rc_soflags, error)) { rpc->rc_so->so_error = 0; if (myrep->r_flags & TASK_GETONEREP) RPC_RETURN(0); RPCDEBUG("ingoring routing error on connectionless protocol."); continue; } RPC_RETURN(error); } #if defined(__OpenBSD__) if (nam != NULL) m_freem(nam); #else if (nam != NULL) FREE(nam, M_SONAME); #endif /* * Get the xid and check that it is an rpc reply */ md = mrep; dpos = mtod(md, caddr_t); rpcm_dissect(tl, u_int32_t *, 2 * RPCX_UNSIGNED); rxid = *tl++; if (*tl != rpc_reply) { rpcstats.rpcinvalid++; m_freem(mrep); rpcmout: if (myrep->r_flags & TASK_GETONEREP) RPC_RETURN(0); continue; } /* * Loop through the request list to match up the reply Iff no * match, just drop the datagram */ TAILQ_FOREACH(rep, &rpctask_q, r_chain) { if (rep->r_mrep == NULL && rxid == rep->r_xid) { /* Found it.. */ rep->r_mrep = mrep; rep->r_md = md; rep->r_dpos = dpos; /* * Update congestion window. Do the additive * increase of one rpc/rtt. */ if (rpc->rc_cwnd <= rpc->rc_sent) { rpc->rc_cwnd += (RPC_CWNDSCALE*RPC_CWNDSCALE + (rpc->rc_cwnd >> 1))/rpc->rc_cwnd; if (rpc->rc_cwnd > RPC_MAXCWND) rpc->rc_cwnd = RPC_MAXCWND; } CLR(rep->r_flags, TASK_SENT); rpc->rc_sent -= RPC_CWNDSCALE; /* * Update rtt using a gain of 0.125 on the * mean and a gain of 0.25 on the deviation. */ if (ISSET(rep->r_flags, TASK_TIMING)) { /* * Since the timer resolution of * NFS_HZ is so course, it can often * result in r_rtt == 0. Since r_rtt * == N means that the actual rtt is * between N+dt and N+2-dt ticks, add * 1. 
*/ t1 = rep->r_rtt + 1; t1 -= (RPC_SRTT(rpc, rep) >> 3); RPC_SRTT(rpc, rep) += t1; if (t1 < 0) t1 = -t1; t1 -= (RPC_SDRTT(rpc, rep) >> 2); RPC_SDRTT(rpc, rep) += t1; } rpc->rc_timeouts = 0; break; } } /* * If not matched to a request, drop it. If it's mine, get * out. */ if (rep == 0) { rpcstats.rpcunexpected++; RPCDEBUG("rpc reply not matched\n"); m_freem(mrep); } else if (rep == myrep) { if (rep->r_mrep == NULL) panic("rpcreply nil"); RPC_RETURN(0); } if (myrep->r_flags & TASK_GETONEREP) RPC_RETURN(0); } } /* XXX: ignores tryagain! */ /* * code from nfs_request - goes something like this - fill in task struct - * links task into list - calls nfs_send() for first transmit - calls * nfs_receive() to get reply - fills in reply (which should be initialized * prior to calling), which is valid when 0 is returned and is NEVER freed in * this function * * always frees the request header, but NEVER frees 'mrest' * */ /* * note that reply->result_* are invalid unless reply->type == * RPC_MSGACCEPTED and reply->status == RPC_SUCCESS and that reply->verf_* * are invalid unless reply->type == RPC_MSGACCEPTED */ int rpcclnt_request(struct rpcclnt *rpc, struct mbuf *mrest, int procnum, cthread_t *td, struct ucred *cred, struct rpc_reply *reply) { struct mbuf *m, *mrep; struct rpctask *task, _task; u_int32_t *tl; struct mbuf *md, *mheadend; caddr_t dpos, cp2; int t1, s, error = 0, mrest_len; u_int32_t xid; task = &_task; memset(task, 0, sizeof(*task)); if (ISSET(rpc->rc_flag, RPCCLNT_REDIRECT)) return (rpcclnt_request_redirect(rpc, mrest, procnum, td, cred, reply)); task->r_rpcclnt = rpc; task->r_procnum = procnum; task->r_td = td; M_LENGTH(mrest_len, mrest); m = rpcclnt_buildheader(rpc, procnum, mrest, &xid, &mheadend, cred); /* * For stream protocols, insert a Sun RPC Record Mark. 
*/
	if (rpc->rc_sotype == SOCK_STREAM) {
		M_PREPEND(m, RPCX_UNSIGNED, M_TRYWAIT);
		/* Top bit set, low bits = record length without the mark. */
		*mtod(m, u_int32_t *) = htonl(0x80000000 |
		    (m->m_pkthdr.len - RPCX_UNSIGNED));
	}
	task->r_mreq = m;
	task->r_xid = xid;

	if (ISSET(rpc->rc_flag, RPCCLNT_SOFT))
		task->r_retry = rpc->rc_retry;
	else
		task->r_retry = RPC_MAXREXMIT + 1;	/* past clip limit */
	task->r_rtt = task->r_rexmit = 0;

	/* Only procedures with a non-zero timer class are timed. */
	if (rpcclnt_proct(rpc, procnum) > 0)
		task->r_flags = TASK_TIMING;
	else
		task->r_flags = 0;
	task->r_mrep = NULL;

	/*
	 * Do the client side RPC.
	 */
	rpcstats.rpcrequests++;

	/*
	 * Chain request into list of outstanding requests. Be sure to put it
	 * LAST so timer finds oldest requests first.
	 */
	s = splsoftclock();
	TAILQ_INSERT_TAIL(&rpctask_q, task, r_chain);

	/*
	 * If backing off another request or avoiding congestion, don't send
	 * this one now but let timer do it. If not timing a request, do it
	 * now.
	 */
	if (rpc->rc_so && (rpc->rc_sotype != SOCK_DGRAM ||
	    ISSET(rpc->rc_flag, RPCCLNT_DUMBTIMR) ||
	    rpc->rc_sent < rpc->rc_cwnd)) {
		splx(s);

		/* The send lock is only taken for connection-based protocols. */
		if (rpc->rc_soflags & PR_CONNREQUIRED)
			error = rpcclnt_sndlock(&rpc->rc_flag, task);
		if (error == 0) {
			/* Send a copy; the original stays in r_mreq. */
			error = rpcclnt_send(rpc->rc_so, rpc->rc_name,
			    m_copym(m, 0, M_COPYALL, M_TRYWAIT), task);
			if (rpc->rc_soflags & PR_CONNREQUIRED)
				rpcclnt_sndunlock(&rpc->rc_flag);
		}
		/* Count the request against the window only if it went out. */
		if (error == 0 && ISSET(task->r_flags, TASK_MUSTRESEND) == 0) {
			rpc->rc_sent += RPC_CWNDSCALE;
			SET(task->r_flags, TASK_SENT);
		}
	} else {
		splx(s);
		/* Not sent here; r_rtt is set to -1 for this case. */
		task->r_rtt = -1;
	}

	/*
	 * Wait for the reply from our send or the timer's.
	 */
	if (error == 0 || error == EPIPE)
		error = rpcclnt_reply(task, td);

	/*
	 * RPC done, unlink the request.
	 */
	s = splsoftclock();
	TAILQ_REMOVE(&rpctask_q, task, r_chain);
	splx(s);

	/*
	 * Decrement the outstanding request count.
	 */
	if (ISSET(task->r_flags, TASK_SENT)) {
		CLR(task->r_flags, TASK_SENT);	/* paranoia */
		rpc->rc_sent -= RPC_CWNDSCALE;
	}
	/*
	 * If there was a successful reply and a tprintf msg. tprintf a
	 * response.
	 */
	if (error == 0 && (ISSET(task->r_flags, TASK_TPRINTFMSG)))
		rpcclnt_msg(task->r_td, rpc->rc_prog->prog_name,
		    "is alive again");

	/* free request header (leaving mrest) */
	mheadend->m_next = NULL;
	m_freem(task->r_mreq);

	/* initialize reply */
	reply->mrep = task->r_mrep;
	reply->verf_md = NULL;
	reply->result_md = NULL;

	/* Parsing state picked up from where rpcclnt_reply() left off. */
	mrep = task->r_mrep;
	md = task->r_md;
	dpos = task->r_dpos;

	if (error != 0)
		goto rpcmout;

	/*
	 * break down the rpc header and check if ok
	 *
	 * rpcm_dissect/rpcm_adv are defined elsewhere; presumably they use
	 * md/dpos/mrep/cp2/t1 and jump to rpcmout on failure -- confirm.
	 */
	rpcm_dissect(tl, u_int32_t *, RPCX_UNSIGNED);
	reply->stat.type = fxdr_unsigned(u_int32_t, *tl);

	if (reply->stat.type == RPC_MSGDENIED) {
		rpcm_dissect(tl, u_int32_t *, RPCX_UNSIGNED);
		reply->stat.status = fxdr_unsigned(u_int32_t, *tl);

		switch (reply->stat.status) {
		case RPC_MISMATCH:
			rpcm_dissect(tl, u_int32_t *, 2 * RPCX_UNSIGNED);
			reply->stat.mismatch_info.low =
			    fxdr_unsigned(u_int32_t, *tl++);
			reply->stat.mismatch_info.high =
			    fxdr_unsigned(u_int32_t, *tl);
			error = EOPNOTSUPP;
			break;
		case RPC_AUTHERR:
			rpcm_dissect(tl, u_int32_t *, RPCX_UNSIGNED);
			reply->stat.autherr = fxdr_unsigned(u_int32_t, *tl);
			error = EACCES;
			break;
		default:
			error = EBADRPC;
			break;
		}
		goto rpcmout;
	} else if (reply->stat.type != RPC_MSGACCEPTED) {
		error = EBADRPC;
		goto rpcmout;
	}

	rpcm_dissect(tl, u_int32_t *, 2 * RPCX_UNSIGNED);

	/*
	 * Verifier: verf_md/verf_dpos are recorded after the two header
	 * words (type, size) have been consumed.
	 */
	reply->verf_md = md;
	reply->verf_dpos = dpos;

	reply->verf_type = fxdr_unsigned(u_int32_t, *tl++);
	reply->verf_size = fxdr_unsigned(u_int32_t, *tl);

	/* Skip over the verifier body, rounded up by rpcm_rndup(). */
	if (reply->verf_size != 0)
		rpcm_adv(rpcm_rndup(reply->verf_size));

	rpcm_dissect(tl, u_int32_t *, RPCX_UNSIGNED);
	reply->stat.status = fxdr_unsigned(u_int32_t, *tl);

	if (reply->stat.status == RPC_SUCCESS) {
		/*
		 * If the header ended exactly at the end of this mbuf, the
		 * results start at the beginning of the next one.
		 */
		if ((uint32_t)(dpos - mtod(md, caddr_t)) >= md->m_len) {
			RPCDEBUG("where is the next mbuf?");
			RPCDEBUG("%d -> %d",
			    (int)(dpos - mtod(md, caddr_t)), md->m_len);
			if (md->m_next == NULL) {
				error = EBADRPC;
				goto rpcmout;
			} else {
				reply->result_md = md->m_next;
				reply->result_dpos = mtod(reply->result_md,
				    caddr_t);
			}
		} else {
			reply->result_md = md;
			reply->result_dpos = dpos;
		}
	} else if (reply->stat.status == RPC_PROGMISMATCH) {
		rpcm_dissect(tl, u_int32_t *, 2 * RPCX_UNSIGNED);
		reply->stat.mismatch_info.low = fxdr_unsigned(u_int32_t, *tl++);
		reply->stat.mismatch_info.high = fxdr_unsigned(u_int32_t, *tl);
		error = EBADRPC;	/* XXXMARIUS */
	} else if (reply->stat.status > 5) {
		error = EBADRPC;
		goto rpcmout;
	}

	/*
	 * XXX - are there any situations where result_md == null is
	 * fine? Also, should we rpcauth_vrfy() any time when error
	 * != 0?
	 */
	if (reply->result_md == NULL)
		error = EBADRPC;
	else if (error == 0)
		error = rpcauth_vrfy_rep(rpc, cred, reply);

	/*
	 * Outgoing:
	 *  .result_md contains the start of the body;
	 */

rpcmout:
	RPC_RETURN(error);
}

/*
 * Redirect variant of rpcclnt_request(), used when the client has
 * RPCCLNT_REDIRECT set.  The part visible here looks up the rpcd handler
 * thread and maps an anonymous read/write region, sized to the page-rounded
 * length of mrest, into that thread's process address space.
 * NOTE(review): the remainder of this function lies outside this section.
 */
static int
rpcclnt_request_redirect(struct rpcclnt *rpc, struct mbuf *mrest,
    int procnum, cthread_t *cur_td, struct ucred *cred,
    struct rpc_reply *reply)
{
	int mrest_len, error, prot = PROT_READ|PROT_WRITE, havehandlertd;
	struct vmspace *vms;
	cthread_t *handler_td;
	vm_offset_t va;
	vm_size_t size;
	vm_map_t map;
	struct mbuf *m = mrest;
	struct uio uio;
	struct iovec iov;
	struct rpcclnt_redirect_req req;
	struct rpcclnt_redirect_rep rep;
	size_t siz, left, resid;

	handler_td = rpcdev_gethandlertd(RPCDEV_TYPE_RPCD);
	if (handler_td == NULL) {
		printf("no rpcd running\n");	/* XXX: this message could
						 * easily flood a system */
		return (ESRCH);
	}

	havehandlertd = 1;

	M_LENGTH(mrest_len, mrest);
	size = (vm_size_t) round_page(mrest_len);
	vms = cthread_proc(handler_td)->p_vmspace;

	/*
	 * Find an address: the page-rounded end of the handler's data
	 * segment at its RLIMIT_DATA maximum.
	 */
	PROC_LOCK(cthread_proc(handler_td));
	va = round_page((vm_offset_t)vms->vm_daddr +
	    lim_max(cthread_proc(handler_td), RLIMIT_DATA));
	PROC_UNLOCK(cthread_proc(handler_td));

	map = &vms->vm_map;

	error = vm_mmap(map, &va, size,	/* size -> vasize */
	    prot, prot, MAP_ANON, NULL, 0);

	if (error != 0)
		goto out0;

	left = mrest_len;

	/*
	 * Copy the mbuf into the userspace vm.
	 *
	 * XXXMARIUS: respect ZERO_COPY_SOCKETS here.
	 *
	 * Wow, this is ugly -- i love it! 
*/ do { iov.iov_base = mtod(m, void *); iov.iov_len = min(left, m->m_len); uio.uio_iov = &iov; uio.uio_iovcnt = 1; uio.uio_offset = va + (mrest_len - left); uio.uio_resid = resid = iov.iov_len; uio.uio_rw = UIO_WRITE; uio.uio_segflg = UIO_SYSSPACE; uio.uio_td = cur_td; #ifdef ULTRADEBUG printf("Upcall:", left); print_hexl(mtod(m, void *), min(left, m->m_len), 0); #endif error = proc_rwmem(cthread_proc(handler_td), &uio); left -= resid - uio.uio_resid; m = m->m_next; } while (left > 0 && error == 0 && m != NULL); if (error != 0) { printf("proc_rwmem() failed (%d)\n", error); goto out1; } req.req_uid = cred->cr_uid; req.req_procnum = procnum; req.req_bp = (caddr_t)va; req.req_len = mrest_len; strlcpy(req.req_host, rpc->rc_servername, sizeof(req.req_host)); siz = sizeof(rep); rpcdev_puthandlertd(RPCDEV_TYPE_RPCD); havehandlertd = 0; error = rpcdev_call(RPCDEV_TYPE_RPCD, (caddr_t)&req, sizeof(req), (caddr_t)&rep, &siz, 1); if (error != 0) { printf("rpcdev_call() failed (%d)\n", error); goto out1; } handler_td = rpcdev_gethandlertd(RPCDEV_TYPE_RPCD); if (handler_td == NULL) { printf("no rpcd running\n"); /* XXX: this message could * easily flood a system */ error = ESRCH; goto out1; } havehandlertd = 1; if (siz != sizeof(rep)) { RPCDEBUG("Bad reply size from rpcd!\n"); error = EINVAL; goto out1; } /* * Now, we have a buffer, and we must copy it into a new mbuf * chain */ MGET(m, M_TRYWAIT, MT_DATA); if (rep.rep_len >= MINCLSIZE) MCLGET(m, M_TRYWAIT); m->m_len = 0; iov.iov_base = rep.rep_bp; iov.iov_len = rep.rep_len; uio.uio_iov = &iov; uio.uio_iovcnt = 1; uio.uio_offset = 0; uio.uio_resid = rep.rep_len; uio.uio_rw = UIO_READ; uio.uio_td = handler_td; uio.uio_segflg = UIO_USERSPACE; error = _uiotombuf(&uio, &m, rep.rep_len, cur_td); if (error != 0) { printf("_uiotombuf failed (%d)\n", error); goto out1; } rpcdev_wakeup(RPCDEV_TYPE_RPCD); #ifdef ULTRADEBUG { struct mbuf *m1 = m; printf("RPC reply (by way of rpcd) is:"); do { print_hexl(mtod(m1, void *), m1->m_len, 0); m1 
= m1->m_next; } while (m1 != NULL); } #endif /* Now, we set up the reply struct. Easy, right? */ reply->result_md = m; reply->result_dpos = mtod(m, caddr_t); reply->stat.status = rep.rep_status; out1: if (havehandlertd) { /* Next ops may sleep. */ rpcdev_puthandlertd(RPCDEV_TYPE_RPCD); havehandlertd = 0; } rpcdev_wakeup(RPCDEV_TYPE_RPCD); /* now, we destroy it!! destroy!!! */ vm_map_lock(map); vm_map_delete(map, va, va + size); vm_map_unlock(map); out0: if (havehandlertd) rpcdev_puthandlertd(RPCDEV_TYPE_RPCD); return (error); } #ifdef ULTRADEBUG static void print_hexl(const u_char *buf, int len, int offset) { u_int i, j, jm; int c; printf("\n"); for (i = 0; i < len; i += 0x10) { printf(" %04x: ", (u_int)(i + offset)); jm = len - i; jm = jm > 16 ? 16 : jm; for (j = 0; j < jm; j++) { if ((j % 2) == 1) printf("%02x ", (u_int) buf[i+j]); else printf("%02x", (u_int) buf[i+j]); } for (; j < 16; j++) { if ((j % 2) == 1) printf(" "); else printf(" "); } printf(" "); for (j = 0; j < jm; j++) { c = buf[i+j]; c = '.'; printf("%c", c); } printf("\n"); } } #endif /* ULTRADEBUG */ /* * Nfs timer routine Scan the nfsreq list and retranmit any requests that * have timed out To avoid retransmission attempts on STREAM sockets (in the * future) make sure to set the r_retry field to 0 (implies nm_retry == 0). 
*/ void rpcclnt_timer(arg) void *arg; { #if defined(__OpenBSD__) struct timeout *to = (struct timeout *) arg; #endif struct rpctask *rep; struct mbuf *m; struct socket *so; struct rpcclnt *rpc; int timeo; int s, error; cthread_t * td = curcthread; s = SPLNET(); TAILQ_FOREACH(rep, &rpctask_q, r_chain) { rpc = rep->r_rpcclnt; if (rep->r_mrep || ISSET(rep->r_flags, TASK_SOFTTERM)) continue; if (rpcclnt_sigintr(rpc, rep, rep->r_td)) { SET(rep->r_flags, TASK_SOFTTERM); continue; } if (rep->r_rtt >= 0) { rep->r_rtt++; if (ISSET(rpc->rc_flag, RPCCLNT_DUMBTIMR)) timeo = rpc->rc_timeo; else timeo = RPC_RTO(rpc, rpcclnt_proct(rep->r_rpcclnt, rep->r_procnum)); if (rpc->rc_timeouts > 0) timeo *= rpcclnt_backoff[rpc->rc_timeouts - 1]; if (rep->r_rtt <= timeo) continue; if (rpc->rc_timeouts < 8) rpc->rc_timeouts++; } /* * Check for server not responding */ if (ISSET(rep->r_flags, TASK_TPRINTFMSG) == 0 && rep->r_rexmit > rpc->rc_deadthresh) { rpcclnt_msg(rep->r_td, rpc->rc_prog->prog_name, "not responding"); SET(rep->r_flags, TASK_TPRINTFMSG); } if (rep->r_rexmit >= rep->r_retry) { /* too many */ rpcstats.rpctimeouts++; SET(rep->r_flags, TASK_SOFTTERM); continue; } if (rpc->rc_sotype != SOCK_DGRAM) { if (++rep->r_rexmit > RPC_MAXREXMIT) rep->r_rexmit = RPC_MAXREXMIT; continue; } if ((so = rpc->rc_so) == NULL) continue; /* * If there is enough space and the window allows.. Resend it * Set r_rtt to -1 in case we fail to send it now. 
*/ rep->r_rtt = -1; if (sbspace(&so->so_snd) >= rep->r_mreq->m_pkthdr.len && (ISSET(rpc->rc_flag, RPCCLNT_DUMBTIMR) || ISSET(rep->r_flags, TASK_SENT) || rpc->rc_sent < rpc->rc_cwnd) && (m = m_copym(rep->r_mreq, 0, M_COPYALL, M_DONTWAIT))) { #ifdef __APPLE__ thread_funnel_switch(KERNEL_FUNNEL, NETWORK_FUNNEL); #endif if (ISSET(rpc->rc_flag, RPCCLNT_NOCONN) == 0) error = (*so->so_proto->pr_usrreqs->pru_send) (so, 0, m, NULL, NULL, td); else error = (*so->so_proto->pr_usrreqs->pru_send) (so, 0, m, rpc->rc_name, NULL, td); #ifdef __APPLE__ thread_funnel_switch(NETWORK_FUNNEL, KERNEL_FUNNEL); #endif if (error) { if (RPCIGNORE_SOERROR(rpc->rc_soflags, error)) so->so_error = 0; } else { /* * Iff first send, start timing else turn * timing off, backoff timer and divide * congestion window by 2. */ if (ISSET(rep->r_flags, TASK_SENT)) { CLR(rep->r_flags, TASK_TIMING); if (++rep->r_rexmit > RPC_MAXREXMIT) rep->r_rexmit = RPC_MAXREXMIT; rpc->rc_cwnd >>= 1; if (rpc->rc_cwnd < RPC_CWNDSCALE) rpc->rc_cwnd = RPC_CWNDSCALE; rpcstats.rpcretries++; } else { SET(rep->r_flags, TASK_SENT); rpc->rc_sent += RPC_CWNDSCALE; } rep->r_rtt = 0; } } } splx(s); #if defined(__OpenBSD__) timeout_add(rpcclnt_timer, NULL, rpcclnt_ticks); #elif defined(__APPLE__) timeout(rpcclnt_timer_funnel, NULL, rpcclnt_ticks); /* XXXMARIUS: (arg) */ #else rpcclnt_timer_handle = timeout(rpcclnt_timer, NULL, rpcclnt_ticks); #endif } #ifdef __APPLE__ static void rpcclnt_timer_funnel(void *ap) { boolean_t funnelstate; funnelstate = thread_funnel_set(kernel_flock, TRUE); rpcclnt_timer(ap); thread_funnel_set(kernel_flock, funnelstate); } #endif /* * Test for a termination condition pending on the process. This is used for * RPCCLNT_INT mounts. 
*/ int rpcclnt_sigintr_extern(rpc, td) struct rpcclnt *rpc; cthread_t *td; { return rpcclnt_sigintr(rpc, NULL, td); } static int rpcclnt_sigintr(rpc, task, td) struct rpcclnt *rpc; struct rpctask *task; cthread_t *td; { struct proc *p; #if defined(__FreeBSD__) sigset_t tmpset; #endif if (rpc == NULL) return EFAULT; if (ISSET(rpc->rc_flag, RPCCLNT_REDIRECT)) /* Should signal? */ return (0); /* XXX deal with forced unmounts */ if (task && ISSET(task->r_flags, TASK_SOFTTERM)) RPC_RETURN(EINTR); if (!ISSET(rpc->rc_flag, RPCCLNT_INT)) RPC_RETURN(0); if (td == NULL) return (0); p = cthread_proc(td); #if defined(__OpenBSD__) || defined(__APPLE__) if (p && p->p_siglist && (((p->p_siglist & ~p->p_sigmask) & ~p->p_sigignore) & RPCINT_SIGMASK)) RPC_RETURN(EINTR); #else PROC_LOCK(p); tmpset = p->p_siglist; SIGSETNAND(tmpset, td->td_sigmask); mtx_lock(&p->p_sigacts->ps_mtx); SIGSETNAND(tmpset, p->p_sigacts->ps_sigignore); mtx_unlock(&p->p_sigacts->ps_mtx); if (SIGNOTEMPTY(p->p_siglist) && RPCCLNTINT_SIGMASK(tmpset)) { PROC_UNLOCK(p); RPC_RETURN(EINTR); } PROC_UNLOCK(p); #endif RPC_RETURN(0); } /* * Lock a socket against others. Necessary for STREAM sockets to ensure you * get an entire rpc request/reply and also to avoid race conditions between * the processes with nfs requests in progress when a reconnect is necessary. */ static int rpcclnt_sndlock(flagp, task) int *flagp; struct rpctask *task; { cthread_t * p; int slpflag = 0, slptimeo = 0; if (task) { p = task->r_td; if (ISSET(task->r_rpcclnt->rc_flag, RPCCLNT_INT)) slpflag = PCATCH; } else p = NULL; while (ISSET(*flagp, RPCCLNT_SNDLOCK)) { if (rpcclnt_sigintr(task->r_rpcclnt, task, p)) RPC_RETURN(EINTR); SET(*flagp, RPCCLNT_WANTSND); tsleep((caddr_t) flagp, slpflag | (PZERO - 1), "rpcsndlck", slptimeo); if (slpflag == PCATCH) { slpflag = 0; slptimeo = 2 * hz; } } SET(*flagp, RPCCLNT_SNDLOCK); RPC_RETURN(0); } /* * Unlock the stream socket for others. 
*/ static void rpcclnt_sndunlock(flagp) int *flagp; { if (ISSET(*flagp, RPCCLNT_SNDLOCK) == 0) panic("rpc sndunlock"); CLR(*flagp, RPCCLNT_SNDLOCK); if (ISSET(*flagp, RPCCLNT_WANTSND)) { CLR(*flagp, RPCCLNT_WANTSND); wakeup((caddr_t) flagp); } } static int rpcclnt_rcvlock(task) struct rpctask *task; { int *flagp = &task->r_rpcclnt->rc_flag; int slpflag, slptimeo = 0; if (ISSET(*flagp, RPCCLNT_INT)) slpflag = PCATCH; else slpflag = 0; while (ISSET(*flagp, RPCCLNT_RCVLOCK)) { if (rpcclnt_sigintr(task->r_rpcclnt, task, task->r_td)) RPC_RETURN(EINTR); SET(*flagp, RPCCLNT_WANTRCV); tsleep((caddr_t) flagp, slpflag | (PZERO - 1), "rpcrcvlk", slptimeo); if (slpflag == PCATCH) { slpflag = 0; slptimeo = 2 * hz; } } SET(*flagp, RPCCLNT_RCVLOCK); RPC_RETURN(0); } /* * Unlock the stream socket for others. */ static void rpcclnt_rcvunlock(flagp) int *flagp; { if (ISSET(*flagp, RPCCLNT_RCVLOCK) == 0) panic("nfs rcvunlock"); CLR(*flagp, RPCCLNT_RCVLOCK); if (ISSET(*flagp, RPCCLNT_WANTRCV)) { CLR(*flagp, RPCCLNT_WANTRCV); wakeup((caddr_t) flagp); } } static void rpcclnt_realign(struct mbuf **pm, int hsiz) { struct mbuf *m; struct mbuf *n = NULL; int off = 0; RPCDEBUG("in rpcclnt_realign()"); while ((m = *pm) != NULL) { if ((m->m_len & 0x3) || (mtod(m, intptr_t) & 0x3)) { MGET(n, M_TRYWAIT, MT_DATA); if (m->m_len >= MINCLSIZE) { MCLGET(n, M_TRYWAIT); } n->m_len = 0; break; } pm = &m->m_next; } /* * If n is non-NULL, loop on m copying data, then replace the * portion of the chain that had to be realigned. 
*/ if (n != NULL) { while (m) { m_copyback(n, off, m->m_len, mtod(m, caddr_t)); off += m->m_len; m = m->m_next; } m_freem(*pm); *pm = n; } RPCDEBUG("leave rpcclnt_realign()"); } static int rpcclnt_msg(td, server, msg) cthread_t *td; const char *server; char *msg; { #if defined(__OpenBSD__) || defined(__APPLE__) struct proc *p; tpr_t tpr; #endif #if defined(__OpenBSD__) || defined(__APPLE__) p = cthread_proc(td); if (td != NULL && (p = cthread_proc(td))) tpr = tprintf_open(p); else tpr = NULL; tprintf(tpr, "rpc server %s: %s\n", server, msg); tprintf_close(tpr); #else tprintf(td ? td->td_proc : NULL, LOG_INFO, "rpc server %s: %s\n", server, msg); #endif RPC_RETURN(0); } /* * Build the RPC header and fill in the authorization info. The authorization * string argument is only used when the credentials come from outside of the * kernel (AUTH_KERB). (likewise, the ucred is only used when inside the * kernel) Returns the head of the mbuf list. */ static struct mbuf * rpcclnt_buildheader(struct rpcclnt *rc, int procid, struct mbuf *mrest, int *xidp, struct mbuf **mheadend, struct ucred *cred) { /* register */ struct mbuf *mb; register u_int32_t *tl; /* register */ caddr_t bpos; struct mbuf *mreq, *mb2; int error; MGETHDR(mb, M_TRYWAIT, MT_DATA); if (6 * RPCX_UNSIGNED >= MINCLSIZE) { MCLGET(mb, M_TRYWAIT); } else if (6 * RPCX_UNSIGNED < MHLEN) { MH_ALIGN(mb, 6 * RPCX_UNSIGNED); } else { RPCDEBUG("mbuf too small"); panic("cheap bailout"); } mb->m_len = 0; mreq = mb; bpos = mtod(mb, caddr_t); /* * First the RPC header. */ rpcm_build(tl, u_int32_t *, 6 * RPCX_UNSIGNED); /* Get a new (non-zero) xid */ if ((rpcclnt_xid == 0) && (rpcclnt_xid_touched == 0)) { RANDOM(rpcclnt_xid); rpcclnt_xid_touched = 1; } else { do { RANDOM(*xidp); } while ((*xidp % 256) == 0); rpcclnt_xid += *xidp; } /* XXX: funky... 
*/ *tl++ = *xidp = txdr_unsigned(rpcclnt_xid); *tl++ = rpc_call; *tl++ = rpc_vers; *tl++ = txdr_unsigned(rc->rc_prog->prog_id); *tl++ = txdr_unsigned(rc->rc_prog->prog_version); *tl++ = txdr_unsigned(procid); if ((error = rpcauth_build_reqhdr(rc, cred, &mb, &bpos, mrest)) != 0) { RPCDEBUG("rpcauth_build_reqhdr failed %d", error); return (NULL); } mb->m_next = mrest; *mheadend = mb; M_LENGTH(mreq->m_pkthdr.len, mreq); mreq->m_pkthdr.rcvif = NULL; return (mreq); } /* * Help break down an mbuf chain by setting the first siz bytes contiguous * pointed to by returned val. This is used by the macros rpcm_dissect and * rpcm_dissecton for tough cases. (The macros use the vars. dpos and dpos2) */ static int rpcm_disct(struct mbuf **mdp, caddr_t *dposp, int siz, int left, caddr_t *cp2) { struct mbuf *mp, *mp2; int siz2, xfer; caddr_t p; mp = *mdp; while (left == 0) { *mdp = mp = mp->m_next; if (mp == NULL) RPC_RETURN(EBADRPC); left = mp->m_len; *dposp = mtod(mp, caddr_t); } if (left >= siz) { *cp2 = *dposp; *dposp += siz; } else if (mp->m_next == NULL) { RPC_RETURN(EBADRPC); } else if (siz > MHLEN) { panic("rpc S too big"); } else { MGET(mp2, M_TRYWAIT, MT_DATA); mp2->m_next = mp->m_next; mp->m_next = mp2; mp->m_len -= left; mp = mp2; *cp2 = p = mtod(mp, caddr_t); bcopy(*dposp, p, left); /* Copy what was left */ siz2 = siz - left; p += left; mp2 = mp->m_next; /* Loop around copying up the siz2 bytes */ while (siz2 > 0) { if (mp2 == NULL) RPC_RETURN(EBADRPC); xfer = (siz2 > mp2->m_len) ? 
mp2->m_len : siz2; if (xfer > 0) { bcopy(mtod(mp2, caddr_t), p, xfer); RPCMADV(mp2, xfer); mp2->m_len -= xfer; p += xfer; siz2 -= xfer; } if (siz2 > 0) mp2 = mp2->m_next; } mp->m_len = siz; *mdp = mp2; *dposp = mtod(mp2, caddr_t); } RPC_RETURN(0); } static u_int32_t rpcclnt_proct(struct rpcclnt *rpc, u_int32_t procid) { if (rpc->rc_proctlen != 0 && rpc->rc_proct != NULL && procid < rpc->rc_proctlen) return (rpc->rc_proct[procid]); return (0); } static int rpc_adv(struct mbuf **mdp, caddr_t *dposp, int offs, int left) { struct mbuf *m; int s; m = *mdp; s = left; while (s < offs) { offs -= s; m = m->m_next; if (m == NULL) RPC_RETURN(EBADRPC); s = m->m_len; } *mdp = m; *dposp = mtod(m, caddr_t) + offs; RPC_RETURN(0); } int rpcclnt_cancelreqs(struct rpcclnt *rpc) { struct rpctask *task; int i, s; if (ISSET(rpc->rc_flag, RPCCLNT_REDIRECT)) /* XXX */ return (0); s = SPLNET(); TAILQ_FOREACH(task, &rpctask_q, r_chain) { if (rpc != task->r_rpcclnt || task->r_mrep != NULL || ISSET(task->r_flags, TASK_SOFTTERM)) continue; rpcclnt_softterm(task); } splx(s); for (i = 0; i < 30; i++) { s = SPLNET(); TAILQ_FOREACH(task, &rpctask_q, r_chain) { if (rpc == task->r_rpcclnt) break; } splx(s); if (task == NULL) return (0); tsleep(&lbolt, PSOCK, "nfscancel", 0); } return (EBUSY); } static void rpcclnt_softterm(struct rpctask *task) { SET(task->r_flags, TASK_SOFTTERM); if (ISSET(task->r_flags, TASK_SENT)) { task->r_rpcclnt->rc_sent -= RPC_CWNDSCALE; CLR(task->r_flags, TASK_SENT); } } #if defined(__OpenBSD__) /* called by rpcclnt_get() */ void rpcclnt_create(struct rpcclnt **rpc) { MALLOC(*rpc, struct rpcclnt *, sizeof(struct rpcclnt), M_RPC, M_WAITOK | M_ZERO); } /* called by rpcclnt_put() */ void rpcclnt_destroy(struct rpcclnt *rpc) { if (rpc != NULL) FREE(rpc, M_RPC); else RPCDEBUG("attempting to free a NULL rpcclnt (not dereferenced)"); } #endif /* XXXMARIUS - this needs to respect zerocopy */ static int _uiotombuf(struct uio *uiop, struct mbuf **mq, int siz, cthread_t *cur_td) { char 
*uiocp; struct mbuf *mp, *mp2; int xfer, left, mlen; int uiosiz, clflg, rem; char *cp; #ifdef DIAGNOSTIC if (uiop->uio_iovcnt != 1) panic("_uiotombuf: iovcnt != 1"); #endif if (siz > MLEN) /* or should it >= MCLBYTES ?? */ clflg = 1; else clflg = 0; rem = rpcm_rndup(siz)-siz; mp = mp2 = *mq; while (siz > 0) { left = uiop->uio_iov->iov_len; uiocp = uiop->uio_iov->iov_base; if (left > siz) left = siz; uiosiz = left; while (left > 0) { mlen = M_TRAILINGSPACE(mp); if (mlen == 0) { MGET(mp, M_TRYWAIT, MT_DATA); if (clflg) MCLGET(mp, M_TRYWAIT); mp->m_len = 0; mp2->m_next = mp; mp2 = mp; mlen = M_TRAILINGSPACE(mp); } xfer = (left > mlen) ? mlen : left; #ifdef notyet if (uiop->uio_iov->iov_op != NULL) (*(uiop->uio_iov->iov_op)) (uiocp, mtod(mp, caddr_t)+mp->m_len, xfer); else #endif if (uiop->uio_segflg == UIO_SYSSPACE) bcopy(uiocp, mtod(mp, caddr_t)+mp->m_len, xfer); else { struct uio uio; struct iovec iov; int error; #ifdef notyet if (cthread_proc(cur_td) != NULL && cthread_proc(uiop->uio_td)->p_pid == cthread_proc(cur_td)->p_pid) { copyin(uiocp, mtod(mp, caddr_t) + mp->m_len, xfer); goto next; } #endif iov.iov_base = mtod(mp, char *) + mp->m_len; iov.iov_len = xfer; uio.uio_iov = &iov; uio.uio_iovcnt = 1; uio.uio_offset = (off_t)(uintptr_t)uiocp; uio.uio_resid = xfer; uio.uio_rw = UIO_READ; uio.uio_segflg = UIO_SYSSPACE; uio.uio_td = cur_td; error = proc_rwmem(cthread_proc(uiop->uio_td), &uio); if (error != 0) return (error); xfer -= uio.uio_resid; } mp->m_len += xfer; left -= xfer; uiocp += xfer; uiop->uio_offset += xfer; uiop->uio_resid -= xfer; } uiop->uio_iov->iov_base = (char *)uiop->uio_iov->iov_base + uiosiz; uiop->uio_iov->iov_len -= uiosiz; siz -= uiosiz; } if (rem > 0) { if (rem > M_TRAILINGSPACE(mp)) { MGET(mp, M_TRYWAIT, MT_DATA); mp->m_len = 0; mp2->m_next = mp; } cp = mtod(mp, caddr_t)+mp->m_len; for (left = 0; left < rem; left++) *cp++ = '\0'; mp->m_len += rem; } *mq = mp; return (0); }