/* $FreeBSD$ */

/*
 * rpcclnt.c
 *
 * Copyright (c) 2004 The Regents of the University of Michigan.
 * All rights reserved.
 *
 * Copyright (c) 2004 Weston Andros Adamson <muzzle@umich.edu>.
 * Copyright (c) 2004 Marius Aamodt Eriksen <marius@umich.edu>.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * $Id: rpcclnt.c,v 1.13 2004/06/14 19:38:56 marius Exp $
 */

/*
 * Copyright (c) 1989, 1991, 1993, 1995 The Regents of the University of
 * California.  All rights reserved.
 * 
 * This code is derived from software contributed to Berkeley by Rick Macklem at
 * The University of Guelph.
 * 
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are
 * met: 1. Redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer. 2.
 * Redistributions in binary form must reproduce the above copyright notice,
 * this list of conditions and the following disclaimer in the documentation
 * and/or other materials provided with the distribution. 3. All advertising
 * materials mentioning features or use of this software must display the
 * following acknowledgement: This product includes software developed by the
 * University of California, Berkeley and its contributors. 4. Neither the
 * name of the University nor the names of its contributors may be used to
 * endorse or promote products derived from this software without specific
 * prior written permission.
 * 
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND ANY
 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR
 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 * 
 * @(#)nfs_socket.c	8.5 (Berkeley) 3/30/95
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/mount.h>
#include <sys/kernel.h>
#include <sys/mbuf.h>
#include <sys/syslog.h>
#include <sys/malloc.h>
#include <sys/uio.h>
#include <sys/lock.h>
#include <sys/signalvar.h>
#include <sys/vnode.h>
#include <sys/namei.h>
#include <sys/mman.h>
#include <sys/resource.h>
#include <sys/resourcevar.h>
#include <sys/ptrace.h>		/* For proc_rwmem() */

#if defined(__APPLE__)
#include <sys/tprintf.h>
#include <sys/buf.h>
#endif


#if defined(__FreeBSD__)
#include <sys/sysent.h>
#endif

#include <sys/syscall.h>
#include <sys/sysctl.h>

#include <sys/domain.h>
#include <sys/protosw.h>
#include <sys/socket.h>
#include <sys/socketvar.h>

#if defined(__FreeBSD__)
#include <sys/mutex.h>
#endif

#if defined(__OpenBSD__)
#include <sys/timeout.h>
#endif

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/pmap.h>
#include <vm/vm_map.h>
#include <vm/vm_extern.h>

#include <netinet/in.h>
#include <netinet/tcp.h>

#include <nfs4client/nfs4_glue.h>


#include <nfsx/rpcv2.h>
#include <nfsx/xdr_subs.h>
#include <nfsx/nfsproto.h>
#include <nfsxclient/nfs.h>

#include <rpcx/rpcm_subs.h>
#include <rpcx/rpcclnt.h>
#include <rpcx/rpcclnt-private.h>
#include <rpcx/rpc_dev.h>
#include <rpcx/rpcauth.h>
#include <rpcx/rpcclnt_redirect.h>

#include <nfs4client/nfs4m_glue.h>

/* memory management */
#ifdef __OpenBSD__
struct pool     rpctask_pool;
struct pool     rpcclnt_pool;
#define RPCTASKPOOL_LWM 10
#define RPCTASKPOOL_HWM 40

#elif defined(__FreeBSD__)
static          MALLOC_DEFINE(M_RPC, "rpcclnt", "rpc state");
#endif

/*
 * Emit an RPCDEBUG() trace of the value being returned, then return it from
 * the enclosing function.  X appears twice in the expansion, so callers
 * should pass an expression without side effects.
 */
#define RPC_RETURN(X) \
	do { \
		RPCDEBUG("returning %d", (X)); \
		return (X); \
	} while (0)

/*
 * Estimate rto for an nfs rpc sent via an unreliable datagram. Use the mean
 * and mean deviation of rtt for the appropriate type of rpc for the frequent
 * rpcs and a default for the others. The justification for doing "other"
 * this way is that these rpcs happen so infrequently that timer est. would
 * probably be stale. Also, since many of these rpcs are non-idempotent, a
 * conservative timeout is desired.
 *
 *	getattr, lookup	- A+2D
 *	read, write	- A+4D
 *	other		- rc_timeo
 */
/*
 * RPC_RTO(n, t): retransmit timeout for rpcclnt `n' and timer class `t'.
 *
 *   t == 0      -> n->rc_timeo
 *   t == 1, 2   -> (((srtt + 3) >> 2) + sdrtt + 1) >> 1
 *   t >= 3      -> ((srtt + 7) >> 3) + sdrtt + 1
 *
 * where srtt/sdrtt are n->rc_srtt[t - 1] and n->rc_sdrtt[t - 1].  Both
 * arguments are fully parenthesized so that an expression may be passed for
 * `t' without changing which array slot is read.  `t' is expanded more than
 * once, so it should not have side effects.
 */
#define	RPC_RTO(n, t) \
	((t) == 0 ? (n)->rc_timeo : \
	 ((t) < 3 ? \
	  (((((n)->rc_srtt[(t) - 1] + 3) >> 2) + \
	    (n)->rc_sdrtt[(t) - 1] + 1) >> 1) : \
	  ((((n)->rc_srtt[(t) - 1] + 7) >> 3) + \
	    (n)->rc_sdrtt[(t) - 1] + 1)))

/*
 * Smoothed rtt slot for task `r': the entry of its rpcclnt's rc_srtt[]
 * chosen by rpcclnt_proct(s, r->r_procnum) - 1.  Usable as an lvalue.
 */
#define	RPC_SRTT(s,r)	(r)->r_rpcclnt->rc_srtt[rpcclnt_proct((s),\
				(r)->r_procnum) - 1]

/*
 * Same selection as RPC_SRTT(), but into the rc_sdrtt[] array.
 */
#define	RPC_SDRTT(s,r)	(r)->r_rpcclnt->rc_sdrtt[rpcclnt_proct((s),\
				(r)->r_procnum) - 1]


/*
 * There is a congestion window for outstanding rpcs maintained per mount
 * point. The cwnd size is adjusted in roughly the way that: Van Jacobson,
 * Congestion avoidance and Control, In "Proceedings of SIGCOMM '88". ACM,
 * August 1988. describes for TCP. The cwnd size is chopped in half on a
 * retransmit timeout and incremented by 1/cwnd when each rpc reply is
 * received and a full cwnd of rpcs is in progress. (The sent count and cwnd
 * are scaled for integer arith.) Variants of "slow start" were tried and
 * were found to be too much of a performance hit (ave. rtt 3 times larger),
 * I suspect due to the large rtt that nfs rpcs have.
 */
/* Fixed-point scale applied to the congestion window and sent count. */
#define	RPC_CWNDSCALE	256
/* Upper bound for the scaled congestion window: 32 scaled units. */
#define	RPC_MAXCWND	(32 * RPC_CWNDSCALE)
/* Eight-entry table of doubling multipliers, 2 through 256. */
static int      rpcclnt_backoff[8] = {
	2, 4, 8, 16,
	32, 64, 128, 256,
};

/*
 * Human-readable message tables for RPC reply errors: accepted-status
 * errors, denied-status errors, and authentication errors.  Everything
 * between #if 0 and #endif is compiled out.
 */
#if 0
#define RPC_ERRSTR_ACCEPTED_SIZE 6
char *rpc_errstr_accepted[RPC_ERRSTR_ACCEPTED_SIZE] = {
	"",			/* no good message... */
	"remote server hasn't exported program.",
	"remote server can't support version number.",
	"program can't support procedure.",
	"procedure can't decode params.",
	"remote error.  remote side memory allocation failure?"
};

char *rpc_errstr_denied[2] = {
	"remote server doesnt support rpc version 2!",
	"remote server authentication error."
};

#define RPC_ERRSTR_AUTH_SIZE 6
char *rpc_errstr_auth[RPC_ERRSTR_AUTH_SIZE] = {
	"",
	"auth error: bad credential (seal broken).",
	"auth error: client must begin new session.",
	"auth error: bad verifier (seal broken).",
	"auth error: verifier expired or replayed.",
	"auth error: rejected for security reasons.",
};
#endif

/*
 * Static data, mostly RPC constants in XDR form
 */
/* Filled in by rpcclnt_init() with txdr_unsigned() of the RPC constants. */
static u_int32_t rpc_reply, rpc_call, rpc_vers;

/*
 * rpc_msgdenied, rpc_mismatch, rpc_auth_unix, rpc_msgaccepted,
 * rpc_autherr, rpc_auth_kerb;
 */

/* Both start at zero; presumably the xid generator state - TODO confirm. */
static u_int32_t rpcclnt_xid = 0;
static u_int32_t rpcclnt_xid_touched = 0;
/* Event counters; zeroed in rpcclnt_init() and exported via sysctl below. */
struct rpcstats rpcstats;
/* Timer interval in clock ticks; computed (minimum 1) in rpcclnt_init(). */
int      rpcclnt_ticks;

/* kern.rpc sysctl node and read-only views of the rpcstats counters. */
SYSCTL_NODE(_kern, OID_AUTO, rpc, CTLFLAG_RD, 0, "RPC Subsystem");

SYSCTL_UINT(_kern_rpc, OID_AUTO,
    retries, CTLFLAG_RD, &rpcstats.rpcretries, 0, "retries");
SYSCTL_UINT(_kern_rpc, OID_AUTO,
    request, CTLFLAG_RD, &rpcstats.rpcrequests, 0, "request");
SYSCTL_UINT(_kern_rpc, OID_AUTO,
    timeouts, CTLFLAG_RD, &rpcstats.rpctimeouts, 0, "timeouts");
SYSCTL_UINT(_kern_rpc, OID_AUTO,
    unexpected, CTLFLAG_RD, &rpcstats.rpcunexpected, 0, "unexpected");
SYSCTL_UINT(_kern_rpc, OID_AUTO,
    invalid, CTLFLAG_RD, &rpcstats.rpcinvalid, 0, "invalid");

#ifdef RPCCLNT_DEBUG
/*
 * Read/write debug switch, off by default.
 * NOTE(review): declared int but exported with SYSCTL_UINT - confirm intent.
 */
int             rpcdebugon = 0;
SYSCTL_UINT(_kern_rpc, OID_AUTO, debug_on, CTLFLAG_RW, &rpcdebugon, 0, "RPC Debug messages");
#endif

/*
 * Queue head for rpctask's
 */
static 
TAILQ_HEAD(, rpctask) rpctask_q;
#if defined(__FreeBSD__)
/* Handle handed to untimeout() in rpcclnt_uninit() to cancel the timer. */
struct callout_handle rpcclnt_timer_handle;
#endif

/*
 * Forward declarations for the file-local helpers.  The peer address is
 * carried in a struct mbuf on OpenBSD and in a struct sockaddr elsewhere,
 * so rpcclnt_send() and rpcclnt_receive() have per-platform prototypes.
 */
#if defined(__OpenBSD__) 
static int  rpcclnt_send(struct socket *, struct mbuf *, struct mbuf *, struct rpctask *);
static int  rpcclnt_receive(struct rpctask *, struct mbuf **, struct mbuf **, cthread_t *);
#else
static int  rpcclnt_send(struct socket *, struct sockaddr *, struct mbuf *, struct rpctask *);
static int  rpcclnt_receive(struct rpctask *, struct sockaddr **, struct mbuf **, cthread_t *);
#endif

static int  rpcclnt_msg(cthread_t *, const char *, char *);

static int  rpcclnt_reply(struct rpctask *, cthread_t *);
static void rpcclnt_timer(void *);
/* Send/receive lock helpers; the int * argument is the rpcclnt's rc_flag. */
static int  rpcclnt_sndlock(int *, struct rpctask *);
static void rpcclnt_sndunlock(int *);
static int  rpcclnt_rcvlock(struct rpctask *);
static void rpcclnt_rcvunlock(int *);
static void rpcclnt_realign(struct mbuf **, int);
static int  rpc_adv(struct mbuf **, caddr_t *, int, int);
static int  rpcm_disct(struct mbuf **, caddr_t *, int, int, caddr_t *);
static void rpcclnt_softterm(struct rpctask * task);
static int  rpcclnt_sigintr(struct rpcclnt *, struct rpctask *, cthread_t *);

static u_int32_t     rpcclnt_proct(struct rpcclnt *, u_int32_t);
static struct mbuf  *rpcclnt_buildheader(struct rpcclnt *, int,
                         struct mbuf *, int *, struct mbuf **, struct ucred *);

static int rpcclnt_request_redirect(struct rpcclnt *rpc, struct mbuf *mrest,
               int procnum, cthread_t *caller_td, struct ucred *cred,
               struct rpc_reply *reply);

static int _uiotombuf(struct uio *, struct mbuf **, int, cthread_t *);

/* Only declared when ULTRADEBUG is defined. */
#ifdef ULTRADEBUG
static void print_hexl(const u_char *, int, int);
#endif

/* Darwin-only timer entry point; rpcclnt_init() calls it to start the timer. */
#ifdef __APPLE__
static void rpcclnt_timer_funnel(void *);
#endif

/*
 * One-time setup of the RPC client layer: derive the timer interval, clear
 * the statistics, precompute the XDR-encoded constants, initialize the task
 * queue (and, on OpenBSD, the memory pools), start the timer and bring up
 * the rpc device.
 */
void
rpcclnt_init(void)
{
#if defined(__OpenBSD__)
	static struct timeout rpcclnt_timer_to;
#endif

	/*
	 * Scale RPC_TICKINTVL by hz / 1000, rounding to nearest, and never
	 * let the interval fall below a single tick.
	 */
	rpcclnt_ticks = (hz * RPC_TICKINTVL + 500) / 1000;
	if (rpcclnt_ticks < 1)
		rpcclnt_ticks = 1;

	/* Start every counter from zero. */
	rpcstats.rpcinvalid = 0;
	rpcstats.rpcunexpected = 0;
	rpcstats.rpctimeouts = 0;
	rpcstats.rpcrequests = 0;
	rpcstats.rpcretries = 0;

	/*
	 * RPC constants, stored through txdr_unsigned().  Only three are in
	 * use; the remainder stay compiled out.
	 */
	rpc_reply = txdr_unsigned(RPC_REPLY);
	rpc_vers = txdr_unsigned(RPC_VER2);
	rpc_call = txdr_unsigned(RPC_CALL);
#if 0
	rpc_msgdenied = txdr_unsigned(RPC_MSGDENIED);
	rpc_msgaccepted = txdr_unsigned(RPC_MSGACCEPTED);
	rpc_mismatch = txdr_unsigned(RPC_MISMATCH);
	rpc_autherr = txdr_unsigned(RPC_AUTHERR);
	rpc_auth_unix = txdr_unsigned(RPCAUTH_UNIX);
	rpc_auth_kerb = txdr_unsigned(RPCAUTH_KERB4);
#endif

	/* Empty list of outstanding tasks. */
	TAILQ_INIT(&rpctask_q);

#if defined(__OpenBSD__)
	/* Memory pools for tasks and clients. */
	pool_init(&rpctask_pool, sizeof(struct rpctask), 0, 0, RPCTASKPOOL_LWM,
	    "rpctask_p", NULL);
	pool_setlowat(&rpctask_pool, RPCTASKPOOL_LWM);
	pool_sethiwat(&rpctask_pool, RPCTASKPOOL_HWM);

	pool_init(&rpcclnt_pool, sizeof(struct rpcclnt), 0, 0, 1, "rpcclnt_p", NULL);

	/* Arm the timeout and run the timer routine once by hand. */
	timeout_set(&rpcclnt_timer_to, rpcclnt_timer, &rpcclnt_timer_to);
	rpcclnt_timer(&rpcclnt_timer_to);
#elif defined(__APPLE__)
	rpcclnt_timer_funnel(NULL);
#else
	rpcclnt_timer(NULL);
#endif

	rpcdev_init();

	RPCDEBUG("rpc initialed");
}

/*
 * Tear down what rpcclnt_init() set up: cancel the pending timer callout on
 * FreeBSD and Darwin, then shut down the rpc device.  The sysctl variables
 * are not removed (see XXX below).
 */
void
rpcclnt_uninit(void)
{
	RPCDEBUG("uninit");

#if defined(__FreeBSD__)
	untimeout(rpcclnt_timer, NULL, rpcclnt_timer_handle);
#elif defined(__APPLE__)
	untimeout(rpcclnt_timer_funnel, NULL);
#endif

	/* XXX delete sysctl variables */

	rpcdev_uninit();
}

/*
 * Initialize sockets and congestion for a new RPC connection. We do not free
 * the sockaddr if error.
 */
/*
 * Create, bind and (unless RPCCLNT_NOCONN is set) connect the socket for
 * `rpc', size its buffers, reset the congestion/rtt state and create the
 * auth context.
 *
 * rpc	client whose rc_name, rc_sotype, rc_soproto, rc_flag, rc_wsize and
 *	rc_rsize describe the connection; rc_so receives the new socket.
 * td	thread passed through to the socket layer.
 *
 * Returns 0 on success (immediately, without creating a socket, when
 * RPCCLNT_REDIRECT is set) or an errno value.  On failure after the socket
 * was created, it is torn down via rpcclnt_disconnect().
 *
 * On Darwin the network funnel is held only while the socket calls run;
 * every exit path leaves the thread on the kernel funnel exactly once.
 */
int
rpcclnt_connect(struct rpcclnt *rpc, cthread_t *td)
{
	struct socket  *so;
	int             s, error, rcvreserve, sndreserve;
	struct sockaddr *saddr;

#if defined(__FreeBSD__)
	GIANT_REQUIRED;
#endif

	/*
	 * XXXREDIRECT: perhaps we should actually ping the
	 * destination here.
	 *
	 * This check runs before the Darwin funnel switch so that the
	 * early return does not leave the thread on the network funnel.
	 */
	if (ISSET(rpc->rc_flag, RPCCLNT_REDIRECT))
		return (0);

#if defined(__APPLE__)
	thread_funnel_switch(KERNEL_FUNNEL, NETWORK_FUNNEL);
#endif

	/* create the socket */
	rpc->rc_so = NULL;
	saddr = rpc->rc_name;

	error = SOCREATE(saddr->sa_family, &rpc->rc_so,
	    rpc->rc_sotype, rpc->rc_soproto, td);

	if (error != 0) {
		RPCDEBUG("error %d in socreate()", error);
#ifdef __APPLE__
		/* No socket to tear down; just give the funnel back. */
		thread_funnel_switch(NETWORK_FUNNEL, KERNEL_FUNNEL);
#endif
		RPC_RETURN(error);
	}

	so = rpc->rc_so;
	rpc->rc_soflags = so->so_proto->pr_flags;

	/*
	 * Some servers require that the client port be a reserved port
	 * number. We always allocate a reserved port, as this prevents
	 * filehandle disclosure through UDP port capture.
	 */
	if (saddr->sa_family == AF_INET) {
		SOSETOPT(error, so, IPPROTO_IP, IP_PORTRANGE, int, IP_PORTRANGE_LOW);
		if (error)
			goto bad;

		SOBIND(error, so, AF_INET, INADDR_ANY, 0, td);
		if (error)
			goto bad;

		SOSETOPT(error, so, IPPROTO_IP, IP_PORTRANGE, int, IP_PORTRANGE_DEFAULT);
		if (error)
			goto bad;
	}

	/*
	 * Protocols that do not require connections may be optionally left
	 * unconnected for servers that reply from a port other than
	 * NFS_PORT.
	 */
	if (ISSET(rpc->rc_flag, RPCCLNT_NOCONN)) {
		if (ISSET(rpc->rc_soflags, PR_CONNREQUIRED)) {
			error = ENOTCONN;
			goto bad;
		}
	} else {
		error = SOCONNECT(so, saddr, td);

		if (error) {
			RPCDEBUG("soconnect returns %d", error);
			goto bad;
		}

		/*
		 * Wait for the connection to complete. Cribbed from the
		 * connect system call but with the wait timing out so that
		 * interruptible mounts don't hang here for a long time.
		 */
		s = SPLNET();

		while (ISSET(so->so_state, SS_ISCONNECTING) && so->so_error == 0) {
			(void)tsleep((caddr_t) & so->so_timeo, PSOCK,
				     "rpc", 2 * hz);

			/*
			 * XXX needs to catch interrupt signals. something
			 * like this: if ((so->so_state & SS_ISCONNECTING) &&
			 * so->so_error == 0 && rep && (error =
			 * nfs_sigintr(nmp, rep, rep->r_td)) != 0) {
			 * so->so_state &= ~SS_ISCONNECTING; splx(s); goto
			 * bad; }
			 */
		}
		if (so->so_error) {
			error = so->so_error;
			so->so_error = 0;
			splx(s);
			goto bad;
		}
		splx(s);
	}

	/* Soft or interruptible clients get a 5 second socket buffer timeout. */
	if (rpc->rc_flag & (RPCCLNT_SOFT | RPCCLNT_INT)) {
		so->so_rcv.sb_timeo = (5 * hz);
		so->so_snd.sb_timeo = (5 * hz);
	} else {
		so->so_rcv.sb_timeo = 0;
		so->so_snd.sb_timeo = 0;
	}

	/* Size the socket buffers according to the socket type. */
	if (rpc->rc_sotype == SOCK_DGRAM) {
		sndreserve = rpc->rc_wsize + RPC_MAXPKTHDR;
		rcvreserve = rpc->rc_rsize + RPC_MAXPKTHDR;
	} else if (rpc->rc_sotype == SOCK_SEQPACKET) {
		sndreserve = (rpc->rc_wsize + RPC_MAXPKTHDR) * 2;
		rcvreserve = (rpc->rc_rsize + RPC_MAXPKTHDR) * 2;
	} else {
		if (rpc->rc_sotype != SOCK_STREAM)
			panic("rpcclnt_connect() bad sotype");
		if (so->so_proto->pr_flags & PR_CONNREQUIRED) {
			/*
			 * The result of this option is not examined; `error'
			 * is overwritten by the calls that follow.
			 */
			SOSETOPT(error, so, SOL_SOCKET, SO_KEEPALIVE, int, 1);
		}
		if (so->so_proto->pr_protocol == IPPROTO_TCP) {
			SOSETOPT(error, so, IPPROTO_TCP, TCP_NODELAY, int, 1);
			if (error)
				goto bad;
		}
		/* Stream sockets also carry a u_int32_t record mark. */
		sndreserve = (rpc->rc_wsize + RPC_MAXPKTHDR +
			      sizeof(u_int32_t)) * 2;
		rcvreserve = (rpc->rc_rsize + RPC_MAXPKTHDR +
			      sizeof(u_int32_t)) * 2;
	}
	error = soreserve(so, sndreserve, rcvreserve);

	if (error)
		goto bad;

	so->so_rcv.sb_flags |= SB_NOINTR;
	so->so_snd.sb_flags |= SB_NOINTR;

#ifdef __APPLE__
	thread_funnel_switch(NETWORK_FUNNEL, KERNEL_FUNNEL);
#endif

	/* Initialize other non-zero congestion variables */
	rpc->rc_srtt[0] = rpc->rc_srtt[1] = rpc->rc_srtt[2] = rpc->rc_srtt[3] =
		rpc->rc_srtt[4] = (RPC_TIMEO << 3);
	rpc->rc_sdrtt[0] = rpc->rc_sdrtt[1] = rpc->rc_sdrtt[2] =
		rpc->rc_sdrtt[3] = rpc->rc_sdrtt[4] = 0;
	rpc->rc_cwnd = RPC_MAXCWND / 2;	/* Initial send window */
	rpc->rc_sent = 0;
	rpc->rc_timeouts = 0;

	/* XXX - pass in pseudflavor from mount options. */
	error = rpcauth_create(rpc, RPCAUTH_PFLAVOR_KERB5);
	if (error != 0)
		goto bad_nofunnel;	/* funnel was already restored above */

	RPC_RETURN(0);

bad:
#ifdef __APPLE__
	thread_funnel_switch(NETWORK_FUNNEL, KERNEL_FUNNEL);
#endif
bad_nofunnel:
	rpcclnt_disconnect(rpc);
	RPC_RETURN(error);
}

/*
 * Reconnect routine: Called when a connection is broken on a reliable
 * protocol.
 *  - clean up the old socket
 *  - rpcclnt_connect() again
 *  - set TASK_MUSTRESEND for all outstanding requests on this rpcclnt
 * If this fails the mount point is DEAD!
 * nb: Must be called with the rpcclnt_sndlock() set on the rpcclnt.
 */
int
rpcclnt_reconnect(struct rpctask *rep, cthread_t *td)
{
	struct rpcclnt *client = rep->r_rpcclnt;
	struct rpctask *task;
	int             rv;

	/* Redirecting clients have no socket of their own to re-establish. */
	if (ISSET(client->rc_flag, RPCCLNT_REDIRECT))
		panic("rpcclnt_reconnect() called while in redirecting requests");

	rpcclnt_disconnect(client);

	/*
	 * Keep trying until the connect succeeds.  An interrupted attempt
	 * is reported as EINTR; any other failure sleeps on lbolt and
	 * retries.
	 */
	for (;;) {
		rv = rpcclnt_connect(client, td);
		if (rv == 0)
			break;
		if (rv == EINTR || rv == ERESTART)
			RPC_RETURN(EINTR);
		tsleep(&lbolt, PSOCK, "rpccon", 0);
	}

	/*
	 * Every task queued for this client was sent on the old socket, so
	 * flag each of them for retransmission.
	 */
	TAILQ_FOREACH(task, &rpctask_q, r_chain) {
		if (task->r_rpcclnt == client)
			SET(task->r_flags, TASK_MUSTRESEND);
	}

	RPC_RETURN(0);
}

/*
 * Shut down and close the client's socket, if it has one, clearing rc_so
 * first.  Does nothing for RPCCLNT_REDIRECT clients.
 */
void
rpcclnt_disconnect(struct rpcclnt *rpc)
{
	struct socket  *sock;

	if (ISSET(rpc->rc_flag, RPCCLNT_REDIRECT))
		return;

#if defined(__FreeBSD__)
	GIANT_REQUIRED;		/* XXX until socket locking done */
#elif defined(__APPLE__)
	thread_funnel_switch(KERNEL_FUNNEL, NETWORK_FUNNEL);
#endif

	sock = rpc->rc_so;
	if (sock != NULL) {
		/* Detach from the client before tearing the socket down. */
		rpc->rc_so = NULL;
		soshutdown(sock, 2);
		soclose(sock);
	}

#ifdef __APPLE__
	thread_funnel_switch(NETWORK_FUNNEL, KERNEL_FUNNEL);
#endif
}

void
rpcclnt_safedisconnect(struct rpcclnt *rpc)
{
	struct rpctask  dummytask;

	if (ISSET(rpc->rc_flag, RPCCLNT_REDIRECT))
		return;

	bzero(&dummytask, sizeof(dummytask));
	dummytask.r_rpcclnt = rpc;
	rpcclnt_rcvlock(&dummytask);
	rpcclnt_disconnect(rpc);
	rpcclnt_rcvunlock(&rpc->rc_flag);
}

/*
 * This is the rpc send routine. For connection based socket types, it must
 * be called with an rpcclnt_sndlock() on the socket. "rep == NULL" indicates
 * that it has been called from a server.
 * For the client side:
 *  - return EINTR if the RPC is terminated, 0 otherwise
 *  - set TASK_MUSTRESEND if the send fails for any reason
 *  - do any cleanup required by recoverable socket errors (???)
 * For the server side:
 *  - return EINTR or ERESTART if interrupted by a signal
 *  - return EPIPE if a connection is lost for connection based sockets
 *    (TCP...)
 *  - do any cleanup required by recoverable socket errors (???)
 */
static int
rpcclnt_send(struct socket *so,
#if defined(__OpenBSD__) 
    struct mbuf    *nam,
#else
    struct sockaddr *nam,
#endif
    struct mbuf *top, struct rpctask *rep)
{
#if defined(__OpenBSD__) 
	struct mbuf    *sendnam;
#elif defined(__APPLE__)
	struct sockaddr *sendnam;
#else
	struct sockaddr *sendnam;
	struct thread  *td = curthread;
#endif
	int error, soflags, flags;

#if defined(__FreeBSD__)
	GIANT_REQUIRED;		/* XXX until socket locking done */
#endif

	if (rep != NULL) {
		if (ISSET(rep->r_flags, TASK_SOFTTERM)) {
			m_freem(top);
			RPC_RETURN(EINTR);
		}
		if ((so = rep->r_rpcclnt->rc_so) == NULL) {
			SET(rep->r_flags, TASK_MUSTRESEND);
			m_freem(top);
			RPC_RETURN(0);
		}
		CLR(rep->r_flags, TASK_MUSTRESEND);
		soflags = rep->r_rpcclnt->rc_soflags;
	} else
		soflags = so->so_proto->pr_flags;

	if ((soflags & PR_CONNREQUIRED) || (so->so_state & SS_ISCONNECTED))
		sendnam = NULL;
	else
		sendnam = nam;

	if (so->so_type == SOCK_SEQPACKET)
		flags = MSG_EOR;
	else
		flags = 0;

#ifdef __APPLE__
	thread_funnel_switch(KERNEL_FUNNEL, NETWORK_FUNNEL);	
#endif
	error = SOSEND(so, sendnam, NULL, top, NULL, flags, td);
#ifdef __APPLE__
	thread_funnel_switch(NETWORK_FUNNEL, KERNEL_FUNNEL);
#endif

	if (error != 0) {
		if (rep != NULL) {
			log(LOG_INFO, "rpc send error %d for service %s\n", error,
			    rep->r_rpcclnt->rc_prog->prog_name);
			/*
			 * Deal with errors for the client side.
			 */
			if (ISSET(rep->r_flags, TASK_SOFTTERM))
				error = EINTR;
			else
				SET(rep->r_flags, TASK_MUSTRESEND);
		} else
			log(LOG_INFO, "rpc service send error %d\n", error);

		/*
		 * Handle any recoverable (soft) socket errors here.
		 */
		if (error != EINTR && error != ERESTART &&
		    error != EWOULDBLOCK && error != EPIPE)
			error = 0;
	}

	RPC_RETURN(error);
}

/*
 * Receive a Sun RPC Request/Reply. For SOCK_DGRAM, the work is all
 * done by soreceive(), but for SOCK_STREAM we must deal with the
 * Record Mark and consolidate the data into a new mbuf list. nb:
 * Sometimes TCP passes the data up to soreceive() in long lists of
 * small mbufs. For SOCK_STREAM we must be very careful to read an
 * entire record once we have read any of it, even if the system call
 * has been interrupted.
 */
/*
 * rep	 task on whose behalf the receive is done; supplies the rpcclnt.
 * aname out: peer address for unconnected datagram sockets, else NULL.
 * mp	 out: mbuf chain holding the received message, NULL on error.
 * td	 thread recorded in the uio handed to soreceive().
 *
 * Returns 0 with *mp set on success.  Returns EINTR when the task has been
 * soft-terminated or already has a reply attached, EACCES when a datagram
 * client has no socket, or the error from locking, reconnecting or
 * receiving.  The send lock is never held on return.
 */
static int
rpcclnt_receive(struct rpctask *rep,
#if defined(__OpenBSD__)
    struct mbuf **aname,
#else
    struct sockaddr **aname,
#endif
    struct mbuf **mp, cthread_t *td)
{
	struct socket  *so;
	struct uio      auio;
	struct iovec    aio;
	struct mbuf    *m;
	struct mbuf    *control;
	u_int32_t       len;
#if defined(__OpenBSD__)
	struct mbuf   **getnam;
#else
	struct sockaddr **getnam;
#endif
	int error, sotype, rcvflg;

#if defined(__FreeBSD__)
	GIANT_REQUIRED;		/* XXX until socket locking done */
#endif

	/*
	 * Set up arguments for soreceive()
	 */
	*mp = NULL;
	*aname = NULL;
	sotype = rep->r_rpcclnt->rc_sotype;

	/*
	 * For reliable protocols, lock against other senders/receivers in
	 * case a reconnect is necessary. For SOCK_STREAM, first get the
	 * Record Mark to find out how much more there is to get. We must
	 * lock the socket against other receivers until we have an entire
	 * rpc request/reply.
	 */
	if (sotype != SOCK_DGRAM) {
		error = rpcclnt_sndlock(&rep->r_rpcclnt->rc_flag, rep);
		if (error != 0)
			RPC_RETURN(error);
tryagain:
		/*
		 * Check for fatal errors and resending request.
		 * The send lock is held whenever control reaches here.
		 */
		/*
		 * Ugh: If a reconnect attempt just happened, rc_so would
		 * have changed. NULL indicates a failed attempt that has
		 * essentially shut down this mount point.
		 */
		if (rep->r_mrep || ISSET(rep->r_flags, TASK_SOFTTERM)) {
			rpcclnt_sndunlock(&rep->r_rpcclnt->rc_flag);
			RPC_RETURN(EINTR);
		}
		so = rep->r_rpcclnt->rc_so;
		if (so == NULL) {
			error = rpcclnt_reconnect(rep, td);
			if (error) {
				rpcclnt_sndunlock(&rep->r_rpcclnt->rc_flag);
				RPC_RETURN(error);
			}
			goto tryagain;
		}
		while (ISSET(rep->r_flags, TASK_MUSTRESEND)) {
			m = m_copym(rep->r_mreq, 0, M_COPYALL, M_TRYWAIT);
			rpcstats.rpcretries++;
			error = rpcclnt_send(so,
			    rep->r_rpcclnt->rc_name, m, rep);
			if (error) {
				if (error == EINTR || error == ERESTART ||
				    (error = rpcclnt_reconnect(rep, td)) != 0) {
					rpcclnt_sndunlock(
						&rep->r_rpcclnt->rc_flag);
					RPC_RETURN(error);
				}
				goto tryagain;
			}
		}

		rpcclnt_sndunlock(&rep->r_rpcclnt->rc_flag);
		if (sotype == SOCK_STREAM) {
			/* First read the 4-byte record mark into `len'. */
			aio.iov_base = (caddr_t) & len;
			aio.iov_len = sizeof(u_int32_t);
			auio.uio_iov = &aio;
			auio.uio_iovcnt = 1;
			auio.uio_segflg = UIO_SYSSPACE;
			auio.uio_rw = UIO_READ;
			auio.uio_offset = 0;
			auio.uio_resid = sizeof(u_int32_t);
#if defined(__OpenBSD__) || defined(__APPLE__)
			auio.uio_procp = td;
#else
			auio.uio_td = td;
#endif
			do {
				rcvflg = MSG_WAITALL;
#ifdef __APPLE__
				thread_funnel_switch(KERNEL_FUNNEL,
				    NETWORK_FUNNEL);
#endif
				error = soreceive(so, NULL, &auio,
				    NULL, NULL, &rcvflg);
#ifdef __APPLE__
				thread_funnel_switch(NETWORK_FUNNEL,
				    KERNEL_FUNNEL);
#endif
				if (error == EWOULDBLOCK && rep &&
				    ISSET(rep->r_flags, TASK_SOFTTERM))
					RPC_RETURN(EINTR);
			} while (error == EWOULDBLOCK);

			if (error == 0 && auio.uio_resid > 0) {
				log(LOG_INFO,
				"short receive (%zu/%zu) from rpc server %s\n",
				    sizeof(u_int32_t) - auio.uio_resid,
				    sizeof(u_int32_t),
				    rep->r_rpcclnt->rc_prog->prog_name);
				error = EPIPE;
			}

			if (error != 0)
				goto errout;

			/* Strip the top bit of the record mark. */
			len = ntohl(len) & ~0x80000000;
			/*
			 * This is SERIOUS! We are out of sync with the
			 * sender and forcing a disconnect/reconnect is all I
			 * can do.
			 */
			if (len > RPC_MAXPACKET) {
				log(LOG_ERR, "%s (%d) from rpc server %s\n",
				    "impossible packet length",
				    len,
				    rep->r_rpcclnt->rc_prog->prog_name);
				error = EFBIG;
				goto errout;
			}
			/* Now read the record body; retry if interrupted. */
			auio.uio_resid = len;
#ifdef __APPLE__
			thread_funnel_switch(KERNEL_FUNNEL, NETWORK_FUNNEL);
#endif
			do {
				rcvflg = MSG_WAITALL;
				error = soreceive(so, NULL, &auio,
				    mp, NULL, &rcvflg);
			} while (error == EWOULDBLOCK || error == EINTR ||
				 error == ERESTART);
#ifdef __APPLE__
			thread_funnel_switch(NETWORK_FUNNEL, KERNEL_FUNNEL);
#endif

			if (error == 0 && auio.uio_resid > 0) {
				log(LOG_INFO,
				"short receive (%d/%d) from rpc server %s\n",
				    len - auio.uio_resid, len,
				    rep->r_rpcclnt->rc_prog->prog_name);
				error = EPIPE;
			}
		} else {
			/*
			 * NB: Since uio_resid is big, MSG_WAITALL is ignored
			 * and soreceive() will return when it has either a
			 * control msg or a data msg. We have no use for
			 * control msg., but must grab them and then throw
			 * them away so we know what is going on.
			 */
			auio.uio_resid = len = 100000000;	/* Anything Big */
#if defined(__OpenBSD__) || defined(__APPLE__)
			auio.uio_procp = td;
#else
			auio.uio_td = td;
#endif
#ifdef __APPLE__
			thread_funnel_switch(KERNEL_FUNNEL, NETWORK_FUNNEL);
#endif
			do {
				rcvflg = 0;
				error = soreceive(so, NULL,
				    &auio, mp, &control, &rcvflg);
				if (control)
					m_freem(control);
				if (error == EWOULDBLOCK && rep) {
					if (ISSET(rep->r_flags, TASK_SOFTTERM)) {
#ifdef __APPLE__
						thread_funnel_switch(
							NETWORK_FUNNEL,
							    KERNEL_FUNNEL);
#endif
						RPC_RETURN(EINTR);
					}
				}
			} while (error == EWOULDBLOCK ||
				 (!error && *mp == NULL && control));
#ifdef __APPLE__
			thread_funnel_switch(NETWORK_FUNNEL, KERNEL_FUNNEL);
#endif
			if ((rcvflg & MSG_EOR) == 0)
				printf("Egad!!\n");
			if (error == 0 && *mp == NULL)
				error = EPIPE;
			len -= auio.uio_resid;
		}
errout:
		if (error != 0 && error != EINTR && error != ERESTART) {
			m_freem(*mp);
			*mp = (struct mbuf *)NULL;
			if (error != EPIPE)
				log(LOG_INFO,
				    "receive error %d from rpc server %s\n",
				    error,
				    rep->r_rpcclnt->rc_prog->prog_name);
			/*
			 * Retake the send lock and reconnect.  On success
			 * loop back with the lock held; if the reconnect
			 * fails the lock has to be dropped here, because
			 * this function returns right after.
			 */
			error = rpcclnt_sndlock(&rep->r_rpcclnt->rc_flag, rep);
			if (error == 0) {
				error = rpcclnt_reconnect(rep, td);
				if (error == 0)
					goto tryagain;
				rpcclnt_sndunlock(&rep->r_rpcclnt->rc_flag);
			}
		}
	} else {
		/* Datagram sockets: no send lock, no reconnect. */
		if ((so = rep->r_rpcclnt->rc_so) == NULL)
			RPC_RETURN(EACCES);
		/* Only ask for the peer address on an unconnected socket. */
		if (so->so_state & SS_ISCONNECTED)
			getnam = NULL;
		else
			getnam = aname;
		auio.uio_resid = len = 1000000;
#if defined(__OpenBSD__) || defined(__APPLE__)
		auio.uio_procp = td;
#else
		auio.uio_td = td;
#endif

#ifdef __APPLE__
		thread_funnel_switch(KERNEL_FUNNEL, NETWORK_FUNNEL);
#endif
		do {
			rcvflg = 0;
			error = soreceive(so, getnam, &auio, mp, NULL, &rcvflg);
			RPCDEBUG("soreceivce returns %d", error);
			if (error == EWOULDBLOCK &&
			    ISSET(rep->r_flags, TASK_SOFTTERM)) {
				RPCDEBUG("wouldblock && softerm -> EINTR");
#ifdef __APPLE__
				thread_funnel_switch(NETWORK_FUNNEL,
				    KERNEL_FUNNEL);
#endif
				RPC_RETURN(EINTR);
			}
		} while (error == EWOULDBLOCK);
#ifdef __APPLE__
		thread_funnel_switch(NETWORK_FUNNEL, KERNEL_FUNNEL);
#endif
		len -= auio.uio_resid;
	}

	/* Never hand a partial chain back together with an error. */
	if (error != 0) {
		m_freem(*mp);
		*mp = NULL;
	}

	/*
	 * Search for any mbufs that are not a multiple of 4 bytes
	 * long or with m_data not longword aligned. These could
	 * cause pointer alignment problems, so copy them to well
	 * aligned mbufs.
	 */
	rpcclnt_realign(mp, 5 * RPCX_UNSIGNED);
	RPC_RETURN(error);
}


/*
 * Implement receipt of reply on a socket. We must search through the list of
 * received datagrams matching them with outstanding requests using the xid,
 * until ours is found.
 *
 * myrep is the task the caller is waiting on; td is passed through to
 * rpcclnt_receive().  Every reply pulled off the socket is matched by xid
 * against all tasks on rpctask_q, so this call may complete somebody else's
 * task before it completes its own.
 *
 * Returns 0 once myrep->r_mrep is set (or, for TASK_GETONEREP tasks, after a
 * single pass through the loop), otherwise the error reported by
 * rpcclnt_rcvlock() or rpcclnt_receive().
 */
/* ARGSUSED */
static int
rpcclnt_reply(struct rpctask *myrep, cthread_t *td)
{
	struct rpctask *rep;
	struct rpcclnt *rpc = myrep->r_rpcclnt;
	int32_t         t1;
	struct mbuf    *mrep, *md;
#if defined(__OpenBSD__) 
	struct mbuf    *nam;
#else
	struct sockaddr *nam;
#endif
	u_int32_t       rxid, *tl;
	/* cp2 is not referenced directly; presumably used by rpcm_dissect. */
	caddr_t         dpos, cp2;
	int             error;

	/*
	 * Loop around until we get our own reply
	 */
	for (;;) {
		/*
		 * Lock against other receivers so that I don't get stuck in
		 * sbwait() after someone else has received my reply for me.
		 * Also necessary for connection based protocols to avoid
		 * race conditions during a reconnect.
		 */
		error = rpcclnt_rcvlock(myrep);
		if (error)
			RPC_RETURN(error);
		/* Already received, bye bye */
		if (myrep->r_mrep != NULL) {
			rpcclnt_rcvunlock(&rpc->rc_flag);
			RPC_RETURN(0);
		}
		/*
		 * Get the next Rpc reply off the socket
		 */
		error = rpcclnt_receive(myrep, &nam, &mrep, td);

		rpcclnt_rcvunlock(&rpc->rc_flag);

		if (error != 0) {
			/*
			 * Ignore routing errors on connectionless
			 * protocols??
			 */
			if (RPCIGNORE_SOERROR(rpc->rc_soflags, error)) {
				rpc->rc_so->so_error = 0;
				if (myrep->r_flags & TASK_GETONEREP)
					RPC_RETURN(0);
				RPCDEBUG("ingoring routing error on connectionless protocol.");
				continue;
			}
			RPC_RETURN(error);
		}
		/* The sender's address is not needed for matching. */
#if defined(__OpenBSD__)
		if (nam != NULL)
			m_freem(nam);
#else
		if (nam != NULL)
			FREE(nam, M_SONAME);
#endif

		/*
		 * Get the xid and check that it is an rpc reply
		 *
		 * NOTE(review): rpcm_dissect is a macro defined elsewhere;
		 * presumably it jumps to the rpcmout label below when the
		 * reply is too short -- confirm against its definition.
		 */
		md = mrep;
		dpos = mtod(md, caddr_t);
		rpcm_dissect(tl, u_int32_t *, 2 * RPCX_UNSIGNED);
		rxid = *tl++;
		if (*tl != rpc_reply) {
			rpcstats.rpcinvalid++;
			m_freem(mrep);
	rpcmout:
			if (myrep->r_flags & TASK_GETONEREP)
				RPC_RETURN(0);
			continue;
		}
		/*
		 * Loop through the request list to match up the reply Iff no
		 * match, just drop the datagram
		 */
		TAILQ_FOREACH(rep, &rpctask_q, r_chain) {
			if (rep->r_mrep == NULL && rxid == rep->r_xid) {
				/* Found it.. */
				rep->r_mrep = mrep;
				rep->r_md = md;
				rep->r_dpos = dpos;

				/*
				 * Update congestion window. Do the additive
				 * increase of one rpc/rtt.
				 */
				if (rpc->rc_cwnd <= rpc->rc_sent) {
					rpc->rc_cwnd +=
					    (RPC_CWNDSCALE*RPC_CWNDSCALE +
						(rpc->rc_cwnd >> 1))/rpc->rc_cwnd;
					if (rpc->rc_cwnd > RPC_MAXCWND)
						rpc->rc_cwnd = RPC_MAXCWND;
				}

				/*
				 * Give the window slot back only if this task
				 * still holds one.  TASK_SENT may already have
				 * been cleared (and rc_sent decremented) by
				 * rpcclnt_softterm(), or never set when the
				 * request was left for the timer to send, so
				 * an unconditional decrement would make
				 * rc_sent drift below the true count.
				 */
				if (ISSET(rep->r_flags, TASK_SENT)) {
					CLR(rep->r_flags, TASK_SENT);
					rpc->rc_sent -= RPC_CWNDSCALE;
				}

				/*
				 * Update rtt using a gain of 0.125 on the
				 * mean and a gain of 0.25 on the deviation.
				 */
				if (ISSET(rep->r_flags, TASK_TIMING)) {
					/*
					 * Since the timer resolution of
					 * NFS_HZ is so coarse, it can often
					 * result in r_rtt == 0. Since r_rtt
					 * == N means that the actual rtt is
					 * between N+dt and N+2-dt ticks, add
					 * 1.
					 */
					t1 = rep->r_rtt + 1;
					t1 -= (RPC_SRTT(rpc, rep) >> 3);
					RPC_SRTT(rpc, rep) += t1;
					if (t1 < 0)
						t1 = -t1;
					t1 -= (RPC_SDRTT(rpc, rep) >> 2);
					RPC_SDRTT(rpc, rep) += t1;
				}
				rpc->rc_timeouts = 0;
				break;
			}
		}
		/*
		 * If not matched to a request, drop it. If it's mine, get
		 * out.  (rep is NULL here when TAILQ_FOREACH ran off the end
		 * of the list without a match.)
		 */
		if (rep == 0) {
			rpcstats.rpcunexpected++;
			RPCDEBUG("rpc reply not matched\n");
			m_freem(mrep);
		} else if (rep == myrep) {
			if (rep->r_mrep == NULL)
				panic("rpcreply nil");
			RPC_RETURN(0);
		}
		if (myrep->r_flags & TASK_GETONEREP)
			RPC_RETURN(0);
	}
}

/* XXX: ignores tryagain! */
/*
 * Perform one RPC call and parse the RPC-level part of the reply.
 *
 * Outline (derived from nfs_request): fill in an on-stack task, build the
 * request header in front of 'mrest', link the task into rpctask_q, transmit
 * it with rpcclnt_send() (or leave the first transmit to the timer), wait in
 * rpcclnt_reply(), unlink the task, then break down the reply header into
 * 'reply'.
 *
 * 'reply' should be initialized by the caller; it is valid when 0 is
 * returned and the reply mbufs are NEVER freed in this function.
 *
 * Always frees the request header, but NEVER frees 'mrest'.
 *
 * Clients flagged RPCCLNT_REDIRECT are handed to rpcclnt_request_redirect()
 * instead.
 *
 * Returns 0 on success, otherwise an errno value (EBADRPC, EOPNOTSUPP,
 * EACCES, EPROTONOSUPPORT/ENOBUFS when the request could not be built, or
 * whatever the send/receive path or rpcauth_vrfy_rep() reported).
 */
/*
 * note that reply->result_* are invalid unless reply->type ==
 * RPC_MSGACCEPTED and reply->status == RPC_SUCCESS and that reply->verf_*
 * are invalid unless reply->type == RPC_MSGACCEPTED
 */
int
rpcclnt_request(struct rpcclnt *rpc, struct mbuf *mrest, int procnum,
    cthread_t *td, struct ucred *cred, struct rpc_reply *reply)
{
	struct mbuf    *m, *mrep;
	struct rpctask *task, _task;
	u_int32_t      *tl;
	struct mbuf    *md, *mheadend;
	/* t1 and cp2 are presumably scratch variables for the rpcm_* macros. */
	caddr_t         dpos, cp2;
	int             t1, s, error = 0, mrest_len;
	u_int32_t       xid;

	task = &_task;
	memset(task, 0, sizeof(*task));

	if (ISSET(rpc->rc_flag, RPCCLNT_REDIRECT))
		return (rpcclnt_request_redirect(rpc, mrest,
			    procnum, td, cred, reply));

	task->r_rpcclnt = rpc;
	task->r_procnum = procnum;
	task->r_td = td;

	M_LENGTH(mrest_len, mrest);

	m = rpcclnt_buildheader(rpc, procnum, mrest,
	    &xid, &mheadend, cred);
	if (m == NULL) {
		/*
		 * rpcclnt_buildheader() returns NULL when the auth header
		 * could not be built.  Nothing has been queued or sent yet,
		 * so report the failure instead of dereferencing NULL below.
		 */
		reply->mrep = NULL;
		reply->verf_md = NULL;
		reply->result_md = NULL;
		error = EPROTONOSUPPORT;
		goto rpcmout;
	}

	/*
	 * For stream protocols, insert a Sun RPC Record Mark.
	 */
	if (rpc->rc_sotype == SOCK_STREAM) {
		M_PREPEND(m, RPCX_UNSIGNED, M_TRYWAIT);
		if (m == NULL) {
			/*
			 * NOTE(review): M_PREPEND is documented to free the
			 * chain when it fails, which here would include
			 * 'mrest' -- confirm what callers expect.
			 */
			reply->mrep = NULL;
			reply->verf_md = NULL;
			reply->result_md = NULL;
			error = ENOBUFS;
			goto rpcmout;
		}
		/* High bit marks the last fragment; the rest is its length. */
		*mtod(m, u_int32_t *) = htonl(0x80000000 |
		    (m->m_pkthdr.len - RPCX_UNSIGNED));
	}
	task->r_mreq = m;
	task->r_xid = xid;

	if (ISSET(rpc->rc_flag, RPCCLNT_SOFT))
		task->r_retry = rpc->rc_retry;
	else
		task->r_retry = RPC_MAXREXMIT + 1;	/* past clip limit */
	task->r_rtt = task->r_rexmit = 0;

	/* Only procedures with a non-zero rpcclnt_proct() entry are timed. */
	if (rpcclnt_proct(rpc, procnum) > 0)
		task->r_flags = TASK_TIMING;
	else
		task->r_flags = 0;
	task->r_mrep = NULL;

	/*
	 * Do the client side RPC.
	 */
	rpcstats.rpcrequests++;

	/*
	 * Chain request into list of outstanding requests. Be sure to put it
	 * LAST so timer finds oldest requests first.
	 */
	s = splsoftclock();
	TAILQ_INSERT_TAIL(&rpctask_q, task, r_chain);

	/*
	 * If backing off another request or avoiding congestion, don't send
	 * this one now but let timer do it. If not timing a request, do it
	 * now.
	 */
	if (rpc->rc_so && (rpc->rc_sotype != SOCK_DGRAM ||
		ISSET(rpc->rc_flag, RPCCLNT_DUMBTIMR) ||
		rpc->rc_sent < rpc->rc_cwnd)) {
		splx(s);

		if (rpc->rc_soflags & PR_CONNREQUIRED)
			error = rpcclnt_sndlock(&rpc->rc_flag, task);
		if (error == 0) {
			/* Send a copy; the original is kept for retransmits. */
			error = rpcclnt_send(rpc->rc_so, rpc->rc_name,
			    m_copym(m, 0, M_COPYALL, M_TRYWAIT), task);
			if (rpc->rc_soflags & PR_CONNREQUIRED)
				rpcclnt_sndunlock(&rpc->rc_flag);
		}
		if (error == 0 && ISSET(task->r_flags, TASK_MUSTRESEND) == 0) {
			rpc->rc_sent += RPC_CWNDSCALE;
			SET(task->r_flags, TASK_SENT);
		}
	} else {
		splx(s);
		/* r_rtt < 0 tells rpcclnt_timer() to do the first send. */
		task->r_rtt = -1;
	}

	/*
	 * Wait for the reply from our send or the timer's.
	 */
	if (error == 0 || error == EPIPE)
		error = rpcclnt_reply(task, td);

	/*
	 * RPC done, unlink the request.
	 */
	s = splsoftclock();
	TAILQ_REMOVE(&rpctask_q, task, r_chain);
	splx(s);

	/*
	 * Decrement the outstanding request count.
	 */
	if (ISSET(task->r_flags, TASK_SENT)) {
		CLR(task->r_flags, TASK_SENT);	/* paranoia */
		rpc->rc_sent -= RPC_CWNDSCALE;
	}
	/*
	 * If there was a successful reply and a tprintf msg. tprintf a
	 * response.
	 */
	if (error == 0 && (ISSET(task->r_flags, TASK_TPRINTFMSG)))
		rpcclnt_msg(task->r_td,
		    rpc->rc_prog->prog_name, "is alive again");

	/* free request header (leaving mrest) */
	mheadend->m_next = NULL;
	m_freem(task->r_mreq);

	/* initialize reply */
	reply->mrep = task->r_mrep;
	reply->verf_md = NULL;
	reply->result_md = NULL;

	mrep = task->r_mrep;
	md = task->r_md;
	dpos = task->r_dpos;

	if (error != 0)
		goto rpcmout;

	/*
	 * break down the rpc header and check if ok
	 */

	rpcm_dissect(tl, u_int32_t *, RPCX_UNSIGNED);
	reply->stat.type = fxdr_unsigned(u_int32_t, *tl);

	if (reply->stat.type == RPC_MSGDENIED) {
		rpcm_dissect(tl, u_int32_t *, RPCX_UNSIGNED);
		reply->stat.status = fxdr_unsigned(u_int32_t, *tl);

		switch (reply->stat.status) {
		case RPC_MISMATCH:
			rpcm_dissect(tl, u_int32_t *, 2 * RPCX_UNSIGNED);
			reply->stat.mismatch_info.low =
			    fxdr_unsigned(u_int32_t, *tl++);
			reply->stat.mismatch_info.high =
			    fxdr_unsigned(u_int32_t, *tl);
			error = EOPNOTSUPP;
			break;
		case RPC_AUTHERR:
			rpcm_dissect(tl, u_int32_t *, RPCX_UNSIGNED);
			reply->stat.autherr = fxdr_unsigned(u_int32_t, *tl);
			error = EACCES;
			break;
		default:
			error = EBADRPC;
			break;
		}
		goto rpcmout;
	} else if (reply->stat.type != RPC_MSGACCEPTED) {
		error = EBADRPC;
		goto rpcmout;
	}

	rpcm_dissect(tl, u_int32_t *, 2 * RPCX_UNSIGNED);

	/* Verifier */
	reply->verf_md = md;
	reply->verf_dpos = dpos;

	reply->verf_type = fxdr_unsigned(u_int32_t, *tl++);
	reply->verf_size = fxdr_unsigned(u_int32_t, *tl);

	/* Skip over the (rounded-up) verifier body. */
	if (reply->verf_size != 0)
		rpcm_adv(rpcm_rndup(reply->verf_size));

	rpcm_dissect(tl, u_int32_t *, RPCX_UNSIGNED);
	reply->stat.status = fxdr_unsigned(u_int32_t, *tl);

	if (reply->stat.status == RPC_SUCCESS) {
		/*
		 * The results start right after the status word.  If that
		 * word ended the current mbuf, the results begin at the
		 * start of the next one.
		 */
		if ((uint32_t)(dpos - mtod(md, caddr_t)) >= md->m_len) {
			RPCDEBUG("where is the next mbuf?");
			RPCDEBUG("%d -> %d",
			    (int)(dpos - mtod(md, caddr_t)), md->m_len);
			if (md->m_next == NULL) {
				error = EBADRPC;
				goto rpcmout;
			} else {
				reply->result_md = md->m_next;
				reply->result_dpos =
				    mtod(reply->result_md, caddr_t);
			}
		} else {
			reply->result_md = md;
			reply->result_dpos = dpos;
		}
	} else if (reply->stat.status == RPC_PROGMISMATCH) {
		rpcm_dissect(tl, u_int32_t *, 2 * RPCX_UNSIGNED);
		reply->stat.mismatch_info.low = fxdr_unsigned(u_int32_t, *tl++);
		reply->stat.mismatch_info.high = fxdr_unsigned(u_int32_t, *tl);
		error = EBADRPC; /* XXXMARIUS */
	} else if (reply->stat.status > 5) {
		error = EBADRPC;
		goto rpcmout;
	}

	/*
	 * XXX - are there any situations where result_md == null is
	 * fine?  Also, should we rpcauth_vrfy() any time when error
	 * != 0?
	 */
	if (reply->result_md == NULL)
		error = EBADRPC;
	else if (error == 0)
		error = rpcauth_vrfy_rep(rpc, cred, reply);

	/*
	 * Outgoing:
	 *     .result_md contains the start of the body;
	 */

rpcmout:
	RPC_RETURN(error);
}

/*
 * Perform an RPC by handing the request body to the userland rpcd handler
 * instead of sending it on a socket.
 *
 * The body in 'mrest' is copied into anonymous memory mapped into the
 * handler's address space, the handler is called through rpcdev_call(), and
 * the reply buffer it names is copied back into a fresh mbuf chain that is
 * returned in reply->result_md / reply->result_dpos, with the handler's
 * status in reply->stat.status.
 *
 * Returns 0 on success, ESRCH when no rpcd handler is registered, EINVAL on
 * a reply of unexpected size, ENOBUFS when no mbuf is available, or the
 * error from vm_mmap(), proc_rwmem(), rpcdev_call() or _uiotombuf().
 */
static int
rpcclnt_request_redirect(struct rpcclnt *rpc, struct mbuf *mrest, int procnum,
    cthread_t *cur_td, struct ucred *cred, struct rpc_reply *reply)
{
	int mrest_len, error, prot = PROT_READ|PROT_WRITE, havehandlertd;
	struct vmspace *vms;
	cthread_t *handler_td;
	vm_offset_t va;
	vm_size_t size;
	vm_map_t map;
	struct mbuf *m = mrest;
	struct mbuf *mhead;	/* head of the reply chain */
	struct uio uio;
	struct iovec iov;
	struct rpcclnt_redirect_req req;
	struct rpcclnt_redirect_rep rep;
	size_t siz, left, resid;

	handler_td = rpcdev_gethandlertd(RPCDEV_TYPE_RPCD);
	if (handler_td == NULL) {
		printf("no rpcd running\n"); /* XXX: this message could
					      * easily flood a system */
		return (ESRCH);
	}
	havehandlertd = 1;

	M_LENGTH(mrest_len, mrest);
	size = (vm_size_t) round_page(mrest_len);

	vms = cthread_proc(handler_td)->p_vmspace;

	/* Find an address. */
	PROC_LOCK(cthread_proc(handler_td));
	va = round_page((vm_offset_t)vms->vm_daddr +
	    lim_max(cthread_proc(handler_td), RLIMIT_DATA));
	PROC_UNLOCK(cthread_proc(handler_td));

	map = &vms->vm_map;
	error = vm_mmap(map, &va, size,	/* size -> vasize */
	    prot, prot, MAP_ANON, NULL, 0);
	if (error != 0)
		goto out0;

	left = mrest_len;

	/*
	 * Copy the mbuf into the userspace vm.
	 *
	 * XXXMARIUS: respect ZERO_COPY_SOCKETS here.
	 *
	 * One proc_rwmem() write per mbuf, each landing right after the
	 * bytes already written.
	 */
	do {
		iov.iov_base = mtod(m, void *);
		iov.iov_len = min(left, m->m_len);
		uio.uio_iov = &iov;
		uio.uio_iovcnt = 1;
		uio.uio_offset = va + (mrest_len - left);
		uio.uio_resid = resid = iov.iov_len;
		uio.uio_rw = UIO_WRITE;
		uio.uio_segflg = UIO_SYSSPACE;
		uio.uio_td = cur_td;

#ifdef ULTRADEBUG
		printf("Upcall:");
		print_hexl(mtod(m, void *), min(left, m->m_len), 0);
#endif
		error = proc_rwmem(cthread_proc(handler_td), &uio);

		left -= resid - uio.uio_resid;
		m = m->m_next;
	} while (left > 0 && error == 0 && m != NULL);

	if (error != 0) {
		printf("proc_rwmem() failed (%d)\n", error);
		goto out1;
	}

	req.req_uid = cred->cr_uid;
	req.req_procnum = procnum;
	req.req_bp = (caddr_t)va;
	req.req_len = mrest_len;
	strlcpy(req.req_host, rpc->rc_servername, sizeof(req.req_host));

	siz = sizeof(rep);

	/* Drop the handler reference across the call; it is retaken below. */
	rpcdev_puthandlertd(RPCDEV_TYPE_RPCD);
	havehandlertd = 0;
	error = rpcdev_call(RPCDEV_TYPE_RPCD, (caddr_t)&req,
	    sizeof(req), (caddr_t)&rep, &siz, 1);

	if (error != 0) {
		printf("rpcdev_call() failed (%d)\n", error);
		goto out1;
	}

	handler_td = rpcdev_gethandlertd(RPCDEV_TYPE_RPCD);
	if (handler_td == NULL) {
		printf("no rpcd running\n"); /* XXX: this message could
					      * easily flood a system */
		error = ESRCH;
		goto out1;
	}
	havehandlertd = 1;

	if (siz != sizeof(rep)) {
		RPCDEBUG("Bad reply size from rpcd!\n");
		error = EINVAL;
		goto out1;
	}

	/*
	 * Now, we have a buffer, and we must copy it into a new mbuf
	 * chain
	 */

	MGET(m, M_TRYWAIT, MT_DATA);
	if (m == NULL) {
		error = ENOBUFS;
		goto out1;
	}
	if (rep.rep_len >= MINCLSIZE)
		MCLGET(m, M_TRYWAIT);
	m->m_len = 0;

	/*
	 * _uiotombuf() leaves its mbuf argument pointing at the LAST mbuf
	 * it filled, so remember the head of the chain here; it is the head
	 * that must be handed back (and freed on failure).
	 */
	mhead = m;

	iov.iov_base = rep.rep_bp;
	iov.iov_len = rep.rep_len;
	uio.uio_iov = &iov;
	uio.uio_iovcnt = 1;
	uio.uio_offset = 0;
	uio.uio_resid = rep.rep_len;
	uio.uio_rw = UIO_READ;
	uio.uio_td = handler_td;
	uio.uio_segflg = UIO_USERSPACE;

	error = _uiotombuf(&uio, &m, rep.rep_len, cur_td);
	if (error != 0) {
		printf("_uiotombuf failed (%d)\n", error);
		m_freem(mhead);
		goto out1;
	}

	rpcdev_wakeup(RPCDEV_TYPE_RPCD);

#ifdef ULTRADEBUG
	{
		struct mbuf *m1 = mhead;

		printf("RPC reply (by way of rpcd) is:");
		do {
			print_hexl(mtod(m1, void *), m1->m_len, 0);
			m1 = m1->m_next;
		} while (m1 != NULL);
	}
#endif

	/* Now, we set up the reply struct.  Easy, right? */
	reply->result_md = mhead;
	reply->result_dpos = mtod(mhead, caddr_t);
	reply->stat.status = rep.rep_status;

 out1:
	if (havehandlertd) {	/* Next ops may sleep. */
		rpcdev_puthandlertd(RPCDEV_TYPE_RPCD);
		havehandlertd = 0;
	}

	rpcdev_wakeup(RPCDEV_TYPE_RPCD);

	/* Tear down the request mapping in the handler's address space. */
	vm_map_lock(map);
	vm_map_delete(map, va, va + size);
	vm_map_unlock(map);
 out0:
	if (havehandlertd)
		rpcdev_puthandlertd(RPCDEV_TYPE_RPCD);

	return (error);
}

#ifdef ULTRADEBUG
/*
 * Debug helper: hex-dump len bytes of buf with printf(), 16 bytes per row.
 *
 * Each row shows the offset of its first byte (biased by 'offset'), the
 * bytes as two-byte hex groups, and the same bytes as ASCII text where
 * anything outside the printable range 0x20..0x7e is shown as '.'.
 * A non-positive len prints only the leading newline.
 */
static void
print_hexl(const u_char *buf, int len, int offset)
{
	u_int i, j, jm, ulen;
	int c;

	printf("\n");
	if (len <= 0)
		return;
	ulen = (u_int)len;

	for (i = 0; i < ulen; i += 0x10) {
		printf("  %04x: ", (u_int)(i + offset));
		jm = ulen - i;
		jm = jm > 16 ? 16 : jm;

		/* Hex column: a space after every second byte. */
		for (j = 0; j < jm; j++) {
			if ((j % 2) == 1)
				printf("%02x ", (u_int) buf[i+j]);
			else
				printf("%02x", (u_int) buf[i+j]);
		}
		/* Pad a short last row so the text column lines up. */
		for (; j < 16; j++) {
			if ((j % 2) == 1)
				printf("   ");
			else
				printf("  ");
		}
		printf(" ");

		/* Text column: printable ASCII as-is, the rest as '.'. */
		for (j = 0; j < jm; j++) {
			c = buf[i+j];
			if (c < 0x20 || c > 0x7e)
				c = '.';
			printf("%c", c);
		}
		printf("\n");
	}
}
#endif /* ULTRADEBUG */


/*
 * RPC timer routine.  Scan the list of outstanding tasks (rpctask_q) and
 * retransmit any requests that have timed out.  To avoid retransmission
 * attempts on STREAM sockets (in the future) make sure to set the r_retry
 * field to 0 (implies nm_retry == 0).
 *
 * The routine re-arms itself before returning, rpcclnt_ticks ticks ahead.
 * arg is only referenced in the OpenBSD build, where it is cast to a
 * struct timeout pointer.
 */
void
rpcclnt_timer(arg)
	void           *arg;
{
#if defined(__OpenBSD__)
	struct timeout *to = (struct timeout *) arg;
#endif
	struct rpctask *rep;
	struct mbuf    *m;
	struct socket  *so;
	struct rpcclnt *rpc;
	int             timeo;
	int             s, error;

	cthread_t * td = curcthread;

	s = SPLNET();

	TAILQ_FOREACH(rep, &rpctask_q, r_chain) {
		rpc = rep->r_rpcclnt;
		/* Skip tasks that already have a reply or were terminated. */
		if (rep->r_mrep || ISSET(rep->r_flags, TASK_SOFTTERM))
			continue;
		if (rpcclnt_sigintr(rpc, rep, rep->r_td)) {
			SET(rep->r_flags, TASK_SOFTTERM);
			continue;
		}
		/*
		 * r_rtt >= 0 means the request is being timed: count one
		 * more tick and see whether it has exceeded its (possibly
		 * backed-off) timeout.  r_rtt < 0 marks a request that still
		 * has to be (re)sent and falls straight through.
		 */
		if (rep->r_rtt >= 0) {
			rep->r_rtt++;
			if (ISSET(rpc->rc_flag, RPCCLNT_DUMBTIMR))
				timeo = rpc->rc_timeo;
			else
				timeo = RPC_RTO(rpc,
				    rpcclnt_proct(rep->r_rpcclnt,
					rep->r_procnum));
			if (rpc->rc_timeouts > 0)
				timeo *= rpcclnt_backoff[rpc->rc_timeouts - 1];
			if (rep->r_rtt <= timeo)
				continue;
			/* rc_timeouts is capped at 8 (it indexes rpcclnt_backoff). */
			if (rpc->rc_timeouts < 8)
				rpc->rc_timeouts++;
		}
		/*
		 * Check for server not responding
		 */
		if (ISSET(rep->r_flags, TASK_TPRINTFMSG) == 0 &&
		    rep->r_rexmit > rpc->rc_deadthresh) {
			rpcclnt_msg(rep->r_td, rpc->rc_prog->prog_name,
				    "not responding");
			SET(rep->r_flags, TASK_TPRINTFMSG);
		}
		if (rep->r_rexmit >= rep->r_retry) {	/* too many */
			rpcstats.rpctimeouts++;
			SET(rep->r_flags, TASK_SOFTTERM);
			continue;
		}
		/* Non-datagram sockets: only count the timeout, never resend. */
		if (rpc->rc_sotype != SOCK_DGRAM) {
			if (++rep->r_rexmit > RPC_MAXREXMIT)
				rep->r_rexmit = RPC_MAXREXMIT;
			continue;
		}
		if ((so = rpc->rc_so) == NULL)
			continue;

		/*
		 * If there is enough space and the window allows.. Resend it
		 * Set r_rtt to -1 in case we fail to send it now.
		 */
		rep->r_rtt = -1;
		if (sbspace(&so->so_snd) >= rep->r_mreq->m_pkthdr.len &&
		    (ISSET(rpc->rc_flag, RPCCLNT_DUMBTIMR) ||
		     ISSET(rep->r_flags, TASK_SENT) ||
		     rpc->rc_sent < rpc->rc_cwnd) &&
		    (m = m_copym(rep->r_mreq, 0, M_COPYALL, M_DONTWAIT))) {

#ifdef __APPLE__
                        thread_funnel_switch(KERNEL_FUNNEL, NETWORK_FUNNEL);
#endif
			/* The destination address is passed only for RPCCLNT_NOCONN. */
			if (ISSET(rpc->rc_flag, RPCCLNT_NOCONN) == 0)
				error = (*so->so_proto->pr_usrreqs->pru_send)
				    (so, 0, m, NULL, NULL, td);
			else
				error = (*so->so_proto->pr_usrreqs->pru_send)
				    (so, 0, m, rpc->rc_name, NULL, td);
#ifdef __APPLE__
                        thread_funnel_switch(NETWORK_FUNNEL, KERNEL_FUNNEL);
#endif

			if (error) {
				if (RPCIGNORE_SOERROR(rpc->rc_soflags, error))
					so->so_error = 0;
			} else {
				/*
				 * Iff first send, start timing else turn
				 * timing off, backoff timer and divide
				 * congestion window by 2.
				 */
				if (ISSET(rep->r_flags, TASK_SENT)) {
					CLR(rep->r_flags, TASK_TIMING);
					if (++rep->r_rexmit > RPC_MAXREXMIT)
						rep->r_rexmit = RPC_MAXREXMIT;
					rpc->rc_cwnd >>= 1;
					if (rpc->rc_cwnd < RPC_CWNDSCALE)
						rpc->rc_cwnd = RPC_CWNDSCALE;
					rpcstats.rpcretries++;
				} else {
					SET(rep->r_flags, TASK_SENT);
					rpc->rc_sent += RPC_CWNDSCALE;
				}
				rep->r_rtt = 0;
			}
		}
	}
	splx(s);

	/* Re-arm for the next tick. */
#if defined(__OpenBSD__) 
	timeout_add(rpcclnt_timer, NULL, rpcclnt_ticks);
#elif defined(__APPLE__)
	timeout(rpcclnt_timer_funnel, NULL, rpcclnt_ticks); /* XXXMARIUS: (arg) */
#else
	rpcclnt_timer_handle = timeout(rpcclnt_timer, NULL, rpcclnt_ticks);
#endif
}

#ifdef __APPLE__
/*
 * Timer entry point used on __APPLE__: set the kernel funnel, run
 * rpcclnt_timer(), then hand the value returned by the first
 * thread_funnel_set() call back to thread_funnel_set().
 */
static void
rpcclnt_timer_funnel(void *ap)
{
	boolean_t saved;

	saved = thread_funnel_set(kernel_flock, TRUE);

	rpcclnt_timer(ap);

	thread_funnel_set(kernel_flock, saved);
}
#endif

/*
 * Test for a termination condition pending on the process. This is used for
 * RPCCLNT_INT mounts.
 *
 * Non-static entry point: runs the rpcclnt_sigintr() check with no task.
 */
int
rpcclnt_sigintr_extern(rpc, td)
	struct rpcclnt *rpc;
	cthread_t      *td;
{
	int             rv;

	rv = rpcclnt_sigintr(rpc, NULL, td);
	return (rv);
}

static int
rpcclnt_sigintr(rpc, task, td)
	struct rpcclnt *rpc;
	struct rpctask *task;
	cthread_t      *td;
{
	struct proc    *p;

#if defined(__FreeBSD__)
	sigset_t        tmpset;
#endif

	if (rpc == NULL) 
		return EFAULT;

	if (ISSET(rpc->rc_flag, RPCCLNT_REDIRECT)) /* Should signal? */
		return (0);

	/* XXX deal with forced unmounts */

	if (task && ISSET(task->r_flags, TASK_SOFTTERM))
		RPC_RETURN(EINTR);

	if (!ISSET(rpc->rc_flag, RPCCLNT_INT))
		RPC_RETURN(0);

	if (td == NULL)
		return (0);

	p = cthread_proc(td);

#if defined(__OpenBSD__) || defined(__APPLE__)
	if (p && p->p_siglist &&
	    (((p->p_siglist & ~p->p_sigmask) & ~p->p_sigignore) &
	     RPCINT_SIGMASK))
		RPC_RETURN(EINTR);
#else
	PROC_LOCK(p);
	tmpset = p->p_siglist;
	SIGSETNAND(tmpset, td->td_sigmask);
	mtx_lock(&p->p_sigacts->ps_mtx);
	SIGSETNAND(tmpset, p->p_sigacts->ps_sigignore);
	mtx_unlock(&p->p_sigacts->ps_mtx);
	if (SIGNOTEMPTY(p->p_siglist) && RPCCLNTINT_SIGMASK(tmpset)) {
		PROC_UNLOCK(p);
		RPC_RETURN(EINTR);
	}
	PROC_UNLOCK(p);
#endif
	RPC_RETURN(0);
}

/*
 * Lock a socket against others. Necessary for STREAM sockets to ensure you
 * get an entire rpc request/reply and also to avoid race conditions between
 * the processes with nfs requests in progress when a reconnect is necessary.
 *
 * flagp points at the flag word holding RPCCLNT_SNDLOCK / RPCCLNT_WANTSND.
 * task may be NULL, in which case the wait is neither signal-catching nor
 * subject to the rpcclnt_sigintr() check.
 *
 * Returns 0 with RPCCLNT_SNDLOCK set, or EINTR if rpcclnt_sigintr() reports
 * that the task should give up while waiting.
 */
static int
rpcclnt_sndlock(flagp, task)
	int            *flagp;
	struct rpctask *task;
{
	cthread_t * p;
	int             slpflag = 0, slptimeo = 0;

	if (task) {
		p = task->r_td;
		if (ISSET(task->r_rpcclnt->rc_flag, RPCCLNT_INT))
			slpflag = PCATCH;
	} else
		p = NULL;
	while (ISSET(*flagp, RPCCLNT_SNDLOCK)) {
		/*
		 * Without a task there is nothing to interrupt, and
		 * task->r_rpcclnt must not be dereferenced.
		 */
		if (task != NULL &&
		    rpcclnt_sigintr(task->r_rpcclnt, task, p))
			RPC_RETURN(EINTR);
		SET(*flagp, RPCCLNT_WANTSND);
		tsleep((caddr_t) flagp, slpflag | (PZERO - 1),
		    "rpcsndlck", slptimeo);
		/*
		 * After the first signal-catching sleep, switch to plain
		 * sleeps with a 2 second timeout so the loop above keeps
		 * re-checking for interruption.
		 */
		if (slpflag == PCATCH) {
			slpflag = 0;
			slptimeo = 2 * hz;
		}
	}

	SET(*flagp, RPCCLNT_SNDLOCK);
	RPC_RETURN(0);
}

/*
 * Release the send lock taken by rpcclnt_sndlock() and wake up anybody who
 * flagged RPCCLNT_WANTSND while waiting for it.  Panics if the lock is not
 * held.
 */
static void
rpcclnt_sndunlock(flagp)
	int            *flagp;
{

	if (!ISSET(*flagp, RPCCLNT_SNDLOCK))
		panic("rpc sndunlock");

	CLR(*flagp, RPCCLNT_SNDLOCK);

	/* Nobody waiting: nothing more to do. */
	if (!ISSET(*flagp, RPCCLNT_WANTSND))
		return;

	CLR(*flagp, RPCCLNT_WANTSND);
	wakeup((caddr_t) flagp);
}

static int
rpcclnt_rcvlock(task)
	struct rpctask *task;
{
	int            *flagp = &task->r_rpcclnt->rc_flag;
	int             slpflag, slptimeo = 0;

	if (ISSET(*flagp, RPCCLNT_INT))
		slpflag = PCATCH;
	else
		slpflag = 0;

	while (ISSET(*flagp, RPCCLNT_RCVLOCK)) {
		if (rpcclnt_sigintr(task->r_rpcclnt, task, task->r_td))
			RPC_RETURN(EINTR);
		SET(*flagp, RPCCLNT_WANTRCV);
		tsleep((caddr_t) flagp, slpflag | (PZERO - 1),
		    "rpcrcvlk", slptimeo);
		if (slpflag == PCATCH) {
			slpflag = 0;
			slptimeo = 2 * hz;
		}
	}
	SET(*flagp, RPCCLNT_RCVLOCK);
	RPC_RETURN(0);
}

/*
 * Release the receive lock taken by rpcclnt_rcvlock() and wake up anybody
 * who flagged RPCCLNT_WANTRCV while waiting for it.  Panics if the lock is
 * not held.
 */
static void
rpcclnt_rcvunlock(flagp)
	int            *flagp;
{

	if (!ISSET(*flagp, RPCCLNT_RCVLOCK))
		panic("nfs rcvunlock");

	CLR(*flagp, RPCCLNT_RCVLOCK);

	/* Nobody waiting: nothing more to do. */
	if (!ISSET(*flagp, RPCCLNT_WANTRCV))
		return;

	CLR(*flagp, RPCCLNT_WANTRCV);
	wakeup((caddr_t) flagp);
}

/*
 * Make sure every mbuf in the chain *pm has a length that is a multiple of
 * four and a data pointer that is four-byte aligned.
 *
 * The chain is walked up to the first mbuf that violates either rule.  From
 * that mbuf onwards all data is copied with m_copyback() into a newly
 * allocated mbuf, the old tail is freed, and the new mbuf is linked in its
 * place.  A chain that is already well formed is left untouched.
 *
 * hsiz is not used by this implementation.
 */
static void
rpcclnt_realign(struct mbuf **pm, int hsiz)
{
	struct mbuf *cur;
	struct mbuf *fresh = NULL;
	int copied = 0;

	RPCDEBUG("in rpcclnt_realign()");

	/* Find the first offending mbuf; pm tracks the link that points at it. */
	for (cur = *pm; cur != NULL; pm = &cur->m_next, cur = *pm) {
		if ((cur->m_len & 0x3) == 0 && (mtod(cur, intptr_t) & 0x3) == 0)
			continue;

		MGET(fresh, M_TRYWAIT, MT_DATA);
		if (cur->m_len >= MINCLSIZE) {
			MCLGET(fresh, M_TRYWAIT);
		}
		fresh->m_len = 0;
		break;
	}

	/*
	 * A replacement was allocated: move the rest of the chain into it
	 * and splice it in where the offending mbuf used to be.
	 */
	if (fresh != NULL) {
		for (; cur != NULL; cur = cur->m_next) {
			m_copyback(fresh, copied, cur->m_len,
			    mtod(cur, caddr_t));
			copied += cur->m_len;
		}
		m_freem(*pm);
		*pm = fresh;
	}

	RPCDEBUG("leave rpcclnt_realign()");
}

/*
 * Report "rpc server <server>: <msg>" through tprintf().
 *
 * td selects the process handed to tprintf; with a NULL td the message is
 * issued with a NULL process / tpr handle.  Always returns 0.
 */
static int
rpcclnt_msg(td, server, msg)
	cthread_t      *td;
	const char     *server;
	char           *msg;
{
#if defined(__OpenBSD__) || defined(__APPLE__)
	struct proc    *p;
	tpr_t           tpr;

	/*
	 * cthread_proc() is only evaluated once td is known to be non-NULL;
	 * the && short-circuits otherwise.
	 */
	if (td != NULL && (p = cthread_proc(td)))
		tpr = tprintf_open(p);
	else
		tpr = NULL;
	tprintf(tpr, "rpc server %s: %s\n", server, msg);
	tprintf_close(tpr);
#else
	tprintf(td ? td->td_proc : NULL, LOG_INFO,
		"rpc server %s: %s\n", server, msg);
#endif
	RPC_RETURN(0);
}

/*
 * Build the RPC header and fill in the authorization info. The authorization
 * string argument is only used when the credentials come from outside of the
 * kernel (AUTH_KERB). (likewise, the ucred is only used when inside the
 * kernel) Returns the head of the mbuf list.
 *
 * rc       - client; supplies the program id and version for the header.
 * procid   - procedure number placed in the header.
 * mrest    - request body; linked in after the header, not copied.
 * xidp     - out: the xid in the form it was written into the header
 *            (i.e. after txdr_unsigned()).
 * mheadend - out: last header mbuf, the one whose m_next is mrest.
 * cred     - passed on to rpcauth_build_reqhdr().
 *
 * Returns NULL when rpcauth_build_reqhdr() fails.
 * NOTE(review): on that path the header mbuf allocated here is not freed by
 * this function -- confirm whether rpcauth_build_reqhdr() releases it.
 */
static struct mbuf *
rpcclnt_buildheader(struct rpcclnt *rc, int procid, struct mbuf *mrest,
    int *xidp, struct mbuf **mheadend, struct ucred *cred)
{
	/* register */ struct mbuf *mb;
	register u_int32_t *tl;
	/* register */ caddr_t bpos;
	struct mbuf *mreq, *mb2;
	int error;

	/*
	 * The fixed part of the call header is six 32-bit words; pick a
	 * cluster or align within the header mbuf depending on how that
	 * compares with the mbuf size constants.
	 */
	MGETHDR(mb, M_TRYWAIT, MT_DATA);
	if (6 * RPCX_UNSIGNED >= MINCLSIZE) {
		MCLGET(mb, M_TRYWAIT);
	} else if (6 * RPCX_UNSIGNED < MHLEN) {
		MH_ALIGN(mb, 6 * RPCX_UNSIGNED);
	} else {
	  	RPCDEBUG("mbuf too small");
		panic("cheap bailout");
	}
	mb->m_len = 0;
	mreq = mb;
	bpos = mtod(mb, caddr_t);

	/*
	 * First the RPC header.
	 */
	/* mb2 is presumably scratch space for the rpcm_build macro. */
	rpcm_build(tl, u_int32_t *, 6 * RPCX_UNSIGNED);

	/*
	 * Get a new (non-zero) xid
	 *
	 * The very first call seeds the global rpcclnt_xid with a random
	 * value; later calls advance it by a random increment whose low
	 * byte is non-zero (*xidp doubles as scratch for that increment).
	 */
	if ((rpcclnt_xid == 0) && (rpcclnt_xid_touched == 0)) {
		RANDOM(rpcclnt_xid);
		rpcclnt_xid_touched = 1;
	} else {
	  	do {
			RANDOM(*xidp);
		} while ((*xidp % 256) == 0);
		rpcclnt_xid += *xidp;
	}

	/* XXX: funky... */
	*tl++ = *xidp = txdr_unsigned(rpcclnt_xid);

	/* Remaining words: direction, RPC version, program, version, proc. */
	*tl++ = rpc_call;
	*tl++ = rpc_vers;
	*tl++ = txdr_unsigned(rc->rc_prog->prog_id);
	*tl++ = txdr_unsigned(rc->rc_prog->prog_version);
	*tl++ = txdr_unsigned(procid);

	/* mb and bpos are passed by reference and may be moved on. */
	if ((error = rpcauth_build_reqhdr(rc, cred, &mb, &bpos, mrest)) != 0) {
		RPCDEBUG("rpcauth_build_reqhdr failed %d", error);
		return (NULL);
	}

	/* Hang the body off the header and fix up the packet header. */
	mb->m_next = mrest;
	*mheadend = mb;
	M_LENGTH(mreq->m_pkthdr.len, mreq);
	mreq->m_pkthdr.rcvif = NULL;

	return (mreq);
}

/*
 * Help break down an mbuf chain by setting the first siz bytes contiguous
 * pointed to by returned val. This is used by the macros rpcm_dissect and
 * rpcm_dissecton for tough cases. (The macros use the vars. dpos and dpos2)
 *
 * mdp   - in/out: current mbuf of the chain being parsed.
 * dposp - in/out: current parse position inside *mdp.
 * siz   - number of contiguous bytes wanted.
 * left  - bytes remaining in *mdp from *dposp.
 * cp2   - out: pointer to the siz contiguous bytes.
 *
 * Returns 0 on success and EBADRPC when the chain ends before siz bytes are
 * available.  Panics when the bytes have to be gathered and siz > MHLEN.
 */
static int
rpcm_disct(struct mbuf **mdp, caddr_t *dposp, int siz, int left, caddr_t *cp2)
{
	struct mbuf    *mp, *mp2;
	int             siz2, xfer;
	caddr_t         p;

	mp = *mdp;
	/* Current mbuf exhausted: step forward to the next one. */
	while (left == 0) {
		*mdp = mp = mp->m_next;
		if (mp == NULL)
			RPC_RETURN(EBADRPC);
		left = mp->m_len;
		*dposp = mtod(mp, caddr_t);
	}
	if (left >= siz) {
		/* Easy case: the data is already contiguous. */
		*cp2 = *dposp;
		*dposp += siz;
	} else if (mp->m_next == NULL) {
		RPC_RETURN(EBADRPC);
	} else if (siz > MHLEN) {
		panic("rpc S too big");
	} else {
		/*
		 * The data straddles mbufs: insert a new mbuf after the
		 * current one, move the 'left' trailing bytes of the current
		 * mbuf into it, then pull the remaining bytes out of the
		 * mbufs that follow.
		 */
		MGET(mp2, M_TRYWAIT, MT_DATA);
		mp2->m_next = mp->m_next;
		mp->m_next = mp2;
		mp->m_len -= left;
		mp = mp2;
		*cp2 = p = mtod(mp, caddr_t);
		bcopy(*dposp, p, left);	/* Copy what was left */
		siz2 = siz - left;
		p += left;
		mp2 = mp->m_next;
		/* Loop around copying up the siz2 bytes */
		while (siz2 > 0) {
			if (mp2 == NULL)
				RPC_RETURN(EBADRPC);
			xfer = (siz2 > mp2->m_len) ? mp2->m_len : siz2;
			if (xfer > 0) {
				bcopy(mtod(mp2, caddr_t), p, xfer);
				/* presumably advances mp2's data pointer by xfer */
				RPCMADV(mp2, xfer);
				mp2->m_len -= xfer;
				p += xfer;
				siz2 -= xfer;
			}
			if (siz2 > 0)
				mp2 = mp2->m_next;
		}
		/* Parsing continues at the start of what is left of mp2. */
		mp->m_len = siz;
		*mdp = mp2;
		*dposp = mtod(mp2, caddr_t);
	}

	RPC_RETURN(0);
}

/*
 * Look up procid in the client's per-procedure table (rc_proct).
 *
 * Returns the table entry, or 0 when the client has no table or procid lies
 * beyond its rc_proctlen entries.
 */
static u_int32_t
rpcclnt_proct(struct rpcclnt *rpc, u_int32_t procid)
{
	/* No table configured. */
	if (rpc->rc_proctlen == 0 || rpc->rc_proct == NULL)
		return (0);

	/* Procedure number outside the table. */
	if (procid >= rpc->rc_proctlen)
		return (0);

	return (rpc->rc_proct[procid]);
}

/*
 * Advance the parse position (*mdp, *dposp) across mbuf boundaries.
 *
 * offs is the distance to skip and left the number of bytes remaining in the
 * current mbuf.  While the bytes available are fewer than offs they are
 * subtracted from it and the walk moves to the next mbuf; the position ends
 * up offs bytes into the data of the mbuf where the walk stopped.
 *
 * Returns 0, or EBADRPC when the chain ends first (in which case *mdp and
 * *dposp are left untouched).
 */
static int
rpc_adv(struct mbuf **mdp, caddr_t *dposp, int offs, int left)
{
	struct mbuf    *cur = *mdp;
	int             avail;

	for (avail = left; avail < offs; avail = cur->m_len) {
		offs -= avail;
		cur = cur->m_next;
		if (cur == NULL)
			RPC_RETURN(EBADRPC);
	}

	*mdp = cur;
	*dposp = mtod(cur, caddr_t) + offs;

	RPC_RETURN(0);
}

/*
 * Cancel every outstanding request that belongs to rpc.
 *
 * All of the client's tasks that have no reply yet and are not already
 * soft-terminated are passed to rpcclnt_softterm().  The function then
 * checks up to 30 times, sleeping on lbolt between checks, whether any task
 * of this client is still on rpctask_q.
 *
 * Returns 0 once none is left (and immediately for RPCCLNT_REDIRECT
 * clients), or EBUSY if tasks remain after the last check.
 */
int
rpcclnt_cancelreqs(struct rpcclnt *rpc)
{
	struct rpctask *t;
	int             tries, ipl;

	if (ISSET(rpc->rc_flag, RPCCLNT_REDIRECT)) /* XXX */
		return (0);

	/* Pass 1: soft-terminate whatever is still waiting for a reply. */
	ipl = SPLNET();
	TAILQ_FOREACH(t, &rpctask_q, r_chain) {
		if (t->r_rpcclnt == rpc && t->r_mrep == NULL &&
		    !ISSET(t->r_flags, TASK_SOFTTERM))
			rpcclnt_softterm(t);
	}
	splx(ipl);

	/* Pass 2: wait for the owners to take their tasks off the queue. */
	tries = 0;
	while (tries < 30) {
		ipl = SPLNET();
		TAILQ_FOREACH(t, &rpctask_q, r_chain) {
			if (t->r_rpcclnt == rpc)
				break;
		}
		splx(ipl);

		/* t is NULL when the scan found no task of this client. */
		if (t == NULL)
			return (0);

		tsleep(&lbolt, PSOCK, "nfscancel", 0);
		tries++;
	}

	return (EBUSY);
}

/*
 * Mark a task as soft-terminated.  If it was counted as sent, take its share
 * (RPC_CWNDSCALE) back out of the client's rc_sent and clear TASK_SENT.
 */
static void
rpcclnt_softterm(struct rpctask *task)
{
	SET(task->r_flags, TASK_SOFTTERM);

	if (!ISSET(task->r_flags, TASK_SENT))
		return;

	task->r_rpcclnt->rc_sent -= RPC_CWNDSCALE;
	CLR(task->r_flags, TASK_SENT);
}


#if defined(__OpenBSD__)
/*
 * Allocate a zero-filled struct rpcclnt (M_RPC, M_WAITOK) and store it in
 * *rpc.  Called by rpcclnt_get().
 */
void
rpcclnt_create(struct rpcclnt **rpc)
{
	MALLOC(*rpc, struct rpcclnt *, sizeof(**rpc), M_RPC,
	    M_WAITOK | M_ZERO);
}

/*
 * Free a struct rpcclnt obtained from rpcclnt_create().  A NULL pointer is
 * only logged.  Called by rpcclnt_put().
 */
void
rpcclnt_destroy(struct rpcclnt *rpc)
{
	if (rpc == NULL) {
		RPCDEBUG("attempting to free a NULL rpcclnt (not dereferenced)");
		return;
	}

	FREE(rpc, M_RPC);
}
#endif

/* XXXMARIUS - this needs to respect zerocopy */
/*
 * Copy siz bytes described by uiop into the mbuf chain that starts at *mq,
 * appending new mbufs whenever the current one is full, and pad the data
 * with zero bytes up to rpcm_rndup(siz).
 *
 * uiop   - source; must carry a single iovec.  With UIO_SYSSPACE the data
 *          is copied with bcopy(); otherwise it is read with proc_rwmem()
 *          from the process of uiop->uio_td, using the iovec base as the
 *          address in that process.
 * mq     - in: mbuf to start filling.  out: the LAST mbuf written, not the
 *          head of the chain -- callers that need the head must save it
 *          before calling.
 * siz    - number of bytes to copy.
 * cur_td - thread recorded in the uio handed to proc_rwmem().
 *
 * Returns 0, or the error from proc_rwmem(); in the error case *mq is left
 * unchanged and the mbufs appended so far stay linked to the chain.
 */
static int
_uiotombuf(struct uio *uiop, struct mbuf **mq, int siz, cthread_t *cur_td)
{
	char *uiocp;
	struct mbuf *mp, *mp2;
	int xfer, left, mlen;
	int uiosiz, clflg, rem;
	char *cp;

#ifdef DIAGNOSTIC
	if (uiop->uio_iovcnt != 1)
		panic("_uiotombuf: iovcnt != 1");
#endif
	/* Use clusters for the appended mbufs when the data is large. */
	if (siz > MLEN)		/* or should it >= MCLBYTES ?? */
		clflg = 1;
	else
		clflg = 0;
	/* rem = number of pad bytes needed after the data. */
	rem = rpcm_rndup(siz)-siz;
	/* mp is the mbuf being filled, mp2 the current tail of the chain. */
	mp = mp2 = *mq;
	while (siz > 0) {
		left = uiop->uio_iov->iov_len;
		uiocp = uiop->uio_iov->iov_base;
		if (left > siz)
			left = siz;
		uiosiz = left;
		while (left > 0) {
			mlen = M_TRAILINGSPACE(mp);
			if (mlen == 0) {
				/* Current mbuf is full: append a new one. */
				MGET(mp, M_TRYWAIT, MT_DATA);
				if (clflg)
					MCLGET(mp, M_TRYWAIT);
				mp->m_len = 0;
				mp2->m_next = mp;
				mp2 = mp;
				mlen = M_TRAILINGSPACE(mp);
			}
			xfer = (left > mlen) ? mlen : left;
#ifdef notyet
			if (uiop->uio_iov->iov_op != NULL)
				(*(uiop->uio_iov->iov_op))
				(uiocp, mtod(mp, caddr_t)+mp->m_len, xfer);
			else
#endif
			if (uiop->uio_segflg == UIO_SYSSPACE)
				bcopy(uiocp, mtod(mp, caddr_t)+mp->m_len, xfer);
			else {
				/*
				 * Read from the other process: the local uio
				 * describes the destination (this mbuf) and
				 * uio_offset carries the source address.
				 */
				struct uio uio;
				struct iovec iov;
				int error;

#ifdef notyet
				if (cthread_proc(cur_td) != NULL &&
				    cthread_proc(uiop->uio_td)->p_pid ==
				    cthread_proc(cur_td)->p_pid) {
					copyin(uiocp, mtod(mp, caddr_t) +
					    mp->m_len, xfer);
					goto next;
				}
#endif
				iov.iov_base = mtod(mp, char *) + mp->m_len;
				iov.iov_len = xfer;
				uio.uio_iov = &iov;
				uio.uio_iovcnt = 1;
				uio.uio_offset = (off_t)(uintptr_t)uiocp;
				uio.uio_resid = xfer;
				uio.uio_rw = UIO_READ;
				uio.uio_segflg = UIO_SYSSPACE;
				uio.uio_td = cur_td;

				error = proc_rwmem(cthread_proc(uiop->uio_td),
				    &uio);
				if (error != 0)
					return (error);

				/* Count only what was actually transferred. */
				xfer -= uio.uio_resid;
			}
			mp->m_len += xfer;
			left -= xfer;
			uiocp += xfer;
			uiop->uio_offset += xfer;
			uiop->uio_resid -= xfer;
		}
		uiop->uio_iov->iov_base =
		    (char *)uiop->uio_iov->iov_base + uiosiz;
		uiop->uio_iov->iov_len -= uiosiz;
		siz -= uiosiz;
	}
	/* Zero-fill the pad bytes, in a new mbuf if they do not fit. */
	if (rem > 0) {
		if (rem > M_TRAILINGSPACE(mp)) {
			MGET(mp, M_TRYWAIT, MT_DATA);
			mp->m_len = 0;
			mp2->m_next = mp;
		}
		cp = mtod(mp, caddr_t)+mp->m_len;
		for (left = 0; left < rem; left++)
			*cp++ = '\0';
		mp->m_len += rem;
	} 
	/* Hand back the last mbuf written (see the note on mq above). */
	*mq = mp;
	return (0);
}