 rnfs-linux-2.6.16-rc3-jiayingz/Makefile                    |    2 
 rnfs-linux-2.6.16-rc3-jiayingz/fs/nfs/Makefile             |    3 
 rnfs-linux-2.6.16-rc3-jiayingz/fs/nfs/dir.c                |   24 
 rnfs-linux-2.6.16-rc3-jiayingz/fs/nfs/idmap.c              |    1 
 rnfs-linux-2.6.16-rc3-jiayingz/fs/nfs/inode.c              |  957 +++++++
 rnfs-linux-2.6.16-rc3-jiayingz/fs/nfs/namespace.c          |  245 ++
 rnfs-linux-2.6.16-rc3-jiayingz/fs/nfs/nfs2xdr.c            |    3 
 rnfs-linux-2.6.16-rc3-jiayingz/fs/nfs/nfs3xdr.c            |    3 
 rnfs-linux-2.6.16-rc3-jiayingz/fs/nfs/nfs4_fs.h            |   22 
 rnfs-linux-2.6.16-rc3-jiayingz/fs/nfs/nfs4proc.c           |  472 +++
 rnfs-linux-2.6.16-rc3-jiayingz/fs/nfs/nfs4state.c          |    5 
 rnfs-linux-2.6.16-rc3-jiayingz/fs/nfs/nfs4xdr.c            |  275 ++
 rnfs-linux-2.6.16-rc3-jiayingz/fs/nfsd/Makefile            |    4 
 rnfs-linux-2.6.16-rc3-jiayingz/fs/nfsd/export.c            |   70 
 rnfs-linux-2.6.16-rc3-jiayingz/fs/nfsd/nfs4fsloc.c         |    5 
 rnfs-linux-2.6.16-rc3-jiayingz/fs/nfsd/nfs4repclnt.c       |  552 ++++
 rnfs-linux-2.6.16-rc3-jiayingz/fs/nfsd/nfs4repclnt.h       |   53 
 rnfs-linux-2.6.16-rc3-jiayingz/fs/nfsd/nfs4repd.c          | 1587 +++++++++++++
 rnfs-linux-2.6.16-rc3-jiayingz/fs/nfsd/nfs4repd.h          |   85 
 rnfs-linux-2.6.16-rc3-jiayingz/fs/nfsd/nfs4replication.c   | 1474 ++++++++++++
 rnfs-linux-2.6.16-rc3-jiayingz/fs/nfsd/nfs4replication.h   |  190 +
 rnfs-linux-2.6.16-rc3-jiayingz/fs/nfsd/nfs4reppipe.c       |  193 +
 rnfs-linux-2.6.16-rc3-jiayingz/fs/nfsd/nfs4repstate.c      | 1006 ++++++++
 rnfs-linux-2.6.16-rc3-jiayingz/fs/nfsd/nfs4repstate.h      |  110 
 rnfs-linux-2.6.16-rc3-jiayingz/fs/nfsd/nfs4repxdr.c        | 1062 ++++++++
 rnfs-linux-2.6.16-rc3-jiayingz/fs/nfsd/nfs4xdr.c           |   11 
 rnfs-linux-2.6.16-rc3-jiayingz/fs/nfsd/nfssvc.c            |    7 
 rnfs-linux-2.6.16-rc3-jiayingz/fs/nfsd/vfs.c               |  386 ++-
 rnfs-linux-2.6.16-rc3-jiayingz/fs/super.c                  |   22 
 rnfs-linux-2.6.16-rc3-jiayingz/include/linux/mount.h       |    5 
 rnfs-linux-2.6.16-rc3-jiayingz/include/linux/nfs4.h        |    2 
 rnfs-linux-2.6.16-rc3-jiayingz/include/linux/nfs_fs.h      |   21 
 rnfs-linux-2.6.16-rc3-jiayingz/include/linux/nfs_fs_sb.h   |    4 
 rnfs-linux-2.6.16-rc3-jiayingz/include/linux/nfs_page.h    |    1 
 rnfs-linux-2.6.16-rc3-jiayingz/include/linux/nfs_xdr.h     |   60 
 rnfs-linux-2.6.16-rc3-jiayingz/include/linux/nfsd/debug.h  |    4 
 rnfs-linux-2.6.16-rc3-jiayingz/include/linux/nfsd/export.h |   11 
 rnfs-linux-2.6.16-rc3-jiayingz/include/linux/nfsd/nfsd.h   |   19 
 rnfs-linux-2.6.16-rc3-jiayingz/include/linux/sunrpc/clnt.h |    3 
 rnfs-linux-2.6.16-rc3-jiayingz/include/linux/sunrpc/svc.h  |    2 
 rnfs-linux-2.6.16-rc3-jiayingz/include/linux/sunrpc/xdr.h  |    1 
 rnfs-linux-2.6.16-rc3-jiayingz/include/linux/sunrpc/xprt.h |    2 
 rnfs-linux-2.6.16-rc3-jiayingz/net/sunrpc/clnt.c           |   35 
 rnfs-linux-2.6.16-rc3-jiayingz/net/sunrpc/rpc_pipe.c       |    5 
 rnfs-linux-2.6.16-rc3-jiayingz/net/sunrpc/xdr.c            |   28 
 45 files changed, 8763 insertions(+), 269 deletions(-)

diff -puN Makefile~rnfs-all Makefile
--- rnfs-linux-2.6.16-rc3/Makefile~rnfs-all	2007-03-03 16:21:47.000000000 -0500
+++ rnfs-linux-2.6.16-rc3-jiayingz/Makefile	2007-03-03 16:21:47.000000000 -0500
@@ -1,7 +1,7 @@
 VERSION = 2
 PATCHLEVEL = 6
 SUBLEVEL = 16
-EXTRAVERSION =-rc3-CITI_NFS4_ALL-1
+EXTRAVERSION =-rc3-rnfs
 NAME=Sliding Snow Leopard
 
 # *DOCUMENTATION*
diff -puN fs/nfs/dir.c~rnfs-all fs/nfs/dir.c
--- rnfs-linux-2.6.16-rc3/fs/nfs/dir.c~rnfs-all	2007-03-03 16:21:47.000000000 -0500
+++ rnfs-linux-2.6.16-rc3-jiayingz/fs/nfs/dir.c	2007-03-03 16:21:47.000000000 -0500
@@ -869,6 +869,17 @@ int nfs_is_exclusive_create(struct inode
 	return (nd->intent.open.flags & O_EXCL) != 0;
 }
 
+static inline int nfs_reval_fsid(struct inode *dir,
+		struct nfs_fh *fh, struct nfs_fattr *fattr)
+{
+	struct nfs_server *server = NFS_SERVER(dir);
+
+	if (!nfs_fsid_equal(&server->fsid, &fattr->fsid))
+		/* Revalidate fsid on root dir */
+		return __nfs_revalidate_inode(server, dir->i_sb->s_root->d_inode);
+	return 0;
+}
+
 static struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, struct nameidata *nd)
 {
 	struct dentry *res;
@@ -897,9 +908,13 @@ static struct dentry *nfs_lookup(struct 
 	error = NFS_PROTO(dir)->lookup(dir, &dentry->d_name, &fhandle, &fattr);
 	if (error == -ENOENT)
 		goto no_entry;
-	if (error < 0) {
-		res = ERR_PTR(error);
-		goto out_unlock;
+	if (error < 0)
+		goto out_err;
+	/* No need to revalidate dir fsid on moved */
+	if (!(fattr.valid & NFS_ATTR_MOVED)) {
+		error = nfs_reval_fsid(dir, &fhandle, &fattr);
+		if (error < 0)
+			goto out_err;
 	}
 	res = ERR_PTR(-EACCES);
 	inode = nfs_fhget(dentry->d_sb, &fhandle, &fattr);
@@ -915,6 +930,9 @@ out_unlock:
 	unlock_kernel();
 out:
 	return res;
+out_err:
+	res = ERR_PTR(error);
+	goto out_unlock;
 }
 
 #ifdef CONFIG_NFS_V4
diff -puN fs/nfs/idmap.c~rnfs-all fs/nfs/idmap.c
--- rnfs-linux-2.6.16-rc3/fs/nfs/idmap.c~rnfs-all	2007-03-03 16:21:47.000000000 -0500
+++ rnfs-linux-2.6.16-rc3-jiayingz/fs/nfs/idmap.c	2007-03-03 16:21:47.000000000 -0500
@@ -46,7 +46,6 @@
 #include <linux/workqueue.h>
 #include <linux/sunrpc/rpc_pipe_fs.h>
 
-#include <linux/nfs_fs_sb.h>
 #include <linux/nfs_fs.h>
 
 #include <linux/nfs_idmap.h>
diff -puN fs/nfs/inode.c~rnfs-all fs/nfs/inode.c
--- rnfs-linux-2.6.16-rc3/fs/nfs/inode.c~rnfs-all	2007-03-03 16:21:47.000000000 -0500
+++ rnfs-linux-2.6.16-rc3-jiayingz/fs/nfs/inode.c	2007-03-03 16:21:47.000000000 -0500
@@ -33,9 +33,12 @@
 #include <linux/lockd/bind.h>
 #include <linux/smp_lock.h>
 #include <linux/seq_file.h>
+#include <linux/sysctl.h>
 #include <linux/mount.h>
 #include <linux/nfs_idmap.h>
 #include <linux/vfs.h>
+#include <linux/inet.h>
+#include <linux/nfs_xdr.h>
 
 #include <asm/system.h>
 #include <asm/uaccess.h>
@@ -128,6 +131,60 @@ struct rpc_program		nfsacl_program = {
 };
 #endif  /* CONFIG_NFS_V3_ACL */
 
+#ifdef CONFIG_SYSCTL
+/* Follow the established convention in NLM */
+#define CTL_UNNUMBERED -2
+
+static ctl_table nfs_sysctls[] = {
+	{
+		.ctl_name	= CTL_UNNUMBERED,
+		.procname	= "nfs_mountpoint_timeout",
+		.data		= &nfs_mountpoint_expiry_timeout,
+		.maxlen		= sizeof(nfs_mountpoint_expiry_timeout),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec_jiffies,
+		.strategy	= &sysctl_jiffies,
+	},
+	{ .ctl_name = 0 }
+};
+
+static ctl_table nfs_sysctl_dir[] = {
+	{
+		.ctl_name	= CTL_UNNUMBERED,
+		.procname	= "nfs",
+		.mode		= 0555,
+		.child		= nfs_sysctls,
+	},
+	{ .ctl_name = 0 }
+};
+
+static ctl_table nfs_sysctl_root[] = {
+	{
+		.ctl_name	= CTL_FS,
+		.procname	= "fs",
+		.mode		= 0555,
+		.child		= nfs_sysctl_dir,
+	},
+	{ .ctl_name = 0 }
+};
+
+static struct ctl_table_header *nfs_sysctl_table;
+
+static inline int nfs_init_sysctl(void)
+{
+	nfs_sysctl_table = register_sysctl_table(nfs_sysctl_root, 0);
+	return nfs_sysctl_table != NULL ? 0 : -ENOMEM;
+}
+
+static inline void nfs_destroy_sysctl(void)
+{
+	unregister_sysctl_table(nfs_sysctl_table);
+}
+#else
+#define nfs_init_sysctl() (0)
+#define nfs_destroy_sysctl() do { } while(0)
+#endif /* CONFIG_SYSCTL */
+
 static inline unsigned long
 nfs_fattr_to_ino_t(struct nfs_fattr *fattr)
 {
@@ -234,6 +291,14 @@ nfs_block_size(unsigned long bsize, unsi
 	return nfs_block_bits(bsize, nrbitsp);
 }
 
+static inline void
+nfs_super_set_maxbytes(struct super_block *sb, __u64 maxfilesize)
+{
+	sb->s_maxbytes = (loff_t)maxfilesize;
+	if (sb->s_maxbytes > MAX_LFS_FILESIZE || sb->s_maxbytes <= 0) 
+		sb->s_maxbytes = MAX_LFS_FILESIZE; 
+}
+
 /*
  * Obtain the root inode of the file system.
  */
@@ -250,6 +315,7 @@ nfs_get_root(struct super_block *sb, str
 		return ERR_PTR(error);
 	}
 
+	server->fsid = fsinfo->fattr->fsid;
 	rooti = nfs_fhget(sb, rootfh, fsinfo->fattr);
 	if (!rooti)
 		return ERR_PTR(-ENOMEM);
@@ -349,9 +415,7 @@ nfs_sb_init(struct super_block *sb, rpc_
 	}
 	server->backing_dev_info.ra_pages = server->rpages * NFS_MAX_READAHEAD;
 
-	sb->s_maxbytes = fsinfo.maxfilesize;
-	if (sb->s_maxbytes > MAX_LFS_FILESIZE) 
-		sb->s_maxbytes = MAX_LFS_FILESIZE; 
+	nfs_super_set_maxbytes(sb, fsinfo.maxfilesize);
 
 	server->client->cl_intr = (server->flags & NFS_MOUNT_INTR) ? 1 : 0;
 	server->client->cl_softrtry = (server->flags & NFS_MOUNT_SOFT) ? 1 : 0;
@@ -895,6 +959,12 @@ nfs_fhget(struct super_block *sb, struct
 			if (nfs_server_capable(inode, NFS_CAP_READDIRPLUS)
 			    && fattr->size <= NFS_LIMIT_READDIRPLUS)
 				set_bit(NFS_INO_ADVISE_RDPLUS, &NFS_FLAGS(inode));
+			/* Deal with crossing mountpoints */
+			if (!nfs_fsid_equal(&NFS_SB(sb)->fsid, &fattr->fsid)) {
+				dprintk("nfs_fhget: crossing mountpoints\n");
+				inode->i_op = &nfs_mountpoint_inode_operations;
+				inode->i_fop = NULL;
+			}
 		} else if (S_ISLNK(inode->i_mode))
 			inode->i_op = &nfs_symlink_inode_operations;
 		else
@@ -1238,6 +1308,23 @@ __nfs_revalidate_inode(struct nfs_server
 		dfprintk(PAGECACHE, "nfs_revalidate_inode: (%s/%Ld) getattr failed, error=%d\n",
 			 inode->i_sb->s_id,
 			 (long long)NFS_FILEID(inode), status);
+		if (status == -ETIMEDOUT) {
+			status = nfs_try_failover_inode(inode, NULL);
+			if (status == 0) {
+				status = nfs_try_migrate_inode(inode, NULL);
+				if (status == 0) {
+					/* redo getattr */
+					status = NFS_PROTO(inode)->getattr(NFS_SERVER(inode), NFS_FH(inode), &fattr);
+				}
+			}
+		}
+		if (status == -EAGAIN) {
+			status = nfs4_recover_filehandle(inode, NULL);
+			if (status == 0) {
+				/* redo getattr */
+				status = NFS_PROTO(inode)->getattr(NFS_SERVER(inode), NFS_FH(inode), &fattr);
+			}
+		}
 		if (status == -ESTALE) {
 			nfs_zap_caches(inode);
 			if (!S_ISDIR(inode->i_mode))
@@ -1524,6 +1611,7 @@ out:
  */
 static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
 {
+	struct nfs_server *server;
 	struct nfs_inode *nfsi = NFS_I(inode);
 	loff_t cur_isize, new_isize;
 	unsigned int	invalid = 0;
@@ -1545,6 +1633,12 @@ static int nfs_update_inode(struct inode
 	if ((inode->i_mode & S_IFMT) != (fattr->mode & S_IFMT))
 		goto out_changed;
 
+	server = NFS_SERVER(inode);
+	/* Update the fsid if and only if this is the root directory */
+	if (inode == inode->i_sb->s_root->d_inode
+			&& !nfs_fsid_equal(&server->fsid, &fattr->fsid))
+		server->fsid = fattr->fsid;
+
 	/*
 	 * Update the read time so we don't revalidate too often.
 	 */
@@ -1675,9 +1769,172 @@ static int nfs_update_inode(struct inode
 }
 
 /*
+ * nfs_try_migrate_filehandle - Check if we can migrate the inode filehandle
+ * @inode - pointer to inode
+ * @fh - the filehandle resulting from lookup()
+ * @fattr - attributes associated with the new filehandle
+ * @generation - value stored in inode->i_generation on success
+ *
+ * Do our very best to update existing inodes when the user wants to migrate
+ * this filesystem to a replica server. Returns 0 on success, -EIO otherwise.
+ *
+ * Note that here be HUGE dragons, with endless possibilities for trouble...
+ */
+int nfs_try_migrate_filehandle(struct inode *inode, struct nfs_fh *fh, struct nfs_fattr *fattr, uint32_t generation)
+{
+	struct nfs_inode *nfsi = NFS_I(inode);
+
+	/* Argh! The basic file type has changed */
+	if ((inode->i_mode & S_IFMT) != (fattr->mode & S_IFMT))
+		goto out_bad;
+	/* Fileid + filehandle are the same. Good! */
+	if (nfs_compare_fh(&nfsi->fh, fh) == 0 && nfsi->fileid == fattr->fileid)
+		goto out_good;
+	if (fattr->valid & NFS_ATTR_FATTR_V4) {	/* flag test: must be bitwise '&' */
+		/* Do the NFSv4 change attributes match our cached value? */
+		if (nfsi->change_attr != fattr->change_attr)
+			goto out_bad;
+	} else {
+		/* Does the ctime match? */
+		if (!timespec_equal(&fattr->ctime, &inode->i_ctime))
+			goto out_bad;
+	}
+	/* Does the file size match? */
+	if (nfs_size_to_loff_t(fattr->size) != inode->i_size)
+		goto out_bad;
+	/* FIXME: Here lie the biggest dragons:
+	 *        Try considering all possible races w.r.t. iget5_locked()
+	 */
+	nfs_copy_fh(&nfsi->fh, fh);
+	if (nfsi->fileid != fattr->fileid) {
+		/* The very concept of migrating to a new hash bucket
+		 * is so full of holes and races that it defies belief!
+		 */
+		remove_inode_hash(inode);
+		nfsi->fileid = fattr->fileid;
+		inode->i_ino = nfs_fattr_to_ino_t(fattr);
+		insert_inode_hash(inode);
+	}
+out_good:
+	inode->i_generation = generation;	/* mark inode as revalidated */
+	return 0;
+out_bad:
+	return -EIO;	/* inode cannot be matched to the new filehandle */
+}
+
+/*
+ * nfs_try_migrate_inode - Update an inode's filehandle after migration
+ * @inode - pointer to inode to migrate
+ * @dentry - pointer to dentry
+ */
+int nfs_try_migrate_inode(struct inode *inode, struct dentry *dentry)
+{
+	struct nfs_fh fh;
+	struct nfs_fattr fattr;
+	struct dentry *next, *next_parent;
+	uint32_t generation;
+	int status;
+
+	if (dentry == NULL) {
+		status = -ENOENT;
+		dentry = d_find_alias(inode);
+		if (dentry == NULL)
+			goto out;
+	} else
+		dget(dentry);
+repeat:
+	/* Has this inode already been revalidated? */
+	status = 0;
+	generation = NFS_SERVER(inode)->generation;
+#if 0
+	if ((long)generation - (long)inode->i_generation <= 0)
+		goto out;
+#endif
+	/* No. Search for a previously revalidated path element */
+	next = dget(dentry);
+	next_parent = dget_parent(dentry);
+	while((long)generation - (long)next_parent->d_inode->i_generation > 0) {
+		BUG_ON(IS_ROOT(next_parent));
+		dput(next);
+		next = next_parent;
+		next_parent = dget_parent(next);
+	}
+	status = NFS_PROTO(inode)->lookup(next_parent->d_inode, &next->d_name,
+					&fh, &fattr);
+	if (status == 0)
+		status = nfs_try_migrate_filehandle(next->d_inode, &fh, &fattr, generation);
+	switch (status) {
+		case -ESTALE:
+			if (IS_ROOT(next_parent))
+				break;
+		case 0:
+			if (dentry->d_inode == inode)
+				break;
+			dput(next_parent);
+			dput(next);
+			goto repeat;
+		default:
+			d_drop(next);
+	}
+	dput(next_parent);
+	dput(next);
+out:
+	dput(dentry);
+	dprintk("%s: returned error %d\n", __FUNCTION__, status);
+	return status;
+}
+
+/*
  * File system information
  */
 
+/*
+ * nfs_path - reconstruct the path given an arbitrary dentry
+ * @base - arbitrary string to prepend to the path
+ * @dentry - pointer to dentry
+ * @buffer - result buffer
+ * @buflen - length of buffer
+ *
+ * Helper function for constructing the path from the
+ * root dentry to an arbitrary hashed dentry.
+ *
+ * This is mainly for use in figuring out the path on the
+ * server side when automounting on top of an existing partition.
+ */
+static char *nfs_path(const char *base, const struct dentry *dentry,
+		      char *buffer, ssize_t buflen)
+{
+	char *end = buffer+buflen;
+	int namelen;
+
+	*--end = '\0';	/* the path is assembled backwards from the buffer end */
+	buflen--;
+	spin_lock(&dcache_lock);
+	while (!IS_ROOT(dentry)) {
+		namelen = dentry->d_name.len;
+		buflen -= namelen + 1;
+		if (buflen < 0)
+			break;	/* too long; never jump out with dcache_lock held */
+		end -= namelen;
+		memcpy(end, dentry->d_name.name, namelen);
+		*--end = '/';
+		dentry = dentry->d_parent;
+	}
+	spin_unlock(&dcache_lock);
+	namelen = strlen(base);
+	/* Strip off excess slashes in base string */
+	while (namelen > 0 && base[namelen - 1] == '/')
+		namelen--;
+	buflen -= namelen;
+	if (buflen < 0)	/* also catches an overflow detected inside the loop */
+		goto Elong;
+	end -= namelen;
+	memcpy(end, base, namelen);
+	return end;	/* start of the path inside buffer */
+Elong:
+	return ERR_PTR(-ENAMETOOLONG);
+}
+
 static int nfs_set_super(struct super_block *s, void *data)
 {
 	s->s_fs_info = data;
@@ -1825,6 +2082,7 @@ static void nfs_kill_super(struct super_
 
 	kfree(server->hostname);
 	kfree(server);
+	nfs_release_automount_timer();
 }
 
 static struct file_system_type nfs_fs_type = {
@@ -1883,62 +2141,24 @@ static void nfs4_clear_inode(struct inod
 }
 
 
-static int nfs4_fill_super(struct super_block *sb, struct nfs4_mount_data *data, int silent)
+static struct rpc_clnt *nfs4_create_client(struct nfs_server *server,
+	struct rpc_timeout *timeparms, int proto, rpc_authflavor_t flavor)
 {
-	struct nfs_server *server;
-	struct nfs4_client *clp = NULL;
+	struct nfs4_client *clp;
 	struct rpc_xprt *xprt = NULL;
 	struct rpc_clnt *clnt = NULL;
-	struct rpc_timeout timeparms;
-	rpc_authflavor_t authflavour;
 	int err = -EIO;
 
-	sb->s_blocksize_bits = 0;
-	sb->s_blocksize = 0;
-	server = NFS_SB(sb);
-	if (data->rsize != 0)
-		server->rsize = nfs_block_size(data->rsize, NULL);
-	if (data->wsize != 0)
-		server->wsize = nfs_block_size(data->wsize, NULL);
-	server->flags = data->flags & NFS_MOUNT_FLAGMASK;
-	server->caps = NFS_CAP_ATOMIC_OPEN;
-
-	server->acregmin = data->acregmin*HZ;
-	server->acregmax = data->acregmax*HZ;
-	server->acdirmin = data->acdirmin*HZ;
-	server->acdirmax = data->acdirmax*HZ;
-
-	server->rpc_ops = &nfs_v4_clientops;
-
-	nfs_init_timeout_values(&timeparms, data->proto, data->timeo, data->retrans);
-
-	server->retrans_timeo = timeparms.to_initval;
-	server->retrans_count = timeparms.to_retries;
-
 	clp = nfs4_get_client(&server->addr.sin_addr);
 	if (!clp) {
 		dprintk("%s: failed to create NFS4 client.\n", __FUNCTION__);
-		return -EIO;
+		return ERR_PTR(err);
 	}
 
 	/* Now create transport and client */
-	authflavour = RPC_AUTH_UNIX;
-	if (data->auth_flavourlen != 0) {
-		if (data->auth_flavourlen != 1) {
-			dprintk("%s: Invalid number of RPC auth flavours %d.\n",
-					__FUNCTION__, data->auth_flavourlen);
-			err = -EINVAL;
-			goto out_fail;
-		}
-		if (copy_from_user(&authflavour, data->auth_flavours, sizeof(authflavour))) {
-			err = -EFAULT;
-			goto out_fail;
-		}
-	}
-
 	down_write(&clp->cl_sem);
 	if (IS_ERR(clp->cl_rpcclient)) {
-		xprt = xprt_create_proto(data->proto, &server->addr, &timeparms);
+		xprt = xprt_create_proto(proto, &server->addr, timeparms);
 		if (IS_ERR(xprt)) {
 			up_write(&clp->cl_sem);
 			err = PTR_ERR(xprt);
@@ -1947,7 +2167,7 @@ static int nfs4_fill_super(struct super_
 			goto out_fail;
 		}
 		clnt = rpc_create_client(xprt, server->hostname, &nfs_program,
-				server->rpc_ops->version, authflavour);
+				server->rpc_ops->version, flavor);
 		if (IS_ERR(clnt)) {
 			up_write(&clp->cl_sem);
 			err = PTR_ERR(clnt);
@@ -1964,43 +2184,96 @@ static int nfs4_fill_super(struct super_
 	list_add_tail(&server->nfs4_siblings, &clp->cl_superblocks);
 	clnt = rpc_clone_client(clp->cl_rpcclient);
 	if (!IS_ERR(clnt))
-			server->nfs4_state = clp;
+		server->nfs4_state = clp;
 	up_write(&clp->cl_sem);
 	clp = NULL;
 
 	if (IS_ERR(clnt)) {
-		err = PTR_ERR(clnt);
 		dprintk("%s: cannot create RPC client. Error = %d\n",
 				__FUNCTION__, err);
-		return err;
+		return clnt;
 	}
 
-	server->client    = clnt;
-
 	if (server->nfs4_state->cl_idmap == NULL) {
 		dprintk("%s: failed to create idmapper.\n", __FUNCTION__);
-		return -ENOMEM;
+		return ERR_PTR(-ENOMEM);
 	}
 
-	if (clnt->cl_auth->au_flavor != authflavour) {
+	if (clnt->cl_auth->au_flavor != flavor) {
 		struct rpc_auth *auth;
 
-		auth = rpcauth_create(authflavour, clnt);
+		auth = rpcauth_create(flavor, clnt);
 		if (IS_ERR(auth)) {
 			dprintk("%s: couldn't create credcache!\n", __FUNCTION__);
-			return PTR_ERR(auth);
+			return (struct rpc_clnt *)auth;
 		}
 	}
+	return clnt;
+
+ out_fail:
+	if (clp)
+		nfs4_put_client(clp);
+	return ERR_PTR(err);
+}
+
+static int nfs4_fill_super(struct super_block *sb, struct nfs4_mount_data *data, int silent)
+{
+	struct nfs_server *server;
+	struct rpc_timeout timeparms;
+	rpc_authflavor_t authflavour;
+	int err = -EIO;
+
+	sb->s_blocksize_bits = 0;
+	sb->s_blocksize = 0;
+	server = NFS_SB(sb);
+	if (data->rsize != 0)
+		server->rsize = nfs_block_size(data->rsize, NULL);
+	if (data->wsize != 0)
+		server->wsize = nfs_block_size(data->wsize, NULL);
+	server->flags = data->flags & NFS_MOUNT_FLAGMASK;
+	server->caps = NFS_CAP_ATOMIC_OPEN;
+
+	server->acregmin = data->acregmin*HZ;
+	server->acregmax = data->acregmax*HZ;
+	server->acdirmin = data->acdirmin*HZ;
+	server->acdirmax = data->acdirmax*HZ;
+
+	server->rpc_ops = &nfs_v4_clientops;
+
+	nfs_init_timeout_values(&timeparms, data->proto, data->timeo, data->retrans);
+
+	server->retrans_timeo = timeparms.to_initval;
+	server->retrans_count = timeparms.to_retries;
+
+	/* Now create transport and client */
+	authflavour = RPC_AUTH_UNIX;
+	if (data->auth_flavourlen != 0) {
+		if (data->auth_flavourlen != 1) {
+			dprintk("%s: Invalid number of RPC auth flavours %d.\n",
+					__FUNCTION__, data->auth_flavourlen);
+			err = -EINVAL;
+			goto out_fail;
+		}
+		if (copy_from_user(&authflavour, data->auth_flavours, sizeof(authflavour))) {
+			err = -EFAULT;
+			goto out_fail;
+		}
+	}
+
+	server->client = nfs4_create_client(server, &timeparms, data->proto, authflavour);
+	if (IS_ERR(server->client)) {
+		err = PTR_ERR(server->client);
+			dprintk("%s: cannot create RPC client. Error = %d\n",
+					__FUNCTION__, err);
+			goto out_fail;
+	}
 
 	sb->s_time_gran = 1;
 
 	sb->s_op = &nfs4_sops;
 	err = nfs_sb_init(sb, authflavour);
-	if (err == 0)
-		return 0;
-out_fail:
-	if (clp)
-		nfs4_put_client(clp);
+
+ out_fail:
 	return err;
 }
 
@@ -2147,6 +2420,7 @@ static void nfs4_kill_super(struct super
 
 	kfree(server->hostname);
 	kfree(server);
+	nfs_release_automount_timer();
 }
 
 static struct file_system_type nfs4_fs_type = {
@@ -2186,6 +2460,59 @@ static int param_set_idmap_timeout(const
 module_param_call(idmap_cache_timeout, param_set_idmap_timeout, param_get_int,
 		 &nfs_idmap_cache_timeout, 0644);
 
+/* Constructs the SERVER-side path */
+static inline char *nfs4_path(const struct dentry *dentry, char *buffer, ssize_t buflen)
+{
+	return nfs_path(NFS_SB(dentry->d_sb)->mnt_path, dentry, buffer, buflen);
+}
+
+static inline char *nfs4_dup_path(const struct dentry *dentry)
+{
+	char *page = (char *) __get_free_page(GFP_USER);
+	char *path;
+
+	path = page ? nfs4_path(dentry, page, PAGE_SIZE) : ERR_PTR(-ENOMEM);
+	if (!IS_ERR(path)) {
+		int len = PAGE_SIZE + page - path;	/* includes trailing NUL */
+		char *tmp = path;
+
+		path = kmalloc(len, GFP_KERNEL);
+		if (path)
+			memcpy(path, tmp, len);
+		else
+			path = ERR_PTR(-ENOMEM);
+	}
+	free_page((unsigned long)page);	/* free_page() ignores a zero address */
+	return path;	/* kmalloc'ed copy of the path, or an ERR_PTR */
+}
+
+static struct super_block *nfs4_clone_client(struct nfs_server *server, const struct dentry *dentry)
+{
+	struct nfs4_client *clp = server->nfs4_state;
+	struct super_block *sb;
+
+	server->mnt_path = nfs4_dup_path(dentry);
+	if (IS_ERR(server->mnt_path)) {
+		sb = (struct super_block *)server->mnt_path;
+		goto err;
+	}
+	sb = sget(&nfs4_fs_type, nfs4_compare_super, nfs_set_super, server);
+	if (IS_ERR(sb) || sb->s_root)
+		goto free_path;
+	nfs4_server_capabilities(server, &server->fh);
+
+	down_write(&clp->cl_sem);
+	atomic_inc(&clp->cl_count);
+	list_add_tail(&server->nfs4_siblings, &clp->cl_superblocks);
+	up_write(&clp->cl_sem);
+	return sb;
+free_path:
+	kfree(server->mnt_path);
+err:
+	server->mnt_path = NULL;
+	return sb;
+}
+
 #define nfs4_init_once(nfsi) \
 	do { \
 		INIT_LIST_HEAD(&(nfsi)->open_states); \
@@ -2213,12 +2540,513 @@ static inline void unregister_nfs4fs(voi
 	nfs_unregister_sysctl();
 }
 #else
+#define nfs4_clone_client(a,b) ERR_PTR(-EINVAL)
 #define nfs4_init_once(nfsi) \
 	do { } while (0)
 #define register_nfs4fs() (0)
 #define unregister_nfs4fs()
 #endif
 
+static inline struct super_block *nfs_clone_client(struct nfs_server *server)
+{
+	struct super_block *sb;
+
+	sb = sget(&nfs_fs_type, nfs_compare_super, nfs_set_super, server);
+	if (!IS_ERR(sb) && sb->s_root == NULL && !(server->flags & NFS_MOUNT_NONLM))
+		lockd_up();
+	return sb;
+}
+
+struct nfs_clone_mount {
+	const struct super_block *sb;
+	const struct dentry *dentry;
+	struct nfs_fh *fh;
+	struct nfs_fattr *fattr;
+};
+
+static struct super_block *clone_nfs_sb(struct file_system_type *fs_type,
+		int flags, const char *dev_name, void *raw_data)
+{
+	struct nfs_clone_mount *data = raw_data;
+	struct nfs_server *server, *parent = NFS_SB(data->sb);
+	struct super_block *sb = ERR_PTR(-EINVAL);
+	void *err = ERR_PTR(-ENOMEM);
+	struct inode *root_inode;
+	struct nfs_fsinfo fsinfo;
+	int len;
+
+	server = kmalloc(sizeof(struct nfs_server), GFP_KERNEL);
+	if (server == NULL)
+		goto out_err;
+	memcpy(server, parent, sizeof(*server));
+	len = strlen(parent->hostname) + 1;
+	server->hostname = kmalloc(len, GFP_KERNEL);
+	if (server->hostname == NULL)
+		goto free_server;
+	memcpy(server->hostname, parent->hostname, len);
+	server->fsid = data->fattr->fsid;
+	nfs_copy_fh(&server->fh, data->fh);
+	if (rpciod_up() != 0)
+		goto free_hostname;
+
+	switch (parent->rpc_ops->version) {
+		case 2:
+		case 3:
+			sb = nfs_clone_client(server);
+			break;
+		case 4:
+			sb = nfs4_clone_client(server, data->dentry);
+			break;
+		default:
+			BUG();
+	}
+	if (IS_ERR((err = sb)) || sb->s_root)
+		goto kill_rpciod;
+	sb->s_op = data->sb->s_op;
+	sb->s_blocksize = data->sb->s_blocksize;
+	sb->s_blocksize_bits = data->sb->s_blocksize_bits;
+	sb->s_maxbytes = data->sb->s_maxbytes;
+
+	server->client_sys = server->client_acl = ERR_PTR(-EINVAL);
+	server->client = rpc_clone_client(parent->client);
+	if (IS_ERR((err = server->client)))
+		goto out_deactivate;
+	if (!IS_ERR(parent->client_sys)) {
+		server->client_sys = rpc_clone_client(parent->client_sys);
+		if (IS_ERR((err = server->client_sys)))
+			goto out_deactivate;
+	}
+	if (!IS_ERR(parent->client_acl)) {
+		server->client_acl = rpc_clone_client(parent->client_acl);
+		if (IS_ERR((err = server->client_acl)))
+			goto out_deactivate;
+	}
+	err = ERR_PTR(-ENOMEM);	/* err held a valid client pointer until here */
+	root_inode = nfs_fhget(sb, data->fh, data->fattr);
+	if (!root_inode)
+		goto out_deactivate;
+	sb->s_root = d_alloc_root(root_inode);
+	if (!sb->s_root)
+		goto out_put_root;
+	fsinfo.fattr = data->fattr;
+	if (NFS_PROTO(root_inode)->fsinfo(server, data->fh, &fsinfo) == 0)
+		nfs_super_set_maxbytes(sb, fsinfo.maxfilesize);
+	sb->s_root->d_op = server->rpc_ops->dentry_ops;
+	sb->s_flags |= MS_ACTIVE;
+	return sb;
+out_put_root:
+	iput(root_inode);
+out_deactivate:
+	up_write(&sb->s_umount);
+	deactivate_super(sb);
+	return (struct super_block *)err;
+kill_rpciod:
+	rpciod_down();
+free_hostname:
+	kfree(server->hostname);
+free_server:
+	kfree(server);
+out_err:
+	return (struct super_block *)err;
+}
+
+static struct file_system_type clone_nfs_fs_type = {
+	.owner		= THIS_MODULE,
+	.name		= "nfs",
+	.get_sb		= clone_nfs_sb,
+	.kill_sb	= nfs_kill_super,
+	.fs_flags	= FS_ODD_RENAME|FS_REVAL_DOT|FS_BINARY_MOUNTDATA,
+};
+
+static inline char *nfs_devname(const struct vfsmount *mnt_parent,
+			 const struct dentry *dentry,
+			 char *buffer, ssize_t buflen)
+{
+	return nfs_path(mnt_parent->mnt_devname, dentry, buffer, buflen);
+}
+
+/**
+ * nfs_do_submount - set up mountpoint when crossing a filesystem boundary
+ * @mnt_parent - mountpoint of parent directory
+ * @dentry - parent directory
+ * @fh - filehandle for new root dentry
+ * @fattr - attributes for new root inode
+ *
+ */
+struct vfsmount *nfs_do_submount(const struct vfsmount *mnt_parent,
+		const struct dentry *dentry, struct nfs_fh *fh,
+		struct nfs_fattr *fattr)
+{
+	struct nfs_clone_mount mountdata = {
+		.sb = mnt_parent->mnt_sb,
+		.dentry = dentry,
+		.fh = fh,
+		.fattr = fattr,
+	};
+	struct vfsmount *mnt = ERR_PTR(-ENOMEM);
+	char *page = (char *) __get_free_page(GFP_USER);
+	char *devname;
+
+	dprintk("%s: submounting on %s/%s\n", __FUNCTION__,
+			dentry->d_parent->d_name.name,
+			dentry->d_name.name);
+	if (page == NULL)
+		goto out;
+	devname = nfs_devname(mnt_parent, dentry, page, PAGE_SIZE);
+	if (!IS_ERR(devname))
+		mnt = vfs_kern_mount(&clone_nfs_fs_type, 0, devname, &mountdata);
+	else
+		mnt = (struct vfsmount *)devname;
+	free_page((unsigned long)page);
+out:
+	dprintk("%s: done\n", __FUNCTION__);
+	return mnt;
+}
+
+struct nfs_referral_mount {
+	struct super_block *sb;
+	const char *hostname;
+	const char *mnt_path;
+	const struct sockaddr_in *addr;
+};
+
+static struct super_block *referral_nfs_sb(struct file_system_type *fs_type,
+			int flags, const char *dev_name, void *raw_data)
+{
+	struct nfs_referral_mount *data = raw_data;
+	struct nfs_server *server;
+	struct nfs_server *parent = NFS_SB(data->sb);
+	struct super_block *sb = ERR_PTR(-ENOMEM);
+	struct rpc_timeout timeparms;
+	rpc_authflavor_t authflavor = RPC_AUTH_UNIX;
+	int proto = IPPROTO_TCP;
+	int len;
+	int err;
+
+	printk("%s enter\n", __FUNCTION__);
+	server = kmalloc(sizeof(struct nfs_server), GFP_KERNEL);
+	if (server == NULL)
+		goto out_err;
+	memset(server, 0, sizeof(struct nfs_server));
+
+	/* Zero out the NFS state stuff */
+	init_nfsv4_state(server);
+	server->client = server->client_sys = server->client_acl = ERR_PTR(-EINVAL);
+	/* host name */
+	len = strlen(data->hostname) + 1;
+	server->hostname = kmalloc(len, GFP_KERNEL);
+	if (server->hostname == NULL)
+		goto free_server;
+	memcpy(server->hostname, data->hostname, len);
+
+	/* mount path */
+	len = strlen(data->mnt_path) + 1;
+	server->mnt_path = kmalloc(len, GFP_KERNEL);
+	if (server->mnt_path == NULL)
+		goto free_hostname;
+	memcpy(server->mnt_path, data->mnt_path, len);
+
+	/* client addr */
+	memcpy(server->ip_addr, parent->ip_addr,sizeof(server->ip_addr) - 1);
+
+	/* remote addr */
+	memcpy(&server->addr, data->addr, sizeof(struct sockaddr_in));
+
+	/* Fire up rpciod if not yet running */
+	sb = ERR_PTR(rpciod_up());
+	if (IS_ERR(sb))
+		goto free_mntpath;
+
+	sb = sget(&nfs4_fs_type, nfs4_compare_super, nfs_set_super, server);
+	if (IS_ERR(sb) || sb->s_root)
+		goto kill_rpciod;
+
+	sb->s_flags = flags;
+
+	/* we have to guess. */
+	server->rsize = parent->rsize;
+	server->wsize = parent->wsize;
+	server->flags = parent->flags & NFS_MOUNT_FLAGMASK;
+	server->caps = NFS_CAP_ATOMIC_OPEN;
+	server->acregmin = parent->acregmin;
+	server->acregmax = parent->acregmax;
+	server->acdirmin = parent->acdirmin;
+	server->acdirmax = parent->acdirmax;
+
+	server->rpc_ops = &nfs_v4_clientops;
+
+	/* Since we are following a referral and there may be alternatives,
+	   set the timeouts and retries to low values */
+	timeparms.to_initval = 2 * HZ / 10;
+	timeparms.to_retries = 1;
+	timeparms.to_exponential = 0;  /* XXX ANDROS */
+	timeparms.to_increment = timeparms.to_initval;
+	timeparms.to_maxval = timeparms.to_initval + (timeparms.to_increment * timeparms.to_retries);
+
+	/* Now create transport and client. Use AUTH_UNIX for now */
+	server->client = nfs4_create_client(server, &timeparms, proto, authflavor);
+	if (IS_ERR(server->client)) {
+		err = PTR_ERR(server->client);
+		goto out_deactivate;
+	}
+
+	sb->s_time_gran = 1;
+
+	sb->s_op = &nfs4_sops;
+	err = nfs_sb_init(sb, authflavor);
+	if (err)
+		goto out_deactivate;
+
+	sb->s_flags |= MS_ACTIVE;
+	return sb;
+
+out_deactivate:
+	up_write(&sb->s_umount);
+	deactivate_super(sb);
+	return ERR_PTR(err);
+kill_rpciod:
+	rpciod_down();
+free_mntpath:
+	kfree(server->mnt_path);
+free_hostname:
+	kfree(server->hostname);
+free_server:
+	kfree(server);
+out_err:
+	return sb;
+}
+
+static struct file_system_type referral_nfs_fs_type = {
+	.owner		= THIS_MODULE,
+	.name		= "nfs",
+	.get_sb		= referral_nfs_sb,
+	.kill_sb	= nfs4_kill_super,
+	.fs_flags	= FS_ODD_RENAME|FS_REVAL_DOT|FS_BINARY_MOUNTDATA,
+};
+
+/**
+ * nfs_follow_referral - set up mountpoint when hitting a referral
+ * 			    on moved error
+ * @mnt_parent - mountpoint of parent directory
+ * @dentry - parent directory
+ * @locations - fs_locations data: the fs path plus, for each location,
+ *              a root path and a list of server names
+ *
+ * Tries each server of each location in turn and returns the first
+ * successful mount, or an ERR_PTR if none could be mounted.
+ */
+struct vfsmount *nfs_follow_referral(const struct vfsmount *mnt_parent, struct dentry   *dentry, struct nfs4_locations *locations)
+{
+	struct vfsmount *mnt = ERR_PTR(-ENOENT);
+	struct nfs_referral_mount mountdata;
+	char *page;
+	char *path;
+	char *devname;
+	int loc, s;
+
+	if (locations == NULL || locations->nlocations <= 0)
+		goto out;
+
+	printk("%s: handling referral at %s/%s\n", __FUNCTION__,
+		dentry->d_parent->d_name.name, dentry->d_name.name);
+
+	/* Ensure fs path is a prefix of current dentry path */
+	page = (char *) __get_free_page(GFP_USER);
+	if (page == NULL)
+		goto out;
+
+	path = nfs4_path(dentry, page, PAGE_SIZE);
+	if (IS_ERR(path))
+		goto out_free;
+
+	if (strncmp(path, locations->fs_path, strlen(locations->fs_path)) != 0) {
+		dprintk("%s: dentry path %s does not begin with fs path %s\n", __FUNCTION__, path, locations->fs_path);
+		goto out_free;
+	}
+
+	devname = nfs_devname(mnt_parent, dentry, page, PAGE_SIZE);
+	if (IS_ERR(devname)) {
+		mnt = (struct vfsmount *)devname;
+		goto out_free;
+	}
+
+	mountdata.sb = dentry->d_sb;
+	loc = 0;
+	while (loc < locations->nlocations && IS_ERR(mnt)) {
+		struct nfs4_location *location = locations->locations[loc];
+
+		if (location == NULL || location->nservers <= 0 ||
+		    location->rootpath == NULL) {
+			loc++;
+			continue;
+		}
+		mountdata.mnt_path = location->rootpath;
+		s = 0;
+		while (s < location->nservers) {
+			char *hostname;
+			struct sockaddr_in addr = {};
+
+			hostname = location->servers[s];
+			if (hostname == NULL) {
+				s++;
+				continue;
+			}
+			mountdata.hostname = hostname;
+
+			/* FIX: need a way to resolve TCP name in kernel */
+			addr.sin_addr.s_addr = in_aton(hostname);
+			addr.sin_port = htons(NFS_PORT);
+			addr.sin_family = AF_INET;
+			mountdata.addr = &addr;
+
+			mnt = vfs_kern_mount(&referral_nfs_fs_type, 0, devname, &mountdata);
+			if (!IS_ERR(mnt)) {
+				break;
+			}
+			s++;
+		}
+		loc++;
+	}
+out_free:
+	free_page((unsigned long)page);
+out:
+	printk("%s: done mnt %p dentry %p\n", __FUNCTION__, mnt, dentry);
+	return mnt;
+}
+
+
+
+/*
+ * nfs_do_refmount - handle crossing a referral on server
+ * @mnt_parent - mountpoint of parent directory
+ * @dentry - dentry of referral
+ * Returns nfs_follow_referral()'s result, or ERR_PTR(-ENOENT) on any failure here.
+ */
+struct vfsmount *nfs_do_refmount(const struct vfsmount *mnt_parent, struct dentry *dentry)
+{
+	struct vfsmount *mnt = ERR_PTR(-ENOENT);
+	struct dentry *parent;
+	struct nfs4_fs_locations *fs_locations = NULL;
+	struct nfs4_locations *locations = NULL;
+#if 0
+	struct nfs_server *server = NFS_SERVER(dentry->d_inode);
+#endif
+	struct page *page;
+	int err;
+
+	/* BUG_ON(IS_ROOT(dentry)); */
+	dprintk("%s: enter\n", __FUNCTION__);
+
+	page = alloc_page(GFP_KERNEL);
+	if (page == NULL)
+		goto out;
+
+	fs_locations = kmalloc(sizeof(struct nfs4_fs_locations), GFP_KERNEL);
+	if (fs_locations == NULL)
+		goto out_free;
+
+	/* Get locations */
+	parent = dget_parent(dentry);
+	dprintk("%s: getting locations for %s/%s\n", __FUNCTION__, parent->d_name.name, dentry->d_name.name);
+	err = nfs4_proc_fs_locations(parent->d_inode, dentry, fs_locations, page);
+	dput(parent);
+	if (err != 0 || fs_locations->nlocations <= 0 ||
+	    fs_locations->fs_path.ncomponents <= 0)
+		goto out_free;
+
+	locations = nfs4_copy_locations(fs_locations);	/* private copy, freed again right after the mount attempt */
+	if (locations == NULL)
+		goto out_free;
+
+	mnt = nfs_follow_referral(mnt_parent, dentry, locations);
+
+	nfs4_free_locations(locations);
+out_free:
+	__free_page(page);
+	kfree(fs_locations);	/* still NULL if its allocation failed; kfree(NULL) is a no-op */
+out:
+	dprintk("%s: done\n", __FUNCTION__);
+	return mnt;
+}
+
+/*
+ * Mount a replica taken from the location list cached on the dentry's
+ * server; ERR_PTR(-ENOENT) when no such list exists or it is empty.
+ */
+struct vfsmount *nfs_do_failover_mount(const struct vfsmount *mnt_parent, struct dentry *dentry)
+{
+	struct nfs4_locations *replicas = NFS_SERVER(dentry->d_inode)->nfs4_locations;
+	struct vfsmount *result = ERR_PTR(-ENOENT);
+
+	dprintk("%s: enter\n", __FUNCTION__);
+
+	/* Only attempt the mount when a non-empty replica list is cached. */
+	if (replicas != NULL && replicas->nlocations > 0)
+		result = nfs_follow_referral(mnt_parent, dentry, replicas);
+
+	dprintk("%s: done\n", __FUNCTION__);
+	return result;
+}
+
+/*
+ * nfs_try_failover_inode - Update an inode after replica failover
+ * @inode - pointer to inode to failover (may be NULL)
+ * @dentry - pointer to dentry, or NULL to look up an alias of @inode
+ *
+ * Mounts a replica through nfs_do_failover_mount() and, on success,
+ * redirects the inode's superblock to the replica's s_fs_info.
+ *
+ * Returns 0 on success, -EIO when @inode is NULL, -ENOENT when no
+ * dentry alias exists, or the error from nfs_do_failover_mount().
+ */
+int nfs_try_failover_inode(struct inode *inode, struct dentry *dentry)
+{
+	struct nfs_server *server;
+	struct vfsmount *mnt;
+	/* Stand-in parent mount: only mnt_devname is filled in below. */
+	struct vfsmount mnt_parent = { };
+	int status = -EIO;
+
+	/*
+	 * Callers such as nfs4_handle_exception() may pass a NULL inode
+	 * (exceptions initialised with "{ }"), so test it before
+	 * NFS_SERVER() dereferences it.
+	 */
+	if (inode == NULL)
+		goto out;
+
+	server = NFS_SERVER(inode);
+
+	if (dentry == NULL) {
+		status = -ENOENT;
+		dentry = d_find_alias(inode);
+		if (dentry == NULL)
+			goto out;
+	} else
+		dget(dentry);
+
+	mnt_parent.mnt_devname = server->mnt_path;
+	mnt = nfs_do_failover_mount(&mnt_parent, dentry);
+	if (IS_ERR(mnt)) {
+		status = PTR_ERR(mnt);
+		goto out_dput;
+	}
+
+	/*
+	 * Switch the superblock over to the replica's server state.
+	 * NOTE(review): the reference held on mnt is not dropped here and
+	 * the previous s_fs_info is not freed - confirm that is intended.
+	 */
+	inode->i_sb->s_fs_info = mnt->mnt_sb->s_fs_info;
+	status = 0;
+
+ out_dput:
+	dput(dentry);
+ out:
+	dprintk("%s: returned error %d\n", __FUNCTION__, status);
+	return status;
+}
+
 extern int nfs_init_nfspagecache(void);
 extern void nfs_destroy_nfspagecache(void);
 extern int nfs_init_readpagecache(void);
@@ -2301,6 +3129,10 @@ static int __init init_nfs_fs(void)
 {
 	int err;
 
+	err = nfs_init_sysctl();
+	if (err)
+		goto out5;
+
 	err = nfs_init_nfspagecache();
 	if (err)
 		goto out4;
@@ -2348,6 +3180,8 @@ out2:
 out3:
 	nfs_destroy_nfspagecache();
 out4:
+	nfs_destroy_sysctl();
+out5:
 	return err;
 }
 
@@ -2363,6 +3197,7 @@ static void __exit exit_nfs_fs(void)
 #ifdef CONFIG_PROC_FS
 	rpc_proc_unregister("nfs");
 #endif
+	nfs_destroy_sysctl();
 	unregister_filesystem(&nfs_fs_type);
 	unregister_nfs4fs();
 }
diff -puN fs/nfs/Makefile~rnfs-all fs/nfs/Makefile
--- rnfs-linux-2.6.16-rc3/fs/nfs/Makefile~rnfs-all	2007-03-03 16:21:47.000000000 -0500
+++ rnfs-linux-2.6.16-rc3-jiayingz/fs/nfs/Makefile	2007-03-03 16:21:47.000000000 -0500
@@ -5,7 +5,8 @@
 obj-$(CONFIG_NFS_FS) += nfs.o
 
 nfs-y 			:= dir.o file.o inode.o nfs2xdr.o pagelist.o \
-			   proc.o read.o symlink.o unlink.o write.o
+			   proc.o read.o symlink.o unlink.o write.o \
+			   namespace.o
 nfs-$(CONFIG_ROOT_NFS)	+= nfsroot.o mount_clnt.o      
 nfs-$(CONFIG_NFS_V3)	+= nfs3proc.o nfs3xdr.o
 nfs-$(CONFIG_NFS_V3_ACL)	+= nfs3acl.o
diff -puN /dev/null fs/nfs/namespace.c
--- /dev/null	2003-09-15 09:40:47.000000000 -0400
+++ rnfs-linux-2.6.16-rc3-jiayingz/fs/nfs/namespace.c	2007-03-03 16:21:47.000000000 -0500
@@ -0,0 +1,245 @@
+/*
+ * linux/fs/nfs/namespace.c
+ *
+ * Copyright (C) 2005 Trond Myklebust <Trond.Myklebust@netapp.com>
+ *
+ * NFS namespace
+ */
+
+#include <linux/config.h>
+
+#include <linux/dcache.h>
+#include <linux/mount.h>
+#include <linux/namei.h>
+#include <linux/nfs_fs.h>
+#include <linux/string.h>
+#include <linux/sunrpc/clnt.h>
+#include <linux/vfs.h>
+#include <linux/kthread.h>
+
+#define NFSDBG_FACILITY		NFSDBG_VFS
+
+static LIST_HEAD(nfs_automount_list);	/* list passed to do_add_mount() for mounts created in this file */
+static void nfs_expire_automounts(void *list);
+static DECLARE_WORK(nfs_automount_task, nfs_expire_automounts, &nfs_automount_list);	/* expiry work; its data is the list above */
+int nfs_mountpoint_expiry_timeout = 30 * HZ;	/* delay given to schedule_delayed_work() for the expiry work */
+
+struct nfs_follow_data {	/* argument bundle handed to nfs_do_follow_mountpoint() */
+	struct kref ref;	/* released through nfs_put_follow_data() */
+	struct nameidata *nd;	/* caller's nameidata; the pointer is stored, not a copy */
+	struct nfs_fh *fh;	/* NULL on the referral (NFS_ATTR_MOVED) path */
+	struct nfs_fattr *fattr;	/* attributes from the repeated lookup */
+	struct completion done;	/* completed by nfs_do_follow_mountpoint() on the referral path */
+};
+
+/*
+ * nfs_get_follow_data - bundle the arguments for nfs_do_follow_mountpoint()
+ * @nd - nameidata of the walk (the pointer is stored, not a copy)
+ * @fattr - attributes returned by the lookup
+ * @fh - filehandle from the lookup, or NULL on the referral path
+ * Returns the object with one reference held, or ERR_PTR(-ENOMEM).
+ */
+static struct nfs_follow_data *nfs_get_follow_data (struct nameidata *nd, struct nfs_fattr *fattr, struct nfs_fh *fh)
+{
+	struct nfs_follow_data *fdata;
+
+	printk("%s enter\n", __FUNCTION__);
+	fdata = kmalloc(sizeof(*fdata), GFP_KERNEL);
+	if (!fdata)
+		return ERR_PTR(-ENOMEM);
+
+	/*
+	 * kref_init() already leaves the count at one.  The extra kref_get()
+	 * that used to follow had no matching put, so the object was leaked.
+	 */
+	kref_init(&fdata->ref);
+	fdata->nd = nd;
+	fdata->fh = fh;
+	fdata->fattr = fattr;
+	init_completion(&fdata->done);
+	return fdata;
+}
+
+/*
+ * kref release callback: frees the nfs_follow_data embedding @kref.
+ * The nameidata it points to is not freed here.
+ */
+static void nfs_free_follow_data(struct kref *kref)
+{
+	printk("%s enter\n", __FUNCTION__);
+	kfree(container_of(kref, struct nfs_follow_data, ref));
+}
+
+/* Drop one reference; the final put frees @fdata via nfs_free_follow_data(). */
+static inline void nfs_put_follow_data(struct nfs_follow_data *fdata)
+{
+	printk("%s enter\n", __FUNCTION__);
+	kref_put(&fdata->ref, nfs_free_follow_data);
+}
+
+static int nfs_do_follow_mountpoint(void *ptr)
+{
+	struct nfs_follow_data *fd = ptr;
+	struct nameidata *nd = fd->nd;
+	struct vfsmount *mnt;
+	int err = 0;
+	/* Create the submount/referral mount for fd->nd and move the walk onto its root. */
+	printk("%s enter\n", __FUNCTION__);
+
+	if (fd->fattr->valid & NFS_ATTR_MOVED) /* run by kthread */
+		mnt = nfs_do_refmount(nd->mnt, nd->dentry);
+	else
+		mnt = nfs_do_submount(nd->mnt, nd->dentry, fd->fh, fd->fattr);
+
+	if (IS_ERR(mnt)) {
+		err = PTR_ERR(mnt);
+		goto out_err;
+	}
+	mntget(mnt);
+
+	err = do_add_mount(mnt, nd, nd->mnt->mnt_flags, &nfs_automount_list);
+	if (err < 0) {
+		mntput(mnt);
+		if (err == -EBUSY)
+			goto out_follow;
+		goto out_err;
+	}
+	mntput(nd->mnt);	/* release the old position before nd is repointed below */
+	dput(nd->dentry);
+
+	if (fd->fattr->valid & NFS_ATTR_MOVED) {
+		/* drop references for kthread */
+		mntput(nd->mnt);
+		dput(nd->dentry);
+	}
+
+	nd->mnt = mnt;
+	nd->dentry = dget(mnt->mnt_root);
+	schedule_delayed_work(&nfs_automount_task, nfs_mountpoint_expiry_timeout);
+out:
+	/* NOTE(review): nfs_follow_mountpoint() does not read err on the kthread path - confirm */
+	if (fd->fattr->valid & NFS_ATTR_MOVED) /* run by kthread */
+		complete(&fd->done);
+
+/* ANDROS */
+	printk("%s: done, returned %d\n", __FUNCTION__, err);
+	return err;
+out_err:
+	path_release(nd);	/* error exits through this label leave nd released */
+	goto out;
+out_follow:	/* reached on -EBUSY from do_add_mount() */
+	while(d_mountpoint(nd->dentry) && follow_down(&nd->mnt, &nd->dentry))
+		;
+	err = 0;
+	goto out;
+}
+
+/*
+ * nfs_follow_mountpoint - handle crossing a mountpoint on the server
+ * @dentry - dentry of mountpoint
+ * @nd - nameidata info
+ *
+ * returns a void * NULL on success, or an ERR_PTR with @nd released.
+ * NOTE(review): a failure inside the referral kthread is not reported.
+ * When we encounter a mountpoint on the server, we want to set up
+ * a mountpoint on the client too, to prevent inode numbers from
+ * colliding, and to allow "df" to work properly.
+ *
+ * On NFSv4, we also want to allow for the fact that different
+ * filesystems may be migrated to different servers in a failover
+ * situation, and that different filesystems may want to use
+ * different security flavours.
+ */
+static void *nfs_follow_mountpoint(struct dentry *dentry, struct nameidata *nd)
+{
+	struct nfs_server *server = NFS_SERVER(dentry->d_inode);
+	struct dentry *parent;
+	struct nfs_fh fh;
+	struct nfs_fattr fattr;
+	struct nfs_follow_data *fdata;
+	struct task_struct *task;
+	void *err = NULL;
+
+	BUG_ON(IS_ROOT(dentry));
+	dprintk("%s: enter\n", __FUNCTION__);
+	dput(nd->dentry);
+	nd->dentry = dget(dentry);
+	if (d_mountpoint(nd->dentry))
+		goto out_follow;
+	/* Look it up again */
+	parent = dget_parent(nd->dentry);
+	err = ERR_PTR(server->rpc_ops->lookup(parent->d_inode, &nd->dentry->d_name, &fh, &fattr));
+	dput(parent);
+	if (IS_ERR(err))
+		goto out_release;	/* like nfs_do_follow_mountpoint(), release nd on error */
+
+	fdata = nfs_get_follow_data(nd, &fattr, (fattr.valid & NFS_ATTR_MOVED) ? NULL : &fh);
+	if (IS_ERR(fdata)) {
+		err = fdata;
+		goto out_release;
+	}
+	if (fattr.valid & NFS_ATTR_MOVED) {
+		/* get references for kthread */
+		mntget(nd->mnt);
+		dget(nd->dentry);
+		task = kthread_run(nfs_do_follow_mountpoint, fdata,
+				   "%s-fm", server->client->cl_server);
+		if (IS_ERR(task)) {
+			printk("ANDROS %s: IS_ERR(task) %ld\n",__FUNCTION__,IS_ERR(task));
+			/* the thread never ran: give back its references */
+			mntput(nd->mnt);
+			dput(nd->dentry);
+			err = task;
+			goto out_free;
+		}
+		wait_for_completion(&fdata->done);
+	} else {
+		err = ERR_PTR(nfs_do_follow_mountpoint(fdata));	/* on failure it has already released nd */
+	}
+	nfs_put_follow_data(fdata);
+out:	/* nd->dentry may already be released here, so it is no longer dereferenced */
+	printk("ANDROS: %s returns err %p IS_ERR(err) %ld\n", __FUNCTION__, err, IS_ERR(err));
+	printk("             nd->flags 0x%x\n",nd->flags);
+	return err;
+
+out_free:
+	kfree(fdata);	/* not handed to anyone yet, so free it directly */
+out_release:
+	path_release(nd);
+	goto out;
+out_follow:
+	while(d_mountpoint(nd->dentry) && follow_down(&nd->mnt, &nd->dentry))
+		;
+	err = NULL;
+	goto out;
+}
+
+static int nfs_follow_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat)
+{
+	int status;	/* result of nfs_getattr(); logged below, never returned */
+	status = nfs_getattr(mnt, dentry, stat);
+	dprintk("%s: follow_getattr for dentry %s ret %d\n", __FUNCTION__, dentry->d_name.name, status);
+	return 0; /* hack: always report success, whatever nfs_getattr() returned */
+}
+
+struct inode_operations nfs_mountpoint_inode_operations = {	/* inode ops built from the two handlers in this file */
+	.follow_link	= nfs_follow_mountpoint,	/* performs the submount or referral mount */
+	.getattr	= nfs_follow_getattr,	/* always returns 0 */
+};
+
+static void nfs_expire_automounts(void *data)
+{
+	struct list_head *mounts = data;	/* &nfs_automount_list, per DECLARE_WORK */
+	mark_mounts_for_expiry(mounts);
+	/* Re-arm the delayed work only while the list is non-empty. */
+	if (!list_empty(mounts))
+		schedule_delayed_work(&nfs_automount_task, nfs_mountpoint_expiry_timeout);
+}
+
+void nfs_release_automount_timer(void)
+{
+	if (!list_empty(&nfs_automount_list))
+		return;	/* automounts still listed: leave the work armed */
+	cancel_delayed_work(&nfs_automount_task);
+	flush_scheduled_work();
+}
diff -puN fs/nfs/nfs2xdr.c~rnfs-all fs/nfs/nfs2xdr.c
--- rnfs-linux-2.6.16-rc3/fs/nfs/nfs2xdr.c~rnfs-all	2007-03-03 16:21:47.000000000 -0500
+++ rnfs-linux-2.6.16-rc3-jiayingz/fs/nfs/nfs2xdr.c	2007-03-03 16:21:47.000000000 -0500
@@ -131,7 +131,8 @@ xdr_decode_fattr(u32 *p, struct nfs_fatt
 	fattr->du.nfs2.blocksize = ntohl(*p++);
 	rdev = ntohl(*p++);
 	fattr->du.nfs2.blocks = ntohl(*p++);
-	fattr->fsid_u.nfs3 = ntohl(*p++);
+	fattr->fsid.major = ntohl(*p++);
+	fattr->fsid.minor = 0;
 	fattr->fileid = ntohl(*p++);
 	p = xdr_decode_time(p, &fattr->atime);
 	p = xdr_decode_time(p, &fattr->mtime);
diff -puN fs/nfs/nfs3xdr.c~rnfs-all fs/nfs/nfs3xdr.c
--- rnfs-linux-2.6.16-rc3/fs/nfs/nfs3xdr.c~rnfs-all	2007-03-03 16:21:47.000000000 -0500
+++ rnfs-linux-2.6.16-rc3-jiayingz/fs/nfs/nfs3xdr.c	2007-03-03 16:21:47.000000000 -0500
@@ -166,7 +166,8 @@ xdr_decode_fattr(u32 *p, struct nfs_fatt
 	if (MAJOR(fattr->rdev) != major || MINOR(fattr->rdev) != minor)
 		fattr->rdev = 0;
 
-	p = xdr_decode_hyper(p, &fattr->fsid_u.nfs3);
+	p = xdr_decode_hyper(p, &fattr->fsid.major);
+	fattr->fsid.minor = 0;
 	p = xdr_decode_hyper(p, &fattr->fileid);
 	p = xdr_decode_time3(p, &fattr->atime);
 	p = xdr_decode_time3(p, &fattr->mtime);
diff -puN fs/nfs/nfs4_fs.h~rnfs-all fs/nfs/nfs4_fs.h
--- rnfs-linux-2.6.16-rc3/fs/nfs/nfs4_fs.h~rnfs-all	2007-03-03 16:21:47.000000000 -0500
+++ rnfs-linux-2.6.16-rc3-jiayingz/fs/nfs/nfs4_fs.h	2007-03-03 16:21:47.000000000 -0500
@@ -188,8 +188,21 @@ struct nfs4_state {
 	atomic_t count;
 };
 
+struct nfs4_location {	/* one fs location: a root path and the servers exporting it */
+	char *rootpath;	/* "/"-joined string built by nfs4_copy_path() */
+	unsigned int nservers;	/* number of entries in servers[] */
+	char **servers;	/* kmalloc'ed array of NUL-terminated names from nfs4_copy_server() */
+};
+
+struct nfs4_locations {	/* string copy of a decoded fs_locations reply; see nfs4_copy_locations() */
+	char *fs_path;	/* "/"-joined string built by nfs4_copy_path() */
+	int nlocations;	/* number of entries in locations[] */
+	struct nfs4_location **locations;	/* kmalloc'ed array of pointers to individually allocated entries */
+};
 
 struct nfs4_exception {
+	struct inode *inode;	/* given to nfs4_recover_filehandle()/nfs_try_failover_inode(); NULL when not set */
+	struct qstr *name;	/* name operand set by directory operations; no reader in the visible code */
 	long timeout;
 	int retry;
 };
@@ -217,6 +230,14 @@ extern int nfs4_proc_renew(struct nfs4_c
 extern int nfs4_do_close(struct inode *inode, struct nfs4_state *state);
 extern struct dentry *nfs4_atomic_open(struct inode *, struct dentry *, struct nameidata *);
 extern int nfs4_open_revalidate(struct inode *, struct dentry *, int, struct nameidata *);
+extern int nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *fhandle);
+extern int nfs4_proc_fs_locations(struct inode *dir, struct dentry *dentry,
+		struct nfs4_fs_locations *fs_locations, struct page *page);
+extern int nfs4_proc_get_replicas(struct nfs_server *, struct nfs_fh *,
+		struct nfs4_fs_locations *, struct page *);
+extern struct nfs4_locations *nfs4_copy_locations(struct nfs4_fs_locations *);
+extern void nfs4_free_locations(struct nfs4_locations *);
+extern int nfs4_recover_filehandle(struct inode *, struct dentry *);
 
 extern struct nfs4_state_recovery_ops nfs4_reboot_recovery_ops;
 extern struct nfs4_state_recovery_ops nfs4_network_partition_recovery_ops;
@@ -225,6 +246,7 @@ extern const u32 nfs4_fattr_bitmap[2];
 extern const u32 nfs4_statfs_bitmap[2];
 extern const u32 nfs4_pathconf_bitmap[2];
 extern const u32 nfs4_fsinfo_bitmap[2];
+extern const u32 nfs4_fs_locations_bitmap[2];
 
 /* nfs4renewd.c */
 extern void nfs4_schedule_state_renewal(struct nfs4_client *);
diff -puN fs/nfs/nfs4proc.c~rnfs-all fs/nfs/nfs4proc.c
--- rnfs-linux-2.6.16-rc3/fs/nfs/nfs4proc.c~rnfs-all	2007-03-03 16:21:47.000000000 -0500
+++ rnfs-linux-2.6.16-rc3-jiayingz/fs/nfs/nfs4proc.c	2007-03-03 16:21:47.000000000 -0500
@@ -68,6 +68,21 @@ static int nfs4_wait_clnt_recover(struct
 extern u32 *nfs4_decode_dirent(u32 *p, struct nfs_entry *entry, int plus);
 extern struct rpc_procinfo nfs4_procedures[];
 
+/*
+ * Traced front end for rpc_call_sync().  A server timeout reaches the
+ * client as -EIO; report it as -ETIMEDOUT instead so that it can be
+ * told apart from other unrecognized errors.
+ */
+static inline int nfs4_rpc_wrapper(struct rpc_clnt *clnt, struct rpc_message *msg, int flags, const char *fn, int line)
+{
+	int status;
+
+	dprintk("%s:%d, clnt %p\n", fn, line, clnt);
+	status = rpc_call_sync(clnt, msg, flags);
+	return (status == -EIO) ? -ETIMEDOUT : status;
+}
+#define rpc_call_sync(clnt, msg, flags)        nfs4_rpc_wrapper(clnt, msg, flags, __FUNCTION__, __LINE__)
+
 /* Prevent leaks of NFSv4 errors into userland */
 int nfs4_map_errors(int err)
 {
@@ -121,6 +136,25 @@ const u32 nfs4_fsinfo_bitmap[2] = { FATT
 			0
 };
 
+const u32 nfs4_fs_locations_bitmap[2] = {	/* [0] holds FATTR4_WORD0_* bits, [1] holds FATTR4_WORD1_* bits */
+	FATTR4_WORD0_TYPE
+	| FATTR4_WORD0_CHANGE
+	| FATTR4_WORD0_SIZE
+	| FATTR4_WORD0_FSID
+	| FATTR4_WORD0_FILEID
+	| FATTR4_WORD0_FS_LOCATIONS,	/* end of word 0 */
+	FATTR4_WORD1_MODE
+	| FATTR4_WORD1_NUMLINKS
+	| FATTR4_WORD1_OWNER
+	| FATTR4_WORD1_OWNER_GROUP
+	| FATTR4_WORD1_RAWDEV
+	| FATTR4_WORD1_SPACE_USED
+	| FATTR4_WORD1_TIME_ACCESS
+	| FATTR4_WORD1_TIME_METADATA
+	| FATTR4_WORD1_TIME_MODIFY
+	| FATTR4_WORD1_MOUNTED_ON_FILEID
+};
+
 static void nfs4_setup_readdir(u64 cookie, u32 *verifier, struct dentry *dentry,
 		struct nfs4_readdir_arg *readdir)
 {
@@ -476,7 +510,7 @@ static int _nfs4_do_open_reclaim(struct 
 static int nfs4_do_open_reclaim(struct nfs4_state_owner *sp, struct nfs4_state *state, struct dentry *dentry)
 {
 	struct nfs_server *server = NFS_SERVER(state->inode);
-	struct nfs4_exception exception = { };
+	struct nfs4_exception exception = { .inode = state->inode };
 	int err;
 	do {
 		err = _nfs4_do_open_reclaim(sp, state, dentry);
@@ -521,7 +555,7 @@ static int _nfs4_open_delegation_recall(
 
 int nfs4_open_delegation_recall(struct dentry *dentry, struct nfs4_state *state)
 {
-	struct nfs4_exception exception = { };
+	struct nfs4_exception exception = { .inode = dentry->d_inode };
 	struct nfs_server *server = NFS_SERVER(dentry->d_inode);
 	int err;
 	do {
@@ -800,7 +834,14 @@ static int _nfs4_open_expired(struct nfs
 	opendata = nfs4_opendata_alloc(dentry, sp, openflags, NULL);
 	if (opendata == NULL)
 		return -ENOMEM;
-	ret = nfs4_open_recover(opendata, state);
+	/* If we are in a failover situation, recover path first */
+	ret = nfs_try_migrate_inode(opendata->dir->d_inode, opendata->dir);
+	if (ret == 0) {
+		ret = nfs_try_migrate_filehandle(inode, &opendata->o_res.fh,
+				opendata->o_res.f_attr, NFS_SERVER(inode)->generation);
+	}
+	if (ret == 0)
+		ret = nfs4_open_recover(opendata, state);
 	if (ret == -ESTALE) {
 		/* Invalidate the state owner so we don't ever use it again */
 		nfs4_drop_state_owner(sp);
@@ -978,7 +1019,7 @@ out_err:
 
 static struct nfs4_state *nfs4_do_open(struct inode *dir, struct dentry *dentry, int flags, struct iattr *sattr, struct rpc_cred *cred)
 {
-	struct nfs4_exception exception = { };
+	struct nfs4_exception exception = { .inode = dir, .name = &dentry->d_name };
 	struct nfs4_state *res;
 	int status;
 
@@ -1058,7 +1099,7 @@ static int nfs4_do_setattr(struct nfs_se
                 struct nfs_fh *fhandle, struct iattr *sattr,
                 struct nfs4_state *state)
 {
-	struct nfs4_exception exception = { };
+	struct nfs4_exception exception = { .inode = (state) ? state->inode : NULL };
 	int err;
 	do {
 		err = nfs4_handle_exception(server,
@@ -1322,7 +1363,7 @@ static int _nfs4_server_capabilities(str
 	return status;
 }
 
-static int nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *fhandle)
+int nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *fhandle)
 {
 	struct nfs4_exception exception = { };
 	int err;
@@ -1434,6 +1475,51 @@ out:
 	return status;
 }
 
+/*
+ * Get locations and (maybe) other attributes of a referral; on success
+ * @fattr is flagged NFS_ATTR_MOVED and @fhandle is zeroed.  The referral
+ * itself is followed later, on fsid mismatch in inode revalidation.
+ */
+static int nfs4_get_referral(struct inode *dir, struct qstr *name, struct nfs_fattr *fattr, struct nfs_fh *fhandle)
+{
+	int status = -ENOMEM;
+	struct page *page = NULL;
+	struct nfs4_fs_locations *locations = NULL;
+	struct dentry dentry = {};	/* stack stand-in; only d_name is filled in below */
+
+	page = alloc_page(GFP_KERNEL);
+	if (page == NULL)
+		goto out;
+	locations = kmalloc(sizeof(struct nfs4_fs_locations), GFP_KERNEL);
+	if (locations == NULL)
+		goto out;
+
+	dentry.d_name.name = name->name;
+	dentry.d_name.len = name->len;
+	status = nfs4_proc_fs_locations(dir, &dentry, locations, page);
+	if (status != 0)
+		goto out;
+	/* Make sure server returned a different fsid for the referral */
+	if (nfs_fsid_equal(&NFS_SERVER(dir)->fsid, &locations->fattr.fsid)) {
+		dprintk("%s: server did not return a different fsid for a referral at %s\n", __FUNCTION__, name->name);
+		status = -EIO;
+		goto out;
+	}
+
+	memcpy(fattr, &locations->fattr, sizeof(struct nfs_fattr));
+	fattr->valid |= NFS_ATTR_MOVED;
+	if (!fattr->mode)
+		fattr->mode = S_IFDIR;	/* default to a directory when the reply carried no mode */
+	memset(fhandle, 0, sizeof(struct nfs_fh));
+out:
+	if (page)
+		__free_page(page);
+	if (locations)
+		kfree(locations);
+	dprintk("nfs4_get_referral: name %s returned status %d (fattr %p, valid %d)\n", name->name, status, fattr, fattr->valid);
+	return status;
+}
+
 static int _nfs4_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle, struct nfs_fattr *fattr)
 {
 	struct nfs4_getattr_arg args = {
@@ -1539,13 +1625,17 @@ static int _nfs4_proc_lookup(struct inod
 	
 	dprintk("NFS call  lookup %s\n", name->name);
 	status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
+	if (status == -NFS4ERR_MOVED) {
+		status = nfs4_get_referral(dir, name, fattr, fhandle);
+		dprintk("lookup returned moved error for %s, getting referral info (fattr %p, valid %d)\n", name->name, fattr, fattr->valid);
+	}
 	dprintk("NFS reply lookup: %d\n", status);
 	return status;
 }
 
 static int nfs4_proc_lookup(struct inode *dir, struct qstr *name, struct nfs_fh *fhandle, struct nfs_fattr *fattr)
 {
-	struct nfs4_exception exception = { };
+	struct nfs4_exception exception = { .inode = dir };
 	int err;
 	do {
 		err = nfs4_handle_exception(NFS_SERVER(dir),
@@ -1601,7 +1691,7 @@ static int _nfs4_proc_access(struct inod
 
 static int nfs4_proc_access(struct inode *inode, struct nfs_access_entry *entry)
 {
-	struct nfs4_exception exception = { };
+	struct nfs4_exception exception = { .inode = inode };
 	int err;
 	do {
 		err = nfs4_handle_exception(NFS_SERVER(inode),
@@ -1656,7 +1746,7 @@ static int _nfs4_proc_readlink(struct in
 static int nfs4_proc_readlink(struct inode *inode, struct page *page,
 		unsigned int pgbase, unsigned int pglen)
 {
-	struct nfs4_exception exception = { };
+	struct nfs4_exception exception = { .inode = inode };
 	int err;
 	do {
 		err = nfs4_handle_exception(NFS_SERVER(inode),
@@ -1694,7 +1784,7 @@ static int _nfs4_proc_read(struct nfs_re
 
 static int nfs4_proc_read(struct nfs_read_data *rdata)
 {
-	struct nfs4_exception exception = { };
+	struct nfs4_exception exception = { .inode = rdata->inode };
 	int err;
 	do {
 		err = nfs4_handle_exception(NFS_SERVER(rdata->inode),
@@ -1736,7 +1826,7 @@ static int _nfs4_proc_write(struct nfs_w
 
 static int nfs4_proc_write(struct nfs_write_data *wdata)
 {
-	struct nfs4_exception exception = { };
+	struct nfs4_exception exception = { .inode = wdata->inode };
 	int err;
 	do {
 		err = nfs4_handle_exception(NFS_SERVER(wdata->inode),
@@ -1777,7 +1867,7 @@ static int _nfs4_proc_commit(struct nfs_
 
 static int nfs4_proc_commit(struct nfs_write_data *cdata)
 {
-	struct nfs4_exception exception = { };
+	struct nfs4_exception exception = { .inode = cdata->inode };
 	int err;
 	do {
 		err = nfs4_handle_exception(NFS_SERVER(cdata->inode),
@@ -1868,7 +1958,7 @@ static int _nfs4_proc_remove(struct inod
 
 static int nfs4_proc_remove(struct inode *dir, struct qstr *name)
 {
-	struct nfs4_exception exception = { };
+	struct nfs4_exception exception = { .inode = dir, .name = name };
 	int err;
 	do {
 		err = nfs4_handle_exception(NFS_SERVER(dir),
@@ -1962,7 +2052,7 @@ static int _nfs4_proc_rename(struct inod
 static int nfs4_proc_rename(struct inode *old_dir, struct qstr *old_name,
 		struct inode *new_dir, struct qstr *new_name)
 {
-	struct nfs4_exception exception = { };
+	struct nfs4_exception exception = { .inode = old_dir, .name = old_name };
 	int err;
 	do {
 		err = nfs4_handle_exception(NFS_SERVER(old_dir),
@@ -2009,7 +2099,7 @@ static int _nfs4_proc_link(struct inode 
 
 static int nfs4_proc_link(struct inode *inode, struct inode *dir, struct qstr *name)
 {
-	struct nfs4_exception exception = { };
+	struct nfs4_exception exception = { .inode = dir, .name = name };
 	int err;
 	do {
 		err = nfs4_handle_exception(NFS_SERVER(inode),
@@ -2063,7 +2153,7 @@ static int nfs4_proc_symlink(struct inod
 		struct qstr *path, struct iattr *sattr, struct nfs_fh *fhandle,
 		struct nfs_fattr *fattr)
 {
-	struct nfs4_exception exception = { };
+	struct nfs4_exception exception = { .inode = dir, .name = name };
 	int err;
 	do {
 		err = nfs4_handle_exception(NFS_SERVER(dir),
@@ -2116,7 +2206,7 @@ static int _nfs4_proc_mkdir(struct inode
 static int nfs4_proc_mkdir(struct inode *dir, struct dentry *dentry,
 		struct iattr *sattr)
 {
-	struct nfs4_exception exception = { };
+	struct nfs4_exception exception = { .inode = dir, .name = &dentry->d_name };
 	int err;
 	do {
 		err = nfs4_handle_exception(NFS_SERVER(dir),
@@ -2164,7 +2254,7 @@ static int _nfs4_proc_readdir(struct den
 static int nfs4_proc_readdir(struct dentry *dentry, struct rpc_cred *cred,
                   u64 cookie, struct page *page, unsigned int count, int plus)
 {
-	struct nfs4_exception exception = { };
+	struct nfs4_exception exception = { .inode = dentry->d_inode };
 	int err;
 	do {
 		err = nfs4_handle_exception(NFS_SERVER(dentry->d_inode),
@@ -2234,7 +2324,7 @@ static int _nfs4_proc_mknod(struct inode
 static int nfs4_proc_mknod(struct inode *dir, struct dentry *dentry,
 		struct iattr *sattr, dev_t rdev)
 {
-	struct nfs4_exception exception = { };
+	struct nfs4_exception exception = { .inode = dir, .name = &dentry->d_name };
 	int err;
 	do {
 		err = nfs4_handle_exception(NFS_SERVER(dir),
@@ -2289,6 +2379,177 @@ static int _nfs4_do_fsinfo(struct nfs_se
 	return rpc_call_sync(server->client, &msg, 0);
 }
 
+/* Bytes needed for one "/" plus the name of every component of @path; 0 for NULL. */
+static int nfs4_pathlen(struct nfs4_pathname *path)
+{
+	int total = 0;
+	int idx;
+	if (path == NULL)
+		return 0;
+	for (idx = 0; idx < path->ncomponents; idx++)
+		total += path->components[idx].len + 1;
+	return total;
+}
+
+/* Join the components of @pathname into a kmalloc'ed "/a/b/c" string; NULL on OOM. */
+static char *nfs4_copy_path(struct nfs4_pathname *pathname)
+{
+	int idx;
+	int size = nfs4_pathlen(pathname) + 1;
+	char *buf = kmalloc(size, GFP_KERNEL);
+	char *cursor = buf;
+
+	if (buf == NULL)
+		return NULL;
+	for (idx = 0; idx < pathname->ncomponents; idx++) {
+		struct nfs4_string *comp = &pathname->components[idx];
+		*cursor = '/';
+		strncpy(cursor + 1, comp->data, comp->len);
+		cursor += 1 + comp->len;
+	}
+	*cursor = 0;
+	return buf;
+}
+
+/* Return a kmalloc'ed, NUL-terminated copy of the name in @string; NULL on OOM. */
+static char *nfs4_copy_server(struct nfs4_string *string)
+{
+	int size = string->len + 1;
+	char *name = kmalloc(size, GFP_KERNEL);
+
+	if (name != NULL) {
+		strncpy(name, string->data, string->len);
+		name[size - 1] = 0;
+	}
+
+	return name;
+}
+
+/* Deep-copy one decoded fs_location (root path and server names); NULL on OOM. */
+static struct nfs4_location * nfs4_copy_location(struct nfs4_fs_location *locdata)
+{
+	struct nfs4_location *location;
+	int server;
+
+	location = kmalloc(sizeof(*location), GFP_KERNEL);	/* was sized as struct nfs4_locations */
+	if (location == NULL)
+		return NULL;
+
+	location->rootpath = nfs4_copy_path(&locdata->rootpath);
+	if (location->rootpath == NULL)
+		goto free_location;
+
+	location->nservers = locdata->nservers;
+	location->servers = kmalloc(location->nservers * sizeof(char *), GFP_KERNEL);
+	if (location->servers == NULL)
+		goto free_path;
+
+	for (server = 0; server < location->nservers; server++) {
+		location->servers[server] = nfs4_copy_server(&locdata->servers[server]);
+		if (location->servers[server] == NULL)
+			goto free_servers;
+	}
+	return location;
+
+ free_servers:
+	while (server-- > 0)	/* only the names copied before the failure */
+		kfree(location->servers[server]);
+	kfree(location->servers);	/* the pointer array itself used to be leaked */
+ free_path:
+	kfree(location->rootpath);
+ free_location:
+	kfree(location);
+	return NULL;
+}
+
+/* Free a location built by nfs4_copy_location(), including its server array; NULL is a no-op. */
+static void nfs4_free_location(struct nfs4_location *location)
+{
+	int i;
+
+	if (location == NULL)
+		return;
+	kfree(location->rootpath);
+	for (i = 0; i < location->nservers; i++)
+		kfree(location->servers[i]);
+	kfree(location->servers);	/* the pointer array itself used to be leaked */
+	kfree(location);
+}
+
+/* Free a list built by nfs4_copy_locations(), including its pointer array; NULL is a no-op. */
+void nfs4_free_locations(struct nfs4_locations *locations)
+{
+	int i;
+
+	if (locations == NULL)
+		return;
+	kfree(locations->fs_path);
+	for (i = 0; i < locations->nlocations; i++)
+		nfs4_free_location(locations->locations[i]);
+	kfree(locations->locations);	/* the pointer array itself used to be leaked */
+	kfree(locations);
+}
+
+/* Deep-copy the decoded fs_locations reply @locsdata; NULL if it is empty or on OOM. */
+struct nfs4_locations *nfs4_copy_locations(struct nfs4_fs_locations *locsdata)
+{
+	struct nfs4_locations *locations;
+	int i;
+
+	if (locsdata->nlocations <= 0)
+		return NULL;
+	locations = kmalloc(sizeof(*locations), GFP_KERNEL);
+	if (locations == NULL)
+		return NULL;
+
+	locations->fs_path = nfs4_copy_path(&locsdata->fs_path);
+	if (locations->fs_path == NULL)
+		goto free_locations;
+
+	locations->nlocations = locsdata->nlocations;
+	locations->locations = kmalloc(sizeof(struct nfs4_location *) * locations->nlocations, GFP_KERNEL);
+	if (locations->locations == NULL)
+		goto free_fspath;
+	for (i = 0; i < locations->nlocations; i++) {
+		locations->locations[i] = nfs4_copy_location(&locsdata->locations[i]);
+		if (locations->locations[i] == NULL)
+			goto free_location;
+	}
+	return locations;
+
+ free_location:
+	while (i-- > 0)	/* i indexes the entry that failed; free the ones before it */
+		nfs4_free_location(locations->locations[i]);
+	kfree(locations->locations);	/* the pointer array itself used to be leaked */
+ free_fspath:
+	kfree(locations->fs_path);
+ free_locations:
+	kfree(locations);
+	return NULL;
+}
+
+/* Debug dump in the form "fs_path;srv,srv:rootpath;srv:rootpath"; no-op for NULL. */
+void nfs4_print_locations(struct nfs4_locations *locations)
+{
+	int loc, srv;
+
+	if (locations == NULL)
+		return;
+	dprintk("%s: %s;", __FUNCTION__, locations->fs_path);
+	for (loc = 0; loc < locations->nlocations; loc++) {
+		struct nfs4_location *entry = locations->locations[loc];
+		for (srv = 0; srv < entry->nservers; srv++) {
+			dprintk("%s", entry->servers[srv]);
+			if (srv != (entry->nservers-1))
+				dprintk(",");
+		}
+		dprintk(":%s", entry->rootpath);
+		if (loc != (locations->nlocations-1))
+			dprintk(";");
+	}
+	dprintk("\n");
+}
+
 static int nfs4_do_fsinfo(struct nfs_server *server, struct nfs_fh *fhandle, struct nfs_fsinfo *fsinfo)
 {
 	struct nfs4_exception exception = { };
@@ -2299,6 +2560,34 @@ static int nfs4_do_fsinfo(struct nfs_ser
 				_nfs4_do_fsinfo(server, fhandle, fsinfo),
 				&exception);
 	} while (exception.retry);
+
+	if  ((err == 0) && (server->attr_bitmask[0] & FATTR4_WORD0_FS_LOCATIONS)) {
+		struct page *page;
+		struct nfs4_fs_locations *locations;
+
+		err = -ENOMEM;
+		locations = kmalloc(sizeof(struct nfs4_fs_locations), GFP_KERNEL);
+		if (locations == NULL)
+			goto out;
+
+		page = alloc_page(GFP_KERNEL);
+		if (page == NULL) {
+			kfree(locations);
+			goto out;
+		}
+
+		err = nfs4_proc_get_replicas(server, fhandle, locations, page);
+		if (err == 0) {
+			if (server->nfs4_locations)
+				nfs4_free_locations(server->nfs4_locations);
+			/* Copy locations, but ignore error */
+			server->nfs4_locations = nfs4_copy_locations(locations);
+			nfs4_print_locations(server->nfs4_locations);
+		}
+		__free_page(page);
+		kfree(locations);
+	}
+ out:
 	return err;
 }
 
@@ -2491,6 +2780,20 @@ nfs4_proc_commit_setup(struct nfs_write_
 	rpc_call_setup(task, &msg, 0);	
 }
 
+/* Called when RENEW fails with -EIO: fail each superblock of @clp that has cached locations. */
+static void nfs4_fail_server(struct nfs4_client *clp)
+{
+	struct nfs_server *server;
+
+	list_for_each_entry(server, &clp->cl_superblocks, nfs4_siblings) {
+		if (server->nfs4_locations == NULL)
+			continue;
+		nfs_handle_cb_pathdown(clp);
+		rpc_killall_tasks(server->client);
+		server->client->cl_dead = 1;
+	}
+}
+
 /*
  * nfs4_proc_async_renew(): This is not one of the nfs_rpc_ops; it is a special
  * standalone procedure for queueing an asynchronous RENEW.
@@ -2506,6 +2809,10 @@ static void nfs4_renew_done(struct rpc_t
 			case -NFS4ERR_EXPIRED:
 			case -NFS4ERR_CB_PATH_DOWN:
 				nfs4_schedule_state_recovery(clp);
+				break;
+			case -EIO:
+				nfs4_fail_server(clp);
+				break;
 		}
 		return;
 	}
@@ -2817,6 +3124,51 @@ static int nfs4_delay(struct rpc_clnt *c
 	return res;
 }
 
+/* Re-lookup @inode and pass the result to nfs_try_migrate_filehandle(). */
+int nfs4_recover_filehandle(struct inode *inode, struct dentry *dentry)
+{
+	struct nfs_fattr fattr;
+	struct nfs_fh fh;
+	int status = -EAGAIN;
+
+	if (!inode)
+		goto out;
+
+	status = -ENOENT;
+	if (dentry != NULL) {
+		dget(dentry);
+	} else {
+		dentry = d_find_alias(inode);
+		if (dentry == NULL) {
+			dprintk("%s: no dentry for inode %p\n", __FUNCTION__, inode);
+			goto out;
+		}
+	}
+
+	if (!IS_ROOT(dentry)) {
+		struct dentry *parent = dget_parent(dentry);
+		status = _nfs4_proc_lookup(parent->d_inode, &dentry->d_name, &fh, &fattr);
+		if (status == -NFS4ERR_FHEXPIRED) {
+			/* recover the parent first, then retry the lookup */
+			status = nfs4_recover_filehandle(parent->d_inode, parent);
+			if (status == 0)
+				status = _nfs4_proc_lookup(parent->d_inode, &dentry->d_name, &fh, &fattr);
+		}
+		dput(parent);
+	} else {
+		struct nfs_fsinfo info = { .fattr = &fattr };
+		status = NFS_SERVER(inode)->rpc_ops->getroot(NFS_SERVER(inode), &fh, &info);
+	}
+	if (status == 0)
+		status = nfs_try_migrate_filehandle(inode, &fh, &fattr,
+					    NFS_SERVER(inode)->generation);
+	dput(dentry);
+
+ out:
+	dprintk("%s: inode %p, status %d\n", __FUNCTION__, inode, status);
+	return status;
+}
+
 /* This is the error handling routine for processes that are allowed
  * to sleep.
  */
@@ -2844,6 +3196,17 @@ int nfs4_handle_exception(const struct n
 				break;
 		case -NFS4ERR_OLD_STATEID:
 			exception->retry = 1;
+			break;
+		case -NFS4ERR_FHEXPIRED:
+			ret = nfs4_recover_filehandle(exception->inode, NULL);
+			if (ret == 0)
+				exception->retry = 1;
+			break;
+		case -ETIMEDOUT:
+			ret = nfs_try_failover_inode(exception->inode, NULL);
+			if (ret == 0)
+				exception->retry = 1;
+			break;
 	}
 	/* We failed to handle the error */
 	return nfs4_map_errors(ret);
@@ -3081,7 +3444,7 @@ out:
 
 static int nfs4_proc_getlk(struct nfs4_state *state, int cmd, struct file_lock *request)
 {
-	struct nfs4_exception exception = { };
+	struct nfs4_exception exception = { .inode = state->inode };
 	int err;
 
 	do {
@@ -3489,7 +3852,7 @@ out:
 
 static int nfs4_proc_setlk(struct nfs4_state *state, int cmd, struct file_lock *request)
 {
-	struct nfs4_exception exception = { };
+	struct nfs4_exception exception = { .inode = state->inode };
 	int err;
 
 	do {
@@ -3598,6 +3961,73 @@ ssize_t nfs4_listxattr(struct dentry *de
 	return len;
 }
 
+/* LOOKUP @dentry under @dir and fetch its fs_locations into @fs_locations. */
+int nfs4_proc_fs_locations(struct inode *dir, struct dentry *dentry,
+		struct nfs4_fs_locations *fs_locations, struct page *page)
+{
+	struct nfs_server *server = NFS_SERVER(dir);
+	u32 bitmask[2] = {
+		server->attr_bitmask[0] | FATTR4_WORD0_FS_LOCATIONS,
+		server->attr_bitmask[1],
+	};
+	u32 locations_bitmask[2] = {
+		FATTR4_WORD0_FSID | FATTR4_WORD0_FS_LOCATIONS,
+		FATTR4_WORD1_MOUNTED_ON_FILEID,
+	};
+	struct nfs4_fs_locations_arg args = {
+		.dir_fh = NFS_FH(dir),
+		.name = &dentry->d_name,
+		.page = page,
+		.bitmask = bitmask,
+	};
+	struct rpc_message msg = {
+		.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_FS_LOCATIONS],
+		.rpc_argp = &args,
+		.rpc_resp = fs_locations,
+	};
+	int status;
+
+	dprintk("%s: start\n", __FUNCTION__);
+	fs_locations->nlocations = 0;
+	fs_locations->server = server;
+	fs_locations->fattr.valid = 0;
+	status = rpc_call_sync(server->client, &msg, 0);
+	if (status == -NFS4ERR_MOVED) {	/* retry with the reduced attribute mask */
+		args.bitmask = locations_bitmask;
+		status = rpc_call_sync(server->client, &msg, 0);
+	}
+	dprintk("%s: returned status = %d\n", __FUNCTION__, status);
+	return status;
+}
+
+/*
+ * PUTFH(@fhandle) + GETATTR(fsid, fs_locations), decoded into @fs_locations.
+ */
+int nfs4_proc_get_replicas(struct nfs_server *server, struct nfs_fh *fhandle,
+		struct nfs4_fs_locations *fs_locations, struct page *page)
+{
+	u32 bitmask[2] = { FATTR4_WORD0_FSID | FATTR4_WORD0_FS_LOCATIONS, 0 };
+	struct nfs4_get_replicas_arg args = {
+		.dir_fh = fhandle,
+		.page = page,
+		.bitmask = bitmask,
+	};
+	struct rpc_message msg = {
+		.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_GET_REPLICAS],
+		.rpc_argp = &args,
+		.rpc_resp = fs_locations,
+	};
+	int status;
+
+	dprintk("%s: start\n", __FUNCTION__);
+	fs_locations->nlocations = 0;
+	fs_locations->server = server;
+	fs_locations->fattr.valid = 0;
+	status = rpc_call_sync(server->client, &msg, 0);
+	dprintk("%s: returned status = %d\n", __FUNCTION__, status);
+	return status;
+}
+
 struct nfs4_state_recovery_ops nfs4_reboot_recovery_ops = {
 	.recover_open	= nfs4_open_reclaim,
 	.recover_lock	= nfs4_lock_reclaim,
diff -puN fs/nfs/nfs4state.c~rnfs-all fs/nfs/nfs4state.c
--- rnfs-linux-2.6.16-rc3/fs/nfs/nfs4state.c~rnfs-all	2007-03-03 16:21:47.000000000 -0500
+++ rnfs-linux-2.6.16-rc3-jiayingz/fs/nfs/nfs4state.c	2007-03-03 16:21:47.000000000 -0500
@@ -63,6 +63,7 @@ void
 init_nfsv4_state(struct nfs_server *server)
 {
 	server->nfs4_state = NULL;
+	server->nfs4_locations = NULL;
 	INIT_LIST_HEAD(&server->nfs4_siblings);
 }
 
@@ -75,6 +76,10 @@ destroy_nfsv4_state(struct nfs_server *s
 		nfs4_put_client(server->nfs4_state);
 		server->nfs4_state = NULL;
 	}
+	if (server->nfs4_locations) {
+		nfs4_free_locations(server->nfs4_locations);
+		server->nfs4_locations = NULL;
+	}
 }
 
 /*
diff -puN fs/nfs/nfs4xdr.c~rnfs-all fs/nfs/nfs4xdr.c
--- rnfs-linux-2.6.16-rc3/fs/nfs/nfs4xdr.c~rnfs-all	2007-03-03 16:21:47.000000000 -0500
+++ rnfs-linux-2.6.16-rc3-jiayingz/fs/nfs/nfs4xdr.c	2007-03-03 16:21:47.000000000 -0500
@@ -411,6 +411,24 @@ static int nfs_stat_to_errno(int);
 #define NFS4_dec_setacl_sz	(compound_decode_hdr_maxsz + \
 				decode_putfh_maxsz + \
 				op_decode_hdr_maxsz + nfs4_fattr_bitmap_maxsz)
+/* FS_LOCATIONS is PUTFH + LOOKUP + GETATTR; a LOOKUP reply is one op header */
+#define NFS4_enc_fs_locations_sz \
+				(compound_encode_hdr_maxsz + \
+				 encode_putfh_maxsz + encode_lookup_maxsz + \
+				 encode_getattr_maxsz)
+#define NFS4_dec_fs_locations_sz \
+				(compound_decode_hdr_maxsz + \
+				 decode_putfh_maxsz + op_decode_hdr_maxsz + \
+				 op_decode_hdr_maxsz + nfs4_fattr_bitmap_maxsz)
+#define NFS4_enc_get_replicas_sz \
+				(compound_encode_hdr_maxsz + \
+				 encode_putfh_maxsz + \
+				 encode_getattr_maxsz)
+#define NFS4_dec_get_replicas_sz \
+				(compound_decode_hdr_maxsz + \
+				 decode_putfh_maxsz + \
+				 op_decode_hdr_maxsz + \
+				 nfs4_fattr_bitmap_maxsz)
 
 static struct {
 	unsigned int	mode;
@@ -722,6 +740,13 @@ static int encode_fsinfo(struct xdr_stre
 			bitmask[1] & nfs4_fsinfo_bitmap[1]);
 }
 
+static int encode_fs_locations(struct xdr_stream *xdr, const u32* bitmask)
+{
+	return encode_getattr_two(xdr,
+				  bitmask[0] & nfs4_fs_locations_bitmap[0],
+				  bitmask[1] & nfs4_fs_locations_bitmap[1]);
+}
+
 static int encode_getfh(struct xdr_stream *xdr)
 {
 	uint32_t *p;
@@ -2003,6 +2028,67 @@ out:
 }
 
 /*
+ * Encode FS_LOCATIONS request
+ */
+static int nfs4_xdr_enc_fs_locations(struct rpc_rqst *req, uint32_t *p, struct nfs4_fs_locations_arg *args)
+{
+	struct rpc_auth *auth = req->rq_task->tk_auth;
+	struct compound_hdr hdr = { .nops = 3 };
+	struct xdr_stream xdr;
+	int replen, status;
+
+	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
+	encode_compound_hdr(&xdr, &hdr);
+	status = encode_putfh(&xdr, args->dir_fh);
+	if (status != 0)
+		return status;
+	status = encode_lookup(&xdr, args->name);
+	if (status != 0)
+		return status;
+	status = encode_fs_locations(&xdr, args->bitmask);
+	if (status != 0)
+		return status;
+	/*
+	 * set up reply
+	 *   toplevel_status + OP_PUTFH + status
+	 *   + OP_LOOKUP + status + OP_GETATTR + status = 7
+	 */
+	replen = (RPC_REPHDRSIZE + auth->au_rslack + 7) << 2;
+	xdr_inline_pages(&req->rq_rcv_buf, replen, &args->page,
+			0, PAGE_SIZE);
+	return 0;
+}
+
+/*
+ * Encode GET_REPLICAS request: PUTFH on args->dir_fh, then a GETATTR
+ * built by encode_fs_locations() from args->bitmask.
+ */
+static int nfs4_xdr_enc_get_replicas(struct rpc_rqst *req, uint32_t *p, struct nfs4_get_replicas_arg *args)
+{
+	struct rpc_auth *auth = req->rq_task->tk_auth;
+	struct compound_hdr hdr = { .nops = 2 };
+	struct xdr_stream xdr;
+	int replen, status;
+
+	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
+	encode_compound_hdr(&xdr, &hdr);
+	status = encode_putfh(&xdr, args->dir_fh);
+	if (status != 0)
+		return status;
+	status = encode_fs_locations(&xdr, args->bitmask);
+	if (status != 0)
+		return status;
+	/*
+	 * set up reply
+	 *   toplevel_status + OP_PUTFH + status + OP_GETATTR + status = 5
+	 */
+	replen = (RPC_REPHDRSIZE + auth->au_rslack + 5) << 2;
+	xdr_inline_pages(&req->rq_rcv_buf, replen, &args->page,
+			0, PAGE_SIZE);
+	return 0;
+}
+
+/*
  * START OF "GENERIC" DECODE ROUTINES.
  *   These may look a little ugly since they are imported from a "generic"
  * set of XDR encode/decode routines which are intended to be shared by
@@ -2036,7 +2122,7 @@ out:
 	} \
 } while (0)
 
-static int decode_opaque_inline(struct xdr_stream *xdr, uint32_t *len, char **string)
+static int decode_opaque_inline(struct xdr_stream *xdr, unsigned int *len, char **string)
 {
 	uint32_t *p;
 
@@ -2087,7 +2173,7 @@ static int decode_op_hdr(struct xdr_stre
 static int decode_ace(struct xdr_stream *xdr, void *ace, struct nfs4_client *clp)
 {
 	uint32_t *p;
-	uint32_t strlen;
+	unsigned int strlen;
 	char *str;
 
 	READ_BUF(12);
@@ -2217,7 +2303,7 @@ static int decode_attr_symlink_support(s
 	return 0;
 }
 
-static int decode_attr_fsid(struct xdr_stream *xdr, uint32_t *bitmap, struct nfs4_fsid *fsid)
+static int decode_attr_fsid(struct xdr_stream *xdr, uint32_t *bitmap, struct nfs_fsid *fsid)
 {
 	uint32_t *p;
 
@@ -2285,6 +2371,22 @@ static int decode_attr_fileid(struct xdr
 	return 0;
 }
 
+static int decode_attr_mounted_on_fileid(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t *fileid)
+{
+	uint32_t *p;
+
+	*fileid = 0;
+	if (unlikely(bitmap[1] & (FATTR4_WORD1_MOUNTED_ON_FILEID - 1U)))
+		return -EIO;
+	if (likely(bitmap[1] & FATTR4_WORD1_MOUNTED_ON_FILEID)) {
+		READ_BUF(8);
+		READ64(*fileid);
+		bitmap[1] &= ~FATTR4_WORD1_MOUNTED_ON_FILEID;
+	}
+	dprintk("%s: fileid=%Lu\n", __FUNCTION__, (unsigned long long)*fileid);
+	return 0;
+}
+
 static int decode_attr_files_avail(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t *res)
 {
 	uint32_t *p;
@@ -2336,6 +2438,116 @@ static int decode_attr_files_total(struc
 	return status;
 }
 
+/*
+ * Decode a pathname4: a component count followed by that many opaque strings.
+ * Returns 0 on success, or -EIO when a component fails to decode or the
+ * count exceeds NFS4_PATHNAME_MAXCOMPONENTS (checked before anything is stored).
+ */
+static int decode_pathname(struct xdr_stream *xdr, struct nfs4_pathname *path)
+{
+	uint32_t n;
+	uint32_t *p;
+	int status = 0;
+
+	READ_BUF(4);
+	READ32(n);
+	if (n == 0)
+		goto root_path;
+	/* components[] is fixed-size: reject the count before writing any entry */
+	if (n > NFS4_PATHNAME_MAXCOMPONENTS) {
+		dprintk("cannot parse %u components in path\n", n);
+		goto out_eio;
+	}
+	dprintk("path ");
+	for (path->ncomponents = 0; path->ncomponents < n; path->ncomponents++) {
+		struct nfs4_string *component = &path->components[path->ncomponents];
+		status = decode_opaque_inline(xdr, &component->len, &component->data);
+		if (unlikely(status != 0))
+			goto out_eio;
+		/* the string is length-counted: bound the print by its length */
+		dprintk("/%.*s", (int)component->len, component->data);
+	}
+out:
+	dprintk("\n");
+	return status;
+root_path:
+/* a root pathname is sent as a zero component4 */
+	path->ncomponents = 1;
+	path->components[0].len=0;
+	path->components[0].data=NULL;
+	dprintk("path /\n");
+	goto out;
+out_eio:
+	dprintk(" status %d", status);
+	status = -EIO;
+	goto out;
+}
+
+/*
+ * Decode the fs_locations attribute into @res: the fs root pathname, then
+ * each location's server list and root pathname.  A location count above
+ * NFS4_FS_LOCATIONS_MAXENTRIES is rejected with -EIO; servers beyond
+ * NFS4_FS_LOCATION_MAXSERVERS are read from the stream and discarded.
+ */
+static int decode_attr_fs_locations(struct xdr_stream *xdr, uint32_t *bitmap, struct nfs4_fs_locations *res)
+{
+	uint32_t n;
+	uint32_t *p;
+	int status = -EIO;
+
+	if (unlikely(bitmap[0] & (FATTR4_WORD0_FS_LOCATIONS -1U)))
+		goto out;
+	status = 0;
+	if (unlikely(!(bitmap[0] & FATTR4_WORD0_FS_LOCATIONS)))
+		goto out;
+	dprintk("%s: fsroot ", __FUNCTION__);
+	status = decode_pathname(xdr, &res->fs_path);
+	if (unlikely(status != 0))
+		goto out;
+	READ_BUF(4);
+	READ32(n);
+	/* locations[] is fixed-size: never index past its last entry */
+	if (n == 0 || n > NFS4_FS_LOCATIONS_MAXENTRIES)
+		goto out_eio;
+	for (res->nlocations = 0; res->nlocations < n; res->nlocations++) {
+		uint32_t m, i;
+		struct nfs4_fs_location *loc = &res->locations[res->nlocations];
+
+		READ_BUF(4);
+		READ32(m);
+		if (m == 0)
+			goto out_eio;
+
+		loc->nservers = 0;
+		dprintk("%s: servers ", __FUNCTION__);
+		for (i = 0; i < m; i++) {
+			struct nfs4_string extra, *server = &extra;
+
+			/* keep the first MAXSERVERS names; parse and drop the rest */
+			if (loc->nservers < NFS4_FS_LOCATION_MAXSERVERS)
+				server = &loc->servers[loc->nservers];
+			status = decode_opaque_inline(xdr, &server->len, &server->data);
+			if (unlikely(status != 0))
+				goto out_eio;
+			dprintk("%.*s ", (int)server->len, server->data);
+			if (server != &extra)
+				loc->nservers++;
+		}
+		if (m > NFS4_FS_LOCATION_MAXSERVERS)
+			dprintk("%s: using first %d of %u servers returned for location %d\n",
+				__FUNCTION__, NFS4_FS_LOCATION_MAXSERVERS, m, res->nlocations);
+		status = decode_pathname(xdr, &loc->rootpath);
+		if (unlikely(status != 0))
+			goto out_eio;
+	}
+out:
+	dprintk("%s: fs_locations done, error = %d\n", __FUNCTION__, status);
+	return status;
+out_eio:
+	status = -EIO;
+	goto out;
+}
+
 static int decode_attr_maxfilesize(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t *res)
 {
 	uint32_t *p;
@@ -2841,6 +3053,7 @@ static int decode_getfattr(struct xdr_st
 		 bitmap[2] = {0},
 		 type;
 	int status, fmode = 0;
+	uint64_t fileid;
 
 	if ((status = decode_op_hdr(xdr, OP_GETATTR)) != 0)
 		goto xdr_error;
@@ -2863,10 +3076,14 @@ static int decode_getfattr(struct xdr_st
 		goto xdr_error;
 	if ((status = decode_attr_size(xdr, bitmap, &fattr->size)) != 0)
 		goto xdr_error;
-	if ((status = decode_attr_fsid(xdr, bitmap, &fattr->fsid_u.nfs4)) != 0)
+	if ((status = decode_attr_fsid(xdr, bitmap, &fattr->fsid)) != 0)
 		goto xdr_error;
 	if ((status = decode_attr_fileid(xdr, bitmap, &fattr->fileid)) != 0)
 		goto xdr_error;
+	if ((status = decode_attr_fs_locations(xdr, bitmap, container_of(fattr,
+						struct nfs4_fs_locations,
+						fattr))) != 0)
+		goto xdr_error;
 	if ((status = decode_attr_mode(xdr, bitmap, &fattr->mode)) != 0)
 		goto xdr_error;
 	fattr->mode |= fmode;
@@ -2886,6 +3103,10 @@ static int decode_getfattr(struct xdr_st
 		goto xdr_error;
 	if ((status = decode_attr_time_modify(xdr, bitmap, &fattr->mtime)) != 0)
 		goto xdr_error;
+	if ((status = decode_attr_mounted_on_fileid(xdr, bitmap, &fileid)) != 0)
+		goto xdr_error;
+	if (fattr->fileid == 0 && fileid != 0)
+		fattr->fileid = fileid;
 	if ((status = verify_attr_len(xdr, savep, attrlen)) == 0)
 		fattr->valid = NFS_ATTR_FATTR | NFS_ATTR_FATTR_V3 | NFS_ATTR_FATTR_V4;
 xdr_error:
@@ -4211,6 +4432,50 @@ out:
 	return status;
 }
 
+/*
+ * Decode FS_LOCATIONS reply (PUTFH, LOOKUP, GETATTR)
+ */
+static int nfs4_xdr_dec_fs_locations(struct rpc_rqst *req, uint32_t *p, struct nfs4_fs_locations *res)
+{
+	struct compound_hdr hdr;
+	struct xdr_stream xdr;
+	int status;
+
+	xdr_init_decode(&xdr, &req->rq_rcv_buf, p);
+	status = decode_compound_hdr(&xdr, &hdr);
+	if (status == 0)
+		status = decode_putfh(&xdr);
+	if (status == 0)
+		status = decode_lookup(&xdr);
+	if (status != 0)
+		return status;
+
+	/* Continue decoding from the reply page. */
+	xdr_enter_page(&xdr, PAGE_SIZE);
+	return decode_getfattr(&xdr, &res->fattr, res->server);
+}
+
+/*
+ * Decode GET_REPLICAS reply (PUTFH, GETATTR)
+ */
+static int nfs4_xdr_dec_get_replicas(struct rpc_rqst *req, uint32_t *p, struct nfs4_fs_locations *res)
+{
+	struct compound_hdr hdr;
+	struct xdr_stream xdr;
+	int status;
+
+	xdr_init_decode(&xdr, &req->rq_rcv_buf, p);
+	status = decode_compound_hdr(&xdr, &hdr);
+	if (status == 0)
+		status = decode_putfh(&xdr);
+	if (status != 0)
+		return status;
+
+	/* Continue decoding from the reply page. */
+	xdr_enter_page(&xdr, PAGE_SIZE);
+	return decode_getfattr(&xdr, &res->fattr, res->server);
+}
+
 uint32_t *nfs4_decode_dirent(uint32_t *p, struct nfs_entry *entry, int plus)
 {
 	uint32_t bitmap[2] = {0};
@@ -4382,6 +4647,8 @@ struct rpc_procinfo	nfs4_procedures[] = 
   PROC(DELEGRETURN,	enc_delegreturn, dec_delegreturn),
   PROC(GETACL,		enc_getacl,	dec_getacl),
   PROC(SETACL,		enc_setacl,	dec_setacl),
+  PROC(FS_LOCATIONS,	enc_fs_locations, dec_fs_locations),
+  PROC(GET_REPLICAS,	enc_get_replicas, dec_get_replicas),
 };
 
 struct rpc_version		nfs_version4 = {
diff -puN fs/super.c~rnfs-all fs/super.c
--- rnfs-linux-2.6.16-rc3/fs/super.c~rnfs-all	2007-03-03 16:21:47.000000000 -0500
+++ rnfs-linux-2.6.16-rc3-jiayingz/fs/super.c	2007-03-03 16:21:47.000000000 -0500
@@ -787,17 +787,13 @@ struct super_block *get_sb_single(struct
 EXPORT_SYMBOL(get_sb_single);
 
 struct vfsmount *
-do_kern_mount(const char *fstype, int flags, const char *name, void *data)
+vfs_kern_mount(struct file_system_type *type, int flags, const char *name, void *data)
 {
-	struct file_system_type *type = get_fs_type(fstype);
 	struct super_block *sb = ERR_PTR(-ENOMEM);
 	struct vfsmount *mnt;
 	int error;
 	char *secdata = NULL;
 
-	if (!type)
-		return ERR_PTR(-ENODEV);
-
 	mnt = alloc_vfsmnt(name);
 	if (!mnt)
 		goto out;
@@ -828,7 +824,6 @@ do_kern_mount(const char *fstype, int fl
 	mnt->mnt_parent = mnt;
 	up_write(&sb->s_umount);
 	free_secdata(secdata);
-	put_filesystem(type);
 	return mnt;
 out_sb:
 	up_write(&sb->s_umount);
@@ -839,10 +834,23 @@ out_free_secdata:
 out_mnt:
 	free_vfsmnt(mnt);
 out:
-	put_filesystem(type);
 	return (struct vfsmount *)sb;
 }
 
+EXPORT_SYMBOL_GPL(vfs_kern_mount);
+
+/* Resolve @fstype by name, mount it with vfs_kern_mount(), then drop the type. */
+struct vfsmount *do_kern_mount(const char *fstype, int flags, const char *name, void *data)
+{
+	struct vfsmount *mnt = ERR_PTR(-ENODEV);
+	struct file_system_type *type = get_fs_type(fstype);
+	if (type) {
+		mnt = vfs_kern_mount(type, flags, name, data);
+		put_filesystem(type);
+	}
+	return mnt;
+}
+
 EXPORT_SYMBOL_GPL(do_kern_mount);
 
 struct vfsmount *kern_mount(struct file_system_type *type)
diff -puN include/linux/mount.h~rnfs-all include/linux/mount.h
--- rnfs-linux-2.6.16-rc3/include/linux/mount.h~rnfs-all	2007-03-03 16:21:47.000000000 -0500
+++ rnfs-linux-2.6.16-rc3-jiayingz/include/linux/mount.h	2007-03-03 16:21:47.000000000 -0500
@@ -73,6 +73,11 @@ extern struct vfsmount *alloc_vfsmnt(con
 extern struct vfsmount *do_kern_mount(const char *fstype, int flags,
 				      const char *name, void *data);
 
+struct file_system_type;
+extern struct vfsmount *vfs_kern_mount(struct file_system_type *type,
+				      int flags, const char *name,
+				      void *data);
+
 struct nameidata;
 
 extern int do_add_mount(struct vfsmount *newmnt, struct nameidata *nd,
diff -puN include/linux/nfs4.h~rnfs-all include/linux/nfs4.h
--- rnfs-linux-2.6.16-rc3/include/linux/nfs4.h~rnfs-all	2007-03-03 16:21:47.000000000 -0500
+++ rnfs-linux-2.6.16-rc3-jiayingz/include/linux/nfs4.h	2007-03-03 16:21:47.000000000 -0500
@@ -384,6 +384,8 @@ enum {
 	NFSPROC4_CLNT_DELEGRETURN,
 	NFSPROC4_CLNT_GETACL,
 	NFSPROC4_CLNT_SETACL,
+	NFSPROC4_CLNT_FS_LOCATIONS,
+	NFSPROC4_CLNT_GET_REPLICAS,
 };
 
 #endif
diff -puN include/linux/nfs_fs.h~rnfs-all include/linux/nfs_fs.h
--- rnfs-linux-2.6.16-rc3/include/linux/nfs_fs.h~rnfs-all	2007-03-03 16:21:47.000000000 -0500
+++ rnfs-linux-2.6.16-rc3-jiayingz/include/linux/nfs_fs.h	2007-03-03 16:21:47.000000000 -0500
@@ -16,8 +16,6 @@
 #include <linux/rwsem.h>
 #include <linux/wait.h>
 
-#include <linux/nfs_fs_sb.h>
-
 #include <linux/sunrpc/debug.h>
 #include <linux/sunrpc/auth.h>
 #include <linux/sunrpc/clnt.h>
@@ -27,6 +25,9 @@
 #include <linux/nfs3.h>
 #include <linux/nfs4.h>
 #include <linux/nfs_xdr.h>
+
+#include <linux/nfs_fs_sb.h>
+
 #include <linux/rwsem.h>
 #include <linux/mempool.h>
 
@@ -317,6 +318,15 @@ extern void put_nfs_open_context(struct 
 extern void nfs_file_set_open_context(struct file *filp, struct nfs_open_context *ctx);
 extern struct nfs_open_context *nfs_find_open_context(struct inode *inode, struct rpc_cred *cred, int mode);
 extern void nfs_file_clear_open_context(struct file *filp);
+extern struct vfsmount *nfs_do_submount(const struct vfsmount *mnt_parent,
+					const struct dentry *dentry,
+					struct nfs_fh *fh,
+					struct nfs_fattr *fattr);
+extern struct vfsmount *nfs_do_refmount(const struct vfsmount *mnt_parent,
+					struct dentry *dentry);
+extern int nfs_try_migrate_inode(struct inode *dir, struct dentry *parent);
+extern int nfs_try_migrate_filehandle(struct inode *inode, struct nfs_fh *fh, struct nfs_fattr *fattr, uint32_t generation);
+extern int nfs_try_failover_inode(struct inode *inode, struct dentry *dentry);
 
 /* linux/net/ipv4/ipconfig.c: trims ip addr off front of name, too. */
 extern u32 root_nfs_parse_addr(char *name); /*__init*/
@@ -403,6 +413,13 @@ extern void nfs_unregister_sysctl(void);
 #endif
 
 /*
+ * linux/fs/nfs/namespace.c
+ */
+extern struct inode_operations nfs_mountpoint_inode_operations;
+extern int nfs_mountpoint_expiry_timeout;
+extern void nfs_release_automount_timer(void);
+
+/*
  * linux/fs/nfs/unlink.c
  */
 extern int  nfs_async_unlink(struct dentry *);
diff -puN include/linux/nfs_fs_sb.h~rnfs-all include/linux/nfs_fs_sb.h
--- rnfs-linux-2.6.16-rc3/include/linux/nfs_fs_sb.h~rnfs-all	2007-03-03 16:21:47.000000000 -0500
+++ rnfs-linux-2.6.16-rc3-jiayingz/include/linux/nfs_fs_sb.h	2007-03-03 16:21:47.000000000 -0500
@@ -35,6 +35,8 @@ struct nfs_server {
 	char *			hostname;	/* remote hostname */
 	struct nfs_fh		fh;
 	struct sockaddr_in	addr;
+	struct nfs_fsid		fsid;
+	uint32_t		generation;
 	unsigned long		mount_time;	/* when this fs was mounted */
 #ifdef CONFIG_NFS_V4
 	/* Our own IP address, as a null-terminated string.
@@ -43,6 +45,8 @@ struct nfs_server {
 	char			ip_addr[16];
 	char *			mnt_path;
 	struct nfs4_client *	nfs4_state;	/* all NFSv4 state starts here */
+	struct nfs4_locations *
+				nfs4_locations;	/* List of FS locations */
 	struct list_head	nfs4_siblings;	/* List of other nfs_server structs
 						 * that share the same clientid
 						 */
diff -puN include/linux/nfs_page.h~rnfs-all include/linux/nfs_page.h
--- rnfs-linux-2.6.16-rc3/include/linux/nfs_page.h~rnfs-all	2007-03-03 16:21:47.000000000 -0500
+++ rnfs-linux-2.6.16-rc3-jiayingz/include/linux/nfs_page.h	2007-03-03 16:21:47.000000000 -0500
@@ -13,7 +13,6 @@
 #include <linux/list.h>
 #include <linux/pagemap.h>
 #include <linux/wait.h>
-#include <linux/nfs_fs_sb.h>
 #include <linux/sunrpc/auth.h>
 #include <linux/nfs_xdr.h>
 
diff -puN include/linux/nfs_xdr.h~rnfs-all include/linux/nfs_xdr.h
--- rnfs-linux-2.6.16-rc3/include/linux/nfs_xdr.h~rnfs-all	2007-03-03 16:21:47.000000000 -0500
+++ rnfs-linux-2.6.16-rc3-jiayingz/include/linux/nfs_xdr.h	2007-03-03 16:21:47.000000000 -0500
@@ -14,11 +14,19 @@
 #define NFS_DEF_FILE_IO_SIZE	(4096U)
 #define NFS_MIN_FILE_IO_SIZE	(1024U)
 
-struct nfs4_fsid {
-	__u64 major;
-	__u64 minor;
+struct nfs_fsid {
+	uint64_t		major;
+	uint64_t		minor;
 };
 
+/*
+ * Returns non-zero iff both the major and the minor parts of @a and @b match.
+ */
+static inline int nfs_fsid_equal(const struct nfs_fsid *a, const struct nfs_fsid *b)
+{
+	return !(a->major != b->major || a->minor != b->minor);
+}
+
 struct nfs_fattr {
 	unsigned short		valid;		/* which fields are valid */
 	__u64			pre_size;	/* pre_op_attr.size	  */
@@ -40,10 +48,7 @@ struct nfs_fattr {
 		} nfs3;
 	} du;
 	dev_t			rdev;
-	union {
-		__u64		nfs3;		/* also nfs2 */
-		struct nfs4_fsid nfs4;
-	} fsid_u;
+	struct nfs_fsid		fsid;
 	__u64			fileid;
 	struct timespec		atime;
 	struct timespec		mtime;
@@ -59,6 +64,7 @@ struct nfs_fattr {
 #define NFS_ATTR_FATTR_V3	0x0004		/* NFSv3 attributes */
 #define NFS_ATTR_FATTR_V4	0x0008
 #define NFS_ATTR_PRE_CHANGE	0x0010
+#define NFS_ATTR_MOVED		0x0020
 
 /*
  * Info on the file system
@@ -675,6 +681,46 @@ struct nfs4_server_caps_res {
 	u32				has_symlinks;
 };
 
+struct nfs4_string {			/* counted string filled by decode_opaque_inline() */
+	unsigned int len;		/* length that goes with data */
+	char *data;			/* NOTE(review): presumably not NUL-terminated - confirm */
+};
+
+#define NFS4_PATHNAME_MAXCOMPONENTS 512
+struct nfs4_pathname {			/* filled in by decode_pathname() */
+	unsigned int ncomponents;	/* number of valid entries in components[] */
+	struct nfs4_string components[NFS4_PATHNAME_MAXCOMPONENTS];
+};
+
+#define NFS4_FS_LOCATION_MAXSERVERS 10
+struct nfs4_fs_location {		/* one entry of the fs_locations attribute */
+	unsigned int nservers;		/* number of valid entries in servers[] */
+	struct nfs4_string servers[NFS4_FS_LOCATION_MAXSERVERS];
+	struct nfs4_pathname rootpath;	/* decoded after the server list */
+};
+
+#define NFS4_FS_LOCATIONS_MAXENTRIES 10
+struct nfs4_fs_locations {		/* rpc_resp of FS_LOCATIONS and GET_REPLICAS */
+	struct nfs_fattr fattr;		/* decode_getfattr() container_of()s from this member */
+	const struct nfs_server *server;	/* passed to decode_getfattr() by the reply decoders */
+	struct nfs4_pathname fs_path;	/* fs root path, decoded before the locations */
+	int nlocations;			/* number of valid entries in locations[] */
+	struct nfs4_fs_location locations[NFS4_FS_LOCATIONS_MAXENTRIES];
+};
+
+struct nfs4_fs_locations_arg {		/* rpc_argp of NFSPROC4_CLNT_FS_LOCATIONS */
+	const struct nfs_fh *dir_fh;	/* PUTFH target */
+	const struct qstr *name;	/* LOOKUP name */
+	struct page *page;		/* handed to xdr_inline_pages() for the reply */
+	const u32 *bitmask;		/* two-word GETATTR attribute mask */
+};
+
+struct nfs4_get_replicas_arg {		/* rpc_argp of NFSPROC4_CLNT_GET_REPLICAS */
+	const struct nfs_fh *dir_fh;	/* PUTFH target */
+	struct page *page;		/* handed to xdr_inline_pages() for the reply */
+	const u32 *bitmask;		/* two-word GETATTR attribute mask */
+};
+
 #endif /* CONFIG_NFS_V4 */
 
 struct nfs_page;
diff -puN include/linux/sunrpc/xdr.h~rnfs-all include/linux/sunrpc/xdr.h
--- rnfs-linux-2.6.16-rc3/include/linux/sunrpc/xdr.h~rnfs-all	2007-03-03 16:21:47.000000000 -0500
+++ rnfs-linux-2.6.16-rc3-jiayingz/include/linux/sunrpc/xdr.h	2007-03-03 16:21:47.000000000 -0500
@@ -194,6 +194,7 @@ extern void xdr_write_pages(struct xdr_s
 extern void xdr_init_decode(struct xdr_stream *xdr, struct xdr_buf *buf, uint32_t *p);
 extern uint32_t *xdr_inline_decode(struct xdr_stream *xdr, size_t nbytes);
 extern void xdr_read_pages(struct xdr_stream *xdr, unsigned int len);
+extern void xdr_enter_page(struct xdr_stream *xdr, unsigned int len);
 
 #endif /* __KERNEL__ */
 
diff -puN net/sunrpc/rpc_pipe.c~rnfs-all net/sunrpc/rpc_pipe.c
--- rnfs-linux-2.6.16-rc3/net/sunrpc/rpc_pipe.c~rnfs-all	2007-03-03 16:21:47.000000000 -0500
+++ rnfs-linux-2.6.16-rc3-jiayingz/net/sunrpc/rpc_pipe.c	2007-03-03 16:21:47.000000000 -0500
@@ -627,6 +627,7 @@ rpc_lookup_negative(char *path, struct n
 	struct inode *dir;
 	int error;
 
+printk("ANDROS: %s LOOKUPPARENT\n",__FUNCTION__);
 	if ((error = rpc_lookup_parent(path, nd)) != 0)
 		return ERR_PTR(error);
 	dir = nd->dentry->d_inode;
@@ -655,6 +656,7 @@ rpc_mkdir(char *path, struct rpc_clnt *r
 	struct inode *dir;
 	int error;
 
+printk("ANDROS: %s for %s\n",__FUNCTION__, path);
 	dentry = rpc_lookup_negative(path, &nd);
 	if (IS_ERR(dentry))
 		return dentry;
@@ -689,6 +691,7 @@ rpc_rmdir(char *path)
 	struct inode *dir;
 	int error;
 
+printk("ANDROS: %s path %s LOOKUPPARENT\n",__FUNCTION__, path);
 	if ((error = rpc_lookup_parent(path, &nd)) != 0)
 		return error;
 	dir = nd.dentry->d_inode;
@@ -715,6 +718,7 @@ rpc_mkpipe(char *path, void *private, st
 	struct inode *dir, *inode;
 	struct rpc_inode *rpci;
 
+	printk("ANDROS: %s path %s\n", __FUNCTION__,path);
 	dentry = rpc_lookup_negative(path, &nd);
 	if (IS_ERR(dentry))
 		return dentry;
@@ -750,6 +754,7 @@ rpc_unlink(char *path)
 	struct inode *dir;
 	int error;
 
+printk("ANDROS: %s LOOKUPPARENT\n",__FUNCTION__);
 	if ((error = rpc_lookup_parent(path, &nd)) != 0)
 		return error;
 	dir = nd.dentry->d_inode;
diff -puN net/sunrpc/xdr.c~rnfs-all net/sunrpc/xdr.c
--- rnfs-linux-2.6.16-rc3/net/sunrpc/xdr.c~rnfs-all	2007-03-03 16:21:47.000000000 -0500
+++ rnfs-linux-2.6.16-rc3-jiayingz/net/sunrpc/xdr.c	2007-03-03 16:21:47.000000000 -0500
@@ -568,8 +568,7 @@ EXPORT_SYMBOL(xdr_inline_decode);
  *
  * Moves data beyond the current pointer position from the XDR head[] buffer
  * into the page list. Any data that lies beyond current position + "len"
- * bytes is moved into the XDR tail[]. The current pointer is then
- * repositioned at the beginning of the XDR tail.
+ * bytes is moved into the XDR tail[].
  */
 void xdr_read_pages(struct xdr_stream *xdr, unsigned int len)
 {
@@ -606,6 +605,31 @@ void xdr_read_pages(struct xdr_stream *x
 }
 EXPORT_SYMBOL(xdr_read_pages);
 
+/**
+ * xdr_enter_page - decode data from the XDR page
+ * @xdr: pointer to xdr_stream struct
+ * @len: number of bytes of page data
+ *
+ * Moves data beyond the current pointer position from the XDR head[] buffer
+ * into the page list. Any data that lies beyond current position + "len"
+ * bytes is moved into the XDR tail[]. The current pointer is then
+ * repositioned at the beginning of the first XDR page.
+ */
+void xdr_enter_page(struct xdr_stream *xdr, unsigned int len)
+{
+	char * kaddr = page_address(xdr->buf->pages[0]);
+	xdr_read_pages(xdr, len);
+	/*
+	 * Position current pointer at page_base within the first page, and
+	 * clamp the decodable length to what remains of that one page.
+	 */
+	if (len > PAGE_CACHE_SIZE - xdr->buf->page_base)
+		len = PAGE_CACHE_SIZE - xdr->buf->page_base;
+	xdr->p = (uint32_t *)(kaddr + xdr->buf->page_base);
+	xdr->end = (uint32_t *)((char *)xdr->p + len);
+}
+EXPORT_SYMBOL(xdr_enter_page);
+
 static struct kvec empty_iov = {.iov_base = NULL, .iov_len = 0};
 
 void
diff -puN fs/nfsd/export.c~rnfs-all fs/nfsd/export.c
--- rnfs-linux-2.6.16-rc3/fs/nfsd/export.c~rnfs-all	2007-03-03 16:21:47.000000000 -0500
+++ rnfs-linux-2.6.16-rc3-jiayingz/fs/nfsd/export.c	2007-03-03 16:21:47.000000000 -0500
@@ -33,6 +33,7 @@
 #include <linux/nfsd/nfsfh.h>
 #include <linux/nfsd/syscall.h>
 #include <linux/lockd/bind.h>
+#include <linux/inet.h>
 
 #define NFSDDBG_FACILITY	NFSDDBG_EXPORT
 #define NFSD_PARANOIA 1
@@ -342,15 +343,26 @@ static int check_export(struct inode *in
 
 }
 
+/* Debug aid: log every hosts:path entry of @locs; a NULL @locs is ignored. */
+void fsloc_print(struct nfsd4_fs_locations *locs)
+{
+	int idx;
+
+	if (!locs)
+		return;
+	printk("%s: locations_count %d, migrated %d\n", __FUNCTION__, locs->locations_count, locs->migrated);
+	for (idx = 0; idx < locs->locations_count; idx++)
+		printk("%s:%s\n", locs->locations[idx].hosts, locs->locations[idx].path);
+}
+
 static int
 fsloc_parse(char **mesg, struct nfsd4_fs_locations **locs)
 {
 	int len;
 	char *buf;
-	struct nfsd4_fs_location *locarray = NULL;
 	int listsize, migrated, i, err;
 
-	locarray = NULL;
+	*locs = NULL;
 
 	/* listsize */
 	err = get_int(mesg, &listsize);
@@ -361,16 +373,19 @@ fsloc_parse(char **mesg, struct nfsd4_fs
 	if (listsize == 0)
 		return 0;
 
+	err = -ENOMEM;
 	buf = kmalloc(PAGE_SIZE, GFP_KERNEL);
 	if (!buf)
-		return -ENOMEM;
-	locarray = kmalloc(listsize * sizeof(struct nfsd4_fs_location),
-		       GFP_KERNEL);
-	if (!locarray)
+		return err;
+	*locs = kzalloc(sizeof(struct nfsd4_fs_locations), GFP_KERNEL);
+	if (!*locs)
 		goto out_free_buf;
-	memset(locarray, 0, listsize * sizeof(struct nfsd4_fs_location));
-	atomic_set(&(*locs)->refcnt, 0);
-	(*locs)->locations = locarray;
+	atomic_set(&(*locs)->refcnt, 1); /* XXX needed? */
+
+	(*locs)->locations = kzalloc(listsize * sizeof(struct nfsd4_fs_location),
+				  GFP_KERNEL);
+	if (!(*locs)->locations)
+		goto out_free_all;
 	for (i=0; i < listsize; i++) {
 		(*locs)->locations_count++;
 		/* colon separated host list */
@@ -379,8 +394,8 @@ fsloc_parse(char **mesg, struct nfsd4_fs
 		if (len <= 0)
 			goto out_free_all;
 		err = -ENOMEM;
-		locarray[i].hosts = kstrdup(buf, GFP_KERNEL);
-		if (!locarray[i].hosts)
+		(*locs)->locations[i].hosts = kstrdup(buf, GFP_KERNEL);
+		if (!(*locs)->locations[i].hosts)
 			goto out_free_all;
 		err = -EINVAL;
 		/* slash separated path component list */
@@ -388,9 +403,14 @@ fsloc_parse(char **mesg, struct nfsd4_fs
 		if (len <= 0)
 			goto out_free_all;
 		err = -ENOMEM;
-		locarray[i].path = kstrdup(buf, GFP_KERNEL);
-		if (!locarray[i].path)
+		(*locs)->locations[i].path = kstrdup(buf, GFP_KERNEL);
+		if (!(*locs)->locations[i].path)
 			goto out_free_all;
+		(*locs)->locations[i].sin_addr.s_addr = in_aton((*locs)->locations[i].hosts);
+		(*locs)->locations[i].clnt = NULL;
+		(*locs)->locations[i].rnfs_wq = NULL;
+		(*locs)->locations[i].failed = 0;
+		(*locs)->locations[i].openseq = 0;
 	}
 	/* migrated */
 	err = get_int(mesg, &migrated);
@@ -398,6 +418,7 @@ fsloc_parse(char **mesg, struct nfsd4_fs
 		goto out_free_all;
 	(*locs)->migrated = migrated;
 	kfree(buf);
+	fsloc_print(*locs);
 	return 0;
 out_free_all:
 	nfsd4_fslocs_put(*locs);
@@ -484,7 +505,7 @@ static int svc_export_parse(struct cache
 
 #ifdef CONFIG_NFSD_V4 /* XXX: put following (and define) inside fsloc_parse? */
 		len = qword_get(&mesg, buf, PAGE_SIZE);
-		if (len == 5 && 0 == memcmp(buf, "fsloc", 5)) {
+		if (len == 5 && (memcmp(buf, "fsloc", 5) == 0)) {
 			err = fsloc_parse(&mesg, &exp.ex_fslocs);
 			if (err) goto out;
 		} else
@@ -562,7 +583,7 @@ static inline void svc_export_update(str
 	new->ex_anon_uid = item->ex_anon_uid;
 	new->ex_anon_gid = item->ex_anon_gid;
 	new->ex_fsid = item->ex_fsid;
-	new->ex_fslocs = nfsd4_fslocs_get(item->ex_fslocs);
+	new->ex_fslocs = rnfs_fslocs_get(item->ex_fslocs);
 }
 
 static DefineSimpleCacheLookup(svc_export,1) /* allow inplace updates */
@@ -588,6 +609,22 @@ exp_find_key(svc_client *clp, int fsid_t
 	return ek;
 }
 
+/* Look up the expkey for (clp, fsid_type, fsidv); the status is stored in *err, which is always set. */
+struct svc_expkey *exp_find_key2(svc_client *clp, int fsid_type, u32 *fsidv, struct cache_req *req, int *err)
+{
+   struct svc_expkey key, *ek;
+   *err = -ENOENT; /* set before any return: the !clp path used to leave *err uninitialized */
+   if (!clp)
+      return NULL;
+   key.ek_client = clp;
+   key.ek_fsidtype = fsid_type;
+   memcpy(key.ek_fsid, fsidv, key_len(fsid_type));
+   ek = svc_expkey_lookup(&key, 0);
+   if (ek != NULL) /* NOTE(review): ek is returned even when cache_check() fails; callers must test *err */
+      *err = cache_check(&svc_expkey_cache, &ek->h, req);
+   return ek;
+}
+
 static int exp_set_key(svc_client *clp, int fsid_type, u32 *fsidv,
 		       struct svc_export *exp)
 {
@@ -851,9 +888,6 @@ exp_export(struct nfsctl_export *nxp)
 	new.ex_anon_uid = nxp->ex_anon_uid;
 	new.ex_anon_gid = nxp->ex_anon_gid;
 	new.ex_fsid = nxp->ex_dev;
-#ifdef CONFIG_NFSD_V4 /* XXX: can we get rid of these ifdef's here? */
-	new.ex_fslocs = NULL;
-#endif
 
 	exp = svc_export_lookup(&new, 1);
 
diff -puN fs/nfsd/nfs4xdr.c~rnfs-all fs/nfsd/nfs4xdr.c
--- rnfs-linux-2.6.16-rc3/fs/nfsd/nfs4xdr.c~rnfs-all	2007-03-03 16:21:47.000000000 -0500
+++ rnfs-linux-2.6.16-rc3-jiayingz/fs/nfsd/nfs4xdr.c	2007-03-03 16:21:47.000000000 -0500
@@ -1421,7 +1421,13 @@ nfsd4_encode_fattr(struct svc_fh *fhp, s
 		}
 	}
 
-	status = vfs_getattr(exp->ex_mnt, dentry, &stat);
+	status = rep_pregetattr(dentry->d_inode, &stat);
+	if (status > 0) {
+		status = 0;
+	} else if (status == 0) {
+		status = vfs_getattr(exp->ex_mnt, dentry, &stat);
+	}
+
 	if (status)
 		goto out_nfserr;
 
@@ -1487,7 +1493,8 @@ nfsd4_encode_fattr(struct svc_fh *fhp, s
 			goto out_resource;
 		if (!aclsupport)
 			word0 &= ~FATTR4_WORD0_ACL;
-		if(!(exp->ex_fslocs)) /* XXX: doesn't work in fs-managed case */
+		if ((!exp->ex_fslocs) &&
+				(!exp->ex_mnt->mnt_sb->s_export_op->get_fslocs))
 			word0 &= ~FATTR4_WORD0_FS_LOCATIONS;
 		WRITE32(2);
 		WRITE32(word0);
diff -puN fs/nfsd/nfs4fsloc.c~rnfs-all fs/nfsd/nfs4fsloc.c
--- rnfs-linux-2.6.16-rc3/fs/nfsd/nfs4fsloc.c~rnfs-all	2007-03-03 16:21:47.000000000 -0500
+++ rnfs-linux-2.6.16-rc3-jiayingz/fs/nfsd/nfs4fsloc.c	2007-03-03 16:21:47.000000000 -0500
@@ -70,7 +70,10 @@ static int get_fslocs_default(struct svc
 static int is_referral_default(struct svc_export *exp)
 {
 	if (exp->ex_fslocs)
-		return exp->ex_fslocs->migrated;
+		if ((exp->ex_fslocs->migrated >= 0) && (exp->ex_fslocs->migrated < exp->ex_fslocs->locations_count)) /* migrated is a valid index into locations[]: not reported as a referral */
+			return 0;
+		else /* pairs with the inner if; out-of-range values of migrated are returned unchanged */
+			return exp->ex_fslocs->migrated;
 	else
 		return 0;
 }
diff -puN include/linux/nfsd/export.h~rnfs-all include/linux/nfsd/export.h
--- rnfs-linux-2.6.16-rc3/include/linux/nfsd/export.h~rnfs-all	2007-03-03 16:21:47.000000000 -0500
+++ rnfs-linux-2.6.16-rc3-jiayingz/include/linux/nfsd/export.h	2007-03-03 16:21:47.000000000 -0500
@@ -91,10 +91,13 @@ void			exp_readunlock(void);
 struct svc_expkey *	exp_find_key(struct auth_domain *clp, 
 				     int fsid_type, u32 *fsidv,
 				     struct cache_req *reqp);
-struct svc_export *	exp_get_by_name(struct auth_domain *clp,
-					struct vfsmount *mnt,
-					struct dentry *dentry,
-					struct cache_req *reqp);
+struct svc_expkey *  exp_find_key2(struct auth_domain *clp,
+                 int fsid_type, u32 *fsidv,
+                 struct cache_req *reqp, int *err);
+struct svc_export *  exp_get_by_name(struct auth_domain *clp,
+               struct vfsmount *mnt,
+               struct dentry *dentry,
+               struct cache_req *reqp);
 struct svc_export *	exp_parent(struct auth_domain *clp,
 				   struct vfsmount *mnt,
 				   struct dentry *dentry,
diff -puN include/linux/nfsd/nfsd.h~rnfs-all include/linux/nfsd/nfsd.h
--- rnfs-linux-2.6.16-rc3/include/linux/nfsd/nfsd.h~rnfs-all	2007-03-03 16:21:47.000000000 -0500
+++ rnfs-linux-2.6.16-rc3-jiayingz/include/linux/nfsd/nfsd.h	2007-03-03 16:21:47.000000000 -0500
@@ -24,6 +24,7 @@
 #include <linux/nfsd/auth.h>
 #include <linux/nfsd/stats.h>
 #include <linux/nfsd/interface.h>
+#include <linux/sunrpc/clnt.h>
 /*
  * nfsd version
  */
@@ -73,6 +74,18 @@ int nfsd4_is_referral(struct svc_rqst *r
 struct nfsd4_fs_location {
 	char *hosts; /* colon separated list of hosts */
 	char *path;  /* slash separated list of path components */
+	struct in_addr		sin_addr;	/* fsloc_parse() stores in_aton(hosts) here */
+	struct rpc_clnt   *clnt;	/* if set, nfsd4_fslocs_put() calls rpc_shutdown_client() on it */
+	struct rpc_clnt   **dataclnt;
+	unsigned char     failed;	/* initialized to 0 by fsloc_parse() */
+   /*
+    * openseq (presumably -- this comment sits above rnfs_list; TODO confirm):
+    * for self, the seq of the last sent update; for others, the seq of the latest received update.
+    */
+	struct list_head     rnfs_list;
+	struct work_struct   rnfs_work;
+	struct workqueue_struct	*rnfs_wq;	/* if set, nfsd4_fslocs_put() calls destroy_workqueue() on it */
+	unsigned int			openseq;	/* initialized to 0 by fsloc_parse() */
 };
 
 struct nfsd4_fs_locations {
@@ -83,6 +96,8 @@ struct nfsd4_fs_locations {
 	int migrated;
 };
 
+struct nfsd4_fs_locations *rnfs_fslocs_get(struct nfsd4_fs_locations *replist);
+
 static inline
 struct nfsd4_fs_locations *nfsd4_fslocs_get(struct nfsd4_fs_locations *item)
 {
@@ -100,6 +115,10 @@ nfsd4_fslocs_put(struct nfsd4_fs_locatio
 		for (i = 0; i < item->locations_count; i++) {
 			kfree(item->locations[i].path);
 			kfree(item->locations[i].hosts);
+			if (item->locations[i].clnt)
+				rpc_shutdown_client(item->locations[i].clnt);
+			if (item->locations[i].rnfs_wq)
+				destroy_workqueue(item->locations[i].rnfs_wq);
 		}
 		kfree(item->locations);
 		kfree(item);
diff -puN fs/nfsd/Makefile~rnfs-all fs/nfsd/Makefile
--- rnfs-linux-2.6.16-rc3/fs/nfsd/Makefile~rnfs-all	2007-03-03 16:21:47.000000000 -0500
+++ rnfs-linux-2.6.16-rc3-jiayingz/fs/nfsd/Makefile	2007-03-03 16:21:47.000000000 -0500
@@ -10,5 +10,7 @@ nfsd-$(CONFIG_NFSD_V2_ACL) += nfs2acl.o
 nfsd-$(CONFIG_NFSD_V3)	+= nfs3proc.o nfs3xdr.o
 nfsd-$(CONFIG_NFSD_V3_ACL) += nfs3acl.o
 nfsd-$(CONFIG_NFSD_V4)	+= nfs4proc.o nfs4xdr.o nfs4state.o nfs4idmap.o \
-			   nfs4acl.o nfs4callback.o nfs4recover.o nfs4fsloc.o
+			   nfs4acl.o nfs4callback.o nfs4recover.o nfs4fsloc.o \
+				nfs4reppipe.o nfs4replication.o nfs4repstate.o \
+				nfs4repclnt.o nfs4repd.o nfs4repxdr.o
 nfsd-objs		:= $(nfsd-y)
diff -puN include/linux/nfsd/debug.h~rnfs-all include/linux/nfsd/debug.h
--- rnfs-linux-2.6.16-rc3/include/linux/nfsd/debug.h~rnfs-all	2007-03-03 16:21:47.000000000 -0500
+++ rnfs-linux-2.6.16-rc3-jiayingz/include/linux/nfsd/debug.h	2007-03-03 16:21:47.000000000 -0500
@@ -32,6 +32,10 @@
 #define NFSDDBG_REPCACHE	0x0080
 #define NFSDDBG_XDR		0x0100
 #define NFSDDBG_LOCKD		0x0200
+#define NFSDDBG_REP			0x0400
+#define NFSDDBG_REPCLNT      0x0800
+#define NFSDDBG_REPD      0x1000
+#define NFSDDBG_REPSTATE    0x2000
 #define NFSDDBG_ALL		0x7FFF
 #define NFSDDBG_NOCHANGE	0xFFFF
 
diff -puN /dev/null fs/nfsd/nfs4reppipe.c
--- /dev/null	2003-09-15 09:40:47.000000000 -0400
+++ rnfs-linux-2.6.16-rc3-jiayingz/fs/nfsd/nfs4reppipe.c	2007-03-03 16:21:47.000000000 -0500
@@ -0,0 +1,193 @@
+/*
+ * fs/nfsd/nfs4reppipe.c
+ *
+ *  rpc_pipe interface
+ *
+ *  Jiaying Zhang <jiayingz@umich.edu>
+ */
+#include <linux/config.h>
+#include <linux/types.h>
+#include <linux/sched.h>
+#include <linux/utsname.h>
+#include <linux/file.h>
+#include <linux/security.h>
+#include <linux/quotaops.h>
+#include <linux/dnotify.h>
+#include <linux/sunrpc/xdr.h>
+#include <linux/sunrpc/clnt.h>
+#include <linux/sunrpc/svc.h>
+#include <linux/sunrpc/stats.h>
+#include <linux/sunrpc/clnt.h>
+#include <linux/sunrpc/rpc_pipe_fs.h>
+#include <linux/workqueue.h>
+#include <linux/nfsd/nfsd.h>
+#include <linux/nfsd/export.h>
+#include <linux/nfsd/state.h>
+#include <linux/nfsd/xdr4.h>
+#include<asm/uaccess.h>
+
+#include "nfs4replication.h"
+
+#define NFSDDBG_FACILITY   NFSDDBG_REPCLNT
+
+#define RNFS_STATUS_SUCCESS 0
+#define RNFS_STATUS_FAIL 1
+
+struct mreppipe {	/* state of the single "nfs/mrep" rpc_pipe, reached through the global mreppipe pointer */
+   char     mreppipe_path[16];	/* pipe path given to rpc_mkpipe() and rpc_unlink() */
+   struct dentry  *mreppipe_dentry;	/* return value of rpc_mkpipe() */
+	unsigned int status;	/* NOTE(review): zeroed at creation, otherwise not referenced in this file */
+   struct semaphore  mreppipe_lock;	/* held while queuing an upcall and around every wake_up() */
+   wait_queue_head_t mreppipe_wq;	/* mreppipe_call() sleeps here until the downcall or destroy_msg wakes it */
+};
+
+struct mreppipe *mreppipe = NULL;
+
+static ssize_t   mrep_pipe_upcall(struct file *, struct rpc_pipe_msg *,
+		     char __user *, size_t);
+static ssize_t   mrep_pipe_downcall(struct file *, const char __user *,
+		     size_t);
+void             mrep_pipe_destroy_msg(struct rpc_pipe_msg *);
+
+static struct rpc_pipe_ops mreppipe_upcall_ops = {	/* callback table passed to rpc_mkpipe() by mreppipe_new() */
+        .upcall         = mrep_pipe_upcall,	/* copies a queued message out to the reader */
+        .downcall       = mrep_pipe_downcall,	/* takes the status word and wakes mreppipe_call() */
+        .destroy_msg    = mrep_pipe_destroy_msg,	/* wakes the waiter when msg->errno < 0 */
+};
+
+/* Create the "nfs/mrep" rpc_pipe; if anything fails the global mreppipe is left NULL. */
+void mreppipe_new(void)
+{
+	if (mreppipe)
+		return;
+
+	if ((mreppipe = kmalloc(sizeof(*mreppipe), GFP_KERNEL)) == NULL)
+		return;
+	memset(mreppipe, 0, sizeof(*mreppipe));
+	snprintf(mreppipe->mreppipe_path, sizeof(mreppipe->mreppipe_path), "nfs/mrep");
+	/* The pipe callbacks take the lock and use the wait queue: set both up before rpc_mkpipe(). */
+	init_MUTEX(&mreppipe->mreppipe_lock);
+	init_waitqueue_head(&mreppipe->mreppipe_wq);
+	mreppipe->mreppipe_dentry = rpc_mkpipe(mreppipe->mreppipe_path, NULL, &mreppipe_upcall_ops, RPC_PIPE_WAIT_FOR_OPEN);
+	if (IS_ERR(mreppipe->mreppipe_dentry)) {
+		kfree(mreppipe);
+		mreppipe = NULL;	/* was left pointing at freed memory; mreppipe_call()/mreppipe_delete() test this pointer */
+		return;
+	}
+}
+
+/* Unlink the rpc_pipe and free the global mreppipe state; does nothing if it was never created. */
+void mreppipe_delete(void)
+{
+	if (mreppipe != NULL) {
+		rpc_unlink(mreppipe->mreppipe_path);
+		kfree(mreppipe);
+		mreppipe = NULL;
+	}
+}
+
+/* Queue one upcall (type, hostname, path, lookup_name) on the mrep pipe and sleep until it is answered or destroyed. */
+int mreppipe_call(struct xdr_netobj *hostname, char *path, u32 pathlen, char *lookup_name, unsigned int type)
+{
+	struct rpc_pipe_msg msg;
+	DECLARE_WAITQUEUE(wq, current);
+	int ret = -EIO;
+	char *data, *ptr;
+	unsigned int datalen, len;
+
+	printk("mreppipe_call\n");
+	if (!mreppipe)
+		return -EINVAL;
+	len = strlen(lookup_name);
+	datalen = hostname->len + pathlen + len + sizeof(u32) * 4;
+	data = kmalloc(datalen, GFP_KERNEL);
+	if (!data)	/* the allocation result used to be dereferenced unchecked */
+		return -ENOMEM;
+	/* message layout: type | hostlen | host bytes | pathlen | path bytes | namelen | name bytes */
+	memcpy(data, &type, sizeof(u32));
+	ptr = data + sizeof(u32);
+	memcpy(ptr, &hostname->len, sizeof(u32));
+	ptr += sizeof(u32);
+	memcpy(ptr, hostname->data, hostname->len);
+	ptr += hostname->len;
+	memcpy(ptr, &pathlen, sizeof(u32));
+	ptr += sizeof(u32);
+	memcpy(ptr, path, pathlen);
+	ptr += pathlen;
+	memcpy(ptr, &len, sizeof(u32));
+	ptr += sizeof(u32);
+	memcpy(ptr, lookup_name, len);
+	down(&mreppipe->mreppipe_lock);
+	memset(&msg, 0, sizeof(msg));
+	msg.data = data;
+	msg.len = datalen;
+	add_wait_queue(&mreppipe->mreppipe_wq, &wq);
+	ret = rpc_queue_upcall(mreppipe->mreppipe_dentry->d_inode, &msg);
+	if (ret < 0) {
+		remove_wait_queue(&mreppipe->mreppipe_wq, &wq);
+		printk("mreppipe_fslocations: error in rpc_queue_upcall %d\n", ret);
+		goto out;
+	}
+	/* Both wakers take mreppipe_lock first, so setting the sleep state before up() cannot miss a wakeup. */
+	set_current_state(TASK_UNINTERRUPTIBLE);
+	up(&mreppipe->mreppipe_lock);
+	schedule();
+	current->state = TASK_RUNNING;
+	remove_wait_queue(&mreppipe->mreppipe_wq, &wq);
+	down(&mreppipe->mreppipe_lock);
+ out:
+	kfree(data);
+	up(&mreppipe->mreppipe_lock);
+	return (ret);	/* NOTE(review): msg.errno is never consulted, so a destroyed/failed upcall still returns 0 */
+}
+
+/* RPC pipefs upcall/downcall routines: same as idmap and gss. */
+static ssize_t
+mrep_pipe_upcall(struct file *filp, struct rpc_pipe_msg *msg,
+    char __user *dst, size_t buflen)
+{
+   char *data = (char *)msg->data + msg->copied;
+   size_t mlen = msg->len - msg->copied;	/* bytes of the message not yet handed to the reader */
+   unsigned long left;	/* copy_to_user() returns the number of bytes NOT copied, never a negative value */
+
+   if (mlen > buflen)
+   	mlen = buflen;
+
+   left = copy_to_user(dst, data, mlen);
+	if (mlen && left == mlen) {	/* nothing could be copied: report the fault (the old "left < 0" test was never true) */
+		msg->errno = -EFAULT;
+		return -EFAULT;
+	}
+	mlen -= left;
+	msg->copied += mlen;
+	msg->errno = 0;
+   return mlen;
+}
+
+static ssize_t
+mrep_pipe_downcall(struct file *filp, const char __user *src, size_t mlen)
+{	/* Reply from userspace: exactly one unsigned int status word; wakes whoever sleeps on mreppipe_wq. */
+	unsigned int status;
+	/* Any other write size is rejected with -ENOSPC. */
+	if (mlen != sizeof(status))
+		return -ENOSPC;
+   if (copy_from_user(&status, src, mlen) != 0)
+		return -EFAULT;
+	/* NOTE(review): status is not stored (mreppipe->status stays unused) and mreppipe is not NULL-checked here. */
+	down(&mreppipe->mreppipe_lock);
+	wake_up(&mreppipe->mreppipe_wq);
+	up(&mreppipe->mreppipe_lock);
+	return -status;	/* 0 when status is RNFS_STATUS_SUCCESS; NOTE(review): presumably the writer then sees 0 rather than mlen -- confirm intent */
+}
+
+void
+mrep_pipe_destroy_msg(struct rpc_pipe_msg *msg)
+{
+	/* Nothing to do without a pipe, or when the message carries no error (errno >= 0). */
+	if (mreppipe == NULL || msg->errno >= 0)
+		return;
+	/* Failed message: wake the sleeper on mreppipe_wq, taking the lock around the wakeup. */
+	down(&mreppipe->mreppipe_lock);
+	wake_up(&mreppipe->mreppipe_wq);
+	up(&mreppipe->mreppipe_lock);
+}
diff -puN include/linux/sunrpc/clnt.h~rnfs-all include/linux/sunrpc/clnt.h
--- rnfs-linux-2.6.16-rc3/include/linux/sunrpc/clnt.h~rnfs-all	2007-03-03 16:21:47.000000000 -0500
+++ rnfs-linux-2.6.16-rc3-jiayingz/include/linux/sunrpc/clnt.h	2007-03-03 16:21:47.000000000 -0500
@@ -127,6 +127,9 @@ int		rpc_register(u32, u32, int, unsigne
 
 void		rpc_call_setup(struct rpc_task *, struct rpc_message *, int);
 
+int		rpc_async_send(struct rpc_clnt *clnt, struct rpc_message *msg,
+			       int flags, const struct rpc_call_ops *tk_ops,
+			       void *calldata);
 int		rpc_call_async(struct rpc_clnt *clnt, struct rpc_message *msg,
 			       int flags, const struct rpc_call_ops *tk_ops,
 			       void *calldata);
diff -puN include/linux/sunrpc/xprt.h~rnfs-all include/linux/sunrpc/xprt.h
--- rnfs-linux-2.6.16-rc3/include/linux/sunrpc/xprt.h~rnfs-all	2007-03-03 16:21:47.000000000 -0500
+++ rnfs-linux-2.6.16-rc3-jiayingz/include/linux/sunrpc/xprt.h	2007-03-03 16:21:47.000000000 -0500
@@ -19,7 +19,7 @@ extern unsigned int xprt_udp_slot_table_
 extern unsigned int xprt_tcp_slot_table_entries;
 
 #define RPC_MIN_SLOT_TABLE	(2U)
-#define RPC_DEF_SLOT_TABLE	(16U)
+#define RPC_DEF_SLOT_TABLE	(128U)
 #define RPC_MAX_SLOT_TABLE	(128U)
 
 /*
diff -puN net/sunrpc/clnt.c~rnfs-all net/sunrpc/clnt.c
--- rnfs-linux-2.6.16-rc3/net/sunrpc/clnt.c~rnfs-all	2007-03-03 16:21:47.000000000 -0500
+++ rnfs-linux-2.6.16-rc3-jiayingz/net/sunrpc/clnt.c	2007-03-03 16:21:47.000000000 -0500
@@ -505,6 +505,41 @@ out:
 	return status;
 }
 
+/* Similar to rpc_call_async except that we do not set the task as ASYNC here.
+ * Instead, the flag is set in encode to guarantee packets are sent in order. */
+int
+rpc_async_send(struct rpc_clnt *clnt, struct rpc_message *msg, int flags,
+	       const struct rpc_call_ops *tk_ops, void *data)
+{
+	struct rpc_task	*task;
+	sigset_t	oldset;
+	int		status;
+
+	/* If this client is slain all further I/O fails */
+	if (clnt->cl_dead) 
+		return -EIO;
+
+	/* Create/initialize a new RPC task */
+	status = -ENOMEM;
+	if (!(task = rpc_new_task(clnt, flags, tk_ops, data)))
+		goto out;
+
+	/* Mask signals on GSS_AUTH upcalls */
+	rpc_task_sigmask(task, &oldset);		
+
+	rpc_call_setup(task, msg, 0); /* the caller's flags went to rpc_new_task() only; 0 is passed here */
+
+	/* Set up the call info struct and execute the task */
+	status = task->tk_status;
+	if (status == 0)
+		rpc_execute(task); /* status stays 0: the outcome of the executed task is not reflected in the return value */
+	else
+		rpc_release_task(task);
+
+	rpc_restore_sigmask(&oldset);		
+out:
+	return status; /* -ENOMEM, the tk_status left by rpc_call_setup(), or 0 */
+}
 
 void
 rpc_call_setup(struct rpc_task *task, struct rpc_message *msg, int flags)
diff -puN /dev/null fs/nfsd/nfs4replication.c
--- /dev/null	2003-09-15 09:40:47.000000000 -0400
+++ rnfs-linux-2.6.16-rc3-jiayingz/fs/nfsd/nfs4replication.c	2007-03-03 16:21:47.000000000 -0500
@@ -0,0 +1,1474 @@
+#ifndef __RNFS_USERMODE__
+#include <linux/config.h>
+#include <linux/types.h>
+#include <linux/sched.h>
+#include <linux/utsname.h>
+#include <linux/file.h>
+#include <linux/security.h>
+#include <linux/quotaops.h>
+#include <linux/dnotify.h>
+#include <linux/timer.h>
+#include <linux/sunrpc/xdr.h>
+#include <linux/sunrpc/clnt.h>
+#include <linux/sunrpc/svc.h>
+#include <linux/sunrpc/stats.h>
+#include <linux/nfsd/nfsd.h>
+#include <linux/nfsd/export.h>
+#include <linux/nfsd/state.h>
+#include <linux/nfsd/xdr4.h>
+#include <asm/uaccess.h>
+#else
+#include "rnfs.h"
+#include <linux/nfs4.h>
+#include <linux/nfs3.h>
+#include <sys/stat.h>
+#endif
+#include "nfs4repstate.h"
+#include "nfs4replication.h"
+#include "nfs4repclnt.h"
+#include "nfs4repd.h"
+#define NFSDDBG_FACILITY   NFSDDBG_REP
+
+/* Returns a kmalloc'ed "/a/b" path of dentry_in below parent_in ("/" when equal); NULL on allocation failure or overflow of REPPATH_MAX. */
+static char *
+rep_get_path(struct dentry *dentry_in, struct dentry *parent_in)
+{
+	struct dentry *cur = dentry_in, *up;
+	char *scratch, *head, *start, *result = NULL;
+	int room = REPPATH_MAX - 1, nlen;
+	size_t total;
+	scratch = kmalloc(REPPATH_MAX, GFP_KERNEL);
+	if (scratch == NULL)
+		return NULL;
+	memset(scratch, 0, REPPATH_MAX);
+	/* Build backwards from the tail: the NUL first, then a default "/" that the last component overwrites. */
+	head = scratch + REPPATH_MAX - 1;
+	*head = '\0';
+	start = head - 1;
+	*start = '/';
+	while (cur != parent_in && !IS_ROOT(cur)) {
+		up = cur->d_parent;
+		prefetch(up);
+		nlen = cur->d_name.len;
+		room -= nlen + 1;
+		if (room < 0)
+			goto out;
+		head -= nlen;
+		memcpy(head, cur->d_name.name, nlen);
+		*--head = '/';
+		start = head;
+		cur = up;
+	}
+	total = strlen(start) + 1;
+	result = kmalloc(total, GFP_KERNEL);
+	if (result != NULL)
+		memcpy(result, start, total);
+ out:
+	kfree(scratch);
+	return result;
+}
+
+/* Nonzero iff the request's source address equals that of locations[rf->primary] and that location is not marked failed. */
+static int valid_primary(struct svc_rqst *rqstp, struct rnfs_rnode *rf)
+{
+	struct nfsd4_fs_location *loc = rf->exp->ex_fslocs->locations + rf->primary;
+	return !loc->failed && (rqstp->rq_addr.sin_addr.s_addr == loc->sin_addr.s_addr);
+}
+
+static int
+find_primary(struct svc_rqst *rqstp, struct nfsd4_fs_locations *replist, unsigned int seq)
+{
+	int idx;	/* result: index of the location matching the sender's address, else -1 */
+	for (idx = 0; idx < replist->locations_count; idx++) {
+		struct nfsd4_fs_location *loc = &replist->locations[idx];
+		if (loc->sin_addr.s_addr != rqstp->rq_addr.sin_addr.s_addr)
+			continue;
+		if (seq <= (loc->openseq+1))	/* at most one past the recorded openseq: accept this location */
+			return idx;
+		printk("find_primary: enter_open, seq %u, openseq %u\n", seq, loc->openseq);
+		repd_enter_open(&rqstp->rq_chandle);
+		break;
+	}
+	return -1;
+}
+
+/* Walk up from dentry_in looking for a covering pnode or rnode; called with rep_lock held (dropped and retaken inside). Returns 0 with *rnodep, or *pnodep and *pathp, or nothing set; -EAGAIN asks the caller to retry. */
+static int rnfs_path_scan(struct dentry *dentry_in, struct rnfs_pnode **pnodep, char **pathp, struct rnfs_rnode **rnodep, struct svc_export *exp)
+{
+   struct rnfs_pnode *pnode = NULL;
+   struct rnfs_rnode *rnode = NULL;
+   struct dentry *dentry, *parent, *opt_parent=NULL, *pnode_dentry=NULL;
+   struct nfsd4_fs_locations *replist = exp->ex_fslocs;
+   int err = 0;
+   char *pathname;
+   struct repclnt_request *rqt = NULL;
+   struct rnfs_parent *pparent = NULL;
+
+   dentry = dget(dentry_in);
+   do {
+      if ((pnode = find_pnode(dentry->d_inode))) {
+			pnode_dentry = dget(dentry);
+         break;
+		}
+      if ((rnode = find_rnode(dentry->d_inode)))
+         break;
+      if ((dentry == exp->ex_dentry) || IS_ROOT(dentry))
+         break;
+      if ((opt_parent == NULL) && (pparent = find_parent(dentry->d_inode)) && (pparent->count[replist->migrated] > 5) && !(pparent->bitmap & ~(1 << (replist->migrated+1))))
+         opt_parent = dget(dentry);
+      parent = dget_parent(dentry);
+      dput(dentry);
+      dentry = parent;
+   } while (1);
+   dput(dentry);
+
+   if (!pnode && !rnode && opt_parent) {
+      dprintk("lock %ld for optimization, count %u, bitmap %lu, migrate %d\n", opt_parent->d_inode->i_ino, pparent->count[replist->migrated], pparent->bitmap, replist->migrated);
+      pnode = alloc_pnode(opt_parent, exp); /* NOTE(review): result is used without a NULL check */
+      pnode->pflags |= PNODE_WAIT_LOCK;
+      if ((pathname = rep_get_path(opt_parent, exp->ex_dentry)))
+         rqt = repclnt_rq_alloc(pnode, pathname, REP_OP_NONE, NULL);
+      rep_unlock();
+      if (rqt) {
+         err = repclnt_lock(pnode, rqt);
+         repclnt_rq_lockput(rqt);
+      } else
+         err = -ENOMEM;
+      pnode->pflags &= ~PNODE_WAIT_LOCK;
+      if ((pnode->pflags & PNODE_WAIT_WAITING))
+         wake_up_interruptible(&pnode->pwait);
+      rep_lock();
+		if (err == 0) {
+			pnode_dentry = opt_parent; /* the opt_parent reference moves to pnode_dentry */
+			opt_parent = NULL;
+		}
+   }
+   if (opt_parent)
+      dput(opt_parent);
+	if (err == -EAGAIN)
+		err = 0;
+   if (err)
+      return err;
+   if (rnode)
+      *rnodep = rnode;
+   else if (pnode) {
+      if ((pnode->pflags & PNODE_WAITING)) {
+         pnode->pflags |= PNODE_WAIT_WAITING;
+         rnfs_wait_timeout(pnode->pwait, !(pnode->pflags & PNODE_WAITING), REPD_DFLT_TIMEO*HZ, &rep_sema);
+      }
+      if ((pnode->pflags & (PNODE_WAITING|PNODE_CLOSED))) {
+         rep_unlock();
+         dput(pnode_dentry); /* this exit used to leak the reference taken for pnode_dentry; dput(NULL) is a no-op */
+         pnode_put(pnode);
+         rep_lock();
+         return -EAGAIN;
+      }
+      rnfs_timer(pnode);
+      pathname = rep_get_path(dentry_in, pnode_dentry);
+		dput(pnode_dentry);
+      *pnodep = pnode;
+      *pathp = pathname;
+   } else {
+      /* neither pnode nor rnode: leave *pnodep, *pathp and *rnodep untouched */
+	}
+   return 0;
+}
+
+/* Common prologue for update requests from a client: declares the locals used by RNODE_CHECK/PNODE_CHECK/PNODE_NEW, handles exports without fs_locations, then takes rep_lock at "again:". */
+#define REP_HEAD(func, name) \
+   struct rnfs_pnode *pnode = NULL; \
+   struct rnfs_rnode *rnode = NULL; \
+   struct svc_export *exp = current_fh->fh_export; \
+   int err = 0; \
+   struct rep_##name *rep_##name; \
+	char *pathname; \
+	struct repclnt_request *rqt = NULL; \
+	int forward_count = 0; \
+	if (!exp->ex_fslocs) { /* export has no fs_locations: run the plain VFS operation and return */ \
+		REP_VFS_##func; \
+		return err; \
+	} \
+ again: /* retry target of the "goto again" statements; NOTE(review): rqt is not reset here */ \
+	pnode = NULL; \
+	rnode = NULL; \
+	pathname = NULL; \
+	rep_lock();
+
+/* rnode found: run the VFS op locally when the request comes from the valid primary; otherwise send REPPROC_FORWARD and rescan from "again:". */
+#define RNODE_CHECK(func, name) \
+   if (rnode) { \
+      rep_unlock(); \
+      if (valid_primary(rqstp, rnode)) { \
+         REP_VFS_##func; \
+		} else { \
+			forward_count++; \
+			err = repclnt_send_one(REPPROC_FORWARD, rnode, REP_OP_NONE, NULL, NULL); \
+			rnode_put(rnode); \
+			rnode = NULL; \
+			if (err && forward_count > 2) /* give up once a forward fails after more than two attempts */ \
+				return -EACCES; \
+			goto again; \
+		} \
+		rnode_put(rnode); \
+		return err; \
+   }
+
+#define PNODE_CHECK(func, name) /* pnode found: apply the VFS op locally, then push the update through repclnt_update() */ \
+   if (pnode) { \
+		if (REP_INIT_##func) /* request payload built: allocate the replication request while rep_lock is held */ \
+			rqt = repclnt_rq_alloc(pnode, pathname, REP_OP_##func, (void *) rep_##name); \
+		rep_unlock(); \
+      REP_VFS_##func; \
+		if (err) { /* NOTE(review): a failed VFS op is only logged; the update below is still sent */ \
+			dprintk("vfs operation err\n"); \
+		} \
+		if (rqt) { \
+      	repclnt_update(pnode, rqt); \
+			if (!((REP_OP_##func == REP_OP_SETATTR) && (((struct iattr *) rep_##name)->ia_valid & ATTR_MTIME))) /* unless this is a SETATTR that sets mtime itself */ \
+				current_fh->fh_dentry->d_inode->i_mtime = rqt->mtime; \
+			repclnt_rq_put(rqt); \
+		} \
+		pnode_put(pnode); \
+      return err; \
+   }
+
+#define PNODE_NEW(func, name) /* no pnode/rnode covers the object: allocate a pnode, lock it via repclnt_lock(), then run the VFS op; always ends in return or goto again */ \
+      pnode = alloc_pnode(current_fh->fh_dentry, exp); /* NOTE(review): result is used without a NULL check */ \
+		pnode->pflags |= PNODE_WAIT_LOCK; \
+      if (err || conflict_parent(current_fh->fh_dentry->d_inode, exp->ex_fslocs->migrated)) /* the visible callers only get here with err == 0 */ \
+         pnode->pflags |= PNODE_SINGLE; \
+      if (REP_INIT_##func && (pathname = rep_get_path(current_fh->fh_dentry, exp->ex_dentry))) \
+			rqt = repclnt_rq_alloc(pnode, pathname, REP_OP_##func, (void *) rep_##name); \
+      rep_unlock(); \
+		if (rqt) { \
+         err = repclnt_lock(pnode, rqt); \
+         if (!err) { /* the VFS op runs only after repclnt_lock() succeeded */ \
+            REP_VFS_##func; \
+				if (!((REP_OP_##func == REP_OP_SETATTR) && (((struct iattr *) rep_##name)->ia_valid & ATTR_MTIME))) \
+					current_fh->fh_dentry->d_inode->i_mtime = rqt->mtime; \
+			} \
+			repclnt_rq_lockput(rqt); \
+		} else { /* payload, path or request allocation failed */ \
+			err = -ENOMEM; \
+		} \
+		pnode->pflags &= ~PNODE_WAIT_LOCK; \
+		if ((pnode->pflags & PNODE_WAIT_WAITING)) \
+			wake_up_interruptible(&pnode->pwait); \
+		pnode_put(pnode); \
+		if (err == -EAGAIN) /* retry the whole operation from REP_HEAD's "again:" label */ \
+			goto again; \
+      return err;
+
+/* VFS: functions called from vfs */
+/* REP_CREATE */
+static struct rep_open *rep_open_init(struct svc_fh *current_fh);
+
+#define REP_VFS_CREATE_PNODE /* after a successful create of a regular file under a pnode: send a REP_OP_OPEN update for the new file */ \
+	if (!err && pnode && S_ISREG(resfhp->fh_dentry->d_inode->i_mode)) { \
+		struct rep_open *rep_open; \
+		struct dentry *pdentry; \
+		char *pathname3; \
+		struct repclnt_request *rqt3 = NULL; \
+		rep_lock(); \
+		if ((rep_open = rep_open_init(resfhp))) { \
+			rep_open->pnode->pflags |= PNODE_COMPLETE; \
+			pdentry = d_find_alias(pnode->fi_inode); \
+      	pathname3 = rep_get_path(resfhp->fh_dentry, pdentry); \
+			dput(pdentry); \
+			rqt3 = repclnt_rq_alloc(pnode, pathname3, REP_OP_OPEN, rep_open); \
+			if (rqt3) { dprintk("vfs_create_pnode: seq %d, pathname %s\n", rqt3->seq, pathname3); } /* rqt3 may be NULL (it is checked below), so it must not be dereferenced unguarded */ \
+		} \
+		rep_unlock(); \
+		if (rqt3) { \
+			err = repclnt_update(pnode, rqt3); \
+			repclnt_rq_put(rqt3); \
+		} \
+	}
+
+#define REP_VFS_CREATE \
+	err = rep_vfs_create(rqstp, current_fh, fname, flen, iap, type, rdev, dirp, resfhp); \
+	REP_VFS_CREATE_PNODE;
+
+#define REP_VFS_CREATE_V3 \
+	err = rep_vfs_create_v3(rqstp, current_fh, fname, flen, iap, resfhp, type, verifier, truncp, dirp); \
+	REP_VFS_CREATE_PNODE;
+
+/* Snapshot a create request (name, type, type-specific data, attrs) for replication; also stamps any missing a/m/ctime into *iap. */
+static struct rep_create *
+rep_create_init(char *fname, int flen, struct iattr *iap, int type, dev_t rdev, struct inode *dirp)
+{
+	struct rep_create *req = kmalloc(sizeof(struct rep_create), GFP_KERNEL);
+	struct timespec stamp;
+
+	if (req == NULL)
+		return NULL;
+	req->cr_namelen = flen;
+	req->cr_name = kmalloc(flen, GFP_KERNEL);
+	if (req->cr_name == NULL) {
+		kfree(req);
+		return NULL;
+	}
+	memcpy(req->cr_name, fname, flen);	/* exactly flen bytes; no terminating NUL is added */
+	req->cr_type = type;
+
+	/* NOTE(review): device numbers are recorded for S_IFSOCK only -- confirm S_IFCHR/S_IFBLK need none. */
+	if (type == S_IFSOCK) {
+		req->u.dev.specdata1 = MAJOR(rdev);
+		req->u.dev.specdata2 = MINOR(rdev);
+	} else if (type == NFS3_CREATE_UNCHECKED || type == NFS3_CREATE_GUARDED ||
+		   type == NFS3_CREATE_EXCLUSIVE) {
+		req->u.verf = rdev;
+	}
+	/* Fill in whichever timestamps the caller did not supply, then mark all three as valid. */
+	stamp = current_fs_time(dirp->i_sb);
+	if (!(iap->ia_valid & ATTR_ATIME))
+		iap->ia_atime = stamp;
+	if (!(iap->ia_valid & ATTR_MTIME))
+		iap->ia_mtime = stamp;
+	if (!(iap->ia_valid & ATTR_CTIME))
+		iap->ia_ctime = stamp;
+	iap->ia_valid |= (ATTR_ATIME | ATTR_MTIME | ATTR_CTIME);
+	memcpy(&req->cr_attrs, iap, sizeof(struct iattr));
+	return req;
+}
+
+#define REP_INIT_CREATE \
+	(rep_create = rep_create_init(fname, flen, iap, type, rdev, dirp))
+
+#define REP_INIT_CREATE_V3	REP_INIT_CREATE
+
+int rep_create(struct svc_rqst *rqstp, struct svc_fh *current_fh, char *fname, int flen, struct iattr *iap, int type, dev_t rdev, struct inode *dirp, struct svc_fh *resfhp)
+{
+   REP_HEAD(CREATE, create); /* declares pnode/rnode/err/rep_create/..., defines the "again:" label and takes rep_lock */
+   dprintk("rep_create\n");
+	if ((err = rnfs_path_scan(current_fh->fh_dentry, &pnode, &pathname, &rnode, current_fh->fh_export))) {
+		rep_unlock();
+		if (err == -EAGAIN) /* scan asked for a retry */
+			goto again;
+		return err;
+	}
+   RNODE_CHECK(CREATE, create); /* returns or retries if an rnode was found */
+   PNODE_CHECK(CREATE, create); /* returns if a pnode was found */
+	//PNODE_CREATE_NEW(CREATE, create);
+	PNODE_NEW(CREATE, create); /* neither found: always ends in return or goto again */
+}
+
+int rep_create_v3(struct svc_rqst *rqstp, struct svc_fh *current_fh, char *fname, int flen, struct iattr *iap, int type, u32 *verifier, int *truncp, struct inode *dirp, struct svc_fh *resfhp)
+{
+	dev_t rdev = verifier ? (*verifier) : 0; /* first u32 of the verifier (0 if none); REP_INIT_CREATE passes it to rep_create_init() as rdev */
+   REP_HEAD(CREATE_V3, create); /* declares pnode/rnode/err/rep_create/..., defines the "again:" label and takes rep_lock */
+   dprintk("rep_create_v3: %s\n", fname); /* NOTE(review): assumes fname is NUL-terminated although flen is passed separately */
+   if ((err = rnfs_path_scan(current_fh->fh_dentry, &pnode, &pathname, &rnode, current_fh->fh_export))) {
+      rep_unlock();
+      if (err == -EAGAIN) /* scan asked for a retry */
+         goto again;
+      return err;
+   }
+   RNODE_CHECK(CREATE_V3, create); /* returns or retries if an rnode was found */
+   PNODE_CHECK(CREATE_V3, create); /* returns if a pnode was found */
+   //PNODE_CREATE_NEW(CREATE_V3, create);
+	PNODE_NEW(CREATE_V3, create); /* neither found: always ends in return or goto again */
+}
+
+/* REP_REMOVE */
+#define REP_VFS_REMOVE \
+	err = rep_vfs_remove(pnode, current_fh, type, fname, flen);
+
+/* Allocate a rep_remove holding the type and a private copy of the flen name bytes; NULL if either allocation fails. */
+static struct rep_remove *
+rep_remove_init(int type, char *fname, int flen)
+{
+	struct rep_remove *rm = kmalloc(sizeof(struct rep_remove), GFP_KERNEL);
+	char *name = rm ? kmalloc(flen, GFP_KERNEL) : NULL;
+	if (name == NULL) {
+		kfree(rm);	/* kfree(NULL) is a no-op, so this covers both failure cases */
+		return NULL;
+	}
+	rm->rm_type = type;
+	rm->rm_namelen = flen;
+	rm->rm_name = memcpy(name, fname, flen);
+	return rm;
+}
+
+#define REP_INIT_REMOVE \
+	(rep_remove = rep_remove_init(type, fname, flen))
+
+int rep_remove(struct svc_rqst *rqstp, struct svc_fh *current_fh, int type, char *fname, int flen)
+{
+   REP_HEAD(REMOVE, remove); /* declares pnode/rnode/err/rep_remove/..., defines the "again:" label and takes rep_lock */
+   dprintk("rep_remove: %s\n", fname); /* NOTE(review): assumes fname is NUL-terminated although flen is passed separately */
+	if ((err = rnfs_path_scan(current_fh->fh_dentry, &pnode, &pathname, &rnode, current_fh->fh_export))) {
+		rep_unlock();
+		if (err == -EAGAIN) /* scan asked for a retry */
+			goto again;
+		return err;
+	}
+   RNODE_CHECK(REMOVE, remove); /* returns or retries if an rnode was found */
+   PNODE_CHECK(REMOVE, remove); /* returns if a pnode was found */
+   PNODE_NEW(REMOVE, remove); /* neither found: always ends in return or goto again */
+}
+
+/* REP_SYMLINK */
+#define REP_VFS_SYMLINK \
+   err = rep_vfs_symlink(current_fh, fname, flen, path, plen, resfhp, iap);
+
+/*
+ * Build a rep_create of type S_IFLNK with private copies of the name (flen bytes), the link
+ * target (linklen bytes) and *iap; returns NULL if any of the three allocations fails. */
+static struct rep_create *
+rep_symlink_init(char *fname, int flen, char *linkname, int linklen, struct iattr *iap)
+{
+	struct rep_create *req = kmalloc(sizeof(struct rep_create), GFP_KERNEL);
+	char *name = req ? kmalloc(flen, GFP_KERNEL) : NULL;
+	char *target = name ? kmalloc(linklen, GFP_KERNEL) : NULL;
+	if (target == NULL) {
+		kfree(name);
+		kfree(req);
+		return NULL;
+	}
+	req->cr_namelen = flen;
+	req->cr_name = memcpy(name, fname, flen);
+	req->cr_type = S_IFLNK;
+	req->u.link.namelen = linklen;
+	req->u.link.name = memcpy(target, linkname, linklen);
+	memcpy(&req->cr_attrs, iap, sizeof(struct iattr));
+	return req;
+}
+
+#define REP_INIT_SYMLINK \
+	(rep_create = rep_symlink_init(fname, flen, path, plen, iap))
+
+int rep_symlink(struct svc_rqst *rqstp, struct svc_fh *current_fh, char *fname, int flen, char *path, int plen, struct svc_fh *resfhp, struct iattr *iap)
+{
+   REP_HEAD(SYMLINK, create); /* declares pnode/rnode/err/rep_create/..., defines the "again:" label and takes rep_lock */
+   dprintk("rep_symlink\n");
+	if ((err = rnfs_path_scan(current_fh->fh_dentry, &pnode, &pathname, &rnode, current_fh->fh_export))) {
+		rep_unlock();
+		if (err == -EAGAIN) /* scan asked for a retry */
+			goto again;
+		return err;
+	}
+   RNODE_CHECK(SYMLINK, create); /* returns or retries if an rnode was found */
+   PNODE_CHECK(SYMLINK, create); /* returns if a pnode was found */
+   PNODE_NEW(SYMLINK, create); /* neither found: always ends in return or goto again */
+}
+
+/* nfsd_setattr needs nfserr return */
+/* REP_SETATTR */
+#define REP_VFS_SETATTR \
+   err = rep_vfs_setattr(current_fh, iap);
+
+/* Heap-allocated copy of *iap used as the SETATTR replication payload; NULL if kmalloc fails. */
+static struct iattr *rep_setattr_init(struct iattr *iap)
+{
+	struct iattr *copy = kmalloc(sizeof(struct iattr), GFP_KERNEL);
+	if (copy != NULL)
+		memcpy(copy, iap, sizeof(struct iattr));
+	return copy;
+}
+
+#define REP_INIT_SETATTR \
+	(rep_setattr = rep_setattr_init(iap))
+
+/* SETATTR entry point: REP_HEAD expanded by hand because the payload is a plain struct iattr rather than a struct rep_setattr. */
+int rep_setattr(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct iattr *iap)
+{
+   struct rnfs_pnode *pnode = NULL;
+   struct rnfs_rnode *rnode = NULL;
+   struct svc_export *exp = current_fh->fh_export;
+   int err = 0, forward_count = 0;
+   struct iattr *rep_setattr = NULL;
+	char *pathname = NULL;
+	struct repclnt_request *rqt = NULL;
+	if (!exp->ex_fslocs) {
+		REP_VFS_SETATTR;
+		return err;
+	}
+ again:
+	pnode = NULL; rnode = NULL; pathname = NULL; /* same per-retry reset as REP_HEAD; without it a retry could reuse a pnode that was already put */
+	rep_lock();
+   dprintk("rep_setattr\n");
+	if ((err = rnfs_path_scan(current_fh->fh_dentry, &pnode, &pathname, &rnode, current_fh->fh_export))) {
+		rep_unlock();
+		if (err == -EAGAIN)
+			goto again;
+		return err;
+	}
+   RNODE_CHECK(SETATTR, setattr);
+   PNODE_CHECK(SETATTR, setattr);
+   PNODE_NEW(SETATTR, setattr);
+}
+
+/* REP_RENAME */
+#define REP_VFS_LNK \
+	err = rep_vfs_link(rqstp, fhp2, fname, flen, tfhp);
+
+#define REP_VFS_RENAME \
+	err = rep_vfs_rename(current_fh, fname, flen, fhp2, tname, tlen);
+
+/* Allocate a rep_rename with private copies of the source and target names (no NUL added); NULL if any allocation fails. */
+static struct rep_rename *rep_rename_init(char *fname, int flen, char *tname, int tlen)
+{
+   struct rep_rename *renamep;
+   if (!(renamep = kmalloc(sizeof(struct rep_rename), GFP_KERNEL)))
+      return NULL;
+   renamep->snamelen = flen;
+   if (!(renamep->sname = kmalloc(flen, GFP_KERNEL))) {
+      kfree(renamep);
+      return NULL;
+   }
+   memcpy(renamep->sname, fname, flen);
+   renamep->tnamelen = tlen;
+   if (!(renamep->tname = kmalloc(tlen, GFP_KERNEL))) {
+      kfree(renamep->sname);
+      kfree(renamep);
+      return NULL;
+   }
+   memcpy(renamep->tname, tname, tlen);
+	dprintk("rep_rename_init: sname %.*s, slen %d, tname %.*s, tlen %d\n", flen, renamep->sname, renamep->snamelen, tlen, renamep->tname, renamep->tnamelen); /* bounded: the copies are not NUL-terminated, plain %s read past them */
+   renamep->rqt = NULL;
+	renamep->tpathname = NULL;
+   return renamep;
+}
+
+#define REP_INIT_RENAME \
+	(rep_rename = rep_rename_init(fname, flen, tname, tlen))
+
+#define REP_INIT_LNK \
+	REP_INIT_RENAME
+
+static int rep_path_close(struct svc_fh *fhp) /* walks from fhp's dentry toward the export root, waiting for each pnode on the way to reach PNODE_CLOSED */
+{
+   struct dentry *dentry = fhp->fh_dentry;
+   struct dentry *parent;
+	struct rnfs_pnode *pnode;
+   int status = 0;
+
+   dget(dentry);
+	do {
+		if ((pnode = find_pnode(dentry->d_inode))) {
+			rep_unlock(); /* implies the caller holds rep_lock on entry */
+			if (!rep_wait_close(pnode)) {
+				status = -EPERM;
+         	break; /* NOTE(review): leaves without rep_lock() or pnode_put(), unlike the normal path -- confirm callers expect this */
+			}
+      	wait_event_interruptible_timeout(pnode->pwait, ((pnode->pflags & PNODE_CLOSED)), REPD_DFLT_TIMEO*HZ); /* result ignored: timeout or signal is not distinguished from success */
+			pnode_put(pnode);
+			rep_lock();
+		}
+      parent = dget_parent(dentry);
+      dput(dentry);
+		dentry = parent;
+   } while ((dentry != fhp->fh_export->ex_dentry) && !IS_ROOT(dentry)); /* the export root / fs root itself is not examined */
+   dput(dentry);
+   return status; /* 0, or -EPERM when rep_wait_close() returned 0 */
+}
+
+#define REP_VFS_OPEN2 /* run the local VFS step of a two-handle operation, selected by `opnum`; result lands in `err` */ \
+	if (opnum == REP_OP_RENAME) { \
+		REP_VFS_RENAME; \
+	} else { /* every other opnum is treated as a hard link */ \
+		REP_VFS_LNK; \
+	}
+
+/*
+ * Walk upwards from `from` and report whether `target` is met before the
+ * walk stops at the export root or a filesystem root.  The dentry the walk
+ * stops on is itself never compared.  All references taken are dropped.
+ */
+static int rep_walk_meets(struct dentry *from, struct dentry *target, struct svc_export *exp)
+{
+	struct dentry *cur = dget(from);
+	struct dentry *up;
+
+	for (;;) {
+		if (cur == target) {
+			dput(cur);
+			return 1;
+		}
+		up = dget_parent(cur);
+		dput(cur);
+		cur = up;
+		if (cur == exp->ex_dentry || IS_ROOT(cur))
+			break;
+	}
+	dput(cur);
+	return 0;
+}
+
+/*
+ * Return whichever of d1/d2 lies on the other's path towards the export
+ * root, or the one that is the export root itself; NULL when neither
+ * holds.  The result is one of the arguments and carries no extra
+ * reference.
+ */
+static struct dentry *get_com_parent(struct dentry *d1, struct dentry *d2, struct svc_export *exp)
+{
+	if (rep_walk_meets(d1, d2, exp))
+		return d2;
+	if (rep_walk_meets(d2, d1, exp))
+		return d1;
+
+	/* The walks never look at the export root, so test it explicitly. */
+	if (exp->ex_dentry == d1)
+		return d1;
+	if (exp->ex_dentry == d2)
+		return d2;
+	return NULL;
+}
+
+#define OPEN2_RNODE(rnode, rnode2, pnode2, pathname2) /* send a REPPROC_FORWARD for `rnode`, release everything passed in, then retry or give up */ \
+{ \
+	forward_count++; \
+	err = repclnt_send_one(REPPROC_FORWARD, rnode, REP_OP_NONE, NULL, NULL); \
+  rnode_put(rnode); \
+  if (rnode2) rnode_put(rnode2); \
+  if (pnode2) pnode_put(pnode2); \
+  if (pathname2) kfree(pathname2); \
+	rnode = rnode2 = NULL; /* arguments must be lvalues: they are cleared here */ \
+	pnode2 = NULL; \
+	pathname2 = NULL; \
+	if (err && forward_count > 2) /* fails only when the latest forward failed and more than two were attempted */ \
+		return -EACCES; \
+  goto again; /* needs an `again` label plus `err` and `forward_count` in the expanding function */ \
+}
+
+/*
+ * Walk from dentry_in towards the export root looking for the closest
+ * dentry that already has a pnode or an rnode (a pnode wins when one
+ * dentry has both).
+ *
+ * Outputs (only written when something is found, so callers must
+ * pre-initialise them to NULL):
+ *   *rnodep - the rnode found, or
+ *   *pnodep - the pnode found, with *pathp set to the result of
+ *             rep_get_path(dentry_in, <pnode's dentry>).
+ *
+ * Returns 0, or -EAGAIN when the pnode is still PNODE_WAITING after the
+ * timed wait or is PNODE_CLOSED; the pnode reference is dropped in that
+ * case and the caller should rescan.
+ *
+ * Called with the rep lock held; it is held again on return on every path.
+ */
+static int rnfs_path_scan_nopt(struct dentry *dentry_in, struct rnfs_pnode **pnodep, char **pathp, struct rnfs_rnode **rnodep, struct svc_export *exp)
+{
+	struct rnfs_pnode *pnode = NULL;
+	struct rnfs_rnode *rnode = NULL;
+	struct dentry *dentry, *parent, *pnode_dentry = NULL;
+	char *pathname;
+
+	dentry = dget(dentry_in);
+	do {
+		if ((pnode = find_pnode(dentry->d_inode))) {
+			/* Keep the matching dentry: the relative path is built from it. */
+			pnode_dentry = dget(dentry);
+			break;
+		}
+		if ((rnode = find_rnode(dentry->d_inode)))
+			break;
+		if ((dentry == exp->ex_dentry) || IS_ROOT(dentry))
+			break;
+		parent = dget_parent(dentry);
+		dput(dentry);
+		dentry = parent;
+	} while (1);
+	dput(dentry);
+
+	if (rnode)
+		*rnodep = rnode;
+	else if (pnode) {
+		if ((pnode->pflags & PNODE_WAITING)) {
+			pnode->pflags |= PNODE_WAIT_WAITING;
+			rnfs_wait_timeout(pnode->pwait, !(pnode->pflags & PNODE_WAITING), REPD_DFLT_TIMEO*HZ, &rep_sema);
+		}
+		if ((pnode->pflags & (PNODE_WAITING|PNODE_CLOSED))) {
+			/* Fix: this early exit used to leak the pnode_dentry reference. */
+			dput(pnode_dentry);
+			rep_unlock();
+			pnode_put(pnode);
+			rep_lock();
+			return -EAGAIN;
+		}
+		rnfs_timer(pnode);
+		pathname = rep_get_path(dentry_in, pnode_dentry);
+		dput(pnode_dentry);
+		*pnodep = pnode;
+		*pathp = pathname;
+	}
+	return 0;
+}
+
+/*
+ * Replicated operation that involves two directories: a rename when
+ * opnum == REP_OP_RENAME, a hard link otherwise (see REP_VFS_OPEN2).
+ *
+ * current_fh/fname describe the source side, fhp2/tname the target side;
+ * tfhp is only used by the link step.
+ *
+ * Outline:
+ *   1. No replica list on the export: just do the local VFS operation.
+ *   2. Scan both paths for existing pnodes/rnodes.  An rnode on either
+ *      path means the request is forwarded (OPEN2_RNODE) and retried.
+ *   3. Both paths already covered by pnodes: queue the update on them.
+ *   4. Otherwise close any pnodes on the two paths and lock either the
+ *      common parent (when get_com_parent() finds one) or both objects,
+ *      perform the operation and distribute it.
+ *
+ * Returns the result of the VFS step, or a negative errno
+ * (-ENOMEM, -EACCES, -EPERM, ...) when the replication step fails.
+ */
+static int rep_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, char *fname, int flen, struct svc_fh *fhp2, char *tname, int tlen, unsigned int opnum, struct svc_fh *tfhp)
+{
+	struct rnfs_pnode *pnode = NULL, *pnode2 = NULL;
+	struct rnfs_rnode *rnode = NULL, *rnode2 = NULL;
+	struct svc_export *exp = current_fh->fh_export;
+	int err = 0, err2 = 0;
+	struct rep_rename *rep_rename;
+	char *pathname = NULL, *pathname2 = NULL;
+	struct repclnt_request *rqt = NULL;
+	struct dentry *com_parent;
+	int forward_count = 0;
+
+	if (!exp->ex_fslocs) {
+		REP_VFS_OPEN2;
+		return err;
+	}
+ again:
+	pnode = pnode2 = NULL;
+	rnode = rnode2 = NULL;
+	pathname = pathname2 = NULL;
+	/*
+	 * Fix: a previous pass may have released its request already; without
+	 * this reset a later "if (rqt)" could act on a stale pointer.
+	 */
+	rqt = NULL;
+	rep_lock();
+	if ((err = rnfs_path_scan_nopt(current_fh->fh_dentry, &pnode, &pathname, &rnode, exp))) {
+		rep_unlock();
+		if (err == -EAGAIN)
+			goto again;
+		return err;
+	}
+	if ((err = rnfs_path_scan_nopt(fhp2->fh_dentry, &pnode2, &pathname2, &rnode2, exp))) {
+		rep_unlock();
+		/* Fix: give back whatever the first scan handed out before leaving. */
+		if (rnode)
+			rnode_put(rnode);
+		if (pnode)
+			pnode_put(pnode);
+		kfree(pathname);
+		if (err == -EAGAIN)
+			goto again;
+		return err;
+	}
+	if (rnode || rnode2) {
+		rep_unlock();
+		/* Each expansion ends in return or "goto again". */
+		if (rnode2)
+			OPEN2_RNODE(rnode2, rnode, pnode, pathname);
+		if (rnode)
+			OPEN2_RNODE(rnode, rnode2, pnode2, pathname2);
+	}
+	/* From here on rnode == NULL && rnode2 == NULL. */
+
+	if (pnode && pnode2) {
+		dprintk("pnode %ld and pnode2 %ld\n", pnode->fi_inode->i_ino, pnode2->fi_inode->i_ino);
+		rnfs_timer(pnode);
+		rnfs_timer(pnode2);
+		if (REP_INIT_RENAME) {
+			if (pnode != pnode2) {
+				struct repclnt_request *rqt2 = NULL;
+				rqt2 = repclnt_rq_alloc(pnode2, NULL, REP_OP_NONE, NULL);
+				rep_rename->rqt = rqt2;
+			}
+			rep_rename->tpathname = pathname2;
+			rqt = repclnt_rq_alloc(pnode, pathname, opnum, (void *) rep_rename);
+		} else {
+			/* Fix: nothing took over the path strings, so free them here. */
+			kfree(pathname);
+			kfree(pathname2);
+		}
+		rep_unlock();
+		REP_VFS_OPEN2;
+		if (rqt) {
+			if (!err) {
+				repclnt_update(pnode, rqt);
+				current_fh->fh_dentry->d_inode->i_mtime = rqt->mtime;
+				fhp2->fh_dentry->d_inode->i_mtime = rqt->mtime;
+			}
+			repclnt_rq_put(rqt);
+		} else if (!err)
+			err = -ENOMEM;
+		pnode_put(pnode);
+		pnode_put(pnode2);
+		return err;
+	}
+
+	/* At most one side has a pnode: drop it and start from a clean state. */
+	rep_unlock();
+	if (pnode)
+		pnode_put(pnode);
+	if (pnode2)
+		pnode_put(pnode2);
+	kfree(pathname);
+	kfree(pathname2);
+	rep_lock();
+	/* rep_path_close() returns with the lock released when it fails. */
+	if (!(err = rep_path_close(current_fh)))
+		err = rep_path_close(fhp2);
+	if (err)
+		return err;
+
+	com_parent = get_com_parent(current_fh->fh_dentry, fhp2->fh_dentry, exp);
+	if (com_parent) {
+		struct repclnt_request *rqt2 = NULL;
+		dprintk("try to lock com_parent of two objs\n");
+		pnode = alloc_pnode(com_parent, exp);
+		pnode->pflags |= PNODE_WAIT_LOCK;
+		if (err2 || conflict_parent(com_parent->d_inode, exp->ex_fslocs->migrated))
+			pnode->pflags |= PNODE_SINGLE;
+		if ((pathname = rep_get_path(com_parent, exp->ex_dentry)))
+			rqt = repclnt_rq_alloc(pnode, pathname, REP_OP_NONE, NULL);
+		rep_unlock();
+		if (rqt) {
+			err = err2 = repclnt_lock(pnode, rqt);
+			if (!err) {
+				REP_VFS_OPEN2;
+			}
+			if (!err && REP_INIT_RENAME && (pathname = rep_get_path(current_fh->fh_dentry, com_parent)) && (rep_rename->tpathname = rep_get_path(fhp2->fh_dentry, com_parent))) {
+				dprintk("tpathname %s\n", rep_rename->tpathname);
+				rqt2 = repclnt_rq_alloc(pnode, pathname, opnum, (void *) rep_rename);
+			}
+			if (rqt2) {
+				repclnt_update(pnode, rqt2);
+				/* NOTE(review): mtime is taken from the lock request, not rqt2 - confirm. */
+				current_fh->fh_dentry->d_inode->i_mtime = rqt->mtime;
+				fhp2->fh_dentry->d_inode->i_mtime = rqt->mtime;
+				repclnt_rq_put(rqt2);
+			}
+			repclnt_rq_lockput(rqt);
+		} else
+			err = -ENOMEM;
+		pnode->pflags &= ~PNODE_WAIT_LOCK;
+		if ((pnode->pflags & PNODE_WAIT_WAITING))
+			wake_up_interruptible(&pnode->pwait);
+		pnode_put(pnode);
+		if (err == -EAGAIN)
+			goto again;
+		return err;
+	}
+
+	dprintk("lock two objs\n");
+	pnode = alloc_pnode(current_fh->fh_dentry, exp);
+	pnode->pflags |= PNODE_WAIT_LOCK;
+	if (err2 || conflict_parent(current_fh->fh_dentry->d_inode, exp->ex_fslocs->migrated))
+		pnode->pflags |= PNODE_SINGLE;
+
+	pnode2 = alloc_pnode(fhp2->fh_dentry, exp);
+	pnode2->seq = pnode->seq;
+	pnode2->pflags |= PNODE_WAIT_LOCK;
+	if (err2 || conflict_parent(fhp2->fh_dentry->d_inode, exp->ex_fslocs->migrated))
+		pnode2->pflags |= PNODE_SINGLE;
+	if (REP_INIT_RENAME && (pathname = rep_get_path(current_fh->fh_dentry, exp->ex_dentry)) && (rep_rename->tpathname = rep_get_path(fhp2->fh_dentry, exp->ex_dentry))) {
+		struct repclnt_request *rqt2 = NULL;
+		rqt2 = repclnt_rq_alloc(pnode2, NULL, REP_OP_NONE, NULL);
+		rep_rename->rqt = rqt2;
+		rqt = repclnt_rq_alloc(pnode, pathname, opnum, (void *) rep_rename);
+	}
+	rep_unlock();
+
+	if (rqt) {
+		err = err2 = repclnt_lock(pnode, rqt);
+		if (!err) {
+			REP_VFS_OPEN2;
+			current_fh->fh_dentry->d_inode->i_mtime = rqt->mtime;
+			fhp2->fh_dentry->d_inode->i_mtime = rqt->mtime;
+		}
+		repclnt_rq_lockput(rqt);
+	} else {
+		/*
+		 * Fix: without a request nothing was performed; report that
+		 * instead of returning success (matches the common-parent path).
+		 */
+		err = -ENOMEM;
+	}
+
+	pnode->pflags &= ~PNODE_WAIT_LOCK;
+	if ((pnode->pflags & PNODE_WAIT_WAITING))
+		wake_up_interruptible(&pnode->pwait);
+	pnode_put(pnode);
+	if (pnode2) {
+		pnode2->pflags &= ~PNODE_WAIT_LOCK;
+		if ((pnode2->pflags & PNODE_WAIT_WAITING))
+			wake_up_interruptible(&pnode2->pwait);
+		pnode_put(pnode2);
+	}
+	if (err == -EAGAIN)
+		goto again;
+	return err;
+}
+
+/*
+ * Replicated rename of fname (in directory current_fh) to tname (in
+ * directory fhp2).
+ *
+ * Both handles must belong to the same export, otherwise -EPERM is
+ * returned.  A rename inside one directory goes through the
+ * single-directory RNODE_CHECK/PNODE_CHECK/PNODE_NEW sequence; a rename
+ * across directories is handed to rep_open2().
+ */
+int rep_rename(struct svc_rqst *rqstp, struct svc_fh *current_fh, char *fname, int flen, struct svc_fh *fhp2, char *tname, int tlen)
+{
+	/* Names come with explicit lengths; print them bounded, never with a bare %s. */
+	dprintk("rep_rename: fname %.*s, tname %.*s, fdir %ld, tdir %ld\n", flen, fname, tlen, tname, current_fh->fh_dentry->d_inode->i_ino, fhp2->fh_dentry->d_inode->i_ino);
+	if (current_fh->fh_export != fhp2->fh_export) {
+		printk("this is not the same partition?\n");
+		return -EPERM;
+	}
+	if (current_fh->fh_dentry == fhp2->fh_dentry) {
+		REP_HEAD(RENAME, rename);
+		if ((err = rnfs_path_scan(current_fh->fh_dentry, &pnode, &pathname, &rnode, current_fh->fh_export))) {
+			rep_unlock();
+			if (err == -EAGAIN)
+				goto again;
+			return err;
+		}
+		RNODE_CHECK(RENAME, rename);
+		PNODE_CHECK(RENAME, rename);
+		PNODE_NEW(RENAME, rename);
+	} else
+		return rep_open2(rqstp, current_fh, fname, flen, fhp2, tname, tlen, REP_OP_RENAME, NULL);
+}
+
+/* REP_LNK */
+/*
+ * Replicated hard link: create fname in directory fhp2 referring to the
+ * existing object tfhp.
+ *
+ * current_fh is a stack-local handle with only fh_dentry and fh_export
+ * filled in; it stands for the PARENT directory of the linked object, so
+ * the shared single-/two-directory code can treat the link like a rename
+ * from that directory into fhp2.  tname/tlen alias fname/flen because the
+ * shared macros expect both names to exist.
+ */
+int rep_link(struct svc_rqst *rqstp, struct svc_fh *fhp2, char *fname, int flen, struct svc_fh *tfhp)
+{
+	struct svc_fh fh;
+	struct svc_fh *current_fh = &fh;
+	char *tname = fname;
+	int tlen = flen;
+
+	fh.fh_dentry = tfhp->fh_dentry->d_parent;
+	fh.fh_export = tfhp->fh_export;
+	/* fname is length-delimited; print it bounded rather than with a bare %s. */
+	dprintk("rep_lnk: dold %ld, fname %.*s, dirp %ld\n", tfhp->fh_dentry->d_inode->i_ino, flen, fname, fhp2->fh_dentry->d_inode->i_ino);
+	if (current_fh->fh_dentry == fhp2->fh_dentry) {
+		REP_HEAD(LNK, rename);
+		if ((err = rnfs_path_scan(current_fh->fh_dentry, &pnode, &pathname, &rnode, current_fh->fh_export))) {
+			rep_unlock();
+			if (err == -EAGAIN)
+				goto again;
+			return err;
+		}
+		RNODE_CHECK(LNK, rename);
+		PNODE_CHECK(LNK, rename);
+		PNODE_NEW(LNK, rename);
+	} else
+		return rep_open2(rqstp, current_fh, tname, tlen, fhp2, fname, flen, REP_OP_LNK, tfhp);
+}
+
+/* REP_WRITE */
+/*
+ * Create the payload of a REP_OP_OPEN request for the object behind
+ * current_fh.  A new pnode is allocated for the object, marked
+ * PNODE_SINGLE, and its sequence counter restarted: the payload records
+ * sequence 0 and the pnode moves on to 1.
+ *
+ * Returns NULL only when the payload itself cannot be allocated.
+ */
+static struct rep_open *
+rep_open_init(struct svc_fh *current_fh)
+{
+	struct rnfs_pnode *new_pnode;
+	struct rep_open *payload;
+
+	payload = kmalloc(sizeof(struct rep_open), GFP_KERNEL);
+	if (payload == NULL)
+		return NULL;
+
+	new_pnode = alloc_pnode(current_fh->fh_dentry, current_fh->fh_export);
+	new_pnode->pflags |= PNODE_SINGLE;
+	new_pnode->seq = 0;
+
+	payload->pnode = new_pnode;
+	payload->seq = new_pnode->seq;
+	new_pnode->seq++;
+	return payload;
+}
+
+#define REP_VFS_WRITE /* local write step; expects oldfs, written, file, vec, vlen and offset_l in the expanding scope */ \
+   oldfs = get_fs(); set_fs(KERNEL_DS); /* vec is a kernel kvec cast to a __user iovec, hence KERNEL_DS */ \
+   written = vfs_writev(file, (struct iovec __user *)vec, vlen, &offset_l); \
+   set_fs(oldfs);
+
+static unsigned long rep_used_page=0;	/* incremented in rep_get_pages(), decremented in rep_put_pages() */
+
+unsigned long rep_get_used_page(void)	/* plain read of the counter above; no locking is done here */
+{
+	return rep_used_page;
+}
+
+/*
+ * Take over the pages backing vec[0..vlen-1] of the incoming request and
+ * record them in argp so the data can be shipped to replicas later.  Each
+ * slot rqstp->rq_argpages[v] receives a freshly allocated page in
+ * exchange, and rep_used_page grows by one per page taken.
+ *
+ * On return argp->pages is a vlen-entry array, or NULL when vlen <= 0 or
+ * memory ran out.  On failure the request is left untouched.
+ */
+static void
+rep_get_pages(struct svc_rqst *rqstp, struct kvec *vec, int vlen, struct rep_write *argp)
+{
+	struct page **pages;
+	int v;
+
+	/* Always leave argp in a state rep_put_pages() can handle. */
+	argp->pages = NULL;
+	argp->pgbase = 0;
+	if (vlen <= 0)
+		return;
+
+	pages = kmalloc(vlen * sizeof(*pages), GFP_KERNEL);
+	if (!pages)
+		return;
+
+	/* Phase 1: obtain every replacement page before touching the request. */
+	for (v = 0; v < vlen; v++) {
+		pages[v] = alloc_page(GFP_KERNEL);
+		if (!pages[v]) {
+			while (--v >= 0)
+				put_page(pages[v]);
+			kfree(pages);
+			return;
+		}
+	}
+
+	/* Phase 2: swap - the request keeps the new page, we keep the data page. */
+	argp->pgbase = offset_in_page(vec[0].iov_base);
+	for (v = 0; v < vlen; v++) {
+		rqstp->rq_argpages[v] = pages[v];
+		pages[v] = virt_to_page(vec[v].iov_base);
+		rep_used_page++;
+		if (!(rep_used_page % 100))
+			dprintk("alloc %lu pages\n", rep_used_page);
+	}
+	argp->pages = pages;
+}
+
+/*
+ * Build the payload of a REP_OP_WRITE request: offset, byte count and the
+ * pages holding the data (see rep_get_pages()).
+ *
+ * Returns NULL when memory is short.  Release with rep_release_write().
+ */
+static struct rep_write *
+rep_write_init(struct svc_rqst *rqstp, loff_t offset, struct kvec *vec, int vlen, unsigned long cnt)
+{
+	struct rep_write *writep;
+
+	writep = kmalloc(sizeof(*writep), GFP_KERNEL);
+	if (!writep)
+		return NULL;
+	writep->offset = offset;
+	writep->count = cnt;
+	writep->vlen = vlen;
+	rep_get_pages(rqstp, vec, vlen, writep);
+	if (vlen > 0 && !writep->pages) {
+		/* Page capture failed; nothing was taken from the request. */
+		kfree(writep);
+		return NULL;
+	}
+	return writep;
+}
+
+#define REP_INIT_WRITE /* assigns the local `rep_write`; NULL (false) when the payload cannot be allocated */ \
+	(rep_write = rep_write_init(rqstp, offset, vec, vlen, cnt))
+
+#define REP_INIT_OPEN /* assigns the local `rep_open`; NULL (false) when the payload cannot be allocated */ \
+	(rep_open = rep_open_init(current_fh))
+
+#define PNODE_WRITE_CHECK /* write path taken when the path scan found an existing pnode; always leaves the function when pnode != NULL */ \
+   if (pnode) { \
+		struct repclnt_request *rqt2 = NULL; \
+		if ((pnode->pflags & PNODE_SINGLE) && !S_ISREG(pnode->fi_inode->i_mode)) { /* single-object pnode on a non-regular file: ask for its close and rescan */ \
+			rep_unlock(); \
+			rep_wait_close(pnode); \
+			pnode_put(pnode); \
+			goto again; \
+		} \
+		if (pnode->fi_inode != current_fh->fh_dentry->d_inode) { /* the pnode belongs to an ancestor, not to the file being written */ \
+			struct rep_open *rep_open; \
+			if (REP_INIT_OPEN) \
+				rqt = repclnt_rq_alloc(pnode, pathname, REP_OP_OPEN, rep_open); \
+			if (rqt && REP_INIT_WRITE) \
+				rqt2 = repclnt_rq_alloc(rep_open->pnode, NULL, REP_OP_WRITE, (void *) rep_write); \
+			rep_unlock(); \
+			if (rqt) \
+				err = repclnt_update(pnode, rqt); \
+			else { /* NOTE(review): relies on rqt being NULL on entry; rqt is declared outside this macro */ \
+				pnode_put(pnode); \
+				return -ENOMEM; \
+			} \
+			pnode_put(pnode); \
+			if (err) { \
+				if (rqt2) \
+					repclnt_rq_put(rqt2); \
+				repclnt_rq_put(rqt); \
+				return err; \
+			} \
+			pnode = rep_open->pnode; /* continue with the file's own pnode created by rep_open_init() */ \
+			atomic_inc(&pnode->refcnt); \
+		} else { \
+			if (REP_INIT_WRITE) \
+				rqt2 = repclnt_rq_alloc(pnode, NULL, REP_OP_WRITE, (void *) rep_write); \
+			rep_unlock(); \
+		} \
+      REP_VFS_WRITE; /* the local write runs even when no write request (rqt2) could be built */ \
+		if (rqt2) { \
+			if (written > 0) \
+      		err = repclnt_update(pnode, rqt2); \
+			repclnt_rq_put(rqt2); \
+		} \
+		if (rqt) \
+			repclnt_rq_put(rqt); \
+		pnode_put(pnode); \
+      return err; /* NOTE(review): returns err here, while PNODE_WRITE_NEW returns `written` on success - confirm intent */ \
+   }
+
+/* Similar to PNODE_NEW, but the file object is locked first and the
+written data is sent afterwards, since distributing a write request
+(which may be as large as 32k) is time consuming. */
+#define PNODE_WRITE_NEW \
+      pnode = alloc_pnode(current_fh->fh_dentry, exp); \
+      pnode->pflags |= (PNODE_WAIT_LOCK | PNODE_SINGLE); \
+      if ((pathname = rep_get_path(current_fh->fh_dentry, exp->ex_dentry))) \
+         rqt = repclnt_rq_alloc(pnode, pathname, REP_OP_NONE, NULL); \
+      rep_unlock(); \
+      if (rqt) { \
+			struct repclnt_request *rqt2 = NULL; \
+         err = repclnt_lock(pnode, rqt); \
+         if (err) { /* NOTE(review): retries without clearing PNODE_WAIT_LOCK or dropping pnode - confirm this is intended */ \
+         	repclnt_rq_put(rqt); \
+				goto again; \
+         } \
+			REP_VFS_WRITE; \
+			if ((written>0) && REP_INIT_WRITE && (rqt2 = repclnt_rq_alloc(pnode, NULL, REP_OP_WRITE, (void *) rep_write))) { /* replicate only when something was written locally */ \
+				err = repclnt_update(pnode, rqt2); \
+				repclnt_rq_put(rqt2); \
+			} \
+			repclnt_rq_lockput(rqt); \
+		} \
+      pnode->pflags &= ~PNODE_WAIT_LOCK; \
+      if ((pnode->pflags & PNODE_WAIT_WAITING)) /* somebody flagged that it is waiting on this pnode */ \
+         wake_up_interruptible(&pnode->pwait); \
+      pnode_put(pnode); \
+		if (err) \
+      	return err; \
+		return written;
+
+int rep_write(struct svc_rqst *rqstp, struct svc_fh *current_fh, loff_t offset, struct kvec *vec, int vlen, unsigned long cnt, int stable, struct file *file)
+{	/* Replicated write: write vec to file locally and hand the data to the replication client; `stable` is not referenced in this body */
+   mm_segment_t      oldfs;	/* used by REP_VFS_WRITE */
+	int written = 0;	/* result of vfs_writev(), set by REP_VFS_WRITE */
+	loff_t offset_l = offset;	/* private copy: vfs_writev() is given a pointer to it */
+   REP_HEAD(WRITE, write);	/* defined elsewhere; presumably declares err/pnode/rnode/pathname/rqt/exp and the `again` label - confirm */
+   //dprintk("rep_write\n");
+	if ((err = rnfs_path_scan(current_fh->fh_dentry, &pnode, &pathname, &rnode, current_fh->fh_export))) {
+		rep_unlock();
+		if (err == -EAGAIN)
+			goto again;
+		return err;
+	}
+   RNODE_CHECK(WRITE, write);
+	PNODE_WRITE_CHECK;	/* returns or retries when an existing pnode was found */
+   PNODE_WRITE_NEW;	/* otherwise lock the file itself; every path of this macro returns or retries */
+}
+
+/*
+ * Locate, within the first `max` entries of rqpages, the page that holds
+ * address p.  *pagebase receives p's offset inside its page.
+ *
+ * Returns a pointer to the matching slot of rqpages, or NULL (after
+ * logging) when none of the pages contains p.
+ */
+struct page **
+rep_find_pages(struct page **rqpages, short max, void *p, unsigned int *pagebase)
+{
+	void *page_start;
+	int idx;
+
+	*pagebase = offset_in_page(p);
+	page_start = p - *pagebase;
+
+	for (idx = 0; idx < max; idx++) {
+		if (page_address(rqpages[idx]) == page_start)
+			return &rqpages[idx];
+	}
+
+	printk("can not find pages, p is %lu, page0 is %lu\n", (unsigned long) (page_start), (unsigned long) (page_address(rqpages[0])));
+	return NULL;
+}
+
+/*
+ * Before serving a read locally, check whether the inode has an rnode.
+ * If so, the read is forwarded with REP_OP_FREAD and the reply is placed
+ * in the pages behind vec.
+ *
+ * Returns 0 when there is no rnode (nothing was forwarded), *cnt when the
+ * forwarded read succeeded, or -EACCES after three failed forwards.
+ */
+int rep_preread(struct svc_rqst *rqstp, struct inode *inode, loff_t offset, struct kvec *vec, int vlen, unsigned long *cnt)
+{
+	struct rnfs_rnode *rnode;
+	int err;
+	int forward_count = 0;
+	struct rep_fread argp;
+	/*
+	 * Fallback single-page "array".  It lives at function scope because
+	 * argp.pages may point at it while repclnt_send_one() runs; the old
+	 * code declared it inside the if-block and used it after its scope
+	 * had ended.
+	 */
+	struct page *page;
+
+ again:
+	rep_lock();
+	rnode = find_rnode(inode);
+	rep_unlock();
+	if (rnode) {
+		dprintk("rep_preread: %ld, primary %d, count %lu\n", rnode->fi_inode->i_ino, rnode->primary, *cnt);
+		forward_count++;
+		argp.offset = offset;
+		argp.count = *cnt;
+		argp.pages = rep_find_pages(rqstp->rq_respages, rqstp->rq_resused, vec[0].iov_base, &argp.pgbase);
+		if (!argp.pages) {
+			/* Not one of the response pages: read into vec[0]'s page only. */
+			page = virt_to_page(vec[0].iov_base);
+			argp.pages = &page;
+			if (*cnt > vec[0].iov_len)
+				argp.count = vec[0].iov_len;
+		}
+		err = repclnt_send_one(REPPROC_FORWARD, rnode, REP_OP_FREAD, (void *) &argp, cnt);
+		rnode_put(rnode);
+		if (!err) {
+			dprintk("rep_fread %lu\n", *cnt);
+			return *cnt;
+		}
+		if (forward_count > 2) {
+			printk("rep_preread err\n");
+			return -EACCES;
+		}
+		goto again;
+	}
+	return 0;
+}
+
+/*
+ * Before answering a getattr for a regular file locally, check whether the
+ * inode has an rnode; if so fetch the attributes with a forwarded
+ * REP_OP_FGETATTR and patch in the fields that are taken from the local
+ * inode.
+ *
+ * Returns 0 when nothing was forwarded (not a regular file, or no rnode),
+ * 1 when statp was filled from the forward, and -EACCES after three
+ * failed forwards.
+ */
+int rep_pregetattr(struct inode *inode, struct kstat *statp)
+{
+	struct rnfs_rnode *rnode;
+	int attempts = 0;
+	int err;
+
+	if (!S_ISREG(inode->i_mode))
+		return 0;
+
+	for (;;) {
+		rep_lock();
+		rnode = find_rnode(inode);
+		rep_unlock();
+		if (!rnode)
+			return 0;
+
+		dprintk("rep_pregetattr: %ld, primary %d\n", rnode->fi_inode->i_ino, rnode->primary);
+		attempts++;
+		err = repclnt_send_one(REPPROC_FORWARD, rnode, REP_OP_FGETATTR, NULL, statp);
+		rnode_put(rnode);
+		if (!err)
+			break;
+		if (attempts > 2) {
+			printk("rep_pregetattr err\n");
+			return -EACCES;
+		}
+		dprintk("rep_pregetattr: err %d, try again\n", err);
+	}
+
+	/* These fields always come from the local inode. */
+	statp->dev = inode->i_sb->s_dev;
+	statp->ino = inode->i_ino;
+	statp->rdev = inode->i_rdev;
+	statp->atime = inode->i_atime;
+	statp->blocks = inode->i_blocks;
+	statp->blksize = inode->i_blksize;
+	return 1;
+}
+
+/*
+ * Collect the newly failed replicas into dview.
+ *
+ * locations[migrated].failed holds the number of new failures; every
+ * location whose own `failed` field equals 2 is added to the list and then
+ * moved to state 1.
+ *
+ * Returns 0 with dview filled in (deny_list is NULL when nothing failed),
+ * -ENOMEM when the list cannot be allocated (the failure counter is left
+ * untouched so a later call can retry), or -EIO when more locations are in
+ * state 2 than the counter announced.
+ *
+ * NOTE(review): deny_list is a u8 array, so only the low byte of each
+ * s_addr survives the assignment - confirm against the XDR encoder.
+ */
+static int
+rep_dview(struct nfsd4_fs_locations *replist, struct rep_dview *dview)
+{
+	u32 deny_num;
+	u8 *deny_list = NULL;
+	int i, count = 0;
+
+	deny_num = replist->locations[replist->migrated].failed;
+	/* no new failed nodes */
+	if (deny_num == 0)
+		goto out;
+	if (!(deny_list = kmalloc(deny_num, GFP_KERNEL)))
+		return -ENOMEM;
+	replist->locations[replist->migrated].failed = 0;
+	for (i = 0; i < replist->locations_count; i++) {
+		if (replist->locations[i].failed != 2)
+			continue;
+		/* Check BEFORE storing: the old order wrote one byte past the buffer. */
+		if (count >= deny_num) {
+			kfree(deny_list);
+			return -EIO;
+		}
+		deny_list[count] = replist->locations[i].sin_addr.s_addr;
+		count++;
+		replist->locations[i].failed = 1;
+	}
+ out:
+	dview->deny_num = count;
+	dview->deny_list = deny_list;
+	return 0;
+}
+
+/*
+ * Build the payload of a close request for pnode: the deny view of the
+ * export's replica list plus the inode's current mtime and ctime.
+ *
+ * Returns NULL when the payload or its deny view cannot be set up.
+ * Release with rep_release_close().
+ */
+struct rep_close *
+rep_close_init(struct rnfs_pnode *pnode)
+{
+	struct rep_close *payload;
+	struct inode *inode;
+
+	payload = kmalloc(sizeof(struct rep_close), GFP_KERNEL);
+	if (payload == NULL)
+		return NULL;
+
+	if (rep_dview(pnode->exp->ex_fslocs, &payload->dview) != 0) {
+		kfree(payload);
+		return NULL;
+	}
+
+	inode = pnode->fi_inode;
+	payload->mtime = inode->i_mtime;
+	payload->ctime = inode->i_ctime;
+	return payload;
+}
+
+/* RELEASE FUNCTIONS */
+/* Free a create payload: the name, the symlink target (symlinks only), then the payload. */
+void rep_release_create(struct rep_create *createp)
+{
+	const int has_link_target = (createp->cr_type == S_IFLNK);
+
+	kfree(createp->cr_name);
+	if (has_link_target)
+		kfree(createp->u.link.name);
+	kfree(createp);
+}
+
+/* Free a remove payload together with the name it owns. */
+void rep_release_remove(struct rep_remove *removep)
+{
+	char *name = removep->rm_name;
+
+	kfree(name);
+	kfree(removep);
+}
+
+/*
+ * Free a rename/link payload: both name copies, the optional target path
+ * and the optional companion request, then the payload itself.
+ */
+void rep_release_rename(struct rep_rename *renamep)
+{
+	kfree(renamep->sname);
+	kfree(renamep->tname);
+	/* kfree() ignores NULL, so the optional path needs no guard. */
+	kfree(renamep->tpathname);
+	if (renamep->rqt != NULL)
+		repclnt_rq_put(renamep->rqt);
+	kfree(renamep);
+}
+
+/*
+ * Free a server-side rename payload: owned strings and file-handle data
+ * first, then the dentry and rnode references (when present), then the
+ * payload itself.
+ */
+void repd_release_rename(struct repd_rename *renamep)
+{
+	kfree(renamep->sname);
+	kfree(renamep->tname);
+	/* kfree() ignores NULL, so the optional buffers need no guard. */
+	kfree(renamep->tpath);
+	kfree(renamep->tprimary_fh.data);
+
+	if (renamep->tdentry != NULL)
+		dput(renamep->tdentry);
+	if (renamep->rnode != NULL)
+		rnode_put(renamep->rnode);
+	kfree(renamep);
+}
+
+void rep_release_setattr(struct iattr *rep_setattr)	/* setattr payload is a bare struct iattr; freeing it is all that is done */
+{
+   kfree(rep_setattr);
+}
+
+/*
+ * Drop one reference on each of the vlen pages in `pages`, adjust the
+ * rep_used_page counter accordingly and free the array.  A NULL array is
+ * ignored.
+ */
+static void
+rep_put_pages(struct page **pages, int vlen)
+{
+	int idx = 0;
+
+	if (pages == NULL)
+		return;
+
+	while (idx < vlen) {
+		put_page(pages[idx]);
+		rep_used_page--;
+		idx++;
+	}
+	kfree(pages);
+}
+
+/* Free an open payload and drop its pnode reference; NULL is ignored. */
+void rep_release_open(struct rep_open *rep_open)
+{
+	if (rep_open == NULL)
+		return;
+
+	pnode_put(rep_open->pnode);
+	kfree(rep_open);
+}
+
+/* Free a server-side open payload and its file-handle data; NULL is ignored. */
+void repd_release_open(struct repd_open *openp)
+{
+	if (openp == NULL)
+		return;
+
+	/* kfree() ignores NULL, so absent handle data needs no guard. */
+	kfree(openp->primary_fh.data);
+	kfree(openp);
+}
+
+/* Free a wait payload, dropping its pnode and request references; NULL is ignored. */
+void rep_release_wait(struct rep_wait *wait)
+{
+	if (wait == NULL)
+		return;
+
+	pnode_put(wait->pnode);
+	repclnt_rq_put(wait->rqt);
+	kfree(wait);
+}
+
+/* Free a server-side wait payload and drop its rnode reference, if any; NULL is ignored. */
+void repd_release_wait(struct repd_wait *wait)
+{
+	if (wait == NULL)
+		return;
+
+	if (wait->rnode != NULL)
+		rnode_put(wait->rnode);
+	kfree(wait);
+}
+
+/* Free a write payload: give back its pages (and the page array), then the payload. */
+void rep_release_write(struct rep_write *rep_write)
+{
+	struct page **held = rep_write->pages;
+	int nr = rep_write->vlen;
+
+	rep_put_pages(held, nr);
+	kfree(rep_write);
+}
+
+void repd_release_write(struct repd_write *repd_write)	/* only the payload itself is freed here; its kvec entries are not touched */
+{
+   kfree(repd_write);
+}
+
+/* Free a close payload together with the deny list of its view (which may be absent). */
+void rep_release_close(struct rep_close *closep)
+{
+	/* kfree() ignores NULL, so an empty deny list needs no guard. */
+	kfree(closep->dview.deny_list);
+	kfree(closep);
+}
+
+/* checking upon lock requests from a peer server */
+struct rnfs_rnode *
+rnfs_open_rnode(struct svc_rqst *rqstp, struct dentry *dentry_in, struct svc_export *exp, struct xdr_netobj *primary_fh, unsigned int single, unsigned int seq, char *path, unsigned int pathlen)
+{	/* Returns a new rnode when the peer's lock request is granted, ERR_PTR(-EACCES) on a conflict, or NULL (no primary, or request parked via repd_enter_open()); path/pathlen are not referenced in this body */
+   struct rnfs_pnode *pnode = NULL;
+   struct rnfs_rnode *rnode = NULL, *rparent = NULL;
+   struct rnfs_parent *pparent = NULL;
+   struct dentry *dentry = dentry_in, *parent;
+	int primary;
+
+	primary = find_primary(rqstp, exp->ex_fslocs, seq);	/* presumably the index of the requesting server in ex_fslocs->locations - confirm */
+	if (primary < 0)
+		return NULL;
+	//dprintk("rnfs_open_rnode: primary %d, addr %u\n", primary, exp->ex_fslocs->locations[primary].sin_addr.s_addr);
+ //again:
+   rep_lock();
+	pparent = conflict_parent(dentry->d_inode, primary);
+	if (pparent && (single & REP_LOCK_SINGLE))	/* a REP_LOCK_SINGLE request ignores the parent conflict */
+		pparent = NULL;
+   if (!pparent && !(rnode = find_rnode(dentry->d_inode)) && !(pnode = find_pnode(dentry->d_inode))) {	/* nothing on the object itself: look at its ancestors */
+      dget(dentry);
+      while ((dentry != exp->ex_dentry) && !IS_ROOT(dentry)) {
+         parent = dget_parent(dentry);
+         dput(dentry);
+         dentry = parent;
+         if ((pnode = find_pnode(parent->d_inode))) {
+            if (!(pnode->pflags & PNODE_SINGLE))	/* an ancestor pnode that is not PNODE_SINGLE stops the walk and is handled below */
+               break;
+				atomic_dec_and_test(&pnode->refcnt);	/* drops the reference without pnode_put(); result deliberately unused */
+            pnode = NULL;
+         }
+         if ((rparent = find_rnode(parent->d_inode))) {
+				if (valid_primary(rqstp, rparent)) {
+					/* avoid rename case, such as mv /a/b/c /a/c */
+					if (rparent->update->opnum != REP_OP_NONE) {
+						printk("enter_pending: existing parent %ld, dentry %ld\n", rparent->fi_inode->i_ino, dentry_in->d_inode->i_ino);
+						rparent->rflags |= RNODE_WAIT_CLOSE;
+						repd_enter_open(&rqstp->rq_chandle);
+						rep_unlock();
+						rnode_put(rparent);
+						dput(dentry);
+						return NULL;	/* request was parked; no rnode is handed out */
+					} else {
+						atomic_dec_and_test(&rparent->refcnt);
+            		rparent = NULL;
+					}
+				} else if (!(rparent->rflags & RNODE_SINGLE)) {	/* ancestor rnode failing valid_primary() and not RNODE_SINGLE: conflict, handled below */
+               break;
+				} else {
+					atomic_dec_and_test(&rparent->refcnt);
+            	rparent = NULL;
+				}
+         }
+      }
+      dput(dentry);
+   }
+
+	if (pnode && ((pnode->ack_num*2) <= pnode->exp->ex_fslocs->locations_count) && ((pnode->pflags & PNODE_CLOSED) || (rqstp->rq_addr.sin_addr.s_addr < pnode->competitor))) {	/* local pnode has acks from at most half of the locations */
+		dprintk("competitor: pnode %ld, pnode->com %u, com %u, closed? %d\n", pnode->fi_inode->i_ino, pnode->competitor, rqstp->rq_addr.sin_addr.s_addr, (pnode->pflags & PNODE_CLOSED));
+		if (!(pnode->pflags & PNODE_CLOSED)) {
+			if (pnode->competitor != exp->ex_fslocs->locations[exp->ex_fslocs->migrated].sin_addr.s_addr)
+					repd_scan_openlist();
+			pnode->competitor = rqstp->rq_addr.sin_addr.s_addr;	/* remember the requester as competitor and park the request */
+			repd_enter_open(&rqstp->rq_chandle);
+			goto out;
+		} else if ((pnode->ack_num*2) <= pnode->exp->ex_fslocs->locations_count) {	/* same test as in the enclosing if, so always true here */
+			if ((pnode->competitor == rqstp->rq_addr.sin_addr.s_addr) || (pnode->competitor == exp->ex_fslocs->locations[exp->ex_fslocs->migrated].sin_addr.s_addr)) {
+				pnode->competitor = 0;
+				rnode = alloc_rnode(dentry_in, primary, exp, primary_fh);
+				if ((single & REP_LOCK_SINGLE))
+					rnode->rflags |= RNODE_SINGLE;
+				wake_up_interruptible(&pnode->pwait);
+				goto out;
+			}
+		}
+	}
+
+   if (pnode || pparent || rparent) {	/* some conflict remains: only a REP_LOCK_FORCE request still gets an rnode */
+		if (pnode)
+			dprintk("pnode confliction: %ld, rqt %u\n", pnode->fi_inode->i_ino, rqstp->rq_addr.sin_addr.s_addr);
+		if (pparent) {
+			dprintk("pparent confliction: %ld, rqt %u\n", pparent->fi_inode->i_ino, rqstp->rq_addr.sin_addr.s_addr);
+			if (!(pparent->bitmap & ~(1 << (exp->ex_fslocs->migrated+1))))
+				rnfs_rnode_print();
+		}
+		if (rparent)
+			dprintk("rparent confliction: %ld, rqt %u\n", rparent->fi_inode->i_ino, rqstp->rq_addr.sin_addr.s_addr);
+		if ((single & REP_LOCK_FORCE)) {
+			rnode = alloc_rnode(dentry_in, primary, exp, primary_fh);
+			if ((single & REP_LOCK_SINGLE))
+				rnode->rflags |= RNODE_SINGLE;
+			goto out;
+		}
+		rnode = ERR_PTR(-EACCES);
+      goto out;
+	}
+   if (rnode) {	/* the object itself already has an rnode */
+      if (!valid_primary(rqstp, rnode)) {
+			dprintk("existing rnode %ld locked by another server %u, rqt %u\n", rnode->fi_inode->i_ino, exp->ex_fslocs->locations[rnode->primary].sin_addr.s_addr,  rqstp->rq_addr.sin_addr.s_addr);
+         rparent = rnode;	/* moved into rparent so the reference is dropped at `out` */
+         rnode = ERR_PTR(-EACCES);
+      } else {
+			dprintk("enter_pending: existing rnode %ld, inode %ld\n", rnode->fi_inode->i_ino, dentry_in->d_inode->i_ino);
+			rnode->rflags |= RNODE_WAIT_CLOSE;
+			repd_enter_open(&rqstp->rq_chandle);
+			rparent = rnode;	/* reference is dropped at `out`; the caller gets NULL */
+			rnode = NULL;
+      }
+   } else {
+		rnode = alloc_rnode(dentry_in, primary, exp, primary_fh);
+		if ((single & REP_LOCK_SINGLE))
+			rnode->rflags |= RNODE_SINGLE;
+   }
+ out:
+   rep_unlock();
+   if (pnode)  pnode_put(pnode);	/* puts are done after the rep lock has been released */
+   if (rparent)   rnode_put(rparent);
+   return rnode;
+}
+/*
+ * Look up the rnode of `inode` on behalf of the request's sender.
+ *
+ * Returns the rnode (with the reference obtained from find_rnode()) when
+ * it exists and valid_primary() accepts the sender; NULL otherwise.
+ */
+struct rnfs_rnode *
+rnfs_get_rnode(struct svc_rqst *rqstp, struct inode *inode)
+{
+	struct rnfs_rnode *found;
+
+	rep_lock();
+	found = find_rnode(inode);
+	rep_unlock();
+
+	if (found == NULL)
+		return NULL;
+	if (valid_primary(rqstp, found))
+		return found;
+
+	rnode_put(found);
+	return NULL;
+}
+
+/*
+ * Obtain the rnode for an open of `dentry` and give it a fresh, empty
+ * update record carrying `seq`.
+ *
+ * An existing rnode is reused only when it is flagged RNODE_CREATED; it
+ * then takes over primary_fh (the caller's netobj is emptied) and the flag
+ * is cleared.  Any other existing rnode makes the call fail.  Without an
+ * existing rnode a new one is allocated.
+ *
+ * Returns the rnode, or NULL on failure.  The rep lock is taken and
+ * released internally on every path.
+ */
+struct rnfs_rnode *
+rnfs_openfile_rnode(struct dentry *dentry, int primary, struct svc_export *exp, struct xdr_netobj *primary_fh, unsigned int seq)
+{
+	struct rnfs_rnode *rnode;
+
+	rep_lock();
+	if ((rnode = find_rnode(dentry->d_inode))) {
+		if (!(rnode->rflags & RNODE_CREATED)) {
+			printk("rnfs_open_rnode: existing rnode, caused by delayed close?\n");
+			rep_unlock();
+			rnode_put(rnode);
+			return NULL;
+		}
+		dprintk("rnfs_open_rnode: for newly created rnode %ld\n", rnode->fi_inode->i_ino);
+		memcpy(&rnode->primary_fh, primary_fh, sizeof(struct xdr_netobj));
+		primary_fh->data = NULL;
+		primary_fh->len = 0;
+		rnode->rflags &= ~RNODE_CREATED;
+	} else
+		rnode = alloc_rnode(dentry, primary, exp, primary_fh);
+	if (!(rnode->update = kmalloc(sizeof(struct repd_update), GFP_KERNEL))) {
+		/*
+		 * Fix: this path used to return with the rep lock still held.
+		 * Unlock first, then drop the reference, as the other error path does.
+		 */
+		rep_unlock();
+		rnode_put(rnode);
+		return NULL;
+	}
+	rnode->update->opnum = REP_OP_NONE;
+	rnode->update->data = NULL;
+	rnode->update->path = NULL;
+	rnode->update->seq = seq;
+	rep_unlock();
+	return rnode;
+}
+ 
+/* Bring up replication; currently this only starts the replication state. */
+void rep_start(void)
+{
+	rep_state_start();
+}
+                                                                                   
+/* Tear down replication; currently this only shuts the replication state down. */
+void rep_shutdown(void)
+{
+	rep_state_shutdown();
+}
diff -puN /dev/null fs/nfsd/nfs4replication.h
--- /dev/null	2003-09-15 09:40:47.000000000 -0400
+++ rnfs-linux-2.6.16-rc3-jiayingz/fs/nfsd/nfs4replication.h	2007-03-03 16:21:47.000000000 -0500
@@ -0,0 +1,190 @@
+
+#define REPPROC_NULL       0	/* REPPROC_*: procedure numbers of the replication protocol */
+#define REPPROC_OPEN     	1
+#define REPPROC_UPDATE     2
+#define REPPROC_FORWARD    3	/* used with repclnt_send_one() to forward a request */
+
+#define REP_OP_NONE			0	/* REP_OP_*: operation carried inside a request; NONE = no payload */
+#define REP_OP_CLOSE			1
+#define REP_OP_CANCEL      2
+#define REP_OP_WRITE       3
+#define REP_OP_CREATE      4	/* CREATE, CREATE_V3 and SYMLINK deliberately share one code */
+#define REP_OP_CREATE_V3   4
+#define REP_OP_SYMLINK     4
+#define REP_OP_REMOVE      5
+#define REP_OP_SETATTR     6
+#define REP_OP_LNK         7
+#define REP_OP_RENAME      8
+#define REP_OP_OPEN			9
+#define REP_OP_FREAD 		10	/* forwarded read, see rep_preread() */
+#define REP_OP_FGETATTR    11	/* forwarded getattr, see rep_pregetattr() */
+#define REP_OP_WAIT			12
+#define REP_OP_CALLBACK		13
+#define REP_OP_OPENFAIL 	14
+#define REP_OP_RECOVERDONE 15
+
+#define REP_LOCK_SINGLE		0x0001	/* REP_LOCK_*: bits of the `single` argument of rnfs_open_rnode() */
+#define REP_LOCK_FORCE		0x0002	/* hand out an rnode even when a conflict was detected */
+
+struct rep_dview {	/* list of newly failed replicas, filled by rep_dview() */
+  unsigned int   deny_num;	/* number of valid entries in deny_list */
+  u8            *deny_list;	/* kmalloc'ed, may be NULL; freed by rep_release_close() */
+};
+
+struct rep_close {	/* payload of a close request, built by rep_close_init() */
+   struct timespec      mtime;	/* copied from the pnode's inode */
+   struct timespec      ctime;	/* copied from the pnode's inode */
+   struct rep_dview     dview;
+};
+
+struct rep_create {	/* payload of a create request; released by rep_release_create() */
+   u32               cr_namelen;
+   char              *cr_name;	/* owned: freed on release */
+   u32               cr_type;	/* selects the union member; S_IFLNK means u.link is in use */
+   union {
+      struct {
+         u32 namelen;
+         char *name;	/* owned: freed on release when cr_type == S_IFLNK */
+      } link; /* NF4LNK */
+      struct {
+         u32 specdata1;
+         u32 specdata2;
+      } dev;  /* NF4BLK, NF4CHR */
+      u32   verf;
+   } u;
+   struct iattr      cr_attrs;
+};
+
+struct rep_remove {	/* payload of a remove request; released by rep_release_remove() */
+   u32               rm_type;
+   u32               rm_namelen;
+   char *            rm_name;	/* owned: freed on release */
+};
+
+struct rep_rename {	/* payload of a rename/link request; built by rep_rename_init(), released by rep_release_rename() */
+   u32               snamelen;
+   char *            sname;	/* owned copy of the source name, snamelen bytes, not NUL-terminated */
+   u32               tnamelen;
+   char *            tname;	/* owned copy of the target name, tnamelen bytes, not NUL-terminated */
+	char *				tpathname;	/* optional; freed on release when set */
+	struct repclnt_request *rqt;	/* optional companion request; put on release when set */
+};
+
+struct repd_rename {	/* server-side rename payload; released by repd_release_rename() */
+   u32                  snamelen;
+   char *               sname;	/* owned: freed on release */
+   u32                  tnamelen;
+   char *               tname;	/* owned: freed on release */
+	char *					tpath;	/* optional; freed on release when set */
+	struct xdr_netobj		tprimary_fh;	/* .data is freed on release when set */
+	struct dentry			*tdentry;	/* optional reference; dput() on release */
+	struct rnfs_rnode		*rnode;	/* optional reference; rnode_put() on release */
+	u32						seq;
+};
+
+struct rep_open {	/* payload of an open request; built by rep_open_init(), released by rep_release_open() */
+	struct rnfs_pnode		*pnode;	/* pnode of the opened object; pnode_put() on release */
+	u32						seq;	/* sequence number taken from the pnode at creation */
+};
+
+struct rep_wait {	/* payload of a wait request; released by rep_release_wait() */
+	struct rnfs_pnode		*pnode;	/* pnode_put() on release */
+	u32						seq;
+	struct repclnt_request *rqt;	/* repclnt_rq_put() on release */
+};
+
+struct repd_open {	/* server-side open payload; released by repd_release_open() */
+	struct xdr_netobj		primary_fh;	/* .data is freed on release when set */
+	u32						seq;
+};
+
+struct repd_wait {	/* server-side wait payload; released by repd_release_wait() */
+   u32                  fh_size;
+   char                 *fh_base;	/* NOTE(review): not freed by repd_release_wait(); presumably points into the request buffer - confirm */
+	u32						seq;
+	struct rnfs_rnode		*rnode;	/* optional reference; rnode_put() on release */
+};
+
+struct rep_write {	/* payload of a write request; built by rep_write_init(), released by rep_release_write() */
+   u64                 offset;	/* file offset of the write */
+   unsigned long       count;	/* byte count passed to rep_write_init() */
+   int                  vlen;	/* number of entries in pages */
+   unsigned int        pgbase;	/* offset of the data within the first page */
+   struct page **         pages;	/* kmalloc'ed array; pages are put and the array freed on release */
+};
+
+struct rep_fread {	/* arguments of a forwarded read; filled on the stack in rep_preread() */
+   u64                 offset;
+   unsigned long       count;	/* bytes requested */
+   unsigned int        pgbase;	/* offset of the destination within the first page */
+   struct page **         pages;	/* borrowed pointer into the request's pages; not owned */
+};
+
+struct repd_write {	/* server-side write payload; released by repd_release_write() */
+   u64                  offset;
+   unsigned long        count;
+   struct kvec          vec[RPCSVC_MAXPAGES];	/* embedded array, so the structure is large */
+   int                  vlen;	/* presumably the number of used entries in vec - confirm */
+};
+
+void rep_release_create(struct rep_create *createp);
+void rep_release_remove(struct rep_remove *removep);
+void rep_release_rename(struct rep_rename *renamep);
+void repd_release_rename(struct repd_rename *renamep);
+void rep_release_setattr(struct iattr *attrp);
+void rep_release_open(struct rep_open *openp);
+void repd_release_open(struct repd_open *openp);
+void rep_release_wait(struct rep_wait *wait);
+void repd_release_wait(struct repd_wait *wait);
+void rep_release_write(struct rep_write *writep);
+void repd_release_write(struct repd_write *repd_write);
+void rep_release_close(struct rep_close *closep);
+struct rep_close *rep_close_init(struct rnfs_pnode *pnode);
+void repd_scan_openlist(void);
+
+int rep_vfs_setattr(struct svc_fh *fhp, struct iattr *iap);
+int rep_vfs_create(struct svc_rqst *rqstp, struct svc_fh *fhp,
+      char *fname, int flen, struct iattr *iap,
+      int type, dev_t rdev, struct inode *dirp, struct svc_fh *resfhp);
+int rep_vfs_create_v3(struct svc_rqst *rqstp, struct svc_fh *fhp,
+      char *fname, int flen, struct iattr *iap,
+      struct svc_fh *resfhp, int createmode, u32 *verifier,
+           int *truncp, struct inode *dirp);
+int rep_vfs_symlink(struct svc_fh *fhp, char *fname, int flen,
+            char *path,  int plen, struct svc_fh *resfhp, struct iattr *iap);
+int rep_vfs_link(struct svc_rqst *rqstp,
+            struct svc_fh *ffhp, char *name, int len, struct svc_fh *tfhp);
+int rep_vfs_rename(struct svc_fh *ffhp, char *fname, int flen,
+             struct svc_fh *tfhp, char *tname, int tlen);
+int rep_vfs_remove(struct rnfs_pnode *pnode, struct svc_fh *fhp, int type, char *fname, int flen);
+
+int rep_create(struct svc_rqst *rqstp, struct svc_fh *current_fh, char *fname, int flen, struct iattr *iap, int type, dev_t rdev, struct inode *dirp, struct svc_fh *resfhp);
+int rep_create_v3(struct svc_rqst *rqstp, struct svc_fh *current_fh, char *fname, int flen, struct iattr *iap, int type, u32 *verifier, int *truncp, struct inode *dirp, struct svc_fh *resfhp);
+int rep_remove(struct svc_rqst *rqstp, struct svc_fh *current_fh, int type, char *fname, int flen);
+int rep_symlink(struct svc_rqst *rqstp, struct svc_fh *current_fh, char *fname, int flen, char *path, int plen, struct svc_fh *resfhp, struct iattr *iap);
+int rep_setattr(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct iattr *iap);
+int rep_rename(struct svc_rqst *rqstp, struct svc_fh *current_fh, char *fname, int flen, struct svc_fh *fhp2, char *tname, int tlen);
+int rep_link(struct svc_rqst *rqstp, struct svc_fh *fhp2, char *fname, int flen, struct svc_fh *tfhp);
+int rep_write(struct svc_rqst *rqstp, struct svc_fh *current_fh, loff_t offset, struct kvec *vec, int vlen, unsigned long cnt, int stable, struct file *file);
+int rep_preread(struct svc_rqst *rqstp, struct inode *inode, loff_t offset, struct kvec *vec, int vlen, unsigned long *cnt);
+int rep_pregetattr(struct inode *inode, struct kstat *statp);
+
+struct rnfs_rnode *rnfs_open_rnode(struct svc_rqst *rqstp, struct dentry *dentry_in, struct svc_export *exp, struct xdr_netobj *primary_fh, unsigned int single, unsigned int seq, char *path, unsigned int pathlen);
+struct rnfs_rnode *rnfs_get_rnode(struct svc_rqst *rqstp, struct inode *);
+struct rnfs_rnode *rnfs_openfile_rnode(struct dentry *dentry, int primary, struct svc_export *exp, struct xdr_netobj *primary_fh, unsigned int seq);
+unsigned long rep_get_used_page(void);
+
+void rep_start(void);
+void rep_shutdown(void);
+void repd_up(void);
+void repd_down(void);
+
+#define MREP_PROBE      0
+#define MREP_RECOVER    1
+#define MREP_SYNC       2
+#define MREP_FAIL       3
+#define MREP_RECOVERD   4
+
+void mreppipe_new(void);
+void mreppipe_delete(void);
+int mreppipe_call(struct xdr_netobj *hostname, char *path, u32 pathlen, char *lookup_name, unsigned int type);
diff -puN fs/nfsd/nfssvc.c~rnfs-all fs/nfsd/nfssvc.c
--- rnfs-linux-2.6.16-rc3/fs/nfsd/nfssvc.c~rnfs-all	2007-03-03 16:21:47.000000000 -0500
+++ rnfs-linux-2.6.16-rc3-jiayingz/fs/nfsd/nfssvc.c	2007-03-03 16:21:47.000000000 -0500
@@ -34,6 +34,8 @@
 #include <linux/lockd/bind.h>
 #include <linux/nfsacl.h>
 
+#include "nfs4replication.h"
+
 #define NFSDDBG_FACILITY	NFSDDBG_SVC
 
 /* these signals will be delivered to an nfsd thread 
@@ -298,7 +300,7 @@ nfsd(struct svc_rqst *rqstp)
 
 	nfsdstats.th_cnt++;
 
-	lockd_up();				/* start lockd */
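+	repd_up();				/* start repd; NOTE(review): lockd_up() is no longer called here but lockd_down() still is - confirm repd_up() starts lockd */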
 
 	me.task = current;
 	list_add(&me.list, &nfsd_list);
@@ -364,6 +366,9 @@ nfsd(struct svc_rqst *rqstp)
 	/* Release lockd */
 	lockd_down();
 
+	/* Release repd */
+	repd_down();
+
 	/* Check if this is last thread */
 	if (serv->sv_nrthreads==1) {
 		
diff -puN fs/nfsd/vfs.c~rnfs-all fs/nfsd/vfs.c
--- rnfs-linux-2.6.16-rc3/fs/nfsd/vfs.c~rnfs-all	2007-03-03 16:21:47.000000000 -0500
+++ rnfs-linux-2.6.16-rc3-jiayingz/fs/nfsd/vfs.c	2007-03-03 16:21:47.000000000 -0500
@@ -54,6 +54,8 @@
 #include <linux/nfs4_acl.h>
 #include <linux/nfsd_idmap.h>
 #include <linux/security.h>
+#include "nfs4repstate.h"
+#include "nfs4replication.h"
 #endif /* CONFIG_NFSD_V4 */
 
 #include <asm/uaccess.h>
@@ -235,6 +237,18 @@ out_nfserr:
  * Set various file attributes.
  * N.B. After this call fhp needs an fh_put
  */
+int rep_vfs_setattr(struct svc_fh *fhp, struct iattr *iap)
+{
+	int host_err, status;	/* value from notify_change() / its nfserrno() mapping */
+	fh_lock(fhp);		/* fh_lock()/fh_unlock() bracket the attribute change */
+	host_err = notify_change(fhp->fh_dentry, iap);
+	if (host_err != 0)
+		dprintk("rep_vfs_setattr: err %d\n", host_err);
+	status = nfserrno(host_err);
+	fh_unlock(fhp);
+	return status;
+}
+
 int
 nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap,
 	     int check_guard, time_t guardtime)
@@ -347,10 +361,16 @@ nfsd_setattr(struct svc_rqst *rqstp, str
 
 	err = nfserr_notsync;
 	if (!check_guard || guardtime == inode->i_ctime.tv_sec) {
-		fh_lock(fhp);
-		err = notify_change(dentry, iap);
-		err = nfserrno(err);
-		fh_unlock(fhp);
+		if (guardtime == -1) {
+      	fh_lock(fhp);
+      	err = notify_change(dentry, iap);
+      	err = nfserrno(err);
+      	fh_unlock(fhp);
+		} else {
+			err = rep_setattr(rqstp, fhp, iap);
+			if (err)
+				printk("rep_setattr ERR %d\n", err);
+		}
 	}
 	if (size_change)
 		put_write_access(inode);
@@ -832,6 +852,14 @@ nfsd_vfs_read(struct svc_rqst *rqstp, st
 		goto out;
 #endif
 
+	if ((err = rep_preread(rqstp, inode, offset, vec, vlen, count))) {
+		if (err < 0) {
+			printk("rep_preread ERR\n");
+			err = nfserrno(err);
+			goto out;
+		} else
+			goto out_read;
+	}
 	/* Get readahead parameters */
 	ra = nfsd_get_raparms(inode->i_sb->s_dev, inode->i_ino);
 
@@ -858,6 +886,7 @@ nfsd_vfs_read(struct svc_rqst *rqstp, st
 		spin_unlock(&ra_lock);
 	}
 
+out_read:
 	if (err >= 0) {
 		nfsdstats.io_read += err;
 		*count = err;
@@ -887,7 +916,6 @@ nfsd_vfs_write(struct svc_rqst *rqstp, s
 	struct svc_export	*exp;
 	struct dentry		*dentry;
 	struct inode		*inode;
-	mm_segment_t		oldfs;
 	int			err = 0;
 	int			stable = *stablep;
 
@@ -923,9 +951,10 @@ nfsd_vfs_write(struct svc_rqst *rqstp, s
 		file->f_flags |= O_SYNC;
 
 	/* Write the data. */
-	oldfs = get_fs(); set_fs(KERNEL_DS);
-	err = vfs_writev(file, (struct iovec __user *)vec, vlen, &offset);
-	set_fs(oldfs);
+	if (cnt)
+		err = rep_write(rqstp, fhp, offset, vec, vlen, cnt, stable, file);
+	if (err < 0)
+		printk("rep_write ERR\n");
 	if (err >= 0) {
 		nfsdstats.io_write += cnt;
 		fsnotify_modify(file->f_dentry);
@@ -972,6 +1001,7 @@ nfsd_vfs_write(struct svc_rqst *rqstp, s
 	}
 
 	dprintk("nfsd: write complete err=%d\n", err);
+	/* write majority of replicas */
 	if (err >= 0)
 		err = 0;
 	else 
@@ -1084,32 +1114,14 @@ nfsd_commit(struct svc_rqst *rqstp, stru
  *
  * N.B. Every call to nfsd_create needs an fh_put for _both_ fhp and resfhp
  */
-int
-nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp,
-		char *fname, int flen, struct iattr *iap,
-		int type, dev_t rdev, struct svc_fh *resfhp)
+int rep_vfs_create(struct svc_rqst *rqstp, struct svc_fh *fhp,
+      char *fname, int flen, struct iattr *iap,
+      int type, dev_t rdev, struct inode *dirp, struct svc_fh *resfhp)
 {
-	struct dentry	*dentry, *dchild = NULL;
-	struct inode	*dirp;
-	int		err;
-
-	err = nfserr_perm;
-	if (!flen)
-		goto out;
-	err = nfserr_exist;
-	if (isdotent(fname, flen))
-		goto out;
-
-	err = fh_verify(rqstp, fhp, S_IFDIR, MAY_CREATE);
-	if (err)
-		goto out;
+   struct dentry  *dentry, *dchild = NULL;
+   int      err;
 
 	dentry = fhp->fh_dentry;
-	dirp = dentry->d_inode;
-
-	err = nfserr_notdir;
-	if(!dirp->i_op || !dirp->i_op->lookup)
-		goto out;
 	/*
 	 * Check whether the response file handle has been verified yet.
 	 * If it has, the parent directory should already be locked.
@@ -1154,24 +1166,24 @@ nfsd_create(struct svc_rqst *rqstp, stru
 	/*
 	 * Get the dir op function pointer.
 	 */
-	err = nfserr_perm;
-	switch (type) {
-	case S_IFREG:
-		err = vfs_create(dirp, dchild, iap->ia_mode, NULL);
-		break;
-	case S_IFDIR:
-		err = vfs_mkdir(dirp, dchild, iap->ia_mode);
-		break;
-	case S_IFCHR:
-	case S_IFBLK:
-	case S_IFIFO:
-	case S_IFSOCK:
-		err = vfs_mknod(dirp, dchild, iap->ia_mode, rdev);
-		break;
-	default:
-	        printk("nfsd: bad file type %o in nfsd_create\n", type);
-		err = -EINVAL;
-	}
+   err = nfserr_perm;
+   switch (type) {
+   case S_IFREG:
+      err = vfs_create(dirp, dchild, iap->ia_mode, NULL);
+      break;
+   case S_IFDIR:
+      err = vfs_mkdir(dirp, dchild, iap->ia_mode);
+      break;
+   case S_IFCHR:
+   case S_IFBLK:
+   case S_IFIFO:
+   case S_IFSOCK:
+      err = vfs_mknod(dirp, dchild, iap->ia_mode, rdev);
+      break;
+   default:
+           printk("nfsd: bad file type %o in nfsd_create\n", type);
+      err = -EINVAL;
+   }
 	if (err < 0)
 		goto out_nfserr;
 
@@ -1187,7 +1199,7 @@ nfsd_create(struct svc_rqst *rqstp, stru
 	 * directories via NFS.
 	 */
 	if ((iap->ia_valid &= ~(ATTR_UID|ATTR_GID|ATTR_MODE)) != 0) {
-		int err2 = nfsd_setattr(rqstp, resfhp, iap, 0, (time_t)0);
+		int err2 = nfsd_setattr(rqstp, resfhp, iap, 0, (time_t)(-1));
 		if (err2)
 			err = err2;
 	}
@@ -1206,21 +1218,14 @@ out_nfserr:
 	goto out;
 }
 
-#ifdef CONFIG_NFSD_V3
-/*
- * NFSv3 version of nfsd_create
- */
 int
-nfsd_create_v3(struct svc_rqst *rqstp, struct svc_fh *fhp,
+nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp,
 		char *fname, int flen, struct iattr *iap,
-		struct svc_fh *resfhp, int createmode, u32 *verifier,
-	        int *truncp)
+		int type, dev_t rdev, struct svc_fh *resfhp)
 {
-	struct dentry	*dentry, *dchild = NULL;
+	struct dentry	*dentry;
 	struct inode	*dirp;
 	int		err;
-	__u32		v_mtime=0, v_atime=0;
-	int		v_mode=0;
 
 	err = nfserr_perm;
 	if (!flen)
@@ -1228,8 +1233,7 @@ nfsd_create_v3(struct svc_rqst *rqstp, s
 	err = nfserr_exist;
 	if (isdotent(fname, flen))
 		goto out;
-	if (!(iap->ia_valid & ATTR_MODE))
-		iap->ia_mode = 0;
+
 	err = fh_verify(rqstp, fhp, S_IFDIR, MAY_CREATE);
 	if (err)
 		goto out;
@@ -1237,11 +1241,32 @@ nfsd_create_v3(struct svc_rqst *rqstp, s
 	dentry = fhp->fh_dentry;
 	dirp = dentry->d_inode;
 
-	/* Get all the sanity checks out of the way before
-	 * we lock the parent. */
 	err = nfserr_notdir;
 	if(!dirp->i_op || !dirp->i_op->lookup)
 		goto out;
+
+	err = rep_create(rqstp, fhp, fname, flen, iap, type, rdev, dirp, resfhp);
+	if (err)
+		printk("rep_create ERR\n");
+out:
+	return err;
+}
+
+#ifdef CONFIG_NFSD_V3
+/*
+ * NFSv3 version of nfsd_create
+ */
+int rep_vfs_create_v3(struct svc_rqst *rqstp, struct svc_fh *fhp,
+      char *fname, int flen, struct iattr *iap,
+      struct svc_fh *resfhp, int createmode, u32 *verifier,
+           int *truncp, struct inode *dirp)
+{
+   struct dentry  *dentry, *dchild = NULL;
+   int      err;
+   __u32    v_mtime=0, v_atime=0;
+   int      v_mode=0;
+
+	dentry = fhp->fh_dentry;
 	fh_lock(fhp);
 
 	/*
@@ -1334,7 +1359,7 @@ nfsd_create_v3(struct svc_rqst *rqstp, s
 	 */
  set_attr:
 	if ((iap->ia_valid &= ~(ATTR_UID|ATTR_GID)) != 0) {
- 		int err2 = nfsd_setattr(rqstp, resfhp, iap, 0, (time_t)0);
+ 		int err2 = nfsd_setattr(rqstp, resfhp, iap, 0, (time_t)-1);
 		if (err2)
 			err = err2;
 	}
@@ -1349,12 +1374,51 @@ nfsd_create_v3(struct svc_rqst *rqstp, s
 	fh_unlock(fhp);
 	if (dchild && !IS_ERR(dchild))
 		dput(dchild);
+	if (err)
+		printk("rep_vfs_create_v3: ERR %d\n", err);
  	return err;
  
  out_nfserr:
 	err = nfserrno(err);
 	goto out;
 }
+
+int
+nfsd_create_v3(struct svc_rqst *rqstp, struct svc_fh *fhp,
+		char *fname, int flen, struct iattr *iap,
+		struct svc_fh *resfhp, int createmode, u32 *verifier,
+	        int *truncp)
+{
+	struct inode	*dir;
+	int		status;
+
+	/*
+	 * Request validation only; once every check has passed the create
+	 * itself is handed to rep_create_v3().
+	 */
+	if (!flen)
+		return nfserr_perm;
+	if (isdotent(fname, flen))
+		return nfserr_exist;
+	/* No mode among the supplied attributes: use 0. */
+	if (!(iap->ia_valid & ATTR_MODE))
+		iap->ia_mode = 0;
+
+	status = fh_verify(rqstp, fhp, S_IFDIR, MAY_CREATE);
+	if (status)
+		return status;
+
+	/* The parent inode must provide a lookup operation,
+	 * otherwise it is reported as "not a directory". */
+	dir = fhp->fh_dentry->d_inode;
+	if (!dir->i_op || !dir->i_op->lookup)
+		return nfserr_notdir;
+
+	status = rep_create_v3(rqstp, fhp, fname, flen, iap, createmode, verifier, truncp, dir, resfhp);
+	if (status)
+		printk("rep_create_v3 ERR\n");
+	return status;
+}
 #endif /* CONFIG_NFSD_V3 */
 
 /*
@@ -1406,27 +1470,13 @@ out_nfserr:
  * Create a symlink and look up its inode
  * N.B. After this call _both_ fhp and resfhp need an fh_put
  */
-int
-nfsd_symlink(struct svc_rqst *rqstp, struct svc_fh *fhp,
-				char *fname, int flen,
-				char *path,  int plen,
-				struct svc_fh *resfhp,
-				struct iattr *iap)
+int rep_vfs_symlink(struct svc_fh *fhp, char *fname, int flen,
+				char *path,  int plen, struct svc_fh *resfhp, struct iattr *iap)
 {
 	struct dentry	*dentry, *dnew;
 	int		err, cerr;
 	umode_t		mode;
 
-	err = nfserr_noent;
-	if (!flen || !plen)
-		goto out;
-	err = nfserr_exist;
-	if (isdotent(fname, flen))
-		goto out;
-
-	err = fh_verify(rqstp, fhp, S_IFDIR, MAY_CREATE);
-	if (err)
-		goto out;
 	fh_lock(fhp);
 	dentry = fhp->fh_dentry;
 	dnew = lookup_one_len(fname, dentry, flen);
@@ -1470,31 +1520,41 @@ out_nfserr:
 	goto out;
 }
 
-/*
- * Create a hardlink
- * N.B. After this call _both_ ffhp and tfhp need an fh_put
- */
 int
-nfsd_link(struct svc_rqst *rqstp, struct svc_fh *ffhp,
-				char *name, int len, struct svc_fh *tfhp)
+nfsd_symlink(struct svc_rqst *rqstp, struct svc_fh *fhp,
+				char *fname, int flen,
+				char *path,  int plen,
+				struct svc_fh *resfhp,
+				struct iattr *iap)
 {
-	struct dentry	*ddir, *dnew, *dold;
-	struct inode	*dirp, *dest;
 	int		err;
 
-	err = fh_verify(rqstp, ffhp, S_IFDIR, MAY_CREATE);
-	if (err)
+	err = nfserr_noent;
+	if (!flen || !plen)
 		goto out;
-	err = fh_verify(rqstp, tfhp, -S_IFDIR, MAY_NOP);
+	err = nfserr_exist;
+	if (isdotent(fname, flen))
+		goto out;
+	err = fh_verify(rqstp, fhp, S_IFDIR, MAY_CREATE);
 	if (err)
 		goto out;
+	err = rep_symlink(rqstp, fhp, fname, flen, path, plen, resfhp, iap);
+	if (err)
+		printk("rep_symlink ERR\n");
+out:
+	return err;
+}
 
-	err = nfserr_perm;
-	if (!len)
-		goto out;
-	err = nfserr_exist;
-	if (isdotent(name, len))
-		goto out;
+/*
+ * Create a hardlink
+ * N.B. After this call _both_ ffhp and tfhp need an fh_put
+ */
+int rep_vfs_link(struct svc_rqst *rqstp, 
+				struct svc_fh *ffhp, char *name, int len, struct svc_fh *tfhp)
+{
+	struct dentry	*ddir, *dnew, *dold;
+	struct inode	*dirp, *dest;
+	int		err;
 
 	fh_lock(ffhp);
 	ddir = ffhp->fh_dentry;
@@ -1509,6 +1569,8 @@ nfsd_link(struct svc_rqst *rqstp, struct
 	dest = dold->d_inode;
 
 	err = vfs_link(dold, dirp, dnew);
+	if (!err)
+		pnode_before_link(dest, ddir);
 	if (!err) {
 		if (EX_ISSYNC(ffhp->fh_export)) {
 			err = nfserrno(nfsd_sync_dir(ddir));
@@ -1531,39 +1593,51 @@ out_nfserr:
 	goto out;
 }
 
+int
+nfsd_link(struct svc_rqst *rqstp, struct svc_fh *ffhp,
+				char *name, int len, struct svc_fh *tfhp)
+{
+	int		status;
+
+	/* ffhp is verified as S_IFDIR with MAY_CREATE ... */
+	status = fh_verify(rqstp, ffhp, S_IFDIR, MAY_CREATE);
+	if (status)
+		return status;
+	/* ... and tfhp as -S_IFDIR with MAY_NOP. */
+	status = fh_verify(rqstp, tfhp, -S_IFDIR, MAY_NOP);
+	if (status)
+		return status;
+
+	if (!len)
+		return nfserr_perm;
+	if (isdotent(name, len))
+		return nfserr_exist;
+
+	/* All checks passed: hand the request to rep_link(). */
+	status = rep_link(rqstp, ffhp, name, len, tfhp);
+	if (status)
+		printk("rep_link ERR\n");
+	return status;
+}
+
 /*
  * Rename a file
  * N.B. After this call _both_ ffhp and tfhp need an fh_put
  */
-int
-nfsd_rename(struct svc_rqst *rqstp, struct svc_fh *ffhp, char *fname, int flen,
+int rep_vfs_rename(struct svc_fh *ffhp, char *fname, int flen,
 			    struct svc_fh *tfhp, char *tname, int tlen)
 {
 	struct dentry	*fdentry, *tdentry, *odentry, *ndentry, *trap;
 	struct inode	*fdir, *tdir;
+	struct inode *rinode=NULL;
 	int		err;
 
-	err = fh_verify(rqstp, ffhp, S_IFDIR, MAY_REMOVE);
-	if (err)
-		goto out;
-	err = fh_verify(rqstp, tfhp, S_IFDIR, MAY_CREATE);
-	if (err)
-		goto out;
-
 	fdentry = ffhp->fh_dentry;
 	fdir = fdentry->d_inode;
 
 	tdentry = tfhp->fh_dentry;
 	tdir = tdentry->d_inode;
 
-	err = (rqstp->rq_vers == 2) ? nfserr_acces : nfserr_xdev;
-	if (fdir->i_sb != tdir->i_sb)
-		goto out;
-
-	err = nfserr_perm;
-	if (!flen || isdotent(fname, flen) || !tlen || isdotent(tname, tlen))
-		goto out;
-
 	/* cannot use fh_lock as we need deadlock protective ordering
 	 * so do it by hand */
 	trap = lock_rename(tdentry, fdentry);
@@ -1598,6 +1672,10 @@ nfsd_rename(struct svc_rqst *rqstp, stru
 			err = -EPERM;
 	} else
 #endif
+	{	/* braces keep vfs_rename() as the body of the MSNFS "else" above */
+		if (ndentry->d_inode)	/* pin the inode about to be replaced, for pnode_before_rename() */
+			rinode = igrab(ndentry->d_inode);
 	err = vfs_rename(fdir, odentry, tdir, ndentry);
+	}
 	if (!err && EX_ISSYNC(tfhp->fh_export)) {
 		err = nfsd_sync_dir(tdentry);
@@ -1606,6 +1682,9 @@ nfsd_rename(struct svc_rqst *rqstp, stru
 	}
 	if (err == -EWOULDBLOCK)
 		err = -ETIMEDOUT;
+	if (!err)
+		pnode_before_rename(odentry->d_inode, rinode, fdentry, tdentry);
+	iput(rinode);
  out_dput_new:
 	dput(ndentry);
  out_dput_old:
@@ -1623,29 +1702,56 @@ nfsd_rename(struct svc_rqst *rqstp, stru
 	unlock_rename(tdentry, fdentry);
 	ffhp->fh_locked = tfhp->fh_locked = 0;
 
-out:
 	return err;
 }
 
-/*
- * Unlink a file or directory
- * N.B. After this call fhp needs an fh_put
- */
 int
-nfsd_unlink(struct svc_rqst *rqstp, struct svc_fh *fhp, int type,
-				char *fname, int flen)
+nfsd_rename(struct svc_rqst *rqstp, struct svc_fh *ffhp, char *fname, int flen,
+			    struct svc_fh *tfhp, char *tname, int tlen)
 {
-	struct dentry	*dentry, *rdentry;
-	struct inode	*dirp;
+	struct dentry	*fdentry, *tdentry;
+	struct inode	*fdir, *tdir;
 	int		err;
 
-	err = nfserr_acces;
-	if (!flen || isdotent(fname, flen))
+	err = fh_verify(rqstp, ffhp, S_IFDIR, MAY_REMOVE);
+	if (err)
 		goto out;
-	err = fh_verify(rqstp, fhp, S_IFDIR, MAY_REMOVE);
+	err = fh_verify(rqstp, tfhp, S_IFDIR, MAY_CREATE);
 	if (err)
 		goto out;
 
+	fdentry = ffhp->fh_dentry;
+	fdir = fdentry->d_inode;
+
+	tdentry = tfhp->fh_dentry;
+	tdir = tdentry->d_inode;
+
+	err = (rqstp->rq_vers == 2) ? nfserr_acces : nfserr_xdev;
+	if (fdir->i_sb != tdir->i_sb)
+		goto out;
+
+	err = nfserr_perm;
+	if (!flen || isdotent(fname, flen) || !tlen || isdotent(tname, tlen))
+		goto out;
+
+	err = rep_rename(rqstp, ffhp, fname, flen, tfhp, tname, tlen);
+	if (err)
+		printk("rep_rename ERR\n");
+out:
+	return err;
+}
+
+/*
+ * Unlink a file or directory
+ * N.B. After this call fhp needs an fh_put
+ */
+int rep_vfs_remove(struct rnfs_pnode *pnode, struct svc_fh *fhp, int type, char *fname, int flen)
+{
+   struct dentry  *dentry, *rdentry;
+   struct inode   *dirp;
+	struct inode *rinode;
+   int      err;
+
 	fh_lock(fhp);
 	dentry = fhp->fh_dentry;
 	dirp = dentry->d_inode;
@@ -1664,6 +1770,7 @@ nfsd_unlink(struct svc_rqst *rqstp, stru
 	if (!type)
 		type = rdentry->d_inode->i_mode & S_IFMT;
 
+	rinode = igrab(rdentry->d_inode);
 	if (type != S_IFDIR) { /* It's UNLINK */
 #ifdef MSNFS
 		if ((fhp->fh_export->ex_flags & NFSEXP_MSNFS) &&
@@ -1683,6 +1790,11 @@ nfsd_unlink(struct svc_rqst *rqstp, stru
 	if (err == 0 &&
 	    EX_ISSYNC(fhp->fh_export))
 			err = nfsd_sync_dir(dentry);
+	if (!err && pnode)
+		pnode_before_remove(pnode, rinode, dentry);
+	else
+		dprintk("rep_vfs_remove: err %d, pnode %p\n", err, pnode);
+	iput(rinode);
 
 out_nfserr:
 	err = nfserrno(err);
@@ -1690,6 +1802,26 @@ out:
 	return err;
 }
 
+int
+nfsd_unlink(struct svc_rqst *rqstp, struct svc_fh *fhp, int type,
+				char *fname, int flen)
+{
+	int		status;
+
+	/* Empty names and dot entries are refused before the handle is looked at. */
+	if (!flen || isdotent(fname, flen))
+		return nfserr_acces;
+
+	status = fh_verify(rqstp, fhp, S_IFDIR, MAY_REMOVE);
+	if (status)
+		return status;
+
+	status = rep_remove(rqstp, fhp, type, fname, flen);
+	if (status)
+		printk("rep_remove ERR\n");
+	return status;
+}
+
 /*
  * Read entries from a directory.
  * The  NFSv3/4 verifier we ignore for now.
diff -puN /dev/null fs/nfsd/nfs4repstate.c
--- /dev/null	2003-09-15 09:40:47.000000000 -0400
+++ rnfs-linux-2.6.16-rc3-jiayingz/fs/nfsd/nfs4repstate.c	2007-03-03 16:21:47.000000000 -0500
@@ -0,0 +1,1006 @@
+#ifndef __RNFS_USERMODE__
+#include <linux/config.h>
+#include <linux/types.h>
+#include <linux/sched.h>
+#include <linux/utsname.h>
+#include <linux/file.h>
+#include <linux/security.h>
+#include <linux/quotaops.h>
+#include <linux/dnotify.h>
+#include <linux/timer.h>
+#include <linux/sunrpc/xdr.h>
+#include <linux/sunrpc/clnt.h>
+#include <linux/sunrpc/svc.h>
+#include <linux/sunrpc/stats.h>
+#include <linux/nfsd/nfsd.h>
+#include <linux/nfsd/export.h>
+#include <linux/nfsd/state.h>
+#include <linux/nfsd/xdr4.h>
+#else
+#include <sys/stat.h>
+#include "rnfs.h"
+#endif
+#include "nfs4repstate.h"
+#include "nfs4replication.h"
+#include "nfs4repclnt.h"
+#include "nfs4repd.h"
+
+#define NFSDDBG_FACILITY   NFSDDBG_REP
+
+/* hash table for mutable replication */
+#define MREP_HASH_BITS                   8
+#define MREP_HASH_SIZE                  (1 << MREP_HASH_BITS)
+#define MREP_HASH_MASK                  (MREP_HASH_SIZE - 1)
+#define mrep_hashval(x) \
+        hash_ptr(x, MREP_HASH_BITS)
+
+static struct list_head p_updating_list[MREP_HASH_SIZE];	/* pnodes, bucket = mrep_hashval(inode); see find_pnode() */
+static struct list_head r_updating_list[MREP_HASH_SIZE];	/* rnodes, bucket = mrep_hashval(inode); see find_rnode() */
+static struct list_head parent_list[MREP_HASH_SIZE];	/* rnfs_parent records; see find_parent()/alloc_parent() */
+DECLARE_MUTEX(rep_sema);	/* taken and released by rep_lock()/rep_unlock() */
+
+LIST_HEAD(rnfs_dir_lru_list);	/* pnodes linked through ->dlist, drained by delayed_release_pnode() */
+LIST_HEAD(rnfs_file_lru_list);	/* same, second list handled by delayed_release_pnode() */
+static void delayed_release_pnode(void *data);
+DECLARE_WORK(rnfs_delay_dclose_work, delayed_release_pnode, &rnfs_dir_lru_list);	/* work data selects the dir list */
+DECLARE_WORK(rnfs_delay_fclose_work, delayed_release_pnode, &rnfs_file_lru_list);	/* work data selects the file list */
+static struct workqueue_struct *rnfs_delay_dclose_wq = NULL;	/* queue used to re-arm rnfs_delay_dclose_work */
+static struct workqueue_struct *rnfs_delay_fclose_wq = NULL;	/* queue used to re-arm rnfs_delay_fclose_work */
+
+static LIST_HEAD(rnfs_replist);	/* cache filled by rnfs_fslocs_get(), emptied by rnfs_fslocs_clean() */
+struct rnfs_replist {
+	struct list_head list;	/* link on rnfs_replist */
+	struct nfsd4_fs_locations *replist;	/* obtained with nfsd4_fslocs_get(), dropped with nfsd4_fslocs_put() */
+};
+
+struct nfsd4_fs_locations *rnfs_fslocs_get(struct nfsd4_fs_locations *replist)
+{
+	struct rnfs_replist *cached;
+	int idx;
+	if (replist == NULL)
+		return replist;
+	/* Reuse a cached list whose ->migrated location has the same path as the caller's. */
+	list_for_each_entry(cached, &rnfs_replist, list)
+		if (strcmp(cached->replist->locations[cached->replist->migrated].path, replist->locations[replist->migrated].path) == 0)
+			return nfsd4_fslocs_get(cached->replist);
+	if ((cached = kmalloc(sizeof(*cached), GFP_KERNEL)) == NULL)
+		return nfsd4_fslocs_get(replist);	/* no memory: hand back the list uncached */
+	INIT_LIST_HEAD(&cached->list);
+	cached->replist = nfsd4_fslocs_get(replist);
+	for (idx = 0; idx < replist->locations_count; idx++) {	/* reset per-location state before caching */
+		if (replist->locations[idx].clnt)
+			rpc_shutdown_client(replist->locations[idx].clnt);
+		replist->locations[idx].clnt = NULL;
+		replist->locations[idx].openseq = 0;
+		replist->locations[idx].failed = 0;
+	}
+	list_add(&cached->list, &rnfs_replist);
+	return nfsd4_fslocs_get(cached->replist);
+}
+
+static void rnfs_fslocs_clean(void)
+{
+	struct rnfs_replist *cur, *next;
+	printk("rnfs_fslocs_clean\n");
+	/* Empty the cache, dropping the list reference each entry holds. */
+	list_for_each_entry_safe(cur, next, &rnfs_replist, list) {
+		list_del(&cur->list);
+		nfsd4_fslocs_put(cur->replist);
+		kfree(cur);
+	}
+}
+
+/* internal functions */
+static int get_fhandle(struct svc_export *exp, struct dentry *dentry, struct xdr_netobj *fhp)
+{
+	int err;
+	struct svc_fh repd_fh;
+	/* Start from an empty netobj so that every failure leaves len == 0 and data == NULL. */
+	fhp->len = 0;
+	fhp->data = NULL;
+	fh_init(&repd_fh, NFS_FHSIZE);
+	if ((err = fh_compose(&repd_fh, exp, dentry, NULL))) {
+		printk("XXX: fail to compose fh_handle, err is %d\n", err);
+	} else if (!(fhp->data = kmalloc(repd_fh.fh_handle.fh_size, GFP_KERNEL))) {
+		err = -ENOMEM;	/* len stays 0: never publish a length without a buffer */
+	} else {
+		fhp->len = repd_fh.fh_handle.fh_size;
+		if (fhp->len > 20)
+			printk("XXX: get_fhandle too long %u\n", fhp->len);
+		memcpy(fhp->data, &repd_fh.fh_handle.fh_base, fhp->len);	/* caller owns and kfree()s data */
+	}
+	fh_put(&repd_fh);
+	return err;	/* 0, the fh_compose() status, or -ENOMEM */
+}
+
+static void
+release_fh(struct xdr_netobj *fh, struct nfsd4_fs_locations *replist)
+{
+	int slot;
+	/* Nothing is freed unless both the array and the location list are given. */
+	if (fh == NULL || replist == NULL)
+		return;
+	/* One netobj per location; kfree() ignores slots whose data is NULL. */
+	for (slot=0; slot < replist->locations_count; slot++)
+		kfree(fh[slot].data);
+	kfree(fh);
+}
+
+static void
+release_parent(struct rnfs_parent *pnode)
+{
+	if (pnode == NULL)
+		return;
+	dprintk("release parent %ld\n", pnode->fi_inode->i_ino);
+	if (pnode->bitmap != 0)	/* a per-location bit is still set: report it */
+		printk("XXX: parent %ld has nonzero bitmap %lu\n", pnode->fi_inode->i_ino, pnode->bitmap);
+	list_del(&pnode->fi_hash);	/* unhash from parent_list */
+	iput(pnode->fi_inode);		/* pairs with igrab() in alloc_parent() */
+	kfree(pnode->count);
+	kfree(pnode);
+}
+
+static struct rnfs_parent *
+alloc_parent(struct inode *ino, struct nfsd4_fs_locations *replist)
+{
+	unsigned int hashval = mrep_hashval(ino);
+	struct rnfs_parent *parent;
+
+	dprintk("alloc parent %ld\n", ino->i_ino);
+	parent = kmalloc(sizeof(struct rnfs_parent), GFP_KERNEL);
+	if (parent)
+		parent->count = kmalloc(sizeof(unsigned int)*replist->locations_count, GFP_KERNEL);
+	if (!parent || !parent->count) {
+		kfree(parent);	/* kfree(NULL) is a no-op; the node was never hashed and holds no inode */
+		return NULL;
+	}
+	memset(parent->count, 0, sizeof(unsigned int)*replist->locations_count);
+	parent->bitmap = 0;
+	parent->fi_inode = igrab(ino);
+	/* Hash the node only once it is fully built: the old code freed a node still linked on parent_list. */
+	list_add(&parent->fi_hash, &parent_list[hashval]);
+	return parent;
+}
+
+struct rnfs_parent *
+find_parent(struct inode *ino)
+{
+	struct list_head *bucket = &parent_list[mrep_hashval(ino)];
+	struct rnfs_parent *cur;
+	/* Scan the inode's hash bucket; no reference count is taken on a hit. */
+	list_for_each_entry(cur, bucket, fi_hash)
+		if (cur->fi_inode == ino)
+			return cur;
+	return NULL;
+}
+
+static void __rnfs_parent_inc(struct dentry *dentry_in, struct svc_export *exp, int i)
+{
+   struct rnfs_parent *pparent;
+   struct dentry *parent, *dentry = dentry_in;
+	struct nfsd4_fs_locations *replist = exp->ex_fslocs;
+	/* Bump count[i] on every ancestor up to the export root; the caller's reference on dentry_in is dput() here. */
+	dprintk("__rnfs_parent_inc: inode %ld\n", dentry->d_inode->i_ino);
+   while ((dentry != exp->ex_dentry) && !IS_ROOT(dentry)) {
+      parent = dget_parent(dentry);
+      dput(dentry);	/* swap the child reference for the parent's */
+      dentry = parent;
+      if (!(pparent = find_parent(parent->d_inode)))
+         pparent = alloc_parent(parent->d_inode, replist);
+      if (!pparent)  break;	/* allocation failed: ancestors above are left uncounted */
+      if (pparent->count[i] == 0)
+         pparent->bitmap |= (1 << (i+1));	/* first user for location i: set bit i+1 */
+      pparent->count[i]++;
+		if (i != exp->ex_fslocs->migrated)
+			dprintk("__rnfs_parent_inc: %ld, count %u, bitmap %lu\n", dentry->d_inode->i_ino, pparent->count[i], pparent->bitmap);
+   }
+	dprintk("__rnfs_parent_inc end: inode %ld\n", dentry->d_inode->i_ino);
+   dput(dentry);	/* drop the reference still held when the walk stopped */
+}
+
+static void rnfs_parent_inc(struct inode *inode, struct svc_export *exp, int i)
+{
+	struct dentry *dentry;
+	if (S_ISREG(inode->i_mode) && (inode->i_nlink > 1)) {	/* hard-linked file: account under every alias */
+		spin_lock(&dcache_lock);
+		list_for_each_entry(dentry, &inode->i_dentry, d_alias) {
+			dget_locked(dentry);	/* reference is consumed by __rnfs_parent_inc() */
+			spin_unlock(&dcache_lock);	/* NOTE(review): list is walked with the lock dropped in between */
+			dprintk("rnfs_parent_inc: linked file %ld\n", inode->i_ino);
+			__rnfs_parent_inc(dentry, exp, i);
+			spin_lock(&dcache_lock);
+		}
+		spin_unlock(&dcache_lock);
+	} else if ((dentry = d_find_alias(inode))) {
+		/* d_find_alias() can return NULL; the old code passed that on and the callee dereferenced it. */
+		__rnfs_parent_inc(dentry, exp, i);
+	}
+}
+
+void __rnfs_parent_dec(struct dentry *dentry_in, struct svc_export *exp, int i)
+{
+	struct rnfs_parent *pparent;
+	struct dentry *parent, *dentry = dentry_in;
+	/* Undo __rnfs_parent_inc(): drop count[i] on each ancestor; the caller's reference on dentry_in is dput() here. */
+	while ((dentry != exp->ex_dentry) && !IS_ROOT(dentry)) {
+		parent = dget_parent(dentry);
+		dput(dentry);
+		dentry = parent;
+		pparent = find_parent(parent->d_inode);
+		BUG_ON(!pparent);
+		/* Unbalanced decrement: stop walking, but leave through the dput() below so the parent reference is not leaked. */
+		if (pparent->count[i] <= 0) {
+			printk("XXX: __rnfs_parent_dec: multiple dec count: inode %ld, parent %ld\n", dentry_in->d_inode->i_ino, parent->d_inode->i_ino);
+			break;
+		}
+		if (--pparent->count[i] == 0)
+			pparent->bitmap &= ~(1 << (i+1));	/* last user for location i: clear bit i+1 */
+		if (i != exp->ex_fslocs->migrated)
+			dprintk("__rnfs_parent_dec: %ld, count %u, bitmap %lu\n", dentry->d_inode->i_ino, pparent->count[i], pparent->bitmap);
+	}
+	/* dentry is the last ancestor reached, or dentry_in itself when the loop never ran. */
+	dput(dentry);
+}
+
+void rnfs_parent_dec(struct inode *inode, struct svc_export *exp, int i)
+{
+	struct dentry *dentry;
+	if (S_ISREG(inode->i_mode) && (inode->i_nlink > 1)) {	/* hard-linked file: undo the count under every alias */
+		spin_lock(&dcache_lock);
+		list_for_each_entry(dentry, &inode->i_dentry, d_alias) {
+			dget_locked(dentry);	/* reference is consumed by __rnfs_parent_dec() */
+			spin_unlock(&dcache_lock);	/* NOTE(review): list is walked with the lock dropped in between */
+			dprintk("rnfs_parent_dec: linked file %ld\n", inode->i_ino);
+			__rnfs_parent_dec(dentry, exp, i);
+			spin_lock(&dcache_lock);
+		}
+		spin_unlock(&dcache_lock);
+	} else if ((dentry = d_find_alias(inode))) {
+		/* d_find_alias() can return NULL; the old code passed that on and the callee dereferenced it. */
+		__rnfs_parent_dec(dentry, exp, i);
+	}
+}
+
+static void
+release_pnode(struct rnfs_pnode *pnode)
+{
+	if (pnode == NULL)
+		return;
+	dprintk("release pnode %p, %ld, icount %d\n", pnode, pnode->fi_inode->i_ino, atomic_read(&pnode->fi_inode->i_count));
+	list_del(&pnode->fi_hash);	/* off the p_updating_list bucket */
+	list_del(&pnode->dlist);	/* off the delayed-close list */
+	iput(pnode->fi_inode);
+	release_fh(pnode->fh, pnode->exp->ex_fslocs);	/* needs exp, so it runs before exp_put() */
+	exp_put(pnode->exp);
+	kfree(pnode);
+}
+
+static void
+flush_primary_rqts(struct rnfs_pnode *pnode)
+{
+   struct repclnt_request *rqt;	/* request being detached from pnode->rq_list */
+   while (!list_empty(&pnode->rq_list)) {
+      rqt = list_entry(pnode->rq_list.next, struct repclnt_request, pl);
+      list_del_init(&rqt->pl);
+      rqt->pnode = NULL;	/* the request no longer points back at this pnode */
+   	if (atomic_dec_and_test(&pnode->refcnt)) {	/* one pnode reference dropped per detached request */
+      	release_pnode(pnode);
+			return;	/* pnode is gone; nothing below may touch it */
+		}
+   }
+	pnode->expires = jiffies - RNFS_FILE_TIMER;	/* back-date the expiry by RNFS_FILE_TIMER */
+	list_del_init(&pnode->dlist);
+	pnode->pflags |= PNODE_CLOSED;
+	if ((pnode->pflags & (PNODE_WAIT_CLOSE | PNODE_WAIT_COMPLETE))) {
+		pnode->pflags &= ~(PNODE_WAIT_CLOSE|PNODE_WAIT_COMPLETE);
+		wake_up_interruptible(&pnode->pwait);	/* wake sleepers on pwait once the wait flags are cleared */
+	}
+   if (atomic_dec_and_test(&pnode->refcnt))	/* final drop of one more reference */
+    	release_pnode(pnode);
+}
+
+#ifndef __RNFS_USERMODE__
+static inline void rnfs_put_file(struct file *filp)
+{
+	struct dentry *dentry = filp->f_dentry;
+	struct inode *inode = dentry->d_inode;
+	/* Under i_mutex: start writeback, call the file's fsync if it has one, then wait. */
+	mutex_lock(&inode->i_mutex);
+	filemap_fdatawrite(inode->i_mapping);
+	if (filp->f_op != NULL && filp->f_op->fsync != NULL)
+		filp->f_op->fsync(filp, dentry, 0);
+	filemap_fdatawait(inode->i_mapping);
+	mutex_unlock(&inode->i_mutex);
+	dprintk("rnfs_sync_file: inode %ld\n", inode->i_ino);
+	fput(filp);	/* finally give up the file reference */
+}
+
+static inline void rnfs_sync_dir(struct inode *inode)
+{
+	if (inode == NULL)
+		return;	/* nothing to flush */
+	filemap_fdatawrite(inode->i_mapping);	/* start writeback of the mapping ... */
+	filemap_fdatawait(inode->i_mapping);	/* ... and wait for it */
+}
+
+#else
+static inline void rnfs_put_file(struct file *filp)	/* __RNFS_USERMODE__ variant */
+{
+	fput(filp);	/* only drops the file; no flushing in this variant */
+}
+#endif
+
+/* XXX upcall */
+static void update_dview(struct rep_dview *dview, struct nfsd4_fs_locations *replist)
+{
+	unsigned int index;
+	int i;
+	/* Flag every location named in the deny list as failed. */
+	for (i=0; i < dview->deny_num; i++) {
+		index = dview->deny_list[i];
+		if (index >= replist->locations_count || index == replist->migrated)
+			continue;	/* out of range, or the ->migrated slot: leave untouched */
+		/* XXX upcall (disabled): on the first failure of a location, */
+		/* mreppipe_call(&hostname, NULL, 0, replist->lookup_name, MREP_FAIL) */
+		replist->locations[index].failed = 1;
+	}
+}
+
+static void __rnode_close(struct rnfs_rnode *rnode)	/* rnode_close() calls this between rep_lock()/rep_unlock() */
+{
+	dprintk("rnode_close %ld\n", rnode->fi_inode->i_ino);
+	if (!(rnode->rflags & RNODE_REMOVED))	/* parent counts are only undone when RNODE_REMOVED is not set */
+		rnfs_parent_dec(rnode->fi_inode, rnode->exp, rnode->primary);
+	list_del_init(&rnode->fi_hash);	/* find_rnode() can no longer return this rnode */
+	if ((rnode->rflags & RNODE_WAIT_CLOSE)) {
+		repd_scan_openlist();
+	}
+	rnode->rflags |= RNODE_CLOSED;	/* callers test this flag to avoid closing twice */
+	if ((rnode->rflags & RNODE_FORWARD_WAIT))
+		wake_up_interruptible(&rnode->rwait);
+}
+
+void rnode_close(struct rnfs_rnode *rnode, struct repd_update *update)
+{
+	/* Check rnode before touching it: the old code read rnode->fi_inode ahead of this test. */
+	if (!rnode)
+		return;
+	if (update) {	/* optional close update: adopt its timestamps and deny view */
+		struct rep_close *close = (struct rep_close *) update->data;
+		rnode->fi_inode->i_mtime = close->mtime;
+		rnode->fi_inode->i_ctime = close->ctime;
+		update_dview(&close->dview, rnode->exp->ex_fslocs);
+	}
+	if (!(rnode->rflags & RNODE_CLOSED)) {	/* close only once */
+		rep_lock();
+		__rnode_close(rnode);
+		rep_unlock();
+	}
+	rnode_put(rnode);	/* drops one reference on the rnode */
+}
+
+static void
+release_rnode(struct rnfs_rnode *rnode)
+{
+	struct repd_pending_entry *pending;
+	if (rnode == NULL)
+		return;
+	dprintk("release rnode %ld, count %d\n", rnode->fi_inode->i_ino, atomic_read(&rnode->fi_inode->i_count));
+	if ((rnode->rflags & RNODE_CLOSED) == 0)
+		__rnode_close(rnode);
+	exp_put(rnode->exp);
+	repd_release(rnode->update);
+	/* Drain the pending queue; an entry whose update is an OPEN is freed without releasing the update. */
+	while (!list_empty(&rnode->pendings)) {
+		pending = list_entry(rnode->pendings.next, struct repd_pending_entry, list);
+		list_del(&pending->list);
+		if (pending->update->opnum != REP_OP_OPEN)
+			repd_release(pending->update);
+		kfree(pending);
+	}
+	kfree(rnode->primary_fh.data);	/* kfree(NULL) is a no-op */
+	if (rnode->file != NULL)	/* with a file: flush and drop it; without: sync the inode */
+		rnfs_put_file(rnode->file);
+	else
+		rnfs_sync_dir(rnode->fi_inode);
+	iput(rnode->fi_inode);
+	kfree(rnode);
+}
+
+/* XXX we may need finer lock */
+void
+rep_lock(void)	/* acquire the single replication semaphore */
+{
+   down(&rep_sema);
+}
+
+void
+rep_unlock(void)	/* release the semaphore taken by rep_lock() */
+{
+   up(&rep_sema);
+}
+
+/* pnode refcnt: inc in find_pnode rnfs_timer repclnt_rq_alloc; dec in pnode_put */
+struct rnfs_pnode *
+find_pnode(struct inode *ino)
+{
+	struct list_head *bucket = &p_updating_list[mrep_hashval(ino)];
+	struct rnfs_pnode *cur;
+	list_for_each_entry(cur, bucket, fi_hash) {
+		if (cur->fi_inode != ino)
+			continue;
+		atomic_inc(&cur->refcnt);	/* the hit is returned with an extra reference */
+		return cur;
+	}
+	return NULL;
+}
+
+/* rnode refcnt: inc in find_rnode; dec in rnode_put */
+struct rnfs_rnode *
+find_rnode(struct inode *ino)
+{
+	struct list_head *bucket = &r_updating_list[mrep_hashval(ino)];
+	struct rnfs_rnode *cur;
+	list_for_each_entry(cur, bucket, fi_hash) {
+		if (cur->fi_inode != ino)
+			continue;
+		atomic_inc(&cur->refcnt);	/* the hit is returned with an extra reference */
+		return cur;
+	}
+	return NULL;
+}
+
+struct rnfs_parent *
+conflict_parent(struct inode *inode, int i)
+{
+	struct rnfs_parent *found = find_parent(inode);
+	/* A conflict means some bitmap bit other than location i's own (bit i+1) is set. */
+	if (found == NULL)
+		return NULL;
+	if ((found->bitmap & ~(1 << (i+1))) == 0)
+		return NULL;
+	return found;
+}
+
+#define RNFS_SEND_DELAY /* needs pnode, closep, rqt in scope; entered and left with rep_sema held */ \
+		if (!list_empty(&pnode->rq_list)) { \
+			struct repclnt_request *entry; \
+			printk("pnode %ld rq_list is not empty\n", pnode->fi_inode->i_ino); \
+			list_for_each_entry(entry, &pnode->rq_list, pl) \
+				printk("\t rqt %p, opnum %d\n", entry, entry->opnum); \
+		} \
+		BUG_ON(!list_empty(&pnode->rq_list)); \
+		list_del_init(&pnode->dlist); \
+		list_del_init(&pnode->fi_hash); \
+		pnode->pflags |= PNODE_CLOSED; \
+		rep_unlock(); \
+		/* Recompute rqt on every expansion: a request left over from an earlier one has already been put. */ \
+		rqt = (closep = rep_close_init(pnode)) ? repclnt_rq_alloc(pnode, NULL, REP_OP_CLOSE, (void *) closep) : NULL; \
+		if (rqt) { \
+			if (!(pnode->pflags & PNODE_REMOVED)) \
+				rnfs_parent_dec(pnode->fi_inode, pnode->exp, pnode->exp->ex_fslocs->migrated); \
+			atomic_dec(&pnode->refcnt); \
+			repclnt_update(pnode, rqt); \
+			repclnt_rq_put(rqt); \
+			if ((pnode->pflags & PNODE_WAIT_CLOSE)) { \
+				pnode->pflags &= ~PNODE_WAIT_CLOSE; \
+				wake_up_interruptible(&pnode->pwait); \
+			} \
+		} \
+		rep_lock();
+
+static void delayed_release_pnode(void *data)
+{
+	struct rnfs_pnode *pnode;
+	struct rep_close *closep = NULL;	/* used by RNFS_SEND_DELAY */
+	struct repclnt_request *rqt = NULL;	/* used by RNFS_SEND_DELAY; NOTE(review): initialised once, not per loop pass */
+	struct list_head *list = (struct list_head *) data;	/* &rnfs_dir_lru_list or &rnfs_file_lru_list, from DECLARE_WORK */
+	/* Work handler: send the delayed close for each expired pnode at the head of the selected list. */
+	rep_lock();
+	if (list == &rnfs_dir_lru_list) {
+	while (!list_empty(&rnfs_dir_lru_list)) {
+		pnode = list_entry(rnfs_dir_lru_list.next, struct rnfs_pnode, dlist);
+		dprintk("delayed_release_pnode: dir %ld, expires %lu, TIMER %u, jiffies %lu\n", pnode->fi_inode->i_ino, pnode->expires, RNFS_DIR_TIMER, jiffies);
+		if (time_before((jiffies+5), (pnode->expires+RNFS_DIR_TIMER))) {	/* head not due yet (5 jiffies of slack) */
+			queue_delayed_work(rnfs_delay_dclose_wq, &rnfs_delay_dclose_work, (pnode->expires+RNFS_DIR_TIMER-jiffies));
+			break;	/* re-armed for the remaining time; stop here */
+		}
+		RNFS_SEND_DELAY;	/* unlinks pnode from the list, so the loop advances */
+	}
+	} else {
+	while (!list_empty(&rnfs_file_lru_list)) {
+		pnode = list_entry(rnfs_file_lru_list.next, struct rnfs_pnode, dlist);
+		dprintk("delayed_release_pnode: file %ld, refcount %d, expires %lu, TIMER %u, jiffies %lu\n", pnode->fi_inode->i_ino, atomic_read(&pnode->refcnt), pnode->expires, RNFS_FILE_TIMER, jiffies);
+		if (time_before((jiffies+5), (pnode->expires+RNFS_FILE_TIMER))) {	/* head not due yet (5 jiffies of slack) */
+			queue_delayed_work(rnfs_delay_fclose_wq, &rnfs_delay_fclose_work, (pnode->expires+RNFS_FILE_TIMER-jiffies));
+			break;	/* re-armed for the remaining time; stop here */
+		}
+		RNFS_SEND_DELAY;	/* unlinks pnode from the list, so the loop advances */
+	}
+	}
+	rep_unlock();
+}
+
+struct rnfs_pnode *
+alloc_pnode(struct dentry *dentry, struct svc_export *exp)
+{	/* Create the primary-side node for dentry's inode, hash it, and return it with refcnt 2; NULL if memory runs out. */
+	struct inode *ino = dentry->d_inode;
+	struct rnfs_pnode *pnode;
+	struct nfsd4_fs_locations *replist = exp->ex_fslocs;
+
+	rnfs_parent_inc(ino, exp, replist->migrated);	/* NOTE(review): not undone on the failure returns below, as in the original */
+	pnode = kmalloc(sizeof(struct rnfs_pnode), GFP_KERNEL);
+	dprintk("alloc pnode %p, %ld, icount %d, openseq %u\n", pnode, ino->i_ino, atomic_read(&ino->i_count), replist->locations[replist->migrated].openseq);
+	if (!pnode)
+		return NULL;
+	pnode->fh = kcalloc(replist->locations_count, sizeof(struct xdr_netobj), GFP_KERNEL);	/* zeroed, one slot per replica */
+	if (!pnode->fh) {	/* checked before the node is hashed or takes any reference, so a plain kfree suffices */
+		kfree(pnode);
+		return NULL;
+	}
+	list_add(&pnode->fi_hash, &p_updating_list[mrep_hashval(ino)]);
+	pnode->fi_inode = igrab(ino);
+	pnode->pflags = 0;
+	exp_get(exp);
+	pnode->exp = exp;
+	init_waitqueue_head(&pnode->pwait);
+	get_fhandle(exp, dentry, &pnode->fh[replist->migrated]);	/* only the local slot is filled here */
+	pnode->ack_num = 1;
+	atomic_set(&pnode->refcnt, 2);
+	pnode->seq = replist->locations[replist->migrated].openseq;
+	INIT_LIST_HEAD(&pnode->rq_list);
+	pnode->expires = jiffies;
+	INIT_LIST_HEAD(&pnode->dlist);
+	pnode->competitor = replist->locations[replist->migrated].sin_addr.s_addr;
+	return pnode;
+}
+
+struct rnfs_rnode *
+alloc_rnode(struct dentry *dentry, int primary, struct svc_export *exp, struct xdr_netobj *primary_fh)
+{
+	struct inode *ino = dentry->d_inode;
+   unsigned int hashval = mrep_hashval(ino);
+   struct rnfs_rnode *rnode;
+	/* Create and hash the replica-side node for dentry's inode; returns it with refcnt 2, or NULL if kmalloc fails. */
+   dprintk("alloc rnode %ld, count %d\n", ino->i_ino, atomic_read(&ino->i_count));
+	rnfs_parent_inc(ino, exp, primary);	/* NOTE(review): runs before the allocation and is not undone if it fails */
+   rnode = kmalloc(sizeof(struct rnfs_rnode), GFP_KERNEL);
+   if (rnode) {
+      INIT_LIST_HEAD(&rnode->fi_hash);
+      list_add(&rnode->fi_hash, &r_updating_list[hashval]);
+      rnode->fi_inode = igrab(ino);
+      rnode->rflags = 0;
+      init_waitqueue_head(&rnode->rwait);
+		rnode->update = NULL;
+		rnode->primary = primary;
+		memcpy(&rnode->primary_fh, primary_fh, sizeof(struct xdr_netobj));	/* shallow copy: the data pointer moves to the rnode */
+		primary_fh->data = NULL;	/* caller's netobj is emptied so the buffer has a single owner */
+		primary_fh->len = 0;
+		exp_get(exp);
+		rnode->exp = exp;
+		atomic_set(&rnode->refcnt, 2);
+		INIT_LIST_HEAD(&rnode->pendings);
+		rnode->file = NULL;
+      return rnode;
+	}
+   return NULL;
+}
+
+void rnfs_timer(struct rnfs_pnode *pnode)
+{	/* Take pnode off whichever delayed-close LRU list it is on (harmless if it is on none). */
+	list_del_init(&pnode->dlist);
+}
+
+static void rnfs_clist_add(struct rnfs_pnode *pnode, int delay)
+{	/* Put pnode on the file or dir close list: delayed (tail, timer armed) or immediate (head, work kicked now). */
+	dprintk("rnfs_clist_add: pnode %ld\n", pnode->fi_inode->i_ino);
+	if (S_ISREG(pnode->fi_inode->i_mode)) {
+		dprintk("file %ld to close, isize %lld\n", pnode->fi_inode->i_ino, pnode->fi_inode->i_size);
+		if (delay) {
+			list_add_tail(&pnode->dlist, &rnfs_file_lru_list);
+			pnode->expires = jiffies;
+			queue_delayed_work(rnfs_delay_fclose_wq, &rnfs_delay_fclose_work, RNFS_FILE_TIMER);
+		} else {
+			list_add(&pnode->dlist, &rnfs_file_lru_list);
+			pnode->expires = jiffies - RNFS_FILE_TIMER;	/* backdated so the expiry test in delayed_release_pnode passes at once */
+			rep_unlock();	/* rep_lock is released around the cancel, so list state may change here */
+			cancel_delayed_work(&rnfs_delay_fclose_work);
+			rep_lock();
+			queue_work(rnfs_delay_fclose_wq, &rnfs_delay_fclose_work);
+		}
+	} else {
+		//if (delay && !(pnode->pflags & PNODE_SINGLE)) {
+		if (delay) {
+			list_add_tail(&pnode->dlist, &rnfs_dir_lru_list);
+			pnode->expires = jiffies;
+			queue_delayed_work(rnfs_delay_dclose_wq, &rnfs_delay_dclose_work, RNFS_DIR_TIMER);
+		} else {
+			list_add(&pnode->dlist, &rnfs_dir_lru_list);
+			pnode->expires = jiffies - RNFS_DIR_TIMER;	/* same backdating with the directory timer */
+			rep_unlock();
+			cancel_delayed_work(&rnfs_delay_dclose_work);
+			rep_lock();
+			queue_work(rnfs_delay_dclose_wq, &rnfs_delay_dclose_work);
+		}
+	}
+}
+
+void
+pnode_put(struct rnfs_pnode *pnode)
+{	/* Drop one reference on pnode; an idle, still-open node is first queued for a (delayed or immediate) close. */
+	if (!pnode)	/* reject NULL before the dprintk and refcount code dereference it */
+		return;
+	dprintk("pnode_put: %ld refcnt %d, empty %d, closed %d\n", pnode->fi_inode->i_ino, atomic_read(&pnode->refcnt), list_empty(&pnode->rq_list), (pnode->pflags & PNODE_CLOSED));
+	rep_lock();
+	if (list_empty(&pnode->rq_list) && list_empty(&pnode->dlist) && !(pnode->pflags & PNODE_CLOSED)) {
+		if (pnode->pflags & (PNODE_WAIT_CLOSE|PNODE_REMOVED))
+			dprintk("no delay close pnode %ld\n", pnode->fi_inode->i_ino);
+		rnfs_clist_add(pnode, !(pnode->pflags & (PNODE_WAIT_CLOSE|PNODE_REMOVED)));	/* delay only when nobody waits and the node is not removed */
+	}
+	if (atomic_dec_and_test(&pnode->refcnt))
+		release_pnode(pnode);
+	rep_unlock();
+}
+
+void pnode_rqput(struct repclnt_request *rqt)
+{	/* Unlink rqt from its pnode's request list, then drop the pnode reference the request held. */
+	struct rnfs_pnode *pnode = rqt->pnode;
+	rep_lock();
+	list_del(&rqt->pl);	/* done even when rqt->pnode is NULL */
+	rep_unlock();
+	if (pnode)
+		pnode_put(pnode);	/* takes rep_lock itself, hence the unlock above */
+}
+
+/* Request a close of pnode and wait, interruptibly, up to REPD_DFLT_TIMEO seconds for PNODE_CLOSED; returns 1 if closed, 0 otherwise. */
+int rep_wait_close(struct rnfs_pnode *pnode)
+{
+	dprintk("rep_wait_close, pnode %ld\n", pnode->fi_inode->i_ino);
+	rep_lock();
+	if (!(pnode->pflags & PNODE_CLOSED)) {
+		pnode->pflags |= PNODE_WAIT_CLOSE;
+		if (list_empty(&pnode->rq_list)) {
+			dprintk("rq_list is empty\n");
+			list_del_init(&pnode->dlist);	/* leave any delayed slot first, then requeue without delay */
+			rnfs_clist_add(pnode, 0);
+		}
+		rep_unlock();
+		wait_event_interruptible_timeout(pnode->pwait, ((pnode->pflags & PNODE_CLOSED)), REPD_DFLT_TIMEO*HZ);	/* result ignored; the flag is re-read below */
+		return (((pnode->pflags & PNODE_CLOSED) != 0));
+	}
+	rep_unlock();
+	return 1;
+}
+
+int repd_wait_close(struct rnfs_pnode *pnode)
+{	/* Non-blocking variant of rep_wait_close(): flags the close request and queues it, but never sleeps; always returns 1. */
+	dprintk("repd_wait_close, pnode %ld\n", pnode->fi_inode->i_ino);
+	rep_lock();
+	if (!(pnode->pflags & PNODE_CLOSED)) {
+		pnode->pflags |= PNODE_WAIT_CLOSE;
+		if (list_empty(&pnode->rq_list)) {
+			dprintk("rq_list is empty\n");
+			list_del_init(&pnode->dlist);	/* leave any delayed slot first, then requeue without delay */
+			rnfs_clist_add(pnode, 0);
+		}
+	}
+	rep_unlock();
+	return 1;
+}
+
+void rnfs_close_file(struct inode *inode)
+{	/* If inode has a pnode that is not yet PNODE_COMPLETE, wait (bounded by REPD_DFLT_TIMEO seconds) for completion. */
+	struct rnfs_pnode *pnode;
+again:
+	rep_lock();
+	if ((pnode = find_pnode(inode)) && !(pnode->pflags & PNODE_COMPLETE)) {
+		dprintk("rep_close_file: inode %ld\n", inode->i_ino);
+      if ((pnode->pflags & PNODE_WAITING)) {
+         pnode->pflags |= PNODE_WAIT_WAITING;
+         rnfs_wait_timeout(pnode->pwait, !(pnode->pflags & PNODE_WAITING), REPD_DFLT_TIMEO*HZ, &rep_sema);	/* releases rep_sema while sleeping and retakes it */
+      }
+      if ((pnode->pflags & (PNODE_WAITING|PNODE_CLOSED))) {	/* still contended or already closed: drop the reference and look the inode up again */
+         rep_unlock();
+         pnode_put(pnode);
+			goto again;
+      }
+		pnode->pflags |= PNODE_WAIT_COMPLETE;
+		rep_unlock();
+		wait_event_interruptible_timeout(pnode->pwait, ((pnode->pflags & PNODE_COMPLETE)), REPD_DFLT_TIMEO*HZ);
+		pnode_put(pnode);
+		dprintk("rep_close_file done\n");
+		return;
+	}
+	rep_unlock();
+	if (pnode)
+		pnode_put(pnode);
+}
+
+struct rnfs_pnode *
+rnfs_get_pnode(struct inode *inode)
+{	/* Locked wrapper around find_pnode(); callers in this file pair find_pnode() with pnode_put(), so the result presumably carries a reference -- TODO confirm. */
+	struct rnfs_pnode *pnode;
+   rep_lock();
+	pnode = find_pnode(inode);
+   rep_unlock();
+	return pnode;
+}
+
+void rnode_put(struct rnfs_rnode *rnode)
+{	/* Drop one reference on rnode under rep_lock and release it when that was the last; NULL is ignored. */
+	if (!rnode)	/* checked before the dprintk below dereferences it */
+		return;
+	dprintk("rnode_put: refcnt %d\n", atomic_read(&rnode->refcnt));
+	rep_lock();
+	if (atomic_dec_and_test(&rnode->refcnt))
+		release_rnode(rnode);
+	rep_unlock();
+}
+
+static void before_remove_parent_dec(struct dentry *parent_in, struct svc_export *exp, int i)
+{	/* Walk from parent_in up to the export root (or fs root), decrementing slot i's count and clearing its bitmap bit at zero. */
+   struct rnfs_parent *pparent;
+   struct dentry *parent=parent_in, *dentry=NULL;
+
+   dget(parent);
+	dprintk("before_remove_parent_dec: inode %ld\n", parent->d_inode->i_ino);
+   do {
+      pparent = find_parent(parent->d_inode);
+      BUG_ON(!pparent);
+		/* An already-zero count is reported and ends the walk instead of being treated as fatal. */
+		if (pparent->count[i] <= 0) {
+			printk("XXX: before_remove_parent_dec: multiple dec count: inode %ld, parent %ld\n", parent_in->d_inode->i_ino, parent->d_inode->i_ino);
+			break;	/* not return: the dentry reference held in 'parent' must reach the dput() below */
+		}
+      if (--pparent->count[i] == 0)
+         pparent->bitmap &= ~(1 << (i+1));
+      dentry = parent;
+		if (i != exp->ex_fslocs->migrated)
+			dprintk("before_remove_parent_dec: %ld, count %u, bitmap %lu\n", dentry->d_inode->i_ino, pparent->count[i], pparent->bitmap);
+   	if ((dentry == exp->ex_dentry) || IS_ROOT(dentry))
+			break;
+      parent = dget_parent(dentry);
+      dput(dentry);
+   } while (1);
+	dprintk("before_remove_parent_dec end: inode %ld\n", parent->d_inode->i_ino);
+	dput(parent);
+}
+
+void pnode_before_remove(struct rnfs_pnode *parent, struct inode *inode, struct dentry *dentry)
+{	/* Pre-remove hook: mark inode's pnode removed and flush/queue the matching request, or forward via its rnode if it has one. */
+	struct rnfs_pnode *pnode;
+	struct rnfs_rnode *rnode;
+	struct repclnt_request *rqt = NULL, *rqt2;
+	struct rep_wait *rep_wait;
+	if (!inode)
+		printk("XXX: pnode_before_remove: NULL inode\n");
+	else
+		dprintk("pnode_before_remove: %ld\n", inode->i_ino);
+   rep_lock();
+	if ((pnode = find_pnode(inode))) {
+		pnode->pflags |= PNODE_REMOVED;
+		before_remove_parent_dec(dentry, pnode->exp, pnode->exp->ex_fslocs->migrated);
+		if (S_ISREG(pnode->fi_inode->i_mode))
+			flush_primary_rqts(pnode);
+		else {	/* non-regular file: queue a REP_OP_WAIT on pnode that points at a placeholder request on the parent */
+		rnfs_timer(parent);
+		rqt2 = repclnt_rq_alloc(parent, NULL, REP_OP_NONE, NULL);	/* NOTE(review): may return NULL; dereferenced two lines below without a check */
+		rep_wait = kmalloc(sizeof(*rep_wait), GFP_KERNEL);	/* NOTE(review): result is not checked before the stores below */
+		rep_wait->seq = rqt2->seq;
+		rep_wait->pnode = parent;
+		rep_wait->rqt = rqt2;
+		atomic_inc(&parent->refcnt);	/* reference for rep_wait->pnode */
+		rnfs_timer(pnode);
+		rqt = repclnt_rq_alloc(pnode, NULL, REP_OP_WAIT, rep_wait);
+		}
+		rep_unlock();
+		if (rqt) {
+			repclnt_update(pnode, rqt);
+			repclnt_rq_put(rqt);
+		}
+		pnode_put(pnode);
+	} else if ((rnode = find_rnode(inode))) {
+   	rep_unlock();
+		repclnt_send_one(REPPROC_FORWARD, rnode, REP_OP_NONE, NULL, NULL);
+		rnode_put(rnode);	/* drop the lookup reference before rnode_before_remove() does its own lookup */
+		rnode_before_remove(inode, dentry);
+	} else
+   	rep_unlock();
+}
+
+void rnode_before_remove(struct inode *inode, struct dentry *dentry)
+{	/* Mark inode's rnode (if any) removed, undo its parent counts starting at dentry, and drop two references on it. */
+   struct rnfs_rnode *rnode;
+	if (!inode)	return;
+   rep_lock();
+	rnode = find_rnode(inode);
+	if (rnode) {
+		dprintk("rnode_before_remove: %ld\n", rnode->fi_inode->i_ino);
+		rnode->rflags |= RNODE_REMOVED;
+		before_remove_parent_dec(dentry, rnode->exp, rnode->primary);
+/*
+		if (S_ISREG(rnode->fi_inode->i_mode)) {
+			release_rnode(rnode);
+			rnode = NULL;
+		} else
+*/
+		atomic_dec(&rnode->refcnt);	/* extra drop on top of the rnode_put() below -- presumably the reference held since allocation; TODO confirm */
+	}
+   rep_unlock();
+	if (rnode)
+		rnode_put(rnode);	/* releases the reference find_rnode() took */
+}
+
+static void before_rename_parent_inc(struct dentry *parent_in, struct svc_export *exp, int i)
+{	/* Walk from parent_in up to the export root (or fs root), bumping slot i's count and setting its bitmap bit on 0 -> 1. */
+   struct rnfs_parent *pparent;
+   struct dentry *parent=parent_in, *dentry=NULL;
+
+   dget(parent);
+	dprintk("before_rename_parent_inc: inode %ld\n", parent->d_inode->i_ino);
+   do {
+      if (!(pparent = find_parent(parent->d_inode)))
+			pparent = alloc_parent(parent->d_inode, exp->ex_fslocs);	/* create the record on first use */
+		if (!pparent)  break;	/* allocation failed: ancestors above this point are left uncounted */
+		if (pparent->count[i] == 0)
+			pparent->bitmap |= (1 << (i+1));
+		pparent->count[i]++;
+      dentry = parent;
+		if (i != exp->ex_fslocs->migrated)
+			dprintk("before_rename_parent_inc: %ld, count %u, bitmap %lu\n", dentry->d_inode->i_ino, pparent->count[i], pparent->bitmap);
+   	if ((dentry == exp->ex_dentry) || IS_ROOT(dentry))
+			break;
+      parent = dget_parent(dentry);	/* take the parent's reference before dropping the child's */
+      dput(dentry);
+   } while (1);
+	dprintk("before_rename_parent_inc end: inode %ld\n", parent->d_inode->i_ino);
+	dput(parent);
+}
+
+void pnode_before_link(struct inode *inode, struct dentry *parent)
+{	/* Pre-link hook (primary side): if inode has a pnode, count it against the new parent directory chain. */
+	struct rnfs_pnode *pnode;
+	rep_lock();
+	pnode = find_pnode(inode);
+	if (pnode) {
+		dprintk("pnode_before_link: inode %ld\n", inode->i_ino);
+		before_rename_parent_inc(parent, pnode->exp, pnode->exp->ex_fslocs->migrated);
+	}
+	rep_unlock();
+	if (pnode)
+		pnode_put(pnode);	/* outside rep_lock: pnode_put() takes the lock itself */
+}
+
+void rnode_before_link(struct inode *inode, struct dentry *parent)
+{	/* Pre-link hook (replica side): if inode has an rnode, count it against the new parent directory chain. */
+	struct rnfs_rnode *rnode;
+	rep_lock();
+   rnode = find_rnode(inode);
+	if (rnode) {
+		dprintk("rnode_before_link: inode %ld\n", inode->i_ino);
+		before_rename_parent_inc(parent, rnode->exp, rnode->primary);
+	}
+	rep_unlock();
+	if (rnode)
+		rnode_put(rnode);	/* outside rep_lock: rnode_put() takes the lock itself */
+}
+
+void pnode_before_rename(struct inode *inode, struct inode *rinode, struct dentry *fdentry, struct dentry *tdentry)
+{	/* Pre-rename hook (primary side): move inode's parent counts from fdentry to tdentry and retire the replaced rinode. */
+	struct rnfs_pnode *pnode, *pnode2;
+	struct rnfs_rnode *rnode2;
+	if (!inode) {
+		dprintk("XXX: pnode_before_rename: NULL inode\n");
+		return;
+	}
+	dprintk("pnode_before_rename: inode %ld, rinode %ld\n", inode->i_ino, (rinode? rinode->i_ino : 0));
+	rep_lock();
+	pnode = find_pnode(inode);
+	pnode2 = find_pnode(rinode);
+	rnode2 = find_rnode(rinode);
+	if (pnode)
+		before_remove_parent_dec(fdentry, pnode->exp, pnode->exp->ex_fslocs->migrated);
+	if (pnode && !pnode2)
+		before_rename_parent_inc(tdentry, pnode->exp, pnode->exp->ex_fslocs->migrated);
+	if (!pnode && pnode2)
+		before_remove_parent_dec(tdentry, pnode2->exp, pnode2->exp->ex_fslocs->migrated);
+	if (pnode2)
+		pnode2->pflags |= PNODE_REMOVED;
+	rep_unlock();
+	if (pnode)	pnode_put(pnode);
+	if (pnode2)	pnode_put(pnode2);
+	/* The rename target lives on another primary: forward, then drop our lookup reference as pnode_before_remove() does. */
+	if (rnode2) {
+		repclnt_send_one(REPPROC_FORWARD, rnode2, REP_OP_NONE, NULL, NULL);
+		rnode_put(rnode2);
+		rnode_before_remove(rinode, tdentry);
+	}
+}
+
+void rnode_before_rename(struct inode *inode, struct inode *rinode, struct dentry *fdentry, struct dentry *tdentry)
+{	/* Pre-rename hook (replica side): move inode's parent counts from fdentry to tdentry and mark the replaced rinode removed. */
+   struct rnfs_rnode *rnode, *rnode2;
+   if (!inode) {
+      printk("XXX: rnode_before_rename: NULL inode\n");
+      return;
+   }
+	dprintk("rnode_before_rename: inode %ld\n", inode->i_ino);
+   rep_lock();
+   rnode = find_rnode(inode);
+   rnode2 = find_rnode(rinode);
+	if (rnode)
+		before_remove_parent_dec(fdentry, rnode->exp, rnode->primary);
+	if (rnode && !rnode2)
+		before_rename_parent_inc(tdentry, rnode->exp, rnode->primary);
+	if (!rnode && rnode2)
+		before_remove_parent_dec(tdentry, rnode2->exp, rnode2->primary);
+	if (rnode2)
+		rnode2->rflags |= RNODE_REMOVED;
+	rep_unlock();
+	if (rnode)
+		rnode_put(rnode);
+	if (rnode2)
+		rnode_put(rnode2);
+}
+
+void rep_state_start(void)
+{
+	int bucket;
+
+	printk("RNFS_DIR_TIMER %d, RNFS_FILE_TIMER %d\n", RNFS_DIR_TIMER, RNFS_FILE_TIMER);
+	rep_lock();
+	/* Reset all three hash tables in one pass, then create the two delayed-close workqueues. */
+	for (bucket = 0; bucket < MREP_HASH_SIZE; bucket++) {
+		INIT_LIST_HEAD(&p_updating_list[bucket]);
+		INIT_LIST_HEAD(&r_updating_list[bucket]);
+		INIT_LIST_HEAD(&parent_list[bucket]);
+	}
+	rnfs_delay_dclose_wq = create_singlethread_workqueue("repstate");
+	rnfs_delay_fclose_wq = create_singlethread_workqueue("repstate");
+	rep_unlock();
+}
+
+void rep_state_shutdown(void)
+{	/* Tear down replication state: flush pnodes, release rnodes, drain and destroy both workqueues, release parents. */
+   int i;
+   struct rnfs_pnode *pnode, *pn;
+   struct rnfs_rnode *rnode, *rn;
+	struct rnfs_parent *parent, *ppn;
+
+	printk("rep_state_shutdown\n");
+   rep_lock();
+   for (i=0; i < MREP_HASH_SIZE; i++)
+      list_for_each_entry_safe(pnode, pn, &p_updating_list[i], fi_hash)
+			flush_primary_rqts(pnode);
+   for (i=0; i < MREP_HASH_SIZE; i++)
+      list_for_each_entry_safe(rnode, rn, &r_updating_list[i], fi_hash) {
+			printk("unreleased rnode %ld\n", rnode->fi_inode->i_ino);
+         release_rnode(rnode);
+		}
+   rep_unlock();	/* unlocked while the workqueues drain: delayed_release_pnode() takes rep_lock itself */
+	if (rnfs_delay_dclose_wq) {
+		flush_workqueue(rnfs_delay_dclose_wq);
+		destroy_workqueue(rnfs_delay_dclose_wq);
+	}
+	if (rnfs_delay_fclose_wq) {
+		flush_workqueue(rnfs_delay_fclose_wq);
+		destroy_workqueue(rnfs_delay_fclose_wq);
+	}
+   rep_lock();
+   for (i=0; i < MREP_HASH_SIZE; i++)
+      list_for_each_entry_safe(pnode, pn, &p_updating_list[i], fi_hash)
+	printk("unreleased pnode %ld\n", pnode->fi_inode->i_ino);	/* reported only; leftover pnodes are not freed here */
+   for (i=0; i < MREP_HASH_SIZE; i++)
+      list_for_each_entry_safe(parent, ppn, &parent_list[i], fi_hash)
+         release_parent(parent);
+	rnfs_fslocs_clean();
+   rep_unlock();
+}
+
+void rnfs_rnode_print(void)
+{	/* Debug dump of every hashed rnode; NOTE(review): walks the table without taking rep_lock. */
+   int i;
+   struct rnfs_rnode *rnode;
+   for (i=0; i < MREP_HASH_SIZE; i++)
+      list_for_each_entry(rnode, &r_updating_list[i], fi_hash)
+			printk("RNODE %ld, primary %d, flags %u, refcnt %d\n", rnode->fi_inode->i_ino, rnode->primary, rnode->rflags, atomic_read(&rnode->refcnt));
+}
diff -puN /dev/null fs/nfsd/nfs4repstate.h
--- /dev/null	2003-09-15 09:40:47.000000000 -0400
+++ rnfs-linux-2.6.16-rc3-jiayingz/fs/nfsd/nfs4repstate.h	2007-03-03 16:21:47.000000000 -0500
@@ -0,0 +1,110 @@
+#ifndef REPSTATE_H
+#define REPSTATE_H
+
+#define REPD_DFLT_TIMEO			300	/* seconds: multiplied by HZ where it is used as a wait timeout */
+#define RNFS_DIR_TIMER			1000  // raw jiffies (passed unscaled to queue_delayed_work): 1s only when HZ == 1000
+#define RNFS_FILE_TIMER			25000     // raw jiffies: 25s at HZ == 1000 (an earlier note here said 4s -- TODO confirm the intended delay)
+#define RNFS_PARENT_COUNT		3
+#define REPPATH_MAX           256
+#define RNFS_MAX_SEQ				0x7fffffff
+
+#define PNODE_LOCKING         0x0001
+#define PNODE_UPDATING        0x0002
+#define PNODE_WAIT_LOCK      	0x0004
+#define PNODE_WAIT_CLOSE      0x0008	/* a close was requested; waiters are woken once the close is sent */
+#define PNODE_WAITING			0x000c	/* mask: PNODE_WAIT_LOCK | PNODE_WAIT_CLOSE */
+#define PNODE_CLOSED      		0x0010	/* node unhashed and close issued */
+#define PNODE_WAIT_WAITING		0x0020
+#define PNODE_SINGLE				0x0040
+#define PNODE_REMOVED      	0x0080	/* underlying object is being removed or replaced by a rename */
+#define PNODE_COMPLETE			0x0100
+#define PNODE_WAIT_COMPLETE	0x0200
+
+#define RNODE_WAIT_CLOSE		0x0001
+#define RNODE_CLOSED				0x0010
+#define RNODE_SINGLE				0x0040
+#define RNODE_REMOVED      	0x0080
+#define RNODE_FORWARD_WAIT		0x0100
+#define RNODE_CREATED			0x0200
+
+struct rnfs_pnode {	/* per-inode state kept on the server acting as primary for that inode */
+	struct list_head        	fi_hash;	/* link in a p_updating_list[] bucket */
+	struct inode            	*fi_inode;	/* obtained with igrab() */
+   unsigned int            	pflags;	/* PNODE_* bits */
+	struct svc_export				*exp;
+   wait_queue_head_t       	pwait;
+	unsigned long					expires;	/* jiffies stamp compared against RNFS_*_TIMER */
+	struct xdr_netobj    		*fh;	/* array with one file handle slot per replica location */
+	unsigned int					ack_num;	/* starts at 1; compared against half of locations_count */
+	atomic_t							refcnt;
+	unsigned int					seq;	/* sequence number handed to the next request */
+	struct list_head				rq_list;	/* outstanding repclnt_request entries, linked through ->pl */
+	struct list_head           dlist;	/* link on the dir or file delayed-close LRU list */
+	u32								competitor;	/* an s_addr value; initialised to the local location's address */
+};
+
+struct rnfs_rnode {	/* per-inode state kept on a server acting as a replica for that inode */
+	struct list_head        	fi_hash;	/* link in an r_updating_list[] bucket */
+	struct inode            	*fi_inode;	/* obtained with igrab() */
+   unsigned int            	rflags;	/* RNODE_* bits */
+   wait_queue_head_t       	rwait;
+	int								primary;	/* replica index of the primary; also the slot used in rnfs_parent counts */
+	struct xdr_netobj    		primary_fh;	/* taken over from the caller of alloc_rnode() */
+	struct svc_export				*exp;
+	atomic_t							refcnt;
+	struct repd_update			*update;
+	struct list_head				pendings;
+	struct file						*file;
+};
+
+struct rnfs_parent {	/* per-directory record of which replica slots have active nodes at or below it */
+   struct list_head        fi_hash;	/* link in a parent_list[] bucket */
+   struct inode            *fi_inode;
+   unsigned int            *count;	/* counter array indexed by replica slot i */
+	unsigned long				bitmap;	/* bit (i+1) is set while count[i] is non-zero */
+	unsigned int				flags;
+};
+
+#ifndef rnfs_wait_timeout
+#define rnfs_wait_timeout(wq, cond, timeout, mutex) do { /* single statement, safe under an unbraced if/else */ \
+	up(mutex); /* cond is evaluated without the mutex held while sleeping */ \
+	wait_event_interruptible_timeout(wq, cond, timeout); \
+	down(mutex); } while (0)
+#endif
+
+#ifdef __RNFS_USERMODE__
+extern pthread_mutex_t rep_sema;
+#else
+extern struct semaphore rep_sema;
+#endif
+
+void rep_lock(void);
+void rep_unlock(void);
+struct rnfs_pnode *find_pnode(struct inode *);
+struct rnfs_rnode *find_rnode(struct inode *);
+struct rnfs_parent *find_parent(struct inode *ino);
+struct rnfs_parent *conflict_parent(struct inode *inode, int i);
+struct rnfs_pnode *alloc_pnode(struct dentry *, struct svc_export *);
+struct rnfs_rnode *alloc_rnode(struct dentry *, int, struct svc_export *, struct xdr_netobj *);
+void rnfs_timer(struct rnfs_pnode *);
+void pnode_put(struct rnfs_pnode *);
+struct repclnt_request;
+void pnode_rqput(struct repclnt_request *);
+int rep_wait_close(struct rnfs_pnode *);
+int repd_wait_close(struct rnfs_pnode *);
+struct rnfs_pnode *rnfs_get_pnode(struct inode *);
+void rnode_put(struct rnfs_rnode *rnode);
+void rnode_close(struct rnfs_rnode *rnode, struct repd_update *update);
+void __rnfs_parent_dec(struct dentry *dentry, struct svc_export *exp, int i);
+void rnfs_parent_dec(struct inode *inode, struct svc_export *exp, int i);
+void rnode_before_remove(struct inode *inode, struct dentry *dentry);
+void pnode_before_remove(struct rnfs_pnode *parent, struct inode *inode, struct dentry *dentry);
+void pnode_before_rename(struct inode *inode, struct inode *rinode, struct dentry *fdentry, struct dentry *tdentry);
+void rnode_before_rename(struct inode *inode, struct inode *rinode, struct dentry *fdentry, struct dentry *tdentry);
+void pnode_before_link(struct inode *inode, struct dentry *parent);
+void rnode_before_link(struct inode *inode, struct dentry *parent);
+void rnfs_close_file(struct inode *inode);
+void rep_state_start(void);
+void rep_state_shutdown(void);
+void rnfs_rnode_print(void);
+#endif
diff -puN /dev/null fs/nfsd/nfs4repclnt.c
--- /dev/null	2003-09-15 09:40:47.000000000 -0400
+++ rnfs-linux-2.6.16-rc3-jiayingz/fs/nfsd/nfs4repclnt.c	2007-03-03 16:21:47.000000000 -0500
@@ -0,0 +1,552 @@
+
+#ifndef __RNFS_USERMODE__
+#include <linux/config.h>
+#include <linux/types.h>
+#include <linux/sched.h>
+#include <linux/utsname.h>
+#include <linux/file.h>
+#include <linux/pagemap.h>
+#include <linux/inet.h>
+#include <linux/nfs.h>
+#include <linux/sunrpc/xdr.h>
+#include <linux/sunrpc/clnt.h>
+#include <linux/sunrpc/svc.h>
+#include <linux/sunrpc/stats.h>
+#include <linux/nfsd/nfsd.h>
+#include <linux/nfsd/export.h>
+#include <linux/nfsd/state.h>
+#include <linux/sunrpc/cache.h>
+#include <linux/nfsd/syscall.h>
+#else
+#include "rnfs.h"
+#endif
+#include "nfs4repstate.h"
+#include "nfs4repclnt.h"
+#include "nfs4replication.h"
+
+#define NFSDDBG_FACILITY   NFSDDBG_REPCLNT
+
+#define REPCLNT_TEST1      (((pnode->ack_num*2) > pnode->exp->ex_fslocs->locations_count) || (atomic_read(&rqt->count) == 1))	/* strict majority of replicas acked, or only one reference to rqt remains; needs pnode and rqt in scope */
+
+DECLARE_MUTEX(repclnt_sema);
+static void repclnt_work_func(void *data);
+
+void rep_release_fh(struct xdr_netobj *fh)
+{	/* Free a heap-allocated netobj together with its data buffer; a NULL fh is tolerated. */
+	if (fh)
+		kfree(fh->data);	/* kfree(NULL) is a no-op, so data needs no separate test */
+	kfree(fh);
+}
+
+static void
+repclnt_release(unsigned int opnum, void *data)
+{	/* Free a request's operation payload with the release helper that matches its opnum. */
+	//dprintk("repclnt_release: opnum %u\n", opnum);
+   switch (opnum) {
+      case REP_OP_CLOSE:
+			rep_release_close((struct rep_close *) data);
+         break;
+		case REP_OP_WRITE:
+			rep_release_write((struct rep_write *) data);
+			break;
+      case REP_OP_CREATE:
+         rep_release_create((struct rep_create *) data);
+         break;
+      case REP_OP_REMOVE:
+         rep_release_remove((struct rep_remove *) data);
+         break;
+      case REP_OP_SETATTR:
+			rep_release_setattr((struct iattr *) data);
+         break;
+		case REP_OP_LNK:	/* link requests carry the same payload type as rename */
+		case REP_OP_RENAME:
+			rep_release_rename((struct rep_rename *) data);
+			break;
+		case REP_OP_OPEN:
+			rep_release_open((struct rep_open *) data);
+			break;
+		case REP_OP_WAIT:
+			rep_release_wait((struct rep_wait *) data);
+			break;
+		case REP_OP_CALLBACK:
+			rep_release_fh((struct xdr_netobj *) data);
+			break;
+      default:	/* any other opnum: nothing is freed here */
+         break;
+   }
+}
+
+static void
+repclnt_rq_release(struct repclnt_request *rqt)
+{
+	/* Final teardown of a request: path string, link to its pnode,
+	 * operation payload, and finally the request itself. */
+	kfree(rqt->pathname);
+	pnode_rqput(rqt);
+	repclnt_release(rqt->opnum, rqt->argp);
+	kfree(rqt);
+}
+
+struct repclnt_request *
+repclnt_rq_alloc(struct rnfs_pnode *pnode, char *pathname, unsigned int opnum, void *argp)
+{	/* Allocate a request on pnode with one reference held by the caller; returns NULL if kmalloc fails. */
+   struct repclnt_request *rqt;
+
+   rqt = kmalloc(sizeof(struct repclnt_request), GFP_KERNEL);
+   if (!rqt)
+      return NULL;
+	//dprintk("repclnt_rq_alloc: pnode %ld, opnum %u, seq %u, rqt %p\n", pnode->fi_inode->i_ino, opnum, pnode->seq, rqt);
+	atomic_inc(&pnode->refcnt);	/* the request pins its pnode; pnode_rqput() drops this */
+   rqt->pnode = pnode;
+	INIT_LIST_HEAD(&rqt->pl);
+	list_add_tail(&rqt->pl, &pnode->rq_list);
+	rqt->opnum = opnum;
+   rqt->argp = argp;	/* stored as-is; freed through repclnt_release() when the request is released */
+	rqt->pathname = pathname;	/* stored as-is; kfree()d when the request is released */
+	rqt->seq = pnode->seq++;
+   atomic_set(&rqt->count, 1);
+   atomic_set(&rqt->ack_count, 1);
+	rqt->uid = current->fsuid;
+	rqt->gid = current->fsgid;
+	rqt->mtime = current_fs_time(pnode->fi_inode->i_sb);
+   return rqt;
+}
+
+void repclnt_rq_put(struct repclnt_request *rqt)
+{
+	/* Drop one reference; whoever drops the last one tears the request down. */
+	if (!atomic_dec_and_test(&rqt->count))
+		return;
+	repclnt_rq_release(rqt);
+}
+
+static void repclnt_enter_pending(struct callback_data *cdata, struct nfsd4_fs_location *replica, int proc_num);
+
+#define REPCLNT_LOCK_RESEND /* re-queue REPPROC_OPEN to replicas that have not answered; needs rqt, pnode, replist and count in scope */ \
+	dprintk("resend? ack_count %d, ack_num %d, failed %d\n", atomic_read(&rqt->ack_count), pnode->ack_num, replist->locations[replist->migrated].failed); \
+	if ((atomic_read(&rqt->ack_count) < 3) && ((pnode->ack_num*2) > replist->locations_count) && ((replist->locations_count-pnode->ack_num) > replist->locations[replist->migrated].failed)) { /* under the round limit, majority reached, and more replicas unacked than recorded as failed */ \
+		int i; \
+		struct callback_data *cdata2; \
+		atomic_inc(&rqt->ack_count); /* counts resend rounds */ \
+   	for (i=0; i < replist->locations_count; i++) \
+			if (!replist->locations[i].failed && (pnode->fh[i].len <= 0)) { /* live replica with no file handle recorded yet */ \
+      		if (!(cdata2 = kmalloc(sizeof(struct callback_data), GFP_KERNEL))) \
+					break; \
+      		cdata2->index_num = i; \
+      		cdata2->rqt = rqt; \
+				atomic_inc(&rqt->count); \
+				count++; /* keeps the caller from releasing rqt while resends are queued */ \
+				repclnt_enter_pending(cdata2, &replist->locations[i], REPPROC_OPEN); \
+			} \
+	}
+
+void repclnt_rq_lockput(struct repclnt_request *rqt)
+{	/* Like repclnt_rq_put(), but on the last reference first tries to resend the open to silent replicas. */
+	int count;
+   count = atomic_sub_return(1, &rqt->count);
+	if (!count && rqt->pnode) {
+		struct rnfs_pnode *pnode = rqt->pnode;
+		struct nfsd4_fs_locations *replist = pnode->exp->ex_fslocs;
+		REPCLNT_LOCK_RESEND;	/* increments count for each resend it queues */
+	}
+	if (!count)
+		repclnt_rq_release(rqt);
+}
+
+static void
+repclnt_callback_done(struct rpc_task *task, void *data)
+{	/* rpc_call_done hook: on an RPC error, mark the replica failed and bump the failure counter kept in the local slot. */
+	struct callback_data *cdata = (struct callback_data *) data;
+	struct repclnt_request *rqt = cdata->rqt;
+	struct rnfs_pnode *pnode = rqt->pnode;
+	struct nfsd4_fs_locations *replist;
+	int index = cdata->index_num;
+	if (!pnode) goto out;
+	replist = pnode->exp->ex_fslocs;
+	if (task->tk_status && !replist->locations[index].failed) {	/* only the first failure of a replica is recorded */
+		printk("XXX: repclnt_wait_callback: tk_status %d, opnum %u, seq %u, index %d, inode %ld\n", task->tk_status, rqt->opnum, rqt->seq, index, pnode->fi_inode->i_ino);
+		replist->locations[index].failed = 2;
+		replist->locations[replist->migrated].failed ++;	/* the local entry's field doubles as a count of failed peers */
+	}
+ out:
+	return;
+}
+
+#define CALLBACK_HEADER /* shared prologue of the rpc_release hooks: unpacks data, drops one rqt reference into count; the user must supply an out: label */ \
+	struct callback_data *cdata = (struct callback_data *) data; \
+	struct repclnt_request *rqt = cdata->rqt; \
+	struct rnfs_pnode *pnode = rqt->pnode; \
+	struct nfsd4_fs_locations *replist; \
+	int index = cdata->index_num, count; \
+   count = atomic_sub_return(1, &rqt->count); \
+	if (!pnode) goto out; /* replist stays unset on this path */ \
+	replist = pnode->exp->ex_fslocs;
+
+static void
+repclnt_nowaitcall_release(void *data)
+{	/* rpc_release hook for update calls: drops the call's reference on rqt and frees the callback data. */
+	CALLBACK_HEADER;
+	if ((rqt->opnum == REP_OP_OPEN) && !list_empty(&replist->locations[index].rnfs_list))
+		queue_work(replist->locations[index].rnfs_wq, &replist->locations[index].rnfs_work);	/* entries are still pending for this replica: run the worker again */
+ out:
+	if (!count)
+		repclnt_rq_release(rqt);
+	kfree(cdata);
+}
+
+static void
+repclnt_enter_pending(struct callback_data *cdata, struct nfsd4_fs_location *replica, int proc_num)
+{	/* Append one call to replica's pending list and kick its worker. */
+	struct repclnt_pending_entry *entry;
+	//dprintk("repclnt_enter_pending: enter index %d, seq %u\n", cdata->index_num, cdata->rqt->seq);
+	entry = kmalloc(sizeof(*entry), GFP_KERNEL);
+	BUG_ON(!entry);	/* NOTE(review): allocation failure is treated as fatal here */
+	INIT_LIST_HEAD(&entry->list);
+	entry->cdata = cdata;
+	entry->proc = proc_num;
+	down(&repclnt_sema);	/* repclnt_sema guards insertion into rnfs_list */
+	list_add_tail(&entry->list, &replica->rnfs_list);
+	up(&repclnt_sema);
+	queue_work(replica->rnfs_wq, &replica->rnfs_work);
+}
+
+static void
+repclnt_waitcall_release(void *data)
+{	/* rpc_release hook for open/lock calls: wakes the lock waiter, may resend, then drops the call's reference. */
+	CALLBACK_HEADER;
+	if(REPCLNT_TEST1) {
+		if (pnode->pflags & PNODE_WAIT_LOCK)
+			wake_up_interruptible(&pnode->pwait);
+	}
+	if(!count) {
+		REPCLNT_LOCK_RESEND;	/* raises count when it queues resends, which suppresses the release below */
+	}
+	if (!list_empty(&replist->locations[index].rnfs_list))
+		queue_work(replist->locations[index].rnfs_wq, &replist->locations[index].rnfs_work);
+ out:
+	if (!count)
+		repclnt_rq_release(rqt);
+	kfree(cdata);
+}
+
+static const struct rpc_call_ops repclnt_lock_ops = {	/* used for REPPROC_OPEN calls */
+	.rpc_release = repclnt_waitcall_release,
+};
+
+static const struct rpc_call_ops repclnt_update_ops = {	/* used for every other call; also records replica failures */
+	.rpc_call_done = repclnt_callback_done,
+	.rpc_release = repclnt_nowaitcall_release,
+};
+
+static void
+repclnt_work_func(void *data)
+{
+	struct nfsd4_fs_location *replica = (struct nfsd4_fs_location *) data;
+	struct callback_data *cdata;
+	struct repclnt_request *rqt;
+	struct rnfs_pnode *pnode;
+	struct repclnt_pending_entry *entry, *n;
+   struct rpc_message msg;
+	struct xdr_netobj *fh;
+	int err;
+
+	/* Per-replica worker: send every pending call whose file handles are known; the rest stay queued for a later run. */
+	list_for_each_entry_safe(entry, n, &replica->rnfs_list, list) {
+		cdata = entry->cdata;
+		rqt = cdata->rqt;
+		pnode = rqt->pnode;
+		if (!pnode || replica->failed) {
+			repclnt_rq_put(rqt);
+			kfree(cdata);	/* no RPC is started, so no rpc_release hook will ever free it */
+			down(&repclnt_sema);	/* same locking as the unlink at the bottom of the loop */
+			list_del(&entry->list);
+			up(&repclnt_sema);
+			kfree(entry);
+			continue;
+		}
+		if (entry->proc == REPPROC_UPDATE) {
+			fh = &pnode->fh[cdata->index_num];
+			if (fh->len <= 0)	/* replica has not opened this object yet: leave the entry queued */
+				continue;
+			if (rqt->opnum == REP_OP_WAIT) {
+				struct rep_wait *rep_wait = (struct rep_wait *) rqt->argp;	/* WAIT requests carry a struct rep_wait payload */
+				struct rnfs_pnode *pnode2;
+				pnode2 = rep_wait->pnode;
+				fh = &pnode2->fh[cdata->index_num];
+				if (fh->len <= 0)
+					continue;
+			}
+			if ((rqt->opnum == REP_OP_RENAME) || (rqt->opnum == REP_OP_LNK)) {
+				struct rep_rename *rep_rename = (struct rep_rename *) rqt->argp;
+				struct rnfs_pnode *pnode2;
+				if (rep_rename->rqt) {
+					pnode2 = rep_rename->rqt->pnode;
+					fh = &pnode2->fh[cdata->index_num];
+					if (fh->len <= 0)
+						continue;
+				}
+			}
+		}
+		msg.rpc_proc = &repclnt_procedures[entry->proc];
+		msg.rpc_cred = NULL;
+		msg.rpc_argp = cdata;
+		msg.rpc_resp = cdata;
+		if (entry->proc == REPPROC_OPEN)
+			err = rpc_async_send(replica->clnt, &msg, RPC_TASK_SOFT, &repclnt_lock_ops, cdata);
+		else if (rqt->opnum == REP_OP_WRITE)	/* writes are spread over the data clients by sequence number */
+			err = rpc_call_async(replica->dataclnt[rqt->seq % REPCLNT_DATACLNT], &msg, RPC_TASK_SOFT, &repclnt_update_ops, cdata);
+		else
+			err = rpc_async_send(replica->clnt, &msg, RPC_TASK_SOFT, &repclnt_update_ops, cdata);
+		if (err) {
+        		dprintk("fail to send msg for %s\n", replica->hosts);
+			repclnt_rq_put(rqt);
+		}
+		down(&repclnt_sema);
+		list_del(&entry->list);
+		up(&repclnt_sema);
+		kfree(entry);
+	}
+}
+
+static int
+repclnt_send(struct repclnt_request *rqt, int proc_num, struct nfsd4_fs_locations *s_replist)
+{	/* Fan rqt out to every live remote replica; returns 0, or -ENOMEM when callback data cannot be allocated. */
+   struct nfsd4_fs_location *replist = s_replist->locations;
+   int i;
+   struct callback_data *cdata;
+
+   for (i=0; i < s_replist->locations_count; i++) {
+      if (i == s_replist->migrated) {	/* skip the local server's own slot */
+         continue;
+      }
+      if (replist[i].failed) {
+         continue;
+      }
+      if (!replist[i].clnt) {	/* bind the RPC client on first use */
+         if (repclnt_bind(&replist[i]) != 0) {
+            dprintk("repclnt: could not create clnt for %s\n", replist[i].hosts);
+            replist[i].failed = 2;
+            replist[s_replist->migrated].failed ++;	/* local slot's field counts failed peers */
+            continue;
+         }
+		}
+		if (!replist[i].rnfs_wq) {	/* create the per-replica pending list and worker on first use */
+			INIT_LIST_HEAD(&replist[i].rnfs_list);
+			INIT_WORK(&replist[i].rnfs_work, repclnt_work_func, &replist[i]);
+			replist[i].rnfs_wq = create_singlethread_workqueue("repclnt");
+			BUG_ON(!replist[i].rnfs_wq);
+      }
+      if (!(cdata = kmalloc(sizeof(struct callback_data), GFP_KERNEL)))
+         return -ENOMEM;	/* calls already queued for earlier replicas stay queued */
+      cdata->index_num = i;
+      cdata->rqt = rqt;
+      atomic_inc(&rqt->count);	/* one reference per outgoing call, dropped in the rpc_release hook */
+		if (rqt->opnum == REP_OP_CLOSE) {	/* closes bypass the pending queue and are sent directly */
+   		struct rpc_message msg;
+			msg.rpc_proc = &repclnt_procedures[REPPROC_UPDATE];
+			msg.rpc_cred = NULL;
+			msg.rpc_argp = cdata;
+			msg.rpc_resp = cdata; 
+			rpc_async_send(replist[i].clnt, &msg, RPC_TASK_SOFT, &repclnt_update_ops, cdata);	/* NOTE(review): return value ignored */
+		} else
+			repclnt_enter_pending(cdata, &replist[i], proc_num);
+   }
+   return 0;
+}
+
+static int
+replist_check(struct nfsd4_fs_locations *s_replist)
+{	/* Validate a replica list before use; returns 0 if usable, -EINVAL otherwise. */
+   if (!s_replist || (s_replist->locations_count <= 0) || !s_replist->locations) {
+      printk("replist_check: empty replica list\n");
+      return -EINVAL;
+   }
+   if ((s_replist->migrated <0) || (s_replist->migrated >= s_replist->locations_count)) {	/* migrated indexes locations[], so it must be below the count */
+      printk("replist_check: invalid replica list\n");
+      return -EINVAL;
+	}
+	return 0;
+}
+
+static int repclnt_lock_fail(struct rnfs_pnode *pnode)
+{	/* Tell every replica that already granted the open that it failed; returns 0 or -ENOMEM. */
+	struct repclnt_request *rqt;
+	struct rpc_message msg = { .rpc_proc = &repclnt_procedures[REPPROC_UPDATE] };	/* remaining fields start zeroed, so rpc_cred is NULL */
+	struct nfsd4_fs_locations *replist = pnode->exp->ex_fslocs;
+	int i;
+	struct callback_data *cdata;
+
+	rqt = repclnt_rq_alloc(pnode, NULL, REP_OP_OPENFAIL, NULL);
+	if (!rqt)
+		return -ENOMEM;
+	for (i=0; i < replist->locations_count; i++) {
+		if (replist->locations[i].failed || (i == replist->migrated) || (pnode->fh[i].len <= 0))
+			continue;
+		if (!(cdata = kmalloc(sizeof(struct callback_data), GFP_KERNEL)))
+			break;	/* leave the loop so the reference is still dropped below */
+		cdata->index_num = i;
+		cdata->rqt = rqt;
+		atomic_inc(&rqt->count);
+		msg.rpc_argp = cdata;
+		msg.rpc_resp = cdata;
+		rpc_async_send(replist->locations[i].clnt, &msg, RPC_TASK_SOFT, &repclnt_update_ops, cdata);
+		printk("repclnt_lock_fail %ld, i %d\n", pnode->fi_inode->i_ino, i);
+	}
+	repclnt_rq_put(rqt);	/* drop the creator's reference; in-flight calls hold their own */
+	return (i < replist->locations_count) ? -ENOMEM : 0;	/* the loop ends early only on allocation failure */
+}
+
+struct repclnt_locking_entry {	/* one path/pnode pair kept on repclnt_locking_list */
+	struct list_head	list;	/* linkage on repclnt_locking_list */
+	char					*path;	/* compared against the path passed to repclnt_scan_locking() */
+	struct rnfs_pnode	*pnode;	/* pnode whose competitor is cleared and whose pwait is woken on a match */
+};
+
+LIST_HEAD(repclnt_locking_list);	/* walked by repclnt_scan_locking(); NOTE(review): no insertion site or lock is visible in this part of the file */
+
+void repclnt_scan_locking(char *path, unsigned int pathlen)
+{	/* Clear the competitor and wake the waiter of every list entry whose path matches the first pathlen bytes of path. */
+	struct repclnt_locking_entry *entry;
+	printk("repclnt_scan_locking: path %s, pathlen %u\n", path, pathlen); /* NOTE(review): %s assumes path is NUL-terminated -- confirm */
+	list_for_each_entry(entry, &repclnt_locking_list, list) {
+		printk("repclnt_scan_locking: entry path %s\n", entry->path);
+		if (!strncmp(path, entry->path, pathlen)) { /* comparison limited to pathlen bytes */
+			entry->pnode->competitor = 0; /* repclnt_lock() waits for competitor==0 */
+			wake_up_interruptible(&entry->pnode->pwait);
+		}
+	}	
+}
+
+int repclnt_lock(struct rnfs_pnode *pnode, struct repclnt_request *rqt)
+{	/* Send REPPROC_OPEN for rqt to the replicas and wait for acks: returns 0 when more than half of locations_count acked, otherwise closes the pnode and returns -EAGAIN (or an earlier error). */
+	int err;
+	struct nfsd4_fs_locations *replist = pnode->exp->ex_fslocs;
+	//struct repclnt_locking_entry *locking_entry = NULL;
+
+	dprintk("repclnt_lock: inode %ld\n", pnode->fi_inode->i_ino);
+	if ((err = replist_check(replist)))
+		return err;
+	if ((err = repclnt_send(rqt, REPPROC_OPEN, replist)))
+		return err;
+	wait_event_interruptible_timeout(pnode->pwait, (REPCLNT_TEST1 || (pnode->pflags & PNODE_CLOSED)), REPD_DFLT_TIMEO*HZ); /* return value ignored: the outcome is decided solely by the ack_num test below */
+	dprintk("repclnt_lock done: inode %ld\n", pnode->fi_inode->i_ino);
+	if ((pnode->ack_num*2) > replist->locations_count) { /* strict majority of the replica list acknowledged */
+		if (rqt->seq == replist->locations[replist->migrated].openseq)
+			replist->locations[replist->migrated].openseq++; /* advance the open sequence of the entry at index 'migrated' */
+		if (pnode->competitor != replist->locations[replist->migrated].sin_addr.s_addr)
+			repd_scan_openlist();
+		return 0;
+	}
+
+	/* no majority: close this pnode */
+	rep_lock();
+	if (!(pnode->pflags & PNODE_REMOVED)) {
+		dprintk("pre_parent_dec pnode\n");
+		rnfs_parent_dec(pnode->fi_inode, pnode->exp, pnode->exp->ex_fslocs->migrated);
+	}
+	pnode->pflags |= PNODE_CLOSED;
+	atomic_dec(&pnode->refcnt);
+	if ((pnode->pflags & PNODE_WAIT_CLOSE)) { /* someone is waiting for the close: clear the flag and wake them */
+		pnode->pflags &= ~PNODE_WAIT_CLOSE;
+		wake_up_interruptible(&pnode->pwait);
+	}
+	if (pnode->competitor != replist->locations[replist->migrated].sin_addr.s_addr)
+		repd_scan_openlist();
+	rep_unlock();
+	if (pnode->ack_num > 1) /* more than one ack was counted: tell the replicas the open failed */
+		err = repclnt_lock_fail(pnode);
+	if (err) /* err is still 0 here unless repclnt_lock_fail() failed */
+		return err;
+	if (pnode->competitor != replist->locations[replist->migrated].sin_addr.s_addr)
+		wait_event_interruptible_timeout(pnode->pwait, (pnode->competitor==0), REPD_DFLT_TIMEO*HZ); /* competitor is zeroed by repclnt_scan_locking() */
+	else
+		dprintk("we may want to sleep a while here\n");
+	list_del_init(&pnode->fi_hash); /* NOTE(review): unlinked after rep_unlock() -- confirm these lists need no lock */
+	list_del_init(&pnode->dlist);
+	dprintk("repclnt_lock err %ld\n", pnode->fi_inode->i_ino);
+	return -EAGAIN;
+}
+
+int repclnt_update(struct rnfs_pnode *pnode, struct repclnt_request *rqt)
+{	/* Validate the export's replica list, then hand rqt to repclnt_send() as a REPPROC_UPDATE; returns 0 or a negative errno. */
+	struct nfsd4_fs_locations *replicas = pnode->exp->ex_fslocs;
+	int status;
+
+	dprintk("repclt_update: seq %u, pnode %ld, opnum %u\n", rqt->seq, pnode->fi_inode->i_ino, rqt->opnum);
+
+	status = replist_check(replicas);
+	if (status != 0)
+		return status;	/* unusable replica list: nothing is sent */
+	return repclnt_send(rqt, REPPROC_UPDATE, replicas);
+}
+
+static void
+repclnt_sendone_callback(struct rpc_task *task, void *data)
+{	/* RPC completion hook for repclnt_send_one(): record the status, drop the callback's reference, and wake a synchronous FREAD/FGETATTR waiter. */
+   struct repclnt_sendone_cdata *cdata = (struct repclnt_sendone_cdata *) data;
+   struct rnfs_rnode *rnode = cdata ? cdata->rnode : NULL;	/* snapshot now: once our reference is dropped the waiter may free cdata */
+   unsigned int opnum = cdata ? cdata->opnum : 0;
+   if (!cdata)
+      return;
+	dprintk("repclnt_sendone_callback: %d\n", -task->tk_status);
+   cdata->status = -task->tk_status; /* stored with the sign flipped; read back by repclnt_send_one() */
+   if (atomic_dec_and_test(&cdata->count)) { /* last reference: the sender already gave up, so clean up here */
+   	repclnt_release(opnum, cdata->argp);
+		kfree(cdata);
+	} else if ((opnum == REP_OP_FREAD) || (opnum == REP_OP_FGETATTR)) { /* cdata must not be touched from here on */
+		wake_up_interruptible(&rnode->rwait);
+	}
+}
+
+static const struct rpc_call_ops repclnt_sendone_ops = {	/* callbacks handed to rpc_call_async() by repclnt_send_one() */
+	.rpc_call_done = repclnt_sendone_callback,
+};
+
+int repclnt_send_one(unsigned int proc_num, struct rnfs_rnode *rnode, unsigned int opnum, void *argp, void *resp)
+{	/* Issue one async RPC (procedure proc_num) to the replica at index rnode->primary and, depending on opnum, wait for it; argp is released here unless the callback still holds a reference to cdata. */
+   struct repclnt_sendone_cdata *cdata = NULL;
+   struct rpc_message msg = {
+      .rpc_proc = &repclnt_procedures[proc_num],
+   };
+	struct nfsd4_fs_location *replica = &rnode->exp->ex_fslocs->locations[rnode->primary];
+   int err = -EINVAL;
+
+   if (!replica->clnt && ((err=repclnt_bind(replica)) != 0)) /* bind lazily on first use */
+     	goto out;
+	dprintk("repclnt_send_one: inode %ld, primary %d\n", rnode->fi_inode->i_ino, rnode->primary);
+	cdata = kmalloc(sizeof(struct repclnt_sendone_cdata), GFP_KERNEL);
+	err = -ENOMEM;
+	if (!cdata)
+		goto out;
+	cdata->rnode = rnode;
+	cdata->opnum = opnum;
+	cdata->argp = argp;
+	cdata->resp = resp;
+	cdata->uid = current->fsuid; /* caller's filesystem credentials travel with the request */
+	cdata->gid = current->fsgid;
+	atomic_set(&cdata->count, 2); /* two holders: this function and repclnt_sendone_callback() */
+   msg.rpc_argp = cdata;
+   msg.rpc_resp = cdata;
+   if (!(err = rpc_call_async(replica->clnt, &msg, RPC_TASK_SOFT, &repclnt_sendone_ops, cdata))) {
+		if ((opnum == REP_OP_FREAD) || (opnum == REP_OP_FGETATTR)) {
+      	wait_event_interruptible_timeout(rnode->rwait, (atomic_read(&cdata->count) == 1), REPD_DFLT_TIMEO*HZ); /* count==1 means the callback has dropped its reference */
+			err = (atomic_read(&cdata->count) == 1) ? cdata->status : -EACCES; /* callback done -> its stored status; otherwise timeout/signal */
+		} else if (opnum != REP_OP_CALLBACK) {
+			rnode->rflags |= RNODE_FORWARD_WAIT;
+      	wait_event_interruptible_timeout(rnode->rwait, (rnode->rflags & RNODE_CLOSED), REPD_DFLT_TIMEO*HZ);
+			rnode->rflags &= ~RNODE_FORWARD_WAIT;
+			if (!(rnode->rflags & RNODE_CLOSED))
+				err = -EACCES;
+		}
+      if (!atomic_dec_and_test(&cdata->count))
+			return err; /* callback has not run yet: it will release argp and free cdata */
+	} else
+		printk("XXX: err in rpc_call_async %d\n", err); /* NOTE(review): assumes the callback never runs when rpc_call_async() fails -- confirm, since cdata is freed below */
+ out:
+   repclnt_release(opnum, argp);
+	if (cdata)
+		kfree(cdata);
+   return err;
+}
+
diff -puN /dev/null fs/nfsd/nfs4repclnt.h
--- /dev/null	2003-09-15 09:40:47.000000000 -0400
+++ rnfs-linux-2.6.16-rc3-jiayingz/fs/nfsd/nfs4repclnt.h	2007-03-03 16:21:47.000000000 -0500
@@ -0,0 +1,53 @@
+#ifndef REPCLNT_H
+#define REPCLNT_H
+
+#define RQ_COMMITED	0x0001	/* NOTE(review): no user is visible in this part of the patch; meaning to be confirmed */
+#define COMMIT_MAX	10	/* NOTE(review): no user is visible in this part of the patch */
+#define REPCLNT_DATACLNT 1	/* NOTE(review): no user is visible in this part of the patch */
+
+extern struct rpc_procinfo repclnt_procedures[];	/* RPC procedure table, indexed with REPPROC_* values by the senders */
+struct repclnt_request {	/* one replication request, shared by the per-replica messages sent for it */
+   struct list_head        pl;	/* list linkage; NOTE(review): owning list not visible here */
+	struct rnfs_pnode			*pnode;
+	unsigned int				seq;	/* compared with a replica's openseq in repclnt_lock() */
+	unsigned int				opnum;	/* REP_OP_* code, e.g. REP_OP_CLOSE */
+   void                    *argp;
+   atomic_t                count;	/* incremented once per callback_data created for this request */
+   atomic_t                ack_count;
+	char							*pathname;
+	uid_t							uid;
+	gid_t							gid;
+   struct timespec      mtime;
+};
+
+struct repclnt_sendone_cdata {	/* per-call state shared by repclnt_send_one() and its RPC callback */
+	atomic_t             count;	/* starts at 2; whoever drops it to 0 releases argp and frees this struct */
+	struct rnfs_rnode		*rnode;	/* rnode whose rwait queue is used for the synchronous wait */
+	unsigned int         opnum;	/* REP_OP_* code of the forwarded operation */
+	void						*argp;	/* released with repclnt_release(opnum, argp) */
+	void						*resp;
+	uid_t							uid;	/* current->fsuid of the sender */
+	gid_t							gid;	/* current->fsgid of the sender */
+	int                  status;	/* set by the callback to -task->tk_status */
+};
+
+struct callback_data {	/* per-replica context allocated for each message of a repclnt_request */
+   unsigned int            index_num;	/* index of the target replica in the locations array */
+   struct repclnt_request  *rqt;	/* the request this message belongs to */
+};
+
+struct repclnt_pending_entry {	/* NOTE(review): presumably what repclnt_enter_pending() queues per replica -- confirm against its definition */
+   struct list_head     list;
+	int						proc;	/* presumably the RPC procedure number to send later -- confirm */
+	struct callback_data	*cdata;
+};
+
+struct repclnt_request *repclnt_rq_alloc(struct rnfs_pnode *pnode, char *pathname, unsigned int opnum, void *argp); /* returns NULL on failure (callers map that to -ENOMEM) */
+void repclnt_rq_put(struct repclnt_request *rqt);
+void repclnt_rq_lockput(struct repclnt_request *rqt);
+int repclnt_lock(struct rnfs_pnode *pnode, struct repclnt_request *rqt); /* 0 when a majority of replicas acked the open, -EAGAIN otherwise */
+int repclnt_update(struct rnfs_pnode *pnode, struct repclnt_request *rqt); /* sends rqt as REPPROC_UPDATE after validating the replica list */
+int repclnt_send_one(unsigned int procnum, struct rnfs_rnode *rnode, unsigned int opnum, void *argp, void *resp); /* single RPC to the replica at rnode->primary */
+int repclnt_bind(struct nfsd4_fs_location *rep); /* returns 0 on success; callers use it when rep->clnt is unset */
+void repclnt_scan_locking(char *path, unsigned int pathlen); /* wakes lock waiters whose path matches */
+#endif
diff -puN /dev/null fs/nfsd/nfs4repd.c
--- /dev/null	2003-09-15 09:40:47.000000000 -0400
+++ rnfs-linux-2.6.16-rc3-jiayingz/fs/nfsd/nfs4repd.c	2007-03-03 16:21:47.000000000 -0500
@@ -0,0 +1,1587 @@
+#ifndef __RNFS_USERMODE__
+#include <linux/sched.h>
+#include <linux/errno.h>
+#include <linux/in.h>
+#include <linux/uio.h>
+#include <linux/slab.h>
+#include <linux/smp.h>
+#include <linux/smp_lock.h>
+#include <linux/dcache.h>
+#include <linux/unistd.h>
+#include <linux/namei.h>
+#include <linux/file.h>
+#include <linux/fsnotify.h>
+#include <linux/sunrpc/types.h>
+#include <linux/sunrpc/stats.h>
+#include <linux/sunrpc/clnt.h>
+#include <linux/sunrpc/svc.h>
+#include <linux/sunrpc/svcsock.h>
+#include <linux/nfsd/nfsd.h>
+#include <linux/nfsd/state.h>
+#include <linux/nfsd/cache.h>
+#include <linux/nfsd/export.h>
+#include <linux/security.h>
+#include <linux/quotaops.h>
+#include <linux/dnotify.h>
+#include <asm/uaccess.h>
+
+#include "nfs4repstate.h"
+#include "nfs4replication.h"
+#include "nfs4repd.h"
+#include "nfs4repclnt.h"
+
+#define NFSDDBG_FACILITY   NFSDDBG_REPD
+
+static DECLARE_MUTEX(repd_sema);	/* serialises repd_up() and repd_down() */
+static DECLARE_MUTEX_LOCKED(repd_start);	/* upped by repd() once it runs; repd_up() downs it to wait for that */
+static DECLARE_WAIT_QUEUE_HEAD(repd_exit);	/* woken by repd() on exit; repd_down() sleeps on it */
+
+static unsigned int repd_ifup;	/* set to 1 by repd_up(), cleared by repd_down() */
+static pid_t         repd_pid;	/* pid of the repd thread, 0 when none */
+unsigned short port;	/* set to REPD_PORT in repd_up(); NOTE(review): non-static global with a very generic name */
+
+static void repd(struct svc_rqst *rqstp)
+{	/* Body of the repd service thread: receive and process RPC requests until told to stop, then tear down. */
+   struct svc_serv   *serv = rqstp->rq_server;
+   int err = 0;
+
+   lock_kernel();
+   dprintk("repd start\n");
+   repd_pid = current->pid;
+   up(&repd_start); /* lets repd_up() return */
+   daemonize("repd");
+   allow_signal(SIGKILL); /* repd_down() stops the thread with SIGKILL */
+
+   rpciod_up();
+   while( (repd_ifup || !signalled()) && repd_pid == current->pid ) { /* keep going while flagged up (or unsignalled) and still the registered thread */
+      if(signalled())
+         flush_signals(current);
+      err = svc_recv(serv, rqstp, 60*60*HZ); /* timeout of 60*60 seconds' worth of jiffies */
+      if(err == -EAGAIN || err == -EINTR)
+         continue;
+      if(err < 0) {
+         printk("repd: terminating on error %d\n", -err);
+         break;
+      }
+      svc_process(serv, rqstp);
+   }
+   mreppipe_delete(); /* counterpart of the mreppipe_new() call in repd_up() */
+   if (!repd_pid || current->pid == repd_pid)
+      repd_pid = 0;
+   wake_up(&repd_exit); /* repd_down() may be sleeping on this */
+   flush_signals(current);
+   svc_exit_thread(rqstp);
+   rpciod_down();
+   unlock_kernel();
+}
+
+void repd_up(void)
+{	/* Start the repd service (sockets on REPD_PORT plus one thread) unless it is already flagged up; errors are only logged, nothing is returned. */
+   static int warned = 0;
+   struct svc_serv * serv;
+   int error = 0;
+
+   down(&repd_sema);
+   if(repd_ifup)
+      goto out;
+   repd_ifup = 1; /* NOTE(review): stays 1 on every failure path below, so a failed start is never retried -- confirm intent */
+   mreppipe_new(); /* NOTE(review): not undone here if a later step fails */
+   error = -ENOMEM;
+   serv = svc_create(&repsvc_program, REPD_BUFSIZE);
+   if (!serv) {
+      printk("repd_up: create service failed\n");
+      goto out;
+   }
+
+   port = REPD_PORT;
+   if ((error = svc_makesock(serv, IPPROTO_UDP, port)) < 0
+#ifdef CONFIG_NFSD_TCP
+     || (error = svc_makesock(serv, IPPROTO_TCP, port)) < 0
+#endif
+      ) {
+      if (warned++ == 0) /* log the socket failure only once until a later success resets 'warned' */
+      printk("repd_up: makesock failed, error=%d\n", error);
+      goto destroy_and_out;
+   }
+   warned = 0;
+   rep_start();
+
+   error = svc_create_thread(repd, serv);
+   if (error) {
+      printk("repd_up: create thread failed, error=%d", error); /* NOTE(review): message lacks a trailing newline */
+      goto destroy_and_out;
+   }
+   down(&repd_start); /* wait until repd() has signalled that it is running */
+
+destroy_and_out: /* reached on success as well; presumably drops only the creator's reference, as in lockd_up() -- confirm */
+   svc_destroy(serv);
+out:
+   up(&repd_sema);
+}
+
+static LIST_HEAD(delayed_open_rqts);	/* delayed open requests; statically initialised so repd_down() can always walk it */
+
+void repd_down(void)
+{	/* Stop the repd thread, drop every queued delayed-open entry and shut replication down; no-op if not flagged up. */
+   struct delayed_open_entry *entry, *n;
+
+   down(&repd_sema);
+   if(!repd_ifup)
+      goto out;
+   dprintk("repd stop\n");
+   repd_ifup = 0;
+   kill_proc(repd_pid, SIGKILL, 1); /* repd() allows SIGKILL and leaves its loop once repd_ifup is 0 and a signal is pending */
+
+   clear_thread_flag(TIF_SIGPENDING); /* so our own pending signals do not cut the sleep short */
+   interruptible_sleep_on_timeout(&repd_exit, HZ); /* wait up to HZ jiffies for repd() to wake us on exit */
+   if(repd_pid) {
+      printk("repd_down: repd failed to exit, clearing pid\n");
+      repd_pid = 0;
+   }
+   spin_lock_irq(&current->sighand->siglock);
+   recalc_sigpending(); /* restore TIF_SIGPENDING cleared above */
+   spin_unlock_irq(&current->sighand->siglock);
+
+   list_for_each_entry_safe(entry, n, &delayed_open_rqts, list) { /* NOTE(review): only the entry itself is freed -- confirm it owns nothing else */
+      list_del(&entry->list);
+      kfree(entry);
+   }
+	rep_shutdown();
+out:
+   up(&repd_sema);
+}
+
+/* Functions that perform VFS updates */
+static int rep_setattr_mask(unsigned int ia_valid)
+{	/* Translate setattr ATTR_* bits into the matching dnotify DN_* event mask. */
+	const unsigned int times = ia_valid & (ATTR_ATIME|ATTR_MTIME);
+	unsigned long events = 0;
+
+	/* owner, group and mode changes are attribute events */
+	if (ia_valid & (ATTR_UID|ATTR_GID|ATTR_MODE))
+		events |= DN_ATTRIB;
+	/* a size change counts as a modification */
+	if (ia_valid & ATTR_SIZE)
+		events |= DN_MODIFY;
+	/* both timestamps together imply utime(s); one alone is an access or a modify */
+	if (times == (ATTR_ATIME|ATTR_MTIME))
+		events |= DN_ATTRIB;
+	else if (times & ATTR_ATIME)
+		events |= DN_ACCESS;
+	else if (times & ATTR_MTIME)
+		events |= DN_MODIFY;
+
+	return events;
+}
+
+static int
+rep_notify_change(struct dentry * dentry, struct iattr * attr)
+{	/* Apply attr to dentry's inode (setuid/setgid stripping, security hook, quota transfer, dnotify); returns 0 or a negative errno. */
+   struct inode *inode = dentry->d_inode;
+   mode_t mode;
+   int error;
+   struct timespec now;
+   unsigned int ia_valid = attr->ia_valid;
+
+   BUG_ON(!inode);	/* must come before any dereference of inode */
+   mode = inode->i_mode;
+   now = current_fs_time(inode->i_sb);
+   if (!(ia_valid & ATTR_ATIME_SET))
+      attr->ia_atime = now; /* only atime defaults to "now"; mtime/ctime are left as supplied */
+   if (ia_valid & ATTR_KILL_SUID) {
+      attr->ia_valid &= ~ATTR_KILL_SUID;
+      if (mode & S_ISUID) {
+         if (!(ia_valid & ATTR_MODE)) { /* no explicit mode given: start from the current one */
+            ia_valid = attr->ia_valid |= ATTR_MODE;
+            attr->ia_mode = inode->i_mode;
+         }
+         attr->ia_mode &= ~S_ISUID;
+      }
+   }
+   if (ia_valid & ATTR_KILL_SGID) {
+      attr->ia_valid &= ~ ATTR_KILL_SGID;
+      if ((mode & (S_ISGID | S_IXGRP)) == (S_ISGID | S_IXGRP)) { /* setgid is stripped only when group-execute is also set */
+         if (!(ia_valid & ATTR_MODE)) {
+            ia_valid = attr->ia_valid |= ATTR_MODE;
+            attr->ia_mode = inode->i_mode;
+         }
+         attr->ia_mode &= ~S_ISGID;
+      }
+   }
+   if (!attr->ia_valid)
+      return 0; /* nothing left to change */
+   if (ia_valid & ATTR_SIZE)
+      down_write(&dentry->d_inode->i_alloc_sem); /* held across the size change, released below */
+
+   if (inode->i_op && inode->i_op->setattr) {
+      error = security_inode_setattr(dentry, attr);
+      if (!error)
+         error = inode->i_op->setattr(dentry, attr);
+   } else { /* no filesystem hook: generic permission check, quota transfer and inode_setattr() */
+      error = inode_change_ok(inode, attr);
+      if (!error)
+         error = security_inode_setattr(dentry, attr);
+      if (!error) {
+         if ((ia_valid & ATTR_UID && attr->ia_uid != inode->i_uid) ||
+             (ia_valid & ATTR_GID && attr->ia_gid != inode->i_gid))
+            error = DQUOT_TRANSFER(inode, attr) ? -EDQUOT : 0;
+         if (!error)
+            error = inode_setattr(inode, attr);
+      }
+   }
+
+   if (ia_valid & ATTR_SIZE)
+      up_write(&dentry->d_inode->i_alloc_sem);
+
+   if (!error) { /* report the change to dnotify watchers of the parent */
+      unsigned long dn_mask = rep_setattr_mask(ia_valid);
+      if (dn_mask)
+         dnotify_parent(dentry, dn_mask);
+   }
+   return error;
+}
+
+static int repd_setattr(struct dentry  *dentry, struct iattr *iap)
+{	/* Apply the attributes in iap to dentry's inode via rep_notify_change(); returns 0 (also when iap is NULL) or a negative errno. */
+	struct inode   *inode;
+	int      imode;
+	int      size_change = 0;
+	int err;
+
+	if (!iap) return 0;
+	dprintk("repd_setattr: ia_valid %u, mode %u, uid %u, gid %u, size %lld\n", iap->ia_valid, iap->ia_mode, iap->ia_uid, iap->ia_gid, iap->ia_size);
+	if (!dentry || !dentry->d_inode) {
+		printk("XXX: repd_setattr NULL dentry\n");
+		return -EINVAL;
+	}
+	inode = dentry->d_inode;
+
+   if (iap->ia_valid & ATTR_SIZE) { /* truncation needs write access and a mandatory-lock check first */
+/*
+	if ((err = break_lease(inode, FMODE_WRITE))) {
+		printk("repd_setattr: err from break lease %d\n", err);
+		goto out;
+	}
+*/
+      err = get_write_access(inode);
+      if (err)
+         goto out;
+      size_change = 1; /* remember to drop write access after the change */
+      err = locks_verify_truncate(inode, NULL, iap->ia_size);
+      if (err) {
+			printk("err %d after locks_verify_truncate\n", err);
+         put_write_access(inode);
+         goto out;
+      }
+      DQUOT_INIT(inode);
+   }
+
+   imode = inode->i_mode;
+   if (iap->ia_valid & ATTR_MODE) { /* take only permission bits from the request; keep the file-type bits of the inode */
+      iap->ia_mode &= S_IALLUGO;
+      imode = iap->ia_mode |= (imode & ~S_IALLUGO);
+   }
+   if ((iap->ia_valid & ATTR_UID) && iap->ia_uid != inode->i_uid)
+      iap->ia_valid |= ATTR_KILL_SUID; /* owner change requests setuid stripping */
+   if ((iap->ia_valid & ATTR_GID) && iap->ia_gid != inode->i_gid)
+      iap->ia_valid |= ATTR_KILL_SGID; /* group change requests setgid stripping */
+
+	//mutex_lock(&inode->i_mutex);
+	err = rep_notify_change(dentry, iap); /* NOTE(review): i_mutex is not taken here (locking commented out) -- confirm callers hold it */
+	//mutex_unlock(&inode->i_mutex);
+   if (size_change)
+      put_write_access(inode);
+out:
+   return err;
+}
+
+static int repd_symlink(struct inode *dir, struct dentry *dnew, char *path,  int plen, struct iattr *iap)
+{	/* Create symlink dnew in dir pointing at the first plen bytes of path; returns -ENOENT for an empty target, else vfs_symlink()'s result. */
+	int		err;
+	umode_t	mode;
+
+   dprintk("repd_symlink\n");
+	if (!plen)
+		return -ENOENT;
+	mode = S_IALLUGO; /* default mode unless the request carries ATTR_MODE */
+	if (iap && (iap->ia_valid & ATTR_MODE))
+		mode = iap->ia_mode & S_IALLUGO;
+   if (unlikely(path[plen] != 0)) { /* target not NUL-terminated at plen: use a terminated copy; NOTE(review): this reads path[plen] -- confirm the buffer is at least plen+1 bytes */
+      char *path_alloced = kmalloc(plen+1, GFP_KERNEL);
+      if (path_alloced == NULL)
+         err = -ENOMEM;
+      else {
+         strncpy(path_alloced, path, plen);
+         path_alloced[plen] = 0;
+         err = vfs_symlink(dir, dnew, path_alloced, mode);
+         kfree(path_alloced);
+      }
+   } else
+      err = vfs_symlink(dir, dnew, path, mode);
+	return err;
+}
+
+static int repd_create(struct dentry *dir, struct rep_create *crp)
+{	/* Create the object described by crp (file, dir, symlink or device node) under dir and apply its attributes; an existing entry only gets the attributes. Returns 0 or a negative errno. */
+   struct dentry *dchild;
+   struct iattr *iap;
+   int err;
+
+   if(!crp->cr_namelen || isdotent(crp->cr_name, crp->cr_namelen))
+      return -EACCES;
+   if(!dir->d_inode || !dir->d_inode->i_op || !dir->d_inode->i_op->lookup)
+      return -ENOTDIR;
+   /* log only once dir->d_inode is known to be non-NULL */
+   dprintk("repd_create: dir %ld, type %u, cr_namelen %d, cr_name %s, uid %u, gid %u\n", dir->d_inode->i_ino, crp->cr_type, crp->cr_namelen, crp->cr_name, crp->cr_attrs.ia_uid, crp->cr_attrs.ia_gid);
+
+	//mutex_lock(&dir->d_inode->i_mutex);
+   dchild = lookup_one_len(crp->cr_name, dir, crp->cr_namelen);
+   err = PTR_ERR(dchild);
+   if( IS_ERR(dchild) )
+		goto out;
+   iap = &crp->cr_attrs;
+   if(dchild->d_inode) {
+      printk("repd_create: entry already exist\n");
+      err = 0;	/* err held PTR_ERR() of a valid dentry; reset it so the attributes below are applied */
+      goto set_att;
+   }
+   err = -EPERM; /* returned unchanged for an unknown create type */
+   switch(crp->cr_type) {
+   case S_IFREG:
+      err = vfs_create(dir->d_inode, dchild, iap->ia_mode, NULL);
+      break;
+   case S_IFDIR:
+      err = vfs_mkdir(dir->d_inode, dchild, iap->ia_mode);
+      break;
+   case S_IFLNK:
+		err = repd_symlink(dir->d_inode, dchild, crp->u.link.name, crp->u.link.namelen, iap);
+      break;
+	case S_IFCHR:
+	case S_IFBLK:
+      err = vfs_mknod(dir->d_inode, dchild, iap->ia_mode,
+               MKDEV(crp->u.dev.specdata1, crp->u.dev.specdata2));
+      break;
+	case S_IFIFO:
+	case S_IFSOCK:
+      err = vfs_mknod(dir->d_inode, dchild, iap->ia_mode, 0);
+      break;
+   default:
+      printk("repd_create: bad create type %d\n", crp->cr_type);
+		break;
+   }
+set_att:
+   if (!err && (iap->ia_valid != 0))
+      err = repd_setattr(dchild, iap);
+	if (dchild && !IS_ERR(dchild))
+		dput(dchild);
+ out:
+	//mutex_unlock(&dir->d_inode->i_mutex);
+   return err;
+}
+
+static int repd_create_v3(struct dentry *dir, struct rep_create *crp)
+{	/* NFSv3-style regular-file create under dir; crp->cr_type carries the create mode (UNCHECKED/GUARDED/EXCLUSIVE). Returns 0 or a negative errno. */
+   struct dentry *dchild = NULL;
+   struct iattr *iap;
+	struct inode *dirp;
+	int createmode;
+   __u32    v_mtime=0, v_atime=0;
+   int      v_mode=0;
+	u32 *verifier;
+   int err;
+
+   dprintk("repd_create_v3: dir %ld, type %u, cr_namelen %d, cr_name %s, uid %u, gid %u\n", dir->d_inode->i_ino, crp->cr_type, crp->cr_namelen, crp->cr_name, crp->cr_attrs.ia_uid, crp->cr_attrs.ia_gid); /* NOTE(review): dereferences dir->d_inode before the NULL check below */
+   if(!crp->cr_namelen || isdotent(crp->cr_name, crp->cr_namelen))
+      return -EACCES;
+   if(!dir->d_inode || !dir->d_inode->i_op || !dir->d_inode->i_op->lookup)
+      return -ENOTDIR;
+
+	dirp = dir->d_inode;
+	//mutex_lock(&dirp->i_mutex);
+   dchild = lookup_one_len(crp->cr_name, dir, crp->cr_namelen);
+   err = PTR_ERR(dchild);
+   if( IS_ERR(dchild) )
+		goto out;
+   iap = &crp->cr_attrs;
+	createmode = crp->cr_type;
+	verifier = &crp->u.verf; /* treated as two 32-bit words below */
+
+   if (createmode == NFS3_CREATE_EXCLUSIVE) { /* encode the verifier in mtime/atime and two mode bits so a retry can be recognised */
+      v_mtime = verifier[0]&0x7fffffff;
+      v_atime = verifier[1]&0x7fffffff;
+      v_mode  = S_IFREG
+         | ((verifier[0]&0x80000000) >> (32-7)) /* u+x */
+         | ((verifier[1]&0x80000000) >> (32-9)) /* u+r */
+         ;
+   }
+   if (dchild->d_inode) { /* name already exists: outcome depends on the create mode */
+      err = 0;
+      switch (createmode) {
+      case NFS3_CREATE_UNCHECKED:
+         if (! S_ISREG(dchild->d_inode->i_mode))
+            err = -EEXIST;
+         else {
+            iap->ia_valid &= ATTR_SIZE; /* on an existing regular file only a size change is applied */
+            goto set_attr;
+         }
+         break;
+      case NFS3_CREATE_EXCLUSIVE:
+         if (   dchild->d_inode->i_mtime.tv_sec == v_mtime
+             && dchild->d_inode->i_atime.tv_sec == v_atime
+             && dchild->d_inode->i_mode  == v_mode
+             && dchild->d_inode->i_size  == 0 )
+            break; /* verifier matches: treated as success (err stays 0) */
+      case NFS3_CREATE_GUARDED: /* also reached by fall-through from a mismatching EXCLUSIVE */
+         err = -EEXIST;
+      }
+      goto out;
+   }
+	err = vfs_create(dirp, dchild, iap->ia_mode, NULL);
+	if (err < 0) {
+		printk("err from vfs_create %d, permission %d, ia_mode %d\n", err, permission(dirp,MAY_WRITE | MAY_EXEC, NULL), iap->ia_mode);
+		goto out;
+	}
+   if (createmode == NFS3_CREATE_EXCLUSIVE) { /* store the verifier-derived times and mode on the new file */
+      iap->ia_valid = ATTR_MTIME|ATTR_ATIME
+         | ATTR_MTIME_SET|ATTR_ATIME_SET
+         | ATTR_MODE;
+      iap->ia_mtime.tv_sec = v_mtime;
+      iap->ia_atime.tv_sec = v_atime;
+      iap->ia_mtime.tv_nsec = 0;
+      iap->ia_atime.tv_nsec = 0;
+      iap->ia_mode  = v_mode;
+   }
+ set_attr:
+   if ((iap->ia_valid &= ~(ATTR_UID|ATTR_GID)) != 0) /* uid/gid are never applied on this path */
+      err = repd_setattr(dchild, iap);
+ out:
+	//mutex_unlock(&dirp->i_mutex);
+   if (dchild && !IS_ERR(dchild))
+      dput(dchild);
+   return err;
+}
+
+static int repd_remove(struct dentry *dir, struct rep_remove *rmp)
+{
+   struct dentry *rdentry;
+   int type;
+   int err;
+	struct inode *rinode;
+
+   dprintk("repd_remove, dir %ld\n", dir->d_inode->i_ino);
+   if( !rmp->rm_namelen || isdotent(rmp->rm_name, rmp->rm_namelen) )
+      return -EACCES;
+
+	//mutex_lock(&dir->d_inode->i_mutex);
+   rdentry = lookup_one_len(rmp->rm_name, dir, rmp->rm_namelen);
+   err = PTR_ERR(rdentry);
+   if( IS_ERR(rdentry) )
+		goto out;
+   if(!rdentry->d_inode) {
+      printk("repd_remove: entry not exist\n");
+      dput(rdentry);
+		goto out;
+   }
+	dprintk("rdentry %ld\n", rdentry->d_inode->i_ino);
+
+	rinode = igrab(rdentry->d_inode);
+   type = rdentry->d_inode->i_mode & S_IFMT;
+   if( type != S_IFDIR ) {
+#ifdef MSNFS
+      if( (fhp->fh_export->ex_flags & NFSEXP_MSNFS) &&
+				atomic_read(&rdentry->d_count) > 1 ) {
+         err = -EPERM;
+		} else
+#endif
+      err = vfs_unlink(dir->d_inode, rdentry);
+   }
+   else
+      err = vfs_rmdir(dir->d_inode, rdentry);
+	rnode_before_remove(rinode, dir);
+	iput(rinode);
+   dput(rdentry);
+ out:
+	//mutex_unlock(&dir->d_inode->i_mutex);
+   return err;
+}
+
+static int __repd_rename(struct dentry *fdentry, char *fname, int flen, struct dentry *tdentry, char *tname, int tlen)
+{	/* Rename fname in directory fdentry to tname in directory tdentry (same superblock only); returns 0 or a negative errno. */
+   struct dentry  *odentry, *ndentry, *trap;
+   struct inode   *fdir, *tdir, *rinode=NULL;
+   int err;
+
+   if (!fdentry || !tdentry)
+      return -EIO;
+   fdir = fdentry->d_inode;
+   tdir = tdentry->d_inode;
+   if (fdir->i_sb != tdir->i_sb) /* cross-filesystem rename is refused */
+      return -EACCES;
+   if (!flen || isdotent(fname, flen) || !tlen || isdotent(tname, tlen))
+      return -EPERM;
+
+   trap = lock_rename(tdentry, fdentry); /* 'trap' is the dentry that must be neither source nor target */
+   odentry = lookup_one_len(fname, fdentry, flen);
+   err = PTR_ERR(odentry);
+   if (IS_ERR(odentry))
+      goto out;
+
+   err = -ENOENT;
+   if (!odentry->d_inode) /* source does not exist */
+      goto out_dput_old;
+   err = -EINVAL;
+   if (odentry == trap)
+      goto out_dput_old;
+
+   ndentry = lookup_one_len(tname, tdentry, tlen);
+   err = PTR_ERR(ndentry);
+   if (IS_ERR(ndentry))
+      goto out_dput_old;
+   err = -ENOTEMPTY;
+   if (ndentry == trap)
+      goto out_dput_new;
+	if (ndentry->d_inode)
+		rinode = igrab(ndentry->d_inode); /* pin the inode being replaced so it can be reported below */
+   err = vfs_rename(fdir, odentry, tdir, ndentry);
+	rnode_before_rename(odentry->d_inode, rinode, fdentry, tdentry); /* called whether or not vfs_rename() succeeded */
+	iput(rinode);
+
+ out_dput_new:
+   dput(ndentry);
+ out_dput_old:
+   dput(odentry);
+ out:
+   unlock_rename(tdentry, fdentry);
+/*
+   if (!err) {
+      ndentry = lookup_one_len(tname, tdentry, tlen);
+      if (ndentry && !IS_ERR(ndentry)) {
+         dput(ndentry);
+      }
+   }
+*/
+   return err;
+}
+
+static int repd_rename(struct dentry *dentry, struct repd_rename *rename)
+{	/* Rename inside dentry unless the request names an explicit target directory. */
+   struct dentry *target_dir = rename->tdentry ? rename->tdentry : dentry;
+
+   dprintk("repd_rename\n");
+   return __repd_rename(dentry, rename->sname, rename->snamelen,
+                        target_dir, rename->tname, rename->tnamelen);
+}
+
+static int repd_lnk(struct dentry *dentry, struct repd_rename *rename)
+{	/* Hard-link the existing entry rename->sname (looked up under dentry) as rename->tname in the target directory; returns 0 or a negative errno. */
+   struct dentry  *ddir, *dnew, *dold;
+   struct inode   *dirp;
+   int err;
+	struct qstr name = {
+		.name = rename->sname,
+		.len = rename->snamelen,
+	};
+
+	if (rename->tdentry) /* explicit target directory, otherwise link within dentry */
+   	ddir = rename->tdentry;
+	else
+		ddir = dentry;
+	//mutex_lock(&ddir->d_inode->i_mutex);
+   dirp = ddir->d_inode;
+   dprintk("repd_lnk: sname %s, slen %d, tname %s, tlen %d, d1 %ld, d2 %ld\n", rename->sname, rename->snamelen, rename->tname, rename->tnamelen, dentry->d_inode->i_ino, dirp->i_ino);
+   dnew = lookup_one_len(rename->tname, ddir, rename->tnamelen);
+   if (IS_ERR(dnew))
+      return PTR_ERR(dnew);
+
+	name.hash = full_name_hash(name.name, name.len);
+	dold = d_lookup(dentry, &name); /* dcache-only lookup of the link source */
+	if (dold == NULL) {
+		printk("unable to find linked file\n");
+		dput(dnew);	/* drop the reference taken by lookup_one_len() above */
+		return -EINVAL;
+	}
+
+	err = vfs_link(dold, dirp, dnew);
+	//mutex_unlock(&ddir->d_inode->i_mutex);
+   dput(dnew);
+	rnode_before_link(dold->d_inode, ddir); /* called whether or not vfs_link() succeeded */
+	dput(dold);
+   return err;
+}
+
+static int repd_write(struct file *file, struct repd_write *argp)
+{	/* Write argp's iovec to file at argp->offset; returns vfs_writev()'s result (bytes written or a negative errno). */
+	int err;
+	struct dentry *dentry = file->f_dentry;
+	mm_segment_t oldfs;
+
+   oldfs = get_fs(); set_fs(KERNEL_DS); /* the iovec holds kernel addresses, so widen the address limit for the call */
+   err = vfs_writev(file, (struct iovec __user *)argp->vec, argp->vlen, &argp->offset);
+   set_fs(oldfs);
+   if (err >= 0) {
+		//dprintk("write %d bytes\n", err);
+      nfsdstats.io_write += argp->count; /* accounts the requested count, not the value returned by vfs_writev() */
+      fsnotify_modify(dentry);
+   }
+   if (err >= 0 && (dentry->d_inode->i_mode & (S_ISUID | S_ISGID))) { /* a successful write clears setuid/setgid */
+		struct iattr	ia;
+		ia.ia_valid = ATTR_KILL_SUID | ATTR_KILL_SGID;
+		//mutex_lock(&dentry->d_inode->i_mutex);
+		notify_change(dentry, &ia); /* NOTE(review): called without i_mutex (locking commented out) and result ignored -- confirm */
+		//mutex_unlock(&dentry->d_inode->i_mutex);
+	}
+	return err;
+}
+
+/* Deferred request handling */
+struct rdefer_req {	/* reference-counted wrapper used to wait for a deferred cache lookup */
+   struct cache_req              req;	/* passed to the cache lookup; its defer hook is rnfs_defer() */
+   struct cache_deferred_req     deferred_req;	/* returned by rnfs_defer(); its revisit hook is rnfs_revisit() */
+   wait_queue_head_t             waitq;	/* woken by rnfs_revisit() */
+   atomic_t                      count;	/* freed by put_mdr() when it reaches 0 */
+};
+
+static inline void
+put_mdr(struct rdefer_req *mdr)
+{	/* Drop one reference; the last put frees the request. */
+   if(atomic_dec_and_test(&mdr->count))
+      kfree(mdr);
+}
+
+static inline void
+get_mdr(struct rdefer_req *mdr)
+{	/* Take one additional reference on the request. */
+   atomic_inc(&mdr->count);
+}
+
+static void
+rnfs_revisit(struct cache_deferred_req *dreq, int toomany)
+{	/* Revisit hook installed by rnfs_defer(): wake the waiter and drop the reference taken at defer time; 'toomany' is unused. */
+   struct rdefer_req *mdr=container_of(dreq, struct rdefer_req, deferred_req);
+   wake_up(&mdr->waitq);
+   put_mdr(mdr);
+}
+
+static struct cache_deferred_req *
+rnfs_defer(struct cache_req *req)
+{	/* Defer hook for rdefer_req: returns the embedded deferred request and takes a reference that rnfs_revisit() later drops. */
+   struct rdefer_req *mdr=container_of(req, struct rdefer_req, req);
+   mdr->deferred_req.revisit = rnfs_revisit;
+   get_mdr(mdr);
+   return (&mdr->deferred_req);
+}
+
+static struct svc_export *
+get_rootexp(struct auth_domain *clp, struct cache_req *reqp)
+{	/* Look up the export with fsid 0 for client clp, waiting up to REPD_DFLT_TIMEO seconds for deferred cache fills; may return NULL or an ERR_PTR-style value. */
+   struct rdefer_req *mdr=NULL;
+   u32 fsidv[2];
+   struct svc_expkey *rek;
+   int err;
+   struct svc_export *rexp = NULL;
+
+   if (!clp) {
+      printk("get_rootexp: NULL client auth_domain\n");
+      return NULL;
+   }
+   mdr = kmalloc(sizeof(*mdr), GFP_KERNEL);
+   if (!mdr) {
+      return NULL;
+   }
+   memset(mdr, 0, sizeof(*mdr));
+   atomic_set(&mdr->count, 1); /* our own reference, dropped at 'out' */
+   init_waitqueue_head(&mdr->waitq);
+   mdr->req.defer = rnfs_defer;
+
+   /*
+    * According to exp_pseudoroot, the root is defined to be the export
+    * point with fsid==0. The second parameter of the lookup is the fsid
+    * type used for the expkey lookup; type 1 means look up by key value only.
+    */
+   mk_fsid_v1(fsidv, 0);
+   rek = exp_find_key2(clp, 1, fsidv, &mdr->req, &err);
+   if (rek && err == -EAGAIN) { /* key lookup was deferred: wait until the entry is valid and unexpired, then retry */
+      wait_event_interruptible_timeout(mdr->waitq,
+             (test_bit(CACHE_VALID, &rek->h.flags)
+              && (rek->h.expiry_time>=get_seconds())), REPD_DFLT_TIMEO*HZ);
+      rek = exp_find_key(clp, 1, fsidv, reqp);
+   }
+   if (!rek || IS_ERR(rek)) {
+      rexp = (struct svc_export *) rek; /* hand NULL or the error pointer straight back */
+      goto out;
+   }
+   rexp = rek->ek_export;
+	if (!rexp || IS_ERR(rexp))
+		goto out; /* NOTE(review): rek is not put on this path -- confirm whether a reference is held */
+   exp_get(rexp);
+   expkey_put(&rek->h, &svc_expkey_cache);
+   if ((err = cache_check(&svc_export_cache, &rexp->h, &mdr->req)) == -EAGAIN) { /* NOTE(review): other non-zero results still return rexp -- confirm reference ownership after cache_check() */
+      wait_event_interruptible_timeout(mdr->waitq,
+         (test_bit(CACHE_VALID, &rexp->h.flags)
+          && (rexp->h.expiry_time>=get_seconds())), REPD_DFLT_TIMEO*HZ);
+      rexp = exp_find(clp, 1, fsidv, reqp);
+   }
+ out:
+   if (mdr)
+      put_mdr(mdr);
+   return rexp;
+}
+
+struct svc_export *
+rexp_find(struct auth_domain *clp, int fsid_type, u32 *fsidv, struct cache_req *reqp)
+{	/* Look up the export for (clp, fsid_type, fsidv), waiting up to REPD_DFLT_TIMEO seconds for deferred cache fills; may return NULL or an ERR_PTR-style value. */
+   struct rdefer_req *mdr=NULL;
+   struct svc_expkey *rek;
+   int err;
+   struct svc_export *rexp = NULL;
+
+   mdr = kmalloc(sizeof(*mdr), GFP_KERNEL);
+   if(!mdr)
+      return NULL;
+   memset(mdr, 0, sizeof(*mdr));
+   atomic_set(&mdr->count, 1); /* our own reference, dropped at 'out' */
+   init_waitqueue_head(&mdr->waitq);
+   mdr->req.defer = rnfs_defer;
+
+   rek = exp_find_key2(clp, fsid_type, fsidv, &mdr->req, &err);
+   if (rek && err == -EAGAIN) { /* key lookup was deferred: wait until the entry is valid and unexpired, then retry */
+      wait_event_interruptible_timeout(mdr->waitq,
+             (test_bit(CACHE_VALID, &rek->h.flags)
+              && (rek->h.expiry_time>=get_seconds())), REPD_DFLT_TIMEO*HZ);
+      rek = exp_find_key(clp, fsid_type, fsidv, reqp);
+   }
+   if (!rek || IS_ERR(rek)) {
+      rexp = (struct svc_export *) rek; /* hand NULL or the error pointer straight back */
+      goto out;
+   }
+   rexp = rek->ek_export;
+	if (!rexp || IS_ERR(rexp))
+		goto out; /* NOTE(review): rek is not put on this path -- confirm whether a reference is held */
+   exp_get(rexp);
+   expkey_put(&rek->h, &svc_expkey_cache);
+   if ((err = cache_check(&svc_export_cache, &rexp->h, &mdr->req)) == -EAGAIN) { /* NOTE(review): other non-zero results still return rexp -- confirm reference ownership after cache_check() */
+      wait_event_interruptible_timeout(mdr->waitq,
+         (test_bit(CACHE_VALID, &rexp->h.flags)
+          && (rexp->h.expiry_time>=get_seconds())), REPD_DFLT_TIMEO*HZ);
+      rexp = exp_find(clp, fsid_type, fsidv, reqp);
+   }
+ out:
+   if (mdr)
+      put_mdr(mdr);
+   return rexp;
+}
+
+#else
+#include "rnfs.h"
+#include "nfs4repstate.h"
+#include "nfs4replication.h"
+#include "nfs4repd.h"
+#include <linux/nfs4.h>
+#include <linux/nfs3.h>
+#include <sys/types.h>
+#include <fcntl.h>
+#include <sys/stat.h>
+#endif
+
+static void repd_enter_pending(struct rnfs_rnode *rnode, struct repd_update *update); /* used by repd_wait() for updates that arrive ahead of sequence */
+static void repd_scan_pending(struct rnfs_rnode *rnode); /* called by repd_wait() after an in-sequence update is applied */
+
+static int repd_wait(unsigned int seq, struct rnfs_rnode *rnode2)
+{	/* Insert an empty (REP_OP_NONE) update with sequence number seq into rnode2's update stream; returns 0, -EINVAL (no rnode) or -ENOMEM. */
+	struct repd_update *update2;
+
+	if (!rnode2) {
+		printk("XXX: can not find rnode for REP_OP_WAIT\n");
+		return -EINVAL;
+	}
+	dprintk("REP_OP_WAIT: inode %ld, seq %u\n", rnode2->fi_inode->i_ino, seq);
+	if (!(update2 = kmalloc(sizeof(*update2), GFP_KERNEL)))
+		return -ENOMEM;
+	/* zero the whole placeholder (data, path, mtime, ...) so no later reader sees heap garbage */
+	memset(update2, 0, sizeof(*update2));
+	update2->opnum = REP_OP_NONE;
+	update2->seq = seq;
+	if (seq > (rnode2->update->seq + 1)) { /* ahead of sequence: park it until the gap is filled */
+		repd_enter_pending(rnode2, update2);
+	} else if (seq == (rnode2->update->seq + 1)) { /* next in sequence: apply, then drain anything now unblocked */
+		repd_rnode_update(rnode2, update2);
+		repd_scan_pending(rnode2);
+	} else { /* already seen: discard */
+		kfree(update2);
+	}
+	return 0;
+}
+
+static int repd_proc_op(struct dentry *dentry, unsigned int opnum, void *data)
+{	/* Dispatch one replicated operation: data is cast to the argument type matching opnum; returns the handler's status (0 for unknown opnum). */
+   struct rep_create *create;
+	struct repd_wait *rep_wait;
+   int status = 0;
+
+	//dprintk("repd_proc_op: dentry %ld, opnum %u\n", dentry->d_inode->i_ino, opnum);
+   switch (opnum) {
+      case REP_OP_CREATE:
+         create = (struct rep_create *) data;
+			if ((create->cr_type == NFS3_CREATE_UNCHECKED) || (create->cr_type == NFS3_CREATE_GUARDED) || (create->cr_type == NFS3_CREATE_EXCLUSIVE)) { /* cr_type holding an NFSv3 create mode selects the v3 path */
+            status = repd_create_v3(dentry, create);
+         } else { /* otherwise cr_type is a file type handled by repd_create() */
+            status = repd_create(dentry, create);
+         }
+         break;
+      case REP_OP_REMOVE:
+         status = repd_remove(dentry, (struct rep_remove *) data);
+         break;
+      case REP_OP_SETATTR:
+         status = repd_setattr(dentry, (struct iattr *) data);
+         break;
+      case REP_OP_RENAME:
+         status = repd_rename(dentry, (struct repd_rename *) data);
+         break;
+      case REP_OP_LNK: /* link requests reuse the rename argument structure */
+         status = repd_lnk(dentry, (struct repd_rename *) data);
+         break;
+		case REP_OP_WAIT:
+			rep_wait = (struct repd_wait *) data;
+			status = repd_wait(rep_wait->seq, rep_wait->rnode);
+			break;
+      default: /* other opnums are ignored here and report success */
+         break;
+   }
+	if (status)
+		dprintk("XXX: failed operation, status %d\n", status);
+	return status;
+}
+
+void repd_release(struct repd_update *update)
+{
+	unsigned int opnum;
+	void *data;
+	/* Free @update: its opnum-specific payload, its path string, then the update itself. NULL is accepted. */
+	if (!update)   return;
+	opnum = update->opnum;
+	data = update->data;
+   /* Each opnum has its own payload release helper; the default case releases no payload. */
+   switch (opnum) {
+      case REP_OP_CLOSE:
+         rep_release_close((struct rep_close *) data);
+         break;
+      case REP_OP_WRITE:
+         repd_release_write((struct repd_write *) data);
+         break;
+      case REP_OP_CREATE:
+         rep_release_create((struct rep_create *) data);
+         break;
+      case REP_OP_REMOVE:
+         rep_release_remove((struct rep_remove *) data);
+         break;
+      case REP_OP_SETATTR:
+         rep_release_setattr((struct iattr *) data);
+         break;
+      case REP_OP_LNK:
+      case REP_OP_RENAME:
+         repd_release_rename((struct repd_rename *) data);
+         break;
+      case REP_OP_OPEN:
+         repd_release_open((struct repd_open *) data);
+         break;
+		case REP_OP_WAIT:
+         repd_release_wait((struct repd_wait *) data);
+			break;
+      default:
+         break;
+   }
+	/* kfree(NULL) is a no-op and update was already dereferenced above, */
+	/* so neither free needs a guard. */
+	kfree(update->path);
+	kfree(update);
+}
+
+#define REPD_APPLY_OP \
+			/* Uses 'rnode' and 'dentry' from the expansion site: apply rnode->update, raise mtimes, register created files. */ \
+			if ((rnode->update->opnum == REP_OP_RENAME) || (rnode->update->opnum == REP_OP_LNK)) { \
+				struct repd_rename *rename = (struct repd_rename *) rnode->update->data; \
+				if (rename->rnode) \
+					repd_wait(rename->seq, rename->rnode); \
+			} \
+			repd_proc_op(dentry, rnode->update->opnum, rnode->update->data); \
+			if (!((rnode->update->opnum == REP_OP_SETATTR) && (((struct iattr *) rnode->update->data)->ia_valid & ATTR_MTIME))) \
+			if (timespec_compare(&dentry->d_inode->i_mtime, &rnode->update->mtime) < 0) \
+				dentry->d_inode->i_mtime = rnode->update->mtime; \
+			if ((rnode->update->opnum == REP_OP_RENAME) || (rnode->update->opnum == REP_OP_LNK)) { \
+				struct repd_rename *rename = (struct repd_rename *) rnode->update->data; \
+				if (rename->tdentry) \
+					if (timespec_compare(&rename->tdentry->d_inode->i_mtime, &rnode->update->mtime) < 0) \
+						rename->tdentry->d_inode->i_mtime = rnode->update->mtime; \
+			} \
+			if ((rnode->update->opnum == REP_OP_CREATE) || (rnode->update->opnum == REP_OP_CREATE_V3)) { \
+				struct rnfs_rnode *rnode2; \
+				struct xdr_netobj tpfh = { .len = 0, .data = NULL }; \
+				struct dentry *dchild; \
+				struct rep_create *create = (struct rep_create *) rnode->update->data; \
+				dchild = lookup_one_len(create->cr_name, dentry, create->cr_namelen); \
+				if (!dchild || IS_ERR(dchild)) \
+					printk("XXX: create failed, err dchild\n"); \
+				else if (!dchild->d_inode) { \
+					printk("XXX: create failed, NULL inode\n"); \
+					dput(dchild); \
+				} else if (S_ISREG(dchild->d_inode->i_mode)) { \
+					rnode2 = alloc_rnode(dchild, rnode->primary, rnode->exp, &tpfh); \
+					rnode2->rflags |= RNODE_CREATED; \
+					dput(dchild); \
+					rnode_put(rnode2); \
+				} else \
+					dput(dchild); /* non-regular child: still drop the lookup_one_len() reference */ \
+			}
+
+void repd_rnode_update(struct rnfs_rnode *rnode, struct repd_update *update)
+{
+	struct dentry *dentry;
+	/* Apply the update held in rnode->update (unless it is absent, REP_OP_NONE, or @update cancels it), then install @update. */
+	if ((rnode->update) && (rnode->update->opnum != REP_OP_NONE) && !(update && (update->opnum == REP_OP_CANCEL))) {
+		struct nameidata nd;
+		memset(&nd, 0, sizeof(nd));
+		if ((rnode->update->opnum == REP_OP_RENAME) || (rnode->update->opnum == REP_OP_LNK)) {
+			struct repd_rename *rename = (struct repd_rename *) rnode->update->data;
+			struct rnfs_rnode *trnode;
+			trnode = (rename->rnode) ? rename->rnode : rnode;	/* target side defaults to this rnode */
+			if ((rename->tpath) && (strlen(rename->tpath) > 1)) {
+				dprintk("rename tpath %s\n", rename->tpath);
+				nd.mnt = mntget(trnode->exp->ex_mnt);
+				nd.dentry = d_find_alias(trnode->fi_inode);
+				if (link_path_walk(rename->tpath, &nd)) {
+					printk("XXX: fail in link_path_walk\n");
+					return;	/* NOTE(review): rnode->update stays in place; @update is neither installed nor released */
+				}
+				rename->tdentry = dget(nd.dentry);
+				path_release(&nd);
+			} else if ((rename->tpath) || (rename->rnode))
+				rename->tdentry = d_find_alias(trnode->fi_inode);
+   		dprintk("rnode_update: rename sname %s, slen %d, tname %s, tlen %d\n", rename->sname, rename->snamelen, rename->tname, rename->tnamelen);
+		}
+		current->fsuid = rnode->update->uid;	/* the operation runs with the credentials recorded in the update */
+		current->fsgid = rnode->update->gid;
+		if (rnode->update->path && (strlen(rnode->update->path) > 1)) {
+			dprintk("rnode update path %s, inode %ld\n", rnode->update->path, rnode->fi_inode->i_ino);
+			memset(&nd, 0, sizeof(nd));
+			nd.mnt = mntget(rnode->exp->ex_mnt);
+			nd.dentry = d_find_alias(rnode->fi_inode);
+			if (link_path_walk(rnode->update->path, &nd)) {
+				printk("XXX: fail in link_path_walk\n");
+				return;	/* NOTE(review): same early exit as above; @update is dropped on the floor */
+			}
+			dentry = nd.dentry;
+			REPD_APPLY_OP;
+			path_release(&nd);
+		} else {
+			dentry = d_find_alias(rnode->fi_inode);
+			REPD_APPLY_OP;
+			dput(dentry);
+		}
+	}
+	repd_release(rnode->update);
+   rnode->update = update;
+	if (update && (update->opnum == REP_OP_WRITE)) {	/* a write is carried out as soon as it is installed */
+		if (!rnode->file) {
+			dentry = d_find_alias(rnode->fi_inode);
+			rnode->file = dentry_open(dentry, mntget(rnode->exp->ex_mnt), O_RDWR);
+			if (!rnode->file || IS_ERR(rnode->file)) {
+				printk("XXX: fail in dentry_open\n");
+				rnode->file = NULL;	/* never cache an ERR_PTR: the check above only tests for NULL */
+				return;
+			}
+		}
+		repd_write(rnode->file, (struct repd_write *) update->data);
+	}
+	if (update && (update->opnum == REP_OP_CLOSE))
+		rnode_close(rnode, update);
+}
+
+static void repd_print_pending(struct rnfs_rnode *rnode)	/* debug aid for rnode->pendings */
+{
+   struct repd_pending_entry *entry;
+   struct list_head *pos;
+   list_for_each(pos, &rnode->pendings){
+      entry = list_entry(pos, typeof(*entry), list);
+      dprintk("%s: seq %d\n", __FUNCTION__, entry->update->seq);
+		break;	/* only the first entry is logged; an empty list logs nothing */
+   }
+}
+
+static void repd_enter_pending(struct rnfs_rnode *rnode, struct repd_update *update)
+{
+   struct list_head *pos;
+   struct repd_pending_entry *rargp, *entry = NULL;
+   /* Queue @update on rnode->pendings, kept in ascending seq order; @update is consumed (queued or released). */
+   dprintk("%s: entering seq %d\n", __FUNCTION__, update->seq);
+   rargp = kmalloc(sizeof(*rargp), GFP_KERNEL);
+   /* Walk back from the tail to the last entry whose seq is <= the new one; pos ends at the list head if none. */
+   list_for_each_prev(pos, &rnode->pendings){
+      entry = list_entry(pos, typeof(*entry), list);
+      if (entry->update->seq <= update->seq)
+         break;
+   }
+   if (!rargp || ((pos != &rnode->pendings) && (entry->update->seq == update->seq))) {
+      /* Allocation failure or a duplicate seq: drop the update instead of queueing it. */
+      kfree(rargp);
+      if (update->opnum == REP_OP_WRITE)
+         update->data = NULL;   /* cleared first, so repd_release() hands NULL to repd_release_write() */
+      repd_release(update);
+   } else {
+      rargp->update = update;
+      list_add(&rargp->list, pos);   /* insert right after pos; list_add() sets both link pointers */
+   }
+   repd_print_pending(rnode);
+}
+
+static int open_in_update(struct rnfs_rnode *rnode, struct xdr_netobj *fhp)
+{	/* Open the rnode for rnode->update->path and copy its file handle into @fhp, or send it back when @fhp is NULL. */
+	struct nameidata nd = { .flags = 0 };	/* zero-fill: link_path_walk() reads more than mnt/dentry */
+	struct rnfs_rnode *rnode2;
+	struct svc_fh fh;
+	struct repd_open *open = (struct repd_open *) rnode->update->data;
+	struct xdr_netobj *resp = fhp;
+	int status = -EAGAIN;
+	dprintk("open_in_update: path %s, dentry %ld\n", rnode->update->path, rnode->fi_inode->i_ino);
+	nd.mnt = mntget(rnode->exp->ex_mnt);
+	nd.dentry = d_find_alias(rnode->fi_inode);
+	if (link_path_walk(rnode->update->path, &nd)) {
+		printk("XXX: fail in link_path_walk\n");
+		return -EINVAL;
+	}
+	if (!(rnode2 = rnfs_openfile_rnode(nd.dentry, rnode->primary, rnode->exp, (struct xdr_netobj *) &open->primary_fh, open->seq))) {
+		printk("XXX: open_in_update: fail in rnfs_openfile_rnode\n");
+		path_release(&nd);
+		return -EAGAIN;
+	}
+	fh_init(&fh, NFS_FHSIZE);
+	if (fh_compose(&fh, rnode2->exp, nd.dentry, NULL)) {
+		printk("fail in fh_compose\n");
+		goto out;	/* status is still -EAGAIN */
+	}
+	status = -ENOMEM;	/* the only failures left are allocations */
+	if (!resp && !(resp = kmalloc(sizeof(*resp), GFP_KERNEL)))
+		goto out;
+	if (!(resp->data = kmalloc(fh.fh_handle.fh_size, GFP_KERNEL))) {
+		kfree(fhp ? NULL : resp);	/* free the reply object only if it was allocated here */
+		goto out;
+	}
+	resp->len = fh.fh_handle.fh_size;
+	memcpy(resp->data, &fh.fh_handle.fh_base, fh.fh_handle.fh_size);
+	if (!fhp)
+		repclnt_send_one(REPPROC_FORWARD, rnode2, REP_OP_CALLBACK, resp, NULL);
+	status = 0;
+out:	/* common exit: every path from fh_init() onwards drops fh, nd and rnode2 here */
+	fh_put(&fh);
+	path_release(&nd);
+	rnode_put(rnode2);
+	return status;
+}
+
+static void repd_scan_pending(struct rnfs_rnode *rnode)
+{
+   struct repd_pending_entry *entry;
+	/* Drain rnode->pendings: install queued updates in list order until the head is more than one seq ahead. */
+   while (!list_empty(&rnode->pendings)) {
+      entry = list_entry(rnode->pendings.next, struct repd_pending_entry, list);
+      if (entry->update->seq > (rnode->update->seq + 1))
+         break;
+		/* The head is due: unlink it and hand its update to repd_rnode_update(). */
+      list_del(&entry->list);
+		repd_rnode_update(rnode, entry->update);
+		if (entry->update->opnum == REP_OP_WRITE) {
+			struct repd_write *write = (struct repd_write *) entry->update->data;
+			int v;
+			struct page *page;
+			for (v=0; v < write->vlen; v++) {	/* drop one page reference per write vector */
+				page = virt_to_page(write->vec[v].iov_base);
+				put_page(page);
+			}
+		}
+      kfree(entry);
+		if (rnode->update && (rnode->update->opnum == REP_OP_OPEN)) {
+			open_in_update(rnode, NULL);	/* NULL fhp: open_in_update() sends the handle via repclnt_send_one() */
+		}
+   }
+}
+
+static LIST_HEAD(delayed_open_rqts);
+
+void repd_enter_open(struct cache_req *req)	/* defer @req and remember it on delayed_open_rqts */
+{
+	struct delayed_open_entry *entry = kmalloc(sizeof(*entry), GFP_KERNEL);
+	dprintk("repd_enter_open\n");
+	if (!entry)	/* cannot queue: the request is simply not deferred */
+		return;
+	entry->dreq = req->defer(req);	/* NOTE(review): the defer() result is not checked before it is queued */
+	list_add_tail(&entry->list, &delayed_open_rqts);	/* sets both link pointers, so no INIT_LIST_HEAD is needed */
+}
+
+void repd_scan_openlist(void)	/* revisit every request queued by repd_enter_open() and empty the list */
+{
+	struct delayed_open_entry *entry, *n;	/* n: lookahead cursor, so entry can be freed during the walk */
+	struct cache_deferred_req *dreq;
+	list_for_each_entry_safe(entry, n, &delayed_open_rqts, list) {
+		list_del(&entry->list);
+		dreq = entry->dreq;	/* saved before the list node is freed */
+		kfree(entry);
+		dreq->revisit(dreq, 0);
+	}
+}
+
+static struct rnfs_rnode *
+path2rnode(struct svc_rqst *rqstp, char *rpath, unsigned int rpathlen, char *path, unsigned int pathlen, struct xdr_netobj *primary_fh, struct svc_fh *fhp, unsigned int single, unsigned int seq)
+{
+   char *file_name = NULL, *p;
+   char rootpath[256];
+   struct svc_export *rexp = NULL, *exp = NULL;
+	struct nameidata nd;
+   int len, status = -EINVAL;	/* status is only logged; the function's result is rnode */
+	struct rnfs_rnode *rnode = NULL;
+	/* Resolve <root export path> + rpath + path to an rnode and compose its handle into @fhp. Returns NULL, or whatever rnfs_open_rnode() returned (possibly an ERR_PTR). */
+   rexp = get_rootexp(rqstp->rq_client, &rqstp->rq_chandle);
+   if (!rexp || IS_ERR(rexp)) {
+      status = (rexp) ? PTR_ERR(rexp) : -EINVAL;
+      dprintk("%s: get rootexp err %d\n", __FUNCTION__, status);
+      goto out;
+   }
+   p = d_path(rexp->ex_dentry, rexp->ex_mnt, rootpath, 256);
+   exp_put(rexp);
+   if (IS_ERR(p)) {
+      status = PTR_ERR(p);
+      goto out;
+   }
+	len = strlen(p) + rpathlen + pathlen;
+   if (len > PATH_MAX) {
+      printk("too long pathname\n");
+      goto out;
+   }
+   file_name = kmalloc(len+1, GFP_KERNEL);
+   if (!file_name) {
+      status = -ENOMEM;
+      goto out;
+   }
+	/* Build "<root export path><rpath>" first; it is looked up to find the export (exp) used below. */
+   memcpy(file_name, p, strlen(p));
+   p = file_name + strlen(p);
+   memcpy(p, rpath, rpathlen);
+   p = p + rpathlen;
+	*p = '\0';
+
+   if ((status = path_lookup(file_name, 0, &nd))) {
+      printk("XXX: fail in lookup, exp path %s, err %d\n", file_name, status);
+      goto out;
+   }
+   exp = exp_parent(rqstp->rq_client, nd.mnt, nd.dentry, &rqstp->rq_chandle);
+	path_release(&nd);
+	if (!exp || IS_ERR(exp)) {
+		printk("can not get exp structure\n");
+		goto out;
+	}
+	/* Append @path after the export prefix and look up the full name. */
+   memcpy(p, path, pathlen);
+	p = p + pathlen;
+	*p = '\0';
+   dprintk("get fullpath %s\n", file_name);
+   if ((status = path_lookup(file_name, 0, &nd))) {
+		struct dentry *dentry, *parent;
+      printk("XXX: fail in lookup, path %s, err %d\n", file_name, status);
+		while (*p != '/')	/* cut the last component and retry with the parent directory */
+			p--;
+		*p = '\0';
+      printk("try path %s\n", file_name);
+		if ((status = path_lookup(file_name, 0, &nd)))
+      	goto out;
+		dentry = dget(nd.dentry);
+		while(!(rnode = rnfs_get_rnode(rqstp, dentry->d_inode))) {	/* climb until an ancestor has an rnode */
+      	if ((dentry == exp->ex_dentry) || IS_ROOT(dentry))
+         	break;
+			parent = dget_parent(dentry);
+			dput(dentry);
+			dentry = parent;
+		}
+		dput(dentry);
+		if (rnode) {
+			printk("wait parent to release, rnode %ld\n", rnode->fi_inode->i_ino);
+			rnode->rflags |= RNODE_WAIT_CLOSE;
+			rnode_put(rnode);
+			rnode = NULL;	/* this call still fails; the request is deferred for a later retry */
+			repd_enter_open(&rqstp->rq_chandle);
+		} else {
+			printk("XXX: can not find parent rnode\n");
+		}
+		path_release(&nd);
+		goto out;
+   }
+
+	if ((rnode = rnfs_open_rnode(rqstp, nd.dentry, exp, primary_fh, single, seq, path, pathlen))&& !IS_ERR(rnode)) {
+		fh_init(fhp, NFS_FHSIZE);
+		if ((len = fh_compose(fhp, exp, nd.dentry, NULL))) {
+			dprintk("XXX: fail in fh_compose: err %d\n", len);
+			rnode = NULL;
+			fh_put(fhp);
+		}
+      /* NOTE(review): on fh_compose() failure rnode is cleared with no visible rnode_put(); confirm the reference is not leaked. */
+	}
+	path_release(&nd);
+
+ out:
+	if (file_name)
+   	kfree(file_name);
+   if (exp && !IS_ERR(exp)) {
+      exp_put(exp);
+	}
+   return rnode;
+}
+
+
+static struct rnfs_rnode *
+fh2rnode(struct svc_rqst *rqstp, unsigned int fh_size, char *fh_base, int *err)
+{
+	struct rnfs_rnode *rnode;	/* result of rnfs_get_rnode(), or NULL with *err set */
+	struct svc_fh fh;
+   fh_init(&fh, NFS_FHSIZE);
+   fh.fh_handle.fh_size = fh_size;
+   memcpy(&fh.fh_handle.fh_base, fh_base, fh_size);	/* NOTE(review): fh_size is not bounded here; presumably checked by the decoder, confirm */
+	*err = fh_verify(rqstp, &fh, 0, MAY_NOP);
+   if (*err) {
+      dprintk("fail in rfh_verify: err is %d, fh is (%s)\n", ntohl(*err), SVCFH_fmt(&fh));
+		fh_put(&fh);
+		return NULL;
+   }
+	/* Handle verified: look up the rnode for the inode it refers to. */
+	rnode = rnfs_get_rnode(rqstp, fh.fh_dentry->d_inode);
+	if (!rnode)
+		*err = nfserrno(-EINVAL);
+	fh_put(&fh);	/* the temporary handle is dropped on every path */
+	return rnode;
+}
+
+static struct rnfs_pnode *
+fh2pnode(struct svc_rqst *rqstp, unsigned int fh_size, char *fh_base, struct svc_fh *fhp)
+{
+	int err;	/* fh_verify() status; only logged, the caller just sees NULL */
+   fh_init(fhp, NFS_FHSIZE);	/* @fhp is left for the caller to fh_put(), on success and on failure */
+   fhp->fh_handle.fh_size = fh_size;
+   memcpy(&fhp->fh_handle.fh_base, fh_base, fh_size);
+   if ((err = fh_verify(rqstp, fhp, 0, MAY_NOP))) {
+      printk("fail in rfh_verify: err is %d, fh is (%s)\n", ntohl(err), SVCFH_fmt(fhp));	/* the handle is binary, never print it with a bare %s */
+		return NULL;
+   }
+	if (fhp->fh_dentry && fhp->fh_dentry->d_inode)
+		dprintk("repsvc_proc_forward: fh2pnode %ld\n", fhp->fh_dentry->d_inode->i_ino);
+	else
+		dprintk("repsvc_proc_forward: NULL inode\n");
+	return rnfs_get_pnode(fhp->fh_dentry->d_inode);	/* NOTE(review): dereferences fh_dentry even on the "NULL inode" branch above */
+}
+
+/*
+static int rep_open_create(struct svc_fh *fhp, struct repd_update *update, struct rnfs_rnode *rnode)
+{
+	struct rep_create *create = (struct rep_create *) update->data;
+	struct dentry *dentry = fhp->fh_dentry, *ndentry = NULL;
+	struct rnfs_rnode *rnode2;
+	int status;
+
+	current->fsuid = update->uid;
+	current->fsgid = update->gid;
+	if ((create->cr_type == NFS3_CREATE_UNCHECKED) || (create->cr_type == NFS3_CREATE_GUARDED) || (create->cr_type == NFS3_CREATE_EXCLUSIVE)) {
+		status = repd_create_v3(dentry, create);
+	} else {
+		status = repd_create(dentry, create);
+	}
+	if (status) {
+		printk("XXX: fail in repd_create: err %d\n", status);
+		fh_put(fhp);
+		goto out;
+	}
+   ndentry = lookup_one_len(create->cr_name, fhp->fh_dentry, create->cr_namelen);
+	fh_put(fhp);
+
+	if ((rnode2 = rnfs_openfile_rnode(ndentry, rnode->primary, rnode->exp, &rnode->primary_fh, update->seq))) {
+		fh_init(fhp, NFS_FHSIZE);
+		if ((status = fh_compose(fhp, rnode->exp, ndentry, NULL))) {
+			dprintk("XXX: fail in fh_compose: err %d\n", status);
+			rnode_put(rnode2);
+			fh_put(fhp);
+		}
+		rnode_put(rnode2);
+	} else {
+		dprintk("XXX: rep_open_create: fail in rnfs_openfile_rnode\n");
+	}
+
+ out:
+	rnode_close(rnode, NULL);
+   if (ndentry && !IS_ERR(ndentry))
+      dput(ndentry);
+	return status;
+}
+*/
+
+int repsvc_proc_open(struct svc_rqst *rqstp, struct repd_openargs *argp, struct repd_openres *resp)
+{
+	struct rnfs_rnode *rnode, *rnode2 = NULL;	/* rnode2: rename/link target, if the update carries a tpath */
+	struct svc_fh fh;
+	struct nfsd4_fs_locations *replist;
+	dprintk("repsvc_proc_open: arg seq %u, opnum %u, single %u\n", argp->update->seq, argp->update->opnum, argp->single);
+	resp->seq = argp->update->seq;
+	resp->opnum = argp->update->opnum;
+	resp->fh[0].len = resp->fh[1].len = 0;
+	resp->fh[0].data = resp->fh[1].data = NULL;
+	if ((argp->update->opnum == REP_OP_RENAME) || (argp->update->opnum == REP_OP_LNK)) {
+		struct repd_rename *rename = (struct repd_rename *) argp->update->data;
+		if (rename->tpath) {
+			if ((rnode2 = path2rnode(rqstp, argp->rpath, argp->rpathlen, rename->tpath, strlen(rename->tpath), &rename->tprimary_fh, &fh, 1, argp->update->seq)) && !IS_ERR(rnode2)) {
+				kfree(rename->tpath);
+				rename->tpath = NULL;
+				resp->fh[1].len = fh.fh_handle.fh_size;
+				resp->fh[1].data = kmalloc(fh.fh_handle.fh_size, GFP_KERNEL);
+				memcpy(resp->fh[1].data, &fh.fh_handle.fh_base, fh.fh_handle.fh_size);
+				fh_put(&fh);
+				if (!(rnode2->update = kmalloc(sizeof(struct repd_update), GFP_KERNEL)))
+					return nfserrno(-ENOMEM);	/* NOTE(review): returns without rnode_put(rnode2) or releasing argp->update */
+				rnode2->update->opnum = REP_OP_NONE;	/* placeholder update that only records the seq */
+				rnode2->update->data = NULL;
+				rnode2->update->seq = argp->update->seq;
+				rnode2->update->path = NULL;
+				rename->rnode = rnode2;
+			} else {
+				int status;
+				dprintk("fail to get rnode for rename\n");
+				repd_release(argp->update);
+				if (argp->primary_fh.data) kfree(argp->primary_fh.data);
+				status = rnode2 ? PTR_ERR(rnode2) : -EAGAIN;
+				return nfserrno(status);
+			}
+		}
+	}
+	/* Open the rnode for the request's own path; on success the rnode takes over argp->update. */
+	if ((rnode = path2rnode(rqstp, argp->rpath, argp->rpathlen, argp->path, argp->pathlen, &argp->primary_fh, &fh, argp->single, argp->update->seq)) && !IS_ERR(rnode)) {
+		replist = rnode->exp->ex_fslocs;
+		replist->locations[rnode->primary].openseq = argp->update->seq;
+		rnode->update = argp->update;
+		/*
+		 * The reply's fh[0] carries the handle that path2rnode() composed
+		 * into fh; fh[1] (set above) carries the rename/link target's.
+		 * NOTE(review): the kmalloc() results stored in resp->fh[].data are
+		 * passed to memcpy() without a NULL check.
+		 */
+		resp->fh[0].len = fh.fh_handle.fh_size;
+		resp->fh[0].data = kmalloc(fh.fh_handle.fh_size, GFP_KERNEL);
+		memcpy(resp->fh[0].data, &fh.fh_handle.fh_base, fh.fh_handle.fh_size);
+		fh_put(&fh);
+		if (!list_empty(&delayed_open_rqts))	/* a successful open may unblock opens deferred earlier */
+			repd_scan_openlist();
+		rnode_put(rnode);
+		return 0;
+	} else {
+		int status;
+		status = rnode ? PTR_ERR(rnode) : -EAGAIN;
+		dprintk("fail to get rnode for open %d\n", status);
+		resp->fh[1].len = 0;
+		if (resp->fh[1].data && (status == -EAGAIN))
+			kfree(resp->fh[1].data);	/* NOTE(review): the pointer is left dangling in resp after this free */
+		if (rnode2)
+			rnode_put(rnode2);
+		repd_release(argp->update);
+		if (argp->primary_fh.data) kfree(argp->primary_fh.data);
+		return nfserrno(status);
+	}
+}
+
+int repsvc_proc_update(struct svc_rqst *rqstp, struct repd_updateargs *argp, struct repd_updateres *resp)
+{
+	struct rnfs_rnode *rnode;
+	int err = 0;
+	/* Handle one update for the rnode named by the request's file handle: drop it, queue it, or install it, depending on its seq. */
+	resp->seq = argp->update->seq;
+	resp->opnum = argp->update->opnum;
+	resp->fh.data = NULL;
+	resp->fh.len = 0;
+	if ((rnode = fh2rnode(rqstp, argp->fh_size, argp->fh_base, &err))) {
+		dprintk("repsvc_proc_update: arg seq %u, opnum %u, rnode %ld\n", argp->update->seq, argp->update->opnum, rnode->fi_inode->i_ino);
+		if (argp->update->opnum == REP_OP_WAIT) {	/* a WAIT names a second rnode by file handle */
+			struct repd_wait *wait = (struct repd_wait *) argp->update->data;
+			if (!(wait->rnode=fh2rnode(rqstp, wait->fh_size, wait->fh_base, &err))) {
+				rnode_put(rnode);
+				goto out;
+			}
+		}
+		if ((argp->update->opnum == REP_OP_RENAME) || (argp->update->opnum == REP_OP_LNK)) {
+			struct repd_rename *rename = (struct repd_rename *) argp->update->data;
+			struct rnfs_rnode *rnode2;
+			if (rename->tprimary_fh.data) {	/* the target is identified by its own file handle */
+				rnode2=fh2rnode(rqstp, rename->tprimary_fh.len, rename->tprimary_fh.data, &err);
+				if (!rnode2) {
+					rnode_put(rnode);
+					goto out;
+				}
+				rename->rnode = rnode2;
+				dprintk("get the second rnode for open2 %ld\n", rnode2->fi_inode->i_ino);
+			}
+		}
+		if ((!rnode->update) || (argp->update->seq <= rnode->update->seq) || (argp->update->seq > (rnode->update->seq + RNFS_MAX_SEQ))) {
+			if (argp->update->opnum == REP_OP_CLOSE) {
+				dprintk("XXX: delayed close?\n");
+			} else
+				dprintk("invalid seq number\n");
+			repd_release(argp->update);	/* stale or out-of-window seq: the update is discarded */
+		} else if (argp->update->seq > (rnode->update->seq + 1)) {
+			/* XXX: hopefully this cannot cause a distributed deadlock, i.e. the request
+			 * we are waiting for being stuck at the sender, waiting for a free slot. */
+         if (argp->update->opnum == REP_OP_WRITE) {
+				struct repd_write *write = (struct repd_write *) argp->update->data;
+				int v;
+				struct page *p;
+				dprintk("delayed write request, argp->update->seq %u, rnode->update->seq %u, vlen %d\n", argp->update->seq, rnode->update->seq, write->vlen);
+				for (v=0; v < write->vlen; v++) {	/* give the request fresh arg pages, one per write vector */
+					p = alloc_page(GFP_KERNEL);	/* NOTE(review): the result is not checked for NULL */
+					rqstp->rq_argpages[v] = p;
+				}
+         }
+			repd_enter_pending(rnode, argp->update);
+		} else {	/* seq is exactly rnode->update->seq + 1 */
+			if (rnode->update->opnum == REP_OP_OPENFAIL) {
+				rnode_close(rnode, NULL);
+			} else {
+				repd_rnode_update(rnode, argp->update);
+				if (rnode->update->opnum == REP_OP_OPEN)
+					open_in_update(rnode, &resp->fh);	/* the new handle is returned in the reply */
+				repd_scan_pending(rnode);
+			}
+		}
+		rnode_put(rnode);
+		return nfserrno(err);
+	}
+	/* Reached when the rnode, or a second rnode the update needs, could not be resolved. */
+out:
+	dprintk("XXX: repsvc_proc_update: arg seq %u, opnum %u, err %d, can not get rnode\n", argp->update->seq, argp->update->opnum, ntohl(err));
+	if (argp->update->opnum == REP_OP_CLOSE)	/* a CLOSE with no rnode is reported as success */
+		err = 0;
+	repd_release(argp->update);
+	return err;
+}
+
+static int
+repd_fwrite(struct svc_rqst *rqstp, struct svc_fh *fhp, struct repd_write *argp)
+{
+	int stable = 1;	/* passed by pointer as nfsd_write()'s last argument; presumably its in/out "stable" flag, confirm */
+	dprintk("repd_fwrite\n");
+	/* Write argp's vectors at argp->offset with no pre-opened file; only nfsd_write()'s status is returned. */
+	return nfsd_write(rqstp, fhp, NULL, argp->offset, argp->vec, argp->vlen, argp->count, &stable);
+}
+
+static int repd_callback(struct svc_rqst *rqstp, struct rnfs_pnode *pnode, struct xdr_netobj *fh)
+{
+	struct nfsd4_fs_locations *replist = pnode->exp->ex_fslocs;
+   int i;	/* index of the location whose address matches the sender */
+	dprintk("repd_callback\n");
+	if (fh->len == 0)
+		return -EINVAL;
+   for (i=0; i<replist->locations_count; i++) {
+      if (replist->locations[i].sin_addr.s_addr == rqstp->rq_addr.sin_addr.s_addr) {
+			dprintk("repd_callback: index i %d, fhlen %u, pnode %ld\n", i, fh->len, pnode->fi_inode->i_ino);
+			pnode->fh[i].len = fh->len;
+			pnode->fh[i].data = fh->data;	/* the pointer is stored, the bytes are not copied */
+   		pnode->ack_num ++;
+   		if (pnode->ack_num == replist->locations_count) {	/* one callback counted per location */
+      		pnode->pflags |= PNODE_COMPLETE;
+      		if ((pnode->pflags & PNODE_WAIT_COMPLETE)) {
+         		wake_up_interruptible(&pnode->pwait);
+         		pnode->pflags &= ~PNODE_WAIT_COMPLETE;
+      		}
+   		}
+			if (!list_empty(&replist->locations[i].rnfs_list))	/* kick that location's work item if it has queued entries */
+				queue_work(replist->locations[i].rnfs_wq, &replist->locations[i].rnfs_work);
+			return 0;
+		}
+	}
+	return -EINVAL;	/* the sender's address matches no configured location */
+}
+
+int repsvc_proc_forward(struct svc_rqst *rqstp, struct repd_forwardargs *argp, struct repd_forwardres *resp)
+{
+	struct rnfs_pnode *pnode;	/* pnode for the file handle carried in the request */
+	struct svc_fh fh;
+	int status = -EINVAL;	/* stays -EINVAL when fh2pnode() fails or repd_wait_close() returns 0 */
+	dprintk("repsvc_proc_forward: opnum %d\n", argp->opnum);
+	resp->opnum = argp->opnum;
+	resp->data = NULL;
+	if ((pnode = fh2pnode(rqstp, argp->fh_size, argp->fh_base, &fh))) {
+		current->fsuid = argp->uid;	/* run the forwarded operation with the sender-supplied credentials */
+		current->fsgid = argp->gid;
+		switch (argp->opnum) {
+		case REP_OP_CALLBACK:
+			status = repd_callback(rqstp, pnode, (struct xdr_netobj *) argp->data);
+			kfree(argp->data);	/* frees the xdr_netobj only; its data pointer was stored by repd_callback() */
+			break;
+		case REP_OP_WRITE:
+			status = repd_fwrite(rqstp, &fh, (struct repd_write *) argp->data);
+			kfree(argp->data);
+			break;
+		case REP_OP_SETATTR:
+			status = nfsd_setattr(rqstp, &fh, (struct iattr *) argp->data, 0, (time_t)0);
+			kfree(argp->data);
+			break;
+		case REP_OP_FREAD:
+			if (repd_wait_close(pnode)) {
+				long len;
+				int v, pn;
+				struct kvec vec[RPCSVC_MAXPAGES];	/* NOTE(review): v below is not bounded by RPCSVC_MAXPAGES; confirm count is limited upstream */
+				unsigned long *count;
+				struct rep_fread *readp = (struct rep_fread *) argp->data;
+   			len = readp->count;
+   			v = 0;
+   			while (len > 0) {	/* one response page per PAGE_SIZE chunk of the requested length */
+      			pn = rqstp->rq_resused;
+      			svc_take_page(rqstp);
+      			vec[v].iov_base = page_address(rqstp->rq_respages[pn]);
+      			vec[v].iov_len = len < PAGE_SIZE ? len : PAGE_SIZE;
+      			v++;
+					len -= PAGE_SIZE;
+   			}
+				count = kmalloc(sizeof(unsigned long), GFP_KERNEL);	/* NOTE(review): dereferenced below without a NULL check */
+				*count = readp->count;
+				resp->data = count;	/* on success the reply carries the count */
+				status = nfsd_read(rqstp, &fh, NULL, readp->offset, vec, v, resp->data);
+				dprintk("rep_op_fread: status %d, len %ld, count %lu, v %d\n", status, readp->count, *count, v);
+				if (status) {
+					kfree(resp->data);
+					resp->data = NULL;
+				}
+			}
+			kfree(argp->data);
+			break;
+		case REP_OP_FGETATTR:	/* NOTE(review): argp->data is not freed in this case; presumably NULL for this op */
+			dprintk("rep_op_fgetattr\n");
+			if (repd_wait_close(pnode)) {
+				struct kstat *statp;
+				statp = kmalloc(sizeof(struct kstat), GFP_KERNEL);	/* NOTE(review): passed on without a NULL check */
+				resp->data = statp;	/* on success the reply carries the kstat */
+				status = vfs_getattr(fh.fh_export->ex_mnt, fh.fh_dentry, statp);
+				dprintk("rep_op_fgetattr: status %d\n", status);
+				if (status) {
+					kfree(resp->data);
+					resp->data = NULL;
+				}
+			}
+			break;
+		case REP_OP_NONE:
+		case REP_OP_CREATE:
+		case REP_OP_REMOVE:
+		case REP_OP_RENAME:
+		case REP_OP_LNK:
+			if (repd_wait_close(pnode))	/* these ops do no work here beyond the repd_wait_close() call */
+				status = 0;
+			break;
+		default:
+			status = 0;
+			break;
+		}
+		pnode_put(pnode);
+	} else {
+		dprintk("XXX: repsvc_proc_forward: fail in fh2pnode, status %d\n", status);
+	}
+	fh_put(&fh);	/* also runs when fh2pnode() failed; fh was fh_init()'ed there in either case */
+	return status;
+}
+
diff -puN /dev/null fs/nfsd/nfs4repd.h
--- /dev/null	2003-09-15 09:40:47.000000000 -0400
+++ rnfs-linux-2.6.16-rc3-jiayingz/fs/nfsd/nfs4repd.h	2007-03-03 16:21:47.000000000 -0500
@@ -0,0 +1,85 @@
+
+#define REPD_PORT          1028
+//#define REPD_BUFSIZE      (1024 + NFSSVC_MAXBLKSIZE)
+#define REPD_BUFSIZE			(1024*1024)
+extern struct svc_program  repsvc_program;
+extern struct work_struct repd_delayed_op;
+
+struct repd_update {	/* one sequenced replication operation attached to an rnode */
+	unsigned int			seq;	/* compared against rnode->update->seq to order updates */
+	unsigned int			opnum;	/* REP_OP_* code; selects how data is interpreted */
+	void						*data;	/* opnum-specific payload, freed by repd_release() */
+	char						*path;	/* optional path walked from the rnode's inode; kfree'd by repd_release() */
+	uid_t						uid;	/* copied to current->fsuid before the operation is applied */
+	gid_t						gid;	/* copied to current->fsgid before the operation is applied */
+	struct timespec      mtime;	/* i_mtime is raised to this after applying, if it was older */
+};
+
+struct repd_pending_entry {	/* an update that arrived ahead of its turn */
+	struct repd_update	*update;	/* the queued update */
+	struct list_head		list;	/* link on rnode->pendings, kept in ascending seq order */
+};
+
+struct repd_delayed_op {	/* NOTE(review): not referenced by nfs4repd.c as visible here; presumably consumed by the repd_delayed_op work item */
+	struct list_head     list;
+	struct dentry			*dentry;
+	unsigned int			opnum;
+	void						*data;
+	uid_t						uid;
+	gid_t						gid;
+};
+
+struct delayed_open_entry {	/* an open request deferred by repd_enter_open() */
+   struct list_head  list;	/* link on the delayed_open_rqts list */
+   struct cache_deferred_req *dreq;	/* handle returned by req->defer(); revisited by repd_scan_openlist() */
+};
+
+struct repd_openargs {	/* arguments of repsvc_proc_open() */
+	struct repd_update	*update;	/* becomes rnode->update on success, released on failure */
+   u32                  rpathlen;	/* length of rpath */
+   char                 *rpath;	/* appended to the root export's path to locate the export */
+   u32                  pathlen;	/* length of path */
+	char						*path;	/* appended after rpath to name the object to open */
+	struct xdr_netobj		primary_fh;	/* handed to path2rnode(); its data is kfree'd when the open fails */
+	unsigned int			single;	/* passed through to rnfs_open_rnode() */
+};
+
+struct repd_updateargs {	/* arguments of repsvc_proc_update() */
+	struct repd_update	*update;	/* the update to drop, queue or install */
+   u32                  fh_size;	/* length of fh_base in bytes */
+   char                 *fh_base;	/* file handle resolved to the target rnode by fh2rnode() */
+};
+
+struct repd_forwardargs {	/* arguments of repsvc_proc_forward() */
+   u32                  fh_size;	/* length of fh_base in bytes */
+   char                 *fh_base;	/* file handle resolved to a pnode by fh2pnode() */
+   unsigned int         opnum;	/* REP_OP_* code selecting the forwarded operation */
+   void                 *data;	/* opnum-specific payload; kfree'd by most cases of the handler */
+	uid_t						uid;	/* copied to current->fsuid before the operation */
+	gid_t						gid;	/* copied to current->fsgid before the operation */
+};
+
+struct repd_openres {	/* reply of repsvc_proc_open() */
+	struct xdr_netobj	fh[2];	/* [0]: handle of the opened object, [1]: handle of a rename/link target */
+	unsigned int		opnum;	/* echoed from the request's update */
+	unsigned int		seq;	/* echoed from the request's update */
+};
+
+struct repd_updateres {	/* reply of repsvc_proc_update() */
+	struct xdr_netobj	fh;	/* filled by open_in_update() for REP_OP_OPEN, otherwise len 0 / data NULL */
+	unsigned int		opnum;	/* echoed from the request's update */
+	unsigned int		seq;	/* echoed from the request's update */
+};
+
+struct repd_forwardres {	/* reply of repsvc_proc_forward() */
+   unsigned int         opnum;	/* echoed from the request */
+   void                 *data;	/* kmalloc'd unsigned long (FREAD) or struct kstat (FGETATTR) on success, else NULL */
+};
+
+int repsvc_proc_open(struct svc_rqst *rqstp, struct repd_openargs *argp, struct repd_openres *resp);
+int repsvc_proc_update(struct svc_rqst *rqstp, struct repd_updateargs *argp, struct repd_updateres *resp);
+int repsvc_proc_forward(struct svc_rqst *rqstp, struct repd_forwardargs *argp, struct repd_forwardres *resp);
+void repd_release(struct repd_update *update);
+void repd_rnode_update(struct rnfs_rnode *rnode, struct repd_update *update);
+struct svc_export *rexp_find(struct auth_domain *clp, int fsid_type, u32 *fsidv, struct cache_req *reqp);
+void repd_enter_open(struct cache_req *req);
diff -puN /dev/null fs/nfsd/nfs4repxdr.c
--- /dev/null	2003-09-15 09:40:47.000000000 -0400
+++ rnfs-linux-2.6.16-rc3-jiayingz/fs/nfsd/nfs4repxdr.c	2007-03-03 16:21:47.000000000 -0500
@@ -0,0 +1,1062 @@
+#include <linux/config.h>
+#include <linux/types.h>
+#include <linux/sched.h>
+#include <linux/utsname.h>
+#include <linux/file.h>
+#include <linux/pagemap.h>
+#include <linux/inet.h>
+#include <linux/nfs.h>
+#include <linux/sunrpc/xdr.h>
+#include <linux/sunrpc/clnt.h>
+#include <linux/sunrpc/svc.h>
+#include <linux/sunrpc/stats.h>
+#include <linux/nfsd/nfsd.h>
+#include <linux/nfsd/export.h>
+#include <linux/nfsd/state.h>
+#include <linux/nfsd/cache.h>
+#include <linux/sunrpc/cache.h>
+#include <linux/nfsd/syscall.h>
+
+#include "nfs4repstate.h"
+#include "nfs4replication.h"
+#include "nfs4repclnt.h"
+#include "nfs4repd.h"
+
+#define NFSDDBG_FACILITY   NFSDDBG_REPCLNT
+
+/* repclnt side xdr functions */
+#define REP_PROGRAM        100036
+#define REP_MAX_TCP_TIMEOUT     (600*HZ)
+
+struct rpc_program   repclnt_program;	/* tentative definition; initialised further down in this file */
+struct rpc_procinfo repclnt_procedures[];	/* procedure table, defined further down in this file */
+static int proto = IPPROTO_TCP;	/* transport passed to xprt_create_proto() by repclnt_bind() */
+static u32 version=1;	/* program version passed to rpc_new_client() */
+static rpc_authflavor_t flavor=RPC_AUTH_NULL;	/* authentication flavor passed to rpc_new_client() */
+static struct rpc_timeout  timeparms = {	/* timeout settings handed to xprt_create_proto() */
+   .to_initval = 600 * HZ / 10,	/* 60 seconds, in jiffies */
+   .to_retries = 2,
+   .to_maxval = REP_MAX_TCP_TIMEOUT,	/* 600 seconds, in jiffies */
+   .to_exponential = 1,
+};
+
+typedef void	(*repclnt_argsrel)(void *);
+
+/*
+ * Connect to the replication daemon on @rep.  Any existing rep->clnt is shut
+ * down, then one control client and REPCLNT_DATACLNT data clients are created
+ * (TCP, port REPD_PORT).  Returns 0, or a negative errno on the first failure.
+ * NOTE(review): clients built before a failure, and an earlier rep->dataclnt array, are not released here.
+ */
+int repclnt_bind(struct nfsd4_fs_location *rep)
+{
+	struct rpc_xprt *xprt;
+	struct sockaddr_in addr;
+	int i;
+	if (rep->clnt && !IS_ERR(rep->clnt))
+		rpc_shutdown_client(rep->clnt);
+	rep->clnt = NULL;
+	addr.sin_family = AF_INET;
+	addr.sin_port = htons(REPD_PORT);
+	addr.sin_addr.s_addr = rep->sin_addr.s_addr;
+	xprt = xprt_create_proto(proto, &addr, &timeparms);
+	if (!xprt || IS_ERR(xprt))
+		return xprt ? PTR_ERR(xprt) : -EIO;
+	rep->clnt = rpc_new_client(xprt,rep->hosts,&repclnt_program,version,flavor);
+	if (!rep->clnt || IS_ERR(rep->clnt)) {
+		int err = rep->clnt ? PTR_ERR(rep->clnt) : -EIO;
+		rep->clnt = NULL;
+		return err;
+	}
+	rep->clnt->cl_autobind = 1;
+	rep->dataclnt = kcalloc(REPCLNT_DATACLNT, sizeof(*rep->dataclnt), GFP_KERNEL);	/* zeroed: unreached slots stay NULL */
+	if (!rep->dataclnt)
+		return -ENOMEM;	/* the array used to be dereferenced without this check */
+	for (i = 0; i < REPCLNT_DATACLNT; i++) {
+		xprt = xprt_create_proto(proto, &addr, &timeparms);
+		if (!xprt || IS_ERR(xprt))
+			return xprt ? PTR_ERR(xprt) : -EIO;
+		rep->dataclnt[i] = rpc_new_client(xprt,rep->hosts,&repclnt_program,version,flavor);
+		if (!rep->dataclnt[i] || IS_ERR(rep->dataclnt[i])) {
+			int err = rep->dataclnt[i] ? PTR_ERR(rep->dataclnt[i]) : -EIO;
+			rep->dataclnt[i] = NULL;
+			return err;
+		}
+		rep->dataclnt[i]->cl_autobind = 1;
+	}
+	return 0;
+}
+
+/* encode and decode functions */
+static u32 *encode_bytes(u32 *p, char *buf, unsigned int len)	/* XDR opaque: length word, data, zeroed pad; returns the next word */
+{
+	*p++ = htonl((u32) len);
+	if (len > 0)
+		memcpy(p, buf, len);
+	memset((char *) p + len, 0, (XDR_QUADLEN(len) << 2) - len);	/* pad bytes used to carry stale buffer contents */
+	return p + XDR_QUADLEN(len);
+}
+
+static inline u32 * encode_time(u32 *p, struct timespec *time) {	/* tv_sec then tv_nsec, one 32-bit word each */
+	p[0] = htonl(time->tv_sec);
+	p[1] = htonl(time->tv_nsec);
+	return p + 2;
+}
+
+/* Encode @time1 followed by @time2; returns the next free word. */
+static u32 *encode_time_two(u32 *p, struct timespec *time1, struct timespec *time2)
+{
+	u32 *next = encode_time(p, time1);
+	return encode_time(next, time2);
+}
+
+/* Encode a deny view: the entry count, then one word per deny_list element. */
+static inline u32 *encode_denyview(u32 *p, struct rep_dview *argp)
+{
+	int n;
+	*p++ = htonl(argp->deny_num);
+	for (n = 0; n < argp->deny_num; n++, p++)
+		*p = htonl(argp->deny_list[n]);
+	return p;
+}
+
+/* Encode a close: mtime, ctime, then the deny view. */
+static inline u32 *encode_close(u32 *p, struct rep_close *closep)
+{
+	u32 *q = encode_time_two(p, &closep->mtime, &closep->ctime);
+
+	return encode_denyview(q, &closep->dview);
+}
+
+/* Encode a write header: 64-bit offset (high word first), then the byte count. */
+static inline u32 *encode_write(u32 *p, struct rep_write *writep)
+{
+	p[0] = htonl( (uint32_t) ((writep->offset) >> 32) );
+	p[1] = htonl( (uint32_t) (writep->offset) );
+	p[2] = htonl( (uint32_t) (writep->count) );
+	return p + 3;
+}
+
+/* Encode an iattr: valid mask, mode, uid, gid, 64-bit size (high word first), then atime, mtime, ctime. */
+static u32 *encode_setattr(u32 *p, struct iattr *iap)
+{
+	p[0] = htonl(iap->ia_valid);
+	p[1] = htonl(iap->ia_mode);
+	p[2] = htonl(iap->ia_uid);
+	p[3] = htonl(iap->ia_gid);
+	p[4] = htonl( (uint32_t) ((iap->ia_size) >> 32) );
+	p[5] = htonl( (uint32_t) (iap->ia_size) );
+	p = encode_time(p + 6, &iap->ia_atime);
+	return encode_time_two(p, &iap->ia_mtime, &iap->ia_ctime);
+}
+
+/* Encode a create: name, type, per-type data, attributes.  Device numbers go out for S_IFBLK/S_IFCHR, matching decode_create() (was S_IFSOCK). */
+static u32 *encode_create(u32 *p, struct rep_create *createp)
+{
+	p = encode_bytes(p, createp->cr_name, createp->cr_namelen);
+	*p++ = htonl(createp->cr_type);
+	switch(createp->cr_type) {
+	case S_IFLNK:
+		p = encode_bytes(p, createp->u.link.name, createp->u.link.namelen);
+		break;
+	case S_IFBLK:
+	case S_IFCHR:
+		*p++ = htonl(createp->u.dev.specdata1);
+		*p++ = htonl(createp->u.dev.specdata2);
+		break;
+	case NFS3_CREATE_UNCHECKED:
+	case NFS3_CREATE_GUARDED:
+	case NFS3_CREATE_EXCLUSIVE:
+		*p++ = htonl(createp->u.verf);
+		break;
+	}
+	return encode_setattr(p, &createp->cr_attrs);
+}
+
+static u32 *encode_remove(u32 *p, struct rep_remove *removep)	/* type word, then the name as an opaque */
+{
+	p[0] = htonl(removep->rm_type);
+	return encode_bytes(p + 1, removep->rm_name, removep->rm_namelen);
+}
+
+/* Encode a rename/link: source name, target name, optional target path, optional target handle (slot @index) plus its seq. */
+static u32 *encode_rename(u32 *p, struct rep_rename *renamep, int index)
+{
+	struct rnfs_pnode *tpnode = renamep->rqt ? renamep->rqt->pnode : NULL;
+	p = encode_bytes(p, renamep->sname, renamep->snamelen);
+	p = encode_bytes(p, renamep->tname, renamep->tnamelen);
+	dprintk("encode_rename: sname %s, slen %d, tname %s, tlen %d\n", renamep->sname, renamep->snamelen, renamep->tname, renamep->tnamelen);
+	if (!renamep->tpathname) {
+		*p++ = htonl(0);
+	} else {
+		*p++ = htonl(1);
+		p = encode_bytes(p, renamep->tpathname, strlen(renamep->tpathname));
+	}
+	if (!tpnode) {
+		*p++ = htonl(0);	/* no target handle follows */
+		return p;
+	}
+	*p++ = htonl(1);
+	p = encode_bytes(p, tpnode->fh[index].data, tpnode->fh[index].len);
+	*p++ = htonl(renamep->rqt->seq);
+	return p;
+}
+
+/* Encode the open payload: the pnode's handle at the export's "migrated" slot, then openp->seq; nothing when @openp is NULL. */
+static u32 *encode_open(u32 *p, struct rep_open *openp)
+{
+	if (openp != NULL) {
+		struct xdr_netobj *fh = &openp->pnode->fh[openp->pnode->exp->ex_fslocs->migrated];
+		p = encode_bytes(p, fh->data, fh->len);
+		*p++ = htonl(openp->seq);
+	}
+	return p;
+}
+
+/* Encode the wait payload: the pnode's handle at slot @index, then openp->seq; nothing when @openp is NULL. */
+static u32 *encode_wait(u32 *p, struct rep_open *openp, unsigned int index)
+{
+	if (!openp)
+		return p;
+	p = encode_bytes(p, openp->pnode->fh[index].data, openp->pnode->fh[index].len);
+	*p++ = htonl(openp->seq);
+	return p;
+}
+
+/* Encode file handle @fh as an XDR opaque; returns the next free word. */
+static u32 *encode_fh(u32 *p, struct xdr_netobj *fh)
+{
+	return encode_bytes(p, fh->data, fh->len);
+}
+
+#define ENCODE_OP_HEADER /* shared prologue of repclt_encode_open/update; uses req, p and cdata of the enclosing function */ \
+	struct rnfs_pnode *pnode; \
+	struct repclnt_request *rqt; \
+	unsigned int index_num; \
+	struct nfsd4_fs_locations *replist; \
+	struct rpc_task *task = req->rq_task; \
+	task->tk_flags |= RPC_TASK_ASYNC; /* done before the cdata sanity check below */ \
+	if (!cdata || !cdata->rqt || !cdata->rqt->pnode) \
+		return -EIO; /* returns from the function that expands the macro */ \
+	rqt = cdata->rqt; \
+	pnode = cdata->rqt->pnode; \
+	index_num = cdata->index_num; \
+	replist = pnode->exp->ex_fslocs; \
+	*p++ = htonl(rqt->seq); /* first encoded word: the request sequence number */
+
+#define ENCODE_OP /* shared epilogue: encodes rqt->opnum and its arguments, sets rq_slen, returns 0 from the enclosing function */ \
+   *p++ = htonl(rqt->opnum); \
+   switch (rqt->opnum) { \
+		case REP_OP_CLOSE: \
+			p = encode_close(p, (struct rep_close *) rqt->argp); \
+			break; \
+		case REP_OP_WRITE: /* only the offset/count header; the data pages are attached below */ \
+			p = encode_write(p, (struct rep_write *) rqt->argp); \
+			break; \
+      case REP_OP_CREATE: \
+         p = encode_create(p, (struct rep_create *) rqt->argp); \
+         break; \
+      case REP_OP_REMOVE: \
+         p = encode_remove(p, (struct rep_remove *) rqt->argp); \
+         break; \
+      case REP_OP_SETATTR: \
+         p = encode_setattr(p, (struct iattr *) rqt->argp); \
+         break; \
+      case REP_OP_LNK: /* link shares the rename argument layout */ \
+      case REP_OP_RENAME: \
+         p = encode_rename(p, (struct rep_rename *) rqt->argp, index_num); \
+         break; \
+		case REP_OP_OPEN: \
+			p = encode_open(p, (struct rep_open *) rqt->argp); \
+			break; \
+		case REP_OP_WAIT: \
+			p = encode_wait(p, (struct rep_open *) rqt->argp, index_num); \
+			break; \
+		case REP_OP_CALLBACK: \
+			p = encode_fh(p, (struct xdr_netobj *) rqt->argp); \
+			break; \
+      default: /* any other op carries no arguments */ \
+         break; \
+   } \
+   req->rq_slen = xdr_adjust_iovec(req->rq_svec, p); /* record how much of the head buffer was used */ \
+   if (rqt->opnum == REP_OP_WRITE) { \
+      struct xdr_buf *sndbuf = &req->rq_snd_buf; \
+      struct rep_write *writep = (struct rep_write *) rqt->argp; \
+      xdr_encode_pages(sndbuf, writep->pages, writep->pgbase, writep->count); /* append the write data pages */ \
+   } \
+	return 0;
+
+/* Encode an OPEN call: common header, uid/gid, replica path, pathname, handle at the "migrated" slot, lock flags, mtime, then the op. */
+static int repclt_encode_open(struct rpc_rqst *req, u32 *p, struct callback_data *cdata)
+{
+	unsigned int flags=0;
+	ENCODE_OP_HEADER;
+	dprintk("repclt_encode_open: seq %u, index %d, pnode %ld, opnum %u, pathlen %d, migrated %d, fhlen %u\n", rqt->seq, index_num, pnode->fi_inode->i_ino, rqt->opnum, (int) strlen(rqt->pathname), replist->migrated, pnode->fh[replist->migrated].len);	/* cast: strlen() is size_t, the format is %d */
+	*p++ = htonl(rqt->uid);
+	*p++ = htonl(rqt->gid);
+	p = encode_bytes(p, replist->locations[index_num].path, strlen(replist->locations[index_num].path));
+	p = encode_bytes(p, rqt->pathname, strlen(rqt->pathname));
+	p = encode_bytes(p, pnode->fh[replist->migrated].data, pnode->fh[replist->migrated].len);
+	if ((pnode->pflags & PNODE_SINGLE))
+		flags |= REP_LOCK_SINGLE;
+	if (atomic_read(&rqt->ack_count) > 1)
+		flags |= REP_LOCK_FORCE;
+	*p++ = htonl(flags);
+	p = encode_time(p, &rqt->mtime);
+	index_num = replist->migrated;	/* ENCODE_OP below indexes file handles with index_num */
+	ENCODE_OP;
+}
+
+/* Encode an UPDATE call: common header, uid/gid, handle at slot index_num, pathname (empty opaque when NULL), mtime, then the op. */
+static int repclt_encode_update(struct rpc_rqst *req, u32 *p, struct callback_data *cdata)
+{
+	ENCODE_OP_HEADER;
+	dprintk("repclt_encode_update: seq %u, pnode %ld, opnum %u, index %d\n", rqt->seq, pnode->fi_inode->i_ino, rqt->opnum, index_num);
+	*p++ = htonl(rqt->uid);
+	*p++ = htonl(rqt->gid);
+	p = encode_bytes(p, pnode->fh[index_num].data, pnode->fh[index_num].len);
+	if (!rqt->pathname)
+		*p++ = htonl(0);
+	else
+		p = encode_bytes(p, rqt->pathname, strlen(rqt->pathname));
+	p = encode_time(p, &rqt->mtime);
+	ENCODE_OP;
+}
+
+/* Encode a FORWARD call: uid, gid, the rnode's primary file handle, the op code and its arguments.
+ * FREAD also registers the caller's pages to receive the reply data.  Returns 0, or -EIO when the
+ * request, its rnode, or the FREAD arguments are missing. */
+static int repclt_encode_forward(struct rpc_rqst *req, u32 *p, struct repclnt_sendone_cdata *rqt)
+{
+	struct xdr_netobj *fh;
+	if (!rqt || !rqt->rnode)
+		return -EIO;
+	fh = &rqt->rnode->primary_fh;
+	*p++ = htonl(rqt->uid);
+	*p++ = htonl(rqt->gid);
+	p = encode_bytes(p, fh->data, fh->len);
+	*p++ = htonl(rqt->opnum);
+	dprintk("repclnt_encode_forward: opnum %d\n", rqt->opnum);
+	switch (rqt->opnum) {
+	case REP_OP_CALLBACK:
+		p = encode_fh(p, (struct xdr_netobj *) rqt->argp);
+		break;
+	case REP_OP_FREAD: {
+		struct rep_fread *argp = (struct rep_fread *) rqt->argp;
+		struct rpc_auth   *auth = req->rq_task->tk_auth;
+		int replen = (RPC_REPHDRSIZE + auth->au_rslack + 2) << 2;
+		if (!argp)
+			return -EIO;	/* used to report success without encoding the read arguments or setting rq_slen */
+		dprintk("repclnt_encode_fread: count %lu\n", argp->count);
+		*p++ = htonl( (uint32_t) ((argp->offset) >> 32) );
+		*p++ = htonl( (uint32_t) (argp->offset) );
+		*p++ = htonl( (uint32_t) (argp->count) );
+		req->rq_slen = xdr_adjust_iovec(req->rq_svec, p);
+		xdr_inline_pages(&req->rq_rcv_buf, replen, argp->pages, argp->pgbase, argp->count);
+		return 0;
+	}
+	default:
+		break;
+	}
+	req->rq_slen = xdr_adjust_iovec(req->rq_svec, p);
+	return 0;
+}
+
+/*
+static u32 *
+decode_bytes(u32 *p, char *sp, unsigned int *lenp, unsigned int maxlen)
+{
+   unsigned int    len;
+   if ((len = ntohl(*p++)) > maxlen)
+      return NULL;
+   *lenp = len;
+   memcpy(sp, p, len);
+   return p + XDR_QUADLEN(len);
+}
+*/
+
+/* Decode an XDR opaque of at most @maxlen bytes into a kmalloc'd copy (*sp, *lenp); returns NULL on an oversized length or OOM. */
+static u32 *decode_bytes_alloc(u32 *p, char **sp, unsigned int *lenp, unsigned int maxlen)
+{
+	unsigned int len = ntohl(*p++);
+	char *buf = NULL;
+	if (len > maxlen)
+		return NULL;
+	if (len > 0 && !(buf = kmalloc(len, GFP_KERNEL)))
+		return NULL;	/* used to memcpy() into the NULL buffer; outputs stay untouched on failure */
+	*lenp = len;
+	if (len == 0)
+		return p;	/* *sp is left untouched for an empty object, as before */
+	memcpy(buf, p, len);
+	*sp = buf;
+	return p + XDR_QUADLEN(len);
+}
+
+#define DECODE_OP_HEADER /* shared prologue of the open/update reply decoders; uses p and cdata of the enclosing function */ \
+	unsigned int index_num; \
+	struct xdr_netobj *fh; \
+	struct repclnt_request *rqt; \
+	struct rnfs_pnode *pnode; \
+	unsigned int status; \
+	struct nfsd4_fs_locations *replist; \
+	if (!cdata || !cdata->rqt || !cdata->rqt->pnode) \
+		return -EIO; /* returns from the function that expands the macro */ \
+	rqt = cdata->rqt; \
+	status = ntohl(*p++); /* first reply word: the remote status */ \
+	index_num = cdata->index_num; \
+	pnode = cdata->rqt->pnode; \
+	replist = pnode->exp->ex_fslocs;
+
+/* Record one replica reply on @pn: count it when @status is 0; once every location in @replist has acked, mark the pnode complete and wake a waiter. */
+static void repclt_note_ack(struct rnfs_pnode *pn, unsigned int status, struct nfsd4_fs_locations *replist)
+{
+	if (status == 0)
+		pn->ack_num++;
+	if (pn->ack_num != replist->locations_count)
+		return;
+	pn->pflags |= PNODE_COMPLETE;
+	if ((pn->pflags & PNODE_WAIT_COMPLETE)) {
+		wake_up_interruptible(&pn->pwait);
+		pn->pflags &= ~PNODE_WAIT_COMPLETE;
+	}
+}
+
+/* Decode an OPEN reply: status, the replica's handle for the pnode and, for rename/link, the handle of the target pnode.  Returns the status, or -EIO on a malformed reply. */
+static int repclt_decode_open_res(struct rpc_rqst *req, u32 *p, struct callback_data *cdata)
+{
+	DECODE_OP_HEADER;
+	fh = &pnode->fh[index_num];
+	if (!(p = decode_bytes_alloc(p, (char **) &fh->data, &fh->len, NFS_FHSIZE)))
+		return -EIO;	/* malformed handle: p is NULL and was dereferenced by the second decode below */
+	repclt_note_ack(pnode, status, replist);
+	dprintk("repclt_decode_open: seq %u, pnode %ld, opnum %u, index %d, status %u, fhlen %u\n", rqt->seq, pnode->fi_inode->i_ino, rqt->opnum, index_num, status, fh->len);
+	if ((rqt->opnum == REP_OP_RENAME) || (rqt->opnum == REP_OP_LNK)) {
+		struct rep_rename *renamep = (struct rep_rename *) rqt->argp;
+		if (renamep->rqt) {
+			struct rnfs_pnode *tpnode = renamep->rqt->pnode;
+			if (!tpnode)
+				return -EIO;
+			fh = &tpnode->fh[index_num];
+			if (!(p = decode_bytes_alloc(p, (char **) &fh->data, &fh->len, NFS_FHSIZE)))
+				return -EIO;
+			repclt_note_ack(tpnode, status, replist);
+		}
+	}
+	return status;
+}
+
+/* Decode an UPDATE reply: count a successful ack and, for REP_OP_OPEN, take the returned handle, update completion state and kick the replica's work queue. */
+static int repclt_decode_update_res(struct rpc_rqst *req, u32 *p, struct callback_data *cdata)
+{
+	DECODE_OP_HEADER;
+	if (status == 0)
+		atomic_inc(&rqt->ack_count);
+	if (rqt->opnum == REP_OP_OPEN) {
+		struct rep_open *openp = (struct rep_open *) rqt->argp;
+		struct xdr_netobj tmp = { .len = 0, .data = NULL };	/* initialised: len is read even when the decode fails */
+		if (!openp->pnode)
+			return -EIO;
+		fh = &openp->pnode->fh[index_num];
+		if (fh->len > 0)
+			fh = &tmp;	/* handle already known: decode into the scratch object instead */
+		p = decode_bytes_alloc(p, (char **) &fh->data, &fh->len, NFS_FHSIZE);
+		if (fh->len > 0) {
+			openp->pnode->ack_num++;
+			if (openp->pnode->ack_num == replist->locations_count) {
+				openp->pnode->pflags |= PNODE_COMPLETE;
+				if ((openp->pnode->pflags & PNODE_WAIT_COMPLETE)) {
+					wake_up_interruptible(&openp->pnode->pwait);
+					openp->pnode->pflags &= ~PNODE_WAIT_COMPLETE;
+				}
+			}
+			if (!list_empty(&replist->locations[index_num].rnfs_list))
+				queue_work(replist->locations[index_num].rnfs_wq, &replist->locations[index_num].rnfs_work);
+		}
+		kfree(tmp.data);	/* the scratch copy used to be leaked; NULL (a no-op) when unused */
+	}
+	return status;
+}
+
+/* Decode an FREAD reply: read the byte count, call xdr_shift_buf() when the head holds more than the header, and clamp *count to what was received. */
+static int repclnt_decode_fread(struct rpc_rqst *req, u32 *p, unsigned long *count)
+{
+	struct kvec *head = req->rq_rcv_buf.head;
+	unsigned int hdrlen, recvd;
+	dprintk("repclnt_decode_fread\n");
+	*count = ntohl(*p++);
+	hdrlen = (u8 *) p - (u8 *) head->iov_base;
+	if (head->iov_len < hdrlen) {
+		printk("NFS: READ reply header overflowed: hdrlen %u, iov_len %u\n", hdrlen, head->iov_len);
+		return -EIO;
+	}
+	if (head->iov_len != hdrlen)
+		xdr_shift_buf(&req->rq_rcv_buf, head->iov_len - hdrlen);
+	recvd = req->rq_rcv_buf.len - hdrlen;
+	if (*count > recvd) {
+		printk("server cheating in read reply: count %lu, recvd %u\n", *count, recvd);
+		*count = recvd;
+	}
+	return 0;
+}
+
+static inline u32 *decode_time(u32 *p, struct timespec *time)	/* reads tv_sec then tv_nsec, one word each */
+{
+	time->tv_sec = ntohl(p[0]);
+	time->tv_nsec = ntohl(p[1]);
+	return p + 2;
+}
+
+/* Decode an FGETATTR reply into @statp: mode, nlink, uid, gid, 64-bit size (high word first), mtime, ctime. */
+static u32 *repclnt_decode_fgetattr(u32 *p, struct kstat *statp)
+{
+	dprintk("repclnt_decode_fgetattr\n");
+	statp->mode = ntohl(p[0]);
+	statp->nlink = ntohl(p[1]);
+	statp->uid = ntohl(p[2]);
+	statp->gid = ntohl(p[3]);
+	statp->size = ((u64)ntohl(p[4]) << 32) | ntohl(p[5]);
+	p = decode_time(p + 6, &statp->mtime);
+	p = decode_time(p, &statp->ctime);
+	dprintk("repclt_decode_fgetattr_res, mode %u, uid %u, gid %u\n", statp->mode, statp->uid, statp->gid);
+	dprintk("repclt_decode_fgetattr_res, size %lld\n", statp->size);
+	return p;
+}
+
+/* Decode a FORWARD reply.  A non-zero status word is returned as is; otherwise the payload is decoded
+ * according to cdata->opnum into cdata->resp (FREAD: unsigned long count, FGETATTR: struct kstat).
+ * Returns 0 on success, or the FREAD decoder's result. */
+static int repclt_decode_forward_res(struct rpc_rqst *req, u32 *p, struct repclnt_sendone_cdata *cdata)
+{
+	unsigned int status = ntohl(*p++);
+	dprintk("repclt_decode_forward_res: status %d\n", status);
+	if (status)
+		return status;
+	switch (cdata->opnum) {
+	case REP_OP_FREAD:
+		return repclnt_decode_fread(req, p, (unsigned long *) cdata->resp);
+	case REP_OP_FGETATTR:
+		p = repclnt_decode_fgetattr(p, (struct kstat *) cdata->resp);
+		break;
+	default:
+		break;
+	}
+	return 0;
+}
+
+/* Argument encoder of the NULL procedure: nothing to encode, only logs and reports success. */
+static int repclt_encode_void(struct rpc_rqst *req, u32 *p, void *ptr)
+{
+	dprintk("repclt_encode_void\n");
+	return 0;
+}
+
+/* Reply decoder of the NULL procedure: returns the first reply word converted to host order. */
+static int repclt_decode_void(struct rpc_rqst *req, u32 *p, void *ptr)
+{
+	dprintk("repclt_decode_void\n");
+	return ntohl(p[0]);
+}
+
+#define rep_replica_max    16
+#define rep_time_maxsz     2
+#define rep_name_maxsz    (1 + ((3 + NFS4_MAXNAMLEN) >> 2))
+/* Sizes below are counted in 32-bit words (PROC shifts them left by 2); each expansion is parenthesised so it composes safely. */
+#define rep_close_maxsz    (2*rep_time_maxsz+1+rep_replica_max)
+#define rep_write_maxsz    3
+#define rep_setattr_maxsz  (6+5*rep_time_maxsz)
+#define rep_create_maxsz   (rep_name_maxsz+1+rep_name_maxsz+rep_setattr_maxsz)
+#define rep_remove_maxsz   (1+rep_name_maxsz)
+#define rep_rename_maxsz   (2*rep_name_maxsz +1+rep_name_maxsz+1+NFS_FHSIZE+1)
+#define rep_open_maxsz     (NFS_FHSIZE+1)
+#define rep_wait_maxsz     1
+#define rep_op_maxsz			(1+rep_rename_maxsz)
+
+#define REP_void_sz           0
+#define REP_open_sz				(1+1+1+2*rep_name_maxsz+NFS_FHSIZE+1+rep_op_maxsz)
+#define REP_update_sz         (1+1+1+NFS_FHSIZE+1+rep_name_maxsz+rep_op_maxsz)
+#define REP_forward_sz        (1+1+NFS_FHSIZE+rep_op_maxsz)
+
+#define REP_open_res_sz     (2 + NFS_FHSIZE + NFS_FHSIZE)
+#define REP_update_res_sz     (2 + NFS_FHSIZE)
+#define REP_forward_res_sz    (1 + 6 + 2*rep_time_maxsz)	/* status + 6 attribute words + mtime + ctime (was 5 words, one short) */
+
+#ifndef MAX
+#define MAX(a, b)          (((a) > (b)) ? (a) : (b) )
+#endif
+
+#define PROC(proc, argtype, restype)   /* one repclnt_procedures[] entry, indexed by REPPROC_<proc> */ \
+[REPPROC_##proc] = {                   \
+    .p_proc      = REPPROC_##proc,               \
+    .p_encode    = (kxdrproc_t) repclt_encode_##argtype,     \
+    .p_decode    = (kxdrproc_t) repclt_decode_##restype,     \
+    .p_bufsiz    = MAX(REP_##argtype##_sz, REP_##restype##_sz) << 2   /* larger of the two word counts, in bytes */ \
+   }
+
+struct rpc_procinfo repclnt_procedures[] = {	/* client-side procedure table of the repd program */
+  PROC(NULL, void, void),	/* repclt_encode_void / repclt_decode_void */
+  PROC(OPEN, open, open_res),	/* repclt_encode_open / repclt_decode_open_res */
+  PROC(UPDATE, update, update_res),	/* repclt_encode_update / repclt_decode_update_res */
+  PROC(FORWARD, forward, forward_res),	/* repclt_encode_forward / repclt_decode_forward_res */
+};
+
+static struct rpc_version	repclnt_version1 = {	/* version 1 of the client-side repd program */
+  .number	= 1,
+  .nrprocs  = 4,	/* hard-coded; assumes the four PROC entries fill indices 0..3 -- TODO confirm the REPPROC_* values */
+  .procs    = repclnt_procedures,
+};
+
+static struct rpc_version	*	repclnt_versions[] = {
+  [1] = &repclnt_version1,
+};
+
+static struct rpc_stat	rep_stats;
+
+struct rpc_program	repclnt_program = {	/* client-side description of the repd RPC program, passed to rpc_new_client() */
+  .name		= "repd",
+  .number	= REP_PROGRAM,	/* 100036 */
+  .nrvers	= sizeof(repclnt_versions) / sizeof(repclnt_versions[0]),	/* 2: slot 0 is empty, slot 1 holds version 1 */
+  .version	= repclnt_versions,
+  .stats		= &rep_stats,
+};
+
+/* repd rpc functions */
+static int repsvc_proc_null(struct svc_rqst *rqstp, void *argp, void *resp)	/* server handler of the NULL procedure */
+{
+   dprintk("repd: NULL called\n");
+   return rpc_success;	/* no work to do; all three parameters are unused */
+}
+
+/* Decode a deny view (count + one word per entry) into a kmalloc'd u8 list.  Returns NULL with an empty view on OOM or when the count exceeds rep_replica_max (assumed to be the protocol limit -- TODO confirm). */
+static inline u32 *decode_denyview(u32 *p, struct rep_dview *dview)
+{
+	unsigned int deny_num = ntohl(*p++), i;
+	u8 *deny_list;
+
+	dview->deny_num = 0;	/* the view is always left consistent, even on failure */
+	dview->deny_list = NULL;
+	if (deny_num == 0)
+		return p;
+	if (deny_num > rep_replica_max || !(deny_list = kmalloc(deny_num, GFP_KERNEL)))
+		return NULL;	/* used to return a non-zero count with a NULL list and an un-advanced stream */
+	for (i = 0; i < deny_num; i++)
+		deny_list[i] = ntohl(*p++);
+	dview->deny_num = deny_num;
+	dview->deny_list = deny_list;
+	return p;
+}
+
+static inline u32 *decode_time_two(u32 *p, struct timespec *time1, struct timespec *time2)
+{
+	u32 *next = decode_time(p, time1);	/* first timestamp, then the second right behind it */
+	return decode_time(next, time2);
+}
+
+#define SVC_DECODE_OP_HEADER(name) /* allocates a struct rep_<name> into a local called <name> and publishes it through *data */ \
+	struct rep_##name *name; \
+	name = kmalloc(sizeof(struct rep_##name), GFP_KERNEL); \
+	*data = (void *) name; \
+	if (name == NULL) \
+		return p; /* on OOM the enclosing decoder returns with p not advanced and *data NULL */
+
+/* Decode a close op into a freshly allocated struct rep_close (*data): mtime, ctime, then the deny view. */
+static inline u32 *decode_close(u32 *p, void **data)
+{
+	SVC_DECODE_OP_HEADER(close);
+	p = decode_time_two(p, &close->mtime, &close->ctime);
+	return decode_denyview(p, &close->dview);
+}
+
+static inline int decode_write(struct svc_rqst *rqstp, u32 *p, void **data)	/* decode a WRITE op into a kmalloc'd repd_write; 1 on success, 0 on failure */
+{
+	int len, v = 0, pagelen = rqstp->rq_arg.page_len;
+	unsigned int hdr;
+	struct repd_write *write = kmalloc(sizeof(struct repd_write), GFP_KERNEL);
+	*data = (void *) write;
+	if (write == NULL)
+		return 0;
+	write->offset = (u64) ntohl(*p++) << 32;
+	write->offset |= ntohl(*p++);
+	write->count = ntohl(*p++);
+	hdr = (void *) p - rqstp->rq_arg.head[0].iov_base;	/* bytes of the head consumed so far */
+	if (rqstp->rq_arg.len < hdr || write->count > rqstp->rq_arg.len - hdr)
+		return 0;	/* count exceeds the data received: the walk below would run past the vec and page arrays */
+	len = write->count;
+	write->vec[0].iov_base = (void*)p;
+	write->vec[0].iov_len = rqstp->rq_arg.head[0].iov_len - hdr;
+	while (len > write->vec[v].iov_len) {	/* one vec entry per argument page still needed */
+		len -= write->vec[v].iov_len;
+		v++;
+		write->vec[v].iov_base = page_address(rqstp->rq_argpages[v]);
+		if (pagelen >= PAGE_SIZE) {
+			write->vec[v].iov_len = PAGE_SIZE;
+			pagelen -= PAGE_SIZE;
+		} else {
+			write->vec[v].iov_len = pagelen;
+			pagelen -= len;
+		}
+	}
+	write->vec[v].iov_len = len;	/* trim the last entry to the bytes actually wanted */
+	write->vlen = v+1;
+	return write->vec[0].iov_len > 0;
+}
+
+/* Decode an iattr: valid mask, mode, uid, gid, 64-bit size (high word first), then atime, mtime, ctime. */
+static u32 *__decode_setattr(u32 *p, struct iattr *iap)
+{
+	iap->ia_valid = ntohl(p[0]);
+	iap->ia_mode = ntohl(p[1]);
+	iap->ia_uid = ntohl(p[2]);
+	iap->ia_gid = ntohl(p[3]);
+	iap->ia_size = ((u64)ntohl(p[4]) << 32) | ntohl(p[5]);
+	p = decode_time(p + 6, &iap->ia_atime);
+	p = decode_time_two(p, &iap->ia_mtime, &iap->ia_ctime);
+	return p;
+}
+
+/* Decode a create op into a new struct rep_create (*data): name, type, per-type data, attributes.  Returns NULL when a name fails to decode (p used to be dereferenced). */
+static u32 *decode_create(u32 *p, void **data)
+{
+	SVC_DECODE_OP_HEADER(create);
+	if (!(p = decode_bytes_alloc(p, &create->cr_name, &create->cr_namelen, NFS4_MAXNAMLEN)))
+		return NULL;
+	create->cr_type = ntohl(*p++);
+	switch (create->cr_type) {
+	case S_IFLNK:
+		p = decode_bytes_alloc(p, &create->u.link.name, &create->u.link.namelen, NFS4_MAXNAMLEN);
+		break;
+	case S_IFBLK:
+	case S_IFCHR:
+		create->u.dev.specdata1 = ntohl(*p++);
+		create->u.dev.specdata2 = ntohl(*p++);
+		break;
+	case NFS3_CREATE_UNCHECKED:
+	case NFS3_CREATE_GUARDED:
+	case NFS3_CREATE_EXCLUSIVE:
+		create->u.verf = ntohl(*p++);
+		break;
+	}
+	return p ? __decode_setattr(p, &create->cr_attrs) : NULL;
+}
+
+/* Decode a remove op into a new struct rep_remove (*data): type word, then the name. */
+static u32 *decode_remove(u32 *p, void **data)
+{
+	SVC_DECODE_OP_HEADER(remove);
+	remove->rm_type = ntohl(*p++);
+	return decode_bytes_alloc(p, &remove->rm_name, &remove->rm_namelen, NFS4_MAXNAMLEN );
+}
+
+/* Decode a setattr op into a kmalloc'd struct iattr (*data); on OOM *data is NULL and p is returned unchanged. */
+static u32 *decode_setattr(u32 *p, void **data)
+{
+	struct iattr *attrs = kmalloc(sizeof(struct iattr), GFP_KERNEL);
+
+	*data = (void *) attrs;
+	if (!attrs)
+		return p;
+	return __decode_setattr(p, attrs);
+}
+
+/* Decode an XDR string of at most @maxlen bytes into a kmalloc'd, NUL-terminated copy (*sp); NULL on an oversized length or OOM. */
+static inline u32 *decode_string(u32 *p, char **sp, unsigned int maxlen)
+{
+	unsigned int len = ntohl(*p++);
+	char *buf;
+
+	if (len > maxlen || !(buf = kmalloc(len + 1, GFP_KERNEL)))
+		return NULL;	/* the allocation used to be written through unchecked */
+	memcpy(buf, p, len);
+	buf[len] = '\0';
+	*sp = buf;
+	return p + XDR_QUADLEN(len);
+}
+
+/* Decode a rename/link op into a zeroed struct repd_rename (*data); returns NULL as soon as one field fails to decode instead of dereferencing it. */
+static u32 *decode_rename(u32 *p, void **data)
+{
+	struct repd_rename *rename = kmalloc(sizeof(struct repd_rename), GFP_KERNEL);
+
+	*data = (void *) rename;
+	if (rename == NULL)
+		return p;
+	memset(rename, 0, sizeof(*rename));	/* absent or empty fields now read as NULL/0, not heap garbage */
+	p = decode_bytes_alloc(p, &rename->sname, &rename->snamelen, NFS4_MAXNAMLEN);
+	if (p)
+		p = decode_bytes_alloc(p, &rename->tname, &rename->tnamelen, NFS4_MAXNAMLEN);
+	if (p && ntohl(*p++))
+		p = decode_string(p, &rename->tpath, NFS4_MAXNAMLEN);
+	if (p && ntohl(*p++)) {
+		p = decode_bytes_alloc(p, (char **) &rename->tprimary_fh.data, &rename->tprimary_fh.len, NFS4_MAXNAMLEN);
+		if (p)
+			rename->seq = ntohl(*p++);
+	}
+	if (p)	/* the names are not NUL-terminated, hence the bounded %.*s */
+		dprintk("decode_rename: sname %.*s, slen %d, tname %.*s, tlen %d\n", (int) rename->snamelen, rename->sname, rename->snamelen, (int) rename->tnamelen, rename->tname, rename->tnamelen);
+	return p;
+}
+
+/* Decode an open op into a kmalloc'd struct repd_open (*data): primary file handle, then seq.  Returns NULL on a malformed handle (p used to be dereferenced). */
+static u32 *decode_open(u32 *p, void **data)
+{
+	struct repd_open *open = kmalloc(sizeof(struct repd_open), GFP_KERNEL);
+	*data = (void *) open;
+	if (open)
+		p = decode_bytes_alloc(p, (char **) &open->primary_fh.data, &open->primary_fh.len, NFS4_MAXNAMLEN);
+	if (open && p)
+		open->seq = ntohl(*p++);
+	return p;
+}
+
+/* Decode a wait op into a kmalloc'd struct repd_wait (*data): file handle (in place), then seq.  Returns NULL on an oversized handle (p used to be dereferenced). */
+static u32 *decode_wait(u32 *p, void **data)
+{
+	struct repd_wait *wait = *data = kmalloc(sizeof(struct repd_wait), GFP_KERNEL);
+	if (wait == NULL)
+		return p;
+	wait->rnode = NULL;
+	p = xdr_decode_string_inplace(p, (char **) &wait->fh_base, &wait->fh_size, NFS4_MAXNAMLEN);
+	if (p)
+		wait->seq = ntohl(*p++);
+	return p;
+}
+
+/* Decode a file handle into a kmalloc'd struct xdr_netobj (*data); on OOM *data is NULL and p is returned unchanged. */
+static u32 *decode_fh(u32 *p, void **data)
+{
+	struct xdr_netobj *obj = kmalloc(sizeof(*obj), GFP_KERNEL);
+
+	*data = (void *) obj;
+	if (!obj)
+		return p;
+	return decode_bytes_alloc(p, (char **) &obj->data, &obj->len, NFS4_MAXNAMLEN);
+}
+
+/* Decode an FREAD op into a kmalloc'd struct rep_fread (*data): 64-bit offset (high word first) and count; other fields are left unset. */
+static u32 *decode_fread(u32 *p, void **data)
+{
+	struct rep_fread *fread;
+	dprintk("repsvc_decode_fread\n");
+	fread = kmalloc(sizeof(*fread), GFP_KERNEL);
+	*data = (void *) fread;
+	if (!fread)
+		return p;
+	fread->offset = ((u64) ntohl(p[0]) << 32) | ntohl(p[1]);
+	fread->count = ntohl(p[2]);
+	return p + 3;
+}
+
+/* Read the op code into *opnum and run its argument decoder, which stores its result in *data (NULL for ops without one).  Non-zero means the arguments were accepted. */
+static inline int decode_op(struct svc_rqst *rqstp, u32 *p, unsigned int *opnum, void **data)
+{
+	*opnum = ntohl(*p++);
+	*data = NULL;
+	if (*opnum == REP_OP_WRITE)	/* the only decoder that produces the final verdict itself */
+		return decode_write(rqstp, p, data);
+	switch (*opnum) {
+	case REP_OP_CLOSE:
+		p = decode_close(p, data);
+		break;
+	case REP_OP_CREATE:
+		p = decode_create(p, data);
+		break;
+	case REP_OP_REMOVE:
+		p = decode_remove(p, data);
+		break;
+	case REP_OP_SETATTR:
+		p = decode_setattr(p, data);
+		break;
+	case REP_OP_RENAME:
+	case REP_OP_LNK:
+		p = decode_rename(p, data);
+		break;
+	case REP_OP_OPEN:
+		p = decode_open(p, data);
+		break;
+	case REP_OP_WAIT:
+		p = decode_wait(p, data);
+		break;
+	case REP_OP_CALLBACK:
+		p = decode_fh(p, data);
+		break;
+	case REP_OP_FREAD:
+		p = decode_fread(p, data);
+		break;
+	default:
+		break;
+	}
+	return xdr_argsize_check(rqstp, p);
+}
+
+/* Decode OPEN arguments: seq, uid, gid, two paths, primary handle, flags, mtime, then the op.  Returns non-zero on success, 0 on failure.  NOTE(review): update is not freed when decoding fails. */
+static int repsvc_decode_open(struct svc_rqst *rqstp, u32 *p, struct repd_openargs *argp)
+{
+	struct repd_update *update = kmalloc(sizeof(*update), GFP_KERNEL);
+	if (!(argp->update = update))
+		return 0;	/* was -ENOMEM, which is non-zero and therefore read as "decoded fine" */
+	update->seq = ntohl(*p++);
+	update->uid = ntohl(*p++);
+	update->gid = ntohl(*p++);
+	update->path = NULL;
+	if (!(p = xdr_decode_string_inplace(p, &argp->rpath, &argp->rpathlen, NFS4_MAXNAMLEN)) || !(p = xdr_decode_string_inplace(p, &argp->path, &argp->pathlen, NFS4_MAXNAMLEN)) ||
+	    !(p = decode_bytes_alloc(p, (char **) &argp->primary_fh.data, &argp->primary_fh.len, NFS_FHSIZE)))
+		return 0;	/* oversized field: p is NULL and used to be dereferenced below */
+	argp->single = ntohl(*p++);
+	dprintk("repsvc_decode_open: single %u\n", argp->single);
+	p = decode_time(p, &update->mtime);
+	return decode_op(rqstp, p, &update->opnum, &update->data);
+}
+
+/* Decode UPDATE arguments: seq, uid, gid, file handle, path, mtime, then the op.  Returns non-zero on success and 0 on failure (was -ENOMEM, read as success; NULL p was dereferenced). */
+static int repsvc_decode_update(struct svc_rqst *rqstp, u32 *p, struct repd_updateargs *argp)
+{
+	struct repd_update *update = kmalloc(sizeof(*update), GFP_KERNEL);
+	if (!(argp->update = update))
+		return 0;
+	update->seq = ntohl(*p++);
+	update->uid = ntohl(*p++);
+	update->gid = ntohl(*p++);
+	if (!(p = xdr_decode_string_inplace(p, (char **) &argp->fh_base, &argp->fh_size, NFS_FHSIZE)) || !(p = decode_string(p, &update->path, NFS4_MAXNAMLEN)))
+		return 0;	/* NOTE(review): update is not freed on this path */
+	p = decode_time(p, &update->mtime);
+	return decode_op(rqstp, p, &update->opnum, &update->data);
+}
+
+static int repsvc_decode_forward(struct svc_rqst *rqstp, u32 *p, struct repd_forwardargs *argp)	/* uid, gid, file handle, then the forwarded op */
+{
+	argp->uid = ntohl(*p++);
+	argp->gid = ntohl(*p++);
+	dprintk("repsvc_decode_forward\n");
+	p = decode_bytes_alloc(p, (char **) &argp->fh_base, &argp->fh_size, NFS_FHSIZE);
+	return p ? decode_op(rqstp, p, &argp->opnum, &argp->data) : 0;	/* a bad handle used to hand NULL to decode_op() */
+}
+
+/* Encode an OPEN reply: fh[0], plus fh[1] for rename/link; each handle buffer is kfree()d once encoded. */
+static int repsvc_encode_open(struct svc_rqst *rqstp, u32 *p, struct repd_openres *resp)
+{
+	int i, nfh = ((resp->opnum == REP_OP_RENAME) || (resp->opnum == REP_OP_LNK)) ? 2 : 1;
+	dprintk("repsvc_encode_open: seq %u\n", resp->seq);
+	for (i = 0; i < nfh; i++) {
+		p = encode_bytes(p, resp->fh[i].data, resp->fh[i].len);
+		kfree(resp->fh[i].data);
+	}
+	return xdr_ressize_check(rqstp, p);
+}
+
+/* Encode an UPDATE reply.  Only REP_OP_OPEN carries a payload: the file handle,
+ * whose buffer is released after encoding. */
+static int repsvc_encode_update(struct svc_rqst *rqstp, u32 *p, struct repd_updateres *resp)
+{
+	if (resp->opnum != REP_OP_OPEN)
+		return xdr_ressize_check(rqstp, p);
+	dprintk("repsvc_encode_update_in_open: fh len %d\n", resp->fh.len);
+	p = encode_bytes(p, resp->fh.data, resp->fh.len);
+	kfree(resp->fh.data);
+	return xdr_ressize_check(rqstp, p);
+}
+
+static int repsvc_encode_forward(struct svc_rqst *rqstp, u32 *p, struct repd_forwardres *resp)	/* encode a FORWARD reply according to resp->opnum; frees resp->data */
+{
+	switch (resp->opnum) {
+		case REP_OP_FREAD:
+			dprintk("repsvc_encode_fread\n");
+			if (resp->data) {
+				unsigned long *count = (unsigned long *) resp->data;
+				*p++ = htonl(*count);
+				xdr_ressize_check(rqstp, p);	/* result deliberately ignored here; called before the page/tail setup below */
+				rqstp->rq_res.page_len = *count;	/* the read data itself is expected in the reply pages */
+   			if (*count & 3) {	/* count is not a multiple of 4: add a zeroed tail as padding */
+      			rqstp->rq_restailpage = 0;
+      			rqstp->rq_res.tail[0].iov_base = p;
+      			*p = 0;
+      			rqstp->rq_res.tail[0].iov_len = 4 - (*count&3);
+   			}
+				kfree(resp->data);
+   			return 1;
+			}
+			break;
+		case REP_OP_FGETATTR:
+			if (resp->data) {
+				struct kstat *statp = (struct kstat *) resp->data;
+				dprintk("repsvc_encode_fgetattr: mode %u, uid %u, gid %u\n", statp->mode, statp->uid, statp->gid);
+   			*p++ = htonl((u32) statp->mode);
+   			*p++ = htonl((u32) statp->nlink);
+   			*p++ = htonl((u32) statp->uid);
+   			*p++ = htonl((u32) statp->gid);
+   			*p++ = htonl( (uint32_t) ((statp->size) >> 32) );	/* 64-bit size, high word first */
+   			*p++ = htonl( (uint32_t) (statp->size) );
+   			p = encode_time(p, &statp->mtime);
+   			p = encode_time(p, &statp->ctime);
+			}
+			break;
+		default:
+			break;
+	}
+	if (resp->data)	/* every path except the FREAD early return releases data here */
+		kfree(resp->data);
+   return xdr_ressize_check(rqstp, p);
+}
+
+static int repsvc_decode_void(struct svc_rqst *rqstp, u32 *p, void *dummy)	/* NULL procedure: no arguments to decode */
+{
+   return xdr_argsize_check(rqstp, p);	/* only the size check is performed; dummy is unused */
+}
+
+static int repsvc_encode_void(struct svc_rqst *rqstp, u32 *p, void *dummy)	/* NULL procedure: no result to encode */
+{
+   return xdr_ressize_check(rqstp, p);	/* only the size check is performed; dummy is unused */
+}
+
+struct repd_void      { u32 dummy; };
+
+#define SPROC(name, xargt, xrest, argt, rest, cache, respsize) /* builds one svc_procedure entry from name fragments */ \
+   { .pc_func     = (svc_procfunc) repsvc_proc_##name, \
+     .pc_decode   = (kxdrproc_t) repsvc_decode_##xargt, \
+     .pc_encode  = (kxdrproc_t) repsvc_encode_##xrest, \
+     .pc_release = NULL,              /* no release hook is registered */ \
+     .pc_argsize = sizeof(struct repd_##argt),     /* argt gets the repd_ prefix ... */ \
+     .pc_ressize = sizeof(struct rest),     /* ... while rest must already be the full struct name */ \
+     .pc_cachetype = cache,          \
+     .pc_xdrressize = respsize,          \
+}
+
+#define ST 1      /* status*/
+#define FH 17     /* filehandle with length */
+#define STAT 10     /* stat attr */
+
+static struct svc_procedure   repd_procedures[4] = {	/* server-side procedure table; entries are positional */
+   SPROC(null, void, void, void, repd_void, RC_NOCACHE, ST),
+   SPROC(open, open, open, openargs, repd_openres, RC_NOCACHE, ST+FH+FH),	/* reply may carry two file handles */
+   SPROC(update, update, update, updateargs, repd_updateres, RC_NOCACHE, ST+FH),
+   SPROC(forward, forward, forward, forwardargs, repd_forwardres, RC_NOCACHE, NFSD_BUFSIZE/4),	/* sized for read data */
+};
+
+struct svc_stat   repd_stats = {
+   .program    = &repsvc_program,
+};
+
+#define REPD_XDRSIZE    sizeof(struct repd_openargs)
+//#define REPD_XDRSIZE    1024
+static struct svc_version  repd_version1 = {	/* version 1 of the server-side repd program */
+   .vs_vers = 1,
+   .vs_nproc = 4,	/* matches the declared size of repd_procedures[4] */
+   .vs_proc = repd_procedures,
+   .vs_dispatch   = nfsd_dispatch,	/* requests go through nfsd's dispatcher */
+   .vs_xdrsize = REPD_XDRSIZE,	/* sizeof(struct repd_openargs) */
+};
+
+static struct svc_version  * repd_version[] = {
+   [1] = &repd_version1,
+};
+
+#define REPD_NRVERS   (sizeof(repd_version)/sizeof(repd_version[0]))
+
+struct svc_program   repsvc_program = {	/* server-side description of the repd RPC program */
+   .pg_prog =  REP_PROGRAM,	/* 100036, the same number the client side uses */
+   .pg_nvers   = REPD_NRVERS,	/* 2: slot 0 is empty, slot 1 holds version 1 */
+   .pg_vers = repd_version,
+   .pg_name = "repd",
+   .pg_class = "nfsd",
+   .pg_stats = &repd_stats,
+   .pg_authenticate = &svc_set_client,
+};
diff -puN include/linux/sunrpc/svc.h~rnfs-all include/linux/sunrpc/svc.h
--- rnfs-linux-2.6.16-rc3/include/linux/sunrpc/svc.h~rnfs-all	2007-07-16 22:51:23.000000000 -0400
+++ rnfs-linux-2.6.16-rc3-jiayingz/include/linux/sunrpc/svc.h	2007-07-16 22:51:35.000000000 -0400
@@ -49,7 +49,7 @@ struct svc_serv {
  * This is use to determine the max number of pages nfsd is
  * willing to return in a single READ operation.
  */
-#define RPCSVC_MAXPAYLOAD	(64*1024u)
+#define RPCSVC_MAXPAYLOAD	(1024*1024u)
 
 /*
  * RPC Requsts and replies are stored in one or more pages.
_
