The complete set of citi nfsv4 patches combined into one patch. Changes since 2.6.14-rc5-CITI_NFS4_ALL-1 * Update to 2.6.14 and Trond's latest * Fix oops in nfsd4_process_open1 * andros: no recovery dir by default --- linux-2.6.14-bfields/Makefile | 6 linux-2.6.14-bfields/fs/Kconfig | 1 linux-2.6.14-bfields/fs/exec.c | 12 linux-2.6.14-bfields/fs/lockd/host.c | 4 linux-2.6.14-bfields/fs/lockd/svclock.c | 144 - linux-2.6.14-bfields/fs/lockd/svcsubs.c | 43 linux-2.6.14-bfields/fs/lockd/xdr4.c | 4 linux-2.6.14-bfields/fs/locks.c | 124 linux-2.6.14-bfields/fs/namei.c | 96 linux-2.6.14-bfields/fs/nfs/delegation.c | 40 linux-2.6.14-bfields/fs/nfs/delegation.h | 17 linux-2.6.14-bfields/fs/nfs/dir.c | 70 linux-2.6.14-bfields/fs/nfs/direct.c | 10 linux-2.6.14-bfields/fs/nfs/file.c | 30 linux-2.6.14-bfields/fs/nfs/inode.c | 209 + linux-2.6.14-bfields/fs/nfs/nfs2xdr.c | 1 linux-2.6.14-bfields/fs/nfs/nfs3proc.c | 118 linux-2.6.14-bfields/fs/nfs/nfs3xdr.c | 1 linux-2.6.14-bfields/fs/nfs/nfs4_fs.h | 57 linux-2.6.14-bfields/fs/nfs/nfs4proc.c | 833 +++--- linux-2.6.14-bfields/fs/nfs/nfs4state.c | 260 + linux-2.6.14-bfields/fs/nfs/nfs4xdr.c | 305 +- linux-2.6.14-bfields/fs/nfs/proc.c | 44 linux-2.6.14-bfields/fs/nfs/read.c | 5 linux-2.6.14-bfields/fs/nfs/write.c | 16 linux-2.6.14-bfields/fs/nfsd/export.c | 68 linux-2.6.14-bfields/fs/nfsd/nfs3xdr.c | 6 linux-2.6.14-bfields/fs/nfsd/nfs4callback.c | 9 linux-2.6.14-bfields/fs/nfsd/nfs4idmap.c | 4 linux-2.6.14-bfields/fs/nfsd/nfs4proc.c | 37 linux-2.6.14-bfields/fs/nfsd/nfs4recover.c | 10 linux-2.6.14-bfields/fs/nfsd/nfs4state.c | 390 +- linux-2.6.14-bfields/fs/nfsd/nfs4xdr.c | 5 linux-2.6.14-bfields/fs/nfsd/nfsctl.c | 4 linux-2.6.14-bfields/fs/nfsd/vfs.c | 5 linux-2.6.14-bfields/fs/open.c | 79 linux-2.6.14-bfields/include/linux/fs.h | 21 linux-2.6.14-bfields/include/linux/namei.h | 8 linux-2.6.14-bfields/include/linux/nfs_fs.h | 30 linux-2.6.14-bfields/include/linux/nfs_xdr.h | 65 linux-2.6.14-bfields/include/linux/nfsd/state.h | 10 
linux-2.6.14-bfields/include/linux/nfsd/xdr4.h | 5 linux-2.6.14-bfields/include/linux/sunrpc/auth.h | 7 linux-2.6.14-bfields/include/linux/sunrpc/cache.h | 28 linux-2.6.14-bfields/include/linux/sunrpc/debug.h | 3 linux-2.6.14-bfields/include/linux/sunrpc/gss_api.h | 27 linux-2.6.14-bfields/include/linux/sunrpc/gss_err.h | 10 linux-2.6.14-bfields/include/linux/sunrpc/gss_krb5.h | 29 linux-2.6.14-bfields/include/linux/sunrpc/gss_spkm3.h | 6 linux-2.6.14-bfields/include/linux/sunrpc/msg_prot.h | 25 linux-2.6.14-bfields/include/linux/sunrpc/svc.h | 6 linux-2.6.14-bfields/include/linux/sunrpc/xdr.h | 6 linux-2.6.14-bfields/include/linux/sunrpc/xprt.h | 229 + linux-2.6.14-bfields/net/sunrpc/Makefile | 2 linux-2.6.14-bfields/net/sunrpc/auth.c | 16 linux-2.6.14-bfields/net/sunrpc/auth_gss/Makefile | 2 linux-2.6.14-bfields/net/sunrpc/auth_gss/auth_gss.c | 187 + linux-2.6.14-bfields/net/sunrpc/auth_gss/gss_krb5_crypto.c | 260 + linux-2.6.14-bfields/net/sunrpc/auth_gss/gss_krb5_mech.c | 51 linux-2.6.14-bfields/net/sunrpc/auth_gss/gss_krb5_seal.c | 53 linux-2.6.14-bfields/net/sunrpc/auth_gss/gss_krb5_unseal.c | 39 linux-2.6.14-bfields/net/sunrpc/auth_gss/gss_krb5_wrap.c | 366 ++ linux-2.6.14-bfields/net/sunrpc/auth_gss/gss_mech_switch.c | 29 linux-2.6.14-bfields/net/sunrpc/auth_gss/gss_spkm3_mech.c | 37 linux-2.6.14-bfields/net/sunrpc/auth_gss/gss_spkm3_seal.c | 15 linux-2.6.14-bfields/net/sunrpc/auth_gss/gss_spkm3_token.c | 3 linux-2.6.14-bfields/net/sunrpc/auth_gss/gss_spkm3_unseal.c | 4 linux-2.6.14-bfields/net/sunrpc/auth_gss/svcauth_gss.c | 606 +++- linux-2.6.14-bfields/net/sunrpc/auth_null.c | 2 linux-2.6.14-bfields/net/sunrpc/auth_unix.c | 2 linux-2.6.14-bfields/net/sunrpc/clnt.c | 179 - linux-2.6.14-bfields/net/sunrpc/pmap_clnt.c | 12 linux-2.6.14-bfields/net/sunrpc/rpc_pipe.c | 29 linux-2.6.14-bfields/net/sunrpc/socklib.c | 175 + linux-2.6.14-bfields/net/sunrpc/sunrpc_syms.c | 3 linux-2.6.14-bfields/net/sunrpc/svc.c | 2 linux-2.6.14-bfields/net/sunrpc/svcauth_unix.c | 
2 linux-2.6.14-bfields/net/sunrpc/svcsock.c | 5 linux-2.6.14-bfields/net/sunrpc/sysctl.c | 25 linux-2.6.14-bfields/net/sunrpc/xdr.c | 177 - linux-2.6.14-bfields/net/sunrpc/xprt.c | 1614 +++--------- linux-2.6.14-bfields/net/sunrpc/xprtsock.c | 1261 +++++++++ 82 files changed, 5634 insertions(+), 3106 deletions(-) diff -puN fs/exec.c~CITI_NFS4_ALL fs/exec.c --- linux-2.6.14/fs/exec.c~CITI_NFS4_ALL 2005-11-07 13:26:14.000000000 -0500 +++ linux-2.6.14-bfields/fs/exec.c 2005-11-07 13:26:18.000000000 -0500 @@ -126,8 +126,7 @@ asmlinkage long sys_uselib(const char __ struct nameidata nd; int error; - nd.intent.open.flags = FMODE_READ; - error = __user_walk(library, LOOKUP_FOLLOW|LOOKUP_OPEN, &nd); + error = __user_path_lookup_open(library, LOOKUP_FOLLOW, &nd, FMODE_READ); if (error) goto out; @@ -139,7 +138,7 @@ asmlinkage long sys_uselib(const char __ if (error) goto exit; - file = dentry_open(nd.dentry, nd.mnt, O_RDONLY); + file = nameidata_to_filp(&nd, O_RDONLY); error = PTR_ERR(file); if (IS_ERR(file)) goto out; @@ -167,6 +166,7 @@ asmlinkage long sys_uselib(const char __ out: return error; exit: + release_open_intent(&nd); path_release(&nd); goto out; } @@ -490,8 +490,7 @@ struct file *open_exec(const char *name) int err; struct file *file; - nd.intent.open.flags = FMODE_READ; - err = path_lookup(name, LOOKUP_FOLLOW|LOOKUP_OPEN, &nd); + err = path_lookup_open(name, LOOKUP_FOLLOW, &nd, FMODE_READ); file = ERR_PTR(err); if (!err) { @@ -504,7 +503,7 @@ struct file *open_exec(const char *name) err = -EACCES; file = ERR_PTR(err); if (!err) { - file = dentry_open(nd.dentry, nd.mnt, O_RDONLY); + file = nameidata_to_filp(&nd, O_RDONLY); if (!IS_ERR(file)) { err = deny_write_access(file); if (err) { @@ -516,6 +515,7 @@ out: return file; } } + release_open_intent(&nd); path_release(&nd); } goto out; diff -puN fs/lockd/host.c~CITI_NFS4_ALL fs/lockd/host.c --- linux-2.6.14/fs/lockd/host.c~CITI_NFS4_ALL 2005-11-07 13:26:14.000000000 -0500 +++ linux-2.6.14-bfields/fs/lockd/host.c 
2005-11-07 13:26:18.000000000 -0500 @@ -173,11 +173,10 @@ nlm_bind_host(struct nlm_host *host) /* If we've already created an RPC client, check whether * RPC rebind is required - * Note: why keep rebinding if we're on a tcp connection? */ if ((clnt = host->h_rpcclnt) != NULL) { xprt = clnt->cl_xprt; - if (!xprt->stream && time_after_eq(jiffies, host->h_nextrebind)) { + if (time_after_eq(jiffies, host->h_nextrebind)) { clnt->cl_port = 0; host->h_nextrebind = jiffies + NLM_HOST_REBIND; dprintk("lockd: next rebind in %ld jiffies\n", @@ -189,7 +188,6 @@ nlm_bind_host(struct nlm_host *host) goto forgetit; xprt_set_timeout(&xprt->timeout, 5, nlmsvc_timeout); - xprt->nocong = 1; /* No congestion control for NLM */ xprt->resvport = 1; /* NLM requires a reserved port */ /* Existing NLM servers accept AUTH_UNIX only */ diff -puN fs/lockd/svcsubs.c~CITI_NFS4_ALL fs/lockd/svcsubs.c --- linux-2.6.14/fs/lockd/svcsubs.c~CITI_NFS4_ALL 2005-11-07 13:26:14.000000000 -0500 +++ linux-2.6.14-bfields/fs/lockd/svcsubs.c 2005-11-07 13:26:18.000000000 -0500 @@ -30,6 +30,36 @@ static struct nlm_file * nlm_files[FILE_NRHASH]; static DECLARE_MUTEX(nlm_file_sema); +#ifdef NFSD_DEBUG +static inline void nlm_debug_print_fh(char *msg, struct nfs_fh *f) +{ + u32 *fhp = (u32*)f->data; + + /* print the first 32 bytes of the fh */ + dprintk("lockd: %s (%08x %08x %08x %08x %08x %08x %08x %08x)\n", + msg, fhp[0], fhp[1], fhp[2], fhp[3], + fhp[4], fhp[5], fhp[6], fhp[7]); +} + +static inline void nlm_debug_print_file(char *msg, struct nlm_file *file) +{ + struct inode *inode = file->f_file->f_dentry->d_inode; + + dprintk("lockd: %s %s/%ld\n", + msg, inode->i_sb->s_id, inode->i_ino); +} +#else +static inline void nlm_debug_print_fh(char *msg, struct nfs_fh *f) +{ + return; +} + +static inline void nlm_debug_print_file(char *msg, struct nlm_file *file) +{ + return; +} +#endif + static inline unsigned int file_hash(struct nfs_fh *f) { unsigned int tmp=0; @@ -55,11 +85,8 @@ nlm_lookup_file(struct svc_rqst 
*rqstp, struct nlm_file *file; unsigned int hash; u32 nfserr; - u32 *fhp = (u32*)f->data; - - dprintk("lockd: nlm_file_lookup(%08x %08x %08x %08x %08x %08x)\n", - fhp[0], fhp[1], fhp[2], fhp[3], fhp[4], fhp[5]); + nlm_debug_print_fh("nlm_file_lookup", f); hash = file_hash(f); @@ -70,8 +97,7 @@ nlm_lookup_file(struct svc_rqst *rqstp, if (!nfs_compare_fh(&file->f_handle, f)) goto found; - dprintk("lockd: creating file for (%08x %08x %08x %08x %08x %08x)\n", - fhp[0], fhp[1], fhp[2], fhp[3], fhp[4], fhp[5]); + nlm_debug_print_fh("creating file for", f); nfserr = nlm_lck_denied_nolocks; file = (struct nlm_file *) kmalloc(sizeof(*file), GFP_KERNEL); @@ -124,11 +150,10 @@ out_free: static inline void nlm_delete_file(struct nlm_file *file) { - struct inode *inode = file->f_file->f_dentry->d_inode; struct nlm_file **fp, *f; - dprintk("lockd: closing file %s/%ld\n", - inode->i_sb->s_id, inode->i_ino); + nlm_debug_print_file("closing file", file); + fp = nlm_files + file->f_hash; while ((f = *fp) != NULL) { if (f == file) { diff -puN fs/locks.c~CITI_NFS4_ALL fs/locks.c --- linux-2.6.14/fs/locks.c~CITI_NFS4_ALL 2005-11-07 13:26:14.000000000 -0500 +++ linux-2.6.14-bfields/fs/locks.c 2005-11-07 13:26:29.000000000 -0500 @@ -316,21 +316,22 @@ static int flock_to_posix_lock(struct fi /* POSIX-1996 leaves the case l->l_len < 0 undefined; POSIX-2001 defines it. 
*/ start += l->l_start; - end = start + l->l_len - 1; - if (l->l_len < 0) { + if (start < 0) + return -EINVAL; + fl->fl_end = OFFSET_MAX; + if (l->l_len > 0) { + end = start + l->l_len - 1; + fl->fl_end = end; + } else if (l->l_len < 0) { end = start - 1; + fl->fl_end = end; start += l->l_len; + if (start < 0) + return -EINVAL; } - - if (start < 0) - return -EINVAL; - if (l->l_len > 0 && end < 0) - return -EOVERFLOW; - fl->fl_start = start; /* we record the absolute position */ - fl->fl_end = end; - if (l->l_len == 0) - fl->fl_end = OFFSET_MAX; + if (fl->fl_end < fl->fl_start) + return -EOVERFLOW; fl->fl_owner = current->files; fl->fl_pid = current->tgid; @@ -362,14 +363,21 @@ static int flock64_to_posix_lock(struct return -EINVAL; } - if (((start += l->l_start) < 0) || (l->l_len < 0)) + start += l->l_start; + if (start < 0) return -EINVAL; - fl->fl_end = start + l->l_len - 1; - if (l->l_len > 0 && fl->fl_end < 0) - return -EOVERFLOW; + fl->fl_end = OFFSET_MAX; + if (l->l_len > 0) { + fl->fl_end = start + l->l_len - 1; + } else if (l->l_len < 0) { + fl->fl_end = start - 1; + start += l->l_len; + if (start < 0) + return -EINVAL; + } fl->fl_start = start; /* we record the absolute position */ - if (l->l_len == 0) - fl->fl_end = OFFSET_MAX; + if (fl->fl_end < fl->fl_start) + return -EOVERFLOW; fl->fl_owner = current->files; fl->fl_pid = current->tgid; @@ -503,12 +511,7 @@ static void locks_delete_block(struct fi static void locks_insert_block(struct file_lock *blocker, struct file_lock *waiter) { - if (!list_empty(&waiter->fl_block)) { - printk(KERN_ERR "locks_insert_block: removing duplicated lock " - "(pid=%d %Ld-%Ld type=%d)\n", waiter->fl_pid, - waiter->fl_start, waiter->fl_end, waiter->fl_type); - __locks_delete_block(waiter); - } + BUG_ON(!list_empty(&waiter->fl_block)); list_add_tail(&waiter->fl_block, &blocker->fl_block); waiter->fl_next = blocker; if (IS_POSIX(blocker)) @@ -647,8 +650,9 @@ static int locks_block_on_timeout(struct return result; } -struct 
file_lock * -posix_test_lock(struct file *filp, struct file_lock *fl) +int +posix_test_lock(struct file *filp, struct file_lock *fl, + struct file_lock *conflock) { struct file_lock *cfl; @@ -659,9 +663,13 @@ posix_test_lock(struct file *filp, struc if (posix_locks_conflict(cfl, fl)) break; } + if (cfl) { + locks_copy_lock(conflock, cfl); + unlock_kernel(); + return 1; + } unlock_kernel(); - - return (cfl); + return 0; } EXPORT_SYMBOL(posix_test_lock); @@ -763,8 +771,9 @@ out: } EXPORT_SYMBOL(posix_lock_file); +EXPORT_SYMBOL(posix_lock_file_conf); -static int __posix_lock_file(struct inode *inode, struct file_lock *request) +static int __posix_lock_file_conf(struct inode *inode, struct file_lock *request, struct file_lock *conflock) { struct file_lock *fl; struct file_lock *new_fl, *new_fl2; @@ -788,6 +797,8 @@ static int __posix_lock_file(struct inod continue; if (!posix_locks_conflict(request, fl)) continue; + if (conflock) + locks_copy_lock(conflock, fl); error = -EAGAIN; if (!(request->fl_flags & FL_SLEEP)) goto out; @@ -829,12 +840,16 @@ static int __posix_lock_file(struct inod /* Detect adjacent or overlapping regions (if same lock type) */ if (request->fl_type == fl->fl_type) { + /* In all comparisons of start vs end, use + * "start - 1" rather than "end + 1". If end + * is OFFSET_MAX, end + 1 will become negative. + */ if (fl->fl_end < request->fl_start - 1) goto next_lock; /* If the next lock in the list has entirely bigger * addresses than the new one, insert the lock here. 
*/ - if (fl->fl_start > request->fl_end + 1) + if (fl->fl_start - 1 > request->fl_end) break; /* If we come here, the new and old lock are of the @@ -952,7 +967,20 @@ static int __posix_lock_file(struct inod */ int posix_lock_file(struct file *filp, struct file_lock *fl) { - return __posix_lock_file(filp->f_dentry->d_inode, fl); + return __posix_lock_file_conf(filp->f_dentry->d_inode, fl, NULL); +} + +/** + * posix_lock_file_conf - Apply a POSIX-style lock to a file + * @filp: The file to apply the lock to + * @fl: The lock to be applied + * @conflock: Place to return a copy of the conflicting lock, if found. + * + * Except for the conflock parameter, acts just like posix_lock_file. + */ +int posix_lock_file_conf(struct file *filp, struct file_lock *fl, struct file_lock *conflock) +{ + return __posix_lock_file_conf(filp->f_dentry->d_inode, fl, conflock); } /** @@ -969,7 +997,7 @@ int posix_lock_file_wait(struct file *fi int error; might_sleep (); for (;;) { - error = __posix_lock_file(filp->f_dentry->d_inode, fl); + error = posix_lock_file(filp, fl); if ((error != -EAGAIN) || !(fl->fl_flags & FL_SLEEP)) break; error = wait_event_interruptible(fl->fl_wait, !fl->fl_next); @@ -1041,7 +1069,7 @@ int locks_mandatory_area(int read_write, fl.fl_end = offset + count - 1; for (;;) { - error = __posix_lock_file(inode, &fl); + error = __posix_lock_file_conf(inode, &fl, NULL); if (error != -EAGAIN) break; if (!(fl.fl_flags & FL_SLEEP)) @@ -1534,7 +1562,7 @@ asmlinkage long sys_flock(unsigned int f */ int fcntl_getlk(struct file *filp, struct flock __user *l) { - struct file_lock *fl, file_lock; + struct file_lock *fl, cfl, file_lock; struct flock flock; int error; @@ -1558,7 +1586,7 @@ int fcntl_getlk(struct file *filp, struc else fl = (file_lock.fl_type == F_UNLCK ? NULL : &file_lock); } else { - fl = posix_test_lock(filp, &file_lock); + fl = (posix_test_lock(filp, &file_lock, &cfl) ? 
&cfl : NULL); } flock.l_type = F_UNLCK; @@ -1655,7 +1683,7 @@ again: error = filp->f_op->lock(filp, cmd, file_lock); else { for (;;) { - error = __posix_lock_file(inode, file_lock); + error = posix_lock_file(filp, file_lock); if ((error != -EAGAIN) || (cmd == F_SETLK)) break; error = wait_event_interruptible(file_lock->fl_wait, @@ -1688,7 +1716,7 @@ out: */ int fcntl_getlk64(struct file *filp, struct flock64 __user *l) { - struct file_lock *fl, file_lock; + struct file_lock *fl, cfl, file_lock; struct flock64 flock; int error; @@ -1712,7 +1740,7 @@ int fcntl_getlk64(struct file *filp, str else fl = (file_lock.fl_type == F_UNLCK ? NULL : &file_lock); } else { - fl = posix_test_lock(filp, &file_lock); + fl = (posix_test_lock(filp, &file_lock, &cfl) ? &cfl : NULL); } flock.l_type = F_UNLCK; @@ -1798,7 +1826,7 @@ again: error = filp->f_op->lock(filp, cmd, file_lock); else { for (;;) { - error = __posix_lock_file(inode, file_lock); + error = posix_lock_file(filp, file_lock); if ((error != -EAGAIN) || (cmd == F_SETLK64)) break; error = wait_event_interruptible(file_lock->fl_wait, @@ -1926,29 +1954,15 @@ void locks_remove_flock(struct file *fil } /** - * posix_block_lock - blocks waiting for a file lock - * @blocker: the lock which is blocking - * @waiter: the lock which conflicts and has to wait - * - * lockd needs to block waiting for locks. - */ -void -posix_block_lock(struct file_lock *blocker, struct file_lock *waiter) -{ - locks_insert_block(blocker, waiter); -} - -EXPORT_SYMBOL(posix_block_lock); - -/** * posix_unblock_lock - stop waiting for a file lock * @filp: how the file was opened * @waiter: the lock which was waiting + * @unlock: if set, unlock the lock if it's already applied * * lockd needs to block waiting for locks. 
*/ void -posix_unblock_lock(struct file *filp, struct file_lock *waiter) +posix_unblock_lock(struct file *filp, struct file_lock *waiter, int unlock) { /* * A remote machine may cancel the lock request after it's been @@ -1958,7 +1972,7 @@ posix_unblock_lock(struct file *filp, st if (waiter->fl_next) { __locks_delete_block(waiter); unlock_kernel(); - } else { + } else if (unlock) { unlock_kernel(); waiter->fl_type = F_UNLCK; posix_lock_file(filp, waiter); diff -puN fs/namei.c~CITI_NFS4_ALL fs/namei.c --- linux-2.6.14/fs/namei.c~CITI_NFS4_ALL 2005-11-07 13:26:14.000000000 -0500 +++ linux-2.6.14-bfields/fs/namei.c 2005-11-07 13:26:25.000000000 -0500 @@ -28,6 +28,7 @@ #include #include #include +#include #include #include @@ -317,6 +318,18 @@ void path_release_on_umount(struct namei mntput_no_expire(nd->mnt); } +/** + * release_open_intent - free up open intent resources + * @nd: pointer to nameidata + */ +void release_open_intent(struct nameidata *nd) +{ + if (nd->intent.open.file->f_dentry == NULL) + put_filp(nd->intent.open.file); + else + fput(nd->intent.open.file); +} + /* * Internal lookup() using the new generic dcache. * SMP-safe @@ -750,6 +763,7 @@ static fastcall int __link_path_walk(con struct qstr this; unsigned int c; + nd->flags |= LOOKUP_CONTINUE; err = exec_permission_lite(inode, nd); if (err == -EAGAIN) { err = permission(inode, MAY_EXEC, nd); @@ -802,7 +816,6 @@ static fastcall int __link_path_walk(con if (err < 0) break; } - nd->flags |= LOOKUP_CONTINUE; /* This does the actual lookups.. 
*/ err = do_lookup(nd, &this, &next); if (err) @@ -1052,6 +1065,70 @@ out: return retval; } +static int __path_lookup_intent_open(const char *name, unsigned int lookup_flags, + struct nameidata *nd, int open_flags, int create_mode) +{ + struct file *filp = get_empty_filp(); + int err; + + if (filp == NULL) + return -ENFILE; + nd->intent.open.file = filp; + nd->intent.open.flags = open_flags; + nd->intent.open.create_mode = create_mode; + err = path_lookup(name, lookup_flags|LOOKUP_OPEN, nd); + if (IS_ERR(nd->intent.open.file)) { + if (err == 0) { + err = PTR_ERR(nd->intent.open.file); + path_release(nd); + } + } else if (err != 0) + release_open_intent(nd); + return err; +} + +/** + * path_lookup_open - lookup a file path with open intent + * @name: pointer to file name + * @lookup_flags: lookup intent flags + * @nd: pointer to nameidata + * @open_flags: open intent flags + */ +int path_lookup_open(const char *name, unsigned int lookup_flags, + struct nameidata *nd, int open_flags) +{ + return __path_lookup_intent_open(name, lookup_flags, nd, + open_flags, 0); +} + +/** + * path_lookup_create - lookup a file path with open + create intent + * @name: pointer to file name + * @lookup_flags: lookup intent flags + * @nd: pointer to nameidata + * @open_flags: open intent flags + * @create_mode: create intent flags + */ +int path_lookup_create(const char *name, unsigned int lookup_flags, + struct nameidata *nd, int open_flags, int create_mode) +{ + return __path_lookup_intent_open(name, lookup_flags|LOOKUP_CREATE, nd, + open_flags, create_mode); +} + +int __user_path_lookup_open(const char __user *name, unsigned int lookup_flags, + struct nameidata *nd, int open_flags) +{ + char *tmp = getname(name); + int err = PTR_ERR(tmp); + + if (!IS_ERR(tmp)) { + err = __path_lookup_intent_open(tmp, lookup_flags, nd, open_flags, 0); + putname(tmp); + } + return err; +} + /* * Restricted form of lookup. Doesn't follow links, single-component only, * needs parent already locked. 
Doesn't follow mounts. @@ -1212,6 +1289,7 @@ static inline int may_delete(struct inod return -ENOENT; if (victim->d_flags & DCACHE_NFSFS_RENAMED) return -EBUSY; + dentry_break_lease(victim, O_NONBLOCK); /*XXX: ignoring -EAGAIN */ return 0; } @@ -1416,27 +1494,27 @@ int may_open(struct nameidata *nd, int a */ int open_namei(const char * pathname, int flag, int mode, struct nameidata *nd) { - int acc_mode, error = 0; + int acc_mode, error; struct path path; struct dentry *dir; int count = 0; acc_mode = ACC_MODE(flag); + /* O_TRUNC implies we need access checks for write permissions */ + if (flag & O_TRUNC) + acc_mode |= MAY_WRITE; + /* Allow the LSM permission hook to distinguish append access from general write access. */ if (flag & O_APPEND) acc_mode |= MAY_APPEND; - /* Fill in the open() intent data */ - nd->intent.open.flags = flag; - nd->intent.open.create_mode = mode; - /* * The simplest case - just a plain lookup. */ if (!(flag & O_CREAT)) { - error = path_lookup(pathname, lookup_flags(flag)|LOOKUP_OPEN, nd); + error = path_lookup_open(pathname, lookup_flags(flag), nd, flag); if (error) return error; goto ok; @@ -1445,7 +1523,7 @@ int open_namei(const char * pathname, in /* * Create - we need to know the parent. 
*/ - error = path_lookup(pathname, LOOKUP_PARENT|LOOKUP_OPEN|LOOKUP_CREATE, nd); + error = path_lookup_create(pathname, LOOKUP_PARENT, nd, flag, mode); if (error) return error; @@ -1520,6 +1598,8 @@ ok: exit_dput: dput_path(&path, nd); exit: + if (!IS_ERR(nd->intent.open.file)) + release_open_intent(nd); path_release(nd); return error; diff -puN fs/nfs/delegation.c~CITI_NFS4_ALL fs/nfs/delegation.c --- linux-2.6.14/fs/nfs/delegation.c~CITI_NFS4_ALL 2005-11-07 13:26:14.000000000 -0500 +++ linux-2.6.14-bfields/fs/nfs/delegation.c 2005-11-07 13:26:18.000000000 -0500 @@ -31,11 +31,42 @@ static void nfs_free_delegation(struct n kfree(delegation); } +static int nfs_delegation_claim_locks(struct nfs_open_context *ctx, struct nfs4_state *state) +{ + struct inode *inode = state->inode; + struct file_lock *fl; + int status; + + for (fl = inode->i_flock; fl != 0; fl = fl->fl_next) { + if (!(fl->fl_flags & (FL_POSIX|FL_FLOCK))) + continue; + if ((struct nfs_open_context *)fl->fl_file->private_data != ctx) + continue; + status = nfs4_lock_delegation_recall(state, fl); + if (status >= 0) + continue; + switch (status) { + default: + printk(KERN_ERR "%s: unhandled error %d.\n", + __FUNCTION__, status); + case -NFS4ERR_EXPIRED: + /* kill_proc(fl->fl_pid, SIGLOST, 1); */ + case -NFS4ERR_STALE_CLIENTID: + nfs4_schedule_state_recovery(NFS_SERVER(inode)->nfs4_state); + goto out_err; + } + } + return 0; +out_err: + return status; +} + static void nfs_delegation_claim_opens(struct inode *inode) { struct nfs_inode *nfsi = NFS_I(inode); struct nfs_open_context *ctx; struct nfs4_state *state; + int err; again: spin_lock(&inode->i_lock); @@ -47,9 +78,12 @@ again: continue; get_nfs_open_context(ctx); spin_unlock(&inode->i_lock); - if (nfs4_open_delegation_recall(ctx->dentry, state) < 0) - return; + err = nfs4_open_delegation_recall(ctx->dentry, state); + if (err >= 0) + err = nfs_delegation_claim_locks(ctx, state); put_nfs_open_context(ctx); + if (err != 0) + return; goto again; } 
spin_unlock(&inode->i_lock); @@ -142,7 +176,7 @@ static void nfs_msync_inode(struct inode /* * Basic procedure for returning a delegation to the server */ -int nfs_inode_return_delegation(struct inode *inode) +int __nfs_inode_return_delegation(struct inode *inode) { struct nfs4_client *clp = NFS_SERVER(inode)->nfs4_state; struct nfs_inode *nfsi = NFS_I(inode); diff -puN fs/nfs/delegation.h~CITI_NFS4_ALL fs/nfs/delegation.h --- linux-2.6.14/fs/nfs/delegation.h~CITI_NFS4_ALL 2005-11-07 13:26:14.000000000 -0500 +++ linux-2.6.14-bfields/fs/nfs/delegation.h 2005-11-07 13:26:18.000000000 -0500 @@ -25,7 +25,7 @@ struct nfs_delegation { int nfs_inode_set_delegation(struct inode *inode, struct rpc_cred *cred, struct nfs_openres *res); void nfs_inode_reclaim_delegation(struct inode *inode, struct rpc_cred *cred, struct nfs_openres *res); -int nfs_inode_return_delegation(struct inode *inode); +int __nfs_inode_return_delegation(struct inode *inode); int nfs_async_inode_return_delegation(struct inode *inode, const nfs4_stateid *stateid); struct inode *nfs_delegation_find_inode(struct nfs4_client *clp, const struct nfs_fh *fhandle); @@ -38,6 +38,7 @@ void nfs_delegation_reap_unclaimed(struc /* NFSv4 delegation-related procedures */ int nfs4_proc_delegreturn(struct inode *inode, struct rpc_cred *cred, const nfs4_stateid *stateid); int nfs4_open_delegation_recall(struct dentry *dentry, struct nfs4_state *state); +int nfs4_lock_delegation_recall(struct nfs4_state *state, struct file_lock *fl); static inline int nfs_have_delegation(struct inode *inode, int flags) { @@ -47,11 +48,25 @@ static inline int nfs_have_delegation(st return 1; return 0; } + +static inline int nfs_inode_return_delegation(struct inode *inode) +{ + int err = 0; + + if (NFS_I(inode)->delegation != NULL) + err = __nfs_inode_return_delegation(inode); + return err; +} #else static inline int nfs_have_delegation(struct inode *inode, int flags) { return 0; } + +static inline int nfs_inode_return_delegation(struct 
inode *inode) +{ + return 0; +} #endif #endif diff -puN fs/nfs/dir.c~CITI_NFS4_ALL fs/nfs/dir.c --- linux-2.6.14/fs/nfs/dir.c~CITI_NFS4_ALL 2005-11-07 13:26:15.000000000 -0500 +++ linux-2.6.14-bfields/fs/nfs/dir.c 2005-11-07 13:26:18.000000000 -0500 @@ -532,6 +532,7 @@ static int nfs_readdir(struct file *filp my_entry.eof = 0; my_entry.fh = &fh; my_entry.fattr = &fattr; + nfs_fattr_init(&fattr); desc->entry = &my_entry; while(!desc->entry->eof) { @@ -565,8 +566,6 @@ static int nfs_readdir(struct file *filp } } unlock_kernel(); - if (desc->error < 0) - return desc->error; if (res < 0) return res; return 0; @@ -803,6 +802,7 @@ static int nfs_dentry_delete(struct dent */ static void nfs_dentry_iput(struct dentry *dentry, struct inode *inode) { + nfs_inode_return_delegation(inode); if (dentry->d_flags & DCACHE_NFSFS_RENAMED) { lock_kernel(); inode->i_nlink--; @@ -853,12 +853,6 @@ static struct dentry *nfs_lookup(struct dentry->d_op = NFS_PROTO(dir)->dentry_ops; lock_kernel(); - /* Revalidate parent directory attribute cache */ - error = nfs_revalidate_inode(NFS_SERVER(dir), dir); - if (error < 0) { - res = ERR_PTR(error); - goto out_unlock; - } /* If we're doing an exclusive create, optimize away the lookup */ if (nfs_is_exclusive_create(dir, nd)) @@ -916,7 +910,6 @@ static int is_atomic_open(struct inode * static struct dentry *nfs_atomic_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd) { struct dentry *res = NULL; - struct inode *inode = NULL; int error; /* Check that we are indeed trying to open this file */ @@ -930,8 +923,10 @@ static struct dentry *nfs_atomic_lookup( dentry->d_op = NFS_PROTO(dir)->dentry_ops; /* Let vfs_create() deal with O_EXCL */ - if (nd->intent.open.flags & O_EXCL) - goto no_entry; + if (nd->intent.open.flags & O_EXCL) { + d_add(dentry, NULL); + goto out; + } /* Open the file on the server */ lock_kernel(); @@ -945,32 +940,30 @@ static struct dentry *nfs_atomic_lookup( if (nd->intent.open.flags & O_CREAT) { 
nfs_begin_data_update(dir); - inode = nfs4_atomic_open(dir, dentry, nd); + res = nfs4_atomic_open(dir, dentry, nd); nfs_end_data_update(dir); } else - inode = nfs4_atomic_open(dir, dentry, nd); + res = nfs4_atomic_open(dir, dentry, nd); unlock_kernel(); - if (IS_ERR(inode)) { - error = PTR_ERR(inode); + if (IS_ERR(res)) { + error = PTR_ERR(res); switch (error) { /* Make a negative dentry */ case -ENOENT: - inode = NULL; - break; + res = NULL; + goto out; /* This turned out not to be a regular file */ + case -EISDIR: + case -ENOTDIR: + goto no_open; case -ELOOP: if (!(nd->intent.open.flags & O_NOFOLLOW)) goto no_open; - /* case -EISDIR: */ /* case -EINVAL: */ default: - res = ERR_PTR(error); goto out; } - } -no_entry: - res = d_add_unique(dentry, inode); - if (res != NULL) + } else if (res != NULL) dentry = res; nfs_renew_times(dentry); nfs_set_verifier(dentry, nfs_save_change_attribute(dir)); @@ -1014,7 +1007,7 @@ static int nfs_open_revalidate(struct de */ lock_kernel(); verifier = nfs_save_change_attribute(dir); - ret = nfs4_open_revalidate(dir, dentry, openflags); + ret = nfs4_open_revalidate(dir, dentry, openflags, nd); if (!ret) nfs_set_verifier(dentry, verifier); unlock_kernel(); @@ -1137,7 +1130,7 @@ static int nfs_create(struct inode *dir, lock_kernel(); nfs_begin_data_update(dir); - error = NFS_PROTO(dir)->create(dir, dentry, &attr, open_flags); + error = NFS_PROTO(dir)->create(dir, dentry, &attr, open_flags, nd); nfs_end_data_update(dir); if (error != 0) goto out_err; @@ -1264,6 +1257,9 @@ dentry->d_parent->d_name.name, dentry->d sprintf(silly, ".nfs%*.*lx", i_inosize, i_inosize, dentry->d_inode->i_ino); + /* Return delegation in anticipation of the rename */ + nfs_inode_return_delegation(dentry->d_inode); + sdentry = NULL; do { char *suffix = silly + slen - countersize; @@ -1332,6 +1328,7 @@ static int nfs_safe_remove(struct dentry nfs_begin_data_update(dir); if (inode != NULL) { + nfs_inode_return_delegation(inode); nfs_begin_data_update(inode); error = 
NFS_PROTO(dir)->remove(dir, &dentry->d_name); /* The VFS may want to delete this inode */ @@ -1438,17 +1435,14 @@ nfs_link(struct dentry *old_dentry, stru old_dentry->d_parent->d_name.name, old_dentry->d_name.name, dentry->d_parent->d_name.name, dentry->d_name.name); - /* - * Drop the dentry in advance to force a new lookup. - * Since nfs_proc_link doesn't return a file handle, - * we can't use the existing dentry. - */ lock_kernel(); - d_drop(dentry); - nfs_begin_data_update(dir); nfs_begin_data_update(inode); error = NFS_PROTO(dir)->link(inode, dir, &dentry->d_name); + if (error == 0) { + atomic_inc(&inode->i_count); + d_instantiate(dentry, inode); + } nfs_end_data_update(inode); nfs_end_data_update(dir); unlock_kernel(); @@ -1512,9 +1506,11 @@ static int nfs_rename(struct inode *old_ */ if (!new_inode) goto go_ahead; - if (S_ISDIR(new_inode->i_mode)) - goto out; - else if (atomic_read(&new_dentry->d_count) > 2) { + if (S_ISDIR(new_inode->i_mode)) { + error = -EISDIR; + if (!S_ISDIR(old_inode->i_mode)) + goto out; + } else if (atomic_read(&new_dentry->d_count) > 2) { int err; /* copy the target dentry's name */ dentry = d_alloc(new_dentry->d_parent, @@ -1539,7 +1535,8 @@ static int nfs_rename(struct inode *old_ #endif goto out; } - } + } else + new_inode->i_nlink--; go_ahead: /* @@ -1549,6 +1546,7 @@ go_ahead: nfs_wb_all(old_inode); shrink_dcache_parent(old_dentry); } + nfs_inode_return_delegation(old_inode); if (new_inode) d_delete(new_dentry); diff -puN fs/nfs/direct.c~CITI_NFS4_ALL fs/nfs/direct.c --- linux-2.6.14/fs/nfs/direct.c~CITI_NFS4_ALL 2005-11-07 13:26:15.000000000 -0500 +++ linux-2.6.14-bfields/fs/nfs/direct.c 2005-11-07 13:26:18.000000000 -0500 @@ -655,7 +655,6 @@ nfs_file_direct_read(struct kiocb *iocb, struct file *file = iocb->ki_filp; struct nfs_open_context *ctx = (struct nfs_open_context *) file->private_data; - struct dentry *dentry = file->f_dentry; struct address_space *mapping = file->f_mapping; struct inode *inode = mapping->host; struct 
iovec iov = { @@ -664,7 +663,8 @@ nfs_file_direct_read(struct kiocb *iocb, }; dprintk("nfs: direct read(%s/%s, %lu@%lu)\n", - dentry->d_parent->d_name.name, dentry->d_name.name, + file->f_dentry->d_parent->d_name.name, + file->f_dentry->d_name.name, (unsigned long) count, (unsigned long) pos); if (!is_sync_kiocb(iocb)) @@ -730,7 +730,6 @@ nfs_file_direct_write(struct kiocb *iocb struct file *file = iocb->ki_filp; struct nfs_open_context *ctx = (struct nfs_open_context *) file->private_data; - struct dentry *dentry = file->f_dentry; struct address_space *mapping = file->f_mapping; struct inode *inode = mapping->host; struct iovec iov = { @@ -739,8 +738,9 @@ nfs_file_direct_write(struct kiocb *iocb }; dfprintk(VFS, "nfs: direct write(%s/%s(%ld), %lu@%lu)\n", - dentry->d_parent->d_name.name, dentry->d_name.name, - inode->i_ino, (unsigned long) count, (unsigned long) pos); + file->f_dentry->d_parent->d_name.name, + file->f_dentry->d_name.name, inode->i_ino, + (unsigned long) count, (unsigned long) pos); if (!is_sync_kiocb(iocb)) goto out; diff -puN fs/nfs/file.c~CITI_NFS4_ALL fs/nfs/file.c --- linux-2.6.14/fs/nfs/file.c~CITI_NFS4_ALL 2005-11-07 13:26:15.000000000 -0500 +++ linux-2.6.14-bfields/fs/nfs/file.c 2005-11-07 13:26:21.000000000 -0500 @@ -205,8 +205,8 @@ nfs_file_flush(struct file *file) if (!status) { status = ctx->error; ctx->error = 0; - if (!status && !nfs_have_delegation(inode, FMODE_READ)) - __nfs_revalidate_inode(NFS_SERVER(inode), inode); + if (!status) + nfs_revalidate_inode(NFS_SERVER(inode), inode); } unlock_kernel(); return status; @@ -376,22 +376,30 @@ out_swapfile: static int do_getlk(struct file *filp, int cmd, struct file_lock *fl) { + struct file_lock cfl; struct inode *inode = filp->f_mapping->host; int status = 0; lock_kernel(); - /* Use local locking if mounted with "-onolock" */ - if (!(NFS_SERVER(inode)->flags & NFS_MOUNT_NONLM)) - status = NFS_PROTO(inode)->lock(filp, cmd, fl); - else { - struct file_lock *cfl = posix_test_lock(filp, fl); 
- - fl->fl_type = F_UNLCK; - if (cfl != NULL) - memcpy(fl, cfl, sizeof(*fl)); + /* Try local locking first */ + if (posix_test_lock(filp, fl, &cfl)) { + locks_copy_lock(fl, &cfl); + goto out; } + + if (nfs_have_delegation(inode, FMODE_READ)) + goto out_noconflict; + + if (NFS_SERVER(inode)->flags & NFS_MOUNT_NONLM) + goto out_noconflict; + + status = NFS_PROTO(inode)->lock(filp, cmd, fl); +out: unlock_kernel(); return status; +out_noconflict: + fl->fl_type = F_UNLCK; + goto out; } static int do_vfs_lock(struct file *file, struct file_lock *fl) diff -puN fs/nfs/inode.c~CITI_NFS4_ALL fs/nfs/inode.c --- linux-2.6.14/fs/nfs/inode.c~CITI_NFS4_ALL 2005-11-07 13:26:15.000000000 -0500 +++ linux-2.6.14-bfields/fs/nfs/inode.c 2005-11-07 13:26:18.000000000 -0500 @@ -358,6 +358,35 @@ out_no_root: return no_root_error; } +static void nfs_init_timeout_values(struct rpc_timeout *to, int proto, unsigned int timeo, unsigned int retrans) +{ + to->to_initval = timeo * HZ / 10; + to->to_retries = retrans; + if (!to->to_retries) + to->to_retries = 2; + + switch (proto) { + case IPPROTO_TCP: + if (!to->to_initval) + to->to_initval = 60 * HZ; + if (to->to_initval > NFS_MAX_TCP_TIMEOUT) + to->to_initval = NFS_MAX_TCP_TIMEOUT; + to->to_increment = to->to_initval; + to->to_maxval = to->to_initval + (to->to_increment * to->to_retries); + to->to_exponential = 0; + break; + case IPPROTO_UDP: + default: + if (!to->to_initval) + to->to_initval = 11 * HZ / 10; + if (to->to_initval > NFS_MAX_UDP_TIMEOUT) + to->to_initval = NFS_MAX_UDP_TIMEOUT; + to->to_maxval = NFS_MAX_UDP_TIMEOUT; + to->to_exponential = 1; + break; + } +} + /* * Create an RPC client handle. */ @@ -367,22 +396,12 @@ nfs_create_client(struct nfs_server *ser struct rpc_timeout timeparms; struct rpc_xprt *xprt = NULL; struct rpc_clnt *clnt = NULL; - int tcp = (data->flags & NFS_MOUNT_TCP); + int proto = (data->flags & NFS_MOUNT_TCP) ? 
IPPROTO_TCP : IPPROTO_UDP; - /* Initialize timeout values */ - timeparms.to_initval = data->timeo * HZ / 10; - timeparms.to_retries = data->retrans; - timeparms.to_maxval = tcp ? RPC_MAX_TCP_TIMEOUT : RPC_MAX_UDP_TIMEOUT; - timeparms.to_exponential = 1; - - if (!timeparms.to_initval) - timeparms.to_initval = (tcp ? 600 : 11) * HZ / 10; - if (!timeparms.to_retries) - timeparms.to_retries = 5; + nfs_init_timeout_values(&timeparms, proto, data->timeo, data->retrans); /* create transport and client */ - xprt = xprt_create_proto(tcp ? IPPROTO_TCP : IPPROTO_UDP, - &server->addr, &timeparms); + xprt = xprt_create_proto(proto, &server->addr, &timeparms); if (IS_ERR(xprt)) { dprintk("%s: cannot create RPC transport. Error = %ld\n", __FUNCTION__, PTR_ERR(xprt)); @@ -576,7 +595,6 @@ static int nfs_show_options(struct seq_f { NFS_MOUNT_SOFT, ",soft", ",hard" }, { NFS_MOUNT_INTR, ",intr", "" }, { NFS_MOUNT_POSIX, ",posix", "" }, - { NFS_MOUNT_TCP, ",tcp", ",udp" }, { NFS_MOUNT_NOCTO, ",nocto", "" }, { NFS_MOUNT_NOAC, ",noac", "" }, { NFS_MOUNT_NONLM, ",nolock", ",lock" }, @@ -585,6 +603,8 @@ static int nfs_show_options(struct seq_f }; struct proc_nfs_info *nfs_infop; struct nfs_server *nfss = NFS_SB(mnt->mnt_sb); + char buf[12]; + char *proto; seq_printf(m, ",v%d", nfss->rpc_ops->version); seq_printf(m, ",rsize=%d", nfss->rsize); @@ -603,6 +623,18 @@ static int nfs_show_options(struct seq_f else seq_puts(m, nfs_infop->nostr); } + switch (nfss->client->cl_xprt->prot) { + case IPPROTO_TCP: + proto = "tcp"; + break; + case IPPROTO_UDP: + proto = "udp"; + break; + default: + snprintf(buf, sizeof(buf), "%u", nfss->client->cl_xprt->prot); + proto = buf; + } + seq_printf(m, ",proto=%s", proto); seq_puts(m, ",addr="); seq_escape(m, nfss->hostname, " \t\n\\"); return 0; @@ -753,7 +785,8 @@ nfs_fhget(struct super_block *sb, struct else init_special_inode(inode, inode->i_mode, fattr->rdev); - nfsi->read_cache_jiffies = fattr->timestamp; + nfsi->read_cache_jiffies = fattr->time_start; + 
nfsi->last_updated = jiffies; inode->i_atime = fattr->atime; inode->i_mtime = fattr->mtime; inode->i_ctime = fattr->ctime; @@ -821,6 +854,11 @@ nfs_setattr(struct dentry *dentry, struc filemap_fdatawait(inode->i_mapping); nfs_wb_all(inode); } + /* + * Return any delegations if we're going to change ACLs + */ + if ((attr->ia_valid & (ATTR_MODE|ATTR_UID|ATTR_GID)) != 0) + nfs_inode_return_delegation(inode); error = NFS_PROTO(inode)->setattr(dentry, &fattr, attr); if (error == 0) nfs_refresh_inode(inode, &fattr); @@ -971,13 +1009,18 @@ void nfs_file_set_open_context(struct fi spin_unlock(&inode->i_lock); } -struct nfs_open_context *nfs_find_open_context(struct inode *inode, int mode) +/* + * Given an inode, search for an open context with the desired characteristics + */ +struct nfs_open_context *nfs_find_open_context(struct inode *inode, struct rpc_cred *cred, int mode) { struct nfs_inode *nfsi = NFS_I(inode); struct nfs_open_context *pos, *ctx = NULL; spin_lock(&inode->i_lock); list_for_each_entry(pos, &nfsi->open_files, list) { + if (cred != NULL && pos->cred != cred) + continue; if ((pos->mode & mode) == mode) { ctx = get_nfs_open_context(pos); break; @@ -1019,15 +1062,11 @@ int nfs_open(struct inode *inode, struct ctx->mode = filp->f_mode; nfs_file_set_open_context(filp, ctx); put_nfs_open_context(ctx); - if ((filp->f_mode & FMODE_WRITE) != 0) - nfs_begin_data_update(inode); return 0; } int nfs_release(struct inode *inode, struct file *filp) { - if ((filp->f_mode & FMODE_WRITE) != 0) - nfs_end_data_update(inode); nfs_file_clear_open_context(filp); return 0; } @@ -1083,14 +1122,15 @@ __nfs_revalidate_inode(struct nfs_server goto out; } + spin_lock(&inode->i_lock); status = nfs_update_inode(inode, &fattr, verifier); if (status) { + spin_unlock(&inode->i_lock); dfprintk(PAGECACHE, "nfs_revalidate_inode: (%s/%Ld) refresh failed, error=%d\n", inode->i_sb->s_id, (long long)NFS_FILEID(inode), status); goto out; } - spin_lock(&inode->i_lock); cache_validity = 
nfsi->cache_validity; nfsi->cache_validity &= ~NFS_INO_REVAL_PAGECACHE; @@ -1098,7 +1138,7 @@ __nfs_revalidate_inode(struct nfs_server * We may need to keep the attributes marked as invalid if * we raced with nfs_end_attr_update(). */ - if (verifier == nfsi->cache_change_attribute) + if (time_after_eq(verifier, nfsi->cache_change_attribute)) nfsi->cache_validity &= ~(NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ATIME); spin_unlock(&inode->i_lock); @@ -1165,7 +1205,7 @@ void nfs_revalidate_mapping(struct inode if (S_ISDIR(inode->i_mode)) { memset(nfsi->cookieverf, 0, sizeof(nfsi->cookieverf)); /* This ensures we revalidate child dentries */ - nfsi->cache_change_attribute++; + nfsi->cache_change_attribute = jiffies; } spin_unlock(&inode->i_lock); @@ -1197,20 +1237,19 @@ void nfs_end_data_update(struct inode *i struct nfs_inode *nfsi = NFS_I(inode); if (!nfs_have_delegation(inode, FMODE_READ)) { - /* Mark the attribute cache for revalidation */ - spin_lock(&inode->i_lock); - nfsi->cache_validity |= NFS_INO_INVALID_ATTR; - /* Directories and symlinks: invalidate page cache too */ - if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode)) + /* Directories and symlinks: invalidate page cache */ + if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode)) { + spin_lock(&inode->i_lock); nfsi->cache_validity |= NFS_INO_INVALID_DATA; - spin_unlock(&inode->i_lock); + spin_unlock(&inode->i_lock); + } } - nfsi->cache_change_attribute ++; + nfsi->cache_change_attribute = jiffies; atomic_dec(&nfsi->data_updates); } /** - * nfs_refresh_inode - verify consistency of the inode attribute cache + * nfs_check_inode_attributes - verify consistency of the inode attribute cache * @inode - pointer to inode * @fattr - updated attributes * @@ -1218,13 +1257,12 @@ void nfs_end_data_update(struct inode *i * so that fattr carries weak cache consistency data, then it may * also update the ctime/mtime/change_attribute. 
*/ -int nfs_refresh_inode(struct inode *inode, struct nfs_fattr *fattr) +static int nfs_check_inode_attributes(struct inode *inode, struct nfs_fattr *fattr) { struct nfs_inode *nfsi = NFS_I(inode); loff_t cur_size, new_isize; int data_unstable; - spin_lock(&inode->i_lock); /* Are we in the process of updating data on the server? */ data_unstable = nfs_caches_unstable(inode); @@ -1241,14 +1279,12 @@ int nfs_refresh_inode(struct inode *inod } if ((fattr->valid & NFS_ATTR_FATTR) == 0) { - spin_unlock(&inode->i_lock); return 0; } /* Has the inode gone and changed behind our back? */ if (nfsi->fileid != fattr->fileid || (inode->i_mode & S_IFMT) != (fattr->mode & S_IFMT)) { - spin_unlock(&inode->i_lock); return -EIO; } @@ -1288,11 +1324,67 @@ int nfs_refresh_inode(struct inode *inod if (!timespec_equal(&inode->i_atime, &fattr->atime)) nfsi->cache_validity |= NFS_INO_INVALID_ATIME; - nfsi->read_cache_jiffies = fattr->timestamp; - spin_unlock(&inode->i_lock); + nfsi->read_cache_jiffies = fattr->time_start; return 0; } +/** + * nfs_refresh_inode - try to update the inode attribute cache + * @inode - pointer to inode + * @fattr - updated attributes + * + * Check that an RPC call that returned attributes has not overlapped with + * other recent updates of the inode metadata, then decide whether it is + * safe to do a full update of the inode attributes, or whether just to + * call nfs_check_inode_attributes. 
+ */ +int nfs_refresh_inode(struct inode *inode, struct nfs_fattr *fattr) +{ + struct nfs_inode *nfsi = NFS_I(inode); + int status; + + if ((fattr->valid & NFS_ATTR_FATTR) == 0) + return 0; + spin_lock(&inode->i_lock); + nfsi->cache_validity &= ~NFS_INO_REVAL_PAGECACHE; + if (nfs_verify_change_attribute(inode, fattr->time_start)) + nfsi->cache_validity &= ~(NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ATIME); + if (time_after(fattr->time_start, nfsi->last_updated)) + status = nfs_update_inode(inode, fattr, fattr->time_start); + else + status = nfs_check_inode_attributes(inode, fattr); + + spin_unlock(&inode->i_lock); + return status; +} + +/** + * nfs_post_op_update_inode - try to update the inode attribute cache + * @inode - pointer to inode + * @fattr - updated attributes + * + * After an operation that has changed the inode metadata, mark the + * attribute cache as being invalid, then try to update it. + */ +int nfs_post_op_update_inode(struct inode *inode, struct nfs_fattr *fattr) +{ + struct nfs_inode *nfsi = NFS_I(inode); + int status = 0; + + spin_lock(&inode->i_lock); + if (unlikely((fattr->valid & NFS_ATTR_FATTR) == 0)) { + nfsi->cache_validity |= NFS_INO_INVALID_ATTR | NFS_INO_INVALID_ACCESS; + goto out; + } + status = nfs_update_inode(inode, fattr, fattr->time_start); + if (time_after_eq(fattr->time_start, nfsi->cache_change_attribute)) + nfsi->cache_validity &= ~(NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ATIME|NFS_INO_REVAL_PAGECACHE); + nfsi->cache_change_attribute = jiffies; +out: + spin_unlock(&inode->i_lock); + return status; +} + /* * Many nfs protocol calls return the new file attributes after * an operation. Here we update the inode to reflect the state @@ -1328,20 +1420,17 @@ static int nfs_update_inode(struct inode goto out_err; } - spin_lock(&inode->i_lock); - /* * Make sure the inode's type hasn't changed. 
*/ - if ((inode->i_mode & S_IFMT) != (fattr->mode & S_IFMT)) { - spin_unlock(&inode->i_lock); + if ((inode->i_mode & S_IFMT) != (fattr->mode & S_IFMT)) goto out_changed; - } /* * Update the read time so we don't revalidate too often. */ - nfsi->read_cache_jiffies = fattr->timestamp; + nfsi->read_cache_jiffies = fattr->time_start; + nfsi->last_updated = jiffies; /* Are we racing with known updates of the metadata on the server? */ data_unstable = ! (nfs_verify_change_attribute(inode, verifier) || @@ -1354,7 +1443,7 @@ static int nfs_update_inode(struct inode /* Do we perhaps have any outstanding writes? */ if (nfsi->npages == 0) { /* No, but did we race with nfs_end_data_update()? */ - if (verifier == nfsi->cache_change_attribute) { + if (time_after_eq(verifier, nfsi->cache_change_attribute)) { inode->i_size = new_isize; invalid |= NFS_INO_INVALID_DATA; } @@ -1430,7 +1519,6 @@ static int nfs_update_inode(struct inode if (!nfs_have_delegation(inode, FMODE_READ)) nfsi->cache_validity |= invalid; - spin_unlock(&inode->i_lock); return 0; out_changed: /* @@ -1639,8 +1727,7 @@ static void nfs4_clear_inode(struct inod struct nfs_inode *nfsi = NFS_I(inode); /* If we are holding a delegation, return it! 
*/ - if (nfsi->delegation != NULL) - nfs_inode_return_delegation(inode); + nfs_inode_return_delegation(inode); /* First call standard NFS clear_inode() code */ nfs_clear_inode(inode); /* Now clear out any remaining state */ @@ -1669,7 +1756,7 @@ static int nfs4_fill_super(struct super_ struct rpc_clnt *clnt = NULL; struct rpc_timeout timeparms; rpc_authflavor_t authflavour; - int proto, err = -EIO; + int err = -EIO; sb->s_blocksize_bits = 0; sb->s_blocksize = 0; @@ -1687,30 +1774,8 @@ static int nfs4_fill_super(struct super_ server->acdirmax = data->acdirmax*HZ; server->rpc_ops = &nfs_v4_clientops; - /* Initialize timeout values */ - - timeparms.to_initval = data->timeo * HZ / 10; - timeparms.to_retries = data->retrans; - timeparms.to_exponential = 1; - if (!timeparms.to_retries) - timeparms.to_retries = 5; - proto = data->proto; - /* Which IP protocol do we use? */ - switch (proto) { - case IPPROTO_TCP: - timeparms.to_maxval = RPC_MAX_TCP_TIMEOUT; - if (!timeparms.to_initval) - timeparms.to_initval = 600 * HZ / 10; - break; - case IPPROTO_UDP: - timeparms.to_maxval = RPC_MAX_UDP_TIMEOUT; - if (!timeparms.to_initval) - timeparms.to_initval = 11 * HZ / 10; - break; - default: - return -EINVAL; - } + nfs_init_timeout_values(&timeparms, data->proto, data->timeo, data->retrans); clp = nfs4_get_client(&server->addr.sin_addr); if (!clp) { @@ -1735,7 +1800,7 @@ static int nfs4_fill_super(struct super_ down_write(&clp->cl_sem); if (IS_ERR(clp->cl_rpcclient)) { - xprt = xprt_create_proto(proto, &server->addr, &timeparms); + xprt = xprt_create_proto(data->proto, &server->addr, &timeparms); if (IS_ERR(xprt)) { up_write(&clp->cl_sem); err = PTR_ERR(xprt); diff -puN fs/nfs/nfs2xdr.c~CITI_NFS4_ALL fs/nfs/nfs2xdr.c --- linux-2.6.14/fs/nfs/nfs2xdr.c~CITI_NFS4_ALL 2005-11-07 13:26:15.000000000 -0500 +++ linux-2.6.14-bfields/fs/nfs/nfs2xdr.c 2005-11-07 13:26:18.000000000 -0500 @@ -143,7 +143,6 @@ xdr_decode_fattr(u32 *p, struct nfs_fatt fattr->mode = (fattr->mode & ~S_IFMT) | 
S_IFIFO; fattr->rdev = 0; } - fattr->timestamp = jiffies; return p; } diff -puN fs/nfs/nfs3proc.c~CITI_NFS4_ALL fs/nfs/nfs3proc.c --- linux-2.6.14/fs/nfs/nfs3proc.c~CITI_NFS4_ALL 2005-11-07 13:26:15.000000000 -0500 +++ linux-2.6.14-bfields/fs/nfs/nfs3proc.c 2005-11-07 13:26:28.000000000 -0500 @@ -68,27 +68,39 @@ nfs3_async_handle_jukebox(struct rpc_tas return 1; } -/* - * Bare-bones access to getattr: this is for nfs_read_super. - */ static int -nfs3_proc_get_root(struct nfs_server *server, struct nfs_fh *fhandle, - struct nfs_fsinfo *info) +do_proc_get_root(struct rpc_clnt *client, struct nfs_fh *fhandle, + struct nfs_fsinfo *info) { int status; dprintk("%s: call fsinfo\n", __FUNCTION__); - info->fattr->valid = 0; - status = rpc_call(server->client_sys, NFS3PROC_FSINFO, fhandle, info, 0); + nfs_fattr_init(info->fattr); + status = rpc_call(client, NFS3PROC_FSINFO, fhandle, info, 0); dprintk("%s: reply fsinfo: %d\n", __FUNCTION__, status); if (!(info->fattr->valid & NFS_ATTR_FATTR)) { - status = rpc_call(server->client_sys, NFS3PROC_GETATTR, fhandle, info->fattr, 0); + status = rpc_call(client, NFS3PROC_GETATTR, fhandle, info->fattr, 0); dprintk("%s: reply getattr: %d\n", __FUNCTION__, status); } return status; } /* + * Bare-bones access to getattr: this is for nfs_read_super. + */ +static int +nfs3_proc_get_root(struct nfs_server *server, struct nfs_fh *fhandle, + struct nfs_fsinfo *info) +{ + int status; + + status = do_proc_get_root(server->client, fhandle, info); + if (status && server->client_sys != server->client) + status = do_proc_get_root(server->client_sys, fhandle, info); + return status; +} + +/* * One function for each procedure in the NFS protocol. 
*/ static int @@ -98,7 +110,7 @@ nfs3_proc_getattr(struct nfs_server *ser int status; dprintk("NFS call getattr\n"); - fattr->valid = 0; + nfs_fattr_init(fattr); status = rpc_call(server->client, NFS3PROC_GETATTR, fhandle, fattr, 0); dprintk("NFS reply getattr: %d\n", status); @@ -117,7 +129,7 @@ nfs3_proc_setattr(struct dentry *dentry, int status; dprintk("NFS call setattr\n"); - fattr->valid = 0; + nfs_fattr_init(fattr); status = rpc_call(NFS_CLIENT(inode), NFS3PROC_SETATTR, &arg, fattr, 0); if (status == 0) nfs_setattr_update_inode(inode, sattr); @@ -143,8 +155,8 @@ nfs3_proc_lookup(struct inode *dir, stru int status; dprintk("NFS call lookup %s\n", name->name); - dir_attr.valid = 0; - fattr->valid = 0; + nfs_fattr_init(&dir_attr); + nfs_fattr_init(fattr); status = rpc_call(NFS_CLIENT(dir), NFS3PROC_LOOKUP, &arg, &res, 0); if (status >= 0 && !(fattr->valid & NFS_ATTR_FATTR)) status = rpc_call(NFS_CLIENT(dir), NFS3PROC_GETATTR, @@ -174,7 +186,6 @@ static int nfs3_proc_access(struct inode int status; dprintk("NFS call access\n"); - fattr.valid = 0; if (mode & MAY_READ) arg.access |= NFS3_ACCESS_READ; @@ -189,6 +200,7 @@ static int nfs3_proc_access(struct inode if (mode & MAY_EXEC) arg.access |= NFS3_ACCESS_EXECUTE; } + nfs_fattr_init(&fattr); status = rpc_call_sync(NFS_CLIENT(inode), &msg, 0); nfs_refresh_inode(inode, &fattr); if (status == 0) { @@ -217,7 +229,7 @@ static int nfs3_proc_readlink(struct ino int status; dprintk("NFS call readlink\n"); - fattr.valid = 0; + nfs_fattr_init(&fattr); status = rpc_call(NFS_CLIENT(inode), NFS3PROC_READLINK, &args, &fattr, 0); nfs_refresh_inode(inode, &fattr); @@ -240,7 +252,7 @@ static int nfs3_proc_read(struct nfs_rea dprintk("NFS call read %d @ %Ld\n", rdata->args.count, (long long) rdata->args.offset); - fattr->valid = 0; + nfs_fattr_init(fattr); status = rpc_call_sync(NFS_CLIENT(inode), &msg, flags); if (status >= 0) nfs_refresh_inode(inode, fattr); @@ -263,10 +275,10 @@ static int nfs3_proc_write(struct nfs_wr 
dprintk("NFS call write %d @ %Ld\n", wdata->args.count, (long long) wdata->args.offset); - fattr->valid = 0; + nfs_fattr_init(fattr); status = rpc_call_sync(NFS_CLIENT(inode), &msg, rpcflags); if (status >= 0) - nfs_refresh_inode(inode, fattr); + nfs_post_op_update_inode(inode, fattr); dprintk("NFS reply write: %d\n", status); return status < 0? status : wdata->res.count; } @@ -285,10 +297,10 @@ static int nfs3_proc_commit(struct nfs_w dprintk("NFS call commit %d @ %Ld\n", cdata->args.count, (long long) cdata->args.offset); - fattr->valid = 0; + nfs_fattr_init(fattr); status = rpc_call_sync(NFS_CLIENT(inode), &msg, 0); if (status >= 0) - nfs_refresh_inode(inode, fattr); + nfs_post_op_update_inode(inode, fattr); dprintk("NFS reply commit: %d\n", status); return status; } @@ -299,7 +311,7 @@ static int nfs3_proc_commit(struct nfs_w */ static int nfs3_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr, - int flags) + int flags, struct nameidata *nd) { struct nfs_fh fhandle; struct nfs_fattr fattr; @@ -329,10 +341,10 @@ nfs3_proc_create(struct inode *dir, stru sattr->ia_mode &= ~current->fs->umask; again: - dir_attr.valid = 0; - fattr.valid = 0; + nfs_fattr_init(&dir_attr); + nfs_fattr_init(&fattr); status = rpc_call(NFS_CLIENT(dir), NFS3PROC_CREATE, &arg, &res, 0); - nfs_refresh_inode(dir, &dir_attr); + nfs_post_op_update_inode(dir, &dir_attr); /* If the server doesn't support the exclusive creation semantics, * try again with simple 'guarded' mode. 
*/ @@ -401,9 +413,9 @@ nfs3_proc_remove(struct inode *dir, stru int status; dprintk("NFS call remove %s\n", name->name); - dir_attr.valid = 0; + nfs_fattr_init(&dir_attr); status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0); - nfs_refresh_inode(dir, &dir_attr); + nfs_post_op_update_inode(dir, &dir_attr); dprintk("NFS reply remove: %d\n", status); return status; } @@ -422,7 +434,7 @@ nfs3_proc_unlink_setup(struct rpc_messag ptr->arg.fh = NFS_FH(dir->d_inode); ptr->arg.name = name->name; ptr->arg.len = name->len; - ptr->res.valid = 0; + nfs_fattr_init(&ptr->res); msg->rpc_proc = &nfs3_procedures[NFS3PROC_REMOVE]; msg->rpc_argp = &ptr->arg; msg->rpc_resp = &ptr->res; @@ -439,7 +451,7 @@ nfs3_proc_unlink_done(struct dentry *dir return 1; if (msg->rpc_argp) { dir_attr = (struct nfs_fattr*)msg->rpc_resp; - nfs_refresh_inode(dir->d_inode, dir_attr); + nfs_post_op_update_inode(dir->d_inode, dir_attr); kfree(msg->rpc_argp); } return 0; @@ -465,11 +477,11 @@ nfs3_proc_rename(struct inode *old_dir, int status; dprintk("NFS call rename %s -> %s\n", old_name->name, new_name->name); - old_dir_attr.valid = 0; - new_dir_attr.valid = 0; + nfs_fattr_init(&old_dir_attr); + nfs_fattr_init(&new_dir_attr); status = rpc_call(NFS_CLIENT(old_dir), NFS3PROC_RENAME, &arg, &res, 0); - nfs_refresh_inode(old_dir, &old_dir_attr); - nfs_refresh_inode(new_dir, &new_dir_attr); + nfs_post_op_update_inode(old_dir, &old_dir_attr); + nfs_post_op_update_inode(new_dir, &new_dir_attr); dprintk("NFS reply rename: %d\n", status); return status; } @@ -491,11 +503,11 @@ nfs3_proc_link(struct inode *inode, stru int status; dprintk("NFS call link %s\n", name->name); - dir_attr.valid = 0; - fattr.valid = 0; + nfs_fattr_init(&dir_attr); + nfs_fattr_init(&fattr); status = rpc_call(NFS_CLIENT(inode), NFS3PROC_LINK, &arg, &res, 0); - nfs_refresh_inode(dir, &dir_attr); - nfs_refresh_inode(inode, &fattr); + nfs_post_op_update_inode(dir, &dir_attr); + nfs_post_op_update_inode(inode, &fattr); dprintk("NFS reply link: 
%d\n", status); return status; } @@ -524,10 +536,10 @@ nfs3_proc_symlink(struct inode *dir, str if (path->len > NFS3_MAXPATHLEN) return -ENAMETOOLONG; dprintk("NFS call symlink %s -> %s\n", name->name, path->name); - dir_attr.valid = 0; - fattr->valid = 0; + nfs_fattr_init(&dir_attr); + nfs_fattr_init(fattr); status = rpc_call(NFS_CLIENT(dir), NFS3PROC_SYMLINK, &arg, &res, 0); - nfs_refresh_inode(dir, &dir_attr); + nfs_post_op_update_inode(dir, &dir_attr); dprintk("NFS reply symlink: %d\n", status); return status; } @@ -552,13 +564,13 @@ nfs3_proc_mkdir(struct inode *dir, struc int status; dprintk("NFS call mkdir %s\n", dentry->d_name.name); - dir_attr.valid = 0; - fattr.valid = 0; sattr->ia_mode &= ~current->fs->umask; + nfs_fattr_init(&dir_attr); + nfs_fattr_init(&fattr); status = rpc_call(NFS_CLIENT(dir), NFS3PROC_MKDIR, &arg, &res, 0); - nfs_refresh_inode(dir, &dir_attr); + nfs_post_op_update_inode(dir, &dir_attr); if (status != 0) goto out; status = nfs_instantiate(dentry, &fhandle, &fattr); @@ -582,9 +594,9 @@ nfs3_proc_rmdir(struct inode *dir, struc int status; dprintk("NFS call rmdir %s\n", name->name); - dir_attr.valid = 0; + nfs_fattr_init(&dir_attr); status = rpc_call(NFS_CLIENT(dir), NFS3PROC_RMDIR, &arg, &dir_attr, 0); - nfs_refresh_inode(dir, &dir_attr); + nfs_post_op_update_inode(dir, &dir_attr); dprintk("NFS reply rmdir: %d\n", status); return status; } @@ -634,7 +646,7 @@ nfs3_proc_readdir(struct dentry *dentry, dprintk("NFS call readdir%s %d\n", plus? 
"plus" : "", (unsigned int) cookie); - dir_attr.valid = 0; + nfs_fattr_init(&dir_attr); status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0); nfs_refresh_inode(dir, &dir_attr); dprintk("NFS reply readdir: %d\n", status); @@ -676,10 +688,10 @@ nfs3_proc_mknod(struct inode *dir, struc sattr->ia_mode &= ~current->fs->umask; - dir_attr.valid = 0; - fattr.valid = 0; + nfs_fattr_init(&dir_attr); + nfs_fattr_init(&fattr); status = rpc_call(NFS_CLIENT(dir), NFS3PROC_MKNOD, &arg, &res, 0); - nfs_refresh_inode(dir, &dir_attr); + nfs_post_op_update_inode(dir, &dir_attr); if (status != 0) goto out; status = nfs_instantiate(dentry, &fh, &fattr); @@ -698,7 +710,7 @@ nfs3_proc_statfs(struct nfs_server *serv int status; dprintk("NFS call fsstat\n"); - stat->fattr->valid = 0; + nfs_fattr_init(stat->fattr); status = rpc_call(server->client, NFS3PROC_FSSTAT, fhandle, stat, 0); dprintk("NFS reply statfs: %d\n", status); return status; @@ -711,7 +723,7 @@ nfs3_proc_fsinfo(struct nfs_server *serv int status; dprintk("NFS call fsinfo\n"); - info->fattr->valid = 0; + nfs_fattr_init(info->fattr); status = rpc_call(server->client_sys, NFS3PROC_FSINFO, fhandle, info, 0); dprintk("NFS reply fsinfo: %d\n", status); return status; @@ -724,7 +736,7 @@ nfs3_proc_pathconf(struct nfs_server *se int status; dprintk("NFS call pathconf\n"); - info->fattr->valid = 0; + nfs_fattr_init(info->fattr); status = rpc_call(server->client, NFS3PROC_PATHCONF, fhandle, info, 0); dprintk("NFS reply pathconf: %d\n", status); return status; @@ -735,7 +747,7 @@ extern u32 *nfs3_decode_dirent(u32 *, st static void nfs3_read_done(struct rpc_task *task) { - struct nfs_write_data *data = (struct nfs_write_data *) task->tk_calldata; + struct nfs_read_data *data = (struct nfs_read_data *) task->tk_calldata; if (nfs3_async_handle_jukebox(task)) return; @@ -775,7 +787,7 @@ nfs3_write_done(struct rpc_task *task) return; data = (struct nfs_write_data *)task->tk_calldata; if (task->tk_status >= 0) - nfs_refresh_inode(data->inode, 
data->res.fattr); + nfs_post_op_update_inode(data->inode, data->res.fattr); nfs_writeback_done(task); } @@ -819,7 +831,7 @@ nfs3_commit_done(struct rpc_task *task) return; data = (struct nfs_write_data *)task->tk_calldata; if (task->tk_status >= 0) - nfs_refresh_inode(data->inode, data->res.fattr); + nfs_post_op_update_inode(data->inode, data->res.fattr); nfs_commit_done(task); } diff -puN fs/nfs/nfs3xdr.c~CITI_NFS4_ALL fs/nfs/nfs3xdr.c --- linux-2.6.14/fs/nfs/nfs3xdr.c~CITI_NFS4_ALL 2005-11-07 13:26:15.000000000 -0500 +++ linux-2.6.14-bfields/fs/nfs/nfs3xdr.c 2005-11-07 13:26:18.000000000 -0500 @@ -174,7 +174,6 @@ xdr_decode_fattr(u32 *p, struct nfs_fatt /* Update the mode bits */ fattr->valid |= (NFS_ATTR_FATTR | NFS_ATTR_FATTR_V3); - fattr->timestamp = jiffies; return p; } diff -puN fs/nfs/nfs4_fs.h~CITI_NFS4_ALL fs/nfs/nfs4_fs.h --- linux-2.6.14/fs/nfs/nfs4_fs.h~CITI_NFS4_ALL 2005-11-07 13:26:15.000000000 -0500 +++ linux-2.6.14-bfields/fs/nfs/nfs4_fs.h 2005-11-07 13:26:18.000000000 -0500 @@ -93,25 +93,50 @@ struct nfs4_client { }; /* + * struct rpc_sequence ensures that RPC calls are sent in the exact + * order that they appear on the list. + */ +struct rpc_sequence { + struct rpc_wait_queue wait; /* RPC call delay queue */ + spinlock_t lock; /* Protects the list */ + struct list_head list; /* Defines sequence of RPC calls */ +}; + +#define NFS_SEQID_CONFIRMED 1 +struct nfs_seqid_counter { + struct rpc_sequence *sequence; + int flags; + u32 counter; +}; + +struct nfs_seqid { + struct nfs_seqid_counter *sequence; + struct list_head list; +}; + +static inline void nfs_confirm_seqid(struct nfs_seqid_counter *seqid, int status) +{ + if (seqid_mutating_err(-status)) + seqid->flags |= NFS_SEQID_CONFIRMED; +} + +/* * NFS4 state_owners and lock_owners are simply labels for ordered * sequences of RPC calls. Their sole purpose is to provide once-only * semantics by allowing the server to identify replayed requests. 
- * - * The ->so_sema is held during all state_owner seqid-mutating operations: - * OPEN, OPEN_DOWNGRADE, and CLOSE. Its purpose is to properly serialize - * so_seqid. */ struct nfs4_state_owner { + spinlock_t so_lock; struct list_head so_list; /* per-clientid list of state_owners */ struct nfs4_client *so_client; u32 so_id; /* 32-bit identifier, unique */ - struct semaphore so_sema; - u32 so_seqid; /* protected by so_sema */ atomic_t so_count; struct rpc_cred *so_cred; /* Associated cred */ struct list_head so_states; struct list_head so_delegations; + struct nfs_seqid_counter so_seqid; + struct rpc_sequence so_sequence; }; /* @@ -132,7 +157,7 @@ struct nfs4_lock_state { fl_owner_t ls_owner; /* POSIX lock owner */ #define NFS_LOCK_INITIALIZED 1 int ls_flags; - u32 ls_seqid; + struct nfs_seqid_counter ls_seqid; u32 ls_id; nfs4_stateid ls_stateid; atomic_t ls_count; @@ -153,7 +178,6 @@ struct nfs4_state { struct inode *inode; /* Pointer to the inode */ unsigned long flags; /* Do we hold any locks? 
*/ - struct semaphore lock_sema; /* Serializes file locking operations */ spinlock_t state_lock; /* Protects the lock_states list */ nfs4_stateid stateid; @@ -190,9 +214,9 @@ extern int nfs4_proc_setclientid(struct extern int nfs4_proc_setclientid_confirm(struct nfs4_client *); extern int nfs4_proc_async_renew(struct nfs4_client *); extern int nfs4_proc_renew(struct nfs4_client *); -extern int nfs4_do_close(struct inode *inode, struct nfs4_state *state, mode_t mode); -extern struct inode *nfs4_atomic_open(struct inode *, struct dentry *, struct nameidata *); -extern int nfs4_open_revalidate(struct inode *, struct dentry *, int); +extern int nfs4_do_close(struct inode *inode, struct nfs4_state *state); +extern struct dentry *nfs4_atomic_open(struct inode *, struct dentry *, struct nameidata *); +extern int nfs4_open_revalidate(struct inode *, struct dentry *, int, struct nameidata *); extern struct nfs4_state_recovery_ops nfs4_reboot_recovery_ops; extern struct nfs4_state_recovery_ops nfs4_network_partition_recovery_ops; @@ -223,13 +247,18 @@ extern void nfs4_drop_state_owner(struct extern struct nfs4_state * nfs4_get_open_state(struct inode *, struct nfs4_state_owner *); extern void nfs4_put_open_state(struct nfs4_state *); extern void nfs4_close_state(struct nfs4_state *, mode_t); -extern struct nfs4_state *nfs4_find_state(struct inode *, struct rpc_cred *, mode_t mode); -extern void nfs4_increment_seqid(int status, struct nfs4_state_owner *sp); +extern void nfs4_state_set_mode_locked(struct nfs4_state *, mode_t); extern void nfs4_schedule_state_recovery(struct nfs4_client *); +extern void nfs4_put_lock_state(struct nfs4_lock_state *lsp); extern int nfs4_set_lock_state(struct nfs4_state *state, struct file_lock *fl); -extern void nfs4_increment_lock_seqid(int status, struct nfs4_lock_state *ls); extern void nfs4_copy_stateid(nfs4_stateid *, struct nfs4_state *, fl_owner_t); +extern struct nfs_seqid *nfs_alloc_seqid(struct nfs_seqid_counter *counter); +extern int 
nfs_wait_on_sequence(struct nfs_seqid *seqid, struct rpc_task *task); +extern void nfs_increment_open_seqid(int status, struct nfs_seqid *seqid); +extern void nfs_increment_lock_seqid(int status, struct nfs_seqid *seqid); +extern void nfs_free_seqid(struct nfs_seqid *seqid); + extern const nfs4_stateid zero_stateid; /* nfs4xdr.c */ diff -puN fs/nfs/nfs4proc.c~CITI_NFS4_ALL fs/nfs/nfs4proc.c --- linux-2.6.14/fs/nfs/nfs4proc.c~CITI_NFS4_ALL 2005-11-07 13:26:15.000000000 -0500 +++ linux-2.6.14-bfields/fs/nfs/nfs4proc.c 2005-11-07 13:26:18.000000000 -0500 @@ -47,6 +47,7 @@ #include #include #include +#include #include "nfs4_fs.h" #include "delegation.h" @@ -56,10 +57,11 @@ #define NFS4_POLL_RETRY_MIN (1*HZ) #define NFS4_POLL_RETRY_MAX (15*HZ) +static int _nfs4_proc_open_confirm(struct rpc_clnt *clnt, const struct nfs_fh *fh, struct nfs4_state_owner *sp, nfs4_stateid *stateid, struct nfs_seqid *seqid); static int nfs4_do_fsinfo(struct nfs_server *, struct nfs_fh *, struct nfs_fsinfo *); -static int nfs4_async_handle_error(struct rpc_task *, struct nfs_server *); +static int nfs4_async_handle_error(struct rpc_task *, const struct nfs_server *); static int _nfs4_proc_access(struct inode *inode, struct nfs_access_entry *entry); -static int nfs4_handle_exception(struct nfs_server *server, int errorcode, struct nfs4_exception *exception); +static int nfs4_handle_exception(const struct nfs_server *server, int errorcode, struct nfs4_exception *exception); extern u32 *nfs4_decode_dirent(u32 *p, struct nfs_entry *entry, int plus); extern struct rpc_procinfo nfs4_procedures[]; @@ -185,8 +187,26 @@ static void update_changeattr(struct ino { struct nfs_inode *nfsi = NFS_I(inode); + spin_lock(&inode->i_lock); + nfsi->cache_validity |= NFS_INO_INVALID_ATTR; if (cinfo->before == nfsi->change_attr && cinfo->atomic) nfsi->change_attr = cinfo->after; + spin_unlock(&inode->i_lock); +} + +/* Helper for asynchronous RPC calls */ +static int nfs4_call_async(struct rpc_clnt *clnt, rpc_action 
tk_begin, + rpc_action tk_exit, void *calldata) +{ + struct rpc_task *task; + + if (!(task = rpc_new_task(clnt, tk_exit, RPC_TASK_ASYNC))) + return -ENOMEM; + + task->tk_calldata = calldata; + task->tk_action = tk_begin; + rpc_execute(task); + return 0; } static void update_open_stateid(struct nfs4_state *state, nfs4_stateid *stateid, int open_flags) @@ -194,22 +214,22 @@ static void update_open_stateid(struct n struct inode *inode = state->inode; open_flags &= (FMODE_READ|FMODE_WRITE); - /* Protect against nfs4_find_state() */ + /* Protect against nfs4_find_state_byowner() */ + spin_lock(&state->owner->so_lock); spin_lock(&inode->i_lock); - state->state |= open_flags; - /* NB! List reordering - see the reclaim code for why. */ - if ((open_flags & FMODE_WRITE) && 0 == state->nwriters++) - list_move(&state->open_states, &state->owner->so_states); + memcpy(&state->stateid, stateid, sizeof(state->stateid)); + if ((open_flags & FMODE_WRITE)) + state->nwriters++; if (open_flags & FMODE_READ) state->nreaders++; - memcpy(&state->stateid, stateid, sizeof(state->stateid)); + nfs4_state_set_mode_locked(state, state->state | open_flags); spin_unlock(&inode->i_lock); + spin_unlock(&state->owner->so_lock); } /* * OPEN_RECLAIM: * reclaim state on the server after a reboot. 
- * Assumes caller is holding the sp->so_sem */ static int _nfs4_open_reclaim(struct nfs4_state_owner *sp, struct nfs4_state *state) { @@ -218,7 +238,6 @@ static int _nfs4_open_reclaim(struct nfs struct nfs_delegation *delegation = NFS_I(inode)->delegation; struct nfs_openargs o_arg = { .fh = NFS_FH(inode), - .seqid = sp->so_seqid, .id = sp->so_id, .open_flags = state->state, .clientid = server->nfs4_state->cl_clientid, @@ -245,8 +264,13 @@ static int _nfs4_open_reclaim(struct nfs } o_arg.u.delegation_type = delegation->type; } + o_arg.seqid = nfs_alloc_seqid(&sp->so_seqid); + if (o_arg.seqid == NULL) + return -ENOMEM; status = rpc_call_sync(server->client, &msg, RPC_TASK_NOINTR); - nfs4_increment_seqid(status, sp); + /* Confirm the sequence as being established */ + nfs_confirm_seqid(&sp->so_seqid, status); + nfs_increment_open_seqid(status, o_arg.seqid); if (status == 0) { memcpy(&state->stateid, &o_res.stateid, sizeof(state->stateid)); if (o_res.delegation_type != 0) { @@ -256,6 +280,7 @@ static int _nfs4_open_reclaim(struct nfs nfs_async_inode_return_delegation(inode, &o_res.stateid); } } + nfs_free_seqid(o_arg.seqid); clear_bit(NFS_DELEGATED_STATE, &state->flags); /* Ensure we update the inode attributes */ NFS_CACHEINV(inode); @@ -302,23 +327,35 @@ static int _nfs4_open_delegation_recall( }; int status = 0; - down(&sp->so_sema); if (!test_bit(NFS_DELEGATED_STATE, &state->flags)) goto out; if (state->state == 0) goto out; - arg.seqid = sp->so_seqid; + arg.seqid = nfs_alloc_seqid(&sp->so_seqid); + status = -ENOMEM; + if (arg.seqid == NULL) + goto out; arg.open_flags = state->state; memcpy(arg.u.delegation.data, state->stateid.data, sizeof(arg.u.delegation.data)); status = rpc_call_sync(server->client, &msg, RPC_TASK_NOINTR); - nfs4_increment_seqid(status, sp); + nfs_increment_open_seqid(status, arg.seqid); + if (status != 0) + goto out_free; + if(res.rflags & NFS4_OPEN_RESULT_CONFIRM) { + status = _nfs4_proc_open_confirm(server->client, NFS_FH(inode), + sp, 
&res.stateid, arg.seqid); + if (status != 0) + goto out_free; + } + nfs_confirm_seqid(&sp->so_seqid, 0); if (status >= 0) { memcpy(state->stateid.data, res.stateid.data, sizeof(state->stateid.data)); clear_bit(NFS_DELEGATED_STATE, &state->flags); } +out_free: + nfs_free_seqid(arg.seqid); out: - up(&sp->so_sema); dput(parent); return status; } @@ -345,11 +382,11 @@ int nfs4_open_delegation_recall(struct d return err; } -static inline int _nfs4_proc_open_confirm(struct rpc_clnt *clnt, const struct nfs_fh *fh, struct nfs4_state_owner *sp, nfs4_stateid *stateid) +static int _nfs4_proc_open_confirm(struct rpc_clnt *clnt, const struct nfs_fh *fh, struct nfs4_state_owner *sp, nfs4_stateid *stateid, struct nfs_seqid *seqid) { struct nfs_open_confirmargs arg = { .fh = fh, - .seqid = sp->so_seqid, + .seqid = seqid, .stateid = *stateid, }; struct nfs_open_confirmres res; @@ -362,7 +399,9 @@ static inline int _nfs4_proc_open_confir int status; status = rpc_call_sync(clnt, &msg, RPC_TASK_NOINTR); - nfs4_increment_seqid(status, sp); + /* Confirm the sequence as being established */ + nfs_confirm_seqid(&sp->so_seqid, status); + nfs_increment_open_seqid(status, seqid); if (status >= 0) memcpy(stateid, &res.stateid, sizeof(*stateid)); return status; @@ -380,21 +419,41 @@ static int _nfs4_proc_open(struct inode int status; /* Update sequence id. The caller must serialize! 
*/ - o_arg->seqid = sp->so_seqid; o_arg->id = sp->so_id; o_arg->clientid = sp->so_client->cl_clientid; status = rpc_call_sync(server->client, &msg, RPC_TASK_NOINTR); - nfs4_increment_seqid(status, sp); + if (status == 0) { + /* OPEN on anything except a regular file is disallowed in NFSv4 */ + switch (o_res->f_attr->mode & S_IFMT) { + case S_IFREG: + break; + case S_IFLNK: + status = -ELOOP; + break; + case S_IFDIR: + status = -EISDIR; + break; + default: + status = -ENOTDIR; + } + } + + nfs_increment_open_seqid(status, o_arg->seqid); if (status != 0) goto out; - update_changeattr(dir, &o_res->cinfo); + if (o_arg->open_flags & O_CREAT) { + update_changeattr(dir, &o_res->cinfo); + nfs_post_op_update_inode(dir, o_res->dir_attr); + } else + nfs_refresh_inode(dir, o_res->dir_attr); if(o_res->rflags & NFS4_OPEN_RESULT_CONFIRM) { status = _nfs4_proc_open_confirm(server->client, &o_res->fh, - sp, &o_res->stateid); + sp, &o_res->stateid, o_arg->seqid); if (status != 0) goto out; } + nfs_confirm_seqid(&sp->so_seqid, 0); if (!(o_res->f_attr->valid & NFS_ATTR_FATTR)) status = server->rpc_ops->getattr(server, &o_res->fh, o_res->f_attr); out: @@ -441,9 +500,7 @@ static int _nfs4_open_expired(struct nfs struct inode *inode = state->inode; struct nfs_server *server = NFS_SERVER(dir); struct nfs_delegation *delegation = NFS_I(inode)->delegation; - struct nfs_fattr f_attr = { - .valid = 0, - }; + struct nfs_fattr f_attr, dir_attr; struct nfs_openargs o_arg = { .fh = NFS_FH(dir), .open_flags = state->state, @@ -453,6 +510,7 @@ static int _nfs4_open_expired(struct nfs }; struct nfs_openres o_res = { .f_attr = &f_attr, + .dir_attr = &dir_attr, .server = server, }; int status = 0; @@ -465,6 +523,12 @@ static int _nfs4_open_expired(struct nfs set_bit(NFS_DELEGATED_STATE, &state->flags); goto out; } + o_arg.seqid = nfs_alloc_seqid(&sp->so_seqid); + status = -ENOMEM; + if (o_arg.seqid == NULL) + goto out; + nfs_fattr_init(&f_attr); + nfs_fattr_init(&dir_attr); status = 
_nfs4_proc_open(dir, sp, &o_arg, &o_res); if (status != 0) goto out_nodeleg; @@ -490,6 +554,7 @@ static int _nfs4_open_expired(struct nfs nfs_inode_reclaim_delegation(inode, sp->so_cred, &o_res); } out_nodeleg: + nfs_free_seqid(o_arg.seqid); clear_bit(NFS_DELEGATED_STATE, &state->flags); out: dput(parent); @@ -564,7 +629,6 @@ static int _nfs4_open_delegated(struct i dprintk("%s: nfs4_get_state_owner failed!\n", __FUNCTION__); goto out_err; } - down(&sp->so_sema); state = nfs4_get_open_state(inode, sp); if (state == NULL) goto out_err; @@ -589,7 +653,6 @@ static int _nfs4_open_delegated(struct i set_bit(NFS_DELEGATED_STATE, &state->flags); update_open_stateid(state, &delegation->stateid, open_flags); out_ok: - up(&sp->so_sema); nfs4_put_state_owner(sp); up_read(&nfsi->rwsem); up_read(&clp->cl_sem); @@ -600,11 +663,12 @@ out_err: if (sp != NULL) { if (state != NULL) nfs4_put_open_state(state); - up(&sp->so_sema); nfs4_put_state_owner(sp); } up_read(&nfsi->rwsem); up_read(&clp->cl_sem); + if (err != -EACCES) + nfs_inode_return_delegation(inode); return err; } @@ -635,9 +699,7 @@ static int _nfs4_do_open(struct inode *d struct nfs4_client *clp = server->nfs4_state; struct inode *inode = NULL; int status; - struct nfs_fattr f_attr = { - .valid = 0, - }; + struct nfs_fattr f_attr, dir_attr; struct nfs_openargs o_arg = { .fh = NFS_FH(dir), .open_flags = flags, @@ -648,6 +710,7 @@ static int _nfs4_do_open(struct inode *d }; struct nfs_openres o_res = { .f_attr = &f_attr, + .dir_attr = &dir_attr, .server = server, }; @@ -665,8 +728,12 @@ static int _nfs4_do_open(struct inode *d } else o_arg.u.attrs = sattr; /* Serialization for the sequence id */ - down(&sp->so_sema); + o_arg.seqid = nfs_alloc_seqid(&sp->so_seqid); + if (o_arg.seqid == NULL) + return -ENOMEM; + nfs_fattr_init(&f_attr); + nfs_fattr_init(&dir_attr); status = _nfs4_proc_open(dir, sp, &o_arg, &o_res); if (status != 0) goto out_err; @@ -681,7 +748,7 @@ static int _nfs4_do_open(struct inode *d 
update_open_stateid(state, &o_res.stateid, flags); if (o_res.delegation_type != 0) nfs_inode_set_delegation(inode, cred, &o_res); - up(&sp->so_sema); + nfs_free_seqid(o_arg.seqid); nfs4_put_state_owner(sp); up_read(&clp->cl_sem); *res = state; @@ -690,7 +757,7 @@ out_err: if (sp != NULL) { if (state != NULL) nfs4_put_open_state(state); - up(&sp->so_sema); + nfs_free_seqid(o_arg.seqid); nfs4_put_state_owner(sp); } /* Note: clp->cl_sem must be released before nfs4_put_open_state()! */ @@ -718,7 +785,7 @@ static struct nfs4_state *nfs4_do_open(s * It is actually a sign of a bug on the client or on the server. * * If we receive a BAD_SEQID error in the particular case of - * doing an OPEN, we assume that nfs4_increment_seqid() will + * doing an OPEN, we assume that nfs_increment_open_seqid() will * have unhashed the old state_owner for us, and that we can * therefore safely retry using a new one. We should still warn * the user though... @@ -728,6 +795,16 @@ static struct nfs4_state *nfs4_do_open(s exception.retry = 1; continue; } + /* + * BAD_STATEID on OPEN means that the server cancelled our + * state before it received the OPEN_CONFIRM. + * Recover by retrying the request as per the discussion + * on Page 181 of RFC3530. 
+ */ + if (status == -NFS4ERR_BAD_STATEID) { + exception.retry = 1; + continue; + } res = ERR_PTR(nfs4_handle_exception(NFS_SERVER(dir), status, &exception)); } while (exception.retry); @@ -755,7 +832,7 @@ static int _nfs4_do_setattr(struct nfs_s }; int status; - fattr->valid = 0; + nfs_fattr_init(fattr); if (state != NULL) { msg.rpc_cred = state->owner->so_cred; @@ -787,19 +864,30 @@ struct nfs4_closedata { struct nfs4_state *state; struct nfs_closeargs arg; struct nfs_closeres res; + struct nfs_fattr fattr; }; +static void nfs4_free_closedata(struct nfs4_closedata *calldata) +{ + struct nfs4_state *state = calldata->state; + struct nfs4_state_owner *sp = state->owner; + + nfs4_put_open_state(calldata->state); + nfs_free_seqid(calldata->arg.seqid); + nfs4_put_state_owner(sp); + kfree(calldata); +} + static void nfs4_close_done(struct rpc_task *task) { struct nfs4_closedata *calldata = (struct nfs4_closedata *)task->tk_calldata; struct nfs4_state *state = calldata->state; - struct nfs4_state_owner *sp = state->owner; struct nfs_server *server = NFS_SERVER(calldata->inode); /* hmm. we are done with the inode, and in the process of freeing * the state_owner. 
we keep this around to process errors */ - nfs4_increment_seqid(task->tk_status, sp); + nfs_increment_open_seqid(task->tk_status, calldata->arg.seqid); switch (task->tk_status) { case 0: memcpy(&state->stateid, &calldata->res.stateid, @@ -807,7 +895,6 @@ static void nfs4_close_done(struct rpc_t break; case -NFS4ERR_STALE_STATEID: case -NFS4ERR_EXPIRED: - state->state = calldata->arg.open_flags; nfs4_schedule_state_recovery(server->nfs4_state); break; default: @@ -816,25 +903,50 @@ static void nfs4_close_done(struct rpc_t return; } } - state->state = calldata->arg.open_flags; - nfs4_put_open_state(state); - up(&sp->so_sema); - nfs4_put_state_owner(sp); - up_read(&server->nfs4_state->cl_sem); - kfree(calldata); + nfs_refresh_inode(calldata->inode, calldata->res.fattr); + nfs4_free_closedata(calldata); } -static inline int nfs4_close_call(struct rpc_clnt *clnt, struct nfs4_closedata *calldata) +static void nfs4_close_begin(struct rpc_task *task) { + struct nfs4_closedata *calldata = (struct nfs4_closedata *)task->tk_calldata; + struct nfs4_state *state = calldata->state; struct rpc_message msg = { .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_CLOSE], .rpc_argp = &calldata->arg, .rpc_resp = &calldata->res, - .rpc_cred = calldata->state->owner->so_cred, + .rpc_cred = state->owner->so_cred, }; - if (calldata->arg.open_flags != 0) + int mode = 0, old_mode; + int status; + + status = nfs_wait_on_sequence(calldata->arg.seqid, task); + if (status != 0) + return; + /* Recalculate the new open mode in case someone reopened the file + * while we were waiting in line to be scheduled. 
+ */ + spin_lock(&state->owner->so_lock); + spin_lock(&calldata->inode->i_lock); + mode = old_mode = state->state; + if (state->nreaders == 0) + mode &= ~FMODE_READ; + if (state->nwriters == 0) + mode &= ~FMODE_WRITE; + nfs4_state_set_mode_locked(state, mode); + spin_unlock(&calldata->inode->i_lock); + spin_unlock(&state->owner->so_lock); + if (mode == old_mode || test_bit(NFS_DELEGATED_STATE, &state->flags)) { + nfs4_free_closedata(calldata); + task->tk_exit = NULL; + rpc_exit(task, 0); + return; + } + nfs_fattr_init(calldata->res.fattr); + if (mode != 0) msg.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_OPEN_DOWNGRADE]; - return rpc_call_async(clnt, &msg, 0, nfs4_close_done, calldata); + calldata->arg.open_flags = mode; + rpc_call_setup(task, &msg, 0); } /* @@ -848,42 +960,59 @@ static inline int nfs4_close_call(struct * * NOTE: Caller must be holding the sp->so_owner semaphore! */ -int nfs4_do_close(struct inode *inode, struct nfs4_state *state, mode_t mode) +int nfs4_do_close(struct inode *inode, struct nfs4_state *state) { + struct nfs_server *server = NFS_SERVER(inode); struct nfs4_closedata *calldata; - int status; + int status = -ENOMEM; - /* Tell caller we're done */ - if (test_bit(NFS_DELEGATED_STATE, &state->flags)) { - state->state = mode; - return 0; - } - calldata = (struct nfs4_closedata *)kmalloc(sizeof(*calldata), GFP_KERNEL); + calldata = kmalloc(sizeof(*calldata), GFP_KERNEL); if (calldata == NULL) - return -ENOMEM; + goto out; calldata->inode = inode; calldata->state = state; calldata->arg.fh = NFS_FH(inode); + calldata->arg.stateid = &state->stateid; /* Serialization for the sequence id */ - calldata->arg.seqid = state->owner->so_seqid; - calldata->arg.open_flags = mode; - memcpy(&calldata->arg.stateid, &state->stateid, - sizeof(calldata->arg.stateid)); - status = nfs4_close_call(NFS_SERVER(inode)->client, calldata); - /* - * Return -EINPROGRESS on success in order to indicate to the - * caller that an asynchronous RPC call has been launched, and - 
* that it will release the semaphores on completion. - */ - return (status == 0) ? -EINPROGRESS : status; + calldata->arg.seqid = nfs_alloc_seqid(&state->owner->so_seqid); + if (calldata->arg.seqid == NULL) + goto out_free_calldata; + calldata->arg.bitmask = server->attr_bitmask; + calldata->res.fattr = &calldata->fattr; + calldata->res.server = server; + + status = nfs4_call_async(server->client, nfs4_close_begin, + nfs4_close_done, calldata); + if (status == 0) + goto out; + + nfs_free_seqid(calldata->arg.seqid); +out_free_calldata: + kfree(calldata); +out: + return status; } -struct inode * +static void nfs4_intent_set_file(struct nameidata *nd, struct dentry *dentry, struct nfs4_state *state) +{ + struct file *filp; + + filp = lookup_instantiate_filp(nd, dentry, NULL); + if (!IS_ERR(filp)) { + struct nfs_open_context *ctx; + ctx = (struct nfs_open_context *)filp->private_data; + ctx->state = state; + } else + nfs4_close_state(state, nd->intent.open.flags); +} + +struct dentry * nfs4_atomic_open(struct inode *dir, struct dentry *dentry, struct nameidata *nd) { struct iattr attr; struct rpc_cred *cred; struct nfs4_state *state; + struct dentry *res; if (nd->flags & LOOKUP_CREATE) { attr.ia_mode = nd->intent.open.create_mode; @@ -897,16 +1026,23 @@ nfs4_atomic_open(struct inode *dir, stru cred = rpcauth_lookupcred(NFS_SERVER(dir)->client->cl_auth, 0); if (IS_ERR(cred)) - return (struct inode *)cred; + return (struct dentry *)cred; state = nfs4_do_open(dir, dentry, nd->intent.open.flags, &attr, cred); put_rpccred(cred); - if (IS_ERR(state)) - return (struct inode *)state; - return state->inode; + if (IS_ERR(state)) { + if (PTR_ERR(state) == -ENOENT) + d_add(dentry, NULL); + return (struct dentry *)state; + } + res = d_add_unique(dentry, state->inode); + if (res != NULL) + dentry = res; + nfs4_intent_set_file(nd, dentry, state); + return res; } int -nfs4_open_revalidate(struct inode *dir, struct dentry *dentry, int openflags) +nfs4_open_revalidate(struct inode *dir, 
struct dentry *dentry, int openflags, struct nameidata *nd) { struct rpc_cred *cred; struct nfs4_state *state; @@ -919,18 +1055,30 @@ nfs4_open_revalidate(struct inode *dir, if (IS_ERR(state)) state = nfs4_do_open(dir, dentry, openflags, NULL, cred); put_rpccred(cred); - if (state == ERR_PTR(-ENOENT) && dentry->d_inode == 0) - return 1; - if (IS_ERR(state)) - return 0; + if (IS_ERR(state)) { + switch (PTR_ERR(state)) { + case -EPERM: + case -EACCES: + case -EDQUOT: + case -ENOSPC: + case -EROFS: + lookup_instantiate_filp(nd, (struct dentry *)state, NULL); + return 1; + case -ENOENT: + if (dentry->d_inode == NULL) + return 1; + } + goto out_drop; + } inode = state->inode; + iput(inode); if (inode == dentry->d_inode) { - iput(inode); + nfs4_intent_set_file(nd, dentry, state); return 1; } - d_drop(dentry); nfs4_close_state(state, openflags); - iput(inode); +out_drop: + d_drop(dentry); return 0; } @@ -974,13 +1122,12 @@ static int nfs4_server_capabilities(stru static int _nfs4_lookup_root(struct nfs_server *server, struct nfs_fh *fhandle, struct nfs_fsinfo *info) { - struct nfs_fattr * fattr = info->fattr; struct nfs4_lookup_root_arg args = { .bitmask = nfs4_fattr_bitmap, }; struct nfs4_lookup_res res = { .server = server, - .fattr = fattr, + .fattr = info->fattr, .fh = fhandle, }; struct rpc_message msg = { @@ -988,7 +1135,7 @@ static int _nfs4_lookup_root(struct nfs_ .rpc_argp = &args, .rpc_resp = &res, }; - fattr->valid = 0; + nfs_fattr_init(info->fattr); return rpc_call_sync(server->client, &msg, 0); } @@ -1051,7 +1198,7 @@ static int nfs4_proc_get_root(struct nfs q.len = p - q.name; do { - fattr->valid = 0; + nfs_fattr_init(fattr); status = nfs4_handle_exception(server, rpc_call_sync(server->client, &msg, 0), &exception); @@ -1088,7 +1235,7 @@ static int _nfs4_proc_getattr(struct nfs .rpc_resp = &res, }; - fattr->valid = 0; + nfs_fattr_init(fattr); return rpc_call_sync(server->client, &msg, 0); } @@ -1127,30 +1274,27 @@ nfs4_proc_setattr(struct dentry *dentry, { 
struct rpc_cred *cred; struct inode *inode = dentry->d_inode; - struct nfs4_state *state; + struct nfs_open_context *ctx; + struct nfs4_state *state = NULL; int status; - fattr->valid = 0; + nfs_fattr_init(fattr); cred = rpcauth_lookupcred(NFS_SERVER(inode)->client->cl_auth, 0); if (IS_ERR(cred)) return PTR_ERR(cred); - /* Search for an existing WRITE delegation first */ - state = nfs4_open_delegated(inode, FMODE_WRITE, cred); - if (!IS_ERR(state)) { - /* NB: nfs4_open_delegated() bumps the inode->i_count */ - iput(inode); - } else { - /* Search for an existing open(O_WRITE) stateid */ - state = nfs4_find_state(inode, cred, FMODE_WRITE); - } + + /* Search for an existing open(O_WRITE) file */ + ctx = nfs_find_open_context(inode, cred, FMODE_WRITE); + if (ctx != NULL) + state = ctx->state; status = nfs4_do_setattr(NFS_SERVER(inode), fattr, NFS_FH(inode), sattr, state); if (status == 0) nfs_setattr_update_inode(inode, sattr); - if (state != NULL) - nfs4_close_state(state, FMODE_WRITE); + if (ctx != NULL) + put_nfs_open_context(ctx); put_rpccred(cred); return status; } @@ -1176,7 +1320,7 @@ static int _nfs4_proc_lookup(struct inod .rpc_resp = &res, }; - fattr->valid = 0; + nfs_fattr_init(fattr); dprintk("NFS call lookup %s\n", name->name); status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0); @@ -1325,7 +1469,7 @@ static int _nfs4_proc_read(struct nfs_re dprintk("NFS call read %d @ %Ld\n", rdata->args.count, (long long) rdata->args.offset); - fattr->valid = 0; + nfs_fattr_init(fattr); status = rpc_call_sync(server->client, &msg, flags); if (!status) renew_lease(server, timestamp); @@ -1362,7 +1506,7 @@ static int _nfs4_proc_write(struct nfs_w dprintk("NFS call write %d @ %Ld\n", wdata->args.count, (long long) wdata->args.offset); - fattr->valid = 0; + nfs_fattr_init(fattr); status = rpc_call_sync(server->client, &msg, rpcflags); dprintk("NFS reply write: %d\n", status); return status; @@ -1396,7 +1540,7 @@ static int _nfs4_proc_commit(struct nfs_ dprintk("NFS call commit 
%d @ %Ld\n", cdata->args.count, (long long) cdata->args.offset); - fattr->valid = 0; + nfs_fattr_init(fattr); status = rpc_call_sync(server->client, &msg, 0); dprintk("NFS reply commit: %d\n", status); return status; @@ -1431,7 +1575,7 @@ static int nfs4_proc_commit(struct nfs_w static int nfs4_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr, - int flags) + int flags, struct nameidata *nd) { struct nfs4_state *state; struct rpc_cred *cred; @@ -1453,24 +1597,30 @@ nfs4_proc_create(struct inode *dir, stru struct nfs_fattr fattr; status = nfs4_do_setattr(NFS_SERVER(dir), &fattr, NFS_FH(state->inode), sattr, state); - if (status == 0) { + if (status == 0) nfs_setattr_update_inode(state->inode, sattr); - goto out; - } - } else if (flags != 0) - goto out; - nfs4_close_state(state, flags); + } + if (status == 0 && nd != NULL && (nd->flags & LOOKUP_OPEN)) + nfs4_intent_set_file(nd, dentry, state); + else + nfs4_close_state(state, flags); out: return status; } static int _nfs4_proc_remove(struct inode *dir, struct qstr *name) { + struct nfs_server *server = NFS_SERVER(dir); struct nfs4_remove_arg args = { .fh = NFS_FH(dir), .name = name, + .bitmask = server->attr_bitmask, + }; + struct nfs_fattr dir_attr; + struct nfs4_remove_res res = { + .server = server, + .dir_attr = &dir_attr, }; - struct nfs4_change_info res; struct rpc_message msg = { .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_REMOVE], .rpc_argp = &args, @@ -1478,9 +1628,12 @@ static int _nfs4_proc_remove(struct inod }; int status; - status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0); - if (status == 0) - update_changeattr(dir, &res); + nfs_fattr_init(res.dir_attr); + status = rpc_call_sync(server->client, &msg, 0); + if (status == 0) { + update_changeattr(dir, &res.cinfo); + nfs_post_op_update_inode(dir, res.dir_attr); + } return status; } @@ -1498,12 +1651,14 @@ static int nfs4_proc_remove(struct inode struct unlink_desc { struct nfs4_remove_arg args; - struct nfs4_change_info res; + struct 
nfs4_remove_res res; + struct nfs_fattr dir_attr; }; static int nfs4_proc_unlink_setup(struct rpc_message *msg, struct dentry *dir, struct qstr *name) { + struct nfs_server *server = NFS_SERVER(dir->d_inode); struct unlink_desc *up; up = (struct unlink_desc *) kmalloc(sizeof(*up), GFP_KERNEL); @@ -1512,6 +1667,9 @@ static int nfs4_proc_unlink_setup(struct up->args.fh = NFS_FH(dir->d_inode); up->args.name = name; + up->args.bitmask = server->attr_bitmask; + up->res.server = server; + up->res.dir_attr = &up->dir_attr; msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_REMOVE]; msg->rpc_argp = &up->args; @@ -1526,7 +1684,8 @@ static int nfs4_proc_unlink_done(struct if (msg->rpc_resp != NULL) { up = container_of(msg->rpc_resp, struct unlink_desc, res); - update_changeattr(dir->d_inode, &up->res); + update_changeattr(dir->d_inode, &up->res.cinfo); + nfs_post_op_update_inode(dir->d_inode, up->res.dir_attr); kfree(up); msg->rpc_resp = NULL; msg->rpc_argp = NULL; @@ -1537,13 +1696,20 @@ static int nfs4_proc_unlink_done(struct static int _nfs4_proc_rename(struct inode *old_dir, struct qstr *old_name, struct inode *new_dir, struct qstr *new_name) { + struct nfs_server *server = NFS_SERVER(old_dir); struct nfs4_rename_arg arg = { .old_dir = NFS_FH(old_dir), .new_dir = NFS_FH(new_dir), .old_name = old_name, .new_name = new_name, + .bitmask = server->attr_bitmask, + }; + struct nfs_fattr old_fattr, new_fattr; + struct nfs4_rename_res res = { + .server = server, + .old_fattr = &old_fattr, + .new_fattr = &new_fattr, }; - struct nfs4_rename_res res = { }; struct rpc_message msg = { .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_RENAME], .rpc_argp = &arg, @@ -1551,11 +1717,15 @@ static int _nfs4_proc_rename(struct inod }; int status; - status = rpc_call_sync(NFS_CLIENT(old_dir), &msg, 0); + nfs_fattr_init(res.old_fattr); + nfs_fattr_init(res.new_fattr); + status = rpc_call_sync(server->client, &msg, 0); if (!status) { update_changeattr(old_dir, &res.old_cinfo); + 
nfs_post_op_update_inode(old_dir, res.old_fattr); update_changeattr(new_dir, &res.new_cinfo); + nfs_post_op_update_inode(new_dir, res.new_fattr); } return status; } @@ -1576,22 +1746,34 @@ static int nfs4_proc_rename(struct inode static int _nfs4_proc_link(struct inode *inode, struct inode *dir, struct qstr *name) { + struct nfs_server *server = NFS_SERVER(inode); struct nfs4_link_arg arg = { .fh = NFS_FH(inode), .dir_fh = NFS_FH(dir), .name = name, + .bitmask = server->attr_bitmask, + }; + struct nfs_fattr fattr, dir_attr; + struct nfs4_link_res res = { + .server = server, + .fattr = &fattr, + .dir_attr = &dir_attr, }; - struct nfs4_change_info cinfo = { }; struct rpc_message msg = { .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_LINK], .rpc_argp = &arg, - .rpc_resp = &cinfo, + .rpc_resp = &res, }; int status; - status = rpc_call_sync(NFS_CLIENT(inode), &msg, 0); - if (!status) - update_changeattr(dir, &cinfo); + nfs_fattr_init(res.fattr); + nfs_fattr_init(res.dir_attr); + status = rpc_call_sync(server->client, &msg, 0); + if (!status) { + update_changeattr(dir, &res.cinfo); + nfs_post_op_update_inode(dir, res.dir_attr); + nfs_refresh_inode(inode, res.fattr); + } return status; } @@ -1613,6 +1795,7 @@ static int _nfs4_proc_symlink(struct ino struct nfs_fattr *fattr) { struct nfs_server *server = NFS_SERVER(dir); + struct nfs_fattr dir_fattr; struct nfs4_create_arg arg = { .dir_fh = NFS_FH(dir), .server = server, @@ -1625,6 +1808,7 @@ static int _nfs4_proc_symlink(struct ino .server = server, .fh = fhandle, .fattr = fattr, + .dir_fattr = &dir_fattr, }; struct rpc_message msg = { .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_SYMLINK], @@ -1636,11 +1820,13 @@ static int _nfs4_proc_symlink(struct ino if (path->len > NFS4_MAXPATHLEN) return -ENAMETOOLONG; arg.u.symlink = path; - fattr->valid = 0; + nfs_fattr_init(fattr); + nfs_fattr_init(&dir_fattr); status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0); if (!status) update_changeattr(dir, &res.dir_cinfo); + 
nfs_post_op_update_inode(dir, res.dir_fattr); return status; } @@ -1664,7 +1850,7 @@ static int _nfs4_proc_mkdir(struct inode { struct nfs_server *server = NFS_SERVER(dir); struct nfs_fh fhandle; - struct nfs_fattr fattr; + struct nfs_fattr fattr, dir_fattr; struct nfs4_create_arg arg = { .dir_fh = NFS_FH(dir), .server = server, @@ -1677,6 +1863,7 @@ static int _nfs4_proc_mkdir(struct inode .server = server, .fh = &fhandle, .fattr = &fattr, + .dir_fattr = &dir_fattr, }; struct rpc_message msg = { .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_CREATE], @@ -1685,11 +1872,13 @@ static int _nfs4_proc_mkdir(struct inode }; int status; - fattr.valid = 0; + nfs_fattr_init(&fattr); + nfs_fattr_init(&dir_fattr); status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0); if (!status) { update_changeattr(dir, &res.dir_cinfo); + nfs_post_op_update_inode(dir, res.dir_fattr); status = nfs_instantiate(dentry, &fhandle, &fattr); } return status; @@ -1762,7 +1951,7 @@ static int _nfs4_proc_mknod(struct inode { struct nfs_server *server = NFS_SERVER(dir); struct nfs_fh fh; - struct nfs_fattr fattr; + struct nfs_fattr fattr, dir_fattr; struct nfs4_create_arg arg = { .dir_fh = NFS_FH(dir), .server = server, @@ -1774,6 +1963,7 @@ static int _nfs4_proc_mknod(struct inode .server = server, .fh = &fh, .fattr = &fattr, + .dir_fattr = &dir_fattr, }; struct rpc_message msg = { .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_CREATE], @@ -1783,7 +1973,8 @@ static int _nfs4_proc_mknod(struct inode int status; int mode = sattr->ia_mode; - fattr.valid = 0; + nfs_fattr_init(&fattr); + nfs_fattr_init(&dir_fattr); BUG_ON(!(sattr->ia_valid & ATTR_MODE)); BUG_ON(!S_ISFIFO(mode) && !S_ISBLK(mode) && !S_ISCHR(mode) && !S_ISSOCK(mode)); @@ -1805,6 +1996,7 @@ static int _nfs4_proc_mknod(struct inode status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0); if (status == 0) { update_changeattr(dir, &res.dir_cinfo); + nfs_post_op_update_inode(dir, res.dir_fattr); status = nfs_instantiate(dentry, &fh, &fattr); } return status; @@ 
-1836,7 +2028,7 @@ static int _nfs4_proc_statfs(struct nfs_ .rpc_resp = fsstat, }; - fsstat->fattr->valid = 0; + nfs_fattr_init(fsstat->fattr); return rpc_call_sync(server->client, &msg, 0); } @@ -1883,7 +2075,7 @@ static int nfs4_do_fsinfo(struct nfs_ser static int nfs4_proc_fsinfo(struct nfs_server *server, struct nfs_fh *fhandle, struct nfs_fsinfo *fsinfo) { - fsinfo->fattr->valid = 0; + nfs_fattr_init(fsinfo->fattr); return nfs4_do_fsinfo(server, fhandle, fsinfo); } @@ -1906,7 +2098,7 @@ static int _nfs4_proc_pathconf(struct nf return 0; } - pathconf->fattr->valid = 0; + nfs_fattr_init(pathconf->fattr); return rpc_call_sync(server->client, &msg, 0); } @@ -1973,8 +2165,10 @@ nfs4_write_done(struct rpc_task *task) rpc_restart_call(task); return; } - if (task->tk_status >= 0) + if (task->tk_status >= 0) { renew_lease(NFS_SERVER(inode), data->timestamp); + nfs_post_op_update_inode(inode, data->res.fattr); + } /* Call back common NFS writeback processing */ nfs_writeback_done(task); } @@ -1990,6 +2184,7 @@ nfs4_proc_write_setup(struct nfs_write_d .rpc_cred = data->cred, }; struct inode *inode = data->inode; + struct nfs_server *server = NFS_SERVER(inode); int stable; int flags; @@ -2001,6 +2196,8 @@ nfs4_proc_write_setup(struct nfs_write_d } else stable = NFS_UNSTABLE; data->args.stable = stable; + data->args.bitmask = server->attr_bitmask; + data->res.server = server; data->timestamp = jiffies; @@ -2022,6 +2219,8 @@ nfs4_commit_done(struct rpc_task *task) rpc_restart_call(task); return; } + if (task->tk_status >= 0) + nfs_post_op_update_inode(inode, data->res.fattr); /* Call back common NFS writeback processing */ nfs_commit_done(task); } @@ -2037,8 +2236,12 @@ nfs4_proc_commit_setup(struct nfs_write_ .rpc_cred = data->cred, }; struct inode *inode = data->inode; + struct nfs_server *server = NFS_SERVER(inode); int flags; + data->args.bitmask = server->attr_bitmask; + data->res.server = server; + /* Set the initial flags for the task. */ flags = (how & FLUSH_SYNC) ? 
0 : RPC_TASK_ASYNC; @@ -2106,65 +2309,6 @@ nfs4_proc_renew(struct nfs4_client *clp) return 0; } -/* - * We will need to arrange for the VFS layer to provide an atomic open. - * Until then, this open method is prone to inefficiency and race conditions - * due to the lookup, potential create, and open VFS calls from sys_open() - * placed on the wire. - */ -static int -nfs4_proc_file_open(struct inode *inode, struct file *filp) -{ - struct dentry *dentry = filp->f_dentry; - struct nfs_open_context *ctx; - struct nfs4_state *state = NULL; - struct rpc_cred *cred; - int status = -ENOMEM; - - dprintk("nfs4_proc_file_open: starting on (%.*s/%.*s)\n", - (int)dentry->d_parent->d_name.len, - dentry->d_parent->d_name.name, - (int)dentry->d_name.len, dentry->d_name.name); - - - /* Find our open stateid */ - cred = rpcauth_lookupcred(NFS_SERVER(inode)->client->cl_auth, 0); - if (IS_ERR(cred)) - return PTR_ERR(cred); - ctx = alloc_nfs_open_context(dentry, cred); - put_rpccred(cred); - if (unlikely(ctx == NULL)) - return -ENOMEM; - status = -EIO; /* ERACE actually */ - state = nfs4_find_state(inode, cred, filp->f_mode); - if (unlikely(state == NULL)) - goto no_state; - ctx->state = state; - nfs4_close_state(state, filp->f_mode); - ctx->mode = filp->f_mode; - nfs_file_set_open_context(filp, ctx); - put_nfs_open_context(ctx); - if (filp->f_mode & FMODE_WRITE) - nfs_begin_data_update(inode); - return 0; -no_state: - printk(KERN_WARNING "NFS: v4 raced in function %s\n", __FUNCTION__); - put_nfs_open_context(ctx); - return status; -} - -/* - * Release our state - */ -static int -nfs4_proc_file_release(struct inode *inode, struct file *filp) -{ - if (filp->f_mode & FMODE_WRITE) - nfs_end_data_update(inode); - nfs_file_clear_open_context(filp); - return 0; -} - static inline int nfs4_server_supports_acls(struct nfs_server *server) { return (server->caps & NFS_CAP_ACLS) @@ -2285,7 +2429,7 @@ static inline ssize_t nfs4_get_acl_uncac return -ENOMEM; args.acl_pages[0] = localpage; 
args.acl_pgbase = 0; - args.acl_len = PAGE_SIZE; + resp_len = args.acl_len = PAGE_SIZE; } else { resp_buf = buf; buf_to_pages(buf, buflen, args.acl_pages, &args.acl_pgbase); @@ -2345,6 +2489,7 @@ static int nfs4_proc_set_acl(struct inod if (!nfs4_server_supports_acls(server)) return -EOPNOTSUPP; + nfs_inode_return_delegation(inode); buf_to_pages(buf, buflen, arg.acl_pages, &arg.acl_pgbase); ret = rpc_call_sync(NFS_SERVER(inode)->client, &msg, 0); if (ret == 0) @@ -2353,7 +2498,7 @@ static int nfs4_proc_set_acl(struct inod } static int -nfs4_async_handle_error(struct rpc_task *task, struct nfs_server *server) +nfs4_async_handle_error(struct rpc_task *task, const struct nfs_server *server) { struct nfs4_client *clp = server->nfs4_state; @@ -2431,7 +2576,7 @@ static int nfs4_delay(struct rpc_clnt *c /* This is the error handling routine for processes that are allowed * to sleep. */ -int nfs4_handle_exception(struct nfs_server *server, int errorcode, struct nfs4_exception *exception) +int nfs4_handle_exception(const struct nfs_server *server, int errorcode, struct nfs4_exception *exception) { struct nfs4_client *clp = server->nfs4_state; int ret = errorcode; @@ -2450,12 +2595,10 @@ int nfs4_handle_exception(struct nfs_ser case -NFS4ERR_GRACE: case -NFS4ERR_DELAY: ret = nfs4_delay(server->client, &exception->timeout); - if (ret == 0) - exception->retry = 1; - break; + if (ret != 0) + break; case -NFS4ERR_OLD_STATEID: - if (ret == 0) - exception->retry = 1; + exception->retry = 1; } /* We failed to handle the error */ return nfs4_map_errors(ret); @@ -2632,7 +2775,6 @@ static int _nfs4_proc_getlk(struct nfs4_ down_read(&clp->cl_sem); nlo.clientid = clp->cl_clientid; - down(&state->lock_sema); status = nfs4_set_lock_state(state, request); if (status != 0) goto out; @@ -2659,7 +2801,6 @@ static int _nfs4_proc_getlk(struct nfs4_ status = 0; } out: - up(&state->lock_sema); up_read(&clp->cl_sem); return status; } @@ -2696,67 +2837,129 @@ static int do_vfs_lock(struct file 
*file return res; } -static int _nfs4_proc_unlck(struct nfs4_state *state, int cmd, struct file_lock *request) +struct nfs4_unlockdata { + struct nfs_lockargs arg; + struct nfs_locku_opargs luargs; + struct nfs_lockres res; + struct nfs4_lock_state *lsp; + struct nfs_open_context *ctx; + atomic_t refcount; + struct completion completion; +}; + +static void nfs4_locku_release_calldata(struct nfs4_unlockdata *calldata) { - struct inode *inode = state->inode; - struct nfs_server *server = NFS_SERVER(inode); - struct nfs4_client *clp = server->nfs4_state; - struct nfs_lockargs arg = { - .fh = NFS_FH(inode), - .type = nfs4_lck_type(cmd, request), - .offset = request->fl_start, - .length = nfs4_lck_length(request), - }; - struct nfs_lockres res = { - .server = server, - }; + if (atomic_dec_and_test(&calldata->refcount)) { + nfs_free_seqid(calldata->luargs.seqid); + nfs4_put_lock_state(calldata->lsp); + put_nfs_open_context(calldata->ctx); + kfree(calldata); + } +} + +static void nfs4_locku_complete(struct nfs4_unlockdata *calldata) +{ + complete(&calldata->completion); + nfs4_locku_release_calldata(calldata); +} + +static void nfs4_locku_done(struct rpc_task *task) +{ + struct nfs4_unlockdata *calldata = (struct nfs4_unlockdata *)task->tk_calldata; + + nfs_increment_lock_seqid(task->tk_status, calldata->luargs.seqid); + switch (task->tk_status) { + case 0: + memcpy(calldata->lsp->ls_stateid.data, + calldata->res.u.stateid.data, + sizeof(calldata->lsp->ls_stateid.data)); + break; + case -NFS4ERR_STALE_STATEID: + case -NFS4ERR_EXPIRED: + nfs4_schedule_state_recovery(calldata->res.server->nfs4_state); + break; + default: + if (nfs4_async_handle_error(task, calldata->res.server) == -EAGAIN) { + rpc_restart_call(task); + return; + } + } + nfs4_locku_complete(calldata); +} + +static void nfs4_locku_begin(struct rpc_task *task) +{ + struct nfs4_unlockdata *calldata = (struct nfs4_unlockdata *)task->tk_calldata; struct rpc_message msg = { .rpc_proc = 
&nfs4_procedures[NFSPROC4_CLNT_LOCKU], - .rpc_argp = &arg, - .rpc_resp = &res, - .rpc_cred = state->owner->so_cred, + .rpc_argp = &calldata->arg, + .rpc_resp = &calldata->res, + .rpc_cred = calldata->lsp->ls_state->owner->so_cred, }; + int status; + + status = nfs_wait_on_sequence(calldata->luargs.seqid, task); + if (status != 0) + return; + if ((calldata->lsp->ls_flags & NFS_LOCK_INITIALIZED) == 0) { + nfs4_locku_complete(calldata); + task->tk_exit = NULL; + rpc_exit(task, 0); + return; + } + rpc_call_setup(task, &msg, 0); +} + +static int nfs4_proc_unlck(struct nfs4_state *state, int cmd, struct file_lock *request) +{ + struct nfs4_unlockdata *calldata; + struct inode *inode = state->inode; + struct nfs_server *server = NFS_SERVER(inode); struct nfs4_lock_state *lsp; - struct nfs_locku_opargs luargs; int status; - - down_read(&clp->cl_sem); - down(&state->lock_sema); + + /* Is this a delegated lock? */ + if (test_bit(NFS_DELEGATED_STATE, &state->flags)) + return do_vfs_lock(request->fl_file, request); + status = nfs4_set_lock_state(state, request); if (status != 0) - goto out; + return status; lsp = request->fl_u.nfs4_fl.owner; /* We might have lost the locks! 
*/ if ((lsp->ls_flags & NFS_LOCK_INITIALIZED) == 0) - goto out; - luargs.seqid = lsp->ls_seqid; - memcpy(&luargs.stateid, &lsp->ls_stateid, sizeof(luargs.stateid)); - arg.u.locku = &luargs; - status = rpc_call_sync(server->client, &msg, RPC_TASK_NOINTR); - nfs4_increment_lock_seqid(status, lsp); + return 0; + calldata = kmalloc(sizeof(*calldata), GFP_KERNEL); + if (calldata == NULL) + return -ENOMEM; + calldata->luargs.seqid = nfs_alloc_seqid(&lsp->ls_seqid); + if (calldata->luargs.seqid == NULL) { + kfree(calldata); + return -ENOMEM; + } + calldata->luargs.stateid = &lsp->ls_stateid; + calldata->arg.fh = NFS_FH(inode); + calldata->arg.type = nfs4_lck_type(cmd, request); + calldata->arg.offset = request->fl_start; + calldata->arg.length = nfs4_lck_length(request); + calldata->arg.u.locku = &calldata->luargs; + calldata->res.server = server; + calldata->lsp = lsp; + atomic_inc(&lsp->ls_count); - if (status == 0) - memcpy(&lsp->ls_stateid, &res.u.stateid, - sizeof(lsp->ls_stateid)); -out: - up(&state->lock_sema); - if (status == 0) - do_vfs_lock(request->fl_file, request); - up_read(&clp->cl_sem); - return status; -} + /* Ensure we don't close file until we're done freeing locks! 
*/ + calldata->ctx = get_nfs_open_context((struct nfs_open_context*)request->fl_file->private_data); -static int nfs4_proc_unlck(struct nfs4_state *state, int cmd, struct file_lock *request) -{ - struct nfs4_exception exception = { }; - int err; + atomic_set(&calldata->refcount, 2); + init_completion(&calldata->completion); - do { - err = nfs4_handle_exception(NFS_SERVER(state->inode), - _nfs4_proc_unlck(state, cmd, request), - &exception); - } while (exception.retry); - return err; + status = nfs4_call_async(NFS_SERVER(inode)->client, nfs4_locku_begin, + nfs4_locku_done, calldata); + if (status == 0) + wait_for_completion_interruptible(&calldata->completion); + do_vfs_lock(request->fl_file, request); + nfs4_locku_release_calldata(calldata); + return status; } static int _nfs4_do_setlk(struct nfs4_state *state, int cmd, struct file_lock *request, int reclaim) @@ -2764,11 +2967,23 @@ static int _nfs4_do_setlk(struct nfs4_st struct inode *inode = state->inode; struct nfs_server *server = NFS_SERVER(inode); struct nfs4_lock_state *lsp = request->fl_u.nfs4_fl.owner; + struct nfs_lock_opargs largs = { + .lock_stateid = &lsp->ls_stateid, + .open_stateid = &state->stateid, + .lock_owner = { + .clientid = server->nfs4_state->cl_clientid, + .id = lsp->ls_id, + }, + .reclaim = reclaim, + }; struct nfs_lockargs arg = { .fh = NFS_FH(inode), .type = nfs4_lck_type(cmd, request), .offset = request->fl_start, .length = nfs4_lck_length(request), + .u = { + .lock = &largs, + }, }; struct nfs_lockres res = { .server = server, @@ -2779,53 +2994,39 @@ static int _nfs4_do_setlk(struct nfs4_st .rpc_resp = &res, .rpc_cred = state->owner->so_cred, }; - struct nfs_lock_opargs largs = { - .reclaim = reclaim, - .new_lock_owner = 0, - }; - int status; + int status = -ENOMEM; - if (!(lsp->ls_flags & NFS_LOCK_INITIALIZED)) { + largs.lock_seqid = nfs_alloc_seqid(&lsp->ls_seqid); + if (largs.lock_seqid == NULL) + return -ENOMEM; + if (!(lsp->ls_seqid.flags & NFS_SEQID_CONFIRMED)) { struct 
nfs4_state_owner *owner = state->owner; - struct nfs_open_to_lock otl = { - .lock_owner = { - .clientid = server->nfs4_state->cl_clientid, - }, - }; - - otl.lock_seqid = lsp->ls_seqid; - otl.lock_owner.id = lsp->ls_id; - memcpy(&otl.open_stateid, &state->stateid, sizeof(otl.open_stateid)); - largs.u.open_lock = &otl; + + largs.open_seqid = nfs_alloc_seqid(&owner->so_seqid); + if (largs.open_seqid == NULL) + goto out; largs.new_lock_owner = 1; - arg.u.lock = &largs; - down(&owner->so_sema); - otl.open_seqid = owner->so_seqid; status = rpc_call_sync(server->client, &msg, RPC_TASK_NOINTR); - /* increment open_owner seqid on success, and - * seqid mutating errors */ - nfs4_increment_seqid(status, owner); - up(&owner->so_sema); - if (status == 0) { - lsp->ls_flags |= NFS_LOCK_INITIALIZED; - lsp->ls_seqid++; + /* increment open seqid on success, and seqid mutating errors */ + if (largs.new_lock_owner != 0) { + nfs_increment_open_seqid(status, largs.open_seqid); + if (status == 0) + nfs_confirm_seqid(&lsp->ls_seqid, 0); } - } else { - struct nfs_exist_lock el = { - .seqid = lsp->ls_seqid, - }; - memcpy(&el.stateid, &lsp->ls_stateid, sizeof(el.stateid)); - largs.u.exist_lock = &el; - arg.u.lock = &largs; + nfs_free_seqid(largs.open_seqid); + } else status = rpc_call_sync(server->client, &msg, RPC_TASK_NOINTR); - /* increment seqid on success, and * seqid mutating errors*/ - nfs4_increment_lock_seqid(status, lsp); - } + /* increment lock seqid on success, and seqid mutating errors*/ + nfs_increment_lock_seqid(status, largs.lock_seqid); /* save the returned stateid.
*/ - if (status == 0) - memcpy(&lsp->ls_stateid, &res.u.stateid, sizeof(nfs4_stateid)); - else if (status == -NFS4ERR_DENIED) + if (status == 0) { + memcpy(lsp->ls_stateid.data, res.u.stateid.data, + sizeof(lsp->ls_stateid.data)); + lsp->ls_flags |= NFS_LOCK_INITIALIZED; + } else if (status == -NFS4ERR_DENIED) status = -EAGAIN; +out: + nfs_free_seqid(largs.lock_seqid); return status; } @@ -2835,6 +3036,9 @@ static int nfs4_lock_reclaim(struct nfs4 struct nfs4_exception exception = { }; int err; + /* Cache the lock if possible... */ + if (test_bit(NFS_DELEGATED_STATE, &state->flags)) + return 0; do { err = _nfs4_do_setlk(state, F_SETLK, request, 1); if (err != -NFS4ERR_DELAY) @@ -2850,6 +3054,9 @@ static int nfs4_lock_expired(struct nfs4 struct nfs4_exception exception = { }; int err; + err = nfs4_set_lock_state(state, request); + if (err != 0) + return err; do { err = _nfs4_do_setlk(state, F_SETLK, request, 0); if (err != -NFS4ERR_DELAY) @@ -2865,17 +3072,25 @@ static int _nfs4_proc_setlk(struct nfs4_ int status; down_read(&clp->cl_sem); - down(&state->lock_sema); - status = nfs4_set_lock_state(state, request); - if (status == 0) - status = _nfs4_do_setlk(state, cmd, request, 0); - up(&state->lock_sema); - if (status == 0) { - /* Note: we always want to sleep here! */ - request->fl_flags |= FL_SLEEP; - if (do_vfs_lock(request->fl_file, request) < 0) - printk(KERN_WARNING "%s: VFS is out of sync with lock manager!\n", __FUNCTION__); + /* Is this a delegated open? */ + if (test_bit(NFS_DELEGATED_STATE, &state->flags)) { + /* Yes: cache locks! */ + status = do_vfs_lock(request->fl_file, request); + /* ...but avoid races with delegation recall... */ + if (status < 0 || test_bit(NFS_DELEGATED_STATE, &state->flags)) + goto out; } + status = nfs4_set_lock_state(state, request); + if (status != 0) + goto out; + status = _nfs4_do_setlk(state, cmd, request, 0); + if (status != 0) + goto out; + /* Note: we always want to sleep here! 
*/ + request->fl_flags |= FL_SLEEP; + if (do_vfs_lock(request->fl_file, request) < 0) + printk(KERN_WARNING "%s: VFS is out of sync with lock manager!\n", __FUNCTION__); +out: up_read(&clp->cl_sem); return status; } @@ -2929,6 +3144,24 @@ nfs4_proc_lock(struct file *filp, int cm return status; } +int nfs4_lock_delegation_recall(struct nfs4_state *state, struct file_lock *fl) +{ + struct nfs_server *server = NFS_SERVER(state->inode); + struct nfs4_exception exception = { }; + int err; + + err = nfs4_set_lock_state(state, fl); + if (err != 0) + goto out; + do { + err = _nfs4_do_setlk(state, F_SETLK, fl, 0); + if (err != -NFS4ERR_DELAY) + break; + err = nfs4_handle_exception(server, err, &exception); + } while (exception.retry); +out: + return err; +} #define XATTR_NAME_NFSV4_ACL "system.nfs4_acl" @@ -3024,8 +3257,8 @@ struct nfs_rpc_ops nfs_v4_clientops = { .read_setup = nfs4_proc_read_setup, .write_setup = nfs4_proc_write_setup, .commit_setup = nfs4_proc_commit_setup, - .file_open = nfs4_proc_file_open, - .file_release = nfs4_proc_file_release, + .file_open = nfs_open, + .file_release = nfs_release, .lock = nfs4_proc_lock, .clear_acl_cache = nfs4_zap_acl_attr, }; diff -puN fs/nfs/nfs4state.c~CITI_NFS4_ALL fs/nfs/nfs4state.c --- linux-2.6.14/fs/nfs/nfs4state.c~CITI_NFS4_ALL 2005-11-07 13:26:15.000000000 -0500 +++ linux-2.6.14-bfields/fs/nfs/nfs4state.c 2005-11-07 13:26:18.000000000 -0500 @@ -264,13 +264,16 @@ nfs4_alloc_state_owner(void) { struct nfs4_state_owner *sp; - sp = kmalloc(sizeof(*sp),GFP_KERNEL); + sp = kzalloc(sizeof(*sp),GFP_KERNEL); if (!sp) return NULL; - init_MUTEX(&sp->so_sema); - sp->so_seqid = 0; /* arbitrary */ + spin_lock_init(&sp->so_lock); INIT_LIST_HEAD(&sp->so_states); INIT_LIST_HEAD(&sp->so_delegations); + rpc_init_wait_queue(&sp->so_sequence.wait, "Seqid_waitqueue"); + sp->so_seqid.sequence = &sp->so_sequence; + spin_lock_init(&sp->so_sequence.lock); + INIT_LIST_HEAD(&sp->so_sequence.list); atomic_set(&sp->so_count, 1); return sp; } @@ 
-359,35 +362,25 @@ nfs4_alloc_open_state(void) memset(state->stateid.data, 0, sizeof(state->stateid.data)); atomic_set(&state->count, 1); INIT_LIST_HEAD(&state->lock_states); - init_MUTEX(&state->lock_sema); spin_lock_init(&state->state_lock); return state; } -static struct nfs4_state * -__nfs4_find_state(struct inode *inode, struct rpc_cred *cred, mode_t mode) +void +nfs4_state_set_mode_locked(struct nfs4_state *state, mode_t mode) { - struct nfs_inode *nfsi = NFS_I(inode); - struct nfs4_state *state; - - mode &= (FMODE_READ|FMODE_WRITE); - list_for_each_entry(state, &nfsi->open_states, inode_states) { - if (state->owner->so_cred != cred) - continue; - if ((mode & FMODE_READ) != 0 && state->nreaders == 0) - continue; - if ((mode & FMODE_WRITE) != 0 && state->nwriters == 0) - continue; - if ((state->state & mode) != mode) - continue; - atomic_inc(&state->count); - if (mode & FMODE_READ) - state->nreaders++; + if (state->state == mode) + return; + /* NB! List reordering - see the reclaim code for why. */ + if ((mode & FMODE_WRITE) != (state->state & FMODE_WRITE)) { if (mode & FMODE_WRITE) - state->nwriters++; - return state; - } - return NULL; + list_move(&state->open_states, &state->owner->so_states); + else + list_move_tail(&state->open_states, &state->owner->so_states); + } + if (mode == 0) + list_del_init(&state->inode_states); + state->state = mode; } static struct nfs4_state * @@ -398,7 +391,7 @@ __nfs4_find_state_byowner(struct inode * list_for_each_entry(state, &nfsi->open_states, inode_states) { /* Is this in the process of being freed? 
*/ - if (state->nreaders == 0 && state->nwriters == 0) + if (state->state == 0) continue; if (state->owner == owner) { atomic_inc(&state->count); @@ -408,17 +401,6 @@ __nfs4_find_state_byowner(struct inode * return NULL; } -struct nfs4_state * -nfs4_find_state(struct inode *inode, struct rpc_cred *cred, mode_t mode) -{ - struct nfs4_state *state; - - spin_lock(&inode->i_lock); - state = __nfs4_find_state(inode, cred, mode); - spin_unlock(&inode->i_lock); - return state; -} - static void nfs4_free_open_state(struct nfs4_state *state) { @@ -437,21 +419,23 @@ nfs4_get_open_state(struct inode *inode, if (state) goto out; new = nfs4_alloc_open_state(); + spin_lock(&owner->so_lock); spin_lock(&inode->i_lock); state = __nfs4_find_state_byowner(inode, owner); if (state == NULL && new != NULL) { state = new; - /* Caller *must* be holding owner->so_sem */ - /* Note: The reclaim code dictates that we add stateless - * and read-only stateids to the end of the list */ - list_add_tail(&state->open_states, &owner->so_states); state->owner = owner; atomic_inc(&owner->so_count); list_add(&state->inode_states, &nfsi->open_states); state->inode = igrab(inode); spin_unlock(&inode->i_lock); + /* Note: The reclaim code dictates that we add stateless + * and read-only stateids to the end of the list */ + list_add_tail(&state->open_states, &owner->so_states); + spin_unlock(&owner->so_lock); } else { spin_unlock(&inode->i_lock); + spin_unlock(&owner->so_lock); if (new) nfs4_free_open_state(new); } @@ -461,68 +445,59 @@ out: /* * Beware! Caller must be holding exactly one - * reference to clp->cl_sem and owner->so_sema! + * reference to clp->cl_sem! 
*/ void nfs4_put_open_state(struct nfs4_state *state) { struct inode *inode = state->inode; struct nfs4_state_owner *owner = state->owner; - if (!atomic_dec_and_lock(&state->count, &inode->i_lock)) + if (!atomic_dec_and_lock(&state->count, &owner->so_lock)) return; + spin_lock(&inode->i_lock); if (!list_empty(&state->inode_states)) list_del(&state->inode_states); - spin_unlock(&inode->i_lock); list_del(&state->open_states); + spin_unlock(&inode->i_lock); + spin_unlock(&owner->so_lock); iput(inode); - BUG_ON (state->state != 0); nfs4_free_open_state(state); nfs4_put_state_owner(owner); } /* - * Beware! Caller must be holding no references to clp->cl_sem! - * of owner->so_sema! + * Close the current file. */ void nfs4_close_state(struct nfs4_state *state, mode_t mode) { struct inode *inode = state->inode; struct nfs4_state_owner *owner = state->owner; - struct nfs4_client *clp = owner->so_client; - int newstate; + int oldstate, newstate = 0; atomic_inc(&owner->so_count); - down_read(&clp->cl_sem); - down(&owner->so_sema); /* Protect against nfs4_find_state() */ + spin_lock(&owner->so_lock); spin_lock(&inode->i_lock); if (mode & FMODE_READ) state->nreaders--; if (mode & FMODE_WRITE) state->nwriters--; - if (state->nwriters == 0) { - if (state->nreaders == 0) - list_del_init(&state->inode_states); - /* See reclaim code */ - list_move_tail(&state->open_states, &owner->so_states); + oldstate = newstate = state->state; + if (state->nreaders == 0) + newstate &= ~FMODE_READ; + if (state->nwriters == 0) + newstate &= ~FMODE_WRITE; + if (test_bit(NFS_DELEGATED_STATE, &state->flags)) { + nfs4_state_set_mode_locked(state, newstate); + oldstate = newstate; } spin_unlock(&inode->i_lock); - newstate = 0; - if (state->state != 0) { - if (state->nreaders) - newstate |= FMODE_READ; - if (state->nwriters) - newstate |= FMODE_WRITE; - if (state->state == newstate) - goto out; - if (nfs4_do_close(inode, state, newstate) == -EINPROGRESS) - return; - } -out: + 
spin_unlock(&owner->so_lock); + + if (oldstate != newstate && nfs4_do_close(inode, state) == 0) + return; nfs4_put_open_state(state); - up(&owner->so_sema); nfs4_put_state_owner(owner); - up_read(&clp->cl_sem); } /* @@ -546,19 +521,16 @@ __nfs4_find_lock_state(struct nfs4_state * Return a compatible lock_state. If no initialized lock_state structure * exists, return an uninitialized one. * - * The caller must be holding state->lock_sema */ static struct nfs4_lock_state *nfs4_alloc_lock_state(struct nfs4_state *state, fl_owner_t fl_owner) { struct nfs4_lock_state *lsp; struct nfs4_client *clp = state->owner->so_client; - lsp = kmalloc(sizeof(*lsp), GFP_KERNEL); + lsp = kzalloc(sizeof(*lsp), GFP_KERNEL); if (lsp == NULL) return NULL; - lsp->ls_flags = 0; - lsp->ls_seqid = 0; /* arbitrary */ - memset(lsp->ls_stateid.data, 0, sizeof(lsp->ls_stateid.data)); + lsp->ls_seqid.sequence = &state->owner->so_sequence; atomic_set(&lsp->ls_count, 1); lsp->ls_owner = fl_owner; spin_lock(&clp->cl_lock); @@ -572,7 +544,7 @@ static struct nfs4_lock_state *nfs4_allo * Return a compatible lock_state. If no initialized lock_state structure * exists, return an uninitialized one. * - * The caller must be holding state->lock_sema and clp->cl_sem + * The caller must be holding clp->cl_sem */ static struct nfs4_lock_state *nfs4_get_lock_state(struct nfs4_state *state, fl_owner_t owner) { @@ -605,7 +577,7 @@ static struct nfs4_lock_state *nfs4_get_ * Release reference to lock_state, and free it if we see that * it is no longer in use */ -static void nfs4_put_lock_state(struct nfs4_lock_state *lsp) +void nfs4_put_lock_state(struct nfs4_lock_state *lsp) { struct nfs4_state *state; @@ -673,29 +645,94 @@ void nfs4_copy_stateid(nfs4_stateid *dst nfs4_put_lock_state(lsp); } -/* -* Called with state->lock_sema and clp->cl_sem held. 
-*/ -void nfs4_increment_lock_seqid(int status, struct nfs4_lock_state *lsp) -{ - if (status == NFS_OK || seqid_mutating_err(-status)) - lsp->ls_seqid++; +struct nfs_seqid *nfs_alloc_seqid(struct nfs_seqid_counter *counter) +{ + struct nfs_seqid *new; + + new = kmalloc(sizeof(*new), GFP_KERNEL); + if (new != NULL) { + new->sequence = counter; + INIT_LIST_HEAD(&new->list); + } + return new; +} + +void nfs_free_seqid(struct nfs_seqid *seqid) +{ + struct rpc_sequence *sequence = seqid->sequence->sequence; + + if (!list_empty(&seqid->list)) { + spin_lock(&sequence->lock); + list_del(&seqid->list); + spin_unlock(&sequence->lock); + } + rpc_wake_up_next(&sequence->wait); + kfree(seqid); } /* -* Called with sp->so_sema and clp->cl_sem held. -* -* Increment the seqid if the OPEN/OPEN_DOWNGRADE/CLOSE succeeded, or -* failed with a seqid incrementing error - -* see comments nfs_fs.h:seqid_mutating_error() -*/ -void nfs4_increment_seqid(int status, struct nfs4_state_owner *sp) -{ - if (status == NFS_OK || seqid_mutating_err(-status)) - sp->so_seqid++; - /* If the server returns BAD_SEQID, unhash state_owner here */ - if (status == -NFS4ERR_BAD_SEQID) + * Increment the seqid if the OPEN/OPEN_DOWNGRADE/CLOSE succeeded, or + * failed with a seqid incrementing error - + * see comments nfs_fs.h:seqid_mutating_error() + */ +static inline void nfs_increment_seqid(int status, struct nfs_seqid *seqid) +{ + switch (status) { + case 0: + break; + case -NFS4ERR_BAD_SEQID: + case -NFS4ERR_STALE_CLIENTID: + case -NFS4ERR_STALE_STATEID: + case -NFS4ERR_BAD_STATEID: + case -NFS4ERR_BADXDR: + case -NFS4ERR_RESOURCE: + case -NFS4ERR_NOFILEHANDLE: + /* Non-seqid mutating errors */ + return; + }; + /* + * Note: no locking needed as we are guaranteed to be first + * on the sequence list + */ + seqid->sequence->counter++; +} + +void nfs_increment_open_seqid(int status, struct nfs_seqid *seqid) +{ + if (status == -NFS4ERR_BAD_SEQID) { + struct nfs4_state_owner *sp = container_of(seqid->sequence, + 
struct nfs4_state_owner, so_seqid); nfs4_drop_state_owner(sp); + } + return nfs_increment_seqid(status, seqid); +} + +/* + * Increment the seqid if the LOCK/LOCKU succeeded, or + * failed with a seqid incrementing error - + * see comments nfs_fs.h:seqid_mutating_error() + */ +void nfs_increment_lock_seqid(int status, struct nfs_seqid *seqid) +{ + return nfs_increment_seqid(status, seqid); +} + +int nfs_wait_on_sequence(struct nfs_seqid *seqid, struct rpc_task *task) +{ + struct rpc_sequence *sequence = seqid->sequence->sequence; + int status = 0; + + if (sequence->list.next == &seqid->list) + goto out; + spin_lock(&sequence->lock); + if (!list_empty(&sequence->list)) { + rpc_sleep_on(&sequence->wait, task, NULL, NULL); + status = -EAGAIN; + } else + list_add(&seqid->list, &sequence->list); + spin_unlock(&sequence->lock); +out: + return status; } static int reclaimer(void *); @@ -747,7 +784,7 @@ static int nfs4_reclaim_locks(struct nfs int status = 0; for (fl = inode->i_flock; fl != 0; fl = fl->fl_next) { - if (!(fl->fl_flags & FL_POSIX)) + if (!(fl->fl_flags & (FL_POSIX|FL_FLOCK))) continue; if (((struct nfs_open_context *)fl->fl_file->private_data)->state != state) continue; @@ -762,7 +799,7 @@ static int nfs4_reclaim_locks(struct nfs case -NFS4ERR_NO_GRACE: case -NFS4ERR_RECLAIM_BAD: case -NFS4ERR_RECLAIM_CONFLICT: - /* kill_proc(fl->fl_owner, SIGLOST, 1); */ + /* kill_proc(fl->fl_pid, SIGLOST, 1); */ break; case -NFS4ERR_STALE_CLIENTID: goto out_err; @@ -791,8 +828,6 @@ static int nfs4_reclaim_open_state(struc if (state->state == 0) continue; status = ops->recover_open(sp, state); - list_for_each_entry(lock, &state->lock_states, ls_locks) - lock->ls_flags &= ~NFS_LOCK_INITIALIZED; if (status >= 0) { status = nfs4_reclaim_locks(ops, state); if (status < 0) @@ -831,6 +866,28 @@ out_err: return status; } +static void nfs4_state_mark_reclaim(struct nfs4_client *clp) +{ + struct nfs4_state_owner *sp; + struct nfs4_state *state; + struct nfs4_lock_state *lock; + + /* 
Reset all sequence ids to zero */ + list_for_each_entry(sp, &clp->cl_state_owners, so_list) { + sp->so_seqid.counter = 0; + sp->so_seqid.flags = 0; + spin_lock(&sp->so_lock); + list_for_each_entry(state, &sp->so_states, open_states) { + list_for_each_entry(lock, &state->lock_states, ls_locks) { + lock->ls_seqid.counter = 0; + lock->ls_seqid.flags = 0; + lock->ls_flags &= ~NFS_LOCK_INITIALIZED; + } + } + spin_unlock(&sp->so_lock); + } +} + static int reclaimer(void *ptr) { struct reclaimer_args *args = (struct reclaimer_args *)ptr; @@ -864,6 +921,7 @@ restart_loop: default: ops = &nfs4_network_partition_recovery_ops; }; + nfs4_state_mark_reclaim(clp); status = __nfs4_init_client(clp); if (status) goto out_error; diff -puN fs/nfs/nfs4xdr.c~CITI_NFS4_ALL fs/nfs/nfs4xdr.c --- linux-2.6.14/fs/nfs/nfs4xdr.c~CITI_NFS4_ALL 2005-11-07 13:26:15.000000000 -0500 +++ linux-2.6.14-bfields/fs/nfs/nfs4xdr.c 2005-11-07 13:26:18.000000000 -0500 @@ -95,6 +95,8 @@ static int nfs_stat_to_errno(int); #define decode_getattr_maxsz (op_decode