diff -dpruN Linux-2.4.4/fs/nfs/dir.c linux/fs/nfs/dir.c --- Linux-2.4.4/fs/nfs/dir.c Tue May 1 14:32:16 2001 +++ linux/fs/nfs/dir.c Tue May 1 15:30:35 2001 @@ -370,7 +370,7 @@ static int nfs_readdir(struct file *filp struct nfs_entry my_entry; long res; - res = nfs_revalidate(dentry); + res = nfs_revalidate_inode(dentry); if (res < 0) return res; @@ -516,7 +516,7 @@ static int nfs_lookup_revalidate(struct goto out_valid; if (IS_ROOT(dentry)) { - __nfs_revalidate_inode(NFS_SERVER(inode), inode); + __nfs_revalidate_inode(inode); goto out_valid_renew; } diff -dpruN Linux-2.4.4/fs/nfs/file.c linux/fs/nfs/file.c --- Linux-2.4.4/fs/nfs/file.c Tue May 1 14:31:33 2001 +++ linux/fs/nfs/file.c Tue May 1 14:49:32 2001 @@ -91,14 +91,13 @@ static ssize_t nfs_file_read(struct file * file, char * buf, size_t count, loff_t *ppos) { struct dentry * dentry = file->f_dentry; - struct inode * inode = dentry->d_inode; ssize_t result; dfprintk(VFS, "nfs: read(%s/%s, %lu@%lu)\n", dentry->d_parent->d_name.name, dentry->d_name.name, (unsigned long) count, (unsigned long) *ppos); - result = nfs_revalidate_inode(NFS_SERVER(inode), inode); + result = nfs_revalidate_inode(dentry); if (!result) result = generic_file_read(file, buf, count, ppos); return result; @@ -108,13 +107,12 @@ static int nfs_file_mmap(struct file * file, struct vm_area_struct * vma) { struct dentry *dentry = file->f_dentry; - struct inode *inode = dentry->d_inode; int status; dfprintk(VFS, "nfs: mmap(%s/%s)\n", dentry->d_parent->d_name.name, dentry->d_name.name); - status = nfs_revalidate_inode(NFS_SERVER(inode), inode); + status = nfs_revalidate_inode(dentry); if (!status) status = generic_file_mmap(file, vma); return status; @@ -224,7 +222,7 @@ nfs_file_write(struct file *file, const result = -EBUSY; if (IS_SWAPFILE(inode)) goto out_swapfile; - result = nfs_revalidate_inode(NFS_SERVER(inode), inode); + result = nfs_revalidate_inode(dentry); if (result) goto out; diff -dpruN Linux-2.4.4/fs/nfs/inode.c linux/fs/nfs/inode.c --- Linux-2.4.4/fs/nfs/inode.c Tue May 1 14:33:01 2001 +++ linux/fs/nfs/inode.c Wed May 2 14:56:55 2001 @@ -111,8 +111,7 @@ nfs_read_inode(struct inode * inode) inode->u.nfs_i.ncommit = 0; inode->u.nfs_i.npages = 0; NFS_CACHEINV(inode); - NFS_ATTRTIMEO(inode) = NFS_MINATTRTIMEO(inode); - NFS_ATTRTIMEO_UPDATE(inode) = jiffies; + nfs_reset_attr_timeout(inode); } static void @@ -529,8 +528,7 @@ nfs_statfs(struct super_block *sb, struc void nfs_zap_caches(struct inode *inode) { - NFS_ATTRTIMEO(inode) = NFS_MINATTRTIMEO(inode); - NFS_ATTRTIMEO_UPDATE(inode) = jiffies; + nfs_reset_attr_timeout(inode); invalidate_inode_pages(inode); @@ -551,6 +549,29 @@ nfs_invalidate_inode(struct inode *inode nfs_zap_caches(inode); } +static inline void +nfs_fill_in_attributes(struct inode *inode, struct nfs_fattr *fattr) +{ + inode->i_mode = fattr->mode; + inode->i_nlink = fattr->nlink; + inode->i_uid = fattr->uid; + inode->i_gid = fattr->gid; + + if (fattr->valid & NFS_ATTR_FATTR_V3) { + /* + * report the blocks in 512byte units + */ + inode->i_blocks = nfs_calc_block_size(fattr->du.nfs3.used); + inode->i_blksize = inode->i_sb->s_blocksize; + } else { + inode->i_blocks = fattr->du.nfs2.blocks; + inode->i_blksize = fattr->du.nfs2.blocksize; + } + inode->i_rdev = 0; + if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode)) + inode->i_rdev = to_kdev_t(fattr->rdev); +} + /* * Fill in inode information from the fattr. */ @@ -579,10 +600,7 @@ nfs_fill_inode(struct inode *inode, stru inode->i_op = &nfs_symlink_inode_operations; else init_special_inode(inode, inode->i_mode, fattr->rdev); - /* - * Preset the size and mtime, as there's no need - * to invalidate the caches. - */ + inode->i_size = nfs_size_to_loff_t(fattr->size); inode->i_mtime = nfs_time_to_secs(fattr->mtime); inode->i_atime = nfs_time_to_secs(fattr->atime); @@ -591,11 +609,11 @@ nfs_fill_inode(struct inode *inode, stru NFS_CACHE_MTIME(inode) = fattr->mtime; NFS_CACHE_ATIME(inode) = fattr->atime; NFS_CACHE_ISIZE(inode) = fattr->size; - NFS_ATTRTIMEO(inode) = NFS_MINATTRTIMEO(inode); - NFS_ATTRTIMEO_UPDATE(inode) = jiffies; + nfs_reset_attr_timeout(inode); + nfs_fill_in_attributes(inode, fattr); memcpy(&inode->u.nfs_i.fh, fh, sizeof(inode->u.nfs_i.fh)); - } - nfs_refresh_inode(inode, fattr); + } else + __nfs_refresh_inode(inode, fattr, NFS_CHECK_INVALIDATE); } struct nfs_find_desc { @@ -640,7 +658,7 @@ nfs_inode_is_stale(struct inode *inode, /* Has the filehandle changed? If so is the old one stale? */ if (memcmp(&inode->u.nfs_i.fh, fh, sizeof(inode->u.nfs_i.fh)) != 0 && - __nfs_revalidate_inode(NFS_SERVER(inode),inode) == -ESTALE) + __nfs_revalidate_inode(inode) == -ESTALE) return 1; return 0; @@ -708,16 +726,9 @@ nfs_notify_change(struct dentry *dentry, struct nfs_fattr fattr; int error; - /* - * Make sure the inode is up-to-date. - */ - error = nfs_revalidate(dentry); - if (error) { -#ifdef NFS_PARANOIA -printk("nfs_notify_change: revalidate failed, error=%d\n", error); -#endif + error = nfs_revalidate_inode(dentry); + if (error) goto out; - } if (!S_ISREG(inode->i_mode)) attr->ia_valid &= ~ATTR_SIZE; @@ -741,16 +752,11 @@ printk("nfs_notify_change: revalidate fa } /* - * If we changed the size or mtime, update the inode - * now to avoid invalidating the page cache. + * XXX: this is not quite right. for NFSv3, the server could + * return attributes that indicate other changes to the file + * that necessitate invalidating our page cache. */ - if (!(fattr.valid & NFS_ATTR_WCC)) { - fattr.pre_size = NFS_CACHE_ISIZE(inode); - fattr.pre_mtime = NFS_CACHE_MTIME(inode); - fattr.pre_ctime = NFS_CACHE_CTIME(inode); - fattr.valid |= NFS_ATTR_WCC; - } - error = nfs_refresh_inode(inode, &fattr); + error = __nfs_refresh_inode(inode, &fattr, NFS_DONT_INVALIDATE); out: return error; } @@ -773,13 +779,23 @@ nfs_wait_on_inode(struct inode *inode, i } /* - * Externally visible revalidation function + * Soft revalidation -- if the attributes were checked recently, + * pretend they are up to date. */ int nfs_revalidate(struct dentry *dentry) { struct inode *inode = dentry->d_inode; - return nfs_revalidate_inode(NFS_SERVER(inode), inode); + int error = 0; + + if (time_before(jiffies, (NFS_READTIME(inode) + NFS_ATTRTIMEO(inode)))) + error = NFS_STALE(inode) ? -ESTALE : 0; + else { + lock_kernel(); + error = __nfs_revalidate_inode(inode); + unlock_kernel(); + } + return error; } /* @@ -820,26 +836,21 @@ int nfs_release(struct inode *inode, str * the cached attributes have to be refreshed. */ int -__nfs_revalidate_inode(struct nfs_server *server, struct inode *inode) +__nfs_revalidate_inode(struct inode *inode) { - int status = 0; + int status = -ESTALE; struct nfs_fattr fattr; dfprintk(PAGECACHE, "NFS: revalidating (%x/%Ld)\n", inode->i_dev, (long long)NFS_FILEID(inode)); - lock_kernel(); - if (!inode || is_bad_inode(inode) || NFS_STALE(inode)) { - unlock_kernel(); - return -ESTALE; - } + if (is_bad_inode(inode) || NFS_STALE(inode)) + goto out_nowait; while (NFS_REVALIDATING(inode)) { status = nfs_wait_on_inode(inode, NFS_INO_REVALIDATING); - if (status < 0) { - unlock_kernel(); - return status; - } + if (status < 0) + goto out_nowait; if (time_before(jiffies,NFS_READTIME(inode)+NFS_ATTRTIMEO(inode))) { status = NFS_STALE(inode) ? -ESTALE : 0; goto out_nowait; @@ -858,7 +869,7 @@ __nfs_revalidate_inode(struct nfs_server goto out; } - status = nfs_refresh_inode(inode, &fattr); + status = __nfs_refresh_inode(inode, &fattr, NFS_CHECK_INVALIDATE); if (status) { dfprintk(PAGECACHE, "nfs_revalidate_inode: (%x/%Ld) refresh failed, error=%d\n", inode->i_dev, (long long)NFS_FILEID(inode), status); @@ -869,8 +880,8 @@ __nfs_revalidate_inode(struct nfs_server out: NFS_FLAGS(inode) &= ~NFS_INO_REVALIDATING; wake_up(&inode->i_wait); - out_nowait: - unlock_kernel(); + +out_nowait: return status; } @@ -885,145 +896,100 @@ out: * our best to flush them, we make them sleep during the attribute refresh. * * A very similar scenario holds for the dir cache. + * + * Also note that servers that don't support sub-second file timestamps + * will cause problems for this logic. Clients can update the same file + * during a single second without changing its size. The file's mtime + * won't change, so no cache invalidation will occur. The same problem + * holds for ctime -- during file creation, a single client can make + * several separate changes to a file's attributes, but the ctime will + * be the same. */ int -nfs_refresh_inode(struct inode *inode, struct nfs_fattr *fattr) +__nfs_refresh_inode(struct inode *inode, struct nfs_fattr *fattr, int check) { - __u64 new_size, new_mtime; - loff_t new_isize; - int invalid = 0; - int error = -EIO; - - if (!inode || !fattr) { - printk(KERN_ERR "nfs_refresh_inode: inode or fattr is NULL\n"); - goto out; - } - if (inode->i_mode == 0) { - printk(KERN_ERR "nfs_refresh_inode: empty inode\n"); - goto out; - } + loff_t new_isize; + int invalid; + int error = -EIO; - if ((fattr->valid & NFS_ATTR_FATTR) == 0) + if (!inode->i_mode) goto out; if (is_bad_inode(inode)) goto out; - dfprintk(VFS, "NFS: refresh_inode(%x/%ld ct=%d info=0x%x)\n", - inode->i_dev, inode->i_ino, - atomic_read(&inode->i_count), fattr->valid); - - if (NFS_FSID(inode) != fattr->fsid || - NFS_FILEID(inode) != fattr->fileid) { - printk(KERN_ERR "nfs_refresh_inode: inode number mismatch\n" - "expected (0x%Lx/0x%Lx), got (0x%Lx/0x%Lx)\n", - (long long)NFS_FSID(inode), (long long)NFS_FILEID(inode), - (long long)fattr->fsid, (long long)fattr->fileid); - goto out; - } + NFS_FILEID(inode) != fattr->fileid) + goto out_mismatch; - /* - * Make sure the inode's type hasn't changed. - */ if ((inode->i_mode & S_IFMT) != (fattr->mode & S_IFMT)) goto out_changed; - new_mtime = fattr->mtime; - new_size = fattr->size; - new_isize = nfs_size_to_loff_t(fattr->size); + dfprintk(VFS, "NFS: refresh_inode(%x/%ld ct=%d info=0x%x)\n", + inode->i_dev, inode->i_ino, + atomic_read(&inode->i_count), fattr->valid); error = 0; - - /* - * Update the read time so we don't revalidate too often. - */ + invalid = 0; NFS_READTIME(inode) = jiffies; - /* - * Note: NFS_CACHE_ISIZE(inode) reflects the state of the cache. - * NOT inode->i_size!!! - */ - if (NFS_CACHE_ISIZE(inode) != new_size) { -#ifdef NFS_DEBUG_VERBOSE - printk(KERN_DEBUG "NFS: isize change on %x/%ld\n", inode->i_dev, inode->i_ino); -#endif - invalid = 1; - } + if (check == NFS_CHECK_INVALIDATE) { + /* + * Note: NFS_CACHE_ISIZE(inode) reflects the state of the cache. + * NOT inode->i_size!!! + */ + if (NFS_CACHE_ISIZE(inode) != fattr->size) + invalid = 1; - /* - * Note: we don't check inode->i_mtime since pipes etc. - * can change this value in VFS without requiring a - * cache revalidation. - */ - if (NFS_CACHE_MTIME(inode) != new_mtime) { -#ifdef NFS_DEBUG_VERBOSE - printk(KERN_DEBUG "NFS: mtime change on %x/%ld\n", inode->i_dev, inode->i_ino); -#endif - invalid = 1; - } + /* + * Note: we don't check inode->i_mtime since pipes etc. + * can change this value in VFS without requiring a + * cache revalidation. + */ + if (NFS_CACHE_MTIME(inode) != fattr->mtime) + invalid = 1; - /* Check Weak Cache Consistency data. - * If size and mtime match the pre-operation values, we can - * assume that any attribute changes were caused by our NFS - * operation, so there's no need to invalidate the caches. - */ - if ((fattr->valid & NFS_ATTR_WCC) - && NFS_CACHE_ISIZE(inode) == fattr->pre_size - && NFS_CACHE_MTIME(inode) == fattr->pre_mtime) { - invalid = 0; + /* + * Check Weak Cache Consistency data. + * If size and mtime match the pre-operation values, we can + * assume that any attribute changes were caused by our NFS + * operation, so there's no need to invalidate the caches. + */ + if ((fattr->valid & NFS_ATTR_WCC) + && NFS_CACHE_ISIZE(inode) == fattr->pre_size + && NFS_CACHE_MTIME(inode) == fattr->pre_mtime) + invalid = 0; } /* - * If we have pending writebacks, things can get - * messy. + * If we have pending writebacks, things can get messy. */ - if (nfs_have_writebacks(inode) && new_isize < inode->i_size) + NFS_CACHE_ISIZE(inode) = fattr->size; + new_isize = nfs_size_to_loff_t(fattr->size); + if (nfs_have_writebacks(inode) && new_isize > inode->i_size) new_isize = inode->i_size; - NFS_CACHE_CTIME(inode) = fattr->ctime; - inode->i_ctime = nfs_time_to_secs(fattr->ctime); - /* If we've been messing around with atime, don't + /* + * If we've been messing around with atime, don't * update it. Save the server value in NFS_CACHE_ATIME. */ NFS_CACHE_ATIME(inode) = fattr->atime; if (time_before(inode->i_atime, nfs_time_to_secs(fattr->atime))) inode->i_atime = nfs_time_to_secs(fattr->atime); - NFS_CACHE_MTIME(inode) = new_mtime; - inode->i_mtime = nfs_time_to_secs(new_mtime); - - NFS_CACHE_ISIZE(inode) = new_size; - inode->i_size = new_isize; - - inode->i_mode = fattr->mode; - inode->i_nlink = fattr->nlink; - inode->i_uid = fattr->uid; - inode->i_gid = fattr->gid; + NFS_CACHE_MTIME(inode) = fattr->mtime; + inode->i_mtime = nfs_time_to_secs(fattr->mtime); - if (fattr->valid & NFS_ATTR_FATTR_V3) { - /* - * report the blocks in 512byte units - */ - inode->i_blocks = nfs_calc_block_size(fattr->du.nfs3.used); - inode->i_blksize = inode->i_sb->s_blocksize; - } else { - inode->i_blocks = fattr->du.nfs2.blocks; - inode->i_blksize = fattr->du.nfs2.blocksize; - } - inode->i_rdev = 0; - if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode)) - inode->i_rdev = to_kdev_t(fattr->rdev); - - /* Update attrtimeo value */ - if (!invalid && time_after(jiffies, NFS_ATTRTIMEO_UPDATE(inode)+NFS_ATTRTIMEO(inode))) { - if ((NFS_ATTRTIMEO(inode) <<= 1) > NFS_MAXATTRTIMEO(inode)) - NFS_ATTRTIMEO(inode) = NFS_MAXATTRTIMEO(inode); - NFS_ATTRTIMEO_UPDATE(inode) = jiffies; - } + NFS_CACHE_CTIME(inode) = fattr->ctime; + inode->i_ctime = nfs_time_to_secs(fattr->ctime); + nfs_fill_in_attributes(inode, fattr); - if (invalid) + if (!invalid) + nfs_lengthen_attr_timeout(inode); + else { nfs_zap_caches(inode); + nfs_reset_attr_timeout(inode); + } out: return error; @@ -1042,6 +1008,15 @@ out_changed: * (But we fall through to invalidate the caches.) */ nfs_invalidate_inode(inode); + goto out; + +out_mismatch: +#ifdef NFS_PARANOIA + printk(KERN_ERR "nfs_refresh_inode: inode number mismatch\n" + "expected (0x%Lx/0x%Lx), got (0x%Lx/0x%Lx)\n", + (long long)NFS_FSID(inode), (long long)NFS_FILEID(inode), + (long long)fattr->fsid, (long long)fattr->fileid); +#endif goto out; } diff -dpruN Linux-2.4.4/fs/nfs/write.c linux/fs/nfs/write.c --- Linux-2.4.4/fs/nfs/write.c Tue May 1 14:33:01 2001 +++ linux/fs/nfs/write.c Tue May 1 15:16:54 2001 @@ -147,20 +147,21 @@ static void nfs_writedata_release(struct } /* - * This function will be used to simulate weak cache consistency - * under NFSv2 when the NFSv3 attribute patch is included. - * For the moment, we just call nfs_refresh_inode(). + * An error may result in no WCC stats, or the server may not support + * WCC stats at all (eg, an NFSv2 server). Be certain not to + * invalidate the page cache when handling such replies. */ -static __inline__ int +static __inline__ void nfs_write_attributes(struct inode *inode, struct nfs_fattr *fattr) { - if ((fattr->valid & NFS_ATTR_FATTR) && !(fattr->valid & NFS_ATTR_WCC)) { - fattr->pre_size = NFS_CACHE_ISIZE(inode); - fattr->pre_mtime = NFS_CACHE_MTIME(inode); - fattr->pre_ctime = NFS_CACHE_CTIME(inode); - fattr->valid |= NFS_ATTR_WCC; - } - return nfs_refresh_inode(inode, fattr); + if (fattr->valid & NFS_ATTR_FATTR) { + int check = NFS_DONT_INVALIDATE; + + if (fattr->valid & NFS_ATTR_WCC) + check = NFS_CHECK_INVALIDATE; + + __nfs_refresh_inode(inode, fattr, check); + } } /* diff -dpruN Linux-2.4.4/include/linux/nfs_fs.h linux/include/linux/nfs_fs.h --- Linux-2.4.4/include/linux/nfs_fs.h Tue May 1 14:33:09 2001 +++ linux/include/linux/nfs_fs.h Tue May 1 15:39:03 2001 @@ -142,14 +142,61 @@ extern int nfs_inode_is_stale(struct ino struct nfs_fattr *); extern struct inode *nfs_fhget(struct dentry *, struct nfs_fh *, struct nfs_fattr *); -extern int nfs_refresh_inode(struct inode *, struct nfs_fattr *); +extern int __nfs_refresh_inode(struct inode *, struct nfs_fattr *, int); extern int nfs_revalidate(struct dentry *); extern int nfs_permission(struct inode *, int); extern int nfs_open(struct inode *, struct file *); extern int nfs_release(struct inode *, struct file *); -extern int __nfs_revalidate_inode(struct nfs_server *, struct inode *); +extern int __nfs_revalidate_inode(struct inode *); extern int nfs_notify_change(struct dentry *, struct iattr *); +#define NFS_DONT_INVALIDATE (0) +#define NFS_CHECK_INVALIDATE (1) + +static __inline__ void +nfs_lengthen_attr_timeout(struct inode *inode) +{ + if (time_after(jiffies, + NFS_ATTRTIMEO_UPDATE(inode) + NFS_ATTRTIMEO(inode))) { + + unsigned int max_timeout = NFS_MAXATTRTIMEO(inode); + + if ((NFS_ATTRTIMEO(inode) <<= 1) > max_timeout) + NFS_ATTRTIMEO(inode) = max_timeout; + + NFS_ATTRTIMEO_UPDATE(inode) = jiffies; + } +} + +static __inline__ void +nfs_reset_attr_timeout(struct inode *inode) +{ + NFS_ATTRTIMEO(inode) = NFS_MINATTRTIMEO(inode); + NFS_ATTRTIMEO_UPDATE(inode) = jiffies; +} + +/* + * This stub checks to see if there's any work to do when updating + * cached attributes. Common case: no changes. + */ +static __inline__ int +nfs_refresh_inode(struct inode *inode, struct nfs_fattr *fattr) +{ + if (fattr->valid & NFS_ATTR_FATTR) { + if ((NFS_CACHE_ISIZE(inode) == fattr->size) && + (NFS_CACHE_MTIME(inode) == fattr->mtime) && + (NFS_CACHE_CTIME(inode) == fattr->ctime)) { + + NFS_READTIME(inode) = jiffies; + nfs_lengthen_attr_timeout(inode); + return 0; + } else + return __nfs_refresh_inode(inode, fattr, + NFS_CHECK_INVALIDATE); + } else + return -EIO; +} + /* * linux/fs/nfs/file.c */ @@ -265,12 +312,22 @@ extern int nfs3_mount(struct sockaddr_i /* * inline functions */ -static inline int -nfs_revalidate_inode(struct nfs_server *server, struct inode *inode) + +/* + * NFSv2 needs to explicitly revalidate inodes periodically. + * NFSv3 will get attribute info back on almost every OTW operation, + * so explicitly asking for attribute info isn't needed. + */ +static __inline__ int +nfs_revalidate_inode(struct dentry *dentry) { - if (time_before(jiffies, NFS_READTIME(inode)+NFS_ATTRTIMEO(inode))) - return NFS_STALE(inode) ? -ESTALE : 0; - return __nfs_revalidate_inode(server, inode); +#ifdef CONFIG_NFS_V3 + if (NFS_PROTO(dentry->d_inode)->version == 2) + return nfs_revalidate(dentry); + return 0; +#else + return nfs_revalidate(dentry); +#endif } static inline loff_t