Redefines read/write interface for pNFS I/O. NFSv3 calls proceed as normal, but NFSv4 read/write calls (for which a layout driver exists) will request the layout from the pNFS server and use the layout driver for I/O. --- linux-2.6.14-pnfs-current-dhildebz/fs/nfs/file.c | 10 linux-2.6.14-pnfs-current-dhildebz/fs/nfs/inode.c | 2 linux-2.6.14-pnfs-current-dhildebz/fs/nfs/pnfs.c | 142 ++++++++++++++ linux-2.6.14-pnfs-current-dhildebz/fs/nfs/pnfs.h | 2 linux-2.6.14-pnfs-current-dhildebz/include/linux/nfs_fs.h | 2 5 files changed, 156 insertions(+), 2 deletions(-) diff -puN fs/nfs/file.c~client-rw fs/nfs/file.c --- linux-2.6.14-pnfs-current/fs/nfs/file.c~client-rw 2006-01-12 13:19:11.839980000 -0500 +++ linux-2.6.14-pnfs-current-dhildebz/fs/nfs/file.c 2006-01-12 13:19:11.885980000 -0500 @@ -32,6 +32,7 @@ #include #include "delegation.h" +#include "pnfs.h" #define NFSDBG_FACILITY NFSDBG_FILE @@ -50,8 +51,13 @@ static int nfs_flock(struct file *filp, struct file_operations nfs_file_operations = { .llseek = nfs_file_llseek, - .read = do_sync_read, - .write = do_sync_write, +#ifdef CONFIG_NFS_V4 + .read = pnfs_file_read, + .write = pnfs_file_write, +#else + .read = do_sync_read, + .write = do_sync_write, +#endif .aio_read = nfs_file_read, .aio_write = nfs_file_write, .mmap = nfs_file_mmap, diff -puN fs/nfs/pnfs.c~client-rw fs/nfs/pnfs.c --- linux-2.6.14-pnfs-current/fs/nfs/pnfs.c~client-rw 2006-01-12 13:19:11.847980000 -0500 +++ linux-2.6.14-pnfs-current-dhildebz/fs/nfs/pnfs.c 2006-01-12 13:19:11.901980000 -0500 @@ -344,5 +344,147 @@ out: return result; } +/* + * Call the appropriate parallel I/O subsystem read function. + * If no I/O device driver exists, or one does not match the returned + * fstype, then call regular NFS processing. 
+ *
+ * @filp:  file being read
+ * @buf:   user-space destination buffer
+ * @count: number of bytes requested
+ * @pos:   pointer to the file offset to read from
+ *
+ * Falls back to do_sync_read() when the mount is not NFSv4 or when no
+ * layout driver is attached to the server.  Otherwise the layout is
+ * set up through virtual_update_layout() and the read is handed to the
+ * layout driver's ld_io_ops->read().
+ *
+ * Returns the result of the selected read path, the non-zero result of
+ * virtual_update_layout() if the layout could not be set up, or -EIO if
+ * the layout driver provides no read operation.
+ */
+ssize_t
+pnfs_file_read(struct file* filp, char __user *buf, size_t count, loff_t* pos)
+{
+	struct dentry *dentry = filp->f_dentry;
+	struct inode *inode = dentry->d_inode;
+	struct nfs_inode *nfsi = NFS_I(inode);
+	struct nfs_server *nfss = NFS_SERVER(inode);
+	ssize_t result;
+
+	/* pNFS is only for v4 */
+	if (nfss->rpc_ops->version != 4)
+		return do_sync_read(filp, buf, count, pos);
+
+	/*
+	 * No layout driver is attached, so pnfs_curr_ld is NULL here and
+	 * must not be dereferenced, not even for the debug message.
+	 */
+	if (!nfss->pnfs_curr_ld) {
+		dprintk("%s:No pNFS module defined. Using standard NFS read.\n",
+			__FUNCTION__);
+		return do_sync_read(filp, buf, count, pos);
+	}
+
+	dprintk("%s:fstype:%d\n", __FUNCTION__, nfss->pnfs_curr_ld->id);
+	dfprintk(IO, "%s:(%s/%s, %lu@%lu)\n", __FUNCTION__,
+		dentry->d_parent->d_name.name, dentry->d_name.name,
+		(unsigned long) count, (unsigned long) *pos);
+
+	/* Step 1: Retrieve and set layout if not already cached */
+	result = virtual_update_layout(inode,
+			(struct nfs_open_context *)filp->private_data,
+			count, *pos, FMODE_READ);
+	if (result)
+		return result;
+
+	/* Step 2: Call I/O device driver's read function */
+	if (nfss->pnfs_curr_ld->ld_io_ops && nfss->pnfs_curr_ld->ld_io_ops->read)
+		result = nfss->pnfs_curr_ld->ld_io_ops->read(nfsi->current_layout,
+							      filp, buf, count, pos);
+	else
+		result = -EIO;
+
+	dprintk("%s end (err:%Zd)\n", __FUNCTION__, result);
+	return result;
+}
+
+/*
+ * Call the appropriate parallel I/O subsystem write function.
+ * If no I/O device driver exists, or one does not match the returned
+ * fstype, then call regular NFS processing.
+ *
+ * @filp:  file being written
+ * @buf:   user-space source buffer
+ * @count: number of bytes to write
+ * @pos:   pointer to the file offset to write at
+ *
+ * The non-pNFS fallbacks use do_sync_write() and set nfsi->nfs_dirty;
+ * a successful layout-driver write sets nfsi->pnfs_dirty instead.
+ *
+ * Returns the result of the selected write path, a generic_write_checks()
+ * or virtual_update_layout() failure code, or -EIO if the layout driver
+ * provides no write operation.
+ */
+ssize_t
+pnfs_file_write(struct file* filp, const char __user *buf, size_t count, loff_t* pos)
+{
+	struct dentry *dentry = filp->f_dentry;
+	struct inode *inode = dentry->d_inode;
+	const int isblk = S_ISBLK(inode->i_mode);
+	struct nfs_server *nfss = NFS_SERVER(inode);
+	struct nfs_inode *nfsi = NFS_I(inode);
+	ssize_t result;
+
+	/* pNFS is only for v4 */
+	if (nfss->rpc_ops->version != 4) {
+		nfsi->nfs_dirty = 1;
+		return do_sync_write(filp, buf, count, pos);
+	}
+
+	/* pnfs_curr_ld is NULL here: it must not be dereferenced in the message */
+	if (!nfss->pnfs_curr_ld) {
+		dprintk("%s:No pNFS module defined. Using standard NFS write.\n",
+			__FUNCTION__);
+		nfsi->nfs_dirty = 1;
+		return do_sync_write(filp, buf, count, pos);
+	}
+
+	dprintk("%s:fstype:%d\n", __FUNCTION__, nfss->pnfs_curr_ld->id);
+	dfprintk(IO, "%s:(%s/%s(%ld), %lu@%lu)\n", __FUNCTION__,
+		dentry->d_parent->d_name.name, dentry->d_name.name,
+		inode->i_ino, (unsigned long) count, (unsigned long) *pos);
+
+	/* Need to adjust write param if this is an append, etc */
+	result = generic_write_checks(filp, pos, &count, isblk);
+	if (result)
+		return result;
+
+	dprintk("%s:Readjusted %lu@%lu)\n", __FUNCTION__,
+		(unsigned long) count, (unsigned long) *pos);
+
+	/* Step 1: Retrieve and set layout if not already cached */
+	result = virtual_update_layout(inode,
+			(struct nfs_open_context *)filp->private_data,
+			count, *pos, FMODE_WRITE);
+	if (result)
+		return result;
+
+	/* Step 2: Call I/O device driver's write function */
+	if (nfss->pnfs_curr_ld->ld_io_ops && nfss->pnfs_curr_ld->ld_io_ops->write) {
+		result = nfss->pnfs_curr_ld->ld_io_ops->write(nfsi->current_layout,
+							       filp, buf, count, pos);
+		/* Step 3: Mark the inode as dirty, requiring a fsync via the
+		 * layout driver, but only if bytes were actually written. */
+		if (result > 0)
+			nfsi->pnfs_dirty = 1;
+	} else {
+		result = -EIO;
+	}
+
+	dprintk("%s end (err:%Zd)\n", __FUNCTION__, result);
+	return result;
+}
+
 EXPORT_SYMBOL(pnfs_unregister_layoutdriver); EXPORT_SYMBOL(pnfs_register_layoutdriver); diff -puN include/linux/nfs_fs.h~client-rw include/linux/nfs_fs.h --- linux-2.6.14-pnfs-current/include/linux/nfs_fs.h~client-rw 2006-01-12 13:19:11.856980000 -0500 +++ linux-2.6.14-pnfs-current-dhildebz/include/linux/nfs_fs.h 2006-01-12 13:19:11.907980000 -0500 @@ -190,6 +190,8 @@ struct nfs_inode { /* pNFS layout information */ struct pnfs_layout_type* current_layout; + unsigned int pnfs_dirty; /* pNFS dirty flag for determining fsync path*/ + unsigned int nfs_dirty; /* NFS dirty flag for determining fsync path*/ #endif /* CONFIG_NFS_V4*/ struct inode vfs_inode; }; diff -puN fs/nfs/inode.c~client-rw fs/nfs/inode.c --- linux-2.6.14-pnfs-current/fs/nfs/inode.c~client-rw 2006-01-12 13:19:11.868980000 -0500 +++ linux-2.6.14-pnfs-current-dhildebz/fs/nfs/inode.c 2006-01-12 13:19:11.918980000 -0500 @@ -2058,6 +2058,8 @@ static struct file_system_type nfs4_fs_t nfsi->delegation = NULL; \ nfsi->delegation_state = 0; \ init_rwsem(&nfsi->rwsem); \ + nfsi->pnfs_dirty = 0; \ + nfsi->nfs_dirty = 0; \ } while(0) #define register_nfs4fs() register_filesystem(&nfs4_fs_type) #define unregister_nfs4fs() unregister_filesystem(&nfs4_fs_type) diff -puN fs/nfs/pnfs.h~client-rw fs/nfs/pnfs.h --- linux-2.6.14-pnfs-current/fs/nfs/pnfs.h~client-rw 2006-01-12 13:19:11.875980000 -0500 +++ linux-2.6.14-pnfs-current-dhildebz/fs/nfs/pnfs.h 2006-01-12 13:19:11.932980000 -0500 @@ -14,5 +14,7 @@ void set_pnfs_layoutdriver(struct super_block *sb, u32 id); void unmount_pnfs_layoutdriver(struct super_block *sb); +ssize_t pnfs_file_write(struct file* filp, const char __user *buf, size_t count, loff_t* pos); +ssize_t pnfs_file_read(struct file* filp, char __user *buf, size_t count, loff_t* pos); #endif /* FS_NFS_PNFS_H */ _