Create XDR and surrounding functions on the client for the LAYOUTCOMMIT command. The layout driver commit operation and layoutcommit are called in nfs_commit_list, replacing the standard NFS commit processing. The extent, offset, and lastbytewritten arguments for layoutcommit are tracked in the read/write calls and then reset on layoutcommit completion. --- linux-2.6.14-pnfs-current-dhildebz/fs/nfs/nfs4proc.c | 78 +++++++ linux-2.6.14-pnfs-current-dhildebz/fs/nfs/nfs4xdr.c | 100 +++++++++ linux-2.6.14-pnfs-current-dhildebz/fs/nfs/pnfs.c | 30 ++ linux-2.6.14-pnfs-current-dhildebz/fs/nfs/write.c | 126 +++++++++--- linux-2.6.14-pnfs-current-dhildebz/include/linux/nfs4.h | 1 linux-2.6.14-pnfs-current-dhildebz/include/linux/nfs_fs.h | 5 linux-2.6.14-pnfs-current-dhildebz/include/linux/nfs_xdr.h | 4 linux-2.6.14-pnfs-current-dhildebz/include/linux/pnfs_xdr.h | 21 ++ 8 files changed, 338 insertions(+), 27 deletions(-) diff -puN fs/nfs/nfs4proc.c~client-layoutcommit fs/nfs/nfs4proc.c --- linux-2.6.14-pnfs-current/fs/nfs/nfs4proc.c~client-layoutcommit 2006-01-13 17:30:00.124499000 -0500 +++ linux-2.6.14-pnfs-current-dhildebz/fs/nfs/nfs4proc.c 2006-01-13 17:30:00.206418000 -0500 @@ -752,6 +752,9 @@ static int _nfs4_do_open(struct inode *d /* Initialize extents */ NFS_I(inode)->current_layout = NULL; + NFS_I(inode)->pnfs_write_begin_pos = 0; + NFS_I(inode)->pnfs_write_end_pos = 0; + nfs_free_seqid(o_arg.seqid); nfs4_put_state_owner(sp); @@ -1540,6 +1543,46 @@ static int nfs4_proc_write(struct nfs_wr return err; } +static int _pnfs_proc_layoutcommit(struct nfs_write_data *cdata) +{ + struct inode *inode = cdata->inode; + struct nfs_fattr *fattr = cdata->lc_res.fattr; + struct nfs_server *server = NFS_SERVER(inode); + struct rpc_message msg = { + .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_PNFS_LAYOUTCOMMIT], + .rpc_argp = &cdata->lc_args, + .rpc_resp = &cdata->lc_res, /* layoutcommit specific results */ + .rpc_cred = cdata->cred, + }; + int status; + + dprintk("NFS call 
layoutcommit %lld @ %lld\n", cdata->lc_args.length, cdata->lc_args.offset); + + cdata->res.fattr = cdata->lc_res.fattr; /* req by nfs4_commit_done */ + nfs_fattr_init(fattr); + status = rpc_call_sync(server->client, &msg, 0); + dprintk("NFS reply layoutcommit: %d\n", status); + return status; +} + +/* DH: Performs sync layoutcommit. As a note, O_DIRECT will + * require this, but I currently haven't modified O_DIRECT for pNFS, + * and I'm not sure if I ever will as bypassing the pagecache might + * be the fastest way to do this. (well, I'm sure we will + * eventually...) + */ +static int pnfs_proc_layoutcommit(struct nfs_write_data *cdata) +{ + struct nfs4_exception exception = { }; + int err; + do { + err = nfs4_handle_exception(NFS_SERVER(cdata->inode), + _pnfs_proc_layoutcommit(cdata), + &exception); + } while (exception.retry); + return err; +} + static int _nfs4_proc_commit(struct nfs_write_data *cdata) { struct inode *inode = cdata->inode; @@ -2241,6 +2284,39 @@ nfs4_commit_done(struct rpc_task *task) nfs_commit_done(task); } +/* DH: Commit the data layout to the server. + * A caller must actually have a layout to call this function. + */ +static void +nfs4_proc_pnfs_layoutcommit_setup(struct nfs_write_data* data, int how) +{ + struct rpc_task *task = &data->task; + struct rpc_message msg = { + .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_PNFS_LAYOUTCOMMIT], + .rpc_argp = &data->lc_args, + .rpc_resp = &data->lc_res, + .rpc_cred = data->cred, + }; + struct inode *inode = data->inode; + struct nfs_server *server = NFS_SERVER(inode); + int flags; + + data->lc_args.bitmask = server->attr_bitmask; + + /* Need to set both as down stream commit functions look + * at the res.server value + */ + data->lc_res.server = server; + data->res.server = server; + + /* Set the initial flags for the task. */ + flags = (how & FLUSH_SYNC) ? 0 : RPC_TASK_ASYNC; + + /* Finalize the task. 
*/ + rpc_init_task(task, NFS_CLIENT(inode), nfs4_commit_done, flags); + rpc_call_setup(task, &msg, 0); +} + static void nfs4_proc_commit_setup(struct nfs_write_data *data, int how) { @@ -3278,6 +3354,8 @@ struct nfs_rpc_ops nfs_v4_clientops = { .lock = nfs4_proc_lock, .clear_acl_cache = nfs4_zap_acl_attr, .pnfs_layoutget = nfs4_proc_pnfs_layoutget, + .pnfs_layoutcommit_setup = nfs4_proc_pnfs_layoutcommit_setup, + .pnfs_layoutcommit = pnfs_proc_layoutcommit, }; /* diff -puN fs/nfs/nfs4xdr.c~client-layoutcommit fs/nfs/nfs4xdr.c --- linux-2.6.14-pnfs-current/fs/nfs/nfs4xdr.c~client-layoutcommit 2006-01-13 17:30:00.139484000 -0500 +++ linux-2.6.14-pnfs-current-dhildebz/fs/nfs/nfs4xdr.c 2006-01-13 17:36:37.992125000 -0500 @@ -419,6 +419,17 @@ static int nfs_stat_to_errno(int); encode_pnfs_layoutget_sz) #define NFS4_dec_pnfs_layoutget_sz (compound_decode_hdr_maxsz + \ decode_pnfs_layoutget_maxsz) +#define encode_pnfs_layoutcommit_sz (11 + PNFS_LAYOUT_MAXSIZE + \ + op_encode_hdr_maxsz) +#define decode_pnfs_layoutcommit_maxsz (2 + op_decode_hdr_maxsz) +#define NFS4_enc_pnfs_layoutcommit_sz (compound_encode_hdr_maxsz + \ + encode_putfh_maxsz + \ + encode_pnfs_layoutcommit_sz + \ + encode_getattr_maxsz) +#define NFS4_dec_pnfs_layoutcommit_sz (compound_decode_hdr_maxsz + \ + decode_putfh_maxsz + \ + decode_pnfs_layoutcommit_maxsz + \ + decode_getattr_maxsz) static struct { unsigned int mode; @@ -4329,6 +4340,94 @@ out: } +/* DH: Encode request for committing the layout information for pNFS. 
+*/ +static int encode_pnfs_layoutcommit(struct xdr_stream *xdr, const struct pnfs_layoutcommit_arg *args) +{ + uint32_t *p; + + if (args->new_layout_size > PNFS_LAYOUT_MAXSIZE) + return -EINVAL; + RESERVE_SPACE(44); /* 4 (opcode) + 4 * 8 (clientid, offset, length, lastbytewritten) + 2 * 4 (time_modify, time_access) */ + WRITE32(OP_LAYOUTCOMMIT); + WRITE64(args->clientid); + WRITE64(args->offset); + WRITE64(args->length); + WRITE64(args->lastbytewritten); /* last byte written */ + WRITE32(args->time_modify); + WRITE32(args->time_access); + + RESERVE_SPACE(4 + args->new_layout_size); + WRITE32(args->new_layout_size); + if (args->new_layout_size > 0) + WRITEMEM(args->new_layout, args->new_layout_size); + return 0; +} + +/* DH: Decode the LAYOUTCOMMIT reply: read the 64-bit newsize into res->newsize +*/ +static int decode_pnfs_layoutcommit(struct xdr_stream *xdr, struct rpc_rqst *req, struct pnfs_layoutcommit_res *res) +{ + uint32_t *p; + int status; + + status = decode_op_hdr(xdr, OP_LAYOUTCOMMIT); + if (status) + return status; + + READ_BUF(8); + READ64(res->newsize); /* newsize */ + return 0; +} + +/* + * DH: Encode LAYOUTCOMMIT request + */ +static int nfs4_xdr_enc_pnfs_layoutcommit(struct rpc_rqst *req, uint32_t *p, struct pnfs_layoutcommit_arg *args) +{ + struct xdr_stream xdr; + struct compound_hdr hdr = { + .nops = 3, + }; + int status; + + xdr_init_encode(&xdr, &req->rq_snd_buf, p); + encode_compound_hdr(&xdr, &hdr); + status = encode_putfh(&xdr, args->fh); + if (status) + goto out; + status = encode_pnfs_layoutcommit(&xdr, args); + if (status) + goto out; + status = encode_getfattr(&xdr, args->bitmask); +out: + return status; +} + +/* + * DH: Decode LAYOUTCOMMIT response + */ +static int nfs4_xdr_dec_pnfs_layoutcommit(struct rpc_rqst *rqstp, uint32_t *p, struct pnfs_layoutcommit_res *res) +{ + struct xdr_stream xdr; + struct compound_hdr hdr; + int status; + + xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p); + status = decode_compound_hdr(&xdr, &hdr); + if (status) + goto out; + status = decode_putfh(&xdr); + if (status) + goto out; + status = decode_pnfs_layoutcommit(&xdr,rqstp,res); + 
if (status) + goto out; + decode_getfattr(&xdr, res->fattr, res->server); +out: + return status; +} + uint32_t *nfs4_decode_dirent(uint32_t *p, struct nfs_entry *entry, int plus) { uint32_t bitmap[2] = {0}; @@ -4499,6 +4598,7 @@ struct rpc_procinfo nfs4_procedures[] = PROC(GETACL, enc_getacl, dec_getacl), PROC(SETACL, enc_setacl, dec_setacl), PROC(PNFS_LAYOUTGET, enc_pnfs_layoutget, dec_pnfs_layoutget), + PROC(PNFS_LAYOUTCOMMIT, enc_pnfs_layoutcommit, dec_pnfs_layoutcommit), }; struct rpc_version nfs_version4 = { diff -puN fs/nfs/pnfs.c~client-layoutcommit fs/nfs/pnfs.c --- linux-2.6.14-pnfs-current/fs/nfs/pnfs.c~client-layoutcommit 2006-01-13 17:30:00.146477000 -0500 +++ linux-2.6.14-pnfs-current-dhildebz/fs/nfs/pnfs.c 2006-01-13 17:30:00.229394000 -0500 @@ -455,6 +455,7 @@ pnfs_writepages(struct nfs_write_data* w int numpages, status = -EIO, pgcount=0, temp; struct nfs_server* nfss = NFS_SERVER(inode); struct nfs_inode* nfsi = NFS_I(inode); + loff_t end_pos, orig_offset = args->offset; dprintk("%s: Writing ino:%lu %u@%llu\n", __FUNCTION__, inode->i_ino, args->count, args->offset); @@ -496,7 +497,14 @@ pnfs_writepages(struct nfs_write_data* w (void*)wdata); /* Step 3: Mark the inode as dirty, requiring a fsync via the layout driver */ if (status) + { nfsi->pnfs_dirty = 1; + if (orig_offset < nfsi->pnfs_write_begin_pos) + nfsi->pnfs_write_begin_pos = orig_offset; + end_pos = orig_offset + status - 1; /* I'm being inclusive */ + if (end_pos > nfsi->pnfs_write_end_pos) + nfsi->pnfs_write_end_pos = end_pos; + } } else { @@ -735,5 +743,27 @@ pnfs_file_write(struct file* filp, const return result; } +int +pnfs_commit(struct inode* inode, struct list_head *head, int sync, struct nfs_write_data *data) +{ + int result = 0; + struct nfs_inode *nfsi = NFS_I(inode); + struct nfs_server* nfss = NFS_SERVER(inode); + dprintk("%s: Begin\n",__FUNCTION__); + + if (nfss->pnfs_curr_ld->ld_io_ops->commit) + { + dfprintk(IO, "%s: Calling layout driver commit\n",__FUNCTION__); + result 
= nfss->pnfs_curr_ld->ld_io_ops->commit(nfsi->current_layout, inode, head, sync, (void*)data); + /* Reset dirty flag to avoid unnecessary syncs + * TODO: Only want to reset the dirty flag if the commits succeed. + */ + nfsi->pnfs_dirty = 0; + } + + dprintk("%s end (err:%d)\n",__FUNCTION__,result); /* result is an int, so %d rather than the size_t-only %Zd */ + return result; +} + EXPORT_SYMBOL(pnfs_unregister_layoutdriver); EXPORT_SYMBOL(pnfs_register_layoutdriver); diff -puN fs/nfs/write.c~client-layoutcommit fs/nfs/write.c --- linux-2.6.14-pnfs-current/fs/nfs/write.c~client-layoutcommit 2006-01-13 17:30:00.154470000 -0500 +++ linux-2.6.14-pnfs-current-dhildebz/fs/nfs/write.c 2006-01-13 17:30:00.239384000 -0500 @@ -64,6 +64,7 @@ #include "delegation.h" #include "pnfs.h" +#include "nfs4_fs.h" #define NFSDBG_FACILITY NFSDBG_PAGECACHE @@ -83,6 +84,7 @@ static int nfs_wait_on_write_congestion( static int nfs_wait_on_requests(struct inode *, unsigned long, unsigned int); static int nfs_flush_inode(struct inode *inode, unsigned long idx_start, unsigned int npages, int how); +extern int pnfs_commit(struct inode* inode, struct list_head *head, int sync, struct nfs_write_data *data); static kmem_cache_t *nfs_wdata_cachep; mempool_t *nfs_wdata_mempool; @@ -1237,22 +1239,9 @@ static void nfs_commit_release(struct rp /* * Set up the argument/result storage required for the RPC call. */ -static void nfs_commit_rpcsetup(struct list_head *head, - struct nfs_write_data *data, int how) +static void nfs_commit_rpcsetup(struct nfs_write_data *data, + int how) { - struct nfs_page *first; - struct inode *inode; - - /* Set up the RPC argument and reply structs - * NB: take care not to mess about with data->commit et al. 
*/ - - list_splice_init(head, &data->pages); - first = nfs_list_entry(data->pages.next); - inode = first->wb_context->dentry->d_inode; - - data->inode = inode; - data->cred = first->wb_context->cred; - data->args.fh = NFS_FH(data->inode); /* Note: we always request a commit of the entire inode */ data->args.offset = 0; @@ -1262,10 +1251,10 @@ static void nfs_commit_rpcsetup(struct l data->res.verf = &data->verf; nfs_fattr_init(&data->fattr); - NFS_PROTO(inode)->commit_setup(data, how); + NFS_PROTO(data->inode)->commit_setup(data, how); data->task.tk_priority = flush_task_priority(how); - data->task.tk_cookie = (unsigned long)inode; + data->task.tk_cookie = (unsigned long)data->inode; data->task.tk_calldata = data; /* Release requests */ data->task.tk_release = nfs_commit_release; @@ -1274,23 +1263,89 @@ static void nfs_commit_rpcsetup(struct l } /* + * Set up the argument/result storage required for the RPC call. + */ +static void pnfs_layoutcommit_rpcsetup(struct nfs_write_data *data, + int how) +{ + struct nfs_inode *nfsi = NFS_I(data->inode); + struct nfs4_client *clp = NFS_SERVER(data->inode)->nfs4_state; + + data->lc_args.fh = NFS_FH(data->inode); + data->lc_args.clientid = clp->cl_clientid; + + /* Initialize new layout size. Should be reset by layout driver + * if needed */ + data->lc_args.new_layout_size = 0; + + /* DH: Need to keep track of the offset and length in the inode + * so I can just state the values + * here and then reset them after I finish committing the layout. 
+ */ + data->lc_args.offset = nfsi->pnfs_write_begin_pos; + data->lc_args.length = nfsi->pnfs_write_end_pos - nfsi->pnfs_write_begin_pos + 1; + data->lc_args.lastbytewritten = nfsi->pnfs_write_end_pos; + + /* Need to set both for nfs4_commit_done + */ + data->lc_res.fattr = &data->fattr; + data->res.fattr = &data->fattr; + nfs_fattr_init(&data->fattr); + + NFS_PROTO(data->inode)->pnfs_layoutcommit_setup(data, how); + + data->task.tk_priority = flush_task_priority(how); + data->task.tk_cookie = (unsigned long)data->inode; + data->task.tk_calldata = data; + /* Release requests */ + data->task.tk_release = nfs_commit_release; + + dprintk("NFS: %4d initiated layoutcommit call. bpos: %llu epos: %llu\n", data->task.tk_pid, nfsi->pnfs_write_begin_pos, nfsi->pnfs_write_end_pos); +} + +/* * Commit dirty pages */ static int nfs_commit_list(struct list_head *head, int how) { - struct nfs_write_data *data; - struct nfs_page *req; + struct nfs_write_data *data = NULL; + struct nfs_page *req; + struct nfs_page *first; + struct inode *ino; + int error; - data = nfs_commit_alloc(); + dprintk("%s Begin\n", __FUNCTION__); /* entry trace only: use dprintk so it is not logged unconditionally on every commit */ - if (!data) - goto out_bad; - - /* Set up the argument struct */ - nfs_commit_rpcsetup(head, data, how); + data = nfs_commit_alloc(); + if (!data) + goto out_bad; + + list_splice_init(head, &data->pages); + first = nfs_list_entry(data->pages.next); + ino = first->wb_context->dentry->d_inode; + + data->inode = ino; + data->cred = first->wb_context->cred; + + if (use_pnfs_io(ino, 0)) + { + /* DH-TODO: Is this all async or do I need to monitor return values? */ + if ((error = pnfs_commit(ino, head, how, data)) != 0) + { + printk("pNFS Commit to data servers failed!! 
(err:%d)\n", error); + goto out_bad; /* NOTE(review): head was already emptied by list_splice_init(), so the out_bad loop has nothing to walk; the requests stay on data->pages and data appears to be leaked - confirm */ + } + pnfs_layoutcommit_rpcsetup(data, how); + nfs_execute_write(data); + } + else + { + /* Set up the argument struct */ + nfs_commit_rpcsetup(data, how); + nfs_execute_write(data); + } - nfs_execute_write(data); return 0; out_bad: while (!list_empty(head)) { @@ -1304,6 +1359,8 @@ nfs_commit_list(struct list_head *head, /* * COMMIT call returned + * DH: LAYOUTCOMMIT calls return here as well. Layout drivers should not clean up the pages, + * but let this function do it after the layoutcommit. */ void nfs_commit_done(struct rpc_task *task) @@ -1319,7 +1376,7 @@ nfs_commit_done(struct rpc_task *task) req = nfs_list_entry(data->pages.next); nfs_list_remove_request(req); - dprintk("NFS: commit (%s/%Ld %d@%Ld)", + dprintk("NFS: commit (%s/%Ld %d@%Ld)\n", req->wb_context->dentry->d_inode->i_sb->s_id, (long long)NFS_FILEID(req->wb_context->dentry->d_inode), req->wb_bytes, @@ -1331,6 +1388,14 @@ nfs_commit_done(struct rpc_task *task) goto next; } + /* Layoutcommit doesn't have a verifier to check, so skip this part */ + if (use_pnfs_io(data->inode, 0)) + { + nfs_inode_remove_request(req); + dprintk(" OK\n"); + goto next; + } + /* Okay, COMMIT succeeded, apparently. Check the verifier * returned by the server against all stored verfs. */ if (!memcmp(req->wb_verf.verifier, data->verf.verifier, sizeof(data->verf.verifier))) { @@ -1346,6 +1411,13 @@ nfs_commit_done(struct rpc_task *task) nfs_clear_page_writeback(req); res++; } + /* DH: Re-set write extent and pos for layoutcommit. + * TODO: should I reset them even if the layoutcommit fails? 
+ */ + if (task->tk_status >= 0) { + NFS_I(data->inode)->pnfs_write_begin_pos = 0; + NFS_I(data->inode)->pnfs_write_end_pos = 0; + } sub_page_state(nr_unstable,res); } #endif diff -puN include/linux/nfs4.h~client-layoutcommit include/linux/nfs4.h --- linux-2.6.14-pnfs-current/include/linux/nfs4.h~client-layoutcommit 2006-01-13 17:30:00.158465000 -0500 +++ linux-2.6.14-pnfs-current-dhildebz/include/linux/nfs4.h 2006-01-13 17:30:00.244379000 -0500 @@ -391,6 +391,7 @@ enum { NFSPROC4_CLNT_GETACL, NFSPROC4_CLNT_SETACL, NFSPROC4_CLNT_PNFS_LAYOUTGET, + NFSPROC4_CLNT_PNFS_LAYOUTCOMMIT, }; #endif diff -puN include/linux/nfs_fs.h~client-layoutcommit include/linux/nfs_fs.h --- linux-2.6.14-pnfs-current/include/linux/nfs_fs.h~client-layoutcommit 2006-01-13 17:30:00.165458000 -0500 +++ linux-2.6.14-pnfs-current-dhildebz/include/linux/nfs_fs.h 2006-01-13 17:30:00.250373000 -0500 @@ -192,6 +192,11 @@ struct nfs_inode { struct pnfs_layout_type* current_layout; unsigned int pnfs_dirty; /* pNFS dirty flag for determining fsync patch*/ unsigned int nfs_dirty; /* NFS dirty flag for determining fsync path*/ +/* DH: These vars keep track of the maximum write range + * so the values can be used for layoutcommit. 
+ */ + loff_t pnfs_write_begin_pos; + loff_t pnfs_write_end_pos; #endif /* CONFIG_NFS_V4*/ struct inode vfs_inode; }; diff -puN include/linux/nfs_xdr.h~client-layoutcommit include/linux/nfs_xdr.h --- linux-2.6.14-pnfs-current/include/linux/nfs_xdr.h~client-layoutcommit 2006-01-13 17:30:00.184439000 -0500 +++ linux-2.6.14-pnfs-current-dhildebz/include/linux/nfs_xdr.h 2006-01-13 17:30:00.257366000 -0500 @@ -709,6 +709,8 @@ struct nfs_write_data { struct page *pagevec[NFS_WRITE_MAXIOV]; struct nfs_writeargs args; /* argument struct */ struct nfs_writeres res; /* result struct */ + struct pnfs_layoutcommit_arg lc_args; + struct pnfs_layoutcommit_res lc_res; #ifdef CONFIG_NFS_V4 unsigned long timestamp; /* For lease renewal */ #endif @@ -774,6 +776,8 @@ struct nfs_rpc_ops { int (*lock)(struct file *, int, struct file_lock *); void (*clear_acl_cache)(struct inode *); int (*pnfs_layoutget)(struct nfs4_pnfs_layoutget* layout); + void (*pnfs_layoutcommit_setup)(struct nfs_write_data *, int how); + int (*pnfs_layoutcommit) (struct nfs_write_data *); }; /* diff -puN include/linux/pnfs_xdr.h~client-layoutcommit include/linux/pnfs_xdr.h --- linux-2.6.14-pnfs-current/include/linux/pnfs_xdr.h~client-layoutcommit 2006-01-13 17:30:00.189434000 -0500 +++ linux-2.6.14-pnfs-current-dhildebz/include/linux/pnfs_xdr.h 2006-01-13 17:30:00.261362000 -0500 @@ -46,4 +46,25 @@ struct nfs4_pnfs_layoutget { struct nfs4_pnfs_layoutget_arg* args; struct nfs4_pnfs_layoutget_res* res; }; + +struct pnfs_layoutcommit_arg { + __u64 clientid; + __u64 offset; + __u64 length; + __u64 lastbytewritten; + __u32 time_modify; + __u32 time_access; + const u32 *bitmask; + struct nfs_fh *fh; + + __u32 new_layout_size; + void* new_layout; +}; + +struct pnfs_layoutcommit_res { + __u64 newsize; + struct nfs_fattr *fattr; + const struct nfs_server *server; +}; + #endif /* LINUX_PNFS_XDR_H */ _