All I/O in pNFS is handled by a storage or I/O protocol specific module. This module is referred to as the layout module or layout driver. This patch enables a client to request the new file system attribute to determine which storage protocol the mounted file system is utilizing. A layout module/driver implements the storage protocol. The layout module registers itself with the pNFS client when it is loaded. If the pNFS client can find a layout driver with the same type as the one returned from the mounted file system, then the pNFS client can execute direct I/O using the layout driver. --- linux-2.6.14-pnfs-current-dhildebz/fs/nfs/Makefile | 3 linux-2.6.14-pnfs-current-dhildebz/fs/nfs/inode.c | 16 linux-2.6.14-pnfs-current-dhildebz/fs/nfs/nfs4proc.c | 2 linux-2.6.14-pnfs-current-dhildebz/fs/nfs/nfs4xdr.c | 39 ++ linux-2.6.14-pnfs-current-dhildebz/fs/nfs/pnfs.c | 193 +++++++++++ linux-2.6.14-pnfs-current-dhildebz/fs/nfs/pnfs.h | 18 + linux-2.6.14-pnfs-current-dhildebz/include/linux/nfs4_pnfs.h | 9 linux-2.6.14-pnfs-current-dhildebz/include/linux/nfs_fs_sb.h | 2 linux-2.6.14-pnfs-current-dhildebz/include/linux/nfs_xdr.h | 1 9 files changed, 281 insertions(+), 2 deletions(-) diff -puN fs/nfs/inode.c~client-ld fs/nfs/inode.c --- linux-2.6.14-pnfs-current/fs/nfs/inode.c~client-ld 2006-01-11 14:21:45.964318000 -0500 +++ linux-2.6.14-pnfs-current-dhildebz/fs/nfs/inode.c 2006-01-12 13:11:12.769009000 -0500 @@ -41,6 +41,7 @@ #include "nfs4_fs.h" #include "delegation.h" +#include "pnfs.h" #define NFSDBG_FACILITY NFSDBG_VFS #define NFS_PARANOIA 1 @@ -79,6 +80,7 @@ static struct super_operations nfs_sops .show_options = nfs_show_options, }; + /* * RPC cruft for NFS */ @@ -349,6 +351,17 @@ nfs_sb_init(struct super_block *sb, rpc_ /* We're airborne Set socket buffersize */ rpc_setbufsize(server->client, server->wsize + 100, server->rsize + 100); + +#ifdef CONFIG_NFS_V4 + /* Set and initialize the layout driver */ + set_pnfs_layoutdriver(sb, fsinfo.layoutclass); +#else + 
server->pnfs_curr_ld = NULL; + server->pnfs_mountid = NULL; +#endif + + printk("%s: sb_init rsize:%d wsize:%d blksize:%lu bits:%d\n",__FUNCTION__,server->rsize,server->wsize,sb->s_blocksize,sb->s_blocksize_bits); + return 0; /* Yargs. It didn't work out. */ out_no_root: @@ -2012,6 +2025,9 @@ static void nfs4_kill_super(struct super { struct nfs_server *server = NFS_SB(sb); + /* pNFS: Shutdown mount point for layout driver */ + unmount_pnfs_layoutdriver(sb); + nfs_return_all_delegations(sb); kill_anon_super(sb); diff -puN fs/nfs/Makefile~client-ld fs/nfs/Makefile --- linux-2.6.14-pnfs-current/fs/nfs/Makefile~client-ld 2006-01-11 14:21:45.970318000 -0500 +++ linux-2.6.14-pnfs-current-dhildebz/fs/nfs/Makefile 2006-01-12 11:42:24.991405000 -0500 @@ -11,6 +11,7 @@ nfs-$(CONFIG_NFS_V3) += nfs3proc.o nfs3x nfs-$(CONFIG_NFS_V3_ACL) += nfs3acl.o nfs-$(CONFIG_NFS_V4) += nfs4proc.o nfs4xdr.o nfs4state.o nfs4renewd.o \ delegation.o idmap.o \ - callback.o callback_xdr.o callback_proc.o + callback.o callback_xdr.o callback_proc.o \ + pnfs.o nfs-$(CONFIG_NFS_DIRECTIO) += direct.o nfs-objs := $(nfs-y) diff -puN fs/nfs/nfs4proc.c~client-ld fs/nfs/nfs4proc.c --- linux-2.6.14-pnfs-current/fs/nfs/nfs4proc.c~client-ld 2006-01-11 14:21:45.983318000 -0500 +++ linux-2.6.14-pnfs-current-dhildebz/fs/nfs/nfs4proc.c 2006-01-12 12:58:12.106226000 -0500 @@ -115,7 +115,7 @@ const u32 nfs4_fsinfo_bitmap[2] = { FATT | FATTR4_WORD0_MAXREAD | FATTR4_WORD0_MAXWRITE | FATTR4_WORD0_LEASE_TIME, - 0 + FATTR4_WORD1_FS_LAYOUT_TYPES }; static void nfs4_setup_readdir(u64 cookie, u32 *verifier, struct dentry *dentry, diff -puN fs/nfs/nfs4xdr.c~client-ld fs/nfs/nfs4xdr.c --- linux-2.6.14-pnfs-current/fs/nfs/nfs4xdr.c~client-ld 2006-01-11 14:21:46.000309000 -0500 +++ linux-2.6.14-pnfs-current-dhildebz/fs/nfs/nfs4xdr.c 2006-01-12 12:58:12.124226000 -0500 @@ -52,6 +52,7 @@ #include #include #include "nfs4_fs.h" +#include "pnfs.h" #define NFSDBG_FACILITY NFSDBG_XDR @@ -2608,6 +2609,27 @@ static int 
decode_attr_time(struct xdr_s return 0; } +/* + * Decode potentially multiple layout types. Currently we only support + * one layout driver per file system. + */ +static int decode_pnfs_list(struct xdr_stream *xdr, uint32_t *layoutclass) +{ + uint32_t *p; + int num; + + READ_BUF(4); + READ32(num); + + if (num > 1) + printk("%s: Warning: Multiple pNFS layout drivers per filesystem not supported\n", __FUNCTION__); + + /* Decode and set first layout type */ + READ_BUF(num * 4); + READ32(*layoutclass); + return 0; +} + static int decode_attr_time_access(struct xdr_stream *xdr, uint32_t *bitmap, struct timespec *time) { int status = 0; @@ -2656,6 +2678,21 @@ static int decode_attr_time_modify(struc return status; } +/* DH: The type of file system exported +*/ +static int decode_attr_pnfstype(struct xdr_stream *xdr, uint32_t *bitmap, uint32_t *layoutclass) +{ + int status = 0; + + if (unlikely(bitmap[1] & (FATTR4_WORD1_FS_LAYOUT_TYPES - 1U))) + return -EIO; + if (likely(bitmap[1] & FATTR4_WORD1_FS_LAYOUT_TYPES)) { + status = decode_pnfs_list(xdr, layoutclass); + bitmap[1] &= ~FATTR4_WORD1_FS_LAYOUT_TYPES; + } + return status; +} + static int verify_attr_len(struct xdr_stream *xdr, uint32_t *savep, uint32_t attrlen) { unsigned int attrwords = XDR_QUADLEN(attrlen); @@ -2913,6 +2950,8 @@ static int decode_fsinfo(struct xdr_stre if ((status = decode_attr_maxwrite(xdr, bitmap, &fsinfo->wtmax)) != 0) goto xdr_error; fsinfo->wtpref = fsinfo->wtmax; + if ((status = decode_attr_pnfstype(xdr, bitmap, &fsinfo->layoutclass)) != 0) + goto xdr_error; status = verify_attr_len(xdr, savep, attrlen); xdr_error: diff -puN /dev/null fs/nfs/pnfs.c --- /dev/null 2006-01-09 05:56:56.224752500 -0500 +++ linux-2.6.14-pnfs-current-dhildebz/fs/nfs/pnfs.c 2006-01-12 13:09:55.428778000 -0500 @@ -0,0 +1,193 @@ +/* + * linux/fs/nfs/pnfs.c + * + * pNFS functions to call underlying I/O subsystem for read and write. 
+ * Functions are also provided to inject the opaque file system layout + * information into the layout driver. + * + * Copyright (c) 2002 The Regents of the University of Michigan. + * All rights reserved. + * + * Dean Hildebrand + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED + * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "nfs4_fs.h" +#include "pnfs.h" + +#define NFSDBG_FACILITY NFSDBG_PNFS +/* Locking: + * + * pnfs_spinlock: + * protects pnfs_modules_tbl. 
+ */ +static spinlock_t pnfs_spinlock = SPIN_LOCK_UNLOCKED; + +/* + * pnfs_modules_tbl holds all pnfs modules + */ +static struct list_head pnfs_modules_tbl; +static int is_pnfs_initialized = 0; +static struct pnfs_client_operations pnfs_ops; + +/* + * struct pnfs_module - One per pNFS device module. + */ +struct pnfs_module { + struct pnfs_layoutdriver_type *pnfs_ld_type; + struct list_head pnfs_tblid; +}; + +static void +initialize_pnfs(void) +{ + INIT_LIST_HEAD(&pnfs_modules_tbl); + is_pnfs_initialized = 1; +} + +/* search pnfs_modules_tbl for right pnfs module */ +static int +find_pnfs(int id, struct pnfs_module **module) { + struct pnfs_module* local = NULL; + + dprintk("PNFS: %s: Searching for %d\n",__func__, id); + list_for_each_entry(local, &pnfs_modules_tbl, pnfs_tblid) { + if (local->pnfs_ld_type->id == id) { + *module = local; + return(1); + } + } + return 0; +} + +/* Uninitialize a mountpoint in a layout driver */ +void +unmount_pnfs_layoutdriver(struct super_block *sb) +{ + struct nfs_server *server = NFS_SB(sb); + if (server->pnfs_curr_ld && + server->pnfs_curr_ld->ld_io_ops && + server->pnfs_curr_ld->ld_io_ops->uninitialize_mountpoint) + server->pnfs_curr_ld->ld_io_ops->uninitialize_mountpoint(server->pnfs_mountid); +} + +/* + * Set the server pnfs module to the registered layout driver whose id matches. + * Only one pNFS layout driver is supported. + */ +void +set_pnfs_layoutdriver(struct super_block *sb, u32 id) +{ + struct pnfs_module *mod; + struct pnfs_mount_type* mt; + struct nfs_server *server = NFS_SB(sb); + + if (!is_pnfs_initialized) + initialize_pnfs(); + + if (id > 0 && + find_pnfs(id, &mod)) + { + dprintk("%s: Setting pNFS module\n",__FUNCTION__); + server->pnfs_curr_ld = mod->pnfs_ld_type; + mt = server->pnfs_curr_ld->ld_io_ops->initialize_mountpoint(sb); + if (!mt) + { + printk("%s: Error initializing mount point for layout driver %d. 
",__FUNCTION__, id); + goto out_err; + } + /* Layout driver succeeded in initializing mountpoint */ + server->pnfs_mountid = mt; + return; + } + + dprintk("%s: No pNFS module found for %d. ",__FUNCTION__, id); +out_err: + dprintk("Using NFSv4 I/O\n"); + server->pnfs_curr_ld = NULL; + server->pnfs_mountid = NULL; + return; +} + +/* Allow I/O module to set its functions structure */ +struct pnfs_client_operations* +pnfs_register_layoutdriver(struct pnfs_layoutdriver_type *ld_type) +{ + struct pnfs_module *pnfs_mod; + + if (!is_pnfs_initialized) + { + initialize_pnfs(); + } + + if ((pnfs_mod = kmalloc(sizeof(struct pnfs_module), GFP_KERNEL))!= NULL) + { + dprintk("%s Registering id:%d name:%s\n",__FUNCTION__, ld_type->id, ld_type->name); + pnfs_mod->pnfs_ld_type = ld_type; + INIT_LIST_HEAD(&pnfs_mod->pnfs_tblid); + + spin_lock(&pnfs_spinlock); + list_add(&pnfs_mod->pnfs_tblid, &pnfs_modules_tbl); + spin_unlock(&pnfs_spinlock); + } + + return &pnfs_ops; +} + +/* Remove an I/O module from the table of registered pNFS modules */ +void +pnfs_unregister_layoutdriver(struct pnfs_layoutdriver_type *ld_type) +{ + struct pnfs_module *pnfs_mod; + + if (!is_pnfs_initialized) + { + return; + } + + if (find_pnfs(ld_type->id, &pnfs_mod)) + { + dprintk("%s Deregistering id:%d\n",__FUNCTION__, ld_type->id); + spin_lock(&pnfs_spinlock); + list_del(&pnfs_mod->pnfs_tblid); + spin_unlock(&pnfs_spinlock); + kfree(pnfs_mod); + } +} + +EXPORT_SYMBOL(pnfs_unregister_layoutdriver); +EXPORT_SYMBOL(pnfs_register_layoutdriver); diff -puN /dev/null fs/nfs/pnfs.h --- /dev/null 2006-01-09 05:56:56.224752500 -0500 +++ linux-2.6.14-pnfs-current-dhildebz/fs/nfs/pnfs.h 2006-01-12 13:09:07.222604000 -0500 @@ -0,0 +1,18 @@ +/* + * fs/nfs/pnfs.h + * + * pNFS client data structures. + * + * Copyright (c) 2002 The Regents of the University of Michigan. + * All rights reserved. 
+ * + * Dean Hildebrand + */ + +#ifndef FS_NFS_PNFS_H +#define FS_NFS_PNFS_H + +void set_pnfs_layoutdriver(struct super_block *sb, u32 id); +void unmount_pnfs_layoutdriver(struct super_block *sb); + +#endif /* FS_NFS_PNFS_H */ diff -puN include/linux/nfs4_pnfs.h~client-ld include/linux/nfs4_pnfs.h --- linux-2.6.14-pnfs-current/include/linux/nfs4_pnfs.h~client-ld 2006-01-11 14:21:46.015294000 -0500 +++ linux-2.6.14-pnfs-current-dhildebz/include/linux/nfs4_pnfs.h 2006-01-11 14:21:46.101208000 -0500 @@ -136,4 +136,13 @@ struct pnfs_client_operations { struct pnfs_client_operations* pnfs_register_layoutdriver(struct pnfs_layoutdriver_type *); void pnfs_unregister_layoutdriver(struct pnfs_layoutdriver_type *); +#define NFS4_PNFS_MAX_LAYOUTS 4 + +enum pnfs_layouttype4 { + LAYOUT_NFSV4_FILES = 1, + LAYOUT_OSD2_OBJECTS = 2, + LAYOUT_BLOCK_VOLUME = 3, + LAYOUT_PVFS2 = 4 +}; + #endif /* LINUX_NFS4_PNFS_H */ diff -puN include/linux/nfs_fs_sb.h~client-ld include/linux/nfs_fs_sb.h --- linux-2.6.14-pnfs-current/include/linux/nfs_fs_sb.h~client-ld 2006-01-11 14:21:46.025284000 -0500 +++ linux-2.6.14-pnfs-current-dhildebz/include/linux/nfs_fs_sb.h 2006-01-11 14:21:46.105205000 -0500 @@ -46,6 +46,8 @@ struct nfs_server { u32 acl_bitmask; /* V4 bitmask representing the ACEs that are supported on this filesystem */ + struct pnfs_layoutdriver_type * pnfs_curr_ld; /* Active layout driver */ + struct pnfs_mount_type * pnfs_mountid; /* Mount identifier for pNFS layout driver */ #endif }; diff -puN include/linux/nfs_xdr.h~client-ld include/linux/nfs_xdr.h --- linux-2.6.14-pnfs-current/include/linux/nfs_xdr.h~client-ld 2006-01-11 14:21:46.032277000 -0500 +++ linux-2.6.14-pnfs-current-dhildebz/include/linux/nfs_xdr.h 2006-01-12 12:58:12.140226000 -0500 @@ -64,6 +64,7 @@ struct nfs_fsinfo { __u32 dtpref; /* pref. readdir transfer size */ __u64 maxfilesize; __u32 lease_time; /* in seconds */ + __u32 layoutclass; /* supported pnfs layout driver */ }; struct nfs_fsstat { _