[sheepdog] [PATCH 06/12] sheep/nfs: add basic file system framework

Liu Yuan namei.unix at gmail.com
Tue Jan 28 21:19:06 CET 2014


Add basic inode and dentry structures; for now only the 'ls /mntpoint' operation is supported.

For inode management, we use the same hash scheme as the http implementation.
This saves us a lot of code for inode allocation, deallocation and management.

We also make use of concepts similar to those found in traditional UNIX systems:
- inode, represents a file
- dentry, a directory entry that maps name components to 'inode numbers'
- extents, to manage free space for file data
- directory, a plain file that contains dentries, giving us a tree structure

Signed-off-by: Liu Yuan <namei.unix at gmail.com>
---
 sheep/Makefile.am |   4 +-
 sheep/nfs/fs.c    | 288 +++++++++++++++++++++++++++++++++++++++++++++++++++++
 sheep/nfs/fs.h    |  68 +++++++++++++
 sheep/nfs/mount.c |  44 ++++++++-
 sheep/nfs/nfs.c   | 290 ++++++++++++++++++++++++++++++++++++++++++++++++++++--
 sheep/nfs/nfs.h   |  18 ++++
 sheep/nfs/nfsd.c  |   6 +-
 sheep/nfs/xdr.c   |  12 +++
 8 files changed, 717 insertions(+), 13 deletions(-)
 create mode 100644 sheep/nfs/fs.c
 create mode 100644 sheep/nfs/fs.h

diff --git a/sheep/Makefile.am b/sheep/Makefile.am
index 3a82918..df3b626 100644
--- a/sheep/Makefile.am
+++ b/sheep/Makefile.am
@@ -35,7 +35,7 @@ sheep_SOURCES		+= http/http.c http/kv.c http/s3.c http/swift.c \
 endif
 
 if BUILD_NFS
-sheep_SOURCES		+= nfs/nfsd.c nfs/nfs.c nfs/xdr.c nfs/mount.c
+sheep_SOURCES		+= nfs/nfsd.c nfs/nfs.c nfs/xdr.c nfs/mount.c nfs/fs.c
 endif
 
 if BUILD_COROSYNC
@@ -59,7 +59,7 @@ sheep_DEPENDENCIES	= ../lib/libsheepdog.a
 
 
 noinst_HEADERS		= sheep_priv.h cluster.h http/http.h trace/trace.h
-			  nfs/nfs.h
+			  nfs/nfs.h nfs/fs.h
 
 EXTRA_DIST		= 
 
diff --git a/sheep/nfs/fs.c b/sheep/nfs/fs.c
new file mode 100644
index 0000000..23632dd
--- /dev/null
+++ b/sheep/nfs/fs.c
@@ -0,0 +1,288 @@
+/*
+ * Copyright (C) 2014 Taobao Inc.
+ *
+ * Liu Yuan <namei.unix at gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License version
+ * 2 as published by the Free Software Foundation.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "nfs.h"
+
+#define ROOT_IDX (sd_hash("/", 1) % MAX_DATA_OBJS)
+
+struct inode_data {
+	struct sd_inode *sd_inode;
+	struct inode *inode;
+	const char *name;
+	uint32_t vid;
+	uint32_t idx;
+	bool create;
+};
+
+static struct inode_data *prepare_inode_data(struct inode *inode, uint32_t vid,
+					     const char *name)
+{
+	struct inode_data *id = xzalloc(sizeof(*id));
+
+	id->sd_inode = xmalloc(sizeof(struct sd_inode));
+	id->inode = inode;
+	id->vid = vid;
+	id->name = name;
+
+	return id;
+}
+
+static void finish_inode_data(struct inode_data *id)
+{
+	free(id->sd_inode);
+	free(id);
+}
+
+static int inode_do_create(struct inode_data *id)
+{
+	struct sd_inode *sd_inode = id->sd_inode;
+	struct inode *inode = id->inode;
+	uint32_t idx = id->idx;
+	uint32_t vid = sd_inode->vdi_id;
+	uint64_t oid = vid_to_data_oid(vid, idx);
+	bool create = id->create;
+	int ret;
+
+	inode->ino = oid;
+	ret = sd_write_object(oid, (char *)inode, INODE_META_SIZE + inode->size,
+			      0, create);
+	if (ret != SD_RES_SUCCESS) {
+		sd_err("failed to create object, %" PRIx64, oid);
+		goto out;
+	}
+	if (!create)
+		goto out;
+
+	INODE_SET_VID(sd_inode, idx, vid);
+	ret = sd_inode_write_vid(sheep_bnode_writer, sd_inode, idx,
+				 vid, vid, 0, false, false);
+	if (ret != SD_RES_SUCCESS) {
+		sd_err("failed to update sd inode, %" PRIx64,
+		       vid_to_vdi_oid(vid));
+		goto out;
+	}
+out:
+	return ret;
+}
+
+static int inode_lookup(struct inode_data *idata)
+{
+	struct sd_inode *sd_inode = idata->sd_inode;
+	uint32_t tmp_vid, idx, vid = idata->vid;
+	uint64_t hval, i;
+	bool create = true;
+	const char *name = idata->name;
+	int ret;
+
+	ret = sd_read_object(vid_to_vdi_oid(vid), (char *)sd_inode,
+			     sizeof(*sd_inode), 0);
+	if (ret != SD_RES_SUCCESS) {
+		sd_err("failed to read %" PRIx32 " %s", vid,
+		       sd_strerror(ret));
+		goto err;
+	}
+
+	hval = sd_hash(name, strlen(name));
+	for (i = 0; i < MAX_DATA_OBJS; i++) {
+		idx = (hval + i) % MAX_DATA_OBJS;
+		tmp_vid = INODE_GET_VID(sd_inode, idx);
+		if (tmp_vid) {
+			uint64_t oid = vid_to_data_oid(vid, idx);
+			uint64_t block;
+
+			ret = sd_read_object(oid, (char *)&block, sizeof(block),
+					     0);
+			if (ret != SD_RES_SUCCESS)
+				goto err;
+			if (block == 0) {
+				create = false;
+				goto out;
+			}
+		} else
+			break;
+	}
+	if (i == MAX_DATA_OBJS) {
+		ret = SD_RES_NO_SPACE;
+		goto err;
+	}
+out:
+	idata->create = create;
+	idata->idx = idx;
+	return SD_RES_SUCCESS;
+err:
+	return ret;
+}
+
+static inline int inode_create(struct inode *inode, uint32_t vid,
+			       const char *name)
+{
+	struct inode_data *id = prepare_inode_data(inode, vid, name);
+	int ret;
+
+	sys->cdrv->lock(vid);
+	ret = inode_lookup(id);
+	if (ret == SD_RES_SUCCESS)
+		ret = inode_do_create(id);
+	else
+		sd_err("failed to lookup %s", name);
+	sys->cdrv->unlock(vid);
+	finish_inode_data(id);
+	return ret;
+}
+
+static int nlink_inc(uint64_t ino)
+{
+	struct inode *inode = fs_read_inode_hdr(ino);
+	int ret;
+
+	if (IS_ERR(inode))
+		return PTR_ERR(inode);
+
+	inode->nlink++;
+
+	ret = fs_write_inode_hdr(inode);
+	free(inode);
+	return ret;
+}
+
+static int dir_create(struct inode *inode, uint32_t vid, const char *name,
+		      uint64_t pino)
+{
+	struct inode_data *id = prepare_inode_data(inode, vid, name);
+	struct dentry *entry;
+	uint64_t myino;
+	int ret;
+
+	sys->cdrv->lock(vid);
+	ret = inode_lookup(id);
+	if (ret != SD_RES_SUCCESS) {
+		sd_err("failed to lookup %s", name);
+		goto out;
+	}
+
+	myino = vid_to_data_oid(id->vid, id->idx);
+
+	inode->nlink = 2; /* '.' and 'name' */
+	inode->size = 2 * sizeof(struct dentry);
+	inode->used = INODE_DATA_SIZE;
+	entry = (struct dentry *)inode->data;
+	entry->ino = myino;
+	entry->nlen = 1;
+	entry->name[0] = '.';
+	entry++;
+	entry->ino = pino;
+	entry->nlen = 2;
+	entry->name[0] = '.';
+	entry->name[1] = '.';
+
+	if (unlikely(myino == pino))
+		inode->nlink++; /* I'm root */
+	else {
+		ret = nlink_inc(pino);
+		if (ret != SD_RES_SUCCESS) {
+			sd_err("failed to inc nlink %"PRIx64, pino);
+			goto out;
+		}
+	}
+
+	ret = inode_do_create(id);
+out:
+	sys->cdrv->unlock(vid);
+	finish_inode_data(id);
+	return ret;
+}
+
+int fs_make_root(uint32_t vid)
+{
+	struct inode *root = xzalloc(sizeof(*root));
+	int ret;
+
+	root->mode = S_IFDIR | sd_def_dmode;
+	root->uid = 0;
+	root->gid = 0;
+	root->atime = root->mtime = root->ctime = time(NULL);
+
+	ret = dir_create(root, vid, "/", fs_root_ino(vid));
+	free(root);
+	return ret;
+}
+
+uint64_t fs_root_ino(uint32_t vid)
+{
+	return vid_to_data_oid(vid, ROOT_IDX);
+}
+
+static struct inode *inode_read(uint64_t ino, uint64_t size)
+{
+	struct inode *inode = xmalloc(size);
+	long ret;
+
+	ret = sd_read_object(ino, (char *)inode, size, 0);
+	if (ret != SD_RES_SUCCESS) {
+		sd_err("failed to read %" PRIx64 " %s", ino, sd_strerror(ret));
+		free(inode);
+		inode = (struct inode *)-ret;
+	}
+	return inode;
+}
+
+struct inode *fs_read_inode_hdr(uint64_t ino)
+{
+	return inode_read(ino, INODE_HDR_SIZE);
+}
+
+struct inode *fs_read_inode_full(uint64_t ino)
+{
+	return inode_read(ino, sizeof(struct inode));
+}
+
+static int inode_write(struct inode *inode, uint64_t size)
+{
+	uint64_t oid = inode->ino;
+	int ret;
+
+	ret = sd_write_object(oid, (char *)inode, size, 0, 0);
+	if (ret != SD_RES_SUCCESS)
+		sd_err("failed to write %" PRIx64" %s", oid, sd_strerror(ret));
+
+	return ret;
+}
+
+int fs_write_inode_hdr(struct inode *inode)
+{
+	return inode_write(inode, INODE_HDR_SIZE);
+}
+
+int fs_write_inode_full(struct inode *inode)
+{
+	return inode_write(inode, sizeof(*inode));
+}
+
+int fs_read_dir(struct inode *inode, uint64_t offset,
+		int (*dentry_reader)(struct inode *, struct dentry *, void *),
+		void *data)
+{
+	struct dentry *entry = (struct dentry *)inode->data; /* table base */
+	int ret = SD_RES_SUCCESS;
+	uint64_t dentry_count = inode->size / sizeof(struct dentry);
+	uint64_t i;
+
+	sd_debug("%"PRIu64", %"PRIu64, offset, inode->size);
+
+	for (i = offset / sizeof(*entry); i < dentry_count; i++) {
+		ret = dentry_reader(inode, entry + i, data);
+		if (ret != SD_RES_SUCCESS)
+			break;
+	}
+	return ret;
+}
diff --git a/sheep/nfs/fs.h b/sheep/nfs/fs.h
new file mode 100644
index 0000000..50c7052
--- /dev/null
+++ b/sheep/nfs/fs.h
@@ -0,0 +1,68 @@
+/*
+ * Copyright (C) 2014 Taobao Inc.
+ *
+ * Liu Yuan <namei.unix at gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License version
+ * 2 as published by the Free Software Foundation.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+#ifndef _FS_H_
+#define _FS_H_
+
+#include "sheep_priv.h"
+
+struct extent {
+	uint64_t start;
+	uint64_t count;
+};
+
+#define INODE_HDR_SIZE    SECTOR_SIZE
+#define INODE_EXTENT_SIZE (BLOCK_SIZE * 2)
+#define INODE_META_SIZE (INODE_HDR_SIZE + INODE_EXTENT_SIZE)
+#define INODE_DATA_SIZE (SD_DATA_OBJ_SIZE - INODE_META_SIZE)
+
+struct inode {
+	union {
+		struct {
+			uint32_t mode;	/* File mode */
+			uint32_t nlink;	/* Links count */
+			uint32_t uid;	/* Owner Uid */
+			uint32_t gid;	/* Group Id */
+			uint64_t size;	/* Size in bytes */
+			uint64_t used;	/* Used in bytes */
+			uint64_t atime;	/* Access time */
+			uint64_t ctime;	/* Creation time */
+			uint64_t mtime;	/* Modification time */
+			uint64_t ino;   /* Inode number */
+			uint16_t extent_count; /* Number of extents */
+		};
+		uint8_t __pad1[INODE_HDR_SIZE];
+	};
+	union {
+		struct extent extent[0];
+		uint8_t __pad2[INODE_EXTENT_SIZE];
+	};
+	uint8_t data[INODE_DATA_SIZE];
+};
+
+struct dentry {
+	uint64_t ino;             /* Inode number */
+	uint16_t nlen;            /* Name length */
+	char name[NFS_MAXNAMLEN]; /* File name */
+};
+
+int fs_make_root(uint32_t vid);
+uint64_t fs_root_ino(uint32_t vid);
+struct inode *fs_read_inode_hdr(uint64_t ino);
+struct inode *fs_read_inode_full(uint64_t ino);
+int fs_write_inode_hdr(struct inode *inode);
+int fs_write_inode_full(struct inode *inode);
+int fs_read_dir(struct inode *inode, uint64_t offset,
+		int (*dentry_reader)(struct inode *, struct dentry *, void *),
+		void *data);
+
+#endif
diff --git a/sheep/nfs/mount.c b/sheep/nfs/mount.c
index 8f454cf..4ae1c63 100644
--- a/sheep/nfs/mount.c
+++ b/sheep/nfs/mount.c
@@ -16,26 +16,64 @@
 
 void *mount3_null(struct svc_req *req, struct nfs_arg *arg)
 {
-	return NULL;
+	static void *result;
+
+	return &result;
 }
 
 void *mount3_mnt(struct svc_req *req, struct nfs_arg *arg)
 {
-	return NULL;
+	static mountres3 result;
+	static int auth = AUTH_UNIX; /* FIXME: add auth support */
+	static struct svc_fh fh;
+	char *p = arg->mnt;
+	uint32_t vid;
+	int ret;
+
+	sd_debug("%s", p);
+
+	ret = sd_lookup_vdi(p, &vid);
+	switch (ret) {
+	case SD_RES_SUCCESS:
+		fh.ino = fs_root_ino(vid);
+		result.fhs_status = MNT3_OK;
+		break;
+	case SD_RES_NO_VDI:
+		result.fhs_status = MNT3ERR_NOENT;
+		goto out;
+	default:
+		result.fhs_status = MNT3ERR_SERVERFAULT;
+		goto out;
+	}
+
+	result.mountres3_u.mountinfo.fhandle.fhandle3_len = sizeof(fh);
+	result.mountres3_u.mountinfo.fhandle.fhandle3_val = (char *)&fh;
+	result.mountres3_u.mountinfo.auth_flavors.auth_flavors_len = 1;
+	result.mountres3_u.mountinfo.auth_flavors.auth_flavors_val = &auth;
+out:
+	return &result;
 }
 
 void *mount3_dump(struct svc_req *req, struct nfs_arg *arg)
 {
 	return NULL;
 }
+
 void *mount3_umnt(struct svc_req *req, struct nfs_arg *arg)
 {
-	return NULL;
+	static void *result;
+	char *p = arg->umnt;
+
+	sd_debug("%s", p);
+
+	return &result;
 }
+
 void *mount3_umntall(struct svc_req *req, struct nfs_arg *arg)
 {
 	return NULL;
 }
+
 void *mount3_export(struct svc_req *req, struct nfs_arg *arg)
 {
 	return NULL;
diff --git a/sheep/nfs/nfs.c b/sheep/nfs/nfs.c
index b352bac..f992434 100644
--- a/sheep/nfs/nfs.c
+++ b/sheep/nfs/nfs.c
@@ -14,14 +14,91 @@
 #include "sheep_priv.h"
 #include "nfs.h"
 
+/*
+ * Comment from Linux kernel nfsd for RPC payload size
+ *
+ * Maximum payload size supported by a kernel RPC server.
+ * This is use to determine the max number of pages nfsd is
+ * willing to return in a single READ operation.
+ *
+ * These happen to all be powers of 2, which is not strictly
+ * necessary but helps enforce the real limitation, which is
+ * that they should be multiples of PAGE_CACHE_SIZE.
+ *
+ * For UDP transports, a block plus NFS,RPC, and UDP headers
+ * has to fit into the IP datagram limit of 64K.  The largest
+ * feasible number for all known page sizes is probably 48K,
+ * but we choose 32K here.  This is the same as the historical
+ * Linux limit; someone who cares more about NFS/UDP performance
+ * can test a larger number.
+ *
+ * For TCP transports we have more freedom.  A size of 1MB is
+ * chosen to match the client limit.  Other OSes are known to
+ * have larger limits, but those numbers are probably beyond
+ * the point of diminishing returns.
+ */
+#define RPCSVC_MAXPAYLOAD	(1*1024*1024u)
+#define RPCSVC_MAXPAYLOAD_TCP	RPCSVC_MAXPAYLOAD
+#define RPCSVC_MAXPAYLOAD_UDP	(32*1024u)
+
+static struct svc_fh *get_svc_fh(struct nfs_arg *argp)
+{
+	struct nfs_fh3 *nfh = (struct nfs_fh3 *)argp;
+
+	if (unlikely(nfh->data.data_len != sizeof(struct svc_fh)))
+		panic("invalid nfs file handle len %u", nfh->data.data_len);
+
+	return (struct svc_fh *)(nfh->data.data_val);
+}
+
+static void update_post_attr(struct inode *inode, fattr3 *post)
+{
+	post->type = S_ISDIR(inode->mode) ? NF3DIR : NF3REG;
+	post->mode = inode->mode;
+	post->nlink = inode->nlink;
+	post->uid = inode->uid;
+	post->gid = inode->gid;
+	post->size = inode->size;
+	post->used = inode->used;
+	post->fsid = oid_to_vid(inode->ino);
+	post->fileid = inode->ino;
+	post->atime.seconds = inode->atime;
+	post->mtime.seconds = inode->mtime;
+	post->ctime.seconds = inode->ctime;
+}
+
 void *nfs3_null(struct svc_req *req, struct nfs_arg *argp)
 {
-	return NULL;
+	static void *result;
+
+	return &result;
 }
 
 void *nfs3_getattr(struct svc_req *req, struct nfs_arg *argp)
 {
-	return NULL;
+	static GETATTR3res result;
+	struct svc_fh *fh = get_svc_fh(argp);
+	struct fattr3 *post = &result.GETATTR3res_u.resok.obj_attributes;
+	struct inode *inode;
+
+	inode = fs_read_inode_hdr(fh->ino);
+	if (IS_ERR(inode)) {
+		switch (PTR_ERR(inode)) {
+		case SD_RES_NO_OBJ:
+			result.status = NFS3ERR_NOENT;
+			goto out;
+		default:
+			result.status = NFS3ERR_IO;
+			goto out;
+		}
+	}
+
+	update_post_attr(inode, post);
+	result.status = NFS3_OK;
+
+	free(inode);
+out:
+	return &result;
 }
 
 void *nfs3_setattr(struct svc_req *req, struct nfs_arg *argp)
@@ -34,9 +111,44 @@ void *nfs3_lookup(struct svc_req *req, struct nfs_arg *argp)
 	return NULL;
 }
 
+/* FIXME: implement UNIX ACL */
 void *nfs3_access(struct svc_req *req, struct nfs_arg *argp)
 {
-	return NULL;
+	static ACCESS3res result;
+	ACCESS3args *arg = &argp->access;
+	struct svc_fh *fh = get_svc_fh(argp);
+	struct post_op_attr *poa = &result.ACCESS3res_u.resok.obj_attributes;
+	struct fattr3 *post = &poa->post_op_attr_u.attributes;
+	uint32_t access;
+	struct inode *inode;
+
+	inode = fs_read_inode_hdr(fh->ino);
+	if (IS_ERR(inode)) {
+		switch (PTR_ERR(inode)) {
+		case SD_RES_NO_OBJ:
+			result.status = NFS3ERR_NOENT;
+			goto out;
+		default:
+			result.status = NFS3ERR_IO;
+			goto out;
+		}
+	}
+
+	poa->attributes_follow = true;
+	update_post_attr(inode, post);
+	access = ACCESS3_READ | ACCESS3_MODIFY | ACCESS3_EXTEND |
+		 ACCESS3_EXECUTE;
+	if (post->type == NF3DIR) {
+		access |= ACCESS3_LOOKUP | ACCESS3_DELETE;
+		access &= ~ACCESS3_EXECUTE;
+	}
+
+	result.status = NFS3_OK;
+	result.ACCESS3res_u.resok.access = access & arg->access;
+
+	free(inode);
+out:
+	return &result;
 }
 
 void *nfs3_readlink(struct svc_req *req, struct nfs_arg *argp)
@@ -94,14 +206,132 @@ void *nfs3_link(struct svc_req *req, struct nfs_arg *argp)
 	return NULL;
 }
 
+/* Linux NFS client will issue at most 32k count for readdir on my test */
+#define ENTRY3_MAX_LEN (32*1024)
+
+static char entry3_buffer[ENTRY3_MAX_LEN];
+static char entry3_name[ENTRY3_MAX_LEN];
+
+/*
+ * static READDIR3resok size with XDR overhead
+ *
+ * 88 bytes attributes, 8 bytes verifier, 4 bytes value_follows for
+ * first entry, 4 bytes eof flag
+ */
+#define RESOK_SIZE 104
+
+/*
+ * static entry3 size with XDR overhead
+ *
+ * 8 bytes fileid, 4 bytes name length, 8 bytes cookie, 4 byte value_follows
+ */
+#define ENTRY_SIZE 24
+
+/*
+ * size of a name with XDR overhead
+ *
+ * XDR pads to multiple of 4 bytes
+ */
+#define NAME_SIZE(x) (((strlen((x))+3)/4)*4)
+
+struct dir_reader_d {
+	uint32_t count;
+	uint32_t used;
+	entry3 *entries;
+	uint32_t iter;
+};
+
+static int nfs_dentry_reader(struct inode *inode, struct dentry *dentry,
+			     void *data)
+{
+	struct dir_reader_d *d = data;
+	uint32_t iter = d->iter;
+	uint64_t offset = (uint8_t *)(dentry + 1) - inode->data;
+
+	/* Stop when the reply budget or the static name slots run out */
+	d->used += ENTRY_SIZE + NAME_SIZE(dentry->name);
+	if (d->used > d->count || iter >= ENTRY3_MAX_LEN / NFS_MAXNAMLEN)
+		return SD_RES_AGAIN;
+
+	d->entries[iter].fileid = dentry->ino;
+	strcpy(&entry3_name[iter * NFS_MAXNAMLEN], dentry->name);
+	d->entries[iter].name = &entry3_name[iter * NFS_MAXNAMLEN];
+	d->entries[iter].cookie = offset;
+	d->entries[iter].nextentry = NULL;
+	if (iter > 0)
+		d->entries[iter - 1].nextentry = d->entries + iter;
+	sd_debug("%s, %"PRIu64, d->entries[iter].name, offset);
+	d->iter++;
+
+	return SD_RES_SUCCESS;
+}
+
 void *nfs3_readdir(struct svc_req *req, struct nfs_arg *argp)
 {
-	return NULL;
+	static READDIR3res result;
+	READDIR3args *arg = &argp->readdir;
+	struct svc_fh *fh = get_svc_fh(argp);
+	struct post_op_attr *poa =
+		&result.READDIR3res_u.resok.dir_attributes;
+	struct fattr3 *post = &poa->post_op_attr_u.attributes;
+	struct inode *inode;
+	struct dir_reader_d wd;
+	int ret;
+
+	sd_debug("%"PRIx64" count %"PRIu32", at %"PRIu64, fh->ino,
+		 (uint32_t)arg->count, arg->cookie);
+
+	inode = fs_read_inode_full(fh->ino);
+	if (IS_ERR(inode)) {
+		switch (PTR_ERR(inode)) {
+		case SD_RES_NO_OBJ:
+			result.status = NFS3ERR_NOENT;
+			goto out;
+		default:
+			result.status = NFS3ERR_IO;
+			goto out;
+		}
+	}
+
+	if (!S_ISDIR(inode->mode)) {
+		result.status = NFS3ERR_NOTDIR;
+		goto out_free;
+	}
+
+	wd.count = arg->count;
+	wd.entries = (entry3 *)entry3_buffer;
+	wd.iter = 0;
+	wd.used = RESOK_SIZE;
+	ret = fs_read_dir(inode, arg->cookie, nfs_dentry_reader, &wd);
+	switch (ret) {
+	case SD_RES_SUCCESS:
+	case SD_RES_AGAIN:
+		/* result is static: always store eof, never inherit it */
+		result.status = NFS3_OK;
+		result.READDIR3res_u.resok.reply.eof =
+			(ret == SD_RES_SUCCESS);
+		break;
+	default:
+		result.status = NFS3ERR_IO;
+		goto out_free;
+	}
+
+	result.READDIR3res_u.resok.reply.entries = wd.iter ? wd.entries : NULL;
+	poa->attributes_follow = true;
+	update_post_attr(inode, post);
+out_free:
+	free(inode);
+out:
+	return &result;
 }
 
 void *nfs3_readdirplus(struct svc_req *req, struct nfs_arg *argp)
 {
-	return NULL;
+	static READDIRPLUS3res result;
+
+	result.status = NFS3ERR_NOTSUPP;
+
+	return &result;
 }
 
 void *nfs3_fsstat(struct svc_req *req, struct nfs_arg *argp)
@@ -109,14 +339,60 @@ void *nfs3_fsstat(struct svc_req *req, struct nfs_arg *argp)
 	return NULL;
 }
 
+static uint32_t get_max_size(struct svc_req *req)
+{
+	int v, ret;
+	socklen_t l;
+
+	l = sizeof(v);
+	ret = getsockopt(req->rq_xprt->xp_sock, SOL_SOCKET, SO_TYPE, &v, &l);
+	if (ret < 0) {
+		sd_info("unable to determine socket type, use udp size");
+		goto out;
+	}
+	if (v == SOCK_STREAM) /* SO_TYPE lands in v; ret is only 0/-1 */
+		return RPCSVC_MAXPAYLOAD_TCP;
+out:
+	/* UDP is a safe value for all the transport */
+	return RPCSVC_MAXPAYLOAD_UDP;
+}
+
 void *nfs3_fsinfo(struct svc_req *req, struct nfs_arg *argp)
 {
-	return NULL;
+	static FSINFO3res result;
+	uint32_t maxsize = get_max_size(req);
+
+	result.status = NFS3_OK;
+	result.FSINFO3res_u.resok.obj_attributes.attributes_follow = false;
+	result.FSINFO3res_u.resok.rtmax = maxsize;
+	result.FSINFO3res_u.resok.rtpref = maxsize;
+	result.FSINFO3res_u.resok.rtmult = BLOCK_SIZE;
+	result.FSINFO3res_u.resok.wtmax = maxsize;
+	result.FSINFO3res_u.resok.wtpref = maxsize;
+	result.FSINFO3res_u.resok.wtmult = BLOCK_SIZE;
+	result.FSINFO3res_u.resok.dtpref = BLOCK_SIZE;
+	result.FSINFO3res_u.resok.maxfilesize = SD_MAX_VDI_SIZE;
+	result.FSINFO3res_u.resok.time_delta.seconds = 1;
+	result.FSINFO3res_u.resok.time_delta.nseconds = 0;
+	result.FSINFO3res_u.resok.properties = FSF3_HOMOGENEOUS;
+
+	return &result;
 }
 
 void *nfs3_pathconf(struct svc_req *req, struct nfs_arg *argp)
 {
-	return NULL;
+	static PATHCONF3res result;
+
+	result.status = NFS3_OK;
+	result.PATHCONF3res_u.resok.obj_attributes.attributes_follow = false;
+	result.PATHCONF3res_u.resok.linkmax = UINT32_MAX;
+	result.PATHCONF3res_u.resok.name_max = NFS_MAXNAMLEN;
+	result.PATHCONF3res_u.resok.no_trunc = true;
+	result.PATHCONF3res_u.resok.chown_restricted = false;
+	result.PATHCONF3res_u.resok.case_insensitive = false;
+	result.PATHCONF3res_u.resok.case_preserving = true;
+
+	return &result;
 }
 
 void *nfs3_commit(struct svc_req *req, struct nfs_arg *argp)
diff --git a/sheep/nfs/nfs.h b/sheep/nfs/nfs.h
index 1a0e501..5d6a67d 100644
--- a/sheep/nfs/nfs.h
+++ b/sheep/nfs/nfs.h
@@ -1,3 +1,15 @@
+/*
+ * Copyright (C) 2014 Taobao Inc.
+ *
+ * Liu Yuan <namei.unix at gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License version
+ * 2 as published by the Free Software Foundation.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
 #ifndef _NFS_H
 #define _NFS_H
 
@@ -1230,4 +1242,10 @@ extern bool_t xdr_groupnode(XDR *, groupnode*);
 extern bool_t xdr_exports(XDR *, exports*);
 extern bool_t xdr_exportnode(XDR *, exportnode*);
 
+struct svc_fh {
+	uint64_t ino;
+};
+
+#include "fs.h"
+
 #endif /* !_NFS_H */
diff --git a/sheep/nfs/nfsd.c b/sheep/nfs/nfsd.c
index 6556673..31b3375 100644
--- a/sheep/nfs/nfsd.c
+++ b/sheep/nfs/nfsd.c
@@ -217,7 +217,11 @@ int nfs_create(const char *name)
 	if (ret != SD_RES_SUCCESS)
 		return ret;
 
-	return SD_RES_SUCCESS;
+	ret = fs_make_root(vdi);
+	if (ret != SD_RES_SUCCESS)
+		sd_delete_vdi(name);
+
+	return ret;
 }
 
 int nfs_delete(const char *name)
diff --git a/sheep/nfs/xdr.c b/sheep/nfs/xdr.c
index 7f9b0f4..8a47daa 100644
--- a/sheep/nfs/xdr.c
+++ b/sheep/nfs/xdr.c
@@ -1,3 +1,15 @@
+/*
+ * Copyright (C) 2014 Taobao Inc.
+ *
+ * Liu Yuan <namei.unix at gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License version
+ * 2 as published by the Free Software Foundation.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
 #include "nfs.h"
 
 bool_t
-- 
1.8.1.2




More information about the sheepdog mailing list