[sheepdog] [PATCH v3 06/12] sheep/nfs: add basic file system framework
Liu Yuan
namei.unix at gmail.com
Wed Jan 29 09:26:20 CET 2014
Add basic inode and dentry structures and support just the 'ls /mntpoint' operation.
For inode management, we use the same hash scheme as the http implementation. This
will save us a lot of code for inode allocation, deallocation and management.
We also make use of similar concepts found in traditional UNIX systems:
- inode, represents files
- dentry, directory entry that maps name components to 'inode numbers'
- extents, to manage free space for file data
- directory, a plain file that contains dentries, giving us tree structure
Signed-off-by: Liu Yuan <namei.unix at gmail.com>
---
sheep/Makefile.am | 4 +-
sheep/nfs/fs.c | 288 +++++++++++++++++++++++++++++++++++++++++++++++++++++
sheep/nfs/fs.h | 68 +++++++++++++
sheep/nfs/mount.c | 44 ++++++++-
sheep/nfs/nfs.c | 290 ++++++++++++++++++++++++++++++++++++++++++++++++++++--
sheep/nfs/nfs.h | 18 ++++
sheep/nfs/nfsd.c | 6 +-
sheep/nfs/xdr.c | 12 +++
8 files changed, 717 insertions(+), 13 deletions(-)
create mode 100644 sheep/nfs/fs.c
create mode 100644 sheep/nfs/fs.h
diff --git a/sheep/Makefile.am b/sheep/Makefile.am
index 3a82918..df3b626 100644
--- a/sheep/Makefile.am
+++ b/sheep/Makefile.am
@@ -35,7 +35,7 @@ sheep_SOURCES += http/http.c http/kv.c http/s3.c http/swift.c \
endif
if BUILD_NFS
-sheep_SOURCES += nfs/nfsd.c nfs/nfs.c nfs/xdr.c nfs/mount.c
+sheep_SOURCES += nfs/nfsd.c nfs/nfs.c nfs/xdr.c nfs/mount.c nfs/fs.c
endif
if BUILD_COROSYNC
@@ -59,7 +59,7 @@ sheep_DEPENDENCIES = ../lib/libsheepdog.a
noinst_HEADERS = sheep_priv.h cluster.h http/http.h trace/trace.h
- nfs/nfs.h
+ nfs/nfs.h nfs/fs.h
EXTRA_DIST =
diff --git a/sheep/nfs/fs.c b/sheep/nfs/fs.c
new file mode 100644
index 0000000..23632dd
--- /dev/null
+++ b/sheep/nfs/fs.c
@@ -0,0 +1,288 @@
+/*
+ * Copyright (C) 2014 Taobao Inc.
+ *
+ * Liu Yuan <namei.unix at gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License version
+ * 2 as published by the Free Software Foundation.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "nfs.h"
+
+/* Slot index of the root directory: the hash of "/" modulo MAX_DATA_OBJS. */
+#define ROOT_IDX (sd_hash("/", 1) % MAX_DATA_OBJS)
+
+/* Scratch state passed between the inode creation helpers in this file. */
+struct inode_data {
+ struct sd_inode *sd_inode; /* buffer the VDI inode object is read into */
+ struct inode *inode; /* file system inode to be written out */
+ const char *name; /* name that is hashed to pick a slot */
+ uint32_t vid; /* VDI id the slot lookup runs against */
+ uint32_t idx; /* slot index chosen by inode_lookup() */
+ bool create; /* true if the chosen slot has no object yet */
+};
+
+/*
+ * Allocate a zeroed inode_data plus a buffer for the VDI inode, and record
+ * the caller's inode, vid and name in it.  Release it with
+ * finish_inode_data().
+ */
+static struct inode_data *prepare_inode_data(struct inode *inode, uint32_t vid,
+					     const char *name)
+{
+	struct inode_data *idata;
+
+	idata = xzalloc(sizeof(*idata));
+	idata->name = name;
+	idata->vid = vid;
+	idata->inode = inode;
+	idata->sd_inode = xmalloc(sizeof(*idata->sd_inode));
+
+	return idata;
+}
+
+/* Free the VDI inode buffer and the inode_data that owns it. */
+static void finish_inode_data(struct inode_data *id)
+{
+	struct sd_inode *buf = id->sd_inode;
+
+	free(id);
+	free(buf);
+}
+
+/*
+ * Write id->inode (metadata plus inode->size bytes of data) into the data
+ * object at slot id->idx and store that object id in inode->ino.  When
+ * id->create is set, the slot is also recorded in the VDI inode and that
+ * update is written back.  Returns the SD_RES_* code of the last write.
+ */
+static int inode_do_create(struct inode_data *id)
+{
+	struct sd_inode *sd_inode = id->sd_inode;
+	struct inode *inode = id->inode;
+	uint32_t slot = id->idx;
+	uint32_t vid = sd_inode->vdi_id;
+	uint64_t oid = vid_to_data_oid(vid, slot);
+	bool fresh = id->create;
+	int ret;
+
+	inode->ino = oid;
+	ret = sd_write_object(oid, (char *)inode,
+			      INODE_META_SIZE + inode->size, 0, fresh);
+	if (ret != SD_RES_SUCCESS) {
+		sd_err("failed to create object, %" PRIx64, oid);
+		return ret;
+	}
+
+	/* Reusing an existing object: the VDI inode already knows the slot. */
+	if (!fresh)
+		return ret;
+
+	INODE_SET_VID(sd_inode, slot, vid);
+	ret = sd_inode_write_vid(sheep_bnode_writer, sd_inode, slot,
+				 vid, vid, 0, false, false);
+	if (ret != SD_RES_SUCCESS)
+		sd_err("failed to update sd inode, %" PRIx64,
+		       vid_to_vdi_oid(vid));
+
+	return ret;
+}
+
+/*
+ * Choose the slot in which the inode named idata->name will be stored.
+ *
+ * Reads the VDI inode into idata->sd_inode, then probes linearly from the
+ * hash of the name.  The probe stops at the first slot that has no vid
+ * recorded (create = true) or at the first allocated object whose leading
+ * 8 bytes are zero (create = false).  On success idata->idx and
+ * idata->create are filled in and SD_RES_SUCCESS is returned.  Returns
+ * SD_RES_NO_SPACE when every slot was probed without a hit, or the read
+ * error otherwise.
+ */
+static int inode_lookup(struct inode_data *idata)
+{
+	struct sd_inode *sd_inode = idata->sd_inode;
+	const char *name = idata->name;
+	uint32_t vid = idata->vid;
+	uint32_t tmp_vid, idx;
+	uint64_t hval, probe;
+	bool create = true;
+	int ret;
+
+	ret = sd_read_object(vid_to_vdi_oid(vid), (char *)sd_inode,
+			     sizeof(*sd_inode), 0);
+	if (ret != SD_RES_SUCCESS) {
+		sd_err("failed to read %" PRIx32 " %s", vid,
+		       sd_strerror(ret));
+		return ret;
+	}
+
+	hval = sd_hash(name, strlen(name));
+	for (probe = 0; probe < MAX_DATA_OBJS; probe++) {
+		uint64_t oid, block;
+
+		idx = (hval + probe) % MAX_DATA_OBJS;
+		tmp_vid = INODE_GET_VID(sd_inode, idx);
+		if (!tmp_vid)
+			break;
+
+		oid = vid_to_data_oid(vid, idx);
+		ret = sd_read_object(oid, (char *)&block, sizeof(block), 0);
+		if (ret != SD_RES_SUCCESS)
+			return ret;
+
+		if (block == 0) {
+			/* Allocated object with a zeroed head: reuse it. */
+			create = false;
+			break;
+		}
+	}
+	if (probe == MAX_DATA_OBJS)
+		return SD_RES_NO_SPACE;
+
+	idata->create = create;
+	idata->idx = idx;
+	return SD_RES_SUCCESS;
+}
+
+/*
+ * Create (or reuse a slot for) 'inode' under 'name' in VDI 'vid'.  The slot
+ * lookup and the write both run while holding the cluster driver lock for
+ * 'vid'.  Returns the result of the lookup if it failed, otherwise the
+ * result of inode_do_create().
+ */
+static inline int inode_create(struct inode *inode, uint32_t vid,
+			       const char *name)
+{
+	struct inode_data *idata = prepare_inode_data(inode, vid, name);
+	int ret;
+
+	sys->cdrv->lock(vid);
+	ret = inode_lookup(idata);
+	if (ret != SD_RES_SUCCESS)
+		sd_err("failed to lookup %s", name);
+	else
+		ret = inode_do_create(idata);
+	sys->cdrv->unlock(vid);
+
+	finish_inode_data(idata);
+	return ret;
+}
+
+/*
+ * Increment the link count of inode 'ino': read its header, bump nlink and
+ * write the header back.  Returns PTR_ERR() of the read on failure,
+ * otherwise the result of the header write.
+ */
+static int nlink_inc(uint64_t ino)
+{
+	struct inode *hdr;
+	int ret;
+
+	hdr = fs_read_inode_hdr(ino);
+	if (IS_ERR(hdr))
+		return PTR_ERR(hdr);
+
+	hdr->nlink += 1;
+	ret = fs_write_inode_hdr(hdr);
+
+	free(hdr);
+	return ret;
+}
+
+/*
+ * Create directory 'name' in VDI 'vid' whose parent has inode number 'pino'.
+ *
+ * Under the cluster driver lock for 'vid' this picks a slot with
+ * inode_lookup(), fills 'inode' with the "." and ".." entries, increments
+ * the parent's link count (or its own, when the directory is its own
+ * parent) and writes the inode with inode_do_create().
+ * Returns SD_RES_SUCCESS on success.
+ *
+ * NOTE(review): if inode_do_create() fails after nlink_inc() succeeded, the
+ * parent's link count is left incremented.
+ */
+static int dir_create(struct inode *inode, uint32_t vid, const char *name,
+ uint64_t pino)
+{
+ struct inode_data *id = prepare_inode_data(inode, vid, name);
+ struct dentry *entry;
+ uint64_t myino;
+ int ret;
+
+ sys->cdrv->lock(vid);
+ ret = inode_lookup(id);
+ if (ret != SD_RES_SUCCESS) {
+ sd_err("failed to lookup %s", name);
+ goto out;
+ }
+
+ /* The inode number is the object id of the slot that was picked */
+ myino = vid_to_data_oid(id->vid, id->idx);
+
+ inode->nlink = 2; /* '.' and 'name' */
+ inode->size = 2 * sizeof(struct dentry);
+ inode->used = INODE_DATA_SIZE;
+ /* First dentry: "." pointing at this directory */
+ entry = (struct dentry *)inode->data;
+ entry->ino = myino;
+ entry->nlen = 1;
+ entry->name[0] = '.';
+ /* Second dentry: ".." pointing at the parent */
+ entry++;
+ entry->ino = pino;
+ entry->nlen = 2;
+ entry->name[0] = '.';
+ entry->name[1] = '.';
+
+ if (unlikely(myino == pino))
+ inode->nlink++; /* I'm root */
+ else {
+ ret = nlink_inc(pino);
+ if (ret != SD_RES_SUCCESS) {
+ sd_err("failed to inc nlink %"PRIx64, pino);
+ goto out;
+ }
+ }
+
+ ret = inode_do_create(id);
+out:
+ sys->cdrv->unlock(vid);
+ finish_inode_data(id);
+ return ret;
+}
+
+/*
+ * Create the root directory "/" of VDI 'vid'.  The root is a directory with
+ * the default directory mode, uid/gid 0 and all three timestamps set to the
+ * current time; it is its own parent.  Returns the result of dir_create().
+ */
+int fs_make_root(uint32_t vid)
+{
+	struct inode *root;
+	int ret;
+
+	root = xzalloc(sizeof(*root));
+	root->uid = 0;
+	root->gid = 0;
+	root->mode = S_IFDIR | sd_def_dmode;
+	root->ctime = time(NULL);
+	root->mtime = root->ctime;
+	root->atime = root->mtime;
+
+	ret = dir_create(root, vid, "/", fs_root_ino(vid));
+
+	free(root);
+	return ret;
+}
+
+/* Inode number of the root directory of 'vid': the data oid at ROOT_IDX. */
+uint64_t fs_root_ino(uint32_t vid)
+{
+	uint64_t root_oid = vid_to_data_oid(vid, ROOT_IDX);
+
+	return root_oid;
+}
+
+/*
+ * Read the first 'size' bytes of object 'ino' into a freshly allocated
+ * buffer of exactly that size.  The caller frees the buffer.  On failure
+ * the buffer is released and the negated SD_RES_* code is returned cast to
+ * a pointer, to be detected with IS_ERR().
+ *
+ * NOTE(review): callers compare PTR_ERR() of this value with positive
+ * SD_RES_* constants; confirm PTR_ERR()'s sign convention matches.
+ */
+static struct inode *inode_read(uint64_t ino, uint64_t size)
+{
+	struct inode *buf = xmalloc(size);
+	long ret;
+
+	ret = sd_read_object(ino, (char *)buf, size, 0);
+	if (ret == SD_RES_SUCCESS)
+		return buf;
+
+	sd_err("failed to read %" PRIx64 " %s", ino, sd_strerror(ret));
+	free(buf);
+	return (struct inode *)-ret;
+}
+
+/*
+ * Read only the first INODE_HDR_SIZE bytes of inode 'ino'.  The returned
+ * buffer is just that large, so only header fields may be accessed.
+ */
+struct inode *fs_read_inode_hdr(uint64_t ino)
+{
+	struct inode *hdr = inode_read(ino, INODE_HDR_SIZE);
+
+	return hdr;
+}
+
+/* Read the whole struct inode (header, extents and data) of inode 'ino'. */
+struct inode *fs_read_inode_full(uint64_t ino)
+{
+	struct inode *full = inode_read(ino, sizeof(struct inode));
+
+	return full;
+}
+
+/*
+ * Write the first 'size' bytes of 'inode' to the object named by
+ * inode->ino, without creating it.  Logs and returns the SD_RES_* code.
+ */
+static int inode_write(struct inode *inode, uint64_t size)
+{
+	uint64_t target = inode->ino;
+	int ret = sd_write_object(target, (char *)inode, size, 0, 0);
+
+	if (ret == SD_RES_SUCCESS)
+		return ret;
+
+	sd_err("failed to write %" PRIx64" %s", target, sd_strerror(ret));
+	return ret;
+}
+
+/* Write back only the first INODE_HDR_SIZE bytes of 'inode'. */
+int fs_write_inode_hdr(struct inode *inode)
+{
+	int ret = inode_write(inode, INODE_HDR_SIZE);
+
+	return ret;
+}
+
+/* Write back the whole struct inode (header, extents and data). */
+int fs_write_inode_full(struct inode *inode)
+{
+	int ret = inode_write(inode, sizeof(struct inode));
+
+	return ret;
+}
+
+/*
+ * Walk the dentries of directory 'inode', starting at byte 'offset' into
+ * inode->data, and call dentry_reader(inode, dentry, data) for each one.
+ *
+ * 'offset' is converted to a dentry index, so the walk starts at the first
+ * whole dentry at or before that byte position.  The walk stops at the end
+ * of the directory (inode->size) or as soon as the callback returns
+ * anything other than SD_RES_SUCCESS.
+ *
+ * Returns SD_RES_SUCCESS when every remaining dentry was visited (also when
+ * there was none), otherwise the callback's return value.
+ */
+int fs_read_dir(struct inode *inode, uint64_t offset,
+		int (*dentry_reader)(struct inode *, struct dentry *, void *),
+		void *data)
+{
+	/*
+	 * Index from the start of the data area.  The offset is applied once,
+	 * through the starting index below; basing the pointer at
+	 * data + offset as well would skip entries and run past the end.
+	 */
+	struct dentry *entries = (struct dentry *)inode->data;
+	uint64_t dentry_count = inode->size / sizeof(struct dentry);
+	int ret = SD_RES_SUCCESS;
+	uint64_t i;
+
+	sd_debug("%"PRIu64", %"PRIu64, offset, inode->size);
+
+	for (i = offset / sizeof(*entries); i < dentry_count; i++) {
+		ret = dentry_reader(inode, entries + i, data);
+		if (ret != SD_RES_SUCCESS)
+			break;
+	}
+	return ret;
+}
diff --git a/sheep/nfs/fs.h b/sheep/nfs/fs.h
new file mode 100644
index 0000000..50c7052
--- /dev/null
+++ b/sheep/nfs/fs.h
@@ -0,0 +1,68 @@
+/*
+ * Copyright (C) 2014 Taobao Inc.
+ *
+ * Liu Yuan <namei.unix at gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License version
+ * 2 as published by the Free Software Foundation.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+#ifndef _FS_H_
+#define _FS_H_
+
+#include "sheep_priv.h"
+
+/*
+ * One extent: a run identified by where it starts and how long it is.
+ * NOTE(review): the unit of start/count is not visible here - confirm.
+ */
+struct extent {
+ uint64_t start;
+ uint64_t count;
+};
+
+/*
+ * Layout of one inode object: a header area, an extent area, then data.
+ * The three areas together fill one data object (SD_DATA_OBJ_SIZE).
+ */
+#define INODE_HDR_SIZE SECTOR_SIZE
+#define INODE_EXTENT_SIZE (BLOCK_SIZE * 2)
+#define INODE_META_SIZE (INODE_HDR_SIZE + INODE_EXTENT_SIZE)
+#define INODE_DATA_SIZE (SD_DATA_OBJ_SIZE - INODE_META_SIZE)
+
+/*
+ * In-memory and on-disk image of an inode object.
+ *
+ * The first union pads the attribute fields to INODE_HDR_SIZE bytes, the
+ * second reserves INODE_EXTENT_SIZE bytes for the extent array, and 'data'
+ * takes the remaining INODE_DATA_SIZE bytes.  For a directory, 'data' holds
+ * an array of struct dentry.
+ */
+struct inode {
+ union {
+ struct {
+ uint32_t mode; /* File mode */
+ uint32_t nlink; /* Links count */
+ uint32_t uid; /* Owner Uid */
+ uint32_t gid; /* Group Id */
+ uint64_t size; /* Size in bytes */
+ uint64_t used; /* Used in bytes */
+ uint64_t atime; /* Access time */
+ uint64_t ctime; /* Creation time */
+ uint64_t mtime; /* Modification time */
+ uint64_t ino; /* Inode number */
+ uint16_t extent_count; /* Number of extents */
+ };
+ uint8_t __pad1[INODE_HDR_SIZE];
+ };
+ union {
+ struct extent extent[0];
+ uint8_t __pad2[INODE_EXTENT_SIZE];
+ };
+ uint8_t data[INODE_DATA_SIZE];
+};
+
+/* Fixed-size directory entry mapping a name to an inode number. */
+struct dentry {
+ uint64_t ino; /* Inode number */
+ uint16_t nlen; /* Name length */
+ char name[NFS_MAXNAMLEN]; /* File name */
+};
+
+/* Create the root directory of VDI 'vid'. */
+int fs_make_root(uint32_t vid);
+/* Inode number of the root directory of VDI 'vid'. */
+uint64_t fs_root_ino(uint32_t vid);
+/*
+ * Read an inode (header only, or the full structure) into a newly
+ * allocated buffer; check the result with IS_ERR() and free() it when done.
+ */
+struct inode *fs_read_inode_hdr(uint64_t ino);
+struct inode *fs_read_inode_full(uint64_t ino);
+/* Write an inode (header only, or the full structure) back to inode->ino. */
+int fs_write_inode_hdr(struct inode *inode);
+int fs_write_inode_full(struct inode *inode);
+/*
+ * Call 'dentry_reader' for each dentry of directory 'inode' from byte
+ * 'offset' on, until it returns something other than SD_RES_SUCCESS.
+ */
+int fs_read_dir(struct inode *inode, uint64_t offset,
+ int (*dentry_reader)(struct inode *, struct dentry *, void *),
+ void *data);
+
+#endif
diff --git a/sheep/nfs/mount.c b/sheep/nfs/mount.c
index 8f454cf..4ae1c63 100644
--- a/sheep/nfs/mount.c
+++ b/sheep/nfs/mount.c
@@ -16,26 +16,64 @@
+/* MOUNT3 NULL: does nothing and returns the address of a static result. */
void *mount3_null(struct svc_req *req, struct nfs_arg *arg)
{
- return NULL;
+ static void *result;
+
+ return &result;
}
+/*
+ * MOUNT3 MNT: look up the VDI named by arg->mnt.  On success the reply
+ * carries a file handle holding the root inode number of that VDI and a
+ * single AUTH_UNIX auth flavor.  SD_RES_NO_VDI maps to MNT3ERR_NOENT and any
+ * other failure to MNT3ERR_SERVERFAULT.
+ *
+ * The result, the file handle and the flavor are static, so the returned
+ * pointer refers to storage that every call reuses.
+ */
void *mount3_mnt(struct svc_req *req, struct nfs_arg *arg)
{
- return NULL;
+ static mountres3 result;
+ static int auth = AUTH_UNIX; /* FIXME: add auth support */
+ static struct svc_fh fh;
+ char *p = arg->mnt;
+ uint32_t vid;
+ int ret;
+
+ sd_debug("%s", p);
+
+ ret = sd_lookup_vdi(p, &vid);
+ switch (ret) {
+ case SD_RES_SUCCESS:
+ fh.ino = fs_root_ino(vid);
+ result.fhs_status = MNT3_OK;
+ break;
+ case SD_RES_NO_VDI:
+ result.fhs_status = MNT3ERR_NOENT;
+ goto out;
+ default:
+ result.fhs_status = MNT3ERR_SERVERFAULT;
+ goto out;
+ }
+
+ result.mountres3_u.mountinfo.fhandle.fhandle3_len = sizeof(fh);
+ result.mountres3_u.mountinfo.fhandle.fhandle3_val = (char *)&fh;
+ result.mountres3_u.mountinfo.auth_flavors.auth_flavors_len = 1;
+ result.mountres3_u.mountinfo.auth_flavors.auth_flavors_val = &auth;
+out:
+ return &result;
}
+/* MOUNT3 DUMP: not implemented, returns NULL. */
void *mount3_dump(struct svc_req *req, struct nfs_arg *arg)
{
return NULL;
}
+
+/*
+ * MOUNT3 UMNT: only logs the path in arg->umnt at debug level and returns
+ * the address of a static result.
+ */
void *mount3_umnt(struct svc_req *req, struct nfs_arg *arg)
{
- return NULL;
+ static void *result;
+ char *p = arg->umnt;
+
+ sd_debug("%s", p);
+
+ return &result;
}
+
+/* MOUNT3 UMNTALL: not implemented, returns NULL. */
void *mount3_umntall(struct svc_req *req, struct nfs_arg *arg)
{
return NULL;
}
+
void *mount3_export(struct svc_req *req, struct nfs_arg *arg)
{
return NULL;
diff --git a/sheep/nfs/nfs.c b/sheep/nfs/nfs.c
index b352bac..f992434 100644
--- a/sheep/nfs/nfs.c
+++ b/sheep/nfs/nfs.c
@@ -14,14 +14,91 @@
#include "sheep_priv.h"
#include "nfs.h"
+/*
+ * Comment from Linux kernel nfsd for RPC payload size
+ *
+ * Maximum payload size supported by a kernel RPC server.
+ * This is use to determine the max number of pages nfsd is
+ * willing to return in a single READ operation.
+ *
+ * These happen to all be powers of 2, which is not strictly
+ * necessary but helps enforce the real limitation, which is
+ * that they should be multiples of PAGE_CACHE_SIZE.
+ *
+ * For UDP transports, a block plus NFS,RPC, and UDP headers
+ * has to fit into the IP datagram limit of 64K. The largest
+ * feasible number for all known page sizes is probably 48K,
+ * but we choose 32K here. This is the same as the historical
+ * Linux limit; someone who cares more about NFS/UDP performance
+ * can test a larger number.
+ *
+ * For TCP transports we have more freedom. A size of 1MB is
+ * chosen to match the client limit. Other OSes are known to
+ * have larger limits, but those numbers are probably beyond
+ * the point of diminishing returns.
+ */
+#define RPCSVC_MAXPAYLOAD (1*1024*1024u) /* 1MB */
+#define RPCSVC_MAXPAYLOAD_TCP RPCSVC_MAXPAYLOAD /* TCP uses the full 1MB */
+#define RPCSVC_MAXPAYLOAD_UDP (32*1024u) /* 32K for UDP */
+
+/*
+ * Interpret the start of 'argp' as an nfs_fh3 and return its opaque data
+ * viewed as a struct svc_fh.  Panics if the handle length is not exactly
+ * sizeof(struct svc_fh).
+ *
+ * NOTE(review): the length presumably comes from the client's request;
+ * confirm that a panic on a bad length is acceptable.
+ */
+static struct svc_fh *get_svc_fh(struct nfs_arg *argp)
+{
+	struct nfs_fh3 *handle = (struct nfs_fh3 *)argp;
+	struct svc_fh *fh = (struct svc_fh *)(handle->data.data_val);
+
+	if (unlikely(handle->data.data_len != sizeof(struct svc_fh)))
+		panic("invalid nfs file handle len %u", handle->data.data_len);
+
+	return fh;
+}
+
+/*
+ * Fill the NFS attributes 'post' from 'inode'.  The type is NF3DIR for
+ * directories and NF3REG for everything else.  fsid is the vid derived from
+ * the inode number and fileid is the inode number itself.  Only the seconds
+ * part of each timestamp is written.
+ */
+static void update_post_attr(struct inode *inode, fattr3 *post)
+{
+	if (S_ISDIR(inode->mode))
+		post->type = NF3DIR;
+	else
+		post->type = NF3REG;
+
+	post->fileid = inode->ino;
+	post->fsid = oid_to_vid(inode->ino);
+
+	post->mode = inode->mode;
+	post->nlink = inode->nlink;
+	post->uid = inode->uid;
+	post->gid = inode->gid;
+
+	post->size = inode->size;
+	post->used = inode->used;
+
+	post->atime.seconds = inode->atime;
+	post->mtime.seconds = inode->mtime;
+	post->ctime.seconds = inode->ctime;
+}
+
+/* NFS3 NULL: does nothing and returns the address of a static result. */
void *nfs3_null(struct svc_req *req, struct nfs_arg *argp)
{
- return NULL;
+ static void *result;
+
+ return &result;
}
+/*
+ * NFS3 GETATTR: read the header of the inode named by the file handle and
+ * return its attributes.  A read failure of SD_RES_NO_OBJ is reported as
+ * NFS3ERR_NOENT, any other as NFS3ERR_IO.  The result is static and reused
+ * by every call.
+ *
+ * NOTE(review): inode_read() encodes errors as the negated SD_RES_* code;
+ * confirm that PTR_ERR() yields the positive value these cases expect.
+ */
void *nfs3_getattr(struct svc_req *req, struct nfs_arg *argp)
{
- return NULL;
+ static GETATTR3res result;
+ struct svc_fh *fh = get_svc_fh(argp);
+ struct fattr3 *post = &result.GETATTR3res_u.resok.obj_attributes;
+ struct inode *inode;
+
+ inode = fs_read_inode_hdr(fh->ino);
+ if (IS_ERR(inode)) {
+ switch (PTR_ERR(inode)) {
+ case SD_RES_NO_OBJ:
+ result.status = NFS3ERR_NOENT;
+ goto out;
+ default:
+ result.status = NFS3ERR_IO;
+ goto out;
+ }
+ }
+
+ update_post_attr(inode, post);
+ result.status = NFS3_OK;
+
+ free(inode);
+out:
+ return &result;
}
void *nfs3_setattr(struct svc_req *req, struct nfs_arg *argp)
@@ -34,9 +111,44 @@ void *nfs3_lookup(struct svc_req *req, struct nfs_arg *argp)
return NULL;
}
+/* FIXME: implement UNIX ACL */
+/*
+ * NFS3 ACCESS: no permission check is made.  Every caller is offered
+ * READ, MODIFY and EXTEND, plus EXECUTE for non-directories or LOOKUP and
+ * DELETE for directories; the reply is that set masked by the bits the
+ * client asked about.  The inode attributes are returned alongside.
+ * A read failure of SD_RES_NO_OBJ is reported as NFS3ERR_NOENT, any other
+ * as NFS3ERR_IO.  The result is static and reused by every call.
+ */
void *nfs3_access(struct svc_req *req, struct nfs_arg *argp)
{
- return NULL;
+ static ACCESS3res result;
+ ACCESS3args *arg = &argp->access;
+ struct svc_fh *fh = get_svc_fh(argp);
+ struct post_op_attr *poa = &result.ACCESS3res_u.resok.obj_attributes;
+ struct fattr3 *post = &poa->post_op_attr_u.attributes;
+ uint32_t access;
+ struct inode *inode;
+
+ inode = fs_read_inode_hdr(fh->ino);
+ if (IS_ERR(inode)) {
+ switch (PTR_ERR(inode)) {
+ case SD_RES_NO_OBJ:
+ result.status = NFS3ERR_NOENT;
+ goto out;
+ default:
+ result.status = NFS3ERR_IO;
+ goto out;
+ }
+ }
+
+ poa->attributes_follow = true;
+ update_post_attr(inode, post);
+ access = ACCESS3_READ | ACCESS3_MODIFY | ACCESS3_EXTEND |
+ ACCESS3_EXECUTE;
+ if (post->type == NF3DIR) {
+ access |= ACCESS3_LOOKUP | ACCESS3_DELETE;
+ access &= ~ACCESS3_EXECUTE;
+ }
+
+ result.status = NFS3_OK;
+ result.ACCESS3res_u.resok.access = access & arg->access;
+
+ free(inode);
+out:
+ return &result;
}
void *nfs3_readlink(struct svc_req *req, struct nfs_arg *argp)
@@ -94,14 +206,132 @@ void *nfs3_link(struct svc_req *req, struct nfs_arg *argp)
return NULL;
}
+/* Linux NFS client will issue at most 32k count for readdir on my test */
+#define ENTRY3_MAX_LEN (32*1024)
+
+/*
+ * Static scratch buffers for the READDIR reply: nfs3_readdir() uses
+ * entry3_buffer as the entry3 array, and nfs_dentry_reader() copies each
+ * name into entry3_name at a stride of NFS_MAXNAMLEN bytes.
+ */
+static char entry3_buffer[ENTRY3_MAX_LEN];
+static char entry3_name[ENTRY3_MAX_LEN];
+
+/*
+ * static READDIR3resok size with XDR overhead
+ *
+ * 88 bytes attributes, 8 bytes verifier, 4 bytes value_follows for
+ * first entry, 4 bytes eof flag
+ */
+#define RESOK_SIZE 104
+
+/*
+ * static entry3 size with XDR overhead
+ *
+ * 8 bytes fileid, 4 bytes name length, 8 bytes cookie, 4 byte value_follows
+ */
+#define ENTRY_SIZE 24
+
+/*
+ * size of a name with XDR overhead
+ *
+ * XDR pads to multiple of 4 bytes
+ */
+#define NAME_SIZE(x) (((strlen((x))+3)/4)*4)
+
+/* State handed to nfs_dentry_reader() while a READDIR reply is built. */
+struct dir_reader_d {
+ uint32_t count; /* byte budget for the reply, from the request */
+ uint32_t used; /* reply bytes accounted for so far */
+ entry3 *entries; /* array the entries are written into */
+ uint32_t iter; /* number of entries written so far */
+};
+
+/*
+ * fs_read_dir() callback: append one dentry to the READDIR reply.
+ *
+ * 'data' is a struct dir_reader_d.  The entry is written to
+ * d->entries[d->iter], its name is copied into the matching
+ * NFS_MAXNAMLEN-byte slot of entry3_name, and the previous entry is linked
+ * to it.  The cookie is the byte offset, within inode->data, of the dentry
+ * that follows this one.
+ *
+ * Returns SD_RES_SUCCESS when the entry was added.  Returns SD_RES_AGAIN,
+ * adding nothing, when either the client's byte budget (d->count) or one of
+ * the static reply buffers is exhausted, so the client continues from the
+ * last cookie it was given.
+ */
+static int nfs_dentry_reader(struct inode *inode, struct dentry *dentry,
+			     void *data)
+{
+	struct dir_reader_d *d = data;
+	uint32_t iter = d->iter;
+	uint64_t offset = (uint8_t *)(dentry + 1) - inode->data;
+	char *name;
+
+	/*
+	 * d->count comes from the request and says nothing about the size of
+	 * the static buffers, so bound the entry index by their capacity.
+	 */
+	if (iter >= sizeof(entry3_buffer) / sizeof(entry3) ||
+	    iter >= sizeof(entry3_name) / NFS_MAXNAMLEN)
+		return SD_RES_AGAIN;
+
+	/* If we have enough room for next dentry */
+	d->used += ENTRY_SIZE + NAME_SIZE(dentry->name);
+	if (d->used > d->count)
+		return SD_RES_AGAIN;
+
+	name = &entry3_name[iter * NFS_MAXNAMLEN];
+	strcpy(name, dentry->name);
+
+	d->entries[iter].fileid = dentry->ino;
+	d->entries[iter].name = name;
+	d->entries[iter].cookie = offset;
+	d->entries[iter].nextentry = NULL;
+	if (iter > 0)
+		d->entries[iter - 1].nextentry = d->entries + iter;
+	sd_debug("%s, %"PRIu64, d->entries[iter].name, offset);
+	d->iter++;
+
+	return SD_RES_SUCCESS;
+}
+
+/*
+ * NFS3 READDIR: list the directory named by the file handle, starting at
+ * the byte offset given in arg->cookie and limited to arg->count bytes.
+ *
+ * The reply is NFS3_OK with eof set when the walk reached the end of the
+ * directory, NFS3_OK with eof clear when the reply filled up first,
+ * NFS3ERR_NOTDIR for a non-directory, NFS3ERR_NOENT when the inode object
+ * does not exist and NFS3ERR_IO for any other failure.
+ *
+ * The result and the entry buffers are static and reused by every call, so
+ * every field of a successful reply is set explicitly here.
+ */
void *nfs3_readdir(struct svc_req *req, struct nfs_arg *argp)
{
- return NULL;
+	static READDIR3res result;
+	READDIR3args *arg = &argp->readdir;
+	struct svc_fh *fh = get_svc_fh(argp);
+	struct post_op_attr *poa =
+		&result.READDIR3res_u.resok.dir_attributes;
+	struct fattr3 *post = &poa->post_op_attr_u.attributes;
+	struct inode *inode;
+	struct dir_reader_d wd;
+	int ret;
+
+	sd_debug("%"PRIx64" count %"PRIu32", at %"PRIu64, fh->ino,
+		 (uint32_t)arg->count, arg->cookie);
+
+	inode = fs_read_inode_full(fh->ino);
+	if (IS_ERR(inode)) {
+		switch (PTR_ERR(inode)) {
+		case SD_RES_NO_OBJ:
+			result.status = NFS3ERR_NOENT;
+			goto out;
+		default:
+			result.status = NFS3ERR_IO;
+			goto out;
+		}
+	}
+
+	if (!S_ISDIR(inode->mode)) {
+		result.status = NFS3ERR_NOTDIR;
+		goto out_free;
+	}
+
+	wd.count = arg->count;
+	wd.entries = (entry3 *)entry3_buffer;
+	wd.iter = 0;
+	wd.used = RESOK_SIZE;
+	ret = fs_read_dir(inode, arg->cookie, nfs_dentry_reader, &wd);
+	switch (ret) {
+	case SD_RES_SUCCESS:
+		result.status = NFS3_OK;
+		result.READDIR3res_u.resok.reply.eof = true;
+		break;
+	case SD_RES_AGAIN:
+		result.status = NFS3_OK;
+		/* Clear an eof flag left in the static result by an earlier call */
+		result.READDIR3res_u.resok.reply.eof = false;
+		break;
+	default:
+		result.status = NFS3ERR_IO;
+		goto out_free;
+	}
+
+	/*
+	 * Hand out the list only if this call filled in at least one entry;
+	 * otherwise the buffer still holds entries from an earlier reply.
+	 */
+	result.READDIR3res_u.resok.reply.entries =
+		wd.iter > 0 ? wd.entries : NULL;
+	poa->attributes_follow = true;
+	update_post_attr(inode, post);
+out_free:
+	free(inode);
+out:
+	return &result;
}
+/* NFS3 READDIRPLUS: not supported, always replies NFS3ERR_NOTSUPP. */
void *nfs3_readdirplus(struct svc_req *req, struct nfs_arg *argp)
{
- return NULL;
+ static READDIRPLUS3res result;
+
+ result.status = NFS3ERR_NOTSUPP;
+
+ return &result;
}
void *nfs3_fsstat(struct svc_req *req, struct nfs_arg *argp)
@@ -109,14 +339,60 @@ void *nfs3_fsstat(struct svc_req *req, struct nfs_arg *argp)
return NULL;
}
+/*
+ * Return the largest RPC payload to advertise for the transport that
+ * carried 'req': RPCSVC_MAXPAYLOAD_TCP for a stream socket, otherwise
+ * RPCSVC_MAXPAYLOAD_UDP.  The UDP size is also used when the socket type
+ * cannot be determined.
+ */
+static uint32_t get_max_size(struct svc_req *req)
+{
+	int type;
+	socklen_t len = sizeof(type);
+
+	if (getsockopt(req->rq_xprt->xp_sock, SOL_SOCKET, SO_TYPE,
+		       &type, &len) < 0) {
+		sd_info("unable to determine socket type, use udp size");
+		return RPCSVC_MAXPAYLOAD_UDP;
+	}
+
+	/*
+	 * The socket type is delivered through the option value; the return
+	 * value of getsockopt() only signals success or failure.
+	 */
+	if (type == SOCK_STREAM)
+		return RPCSVC_MAXPAYLOAD_TCP;
+
+	/* UDP is a safe value for all the transport */
+	return RPCSVC_MAXPAYLOAD_UDP;
+}
+
+/*
+ * NFS3 FSINFO: report static file system limits.  The maximum and preferred
+ * read/write sizes are the transport payload size from get_max_size().  The
+ * size multiples and the preferred READDIR size are BLOCK_SIZE, the maximum
+ * file size is SD_MAX_VDI_SIZE and the time granularity is one second.
+ * No attributes are returned.
+ */
void *nfs3_fsinfo(struct svc_req *req, struct nfs_arg *argp)
{
- return NULL;
+ static FSINFO3res result;
+ uint32_t maxsize = get_max_size(req);
+
+ result.status = NFS3_OK;
+ result.FSINFO3res_u.resok.obj_attributes.attributes_follow = false;
+ result.FSINFO3res_u.resok.rtmax = maxsize;
+ result.FSINFO3res_u.resok.rtpref = maxsize;
+ result.FSINFO3res_u.resok.rtmult = BLOCK_SIZE;
+ result.FSINFO3res_u.resok.wtmax = maxsize;
+ result.FSINFO3res_u.resok.wtpref = maxsize;
+ result.FSINFO3res_u.resok.wtmult = BLOCK_SIZE;
+ result.FSINFO3res_u.resok.dtpref = BLOCK_SIZE;
+ result.FSINFO3res_u.resok.maxfilesize = SD_MAX_VDI_SIZE;
+ result.FSINFO3res_u.resok.time_delta.seconds = 1;
+ result.FSINFO3res_u.resok.time_delta.nseconds = 0;
+ result.FSINFO3res_u.resok.properties = FSF3_HOMOGENEOUS;
+
+ return &result;
}
+/*
+ * NFS3 PATHCONF: report fixed values - linkmax UINT32_MAX, name_max
+ * NFS_MAXNAMLEN, no_trunc set, chown not restricted, and names that are
+ * case sensitive and case preserving.  No attributes are returned.
+ */
void *nfs3_pathconf(struct svc_req *req, struct nfs_arg *argp)
{
- return NULL;
+ static PATHCONF3res result;
+
+ result.status = NFS3_OK;
+ result.PATHCONF3res_u.resok.obj_attributes.attributes_follow = false;
+ result.PATHCONF3res_u.resok.linkmax = UINT32_MAX;
+ result.PATHCONF3res_u.resok.name_max = NFS_MAXNAMLEN;
+ result.PATHCONF3res_u.resok.no_trunc = true;
+ result.PATHCONF3res_u.resok.chown_restricted = false;
+ result.PATHCONF3res_u.resok.case_insensitive = false;
+ result.PATHCONF3res_u.resok.case_preserving = true;
+
+ return &result;
}
void *nfs3_commit(struct svc_req *req, struct nfs_arg *argp)
diff --git a/sheep/nfs/nfs.h b/sheep/nfs/nfs.h
index 1a0e501..5d6a67d 100644
--- a/sheep/nfs/nfs.h
+++ b/sheep/nfs/nfs.h
@@ -1,3 +1,15 @@
+/*
+ * Copyright (C) 2014 Taobao Inc.
+ *
+ * Liu Yuan <namei.unix at gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License version
+ * 2 as published by the Free Software Foundation.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
#ifndef _NFS_H
#define _NFS_H
@@ -1230,4 +1242,10 @@ extern bool_t xdr_groupnode(XDR *, groupnode*);
extern bool_t xdr_exports(XDR *, exports*);
extern bool_t xdr_exportnode(XDR *, exportnode*);
+/* Server-side content of an NFS file handle: just the inode number. */
+struct svc_fh {
+ uint64_t ino;
+};
+
+#include "fs.h"
+
#endif /* !_NFS_H */
diff --git a/sheep/nfs/nfsd.c b/sheep/nfs/nfsd.c
index 6556673..31b3375 100644
--- a/sheep/nfs/nfsd.c
+++ b/sheep/nfs/nfsd.c
@@ -217,7 +217,11 @@ int nfs_create(const char *name)
if (ret != SD_RES_SUCCESS)
return ret;
- return SD_RES_SUCCESS;
+ ret = fs_make_root(vdi);
+ if (ret != SD_RES_SUCCESS)
+ sd_delete_vdi(name);
+
+ return ret;
}
int nfs_delete(const char *name)
diff --git a/sheep/nfs/xdr.c b/sheep/nfs/xdr.c
index 7f9b0f4..8a47daa 100644
--- a/sheep/nfs/xdr.c
+++ b/sheep/nfs/xdr.c
@@ -1,3 +1,15 @@
+/*
+ * Copyright (C) 2014 Taobao Inc.
+ *
+ * Liu Yuan <namei.unix at gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License version
+ * 2 as published by the Free Software Foundation.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
#include "nfs.h"
bool_t
--
1.8.1.2
More information about the sheepdog
mailing list