[Sheepdog] [PATCH 2/4] sheep: factor out store_* operations
Liu Yuan
namei.unix at gmail.com
Mon Apr 30 18:16:24 CEST 2012
From: Liu Yuan <tailai.ly at taobao.com>
- move stat_sheep() and remove_epoch() into ops.c too, making both static
Signed-off-by: Liu Yuan <tailai.ly at taobao.com>
---
sheep/ops.c | 328 +++++++++++++++++++++++++++++++++++++
sheep/sheep_priv.h | 23 ++-
sheep/store.c | 456 +---------------------------------------------------
3 files changed, 346 insertions(+), 461 deletions(-)
diff --git a/sheep/ops.c b/sheep/ops.c
index d5ba7fa..ed1da99 100644
--- a/sheep/ops.c
+++ b/sheep/ops.c
@@ -10,6 +10,17 @@
*/
#include <stdio.h>
#include <stdlib.h>
+#include <dirent.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <mntent.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <sys/statvfs.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <pthread.h>
#include "sheep_priv.h"
#include "strbuf.h"
@@ -54,6 +65,61 @@ struct flush_work {
struct work work;
};
+/*
+ * Append the object store directory for @epoch to @buf.
+ *
+ * When the active store driver is named "simple" the text appended is
+ * obj_path followed by the epoch, zero-padded to at least eight decimal
+ * digits; for every other driver it is obj_path alone.  No trailing '/'
+ * is appended in either case.
+ */
+static void get_store_dir(struct strbuf *buf, int epoch)
+{
+ if (!strcmp(sd_store->name, "simple"))
+ strbuf_addf(buf, "%s%08u", obj_path, epoch);
+ else /* XXX assume other store doesn't need epoch/obj pattern */
+ strbuf_addf(buf, "%s", obj_path);
+}
+
+/*
+ * Report the size and free space of the local store.
+ *
+ * @store_size: set to the free bytes of the filesystem holding mnt_path
+ *              plus the summed st_size of the entries found in the store
+ *              directory for @epoch
+ * @store_free: set to f_frsize * f_bfree of that filesystem
+ * @epoch:      epoch handed to get_store_dir() to locate the directory
+ *
+ * Returns SD_RES_SUCCESS, or SD_RES_EIO when statvfs() or opendir() fails.
+ * The output parameters are written only on success.
+ */
+static int stat_sheep(uint64_t *store_size, uint64_t *store_free, uint32_t epoch)
+{
+ struct statvfs vs;
+ int ret;
+ DIR *dir;
+ struct dirent *d;
+ uint64_t used = 0;
+ struct stat s;
+ char path[1024];
+ struct strbuf store_dir = STRBUF_INIT;
+
+ ret = statvfs(mnt_path, &vs);
+ if (ret) {
+ ret = SD_RES_EIO;
+ goto out;
+ }
+
+ get_store_dir(&store_dir, epoch);
+ dir = opendir(store_dir.buf);
+ if (!dir) {
+ ret = SD_RES_EIO;
+ goto out;
+ }
+
+ /* Sum the sizes of everything directly inside the store directory. */
+ while ((d = readdir(dir))) {
+ if (!strcmp(d->d_name, ".") || !strcmp(d->d_name, ".."))
+ continue;
+
+ /* NOTE(review): snprintf() silently truncates names that do not fit in path[] */
+ snprintf(path, sizeof(path), "%s/%s", store_dir.buf, d->d_name);
+
+ /* Entries that cannot be stat()ed are skipped, not treated as errors. */
+ ret = stat(path, &s);
+ if (ret)
+ continue;
+
+ used += s.st_size;
+ }
+
+ closedir(dir);
+ ret = SD_RES_SUCCESS;
+
+ /* "size" is free space plus what the store already uses, not the filesystem total. */
+ *store_size = (uint64_t)vs.f_frsize * vs.f_bfree + used;
+ *store_free = (uint64_t)vs.f_frsize * vs.f_bfree;
+out:
+ strbuf_release(&store_dir);
+ return ret;
+}
+
static int cluster_new_vdi(const struct sd_req *req, struct sd_rsp *rsp,
void *data)
{
@@ -134,6 +200,28 @@ static int cluster_get_vdi_info(const struct sd_req *req, struct sd_rsp *rsp,
return ret;
}
+/*
+ * Remove the on-disk state kept for @epoch: the file named epoch_path
+ * followed by the zero-padded epoch, and the directory named jrnl_path
+ * followed by the zero-padded epoch and a '/'.
+ *
+ * A file or directory that is already missing is not an error.
+ *
+ * Returns 0 on success, SD_RES_EIO if either removal fails.
+ */
+static int remove_epoch(int epoch)
+{
+	int ret;
+	char path[PATH_MAX];
+
+	dprintf("remove epoch %"PRIu32"\n", epoch);
+	snprintf(path, sizeof(path), "%s%08u", epoch_path, epoch);
+	/*
+	 * unlink() returns -1 and reports the cause through errno, so the
+	 * "already gone" case must be detected via errno rather than by
+	 * comparing the return value with -ENOENT.
+	 */
+	if (unlink(path) < 0 && errno != ENOENT) {
+		eprintf("failed to remove %s: %s\n", path, strerror(errno));
+		return SD_RES_EIO;
+	}
+
+	snprintf(path, sizeof(path), "%s%08u/", jrnl_path, epoch);
+	/*
+	 * NOTE(review): this check assumes rmdir_r() returns a negative
+	 * errno value on failure - confirm against its definition.
+	 */
+	ret = rmdir_r(path);
+	if (ret && ret != -ENOENT) {
+		eprintf("failed to remove %s: %s\n", path, strerror(-ret));
+		return SD_RES_EIO;
+	}
+	return 0;
+}
+
static int cluster_make_fs(const struct sd_req *req, struct sd_rsp *rsp,
void *data)
{
@@ -548,6 +636,246 @@ static int local_trace_cat_ops(const struct sd_req *req, struct sd_rsp *rsp, voi
return SD_RES_SUCCESS;
}
+/*
+ * Read the first SD_DATA_OBJ_SIZE bytes of object @oid at @epoch into
+ * @buf, trying the replicas selected by oid_to_vnode() one after another
+ * until one of them succeeds.
+ *
+ * @req:   request whose vnode list is used to locate the replicas
+ * @epoch: epoch to read from
+ * @oid:   object to read
+ * @buf:   destination, must hold at least SD_DATA_OBJ_SIZE bytes
+ *
+ * A replica on the local node is read through sd_store; any other replica
+ * is asked with an SD_OP_READ_OBJ request flagged SD_FLAG_CMD_IO_LOCAL.
+ *
+ * Returns SD_RES_SUCCESS as soon as one replica delivers the data.
+ * Otherwise returns the error of the last attempt: the store or remote
+ * result code, or SD_RES_EIO when the replica could not be contacted (and
+ * when there was no replica to try at all).
+ */
+static int read_copy_from_replica(struct request *req, uint32_t epoch,
+				  uint64_t oid, char *buf)
+{
+	int i, nr_copies, fd;
+	/* Returned as-is when the loop below never gets to try a replica. */
+	int ret = SD_RES_EIO;
+	unsigned wlen, rlen;
+	char name[128];
+	struct sd_vnode *v;
+	struct sd_obj_req hdr;
+	/* The response is received into the same storage as the request. */
+	struct sd_obj_rsp *rsp = (struct sd_obj_rsp *)&hdr;
+	struct siocb iocb;
+
+	nr_copies = get_nr_copies(req->vnodes);
+	for (i = 0; i < nr_copies; i++) {
+		v = oid_to_vnode(req->vnodes, oid, i);
+
+		addr_to_str(name, sizeof(name), v->addr, 0);
+
+		if (vnode_is_local(v)) {
+			memset(&iocb, 0, sizeof(iocb));
+			iocb.epoch = epoch;
+			ret = sd_store->open(oid, &iocb, 0);
+			if (ret != SD_RES_SUCCESS)
+				continue;
+
+			iocb.buf = buf;
+			iocb.length = SD_DATA_OBJ_SIZE;
+			iocb.offset = 0;
+			ret = sd_store->read(oid, &iocb);
+			/*
+			 * Close on both outcomes so that a failed read does
+			 * not leak whatever open() acquired.
+			 */
+			sd_store->close(oid, &iocb);
+			if (ret != SD_RES_SUCCESS)
+				continue;
+			goto out;
+		}
+
+		fd = connect_to(name, v->port);
+		if (fd < 0) {
+			ret = SD_RES_EIO;
+			continue;
+		}
+
+		memset(&hdr, 0, sizeof(hdr));
+		hdr.opcode = SD_OP_READ_OBJ;
+		hdr.oid = oid;
+		hdr.epoch = epoch;
+
+		rlen = SD_DATA_OBJ_SIZE;
+		wlen = 0;
+		hdr.flags = SD_FLAG_CMD_IO_LOCAL;
+		hdr.data_length = rlen;
+		hdr.offset = 0;
+
+		ret = exec_req(fd, (struct sd_req *)&hdr, buf, &wlen, &rlen);
+
+		close(fd);
+
+		dprintf("%x, %x\n", ret, rsp->result);
+		if (ret) {
+			/*
+			 * The exchange itself failed, so hdr does not hold a
+			 * valid response and rsp->result must not be used.
+			 */
+			ret = SD_RES_EIO;
+			continue;
+		}
+
+		ret = rsp->result;
+		if (ret == SD_RES_SUCCESS)
+			goto out;
+	}
+out:
+	return ret;
+}
+
+/*
+ * Delete the backing file of object hdr->oid and drop the oid from the
+ * object list cache.
+ *
+ * @req:  object request header (used as struct sd_obj_req)
+ * @rsp:  unused
+ * @data: unused
+ *
+ * The file name is the directory from get_store_dir() for hdr->epoch with
+ * the oid appended as 16 hexadecimal digits.
+ *
+ * Returns SD_RES_SUCCESS, SD_RES_NO_OBJ when the file does not exist (the
+ * cache is left untouched in that case), or SD_RES_EIO when unlink() fails
+ * for any other reason.
+ */
+static int store_remove_obj(const struct sd_req *req, struct sd_rsp *rsp, void *data)
+{
+ struct sd_obj_req *hdr = (struct sd_obj_req *)req;
+ uint32_t epoch = hdr->epoch;
+ struct strbuf buf = STRBUF_INIT;
+ int ret = SD_RES_SUCCESS;
+
+ get_store_dir(&buf, epoch);
+ /* NOTE(review): no '/' is inserted between directory and object name, unlike the "%s/%s" join in stat_sheep() - confirm obj_path conventions */
+ strbuf_addf(&buf, "%016" PRIx64, hdr->oid);
+ if (unlink(buf.buf) < 0) {
+ if (errno == ENOENT) {
+ ret = SD_RES_NO_OBJ;
+ goto out;
+ }
+ eprintf("%m\n");
+ ret = SD_RES_EIO;
+ /* NOTE(review): falls through, so the oid is still evicted from the cache although the file was not removed - confirm this is intended */
+ }
+ /* cache_size is decremented only when the oid was actually found and erased. */
+ pthread_rwlock_wrlock(&obj_list_cache.lock);
+ if (!objlist_cache_rb_remove(&obj_list_cache.root, hdr->oid))
+ obj_list_cache.cache_size--;
+ pthread_rwlock_unlock(&obj_list_cache.lock);
+ out:
+ strbuf_release(&buf);
+ return ret;
+}
+
+/*
+ * Read hdr->data_length bytes at hdr->offset from object hdr->oid into
+ * the request's data buffer.
+ *
+ * @req:  object request header (used as struct sd_obj_req)
+ * @rsp:  response header; data_length and copies are filled in on success
+ * @data: the struct request whose ->data buffer receives the bytes
+ *
+ * Returns SD_RES_SUCCESS, or the error code returned by the store driver's
+ * open() or read().
+ */
+static int store_read_obj(const struct sd_req *req, struct sd_rsp *rsp, void *data)
+{
+ struct sd_obj_req *hdr = (struct sd_obj_req *)req;
+ struct sd_obj_rsp *rsps = (struct sd_obj_rsp *)rsp;
+ struct request *request = (struct request *)data;
+ int ret;
+ uint32_t epoch = hdr->epoch;
+ struct siocb iocb;
+
+ memset(&iocb, 0, sizeof(iocb));
+ iocb.epoch = epoch;
+ iocb.flags = hdr->flags;
+ /* Third argument 0: presumably "do not create" - store_create_and_write_obj() passes 1. */
+ ret = sd_store->open(hdr->oid, &iocb, 0);
+ if (ret != SD_RES_SUCCESS)
+ return ret;
+
+ iocb.buf = request->data;
+ iocb.length = hdr->data_length;
+ iocb.offset = hdr->offset;
+ ret = sd_store->read(hdr->oid, &iocb);
+ if (ret != SD_RES_SUCCESS)
+ goto out;
+
+ rsps->data_length = hdr->data_length;
+ rsps->copies = sys->nr_copies;
+out:
+ /* Reached on both success and read failure: every successful open() is closed. */
+ sd_store->close(hdr->oid, &iocb);
+ return ret;
+}
+
+/*
+ * Write @data to the already opened object described by @req.
+ *
+ * @iocb:  I/O control block of the opened object; its buf, length and
+ *         offset fields are overwritten from @data and @req
+ * @req:   object request header supplying oid, data_length and offset
+ * @epoch: epoch used to build the object path recorded in the journal
+ * @data:  bytes to write
+ *
+ * For objects that is_vdi_obj() accepts, the store write is bracketed by
+ * jrnl_begin()/jrnl_end(); all other objects are written directly.
+ *
+ * Returns the store driver's write() result, or SD_RES_EIO when
+ * jrnl_begin() returns NULL (nothing is written in that case).
+ */
+static int do_write_obj(struct siocb *iocb, struct sd_obj_req *req, uint32_t epoch, void *data)
+{
+ /* hdr is just another name for req; the cast changes nothing. */
+ struct sd_obj_req *hdr = (struct sd_obj_req *)req;
+ uint64_t oid = hdr->oid;
+ int ret = SD_RES_SUCCESS;
+ void *jd = NULL;
+
+ iocb->buf = data;
+ iocb->length = hdr->data_length;
+ iocb->offset = hdr->offset;
+ if (is_vdi_obj(oid)) {
+ struct strbuf buf = STRBUF_INIT;
+
+ /* Object path handed to the journal: store directory followed by the oid as 16 hex digits. */
+ get_store_dir(&buf, epoch);
+ strbuf_addf(&buf, "%016" PRIx64, oid);
+ jd = jrnl_begin(data, hdr->data_length,
+ hdr->offset, buf.buf, jrnl_path);
+ if (!jd) {
+ strbuf_release(&buf);
+ return SD_RES_EIO;
+ }
+ ret = sd_store->write(oid, iocb);
+ /* jrnl_end() is called whether or not the write succeeded. */
+ jrnl_end(jd);
+ strbuf_release(&buf);
+ } else
+ ret = sd_store->write(oid, iocb);
+
+ return ret;
+}
+
+/*
+ * Write the request payload into the existing object hdr->oid.
+ *
+ * @req:  object request header (used as struct sd_obj_req)
+ * @rsp:  unused
+ * @data: the struct request whose ->data buffer holds the payload
+ *
+ * Returns the store driver's open() error if the object cannot be opened,
+ * otherwise the result of do_write_obj().
+ */
+static int store_write_obj(const struct sd_req *req, struct sd_rsp *rsp,
+ void *data)
+{
+ struct sd_obj_req *hdr = (struct sd_obj_req *)req;
+ struct request *request = (struct request *)data;
+ int ret;
+ uint32_t epoch = hdr->epoch;
+ struct siocb iocb;
+
+ memset(&iocb, 0, sizeof(iocb));
+ iocb.epoch = epoch;
+ iocb.flags = hdr->flags;
+ /* Third argument 0: presumably "do not create" - store_create_and_write_obj() passes 1. */
+ ret = sd_store->open(hdr->oid, &iocb, 0);
+ if (ret != SD_RES_SUCCESS)
+ return ret;
+
+ ret = do_write_obj(&iocb, hdr, epoch, request->data);
+
+ /* Closed regardless of the write result. */
+ sd_store->close(hdr->oid, &iocb);
+ return ret;
+}
+
+/*
+ * Create object hdr->oid and write the request payload into it.
+ *
+ * @req:  object request header (used as struct sd_obj_req)
+ * @rsp:  unused
+ * @data: the struct request carrying the payload and the vnode list
+ *
+ * The object is opened with the third open() argument set to 1 (the plain
+ * read and write paths pass 0) and with iocb.length preset to
+ * SD_INODE_SIZE, SD_ATTR_OBJ_SIZE or SD_DATA_OBJ_SIZE depending on the
+ * kind of oid.
+ *
+ * When SD_FLAG_CMD_COW is set, a buffer of SD_DATA_OBJ_SIZE bytes is
+ * assembled: unless the payload is itself SD_DATA_OBJ_SIZE long, the
+ * buffer is first filled from a replica of hdr->cow_oid; the payload is
+ * then copied in at hdr->offset and the whole buffer is written at
+ * offset 0.  Without the flag the payload is written as requested.
+ *
+ * After a successful write the oid is added to the object list cache.
+ *
+ * Returns SD_RES_SUCCESS, SD_RES_NO_MEM when the copy-on-write buffer
+ * cannot be allocated, or the error code of the failing open, replica
+ * read or write.
+ */
+static int store_create_and_write_obj(const struct sd_req *req,
+				      struct sd_rsp *rsp, void *data)
+{
+	struct sd_obj_req *hdr = (struct sd_obj_req *)req;
+	struct request *request = (struct request *)data;
+	struct sd_obj_req cow_hdr;
+	int ret;
+	uint32_t epoch = hdr->epoch;
+	char *buf = NULL;
+	struct siocb iocb;
+	unsigned data_length;
+
+	if (is_vdi_obj(hdr->oid))
+		data_length = SD_INODE_SIZE;
+	else if (is_vdi_attr_obj(hdr->oid))
+		data_length = SD_ATTR_OBJ_SIZE;
+	else
+		data_length = SD_DATA_OBJ_SIZE;
+
+	memset(&iocb, 0, sizeof(iocb));
+	iocb.epoch = epoch;
+	iocb.flags = hdr->flags;
+	iocb.length = data_length;
+	ret = sd_store->open(hdr->oid, &iocb, 1);
+	if (ret != SD_RES_SUCCESS)
+		return ret;
+	if (hdr->flags & SD_FLAG_CMD_COW) {
+		dprintf("%" PRIx64 ", %" PRIx64 "\n", hdr->oid, hdr->cow_oid);
+
+		buf = valloc(SD_DATA_OBJ_SIZE);
+		if (!buf) {
+			eprintf("can not allocate memory\n");
+			/*
+			 * ret still holds SD_RES_SUCCESS from open(); report
+			 * the failure instead of claiming the write happened.
+			 */
+			ret = SD_RES_NO_MEM;
+			goto out;
+		}
+		if (hdr->data_length != SD_DATA_OBJ_SIZE) {
+			ret = read_copy_from_replica(request, hdr->epoch, hdr->cow_oid, buf);
+			if (ret != SD_RES_SUCCESS) {
+				eprintf("failed to read cow object\n");
+				goto out;
+			}
+		}
+
+		/*
+		 * NOTE(review): hdr->offset + hdr->data_length is not checked
+		 * against SD_DATA_OBJ_SIZE before this copy - confirm that
+		 * callers validate the range.
+		 */
+		memcpy(buf + hdr->offset, request->data, hdr->data_length);
+		memcpy(&cow_hdr, hdr, sizeof(cow_hdr));
+		cow_hdr.offset = 0;
+		cow_hdr.data_length = SD_DATA_OBJ_SIZE;
+
+		ret = do_write_obj(&iocb, &cow_hdr, epoch, buf);
+	} else
+		ret = do_write_obj(&iocb, hdr, epoch, request->data);
+
+	if (SD_RES_SUCCESS == ret)
+		check_and_insert_objlist_cache(hdr->oid);
+out:
+	/* free(NULL) is a no-op, so no guard is needed for the non-COW path. */
+	free(buf);
+	sd_store->close(hdr->oid, &iocb);
+	return ret;
+}
+
static struct sd_op_template sd_ops[] = {
/* cluster operations */
diff --git a/sheep/sheep_priv.h b/sheep/sheep_priv.h
index d5ddca4..2481fd6 100644
--- a/sheep/sheep_priv.h
+++ b/sheep/sheep_priv.h
@@ -206,11 +206,21 @@ static inline struct store_driver *find_store_driver(const char *name)
return NULL;
}
+/*
+ * State of the object list cache, exposed here so that code outside
+ * store.c can reach the global obj_list_cache instance.
+ */
+struct objlist_cache {
+ struct rb_root root; /* red-black tree of cached entries, ordered by oid */
+ int cache_size; /* incremented on successful insert, decremented on successful remove */
+ pthread_rwlock_t lock; /* write-locked around insert/remove, read-locked while listing */
+};
+
extern struct cluster_info *sys;
extern struct store_driver *sd_store;
extern char *obj_path;
+extern char *mnt_path;
+extern char *jrnl_path;
+extern char *epoch_path;
extern mode_t def_fmode;
extern mode_t def_dmode;
+extern struct objlist_cache obj_list_cache;
int create_listen_port(int port, void *data);
@@ -266,11 +276,6 @@ int get_cluster_flags(uint16_t *flags);
int set_cluster_store(const char *name);
int get_cluster_store(char *buf);
-int store_create_and_write_obj(const struct sd_req *, struct sd_rsp *, void *);
-int store_write_obj(const struct sd_req *, struct sd_rsp *, void *);
-int store_read_obj(const struct sd_req *, struct sd_rsp *, void *);
-int store_remove_obj(const struct sd_req *, struct sd_rsp *, void *);
-
int store_file_write(void *buffer, size_t len);
void *store_file_read(void);
int get_max_nr_copies_from(struct sd_node *entries, int nr);
@@ -279,11 +284,9 @@ int epoch_log_read(uint32_t epoch, char *buf, int len);
int epoch_log_read_nr(uint32_t epoch, char *buf, int len);
int epoch_log_read_remote(uint32_t epoch, char *buf, int len);
int get_latest_epoch(void);
-int remove_epoch(int epoch);
int set_cluster_ctime(uint64_t ctime);
uint64_t get_cluster_ctime(void);
-int stat_sheep(uint64_t *store_size, uint64_t *store_free, uint32_t epoch);
-int get_obj_list(const struct sd_list_req *hdr, struct sd_list_rsp *rsp, void *data);
+int get_obj_list(const struct sd_list_req *, struct sd_list_rsp *, void *);
int start_recovery(uint32_t epoch);
void resume_recovery_work(void);
@@ -307,6 +310,10 @@ int rmdir_r(char *dir_path);
int prealloc(int fd, uint32_t size);
+int init_objlist_cache(void);
+int objlist_cache_rb_remove(struct rb_root *root, uint64_t oid);
+int check_and_insert_objlist_cache(uint64_t oid);
+
/* Operations */
struct sd_op_template *get_sd_op(uint8_t opcode);
diff --git a/sheep/store.c b/sheep/store.c
index 321a3a6..6412556 100644
--- a/sheep/store.c
+++ b/sheep/store.c
@@ -35,249 +35,17 @@ struct sheepdog_config {
};
char *obj_path;
-static char *epoch_path;
-static char *mnt_path;
-static char *jrnl_path;
+char *mnt_path;
+char *jrnl_path;
+char *epoch_path;
static char *config_path;
-struct objlist_cache {
- struct rb_root root;
- int cache_size;
- pthread_rwlock_t lock;
-};
-
-struct objlist_cache_entry {
- uint64_t oid;
- struct rb_node node;
-};
-
-static struct objlist_cache obj_list_cache;
-
mode_t def_dmode = S_IRUSR | S_IWUSR | S_IXUSR | S_IRGRP | S_IWGRP | S_IXGRP;
mode_t def_fmode = S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP;
struct store_driver *sd_store;
LIST_HEAD(store_drivers);
-static struct objlist_cache_entry *objlist_cache_rb_insert(struct rb_root *root,
- struct objlist_cache_entry *new)
-{
- struct rb_node **p = &root->rb_node;
- struct rb_node *parent = NULL;
- struct objlist_cache_entry *entry;
-
- while (*p) {
- parent = *p;
- entry = rb_entry(parent, struct objlist_cache_entry, node);
-
- if (new->oid < entry->oid)
- p = &(*p)->rb_left;
- else if (new->oid > entry->oid)
- p = &(*p)->rb_right;
- else
- return entry; /* already has this entry */
- }
- rb_link_node(&new->node, parent, p);
- rb_insert_color(&new->node, root);
-
- return NULL; /* insert successfully */
-}
-
-static int objlist_cache_rb_remove(struct rb_root *root, uint64_t oid)
-{
- struct rb_node **p = &root->rb_node;
- struct rb_node *parent = NULL;
- struct objlist_cache_entry *entry;
-
- while (*p) {
- parent = *p;
- entry = rb_entry(parent, struct objlist_cache_entry, node);
-
- if (oid < entry->oid)
- p = &(*p)->rb_left;
- else if (oid > entry->oid)
- p = &(*p)->rb_right;
- else {
- rb_erase(parent, root);
- return 0;
- }
- }
-
- return -1; /* fail to remove */
-}
-
-static int check_and_insert_objlist_cache(uint64_t oid)
-{
- struct objlist_cache_entry *entry, *p;
-
- entry = zalloc(sizeof(*entry));
-
- if (!entry) {
- eprintf("no memory to allocate cache entry.\n");
- return -1;
- }
-
- entry->oid = oid;
- rb_init_node(&entry->node);
-
- pthread_rwlock_wrlock(&obj_list_cache.lock);
- p = objlist_cache_rb_insert(&obj_list_cache.root, entry);
- if (p)
- free(entry);
- else
- obj_list_cache.cache_size++;
- pthread_rwlock_unlock(&obj_list_cache.lock);
-
- return 0;
-}
-
-static void get_store_dir(struct strbuf *buf, int epoch)
-{
- if (!strcmp(sd_store->name, "simple"))
- strbuf_addf(buf, "%s%08u", obj_path, epoch);
- else /* XXX assume other store doesn't need epoch/obj pattern */
- strbuf_addf(buf, "%s", obj_path);
-}
-
-int stat_sheep(uint64_t *store_size, uint64_t *store_free, uint32_t epoch)
-{
- struct statvfs vs;
- int ret;
- DIR *dir;
- struct dirent *d;
- uint64_t used = 0;
- struct stat s;
- char path[1024];
- struct strbuf store_dir = STRBUF_INIT;
-
- ret = statvfs(mnt_path, &vs);
- if (ret) {
- ret = SD_RES_EIO;
- goto out;
- }
-
- get_store_dir(&store_dir, epoch);
- dir = opendir(store_dir.buf);
- if (!dir) {
- ret = SD_RES_EIO;
- goto out;
- }
-
- while ((d = readdir(dir))) {
- if (!strcmp(d->d_name, ".") || !strcmp(d->d_name, ".."))
- continue;
-
- snprintf(path, sizeof(path), "%s/%s", store_dir.buf, d->d_name);
-
- ret = stat(path, &s);
- if (ret)
- continue;
-
- used += s.st_size;
- }
-
- closedir(dir);
- ret = SD_RES_SUCCESS;
-
- *store_size = (uint64_t)vs.f_frsize * vs.f_bfree + used;
- *store_free = (uint64_t)vs.f_frsize * vs.f_bfree;
-out:
- strbuf_release(&store_dir);
- return ret;
-}
-
-int get_obj_list(const struct sd_list_req *hdr, struct sd_list_rsp *rsp, void *data)
-{
- uint64_t *list = (uint64_t *)data;
- int nr = 0;
- int res = SD_RES_SUCCESS;
- struct objlist_cache_entry *entry;
- struct rb_node *p;
-
- pthread_rwlock_rdlock(&obj_list_cache.lock);
- for (p = rb_first(&obj_list_cache.root); p; p = rb_next(p)) {
- entry = rb_entry(p, struct objlist_cache_entry, node);
- list[nr++] = entry->oid;
- }
- pthread_rwlock_unlock(&obj_list_cache.lock);
-
- rsp->data_length = nr * sizeof(uint64_t);
-
- return res;
-}
-
-static int read_copy_from_replica(struct request *req, uint32_t epoch,
- uint64_t oid, char *buf)
-{
- int i, nr_copies, ret;
- unsigned wlen, rlen;
- char name[128];
- struct sd_vnode *v;
- struct sd_obj_req hdr;
- struct sd_obj_rsp *rsp = (struct sd_obj_rsp *)&hdr;
- struct siocb iocb;
- int fd;
-
- nr_copies = get_nr_copies(req->vnodes);
- for (i = 0; i < nr_copies; i++) {
- v = oid_to_vnode(req->vnodes, oid, i);
-
- addr_to_str(name, sizeof(name), v->addr, 0);
-
- if (vnode_is_local(v)) {
- memset(&iocb, 0, sizeof(iocb));
- iocb.epoch = epoch;
- ret = sd_store->open(oid, &iocb, 0);
- if (ret != SD_RES_SUCCESS)
- continue;
-
- iocb.buf = buf;
- iocb.length = SD_DATA_OBJ_SIZE;
- iocb.offset = 0;
- ret = sd_store->read(oid, &iocb);
- if (ret != SD_RES_SUCCESS)
- continue;
- sd_store->close(oid, &iocb);
- goto out;
- }
-
- fd = connect_to(name, v->port);
- if (fd < 0)
- continue;
-
- memset(&hdr, 0, sizeof(hdr));
- hdr.opcode = SD_OP_READ_OBJ;
- hdr.oid = oid;
- hdr.epoch = epoch;
-
- rlen = SD_DATA_OBJ_SIZE;
- wlen = 0;
- hdr.flags = SD_FLAG_CMD_IO_LOCAL;
- hdr.data_length = rlen;
- hdr.offset = 0;
-
- ret = exec_req(fd, (struct sd_req *)&hdr, buf, &wlen, &rlen);
-
- close(fd);
-
- dprintf("%x, %x\n", ret, rsp->result);
- if (ret)
- continue;
-
- switch (rsp->result) {
- case SD_RES_SUCCESS:
- ret = SD_RES_SUCCESS;
- goto out;
- default:
- ;
- }
- }
-
- ret = rsp->result;
-out:
- return ret;
-}
-
static int do_local_io(struct request *req, uint32_t epoch)
{
struct sd_obj_req *hdr = (struct sd_obj_req *)&req->rq;
@@ -521,172 +289,6 @@ err_open:
return -1;
}
-int store_remove_obj(const struct sd_req *req, struct sd_rsp *rsp, void *data)
-{
- struct sd_obj_req *hdr = (struct sd_obj_req *)req;
- uint32_t epoch = hdr->epoch;
- struct strbuf buf = STRBUF_INIT;
- int ret = SD_RES_SUCCESS;
-
- get_store_dir(&buf, epoch);
- strbuf_addf(&buf, "%016" PRIx64, hdr->oid);
- if (unlink(buf.buf) < 0) {
- if (errno == ENOENT) {
- ret = SD_RES_NO_OBJ;
- goto out;
- }
- eprintf("%m\n");
- ret = SD_RES_EIO;
- }
- pthread_rwlock_wrlock(&obj_list_cache.lock);
- if (!objlist_cache_rb_remove(&obj_list_cache.root, hdr->oid))
- obj_list_cache.cache_size--;
- pthread_rwlock_unlock(&obj_list_cache.lock);
- out:
- strbuf_release(&buf);
- return ret;
-}
-
-int store_read_obj(const struct sd_req *req, struct sd_rsp *rsp, void *data)
-{
- struct sd_obj_req *hdr = (struct sd_obj_req *)req;
- struct sd_obj_rsp *rsps = (struct sd_obj_rsp *)rsp;
- struct request *request = (struct request *)data;
- int ret;
- uint32_t epoch = hdr->epoch;
- struct siocb iocb;
-
- memset(&iocb, 0, sizeof(iocb));
- iocb.epoch = epoch;
- iocb.flags = hdr->flags;
- ret = sd_store->open(hdr->oid, &iocb, 0);
- if (ret != SD_RES_SUCCESS)
- return ret;
-
- iocb.buf = request->data;
- iocb.length = hdr->data_length;
- iocb.offset = hdr->offset;
- ret = sd_store->read(hdr->oid, &iocb);
- if (ret != SD_RES_SUCCESS)
- goto out;
-
- rsps->data_length = hdr->data_length;
- rsps->copies = sys->nr_copies;
-out:
- sd_store->close(hdr->oid, &iocb);
- return ret;
-}
-
-static int do_write_obj(struct siocb *iocb, struct sd_obj_req *req, uint32_t epoch, void *data)
-{
- struct sd_obj_req *hdr = (struct sd_obj_req *)req;
- uint64_t oid = hdr->oid;
- int ret = SD_RES_SUCCESS;
- void *jd = NULL;
-
- iocb->buf = data;
- iocb->length = hdr->data_length;
- iocb->offset = hdr->offset;
- if (is_vdi_obj(oid)) {
- struct strbuf buf = STRBUF_INIT;
-
- get_store_dir(&buf, epoch);
- strbuf_addf(&buf, "%016" PRIx64, oid);
- jd = jrnl_begin(data, hdr->data_length,
- hdr->offset, buf.buf, jrnl_path);
- if (!jd) {
- strbuf_release(&buf);
- return SD_RES_EIO;
- }
- ret = sd_store->write(oid, iocb);
- jrnl_end(jd);
- strbuf_release(&buf);
- } else
- ret = sd_store->write(oid, iocb);
-
- return ret;
-}
-
-int store_write_obj(const struct sd_req *req, struct sd_rsp *rsp, void *data)
-{
- struct sd_obj_req *hdr = (struct sd_obj_req *)req;
- struct request *request = (struct request *)data;
- int ret;
- uint32_t epoch = hdr->epoch;
- struct siocb iocb;
-
- memset(&iocb, 0, sizeof(iocb));
- iocb.epoch = epoch;
- iocb.flags = hdr->flags;
- ret = sd_store->open(hdr->oid, &iocb, 0);
- if (ret != SD_RES_SUCCESS)
- return ret;
-
- ret = do_write_obj(&iocb, hdr, epoch, request->data);
-
- sd_store->close(hdr->oid, &iocb);
- return ret;
-}
-
-int store_create_and_write_obj(const struct sd_req *req, struct sd_rsp *rsp, void *data)
-{
- struct sd_obj_req *hdr = (struct sd_obj_req *)req;
- struct request *request = (struct request *)data;
- struct sd_obj_req cow_hdr;
- int ret;
- uint32_t epoch = hdr->epoch;
- char *buf = NULL;
- struct siocb iocb;
- unsigned data_length;
-
- if (is_vdi_obj(hdr->oid))
- data_length = SD_INODE_SIZE;
- else if (is_vdi_attr_obj(hdr->oid))
- data_length = SD_ATTR_OBJ_SIZE;
- else
- data_length = SD_DATA_OBJ_SIZE;
-
- memset(&iocb, 0, sizeof(iocb));
- iocb.epoch = epoch;
- iocb.flags = hdr->flags;
- iocb.length = data_length;
- ret = sd_store->open(hdr->oid, &iocb, 1);
- if (ret != SD_RES_SUCCESS)
- return ret;
- if (hdr->flags & SD_FLAG_CMD_COW) {
- dprintf("%" PRIx64 ", %" PRIx64 "\n", hdr->oid, hdr->cow_oid);
-
- buf = valloc(SD_DATA_OBJ_SIZE);
- if (!buf) {
- eprintf("can not allocate memory\n");
- goto out;
- }
- if (hdr->data_length != SD_DATA_OBJ_SIZE) {
- ret = read_copy_from_replica(request, hdr->epoch, hdr->cow_oid, buf);
- if (ret != SD_RES_SUCCESS) {
- eprintf("failed to read cow object\n");
- goto out;
- }
- }
-
- memcpy(buf + hdr->offset, request->data, hdr->data_length);
- memcpy(&cow_hdr, hdr, sizeof(cow_hdr));
- cow_hdr.offset = 0;
- cow_hdr.data_length = SD_DATA_OBJ_SIZE;
-
- ret = do_write_obj(&iocb, &cow_hdr, epoch, buf);
- } else
- ret = do_write_obj(&iocb, hdr, epoch, request->data);
-
- if (SD_RES_SUCCESS == ret)
- check_and_insert_objlist_cache(hdr->oid);
-out:
- if (buf)
- free(buf);
- sd_store->close(hdr->oid, &iocb);
- return ret;
-}
-
static int fix_object_consistency(struct request *req)
{
int ret = SD_RES_NO_MEM;
@@ -986,28 +588,6 @@ out:
return ret;
}
-int remove_epoch(int epoch)
-{
- int ret;
- char path[PATH_MAX];
-
- dprintf("remove epoch %"PRIu32"\n", epoch);
- snprintf(path, sizeof(path), "%s%08u", epoch_path, epoch);
- ret = unlink(path);
- if (ret && ret != -ENOENT) {
- eprintf("failed to remove %s: %s\n", path, strerror(-ret));
- return SD_RES_EIO;
- }
-
- snprintf(path, sizeof(path), "%s%08u/", jrnl_path, epoch);
- ret = rmdir_r(path);
- if (ret && ret != -ENOENT) {
- eprintf("failed to remove %s: %s\n", path, strerror(-ret));
- return SD_RES_EIO;
- }
- return 0;
-}
-
int set_cluster_ctime(uint64_t ct)
{
int fd, ret;
@@ -1194,36 +774,6 @@ static int init_config_path(const char *base_path)
return 0;
}
-static int init_objlist_cache(void)
-{
- int i;
- struct siocb iocb = { 0 };
- uint64_t *buf;
-
- pthread_rwlock_init(&obj_list_cache.lock, NULL);
- obj_list_cache.root = RB_ROOT;
- obj_list_cache.cache_size = 0;
-
- if (sd_store) {
- buf = zalloc(1 << 22);
- if (!buf) {
- eprintf("no memory to allocate.\n");
- return -1;
- }
-
- iocb.length = 0;
- iocb.buf = buf;
- sd_store->get_objlist(&iocb);
-
- for (i = 0; i < iocb.length; i++)
- check_and_insert_objlist_cache(buf[i]);
-
- free(buf);
- }
-
- return 0;
-}
-
static int init_store_driver(void)
{
char driver_name[STORE_LEN], *p;
--
1.7.8.2
More information about the sheepdog
mailing list