[sheepdog] [PATCH v4 3/7] sheep, farm: adapt to md support

Liu Yuan namei.unix at gmail.com
Tue Mar 12 07:00:17 CET 2013


From: Liu Yuan <tailai.ly at taobao.com>

This patch adds another level of redirection to the oid-to-path mapping, where
we can play tricks: return obj_path in non-md mode, and return the actual path
from the underlying md layer otherwise.

This also extends the notion of the working directory so that it refers to all
the directories in the md array, and adds convenience helpers to walk through
the broadened WD.

Signed-off-by: Liu Yuan <tailai.ly at taobao.com>
---
 sheep/farm/farm.c   |   51 ++++++++----------
 sheep/farm/trunk.c  |   57 +++++++++-----------
 sheep/md.c          |   89 +++++++++++++++++++++++++++++++
 sheep/plain_store.c |  145 ++++++++++++++++++++++-----------------------------
 sheep/sheep_priv.h  |   10 ++--
 5 files changed, 203 insertions(+), 149 deletions(-)

diff --git a/sheep/farm/farm.c b/sheep/farm/farm.c
index 1943dc4..6c5f6b8 100644
--- a/sheep/farm/farm.c
+++ b/sheep/farm/farm.c
@@ -11,7 +11,6 @@
  * along with this program. If not, see <http://www.gnu.org/licenses/>.
  */
 
-#include <dirent.h>
 #include <pthread.h>
 #include <linux/limits.h>
 #include <sys/file.h>
@@ -157,31 +156,6 @@ out:
 	return ret;
 }
 
-static int cleanup_working_dir(void)
-{
-	DIR *dir;
-	struct dirent *d;
-
-	sd_dprintf("try clean up working dir");
-	dir = opendir(obj_path);
-	if (!dir)
-		return -1;
-
-	while ((d = readdir(dir))) {
-		char p[PATH_MAX];
-		if (!strncmp(d->d_name, ".", 1))
-			continue;
-		snprintf(p, sizeof(p), "%s%s", obj_path, d->d_name);
-		if (unlink(p) < 0) {
-			sd_eprintf("%s:%m", p);
-			continue;
-		}
-		sd_dprintf("remove file %s", d->d_name);
-	}
-	closedir(dir);
-	return 0;
-}
-
 static int restore_objects_from_snap(uint32_t epoch)
 {
 	struct sha1_file_hdr hdr;
@@ -227,16 +201,33 @@ out:
 	return ret;
 }
 
+/* Remove object @oid from working directory @path; a missing file is fine. */
+static int rm_object(uint64_t oid, char *path, void *arg)
+{
+	char p[PATH_MAX];
+
+	/* Object files are named by the zero-padded, 16-digit hex oid. */
+	snprintf(p, sizeof(p), "%s/%016"PRIx64, path, oid);
+	if (unlink(p) < 0) {
+		/* Test errno before logging so the logger cannot clobber it. */
+		if (errno == ENOENT)
+			return SD_RES_SUCCESS;
+		sd_eprintf("failed to remove cached object %m");
+		return SD_RES_EIO;
+	}
+	return SD_RES_SUCCESS;
+}
+
 static int farm_restore(const struct siocb *iocb)
 {
 	int ret = SD_RES_EIO, epoch = iocb->epoch;
 
 	sd_dprintf("try recover user epoch %d", epoch);
 
-	if (cleanup_working_dir() < 0) {
-		sd_eprintf("failed to clean up the working dir %m");
-		goto out;
-	}
+	/* Remove all the objects of WD and object cache */
+	for_each_object_in_wd(rm_object, true, NULL);
+	if (sys->enable_object_cache)
+		object_cache_format();
 
 	ret = restore_objects_from_snap(epoch);
 	if (ret != SD_RES_SUCCESS)
diff --git a/sheep/farm/trunk.c b/sheep/farm/trunk.c
index 0658511..eaa4193 100644
--- a/sheep/farm/trunk.c
+++ b/sheep/farm/trunk.c
@@ -17,7 +17,6 @@
  * flat directory structure.
  */
 #include <pthread.h>
-#include <dirent.h>
 #include <sys/stat.h>
 #include <unistd.h>
 
@@ -35,16 +34,15 @@ static int fill_entry_new_sha1(struct trunk_entry *entry)
 	struct sha1_file_hdr hdr = { .priv = 0 };
 
 	memcpy(hdr.tag, TAG_DATA, TAG_LEN);
-	strbuf_addstr(&buf, obj_path);
-	strbuf_addf(&buf, "%016" PRIx64, entry->oid);
+	strbuf_addstr(&buf, get_object_path(entry->oid));
+	strbuf_addf(&buf, "/%016" PRIx64, entry->oid);
 	fd = open(buf.buf, O_RDONLY);
-	strbuf_reset(&buf);
-
 	if (fd < 0) {
-		sd_dprintf("%m");
+		sd_dprintf("%m, %s", buf.buf);
 		ret = -1;
 		goto out;
 	}
+	strbuf_reset(&buf);
 	if (!strbuf_read(&buf, fd, SD_DATA_OBJ_SIZE) == SD_DATA_OBJ_SIZE) {
 		sd_dprintf("strbuf_read fail to read full");
 		ret = -1;
@@ -66,27 +64,41 @@ out:
 	return ret;
 }
 
-static int inc_object_nr(uint64_t oid, void *arg)
+static int inc_object_nr(uint64_t oid, char *wd, void *arg)
 {
 	uint64_t *object_nr = arg;
 
 	(*object_nr)++;
 
-	return 0;
+	return SD_RES_SUCCESS;
+}
+
+static int init_trunk_entry(uint64_t oid, char *path, void *arg)
+{
+	struct trunk_entry entry = {};
+	struct strbuf *buf = arg;
+
+	entry.oid = oid;
+	if (fill_entry_new_sha1(&entry) < 0)
+		return SD_RES_UNKNOWN;
+
+	strbuf_add(buf, &entry, sizeof(struct trunk_entry));
+	return SD_RES_SUCCESS;
 }
 
 int trunk_file_write(unsigned char *outsha1)
 {
 	struct strbuf buf;
 	struct sha1_file_hdr hdr = {};
-	struct trunk_entry entry = {};
-	struct dirent *d;
-	DIR *dir;
-	uint64_t data_size, oid, object_nr = 0;
+	uint64_t data_size, object_nr = 0;
 	int ret = 0;
 
 	/* Add the hdr first */
 	for_each_object_in_wd(inc_object_nr, false, &object_nr);
+	if (ret != SD_RES_SUCCESS) {
+		ret = -1;
+		goto out;
+	}
 	data_size = sizeof(struct trunk_entry) * object_nr;
 	hdr.size = data_size;
 	hdr.priv = object_nr;
@@ -94,35 +106,18 @@ int trunk_file_write(unsigned char *outsha1)
 	strbuf_init(&buf, sizeof(hdr) + data_size);
 	strbuf_add(&buf, &hdr, sizeof(hdr));
 
-	dir = opendir(obj_path);
-	if (!dir) {
+	ret = for_each_object_in_wd(init_trunk_entry, false,  &buf);
+	if (ret != SD_RES_SUCCESS) {
 		ret = -1;
 		goto out;
 	}
 
-	while ((d = readdir(dir))) {
-		if (!strncmp(d->d_name, ".", 1))
-			continue;
-
-		oid = strtoull(d->d_name, NULL, 16);
-		if (oid == 0 || oid == ULLONG_MAX)
-			continue;
-
-		entry.oid = oid;
-		if (fill_entry_new_sha1(&entry) < 0) {
-			ret = -1;
-			goto out;
-		}
-		strbuf_add(&buf, &entry, sizeof(struct trunk_entry));
-	}
-
 	if (sha1_file_write((void *)buf.buf, buf.len, outsha1) < 0) {
 		ret = -1;
 		goto out;
 	}
 	sd_dprintf("trunk sha1: %s", sha1_to_hex(outsha1));
 out:
-	closedir(dir);
 	strbuf_release(&buf);
 	return ret;
 }
diff --git a/sheep/md.c b/sheep/md.c
index 17e20ef..0bbb160 100644
--- a/sheep/md.c
+++ b/sheep/md.c
@@ -19,6 +19,7 @@
 #include <errno.h>
 #include <math.h>
 #include <sys/xattr.h>
+#include <dirent.h>
 
 #include "sheep_priv.h"
 
@@ -179,6 +180,94 @@ uint64_t md_init_space(void)
 	}
 	calculate_vdisks(md_disks, md_nr_disks, total);
 	md_nr_vds = disks_to_vdisks(md_disks, md_nr_disks, md_vds);
+	sys->enable_md = true;
 
 	return total;
 }
+
+char *get_object_path(uint64_t oid)
+{
+	struct vdisk *vd;
+
+	if (!sys->enable_md)
+		return obj_path;
+	vd = oid_to_vdisk(oid);
+	return md_disks[vd->idx].path;
+}
+
+/* If cleanup is true, temporary objects will be removed */
+static int for_each_object_in_path(char *path,
+				   int (*func)(uint64_t, char *, void *),
+				   bool cleanup, void *arg)
+{
+	DIR *dir;
+	struct dirent *d;
+	uint64_t oid;
+	int ret = SD_RES_SUCCESS;
+	char p[PATH_MAX];
+
+	dir = opendir(path);
+	if (!dir) {
+		sd_eprintf("failed to open %s, %m", path);
+		return SD_RES_EIO;
+	}
+
+	while ((d = readdir(dir))) {
+		if (!strncmp(d->d_name, ".", 1))
+			continue;
+
+		oid = strtoull(d->d_name, NULL, 16);
+		if (oid == 0 || oid == ULLONG_MAX)
+			continue;
+
+		/* don't call callback against temporary objects */
+		if (strlen(d->d_name) == 20 &&
+		    strcmp(d->d_name + 16, ".tmp") == 0) {
+			if (cleanup) {
+				snprintf(p, PATH_MAX, "%s/%016"PRIx64".tmp",
+					 path, oid);
+				sd_dprintf("remove tmp object %s", p);
+				unlink(p);
+			}
+			continue;
+		}
+
+		ret = func(oid, path, arg);
+		if (ret != SD_RES_SUCCESS)
+			break;
+	}
+	closedir(dir);
+	return ret;
+}
+
+int for_each_object_in_wd(int (*func)(uint64_t oid, char *path, void *arg),
+			  bool cleanup, void *arg)
+{
+	int i, ret;
+
+	if (!sys->enable_md)
+		return for_each_object_in_path(obj_path, func, cleanup, arg);
+
+	for (i = 0; i < md_nr_disks; i++) {
+		ret = for_each_object_in_path(md_disks[i].path, func,
+					      cleanup, arg);
+		if (ret != SD_RES_SUCCESS)
+			return ret;
+	}
+	return SD_RES_SUCCESS;
+}
+
+int for_each_obj_path(int (*func)(char *path))
+{
+	int i, ret;
+
+	if (!sys->enable_md)
+		return func(obj_path);
+
+	for (i = 0; i < md_nr_disks; i++) {
+		ret = func(md_disks[i].path);
+		if (ret != SD_RES_SUCCESS)
+			return ret;
+	}
+	return SD_RES_SUCCESS;
+}
diff --git a/sheep/plain_store.c b/sheep/plain_store.c
index 357a0a6..d5fe6e2 100644
--- a/sheep/plain_store.c
+++ b/sheep/plain_store.c
@@ -13,13 +13,10 @@
 #include <sys/stat.h>
 #include <sys/types.h>
 #include <unistd.h>
-#include <dirent.h>
 
 #include "sheep_priv.h"
 #include "config.h"
 
-static char stale_dir[PATH_MAX];
-
 static int get_open_flags(uint64_t oid, bool create, int fl)
 {
 	int flags = O_DSYNC | O_RDWR;
@@ -41,62 +38,20 @@ static int get_open_flags(uint64_t oid, bool create, int fl)
 
 static int get_obj_path(uint64_t oid, char *path)
 {
-	return snprintf(path, PATH_MAX, "%s%016" PRIx64, obj_path, oid);
+	return snprintf(path, PATH_MAX, "%s/%016" PRIx64,
+			get_object_path(oid), oid);
 }
 
 static int get_tmp_obj_path(uint64_t oid, char *path)
 {
-	return snprintf(path, PATH_MAX, "%s%016"PRIx64".tmp",
-			obj_path, oid);
+	return snprintf(path, PATH_MAX, "%s/%016"PRIx64".tmp",
+			get_object_path(oid), oid);
 }
 
 static int get_stale_obj_path(uint64_t oid, uint32_t epoch, char *path)
 {
-	return snprintf(path, PATH_MAX, "%s/%016"PRIx64".%"PRIu32,
-			stale_dir, oid, epoch);
-}
-
-/* If cleanup is true, temporary objects will be removed */
-int for_each_object_in_wd(int (*func)(uint64_t oid, void *arg), bool cleanup,
-			  void *arg)
-{
-	DIR *dir;
-	struct dirent *d;
-	uint64_t oid;
-	int ret = SD_RES_SUCCESS;
-	char path[PATH_MAX];
-
-	dir = opendir(obj_path);
-	if (!dir) {
-		sd_eprintf("failed to open %s, %m", obj_path);
-		return SD_RES_EIO;
-	}
-
-	while ((d = readdir(dir))) {
-		if (!strncmp(d->d_name, ".", 1))
-			continue;
-
-		oid = strtoull(d->d_name, NULL, 16);
-		if (oid == 0 || oid == ULLONG_MAX)
-			continue;
-
-		/* don't call callback against temporary objects */
-		if (strlen(d->d_name) == 20 &&
-		    strcmp(d->d_name + 16, ".tmp") == 0) {
-			if (cleanup) {
-				get_tmp_obj_path(oid, path);
-				sd_dprintf("remove tmp object %s", path);
-				unlink(path);
-			}
-			continue;
-		}
-
-		ret = func(oid, arg);
-		if (ret != SD_RES_SUCCESS)
-			break;
-	}
-	closedir(dir);
-	return ret;
+	return snprintf(path, PATH_MAX, "%s/.stale/%016"PRIx64".%"PRIu32,
+			get_object_path(oid), oid, epoch);
 }
 
 bool default_exist(uint64_t oid)
@@ -119,7 +74,7 @@ int err_to_sderr(uint64_t oid, int err)
 
 	switch (err) {
 	case ENOENT:
-		if (stat(obj_path, &s) < 0) {
+		if (stat(get_object_path(oid), &s) < 0) {
 			sd_eprintf("corrupted");
 			return SD_RES_EIO;
 		}
@@ -176,24 +131,54 @@ out:
 	return ret;
 }
 
-int default_cleanup(void)
+static int make_stale_dir(char *path)
 {
-	rmdir_r(stale_dir);
-	if (mkdir(stale_dir, 0755) < 0) {
-		sd_eprintf("%m");
-		return SD_RES_EIO;
+	char p[PATH_MAX];
+
+	snprintf(p, PATH_MAX, "%s/.stale", path);
+	if (mkdir(p, def_dmode) < 0) {
+		if (errno != EEXIST) {
+			sd_eprintf("%s failed, %m", p);
+			return SD_RES_EIO;
+		}
 	}
+	return SD_RES_SUCCESS;
+}
+
+static int purge_dir(char *path)
+{
+	if (purge_directory(path) < 0)
+		return SD_RES_EIO;
 
 	return SD_RES_SUCCESS;
 }
 
-static int init_vdi_copy_number(uint64_t oid)
+static int purge_stale_dir(char *path)
+{
+	char p[PATH_MAX];
+
+	snprintf(p, PATH_MAX, "%s/.stale", path);
+	return purge_dir(p);
+}
+
+int default_cleanup(void)
+{
+	int ret;
+
+	ret = for_each_obj_path(purge_stale_dir);
+	if (ret != SD_RES_SUCCESS)
+		return ret;
+
+	return SD_RES_SUCCESS;
+}
+
+static int init_vdi_copy_number(uint64_t oid, char *wd)
 {
 	char path[PATH_MAX];
 	int fd, flags = get_open_flags(oid, false, 0), ret;
 	struct sheepdog_inode *inode = xzalloc(sizeof(*inode));
 
-	snprintf(path, sizeof(path), "%s%016" PRIx64, obj_path, oid);
+	snprintf(path, sizeof(path), "%s/%016"PRIx64, wd, oid);
 
 	fd = open(path, flags);
 	if (fd < 0) {
@@ -217,7 +202,7 @@ out:
 	return SD_RES_SUCCESS;
 }
 
-static int init_objlist_and_vdi_bitmap(uint64_t oid, void *arg)
+static int init_objlist_and_vdi_bitmap(uint64_t oid, char *wd, void *arg)
 {
 	int ret;
 	objlist_cache_insert(oid);
@@ -225,7 +210,7 @@ static int init_objlist_and_vdi_bitmap(uint64_t oid, void *arg)
 	if (is_vdi_obj(oid)) {
 		sd_dprintf("found the VDI object %" PRIx64, oid);
 		set_bit(oid_to_vid(oid), sys->vdi_inuse);
-		ret = init_vdi_copy_number(oid);
+		ret = init_vdi_copy_number(oid, wd);
 		if (ret != SD_RES_SUCCESS)
 			return ret;
 	}
@@ -234,16 +219,12 @@ static int init_objlist_and_vdi_bitmap(uint64_t oid, void *arg)
 
 int default_init(const char *p)
 {
-	sd_dprintf("use plain store driver");
+	int ret;
 
-	/* create a stale directory */
-	snprintf(stale_dir, sizeof(stale_dir), "%s/.stale", p);
-	if (mkdir(stale_dir, 0755) < 0) {
-		if (errno != EEXIST) {
-			sd_eprintf("%m");
-			return SD_RES_EIO;
-		}
-	}
+	sd_dprintf("use plain store driver");
+	ret = for_each_obj_path(make_stale_dir);
+	if (ret != SD_RES_SUCCESS)
+		return ret;
 
 	return for_each_object_in_wd(init_objlist_and_vdi_bitmap, true, NULL);
 }
@@ -424,13 +405,14 @@ out:
 	return ret;
 }
 
-static int move_object_to_stale_dir(uint64_t oid, void *arg)
+static int move_object_to_stale_dir(uint64_t oid, char *wd, void *arg)
 {
 	char path[PATH_MAX], stale_path[PATH_MAX];
 	uint32_t tgt_epoch = *(int *)arg;
 
-	get_obj_path(oid, path);
-	get_stale_obj_path(oid, tgt_epoch, stale_path);
+	snprintf(path, PATH_MAX, "%s/%016" PRIx64, wd, oid);
+	snprintf(stale_path, PATH_MAX, "%s/.stale/%016"PRIx64".%"PRIu32, wd,
+		 oid, tgt_epoch);
 
 	if (rename(path, stale_path) < 0) {
 		sd_eprintf("failed to move stale object %"PRIX64" to %s, %m",
@@ -442,10 +424,10 @@ static int move_object_to_stale_dir(uint64_t oid, void *arg)
 	return SD_RES_SUCCESS;
 }
 
-static int check_stale_objects(uint64_t oid, void *arg)
+static int check_stale_objects(uint64_t oid, char *wd, void *arg)
 {
 	if (oid_stale(oid))
-		return move_object_to_stale_dir(oid, arg);
+		return move_object_to_stale_dir(oid, wd, arg);
 
 	return SD_RES_SUCCESS;
 }
@@ -464,15 +446,10 @@ int default_format(void)
 	unsigned ret;
 
 	sd_dprintf("try get a clean store");
-	ret = rmdir_r(obj_path);
-	if (ret && ret != -ENOENT) {
-		sd_eprintf("failed to remove %s: %s", obj_path, strerror(-ret));
-		return SD_RES_EIO;
-	}
-	if (mkdir(obj_path, def_dmode) < 0) {
-		sd_eprintf("%m");
-		return SD_RES_EIO;
-	}
+	ret = for_each_obj_path(purge_dir);
+	if (ret != SD_RES_SUCCESS)
+		return ret;
+
 	if (sys->enable_object_cache)
 		object_cache_format();
 
diff --git a/sheep/sheep_priv.h b/sheep/sheep_priv.h
index d6fcc58..da1df8e 100644
--- a/sheep/sheep_priv.h
+++ b/sheep/sheep_priv.h
@@ -124,8 +124,9 @@ struct cluster_info {
 
 	uatomic_bool use_journal;
 	bool backend_dio;
-	bool upgrade; /* upgrade data layout before starting service
-		       * if necessary*/
+	/* upgrade data layout before starting service if necessary*/
+	bool upgrade;
+	bool enable_md;
 };
 
 struct siocb {
@@ -181,8 +182,8 @@ int default_cleanup(void);
 int default_format(void);
 int default_remove_object(uint64_t oid);
 int default_purge_obj(void);
-int for_each_object_in_wd(int (*func)(uint64_t oid, void *arg), bool cleanup,
-			  void *arg);
+int for_each_object_in_wd(int (*func)(uint64_t, char *, void *), bool, void *);
+int for_each_obj_path(int (*func)(char *path));
 int err_to_sderr(uint64_t oid, int err);
 
 extern struct list_head store_drivers;
@@ -417,5 +418,6 @@ int journal_file_write(uint64_t oid, const char *buf, size_t size, off_t, bool);
 /* md.c */
 int md_init_disk(char *path);
 uint64_t md_init_space(void);
+char *get_object_path(uint64_t oid);
 
 #endif
-- 
1.7.9.5




More information about the sheepdog mailing list