[sheepdog] [PATCH v7 05/10] sheep: upgrade 'idx' in object_cache from 32bit to 64bit

Robin Dong robin.k.dong at gmail.com
Sat Nov 16 15:57:34 CET 2013


Change 'idx' in the object cache from uint32_t to uint64_t so that it works
correctly on hyper volumes.

Signed-off-by: Robin Dong <sanbai at taobao.com>
---
 include/sheepdog_proto.h |    2 +-
 lib/sd_inode.c           |    7 ++-
 sheep/object_cache.c     |   99 ++++++++++++++++++++++++----------------------
 3 files changed, 58 insertions(+), 50 deletions(-)

diff --git a/include/sheepdog_proto.h b/include/sheepdog_proto.h
index 5b2bfa8..72a1e3a 100644
--- a/include/sheepdog_proto.h
+++ b/include/sheepdog_proto.h
@@ -419,7 +419,7 @@ static inline uint64_t vid_to_vdi_oid(uint32_t vid)
 	return VDI_BIT | ((uint64_t)vid << VDI_SPACE_SHIFT);
 }
 
-static inline uint64_t vid_to_data_oid(uint32_t vid, uint32_t idx)
+static inline uint64_t vid_to_data_oid(uint32_t vid, uint64_t idx)
 {
 	return ((uint64_t)vid << VDI_SPACE_SHIFT) | idx;
 }
diff --git a/lib/sd_inode.c b/lib/sd_inode.c
index 26c9a3a..be9f904 100644
--- a/lib/sd_inode.c
+++ b/lib/sd_inode.c
@@ -495,7 +495,7 @@ static int insert_new_node(write_node_fn writer, read_node_fn reader,
 					path->p_ext, idx, vdi_id);
 			writer(path->p_idx->oid, path->p_ext_header,
 			       SD_INODE_DATA_INDEX_SIZE, 0, 0, inode->nr_copies,
-			       inode->copy_policy, true, false);
+			       inode->copy_policy, false, false);
 		} else if (path->p_ext_header) {
 			/* the last idx-node */
 			insert_ext_entry_nosearch(path->p_ext_header,
@@ -505,8 +505,11 @@ static int insert_new_node(write_node_fn writer, read_node_fn reader,
 				(LAST_EXT(path->p_ext_header) - 1)->idx;
 			writer(path->p_idx->oid, path->p_ext_header,
 			       SD_INODE_DATA_INDEX_SIZE, 0, 0, inode->nr_copies,
-			       inode->copy_policy, true, false);
+			       inode->copy_policy, false, false);
 		} else {
+			/* if btree is full, then panic */
+			if (header->entries >= EXT_IDX_MAX_ENTRIES)
+				panic("%s() B-tree is full!", __func__);
 			/* create a new ext-node */
 			leaf_node = xmalloc(SD_INODE_DATA_INDEX_SIZE);
 			sd_inode_init(leaf_node, 2);
diff --git a/sheep/object_cache.c b/sheep/object_cache.c
index e1968fc..3df88fd 100644
--- a/sheep/object_cache.c
+++ b/sheep/object_cache.c
@@ -16,15 +16,16 @@
 /*
  * Object Cache ID
  *
- *  0 - 19 (20 bits): data object space
- *  20 - 27 (8 bits): object flag space
- *  28 - 31 (4 bits): object type indentifier space
+ *  0 - 31 (32 bits): data object space
+ *  32 - 51 (20 bits): reserved
+ *  52 - 59 (8 bits): object flag space
+ *  60 - 63 (4 bits): object type identifier space
  */
-#define CACHE_VDI_SHIFT       31 /* if the entry is identified as VDI object */
-#define CACHE_CREATE_SHIFT    27 /* If the entry should be created at backend */
+#define CACHE_VDI_SHIFT       63 /* if the entry is identified as VDI object */
+#define CACHE_CREATE_SHIFT    59 /* If the entry should be created at backend */
 
-#define CACHE_VDI_BIT         (UINT32_C(1) << CACHE_VDI_SHIFT)
-#define CACHE_CREATE_BIT      (UINT32_C(1) << CACHE_CREATE_SHIFT)
+#define CACHE_VDI_BIT         (UINT64_C(1) << CACHE_VDI_SHIFT)
+#define CACHE_CREATE_BIT      (UINT64_C(1) << CACHE_CREATE_SHIFT)
 
 #define CACHE_INDEX_MASK      (CACHE_CREATE_BIT)
 
@@ -39,7 +40,7 @@ struct global_cache {
 };
 
 struct object_cache_entry {
-	uint32_t idx; /* Index of this entry */
+	uint64_t idx; /* Index of this entry */
 	refcnt_t refcnt; /* Reference count of this entry */
 	uint64_t bmap; /* Each bit represents one dirty block in object */
 	struct object_cache *oc; /* Object cache this entry belongs to */
@@ -97,7 +98,7 @@ static inline int hash(uint64_t vid)
 }
 
 /* We should always use this helper to get entry idx */
-static inline uint32_t entry_idx(const struct object_cache_entry *entry)
+static inline uint64_t entry_idx(const struct object_cache_entry *entry)
 {
 	return entry->idx & ~CACHE_INDEX_MASK;
 }
@@ -108,15 +109,17 @@ static int object_cache_cmp(const struct object_cache_entry *a,
 	return intcmp(entry_idx(a), entry_idx(b));
 }
 
-static inline uint32_t object_cache_oid_to_idx(uint64_t oid)
+static inline uint64_t object_cache_oid_to_idx(uint64_t oid)
 {
-	uint32_t idx = data_oid_to_idx(oid);
+	uint64_t idx = data_oid_to_idx(oid);
 	if (is_vdi_obj(oid))
-		idx |= 1 << CACHE_VDI_SHIFT;
+		idx |= 1ULL << CACHE_VDI_SHIFT;
+	else if (is_vdi_btree_obj(oid))
+		idx |= VDI_BTREE_BIT;
 	return idx;
 }
 
-static inline bool idx_has_vdi_bit(uint32_t idx)
+static inline bool idx_has_vdi_bit(uint64_t idx)
 {
 	return !!(idx & CACHE_VDI_BIT);
 }
@@ -209,7 +212,7 @@ lru_tree_insert(struct rb_root *root, struct object_cache_entry *new)
 }
 
 static struct object_cache_entry *lru_tree_search(struct rb_root *root,
-						  uint32_t idx)
+						  uint64_t idx)
 {
 	struct object_cache_entry key = { .idx = idx };
 
@@ -279,7 +282,7 @@ free_cache_entry(struct object_cache_entry *entry)
 	free(entry);
 }
 
-static uint64_t idx_to_oid(uint32_t vid, uint32_t idx)
+static uint64_t idx_to_oid(uint32_t vid, uint64_t idx)
 {
 	if (idx_has_vdi_bit(idx))
 		return vid_to_vdi_oid(vid);
@@ -287,12 +290,12 @@ static uint64_t idx_to_oid(uint32_t vid, uint32_t idx)
 		return vid_to_data_oid(vid, idx);
 }
 
-static int remove_cache_object(struct object_cache *oc, uint32_t idx)
+static int remove_cache_object(struct object_cache *oc, uint64_t idx)
 {
 	int ret = SD_RES_SUCCESS;
 	char path[PATH_MAX];
 
-	snprintf(path, sizeof(path), "%s/%06"PRIx32"/%08"PRIx32,
+	snprintf(path, sizeof(path), "%s/%06"PRIx32"/%016"PRIx64,
 		 object_cache_dir, oc->vid, idx);
 	sd_debug("%"PRIx64, idx_to_oid(oc->vid, idx));
 	if (unlikely(unlink(path) < 0)) {
@@ -306,14 +309,14 @@ out:
 	return ret;
 }
 
-static int read_cache_object_noupdate(uint32_t vid, uint32_t idx, void *buf,
+static int read_cache_object_noupdate(uint32_t vid, uint64_t idx, void *buf,
 				      size_t count, off_t offset)
 {
 	size_t size;
 	int fd, flags = def_open_flags, ret = SD_RES_SUCCESS;
 	char p[PATH_MAX];
 
-	snprintf(p, sizeof(p), "%s/%06"PRIx32"/%08"PRIx32, object_cache_dir,
+	snprintf(p, sizeof(p), "%s/%06"PRIx32"/%016"PRIx64, object_cache_dir,
 		 vid, idx);
 
 	if (sys->object_cache_directio && !idx_has_vdi_bit(idx)) {
@@ -343,14 +346,14 @@ out:
 	return ret;
 }
 
-static int write_cache_object_noupdate(uint32_t vid, uint32_t idx, void *buf,
+static int write_cache_object_noupdate(uint32_t vid, uint64_t idx, void *buf,
 				       size_t count, off_t offset)
 {
 	size_t size;
 	int fd, flags = def_open_flags, ret = SD_RES_SUCCESS;
 	char p[PATH_MAX];
 
-	snprintf(p, sizeof(p), "%s/%06"PRIx32"/%08"PRIx32, object_cache_dir,
+	snprintf(p, sizeof(p), "%s/%06"PRIx32"/%016"PRIx64, object_cache_dir,
 		 vid, idx);
 	if (sys->object_cache_directio && !idx_has_vdi_bit(idx)) {
 		assert(is_aligned_to_pagesize(buf));
@@ -382,7 +385,8 @@ out:
 static int read_cache_object(struct object_cache_entry *entry, void *buf,
 			     size_t count, off_t offset)
 {
-	uint32_t vid = entry->oc->vid, idx = entry_idx(entry);
+	uint32_t vid = entry->oc->vid;
+	uint64_t idx = entry_idx(entry);
 	struct object_cache *oc = entry->oc;
 	int ret;
 
@@ -400,7 +404,8 @@ static int write_cache_object(struct object_cache_entry *entry, void *buf,
 			      size_t count, off_t offset, bool create,
 			      bool writeback)
 {
-	uint32_t vid = entry->oc->vid, idx = entry_idx(entry);
+	uint32_t vid = entry->oc->vid;
+	uint64_t idx = entry_idx(entry);
 	uint64_t oid = idx_to_oid(vid, idx);
 	struct object_cache *oc = entry->oc;
 	struct sd_req hdr;
@@ -447,7 +452,7 @@ out:
 	return ret;
 }
 
-static int push_cache_object(uint32_t vid, uint32_t idx, uint64_t bmap,
+static int push_cache_object(uint32_t vid, uint64_t idx, uint64_t bmap,
 			     bool create)
 {
 	struct sd_req hdr;
@@ -663,7 +668,7 @@ static void object_cache_try_to_reclaim(int delay)
 }
 
 static inline struct object_cache_entry *
-alloc_cache_entry(struct object_cache *oc, uint32_t idx)
+alloc_cache_entry(struct object_cache *oc, uint64_t idx)
 {
 	struct object_cache_entry *entry;
 
@@ -677,7 +682,7 @@ alloc_cache_entry(struct object_cache *oc, uint32_t idx)
 	return entry;
 }
 
-static void add_to_lru_cache(struct object_cache *oc, uint32_t idx, bool create)
+static void add_to_lru_cache(struct object_cache *oc, uint64_t idx, bool create)
 {
 	struct object_cache_entry *entry = alloc_cache_entry(oc, idx);
 
@@ -713,13 +718,13 @@ static inline int lookup_path(char *path)
 	return ret;
 }
 
-static int object_cache_lookup(struct object_cache *oc, uint32_t idx,
+static int object_cache_lookup(struct object_cache *oc, uint64_t idx,
 			       bool create, bool writeback)
 {
 	int fd, ret, flags = def_open_flags;
 	char path[PATH_MAX];
 
-	snprintf(path, sizeof(path), "%s/%06"PRIx32"/%08"PRIx32,
+	snprintf(path, sizeof(path), "%s/%06"PRIx32"/%016"PRIx64,
 		 object_cache_dir, oc->vid, idx);
 	if (!create)
 		return lookup_path(path);
@@ -744,19 +749,19 @@ out:
 	return ret;
 }
 
-static int create_cache_object(struct object_cache *oc, uint32_t idx,
+static int create_cache_object(struct object_cache *oc, uint64_t idx,
 			       void *buffer, size_t buf_size)
 {
 	int flags = def_open_flags | O_CREAT | O_EXCL, fd;
 	int ret = SD_RES_OID_EXIST;
 	char path[PATH_MAX], tmp_path[PATH_MAX];
 
-	snprintf(tmp_path, sizeof(tmp_path), "%s/%06"PRIx32"/%08"PRIx32".tmp",
+	snprintf(tmp_path, sizeof(tmp_path), "%s/%06"PRIx32"/%016"PRIx64".tmp",
 		object_cache_dir, oc->vid, idx);
 	fd = open(tmp_path, flags, sd_def_fmode);
 	if (fd < 0) {
 		if (likely(errno == EEXIST)) {
-			sd_debug("%08"PRIx32" already created", idx);
+			sd_debug("%016"PRIx64" already created", idx);
 			goto out;
 		}
 		sd_debug("%m");
@@ -767,11 +772,11 @@ static int create_cache_object(struct object_cache *oc, uint32_t idx,
 	ret = xwrite(fd, buffer, buf_size);
 	if (unlikely(ret != buf_size)) {
 		ret = SD_RES_EIO;
-		sd_err("failed, vid %"PRIx32", idx %"PRIx32, oc->vid, idx);
+		sd_err("failed, vid %"PRIx32", idx %"PRIx64, oc->vid, idx);
 		goto out_close;
 	}
 	/* This is intended to take care of partial write due to crash */
-	snprintf(path, sizeof(path), "%s/%06"PRIx32"/%08"PRIx32,
+	snprintf(path, sizeof(path), "%s/%06"PRIx32"/%016"PRIx64,
 		 object_cache_dir, oc->vid, idx);
 	ret = link(tmp_path, path);
 	if (unlikely(ret < 0)) {
@@ -785,7 +790,7 @@ static int create_cache_object(struct object_cache *oc, uint32_t idx,
 		goto out_close;
 	}
 	ret = SD_RES_SUCCESS;
-	sd_debug("%08"PRIx32" size %zu", idx, buf_size);
+	sd_debug("%016"PRIx64" size %zu", idx, buf_size);
 out_close:
 	close(fd);
 	unlink(tmp_path);
@@ -794,7 +799,7 @@ out:
 }
 
 /* Fetch the object, cache it in the clean state */
-static int object_cache_pull(struct object_cache *oc, uint32_t idx)
+static int object_cache_pull(struct object_cache *oc, uint64_t idx)
 {
 	struct sd_req hdr;
 	int ret = SD_RES_NO_MEM;
@@ -918,7 +923,7 @@ static int object_cache_push(struct object_cache *oc)
 bool object_is_cached(uint64_t oid)
 {
 	uint32_t vid = oid_to_vid(oid);
-	uint32_t idx = object_cache_oid_to_idx(oid);
+	uint64_t idx = object_cache_oid_to_idx(oid);
 	struct object_cache *cache;
 
 	cache = find_object_cache(vid, false);
@@ -960,7 +965,7 @@ void object_cache_delete(uint32_t vid)
 }
 
 static struct object_cache_entry *
-get_cache_entry_from(struct object_cache *cache, uint32_t idx)
+get_cache_entry_from(struct object_cache *cache, uint64_t idx)
 {
 	struct object_cache_entry *entry;
 
@@ -980,7 +985,7 @@ get_cache_entry_from(struct object_cache *cache, uint32_t idx)
 static struct object_cache_entry *oid_to_entry(uint64_t oid)
 {
 	uint32_t vid = oid_to_vid(oid);
-	uint32_t idx = object_cache_oid_to_idx(oid);
+	uint64_t idx = object_cache_oid_to_idx(oid);
 	struct object_cache *cache;
 	struct object_cache_entry *entry;
 
@@ -998,7 +1003,7 @@ static int object_cache_flush_and_delete(struct object_cache *oc)
 	DIR *dir;
 	struct dirent *d;
 	uint32_t vid = oc->vid;
-	unsigned long idx;
+	uint64_t idx;
 	uint64_t all = UINT64_MAX;
 	int ret = 0;
 	char p[PATH_MAX];
@@ -1022,8 +1027,8 @@ static int object_cache_flush_and_delete(struct object_cache *oc)
 			continue;
 		}
 
-		idx = strtoul(d->d_name, NULL, 16);
-		if (idx == ULONG_MAX)
+		idx = strtoull(d->d_name, NULL, 16);
+		if (idx == ULLONG_MAX)
 			continue;
 		if (push_cache_object(vid, idx, all, true) !=
 		    SD_RES_SUCCESS) {
@@ -1063,7 +1068,7 @@ bool bypass_object_cache(const struct request *req)
 			return true;
 		} else  {
 			/* For read requet, we can read cache if any */
-			uint32_t idx = object_cache_oid_to_idx(oid);
+			uint64_t idx = object_cache_oid_to_idx(oid);
 
 			if (object_cache_lookup(cache, idx, false, false) == 0)
 				return false;
@@ -1080,13 +1085,13 @@ int object_cache_handle_request(struct request *req)
 	struct sd_req *hdr = &req->rq;
 	uint64_t oid = req->rq.obj.oid;
 	uint32_t vid = oid_to_vid(oid);
-	uint32_t idx = object_cache_oid_to_idx(oid);
+	uint64_t idx = object_cache_oid_to_idx(oid);
 	struct object_cache *cache;
 	struct object_cache_entry *entry;
 	int ret;
 	bool create = false;
 
-	sd_debug("%08" PRIx32 ", len %" PRIu32 ", off %" PRIu32, idx,
+	sd_debug("%08" PRIx64 ", len %" PRIu32 ", off %" PRIu32, idx,
 		 hdr->data_length, hdr->obj.offset);
 
 	cache = find_object_cache(vid, true);
@@ -1211,7 +1216,7 @@ static int load_cache_object(struct object_cache *cache)
 {
 	DIR *dir;
 	struct dirent *d;
-	unsigned long idx;
+	uint64_t idx;
 	char path[PATH_MAX];
 	int ret = 0;
 
@@ -1235,8 +1240,8 @@ static int load_cache_object(struct object_cache *cache)
 			continue;
 		}
 
-		idx = strtoul(d->d_name, NULL, 16);
-		if (idx == ULONG_MAX)
+		idx = strtoull(d->d_name, NULL, 16);
+		if (idx == ULLONG_MAX)
 			continue;
 
 		/*
-- 
1.7.1




More information about the sheepdog mailing list