[sheepdog] [PATCH 3/3] add selectable object_size support of VDI operation (2/2)

Hitoshi Mitake mitake.hitoshi at lab.ntt.co.jp
Thu Dec 11 09:29:39 CET 2014


> +	return object_size;
>  }
>  
>  static inline uint64_t data_oid_to_idx(uint64_t oid)
> diff --git a/lib/fec.c b/lib/fec.c
> index c4e7a6f..fb40773 100644
> --- a/lib/fec.c
> +++ b/lib/fec.c
> @@ -696,12 +696,13 @@ out:
>  }
>  
>  void fec_decode_buffer(struct fec *ctx, uint8_t *input[], const int in_idx[],
> -		      char *buf, int idx)
> +		      char *buf, int idx, uint32_t object_size)
>  {
>  	int i, j, d = ctx->d;
>  	size_t strip_size = SD_EC_DATA_STRIPE_SIZE / d;
> +	uint32_t nr_stripe_per_object = object_size / SD_EC_DATA_STRIPE_SIZE;
>  
> -	for (i = 0; i < SD_EC_NR_STRIPE_PER_OBJECT; i++) {
> +	for (i = 0; i < nr_stripe_per_object; i++) {
>  		const uint8_t *in[d];
>  		uint8_t out[strip_size];
>  
> @@ -713,9 +714,9 @@ void fec_decode_buffer(struct fec *ctx, uint8_t *input[], const int in_idx[],
>  }
>  
>  void isa_decode_buffer(struct fec *ctx, uint8_t *input[], const int in_idx[],
> -		       char *buf, int idx)
> +		       char *buf, int idx, uint32_t object_size)
>  {
> -	int ed = ctx->d, edp = ctx->dp, len = SD_DATA_OBJ_SIZE / ed, i;
> +	int ed = ctx->d, edp = ctx->dp, len = object_size / ed, i;
>  	unsigned char ec_tbl[ed * edp * 32];
>  	unsigned char bm[ed * ed];
>  	unsigned char cm[ed];
> diff --git a/sheep/gateway.c b/sheep/gateway.c
> index 7f7d1d1..408660a 100644
> --- a/sheep/gateway.c
> +++ b/sheep/gateway.c
> @@ -713,7 +713,7 @@ out:
>  static int gateway_handle_cow(struct request *req)
>  {
>  	uint64_t oid = req->rq.obj.oid;
> -	size_t len = get_objsize(oid);
> +	size_t len = get_objsize(oid, get_vdi_object_size(oid_to_vid(oid)));
>  	struct sd_req hdr, *req_hdr = &req->rq;
>  	char *buf = xvalloc(len);
>  	int ret;
> diff --git a/sheep/group.c b/sheep/group.c
> index 2b98a9b..e379241 100644
> --- a/sheep/group.c
> +++ b/sheep/group.c
> @@ -510,7 +510,7 @@ retry:
>  		if (vs[i].deleted)
>  			atomic_set_bit(vs[i].vid, sys->vdi_deleted);
>  		add_vdi_state(vs[i].vid, vs[i].nr_copies, vs[i].snapshot,
> -			      vs[i].copy_policy);
> +			      vs[i].copy_policy, vs[i].object_size);
>  	}
>  out:
>  	free(vs);
> @@ -766,6 +766,7 @@ static void cinfo_collection_done(struct work *work)
>  		sd_debug("nr_copies: %d", vs->nr_copies);
>  		sd_debug("snapshot: %d", vs->snapshot);
>  		sd_debug("copy_policy: %d", vs->copy_policy);
> +		sd_debug("object_size: %"PRIu32, vs->object_size);
>  		sd_debug("lock_state: %x", vs->lock_state);
>  		sd_debug("owner: %s",
>  			 addr_to_str(vs->lock_owner.addr, vs->lock_owner.port));
> diff --git a/sheep/journal.c b/sheep/journal.c
> index 5beabdf..4df9a74 100644
> --- a/sheep/journal.c
> +++ b/sheep/journal.c
> @@ -137,6 +137,7 @@ static int replay_journal_entry(struct journal_descriptor *jd)
>  {
>  	char path[PATH_MAX];
>  	ssize_t size;
> +	uint32_t object_size = 0;
>  	int fd, flags = O_WRONLY, ret = 0;
>  	void *buf = NULL;
>  	char *p = (char *)jd;
> @@ -168,9 +169,9 @@ static int replay_journal_entry(struct journal_descriptor *jd)
>  		sd_err("open %m");
>  		return -1;
>  	}
> -
>  	if (jd->create) {
> -		ret = prealloc(fd, get_objsize(jd->oid));
> +		object_size = get_vdi_object_size(oid_to_vid(jd->oid));
> +		ret = prealloc(fd, get_objsize(jd->oid, object_size));
>  		if (ret < 0)
>  			goto out;
>  	}
> diff --git a/sheep/object_cache.c b/sheep/object_cache.c
> index a0da92d..31eb003 100644
> --- a/sheep/object_cache.c
> +++ b/sheep/object_cache.c
> @@ -126,7 +126,8 @@ static inline bool idx_has_vdi_bit(uint64_t idx)
>  
>  static inline size_t get_cache_block_size(uint64_t oid)
>  {
> -	size_t bsize = DIV_ROUND_UP(get_objsize(oid),
> +	uint32_t object_size = get_vdi_object_size(oid_to_vid(oid));
> +	size_t bsize = DIV_ROUND_UP(get_objsize(oid, object_size),
>  				    sizeof(uint64_t) * BITS_PER_BYTE);
>  
>  	return round_up(bsize, BLOCK_SIZE); /* To be FS friendly */
> @@ -457,6 +458,7 @@ static int push_cache_object(uint32_t vid, uint64_t idx, uint64_t bmap,
>  	void *buf;
>  	off_t offset;
>  	uint64_t oid = idx_to_oid(vid, idx);
> +	uint32_t object_size = get_objsize(oid, get_vdi_object_size(vid));
>  	size_t data_length, bsize = get_cache_block_size(oid);
>  	int ret = SD_RES_NO_MEM;
>  	int first_bit, last_bit;
> @@ -473,7 +475,7 @@ static int push_cache_object(uint32_t vid, uint64_t idx, uint64_t bmap,
>  		 oid, bsize, bmap, first_bit, last_bit);
>  	offset = first_bit * bsize;
>  	data_length = min((last_bit - first_bit + 1) * bsize,
> -			  get_objsize(oid) - (size_t)offset);
> +			  object_size - (size_t)offset);
>  
>  	buf = xvalloc(data_length);
>  	ret = read_cache_object_noupdate(vid, idx, buf, data_length, offset);
> @@ -517,6 +519,7 @@ static void do_reclaim_object(struct object_cache *oc)
>  	struct object_cache_entry *entry;
>  	uint64_t oid;
>  	uint32_t cap;
> +	uint32_t cache_object_size = get_vdi_object_size(oc->vid) / 1048576;
>  
>  	write_lock_cache(oc);
>  	list_for_each_entry(entry, &oc->lru_head, lru_list) {
> @@ -539,7 +542,7 @@ static void do_reclaim_object(struct object_cache *oc)
>  		if (remove_cache_object(oc, entry_idx(entry)) != SD_RES_SUCCESS)
>  			continue;
>  		free_cache_entry(entry);
> -		cap = uatomic_sub_return(&gcache.capacity, CACHE_OBJECT_SIZE);
> +		cap = uatomic_sub_return(&gcache.capacity, cache_object_size);
>  		sd_debug("%"PRIx64" reclaimed. capacity:%"PRId32, oid, cap);
>  		if (cap <= HIGH_WATERMARK)
>  			break;
> @@ -685,13 +688,14 @@ alloc_cache_entry(struct object_cache *oc, uint64_t idx)
>  static void add_to_lru_cache(struct object_cache *oc, uint64_t idx, bool create)
>  {
>  	struct object_cache_entry *entry = alloc_cache_entry(oc, idx);
> +	uint32_t cache_object_size = get_vdi_object_size(oc->vid) / 1048576;
>  
>  	sd_debug("oid %"PRIx64" added", idx_to_oid(oc->vid, idx));
>  
>  	write_lock_cache(oc);
>  	if (unlikely(lru_tree_insert(&oc->lru_tree, entry)))
>  		panic("the object already exist");
> -	uatomic_add(&gcache.capacity, CACHE_OBJECT_SIZE);
> +	uatomic_add(&gcache.capacity, cache_object_size);
>  	list_add_tail(&entry->lru_list, &oc->lru_head);
>  	oc->total_count++;
>  	if (create) {
> @@ -736,7 +740,8 @@ static int object_cache_lookup(struct object_cache *oc, uint64_t idx,
>  		ret = SD_RES_EIO;
>  		goto out;
>  	}
> -	ret = prealloc(fd, get_objsize(idx_to_oid(oc->vid, idx)));
> +	ret = prealloc(fd, get_objsize(idx_to_oid(oc->vid, idx),
> +				       get_vdi_object_size(oc->vid)));
>  	if (unlikely(ret < 0)) {
>  		ret = SD_RES_EIO;
>  		goto out_close;
> @@ -804,7 +809,7 @@ static int object_cache_pull(struct object_cache *oc, uint64_t idx)
>  	struct sd_req hdr;
>  	int ret;
>  	uint64_t oid = idx_to_oid(oc->vid, idx);
> -	uint32_t data_length = get_objsize(oid);
> +	uint32_t data_length = get_objsize(oid, get_vdi_object_size(oc->vid));
>  	void *buf;
>  
>  	buf = xvalloc(data_length);
> @@ -939,11 +944,14 @@ void object_cache_delete(uint32_t vid)
>  	int h = hash(vid);
>  	struct object_cache_entry *entry;
>  	char path[PATH_MAX];
> +	uint32_t cache_object_size;
>  
>  	cache = find_object_cache(vid, false);
>  	if (!cache)
>  		return;
>  
> +	cache_object_size = get_vdi_object_size(cache->vid) / 1048576;
> +
>  	/* Firstly we free memory */
>  	sd_write_lock(&hashtable_lock[h]);
>  	hlist_del(&cache->hash);
> @@ -952,7 +960,7 @@ void object_cache_delete(uint32_t vid)
>  	write_lock_cache(cache);
>  	list_for_each_entry(entry, &cache->lru_head, lru_list) {
>  		free_cache_entry(entry);
> -		uatomic_sub(&gcache.capacity, CACHE_OBJECT_SIZE);
> +		uatomic_sub(&gcache.capacity, cache_object_size);
>  	}
>  	unlock_cache(cache);
>  	sd_destroy_rw_lock(&cache->lock);
> @@ -1294,6 +1302,7 @@ int object_cache_remove(uint64_t oid)
>  	/* Inc the entry refcount to exclude the reclaimer */
>  	struct object_cache_entry *entry = oid_to_entry(oid);
>  	struct object_cache *oc;
> +	uint32_t cache_object_size_mb;
>  	int ret;
>  
>  	if (!entry)
> @@ -1305,6 +1314,8 @@ int object_cache_remove(uint64_t oid)
>  	while (refcount_read(&entry->refcnt) > 1)
>  		usleep(100000); /* Object might be in push */
>  
> +	cache_object_size_mb = get_vdi_object_size(oc->vid) / 1048576;
> +
>  	write_lock_cache(oc);
>  	/*
>  	 * We assume no other thread will inc the refcount of this entry
> @@ -1321,7 +1332,7 @@ int object_cache_remove(uint64_t oid)
>  	free_cache_entry(entry);
>  	unlock_cache(oc);
>  
> -	uatomic_sub(&gcache.capacity, CACHE_OBJECT_SIZE);
> +	uatomic_sub(&gcache.capacity, cache_object_size_mb);
>  
>  	return SD_RES_SUCCESS;
>  }
> diff --git a/sheep/ops.c b/sheep/ops.c
> index 0c2389a..e5f4c4c 100644
> --- a/sheep/ops.c
> +++ b/sheep/ops.c
> @@ -93,6 +93,7 @@ static int cluster_new_vdi(struct request *req)
>  		.copy_policy = hdr->vdi.copy_policy,
>  		.store_policy = hdr->vdi.store_policy,
>  		.nr_copies = hdr->vdi.copies,
> +		.object_size = hdr->vdi.object_size,
>  		.time = (uint64_t) tv.tv_sec << 32 | tv.tv_usec * 1000,
>  	};
>  
> @@ -105,6 +106,9 @@ static int cluster_new_vdi(struct request *req)
>  	if (iocb.copy_policy)
>  		iocb.nr_copies = ec_policy_to_dp(iocb.copy_policy, NULL, NULL);
>  
> +	if (!hdr->vdi.object_size)
> +		iocb.object_size = sys->cinfo.object_size;
> +
>  	if (hdr->data_length != SD_MAX_VDI_LEN)
>  		return SD_RES_INVALID_PARMS;
>  
> @@ -115,6 +119,7 @@ static int cluster_new_vdi(struct request *req)
>  
>  	rsp->vdi.vdi_id = vid;
>  	rsp->vdi.copies = iocb.nr_copies;
> +	rsp->vdi.object_size = iocb.object_size;
>  
>  	return ret;
>  }
> @@ -236,6 +241,7 @@ static int cluster_get_vdi_info(struct request *req)
>  
>  	rsp->vdi.vdi_id = info.vid;
>  	rsp->vdi.copies = get_vdi_copy_number(info.vid);
> +	rsp->vdi.object_size = get_vdi_object_size(info.vid);
>  
>  	return ret;
>  }
> @@ -655,13 +661,14 @@ static int cluster_notify_vdi_add(const struct sd_req *req, struct sd_rsp *rsp,
>  		/* make the previous working vdi a snapshot */
>  		add_vdi_state(req->vdi_state.old_vid,
>  			      get_vdi_copy_number(req->vdi_state.old_vid),
> -			      true, req->vdi_state.copy_policy);
> +			      true, req->vdi_state.copy_policy,
> +			      get_vdi_object_size(req->vdi_state.old_vid));
>  
>  	if (req->vdi_state.set_bitmap)
>  		atomic_set_bit(req->vdi_state.new_vid, sys->vdi_inuse);
>  
>  	add_vdi_state(req->vdi_state.new_vid, req->vdi_state.copies, false,
> -		      req->vdi_state.copy_policy);
> +		      req->vdi_state.copy_policy, req->vdi_state.object_size);
>  
>  	return SD_RES_SUCCESS;
>  }
> @@ -759,9 +766,10 @@ static int cluster_alter_vdi_copy(const struct sd_req *req, struct sd_rsp *rsp,
>  
>  	uint32_t vid = req->vdi_state.new_vid;
>  	int nr_copies = req->vdi_state.copies;
> +	uint32_t object_size = req->vdi_state.object_size;
>  	struct vnode_info *vinfo;
>  
> -	add_vdi_state(vid, nr_copies, false, 0);
> +	add_vdi_state(vid, nr_copies, false, 0, object_size);
>  
>  	vinfo = get_vnode_info();
>  	start_recovery(vinfo, vinfo, false);
> diff --git a/sheep/plain_store.c b/sheep/plain_store.c
> index 1b7b66c..e344189 100644
> --- a/sheep/plain_store.c
> +++ b/sheep/plain_store.c
> @@ -152,7 +152,8 @@ static int default_trim(int fd, uint64_t oid, const struct siocb *iocb,
>  
>  	if (*poffset + *plen < iocb->offset + iocb->length) {
>  		uint64_t end = iocb->offset + iocb->length;
> -		if (end == get_objsize(oid))
> +		uint32_t object_size = get_vdi_object_size(oid_to_vid(oid));
> +		if (end == get_objsize(oid, object_size))
>  			/* This is necessary to punch the last block */
>  			end = round_up(end, BLOCK_SIZE);
>  		sd_debug("discard between %ld, %ld, %" PRIx64, *poffset + *plen,
> @@ -267,6 +268,7 @@ int default_cleanup(void)
>  static int init_vdi_state(uint64_t oid, const char *wd, uint32_t epoch)
>  {
>  	int ret;
> +	uint32_t object_size;
>  	struct sd_inode *inode = xzalloc(SD_INODE_HEADER_SIZE);
>  	struct siocb iocb = {
>  		.epoch = epoch,
> @@ -280,9 +282,9 @@ static int init_vdi_state(uint64_t oid, const char *wd, uint32_t epoch)
>  		       "wat %s", oid, epoch, wd);
>  		goto out;
>  	}
> -
> +	object_size = (UINT32_C(1) << inode->block_size_shift);
>  	add_vdi_state(oid_to_vid(oid), inode->nr_copies,
> -		      vdi_is_snapshot(inode), inode->copy_policy);
> +		      vdi_is_snapshot(inode), inode->copy_policy, object_size);
>  
>  	if (inode->name[0] == '\0')
>  		atomic_set_bit(oid_to_vid(oid), sys->vdi_deleted);
> @@ -402,9 +404,9 @@ size_t get_store_objsize(uint64_t oid)
>  		uint8_t policy = get_vdi_copy_policy(oid_to_vid(oid));
>  		int d;
>  		ec_policy_to_dp(policy, &d, NULL);
> -		return SD_DATA_OBJ_SIZE / d;
> +		return get_vdi_object_size(oid_to_vid(oid)) / d;
>  	}
> -	return get_objsize(oid);
> +	return get_objsize(oid, get_vdi_object_size(oid_to_vid(oid)));
>  }
>  
>  int default_create_and_write(uint64_t oid, const struct siocb *iocb)
> @@ -413,6 +415,7 @@ int default_create_and_write(uint64_t oid, const struct siocb *iocb)
>  	int flags = prepare_iocb(oid, iocb, true);
>  	int ret, fd;
>  	uint32_t len = iocb->length;
> +	uint32_t object_size = 0;
>  	size_t obj_size;
>  	uint64_t offset = iocb->offset;
>  
> @@ -452,7 +455,9 @@ int default_create_and_write(uint64_t oid, const struct siocb *iocb)
>  
>  	trim_zero_blocks(iocb->buf, &offset, &len);
>  
> -	if (offset != 0 || len != get_objsize(oid)) {
> +	object_size = get_vdi_object_size(oid_to_vid(oid));
> +
> +	if (offset != 0 || len != get_objsize(oid, object_size)) {
>  		if (is_sparse_object(oid))
>  			ret = xftruncate(fd, obj_size);
>  		else
> diff --git a/sheep/recovery.c b/sheep/recovery.c
> index 7874fc9..9bf2d9c 100644
> --- a/sheep/recovery.c
> +++ b/sheep/recovery.c
> @@ -429,6 +429,7 @@ static void *rebuild_erasure_object(uint64_t oid, uint8_t idx,
>  	char *lost = xvalloc(len);
>  	int i, j;
>  	uint8_t policy = get_vdi_copy_policy(oid_to_vid(oid));
> +	uint32_t object_size = get_vdi_object_size(oid_to_vid(oid));
>  	int ed = 0, edp;
>  	edp = ec_policy_to_dp(policy, &ed, NULL);
>  	struct fec *ctx = ec_init(ed, edp);
> @@ -458,7 +459,7 @@ static void *rebuild_erasure_object(uint64_t oid, uint8_t idx,
>  	}
>  
>  	/* Rebuild the lost replica */
> -	ec_decode_buffer(ctx, bufs, idxs, lost, idx);
> +	ec_decode_buffer(ctx, bufs, idxs, lost, idx, object_size);
>  out:
>  	ec_destroy(ctx);
>  	for (i = 0; i < ed; i++)
> diff --git a/sheep/sheep_priv.h b/sheep/sheep_priv.h
> index 5fc6b90..37946d1 100644
> --- a/sheep/sheep_priv.h
> +++ b/sheep/sheep_priv.h
> @@ -219,6 +219,7 @@ struct vdi_iocb {
>  	uint8_t copy_policy;
>  	uint8_t store_policy;
>  	uint8_t nr_copies;
> +	uint32_t object_size;
>  	uint64_t time;
>  };
>  
> @@ -326,9 +327,12 @@ int fill_vdi_state_list(const struct sd_req *hdr,
>  bool oid_is_readonly(uint64_t oid);
>  int get_vdi_copy_number(uint32_t vid);
>  int get_vdi_copy_policy(uint32_t vid);
> +uint32_t get_vdi_object_size(uint32_t vid);
>  int get_obj_copy_number(uint64_t oid, int nr_zones);
>  int get_req_copy_number(struct request *req);
> -int add_vdi_state(uint32_t vid, int nr_copies, bool snapshot, uint8_t);
> +uint32_t get_req_object_size(struct request *req);
> +int add_vdi_state(uint32_t vid, int nr_copies, bool snapshot,
> +		  uint8_t, uint32_t object_size);
>  int vdi_exist(uint32_t vid);
>  int vdi_create(const struct vdi_iocb *iocb, uint32_t *new_vid);
>  int vdi_snapshot(const struct vdi_iocb *iocb, uint32_t *new_vid);
> diff --git a/sheep/vdi.c b/sheep/vdi.c
> index 1c8fb36..95b3230 100644
> --- a/sheep/vdi.c
> +++ b/sheep/vdi.c
> @@ -14,6 +14,7 @@
>  struct vdi_state_entry {
>  	uint32_t vid;
>  	unsigned int nr_copies;
> +	uint32_t object_size;
>  	bool snapshot;
>  	bool deleted;
>  	uint8_t copy_policy;
> @@ -132,6 +133,23 @@ int get_vdi_copy_policy(uint32_t vid)
>  	return entry->copy_policy;
>  }
>  
> +uint32_t get_vdi_object_size(uint32_t vid)
> +{
> +	struct vdi_state_entry *entry;
> +
> +	sd_read_lock(&vdi_state_lock);
> +	entry = vdi_state_search(&vdi_state_root, vid);
> +	sd_rw_unlock(&vdi_state_lock);
> +
> +	if (!entry) {
> +		sd_alert("object size for %" PRIx32 " not found, set %" PRIu32,
> +			 vid, sys->cinfo.object_size);
> +		return sys->cinfo.object_size;
> +	}
> +
> +	return entry->object_size;
> +}
> +
>  int get_obj_copy_number(uint64_t oid, int nr_zones)
>  {
>  	return min(get_vdi_copy_number(oid_to_vid(oid)), nr_zones);
> @@ -149,7 +167,19 @@ int get_req_copy_number(struct request *req)
>  	return nr_copies;
>  }
>  
> -int add_vdi_state(uint32_t vid, int nr_copies, bool snapshot, uint8_t cp)
> +uint32_t get_req_object_size(struct request *req)
> +{
> +	uint32_t object_size;
> +
> +	object_size = req->rq.data_length;
> +	if (!object_size)
> +		object_size = get_vdi_object_size(oid_to_vid(req->rq.obj.oid));
> +
> +	return object_size;
> +}
> +
> +int add_vdi_state(uint32_t vid, int nr_copies, bool snapshot,
> +		  uint8_t cp, uint32_t object_size)
>  {
>  	struct vdi_state_entry *entry, *old;
>  
> @@ -158,6 +188,7 @@ int add_vdi_state(uint32_t vid, int nr_copies, bool snapshot, uint8_t cp)
>  	entry->nr_copies = nr_copies;
>  	entry->snapshot = snapshot;
>  	entry->copy_policy = cp;
> +	entry->object_size = object_size;
>  
>  	entry->lock_state = LOCK_STATE_UNLOCKED;
>  	memset(&entry->owner, 0, sizeof(struct node_id));
> @@ -173,7 +204,8 @@ int add_vdi_state(uint32_t vid, int nr_copies, bool snapshot, uint8_t cp)
>  		sd_mutex_unlock(&m);
>  	}
>  
> -	sd_debug("%" PRIx32 ", %d, %d", vid, nr_copies, cp);
> +	sd_debug("%" PRIx32 ", %d, %d, %"PRIu32,
> +		 vid, nr_copies, cp, object_size);
>  
>  	sd_write_lock(&vdi_state_lock);
>  	old = vdi_state_insert(&vdi_state_root, entry);
> @@ -183,6 +215,7 @@ int add_vdi_state(uint32_t vid, int nr_copies, bool snapshot, uint8_t cp)
>  		entry->nr_copies = nr_copies;
>  		entry->snapshot = snapshot;
>  		entry->copy_policy = cp;
> +		entry->object_size = object_size;
>  	}
>  
>  	sd_rw_unlock(&vdi_state_lock);
> @@ -209,6 +242,7 @@ int fill_vdi_state_list(const struct sd_req *hdr,
>  		vs[last].nr_copies = entry->nr_copies;
>  		vs[last].snapshot = entry->snapshot;
>  		vs[last].copy_policy = entry->copy_policy;
> +		vs[last].object_size = entry->object_size;
>  		vs[last].lock_state = entry->lock_state;
>  		vs[last].lock_owner = entry->owner;
>  		vs[last].nr_participants = entry->nr_participants;
> @@ -251,6 +285,7 @@ static struct vdi_state *fill_vdi_state_list_with_alloc(int *result_nr)
>  		vs[i].snapshot = entry->snapshot;
>  		vs[i].deleted = entry->deleted;
>  		vs[i].copy_policy = entry->copy_policy;
> +		vs[i].object_size = entry->object_size;
>  		vs[i].lock_state = entry->lock_state;
>  		vs[i].lock_owner = entry->owner;
>  		vs[i].nr_participants = entry->nr_participants;
> @@ -861,7 +896,7 @@ static struct sd_inode *alloc_inode(const struct vdi_iocb *iocb,
>  				    struct generation_reference *gref)
>  {
>  	struct sd_inode *new = xzalloc(sizeof(*new));
> -	unsigned long block_size = SD_DATA_OBJ_SIZE;
> +	unsigned long block_size = iocb->object_size;
>  
>  	pstrcpy(new->name, sizeof(new->name), iocb->name);
>  	new->vdi_id = new_vid;
> @@ -903,9 +938,10 @@ static int create_vdi(const struct vdi_iocb *iocb, uint32_t new_snapid,
>  	int ret;
>  
>  	sd_debug("%s: size %" PRIu64 ", new_vid %" PRIx32 ", copies %d, "
> -		 "snapid %" PRIu32 " copy policy %"PRIu8 "store policy %"PRIu8,
> -		 iocb->name, iocb->size, new_vid, iocb->nr_copies, new_snapid,
> -		 new->copy_policy, new->store_policy);
> +		 "snapid %" PRIu32 " copy policy %"PRIu8 " store policy %"PRIu8
> +		 " object_size %"PRIu32, iocb->name, iocb->size, new_vid,
> +		  iocb->nr_copies, new_snapid, new->copy_policy,
> +		  new->store_policy, iocb->object_size);
>  
>  	ret = sd_write_object(vid_to_vdi_oid(new_vid), (char *)new,
>  			      sizeof(*new), 0, true);
> @@ -940,8 +976,9 @@ static int clone_vdi(const struct vdi_iocb *iocb, uint32_t new_snapid,
>  	int ret;
>  
>  	sd_debug("%s: size %" PRIu64 ", vid %" PRIx32 ", base %" PRIx32 ", "
> -		 "copies %d, snapid %" PRIu32, iocb->name, iocb->size, new_vid,
> -		 base_vid, iocb->nr_copies, new_snapid);
> +		 "copies %d, object_size %" PRIu32 ", snapid %" PRIu32,
> +		 iocb->name, iocb->size, new_vid, base_vid,
> +		 iocb->nr_copies, iocb->object_size, new_snapid);
>  
>  	ret = sd_read_object(vid_to_vdi_oid(base_vid), (char *)base,
>  			     sizeof(*base), 0);
> @@ -1002,8 +1039,9 @@ static int snapshot_vdi(const struct vdi_iocb *iocb, uint32_t new_snapid,
>  	int ret;
>  
>  	sd_debug("%s: size %" PRIu64 ", vid %" PRIx32 ", base %" PRIx32 ", "
> -		 "copies %d, snapid %" PRIu32, iocb->name, iocb->size, new_vid,
> -		 base_vid, iocb->nr_copies, new_snapid);
> +		 "copies %d, object_size %"PRIu32 ", snapid %" PRIu32,
> +		 iocb->name, iocb->size, new_vid, base_vid,
> +		 iocb->nr_copies, iocb->object_size, new_snapid);
>  
>  	ret = sd_read_object(vid_to_vdi_oid(base_vid), (char *)base,
>  			     sizeof(*base), 0);
> @@ -1071,8 +1109,9 @@ static int rebase_vdi(const struct vdi_iocb *iocb, uint32_t new_snapid,
>  	int ret;
>  
>  	sd_debug("%s: size %" PRIu64 ", vid %" PRIx32 ", base %" PRIx32 ", "
> -		 "cur %" PRIx32 ", copies %d, snapid %" PRIu32, iocb->name,
> -		 iocb->size, new_vid, base_vid, cur_vid, iocb->nr_copies,
> +		 "cur %" PRIx32 ", copies %d, object_size %"PRIu32
> +		 ", snapid %" PRIu32, iocb->name, iocb->size, new_vid,
> +		 base_vid, cur_vid, iocb->nr_copies, iocb->object_size,
>  		 new_snapid);
>  
>  	ret = sd_read_object(vid_to_vdi_oid(base_vid), (char *)base,
> @@ -1260,7 +1299,7 @@ int vdi_lookup(const struct vdi_iocb *iocb, struct vdi_info *info)
>  }
>  
>  static int notify_vdi_add(uint32_t vdi_id, uint32_t nr_copies, uint32_t old_vid,
> -			  uint8_t copy_policy)
> +			  uint8_t copy_policy, uint32_t object_size)
>  {
>  	int ret;
>  	struct sd_req hdr;
> @@ -1271,11 +1310,13 @@ static int notify_vdi_add(uint32_t vdi_id, uint32_t nr_copies, uint32_t old_vid,
>  	hdr.vdi_state.copies = nr_copies;
>  	hdr.vdi_state.set_bitmap = false;
>  	hdr.vdi_state.copy_policy = copy_policy;
> +	hdr.vdi_state.object_size = object_size;
>  
>  	ret = exec_local_req(&hdr, NULL);
>  	if (ret != SD_RES_SUCCESS)
>  		sd_err("fail to notify vdi add event(%" PRIx32 ", %d, %" PRIx32
> -		       ")", vdi_id, nr_copies, old_vid);
> +		       ", %"PRIu32 ")", vdi_id, nr_copies,
> +		       old_vid, object_size);
>  
>  	return ret;
>  }
> @@ -1326,7 +1367,7 @@ int vdi_create(const struct vdi_iocb *iocb, uint32_t *new_vid)
>  		info.snapid = 1;
>  	*new_vid = info.free_bit;
>  	ret = notify_vdi_add(*new_vid, iocb->nr_copies, info.vid,
> -			     iocb->copy_policy);
> +			     iocb->copy_policy, iocb->object_size);
>  	if (ret != SD_RES_SUCCESS)
>  		return ret;
>  
> @@ -1366,7 +1407,7 @@ int vdi_snapshot(const struct vdi_iocb *iocb, uint32_t *new_vid)
>  	assert(info.snapid > 0);
>  	*new_vid = info.free_bit;
>  	ret = notify_vdi_add(*new_vid, iocb->nr_copies, info.vid,
> -			     iocb->copy_policy);
> +			     iocb->copy_policy, iocb->object_size);
>  	if (ret != SD_RES_SUCCESS)
>  		return ret;
>  
> @@ -1745,6 +1786,15 @@ int sd_create_hyper_volume(const char *name, uint32_t *vdi_id)
>  	hdr.vdi.copies = sys->cinfo.nr_copies;
>  	hdr.vdi.copy_policy = sys->cinfo.copy_policy;
>  	hdr.vdi.store_policy = 1;
> +	/* XXX hypervolume cannot be combined with a non-default object size */
> +	if (sys->cinfo.object_size != SD_DATA_OBJ_SIZE) {
> +		hdr.vdi.object_size = SD_DATA_OBJ_SIZE;
> +		sd_warn("Cluster default object size is not"
> +			" SD_DATA_OBJ_SIZE(%lu)."
> +			" Set VDI object size %lu and create HyperVolume",
> +			SD_DATA_OBJ_SIZE, SD_DATA_OBJ_SIZE);
> +	}
> +
>  
>  	ret = exec_local_req(&hdr, buf);
>  	if (ret != SD_RES_SUCCESS) {
> diff --git a/tests/unit/sheep/test_vdi.c b/tests/unit/sheep/test_vdi.c
> index 2f8946b..132caf5 100644
> --- a/tests/unit/sheep/test_vdi.c
> +++ b/tests/unit/sheep/test_vdi.c
> @@ -17,9 +17,9 @@
>  
>  START_TEST(test_vdi)
>  {
> -	add_vdi_state(1, 1, true, 0);
> -	add_vdi_state(2, 1, true, 0);
> -	add_vdi_state(3, 2, false, 0);
> +	add_vdi_state(1, 1, true, 0, 4194304);
> +	add_vdi_state(2, 1, true, 0, 4194304);
> +	add_vdi_state(3, 2, false, 0, 4194304);
>  
>  	ck_assert_int_eq(get_vdi_copy_number(1), 1);
>  	ck_assert_int_eq(get_vdi_copy_number(2), 1);
> -- 
> 1.7.1
> 
> -- 
> sheepdog mailing list
> sheepdog at lists.wpkg.org
> http://lists.wpkg.org/mailman/listinfo/sheepdog



More information about the sheepdog mailing list