[Sheepdog] [RFC PATCH] sheep: introduce sd_op_template

Fri Oct 21 14:13:46 CEST 2011

On 10/21/2011 04:28 PM, MORITA Kazutaka wrote:

> When we want to add a new operation (SD_OP_xxxxx), it is not clear
> which code we should modify.  And in some cases, we need to modify
> code everywhere to implement one operation.  This is not a good
> design.
> 
> This patch abstracts out Sheepdog operations into sd_op_template, and
> moves all the request processing code to sheep/ops.c.
> 
> The definition of sd_op_template is as follows:
> 
> struct sd_op_template {
>         enum sd_op_type type;
> 
>         int available_always;
> 
>         int (*process)(const struct sd_req *req, struct sd_rsp *rsp,
>                        void *data);
>         int (*post_process)(const struct sd_req *req, struct sd_rsp *rsp,
>                             void *data);
> };
> 
> 'type' is the type of the operation; SD_OP_TYPE_CLUSTER,
> SD_OP_TYPE_STORE, or SD_OP_TYPE_IO.
> 
> 'available_always' is set to non-zero if the operation should be
> processed even when the cluster is not working.
> 
> 'process()' and 'post_process()' are the main functions of this
> operation.  process() will be called in the worker thread, and
> post_process() will be called in the main thread.
> 
> If type is SD_OP_TYPE_CLUSTER, it is guaranteed that only one node
> processes a cluster operation at a time.  We can use this for
> something like distributed locking.  process() will be called on the
> local node, and post_process() will be called on every node.
> 
> If type is SD_OP_TYPE_STORE, both process() and post_process() will be
> called on the local node.
> 
> If type is SD_OP_TYPE_IO, neither process() nor post_process() is used
> because this type of operation is heavily intertwined with Sheepdog
> core code.  We are unlikely to add new operations of this type.
> 
> Signed-off-by: MORITA Kazutaka <morita.kazutaka at lab.ntt.co.jp>
> ---
>  sheep/Makefile.am  |    2 +-
>  sheep/group.c      |  286 +++--------------------------------
>  sheep/ops.c        |  427 ++++++++++++++++++++++++++++++++++++++++++++++++++++
>  sheep/sdnet.c      |  111 ++------------
>  sheep/sheep_priv.h |   48 ++++++-
>  sheep/store.c      |   19 +--
>  6 files changed, 522 insertions(+), 371 deletions(-)
>  create mode 100644 sheep/ops.c
> 
> diff --git a/sheep/Makefile.am b/sheep/Makefile.am
> index 2b9d58f..3db914d 100644
> --- a/sheep/Makefile.am
> +++ b/sheep/Makefile.am
> @@ -23,7 +23,7 @@ INCLUDES		= -I$(top_builddir)/include -I$(top_srcdir)/include $(libcpg_CFLAGS) $
>  
>  sbin_PROGRAMS		= sheep
>  
> -sheep_SOURCES		= sheep.c group.c sdnet.c store.c vdi.c work.c journal.c \
> +sheep_SOURCES		= sheep.c group.c sdnet.c store.c vdi.c work.c journal.c ops.c \
>  			  cluster/corosync.c
>  sheep_LDADD	  	= $(libcpg_LIBS) $(libcfg_LIBS) ../lib/libsheepdog.a -lpthread
>  sheep_DEPENDENCIES	= ../lib/libsheepdog.a
> diff --git a/sheep/group.c b/sheep/group.c
> index e22dabc..8664b6f 100644
> --- a/sheep/group.c
> +++ b/sheep/group.c
> @@ -101,17 +101,7 @@ static size_t get_join_message_size(struct join_message *jm)
>  	return sizeof(*jm) + jm->nr_nodes * sizeof(jm->nodes[0]);
>  }
>  
> -static int get_node_idx(struct sheepdog_node_list_entry *ent,
> -			struct sheepdog_node_list_entry *entries, int nr_nodes)
> -{
> -	ent = bsearch(ent, entries, nr_nodes, sizeof(*ent), node_cmp);
> -	if (!ent)
> -		return -1;
> -
> -	return ent - entries;
> -}
> -
> -static int get_zones_nr_from(struct sheepdog_node_list_entry *nodes, int nr_nodes)
> +int get_zones_nr_from(struct sheepdog_node_list_entry *nodes, int nr_nodes)
>  {
>  	int nr_zones = 0, i, j;
>  	uint32_t zones[SD_MAX_REDUNDANCY];
> @@ -146,116 +136,28 @@ void setup_ordered_sd_vnode_list(struct request *req)
>  	get_ordered_sd_vnode_list(req->entry, &req->nr_vnodes, &req->nr_zones);
>  }
>  
> -static void get_node_list(struct sd_node_req *req,
> -			  struct sd_node_rsp *rsp, void *data)
> +static void do_cluster_op(void *arg)
>  {
> -	int nr_nodes;
> -
> -	nr_nodes = sys->nr_nodes;
> -	memcpy(data, sys->nodes, sizeof(*sys->nodes) * nr_nodes);
> -	rsp->data_length = nr_nodes * sizeof(struct sheepdog_node_list_entry);
> -	rsp->nr_nodes = nr_nodes;
> -	rsp->local_idx = get_node_idx(&sys->this_node, data, nr_nodes);
> -	rsp->master_idx = -1;
> -}
> +	struct vdi_op_message *msg = arg;
> +	int ret;
> +	struct request *req;
>  
> -static int get_epoch(struct sd_obj_req *req,
> -		      struct sd_obj_rsp *rsp, void *data)
> -{
> -	int epoch = req->tgt_epoch;
> -	int len, ret;
> -	dprintf("%d\n", epoch);
> -	len = epoch_log_read(epoch, (char *)data, req->data_length);
> -	if (len == -1) {
> -		ret = SD_RES_NO_TAG;
> -		rsp->data_length = 0;
> -	} else {
> -		ret = SD_RES_SUCCESS;
> -		rsp->data_length = len;
> -	}
> -	return ret;
> -}
> +	req = list_first_entry(&sys->pending_list, struct request, pending_list);
> +	ret = req->op->process((const struct sd_req *)&msg->req,
> +			       (struct sd_rsp *)&msg->rsp, req->data);
>  
> -static void vdi_op(void *arg);
> +	msg->rsp.result = ret;
> +}
>  
>  void cluster_queue_request(struct work *work, int idx)
>  {
>  	struct request *req = container_of(work, struct request, work);
>  	struct sd_req *hdr = (struct sd_req *)&req->rq;
> -	struct sd_rsp *rsp = (struct sd_rsp *)&req->rp;
>  	struct vdi_op_message *msg;
> -	struct epoch_log *log;
> -	int ret = SD_RES_SUCCESS, i, max_logs, epoch;
> -	uint32_t sys_stat = sys_stat_get();
>  	size_t size;
>  
>  	eprintf("%p %x\n", req, hdr->opcode);
>  
> -	switch (hdr->opcode) {
> -	case SD_OP_GET_EPOCH:
> -		ret = get_epoch((struct sd_obj_req *)hdr,
> -			  (struct sd_obj_rsp *)rsp, req->data);
> -		break;
> -	case SD_OP_GET_NODE_LIST:
> -		get_node_list((struct sd_node_req *)hdr,
> -			      (struct sd_node_rsp *)rsp, req->data);
> -		break;
> -	case SD_OP_STAT_CLUSTER:
> -		max_logs = rsp->data_length / sizeof(*log);
> -		epoch = get_latest_epoch();
> -		rsp->data_length = 0;
> -		for (i = 0; i < max_logs; i++) {
> -			if (epoch <= 0)
> -				break;
> -
> -			log = (struct epoch_log *)req->data + i;
> -			log->epoch = epoch;
> -			log->ctime = get_cluster_ctime();
> -			log->nr_nodes = epoch_log_read(epoch, (char *)log->nodes,
> -						       sizeof(log->nodes));
> -			if (log->nr_nodes == -1)
> -				log->nr_nodes = epoch_log_read_remote(epoch,
> -								      (char *)log->nodes,
> -								      sizeof(log->nodes));
> -
> -			rsp->data_length += sizeof(*log);
> -			log->nr_nodes /= sizeof(log->nodes[0]);
> -			epoch--;
> -		}
> -
> -		switch (sys_stat) {
> -		case SD_STATUS_OK:
> -			ret = SD_RES_SUCCESS;
> -			break;
> -		case SD_STATUS_WAIT_FOR_FORMAT:
> -			ret = SD_RES_WAIT_FOR_FORMAT;
> -			break;
> -		case SD_STATUS_WAIT_FOR_JOIN:
> -			ret = SD_RES_WAIT_FOR_JOIN;
> -			break;
> -		case SD_STATUS_SHUTDOWN:
> -			ret = SD_RES_SHUTDOWN;
> -			break;
> -		case SD_STATUS_JOIN_FAILED:
> -			ret = SD_RES_JOIN_FAILED;
> -			break;
> -		case SD_STATUS_HALT:
> -			ret = SD_RES_HALT;
> -			break;
> -		default:
> -			ret = SD_RES_SYSTEM_ERROR;
> -			break;
> -		}
> -		break;
> -	default:
> -		/* forward request to group */
> -		goto forward;
> -	}
> -
> -	rsp->result = ret;
> -	return;
> -
> -forward:
>  	if (hdr->flags & SD_FLAG_CMD_WRITE)
>  		size = sizeof(*msg);
>  	else
> @@ -272,7 +174,12 @@ forward:
>  
>  	list_add(&req->pending_list, &sys->pending_list);
>  
> -	sys->cdrv->notify(msg, size, vdi_op);
> +	if (req->op->process)
> +		sys->cdrv->notify(msg, size, do_cluster_op);
> +	else {
> +		msg->rsp.result = SD_RES_SUCCESS;
> +		sys->cdrv->notify(msg, size, NULL);
> +	}
>  
>  	free(msg);
>  }
> @@ -628,85 +535,6 @@ join_finished:
>  	return;
>  }
>  
> -static void vdi_op(void *arg)
> -{
> -	struct vdi_op_message *msg = arg;
> -	const struct sd_vdi_req *hdr = &msg->req;
> -	struct sd_vdi_rsp *rsp = &msg->rsp;
> -	void *data, *tag;
> -	int ret = SD_RES_SUCCESS;
> -	struct sheepdog_vdi_attr *vattr;
> -	uint32_t vid = 0, attrid = 0, nr_copies = sys->nr_sobjs;
> -	uint64_t ctime = 0;
> -	struct request *req;
> -
> -	req = list_first_entry(&sys->pending_list, struct request, pending_list);
> -	data = req->data;
> -
> -	switch (hdr->opcode) {
> -	case SD_OP_NEW_VDI:
> -		ret = add_vdi(hdr->epoch, data, hdr->data_length, hdr->vdi_size, &vid,
> -			      hdr->base_vdi_id, hdr->copies,
> -			      hdr->snapid, &nr_copies);
> -		break;
> -	case SD_OP_DEL_VDI:
> -		ret = del_vdi(hdr->epoch, data, hdr->data_length, &vid,
> -			      hdr->snapid, &nr_copies);
> -		break;
> -	case SD_OP_LOCK_VDI:
> -	case SD_OP_GET_VDI_INFO:
> -		if (hdr->proto_ver != SD_PROTO_VER) {
> -			ret = SD_RES_VER_MISMATCH;
> -			break;
> -		}
> -		if (hdr->data_length == SD_MAX_VDI_LEN + SD_MAX_VDI_TAG_LEN)
> -			tag = (char *)data + SD_MAX_VDI_LEN;
> -		else if (hdr->data_length == SD_MAX_VDI_LEN)
> -			tag = NULL;
> -		else {
> -			ret = SD_RES_INVALID_PARMS;
> -			break;
> -		}
> -		ret = lookup_vdi(hdr->epoch, data, tag, &vid, hdr->snapid,
> -				 &nr_copies, NULL);
> -		if (ret != SD_RES_SUCCESS)
> -			break;
> -		break;
> -	case SD_OP_GET_VDI_ATTR:
> -		vattr = data;
> -		ret = lookup_vdi(hdr->epoch, vattr->name, vattr->tag,
> -				 &vid, hdr->snapid, &nr_copies, &ctime);
> -		if (ret != SD_RES_SUCCESS)
> -			break;
> -		/* the curernt vdi id can change if we take the snapshot,
> -		   so we use the hash value of the vdi name as the vdi id */
> -		vid = fnv_64a_buf(vattr->name, strlen(vattr->name), FNV1A_64_INIT);
> -		vid &= SD_NR_VDIS - 1;
> -		ret = get_vdi_attr(hdr->epoch, data, hdr->data_length, vid,
> -				   &attrid, nr_copies, ctime,
> -				   hdr->flags & SD_FLAG_CMD_CREAT,
> -				   hdr->flags & SD_FLAG_CMD_EXCL,
> -				   hdr->flags & SD_FLAG_CMD_DEL);
> -		break;
> -	case SD_OP_RELEASE_VDI:
> -		break;
> -	case SD_OP_MAKE_FS:
> -		ret = SD_RES_SUCCESS;
> -		break;
> -	case SD_OP_SHUTDOWN:
> -		break;
> -	default:
> -		ret = SD_RES_SYSTEM_ERROR;
> -		eprintf("opcode %d is not implemented\n", hdr->opcode);
> -		break;
> -	}
> -
> -	rsp->vdi_id = vid;
> -	rsp->attr_id = attrid;
> -	rsp->copies = nr_copies;
> -	rsp->result = ret;
> -}
> -
>  static void __sd_notify(struct cpg_event *cevent)
>  {
>  }
> @@ -715,86 +543,22 @@ static void __sd_notify_done(struct cpg_event *cevent)
>  {
>  	struct work_notify *w = container_of(cevent, struct work_notify, cev);
>  	struct vdi_op_message *msg = (struct vdi_op_message *)w->msg;
> -	const struct sd_vdi_req *hdr = &msg->req;
> -	struct sd_vdi_rsp *rsp = &msg->rsp;
> -	void *data = msg->data;
>  	struct request *req;
>  	int ret = msg->rsp.result;
> -	int i, latest_epoch;
> -	uint64_t ctime;
> -
> -	if (ret != SD_RES_SUCCESS)
> -		goto out;
> -
> -	switch (hdr->opcode) {
> -	case SD_OP_NEW_VDI:
> -	{
> -		unsigned long nr = rsp->vdi_id;
> -		vprintf(SDOG_INFO, "done %d %ld\n", ret, nr);
> -		set_bit(nr, sys->vdi_inuse);
> -		break;
> -	}
> -	case SD_OP_DEL_VDI:
> -		break;
> -	case SD_OP_LOCK_VDI:
> -	case SD_OP_RELEASE_VDI:
> -	case SD_OP_GET_VDI_INFO:
> -	case SD_OP_GET_VDI_ATTR:
> -		break;
> -	case SD_OP_MAKE_FS:
> -		sys->nr_sobjs = ((struct sd_so_req *)hdr)->copies;
> -		sys->flags = ((struct sd_so_req *)hdr)->flags;
> -		if (!sys->nr_sobjs)
> -			sys->nr_sobjs = SD_DEFAULT_REDUNDANCY;
> -
> -		ctime = ((struct sd_so_req *)hdr)->ctime;
> -		set_cluster_ctime(ctime);
> -
> -		latest_epoch = get_latest_epoch();
> -		for (i = 1; i <= latest_epoch; i++)
> -			remove_epoch(i);
> -		memset(sys->vdi_inuse, 0, sizeof(sys->vdi_inuse));
> -
> -		sys->epoch = 1;
> -		sys->recovered_epoch = 1;
> -
> -		dprintf("write epoch log, %d, %d\n", sys->epoch, sys->nr_nodes);
> -		ret = epoch_log_write(sys->epoch, (char *)sys->nodes,
> -				      sys->nr_nodes * sizeof(struct sheepdog_node_list_entry));
> -		if (ret < 0)
> -			eprintf("can't write epoch %u\n", sys->epoch);
> -		update_epoch_store(sys->epoch);
> +	struct sd_op_template *op = get_sd_op(msg->req.opcode);
>  
> -		set_cluster_copies(sys->nr_sobjs);
> -		set_cluster_flags(sys->flags);
> +	if (ret == SD_RES_SUCCESS && op->post_process)
> +		ret = op->post_process((const struct sd_req *)&msg->req,
> +				       (struct sd_rsp *)&msg->rsp, msg->data);
>  
> -		if (sys_flag_nohalt())
> -			sys_stat_set(SD_STATUS_OK);
> -		else {
> -			int nr_zones = get_zones_nr_from(sys->nodes, sys->nr_nodes);
> -
> -			if (nr_zones >= sys->nr_sobjs)
> -				sys_stat_set(SD_STATUS_OK);
> -			else
> -				sys_stat_set(SD_STATUS_HALT);
> -		}
> -		break;
> -	case SD_OP_SHUTDOWN:
> -		sys_stat_set(SD_STATUS_SHUTDOWN);
> -		break;
> -	default:
> -		eprintf("unknown operation %d\n", hdr->opcode);
> -		ret = SD_RES_UNKNOWN;
> -	}
> -out:
>  	if (!is_myself(w->sender.addr, w->sender.port))
>  		return;
>  
>  	req = list_first_entry(&sys->pending_list, struct request, pending_list);
>  
> -	rsp->result = ret;
> -	memcpy(req->data, data, rsp->data_length);
> -	memcpy(&req->rp, rsp, sizeof(req->rp));
> +	msg->rsp.result = ret;
> +	memcpy(req->data, msg->data, msg->rsp.data_length);
> +	memcpy(&req->rp, &msg->rsp, sizeof(req->rp));
>  	list_del(&req->pending_list);
>  	req->done(req);
>  }
> @@ -1226,7 +990,7 @@ do_retry:
>  
>  		list_del(&cevent->cpg_event_list);
>  
> -		if (is_io_request(req->rq.opcode)) {
> +		if (is_io_op(req->op)) {
>  			int copies = sys->nr_sobjs;
>  
>  			if (copies > req->nr_zones)
> @@ -1282,7 +1046,7 @@ do_retry:
>  			}
>  		}
>  
> -		if (is_cluster_request(req->rq.opcode))
> +		if (is_cluster_op(req->op))
>  			queue_work(sys->cpg_wqueue, &req->work);
>  		else if (req->rq.flags & SD_FLAG_CMD_IO_LOCAL)
>  			queue_work(sys->io_wqueue, &req->work);
> diff --git a/sheep/ops.c b/sheep/ops.c
> new file mode 100644
> index 0000000..0d38e7b
> --- /dev/null
> +++ b/sheep/ops.c
> @@ -0,0 +1,427 @@
> +/*
> + * Copyright (C) 2011 Nippon Telegraph and Telephone Corporation.
> + *
> + * This program is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU General Public License version
> + * 2 as published by the Free Software Foundation.
> + *
> + * You should have received a copy of the GNU General Public License
> + * along with this program. If not, see <http://www.gnu.org/licenses/>.
> + */
> +#include <stdio.h>
> +#include <stdlib.h>
> +
> +#include "sheep_priv.h"
> +
> +static int process_new_vdi(const struct sd_req *req, struct sd_rsp *rsp,
> +			   void *data)
> +{
> +	const struct sd_vdi_req *hdr = (const struct sd_vdi_req *)req;
> +	struct sd_vdi_rsp *vdi_rsp = (struct sd_vdi_rsp *)rsp;
> +	uint32_t vid = 0, nr_copies = sys->nr_sobjs;
> +	int ret;
> +
> +	ret = add_vdi(hdr->epoch, data, hdr->data_length, hdr->vdi_size, &vid,
> +		      hdr->base_vdi_id, hdr->copies,
> +		      hdr->snapid, &nr_copies);
> +
> +	vdi_rsp->vdi_id = vid;
> +	vdi_rsp->copies = nr_copies;
> +
> +	return ret;
> +}
> +
> +static int post_process_new_vdi(const struct sd_req *req, struct sd_rsp *rsp,
> +				void *data)
> +{
> +	struct sd_vdi_rsp *vdi_rsp = (struct sd_vdi_rsp *)rsp;
> +	unsigned long nr = vdi_rsp->vdi_id;
> +	int ret = vdi_rsp->result;
> +
> +	vprintf(SDOG_INFO, "done %d %ld\n", ret, nr);
> +	set_bit(nr, sys->vdi_inuse);
> +
> +	return SD_RES_SUCCESS;
> +}
> +
> +static int process_del_vdi(const struct sd_req *req, struct sd_rsp *rsp,
> +			   void *data)
> +{
> +	const struct sd_vdi_req *hdr = (const struct sd_vdi_req *)req;
> +	struct sd_vdi_rsp *vdi_rsp = (struct sd_vdi_rsp *)rsp;
> +	uint32_t vid = 0, nr_copies = sys->nr_sobjs;
> +	int ret;
> +
> +	ret = del_vdi(hdr->epoch, data, hdr->data_length, &vid,
> +		      hdr->snapid, &nr_copies);
> +
> +	vdi_rsp->vdi_id = vid;
> +	vdi_rsp->copies = nr_copies;
> +
> +	return ret;
> +}
> +
> +static int process_get_vdi_info(const struct sd_req *req, struct sd_rsp *rsp,
> +				void *data)
> +{
> +	const struct sd_vdi_req *hdr = (const struct sd_vdi_req *)req;
> +	struct sd_vdi_rsp *vdi_rsp = (struct sd_vdi_rsp *)rsp;
> +	uint32_t vid = 0, nr_copies = sys->nr_sobjs;
> +	void *tag;
> +	int ret;
> +
> +	if (hdr->proto_ver != SD_PROTO_VER)
> +		return SD_RES_VER_MISMATCH;
> +
> +	if (hdr->data_length == SD_MAX_VDI_LEN + SD_MAX_VDI_TAG_LEN)
> +		tag = (char *)data + SD_MAX_VDI_LEN;
> +	else if (hdr->data_length == SD_MAX_VDI_LEN)
> +		tag = NULL;
> +	else
> +		return SD_RES_INVALID_PARMS;
> +
> +	ret = lookup_vdi(hdr->epoch, data, tag, &vid, hdr->snapid,
> +			 &nr_copies, NULL);
> +	if (ret != SD_RES_SUCCESS)
> +		return ret;
> +
> +	vdi_rsp->vdi_id = vid;
> +	vdi_rsp->copies = nr_copies;
> +
> +	return ret;
> +}
> +
> +static int post_process_make_fs(const struct sd_req *req, struct sd_rsp *rsp,
> +				void *data)
> +{
> +	const struct sd_so_req *hdr = (const struct sd_so_req *)req;
> +	int i, latest_epoch, ret;
> +	uint64_t ctime;
> +
> +	sys->nr_sobjs = hdr->copies;
> +	sys->flags = hdr->flags;
> +	if (!sys->nr_sobjs)
> +		sys->nr_sobjs = SD_DEFAULT_REDUNDANCY;
> +
> +	ctime = hdr->ctime;
> +	set_cluster_ctime(ctime);
> +
> +	latest_epoch = get_latest_epoch();
> +	for (i = 1; i <= latest_epoch; i++)
> +		remove_epoch(i);
> +	memset(sys->vdi_inuse, 0, sizeof(sys->vdi_inuse));
> +
> +	sys->epoch = 1;
> +	sys->recovered_epoch = 1;
> +
> +	dprintf("write epoch log, %d, %d\n", sys->epoch, sys->nr_nodes);
> +	ret = epoch_log_write(sys->epoch, (char *)sys->nodes,
> +			      sys->nr_nodes * sizeof(struct sheepdog_node_list_entry));
> +	if (ret < 0) {
> +		eprintf("can't write epoch %u\n", sys->epoch);
> +		return SD_RES_EIO;
> +	}
> +	update_epoch_store(sys->epoch);
> +
> +	set_cluster_copies(sys->nr_sobjs);
> +	set_cluster_flags(sys->flags);
> +
> +	if (sys_flag_nohalt())
> +		sys_stat_set(SD_STATUS_OK);
> +	else {
> +		int nr_zones = get_zones_nr_from(sys->nodes, sys->nr_nodes);
> +
> +		if (nr_zones >= sys->nr_sobjs)
> +			sys_stat_set(SD_STATUS_OK);
> +		else
> +			sys_stat_set(SD_STATUS_HALT);
> +	}
> +
> +	return SD_RES_SUCCESS;
> +}
> +
> +static int post_process_shutdown(const struct sd_req *req, struct sd_rsp *rsp,
> +				 void *data)
> +{
> +	sys_stat_set(SD_STATUS_SHUTDOWN);
> +
> +	return SD_RES_SUCCESS;
> +}
> +
> +static int process_get_vdi_attr(const struct sd_req *req, struct sd_rsp *rsp,
> +				void *data)
> +{
> +	const struct sd_vdi_req *hdr = (const struct sd_vdi_req *)req;
> +	struct sd_vdi_rsp *vdi_rsp = (struct sd_vdi_rsp *)rsp;
> +	uint32_t vid = 0, attrid = 0, nr_copies = sys->nr_sobjs;
> +	uint64_t ctime = 0;
> +	int ret;
> +	struct sheepdog_vdi_attr *vattr;
> +
> +	vattr = data;
> +	ret = lookup_vdi(hdr->epoch, vattr->name, vattr->tag,
> +			 &vid, hdr->snapid, &nr_copies, &ctime);
> +	if (ret != SD_RES_SUCCESS)
> +		return ret;
> +
> +	/* the curernt vdi id can change if we take the snapshot,
> +	   so we use the hash value of the vdi name as the vdi id */
> +	vid = fnv_64a_buf(vattr->name, strlen(vattr->name), FNV1A_64_INIT);
> +	vid &= SD_NR_VDIS - 1;
> +	ret = get_vdi_attr(hdr->epoch, data, hdr->data_length, vid,
> +			   &attrid, nr_copies, ctime,
> +			   hdr->flags & SD_FLAG_CMD_CREAT,
> +			   hdr->flags & SD_FLAG_CMD_EXCL,
> +			   hdr->flags & SD_FLAG_CMD_DEL);
> +
> +	vdi_rsp->vdi_id = vid;
> +	vdi_rsp->attr_id = attrid;
> +	vdi_rsp->copies = nr_copies;
> +
> +	return ret;
> +}
> +
> +static int post_process_read_vdis(const struct sd_req *req, struct sd_rsp *rsp,
> +				  void *data)
> +{
> +	return read_vdis(data, req->data_length, &rsp->data_length);
> +}
> +
> +static int get_node_idx(struct sheepdog_node_list_entry *ent,
> +			struct sheepdog_node_list_entry *entries, int nr_nodes)
> +{
> +	ent = bsearch(ent, entries, nr_nodes, sizeof(*ent), node_cmp);
> +	if (!ent)
> +		return -1;
> +
> +	return ent - entries;
> +}
> +
> +static int post_process_get_node_list(const struct sd_req *req, struct sd_rsp *rsp,
> +				      void *data)
> +{
> +	struct sd_node_rsp *node_rsp = (struct sd_node_rsp *)rsp;
> +	int nr_nodes;
> +
> +	nr_nodes = sys->nr_nodes;
> +	memcpy(data, sys->nodes, sizeof(*sys->nodes) * nr_nodes);
> +	node_rsp->data_length = nr_nodes * sizeof(struct sheepdog_node_list_entry);
> +	node_rsp->nr_nodes = nr_nodes;
> +	node_rsp->local_idx = get_node_idx(&sys->this_node, data, nr_nodes);
> +	node_rsp->master_idx = -1;
> +
> +	return SD_RES_SUCCESS;
> +}
> +
> +static int process_stat_sheep(const struct sd_req *req, struct sd_rsp *rsp,
> +			      void *data)
> +{
> +	struct sd_node_rsp *node_rsp = (struct sd_node_rsp *)rsp;
> +	uint32_t epoch = req->epoch;
> +
> +	return stat_sheep(&node_rsp->store_size, &node_rsp->store_free, epoch);
> +}
> +
> +static int process_stat_cluster(const struct sd_req *req, struct sd_rsp *rsp,
> +				void *data)
> +{
> +	struct epoch_log *log;
> +	int i, max_logs, epoch;
> +	uint32_t sys_stat = sys_stat_get();
> +
> +	max_logs = rsp->data_length / sizeof(*log);
> +	epoch = get_latest_epoch();
> +	rsp->data_length = 0;
> +	for (i = 0; i < max_logs; i++) {
> +		if (epoch <= 0)
> +			break;
> +
> +		log = (struct epoch_log *)data + i;
> +		log->epoch = epoch;
> +		log->ctime = get_cluster_ctime();
> +		log->nr_nodes = epoch_log_read(epoch, (char *)log->nodes,
> +					       sizeof(log->nodes));
> +		if (log->nr_nodes == -1)
> +			log->nr_nodes = epoch_log_read_remote(epoch,
> +							      (char *)log->nodes,
> +							      sizeof(log->nodes));
> +
> +		rsp->data_length += sizeof(*log);
> +		log->nr_nodes /= sizeof(log->nodes[0]);
> +		epoch--;
> +	}
> +
> +	switch (sys_stat) {
> +	case SD_STATUS_OK:
> +		return SD_RES_SUCCESS;
> +	case SD_STATUS_WAIT_FOR_FORMAT:
> +		return SD_RES_WAIT_FOR_FORMAT;
> +	case SD_STATUS_WAIT_FOR_JOIN:
> +		return SD_RES_WAIT_FOR_JOIN;
> +	case SD_STATUS_SHUTDOWN:
> +		return SD_RES_SHUTDOWN;
> +	case SD_STATUS_JOIN_FAILED:
> +		return SD_RES_JOIN_FAILED;
> +	case SD_STATUS_HALT:
> +		return SD_RES_HALT;
> +	default:
> +		return SD_RES_SYSTEM_ERROR;
> +	}
> +}
> +
> +static int process_kill_node(const struct sd_req *req, struct sd_rsp *rsp,
> +			     void *data)
> +{
> +	exit(1);
> +}
> +
> +static int process_get_obj_list(const struct sd_req *req, struct sd_rsp *rsp,
> +				void *data)
> +{
> +	return get_obj_list((const struct sd_list_req *)req,
> +			    (struct sd_list_rsp *)rsp, data);
> +}
> +
> +static int process_get_epoch(const struct sd_req *req, struct sd_rsp *rsp,
> +			     void *data)
> +{
> +	const struct sd_obj_req *obj_req = (const struct sd_obj_req *)req;
> +	struct sd_obj_rsp *obj_rsp = (struct sd_obj_rsp *)rsp;
> +	int epoch = obj_req->tgt_epoch;
> +	int len, ret;
> +	dprintf("%d\n", epoch);
> +	len = epoch_log_read(epoch, (char *)data, obj_req->data_length);
> +	if (len == -1) {
> +		ret = SD_RES_NO_TAG;
> +		obj_rsp->data_length = 0;
> +	} else {
> +		ret = SD_RES_SUCCESS;
> +		obj_rsp->data_length = len;
> +	}
> +	return ret;
> +}
> +
> +static struct sd_op_template sd_ops[] = {
> +
> +	/* cluster operations */
> +	[SD_OP_NEW_VDI] = {
> +		.type = SD_OP_TYPE_CLUSTER,
> +		.process = process_new_vdi,
> +		.post_process = post_process_new_vdi,
> +	},
> +
> +	[SD_OP_DEL_VDI] = {
> +		.type = SD_OP_TYPE_CLUSTER,
> +		.process = process_del_vdi,
> +	},
> +
> +	[SD_OP_GET_VDI_INFO] = {
> +		.type = SD_OP_TYPE_CLUSTER,
> +		.process = process_get_vdi_info,
> +	},
> +
> +	[SD_OP_LOCK_VDI] = {
> +		.type = SD_OP_TYPE_CLUSTER,
> +		.process = process_get_vdi_info,
> +	},
> +
> +	[SD_OP_MAKE_FS] = {
> +		.type = SD_OP_TYPE_CLUSTER,
> +		.available_always = 1,
> +		.post_process = post_process_make_fs,
> +	},
> +
> +	[SD_OP_SHUTDOWN] = {
> +		.type = SD_OP_TYPE_CLUSTER,
> +		.post_process = post_process_shutdown,
> +	},
> +
> +	[SD_OP_GET_VDI_ATTR] = {
> +		.type = SD_OP_TYPE_CLUSTER,
> +		.process = process_get_vdi_attr,
> +	},
> +
> +	[SD_OP_RELEASE_VDI] = {
> +		.type = SD_OP_TYPE_CLUSTER,
> +	},
> +

How about replacing the process/post_process prefix with 'cluster/local/io'
to differentiate those static functions internally, since we don't
reference them outside ops.c?

Thanks,
Yuan