[sheepdog] [PATCH V3] collie: optimize 'collie vdi check' command
Liu Yuan
namei.unix at gmail.com
Tue Aug 28 14:44:41 CEST 2012
On 08/28/2012 07:42 PM, Yunkai Zhang wrote:
>
> User can do force repair by specify -R or --repair flag:
> * Force repair:
> $ collie vdi check -R test.img
> CHECKING VDI:test.img ...
> Failed oid: 9c5e6800000001
>>> copy[0], sha1: c78ca69c4be7401b6d1f11a37a4cec4226e736cd, from: 127.0.0.1:7000
>>> copy[1], sha1: 46dbc769de60a508faf134c6d51926741c0e38fa, from: 127.0.0.1:7001
>>> copy[2], sha1: c78ca69c4be7401b6d1f11a37a4cec4226e736cd, from: 127.0.0.1:7004
>>> Repaired successfully!
> REPAIRED
I suspect that user might want to fix the consistency with 'check command' running once like fsck,
so maybe --no-repair as an option is better.
>
> diff --git a/collie/common.c b/collie/common.c
> index ce8dcf7..dd54ae4 100644
> --- a/collie/common.c
> +++ b/collie/common.c
> @@ -214,8 +214,7 @@ int send_light_req_get_response(struct sd_req *hdr, const char *host, int port)
> ret = exec_req(fd, hdr, NULL, &wlen, &rlen);
> close(fd);
> if (ret) {
> - fprintf(stderr, "failed to connect to %s:%d\n",
> - host, port);
> + dprintf("failed to connect to %s:%d\n", host, port);
This is used only by collie, so fpritnf(stderr...) is better than dprintf, which print out useless
line number.
> return -1;
> }
>
> @@ -236,8 +235,7 @@ int send_light_req(struct sd_req *hdr, const char *host, int port)
> return -1;
>
> if (ret != SD_RES_SUCCESS) {
> - fprintf(stderr, "Response's result: %s\n",
> - sd_strerror(ret));
> + dprintf("Response's result: %s\n", sd_strerror(ret));
ditto.
> diff --git a/include/internal_proto.h b/include/internal_proto.h
> index c1d116a..96fee75 100644
> --- a/include/internal_proto.h
> +++ b/include/internal_proto.h
> @@ -61,10 +61,14 @@
> #define SD_OP_REMOVE_PEER 0xA6
> #define SD_OP_SET_CACHE_SIZE 0xA7
> #define SD_OP_ENABLE_RECOVER 0xA8
> -#define SD_OP_DISABLE_RECOVER 0xA9
> -#define SD_OP_INFO_RECOVER 0xAA
> -#define SD_OP_GET_VDI_COPIES 0xAB
> +#define SD_OP_DISABLE_RECOVER 0xA9
> +#define SD_OP_INFO_RECOVER 0xAA
> +#define SD_OP_GET_VDI_COPIES 0xAB
> #define SD_OP_COMPLETE_RECOVERY 0xAC
> +#define SD_OP_CALC_CHKSUM 0xAD
> +#define SD_OP_CALC_CHKSUM_PEER 0xAE
> +#define SD_OP_REPAIR_OBJ 0xAF
> +#define SD_OP_REPAIR_OBJ_PEER 0xB0
>
> /* internal flags for hdr.flags, must be above 0x80 */
> #define SD_FLAG_CMD_RECOVERY 0x0080
> @@ -169,6 +173,20 @@ struct sd_node_rsp {
> uint64_t store_free;
> };
>
> +struct sd_checksum_rsp {
> + uint8_t proto_ver;
> + uint8_t opcode;
> + uint16_t flags;
> + uint32_t epoch;
> + uint32_t id;
> + uint32_t data_length;
> + uint32_t result;
> + union {
> + uint8_t sha1[SHA1_LEN];
> + uint32_t __pad[7];
> + };
> +};
> +
> struct node_id {
> uint8_t addr[16];
> uint16_t port;
> diff --git a/include/sheep.h b/include/sheep.h
> index fbbab85..a97cb43 100644
> --- a/include/sheep.h
> +++ b/include/sheep.h
> @@ -284,4 +284,20 @@ static inline int nodes_to_vnodes(struct sd_node *nodes, int nr,
> return nr_vnodes;
> }
>
> +static inline char *sha1_to_hex(const unsigned char *sha1)
> +{
> + static char buffer[50];
> + static const char hex[] = "0123456789abcdef";
> + char *buf = buffer;
> + int i;
> +
> + for (i = 0; i < SHA1_LEN; i++) {
> + unsigned int val = *sha1++;
> + *buf++ = hex[val >> 4];
> + *buf++ = hex[val & 0xf];
> + }
> + buffer[2 * SHA1_LEN] = 0;
> + return buffer;
> +}
> +
We can't move it out because both buffer and hex is static variable. And we'd better to keep
it in sha1_file.c and export it out as a function.
> #endif
> diff --git a/sheep/farm/farm.h b/sheep/farm/farm.h
> index 1a0081e..da79159 100644
> --- a/sheep/farm/farm.h
> +++ b/sheep/farm/farm.h
> @@ -46,7 +46,6 @@ extern char farm_obj_dir[PATH_MAX];
> extern char *sha1_to_path(const unsigned char *sha1);
> extern int sha1_file_write(unsigned char *buf, unsigned len, unsigned char *);
> extern void *sha1_file_read(const unsigned char *sha1, struct sha1_file_hdr *);
> -extern char *sha1_to_hex(const unsigned char *sha1);
> extern int get_sha1_hex(const char *hex, unsigned char *sha1);
> extern int sha1_file_try_delete(const unsigned char *sha1);
>
> diff --git a/sheep/farm/sha1_file.c b/sheep/farm/sha1_file.c
> index b0abc16..3cd4f40 100644
> --- a/sheep/farm/sha1_file.c
> +++ b/sheep/farm/sha1_file.c
> @@ -257,18 +257,3 @@ int get_sha1_hex(const char *hex, unsigned char *sha1)
> }
> return 0;
> }
> -
> -char *sha1_to_hex(const unsigned char *sha1)
> -{
> - static char buffer[50];
> - static const char hex[] = "0123456789abcdef";
> - char *buf = buffer;
> - int i;
> -
> - for (i = 0; i < SHA1_LEN; i++) {
> - unsigned int val = *sha1++;
> - *buf++ = hex[val >> 4];
> - *buf++ = hex[val & 0xf];
> - }
> - return buffer;
> -}
> diff --git a/sheep/gateway.c b/sheep/gateway.c
> index 41d712b..528c912 100644
> --- a/sheep/gateway.c
> +++ b/sheep/gateway.c
> @@ -316,3 +316,119 @@ int gateway_remove_obj(struct request *req)
> {
> return gateway_forward_request(req);
> }
> +
> +int gateway_calc_obj_chksum(struct request *req)
> +{
> + struct sd_req fwdhdr, *hdr = &req->rq;
> + struct sd_checksum_rsp *rsp;
> + struct sd_vnode *vnodes[SD_MAX_COPIES];
> + struct node_id *nid;
> + char host[128];
> + unsigned int rlen, wlen;
> + uint64_t oid = hdr->obj.oid;
> + int fd, ret, nr_objs;
> + int i = 0, offset = 0;
> +
> + nr_objs = get_obj_copy_number(oid, req->vinfo->nr_zones);
> +
> + oid_to_vnodes(req->vinfo->vnodes, req->vinfo->nr_vnodes,
> + oid, nr_objs, vnodes);
> +
> +loop:
> + nid = &vnodes[i]->nid;
> + addr_to_str(host, sizeof(host), nid->addr, 0);
> +
> + fd = connect_to(host, nid->port);
> + if (fd < 0) {
> + dprintf("Failed to connect\n");
> + return SD_RES_EIO;
> + }
> +
> + rlen = 0;
> + wlen = 0;
> +
> + memcpy(&fwdhdr, hdr, sizeof(fwdhdr));
> + fwdhdr.opcode = gateway_to_peer_opcode(hdr->opcode);
> + fwdhdr.flags = 0;
> + fwdhdr.data_length = 0;
> +
> + rsp = (struct sd_checksum_rsp *)&fwdhdr;
> + ret = exec_req(fd, &fwdhdr, NULL, &wlen, &rlen);
> + close(fd);
> + if (ret)
> + return SD_RES_EIO;
> +
> + if (rsp->result == SD_RES_SUCCESS)
> + memcpy((char *)req->data + offset, rsp->sha1, SHA1_LEN);
> + else if (rsp->result == SD_RES_NO_OBJ)
> + memset((char *)req->data + offset, 0, SHA1_LEN);
> + else
> + return rsp->result;
> +
> + memcpy((char *)req->data + offset + SHA1_LEN, nid, sizeof(*nid));
> + offset += SHA1_LEN + sizeof(*nid);
> +
> + if (++i < nr_objs)
> + goto loop;
> +
> + req->rp.obj.copies = nr_objs;
> + req->rp.data_length = offset;
> +
> + return SD_RES_SUCCESS;
> +}
> +
> +int gateway_repair_obj(struct request *req)
> +{
> + struct sd_req fwdhdr, *hdr = &req->rq;
> + struct sd_rsp *rsp = (struct sd_rsp *)&fwdhdr;
> + struct node_id *src, *dest;
> + struct sd_vnode *vnodes[SD_MAX_COPIES];
> + char host[128], to[128];
> + uint64_t oid = hdr->obj.oid;
> + unsigned rlen, wlen;
> + int i, fd, ret, err_ret, nr_objs;
> +
> + nr_objs = get_obj_copy_number(oid, req->vinfo->nr_zones);
> +
> + oid_to_vnodes(req->vinfo->vnodes, req->vinfo->nr_vnodes,
> + oid, nr_objs, vnodes);
> +
> + i = 1;
> + src = &vnodes[0]->nid;
> + err_ret = SD_RES_SUCCESS;
> +loop:
> + dest = &vnodes[i]->nid;
> + addr_to_str(host, sizeof(host), dest->addr, 0);
> +
> + fd = connect_to(host, dest->port);
> + if (fd < 0) {
> + dprintf("Failed to connect\n");
> + return SD_RES_EIO;
> + }
> +
> + rlen = 0;
> + wlen = sizeof(*src);
> +
> + memcpy(&fwdhdr, hdr, sizeof(fwdhdr));
> + fwdhdr.opcode = gateway_to_peer_opcode(hdr->opcode);
> + fwdhdr.flags = 0;
> + fwdhdr.data_length = wlen;
> + fwdhdr.flags = SD_FLAG_CMD_WRITE;
> +
> + ret = exec_req(fd, &fwdhdr, src, &wlen, &rlen);
> + close(fd);
> + if (ret)
> + return SD_RES_EIO;
> +
> + if (rsp->result != SD_RES_SUCCESS) {
> + dprintf("oid:%"PRIx64", from:%s to:%s\n", oid,
> + addr_to_str(host, sizeof(host), src->addr, src->port),
> + addr_to_str(to, sizeof(to), dest->addr, dest->port));
> + err_ret = rsp->result;
> + }
> +
> + if (++i < nr_objs)
> + goto loop;
> +
> + return err_ret;
> +}
I think both two functions should use sockfd and we might actually share the forward_gateway_request()
by modifying its interface,
forward_gateway_request(req, func_pointer)
thus we can pass wait_forward_request() to it for write/remove/create_and_write/repair, and add a new
function for cacl_object_chksum. In this way, we can boost requests handling in parallel.
> diff --git a/sheep/ops.c b/sheep/ops.c
> index ccb1c5e..3225c59 100644
> --- a/sheep/ops.c
> +++ b/sheep/ops.c
> @@ -26,6 +26,7 @@
> #include "strbuf.h"
> #include "trace/trace.h"
> #include "util.h"
> +#include "sha1.h"
>
> enum sd_op_type {
> SD_OP_TYPE_CLUSTER = 1, /* cluster operations */
> @@ -219,7 +220,6 @@ static int cluster_make_fs(const struct sd_req *req, struct sd_rsp *rsp,
> int i, ret;
> uint32_t latest_epoch;
> uint64_t created_time;
> - struct siocb iocb = { 0 };
> struct store_driver *driver;
>
> driver = find_store_driver(data);
> @@ -228,7 +228,6 @@ static int cluster_make_fs(const struct sd_req *req, struct sd_rsp *rsp,
>
> sd_store = driver;
> latest_epoch = get_latest_epoch();
> - iocb.epoch = latest_epoch;
>
> ret = sd_store->format(data);
> if (ret != SD_RES_SUCCESS)
> @@ -688,6 +687,51 @@ static int local_kill_node(const struct sd_req *req, struct sd_rsp *rsp,
> return SD_RES_SUCCESS;
> }
>
> +static inline int read_object_from(struct node_id *src, uint32_t epoch,
> + uint64_t oid, char *buf)
> +{
> + struct sd_req hdr;
> + struct sd_rsp *rsp = (struct sd_rsp *)&hdr;
> + int fd, ret;
> + unsigned wlen, rlen;
> + char host[128];
> +
> + addr_to_str(host, sizeof(host), src->addr, 0);
> +
> + fd = connect_to(host, src->port);
> + if (fd < 0) {
> + dprintf("failed to connect to %s:%"PRIu32"\n",
> + host, src->port);
> + return SD_RES_NETWORK_ERROR;
> + }
> +
use sockfd cache.
Thanks,
Yuan
More information about the sheepdog
mailing list