[sheepdog] [PATCH V3] collie: optimize 'collie vdi check' command

Yunkai Zhang yunkai.me at gmail.com
Tue Aug 28 16:01:44 CEST 2012


On Tue, Aug 28, 2012 at 8:44 PM, Liu Yuan <namei.unix at gmail.com> wrote:
> On 08/28/2012 07:42 PM, Yunkai Zhang wrote:
>>
>> User can do force repair by specify -R or --repair flag:
>> * Force repair:
>> $ collie vdi check -R test.img
>> CHECKING VDI:test.img ...
>> Failed oid: 9c5e6800000001
>>>> copy[0], sha1: c78ca69c4be7401b6d1f11a37a4cec4226e736cd, from: 127.0.0.1:7000
>>>> copy[1], sha1: 46dbc769de60a508faf134c6d51926741c0e38fa, from: 127.0.0.1:7001
>>>> copy[2], sha1: c78ca69c4be7401b6d1f11a37a4cec4226e736cd, from: 127.0.0.1:7004
>>>> Repaired successfully!
>> REPAIRED
>
> I suspect that user might want to fix the consistency with 'check command' running once like fsck,
> so maybe --no-repair as an option is better.

I still insist on using --repair, 'collie vdi check' doesn't indicate
repair by its literal meaning. If you want to make repair as default
action, change its name to 'collie vdi repair', and add '--only-check'
option, will be better.

>
>>
>> diff --git a/collie/common.c b/collie/common.c
>> index ce8dcf7..dd54ae4 100644
>> --- a/collie/common.c
>> +++ b/collie/common.c
>> @@ -214,8 +214,7 @@ int send_light_req_get_response(struct sd_req *hdr, const char *host, int port)
>>       ret = exec_req(fd, hdr, NULL, &wlen, &rlen);
>>       close(fd);
>>       if (ret) {
>> -             fprintf(stderr, "failed to connect to  %s:%d\n",
>> -                     host, port);
>> +             dprintf("failed to connect to  %s:%d\n", host, port);
>
> This is used only by collie, so fpritnf(stderr...) is better than dprintf, which print out useless
> line number.

It's my typo, sorry.

>
>>               return -1;
>>       }
>>
>> @@ -236,8 +235,7 @@ int send_light_req(struct sd_req *hdr, const char *host, int port)
>>               return -1;
>>
>>       if (ret != SD_RES_SUCCESS) {
>> -             fprintf(stderr, "Response's result: %s\n",
>> -                     sd_strerror(ret));
>> +             dprintf("Response's result: %s\n", sd_strerror(ret));
>
> ditto.
>
>> diff --git a/include/internal_proto.h b/include/internal_proto.h
>> index c1d116a..96fee75 100644
>> --- a/include/internal_proto.h
>> +++ b/include/internal_proto.h
>> @@ -61,10 +61,14 @@
>>  #define SD_OP_REMOVE_PEER    0xA6
>>  #define SD_OP_SET_CACHE_SIZE 0xA7
>>  #define SD_OP_ENABLE_RECOVER 0xA8
>> -#define SD_OP_DISABLE_RECOVER 0xA9
>> -#define SD_OP_INFO_RECOVER 0xAA
>> -#define SD_OP_GET_VDI_COPIES 0xAB
>> +#define SD_OP_DISABLE_RECOVER  0xA9
>> +#define SD_OP_INFO_RECOVER     0xAA
>> +#define SD_OP_GET_VDI_COPIES   0xAB
>>  #define SD_OP_COMPLETE_RECOVERY 0xAC
>> +#define SD_OP_CALC_CHKSUM      0xAD
>> +#define SD_OP_CALC_CHKSUM_PEER 0xAE
>> +#define SD_OP_REPAIR_OBJ       0xAF
>> +#define SD_OP_REPAIR_OBJ_PEER  0xB0
>>
>>  /* internal flags for hdr.flags, must be above 0x80 */
>>  #define SD_FLAG_CMD_RECOVERY 0x0080
>> @@ -169,6 +173,20 @@ struct sd_node_rsp {
>>       uint64_t        store_free;
>>  };
>>
>> +struct sd_checksum_rsp {
>> +     uint8_t         proto_ver;
>> +     uint8_t         opcode;
>> +     uint16_t        flags;
>> +     uint32_t        epoch;
>> +     uint32_t        id;
>> +     uint32_t        data_length;
>> +     uint32_t        result;
>> +     union {
>> +             uint8_t         sha1[SHA1_LEN];
>> +             uint32_t        __pad[7];
>> +     };
>> +};
>> +
>>  struct node_id {
>>       uint8_t addr[16];
>>       uint16_t port;
>> diff --git a/include/sheep.h b/include/sheep.h
>> index fbbab85..a97cb43 100644
>> --- a/include/sheep.h
>> +++ b/include/sheep.h
>> @@ -284,4 +284,20 @@ static inline int nodes_to_vnodes(struct sd_node *nodes, int nr,
>>       return nr_vnodes;
>>  }
>>
>> +static inline char *sha1_to_hex(const unsigned char *sha1)
>> +{
>> +     static char buffer[50];
>> +     static const char hex[] = "0123456789abcdef";
>> +     char *buf = buffer;
>> +     int i;
>> +
>> +     for (i = 0; i < SHA1_LEN; i++) {
>> +             unsigned int val = *sha1++;
>> +             *buf++ = hex[val >> 4];
>> +             *buf++ = hex[val & 0xf];
>> +     }
>> +     buffer[2 * SHA1_LEN] = 0;
>> +     return buffer;
>> +}
>> +
>
> We can't move it out because both buffer and hex is static variable. And we'd better to keep
> it in sha1_file.c and export it out as a function.

Ok.

>
>>  #endif
>> diff --git a/sheep/farm/farm.h b/sheep/farm/farm.h
>> index 1a0081e..da79159 100644
>> --- a/sheep/farm/farm.h
>> +++ b/sheep/farm/farm.h
>> @@ -46,7 +46,6 @@ extern char farm_obj_dir[PATH_MAX];
>>  extern char *sha1_to_path(const unsigned char *sha1);
>>  extern int sha1_file_write(unsigned char *buf, unsigned len, unsigned char *);
>>  extern void *sha1_file_read(const unsigned char *sha1, struct sha1_file_hdr *);
>> -extern char *sha1_to_hex(const unsigned char *sha1);
>>  extern int get_sha1_hex(const char *hex, unsigned char *sha1);
>>  extern int sha1_file_try_delete(const unsigned char *sha1);
>>
>> diff --git a/sheep/farm/sha1_file.c b/sheep/farm/sha1_file.c
>> index b0abc16..3cd4f40 100644
>> --- a/sheep/farm/sha1_file.c
>> +++ b/sheep/farm/sha1_file.c
>> @@ -257,18 +257,3 @@ int get_sha1_hex(const char *hex, unsigned char *sha1)
>>       }
>>       return 0;
>>  }
>> -
>> -char *sha1_to_hex(const unsigned char *sha1)
>> -{
>> -     static char buffer[50];
>> -     static const char hex[] = "0123456789abcdef";
>> -     char *buf = buffer;
>> -     int i;
>> -
>> -     for (i = 0; i < SHA1_LEN; i++) {
>> -             unsigned int val = *sha1++;
>> -             *buf++ = hex[val >> 4];
>> -             *buf++ = hex[val & 0xf];
>> -     }
>> -     return buffer;
>> -}
>> diff --git a/sheep/gateway.c b/sheep/gateway.c
>> index 41d712b..528c912 100644
>> --- a/sheep/gateway.c
>> +++ b/sheep/gateway.c
>> @@ -316,3 +316,119 @@ int gateway_remove_obj(struct request *req)
>>  {
>>       return gateway_forward_request(req);
>>  }
>> +
>> +int gateway_calc_obj_chksum(struct request *req)
>> +{
>> +     struct sd_req fwdhdr, *hdr = &req->rq;
>> +     struct sd_checksum_rsp *rsp;
>> +     struct sd_vnode *vnodes[SD_MAX_COPIES];
>> +     struct node_id *nid;
>> +     char host[128];
>> +     unsigned int rlen, wlen;
>> +     uint64_t oid = hdr->obj.oid;
>> +     int fd, ret, nr_objs;
>> +     int i = 0, offset = 0;
>> +
>> +     nr_objs = get_obj_copy_number(oid, req->vinfo->nr_zones);
>> +
>> +     oid_to_vnodes(req->vinfo->vnodes, req->vinfo->nr_vnodes,
>> +                   oid, nr_objs, vnodes);
>> +
>> +loop:
>> +     nid = &vnodes[i]->nid;
>> +     addr_to_str(host, sizeof(host), nid->addr, 0);
>> +
>> +     fd = connect_to(host, nid->port);
>> +     if (fd < 0) {
>> +             dprintf("Failed to connect\n");
>> +             return SD_RES_EIO;
>> +     }
>> +
>> +     rlen = 0;
>> +     wlen = 0;
>> +
>> +     memcpy(&fwdhdr, hdr, sizeof(fwdhdr));
>> +     fwdhdr.opcode = gateway_to_peer_opcode(hdr->opcode);
>> +     fwdhdr.flags = 0;
>> +     fwdhdr.data_length = 0;
>> +
>> +     rsp = (struct sd_checksum_rsp *)&fwdhdr;
>> +     ret = exec_req(fd, &fwdhdr, NULL, &wlen, &rlen);
>> +     close(fd);
>> +     if (ret)
>> +             return SD_RES_EIO;
>> +
>> +     if (rsp->result == SD_RES_SUCCESS)
>> +             memcpy((char *)req->data + offset, rsp->sha1, SHA1_LEN);
>> +     else if (rsp->result == SD_RES_NO_OBJ)
>> +             memset((char *)req->data + offset, 0, SHA1_LEN);
>> +     else
>> +             return rsp->result;
>> +
>> +     memcpy((char *)req->data + offset + SHA1_LEN, nid, sizeof(*nid));
>> +     offset += SHA1_LEN + sizeof(*nid);
>> +
>> +     if (++i < nr_objs)
>> +             goto loop;
>> +
>> +     req->rp.obj.copies = nr_objs;
>> +     req->rp.data_length = offset;
>> +
>> +     return SD_RES_SUCCESS;
>> +}
>> +
>> +int gateway_repair_obj(struct request *req)
>> +{
>> +     struct sd_req fwdhdr, *hdr = &req->rq;
>> +     struct sd_rsp *rsp = (struct sd_rsp *)&fwdhdr;
>> +     struct node_id *src, *dest;
>> +     struct sd_vnode *vnodes[SD_MAX_COPIES];
>> +     char host[128], to[128];
>> +     uint64_t oid = hdr->obj.oid;
>> +     unsigned rlen, wlen;
>> +     int i, fd, ret, err_ret, nr_objs;
>> +
>> +     nr_objs = get_obj_copy_number(oid, req->vinfo->nr_zones);
>> +
>> +     oid_to_vnodes(req->vinfo->vnodes, req->vinfo->nr_vnodes,
>> +                   oid, nr_objs, vnodes);
>> +
>> +     i = 1;
>> +     src = &vnodes[0]->nid;
>> +     err_ret = SD_RES_SUCCESS;
>> +loop:
>> +     dest = &vnodes[i]->nid;
>> +     addr_to_str(host, sizeof(host), dest->addr, 0);
>> +
>> +     fd = connect_to(host, dest->port);
>> +     if (fd < 0) {
>> +             dprintf("Failed to connect\n");
>> +             return SD_RES_EIO;
>> +     }
>> +
>> +     rlen = 0;
>> +     wlen = sizeof(*src);
>> +
>> +     memcpy(&fwdhdr, hdr, sizeof(fwdhdr));
>> +     fwdhdr.opcode = gateway_to_peer_opcode(hdr->opcode);
>> +     fwdhdr.flags = 0;
>> +     fwdhdr.data_length = wlen;
>> +     fwdhdr.flags = SD_FLAG_CMD_WRITE;
>> +
>> +     ret = exec_req(fd, &fwdhdr, src, &wlen, &rlen);
>> +     close(fd);
>> +     if (ret)
>> +             return SD_RES_EIO;
>> +
>> +     if (rsp->result != SD_RES_SUCCESS) {
>> +             dprintf("oid:%"PRIx64", from:%s to:%s\n", oid,
>> +                     addr_to_str(host, sizeof(host), src->addr, src->port),
>> +                     addr_to_str(to, sizeof(to), dest->addr, dest->port));
>> +             err_ret = rsp->result;
>> +     }
>> +
>> +     if (++i < nr_objs)
>> +             goto loop;
>> +
>> +     return err_ret;
>> +}
>
> I think both two functions should use sockfd and we might actually share the forward_gateway_request()

Oh, I forgot to use sockfd.

I also wanted to share forward_gateway_request, but I finally gave up.

I'll try again, thank you.


> by modifying its interface,
>
> forward_gateway_request(req, func_pointer)
>
> thus we can pass wait_forward_request() to it for write/remove/create_and_write/repair, and add a new
> function for cacl_object_chksum. In this way, we can boost requests handling in parallel.
>
>> diff --git a/sheep/ops.c b/sheep/ops.c
>> index ccb1c5e..3225c59 100644
>> --- a/sheep/ops.c
>> +++ b/sheep/ops.c
>> @@ -26,6 +26,7 @@
>>  #include "strbuf.h"
>>  #include "trace/trace.h"
>>  #include "util.h"
>> +#include "sha1.h"
>>
>>  enum sd_op_type {
>>       SD_OP_TYPE_CLUSTER = 1, /* cluster operations */
>> @@ -219,7 +220,6 @@ static int cluster_make_fs(const struct sd_req *req, struct sd_rsp *rsp,
>>       int i, ret;
>>       uint32_t latest_epoch;
>>       uint64_t created_time;
>> -     struct siocb iocb = { 0 };
>>       struct store_driver *driver;
>>
>>       driver = find_store_driver(data);
>> @@ -228,7 +228,6 @@ static int cluster_make_fs(const struct sd_req *req, struct sd_rsp *rsp,
>>
>>       sd_store = driver;
>>       latest_epoch = get_latest_epoch();
>> -     iocb.epoch = latest_epoch;
>>
>>       ret = sd_store->format(data);
>>       if (ret != SD_RES_SUCCESS)
>> @@ -688,6 +687,51 @@ static int local_kill_node(const struct sd_req *req, struct sd_rsp *rsp,
>>       return SD_RES_SUCCESS;
>>  }
>>
>> +static inline int read_object_from(struct node_id *src, uint32_t epoch,
>> +                                uint64_t oid, char *buf)
>> +{
>> +     struct sd_req hdr;
>> +     struct sd_rsp *rsp = (struct sd_rsp *)&hdr;
>> +     int fd, ret;
>> +     unsigned wlen, rlen;
>> +     char host[128];
>> +
>> +     addr_to_str(host, sizeof(host), src->addr, 0);
>> +
>> +     fd = connect_to(host, src->port);
>> +     if (fd < 0) {
>> +             dprintf("failed to connect to %s:%"PRIu32"\n",
>> +                     host, src->port);
>> +             return SD_RES_NETWORK_ERROR;
>> +     }
>> +
>
> use sockfd cache.
>
> Thanks,
> Yuan



-- 
Yunkai Zhang
Work at Taobao



More information about the sheepdog mailing list