[sheepdog] [PATCH 3/3] collie: retry read for eagain
Liu Yuan
namei.unix at gmail.com
Thu Jan 17 16:30:30 CET 2013
From: Liu Yuan <tailai.ly at taobao.com>
Collie should retry the read on EAGAIN to fix the false-timeout case.
This fixes the 035 failure on the collie side.
Signed-off-by: Liu Yuan <tailai.ly at taobao.com>
---
collie/cluster.c | 10 +++++-----
collie/collie.c | 2 +-
collie/collie.h | 1 +
collie/common.c | 13 +++++++++----
collie/debug.c | 2 +-
collie/node.c | 2 +-
collie/vdi.c | 16 ++++++++--------
include/net.h | 1 +
lib/net.c | 23 ++++++++++++++++++-----
9 files changed, 45 insertions(+), 25 deletions(-)
diff --git a/collie/cluster.c b/collie/cluster.c
index 917e190..10ced3b 100644
--- a/collie/cluster.c
+++ b/collie/cluster.c
@@ -53,7 +53,7 @@ static int list_store(void)
sd_init_req(&hdr, SD_OP_GET_STORE_LIST);
hdr.data_length = 512;
- ret = exec_req(fd, &hdr, buf);
+ ret = collie_exec_req(fd, &hdr, buf);
close(fd);
if (ret) {
@@ -98,7 +98,7 @@ static int cluster_format(int argc, char **argv)
sd_init_req((struct sd_req *)&hdr, SD_OP_READ_VDIS);
hdr.data_length = sizeof(vdi_inuse);
- ret = exec_req(fd, (struct sd_req *)&hdr, &vdi_inuse);
+ ret = collie_exec_req(fd, (struct sd_req *)&hdr, &vdi_inuse);
if (ret < 0) {
fprintf(stderr, "Failed to read VDIs from %s:%d\n",
sdhost, sdport);
@@ -144,7 +144,7 @@ static int cluster_format(int argc, char **argv)
hdr.flags |= SD_FLAG_CMD_WRITE;
printf("using backend %s store\n", store_name);
- ret = exec_req(fd, (struct sd_req *)&hdr, store_name);
+ ret = collie_exec_req(fd, (struct sd_req *)&hdr, store_name);
close(fd);
if (ret) {
@@ -194,7 +194,7 @@ again:
sd_init_req(&hdr, SD_OP_STAT_CLUSTER);
hdr.data_length = log_length;
- ret = exec_req(fd, &hdr, logs);
+ ret = collie_exec_req(fd, &hdr, logs);
close(fd);
if (ret != 0)
@@ -311,7 +311,7 @@ static int list_snap(void)
sd_init_req(&hdr, SD_OP_GET_SNAP_FILE);
hdr.data_length = SD_DATA_OBJ_SIZE;
- ret = exec_req(fd, &hdr, buf);
+ ret = collie_exec_req(fd, &hdr, buf);
close(fd);
if (ret) {
diff --git a/collie/collie.c b/collie/collie.c
index e739662..a533141 100644
--- a/collie/collie.c
+++ b/collie/collie.c
@@ -68,7 +68,7 @@ static int update_node_list(int max_nodes, uint32_t epoch)
hdr.data_length = size;
- ret = exec_req(fd, (struct sd_req *)&hdr, buf);
+ ret = collie_exec_req(fd, (struct sd_req *)&hdr, buf);
if (ret) {
ret = -1;
goto out;
diff --git a/collie/collie.h b/collie/collie.h
index 1b00e68..7e9b6be 100644
--- a/collie/collie.h
+++ b/collie/collie.h
@@ -73,6 +73,7 @@ int sd_write_object(uint64_t oid, uint64_t cow_oid, void *data,
int copies, bool create, bool direct);
int send_light_req(struct sd_req *hdr, const char *host, int port);
int send_light_req_get_response(struct sd_req *hdr, const char *host, int port);
+int collie_exec_req(int sockfd, struct sd_req *hdr, void *data);
extern struct command vdi_command;
extern struct command node_command;
diff --git a/collie/common.c b/collie/common.c
index 5343ffa..e709964 100644
--- a/collie/common.c
+++ b/collie/common.c
@@ -63,7 +63,7 @@ int sd_read_object(uint64_t oid, void *data, unsigned int datalen,
if (direct)
hdr.flags |= SD_FLAG_CMD_DIRECT;
- ret = exec_req(fd, &hdr, data);
+ ret = collie_exec_req(fd, &hdr, data);
close(fd);
if (ret) {
@@ -113,7 +113,7 @@ int sd_write_object(uint64_t oid, uint64_t cow_oid, void *data,
hdr.obj.cow_oid = cow_oid;
hdr.obj.offset = offset;
- ret = exec_req(fd, &hdr, data);
+ ret = collie_exec_req(fd, &hdr, data);
close(fd);
if (ret) {
@@ -148,7 +148,7 @@ int parse_vdi(vdi_parser_func_t func, size_t size, void *data)
sd_init_req(&req, SD_OP_READ_VDIS);
req.data_length = sizeof(vdi_inuse);
- ret = exec_req(fd, &req, &vdi_inuse);
+ ret = collie_exec_req(fd, &req, &vdi_inuse);
if (ret < 0) {
fprintf(stderr, "Failed to read VDIs from %s:%d\n",
sdhost, sdport);
@@ -206,7 +206,7 @@ int send_light_req_get_response(struct sd_req *hdr, const char *host, int port)
if (fd < 0)
return -1;
- ret = exec_req(fd, hdr, NULL);
+ ret = collie_exec_req(fd, hdr, NULL);
close(fd);
if (ret) {
fprintf(stderr, "failed to connect to %s:%d\n",
@@ -238,3 +238,8 @@ int send_light_req(struct sd_req *hdr, const char *host, int port)
return 0;
}
+
+int collie_exec_req(int sockfd, struct sd_req *hdr, void *data)
+{
+ return net_exec_req(sockfd, hdr, data, true);
+}
diff --git a/collie/debug.c b/collie/debug.c
index 11d5934..708ba25 100644
--- a/collie/debug.c
+++ b/collie/debug.c
@@ -94,7 +94,7 @@ read_buffer:
sd_init_req(&hdr, SD_OP_TRACE_READ_BUF);
hdr.data_length = TRACE_BUF_LEN;
- ret = exec_req(fd, &hdr, buf);
+ ret = collie_exec_req(fd, &hdr, buf);
if (ret) {
fprintf(stderr, "Failed to connect\n");
diff --git a/collie/node.c b/collie/node.c
index f79927f..a87d095 100644
--- a/collie/node.c
+++ b/collie/node.c
@@ -161,7 +161,7 @@ static int node_cache(int argc, char **argv)
hdr.flags = SD_FLAG_CMD_WRITE;
hdr.data_length = sizeof(cache_size);
- ret = exec_req(fd, &hdr, (void *)&cache_size);
+ ret = collie_exec_req(fd, &hdr, (void *)&cache_size);
close(fd);
if (ret) {
diff --git a/collie/vdi.c b/collie/vdi.c
index 6ed7162..2547bd7 100644
--- a/collie/vdi.c
+++ b/collie/vdi.c
@@ -319,7 +319,7 @@ static void parse_objs(uint64_t oid, obj_parser_func_t func, void *data, unsigne
hdr.obj.oid = oid;
- ret = exec_req(fd, &hdr, buf);
+ ret = collie_exec_req(fd, &hdr, buf);
close(fd);
sprintf(name + strlen(name), ":%d", sd_nodes[i].nid.port);
@@ -410,7 +410,7 @@ static int find_vdi_name(const char *vdiname, uint32_t snapid, const char *tag,
hdr.flags = SD_FLAG_CMD_WRITE;
hdr.vdi.snapid = snapid;
- ret = exec_req(fd, &hdr, buf);
+ ret = collie_exec_req(fd, &hdr, buf);
if (ret) {
ret = -1;
goto out;
@@ -490,7 +490,7 @@ static int do_vdi_create(const char *vdiname, int64_t vdi_size,
hdr.vdi.vdi_size = roundup(vdi_size, 512);
hdr.vdi.copies = nr_copies;
- ret = exec_req(fd, &hdr, buf);
+ ret = collie_exec_req(fd, &hdr, buf);
close(fd);
@@ -751,7 +751,7 @@ static int do_vdi_delete(const char *vdiname, int snap_id, const char *snap_tag)
if (snap_tag)
pstrcpy(data + SD_MAX_VDI_LEN, SD_MAX_VDI_TAG_LEN, snap_tag);
- ret = exec_req(fd, &hdr, data);
+ ret = collie_exec_req(fd, &hdr, data);
close(fd);
if (ret) {
@@ -884,7 +884,7 @@ static int do_track_object(uint64_t oid, uint8_t nr_copies)
sd_init_req(&hdr, SD_OP_STAT_CLUSTER);
hdr.data_length = log_length;
- ret = exec_req(fd, &hdr, logs);
+ ret = collie_exec_req(fd, &hdr, logs);
close(fd);
if (ret != 0)
@@ -1030,7 +1030,7 @@ static int find_vdi_attr_oid(const char *vdiname, const char *tag, uint32_t snap
if (delete)
hdr.flags |= SD_FLAG_CMD_DEL;
- ret = exec_req(fd, &hdr, &vattr);
+ ret = collie_exec_req(fd, &hdr, &vattr);
if (ret) {
ret = SD_RES_EIO;
goto out;
@@ -1391,7 +1391,7 @@ static void *read_object_from(const struct sd_vnode *vnode, uint64_t oid)
hdr.obj.oid = oid;
- ret = exec_req(fd, &hdr, buf);
+ ret = collie_exec_req(fd, &hdr, buf);
close(fd);
if (ret) {
@@ -1440,7 +1440,7 @@ static void write_object_to(const struct sd_vnode *vnode, uint64_t oid,
hdr.data_length = SD_DATA_OBJ_SIZE;
hdr.obj.oid = oid;
- ret = exec_req(fd, &hdr, buf);
+ ret = collie_exec_req(fd, &hdr, buf);
close(fd);
if (ret) {
diff --git a/include/net.h b/include/net.h
index 787ee79..97e93d0 100644
--- a/include/net.h
+++ b/include/net.h
@@ -52,6 +52,7 @@ int tx(struct connection *conn, enum conn_state next_state);
int connect_to(const char *name, int port);
int send_req(int sockfd, struct sd_req *hdr, void *data, unsigned int wlen);
int exec_req(int sockfd, struct sd_req *hdr, void *data);
+int net_exec_req(int sockfd, struct sd_req *hdr, void *data, bool retry_eagain);
int create_listen_ports(const char *bindaddr, int port,
int (*callback)(int fd, void *), void *data);
int create_unix_domain_socket(const char *unix_path,
diff --git a/lib/net.c b/lib/net.c
index 365c1e1..76b6eab 100644
--- a/lib/net.c
+++ b/lib/net.c
@@ -268,7 +268,7 @@ success:
return fd;
}
-int do_read(int sockfd, void *buf, int len)
+static int net_do_read(int sockfd, void *buf, int len, bool retry_eagain)
{
int ret;
reread:
@@ -276,7 +276,10 @@ reread:
if (ret < 0 || !ret) {
if (errno == EINTR)
goto reread;
- eprintf("failed to read from socket: %d\n", ret);
+ if (retry_eagain && errno == EAGAIN)
+ goto reread;
+
+ eprintf("failed to read from socket: %d, %d(%m)\n", ret, errno);
return 1;
}
@@ -288,6 +291,11 @@ reread:
return 0;
}
+int do_read(int sockfd, void *buf, int len)
+{
+ return net_do_read(sockfd, buf, len, false);
+}
+
static void forward_iov(struct msghdr *msg, int len)
{
while (msg->msg_iov->iov_len <= len) {
@@ -352,7 +360,7 @@ int send_req(int sockfd, struct sd_req *hdr, void *data, unsigned int wlen)
return ret;
}
-int exec_req(int sockfd, struct sd_req *hdr, void *data)
+int net_exec_req(int sockfd, struct sd_req *hdr, void *data, bool retry_eagain)
{
int ret;
struct sd_rsp *rsp = (struct sd_rsp *)hdr;
@@ -369,7 +377,7 @@ int exec_req(int sockfd, struct sd_req *hdr, void *data)
if (send_req(sockfd, hdr, data, wlen))
return 1;
- ret = do_read(sockfd, rsp, sizeof(*rsp));
+ ret = net_do_read(sockfd, rsp, sizeof(*rsp), retry_eagain);
if (ret) {
eprintf("failed to read a response\n");
return 1;
@@ -379,7 +387,7 @@ int exec_req(int sockfd, struct sd_req *hdr, void *data)
rlen = rsp->data_length;
if (rlen) {
- ret = do_read(sockfd, data, rlen);
+ ret = net_do_read(sockfd, data, rlen, retry_eagain);
if (ret) {
eprintf("failed to read the response data\n");
return 1;
@@ -389,6 +397,11 @@ int exec_req(int sockfd, struct sd_req *hdr, void *data)
return 0;
}
+int exec_req(int sockfd, struct sd_req *hdr, void *data)
+{
+ return net_exec_req(sockfd, hdr, data, false);
+}
+
char *addr_to_str(char *str, int size, const uint8_t *addr, uint16_t port)
{
int af = AF_INET6;
--
1.7.9.5
More information about the sheepdog
mailing list