[Sheepdog] [RFC PATCH] sheep: add client side timeout support for socket
zituan at taobao.com
zituan at taobao.com
Fri Nov 25 12:28:57 CET 2011
From: Yibin Shen <zituan at taobao.com>
Currently, sheep uses an infinite timeout with poll(), which causes
problems: e.g., if node A leaves the sheep cluster unexpectedly, the
in-flight I/Os toward node A may never terminate, so the related
confchg event cannot be handled.
This patch also changes forward_read_obj_req() from a blocking read
to a non-blocking read with poll().
Signed-off-by: Yibin Shen <zituan at taobao.com>
---
include/net.h | 2 +
sheep/store.c | 68 ++++++++++++++++++++++++++++++++++++++++++++++++++------
2 files changed, 62 insertions(+), 8 deletions(-)
diff --git a/include/net.h b/include/net.h
index 9e51fea..a0aed3b 100644
--- a/include/net.h
+++ b/include/net.h
@@ -3,6 +3,8 @@
#include <sys/socket.h>
+#define DEFAULT_POLL_TIMEOUT 30000
+
enum conn_state {
C_IO_HEADER = 0,
C_IO_DATA_INIT,
diff --git a/sheep/store.c b/sheep/store.c
index d4c3f27..22feda4 100644
--- a/sheep/store.c
+++ b/sheep/store.c
@@ -243,14 +243,14 @@ static int do_local_io(struct request *req, uint32_t epoch);
static int forward_read_obj_req(struct request *req, int idx)
{
- int i, n, nr, fd, ret;
+ int i, n, nr, fd, ret, pollret;
unsigned wlen, rlen;
struct sd_obj_req hdr = *(struct sd_obj_req *)&req->rq;
struct sd_obj_rsp *rsp = (struct sd_obj_rsp *)&hdr;
struct sheepdog_vnode_list_entry *e;
uint64_t oid = hdr.oid;
int copies;
-
+ struct pollfd pfds;
e = req->entry;
nr = req->nr_vnodes;
@@ -285,21 +285,69 @@ static int forward_read_obj_req(struct request *req, int idx)
wlen = 0;
rlen = hdr.data_length;
- ret = exec_req(fd, (struct sd_req *)&hdr, req->data, &wlen, &rlen);
+ ret = send_req(fd, (struct sd_req *)&hdr, req->data, &wlen);
+ if (ret) { /* network errors */
+ ret = SD_RES_NETWORK_ERROR;
+ dprintf("fail %"PRIu32"\n", ret);
+ goto out;
+ }
+
+ pfds.fd = fd;
+ pfds.events = POLLIN;
+ ret = SD_RES_SUCCESS;
+
+ poll_again:
+ /*FIXME: what timout value is better? */
+ pollret= poll(&pfds, 1, DEFAULT_POLL_TIMEOUT);
+
+ if (pollret < 0) {
+ if (errno == EINTR)
+ goto poll_again;
+ ret = SD_RES_EIO;
+ } else if (pollret == 0) {
+ ret = SD_RES_EIO;
+ goto out;
+ }
+
+ if (pfds.fd < 0 || !(pfds.revents & POLLIN)) {
+ ret = SD_RES_NETWORK_ERROR;
+ goto out;
+ }
- if (ret) /* network errors */
+ ret = do_read(pfds.fd, rsp, sizeof(*rsp));
+ if (ret) {
ret = SD_RES_NETWORK_ERROR;
- else {
- memcpy(&req->rp, rsp, sizeof(*rsp));
+ vprintf(SDOG_INFO, "failed to read a response: %m\n");
+ goto out;
+ }
+
+ /* read the extra data */
+ if (rlen > rsp->data_length)
+ rlen = rsp->data_length;
+
+ if (rlen) {
+ ret = do_read(pfds.fd, req->data, rlen);
+ if (ret) {
+ ret = SD_RES_NETWORK_ERROR;
+ vprintf(SDOG_INFO, "failed to read the response data: %m\n");
+ goto out;
+ }
+ }
+
+ if (rsp->result != SD_RES_SUCCESS) {
+ eprintf("fail %"PRIu32"\n", rsp->result);
ret = rsp->result;
+ goto out;
}
+
+ memcpy(&req->rp, rsp, sizeof(*rsp));
out:
return ret;
}
static int forward_write_obj_req(struct request *req, int idx)
{
- int i, n, nr, fd, ret;
+ int i, n, nr, fd, ret, pollret;
unsigned wlen;
char name[128];
struct sd_obj_req hdr = *(struct sd_obj_req *)&req->rq;
@@ -377,11 +425,15 @@ static int forward_write_obj_req(struct request *req, int idx)
ret = SD_RES_SUCCESS;
again:
- if (poll(pfds, nr_fds, -1) < 0) {
+ pollret = poll(pfds, nr_fds, DEFAULT_POLL_TIMEOUT);
+ if (pollret < 0) {
if (errno == EINTR)
goto again;
ret = SD_RES_EIO;
+ } else if ( pollret == 0 ) {/* poll time out */
+ ret = SD_RES_EIO;
+ goto out;
}
for (i = 0; i < nr_fds; i++) {
--
1.7.7.3
More information about the sheepdog
mailing list