[sheepdog] [PATCH] object cache: handle partial write due to unexpected crash
Liu Yuan
namei.unix at gmail.com
Thu Dec 13 07:09:51 CET 2012
From: Liu Yuan <tailai.ly at taobao.com>
Partial written object will lead sheep to leave cluster due to EIO returned.
Let's code create_cache_object() to do all-or-nothing write(2) for the pulled
object.
Signed-off-by: Liu Yuan <tailai.ly at taobao.com>
---
sheep/object_cache.c | 42 ++++++++++++++++++++++++++++++++++--------
1 file changed, 34 insertions(+), 8 deletions(-)
diff --git a/sheep/object_cache.c b/sheep/object_cache.c
index d8ff191..791a57e 100644
--- a/sheep/object_cache.c
+++ b/sheep/object_cache.c
@@ -721,13 +721,11 @@ static int create_cache_object(struct object_cache *oc, uint32_t idx,
{
int flags = def_open_flags | O_CREAT | O_EXCL, fd;
int ret = SD_RES_OID_EXIST;
- struct strbuf buf;
-
- strbuf_init(&buf, PATH_MAX);
- strbuf_addstr(&buf, cache_dir);
- strbuf_addf(&buf, "/%06"PRIx32"/%08"PRIx32, oc->vid, idx);
+ char path[PATH_MAX], tmp_path[PATH_MAX];
- fd = open(buf.buf, flags, def_fmode);
+ sprintf(tmp_path, "%s/%06"PRIx32"/%08"PRIx32".tmp", cache_dir,
+ oc->vid, idx);
+ fd = open(tmp_path, flags, def_fmode);
if (fd < 0) {
if (errno == EEXIST) {
dprintf("%08"PRIx32" already created\n", idx);
@@ -738,6 +736,7 @@ static int create_cache_object(struct object_cache *oc, uint32_t idx,
goto out;
}
+ /* We need to extend it if the buffer is trimmed */
if (offset != 0 || buf_size != obj_size) {
ret = prealloc(fd, obj_size);
if (ret < 0) {
@@ -753,12 +752,24 @@ static int create_cache_object(struct object_cache *oc, uint32_t idx,
eprintf("failed, vid %"PRIx32", idx %"PRIx32"\n", oc->vid, idx);
goto out_close;
}
+ /* This is intended to take care of partial write due to crash */
+ sprintf(path, "%s/%06"PRIx32"/%08"PRIx32, cache_dir, oc->vid, idx);
+ ret = link(tmp_path, path);
+ if (ret < 0) {
+ if (errno == EEXIST) {
+ ret = SD_RES_OID_EXIST;
+ goto out_close;
+ }
+ dprintf("failed to link %s to %s: %m\n", tmp_path, path);
+ ret = err_to_sderr(idx_to_oid(oc->vid, idx), errno);
+ goto out_close;
+ }
ret = SD_RES_SUCCESS;
- dprintf("%08"PRIx32" size %zu\n", idx, buf_size);
+ dprintf("%08"PRIx32" size %zu\n", idx, obj_size);
out_close:
close(fd);
+ unlink(tmp_path);
out:
- strbuf_release(&buf);
return ret;
}
@@ -924,6 +935,13 @@ static int object_cache_flush_and_delete(struct object_cache *oc)
while ((d = readdir(dir))) {
if (!strncmp(d->d_name, ".", 1))
continue;
+ if (strcmp(d->d_name + 8, ".tmp") == 0) {
+ dprintf("try to del %s\n", d->d_name);
+ if (unlinkat(dirfd(dir), d->d_name, 0) < 0)
+ eprintf("%m\n");
+ continue;
+ }
+
idx = strtoul(d->d_name, NULL, 16);
if (idx == ULLONG_MAX)
continue;
@@ -1163,6 +1181,14 @@ static int load_existing_cache_object(struct object_cache *cache)
while ((d = readdir(dir))) {
if (!strncmp(d->d_name, ".", 1))
continue;
+
+ if (strcmp(d->d_name + 8, ".tmp") == 0) {
+ dprintf("try to del %s\n", d->d_name);
+ if (unlinkat(dirfd(dir), d->d_name, 0) < 0)
+ eprintf("%m\n");
+ continue;
+ }
+
idx = strtoul(d->d_name, NULL, 16);
if (idx == ULLONG_MAX)
continue;
--
1.7.9.5
More information about the sheepdog
mailing list