[Sheepdog] [PATCH] Journal support for atomic operations
Narendra Prasad Madanapalli
narendramind at gmail.com
Mon Dec 20 19:40:37 CET 2010
This patch adds atomicity to operations such as VDI object and checksum
updates.
Using the journalling API, store_queue_request_local() now updates the
VDI object and its checksum atomically for the operations
SD_OP_WRITE_OBJ and SD_OP_CREATE_AND_WRITE_OBJ.
Signed-off-by: Narendra <narendramind at gmail.com>
diff --git a/sheep/sheep.c b/sheep/sheep.c
index dc9a320..d6c776d 100644
--- a/sheep/sheep.c
+++ b/sheep/sheep.c
@@ -112,6 +112,8 @@ int main(int argc, char **argv)
if (is_daemon && daemon(0, 0))
exit(1);
+ jrnl_recover();
+
ret = init_event(EPOLL_SIZE);
if (ret)
exit(1);
diff --git a/sheep/sheep_priv.h b/sheep/sheep_priv.h
index c66baf4..80c75b3 100644
--- a/sheep/sheep_priv.h
+++ b/sheep/sheep_priv.h
@@ -128,6 +128,7 @@ struct cluster_info {
extern struct cluster_info *sys;
+
int create_listen_port(int port, void *data);
int is_io_request(unsigned op);
@@ -190,6 +191,81 @@ int remove_object(struct sheepdog_node_list_entry *e,
int nodes, uint32_t node_version,
uint64_t oid, int nr);
+/* Journal */
+typedef uint32_t end_mark_t;	/* marker stored after the journal payload */
+typedef uint32_t jrnl_type_t;	/* first field of every journal header */
+
+/* Journal types; also used as indices into the per-type handler table. */
+#define JRNL_TYPE_VDI 0
+#define JRNL_TYPE_CKSUM 1
+#define JRNL_MAX_TYPES 2
+
+/* End-mark values; the argument of IS_END_MARK_SET is parenthesized so
+ * that compound expressions are compared as a whole. */
+#define SET_END_MARK 1UL
+#define UNSET_END_MARK 0UL
+#define IS_END_MARK_SET(var) ((var) == SET_END_MARK)
+
+
+/* Different Journal headers
+ *
+ * Each header starts with a jrnl_type_t so that the type can be read back
+ * from the first bytes of a journal file or of an in-memory header.
+ */
+typedef struct jrnl_cksum_head {
+ jrnl_type_t jh_type; /* JRNL_TYPE_CKSUM for this header */
+ uint64_t jh_size; /* size in bytes of the payload stored after the header */
+} jrnl_cksum_head_t;
+
+typedef struct jrnl_vdi_head {
+ jrnl_type_t jh_type; /* JRNL_TYPE_VDI for this header */
+ uint64_t jh_offset; /* offset in the target object at which the payload is written */
+ uint64_t jh_size; /* size in bytes of the payload stored after the header */
+} jrnl_vdi_head_t;
+
+typedef struct jrnl_cksum_data { /* payload of a checksum journal */
+ char aname_cksum[64]; /* extended attribute name passed to fsetxattr() */
+ uint64_t aval_cksum; /* extended attribute value (the checksum) */
+} jrnl_cksum_data_t;
+
+typedef struct jrnl_file_desc { /* identifies one journal file and its target object */
+ uint32_t jf_epoch; /* epoch */
+ uint64_t jf_oid; /* Object id */
+ int jf_fd; /* Open file fd */
+ int jf_target_fd; /* fd of the object the journal contents are applied to */
+} jrnl_file_desc_t;
+
+typedef struct jrnl_descriptor { /* in-memory description of one journalled update */
+ void *jd_head; /* type-specific header; its first field is read as the journal type */
+ void *jd_data; /* payload written to the journal after the header */
+ int jd_end_mark; /* set by the write_end_mark handlers */
+ jrnl_file_desc_t jd_jfd; /* journal file / target object; see the jdf_* shorthands below */
+#define jdf_epoch jd_jfd.jf_epoch
+#define jdf_oid jd_jfd.jf_oid
+#define jdf_fd jd_jfd.jf_fd
+#define jdf_target_fd jd_jfd.jf_target_fd
+} jrnl_desc_t;
+
+typedef struct jrnl_handler { /* per-journal-type operations, looked up by journal type */
+ int (*has_end_mark)(jrnl_desc_t *jd);
+ int (*write_header)(jrnl_desc_t *jd);
+ int (*write_data)(jrnl_desc_t *jd);
+ int (*write_end_mark)(jrnl_desc_t *jd);
+ int (*apply_to_target_object)(jrnl_file_desc_t *jfd); /* used by recovery: replays a journal file */
+ int (*commit_data)(jrnl_desc_t *jd); /* applies the in-memory payload to the target object */
+} jrnl_handler_t;
+
+/*
+ * Journal API.
+ *
+ * These are plain external declarations: no definition is provided in
+ * this header, so they must not be declared 'inline' here.  The name
+ * jrnl_apply_to_target_object matches the function that callers use.
+ */
+jrnl_type_t jrnl_get_type(jrnl_desc_t *jd);
+int jrnl_get_type_from_file(jrnl_file_desc_t *jfd, jrnl_type_t *jrnl_type);
+int jrnl_exists(jrnl_file_desc_t *jfd);
+int jrnl_update_epoch_store(uint32_t epoch);
+int jrnl_open(jrnl_file_desc_t *jfd, int aflags);
+int jrnl_create(jrnl_file_desc_t *jfd);
+int jrnl_remove(jrnl_file_desc_t *jfd);
+int jrnl_close(jrnl_file_desc_t *jfd);
+
+/* Type-dispatching wrappers around the per-type handlers. */
+int jrnl_has_end_mark(jrnl_desc_t *jd);
+int jrnl_write_header(jrnl_desc_t *jd);
+int jrnl_write_data(jrnl_desc_t *jd);
+int jrnl_write_end_mark(jrnl_desc_t *jd);
+int jrnl_apply_to_target_object(jrnl_file_desc_t *jfd);
+int jrnl_commit_data(jrnl_desc_t *jd);
+
+/* Journal one update (jrnl_perform) / replay leftover journals (jrnl_recover). */
+int jrnl_perform(jrnl_desc_t *jd);
+int jrnl_recover(void);
+
static inline int is_myself(struct sheepdog_node_list_entry *e)
{
return e->id == sys->this_node.id;
diff --git a/sheep/store.c b/sheep/store.c
index a4d6155..0eac4d2 100644
--- a/sheep/store.c
+++ b/sheep/store.c
@@ -32,10 +32,45 @@
static char *obj_path;
static char *epoch_path;
static char *mnt_path;
+static char *jrnl_path;
static mode_t def_dmode = S_IRUSR | S_IWUSR | S_IXUSR | S_IRGRP | S_IWGRP |
S_IXGRP;
static mode_t def_fmode = S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP;
+/* Journal internal data structures
+ *
+ * Forward declarations of the per-type handlers, followed by the table
+ * that the jrnl_* dispatch functions index by journal type.  The order of
+ * the table entries therefore has to follow JRNL_TYPE_VDI (0) and
+ * JRNL_TYPE_CKSUM (1).
+ */
+static int jrnl_vdi_has_end_mark(jrnl_desc_t *jd);
+static int jrnl_vdi_write_header(jrnl_desc_t *jd);
+static int jrnl_vdi_write_data(jrnl_desc_t *jd);
+static int jrnl_vdi_write_end_mark(jrnl_desc_t *jd);
+static int jrnl_vdi_apply_to_target_object(jrnl_file_desc_t *jfd);
+static int jrnl_vdi_commit_data(jrnl_desc_t *jd);
+
+static int jrnl_cksum_has_end_mark(jrnl_desc_t *jd);
+static int jrnl_cksum_write_header(jrnl_desc_t *jd);
+static int jrnl_cksum_write_data(jrnl_desc_t *jd);
+static int jrnl_cksum_write_end_mark(jrnl_desc_t *jd);
+static int jrnl_cksum_apply_to_target_object(jrnl_file_desc_t *jfd);
+static int jrnl_cksum_commit_data(jrnl_desc_t *jd);
+
+static jrnl_handler_t jrnl_handlers[JRNL_MAX_TYPES] = {
+ { /* index JRNL_TYPE_VDI: object data journal */
+ .has_end_mark = jrnl_vdi_has_end_mark,
+ .write_header = jrnl_vdi_write_header,
+ .write_data = jrnl_vdi_write_data,
+ .write_end_mark = jrnl_vdi_write_end_mark,
+ .apply_to_target_object = jrnl_vdi_apply_to_target_object,
+ .commit_data = jrnl_vdi_commit_data
+ },
+ { /* index JRNL_TYPE_CKSUM: checksum xattr journal */
+ .has_end_mark = jrnl_cksum_has_end_mark,
+ .write_header = jrnl_cksum_write_header,
+ .write_data = jrnl_cksum_write_data,
+ .write_end_mark = jrnl_cksum_write_end_mark,
+ .apply_to_target_object = jrnl_cksum_apply_to_target_object,
+ .commit_data = jrnl_cksum_commit_data
+ }
+};
+
static int obj_cmp(const void *oid1, const void *oid2)
{
const uint64_t hval1 = fnv_64a_buf((void *)oid1, sizeof(uint64_t),
FNV1A_64_INIT);
@@ -98,7 +133,8 @@ static int is_obj_in_range(uint64_t oid, uint64_t start,
uint64_t end)
return (start < hval || hval <= end);
}
-static int verify_object(int fd, char *buf, size_t len, int set_chksum)
+/*
+ * When set_chksum is set and both epoch and oid are non-zero, the checksum
+ * is updated through the journal; otherwise the xattr is written directly.
+ */
+static int verify_object(int fd, char *buf, size_t len, int set_chksum,
uint32_t epoch, uint64_t oid)
{
int ret;
uint64_t checksum;
@@ -128,11 +164,30 @@ static int verify_object(int fd, char *buf, size_t
len, int set_chksum)
}
if (set_chksum) {
- checksum = fnv_64a_buf(buf, len, FNV1A_64_INIT);
- ret = fsetxattr(fd, ANAME_CHECKSUM, &checksum, sizeof(checksum),
0);
- if (ret < 0) {
- eprintf("failed to set xattr, %m\n");
- goto err;
+ if (epoch && oid) {
+ jrnl_desc_t jd;
+ jrnl_cksum_head_t head;
+ jrnl_cksum_data_t data;
+
+ head.jh_type = JRNL_TYPE_CKSUM;
+ head.jh_size = sizeof(data);
+ strcpy(data.aname_cksum, ANAME_CHECKSUM);
+ data.aval_cksum = fnv_64a_buf(buf, len, FNV1A_64_INIT);
+ jd.jd_head = &head;
+ jd.jd_data = &data;
+ jd.jdf_epoch = epoch;
+ jd.jdf_oid = oid;
+ jd.jdf_target_fd = fd;
+ ret = jrnl_perform(&jd);
+ if (ret)
+ goto err;
+ } else {
+ checksum = fnv_64a_buf(buf, len, FNV1A_64_INIT);
+ ret = fsetxattr(fd, ANAME_CHECKSUM, &checksum,
sizeof(checksum), 0);
+ if (ret < 0) {
+ eprintf("failed to set xattr, %m\n");
+ goto err;
+ }
}
} else {
ret = fgetxattr(fd, ANAME_CHECKSUM, &checksum, sizeof(checksum));
@@ -199,7 +254,7 @@ static int get_obj_list(struct request *req)
}
obj_nr = read(fd, buf, buf_len);
- ret = verify_object(fd, buf, obj_nr, 0);
+ ret = verify_object(fd, buf, obj_nr, 0, 0, 0);
if (ret < 0) {
eprintf("verification failed, %s, %m\n", path);
close(fd);
@@ -633,6 +688,8 @@ static int store_queue_request_local(struct request
*req, uint32_t epoch)
uint64_t oid = hdr->oid;
uint32_t opcode = hdr->opcode;
char path[1024], *buf;
+ jrnl_desc_t jd;
+ jrnl_vdi_head_t jh;
switch (opcode) {
case SD_OP_CREATE_AND_WRITE_OBJ:
@@ -766,24 +823,31 @@ static int store_queue_request_local(struct request
*req, uint32_t epoch)
/* } */
}
/* fall through */
- case SD_OP_CREATE_AND_WRITE_OBJ:
- ret = pwrite64(fd, req->data, hdr->data_length, hdr->offset);
- if (ret != hdr->data_length) {
- if (errno == ENOSPC)
- ret = SD_RES_NO_SPACE;
- else
- ret = SD_RES_EIO;
+ case SD_OP_CREATE_AND_WRITE_OBJ:
+
+ jd.jdf_epoch = epoch;
+ jd.jdf_oid = oid;
+ jd.jdf_target_fd = fd;
+
+ jh.jh_type = JRNL_TYPE_VDI;
+ jh.jh_offset = hdr->offset;
+ jh.jh_size = hdr->data_length;
+
+ jd.jd_head = &jh;
+ jd.jd_data = req->data;
+ jd.jd_end_mark = SET_END_MARK;
+
+ ret = jrnl_perform(&jd);
+ if (ret)
goto out;
- }
if (!is_data_obj(oid)) {
- /* FIXME: need to update atomically */
-/* ret = verify_object(fd, NULL, 0, 1); */
-/* if (ret < 0) { */
-/* eprintf("failed to set checksum, %"PRIx64"\n", oid); */
-/* ret = SD_RES_EIO; */
-/* goto out; */
-/* } */
+ ret = verify_object(fd, NULL, 0, 1, epoch, oid);
+ if (ret < 0) {
+ eprintf("failed to set checksum, %"PRIx64"\n", oid);
+ ret = SD_RES_EIO;
+ goto out;
+ }
}
ret = SD_RES_SUCCESS;
@@ -1556,7 +1620,7 @@ static void __start_recovery(struct work *work, int
idx)
write(fd, rw->buf, sizeof(uint64_t) * rw->count);
fsync(fd);
- ret = verify_object(fd, rw->buf, sizeof(uint64_t) * rw->count, 1);
+ ret = verify_object(fd, rw->buf, sizeof(uint64_t) * rw->count, 1, 0,
0);
if (ret < 0) {
eprintf("failed to set check sum, %s, %m\n", path);
close(fd);
@@ -1786,6 +1850,27 @@ static int init_mnt_path(const char *base_path)
return 0;
}
+#define JRNL_PATH "/journal/"
+
+/*
+ * Build "<base_path>/journal/" into the file-scope jrnl_path and hand it
+ * to init_path().
+ *
+ * Returns 0 on success and a non-zero value on failure (allocation
+ * failure, or whatever init_path() reports).
+ */
+static int init_jrnl_path(const char *base_path)
+{
+	int new, ret;
+	size_t len = strlen(base_path) + strlen(JRNL_PATH) + 1;
+
+	/* Create journal directory */
+	jrnl_path = zalloc(len);
+	if (!jrnl_path) {
+		eprintf("failed to allocate memory\n");
+		return 1;
+	}
+	snprintf(jrnl_path, len, "%s" JRNL_PATH, base_path);
+
+	/*
+	 * 'new' is filled in by init_path(); a freshly created journal
+	 * directory needs no extra initialization, so only the status
+	 * matters here.
+	 */
+	ret = init_path(jrnl_path, &new);
+
+	return ret;
+}
+
int init_store(const char *d)
{
int ret;
@@ -1806,6 +1891,10 @@ int init_store(const char *d)
if (ret)
return ret;
+ ret = init_jrnl_path(d);
+ if (ret)
+ return ret;
+
return ret;
}
@@ -1838,3 +1927,464 @@ int get_global_nr_copies(uint32_t *copies)
{
return attr(epoch_path, ANAME_COPIES, copies, sizeof(*copies), 0);
}
+
+/* Journal APIs */
+/*
+ * Probe for the journal file of (jf_epoch, jf_oid) below jrnl_path.
+ *
+ * Note the return convention: 0 when stat() on the journal path
+ * succeeds, 1 when it fails.
+ */
+int jrnl_exists(jrnl_file_desc_t *jfd)
+{
+	struct stat st;
+	char jrnl_file[1024];
+
+	snprintf(jrnl_file, sizeof(jrnl_file), "%s%08u/%016" PRIx64, jrnl_path,
+		 jfd->jf_epoch, jfd->jf_oid);
+
+	return stat(jrnl_file, &st) ? 1 : 0;
+}
+
+/*
+ * Make sure the per-epoch journal directory "<jrnl_path><epoch>/" is
+ * there: when stat() reports ENOENT the directory is created with
+ * def_dmode.  Always returns 0; the mkdir() result is not examined.
+ */
+int jrnl_update_epoch_store(uint32_t epoch)
+{
+	struct stat st;
+	char epoch_dir[1024];
+
+	snprintf(epoch_dir, sizeof(epoch_dir), "%s%08u/", jrnl_path, epoch);
+
+	if (stat(epoch_dir, &st) >= 0)
+		return 0;
+
+	if (errno != ENOENT)
+		return 0;
+
+	mkdir(epoch_dir, def_dmode);
+	return 0;
+}
+
+/*
+ * Open the journal file of (jf_epoch, jf_oid) with the given open(2)
+ * flags, creating the per-epoch journal directory first if needed.
+ *
+ * On success the descriptor is stored in jfd->jf_fd and SD_RES_SUCCESS
+ * is returned.  On failure jfd->jf_fd is left untouched and the result
+ * is SD_RES_NO_OBJ when the file does not exist, SD_RES_UNKNOWN
+ * otherwise.
+ */
+int jrnl_open(jrnl_file_desc_t *jfd, int aflags)
+{
+	char path[1024];
+	int fd, err;
+
+	jrnl_update_epoch_store(jfd->jf_epoch);
+	snprintf(path, sizeof(path), "%s%08u/%016" PRIx64, jrnl_path,
+		 jfd->jf_epoch, jfd->jf_oid);
+
+	fd = open(path, aflags, def_fmode);
+	if (fd < 0) {
+		/* Save errno before logging: the logging call may change it. */
+		err = errno;
+		eprintf("failed to open %s, %s\n", path, strerror(err));
+		return err == ENOENT ? SD_RES_NO_OBJ : SD_RES_UNKNOWN;
+	}
+
+	jfd->jf_fd = fd;
+	return SD_RES_SUCCESS;
+}
+
+/*
+ * Close the journal file descriptor and mark it as invalid (-1) in the
+ * descriptor.  Always returns 0; the close() result is not examined.
+ */
+int jrnl_close(jrnl_file_desc_t *jfd)
+{
+	int open_fd = jfd->jf_fd;
+
+	jfd->jf_fd = -1;
+	close(open_fd);
+
+	return 0;
+}
+
+/*
+ * Open the journal file for reading and writing, creating it when it
+ * does not exist yet.  Returns whatever jrnl_open() returns.
+ */
+int jrnl_create(jrnl_file_desc_t *jfd)
+{
+	const int create_flags = O_CREAT | O_RDWR;
+
+	return jrnl_open(jfd, create_flags);
+}
+
+/*
+ * Return the journal type of an in-memory descriptor, i.e. the 32-bit
+ * value found at the very start of the header jd->jd_head points to.
+ */
+inline uint32_t jrnl_get_type(jrnl_desc_t *jd)
+{
+	const uint32_t *type_field = jd->jd_head;
+
+	return *type_field;
+}
+
+/*
+ * Read the journal type stored at offset 0 of the open journal file
+ * into *jrnl_type.  Returns SD_RES_SUCCESS when the whole field was
+ * read, SD_RES_EIO otherwise.
+ */
+int jrnl_get_type_from_file(jrnl_file_desc_t *jfd, jrnl_type_t *jrnl_type)
+{
+	ssize_t nread = pread64(jfd->jf_fd, jrnl_type, sizeof(*jrnl_type), 0);
+
+	return nread == sizeof(*jrnl_type) ? SD_RES_SUCCESS : SD_RES_EIO;
+}
+
+
+/*
+ * Unlink the journal file of (jf_epoch, jf_oid).  Returns
+ * SD_RES_SUCCESS, or SD_RES_EIO (after logging) when unlink() fails.
+ */
+int jrnl_remove(jrnl_file_desc_t *jfd)
+{
+	char jrnl_file[1024];
+
+	snprintf(jrnl_file, sizeof(jrnl_file), "%s%08u/%016" PRIx64, jrnl_path,
+		 jfd->jf_epoch, jfd->jf_oid);
+
+	if (unlink(jrnl_file)) {
+		eprintf("failed to remove %s, %s\n", jrnl_file, strerror(errno));
+		return SD_RES_EIO;
+	}
+
+	return SD_RES_SUCCESS;
+}
+
+/* Dispatch has_end_mark to the handler of the descriptor's journal type. */
+int jrnl_has_end_mark(jrnl_desc_t *jd)
+{
+	jrnl_handler_t *ops = &jrnl_handlers[jrnl_get_type(jd)];
+
+	return ops->has_end_mark(jd);
+}
+
+/* Dispatch write_header to the handler of the descriptor's journal type. */
+int jrnl_write_header(jrnl_desc_t *jd)
+{
+	jrnl_handler_t *ops = &jrnl_handlers[jrnl_get_type(jd)];
+
+	return ops->write_header(jd);
+}
+
+/* Dispatch write_data to the handler of the descriptor's journal type. */
+int jrnl_write_data(jrnl_desc_t *jd)
+{
+	jrnl_handler_t *ops = &jrnl_handlers[jrnl_get_type(jd)];
+
+	return ops->write_data(jd);
+}
+
+/* Dispatch write_end_mark to the handler of the descriptor's journal type. */
+int jrnl_write_end_mark(jrnl_desc_t *jd)
+{
+	jrnl_handler_t *ops = &jrnl_handlers[jrnl_get_type(jd)];
+
+	return ops->write_end_mark(jd);
+}
+
+/*
+ * Replay an on-disk journal file onto its target object.
+ *
+ * The journal type is read from the start of the journal file and
+ * selects the handler.  A failed read or a type outside the handler
+ * table is reported as an error instead of being used as an array
+ * index.
+ *
+ * Returns the handler's result, the error from reading the type, or
+ * SD_RES_EIO for an unknown type.
+ */
+int jrnl_apply_to_target_object(jrnl_file_desc_t *jfd)
+{
+	int ret;
+	jrnl_type_t jrnl_type;
+
+	ret = jrnl_get_type_from_file(jfd, &jrnl_type);
+	if (ret != SD_RES_SUCCESS)
+		return ret;
+
+	if (jrnl_type >= JRNL_MAX_TYPES) {
+		eprintf("unknown journal type, %u\n", (unsigned)jrnl_type);
+		return SD_RES_EIO;
+	}
+
+	return jrnl_handlers[jrnl_type].apply_to_target_object(jfd);
+}
+
+/* Dispatch commit_data to the handler of the descriptor's journal type. */
+int jrnl_commit_data(jrnl_desc_t *jd)
+{
+	jrnl_handler_t *ops = &jrnl_handlers[jrnl_get_type(jd)];
+
+	return ops->commit_data(jd);
+}
+
+/*
+ * Perform one journalled update described by jd:
+ *   create journal -> write header -> write data -> write end mark
+ *   -> commit to the target object -> remove journal.
+ *
+ * The journal file descriptor opened by jrnl_create() is closed on
+ * every path, so a call no longer leaks a descriptor.  As before, the
+ * journal file itself is removed only after a successful commit; when
+ * a step fails the file is left in place.
+ *
+ * Returns 0 (SD_RES_SUCCESS) on success, otherwise the result of the
+ * first failing step.
+ */
+int jrnl_perform(jrnl_desc_t *jd)
+{
+	int ret;
+
+	ret = jrnl_create(&jd->jd_jfd);
+	if (ret)
+		return ret;
+
+	ret = jrnl_write_header(jd);
+	if (ret)
+		goto out_close;
+
+	ret = jrnl_write_data(jd);
+	if (ret)
+		goto out_close;
+
+	ret = jrnl_write_end_mark(jd);
+	if (ret)
+		goto out_close;
+
+	ret = jrnl_commit_data(jd);
+
+out_close:
+	/* jrnl_remove() works on epoch/oid only, so closing first is safe. */
+	jrnl_close(&jd->jd_jfd);
+	if (!ret)
+		ret = jrnl_remove(&jd->jd_jfd);
+
+	return ret;
+}
+
+/*
+ * Replay the journal files left in the journal directory of the latest
+ * epoch onto their objects, then remove them.
+ *
+ * Directory entries whose name cannot be parsed as a hexadecimal object
+ * id are skipped, so an uninitialized oid is never used.  The
+ * "recovered" message is logged only when the journal was actually
+ * applied.  As before, the journal file is removed whether or not
+ * recovery of that object succeeded.
+ *
+ * NOTE(review): nothing here checks the end mark before a journal is
+ * applied - confirm whether incomplete journals can reach this point.
+ *
+ * Returns 0 after the directory was processed, 1 when no epoch is
+ * available and -1 when the journal directory cannot be opened.
+ */
+int jrnl_recover(void)
+{
+	DIR *dir;
+	struct dirent *d;
+	char jrnl_dir[1024],
+	     jrnl_file_path[1024],
+	     obj_file_path[1024];
+	int epoch;
+
+	epoch = get_latest_epoch();
+	if (epoch < 0)
+		return 1;
+
+	snprintf(jrnl_dir, sizeof(jrnl_dir), "%s%08u/", jrnl_path, epoch);
+
+	eprintf("Openning the directory%s.\n", jrnl_dir);
+	dir = opendir(jrnl_dir);
+	if (!dir)
+		return -1;
+
+	vprintf(SDOG_NOTICE "start jrnl_recovery.\n");
+	while ((d = readdir(dir))) {
+		int ret;
+		jrnl_file_desc_t jfd;
+
+		if (!strcmp(d->d_name, ".") || !strcmp(d->d_name, ".."))
+			continue;
+
+		jfd.jf_epoch = epoch;
+		/* SCNx64 is the scan-side counterpart of PRIx64. */
+		if (sscanf(d->d_name, "%" SCNx64, &jfd.jf_oid) != 1) {
+			eprintf("skipping unexpected journal entry, %s\n",
+				d->d_name);
+			continue;
+		}
+
+		snprintf(jrnl_file_path, sizeof(jrnl_file_path),
+			 "%s%016" PRIx64, jrnl_dir, jfd.jf_oid);
+		snprintf(obj_file_path, sizeof(obj_file_path),
+			 "%s%08u/%016" PRIx64, obj_path, epoch, jfd.jf_oid);
+
+		ret = jrnl_open(&jfd, O_RDONLY);
+		if (ret) {
+			eprintf("Unable to open the journal file, %s, for reading.\n",
+				jrnl_file_path);
+			goto remove_jrnl;
+		}
+
+		jfd.jf_target_fd = ob_open(epoch, jfd.jf_oid, 0, &ret);
+		if (ret) {
+			eprintf("Unable to open the object file, %s, to recover.\n",
+				obj_file_path);
+			goto close_jrnl;
+		}
+
+		ret = jrnl_apply_to_target_object(&jfd);
+		if (ret)
+			eprintf("Unable to recover the object, %s.\n",
+				obj_file_path);
+		else
+			vprintf(SDOG_INFO "recovered the object in journal, %s\n",
+				jrnl_file_path);
+
+		close(jfd.jf_target_fd);
+		jfd.jf_target_fd = -1;
+close_jrnl:
+		jrnl_close(&jfd);
+remove_jrnl:
+		jrnl_remove(&jfd);
+	}
+	closedir(dir);
+	vprintf(SDOG_NOTICE "end jrnl_recovery.\n");
+
+	return 0;
+}
+
+/* VDI data journalling functions */
+/*
+ * Read the end mark stored right after the payload of a VDI journal
+ * (offset: header size + jh_size).  Returns SET_END_MARK when the value
+ * read is set, UNSET_END_MARK otherwise; the pread64() result itself is
+ * not examined.
+ */
+static int jrnl_vdi_has_end_mark(jrnl_desc_t *jd)
+{
+	jrnl_vdi_head_t *vdi_head = (jrnl_vdi_head_t *) jd->jd_head;
+	end_mark_t mark = UNSET_END_MARK;
+	ssize_t nread;
+
+	nread = pread64(jd->jdf_fd, &mark, sizeof(mark),
+			sizeof(*vdi_head) + vdi_head->jh_size);
+	(void) nread;
+
+	if (IS_END_MARK_SET(mark))
+		return SET_END_MARK;
+
+	return UNSET_END_MARK;
+}
+
+/*
+ * Write the VDI journal header at offset 0 of the journal file.
+ * Returns SD_RES_SUCCESS when the whole header was written,
+ * SD_RES_NO_SPACE when errno is ENOSPC, SD_RES_EIO otherwise.
+ */
+int jrnl_vdi_write_header(jrnl_desc_t *jd)
+{
+	jrnl_vdi_head_t *vdi_head = (jrnl_vdi_head_t *) jd->jd_head;
+	ssize_t written;
+
+	written = pwrite64(jd->jdf_fd, vdi_head, sizeof(*vdi_head), 0);
+	if (written != sizeof(*vdi_head))
+		return errno == ENOSPC ? SD_RES_NO_SPACE : SD_RES_EIO;
+
+	return SD_RES_SUCCESS;
+}
+
+/*
+ * Write the payload (jh_size bytes from jd->jd_data) right after the
+ * header of a VDI journal.  Returns SD_RES_SUCCESS when everything was
+ * written, SD_RES_NO_SPACE when errno is ENOSPC, SD_RES_EIO otherwise.
+ */
+int jrnl_vdi_write_data(jrnl_desc_t *jd)
+{
+	jrnl_vdi_head_t *vdi_head = (jrnl_vdi_head_t *) jd->jd_head;
+	ssize_t written;
+
+	written = pwrite64(jd->jdf_fd, jd->jd_data, vdi_head->jh_size,
+			   sizeof(*vdi_head));
+	if (written != vdi_head->jh_size)
+		return errno == ENOSPC ? SD_RES_NO_SPACE : SD_RES_EIO;
+
+	return SD_RES_SUCCESS;
+}
+
+/*
+ * Write the end mark right after the payload of a VDI journal and
+ * record it in jd->jd_end_mark (the field is updated whether or not
+ * the write succeeded).  Returns SD_RES_SUCCESS, SD_RES_NO_SPACE when
+ * errno is ENOSPC, SD_RES_EIO otherwise.
+ */
+int jrnl_vdi_write_end_mark(jrnl_desc_t *jd)
+{
+	jrnl_vdi_head_t *vdi_head = (jrnl_vdi_head_t *) jd->jd_head;
+	end_mark_t mark = SET_END_MARK;
+	ssize_t written;
+	int res;
+
+	written = pwrite64(jd->jdf_fd, &mark, sizeof(mark),
+			   sizeof(*vdi_head) + vdi_head->jh_size);
+
+	if (written == sizeof(mark))
+		res = SD_RES_SUCCESS;
+	else if (errno == ENOSPC)
+		res = SD_RES_NO_SPACE;
+	else
+		res = SD_RES_EIO;
+
+	jd->jd_end_mark = mark;
+
+	return res;
+}
+
+/*
+ * Replay a VDI journal file: read its header and payload and write the
+ * payload to the target object at the recorded offset.
+ *
+ * The header comes from disk, so it is validated before use: a short
+ * header read, a payload larger than the replay buffer, or a short
+ * payload read is reported as SD_RES_EIO instead of overrunning the
+ * buffer or writing unread bytes to the object.
+ *
+ * NOTE(review): the end mark is not checked here before applying.
+ *
+ * Returns 0 on success, SD_RES_NO_MEM, SD_RES_NO_SPACE or SD_RES_EIO.
+ */
+int jrnl_vdi_apply_to_target_object(jrnl_file_desc_t *jfd)
+{
+	/* FIXME: handle larger size */
+	const size_t buf_len = (1 << 22);
+	jrnl_vdi_head_t jh;
+	ssize_t retsize;
+	char *buf;
+	int res = 0;
+
+	retsize = pread64(jfd->jf_fd, &jh, sizeof(jh), 0);
+	if (retsize != (ssize_t) sizeof(jh)) {
+		eprintf("failed to read the journal header\n");
+		return SD_RES_EIO;
+	}
+
+	if (jh.jh_size > buf_len) {
+		eprintf("journal data is too large, %" PRIu64 "\n", jh.jh_size);
+		return SD_RES_EIO;
+	}
+
+	buf = malloc(buf_len);
+	if (!buf) {
+		eprintf("failed to allocate memory\n");
+		return SD_RES_NO_MEM;
+	}
+
+	/* Flush out journal to disk (vdi object) */
+	retsize = pread64(jfd->jf_fd, buf, jh.jh_size, sizeof(jh));
+	if (retsize < 0 || (uint64_t) retsize != jh.jh_size) {
+		eprintf("failed to read the journal data\n");
+		res = SD_RES_EIO;
+		goto out;
+	}
+
+	retsize = pwrite64(jfd->jf_target_fd, buf, jh.jh_size, jh.jh_offset);
+	if (retsize < 0 || (uint64_t) retsize != jh.jh_size) {
+		if (errno == ENOSPC)
+			res = SD_RES_NO_SPACE;
+		else
+			res = SD_RES_EIO;
+	}
+
+out:
+	/* Clean up */
+	free(buf);
+
+	return res;
+}
+
+/*
+ * Write the in-memory payload (jh_size bytes from jd->jd_data) to the
+ * target object at jh_offset.  Returns 0 when everything was written,
+ * SD_RES_NO_SPACE when errno is ENOSPC, SD_RES_EIO otherwise.
+ */
+static int jrnl_vdi_commit_data(jrnl_desc_t *jd)
+{
+	jrnl_vdi_head_t *vdi_head = (jrnl_vdi_head_t *) jd->jd_head;
+	ssize_t written;
+
+	written = pwrite64(jd->jdf_target_fd, jd->jd_data, vdi_head->jh_size,
+			   vdi_head->jh_offset);
+	if (written != vdi_head->jh_size)
+		return errno == ENOSPC ? SD_RES_NO_SPACE : SD_RES_EIO;
+
+	return 0;
+}
+
+/* VDI check sum journalling functions */
+
+/* FIXME: Implement this function */
+
+/*
+ * Read the end mark stored right after the payload of a checksum
+ * journal (offset: header size + jh_size).  Returns SET_END_MARK when
+ * the value read is set, UNSET_END_MARK otherwise; the pread64() result
+ * itself is not examined.
+ */
+static int jrnl_cksum_has_end_mark(jrnl_desc_t *jd)
+{
+	jrnl_cksum_head_t *cksum_head = (jrnl_cksum_head_t *) jd->jd_head;
+	end_mark_t mark = UNSET_END_MARK;
+	ssize_t nread;
+
+	nread = pread64(jd->jdf_fd, &mark, sizeof(mark),
+			sizeof(*cksum_head) + cksum_head->jh_size);
+	(void) nread;
+
+	if (IS_END_MARK_SET(mark))
+		return SET_END_MARK;
+
+	return UNSET_END_MARK;
+}
+
+/*
+ * Write the checksum journal header at offset 0 of the journal file.
+ * Returns SD_RES_SUCCESS when the whole header was written,
+ * SD_RES_NO_SPACE when errno is ENOSPC, SD_RES_EIO otherwise.
+ */
+int jrnl_cksum_write_header(jrnl_desc_t *jd)
+{
+	jrnl_cksum_head_t *cksum_head = (jrnl_cksum_head_t *) jd->jd_head;
+	ssize_t written;
+
+	written = pwrite64(jd->jdf_fd, cksum_head, sizeof(*cksum_head), 0);
+	if (written != sizeof(*cksum_head))
+		return errno == ENOSPC ? SD_RES_NO_SPACE : SD_RES_EIO;
+
+	return SD_RES_SUCCESS;
+}
+
+/*
+ * Write the checksum payload (jh_size bytes from jd->jd_data) right
+ * after the header of a checksum journal.  Returns SD_RES_SUCCESS when
+ * everything was written, SD_RES_NO_SPACE when errno is ENOSPC,
+ * SD_RES_EIO otherwise.
+ */
+int jrnl_cksum_write_data(jrnl_desc_t *jd)
+{
+	jrnl_cksum_head_t *cksum_head = (jrnl_cksum_head_t *) jd->jd_head;
+	jrnl_cksum_data_t *payload = (jrnl_cksum_data_t *) jd->jd_data;
+	ssize_t written;
+
+	written = pwrite64(jd->jdf_fd, payload, cksum_head->jh_size,
+			   sizeof(*cksum_head));
+	if (written != cksum_head->jh_size)
+		return errno == ENOSPC ? SD_RES_NO_SPACE : SD_RES_EIO;
+
+	return SD_RES_SUCCESS;
+}
+
+/*
+ * Write the end mark right after the payload of a checksum journal and
+ * record it in jd->jd_end_mark (the field is updated whether or not
+ * the write succeeded).  Returns SD_RES_SUCCESS, SD_RES_NO_SPACE when
+ * errno is ENOSPC, SD_RES_EIO otherwise.
+ */
+int jrnl_cksum_write_end_mark(jrnl_desc_t *jd)
+{
+	jrnl_cksum_head_t *cksum_head = (jrnl_cksum_head_t *) jd->jd_head;
+	end_mark_t mark = SET_END_MARK;
+	ssize_t written;
+	int res;
+
+	written = pwrite64(jd->jdf_fd, &mark, sizeof(mark),
+			   sizeof(*cksum_head) + cksum_head->jh_size);
+
+	if (written == sizeof(mark))
+		res = SD_RES_SUCCESS;
+	else if (errno == ENOSPC)
+		res = SD_RES_NO_SPACE;
+	else
+		res = SD_RES_EIO;
+
+	jd->jd_end_mark = mark;
+
+	return res;
+}
+
+/*
+ * Replay a checksum journal file: read its header and payload and set
+ * the recorded extended attribute on the target object.
+ *
+ * The header comes from disk, so it is validated before use: the
+ * payload size must be exactly sizeof(jrnl_cksum_data_t) (the only
+ * size the checksum writer in verify_object() records), otherwise the
+ * read into the on-stack payload could overflow it or leave it partly
+ * uninitialized.  The attribute name is forcibly NUL-terminated before
+ * it is handed to fsetxattr().
+ *
+ * Returns SD_RES_SUCCESS, or SD_RES_EIO on a malformed journal or when
+ * the attribute cannot be set.
+ */
+int jrnl_cksum_apply_to_target_object(jrnl_file_desc_t *jfd)
+{
+	int ret;
+	ssize_t retsize;
+	jrnl_cksum_head_t head;
+	jrnl_cksum_data_t data;
+
+	retsize = pread64(jfd->jf_fd, &head, sizeof(head), 0);
+	if (retsize != (ssize_t) sizeof(head)) {
+		eprintf("failed to read the journal header\n");
+		return SD_RES_EIO;
+	}
+
+	if (head.jh_size != sizeof(data)) {
+		eprintf("unexpected journal data size, %" PRIu64 "\n",
+			head.jh_size);
+		return SD_RES_EIO;
+	}
+
+	/* Flush out journal to disk (vdi object) */
+	retsize = pread64(jfd->jf_fd, &data, sizeof(data), sizeof(head));
+	if (retsize != (ssize_t) sizeof(data)) {
+		eprintf("failed to read the journal data\n");
+		return SD_RES_EIO;
+	}
+	data.aname_cksum[sizeof(data.aname_cksum) - 1] = '\0';
+
+	ret = fsetxattr(jfd->jf_target_fd, data.aname_cksum, &data.aval_cksum,
+			sizeof(data.aval_cksum), 0);
+	if (ret < 0) {
+		eprintf("failed to set xattr, %m\n");
+		return SD_RES_EIO;
+	}
+
+	return SD_RES_SUCCESS;
+}
+
+/*
+ * Set the checksum extended attribute described by the in-memory
+ * payload on the target object.  Returns SD_RES_SUCCESS, or SD_RES_EIO
+ * (after logging) when fsetxattr() fails.
+ */
+static int jrnl_cksum_commit_data(jrnl_desc_t *jd)
+{
+	jrnl_cksum_data_t *payload = (jrnl_cksum_data_t *) jd->jd_data;
+
+	if (fsetxattr(jd->jdf_target_fd, payload->aname_cksum,
+		      &payload->aval_cksum, sizeof(payload->aval_cksum),
+		      0) < 0) {
+		eprintf("failed to set xattr, %m\n");
+		return SD_RES_EIO;
+	}
+
+	return SD_RES_SUCCESS;
+}
+
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.wpkg.org/pipermail/sheepdog/attachments/20101221/ef9930dc/attachment-0002.html>
More information about the sheepdog
mailing list