[stgt] [PATCH] Add support for gluster backing store
Dan Lambright
dlambrig at redhat.com
Wed Mar 12 15:55:24 CET 2014
Gluster is a distributed file system (www.gluster.org). The
backing store gives block access protocols such as iSCSI
access to data within gluster.
Gluster data resides on volumes which are physically located
on servers. A "LUN" is represented by a file within
a volume. To specify which file to use:
--bstype=glfs
--backing-store="volume@hostname:filename"
Optionally, logs may be specified. See README.glfs
All I/Os are synchronous. This is based off the rdrw and
rbd backend storage drivers.
Signed-off-by: Dan Lambright <dlambrig at redhat.com>
---
Makefile | 1 +
doc/README.glfs | 50 +++++
usr/Makefile | 7 +
usr/bs_glfs.c | 561 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++
4 files changed, 619 insertions(+)
create mode 100644 doc/README.glfs
create mode 100644 usr/bs_glfs.c
diff --git a/Makefile b/Makefile
index b74f4ae..5fb6cfa 100644
--- a/Makefile
+++ b/Makefile
@@ -12,6 +12,7 @@ export VERSION PREFIX
# Export the feature switches so sub-make knows about them
export ISCSI_RDMA
export CEPH_RBD
+export GLFS_BD
.PHONY: all
all: programs doc conf scripts
diff --git a/doc/README.glfs b/doc/README.glfs
new file mode 100644
index 0000000..77c607e
--- /dev/null
+++ b/doc/README.glfs
@@ -0,0 +1,50 @@
+The 'glfs' backing-store driver provides block access to a file within the
+gluster distributed file system (www.gluster.org). The file represents a
+LUN visible to the initiator (the file may be a regular file in gluster's
+underlying XFS filesystem, or a gluster "block device"). This configuration
+gives gluster support for any access method supported by the target driver,
+such as iSCSI.
+
+The backing-store driver uses the interfaces to gluster in libgfapi.so.
+This file is part of the glusterfs-api package which can be downloaded from
+www.gluster.org.
+
+To build the glfs backing store driver, set the GLFS_BD environment
+variable (export GLFS_BD=1) before running make.
+
+When a LUN on a target is created with backing store of type glfs (--bstype
+glfs), a handle to gluster is created. The handle is destroyed when the
+target is closed. The handle represents a particular volume on a gluster
+server and a particular file representing the LUN.
+
+To set the gluster volume and file, --backing-store takes the form:
+volume@hostname:filename
+
+For example, if the volume name was vol1, the host gprfs010, and the file
+name "disk1", you would set --backing-store to:
+
+--backing-store="vol1@gprfs010:disk1"
+
+For each CDB, the driver issues an appropriate gluster api call against the
+handle. For example, WRITE_16 becomes glfs_pwrite(), SYNCHRONIZE_CACHE
+becomes glfs_fdatasync() and UNMAP becomes glfs_discard().
+
+Each call is synchronous, meaning the thread will wait for a response from
+gluster before returning to the caller. The libgfapi interfaces support
+asynchronous calls, and an asynchronous version of the driver has been
+tested. For more information on the asynchronous version please contact
+dlambrig at redhat.com.
+
+If the backing store driver were not used, the linux target driver could
+still write data to gluster by loopback mounting the gluster file system.
+The backing-store would be a file within the file system. Gluster uses FUSE
+to forward I/O from the kernel to its user space daemon. The overhead
+incurred by FUSE and the kernel is removed when the backing store driver and
+libgfapi package are used.
+
+The libgfapi interfaces support logging. You can use the --bsopts option to
+set the logfile and loglevel.
+
+--bsopts="logfile=glfs.log;loglevel=3"
+
+
diff --git a/usr/Makefile b/usr/Makefile
index e29826c..6234f3f 100644
--- a/usr/Makefile
+++ b/usr/Makefile
@@ -17,6 +17,10 @@ ifneq ($(CEPH_RBD),)
MODULES += bs_rbd.so
endif
+ifneq ($(GLFS_BD),)
+MODULES += bs_glfs.so
+endif
+
ifneq ($(shell test -e /usr/include/sys/eventfd.h && test -e /usr/include/libaio.h && echo 1),)
CFLAGS += -DUSE_EVENTFD
TGTD_OBJS += bs_aio.o
@@ -88,6 +92,9 @@ tgtimg: $(TGTIMG_OBJS)
bs_rbd.so: bs_rbd.c
$(CC) -shared $(CFLAGS) bs_rbd.c -o bs_rbd.so -lrados -lrbd
+bs_glfs.so: bs_glfs.c
+ $(CC) -I/usr/include/glusterfs/api -shared $(CFLAGS) bs_glfs.c -o bs_glfs.so -lgfapi
+
.PHONY: install
install: $(PROGRAMS) $(MODULES)
install -d -m 755 $(DESTDIR)$(sbindir)
diff --git a/usr/bs_glfs.c b/usr/bs_glfs.c
new file mode 100644
index 0000000..bb09f33
--- /dev/null
+++ b/usr/bs_glfs.c
@@ -0,0 +1,561 @@
+/*
+ * Synchronous glfs backing store routines
+ *
+ * modified from bs_rbd.c
+ * Copyright (C) 2013 Dan Lambright <dlambrig at redhat.com>
+ * Copyright (C) 2006-2007 FUJITA Tomonori <tomof at acm.org>
+ * Copyright (C) 2006-2007 Mike Christie <michaelc at cs.wisc.edu>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation, version 2 of the
+ * License.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
+ * 02110-1301 USA
+ */
+#define _XOPEN_SOURCE 600
+
+#include <errno.h>
+#include <fcntl.h>
+#include <inttypes.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <unistd.h>
+
+#include <linux/fs.h>
+#include <sys/epoll.h>
+
+#include "list.h"
+#include "util.h"
+#include "tgtd.h"
+#include "scsi.h"
+#include "spc.h"
+#include "bs_thread.h"
+
+#include "glfs.h"
+
+/*
+ * Per-LUN gluster state.  It is stored directly behind the LUN's
+ * struct scsi_lu and struct bs_thread_info and is reached through the
+ * GFSP() macro.
+ */
+struct active_glfs {
+ /* not referenced anywhere in this file */
+ char *name;
+ /* volume handle obtained from glfs_new() in bs_glfs_open() */
+ glfs_t *fs;
+ /* open file handle obtained from glfs_open() in bs_glfs_open() */
+ glfs_fd_t *gfd;
+ /* value of the "logfile" bsopts option, NULL when not given */
+ char *logfile;
+ /* value of the "loglevel" bsopts option, 0 when not given */
+ int loglevel;
+};
+
+/*
+ * Open flags advertised through bs_oflags_supported; bs_glfs_open() also
+ * uses this set as the default when the LUN carries no bsoflags.
+ */
+#define ALLOWED_BSOFLAGS (O_SYNC | O_DIRECT | O_RDWR | O_LARGEFILE)
+
+/* TCP port handed to glfs_set_volfile_server(). */
+#define GLUSTER_PORT 24007
+
+/*
+ * Locate the struct active_glfs stored directly behind the LUN's
+ * struct scsi_lu and struct bs_thread_info.  The argument is
+ * parenthesized so that the cast applies to the whole expression passed
+ * in (e.g. GFSP(base + 1)), not only to its first operand.
+ */
+#define GFSP(lu) ((struct active_glfs *) \
+	((char *)(lu) + \
+	 sizeof(struct scsi_lu) + \
+	 sizeof(struct bs_thread_info)) \
+	)
+
+/*
+ * Fill in the status/sense triple used for a failed medium access:
+ * CHECK CONDITION, sense key MEDIUM_ERROR, additional sense code
+ * ASC_READ_ERROR.
+ */
+static void set_medium_error(int *result, uint8_t *key, uint16_t *asc)
+{
+	*asc = ASC_READ_ERROR;
+	*key = MEDIUM_ERROR;
+	*result = SAM_STAT_CHECK_CONDITION;
+}
+
+/*
+ * Release the byte range [offset, offset + len) of the backing file.
+ *
+ * Only built with a real glfs_discard() call when BS_GLFS_DISCARD is
+ * defined; otherwise the request is accepted and 0 is returned without
+ * touching the file.
+ */
+static int bs_glfs_discard(glfs_fd_t *gfd, off_t offset, size_t len)
+{
+#ifdef BS_GLFS_DISCARD
+	return glfs_discard(gfd, offset, len);
+#else
+	return 0;
+#endif
+}
+
+/*
+ * Execute one SCSI command against the gluster file that backs the LUN.
+ *
+ * This is the request handler registered with bs_thread_open() in
+ * bs_glfs_init().  The opcode in cmd->scb[0] selects the gluster call,
+ * and every gluster call made here is synchronous.  The resulting SCSI
+ * status is stored with scsi_set_result(); for anything other than
+ * SAM_STAT_GOOD, sense data is built from the key/asc chosen below.
+ * Opcodes without a case are completed with SAM_STAT_GOOD and no I/O.
+ */
+static void bs_glfs_request(struct scsi_cmd *cmd)
+{
+	glfs_fd_t *gfd = GFSP(cmd->dev)->gfd;
+	struct scsi_lu *lu = cmd->dev;
+	int ret = 0;
+	uint32_t length = 0;
+	int result = SAM_STAT_GOOD;
+	uint8_t key = 0;
+	uint16_t asc = 0;
+	char *tmpbuf;
+	size_t blocksize;
+	uint64_t offset = cmd->offset;
+	uint32_t tl = cmd->tl;
+	int do_verify = 0;
+	uint32_t i;
+	char *ptr;
+	const char *write_buf = NULL;
+
+	switch (cmd->scb[0]) {
+	case ORWRITE_16:
+		length = scsi_get_out_length(cmd);
+
+		tmpbuf = malloc(length);
+		if (!tmpbuf) {
+			result = SAM_STAT_CHECK_CONDITION;
+			key = HARDWARE_ERROR;
+			asc = ASC_INTERNAL_TGT_FAILURE;
+			break;
+		}
+
+		ret = glfs_pread(gfd, tmpbuf, length, offset, lu->bsoflags);
+
+		if (ret != length) {
+			set_medium_error(&result, &key, &asc);
+			free(tmpbuf);
+			break;
+		}
+
+		/* OR the current on-disk data into the data-out buffer */
+		ptr = scsi_get_out_buffer(cmd);
+		for (i = 0; i < length; i++)
+			ptr[i] |= tmpbuf[i];
+
+		free(tmpbuf);
+
+		write_buf = scsi_get_out_buffer(cmd);
+		goto write;
+	case COMPARE_AND_WRITE:
+		/* Blocks are transferred twice, first the set that
+		 * we compare to the existing data, and second the set
+		 * to write if the compare was successful.
+		 */
+		length = scsi_get_out_length(cmd) / 2;
+		if (length != cmd->tl) {
+			result = SAM_STAT_CHECK_CONDITION;
+			key = ILLEGAL_REQUEST;
+			asc = ASC_INVALID_FIELD_IN_CDB;
+			break;
+		}
+
+		tmpbuf = malloc(length);
+		if (!tmpbuf) {
+			result = SAM_STAT_CHECK_CONDITION;
+			key = HARDWARE_ERROR;
+			asc = ASC_INTERNAL_TGT_FAILURE;
+			break;
+		}
+
+		ret = glfs_pread(gfd, tmpbuf, length, offset, SEEK_SET);
+
+		if (ret != length) {
+			set_medium_error(&result, &key, &asc);
+			free(tmpbuf);
+			break;
+		}
+
+		if (memcmp(scsi_get_out_buffer(cmd), tmpbuf, length)) {
+			/*
+			 * Data differed.  The offset of the first
+			 * differing byte is not reported in the sense
+			 * data, so no byte-by-byte scan is done here.
+			 */
+			result = SAM_STAT_CHECK_CONDITION;
+			key = MISCOMPARE;
+			asc = ASC_MISCOMPARE_DURING_VERIFY_OPERATION;
+			free(tmpbuf);
+			break;
+		}
+
+		free(tmpbuf);
+
+		/* second half of the data-out buffer is the data to write */
+		write_buf = scsi_get_out_buffer(cmd) + length;
+		goto write;
+	case SYNCHRONIZE_CACHE:
+	case SYNCHRONIZE_CACHE_16:
+		/* TODO: honour the LBA range; the whole file is synced */
+		if (cmd->scb[1] & 0x2) {
+			result = SAM_STAT_CHECK_CONDITION;
+			key = ILLEGAL_REQUEST;
+			asc = ASC_INVALID_FIELD_IN_CDB;
+		} else {
+			ret = glfs_fdatasync(gfd);
+			if (ret)
+				set_medium_error(&result, &key, &asc);
+		}
+		break;
+	case WRITE_VERIFY:
+	case WRITE_VERIFY_12:
+	case WRITE_VERIFY_16:
+		do_verify = 1;
+		/* fallthrough */
+	case WRITE_6:
+	case WRITE_10:
+	case WRITE_12:
+	case WRITE_16:
+		length = scsi_get_out_length(cmd);
+		write_buf = scsi_get_out_buffer(cmd);
+write:
+		ret = glfs_pwrite(gfd, write_buf, length, offset, lu->bsoflags);
+
+		if (ret == length) {
+			struct mode_pg *pg;
+
+			/*
+			 * it would be better not to access pg
+			 * directly.
+			 */
+			pg = find_mode_page(cmd->dev, 0x08, 0);
+			if (pg == NULL) {
+				result = SAM_STAT_CHECK_CONDITION;
+				key = ILLEGAL_REQUEST;
+				asc = ASC_INVALID_FIELD_IN_CDB;
+				break;
+			}
+			/*
+			 * Flush when bit 0x8 of CDB byte 1 is set (not
+			 * for WRITE_6) or when bit 0x04 of the mode page
+			 * 0x08 data is clear.  A failed flush means the
+			 * data may not be stable, so report it.
+			 */
+			if (((cmd->scb[0] != WRITE_6) && (cmd->scb[1] & 0x8)) ||
+			    !(pg->mode_data[0] & 0x04)) {
+				if (glfs_fdatasync(gfd))
+					set_medium_error(&result, &key, &asc);
+			}
+		} else
+			set_medium_error(&result, &key, &asc);
+
+		if (do_verify)
+			goto verify;
+		break;
+	case WRITE_SAME:
+	case WRITE_SAME_16:
+		/* WRITE_SAME used to punch hole in file */
+		if (cmd->scb[1] & 0x08) {
+			ret = bs_glfs_discard(gfd, offset, tl);
+			if (ret != 0) {
+				eprintf("Failed to punch hole for "
+					"WRITE_SAME command\n");
+				result = SAM_STAT_CHECK_CONDITION;
+				key = HARDWARE_ERROR;
+				asc = ASC_INTERNAL_TGT_FAILURE;
+			}
+			break;
+		}
+
+		blocksize = 1 << cmd->dev->blk_shift;
+		tmpbuf = scsi_get_out_buffer(cmd);
+
+		while (tl > 0) {
+			switch (cmd->scb[1] & 0x06) {
+			case 0x02: /* PBDATA==0 LBDATA==1 */
+				put_unaligned_be32(offset, tmpbuf);
+				break;
+			case 0x04: /* PBDATA==1 LBDATA==0 */
+				/* physical sector format */
+				put_unaligned_be64(offset, tmpbuf);
+				break;
+			}
+
+			ret = glfs_pwrite(gfd, tmpbuf, blocksize,
+					  offset, lu->bsoflags);
+
+			if (ret != blocksize) {
+				/* stop at the first failing block */
+				set_medium_error(&result, &key, &asc);
+				break;
+			}
+
+			offset += blocksize;
+			/* tl is unsigned: never let it wrap below zero */
+			tl = (tl > blocksize) ? tl - blocksize : 0;
+		}
+		break;
+	case READ_6:
+	case READ_10:
+	case READ_12:
+	case READ_16:
+		length = scsi_get_in_length(cmd);
+		ret = glfs_pread(gfd, scsi_get_in_buffer(cmd),
+				 length, offset, SEEK_SET);
+
+		if (ret != length) {
+			eprintf("Error on read %x %x\n", ret, length);
+			set_medium_error(&result, &key, &asc);
+		}
+		break;
+	case PRE_FETCH_10:
+	case PRE_FETCH_16:
+		/* no read-ahead is issued; the command simply succeeds */
+		break;
+	case VERIFY_10:
+	case VERIFY_12:
+	case VERIFY_16:
+verify:
+		length = scsi_get_out_length(cmd);
+
+		tmpbuf = malloc(length);
+		if (!tmpbuf) {
+			result = SAM_STAT_CHECK_CONDITION;
+			key = HARDWARE_ERROR;
+			asc = ASC_INTERNAL_TGT_FAILURE;
+			break;
+		}
+
+		ret = glfs_pread(gfd, tmpbuf, length, offset, lu->bsoflags);
+
+		if (ret != length)
+			set_medium_error(&result, &key, &asc);
+		else if (memcmp(scsi_get_out_buffer(cmd), tmpbuf, length)) {
+			result = SAM_STAT_CHECK_CONDITION;
+			key = MISCOMPARE;
+			asc = ASC_MISCOMPARE_DURING_VERIFY_OPERATION;
+		}
+
+		free(tmpbuf);
+		break;
+	case UNMAP:
+		if (!cmd->dev->attrs.thinprovisioning) {
+			result = SAM_STAT_CHECK_CONDITION;
+			key = ILLEGAL_REQUEST;
+			asc = ASC_INVALID_FIELD_IN_CDB;
+			break;
+		}
+
+		length = scsi_get_out_length(cmd);
+		tmpbuf = scsi_get_out_buffer(cmd);
+
+		/* skip the 8-byte parameter list header */
+		if (length < 8)
+			break;
+
+		length -= 8;
+		tmpbuf += 8;
+
+		/* each descriptor is 16 bytes: 8-byte LBA, 4-byte count */
+		while (length >= 16) {
+			offset = get_unaligned_be64(&tmpbuf[0]);
+			offset = offset << cmd->dev->blk_shift;
+
+			tl = get_unaligned_be32(&tmpbuf[8]);
+			tl = tl << cmd->dev->blk_shift;
+
+			if (offset + tl > cmd->dev->size) {
+				eprintf("UNMAP beyond EOF\n");
+				result = SAM_STAT_CHECK_CONDITION;
+				key = ILLEGAL_REQUEST;
+				asc = ASC_LBA_OUT_OF_RANGE;
+				break;
+			}
+
+			if (tl > 0) {
+				if (bs_glfs_discard(gfd, offset, tl) != 0) {
+					eprintf("Failed to punch hole for"
+						" UNMAP at offset:%" PRIu64
+						" length:%u\n",
+						offset, tl);
+					result = SAM_STAT_CHECK_CONDITION;
+					key = HARDWARE_ERROR;
+					asc = ASC_INTERNAL_TGT_FAILURE;
+					break;
+				}
+			}
+
+			length -= 16;
+			tmpbuf += 16;
+		}
+		break;
+	default:
+		break;
+	}
+
+	dprintf("io done %p %x %d %u\n", cmd, cmd->scb[0], ret, length);
+
+	scsi_set_result(cmd, result);
+
+	if (result != SAM_STAT_GOOD) {
+		eprintf("io error %p %x %x %d %u %" PRIu64 ", %m\n",
+			cmd, result, cmd->scb[0], ret, length, offset);
+		sense_data_build(cmd, key, asc);
+	}
+}
+
+/*
+ * Split an image specification of the form "a@b:c" into three newly
+ * allocated strings which the caller must free():
+ *
+ *   *server receives the text before the first '@' ("" if there is none)
+ *   *vol    receives the text after the first ':' that follows it
+ *           ("" if there is none)
+ *   *path   receives whatever lies between the two separators
+ *
+ * NOTE: the parameter names do not describe how bs_glfs_open() uses the
+ * results.  It passes (&volname, &pathname, &servername), so for
+ * "volume@hostname:filename" the first output is the volume, the second
+ * the file name and the third the host name.
+ *
+ * If any allocation fails, all three outputs are set to NULL.
+ */
+static void parse_imagepath(char *image, char **server, char **vol, char **path)
+{
+	char *origp, *p, *sep;
+
+	*server = NULL;
+	*vol = NULL;
+	*path = NULL;
+
+	origp = strdup(image);
+	if (origp == NULL)
+		return;
+
+	p = origp;
+	sep = strchr(p, '@');
+	if (sep == NULL) {
+		*server = strdup("");
+	} else {
+		*sep = '\0';
+		*server = strdup(p);
+		p = sep + 1;
+	}
+
+	sep = strchr(p, ':');
+	if (sep == NULL) {
+		*vol = strdup("");
+	} else {
+		*vol = strdup(sep + 1);
+		*sep = '\0';
+	}
+
+	/* p now points at the NUL-terminated middle part */
+	*path = strdup(p);
+	free(origp);
+
+	if (*server == NULL || *vol == NULL || *path == NULL) {
+		free(*server);
+		free(*vol);
+		free(*path);
+		*server = NULL;
+		*vol = NULL;
+		*path = NULL;
+	}
+}
+
+/*
+ * Connect to the gluster volume named in image ("volume@hostname:filename")
+ * and open the file that backs the LUN.
+ *
+ * On success the volume and file handles are stored in the LUN's
+ * struct active_glfs, *size is set to the file size reported by
+ * glfs_lstat() and 0 is returned.  On any failure every handle acquired
+ * here is released again, the LUN state is left untouched and -EIO is
+ * returned.  The fd argument is not used by this backing store.
+ */
+static int bs_glfs_open(struct scsi_lu *lu, char *image, int *fd,
+			uint64_t *size)
+{
+	struct active_glfs *state = GFSP(lu);
+	char *servername = NULL;
+	char *volname = NULL;
+	char *pathname = NULL;
+	int bsoflags = ALLOWED_BSOFLAGS;
+	glfs_t *fs = NULL;
+	glfs_fd_t *gfd = NULL;
+	struct stat st;
+	int ret = -EIO;
+
+	parse_imagepath(image, &volname, &pathname, &servername);
+	if (!volname || !pathname || !servername) {
+		eprintf("out of memory while parsing %s\n", image);
+		goto out;
+	}
+
+	/* a missing '@' or ':' leaves the volume or file name empty */
+	if (*volname == '\0' || *pathname == '\0') {
+		eprintf("backing store must be volume@hostname:filename,"
+			" got %s\n", image);
+		goto out;
+	}
+
+	fs = glfs_new(volname);
+	if (!fs)
+		goto out;
+
+	if (glfs_set_volfile_server(fs, "tcp", servername, GLUSTER_PORT))
+		goto out;
+
+	/* configure logging before glfs_init() so its messages are kept */
+	if (state->logfile)
+		glfs_set_logging(fs, state->logfile, state->loglevel);
+
+	if (glfs_init(fs))
+		goto out;
+
+	if (lu->bsoflags)
+		bsoflags = lu->bsoflags;
+
+	gfd = glfs_open(fs, pathname, bsoflags);
+	if (gfd == NULL)
+		goto out;
+
+	if (glfs_lstat(fs, pathname, &st))
+		goto out;
+
+	/* st_size is assigned without a narrowing cast to long */
+	*size = st.st_size;
+
+	/* publish the handles only once everything has succeeded */
+	state->fs = fs;
+	state->gfd = gfd;
+	ret = 0;
+out:
+	if (ret) {
+		if (gfd)
+			glfs_close(gfd);
+		if (fs)
+			glfs_fini(fs);
+	}
+
+	free(servername);
+	free(volname);
+	free(pathname);
+
+	return ret;
+}
+
+/*
+ * Release the gluster file and volume handles of a LUN.
+ *
+ * Each handle is tested on its own and cleared after it has been
+ * released, so that a later bs_glfs_exit() (which tests the same
+ * pointers) does not close or finalize them a second time.
+ */
+static void bs_glfs_close(struct scsi_lu *lu)
+{
+	struct active_glfs *state = GFSP(lu);
+
+	if (state->gfd) {
+		glfs_close(state->gfd);
+		state->gfd = NULL;
+	}
+
+	if (state->fs) {
+		glfs_fini(state->fs);
+		state->fs = NULL;
+	}
+}
+
+/*
+ * Copy the text at *p up to (not including) the next ';' or the end of
+ * the string into a newly allocated, NUL-terminated buffer.
+ *
+ * *p is advanced past the copied text and past the ';' if one was found;
+ * this happens even when the allocation fails, so callers looping over
+ * an option string always make progress.
+ *
+ * Returns the copy (caller frees) or NULL if memory is exhausted.
+ */
+static char *slurp_to_semi(char **p)
+{
+	char *start = *p;
+	char *end = strchr(start, ';');
+	size_t len;
+	char *ret;
+
+	if (end == NULL)
+		end = start + strlen(start);
+	len = end - start;
+
+	/* Jump past the semicolon, if we stopped at one */
+	*p = (*end == ';') ? end + 1 : end;
+
+	ret = malloc(len + 1);
+	if (ret == NULL)
+		return NULL;
+
+	memcpy(ret, start, len);
+	ret[len] = '\0';
+	return ret;
+}
+
+/*
+ * Return a newly allocated copy of the value of the "key=value" option
+ * at *p, i.e. the text between the first '=' and the next ';' or the end
+ * of the string.  *p is advanced as done by slurp_to_semi().
+ *
+ * Returns NULL, leaving *p untouched, when there is no '=' in *p.
+ */
+static char *slurp_value(char **p)
+{
+	char *equal = strchr(*p, '=');
+
+	if (equal == NULL)
+		return NULL;
+
+	*p = equal + 1;
+	return slurp_to_semi(p);
+}
+
+/*
+ * Tell whether the string p starts with the option name opt immediately
+ * followed by '='.  Returns 1 if so, 0 otherwise.
+ */
+static int is_opt(const char *opt, char *p)
+{
+	const size_t name_len = strlen(opt);
+
+	if (strncmp(p, opt, name_len) != 0)
+		return 0;
+
+	return p[name_len] == '=' ? 1 : 0;
+}
+
+/*
+ * Parse the ';'-separated bsopts string ("logfile=...;loglevel=N"),
+ * store the logging settings in the LUN's struct active_glfs and start
+ * the worker threads that will run bs_glfs_request().
+ *
+ * Anything that is not a recognized "name=value" option is reported and
+ * skipped, so that the parser always advances through the string.
+ * When an option is given more than once, the last value is used.
+ *
+ * Returns the result of bs_thread_open().
+ */
+static tgtadm_err bs_glfs_init(struct scsi_lu *lu, char *bsopts)
+{
+	struct bs_thread_info *info = BS_THREAD_I(lu);
+	char *logfile = NULL;
+	int loglevel = 0;
+	char *value;
+
+	while (bsopts && *bsopts) {
+		if (is_opt("logfile", bsopts)) {
+			/* drop the value of an earlier "logfile=" */
+			free(logfile);
+			logfile = slurp_value(&bsopts);
+		} else if (is_opt("loglevel", bsopts)) {
+			value = slurp_value(&bsopts);
+			if (value)
+				loglevel = atoi(value);
+			free(value);
+		} else {
+			/* consume the token so the loop cannot spin forever */
+			value = slurp_to_semi(&bsopts);
+			eprintf("bs_glfs: ignoring unknown option %s\n",
+				value ? value : "");
+			free(value);
+		}
+	}
+
+	GFSP(lu)->logfile = logfile;
+	GFSP(lu)->loglevel = loglevel;
+
+	return bs_thread_open(info, bs_glfs_request, nr_iothreads);
+}
+
+/*
+ * Tear down the backing store of a LUN.
+ *
+ * bs_thread_close() is called first so that (presumably) no worker is
+ * still inside bs_glfs_request() using the gluster handles when they are
+ * released.  Handles are cleared after release, and the logfile string
+ * allocated by bs_glfs_init() is freed.
+ */
+static void bs_glfs_exit(struct scsi_lu *lu)
+{
+	struct bs_thread_info *info = BS_THREAD_I(lu);
+	struct active_glfs *state = GFSP(lu);
+
+	bs_thread_close(info);
+
+	if (state->gfd) {
+		glfs_close(state->gfd);
+		state->gfd = NULL;
+	}
+
+	if (state->fs) {
+		glfs_fini(state->fs);
+		state->fs = NULL;
+	}
+
+	free(state->logfile);
+	state->logfile = NULL;
+}
+
+/*
+ * Backing store descriptor for "--bstype glfs".  bs_datasize reserves
+ * room for the thread info plus the struct active_glfs that GFSP()
+ * locates behind it.
+ */
+static struct backingstore_template glfs_bst = {
+	.bs_name		= "glfs",
+	.bs_oflags_supported	= ALLOWED_BSOFLAGS,
+	.bs_datasize		= sizeof(struct bs_thread_info) +
+				  sizeof(struct active_glfs),
+	.bs_init		= bs_glfs_init,
+	.bs_exit		= bs_glfs_exit,
+	.bs_open		= bs_glfs_open,
+	.bs_close		= bs_glfs_close,
+	.bs_cmd_submit		= bs_thread_cmd_submit,
+};
+
+/* Module entry point: make the glfs backing store template known to tgtd. */
+void register_bs_module(void)
+{
+	struct backingstore_template *tmpl = &glfs_bst;
+
+	register_backingstore_template(tmpl);
+}
--
1.8.5.3
--
To unsubscribe from this list: send the line "unsubscribe stgt" in
the body of a message to majordomo at vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
More information about the stgt
mailing list