[stgt] [PATCH 1/4] iser, new implementation

Alexander Nezhinsky alexandern at Voltaire.COM
Wed Oct 13 15:31:24 CEST 2010


Signed-off-by: Alexander Nezhinsky <alexandern at voltaire.com>
---
 usr/iscsi/iser.h |  271 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 files changed, 271 insertions(+), 0 deletions(-)
 create mode 100644 usr/iscsi/iser.h

diff --git a/usr/iscsi/iser.h b/usr/iscsi/iser.h
new file mode 100644
index 0000000..45f42a2
--- /dev/null
+++ b/usr/iscsi/iser.h
@@ -0,0 +1,271 @@
+/*
+ * Copyright (C) 2007 Dennis Dalessandro (dennis at osc.edu)
+ * Copyright (C) 2007 Ananth Devulapalli (ananth at osc.edu)
+ * Copyright (C) 2007 Pete Wyckoff (pw at osc.edu)
+ * Copyright (C) 2010 Voltaire, Inc. All rights reserved.
+ * Copyright (C) 2010 Alexander Nezhinsky (alexandern at voltaire.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation, version 2 of the
+ * License.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
+ * 02110-1301 USA
+ */
+#ifndef ISER_H
+#define ISER_H
+
+#include "iscsid.h"
+
+/*
+ * The IB-extended version from the kernel.  Stags and VAs are in
+ * big-endian format.
+ */
+struct iser_hdr { /* iSER header; precedes the iSCSI header (see ISER_HDRS_SZ) */
+	uint8_t   flags; /* presumably ISCSI_CTRL/ISER_HELLO* and ISER_WSV/RSV; confirm */
+	uint8_t   rsvd[3]; /* reserved bytes */
+	uint32_t  write_stag; /* write rkey */
+	uint64_t  write_va; /* virtual address paired with write_stag */
+	uint32_t  read_stag;  /* read rkey */
+	uint64_t  read_va; /* virtual address paired with read_stag */
+} __attribute__((packed)); /* packed: 1+3+4+8+4+8 = 28 bytes, no padding */
+
+#define ISER_WSV	(0x08) /* presumably "write stag valid" flag bit; confirm vs. iSER spec */
+#define ISER_RSV	(0x04) /* presumably "read stag valid" flag bit; confirm vs. iSER spec */
+#define ISCSI_CTRL      (0x10) /* presumably marks an iSCSI control-type message; confirm */
+#define ISER_HELLO      (0x20) /* presumably marks an iSER Hello message; confirm */
+#define ISER_HELLORPLY  (0x30) /* presumably marks an iSER HelloReply message; confirm */
+
+struct iser_conn;
+
+enum iser_ib_op_code { /* kind of IB operation a work request carries; values 0..3 */
+	ISER_IB_RECV, /* posted receive */
+	ISER_IB_SEND, /* send */
+	ISER_IB_RDMA_WRITE, /* RDMA write */
+	ISER_IB_RDMA_READ, /* RDMA read */
+};
+
+#define ISER_HDRS_SZ (sizeof(struct iser_hdr) + sizeof(struct iscsi_hdr)) /* bytes for one iser_hdr plus one iscsi_hdr */
+
+/*
+ * Work requests are either posted Receives for control messages,
+ * or Send and RDMA ops (also considered "send" by IB).
+ * The two kinds use different work request descriptors.
+ * During a flush, we need to know the type of op and the
+ * task to which it is related.
+ */
+struct iser_work_req { /* one IB work request plus the bookkeeping needed to identify it */
+	struct list_head wr_list; /* list linkage; the owning list is not visible in this header */
+	struct iser_task *task; /* task this work request relates to */
+	enum iser_ib_op_code iser_ib_op; /* type of op this request represents */
+	struct ibv_sge sge; /* a single scatter/gather entry */
+	union { /* anonymous: recv_wr and send_wr share storage */
+		struct ibv_recv_wr recv_wr; /* presumably used when iser_ib_op is ISER_IB_RECV */
+		struct ibv_send_wr send_wr; /* presumably used for send and RDMA ops; confirm */
+	};
+};
+
+/*
+ * Pre-registered memory.  Buffers are allocated by iscsi from us, handed
+ * to device to fill, then iser can send them directly without registration.
+ * The same buffers are also used for the write path.
+ */
+struct iser_membuf { /* descriptor of one data buffer */
+	void *addr; /* start of the buffer */
+	unsigned size; /* buffer size; presumably in bytes — confirm */
+	unsigned offset; /* offset within task data */
+	struct list_head task_list; /* presumably linkage into a task's in/out_buf_list; confirm */
+	int rdma; /* flag; presumably non-zero when the buffer is used for RDMA — confirm */
+	struct list_head pool_list; /* presumably linkage into a device membuf list; confirm */
+};
+
+struct iser_pdu { /* pointers to the parts of one PDU plus its data buffer descriptor */
+	struct iser_hdr *iser_hdr; /* iSER header of this PDU */
+	struct iscsi_hdr *bhs; /* iSCSI header (name suggests "basic header segment") */
+	unsigned int ahssize; /* size of the area at ahs; units not shown here */
+	void *ahs; /* presumably additional header segment(s); confirm */
+	/* pdu data only, original buffer is reflected in ibv_sge */
+	struct iser_membuf membuf;
+};
+
+/*
+ * Each SCSI command may have its own RDMA parameters.  These appear on
+ * the connection then later are assigned to the particular task to be
+ * used when the target responds.
+ */
+struct iser_task { /* per-command state: PDU, work requests, RDMA parameters, list linkage */
+	struct iser_conn *conn; /* connection associated with this task */
+
+	struct iser_pdu pdu; /* embedded PDU descriptor */
+
+	struct iser_work_req rxd; /* presumably the receive work request; confirm */
+	struct iser_work_req txd; /* presumably the send work request; confirm */
+	struct iser_work_req rdmad; /* presumably the RDMA read/write work request; confirm */
+
+	int opcode;
+	int is_immediate;
+	int is_read;
+	int is_write;
+	int unsolicited;
+
+	uint64_t tag;
+	uint32_t cmd_sn;
+
+	unsigned long flags;
+
+	int in_len;
+	int out_len;
+
+	int unsol_sz; /* the *_sz / *_remains pairs below presumably track */
+	int unsol_remains; /* total vs. still-outstanding amounts — confirm in users */
+	int rdma_rd_sz;
+	int rdma_rd_remains;
+	/* int rdma_rd_offset; // ToDo: multiple RDMA-Write buffers */
+	int rdma_wr_sz;
+	int rdma_wr_remains;
+
+	/* read and write from the initiator's point of view */
+	uint32_t rem_read_stag, rem_write_stag;
+	uint64_t rem_read_va, rem_write_va;
+
+	struct list_head in_buf_list; /* presumably membufs, counted by in_buf_num; confirm */
+	int in_buf_num;
+
+	struct list_head out_buf_list; /* presumably membufs, counted by out_buf_num; confirm */
+	int out_buf_num;
+
+	struct list_head exec_list; /* linkage members; the lists they join are */
+	struct list_head rdma_list; /* not visible in this header */
+	struct list_head tx_list;
+	struct list_head recv_list;
+
+	/* linked to session->cmd_list */
+	struct list_head session_list;
+
+	struct list_head dout_task_list;
+
+	int result;
+	struct scsi_cmd scmd; /* embedded SCSI command */
+
+	unsigned long extdata[0]; /* zero-length trailing array (GNU ext.); NOTE(review): C99 form is "[]" */
+};
+
+struct iser_device;
+
+enum iser_login_phase /* login progress of a connection; see iser_conn.login_phase */
+{
+	LOGIN_PHASE_INIT, /* first enumerator, value 0 */
+	LOGIN_PHASE_START,      /* keep 1 send spot and 1 recv posted */
+	LOGIN_PHASE_LAST_SEND,  /* need 1 more send before ff */
+	LOGIN_PHASE_FF, 	/* full feature */
+
+	NUM_LOGIN_PHASE_VALS /* number of phases above (4); not a phase itself */
+};
+
+/*
+ * Parallels iscsi_connection.  Adds more fields for iser.
+ */
+struct iser_conn { /* iSER connection state wrapped around an iscsi_connection */
+	struct iscsi_connection h; /* embedded iSCSI connection; ISER_CONN() maps back from it */
+
+	struct ibv_qp *qp_hndl; /* queue pair handle */
+	struct rdma_cm_id *cm_id; /* RDMA CM identifier */
+	struct iser_device *dev; /* device associated with this connection */
+
+	struct event_data sched_buf_alloc; /* each sched_* event presumably services the */
+	struct list_head buf_alloc_list; /* list declared right after it — confirm */
+
+	struct event_data sched_rdma_rd;
+	struct list_head rdma_rd_list;
+
+	struct event_data sched_iosubmit;
+	struct list_head iosubmit_list;
+
+	struct event_data sched_tx;
+	struct list_head resp_tx_list;
+
+	struct list_head sent_list;
+
+	struct event_data sched_post_recv;
+	struct list_head post_recv_list;
+
+	struct event_data sched_conn_free;
+
+	struct sockaddr_storage peer_addr;  /* initiator address */
+	char *peer_name; /* presumably printable form of peer_addr; ownership not shown */
+	struct sockaddr_storage self_addr;  /* target address */
+	char *self_name; /* presumably printable form of self_addr; ownership not shown */
+
+	unsigned int ssize, rsize, max_outst_pdu;
+
+	/* FF resources */
+	int ff_res_alloc; /* presumably non-zero once the FF resources are allocated; confirm */
+	void *task_pool; /* iser_task structures */
+	void *pdu_data_pool; /* memory for pdu, non-rdma send/recv */
+	struct ibv_mr *pdu_data_mr;   /* memory registration for pdu data */
+	struct iser_task *nop_in_task;
+	struct iser_task *text_tx_task;
+
+	enum iser_login_phase login_phase; /* current login phase */
+
+	/* login phase resources, freed at full-feature */
+	int login_res_alloc; /* presumably non-zero while login resources are allocated; confirm */
+	void *login_task_pool;
+	void *login_data_pool;
+	struct ibv_mr *login_data_mr;
+	struct iser_task *login_rx_task;
+	struct iser_task *login_tx_task;
+
+	/* list of all iser conns */
+	struct list_head conn_list;
+};
+
+static inline struct iser_conn *ISER_CONN(struct iscsi_connection *iscsi_conn)
+{ /* map a pointer to the embedded member 'h' back to its enclosing struct iser_conn */
+       return container_of(iscsi_conn, struct iser_conn, h); /* iscsi_conn must point at an iser_conn's 'h' */
+}
+
+/*
+ * Shared variables for a particular device.  This struct keeps no conn[]
+ * array of its own; instead each connection points at its device through
+ * the "dev" member of struct iser_conn.
+ */
+struct iser_device { /* per-device verbs objects and the registered membuf area */
+	struct list_head list; /* presumably links this device into a list of devices; confirm */
+	struct ibv_context *ibv_ctxt; /* verbs device context */
+	struct ibv_pd *pd; /* protection domain */
+	struct ibv_cq *cq; /* completion queue */
+	struct ibv_comp_channel *cq_channel; /* completion event channel */
+	struct ibv_device_attr device_attr; /* device attributes, stored by value */
+
+	/* membuf registered buffer, list area, handle */
+	void *membuf_regbuf;
+	void *membuf_listbuf;
+	struct ibv_mr *membuf_mr;
+	int waiting_for_mem; /* flag; presumably set while a request waits for a free membuf — confirm */
+
+	struct event_data poll_sched; /* presumably schedules completion-queue polling; confirm */
+
+	/* free and allocated membuf entries */
+	struct list_head membuf_free, membuf_alloc;
+};
+
+void iser_login_exec(struct iscsi_connection *iscsi_conn,
+		     struct iser_pdu *rx_pdu, /* presumably the received login PDU; confirm */
+		     struct iser_pdu *tx_pdu); /* presumably the response PDU to fill; confirm */
+int iser_login_complete(struct iscsi_connection *iscsi_conn); /* return convention not shown here */
+int iser_text_exec(struct iscsi_connection *iscsi_conn,
+		   struct iser_pdu *rx_pdu, /* presumably the received text PDU; confirm */
+		   struct iser_pdu *tx_pdu); /* presumably the response PDU to fill; confirm */
+
+void iser_conn_close(struct iser_conn *conn); /* declared here, defined elsewhere */
+
+#endif  /* ISER_H */
--
1.7.1
--
To unsubscribe from this list: send the line "unsubscribe stgt" in
the body of a message to majordomo at vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html



More information about the stgt mailing list