[Stgt-devel] [PATCH] bidirectional

Pete Wyckoff pw
Mon Mar 5 21:38:14 CET 2007


Make bidirectional transfers work.  Also update the iSCSI code to use
the new kernel data structures for extended CDBs from the Panasas patch.

Signed-off-by: Pete Wyckoff <pw at osc.edu>
---
 usr/iscsi/iscsid.c |  234 ++++++++++++++++++++++++++++++++++++++++------------
 usr/iscsi/iscsid.h |   22 +++--
 2 files changed, 193 insertions(+), 63 deletions(-)

diff --git a/usr/iscsi/iscsid.c b/usr/iscsi/iscsid.c
index 9c326dc..1cbb2c3 100644
--- a/usr/iscsi/iscsid.c
+++ b/usr/iscsi/iscsid.c
@@ -739,10 +739,15 @@ static void cmnd_finish(struct iscsi_connection *conn)
 	}
 }
 
+/*
+ * Send the final command response.  For successful (and non-bidirectional)
+ * tasks that return data, this packet is not required.
+ */
 static int iscsi_cmd_rsp_build(struct iscsi_task *task)
 {
 	struct iscsi_connection *conn = task->conn;
 	struct iscsi_cmd_rsp *rsp = (struct iscsi_cmd_rsp *) &conn->rsp.bhs;
+	uint32_t residual;
 
 	dprintf("%p %x\n", task, task->scmd->scb[0]);
 
@@ -756,6 +761,31 @@ static int iscsi_cmd_rsp_build(struct iscsi_task *task)
 	rsp->exp_cmdsn = cpu_to_be32(conn->session->exp_cmd_sn);
 	rsp->max_cmdsn = cpu_to_be32(conn->session->exp_cmd_sn + MAX_QUEUE_CMD);
 
+	/* Write underflow/overflow is never reported: the target
+	 * currently has no way to signal it back. */
+
+	residual = 0;
+	if (task->dir == BIDIRECTIONAL) {
+		if (task->len < task->read_len) {
+			rsp->flags |= ISCSI_FLAG_CMD_BIDI_UNDERFLOW;
+			residual = task->read_len - task->len;
+		} else if (task->len > task->read_len) {
+			rsp->flags |= ISCSI_FLAG_CMD_BIDI_OVERFLOW;
+			residual = task->len - task->read_len;
+		}
+		rsp->bi_residual_count = cpu_to_be32(residual);
+		rsp->residual_count = 0;
+	} else {
+		if (task->len < task->read_len) {
+			rsp->flags |= ISCSI_FLAG_CMD_UNDERFLOW;
+			residual = task->read_len - task->len;
+		} else if (task->len > task->read_len) {
+			rsp->flags |= ISCSI_FLAG_CMD_OVERFLOW;
+			residual = task->len - task->read_len;
+		}
+		rsp->residual_count = cpu_to_be32(residual);
+	}
+
 	return 0;
 }
 
@@ -770,6 +800,9 @@ static int iscsi_sense_rsp_build(struct iscsi_task *task)
 	struct iscsi_cmd_rsp *rsp = (struct iscsi_cmd_rsp *) &conn->rsp.bhs;
 	struct iscsi_sense_data *sense;
 	unsigned char sense_len;
+ 	uint32_t residual;
+
+	dprintf("%p %x %d\n", task, task->scmd->scb[0], task->scmd->sense_len);
 
 	memset(rsp, 0, sizeof(*rsp));
 	rsp->opcode = ISCSI_OP_SCSI_CMD_RSP;
@@ -777,6 +810,34 @@ static int iscsi_sense_rsp_build(struct iscsi_task *task)
 	rsp->flags = ISCSI_FLAG_CMD_FINAL;
 	rsp->response = ISCSI_STATUS_CMD_COMPLETED;
 	rsp->cmd_status = SAM_STAT_CHECK_CONDITION;
+	rsp->statsn = cpu_to_be32(conn->stat_sn++);
+	rsp->exp_cmdsn = cpu_to_be32(conn->session->exp_cmd_sn);
+	rsp->max_cmdsn = cpu_to_be32(conn->session->exp_cmd_sn + MAX_QUEUE_CMD);
+
+	/* Write underflow/overflow is never reported: the target
+	 * currently has no way to signal it back. */
+
+	residual = 0;
+	if (task->dir == BIDIRECTIONAL) {
+		if (task->len < task->read_len) {
+			rsp->flags |= ISCSI_FLAG_CMD_BIDI_UNDERFLOW;
+			residual = task->read_len - task->len;
+		} else if (task->len > task->read_len) {
+			rsp->flags |= ISCSI_FLAG_CMD_BIDI_OVERFLOW;
+			residual = task->len - task->read_len;
+		}
+		rsp->bi_residual_count = cpu_to_be32(residual);
+		rsp->residual_count = 0;
+	} else {
+		if (task->len < task->read_len) {
+			rsp->flags |= ISCSI_FLAG_CMD_UNDERFLOW;
+			residual = task->read_len - task->len;
+		} else if (task->len > task->read_len) {
+			rsp->flags |= ISCSI_FLAG_CMD_OVERFLOW;
+			residual = task->len - task->read_len;
+		}
+		rsp->residual_count = cpu_to_be32(residual);
+	}
 
 	sense = (struct iscsi_sense_data *)task->scmd->sense_buffer;
 	sense_len = task->scmd->sense_len;
@@ -791,23 +852,25 @@ static int iscsi_sense_rsp_build(struct iscsi_task *task)
 	return 0;
 }
 
+/*
+ * Send a data-in PDU.  If status was 0 and the task is not bidirectional,
+ * collapse the response message into the last data-in PDU.
+ */
 static int iscsi_data_rsp_build(struct iscsi_task *task)
 {
 	struct iscsi_connection *conn = task->conn;
 	struct iscsi_data_rsp *rsp = (struct iscsi_data_rsp *) &conn->rsp.bhs;
-	struct iscsi_cmd *req = (struct iscsi_cmd *) &task->req;
-	int residual, datalen, exp_datalen = ntohl(req->data_length);
+	int datalen, exp_datalen = task->read_len;
 	int max_burst = conn->session_param[ISCSI_PARAM_MAX_XMIT_DLENGTH].val;
+ 	uint32_t residual;
 
 	memset(rsp, 0, sizeof(*rsp));
 	rsp->opcode = ISCSI_OP_SCSI_DATA_IN;
 	rsp->itt = task->tag;
 	rsp->ttt = cpu_to_be32(ISCSI_RESERVED_TAG);
-	rsp->cmd_status = ISCSI_STATUS_CMD_COMPLETED;
 
 	rsp->offset = cpu_to_be32(task->offset);
-	rsp->datasn = cpu_to_be32(task->data_sn++);
-	rsp->cmd_status = task->result;
+	rsp->datasn = cpu_to_be32(task->exp_r2tsn++);
 
 	datalen = min(exp_datalen, task->len);
 	datalen -= task->offset;
@@ -815,21 +878,26 @@ static int iscsi_data_rsp_build(struct iscsi_task *task)
 	dprintf("%d %d %d %d %x\n", datalen, exp_datalen, task->len, max_burst, rsp->itt);
 
 	if (datalen <= max_burst) {
-		rsp->flags = ISCSI_FLAG_CMD_FINAL | ISCSI_FLAG_DATA_STATUS;
-		if (task->len < exp_datalen) {
-			rsp->flags |= ISCSI_FLAG_CMD_UNDERFLOW;
-			residual = exp_datalen - task->len;
-		} else if (task->len > exp_datalen) {
-			rsp->flags |= ISCSI_FLAG_CMD_OVERFLOW;
-			residual = task->len - exp_datalen;
-		} else
-			residual = 0;
-		rsp->residual_count = cpu_to_be32(residual);
+		rsp->flags = ISCSI_FLAG_CMD_FINAL;
+
+		/* collapse status into final packet if successful */
+		if (task->result == 0 && task->dir != BIDIRECTIONAL) {
+			rsp->flags |= ISCSI_FLAG_DATA_STATUS;
+			if (task->len < exp_datalen) {
+				rsp->flags |= ISCSI_FLAG_CMD_UNDERFLOW;
+				residual = exp_datalen - task->len;
+			} else if (task->len > exp_datalen) {
+				rsp->flags |= ISCSI_FLAG_CMD_OVERFLOW;
+				residual = task->len - exp_datalen;
+			} else
+				residual = 0;
+			rsp->cmd_status = task->result;
+			rsp->statsn = cpu_to_be32(conn->stat_sn++);
+			rsp->residual_count = cpu_to_be32(residual);
+		}
 	} else
 		datalen = max_burst;
 
-	if (rsp->flags & ISCSI_FLAG_CMD_FINAL)
-		rsp->statsn = cpu_to_be32(conn->stat_sn++);
 	rsp->exp_cmdsn = cpu_to_be32(conn->session->exp_cmd_sn);
 	rsp->max_cmdsn = cpu_to_be32(conn->session->exp_cmd_sn + MAX_QUEUE_CMD);
 
@@ -975,30 +1043,46 @@ static int cmd_attr(struct iscsi_task *task)
 	return attr;
 }
 
+/*
+ * Try to execute the command.  Called when the initial scsi command PDU
+ * arrives and after each finished data burst.  If there is still data-out
+ * unreceived, put back on the queue to wait until that is done.
+ */
 static int iscsi_scsi_cmd_execute(struct iscsi_task *task)
 {
 	struct iscsi_connection *conn = task->conn;
 	struct iscsi_cmd *req = (struct iscsi_cmd *) &task->req;
 	unsigned long uaddr = (unsigned long) task->data;
 	uint8_t rw = req->flags & ISCSI_FLAG_CMD_WRITE;
-	uint8_t *cdb, cdbbuf[260];
+	uint32_t data_len;
+	uint8_t *cdb, cdbbuf[260], *ahs, ahslen;
 	int cdblen;
 	int err = 0;
 
-	if (rw && task->r2t_count) {
-		if (!task->unsol_count)
+	/* wait for more data; if unsol, initiator will send without r2t */
+	if (task->r2t_count) {
+		if (!task->waiting_unsol)
 			list_add_tail(&task->c_list, &task->conn->tx_clist);
 		goto noqueue;
 	}
 
+	task->offset = 0;  /* for use as transmit pointer for data-ins */
+
+	/* build full cdb for target, possibly using ahs */
 	cdb = req->cdb;
 	cdblen = 16;
-	if (req->hlength) {
+	ahs = task->ahs;
+	ahslen = req->hlength * 4;
+	if (ahslen >= 4) {
 		/* concatenate extended cdb */
-		struct iscsi_ecdb_ahdr *ahs_extcdb = task->ahs;
+		struct iscsi_ecdb_ahdr *ahs_extcdb = (void *) ahs;
 		if (ahs_extcdb->ahstype == ISCSI_AHSTYPE_CDB) {
 			int extlen = ntohs(ahs_extcdb->ahslength) - 1;
 			dprintf("extcdb len %d\n", extlen);
+			if (4 + extlen > ahslen) {
+				err = -EINVAL;
+				goto noqueue;
+			}
 			if (extlen + 16 > sizeof(cdbbuf)) {
 				err = -ENOMEM;
 				goto noqueue;
@@ -1007,13 +1091,40 @@ static int iscsi_scsi_cmd_execute(struct iscsi_task *task)
 			memcpy(cdbbuf + 16, ahs_extcdb->ecdb, extlen);
 			cdb = cdbbuf;
 			cdblen = 16 + extlen;
+
+			/* advance pointers for possible bidi */
+			ahs += 4 + extlen;
+			ahslen -= 4 + extlen;
 		}
 	}
 
+	/* figure out incoming (write) and outgoing (read) sizes */
+	data_len = 0;
+	task->write_len = 0;
+	if (task->dir == WRITE || task->dir == BIDIRECTIONAL) {
+		task->write_len = ntohl(req->data_length);
+		data_len = task->write_len;
+	}
+	task->read_len = 0;
+	if (task->dir == BIDIRECTIONAL && ahslen >= 8) {
+		struct iscsi_rlength_ahdr *ahs_bidi = (void *) ahs;
+		if (ahs_bidi->ahstype == ISCSI_AHSTYPE_RLENGTH) {
+			task->read_len = ntohl(ahs_bidi->read_length);
+			dprintf("bidi read len %u\n", task->read_len);
+		}
+	}
+	if (task->dir == READ) {
+		task->read_len = ntohl(req->data_length);
+		data_len = task->read_len;
+	}
+
+	/*
+	 * When done, target will call iscsi_scsi_cmd_done with
+	 * addr and len of data-in, as well as sense.
+	 */
 	err = target_cmd_queue(conn->session->iscsi_nexus_id,
 			       cdb, cdblen, rw, uaddr, req->lun,
-			       ntohl(req->data_length),
-			       cmd_attr(task), req->itt);
+			       data_len, cmd_attr(task), req->itt);
 
 noqueue:
 	tgt_event_modify(conn->fd, EPOLLIN|EPOLLOUT);
@@ -1103,6 +1214,17 @@ static int iscsi_task_execute(struct iscsi_task *task)
 		tgt_event_modify(task->conn->fd, EPOLLIN | EPOLLOUT);
 		break;
 	case ISCSI_OP_SCSI_CMD:
+		/* convenient directionality for our internal use */
+		if (hdr->flags & ISCSI_FLAG_CMD_READ) {
+			if (hdr->flags & ISCSI_FLAG_CMD_WRITE)
+				task->dir = BIDIRECTIONAL;
+			else
+				task->dir = READ;
+		} else if (hdr->flags & ISCSI_FLAG_CMD_WRITE) {
+			task->dir = WRITE;
+		} else
+			task->dir = NONE;
+
 		err = iscsi_scsi_cmd_execute(task);
 		break;
 	case ISCSI_OP_SCSI_TMFUNC:
@@ -1128,12 +1250,14 @@ static int iscsi_data_out_rx_done(struct iscsi_task *task)
 	int err = 0;
 
 	if (hdr->ttt == cpu_to_be32(ISCSI_RESERVED_TAG)) {
+		/* unsolicited data, accumulate until final */
 		if (hdr->flags & ISCSI_FLAG_CMD_FINAL) {
-			task->unsol_count = 0;
+			task->waiting_unsol = 0;
 			if (!task_pending(task))
 				err = iscsi_scsi_cmd_execute(task);
 		}
 	} else {
+		/* response to a r2t we sent */
 		if (!(hdr->flags & ISCSI_FLAG_CMD_FINAL))
 			return err;
 
@@ -1160,7 +1284,7 @@ found:
 		task->r2t_count,
 		ntoh24(req->dlength), be32_to_cpu(req->offset));
 
-	conn->rx_buffer = (void *) (unsigned long) task->c_buffer;
+	conn->rx_buffer = task->c_buffer;
 	conn->rx_buffer += be32_to_cpu(req->offset);
 	conn->rx_size = ntoh24(req->dlength);
 
@@ -1183,6 +1307,7 @@ static int iscsi_task_queue(struct iscsi_task *task)
 	dprintf("%x %x %x\n", be32_to_cpu(req->statsn), session->exp_cmd_sn,
 		req->opcode);
 
+	/* immediate commands live outside of the CmdSN space */
 	if (req->opcode & ISCSI_OP_IMMEDIATE)
 		return iscsi_task_execute(task);
 
@@ -1251,9 +1376,9 @@ static int iscsi_scsi_cmd_rx_start(struct iscsi_connection *conn)
 	dlen = ntoh24(req->dlength);  /* just part in this PDU */
 	tot_data_length = ntohl(req->data_length);  /* all data */
 
-	dprintf("%u %x %d %d %x task %p lens %d %d %d\n", conn->session->tsih,
+	dprintf("%u %x %d %d task %p lens %d %d %d\n", conn->session->tsih,
 		req->cdb[0], ntohl(req->data_length),
-		req->flags & ISCSI_FLAG_CMD_ATTR_MASK, req->itt,
+		req->flags & ISCSI_FLAG_CMD_ATTR_MASK,
 		task, ahslen, dlen, tot_data_length);
 
 	/*
@@ -1284,11 +1409,11 @@ static int iscsi_scsi_cmd_rx_start(struct iscsi_connection *conn)
 
 	if (req->flags & ISCSI_FLAG_CMD_WRITE) {
 		task->r2t_count = tot_data_length - dlen;  /* bytes left */
-		task->unsol_count = !(req->flags & ISCSI_FLAG_CMD_FINAL);
+		task->waiting_unsol = !(req->flags & ISCSI_FLAG_CMD_FINAL);
 		task->offset = dlen;
 
 		dprintf("%d %d %d %d\n", conn->rx_size, task->r2t_count,
-			task->unsol_count, task->offset);
+			task->waiting_unsol, task->offset);
 	}
 
 	list_add(&task->c_hlist, &conn->session->cmd_list);
@@ -1411,30 +1536,23 @@ static int iscsi_task_rx_start(struct iscsi_connection *conn)
 	return 0;
 }
 
+/*
+ * Send something.  Could be a data-in PDU or a response.  In theory
+ * this can happen while we are still waiting for data-out, but in this
+ * implementation, all data-out is received first.
+ */
 static int iscsi_scsi_cmd_tx_start(struct iscsi_task *task)
 {
 	int err = 0;
-	struct iscsi_cmd *req = (struct iscsi_cmd *) &task->req;
 
 	if (task->r2t_count)
-		err = iscsi_r2t_build(task);
-	else {
-		/* Needs to clean up this mess. */
-
-		if (req->flags & ISCSI_FLAG_CMD_WRITE)
-			if (task->result)
-				err = iscsi_sense_rsp_build(task);
-			else
-				err = iscsi_cmd_rsp_build(task);
-		else {
-			if (task->result)
-				err = iscsi_sense_rsp_build(task);
-			else if (task->len)
-				err = iscsi_data_rsp_build(task);
-			else
-				err = iscsi_cmd_rsp_build(task);
-		}
-	}
+		err = iscsi_r2t_build(task);  /* still receiving data-out */
+	else if (task->offset < task->len)
+		err = iscsi_data_rsp_build(task);  /* sending data-in */
+	else if (task->result)
+		err = iscsi_sense_rsp_build(task);  /* final, status nonzero */
+	else
+		err = iscsi_cmd_rsp_build(task);  /* final response */
 
 	return err;
 }
@@ -1502,6 +1620,10 @@ static int iscsi_tm_tx_start(struct iscsi_task *task)
 	return 0;
 }
 
+/*
+ * Look at the response we just sent and figure out if there is anything
+ * more to do.
+ */
 static int iscsi_scsi_cmd_tx_done(struct iscsi_connection *conn)
 {
 	struct iscsi_hdr *hdr = &conn->rsp.bhs;
@@ -1511,8 +1633,9 @@ static int iscsi_scsi_cmd_tx_done(struct iscsi_connection *conn)
 	case ISCSI_OP_R2T:
 		break;
 	case ISCSI_OP_SCSI_DATA_IN:
-		if (!(hdr->flags & ISCSI_FLAG_CMD_FINAL)) {
-			dprintf("more data %x\n", hdr->itt);
+		if (task->offset < task->len || task->result != 0
+		   || task->dir == BIDIRECTIONAL) {
+			dprintf("more data or sense or bidir %x\n", hdr->itt);
 			list_add_tail(&task->c_list, &task->conn->tx_clist);
 			return 0;
 		}
@@ -1545,6 +1668,10 @@ static int iscsi_task_tx_done(struct iscsi_connection *conn)
 	return 0;
 }
 
+/*
+ * Pick a task that wants to transmit and switch the connection to
+ * begin the transmit state machine.
+ */
 static int iscsi_task_tx_start(struct iscsi_connection *conn)
 {
 	struct iscsi_task *task;
@@ -1556,10 +1683,9 @@ static int iscsi_task_tx_start(struct iscsi_connection *conn)
 	conn_write_pdu(conn);
 
 	task = list_entry(conn->tx_clist.next, struct iscsi_task, c_list);
-	dprintf("found a task %" PRIx64 " %u %u %u\n", task->tag,
+	dprintf("found a task %" PRIx64 " %u %u\n", task->tag,
 		ntohl(((struct iscsi_cmd *) (&task->req))->data_length),
-		task->offset,
-		task->r2t_count);
+		task->offset);
 
 	list_del(&task->c_list);
 
diff --git a/usr/iscsi/iscsid.h b/usr/iscsi/iscsid.h
index ea09967..3ae416e 100644
--- a/usr/iscsi/iscsid.h
+++ b/usr/iscsi/iscsid.h
@@ -31,12 +31,12 @@
 #define DIGEST_CRC32C           (1 << 1)
 
 #define sid64(isid, tsih)					\
-({								\
+(								\
 	(uint64_t) isid[0] <<  0 | (uint64_t) isid[1] <<  8 |	\
 	(uint64_t) isid[2] << 16 | (uint64_t) isid[3] << 24 |	\
 	(uint64_t) isid[4] << 32 | (uint64_t) isid[5] << 40 |	\
-	(uint64_t) tsih << 48;					\
-})
+	(uint64_t) tsih << 48					\
+)
 
 #define sid_to_tsih(sid) ((sid) >> 48)
 
@@ -98,18 +98,21 @@ struct iscsi_task {
 	struct list_head c_list;
 
 	unsigned long flags;
+	enum { NONE, WRITE, READ, BIDIRECTIONAL } dir;
+	uint32_t write_len;  /* from command pdu, write and read lengths */
+	uint32_t read_len;
 
 	uint64_t addr;
 	int result;
 	int len;
 	int rw;
 
-	int offset;
-	int data_sn;
-
-	int r2t_count;
-	int unsol_count;
-	int exp_r2tsn;
+ 	int offset;  /* progress in data buffer for rx or tx */
+ 
+ 	int r2t_count;      /* bytes to arrive in unsol data and to solicit */
+ 	int waiting_unsol;  /* bool, waiting for unsolicited data */
+ 
+ 	int exp_r2tsn;  /* next R2T or Data SN target should generate */
 
 	void *c_buffer;
 	void *ahs;  /* these point into c_buffer, parts after bhs */
@@ -145,6 +148,7 @@ struct iscsi_connection {
 	uint32_t stat_sn;
 	uint32_t exp_stat_sn;
 
+	/* these should be session-wide, not per-connection */
 	uint32_t cmd_sn;
 	uint32_t exp_cmd_sn;
 	uint32_t max_cmd_sn;
-- 
1.5.0.2




More information about the stgt mailing list