Commit c4232630 authored by Quinn Tran's avatar Quinn Tran Committed by Martin K. Petersen
Browse files

scsi: qla2xxx: Fix abort in bsg timeout



Current abort of bsg on timeout prematurely clears the
outstanding_cmds[]. Abort does not allow FW to return the IOCB/SRB. In
addition, bsg_job_done() is not called to return the BSG (i.e. leak).

Abort the outstanding bsg/SRB and wait for the completion. The
completion IOCB will wake up the bsg_timeout thread. If abort is not
successful, then driver will forcibly call bsg_job_done() and free the
srb.

Err Inject:

 - qaucli -z
 - assign CT Passthru IOCB's NportHandle with another initiator
   nport handle to trigger timeout.  Remote port will drop CT request.
 - bsg_job_done is properly called as part of cleanup

kernel: qla2xxx [0000:21:00.1]-7012:7: qla2x00_process_ct : 286 : Error Inject.
kernel: qla2xxx [0000:21:00.1]-7016:7: bsg rqst type: FC_BSG_HST_CT else type: 101 - loop-id=1 portid=fffffa.
kernel: qla2xxx [0000:21:00.1]-70bb:7: qla24xx_bsg_timeout CMD timeout. bsg ptr ffff9971a42f0838 msgcode 80000004 vendor cmd fa010000
kernel: qla2xxx [0000:21:00.1]-507c:7: Abort command issued - hdl=4b, type=5
kernel: qla2xxx [0000:21:00.1]-5040:7: ELS-CT pass-through-ct pass-through error hdl=4b comp_status-status=0x5 error subcode 1=0x0 error subcode 2=0xaf882e80.
kernel: qla2xxx [0000:21:00.1]-7009:7: qla2x00_bsg_job_done: sp hdl 4b, result=70000 bsg ptr ffff9971a42f0838
kernel: qla2xxx [0000:21:00.1]-802c:7: Aborting bsg ffff9971a42f0838 sp=ffff99760b87ba80 handle=4b rval=0
kernel: qla2xxx [0000:21:00.1]-708a:7: bsg abort success. bsg ffff9971a42f0838 sp=ffff99760b87ba80 handle=0x4b
kernel: qla2xxx [0000:21:00.1]-7012:7: qla2x00_process_ct : 286 : Error Inject.
kernel: qla2xxx [0000:21:00.1]-7016:7: bsg rqst type: FC_BSG_HST_CT else type: 101 - loop-id=1 portid=fffffa.
kernel: qla2xxx [0000:21:00.1]-70bb:7: qla24xx_bsg_timeout CMD timeout. bsg ptr ffff9971a42f43b8 msgcode 80000004 vendor cmd fa010000
kernel: qla2xxx [0000:21:00.1]-7012:7: qla_bsg_found : 2206 : Error Inject 2.
kernel: qla2xxx [0000:21:00.1]-802c:7: Aborting bsg ffff9971a42f43b8 sp=ffff99762c304440 handle=5e rval=5
kernel: qla2xxx [0000:21:00.1]-704f:7: bsg abort fail.  bsg=ffff9971a42f43b8 sp=ffff99762c304440 rval=5.
kernel: qla2xxx [0000:21:00.1]-7051:7: qla_bsg_found bsg_job_done : bsg ffff9971a42f43b8 result 0xfffffffa sp ffff99762c304440.

Cc: stable@vger.kernel.org
Fixes: c449b419 ("scsi: qla2xxx: Use QP lock to search for bsg")
Signed-off-by: default avatarQuinn Tran <qutran@marvell.com>
Signed-off-by: default avatarNilesh Javali <njavali@marvell.com>
Link: https://lore.kernel.org/r/20241115130313.46826-2-njavali@marvell.com


Reviewed-by: default avatarHimanshu Madhani <himanshu.madhani@oracle.com>
Signed-off-by: default avatarMartin K. Petersen <martin.petersen@oracle.com>
parent 0deb37c2
Loading
Loading
Loading
Loading
+92 −22
Original line number Diff line number Diff line
@@ -24,6 +24,7 @@ void qla2x00_bsg_job_done(srb_t *sp, int res)
{
	struct bsg_job *bsg_job = sp->u.bsg_job;
	struct fc_bsg_reply *bsg_reply = bsg_job->reply;
	struct completion *comp = sp->comp;

	ql_dbg(ql_dbg_user, sp->vha, 0x7009,
	    "%s: sp hdl %x, result=%x bsg ptr %p\n",
@@ -35,6 +36,9 @@ void qla2x00_bsg_job_done(srb_t *sp, int res)
	bsg_reply->result = res;
	bsg_job_done(bsg_job, bsg_reply->result,
		       bsg_reply->reply_payload_rcv_len);

	if (comp)
		complete(comp);
}

void qla2x00_bsg_sp_free(srb_t *sp)
@@ -3061,7 +3065,7 @@ qla24xx_bsg_request(struct bsg_job *bsg_job)

static bool qla_bsg_found(struct qla_qpair *qpair, struct bsg_job *bsg_job)
{
	bool found = false;
	bool found, do_bsg_done;
	struct fc_bsg_reply *bsg_reply = bsg_job->reply;
	scsi_qla_host_t *vha = shost_priv(fc_bsg_to_shost(bsg_job));
	struct qla_hw_data *ha = vha->hw;
@@ -3069,6 +3073,11 @@ static bool qla_bsg_found(struct qla_qpair *qpair, struct bsg_job *bsg_job)
	int cnt;
	unsigned long flags;
	struct req_que *req;
	int rval;
	DECLARE_COMPLETION_ONSTACK(comp);
	uint32_t ratov_j;

	found = do_bsg_done = false;

	spin_lock_irqsave(qpair->qp_lock_ptr, flags);
	req = qpair->req;
@@ -3080,42 +3089,104 @@ static bool qla_bsg_found(struct qla_qpair *qpair, struct bsg_job *bsg_job)
		     sp->type == SRB_ELS_CMD_HST ||
		     sp->type == SRB_ELS_CMD_HST_NOLOGIN) &&
		    sp->u.bsg_job == bsg_job) {
			req->outstanding_cmds[cnt] = NULL;

			found = true;
			sp->comp = &comp;
			break;
		}
	}
	spin_unlock_irqrestore(qpair->qp_lock_ptr, flags);

			if (!ha->flags.eeh_busy && ha->isp_ops->abort_command(sp)) {
				ql_log(ql_log_warn, vha, 0x7089,
						"mbx abort_command failed.\n");
				bsg_reply->result = -EIO;
	if (!found)
		return false;

	if (ha->flags.eeh_busy) {
		/* skip over abort.  EEH handling will return the bsg. Wait for it */
		rval = QLA_SUCCESS;
		ql_dbg(ql_dbg_user, vha, 0x802c,
			"eeh encounter. bsg %p sp=%p handle=%x \n",
			bsg_job, sp, sp->handle);
	} else {
		rval = ha->isp_ops->abort_command(sp);
		ql_dbg(ql_dbg_user, vha, 0x802c,
			"Aborting bsg %p sp=%p handle=%x rval=%x\n",
			bsg_job, sp, sp->handle, rval);
	}

	switch (rval) {
	case QLA_SUCCESS:
		/* Wait for the command completion. */
		ratov_j = ha->r_a_tov / 10 * 4 * 1000;
		ratov_j = msecs_to_jiffies(ratov_j);

		if (!wait_for_completion_timeout(&comp, ratov_j)) {
			ql_log(ql_log_info, vha, 0x7089,
				"bsg abort timeout.  bsg=%p sp=%p handle %#x .\n",
				bsg_job, sp, sp->handle);

			do_bsg_done = true;
		} else {
			/* fw had returned the bsg */
			ql_dbg(ql_dbg_user, vha, 0x708a,
						"mbx abort_command success.\n");
				bsg_reply->result = 0;
				"bsg abort success. bsg %p sp=%p handle=%#x\n",
				bsg_job, sp, sp->handle);
			do_bsg_done = false;
		}
			/* ref: INIT */
			kref_put(&sp->cmd_kref, qla2x00_sp_release);
		break;
	default:
		ql_log(ql_log_info, vha, 0x704f,
			"bsg abort fail.  bsg=%p sp=%p rval=%x.\n",
			bsg_job, sp, rval);

			found = true;
			goto done;
		do_bsg_done = true;
		break;
	}

	if (!do_bsg_done)
		return true;

	spin_lock_irqsave(qpair->qp_lock_ptr, flags);
	/*
	 * recheck to make sure it's still the same bsg_job due to
	 * qp_lock_ptr was released earlier.
	 */
	if (req->outstanding_cmds[cnt] &&
	    req->outstanding_cmds[cnt]->u.bsg_job != bsg_job) {
		/* fw had returned the bsg */
		spin_unlock_irqrestore(qpair->qp_lock_ptr, flags);
		return true;
	}
	req->outstanding_cmds[cnt] = NULL;
	spin_unlock_irqrestore(qpair->qp_lock_ptr, flags);

done:
	return found;
	/* ref: INIT */
	sp->comp = NULL;
	kref_put(&sp->cmd_kref, qla2x00_sp_release);
	bsg_reply->result = -ENXIO;
	bsg_reply->reply_payload_rcv_len = 0;

	ql_dbg(ql_dbg_user, vha, 0x7051,
	       "%s bsg_job_done : bsg %p result %#x sp %p.\n",
	       __func__, bsg_job, bsg_reply->result, sp);

	bsg_job_done(bsg_job, bsg_reply->result, bsg_reply->reply_payload_rcv_len);

	return true;
}

int
qla24xx_bsg_timeout(struct bsg_job *bsg_job)
{
	struct fc_bsg_reply *bsg_reply = bsg_job->reply;
	struct fc_bsg_request *bsg_request = bsg_job->request;
	scsi_qla_host_t *vha = shost_priv(fc_bsg_to_shost(bsg_job));
	struct qla_hw_data *ha = vha->hw;
	int i;
	struct qla_qpair *qpair;

	ql_log(ql_log_info, vha, 0x708b, "%s CMD timeout. bsg ptr %p.\n",
	    __func__, bsg_job);
	ql_log(ql_log_info, vha, 0x708b,
	       "%s CMD timeout. bsg ptr %p msgcode %x vendor cmd %x\n",
	       __func__, bsg_job, bsg_request->msgcode,
	       bsg_request->rqst_data.h_vendor.vendor_cmd[0]);

	if (qla2x00_isp_reg_stat(ha)) {
		ql_log(ql_log_info, vha, 0x9007,
@@ -3136,7 +3207,6 @@ qla24xx_bsg_timeout(struct bsg_job *bsg_job)
	}

	ql_log(ql_log_info, vha, 0x708b, "SRB not found to abort.\n");
	bsg_reply->result = -ENXIO;

done:
	return 0;