Commit 74c45052 authored by Jens Axboe

blk-mq: add a 'list' parameter to ->queue_rq()

Since we have the notion of a 'last' request in a chain, we can use
this to have the hardware optimize the issuing of requests. Add
a list_head parameter to queue_rq that the driver can use to
temporarily store hw commands for issue when 'last' is true. If we
are doing a chain of requests, pass in a NULL list for the first
request to force issue of that immediately, then batch the remainder
for deferred issue until the last request has been sent.

Instead of adding yet another argument to the hot ->queue_rq path,
encapsulate the passed arguments in a blk_mq_queue_data structure.
This is passed as a constant, and has been tested as faster than
passing 4 (or even 3) args through ->queue_rq. Update drivers for
the new ->queue_rq() prototype. There are no functional changes
in this patch for drivers - if they don't use the passed-in list,
then they will just queue requests individually like before.
Signed-off-by: Jens Axboe
parent 34b48db6
......@@ -680,6 +680,8 @@ static void __blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx)
struct request_queue *q = hctx->queue;
struct request *rq;
struct list_head *dptr;
int queued;
WARN_ON(!cpumask_test_cpu(raw_smp_processor_id(), hctx->cpumask));
......@@ -705,17 +707,28 @@ static void __blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx)
* Start off with dptr being NULL, so we start the first request
* immediately, even if we have more pending.
dptr = NULL;
* Now process all the entries, sending them to the driver.
queued = 0;
while (!list_empty(&rq_list)) {
struct blk_mq_queue_data bd;
int ret;
rq = list_first_entry(&rq_list, struct request, queuelist);
ret = q->mq_ops->queue_rq(hctx, rq, list_empty(&rq_list));
bd.rq = rq;
bd.list = dptr;
bd.last = list_empty(&rq_list);
ret = q->mq_ops->queue_rq(hctx, &bd);
switch (ret) {
......@@ -734,6 +747,13 @@ static void __blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx)
if (ret == BLK_MQ_RQ_QUEUE_BUSY)
* We've done the first request. If we have more than 1
* left in the list, set dptr to defer issue.
if (!dptr && rq_list.next != rq_list.prev)
dptr = &driver_list;
if (!queued)
......@@ -1153,6 +1173,11 @@ static void blk_mq_make_request(struct request_queue *q, struct bio *bio)
if (is_sync) {
struct blk_mq_queue_data bd = {
.rq = rq,
.list = NULL,
.last = 1
int ret;
blk_mq_bio_to_request(rq, bio);
......@@ -1162,7 +1187,7 @@ static void blk_mq_make_request(struct request_queue *q, struct bio *bio)
* error (busy), just add it to our list as we previously
* would have done
ret = q->mq_ops->queue_rq(data.hctx, rq, true);
ret = q->mq_ops->queue_rq(data.hctx, &bd);
if (ret == BLK_MQ_RQ_QUEUE_OK)
goto done;
else {
......@@ -3775,9 +3775,10 @@ static bool mtip_check_unal_depth(struct blk_mq_hw_ctx *hctx,
return false;
static int mtip_queue_rq(struct blk_mq_hw_ctx *hctx, struct request *rq,
bool last)
static int mtip_queue_rq(struct blk_mq_hw_ctx *hctx,
const struct blk_mq_queue_data *bd)
struct request *rq = bd->rq;
int ret;
if (unlikely(mtip_check_unal_depth(hctx, rq)))
......@@ -313,15 +313,15 @@ static void null_request_fn(struct request_queue *q)
static int null_queue_rq(struct blk_mq_hw_ctx *hctx, struct request *rq,
bool last)
static int null_queue_rq(struct blk_mq_hw_ctx *hctx,
const struct blk_mq_queue_data *bd)
struct nullb_cmd *cmd = blk_mq_rq_to_pdu(rq);
struct nullb_cmd *cmd = blk_mq_rq_to_pdu(bd->rq);
cmd->rq = rq;
cmd->rq = bd->rq;
cmd->nq = hctx->driver_data;
......@@ -158,10 +158,11 @@ static void virtblk_done(struct virtqueue *vq)
spin_unlock_irqrestore(&vblk->vqs[qid].lock, flags);
static int virtio_queue_rq(struct blk_mq_hw_ctx *hctx, struct request *req,
bool last)
static int virtio_queue_rq(struct blk_mq_hw_ctx *hctx,
const struct blk_mq_queue_data *bd)
struct virtio_blk *vblk = hctx->queue->queuedata;
struct request *req = bd->rq;
struct virtblk_req *vbr = blk_mq_rq_to_pdu(req);
unsigned long flags;
unsigned int num;
......@@ -222,7 +223,7 @@ static int virtio_queue_rq(struct blk_mq_hw_ctx *hctx, struct request *req,
if (last && virtqueue_kick_prepare(vblk->vqs[qid].vq))
if (bd->last && virtqueue_kick_prepare(vblk->vqs[qid].vq))
notify = true;
spin_unlock_irqrestore(&vblk->vqs[qid].lock, flags);
......@@ -1858,9 +1858,10 @@ static void scsi_mq_done(struct scsi_cmnd *cmd)
static int scsi_queue_rq(struct blk_mq_hw_ctx *hctx, struct request *req,
bool last)
static int scsi_queue_rq(struct blk_mq_hw_ctx *hctx,
const struct blk_mq_queue_data *bd)
struct request *req = bd->rq;
struct request_queue *q = req->q;
struct scsi_device *sdev = q->queuedata;
struct Scsi_Host *shost = sdev->host;
......@@ -79,7 +79,13 @@ struct blk_mq_tag_set {
struct list_head tag_list;
typedef int (queue_rq_fn)(struct blk_mq_hw_ctx *, struct request *, bool);
/**
 * struct blk_mq_queue_data - arguments handed to a driver's ->queue_rq()
 * @rq:   request the driver is asked to queue
 * @list: list the driver may use to temporarily store hw commands for
 *        issue once @last is true; NULL forces the request to be issued
 *        immediately
 * @last: true when @rq is the last request of the chain being sent
 *
 * Bundles the ->queue_rq() arguments so that the hot path receives a
 * single const pointer instead of a growing argument list. Drivers that
 * ignore @list simply queue requests individually.
 */
struct blk_mq_queue_data {
	struct request *rq;
	struct list_head *list;
	bool last;
};
typedef int (queue_rq_fn)(struct blk_mq_hw_ctx *, const struct blk_mq_queue_data *);
typedef struct blk_mq_hw_ctx *(map_queue_fn)(struct request_queue *, const int);
typedef enum blk_eh_timer_return (timeout_fn)(struct request *, bool);
typedef int (init_hctx_fn)(struct blk_mq_hw_ctx *, void *, unsigned int);
