Commit 73c10101 authored by Jens Axboe's avatar Jens Axboe

block: initial patch for on-stack per-task plugging

This patch adds support for creating a queuing context outside
of the queue itself. This enables us to batch up pieces of IO
before grabbing the block device queue lock and submitting them to
the IO scheduler.

The context is created on the stack of the process and assigned in
the task structure, so that we can auto-unplug it if we hit a schedule
event.

The current queue plugging happens implicitly if IO is submitted to
an empty device, yet callers have to remember to unplug that IO when
they are going to wait for it. This is an ugly API and has caused bugs
in the past. Additionally, it requires hacks in the vm (->sync_page()
callback) to handle that logic. By switching to an explicit plugging
scheme we make the API a lot nicer and can get rid of the ->sync_page()
hack in the vm.
Signed-off-by: default avatarJens Axboe <jaxboe@fusionio.com>
parent a488e749
This diff is collapsed.
......@@ -264,10 +264,9 @@ static bool blk_kick_flush(struct request_queue *q)
static void flush_data_end_io(struct request *rq, int error)
{
struct request_queue *q = rq->q;
bool was_empty = elv_queue_empty(q);
/* after populating an empty queue, kick it to avoid stall */
if (blk_flush_complete_seq(rq, REQ_FSEQ_DATA, error) && was_empty)
if (blk_flush_complete_seq(rq, REQ_FSEQ_DATA, error))
__blk_run_queue(q);
}
......
......@@ -113,7 +113,7 @@ int elv_rq_merge_ok(struct request *rq, struct bio *bio)
}
EXPORT_SYMBOL(elv_rq_merge_ok);
static inline int elv_try_merge(struct request *__rq, struct bio *bio)
int elv_try_merge(struct request *__rq, struct bio *bio)
{
int ret = ELEVATOR_NO_MERGE;
......@@ -421,6 +421,8 @@ void elv_dispatch_sort(struct request_queue *q, struct request *rq)
struct list_head *entry;
int stop_flags;
BUG_ON(rq->cmd_flags & REQ_ON_PLUG);
if (q->last_merge == rq)
q->last_merge = NULL;
......@@ -696,6 +698,8 @@ void elv_insert(struct request_queue *q, struct request *rq, int where)
void __elv_add_request(struct request_queue *q, struct request *rq, int where,
int plug)
{
BUG_ON(rq->cmd_flags & REQ_ON_PLUG);
if (rq->cmd_flags & REQ_SOFTBARRIER) {
/* barriers are scheduling boundary, update end_sector */
if (rq->cmd_type == REQ_TYPE_FS ||
......
......@@ -152,6 +152,7 @@ enum rq_flag_bits {
__REQ_IO_STAT, /* account I/O stat */
__REQ_MIXED_MERGE, /* merge of different types, fail separately */
__REQ_SECURE, /* secure discard (used with __REQ_DISCARD) */
__REQ_ON_PLUG, /* on plug list */
__REQ_NR_BITS, /* stops here */
};
......@@ -193,5 +194,6 @@ enum rq_flag_bits {
#define REQ_IO_STAT (1 << __REQ_IO_STAT)
#define REQ_MIXED_MERGE (1 << __REQ_MIXED_MERGE)
#define REQ_SECURE (1 << __REQ_SECURE)
#define REQ_ON_PLUG (1 << __REQ_ON_PLUG)
#endif /* __LINUX_BLK_TYPES_H */
......@@ -871,6 +871,31 @@ struct request_queue *blk_alloc_queue(gfp_t);
struct request_queue *blk_alloc_queue_node(gfp_t, int);
extern void blk_put_queue(struct request_queue *);
struct blk_plug {
unsigned long magic;
struct list_head list;
unsigned int should_sort;
};
extern void blk_start_plug(struct blk_plug *);
extern void blk_finish_plug(struct blk_plug *);
extern void __blk_flush_plug(struct task_struct *, struct blk_plug *);
static inline void blk_flush_plug(struct task_struct *tsk)
{
struct blk_plug *plug = tsk->plug;
if (unlikely(plug))
__blk_flush_plug(tsk, plug);
}
static inline bool blk_needs_flush_plug(struct task_struct *tsk)
{
struct blk_plug *plug = tsk->plug;
return plug && !list_empty(&plug->list);
}
/*
* tag stuff
*/
......@@ -1294,6 +1319,23 @@ static inline long nr_blockdev_pages(void)
return 0;
}
static inline void blk_start_plug(struct list_head *list)
{
}
static inline void blk_finish_plug(struct list_head *list)
{
}
static inline void blk_flush_plug(struct task_struct *tsk)
{
}
static inline bool blk_needs_flush_plug(struct task_struct *tsk)
{
return false;
}
#endif /* CONFIG_BLOCK */
#endif
......@@ -105,6 +105,7 @@ extern void elv_add_request(struct request_queue *, struct request *, int, int);
extern void __elv_add_request(struct request_queue *, struct request *, int, int);
extern void elv_insert(struct request_queue *, struct request *, int);
extern int elv_merge(struct request_queue *, struct request **, struct bio *);
extern int elv_try_merge(struct request *, struct bio *);
extern void elv_merge_requests(struct request_queue *, struct request *,
struct request *);
extern void elv_merged_request(struct request_queue *, struct request *, int);
......
......@@ -99,6 +99,7 @@ struct robust_list_head;
struct bio_list;
struct fs_struct;
struct perf_event_context;
struct blk_plug;
/*
* List of flags we want to share for kernel threads,
......@@ -1429,6 +1430,11 @@ struct task_struct {
/* stacked block device info */
struct bio_list *bio_list;
#ifdef CONFIG_BLOCK
/* stack plugging */
struct blk_plug *plug;
#endif
/* VM state */
struct reclaim_state *reclaim_state;
......
......@@ -908,6 +908,7 @@ NORET_TYPE void do_exit(long code)
profile_task_exit(tsk);
WARN_ON(atomic_read(&tsk->fs_excl));
WARN_ON(blk_needs_flush_plug(tsk));
if (unlikely(in_interrupt()))
panic("Aiee, killing interrupt handler!");
......
......@@ -1204,6 +1204,9 @@ static struct task_struct *copy_process(unsigned long clone_flags,
* Clear TID on mm_release()?
*/
p->clear_child_tid = (clone_flags & CLONE_CHILD_CLEARTID) ? child_tidptr: NULL;
#ifdef CONFIG_BLOCK
p->plug = NULL;
#endif
#ifdef CONFIG_FUTEX
p->robust_list = NULL;
#ifdef CONFIG_COMPAT
......
......@@ -3978,6 +3978,16 @@ need_resched_nonpreemptible:
switch_count = &prev->nvcsw;
}
/*
* If we are going to sleep and we have plugged IO queued, make
* sure to submit it to avoid deadlocks.
*/
if (prev->state != TASK_RUNNING && blk_needs_flush_plug(prev)) {
raw_spin_unlock(&rq->lock);
blk_flush_plug(prev);
raw_spin_lock(&rq->lock);
}
pre_schedule(rq, prev);
if (unlikely(!rq->nr_running))
......@@ -5333,6 +5343,7 @@ void __sched io_schedule(void)
delayacct_blkio_start();
atomic_inc(&rq->nr_iowait);
blk_flush_plug(current);
current->in_iowait = 1;
schedule();
current->in_iowait = 0;
......@@ -5348,6 +5359,7 @@ long __sched io_schedule_timeout(long timeout)
delayacct_blkio_start();
atomic_inc(&rq->nr_iowait);
blk_flush_plug(current);
current->in_iowait = 1;
ret = schedule_timeout(timeout);
current->in_iowait = 0;
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment