/*
 *  CFQ, or complete fairness queueing, disk scheduler.
 *
 *  Based on ideas from a previously unfinished io
 *  scheduler (round robin per-process disk scheduling) and Andrea Arcangeli.
 *
 *  Copyright (C) 2003 Jens Axboe <axboe@kernel.dk>
 */
#include <linux/module.h>
#include <linux/blkdev.h>
#include <linux/elevator.h>
#include <linux/hash.h>
#include <linux/rbtree.h>
#include <linux/ioprio.h>

/*
 * tunables
 */
static const int cfq_quantum = 4;		/* max queue in one round of service */
static const int cfq_fifo_expire[2] = { HZ / 4, HZ / 8 };
static const int cfq_back_max = 16 * 1024;	/* maximum backwards seek, in KiB */
static const int cfq_back_penalty = 2;		/* penalty of a backwards seek */

static const int cfq_slice_sync = HZ / 10;
static int cfq_slice_async = HZ / 25;
static const int cfq_slice_async_rq = 2;
static int cfq_slice_idle = HZ / 125;
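
/*
 * The slice and fifo expiry values above are all in jiffies; the two
 * cfq_fifo_expire[] entries are indexed by request type ([0] async, [1] sync,
 * matching the ASYNC/SYNC defines below).
 */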

/*
 * grace period before allowing idle class to get disk access
 */
#define CFQ_IDLE_GRACE		(HZ / 10)

/*
 * below this threshold, we consider thinktime immediate
 */
#define CFQ_MIN_TT		(2)

#define CFQ_SLICE_SCALE		(5)

#define CFQ_KEY_ASYNC		(0)

/*
 * for the hash of cfqq inside the cfqd
 */
#define CFQ_QHASH_SHIFT		6
#define CFQ_QHASH_ENTRIES	(1 << CFQ_QHASH_SHIFT)

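/*
 * CFQ keeps its per-request context in the two elevator private fields of
 * struct request.
 */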
#define RQ_CIC(rq)		((struct cfq_io_context*)(rq)->elevator_private)
#define RQ_CFQQ(rq)		((rq)->elevator_private2)

static struct kmem_cache *cfq_pool;
static struct kmem_cache *cfq_ioc_pool;

static DEFINE_PER_CPU(unsigned long, ioc_count);
static struct completion *ioc_gone;

#define CFQ_PRIO_LISTS		IOPRIO_BE_NR
#define cfq_class_idle(cfqq)	((cfqq)->ioprio_class == IOPRIO_CLASS_IDLE)
#define cfq_class_rt(cfqq)	((cfqq)->ioprio_class == IOPRIO_CLASS_RT)

#define ASYNC			(0)
#define SYNC			(1)

#define cfq_cfqq_sync(cfqq)	((cfqq)->key != CFQ_KEY_ASYNC)

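/*
 * Only trust the collected seek and think time statistics once a reasonable
 * number of samples has accumulated.
 */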
#define sample_valid(samples)	((samples) > 80)

/*
 * Most of our rbtree usage is for sorting with min extraction, so
 * if we cache the leftmost node we don't have to walk down the tree
 * to find it. Idea borrowed from Ingo Molnar's CFS scheduler. We should
 * move this into the elevator for the rq sorting as well.
 */
struct cfq_rb_root {
	struct rb_root rb;
	struct rb_node *left;
};
#define CFQ_RB_ROOT	(struct cfq_rb_root) { RB_ROOT, NULL, }

/*
 * Per block device queue structure
 */
struct cfq_data {
	request_queue_t *queue;

	/*
	 * rr list of queues with requests and the count of them
	 */
	struct cfq_rb_root service_tree;
	unsigned int busy_queues;

	/*
	 * cfqq lookup hash
	 */
	struct hlist_head *cfq_hash;

	int rq_in_driver;
	int sync_flight;
	int hw_tag;

	/*
	 * idle window management
	 */
	struct timer_list idle_slice_timer;
	struct work_struct unplug_work;

	struct cfq_queue *active_queue;
	struct cfq_io_context *active_cic;

	struct timer_list idle_class_timer;

	sector_t last_position;
	unsigned long last_end_request;

	/*
	 * tunables, see top of file
	 */
	unsigned int cfq_quantum;
	unsigned int cfq_fifo_expire[2];
	unsigned int cfq_back_penalty;
	unsigned int cfq_back_max;
	unsigned int cfq_slice[2];
	unsigned int cfq_slice_async_rq;
	unsigned int cfq_slice_idle;

	struct list_head cic_list;

	sector_t new_seek_mean;
	u64 new_seek_total;
};

/*
 * Per process-grouping structure
 */
struct cfq_queue {
	/* reference count */
	atomic_t ref;
	/* parent cfq_data */
	struct cfq_data *cfqd;
	/* cfqq lookup hash */
	struct hlist_node cfq_hash;
	/* hash key */
	unsigned int key;
	/* service_tree member */
	struct rb_node rb_node;
	/* service_tree key */
	unsigned long rb_key;
	/* sorted list of pending requests */
	struct rb_root sort_list;
	/* if fifo isn't expired, next request to serve */
	struct request *next_rq;
	/* requests queued in sort_list */
	int queued[2];
	/* currently allocated requests */
	int allocated[2];
	/* pending metadata requests */
	int meta_pending;
	/* fifo list of requests in sort_list */
	struct list_head fifo;

	unsigned long slice_end;
	long slice_resid;

	/* number of requests that are on the dispatch list or inside driver */
	int dispatched;

	/* io prio of this group */
	unsigned short ioprio, org_ioprio;
	unsigned short ioprio_class, org_ioprio_class;

	/* various state flags, see below */
	unsigned int flags;

	sector_t last_request_pos;
};

enum cfqq_state_flags {
	CFQ_CFQQ_FLAG_on_rr = 0,	/* on round-robin busy list */
	CFQ_CFQQ_FLAG_wait_request,	/* waiting for a request */
	CFQ_CFQQ_FLAG_must_alloc,	/* must be allowed rq alloc */
	CFQ_CFQQ_FLAG_must_alloc_slice,	/* per-slice must_alloc flag */
	CFQ_CFQQ_FLAG_must_dispatch,	/* must dispatch, even if expired */
	CFQ_CFQQ_FLAG_fifo_expire,	/* FIFO checked in this slice */
	CFQ_CFQQ_FLAG_idle_window,	/* slice idling enabled */
	CFQ_CFQQ_FLAG_prio_changed,	/* task priority has changed */
	CFQ_CFQQ_FLAG_queue_new,	/* queue never been serviced */
	CFQ_CFQQ_FLAG_slice_new,	/* no requests dispatched in slice */
};

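/*
 * CFQ_CFQQ_FNS(foo) generates three helpers for each of the flags above:
 * cfq_mark_cfqq_foo() sets CFQ_CFQQ_FLAG_foo in cfqq->flags,
 * cfq_clear_cfqq_foo() clears it and cfq_cfqq_foo() tests it.
 */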
#define CFQ_CFQQ_FNS(name)						\
static inline void cfq_mark_cfqq_##name(struct cfq_queue *cfqq)		\
{									\
	cfqq->flags |= (1 << CFQ_CFQQ_FLAG_##name);			\
}									\
static inline void cfq_clear_cfqq_##name(struct cfq_queue *cfqq)	\
{									\
	cfqq->flags &= ~(1 << CFQ_CFQQ_FLAG_##name);			\
}									\
static inline int cfq_cfqq_##name(const struct cfq_queue *cfqq)		\
{									\
	return (cfqq->flags & (1 << CFQ_CFQQ_FLAG_##name)) != 0;	\
}

CFQ_CFQQ_FNS(on_rr);
CFQ_CFQQ_FNS(wait_request);
CFQ_CFQQ_FNS(must_alloc);
CFQ_CFQQ_FNS(must_alloc_slice);
CFQ_CFQQ_FNS(must_dispatch);
CFQ_CFQQ_FNS(fifo_expire);
CFQ_CFQQ_FNS(idle_window);
CFQ_CFQQ_FNS(prio_changed);
CFQ_CFQQ_FNS(queue_new);
CFQ_CFQQ_FNS(slice_new);
#undef CFQ_CFQQ_FNS

static struct cfq_queue *cfq_find_cfq_hash(struct cfq_data *, unsigned int, unsigned short);
static void cfq_dispatch_insert(request_queue_t *, struct request *);
static struct cfq_queue *cfq_get_queue(struct cfq_data *, unsigned int, struct task_struct *, gfp_t);

/*
 * scheduler run of queue, if there are requests pending and no one in the
 * driver that will restart queueing
 */
static inline void cfq_schedule_dispatch(struct cfq_data *cfqd)
{
	if (cfqd->busy_queues)
		kblockd_schedule_work(&cfqd->unplug_work);
}

static int cfq_queue_empty(request_queue_t *q)
{
	struct cfq_data *cfqd = q->elevator->elevator_data;

	return !cfqd->busy_queues;
}

static inline pid_t cfq_queue_pid(struct task_struct *task, int rw, int is_sync)
{
	/*
	 * Use the per-process queue, for read requests and synchronous writes
	 */
	if (!(rw & REQ_RW) || is_sync)
		return task->pid;

	return CFQ_KEY_ASYNC;
}

/*
 * Scale schedule slice based on io priority. Use the sync time slice only
 * if a queue is marked sync and has sync io queued. A sync queue with async
 * io only, should not get full sync slice length.
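 *
 * As an example, with the default cfq_slice[SYNC] of HZ/10, CFQ_SLICE_SCALE
 * of 5 and HZ = 1000, a sync queue at ioprio 4 gets the 100ms base slice,
 * ioprio 0 gets 180ms and ioprio 7 gets 40ms.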
 */
static inline int cfq_prio_slice(struct cfq_data *cfqd, int sync,
				 unsigned short prio)
{
	const int base_slice = cfqd->cfq_slice[sync];

	WARN_ON(prio >= IOPRIO_BE_NR);

	return base_slice + (base_slice/CFQ_SLICE_SCALE * (4 - prio));
}

static inline int
cfq_prio_to_slice(struct cfq_data *cfqd, struct cfq_queue *cfqq)
{
	return cfq_prio_slice(cfqd, cfq_cfqq_sync(cfqq), cfqq->ioprio);
}

static inline void
cfq_set_prio_slice(struct cfq_data *cfqd, struct cfq_queue *cfqq)
{
	cfqq->slice_end = cfq_prio_to_slice(cfqd, cfqq) + jiffies;
}

/*
 * We need to wrap this check in cfq_cfqq_slice_new(), since ->slice_end
 * isn't valid until the first request from the dispatch is activated
 * and the slice time set.
 */
static inline int cfq_slice_used(struct cfq_queue *cfqq)
{
	if (cfq_cfqq_slice_new(cfqq))
		return 0;
	if (time_before(jiffies, cfqq->slice_end))
		return 0;

	return 1;
}

/*
 * Lifted from AS - choose which of rq1 and rq2 that is best served now.
 * We choose the request that is closest to the head right now. Distance
 * behind the head is penalized and only allowed to a certain extent.
 */
static struct request *
cfq_choose_req(struct cfq_data *cfqd, struct request *rq1, struct request *rq2)
{
	sector_t last, s1, s2, d1 = 0, d2 = 0;
	unsigned long back_max;
#define CFQ_RQ1_WRAP	0x01 /* request 1 wraps */
#define CFQ_RQ2_WRAP	0x02 /* request 2 wraps */
	unsigned wrap = 0; /* bit mask: requests behind the disk head? */

	if (rq1 == NULL || rq1 == rq2)
		return rq2;
	if (rq2 == NULL)
		return rq1;

	if (rq_is_sync(rq1) && !rq_is_sync(rq2))
		return rq1;
	else if (rq_is_sync(rq2) && !rq_is_sync(rq1))
		return rq2;
	if (rq_is_meta(rq1) && !rq_is_meta(rq2))
		return rq1;
	else if (rq_is_meta(rq2) && !rq_is_meta(rq1))
		return rq2;

	s1 = rq1->sector;
	s2 = rq2->sector;

	last = cfqd->last_position;

	/*
	 * by definition, 1KiB is 2 sectors
	 */
	back_max = cfqd->cfq_back_max * 2;

	/*
	 * Strict one way elevator _except_ in the case where we allow
	 * short backward seeks which are biased as twice the cost of a
	 * similar forward seek.
	 */
	if (s1 >= last)
		d1 = s1 - last;
	else if (s1 + back_max >= last)
		d1 = (last - s1) * cfqd->cfq_back_penalty;
	else
		wrap |= CFQ_RQ1_WRAP;

	if (s2 >= last)
		d2 = s2 - last;
	else if (s2 + back_max >= last)
		d2 = (last - s2) * cfqd->cfq_back_penalty;
	else
		wrap |= CFQ_RQ2_WRAP;

	/* Found required data */

	/*
	 * By doing switch() on the bit mask "wrap" we avoid having to
	 * check two variables for all permutations: --> faster!
	 */
	switch (wrap) {
	case 0: /* common case for CFQ: rq1 and rq2 not wrapped */
		if (d1 < d2)
			return rq1;
		else if (d2 < d1)
			return rq2;
		else {
			if (s1 >= s2)
				return rq1;
			else
				return rq2;
		}

	case CFQ_RQ2_WRAP:
		return rq1;
	case CFQ_RQ1_WRAP:
		return rq2;
	case (CFQ_RQ1_WRAP|CFQ_RQ2_WRAP): /* both rqs wrapped */
	default:
		/*
		 * Since both rqs are wrapped,
		 * start with the one that's further behind head
		 * (--> only *one* back seek required),
		 * since back seek takes more time than forward.
		 */
		if (s1 <= s2)
			return rq1;
		else
			return rq2;
	}
}

/*
 * The below is leftmost cache rbtree addon
 */
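/*
 * cfq_rb_first() returns the leftmost (smallest key) node, reusing the
 * cached pointer when it is still valid; the cache is cleared when that
 * node is erased and refreshed lazily on the next lookup.
 */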
static struct rb_node *cfq_rb_first(struct cfq_rb_root *root)
{
	if (!root->left)
		root->left = rb_first(&root->rb);

	return root->left;
}

static void cfq_rb_erase(struct rb_node *n, struct cfq_rb_root *root)
{
	if (root->left == n)
		root->left = NULL;

	rb_erase(n, &root->rb);
	RB_CLEAR_NODE(n);
}

/*
 * would be nice to take fifo expire time into account as well
 */
static struct request *
cfq_find_next_rq(struct cfq_data *cfqd, struct cfq_queue *cfqq,
		  struct request *last)
{
	struct rb_node *rbnext = rb_next(&last->rb_node);
	struct rb_node *rbprev = rb_prev(&last->rb_node);
	struct request *next = NULL, *prev = NULL;

	BUG_ON(RB_EMPTY_NODE(&last->rb_node));

	if (rbprev)
		prev = rb_entry_rq(rbprev);

	if (rbnext)
		next = rb_entry_rq(rbnext);
	else {
		rbnext = rb_first(&cfqq->sort_list);
		if (rbnext && rbnext != &last->rb_node)
			next = rb_entry_rq(rbnext);
	}

	return cfq_choose_req(cfqd, next, prev);
}

static unsigned long cfq_slice_offset(struct cfq_data *cfqd,
				      struct cfq_queue *cfqq)
{
	/*
	 * just an approximation, should be ok.
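	 *
	 * The offset grows with the number of other busy queues and shrinks
	 * with this queue's own slice length, so higher priority queues get
	 * smaller keys and are serviced earlier.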
	 */
	return (cfqd->busy_queues - 1) * (cfq_prio_slice(cfqd, 1, 0) -
		       cfq_prio_slice(cfqd, cfq_cfqq_sync(cfqq), cfqq->ioprio));
}

/*
 * The cfqd->service_tree holds all pending cfq_queue's that have
 * requests waiting to be processed. It is sorted in the order that
 * we will service the queues.
 */
static void cfq_service_tree_add(struct cfq_data *cfqd,
				    struct cfq_queue *cfqq, int add_front)
{
	struct rb_node **p = &cfqd->service_tree.rb.rb_node;
	struct rb_node *parent = NULL;
	unsigned long rb_key;
	int left;

	if (!add_front) {
		rb_key = cfq_slice_offset(cfqd, cfqq) + jiffies;
		rb_key += cfqq->slice_resid;
		cfqq->slice_resid = 0;
	} else
		rb_key = 0;

	if (!RB_EMPTY_NODE(&cfqq->rb_node)) {
		/*
		 * same position, nothing more to do
		 */
		if (rb_key == cfqq->rb_key)
			return;

		cfq_rb_erase(&cfqq->rb_node, &cfqd->service_tree);
	}

	left = 1;
	while (*p) {
		struct cfq_queue *__cfqq;
		struct rb_node **n;

		parent = *p;
		__cfqq = rb_entry(parent, struct cfq_queue, rb_node);

		/*
		 * sort RT queues first, we always want to give
		 * preference to them. IDLE queues go to the back.
		 * after that, sort on the next service time.
		 */
		if (cfq_class_rt(cfqq) > cfq_class_rt(__cfqq))
			n = &(*p)->rb_left;
		else if (cfq_class_rt(cfqq) < cfq_class_rt(__cfqq))
			n = &(*p)->rb_right;
		else if (cfq_class_idle(cfqq) < cfq_class_idle(__cfqq))
			n = &(*p)->rb_left;
		else if (cfq_class_idle(cfqq) > cfq_class_idle(__cfqq))
			n = &(*p)->rb_right;
		else if (rb_key < __cfqq->rb_key)
			n = &(*p)->rb_left;
		else
			n = &(*p)->rb_right;

		if (n == &(*p)->rb_right)
			left = 0;

		p = n;
	}

	if (left)
		cfqd->service_tree.left = &cfqq->rb_node;

	cfqq->rb_key = rb_key;
	rb_link_node(&cfqq->rb_node, parent, p);
	rb_insert_color(&cfqq->rb_node, &cfqd->service_tree.rb);
}

/*
 * Update cfqq's position in the service tree.
 */
static void cfq_resort_rr_list(struct cfq_data *cfqd, struct cfq_queue *cfqq)
{
	/*
	 * Resorting requires the cfqq to be on the RR list already.
	 */
	if (cfq_cfqq_on_rr(cfqq))
		cfq_service_tree_add(cfqd, cfqq, 0);
}

/*
 * add to busy list of queues for service, trying to be fair in ordering
 * the pending list according to last request service
 */
static inline void
cfq_add_cfqq_rr(struct cfq_data *cfqd, struct cfq_queue *cfqq)
{
	BUG_ON(cfq_cfqq_on_rr(cfqq));
	cfq_mark_cfqq_on_rr(cfqq);
	cfqd->busy_queues++;

	cfq_resort_rr_list(cfqd, cfqq);
}

/*
 * Called when the cfqq no longer has requests pending, remove it from
 * the service tree.
 */
static inline void
cfq_del_cfqq_rr(struct cfq_data *cfqd, struct cfq_queue *cfqq)
{
	BUG_ON(!cfq_cfqq_on_rr(cfqq));
	cfq_clear_cfqq_on_rr(cfqq);

	if (!RB_EMPTY_NODE(&cfqq->rb_node))
		cfq_rb_erase(&cfqq->rb_node, &cfqd->service_tree);

	BUG_ON(!cfqd->busy_queues);
	cfqd->busy_queues--;
}

/*
 * rb tree support functions
 */
static inline void cfq_del_rq_rb(struct request *rq)
{
	struct cfq_queue *cfqq = RQ_CFQQ(rq);
	struct cfq_data *cfqd = cfqq->cfqd;
	const int sync = rq_is_sync(rq);

	BUG_ON(!cfqq->queued[sync]);
	cfqq->queued[sync]--;

	elv_rb_del(&cfqq->sort_list, rq);

	if (cfq_cfqq_on_rr(cfqq) && RB_EMPTY_ROOT(&cfqq->sort_list))
		cfq_del_cfqq_rr(cfqd, cfqq);
}

static void cfq_add_rq_rb(struct request *rq)
{
	struct cfq_queue *cfqq = RQ_CFQQ(rq);
	struct cfq_data *cfqd = cfqq->cfqd;
	struct request *__alias;

	cfqq->queued[rq_is_sync(rq)]++;

	/*
	 * looks a little odd, but the first insert might return an alias.
	 * if that happens, put the alias on the dispatch list
	 */
	while ((__alias = elv_rb_add(&cfqq->sort_list, rq)) != NULL)
		cfq_dispatch_insert(cfqd->queue, __alias);

	if (!cfq_cfqq_on_rr(cfqq))
		cfq_add_cfqq_rr(cfqd, cfqq);

	/*
	 * check if this request is a better next-serve candidate
	 */
	cfqq->next_rq = cfq_choose_req(cfqd, cfqq->next_rq, rq);
	BUG_ON(!cfqq->next_rq);
}

static inline void
cfq_reposition_rq_rb(struct cfq_queue *cfqq, struct request *rq)
{
	elv_rb_del(&cfqq->sort_list, rq);
	cfqq->queued[rq_is_sync(rq)]--;
	cfq_add_rq_rb(rq);
}

static struct request *
cfq_find_rq_fmerge(struct cfq_data *cfqd, struct bio *bio)
{
	struct task_struct *tsk = current;
	pid_t key = cfq_queue_pid(tsk, bio_data_dir(bio), bio_sync(bio));
	struct cfq_queue *cfqq;

	cfqq = cfq_find_cfq_hash(cfqd, key, tsk->ioprio);
	if (cfqq) {
		sector_t sector = bio->bi_sector + bio_sectors(bio);

		return elv_rb_find(&cfqq->sort_list, sector);
	}

	return NULL;
}

static void cfq_activate_request(request_queue_t *q, struct request *rq)
{
	struct cfq_data *cfqd = q->elevator->elevator_data;

	cfqd->rq_in_driver++;

	/*
	 * If the depth is larger than 1, it really could be queueing. But
	 * let's make the mark a little higher - idling could still be good
	 * for low queueing, and a low queueing number could also just
	 * indicate a SCSI mid layer like behaviour where limit+1 is often
	 * seen.
	 */
	if (!cfqd->hw_tag && cfqd->rq_in_driver > 4)
		cfqd->hw_tag = 1;

	cfqd->last_position = rq->hard_sector + rq->hard_nr_sectors;
}

static void cfq_deactivate_request(request_queue_t *q, struct request *rq)
{
	struct cfq_data *cfqd = q->elevator->elevator_data;

	WARN_ON(!cfqd->rq_in_driver);
	cfqd->rq_in_driver--;
}

static void cfq_remove_request(struct request *rq)
{
	struct cfq_queue *cfqq = RQ_CFQQ(rq);

	if (cfqq->next_rq == rq)
		cfqq->next_rq = cfq_find_next_rq(cfqq->cfqd, cfqq, rq);

	list_del_init(&rq->queuelist);
	cfq_del_rq_rb(rq);

	if (rq_is_meta(rq)) {
		WARN_ON(!cfqq->meta_pending);
		cfqq->meta_pending--;
	}
}

static int cfq_merge(request_queue_t *q, struct request **req, struct bio *bio)
{
	struct cfq_data *cfqd = q->elevator->elevator_data;
	struct request *__rq;

	__rq = cfq_find_rq_fmerge(cfqd, bio);
	if (__rq && elv_rq_merge_ok(__rq, bio)) {
		*req = __rq;
		return ELEVATOR_FRONT_MERGE;
	}

	return ELEVATOR_NO_MERGE;
}

static void cfq_merged_request(request_queue_t *q, struct request *req,
			       int type)
{
	if (type == ELEVATOR_FRONT_MERGE) {
		struct cfq_queue *cfqq = RQ_CFQQ(req);

		cfq_reposition_rq_rb(cfqq, req);
	}
}

static void
cfq_merged_requests(request_queue_t *q, struct request *rq,
		    struct request *next)
{
	/*
	 * reposition in fifo if next is older than rq
	 */
	if (!list_empty(&rq->queuelist) && !list_empty(&next->queuelist) &&
	    time_before(next->start_time, rq->start_time))
		list_move(&rq->queuelist, &next->queuelist);

	cfq_remove_request(next);
}

static int cfq_allow_merge(request_queue_t *q, struct request *rq,
			   struct bio *bio)
{
	struct cfq_data *cfqd = q->elevator->elevator_data;
	const int rw = bio_data_dir(bio);
	struct cfq_queue *cfqq;
	pid_t key;

	/*
	 * Disallow merge of a sync bio into an async request.
	 */
	if ((bio_data_dir(bio) == READ || bio_sync(bio)) && !rq_is_sync(rq))
		return 0;

	/*
	 * Lookup the cfqq that this bio will be queued with. Allow
	 * merge only if rq is queued there.
	 */
	key = cfq_queue_pid(current, rw, bio_sync(bio));
	cfqq = cfq_find_cfq_hash(cfqd, key, current->ioprio);

	if (cfqq == RQ_CFQQ(rq))
		return 1;

	return 0;
}

static inline void
__cfq_set_active_queue(struct cfq_data *cfqd, struct cfq_queue *cfqq)
{
	if (cfqq) {
		/*
		 * stop potential idle class queues waiting service
		 */
		del_timer(&cfqd->idle_class_timer);

		cfqq->slice_end = 0;
		cfq_clear_cfqq_must_alloc_slice(cfqq);
		cfq_clear_cfqq_fifo_expire(cfqq);
		cfq_mark_cfqq_slice_new(cfqq);
		cfq_clear_cfqq_queue_new(cfqq);
	}

	cfqd->active_queue = cfqq;
}

/*
 * current cfqq expired its slice (or was too idle), select new one
 */
static void
__cfq_slice_expired(struct cfq_data *cfqd, struct cfq_queue *cfqq,
		    int timed_out)
{
	if (cfq_cfqq_wait_request(cfqq))
		del_timer(&cfqd->idle_slice_timer);

	cfq_clear_cfqq_must_dispatch(cfqq);
	cfq_clear_cfqq_wait_request(cfqq);

	/*
	 * store what was left of this slice, if the queue idled/timed out
	 */
	if (timed_out && !cfq_cfqq_slice_new(cfqq))
		cfqq->slice_resid = cfqq->slice_end - jiffies;

	cfq_resort_rr_list(cfqd, cfqq);

	if (cfqq == cfqd->active_queue)
		cfqd->active_queue = NULL;

	if (cfqd->active_cic) {
		put_io_context(cfqd->active_cic->ioc);
		cfqd->active_cic = NULL;
	}
}

static inline void cfq_slice_expired(struct cfq_data *cfqd, int timed_out)
{
	struct cfq_queue *cfqq = cfqd->active_queue;

	if (cfqq)
		__cfq_slice_expired(cfqd, cfqq, timed_out);
}

/*
 * Get next queue for service. Unless we have a queue preemption,
 * we'll simply select the first cfqq in the service tree.
 */
static struct cfq_queue *cfq_get_next_queue(struct cfq_data *cfqd)
{
	struct cfq_queue *cfqq;
	struct rb_node *n;

	if (RB_EMPTY_ROOT(&cfqd->service_tree.rb))
		return NULL;

	n = cfq_rb_first(&cfqd->service_tree);
	cfqq = rb_entry(n, struct cfq_queue, rb_node);

	if (cfq_class_idle(cfqq)) {
		unsigned long end;

		/*
		 * if we have idle queues and no rt or be queues had
		 * pending requests, either allow immediate service if
		 * the grace period has passed or arm the idle grace
		 * timer
		 */
		end = cfqd->last_end_request + CFQ_IDLE_GRACE;
		if (time_before(jiffies, end)) {
			mod_timer(&cfqd->idle_class_timer, end);
			cfqq = NULL;
		}
	}

	return cfqq;
}

/*
 * Get and set a new active queue for service.
 */
static struct cfq_queue *cfq_set_active_queue(struct cfq_data *cfqd)
{
	struct cfq_queue *cfqq;

	cfqq = cfq_get_next_queue(cfqd);
	__cfq_set_active_queue(cfqd, cfqq);
	return cfqq;
}

static inline sector_t cfq_dist_from_last(struct cfq_data *cfqd,
					  struct request *rq)
{
	if (rq->sector >= cfqd->last_position)
		return rq->sector - cfqd->last_position;
	else
		return cfqd->last_position - rq->sector;
}

static inline int cfq_rq_close(struct cfq_data *cfqd, struct request *rq)
{
	struct cfq_io_context *cic = cfqd->active_cic;

	if (!sample_valid(cic->seek_samples))
		return 0;

	return cfq_dist_from_last(cfqd, rq) <= cic->seek_mean;
}

static int cfq_close_cooperator(struct cfq_data *cfq_data,
				struct cfq_queue *cfqq)
{
	/*
	 * We should notice if some of the queues are cooperating, e.g.
	 * working closely on the same area of the disk. In that case,
	 * we can group them together and don't waste time idling.
	 */
	return 0;
}

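/*
 * seek_mean is kept in sectors, so a process whose mean seek distance
 * exceeds 8192 sectors (4MB with 512-byte sectors) is considered seeky.
 */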
#define CIC_SEEKY(cic) ((cic)->seek_mean > (8 * 1024))

static void cfq_arm_slice_timer(struct cfq_data *cfqd)
{
	struct cfq_queue *cfqq = cfqd->active_queue;
	struct cfq_io_context *cic;
	unsigned long sl;

	WARN_ON(!RB_EMPTY_ROOT(&cfqq->sort_list));
	WARN_ON(cfq_cfqq_slice_new(cfqq));

	/*
	 * idle is disabled, either manually or by past process history
	 */
	if (!cfqd->cfq_slice_idle || !cfq_cfqq_idle_window(cfqq))
		return;

	/*
	 * task has exited, don't wait
	 */
	cic = cfqd->active_cic;
	if (!cic || !cic->ioc->task)
		return;

	/*
	 * See if this prio level has a good candidate
	 */
	if (cfq_close_cooperator(cfqd, cfqq) &&
	    (sample_valid(cic->ttime_samples) && cic->ttime_mean > 2))
		return;

	cfq_mark_cfqq_must_dispatch(cfqq);
	cfq_mark_cfqq_wait_request(cfqq);

	/*
	 * we don't want to idle for seeks, but we do want to allow
	 * fair distribution of slice time for a process doing back-to-back
	 * seeks. so allow a little bit of time for it to submit a new rq
	 */
	sl = cfqd->cfq_slice_idle;
	if (sample_valid(cic->seek_samples) && CIC_SEEKY(cic))
		sl = min(sl, msecs_to_jiffies(CFQ_MIN_TT));

	mod_timer(&cfqd->idle_slice_timer, jiffies + sl);
}

/*
 * Move request from internal lists to the request queue dispatch list.
 */
static void cfq_dispatch_insert(request_queue_t *q, struct request *rq)
{
	struct cfq_data *cfqd = q->elevator->elevator_data;
	struct cfq_queue *cfqq = RQ_CFQQ(rq);

	cfq_remove_request(rq);
	cfqq->dispatched++;
	elv_dispatch_sort(q, rq);

	if (cfq_cfqq_sync(cfqq))
		cfqd->sync_flight++;
}

/*
 * return expired entry, or NULL to just start from scratch in rbtree
 */
static inline struct request *cfq_check_fifo(struct cfq_queue *cfqq)
{
	struct cfq_data *cfqd = cfqq->cfqd;
	struct request *rq;
	int fifo;

	if (cfq_cfqq_fifo_expire(cfqq))
		return NULL;

	cfq_mark_cfqq_fifo_expire(cfqq);

	if (list_empty(&cfqq->fifo))
		return NULL;

	fifo = cfq_cfqq_sync(cfqq);
	rq = rq_entry_fifo(cfqq->fifo.next);

	if (time_before(jiffies, rq->start_time + cfqd->cfq_fifo_expire[fifo]))
		return NULL;

	return rq;
}

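/*
 * With the default cfq_slice_async_rq of 2 and CFQ_PRIO_LISTS of 8, the
 * value below works out to 32 requests at ioprio 0 down to 4 at ioprio 7.
 */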
static inline int
cfq_prio_to_maxrq(struct cfq_data *cfqd, struct cfq_queue *cfqq)
{
	const int base_rq = cfqd->cfq_slice_async_rq;

	WARN_ON(cfqq->ioprio >= IOPRIO_BE_NR);

	return 2 * (base_rq + base_rq * (CFQ_PRIO_LISTS - 1 - cfqq->ioprio));
}

/*
 * Select a queue for service. If we have a current active queue,
 * check whether to continue servicing it, or retrieve and set a new one.
 */
static struct cfq_queue *cfq_select_queue(struct cfq_data *cfqd)
{
	struct cfq_queue *cfqq;

	cfqq = cfqd->active_queue;
	if (!cfqq)
		goto new_queue;

	/*
	 * The active queue has run out of time, expire it and select new.
	 */
	if (cfq_slice_used(cfqq))
		goto expire;

	/*
	 * The active queue has requests and isn't expired, allow it to
	 * dispatch.
	 */
	if (!RB_EMPTY_ROOT(&cfqq->sort_list))
		goto keep_queue;

	/*
	 * No requests pending. If the active queue still has requests in
	 * flight or is idling for a new request, allow either of these
	 * conditions to happen (or time out) before selecting a new queue.
	 */
	if (timer_pending(&cfqd->idle_slice_timer) ||
	    (cfqq->dispatched && cfq_cfqq_idle_window(cfqq))) {
		cfqq = NULL;
		goto keep_queue;
	}

expire: