cfq-iosched.c 55.3 KB
Newer Older
Linus Torvalds's avatar
Linus Torvalds committed
1 2 3 4 5 6
/*
 *  CFQ, or complete fairness queueing, disk scheduler.
 *
 *  Based on ideas from a previously unfinished io
 *  scheduler (round robin per-process disk scheduling) and Andrea Arcangeli.
 *
7
 *  Copyright (C) 2003 Jens Axboe <axboe@kernel.dk>
Linus Torvalds's avatar
Linus Torvalds committed
8 9
 */
#include <linux/module.h>
Al Viro's avatar
Al Viro committed
10 11
#include <linux/blkdev.h>
#include <linux/elevator.h>
Linus Torvalds's avatar
Linus Torvalds committed
12
#include <linux/rbtree.h>
13
#include <linux/ioprio.h>
Linus Torvalds's avatar
Linus Torvalds committed
14 15 16 17

/*
 * tunables
 */
18 19 20 21
static const int cfq_quantum = 4;		/* max queue in one round of service */
static const int cfq_fifo_expire[2] = { HZ / 4, HZ / 8 };
static const int cfq_back_max = 16 * 1024;	/* maximum backwards seek, in KiB */
static const int cfq_back_penalty = 2;		/* penalty of a backwards seek */
Linus Torvalds's avatar
Linus Torvalds committed
22

23
static const int cfq_slice_sync = HZ / 10;
Jens Axboe's avatar
Jens Axboe committed
24
static int cfq_slice_async = HZ / 25;
25
static const int cfq_slice_async_rq = 2;
26
static int cfq_slice_idle = HZ / 125;
27

28 29 30
/*
 * grace period before allowing idle class to get disk access
 */
31
#define CFQ_IDLE_GRACE		(HZ / 10)
32 33 34 35 36 37

/*
 * below this threshold, we consider thinktime immediate
 */
#define CFQ_MIN_TT		(2)

38 39
#define CFQ_SLICE_SCALE		(5)

Jens Axboe's avatar
Jens Axboe committed
40 41
#define RQ_CIC(rq)		((struct cfq_io_context*)(rq)->elevator_private)
#define RQ_CFQQ(rq)		((rq)->elevator_private2)
Linus Torvalds's avatar
Linus Torvalds committed
42

43 44
static struct kmem_cache *cfq_pool;
static struct kmem_cache *cfq_ioc_pool;
Linus Torvalds's avatar
Linus Torvalds committed
45

46
static DEFINE_PER_CPU(unsigned long, ioc_count);
47 48
static struct completion *ioc_gone;

49 50 51 52
#define CFQ_PRIO_LISTS		IOPRIO_BE_NR
#define cfq_class_idle(cfqq)	((cfqq)->ioprio_class == IOPRIO_CLASS_IDLE)
#define cfq_class_rt(cfqq)	((cfqq)->ioprio_class == IOPRIO_CLASS_RT)

Jens Axboe's avatar
Jens Axboe committed
53 54 55
#define ASYNC			(0)
#define SYNC			(1)

56 57
#define sample_valid(samples)	((samples) > 80)

58 59 60 61 62 63 64 65 66 67 68 69
/*
 * Most of our rbtree usage is for sorting with min extraction, so
 * if we cache the leftmost node we don't have to walk down the tree
 * to find it. Idea borrowed from Ingo Molnars CFS scheduler. We should
 * move this into the elevator for the rq sorting as well.
 */
struct cfq_rb_root {
	struct rb_root rb;
	struct rb_node *left;
};
#define CFQ_RB_ROOT	(struct cfq_rb_root) { RB_ROOT, NULL, }

70 71 72
/*
 * Per block device queue structure
 */
Linus Torvalds's avatar
Linus Torvalds committed
73
struct cfq_data {
74
	struct request_queue *queue;
75 76 77 78

	/*
	 * rr list of queues with requests and the count of them
	 */
79
	struct cfq_rb_root service_tree;
80 81 82
	unsigned int busy_queues;

	int rq_in_driver;
83
	int sync_flight;
84
	int hw_tag;
Linus Torvalds's avatar
Linus Torvalds committed
85

86 87 88 89 90
	/*
	 * idle window management
	 */
	struct timer_list idle_slice_timer;
	struct work_struct unplug_work;
Linus Torvalds's avatar
Linus Torvalds committed
91

92 93 94
	struct cfq_queue *active_queue;
	struct cfq_io_context *active_cic;

95 96 97 98 99
	/*
	 * async queue for each priority case
	 */
	struct cfq_queue *async_cfqq[2][IOPRIO_BE_NR];
	struct cfq_queue *async_idle_cfqq;
100

101
	struct timer_list idle_class_timer;
Linus Torvalds's avatar
Linus Torvalds committed
102

Jens Axboe's avatar
Jens Axboe committed
103
	sector_t last_position;
104
	unsigned long last_end_request;
Linus Torvalds's avatar
Linus Torvalds committed
105 106 107 108 109

	/*
	 * tunables, see top of file
	 */
	unsigned int cfq_quantum;
110
	unsigned int cfq_fifo_expire[2];
Linus Torvalds's avatar
Linus Torvalds committed
111 112
	unsigned int cfq_back_penalty;
	unsigned int cfq_back_max;
113 114 115
	unsigned int cfq_slice[2];
	unsigned int cfq_slice_async_rq;
	unsigned int cfq_slice_idle;
116 117

	struct list_head cic_list;
Linus Torvalds's avatar
Linus Torvalds committed
118 119
};

120 121 122
/*
 * Per process-grouping structure
 */
Linus Torvalds's avatar
Linus Torvalds committed
123 124 125 126 127
struct cfq_queue {
	/* reference count */
	atomic_t ref;
	/* parent cfq_data */
	struct cfq_data *cfqd;
128 129 130 131
	/* service_tree member */
	struct rb_node rb_node;
	/* service_tree key */
	unsigned long rb_key;
Linus Torvalds's avatar
Linus Torvalds committed
132 133 134
	/* sorted list of pending requests */
	struct rb_root sort_list;
	/* if fifo isn't expired, next request to serve */
Jens Axboe's avatar
Jens Axboe committed
135
	struct request *next_rq;
Linus Torvalds's avatar
Linus Torvalds committed
136 137 138 139
	/* requests queued in sort_list */
	int queued[2];
	/* currently allocated requests */
	int allocated[2];
140 141
	/* pending metadata requests */
	int meta_pending;
Linus Torvalds's avatar
Linus Torvalds committed
142
	/* fifo list of requests in sort_list */
143
	struct list_head fifo;
Linus Torvalds's avatar
Linus Torvalds committed
144

145
	unsigned long slice_end;
146
	long slice_resid;
Linus Torvalds's avatar
Linus Torvalds committed
147

Jens Axboe's avatar
Jens Axboe committed
148 149
	/* number of requests that are on the dispatch list or inside driver */
	int dispatched;
150 151 152 153 154

	/* io prio of this group */
	unsigned short ioprio, org_ioprio;
	unsigned short ioprio_class, org_ioprio_class;

Jens Axboe's avatar
Jens Axboe committed
155 156
	/* various state flags, see below */
	unsigned int flags;
Linus Torvalds's avatar
Linus Torvalds committed
157 158
};

Jens Axboe's avatar
Jens Axboe committed
159
enum cfqq_state_flags {
160 161 162 163 164 165 166 167 168
	CFQ_CFQQ_FLAG_on_rr = 0,	/* on round-robin busy list */
	CFQ_CFQQ_FLAG_wait_request,	/* waiting for a request */
	CFQ_CFQQ_FLAG_must_alloc,	/* must be allowed rq alloc */
	CFQ_CFQQ_FLAG_must_alloc_slice,	/* per-slice must_alloc flag */
	CFQ_CFQQ_FLAG_must_dispatch,	/* must dispatch, even if expired */
	CFQ_CFQQ_FLAG_fifo_expire,	/* FIFO checked in this slice */
	CFQ_CFQQ_FLAG_idle_window,	/* slice idling enabled */
	CFQ_CFQQ_FLAG_prio_changed,	/* task priority has changed */
	CFQ_CFQQ_FLAG_queue_new,	/* queue never been serviced */
169
	CFQ_CFQQ_FLAG_slice_new,	/* no requests dispatched in slice */
170
	CFQ_CFQQ_FLAG_sync,		/* synchronous queue */
Jens Axboe's avatar
Jens Axboe committed
171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194
};

#define CFQ_CFQQ_FNS(name)						\
static inline void cfq_mark_cfqq_##name(struct cfq_queue *cfqq)		\
{									\
	cfqq->flags |= (1 << CFQ_CFQQ_FLAG_##name);			\
}									\
static inline void cfq_clear_cfqq_##name(struct cfq_queue *cfqq)	\
{									\
	cfqq->flags &= ~(1 << CFQ_CFQQ_FLAG_##name);			\
}									\
static inline int cfq_cfqq_##name(const struct cfq_queue *cfqq)		\
{									\
	return (cfqq->flags & (1 << CFQ_CFQQ_FLAG_##name)) != 0;	\
}

CFQ_CFQQ_FNS(on_rr);
CFQ_CFQQ_FNS(wait_request);
CFQ_CFQQ_FNS(must_alloc);
CFQ_CFQQ_FNS(must_alloc_slice);
CFQ_CFQQ_FNS(must_dispatch);
CFQ_CFQQ_FNS(fifo_expire);
CFQ_CFQQ_FNS(idle_window);
CFQ_CFQQ_FNS(prio_changed);
195
CFQ_CFQQ_FNS(queue_new);
196
CFQ_CFQQ_FNS(slice_new);
197
CFQ_CFQQ_FNS(sync);
Jens Axboe's avatar
Jens Axboe committed
198 199
#undef CFQ_CFQQ_FNS

200
static void cfq_dispatch_insert(struct request_queue *, struct request *);
201
static struct cfq_queue *cfq_get_queue(struct cfq_data *, int,
202
				       struct io_context *, gfp_t);
203
static struct cfq_io_context *cfq_cic_lookup(struct cfq_data *,
204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228
						struct io_context *);

static inline struct cfq_queue *cic_to_cfqq(struct cfq_io_context *cic,
					    int is_sync)
{
	return cic->cfqq[!!is_sync];
}

static inline void cic_set_cfqq(struct cfq_io_context *cic,
				struct cfq_queue *cfqq, int is_sync)
{
	cic->cfqq[!!is_sync] = cfqq;
}

/*
 * We regard a request as SYNC, if it's either a read or has the SYNC bit
 * set (in which case it could also be direct WRITE).
 */
static inline int cfq_bio_sync(struct bio *bio)
{
	if (bio_data_dir(bio) == READ || bio_sync(bio))
		return 1;

	return 0;
}
Linus Torvalds's avatar
Linus Torvalds committed
229

Andrew Morton's avatar
Andrew Morton committed
230 231 232 233 234 235
/*
 * scheduler run of queue, if there are requests pending and no one in the
 * driver that will restart queueing
 */
static inline void cfq_schedule_dispatch(struct cfq_data *cfqd)
{
236
	if (cfqd->busy_queues)
Andrew Morton's avatar
Andrew Morton committed
237 238 239
		kblockd_schedule_work(&cfqd->unplug_work);
}

240
static int cfq_queue_empty(struct request_queue *q)
Andrew Morton's avatar
Andrew Morton committed
241 242 243
{
	struct cfq_data *cfqd = q->elevator->elevator_data;

244
	return !cfqd->busy_queues;
Andrew Morton's avatar
Andrew Morton committed
245 246
}

247 248 249 250 251
/*
 * Scale schedule slice based on io priority. Use the sync time slice only
 * if a queue is marked sync and has sync io queued. A sync queue with async
 * io only, should not get full sync slice length.
 */
252 253
static inline int cfq_prio_slice(struct cfq_data *cfqd, int sync,
				 unsigned short prio)
254
{
255
	const int base_slice = cfqd->cfq_slice[sync];
256

257 258 259 260
	WARN_ON(prio >= IOPRIO_BE_NR);

	return base_slice + (base_slice/CFQ_SLICE_SCALE * (4 - prio));
}
261

262 263 264 265
static inline int
cfq_prio_to_slice(struct cfq_data *cfqd, struct cfq_queue *cfqq)
{
	return cfq_prio_slice(cfqd, cfq_cfqq_sync(cfqq), cfqq->ioprio);
266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288
}

static inline void
cfq_set_prio_slice(struct cfq_data *cfqd, struct cfq_queue *cfqq)
{
	cfqq->slice_end = cfq_prio_to_slice(cfqd, cfqq) + jiffies;
}

/*
 * We need to wrap this check in cfq_cfqq_slice_new(), since ->slice_end
 * isn't valid until the first request from the dispatch is activated
 * and the slice time set.
 */
static inline int cfq_slice_used(struct cfq_queue *cfqq)
{
	if (cfq_cfqq_slice_new(cfqq))
		return 0;
	if (time_before(jiffies, cfqq->slice_end))
		return 0;

	return 1;
}

Linus Torvalds's avatar
Linus Torvalds committed
289
/*
Jens Axboe's avatar
Jens Axboe committed
290
 * Lifted from AS - choose which of rq1 and rq2 that is best served now.
Linus Torvalds's avatar
Linus Torvalds committed
291
 * We choose the request that is closest to the head right now. Distance
292
 * behind the head is penalized and only allowed to a certain extent.
Linus Torvalds's avatar
Linus Torvalds committed
293
 */
Jens Axboe's avatar
Jens Axboe committed
294 295
static struct request *
cfq_choose_req(struct cfq_data *cfqd, struct request *rq1, struct request *rq2)
Linus Torvalds's avatar
Linus Torvalds committed
296 297 298
{
	sector_t last, s1, s2, d1 = 0, d2 = 0;
	unsigned long back_max;
299 300 301
#define CFQ_RQ1_WRAP	0x01 /* request 1 wraps */
#define CFQ_RQ2_WRAP	0x02 /* request 2 wraps */
	unsigned wrap = 0; /* bit mask: requests behind the disk head? */
Linus Torvalds's avatar
Linus Torvalds committed
302

Jens Axboe's avatar
Jens Axboe committed
303 304 305 306
	if (rq1 == NULL || rq1 == rq2)
		return rq2;
	if (rq2 == NULL)
		return rq1;
307

Jens Axboe's avatar
Jens Axboe committed
308 309 310 311
	if (rq_is_sync(rq1) && !rq_is_sync(rq2))
		return rq1;
	else if (rq_is_sync(rq2) && !rq_is_sync(rq1))
		return rq2;
312 313 314 315
	if (rq_is_meta(rq1) && !rq_is_meta(rq2))
		return rq1;
	else if (rq_is_meta(rq2) && !rq_is_meta(rq1))
		return rq2;
Linus Torvalds's avatar
Linus Torvalds committed
316

Jens Axboe's avatar
Jens Axboe committed
317 318
	s1 = rq1->sector;
	s2 = rq2->sector;
Linus Torvalds's avatar
Linus Torvalds committed
319

Jens Axboe's avatar
Jens Axboe committed
320
	last = cfqd->last_position;
Linus Torvalds's avatar
Linus Torvalds committed
321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336

	/*
	 * by definition, 1KiB is 2 sectors
	 */
	back_max = cfqd->cfq_back_max * 2;

	/*
	 * Strict one way elevator _except_ in the case where we allow
	 * short backward seeks which are biased as twice the cost of a
	 * similar forward seek.
	 */
	if (s1 >= last)
		d1 = s1 - last;
	else if (s1 + back_max >= last)
		d1 = (last - s1) * cfqd->cfq_back_penalty;
	else
337
		wrap |= CFQ_RQ1_WRAP;
Linus Torvalds's avatar
Linus Torvalds committed
338 339 340 341 342 343

	if (s2 >= last)
		d2 = s2 - last;
	else if (s2 + back_max >= last)
		d2 = (last - s2) * cfqd->cfq_back_penalty;
	else
344
		wrap |= CFQ_RQ2_WRAP;
Linus Torvalds's avatar
Linus Torvalds committed
345 346

	/* Found required data */
347 348 349 350 351 352

	/*
	 * By doing switch() on the bit mask "wrap" we avoid having to
	 * check two variables for all permutations: --> faster!
	 */
	switch (wrap) {
Jens Axboe's avatar
Jens Axboe committed
353
	case 0: /* common case for CFQ: rq1 and rq2 not wrapped */
354
		if (d1 < d2)
Jens Axboe's avatar
Jens Axboe committed
355
			return rq1;
356
		else if (d2 < d1)
Jens Axboe's avatar
Jens Axboe committed
357
			return rq2;
358 359
		else {
			if (s1 >= s2)
Jens Axboe's avatar
Jens Axboe committed
360
				return rq1;
361
			else
Jens Axboe's avatar
Jens Axboe committed
362
				return rq2;
363
		}
Linus Torvalds's avatar
Linus Torvalds committed
364

365
	case CFQ_RQ2_WRAP:
Jens Axboe's avatar
Jens Axboe committed
366
		return rq1;
367
	case CFQ_RQ1_WRAP:
Jens Axboe's avatar
Jens Axboe committed
368 369
		return rq2;
	case (CFQ_RQ1_WRAP|CFQ_RQ2_WRAP): /* both rqs wrapped */
370 371 372 373 374 375 376 377
	default:
		/*
		 * Since both rqs are wrapped,
		 * start with the one that's further behind head
		 * (--> only *one* back seek required),
		 * since back seek takes more time than forward.
		 */
		if (s1 <= s2)
Jens Axboe's avatar
Jens Axboe committed
378
			return rq1;
Linus Torvalds's avatar
Linus Torvalds committed
379
		else
Jens Axboe's avatar
Jens Axboe committed
380
			return rq2;
Linus Torvalds's avatar
Linus Torvalds committed
381 382 383
	}
}

384 385 386
/*
 * The below is leftmost cache rbtree addon
 */
387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403
static struct rb_node *cfq_rb_first(struct cfq_rb_root *root)
{
	if (!root->left)
		root->left = rb_first(&root->rb);

	return root->left;
}

static void cfq_rb_erase(struct rb_node *n, struct cfq_rb_root *root)
{
	if (root->left == n)
		root->left = NULL;

	rb_erase(n, &root->rb);
	RB_CLEAR_NODE(n);
}

Linus Torvalds's avatar
Linus Torvalds committed
404 405 406
/*
 * would be nice to take fifo expire time into account as well
 */
Jens Axboe's avatar
Jens Axboe committed
407 408 409
static struct request *
cfq_find_next_rq(struct cfq_data *cfqd, struct cfq_queue *cfqq,
		  struct request *last)
Linus Torvalds's avatar
Linus Torvalds committed
410
{
411 412
	struct rb_node *rbnext = rb_next(&last->rb_node);
	struct rb_node *rbprev = rb_prev(&last->rb_node);
Jens Axboe's avatar
Jens Axboe committed
413
	struct request *next = NULL, *prev = NULL;
Linus Torvalds's avatar
Linus Torvalds committed
414

415
	BUG_ON(RB_EMPTY_NODE(&last->rb_node));
Linus Torvalds's avatar
Linus Torvalds committed
416 417

	if (rbprev)
Jens Axboe's avatar
Jens Axboe committed
418
		prev = rb_entry_rq(rbprev);
Linus Torvalds's avatar
Linus Torvalds committed
419

420
	if (rbnext)
Jens Axboe's avatar
Jens Axboe committed
421
		next = rb_entry_rq(rbnext);
422 423 424
	else {
		rbnext = rb_first(&cfqq->sort_list);
		if (rbnext && rbnext != &last->rb_node)
Jens Axboe's avatar
Jens Axboe committed
425
			next = rb_entry_rq(rbnext);
426
	}
Linus Torvalds's avatar
Linus Torvalds committed
427

428
	return cfq_choose_req(cfqd, next, prev);
Linus Torvalds's avatar
Linus Torvalds committed
429 430
}

431 432
static unsigned long cfq_slice_offset(struct cfq_data *cfqd,
				      struct cfq_queue *cfqq)
Linus Torvalds's avatar
Linus Torvalds committed
433
{
434 435 436
	/*
	 * just an approximation, should be ok.
	 */
437 438
	return (cfqd->busy_queues - 1) * (cfq_prio_slice(cfqd, 1, 0) -
		       cfq_prio_slice(cfqd, cfq_cfqq_sync(cfqq), cfqq->ioprio));
439 440
}

441 442 443 444 445
/*
 * The cfqd->service_tree holds all pending cfq_queue's that have
 * requests waiting to be processed. It is sorted in the order that
 * we will service the queues.
 */
446
static void cfq_service_tree_add(struct cfq_data *cfqd,
447
				    struct cfq_queue *cfqq, int add_front)
448
{
449
	struct rb_node **p = &cfqd->service_tree.rb.rb_node;
450 451
	struct rb_node *parent = NULL;
	unsigned long rb_key;
452
	int left;
453

454 455 456 457 458 459
	if (!add_front) {
		rb_key = cfq_slice_offset(cfqd, cfqq) + jiffies;
		rb_key += cfqq->slice_resid;
		cfqq->slice_resid = 0;
	} else
		rb_key = 0;
Linus Torvalds's avatar
Linus Torvalds committed
460

461
	if (!RB_EMPTY_NODE(&cfqq->rb_node)) {
462
		/*
463
		 * same position, nothing more to do
464
		 */
465 466
		if (rb_key == cfqq->rb_key)
			return;
Linus Torvalds's avatar
Linus Torvalds committed
467

468
		cfq_rb_erase(&cfqq->rb_node, &cfqd->service_tree);
Linus Torvalds's avatar
Linus Torvalds committed
469
	}
470

471
	left = 1;
472
	while (*p) {
473
		struct cfq_queue *__cfqq;
474
		struct rb_node **n;
475

476 477 478
		parent = *p;
		__cfqq = rb_entry(parent, struct cfq_queue, rb_node);

479 480
		/*
		 * sort RT queues first, we always want to give
481 482
		 * preference to them. IDLE queues goes to the back.
		 * after that, sort on the next service time.
483 484
		 */
		if (cfq_class_rt(cfqq) > cfq_class_rt(__cfqq))
485
			n = &(*p)->rb_left;
486
		else if (cfq_class_rt(cfqq) < cfq_class_rt(__cfqq))
487 488 489 490 491
			n = &(*p)->rb_right;
		else if (cfq_class_idle(cfqq) < cfq_class_idle(__cfqq))
			n = &(*p)->rb_left;
		else if (cfq_class_idle(cfqq) > cfq_class_idle(__cfqq))
			n = &(*p)->rb_right;
492
		else if (rb_key < __cfqq->rb_key)
493 494 495 496 497
			n = &(*p)->rb_left;
		else
			n = &(*p)->rb_right;

		if (n == &(*p)->rb_right)
498
			left = 0;
499 500

		p = n;
501 502
	}

503 504 505
	if (left)
		cfqd->service_tree.left = &cfqq->rb_node;

506 507
	cfqq->rb_key = rb_key;
	rb_link_node(&cfqq->rb_node, parent, p);
508
	rb_insert_color(&cfqq->rb_node, &cfqd->service_tree.rb);
Linus Torvalds's avatar
Linus Torvalds committed
509 510
}

511 512 513
/*
 * Update cfqq's position in the service tree.
 */
514
static void cfq_resort_rr_list(struct cfq_data *cfqd, struct cfq_queue *cfqq)
Jens Axboe's avatar
Jens Axboe committed
515 516 517 518
{
	/*
	 * Resorting requires the cfqq to be on the RR list already.
	 */
519
	if (cfq_cfqq_on_rr(cfqq))
520
		cfq_service_tree_add(cfqd, cfqq, 0);
Jens Axboe's avatar
Jens Axboe committed
521 522
}

Linus Torvalds's avatar
Linus Torvalds committed
523 524
/*
 * add to busy list of queues for service, trying to be fair in ordering
525
 * the pending list according to last request service
Linus Torvalds's avatar
Linus Torvalds committed
526 527
 */
static inline void
528
cfq_add_cfqq_rr(struct cfq_data *cfqd, struct cfq_queue *cfqq)
Linus Torvalds's avatar
Linus Torvalds committed
529
{
Jens Axboe's avatar
Jens Axboe committed
530 531
	BUG_ON(cfq_cfqq_on_rr(cfqq));
	cfq_mark_cfqq_on_rr(cfqq);
Linus Torvalds's avatar
Linus Torvalds committed
532 533
	cfqd->busy_queues++;

534
	cfq_resort_rr_list(cfqd, cfqq);
Linus Torvalds's avatar
Linus Torvalds committed
535 536
}

537 538 539 540
/*
 * Called when the cfqq no longer has requests pending, remove it from
 * the service tree.
 */
Linus Torvalds's avatar
Linus Torvalds committed
541 542 543
static inline void
cfq_del_cfqq_rr(struct cfq_data *cfqd, struct cfq_queue *cfqq)
{
Jens Axboe's avatar
Jens Axboe committed
544 545
	BUG_ON(!cfq_cfqq_on_rr(cfqq));
	cfq_clear_cfqq_on_rr(cfqq);
Linus Torvalds's avatar
Linus Torvalds committed
546

547 548
	if (!RB_EMPTY_NODE(&cfqq->rb_node))
		cfq_rb_erase(&cfqq->rb_node, &cfqd->service_tree);
549

Linus Torvalds's avatar
Linus Torvalds committed
550 551 552 553 554 555 556
	BUG_ON(!cfqd->busy_queues);
	cfqd->busy_queues--;
}

/*
 * rb tree support functions
 */
Jens Axboe's avatar
Jens Axboe committed
557
static inline void cfq_del_rq_rb(struct request *rq)
Linus Torvalds's avatar
Linus Torvalds committed
558
{
Jens Axboe's avatar
Jens Axboe committed
559
	struct cfq_queue *cfqq = RQ_CFQQ(rq);
560
	struct cfq_data *cfqd = cfqq->cfqd;
Jens Axboe's avatar
Jens Axboe committed
561
	const int sync = rq_is_sync(rq);
Linus Torvalds's avatar
Linus Torvalds committed
562

563 564
	BUG_ON(!cfqq->queued[sync]);
	cfqq->queued[sync]--;
Linus Torvalds's avatar
Linus Torvalds committed
565

Jens Axboe's avatar
Jens Axboe committed
566
	elv_rb_del(&cfqq->sort_list, rq);
Linus Torvalds's avatar
Linus Torvalds committed
567

568
	if (cfq_cfqq_on_rr(cfqq) && RB_EMPTY_ROOT(&cfqq->sort_list))
569
		cfq_del_cfqq_rr(cfqd, cfqq);
Linus Torvalds's avatar
Linus Torvalds committed
570 571
}

Jens Axboe's avatar
Jens Axboe committed
572
static void cfq_add_rq_rb(struct request *rq)
Linus Torvalds's avatar
Linus Torvalds committed
573
{
Jens Axboe's avatar
Jens Axboe committed
574
	struct cfq_queue *cfqq = RQ_CFQQ(rq);
Linus Torvalds's avatar
Linus Torvalds committed
575
	struct cfq_data *cfqd = cfqq->cfqd;
576
	struct request *__alias;
Linus Torvalds's avatar
Linus Torvalds committed
577

578
	cfqq->queued[rq_is_sync(rq)]++;
Linus Torvalds's avatar
Linus Torvalds committed
579 580 581 582 583

	/*
	 * looks a little odd, but the first insert might return an alias.
	 * if that happens, put the alias on the dispatch list
	 */
584
	while ((__alias = elv_rb_add(&cfqq->sort_list, rq)) != NULL)
Jens Axboe's avatar
Jens Axboe committed
585
		cfq_dispatch_insert(cfqd->queue, __alias);
586 587 588

	if (!cfq_cfqq_on_rr(cfqq))
		cfq_add_cfqq_rr(cfqd, cfqq);
589 590 591 592 593 594

	/*
	 * check if this request is a better next-serve candidate
	 */
	cfqq->next_rq = cfq_choose_req(cfqd, cfqq->next_rq, rq);
	BUG_ON(!cfqq->next_rq);
Linus Torvalds's avatar
Linus Torvalds committed
595 596 597
}

static inline void
Jens Axboe's avatar
Jens Axboe committed
598
cfq_reposition_rq_rb(struct cfq_queue *cfqq, struct request *rq)
Linus Torvalds's avatar
Linus Torvalds committed
599
{
600 601
	elv_rb_del(&cfqq->sort_list, rq);
	cfqq->queued[rq_is_sync(rq)]--;
Jens Axboe's avatar
Jens Axboe committed
602
	cfq_add_rq_rb(rq);
Linus Torvalds's avatar
Linus Torvalds committed
603 604
}

605 606
static struct request *
cfq_find_rq_fmerge(struct cfq_data *cfqd, struct bio *bio)
Linus Torvalds's avatar
Linus Torvalds committed
607
{
608
	struct task_struct *tsk = current;
609
	struct cfq_io_context *cic;
610
	struct cfq_queue *cfqq;
Linus Torvalds's avatar
Linus Torvalds committed
611

612
	cic = cfq_cic_lookup(cfqd, tsk->io_context);
613 614 615 616
	if (!cic)
		return NULL;

	cfqq = cic_to_cfqq(cic, cfq_bio_sync(bio));
617 618 619
	if (cfqq) {
		sector_t sector = bio->bi_sector + bio_sectors(bio);

620
		return elv_rb_find(&cfqq->sort_list, sector);
621
	}
Linus Torvalds's avatar
Linus Torvalds committed
622 623 624 625

	return NULL;
}

626
static void cfq_activate_request(struct request_queue *q, struct request *rq)
Linus Torvalds's avatar
Linus Torvalds committed
627
{
628
	struct cfq_data *cfqd = q->elevator->elevator_data;
Jens Axboe's avatar
Jens Axboe committed
629

630
	cfqd->rq_in_driver++;
631 632 633 634 635 636 637 638 639

	/*
	 * If the depth is larger 1, it really could be queueing. But lets
	 * make the mark a little higher - idling could still be good for
	 * low queueing, and a low queueing number could also just indicate
	 * a SCSI mid layer like behaviour where limit+1 is often seen.
	 */
	if (!cfqd->hw_tag && cfqd->rq_in_driver > 4)
		cfqd->hw_tag = 1;
Jens Axboe's avatar
Jens Axboe committed
640 641

	cfqd->last_position = rq->hard_sector + rq->hard_nr_sectors;
Linus Torvalds's avatar
Linus Torvalds committed
642 643
}

644
static void cfq_deactivate_request(struct request_queue *q, struct request *rq)
Linus Torvalds's avatar
Linus Torvalds committed
645
{
646 647 648 649
	struct cfq_data *cfqd = q->elevator->elevator_data;

	WARN_ON(!cfqd->rq_in_driver);
	cfqd->rq_in_driver--;
Linus Torvalds's avatar
Linus Torvalds committed
650 651
}

652
static void cfq_remove_request(struct request *rq)
Linus Torvalds's avatar
Linus Torvalds committed
653
{
Jens Axboe's avatar
Jens Axboe committed
654
	struct cfq_queue *cfqq = RQ_CFQQ(rq);
655

Jens Axboe's avatar
Jens Axboe committed
656 657
	if (cfqq->next_rq == rq)
		cfqq->next_rq = cfq_find_next_rq(cfqq->cfqd, cfqq, rq);
Linus Torvalds's avatar
Linus Torvalds committed
658

659
	list_del_init(&rq->queuelist);
Jens Axboe's avatar
Jens Axboe committed
660
	cfq_del_rq_rb(rq);
661 662 663 664 665

	if (rq_is_meta(rq)) {
		WARN_ON(!cfqq->meta_pending);
		cfqq->meta_pending--;
	}
Linus Torvalds's avatar
Linus Torvalds committed
666 667
}

668 669
static int cfq_merge(struct request_queue *q, struct request **req,
		     struct bio *bio)
Linus Torvalds's avatar
Linus Torvalds committed
670 671 672 673
{
	struct cfq_data *cfqd = q->elevator->elevator_data;
	struct request *__rq;

674
	__rq = cfq_find_rq_fmerge(cfqd, bio);
675
	if (__rq && elv_rq_merge_ok(__rq, bio)) {
676 677
		*req = __rq;
		return ELEVATOR_FRONT_MERGE;
Linus Torvalds's avatar
Linus Torvalds committed
678 679 680 681 682
	}

	return ELEVATOR_NO_MERGE;
}

683
static void cfq_merged_request(struct request_queue *q, struct request *req,
684
			       int type)
Linus Torvalds's avatar
Linus Torvalds committed
685
{
686
	if (type == ELEVATOR_FRONT_MERGE) {
Jens Axboe's avatar
Jens Axboe committed
687
		struct cfq_queue *cfqq = RQ_CFQQ(req);
Linus Torvalds's avatar
Linus Torvalds committed
688

Jens Axboe's avatar
Jens Axboe committed
689
		cfq_reposition_rq_rb(cfqq, req);
Linus Torvalds's avatar
Linus Torvalds committed
690 691 692 693
	}
}

static void
694
cfq_merged_requests(struct request_queue *q, struct request *rq,
Linus Torvalds's avatar
Linus Torvalds committed
695 696
		    struct request *next)
{
697 698 699 700 701 702 703
	/*
	 * reposition in fifo if next is older than rq
	 */
	if (!list_empty(&rq->queuelist) && !list_empty(&next->queuelist) &&
	    time_before(next->start_time, rq->start_time))
		list_move(&rq->queuelist, &next->queuelist);

704
	cfq_remove_request(next);
705 706
}

707
static int cfq_allow_merge(struct request_queue *q, struct request *rq,
708 709 710
			   struct bio *bio)
{
	struct cfq_data *cfqd = q->elevator->elevator_data;
711
	struct cfq_io_context *cic;
712 713 714
	struct cfq_queue *cfqq;

	/*
715
	 * Disallow merge of a sync bio into an async request.
716
	 */
717
	if (cfq_bio_sync(bio) && !rq_is_sync(rq))
718 719 720
		return 0;

	/*
721 722
	 * Lookup the cfqq that this bio will be queued with. Allow
	 * merge only if rq is queued there.
723
	 */
724
	cic = cfq_cic_lookup(cfqd, current->io_context);
725 726
	if (!cic)
		return 0;
727

728
	cfqq = cic_to_cfqq(cic, cfq_bio_sync(bio));
729 730
	if (cfqq == RQ_CFQQ(rq))
		return 1;
731

732
	return 0;
733 734
}

735 736 737 738 739 740 741 742 743 744
static inline void
__cfq_set_active_queue(struct cfq_data *cfqd, struct cfq_queue *cfqq)
{
	if (cfqq) {
		/*
		 * stop potential idle class queues waiting service
		 */
		del_timer(&cfqd->idle_class_timer);

		cfqq->slice_end = 0;
Jens Axboe's avatar
Jens Axboe committed
745 746
		cfq_clear_cfqq_must_alloc_slice(cfqq);
		cfq_clear_cfqq_fifo_expire(cfqq);
747
		cfq_mark_cfqq_slice_new(cfqq);
Jens Axboe's avatar
Jens Axboe committed
748
		cfq_clear_cfqq_queue_new(cfqq);
749 750 751 752 753
	}

	cfqd->active_queue = cfqq;
}

754 755 756 757 758
/*
 * current cfqq expired its slice (or was too idle), select new one
 */
static void
__cfq_slice_expired(struct cfq_data *cfqd, struct cfq_queue *cfqq,
759
		    int timed_out)
760 761 762 763 764 765 766 767
{
	if (cfq_cfqq_wait_request(cfqq))
		del_timer(&cfqd->idle_slice_timer);

	cfq_clear_cfqq_must_dispatch(cfqq);
	cfq_clear_cfqq_wait_request(cfqq);

	/*
768
	 * store what was left of this slice, if the queue idled/timed out
769
	 */
770
	if (timed_out && !cfq_cfqq_slice_new(cfqq))
771
		cfqq->slice_resid = cfqq->slice_end - jiffies;
772

773
	cfq_resort_rr_list(cfqd, cfqq);
774 775 776 777 778 779 780 781 782 783

	if (cfqq == cfqd->active_queue)
		cfqd->active_queue = NULL;

	if (cfqd->active_cic) {
		put_io_context(cfqd->active_cic->ioc);
		cfqd->active_cic = NULL;
	}
}

784
static inline void cfq_slice_expired(struct cfq_data *cfqd, int timed_out)
785 786 787 788
{
	struct cfq_queue *cfqq = cfqd->active_queue;

	if (cfqq)
789
		__cfq_slice_expired(cfqd, cfqq, timed_out);
790 791
}

792 793 794 795 796 797 798 799 800 801 802 803 804 805
static int start_idle_class_timer(struct cfq_data *cfqd)
{
	unsigned long end = cfqd->last_end_request + CFQ_IDLE_GRACE;
	unsigned long now = jiffies;

	if (time_before(now, end) &&
	    time_after_eq(now, cfqd->last_end_request)) {
		mod_timer(&cfqd->idle_class_timer, end);
		return 1;
	}

	return 0;
}

806 807 808 809
/*
 * Get next queue for service. Unless we have a queue preemption,
 * we'll simply select the first cfqq in the service tree.
 */
Jens Axboe's avatar
Jens Axboe committed
810
static struct cfq_queue *cfq_get_next_queue(struct cfq_data *cfqd)
811
{
812 813
	struct cfq_queue *cfqq;
	struct rb_node *n;
814

815 816
	if (RB_EMPTY_ROOT(&cfqd->service_tree.rb))
		return NULL;
817

818 819
	n = cfq_rb_first(&cfqd->service_tree);
	cfqq = rb_entry(n, struct cfq_queue, rb_node);
820

821 822 823 824 825 826 827
	if (cfq_class_idle(cfqq)) {
		/*
		 * if we have idle queues and no rt or be queues had
		 * pending requests, either allow immediate service if
		 * the grace period has passed or arm the idle grace
		 * timer
		 */
828
		if (start_idle_class_timer(cfqd))
829
			cfqq = NULL;
830 831
	}

Jens Axboe's avatar
Jens Axboe committed
832 833 834
	return cfqq;
}

835 836 837
/*
 * Get and set a new active queue for service.
 */
Jens Axboe's avatar
Jens Axboe committed
838 839 840 841
static struct cfq_queue *cfq_set_active_queue(struct cfq_data *cfqd)
{
	struct cfq_queue *cfqq;

842
	cfqq = cfq_get_next_queue(cfqd);
843
	__cfq_set_active_queue(cfqd, cfqq);
Jens Axboe's avatar
Jens Axboe committed
844
	return cfqq;
845 846
}

847 848 849 850 851 852 853 854 855
static inline sector_t cfq_dist_from_last(struct cfq_data *cfqd,
					  struct request *rq)
{
	if (rq->sector >= cfqd->last_position)
		return rq->sector - cfqd->last_position;
	else
		return cfqd->last_position - rq->sector;
}

Jens Axboe's avatar
Jens Axboe committed
856 857 858 859 860 861 862 863 864 865
static inline int cfq_rq_close(struct cfq_data *cfqd, struct request *rq)
{
	struct cfq_io_context *cic = cfqd->active_cic;

	if (!sample_valid(cic->seek_samples))
		return 0;

	return cfq_dist_from_last(cfqd, rq) <= cic->seek_mean;
}

866 867
static int cfq_close_cooperator(struct cfq_data *cfq_data,
				struct cfq_queue *cfqq)
Jens Axboe's avatar
Jens Axboe committed
868 869
{
	/*
870 871 872
	 * We should notice if some of the queues are cooperating, eg
	 * working closely on the same area of the disk. In that case,
	 * we can group them together and don't waste time idling.
Jens Axboe's avatar
Jens Axboe committed
873
	 */
874
	return 0;
Jens Axboe's avatar
Jens Axboe committed
875 876 877
}

#define CIC_SEEKY(cic) ((cic)->seek_mean > (8 * 1024))
878

Jens Axboe's avatar
Jens Axboe committed
879
static void cfq_arm_slice_timer(struct cfq_data *cfqd)
880
{
881
	struct cfq_queue *cfqq = cfqd->active_queue;
882
	struct cfq_io_context *cic;
883 884
	unsigned long sl;

885
	WARN_ON(!RB_EMPTY_ROOT(&cfqq->sort_list));
Jens Axboe's avatar
Jens Axboe committed
886
	WARN_ON(cfq_cfqq_slice_new(cfqq));
887 888 889 890

	/*
	 * idle is disabled, either manually or by past process history
	 */
Jens Axboe's avatar
Jens Axboe committed
891 892 893
	if (!cfqd->cfq_slice_idle || !cfq_cfqq_idle_window(cfqq))
		return;

894 895 896
	/*
	 * task has exited, don't wait
	 */
897
	cic = cfqd->active_cic;
898
	if (!cic || !atomic_read(&cic->ioc->nr_tasks))
Jens Axboe's avatar
Jens Axboe committed
899 900 901 902 903
		return;

	/*
	 * See if this prio level has a good candidate
	 */
Jens Axboe's avatar
Jens Axboe committed
904 905
	if (cfq_close_cooperator(cfqd, cfqq) &&
	    (sample_valid(cic->ttime_samples) && cic->ttime_mean > 2))
Jens Axboe's avatar
Jens Axboe committed
906
		return;
907

Jens Axboe's avatar
Jens Axboe committed
908 909
	cfq_mark_cfqq_must_dispatch(cfqq);
	cfq_mark_cfqq_wait_request(cfqq);
910

911 912 913 914 915
	/*
	 * we don't want to idle for seeks, but we do want to allow
	 * fair distribution of slice time for a process doing back-to-back
	 * seeks. so allow a little bit of time for him to submit a new rq
	 */
Jens Axboe's avatar
Jens Axboe committed
916
	sl = cfqd->cfq_slice_idle;
917
	if (sample_valid(cic->seek_samples) && CIC_SEEKY(cic))
918
		sl = min(sl, msecs_to_jiffies(CFQ_MIN_TT));
919

920
	mod_timer(&cfqd->idle_slice_timer, jiffies + sl);
Linus Torvalds's avatar
Linus Torvalds committed
921 922
}

923 924 925
/*
 * Move request from internal lists to the request queue dispatch list.
 */
926
static void cfq_dispatch_insert(struct request_queue *q, struct request *rq)
Linus Torvalds's avatar
Linus Torvalds committed
927
{
928
	struct cfq_data *cfqd = q->elevator->elevator_data;
Jens Axboe's avatar
Jens Axboe committed
929
	struct cfq_queue *cfqq = RQ_CFQQ(rq);
930

931
	cfq_remove_request(rq);
Jens Axboe's avatar
Jens Axboe committed
932
	cfqq->dispatched++;
933
	elv_dispatch_sort(q, rq);
934 935 936

	if (cfq_cfqq_sync(cfqq))
		cfqd->sync_flight++;
Linus Torvalds's avatar
Linus Torvalds committed
937 938 939 940 941
}

/*
 * return expired entry, or NULL to just start from scratch in rbtree
 */
Jens Axboe's avatar
Jens Axboe committed
942
static inline struct request *cfq_check_fifo(struct cfq_queue *cfqq)
Linus Torvalds's avatar
Linus Torvalds committed
943 944
{
	struct cfq_data *cfqd = cfqq->cfqd;
945
	struct request *rq;
946
	int fifo;
Linus Torvalds's avatar
Linus Torvalds committed
947

Jens Axboe's avatar
Jens Axboe committed
948
	if (cfq_cfqq_fifo_expire(cfqq))
Linus Torvalds's avatar
Linus Torvalds committed
949
		return NULL;
950 951 952

	cfq_mark_cfqq_fifo_expire(cfqq);

953 954
	if (list_empty(&cfqq->fifo))
		return NULL;
Linus Torvalds's avatar
Linus Torvalds committed
955

Jens Axboe's avatar
Jens Axboe committed
956
	fifo = cfq_cfqq_sync(cfqq);
957
	rq = rq_entry_fifo(cfqq->fifo.next);
Linus Torvalds's avatar
Linus Torvalds committed
958

Jens Axboe's avatar
Jens Axboe committed
959 960
	if (time_before(jiffies, rq->start_time + cfqd->cfq_fifo_expire[fifo]))
		return NULL;
Linus Torvalds's avatar
Linus Torvalds committed
961

Jens Axboe's avatar
Jens Axboe committed
962
	return rq;
Linus Torvalds's avatar
Linus Torvalds committed
963 964
}

965 966 967 968
static inline int
cfq_prio_to_maxrq(struct cfq_data *cfqd, struct cfq_queue *cfqq)
{
	const int base_rq = cfqd->cfq_slice_async_rq;
Linus Torvalds's avatar
Linus Torvalds committed
969

970
	WARN_ON(cfqq->ioprio >= IOPRIO_BE_NR);
Linus Torvalds's avatar
Linus Torvalds committed
971

972
	return 2 * (base_rq + base_rq * (CFQ_PRIO_LISTS - 1 - cfqq->ioprio));
Linus Torvalds's avatar
Linus Torvalds committed
973 974
}

975
/*
976 977
 * Select a queue for service. If we have a current active queue,
 * check whether to continue servicing it, or retrieve and set a new one.
978
 */
979
static struct cfq_queue *cfq_select_queue(struct cfq_data *cfqd)
Linus Torvalds's avatar
Linus Torvalds committed
980 981 982
{
	struct cfq_queue *cfqq;

983 984 985
	cfqq = cfqd->active_queue;
	if (!cfqq)
		goto new_queue;
Linus Torvalds's avatar
Linus Torvalds committed
986

987
	/*
Jens Axboe's avatar
Jens Axboe committed
988
	 * The active queue has run out of time, expire it and select new.
989
	 */
Jens Axboe's avatar
Jens Axboe committed
990
	if (cfq_slice_used(cfqq))
Jens Axboe's avatar
Jens Axboe committed
991
		goto expire;
Linus Torvalds's avatar
Linus Torvalds committed
992

993
	/*