/*
 *  CFQ, or complete fairness queueing, disk scheduler.
 *
 *  Based on ideas from a previously unfinished io
 *  scheduler (round robin per-process disk scheduling) and Andrea Arcangeli.
 *
 *  Copyright (C) 2003 Jens Axboe <axboe@kernel.dk>
 */
#include <linux/module.h>
#include <linux/blkdev.h>
#include <linux/elevator.h>
#include <linux/rbtree.h>
#include <linux/ioprio.h>

/*
 * tunables
 */
static const int cfq_quantum = 4;		/* max queue in one round of service */
static const int cfq_fifo_expire[2] = { HZ / 4, HZ / 8 };
static const int cfq_back_max = 16 * 1024;	/* maximum backwards seek, in KiB */
static const int cfq_back_penalty = 2;		/* penalty of a backwards seek */

static const int cfq_slice_sync = HZ / 10;
static int cfq_slice_async = HZ / 25;
static const int cfq_slice_async_rq = 2;
static int cfq_slice_idle = HZ / 125;

/*
 * grace period before allowing idle class to get disk access
 */
#define CFQ_IDLE_GRACE		(HZ / 10)

/*
 * below this threshold, we consider thinktime immediate
 */
#define CFQ_MIN_TT		(2)

#define CFQ_SLICE_SCALE		(5)

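/*
 * Per-request private data: the cfq_io_context and cfq_queue that a
 * request belongs to are stashed in its two elevator_private fields.
 */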
#define RQ_CIC(rq)		((struct cfq_io_context*)(rq)->elevator_private)
#define RQ_CFQQ(rq)		((rq)->elevator_private2)

static struct kmem_cache *cfq_pool;
static struct kmem_cache *cfq_ioc_pool;

static DEFINE_PER_CPU(unsigned long, ioc_count);
static struct completion *ioc_gone;

#define CFQ_PRIO_LISTS		IOPRIO_BE_NR
#define cfq_class_idle(cfqq)	((cfqq)->ioprio_class == IOPRIO_CLASS_IDLE)
#define cfq_class_rt(cfqq)	((cfqq)->ioprio_class == IOPRIO_CLASS_RT)

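/* async/sync indices for the various [2] arrays (queued, allocated, ...) */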
#define ASYNC			(0)
#define SYNC			(1)

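/*
 * Running averages (think time, seek distance) are only trusted once more
 * than this many samples have been collected.
 */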
#define sample_valid(samples)	((samples) > 80)

/*
 * Most of our rbtree usage is for sorting with min extraction, so
 * if we cache the leftmost node we don't have to walk down the tree
 * to find it. Idea borrowed from Ingo Molnar's CFS scheduler. We should
 * move this into the elevator for the rq sorting as well.
 */
struct cfq_rb_root {
	struct rb_root rb;
	struct rb_node *left;
};
#define CFQ_RB_ROOT	(struct cfq_rb_root) { RB_ROOT, NULL, }

/*
 * Per block device queue structure
 */
struct cfq_data {
	request_queue_t *queue;

	/*
	 * rr list of queues with requests and the count of them
	 */
	struct cfq_rb_root service_tree;
	unsigned int busy_queues;

	int rq_in_driver;
	int sync_flight;
	int hw_tag;

	/*
	 * idle window management
	 */
	struct timer_list idle_slice_timer;
	struct work_struct unplug_work;

	struct cfq_queue *active_queue;
	struct cfq_io_context *active_cic;

	/*
	 * async queue for each priority case
	 */
	struct cfq_queue *async_cfqq[2][IOPRIO_BE_NR];
	struct cfq_queue *async_idle_cfqq;

	struct timer_list idle_class_timer;

	sector_t last_position;
	unsigned long last_end_request;

	/*
	 * tunables, see top of file
	 */
	unsigned int cfq_quantum;
	unsigned int cfq_fifo_expire[2];
	unsigned int cfq_back_penalty;
	unsigned int cfq_back_max;
	unsigned int cfq_slice[2];
	unsigned int cfq_slice_async_rq;
	unsigned int cfq_slice_idle;

	struct list_head cic_list;
};

/*
 * Per process-grouping structure
 */
struct cfq_queue {
	/* reference count */
	atomic_t ref;
	/* parent cfq_data */
	struct cfq_data *cfqd;
	/* service_tree member */
	struct rb_node rb_node;
	/* service_tree key */
	unsigned long rb_key;
	/* sorted list of pending requests */
	struct rb_root sort_list;
	/* if fifo isn't expired, next request to serve */
	struct request *next_rq;
	/* requests queued in sort_list */
	int queued[2];
	/* currently allocated requests */
	int allocated[2];
	/* pending metadata requests */
	int meta_pending;
	/* fifo list of requests in sort_list */
	struct list_head fifo;

	unsigned long slice_end;
	long slice_resid;

	/* number of requests that are on the dispatch list or inside driver */
	int dispatched;

	/* io prio of this group */
	unsigned short ioprio, org_ioprio;
	unsigned short ioprio_class, org_ioprio_class;

	/* various state flags, see below */
	unsigned int flags;
};

enum cfqq_state_flags {
	CFQ_CFQQ_FLAG_on_rr = 0,	/* on round-robin busy list */
	CFQ_CFQQ_FLAG_wait_request,	/* waiting for a request */
	CFQ_CFQQ_FLAG_must_alloc,	/* must be allowed rq alloc */
	CFQ_CFQQ_FLAG_must_alloc_slice,	/* per-slice must_alloc flag */
	CFQ_CFQQ_FLAG_must_dispatch,	/* must dispatch, even if expired */
	CFQ_CFQQ_FLAG_fifo_expire,	/* FIFO checked in this slice */
	CFQ_CFQQ_FLAG_idle_window,	/* slice idling enabled */
	CFQ_CFQQ_FLAG_prio_changed,	/* task priority has changed */
	CFQ_CFQQ_FLAG_queue_new,	/* queue never been serviced */
	CFQ_CFQQ_FLAG_slice_new,	/* no requests dispatched in slice */
	CFQ_CFQQ_FLAG_sync,		/* synchronous queue */
};

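/*
 * Generate cfq_mark_cfqq_FOO(), cfq_clear_cfqq_FOO() and cfq_cfqq_FOO()
 * helpers for each of the state flags above.
 */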
#define CFQ_CFQQ_FNS(name)						\
static inline void cfq_mark_cfqq_##name(struct cfq_queue *cfqq)		\
{									\
	cfqq->flags |= (1 << CFQ_CFQQ_FLAG_##name);			\
}									\
static inline void cfq_clear_cfqq_##name(struct cfq_queue *cfqq)	\
{									\
	cfqq->flags &= ~(1 << CFQ_CFQQ_FLAG_##name);			\
}									\
static inline int cfq_cfqq_##name(const struct cfq_queue *cfqq)		\
{									\
	return (cfqq->flags & (1 << CFQ_CFQQ_FLAG_##name)) != 0;	\
}

CFQ_CFQQ_FNS(on_rr);
CFQ_CFQQ_FNS(wait_request);
CFQ_CFQQ_FNS(must_alloc);
CFQ_CFQQ_FNS(must_alloc_slice);
CFQ_CFQQ_FNS(must_dispatch);
CFQ_CFQQ_FNS(fifo_expire);
CFQ_CFQQ_FNS(idle_window);
CFQ_CFQQ_FNS(prio_changed);
CFQ_CFQQ_FNS(queue_new);
CFQ_CFQQ_FNS(slice_new);
CFQ_CFQQ_FNS(sync);
#undef CFQ_CFQQ_FNS

static void cfq_dispatch_insert(request_queue_t *, struct request *);
static struct cfq_queue *cfq_get_queue(struct cfq_data *, int,
				       struct task_struct *, gfp_t);
static struct cfq_io_context *cfq_cic_rb_lookup(struct cfq_data *,
						struct io_context *);

static inline struct cfq_queue *cic_to_cfqq(struct cfq_io_context *cic,
					    int is_sync)
{
	return cic->cfqq[!!is_sync];
}

static inline void cic_set_cfqq(struct cfq_io_context *cic,
				struct cfq_queue *cfqq, int is_sync)
{
	cic->cfqq[!!is_sync] = cfqq;
}

/*
 * We regard a request as SYNC if it's either a read or has the SYNC bit
 * set (in which case it could also be direct WRITE).
 */
static inline int cfq_bio_sync(struct bio *bio)
{
	if (bio_data_dir(bio) == READ || bio_sync(bio))
		return 1;

	return 0;
}

/*
 * scheduler run of queue, if there are requests pending and no one in the
 * driver that will restart queueing
 */
static inline void cfq_schedule_dispatch(struct cfq_data *cfqd)
{
	if (cfqd->busy_queues)
		kblockd_schedule_work(&cfqd->unplug_work);
}

static int cfq_queue_empty(request_queue_t *q)
{
	struct cfq_data *cfqd = q->elevator->elevator_data;

	return !cfqd->busy_queues;
}

/*
 * Scale schedule slice based on io priority. Use the sync time slice only
 * if a queue is marked sync and has sync io queued. A sync queue with async
 * io only, should not get full sync slice length.
 */
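/*
 * For example, with the default sync base slice of HZ/10 (100ms) and
 * CFQ_SLICE_SCALE of 5, prio 0 gets 180ms, the default prio 4 gets
 * 100ms and prio 7 gets 40ms.
 */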
static inline int cfq_prio_slice(struct cfq_data *cfqd, int sync,
				 unsigned short prio)
{
	const int base_slice = cfqd->cfq_slice[sync];

	WARN_ON(prio >= IOPRIO_BE_NR);

	return base_slice + (base_slice/CFQ_SLICE_SCALE * (4 - prio));
}

static inline int
cfq_prio_to_slice(struct cfq_data *cfqd, struct cfq_queue *cfqq)
{
	return cfq_prio_slice(cfqd, cfq_cfqq_sync(cfqq), cfqq->ioprio);
}

static inline void
cfq_set_prio_slice(struct cfq_data *cfqd, struct cfq_queue *cfqq)
{
	cfqq->slice_end = cfq_prio_to_slice(cfqd, cfqq) + jiffies;
}

/*
 * We need to wrap this check in cfq_cfqq_slice_new(), since ->slice_end
 * isn't valid until the first request from the dispatch is activated
 * and the slice time set.
 */
static inline int cfq_slice_used(struct cfq_queue *cfqq)
{
	if (cfq_cfqq_slice_new(cfqq))
		return 0;
	if (time_before(jiffies, cfqq->slice_end))
		return 0;

	return 1;
}

/*
 * Lifted from AS - choose which of rq1 and rq2 is best served now.
 * We choose the request that is closest to the head right now. Distance
 * behind the head is penalized and only allowed to a certain extent.
 */
static struct request *
cfq_choose_req(struct cfq_data *cfqd, struct request *rq1, struct request *rq2)
{
	sector_t last, s1, s2, d1 = 0, d2 = 0;
	unsigned long back_max;
#define CFQ_RQ1_WRAP	0x01 /* request 1 wraps */
#define CFQ_RQ2_WRAP	0x02 /* request 2 wraps */
	unsigned wrap = 0; /* bit mask: requests behind the disk head? */

	if (rq1 == NULL || rq1 == rq2)
		return rq2;
	if (rq2 == NULL)
		return rq1;

	if (rq_is_sync(rq1) && !rq_is_sync(rq2))
		return rq1;
	else if (rq_is_sync(rq2) && !rq_is_sync(rq1))
		return rq2;
	if (rq_is_meta(rq1) && !rq_is_meta(rq2))
		return rq1;
	else if (rq_is_meta(rq2) && !rq_is_meta(rq1))
		return rq2;

	s1 = rq1->sector;
	s2 = rq2->sector;

	last = cfqd->last_position;

	/*
	 * by definition, 1KiB is 2 sectors
	 */
	back_max = cfqd->cfq_back_max * 2;

	/*
	 * Strict one way elevator _except_ in the case where we allow
	 * short backward seeks which are biased as twice the cost of a
	 * similar forward seek.
	 */
	if (s1 >= last)
		d1 = s1 - last;
	else if (s1 + back_max >= last)
		d1 = (last - s1) * cfqd->cfq_back_penalty;
	else
		wrap |= CFQ_RQ1_WRAP;

	if (s2 >= last)
		d2 = s2 - last;
	else if (s2 + back_max >= last)
		d2 = (last - s2) * cfqd->cfq_back_penalty;
	else
		wrap |= CFQ_RQ2_WRAP;

	/* Found required data */

	/*
	 * By doing switch() on the bit mask "wrap" we avoid having to
	 * check two variables for all permutations: --> faster!
	 */
	switch (wrap) {
	case 0: /* common case for CFQ: rq1 and rq2 not wrapped */
		if (d1 < d2)
			return rq1;
		else if (d2 < d1)
			return rq2;
		else {
			if (s1 >= s2)
				return rq1;
			else
				return rq2;
		}

	case CFQ_RQ2_WRAP:
		return rq1;
	case CFQ_RQ1_WRAP:
		return rq2;
	case (CFQ_RQ1_WRAP|CFQ_RQ2_WRAP): /* both rqs wrapped */
	default:
		/*
		 * Since both rqs are wrapped,
		 * start with the one that's further behind head
		 * (--> only *one* back seek required),
		 * since back seek takes more time than forward.
		 */
		if (s1 <= s2)
			return rq1;
		else
			return rq2;
	}
}

/*
 * The below is leftmost cache rbtree addon
 */
static struct rb_node *cfq_rb_first(struct cfq_rb_root *root)
{
	if (!root->left)
		root->left = rb_first(&root->rb);

	return root->left;
}

static void cfq_rb_erase(struct rb_node *n, struct cfq_rb_root *root)
{
	if (root->left == n)
		root->left = NULL;

	rb_erase(n, &root->rb);
	RB_CLEAR_NODE(n);
}

/*
 * would be nice to take fifo expire time into account as well
 */
static struct request *
cfq_find_next_rq(struct cfq_data *cfqd, struct cfq_queue *cfqq,
		  struct request *last)
{
	struct rb_node *rbnext = rb_next(&last->rb_node);
	struct rb_node *rbprev = rb_prev(&last->rb_node);
	struct request *next = NULL, *prev = NULL;

	BUG_ON(RB_EMPTY_NODE(&last->rb_node));

	if (rbprev)
		prev = rb_entry_rq(rbprev);

	if (rbnext)
		next = rb_entry_rq(rbnext);
	else {
		rbnext = rb_first(&cfqq->sort_list);
		if (rbnext && rbnext != &last->rb_node)
			next = rb_entry_rq(rbnext);
	}

	return cfq_choose_req(cfqd, next, prev);
}

static unsigned long cfq_slice_offset(struct cfq_data *cfqd,
				      struct cfq_queue *cfqq)
{
	/*
	 * just an approximation, should be ok.
	 */
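	/*
	 * A longer (higher priority) slice means a smaller offset, and thus
	 * an earlier position in the service tree.
	 */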
	return (cfqd->busy_queues - 1) * (cfq_prio_slice(cfqd, 1, 0) -
		       cfq_prio_slice(cfqd, cfq_cfqq_sync(cfqq), cfqq->ioprio));
}

/*
 * The cfqd->service_tree holds all pending cfq_queue's that have
 * requests waiting to be processed. It is sorted in the order that
 * we will service the queues.
 */
static void cfq_service_tree_add(struct cfq_data *cfqd,
				    struct cfq_queue *cfqq, int add_front)
{
	struct rb_node **p = &cfqd->service_tree.rb.rb_node;
	struct rb_node *parent = NULL;
	unsigned long rb_key;
	int left;

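	/*
	 * The key approximates when the queue is due for service: now plus a
	 * priority-based offset, plus any residual slice carried over.
	 * add_front uses a key of 0 to sort the queue ahead of the rest.
	 */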
	if (!add_front) {
		rb_key = cfq_slice_offset(cfqd, cfqq) + jiffies;
		rb_key += cfqq->slice_resid;
		cfqq->slice_resid = 0;
	} else
		rb_key = 0;

	if (!RB_EMPTY_NODE(&cfqq->rb_node)) {
		/*
		 * same position, nothing more to do
		 */
		if (rb_key == cfqq->rb_key)
			return;

		cfq_rb_erase(&cfqq->rb_node, &cfqd->service_tree);
	}

	left = 1;
	while (*p) {
		struct cfq_queue *__cfqq;
		struct rb_node **n;

		parent = *p;
		__cfqq = rb_entry(parent, struct cfq_queue, rb_node);

		/*
		 * sort RT queues first, we always want to give
		 * preference to them. IDLE queues go to the back.
		 * after that, sort on the next service time.
		 */
		if (cfq_class_rt(cfqq) > cfq_class_rt(__cfqq))
			n = &(*p)->rb_left;
		else if (cfq_class_rt(cfqq) < cfq_class_rt(__cfqq))
			n = &(*p)->rb_right;
		else if (cfq_class_idle(cfqq) < cfq_class_idle(__cfqq))
			n = &(*p)->rb_left;
		else if (cfq_class_idle(cfqq) > cfq_class_idle(__cfqq))
			n = &(*p)->rb_right;
		else if (rb_key < __cfqq->rb_key)
			n = &(*p)->rb_left;
		else
			n = &(*p)->rb_right;

		if (n == &(*p)->rb_right)
			left = 0;

		p = n;
	}

	if (left)
		cfqd->service_tree.left = &cfqq->rb_node;

	cfqq->rb_key = rb_key;
	rb_link_node(&cfqq->rb_node, parent, p);
	rb_insert_color(&cfqq->rb_node, &cfqd->service_tree.rb);
}

/*
 * Update cfqq's position in the service tree.
 */
static void cfq_resort_rr_list(struct cfq_data *cfqd, struct cfq_queue *cfqq)
{
	/*
	 * Resorting requires the cfqq to be on the RR list already.
	 */
	if (cfq_cfqq_on_rr(cfqq))
		cfq_service_tree_add(cfqd, cfqq, 0);
}

/*
 * add to busy list of queues for service, trying to be fair in ordering
 * the pending list according to last request service
 */
static inline void
cfq_add_cfqq_rr(struct cfq_data *cfqd, struct cfq_queue *cfqq)
{
	BUG_ON(cfq_cfqq_on_rr(cfqq));
	cfq_mark_cfqq_on_rr(cfqq);
	cfqd->busy_queues++;

	cfq_resort_rr_list(cfqd, cfqq);
}

/*
 * Called when the cfqq no longer has requests pending, remove it from
 * the service tree.
 */
static inline void
cfq_del_cfqq_rr(struct cfq_data *cfqd, struct cfq_queue *cfqq)
{
	BUG_ON(!cfq_cfqq_on_rr(cfqq));
	cfq_clear_cfqq_on_rr(cfqq);

	if (!RB_EMPTY_NODE(&cfqq->rb_node))
		cfq_rb_erase(&cfqq->rb_node, &cfqd->service_tree);

	BUG_ON(!cfqd->busy_queues);
	cfqd->busy_queues--;
}

/*
 * rb tree support functions
 */
static inline void cfq_del_rq_rb(struct request *rq)
{
	struct cfq_queue *cfqq = RQ_CFQQ(rq);
	struct cfq_data *cfqd = cfqq->cfqd;
	const int sync = rq_is_sync(rq);

	BUG_ON(!cfqq->queued[sync]);
	cfqq->queued[sync]--;

	elv_rb_del(&cfqq->sort_list, rq);

	if (cfq_cfqq_on_rr(cfqq) && RB_EMPTY_ROOT(&cfqq->sort_list))
		cfq_del_cfqq_rr(cfqd, cfqq);
}

static void cfq_add_rq_rb(struct request *rq)
{
	struct cfq_queue *cfqq = RQ_CFQQ(rq);
	struct cfq_data *cfqd = cfqq->cfqd;
	struct request *__alias;

	cfqq->queued[rq_is_sync(rq)]++;

	/*
	 * looks a little odd, but the first insert might return an alias.
	 * if that happens, put the alias on the dispatch list
	 */
	while ((__alias = elv_rb_add(&cfqq->sort_list, rq)) != NULL)
		cfq_dispatch_insert(cfqd->queue, __alias);

	if (!cfq_cfqq_on_rr(cfqq))
		cfq_add_cfqq_rr(cfqd, cfqq);

	/*
	 * check if this request is a better next-serve candidate
	 */
	cfqq->next_rq = cfq_choose_req(cfqd, cfqq->next_rq, rq);
	BUG_ON(!cfqq->next_rq);
}

static inline void
cfq_reposition_rq_rb(struct cfq_queue *cfqq, struct request *rq)
{
	elv_rb_del(&cfqq->sort_list, rq);
	cfqq->queued[rq_is_sync(rq)]--;
	cfq_add_rq_rb(rq);
}

static struct request *
cfq_find_rq_fmerge(struct cfq_data *cfqd, struct bio *bio)
{
	struct task_struct *tsk = current;
	struct cfq_io_context *cic;
	struct cfq_queue *cfqq;

	cic = cfq_cic_rb_lookup(cfqd, tsk->io_context);
	if (!cic)
		return NULL;

	cfqq = cic_to_cfqq(cic, cfq_bio_sync(bio));
	if (cfqq) {
		sector_t sector = bio->bi_sector + bio_sectors(bio);

		return elv_rb_find(&cfqq->sort_list, sector);
	}

	return NULL;
}

static void cfq_activate_request(request_queue_t *q, struct request *rq)
{
	struct cfq_data *cfqd = q->elevator->elevator_data;

	cfqd->rq_in_driver++;

	/*
	 * If the depth is larger than 1, it really could be queueing. But let's
	 * make the mark a little higher - idling could still be good for
	 * low queueing, and a low queueing number could also just indicate
	 * a SCSI mid layer like behaviour where limit+1 is often seen.
	 */
	if (!cfqd->hw_tag && cfqd->rq_in_driver > 4)
		cfqd->hw_tag = 1;

	cfqd->last_position = rq->hard_sector + rq->hard_nr_sectors;
}

static void cfq_deactivate_request(request_queue_t *q, struct request *rq)
{
	struct cfq_data *cfqd = q->elevator->elevator_data;

	WARN_ON(!cfqd->rq_in_driver);
	cfqd->rq_in_driver--;
}

static void cfq_remove_request(struct request *rq)
{
	struct cfq_queue *cfqq = RQ_CFQQ(rq);

	if (cfqq->next_rq == rq)
		cfqq->next_rq = cfq_find_next_rq(cfqq->cfqd, cfqq, rq);

	list_del_init(&rq->queuelist);
	cfq_del_rq_rb(rq);

	if (rq_is_meta(rq)) {
		WARN_ON(!cfqq->meta_pending);
		cfqq->meta_pending--;
	}
}

static int cfq_merge(request_queue_t *q, struct request **req, struct bio *bio)
{
	struct cfq_data *cfqd = q->elevator->elevator_data;
	struct request *__rq;

	__rq = cfq_find_rq_fmerge(cfqd, bio);
	if (__rq && elv_rq_merge_ok(__rq, bio)) {
		*req = __rq;
		return ELEVATOR_FRONT_MERGE;
	}

	return ELEVATOR_NO_MERGE;
}

static void cfq_merged_request(request_queue_t *q, struct request *req,
			       int type)
{
	if (type == ELEVATOR_FRONT_MERGE) {
		struct cfq_queue *cfqq = RQ_CFQQ(req);

		cfq_reposition_rq_rb(cfqq, req);
	}
}

static void
cfq_merged_requests(request_queue_t *q, struct request *rq,
		    struct request *next)
{
	/*
	 * reposition in fifo if next is older than rq
	 */
	if (!list_empty(&rq->queuelist) && !list_empty(&next->queuelist) &&
	    time_before(next->start_time, rq->start_time))
		list_move(&rq->queuelist, &next->queuelist);

	cfq_remove_request(next);
}

static int cfq_allow_merge(request_queue_t *q, struct request *rq,
			   struct bio *bio)
{
	struct cfq_data *cfqd = q->elevator->elevator_data;
	struct cfq_io_context *cic;
	struct cfq_queue *cfqq;

	/*
	 * Disallow merge of a sync bio into an async request.
	 */
	if (cfq_bio_sync(bio) && !rq_is_sync(rq))
		return 0;

	/*
	 * Lookup the cfqq that this bio will be queued with. Allow
	 * merge only if rq is queued there.
	 */
	cic = cfq_cic_rb_lookup(cfqd, current->io_context);
	if (!cic)
		return 0;

	cfqq = cic_to_cfqq(cic, cfq_bio_sync(bio));
	if (cfqq == RQ_CFQQ(rq))
		return 1;

	return 0;
}

static inline void
__cfq_set_active_queue(struct cfq_data *cfqd, struct cfq_queue *cfqq)
{
	if (cfqq) {
		/*
		 * stop potential idle class queues waiting service
		 */
		del_timer(&cfqd->idle_class_timer);

		cfqq->slice_end = 0;
		cfq_clear_cfqq_must_alloc_slice(cfqq);
		cfq_clear_cfqq_fifo_expire(cfqq);
		cfq_mark_cfqq_slice_new(cfqq);
		cfq_clear_cfqq_queue_new(cfqq);
	}

	cfqd->active_queue = cfqq;
}

/*
 * current cfqq expired its slice (or was too idle), select new one
 */
static void
__cfq_slice_expired(struct cfq_data *cfqd, struct cfq_queue *cfqq,
		    int timed_out)
{
	if (cfq_cfqq_wait_request(cfqq))
		del_timer(&cfqd->idle_slice_timer);

	cfq_clear_cfqq_must_dispatch(cfqq);
	cfq_clear_cfqq_wait_request(cfqq);

	/*
	 * store what was left of this slice, if the queue idled/timed out
	 */
	if (timed_out && !cfq_cfqq_slice_new(cfqq))
		cfqq->slice_resid = cfqq->slice_end - jiffies;

	cfq_resort_rr_list(cfqd, cfqq);

	if (cfqq == cfqd->active_queue)
		cfqd->active_queue = NULL;

	if (cfqd->active_cic) {
		put_io_context(cfqd->active_cic->ioc);
		cfqd->active_cic = NULL;
	}
}

static inline void cfq_slice_expired(struct cfq_data *cfqd, int timed_out)
{
	struct cfq_queue *cfqq = cfqd->active_queue;

	if (cfqq)
		__cfq_slice_expired(cfqd, cfqq, timed_out);
}

/*
 * Get next queue for service. Unless we have a queue preemption,
 * we'll simply select the first cfqq in the service tree.
 */
static struct cfq_queue *cfq_get_next_queue(struct cfq_data *cfqd)
{
	struct cfq_queue *cfqq;
	struct rb_node *n;

	if (RB_EMPTY_ROOT(&cfqd->service_tree.rb))
		return NULL;

	n = cfq_rb_first(&cfqd->service_tree);
	cfqq = rb_entry(n, struct cfq_queue, rb_node);

	if (cfq_class_idle(cfqq)) {
		unsigned long end;

		/*
		 * if we have idle queues and no rt or be queues had
		 * pending requests, either allow immediate service if
		 * the grace period has passed or arm the idle grace
		 * timer
		 */
		end = cfqd->last_end_request + CFQ_IDLE_GRACE;
		if (time_before(jiffies, end)) {
			mod_timer(&cfqd->idle_class_timer, end);
			cfqq = NULL;
		}
	}

	return cfqq;
}

/*
 * Get and set a new active queue for service.
 */
static struct cfq_queue *cfq_set_active_queue(struct cfq_data *cfqd)
{
	struct cfq_queue *cfqq;

	cfqq = cfq_get_next_queue(cfqd);
	__cfq_set_active_queue(cfqd, cfqq);
	return cfqq;
}

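/*
 * Absolute distance, in sectors, between a request and the last
 * dispatched position on the device.
 */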
static inline sector_t cfq_dist_from_last(struct cfq_data *cfqd,
					  struct request *rq)
{
	if (rq->sector >= cfqd->last_position)
		return rq->sector - cfqd->last_position;
	else
		return cfqd->last_position - rq->sector;
}

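/*
 * A request is "close" if it is within the active process's mean seek
 * distance of the last dispatched position.
 */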
static inline int cfq_rq_close(struct cfq_data *cfqd, struct request *rq)
{
	struct cfq_io_context *cic = cfqd->active_cic;

	if (!sample_valid(cic->seek_samples))
		return 0;

	return cfq_dist_from_last(cfqd, rq) <= cic->seek_mean;
}

static int cfq_close_cooperator(struct cfq_data *cfq_data,
				struct cfq_queue *cfqq)
{
	/*
	 * We should notice if some of the queues are cooperating, e.g.
	 * working closely on the same area of the disk. In that case,
	 * we can group them together and don't waste time idling.
	 */
	return 0;
}

#define CIC_SEEKY(cic) ((cic)->seek_mean > (8 * 1024))

static void cfq_arm_slice_timer(struct cfq_data *cfqd)
{
	struct cfq_queue *cfqq = cfqd->active_queue;
	struct cfq_io_context *cic;
	unsigned long sl;

	WARN_ON(!RB_EMPTY_ROOT(&cfqq->sort_list));
	WARN_ON(cfq_cfqq_slice_new(cfqq));

	/*
	 * idle is disabled, either manually or by past process history
	 */
	if (!cfqd->cfq_slice_idle || !cfq_cfqq_idle_window(cfqq))
		return;

	/*
	 * task has exited, don't wait
	 */
	cic = cfqd->active_cic;
	if (!cic || !cic->ioc->task)
		return;

	/*
	 * See if this prio level has a good candidate
	 */
	if (cfq_close_cooperator(cfqd, cfqq) &&
	    (sample_valid(cic->ttime_samples) && cic->ttime_mean > 2))
		return;

	cfq_mark_cfqq_must_dispatch(cfqq);
	cfq_mark_cfqq_wait_request(cfqq);

	/*
	 * we don't want to idle for seeks, but we do want to allow
	 * fair distribution of slice time for a process doing back-to-back
	 * seeks. so allow a little bit of time for him to submit a new rq
	 */
	sl = cfqd->cfq_slice_idle;
	if (sample_valid(cic->seek_samples) && CIC_SEEKY(cic))
		sl = min(sl, msecs_to_jiffies(CFQ_MIN_TT));

	mod_timer(&cfqd->idle_slice_timer, jiffies + sl);
}

/*
 * Move request from internal lists to the request queue dispatch list.
 */
static void cfq_dispatch_insert(request_queue_t *q, struct request *rq)
{
	struct cfq_data *cfqd = q->elevator->elevator_data;
	struct cfq_queue *cfqq = RQ_CFQQ(rq);

	cfq_remove_request(rq);
	cfqq->dispatched++;
	elv_dispatch_sort(q, rq);

	if (cfq_cfqq_sync(cfqq))
		cfqd->sync_flight++;
}

/*
 * return expired entry, or NULL to just start from scratch in rbtree
 */
static inline struct request *cfq_check_fifo(struct cfq_queue *cfqq)
{
	struct cfq_data *cfqd = cfqq->cfqd;
	struct request *rq;
	int fifo;

	if (cfq_cfqq_fifo_expire(cfqq))
		return NULL;

	cfq_mark_cfqq_fifo_expire(cfqq);

	if (list_empty(&cfqq->fifo))
		return NULL;

	fifo = cfq_cfqq_sync(cfqq);
	rq = rq_entry_fifo(cfqq->fifo.next);

	if (time_before(jiffies, rq->start_time + cfqd->cfq_fifo_expire[fifo]))
		return NULL;

	return rq;
}

static inline int
cfq_prio_to_maxrq(struct cfq_data *cfqd, struct cfq_queue *cfqq)
{
	const int base_rq = cfqd->cfq_slice_async_rq;

	WARN_ON(cfqq->ioprio >= IOPRIO_BE_NR);

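	/*
	 * e.g. with the default cfq_slice_async_rq of 2: prio 0 may dispatch
	 * up to 32 requests in a slice, prio 4 up to 16, prio 7 up to 4.
	 */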
	return 2 * (base_rq + base_rq * (CFQ_PRIO_LISTS - 1 - cfqq->ioprio));
}

/*
 * Select a queue for service. If we have a current active queue,
 * check whether to continue servicing it, or retrieve and set a new one.
 */
static struct cfq_queue *cfq_select_queue(struct cfq_data *cfqd)
{
	struct cfq_queue *cfqq;

	cfqq = cfqd->active_queue;
	if (!cfqq)
		goto new_queue;

	/*
	 * The active queue has run out of time, expire it and select new.
	 */
	if (cfq_slice_used(cfqq))
		goto expire;

	/*
	 * The active queue has requests and isn't expired, allow it to
	 * dispatch.
	 */
	if (!RB_EMPTY_ROOT(&cfqq->sort_list))
		goto keep_queue;

	/*
	 * No requests pending. If the active queue still has requests in
	 * flight or is idling for a new request, allow either of these
	 * conditions to happen (or time out) before selecting a new queue.
	 */
	if (timer_pending(&cfqd->idle_slice_timer) ||
	    (cfqq->dispatched && cfq_cfqq_idle_window(cfqq))) {
		cfqq = NULL;
		goto keep_queue;
	}

expire:
	cfq_slice_expired(cfqd, 0);
new_queue:
	cfqq = cfq_set_active_queue(cfqd);
keep_queue: