/*
 * Functions related to io context handling
 */
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/init.h>
#include <linux/bio.h>
#include <linux/blkdev.h>
#include <linux/slab.h>

#include "blk.h"

/*
 * For io context allocations
 *
 * Slab cache from which every struct io_context in this file is allocated
 * and to which it is freed.  Set up once by blk_ioc_init().
 */
static struct kmem_cache *iocontext_cachep;

/**
 * get_io_context - take one more reference on an io_context
 * @ioc: io_context to get
 *
 * Bumps @ioc->refcount by one.  The count is expected to be positive
 * already; a non-positive value trips BUG_ON().
 */
void get_io_context(struct io_context *ioc)
{
	long cur_refs = atomic_long_read(&ioc->refcount);

	BUG_ON(cur_refs <= 0);
	atomic_long_inc(&ioc->refcount);
}
EXPORT_SYMBOL(get_io_context);

/*
 * RCU callback that frees an io_cq.  The slab cache to return the object
 * to is taken from the icq itself (->__rcu_icq_cache), which
 * ioc_destroy_icq() records before handing the icq to call_rcu().
 */
static void icq_free_icq_rcu(struct rcu_head *head)
{
	struct io_cq *dead_icq;
	struct kmem_cache *cache;

	dead_icq = container_of(head, struct io_cq, __rcu_head);
	cache = dead_icq->__rcu_icq_cache;

	kmem_cache_free(cache, dead_icq);
}

38
/* Exit an icq. Called with both ioc and q locked. */
39
static void ioc_exit_icq(struct io_cq *icq)
40 41 42 43 44 45
{
	struct elevator_type *et = icq->q->elevator->type;

	if (icq->flags & ICQ_EXITED)
		return;

46 47 48
	if (et->uses_mq && et->ops.mq.exit_icq)
		et->ops.mq.exit_icq(icq);
	else if (!et->uses_mq && et->ops.sq.elevator_exit_icq_fn)
49
		et->ops.sq.elevator_exit_icq_fn(icq);
50 51 52 53 54 55

	icq->flags |= ICQ_EXITED;
}

/* Release an icq.  Called with both ioc and q locked. */
static void ioc_destroy_icq(struct io_cq *icq)
56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72
{
	struct io_context *ioc = icq->ioc;
	struct request_queue *q = icq->q;
	struct elevator_type *et = q->elevator->type;

	lockdep_assert_held(&ioc->lock);
	lockdep_assert_held(q->queue_lock);

	radix_tree_delete(&ioc->icq_tree, icq->q->id);
	hlist_del_init(&icq->ioc_node);
	list_del_init(&icq->q_node);

	/*
	 * Both setting lookup hint to and clearing it from @icq are done
	 * under queue_lock.  If it's not pointing to @icq now, it never
	 * will.  Hint assignment itself can race safely.
	 */
73
	if (rcu_access_pointer(ioc->icq_hint) == icq)
74 75
		rcu_assign_pointer(ioc->icq_hint, NULL);

76
	ioc_exit_icq(icq);
77 78 79 80 81 82 83 84 85

	/*
	 * @icq->q might have gone away by the time RCU callback runs
	 * making it impossible to determine icq_cache.  Record it in @icq.
	 */
	icq->__rcu_icq_cache = et->icq_cache;
	call_rcu(&icq->__rcu_head, icq_free_icq_rcu);
}

86 87
/*
 * Slow path for ioc release in put_io_context().  Performs double-lock
88
 * dancing to unlink all icq's and then frees ioc.
89 90
 */
static void ioc_release_fn(struct work_struct *work)
91
{
92 93
	struct io_context *ioc = container_of(work, struct io_context,
					      release_work);
94
	unsigned long flags;
95

96 97 98 99 100 101 102
	/*
	 * Exiting icq may call into put_io_context() through elevator
	 * which will trigger lockdep warning.  The ioc's are guaranteed to
	 * be different, use a different locking subclass here.  Use
	 * irqsave variant as there's no spin_lock_irq_nested().
	 */
	spin_lock_irqsave_nested(&ioc->lock, flags, 1);
103

104 105 106
	while (!hlist_empty(&ioc->icq_list)) {
		struct io_cq *icq = hlist_entry(ioc->icq_list.first,
						struct io_cq, ioc_node);
107 108 109
		struct request_queue *q = icq->q;

		if (spin_trylock(q->queue_lock)) {
110
			ioc_destroy_icq(icq);
111 112 113 114 115
			spin_unlock(q->queue_lock);
		} else {
			spin_unlock_irqrestore(&ioc->lock, flags);
			cpu_relax();
			spin_lock_irqsave_nested(&ioc->lock, flags, 1);
116 117
		}
	}
118

119
	spin_unlock_irqrestore(&ioc->lock, flags);
120 121

	kmem_cache_free(iocontext_cachep, ioc);
122 123
}

Tejun Heo's avatar
Tejun Heo committed
124 125 126 127 128
/**
 * put_io_context - put a reference of io_context
 * @ioc: io_context to put
 *
 * Decrement reference count of @ioc and release it if the count reaches
129
 * zero.
130
 */
131
void put_io_context(struct io_context *ioc)
132
{
133
	unsigned long flags;
134
	bool free_ioc = false;
135

136
	if (ioc == NULL)
Tejun Heo's avatar
Tejun Heo committed
137
		return;
138

Tejun Heo's avatar
Tejun Heo committed
139
	BUG_ON(atomic_long_read(&ioc->refcount) <= 0);
140

141
	/*
142 143
	 * Releasing ioc requires reverse order double locking and we may
	 * already be holding a queue_lock.  Do it asynchronously from wq.
144
	 */
145 146 147
	if (atomic_long_dec_and_test(&ioc->refcount)) {
		spin_lock_irqsave(&ioc->lock, flags);
		if (!hlist_empty(&ioc->icq_list))
148 149
			queue_work(system_power_efficient_wq,
					&ioc->release_work);
150 151
		else
			free_ioc = true;
152
		spin_unlock_irqrestore(&ioc->lock, flags);
153
	}
154 155 156

	if (free_ioc)
		kmem_cache_free(iocontext_cachep, ioc);
157
}
158
EXPORT_SYMBOL(put_io_context);
159

160 161 162 163 164 165 166 167
/**
 * put_io_context_active - put active reference on ioc
 * @ioc: ioc of interest
 *
 * Undo get_io_context_active().  If active reference reaches zero after
 * put, @ioc can never issue further IOs and ioscheds are notified.
 */
void put_io_context_active(struct io_context *ioc)
168
{
169
	unsigned long flags;
170
	struct io_cq *icq;
171

172
	if (!atomic_dec_and_test(&ioc->active_ref)) {
173 174 175 176 177 178 179 180 181 182 183
		put_io_context(ioc);
		return;
	}

	/*
	 * Need ioc lock to walk icq_list and q lock to exit icq.  Perform
	 * reverse double locking.  Read comment in ioc_release_fn() for
	 * explanation on the nested locking annotation.
	 */
retry:
	spin_lock_irqsave_nested(&ioc->lock, flags, 1);
184
	hlist_for_each_entry(icq, &ioc->icq_list, ioc_node) {
185 186 187 188 189 190 191 192 193 194 195 196 197
		if (icq->flags & ICQ_EXITED)
			continue;
		if (spin_trylock(icq->q->queue_lock)) {
			ioc_exit_icq(icq);
			spin_unlock(icq->q->queue_lock);
		} else {
			spin_unlock_irqrestore(&ioc->lock, flags);
			cpu_relax();
			goto retry;
		}
	}
	spin_unlock_irqrestore(&ioc->lock, flags);

198
	put_io_context(ioc);
199 200
}

/*
 * Called by the exiting task.
 *
 * Detaches the io_context from @task, drops the task count on it and
 * releases the active reference through put_io_context_active().
 */
void exit_io_context(struct task_struct *task)
{
	struct io_context *detached;

	/* Unhook the io_context from @task while holding task_lock. */
	task_lock(task);
	detached = task->io_context;
	task->io_context = NULL;
	task_unlock(task);

	atomic_dec(&detached->nr_tasks);
	put_io_context_active(detached);
}

215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230
/**
 * ioc_clear_queue - break any ioc association with the specified queue
 * @q: request_queue being cleared
 *
 * Walk @q->icq_list and exit all io_cq's.  Must be called with @q locked.
 */
void ioc_clear_queue(struct request_queue *q)
{
	lockdep_assert_held(q->queue_lock);

	while (!list_empty(&q->icq_list)) {
		struct io_cq *icq = list_entry(q->icq_list.next,
					       struct io_cq, q_node);
		struct io_context *ioc = icq->ioc;

		spin_lock(&ioc->lock);
231
		ioc_destroy_icq(icq);
232 233 234 235
		spin_unlock(&ioc->lock);
	}
}

236
int create_task_io_context(struct task_struct *task, gfp_t gfp_flags, int node)
237
{
238
	struct io_context *ioc;
239
	int ret;
240

Tejun Heo's avatar
Tejun Heo committed
241 242 243
	ioc = kmem_cache_alloc_node(iocontext_cachep, gfp_flags | __GFP_ZERO,
				    node);
	if (unlikely(!ioc))
244
		return -ENOMEM;
Tejun Heo's avatar
Tejun Heo committed
245 246 247

	/* initialize */
	atomic_long_set(&ioc->refcount, 1);
248
	atomic_set(&ioc->nr_tasks, 1);
249
	atomic_set(&ioc->active_ref, 1);
Tejun Heo's avatar
Tejun Heo committed
250
	spin_lock_init(&ioc->lock);
251 252
	INIT_RADIX_TREE(&ioc->icq_tree, GFP_ATOMIC | __GFP_HIGH);
	INIT_HLIST_HEAD(&ioc->icq_list);
253
	INIT_WORK(&ioc->release_work, ioc_release_fn);
254

255 256 257 258 259 260 261
	/*
	 * Try to install.  ioc shouldn't be installed if someone else
	 * already did or @task, which isn't %current, is exiting.  Note
	 * that we need to allow ioc creation on exiting %current as exit
	 * path may issue IOs from e.g. exit_files().  The exit path is
	 * responsible for not issuing IO after exit_io_context().
	 */
262
	task_lock(task);
263 264
	if (!task->io_context &&
	    (task == current || !(task->flags & PF_EXITING)))
265
		task->io_context = ioc;
266
	else
267
		kmem_cache_free(iocontext_cachep, ioc);
268 269 270

	ret = task->io_context ? 0 : -EBUSY;

271
	task_unlock(task);
272

273
	return ret;
274 275
}

276 277 278 279 280 281 282 283 284
/**
 * get_task_io_context - get io_context of a task
 * @task: task of interest
 * @gfp_flags: allocation flags, used if allocation is necessary
 * @node: allocation node, used if allocation is necessary
 *
 * Return io_context of @task.  If it doesn't exist, it is created with
 * @gfp_flags and @node.  The returned io_context has its reference count
 * incremented.
285
 *
286
 * This function always goes through task_lock() and it's better to use
287
 * %current->io_context + get_io_context() for %current.
288
 */
289 290
struct io_context *get_task_io_context(struct task_struct *task,
				       gfp_t gfp_flags, int node)
291
{
292
	struct io_context *ioc;
293

294
	might_sleep_if(gfpflags_allow_blocking(gfp_flags));
295

296 297 298 299 300 301 302 303
	do {
		task_lock(task);
		ioc = task->io_context;
		if (likely(ioc)) {
			get_io_context(ioc);
			task_unlock(task);
			return ioc;
		}
304
		task_unlock(task);
305
	} while (!create_task_io_context(task, gfp_flags, node));
306

307
	return NULL;
308
}
309
EXPORT_SYMBOL(get_task_io_context);
/**
 * ioc_lookup_icq - find the io_cq linking an ioc to a queue
 * @ioc: the associated io_context
 * @q: the associated request_queue
 *
 * Returns the io_cq for the @ioc - @q pair, or %NULL if there is none.
 * The caller must hold @q->queue_lock.
 */
struct io_cq *ioc_lookup_icq(struct io_context *ioc, struct request_queue *q)
{
	struct io_cq *found;

	lockdep_assert_held(q->queue_lock);

	/*
	 * The hint pointer and the radix tree that index icq's from @ioc
	 * are both RCU protected.  Removal always happens with q and ioc
	 * locks held; since q lock is held here, any icq found that points
	 * back at @q is guaranteed to be valid.
	 */
	rcu_read_lock();

	found = rcu_dereference(ioc->icq_hint);
	if (!found || found->q != q) {
		/* Hint missed: consult the tree and refresh the hint on a hit. */
		found = radix_tree_lookup(&ioc->icq_tree, q->id);
		if (found && found->q == q)
			rcu_assign_pointer(ioc->icq_hint, found);	/* allowed to race */
		else
			found = NULL;
	}

	rcu_read_unlock();
	return found;
}
EXPORT_SYMBOL(ioc_lookup_icq);

347 348
/**
 * ioc_create_icq - create and link io_cq
349
 * @ioc: io_context of interest
350 351 352
 * @q: request_queue of interest
 * @gfp_mask: allocation mask
 *
353 354
 * Make sure io_cq linking @ioc and @q exists.  If icq doesn't exist, they
 * will be created using @gfp_mask.
355 356 357 358
 *
 * The caller is responsible for ensuring @ioc won't go away and @q is
 * alive and will stay alive until this function returns.
 */
359 360
struct io_cq *ioc_create_icq(struct io_context *ioc, struct request_queue *q,
			     gfp_t gfp_mask)
361 362 363 364 365 366 367 368 369 370
{
	struct elevator_type *et = q->elevator->type;
	struct io_cq *icq;

	/* allocate stuff */
	icq = kmem_cache_alloc_node(et->icq_cache, gfp_mask | __GFP_ZERO,
				    q->node);
	if (!icq)
		return NULL;

371
	if (radix_tree_maybe_preload(gfp_mask) < 0) {
372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387
		kmem_cache_free(et->icq_cache, icq);
		return NULL;
	}

	icq->ioc = ioc;
	icq->q = q;
	INIT_LIST_HEAD(&icq->q_node);
	INIT_HLIST_NODE(&icq->ioc_node);

	/* lock both q and ioc and try to link @icq */
	spin_lock_irq(q->queue_lock);
	spin_lock(&ioc->lock);

	if (likely(!radix_tree_insert(&ioc->icq_tree, q->id, icq))) {
		hlist_add_head(&icq->ioc_node, &ioc->icq_list);
		list_add(&icq->q_node, &q->icq_list);
388 389 390
		if (et->uses_mq && et->ops.mq.init_icq)
			et->ops.mq.init_icq(icq);
		else if (!et->uses_mq && et->ops.sq.elevator_init_icq_fn)
391
			et->ops.sq.elevator_init_icq_fn(icq);
392 393 394 395 396 397 398 399 400 401 402 403 404
	} else {
		kmem_cache_free(et->icq_cache, icq);
		icq = ioc_lookup_icq(ioc, q);
		if (!icq)
			printk(KERN_ERR "cfq: icq link failed!\n");
	}

	spin_unlock(&ioc->lock);
	spin_unlock_irq(q->queue_lock);
	radix_tree_preload_end();
	return icq;
}

Adrian Bunk's avatar
Adrian Bunk committed
405
static int __init blk_ioc_init(void)
406 407 408 409 410 411
{
	iocontext_cachep = kmem_cache_create("blkdev_ioc",
			sizeof(struct io_context), 0, SLAB_PANIC, NULL);
	return 0;
}
subsys_initcall(blk_ioc_init);