#include <linux/spinlock.h>
#include <linux/slab.h>
#include <linux/list.h>
#include <linux/list_bl.h>
#include <linux/module.h>
#include <linux/sched.h>
#include <linux/workqueue.h>
#include <linux/mbcache.h>

/*
 * Mbcache is a simple key-value store. Keys need not be unique, however
 * key-value pairs are expected to be unique (we use this fact in
 * mb_cache_entry_delete_block()).
 *
 * Ext2 and ext4 use this cache for deduplication of extended attribute blocks.
 * They use a hash of the block contents as a key and the block number as a
 * value. That's why keys need not be unique (different xattr blocks may end
 * up having the same hash), while a block number always uniquely identifies
 * a cache entry.
 *
 * We provide functions for creation and removal of entries, search by key,
 * and a special "delete entry with given key-value pair" operation. A
 * fixed-size hash table is used for fast key lookups.
 */
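
/*
 * Illustrative sketch, not part of mbcache itself: it merely exercises the
 * semantics described above. The keys, block numbers and GFP mask are
 * made-up example values.
 */
static __maybe_unused void mb_cache_semantics_example(struct mb_cache *cache)
{
	/* Two different blocks whose contents happen to hash alike: */
	mb_cache_entry_create(cache, GFP_KERNEL, 0xcafe, 100, true);
	mb_cache_entry_create(cache, GFP_KERNEL, 0xcafe, 200, true);
	/* Inserting the same key-value pair again would return -EBUSY: */
	mb_cache_entry_create(cache, GFP_KERNEL, 0xcafe, 100, true);
	/* Only the exact key-value pair identifies an entry for deletion: */
	mb_cache_entry_delete_block(cache, 0xcafe, 100);
}
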
struct mb_cache {
	/* Hash table of entries */
	struct hlist_bl_head	*c_hash;
	/* log2 of hash table size */
	int			c_bucket_bits;
	/* Maximum entries in cache to avoid degrading hash too much */
	int			c_max_entries;
	/* Protects c_list, c_entry_count */
	spinlock_t		c_list_lock;
	struct list_head	c_list;
	/* Number of entries in cache */
	unsigned long		c_entry_count;
	struct shrinker		c_shrink;
	/* Work for shrinking when the cache has too many entries */
	struct work_struct	c_shrink_work;
};

static struct kmem_cache *mb_entry_cache;

static unsigned long mb_cache_shrink(struct mb_cache *cache,
				     unsigned int nr_to_scan);

static inline struct hlist_bl_head *mb_cache_entry_head(struct mb_cache *cache,
							u32 key)
{
	return &cache->c_hash[hash_32(key, cache->c_bucket_bits)];
}

/*
 * Number of entries to reclaim synchronously when there are too many entries
 * in cache
 */
#define SYNC_SHRINK_BATCH 64

/*
 * mb_cache_entry_create - create entry in cache
 * @cache - cache where the entry should be created
 * @mask - gfp mask with which the entry should be allocated
 * @key - key of the entry
 * @block - block that contains data
 * @reusable - is the block reusable by other inodes?
 *
 * Creates entry in @cache with key @key and records that data is stored in
 * block @block. The function returns -EBUSY if entry with the same key
 * and for the same block already exists in cache. Otherwise 0 is returned.
 */
int mb_cache_entry_create(struct mb_cache *cache, gfp_t mask, u32 key,
			  sector_t block, bool reusable)
{
	struct mb_cache_entry *entry, *dup;
	struct hlist_bl_node *dup_node;
	struct hlist_bl_head *head;

	/* Schedule background reclaim if there are too many entries */
	if (cache->c_entry_count >= cache->c_max_entries)
		schedule_work(&cache->c_shrink_work);
	/* Do some sync reclaim if background reclaim cannot keep up */
	if (cache->c_entry_count >= 2*cache->c_max_entries)
		mb_cache_shrink(cache, SYNC_SHRINK_BATCH);

	entry = kmem_cache_alloc(mb_entry_cache, mask);
	if (!entry)
		return -ENOMEM;

	INIT_LIST_HEAD(&entry->e_list);
	/* One ref for hash, one ref returned */
	atomic_set(&entry->e_refcnt, 1);
	entry->e_key = key;
	entry->e_block = block;
	entry->e_reusable = reusable;
	head = mb_cache_entry_head(cache, key);
	hlist_bl_lock(head);
	hlist_bl_for_each_entry(dup, dup_node, head, e_hash_list) {
		if (dup->e_key == key && dup->e_block == block) {
			hlist_bl_unlock(head);
			kmem_cache_free(mb_entry_cache, entry);
			return -EBUSY;
		}
	}
	hlist_bl_add_head(&entry->e_hash_list, head);
	hlist_bl_unlock(head);

	spin_lock(&cache->c_list_lock);
	list_add_tail(&entry->e_list, &cache->c_list);
	/* Grab ref for LRU list */
	atomic_inc(&entry->e_refcnt);
	cache->c_entry_count++;
	spin_unlock(&cache->c_list_lock);

	return 0;
}
Jan Kara's avatar
Jan Kara committed
117
EXPORT_SYMBOL(mb_cache_entry_create);
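
/*
 * Illustrative sketch, not part of mbcache itself: a caller that treats
 * -EBUSY as "already cached". The wrapper name is hypothetical.
 */
static __maybe_unused int mb_cache_insert_example(struct mb_cache *cache,
						  u32 key, sector_t block)
{
	int err = mb_cache_entry_create(cache, GFP_NOFS, key, block, true);

	/* Somebody else cached the same key-value pair - that is fine. */
	if (err == -EBUSY)
		err = 0;
	return err;
}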

void __mb_cache_entry_free(struct mb_cache_entry *entry)
{
	kmem_cache_free(mb_entry_cache, entry);
}
EXPORT_SYMBOL(__mb_cache_entry_free);

static struct mb_cache_entry *__entry_find(struct mb_cache *cache,
					   struct mb_cache_entry *entry,
					   u32 key)
{
	struct mb_cache_entry *old_entry = entry;
	struct hlist_bl_node *node;
	struct hlist_bl_head *head;

	head = mb_cache_entry_head(cache, key);
	hlist_bl_lock(head);
	if (entry && !hlist_bl_unhashed(&entry->e_hash_list))
		node = entry->e_hash_list.next;
	else
		node = hlist_bl_first(head);
	while (node) {
		entry = hlist_bl_entry(node, struct mb_cache_entry,
				       e_hash_list);
		if (entry->e_key == key && entry->e_reusable) {
			atomic_inc(&entry->e_refcnt);
			goto out;
		}
		node = node->next;
	}
	entry = NULL;
out:
	hlist_bl_unlock(head);
	if (old_entry)
		mb_cache_entry_put(cache, old_entry);

	return entry;
}

/*
 * mb_cache_entry_find_first - find the first entry in cache with given key
 * @cache: cache where we should search
 * @key: key to look for
 *
 * Search in @cache for entry with key @key. Grabs reference to the first
 * entry found and returns the entry.
 */
struct mb_cache_entry *mb_cache_entry_find_first(struct mb_cache *cache,
						 u32 key)
{
	return __entry_find(cache, NULL, key);
}
EXPORT_SYMBOL(mb_cache_entry_find_first);

/*
 * mb_cache_entry_find_next - find next entry in cache with the same key
 * @cache: cache where we should search
 * @entry: entry to start search from
 *
 * Finds next entry in the hash chain which has the same key as @entry.
 * If @entry is unhashed (which can happen when deletion of entry races
 * with the search), finds the first entry in the hash chain. The function
 * drops reference to @entry and returns with a reference to the found entry.
 */
struct mb_cache_entry *mb_cache_entry_find_next(struct mb_cache *cache,
						struct mb_cache_entry *entry)
{
	return __entry_find(cache, entry, entry->e_key);
}
EXPORT_SYMBOL(mb_cache_entry_find_next);
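
/*
 * Illustrative sketch, not part of mbcache itself: walking all reusable
 * entries with a given key. Each find call drops the reference to the
 * previous entry, so only the entry we keep must be released by the
 * caller. The @match callback is a hypothetical stand-in for "re-read
 * the block and compare contents" (hashes may collide).
 */
static __maybe_unused struct mb_cache_entry *
mb_cache_search_example(struct mb_cache *cache, u32 key,
			bool (*match)(sector_t block))
{
	struct mb_cache_entry *ce;

	for (ce = mb_cache_entry_find_first(cache, key); ce;
	     ce = mb_cache_entry_find_next(cache, ce)) {
		if (match(ce->e_block)) {
			/* Reward the hit so the shrinker spares it. */
			mb_cache_entry_touch(cache, ce);
			return ce;	/* caller must mb_cache_entry_put() */
		}
	}
	return NULL;
}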

/*
 * mb_cache_entry_get - get a cache entry by block number (and key)
 * @cache - cache we work with
 * @key - key of block number @block
 * @block - block number
 */
struct mb_cache_entry *mb_cache_entry_get(struct mb_cache *cache, u32 key,
					  sector_t block)
{
	struct hlist_bl_node *node;
	struct hlist_bl_head *head;
	struct mb_cache_entry *entry;

	head = mb_cache_entry_head(cache, key);
	hlist_bl_lock(head);
	hlist_bl_for_each_entry(entry, node, head, e_hash_list) {
		if (entry->e_key == key && entry->e_block == block) {
			atomic_inc(&entry->e_refcnt);
			goto out;
		}
	}
	entry = NULL;
out:
	hlist_bl_unlock(head);
	return entry;
}
EXPORT_SYMBOL(mb_cache_entry_get);
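
/*
 * Illustrative sketch, not part of mbcache itself: probing for one exact
 * key-value pair, e.g. to check whether a block we are about to modify
 * is still published in the cache. The helper name is hypothetical.
 */
static __maybe_unused bool mb_cache_block_cached_example(struct mb_cache *cache,
							 u32 key, sector_t block)
{
	struct mb_cache_entry *ce = mb_cache_entry_get(cache, key, block);

	if (!ce)
		return false;
	mb_cache_entry_put(cache, ce);
	return true;
}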

/* mb_cache_entry_delete_block - remove information about block from cache
 * @cache - cache we work with
 * @key - key of block @block
 * @block - block number
 *
 * Remove entry from cache @cache with key @key with data stored in @block.
 */
void mb_cache_entry_delete_block(struct mb_cache *cache, u32 key,
				 sector_t block)
{
	struct hlist_bl_node *node;
	struct hlist_bl_head *head;
	struct mb_cache_entry *entry;

	head = mb_cache_entry_head(cache, key);
	hlist_bl_lock(head);
	hlist_bl_for_each_entry(entry, node, head, e_hash_list) {
		if (entry->e_key == key && entry->e_block == block) {
			/* We keep hash list reference to keep entry alive */
			hlist_bl_del_init(&entry->e_hash_list);
			hlist_bl_unlock(head);
			spin_lock(&cache->c_list_lock);
			if (!list_empty(&entry->e_list)) {
				list_del_init(&entry->e_list);
				cache->c_entry_count--;
				atomic_dec(&entry->e_refcnt);
			}
			spin_unlock(&cache->c_list_lock);
			mb_cache_entry_put(cache, entry);
			return;
		}
	}
	hlist_bl_unlock(head);
}
EXPORT_SYMBOL(mb_cache_entry_delete_block);
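
/*
 * Illustrative sketch, not part of mbcache itself: once a block's contents
 * change, its old key (the content hash) is stale, so the old key-value
 * pair is dropped and a new one published. The helper name is hypothetical.
 */
static __maybe_unused void mb_cache_rekey_example(struct mb_cache *cache,
						  u32 old_key, u32 new_key,
						  sector_t block)
{
	mb_cache_entry_delete_block(cache, old_key, block);
	mb_cache_entry_create(cache, GFP_NOFS, new_key, block, true);
}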

/* mb_cache_entry_touch - cache entry got used
 * @cache - cache the entry belongs to
 * @entry - entry that got used
 *
 * Marks entry as used to give it higher chances of surviving in the cache.
 */
void mb_cache_entry_touch(struct mb_cache *cache,
			  struct mb_cache_entry *entry)
{
	entry->e_referenced = 1;
}
EXPORT_SYMBOL(mb_cache_entry_touch);

static unsigned long mb_cache_count(struct shrinker *shrink,
				    struct shrink_control *sc)
{
	struct mb_cache *cache = container_of(shrink, struct mb_cache,
					      c_shrink);

	return cache->c_entry_count;
}

/* Shrink number of entries in cache */
static unsigned long mb_cache_shrink(struct mb_cache *cache,
				     unsigned int nr_to_scan)
{
	struct mb_cache_entry *entry;
	struct hlist_bl_head *head;
	unsigned int shrunk = 0;

	spin_lock(&cache->c_list_lock);
	while (nr_to_scan-- && !list_empty(&cache->c_list)) {
		entry = list_first_entry(&cache->c_list,
					 struct mb_cache_entry, e_list);
		if (entry->e_referenced) {
			entry->e_referenced = 0;
			list_move_tail(&entry->e_list, &cache->c_list);
			continue;
		}
		list_del_init(&entry->e_list);
		cache->c_entry_count--;
		/*
		 * We keep LRU list reference so that entry doesn't go away
		 * from under us.
		 */
		spin_unlock(&cache->c_list_lock);
		head = mb_cache_entry_head(cache, entry->e_key);
		hlist_bl_lock(head);
		if (!hlist_bl_unhashed(&entry->e_hash_list)) {
			hlist_bl_del_init(&entry->e_hash_list);
			atomic_dec(&entry->e_refcnt);
		}
		hlist_bl_unlock(head);
		if (mb_cache_entry_put(cache, entry))
			shrunk++;
		cond_resched();
		spin_lock(&cache->c_list_lock);
	}
	spin_unlock(&cache->c_list_lock);

	return shrunk;
}

static unsigned long mb_cache_scan(struct shrinker *shrink,
				   struct shrink_control *sc)
{
	int nr_to_scan = sc->nr_to_scan;
	struct mb_cache *cache = container_of(shrink, struct mb_cache,
					      c_shrink);
	return mb_cache_shrink(cache, nr_to_scan);
}

/* We shrink 1/X of the cache when we have too many entries in it */
#define SHRINK_DIVISOR 16

static void mb_cache_shrink_worker(struct work_struct *work)
{
	struct mb_cache *cache = container_of(work, struct mb_cache,
					      c_shrink_work);
	mb_cache_shrink(cache, cache->c_max_entries / SHRINK_DIVISOR);
}

/*
 * mb_cache_create - create cache
 * @bucket_bits: log2 of the hash table size
 *
 * Create cache for keys with 2^bucket_bits hash entries.
 */
struct mb_cache *mb_cache_create(int bucket_bits)
{
	struct mb_cache *cache;
	int bucket_count = 1 << bucket_bits;
	int i;

	if (!try_module_get(THIS_MODULE))
		return NULL;

	cache = kzalloc(sizeof(struct mb_cache), GFP_KERNEL);
	if (!cache)
		goto err_out;
	cache->c_bucket_bits = bucket_bits;
	cache->c_max_entries = bucket_count << 4;
	INIT_LIST_HEAD(&cache->c_list);
	spin_lock_init(&cache->c_list_lock);
	cache->c_hash = kmalloc(bucket_count * sizeof(struct hlist_bl_head),
				GFP_KERNEL);
	if (!cache->c_hash) {
		kfree(cache);
		goto err_out;
	}
	for (i = 0; i < bucket_count; i++)
		INIT_HLIST_BL_HEAD(&cache->c_hash[i]);

	cache->c_shrink.count_objects = mb_cache_count;
	cache->c_shrink.scan_objects = mb_cache_scan;
	cache->c_shrink.seeks = DEFAULT_SEEKS;
	if (register_shrinker(&cache->c_shrink)) {
		kfree(cache->c_hash);
		kfree(cache);
		goto err_out;
	}

	INIT_WORK(&cache->c_shrink_work, mb_cache_shrink_worker);

	return cache;

err_out:
	module_put(THIS_MODULE);
	return NULL;
}
EXPORT_SYMBOL(mb_cache_create);

/*
 * mb_cache_destroy - destroy cache
 * @cache: the cache to destroy
 *
 * Free all entries in cache and cache itself. Caller must make sure nobody
 * (except shrinker) can reach @cache when calling this.
 */
void mb_cache_destroy(struct mb_cache *cache)
{
	struct mb_cache_entry *entry, *next;

	unregister_shrinker(&cache->c_shrink);

	/*
	 * We don't bother with any locking. Cache must not be used at this
	 * point.
	 */
	list_for_each_entry_safe(entry, next, &cache->c_list, e_list) {
		if (!hlist_bl_unhashed(&entry->e_hash_list)) {
			hlist_bl_del_init(&entry->e_hash_list);
			atomic_dec(&entry->e_refcnt);
		} else
			WARN_ON(1);
		list_del(&entry->e_list);
		WARN_ON(atomic_read(&entry->e_refcnt) != 1);
		mb_cache_entry_put(cache, entry);
	}
	kfree(cache->c_hash);
	kfree(cache);
	module_put(THIS_MODULE);
}
EXPORT_SYMBOL(mb_cache_destroy);
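
/*
 * Illustrative sketch, not part of mbcache itself: typical create/destroy
 * lifecycle, roughly what a filesystem does at mount and unmount time.
 * The bucket count of 2^10 is an arbitrary example value.
 */
static __maybe_unused void mb_cache_lifecycle_example(void)
{
	struct mb_cache *cache = mb_cache_create(10);	/* 1024 buckets */

	if (!cache)
		return;		/* e.g. fall back to running uncached */

	/* ... mb_cache_entry_create()/find/delete calls go here ... */

	mb_cache_destroy(cache);	/* frees all remaining entries */
}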

static int __init mbcache_init(void)
{
	mb_entry_cache = kmem_cache_create("mbcache",
				sizeof(struct mb_cache_entry), 0,
				SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD, NULL);
	BUG_ON(!mb_entry_cache);
	return 0;
}

static void __exit mbcache_exit(void)
{
	kmem_cache_destroy(mb_entry_cache);
}

module_init(mbcache_init)
module_exit(mbcache_exit)

MODULE_AUTHOR("Jan Kara <jack@suse.cz>");
MODULE_DESCRIPTION("Meta block cache (for extended attributes)");
MODULE_LICENSE("GPL");