#include <linux/spinlock.h>
#include <linux/slab.h>
#include <linux/list.h>
#include <linux/list_bl.h>
#include <linux/module.h>
#include <linux/sched.h>
#include <linux/workqueue.h>
#include <linux/mbcache.h>

/*
 * Mbcache is a simple key-value store. Keys need not be unique, however
 * key-value pairs are expected to be unique (we use this fact in
 * mb_cache_entry_delete_block()).
 *
 * Ext2 and ext4 use this cache for deduplication of extended attribute
 * blocks. They use a hash of the block contents as the key and the block
 * number as the value. That's why keys need not be unique (different xattr
 * blocks may end up having the same hash). However, the block number always
 * uniquely identifies a cache entry.
 *
 * We provide functions for creation and removal of entries, search by key,
 * and a special "delete entry with given key-value pair" operation. A
 * fixed-size hash table is used for fast key lookups.
 */

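/*
 * Illustrative lifecycle sketch (an editorial usage outline, not code in
 * this file): my_xattr_hash() and MY_BUCKET_BITS are hypothetical
 * stand-ins for whatever the calling filesystem provides.
 *
 *	struct mb_cache *cache = mb_cache_create(MY_BUCKET_BITS);
 *	u32 key = my_xattr_hash(block_data);
 *
 *	err = mb_cache_entry_create(cache, GFP_NOFS, key, block, true);
 *	(a -EBUSY return means this key/block pair is already cached)
 *
 *	entry = mb_cache_entry_find_first(cache, key);
 *	if (entry)
 *		mb_cache_entry_put(cache, entry);
 *
 *	mb_cache_destroy(cache);
 */
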
struct mb_cache {
	/* Hash table of entries */
	struct hlist_bl_head	*c_hash;
	/* log2 of hash table size */
	int			c_bucket_bits;
	/* Maximum entries in cache to avoid degrading hash too much */
	unsigned long		c_max_entries;
	/* Protects c_list, c_entry_count */
	spinlock_t		c_list_lock;
	struct list_head	c_list;
	/* Number of entries in cache */
	unsigned long		c_entry_count;
	struct shrinker		c_shrink;
	/* Work for shrinking when the cache has too many entries */
	struct work_struct	c_shrink_work;
};

static struct kmem_cache *mb_entry_cache;

static unsigned long mb_cache_shrink(struct mb_cache *cache,
				     unsigned long nr_to_scan);

static inline struct hlist_bl_head *mb_cache_entry_head(struct mb_cache *cache,
							u32 key)
{
	return &cache->c_hash[hash_32(key, cache->c_bucket_bits)];
}

/*
 * Number of entries to reclaim synchronously when there are too many entries
 * in cache
 */
#define SYNC_SHRINK_BATCH 64

/*
 * mb_cache_entry_create - create entry in cache
 * @cache - cache where the entry should be created
 * @mask - gfp mask with which the entry should be allocated
 * @key - key of the entry
 * @block - block that contains data
 * @reusable - is the block reusable by other inodes?
 *
 * Creates entry in @cache with key @key and records that data is stored in
 * block @block. The function returns -EBUSY if an entry with the same key
 * and the same block already exists in cache. Otherwise 0 is returned.
 */
int mb_cache_entry_create(struct mb_cache *cache, gfp_t mask, u32 key,
			  sector_t block, bool reusable)
{
	struct mb_cache_entry *entry, *dup;
	struct hlist_bl_node *dup_node;
	struct hlist_bl_head *head;

	/* Schedule background reclaim if there are too many entries */
	if (cache->c_entry_count >= cache->c_max_entries)
		schedule_work(&cache->c_shrink_work);
	/* Do some sync reclaim if background reclaim cannot keep up */
	if (cache->c_entry_count >= 2*cache->c_max_entries)
		mb_cache_shrink(cache, SYNC_SHRINK_BATCH);

	entry = kmem_cache_alloc(mb_entry_cache, mask);
	if (!entry)
		return -ENOMEM;

	INIT_LIST_HEAD(&entry->e_list);
	/* One ref for hash, one ref returned */
	atomic_set(&entry->e_refcnt, 1);
	entry->e_key = key;
	entry->e_block = block;
	entry->e_reusable = reusable;
	head = mb_cache_entry_head(cache, key);
	hlist_bl_lock(head);
	hlist_bl_for_each_entry(dup, dup_node, head, e_hash_list) {
		if (dup->e_key == key && dup->e_block == block) {
			hlist_bl_unlock(head);
			kmem_cache_free(mb_entry_cache, entry);
			return -EBUSY;
		}
	}
	hlist_bl_add_head(&entry->e_hash_list, head);
	hlist_bl_unlock(head);

	spin_lock(&cache->c_list_lock);
	list_add_tail(&entry->e_list, &cache->c_list);
	/* Grab ref for LRU list */
	atomic_inc(&entry->e_refcnt);
	cache->c_entry_count++;
	spin_unlock(&cache->c_list_lock);

	return 0;
}
EXPORT_SYMBOL(mb_cache_entry_create);

void __mb_cache_entry_free(struct mb_cache_entry *entry)
{
	kmem_cache_free(mb_entry_cache, entry);
}
EXPORT_SYMBOL(__mb_cache_entry_free);

static struct mb_cache_entry *__entry_find(struct mb_cache *cache,
					   struct mb_cache_entry *entry,
					   u32 key)
{
	struct mb_cache_entry *old_entry = entry;
	struct hlist_bl_node *node;
	struct hlist_bl_head *head;

	head = mb_cache_entry_head(cache, key);
	hlist_bl_lock(head);
	if (entry && !hlist_bl_unhashed(&entry->e_hash_list))
		node = entry->e_hash_list.next;
	else
		node = hlist_bl_first(head);
	while (node) {
		entry = hlist_bl_entry(node, struct mb_cache_entry,
				       e_hash_list);
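		/*
		 * Key-only searches return only entries whose block is
		 * marked reusable by other inodes; mb_cache_entry_get()
		 * looks up by key and block and skips this filter.
		 */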
		if (entry->e_key == key && entry->e_reusable) {
			atomic_inc(&entry->e_refcnt);
			goto out;
		}
		node = node->next;
	}
	entry = NULL;
out:
	hlist_bl_unlock(head);
	if (old_entry)
		mb_cache_entry_put(cache, old_entry);

	return entry;
}

/*
 * mb_cache_entry_find_first - find the first entry in cache with given key
 * @cache: cache where we should search
 * @key: key to look for
 *
 * Search in @cache for entry with key @key. Grabs reference to the first
 * entry found and returns the entry.
 */
struct mb_cache_entry *mb_cache_entry_find_first(struct mb_cache *cache,
						 u32 key)
{
	return __entry_find(cache, NULL, key);
}
EXPORT_SYMBOL(mb_cache_entry_find_first);

/*
 * mb_cache_entry_find_next - find next entry in cache with the same key
 * @cache: cache where we should search
 * @entry: entry to start search from
 *
 * Finds next entry in the hash chain which has the same key as @entry.
 * If @entry is unhashed (which can happen when deletion of entry races
 * with the search), finds the first entry in the hash chain. The function
 * drops reference to @entry and returns with a reference to the found entry.
 */
struct mb_cache_entry *mb_cache_entry_find_next(struct mb_cache *cache,
						struct mb_cache_entry *entry)
{
	return __entry_find(cache, entry, entry->e_key);
}
EXPORT_SYMBOL(mb_cache_entry_find_next);
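
/*
 * Iteration sketch: walking all reusable entries with a given key
 * (illustrative only; try_to_reuse() is a hypothetical caller-side
 * helper). Note that find_next drops the reference to the entry it was
 * passed, so only a caller that breaks out early still holds a reference
 * needing mb_cache_entry_put():
 *
 *	entry = mb_cache_entry_find_first(cache, key);
 *	while (entry) {
 *		try_to_reuse(entry->e_block);
 *		entry = mb_cache_entry_find_next(cache, entry);
 *	}
 */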

/*
 * mb_cache_entry_get - get a cache entry by block number (and key)
 * @cache - cache we work with
 * @key - key of block number @block
 * @block - block number
 */
struct mb_cache_entry *mb_cache_entry_get(struct mb_cache *cache, u32 key,
					  sector_t block)
{
	struct hlist_bl_node *node;
	struct hlist_bl_head *head;
	struct mb_cache_entry *entry;

	head = mb_cache_entry_head(cache, key);
	hlist_bl_lock(head);
	hlist_bl_for_each_entry(entry, node, head, e_hash_list) {
		if (entry->e_key == key && entry->e_block == block) {
			atomic_inc(&entry->e_refcnt);
			goto out;
		}
	}
	entry = NULL;
out:
	hlist_bl_unlock(head);
	return entry;
}
EXPORT_SYMBOL(mb_cache_entry_get);
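
/*
 * Usage sketch, assuming the caller already knows both key and block
 * (e.g. to bump an entry it is about to reuse):
 *
 *	entry = mb_cache_entry_get(cache, key, block);
 *	if (entry) {
 *		mb_cache_entry_touch(cache, entry);
 *		mb_cache_entry_put(cache, entry);
 *	}
 */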

/*
 * mb_cache_entry_delete_block - remove information about block from cache
 * @cache - cache we work with
 * @key - key of block @block
 * @block - block number
 *
 * Remove the entry from cache @cache that has key @key and data stored in
 * block @block.
 */
void mb_cache_entry_delete_block(struct mb_cache *cache, u32 key,
				 sector_t block)
{
	struct hlist_bl_node *node;
	struct hlist_bl_head *head;
	struct mb_cache_entry *entry;

	head = mb_cache_entry_head(cache, key);
	hlist_bl_lock(head);
	hlist_bl_for_each_entry(entry, node, head, e_hash_list) {
		if (entry->e_key == key && entry->e_block == block) {
			/* We keep hash list reference to keep entry alive */
			hlist_bl_del_init(&entry->e_hash_list);
			hlist_bl_unlock(head);
			spin_lock(&cache->c_list_lock);
			if (!list_empty(&entry->e_list)) {
				list_del_init(&entry->e_list);
				cache->c_entry_count--;
				atomic_dec(&entry->e_refcnt);
			}
			spin_unlock(&cache->c_list_lock);
			mb_cache_entry_put(cache, entry);
			return;
		}
	}
	hlist_bl_unlock(head);
}
EXPORT_SYMBOL(mb_cache_entry_delete_block);

/*
 * mb_cache_entry_touch - cache entry got used
 * @cache - cache the entry belongs to
 * @entry - entry that got used
 *
 * Marks entry as used to give it higher chances of surviving in cache.
 */
void mb_cache_entry_touch(struct mb_cache *cache,
			  struct mb_cache_entry *entry)
{
	entry->e_referenced = 1;
}
EXPORT_SYMBOL(mb_cache_entry_touch);

static unsigned long mb_cache_count(struct shrinker *shrink,
				    struct shrink_control *sc)
{
	struct mb_cache *cache = container_of(shrink, struct mb_cache,
					      c_shrink);

	return cache->c_entry_count;
}

/* Shrink number of entries in cache */
static unsigned long mb_cache_shrink(struct mb_cache *cache,
				     unsigned long nr_to_scan)
{
	struct mb_cache_entry *entry;
	struct hlist_bl_head *head;
	unsigned long shrunk = 0;

	spin_lock(&cache->c_list_lock);
	while (nr_to_scan-- && !list_empty(&cache->c_list)) {
		entry = list_first_entry(&cache->c_list,
					 struct mb_cache_entry, e_list);
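		/*
		 * Second chance: an entry used since the last scan is moved
		 * to the list tail instead of being reclaimed.
		 */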
		if (entry->e_referenced) {
			entry->e_referenced = 0;
			list_move_tail(&entry->e_list, &cache->c_list);
			continue;
		}
		list_del_init(&entry->e_list);
		cache->c_entry_count--;
		/*
		 * We keep LRU list reference so that entry doesn't go away
		 * from under us.
		 */
		spin_unlock(&cache->c_list_lock);
		head = mb_cache_entry_head(cache, entry->e_key);
		hlist_bl_lock(head);
		if (!hlist_bl_unhashed(&entry->e_hash_list)) {
			hlist_bl_del_init(&entry->e_hash_list);
			atomic_dec(&entry->e_refcnt);
		}
		hlist_bl_unlock(head);
		if (mb_cache_entry_put(cache, entry))
			shrunk++;
		cond_resched();
		spin_lock(&cache->c_list_lock);
	}
	spin_unlock(&cache->c_list_lock);

	return shrunk;
}

static unsigned long mb_cache_scan(struct shrinker *shrink,
				   struct shrink_control *sc)
{
	struct mb_cache *cache = container_of(shrink, struct mb_cache,
					      c_shrink);
	return mb_cache_shrink(cache, sc->nr_to_scan);
}

/* We shrink 1/X of the cache when we have too many entries in it */
#define SHRINK_DIVISOR 16

static void mb_cache_shrink_worker(struct work_struct *work)
{
	struct mb_cache *cache = container_of(work, struct mb_cache,
					      c_shrink_work);
	mb_cache_shrink(cache, cache->c_max_entries / SHRINK_DIVISOR);
}

/*
 * mb_cache_create - create cache
 * @bucket_bits: log2 of the hash table size
 *
 * Create cache for keys with 2^bucket_bits hash entries.
 */
struct mb_cache *mb_cache_create(int bucket_bits)
{
	struct mb_cache *cache;
	unsigned long bucket_count = 1UL << bucket_bits;
	unsigned long i;

	cache = kzalloc(sizeof(struct mb_cache), GFP_KERNEL);
	if (!cache)
		goto err_out;
	cache->c_bucket_bits = bucket_bits;
	cache->c_max_entries = bucket_count << 4;
	INIT_LIST_HEAD(&cache->c_list);
	spin_lock_init(&cache->c_list_lock);
	cache->c_hash = kmalloc(bucket_count * sizeof(struct hlist_bl_head),
				GFP_KERNEL);
	if (!cache->c_hash) {
		kfree(cache);
		goto err_out;
	}
	for (i = 0; i < bucket_count; i++)
		INIT_HLIST_BL_HEAD(&cache->c_hash[i]);

	cache->c_shrink.count_objects = mb_cache_count;
	cache->c_shrink.scan_objects = mb_cache_scan;
	cache->c_shrink.seeks = DEFAULT_SEEKS;
	if (register_shrinker(&cache->c_shrink)) {
		kfree(cache->c_hash);
		kfree(cache);
		goto err_out;
	}

	INIT_WORK(&cache->c_shrink_work, mb_cache_shrink_worker);

	return cache;

err_out:
	return NULL;
}
EXPORT_SYMBOL(mb_cache_create);
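
/*
 * Sizing sketch: mb_cache_create(10) allocates 1024 hash heads and sets
 * c_max_entries to 1024 << 4 == 16384, the threshold at which
 * mb_cache_entry_create() starts scheduling background reclaim:
 *
 *	struct mb_cache *cache = mb_cache_create(10);
 *	if (!cache)
 *		return -ENOMEM;
 */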

/*
 * mb_cache_destroy - destroy cache
 * @cache: the cache to destroy
 *
 * Free all entries in cache and cache itself. Caller must make sure nobody
 * (except shrinker) can reach @cache when calling this.
 */
void mb_cache_destroy(struct mb_cache *cache)
{
	struct mb_cache_entry *entry, *next;

	unregister_shrinker(&cache->c_shrink);

	/*
	 * We don't bother with any locking. Cache must not be used at this
	 * point.
	 */
	list_for_each_entry_safe(entry, next, &cache->c_list, e_list) {
		if (!hlist_bl_unhashed(&entry->e_hash_list)) {
			hlist_bl_del_init(&entry->e_hash_list);
			atomic_dec(&entry->e_refcnt);
		} else
			WARN_ON(1);
		list_del(&entry->e_list);
		WARN_ON(atomic_read(&entry->e_refcnt) != 1);
		mb_cache_entry_put(cache, entry);
	}
	kfree(cache->c_hash);
	kfree(cache);
}
EXPORT_SYMBOL(mb_cache_destroy);

static int __init mbcache_init(void)
{
	mb_entry_cache = kmem_cache_create("mbcache",
				sizeof(struct mb_cache_entry), 0,
				SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD, NULL);
	if (!mb_entry_cache)
		return -ENOMEM;
	return 0;
}

static void __exit mbcache_exit(void)
{
	kmem_cache_destroy(mb_entry_cache);
}

module_init(mbcache_init)
module_exit(mbcache_exit)

MODULE_AUTHOR("Jan Kara <jack@suse.cz>");
MODULE_DESCRIPTION("Meta block cache (for extended attributes)");
MODULE_LICENSE("GPL");