/*
 * Compressed RAM block device
 *
 * Copyright (C) 2008, 2009, 2010  Nitin Gupta
 *               2012, 2013 Minchan Kim
 *
 * This code is released using a dual license strategy: BSD/GPL
 * You can choose the license that better fits your requirements.
 *
 * Released under the terms of 3-clause BSD License
 * Released under the terms of GNU General Public License Version 2.0
 *
 */

#define KMSG_COMPONENT "zram"
#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt

#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/bio.h>
#include <linux/bitops.h>
#include <linux/blkdev.h>
#include <linux/buffer_head.h>
#include <linux/device.h>
#include <linux/genhd.h>
#include <linux/highmem.h>
#include <linux/slab.h>
#include <linux/string.h>
#include <linux/vmalloc.h>
#include <linux/err.h>
#include <linux/idr.h>
#include <linux/sysfs.h>

#include "zram_drv.h"

static DEFINE_IDR(zram_index_idr);
/* idr index must be protected */
static DEFINE_MUTEX(zram_index_mutex);

static int zram_major;
static const char *default_compressor = "lzo";

/* Module params (documentation at end) */
static unsigned int num_devices = 1;

static inline void deprecated_attr_warn(const char *name)
{
	pr_warn_once("%d (%s) Attribute %s (and others) will be removed. %s\n",
			task_pid_nr(current),
			current->comm,
			name,
			"See zram documentation.");
}

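/*
 * ZRAM_ATTR_RO() expands to a read-only sysfs show handler that prints a
 * single 64-bit counter from zram->stats, emitting the deprecation
 * warning above first.  These per-stat attributes are superseded by the
 * consolidated io_stat/mm_stat attributes defined later in this file.
 */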
#define ZRAM_ATTR_RO(name)						\
static ssize_t name##_show(struct device *d,				\
				struct device_attribute *attr, char *b)	\
{									\
	struct zram *zram = dev_to_zram(d);				\
									\
	deprecated_attr_warn(__stringify(name));			\
	return scnprintf(b, PAGE_SIZE, "%llu\n",			\
		(u64)atomic64_read(&zram->stats.name));			\
}									\
static DEVICE_ATTR_RO(name);

static inline bool init_done(struct zram *zram)
{
	return zram->disksize;
}

static inline struct zram *dev_to_zram(struct device *dev)
{
	return (struct zram *)dev_to_disk(dev)->private_data;
}

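/*
 * Each meta->table[index].value packs the compressed object size in the
 * low ZRAM_FLAG_SHIFT bits and the zram_pageflags (including the
 * ZRAM_ACCESS lock bit) above them; the helpers below read and update
 * that packed field.
 */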
/* flag operations require table entry bit_spin_lock() being held */
static int zram_test_flag(struct zram_meta *meta, u32 index,
			enum zram_pageflags flag)
{
	return meta->table[index].value & BIT(flag);
}

static void zram_set_flag(struct zram_meta *meta, u32 index,
			enum zram_pageflags flag)
{
	meta->table[index].value |= BIT(flag);
}

static void zram_clear_flag(struct zram_meta *meta, u32 index,
			enum zram_pageflags flag)
{
	meta->table[index].value &= ~BIT(flag);
}

static size_t zram_get_obj_size(struct zram_meta *meta, u32 index)
{
	return meta->table[index].value & (BIT(ZRAM_FLAG_SHIFT) - 1);
}

static void zram_set_obj_size(struct zram_meta *meta,
					u32 index, size_t size)
{
	unsigned long flags = meta->table[index].value >> ZRAM_FLAG_SHIFT;

	meta->table[index].value = (flags << ZRAM_FLAG_SHIFT) | size;
}

static inline bool is_partial_io(struct bio_vec *bvec)
{
	return bvec->bv_len != PAGE_SIZE;
}

/*
 * Check if request is within bounds and aligned on zram logical blocks.
 */
static inline bool valid_io_request(struct zram *zram,
		sector_t start, unsigned int size)
{
	u64 end, bound;

	/* unaligned request */
	if (unlikely(start & (ZRAM_SECTOR_PER_LOGICAL_BLOCK - 1)))
		return false;
	if (unlikely(size & (ZRAM_LOGICAL_BLOCK_SIZE - 1)))
		return false;

	end = start + (size >> SECTOR_SHIFT);
	bound = zram->disksize >> SECTOR_SHIFT;
	/* out of range */
	if (unlikely(start >= bound || end > bound || start > end))
		return false;

	/* I/O request is valid */
	return true;
}

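/* Advance (index, offset) in PAGE_SIZE units past the bvec just processed. */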
static void update_position(u32 *index, int *offset, struct bio_vec *bvec)
{
	if (*offset + bvec->bv_len >= PAGE_SIZE)
		(*index)++;
	*offset = (*offset + bvec->bv_len) % PAGE_SIZE;
}

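/*
 * Lock-free update of the max_used_pages watermark: retry the cmpxchg
 * until either the stored maximum is at least @pages or we successfully
 * raised it.
 */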
static inline void update_used_max(struct zram *zram,
					const unsigned long pages)
{
	unsigned long old_max, cur_max;

	old_max = atomic_long_read(&zram->stats.max_used_pages);

	do {
		cur_max = old_max;
		if (pages > cur_max)
			old_max = atomic_long_cmpxchg(
				&zram->stats.max_used_pages, cur_max, pages);
	} while (old_max != cur_max);
}

static bool page_zero_filled(void *ptr)
{
	unsigned int pos;
	unsigned long *page;

	page = (unsigned long *)ptr;

	for (pos = 0; pos != PAGE_SIZE / sizeof(*page); pos++) {
		if (page[pos])
			return false;
	}

	return true;
}

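/*
 * Satisfy a read of a zero-filled page without touching zsmalloc: clear
 * the whole destination page, or only the requested range for a
 * partial I/O.
 */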
static void handle_zero_page(struct bio_vec *bvec)
{
	struct page *page = bvec->bv_page;
	void *user_mem;

	user_mem = kmap_atomic(page);
	if (is_partial_io(bvec))
		memset(user_mem + bvec->bv_offset, 0, bvec->bv_len);
	else
		clear_page(user_mem);
	kunmap_atomic(user_mem);

	flush_dcache_page(page);
}

static ssize_t initstate_show(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	u32 val;
	struct zram *zram = dev_to_zram(dev);

	down_read(&zram->init_lock);
	val = init_done(zram);
	up_read(&zram->init_lock);

	return scnprintf(buf, PAGE_SIZE, "%u\n", val);
}

static ssize_t disksize_show(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	struct zram *zram = dev_to_zram(dev);

	return scnprintf(buf, PAGE_SIZE, "%llu\n", zram->disksize);
}

static ssize_t orig_data_size_show(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	struct zram *zram = dev_to_zram(dev);

	deprecated_attr_warn("orig_data_size");
	return scnprintf(buf, PAGE_SIZE, "%llu\n",
		(u64)(atomic64_read(&zram->stats.pages_stored)) << PAGE_SHIFT);
}

static ssize_t mem_used_total_show(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	u64 val = 0;
	struct zram *zram = dev_to_zram(dev);

	deprecated_attr_warn("mem_used_total");
	down_read(&zram->init_lock);
	if (init_done(zram)) {
		struct zram_meta *meta = zram->meta;
		val = zs_get_total_pages(meta->mem_pool);
	}
	up_read(&zram->init_lock);

	return scnprintf(buf, PAGE_SIZE, "%llu\n", val << PAGE_SHIFT);
}

static ssize_t mem_limit_show(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	u64 val;
	struct zram *zram = dev_to_zram(dev);

	deprecated_attr_warn("mem_limit");
	down_read(&zram->init_lock);
	val = zram->limit_pages;
	up_read(&zram->init_lock);

	return scnprintf(buf, PAGE_SIZE, "%llu\n", val << PAGE_SHIFT);
}

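/*
 * mem_limit accepts memparse() suffixes and is rounded up to whole
 * pages; writing 0 removes the limit, since a zero limit_pages is
 * treated as "no limit" on the write path.
 * Usage sketch (assuming the device is zram0):
 *   echo 256M > /sys/block/zram0/mem_limit
 */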
static ssize_t mem_limit_store(struct device *dev,
		struct device_attribute *attr, const char *buf, size_t len)
{
	u64 limit;
	char *tmp;
	struct zram *zram = dev_to_zram(dev);

	limit = memparse(buf, &tmp);
	if (buf == tmp) /* no chars parsed, invalid input */
		return -EINVAL;

	down_write(&zram->init_lock);
	zram->limit_pages = PAGE_ALIGN(limit) >> PAGE_SHIFT;
	up_write(&zram->init_lock);

	return len;
}

static ssize_t mem_used_max_show(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	u64 val = 0;
	struct zram *zram = dev_to_zram(dev);

	deprecated_attr_warn("mem_used_max");
	down_read(&zram->init_lock);
	if (init_done(zram))
		val = atomic_long_read(&zram->stats.max_used_pages);
	up_read(&zram->init_lock);

	return scnprintf(buf, PAGE_SIZE, "%llu\n", val << PAGE_SHIFT);
}

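/*
 * Writing "0" is the only accepted input; it resets the max_used_pages
 * watermark to the pool's current size.
 */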
static ssize_t mem_used_max_store(struct device *dev,
		struct device_attribute *attr, const char *buf, size_t len)
{
	int err;
	unsigned long val;
	struct zram *zram = dev_to_zram(dev);

	err = kstrtoul(buf, 10, &val);
	if (err || val != 0)
		return -EINVAL;

	down_read(&zram->init_lock);
	if (init_done(zram)) {
		struct zram_meta *meta = zram->meta;
		atomic_long_set(&zram->stats.max_used_pages,
				zs_get_total_pages(meta->mem_pool));
	}
	up_read(&zram->init_lock);

	return len;
}

/*
 * We switched to per-cpu streams and this attr is not needed anymore.
 * However, we will keep it around for some time, because:
 * a) we may revert per-cpu streams in the future
 * b) it's visible to user space and we need to follow our 2 years
 *    retirement rule; but we already have a number of 'soon to be
 *    altered' attrs, so max_comp_streams need to wait for the next
 *    layoff cycle.
 */
static ssize_t max_comp_streams_show(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	return scnprintf(buf, PAGE_SIZE, "%d\n", num_online_cpus());
}

static ssize_t max_comp_streams_store(struct device *dev,
		struct device_attribute *attr, const char *buf, size_t len)
{
	return len;
}

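/*
 * comp_algorithm: _show lists the compressors known to zcomp, _store
 * selects one by name (trailing newline ignored).  The algorithm can
 * only be changed while the device is not initialized, e.g.
 * (assuming device zram0, before setting disksize):
 *   echo lzo > /sys/block/zram0/comp_algorithm
 */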
static ssize_t comp_algorithm_show(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	size_t sz;
	struct zram *zram = dev_to_zram(dev);

	down_read(&zram->init_lock);
	sz = zcomp_available_show(zram->compressor, buf);
	up_read(&zram->init_lock);

	return sz;
}

static ssize_t comp_algorithm_store(struct device *dev,
		struct device_attribute *attr, const char *buf, size_t len)
{
	struct zram *zram = dev_to_zram(dev);
	char compressor[CRYPTO_MAX_ALG_NAME];
	size_t sz;

	strlcpy(compressor, buf, sizeof(compressor));
	/* ignore trailing newline */
	sz = strlen(compressor);
	if (sz > 0 && compressor[sz - 1] == '\n')
		compressor[sz - 1] = 0x00;

	if (!zcomp_available_algorithm(compressor))
		return -EINVAL;

	down_write(&zram->init_lock);
	if (init_done(zram)) {
		up_write(&zram->init_lock);
		pr_info("Can't change algorithm for initialized device\n");
		return -EBUSY;
	}

	strlcpy(zram->compressor, compressor, sizeof(compressor));
	up_write(&zram->init_lock);
	return len;
}

static ssize_t compact_store(struct device *dev,
		struct device_attribute *attr, const char *buf, size_t len)
{
	struct zram *zram = dev_to_zram(dev);
	struct zram_meta *meta;

	down_read(&zram->init_lock);
	if (!init_done(zram)) {
		up_read(&zram->init_lock);
		return -EINVAL;
	}

	meta = zram->meta;
	zs_compact(meta->mem_pool);
	up_read(&zram->init_lock);

	return len;
}

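/*
 * io_stat: one line of four counters -- failed_reads, failed_writes,
 * invalid_io and notify_free -- in the order printed below.
 */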
static ssize_t io_stat_show(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	struct zram *zram = dev_to_zram(dev);
	ssize_t ret;

	down_read(&zram->init_lock);
	ret = scnprintf(buf, PAGE_SIZE,
			"%8llu %8llu %8llu %8llu\n",
			(u64)atomic64_read(&zram->stats.failed_reads),
			(u64)atomic64_read(&zram->stats.failed_writes),
			(u64)atomic64_read(&zram->stats.invalid_io),
			(u64)atomic64_read(&zram->stats.notify_free));
	up_read(&zram->init_lock);

	return ret;
}

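/*
 * mm_stat: one line of seven columns -- orig_data_size, compr_data_size,
 * mem_used_total, mem_limit, mem_used_max, zero_pages and
 * pages_compacted -- matching the scnprintf() order below, e.g.
 * (assuming device zram0): cat /sys/block/zram0/mm_stat
 */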
static ssize_t mm_stat_show(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	struct zram *zram = dev_to_zram(dev);
	struct zs_pool_stats pool_stats;
	u64 orig_size, mem_used = 0;
	long max_used;
	ssize_t ret;

	memset(&pool_stats, 0x00, sizeof(struct zs_pool_stats));

	down_read(&zram->init_lock);
	if (init_done(zram)) {
		mem_used = zs_get_total_pages(zram->meta->mem_pool);
		zs_pool_stats(zram->meta->mem_pool, &pool_stats);
	}

	orig_size = atomic64_read(&zram->stats.pages_stored);
	max_used = atomic_long_read(&zram->stats.max_used_pages);

	ret = scnprintf(buf, PAGE_SIZE,
			"%8llu %8llu %8llu %8lu %8ld %8llu %8lu\n",
			orig_size << PAGE_SHIFT,
			(u64)atomic64_read(&zram->stats.compr_data_size),
			mem_used << PAGE_SHIFT,
			zram->limit_pages << PAGE_SHIFT,
			max_used << PAGE_SHIFT,
			(u64)atomic64_read(&zram->stats.zero_pages),
			pool_stats.pages_compacted);
	up_read(&zram->init_lock);

	return ret;
}

static ssize_t debug_stat_show(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	int version = 1;
	struct zram *zram = dev_to_zram(dev);
	ssize_t ret;

	down_read(&zram->init_lock);
	ret = scnprintf(buf, PAGE_SIZE,
			"version: %d\n%8llu\n",
			version,
			(u64)atomic64_read(&zram->stats.writestall));
	up_read(&zram->init_lock);

	return ret;
}

static DEVICE_ATTR_RO(io_stat);
static DEVICE_ATTR_RO(mm_stat);
static DEVICE_ATTR_RO(debug_stat);
ZRAM_ATTR_RO(num_reads);
ZRAM_ATTR_RO(num_writes);
ZRAM_ATTR_RO(failed_reads);
ZRAM_ATTR_RO(failed_writes);
ZRAM_ATTR_RO(invalid_io);
ZRAM_ATTR_RO(notify_free);
ZRAM_ATTR_RO(zero_pages);
ZRAM_ATTR_RO(compr_data_size);

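/*
 * zram->refcount pins zram->meta while I/O is in flight; zram_meta_get()
 * fails once the count has already dropped to zero (typically while the
 * device is being reset), so callers must bail out instead of touching
 * freed metadata.
 */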
static inline bool zram_meta_get(struct zram *zram)
{
	if (atomic_inc_not_zero(&zram->refcount))
		return true;
	return false;
}

static inline void zram_meta_put(struct zram *zram)
{
	atomic_dec(&zram->refcount);
}

static void zram_meta_free(struct zram_meta *meta, u64 disksize)
{
	size_t num_pages = disksize >> PAGE_SHIFT;
	size_t index;

	/* Free all pages that are still in this zram device */
	for (index = 0; index < num_pages; index++) {
		unsigned long handle = meta->table[index].handle;

		if (!handle)
			continue;

		zs_free(meta->mem_pool, handle);
	}

	zs_destroy_pool(meta->mem_pool);
	vfree(meta->table);
	kfree(meta);
}

static struct zram_meta *zram_meta_alloc(char *pool_name, u64 disksize)
{
	size_t num_pages;
	struct zram_meta *meta = kmalloc(sizeof(*meta), GFP_KERNEL);

	if (!meta)
		return NULL;

	num_pages = disksize >> PAGE_SHIFT;
	meta->table = vzalloc(num_pages * sizeof(*meta->table));
	if (!meta->table) {
		pr_err("Error allocating zram address table\n");
		goto out_error;
	}

	meta->mem_pool = zs_create_pool(pool_name);
	if (!meta->mem_pool) {
		pr_err("Error creating memory pool\n");
		goto out_error;
	}

	return meta;

out_error:
	vfree(meta->table);
	kfree(meta);
	return NULL;
}

/*
 * To protect concurrent access to the same index entry, the caller
 * should hold this table index entry's bit_spinlock to indicate that
 * the entry is being accessed.
 */
static void zram_free_page(struct zram *zram, size_t index)
{
	struct zram_meta *meta = zram->meta;
	unsigned long handle = meta->table[index].handle;

	if (unlikely(!handle)) {
		/*
		 * No memory is allocated for zero filled pages.
		 * Simply clear zero page flag.
		 */
		if (zram_test_flag(meta, index, ZRAM_ZERO)) {
			zram_clear_flag(meta, index, ZRAM_ZERO);
			atomic64_dec(&zram->stats.zero_pages);
		}
		return;
	}

	zs_free(meta->mem_pool, handle);

	atomic64_sub(zram_get_obj_size(meta, index),
			&zram->stats.compr_data_size);
	atomic64_dec(&zram->stats.pages_stored);

	meta->table[index].handle = 0;
	zram_set_obj_size(meta, index, 0);
}

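/*
 * Decompress the object at @index into the PAGE_SIZE buffer @mem.
 * Unallocated and ZRAM_ZERO entries are satisfied by clearing the page;
 * objects stored at PAGE_SIZE (incompressible data) are copied as-is.
 */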
static int zram_decompress_page(struct zram *zram, char *mem, u32 index)
{
	int ret = 0;
	unsigned char *cmem;
	struct zram_meta *meta = zram->meta;
	unsigned long handle;
	unsigned int size;

	bit_spin_lock(ZRAM_ACCESS, &meta->table[index].value);
	handle = meta->table[index].handle;
	size = zram_get_obj_size(meta, index);

	if (!handle || zram_test_flag(meta, index, ZRAM_ZERO)) {
		bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value);
		clear_page(mem);
		return 0;
	}

	cmem = zs_map_object(meta->mem_pool, handle, ZS_MM_RO);
	if (size == PAGE_SIZE) {
		copy_page(mem, cmem);
	} else {
		struct zcomp_strm *zstrm = zcomp_stream_get(zram->comp);

		ret = zcomp_decompress(zstrm, cmem, size, mem);
		zcomp_stream_put(zram->comp);
	}
	zs_unmap_object(meta->mem_pool, handle);
	bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value);

	/* Should NEVER happen. Return bio error if it does. */
	if (unlikely(ret)) {
		pr_err("Decompression failed! err=%d, page=%u\n", ret, index);
		return ret;
	}

	return 0;
}

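/*
 * Read one bvec worth of data.  Full-page reads decompress straight into
 * the caller's page; partial reads go through a temporary buffer so only
 * the requested range is copied out.
 */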
static int zram_bvec_read(struct zram *zram, struct bio_vec *bvec,
			  u32 index, int offset)
{
	int ret;
	struct page *page;
	unsigned char *user_mem, *uncmem = NULL;
	struct zram_meta *meta = zram->meta;
	page = bvec->bv_page;

	bit_spin_lock(ZRAM_ACCESS, &meta->table[index].value);
	if (unlikely(!meta->table[index].handle) ||
			zram_test_flag(meta, index, ZRAM_ZERO)) {
		bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value);
		handle_zero_page(bvec);
		return 0;
	}
	bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value);

	if (is_partial_io(bvec))
		/* Use a temporary buffer to decompress the page */
		uncmem = kmalloc(PAGE_SIZE, GFP_NOIO);

	user_mem = kmap_atomic(page);
	if (!is_partial_io(bvec))
		uncmem = user_mem;

	if (!uncmem) {
		pr_err("Unable to allocate temp memory\n");
		ret = -ENOMEM;
		goto out_cleanup;
	}

	ret = zram_decompress_page(zram, uncmem, index);
	/* Should NEVER happen. Return bio error if it does. */
	if (unlikely(ret))
		goto out_cleanup;

	if (is_partial_io(bvec))
		memcpy(user_mem + bvec->bv_offset, uncmem + offset,
				bvec->bv_len);

	flush_dcache_page(page);
	ret = 0;
out_cleanup:
	kunmap_atomic(user_mem);
	if (is_partial_io(bvec))
		kfree(uncmem);
	return ret;
}

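/*
 * Write one bvec.  Partial writes first read back the full page
 * (read-modify-write).  Zero-filled pages are recorded with ZRAM_ZERO and
 * consume no zsmalloc memory; data that compresses worse than
 * max_zpage_size is stored uncompressed at PAGE_SIZE.
 */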
static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec, u32 index,
			   int offset)
{
	int ret = 0;
	unsigned int clen;
	unsigned long handle = 0;
	struct page *page;
	unsigned char *user_mem, *cmem, *src, *uncmem = NULL;
	struct zram_meta *meta = zram->meta;
	struct zcomp_strm *zstrm = NULL;
	unsigned long alloced_pages;

	page = bvec->bv_page;
	if (is_partial_io(bvec)) {
		/*
		 * This is a partial IO. We need to read the full page
		 * before writing the changes.
		 */
		uncmem = kmalloc(PAGE_SIZE, GFP_NOIO);
		if (!uncmem) {
			ret = -ENOMEM;
			goto out;
		}
		ret = zram_decompress_page(zram, uncmem, index);
		if (ret)
			goto out;
	}

compress_again:
	user_mem = kmap_atomic(page);
	if (is_partial_io(bvec)) {
		memcpy(uncmem + offset, user_mem + bvec->bv_offset,
		       bvec->bv_len);
		kunmap_atomic(user_mem);
		user_mem = NULL;
	} else {
		uncmem = user_mem;
	}

	if (page_zero_filled(uncmem)) {
		if (user_mem)
			kunmap_atomic(user_mem);
		/* Free memory associated with this sector now. */
		bit_spin_lock(ZRAM_ACCESS, &meta->table[index].value);
		zram_free_page(zram, index);
		zram_set_flag(meta, index, ZRAM_ZERO);
		bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value);

		atomic64_inc(&zram->stats.zero_pages);
		ret = 0;
		goto out;
	}

	zstrm = zcomp_stream_get(zram->comp);
	ret = zcomp_compress(zstrm, uncmem, &clen);
	if (!is_partial_io(bvec)) {
		kunmap_atomic(user_mem);
		user_mem = NULL;
		uncmem = NULL;
	}

	if (unlikely(ret)) {
		pr_err("Compression failed! err=%d\n", ret);
		goto out;
	}

	src = zstrm->buffer;
	if (unlikely(clen > max_zpage_size)) {
		clen = PAGE_SIZE;
		if (is_partial_io(bvec))
			src = uncmem;
	}

	/*
	 * Handle allocation has 2 paths:
	 * a) the fast path is executed with preemption disabled (for
	 *  per-cpu streams) and has the __GFP_DIRECT_RECLAIM bit clear,
	 *  since we can't sleep;
	 * b) the slow path enables preemption and attempts to allocate
	 *  the page with the __GFP_DIRECT_RECLAIM bit set. We have to
	 *  put the per-cpu compression stream and thus re-do the
	 *  compression once the handle is allocated.
	 *
	 * If we have a non-null handle here then we are coming from the
	 * slow path and the handle has already been allocated.
	 */
	if (!handle)
		handle = zs_malloc(meta->mem_pool, clen,
				__GFP_KSWAPD_RECLAIM |
				__GFP_NOWARN |
				__GFP_HIGHMEM);
	if (!handle) {
		zcomp_stream_put(zram->comp);
		zstrm = NULL;

		atomic64_inc(&zram->stats.writestall);

		handle = zs_malloc(meta->mem_pool, clen,
				GFP_NOIO | __GFP_HIGHMEM);
		if (handle)
			goto compress_again;

		pr_err("Error allocating memory for compressed page: %u, size=%u\n",
			index, clen);
		ret = -ENOMEM;
		goto out;
	}

	alloced_pages = zs_get_total_pages(meta->mem_pool);
	update_used_max(zram, alloced_pages);

	if (zram->limit_pages && alloced_pages > zram->limit_pages) {
		zs_free(meta->mem_pool, handle);
		ret = -ENOMEM;
		goto out;
	}

	cmem = zs_map_object(meta->mem_pool, handle, ZS_MM_WO);

	if ((clen == PAGE_SIZE) && !is_partial_io(bvec)) {
		src = kmap_atomic(page);
		copy_page(cmem, src);
		kunmap_atomic(src);
	} else {
		memcpy(cmem, src, clen);
	}

	zcomp_stream_put(zram->comp);
	zstrm = NULL;
	zs_unmap_object(meta->mem_pool, handle);

	/*
	 * Free memory associated with this sector
	 * before overwriting unused sectors.
	 */
	bit_spin_lock(ZRAM_ACCESS, &meta->table[index].value);
	zram_free_page(zram, index);

	meta->table[index].handle = handle;
	zram_set_obj_size(meta, index, clen);
	bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value);

	/* Update stats */
	atomic64_add(clen, &zram->stats.compr_data_size);
	atomic64_inc(&zram->stats.pages_stored);
out:
	if (zstrm)
		zcomp_stream_put(zram->comp);
	if (is_partial_io(bvec))
		kfree(uncmem);
	return ret;
}

/*
 * zram_bio_discard - handler on discard request
 * @index: physical block index in PAGE_SIZE units
 * @offset: byte offset within physical block
 */
static void zram_bio_discard(struct zram *zram, u32 index,
			     int offset, struct bio *bio)
{
	size_t n = bio->bi_iter.bi_size;
	struct zram_meta *meta = zram->meta;

	/*
	 * zram manages data in physical block size units. Because the logical
	 * block size isn't identical to the physical block size on some
	 * architectures, we could get a discard request pointing to a specific
	 * offset within a certain physical block.  Although we could handle
	 * this request by reading that physical block, decompressing,
	 * partially zeroing, re-compressing and then re-storing it, this isn't
	 * reasonable because our intent with a discard request is to save
	 * memory.  So skipping this logical block is appropriate here.
	 */
	if (offset) {
		if (n <= (PAGE_SIZE - offset))
			return;

		n -= (PAGE_SIZE - offset);
		index++;
	}

	while (n >= PAGE_SIZE) {
		bit_spin_lock(ZRAM_ACCESS, &meta->table[index].value);
		zram_free_page(zram, index);
		bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value);
		atomic64_inc(&zram->stats.notify_free);
		index++;
		n -= PAGE_SIZE;
	}
}

static int zram_bvec_rw(struct zram *zram, struct bio_vec *bvec, u32 index,
			int offset, int rw)
{
	unsigned long start_time = jiffies;
	int ret;

	generic_start_io_acct(rw, bvec->bv_len >> SECTOR_SHIFT,
			&zram->disk->part0);

	if (rw == READ) {
		atomic64_inc(&zram->stats.num_reads);
		ret = zram_bvec_read(zram, bvec, index, offset);
	} else {
		atomic64_inc(&zram->stats.num_writes);
		ret = zram_bvec_write(zram, bvec, index, offset);
	}

	generic_end_io_acct(rw, &zram->disk->part0, start_time);

	if (unlikely(ret)) {
		if (rw == READ)
			atomic64_inc(&zram->stats.failed_reads);
		else
			atomic64_inc(&zram->stats.failed_writes);
	}

	return ret;
}

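/*
 * Walk the bio segment by segment.  A bvec that crosses a zram page
 * boundary is split in two, since zram_bvec_rw() operates on a single
 * page at a time; discard requests are handed to zram_bio_discard().
 */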
static void __zram_make_request(struct zram *zram, struct bio *bio)
{
	int offset, rw;
	u32 index;
	struct bio_vec bvec;
	struct bvec_iter iter;

	index = bio->bi_iter.bi_sector >> SECTORS_PER_PAGE_SHIFT;
	offset = (bio->bi_iter.bi_sector &
		  (SECTORS_PER_PAGE - 1)) << SECTOR_SHIFT;

	if (unlikely(bio->bi_rw & REQ_DISCARD)) {
		zram_bio_discard(zram, index, offset, bio);
		bio_endio(bio);
		return;
	}

	rw = bio_data_dir(bio);
	bio_for_each_segment(bvec, bio, iter) {
		int max_transfer_size = PAGE_SIZE - offset;

		if (bvec.bv_len > max_transfer_size) {
			/*
			 * zram_bvec_rw() can only operate on a single
			 * zram page. Split the bio vector.
			 */
			struct bio_vec bv;

			bv.bv_page = bvec.bv_page;
			bv.bv_len = max_transfer_size;
			bv.bv_offset = bvec.bv_offset;

			if (zram_bvec_rw(zram, &bv, index, offset, rw) < 0)
				goto out;

			bv.bv_len = bvec.bv_len - max_transfer_size;
			bv.bv_offset += max_transfer_size;
			if (zram_bvec_rw(zram, &bv, index + 1, 0, rw) < 0)
				goto out;
		} else
			if (zram_bvec_rw(zram, &bvec, index, offset, rw) < 0)
				goto out;

		update_position(&index, &offset, &bvec);
	}

	bio_endio(bio);
	return;

out:
	bio_io_error(bio);
}

/*
 * Handler function for all zram I/O requests.
 */
static blk_qc_t zram_make_request(struct request_queue *queue, struct bio *bio)
{
	struct zram *zram = queue->queuedata;