/*
 * Compressed RAM block device
 *
 * Copyright (C) 2008, 2009, 2010  Nitin Gupta
 *               2012, 2013 Minchan Kim
 *
 * This code is released using a dual license strategy: BSD/GPL
 * You can choose the licence that better fits your requirements.
 *
 * Released under the terms of 3-clause BSD License
 * Released under the terms of GNU General Public License Version 2.0
 *
 */

#define KMSG_COMPONENT "zram"
#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt

#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/bio.h>
#include <linux/bitops.h>
#include <linux/blkdev.h>
#include <linux/buffer_head.h>
#include <linux/device.h>
#include <linux/genhd.h>
#include <linux/highmem.h>
#include <linux/slab.h>
#include <linux/string.h>
#include <linux/vmalloc.h>
#include <linux/err.h>
#include <linux/idr.h>
#include <linux/sysfs.h>

#include "zram_drv.h"

static DEFINE_IDR(zram_index_idr);
/* idr index must be protected */
static DEFINE_MUTEX(zram_index_mutex);

static int zram_major;
static const char *default_compressor = "lzo";

/* Module params (documentation at end) */
static unsigned int num_devices = 1;

static inline void deprecated_attr_warn(const char *name)
{
	pr_warn_once("%d (%s) Attribute %s (and others) will be removed. %s\n",
			task_pid_nr(current),
			current->comm,
			name,
			"See zram documentation.");
}
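
/*
 * ZRAM_ATTR_RO(num_reads), for example, expands to a num_reads_show()
 * helper that emits the deprecation warning and prints the 64-bit
 * zram->stats.num_reads counter, plus DEVICE_ATTR_RO(num_reads).
 */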

#define ZRAM_ATTR_RO(name)						\
static ssize_t name##_show(struct device *d,				\
				struct device_attribute *attr, char *b)	\
{									\
	struct zram *zram = dev_to_zram(d);				\
									\
	deprecated_attr_warn(__stringify(name));			\
	return scnprintf(b, PAGE_SIZE, "%llu\n",			\
		(u64)atomic64_read(&zram->stats.name));			\
}									\
static DEVICE_ATTR_RO(name);

static inline bool init_done(struct zram *zram)
{
	return zram->disksize;
}

static inline struct zram *dev_to_zram(struct device *dev)
{
	return (struct zram *)dev_to_disk(dev)->private_data;
}
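
/*
 * Each meta->table[index].value packs the compressed object size into the
 * low ZRAM_FLAG_SHIFT bits and keeps the zram_pageflags bits above them;
 * the helpers below encapsulate that layout.
 */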

/* flag operations require table entry bit_spin_lock() being held */
static int zram_test_flag(struct zram_meta *meta, u32 index,
			enum zram_pageflags flag)
{
	return meta->table[index].value & BIT(flag);
}

static void zram_set_flag(struct zram_meta *meta, u32 index,
			enum zram_pageflags flag)
{
	meta->table[index].value |= BIT(flag);
}

static void zram_clear_flag(struct zram_meta *meta, u32 index,
			enum zram_pageflags flag)
{
	meta->table[index].value &= ~BIT(flag);
}

static size_t zram_get_obj_size(struct zram_meta *meta, u32 index)
{
	return meta->table[index].value & (BIT(ZRAM_FLAG_SHIFT) - 1);
}

static void zram_set_obj_size(struct zram_meta *meta,
					u32 index, size_t size)
{
	unsigned long flags = meta->table[index].value >> ZRAM_FLAG_SHIFT;

	meta->table[index].value = (flags << ZRAM_FLAG_SHIFT) | size;
}

static inline bool is_partial_io(struct bio_vec *bvec)
{
	return bvec->bv_len != PAGE_SIZE;
}

/*
 * Check if request is within bounds and aligned on zram logical blocks.
 */
static inline bool valid_io_request(struct zram *zram,
		sector_t start, unsigned int size)
{
	u64 end, bound;

	/* unaligned request */
	if (unlikely(start & (ZRAM_SECTOR_PER_LOGICAL_BLOCK - 1)))
		return false;
	if (unlikely(size & (ZRAM_LOGICAL_BLOCK_SIZE - 1)))
		return false;

	end = start + (size >> SECTOR_SHIFT);
	bound = zram->disksize >> SECTOR_SHIFT;
	/* out of range */
	if (unlikely(start >= bound || end > bound || start > end))
		return false;

	/* I/O request is valid */
	return true;
}

static void update_position(u32 *index, int *offset, struct bio_vec *bvec)
{
	if (*offset + bvec->bv_len >= PAGE_SIZE)
		(*index)++;
	*offset = (*offset + bvec->bv_len) % PAGE_SIZE;
}
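
/*
 * Raise stats.max_used_pages to @pages if needed; the cmpxchg loop keeps
 * the recorded maximum monotonic without taking a lock.
 */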

static inline void update_used_max(struct zram *zram,
					const unsigned long pages)
{
	unsigned long old_max, cur_max;

	old_max = atomic_long_read(&zram->stats.max_used_pages);

	do {
		cur_max = old_max;
		if (pages > cur_max)
			old_max = atomic_long_cmpxchg(
				&zram->stats.max_used_pages, cur_max, pages);
	} while (old_max != cur_max);
}
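
/* Return true only if every word of the page at @ptr is zero. */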

static bool page_zero_filled(void *ptr)
{
	unsigned int pos;
	unsigned long *page;

	page = (unsigned long *)ptr;

	for (pos = 0; pos != PAGE_SIZE / sizeof(*page); pos++) {
		if (page[pos])
			return false;
	}

	return true;
}

static void handle_zero_page(struct bio_vec *bvec)
{
	struct page *page = bvec->bv_page;
	void *user_mem;

	user_mem = kmap_atomic(page);
	if (is_partial_io(bvec))
		memset(user_mem + bvec->bv_offset, 0, bvec->bv_len);
	else
		clear_page(user_mem);
	kunmap_atomic(user_mem);

	flush_dcache_page(page);
}

static ssize_t initstate_show(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	u32 val;
	struct zram *zram = dev_to_zram(dev);

	down_read(&zram->init_lock);
	val = init_done(zram);
	up_read(&zram->init_lock);

	return scnprintf(buf, PAGE_SIZE, "%u\n", val);
}

static ssize_t disksize_show(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	struct zram *zram = dev_to_zram(dev);

	return scnprintf(buf, PAGE_SIZE, "%llu\n", zram->disksize);
}
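
/*
 * These attributes are regular sysfs files, so e.g. (illustrative, for the
 * first device) "cat /sys/block/zram0/disksize" reads the current value.
 */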

static ssize_t orig_data_size_show(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	struct zram *zram = dev_to_zram(dev);

	deprecated_attr_warn("orig_data_size");
	return scnprintf(buf, PAGE_SIZE, "%llu\n",
		(u64)(atomic64_read(&zram->stats.pages_stored)) << PAGE_SHIFT);
}

static ssize_t mem_used_total_show(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	u64 val = 0;
	struct zram *zram = dev_to_zram(dev);

	deprecated_attr_warn("mem_used_total");
	down_read(&zram->init_lock);
	if (init_done(zram)) {
		struct zram_meta *meta = zram->meta;
		val = zs_get_total_pages(meta->mem_pool);
	}
	up_read(&zram->init_lock);

	return scnprintf(buf, PAGE_SIZE, "%llu\n", val << PAGE_SHIFT);
}

static ssize_t mem_limit_show(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	u64 val;
	struct zram *zram = dev_to_zram(dev);

	deprecated_attr_warn("mem_limit");
	down_read(&zram->init_lock);
	val = zram->limit_pages;
	up_read(&zram->init_lock);

	return scnprintf(buf, PAGE_SIZE, "%llu\n", val << PAGE_SHIFT);
}
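
/*
 * The limit is parsed with memparse(), so suffixed sizes work, e.g.
 * (illustrative) "echo 256M > /sys/block/zram0/mem_limit"; writing 0
 * removes the limit again.
 */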

static ssize_t mem_limit_store(struct device *dev,
		struct device_attribute *attr, const char *buf, size_t len)
{
	u64 limit;
	char *tmp;
	struct zram *zram = dev_to_zram(dev);

	limit = memparse(buf, &tmp);
	if (buf == tmp) /* no chars parsed, invalid input */
		return -EINVAL;

	down_write(&zram->init_lock);
	zram->limit_pages = PAGE_ALIGN(limit) >> PAGE_SHIFT;
	up_write(&zram->init_lock);

	return len;
}

static ssize_t mem_used_max_show(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	u64 val = 0;
	struct zram *zram = dev_to_zram(dev);

	deprecated_attr_warn("mem_used_max");
	down_read(&zram->init_lock);
	if (init_done(zram))
		val = atomic_long_read(&zram->stats.max_used_pages);
	up_read(&zram->init_lock);

	return scnprintf(buf, PAGE_SIZE, "%llu\n", val << PAGE_SHIFT);
}

static ssize_t mem_used_max_store(struct device *dev,
		struct device_attribute *attr, const char *buf, size_t len)
{
	int err;
	unsigned long val;
	struct zram *zram = dev_to_zram(dev);

	err = kstrtoul(buf, 10, &val);
	if (err || val != 0)
		return -EINVAL;

	down_read(&zram->init_lock);
	if (init_done(zram)) {
		struct zram_meta *meta = zram->meta;
		atomic_long_set(&zram->stats.max_used_pages,
				zs_get_total_pages(meta->mem_pool));
	}
	up_read(&zram->init_lock);

	return len;
}

/*
 * We switched to per-cpu streams and this attr is not needed anymore.
 * However, we will keep it around for some time, because:
 * a) we may revert per-cpu streams in the future
 * b) it's visible to user space and we need to follow our 2 years
 *    retirement rule; but we already have a number of 'soon to be
 *    altered' attrs, so max_comp_streams needs to wait for the next
 *    layoff cycle.
 */
static ssize_t max_comp_streams_show(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	return scnprintf(buf, PAGE_SIZE, "%d\n", num_online_cpus());
}

static ssize_t max_comp_streams_store(struct device *dev,
		struct device_attribute *attr, const char *buf, size_t len)
{
	return len;
}

static ssize_t comp_algorithm_show(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	size_t sz;
	struct zram *zram = dev_to_zram(dev);

	down_read(&zram->init_lock);
	sz = zcomp_available_show(zram->compressor, buf);
	up_read(&zram->init_lock);

	return sz;
}
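
/*
 * Selecting a compressor is only allowed before the disksize is set, e.g.
 * (illustrative, provided zcomp knows the algorithm)
 * "echo lz4 > /sys/block/zram0/comp_algorithm".
 */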

static ssize_t comp_algorithm_store(struct device *dev,
		struct device_attribute *attr, const char *buf, size_t len)
{
	struct zram *zram = dev_to_zram(dev);
	size_t sz;

	if (!zcomp_available_algorithm(buf))
		return -EINVAL;

	down_write(&zram->init_lock);
	if (init_done(zram)) {
		up_write(&zram->init_lock);
		pr_info("Can't change algorithm for initialized device\n");
		return -EBUSY;
	}
	strlcpy(zram->compressor, buf, sizeof(zram->compressor));

	/* ignore trailing newline */
	sz = strlen(zram->compressor);
	if (sz > 0 && zram->compressor[sz - 1] == '\n')
		zram->compressor[sz - 1] = 0x00;

	up_write(&zram->init_lock);
	return len;
}

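/*
 * Writing any value, e.g. (illustrative) "echo 1 > /sys/block/zram0/compact",
 * asks zsmalloc to compact this device's memory pool.
 */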
static ssize_t compact_store(struct device *dev,
		struct device_attribute *attr, const char *buf, size_t len)
{
	struct zram *zram = dev_to_zram(dev);
	struct zram_meta *meta;

	down_read(&zram->init_lock);
	if (!init_done(zram)) {
		up_read(&zram->init_lock);
		return -EINVAL;
	}

	meta = zram->meta;
	zs_compact(meta->mem_pool);
	up_read(&zram->init_lock);

	return len;
}

static ssize_t io_stat_show(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	struct zram *zram = dev_to_zram(dev);
	ssize_t ret;

	down_read(&zram->init_lock);
	ret = scnprintf(buf, PAGE_SIZE,
			"%8llu %8llu %8llu %8llu\n",
			(u64)atomic64_read(&zram->stats.failed_reads),
			(u64)atomic64_read(&zram->stats.failed_writes),
			(u64)atomic64_read(&zram->stats.invalid_io),
			(u64)atomic64_read(&zram->stats.notify_free));
	up_read(&zram->init_lock);

	return ret;
}

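/*
 * mm_stat columns, in order: orig_data_size, compr_data_size,
 * mem_used_total, mem_limit, mem_used_max (all in bytes), zero_pages and
 * pages_compacted (page counts).
 */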
static ssize_t mm_stat_show(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	struct zram *zram = dev_to_zram(dev);
	struct zs_pool_stats pool_stats;
	u64 orig_size, mem_used = 0;
	long max_used;
	ssize_t ret;

	memset(&pool_stats, 0x00, sizeof(struct zs_pool_stats));

	down_read(&zram->init_lock);
	if (init_done(zram)) {
		mem_used = zs_get_total_pages(zram->meta->mem_pool);
		zs_pool_stats(zram->meta->mem_pool, &pool_stats);
	}

	orig_size = atomic64_read(&zram->stats.pages_stored);
	max_used = atomic_long_read(&zram->stats.max_used_pages);

	ret = scnprintf(buf, PAGE_SIZE,
			"%8llu %8llu %8llu %8lu %8ld %8llu %8lu\n",
			orig_size << PAGE_SHIFT,
			(u64)atomic64_read(&zram->stats.compr_data_size),
			mem_used << PAGE_SHIFT,
			zram->limit_pages << PAGE_SHIFT,
			max_used << PAGE_SHIFT,
			(u64)atomic64_read(&zram->stats.zero_pages),
			pool_stats.pages_compacted);
	up_read(&zram->init_lock);

	return ret;
}

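/* debug_stat currently carries a version header and the writestall count. */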
static ssize_t debug_stat_show(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	int version = 1;
	struct zram *zram = dev_to_zram(dev);
	ssize_t ret;

	down_read(&zram->init_lock);
	ret = scnprintf(buf, PAGE_SIZE,
			"version: %d\n%8llu\n",
			version,
			(u64)atomic64_read(&zram->stats.writestall));
	up_read(&zram->init_lock);

	return ret;
}

static DEVICE_ATTR_RO(io_stat);
static DEVICE_ATTR_RO(mm_stat);
static DEVICE_ATTR_RO(debug_stat);
ZRAM_ATTR_RO(num_reads);
ZRAM_ATTR_RO(num_writes);
ZRAM_ATTR_RO(failed_reads);
ZRAM_ATTR_RO(failed_writes);
ZRAM_ATTR_RO(invalid_io);
ZRAM_ATTR_RO(notify_free);
ZRAM_ATTR_RO(zero_pages);
ZRAM_ATTR_RO(compr_data_size);
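
/*
 * Pin the device metadata for the duration of an I/O request; this fails
 * once the refcount has already dropped to zero (device going away).
 */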

static inline bool zram_meta_get(struct zram *zram)
{
	if (atomic_inc_not_zero(&zram->refcount))
		return true;
	return false;
}

static inline void zram_meta_put(struct zram *zram)
{
	atomic_dec(&zram->refcount);
}

static void zram_meta_free(struct zram_meta *meta, u64 disksize)
{
	size_t num_pages = disksize >> PAGE_SHIFT;
	size_t index;

	/* Free all pages that are still in this zram device */
	for (index = 0; index < num_pages; index++) {
		unsigned long handle = meta->table[index].handle;

		if (!handle)
			continue;

		zs_free(meta->mem_pool, handle);
	}

	zs_destroy_pool(meta->mem_pool);
	vfree(meta->table);
	kfree(meta);
}

static struct zram_meta *zram_meta_alloc(char *pool_name, u64 disksize)
{
	size_t num_pages;
	struct zram_meta *meta = kmalloc(sizeof(*meta), GFP_KERNEL);

	if (!meta)
		return NULL;

	num_pages = disksize >> PAGE_SHIFT;
	meta->table = vzalloc(num_pages * sizeof(*meta->table));
	if (!meta->table) {
		pr_err("Error allocating zram address table\n");
		goto out_error;
	}

	meta->mem_pool = zs_create_pool(pool_name);
	if (!meta->mem_pool) {
		pr_err("Error creating memory pool\n");
		goto out_error;
	}

	return meta;

out_error:
	vfree(meta->table);
	kfree(meta);
	return NULL;
}

/*
 * To protect concurrent access to the same index entry,
 * the caller should hold this table index entry's bit_spinlock to
 * indicate that this index entry is being accessed.
 */
static void zram_free_page(struct zram *zram, size_t index)
{
	struct zram_meta *meta = zram->meta;
	unsigned long handle = meta->table[index].handle;

	if (unlikely(!handle)) {
		/*
		 * No memory is allocated for zero filled pages.
		 * Simply clear the zero page flag.
		 */
		if (zram_test_flag(meta, index, ZRAM_ZERO)) {
			zram_clear_flag(meta, index, ZRAM_ZERO);
			atomic64_dec(&zram->stats.zero_pages);
		}
		return;
	}

	zs_free(meta->mem_pool, handle);

	atomic64_sub(zram_get_obj_size(meta, index),
			&zram->stats.compr_data_size);
	atomic64_dec(&zram->stats.pages_stored);

	meta->table[index].handle = 0;
	zram_set_obj_size(meta, index, 0);
}

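/*
 * Decompress the slot at @index into the PAGE_SIZE buffer @mem; slots that
 * were never written or are zero-filled simply yield a cleared page.
 */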
static int zram_decompress_page(struct zram *zram, char *mem, u32 index)
{
	int ret = 0;
	unsigned char *cmem;
	struct zram_meta *meta = zram->meta;
	unsigned long handle;
	size_t size;

	bit_spin_lock(ZRAM_ACCESS, &meta->table[index].value);
	handle = meta->table[index].handle;
	size = zram_get_obj_size(meta, index);

	if (!handle || zram_test_flag(meta, index, ZRAM_ZERO)) {
		bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value);
		clear_page(mem);
		return 0;
	}

	cmem = zs_map_object(meta->mem_pool, handle, ZS_MM_RO);
	if (size == PAGE_SIZE)
		copy_page(mem, cmem);
	else
		ret = zcomp_decompress(zram->comp, cmem, size, mem);
	zs_unmap_object(meta->mem_pool, handle);
	bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value);

	/* Should NEVER happen. Return bio error if it does. */
	if (unlikely(ret)) {
		pr_err("Decompression failed! err=%d, page=%u\n", ret, index);
		return ret;
	}

	return 0;
}

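/*
 * Read one bio_vec worth of data from slot @index; partial reads go through
 * a temporary buffer so only the requested bytes reach the page.
 */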
static int zram_bvec_read(struct zram *zram, struct bio_vec *bvec,
			  u32 index, int offset)
{
	int ret;
	struct page *page;
	unsigned char *user_mem, *uncmem = NULL;
	struct zram_meta *meta = zram->meta;
	page = bvec->bv_page;

	bit_spin_lock(ZRAM_ACCESS, &meta->table[index].value);
	if (unlikely(!meta->table[index].handle) ||
			zram_test_flag(meta, index, ZRAM_ZERO)) {
		bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value);
		handle_zero_page(bvec);
		return 0;
	}
	bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value);

	if (is_partial_io(bvec))
		/* Use a temporary buffer to decompress the page */
		uncmem = kmalloc(PAGE_SIZE, GFP_NOIO);

	user_mem = kmap_atomic(page);
	if (!is_partial_io(bvec))
		uncmem = user_mem;

	if (!uncmem) {
		pr_err("Unable to allocate temp memory\n");
		ret = -ENOMEM;
		goto out_cleanup;
	}

	ret = zram_decompress_page(zram, uncmem, index);
	/* Should NEVER happen. Return bio error if it does. */
	if (unlikely(ret))
		goto out_cleanup;

	if (is_partial_io(bvec))
		memcpy(user_mem + bvec->bv_offset, uncmem + offset,
				bvec->bv_len);

	flush_dcache_page(page);
	ret = 0;
out_cleanup:
	kunmap_atomic(user_mem);
	if (is_partial_io(bvec))
		kfree(uncmem);
	return ret;
}
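
/*
 * Compress the (possibly partial) page described by @bvec and store it at
 * slot @index; zero-filled pages are recorded with just the ZRAM_ZERO flag,
 * and incompressible data (clen > max_zpage_size) is stored as a raw page.
 */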

static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec, u32 index,
			   int offset)
{
	int ret = 0;
	size_t clen;
	unsigned long handle = 0;
	struct page *page;
	unsigned char *user_mem, *cmem, *src, *uncmem = NULL;
	struct zram_meta *meta = zram->meta;
	struct zcomp_strm *zstrm = NULL;
	unsigned long alloced_pages;

	page = bvec->bv_page;
	if (is_partial_io(bvec)) {
		/*
		 * This is a partial IO. We need to read the full page
		 * before writing the changes.
		 */
		uncmem = kmalloc(PAGE_SIZE, GFP_NOIO);
		if (!uncmem) {
			ret = -ENOMEM;
			goto out;
		}
		ret = zram_decompress_page(zram, uncmem, index);
		if (ret)
			goto out;
	}

compress_again:
	user_mem = kmap_atomic(page);
	if (is_partial_io(bvec)) {
		memcpy(uncmem + offset, user_mem + bvec->bv_offset,
		       bvec->bv_len);
		kunmap_atomic(user_mem);
		user_mem = NULL;
	} else {
		uncmem = user_mem;
	}

	if (page_zero_filled(uncmem)) {
		if (user_mem)
			kunmap_atomic(user_mem);
		/* Free memory associated with this sector now. */
		bit_spin_lock(ZRAM_ACCESS, &meta->table[index].value);
		zram_free_page(zram, index);
		zram_set_flag(meta, index, ZRAM_ZERO);
		bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value);

		atomic64_inc(&zram->stats.zero_pages);
		ret = 0;
		goto out;
	}

	zstrm = zcomp_stream_get(zram->comp);
	ret = zcomp_compress(zram->comp, zstrm, uncmem, &clen);
	if (!is_partial_io(bvec)) {
		kunmap_atomic(user_mem);
		user_mem = NULL;
		uncmem = NULL;
	}

	if (unlikely(ret)) {
		pr_err("Compression failed! err=%d\n", ret);
		goto out;
	}

	src = zstrm->buffer;
	if (unlikely(clen > max_zpage_size)) {
		clen = PAGE_SIZE;
		if (is_partial_io(bvec))
			src = uncmem;
	}

	/*
	 * handle allocation has 2 paths:
	 * a) fast path is executed with preemption disabled (for
	 *  per-cpu streams) and has __GFP_DIRECT_RECLAIM bit clear,
	 *  since we can't sleep;
	 * b) slow path enables preemption and attempts to allocate
	 *  the page with __GFP_DIRECT_RECLAIM bit set. we have to
	 *  put per-cpu compression stream and, thus, to re-do
	 *  the compression once handle is allocated.
	 *
	 * if we have a 'non-null' handle here then we are coming
	 * from the slow path and handle has already been allocated.
	 */
	if (!handle)
		handle = zs_malloc(meta->mem_pool, clen,
				__GFP_KSWAPD_RECLAIM |
				__GFP_NOWARN |
				__GFP_HIGHMEM);
	if (!handle) {
		zcomp_stream_put(zram->comp);
		zstrm = NULL;

		atomic64_inc(&zram->stats.writestall);

		handle = zs_malloc(meta->mem_pool, clen,
				GFP_NOIO | __GFP_HIGHMEM);
		if (handle)
			goto compress_again;

		pr_err("Error allocating memory for compressed page: %u, size=%zu\n",
			index, clen);
		ret = -ENOMEM;
		goto out;
	}

	alloced_pages = zs_get_total_pages(meta->mem_pool);
	update_used_max(zram, alloced_pages);

	if (zram->limit_pages && alloced_pages > zram->limit_pages) {
		zs_free(meta->mem_pool, handle);
		ret = -ENOMEM;
		goto out;
	}

	cmem = zs_map_object(meta->mem_pool, handle, ZS_MM_WO);

	if ((clen == PAGE_SIZE) && !is_partial_io(bvec)) {
		src = kmap_atomic(page);
		copy_page(cmem, src);
		kunmap_atomic(src);
	} else {
		memcpy(cmem, src, clen);
	}

	zcomp_stream_put(zram->comp);
	zstrm = NULL;
	zs_unmap_object(meta->mem_pool, handle);

	/*
	 * Free memory associated with this sector
	 * before overwriting unused sectors.
	 */
	bit_spin_lock(ZRAM_ACCESS, &meta->table[index].value);
	zram_free_page(zram, index);

	meta->table[index].handle = handle;
	zram_set_obj_size(meta, index, clen);
	bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value);

	/* Update stats */
	atomic64_add(clen, &zram->stats.compr_data_size);
	atomic64_inc(&zram->stats.pages_stored);
out:
	if (zstrm)
		zcomp_stream_put(zram->comp);
	if (is_partial_io(bvec))
		kfree(uncmem);
	return ret;
}

/*
 * zram_bio_discard - handler on discard request
 * @index: physical block index in PAGE_SIZE units
 * @offset: byte offset within physical block
 */
static void zram_bio_discard(struct zram *zram, u32 index,
			     int offset, struct bio *bio)
{
	size_t n = bio->bi_iter.bi_size;
	struct zram_meta *meta = zram->meta;

	/*
	 * zram manages data in physical block size units. Because logical block
	 * size isn't identical to physical block size on some arch, we
	 * could get a discard request pointing to a specific offset within a
	 * certain physical block.  Although we can handle this request by
	 * reading that physical block and decompressing and partially zeroing
	 * and re-compressing and then re-storing it, this isn't reasonable
	 * because our intent with a discard request is to save memory.  So
	 * skipping this logical block is appropriate here.
	 */
	if (offset) {
		if (n <= (PAGE_SIZE - offset))
			return;

		n -= (PAGE_SIZE - offset);
		index++;
	}

	while (n >= PAGE_SIZE) {
		bit_spin_lock(ZRAM_ACCESS, &meta->table[index].value);
		zram_free_page(zram, index);
		bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value);
		atomic64_inc(&zram->stats.notify_free);
		index++;
		n -= PAGE_SIZE;
	}
}

static int zram_bvec_rw(struct zram *zram, struct bio_vec *bvec, u32 index,
			int offset, int rw)
{
	unsigned long start_time = jiffies;
	int ret;

	generic_start_io_acct(rw, bvec->bv_len >> SECTOR_SHIFT,
			&zram->disk->part0);

	if (rw == READ) {
		atomic64_inc(&zram->stats.num_reads);
		ret = zram_bvec_read(zram, bvec, index, offset);
	} else {
		atomic64_inc(&zram->stats.num_writes);
		ret = zram_bvec_write(zram, bvec, index, offset);
	}

	generic_end_io_acct(rw, &zram->disk->part0, start_time);

	if (unlikely(ret)) {
		if (rw == READ)
			atomic64_inc(&zram->stats.failed_reads);
		else
			atomic64_inc(&zram->stats.failed_writes);
	}

	return ret;
}

static void __zram_make_request(struct zram *zram, struct bio *bio)
{
	int offset, rw;
	u32 index;
	struct bio_vec bvec;
	struct bvec_iter iter;

	index = bio->bi_iter.bi_sector >> SECTORS_PER_PAGE_SHIFT;
	offset = (bio->bi_iter.bi_sector &
		  (SECTORS_PER_PAGE - 1)) << SECTOR_SHIFT;

	if (unlikely(bio->bi_rw & REQ_DISCARD)) {
		zram_bio_discard(zram, index, offset, bio);
		bio_endio(bio);
		return;
	}

	rw = bio_data_dir(bio);
	bio_for_each_segment(bvec, bio, iter) {
		int max_transfer_size = PAGE_SIZE - offset;

		if (bvec.bv_len > max_transfer_size) {
			/*
			 * zram_bvec_rw() can only operate on a single
			 * zram page. Split the bio vector.
			 */
			struct bio_vec bv;

			bv.bv_page = bvec.bv_page;
			bv.bv_len = max_transfer_size;
			bv.bv_offset = bvec.bv_offset;

			if (zram_bvec_rw(zram, &bv, index, offset, rw) < 0)
				goto out;

			bv.bv_len = bvec.bv_len - max_transfer_size;
			bv.bv_offset += max_transfer_size;
			if (zram_bvec_rw(zram, &bv, index + 1, 0, rw) < 0)
				goto out;
		} else
			if (zram_bvec_rw(zram, &bvec, index, offset, rw) < 0)
				goto out;

		update_position(&index, &offset, &bvec);
	}

	bio_endio(bio);
	return;

out:
	bio_io_error(bio);
}

/*
 * Handler function for all zram I/O requests.
 */
static blk_qc_t zram_make_request(struct request_queue *queue, struct bio *bio)
{
	struct zram *zram = queue->queuedata;

	if (unlikely(!zram_meta_get(zram)))
		goto error;