/*
 * Compressed RAM block device
 *
 * Copyright (C) 2008, 2009, 2010  Nitin Gupta
 *               2012, 2013 Minchan Kim
 *
 * This code is released using a dual license strategy: BSD/GPL
 * You can choose the licence that better fits your requirements.
 *
 * Released under the terms of 3-clause BSD License
 * Released under the terms of GNU General Public License Version 2.0
 *
 */

#define KMSG_COMPONENT "zram"
#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt

#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/bio.h>
#include <linux/bitops.h>
#include <linux/blkdev.h>
#include <linux/buffer_head.h>
#include <linux/device.h>
#include <linux/genhd.h>
#include <linux/highmem.h>
#include <linux/slab.h>
#include <linux/string.h>
#include <linux/vmalloc.h>
#include <linux/err.h>
#include <linux/idr.h>
#include <linux/sysfs.h>

#include "zram_drv.h"

static DEFINE_IDR(zram_index_idr);
/* idr index must be protected */
static DEFINE_MUTEX(zram_index_mutex);

static int zram_major;
static const char *default_compressor = "lzo";

/* Module params (documentation at end) */
static unsigned int num_devices = 1;

static inline void deprecated_attr_warn(const char *name)
{
	pr_warn_once("%d (%s) Attribute %s (and others) will be removed. %s\n",
			task_pid_nr(current),
			current->comm,
			name,
			"See zram documentation.");
}

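/*
 * Generates a read-only sysfs show handler for one of the deprecated
 * per-device stat counters: warn once that the attribute is going away,
 * then print the 64-bit counter value.
 */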
#define ZRAM_ATTR_RO(name)						\
static ssize_t name##_show(struct device *d,				\
				struct device_attribute *attr, char *b)	\
{									\
	struct zram *zram = dev_to_zram(d);				\
									\
	deprecated_attr_warn(__stringify(name));			\
	return scnprintf(b, PAGE_SIZE, "%llu\n",			\
		(u64)atomic64_read(&zram->stats.name));			\
}									\
static DEVICE_ATTR_RO(name);

static inline bool init_done(struct zram *zram)
{
	return zram->disksize;
}

static inline struct zram *dev_to_zram(struct device *dev)
{
	return (struct zram *)dev_to_disk(dev)->private_data;
}

/* flag operations require table entry bit_spin_lock() being held */
static int zram_test_flag(struct zram_meta *meta, u32 index,
			enum zram_pageflags flag)
{
	return meta->table[index].value & BIT(flag);
}

static void zram_set_flag(struct zram_meta *meta, u32 index,
			enum zram_pageflags flag)
{
	meta->table[index].value |= BIT(flag);
}

static void zram_clear_flag(struct zram_meta *meta, u32 index,
			enum zram_pageflags flag)
{
	meta->table[index].value &= ~BIT(flag);
}

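/*
 * table[index].value packs the compressed object size into the low
 * ZRAM_FLAG_SHIFT bits and the zram_pageflags into the bits above them.
 */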
static size_t zram_get_obj_size(struct zram_meta *meta, u32 index)
{
	return meta->table[index].value & (BIT(ZRAM_FLAG_SHIFT) - 1);
}

static void zram_set_obj_size(struct zram_meta *meta,
					u32 index, size_t size)
{
	unsigned long flags = meta->table[index].value >> ZRAM_FLAG_SHIFT;

	meta->table[index].value = (flags << ZRAM_FLAG_SHIFT) | size;
}

static inline bool is_partial_io(struct bio_vec *bvec)
{
	return bvec->bv_len != PAGE_SIZE;
}

/*
 * Check if request is within bounds and aligned on zram logical blocks.
 */
static inline bool valid_io_request(struct zram *zram,
		sector_t start, unsigned int size)
{
	u64 end, bound;

	/* unaligned request */
	if (unlikely(start & (ZRAM_SECTOR_PER_LOGICAL_BLOCK - 1)))
		return false;
	if (unlikely(size & (ZRAM_LOGICAL_BLOCK_SIZE - 1)))
		return false;

	end = start + (size >> SECTOR_SHIFT);
	bound = zram->disksize >> SECTOR_SHIFT;
	/* out of range */
	if (unlikely(start >= bound || end > bound || start > end))
		return false;

	/* I/O request is valid */
	return true;
}

static void update_position(u32 *index, int *offset, struct bio_vec *bvec)
{
	if (*offset + bvec->bv_len >= PAGE_SIZE)
		(*index)++;
	*offset = (*offset + bvec->bv_len) % PAGE_SIZE;
}

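/*
 * Lock-free update of the max_used_pages watermark: retry the cmpxchg
 * until either the current maximum is already >= @pages or @pages has
 * been installed as the new maximum.
 */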
static inline void update_used_max(struct zram *zram,
					const unsigned long pages)
{
	unsigned long old_max, cur_max;

	old_max = atomic_long_read(&zram->stats.max_used_pages);

	do {
		cur_max = old_max;
		if (pages > cur_max)
			old_max = atomic_long_cmpxchg(
				&zram->stats.max_used_pages, cur_max, pages);
	} while (old_max != cur_max);
}

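/*
 * Scan the page word by word; pages that contain only zeroes are stored
 * with the ZRAM_ZERO flag and no backing zsmalloc allocation.
 */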
static bool page_zero_filled(void *ptr)
{
	unsigned int pos;
	unsigned long *page;

	page = (unsigned long *)ptr;

	for (pos = 0; pos != PAGE_SIZE / sizeof(*page); pos++) {
		if (page[pos])
			return false;
	}

	return true;
}

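/*
 * Satisfy a read of a zero-filled slot by clearing the caller's page
 * (or, for a partial read, only the requested byte range).
 */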
static void handle_zero_page(struct bio_vec *bvec)
{
	struct page *page = bvec->bv_page;
	void *user_mem;

	user_mem = kmap_atomic(page);
	if (is_partial_io(bvec))
		memset(user_mem + bvec->bv_offset, 0, bvec->bv_len);
	else
		clear_page(user_mem);
	kunmap_atomic(user_mem);

	flush_dcache_page(page);
}

static ssize_t initstate_show(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	u32 val;
	struct zram *zram = dev_to_zram(dev);

	down_read(&zram->init_lock);
	val = init_done(zram);
	up_read(&zram->init_lock);

	return scnprintf(buf, PAGE_SIZE, "%u\n", val);
}

static ssize_t disksize_show(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	struct zram *zram = dev_to_zram(dev);

	return scnprintf(buf, PAGE_SIZE, "%llu\n", zram->disksize);
}

static ssize_t orig_data_size_show(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	struct zram *zram = dev_to_zram(dev);

	deprecated_attr_warn("orig_data_size");
	return scnprintf(buf, PAGE_SIZE, "%llu\n",
		(u64)(atomic64_read(&zram->stats.pages_stored)) << PAGE_SHIFT);
}

static ssize_t mem_used_total_show(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	u64 val = 0;
	struct zram *zram = dev_to_zram(dev);

	deprecated_attr_warn("mem_used_total");
	down_read(&zram->init_lock);
	if (init_done(zram)) {
		struct zram_meta *meta = zram->meta;
		val = zs_get_total_pages(meta->mem_pool);
	}
	up_read(&zram->init_lock);

	return scnprintf(buf, PAGE_SIZE, "%llu\n", val << PAGE_SHIFT);
}

static ssize_t mem_limit_show(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	u64 val;
	struct zram *zram = dev_to_zram(dev);

	deprecated_attr_warn("mem_limit");
	down_read(&zram->init_lock);
	val = zram->limit_pages;
	up_read(&zram->init_lock);

	return scnprintf(buf, PAGE_SIZE, "%llu\n", val << PAGE_SHIFT);
}

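/*
 * Set an upper bound on the memory zram may consume, parsed by memparse()
 * so the usual K/M/G suffixes work, e.g. (assuming the device is zram0):
 *	echo 256M > /sys/block/zram0/mem_limit
 * Writing 0 removes the limit.
 */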
static ssize_t mem_limit_store(struct device *dev,
		struct device_attribute *attr, const char *buf, size_t len)
{
	u64 limit;
	char *tmp;
	struct zram *zram = dev_to_zram(dev);

	limit = memparse(buf, &tmp);
	if (buf == tmp) /* no chars parsed, invalid input */
		return -EINVAL;

	down_write(&zram->init_lock);
	zram->limit_pages = PAGE_ALIGN(limit) >> PAGE_SHIFT;
	up_write(&zram->init_lock);

	return len;
}

static ssize_t mem_used_max_show(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	u64 val = 0;
	struct zram *zram = dev_to_zram(dev);

	deprecated_attr_warn("mem_used_max");
	down_read(&zram->init_lock);
	if (init_done(zram))
		val = atomic_long_read(&zram->stats.max_used_pages);
	up_read(&zram->init_lock);

	return scnprintf(buf, PAGE_SIZE, "%llu\n", val << PAGE_SHIFT);
}

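/*
 * Writing "0" resets the max_used_pages watermark to the pool's current
 * page usage.
 */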
static ssize_t mem_used_max_store(struct device *dev,
		struct device_attribute *attr, const char *buf, size_t len)
{
	int err;
	unsigned long val;
	struct zram *zram = dev_to_zram(dev);

	err = kstrtoul(buf, 10, &val);
	if (err || val != 0)
		return -EINVAL;

	down_read(&zram->init_lock);
	if (init_done(zram)) {
		struct zram_meta *meta = zram->meta;
		atomic_long_set(&zram->stats.max_used_pages,
				zs_get_total_pages(meta->mem_pool));
	}
	up_read(&zram->init_lock);

	return len;
}

/*
 * We switched to per-cpu streams and this attr is not needed anymore.
 * However, we will keep it around for some time, because:
 * a) we may revert per-cpu streams in the future
 * b) it's visible to user space and we need to follow our 2 years
 *    retirement rule; but we already have a number of 'soon to be
 *    altered' attrs, so max_comp_streams needs to wait for the next
 *    layoff cycle.
 */
static ssize_t max_comp_streams_show(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	return scnprintf(buf, PAGE_SIZE, "%d\n", num_online_cpus());
}

static ssize_t max_comp_streams_store(struct device *dev,
		struct device_attribute *attr, const char *buf, size_t len)
{
	return len;
}

static ssize_t comp_algorithm_show(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	size_t sz;
	struct zram *zram = dev_to_zram(dev);

	down_read(&zram->init_lock);
	sz = zcomp_available_show(zram->compressor, buf);
	up_read(&zram->init_lock);

	return sz;
}

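/*
 * Select the compression backend for a not-yet-initialized device, e.g.
 * (assuming the algorithm is available to zcomp):
 *	echo lz4 > /sys/block/zram0/comp_algorithm
 * A trailing newline from echo is stripped below.
 */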
static ssize_t comp_algorithm_store(struct device *dev,
		struct device_attribute *attr, const char *buf, size_t len)
{
	struct zram *zram = dev_to_zram(dev);
	size_t sz;

	if (!zcomp_available_algorithm(buf))
		return -EINVAL;

	down_write(&zram->init_lock);
	if (init_done(zram)) {
		up_write(&zram->init_lock);
		pr_info("Can't change algorithm for initialized device\n");
		return -EBUSY;
	}
	strlcpy(zram->compressor, buf, sizeof(zram->compressor));

	/* ignore trailing newline */
	sz = strlen(zram->compressor);
	if (sz > 0 && zram->compressor[sz - 1] == '\n')
		zram->compressor[sz - 1] = 0x00;

	up_write(&zram->init_lock);
	return len;
}

static ssize_t compact_store(struct device *dev,
		struct device_attribute *attr, const char *buf, size_t len)
{
	struct zram *zram = dev_to_zram(dev);
	struct zram_meta *meta;

	down_read(&zram->init_lock);
	if (!init_done(zram)) {
		up_read(&zram->init_lock);
		return -EINVAL;
	}

	meta = zram->meta;
	zs_compact(meta->mem_pool);
	up_read(&zram->init_lock);

	return len;
}

static ssize_t io_stat_show(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	struct zram *zram = dev_to_zram(dev);
	ssize_t ret;

	down_read(&zram->init_lock);
	ret = scnprintf(buf, PAGE_SIZE,
			"%8llu %8llu %8llu %8llu\n",
			(u64)atomic64_read(&zram->stats.failed_reads),
			(u64)atomic64_read(&zram->stats.failed_writes),
			(u64)atomic64_read(&zram->stats.invalid_io),
			(u64)atomic64_read(&zram->stats.notify_free));
	up_read(&zram->init_lock);

	return ret;
}

static ssize_t mm_stat_show(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	struct zram *zram = dev_to_zram(dev);
	struct zs_pool_stats pool_stats;
	u64 orig_size, mem_used = 0;
	long max_used;
	ssize_t ret;

	memset(&pool_stats, 0x00, sizeof(struct zs_pool_stats));

	down_read(&zram->init_lock);
	if (init_done(zram)) {
		mem_used = zs_get_total_pages(zram->meta->mem_pool);
		zs_pool_stats(zram->meta->mem_pool, &pool_stats);
	}

	orig_size = atomic64_read(&zram->stats.pages_stored);
	max_used = atomic_long_read(&zram->stats.max_used_pages);

	ret = scnprintf(buf, PAGE_SIZE,
			"%8llu %8llu %8llu %8lu %8ld %8llu %8lu\n",
			orig_size << PAGE_SHIFT,
			(u64)atomic64_read(&zram->stats.compr_data_size),
			mem_used << PAGE_SHIFT,
			zram->limit_pages << PAGE_SHIFT,
			max_used << PAGE_SHIFT,
			(u64)atomic64_read(&zram->stats.zero_pages),
			pool_stats.pages_compacted);
	up_read(&zram->init_lock);

	return ret;
}

static ssize_t debug_stat_show(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	int version = 1;
	struct zram *zram = dev_to_zram(dev);
	ssize_t ret;

	down_read(&zram->init_lock);
	ret = scnprintf(buf, PAGE_SIZE,
			"version: %d\n%8llu\n",
			version,
			(u64)atomic64_read(&zram->stats.writestall));
	up_read(&zram->init_lock);

	return ret;
}

static DEVICE_ATTR_RO(io_stat);
static DEVICE_ATTR_RO(mm_stat);
static DEVICE_ATTR_RO(debug_stat);
ZRAM_ATTR_RO(num_reads);
ZRAM_ATTR_RO(num_writes);
ZRAM_ATTR_RO(failed_reads);
ZRAM_ATTR_RO(failed_writes);
ZRAM_ATTR_RO(invalid_io);
ZRAM_ATTR_RO(notify_free);
ZRAM_ATTR_RO(zero_pages);
ZRAM_ATTR_RO(compr_data_size);

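/*
 * zram_meta_get()/zram_meta_put() take and drop a reference for the
 * duration of an I/O request so that the metadata cannot be freed
 * (e.g. by a concurrent reset) while requests are in flight.
 */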
static inline bool zram_meta_get(struct zram *zram)
{
	if (atomic_inc_not_zero(&zram->refcount))
		return true;
	return false;
}

static inline void zram_meta_put(struct zram *zram)
{
	atomic_dec(&zram->refcount);
}

static void zram_meta_free(struct zram_meta *meta, u64 disksize)
{
	size_t num_pages = disksize >> PAGE_SHIFT;
	size_t index;

	/* Free all pages that are still in this zram device */
	for (index = 0; index < num_pages; index++) {
		unsigned long handle = meta->table[index].handle;

		if (!handle)
			continue;

		zs_free(meta->mem_pool, handle);
	}

	zs_destroy_pool(meta->mem_pool);
	vfree(meta->table);
	kfree(meta);
}

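/*
 * Allocate the per-device metadata: one table entry per PAGE_SIZE chunk
 * of the disk plus the backing zsmalloc pool.
 */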
static struct zram_meta *zram_meta_alloc(char *pool_name, u64 disksize)
{
	size_t num_pages;
	struct zram_meta *meta = kmalloc(sizeof(*meta), GFP_KERNEL);

	if (!meta)
		return NULL;

	num_pages = disksize >> PAGE_SHIFT;
	meta->table = vzalloc(num_pages * sizeof(*meta->table));
	if (!meta->table) {
		pr_err("Error allocating zram address table\n");
		goto out_error;
	}

	meta->mem_pool = zs_create_pool(pool_name);
	if (!meta->mem_pool) {
		pr_err("Error creating memory pool\n");
		goto out_error;
	}

	return meta;

out_error:
	vfree(meta->table);
	kfree(meta);
	return NULL;
}

/*
 * To protect concurrent access to the same index entry, the caller
 * should hold this table index entry's bit_spinlock while accessing it.
 */
static void zram_free_page(struct zram *zram, size_t index)
{
	struct zram_meta *meta = zram->meta;
	unsigned long handle = meta->table[index].handle;

	if (unlikely(!handle)) {
		/*
		 * No memory is allocated for zero filled pages.
		 * Simply clear zero page flag.
		 */
		if (zram_test_flag(meta, index, ZRAM_ZERO)) {
			zram_clear_flag(meta, index, ZRAM_ZERO);
			atomic64_dec(&zram->stats.zero_pages);
		}
		return;
	}

	zs_free(meta->mem_pool, handle);

	atomic64_sub(zram_get_obj_size(meta, index),
			&zram->stats.compr_data_size);
	atomic64_dec(&zram->stats.pages_stored);

	meta->table[index].handle = 0;
	zram_set_obj_size(meta, index, 0);
}

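/*
 * Decompress the object at @index into the PAGE_SIZE buffer @mem.
 * Unallocated or zero-filled slots expand to a cleared page; objects
 * stored uncompressed (size == PAGE_SIZE) are copied as-is.
 */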
static int zram_decompress_page(struct zram *zram, char *mem, u32 index)
{
	int ret = 0;
	unsigned char *cmem;
	struct zram_meta *meta = zram->meta;
	unsigned long handle;
	unsigned int size;

	bit_spin_lock(ZRAM_ACCESS, &meta->table[index].value);
	handle = meta->table[index].handle;
	size = zram_get_obj_size(meta, index);

	if (!handle || zram_test_flag(meta, index, ZRAM_ZERO)) {
		bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value);
		clear_page(mem);
		return 0;
	}

	cmem = zs_map_object(meta->mem_pool, handle, ZS_MM_RO);
	if (size == PAGE_SIZE) {
		copy_page(mem, cmem);
	} else {
		struct zcomp_strm *zstrm = zcomp_stream_get(zram->comp);

		ret = zcomp_decompress(zstrm, cmem, size, mem);
		zcomp_stream_put(zram->comp);
	}
	zs_unmap_object(meta->mem_pool, handle);
	bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value);

	/* Should NEVER happen. Return bio error if it does. */
	if (unlikely(ret)) {
		pr_err("Decompression failed! err=%d, page=%u\n", ret, index);
		return ret;
	}

	return 0;
}

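/*
 * Read one bio_vec worth of data. Partial (sub-page) reads decompress
 * into a bounce buffer first and copy out only the requested bytes.
 */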
static int zram_bvec_read(struct zram *zram, struct bio_vec *bvec,
			  u32 index, int offset)
{
	int ret;
	struct page *page;
	unsigned char *user_mem, *uncmem = NULL;
	struct zram_meta *meta = zram->meta;
	page = bvec->bv_page;

	bit_spin_lock(ZRAM_ACCESS, &meta->table[index].value);
	if (unlikely(!meta->table[index].handle) ||
			zram_test_flag(meta, index, ZRAM_ZERO)) {
		bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value);
		handle_zero_page(bvec);
		return 0;
	}
	bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value);

	if (is_partial_io(bvec))
		/* Use a temporary buffer to decompress the page */
		uncmem = kmalloc(PAGE_SIZE, GFP_NOIO);

	user_mem = kmap_atomic(page);
	if (!is_partial_io(bvec))
		uncmem = user_mem;

	if (!uncmem) {
		pr_err("Unable to allocate temp memory\n");
		ret = -ENOMEM;
		goto out_cleanup;
	}

	ret = zram_decompress_page(zram, uncmem, index);
	/* Should NEVER happen. Return bio error if it does. */
	if (unlikely(ret))
		goto out_cleanup;

	if (is_partial_io(bvec))
		memcpy(user_mem + bvec->bv_offset, uncmem + offset,
				bvec->bv_len);

	flush_dcache_page(page);
	ret = 0;
out_cleanup:
	kunmap_atomic(user_mem);
	if (is_partial_io(bvec))
		kfree(uncmem);
	return ret;
}

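/*
 * Write one bio_vec. Zero-filled pages are recorded with ZRAM_ZERO only;
 * anything else is compressed (or stored uncompressed when the result
 * would exceed max_zpage_size) into a zsmalloc object.
 */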
static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec, u32 index,
			   int offset)
{
	int ret = 0;
	unsigned int clen;
	unsigned long handle = 0;
	struct page *page;
	unsigned char *user_mem, *cmem, *src, *uncmem = NULL;
	struct zram_meta *meta = zram->meta;
	struct zcomp_strm *zstrm = NULL;
	unsigned long alloced_pages;

	page = bvec->bv_page;
	if (is_partial_io(bvec)) {
		/*
		 * This is a partial I/O. We need to read the full page
		 * before writing the changes.
		 */
		uncmem = kmalloc(PAGE_SIZE, GFP_NOIO);
		if (!uncmem) {
			ret = -ENOMEM;
			goto out;
		}
		ret = zram_decompress_page(zram, uncmem, index);
		if (ret)
			goto out;
	}

compress_again:
	user_mem = kmap_atomic(page);
	if (is_partial_io(bvec)) {
		memcpy(uncmem + offset, user_mem + bvec->bv_offset,
		       bvec->bv_len);
		kunmap_atomic(user_mem);
		user_mem = NULL;
	} else {
		uncmem = user_mem;
	}

	if (page_zero_filled(uncmem)) {
		if (user_mem)
			kunmap_atomic(user_mem);
		/* Free memory associated with this sector now. */
		bit_spin_lock(ZRAM_ACCESS, &meta->table[index].value);
		zram_free_page(zram, index);
		zram_set_flag(meta, index, ZRAM_ZERO);
		bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value);

		atomic64_inc(&zram->stats.zero_pages);
		ret = 0;
		goto out;
	}

	zstrm = zcomp_stream_get(zram->comp);
	ret = zcomp_compress(zstrm, uncmem, &clen);
	if (!is_partial_io(bvec)) {
		kunmap_atomic(user_mem);
		user_mem = NULL;
		uncmem = NULL;
	}

	if (unlikely(ret)) {
		pr_err("Compression failed! err=%d\n", ret);
		goto out;
	}

	src = zstrm->buffer;
	if (unlikely(clen > max_zpage_size)) {
		clen = PAGE_SIZE;
		if (is_partial_io(bvec))
			src = uncmem;
	}

	/*
	 * handle allocation has 2 paths:
	 * a) fast path is executed with preemption disabled (for
	 *  per-cpu streams) and has __GFP_DIRECT_RECLAIM bit clear,
	 *  since we can't sleep;
	 * b) slow path enables preemption and attempts to allocate
	 *  the page with __GFP_DIRECT_RECLAIM bit set. we have to
	 *  put per-cpu compression stream and, thus, to re-do
	 *  the compression once handle is allocated.
	 *
	 * if we have a 'non-null' handle here then we are coming
	 * from the slow path and handle has already been allocated.
	 */
	if (!handle)
		handle = zs_malloc(meta->mem_pool, clen,
				__GFP_KSWAPD_RECLAIM |
				__GFP_NOWARN |
				__GFP_HIGHMEM);
	if (!handle) {
		zcomp_stream_put(zram->comp);
		zstrm = NULL;

		atomic64_inc(&zram->stats.writestall);

		handle = zs_malloc(meta->mem_pool, clen,
				GFP_NOIO | __GFP_HIGHMEM);
		if (handle)
			goto compress_again;

		pr_err("Error allocating memory for compressed page: %u, size=%u\n",
			index, clen);
		ret = -ENOMEM;
		goto out;
	}

	alloced_pages = zs_get_total_pages(meta->mem_pool);
	update_used_max(zram, alloced_pages);

	if (zram->limit_pages && alloced_pages > zram->limit_pages) {
		zs_free(meta->mem_pool, handle);
		ret = -ENOMEM;
		goto out;
	}

	cmem = zs_map_object(meta->mem_pool, handle, ZS_MM_WO);

	if ((clen == PAGE_SIZE) && !is_partial_io(bvec)) {
		src = kmap_atomic(page);
		copy_page(cmem, src);
		kunmap_atomic(src);
	} else {
		memcpy(cmem, src, clen);
	}

	zcomp_stream_put(zram->comp);
	zstrm = NULL;
	zs_unmap_object(meta->mem_pool, handle);

	/*
	 * Free memory associated with this sector
	 * before overwriting unused sectors.
	 */
	bit_spin_lock(ZRAM_ACCESS, &meta->table[index].value);
	zram_free_page(zram, index);

	meta->table[index].handle = handle;
	zram_set_obj_size(meta, index, clen);
	bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value);

	/* Update stats */
	atomic64_add(clen, &zram->stats.compr_data_size);
	atomic64_inc(&zram->stats.pages_stored);
out:
	if (zstrm)
		zcomp_stream_put(zram->comp);
	if (is_partial_io(bvec))
		kfree(uncmem);
	return ret;
}

/*
 * zram_bio_discard - handler on discard request
 * @index: physical block index in PAGE_SIZE units
 * @offset: byte offset within physical block
 */
static void zram_bio_discard(struct zram *zram, u32 index,
			     int offset, struct bio *bio)
{
	size_t n = bio->bi_iter.bi_size;
	struct zram_meta *meta = zram->meta;

	/*
	 * zram manages data in physical block size units. Because the logical
	 * block size isn't identical to the physical block size on some
	 * architectures, we could get a discard request pointing to a specific
	 * offset within a certain physical block.  Although we can handle this
	 * request by reading that physical block and decompressing and
	 * partially zeroing and re-compressing and then re-storing it, this
	 * isn't reasonable because our intent with a discard request is to
	 * save memory.  So skipping this logical block is appropriate here.
	 */
	if (offset) {
		if (n <= (PAGE_SIZE - offset))
			return;

		n -= (PAGE_SIZE - offset);
		index++;
	}

	while (n >= PAGE_SIZE) {
		bit_spin_lock(ZRAM_ACCESS, &meta->table[index].value);
		zram_free_page(zram, index);
		bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value);
		atomic64_inc(&zram->stats.notify_free);
		index++;
		n -= PAGE_SIZE;
	}
}

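/*
 * Dispatch a single bio_vec to the read or write path and account it in
 * the generic block-layer I/O statistics.
 */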
static int zram_bvec_rw(struct zram *zram, struct bio_vec *bvec, u32 index,
			int offset, int rw)
{
	unsigned long start_time = jiffies;
	int ret;

	generic_start_io_acct(rw, bvec->bv_len >> SECTOR_SHIFT,
			&zram->disk->part0);

	if (rw == READ) {
		atomic64_inc(&zram->stats.num_reads);
		ret = zram_bvec_read(zram, bvec, index, offset);
	} else {
		atomic64_inc(&zram->stats.num_writes);
		ret = zram_bvec_write(zram, bvec, index, offset);
	}

	generic_end_io_acct(rw, &zram->disk->part0, start_time);

	if (unlikely(ret)) {
		if (rw == READ)
			atomic64_inc(&zram->stats.failed_reads);
		else
			atomic64_inc(&zram->stats.failed_writes);
	}

	return ret;
}

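/*
 * Walk the bio segment by segment, splitting any bio_vec that crosses a
 * PAGE_SIZE boundary so zram_bvec_rw() only ever operates within one page.
 */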
static void __zram_make_request(struct zram *zram, struct bio *bio)
{
	int offset, rw;
	u32 index;
	struct bio_vec bvec;
	struct bvec_iter iter;

	index = bio->bi_iter.bi_sector >> SECTORS_PER_PAGE_SHIFT;
	offset = (bio->bi_iter.bi_sector &
		  (SECTORS_PER_PAGE - 1)) << SECTOR_SHIFT;

	if (unlikely(bio->bi_rw & REQ_DISCARD)) {
		zram_bio_discard(zram, index, offset, bio);
		bio_endio(bio);
		return;
	}

	rw = bio_data_dir(bio);
	bio_for_each_segment(bvec, bio, iter) {
		int max_transfer_size = PAGE_SIZE - offset;

		if (bvec.bv_len > max_transfer_size) {
			/*
			 * zram_bvec_rw() can only operate on a single
			 * zram page. Split the bio vector.
			 */
			struct bio_vec bv;

			bv.bv_page = bvec.bv_page;
			bv.bv_len = max_transfer_size;
			bv.bv_offset = bvec.bv_offset;

			if (zram_bvec_rw(zram, &bv, index, offset, rw) < 0)
				goto out;

			bv.bv_len = bvec.bv_len - max_transfer_size;
			bv.bv_offset += max_transfer_size;
			if (zram_bvec_rw(zram, &bv, index + 1, 0, rw) < 0)
				goto out;
		} else
			if (zram_bvec_rw(zram, &bvec, index, offset, rw) < 0)
				goto out;

		update_position(&index, &offset, &bvec);
	}

	bio_endio(bio);
	return;

out:
	bio_io_error(bio);
}

/*
 * Handler function for all zram I/O requests.
 */
static blk_qc_t zram_make_request(struct request_queue *queue, struct bio *bio)
{
	struct zram *zram = queue->queuedata;

	if (unlikely(!zram_meta_get(zram)))
		goto error;