swiotlb.c
/*
 * Dynamic DMA mapping support.
 *
 * This implementation is a fallback for platforms that do not support
 * I/O TLBs (aka DMA address translation hardware).
 * Copyright (C) 2000 Asit Mallick <Asit.K.Mallick@intel.com>
 * Copyright (C) 2000 Goutham Rao <goutham.rao@intel.com>
 * Copyright (C) 2000, 2003 Hewlett-Packard Co
 *	David Mosberger-Tang <davidm@hpl.hp.com>
 *
 * 03/05/07 davidm	Switch from PCI-DMA to generic device DMA API.
 * 00/12/13 davidm	Rename to swiotlb.c and add mark_clean() to avoid
 *			unnecessary i-cache flushing.
 * 04/07/.. ak		Better overflow handling. Assorted fixes.
 * 05/09/10 linville	Add support for syncing ranges, support syncing for
 *			DMA_BIDIRECTIONAL mappings, miscellaneous cleanup.
 * 08/12/11 beckyb	Add highmem support
 */

#define pr_fmt(fmt) "software IO TLB: " fmt

#include <linux/cache.h>
#include <linux/dma-mapping.h>
#include <linux/mm.h>
#include <linux/export.h>
#include <linux/spinlock.h>
#include <linux/string.h>
#include <linux/swiotlb.h>
#include <linux/pfn.h>
#include <linux/types.h>
#include <linux/ctype.h>
#include <linux/highmem.h>
#include <linux/gfp.h>
#include <linux/scatterlist.h>
#include <linux/mem_encrypt.h>

#include <asm/io.h>
#include <asm/dma.h>

#include <linux/init.h>
#include <linux/bootmem.h>
#include <linux/iommu-helper.h>

#define CREATE_TRACE_POINTS
#include <trace/events/swiotlb.h>

#define OFFSET(val,align) ((unsigned long)	\
	                   ( (val) & ( (align) - 1)))

#define SLABS_PER_PAGE (1 << (PAGE_SHIFT - IO_TLB_SHIFT))

/*
 * Minimum IO TLB size to bother booting with.  Systems with mainly
 * 64bit capable cards will only lightly use the swiotlb.  If we can't
 * allocate a contiguous 1MB, we're probably in trouble anyway.
 */
#define IO_TLB_MIN_SLABS ((1<<20) >> IO_TLB_SHIFT)
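/*
 * Worked example (illustrative, assuming the usual IO_TLB_SHIFT of 11,
 * i.e. 2 KB slabs): IO_TLB_MIN_SLABS = (1 << 20) >> 11 = 512 slabs,
 * which is the contiguous 1MB of bounce space mentioned above.
 */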

enum swiotlb_force swiotlb_force;

/*
 * Used to do a quick range check in swiotlb_tbl_unmap_single and
 * swiotlb_tbl_sync_single_*, to see if the memory was in fact allocated by this
 * API.
 */
static phys_addr_t io_tlb_start, io_tlb_end;

/*
 * The number of IO TLB blocks (in groups of 64) between io_tlb_start and
 * io_tlb_end.  This is command line adjustable via setup_io_tlb_npages.
 */
static unsigned long io_tlb_nslabs;

/*
 * When the IOMMU overflows we return a fallback buffer. This sets the size.
 */
static unsigned long io_tlb_overflow = 32*1024;

static phys_addr_t io_tlb_overflow_buffer;

/*
 * This is a free list describing the number of free entries available from
 * each index
 */
static unsigned int *io_tlb_list;
static unsigned int io_tlb_index;

/*
 * Max segment that we can provide which (if pages are contiguous) will
 * not be bounced (unless SWIOTLB_FORCE is set).
 */
unsigned int max_segment;

/*
 * We need to save away the original address corresponding to a mapped entry
 * for the sync operations.
 */
#define INVALID_PHYS_ADDR (~(phys_addr_t)0)
static phys_addr_t *io_tlb_orig_addr;

/*
 * Protect the above data structures in the map and unmap calls
 */
static DEFINE_SPINLOCK(io_tlb_lock);

static int late_alloc;

static int __init
setup_io_tlb_npages(char *str)
{
	if (isdigit(*str)) {
		io_tlb_nslabs = simple_strtoul(str, &str, 0);
		/* avoid tail segment of size < IO_TLB_SEGSIZE */
		io_tlb_nslabs = ALIGN(io_tlb_nslabs, IO_TLB_SEGSIZE);
	}
	if (*str == ',')
		++str;
	if (!strcmp(str, "force")) {
		swiotlb_force = SWIOTLB_FORCE;
	} else if (!strcmp(str, "noforce")) {
		swiotlb_force = SWIOTLB_NO_FORCE;
		io_tlb_nslabs = 1;
	}

	return 0;
}
early_param("swiotlb", setup_io_tlb_npages);
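/*
 * Illustrative examples of the boot parameter parsed above (assuming the
 * usual 2 KB slabs): "swiotlb=65536,force" reserves 65536 slabs (128 MB)
 * and forces every DMA mapping through the bounce buffers, while
 * "swiotlb=noforce" disables bouncing entirely and requests a minimal
 * single-slab table.
 */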
/* make io_tlb_overflow tunable too? */

unsigned long swiotlb_nr_tbl(void)
{
	return io_tlb_nslabs;
}
EXPORT_SYMBOL_GPL(swiotlb_nr_tbl);

unsigned int swiotlb_max_segment(void)
{
	return max_segment;
}
EXPORT_SYMBOL_GPL(swiotlb_max_segment);

void swiotlb_set_max_segment(unsigned int val)
{
	if (swiotlb_force == SWIOTLB_FORCE)
		max_segment = 1;
	else
		max_segment = rounddown(val, PAGE_SIZE);
}

/* default to 64MB */
#define IO_TLB_DEFAULT_SIZE (64UL<<20)
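/* With the usual 2 KB slabs (IO_TLB_SHIFT == 11) this default is 32768 slabs. */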
unsigned long swiotlb_size_or_default(void)
{
	unsigned long size;

	size = io_tlb_nslabs << IO_TLB_SHIFT;

	return size ? size : (IO_TLB_DEFAULT_SIZE);
}

void __weak swiotlb_set_mem_attributes(void *vaddr, unsigned long size) { }

/* For swiotlb, clear memory encryption mask from dma addresses */
static dma_addr_t swiotlb_phys_to_dma(struct device *hwdev,
				      phys_addr_t address)
{
	return __sme_clr(phys_to_dma(hwdev, address));
}

/* Note that this doesn't work with highmem page */
static dma_addr_t swiotlb_virt_to_bus(struct device *hwdev,
				      volatile void *address)
{
	return phys_to_dma(hwdev, virt_to_phys(address));
}

static bool no_iotlb_memory;

void swiotlb_print_info(void)
{
	unsigned long bytes = io_tlb_nslabs << IO_TLB_SHIFT;

	if (no_iotlb_memory) {
		pr_warn("No low mem\n");
		return;
	}

	pr_info("mapped [mem %#010llx-%#010llx] (%luMB)\n",
	       (unsigned long long)io_tlb_start,
	       (unsigned long long)io_tlb_end,
	       bytes >> 20);
}

/*
 * Early SWIOTLB allocation may be too early to allow an architecture to
 * perform the desired operations.  This function allows the architecture to
 * call SWIOTLB when the operations are possible.  It needs to be called
 * before the SWIOTLB memory is used.
 */
void __init swiotlb_update_mem_attributes(void)
{
	void *vaddr;
	unsigned long bytes;

	if (no_iotlb_memory || late_alloc)
		return;

	vaddr = phys_to_virt(io_tlb_start);
	bytes = PAGE_ALIGN(io_tlb_nslabs << IO_TLB_SHIFT);
	swiotlb_set_mem_attributes(vaddr, bytes);
	memset(vaddr, 0, bytes);

	vaddr = phys_to_virt(io_tlb_overflow_buffer);
	bytes = PAGE_ALIGN(io_tlb_overflow);
	swiotlb_set_mem_attributes(vaddr, bytes);
	memset(vaddr, 0, bytes);
}

int __init swiotlb_init_with_tbl(char *tlb, unsigned long nslabs, int verbose)
{
	void *v_overflow_buffer;
	unsigned long i, bytes;

	bytes = nslabs << IO_TLB_SHIFT;

	io_tlb_nslabs = nslabs;
	io_tlb_start = __pa(tlb);
	io_tlb_end = io_tlb_start + bytes;

	/*
	 * Get the overflow emergency buffer
	 */
	v_overflow_buffer = memblock_virt_alloc_low_nopanic(
						PAGE_ALIGN(io_tlb_overflow),
						PAGE_SIZE);
	if (!v_overflow_buffer)
		return -ENOMEM;

	io_tlb_overflow_buffer = __pa(v_overflow_buffer);

	/*
	 * Allocate and initialize the free list array.  This array is used
	 * to find contiguous free memory regions of size up to IO_TLB_SEGSIZE
	 * between io_tlb_start and io_tlb_end.
	 */
	io_tlb_list = memblock_virt_alloc(
				PAGE_ALIGN(io_tlb_nslabs * sizeof(int)),
				PAGE_SIZE);
	io_tlb_orig_addr = memblock_virt_alloc(
				PAGE_ALIGN(io_tlb_nslabs * sizeof(phys_addr_t)),
				PAGE_SIZE);
	for (i = 0; i < io_tlb_nslabs; i++) {
		io_tlb_list[i] = IO_TLB_SEGSIZE - OFFSET(i, IO_TLB_SEGSIZE);
		io_tlb_orig_addr[i] = INVALID_PHYS_ADDR;
	}
	io_tlb_index = 0;

	if (verbose)
		swiotlb_print_info();

	swiotlb_set_max_segment(io_tlb_nslabs << IO_TLB_SHIFT);
	return 0;
}

/*
 * Statically reserve bounce buffer space and initialize bounce buffer data
 * structures for the software IO TLB used to implement the DMA API.
 */
void  __init
swiotlb_init(int verbose)
{
	size_t default_size = IO_TLB_DEFAULT_SIZE;
	unsigned char *vstart;
	unsigned long bytes;

	if (!io_tlb_nslabs) {
		io_tlb_nslabs = (default_size >> IO_TLB_SHIFT);
		io_tlb_nslabs = ALIGN(io_tlb_nslabs, IO_TLB_SEGSIZE);
	}

	bytes = io_tlb_nslabs << IO_TLB_SHIFT;

	/* Get IO TLB memory from the low pages */
	vstart = memblock_virt_alloc_low_nopanic(PAGE_ALIGN(bytes), PAGE_SIZE);
	if (vstart && !swiotlb_init_with_tbl(vstart, io_tlb_nslabs, verbose))
		return;

	if (io_tlb_start)
		memblock_free_early(io_tlb_start,
				    PAGE_ALIGN(io_tlb_nslabs << IO_TLB_SHIFT));
	pr_warn("Cannot allocate buffer");
	no_iotlb_memory = true;
}

/*
 * Systems with larger DMA zones (those that don't support ISA) can
 * initialize the swiotlb later using the slab allocator if needed.
 * This should be just like above, but with some error catching.
 */
int
swiotlb_late_init_with_default_size(size_t default_size)
{
	unsigned long bytes, req_nslabs = io_tlb_nslabs;
	unsigned char *vstart = NULL;
	unsigned int order;
	int rc = 0;

	if (!io_tlb_nslabs) {
		io_tlb_nslabs = (default_size >> IO_TLB_SHIFT);
		io_tlb_nslabs = ALIGN(io_tlb_nslabs, IO_TLB_SEGSIZE);
	}

	/*
	 * Get IO TLB memory from the low pages
	 */
	order = get_order(io_tlb_nslabs << IO_TLB_SHIFT);
	io_tlb_nslabs = SLABS_PER_PAGE << order;
	bytes = io_tlb_nslabs << IO_TLB_SHIFT;

	while ((SLABS_PER_PAGE << order) > IO_TLB_MIN_SLABS) {
		vstart = (void *)__get_free_pages(GFP_DMA | __GFP_NOWARN,
						  order);
		if (vstart)
			break;
		order--;
	}

	if (!vstart) {
		io_tlb_nslabs = req_nslabs;
		return -ENOMEM;
	}
	if (order != get_order(bytes)) {
		pr_warn("only able to allocate %ld MB\n",
			(PAGE_SIZE << order) >> 20);
		io_tlb_nslabs = SLABS_PER_PAGE << order;
	}
	rc = swiotlb_late_init_with_tbl(vstart, io_tlb_nslabs);
	if (rc)
		free_pages((unsigned long)vstart, order);

	return rc;
}

int
swiotlb_late_init_with_tbl(char *tlb, unsigned long nslabs)
{
	unsigned long i, bytes;
	unsigned char *v_overflow_buffer;

	bytes = nslabs << IO_TLB_SHIFT;

	io_tlb_nslabs = nslabs;
	io_tlb_start = virt_to_phys(tlb);
	io_tlb_end = io_tlb_start + bytes;

	swiotlb_set_mem_attributes(tlb, bytes);
	memset(tlb, 0, bytes);

	/*
	 * Get the overflow emergency buffer
	 */
	v_overflow_buffer = (void *)__get_free_pages(GFP_DMA,
						     get_order(io_tlb_overflow));
	if (!v_overflow_buffer)
		goto cleanup2;

	swiotlb_set_mem_attributes(v_overflow_buffer, io_tlb_overflow);
	memset(v_overflow_buffer, 0, io_tlb_overflow);
	io_tlb_overflow_buffer = virt_to_phys(v_overflow_buffer);

	/*
	 * Allocate and initialize the free list array.  This array is used
	 * to find contiguous free memory regions of size up to IO_TLB_SEGSIZE
	 * between io_tlb_start and io_tlb_end.
	 */
	io_tlb_list = (unsigned int *)__get_free_pages(GFP_KERNEL,
	                              get_order(io_tlb_nslabs * sizeof(int)));
	if (!io_tlb_list)
		goto cleanup3;

	io_tlb_orig_addr = (phys_addr_t *)
		__get_free_pages(GFP_KERNEL,
				 get_order(io_tlb_nslabs *
					   sizeof(phys_addr_t)));
	if (!io_tlb_orig_addr)
		goto cleanup4;

	for (i = 0; i < io_tlb_nslabs; i++) {
		io_tlb_list[i] = IO_TLB_SEGSIZE - OFFSET(i, IO_TLB_SEGSIZE);
		io_tlb_orig_addr[i] = INVALID_PHYS_ADDR;
	}
	io_tlb_index = 0;

	swiotlb_print_info();

	late_alloc = 1;

	swiotlb_set_max_segment(io_tlb_nslabs << IO_TLB_SHIFT);

	return 0;

cleanup4:
	free_pages((unsigned long)io_tlb_list, get_order(io_tlb_nslabs *
	                                                 sizeof(int)));
	io_tlb_list = NULL;
cleanup3:
	free_pages((unsigned long)v_overflow_buffer,
		   get_order(io_tlb_overflow));
	io_tlb_overflow_buffer = 0;
cleanup2:
	io_tlb_end = 0;
	io_tlb_start = 0;
	io_tlb_nslabs = 0;
	max_segment = 0;
	return -ENOMEM;
}

void __init swiotlb_free(void)
{
	if (!io_tlb_orig_addr)
		return;

	if (late_alloc) {
		free_pages((unsigned long)phys_to_virt(io_tlb_overflow_buffer),
			   get_order(io_tlb_overflow));
		free_pages((unsigned long)io_tlb_orig_addr,
			   get_order(io_tlb_nslabs * sizeof(phys_addr_t)));
		free_pages((unsigned long)io_tlb_list, get_order(io_tlb_nslabs *
								 sizeof(int)));
		free_pages((unsigned long)phys_to_virt(io_tlb_start),
			   get_order(io_tlb_nslabs << IO_TLB_SHIFT));
	} else {
		memblock_free_late(io_tlb_overflow_buffer,
				   PAGE_ALIGN(io_tlb_overflow));
		memblock_free_late(__pa(io_tlb_orig_addr),
				   PAGE_ALIGN(io_tlb_nslabs * sizeof(phys_addr_t)));
		memblock_free_late(__pa(io_tlb_list),
				   PAGE_ALIGN(io_tlb_nslabs * sizeof(int)));
		memblock_free_late(io_tlb_start,
				   PAGE_ALIGN(io_tlb_nslabs << IO_TLB_SHIFT));
	}
	io_tlb_nslabs = 0;
	max_segment = 0;
}

int is_swiotlb_buffer(phys_addr_t paddr)
{
	return paddr >= io_tlb_start && paddr < io_tlb_end;
}

/*
 * Bounce: copy the swiotlb buffer back to the original dma location
 */
static void swiotlb_bounce(phys_addr_t orig_addr, phys_addr_t tlb_addr,
			   size_t size, enum dma_data_direction dir)
{
	unsigned long pfn = PFN_DOWN(orig_addr);
	unsigned char *vaddr = phys_to_virt(tlb_addr);

	if (PageHighMem(pfn_to_page(pfn))) {
		/* The buffer does not have a mapping.  Map it in and copy */
		unsigned int offset = orig_addr & ~PAGE_MASK;
		char *buffer;
		unsigned int sz = 0;
		unsigned long flags;

		while (size) {
			sz = min_t(size_t, PAGE_SIZE - offset, size);

			local_irq_save(flags);
			buffer = kmap_atomic(pfn_to_page(pfn));
			if (dir == DMA_TO_DEVICE)
				memcpy(vaddr, buffer + offset, sz);
			else
				memcpy(buffer + offset, vaddr, sz);
			kunmap_atomic(buffer);
			local_irq_restore(flags);

			size -= sz;
			pfn++;
			vaddr += sz;
			offset = 0;
		}
	} else if (dir == DMA_TO_DEVICE) {
		memcpy(vaddr, phys_to_virt(orig_addr), size);
	} else {
		memcpy(phys_to_virt(orig_addr), vaddr, size);
	}
}

phys_addr_t swiotlb_tbl_map_single(struct device *hwdev,
				   dma_addr_t tbl_dma_addr,
				   phys_addr_t orig_addr, size_t size,
				   enum dma_data_direction dir,
				   unsigned long attrs)
{
	unsigned long flags;
	phys_addr_t tlb_addr;
	unsigned int nslots, stride, index, wrap;
	int i;
	unsigned long mask;
	unsigned long offset_slots;
	unsigned long max_slots;

	if (no_iotlb_memory)
		panic("Can not allocate SWIOTLB buffer earlier and can't now provide you with the DMA bounce buffer");

	if (sme_active())
		pr_warn_once("SME is active and system is using DMA bounce buffers\n");

	mask = dma_get_seg_boundary(hwdev);

	tbl_dma_addr &= mask;

	offset_slots = ALIGN(tbl_dma_addr, 1 << IO_TLB_SHIFT) >> IO_TLB_SHIFT;

	/*
	 * Carefully handle integer overflow which can occur when mask == ~0UL.
	 */
	max_slots = mask + 1
		    ? ALIGN(mask + 1, 1 << IO_TLB_SHIFT) >> IO_TLB_SHIFT
		    : 1UL << (BITS_PER_LONG - IO_TLB_SHIFT);

	/*
	 * For mappings greater than or equal to a page, we limit the stride
	 * (and hence alignment) to a page size.
	 */
	nslots = ALIGN(size, 1 << IO_TLB_SHIFT) >> IO_TLB_SHIFT;
	if (size >= PAGE_SIZE)
		stride = (1 << (PAGE_SHIFT - IO_TLB_SHIFT));
	else
		stride = 1;

	BUG_ON(!nslots);

	/*
	 * Find suitable number of IO TLB entries size that will fit this
	 * request and allocate a buffer from that IO TLB pool.
	 */
	spin_lock_irqsave(&io_tlb_lock, flags);
	index = ALIGN(io_tlb_index, stride);
	if (index >= io_tlb_nslabs)
		index = 0;
	wrap = index;

	do {
		while (iommu_is_span_boundary(index, nslots, offset_slots,
					      max_slots)) {
			index += stride;
			if (index >= io_tlb_nslabs)
				index = 0;
			if (index == wrap)
				goto not_found;
		}

		/*
		 * If we find a slot that indicates we have 'nslots' number of
		 * contiguous buffers, we allocate the buffers from that slot
		 * and mark the entries as '0' indicating unavailable.
		 */
		if (io_tlb_list[index] >= nslots) {
			int count = 0;

			for (i = index; i < (int) (index + nslots); i++)
				io_tlb_list[i] = 0;
			for (i = index - 1; (OFFSET(i, IO_TLB_SEGSIZE) != IO_TLB_SEGSIZE - 1) && io_tlb_list[i]; i--)
				io_tlb_list[i] = ++count;
			tlb_addr = io_tlb_start + (index << IO_TLB_SHIFT);

			/*
			 * Update the indices to avoid searching in the next
			 * round.
			 */
			io_tlb_index = ((index + nslots) < io_tlb_nslabs
					? (index + nslots) : 0);

			goto found;
		}
		index += stride;
		if (index >= io_tlb_nslabs)
			index = 0;
	} while (index != wrap);

not_found:
	spin_unlock_irqrestore(&io_tlb_lock, flags);
	if (!(attrs & DMA_ATTR_NO_WARN) && printk_ratelimit())
		dev_warn(hwdev, "swiotlb buffer is full (sz: %zd bytes)\n", size);
	return SWIOTLB_MAP_ERROR;
found:
	spin_unlock_irqrestore(&io_tlb_lock, flags);

	/*
	 * Save away the mapping from the original address to the DMA address.
	 * This is needed when we sync the memory.  Then we sync the buffer if
	 * needed.
	 */
	for (i = 0; i < nslots; i++)
		io_tlb_orig_addr[index+i] = orig_addr + (i << IO_TLB_SHIFT);
	if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC) &&
	    (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL))
		swiotlb_bounce(orig_addr, tlb_addr, size, DMA_TO_DEVICE);

	return tlb_addr;
}
EXPORT_SYMBOL_GPL(swiotlb_tbl_map_single);
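/*
 * Worked example of the slot arithmetic above (illustrative, assuming
 * IO_TLB_SHIFT == 11, i.e. 2 KB slabs, and 4 KB pages): a 3000-byte
 * mapping needs nslots = ALIGN(3000, 2048) >> 11 = 2 contiguous slabs
 * and is searched with a stride of 1, while an 8192-byte mapping needs
 * 4 slabs and uses a page-sized stride so the bounce buffer comes out
 * page aligned.
 */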

/*
 * Allocates bounce buffer and returns its kernel virtual address.
 */

static phys_addr_t
map_single(struct device *hwdev, phys_addr_t phys, size_t size,
	   enum dma_data_direction dir, unsigned long attrs)
{
	dma_addr_t start_dma_addr;

	if (swiotlb_force == SWIOTLB_NO_FORCE) {
		dev_warn_ratelimited(hwdev, "Cannot do DMA to address %pa\n",
				     &phys);
		return SWIOTLB_MAP_ERROR;
	}

	start_dma_addr = swiotlb_phys_to_dma(hwdev, io_tlb_start);
	return swiotlb_tbl_map_single(hwdev, start_dma_addr, phys, size,
				      dir, attrs);
}

/*
 * dma_addr is the kernel virtual address of the bounce buffer to unmap.
 */
void swiotlb_tbl_unmap_single(struct device *hwdev, phys_addr_t tlb_addr,
			      size_t size, enum dma_data_direction dir,
			      unsigned long attrs)
{
	unsigned long flags;
	int i, count, nslots = ALIGN(size, 1 << IO_TLB_SHIFT) >> IO_TLB_SHIFT;
	int index = (tlb_addr - io_tlb_start) >> IO_TLB_SHIFT;
	phys_addr_t orig_addr = io_tlb_orig_addr[index];

	/*
	 * First, sync the memory before unmapping the entry
	 */
	if (orig_addr != INVALID_PHYS_ADDR &&
	    !(attrs & DMA_ATTR_SKIP_CPU_SYNC) &&
	    ((dir == DMA_FROM_DEVICE) || (dir == DMA_BIDIRECTIONAL)))
		swiotlb_bounce(orig_addr, tlb_addr, size, DMA_FROM_DEVICE);

	/*
	 * Return the buffer to the free list by setting the corresponding
	 * entries to indicate the number of contiguous entries available.
	 * While returning the entries to the free list, we merge the entries
	 * with slots below and above the pool being returned.
	 */
	spin_lock_irqsave(&io_tlb_lock, flags);
	{
		count = ((index + nslots) < ALIGN(index + 1, IO_TLB_SEGSIZE) ?
			 io_tlb_list[index + nslots] : 0);
		/*
		 * Step 1: return the slots to the free list, merging the
		 * slots with succeeding slots
		 */
		for (i = index + nslots - 1; i >= index; i--) {
			io_tlb_list[i] = ++count;
			io_tlb_orig_addr[i] = INVALID_PHYS_ADDR;
		}
		/*
		 * Step 2: merge the returned slots with the preceding slots,
		 * if available (non zero)
		 */
		for (i = index - 1; (OFFSET(i, IO_TLB_SEGSIZE) != IO_TLB_SEGSIZE -1) && io_tlb_list[i]; i--)
			io_tlb_list[i] = ++count;
	}
	spin_unlock_irqrestore(&io_tlb_lock, flags);
}
EXPORT_SYMBOL_GPL(swiotlb_tbl_unmap_single);
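/*
 * Illustrative example of the free-list encoding used above: each
 * io_tlb_list[] entry records how many contiguous free slabs start at
 * that index (within its IO_TLB_SEGSIZE group).  Freeing two slabs that
 * are immediately followed by three already-free slabs writes 4 and 5
 * into the freed entries (the lower index gets the larger count), and
 * step 2 continues counting upward into any free slabs just below them.
 */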

void swiotlb_tbl_sync_single(struct device *hwdev, phys_addr_t tlb_addr,
			     size_t size, enum dma_data_direction dir,
			     enum dma_sync_target target)
{
	int index = (tlb_addr - io_tlb_start) >> IO_TLB_SHIFT;
	phys_addr_t orig_addr = io_tlb_orig_addr[index];

	if (orig_addr == INVALID_PHYS_ADDR)
		return;
	orig_addr += (unsigned long)tlb_addr & ((1 << IO_TLB_SHIFT) - 1);

	switch (target) {
	case SYNC_FOR_CPU:
		if (likely(dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL))
			swiotlb_bounce(orig_addr, tlb_addr,
				       size, DMA_FROM_DEVICE);
		else
			BUG_ON(dir != DMA_TO_DEVICE);
		break;
	case SYNC_FOR_DEVICE:
		if (likely(dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL))
			swiotlb_bounce(orig_addr, tlb_addr,
				       size, DMA_TO_DEVICE);
		else
			BUG_ON(dir != DMA_FROM_DEVICE);
		break;
	default:
		BUG();
	}
}
EXPORT_SYMBOL_GPL(swiotlb_tbl_sync_single);

void *
swiotlb_alloc_coherent(struct device *hwdev, size_t size,
		       dma_addr_t *dma_handle, gfp_t flags)
{
	bool warn = !(flags & __GFP_NOWARN);
	dma_addr_t dev_addr;
	void *ret;
	int order = get_order(size);
	u64 dma_mask = DMA_BIT_MASK(32);

	if (hwdev && hwdev->coherent_dma_mask)
		dma_mask = hwdev->coherent_dma_mask;

	ret = (void *)__get_free_pages(flags, order);
	if (ret) {
		dev_addr = swiotlb_virt_to_bus(hwdev, ret);
		if (dev_addr + size - 1 > dma_mask) {
			/*
			 * The allocated memory isn't reachable by the device.
			 */
			free_pages((unsigned long) ret, order);
			ret = NULL;
		}
	}
	if (!ret) {
		/*
		 * We are either out of memory or the device can't DMA to
		 * GFP_DMA memory; fall back on map_single(), which
		 * will grab memory from the lowest available address range.
		 */
		phys_addr_t paddr = map_single(hwdev, 0, size, DMA_FROM_DEVICE,
					       warn ? 0 : DMA_ATTR_NO_WARN);
		if (paddr == SWIOTLB_MAP_ERROR)
			goto err_warn;

		ret = phys_to_virt(paddr);
		dev_addr = swiotlb_phys_to_dma(hwdev, paddr);

		/* Confirm address can be DMA'd by device */
		if (dev_addr + size - 1 > dma_mask) {
			printk("hwdev DMA mask = 0x%016Lx, dev_addr = 0x%016Lx\n",
			       (unsigned long long)dma_mask,
			       (unsigned long long)dev_addr);

			/*
			 * DMA_TO_DEVICE to avoid memcpy in unmap_single.
			 * The DMA_ATTR_SKIP_CPU_SYNC is optional.
			 */
			swiotlb_tbl_unmap_single(hwdev, paddr,
						 size, DMA_TO_DEVICE,
						 DMA_ATTR_SKIP_CPU_SYNC);
			goto err_warn;
		}
	}

	*dma_handle = dev_addr;
	memset(ret, 0, size);

	return ret;

err_warn:
	if (warn && printk_ratelimit()) {
		pr_warn("coherent allocation failed for device %s size=%zu\n",
			dev_name(hwdev), size);
		dump_stack();
	}

	return NULL;
}
EXPORT_SYMBOL(swiotlb_alloc_coherent);
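/*
 * Illustrative driver-side usage (assumed, not part of this file):
 * drivers normally reach swiotlb_alloc_coherent() through the generic
 * DMA API, e.g.:
 *
 *	dma_addr_t dma_handle;
 *	void *buf = dma_alloc_coherent(dev, size, &dma_handle, GFP_KERNEL);
 *	if (!buf)
 *		return -ENOMEM;
 *	...
 *	dma_free_coherent(dev, size, buf, dma_handle);
 */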

void
swiotlb_free_coherent(struct device *hwdev, size_t size, void *vaddr,
		      dma_addr_t dev_addr)
{
	phys_addr_t paddr = dma_to_phys(hwdev, dev_addr);

	WARN_ON(irqs_disabled());
	if (!is_swiotlb_buffer(paddr))
		free_pages((unsigned long)vaddr, get_order(size));
	else
		/*
		 * DMA_TO_DEVICE to avoid memcpy in swiotlb_tbl_unmap_single.
		 * DMA_ATTR_SKIP_CPU_SYNC is optional.
		 */
		swiotlb_tbl_unmap_single(hwdev, paddr, size, DMA_TO_DEVICE,
					 DMA_ATTR_SKIP_CPU_SYNC);
}
EXPORT_SYMBOL(swiotlb_free_coherent);

static void
swiotlb_full(struct device *dev, size_t size, enum dma_data_direction dir,
	     int do_panic)
{
	if (swiotlb_force == SWIOTLB_NO_FORCE)
		return;

	/*
	 * Ran out of IOMMU space for this operation. This is very bad.
	 * Unfortunately the drivers cannot handle this operation properly.
	 * unless they check for dma_mapping_error (most don't)
	 * When the mapping is small enough return a static buffer to limit
	 * the damage, or panic when the transfer is too big.
	 */
	dev_err_ratelimited(dev, "DMA: Out of SW-IOMMU space for %zu bytes\n",
			    size);

	if (size <= io_tlb_overflow || !do_panic)
		return;

	if (dir == DMA_BIDIRECTIONAL)
		panic("DMA: Random memory could be DMA accessed\n");
	if (dir == DMA_FROM_DEVICE)
		panic("DMA: Random memory could be DMA written\n");
	if (dir == DMA_TO_DEVICE)
		panic("DMA: Random memory could be DMA read\n");
}

/*
 * Map a single buffer of the indicated size for DMA in streaming mode.  The
 * physical address to use is returned.
 *
 * Once the device is given the dma address, the device owns this memory until
 * either swiotlb_unmap_page or swiotlb_dma_sync_single is performed.
 */
dma_addr_t swiotlb_map_page(struct device *dev, struct page *page,
			    unsigned long offset, size_t size,
			    enum dma_data_direction dir,
			    unsigned long attrs)
{
	phys_addr_t map, phys = page_to_phys(page) + offset;
	dma_addr_t dev_addr = phys_to_dma(dev, phys);

	BUG_ON(dir == DMA_NONE);
	/*
	 * If the address happens to be in the device's DMA window,
	 * we can safely return the device addr and not worry about bounce
	 * buffering it.
	 */
	if (dma_capable(dev, dev_addr, size) && swiotlb_force != SWIOTLB_FORCE)
		return dev_addr;

	trace_swiotlb_bounced(dev, dev_addr, size, swiotlb_force);

	/* Oh well, have to allocate and map a bounce buffer. */
	map = map_single(dev, phys, size, dir, attrs);
	if (map == SWIOTLB_MAP_ERROR) {
		swiotlb_full(dev, size, dir, 1);
		return swiotlb_phys_to_dma(dev, io_tlb_overflow_buffer);
	}

	dev_addr = swiotlb_phys_to_dma(dev, map);

	/* Ensure that the address returned is DMA'ble */
	if (dma_capable(dev, dev_addr, size))
		return dev_addr;

	attrs |= DMA_ATTR_SKIP_CPU_SYNC;
	swiotlb_tbl_unmap_single(dev, map, size, dir, attrs);

	return swiotlb_phys_to_dma(dev, io_tlb_overflow_buffer);
}
EXPORT_SYMBOL_GPL(swiotlb_map_page);
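/*
 * Illustrative driver-side usage (assumed, not part of this file): a
 * driver normally reaches swiotlb_map_page() indirectly through the
 * streaming DMA API, e.g.:
 *
 *	dma_addr_t addr = dma_map_page(dev, page, offset, len, DMA_TO_DEVICE);
 *	if (dma_mapping_error(dev, addr))
 *		return -ENOMEM;
 *	... hand "addr" to the device ...
 *	dma_unmap_page(dev, addr, len, DMA_TO_DEVICE);
 */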

/*
 * Unmap a single streaming mode DMA translation.  The dma_addr and size must
 * match what was provided for in a previous swiotlb_map_page call.  All
 * other usages are undefined.
 *
 * After this call, reads by the cpu to the buffer are guaranteed to see
 * whatever the device wrote there.
 */
static void unmap_single(struct device *hwdev, dma_addr_t dev_addr,
			 size_t size, enum dma_data_direction dir,
			 unsigned long attrs)
{
	phys_addr_t paddr = dma_to_phys(hwdev, dev_addr);

	BUG_ON(dir == DMA_NONE);

	if (is_swiotlb_buffer(paddr)) {
		swiotlb_tbl_unmap_single(hwdev, paddr, size, dir, attrs);
		return;
	}

	if (dir != DMA_FROM_DEVICE)
		return;

	/*
	 * phys_to_virt doesn't work with highmem pages but we could
	 * call dma_mark_clean() with a highmem page here. However, we
	 * are fine since dma_mark_clean() is null on POWERPC. We can
	 * make dma_mark_clean() take a physical address if necessary.
	 */
	dma_mark_clean(phys_to_virt(paddr), size);
}

void swiotlb_unmap_page(struct device *hwdev, dma_addr_t dev_addr,
			size_t size, enum dma_data_direction dir,
			unsigned long attrs)
{
	unmap_single(hwdev, dev_addr, size, dir, attrs);
}
EXPORT_SYMBOL_GPL(swiotlb_unmap_page);

/*
 * Make physical memory consistent for a single streaming mode DMA translation
 * after a transfer.
 *
 * If you perform a swiotlb_map_page() but wish to interrogate the buffer
 * using the cpu, yet do not wish to tear down the dma mapping, you must
 * call this function before doing so.  At the next point you give the dma
 * address back to the card, you must first perform a
 * swiotlb_dma_sync_for_device, and then the device again owns the buffer
 */
static void
swiotlb_sync_single(struct device *hwdev, dma_addr_t dev_addr,
		    size_t size, enum dma_data_direction dir,
		    enum dma_sync_target target)
{
	phys_addr_t paddr = dma_to_phys(hwdev, dev_addr);

	BUG_ON(dir == DMA_NONE);

	if (is_swiotlb_buffer(paddr)) {
		swiotlb_tbl_sync_single(hwdev, paddr, size, dir, target);
		return;
	}

	if (dir != DMA_FROM_DEVICE)
		return;

	dma_mark_clean(phys_to_virt(paddr), size);
}

void
swiotlb_sync_single_for_cpu(struct device *hwdev, dma_addr_t dev_addr,
			    size_t size, enum dma_data_direction dir)
{
	swiotlb_sync_single(hwdev, dev_addr, size, dir, SYNC_FOR_CPU);
}
EXPORT_SYMBOL(swiotlb_sync_single_for_cpu);

void
swiotlb_sync_single_for_device(struct device *hwdev, dma_addr_t dev_addr,
			       size_t size, enum dma_data_direction dir)
{
	swiotlb_sync_single(hwdev, dev_addr, size, dir, SYNC_FOR_DEVICE);
}
EXPORT_SYMBOL(swiotlb_sync_single_for_device);
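/*
 * Illustrative usage of the sync pair above (assumed driver-side
 * pattern) for inspecting a live streaming mapping without unmapping it:
 *
 *	dma_sync_single_for_cpu(dev, addr, len, DMA_FROM_DEVICE);
 *	... the CPU reads the received data ...
 *	dma_sync_single_for_device(dev, addr, len, DMA_FROM_DEVICE);
 */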

/*
 * Map a set of buffers described by scatterlist in streaming mode for DMA.
 * This is the scatter-gather version of the above swiotlb_map_page