Commit 2dc10ad8 authored by Linus Torvalds

Merge tag 'arm64-upstream' of git://git.kernel.org/pub/scm/linux/kernel/git/arm64/linux

Pull arm64 updates from Catalin Marinas:

 - "genirq: Introduce generic irq migration for cpu hotunplugged" patch
   merged from tip/irq/for-arm to allow the arm64-specific part to be
   upstreamed via the arm64 tree

 - CPU feature detection reworked to cope with heterogeneous systems
   where CPUs may not have exactly the same features.  The features
   reported by the kernel via internal data structures or ELF_HWCAP are
   delayed until all the CPUs are up (and before user space starts)

 - Support for 16KB pages, with the additional bonus of a 36-bit VA
   space, though the latter is only selectable under EXPERT

 - Implement native {relaxed, acquire, release} atomics for arm64

 - New ASID allocation algorithm which avoids IPI on roll-over, together
   with TLB invalidation optimisations (using local vs global where
   feasible)

 - KASan support for arm64

 - EFI_STUB clean-up and isolation for the kernel proper (required by
   KASan)

 - copy_{to,from,in}_user optimisations (sharing the memcpy template)

 - perf: moving arm64 to the arm32/64 shared PMU framework

 - L1_CACHE_BYTES increased to 128 to accommodate Cavium hardware

 - Support for the contiguous PTE hint on kernel mapping (16 consecutive
   entries may be able to use a single TLB entry)

 - Generic CONFIG_HZ now used on arm64

 - defconfig updates

* tag 'arm64-upstream' of git://git.kernel.org/pub/scm/linux/kernel/git/arm64/linux: (91 commits)
  arm64/efi: fix libstub build under CONFIG_MODVERSIONS
  ARM64: Enable multi-core scheduler support by default
  arm64/efi: move arm64 specific stub C code to libstub
  arm64: page-align sections for DEBUG_RODATA
  arm64: Fix build with CONFIG_ZONE_DMA=n
  arm64: Fix compat register mappings
  arm64: Increase the max granular size
  arm64: remove bogus TASK_SIZE_64 check
  arm64: make Timer Interrupt Frequency selectable
  arm64/mm: use PAGE_ALIGNED instead of IS_ALIGNED
  arm64: cachetype: fix definitions of ICACHEF_* flags
  arm64: cpufeature: declare enable_cpu_capabilities as static
  genirq: Make the cpuhotplug migration code less noisy
  arm64: Constify hwcap name string arrays
  arm64/kvm: Make use of the system wide safe values
  arm64/debug: Make use of the system wide safe value
  arm64: Move FP/ASIMD hwcap handling to common code
  arm64/HWCAP: Use system wide safe values
  arm64/capabilities: Make use of system wide safe value
  arm64: Delay cpu feature capability checks
  ...
parents e627078a f8f8bdc4
......@@ -58,5 +58,3 @@ linux,uefi-mmap-desc-size | 32-bit | Size in bytes of each entry in the UEFI
--------------------------------------------------------------------------------
linux,uefi-mmap-desc-ver | 32-bit | Version of the mmap descriptor format.
--------------------------------------------------------------------------------
- linux,uefi-stub-kern-ver | string | Copy of linux_banner from build.
- --------------------------------------------------------------------------------
......@@ -104,7 +104,12 @@ Header notes:
- The flags field (introduced in v3.17) is a little-endian 64-bit field
composed as follows:
Bit 0: Kernel endianness. 1 if BE, 0 if LE.
- Bits 1-63: Reserved.
+ Bits 1-2: Kernel Page size.
+ 0 - Unspecified.
+ 1 - 4K
+ 2 - 16K
+ 3 - 64K
+ Bits 3-63: Reserved.
- When image_size is zero, a bootloader should attempt to keep as much
memory as possible free for use by the kernel immediately after the
......
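Decoding the new header bits is simple arithmetic; as a minimal sketch (the struct and helper names are made up for illustration, not part of the patch), a bootloader could read them like this:

    #include <stdint.h>

    /* Hypothetical decoder for the image header 'flags' field above. */
    struct arm64_image_flags {
            int big_endian;         /* bit 0: 1 if BE, 0 if LE */
            unsigned int page_size; /* bits 1-2: 0 unspecified, 1=4K, 2=16K, 3=64K */
    };

    static struct arm64_image_flags decode_image_flags(uint64_t flags)
    {
            struct arm64_image_flags f;

            f.big_endian = (int)(flags & 0x1);
            f.page_size = (unsigned int)((flags >> 1) & 0x3);
            /* bits 3-63 are reserved and should be ignored for now */
            return f;
    }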
......@@ -8,6 +8,8 @@ Required properties:
- compatible : should be one of
"arm,armv8-pmuv3"
"arm.cortex-a57-pmu"
"arm.cortex-a53-pmu"
"arm,cortex-a17-pmu"
"arm,cortex-a15-pmu"
"arm,cortex-a12-pmu"
......
......@@ -9,7 +9,7 @@
| alpha: | TODO |
| arc: | TODO |
| arm: | TODO |
- | arm64: | TODO |
+ | arm64: | ok |
| avr32: | TODO |
| blackfin: | TODO |
| c6x: | TODO |
......
......@@ -823,12 +823,13 @@ F: arch/arm/include/asm/floppy.h
ARM PMU PROFILING AND DEBUGGING
M: Will Deacon <will.deacon@arm.com>
R: Mark Rutland <mark.rutland@arm.com>
S: Maintained
- F: arch/arm/kernel/perf_*
+ F: arch/arm*/kernel/perf_*
F: arch/arm/oprofile/common.c
- F: arch/arm/kernel/hw_breakpoint.c
- F: arch/arm/include/asm/hw_breakpoint.h
- F: arch/arm/include/asm/perf_event.h
+ F: arch/arm*/kernel/hw_breakpoint.c
+ F: arch/arm*/include/asm/hw_breakpoint.h
+ F: arch/arm*/include/asm/perf_event.h
+ F: drivers/perf/arm_pmu.c
+ F: include/linux/perf/arm_pmu.h
......
......@@ -48,6 +48,7 @@ config ARM64
select HAVE_ARCH_AUDITSYSCALL
select HAVE_ARCH_BITREVERSE
select HAVE_ARCH_JUMP_LABEL
select HAVE_ARCH_KASAN if SPARSEMEM_VMEMMAP
select HAVE_ARCH_KGDB
select HAVE_ARCH_SECCOMP_FILTER
select HAVE_ARCH_TRACEHOOK
......@@ -169,10 +170,12 @@ config FIX_EARLYCON_MEM
config PGTABLE_LEVELS
int
+ default 2 if ARM64_16K_PAGES && ARM64_VA_BITS_36
default 2 if ARM64_64K_PAGES && ARM64_VA_BITS_42
+ default 3 if ARM64_64K_PAGES && ARM64_VA_BITS_48
default 3 if ARM64_4K_PAGES && ARM64_VA_BITS_39
- default 4 if ARM64_4K_PAGES && ARM64_VA_BITS_48
+ default 3 if ARM64_16K_PAGES && ARM64_VA_BITS_47
+ default 4 if !ARM64_64K_PAGES && ARM64_VA_BITS_48
source "init/Kconfig"
......@@ -389,25 +392,37 @@ config ARM64_4K_PAGES
help
This feature enables 4KB pages support.
+ config ARM64_16K_PAGES
+ bool "16KB"
+ help
+ The system will use 16KB pages support. AArch32 emulation
+ requires applications compiled with 16K (or a multiple of 16K)
+ aligned segments.
config ARM64_64K_PAGES
bool "64KB"
help
This feature enables 64KB pages support (4KB by default)
allowing only two levels of page tables and faster TLB
- look-up. AArch32 emulation is not available when this feature
- is enabled.
+ look-up. AArch32 emulation requires applications compiled
+ with 64K aligned segments.
endchoice
choice
prompt "Virtual address space size"
default ARM64_VA_BITS_39 if ARM64_4K_PAGES
default ARM64_VA_BITS_47 if ARM64_16K_PAGES
default ARM64_VA_BITS_42 if ARM64_64K_PAGES
help
Allows choosing one of multiple possible virtual address
space sizes. The level of translation table is determined by
a combination of page size and virtual address space size.
config ARM64_VA_BITS_36
bool "36-bit" if EXPERT
depends on ARM64_16K_PAGES
config ARM64_VA_BITS_39
bool "39-bit"
depends on ARM64_4K_PAGES
......@@ -416,6 +431,10 @@ config ARM64_VA_BITS_42
bool "42-bit"
depends on ARM64_64K_PAGES
config ARM64_VA_BITS_47
bool "47-bit"
depends on ARM64_16K_PAGES
config ARM64_VA_BITS_48
bool "48-bit"
......@@ -423,8 +442,10 @@ endchoice
config ARM64_VA_BITS
int
default 36 if ARM64_VA_BITS_36
default 39 if ARM64_VA_BITS_39
default 42 if ARM64_VA_BITS_42
default 47 if ARM64_VA_BITS_47
default 48 if ARM64_VA_BITS_48
config CPU_BIG_ENDIAN
......@@ -454,15 +475,13 @@ config NR_CPUS
config HOTPLUG_CPU
bool "Support for hot-pluggable CPUs"
select GENERIC_IRQ_MIGRATION
help
Say Y here to experiment with turning CPUs off and on. CPUs
can be controlled through /sys/devices/system/cpu.
source kernel/Kconfig.preempt
- config HZ
- int
- default 100
+ source kernel/Kconfig.hz
config ARCH_HAS_HOLES_MEMORYMODEL
def_bool y if SPARSEMEM
......@@ -481,12 +500,8 @@ config HAVE_ARCH_PFN_VALID
def_bool ARCH_HAS_HOLES_MEMORYMODEL || !SPARSEMEM
config HW_PERF_EVENTS
bool "Enable hardware performance counter support for perf events"
depends on PERF_EVENTS
default y
help
Enable hardware performance counter support for perf events. If
disabled, perf events will use software events only.
def_bool y
depends on ARM_PMU
config SYS_SUPPORTS_HUGETLBFS
def_bool y
......@@ -495,7 +510,7 @@ config ARCH_WANT_GENERAL_HUGETLB
def_bool y
config ARCH_WANT_HUGE_PMD_SHARE
- def_bool y if !ARM64_64K_PAGES
+ def_bool y if ARM64_4K_PAGES || (ARM64_16K_PAGES && !ARM64_VA_BITS_36)
config HAVE_ARCH_TRANSPARENT_HUGEPAGE
def_bool y
......@@ -532,7 +547,25 @@ config XEN
config FORCE_MAX_ZONEORDER
int
default "14" if (ARM64_64K_PAGES && TRANSPARENT_HUGEPAGE)
default "12" if (ARM64_16K_PAGES && TRANSPARENT_HUGEPAGE)
default "11"
help
The kernel memory allocator divides physically contiguous memory
blocks into "zones", where each zone is a power of two number of
pages. This option selects the largest power of two that the kernel
keeps in the memory allocator. If you need to allocate very large
blocks of physically contiguous memory, then you may need to
increase this value.
This config option is actually maximum order plus one. For example,
a value of 11 means that the largest free memory block is 2^10 pages.
We make sure that we can allocate upto a HugePage size for each configuration.
Hence we have :
MAX_ORDER = (PMD_SHIFT - PAGE_SHIFT) + 1 => PAGE_SHIFT - 2
However for 4K, we choose a higher default value, 11 as opposed to 10, giving us
4M allocations matching the default size used by generic code.
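To make that arithmetic concrete (a sketch, not kernel code): on arm64, PMD_SHIFT = 2*PAGE_SHIFT - 3, so the formula reduces to PAGE_SHIFT - 2 and reproduces the defaults above:

    /* max_order(16) == 14 (64K pages), max_order(14) == 12 (16K pages),
     * max_order(12) == 10, bumped to the generic default of 11 for 4K. */
    static int max_order(int page_shift)
    {
            int pmd_shift = 2 * page_shift - 3;

            return (pmd_shift - page_shift) + 1; /* == page_shift - 2 */
    }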
menuconfig ARMV8_DEPRECATED
bool "Emulate deprecated/obsolete ARMv8 instructions"
......@@ -707,7 +740,7 @@ source "fs/Kconfig.binfmt"
config COMPAT
bool "Kernel support for 32-bit EL0"
- depends on !ARM64_64K_PAGES || EXPERT
+ depends on ARM64_4K_PAGES || EXPERT
select COMPAT_BINFMT_ELF
select HAVE_UID16
select OLD_SIGSUSPEND3
......@@ -718,9 +751,9 @@ config COMPAT
the user helper functions, VFP support and the ptrace interface are
handled appropriately by the kernel.
- If you also enabled CONFIG_ARM64_64K_PAGES, please be aware that you
- will only be able to execute AArch32 binaries that were compiled with
- 64k aligned segments.
+ If you use a page size other than 4KB (i.e., 16KB or 64KB), please be
+ aware that you will only be able to execute AArch32 binaries that were
+ compiled with page size aligned segments.
If you want to execute 32-bit userspace applications, say Y.
......
......@@ -77,7 +77,7 @@ config DEBUG_RODATA
If in doubt, say Y
config DEBUG_ALIGN_RODATA
- depends on DEBUG_RODATA && !ARM64_64K_PAGES
+ depends on DEBUG_RODATA && ARM64_4K_PAGES
bool "Align linker sections up to SECTION_SIZE"
help
If this option is enabled, sections that may potentially be marked as
......
......@@ -55,6 +55,13 @@ else
TEXT_OFFSET := 0x00080000
endif
# KASAN_SHADOW_OFFSET = VA_START + (1 << (VA_BITS - 3)) - (1 << 61)
# in 32-bit arithmetic
KASAN_SHADOW_OFFSET := $(shell printf "0x%08x00000000\n" $$(( \
(0xffffffff & (-1 << ($(CONFIG_ARM64_VA_BITS) - 32))) \
+ (1 << ($(CONFIG_ARM64_VA_BITS) - 32 - 3)) \
- (1 << (64 - 32 - 3)) )) )
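As a worked example (plain arithmetic on the expression above): with CONFIG_ARM64_VA_BITS=39 the three terms are 0xffffff80 + 0x10 - 0x20000000 = 0xdfffff90, so the computed KASAN_SHADOW_OFFSET is 0xdfffff9000000000 once the printf appends the eight zero nibbles.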
export TEXT_OFFSET GZFLAGS
core-y += arch/arm64/kernel/ arch/arm64/mm/
......
......@@ -91,17 +91,21 @@
};
};
- pmu {
- compatible = "arm,armv8-pmuv3";
+ pmu_a57 {
+ compatible = "arm,cortex-a57-pmu";
interrupts = <GIC_SPI 02 IRQ_TYPE_LEVEL_HIGH>,
- <GIC_SPI 06 IRQ_TYPE_LEVEL_HIGH>,
- <GIC_SPI 18 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 06 IRQ_TYPE_LEVEL_HIGH>;
+ interrupt-affinity = <&A57_0>,
+ <&A57_1>;
+ };
+ pmu_a53 {
+ compatible = "arm,cortex-a53-pmu";
+ interrupts = <GIC_SPI 18 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 22 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 26 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 30 IRQ_TYPE_LEVEL_HIGH>;
- interrupt-affinity = <&A57_0>,
- <&A57_1>,
- <&A53_0>,
+ interrupt-affinity = <&A53_0>,
<&A53_1>,
<&A53_2>,
<&A53_3>;
......
......@@ -91,17 +91,21 @@
};
};
- pmu {
- compatible = "arm,armv8-pmuv3";
+ pmu_a57 {
+ compatible = "arm,cortex-a57-pmu";
interrupts = <GIC_SPI 02 IRQ_TYPE_LEVEL_HIGH>,
- <GIC_SPI 06 IRQ_TYPE_LEVEL_HIGH>,
- <GIC_SPI 18 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 06 IRQ_TYPE_LEVEL_HIGH>;
+ interrupt-affinity = <&A57_0>,
+ <&A57_1>;
+ };
+ pmu_a53 {
+ compatible = "arm,cortex-a53-pmu";
+ interrupts = <GIC_SPI 18 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 22 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 26 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 30 IRQ_TYPE_LEVEL_HIGH>;
- interrupt-affinity = <&A57_0>,
- <&A57_1>,
- <&A53_0>,
+ interrupt-affinity = <&A53_0>,
<&A53_1>,
<&A53_2>,
<&A53_3>;
......
......@@ -51,6 +51,7 @@ CONFIG_PCI=y
CONFIG_PCI_MSI=y
CONFIG_PCI_XGENE=y
CONFIG_SMP=y
CONFIG_SCHED_MC=y
CONFIG_PREEMPT=y
CONFIG_KSM=y
CONFIG_TRANSPARENT_HUGEPAGE=y
......@@ -109,6 +110,10 @@ CONFIG_SERIAL_8250_DW=y
CONFIG_SERIAL_8250_MT6577=y
CONFIG_SERIAL_AMBA_PL011=y
CONFIG_SERIAL_AMBA_PL011_CONSOLE=y
CONFIG_SERIAL_SAMSUNG=y
CONFIG_SERIAL_SAMSUNG_UARTS_4=y
CONFIG_SERIAL_SAMSUNG_UARTS=4
CONFIG_SERIAL_SAMSUNG_CONSOLE=y
CONFIG_SERIAL_MSM=y
CONFIG_SERIAL_MSM_CONSOLE=y
CONFIG_SERIAL_OF_PLATFORM=y
......@@ -145,6 +150,10 @@ CONFIG_MMC_ARMMMCI=y
CONFIG_MMC_SDHCI=y
CONFIG_MMC_SDHCI_PLTFM=y
CONFIG_MMC_SPI=y
CONFIG_MMC_DW=y
CONFIG_MMC_DW_IDMAC=y
CONFIG_MMC_DW_PLTFM=y
CONFIG_MMC_DW_EXYNOS=y
CONFIG_NEW_LEDS=y
CONFIG_LEDS_CLASS=y
CONFIG_LEDS_SYSCON=y
......
......@@ -193,4 +193,15 @@ lr .req x30 // link register
str \src, [\tmp, :lo12:\sym]
.endm
/*
* Annotate a function as position independent, i.e., safe to be called before
* the kernel virtual mapping is activated.
*/
#define ENDPIPROC(x) \
.globl __pi_##x; \
.type __pi_##x, %function; \
.set __pi_##x, x; \
.size __pi_##x, . - x; \
ENDPROC(x)
#endif /* __ASM_ASSEMBLER_H */
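In this series the annotation is applied to routines such as memcpy; for example, closing a routine with ENDPIPROC(memcpy) additionally emits a __pi_memcpy alias of the same type and size, which position-independent callers such as the EFI stub and early KASan setup can use before the kernel's virtual mapping is active.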
......@@ -55,13 +55,42 @@
#define atomic_read(v) READ_ONCE((v)->counter)
#define atomic_set(v, i) WRITE_ONCE(((v)->counter), (i))
#define atomic_add_return_relaxed atomic_add_return_relaxed
#define atomic_add_return_acquire atomic_add_return_acquire
#define atomic_add_return_release atomic_add_return_release
#define atomic_add_return atomic_add_return
#define atomic_inc_return_relaxed(v) atomic_add_return_relaxed(1, (v))
#define atomic_inc_return_acquire(v) atomic_add_return_acquire(1, (v))
#define atomic_inc_return_release(v) atomic_add_return_release(1, (v))
#define atomic_inc_return(v) atomic_add_return(1, (v))
#define atomic_sub_return_relaxed atomic_sub_return_relaxed
#define atomic_sub_return_acquire atomic_sub_return_acquire
#define atomic_sub_return_release atomic_sub_return_release
#define atomic_sub_return atomic_sub_return
#define atomic_dec_return_relaxed(v) atomic_sub_return_relaxed(1, (v))
#define atomic_dec_return_acquire(v) atomic_sub_return_acquire(1, (v))
#define atomic_dec_return_release(v) atomic_sub_return_release(1, (v))
#define atomic_dec_return(v) atomic_sub_return(1, (v))
#define atomic_xchg_relaxed(v, new) xchg_relaxed(&((v)->counter), (new))
#define atomic_xchg_acquire(v, new) xchg_acquire(&((v)->counter), (new))
#define atomic_xchg_release(v, new) xchg_release(&((v)->counter), (new))
#define atomic_xchg(v, new) xchg(&((v)->counter), (new))
#define atomic_cmpxchg_relaxed(v, old, new) \
cmpxchg_relaxed(&((v)->counter), (old), (new))
#define atomic_cmpxchg_acquire(v, old, new) \
cmpxchg_acquire(&((v)->counter), (old), (new))
#define atomic_cmpxchg_release(v, old, new) \
cmpxchg_release(&((v)->counter), (old), (new))
#define atomic_cmpxchg(v, old, new) cmpxchg(&((v)->counter), (old), (new))
#define atomic_inc(v) atomic_add(1, (v))
#define atomic_dec(v) atomic_sub(1, (v))
- #define atomic_inc_return(v) atomic_add_return(1, (v))
- #define atomic_dec_return(v) atomic_sub_return(1, (v))
#define atomic_inc_and_test(v) (atomic_inc_return(v) == 0)
#define atomic_dec_and_test(v) (atomic_dec_return(v) == 0)
#define atomic_sub_and_test(i, v) (atomic_sub_return((i), (v)) == 0)
......@@ -75,13 +104,39 @@
#define ATOMIC64_INIT ATOMIC_INIT
#define atomic64_read atomic_read
#define atomic64_set atomic_set
#define atomic64_add_return_relaxed atomic64_add_return_relaxed
#define atomic64_add_return_acquire atomic64_add_return_acquire
#define atomic64_add_return_release atomic64_add_return_release
#define atomic64_add_return atomic64_add_return
#define atomic64_inc_return_relaxed(v) atomic64_add_return_relaxed(1, (v))
#define atomic64_inc_return_acquire(v) atomic64_add_return_acquire(1, (v))
#define atomic64_inc_return_release(v) atomic64_add_return_release(1, (v))
#define atomic64_inc_return(v) atomic64_add_return(1, (v))
#define atomic64_sub_return_relaxed atomic64_sub_return_relaxed
#define atomic64_sub_return_acquire atomic64_sub_return_acquire
#define atomic64_sub_return_release atomic64_sub_return_release
#define atomic64_sub_return atomic64_sub_return
#define atomic64_dec_return_relaxed(v) atomic64_sub_return_relaxed(1, (v))
#define atomic64_dec_return_acquire(v) atomic64_sub_return_acquire(1, (v))
#define atomic64_dec_return_release(v) atomic64_sub_return_release(1, (v))
#define atomic64_dec_return(v) atomic64_sub_return(1, (v))
#define atomic64_xchg_relaxed atomic_xchg_relaxed
#define atomic64_xchg_acquire atomic_xchg_acquire
#define atomic64_xchg_release atomic_xchg_release
#define atomic64_xchg atomic_xchg
#define atomic64_cmpxchg_relaxed atomic_cmpxchg_relaxed
#define atomic64_cmpxchg_acquire atomic_cmpxchg_acquire
#define atomic64_cmpxchg_release atomic_cmpxchg_release
#define atomic64_cmpxchg atomic_cmpxchg
#define atomic64_inc(v) atomic64_add(1, (v))
#define atomic64_dec(v) atomic64_sub(1, (v))
- #define atomic64_inc_return(v) atomic64_add_return(1, (v))
- #define atomic64_dec_return(v) atomic64_sub_return(1, (v))
#define atomic64_inc_and_test(v) (atomic64_inc_return(v) == 0)
#define atomic64_dec_and_test(v) (atomic64_dec_return(v) == 0)
#define atomic64_sub_and_test(i, v) (atomic64_sub_return((i), (v)) == 0)
......
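Taken together, the hunk above gives arm64 native _relaxed/_acquire/_release forms for every value-returning atomic. A minimal usage sketch of why the distinction matters (hypothetical code, not from the patch; free_the_object is an assumed callback):

    static atomic_t refs = ATOMIC_INIT(1);

    static void get_ref(void)
    {
            /* Taking a reference needs no ordering: relaxed suffices. */
            atomic_inc_return_relaxed(&refs);
    }

    static void put_ref(void (*free_the_object)(void))
    {
            /*
             * Dropping the last reference must order earlier accesses to
             * the object before the free; the fully ordered form is the
             * conservative choice here, with atomic_dec_return_release()
             * as the weakest correct option on the release side.
             */
            if (atomic_dec_return(&refs) == 0)
                    free_the_object();
    }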
......@@ -55,40 +55,47 @@ __LL_SC_PREFIX(atomic_##op(int i, atomic_t *v)) \
} \
__LL_SC_EXPORT(atomic_##op);
- #define ATOMIC_OP_RETURN(op, asm_op) \
+ #define ATOMIC_OP_RETURN(name, mb, acq, rel, cl, op, asm_op) \
__LL_SC_INLINE int \
- __LL_SC_PREFIX(atomic_##op##_return(int i, atomic_t *v)) \
+ __LL_SC_PREFIX(atomic_##op##_return##name(int i, atomic_t *v)) \
{ \
unsigned long tmp; \
int result; \
\
- asm volatile("// atomic_" #op "_return\n" \
+ asm volatile("// atomic_" #op "_return" #name "\n" \
" prfm pstl1strm, %2\n" \
- "1: ldxr %w0, %2\n" \
+ "1: ld" #acq "xr %w0, %2\n" \
" " #asm_op " %w0, %w0, %w3\n" \
- " stlxr %w1, %w0, %2\n" \
- " cbnz %w1, 1b" \
+ " st" #rel "xr %w1, %w0, %2\n" \
+ " cbnz %w1, 1b\n" \
+ " " #mb \
: "=&r" (result), "=&r" (tmp), "+Q" (v->counter) \
: "Ir" (i) \
- : "memory"); \
+ : cl); \
\
- smp_mb(); \
return result; \
} \
- __LL_SC_EXPORT(atomic_##op##_return);
+ __LL_SC_EXPORT(atomic_##op##_return##name);
+ #define ATOMIC_OPS(...) \
+ ATOMIC_OP(__VA_ARGS__) \
+ ATOMIC_OP_RETURN( , dmb ish, , l, "memory", __VA_ARGS__)
- #define ATOMIC_OPS(op, asm_op) \
- ATOMIC_OP(op, asm_op) \
- ATOMIC_OP_RETURN(op, asm_op)
+ #define ATOMIC_OPS_RLX(...) \
+ ATOMIC_OPS(__VA_ARGS__) \
+ ATOMIC_OP_RETURN(_relaxed, , , , , __VA_ARGS__)\
+ ATOMIC_OP_RETURN(_acquire, , a, , "memory", __VA_ARGS__)\
+ ATOMIC_OP_RETURN(_release, , , l, "memory", __VA_ARGS__)
- ATOMIC_OPS(add, add)
- ATOMIC_OPS(sub, sub)
+ ATOMIC_OPS_RLX(add, add)
+ ATOMIC_OPS_RLX(sub, sub)
ATOMIC_OP(and, and)
ATOMIC_OP(andnot, bic)
ATOMIC_OP(or, orr)
ATOMIC_OP(xor, eor)
+ #undef ATOMIC_OPS_RLX
#undef ATOMIC_OPS
#undef ATOMIC_OP_RETURN
#undef ATOMIC_OP
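For reference, here is roughly what ATOMIC_OP_RETURN(_acquire, , a, , "memory", add, add) expands to once the stringified fragments are substituted (a simplified sketch with the __LL_SC_* wrappers dropped):

    static inline int atomic_add_return_acquire(int i, atomic_t *v)
    {
            unsigned long tmp;
            int result;

            asm volatile("// atomic_add_return_acquire\n"
            "       prfm    pstl1strm, %2\n"
            "1:     ldaxr   %w0, %2\n"      /* acq = "a": load-acquire exclusive */
            "       add     %w0, %w0, %w3\n"
            "       stxr    %w1, %w0, %2\n" /* rel empty: plain store-exclusive */
            "       cbnz    %w1, 1b\n"
            "       "                       /* mb argument is empty */
            : "=&r" (result), "=&r" (tmp), "+Q" (v->counter)
            : "Ir" (i)
            : "memory");

            return result;
    }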
......@@ -111,40 +118,47 @@ __LL_SC_PREFIX(atomic64_##op(long i, atomic64_t *v)) \
} \
__LL_SC_EXPORT(atomic64_##op);
- #define ATOMIC64_OP_RETURN(op, asm_op) \
+ #define ATOMIC64_OP_RETURN(name, mb, acq, rel, cl, op, asm_op) \
__LL_SC_INLINE long \
- __LL_SC_PREFIX(atomic64_##op##_return(long i, atomic64_t *v)) \
+ __LL_SC_PREFIX(atomic64_##op##_return##name(long i, atomic64_t *v)) \
{ \
long result; \
unsigned long tmp; \
\
- asm volatile("// atomic64_" #op "_return\n" \
+ asm volatile("// atomic64_" #op "_return" #name "\n" \
" prfm pstl1strm, %2\n" \
- "1: ldxr %0, %2\n" \
+ "1: ld" #acq "xr %0, %2\n" \
" " #asm_op " %0, %0, %3\n" \
- " stlxr %w1, %0, %2\n" \
- " cbnz %w1, 1b" \
+ " st" #rel "xr %w1, %0, %2\n" \
+ " cbnz %w1, 1b\n" \
+ " " #mb \
: "=&r" (result), "=&r" (tmp), "+Q" (v->counter) \
: "Ir" (i) \
- : "memory"); \
+ : cl); \
\
- smp_mb(); \
return result; \
} \
- __LL_SC_EXPORT(atomic64_##op##_return);
+ __LL_SC_EXPORT(atomic64_##op##_return##name);
+ #define ATOMIC64_OPS(...) \
+ ATOMIC64_OP(__VA_ARGS__) \
+ ATOMIC64_OP_RETURN(, dmb ish, , l, "memory", __VA_ARGS__)
- #define ATOMIC64_OPS(op, asm_op) \
- ATOMIC64_OP(op, asm_op) \
- ATOMIC64_OP_RETURN(op, asm_op)
+ #define ATOMIC64_OPS_RLX(...) \
+ ATOMIC64_OPS(__VA_ARGS__) \
+ ATOMIC64_OP_RETURN(_relaxed,, , , , __VA_ARGS__) \
+ ATOMIC64_OP_RETURN(_acquire,, a, , "memory", __VA_ARGS__) \
+ ATOMIC64_OP_RETURN(_release,, , l, "memory", __VA_ARGS__)
- ATOMIC64_OPS(add, add)
- ATOMIC64_OPS(sub, sub)
+ ATOMIC64_OPS_RLX(add, add)
+ ATOMIC64_OPS_RLX(sub, sub)
ATOMIC64_OP(and, and)
ATOMIC64_OP(andnot, bic)
ATOMIC64_OP(or, orr)
ATOMIC64_OP(xor, eor)
+ #undef ATOMIC64_OPS_RLX
#undef ATOMIC64_OPS
#undef ATOMIC64_OP_RETURN
#undef ATOMIC64_OP
......@@ -172,7 +186,7 @@ __LL_SC_PREFIX(atomic64_dec_if_positive(atomic64_t *v))
}
__LL_SC_EXPORT(atomic64_dec_if_positive);
- #define __CMPXCHG_CASE(w, sz, name, mb, rel, cl) \
+ #define __CMPXCHG_CASE(w, sz, name, mb, acq, rel, cl) \
__LL_SC_INLINE unsigned long \
__LL_SC_PREFIX(__cmpxchg_case_##name(volatile void *ptr, \
unsigned long old, \
......@@ -182,7 +196,7 @@ __LL_SC_PREFIX(__cmpxchg_case_##name(volatile void *ptr, \
\
asm volatile( \
" prfm pstl1strm, %[v]\n" \
"1: ldxr" #sz "\t%" #w "[oldval], %[v]\n" \
"1: ld" #acq "xr" #sz "\t%" #w "[oldval], %[v]\n" \
" eor %" #w "[tmp], %" #w "[oldval], %" #w "[old]\n" \
" cbnz %" #w "[tmp], 2f\n" \
" st" #rel "xr" #sz "\t%w[tmp], %" #w "[new], %[v]\n" \
......@@ -199,14 +213,22 @@ __LL_SC_PREFIX(__cmpxchg_case_##name(volatile void *ptr, \
} \
__LL_SC_EXPORT(__cmpxchg_case_##name);
- __CMPXCHG_CASE(w, b, 1, , , )
- __CMPXCHG_CASE(w, h, 2, , , )
- __CMPXCHG_CASE(w, , 4, , , )
- __CMPXCHG_CASE( , , 8, , , )
- __CMPXCHG_CASE(w, b, mb_1, dmb ish, l, "memory")
- __CMPXCHG_CASE(w, h, mb_2, dmb ish, l, "memory")
- __CMPXCHG_CASE(w, , mb_4, dmb ish, l, "memory")
- __CMPXCHG_CASE( , , mb_8, dmb ish, l, "memory")
+ __CMPXCHG_CASE(w, b, 1, , , , )
+ __CMPXCHG_CASE(w, h, 2, , , , )
+ __CMPXCHG_CASE(w, , 4, , , , )
+ __CMPXCHG_CASE( , , 8, , , , )
+ __CMPXCHG_CASE(w, b, acq_1, , a, , "memory")
+ __CMPXCHG_CASE(w, h, acq_2, , a, , "memory")
+ __CMPXCHG_CASE(w, , acq_4, , a, , "memory")
+ __CMPXCHG_CASE( , , acq_8, , a, , "memory")
+ __CMPXCHG_CASE(w, b, rel_1, , , l, "memory")
+ __CMPXCHG_CASE(w, h, rel_2, , , l, "memory")
+ __CMPXCHG_CASE(w, , rel_4, , , l, "memory")
+ __CMPXCHG_CASE( , , rel_8, , , l, "memory")
+ __CMPXCHG_CASE(w, b, mb_1, dmb ish, , l, "memory")
+ __CMPXCHG_CASE(w, h, mb_2, dmb ish, , l, "memory")
+ __CMPXCHG_CASE(w, , mb_4, dmb ish, , l, "memory")
+ __CMPXCHG_CASE( , , mb_8, dmb ish, , l, "memory")
#undef __CMPXCHG_CASE
......
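Roughly, __CMPXCHG_CASE(w, , acq_4, , a, , "memory") from the hunk above generates an ldaxr/stxr loop with no trailing barrier (a simplified sketch with the __LL_SC_* wrappers dropped and the operand constraints reduced):

    static inline unsigned long __cmpxchg_case_acq_4(volatile void *ptr,
                                                     unsigned long old,
                                                     unsigned long new)
    {
            unsigned long tmp, oldval;

            asm volatile(
            "       prfm    pstl1strm, %[v]\n"
            "1:     ldaxr   %w[oldval], %[v]\n"       /* acq = "a" */
            "       eor     %w[tmp], %w[oldval], %w[old]\n"
            "       cbnz    %w[tmp], 2f\n"
            "       stxr    %w[tmp], %w[new], %[v]\n" /* rel empty */
            "       cbnz    %w[tmp], 1b\n"
            "2:"                                      /* mb empty */
            : [tmp] "=&r" (tmp), [oldval] "=&r" (oldval),
              [v] "+Q" (*(unsigned long *)ptr)
            : [old] "r" (old), [new] "r" (new)
            : "memory");

            return oldval;
    }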
......@@ -75,24 +75,32 @@ static inline void atomic_add(int i, atomic_t *v)
: "x30");
}
- static inline int atomic_add_return(int i, atomic_t *v)
- {
- register int w0 asm ("w0") = i;
- register atomic_t *x1 asm ("x1") = v;
+ #define ATOMIC_OP_ADD_RETURN(name, mb, cl...) \
+ static inline int atomic_add_return##name(int i, atomic_t *v) \
+ { \
+ register int w0 asm ("w0") = i; \
+ register atomic_t *x1 asm ("x1") = v; \
\