Commit e1ba1c99 authored by Linus Torvalds

Merge tag 'riscv-for-linus-4.15-rc2_cleanups' of git://git.kernel.org/pub/scm/linux/kernel/git/palmer/linux

Pull RISC-V cleanups and ABI fixes from Palmer Dabbelt:
 "This contains a handful of small cleanups that are a result of
  feedback that didn't make it into our original patch set, either
  because the feedback hadn't been given yet, I missed the original
  emails, or we weren't ready to submit the changes yet.

  I've been maintaining the various cleanup patch sets I have as their
  own branches, which I then merged together and signed. Each merge
  commit has a short summary of the changes, and each branch is based on
  your latest tag (4.15-rc1, in this case). If this isn't the right way
  to do this then feel free to suggest something else, but it seems sane
  to me.

  Here's a short summary of the changes, roughly in order of how
  interesting they are.

   - libgcc.h has been moved from include/lib, where it's the only
     member, to include/linux. This is meant to avoid tab completion
     conflicts.

   - VDSO entries for clock_get/gettimeofday/getcpu have been added.
     These are simple syscalls now, but we want to let glibc use them
     from the start so we can make them faster later.

   - A VDSO entry for instruction cache flushing has been added so
     userspace can flush the instruction cache.

   - The VDSO symbol versions for __vdso_cmpxchg{32,64} have been
     removed, as those VDSO entries don't actually exist.

   - __io_writes has been corrected to respect the given type.

   - A new READ_ONCE in arch_spin_is_locked().

   - __test_and_op_bit_ord() is now actually ordered.

   - Various small fixes throughout the tree to enable allmodconfig to
     build cleanly.

   - Removal of some dead code in our atomic support headers.

   - Improvements to various comments in our atomic support headers"

* tag 'riscv-for-linus-4.15-rc2_cleanups' of git://git.kernel.org/pub/scm/linux/kernel/git/palmer/linux: (23 commits)
  RISC-V: __io_writes should respect the length argument
  move libgcc.h to include/linux
  RISC-V: Clean up an unused include
  RISC-V: Allow userspace to flush the instruction cache
  RISC-V: Flush I$ when making a dirty page executable
  RISC-V: Add missing include
  RISC-V: Use define for get_cycles like other architectures
  RISC-V: Provide stub of setup_profiling_timer()
  RISC-V: Export some expected symbols for modules
  RISC-V: move empty_zero_page definition to C and export it
  RISC-V: io.h: type fixes for warnings
  RISC-V: use RISCV_{INT,SHORT} instead of {INT,SHORT} for asm macros
  RISC-V: use generic serial.h
  RISC-V: remove spin_unlock_wait()
  RISC-V: `sfence.vma` orders the instruction cache
  RISC-V: Add READ_ONCE in arch_spin_is_locked()
  RISC-V: __test_and_op_bit_ord should be strongly ordered
  RISC-V: Remove smb_mb__{before,after}_spinlock()
  RISC-V: Remove __smp_bp__{before,after}_atomic
  RISC-V: Comment on why {,cmp}xchg is ordered how it is
  ...
parents 4b1967c9 3b62de26
......@@ -40,6 +40,7 @@ generic-y += resource.h
generic-y += scatterlist.h
generic-y += sections.h
generic-y += sembuf.h
generic-y += serial.h
generic-y += setup.h
generic-y += shmbuf.h
generic-y += shmparam.h
......
......@@ -58,17 +58,17 @@
#endif
#if (__SIZEOF_INT__ == 4)
#define INT __ASM_STR(.word)
#define SZINT __ASM_STR(4)
#define LGINT __ASM_STR(2)
#define RISCV_INT __ASM_STR(.word)
#define RISCV_SZINT __ASM_STR(4)
#define RISCV_LGINT __ASM_STR(2)
#else
#error "Unexpected __SIZEOF_INT__"
#endif
#if (__SIZEOF_SHORT__ == 2)
#define SHORT __ASM_STR(.half)
#define SZSHORT __ASM_STR(2)
#define LGSHORT __ASM_STR(1)
#define RISCV_SHORT __ASM_STR(.half)
#define RISCV_SZSHORT __ASM_STR(2)
#define RISCV_LGSHORT __ASM_STR(1)
#else
#error "Unexpected __SIZEOF_SHORT__"
#endif
......
......@@ -50,30 +50,30 @@ static __always_inline void atomic64_set(atomic64_t *v, long i)
* have the AQ or RL bits set. These don't return anything, so there's only
* one version to worry about.
*/
#define ATOMIC_OP(op, asm_op, c_op, I, asm_type, c_type, prefix) \
static __always_inline void atomic##prefix##_##op(c_type i, atomic##prefix##_t *v) \
{ \
__asm__ __volatile__ ( \
"amo" #asm_op "." #asm_type " zero, %1, %0" \
: "+A" (v->counter) \
: "r" (I) \
: "memory"); \
#define ATOMIC_OP(op, asm_op, I, asm_type, c_type, prefix) \
static __always_inline void atomic##prefix##_##op(c_type i, atomic##prefix##_t *v) \
{ \
__asm__ __volatile__ ( \
"amo" #asm_op "." #asm_type " zero, %1, %0" \
: "+A" (v->counter) \
: "r" (I) \
: "memory"); \
}
#ifdef CONFIG_GENERIC_ATOMIC64
#define ATOMIC_OPS(op, asm_op, c_op, I) \
ATOMIC_OP (op, asm_op, c_op, I, w, int, )
#define ATOMIC_OPS(op, asm_op, I) \
ATOMIC_OP (op, asm_op, I, w, int, )
#else
#define ATOMIC_OPS(op, asm_op, c_op, I) \
ATOMIC_OP (op, asm_op, c_op, I, w, int, ) \
ATOMIC_OP (op, asm_op, c_op, I, d, long, 64)
#define ATOMIC_OPS(op, asm_op, I) \
ATOMIC_OP (op, asm_op, I, w, int, ) \
ATOMIC_OP (op, asm_op, I, d, long, 64)
#endif
ATOMIC_OPS(add, add, +, i)
ATOMIC_OPS(sub, add, +, -i)
ATOMIC_OPS(and, and, &, i)
ATOMIC_OPS( or, or, |, i)
ATOMIC_OPS(xor, xor, ^, i)
ATOMIC_OPS(add, add, i)
ATOMIC_OPS(sub, add, -i)
ATOMIC_OPS(and, and, i)
ATOMIC_OPS( or, or, i)
ATOMIC_OPS(xor, xor, i)
#undef ATOMIC_OP
#undef ATOMIC_OPS
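
For reference, a hand expansion of one instance of the reworked generator (ATOMIC_OPS(add, add, i) with the 32-bit arguments) looks roughly like the following; this is an illustration of what the macro above emits, not literal preprocessor output:

	/* Hand expansion of ATOMIC_OP(add, add, i, w, int, ) -- illustration only. */
	static __always_inline void atomic_add(int i, atomic_t *v)
	{
		__asm__ __volatile__ (
			"amoadd.w zero, %1, %0"
			: "+A" (v->counter)
			: "r" (i)
			: "memory");
	}

The dropped c_op argument was never referenced in this body, which is why it could be removed without changing the generated code.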
......@@ -83,7 +83,7 @@ ATOMIC_OPS(xor, xor, ^, i)
 * There are two flavors of these: the arithmetic ops have both fetch and return
* versions, while the logical ops only have fetch versions.
*/
#define ATOMIC_FETCH_OP(op, asm_op, c_op, I, asm_or, c_or, asm_type, c_type, prefix) \
#define ATOMIC_FETCH_OP(op, asm_op, I, asm_or, c_or, asm_type, c_type, prefix) \
static __always_inline c_type atomic##prefix##_fetch_##op##c_or(c_type i, atomic##prefix##_t *v) \
{ \
register c_type ret; \
......@@ -103,13 +103,13 @@ static __always_inline c_type atomic##prefix##_##op##_return##c_or(c_type i, ato
#ifdef CONFIG_GENERIC_ATOMIC64
#define ATOMIC_OPS(op, asm_op, c_op, I, asm_or, c_or) \
ATOMIC_FETCH_OP (op, asm_op, c_op, I, asm_or, c_or, w, int, ) \
ATOMIC_FETCH_OP (op, asm_op, I, asm_or, c_or, w, int, ) \
ATOMIC_OP_RETURN(op, asm_op, c_op, I, asm_or, c_or, w, int, )
#else
#define ATOMIC_OPS(op, asm_op, c_op, I, asm_or, c_or) \
ATOMIC_FETCH_OP (op, asm_op, c_op, I, asm_or, c_or, w, int, ) \
ATOMIC_FETCH_OP (op, asm_op, I, asm_or, c_or, w, int, ) \
ATOMIC_OP_RETURN(op, asm_op, c_op, I, asm_or, c_or, w, int, ) \
ATOMIC_FETCH_OP (op, asm_op, c_op, I, asm_or, c_or, d, long, 64) \
ATOMIC_FETCH_OP (op, asm_op, I, asm_or, c_or, d, long, 64) \
ATOMIC_OP_RETURN(op, asm_op, c_op, I, asm_or, c_or, d, long, 64)
#endif
......@@ -126,28 +126,28 @@ ATOMIC_OPS(sub, add, +, -i, .aqrl, )
#undef ATOMIC_OPS
#ifdef CONFIG_GENERIC_ATOMIC64
#define ATOMIC_OPS(op, asm_op, c_op, I, asm_or, c_or) \
ATOMIC_FETCH_OP(op, asm_op, c_op, I, asm_or, c_or, w, int, )
#define ATOMIC_OPS(op, asm_op, I, asm_or, c_or) \
ATOMIC_FETCH_OP(op, asm_op, I, asm_or, c_or, w, int, )
#else
#define ATOMIC_OPS(op, asm_op, c_op, I, asm_or, c_or) \
ATOMIC_FETCH_OP(op, asm_op, c_op, I, asm_or, c_or, w, int, ) \
ATOMIC_FETCH_OP(op, asm_op, c_op, I, asm_or, c_or, d, long, 64)
#define ATOMIC_OPS(op, asm_op, I, asm_or, c_or) \
ATOMIC_FETCH_OP(op, asm_op, I, asm_or, c_or, w, int, ) \
ATOMIC_FETCH_OP(op, asm_op, I, asm_or, c_or, d, long, 64)
#endif
ATOMIC_OPS(and, and, &, i, , _relaxed)
ATOMIC_OPS(and, and, &, i, .aq , _acquire)
ATOMIC_OPS(and, and, &, i, .rl , _release)
ATOMIC_OPS(and, and, &, i, .aqrl, )
ATOMIC_OPS(and, and, i, , _relaxed)
ATOMIC_OPS(and, and, i, .aq , _acquire)
ATOMIC_OPS(and, and, i, .rl , _release)
ATOMIC_OPS(and, and, i, .aqrl, )
ATOMIC_OPS( or, or, |, i, , _relaxed)
ATOMIC_OPS( or, or, |, i, .aq , _acquire)
ATOMIC_OPS( or, or, |, i, .rl , _release)
ATOMIC_OPS( or, or, |, i, .aqrl, )
ATOMIC_OPS( or, or, i, , _relaxed)
ATOMIC_OPS( or, or, i, .aq , _acquire)
ATOMIC_OPS( or, or, i, .rl , _release)
ATOMIC_OPS( or, or, i, .aqrl, )
ATOMIC_OPS(xor, xor, ^, i, , _relaxed)
ATOMIC_OPS(xor, xor, ^, i, .aq , _acquire)
ATOMIC_OPS(xor, xor, ^, i, .rl , _release)
ATOMIC_OPS(xor, xor, ^, i, .aqrl, )
ATOMIC_OPS(xor, xor, i, , _relaxed)
ATOMIC_OPS(xor, xor, i, .aq , _acquire)
ATOMIC_OPS(xor, xor, i, .rl , _release)
ATOMIC_OPS(xor, xor, i, .aqrl, )
#undef ATOMIC_OPS
......@@ -182,13 +182,13 @@ ATOMIC_OPS(add_negative, add, <, 0)
#undef ATOMIC_OP
#undef ATOMIC_OPS
#define ATOMIC_OP(op, func_op, c_op, I, c_type, prefix) \
#define ATOMIC_OP(op, func_op, I, c_type, prefix) \
static __always_inline void atomic##prefix##_##op(atomic##prefix##_t *v) \
{ \
atomic##prefix##_##func_op(I, v); \
}
#define ATOMIC_FETCH_OP(op, func_op, c_op, I, c_type, prefix) \
#define ATOMIC_FETCH_OP(op, func_op, I, c_type, prefix) \
static __always_inline c_type atomic##prefix##_fetch_##op(atomic##prefix##_t *v) \
{ \
return atomic##prefix##_fetch_##func_op(I, v); \
......@@ -202,16 +202,16 @@ static __always_inline c_type atomic##prefix##_##op##_return(atomic##prefix##_t
#ifdef CONFIG_GENERIC_ATOMIC64
#define ATOMIC_OPS(op, asm_op, c_op, I) \
ATOMIC_OP (op, asm_op, c_op, I, int, ) \
ATOMIC_FETCH_OP (op, asm_op, c_op, I, int, ) \
ATOMIC_OP (op, asm_op, I, int, ) \
ATOMIC_FETCH_OP (op, asm_op, I, int, ) \
ATOMIC_OP_RETURN(op, asm_op, c_op, I, int, )
#else
#define ATOMIC_OPS(op, asm_op, c_op, I) \
ATOMIC_OP (op, asm_op, c_op, I, int, ) \
ATOMIC_FETCH_OP (op, asm_op, c_op, I, int, ) \
ATOMIC_OP (op, asm_op, I, int, ) \
ATOMIC_FETCH_OP (op, asm_op, I, int, ) \
ATOMIC_OP_RETURN(op, asm_op, c_op, I, int, ) \
ATOMIC_OP (op, asm_op, c_op, I, long, 64) \
ATOMIC_FETCH_OP (op, asm_op, c_op, I, long, 64) \
ATOMIC_OP (op, asm_op, I, long, 64) \
ATOMIC_FETCH_OP (op, asm_op, I, long, 64) \
ATOMIC_OP_RETURN(op, asm_op, c_op, I, long, 64)
#endif
......@@ -300,8 +300,13 @@ static __always_inline long atomic64_inc_not_zero(atomic64_t *v)
/*
* atomic_{cmp,}xchg is required to have exactly the same ordering semantics as
* {cmp,}xchg and the operations that return, so they need a barrier. We just
* use the other implementations directly.
* {cmp,}xchg and the operations that return, so they need a barrier.
*/
/*
* FIXME: atomic_cmpxchg_{acquire,release,relaxed} are all implemented by
* assigning the same barrier to both the LR and SC operations, but that might
* not make any sense. We're waiting on a memory model specification to
* determine exactly what the right thing to do is here.
*/
#define ATOMIC_OP(c_t, prefix, c_or, size, asm_or) \
static __always_inline c_t atomic##prefix##_cmpxchg##c_or(atomic##prefix##_t *v, c_t o, c_t n) \
......
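
The FIXME above is about giving the load-reserved and store-conditional halves of the cmpxchg sequence the same ordering suffix. A rough sketch of that pattern for the _acquire flavour (an illustration of the idea only, not the exact macro body in the header):

	/*
	 * Sketch of an LR/SC cmpxchg where both halves carry the acquire bit:
	 *
	 *	0:	lr.w.aq  %[ret], (%[p])         # load-reserved, acquire
	 *		bne      %[ret], %[old], 1f
	 *		sc.w.aq  %[tmp], %[new], (%[p]) # store-conditional, acquire
	 *		bnez     %[tmp], 0b
	 *	1:
	 */

Whether placing .aq (or .rl) on both instructions is the right mapping is exactly what the pending memory model specification is expected to settle.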
......@@ -38,29 +38,6 @@
#define smp_rmb() RISCV_FENCE(r,r)
#define smp_wmb() RISCV_FENCE(w,w)
/*
* These fences exist to enforce ordering around the relaxed AMOs. The
* documentation defines that
* "
* atomic_fetch_add();
* is equivalent to:
* smp_mb__before_atomic();
* atomic_fetch_add_relaxed();
* smp_mb__after_atomic();
* "
* So we emit full fences on both sides.
*/
#define __smb_mb__before_atomic() smp_mb()
#define __smb_mb__after_atomic() smp_mb()
/*
* These barriers prevent accesses performed outside a spinlock from being moved
* inside a spinlock. Since RISC-V sets the aq/rl bits on our spinlock only
* enforce release consistency, we need full fences here.
*/
#define smb_mb__before_spinlock() smp_mb()
#define smb_mb__after_spinlock() smp_mb()
#include <asm-generic/barrier.h>
#endif /* __ASSEMBLY__ */
......
......@@ -67,7 +67,7 @@
: "memory");
#define __test_and_op_bit(op, mod, nr, addr) \
__test_and_op_bit_ord(op, mod, nr, addr, )
__test_and_op_bit_ord(op, mod, nr, addr, .aqrl)
#define __op_bit(op, mod, nr, addr) \
__op_bit_ord(op, mod, nr, addr, )
......
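
With .aqrl on the default variant, a fully ordered bit test-and-set boils down to a single AMO with both ordering bits set. A rough hand expansion of test_and_set_bit(nr, addr) follows (illustration only; on RV64 the .d form is used instead of .w):

	unsigned long __mask = BIT_MASK(nr);
	unsigned long __res;

	__asm__ __volatile__ (
		"amoor.w.aqrl %0, %2, %1"	/* fully ordered read-modify-write */
		: "=r" (__res), "+A" (addr[BIT_WORD(nr)])
		: "r" (__mask)
		: "memory");
	/* the bit's previous value is (__res & __mask) */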
......@@ -27,8 +27,8 @@
typedef u32 bug_insn_t;
#ifdef CONFIG_GENERIC_BUG_RELATIVE_POINTERS
#define __BUG_ENTRY_ADDR INT " 1b - 2b"
#define __BUG_ENTRY_FILE INT " %0 - 2b"
#define __BUG_ENTRY_ADDR RISCV_INT " 1b - 2b"
#define __BUG_ENTRY_FILE RISCV_INT " %0 - 2b"
#else
#define __BUG_ENTRY_ADDR RISCV_PTR " 1b"
#define __BUG_ENTRY_FILE RISCV_PTR " %0"
......@@ -38,7 +38,7 @@ typedef u32 bug_insn_t;
#define __BUG_ENTRY \
__BUG_ENTRY_ADDR "\n\t" \
__BUG_ENTRY_FILE "\n\t" \
SHORT " %1"
RISCV_SHORT " %1"
#else
#define __BUG_ENTRY \
__BUG_ENTRY_ADDR
......
......@@ -18,22 +18,44 @@
#undef flush_icache_range
#undef flush_icache_user_range
#undef flush_dcache_page
static inline void local_flush_icache_all(void)
{
asm volatile ("fence.i" ::: "memory");
}
#define PG_dcache_clean PG_arch_1
static inline void flush_dcache_page(struct page *page)
{
if (test_bit(PG_dcache_clean, &page->flags))
clear_bit(PG_dcache_clean, &page->flags);
}
/*
* RISC-V doesn't have an instruction to flush parts of the instruction cache,
* so instead we just flush the whole thing.
*/
#define flush_icache_range(start, end) flush_icache_all()
#define flush_icache_user_range(vma, pg, addr, len) flush_icache_all()
#ifndef CONFIG_SMP
#define flush_icache_range(start, end) local_flush_icache_all()
#define flush_icache_user_range(vma, pg, addr, len) local_flush_icache_all()
#define flush_icache_all() local_flush_icache_all()
#define flush_icache_mm(mm, local) flush_icache_all()
#else /* CONFIG_SMP */
#define flush_icache_range(start, end) sbi_remote_fence_i(0)
#define flush_icache_user_range(vma, pg, addr, len) sbi_remote_fence_i(0)
#define flush_icache_all() sbi_remote_fence_i(0)
void flush_icache_mm(struct mm_struct *mm, bool local);
#endif /* CONFIG_SMP */
/*
* Bits in sys_riscv_flush_icache()'s flags argument.
*/
#define SYS_RISCV_FLUSH_ICACHE_LOCAL 1UL
#define SYS_RISCV_FLUSH_ICACHE_ALL (SYS_RISCV_FLUSH_ICACHE_LOCAL)
#endif /* _ASM_RISCV_CACHEFLUSH_H */
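
For completeness, a hypothetical userspace sketch of the new cache-flush interface added here (glibc is expected to reach it through the vDSO entry instead; the syscall number matches the vdso-syscalls.h hunk further down, and flush_icache_range_user is an invented helper name):

	#include <stdint.h>
	#include <unistd.h>
	#include <sys/syscall.h>
	#include <asm/unistd.h>

	#ifndef __NR_riscv_flush_icache
	#define __NR_riscv_flush_icache (__NR_arch_specific_syscall + 15)
	#endif

	static void flush_icache_range_user(void *start, void *end)
	{
		/* flags == 0 flushes all harts; SYS_RISCV_FLUSH_ICACHE_LOCAL (1UL)
		 * restricts the flush to the calling hart. */
		syscall(__NR_riscv_flush_icache, (uintptr_t)start, (uintptr_t)end, 0UL);
	}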
......@@ -19,6 +19,8 @@
#ifndef _ASM_RISCV_IO_H
#define _ASM_RISCV_IO_H
#include <linux/types.h>
#ifdef CONFIG_MMU
extern void __iomem *ioremap(phys_addr_t offset, unsigned long size);
......@@ -32,7 +34,7 @@ extern void __iomem *ioremap(phys_addr_t offset, unsigned long size);
#define ioremap_wc(addr, size) ioremap((addr), (size))
#define ioremap_wt(addr, size) ioremap((addr), (size))
extern void iounmap(void __iomem *addr);
extern void iounmap(volatile void __iomem *addr);
#endif /* CONFIG_MMU */
......@@ -250,7 +252,7 @@ static inline u64 __raw_readq(const volatile void __iomem *addr)
const ctype *buf = buffer; \
\
do { \
__raw_writeq(*buf++, addr); \
__raw_write ## len(*buf++, addr); \
} while (--count); \
} \
afence; \
......@@ -266,9 +268,9 @@ __io_reads_ins(reads, u32, l, __io_br(), __io_ar())
__io_reads_ins(ins, u8, b, __io_pbr(), __io_par())
__io_reads_ins(ins, u16, w, __io_pbr(), __io_par())
__io_reads_ins(ins, u32, l, __io_pbr(), __io_par())
#define insb(addr, buffer, count) __insb((void __iomem *)addr, buffer, count)
#define insw(addr, buffer, count) __insw((void __iomem *)addr, buffer, count)
#define insl(addr, buffer, count) __insl((void __iomem *)addr, buffer, count)
#define insb(addr, buffer, count) __insb((void __iomem *)(long)addr, buffer, count)
#define insw(addr, buffer, count) __insw((void __iomem *)(long)addr, buffer, count)
#define insl(addr, buffer, count) __insl((void __iomem *)(long)addr, buffer, count)
__io_writes_outs(writes, u8, b, __io_bw(), __io_aw())
__io_writes_outs(writes, u16, w, __io_bw(), __io_aw())
......@@ -280,9 +282,9 @@ __io_writes_outs(writes, u32, l, __io_bw(), __io_aw())
__io_writes_outs(outs, u8, b, __io_pbw(), __io_paw())
__io_writes_outs(outs, u16, w, __io_pbw(), __io_paw())
__io_writes_outs(outs, u32, l, __io_pbw(), __io_paw())
#define outsb(addr, buffer, count) __outsb((void __iomem *)addr, buffer, count)
#define outsw(addr, buffer, count) __outsw((void __iomem *)addr, buffer, count)
#define outsl(addr, buffer, count) __outsl((void __iomem *)addr, buffer, count)
#define outsb(addr, buffer, count) __outsb((void __iomem *)(long)addr, buffer, count)
#define outsw(addr, buffer, count) __outsw((void __iomem *)(long)addr, buffer, count)
#define outsl(addr, buffer, count) __outsl((void __iomem *)(long)addr, buffer, count)
#ifdef CONFIG_64BIT
__io_reads_ins(reads, u64, q, __io_br(), __io_ar())
......
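
A hand expansion of the byte flavour shows what the one-line __io_writes fix above changes; the name and parameter list below are reconstructed from the macro arguments and the leading barrier argument is elided, so treat it as a sketch rather than literal preprocessor output:

	/* Sketch of __io_writes_outs(writes, u8, b, ...) after the fix. */
	static inline void __writesb(volatile void __iomem *addr,
				     const void *buffer, unsigned int count)
	{
		if (count) {
			const u8 *buf = buffer;

			do {
				/* was unconditionally __raw_writeq() before the fix,
				 * so "byte" string writes emitted 64-bit stores */
				__raw_writeb(*buf++, addr);
			} while (--count);
		}
		/* ... followed by the afence barrier argument ... */
	}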
......@@ -19,6 +19,10 @@
typedef struct {
void *vdso;
#ifdef CONFIG_SMP
/* A local icache flush is needed before user execution can resume. */
cpumask_t icache_stale_mask;
#endif
} mm_context_t;
#endif /* __ASSEMBLY__ */
......
/*
* Copyright (C) 2012 Regents of the University of California
* Copyright (C) 2017 SiFive
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
......@@ -14,11 +15,13 @@
#ifndef _ASM_RISCV_MMU_CONTEXT_H
#define _ASM_RISCV_MMU_CONTEXT_H
#include <linux/mm_types.h>
#include <asm-generic/mm_hooks.h>
#include <linux/mm.h>
#include <linux/sched.h>
#include <asm/tlbflush.h>
#include <asm/cacheflush.h>
static inline void enter_lazy_tlb(struct mm_struct *mm,
struct task_struct *task)
......@@ -46,12 +49,54 @@ static inline void set_pgdir(pgd_t *pgd)
csr_write(sptbr, virt_to_pfn(pgd) | SPTBR_MODE);
}
/*
* When necessary, performs a deferred icache flush for the given MM context,
* on the local CPU. RISC-V has no direct mechanism for instruction cache
* shoot downs, so instead we send an IPI that informs the remote harts they
* need to flush their local instruction caches. To avoid pathologically slow
* behavior in a common case (a bunch of single-hart processes on a many-hart
* machine, ie 'make -j') we avoid the IPIs for harts that are not currently
* executing a MM context and instead schedule a deferred local instruction
* cache flush to be performed before execution resumes on each hart. This
* actually performs that local instruction cache flush, which implicitly only
* refers to the current hart.
*/
static inline void flush_icache_deferred(struct mm_struct *mm)
{
#ifdef CONFIG_SMP
unsigned int cpu = smp_processor_id();
cpumask_t *mask = &mm->context.icache_stale_mask;
if (cpumask_test_cpu(cpu, mask)) {
cpumask_clear_cpu(cpu, mask);
/*
* Ensure the remote hart's writes are visible to this hart.
* This pairs with a barrier in flush_icache_mm.
*/
smp_mb();
local_flush_icache_all();
}
#endif
}
static inline void switch_mm(struct mm_struct *prev,
struct mm_struct *next, struct task_struct *task)
{
if (likely(prev != next)) {
/*
* Mark the current MM context as inactive, and the next as
* active. This is at least used by the icache flushing
 * routines in order to determine who should be flushed.
*/
unsigned int cpu = smp_processor_id();
cpumask_clear_cpu(cpu, mm_cpumask(prev));
cpumask_set_cpu(cpu, mm_cpumask(next));
set_pgdir(next->pgd);
local_flush_tlb_all();
flush_icache_deferred(next);
}
}
......
......@@ -178,28 +178,6 @@ static inline pte_t *pte_offset_kernel(pmd_t *pmd, unsigned long addr)
#define pte_offset_map(dir, addr) pte_offset_kernel((dir), (addr))
#define pte_unmap(pte) ((void)(pte))
/*
* Certain architectures need to do special things when PTEs within
* a page table are directly modified. Thus, the following hook is
* made available.
*/
static inline void set_pte(pte_t *ptep, pte_t pteval)
{
*ptep = pteval;
}
static inline void set_pte_at(struct mm_struct *mm,
unsigned long addr, pte_t *ptep, pte_t pteval)
{
set_pte(ptep, pteval);
}
static inline void pte_clear(struct mm_struct *mm,
unsigned long addr, pte_t *ptep)
{
set_pte_at(mm, addr, ptep, __pte(0));
}
static inline int pte_present(pte_t pte)
{
return (pte_val(pte) & _PAGE_PRESENT);
......@@ -210,21 +188,22 @@ static inline int pte_none(pte_t pte)
return (pte_val(pte) == 0);
}
/* static inline int pte_read(pte_t pte) */
static inline int pte_write(pte_t pte)
{
return pte_val(pte) & _PAGE_WRITE;
}
static inline int pte_exec(pte_t pte)
{
return pte_val(pte) & _PAGE_EXEC;
}
static inline int pte_huge(pte_t pte)
{
return pte_present(pte)
&& (pte_val(pte) & (_PAGE_READ | _PAGE_WRITE | _PAGE_EXEC));
}
/* static inline int pte_exec(pte_t pte) */
static inline int pte_dirty(pte_t pte)
{
return pte_val(pte) & _PAGE_DIRTY;
......@@ -311,6 +290,33 @@ static inline int pte_same(pte_t pte_a, pte_t pte_b)
return pte_val(pte_a) == pte_val(pte_b);
}
/*
* Certain architectures need to do special things when PTEs within
* a page table are directly modified. Thus, the following hook is
* made available.
*/
static inline void set_pte(pte_t *ptep, pte_t pteval)
{
*ptep = pteval;
}
void flush_icache_pte(pte_t pte);
static inline void set_pte_at(struct mm_struct *mm,
unsigned long addr, pte_t *ptep, pte_t pteval)
{
if (pte_present(pteval) && pte_exec(pteval))
flush_icache_pte(pteval);
set_pte(ptep, pteval);
}
static inline void pte_clear(struct mm_struct *mm,
unsigned long addr, pte_t *ptep)
{
set_pte_at(mm, addr, ptep, __pte(0));
}
#define __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS
static inline int ptep_set_access_flags(struct vm_area_struct *vma,
unsigned long address, pte_t *ptep,
......
......@@ -24,7 +24,7 @@
/* FIXME: Replace this with a ticket lock, like MIPS. */
#define arch_spin_is_locked(x) ((x)->lock != 0)
#define arch_spin_is_locked(x) (READ_ONCE((x)->lock) != 0)
static inline void arch_spin_unlock(arch_spinlock_t *lock)
{
......@@ -58,15 +58,6 @@ static inline void arch_spin_lock(arch_spinlock_t *lock)
}
}
static inline void arch_spin_unlock_wait(arch_spinlock_t *lock)
{
smp_rmb();
do {
cpu_relax();
} while (arch_spin_is_locked(lock));
smp_acquire__after_ctrl_dep();
}
/***********************************************************/
static inline void arch_read_lock(arch_rwlock_t *lock)
......
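
The READ_ONCE() change above matters to callers that poll the lock word in a loop; a minimal illustration (wait_for_unlock is an invented example, not part of the patch):

	/* Without READ_ONCE() in arch_spin_is_locked(), the compiler may hoist the
	 * load of lock->lock out of the loop and spin forever on a stale value. */
	static void wait_for_unlock(arch_spinlock_t *lock)
	{
		while (arch_spin_is_locked(lock))
			cpu_relax();
	}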
......@@ -18,7 +18,7 @@
typedef unsigned long cycles_t;
static inline cycles_t get_cycles(void)
static inline cycles_t get_cycles_inline(void)
{
cycles_t n;
......@@ -27,6 +27,7 @@ static inline cycles_t get_cycles(void)
: "=r" (n));
return n;
}
#define get_cycles get_cycles_inline
#ifdef CONFIG_64BIT
static inline uint64_t get_cycles64(void)
......
......@@ -17,7 +17,12 @@
#ifdef CONFIG_MMU
/* Flush entire local TLB */
#include <linux/mm_types.h>
/*
* Flush entire local TLB. 'sfence.vma' implicitly fences with the instruction
* cache as well, so a 'fence.i' is not necessary.
*/
static inline void local_flush_tlb_all(void)
{
__asm__ __volatile__ ("sfence.vma" : : : "memory");
......
/*
* Copyright (C) 2017 SiFive
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef _ASM_RISCV_VDSO_SYSCALLS_H
#define _ASM_RISCV_VDSO_SYSCALLS_H
#ifdef CONFIG_SMP
/* These syscalls are only used by the vDSO and are not in the uapi. */
#define __NR_riscv_flush_icache (__NR_arch_specific_syscall + 15)
__SYSCALL(__NR_riscv_flush_icache, sys_riscv_flush_icache)
#endif
#endif /* _ASM_RISCV_VDSO_SYSCALLS_H */
......@@ -38,4 +38,8 @@ struct vdso_data {
(void __user *)((unsigned long)(base) + __vdso_##name); \
})
#ifdef CONFIG_SMP
asmlinkage long sys_riscv_flush_icache(uintptr_t, uintptr_t, uintptr_t);
#endif
#endif /* _ASM_RISCV_VDSO_H */
......@@ -152,6 +152,3 @@ END(_start)
__PAGE_ALIGNED_BSS
/* Empty zero page */
.balign PAGE_SIZE
ENTRY(empty_zero_page)
.fill (empty_zero_page + PAGE_SIZE) - ., 1, 0x00
END(empty_zero_page)
......@@ -12,4 +12,7 @@
/*
* Assembly functions that may be used (directly or indirectly) by modules
*/
EXPORT_SYMBOL(__clear_user);
EXPORT_SYMBOL(__copy_user);
EXPORT_SYMBOL(memset);
EXPORT_SYMBOL(memcpy);
......@@ -58,7 +58,12 @@ static char __initdata builtin_cmdline[COMMAND_LINE_SIZE] = CONFIG_CMDLINE;
#endif /* CONFIG_CMDLINE_BOOL */
unsigned long va_pa_offset;
EXPORT_SYMBOL(va_pa_offset);
unsigned long pfn_base;
EXPORT_SYMBOL(pfn_base);
unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)] __page_aligned_bss;
EXPORT_SYMBOL(empty_zero_page);
/* The lucky hart to first increment this variable will boot the other cores */
atomic_t hart_lottery;
......
......@@ -38,6 +38,13 @@ enum ipi_message_type {
IPI_MAX
};
/* Unsupported */
int setup_profiling_timer(unsigned int multiplier)
{
return -EINVAL;
}
irqreturn_t handle_ipi(void)
{
unsigned long *pending_ipis = &ipi_data[smp_processor_id()].bits;
......@@ -108,3 +115,51 @@ void smp_send_reschedule(int cpu)
{
send_ipi_message(cpumask_of(cpu), IPI_RESCHEDULE);
}
/*
* Performs an icache flush for the given MM context. RISC-V has no direct
* mechanism for instruction cache shoot downs, so instead we send an IPI that
* informs the remote harts they need to flush their local instruction caches.
* To avoid pathologically slow behavior in a common case (a bunch of
* single-hart processes on a many-hart machine, ie 'make -j') we avoid the
* IPIs for harts that are not currently executing a MM context and instead
* schedule a deferred local instruction cache flush to be performed before
* execution resumes on each hart.
*/
void flush_icache_mm(struct mm_struct *mm, bool local)
{
unsigned int cpu;
cpumask_t others, *mask;
preempt_disable();
/* Mark every hart's icache as needing a flush for this MM. */
mask = &mm->context.icache_stale_mask;
cpumask_setall(mask);
/* Flush this hart's I$ now, and mark it as flushed. */
cpu = smp_processor_id();
cpumask_clear_cpu(cpu, mask);
local_flush_icache_all();
/*
* Flush the I$ of other harts concurrently executing, and mark them as
* flushed.
*/
cpumask_andnot(&others, mm_cpumask(mm), cpumask_of(cpu));
local |= cpumask_empty(&others);
if (mm != current->active_mm || !local)
sbi_remote_fence_i(others.bits);
else {
/*
* It's assumed that at least one strongly ordered operation is
* performed on this hart between setting a hart's cpumask bit
* and scheduling this MM context on that hart. Sending an SBI
* remote message will do this, but in the case where no
* messages are sent we still need to order this hart's writes
* with flush_icache_deferred().
*/
smp_mb();
}
preempt_enable();