Commit 1cdcf888 authored by Philippe Gerum

cobalt/posix/timer: fix CPU affinity tracking

Timers may have specific CPU affinity requirements in that their
backing clock device might not beat on all available CPUs, but only on
a subset of them.

The CPU affinity of every timer bound to a particular thread has to be
tracked each time such a timer is started, so that no core timer is
queued to a per-CPU list which won't receive any event from the
backing clock device.

Such tracking was missing for timerfd and POSIX timers, along with the
internal timers driving the sporadic scheduling policy.

While at it, the timer affinity code was cleaned up by folding all of
the affinity selection logic into a single call,
i.e. xntimer_set_affinity().
parent 59560645
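
For illustration only (not part of the commit): the caller-side pattern this change converges on is "pin, then start". The sketch below distills it from the hunks touching the sporadic scheduler and thread timers; the helper name example_arm_thread_timer() is hypothetical, while the xntimer_set_affinity()/xntimer_start() calls and their arguments mirror the ones visible in the diff, and the usual locking context (nklock held, IRQs off) is assumed.

#include <cobalt/kernel/timer.h>	/* assumed header locations */
#include <cobalt/kernel/thread.h>

/* Hypothetical helper, sketched for illustration only. */
static int example_arm_thread_timer(struct xntimer *timer,
				    struct xnthread *thread,
				    xnticks_t date)
{
	/*
	 * Track the timer affinity first: prefer the CPU the target
	 * thread runs on if the backing clock device ticks there,
	 * otherwise xntimer_set_affinity() falls back to a CPU which
	 * does receive events (CPU0 for a global device).
	 */
	xntimer_set_affinity(timer, thread->sched);

	/*
	 * Only then start the timer, so it is queued to a per-CPU
	 * list which actually receives clock events.
	 */
	return xntimer_start(timer, date, XN_INFINITE, XN_ABSOLUTE);
}
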
......@@ -254,15 +254,6 @@ static inline int xnclock_set_time(struct xnclock *clock,
#endif /* !CONFIG_XENO_OPT_EXTCLOCK */
#ifdef CONFIG_SMP
int xnclock_get_default_cpu(struct xnclock *clock, int cpu);
#else
static inline int xnclock_get_default_cpu(struct xnclock *clock, int cpu)
{
return cpu;
}
#endif
static inline xnticks_t xnclock_get_offset(struct xnclock *clock)
{
return clock->wallclock_offset;
......
......@@ -522,14 +522,23 @@ int xntimer_setup_ipi(void);
void xntimer_release_ipi(void);
bool xntimer_set_sched(struct xntimer *timer,
struct xnsched *sched);
void __xntimer_set_affinity(struct xntimer *timer,
struct xnsched *sched);
static inline void xntimer_set_affinity(struct xntimer *timer,
struct xnsched *sched)
{
if (sched != xntimer_sched(timer))
__xntimer_set_affinity(timer, sched);
}
#else /* ! CONFIG_SMP */
static inline void xntimer_migrate(struct xntimer *timer,
struct xnsched *sched)
{ }
{
timer->sched = sched;
}
static inline int xntimer_setup_ipi(void)
{
......@@ -538,10 +547,10 @@ static inline int xntimer_setup_ipi(void)
static inline void xntimer_release_ipi(void) { }
static inline bool xntimer_set_sched(struct xntimer *timer,
struct xnsched *sched)
static inline void xntimer_set_affinity(struct xntimer *timer,
struct xnsched *sched)
{
return true;
xntimer_migrate(timer, sched);
}
#endif /* CONFIG_SMP */
......
......@@ -329,34 +329,6 @@ xnticks_t xnclock_core_read_monotonic(void)
}
EXPORT_SYMBOL_GPL(xnclock_core_read_monotonic);
#ifdef CONFIG_SMP
int xnclock_get_default_cpu(struct xnclock *clock, int cpu)
{
cpumask_t set;
/*
* Check a CPU number against the possible set of CPUs
* receiving events from the underlying clock device. If the
* suggested CPU does not receive events from this device,
* return the first one which does. We also account for the
* dynamic set of real-time CPUs.
*
* A clock device with no percpu semantics causes this routine
* to return CPU0 unconditionally.
*/
if (cpumask_empty(&clock->affinity))
return 0;
cpumask_and(&set, &clock->affinity, &cobalt_cpu_affinity);
if (!cpumask_empty(&set) && !cpumask_test_cpu(cpu, &set))
cpu = cpumask_first(&set);
return cpu;
}
EXPORT_SYMBOL_GPL(xnclock_get_default_cpu);
#endif /* !CONFIG_SMP */
#ifdef CONFIG_XENO_OPT_STATS
static struct xnvfile_directory timerlist_vfroot;
......@@ -657,7 +629,7 @@ int xnclock_register(struct xnclock *clock, const cpumask_t *affinity)
cpumask_and(&clock->affinity, affinity, &xnsched_realtime_cpus);
if (cpumask_empty(&clock->affinity))
return -EINVAL;
} else /* No percpu semantics. */
} else /* Device is global without particular IRQ affinity. */
cpumask_clear(&clock->affinity);
#endif
......@@ -669,8 +641,9 @@ int xnclock_register(struct xnclock *clock, const cpumask_t *affinity)
/*
* POLA: init all timer slots for the new clock, although some
* of them might remain unused depending on the CPU affinity
* of the event source(s). If the clock device has no percpu
* semantics, all timers will be queued to slot #0.
* of the event source(s). If the clock device is global
* without any particular IRQ affinity, all timers will be
* queued to CPU0.
*/
for_each_online_cpu(cpu) {
tmd = xnclock_percpu_timerdata(clock, cpu);
......@@ -732,8 +705,8 @@ EXPORT_SYMBOL_GPL(xnclock_deregister);
* @coretags{coreirq-only, atomic-entry}
*
* @note The current CPU must be part of the real-time affinity set
* unless the clock device has no percpu semantics, otherwise weird
* things may happen.
* unless the clock device has no particular IRQ affinity, otherwise
* weird things may happen.
*/
void xnclock_tick(struct xnclock *clock)
{
......@@ -748,8 +721,9 @@ void xnclock_tick(struct xnclock *clock)
#ifdef CONFIG_SMP
/*
* Some external clock devices may have no percpu semantics,
* in which case all timers are queued to slot #0.
* Some external clock devices may be global without any
* particular IRQ affinity, in which case the associated
* timers will be queued to CPU0.
*/
if (IS_ENABLED(CONFIG_XENO_OPT_EXTCLOCK) &&
clock != &nkclock &&
......
......@@ -326,11 +326,8 @@ static inline int timer_set(struct cobalt_timer *timer, int flags,
/*
* No extension, or operation not handled. Default to plain
* POSIX behavior.
*/
/*
* If the target thread vanished, simply don't start the
* timer.
*
* If the target thread vanished, just don't start the timer.
*/
thread = cobalt_thread_find(timer->target);
if (thread == NULL)
......@@ -338,9 +335,9 @@ static inline int timer_set(struct cobalt_timer *timer, int flags,
/*
* Make the timer affine to the CPU running the thread to be
* signaled.
* signaled if possible.
*/
xntimer_set_sched(&timer->timerbase, thread->threadbase.sched);
xntimer_set_affinity(&timer->timerbase, thread->threadbase.sched);
return __cobalt_timer_setval(&timer->timerbase,
clock_flag(flags, timer->clockid), value);
......
......@@ -119,7 +119,7 @@ timerfd_select(struct rtdm_fd *fd, struct xnselector *selector,
return -ENOMEM;
xnlock_get_irqsave(&nklock, s);
xntimer_set_sched(&tfd->timer, xnsched_current());
xntimer_set_affinity(&tfd->timer, xnthread_current()->sched);
err = xnselect_bind(&tfd->read_select, binding, selector, type,
index, tfd->flags & COBALT_TFD_TICKED);
xnlock_put_irqrestore(&nklock, s);
......@@ -268,7 +268,7 @@ int __cobalt_timerfd_settime(int fd, int flags,
if (ovalue)
__cobalt_timer_getval(&tfd->timer, ovalue);
xntimer_set_sched(&tfd->timer, xnsched_current());
xntimer_set_affinity(&tfd->timer, xnthread_current()->sched);
ret = __cobalt_timer_setval(&tfd->timer,
clock_flag(cflag, tfd->clockid), value);
......
......@@ -227,13 +227,11 @@ static void xnsched_quota_init(struct xnsched *sched)
xntimer_init(&qs->refill_timer,
&nkclock, quota_refill_handler, sched,
XNTIMER_NOBLCK|XNTIMER_IGRAVITY);
xntimer_set_sched(&qs->refill_timer, sched);
xntimer_set_name(&qs->refill_timer, refiller_name);
xntimer_init(&qs->limit_timer,
&nkclock, quota_limit_handler, sched,
XNTIMER_NOBLCK|XNTIMER_IGRAVITY);
xntimer_set_sched(&qs->limit_timer, sched);
xntimer_set_name(&qs->limit_timer, limiter_name);
}
......
......@@ -106,6 +106,7 @@ static void sporadic_schedule_drop(struct xnthread *thread)
* the thread is running, trading cycles at firing time
* against cycles when arming the timer.
*/
xntimer_set_affinity(&pss->drop_timer, thread->sched);
ret = xntimer_start(&pss->drop_timer, now + pss->budget,
XN_INFINITE, XN_ABSOLUTE);
if (ret == -ETIMEDOUT) {
......@@ -141,7 +142,7 @@ retry:
} while(--pss->repl_pending > 0);
if (pss->repl_pending > 0) {
xntimer_set_sched(&pss->repl_timer, thread->sched);
xntimer_set_affinity(&pss->repl_timer, thread->sched);
ret = xntimer_start(&pss->repl_timer, pss->repl_data[r].date,
XN_INFINITE, XN_ABSOLUTE);
if (ret == -ETIMEDOUT)
......@@ -194,6 +195,7 @@ static void sporadic_post_recharge(struct xnthread *thread, xnticks_t budget)
pss->repl_in = (r + 1) % MAX_REPLENISH;
if (pss->repl_pending++ == 0) {
xntimer_set_affinity(&pss->repl_timer, thread->sched);
ret = xntimer_start(&pss->repl_timer, pss->repl_data[r].date,
XN_INFINITE, XN_ABSOLUTE);
/*
......
......@@ -103,7 +103,6 @@ static void xnsched_tp_init(struct xnsched *sched)
INIT_LIST_HEAD(&tp->threads);
xntimer_init(&tp->tf_timer, &nkclock, tp_tick_handler,
sched, XNTIMER_NOBLCK|XNTIMER_IGRAVITY);
xntimer_set_sched(&tp->tf_timer, sched);
xntimer_set_name(&tp->tf_timer, timer_name);
}
......
......@@ -57,23 +57,6 @@ static void timeout_handler(struct xntimer *timer)
xnthread_resume(thread, XNDELAY);
}
static inline void fixup_ptimer_affinity(struct xnthread *thread)
{
#ifdef CONFIG_SMP
struct xntimer *timer = &thread->ptimer;
int cpu;
/*
* The thread a periodic timer is affine to might have been
* migrated to another CPU while passive. Fix this up.
*/
if (thread->sched != timer->sched) {
cpu = xnclock_get_default_cpu(xntimer_clock(timer),
xnsched_cpu(thread->sched));
xntimer_set_sched(timer, xnsched_struct(cpu));
}
#endif
}
static void periodic_handler(struct xntimer *timer)
{
struct xnthread *thread = container_of(timer, struct xnthread, ptimer);
......@@ -84,7 +67,11 @@ static void periodic_handler(struct xntimer *timer)
if (xnthread_test_state(thread, XNDELAY|XNPEND) == XNDELAY)
xnthread_resume(thread, XNDELAY);
fixup_ptimer_affinity(thread);
/*
* The periodic thread might have migrated to another CPU
* while passive, fix the timer affinity if need be.
*/
xntimer_set_affinity(&thread->ptimer, thread->sched);
}
static inline void enlist_new_thread(struct xnthread *thread)
......@@ -912,7 +899,7 @@ void xnthread_suspend(struct xnthread *thread, int mask,
* Don't start the timer for a thread delayed indefinitely.
*/
if (timeout != XN_INFINITE || timeout_mode != XN_RELATIVE) {
xntimer_set_sched(&thread->rtimer, thread->sched);
xntimer_set_affinity(&thread->rtimer, thread->sched);
if (xntimer_start(&thread->rtimer, timeout, XN_INFINITE,
timeout_mode)) {
/* (absolute) timeout value in the past, bail out. */
......@@ -1293,7 +1280,7 @@ EXPORT_SYMBOL_GPL(xnthread_unblock);
int xnthread_set_periodic(struct xnthread *thread, xnticks_t idate,
xntmode_t timeout_mode, xnticks_t period)
{
int ret = 0, cpu;
int ret = 0;
spl_t s;
if (thread == NULL) {
......@@ -1322,16 +1309,7 @@ int xnthread_set_periodic(struct xnthread *thread, xnticks_t idate,
goto unlock_and_exit;
}
/*
* Pin the periodic timer to a proper CPU, by order of
* preference: the CPU the timed thread runs on if possible,
* or the first CPU by logical number which can receive events
* from the clock device backing the timer, among the dynamic
* set of real-time CPUs currently enabled.
*/
cpu = xnclock_get_default_cpu(xntimer_clock(&thread->ptimer),
xnsched_cpu(thread->sched));
xntimer_set_sched(&thread->ptimer, xnsched_struct(cpu));
xntimer_set_affinity(&thread->ptimer, thread->sched);
if (idate == XN_INFINITE)
xntimer_start(&thread->ptimer, period, period, XN_RELATIVE);
......
......@@ -337,7 +337,6 @@ void __xntimer_init(struct xntimer *timer,
int flags)
{
spl_t s __maybe_unused;
int cpu;
#ifdef CONFIG_XENO_OPT_EXTCLOCK
timer->clock = clock;
......@@ -348,19 +347,16 @@ void __xntimer_init(struct xntimer *timer,
timer->status = (XNTIMER_DEQUEUED|(flags & XNTIMER_INIT_MASK));
timer->handler = handler;
timer->interval_ns = 0;
timer->sched = NULL;
/*
* If the CPU the caller is affine to does not receive timer
* events, or no affinity was specified (i.e. sched == NULL),
* assign the timer to the first possible CPU which can
* receive interrupt events from the clock device backing this
* timer.
*
* If the clock device has no percpu semantics,
* xnclock_get_default_cpu() makes the timer always affine to
* CPU0 unconditionally.
* Set the timer affinity, preferably to xnsched_cpu(sched) if
* sched was given, CPU0 otherwise.
*/
cpu = xnclock_get_default_cpu(clock, sched ? xnsched_cpu(sched) : 0);
timer->sched = xnsched_struct(cpu);
if (sched == NULL)
sched = xnsched_struct(0);
xntimer_set_affinity(timer, sched);
#ifdef CONFIG_XENO_OPT_STATS
#ifdef CONFIG_XENO_OPT_EXTCLOCK
......@@ -427,21 +423,6 @@ void __xntimer_switch_tracking(struct xntimer *timer,
#endif /* CONFIG_XENO_OPT_STATS */
static inline void __xntimer_set_clock(struct xntimer *timer,
struct xnclock *newclock)
{
#ifdef CONFIG_SMP
int cpu;
/*
* Make sure the timer lives on a CPU the backing clock device
* ticks on.
*/
cpu = xnclock_get_default_cpu(newclock, xnsched_cpu(timer->sched));
xntimer_migrate(timer, xnsched_struct(cpu));
#endif
__xntimer_switch_tracking(timer, newclock);
}
/**
* @brief Set the reference clock of a timer.
*
......@@ -457,9 +438,15 @@ static inline void __xntimer_set_clock(struct xntimer *timer,
void xntimer_set_clock(struct xntimer *timer,
struct xnclock *newclock)
{
xntimer_stop(timer);
timer->clock = newclock;
__xntimer_set_clock(timer, newclock);
if (timer->clock != newclock) {
xntimer_stop(timer);
timer->clock = newclock;
/*
* Since the timer was stopped, we can wait until it
* is restarted for fixing its CPU affinity.
*/
__xntimer_switch_tracking(timer, newclock);
}
}
#endif /* CONFIG_XENO_OPT_EXTCLOCK */
......@@ -512,13 +499,10 @@ EXPORT_SYMBOL_GPL(xntimer_destroy);
* @coretags{unrestricted, atomic-entry}
*/
void __xntimer_migrate(struct xntimer *timer, struct xnsched *sched)
{ /* nklocked, IRQs off */
{ /* nklocked, IRQs off, sched != timer->sched */
struct xnclock *clock;
xntimerq_t *q;
if (sched == timer->sched)
return;
trace_cobalt_timer_migrate(timer, xnsched_cpu(sched));
/*
......@@ -526,9 +510,6 @@ void __xntimer_migrate(struct xntimer *timer, struct xnsched *sched)
* for which we do not expect any clock events/IRQs from the
* associated clock device. If so, the timer would never fire
* since clock ticks would never happen on that CPU.
*
* A clock device with an empty affinity mask has no percpu
* semantics, which disables the check.
*/
XENO_WARN_ON_SMP(COBALT,
!cpumask_empty(&xntimer_clock(timer)->affinity) &&
......@@ -548,23 +529,57 @@ void __xntimer_migrate(struct xntimer *timer, struct xnsched *sched)
}
EXPORT_SYMBOL_GPL(__xntimer_migrate);
bool xntimer_set_sched(struct xntimer *timer,
struct xnsched *sched)
static inline int get_clock_cpu(struct xnclock *clock, int cpu)
{
/*
* We may deny the request if the target CPU does not receive
* any event from the clock device backing the timer, or the
* clock device has no percpu semantics.
* Check a CPU number against the possible set of CPUs
* receiving events from the underlying clock device. If the
* suggested CPU does not receive events from this device,
* return the first one which does instead.
*
* A global clock device with no particular IRQ affinity may
* tick on any CPU, but timers should always be queued on
* CPU0.
*
* NOTE: we have scheduler slots initialized for all online
* CPUs, we can program and receive clock ticks on any of
* them. So there is no point in restricting the valid CPU set
* to cobalt_cpu_affinity, which specifically refers to the
* set of CPUs which may run real-time threads. Although
* receiving a clock tick for waking up a thread living on a
* remote CPU is not optimal since this involves IPI-signaled
* rescheds, this is still a valid case.
*/
if (cpumask_test_cpu(xnsched_cpu(sched),
&xntimer_clock(timer)->affinity)) {
xntimer_migrate(timer, sched);
return true;
}
if (cpumask_empty(&clock->affinity))
return 0;
if (cpumask_test_cpu(cpu, &clock->affinity))
return cpu;
return cpumask_first(&clock->affinity);
}
void __xntimer_set_affinity(struct xntimer *timer, struct xnsched *sched)
{ /* nklocked, IRQs off */
struct xnclock *clock = xntimer_clock(timer);
int cpu;
return false;
/*
* Figure out which CPU is best suited for managing this
* timer, preferably picking xnsched_cpu(sched) if the ticking
* device moving the timer clock beats on that CPU. Otherwise,
* pick the first CPU from the clock affinity mask if set. If
* not, the timer is backed by a global device with no
* particular IRQ affinity, so it should always be queued to
* CPU0.
*/
cpu = 0;
if (!cpumask_empty(&clock->affinity))
cpu = get_clock_cpu(clock, xnsched_cpu(sched));
xntimer_migrate(timer, xnsched_struct(cpu));
}
EXPORT_SYMBOL_GPL(xntimer_set_sched);
EXPORT_SYMBOL_GPL(__xntimer_set_affinity);
int xntimer_setup_ipi(void)
{
......