Commit b9f77b00 authored by Philippe Gerum

ipipe: add cpuidle control interface

Add a kernel interface for sharing CPU idling control between the host
kernel and a co-kernel. The former invokes ipipe_cpuidle_control(),
which the latter should implement, to determine whether entering a
sleep state is acceptable. This hook should return boolean true if so.

The co-kernel may veto such entry if need be, in order to prevent
latency spikes, as exiting sleep states might be costly depending on
the CPU idling operation being used.
parent 54bedcd0
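For a concrete picture of the contract described above, here is a minimal
sketch of the co-kernel side. The hook name and signature come from this
commit; the per-CPU oob_timer_armed flag is hypothetical co-kernel state
invented purely for illustration:

#include <linux/cpuidle.h>
#include <linux/percpu.h>

/*
 * Hypothetical co-kernel state: true while an out-of-band timer shot
 * is programmed on this CPU and must not be lost to a sleep state.
 */
static DEFINE_PER_CPU(bool, oob_timer_armed);

/*
 * Strong definition overriding the __weak default added by this
 * commit. Returning false vetoes the transition: the CPU is denied
 * entry into the sleep state and the timer hardware stays enabled.
 * The hook runs with hard IRQs disabled, so the per-CPU read cannot
 * race with CPU migration.
 */
bool ipipe_cpuidle_control(struct cpuidle_device *dev,
                           struct cpuidle_state *state)
{
        return !raw_cpu_read(oob_timer_armed);
}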
......@@ -718,29 +718,39 @@ The following kernel areas are involved in interrupt pipelining:
* CPUIDLE support
Interrupt pipelining introduces an interesting corner case in the
logic of the CPU idle framework: the kernel might be idle in the
sense that no in-band activity is scheduled yet, and at the same
time, some out-of-band code might wait for a tick event already
The logic of the CPUIDLE framework has to account for the
specific issues interrupt pipelining introduces:
- the kernel might be idle in the sense that no in-band activity
is scheduled yet, and planning to shut down the timer device
suffering the C3STOP (mis)feature. However, at the same time,
some out-of-band code might wait for a tick event already
programmed in the timer hardware controlled by some out-of-band
code via the timer_ interposition mechanism.
In that situation, we don't want the CPUIDLE logic to turn off the
hardware timer, causing the pending out-of-band event to be
lost. Since the in-band kernel code does not know about the
out-of-band context plans in essence, CPUIDLE calls
:c:func:`ipipe_enter_idle_hook` to figure out whether the
out-of-band system is fine with entering the idle state as well.
Conversely, the CPUIDLE logic invokes :c:func:`ipipe_exit_idle_hook`
to inform the out-of-band code when the idle state ends. Both
routines should be overridden by the out-of-band code for receiving
these notifications (*__weak* binding).
If :c:func:`ipipe_enter_idle_hook` returns a boolean *true* value,
CPUIDLE proceeds normally and may turn off the per-CPU timer
hardware if the *C3STOP* misfeature is detected there. Otherwise,
the CPU is simply denied entry into the idle state, leaving the
timer hardware enabled.
- switching the CPU to a power saving state may incur a
significant latency, particularly when waking it up to handle
an incoming IRQ, which is at odds with the purpose of
interrupt pipelining.
Obviously, we don't want the CPUIDLE logic to turn off the
hardware timer when C3STOP is in effect for the timer device,
which would cause the pending out-of-band event to be
lost.
Likewise, the wake-up latency induced by entering a sleep state on
particular hardware may not always be acceptable.
Since the in-band kernel code does not know about the out-of-band
code's plans by design, CPUIDLE calls :c:func:`ipipe_cpuidle_control`
to figure out whether the out-of-band system is fine with entering
the idle state as well. This routine should be overridden by the
out-of-band code to receive such notifications (*__weak*
binding).
If this hook returns a boolean *true* value, CPUIDLE proceeds
normally. Otherwise, the CPU is simply denied entry into the idle
state, leaving the timer hardware enabled.
* Kernel preemption control (PREEMPT)
......
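The second bullet above calls for a latency-aware policy rather than a
simple flag. As a sketch, the co-kernel could compare the candidate
state's worst-case wake-up time against the delay to its next
out-of-band event; state->exit_latency is a real cpuidle field
expressed in microseconds, while oob_ns_to_next_event() is a
hypothetical helper standing in for the co-kernel's timer bookkeeping
(a co-kernel would provide a single strong definition of the hook,
choosing one such policy):

#include <linux/cpuidle.h>
#include <linux/ktime.h>

/*
 * Hypothetical co-kernel helper: nanoseconds until the next
 * out-of-band timer event programmed on this CPU, or KTIME_MAX
 * when none is armed.
 */
extern s64 oob_ns_to_next_event(void);

bool ipipe_cpuidle_control(struct cpuidle_device *dev,
                           struct cpuidle_state *state)
{
        /*
         * Veto the sleep state whenever waking up from it could not
         * complete before the next out-of-band tick is due.
         */
        return oob_ns_to_next_event() >
                (s64)state->exit_latency * NSEC_PER_USEC;
}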
......@@ -17,6 +17,7 @@
#include <linux/pm_qos.h>
#include <linux/cpu.h>
#include <linux/cpuidle.h>
#include <linux/ipipe.h>
#include <linux/ktime.h>
#include <linux/hrtimer.h>
#include <linux/module.h>
......@@ -204,6 +205,15 @@ int cpuidle_enter_state(struct cpuidle_device *dev, struct cpuidle_driver *drv,
ktime_t time_start, time_end;
s64 diff;
/*
* A co-kernel running on the head stage of the IRQ pipeline
* may deny this switch.
*/
if (!ipipe_enter_cpuidle(dev, target_state)) {
ipipe_exit_cpuidle();
return -EBUSY;
}
/*
* Tell the time framework to switch to a broadcast timer because our
* local timer will be shut down. If a local timer is used from another
......@@ -264,6 +274,8 @@ int cpuidle_enter_state(struct cpuidle_device *dev, struct cpuidle_driver *drv,
dev->last_residency = 0;
}
ipipe_exit_cpuidle();
return entered_state;
}
......
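One caller-visible consequence of the hunk above is that a vetoed
transition surfaces as -EBUSY from cpuidle_enter_state(), after
ipipe_exit_cpuidle() has restored interrupts on the denial path. Since
the hook receives the target state, a veto against a deep state does
not preclude a shallower one, so a caller could retry accordingly. The
fallback loop below is a hypothetical sketch, not part of this commit,
assuming cpuidle_enter_state() is visible (it is declared in the
cpuidle core's private header):

#include <linux/cpuidle.h>
#include <linux/errno.h>

/*
 * Hypothetical fallback: try the requested state first, then
 * progressively shallower ones while the co-kernel keeps vetoing
 * the entry (cpuidle_enter_state() returns -EBUSY in that case).
 */
static int enter_deepest_allowed(struct cpuidle_device *dev,
                                 struct cpuidle_driver *drv,
                                 int index)
{
        int ret = -EBUSY;

        while (index >= 0 && ret == -EBUSY)
                ret = cpuidle_enter_state(dev, drv, index--);

        return ret;
}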
......@@ -34,6 +34,9 @@
#include <asm/ipipe.h>
#endif
struct cpuidle_device;
struct cpuidle_state;
#ifdef CONFIG_IPIPE
#include <linux/ipipe_domain.h>
......@@ -426,6 +429,13 @@ void __ipipe_tracer_hrclock_initialized(void);
} while (0)
#endif /* !CONFIG_IPIPE_WANT_PREEMPTIBLE_SWITCH */
bool __ipipe_enter_cpuidle(void);
bool ipipe_enter_cpuidle(struct cpuidle_device *dev,
struct cpuidle_state *state);
void ipipe_exit_cpuidle(void);
#else /* !CONFIG_IPIPE */
#define __ipipe_root_p 1
......@@ -460,6 +470,20 @@ int ipipe_handle_syscall(struct thread_info *ti,
return 0;
}
static inline bool __ipipe_enter_cpuidle(void)
{
return true;
}
static inline
bool ipipe_enter_cpuidle(struct cpuidle_device *dev,
struct cpuidle_state *state)
{
return true;
}
static inline void ipipe_exit_cpuidle(void) { }
#endif /* !CONFIG_IPIPE */
#endif /* !__LINUX_IPIPE_H */
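The stub block above follows the usual kernel pattern of compiling
hooks away: since the !CONFIG_IPIPE variants are static inlines with
constant results, call sites need no #ifdef and the compiler can drop
the denial branch entirely. A trivial illustration of an #ifdef-free
caller, assuming the declarations above are in scope; try_idle() is a
made-up name:

/* Builds the same way with and without CONFIG_IPIPE. */
static int try_idle(struct cpuidle_device *dev, struct cpuidle_state *state)
{
        if (!ipipe_enter_cpuidle(dev, state)) {
                ipipe_exit_cpuidle();   /* empty stub when !CONFIG_IPIPE */
                return -EBUSY;
        }
        /* ... enter the sleep state, then ... */
        ipipe_exit_cpuidle();
        return 0;
}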
......@@ -1910,6 +1910,49 @@ void __ipipe_share_current(int flags)
}
EXPORT_SYMBOL_GPL(__ipipe_share_current);
bool __weak ipipe_cpuidle_control(struct cpuidle_device *dev,
struct cpuidle_state *state)
{
/*
* Allow entering the idle state by default, matching the
* original behavior when CPU_IDLE is turned
* on. ipipe_cpuidle_control() should be overridden by the
* client domain code to determine whether the CPU may
* actually enter the idle state.
*/
return true;
}
bool __ipipe_enter_cpuidle(void)
{
struct ipipe_percpu_domain_data *p;
/*
* We may go idle if no interrupt is awaiting delivery from the
* root stage.
*/
hard_local_irq_disable();
p = ipipe_this_cpu_root_context();
return !__ipipe_ipending_p(p);
}
bool ipipe_enter_cpuidle(struct cpuidle_device *dev,
struct cpuidle_state *state)
{
/*
* Pending IRQs or a co-kernel may deny the transition to
* idle.
*/
return __ipipe_enter_cpuidle() && ipipe_cpuidle_control(dev, state);
}
void ipipe_exit_cpuidle(void)
{
/* unstall and re-enable hw IRQs too. */
local_irq_enable();
}
#if defined(CONFIG_DEBUG_ATOMIC_SLEEP) || defined(CONFIG_PROVE_LOCKING) || \
defined(CONFIG_PREEMPT_VOLUNTARY) || defined(CONFIG_IPIPE_DEBUG_CONTEXT)
void __ipipe_uaccess_might_fault(void)
......
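Note the asymmetry these helpers establish: ipipe_enter_cpuidle()
returns with hard IRQs disabled whether or not it grants the
transition, so ipipe_exit_cpuidle() must run on every subsequent path
to unstall the root stage and re-enable them. A sketch of the
resulting calling pattern, using a hypothetical caller rather than
code from this commit:

#include <linux/cpu.h>
#include <linux/cpuidle.h>
#include <linux/ipipe.h>

static void idle_transition_sketch(struct cpuidle_device *dev,
                                   struct cpuidle_state *state)
{
        if (ipipe_enter_cpuidle(dev, state)) {
                /*
                 * Granted: hard IRQs are off, no root-stage IRQ is
                 * pending and the co-kernel agreed; safe to sleep.
                 */
                arch_cpu_idle();
        }
        /* Mandatory on both the granted and the denied paths. */
        ipipe_exit_cpuidle();
}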
......@@ -79,51 +79,6 @@ void __weak arch_cpu_idle(void)
local_irq_enable();
}
#ifdef CONFIG_IPIPE
bool __weak ipipe_enter_idle_hook(void)
{
/*
* By default, we may enter the idle state if no co-kernel is
* present.
*/
return ipipe_root_domain == ipipe_head_domain;
}
void __weak ipipe_exit_idle_hook(void) { }
static bool pipeline_idle_enter(void)
{
struct ipipe_percpu_domain_data *p;
/*
* We may go idle if no interrupt is waiting delivery from the
* root stage, or a co-kernel denies such transition.
*/
hard_local_irq_disable();
p = ipipe_this_cpu_root_context();
return !__ipipe_ipending_p(p) && ipipe_enter_idle_hook();
}
static inline void pipeline_idle_exit(void)
{
ipipe_exit_idle_hook();
/* unstall and re-enable hw IRQs too. */
local_irq_enable();
}
#else
static inline bool pipeline_idle_enter(void)
{
return true;
}
static inline void pipeline_idle_exit(void) { }
#endif /* !CONFIG_IPIPE */
/**
* default_idle_call - Default CPU idle routine.
*
......@@ -131,12 +86,12 @@ static inline void pipeline_idle_exit(void) { }
*/
void __cpuidle default_idle_call(void)
{
if (current_clr_polling_and_test() || !pipeline_idle_enter()) {
if (current_clr_polling_and_test() || !__ipipe_enter_cpuidle()) {
local_irq_enable();
} else {
stop_critical_timings();
arch_cpu_idle();
pipeline_idle_exit();
ipipe_exit_cpuidle();
start_critical_timings();
}
}
......@@ -144,13 +99,11 @@ void __cpuidle default_idle_call(void)
static int call_cpuidle(struct cpuidle_driver *drv, struct cpuidle_device *dev,
int next_state)
{
int ret;
/*
* The idle task must be scheduled, it is pointless to go to idle, just
* update no idle residency and return.
*/
if (current_clr_polling_and_test() || !pipeline_idle_enter()) {
if (current_clr_polling_and_test()) {
dev->last_residency = 0;
local_irq_enable();
return -EBUSY;
......@@ -161,10 +114,7 @@ static int call_cpuidle(struct cpuidle_driver *drv, struct cpuidle_device *dev,
* This function will block until an interrupt occurs and will take
* care of re-enabling the local interrupts
*/
ret = cpuidle_enter(drv, dev, next_state);
pipeline_idle_exit();
return ret;
return cpuidle_enter(drv, dev, next_state);
}
/**
......@@ -205,10 +155,6 @@ static void cpuidle_idle_call(void)
goto exit_idle;
}
if (!pipeline_idle_enter()) {
local_irq_enable();
goto exit_idle;
}
/*
* Suspend-to-idle ("s2idle") is a system state in which all user space
* has been frozen, all I/O devices have been suspended and the only
......@@ -237,7 +183,6 @@ static void cpuidle_idle_call(void)
next_state = cpuidle_find_deepest_state(drv, dev);
call_cpuidle(drv, dev, next_state);
pipeline_idle_exit();
} else {
bool stop_tick = true;
......@@ -254,7 +199,6 @@ static void cpuidle_idle_call(void)
rcu_idle_enter();
entered_state = call_cpuidle(drv, dev, next_state);
pipeline_idle_exit();
/*
* Give the governor an opportunity to reflect on the outcome
*/
......