Commit 828cad8e authored by Linus Torvalds's avatar Linus Torvalds

Merge branch 'sched-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull scheduler updates from Ingo Molnar:
 "The main changes in this (fairly busy) cycle were:

   - There was a class of scheduler bugs related to forgetting to update
     the rq-clock timestamp which can cause weird and hard to debug
     problems, so there's a new debug facility for this: which uncovered
     a whole lot of bugs which convinced us that we want to keep the
     debug facility.

     (Peter Zijlstra, Matt Fleming)

   - Various cputime related updates: eliminate cputime and use u64
     nanoseconds directly, simplify and improve the arch interfaces,
     implement delayed accounting more widely, etc. - (Frederic
     Weisbecker)

   - Move code around for better structure plus cleanups (Ingo Molnar)

   - Move IO schedule accounting deeper into the scheduler plus related
     changes to improve the situation (Tejun Heo)

   - ... plus a round of sched/rt and sched/deadline fixes, plus other
     fixes, updats and cleanups"

* 'sched-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (85 commits)
  sched/core: Remove unlikely() annotation from sched_move_task()
  sched/autogroup: Rename auto_group.[ch] to autogroup.[ch]
  sched/topology: Split out scheduler topology code from core.c into topology.c
  sched/core: Remove unnecessary #include headers
  sched/rq_clock: Consolidate the ordering of the rq_clock methods
  delayacct: Include <uapi/linux/taskstats.h>
  sched/core: Clean up comments
  sched/rt: Show the 'sched_rr_timeslice' SCHED_RR timeslice tuning knob in milliseconds
  sched/clock: Add dummy clear_sched_clock_stable() stub function
  sched/cputime: Remove generic asm headers
  sched/cputime: Remove unused nsec_to_cputime()
  s390, sched/cputime: Remove unused cputime definitions
  powerpc, sched/cputime: Remove unused cputime definitions
  s390, sched/cputime: Make arch_cpu_idle_time() to return nsecs
  ia64, sched/cputime: Remove unused cputime definitions
  ia64: Convert vtime to use nsec units directly
  ia64, sched/cputime: Move the nsecs based cputime headers to the last arch using it
  sched/cputime: Remove jiffies based cputime
  sched/cputime, vtime: Return nsecs instead of cputime_t to account
  sched/cputime: Complete nsec conversion of tick based accounting
  ...
parents 60c906ba bb3bac2c
......@@ -408,6 +408,11 @@ CONTENTS
* the new scheduling related syscalls that manipulate it, i.e.,
sched_setattr() and sched_getattr() are implemented.
For debugging purposes, the leftover runtime and absolute deadline of a
SCHED_DEADLINE task can be retrieved through /proc/<pid>/sched (entries
dl.runtime and dl.deadline, both values in ns). A programmatic way to
retrieve these values from production code is under discussion.
4.3 Default behavior
---------------------
......@@ -476,6 +481,7 @@ CONTENTS
Still missing:
- programmatic way to retrieve current runtime and absolute deadline
- refinements to deadline inheritance, especially regarding the possibility
of retaining bandwidth isolation among non-interacting tasks. This is
being studied from both theoretical and practical points of view, and
......
......@@ -158,11 +158,11 @@ as its prone to starvation without deadline scheduling.
Consider two sibling groups A and B; both have 50% bandwidth, but A's
period is twice the length of B's.
* group A: period=100000us, runtime=10000us
- this runs for 0.01s once every 0.1s
* group A: period=100000us, runtime=50000us
- this runs for 0.05s once every 0.1s
* group B: period= 50000us, runtime=10000us
- this runs for 0.01s twice every 0.1s (or once every 0.05 sec).
* group B: period= 50000us, runtime=25000us
- this runs for 0.025s twice every 0.1s (or once every 0.05 sec).
This means that currently a while (1) loop in A will run for the full period of
B and can starve B's tasks (assuming they are of lower priority) for a whole
......
generic-y += clkdev.h
generic-y += cputime.h
generic-y += exec.h
generic-y += export.h
generic-y += irq_work.h
......
......@@ -1145,7 +1145,7 @@ struct rusage32 {
SYSCALL_DEFINE2(osf_getrusage, int, who, struct rusage32 __user *, ru)
{
struct rusage32 r;
cputime_t utime, stime;
u64 utime, stime;
unsigned long utime_jiffies, stime_jiffies;
if (who != RUSAGE_SELF && who != RUSAGE_CHILDREN)
......@@ -1155,16 +1155,16 @@ SYSCALL_DEFINE2(osf_getrusage, int, who, struct rusage32 __user *, ru)
switch (who) {
case RUSAGE_SELF:
task_cputime(current, &utime, &stime);
utime_jiffies = cputime_to_jiffies(utime);
stime_jiffies = cputime_to_jiffies(stime);
utime_jiffies = nsecs_to_jiffies(utime);
stime_jiffies = nsecs_to_jiffies(stime);
jiffies_to_timeval32(utime_jiffies, &r.ru_utime);
jiffies_to_timeval32(stime_jiffies, &r.ru_stime);
r.ru_minflt = current->min_flt;
r.ru_majflt = current->maj_flt;
break;
case RUSAGE_CHILDREN:
utime_jiffies = cputime_to_jiffies(current->signal->cutime);
stime_jiffies = cputime_to_jiffies(current->signal->cstime);
utime_jiffies = nsecs_to_jiffies(current->signal->cutime);
stime_jiffies = nsecs_to_jiffies(current->signal->cstime);
jiffies_to_timeval32(utime_jiffies, &r.ru_utime);
jiffies_to_timeval32(stime_jiffies, &r.ru_stime);
r.ru_minflt = current->signal->cmin_flt;
......
......@@ -2,7 +2,6 @@ generic-y += auxvec.h
generic-y += bitsperlong.h
generic-y += bugs.h
generic-y += clkdev.h
generic-y += cputime.h
generic-y += device.h
generic-y += div64.h
generic-y += emergency-restart.h
......
......@@ -2,7 +2,6 @@
generic-y += bitsperlong.h
generic-y += clkdev.h
generic-y += cputime.h
generic-y += current.h
generic-y += early_ioremap.h
generic-y += emergency-restart.h
......
generic-y += bugs.h
generic-y += clkdev.h
generic-y += cputime.h
generic-y += delay.h
generic-y += div64.h
generic-y += dma.h
......
generic-y += clkdev.h
generic-y += cputime.h
generic-y += delay.h
generic-y += device.h
generic-y += div64.h
......
......@@ -2,7 +2,6 @@
generic-y += auxvec.h
generic-y += bitsperlong.h
generic-y += bugs.h
generic-y += cputime.h
generic-y += current.h
generic-y += device.h
generic-y += div64.h
......
......@@ -5,7 +5,6 @@ generic-y += barrier.h
generic-y += bitsperlong.h
generic-y += bugs.h
generic-y += clkdev.h
generic-y += cputime.h
generic-y += current.h
generic-y += device.h
generic-y += div64.h
......
......@@ -4,7 +4,6 @@ generic-y += barrier.h
generic-y += bitsperlong.h
generic-y += clkdev.h
generic-y += cmpxchg.h
generic-y += cputime.h
generic-y += device.h
generic-y += div64.h
generic-y += errno.h
......
generic-y += clkdev.h
generic-y += cputime.h
generic-y += exec.h
generic-y += irq_work.h
generic-y += mcs_spinlock.h
......
......@@ -5,7 +5,6 @@ generic-y += bugs.h
generic-y += cacheflush.h
generic-y += checksum.h
generic-y += clkdev.h
generic-y += cputime.h
generic-y += current.h
generic-y += delay.h
generic-y += device.h
......
......@@ -6,7 +6,6 @@ generic-y += barrier.h
generic-y += bug.h
generic-y += bugs.h
generic-y += clkdev.h
generic-y += cputime.h
generic-y += current.h
generic-y += device.h
generic-y += div64.h
......
......@@ -18,11 +18,7 @@
#ifndef __IA64_CPUTIME_H
#define __IA64_CPUTIME_H
#ifndef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
# include <asm-generic/cputime.h>
#else
# include <asm/processor.h>
# include <asm-generic/cputime_nsecs.h>
#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
extern void arch_vtime_task_switch(struct task_struct *tsk);
#endif /* CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */
......
......@@ -27,6 +27,12 @@ struct thread_info {
mm_segment_t addr_limit; /* user-level address space limit */
int preempt_count; /* 0=premptable, <0=BUG; will also serve as bh-counter */
#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
__u64 utime;
__u64 stime;
__u64 gtime;
__u64 hardirq_time;
__u64 softirq_time;
__u64 idle_time;
__u64 ac_stamp;
__u64 ac_leave;
__u64 ac_stime;
......
......@@ -1031,7 +1031,7 @@ GLOBAL_ENTRY(ia64_native_sched_clock)
END(ia64_native_sched_clock)
#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
GLOBAL_ENTRY(cycle_to_cputime)
GLOBAL_ENTRY(cycle_to_nsec)
alloc r16=ar.pfs,1,0,0,0
addl r8=THIS_CPU(ia64_cpu_info) + IA64_CPUINFO_NSEC_PER_CYC_OFFSET,r0
;;
......@@ -1047,7 +1047,7 @@ GLOBAL_ENTRY(cycle_to_cputime)
;;
shrp r8=r9,r8,IA64_NSEC_PER_CYC_SHIFT
br.ret.sptk.many rp
END(cycle_to_cputime)
END(cycle_to_nsec)
#endif /* CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */
#ifdef CONFIG_IA64_BRL_EMU
......
......@@ -619,6 +619,8 @@ setup_arch (char **cmdline_p)
check_sal_cache_flush();
#endif
paging_init();
clear_sched_clock_stable();
}
/*
......
......@@ -21,6 +21,7 @@
#include <linux/timex.h>
#include <linux/timekeeper_internal.h>
#include <linux/platform_device.h>
#include <linux/cputime.h>
#include <asm/machvec.h>
#include <asm/delay.h>
......@@ -59,18 +60,43 @@ static struct clocksource *itc_clocksource;
#include <linux/kernel_stat.h>
extern cputime_t cycle_to_cputime(u64 cyc);
extern u64 cycle_to_nsec(u64 cyc);
void vtime_account_user(struct task_struct *tsk)
void vtime_flush(struct task_struct *tsk)
{
cputime_t delta_utime;
struct thread_info *ti = task_thread_info(tsk);
u64 delta;
if (ti->ac_utime) {
delta_utime = cycle_to_cputime(ti->ac_utime);
account_user_time(tsk, delta_utime);
ti->ac_utime = 0;
if (ti->utime)
account_user_time(tsk, cycle_to_nsec(ti->utime));
if (ti->gtime)
account_guest_time(tsk, cycle_to_nsec(ti->gtime));
if (ti->idle_time)
account_idle_time(cycle_to_nsec(ti->idle_time));
if (ti->stime) {
delta = cycle_to_nsec(ti->stime);
account_system_index_time(tsk, delta, CPUTIME_SYSTEM);
}
if (ti->hardirq_time) {
delta = cycle_to_nsec(ti->hardirq_time);
account_system_index_time(tsk, delta, CPUTIME_IRQ);
}
if (ti->softirq_time) {
delta = cycle_to_nsec(ti->softirq_time));
account_system_index_time(tsk, delta, CPUTIME_SOFTIRQ);
}
ti->utime = 0;
ti->gtime = 0;
ti->idle_time = 0;
ti->stime = 0;
ti->hardirq_time = 0;
ti->softirq_time = 0;
}
/*
......@@ -83,7 +109,7 @@ void arch_vtime_task_switch(struct task_struct *prev)
struct thread_info *pi = task_thread_info(prev);
struct thread_info *ni = task_thread_info(current);
pi->ac_stamp = ni->ac_stamp;
ni->ac_stamp = pi->ac_stamp;
ni->ac_stime = ni->ac_utime = 0;
}
......@@ -91,18 +117,15 @@ void arch_vtime_task_switch(struct task_struct *prev)
* Account time for a transition between system, hard irq or soft irq state.
* Note that this function is called with interrupts enabled.
*/
static cputime_t vtime_delta(struct task_struct *tsk)
static __u64 vtime_delta(struct task_struct *tsk)
{
struct thread_info *ti = task_thread_info(tsk);
cputime_t delta_stime;
__u64 now;
__u64 now, delta_stime;
WARN_ON_ONCE(!irqs_disabled());
now = ia64_get_itc();
delta_stime = cycle_to_cputime(ti->ac_stime + (now - ti->ac_stamp));
ti->ac_stime = 0;
delta_stime = now - ti->ac_stamp;
ti->ac_stamp = now;
return delta_stime;
......@@ -110,15 +133,25 @@ static cputime_t vtime_delta(struct task_struct *tsk)
void vtime_account_system(struct task_struct *tsk)
{
cputime_t delta = vtime_delta(tsk);
account_system_time(tsk, 0, delta);
struct thread_info *ti = task_thread_info(tsk);
__u64 stime = vtime_delta(tsk);
if ((tsk->flags & PF_VCPU) && !irq_count())
ti->gtime += stime;
else if (hardirq_count())
ti->hardirq_time += stime;
else if (in_serving_softirq())
ti->softirq_time += stime;
else
ti->stime += stime;
}
EXPORT_SYMBOL_GPL(vtime_account_system);
void vtime_account_idle(struct task_struct *tsk)
{
account_idle_time(vtime_delta(tsk));
struct thread_info *ti = task_thread_info(tsk);
ti->idle_time += vtime_delta(tsk);
}
#endif /* CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */
......
generic-y += clkdev.h
generic-y += cputime.h
generic-y += exec.h
generic-y += irq_work.h
generic-y += kvm_para.h
......
generic-y += barrier.h
generic-y += bitsperlong.h
generic-y += clkdev.h
generic-y += cputime.h
generic-y += device.h
generic-y += emergency-restart.h
generic-y += errno.h
......
......@@ -2,7 +2,6 @@ generic-y += auxvec.h
generic-y += bitsperlong.h
generic-y += bugs.h
generic-y += clkdev.h
generic-y += cputime.h
generic-y += current.h
generic-y += device.h
generic-y += dma.h
......
generic-y += barrier.h
generic-y += clkdev.h
generic-y += cputime.h
generic-y += device.h
generic-y += exec.h
generic-y += irq_work.h
......
# MIPS headers
generic-(CONFIG_GENERIC_CSUM) += checksum.h
generic-y += clkdev.h
generic-y += cputime.h
generic-y += current.h
generic-y += dma-contiguous.h
generic-y += emergency-restart.h
......
......@@ -99,15 +99,7 @@ jiffies_to_compat_timeval(unsigned long jiffies, struct compat_timeval *value)
#undef TASK_SIZE
#define TASK_SIZE TASK_SIZE32
#undef cputime_to_timeval
#define cputime_to_timeval cputime_to_compat_timeval
static __inline__ void
cputime_to_compat_timeval(const cputime_t cputime, struct compat_timeval *value)
{
unsigned long jiffies = cputime_to_jiffies(cputime);
value->tv_usec = (jiffies % HZ) * (1000000L / HZ);
value->tv_sec = jiffies / HZ;
}
#undef ns_to_timeval
#define ns_to_timeval ns_to_compat_timeval
#include "../../../fs/binfmt_elf.c"
......@@ -102,15 +102,7 @@ jiffies_to_compat_timeval(unsigned long jiffies, struct compat_timeval *value)
#undef TASK_SIZE
#define TASK_SIZE TASK_SIZE32
#undef cputime_to_timeval
#define cputime_to_timeval cputime_to_compat_timeval
static __inline__ void
cputime_to_compat_timeval(const cputime_t cputime, struct compat_timeval *value)
{
unsigned long jiffies = cputime_to_jiffies(cputime);
value->tv_usec = (jiffies % HZ) * (1000000L / HZ);
value->tv_sec = jiffies / HZ;
}
#undef ns_to_timeval
#define ns_to_timeval ns_to_compat_timeval
#include "../../../fs/binfmt_elf.c"
generic-y += barrier.h
generic-y += clkdev.h
generic-y += cputime.h
generic-y += exec.h
generic-y += irq_work.h
generic-y += mcs_spinlock.h
......
......@@ -6,7 +6,6 @@ generic-y += bitsperlong.h
generic-y += bug.h
generic-y += bugs.h
generic-y += clkdev.h
generic-y += cputime.h
generic-y += current.h
generic-y += device.h
generic-y += div64.h
......
......@@ -12,7 +12,6 @@ generic-y += checksum.h
generic-y += clkdev.h
generic-y += cmpxchg-local.h
generic-y += cmpxchg.h
generic-y += cputime.h
generic-y += current.h
generic-y += device.h
generic-y += div64.h
......
......@@ -2,7 +2,6 @@
generic-y += auxvec.h
generic-y += barrier.h
generic-y += clkdev.h
generic-y += cputime.h
generic-y += device.h
generic-y += div64.h
generic-y += emergency-restart.h
......
......@@ -91,14 +91,7 @@ struct elf_prpsinfo32
current->thread.map_base = DEFAULT_MAP_BASE32; \
current->thread.task_size = DEFAULT_TASK_SIZE32 \
#undef cputime_to_timeval
#define cputime_to_timeval cputime_to_compat_timeval
static __inline__ void
cputime_to_compat_timeval(const cputime_t cputime, struct compat_timeval *value)
{
unsigned long jiffies = cputime_to_jiffies(cputime);
value->tv_usec = (jiffies % HZ) * (1000000L / HZ);
value->tv_sec = jiffies / HZ;
}
#undef ns_to_timeval
#define ns_to_timeval ns_to_compat_timeval
#include "../../../fs/binfmt_elf.c"
......@@ -36,6 +36,7 @@
#undef PCI_DEBUG
#include <linux/proc_fs.h>
#include <linux/export.h>
#include <linux/sched.h>
#include <asm/processor.h>
#include <asm/sections.h>
......@@ -176,6 +177,7 @@ void __init setup_arch(char **cmdline_p)
conswitchp = &dummy_con; /* we use do_take_over_console() later ! */
#endif
clear_sched_clock_stable();
}
/*
......
......@@ -12,9 +12,17 @@
/* Stuff for accurate time accounting */
struct cpu_accounting_data {
unsigned long user_time; /* accumulated usermode TB ticks */
unsigned long system_time; /* accumulated system TB ticks */
unsigned long user_time_scaled; /* accumulated usermode SPURR ticks */
/* Accumulated cputime values to flush on ticks*/
unsigned long utime;
unsigned long stime;
unsigned long utime_scaled;
unsigned long stime_scaled;
unsigned long gtime;
unsigned long hardirq_time;
unsigned long softirq_time;
unsigned long steal_time;
unsigned long idle_time;
/* Internal counters */
unsigned long starttime; /* TB value snapshot */
unsigned long starttime_user; /* TB value on exit to usermode */
unsigned long startspurr; /* SPURR value snapshot */
......
......@@ -16,12 +16,7 @@
#ifndef __POWERPC_CPUTIME_H
#define __POWERPC_CPUTIME_H
#ifndef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
#include <asm-generic/cputime.h>
#ifdef __KERNEL__
static inline void setup_cputime_one_jiffy(void) { }
#endif
#else
#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
#include <linux/types.h>
#include <linux/time.h>
......@@ -36,65 +31,6 @@ typedef u64 __nocast cputime64_t;
#define cmpxchg_cputime(ptr, old, new) cmpxchg(ptr, old, new)
#ifdef __KERNEL__
/*
* One jiffy in timebase units computed during initialization
*/
extern cputime_t cputime_one_jiffy;
/*
* Convert cputime <-> jiffies
*/
extern u64 __cputime_jiffies_factor;
static inline unsigned long cputime_to_jiffies(const cputime_t ct)
{
return mulhdu((__force u64) ct, __cputime_jiffies_factor);
}
static inline cputime_t jiffies_to_cputime(const unsigned long jif)
{
u64 ct;
unsigned long sec;
/* have to be a little careful about overflow */
ct = jif % HZ;
sec = jif / HZ;
if (ct) {
ct *= tb_ticks_per_sec;
do_div(ct, HZ);
}
if (sec)
ct += (cputime_t) sec * tb_ticks_per_sec;
return (__force cputime_t) ct;
}
static inline void setup_cputime_one_jiffy(void)
{
cputime_one_jiffy = jiffies_to_cputime(1);
}
static inline cputime64_t jiffies64_to_cputime64(const u64 jif)
{
u64 ct;
u64 sec = jif;
/* have to be a little careful about overflow */
ct = do_div(sec, HZ);
if (ct) {
ct *= tb_ticks_per_sec;
do_div(ct, HZ);
}
if (sec)
ct += (u64) sec * tb_ticks_per_sec;
return (__force cputime64_t) ct;
}
static inline u64 cputime64_to_jiffies64(const cputime_t ct)
{
return mulhdu((__force u64) ct, __cputime_jiffies_factor);
}
/*
* Convert cputime <-> microseconds
*/
......@@ -105,117 +41,6 @@ static inline unsigned long cputime_to_usecs(const cputime_t ct)
return mulhdu((__force u64) ct, __cputime_usec_factor);
}
static inline cputime_t usecs_to_cputime(const unsigned long us)
{
u64 ct;
unsigned long sec;
/* have to be a little careful about overflow */
ct = us % 1000000;
sec = us / 1000000;
if (ct) {
ct *= tb_ticks_per_sec;
do_div(ct, 1000000);
}
if (sec)
ct += (cputime_t) sec * tb_ticks_per_sec;
return (__force cputime_t) ct;
}
#define usecs_to_cputime64(us) usecs_to_cputime(us)
/*
* Convert cputime <-> seconds
*/
extern u64 __cputime_sec_factor;
static inline unsigned long cputime_to_secs(const cputime_t ct)
{
return mulhdu((__force u64) ct, __cputime_sec_factor);
}
static inline cputime_t secs_to_cputime(const unsigned long sec)
{
return (__force cputime_t)((u64) sec * tb_ticks_per_sec);