Commit 93fa7636 authored by Markus Metzger's avatar Markus Metzger Committed by Ingo Molnar

x86, ptrace: PEBS support

Polish the ds.h interface and add support for PEBS.

Ds.c is meant to be the resource allocator for per-thread and per-cpu
BTS and PEBS recording.
It is used by ptrace/utrace to provide execution tracing of debugged tasks.
It will be used by profilers (e.g. perfmon2).
It may be used by kernel debuggers to provide a kernel execution trace.

Changes in detail:
- guard DS and ptrace by CONFIG macros
- separate DS and BTS more clearly
- simplify field accesses
- add functions to manage PEBS buffers
- add simple protection/allocation mechanism
- added support for Atom

Opens:
- buffer overflow handling
  Currently, only circular buffers are supported. This is all we need
  for debugging. Profilers would want an overflow notification.
  This is planned to be added when perfmon2 is made to use the ds.h
  interface.
- utrace intermediate layer
Signed-off-by: default avatarMarkus Metzger <markus.t.metzger@intel.com>
Signed-off-by: default avatarIngo Molnar <mingo@elte.hu>
Signed-off-by: default avatarThomas Gleixner <tglx@linutronix.de>
parent 492c2e47
......@@ -415,3 +415,21 @@ config X86_MINIMUM_CPU_FAMILY
config X86_DEBUGCTLMSR
def_bool y
depends on !(M586MMX || M586TSC || M586 || M486 || M386)
config X86_DS
bool "Debug Store support"
default y
help
Add support for Debug Store.
This allows the kernel to provide a memory buffer to the hardware
to store various profiling and tracing events.
config X86_PTRACE_BTS
bool "ptrace interface to Branch Trace Store"
default y
depends on (X86_DS && X86_DEBUGCTLMSR)
help
Add a ptrace interface to allow collecting an execution trace
of the traced task.
This collects control flow changes in a (cyclic) buffer and allows
debuggers to fill in the gaps and show an execution trace of the debuggee.
......@@ -222,10 +222,11 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c)
set_cpu_cap(c, X86_FEATURE_BTS);
if (!(l1 & (1<<12)))
set_cpu_cap(c, X86_FEATURE_PEBS);
ds_init_intel(c);
}
if (cpu_has_bts)
ds_init_intel(c);
ptrace_bts_init_intel(c);
}
static unsigned int __cpuinit intel_size_cache(struct cpuinfo_x86 *c, unsigned int size)
......
This diff is collapsed.
......@@ -316,6 +316,14 @@ void exit_thread(void)
tss->x86_tss.io_bitmap_base = INVALID_IO_BITMAP_OFFSET;
put_cpu();
}
#ifdef CONFIG_X86_DS
/* Free any DS contexts that have not been properly released. */
if (unlikely(current->thread.ds_ctx)) {
/* we clear debugctl to make sure DS is not used. */
update_debugctlmsr(0);
ds_free(current->thread.ds_ctx);
}
#endif /* CONFIG_X86_DS */
}
void flush_thread(void)
......@@ -482,18 +490,27 @@ __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p,
{
struct thread_struct *prev, *next;
unsigned long debugctl;
unsigned long ds_prev = 0, ds_next = 0;
prev = &prev_p->thread;
next = &next_p->thread;
debugctl = prev->debugctlmsr;
if (next->ds_area_msr != prev->ds_area_msr) {
#ifdef CONFIG_X86_DS
if (prev->ds_ctx)
ds_prev = (unsigned long)prev->ds_ctx->ds;
if (next->ds_ctx)
ds_next = (unsigned long)next->ds_ctx->ds;
if (ds_next != ds_prev) {
/* we clear debugctl to make sure DS
* is not in use when we change it */
debugctl = 0;
update_debugctlmsr(0);
wrmsr(MSR_IA32_DS_AREA, next->ds_area_msr, 0);
wrmsr(MSR_IA32_DS_AREA, ds_next, 0);
}
#endif /* CONFIG_X86_DS */
if (next->debugctlmsr != debugctl)
update_debugctlmsr(next->debugctlmsr);
......@@ -517,13 +534,13 @@ __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p,
hard_enable_TSC();
}
#ifdef X86_BTS
#ifdef CONFIG_X86_PTRACE_BTS
if (test_tsk_thread_flag(prev_p, TIF_BTS_TRACE_TS))
ptrace_bts_take_timestamp(prev_p, BTS_TASK_DEPARTS);
if (test_tsk_thread_flag(next_p, TIF_BTS_TRACE_TS))
ptrace_bts_take_timestamp(next_p, BTS_TASK_ARRIVES);
#endif
#endif /* CONFIG_X86_PTRACE_BTS */
if (!test_tsk_thread_flag(next_p, TIF_IO_BITMAP)) {
......
......@@ -267,6 +267,14 @@ void exit_thread(void)
t->io_bitmap_max = 0;
put_cpu();
}
#ifdef CONFIG_X86_DS
/* Free any DS contexts that have not been properly released. */
if (unlikely(t->ds_ctx)) {
/* we clear debugctl to make sure DS is not used. */
update_debugctlmsr(0);
ds_free(t->ds_ctx);
}
#endif /* CONFIG_X86_DS */
}
void flush_thread(void)
......@@ -492,18 +500,27 @@ static inline void __switch_to_xtra(struct task_struct *prev_p,
{
struct thread_struct *prev, *next;
unsigned long debugctl;
unsigned long ds_prev = 0, ds_next = 0;
prev = &prev_p->thread,
next = &next_p->thread;
debugctl = prev->debugctlmsr;
if (next->ds_area_msr != prev->ds_area_msr) {
#ifdef CONFIG_X86_DS
if (prev->ds_ctx)
ds_prev = (unsigned long)prev->ds_ctx->ds;
if (next->ds_ctx)
ds_next = (unsigned long)next->ds_ctx->ds;
if (ds_next != ds_prev) {
/* we clear debugctl to make sure DS
* is not in use when we change it */
debugctl = 0;
update_debugctlmsr(0);
wrmsrl(MSR_IA32_DS_AREA, next->ds_area_msr);
wrmsrl(MSR_IA32_DS_AREA, ds_next);
}
#endif /* CONFIG_X86_DS */
if (next->debugctlmsr != debugctl)
update_debugctlmsr(next->debugctlmsr);
......@@ -541,13 +558,13 @@ static inline void __switch_to_xtra(struct task_struct *prev_p,
memset(tss->io_bitmap, 0xff, prev->io_bitmap_max);
}
#ifdef X86_BTS
#ifdef CONFIG_X86_PTRACE_BTS
if (test_tsk_thread_flag(prev_p, TIF_BTS_TRACE_TS))
ptrace_bts_take_timestamp(prev_p, BTS_TASK_DEPARTS);
if (test_tsk_thread_flag(next_p, TIF_BTS_TRACE_TS))
ptrace_bts_take_timestamp(next_p, BTS_TASK_ARRIVES);
#endif
#endif /* CONFIG_X86_PTRACE_BTS */
}
/*
......
This diff is collapsed.
......@@ -920,11 +920,12 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c)
set_cpu_cap(c, X86_FEATURE_BTS);
if (!(l1 & (1<<12)))
set_cpu_cap(c, X86_FEATURE_PEBS);
ds_init_intel(c);
}
if (cpu_has_bts)
ds_init_intel(c);
ptrace_bts_init_intel(c);
n = c->extended_cpuid_level;
if (n >= 0x80000008) {
......
......@@ -2,71 +2,237 @@
* Debug Store (DS) support
*
* This provides a low-level interface to the hardware's Debug Store
* feature that is used for last branch recording (LBR) and
* feature that is used for branch trace store (BTS) and
* precise-event based sampling (PEBS).
*
* Different architectures use a different DS layout/pointer size.
* The below functions therefore work on a void*.
* It manages:
* - per-thread and per-cpu allocation of BTS and PEBS
* - buffer memory allocation (optional)
* - buffer overflow handling
* - buffer access
*
* It assumes:
* - get_task_struct on all parameter tasks
* - current is allowed to trace parameter tasks
*
* Since there is no user for PEBS, yet, only LBR (or branch
* trace store, BTS) is supported.
*
*
* Copyright (C) 2007 Intel Corporation.
* Markus Metzger <markus.t.metzger@intel.com>, Dec 2007
* Copyright (C) 2007-2008 Intel Corporation.
* Markus Metzger <markus.t.metzger@intel.com>, 2007-2008
*/
#ifndef _ASM_X86_DS_H
#define _ASM_X86_DS_H
#ifdef CONFIG_X86_DS
#include <linux/types.h>
#include <linux/init.h>
struct cpuinfo_x86;
struct task_struct;
/* a branch trace record entry
/*
* Request BTS or PEBS
*
* Due to alignement constraints, the actual buffer may be slightly
* smaller than the requested or provided buffer.
*
* In order to unify the interface between various processor versions,
* we use the below data structure for all processors.
* Returns 0 on success; -Eerrno otherwise
*
* task: the task to request recording for;
* NULL for per-cpu recording on the current cpu
* base: the base pointer for the (non-pageable) buffer;
* NULL if buffer allocation requested
* size: the size of the requested or provided buffer
* ovfl: pointer to a function to be called on buffer overflow;
* NULL if cyclic buffer requested
*/
enum bts_qualifier {
BTS_INVALID = 0,
BTS_BRANCH,
BTS_TASK_ARRIVES,
BTS_TASK_DEPARTS
};
typedef void (*ds_ovfl_callback_t)(struct task_struct *);
extern int ds_request_bts(struct task_struct *task, void *base, size_t size,
ds_ovfl_callback_t ovfl);
extern int ds_request_pebs(struct task_struct *task, void *base, size_t size,
ds_ovfl_callback_t ovfl);
/*
* Release BTS or PEBS resources
*
* Frees buffers allocated on ds_request.
*
* Returns 0 on success; -Eerrno otherwise
*
* task: the task to release resources for;
* NULL to release resources for the current cpu
*/
extern int ds_release_bts(struct task_struct *task);
extern int ds_release_pebs(struct task_struct *task);
/*
* Return the (array) index of the write pointer.
* (assuming an array of BTS/PEBS records)
*
* Returns -Eerrno on error
*
* task: the task to access;
* NULL to access the current cpu
* pos (out): if not NULL, will hold the result
*/
extern int ds_get_bts_index(struct task_struct *task, size_t *pos);
extern int ds_get_pebs_index(struct task_struct *task, size_t *pos);
/*
* Return the (array) index one record beyond the end of the array.
* (assuming an array of BTS/PEBS records)
*
* Returns -Eerrno on error
*
* task: the task to access;
* NULL to access the current cpu
* pos (out): if not NULL, will hold the result
*/
extern int ds_get_bts_end(struct task_struct *task, size_t *pos);
extern int ds_get_pebs_end(struct task_struct *task, size_t *pos);
/*
* Provide a pointer to the BTS/PEBS record at parameter index.
* (assuming an array of BTS/PEBS records)
*
* The pointer points directly into the buffer. The user is
* responsible for copying the record.
*
* Returns the size of a single record on success; -Eerrno on error
*
* task: the task to access;
* NULL to access the current cpu
* index: the index of the requested record
* record (out): pointer to the requested record
*/
extern int ds_access_bts(struct task_struct *task,
size_t index, const void **record);
extern int ds_access_pebs(struct task_struct *task,
size_t index, const void **record);
/*
* Write one or more BTS/PEBS records at the write pointer index and
* advance the write pointer.
*
* If size is not a multiple of the record size, trailing bytes are
* zeroed out.
*
* May result in one or more overflow notifications.
*
* If called during overflow handling, that is, with index >=
* interrupt threshold, the write will wrap around.
*
* An overflow notification is given if and when the interrupt
* threshold is reached during or after the write.
*
* Returns the number of bytes written or -Eerrno.
*
* task: the task to access;
* NULL to access the current cpu
* buffer: the buffer to write
* size: the size of the buffer
*/
extern int ds_write_bts(struct task_struct *task,
const void *buffer, size_t size);
extern int ds_write_pebs(struct task_struct *task,
const void *buffer, size_t size);
/*
* Same as ds_write_bts/pebs, but omit ownership checks.
*
* This is needed to have some other task than the owner of the
* BTS/PEBS buffer or the parameter task itself write into the
* respective buffer.
*/
extern int ds_unchecked_write_bts(struct task_struct *task,
const void *buffer, size_t size);
extern int ds_unchecked_write_pebs(struct task_struct *task,
const void *buffer, size_t size);
/*
* Reset the write pointer of the BTS/PEBS buffer.
*
* Returns 0 on success; -Eerrno on error
*
* task: the task to access;
* NULL to access the current cpu
*/
extern int ds_reset_bts(struct task_struct *task);
extern int ds_reset_pebs(struct task_struct *task);
/*
* Clear the BTS/PEBS buffer and reset the write pointer.
* The entire buffer will be zeroed out.
*
* Returns 0 on success; -Eerrno on error
*
* task: the task to access;
* NULL to access the current cpu
*/
extern int ds_clear_bts(struct task_struct *task);
extern int ds_clear_pebs(struct task_struct *task);
/*
* Provide the PEBS counter reset value.
*
* Returns 0 on success; -Eerrno on error
*
* task: the task to access;
* NULL to access the current cpu
* value (out): the counter reset value
*/
extern int ds_get_pebs_reset(struct task_struct *task, u64 *value);
/*
* Set the PEBS counter reset value.
*
* Returns 0 on success; -Eerrno on error
*
* task: the task to access;
* NULL to access the current cpu
* value: the new counter reset value
*/
extern int ds_set_pebs_reset(struct task_struct *task, u64 value);
/*
* Initialization
*/
struct cpuinfo_x86;
extern void __cpuinit ds_init_intel(struct cpuinfo_x86 *);
struct bts_struct {
u64 qualifier;
union {
/* BTS_BRANCH */
struct {
u64 from_ip;
u64 to_ip;
} lbr;
/* BTS_TASK_ARRIVES or
BTS_TASK_DEPARTS */
u64 jiffies;
} variant;
/*
* The DS context - part of struct thread_struct.
*/
struct ds_context {
/* pointer to the DS configuration; goes into MSR_IA32_DS_AREA */
unsigned char *ds;
/* the owner of the BTS and PEBS configuration, respectively */
struct task_struct *owner[2];
/* buffer overflow notification function for BTS and PEBS */
ds_ovfl_callback_t callback[2];
/* the original buffer address */
void *buffer[2];
/* the number of allocated pages for on-request allocated buffers */
unsigned int pages[2];
/* use count */
unsigned long count;
/* a pointer to the context location inside the thread_struct
* or the per_cpu context array */
struct ds_context **this;
/* a pointer to the task owning this context, or NULL, if the
* context is owned by a cpu */
struct task_struct *task;
};
/* Overflow handling mechanisms */
#define DS_O_SIGNAL 1 /* send overflow signal */
#define DS_O_WRAP 2 /* wrap around */
extern int ds_allocate(void **, size_t);
extern int ds_free(void **);
extern int ds_get_bts_size(void *);
extern int ds_get_bts_end(void *);
extern int ds_get_bts_index(void *);
extern int ds_set_overflow(void *, int);
extern int ds_get_overflow(void *);
extern int ds_clear(void *);
extern int ds_read_bts(void *, int, struct bts_struct *);
extern int ds_write_bts(void *, const struct bts_struct *);
extern unsigned long ds_debugctl_mask(void);
extern void __cpuinit ds_init_intel(struct cpuinfo_x86 *c);
/* called by exit_thread() to free leftover contexts */
extern void ds_free(struct ds_context *context);
#else /* CONFIG_X86_DS */
#define ds_init_intel(config) do {} while (0)
#endif /* CONFIG_X86_DS */
#endif /* _ASM_X86_DS_H */
......@@ -20,6 +20,7 @@ struct mm_struct;
#include <asm/msr.h>
#include <asm/desc_defs.h>
#include <asm/nops.h>
#include <asm/ds.h>
#include <linux/personality.h>
#include <linux/cpumask.h>
......@@ -415,9 +416,14 @@ struct thread_struct {
unsigned io_bitmap_max;
/* MSR_IA32_DEBUGCTLMSR value to switch in if TIF_DEBUGCTLMSR is set. */
unsigned long debugctlmsr;
/* Debug Store - if not 0 points to a DS Save Area configuration;
* goes into MSR_IA32_DS_AREA */
unsigned long ds_area_msr;
#ifdef CONFIG_X86_DS
/* Debug Store context; see include/asm-x86/ds.h; goes into MSR_IA32_DS_AREA */
struct ds_context *ds_ctx;
#endif /* CONFIG_X86_DS */
#ifdef CONFIG_X86_PTRACE_BTS
/* the signal to send on a bts buffer overflow */
unsigned int bts_ovfl_signal;
#endif /* CONFIG_X86_PTRACE_BTS */
};
static inline unsigned long native_get_debugreg(int regno)
......
......@@ -80,8 +80,9 @@
#define PTRACE_SINGLEBLOCK 33 /* resume execution until next branch */
#ifndef __ASSEMBLY__
#ifdef CONFIG_X86_PTRACE_BTS
#ifndef __ASSEMBLY__
#include <asm/types.h>
/* configuration/status structure used in PTRACE_BTS_CONFIG and
......@@ -97,20 +98,20 @@ struct ptrace_bts_config {
/* actual size of bts_struct in bytes */
__u32 bts_size;
};
#endif
#endif /* __ASSEMBLY__ */
#define PTRACE_BTS_O_TRACE 0x1 /* branch trace */
#define PTRACE_BTS_O_SCHED 0x2 /* scheduling events w/ jiffies */
#define PTRACE_BTS_O_SIGNAL 0x4 /* send SIG<signal> on buffer overflow
instead of wrapping around */
#define PTRACE_BTS_O_CUT_SIZE 0x8 /* cut requested size to max available
instead of failing */
#define PTRACE_BTS_O_ALLOC 0x8 /* (re)allocate buffer */
#define PTRACE_BTS_CONFIG 40
/* Configure branch trace recording.
ADDR points to a struct ptrace_bts_config.
DATA gives the size of that buffer.
A new buffer is allocated, iff the size changes.
A new buffer is allocated, if requested in the flags.
An overflow signal may only be requested for new buffers.
Returns the number of bytes read.
*/
#define PTRACE_BTS_STATUS 41
......@@ -119,7 +120,7 @@ struct ptrace_bts_config {
Returns the number of bytes written.
*/
#define PTRACE_BTS_SIZE 42
/* Return the number of available BTS records.
/* Return the number of available BTS records for draining.
DATA and ADDR are ignored.
*/
#define PTRACE_BTS_GET 43
......@@ -139,5 +140,6 @@ struct ptrace_bts_config {
BTS records are read from oldest to newest.
Returns number of BTS records drained.
*/
#endif /* CONFIG_X86_PTRACE_BTS */
#endif
......@@ -125,14 +125,48 @@ struct pt_regs {
#endif /* __KERNEL__ */
#endif /* !__i386__ */
#ifdef CONFIG_X86_PTRACE_BTS
/* a branch trace record entry
*
* In order to unify the interface between various processor versions,
* we use the below data structure for all processors.
*/
enum bts_qualifier {
BTS_INVALID = 0,
BTS_BRANCH,
BTS_TASK_ARRIVES,
BTS_TASK_DEPARTS
};
struct bts_struct {
__u64 qualifier;
union {
/* BTS_BRANCH */
struct {
__u64 from_ip;
__u64 to_ip;
} lbr;
/* BTS_TASK_ARRIVES or
BTS_TASK_DEPARTS */
__u64 jiffies;
} variant;
};
#endif /* CONFIG_X86_PTRACE_BTS */
#ifdef __KERNEL__
/* the DS BTS struct is used for ptrace as well */
#include <asm/ds.h>
#include <linux/init.h>
struct cpuinfo_x86;
struct task_struct;
#ifdef CONFIG_X86_PTRACE_BTS
extern void __cpuinit ptrace_bts_init_intel(struct cpuinfo_x86 *);
extern void ptrace_bts_take_timestamp(struct task_struct *, enum bts_qualifier);
#else
#define ptrace_bts_init_intel(config) do {} while (0)
#endif /* CONFIG_X86_PTRACE_BTS */
extern unsigned long profile_pc(struct pt_regs *regs);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment