Commit 9977e886 authored by Hendrik Brueckner's avatar Hendrik Brueckner Committed by Martin Schwidefsky

s390/kernel: lazy restore fpu registers

Improve the save and restore behavior of FPU register contents to use the
vector extension within the kernel.

The kernel does not use floating-point or vector registers and, therefore,
saving and restoring the FPU register contents are performed for handling
signals or switching processes only.  To prepare for using vector
instructions and vector registers within the kernel, enhance the save
behavior and implement a lazy restore at return to user space from a
system call or interrupt.

To implement the lazy restore, the save_fpu_regs() sets a CPU information
flag, CIF_FPU, to indicate that the FPU registers must be restored.
Saving and setting CIF_FPU is performed in an atomic fashion to be
interrupt-safe.  When the kernel wants to use the vector extension or
wants to change the FPU register state for a task during signal handling,
the save_fpu_regs() must be called first.  The CIF_FPU flag is also set at
process switch.  At return to user space, the FPU state is restored.  In
particular, the FPU state includes the floating-point or vector register
contents, as well as, vector-enablement and floating-point control.  The
FPU state restore and clearing CIF_FPU is also performed in an atomic
fashion.

For KVM, the restore of the FPU register state is performed when restoring
the general-purpose guest registers before the SIE instructions is started.
Because the path towards the SIE instruction is interruptible, the CIF_FPU
flag must be checked again right before going into SIE.  If set, the guest
registers must be reloaded again by re-entering the outer SIE loop.  This
is the same behavior as if the SIE critical section is interrupted.
Signed-off-by: default avatarHendrik Brueckner <brueckner@linux.vnet.ibm.com>
Signed-off-by: default avatarMartin Schwidefsky <schwidefsky@de.ibm.com>
parent bd550337
......@@ -46,6 +46,8 @@ static inline void __ctl_clear_bit(unsigned int cr, unsigned int bit)
__ctl_load(reg, cr, cr);
}
void __ctl_set_vx(void);
void smp_ctl_set_bit(int cr, int bit);
void smp_ctl_clear_bit(int cr, int bit);
......
......@@ -28,9 +28,14 @@ struct fpu {
};
};
void save_fpu_regs(struct fpu *fpu);
#define is_vx_fpu(fpu) (!!((fpu)->flags & FPU_USE_VX))
#define is_vx_task(tsk) (!!((tsk)->thread.fpu.flags & FPU_USE_VX))
/* VX array structure for address operand constraints in inline assemblies */
struct vx_array { __vector128 _[__NUM_VXRS]; };
static inline int test_fp_ctl(u32 fpc)
{
u32 orig_fpc;
......@@ -48,76 +53,6 @@ static inline int test_fp_ctl(u32 fpc)
return rc;
}
static inline void save_fp_ctl(u32 *fpc)
{
asm volatile(
" stfpc %0\n"
: "+Q" (*fpc));
}
static inline int restore_fp_ctl(u32 *fpc)
{
int rc;
asm volatile(
" lfpc %1\n"
"0: la %0,0\n"
"1:\n"
: "=d" (rc) : "Q" (*fpc), "0" (-EINVAL));
return rc;
}
static inline void save_fp_regs(freg_t *fprs)
{
asm volatile("std 0,%0" : "=Q" (fprs[0]));
asm volatile("std 2,%0" : "=Q" (fprs[2]));
asm volatile("std 4,%0" : "=Q" (fprs[4]));
asm volatile("std 6,%0" : "=Q" (fprs[6]));
asm volatile("std 1,%0" : "=Q" (fprs[1]));
asm volatile("std 3,%0" : "=Q" (fprs[3]));
asm volatile("std 5,%0" : "=Q" (fprs[5]));
asm volatile("std 7,%0" : "=Q" (fprs[7]));
asm volatile("std 8,%0" : "=Q" (fprs[8]));
asm volatile("std 9,%0" : "=Q" (fprs[9]));
asm volatile("std 10,%0" : "=Q" (fprs[10]));
asm volatile("std 11,%0" : "=Q" (fprs[11]));
asm volatile("std 12,%0" : "=Q" (fprs[12]));
asm volatile("std 13,%0" : "=Q" (fprs[13]));
asm volatile("std 14,%0" : "=Q" (fprs[14]));
asm volatile("std 15,%0" : "=Q" (fprs[15]));
}
static inline void restore_fp_regs(freg_t *fprs)
{
asm volatile("ld 0,%0" : : "Q" (fprs[0]));
asm volatile("ld 2,%0" : : "Q" (fprs[2]));
asm volatile("ld 4,%0" : : "Q" (fprs[4]));
asm volatile("ld 6,%0" : : "Q" (fprs[6]));
asm volatile("ld 1,%0" : : "Q" (fprs[1]));
asm volatile("ld 3,%0" : : "Q" (fprs[3]));
asm volatile("ld 5,%0" : : "Q" (fprs[5]));
asm volatile("ld 7,%0" : : "Q" (fprs[7]));
asm volatile("ld 8,%0" : : "Q" (fprs[8]));
asm volatile("ld 9,%0" : : "Q" (fprs[9]));
asm volatile("ld 10,%0" : : "Q" (fprs[10]));
asm volatile("ld 11,%0" : : "Q" (fprs[11]));
asm volatile("ld 12,%0" : : "Q" (fprs[12]));
asm volatile("ld 13,%0" : : "Q" (fprs[13]));
asm volatile("ld 14,%0" : : "Q" (fprs[14]));
asm volatile("ld 15,%0" : : "Q" (fprs[15]));
}
static inline void save_vx_regs(__vector128 *vxrs)
{
typedef struct { __vector128 _[__NUM_VXRS]; } addrtype;
asm volatile(
" la 1,%0\n"
" .word 0xe70f,0x1000,0x003e\n" /* vstm 0,15,0(1) */
" .word 0xe70f,0x1100,0x0c3e\n" /* vstm 16,31,256(1) */
: "=Q" (*(addrtype *) vxrs) : : "1");
}
static inline void save_vx_regs_safe(__vector128 *vxrs)
{
unsigned long cr0, flags;
......@@ -126,20 +61,13 @@ static inline void save_vx_regs_safe(__vector128 *vxrs)
__ctl_store(cr0, 0, 0);
__ctl_set_bit(0, 17);
__ctl_set_bit(0, 18);
save_vx_regs(vxrs);
__ctl_load(cr0, 0, 0);
arch_local_irq_restore(flags);
}
static inline void restore_vx_regs(__vector128 *vxrs)
{
typedef struct { __vector128 _[__NUM_VXRS]; } addrtype;
asm volatile(
" la 1,%0\n"
" .word 0xe70f,0x1000,0x0036\n" /* vlm 0,15,0(1) */
" .word 0xe70f,0x1100,0x0c36\n" /* vlm 16,31,256(1) */
: : "Q" (*(addrtype *) vxrs) : "1");
" .word 0xe70f,0x1000,0x003e\n" /* vstm 0,15,0(1) */
" .word 0xe70f,0x1100,0x0c3e\n" /* vstm 16,31,256(1) */
: "=Q" (*(struct vx_array *) vxrs) : : "1");
__ctl_load(cr0, 0, 0);
arch_local_irq_restore(flags);
}
static inline void convert_vx_to_fp(freg_t *fprs, __vector128 *vxrs)
......@@ -177,24 +105,6 @@ static inline void fpregs_load(_s390_fp_regs *fpregs, struct fpu *fpu)
sizeof(fpregs->fprs));
}
static inline void save_fpu_regs(struct fpu *fpu)
{
save_fp_ctl(&fpu->fpc);
if (is_vx_fpu(fpu))
save_vx_regs(fpu->vxrs);
else
save_fp_regs(fpu->fprs);
}
static inline void restore_fpu_regs(struct fpu *fpu)
{
restore_fp_ctl(&fpu->fpc);
if (is_vx_fpu(fpu))
restore_vx_regs(fpu->vxrs);
else
restore_fp_regs(fpu->fprs);
}
#endif
#endif /* _ASM_S390_FPU_INTERNAL_H */
......@@ -22,6 +22,7 @@
#include <linux/kvm.h>
#include <asm/debug.h>
#include <asm/cpu.h>
#include <asm/fpu-internal.h>
#include <asm/isc.h>
#define KVM_MAX_VCPUS 64
......@@ -498,10 +499,9 @@ struct kvm_guestdbg_info_arch {
struct kvm_vcpu_arch {
struct kvm_s390_sie_block *sie_block;
s390_fp_regs host_fpregs;
unsigned int host_acrs[NUM_ACRS];
s390_fp_regs guest_fpregs;
struct kvm_s390_vregs *host_vregs;
struct fpu host_fpregs;
struct fpu guest_fpregs;
struct kvm_s390_local_interrupt local_int;
struct hrtimer ckc_timer;
struct kvm_s390_pgm_info pgm;
......
......@@ -14,10 +14,12 @@
#define CIF_MCCK_PENDING 0 /* machine check handling is pending */
#define CIF_ASCE 1 /* user asce needs fixup / uaccess */
#define CIF_NOHZ_DELAY 2 /* delay HZ disable for a tick */
#define CIF_FPU 3 /* restore vector registers */
#define _CIF_MCCK_PENDING (1<<CIF_MCCK_PENDING)
#define _CIF_ASCE (1<<CIF_ASCE)
#define _CIF_NOHZ_DELAY (1<<CIF_NOHZ_DELAY)
#define _CIF_FPU (1<<CIF_FPU)
#ifndef __ASSEMBLY__
......
......@@ -36,7 +36,7 @@ static inline void restore_access_regs(unsigned int *acrs)
} \
if (next->mm) { \
update_cr_regs(next); \
restore_fpu_regs(&next->thread.fpu); \
set_cpu_flag(CIF_FPU); \
restore_access_regs(&next->thread.acrs[0]); \
restore_ri_cb(next->thread.ri_cb, prev->thread.ri_cb); \
} \
......
......@@ -28,11 +28,16 @@ int main(void)
DEFINE(__TASK_pid, offsetof(struct task_struct, pid));
BLANK();
DEFINE(__THREAD_ksp, offsetof(struct thread_struct, ksp));
DEFINE(__THREAD_fpu, offsetof(struct task_struct, thread.fpu));
DEFINE(__THREAD_per_cause, offsetof(struct thread_struct, per_event.cause));
DEFINE(__THREAD_per_address, offsetof(struct thread_struct, per_event.address));
DEFINE(__THREAD_per_paid, offsetof(struct thread_struct, per_event.paid));
DEFINE(__THREAD_trap_tdb, offsetof(struct thread_struct, trap_tdb));
BLANK();
DEFINE(__FPU_fpc, offsetof(struct fpu, fpc));
DEFINE(__FPU_flags, offsetof(struct fpu, flags));
DEFINE(__FPU_regs, offsetof(struct fpu, regs));
BLANK();
DEFINE(__TI_task, offsetof(struct thread_info, task));
DEFINE(__TI_flags, offsetof(struct thread_info, flags));
DEFINE(__TI_sysc_table, offsetof(struct thread_info, sys_call_table));
......
......@@ -161,7 +161,6 @@ static void store_sigregs(void)
static void load_sigregs(void)
{
restore_access_regs(current->thread.acrs);
restore_fpu_regs(&current->thread.fpu);
}
static int save_sigregs32(struct pt_regs *regs, _sigregs32 __user *sregs)
......@@ -287,6 +286,7 @@ COMPAT_SYSCALL_DEFINE0(sigreturn)
if (__copy_from_user(&set.sig, &frame->sc.oldmask, _SIGMASK_COPY_SIZE32))
goto badframe;
set_current_blocked(&set);
save_fpu_regs(&current->thread.fpu);
if (restore_sigregs32(regs, &frame->sregs))
goto badframe;
if (restore_sigregs_ext32(regs, &frame->sregs_ext))
......@@ -309,6 +309,7 @@ COMPAT_SYSCALL_DEFINE0(rt_sigreturn)
set_current_blocked(&set);
if (compat_restore_altstack(&frame->uc.uc_stack))
goto badframe;
save_fpu_regs(&current->thread.fpu);
if (restore_sigregs32(regs, &frame->uc.uc_mcontext))
goto badframe;
if (restore_sigregs_ext32(regs, &frame->uc.uc_mcontext_ext))
......
......@@ -20,6 +20,8 @@
#include <asm/page.h>
#include <asm/sigp.h>
#include <asm/irq.h>
#include <asm/fpu-internal.h>
#include <asm/vx-insn.h>
__PT_R0 = __PT_GPRS
__PT_R1 = __PT_GPRS + 8
......@@ -46,10 +48,10 @@ _TIF_WORK = (_TIF_SIGPENDING | _TIF_NOTIFY_RESUME | _TIF_NEED_RESCHED | \
_TIF_UPROBE)
_TIF_TRACE = (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | _TIF_SECCOMP | \
_TIF_SYSCALL_TRACEPOINT)
_CIF_WORK = (_CIF_MCCK_PENDING | _CIF_ASCE)
_CIF_WORK = (_CIF_MCCK_PENDING | _CIF_ASCE | _CIF_FPU)
_PIF_WORK = (_PIF_PER_TRAP)
#define BASED(name) name-system_call(%r13)
#define BASED(name) name-cleanup_critical(%r13)
.macro TRACE_IRQS_ON
#ifdef CONFIG_TRACE_IRQFLAGS
......@@ -280,6 +282,8 @@ ENTRY(system_call)
jo .Lsysc_sigpending
tm __TI_flags+7(%r12),_TIF_NOTIFY_RESUME
jo .Lsysc_notify_resume
tm __LC_CPU_FLAGS+7,_CIF_FPU
jo .Lsysc_vxrs
tm __LC_CPU_FLAGS+7,_CIF_ASCE
jo .Lsysc_uaccess
j .Lsysc_return # beware of critical section cleanup
......@@ -306,6 +310,13 @@ ENTRY(system_call)
lctlg %c1,%c1,__LC_USER_ASCE # load primary asce
j .Lsysc_return
#
# CIF_FPU is set, restore floating-point controls and floating-point registers.
#
.Lsysc_vxrs:
larl %r14,.Lsysc_return
jg load_fpu_regs
#
# _TIF_SIGPENDING is set, call do_signal
#
......@@ -405,7 +416,7 @@ ENTRY(pgm_check_handler)
stmg %r8,%r15,__LC_SAVE_AREA_SYNC
lg %r10,__LC_LAST_BREAK
lg %r12,__LC_THREAD_INFO
larl %r13,system_call
larl %r13,cleanup_critical
lmg %r8,%r9,__LC_PGM_OLD_PSW
HANDLE_SIE_INTERCEPT %r14,1
tmhh %r8,0x0001 # test problem state bit
......@@ -483,7 +494,7 @@ ENTRY(io_int_handler)
stmg %r8,%r15,__LC_SAVE_AREA_ASYNC
lg %r10,__LC_LAST_BREAK
lg %r12,__LC_THREAD_INFO
larl %r13,system_call
larl %r13,cleanup_critical
lmg %r8,%r9,__LC_IO_OLD_PSW
HANDLE_SIE_INTERCEPT %r14,2
SWITCH_ASYNC __LC_SAVE_AREA_ASYNC,__LC_ASYNC_STACK,STACK_SHIFT
......@@ -587,6 +598,8 @@ ENTRY(io_int_handler)
jo .Lio_sigpending
tm __TI_flags+7(%r12),_TIF_NOTIFY_RESUME
jo .Lio_notify_resume
tm __LC_CPU_FLAGS+7,_CIF_FPU
jo .Lio_vxrs
tm __LC_CPU_FLAGS+7,_CIF_ASCE
jo .Lio_uaccess
j .Lio_return # beware of critical section cleanup
......@@ -608,6 +621,13 @@ ENTRY(io_int_handler)
lctlg %c1,%c1,__LC_USER_ASCE # load primary asce
j .Lio_return
#
# CIF_FPU is set, restore floating-point controls and floating-point registers.
#
.Lio_vxrs:
larl %r14,.Lio_return
jg load_fpu_regs
#
# _TIF_NEED_RESCHED is set, call schedule
#
......@@ -652,7 +672,7 @@ ENTRY(ext_int_handler)
stmg %r8,%r15,__LC_SAVE_AREA_ASYNC
lg %r10,__LC_LAST_BREAK
lg %r12,__LC_THREAD_INFO
larl %r13,system_call
larl %r13,cleanup_critical
lmg %r8,%r9,__LC_EXT_OLD_PSW
HANDLE_SIE_INTERCEPT %r14,3
SWITCH_ASYNC __LC_SAVE_AREA_ASYNC,__LC_ASYNC_STACK,STACK_SHIFT
......@@ -690,6 +710,121 @@ ENTRY(psw_idle)
br %r14
.Lpsw_idle_end:
/* Store floating-point controls and floating-point or vector extension
* registers instead. A critical section cleanup assures that the registers
* are stored even if interrupted for some other work. The register %r2
* designates a struct fpu to store register contents. If the specified
* structure does not contain a register save area, the register store is
* omitted (see also comments in arch_dup_task_struct()).
*
* The CIF_FPU flag is set in any case. The CIF_FPU triggers a lazy restore
* of the register contents at system call or io return.
*/
ENTRY(save_fpu_regs)
tm __LC_CPU_FLAGS+7,_CIF_FPU
bor %r14
stfpc __FPU_fpc(%r2)
.Lsave_fpu_regs_fpc_end:
lg %r3,__FPU_regs(%r2)
ltgr %r3,%r3
jz .Lsave_fpu_regs_done # no save area -> set CIF_FPU
tm __FPU_flags+3(%r2),FPU_USE_VX
jz .Lsave_fpu_regs_fp # no -> store FP regs
.Lsave_fpu_regs_vx_low:
VSTM %v0,%v15,0,%r3 # vstm 0,15,0(3)
.Lsave_fpu_regs_vx_high:
VSTM %v16,%v31,256,%r3 # vstm 16,31,256(3)
j .Lsave_fpu_regs_done # -> set CIF_FPU flag
.Lsave_fpu_regs_fp:
std 0,0(%r3)
std 1,8(%r3)
std 2,16(%r3)
std 3,24(%r3)
std 4,32(%r3)
std 5,40(%r3)
std 6,48(%r3)
std 7,56(%r3)
std 8,64(%r3)
std 9,72(%r3)
std 10,80(%r3)
std 11,88(%r3)
std 12,96(%r3)
std 13,104(%r3)
std 14,112(%r3)
std 15,120(%r3)
.Lsave_fpu_regs_done:
oi __LC_CPU_FLAGS+7,_CIF_FPU
br %r14
.Lsave_fpu_regs_end:
/* Load floating-point controls and floating-point or vector extension
* registers. A critical section cleanup assures that the register contents
* are loaded even if interrupted for some other work. Depending on the saved
* FP/VX state, the vector-enablement control, CR0.46, is either set or cleared.
*
* There are special calling conventions to fit into sysc and io return work:
* %r12: __LC_THREAD_INFO
* %r15: <kernel stack>
* The function requires:
* %r4 and __SF_EMPTY+32(%r15)
*/
load_fpu_regs:
tm __LC_CPU_FLAGS+7,_CIF_FPU
bnor %r14
lg %r4,__TI_task(%r12)
la %r4,__THREAD_fpu(%r4)
lfpc __FPU_fpc(%r4)
stctg %c0,%c0,__SF_EMPTY+32(%r15) # store CR0
tm __FPU_flags+3(%r4),FPU_USE_VX # VX-enabled task ?
lg %r4,__FPU_regs(%r4) # %r4 <- reg save area
jz .Lload_fpu_regs_fp_ctl # -> no VX, load FP regs
.Lload_fpu_regs_vx_ctl:
tm __SF_EMPTY+32+5(%r15),2 # test VX control
jo .Lload_fpu_regs_vx
oi __SF_EMPTY+32+5(%r15),2 # set VX control
lctlg %c0,%c0,__SF_EMPTY+32(%r15)
.Lload_fpu_regs_vx:
VLM %v0,%v15,0,%r4
.Lload_fpu_regs_vx_high:
VLM %v16,%v31,256,%r4
j .Lload_fpu_regs_done
.Lload_fpu_regs_fp_ctl:
tm __SF_EMPTY+32+5(%r15),2 # test VX control
jz .Lload_fpu_regs_fp
ni __SF_EMPTY+32+5(%r15),253 # clear VX control
lctlg %c0,%c0,__SF_EMPTY+32(%r15)
.Lload_fpu_regs_fp:
ld 0,0(%r4)
ld 1,8(%r4)
ld 2,16(%r4)
ld 3,24(%r4)
ld 4,32(%r4)
ld 5,40(%r4)
ld 6,48(%r4)
ld 7,56(%r4)
ld 8,64(%r4)
ld 9,72(%r4)
ld 10,80(%r4)
ld 11,88(%r4)
ld 12,96(%r4)
ld 13,104(%r4)
ld 14,112(%r4)
ld 15,120(%r4)
.Lload_fpu_regs_done:
ni __LC_CPU_FLAGS+7,255-_CIF_FPU
br %r14
.Lload_fpu_regs_end:
/* Test and set the vector enablement control in CR0.46 */
ENTRY(__ctl_set_vx)
stctg %c0,%c0,__SF_EMPTY(%r15)
tm __SF_EMPTY+5(%r15),2
bor %r14
oi __SF_EMPTY+5(%r15),2
lctlg %c0,%c0,__SF_EMPTY(%r15)
br %r14
.L__ctl_set_vx_end:
.L__critical_end:
/*
......@@ -702,7 +837,7 @@ ENTRY(mcck_int_handler)
lmg %r0,%r15,__LC_GPREGS_SAVE_AREA-4095(%r1)# revalidate gprs
lg %r10,__LC_LAST_BREAK
lg %r12,__LC_THREAD_INFO
larl %r13,system_call
larl %r13,cleanup_critical
lmg %r8,%r9,__LC_MCK_OLD_PSW
HANDLE_SIE_INTERCEPT %r14,4
tm __LC_MCCK_CODE,0x80 # system damage?
......@@ -831,6 +966,12 @@ stack_overflow:
.quad .Lio_done
.quad psw_idle
.quad .Lpsw_idle_end
.quad save_fpu_regs
.quad .Lsave_fpu_regs_end
.quad load_fpu_regs
.quad .Lload_fpu_regs_end
.quad __ctl_set_vx
.quad .L__ctl_set_vx_end
cleanup_critical:
clg %r9,BASED(.Lcleanup_table) # system_call
......@@ -853,6 +994,18 @@ cleanup_critical:
jl 0f
clg %r9,BASED(.Lcleanup_table+72) # .Lpsw_idle_end
jl .Lcleanup_idle
clg %r9,BASED(.Lcleanup_table+80) # save_fpu_regs
jl 0f
clg %r9,BASED(.Lcleanup_table+88) # .Lsave_fpu_regs_end
jl .Lcleanup_save_fpu_regs
clg %r9,BASED(.Lcleanup_table+96) # load_fpu_regs
jl 0f
clg %r9,BASED(.Lcleanup_table+104) # .Lload_fpu_regs_end
jl .Lcleanup_load_fpu_regs
clg %r9,BASED(.Lcleanup_table+112) # __ctl_set_vx
jl 0f
clg %r9,BASED(.Lcleanup_table+120) # .L__ctl_set_vx_end
jl .Lcleanup___ctl_set_vx
0: br %r14
......@@ -981,6 +1134,145 @@ cleanup_critical:
.Lcleanup_idle_insn:
.quad .Lpsw_idle_lpsw
.Lcleanup_save_fpu_regs:
tm __LC_CPU_FLAGS+7,_CIF_FPU
bor %r14
clg %r9,BASED(.Lcleanup_save_fpu_regs_done)
jhe 5f
clg %r9,BASED(.Lcleanup_save_fpu_regs_fp)
jhe 4f
clg %r9,BASED(.Lcleanup_save_fpu_regs_vx_high)
jhe 3f
clg %r9,BASED(.Lcleanup_save_fpu_regs_vx_low)
jhe 2f
clg %r9,BASED(.Lcleanup_save_fpu_fpc_end)
jhe 1f
0: # Store floating-point controls
stfpc __FPU_fpc(%r2)
1: # Load register save area and check if VX is active
lg %r3,__FPU_regs(%r2)
ltgr %r3,%r3
jz 5f # no save area -> set CIF_FPU
tm __FPU_flags+3(%r2),FPU_USE_VX
jz 4f # no VX -> store FP regs
2: # Store vector registers (V0-V15)
VSTM %v0,%v15,0,%r3 # vstm 0,15,0(3)
3: # Store vector registers (V16-V31)
VSTM %v16,%v31,256,%r3 # vstm 16,31,256(3)
j 5f # -> done, set CIF_FPU flag
4: # Store floating-point registers
std 0,0(%r3)
std 1,8(%r3)
std 2,16(%r3)
std 3,24(%r3)
std 4,32(%r3)
std 5,40(%r3)
std 6,48(%r3)
std 7,56(%r3)
std 8,64(%r3)
std 9,72(%r3)
std 10,80(%r3)
std 11,88(%r3)
std 12,96(%r3)
std 13,104(%r3)
std 14,112(%r3)
std 15,120(%r3)
5: # Set CIF_FPU flag
oi __LC_CPU_FLAGS+7,_CIF_FPU
lg %r9,48(%r11) # return from save_fpu_regs
br %r14
.Lcleanup_save_fpu_fpc_end:
.quad .Lsave_fpu_regs_fpc_end
.Lcleanup_save_fpu_regs_vx_low:
.quad .Lsave_fpu_regs_vx_low
.Lcleanup_save_fpu_regs_vx_high:
.quad .Lsave_fpu_regs_vx_high
.Lcleanup_save_fpu_regs_fp:
.quad .Lsave_fpu_regs_fp
.Lcleanup_save_fpu_regs_done:
.quad .Lsave_fpu_regs_done
.Lcleanup_load_fpu_regs:
tm __LC_CPU_FLAGS+7,_CIF_FPU
bnor %r14
clg %r9,BASED(.Lcleanup_load_fpu_regs_done)
jhe 1f
clg %r9,BASED(.Lcleanup_load_fpu_regs_fp)
jhe 2f
clg %r9,BASED(.Lcleanup_load_fpu_regs_fp_ctl)
jhe 3f
clg %r9,BASED(.Lcleanup_load_fpu_regs_vx_high)
jhe 4f
clg %r9,BASED(.Lcleanup_load_fpu_regs_vx)
jhe 5f
clg %r9,BASED(.Lcleanup_load_fpu_regs_vx_ctl)
jhe 6f
lg %r4,__TI_task(%r12)
la %r4,__THREAD_fpu(%r4)
lfpc __FPU_fpc(%r4)
tm __FPU_flags+3(%r4),FPU_USE_VX # VX-enabled task ?
lg %r4,__FPU_regs(%r4) # %r4 <- reg save area
jz 3f # -> no VX, load FP regs
6: # Set VX-enablement control
stctg %c0,%c0,__SF_EMPTY+32(%r15) # store CR0
tm __SF_EMPTY+32+5(%r15),2 # test VX control
jo 5f
oi __SF_EMPTY+32+5(%r15),2 # set VX control
lctlg %c0,%c0,__SF_EMPTY+32(%r15)
5: # Load V0 ..V15 registers
VLM %v0,%v15,0,%r4
4: # Load V16..V31 registers
VLM %v16,%v31,256,%r4
j 1f
3: # Clear VX-enablement control for FP
stctg %c0,%c0,__SF_EMPTY+32(%r15) # store CR0
tm __SF_EMPTY+32+5(%r15),2 # test VX control
jz 2f
ni __SF_EMPTY+32+5(%r15),253 # clear VX control
lctlg %c0,%c0,__SF_EMPTY+32(%r15)
2: # Load floating-point registers
ld 0,0(%r4)
ld 1,8(%r4)
ld 2,16(%r4)
ld 3,24(%r4)
ld 4,32(%r4)
ld 5,40(%r4)
ld 6,48(%r4)
ld 7,56(%r4)
ld 8,64(%r4)
ld 9,72(%r4)
ld 10,80(%r4)
ld 11,88(%r4)
ld 12,96(%r4)
ld 13,104(%r4)
ld 14,112(%r4)
ld 15,120(%r4)
1: # Clear CIF_FPU bit
ni __LC_CPU_FLAGS+7,255-_CIF_FPU
lg %r9,48(%r11) # return from load_fpu_regs
br %r14
.Lcleanup_load_fpu_regs_vx_ctl:
.quad .Lload_fpu_regs_vx_ctl
.Lcleanup_load_fpu_regs_vx:
.quad .Lload_fpu_regs_vx
.Lcleanup_load_fpu_regs_vx_high:
.quad .Lload_fpu_regs_vx_high
.Lcleanup_load_fpu_regs_fp_ctl:
.quad .Lload_fpu_regs_fp_ctl
.Lcleanup_load_fpu_regs_fp:
.quad .Lload_fpu_regs_fp
.Lcleanup_load_fpu_regs_done:
.quad .Lload_fpu_regs_done
.Lcleanup___ctl_set_vx:
stctg %c0,%c0,__SF_EMPTY(%r15)
tm __SF_EMPTY+5(%r15