FreeBSD Bugzilla – Attachment 12573 Details for
Bug 24219
<4.2R can't use Pentium3 SSE instructions>
Home
|
New
|
Browse
|
Search
|
[?]
|
Reports
|
Help
|
New Account
|
Log In
Remember
[x]
|
Forgot Password
Login:
[x]
[patch]
file.diff
file.diff (text/plain), 27.60 KB, created by
kaz
on 2001-01-10 12:40:01 UTC
(
hide
)
Description:
file.diff
Filename:
MIME Type:
Creator:
kaz
Created:
2001-01-10 12:40:01 UTC
Size:
27.60 KB
patch
obsolete
>diff -ruN -9 sys/conf/options.i386 sys+/conf/options.i386 >--- sys/conf/options.i386 Sat Sep 30 11:49:30 2000 >+++ sys+/conf/options.i386 Tue Jan 9 13:44:18 2001 >@@ -52,18 +52,19 @@ > CPU_LOOP_EN opt_cpu.h > CPU_PPRO2CELERON opt_cpu.h > CPU_RSTK_EN opt_cpu.h > CPU_SUSP_HLT opt_cpu.h > CPU_UPGRADE_HW_CACHE opt_cpu.h > CPU_WT_ALLOC opt_cpu.h > CYRIX_CACHE_WORKS opt_cpu.h > CYRIX_CACHE_REALLY_WORKS opt_cpu.h > NO_MEMORY_HOLE opt_cpu.h >+CPU_ENABLE_SSE opt_cpu.h > > # The CPU type affects the endian conversion functions all over the kernel. > I386_CPU opt_global.h > I486_CPU opt_global.h > I586_CPU opt_global.h > I686_CPU opt_global.h > > MAXCONS opt_syscons.h > SC_ALT_MOUSE_IMAGE opt_syscons.h >diff -ruN -9 sys/i386/conf/LINT sys+/i386/conf/LINT >--- sys/i386/conf/LINT Sat Nov 18 18:22:07 2000 >+++ sys+/i386/conf/LINT Tue Jan 9 13:45:36 2001 >@@ -158,18 +158,20 @@ > # > # CPU_CYRIX_NO_LOCK enables weak locking for the entire address space > # of Cyrix 6x86 and 6x86MX CPUs by setting the NO_LOCK bit of CCR1. > # Otherwise, the NO_LOCK bit of CCR1 is cleared. (NOTE 3) > # > # CPU_DISABLE_5X86_LSSER disables load store serialize (i.e. enables > # reorder). This option should not be used if you use memory mapped > # I/O device(s). > # >+# CPU_ENABLE_SSE enables SSE/MMX2 instructions support. >+# > # CPU_FASTER_5X86_FPU enables faster FPU exception handler. > # > # CPU_I486_ON_386 enables CPU cache on i486 based CPU upgrade products > # for i386 machines. > # > # CPU_IORT defines I/O clock delay time (NOTE 1). Default values of > # I/O clock delay time on Cyrix 5x86 and 6x86 are 0 and 7,respectively > # (no clock delay). 
> # >diff -ruN -9 sys/i386/i386/genassym.c sys+/i386/i386/genassym.c >--- sys/i386/i386/genassym.c Tue May 16 15:58:06 2000 >+++ sys+/i386/i386/genassym.c Tue Jan 9 13:49:18 2001 >@@ -126,20 +126,21 @@ > ASSYM(PCB_DBREGS, PCB_DBREGS); > ASSYM(PCB_EXT, offsetof(struct pcb, pcb_ext)); > > #ifdef SMP > ASSYM(PCB_MPNEST, offsetof(struct pcb, pcb_mpnest)); > #endif > > ASSYM(PCB_SPARE, offsetof(struct pcb, __pcb_spare)); > ASSYM(PCB_FLAGS, offsetof(struct pcb, pcb_flags)); >-ASSYM(PCB_SAVEFPU, offsetof(struct pcb, pcb_savefpu)); >-ASSYM(PCB_SAVEFPU_SIZE, sizeof(struct save87)); >+ASSYM(PCB_SAVEFPU, offsetof(struct pcb, pcb_save)); >+ASSYM(PCB_SAVEFPU_SIZE, sizeof(union savefpu)); >+ASSYM(PCB_SAVE87_SIZE, sizeof(struct save87)); > ASSYM(PCB_ONFAULT, offsetof(struct pcb, pcb_onfault)); > > #ifdef SMP > ASSYM(PCB_SIZE, sizeof(struct pcb)); > #endif > > ASSYM(TF_TRAPNO, offsetof(struct trapframe, tf_trapno)); > ASSYM(TF_ERR, offsetof(struct trapframe, tf_err)); > ASSYM(TF_CS, offsetof(struct trapframe, tf_cs)); >diff -ruN -9 sys/i386/i386/initcpu.c sys+/i386/i386/initcpu.c >--- sys/i386/i386/initcpu.c Sun Oct 15 12:09:32 2000 >+++ sys+/i386/i386/initcpu.c Tue Jan 9 14:04:38 2001 >@@ -28,18 +28,19 @@ > * > * $FreeBSD: src/sys/i386/i386/initcpu.c,v 1.19.2.2 2000/10/15 03:09:32 nyan Exp $ > */ > > #include "opt_cpu.h" > > #include <sys/param.h> > #include <sys/kernel.h> > #include <sys/systm.h> >+#include <sys/sysctl.h> > > #include <machine/cputypes.h> > #include <machine/md_var.h> > #include <machine/specialreg.h> > > void initializecpu(void); > #if defined(I586_CPU) && defined(CPU_WT_ALLOC) > void enable_K5_wt_alloc(void); > void enable_K6_wt_alloc(void); >@@ -55,19 +56,27 @@ > static void init_i486_on_386(void); > #endif > static void init_6x86(void); > #endif /* I486_CPU */ > > #ifdef I686_CPU > static void init_6x86MX(void); > static void init_ppro(void); > static void init_mendocino(void); >+#if defined(CPU_ENABLE_SSE) >+void init_sse(void); > #endif >+#endif /* 
I686_CPU */ >+ >+int hw_instruction_sse = 0; >+SYSCTL_INT(_hw, OID_AUTO, instruction_sse, CTLFLAG_RD, >+ &hw_instruction_sse, 0, >+ "SSE/MMX2 instructions available in CPU"); > > #ifdef I486_CPU > /* > * IBM Blue Lightning > */ > static void > init_bluelightning(void) > { > u_long eflags; >@@ -494,19 +503,28 @@ > bbl_cr_ctl3 |= 5 << 1; > #endif > wrmsr(0x11e, bbl_cr_ctl3); > } > > load_cr0(rcr0() & ~(CR0_CD | CR0_NW)); > write_eflags(eflags); > #endif /* CPU_PPRO2CELERON */ > } >- >+#if defined(CPU_ENABLE_SSE) >+void >+init_sse(void) >+{ >+ if ((cpu_feature & CPUID_XMM) && (cpu_feature & CPUID_FXSR)) { >+ load_cr4(rcr4() | CR4_FXSR | CR4_XMM); >+ cpu_fxsr = hw_instruction_sse = 1; >+ } >+} >+#endif > #endif /* I686_CPU */ > > void > initializecpu(void) > { > > switch (cpu) { > #ifdef I486_CPU > case CPU_BLUE: >@@ -538,18 +556,21 @@ > if (strcmp(cpu_vendor, "GenuineIntel") == 0) { > switch (cpu_id & 0xff0) { > case 0x610: > init_ppro(); > break; > case 0x660: > init_mendocino(); > break; > } >+#if defined(CPU_ENABLE_SSE) >+ init_sse(); >+#endif > } > break; > #endif > default: > break; > } > > #if defined(PC98) && !defined(CPU_UPGRADE_HW_CACHE) > /* >diff -ruN -9 sys/i386/i386/locore.s sys+/i386/i386/locore.s >--- sys/i386/i386/locore.s Fri Jul 7 09:38:46 2000 >+++ sys+/i386/i386/locore.s Tue Jan 9 14:07:36 2001 >@@ -90,24 +90,25 @@ > ALIGN_DATA /* just to be sure */ > > .globl HIDENAME(tmpstk) > .space 0x2000 /* space for tmpstk - temporary stack */ > HIDENAME(tmpstk): > > .globl _boothowto,_bootdev > > .globl _cpu,_cpu_vendor,_cpu_id,_bootinfo >- .globl _cpu_high, _cpu_feature >+ .globl _cpu_high, _cpu_feature, _cpu_fxsr > > _cpu: .long 0 /* are we 386, 386sx, or 486 */ > _cpu_id: .long 0 /* stepping ID */ > _cpu_high: .long 0 /* highest arg to CPUID */ > _cpu_feature: .long 0 /* features */ >+_cpu_fxsr: .long 0 /* use fxsave/fxrstor instruction */ > _cpu_vendor: .space 20 /* CPU origin code */ > _bootinfo: .space BOOTINFO_SIZE /* bootinfo that we can handle */ > 
> _KERNend: .long 0 /* phys addr end of kernel (just after bss) */ > physfree: .long 0 /* phys addr of next free page */ > > #ifdef SMP > .globl _cpu0prvpage > cpu0pp: .long 0 /* phys addr cpu0 private pg */ >diff -ruN -9 sys/i386/i386/machdep.c sys+/i386/i386/machdep.c >--- sys/i386/i386/machdep.c Fri Oct 27 18:07:22 2000 >+++ sys+/i386/i386/machdep.c Tue Jan 9 14:21:51 2001 >@@ -119,18 +119,22 @@ > extern void dblfault_handler __P((void)); > > extern void printcpuinfo(void); /* XXX header file */ > extern void earlysetcpuclass(void); /* same header file */ > extern void finishidentcpu(void); > extern void panicifcpuunsupported(void); > extern void initializecpu(void); > > static void cpu_startup __P((void *)); >+#ifdef CPU_ENABLE_SSE >+static void set_fpregs_xmm __P((struct save87 *, struct savexmm *)); >+static void fill_fpregs_xmm __P((struct savexmm *, struct save87 *)); >+#endif /* CPU_ENABLE_SSE */ > SYSINIT(cpu, SI_SUB_CPU, SI_ORDER_FIRST, cpu_startup, NULL) > > static MALLOC_DEFINE(M_MBUF, "mbuf", "mbuf"); > > int _udatasel, _ucodesel; > u_int atdevbase; > > #if defined(SWTCH_OPTIM_STATS) > extern int swtch_optim_stats; >@@ -2076,20 +2080,20 @@ > tp = p->p_md.md_regs; > frame_copy = *tp; > *(int *)((char *)&frame_copy + (off - min)) = data; > if (!EFL_SECURE(frame_copy.tf_eflags, tp->tf_eflags) || > !CS_SECURE(frame_copy.tf_cs)) > return (EINVAL); > *(int*)((char *)p->p_addr + off) = data; > return (0); > } >- min = offsetof(struct user, u_pcb) + offsetof(struct pcb, pcb_savefpu); >- if (off >= min && off <= min + sizeof(struct save87) - sizeof(int)) { >+ min = offsetof(struct user, u_pcb) + offsetof(struct pcb, pcb_save); >+ if (off >= min && off <= min + sizeof(union savefpu) - sizeof(int)) { > *(int*)((char *)p->p_addr + off) = data; > return (0); > } > return (EFAULT); > } > > int > fill_regs(p, regs) > struct proc *p; >@@ -2145,33 +2149,101 @@ > tp->tf_cs = regs->r_cs; > tp->tf_eflags = regs->r_eflags; > tp->tf_esp = regs->r_esp; > tp->tf_ss = 
regs->r_ss; > pcb = &p->p_addr->u_pcb; > pcb->pcb_gs = regs->r_gs; > return (0); > } > >+#ifdef CPU_ENABLE_SSE >+static void >+fill_fpregs_xmm(sv_xmm, sv_87) >+ struct savexmm *sv_xmm; >+ struct save87 *sv_87; >+{ >+ register struct env87 *penv_87 = &sv_87->sv_env; >+ register struct envxmm *penv_xmm = &sv_xmm->sv_env; >+ int i; >+ >+ /* FPU control/status */ >+ penv_87->en_cw = penv_xmm->en_cw; >+ penv_87->en_sw = penv_xmm->en_sw; >+ penv_87->en_tw = penv_xmm->en_tw; >+ penv_87->en_fip = penv_xmm->en_fip; >+ penv_87->en_fcs = penv_xmm->en_fcs; >+ penv_87->en_opcode = penv_xmm->en_opcode; >+ penv_87->en_foo = penv_xmm->en_foo; >+ penv_87->en_fos = penv_xmm->en_fos; >+ >+ /* FPU registers */ >+ for (i = 0; i < 8; ++i) >+ sv_87->sv_ac[i] = sv_xmm->sv_fp[i].fp_acc; >+ >+ sv_87->sv_ex_sw = sv_xmm->sv_ex_sw; >+} >+ >+static void >+set_fpregs_xmm(sv_87, sv_xmm) >+ struct save87 *sv_87; >+ struct savexmm *sv_xmm; >+{ >+ register struct env87 *penv_87 = &sv_87->sv_env; >+ register struct envxmm *penv_xmm = &sv_xmm->sv_env; >+ int i; >+ >+/* FPU control/status */ >+ penv_xmm->en_cw = penv_87->en_cw; >+ penv_xmm->en_sw = penv_87->en_sw; >+ penv_xmm->en_tw = penv_87->en_tw; >+ penv_xmm->en_fip = penv_87->en_fip; >+ penv_xmm->en_fcs = penv_87->en_fcs; >+ penv_xmm->en_opcode = penv_87->en_opcode; >+ penv_xmm->en_foo = penv_87->en_foo; >+ penv_xmm->en_fos = penv_87->en_fos; >+ >+ /* FPU registers */ >+ for (i = 0; i < 8; ++i) >+ sv_xmm->sv_fp[i].fp_acc = sv_87->sv_ac[i]; >+ >+ sv_xmm->sv_ex_sw = sv_87->sv_ex_sw; >+} >+#endif /* CPU_ENABLE_SSE */ >+ > int > fill_fpregs(p, fpregs) > struct proc *p; > struct fpreg *fpregs; > { >- bcopy(&p->p_addr->u_pcb.pcb_savefpu, fpregs, sizeof *fpregs); >+#ifdef CPU_ENABLE_SSE >+ if (cpu_fxsr) { >+ fill_fpregs_xmm(&p->p_addr->u_pcb.pcb_save.sv_xmm, >+ (struct save87 *)fpregs); >+ return (0); >+ } >+#endif /* CPU_ENABLE_SSE */ >+ bcopy(&p->p_addr->u_pcb.pcb_save.sv_87, fpregs, sizeof *fpregs); > return (0); > } > > int > set_fpregs(p, fpregs) > 
struct proc *p; > struct fpreg *fpregs; > { >- bcopy(fpregs, &p->p_addr->u_pcb.pcb_savefpu, sizeof *fpregs); >+#ifdef CPU_ENABLE_SSE >+ if (cpu_fxsr) { >+ set_fpregs_xmm((struct save87 *)fpregs, >+ &p->p_addr->u_pcb.pcb_save.sv_xmm); >+ return (0); >+ } >+#endif /* CPU_ENABLE_SSE */ >+ bcopy(fpregs, &p->p_addr->u_pcb.pcb_save.sv_87, sizeof *fpregs); > return (0); > } > > int > fill_dbregs(p, dbregs) > struct proc *p; > struct dbreg *dbregs; > { > struct pcb *pcb; >diff -ruN -9 sys/i386/i386/mp_machdep.c sys+/i386/i386/mp_machdep.c >--- sys/i386/i386/mp_machdep.c Sat Sep 30 11:49:32 2000 >+++ sys+/i386/i386/mp_machdep.c Tue Jan 9 14:26:16 2001 >@@ -229,18 +229,22 @@ > #define MP_ENABLE_POST 0x14 > #define MPTABLE_PASS2_POST 0x15 > > #define START_ALL_APS_POST 0x16 > #define INSTALL_AP_TRAMP_POST 0x17 > #define START_AP_POST 0x18 > > #define MP_ANNOUNCE_POST 0x19 > >+#if defined(CPU_ENABLE_SSE) >+extern void init_sse(void); >+#endif >+ > > /** XXX FIXME: where does this really belong, isa.h/isa.c perhaps? */ > int current_postcode; > > /** XXX FIXME: what system files declare these??? */ > extern struct region_descriptor r_gdt, r_idt; > > int bsp_apic_ready = 0; /* flags useability of BSP apic */ > int mp_ncpus; /* # of CPUs, including BSP */ >@@ -2392,18 +2396,23 @@ > other_cpus = all_cpus & ~(1 << cpuid); > > printf("SMP: AP CPU #%d Launched!\n", cpuid); > > /* XXX FIXME: i386 specific, and redundant: Setup the FPU. */ > load_cr0((rcr0() & ~CR0_EM) | CR0_MP | CR0_NE | CR0_TS); > > /* set up FPU state on the AP */ > npxinit(__INITIAL_NPXCW__); >+ >+ /* Setup the SSE */ >+#if defined(CPU_ENABLE_SSE) >+ init_sse(); >+#endif > > /* A quick check from sanity claus */ > apic_id = (apic_id_to_logical[(lapic.id & 0x0f000000) >> 24]); > if (cpuid != apic_id) { > printf("SMP: cpuid = %d\n", cpuid); > printf("SMP: apic_id = %d\n", apic_id); > printf("PTD[MPPTDI] = %p\n", (void *)PTD[MPPTDI]); > panic("cpuid mismatch! 
boom!!"); > } >diff -ruN -9 sys/i386/i386/support.s sys+/i386/i386/support.s >--- sys/i386/i386/support.s Sat Sep 30 11:49:33 2000 >+++ sys+/i386/i386/support.s Tue Jan 9 14:29:32 2001 >@@ -948,19 +948,19 @@ > src in %esi > dst in %edi > len in %ecx XXX changed to on stack for profiling > uses %eax and %edx for tmp. storage > */ > /* XXX use ENTRY() to get profiling. fastmove() is actually a non-entry. */ > ENTRY(fastmove) > pushl %ebp > movl %esp,%ebp >- subl $PCB_SAVEFPU_SIZE+3*4,%esp >+ subl $PCB_SAVE87_SIZE+3*4,%esp > > movl 8(%ebp),%ecx > cmpl $63,%ecx > jbe fastmove_tail > > testl $7,%esi /* check if src addr is multiple of 8 */ > jnz fastmove_tail > > testl $7,%edi /* check if dst addr is multiple of 8 */ >@@ -987,19 +987,19 @@ > */ > /* tmp = curpcb->pcb_savefpu; */ > movl %ecx,-12(%ebp) > movl %esi,-8(%ebp) > movl %edi,-4(%ebp) > movl %esp,%edi > movl _curpcb,%esi > addl $PCB_SAVEFPU,%esi > cld >- movl $PCB_SAVEFPU_SIZE>>2,%ecx >+ movl $PCB_SAVE87_SIZE>>2,%ecx > rep > movsl > movl -12(%ebp),%ecx > movl -8(%ebp),%esi > movl -4(%ebp),%edi > /* stop_emulating(); */ > clts > /* npxproc = curproc; */ > movl _curproc,%eax >@@ -1064,19 +1064,19 @@ > > /* curpcb->pcb_savefpu = tmp; */ > movl %ecx,-12(%ebp) > movl %esi,-8(%ebp) > movl %edi,-4(%ebp) > movl _curpcb,%edi > addl $PCB_SAVEFPU,%edi > movl %esp,%esi > cld >- movl $PCB_SAVEFPU_SIZE>>2,%ecx >+ movl $PCB_SAVE87_SIZE>>2,%ecx > rep > movsl > movl -12(%ebp),%ecx > movl -8(%ebp),%esi > movl -4(%ebp),%edi > > /* start_emulating(); */ > smsw %ax > orb $CR0_TS,%al >@@ -1103,19 +1103,19 @@ > popl %ebp > ret > > ALIGN_TEXT > fastmove_fault: > movl _curpcb,%edi > addl $PCB_SAVEFPU,%edi > movl %esp,%esi > cld >- movl $PCB_SAVEFPU_SIZE>>2,%ecx >+ movl $PCB_SAVE87_SIZE>>2,%ecx > rep > movsl > > smsw %ax > orb $CR0_TS,%al > lmsw %ax > movl $0,_npxproc > > fastmove_tail_fault: >diff -ruN -9 sys/i386/i386/vm_machdep.c sys+/i386/i386/vm_machdep.c >--- sys/i386/i386/vm_machdep.c Sat Aug 26 13:19:26 2000 >+++ 
sys+/i386/i386/vm_machdep.c Tue Jan 9 14:30:56 2001 >@@ -135,19 +135,19 @@ > } > } > #endif > return; > } > > #if NNPX > 0 > /* Ensure that p1's pcb is up to date. */ > if (npxproc == p1) >- npxsave(&p1->p_addr->u_pcb.pcb_savefpu); >+ npxsave(&p1->p_addr->u_pcb.pcb_save); > #endif > > /* Copy p1's pcb. */ > p2->p_addr->u_pcb = p1->p_addr->u_pcb; > pcb2 = &p2->p_addr->u_pcb; > > /* > * Create a new fresh stack for the new process. > * Copy the trap frame for the return to user mode as if from a >diff -ruN -9 sys/i386/include/asnames.h sys+/i386/include/asnames.h >--- sys/i386/include/asnames.h Tue May 16 15:58:10 2000 >+++ sys+/i386/include/asnames.h Tue Jan 9 14:32:12 2001 >@@ -185,18 +185,19 @@ > #define _copyin_vector copyin_vector > #define _copyout_vector copyout_vector > #define _cpl cpl > #define _cpl_lock cpl_lock > #define _cpu cpu > #define _cpu0prvpage cpu0prvpage > #define _cpu_apic_versions cpu_apic_versions > #define _cpu_class cpu_class > #define _cpu_feature cpu_feature >+#define _cpu_fxsr cpu_fxsr > #define _cpu_high cpu_high > #define _cpu_id cpu_id > #define _cpu_num_to_apic_id cpu_num_to_apic_id > #define _cpu_switch cpu_switch > #define _cpu_vendor cpu_vendor > #define _default_halt default_halt > #define _denormal_operand denormal_operand > #define _div_small div_small > #define _divide_by_zero divide_by_zero >diff -ruN -9 sys/i386/include/md_var.h sys+/i386/include/md_var.h >--- sys/i386/include/md_var.h Mon Feb 21 05:51:23 2000 >+++ sys+/i386/include/md_var.h Tue Jan 9 14:33:24 2001 >@@ -41,18 +41,19 @@ > extern void (*bcopy_vector) __P((const void *from, void *to, size_t len)); > extern int busdma_swi_pending; > extern int (*copyin_vector) __P((const void *udaddr, void *kaddr, > size_t len)); > extern int (*copyout_vector) __P((const void *kaddr, void *udaddr, > size_t len)); > extern u_int cpu_feature; > extern u_int cpu_high; > extern u_int cpu_id; >+extern u_int cpu_fxsr; > extern char cpu_vendor[]; > extern u_int cyrix_did; > extern char 
kstack[]; > #ifdef PC98 > extern int need_pre_dma_flush; > extern int need_post_dma_flush; > #endif > extern void (*netisrs[32]) __P((void)); > extern int nfs_diskless_valid; >diff -ruN -9 sys/i386/include/npx.h sys+/i386/include/npx.h >--- sys/i386/include/npx.h Sat Mar 11 02:56:33 2000 >+++ sys+/i386/include/npx.h Tue Jan 9 14:40:14 2001 >@@ -81,18 +81,54 @@ > * struct and arrange to store into this struct (ending here) > * before it is inspected for ptracing or for core dumps. Some > * emulators overwrite the whole struct. We have no good way of > * knowing how much padding to leave. Leave just enough for the > * GPL emulator's i387_union (176 bytes total). > */ > u_char sv_pad[64]; /* padding; used by emulators */ > }; > >+struct envxmm { >+ u_int16_t en_cw; /* control word (16bits) */ >+ u_int16_t en_sw; /* status word (16bits) */ >+ u_int16_t en_tw; /* tag word (16bits) */ >+ u_int16_t en_opcode; /* opcode last executed (11 bits ) */ >+ u_int32_t en_fip; /* floating point instruction pointer */ >+ u_int16_t en_fcs; /* floating code segment selector */ >+ u_int16_t en_pad0; /* padding */ >+ u_int32_t en_foo; /* floating operand offset */ >+ u_int16_t en_fos; /* floating operand segment selector */ >+ u_int16_t en_pad1; /* padding */ >+ u_int32_t en_mxcsr; /* SSE control/status register */ >+ u_int32_t en_pad2; /* padding */ >+}; >+ >+/* Contents of each SSE extended accumulator */ >+struct xmmacc { >+ u_char xmm_bytes[16]; >+}; >+ >+struct savexmm { >+ struct envxmm sv_env; >+ struct { >+ struct fpacc87 fp_acc; >+ u_char fp_pad[6]; /* padding */ >+ } sv_fp[8]; >+ struct xmmacc sv_xmm[8]; >+ u_long sv_ex_sw; /* status word for last exception */ >+ u_char sv_pad[220]; >+}; >+ >+union savefpu { >+ struct save87 sv_87; >+ struct savexmm sv_xmm; >+}; >+ > /* > * The hardware default control word for i387's and later coprocessors is > * 0x37F, giving: > * > * round to nearest > * 64-bit precision > * all exceptions masked.
> * > * We modify the affine mode bit and precision bits in this to give: >@@ -108,13 +144,13 @@ > > #ifdef _KERNEL > #ifndef npxproc > extern struct proc *npxproc; > #endif > > int npxdna __P((void)); > void npxexit __P((struct proc *p)); > void npxinit __P((int control)); >-void npxsave __P((struct save87 *addr)); >+void npxsave __P((union savefpu *addr)); > #endif > > #endif /* !_MACHINE_NPX_H_ */ >diff -ruN -9 sys/i386/include/pcb.h sys+/i386/include/pcb.h >--- sys/i386/include/pcb.h Wed Dec 29 13:33:03 1999 >+++ sys+/i386/include/pcb.h Tue Jan 9 14:43:07 2001 >@@ -61,19 +61,20 @@ > int pcb_dr3; > int pcb_dr6; > int pcb_dr7; > > #ifdef USER_LDT > struct pcb_ldt *pcb_ldt; /* per process (user) LDT */ > #else > struct pcb_ldt *pcb_ldt_dontuse; > #endif >- struct save87 pcb_savefpu; /* floating point state for 287/387 */ >+ union savefpu pcb_save; >+#define pcb_savefpu pcb_save.sv_87 > u_char pcb_flags; > #define FP_SOFTFP 0x01 /* process using software fltng pnt emulator */ > #define PCB_DBREGS 0x02 /* process using debug registers */ > caddr_t pcb_onfault; /* copyin/out fault recovery */ > #ifdef SMP > u_long pcb_mpnest; > #else > u_long pcb_mpnest_dontuse; > #endif >diff -ruN -9 sys/i386/include/specialreg.h sys+/i386/include/specialreg.h >--- sys/i386/include/specialreg.h Sat Sep 11 00:51:44 1999 >+++ sys+/i386/include/specialreg.h Tue Jan 9 14:44:25 2001 >@@ -87,18 +87,20 @@ > #define CPUID_MCE 0x0080 > #define CPUID_CX8 0x0100 > #define CPUID_APIC 0x0200 > #define CPUID_B10 0x0400 > #define CPUID_B11 0x0800 > #define CPUID_MTRR 0x1000 > #define CPUID_PGE 0x2000 > #define CPUID_MCA 0x4000 > #define CPUID_CMOV 0x8000 >+#define CPUID_FXSR 0x01000000 >+#define CPUID_XMM 0x02000000 > > /* > * Model-specific registers for the i386 family > */ > #define MSR_P5_MC_ADDR 0x000 > #define MSR_P5_MC_TYPE 0x001 > #define MSR_TSC 0x010 > #define MSR_APICBASE 0x01b > #define MSR_EBL_CR_POWERON 0x02a >diff -ruN -9 sys/i386/isa/npx.c sys+/i386/isa/npx.c >--- 
sys/i386/isa/npx.c Sun Jan 30 01:17:36 2000 >+++ sys+/i386/isa/npx.c Tue Jan 9 15:06:48 2001 >@@ -29,18 +29,19 @@ > * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT > * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY > * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF > * SUCH DAMAGE. > * > * from: @(#)npx.c 7.2 (Berkeley) 5/12/91 > * $FreeBSD: src/sys/i386/isa/npx.c,v 1.80 2000/01/29 16:17:36 peter Exp $ > */ > >+#include "opt_cpu.h" > #include "opt_debug_npx.h" > #include "opt_math_emulate.h" > > #include <sys/param.h> > #include <sys/systm.h> > #include <sys/bus.h> > #include <sys/kernel.h> > #include <sys/malloc.h> > #include <sys/module.h> >@@ -90,45 +91,60 @@ > #define fldcw(addr) __asm("fldcw %0" : : "m" (*(addr))) > #define fnclex() __asm("fnclex") > #define fninit() __asm("fninit") > #define fnop() __asm("fnop") > #define fnsave(addr) __asm __volatile("fnsave %0" : "=m" (*(addr))) > #define fnstcw(addr) __asm __volatile("fnstcw %0" : "=m" (*(addr))) > #define fnstsw(addr) __asm __volatile("fnstsw %0" : "=m" (*(addr))) > #define fp_divide_by_0() __asm("fldz; fld1; fdiv %st,%st(1); fnop") > #define frstor(addr) __asm("frstor %0" : : "m" (*(addr))) >+#define fxrstor(addr) __asm("fxrstor %0" : : "m" (*(addr))) >+#define fxsave(addr) __asm __volatile("fxsave %0" : "=m" (*(addr))) > #define start_emulating() __asm("smsw %%ax; orb %0,%%al; lmsw %%ax" \ > : : "n" (CR0_TS) : "ax") > #define stop_emulating() __asm("clts") > > #else /* not __GNUC__ */ > > void fldcw __P((caddr_t addr)); > void fnclex __P((void)); > void fninit __P((void)); > void fnop __P((void)); > void fnsave __P((caddr_t addr)); > void fnstcw __P((caddr_t addr)); > void fnstsw __P((caddr_t addr)); > void fp_divide_by_0 __P((void)); > void frstor __P((caddr_t addr)); >+void fxsave __P((caddr_t addr)); >+void fxrstor __P((caddr_t addr)); > void start_emulating __P((void)); > void stop_emulating __P((void)); > > #endif /* 
__GNUC__ */ > >+#ifdef CPU_ENABLE_SSE >+#define FPU_STATUS_EX(pcb) \ >+ (cpu_fxsr ? \ >+ &(pcb)->pcb_save.sv_xmm.sv_ex_sw : \ >+ &(pcb)->pcb_save.sv_87.sv_ex_sw) >+#else /* CPU_ENABLE_SSE */ >+#define FPU_STATUS_EX(pcb) (&(pcb)->pcb_save.sv_87.sv_ex_sw) >+#endif /* CPU_ENABLE_SSE */ >+ > typedef u_char bool_t; > > static int npx_attach __P((device_t dev)); > void npx_intr __P((void *)); > static void npx_identify __P((driver_t *driver, device_t parent)); > static int npx_probe __P((device_t dev)); > static int npx_probe1 __P((device_t dev)); >+static void fpusave __P((union savefpu *)); >+static void fpurstor __P((union savefpu *)); > #ifdef I586_CPU > static long timezero __P((const char *funcname, > void (*func)(void *buf, size_t len))); > #endif /* I586_CPU */ > > int hw_float; /* XXX currently just alias for npx_exists */ > > SYSCTL_INT(_hw,HW_FLOATINGPT, floatingpoint, > CTLFLAG_RD, &hw_float, 0, >@@ -468,45 +484,45 @@ > } > > /* > * Initialize floating point unit. > */ > void > npxinit(control) > u_short control; > { >- struct save87 dummy; >+ union savefpu dummy; > > if (!npx_exists) > return; > /* > * fninit has the same h/w bugs as fnsave. Use the detoxified > * fnsave to throw away any junk in the fpu. npxsave() initializes > * the fpu and sets npxproc = NULL as important side effects. > */ > npxsave(&dummy); > stop_emulating(); > fldcw(&control); > if (curpcb != NULL) >- fnsave(&curpcb->pcb_savefpu); >+ fpusave(&curpcb->pcb_save); > start_emulating(); > } > > /* > * Free coprocessor (if we have it). > */ > void > npxexit(p) > struct proc *p; > { > > if (p == npxproc) >- npxsave(&curpcb->pcb_savefpu); >+ npxsave(&curpcb->pcb_save); > #ifdef NPX_DEBUG > if (npx_exists) { > u_int masked_exceptions; > > masked_exceptions = curpcb->pcb_savefpu.sv_env.en_cw > & curpcb->pcb_savefpu.sv_env.en_sw & 0x7f; > /* > * Log exceptions that would have trapped with the old > * control word (overflow, divide by 0, and invalid operand). 
>@@ -708,32 +724,35 @@ > * solution for signals other than SIGFPE. > */ > void > npx_intr(dummy) > void *dummy; > { > int code; > u_short control; > struct intrframe *frame; >+ u_long *pstatus; > > if (npxproc == NULL || !npx_exists) { > printf("npxintr: npxproc = %p, curproc = %p, npx_exists = %d\n", > npxproc, curproc, npx_exists); > panic("npxintr from nowhere"); > } > if (npxproc != curproc) { > printf("npxintr: npxproc = %p, curproc = %p, npx_exists = %d\n", > npxproc, curproc, npx_exists); > panic("npxintr from non-current process"); > } > >+ pstatus = FPU_STATUS_EX(curpcb); >+ > outb(0xf0, 0); >- fnstsw(&curpcb->pcb_savefpu.sv_ex_sw); >+ fnstsw(pstatus); > fnstcw(&control); > fnclex(); > > /* > * Pass exception to process. > */ > frame = (struct intrframe *)&dummy; /* XXX */ > if ((ISPL(frame->if_cs) == SEL_UPL) || (frame->if_eflags & PSL_VM)) { > /* >@@ -747,20 +766,19 @@ > * in doreti, and the frame for that could easily be set up > * just before it is used). > */ > curproc->p_md.md_regs = INTR_TO_TRAPFRAME(frame); > /* > * Encode the appropriate code for detailed information on > * this exception. > */ > code = >- fpetable[(curpcb->pcb_savefpu.sv_ex_sw & ~control & 0x3f) | >- (curpcb->pcb_savefpu.sv_ex_sw & 0x40)]; >+ fpetable[(*pstatus & ~control & 0x3f) | (*pstatus & 0x40)]; > trapsignal(curproc, SIGFPE, code); > } else { > /* > * Nested interrupt. These losers occur when: > * o an IRQ13 is bogusly generated at a bogus time, e.g.: > * o immediately after an fnsave or frstor of an > * error state. > * o a couple of 386 instructions after > * "fstpl _memvar" causes a stack overflow. >@@ -779,69 +797,73 @@ > * Implement device not available (DNA) exception > * > * It would be better to switch FP context here (if curproc != npxproc) > * and not necessarily for every context switch, but it is too hard to > * access foreign pcb's. 
> */ > int > npxdna() > { >+ u_long *pstatus; >+ > if (!npx_exists) > return (0); > if (npxproc != NULL) { > printf("npxdna: npxproc = %p, curproc = %p\n", > npxproc, curproc); > panic("npxdna"); > } > stop_emulating(); > /* > * Record new context early in case frstor causes an IRQ13. > */ > npxproc = curproc; >- curpcb->pcb_savefpu.sv_ex_sw = 0; >+ >+ pstatus = FPU_STATUS_EX(curpcb); >+ *pstatus = 0; >+ > /* > * The following frstor may cause an IRQ13 when the state being > * restored has a pending error. The error will appear to have been > * triggered by the current (npx) user instruction even when that > * instruction is a no-wait instruction that should not trigger an > * error (e.g., fnclex). On at least one 486 system all of the > * no-wait instructions are broken the same as frstor, so our > * treatment does not amplify the breakage. On at least one > * 386/Cyrix 387 system, fnclex works correctly while frstor and > * fnsave are broken, so our treatment breaks fnclex if it is the > * first FPU instruction after a context switch. > */ >- frstor(&curpcb->pcb_savefpu); >+ fpurstor(&curpcb->pcb_save); > > return (1); > } > > /* > * Wrapper for fnsave instruction to handle h/w bugs. If there is an error > * pending, then fnsave generates a bogus IRQ13 on some systems. Force > * any IRQ13 to be handled immediately, and then ignore it. This routine is > * often called at splhigh so it must not use many system services. In > * particular, it's much easier to install a special handler than to > * guarantee that it's safe to use npxintr() and its supporting code. 
> */ > void > npxsave(addr) >- struct save87 *addr; >+ union savefpu *addr; > { >-#ifdef SMP >- >+#if defined(SMP) || defined(CPU_ENABLE_SSE) > stop_emulating(); >- fnsave(addr); >+ fpusave(addr); > /* fnop(); */ > start_emulating(); > npxproc = NULL; > >-#else /* SMP */ >+#else /* SMP or CPU_ENABLE_SSE */ > > u_char icu1_mask; > u_char icu2_mask; > u_char old_icu1_mask; > u_char old_icu2_mask; > struct gate_descriptor save_idt_npxintr; > > disable_intr(); > old_icu1_mask = inb(IO_ICU1 + 1); >@@ -862,18 +884,50 @@ > outb(IO_ICU1 + 1, > (icu1_mask & ~npx0_imask) | (old_icu1_mask & npx0_imask)); > outb(IO_ICU2 + 1, > (icu2_mask & ~(npx0_imask >> 8)) > | (old_icu2_mask & (npx0_imask >> 8))); > idt[npx_intrno] = save_idt_npxintr; > enable_intr(); /* back to usual state */ > > #endif /* SMP */ >+} >+ >+static void >+fpusave(addr) >+ union savefpu *addr; >+{ >+#ifdef CPU_ENABLE_SSE >+ static struct savexmm svxmm __attribute__((aligned(16))); >+ >+ if (cpu_fxsr) { >+ fxsave(&svxmm); >+ bcopy(&svxmm, addr, sizeof(struct savexmm)); >+ return; >+ } >+#endif /* CPU_ENABLE_SSE */ >+ fnsave(addr); >+} >+ >+static void >+fpurstor(addr) >+ union savefpu *addr; >+{ >+#ifdef CPU_ENABLE_SSE >+ static struct savexmm svxmm __attribute__((aligned(16))); >+ >+ if (cpu_fxsr) { >+ bcopy(addr, &svxmm, sizeof (struct savexmm)); >+ fxrstor(&svxmm); >+ return; >+ } >+#endif /* CPU_ENABLE_SSE */ >+ frstor(addr); > } > > #ifdef I586_CPU > static long > timezero(funcname, func) > const char *funcname; > void (*func) __P((void *buf, size_t len)); > > {
You cannot view the attachment while viewing its details because your browser does not support IFRAMEs.
View the attachment on a separate page
.
View Attachment As Diff
View Attachment As Raw
Actions:
View
|
Diff
Attachments on
bug 24219
: 12573