FreeBSD Bugzilla – Attachment 15084 Details for
Bug 28070
support Pentium3 SSE
Home
|
New
|
Browse
|
Search
|
[?]
|
Reports
|
Help
|
New Account
|
Log In
Remember
[x]
|
Forgot Password
Login:
[x]
[patch]
file.diff
file.diff (text/plain), 26.34 KB, created by
kaz
on 2001-06-11 18:00:09 UTC
(
hide
)
Description:
file.diff
Filename:
MIME Type:
Creator:
kaz
Created:
2001-06-11 18:00:09 UTC
Size:
26.34 KB
patch
obsolete
>diff -ruN -9 sys-/conf/options.i386 sys/conf/options.i386 >--- sys-/conf/options.i386 Sat Apr 14 10:11:49 2001 >+++ sys/conf/options.i386 Sun Jun 10 10:28:05 2001 >@@ -53,18 +53,19 @@ > CPU_LOOP_EN opt_cpu.h > CPU_PPRO2CELERON opt_cpu.h > CPU_RSTK_EN opt_cpu.h > CPU_SUSP_HLT opt_cpu.h > CPU_UPGRADE_HW_CACHE opt_cpu.h > CPU_WT_ALLOC opt_cpu.h > CYRIX_CACHE_WORKS opt_cpu.h > CYRIX_CACHE_REALLY_WORKS opt_cpu.h > NO_MEMORY_HOLE opt_cpu.h >+CPU_ENABLE_SSE opt_cpu.h > > # The CPU type affects the endian conversion functions all over the kernel. > I386_CPU opt_global.h > I486_CPU opt_global.h > I586_CPU opt_global.h > I686_CPU opt_global.h > > MAXCONS opt_syscons.h > SC_ALT_MOUSE_IMAGE opt_syscons.h >diff -ruN -9 sys-/i386/conf/NOTES sys/i386/conf/NOTES >--- sys-/i386/conf/NOTES Wed May 30 10:47:57 2001 >+++ sys/i386/conf/NOTES Sun Jun 10 10:29:27 2001 >@@ -181,18 +181,20 @@ > # > # CPU_CYRIX_NO_LOCK enables weak locking for the entire address space > # of Cyrix 6x86 and 6x86MX CPUs by setting the NO_LOCK bit of CCR1. > # Otherwise, the NO_LOCK bit of CCR1 is cleared. (NOTE 3) > # > # CPU_DISABLE_5X86_LSSER disables load store serialize (i.e. enables > # reorder). This option should not be used if you use memory mapped > # I/O device(s). > # >+# CPU_ENABLE_SSE enables SSE/MMX2 instructions support. >+# > # CPU_FASTER_5X86_FPU enables faster FPU exception handler. > # > # CPU_I486_ON_386 enables CPU cache on i486 based CPU upgrade products > # for i386 machines. > # > # CPU_IORT defines I/O clock delay time (NOTE 1). Default values of > # I/O clock delay time on Cyrix 5x86 and 6x86 are 0 and 7,respectively > # (no clock delay). 
> # >diff -ruN -9 sys-/i386/i386/genassym.c sys/i386/i386/genassym.c >--- sys-/i386/i386/genassym.c Mon Apr 30 10:17:36 2001 >+++ sys/i386/i386/genassym.c Sun Jun 10 13:36:53 2001 >@@ -120,20 +120,21 @@ > ASSYM(PCB_DR2, offsetof(struct pcb, pcb_dr2)); > ASSYM(PCB_DR3, offsetof(struct pcb, pcb_dr3)); > ASSYM(PCB_DR6, offsetof(struct pcb, pcb_dr6)); > ASSYM(PCB_DR7, offsetof(struct pcb, pcb_dr7)); > ASSYM(PCB_DBREGS, PCB_DBREGS); > ASSYM(PCB_EXT, offsetof(struct pcb, pcb_ext)); > > ASSYM(PCB_SPARE, offsetof(struct pcb, __pcb_spare)); > ASSYM(PCB_FLAGS, offsetof(struct pcb, pcb_flags)); >-ASSYM(PCB_SAVEFPU, offsetof(struct pcb, pcb_savefpu)); >-ASSYM(PCB_SAVEFPU_SIZE, sizeof(struct save87)); >+ASSYM(PCB_SAVEFPU, offsetof(struct pcb, pcb_save)); >+ASSYM(PCB_SAVEFPU_SIZE, sizeof(union savefpu)); >+ASSYM(PCB_SAVE87_SIZE, sizeof(struct save87)); > ASSYM(PCB_ONFAULT, offsetof(struct pcb, pcb_onfault)); > > #ifdef SMP > ASSYM(PCB_SIZE, sizeof(struct pcb)); > #endif > > ASSYM(TF_TRAPNO, offsetof(struct trapframe, tf_trapno)); > ASSYM(TF_ERR, offsetof(struct trapframe, tf_err)); > ASSYM(TF_CS, offsetof(struct trapframe, tf_cs)); >diff -ruN -9 sys-/i386/i386/initcpu.c sys/i386/i386/initcpu.c >--- sys-/i386/i386/initcpu.c Thu Mar 29 10:33:32 2001 >+++ sys/i386/i386/initcpu.c Sun Jun 10 10:29:27 2001 >@@ -28,18 +28,19 @@ > * > * $FreeBSD: src/sys/i386/i386/initcpu.c,v 1.25 2001/03/28 03:06:06 jhb Exp $ > */ > > #include "opt_cpu.h" > > #include <sys/param.h> > #include <sys/kernel.h> > #include <sys/systm.h> >+#include <sys/sysctl.h> > > #include <machine/cputypes.h> > #include <machine/md_var.h> > #include <machine/specialreg.h> > > void initializecpu(void); > #if defined(I586_CPU) && defined(CPU_WT_ALLOC) > void enable_K5_wt_alloc(void); > void enable_K6_wt_alloc(void); >@@ -55,19 +56,27 @@ > static void init_i486_on_386(void); > #endif > static void init_6x86(void); > #endif /* I486_CPU */ > > #ifdef I686_CPU > static void init_6x86MX(void); > static void init_ppro(void); > 
static void init_mendocino(void); >+#if defined(CPU_ENABLE_SSE) >+void init_sse(void); > #endif >+#endif /* I686_CPU */ >+ >+int hw_instruction_sse = 0; >+SYSCTL_INT(_hw, OID_AUTO, instruction_sse, CTLFLAG_RD, >+ &hw_instruction_sse, 0, >+ "SSE/MMX2 instructions available in CPU"); > > #ifdef I486_CPU > /* > * IBM Blue Lightning > */ > static void > init_bluelightning(void) > { > u_long eflags; >@@ -494,19 +503,28 @@ > bbl_cr_ctl3 |= 5 << 1; > #endif > wrmsr(0x11e, bbl_cr_ctl3); > } > > load_cr0(rcr0() & ~(CR0_CD | CR0_NW)); > write_eflags(eflags); > #endif /* CPU_PPRO2CELERON */ > } >- >+#if defined(CPU_ENABLE_SSE) >+void >+init_sse(void) >+{ >+ if ((cpu_feature & CPUID_XMM) && (cpu_feature & CPUID_FXSR)) { >+ load_cr4(rcr4() | CR4_FXSR | CR4_XMM); >+ cpu_fxsr = hw_instruction_sse = 1; >+ } >+} >+#endif > #endif /* I686_CPU */ > > void > initializecpu(void) > { > > switch (cpu) { > #ifdef I486_CPU > case CPU_BLUE: >@@ -538,18 +556,21 @@ > if (strcmp(cpu_vendor, "GenuineIntel") == 0) { > switch (cpu_id & 0xff0) { > case 0x610: > init_ppro(); > break; > case 0x660: > init_mendocino(); > break; > } >+#if defined(CPU_ENABLE_SSE) >+ init_sse(); >+#endif > } > break; > #endif > default: > break; > } > > #if defined(PC98) && !defined(CPU_UPGRADE_HW_CACHE) > /* >diff -ruN -9 sys-/i386/i386/locore.s sys/i386/i386/locore.s >--- sys-/i386/i386/locore.s Thu Mar 15 14:10:06 2001 >+++ sys/i386/i386/locore.s Sun Jun 10 16:50:54 2001 >@@ -107,24 +107,25 @@ > ALIGN_DATA /* just to be sure */ > > .globl HIDENAME(tmpstk) > .space 0x2000 /* space for tmpstk - temporary stack */ > HIDENAME(tmpstk): > > .globl boothowto,bootdev > > .globl cpu,cpu_vendor,cpu_id,bootinfo >- .globl cpu_high, cpu_feature >+ .globl cpu_high, cpu_feature, cpu_fxsr > > cpu: .long 0 /* are we 386, 386sx, or 486 */ > cpu_id: .long 0 /* stepping ID */ > cpu_high: .long 0 /* highest arg to CPUID */ > cpu_feature: .long 0 /* features */ >+cpu_fxsr: .long 0 /* use fxsave/fxrstor instruction */ > cpu_vendor: .space 
20 /* CPU origin code */ > bootinfo: .space BOOTINFO_SIZE /* bootinfo that we can handle */ > > KERNend: .long 0 /* phys addr end of kernel (just after bss) */ > physfree: .long 0 /* phys addr of next free page */ > > #ifdef SMP > .globl cpu0prvpage > cpu0pp: .long 0 /* phys addr cpu0 private pg */ >diff -ruN -9 sys-/i386/i386/machdep.c sys/i386/i386/machdep.c >--- sys-/i386/i386/machdep.c Sun May 20 10:14:25 2001 >+++ sys/i386/i386/machdep.c Sun Jun 10 13:49:40 2001 >@@ -125,18 +125,22 @@ > extern void earlysetcpuclass(void); /* same header file */ > extern void finishidentcpu(void); > extern void panicifcpuunsupported(void); > extern void initializecpu(void); > > #define CS_SECURE(cs) (ISPL(cs) == SEL_UPL) > #define EFL_SECURE(ef, oef) ((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0) > > static void cpu_startup __P((void *)); >+#ifdef CPU_ENABLE_SSE >+static void set_fpregs_xmm __P((struct save87 *, struct savexmm *)); >+static void fill_fpregs_xmm __P((struct savexmm *, struct save87 *)); >+#endif /* CPU_ENABLE_SSE */ > SYSINIT(cpu, SI_SUB_CPU, SI_ORDER_FIRST, cpu_startup, NULL) > > int _udatasel, _ucodesel; > u_int atdevbase; > > #if defined(SWTCH_OPTIM_STATS) > extern int swtch_optim_stats; > SYSCTL_INT(_debug, OID_AUTO, swtch_optim_stats, > CTLFLAG_RD, &swtch_optim_stats, 0, ""); >@@ -2143,20 +2147,20 @@ > tp = p->p_md.md_regs; > frame_copy = *tp; > *(int *)((char *)&frame_copy + (off - min)) = data; > if (!EFL_SECURE(frame_copy.tf_eflags, tp->tf_eflags) || > !CS_SECURE(frame_copy.tf_cs)) > return (EINVAL); > *(int*)((char *)p->p_addr + off) = data; > return (0); > } >- min = offsetof(struct user, u_pcb) + offsetof(struct pcb, pcb_savefpu); >- if (off >= min && off <= min + sizeof(struct save87) - sizeof(int)) { >+ min = offsetof(struct user, u_pcb) + offsetof(struct pcb, pcb_save); >+ if (off >= min && off <= min + sizeof(union savefpu) - sizeof(int)) { > *(int*)((char *)p->p_addr + off) = data; > return (0); > } > return (EFAULT); > } > > int > fill_regs(p, regs) 
> struct proc *p; >@@ -2212,33 +2216,101 @@ > tp->tf_cs = regs->r_cs; > tp->tf_eflags = regs->r_eflags; > tp->tf_esp = regs->r_esp; > tp->tf_ss = regs->r_ss; > pcb = &p->p_addr->u_pcb; > pcb->pcb_gs = regs->r_gs; > return (0); > } > >+#ifdef CPU_ENABLE_SSE >+static void >+fill_fpregs_xmm(sv_xmm, sv_87) >+ struct savexmm *sv_xmm; >+ struct save87 *sv_87; >+{ >+ register struct env87 *penv_87 = &sv_87->sv_env; >+ register struct envxmm *penv_xmm = &sv_xmm->sv_env; >+ int i; >+ >+ /* FPU control/status */ >+ penv_87->en_cw = penv_xmm->en_cw; >+ penv_87->en_sw = penv_xmm->en_sw; >+ penv_87->en_tw = penv_xmm->en_tw; >+ penv_87->en_fip = penv_xmm->en_fip; >+ penv_87->en_fcs = penv_xmm->en_fcs; >+ penv_87->en_opcode = penv_xmm->en_opcode; >+ penv_87->en_foo = penv_xmm->en_foo; >+ penv_87->en_fos = penv_xmm->en_fos; >+ >+ /* FPU registers */ >+ for (i = 0; i < 8; ++i) >+ sv_87->sv_ac[i] = sv_xmm->sv_fp[i].fp_acc; >+ >+ sv_87->sv_ex_sw = sv_xmm->sv_ex_sw; >+} >+ >+static void >+set_fpregs_xmm(sv_87, sv_xmm) >+ struct save87 *sv_87; >+ struct savexmm *sv_xmm; >+{ >+ register struct env87 *penv_87 = &sv_87->sv_env; >+ register struct envxmm *penv_xmm = &sv_xmm->sv_env; >+ int i; >+ >+/* FPU control/status */ >+ penv_xmm->en_cw = penv_87->en_cw; >+ penv_xmm->en_sw = penv_87->en_sw; >+ penv_xmm->en_tw = penv_87->en_tw; >+ penv_xmm->en_fip = penv_87->en_fip; >+ penv_xmm->en_fcs = penv_87->en_fcs; >+ penv_xmm->en_opcode = penv_87->en_opcode; >+ penv_xmm->en_foo = penv_87->en_foo; >+ penv_xmm->en_fos = penv_87->en_fos; >+ >+ /* FPU registers */ >+ for (i = 0; i < 8; ++i) >+ sv_xmm->sv_fp[i].fp_acc = sv_87->sv_ac[i]; >+ >+ sv_xmm->sv_ex_sw = sv_87->sv_ex_sw; >+} >+#endif /* CPU_ENABLE_SSE */ >+ > int > fill_fpregs(p, fpregs) > struct proc *p; > struct fpreg *fpregs; > { >- bcopy(&p->p_addr->u_pcb.pcb_savefpu, fpregs, sizeof *fpregs); >+#ifdef CPU_ENABLE_SSE >+ if (cpu_fxsr) { >+ fill_fpregs_xmm(&p->p_addr->u_pcb.pcb_save.sv_xmm, >+ (struct save87 *)fpregs); >+ return (0); >+ } 
>+#endif /* CPU_ENABLE_SSE */ >+ bcopy(&p->p_addr->u_pcb.pcb_save.sv_87, fpregs, sizeof *fpregs); > return (0); > } > > int > set_fpregs(p, fpregs) > struct proc *p; > struct fpreg *fpregs; > { >- bcopy(fpregs, &p->p_addr->u_pcb.pcb_savefpu, sizeof *fpregs); >+#ifdef CPU_ENABLE_SSE >+ if (cpu_fxsr) { >+ set_fpregs_xmm((struct save87 *)fpregs, >+ &p->p_addr->u_pcb.pcb_save.sv_xmm); >+ return (0); >+ } >+#endif /* CPU_ENABLE_SSE */ >+ bcopy(fpregs, &p->p_addr->u_pcb.pcb_save.sv_87, sizeof *fpregs); > return (0); > } > > int > fill_dbregs(p, dbregs) > struct proc *p; > struct dbreg *dbregs; > { > struct pcb *pcb; >diff -ruN -9 sys-/i386/i386/mp_machdep.c sys/i386/i386/mp_machdep.c >--- sys-/i386/i386/mp_machdep.c Wed May 16 10:28:28 2001 >+++ sys/i386/i386/mp_machdep.c Sun Jun 10 13:55:00 2001 >@@ -227,18 +227,22 @@ > #define MP_ENABLE_POST 0x14 > #define MPTABLE_PASS2_POST 0x15 > > #define START_ALL_APS_POST 0x16 > #define INSTALL_AP_TRAMP_POST 0x17 > #define START_AP_POST 0x18 > > #define MP_ANNOUNCE_POST 0x19 > >+#if defined(CPU_ENABLE_SSE) >+extern void init_sse(void); >+#endif >+ > /* used to hold the AP's until we are ready to release them */ > static struct mtx ap_boot_mtx; > > /** XXX FIXME: where does this really belong, isa.h/isa.c perhaps? */ > int current_postcode; > > /** XXX FIXME: what system files declare these??? 
*/ > extern struct region_descriptor r_gdt, r_idt; > >@@ -2258,18 +2262,23 @@ > PCPU_SET(other_cpus, all_cpus & ~(1 << PCPU_GET(cpuid))); > > printf("SMP: AP CPU #%d Launched!\n", PCPU_GET(cpuid)); > > /* set up CPU registers and state */ > cpu_setregs(); > > /* set up FPU state on the AP */ > npxinit(__INITIAL_NPXCW__); >+ >+ /* Setup the SSE */ >+#if defined(CPU_ENABLE_SSE) >+ init_sse(); >+#endif > > /* A quick check from sanity claus */ > apic_id = (apic_id_to_logical[(lapic.id & 0x0f000000) >> 24]); > if (PCPU_GET(cpuid) != apic_id) { > printf("SMP: cpuid = %d\n", PCPU_GET(cpuid)); > printf("SMP: apic_id = %d\n", apic_id); > printf("PTD[MPPTDI] = %p\n", (void *)PTD[MPPTDI]); > panic("cpuid mismatch! boom!!"); > } >diff -ruN -9 sys-/i386/i386/support.s sys/i386/i386/support.s >--- sys-/i386/i386/support.s Fri Apr 27 10:13:03 2001 >+++ sys/i386/i386/support.s Sun Jun 10 14:10:57 2001 >@@ -970,19 +970,19 @@ > src in %esi > dst in %edi > len in %ecx XXX changed to on stack for profiling > uses %eax and %edx for tmp. storage > */ > /* XXX use ENTRY() to get profiling. fastmove() is actually a non-entry. 
*/ > ENTRY(fastmove) > pushl %ebp > movl %esp,%ebp >- subl $PCB_SAVEFPU_SIZE+3*4,%esp >+ subl $PCB_SAVE87_SIZE+3*4,%esp > > movl 8(%ebp),%ecx > cmpl $63,%ecx > jbe fastmove_tail > > testl $7,%esi /* check if src addr is multiple of 8 */ > jnz fastmove_tail > > testl $7,%edi /* check if dst addr is multiple of 8 */ >@@ -1012,19 +1012,19 @@ > */ > /* tmp = curpcb->pcb_savefpu; */ > movl %ecx,-12(%ebp) > movl %esi,-8(%ebp) > movl %edi,-4(%ebp) > movl %esp,%edi > movl PCPU(CURPCB),%esi > addl $PCB_SAVEFPU,%esi > cld >- movl $PCB_SAVEFPU_SIZE>>2,%ecx >+ movl $PCB_SAVE87_SIZE>>2,%ecx > rep > movsl > movl -12(%ebp),%ecx > movl -8(%ebp),%esi > movl -4(%ebp),%edi > /* stop_emulating(); */ > clts > /* npxproc = curproc; */ > movl PCPU(CURPROC),%eax >@@ -1096,19 +1096,19 @@ > > /* curpcb->pcb_savefpu = tmp; */ > movl %ecx,-12(%ebp) > movl %esi,-8(%ebp) > movl %edi,-4(%ebp) > movl PCPU(CURPCB),%edi > addl $PCB_SAVEFPU,%edi > movl %esp,%esi > cld >- movl $PCB_SAVEFPU_SIZE>>2,%ecx >+ movl $PCB_SAVE87_SIZE>>2,%ecx > rep > movsl > movl -12(%ebp),%ecx > movl -8(%ebp),%esi > movl -4(%ebp),%edi > > /* start_emulating(); */ > smsw %ax > orb $CR0_TS,%al >@@ -1141,19 +1141,19 @@ > ALIGN_TEXT > fastmove_fault: > /* XXX ungrab FPU context atomically. */ > cli > > movl PCPU(CURPCB),%edi > addl $PCB_SAVEFPU,%edi > movl %esp,%esi > cld >- movl $PCB_SAVEFPU_SIZE>>2,%ecx >+ movl $PCB_SAVE87_SIZE>>2,%ecx > rep > movsl > > smsw %ax > orb $CR0_TS,%al > lmsw %ax > movl $0,PCPU(NPXPROC) > > /* XXX end of atomic FPU context ungrab. */ >diff -ruN -9 sys-/i386/i386/vm_machdep.c sys/i386/i386/vm_machdep.c >--- sys-/i386/i386/vm_machdep.c Thu May 31 10:15:30 2001 >+++ sys/i386/i386/vm_machdep.c Sun Jun 10 14:13:09 2001 >@@ -142,19 +142,19 @@ > return; > } > > /* Ensure that p1's pcb is up to date. 
*/ > #ifdef DEV_NPX > if (p1 == curproc) > p1->p_addr->u_pcb.pcb_gs = rgs(); > savecrit = critical_enter(); > if (PCPU_GET(npxproc) == p1) >- npxsave(&p1->p_addr->u_pcb.pcb_savefpu); >+ npxsave(&p1->p_addr->u_pcb.pcb_save); > critical_exit(savecrit); > #endif > > /* Copy p1's pcb. */ > p2->p_addr->u_pcb = p1->p_addr->u_pcb; > pcb2 = &p2->p_addr->u_pcb; > > /* > * Create a new fresh stack for the new process. >diff -ruN -9 sys-/i386/include/md_var.h sys/i386/include/md_var.h >--- sys-/i386/include/md_var.h Tue Dec 5 09:35:57 2000 >+++ sys/i386/include/md_var.h Sun Jun 10 13:25:24 2001 >@@ -41,18 +41,19 @@ > extern void (*bcopy_vector) __P((const void *from, void *to, size_t len)); > extern int busdma_swi_pending; > extern int (*copyin_vector) __P((const void *udaddr, void *kaddr, > size_t len)); > extern int (*copyout_vector) __P((const void *kaddr, void *udaddr, > size_t len)); > extern u_int cpu_feature; > extern u_int cpu_high; > extern u_int cpu_id; >+extern u_int cpu_fxsr; > extern char cpu_vendor[]; > extern u_int cyrix_did; > extern char kstack[]; > #ifdef PC98 > extern int need_pre_dma_flush; > extern int need_post_dma_flush; > #endif > extern int nfs_diskless_valid; > extern void (*ovbcopy_vector) __P((const void *from, void *to, size_t len)); >diff -ruN -9 sys-/i386/include/npx.h sys/i386/include/npx.h >--- sys-/i386/include/npx.h Wed May 23 10:22:06 2001 >+++ sys/i386/include/npx.h Sun Jun 10 14:49:09 2001 >@@ -79,18 +79,54 @@ > * struct and arrange to store into this struct (ending here) > * before it is inspected for ptracing or for core dumps. Some > * emulators overwrite the whole struct. We have no good way of > * knowing how much padding to leave. Leave just enough for the > * GPL emulator's i387_union (176 bytes total). 
> */ > u_char sv_pad[64]; /* padding; used by emulators */ > }; > >+struct envxmm { >+ u_int16_t en_cw; /* control word (16bits) */ >+ u_int16_t en_sw; /* status word (16bits) */ >+ u_int16_t en_tw; /* tag word (16bits) */ >+ u_int16_t en_opcode; /* opcode last executed (11 bits ) */ >+ u_int32_t en_fip; /* floating point instruction pointer */ >+ u_int16_t en_fcs; /* floating code segment selector */ >+ u_int16_t en_pad0; /* padding */ >+ u_int32_t en_foo; /* floating operand offset */ >+ u_int16_t en_fos; /* floating operand segment selector */ >+ u_int16_t en_pad1; /* padding */ >+ u_int32_t en_mxcsr; /* SSE contorol/status register */ >+ u_int32_t en_pad2; /* padding */ >+}; >+ >+/* Contents of each SSE extended accumulator */ >+struct xmmacc { >+ u_char xmm_bytes[16]; >+}; >+ >+struct savexmm { >+ struct envxmm sv_env; >+ struct { >+ struct fpacc87 fp_acc; >+ u_char fp_pad[6]; /* padding */ >+ } sv_fp[8]; >+ struct xmmacc sv_xmm[8]; >+ u_long sv_ex_sw; /* status word for last exception */ >+ u_char sv_pad[220]; >+}; >+ >+union savefpu { >+ struct save87 sv_87; >+ struct savexmm sv_xmm; >+}; >+ > /* > * The hardware default control word for i387's and later coprocessors is > * 0x37F, giving: > * > * round to nearest > * 64-bit precision > * all exceptions masked. > * > * We modify the affine mode bit and precision bits in this to give: >@@ -102,14 +138,14 @@ > * because it makes the results of calculations depend on whether > * intermediate values are stored in memory or in FPU registers. 
> */ > #define __INITIAL_NPXCW__ 0x127F > > #ifdef _KERNEL > int npxdna __P((void)); > void npxexit __P((struct proc *p)); > void npxinit __P((int control)); >-void npxsave __P((struct save87 *addr)); >+void npxsave __P((union savefpu *addr)); > int npxtrap __P((void)); > #endif > > #endif /* !_MACHINE_NPX_H_ */ >diff -ruN -9 sys-/i386/include/pcb.h sys/i386/include/pcb.h >--- sys-/i386/include/pcb.h Wed May 23 10:22:06 2001 >+++ sys/i386/include/pcb.h Sun Jun 10 14:52:25 2001 >@@ -56,19 +56,20 @@ > > int pcb_dr0; > int pcb_dr1; > int pcb_dr2; > int pcb_dr3; > int pcb_dr6; > int pcb_dr7; > > struct pcb_ldt *pcb_ldt; /* per process (user) LDT */ >- struct save87 pcb_savefpu; /* floating point state for 287/387 */ >+ union savefpu pcb_save; >+#define pcb_savefpu pcb_save.sv_87 > u_char pcb_flags; > #define FP_SOFTFP 0x01 /* process using software fltng pnt emulator */ > #define PCB_DBREGS 0x02 /* process using debug registers */ > #define PCB_NPXTRAP 0x04 /* npx trap pending */ > caddr_t pcb_onfault; /* copyin/out fault recovery */ > int pcb_gs; > struct pcb_ext *pcb_ext; /* optional pcb extension */ > u_long __pcb_spare[3]; /* adjust to avoid core dump size changes */ > }; >diff -ruN -9 sys-/i386/include/specialreg.h sys/i386/include/specialreg.h >--- sys-/i386/include/specialreg.h Sat Sep 11 00:51:44 1999 >+++ sys/i386/include/specialreg.h Sun Jun 10 13:25:24 2001 >@@ -87,18 +87,20 @@ > #define CPUID_MCE 0x0080 > #define CPUID_CX8 0x0100 > #define CPUID_APIC 0x0200 > #define CPUID_B10 0x0400 > #define CPUID_B11 0x0800 > #define CPUID_MTRR 0x1000 > #define CPUID_PGE 0x2000 > #define CPUID_MCA 0x4000 > #define CPUID_CMOV 0x8000 >+#define CPUID_FXSR 0x01000000 >+#define CPUID_XMM 0x02000000 > > /* > * Model-specific registers for the i386 family > */ > #define MSR_P5_MC_ADDR 0x000 > #define MSR_P5_MC_TYPE 0x001 > #define MSR_TSC 0x010 > #define MSR_APICBASE 0x01b > #define MSR_EBL_CR_POWERON 0x02a >diff -ruN -9 sys-/i386/isa/npx.c sys/i386/isa/npx.c >--- 
sys-/i386/isa/npx.c Wed May 23 10:22:08 2001 >+++ sys/i386/isa/npx.c Mon Jun 11 23:14:23 2001 >@@ -29,18 +29,19 @@ > * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT > * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY > * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF > * SUCH DAMAGE. > * > * from: @(#)npx.c 7.2 (Berkeley) 5/12/91 > * $FreeBSD: src/sys/i386/isa/npx.c,v 1.101 2001/05/22 21:20:49 bde Exp $ > */ > >+#include "opt_cpu.h" > #include "opt_debug_npx.h" > #include "opt_math_emulate.h" > > #include <sys/param.h> > #include <sys/systm.h> > #include <sys/bus.h> > #include <sys/kernel.h> > #include <sys/lock.h> > #include <sys/malloc.h> >@@ -93,46 +94,61 @@ > > #define fldcw(addr) __asm("fldcw %0" : : "m" (*(addr))) > #define fnclex() __asm("fnclex") > #define fninit() __asm("fninit") > #define fnsave(addr) __asm __volatile("fnsave %0" : "=m" (*(addr))) > #define fnstcw(addr) __asm __volatile("fnstcw %0" : "=m" (*(addr))) > #define fnstsw(addr) __asm __volatile("fnstsw %0" : "=m" (*(addr))) > #define fp_divide_by_0() __asm("fldz; fld1; fdiv %st,%st(1); fnop") > #define frstor(addr) __asm("frstor %0" : : "m" (*(addr))) >+#define fxrstor(addr) __asm("fxrstor %0" : : "m" (*(addr))) >+#define fxsave(addr) __asm __volatile("fxsave %0" : "=m" (*(addr))) > #define start_emulating() __asm("smsw %%ax; orb %0,%%al; lmsw %%ax" \ > : : "n" (CR0_TS) : "ax") > #define stop_emulating() __asm("clts") > > #else /* not __GNUC__ */ > > void fldcw __P((caddr_t addr)); > void fnclex __P((void)); > void fninit __P((void)); > void fnsave __P((caddr_t addr)); > void fnstcw __P((caddr_t addr)); > void fnstsw __P((caddr_t addr)); > void fp_divide_by_0 __P((void)); > void frstor __P((caddr_t addr)); >+void fxsave __P((caddr_t addr)); >+void fxrstor __P((caddr_t addr)); > void start_emulating __P((void)); > void stop_emulating __P((void)); > > #endif /* __GNUC__ */ > >+#ifdef CPU_ENABLE_SSE >+#define 
FPU_STATUS_EX(pcb) \ >+ (cpu_fxsr ? \ >+ &(pcb)->pcb_save.sv_xmm.sv_ex_sw : \ >+ &(pcb)->pcb_save.sv_87.sv_ex_sw) >+#else /* CPU_ENABLE_SSE */ >+#define FPU_STATUS_EX(pcb) (&(pcb)->pcb_save.sv_87.sv_ex_sw) >+#endif /* CPU_ENABLE_SSE */ >+ > typedef u_char bool_t; > > static int npx_attach __P((device_t dev)); > static void npx_identify __P((driver_t *driver, device_t parent)); > #ifndef SMP > static void npx_intr __P((void *)); > #endif > static int npx_probe __P((device_t dev)); > static int npx_probe1 __P((device_t dev)); >+static void fpusave __P((union savefpu *)); >+static void fpurstor __P((union savefpu *)); > #ifdef I586_CPU > static long timezero __P((const char *funcname, > void (*func)(void *buf, size_t len))); > #endif /* I586_CPU */ > > int hw_float; /* XXX currently just alias for npx_exists */ > > SYSCTL_INT(_hw,HW_FLOATINGPT, floatingpoint, > CTLFLAG_RD, &hw_float, 0, >@@ -197,18 +213,20 @@ > > child = BUS_ADD_CHILD(parent, 0, "npx", 0); > if (child == NULL) > panic("npx_identify"); > } > > #ifndef SMP > /* > * Do minimal handling of npx interrupts to convert them to traps. >+ * >+ * (2001/06/10) kaz@kobe1995 TODO: FPU STATUS > */ > static void > npx_intr(dummy) > void *dummy; > { > struct proc *p; > > /* > * The BUSY# latch must be cleared in all cases so that the next >@@ -523,57 +541,57 @@ > } > > /* > * Initialize floating point unit. > */ > void > npxinit(control) > u_short control; > { >- struct save87 dummy; >+ union savefpu dummy; > critical_t savecrit; > > if (!npx_exists) > return; > /* > * fninit has the same h/w bugs as fnsave. Use the detoxified > * fnsave to throw away any junk in the fpu. npxsave() initializes > * the fpu and sets npxproc = NULL as important side effects. 
> */ > savecrit = critical_enter(); > npxsave(&dummy); > stop_emulating(); > fldcw(&control); > if (PCPU_GET(curpcb) != NULL) >- fnsave(&PCPU_GET(curpcb)->pcb_savefpu); >+ fpusave(&PCPU_GET(curpcb)->pcb_save); > start_emulating(); > critical_exit(savecrit); > } > > /* > * Free coprocessor (if we have it). > */ > void > npxexit(p) > struct proc *p; > { > critical_t savecrit; > > savecrit = critical_enter(); > if (p == PCPU_GET(npxproc)) >- npxsave(&PCPU_GET(curpcb)->pcb_savefpu); >+ npxsave(&PCPU_GET(curpcb)->pcb_save); > critical_exit(savecrit); > #ifdef NPX_DEBUG > if (npx_exists) { > u_int masked_exceptions; > >- masked_exceptions = PCPU_GET(curpcb)->pcb_savefpu.sv_env.en_cw >- & PCPU_GET(curpcb)->pcb_savefpu.sv_env.en_sw & 0x7f; >+ masked_exceptions = PCPU_GET(curpcb)->pcb_save.sv_env.en_cw >+ & PCPU_GET(curpcb)->pcb_save.sv_env.en_sw & 0x7f; > /* > * Log exceptions that would have trapped with the old > * control word (overflow, divide by 0, and invalid operand). > */ > if (masked_exceptions & 0x0d) > log(LOG_ERR, > "pid %d (%s) exited with masked floating point exceptions 0x%02x\n", > p->p_pid, p->p_comm, masked_exceptions); > } >@@ -807,47 +825,51 @@ > * Implement device not available (DNA) exception > * > * It would be better to switch FP context here (if curproc != npxproc) > * and not necessarily for every context switch, but it is too hard to > * access foreign pcb's. > */ > int > npxdna() > { >+ /* u_long *pstatus; kaz@kobe1995 */ > critical_t s; > > if (!npx_exists) > return (0); > if (PCPU_GET(npxproc) != NULL) { > printf("npxdna: npxproc = %p, curproc = %p\n", > PCPU_GET(npxproc), curproc); > panic("npxdna"); > } > s = critical_enter(); > stop_emulating(); > /* > * Record new context early in case frstor causes an IRQ13. 
> */ > PCPU_SET(npxproc, CURPROC); >- PCPU_GET(curpcb)->pcb_savefpu.sv_ex_sw = 0; >+ PCPU_GET(curpcb)->pcb_savefpu.sv_ex_sw = 0; /* kaz@kobe1995 */ >+ /* pstatus = FPU_STATUS_EX(curpcb); >+ *pstatus = 0; kaz@kobe1995 */ >+ > /* > * The following frstor may cause an IRQ13 when the state being > * restored has a pending error. The error will appear to have been > * triggered by the current (npx) user instruction even when that > * instruction is a no-wait instruction that should not trigger an > * error (e.g., fnclex). On at least one 486 system all of the > * no-wait instructions are broken the same as frstor, so our > * treatment does not amplify the breakage. On at least one > * 386/Cyrix 387 system, fnclex works correctly while frstor and > * fnsave are broken, so our treatment breaks fnclex if it is the > * first FPU instruction after a context switch. > */ >- frstor(&PCPU_GET(curpcb)->pcb_savefpu); >+ fpurstor(&PCPU_GET(curpcb)->pcb_save); > critical_exit(s); > > return (1); > } > > /* > * Wrapper for fnsave instruction, partly to handle hardware bugs. When npx > * exceptions are reported via IRQ13, spurious IRQ13's may be triggered by > * no-wait npx instructions. See the Intel application note AP-578 for >@@ -866,25 +888,57 @@ > * npxsave() atomically with checking npxproc. > * > * A previous version of npxsave() went to great lengths to excecute fnsave > * with interrupts enabled in case executing it froze the CPU. This case > * can't happen, at least for Intel CPU/NPX's. Spurious IRQ13's don't imply > * spurious freezes. 
> */ > void > npxsave(addr) >- struct save87 *addr; >+ union savefpu *addr; > { > > stop_emulating(); >- fnsave(addr); >+ fpusave(addr); > start_emulating(); > PCPU_SET(npxproc, NULL); >+} >+ >+static void >+fpusave(addr) >+ union savefpu *addr; >+{ >+#ifdef CPU_ENABLE_SSE >+ static struct savexmm svxmm __attribute__((aligned(16))); >+ >+ if (cpu_fxsr) { >+ fxsave(&svxmm); >+ bcopy(&svxmm, addr, sizeof(struct savexmm)); >+ return; >+ } >+#endif /* CPU_ENABLE_SSE */ >+ fnsave(addr); >+} >+ >+static void >+fpurstor(addr) >+ union savefpu *addr; >+{ >+#ifdef CPU_ENABLE_SSE >+ static struct savexmm svxmm __attribute__((aligned(16))); >+ >+ if (cpu_fxsr) { >+ bcopy(addr, &svxmm, sizeof (struct savexmm)); >+ fxrstor(&svxmm); >+ return; >+ } >+#endif /* CPU_ENABLE_SSE */ >+ frstor(addr); > } > > #ifdef I586_CPU > static long > timezero(funcname, func) > const char *funcname; > void (*func) __P((void *buf, size_t len)); > > { >-- >ÃæÂ¼Ï»֡÷¿À¸Í <mailto:kaz@kobe1995.net> >NAKAMURA Kazushi@KOBE <http://kobe1995.net/>
You cannot view the attachment while viewing its details because your browser does not support IFRAMEs.
View the attachment on a separate page
.
View Attachment As Diff
View Attachment As Raw
Actions:
View
|
Diff
Attachments on
bug 28070
: 15084