diff --git a/sys/amd64/ia32/ia32_reg.c b/sys/amd64/ia32/ia32_reg.c
index 4a068b7f61e..10c91f9c0b9 100644
--- a/sys/amd64/ia32/ia32_reg.c
+++ b/sys/amd64/ia32/ia32_reg.c
@@ -146,7 +146,9 @@ fill_fpregs32(struct thread *td, struct fpreg32 *regs)
 	struct save87 *sv_87;
 	struct env87 *penv_87;
 	struct envxmm *penv_xmm;
-	int i;
+	int i, st;
+	uint16_t tw;
+	uint8_t ab_tw;
 
 	bzero(regs, sizeof(*regs));
 	sv_87 = (struct save87 *)regs;
@@ -173,12 +175,35 @@ fill_fpregs32(struct thread *td, struct fpreg32 *regs)
 	penv_87->en_fos = td->td_frame->tf_ds;
 
 	/* FPU registers and tags */
-	penv_87->en_tw = 0xffff;
-	for (i = 0; i < 8; ++i) {
-		sv_87->sv_ac[i] = sv_fpu->sv_fp[i].fp_acc;
-		if ((penv_xmm->en_tw & (1 << i)) != 0)
-			penv_87->en_tw &= ~(3 << i * 2);
+	/*
+	 * Expand the abridged tag word (one bit per register) into the
+	 * full x87 tag word (two bits per register), scanning from bit 7
+	 * down to bit 0.  For ST(i), i = fpu_reg - top, we start with
+	 * fpu_reg=7.
+	 */
+	st = 7 - ((penv_xmm->en_sw >> 11) & 7);
+	ab_tw = penv_xmm->en_tw;
+	tw = 0;
+	for (i = 0x80; i != 0; i >>= 1) {
+		sv_87->sv_ac[st] = sv_fpu->sv_fp[st].fp_acc;
+		tw <<= 2;
+		if (ab_tw & i) {
+			/* Non-empty: classify ST(st), default 0 is Valid. */
+			struct fpacc87* fx_reg = &sv_fpu->sv_fp[st].fp_acc;
+			uint16_t exp = *((uint16_t*)&fx_reg->fp_bytes[8]) & 0x7fff;
+			uint64_t mantissa = *((uint64_t*)fx_reg->fp_bytes);
+			if (exp == 0) {
+				if (mantissa == 0)
+					tw |= 1; /* Zero */
+				else
+					tw |= 2; /* Denormal */
+			} else if (exp == 0x7fff || (mantissa >> 63) == 0)
+				tw |= 2; /* Infinity, NaN or unnormal */
+		} else
+			tw |= 3; /* Empty */
+		st = (st - 1) & 7;
 	}
+	penv_87->en_tw = tw;
 
 	return (0);
 }
diff --git a/sys/i386/i386/npx.c b/sys/i386/i386/npx.c
index d5ce4fe5513..285b2bf58d1 100644
--- a/sys/i386/i386/npx.c
+++ b/sys/i386/i386/npx.c
@@ -1149,7 +1149,9 @@ npx_fill_fpregs_xmm1(struct savexmm *sv_xmm, struct save87 *sv_87)
 {
 	struct env87 *penv_87;
 	struct envxmm *penv_xmm;
-	int i;
+	int i, st;
+	uint8_t ab_tw;
+	uint16_t tw;
 
 	penv_87 = &sv_87->sv_env;
 	penv_xmm = &sv_xmm->sv_env;
@@ -1164,13 +1166,35 @@ npx_fill_fpregs_xmm1(struct savexmm *sv_xmm, struct save87 *sv_87)
 	penv_87->en_fos = penv_xmm->en_fos;
 
 	/* FPU registers and tags */
-	penv_87->en_tw = 0xffff;
-	for (i = 0; i < 8; ++i) {
-		sv_87->sv_ac[i] = sv_xmm->sv_fp[i].fp_acc;
-		if ((penv_xmm->en_tw & (1 << i)) != 0)
-			/* zero and special are set as valid */
-			penv_87->en_tw &= ~(3 << i * 2);
+	/*
+	 * Expand the abridged tag word (one bit per register) into the
+	 * full x87 tag word (two bits per register), scanning from bit 7
+	 * down to bit 0.  For ST(i), i = fpu_reg - top, we start with
+	 * fpu_reg=7.
+	 */
+	st = 7 - ((penv_xmm->en_sw >> 11) & 7);
+	ab_tw = penv_xmm->en_tw;
+	tw = 0;
+	for (i = 0x80; i != 0; i >>= 1) {
+		sv_87->sv_ac[st] = sv_xmm->sv_fp[st].fp_acc;
+		tw <<= 2;
+		if (ab_tw & i) {
+			/* Non-empty: classify ST(st), default 0 is Valid. */
+			struct fpacc87* fx_reg = &sv_xmm->sv_fp[st].fp_acc;
+			uint16_t exp = *((uint16_t*)&fx_reg->fp_bytes[8]) & 0x7fff;
+			uint64_t mantissa = *((uint64_t*)fx_reg->fp_bytes);
+			if (exp == 0) {
+				if (mantissa == 0)
+					tw |= 1; /* Zero */
+				else
+					tw |= 2; /* Denormal */
+			} else if (exp == 0x7fff || (mantissa >> 63) == 0)
+				tw |= 2; /* Infinity, NaN or unnormal */
+		} else
+			tw |= 3; /* Empty */
+		st = (st - 1) & 7;
 	}
+	penv_87->en_tw = tw;
 }
 
 void