Line 0
Link Here
|
|
|
1 |
--- filter/yuvdenoise/motion.c.orig 2016-09-18 16:54:49 UTC |
2 |
+++ filter/yuvdenoise/motion.c |
3 |
@@ -350,32 +350,32 @@ calc_SAD_half_mmx (uint8_t * ref, uint8_ |
4 |
|
5 |
__asm__ __volatile__ |
6 |
( |
7 |
- " pxor %%mm0 , %%mm0; /* clear mm0 */" |
8 |
- " pcmpeqw %%mm6 , %%mm6; /* Build 7f7f7f7f7f7f7f in a register */" |
9 |
- " psrlw $9 , %%mm6; /* */" |
10 |
- " packuswb %%mm6 , %%mm6; /* */" |
11 |
- " ; /* */" |
12 |
- " .rept 8 ; /* */" |
13 |
- " movq (%%esi), %%mm1; /* 8 Pixels from filtered frame to mm1 */" |
14 |
- " movq (%%edi), %%mm2; /* 8 Pixels from filtered frame to mm2 (displaced) */" |
15 |
- " movq (%%eax), %%mm3; /* reference to mm3 */" |
16 |
- " psrlq $1 , %%mm1; /* average source pixels */" |
17 |
- " psrlq $1 , %%mm2; /* shift right by one (divide by two) */" |
18 |
- " pand %%mm6 , %%mm1; /* kill downshifted bits */" |
19 |
- " pand %%mm6 , %%mm2; /* kill downshifted bits */" |
20 |
- " paddusw %%mm2 , %%mm1; /* add up ... */" |
21 |
- |
22 |
- " movq %%mm3 , %%mm4; /* copy reference to mm4 */" |
23 |
- " psubusb %%mm1 , %%mm3; /* positive differences between mm2 and mm1 */" |
24 |
- " psubusb %%mm4 , %%mm1; /* positive differences between mm1 and mm3 */" |
25 |
- " paddusb %%mm3 , %%mm1; /* mm1 now contains abs(mm1-mm2) */" |
26 |
- " paddusw %%mm1 , %%mm0; /* add result to mm0 */" |
27 |
- " addl %%ecx , %%esi; /* add framewidth to frameaddress */" |
28 |
- " addl %%ecx , %%edi; /* add framewidth to frameaddress */" |
29 |
- " addl %%ecx , %%ecx; /* add framewidth to frameaddress */" |
30 |
- " .endr ; /* */" |
31 |
- " /* */" |
32 |
- " movq %%mm0 , %0 ; /* make mm0 available to gcc ... */" |
33 |
+ " pxor %%mm0 , %%mm0; /* clear mm0 */\n" |
34 |
+ " pcmpeqw %%mm6 , %%mm6; /* Build 7f7f7f7f7f7f7f in a register */\n" |
35 |
+ " psrlw $9 , %%mm6; /* */\n" |
36 |
+ " packuswb %%mm6 , %%mm6; /* */\n" |
37 |
+ " ; /* */\n" |
38 |
+ " .rept 8 ; /* */\n" |
39 |
+ " movq (%%esi), %%mm1; /* 8 Pixels from filtered frame to mm1 */\n" |
40 |
+ " movq (%%edi), %%mm2; /* 8 Pixels from filtered frame to mm2 (displaced) */\n" |
41 |
+ " movq (%%eax), %%mm3; /* reference to mm3 */\n" |
42 |
+ " psrlq $1 , %%mm1; /* average source pixels */\n" |
43 |
+ " psrlq $1 , %%mm2; /* shift right by one (divide by two) */\n" |
44 |
+ " pand %%mm6 , %%mm1; /* kill downshifted bits */\n" |
45 |
+ " pand %%mm6 , %%mm2; /* kill downshifted bits */\n" |
46 |
+ " paddusw %%mm2 , %%mm1; /* add up ... */\n" |
47 |
+ " /* */\n" |
48 |
+ " movq %%mm3 , %%mm4; /* copy reference to mm4 */\n" |
49 |
+ " psubusb %%mm1 , %%mm3; /* positive differences between mm2 and mm1 */\n" |
50 |
+ " psubusb %%mm4 , %%mm1; /* positive differences between mm1 and mm3 */\n" |
51 |
+ " paddusb %%mm3 , %%mm1; /* mm1 now contains abs(mm1-mm2) */\n" |
52 |
+ " paddusw %%mm1 , %%mm0; /* add result to mm0 */\n" |
53 |
+ " addl %%ecx , %%esi; /* add framewidth to frameaddress */\n" |
54 |
+ " addl %%ecx , %%edi; /* add framewidth to frameaddress */\n" |
55 |
+ " addl %%ecx , %%ecx; /* add framewidth to frameaddress */\n" |
56 |
+ " .endr ; /* */\n" |
57 |
+ " /* */\n" |
58 |
+ " movq %%mm0 , %0 ; /* make mm0 available to gcc ... */\n" |
59 |
:"=g" (a) |
60 |
:"S" (frm1),"D" (frm2), "a" (ref), "c" (denoiser.frame.w) |
61 |
); |