Attachment #199834 for bug #233790

View | Details | Raw Unified | Return to bug 233790
Collapse All | Expand All

Line 0 Link Here

(-)files/patch-configure.ac (+11 lines)
	1	--- configure.ac.orig 2018-12-04 23:39:19 UTC
	2	+++ configure.ac
	3	@@ -195,7 +195,7 @@ else
	4	src/dotprod/src/dotprod_rrrf.av.o \
	5	src/dotprod/src/dotprod_crcf.av.o \
	6	src/dotprod/src/sumsq.o"
	7	- ARCH_OPTION="-fno-common -faltivec";;
	8	+ ARCH_OPTION="-fno-common -maltivec";;
	9	armv1\|armv2\|armv3\|armv4\|armv5\|armv6)
	10	# assume neon instructions are NOT available
	11	MLIBS_DOTPROD="src/dotprod/src/dotprod_cccf.o \




--- src/dotprod/src/dotprod_crcf.av.c.orig	2018-12-04 23:40:11 UTC
+++ src/dotprod/src/dotprod_crcf.av.c
@@ -26,6 +26,7 @@
 
 #include <stdio.h>
 #include <stdlib.h>
+#include <altivec.h>
 
 #include "liquid.internal.h"
 
@@ -168,8 +169,8 @@ void dotprod_crcf_execute(dotprod_crcf    _q,
     union { vector float v; float w[4];} s;
     unsigned int nblocks;
 
-    ar = (vector float*)( (int)_x & ~15);
-    al = ((int)_x & 15)/sizeof(float);
+    ar = (vector float*)( (uintptr_t)_x & ~15);
+    al = ((uintptr_t)_x & 15)/sizeof(float);
 
     d = (vector float*)_q->h[al];
 
@@ -179,7 +180,7 @@ void dotprod_crcf_execute(dotprod_crcf    _q,
     // split into four vectors each with four 32-bit
     // partial sums.  Effectively each loop iteration
     // operates on 16 input samples at a time.
-    s0 = s1 = s2 = s3 = (vector float)(0);
+    s0 = s1 = s2 = s3 = (vector float){0,0,0,0};
     while (nblocks >= 4) {
         s0 = vec_madd(ar[nblocks-1],d[nblocks-1],s0);
         s1 = vec_madd(ar[nblocks-2],d[nblocks-2],s1);
@@ -200,7 +201,7 @@ void dotprod_crcf_execute(dotprod_crcf    _q,
     // move the result into the union s (effetively,
     // this loads the four 32-bit values in s0 into
     // the array w).
-    s.v = vec_add(s0,(vector float)(0));
+    s.v = vec_add(s0,(vector float){0,0,0,0});
 
     // sum the resulting array
     //*_r = s.w[0] + s.w[1] + s.w[2] + s.w[3];




--- src/dotprod/src/dotprod_rrrf.av.c.orig	2018-12-04 23:41:32 UTC
+++ src/dotprod/src/dotprod_rrrf.av.c
@@ -26,6 +26,7 @@
 
 #include <stdio.h>
 #include <stdlib.h>
+#include <altivec.h>
 
 #include "liquid.internal.h"
 
@@ -163,8 +164,8 @@ void dotprod_rrrf_execute(dotprod_rrrf _q,
     union { vector float v; float w[4];} s;
     unsigned int nblocks;
 
-    ar = (vector float*)( (int)_x & ~15);
-    al = ((int)_x & 15)/sizeof(float);
+    ar = (vector float*)( (uintptr_t)_x & ~15);
+    al = ((uintptr_t)_x & 15)/sizeof(float);
 
     d = (vector float*)_q->h[al];
 
@@ -173,7 +174,7 @@ void dotprod_rrrf_execute(dotprod_rrrf _q,
     // split into four vectors each with four 32-bit
     // partial sums.  Effectively each loop iteration
     // operates on 16 input samples at a time.
-    s0 = s1 = s2 = s3 = (vector float)(0);
+    s0 = s1 = s2 = s3 = (vector float){0,0,0,0};
     while (nblocks >= 4) {
         s0 = vec_madd(ar[nblocks-1],d[nblocks-1],s0);
         s1 = vec_madd(ar[nblocks-2],d[nblocks-2],s1);
@@ -194,7 +195,7 @@ void dotprod_rrrf_execute(dotprod_rrrf _q,
     // move the result into the union s (effetively,
     // this loads the four 32-bit values in s0 into
     // the array w).
-    s.v = vec_add(s0,(vector float)(0));
+    s.v = vec_add(s0,(vector float){0,0,0,0});
 
     // sum the resulting array
     *_r = s.w[0] + s.w[1] + s.w[2] + s.w[3];

Return to bug 233790

Line 0 Link Here

(-)files/patch-src_dotprod_src_dotprod__crcf.av.c (+39 lines)
	1	--- src/dotprod/src/dotprod_crcf.av.c.orig 2018-12-04 23:40:11 UTC
	2	+++ src/dotprod/src/dotprod_crcf.av.c
	3	@@ -26,6 +26,7 @@
	4
	5	#include <stdio.h>
	6	#include <stdlib.h>
	7	+#include <altivec.h>
	8
	9	#include "liquid.internal.h"
	10
	11	@@ -168,8 +169,8 @@ void dotprod_crcf_execute(dotprod_crcf _q,
	12	union { vector float v; float w[4];} s;
	13	unsigned int nblocks;
	14
	15	- ar = (vector float*)( (int)_x & ~15);
	16	- al = ((int)_x & 15)/sizeof(float);
	17	+ ar = (vector float*)( (uintptr_t)_x & ~15);
	18	+ al = ((uintptr_t)_x & 15)/sizeof(float);
	19
	20	d = (vector float*)_q->h[al];
	21
	22	@@ -179,7 +180,7 @@ void dotprod_crcf_execute(dotprod_crcf _q,
	23	// split into four vectors each with four 32-bit
	24	// partial sums. Effectively each loop iteration
	25	// operates on 16 input samples at a time.
	26	- s0 = s1 = s2 = s3 = (vector float)(0);
	27	+ s0 = s1 = s2 = s3 = (vector float){0,0,0,0};
	28	while (nblocks >= 4) {
	29	s0 = vec_madd(ar[nblocks-1],d[nblocks-1],s0);
	30	s1 = vec_madd(ar[nblocks-2],d[nblocks-2],s1);
	31	@@ -200,7 +201,7 @@ void dotprod_crcf_execute(dotprod_crcf _q,
	32	// move the result into the union s (effetively,
	33	// this loads the four 32-bit values in s0 into
	34	// the array w).
	35	- s.v = vec_add(s0,(vector float)(0));
	36	+ s.v = vec_add(s0,(vector float){0,0,0,0});
	37
	38	// sum the resulting array
	39	//*_r = s.w[0] + s.w[1] + s.w[2] + s.w[3];

Line 0 Link Here

(-)files/patch-src_dotprod_src_dotprod__rrrf.av.c (+39 lines)
	1	--- src/dotprod/src/dotprod_rrrf.av.c.orig 2018-12-04 23:41:32 UTC
	2	+++ src/dotprod/src/dotprod_rrrf.av.c
	3	@@ -26,6 +26,7 @@
	4
	5	#include <stdio.h>
	6	#include <stdlib.h>
	7	+#include <altivec.h>
	8
	9	#include "liquid.internal.h"
	10
	11	@@ -163,8 +164,8 @@ void dotprod_rrrf_execute(dotprod_rrrf _q,
	12	union { vector float v; float w[4];} s;
	13	unsigned int nblocks;
	14
	15	- ar = (vector float*)( (int)_x & ~15);
	16	- al = ((int)_x & 15)/sizeof(float);
	17	+ ar = (vector float*)( (uintptr_t)_x & ~15);
	18	+ al = ((uintptr_t)_x & 15)/sizeof(float);
	19
	20	d = (vector float*)_q->h[al];
	21
	22	@@ -173,7 +174,7 @@ void dotprod_rrrf_execute(dotprod_rrrf _q,
	23	// split into four vectors each with four 32-bit
	24	// partial sums. Effectively each loop iteration
	25	// operates on 16 input samples at a time.
	26	- s0 = s1 = s2 = s3 = (vector float)(0);
	27	+ s0 = s1 = s2 = s3 = (vector float){0,0,0,0};
	28	while (nblocks >= 4) {
	29	s0 = vec_madd(ar[nblocks-1],d[nblocks-1],s0);
	30	s1 = vec_madd(ar[nblocks-2],d[nblocks-2],s1);
	31	@@ -194,7 +195,7 @@ void dotprod_rrrf_execute(dotprod_rrrf _q,
	32	// move the result into the union s (effetively,
	33	// this loads the four 32-bit values in s0 into
	34	// the array w).
	35	- s.v = vec_add(s0,(vector float)(0));
	36	+ s.v = vec_add(s0,(vector float){0,0,0,0});
	37
	38	// sum the resulting array
	39	*_r = s.w[0] + s.w[1] + s.w[2] + s.w[3];