Lines 1-15
Link Here
|
1 |
--- gwnum/gwnum.c.orig 2016-10-30 14:22:26 UTC |
1 |
--- gwnum/gwnum.c.orig 2016-10-30 14:22:26 UTC |
2 |
+++ gwnum/gwnum.c |
2 |
+++ gwnum/gwnum.c |
3 |
@@ -170,7 +170,7 @@ void *avx_carries_prctab[] = { |
3 |
@@ -865,17 +865,17 @@ |
4 |
/* b or blank (b > 2 or not) */ |
|
|
5 |
/* s4 or blank (SSE4 or not) */ |
6 |
/* k or blank (k for XMM_K_HI is zero or not) */ |
7 |
-/* c1 or cm1 or blank (c=1, c=-1, abs(c)!=1) */ |
8 |
+/* c1 or cm1 or blank (c=1, c=-1, labs(c)!=1) */ |
9 |
/* We also define a macro that will pick the correct entry from the array. */ |
10 |
|
11 |
#define avx_explode(macro) avx_explode1(macro,yr) avx_explode1(macro,yi) |
12 |
@@ -767,17 +767,17 @@ int gwinfo ( /* Return zero-padded fft flag or error |
13 |
log2k = log2 (k); |
4 |
log2k = log2 (k); |
14 |
logbk = logb (k); |
5 |
logbk = logb (k); |
15 |
log2b = log2 (b); |
6 |
log2b = log2 (b); |
Lines 20-46
Link Here
|
20 |
/* First, see what FFT length we would get if we emulate the k*b^n+c modulo */ |
11 |
/* First, see what FFT length we would get if we emulate the k*b^n+c modulo */ |
21 |
-/* with a zero padded FFT. If k is 1 and abs (c) is 1 then we can skip this */ |
12 |
-/* with a zero padded FFT. If k is 1 and abs (c) is 1 then we can skip this */ |
22 |
+/* with a zero padded FFT. If k is 1 and labs (c) is 1 then we can skip this */ |
13 |
+/* with a zero padded FFT. If k is 1 and labs (c) is 1 then we can skip this */ |
23 |
/* loop as we're sure to find an IBDWT that will do the job. */ |
14 |
/* loop as we're sure to find an IBDWT that will do the job. Also skip if called from */ |
|
|
15 |
/* gwmap_fftlen_to_max_exponent (n = 0) or we are QAing IBDWT FFTs (qa_pick_nth_fft >= 1000) */ |
24 |
|
16 |
|
25 |
again: zpad_jmptab = NULL; |
17 |
again: zpad_jmptab = NULL; |
26 |
generic_jmptab = NULL; |
18 |
generic_jmptab = NULL; |
27 |
if (! gwdata->force_general_mod && |
19 |
- if (! gwdata->force_general_mod && (k > 1.0 || (n > 0 && n < 500) || abs (c) > 1) && gwdata->qa_pick_nth_fft < 1000) { |
28 |
- (k > 1.0 || n < 500 || abs (c) > 1) && |
20 |
+ if (! gwdata->force_general_mod && (k > 1.0 || (n > 0 && n < 500) || labs (c) > 1) && gwdata->qa_pick_nth_fft < 1000) { |
29 |
+ (k > 1.0 || n < 500 || labs (c) > 1) && |
|
|
30 |
gwdata->qa_pick_nth_fft < 1000) { |
31 |
|
21 |
|
32 |
/* Use the proper 2^N-1 jmptable */ |
22 |
/* Use the proper 2^N-1 jmptable */ |
33 |
@@ -984,7 +984,7 @@ next1: while (zpad_jmptab->flags & 0x80000000) INC_J |
|
|
34 |
/* the bits per word. An FFT result word cannot be more than 5 times */ |
35 |
/* bits-per-word (bits-per-word are stored in the current word and the */ |
36 |
/* 4 words we propagate carries to). How many bits are in an FFT result */ |
37 |
-/* word? Well, because of balanced representation the abs(input word) is */ |
38 |
+/* word? Well, because of balanced representation the labs(input word) is */ |
39 |
/* (bits_per_word-1) bits long. An FFT result word contains multiplied data */ |
40 |
/* words, that's (bits_per_word-1)*2 bits. Adding up many multiplied data */ |
41 |
/* words adds some bits proportional to the size of the FFT. Experience */ |
42 |
@@ -1789,13 +1789,13 @@ int gwsetup ( |
43 |
|
23 |
|
|
|
24 |
@@ -1915,13 +1915,13 @@ |
25 |
|
44 |
if (c == 0) |
26 |
if (c == 0) |
45 |
gcd = 0; |
27 |
gcd = 0; |
46 |
- else if (k == 1.0 || abs (c) == 1) |
28 |
- else if (k == 1.0 || abs (c) == 1) |
Lines 55-61
Link Here
|
55 |
gcdg (kg, cg); |
37 |
gcdg (kg, cg); |
56 |
gcd = cg->n[0]; |
38 |
gcd = cg->n[0]; |
57 |
} |
39 |
} |
58 |
@@ -1809,7 +1809,7 @@ int gwsetup ( |
40 |
@@ -1935,7 +1935,7 @@ |
59 |
|
41 |
|
60 |
if (gcd == 1 && |
42 |
if (gcd == 1 && |
61 |
k * gwdata->maxmulbyconst <= MAX_ZEROPAD_K && |
43 |
k * gwdata->maxmulbyconst <= MAX_ZEROPAD_K && |
Lines 64-70
Link Here
|
64 |
log2(b) * (double) n >= 350.0 && |
46 |
log2(b) * (double) n >= 350.0 && |
65 |
(b == 2 || (gwdata->cpu_flags & (CPU_AVX | CPU_SSE2))) && |
47 |
(b == 2 || (gwdata->cpu_flags & (CPU_AVX | CPU_SSE2))) && |
66 |
!gwdata->force_general_mod) { |
48 |
!gwdata->force_general_mod) { |
67 |
@@ -2429,12 +2429,12 @@ int internal_gwsetup ( |
49 |
@@ -2555,12 +2555,12 @@ |
68 |
gwdata->NUM_B_PER_SMALL_WORD = (unsigned long) gwdata->avg_num_b_per_word; |
50 |
gwdata->NUM_B_PER_SMALL_WORD = (unsigned long) gwdata->avg_num_b_per_word; |
69 |
|
51 |
|
70 |
/* Set a flag if this is a rational FFT. That is, an FFT where all the */ |
52 |
/* Set a flag if this is a rational FFT. That is, an FFT where all the */ |
Lines 79-85
Link Here
|
79 |
|
61 |
|
80 |
/* Remember the maximum number of bits per word that this FFT length */ |
62 |
/* Remember the maximum number of bits per word that this FFT length */ |
81 |
/* supports. We this in gwnear_fft_limit. Note that zero padded FFTs */ |
63 |
/* supports. We this in gwnear_fft_limit. Note that zero padded FFTs */ |
82 |
@@ -3688,7 +3688,7 @@ int internal_gwsetup ( |
64 |
@@ -3814,7 +3814,7 @@ |
83 |
if (gwdata->ZERO_PADDED_FFT || |
65 |
if (gwdata->ZERO_PADDED_FFT || |
84 |
3.0 * gwdata->NUM_B_PER_SMALL_WORD * log2 (b) > |
66 |
3.0 * gwdata->NUM_B_PER_SMALL_WORD * log2 (b) > |
85 |
2.0 * ((gwdata->NUM_B_PER_SMALL_WORD + 1) * log2 (b) - 1) + |
67 |
2.0 * ((gwdata->NUM_B_PER_SMALL_WORD + 1) * log2 (b) - 1) + |
Lines 88-94
Link Here
|
88 |
asm_data->SPREAD_CARRY_OVER_EXTRA_WORDS = FALSE; |
70 |
asm_data->SPREAD_CARRY_OVER_EXTRA_WORDS = FALSE; |
89 |
else |
71 |
else |
90 |
asm_data->SPREAD_CARRY_OVER_EXTRA_WORDS = TRUE; |
72 |
asm_data->SPREAD_CARRY_OVER_EXTRA_WORDS = TRUE; |
91 |
@@ -6262,7 +6262,7 @@ void gw_as_string ( |
73 |
@@ -6409,7 +6409,7 @@ |
92 |
sprintf (buf, "%.0f", k + c); |
74 |
sprintf (buf, "%.0f", k + c); |
93 |
else if (k != 1.0) |
75 |
else if (k != 1.0) |
94 |
sprintf (buf, "%.0f*%lu^%lu%c%lu", k, b, n, |
76 |
sprintf (buf, "%.0f*%lu^%lu%c%lu", k, b, n, |
Lines 97-103
Link Here
|
97 |
else if (b == 2 && c == -1) |
79 |
else if (b == 2 && c == -1) |
98 |
sprintf (buf, "M%lu", n); |
80 |
sprintf (buf, "M%lu", n); |
99 |
else { |
81 |
else { |
100 |
@@ -6272,7 +6272,7 @@ void gw_as_string ( |
82 |
@@ -6419,7 +6419,7 @@ |
101 |
sprintf (buf, "F%lu", cnt); |
83 |
sprintf (buf, "F%lu", cnt); |
102 |
else |
84 |
else |
103 |
sprintf (buf, "%lu^%lu%c%lu", b, n, |
85 |
sprintf (buf, "%lu^%lu%c%lu", b, n, |
Lines 106-112
Link Here
|
106 |
} |
88 |
} |
107 |
} |
89 |
} |
108 |
|
90 |
|
109 |
@@ -6357,7 +6357,7 @@ double virtual_bits_per_word ( |
91 |
@@ -6504,7 +6504,7 @@ |
110 |
weighted_bits_per_output_word = |
92 |
weighted_bits_per_output_word = |
111 |
2.0 * ((b_per_input_word + 1.0) * log2b - 1.0) + |
93 |
2.0 * ((b_per_input_word + 1.0) * log2b - 1.0) + |
112 |
0.6 * log2 (gwdata->FFTLEN) + |
94 |
0.6 * log2 (gwdata->FFTLEN) + |
Lines 115-121
Link Here
|
115 |
if (gwdata->k == 1.0 && gwdata->n % gwdata->FFTLEN == 0) |
97 |
if (gwdata->k == 1.0 && gwdata->n % gwdata->FFTLEN == 0) |
116 |
weighted_bits_per_output_word -= ((log2b <= 4.0) ? log2b : 1.4 * log2b); |
98 |
weighted_bits_per_output_word -= ((log2b <= 4.0) ? log2b : 1.4 * log2b); |
117 |
else if (num_big_words == 1 && gwdata->k > 1.0) |
99 |
else if (num_big_words == 1 && gwdata->k > 1.0) |
118 |
@@ -6756,7 +6756,7 @@ void gwsetaddin ( |
100 |
@@ -6911,7 +6911,7 @@ |
119 |
{ |
101 |
{ |
120 |
unsigned long word, b_in_word; |
102 |
unsigned long word, b_in_word; |
121 |
|
103 |
|
Lines 124-130
Link Here
|
124 |
|
106 |
|
125 |
/* In a zero-padded FFT, the value is added into ZPAD0 */ |
107 |
/* In a zero-padded FFT, the value is added into ZPAD0 */ |
126 |
|
108 |
|
127 |
@@ -7022,7 +7022,7 @@ void gianttogw ( |
109 |
@@ -7177,7 +7177,7 @@ |
128 |
/* Small numbers can also be optimized for many moduli by zeroing all the */ |
110 |
/* Small numbers can also be optimized for many moduli by zeroing all the */ |
129 |
/* FFT data using memset and then setting only the affected FFT elements. */ |
111 |
/* FFT data using memset and then setting only the affected FFT elements. */ |
130 |
|
112 |
|
Lines 133-148
Link Here
|
133 |
uint32_t low_addin; |
115 |
uint32_t low_addin; |
134 |
int i; |
116 |
int i; |
135 |
|
117 |
|
136 |
@@ -7639,7 +7639,7 @@ void specialmodg ( |
118 |
@@ -7802,7 +7802,7 @@ |
137 |
} |
|
|
138 |
|
139 |
/* Do the quick modulus code twice because in the case where */ |
140 |
-/* abs(c) > k once won't get us close enough. */ |
141 |
+/* labs(c) > k once won't get us close enough. */ |
142 |
|
143 |
neg = FALSE; |
144 |
for (count = 0; count < 2; count++) { |
145 |
@@ -7647,7 +7647,7 @@ void specialmodg ( |
146 |
/* Handle negative input values */ |
119 |
/* Handle negative input values */ |
147 |
|
120 |
|
148 |
neg ^= (g->sign < 0); |
121 |
neg ^= (g->sign < 0); |