Added
Link Here
|
1 |
--- libs/ardour/x86_functions_avx512f.cc.orig 2023-02-21 01:05:04 UTC |
2 |
+++ libs/ardour/x86_functions_avx512f.cc |
3 |
@@ -83,7 +83,7 @@ x86_avx512f_compute_peak(const float *src, uint32_t nf |
4 |
} |
5 |
|
6 |
while (frames >= 256) { |
7 |
- _mm_prefetch(reinterpret_cast<void const *>(src + 256), _mm_hint(0)); |
8 |
+ _mm_prefetch(reinterpret_cast<void const *>(src + 256), _MM_HINT_NTA); |
9 |
|
10 |
__m512 x0 = _mm512_load_ps(src + 0); |
11 |
__m512 x1 = _mm512_load_ps(src + 16); |
12 |
@@ -142,7 +142,7 @@ x86_avx512f_compute_peak(const float *src, uint32_t nf |
13 |
} |
14 |
|
15 |
while (frames >= 128) { |
16 |
- _mm_prefetch(reinterpret_cast<void const *>(src + 128), _mm_hint(0)); |
17 |
+ _mm_prefetch(reinterpret_cast<void const *>(src + 128), _MM_HINT_NTA); |
18 |
|
19 |
__m512 x0 = _mm512_load_ps(src + 0); |
20 |
__m512 x1 = _mm512_load_ps(src + 16); |
21 |
@@ -176,7 +176,7 @@ x86_avx512f_compute_peak(const float *src, uint32_t nf |
22 |
} |
23 |
|
24 |
while (frames >= 64) { |
25 |
- _mm_prefetch(reinterpret_cast<void const *>(src + 64), _mm_hint(0)); |
26 |
+ _mm_prefetch(reinterpret_cast<void const *>(src + 64), _MM_HINT_NTA); |
27 |
|
28 |
__m512 x0 = _mm512_load_ps(src + 0); |
29 |
__m512 x1 = _mm512_load_ps(src + 16); |
30 |
@@ -310,7 +310,7 @@ x86_avx512f_find_peaks(const float *src, uint32_t nfra |
31 |
} |
32 |
|
33 |
while (frames >= 256) { |
34 |
- _mm_prefetch(reinterpret_cast<void const *>(src + 256), _mm_hint(0)); |
35 |
+ _mm_prefetch(reinterpret_cast<void const *>(src + 256), _MM_HINT_NTA); |
36 |
|
37 |
__m512 x0 = _mm512_load_ps(src + 0); |
38 |
__m512 x1 = _mm512_load_ps(src + 16); |
39 |
@@ -371,7 +371,7 @@ x86_avx512f_find_peaks(const float *src, uint32_t nfra |
40 |
} |
41 |
|
42 |
while (frames >= 128) { |
43 |
- _mm_prefetch(reinterpret_cast<void const *>(src + 128), _mm_hint(0)); |
44 |
+ _mm_prefetch(reinterpret_cast<void const *>(src + 128), _MM_HINT_NTA); |
45 |
|
46 |
__m512 x0 = _mm512_load_ps(src + 0); |
47 |
__m512 x1 = _mm512_load_ps(src + 16); |
48 |
@@ -405,7 +405,7 @@ x86_avx512f_find_peaks(const float *src, uint32_t nfra |
49 |
} |
50 |
|
51 |
while (frames >= 64) { |
52 |
- _mm_prefetch(reinterpret_cast<void const *>(src + 64), _mm_hint(0)); |
53 |
+ _mm_prefetch(reinterpret_cast<void const *>(src + 64), _MM_HINT_NTA); |
54 |
|
55 |
__m512 x0 = _mm512_load_ps(src + 0); |
56 |
__m512 x1 = _mm512_load_ps(src + 16); |
57 |
@@ -532,7 +532,7 @@ x86_avx512f_apply_gain_to_buffer(float *dst, uint32_t |
58 |
|
59 |
// Process the remaining samples 128 at a time |
60 |
while (frames >= 128) { |
61 |
- _mm_prefetch(reinterpret_cast<void const *>(dst + 128), _mm_hint(0)); |
62 |
+ _mm_prefetch(reinterpret_cast<void const *>(dst + 128), _MM_HINT_NTA); |
63 |
|
64 |
__m512 x0 = _mm512_load_ps(dst + 0); |
65 |
__m512 x1 = _mm512_load_ps(dst + 16); |
66 |
@@ -679,8 +679,8 @@ x86_avx512f_mix_buffers_with_gain(float *dst, const fl |
67 |
|
68 |
// Process the remaining samples 128 at a time |
69 |
while (frames >= 128) { |
70 |
- _mm_prefetch(reinterpret_cast<void const *>(src + 128), _mm_hint(0)); |
71 |
- _mm_prefetch(reinterpret_cast<void const *>(dst + 128), _mm_hint(0)); |
72 |
+ _mm_prefetch(reinterpret_cast<void const *>(src + 128), _MM_HINT_NTA); |
73 |
+ _mm_prefetch(reinterpret_cast<void const *>(dst + 128), _MM_HINT_NTA); |
74 |
|
75 |
__m512 x0 = _mm512_load_ps(src + 0); |
76 |
__m512 x1 = _mm512_load_ps(src + 16); |
77 |
@@ -836,8 +836,8 @@ x86_avx512f_mix_buffers_no_gain(float *dst, const floa |
78 |
|
79 |
// Process the remaining samples 128 at a time |
80 |
while (frames >= 128) { |
81 |
- _mm_prefetch(reinterpret_cast<void const *>(src + 128), _mm_hint(0)); |
82 |
- _mm_prefetch(reinterpret_cast<void const *>(dst + 128), _mm_hint(0)); |
83 |
+ _mm_prefetch(reinterpret_cast<void const *>(src + 128), _MM_HINT_NTA); |
84 |
+ _mm_prefetch(reinterpret_cast<void const *>(dst + 128), _MM_HINT_NTA); |
85 |
|
86 |
__m512 x0 = _mm512_load_ps(src + 0); |
87 |
__m512 x1 = _mm512_load_ps(src + 16); |
88 |
@@ -987,8 +987,8 @@ x86_avx512f_copy_vector(float *dst, const float *src, |
89 |
|
90 |
// Process 256 samples at a time |
91 |
while (frames >= 256) { |
92 |
- _mm_prefetch(reinterpret_cast<void const *>(src + 256), _mm_hint(0)); |
93 |
- _mm_prefetch(reinterpret_cast<void const *>(dst + 256), _mm_hint(0)); |
94 |
+ _mm_prefetch(reinterpret_cast<void const *>(src + 256), _MM_HINT_NTA); |
95 |
+ _mm_prefetch(reinterpret_cast<void const *>(dst + 256), _MM_HINT_NTA); |
96 |
|
97 |
__m512 x0 = _mm512_load_ps(src + 0); |
98 |
__m512 x1 = _mm512_load_ps(src + 16); |
99 |
@@ -1033,8 +1033,8 @@ x86_avx512f_copy_vector(float *dst, const float *src, |
100 |
|
101 |
// Process remaining samples 64 at a time |
102 |
while (frames >= 64) { |
103 |
- _mm_prefetch(reinterpret_cast<void const *>(src + 64), _mm_hint(0)); |
104 |
- _mm_prefetch(reinterpret_cast<void const *>(dst + 64), _mm_hint(0)); |
105 |
+ _mm_prefetch(reinterpret_cast<void const *>(src + 64), _MM_HINT_NTA); |
106 |
+ _mm_prefetch(reinterpret_cast<void const *>(dst + 64), _MM_HINT_NTA); |
107 |
|
108 |
__m512 x0 = _mm512_load_ps(src + 0); |
109 |
__m512 x1 = _mm512_load_ps(src + 16); |