Added
Link Here
|
0 |
- |
1 |
--- gosthash.c.orig 2022-12-13 13:51:25 UTC |
|
|
2 |
+++ gosthash.c |
3 |
@@ -26,9 +26,8 @@ echo -n "8JaanTcVv6ndF8Xp/N011Lp46e68LjaUT9FhnEyQGs8=" |
4 |
#if defined(__sun__) || defined(__linux__) || defined(__FreeBSD__) |
5 |
#define _aligned_malloc(size, align) memalign(align, size) |
6 |
#define _aligned_free(ptr) free(ptr) |
7 |
-#else |
8 |
-#include <malloc.h> |
9 |
#endif // __sun__ __linux__ __FreeBSD__ |
10 |
+#include <malloc.h> |
11 |
|
12 |
typedef char v16qi __attribute__((__vector_size__(16))); |
13 |
//typedef uint8_t v4qi __attribute__((__vector_size__(4))); |
14 |
@@ -141,7 +140,53 @@ static inline void XOR(v256 *x, const v256 *a) |
15 |
x->q[1] ^= a->q[1]; |
16 |
} |
17 |
#if defined(__arm__) |
18 |
-extern void UADD(v256 *a, v256 *b); |
19 |
+/* Add the 32 bytes at b into the 32 bytes at a (carry chained, low word first). */
+static inline void UADD(v256 *a, v256 *b)
+{
+	/* Both pointers are advanced by "!" writeback, so they are bound "+r". */
+	__asm volatile (
+	" ldmia %0, {r0, r1, r2, r3}\n"
+	" ldmia %1!, {r4, r5, r6, r7}\n"
+	" adds r0, r0, r4\n"
+	" adcs r1, r1, r5\n"
+	" adcs r2, r2, r6\n"
+	" adcs r3, r3, r7\n"
+	" stmia %0!, {r0, r1, r2, r3}\n"
+	/* Upper four words: ldmia/stmia do not touch the carry flag. */
+	" ldmia %0, {r0, r1, r2, r3}\n"
+	" ldmia %1!, {r4, r5, r6, r7}\n"
+	" adcs r0, r0, r4\n"
+	" adcs r1, r1, r5\n"
+	" adcs r2, r2, r6\n"
+	" adcs r3, r3, r7\n"
+	" stmia %0!, {r0, r1, r2, r3}\n"
+	/* No rewind: a and b are local copies that are not used after the asm. */
+	: "+r"(a), "+r"(b)
+	:
+	: "r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7", "cc", "memory");
+}
44 |
+#elif defined(__aarch64__) |
45 |
+/* Add the 32 bytes at b into the 32 bytes at a (carry chained, low word first). */
+static inline void UADD(v256 *a, v256 *b)
+{
+	uint64_t a0, a1, b0, b1;	/* scratch registers for the asm below */
+	__asm volatile (
+	" ldp %0, %1, [%4, #0]\n"
+	" ldp %2, %3, [%5, #0]\n"
+	" adds %0, %0, %2\n"
+	" adcs %1, %1, %3\n"
+	" stp %0, %1, [%4, #0]\n"
+	/* Upper two words: ldp/stp do not touch the carry flag. */
+	" ldp %0, %1, [%4, #16]\n"
+	" ldp %2, %3, [%5, #16]\n"
+	" adcs %0, %0, %2\n"
+	" adcs %1, %1, %3\n"
+	" stp %0, %1, [%4, #16]\n"
+	/* "&": scratch is written while %4/%5 are still needed as addresses. */
+	: "=&r"(a0), "=&r"(a1), "=&r"(b0), "=&r"(b1)
+	: "r"(a), "r"(b)
+	: "cc", "memory");
+}
66 |
|
67 |
#elif defined(__x86_64__)//0 |
68 |
static inline |