#ifndef SECP256K1_SCALAR_REPR_IMPL_H
#define SECP256K1_SCALAR_REPR_IMPL_H
/* Limbs of the secp256k1 order. */
#define SECP256K1_N_0 ((uint64_t)0xBFD25E8CD0364141ULL)
#define SECP256K1_N_1 ((uint64_t)0xBAAEDCE6AF48A03BULL)
#define SECP256K1_N_2 ((uint64_t)0xFFFFFFFFFFFFFFFEULL)
#define SECP256K1_N_3 ((uint64_t)0xFFFFFFFFFFFFFFFFULL)
/* Limbs of 2^256 minus the secp256k1 order. */
#define SECP256K1_N_C_0 (~SECP256K1_N_0 + 1)
#define SECP256K1_N_C_1 (~SECP256K1_N_1)
#define SECP256K1_N_C_2 (1)
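/* Because the order n is very close to 2^256, a value that has overflowed n
 * can be reduced by adding 2^256 - n (the SECP256K1_N_C_* limbs above) and
 * discarding the carry out of bit 256: x - n == x + (2^256 - n) (mod 2^256).
 * An illustrative one-limb step (a sketch, not part of the library; d0 is a
 * hypothetical limb variable):
 *
 *   uint128_t t = (uint128_t)d0 + SECP256K1_N_C_0;   // d0 - n0, as an addition
 *   d0 = (uint64_t)t; t >>= 64;                      // keep 64 bits, carry on
 */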
/* Limbs of half the secp256k1 order. */
#define SECP256K1_N_H_0 ((uint64_t)0xDFE92F46681B20A0ULL)
#define SECP256K1_N_H_1 ((uint64_t)0x5D576E7357A4501DULL)
#define SECP256K1_N_H_2 ((uint64_t)0xFFFFFFFFFFFFFFFFULL)
#define SECP256K1_N_H_3 ((uint64_t)0x7FFFFFFFFFFFFFFFULL)
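/* The half order n/2 is what secp256k1_scalar_is_high compares against, e.g.
 * when normalizing an ECDSA signature's s value to the canonical low-S form. */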
SECP256K1_INLINE static unsigned int secp256k1_scalar_get_bits(const secp256k1_scalar *a, unsigned int offset, unsigned int count) {
    VERIFY_CHECK((offset + count - 1) >> 6 == offset >> 6);
    return (a->d[offset >> 6] >> (offset & 0x3F)) & ((((uint64_t)1) << count) - 1);
}
SECP256K1_INLINE static unsigned int secp256k1_scalar_get_bits_var(const secp256k1_scalar *a, unsigned int offset, unsigned int count) {
    VERIFY_CHECK(count < 32);
    VERIFY_CHECK(offset + count <= 256);
    if ((offset + count - 1) >> 6 == offset >> 6) {
        return secp256k1_scalar_get_bits(a, offset, count);
    } else {
        VERIFY_CHECK((offset >> 6) + 1 < 4);
        return ((a->d[offset >> 6] >> (offset & 0x3F)) | (a->d[(offset >> 6) + 1] << (64 - (offset & 0x3F)))) & ((((uint64_t)1) << count) - 1);
    }
}
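/* Illustrative call (an assumption, not from this file): reading a 5-bit
 * window starting at bit 60 straddles limbs 0 and 1, so the second branch
 * above combines bits 60..63 of d[0] with bit 0 of d[1]:
 *
 *   unsigned int w = secp256k1_scalar_get_bits_var(&s, 60, 5);
 */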
SECP256K1_INLINE static int secp256k1_scalar_reduce(secp256k1_scalar *r, unsigned int overflow) {
    uint128_t t;
    VERIFY_CHECK(overflow <= 1);
    t = (uint128_t)r->d[0] + overflow * SECP256K1_N_C_0;
    r->d[0] = t & 0xFFFFFFFFFFFFFFFFULL; t >>= 64;
    t += (uint128_t)r->d[1] + overflow * SECP256K1_N_C_1;
    r->d[1] = t & 0xFFFFFFFFFFFFFFFFULL; t >>= 64;
    t += (uint128_t)r->d[2] + overflow * SECP256K1_N_C_2;
    r->d[2] = t & 0xFFFFFFFFFFFFFFFFULL; t >>= 64;
    t += (uint64_t)r->d[3];
    r->d[3] = t & 0xFFFFFFFFFFFFFFFFULL;
    return overflow;
}
static int secp256k1_scalar_add(secp256k1_scalar *r, const secp256k1_scalar *a, const secp256k1_scalar *b) {
    int overflow;
    uint128_t t = (uint128_t)a->d[0] + b->d[0];
    r->d[0] = t & 0xFFFFFFFFFFFFFFFFULL; t >>= 64;
    t += (uint128_t)a->d[1] + b->d[1];
    r->d[1] = t & 0xFFFFFFFFFFFFFFFFULL; t >>= 64;
    t += (uint128_t)a->d[2] + b->d[2];
    r->d[2] = t & 0xFFFFFFFFFFFFFFFFULL; t >>= 64;
    t += (uint128_t)a->d[3] + b->d[3];
    r->d[3] = t & 0xFFFFFFFFFFFFFFFFULL; t >>= 64;
    overflow = t + secp256k1_scalar_check_overflow(r);
    VERIFY_CHECK(overflow == 0 || overflow == 1);
    secp256k1_scalar_reduce(r, overflow);
    return overflow;
}
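/* Both inputs are below n, so their sum is below 2n and at most one
 * subtraction of n is ever needed: `overflow` is 0 or 1, and the single
 * conditional reduction above (adding 2^256 - n when set) suffices. */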
static void secp256k1_scalar_cadd_bit(secp256k1_scalar *r, unsigned int bit, int flag) {
    uint128_t t;
    VERIFY_CHECK(bit < 256);
    bit += ((uint32_t) flag - 1) & 0x100;  /* forcing (bit >> 6) > 3 makes this a noop */
    t = (uint128_t)r->d[0] + (((uint64_t)((bit >> 6) == 0)) << (bit & 0x3F));
    r->d[0] = t & 0xFFFFFFFFFFFFFFFFULL; t >>= 64;
    t += (uint128_t)r->d[1] + (((uint64_t)((bit >> 6) == 1)) << (bit & 0x3F));
    r->d[1] = t & 0xFFFFFFFFFFFFFFFFULL; t >>= 64;
    t += (uint128_t)r->d[2] + (((uint64_t)((bit >> 6) == 2)) << (bit & 0x3F));
    r->d[2] = t & 0xFFFFFFFFFFFFFFFFULL; t >>= 64;
    t += (uint128_t)r->d[3] + (((uint64_t)((bit >> 6) == 3)) << (bit & 0x3F));
    r->d[3] = t & 0xFFFFFFFFFFFFFFFFULL;
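/* Constant-time contract (an illustrative sketch, not part of the library):
 * with flag = 0, the addition above pushes `bit` past 255, so every
 * (bit >> 6) == i test is false and r is unchanged, yet the exact same
 * sequence of loads, adds, and stores still executes:
 *
 *   secp256k1_scalar_cadd_bit(&r, 13, 0);   // no-op, identical instruction trace
 *   secp256k1_scalar_cadd_bit(&r, 13, 1);   // r += 2^13
 */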
static void secp256k1_scalar_set_b32(secp256k1_scalar *r, const unsigned char *b32, int *overflow) {
    int over;
    r->d[0] = (uint64_t)b32[31] | (uint64_t)b32[30] << 8 | (uint64_t)b32[29] << 16 | (uint64_t)b32[28] << 24 | (uint64_t)b32[27] << 32 | (uint64_t)b32[26] << 40 | (uint64_t)b32[25] << 48 | (uint64_t)b32[24] << 56;
    r->d[1] = (uint64_t)b32[23] | (uint64_t)b32[22] << 8 | (uint64_t)b32[21] << 16 | (uint64_t)b32[20] << 24 | (uint64_t)b32[19] << 32 | (uint64_t)b32[18] << 40 | (uint64_t)b32[17] << 48 | (uint64_t)b32[16] << 56;
    r->d[2] = (uint64_t)b32[15] | (uint64_t)b32[14] << 8 | (uint64_t)b32[13] << 16 | (uint64_t)b32[12] << 24 | (uint64_t)b32[11] << 32 | (uint64_t)b32[10] << 40 | (uint64_t)b32[9] << 48 | (uint64_t)b32[8] << 56;
    r->d[3] = (uint64_t)b32[7] | (uint64_t)b32[6] << 8 | (uint64_t)b32[5] << 16 | (uint64_t)b32[4] << 24 | (uint64_t)b32[3] << 32 | (uint64_t)b32[2] << 40 | (uint64_t)b32[1] << 48 | (uint64_t)b32[0] << 56;
    over = secp256k1_scalar_reduce(r, secp256k1_scalar_check_overflow(r));
    if (overflow) {
        *overflow = over;
    }
}
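/* Illustrative caller (an assumption, not from this file): the out-parameter
 * reports whether the 32-byte big-endian input was >= n and was thus reduced.
 *
 *   int overflow;
 *   secp256k1_scalar_set_b32(&s, buf32, &overflow);
 *   if (overflow) { ... input was non-canonical and has been reduced mod n ... }
 */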
static void secp256k1_scalar_get_b32(unsigned char *bin, const secp256k1_scalar* a) {
    bin[0] = a->d[3] >> 56; bin[1] = a->d[3] >> 48; bin[2] = a->d[3] >> 40; bin[3] = a->d[3] >> 32; bin[4] = a->d[3] >> 24; bin[5] = a->d[3] >> 16; bin[6] = a->d[3] >> 8; bin[7] = a->d[3];
    bin[8] = a->d[2] >> 56; bin[9] = a->d[2] >> 48; bin[10] = a->d[2] >> 40; bin[11] = a->d[2] >> 32; bin[12] = a->d[2] >> 24; bin[13] = a->d[2] >> 16; bin[14] = a->d[2] >> 8; bin[15] = a->d[2];
    bin[16] = a->d[1] >> 56; bin[17] = a->d[1] >> 48; bin[18] = a->d[1] >> 40; bin[19] = a->d[1] >> 32; bin[20] = a->d[1] >> 24; bin[21] = a->d[1] >> 16; bin[22] = a->d[1] >> 8; bin[23] = a->d[1];
    bin[24] = a->d[0] >> 56; bin[25] = a->d[0] >> 48; bin[26] = a->d[0] >> 40; bin[27] = a->d[0] >> 32; bin[28] = a->d[0] >> 24; bin[29] = a->d[0] >> 16; bin[30] = a->d[0] >> 8; bin[31] = a->d[0];
}
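/* Round trip (an illustrative sketch, not from this file): for any canonical
 * scalar, get_b32 inverts set_b32; both use big-endian byte order, with d[3]
 * holding the most significant limb.
 *
 *   unsigned char buf[32];
 *   secp256k1_scalar_get_b32(buf, &s);
 *   secp256k1_scalar_set_b32(&s2, buf, NULL);   // s2 == s, no overflow
 */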
SECP256K1_INLINE static int secp256k1_scalar_is_zero(const secp256k1_scalar *a) {
    return (a->d[0] | a->d[1] | a->d[2] | a->d[3]) == 0;
}
static void secp256k1_scalar_negate(secp256k1_scalar *r, const secp256k1_scalar *a) {
    uint64_t nonzero = 0xFFFFFFFFFFFFFFFFULL * (secp256k1_scalar_is_zero(a) == 0);
    uint128_t t = (uint128_t)(~a->d[0]) + SECP256K1_N_0 + 1;
    r->d[0] = t & nonzero; t >>= 64;
    t += (uint128_t)(~a->d[1]) + SECP256K1_N_1;
    r->d[1] = t & nonzero; t >>= 64;
    t += (uint128_t)(~a->d[2]) + SECP256K1_N_2;
    r->d[2] = t & nonzero; t >>= 64;
    t += (uint128_t)(~a->d[3]) + SECP256K1_N_3;
    r->d[3] = t & nonzero;
}
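/* Computing (~a) + n + 1 limb by limb equals n - a by two's complement; the
 * `nonzero` mask (all ones unless a == 0) then maps the special case -0 back
 * to 0 without branching, so negation never yields the non-canonical value n. */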
SECP256K1_INLINE static int secp256k1_scalar_is_one(const secp256k1_scalar *a) {
    return ((a->d[0] ^ 1) | a->d[1] | a->d[2] | a->d[3]) == 0;
}
static int secp256k1_scalar_cond_negate(secp256k1_scalar *r, int flag) {
    /* If we are flag = 0, mask = 00...00 and this is a no-op;
     * if we are flag = 1, mask = 11...11 and this is identical to secp256k1_scalar_negate. */
    uint64_t mask = !flag - 1;
    uint64_t nonzero = (secp256k1_scalar_is_zero(r) != 0) - 1;
    uint128_t t = (uint128_t)(r->d[0] ^ mask) + ((SECP256K1_N_0 + 1) & mask);
    r->d[0] = t & nonzero; t >>= 64;
    t += (uint128_t)(r->d[1] ^ mask) + (SECP256K1_N_1 & mask);
    r->d[1] = t & nonzero; t >>= 64;
    t += (uint128_t)(r->d[2] ^ mask) + (SECP256K1_N_2 & mask);
    r->d[2] = t & nonzero; t >>= 64;
    t += (uint128_t)(r->d[3] ^ mask) + (SECP256K1_N_3 & mask);
    r->d[3] = t & nonzero;
    return 2 * (mask == 0) - 1;
}
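/* The return value encodes what happened: +1 if r was left unchanged
 * (flag = 0), -1 if it was replaced by its negation (flag = 1), which lets a
 * caller track an associated sign without branching. */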
/** Add a*b to the number defined by (c0,c1,c2). c2 must never overflow. */
#define muladd(a,b) { \
    uint64_t tl, th; \
    { \
        uint128_t t = (uint128_t)a * b; \
        th = t >> 64;         /* at most 0xFFFFFFFFFFFFFFFE */ \
        tl = t; \
    } \
    c0 += tl;                 /* overflow is handled on the next line */ \
    th += (c0 < tl) ? 1 : 0;  /* at most 0xFFFFFFFFFFFFFFFF */ \
    c1 += th;                 /* overflow is handled on the next line */ \
    c2 += (c1 < th) ? 1 : 0;  /* never overflows by contract (verified in the next line) */ \
    VERIFY_CHECK((c1 >= th) || (c2 != 0)); \
}
/** Add a*b to the number defined by (c0,c1). c1 must never overflow. */
#define muladd_fast(a,b) { \
    uint64_t tl, th; \
    { \
        uint128_t t = (uint128_t)a * b; \
        th = t >> 64;         /* at most 0xFFFFFFFFFFFFFFFE */ \
        tl = t; \
    } \
    c0 += tl;                 /* overflow is handled on the next line */ \
    th += (c0 < tl) ? 1 : 0;  /* at most 0xFFFFFFFFFFFFFFFF */ \
    c1 += th;                 /* never overflows by contract (verified in the next line) */ \
    VERIFY_CHECK(c1 >= th); \
}
/** Add 2*a*b to the number defined by (c0,c1,c2). c2 must never overflow. */
#define muladd2(a,b) { \
    uint64_t tl, th, th2, tl2; \
    { \
        uint128_t t = (uint128_t)a * b; \
        th = t >> 64;               /* at most 0xFFFFFFFFFFFFFFFE */ \
        tl = t; \
    } \
    th2 = th + th;                  /* at most 0xFFFFFFFFFFFFFFFE (in case th was 0x7FFFFFFFFFFFFFFF) */ \
    c2 += (th2 < th) ? 1 : 0;       /* never overflows by contract (verified the next line) */ \
    VERIFY_CHECK((th2 >= th) || (c2 != 0)); \
    tl2 = tl + tl;                  /* at most 0xFFFFFFFFFFFFFFFE (in case the lowest 63 bits of tl were 0x7FFFFFFFFFFFFFFF) */ \
    th2 += (tl2 < tl) ? 1 : 0;      /* at most 0xFFFFFFFFFFFFFFFF */ \
    c0 += tl2;                      /* overflow is handled on the next line */ \
    th2 += (c0 < tl2) ? 1 : 0;      /* second overflow is handled on the next line */ \
    c2 += (c0 < tl2) & (th2 == 0);  /* never overflows by contract (verified the next line) */ \
    VERIFY_CHECK((c0 >= tl2) || (th2 != 0) || (c2 != 0)); \
    c1 += th2;                      /* overflow is handled on the next line */ \
    c2 += (c1 < th2) ? 1 : 0;       /* never overflows by contract (verified the next line) */ \
    VERIFY_CHECK((c1 >= th2) || (c2 != 0)); \
}
/** Add a to the number defined by (c0,c1,c2). c2 must never overflow. */
#define sumadd(a) { \
    unsigned int over; \
    c0 += (a);                  /* overflow is handled on the next line */ \
    over = (c0 < (a)) ? 1 : 0; \
    c1 += over;                 /* overflow is handled on the next line */ \
    c2 += (c1 < over) ? 1 : 0;  /* never overflows by contract */ \
}
/** Add a to the number defined by (c0,c1). c1 must never overflow, c2 must be zero. */
#define sumadd_fast(a) { \
    c0 += (a);                 /* overflow is handled on the next line */ \
    c1 += (c0 < (a)) ? 1 : 0;  /* never overflows by contract (verified the next line) */ \
    VERIFY_CHECK((c1 != 0) | (c0 >= (a))); \
    VERIFY_CHECK(c2 == 0); \
}
/** Extract the lowest 64 bits of (c0,c1,c2) into n, and left shift the number 64 bits. */
#define extract(n) { \
    (n) = c0; \
    c0 = c1; \
    c1 = c2; \
    c2 = 0; \
}
/** Extract the lowest 64 bits of (c0,c1,c2) into n, and left shift the number 64 bits. c2 is assumed to be zero. */
#define extract_fast(n) { \
    (n) = c0; \
    c0 = c1; \
    c1 = 0; \
    VERIFY_CHECK(c2 == 0); \
}
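/* Illustrative use of the accumulator macros (a sketch, not from this file):
 * (c0,c1,c2) form a little-endian 192-bit accumulator. Each output limb of
 * the schoolbook 4x4 limb product is summed column by column and emitted:
 *
 *   c0 = 0; c1 = 0; c2 = 0;
 *   muladd_fast(a0, b0); extract_fast(l[0]);   // column 0: one partial product
 *   muladd(a0, b1);
 *   muladd(a1, b0);      extract(l[1]);        // column 1: two partial products
 *
 * where a0..a1, b0..b1 and l[] stand in for the operand limbs and the
 * 512-bit result. */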
static void secp256k1_scalar_reduce_512(secp256k1_scalar *r, const uint64_t *l) {
#ifdef USE_ASM_X86_64
    uint64_t m0, m1, m2, m3, m4, m5, m6;
    uint64_t p0, p1, p2, p3, p4;
    uint64_t c;
    /* Reduce 512 bits into 385. */
    __asm__ __volatile__(
    "movq 32(%%rsi), %%r11\n"
    "movq 40(%%rsi), %%r12\n"
    "movq 48(%%rsi), %%r13\n"
    "movq 56(%%rsi), %%r14\n"
    "movq 0(%%rsi), %%r8\n"
    "xorq %%r10, %%r10\n"
    "addq 8(%%rsi), %%r9\n"
    "adcq %%rdx, %%r10\n"
    "adcq %%rdx, %%r10\n"
    "addq 16(%%rsi), %%r10\n"
    "addq %%rax, %%r10\n"
    "addq %%rax, %%r10\n"
    "addq %%r11, %%r10\n"
    "xorq %%r10, %%r10\n"
    "addq 24(%%rsi), %%r8\n"
    "adcq %%rdx, %%r10\n"
    "addq %%r14, %%r10\n"
    : "=g"(m0), "=g"(m1), "=g"(m2), "=g"(m3), "=g"(m4), "=g"(m5), "=g"(m6)
    : "rax", "rdx", "r8", "r9", "r10", "r11", "r12", "r13", "r14", "cc");
    /* Reduce 385 bits into 258. */
    __asm__ __volatile__(
    "xorq %%r10, %%r10\n"
    "adcq %%rdx, %%r10\n"
    "adcq %%rdx, %%r10\n"
    "addq %%rax, %%r10\n"
    "addq %%rax, %%r10\n"
    "addq %%r11, %%r10\n"
    : "=&g"(p0), "=&g"(p1), "=&g"(p2), "=g"(p3), "=g"(p4)
    : "rax", "rdx", "r8", "r9", "r10", "r11", "r12", "r13", "cc");
    /* Reduce 258 bits into 256. */
    __asm__ __volatile__(
    "movq %%rax, 0(%q6)\n"
    "movq %%r8, 8(%q6)\n"
    "movq %%r9, 16(%q6)\n"
    "movq %%r8, 24(%q6)\n"
    : "rax", "rdx", "r8", "r9", "r10", "cc", "memory");
#else
    uint128_t c;
    uint64_t c0, c1, c2;
    uint64_t n0 = l[4], n1 = l[5], n2 = l[6], n3 = l[7];
    uint64_t m0, m1, m2, m3, m4, m5;
    uint64_t p0, p1, p2, p3;

    /* Reduce 512 bits into 385: m[0..6] = l[0..3] + n[0..3] * SECP256K1_N_C. */
    c0 = l[0]; c1 = 0; c2 = 0;

    /* Reduce 385 bits into 258: p[0..4] = m[0..3] + m[4..6] * SECP256K1_N_C. */
    c0 = m0; c1 = 0; c2 = 0;
    /* Reduce 258 bits into 256: r[0..3] = p[0..3] + p[4] * SECP256K1_N_C. */
    r->d[0] = c & 0xFFFFFFFFFFFFFFFFULL; c >>= 64;
    r->d[1] = c & 0xFFFFFFFFFFFFFFFFULL; c >>= 64;
    r->d[2] = c & 0xFFFFFFFFFFFFFFFFULL; c >>= 64;
    r->d[3] = c & 0xFFFFFFFFFFFFFFFFULL; c >>= 64;
#endif

    /* Final reduction of r. */
    secp256k1_scalar_reduce(r, c + secp256k1_scalar_check_overflow(r));
}
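/* The reduction works in stages because 2^256 == SECP256K1_N_C (mod n): the
 * top four limbs l[4..7] are folded into the bottom half (512 -> 385 bits),
 * the new high limbs m[4..6] are folded again (385 -> 258 bits), the single
 * remaining high limb p4 is folded once more (258 -> 256 bits plus a carry),
 * and one final conditional subtraction of n brings the result below n. */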
static void secp256k1_scalar_mul_512(uint64_t l[8], const secp256k1_scalar *a, const secp256k1_scalar *b) {
#ifdef USE_ASM_X86_64
    const uint64_t *pb = b->d;
    __asm__ __volatile__(
    "movq 0(%%rdi), %%r15\n"
    "movq 8(%%rdi), %%rbx\n"
    "movq 16(%%rdi), %%rcx\n"
    "movq 0(%%rdx), %%r11\n"
    "movq 8(%%rdx), %%r12\n"
    "movq 16(%%rdx), %%r13\n"
    "movq 24(%%rdx), %%r14\n"
    "movq %%r15, %%rax\n"
    "movq %%rax, 0(%%rsi)\n"
    "xorq %%r10, %%r10\n"
    "movq %%r15, %%rax\n"
    "movq %%rbx, %%rax\n"
    "movq %%r8, 8(%%rsi)\n"
    "movq %%r15, %%rax\n"
    "adcq %%rdx, %%r10\n"
    "movq %%rbx, %%rax\n"
    "adcq %%rdx, %%r10\n"
    "movq %%rcx, %%rax\n"
    "adcq %%rdx, %%r10\n"
    "movq %%r9, 16(%%rsi)\n"
    "movq %%r15, %%rax\n"
    "addq %%rax, %%r10\n"
    "movq 24(%%rdi), %%r15\n"
    "movq %%rbx, %%rax\n"
    "addq %%rax, %%r10\n"
    "movq %%rcx, %%rax\n"
    "addq %%rax, %%r10\n"
    "movq %%r15, %%rax\n"
    "addq %%rax, %%r10\n"
    "movq %%r10, 24(%%rsi)\n"
    "xorq %%r10, %%r10\n"
    "movq %%rbx, %%rax\n"
    "movq %%rcx, %%rax\n"
    "movq %%r15, %%rax\n"
    "movq %%r8, 32(%%rsi)\n"
    "movq %%rcx, %%rax\n"
    "adcq %%rdx, %%r10\n"
    "movq %%r15, %%rax\n"
    "adcq %%rdx, %%r10\n"
    "movq %%r9, 40(%%rsi)\n"
    "movq %%r15, %%rax\n"
    "addq %%rax, %%r10\n"
    "movq %%r10, 48(%%rsi)\n"
    "movq %%r8, 56(%%rsi)\n"
    : "rax", "rbx", "rcx", "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15", "cc", "memory");
#else
    /* 160 bit accumulator. */
    uint64_t c0 = 0, c1 = 0;
    uint32_t c2 = 0;
static void secp256k1_scalar_sqr_512(uint64_t l[8], const secp256k1_scalar *a) {
#ifdef USE_ASM_X86_64
    __asm__ __volatile__(
    "movq 0(%%rdi), %%r11\n"
    "movq 8(%%rdi), %%r12\n"
    "movq 16(%%rdi), %%r13\n"
    "movq 24(%%rdi), %%r14\n"
    "movq %%r11, %%rax\n"
    "movq %%rax, 0(%%rsi)\n"
    "xorq %%r10, %%r10\n"
    "movq %%r11, %%rax\n"
    "movq %%r8, 8(%%rsi)\n"
    "movq %%r11, %%rax\n"
    "adcq %%rdx, %%r10\n"
    "adcq %%rdx, %%r10\n"
    "movq %%r12, %%rax\n"
    "adcq %%rdx, %%r10\n"
    "movq %%r9, 16(%%rsi)\n"
    "movq %%r11, %%rax\n"
    "addq %%rax, %%r10\n"
    "addq %%rax, %%r10\n"
    "movq %%r12, %%rax\n"
    "addq %%rax, %%r10\n"
    "addq %%rax, %%r10\n"
    "movq %%r10, 24(%%rsi)\n"
    "xorq %%r10, %%r10\n"
    "movq %%r12, %%rax\n"
    "movq %%r13, %%rax\n"
    "movq %%r8, 32(%%rsi)\n"
    "movq %%r13, %%rax\n"
    "adcq %%rdx, %%r10\n"
    "adcq %%rdx, %%r10\n"
    "movq %%r9, 40(%%rsi)\n"
    "movq %%r14, %%rax\n"
    "addq %%rax, %%r10\n"
    "movq %%r10, 48(%%rsi)\n"
    "movq %%r8, 56(%%rsi)\n"
    : "rax", "rdx", "r8", "r9", "r10", "r11", "r12", "r13", "r14", "cc", "memory");
#else
    /* 160 bit accumulator. */
    uint64_t c0 = 0, c1 = 0;
    uint32_t c2 = 0;
static void secp256k1_scalar_mul(secp256k1_scalar *r, const secp256k1_scalar *a, const secp256k1_scalar *b) {
    uint64_t l[8];
    secp256k1_scalar_mul_512(l, a, b);
    secp256k1_scalar_reduce_512(r, l);
}
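/* Illustrative call (an assumption, not from this file; x and y are
 * initialized scalars): modular multiplication is the full 512-bit schoolbook
 * product followed by the staged reduction above.
 *
 *   secp256k1_scalar t;
 *   secp256k1_scalar_mul(&t, &x, &y);   // t = x * y mod n
 */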
static int secp256k1_scalar_shr_int(secp256k1_scalar *r, int n) {
    int ret;
    VERIFY_CHECK(n > 0);
    VERIFY_CHECK(n < 16);
    ret = r->d[0] & ((1 << n) - 1);
    r->d[0] = (r->d[0] >> n) + (r->d[1] << (64 - n));
    r->d[1] = (r->d[1] >> n) + (r->d[2] << (64 - n));
    r->d[2] = (r->d[2] >> n) + (r->d[3] << (64 - n));
    r->d[3] = (r->d[3] >> n);
    return ret;
}
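/* Illustrative use (an assumption, not from this file): peeling the low bits
 * off a scalar, least significant first, as in windowed multiplication.
 *
 *   int low4 = secp256k1_scalar_shr_int(&s, 4);   // returns old s & 0xF, then s >>= 4
 */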
static void secp256k1_scalar_sqr(secp256k1_scalar *r, const secp256k1_scalar *a) {
    uint64_t l[8];
    secp256k1_scalar_sqr_512(l, a);
    secp256k1_scalar_reduce_512(r, l);
}
#ifdef USE_ENDOMORPHISM
SECP256K1_INLINE static int secp256k1_scalar_eq(const secp256k1_scalar *a, const secp256k1_scalar *b) {
    return ((a->d[0] ^ b->d[0]) | (a->d[1] ^ b->d[1]) | (a->d[2] ^ b->d[2]) | (a->d[3] ^ b->d[3])) == 0;
}
static void secp256k1_scalar_mul_shift_var(secp256k1_scalar *r, const secp256k1_scalar *a, const secp256k1_scalar *b, unsigned int shift) {
    uint64_t l[8];
    unsigned int shiftlimbs;
    unsigned int shiftlow;
    unsigned int shifthigh;
    VERIFY_CHECK(shift >= 256);
    secp256k1_scalar_mul_512(l, a, b);
    shiftlimbs = shift >> 6;
    shiftlow = shift & 0x3F;
    shifthigh = 64 - shiftlow;
    r->d[0] = shift < 512 ? (l[0 + shiftlimbs] >> shiftlow | (shift < 448 && shiftlow ? (l[1 + shiftlimbs] << shifthigh) : 0)) : 0;
    r->d[1] = shift < 448 ? (l[1 + shiftlimbs] >> shiftlow | (shift < 384 && shiftlow ? (l[2 + shiftlimbs] << shifthigh) : 0)) : 0;
    r->d[2] = shift < 384 ? (l[2 + shiftlimbs] >> shiftlow | (shift < 320 && shiftlow ? (l[3 + shiftlimbs] << shifthigh) : 0)) : 0;
    r->d[3] = shift < 320 ? (l[3 + shiftlimbs] >> shiftlow) : 0;
    secp256k1_scalar_cadd_bit(r, 0, (l[(shift - 1) >> 6] >> ((shift - 1) & 0x3f)) & 1);
}
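/* Conditionally adding the bit just below the cut rounds the shifted product
 * to nearest rather than truncating, i.e. r = round(a*b / 2^shift), which
 * preserves precision for the scalar-splitting code that uses this. */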
#define ROTL32(x,n) ((x) << (n) | (x) >> (32-(n)))
#define QUARTERROUND(a,b,c,d) \
  a += b; d = ROTL32(d ^ a, 16); \
  c += d; b = ROTL32(b ^ c, 12); \
  a += b; d = ROTL32(d ^ a, 8); \
  c += d; b = ROTL32(b ^ c, 7);
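/* This is the ChaCha20 quarter round (per RFC 8439): the add-rotate-xor
 * structure and the 16/12/8/7 rotation constants match the reference
 * definition of the cipher. */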
#ifdef WORDS_BIGENDIAN
#define LE32(p) ((((p) & 0xFF) << 24) | (((p) & 0xFF00) << 8) | (((p) & 0xFF0000) >> 8) | (((p) & 0xFF000000) >> 24))
#define BE32(p) (p)
#else
#define BE32(p) ((((p) & 0xFF) << 24) | (((p) & 0xFF00) << 8) | (((p) & 0xFF0000) >> 8) | (((p) & 0xFF000000) >> 24))
#define LE32(p) (p)
#endif
static void secp256k1_scalar_chacha20(secp256k1_scalar *r1, secp256k1_scalar *r2, const unsigned char *seed, uint64_t idx) {
    uint32_t seed32[8];
    size_t over_count = 0;
    uint32_t x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15;
    int over1, over2;

    memcpy((void *) seed32, (const void *) seed, 32);
    do {
        /* Key words of the initial ChaCha20 state. */
        x4 = LE32(seed32[0]);
        x5 = LE32(seed32[1]);
        x6 = LE32(seed32[2]);
        x7 = LE32(seed32[3]);
        x8 = LE32(seed32[4]);
        x9 = LE32(seed32[5]);
        x10 = LE32(seed32[6]);
        x11 = LE32(seed32[7]);

        /* Feed-forward: add the initial state back in after the rounds. */
        x4 += LE32(seed32[0]);
        x5 += LE32(seed32[1]);
        x6 += LE32(seed32[2]);
        x7 += LE32(seed32[3]);
        x8 += LE32(seed32[4]);
        x9 += LE32(seed32[5]);
        x10 += LE32(seed32[6]);
        x11 += LE32(seed32[7]);
        r1->d[3] = BE32((uint64_t) x0) << 32 | BE32(x1);
        r1->d[2] = BE32((uint64_t) x2) << 32 | BE32(x3);
        r1->d[1] = BE32((uint64_t) x4) << 32 | BE32(x5);
        r1->d[0] = BE32((uint64_t) x6) << 32 | BE32(x7);
        r2->d[3] = BE32((uint64_t) x8) << 32 | BE32(x9);
        r2->d[2] = BE32((uint64_t) x10) << 32 | BE32(x11);
        r2->d[1] = BE32((uint64_t) x12) << 32 | BE32(x13);
        r2->d[0] = BE32((uint64_t) x14) << 32 | BE32(x15);
        over1 = secp256k1_scalar_check_overflow(r1);
        over2 = secp256k1_scalar_check_overflow(r2);
        over_count++;
    } while (over1 | over2);
}
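/* The do/while loop is rejection sampling (an illustrative reading, stated
 * as an assumption): each ChaCha20 block yields two candidate 256-bit values,
 * and if either is >= n both are regenerated, so the final r1 and r2 are
 * uniform over [0, n). Since n is extremely close to 2^256, a retry occurs
 * with probability only about 2^-127 per iteration. */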