scalar_4x64_impl.h
/**********************************************************************
 * Copyright (c) 2013, 2014 Pieter Wuille                             *
 * Distributed under the MIT software license, see the accompanying   *
 * file COPYING or http://www.opensource.org/licenses/mit-license.php.*
 **********************************************************************/

#ifndef SECP256K1_SCALAR_REPR_IMPL_H
#define SECP256K1_SCALAR_REPR_IMPL_H

#include "scalar.h"
#include <string.h>

/* Limbs of the secp256k1 order. */
#define SECP256K1_N_0 ((uint64_t)0xBFD25E8CD0364141ULL)
#define SECP256K1_N_1 ((uint64_t)0xBAAEDCE6AF48A03BULL)
#define SECP256K1_N_2 ((uint64_t)0xFFFFFFFFFFFFFFFEULL)
#define SECP256K1_N_3 ((uint64_t)0xFFFFFFFFFFFFFFFFULL)

/* Limbs of 2^256 minus the secp256k1 order. */
#define SECP256K1_N_C_0 (~SECP256K1_N_0 + 1)
#define SECP256K1_N_C_1 (~SECP256K1_N_1)
#define SECP256K1_N_C_2 (1)
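/* Because the top two limbs of the order are 0xFFFFFFFFFFFFFFFF and
 * 0xFFFFFFFFFFFFFFFE, the complement 2^256 - n fits in 129 bits, so only
 * three limbs are needed (the would-be SECP256K1_N_C_3 is zero). The
 * reductions below rely on 2^256 == SECP256K1_N_C (mod n). */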

/* Limbs of half the secp256k1 order. */
#define SECP256K1_N_H_0 ((uint64_t)0xDFE92F46681B20A0ULL)
#define SECP256K1_N_H_1 ((uint64_t)0x5D576E7357A4501DULL)
#define SECP256K1_N_H_2 ((uint64_t)0xFFFFFFFFFFFFFFFFULL)
#define SECP256K1_N_H_3 ((uint64_t)0x7FFFFFFFFFFFFFFFULL)

SECP256K1_INLINE static void secp256k1_scalar_clear(secp256k1_scalar *r) {
    r->d[0] = 0;
    r->d[1] = 0;
    r->d[2] = 0;
    r->d[3] = 0;
}

SECP256K1_INLINE static void secp256k1_scalar_set_int(secp256k1_scalar *r, unsigned int v) {
    r->d[0] = v;
    r->d[1] = 0;
    r->d[2] = 0;
    r->d[3] = 0;
}

SECP256K1_INLINE static void secp256k1_scalar_set_u64(secp256k1_scalar *r, uint64_t v) {
    r->d[0] = v;
    r->d[1] = 0;
    r->d[2] = 0;
    r->d[3] = 0;
}

SECP256K1_INLINE static unsigned int secp256k1_scalar_get_bits(const secp256k1_scalar *a, unsigned int offset, unsigned int count) {
    VERIFY_CHECK((offset + count - 1) >> 6 == offset >> 6);
    return (a->d[offset >> 6] >> (offset & 0x3F)) & ((((uint64_t)1) << count) - 1);
}

SECP256K1_INLINE static unsigned int secp256k1_scalar_get_bits_var(const secp256k1_scalar *a, unsigned int offset, unsigned int count) {
    VERIFY_CHECK(count < 32);
    VERIFY_CHECK(offset + count <= 256);
    if ((offset + count - 1) >> 6 == offset >> 6) {
        return secp256k1_scalar_get_bits(a, offset, count);
    } else {
        VERIFY_CHECK((offset >> 6) + 1 < 4);
        return ((a->d[offset >> 6] >> (offset & 0x3F)) | (a->d[(offset >> 6) + 1] << (64 - (offset & 0x3F)))) & ((((uint64_t)1) << count) - 1);
    }
}

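/* Branchless comparison of a with the group order n: limbs are examined
 * from most significant to least; 'no' latches as soon as a limb proves
 * a < n, and 'yes' as soon as a limb proves a > n with no more
 * significant limb having decided first. */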
SECP256K1_INLINE static int secp256k1_scalar_check_overflow(const secp256k1_scalar *a) {
    int yes = 0;
    int no = 0;
    no |= (a->d[3] < SECP256K1_N_3); /* No need for a > check. */
    no |= (a->d[2] < SECP256K1_N_2);
    yes |= (a->d[2] > SECP256K1_N_2) & ~no;
    no |= (a->d[1] < SECP256K1_N_1);
    yes |= (a->d[1] > SECP256K1_N_1) & ~no;
    yes |= (a->d[0] >= SECP256K1_N_0) & ~no;
    return yes;
}

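/* Conditionally subtract the order: when overflow is 1, adding
 * SECP256K1_N_C = 2^256 - n and discarding the carry out of the top limb
 * is equivalent to subtracting n modulo 2^256. */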
SECP256K1_INLINE static int secp256k1_scalar_reduce(secp256k1_scalar *r, unsigned int overflow) {
    uint128_t t;
    VERIFY_CHECK(overflow <= 1);
    t = (uint128_t)r->d[0] + overflow * SECP256K1_N_C_0;
    r->d[0] = t & 0xFFFFFFFFFFFFFFFFULL; t >>= 64;
    t += (uint128_t)r->d[1] + overflow * SECP256K1_N_C_1;
    r->d[1] = t & 0xFFFFFFFFFFFFFFFFULL; t >>= 64;
    t += (uint128_t)r->d[2] + overflow * SECP256K1_N_C_2;
    r->d[2] = t & 0xFFFFFFFFFFFFFFFFULL; t >>= 64;
    t += (uint64_t)r->d[3];
    r->d[3] = t & 0xFFFFFFFFFFFFFFFFULL;
    return overflow;
}

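/* r = (a + b) mod n. Returns 1 if the sum wrapped 2^256 or was >= the
 * order (i.e. a reduction was performed), 0 otherwise. */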
static int secp256k1_scalar_add(secp256k1_scalar *r, const secp256k1_scalar *a, const secp256k1_scalar *b) {
    int overflow;
    uint128_t t = (uint128_t)a->d[0] + b->d[0];
    r->d[0] = t & 0xFFFFFFFFFFFFFFFFULL; t >>= 64;
    t += (uint128_t)a->d[1] + b->d[1];
    r->d[1] = t & 0xFFFFFFFFFFFFFFFFULL; t >>= 64;
    t += (uint128_t)a->d[2] + b->d[2];
    r->d[2] = t & 0xFFFFFFFFFFFFFFFFULL; t >>= 64;
    t += (uint128_t)a->d[3] + b->d[3];
    r->d[3] = t & 0xFFFFFFFFFFFFFFFFULL; t >>= 64;
    overflow = t + secp256k1_scalar_check_overflow(r);
    VERIFY_CHECK(overflow == 0 || overflow == 1);
    secp256k1_scalar_reduce(r, overflow);
    return overflow;
}

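/* Conditionally add 2^bit to r. When flag is 0, the line below adds 0x100
 * to 'bit', pushing (bit >> 6) past 3 so that every limb selector
 * ((bit >> 6) == k) is false and the call is a no-op, without branching
 * on the flag. */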
static void secp256k1_scalar_cadd_bit(secp256k1_scalar *r, unsigned int bit, int flag) {
    uint128_t t;
    VERIFY_CHECK(bit < 256);
    bit += ((uint32_t) flag - 1) & 0x100;  /* forcing (bit >> 6) > 3 makes this a noop */
    t = (uint128_t)r->d[0] + (((uint64_t)((bit >> 6) == 0)) << (bit & 0x3F));
    r->d[0] = t & 0xFFFFFFFFFFFFFFFFULL; t >>= 64;
    t += (uint128_t)r->d[1] + (((uint64_t)((bit >> 6) == 1)) << (bit & 0x3F));
    r->d[1] = t & 0xFFFFFFFFFFFFFFFFULL; t >>= 64;
    t += (uint128_t)r->d[2] + (((uint64_t)((bit >> 6) == 2)) << (bit & 0x3F));
    r->d[2] = t & 0xFFFFFFFFFFFFFFFFULL; t >>= 64;
    t += (uint128_t)r->d[3] + (((uint64_t)((bit >> 6) == 3)) << (bit & 0x3F));
    r->d[3] = t & 0xFFFFFFFFFFFFFFFFULL;
#ifdef VERIFY
    VERIFY_CHECK((t >> 64) == 0);
    VERIFY_CHECK(secp256k1_scalar_check_overflow(r) == 0);
#endif
}

static void secp256k1_scalar_set_b32(secp256k1_scalar *r, const unsigned char *b32, int *overflow) {
    int over;
    r->d[0] = (uint64_t)b32[31] | (uint64_t)b32[30] << 8 | (uint64_t)b32[29] << 16 | (uint64_t)b32[28] << 24 | (uint64_t)b32[27] << 32 | (uint64_t)b32[26] << 40 | (uint64_t)b32[25] << 48 | (uint64_t)b32[24] << 56;
    r->d[1] = (uint64_t)b32[23] | (uint64_t)b32[22] << 8 | (uint64_t)b32[21] << 16 | (uint64_t)b32[20] << 24 | (uint64_t)b32[19] << 32 | (uint64_t)b32[18] << 40 | (uint64_t)b32[17] << 48 | (uint64_t)b32[16] << 56;
    r->d[2] = (uint64_t)b32[15] | (uint64_t)b32[14] << 8 | (uint64_t)b32[13] << 16 | (uint64_t)b32[12] << 24 | (uint64_t)b32[11] << 32 | (uint64_t)b32[10] << 40 | (uint64_t)b32[9] << 48 | (uint64_t)b32[8] << 56;
    r->d[3] = (uint64_t)b32[7] | (uint64_t)b32[6] << 8 | (uint64_t)b32[5] << 16 | (uint64_t)b32[4] << 24 | (uint64_t)b32[3] << 32 | (uint64_t)b32[2] << 40 | (uint64_t)b32[1] << 48 | (uint64_t)b32[0] << 56;
    over = secp256k1_scalar_reduce(r, secp256k1_scalar_check_overflow(r));
    if (overflow) {
        *overflow = over;
    }
}

static void secp256k1_scalar_get_b32(unsigned char *bin, const secp256k1_scalar* a) {
    bin[0] = a->d[3] >> 56; bin[1] = a->d[3] >> 48; bin[2] = a->d[3] >> 40; bin[3] = a->d[3] >> 32; bin[4] = a->d[3] >> 24; bin[5] = a->d[3] >> 16; bin[6] = a->d[3] >> 8; bin[7] = a->d[3];
    bin[8] = a->d[2] >> 56; bin[9] = a->d[2] >> 48; bin[10] = a->d[2] >> 40; bin[11] = a->d[2] >> 32; bin[12] = a->d[2] >> 24; bin[13] = a->d[2] >> 16; bin[14] = a->d[2] >> 8; bin[15] = a->d[2];
    bin[16] = a->d[1] >> 56; bin[17] = a->d[1] >> 48; bin[18] = a->d[1] >> 40; bin[19] = a->d[1] >> 32; bin[20] = a->d[1] >> 24; bin[21] = a->d[1] >> 16; bin[22] = a->d[1] >> 8; bin[23] = a->d[1];
    bin[24] = a->d[0] >> 56; bin[25] = a->d[0] >> 48; bin[26] = a->d[0] >> 40; bin[27] = a->d[0] >> 32; bin[28] = a->d[0] >> 24; bin[29] = a->d[0] >> 16; bin[30] = a->d[0] >> 8; bin[31] = a->d[0];
}

SECP256K1_INLINE static int secp256k1_scalar_is_zero(const secp256k1_scalar *a) {
    return (a->d[0] | a->d[1] | a->d[2] | a->d[3]) == 0;
}

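/* r = -a mod n, computed as (~a + 1) + n modulo 2^256 == n - a. The
 * 'nonzero' mask zeroes the result when a == 0, since n mod n == 0. */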
static void secp256k1_scalar_negate(secp256k1_scalar *r, const secp256k1_scalar *a) {
    uint64_t nonzero = 0xFFFFFFFFFFFFFFFFULL * (secp256k1_scalar_is_zero(a) == 0);
    uint128_t t = (uint128_t)(~a->d[0]) + SECP256K1_N_0 + 1;
    r->d[0] = t & nonzero; t >>= 64;
    t += (uint128_t)(~a->d[1]) + SECP256K1_N_1;
    r->d[1] = t & nonzero; t >>= 64;
    t += (uint128_t)(~a->d[2]) + SECP256K1_N_2;
    r->d[2] = t & nonzero; t >>= 64;
    t += (uint128_t)(~a->d[3]) + SECP256K1_N_3;
    r->d[3] = t & nonzero;
}

SECP256K1_INLINE static int secp256k1_scalar_is_one(const secp256k1_scalar *a) {
    return ((a->d[0] ^ 1) | a->d[1] | a->d[2] | a->d[3]) == 0;
}

static int secp256k1_scalar_is_high(const secp256k1_scalar *a) {
    int yes = 0;
    int no = 0;
    no |= (a->d[3] < SECP256K1_N_H_3);
    yes |= (a->d[3] > SECP256K1_N_H_3) & ~no;
    no |= (a->d[2] < SECP256K1_N_H_2) & ~yes; /* No need for a > check. */
    no |= (a->d[1] < SECP256K1_N_H_1) & ~yes;
    yes |= (a->d[1] > SECP256K1_N_H_1) & ~no;
    yes |= (a->d[0] > SECP256K1_N_H_0) & ~no;
    return yes;
}

static int secp256k1_scalar_cond_negate(secp256k1_scalar *r, int flag) {
    /* If flag = 0, mask = 00...00 and this is a no-op;
     * if flag = 1, mask = 11...11 and this is identical to secp256k1_scalar_negate. */
    uint64_t mask = !flag - 1;
    uint64_t nonzero = (secp256k1_scalar_is_zero(r) != 0) - 1;
    uint128_t t = (uint128_t)(r->d[0] ^ mask) + ((SECP256K1_N_0 + 1) & mask);
    r->d[0] = t & nonzero; t >>= 64;
    t += (uint128_t)(r->d[1] ^ mask) + (SECP256K1_N_1 & mask);
    r->d[1] = t & nonzero; t >>= 64;
    t += (uint128_t)(r->d[2] ^ mask) + (SECP256K1_N_2 & mask);
    r->d[2] = t & nonzero; t >>= 64;
    t += (uint128_t)(r->d[3] ^ mask) + (SECP256K1_N_3 & mask);
    r->d[3] = t & nonzero;
    return 2 * (mask == 0) - 1;
}

/* Inspired by the macros in OpenSSL's crypto/bn/asm/x86_64-gcc.c. */

/** Add a*b to the number defined by (c0,c1,c2). c2 must never overflow. */
#define muladd(a,b) { \
    uint64_t tl, th; \
    { \
        uint128_t t = (uint128_t)a * b; \
        th = t >> 64;         /* at most 0xFFFFFFFFFFFFFFFE */ \
        tl = t; \
    } \
    c0 += tl;                 /* overflow is handled on the next line */ \
    th += (c0 < tl) ? 1 : 0;  /* at most 0xFFFFFFFFFFFFFFFF */ \
    c1 += th;                 /* overflow is handled on the next line */ \
    c2 += (c1 < th) ? 1 : 0;  /* never overflows by contract (verified in the next line) */ \
    VERIFY_CHECK((c1 >= th) || (c2 != 0)); \
}

/** Add a*b to the number defined by (c0,c1). c1 must never overflow. */
#define muladd_fast(a,b) { \
    uint64_t tl, th; \
    { \
        uint128_t t = (uint128_t)a * b; \
        th = t >> 64;         /* at most 0xFFFFFFFFFFFFFFFE */ \
        tl = t; \
    } \
    c0 += tl;                 /* overflow is handled on the next line */ \
    th += (c0 < tl) ? 1 : 0;  /* at most 0xFFFFFFFFFFFFFFFF */ \
    c1 += th;                 /* never overflows by contract (verified in the next line) */ \
    VERIFY_CHECK(c1 >= th); \
}

/** Add 2*a*b to the number defined by (c0,c1,c2). c2 must never overflow. */
#define muladd2(a,b) { \
    uint64_t tl, th, th2, tl2; \
    { \
        uint128_t t = (uint128_t)a * b; \
        th = t >> 64;               /* at most 0xFFFFFFFFFFFFFFFE */ \
        tl = t; \
    } \
    th2 = th + th;                  /* at most 0xFFFFFFFFFFFFFFFE (in case th was 0x7FFFFFFFFFFFFFFF) */ \
    c2 += (th2 < th) ? 1 : 0;       /* never overflows by contract (verified in the next line) */ \
    VERIFY_CHECK((th2 >= th) || (c2 != 0)); \
    tl2 = tl + tl;                  /* at most 0xFFFFFFFFFFFFFFFE (in case the lowest 63 bits of tl were 0x7FFFFFFFFFFFFFFF) */ \
    th2 += (tl2 < tl) ? 1 : 0;      /* at most 0xFFFFFFFFFFFFFFFF */ \
    c0 += tl2;                      /* overflow is handled on the next line */ \
    th2 += (c0 < tl2) ? 1 : 0;      /* second overflow is handled on the next line */ \
    c2 += (c0 < tl2) & (th2 == 0);  /* never overflows by contract (verified in the next line) */ \
    VERIFY_CHECK((c0 >= tl2) || (th2 != 0) || (c2 != 0)); \
    c1 += th2;                      /* overflow is handled on the next line */ \
    c2 += (c1 < th2) ? 1 : 0;       /* never overflows by contract (verified in the next line) */ \
    VERIFY_CHECK((c1 >= th2) || (c2 != 0)); \
}

/** Add a to the number defined by (c0,c1,c2). c2 must never overflow. */
#define sumadd(a) { \
    unsigned int over; \
    c0 += (a);                  /* overflow is handled on the next line */ \
    over = (c0 < (a)) ? 1 : 0; \
    c1 += over;                 /* overflow is handled on the next line */ \
    c2 += (c1 < over) ? 1 : 0;  /* never overflows by contract */ \
}

/** Add a to the number defined by (c0,c1). c1 must never overflow, c2 must be zero. */
#define sumadd_fast(a) { \
    c0 += (a);                 /* overflow is handled on the next line */ \
    c1 += (c0 < (a)) ? 1 : 0;  /* never overflows by contract (verified in the next line) */ \
    VERIFY_CHECK((c1 != 0) | (c0 >= (a))); \
    VERIFY_CHECK(c2 == 0); \
}

/** Extract the lowest 64 bits of (c0,c1,c2) into n, and left shift the number 64 bits. */
#define extract(n) { \
    (n) = c0; \
    c0 = c1; \
    c1 = c2; \
    c2 = 0; \
}

/** Extract the lowest 64 bits of (c0,c1,c2) into n, and left shift the number 64 bits. c2 is required to be zero. */
#define extract_fast(n) { \
    (n) = c0; \
    c0 = c1; \
    c1 = 0; \
    VERIFY_CHECK(c2 == 0); \
}

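/* Reduce a 512-bit input l[0..7] modulo the order in three passes, each
 * folding the limbs above bit 256 back in via 2^256 == SECP256K1_N_C
 * (mod n): 512 -> 385 bits, then 385 -> 258, then 258 -> 256, followed
 * by a final conditional subtraction of n. */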
static void secp256k1_scalar_reduce_512(secp256k1_scalar *r, const uint64_t *l) {
#ifdef USE_ASM_X86_64
    /* Reduce 512 bits into 385. */
    uint64_t m0, m1, m2, m3, m4, m5, m6;
    uint64_t p0, p1, p2, p3, p4;
    uint64_t c;

    __asm__ __volatile__(
    /* Preload. */
    "movq 32(%%rsi), %%r11\n"
    "movq 40(%%rsi), %%r12\n"
    "movq 48(%%rsi), %%r13\n"
    "movq 56(%%rsi), %%r14\n"
    /* Initialize r8,r9,r10 */
    "movq 0(%%rsi), %%r8\n"
    "xorq %%r9, %%r9\n"
    "xorq %%r10, %%r10\n"
    /* (r8,r9) += n0 * c0 */
    "movq %8, %%rax\n"
    "mulq %%r11\n"
    "addq %%rax, %%r8\n"
    "adcq %%rdx, %%r9\n"
    /* extract m0 */
    "movq %%r8, %q0\n"
    "xorq %%r8, %%r8\n"
    /* (r9,r10) += l1 */
    "addq 8(%%rsi), %%r9\n"
    "adcq $0, %%r10\n"
    /* (r9,r10,r8) += n1 * c0 */
    "movq %8, %%rax\n"
    "mulq %%r12\n"
    "addq %%rax, %%r9\n"
    "adcq %%rdx, %%r10\n"
    "adcq $0, %%r8\n"
    /* (r9,r10,r8) += n0 * c1 */
    "movq %9, %%rax\n"
    "mulq %%r11\n"
    "addq %%rax, %%r9\n"
    "adcq %%rdx, %%r10\n"
    "adcq $0, %%r8\n"
    /* extract m1 */
    "movq %%r9, %q1\n"
    "xorq %%r9, %%r9\n"
    /* (r10,r8,r9) += l2 */
    "addq 16(%%rsi), %%r10\n"
    "adcq $0, %%r8\n"
    "adcq $0, %%r9\n"
    /* (r10,r8,r9) += n2 * c0 */
    "movq %8, %%rax\n"
    "mulq %%r13\n"
    "addq %%rax, %%r10\n"
    "adcq %%rdx, %%r8\n"
    "adcq $0, %%r9\n"
    /* (r10,r8,r9) += n1 * c1 */
    "movq %9, %%rax\n"
    "mulq %%r12\n"
    "addq %%rax, %%r10\n"
    "adcq %%rdx, %%r8\n"
    "adcq $0, %%r9\n"
    /* (r10,r8,r9) += n0 */
    "addq %%r11, %%r10\n"
    "adcq $0, %%r8\n"
    "adcq $0, %%r9\n"
    /* extract m2 */
    "movq %%r10, %q2\n"
    "xorq %%r10, %%r10\n"
    /* (r8,r9,r10) += l3 */
    "addq 24(%%rsi), %%r8\n"
    "adcq $0, %%r9\n"
    "adcq $0, %%r10\n"
    /* (r8,r9,r10) += n3 * c0 */
    "movq %8, %%rax\n"
    "mulq %%r14\n"
    "addq %%rax, %%r8\n"
    "adcq %%rdx, %%r9\n"
    "adcq $0, %%r10\n"
    /* (r8,r9,r10) += n2 * c1 */
    "movq %9, %%rax\n"
    "mulq %%r13\n"
    "addq %%rax, %%r8\n"
    "adcq %%rdx, %%r9\n"
    "adcq $0, %%r10\n"
    /* (r8,r9,r10) += n1 */
    "addq %%r12, %%r8\n"
    "adcq $0, %%r9\n"
    "adcq $0, %%r10\n"
    /* extract m3 */
    "movq %%r8, %q3\n"
    "xorq %%r8, %%r8\n"
    /* (r9,r10,r8) += n3 * c1 */
    "movq %9, %%rax\n"
    "mulq %%r14\n"
    "addq %%rax, %%r9\n"
    "adcq %%rdx, %%r10\n"
    "adcq $0, %%r8\n"
    /* (r9,r10,r8) += n2 */
    "addq %%r13, %%r9\n"
    "adcq $0, %%r10\n"
    "adcq $0, %%r8\n"
    /* extract m4 */
    "movq %%r9, %q4\n"
    /* (r10,r8) += n3 */
    "addq %%r14, %%r10\n"
    "adcq $0, %%r8\n"
    /* extract m5 */
    "movq %%r10, %q5\n"
    /* extract m6 */
    "movq %%r8, %q6\n"
    : "=g"(m0), "=g"(m1), "=g"(m2), "=g"(m3), "=g"(m4), "=g"(m5), "=g"(m6)
    : "S"(l), "n"(SECP256K1_N_C_0), "n"(SECP256K1_N_C_1)
    : "rax", "rdx", "r8", "r9", "r10", "r11", "r12", "r13", "r14", "cc");

    /* Reduce 385 bits into 258. */
    __asm__ __volatile__(
    /* Preload */
    "movq %q9, %%r11\n"
    "movq %q10, %%r12\n"
    "movq %q11, %%r13\n"
    /* Initialize (r8,r9,r10) */
    "movq %q5, %%r8\n"
    "xorq %%r9, %%r9\n"
    "xorq %%r10, %%r10\n"
    /* (r8,r9) += m4 * c0 */
    "movq %12, %%rax\n"
    "mulq %%r11\n"
    "addq %%rax, %%r8\n"
    "adcq %%rdx, %%r9\n"
    /* extract p0 */
    "movq %%r8, %q0\n"
    "xorq %%r8, %%r8\n"
    /* (r9,r10) += m1 */
    "addq %q6, %%r9\n"
    "adcq $0, %%r10\n"
    /* (r9,r10,r8) += m5 * c0 */
    "movq %12, %%rax\n"
    "mulq %%r12\n"
    "addq %%rax, %%r9\n"
    "adcq %%rdx, %%r10\n"
    "adcq $0, %%r8\n"
    /* (r9,r10,r8) += m4 * c1 */
    "movq %13, %%rax\n"
    "mulq %%r11\n"
    "addq %%rax, %%r9\n"
    "adcq %%rdx, %%r10\n"
    "adcq $0, %%r8\n"
    /* extract p1 */
    "movq %%r9, %q1\n"
    "xorq %%r9, %%r9\n"
    /* (r10,r8,r9) += m2 */
    "addq %q7, %%r10\n"
    "adcq $0, %%r8\n"
    "adcq $0, %%r9\n"
    /* (r10,r8,r9) += m6 * c0 */
    "movq %12, %%rax\n"
    "mulq %%r13\n"
    "addq %%rax, %%r10\n"
    "adcq %%rdx, %%r8\n"
    "adcq $0, %%r9\n"
    /* (r10,r8,r9) += m5 * c1 */
    "movq %13, %%rax\n"
    "mulq %%r12\n"
    "addq %%rax, %%r10\n"
    "adcq %%rdx, %%r8\n"
    "adcq $0, %%r9\n"
    /* (r10,r8,r9) += m4 */
    "addq %%r11, %%r10\n"
    "adcq $0, %%r8\n"
    "adcq $0, %%r9\n"
    /* extract p2 */
    "movq %%r10, %q2\n"
    /* (r8,r9) += m3 */
    "addq %q8, %%r8\n"
    "adcq $0, %%r9\n"
    /* (r8,r9) += m6 * c1 */
    "movq %13, %%rax\n"
    "mulq %%r13\n"
    "addq %%rax, %%r8\n"
    "adcq %%rdx, %%r9\n"
    /* (r8,r9) += m5 */
    "addq %%r12, %%r8\n"
    "adcq $0, %%r9\n"
    /* extract p3 */
    "movq %%r8, %q3\n"
    /* (r9) += m6 */
    "addq %%r13, %%r9\n"
    /* extract p4 */
    "movq %%r9, %q4\n"
    : "=&g"(p0), "=&g"(p1), "=&g"(p2), "=g"(p3), "=g"(p4)
    : "g"(m0), "g"(m1), "g"(m2), "g"(m3), "g"(m4), "g"(m5), "g"(m6), "n"(SECP256K1_N_C_0), "n"(SECP256K1_N_C_1)
    : "rax", "rdx", "r8", "r9", "r10", "r11", "r12", "r13", "cc");

    /* Reduce 258 bits into 256. */
    __asm__ __volatile__(
    /* Preload */
    "movq %q5, %%r10\n"
    /* (rax,rdx) = p4 * c0 */
    "movq %7, %%rax\n"
    "mulq %%r10\n"
    /* (rax,rdx) += p0 */
    "addq %q1, %%rax\n"
    "adcq $0, %%rdx\n"
    /* extract r0 */
    "movq %%rax, 0(%q6)\n"
    /* Move to (r8,r9) */
    "movq %%rdx, %%r8\n"
    "xorq %%r9, %%r9\n"
    /* (r8,r9) += p1 */
    "addq %q2, %%r8\n"
    "adcq $0, %%r9\n"
    /* (r8,r9) += p4 * c1 */
    "movq %8, %%rax\n"
    "mulq %%r10\n"
    "addq %%rax, %%r8\n"
    "adcq %%rdx, %%r9\n"
    /* Extract r1 */
    "movq %%r8, 8(%q6)\n"
    "xorq %%r8, %%r8\n"
    /* (r9,r8) += p4 */
    "addq %%r10, %%r9\n"
    "adcq $0, %%r8\n"
    /* (r9,r8) += p2 */
    "addq %q3, %%r9\n"
    "adcq $0, %%r8\n"
    /* Extract r2 */
    "movq %%r9, 16(%q6)\n"
    "xorq %%r9, %%r9\n"
    /* (r8,r9) += p3 */
    "addq %q4, %%r8\n"
    "adcq $0, %%r9\n"
    /* Extract r3 */
    "movq %%r8, 24(%q6)\n"
    /* Extract c */
    "movq %%r9, %q0\n"
    : "=g"(c)
    : "g"(p0), "g"(p1), "g"(p2), "g"(p3), "g"(p4), "D"(r), "n"(SECP256K1_N_C_0), "n"(SECP256K1_N_C_1)
    : "rax", "rdx", "r8", "r9", "r10", "cc", "memory");
#else
    uint128_t c;
    uint64_t c0, c1, c2;
    uint64_t n0 = l[4], n1 = l[5], n2 = l[6], n3 = l[7];
    uint64_t m0, m1, m2, m3, m4, m5;
    uint32_t m6;
    uint64_t p0, p1, p2, p3;
    uint32_t p4;

    /* Reduce 512 bits into 385. */
    /* m[0..6] = l[0..3] + n[0..3] * SECP256K1_N_C. */
    c0 = l[0]; c1 = 0; c2 = 0;
    muladd_fast(n0, SECP256K1_N_C_0);
    extract_fast(m0);
    sumadd_fast(l[1]);
    muladd(n1, SECP256K1_N_C_0);
    muladd(n0, SECP256K1_N_C_1);
    extract(m1);
    sumadd(l[2]);
    muladd(n2, SECP256K1_N_C_0);
    muladd(n1, SECP256K1_N_C_1);
    sumadd(n0);
    extract(m2);
    sumadd(l[3]);
    muladd(n3, SECP256K1_N_C_0);
    muladd(n2, SECP256K1_N_C_1);
    sumadd(n1);
    extract(m3);
    muladd(n3, SECP256K1_N_C_1);
    sumadd(n2);
    extract(m4);
    sumadd_fast(n3);
    extract_fast(m5);
    VERIFY_CHECK(c0 <= 1);
    m6 = c0;

    /* Reduce 385 bits into 258. */
    /* p[0..4] = m[0..3] + m[4..6] * SECP256K1_N_C. */
    c0 = m0; c1 = 0; c2 = 0;
    muladd_fast(m4, SECP256K1_N_C_0);
    extract_fast(p0);
    sumadd_fast(m1);
    muladd(m5, SECP256K1_N_C_0);
    muladd(m4, SECP256K1_N_C_1);
    extract(p1);
    sumadd(m2);
    muladd(m6, SECP256K1_N_C_0);
    muladd(m5, SECP256K1_N_C_1);
    sumadd(m4);
    extract(p2);
    sumadd_fast(m3);
    muladd_fast(m6, SECP256K1_N_C_1);
    sumadd_fast(m5);
    extract_fast(p3);
    p4 = c0 + m6;
    VERIFY_CHECK(p4 <= 2);

    /* Reduce 258 bits into 256. */
    /* r[0..3] = p[0..3] + p[4] * SECP256K1_N_C. */
    c = p0 + (uint128_t)SECP256K1_N_C_0 * p4;
    r->d[0] = c & 0xFFFFFFFFFFFFFFFFULL; c >>= 64;
    c += p1 + (uint128_t)SECP256K1_N_C_1 * p4;
    r->d[1] = c & 0xFFFFFFFFFFFFFFFFULL; c >>= 64;
    c += p2 + (uint128_t)p4;
    r->d[2] = c & 0xFFFFFFFFFFFFFFFFULL; c >>= 64;
    c += p3;
    r->d[3] = c & 0xFFFFFFFFFFFFFFFFULL; c >>= 64;
#endif

    /* Final reduction of r. */
    secp256k1_scalar_reduce(r, c + secp256k1_scalar_check_overflow(r));
}

static void secp256k1_scalar_mul_512(uint64_t l[8], const secp256k1_scalar *a, const secp256k1_scalar *b) {
#ifdef USE_ASM_X86_64
    const uint64_t *pb = b->d;
    __asm__ __volatile__(
    /* Preload */
    "movq 0(%%rdi), %%r15\n"
    "movq 8(%%rdi), %%rbx\n"
    "movq 16(%%rdi), %%rcx\n"
    "movq 0(%%rdx), %%r11\n"
    "movq 8(%%rdx), %%r12\n"
    "movq 16(%%rdx), %%r13\n"
    "movq 24(%%rdx), %%r14\n"
    /* (rax,rdx) = a0 * b0 */
    "movq %%r15, %%rax\n"
    "mulq %%r11\n"
    /* Extract l0 */
    "movq %%rax, 0(%%rsi)\n"
    /* (r8,r9,r10) = (rdx) */
    "movq %%rdx, %%r8\n"
    "xorq %%r9, %%r9\n"
    "xorq %%r10, %%r10\n"
    /* (r8,r9,r10) += a0 * b1 */
    "movq %%r15, %%rax\n"
    "mulq %%r12\n"
    "addq %%rax, %%r8\n"
    "adcq %%rdx, %%r9\n"
    "adcq $0, %%r10\n"
    /* (r8,r9,r10) += a1 * b0 */
    "movq %%rbx, %%rax\n"
    "mulq %%r11\n"
    "addq %%rax, %%r8\n"
    "adcq %%rdx, %%r9\n"
    "adcq $0, %%r10\n"
    /* Extract l1 */
    "movq %%r8, 8(%%rsi)\n"
    "xorq %%r8, %%r8\n"
    /* (r9,r10,r8) += a0 * b2 */
    "movq %%r15, %%rax\n"
    "mulq %%r13\n"
    "addq %%rax, %%r9\n"
    "adcq %%rdx, %%r10\n"
    "adcq $0, %%r8\n"
    /* (r9,r10,r8) += a1 * b1 */
    "movq %%rbx, %%rax\n"
    "mulq %%r12\n"
    "addq %%rax, %%r9\n"
    "adcq %%rdx, %%r10\n"
    "adcq $0, %%r8\n"
    /* (r9,r10,r8) += a2 * b0 */
    "movq %%rcx, %%rax\n"
    "mulq %%r11\n"
    "addq %%rax, %%r9\n"
    "adcq %%rdx, %%r10\n"
    "adcq $0, %%r8\n"
    /* Extract l2 */
    "movq %%r9, 16(%%rsi)\n"
    "xorq %%r9, %%r9\n"
    /* (r10,r8,r9) += a0 * b3 */
    "movq %%r15, %%rax\n"
    "mulq %%r14\n"
    "addq %%rax, %%r10\n"
    "adcq %%rdx, %%r8\n"
    "adcq $0, %%r9\n"
    /* Preload a3 */
    "movq 24(%%rdi), %%r15\n"
    /* (r10,r8,r9) += a1 * b2 */
    "movq %%rbx, %%rax\n"
    "mulq %%r13\n"
    "addq %%rax, %%r10\n"
    "adcq %%rdx, %%r8\n"
    "adcq $0, %%r9\n"
    /* (r10,r8,r9) += a2 * b1 */
    "movq %%rcx, %%rax\n"
    "mulq %%r12\n"
    "addq %%rax, %%r10\n"
    "adcq %%rdx, %%r8\n"
    "adcq $0, %%r9\n"
    /* (r10,r8,r9) += a3 * b0 */
    "movq %%r15, %%rax\n"
    "mulq %%r11\n"
    "addq %%rax, %%r10\n"
    "adcq %%rdx, %%r8\n"
    "adcq $0, %%r9\n"
    /* Extract l3 */
    "movq %%r10, 24(%%rsi)\n"
    "xorq %%r10, %%r10\n"
    /* (r8,r9,r10) += a1 * b3 */
    "movq %%rbx, %%rax\n"
    "mulq %%r14\n"
    "addq %%rax, %%r8\n"
    "adcq %%rdx, %%r9\n"
    "adcq $0, %%r10\n"
    /* (r8,r9,r10) += a2 * b2 */
    "movq %%rcx, %%rax\n"
    "mulq %%r13\n"
    "addq %%rax, %%r8\n"
    "adcq %%rdx, %%r9\n"
    "adcq $0, %%r10\n"
    /* (r8,r9,r10) += a3 * b1 */
    "movq %%r15, %%rax\n"
    "mulq %%r12\n"
    "addq %%rax, %%r8\n"
    "adcq %%rdx, %%r9\n"
    "adcq $0, %%r10\n"
    /* Extract l4 */
    "movq %%r8, 32(%%rsi)\n"
    "xorq %%r8, %%r8\n"
    /* (r9,r10,r8) += a2 * b3 */
    "movq %%rcx, %%rax\n"
    "mulq %%r14\n"
    "addq %%rax, %%r9\n"
    "adcq %%rdx, %%r10\n"
    "adcq $0, %%r8\n"
    /* (r9,r10,r8) += a3 * b2 */
    "movq %%r15, %%rax\n"
    "mulq %%r13\n"
    "addq %%rax, %%r9\n"
    "adcq %%rdx, %%r10\n"
    "adcq $0, %%r8\n"
    /* Extract l5 */
    "movq %%r9, 40(%%rsi)\n"
    /* (r10,r8) += a3 * b3 */
    "movq %%r15, %%rax\n"
    "mulq %%r14\n"
    "addq %%rax, %%r10\n"
    "adcq %%rdx, %%r8\n"
    /* Extract l6 */
    "movq %%r10, 48(%%rsi)\n"
    /* Extract l7 */
    "movq %%r8, 56(%%rsi)\n"
    : "+d"(pb)
    : "S"(l), "D"(a->d)
    : "rax", "rbx", "rcx", "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15", "cc", "memory");
#else
    /* 160 bit accumulator. */
    uint64_t c0 = 0, c1 = 0;
    uint32_t c2 = 0;

    /* l[0..7] = a[0..3] * b[0..3]. */
    muladd_fast(a->d[0], b->d[0]);
    extract_fast(l[0]);
    muladd(a->d[0], b->d[1]);
    muladd(a->d[1], b->d[0]);
    extract(l[1]);
    muladd(a->d[0], b->d[2]);
    muladd(a->d[1], b->d[1]);
    muladd(a->d[2], b->d[0]);
    extract(l[2]);
    muladd(a->d[0], b->d[3]);
    muladd(a->d[1], b->d[2]);
    muladd(a->d[2], b->d[1]);
    muladd(a->d[3], b->d[0]);
    extract(l[3]);
    muladd(a->d[1], b->d[3]);
    muladd(a->d[2], b->d[2]);
    muladd(a->d[3], b->d[1]);
    extract(l[4]);
    muladd(a->d[2], b->d[3]);
    muladd(a->d[3], b->d[2]);
    extract(l[5]);
    muladd_fast(a->d[3], b->d[3]);
    extract_fast(l[6]);
    VERIFY_CHECK(c1 == 0);
    l[7] = c0;
#endif
}

static void secp256k1_scalar_sqr_512(uint64_t l[8], const secp256k1_scalar *a) {
#ifdef USE_ASM_X86_64
    __asm__ __volatile__(
    /* Preload */
    "movq 0(%%rdi), %%r11\n"
    "movq 8(%%rdi), %%r12\n"
    "movq 16(%%rdi), %%r13\n"
    "movq 24(%%rdi), %%r14\n"
    /* (rax,rdx) = a0 * a0 */
    "movq %%r11, %%rax\n"
    "mulq %%r11\n"
    /* Extract l0 */
    "movq %%rax, 0(%%rsi)\n"
    /* (r8,r9,r10) = (rdx,0) */
    "movq %%rdx, %%r8\n"
    "xorq %%r9, %%r9\n"
    "xorq %%r10, %%r10\n"
    /* (r8,r9,r10) += 2 * a0 * a1 */
    "movq %%r11, %%rax\n"
    "mulq %%r12\n"
    "addq %%rax, %%r8\n"
    "adcq %%rdx, %%r9\n"
    "adcq $0, %%r10\n"
    "addq %%rax, %%r8\n"
    "adcq %%rdx, %%r9\n"
    "adcq $0, %%r10\n"
    /* Extract l1 */
    "movq %%r8, 8(%%rsi)\n"
    "xorq %%r8, %%r8\n"
    /* (r9,r10,r8) += 2 * a0 * a2 */
    "movq %%r11, %%rax\n"
    "mulq %%r13\n"
    "addq %%rax, %%r9\n"
    "adcq %%rdx, %%r10\n"
    "adcq $0, %%r8\n"
    "addq %%rax, %%r9\n"
    "adcq %%rdx, %%r10\n"
    "adcq $0, %%r8\n"
    /* (r9,r10,r8) += a1 * a1 */
    "movq %%r12, %%rax\n"
    "mulq %%r12\n"
    "addq %%rax, %%r9\n"
    "adcq %%rdx, %%r10\n"
    "adcq $0, %%r8\n"
    /* Extract l2 */
    "movq %%r9, 16(%%rsi)\n"
    "xorq %%r9, %%r9\n"
    /* (r10,r8,r9) += 2 * a0 * a3 */
    "movq %%r11, %%rax\n"
    "mulq %%r14\n"
    "addq %%rax, %%r10\n"
    "adcq %%rdx, %%r8\n"
    "adcq $0, %%r9\n"
    "addq %%rax, %%r10\n"
    "adcq %%rdx, %%r8\n"
    "adcq $0, %%r9\n"
    /* (r10,r8,r9) += 2 * a1 * a2 */
    "movq %%r12, %%rax\n"
    "mulq %%r13\n"
    "addq %%rax, %%r10\n"
    "adcq %%rdx, %%r8\n"
    "adcq $0, %%r9\n"
    "addq %%rax, %%r10\n"
    "adcq %%rdx, %%r8\n"
    "adcq $0, %%r9\n"
    /* Extract l3 */
    "movq %%r10, 24(%%rsi)\n"
    "xorq %%r10, %%r10\n"
    /* (r8,r9,r10) += 2 * a1 * a3 */
    "movq %%r12, %%rax\n"
    "mulq %%r14\n"
    "addq %%rax, %%r8\n"
    "adcq %%rdx, %%r9\n"
    "adcq $0, %%r10\n"
    "addq %%rax, %%r8\n"
    "adcq %%rdx, %%r9\n"
    "adcq $0, %%r10\n"
    /* (r8,r9,r10) += a2 * a2 */
    "movq %%r13, %%rax\n"
    "mulq %%r13\n"
    "addq %%rax, %%r8\n"
    "adcq %%rdx, %%r9\n"
    "adcq $0, %%r10\n"
    /* Extract l4 */
    "movq %%r8, 32(%%rsi)\n"
    "xorq %%r8, %%r8\n"
    /* (r9,r10,r8) += 2 * a2 * a3 */
    "movq %%r13, %%rax\n"
    "mulq %%r14\n"
    "addq %%rax, %%r9\n"
    "adcq %%rdx, %%r10\n"
    "adcq $0, %%r8\n"
    "addq %%rax, %%r9\n"
    "adcq %%rdx, %%r10\n"
    "adcq $0, %%r8\n"
    /* Extract l5 */
    "movq %%r9, 40(%%rsi)\n"
    /* (r10,r8) += a3 * a3 */
    "movq %%r14, %%rax\n"
    "mulq %%r14\n"
    "addq %%rax, %%r10\n"
    "adcq %%rdx, %%r8\n"
    /* Extract l6 */
    "movq %%r10, 48(%%rsi)\n"
    /* Extract l7 */
    "movq %%r8, 56(%%rsi)\n"
    :
    : "S"(l), "D"(a->d)
    : "rax", "rdx", "r8", "r9", "r10", "r11", "r12", "r13", "r14", "cc", "memory");
#else
    /* 160 bit accumulator. */
    uint64_t c0 = 0, c1 = 0;
    uint32_t c2 = 0;

    /* l[0..7] = a[0..3] * a[0..3]. */
    muladd_fast(a->d[0], a->d[0]);
    extract_fast(l[0]);
    muladd2(a->d[0], a->d[1]);
    extract(l[1]);
    muladd2(a->d[0], a->d[2]);
    muladd(a->d[1], a->d[1]);
    extract(l[2]);
    muladd2(a->d[0], a->d[3]);
    muladd2(a->d[1], a->d[2]);
    extract(l[3]);
    muladd2(a->d[1], a->d[3]);
    muladd(a->d[2], a->d[2]);
    extract(l[4]);
    muladd2(a->d[2], a->d[3]);
    extract(l[5]);
    muladd_fast(a->d[3], a->d[3]);
    extract_fast(l[6]);
    VERIFY_CHECK(c1 == 0);
    l[7] = c0;
#endif
}

#undef sumadd
#undef sumadd_fast
#undef muladd
#undef muladd_fast
#undef muladd2
#undef extract
#undef extract_fast

static void secp256k1_scalar_mul(secp256k1_scalar *r, const secp256k1_scalar *a, const secp256k1_scalar *b) {
    uint64_t l[8];
    secp256k1_scalar_mul_512(l, a, b);
    secp256k1_scalar_reduce_512(r, l);
}

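/* Shift r right by n bits (1 <= n <= 15) and return the bits shifted out,
 * i.e. r mod 2^n before the shift. */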
static int secp256k1_scalar_shr_int(secp256k1_scalar *r, int n) {
    int ret;
    VERIFY_CHECK(n > 0);
    VERIFY_CHECK(n < 16);
    ret = r->d[0] & ((1 << n) - 1);
    r->d[0] = (r->d[0] >> n) + (r->d[1] << (64 - n));
    r->d[1] = (r->d[1] >> n) + (r->d[2] << (64 - n));
    r->d[2] = (r->d[2] >> n) + (r->d[3] << (64 - n));
    r->d[3] = (r->d[3] >> n);
    return ret;
}

static void secp256k1_scalar_sqr(secp256k1_scalar *r, const secp256k1_scalar *a) {
    uint64_t l[8];
    secp256k1_scalar_sqr_512(l, a);
    secp256k1_scalar_reduce_512(r, l);
}

#ifdef USE_ENDOMORPHISM
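/* Split a into two scalars of 128 bits each, such that
 * a == r1 + r2 * 2^128; with 64-bit limbs this is a plain limb copy. */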
static void secp256k1_scalar_split_128(secp256k1_scalar *r1, secp256k1_scalar *r2, const secp256k1_scalar *a) {
    r1->d[0] = a->d[0];
    r1->d[1] = a->d[1];
    r1->d[2] = 0;
    r1->d[3] = 0;
    r2->d[0] = a->d[2];
    r2->d[1] = a->d[3];
    r2->d[2] = 0;
    r2->d[3] = 0;
}
#endif

SECP256K1_INLINE static int secp256k1_scalar_eq(const secp256k1_scalar *a, const secp256k1_scalar *b) {
    return ((a->d[0] ^ b->d[0]) | (a->d[1] ^ b->d[1]) | (a->d[2] ^ b->d[2]) | (a->d[3] ^ b->d[3])) == 0;
}

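/* r = round((a * b) / 2^shift), for shift >= 256: the 512-bit product is
 * shifted right, and the highest discarded bit (bit shift-1 of the
 * product) is added back via secp256k1_scalar_cadd_bit to round to
 * nearest. */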
SECP256K1_INLINE static void secp256k1_scalar_mul_shift_var(secp256k1_scalar *r, const secp256k1_scalar *a, const secp256k1_scalar *b, unsigned int shift) {
    uint64_t l[8];
    unsigned int shiftlimbs;
    unsigned int shiftlow;
    unsigned int shifthigh;
    VERIFY_CHECK(shift >= 256);
    secp256k1_scalar_mul_512(l, a, b);
    shiftlimbs = shift >> 6;
    shiftlow = shift & 0x3F;
    shifthigh = 64 - shiftlow;
    r->d[0] = shift < 512 ? (l[0 + shiftlimbs] >> shiftlow | (shift < 448 && shiftlow ? (l[1 + shiftlimbs] << shifthigh) : 0)) : 0;
    r->d[1] = shift < 448 ? (l[1 + shiftlimbs] >> shiftlow | (shift < 384 && shiftlow ? (l[2 + shiftlimbs] << shifthigh) : 0)) : 0;
    r->d[2] = shift < 384 ? (l[2 + shiftlimbs] >> shiftlow | (shift < 320 && shiftlow ? (l[3 + shiftlimbs] << shifthigh) : 0)) : 0;
    r->d[3] = shift < 320 ? (l[3 + shiftlimbs] >> shiftlow) : 0;
    secp256k1_scalar_cadd_bit(r, 0, (l[(shift - 1) >> 6] >> ((shift - 1) & 0x3f)) & 1);
}

#define ROTL32(x,n) ((x) << (n) | (x) >> (32-(n)))
#define QUARTERROUND(a,b,c,d) \
  a += b; d = ROTL32(d ^ a, 16); \
  c += d; b = ROTL32(b ^ c, 12); \
  a += b; d = ROTL32(d ^ a, 8); \
  c += d; b = ROTL32(b ^ c, 7);

#ifdef WORDS_BIGENDIAN
#define LE32(p) ((((p) & 0xFF) << 24) | (((p) & 0xFF00) << 8) | (((p) & 0xFF0000) >> 8) | (((p) & 0xFF000000) >> 24))
#define BE32(p) (p)
#else
#define BE32(p) ((((p) & 0xFF) << 24) | (((p) & 0xFF00) << 8) | (((p) & 0xFF0000) >> 8) | (((p) & 0xFF000000) >> 24))
#define LE32(p) (p)
#endif

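/* Derive two scalars from a 32-byte seed and a 64-bit index using the
 * ChaCha20 block function, with the index as the block counter. If either
 * 256-bit half of the keystream block is >= the group order, the block is
 * recomputed with an incremented retry counter in word 15 until both
 * outputs are valid scalars. */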
static void secp256k1_scalar_chacha20(secp256k1_scalar *r1, secp256k1_scalar *r2, const unsigned char *seed, uint64_t idx) {
    size_t n;
    size_t over_count = 0;
    uint32_t seed32[8];
    uint32_t x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15;
    int over1, over2;

    memcpy((void *) seed32, (const void *) seed, 32);
    do {
        x0 = 0x61707865;
        x1 = 0x3320646e;
        x2 = 0x79622d32;
        x3 = 0x6b206574;
        x4 = LE32(seed32[0]);
        x5 = LE32(seed32[1]);
        x6 = LE32(seed32[2]);
        x7 = LE32(seed32[3]);
        x8 = LE32(seed32[4]);
        x9 = LE32(seed32[5]);
        x10 = LE32(seed32[6]);
        x11 = LE32(seed32[7]);
        x12 = idx;
        x13 = idx >> 32;
        x14 = 0;
        x15 = over_count;

        n = 10;
        while (n--) {
            QUARTERROUND(x0, x4, x8,x12)
            QUARTERROUND(x1, x5, x9,x13)
            QUARTERROUND(x2, x6,x10,x14)
            QUARTERROUND(x3, x7,x11,x15)
            QUARTERROUND(x0, x5,x10,x15)
            QUARTERROUND(x1, x6,x11,x12)
            QUARTERROUND(x2, x7, x8,x13)
            QUARTERROUND(x3, x4, x9,x14)
        }

        x0 += 0x61707865;
        x1 += 0x3320646e;
        x2 += 0x79622d32;
        x3 += 0x6b206574;
        x4 += LE32(seed32[0]);
        x5 += LE32(seed32[1]);
        x6 += LE32(seed32[2]);
        x7 += LE32(seed32[3]);
        x8 += LE32(seed32[4]);
        x9 += LE32(seed32[5]);
        x10 += LE32(seed32[6]);
        x11 += LE32(seed32[7]);
        x12 += idx;
        x13 += idx >> 32;
        x14 += 0;
        x15 += over_count;

        r1->d[3] = BE32((uint64_t) x0) << 32 | BE32(x1);
        r1->d[2] = BE32((uint64_t) x2) << 32 | BE32(x3);
        r1->d[1] = BE32((uint64_t) x4) << 32 | BE32(x5);
        r1->d[0] = BE32((uint64_t) x6) << 32 | BE32(x7);
        r2->d[3] = BE32((uint64_t) x8) << 32 | BE32(x9);
        r2->d[2] = BE32((uint64_t) x10) << 32 | BE32(x11);
        r2->d[1] = BE32((uint64_t) x12) << 32 | BE32(x13);
        r2->d[0] = BE32((uint64_t) x14) << 32 | BE32(x15);

        over1 = secp256k1_scalar_check_overflow(r1);
        over2 = secp256k1_scalar_check_overflow(r2);
        over_count++;
    } while (over1 | over2);
}

#undef ROTL32
#undef QUARTERROUND
#undef BE32
#undef LE32

#endif /* SECP256K1_SCALAR_REPR_IMPL_H */