Contiki 3.x
ecc.c
1 /*
2  * Copyright (c) 2015, Lars Schmertmann <SmallLars@t-online.de>,
3  * Jens Trillmann <jtrillma@informatik.uni-bremen.de>.
4  * All rights reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  * 1. Redistributions of source code must retain the above copyright
10  * notice, this list of conditions and the following disclaimer.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  * notice, this list of conditions and the following disclaimer in the
13  * documentation and/or other materials provided with the distribution.
14  *
15  * 3. Neither the name of the copyright holder nor the names of its
16  * contributors may be used to endorse or promote products derived
17  * from this software without specific prior written permission.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
22  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
23  * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
24  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
25  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
26  * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
28  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
30  * OF THE POSSIBILITY OF SUCH DAMAGE.
31  *
32  * This file is part of the Contiki operating system.
33  *
34  */
35 
36 #include "ecc.h"
37 
38 #include <string.h>
39 
/* Word offsets of the X, Y and Z coordinate inside a 24 word projective point */
#define X 0
#define Y 8
#define Z 16

/* Field prime of secp256r1, least significant 32 bit word first */
const uint32_t ecc_prime_m[8] = {
  0xffffffff, 0xffffffff, 0xffffffff, 0x00000000,
  0x00000000, 0x00000000, 0x00000001, 0xffffffff
};
/* 2^256 minus the prime above; adding it subtracts the prime modulo 2^256 */
const uint32_t ecc_prime_r[8] = {
  0x00000001, 0x00000000, 0x00000000, 0xffffffff,
  0xffffffff, 0xffffffff, 0xfffffffe, 0x00000000
};
46 
47 /*---------------------------------------------------------------------------*/
48 
/*
 * DEBUG = 1 enables PRINTF/PRINTHEX tracing via printf.
 * SELF_TEST = 1 compiles selfTest(), which ecc_ec_mult() then runs on entry.
 */
#define DEBUG 0
#define SELF_TEST 0

#if DEBUG
#include <stdio.h>
#define PRINTF(...) printf(__VA_ARGS__)
#define PRINTHEX(...) print_hex(__VA_ARGS__)
/*
 * Prints "name:" followed by the l words of d, most significant word
 * (highest index) first, each as eight upper case hex digits.
 */
static void
print_hex(const char *name, const uint32_t *d, uint32_t l)
{
  uint32_t i = l;

  printf("%s:", name);
  while(i-- > 0) {
    /* uint32_t may be unsigned long; cast so that the format always matches */
    printf(" %08lX", (unsigned long)d[i]);
  }
  printf("\n");
}
#else
/* Tracing disabled: both macros expand to nothing */
#define PRINTF(...)
#define PRINTHEX(...)
#endif

#if SELF_TEST
#include <stdio.h>
static void selfTest(void);
#endif
75 
/* private prototypes ----------------------------------------------------- */

/* helpers for 256 bit numbers held as eight 32 bit words */
static void ecc_setZero(uint32_t *a);
static void ecc_copy(uint32_t *dst, const uint32_t *src);
static uint32_t ecc_isX(const uint32_t *a, const uint32_t x);
static void ecc_rshift(uint32_t *a);
static void ecc_replace(uint32_t bit, uint32_t *dst, uint32_t *src);
static uint32_t ecc_add(uint32_t *result, const uint32_t *a,
                        const uint32_t *b);
static uint32_t ecc_sub(uint32_t *result, const uint32_t *a,
                        const uint32_t *b);
static void ecc_mult(uint32_t *result, const uint32_t *x, const uint32_t *y,
                     const uint32_t length);

/* term builders used by ecc_field_ModP */
__attribute__((always_inline)) static void ecc_form_s1(uint32_t *dst, const uint32_t *src);
__attribute__((always_inline)) static void ecc_form_s2(uint32_t *dst, const uint32_t *src);
__attribute__((always_inline)) static void ecc_form_s3(uint32_t *dst, const uint32_t *src);
__attribute__((always_inline)) static void ecc_form_s4(uint32_t *dst, const uint32_t *src);
__attribute__((always_inline)) static void ecc_form_d1(uint32_t *dst, const uint32_t *src);
__attribute__((always_inline)) static void ecc_form_d2(uint32_t *dst, const uint32_t *src);
__attribute__((always_inline)) static void ecc_form_d3(uint32_t *dst, const uint32_t *src);
__attribute__((always_inline)) static void ecc_form_d4(uint32_t *dst, const uint32_t *src);

/* arithmetic modulo ecc_prime_m */
static void ecc_field_Add(uint32_t *result, const uint32_t *x,
                          const uint32_t *y);
static void ecc_field_Sub(uint32_t *result, const uint32_t *x,
                          const uint32_t *y);
static void ecc_field_ModP(uint32_t *result, const uint32_t *T);
static void ecc_field_Mult(uint32_t *result, const uint32_t *A,
                           const uint32_t *B);
static void ecc_field_Inv(uint32_t *result, const uint32_t *A);

/* point operations in projective coordinates */
static void ecc_projective_double(uint32_t *val);
static void ecc_projective_add(uint32_t *result, const uint32_t *val_1,
                               const uint32_t *x_2, const uint32_t *y_2,
                               const uint32_t *z_2);
108 
109 /* public functions -------------------------------------------------------- */
110 
/*
 * Compares two 256 bit numbers (eight words, least significant word first).
 * Returns 1 if a > b, -1 if a < b and 0 if both are equal. All eight words
 * are always visited; the verdict is fixed by the most significant word that
 * differs.
 */
int32_t
ecc_compare(const uint32_t *a, const uint32_t *b)
{
  int32_t verdict = 0;
  int word;

  for(word = 7; word >= 0; --word) {
    int32_t order = (a[word] > b[word]) ? 1 : -1;
    /* all ones only while no difference was seen yet and this word differs */
    int32_t take = -(int32_t)((verdict == 0) && (a[word] != b[word]));
    verdict ^= take & (verdict ^ order);
  }
  return verdict;
}
123 void
124 ecc_ec_mult(uint32_t *resultx, uint32_t *resulty, const uint32_t *px, const uint32_t *py, const uint32_t *secret)
125 {
126 #if SELF_TEST
127  selfTest();
128 #endif
129 
130  PRINTHEX("PX", px, 8);
131  PRINTHEX("PY", py, 8);
132  PRINTHEX("SC", secret, 8);
133 
134  uint32_t Q[24];
135  ecc_setZero(Q + X);
136  ecc_setZero(Q + Y);
137  ecc_setZero(Q + Z);
138  Q[Z] = 0x00000001;
139 
140  uint32_t pz[8];
141  ecc_setZero(pz);
142  pz[0] = 0x00000001;
143 
144  uint32_t temp[24];
145 
146  int i;
147  for(i = 255; i >= 0; --i) {
148  ecc_projective_double(Q);
149 /* PRINTHEX("QX", Q+X, 8); */
150 /* PRINTHEX("QY", Q+Y, 8); */
151 /* PRINTHEX("QZ", Q+Z, 8); */
152  ecc_projective_add(temp, Q, px, py, pz);
153 /* PRINTHEX("QX", temp+X, 8); */
154 /* PRINTHEX("QY", temp+Y, 8); */
155 /* PRINTHEX("QZ", temp+Z, 8); */
156  int current_bit = (secret[i / 32] >> (i % 32)) & 0x1; /* ((secret[i / 32]) & ((uint32_t)1 << (i % 32))); */
157  ecc_replace(current_bit, Q, temp);
158 /* PRINTHEX("QX", Q+X, 8); */
159 /* PRINTHEX("QY", Q+Y, 8); */
160 /* PRINTHEX("QZ", Q+Z, 8); */
161  }
162 /* PRINTHEX("QX", Q+X, 8); */
163 /* PRINTHEX("QY", Q+Y, 8); */
164 /* PRINTHEX("QZ", Q+Z, 8); */
165  ecc_field_Inv(temp, Q + Z);
166  ecc_field_Mult(resultx, Q + X, temp);
167  ecc_field_Mult(resulty, Q + Y, temp);
168  PRINTHEX("RX", resultx, 8);
169  PRINTHEX("RY", resulty, 8);
170 }
171 /* private functions ------------------------------------------------------- */
172 
/*
 * Sets the eight words at a to zero.
 */
static void
ecc_setZero(uint32_t *a)
{
#if defined(__arm__)
  __asm__ volatile (
    "mov r1, $0 \n\t"
    "mov r2, r1 \n\t"
    "mov r3, r2 \n\t"
    "mov r4, r3 \n\t"
    "stm %[a]!, {r1-r4} \n\t"
    "stm %[a]!, {r1-r4} \n\t"
    : /* out */
    /* stm with writeback advances the pointer, so it is read AND written */
    [a] "+l" (a)
    : /* in */
    : /* clobber list */
    "r1", "r2", "r3", "r4", "cc", "memory"
    );
#else
  /* portable equivalent for builds without the ARM assembler */
  unsigned int i;
  for(i = 0; i < 8; i++) {
    a[i] = 0;
  }
#endif
}
/*
 * Copies the eight words at src to dst.
 */
static void
ecc_copy(uint32_t *dst, const uint32_t *src)
{
#if defined(__arm__)
  __asm__ volatile (
    "ldm %[s]!, {r2-r5} \n\t"
    "stm %[d]!, {r2-r5} \n\t"
    "ldm %[s]!, {r2-r5} \n\t"
    "stm %[d]!, {r2-r5} \n\t"
    : /* out */
    /* both pointers are advanced by the writeback forms of ldm/stm */
    [d] "+l" (dst),
    [s] "+l" (src)
    : /* in */
    : /* clobber list */
    "r2", "r3", "r4", "r5", "memory"
    );
#else
  /* portable equivalent for builds without the ARM assembler */
  unsigned int i;
  for(i = 0; i < 8; i++) {
    dst[i] = src[i];
  }
#endif
}
/*
 * Returns 1 if the 256 bit number a equals the single word value x, that is
 * a[0] == x and a[1..7] are all zero; returns 0 otherwise. Every word is
 * inspected regardless of earlier mismatches.
 */
static uint32_t
ecc_isX(const uint32_t *a, const uint32_t x)
{
  uint32_t match = (a[0] == x);
  uint32_t word;

  for(word = 7; word > 0; --word) {
    match &= (a[word] == 0);
  }
  return match;
}
/*
 * Shifts the 256 bit number a right by one bit in place. Words are processed
 * from index 7 down to 0; the bit shifted out of a word becomes the top bit
 * of the next lower word.
 */
static void
ecc_rshift(uint32_t *a)
{
#if defined(__arm__)
  uint32_t index = 32;
  uint32_t carry = 0;

  __asm__ volatile (
    "0: \n\t"
    "sub %[i], %[i], #4 \n\t" /* index -= 4 */
    "mov r4, %[c] \n\t" /* result = carry */
    "ldr r3, [%[a],%[i]] \n\t" /* value = a[index] */
    "lsl %[c], r3, #31 \n\t" /* carry = value << 31 */
    "lsr r3, r3, #1 \n\t" /* value >>= 1 */
    "orr r4, r4, r3 \n\t" /* result |= value */
    "str r4, [%[a],%[i]] \n\t" /* a[index] = result */
    "cmp %[i], $0 \n\t" /* index == 0 */
    "bne 0b \n\t" /* != ? next loop */
    : /* out */
    /* the loop counter and the carry are rewritten on every iteration */
    [i] "+l" (index),
    [c] "+l" (carry)
    : /* in */
    [a] "l" (a)
    : /* clobber list */
    "r3", "r4", "cc", "memory"
    );
#else
  /* portable equivalent for builds without the ARM assembler */
  uint32_t carry = 0;
  int word;

  for(word = 7; word >= 0; --word) {
    uint32_t value = a[word];
    a[word] = carry | (value >> 1);
    carry = value << 31;
  }
#endif
}
/*
 * Conditional copy of a 24 word point: with bit == 1 dst becomes a copy of
 * src, with bit == 0 dst keeps its value. The same loads, stores and logic
 * operations are executed in both cases; no branch depends on bit.
 */
static void
ecc_replace(uint32_t bit, uint32_t *dst, uint32_t *src)
{
  const uint32_t mask = (uint32_t)0 - bit; /* 0 or all ones for bit 0 or 1 */
  uint32_t *const end = dst + 24;

  while(dst != end) {
    *dst ^= mask & (*dst ^ *src);
    ++dst;
    ++src;
  }
}
/*
 * result = a + b for 256 bit numbers (eight words, least significant first).
 * Returns the carry out of the top word (0 or 1). result may be the same
 * array as a or b.
 */
static uint32_t
ecc_add(uint32_t *result, const uint32_t *a, const uint32_t *b)
{
#if defined(__arm__)
  /*
   * All three pointers are advanced by ldm/stm writeback, so they are
   * declared as read-write operands; as plain inputs the compiler would be
   * free to keep a and b in one register when both are equal. Once the last
   * store is done the register of the result pointer is reused to return the
   * carry, which keeps the number of operand registers at three.
   */
  uintptr_t r = (uintptr_t)result;

  __asm__ volatile (
    "ldm %[x]!, {r4,r5} \n\t"
    "ldm %[y]!, {r6,r7} \n\t"
    "add r4, r4, r6 \n\t"
    "adc r5, r5, r7 \n\t"
    "stm %[r]!, {r4,r5} \n\t"
    "ldm %[x]!, {r4,r5} \n\t"
    "ldm %[y]!, {r6,r7} \n\t"
    "adc r4, r4, r6 \n\t"
    "adc r5, r5, r7 \n\t"
    "stm %[r]!, {r4,r5} \n\t"
    "ldm %[x]!, {r4,r5} \n\t"
    "ldm %[y]!, {r6,r7} \n\t"
    "adc r4, r4, r6 \n\t"
    "adc r5, r5, r7 \n\t"
    "stm %[r]!, {r4,r5} \n\t"
    "ldm %[x]!, {r4,r5} \n\t"
    "ldm %[y]!, {r6,r7} \n\t"
    "adc r4, r4, r6 \n\t"
    "adc r5, r5, r7 \n\t"
    "stm %[r]!, {r4,r5} \n\t"
    "bcc 0f \n\t"
    "mov %[r], #1 \n\t"
    "b 1f \n\t"
    "0: \n\t"
    "mov %[r], $0 \n\t"
    "1: \n\t"
    : /* out */
    [x] "+l" (a),
    [y] "+l" (b),
    [r] "+l" (r)
    : /* in */
    : /* clobber list */
    "r4", "r5", "r6", "r7", "cc", "memory"
    );

  return (uint32_t)r;
#else
  /* portable equivalent for builds without the ARM assembler */
  uint32_t carry = 0;
  unsigned int i;

  for(i = 0; i < 8; i++) {
    uint64_t sum = (uint64_t)a[i] + b[i] + carry;
    result[i] = (uint32_t)sum;
    carry = (uint32_t)(sum >> 32);
  }
  return carry;
#endif
}
/*
 * result = a - b for 256 bit numbers (eight words, least significant first).
 * Returns 1 if the subtraction borrowed out of the top word (a < b), else 0.
 * result may be the same array as a or b.
 */
static uint32_t
ecc_sub(uint32_t *result, const uint32_t *a, const uint32_t *b)
{
#if defined(__arm__)
  /*
   * The pointers are advanced by ldm/stm writeback and therefore declared as
   * read-write operands. The register of the result pointer is reused for
   * the returned borrow once the last store is done.
   */
  uintptr_t r = (uintptr_t)result;

  __asm__ volatile (
    "ldm %[x]!, {r4,r5} \n\t"
    "ldm %[y]!, {r6,r7} \n\t"
    "sub r4, r4, r6 \n\t"
    "sbc r5, r5, r7 \n\t"
    "stm %[r]!, {r4,r5} \n\t"
    "ldm %[x]!, {r4,r5} \n\t"
    "ldm %[y]!, {r6,r7} \n\t"
    "sbc r4, r4, r6 \n\t"
    "sbc r5, r5, r7 \n\t"
    "stm %[r]!, {r4,r5} \n\t"
    "ldm %[x]!, {r4,r5} \n\t"
    "ldm %[y]!, {r6,r7} \n\t"
    "sbc r4, r4, r6 \n\t"
    "sbc r5, r5, r7 \n\t"
    "stm %[r]!, {r4,r5} \n\t"
    "ldm %[x]!, {r4,r5} \n\t"
    "ldm %[y]!, {r6,r7} \n\t"
    "sbc r4, r4, r6 \n\t"
    "sbc r5, r5, r7 \n\t"
    "stm %[r]!, {r4,r5} \n\t"
    "bcs 0f \n\t"
    "mov %[r], #1 \n\t"
    "b 1f \n\t"
    "0: \n\t"
    "mov %[r], $0 \n\t"
    "1: \n\t"
    : /* out */
    [x] "+l" (a),
    [y] "+l" (b),
    [r] "+l" (r)
    : /* in */
    : /* clobber list */
    "r4", "r5", "r6", "r7", "cc", "memory"
    );

  return (uint32_t)r;
#else
  /* portable equivalent for builds without the ARM assembler */
  uint32_t borrow = 0;
  unsigned int i;

  for(i = 0; i < 8; i++) {
    uint64_t diff = (uint64_t)a[i] - b[i] - borrow;
    result[i] = (uint32_t)diff;
    borrow = (uint32_t)((diff >> 32) & 1);
  }
  return borrow;
#endif
}
/*
 * result = x * y, where x and y have `length` words and result receives
 * 2 * length words (least significant word first). result must not overlap
 * x or y.
 *
 * The ARM implementation handles length 1, 2, 4 and 8: it splits both
 * operands into halves, multiplies the four half pairs recursively and adds
 * the two mixed products, shifted by length / 2 words, onto the result.
 */
static void
ecc_mult(uint32_t *result, const uint32_t *x, const uint32_t *y, const uint32_t length)
{
#if defined(__arm__)
  if(length == 1) {
    /* 32 x 32 -> 64 bit multiplication built from four 16 x 16 products */
    __asm__ volatile (
      "ldrh r5, [%[x], $0] \n\t" /* r5 = (x[0] & 0x0000FFFF) */
      "ldrh r3, [%[y], $0] \n\t" /* r3 = (y[0] & 0x0000FFFF) */
      "mul r5, r3 \n\t" /* r5 *= r3 r5 = AB[0] */
      "ldrh r6, [%[x], #2] \n\t" /* r6 = (x[0] >> 16) */
      "mul r3, r6 \n\t" /* r3 *= r6 r3 = C[0] */
      "ldrh r4, [%[y], #2] \n\t" /* r4 = (y[0] >> 16) */
      "mul r6, r4 \n\t" /* r6 *= r4 r6 = AB[1] */
      /* %[y] is no longer needed as a pointer - it is called ry now */
      "ldrh %[y], [%[x], $0] \n\t" /* ry = (x[0] & 0x0000FFFF) */
      "mul r4, %[y] \n\t" /* r4 *= ry r4 = C[1] */
      "add %[y], r3, r4 \n\t" /* ry = r3 + r4 ry = C[0] + C[1] */
      /* C[1] (r4) is no longer needed */
      "mov r4, $0 \n\t" /* r4 = 0 */
      "bcc 0f \n\t" /* jump if carry clear */
      "mov r4, #1 \n\t" /* r4 = 1 */
      "lsl r4, r4, #16 \n\t" /* r4 <<= 16 */
      "0: \n\t" /* r4 = 0x000c0000 = (carry << 16) */
      "lsr r3, %[y], #16 \n\t" /* r3 = (ry >> 16) */
      "orr r4, r4, r3 \n\t" /* r4 |= r3 r4 = 0x000c'ryh' = (r4 | ry >> 16) */
      "lsl r3, %[y], #16 \n\t" /* r3 = (ry << 16) r3 = 0x'ryl'0000 = (ry << 16) */
      "add r3, r3, r5 \n\t"
      "adc r4, r4, r6 \n\t"
      "stm %[r]!, {r3, r4} \n\t"
      : /* out */
      /* y is overwritten as scratch, result is advanced by the store */
      [y] "+l" (y),
      [r] "+l" (result)
      : /* in */
      [x] "l" (x)
      : /* clobber list */
      "r3", "r4", "r5", "r6", "cc", "memory"
      );
  } else {
    uint32_t carry;
    uint32_t C[length * 2];
    /*
     * The assembler blocks below overwrite their pointer and length
     * registers, so they operate on private copies declared as read-write
     * operands. `length` itself stays intact for the C code in between.
     */
    uint32_t *c_ptr = C;
    uint32_t *r_ptr = result;
    uint32_t l_reg = length;

    ecc_mult(result, x, y, length / 2);
    ecc_mult(result + length, x + (length / 2), y + (length / 2), length / 2);
    ecc_mult(C, x, y + (length / 2), length / 2);
    ecc_mult(C + length, x + (length / 2), y, length / 2);
    if(length == 8) {
      carry = ecc_add(C, C, C + length);
    } else {
      /* C[0..length-1] += C[length..2*length-1]; %[l] ends up as the carry */
      __asm__ volatile (
        "cmp %[l], #2 \n\t"
        "beq 3f \n\t"
        /* ASM for: ecc_add(C, C, C + 4, 4); */
        "mov %[l], %[a] \n\t"
        "ldm %[a]!, {r3-r6} \n\t"
        "ldm %[a]!, {r5,r6} \n\t"
        "sub %[a], %[a], #16 \n\t"
        "add r3, r3, r5 \n\t"
        "adc r4, r4, r6 \n\t"
        "stm %[l]!, {r3,r4} \n\t"
        "ldm %[a]!, {r3-r6} \n\t"
        "ldm %[a]!, {r5,r6} \n\t"
        "adc r3, r3, r5 \n\t"
        "adc r4, r4, r6 \n\t"
        "stm %[l]!, {r3,r4} \n\t"
        "b 0f \n\t"
        "3: \n\t"
        /* ASM for: ecc_add(C, C, C + 2, 2); */
        "ldm %[a]!, {r3-r6} \n\t"
        "sub %[a], %[a], #16 \n\t"
        "add r3, r3, r5 \n\t"
        "adc r4, r4, r6 \n\t"
        "stm %[a]!, {r3,r4} \n\t"
        "0: \n\t"
        "bcc 1f \n\t"
        "mov %[l], #1 \n\t"
        "b 2f \n\t"
        "1: \n\t"
        "mov %[l], $0 \n\t"
        "2: \n\t"
        : /* out */
        [a] "+l" (c_ptr),
        [l] "+l" (l_reg)
        : /* in */
        : /* clobber list */
        "r3", "r4", "r5", "r6", "cc", "memory"
        );
      carry = l_reg;
    }
    C[length] = carry;

    /* add the length + 1 words of C onto result, shifted by length / 2 words */
    c_ptr = C;
    l_reg = length;
    __asm__ volatile (
      "cmp %[l], #2 \n\t"
      "beq 3f \n\t"
      "cmp %[l], #4 \n\t"
      "beq 6f \n\t"
      /* length == 8 */
      /* ASM for: ecc_add(result + 4, result + 4, C, 12); */
      /* RRRRRRRRRRRRRRRR */
      /* + CCCCCCCCC000 */
      /* = RRRRRRRRRRRRRRRR */
      "add %[r], %[r], #16 \n\t"
      "mov %[l], %[r] \n\t"
      "ldm %[r]!, {r3,r4} \n\t"
      "ldm %[c]!, {r5,r6} \n\t"
      "add r3, r3, r5 \n\t"
      "adc r4, r4, r6 \n\t"
      "stm %[l]!, {r3,r4} \n\t"
      "ldm %[r]!, {r3,r4} \n\t"
      "ldm %[c]!, {r5,r6} \n\t"
      "adc r3, r3, r5 \n\t"
      "adc r4, r4, r6 \n\t"
      "stm %[l]!, {r3,r4} \n\t"
      "ldm %[r]!, {r3,r4} \n\t"
      "ldm %[c]!, {r5,r6} \n\t"
      "adc r3, r3, r5 \n\t"
      "adc r4, r4, r6 \n\t"
      "stm %[l]!, {r3,r4} \n\t"
      "ldm %[r]!, {r3,r4} \n\t"
      "ldm %[c]!, {r5,r6} \n\t"
      "adc r3, r3, r5 \n\t"
      "adc r4, r4, r6 \n\t"
      "stm %[l]!, {r3,r4} \n\t"
      "ldm %[r]!, {r3,r4} \n\t"
      "ldm %[c]!, {r5} \n\t"
      "mov r6, $0 \n\t"
      "adc r3, r3, r5 \n\t"
      "adc r4, r4, r6 \n\t"
      "stm %[l]!, {r3,r4} \n\t"
      "ldm %[r]!, {r3,r4} \n\t"
      "adc r3, r3, r6 \n\t"
      "adc r4, r4, r6 \n\t"
      "stm %[l]!, {r3,r4} \n\t"
      "b 0f \n\t"
      "6: \n\t"
      /* ASM for: ecc_add(result + 2, result + 2, C, 6); */
      /* RRRRRRRR */
      /* + CCCCC0 */
      /* = RRRRRRRR */
      "add %[r], %[r], #8 \n\t"
      "mov %[l], %[r] \n\t"
      "ldm %[r]!, {r3,r4} \n\t"
      "ldm %[c]!, {r5,r6} \n\t"
      "add r3, r3, r5 \n\t"
      "adc r4, r4, r6 \n\t"
      "stm %[l]!, {r3,r4} \n\t"
      "ldm %[r]!, {r3,r4} \n\t"
      "ldm %[c]!, {r5,r6} \n\t"
      "adc r3, r3, r5 \n\t"
      "adc r4, r4, r6 \n\t"
      "stm %[l]!, {r3,r4} \n\t"
      "ldm %[r]!, {r3,r4} \n\t"
      "ldm %[c]!, {r5} \n\t"
      "mov r6, $0 \n\t"
      "adc r3, r3, r5 \n\t"
      "adc r4, r4, r6 \n\t"
      "stm %[l]!, {r3,r4} \n\t"
      "b 0f \n\t"
      "3: \n\t"
      /* ASM for: ecc_add(result + 1, result + 1, C, 3); */
      /* RRRR */
      /* + CCC */
      /* = RRRR */
      "add %[r], %[r], #4 \n\t"
      "mov %[l], %[r] \n\t"
      "ldm %[r]!, {r3,r4} \n\t"
      "ldm %[c]!, {r5,r6} \n\t"
      "add r3, r3, r5 \n\t"
      "adc r4, r4, r6 \n\t"
      "ldr r5, [%[r], $0] \n\t"
      "ldr r6, [%[c], $0] \n\t"
      "adc r5, r5, r6 \n\t"
      "stm %[l]!, {r3-r5} \n\t"
      "0: \n\t"
      : /* out */
      [r] "+l" (r_ptr),
      [c] "+l" (c_ptr),
      [l] "+l" (l_reg)
      : /* in */
      : /* clobber list */
      "r3", "r4", "r5", "r6", "cc", "memory"
      );
  }
#else
  /* portable schoolbook multiplication for builds without the ARM assembler */
  uint32_t i;
  uint32_t j;

  for(i = 0; i < 2 * length; i++) {
    result[i] = 0;
  }
  for(i = 0; i < length; i++) {
    uint64_t carry = 0;
    for(j = 0; j < length; j++) {
      /* cannot overflow: (2^32-1)^2 + 2*(2^32-1) == 2^64 - 1 */
      uint64_t t = (uint64_t)x[i] * y[j] + result[i + j] + carry;
      result[i + j] = (uint32_t)t;
      carry = t >> 32;
    }
    result[i + length] = (uint32_t)carry;
  }
#endif
}
526 /*---------------------------------------------------------------------------*/
527 
/*
 * Builds reduction term S1 from the upper half of a 16 word product:
 * dst = 0, 0, 0, src[11], src[12], src[13], src[14], src[15]
 */
__attribute__((always_inline)) static inline void
ecc_form_s1(uint32_t *dst, const uint32_t *src)
{
#if defined(__arm__)
  __asm__ volatile (
    "mov r2, $0 \n\t"
    "mov r3, r2 \n\t"
    "mov r4, r3 \n\t"
    "stm %[d]!, {r2-r4} \n\t"
    "add %[s], #44 \n\t"
    "ldm %[s]!, {r2-r6} \n\t"
    "stm %[d]!, {r2-r6} \n\t"
    : /* out */
    [d] "+l" (dst),
    [s] "+l" (src)
    : /* in */
    : /* clobber list */
    "r2", "r3", "r4", "r5", "r6", "cc", "memory"
    );
#else
  /* portable equivalent for builds without the ARM assembler */
  dst[0] = 0;
  dst[1] = 0;
  dst[2] = 0;
  dst[3] = src[11];
  dst[4] = src[12];
  dst[5] = src[13];
  dst[6] = src[14];
  dst[7] = src[15];
#endif
}
/*
 * Builds reduction term S2 from the upper half of a 16 word product:
 * dst = 0, 0, 0, src[12], src[13], src[14], src[15], 0
 */
__attribute__((always_inline)) static inline void
ecc_form_s2(uint32_t *dst, const uint32_t *src)
{
#if defined(__arm__)
  __asm__ volatile (
    "mov r2, $0 \n\t"
    "mov r3, r2 \n\t"
    "mov r4, r3 \n\t"
    "stm %[d]!, {r2-r4} \n\t"
    "add %[s], #48 \n\t"
    "ldm %[s]!, {r2-r5} \n\t"
    "stm %[d]!, {r2-r5} \n\t"
    "mov r2, $0 \n\t"
    "stm %[d]!, {r2} \n\t"
    : /* out */
    [d] "+l" (dst),
    [s] "+l" (src)
    : /* in */
    : /* clobber list */
    "r2", "r3", "r4", "r5", "cc", "memory"
    );
#else
  /* portable equivalent for builds without the ARM assembler */
  dst[0] = 0;
  dst[1] = 0;
  dst[2] = 0;
  dst[3] = src[12];
  dst[4] = src[13];
  dst[5] = src[14];
  dst[6] = src[15];
  dst[7] = 0;
#endif
}
/*
 * Builds reduction term S3 from the upper half of a 16 word product:
 * dst = src[8], src[9], src[10], 0, 0, 0, src[14], src[15]
 */
__attribute__((always_inline)) static inline void
ecc_form_s3(uint32_t *dst, const uint32_t *src)
{
#if defined(__arm__)
  __asm__ volatile (
    "add %[s], #32 \n\t"
    "ldm %[s]!, {r2-r4} \n\t"
    "mov r5, $0 \n\t"
    "stm %[d]!, {r2-r5} \n\t"
    "mov r2, r5 \n\t"
    "mov r3, r2 \n\t"
    "add %[s], #12 \n\t"
    "ldm %[s]!, {r4,r5} \n\t"
    "stm %[d]!, {r2-r5} \n\t"
    : /* out */
    [d] "+l" (dst),
    [s] "+l" (src)
    : /* in */
    : /* clobber list */
    "r2", "r3", "r4", "r5", "cc", "memory"
    );
#else
  /* portable equivalent for builds without the ARM assembler */
  dst[0] = src[8];
  dst[1] = src[9];
  dst[2] = src[10];
  dst[3] = 0;
  dst[4] = 0;
  dst[5] = 0;
  dst[6] = src[14];
  dst[7] = src[15];
#endif
}
/*
 * Builds reduction term S4 from the upper half of a 16 word product:
 * dst = src[9], src[10], src[11], src[13], src[14], src[15], src[13], src[8]
 */
__attribute__((always_inline)) static inline void
ecc_form_s4(uint32_t *dst, const uint32_t *src)
{
#if defined(__arm__)
  __asm__ volatile (
    "add %[s], #32 \n\t"
    "ldm %[s]!, {r2-r5} \n\t"
    "stm %[d]!, {r3-r5} \n\t"
    "add %[s], #4 \n\t"
    "ldm %[s]!, {r3-r5} \n\t"
    "stm %[d]!, {r3-r5} \n\t"
    "mov r4, r2 \n\t"
    "stm %[d]!, {r3,r4} \n\t"
    : /* out */
    [d] "+l" (dst),
    [s] "+l" (src)
    : /* in */
    : /* clobber list */
    "r2", "r3", "r4", "r5", "cc", "memory"
    );
#else
  /* portable equivalent for builds without the ARM assembler */
  dst[0] = src[9];
  dst[1] = src[10];
  dst[2] = src[11];
  dst[3] = src[13];
  dst[4] = src[14];
  dst[5] = src[15];
  dst[6] = src[13];
  dst[7] = src[8];
#endif
}
/*
 * Builds reduction term D1 from the upper half of a 16 word product:
 * dst = src[11], src[12], src[13], 0, 0, 0, src[8], src[10]
 */
__attribute__((always_inline)) static inline void
ecc_form_d1(uint32_t *dst, const uint32_t *src)
{
#if defined(__arm__)
  __asm__ volatile (
    "add %[s], #32 \n\t"
    "ldm %[s]!, {r2-r7} \n\t"
    "stm %[d]!, {r5-r7} \n\t"
    "mov r3, $0 \n\t"
    "mov r5, r3 \n\t"
    "mov r6, r5 \n\t"
    "stm %[d]!, {r3,r5,r6} \n\t"
    "stm %[d]!, {r2,r4} \n\t"
    : /* out */
    [d] "+l" (dst),
    [s] "+l" (src)
    : /* in */
    : /* clobber list */
    "r2", "r3", "r4", "r5", "r6", "r7", "cc", "memory"
    );
#else
  /* portable equivalent for builds without the ARM assembler */
  dst[0] = src[11];
  dst[1] = src[12];
  dst[2] = src[13];
  dst[3] = 0;
  dst[4] = 0;
  dst[5] = 0;
  dst[6] = src[8];
  dst[7] = src[10];
#endif
}
/*
 * Builds reduction term D2 from the upper half of a 16 word product:
 * dst = src[12], src[13], src[14], src[15], 0, 0, src[9], src[11]
 */
__attribute__((always_inline)) static inline void
ecc_form_d2(uint32_t *dst, const uint32_t *src)
{
#if defined(__arm__)
  __asm__ volatile (
    "add %[s], #48 \n\t"
    "ldm %[s]!, {r2-r5} \n\t"
    "stm %[d]!, {r2-r5} \n\t"
    "sub %[s], #28 \n\t"
    "ldm %[s]!, {r4-r6} \n\t"
    "mov r2, $0 \n\t"
    "mov r3, r2 \n\t"
    "stm %[d]!, {r2-r4,r6} \n\t"
    : /* out */
    [d] "+l" (dst),
    [s] "+l" (src)
    : /* in */
    : /* clobber list */
    "r2", "r3", "r4", "r5", "r6", "cc", "memory"
    );
#else
  /* portable equivalent for builds without the ARM assembler */
  dst[0] = src[12];
  dst[1] = src[13];
  dst[2] = src[14];
  dst[3] = src[15];
  dst[4] = 0;
  dst[5] = 0;
  dst[6] = src[9];
  dst[7] = src[11];
#endif
}
/*
 * Builds reduction term D3 from the upper half of a 16 word product:
 * dst = src[13], src[14], src[15], src[8], src[9], src[10], 0, src[12]
 */
__attribute__((always_inline)) static inline void
ecc_form_d3(uint32_t *dst, const uint32_t *src)
{
#if defined(__arm__)
  __asm__ volatile (
    "add %[s], #52 \n\t"
    "ldm %[s]!, {r2-r4} \n\t"
    "stm %[d]!, {r2-r4} \n\t"
    "sub %[s], #32 \n\t"
    "ldm %[s]!, {r2-r6} \n\t"
    "mov r5, $0 \n\t"
    "stm %[d]!, {r2-r6} \n\t"
    : /* out */
    [d] "+l" (dst),
    [s] "+l" (src)
    : /* in */
    : /* clobber list */
    "r2", "r3", "r4", "r5", "r6", "cc", "memory"
    );
#else
  /* portable equivalent for builds without the ARM assembler */
  dst[0] = src[13];
  dst[1] = src[14];
  dst[2] = src[15];
  dst[3] = src[8];
  dst[4] = src[9];
  dst[5] = src[10];
  dst[6] = 0;
  dst[7] = src[12];
#endif
}
/*
 * Builds reduction term D4 from the upper half of a 16 word product:
 * dst = src[14], src[15], 0, src[9], src[10], src[11], 0, src[13]
 */
__attribute__((always_inline)) static inline void
ecc_form_d4(uint32_t *dst, const uint32_t *src)
{
#if defined(__arm__)
  __asm__ volatile (
    "add %[s], #56 \n\t"
    "ldm %[s]!, {r2,r3} \n\t"
    "mov r4, $0 \n\t"
    "stm %[d]!, {r2-r4} \n\t"
    "sub %[s], #28 \n\t"
    "ldm %[s]!, {r2-r6} \n\t"
    "mov r5, $0 \n\t"
    "stm %[d]!, {r2-r6} \n\t"
    : /* out */
    [d] "+l" (dst),
    [s] "+l" (src)
    : /* in */
    : /* clobber list */
    "r2", "r3", "r4", "r5", "r6", "cc", "memory"
    );
#else
  /* portable equivalent for builds without the ARM assembler */
  dst[0] = src[14];
  dst[1] = src[15];
  dst[2] = 0;
  dst[3] = src[9];
  dst[4] = src[10];
  dst[5] = src[11];
  dst[6] = 0;
  dst[7] = src[13];
#endif
}
696 /*---------------------------------------------------------------------------*/
697 
698 static void
699 ecc_field_Add(uint32_t *result, const uint32_t *x, const uint32_t *y)
700 {
701  uint32_t temp[8];
702  uint32_t carry = -ecc_add(result, x, y);
703  ecc_add(temp, result, ecc_prime_r);
704 
705  int i;
706  for(i = 0; i < 8; i++) {
707  result[i] ^= (carry & (result[i] ^ temp[i]));
708  }
709 }
710 static void
711 ecc_field_Sub(uint32_t *result, const uint32_t *x, const uint32_t *y)
712 {
713  uint32_t temp[8];
714  uint32_t carry = -ecc_sub(result, x, y);
715  ecc_add(temp, result, ecc_prime_m);
716 
717  int i;
718  for(i = 0; i < 8; i++) {
719  result[i] ^= (carry & (result[i] ^ temp[i]));
720  }
721 }
722 static void
723 ecc_field_ModP(uint32_t *result, const uint32_t *T)
724 {
725  uint32_t SX_o_DX[8];
726  ecc_copy(result, T); /* result = T */
727 
728  ecc_form_s1(SX_o_DX, T); /* Form S1 */
729  ecc_field_Add(result, result, SX_o_DX); /* result = T + S1 */
730  ecc_field_Add(result, result, SX_o_DX); /* result = T + S1 + S1 */
731 
732  ecc_form_s2(SX_o_DX, T); /* Form S2 */
733  ecc_field_Add(result, result, SX_o_DX); /* result = T + S1 + S1 + S2 */
734  ecc_field_Add(result, result, SX_o_DX); /* result = T + S1 + S1 + S2 + S2 */
735 
736  ecc_form_s3(SX_o_DX, T); /* Form S3 */
737  ecc_field_Add(result, result, SX_o_DX); /* result = T + S1 + S1 + S2 + S2 + S3 */
738 
739  ecc_form_s4(SX_o_DX, T); /* Form S4 */
740  ecc_field_Add(result, result, SX_o_DX); /* result = T + S1 + S1 + S2 + S2 + S3 + S4 */
741 
742  ecc_form_d1(SX_o_DX, T); /* Form D1 */
743  ecc_field_Sub(result, result, SX_o_DX); /* result = T + S1 + S1 + S2 + S2 + S3 + S4 - D1 */
744 
745  ecc_form_d2(SX_o_DX, T); /* Form D2 */
746  ecc_field_Sub(result, result, SX_o_DX); /* result = T + S1 + S1 + S2 + S2 + S3 + S4 - D1 - D2 */
747 
748  ecc_form_d3(SX_o_DX, T); /* Form D3 */
749  ecc_field_Sub(result, result, SX_o_DX); /* result = T + S1 + S1 + S2 + S2 + S3 + S4 - D1 - D2 - D3 */
750 
751  ecc_form_d4(SX_o_DX, T); /* Form D4 */
752  ecc_field_Sub(result, result, SX_o_DX); /* result = T + S1 + S1 + S2 + S2 + S3 + S4 - D1 - D2 - D3 - D4 */
753 
754  if(ecc_compare(result, ecc_prime_m) >= 0) {
755  ecc_field_Sub(result, result, ecc_prime_m);
756  }
757 }
/*
 * result = A * B modulo ecc_prime_m. The full 16 word product is built in a
 * local buffer first, so result may be the same array as A and/or B.
 */
static void
ecc_field_Mult(uint32_t *result, const uint32_t *A, const uint32_t *B)
{
  uint32_t wide[16];

  ecc_mult(wide, A, B, 8);
  ecc_field_ModP(result, wide);
}
765 static void
766 ecc_field_Inv(uint32_t *result, const uint32_t *A)
767 {
768  PRINTHEX("Input", A, 8);
769 
770  ecc_setZero(result);
771  result[0] = 0x00000001;
772  int i;
773  for(i = 255; i >= 0; --i) {
774  ecc_field_Mult(result, result, result);
775  if(((ecc_prime_m[i / 32] >> (i % 32)) & 0x1) == 1 && i != 1) {
776  ecc_field_Mult(result, result, A);
777  }
778  }
779 
780  PRINTHEX("Result", result, 8);
781 }
782 /*---------------------------------------------------------------------------*/
783 
784 static void
785 ecc_projective_double(uint32_t *val)
786 {
787  /* Algorithm taken from https://hyperelliptic.org/EFD/g1p/auto-shortw-projective-3.html#doubling-dbl-2007-bl-2 */
788  /* w = 3*(X1-Z1)*(X1+Z1) */
789  /* s = 2*Y1*Z1 */
790  /* ss = s^2 */
791  /* sss = s*ss */
792  /* R = Y1*s */
793  /* RR = R^2 */
794  /* B = 2*X1*R */
795  /* h = w^2-2*B */
796  /* X3 = h*s */
797  /* Y3 = w*(B-h)-2*RR */
798  /* Z3 = sss */
799 
800  uint32_t temp[24];
801  uint32_t w[8];
802  uint32_t s[8];
803  uint32_t B[8];
804  uint32_t h[8];
805 
806  uint8_t is_zero = ecc_isX(val + X, 0) & ecc_isX(val + Y, 0) & ecc_isX(val + Z, 1);
807 
808  ecc_field_Sub(temp + X, val + X, val + Z);
809  ecc_field_Add(temp + Y, val + X, val + Z);
810  ecc_field_Mult(temp + Z, temp + X, temp + Y);
811  ecc_field_Add(temp + X, temp + Z, temp + Z);
812  ecc_field_Add(w, temp + Z, temp + X);
813  ecc_field_Mult(temp + X, val + Y, val + Z);
814  ecc_field_Add(s, temp + X, temp + X);
815  ecc_field_Mult(temp + X, s, s);
816  ecc_field_Mult(val + Z, s, temp + X);
817  ecc_field_Mult(temp + X, val + Y, s); /* temp = R */
818  ecc_field_Mult(temp + Z, temp + X, temp + X); /* temp3 = RR */
819  ecc_field_Mult(temp + Y, val + X, temp + X); /* temp2 = R*x */
820  ecc_field_Add(B, temp + Y, temp + Y); /* B = 2*R*x */
821  ecc_field_Mult(temp + X, w, w);
822  ecc_field_Add(temp + Y, B, B);
823  ecc_field_Sub(h, temp + X, temp + Y);
824  ecc_field_Mult(val + X, h, s);
825  ecc_field_Sub(temp + X, B, h);
826  ecc_field_Mult(temp + Y, w, temp + X);
827  ecc_field_Add(temp + Z, temp + Z, temp + Z); /* temp3 = 2*RR */
828  ecc_field_Sub(val + Y, temp + Y, temp + Z);
829  /* finished, now swap the result if necessary */
830 
831  ecc_setZero(temp + X);
832  ecc_setZero(temp + Y);
833  ecc_setZero(temp + Z);
834  (temp + Z)[0] = 0x00000001;
835 
836  ecc_replace(is_zero, val, temp);
837 }
838 static void
839 ecc_projective_add(uint32_t *result, const uint32_t *val_1, const uint32_t *x_2, const uint32_t *y_2, const uint32_t *z_2)
840 {
841 /* algorithm taken from https://hyperelliptic.org/EFD/g1p/auto-shortw-projective-3.html#addition-add-1998-cmo-2 */
842 /* X Z X Y U Y */
843 /* 1 1 2 2 U UU V 1V */
844 /* Z R Z ZVZ R UZZ VR ZY */
845 /* VX 2RAY 2Z 1V1UA UZV V2A 2Z */
846 /* Y1Z2 = Y1*Z2 | */
847 /* X2Z1 = X2*Z1 | | */
848 /* X1Z2 = X1*Z2 | | | */
849 /* V = X2Z1-X1Z2 | x x | */
850 /* VV = V^2 x | | | */
851 /* R = VV*X1Z2 | x| x | */
852 /* VVV = V*VV x | x | | */
853 /* Y2Z1 = Y2*Z1 | | | | | */
854 /* U = Y2Z1-Y1Z2 | | x| | x */
855 /* UU = U^2 | | x | | | */
856 /* Z1Z2 = Z1*Z2 | | | | | | | */
857 /* UUZZ = UU*Z1Z2 | | x | x| | | */
858 /* UZV = UUZZ-VVV | | | | x| x | */
859 /* Z = VVV*Z1Z2 | | x| | | x | */
860 /* VYZ = VVV*Y1Z2 | | | | | x x| */
861 /* R2 = 2*R | x | | | | | */
862 /* A = UZV-2R | | | | x x| | */
863 /* X = V*A x| | | | x | */
864 /* RA = R-A | x| | | x | */
865 /* URA = U*RA | x | x| | */
866 /* Y = URA-VYZ | | | x x */
867 
868  uint32_t temp[32];
869  #define X1 val_1 + X
870  #define Y1 val_1 + Y
871  #define Z1 val_1 + Z
872  #define X2 x_2
873  #define Y2 y_2
874  #define Z2 z_2
875  #define V result + X
876  #define X1Z2 result + Y
877  #define R result + Y
878  #define RA result + Y
879  #define Z1Z2 result + Z
880  #define X2Z1 temp + X
881  #define VV temp + X
882  #define Y2Z1 temp + X
883  #define U temp + X
884  #define URA temp + X
885  #define UU temp + Y
886  #define UUZZ temp + Y
887  #define UZV temp + Y
888  #define VVV temp + Z
889  #define R2 temp + Z
890  #define A temp + Z
891  #define Y1Z2 temp + 24
892  #define VYZ temp + 24
893 
894  uint8_t is_input1_zero = ecc_isX(val_1 + X, 0) & ecc_isX(val_1 + Y, 0) & ecc_isX(val_1 + Z, 1);
895  uint8_t is_input2_zero = ecc_isX(x_2, 0) & ecc_isX(y_2, 0) & ecc_isX(z_2, 1);
896 
897  ecc_copy(temp + X, x_2);
898  ecc_copy(temp + Y, y_2);
899  ecc_copy(temp + Z, z_2);
900  ecc_replace(is_input1_zero, result, temp);
901 
902  ecc_copy(temp + X, val_1 + X);
903  ecc_copy(temp + Y, val_1 + Y);
904  ecc_copy(temp + Z, val_1 + Z);
905  ecc_replace(is_input2_zero, result, temp);
906 
907  /* invalidate the result pointer */
908  result = (uint32_t *)((uintptr_t)result ^ (-(is_input2_zero | is_input1_zero) & ((uintptr_t)result ^ (uintptr_t)temp)));
909 
910  ecc_field_Mult(Y1Z2, Y1, Z2);
911  ecc_field_Mult(X2Z1, X2, Z1);
912  ecc_field_Mult(X1Z2, X1, Z2);
913  ecc_field_Sub(V, X2Z1, X1Z2);
914  ecc_field_Mult(VV, V, V);
915  ecc_field_Mult(R, VV, X1Z2);
916  ecc_field_Mult(VVV, V, VV);
917  ecc_field_Mult(Y2Z1, Y2, Z1);
918  ecc_field_Sub(U, Y2Z1, Y1Z2);
919  ecc_field_Mult(UU, U, U);
920  ecc_field_Mult(Z1Z2, Z1, Z2);
921  ecc_field_Mult(UUZZ, UU, Z1Z2);
922  ecc_field_Sub(UZV, UUZZ, VVV);
923  ecc_field_Mult(result + Z, VVV, Z1Z2);
924  ecc_field_Mult(VYZ, VVV, Y1Z2);
925  ecc_field_Add(R2, R, R);
926  ecc_field_Sub(A, UZV, R2);
927  ecc_field_Mult(result + X, V, A);
928  ecc_field_Sub(RA, R, A);
929  ecc_field_Mult(URA, U, RA);
930  ecc_field_Sub(result + Y, URA, VYZ);
931 }
932 /*---------------------------------------------------------------------------*/
933 
#if SELF_TEST
/* Prints msg if value is zero. */
static void
assertTrue(uint32_t value, const char *msg)
{
  if(value) {
    return;
  }
  printf("%s\n", msg);
}
/* Prints msg if value is not zero. */
static void
assertFalse(uint32_t value, const char *msg)
{
  if(!value) {
    return;
  }
  printf("%s\n", msg);
}
/* Prints msg if the two 256 bit numbers differ. */
static void
assertSame(uint32_t *val_1, uint32_t *val_2, const char *msg)
{
  if(ecc_compare(val_1, val_2) != 0) {
    printf("%s\n", msg);
  }
}
/*
 * Exercises ecc_compare, ecc_isX, ecc_add, ecc_sub and ecc_field_Sub with a
 * few fixed values. Every failed check prints its message; a final line is
 * printed once all checks have run.
 */
static void
selfTest()
{
  uint32_t num_000[8] = { 0x00000000 };
  uint32_t num_001[8] = { 0x00000001 };
  uint32_t num_002[8] = { 0x00000002 };
  uint32_t num_004[8] = { 0x00000004 };
  uint32_t num_max[8] = {
    0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
    0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff
  };
  uint32_t primeMinusOne[8] = {
    0xfffffffe, 0xffffffff, 0xffffffff, 0x00000000,
    0x00000000, 0x00000000, 0x00000001, 0xffffffff
  };
  uint32_t result[8];

  /* ecc_compare */
  assertFalse(ecc_compare(num_001, num_001), "ecc_compare 1: Wrong result!");
  assertTrue(ecc_compare(num_000, num_001) == -1, "ecc_compare 2: Wrong result!");
  assertTrue(ecc_compare(num_001, num_000) == 1, "ecc_compare 3: Wrong result!");

  /* ecc_isX: matching values */
  assertTrue(ecc_isX(num_000, 0), "ecc_isX 1: Wrong result!");
  assertTrue(ecc_isX(num_001, 1), "ecc_isX 2: Wrong result!");
  assertTrue(ecc_isX(num_002, 2), "ecc_isX 3: Wrong result!");
  assertTrue(ecc_isX(num_004, 4), "ecc_isX 4: Wrong result!");
  /* ecc_isX: mismatching values */
  assertFalse(ecc_isX(num_000, 1), "ecc_isX 5: Wrong result!");
  assertFalse(ecc_isX(num_000, 2), "ecc_isX 6: Wrong result!");
  assertFalse(ecc_isX(num_000, 4), "ecc_isX 7: Wrong result!");
  assertFalse(ecc_isX(num_001, 0), "ecc_isX 8: Wrong result!");
  assertFalse(ecc_isX(num_001, 2), "ecc_isX 9: Wrong result!");
  assertFalse(ecc_isX(num_001, 4), "ecc_isX 10: Wrong result!");
  assertFalse(ecc_isX(num_002, 0), "ecc_isX 11: Wrong result!");
  assertFalse(ecc_isX(num_002, 1), "ecc_isX 12: Wrong result!");
  assertFalse(ecc_isX(num_002, 4), "ecc_isX 13: Wrong result!");
  assertFalse(ecc_isX(num_004, 0), "ecc_isX 14: Wrong result!");
  assertFalse(ecc_isX(num_004, 1), "ecc_isX 15: Wrong result!");
  assertFalse(ecc_isX(num_004, 2), "ecc_isX 16: Wrong result!");

  /* ecc_add: 1 + 2 + 1 == 4 without carry, max + 2 wraps to 1 with carry */
  assertFalse(ecc_add(result, num_001, num_002), "ecc_add 1: Unexpected carrybit!");
  assertFalse(ecc_add(result, result, num_001), "ecc_add 2: Unexpected carrybit!");
  assertSame(result, num_004, "ecc_add 3: Wrong result!");
  assertTrue(ecc_add(result, num_max, num_002), "ecc_add 4: Carrybit missing!");
  assertSame(result, num_001, "ecc_add 5: Wrong result!");

  /* ecc_sub: 4 - 2 - 1 - 1 == 0 without borrow, 0 - 1 wraps to max */
  assertFalse(ecc_sub(result, num_004, num_002), "ecc_sub 1: Unexpected carrybit!");
  assertFalse(ecc_sub(result, result, num_001), "ecc_sub 2: Unexpected carrybit!");
  assertFalse(ecc_sub(result, result, num_001), "ecc_sub 3: Unexpected carrybit!");
  assertSame(result, num_000, "ecc_sub 4: Wrong result!");
  assertTrue(ecc_sub(result, num_000, num_001), "ecc_sub 5: Carrybit missing!");
  assertSame(result, num_max, "ecc_sub 6: Wrong result!");

  /* ecc_field_Sub: 0 - 1 must give prime - 1 */
  ecc_field_Sub(result, num_001, num_000);
  assertSame(num_001, result, "ecc_field_Sub 1: Wrong result!");
  ecc_field_Sub(result, num_001, num_001);
  assertSame(num_000, result, "ecc_field_Sub 2: Wrong result!");
  ecc_field_Sub(result, num_000, num_001);
  assertSame(primeMinusOne, result, "ecc_field_Sub 3: Wrong result!");

  printf("Tests completed!\n");
}
#endif
#define __attribute__(nothing)
Define attribute to nothing since it isn't handled by IAR.
Definition: iar.h:194
Calculations on elliptic curve secp256r1
int32_t ecc_compare(const uint32_t *a, const uint32_t *b)
Compares the value of a with the value of b.
Definition: ecc.c:112
void ecc_ec_mult(uint32_t *resultx, uint32_t *resulty, const uint32_t *px, const uint32_t *py, const uint32_t *secret)
ECC scalar multiplication on elliptic curve secp256r1.
Definition: ecc.c:124