From a828df5c34305616bb55bbdc9d854e0bf6e5d9ea Mon Sep 17 00:00:00 2001 From: Andrew Poelstra Date: Wed, 13 May 2015 21:16:13 -0500 Subject: [PATCH 01/13] tests: add a couple tests - Add zero/one sanity check tests for ecmult - Add unit test for secp256k1_scalar_split_lambda_var - Typo fix in `ge_equals_ge`; was comparing b->y to itself, should have been comparing a->y to b->y - Normalize y-coordinate in `random_group_element_test`; this is needed to pass random group elements as the first argument to `ge_equals_ge`, which I will do in a future commit. --- src/tests.c | 47 ++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 46 insertions(+), 1 deletion(-) diff --git a/src/tests.c b/src/tests.c index bcd2d62a4a..6e958256d0 100644 --- a/src/tests.c +++ b/src/tests.c @@ -57,6 +57,7 @@ void random_group_element_test(secp256k1_ge_t *ge) { do { random_field_element_test(&fe); if (secp256k1_ge_set_xo_var(ge, &fe, secp256k1_rand32() & 1)) { + secp256k1_fe_normalize(&ge->y); break; } } while(1); @@ -914,7 +915,7 @@ void ge_equals_ge(const secp256k1_ge_t *a, const secp256k1_ge_t *b) { return; } CHECK(secp256k1_fe_equal_var(&a->x, &b->x)); - CHECK(secp256k1_fe_equal_var(&b->y, &b->y)); + CHECK(secp256k1_fe_equal_var(&a->y, &b->y)); } /* This compares jacobian points including their Z, not just their geometric meaning. 
*/ @@ -1305,6 +1306,8 @@ void test_point_times_order(const secp256k1_gej_t *point) { /* X * (point + G) + (order-X) * (pointer + G) = 0 */ secp256k1_scalar_t x; secp256k1_scalar_t nx; + secp256k1_scalar_t zero = SECP256K1_SCALAR_CONST(0, 0, 0, 0, 0, 0, 0, 0); + secp256k1_scalar_t one = SECP256K1_SCALAR_CONST(0, 0, 0, 0, 0, 0, 0, 1); secp256k1_gej_t res1, res2; secp256k1_ge_t res3; unsigned char pub[65]; @@ -1322,6 +1325,16 @@ void test_point_times_order(const secp256k1_gej_t *point) { CHECK(secp256k1_eckey_pubkey_serialize(&res3, pub, &psize, 0) == 0); psize = 65; CHECK(secp256k1_eckey_pubkey_serialize(&res3, pub, &psize, 1) == 0); + /* check zero/one edge cases */ + secp256k1_ecmult(&ctx->ecmult_ctx, &res1, point, &zero, &zero); + secp256k1_ge_set_gej(&res3, &res1); + CHECK(secp256k1_ge_is_infinity(&res3)); + secp256k1_ecmult(&ctx->ecmult_ctx, &res1, point, &one, &zero); + secp256k1_ge_set_gej(&res3, &res1); + ge_equals_gej(&res3, point); + secp256k1_ecmult(&ctx->ecmult_ctx, &res1, point, &zero, &one); + secp256k1_ge_set_gej(&res3, &res1); + ge_equals_ge(&res3, &secp256k1_ge_const_g); } void run_point_times_order(void) { @@ -1469,6 +1482,33 @@ void run_ecmult_gen_blind(void) { } } +#ifdef USE_ENDOMORPHISM +/***** ENDOMORPHISH TESTS *****/ +void test_scalar_split(void) { + secp256k1_scalar_t full; + secp256k1_scalar_t s1, slam; + const unsigned char zero[32] = {0}; + unsigned char tmp[32]; + + random_scalar_order_test(&full); + secp256k1_scalar_split_lambda_var(&s1, &slam, &full); + + /* check that both are <= 128 bits in size */ + if (secp256k1_scalar_is_high(&s1)) + secp256k1_scalar_negate(&s1, &s1); + if (secp256k1_scalar_is_high(&slam)) + secp256k1_scalar_negate(&slam, &slam); + + secp256k1_scalar_get_b32(tmp, &s1); + CHECK(memcmp(zero, tmp, 16) == 0); + secp256k1_scalar_get_b32(tmp, &slam); + CHECK(memcmp(zero, tmp, 16) == 0); +} + +void run_endomorphism_tests(void) { + test_scalar_split(); +} +#endif void random_sign(secp256k1_ecdsa_sig_t *sig, const 
secp256k1_scalar_t *key, const secp256k1_scalar_t *msg, int *recid) { secp256k1_scalar_t nonce; @@ -2228,6 +2268,11 @@ int main(int argc, char **argv) { run_ecmult_constants(); run_ecmult_gen_blind(); + /* endomorphism tests */ +#ifdef USE_ENDOMORPHISM + run_endomorphism_tests(); +#endif + /* ecdsa tests */ run_random_pubkeys(); run_ecdsa_sign_verify(); From 955774cef31d06bb6bebe6eb71572adb290964c5 Mon Sep 17 00:00:00 2001 From: Andrew Poelstra Date: Wed, 13 May 2015 17:31:47 -0500 Subject: [PATCH 02/13] Add constant-time `secp256k1_point_multiply` for ECDH Designed with clear separation of the wNAF conversion, precomputation and exponentiation (since the precomp at least we will probably want to separate in the API for users who reuse points a lot. Future work: - actually separate precomp in the API - do multiexp rather than single exponentiation --- Makefile.am | 2 + src/ecdh.h | 15 ++++ src/ecdh_impl.h | 116 +++++++++++++++++++++++++++ src/scalar.h | 11 +++ src/scalar_4x64_impl.h | 28 +++++++ src/scalar_8x32_impl.h | 41 ++++++++++ src/scalar_impl.h | 5 ++ src/secp256k1.c | 2 + src/tests.c | 178 ++++++++++++++++++++++++++++++++++++++++- 9 files changed, 397 insertions(+), 1 deletion(-) create mode 100644 src/ecdh.h create mode 100644 src/ecdh_impl.h diff --git a/Makefile.am b/Makefile.am index cc15338b7e..5adba36fe6 100644 --- a/Makefile.am +++ b/Makefile.am @@ -13,6 +13,8 @@ noinst_HEADERS += src/group.h noinst_HEADERS += src/group_impl.h noinst_HEADERS += src/num_gmp.h noinst_HEADERS += src/num_gmp_impl.h +noinst_HEADERS += src/ecdh.h +noinst_HEADERS += src/ecdh_impl.h noinst_HEADERS += src/ecdsa.h noinst_HEADERS += src/ecdsa_impl.h noinst_HEADERS += src/eckey.h diff --git a/src/ecdh.h b/src/ecdh.h new file mode 100644 index 0000000000..7fbd9c9caf --- /dev/null +++ b/src/ecdh.h @@ -0,0 +1,15 @@ +/********************************************************************** + * Copyright (c) 2015 Pieter Wuille, Andrew Poelstra * + * Distributed under the MIT software 
license, see the accompanying * + * file COPYING or http://www.opensource.org/licenses/mit-license.php.* + **********************************************************************/ + +#ifndef _SECP256K1_ECDH_ +#define _SECP256K1_ECDH_ + +#include "scalar.h" +#include "group.h" + +static void secp256k1_point_multiply(secp256k1_gej_t *r, const secp256k1_ge_t *a, const secp256k1_scalar_t *q); + +#endif diff --git a/src/ecdh_impl.h b/src/ecdh_impl.h new file mode 100644 index 0000000000..624ec76dbb --- /dev/null +++ b/src/ecdh_impl.h @@ -0,0 +1,116 @@ +/********************************************************************** + * Copyright (c) 2015 Pieter Wuille, Andrew Poelstra * + * Distributed under the MIT software license, see the accompanying * + * file COPYING or http://www.opensource.org/licenses/mit-license.php.* + **********************************************************************/ + +#ifndef _SECP256K1_ECDH_IMPL_ +#define _SECP256K1_ECDH_IMPL_ + +#include "scalar.h" +#include "group.h" +#include "ecdh.h" +#include "ecmult_impl.h" + +#define WNAF_BITS 256 +#define WNAF_SIZE(w) ((WNAF_BITS + (w) - 1) / (w)) + +/** Convert a number to WNAF notation. The number becomes represented by sum(2^{wi} * wnaf[i], i=0..return_val) + * with the following guarantees: + * - each wnaf[i] an odd integer between -(1 << w) and (1 << w) + * - each wnaf[i] is nonzero + * - the number of words set is returned; this is always (256 + w - 1) / w + * + * Adapted from `The Width-w NAF Method Provides Small Memory and Fast Elliptic Scalar + * Multiplications Secure against Side Channel Attacks`, Okeya and Tagaki. M. Joye (Ed.) + * CT-RSA 2003, LNCS 2612, pp. 328-443, 2003. Springer-Verlagy Berlin Heidelberg 2003 + * + * Numbers reference steps of `Algorithm SPA-resistant Width-w NAF with Odd Scalar` on pp. 
335 + */ +static void secp256k1_ecdh_wnaf(int *wnaf, const secp256k1_scalar_t *a, int w) { + secp256k1_scalar_t s = *a; + /* Negate to force oddness */ + int is_even = secp256k1_scalar_is_even(&s); + int global_sign = secp256k1_scalar_cond_negate(&s, is_even); + + int word = 0; + /* 1 2 3 */ + int u_last = secp256k1_scalar_shr_int(&s, w); + int u; + /* 4 */ + while (word * w < WNAF_BITS) { + int sign; + int even; + + /* 4.1 4.4 */ + u = secp256k1_scalar_shr_int(&s, w); + /* 4.2 */ + even = ((u & 1) == 0); + sign = 2 * (u_last > 0) - 1; + u += sign * even; + u_last -= sign * even * (1 << w); + + /* 4.3, adapted for global sign change */ + wnaf[word++] = u_last * global_sign; + + u_last = u; + } + wnaf[word] = u * global_sign; + + VERIFY_CHECK(secp256k1_scalar_is_zero(&s)); + VERIFY_CHECK(word == WNAF_SIZE(w)); +} + + +static void secp256k1_point_multiply(secp256k1_gej_t *r, const secp256k1_ge_t *a, const secp256k1_scalar_t *scalar) { + secp256k1_ge_t pre_a[ECMULT_TABLE_SIZE(WINDOW_A)]; + secp256k1_ge_t tmpa; + secp256k1_fe_t Z; + + int wnaf[1 + WNAF_SIZE(WINDOW_A - 1)]; + + int i; + int is_zero = secp256k1_scalar_is_zero(scalar); + secp256k1_scalar_t sc = *scalar; + /* the wNAF ladder cannot handle zero, so bump this to one .. we will + * correct the result after the fact */ + sc.d[0] += is_zero; + + /* build wnaf representation for q. */ + secp256k1_ecdh_wnaf(wnaf, &sc, WINDOW_A - 1); + + /* Calculate odd multiples of a. + * All multiples are brought to the same Z 'denominator', which is stored + * in Z. Due to secp256k1' isomorphism we can do all operations pretending + * that the Z coordinate was 1, use affine addition formulae, and correct + * the Z coordinate of the result once at the end. 
+ */ + secp256k1_gej_set_ge(r, a); + secp256k1_ecmult_odd_multiples_table_globalz_windowa(pre_a, &Z, r); + secp256k1_gej_set_infinity(r); + + for (i = WNAF_SIZE(WINDOW_A - 1); i >= 0; i--) { + int n; + int j; + for (j = 0; j < WINDOW_A - 1; ++j) { + /* This is a variable-time doubling, but it is actually constant-time for + * nonzero points. We know on the first iteration that `r` will be zero + * and know (by uniqueness of wNAF) that `r` will never be zero after + * that iteration, so this does not result in a timing leak. */ + secp256k1_gej_double_var(r, r, NULL); + } + n = wnaf[i]; + VERIFY_CHECK(n != 0); + ECMULT_TABLE_GET_GE(&tmpa, pre_a, n, WINDOW_A); + secp256k1_gej_add_ge(r, r, &tmpa); + } + + if (!r->infinity) { + secp256k1_fe_mul(&r->z, &r->z, &Z); + } + + /* correct for zero */ + r->infinity |= is_zero; +} + +#endif diff --git a/src/scalar.h b/src/scalar.h index f5d09f8d47..f33e72a7c9 100644 --- a/src/scalar.h +++ b/src/scalar.h @@ -48,6 +48,10 @@ static void secp256k1_scalar_add_bit(secp256k1_scalar_t *r, unsigned int bit); /** Multiply two scalars (modulo the group order). */ static void secp256k1_scalar_mul(secp256k1_scalar_t *r, const secp256k1_scalar_t *a, const secp256k1_scalar_t *b); +/** Shift a scalar right by some amount strictly between 0 and 16, returning + * the low bits that were shifted off */ +static int secp256k1_scalar_shr_int(secp256k1_scalar_t *r, int n); + /** Compute the square of a scalar (modulo the group order). */ static void secp256k1_scalar_sqr(secp256k1_scalar_t *r, const secp256k1_scalar_t *a); @@ -66,9 +70,16 @@ static int secp256k1_scalar_is_zero(const secp256k1_scalar_t *a); /** Check whether a scalar equals one. */ static int secp256k1_scalar_is_one(const secp256k1_scalar_t *a); +/** Check whether a scalar, considered as an nonnegative integer, is even. */ +static int secp256k1_scalar_is_even(const secp256k1_scalar_t *a); + /** Check whether a scalar is higher than the group order divided by 2. 
*/ static int secp256k1_scalar_is_high(const secp256k1_scalar_t *a); +/** Conditionally negate a number, in constant time. + * Returns -1 if the number was negated, 1 otherwise */ +static int secp256k1_scalar_cond_negate(secp256k1_scalar_t *a, int flag); + #ifndef USE_NUM_NONE /** Convert a scalar to a number. */ static void secp256k1_scalar_get_num(secp256k1_num_t *r, const secp256k1_scalar_t *a); diff --git a/src/scalar_4x64_impl.h b/src/scalar_4x64_impl.h index ff365292f8..147229dab9 100644 --- a/src/scalar_4x64_impl.h +++ b/src/scalar_4x64_impl.h @@ -164,6 +164,22 @@ static int secp256k1_scalar_is_high(const secp256k1_scalar_t *a) { return yes; } +static int secp256k1_scalar_cond_negate(secp256k1_scalar_t *r, int flag) { + /* If we are flag = 0, mask = 00...00 and this is a no-op; + * if we are flag = 1, mask = 11...11 and this is identical to secp256k1_scalar_negate */ + uint64_t mask = !flag - 1; + uint64_t nonzero = (secp256k1_scalar_is_zero(r) != 0) - 1; + uint128_t t = (uint128_t)(r->d[0] ^ mask) + ((SECP256K1_N_0 + 1) & mask); + r->d[0] = t & nonzero; t >>= 64; + t += (uint128_t)(r->d[1] ^ mask) + (SECP256K1_N_1 & mask); + r->d[1] = t & nonzero; t >>= 64; + t += (uint128_t)(r->d[2] ^ mask) + (SECP256K1_N_2 & mask); + r->d[2] = t & nonzero; t >>= 64; + t += (uint128_t)(r->d[3] ^ mask) + (SECP256K1_N_3 & mask); + r->d[3] = t & nonzero; + return 2 * (mask == 0) - 1; +} + /* Inspired by the macros in OpenSSL's crypto/bn/asm/x86_64-gcc.c. */ /** Add a*b to the number defined by (c0,c1,c2). c2 must never overflow. 
*/ @@ -877,6 +893,18 @@ static void secp256k1_scalar_mul(secp256k1_scalar_t *r, const secp256k1_scalar_t secp256k1_scalar_reduce_512(r, l); } +static int secp256k1_scalar_shr_int(secp256k1_scalar_t *r, int n) { + int ret; + VERIFY_CHECK(n > 0); + VERIFY_CHECK(n < 16); + ret = r->d[0] & ((1 << n) - 1); + r->d[0] = (r->d[0] >> n) + (r->d[1] << (64 - n)); + r->d[1] = (r->d[1] >> n) + (r->d[2] << (64 - n)); + r->d[2] = (r->d[2] >> n) + (r->d[3] << (64 - n)); + r->d[3] = (r->d[3] >> n); + return ret; +} + static void secp256k1_scalar_sqr(secp256k1_scalar_t *r, const secp256k1_scalar_t *a) { uint64_t l[8]; secp256k1_scalar_sqr_512(l, a); diff --git a/src/scalar_8x32_impl.h b/src/scalar_8x32_impl.h index 22b31d4112..0ad2423db0 100644 --- a/src/scalar_8x32_impl.h +++ b/src/scalar_8x32_impl.h @@ -234,6 +234,31 @@ static int secp256k1_scalar_is_high(const secp256k1_scalar_t *a) { return yes; } +static int secp256k1_scalar_cond_negate(secp256k1_scalar_t *r, int flag) { + /* If we are flag = 0, mask = 00...00 and this is a no-op; + * if we are flag = 1, mask = 11...11 and this is identical to secp256k1_scalar_negate */ + uint32_t mask = !flag - 1; + uint32_t nonzero = 0xFFFFFFFFUL * (secp256k1_scalar_is_zero(r) == 0); + uint64_t t = (uint64_t)(r->d[0] ^ mask) + ((SECP256K1_N_0 + 1) & mask); + r->d[0] = t & nonzero; t >>= 32; + t += (uint64_t)(r->d[1] ^ mask) + (SECP256K1_N_1 & mask); + r->d[1] = t & nonzero; t >>= 32; + t += (uint64_t)(r->d[2] ^ mask) + (SECP256K1_N_2 & mask); + r->d[2] = t & nonzero; t >>= 32; + t += (uint64_t)(r->d[3] ^ mask) + (SECP256K1_N_3 & mask); + r->d[3] = t & nonzero; t >>= 32; + t += (uint64_t)(r->d[4] ^ mask) + (SECP256K1_N_4 & mask); + r->d[4] = t & nonzero; t >>= 32; + t += (uint64_t)(r->d[5] ^ mask) + (SECP256K1_N_5 & mask); + r->d[5] = t & nonzero; t >>= 32; + t += (uint64_t)(r->d[6] ^ mask) + (SECP256K1_N_6 & mask); + r->d[6] = t & nonzero; t >>= 32; + t += (uint64_t)(r->d[7] ^ mask) + (SECP256K1_N_7 & mask); + r->d[7] = t & nonzero; + return 
2 * (mask == 0) - 1; +} + + /* Inspired by the macros in OpenSSL's crypto/bn/asm/x86_64-gcc.c. */ /** Add a*b to the number defined by (c0,c1,c2). c2 must never overflow. */ @@ -624,6 +649,22 @@ static void secp256k1_scalar_mul(secp256k1_scalar_t *r, const secp256k1_scalar_t secp256k1_scalar_reduce_512(r, l); } +static int secp256k1_scalar_shr_int(secp256k1_scalar_t *r, int n) { + int ret; + VERIFY_CHECK(n > 0); + VERIFY_CHECK(n < 16); + ret = r->d[0] & ((1 << n) - 1); + r->d[0] = (r->d[0] >> n) + (r->d[1] << (32 - n)); + r->d[1] = (r->d[1] >> n) + (r->d[2] << (32 - n)); + r->d[2] = (r->d[2] >> n) + (r->d[3] << (32 - n)); + r->d[3] = (r->d[3] >> n) + (r->d[4] << (32 - n)); + r->d[4] = (r->d[4] >> n) + (r->d[5] << (32 - n)); + r->d[5] = (r->d[5] >> n) + (r->d[6] << (32 - n)); + r->d[6] = (r->d[6] >> n) + (r->d[7] << (32 - n)); + r->d[7] = (r->d[7] >> n); + return ret; +} + static void secp256k1_scalar_sqr(secp256k1_scalar_t *r, const secp256k1_scalar_t *a) { uint32_t l[16]; secp256k1_scalar_sqr_512(l, a); diff --git a/src/scalar_impl.h b/src/scalar_impl.h index 33824983e4..6ed8865441 100644 --- a/src/scalar_impl.h +++ b/src/scalar_impl.h @@ -234,6 +234,11 @@ static void secp256k1_scalar_inverse(secp256k1_scalar_t *r, const secp256k1_scal secp256k1_scalar_mul(r, t, &x6); /* 111111 */ } +SECP256K1_INLINE static int secp256k1_scalar_is_even(const secp256k1_scalar_t *a) { + /* d[0] is present and is the lowest word for all representations */ + return !(a->d[0] & 1); +} + static void secp256k1_scalar_inverse_var(secp256k1_scalar_t *r, const secp256k1_scalar_t *x) { #if defined(USE_SCALAR_INV_BUILTIN) secp256k1_scalar_inverse(r, x); diff --git a/src/secp256k1.c b/src/secp256k1.c index 552c196aae..784bddb2ad 100644 --- a/src/secp256k1.c +++ b/src/secp256k1.c @@ -13,6 +13,8 @@ #include "field_impl.h" #include "scalar_impl.h" #include "group_impl.h" +#include "ecdsa_impl.h" +#include "ecdh_impl.h" #include "ecmult_impl.h" #include "ecmult_gen_impl.h" #include "ecdsa_impl.h" 
diff --git a/src/tests.c b/src/tests.c index 6e958256d0..a1a2d130a4 100644 --- a/src/tests.c +++ b/src/tests.c @@ -500,6 +500,20 @@ void scalar_test(void) { secp256k1_scalar_get_num(&rnum2, &r); CHECK(secp256k1_num_eq(&rnum, &rnum2)); } + + { + /* test secp256k1_scalar_shr_int */ + secp256k1_scalar_t r; + int i; + int low; + random_scalar_order_test(&r); + for (i = 0; i < 100; ++i) { + int shift = 1 + (secp256k1_rand32() % 15); + int expected = r.d[0] % (1 << shift); + low = secp256k1_scalar_shr_int(&r, shift); + CHECK(expected == low); + } + } #endif { @@ -1236,6 +1250,111 @@ void run_ge(void) { test_add_neg_y_diff_x(); } +/***** ECDH TESTS *****/ + +void ecdh_random_mult(void) { + /* random starting point A (on the curve) */ + secp256k1_ge_t a = SECP256K1_GE_CONST( + 0x6d986544, 0x57ff52b8, 0xcf1b8126, 0x5b802a5b, + 0xa97f9263, 0xb1e88044, 0x93351325, 0x91bc450a, + 0x535c59f7, 0x325e5d2b, 0xc391fbe8, 0x3c12787c, + 0x337e4a98, 0xe82a9011, 0x0123ba37, 0xdd769c7d + ); + /* random initial factor xn */ + secp256k1_scalar_t xn = SECP256K1_SCALAR_CONST( + 0x649d4f77, 0xc4242df7, 0x7f2079c9, 0x14530327, + 0xa31b876a, 0xd2d8ce2a, 0x2236d5c6, 0xd7b2029b + ); + /* expected xn * A (from sage) */ + secp256k1_ge_t expected_b = SECP256K1_GE_CONST( + 0x23773684, 0x4d209dc7, 0x098a786f, 0x20d06fcd, + 0x070a38bf, 0xc11ac651, 0x03004319, 0x1e2a8786, + 0xed8c3b8e, 0xc06dd57b, 0xd06ea66e, 0x45492b0f, + 0xb84e4e1b, 0xfb77e21f, 0x96baae2a, 0x63dec956 + ); + secp256k1_gej_t b; + secp256k1_point_multiply(&b, &a, &xn); + + CHECK(secp256k1_ge_is_valid_var(&a)); + ge_equals_gej(&expected_b, &b); +} + +void ecdh_commutativity(void) { + secp256k1_scalar_t a; + secp256k1_scalar_t b; + secp256k1_gej_t res1; + secp256k1_gej_t res2; + secp256k1_ge_t mid1; + secp256k1_ge_t mid2; + random_scalar_order_test(&a); + random_scalar_order_test(&b); + + secp256k1_point_multiply(&res1, &secp256k1_ge_const_g, &a); + secp256k1_point_multiply(&res2, &secp256k1_ge_const_g, &b); + secp256k1_ge_set_gej(&mid1, 
&res1); + secp256k1_ge_set_gej(&mid2, &res2); + secp256k1_point_multiply(&res1, &mid1, &b); + secp256k1_point_multiply(&res2, &mid2, &a); + secp256k1_ge_set_gej(&mid1, &res1); + secp256k1_ge_set_gej(&mid2, &res2); + ge_equals_ge(&mid1, &mid2); +} + +void ecdh_mult_zero_one(void) { + secp256k1_scalar_t zero = SECP256K1_SCALAR_CONST(0, 0, 0, 0, 0, 0, 0, 0); + secp256k1_scalar_t one = SECP256K1_SCALAR_CONST(0, 0, 0, 0, 0, 0, 0, 1); + secp256k1_scalar_t negone; + secp256k1_gej_t res1; + secp256k1_ge_t res2; + secp256k1_ge_t point; + secp256k1_scalar_negate(&negone, &one); + + random_group_element_test(&point); + secp256k1_point_multiply(&res1, &point, &zero); + secp256k1_ge_set_gej(&res2, &res1); + CHECK(secp256k1_ge_is_infinity(&res2)); + secp256k1_point_multiply(&res1, &point, &one); + secp256k1_ge_set_gej(&res2, &res1); + ge_equals_ge(&res2, &point); + secp256k1_point_multiply(&res1, &point, &negone); + secp256k1_gej_neg(&res1, &res1); + secp256k1_ge_set_gej(&res2, &res1); + ge_equals_ge(&res2, &point); +} + +void ecdh_chain_multiply(void) { + /* Check known result (randomly generated test problem from sage) */ + const secp256k1_scalar_t scalar = SECP256K1_SCALAR_CONST( + 0x4968d524, 0x2abf9b7a, 0x466abbcf, 0x34b11b6d, + 0xcd83d307, 0x827bed62, 0x05fad0ce, 0x18fae63b + ); + const secp256k1_gej_t expected_point = SECP256K1_GEJ_CONST( + 0x5494c15d, 0x32099706, 0xc2395f94, 0x348745fd, + 0x757ce30e, 0x4e8c90fb, 0xa2bad184, 0xf883c69f, + 0x5d195d20, 0xe191bf7f, 0x1be3e55f, 0x56a80196, + 0x6071ad01, 0xf1462f66, 0xc997fa94, 0xdb858435 + ); + secp256k1_gej_t point; + secp256k1_ge_t res; + int i; + + secp256k1_gej_set_ge(&point, &secp256k1_ge_const_g); + for (i = 0; i < 100; ++i) { + secp256k1_ge_t tmp; + secp256k1_ge_set_gej(&tmp, &point); + secp256k1_point_multiply(&point, &tmp, &scalar); + } + secp256k1_ge_set_gej(&res, &point); + ge_equals_gej(&res, &expected_point); +} + +void run_ecdh_tests(void) { + ecdh_mult_zero_one(); + ecdh_random_mult(); + ecdh_commutativity(); + 
ecdh_chain_multiply(); +} + /***** ECMULT TESTS *****/ void run_ecmult_chain(void) { @@ -1393,12 +1512,66 @@ void test_wnaf(const secp256k1_scalar_t *number, int w) { CHECK(secp256k1_scalar_eq(&x, number)); /* check that wnaf represents number */ } +void test_constant_wnaf_negate(const secp256k1_scalar_t *number) { + secp256k1_scalar_t neg1 = *number; + secp256k1_scalar_t neg2 = *number; + int sign1 = 1; + int sign2 = 1; + + if (!secp256k1_scalar_get_bits(&neg1, 0, 1)) { + secp256k1_scalar_negate(&neg1, &neg1); + sign1 = -1; + } + sign2 = secp256k1_scalar_cond_negate(&neg2, secp256k1_scalar_is_even(&neg2)); + CHECK(sign1 == sign2); + CHECK(secp256k1_scalar_eq(&neg1, &neg2)); +} + +void test_constant_wnaf(const secp256k1_scalar_t *number, int w) { + secp256k1_scalar_t x, shift; + int wnaf[256] = {0}; + int i; + + secp256k1_scalar_set_int(&x, 0); + secp256k1_scalar_set_int(&shift, 1 << w); + secp256k1_ecdh_wnaf(wnaf, number, w); + + for (i = WNAF_SIZE(w); i >= 0; --i) { + secp256k1_scalar_t t; + int v = wnaf[i]; + CHECK(v != 0); /* check nonzero */ + CHECK(v & 1); /* check parity */ + CHECK(v > -(1 << w)); /* check range above */ + CHECK(v < (1 << w)); /* check range below */ + + secp256k1_scalar_mul(&x, &x, &shift); + if (v >= 0) { + secp256k1_scalar_set_int(&t, v); + } else { + secp256k1_scalar_set_int(&t, -v); + secp256k1_scalar_negate(&t, &t); + } + secp256k1_scalar_add(&x, &x, &t); + } + CHECK(secp256k1_scalar_eq(&x, number)); +} + void run_wnaf(void) { int i; - secp256k1_scalar_t n; + secp256k1_scalar_t n = {{0}}; + + /* Sanity check: 1 and 2 are the smallest odd and even numbers and should + * have easier-to-diagnose failure modes */ + n.d[0] = 1; + test_constant_wnaf(&n, 4); + n.d[0] = 2; + test_constant_wnaf(&n, 4); + /* Random tests */ for (i = 0; i < count; i++) { random_scalar_order(&n); test_wnaf(&n, 4+(i%10)); + test_constant_wnaf_negate(&n); + test_constant_wnaf(&n, 4 + (i % 10)); } } @@ -2273,6 +2446,9 @@ int main(int argc, char **argv) { 
run_endomorphism_tests(); #endif + /* ecdh tests */ + run_ecdh_tests(); + /* ecdsa tests */ run_random_pubkeys(); run_ecdsa_sign_verify(); From 1bcca3510a7bab13df03514947071c11f4d4761c Mon Sep 17 00:00:00 2001 From: Andrew Poelstra Date: Mon, 29 Jun 2015 15:06:28 -0500 Subject: [PATCH 03/13] Expose API for constant time point multiplication --- include/secp256k1.h | 18 ++++++++++++++++++ src/secp256k1.c | 44 ++++++++++++++++++++++++++++++++++++++++++++ src/tests.c | 37 +++++++++++++++++++++++++++++++++++++ 3 files changed, 99 insertions(+) diff --git a/include/secp256k1.h b/include/secp256k1.h index beda1e7b5d..1bd429d0fd 100644 --- a/include/secp256k1.h +++ b/include/secp256k1.h @@ -217,6 +217,24 @@ SECP256K1_WARN_UNUSED_RESULT int secp256k1_ecdsa_recover_compact( int recid ) SECP256K1_ARG_NONNULL(1) SECP256K1_ARG_NONNULL(2) SECP256K1_ARG_NONNULL(3) SECP256K1_ARG_NONNULL(4) SECP256K1_ARG_NONNULL(5); +/** Compute an EC Diffie-Hellman secret in constant time + * Returns: 1: exponentiation was successful + * 0: scalar was zero (cannot serialize output point) + * -1: scalar overflow + * -2: invalid input point + * In: scalar: a 32-byte scalar with which to multiply the point + * point: pointer to 33 or 65 byte array containing an EC point + * pointlen: length of the point array + * Out: result: a 32-byte array which will be populated by an ECDH + * secret computed from the point and scalar + */ +SECP256K1_WARN_UNUSED_RESULT int secp256k1_ecdh( + unsigned char *result, + unsigned char *point, + int *pointlen, + const unsigned char *scalar +) SECP256K1_ARG_NONNULL(1) SECP256K1_ARG_NONNULL(2) SECP256K1_ARG_NONNULL(3) SECP256K1_ARG_NONNULL(4); + /** Verify an ECDSA secret key. 
* Returns: 1: secret key is valid * 0: secret key is invalid diff --git a/src/secp256k1.c b/src/secp256k1.c index 784bddb2ad..0db661172b 100644 --- a/src/secp256k1.c +++ b/src/secp256k1.c @@ -225,6 +225,50 @@ int secp256k1_ecdsa_recover_compact(const secp256k1_context_t* ctx, const unsign return ret; } +int secp256k1_ecdh(unsigned char *result, unsigned char *point, int *pointlen, const unsigned char *scalar) { + int ret = 0; + int overflow = 0; + secp256k1_gej_t res; + secp256k1_ge_t pt; + secp256k1_scalar_t s; + DEBUG_CHECK(point != NULL); + DEBUG_CHECK(pointlen != NULL); + DEBUG_CHECK(scalar != NULL); + + if (secp256k1_eckey_pubkey_parse(&pt, point, *pointlen)) { + secp256k1_scalar_set_b32(&s, scalar, &overflow); + if (secp256k1_scalar_is_zero(&s)) { + ret = -1; + } else if (overflow) { + ret = -2; + } else { + unsigned char x[32]; + unsigned char y[1]; + secp256k1_sha256_t sha; + + secp256k1_point_multiply(&res, &pt, &s); + secp256k1_ge_set_gej(&pt, &res); + /* Compute a hash of the point in compressed form + * Note we cannot use secp256k1_eckey_pubkey_serialize here since it does not + * expect its output to be secret and has a timing sidechannel. 
*/ + secp256k1_fe_normalize(&pt.x); + secp256k1_fe_normalize(&pt.y); + secp256k1_fe_get_b32(x, &pt.x); + y[0] = 0x02 | secp256k1_fe_is_odd(&pt.y); + + secp256k1_sha256_initialize(&sha); + secp256k1_sha256_write(&sha, y, sizeof(y)); + secp256k1_sha256_write(&sha, x, sizeof(x)); + secp256k1_sha256_finalize(&sha, result); + ret = 1; + } + } else { + ret = -3; + } + secp256k1_scalar_clear(&s); + return ret; +} + int secp256k1_ec_seckey_verify(const secp256k1_context_t* ctx, const unsigned char *seckey) { secp256k1_scalar_t sec; int ret; diff --git a/src/tests.c b/src/tests.c index a1a2d130a4..c1f8b547ca 100644 --- a/src/tests.c +++ b/src/tests.c @@ -1348,6 +1348,39 @@ void ecdh_chain_multiply(void) { ge_equals_gej(&res, &expected_point); } +void ecdh_generator_basepoint(void) { + secp256k1_ge_t gen = secp256k1_ge_const_g; + unsigned char point[33]; + unsigned char point2[33]; + int pointlen = sizeof(point), point2len = sizeof(point2); + int i; + + /* Check against pubkey creation when the basepoint is the generator */ + for (i = 0; i < 100; ++i) { + secp256k1_sha256_t sha; + unsigned char s_b32[32]; + unsigned char output_ecdh[32]; + unsigned char output_ser[32]; + secp256k1_scalar_t s; + + random_scalar_order(&s); + secp256k1_scalar_get_b32(s_b32, &s); + + /* compute using ECDH function */ + secp256k1_eckey_pubkey_serialize(&gen, point, &pointlen, 1); + CHECK(secp256k1_ecdh(output_ecdh, point, &pointlen, s_b32) == 1); + /* compute "explicitly" */ + secp256k1_eckey_pubkey_serialize(&gen, point2, &point2len, 1); + CHECK(secp256k1_ec_pubkey_create(ctx, point2, &point2len, s_b32, 1) == 1); + + secp256k1_sha256_initialize(&sha); + secp256k1_sha256_write(&sha, point2, sizeof(point2)); + secp256k1_sha256_finalize(&sha, output_ser); + /* compare */ + CHECK(memcmp(output_ecdh, output_ser, sizeof(output_ser)) == 0); + } +} + void run_ecdh_tests(void) { ecdh_mult_zero_one(); ecdh_random_mult(); @@ -1355,6 +1388,10 @@ void run_ecdh_tests(void) { ecdh_chain_multiply(); } +void 
run_ecdh_api_tests(void) { + ecdh_generator_basepoint(); +} + /***** ECMULT TESTS *****/ void run_ecmult_chain(void) { From 6b43041f90ffefa7708c58962ce659d79aaa6640 Mon Sep 17 00:00:00 2001 From: Andrew Poelstra Date: Fri, 15 May 2015 14:46:08 -0500 Subject: [PATCH 04/13] Add benchmark for ECDH multiplication --- .gitignore | 1 + Makefile.am | 6 +++++- src/bench_ecdh.c | 50 ++++++++++++++++++++++++++++++++++++++++++++ src/bench_internal.c | 12 +++++++++++ 4 files changed, 68 insertions(+), 1 deletion(-) create mode 100644 src/bench_ecdh.c diff --git a/.gitignore b/.gitignore index 076ff1295f..a697a794cc 100644 --- a/.gitignore +++ b/.gitignore @@ -1,4 +1,5 @@ bench_inv +bench_ecdh bench_sign bench_verify bench_recover diff --git a/Makefile.am b/Makefile.am index 5adba36fe6..ea9770e011 100644 --- a/Makefile.am +++ b/Makefile.am @@ -51,7 +51,7 @@ libsecp256k1_la_LIBADD = $(SECP_LIBS) noinst_PROGRAMS = if USE_BENCHMARK -noinst_PROGRAMS += bench_verify bench_recover bench_sign bench_internal +noinst_PROGRAMS += bench_verify bench_recover bench_sign bench_internal bench_ecdh bench_verify_SOURCES = src/bench_verify.c bench_verify_LDADD = libsecp256k1.la $(SECP_LIBS) bench_verify_LDFLAGS = -static @@ -65,6 +65,10 @@ bench_internal_SOURCES = src/bench_internal.c bench_internal_LDADD = $(SECP_LIBS) bench_internal_LDFLAGS = -static bench_internal_CPPFLAGS = $(SECP_INCLUDES) +bench_ecdh_SOURCES = src/bench_ecdh.c +bench_ecdh_LDADD = libsecp256k1.la $(SECP_LIBS) +bench_ecdh_LDFLAGS = -static +bench_ecdh_CPPFLAGS = $(SECP_INCLUDES) endif if USE_TESTS diff --git a/src/bench_ecdh.c b/src/bench_ecdh.c new file mode 100644 index 0000000000..0b4cd83177 --- /dev/null +++ b/src/bench_ecdh.c @@ -0,0 +1,50 @@ +/********************************************************************** + * Copyright (c) 2015 Pieter Wuille, Andrew Poelstra * + * Distributed under the MIT software license, see the accompanying * + * file COPYING or http://www.opensource.org/licenses/mit-license.php.* + 
+#include <string.h> + +#include "include/secp256k1.h"
bench_group_add_affine, bench_setup, NULL, &data, 10, 200000); if (have_flag(argc, argv, "group") || have_flag(argc, argv, "add")) run_benchmark("group_add_affine_var", bench_group_add_affine_var, bench_setup, NULL, &data, 10, 200000); + if (have_flag(argc, argv, "ecdh") || have_flag(argc, argv, "wnaf")) run_benchmark("ecdh_wnaf", bench_ecdh_wnaf, bench_setup, NULL, &data, 10, 20000); if (have_flag(argc, argv, "ecmult") || have_flag(argc, argv, "wnaf")) run_benchmark("ecmult_wnaf", bench_ecmult_wnaf, bench_setup, NULL, &data, 10, 20000); if (have_flag(argc, argv, "hash") || have_flag(argc, argv, "sha256")) run_benchmark("hash_sha256", bench_sha256, bench_setup, NULL, &data, 10, 20000); From 59c57209c224db570e33cf7ee7f657b61942b919 Mon Sep 17 00:00:00 2001 From: Andrew Poelstra Date: Fri, 22 May 2015 11:51:51 -0500 Subject: [PATCH 05/13] Make `secp256k1_scalar_add_bit` conditional; make `secp256k1_scalar_split_lambda_var` constant time This has the effect of making `secp256k1_scalar_mul_shift_var` constant time in both input scalars. Keep the _var name because it is NOT constant time in the shift amount. As used in `secp256k1_scalar_split_lambda_var`, the shift is always the constant 272, so this function becomes constant time, and it loses the `_var` suffix. 
--- src/bench_internal.c | 2 +- src/ecmult_impl.h | 4 ++-- src/scalar.h | 6 +++--- src/scalar_4x64_impl.h | 7 +++---- src/scalar_8x32_impl.h | 7 +++---- src/scalar_impl.h | 3 ++- src/tests.c | 7 +++++-- 7 files changed, 19 insertions(+), 17 deletions(-) diff --git a/src/bench_internal.c b/src/bench_internal.c index b33985a1a3..00080d4bdf 100644 --- a/src/bench_internal.c +++ b/src/bench_internal.c @@ -97,7 +97,7 @@ void bench_scalar_split(void* arg) { for (i = 0; i < 20000; i++) { secp256k1_scalar_t l, r; - secp256k1_scalar_split_lambda_var(&l, &r, &data->scalar_x); + secp256k1_scalar_split_lambda(&l, &r, &data->scalar_x); secp256k1_scalar_add(&data->scalar_x, &data->scalar_x, &data->scalar_y); } } diff --git a/src/ecmult_impl.h b/src/ecmult_impl.h index d6aa2ea7db..fa30378507 100644 --- a/src/ecmult_impl.h +++ b/src/ecmult_impl.h @@ -242,7 +242,7 @@ static int secp256k1_ecmult_wnaf(int *wnaf, const secp256k1_scalar_t *a, int w) } word = secp256k1_scalar_get_bits_var(&s, bit, now); if (word & (1 << (w-1))) { - secp256k1_scalar_add_bit(&s, bit + w); + secp256k1_scalar_cadd_bit(&s, bit + w, 1); wnaf[set_bits++] = sign * (word - (1 << w)); } else { wnaf[set_bits++] = sign * word; @@ -280,7 +280,7 @@ static void secp256k1_ecmult(const secp256k1_ecmult_context_t *ctx, secp256k1_ge #ifdef USE_ENDOMORPHISM /* split na into na_1 and na_lam (where na = na_1 + na_lam*lambda, and na_1 and na_lam are ~128 bit) */ - secp256k1_scalar_split_lambda_var(&na_1, &na_lam, na); + secp256k1_scalar_split_lambda(&na_1, &na_lam, na); /* build wnaf representation for na_1 and na_lam. */ bits_na_1 = secp256k1_ecmult_wnaf(wnaf_na_1, &na_1, WINDOW_A); diff --git a/src/scalar.h b/src/scalar.h index f33e72a7c9..72e12b86fc 100644 --- a/src/scalar.h +++ b/src/scalar.h @@ -42,8 +42,8 @@ static void secp256k1_scalar_get_b32(unsigned char *bin, const secp256k1_scalar_ /** Add two scalars together (modulo the group order). Returns whether it overflowed. 
*/ static int secp256k1_scalar_add(secp256k1_scalar_t *r, const secp256k1_scalar_t *a, const secp256k1_scalar_t *b); -/** Add a power of two to a scalar. The result is not allowed to overflow. */ -static void secp256k1_scalar_add_bit(secp256k1_scalar_t *r, unsigned int bit); +/** Conditionally add a power of two to a scalar. The result is not allowed to overflow. */ +static void secp256k1_scalar_cadd_bit(secp256k1_scalar_t *r, unsigned int bit, int flag); /** Multiply two scalars (modulo the group order). */ static void secp256k1_scalar_mul(secp256k1_scalar_t *r, const secp256k1_scalar_t *a, const secp256k1_scalar_t *b); @@ -95,7 +95,7 @@ static int secp256k1_scalar_eq(const secp256k1_scalar_t *a, const secp256k1_scal /** Find r1 and r2 such that r1+r2*2^128 = a. */ static void secp256k1_scalar_split_128(secp256k1_scalar_t *r1, secp256k1_scalar_t *r2, const secp256k1_scalar_t *a); /** Find r1 and r2 such that r1+r2*lambda = a, and r1 and r2 are maximum 128 bits long (see secp256k1_gej_mul_lambda). */ -static void secp256k1_scalar_split_lambda_var(secp256k1_scalar_t *r1, secp256k1_scalar_t *r2, const secp256k1_scalar_t *a); +static void secp256k1_scalar_split_lambda(secp256k1_scalar_t *r1, secp256k1_scalar_t *r2, const secp256k1_scalar_t *a); #endif /** Multiply a and b (without taking the modulus!), divide by 2**shift, and round to the nearest integer. Shift must be at least 256. 
*/ diff --git a/src/scalar_4x64_impl.h b/src/scalar_4x64_impl.h index 147229dab9..09431e0df7 100644 --- a/src/scalar_4x64_impl.h +++ b/src/scalar_4x64_impl.h @@ -96,9 +96,10 @@ static int secp256k1_scalar_add(secp256k1_scalar_t *r, const secp256k1_scalar_t return overflow; } -static void secp256k1_scalar_add_bit(secp256k1_scalar_t *r, unsigned int bit) { +static void secp256k1_scalar_cadd_bit(secp256k1_scalar_t *r, unsigned int bit, int flag) { uint128_t t; VERIFY_CHECK(bit < 256); + bit += ((uint32_t) flag - 1) & 0x100; /* forcing (bit >> 6) > 3 makes this a noop */ t = (uint128_t)r->d[0] + (((uint64_t)((bit >> 6) == 0)) << (bit & 0x3F)); r->d[0] = t & 0xFFFFFFFFFFFFFFFFULL; t >>= 64; t += (uint128_t)r->d[1] + (((uint64_t)((bit >> 6) == 1)) << (bit & 0x3F)); @@ -940,9 +941,7 @@ SECP256K1_INLINE static void secp256k1_scalar_mul_shift_var(secp256k1_scalar_t * r->d[1] = shift < 448 ? (l[1 + shiftlimbs] >> shiftlow | (shift < 384 && shiftlow ? (l[2 + shiftlimbs] << shifthigh) : 0)) : 0; r->d[2] = shift < 384 ? (l[2 + shiftlimbs] >> shiftlow | (shift < 320 && shiftlow ? (l[3 + shiftlimbs] << shifthigh) : 0)) : 0; r->d[3] = shift < 320 ? 
(l[3 + shiftlimbs] >> shiftlow) : 0; - if ((l[(shift - 1) >> 6] >> ((shift - 1) & 0x3f)) & 1) { - secp256k1_scalar_add_bit(r, 0); - } + secp256k1_scalar_cadd_bit(r, 0, (l[(shift - 1) >> 6] >> ((shift - 1) & 0x3f)) & 1); } #endif diff --git a/src/scalar_8x32_impl.h b/src/scalar_8x32_impl.h index 0ad2423db0..54923eb45f 100644 --- a/src/scalar_8x32_impl.h +++ b/src/scalar_8x32_impl.h @@ -136,9 +136,10 @@ static int secp256k1_scalar_add(secp256k1_scalar_t *r, const secp256k1_scalar_t return overflow; } -static void secp256k1_scalar_add_bit(secp256k1_scalar_t *r, unsigned int bit) { +static void secp256k1_scalar_cadd_bit(secp256k1_scalar_t *r, unsigned int bit, int flag) { uint64_t t; VERIFY_CHECK(bit < 256); + bit += ((uint32_t) flag - 1) & 0x100; /* forcing (bit >> 5) > 7 makes this a noop */ t = (uint64_t)r->d[0] + (((uint32_t)((bit >> 5) == 0)) << (bit & 0x1F)); r->d[0] = t & 0xFFFFFFFFULL; t >>= 32; t += (uint64_t)r->d[1] + (((uint32_t)((bit >> 5) == 1)) << (bit & 0x1F)); @@ -714,9 +715,7 @@ SECP256K1_INLINE static void secp256k1_scalar_mul_shift_var(secp256k1_scalar_t * r->d[5] = shift < 352 ? (l[5 + shiftlimbs] >> shiftlow | (shift < 320 && shiftlow ? (l[6 + shiftlimbs] << shifthigh) : 0)) : 0; r->d[6] = shift < 320 ? (l[6 + shiftlimbs] >> shiftlow | (shift < 288 && shiftlow ? (l[7 + shiftlimbs] << shifthigh) : 0)) : 0; r->d[7] = shift < 288 ? (l[7 + shiftlimbs] >> shiftlow) : 0; - if ((l[(shift - 1) >> 5] >> ((shift - 1) & 0x1f)) & 1) { - secp256k1_scalar_add_bit(r, 0); - } + secp256k1_scalar_cadd_bit(r, 0, (l[(shift - 1) >> 5] >> ((shift - 1) & 0x1f)) & 1); } #endif diff --git a/src/scalar_impl.h b/src/scalar_impl.h index 6ed8865441..dda103160e 100644 --- a/src/scalar_impl.h +++ b/src/scalar_impl.h @@ -295,7 +295,7 @@ static void secp256k1_scalar_inverse_var(secp256k1_scalar_t *r, const secp256k1_ * The function below splits a in r1 and r2, such that r1 + lambda * r2 == a (mod order). 
*/ -static void secp256k1_scalar_split_lambda_var(secp256k1_scalar_t *r1, secp256k1_scalar_t *r2, const secp256k1_scalar_t *a) { +static void secp256k1_scalar_split_lambda(secp256k1_scalar_t *r1, secp256k1_scalar_t *r2, const secp256k1_scalar_t *a) { secp256k1_scalar_t c1, c2; static const secp256k1_scalar_t minus_lambda = SECP256K1_SCALAR_CONST( 0xAC9C52B3UL, 0x3FA3CF1FUL, 0x5AD9E3FDUL, 0x77ED9BA4UL, @@ -319,6 +319,7 @@ static void secp256k1_scalar_split_lambda_var(secp256k1_scalar_t *r1, secp256k1_ ); VERIFY_CHECK(r1 != a); VERIFY_CHECK(r2 != a); + /* these _var calls are constant time since the shift amount is constant */ secp256k1_scalar_mul_shift_var(&c1, a, &g1, 272); secp256k1_scalar_mul_shift_var(&c2, a, &g2, 272); secp256k1_scalar_mul(&c1, &c1, &minus_b1); diff --git a/src/tests.c b/src/tests.c index c1f8b547ca..d60dc59e43 100644 --- a/src/tests.c +++ b/src/tests.c @@ -562,7 +562,10 @@ void scalar_test(void) { r2 = s1; if (!secp256k1_scalar_add(&r1, &r1, &b)) { /* No overflow happened. 
*/ - secp256k1_scalar_add_bit(&r2, bit); + secp256k1_scalar_cadd_bit(&r2, bit, 1); + CHECK(secp256k1_scalar_eq(&r1, &r2)); + /* cadd is a noop when flag is zero */ + secp256k1_scalar_cadd_bit(&r2, bit, 0); CHECK(secp256k1_scalar_eq(&r1, &r2)); } } @@ -1701,7 +1704,7 @@ void test_scalar_split(void) { unsigned char tmp[32]; random_scalar_order_test(&full); - secp256k1_scalar_split_lambda_var(&s1, &slam, &full); + secp256k1_scalar_split_lambda(&s1, &slam, &full); /* check that both are <= 128 bits in size */ if (secp256k1_scalar_is_high(&s1)) From 48dc8e3001a927f42c5a7f46c540e0c1b2f76909 Mon Sep 17 00:00:00 2001 From: Andrew Poelstra Date: Fri, 22 May 2015 12:09:36 -0500 Subject: [PATCH 06/13] Implement endomorphism optimization for secp256k1_ecdh_point_multiply --- src/bench_internal.c | 2 +- src/ecdh_impl.h | 125 +++++++++++++++++++++++++++++++++++++++---- src/tests.c | 19 ++++++- 3 files changed, 132 insertions(+), 14 deletions(-) diff --git a/src/bench_internal.c b/src/bench_internal.c index 00080d4bdf..a6ae9dd235 100644 --- a/src/bench_internal.c +++ b/src/bench_internal.c @@ -240,7 +240,7 @@ void bench_ecdh_wnaf(void* arg) { bench_inv_t *data = (bench_inv_t*)arg; for (i = 0; i < 20000; i++) { - secp256k1_ecdh_wnaf(data->wnaf, &data->scalar_x, WINDOW_A); + secp256k1_ecdh_wnaf(data->wnaf, data->scalar_x, WINDOW_A); secp256k1_scalar_add(&data->scalar_x, &data->scalar_x, &data->scalar_y); } } diff --git a/src/ecdh_impl.h b/src/ecdh_impl.h index 624ec76dbb..d07030dadd 100644 --- a/src/ecdh_impl.h +++ b/src/ecdh_impl.h @@ -12,7 +12,11 @@ #include "ecdh.h" #include "ecmult_impl.h" -#define WNAF_BITS 256 +#ifdef USE_ENDOMORPHISM + #define WNAF_BITS 128 +#else + #define WNAF_BITS 256 +#endif #define WNAF_SIZE(w) ((WNAF_BITS + (w) - 1) / (w)) /** Convert a number to WNAF notation. The number becomes represented by sum(2^{wi} * wnaf[i], i=0..return_val) @@ -27,17 +31,47 @@ * * Numbers reference steps of `Algorithm SPA-resistant Width-w NAF with Odd Scalar` on pp. 
335 */ -static void secp256k1_ecdh_wnaf(int *wnaf, const secp256k1_scalar_t *a, int w) { - secp256k1_scalar_t s = *a; - /* Negate to force oddness */ - int is_even = secp256k1_scalar_is_even(&s); - int global_sign = secp256k1_scalar_cond_negate(&s, is_even); - +static int secp256k1_ecdh_wnaf(int *wnaf, secp256k1_scalar_t s, int w) { + int global_sign = 1; + int skew = 0; int word = 0; /* 1 2 3 */ - int u_last = secp256k1_scalar_shr_int(&s, w); + int u_last; int u; + +#ifdef USE_ENDOMORPHISM + /* If we are using the endomorphism, we cannot handle even numbers by negating + * them, since we are working with 128-bit numbers whose negations would be 256 + * bits, eliminating the performance advantage. Instead we use a technique from + * Section 4.2 of the Okeya/Tagaki paper, which is to add either 1 (for even) + * or 2 (for odd) to the number we are encoding, then compensating after the + * multiplication. */ + /* Negative 128-bit numbers will be negated, since otherwise they are 256-bit */ + int flip = secp256k1_scalar_is_high(&s); + /* We add 1 to even numbers, 2 to odd ones, noting that negation flips parity */ + int bit = flip ^ (s.d[0] & 1); + /* We check for negative one, since adding 2 to it will cause an overflow */ + secp256k1_scalar_t neg_s; + int not_neg_one; + secp256k1_scalar_negate(&neg_s, &s); + not_neg_one = !secp256k1_scalar_is_one(&neg_s); + secp256k1_scalar_cadd_bit(&s, bit, not_neg_one); + /* If we had negative one, flip == 1, s.d[0] == 0, bit == 1, so caller expects + * that we added two to it and flipped it. In fact for -1 these operations are + * identical. We only flipped, but since skewing is required (in the sense that + * the skew must be 1 or 2, never zero) and flipping is not, we need to change + * our flags to claim that we only skewed. 
*/ + global_sign = secp256k1_scalar_cond_negate(&s, flip); + global_sign *= not_neg_one * 2 - 1; + skew = 1 << bit; +#else + /* Otherwise, we just negate to force oddness */ + int is_even = secp256k1_scalar_is_even(&s); + global_sign = secp256k1_scalar_cond_negate(&s, is_even); +#endif + /* 4 */ + u_last = secp256k1_scalar_shr_int(&s, w); while (word * w < WNAF_BITS) { int sign; int even; @@ -59,6 +93,7 @@ static void secp256k1_ecdh_wnaf(int *wnaf, const secp256k1_scalar_t *a, int w) { VERIFY_CHECK(secp256k1_scalar_is_zero(&s)); VERIFY_CHECK(word == WNAF_SIZE(w)); + return skew; } @@ -67,17 +102,37 @@ static void secp256k1_point_multiply(secp256k1_gej_t *r, const secp256k1_ge_t *a secp256k1_ge_t tmpa; secp256k1_fe_t Z; +#ifdef USE_ENDOMORPHISM + secp256k1_ge_t pre_a_lam[ECMULT_TABLE_SIZE(WINDOW_A)]; + int wnaf_1[1 + WNAF_SIZE(WINDOW_A - 1)]; + int wnaf_lam[1 + WNAF_SIZE(WINDOW_A - 1)]; + int skew_1; + int skew_lam; + secp256k1_scalar_t q_1, q_lam; +#else int wnaf[1 + WNAF_SIZE(WINDOW_A - 1)]; +#endif int i; - int is_zero = secp256k1_scalar_is_zero(scalar); secp256k1_scalar_t sc = *scalar; + + /* build wnaf representation for q. */ +#ifdef USE_ENDOMORPHISM + /* split q into q_1 and q_lam (where q = q_1 + q_lam*lambda, and q_1 and q_lam are ~128 bit) */ + secp256k1_scalar_split_lambda(&q_1, &q_lam, &sc); + /* no need for zero correction when using endomorphism since even + * numbers have one added to them anyway */ + skew_1 = secp256k1_ecdh_wnaf(wnaf_1, q_1, WINDOW_A - 1); + skew_lam = secp256k1_ecdh_wnaf(wnaf_lam, q_lam, WINDOW_A - 1); +#else + int is_zero = secp256k1_scalar_is_zero(scalar); /* the wNAF ladder cannot handle zero, so bump this to one .. we will * correct the result after the fact */ sc.d[0] += is_zero; + VERIFY_CHECK(!secp256k1_scalar_is_zero(&sc)); - /* build wnaf representation for q. */ - secp256k1_ecdh_wnaf(wnaf, &sc, WINDOW_A - 1); + secp256k1_ecdh_wnaf(wnaf, sc, WINDOW_A - 1); +#endif /* Calculate odd multiples of a. 
* All multiples are brought to the same Z 'denominator', which is stored @@ -87,6 +142,11 @@ static void secp256k1_point_multiply(secp256k1_gej_t *r, const secp256k1_ge_t *a */ secp256k1_gej_set_ge(r, a); secp256k1_ecmult_odd_multiples_table_globalz_windowa(pre_a, &Z, r); +#ifdef USE_ENDOMORPHISM + for (i = 0; i < ECMULT_TABLE_SIZE(WINDOW_A); i++) { + secp256k1_ge_mul_lambda(&pre_a_lam[i], &pre_a[i]); + } +#endif secp256k1_gej_set_infinity(r); for (i = WNAF_SIZE(WINDOW_A - 1); i >= 0; i--) { @@ -99,18 +159,61 @@ static void secp256k1_point_multiply(secp256k1_gej_t *r, const secp256k1_ge_t *a * that iteration, so this does not result in a timing leak. */ secp256k1_gej_double_var(r, r, NULL); } +#ifdef USE_ENDOMORPHISM + n = wnaf_1[i]; + ECMULT_TABLE_GET_GE(&tmpa, pre_a, n, WINDOW_A); + VERIFY_CHECK(n != 0); + secp256k1_gej_add_ge(r, r, &tmpa); + + n = wnaf_lam[i]; + ECMULT_TABLE_GET_GE(&tmpa, pre_a_lam, n, WINDOW_A); + VERIFY_CHECK(n != 0); + secp256k1_gej_add_ge(r, r, &tmpa); +#else n = wnaf[i]; VERIFY_CHECK(n != 0); ECMULT_TABLE_GET_GE(&tmpa, pre_a, n, WINDOW_A); secp256k1_gej_add_ge(r, r, &tmpa); +#endif } if (!r->infinity) { secp256k1_fe_mul(&r->z, &r->z, &Z); } +#ifdef USE_ENDOMORPHISM + { + /* Correct for wNAF skew */ + secp256k1_ge_t correction = *a; + secp256k1_ge_storage_t correction_1_stor; + secp256k1_ge_storage_t correction_lam_stor; + secp256k1_ge_storage_t a2_stor; + secp256k1_gej_t tmpj; + secp256k1_gej_set_ge(&tmpj, &correction); + secp256k1_gej_double_var(&tmpj, &tmpj, NULL); + secp256k1_ge_set_gej(&correction, &tmpj); + secp256k1_ge_to_storage(&correction_1_stor, a); + secp256k1_ge_to_storage(&correction_lam_stor, a); + secp256k1_ge_to_storage(&a2_stor, &correction); + + /* For odd numbers this is 2a (so replace it), for even ones a (so no-op) */ + secp256k1_ge_storage_cmov(&correction_1_stor, &a2_stor, skew_1 == 2); + secp256k1_ge_storage_cmov(&correction_lam_stor, &a2_stor, skew_lam == 2); + + /* Apply the correction */ + 
secp256k1_ge_from_storage(&correction, &correction_1_stor); + secp256k1_ge_neg(&correction, &correction); + secp256k1_gej_add_ge(r, r, &correction); + + secp256k1_ge_from_storage(&correction, &correction_lam_stor); + secp256k1_ge_neg(&correction, &correction); + secp256k1_ge_mul_lambda(&correction, &correction); + secp256k1_gej_add_ge(r, r, &correction); + } +#else /* correct for zero */ r->infinity |= is_zero; +#endif } #endif diff --git a/src/tests.c b/src/tests.c index d60dc59e43..3dc0f3b86b 100644 --- a/src/tests.c +++ b/src/tests.c @@ -1571,10 +1571,21 @@ void test_constant_wnaf(const secp256k1_scalar_t *number, int w) { secp256k1_scalar_t x, shift; int wnaf[256] = {0}; int i; +#ifdef USE_ENDOMORPHISM + int skew; +#endif + secp256k1_scalar_t num = *number; secp256k1_scalar_set_int(&x, 0); secp256k1_scalar_set_int(&shift, 1 << w); - secp256k1_ecdh_wnaf(wnaf, number, w); + /* With USE_ENDOMORPHISM on we only consider 128-bit numbers */ +#ifdef USE_ENDOMORPHISM + for (i = 0; i < 16; ++i) + secp256k1_scalar_shr_int(&num, 8); + skew = secp256k1_ecdh_wnaf(wnaf, num, w); +#else + secp256k1_ecdh_wnaf(wnaf, num, w); +#endif for (i = WNAF_SIZE(w); i >= 0; --i) { secp256k1_scalar_t t; @@ -1593,7 +1604,11 @@ void test_constant_wnaf(const secp256k1_scalar_t *number, int w) { } secp256k1_scalar_add(&x, &x, &t); } - CHECK(secp256k1_scalar_eq(&x, number)); +#ifdef USE_ENDOMORPHISM + /* Skew num because when encoding 128-bit numbers as odd we use an offset */ + secp256k1_scalar_cadd_bit(&num, skew == 2, 1); +#endif + CHECK(secp256k1_scalar_eq(&x, &num)); } void run_wnaf(void) { From 171470e8405ea19b11f08decf142a7e49fad25a2 Mon Sep 17 00:00:00 2001 From: Andrew Poelstra Date: Mon, 29 Jun 2015 15:06:28 -0500 Subject: [PATCH 07/13] Expose API for constant time point multiplication --- include/secp256k1.h | 18 ++++++++++++++++++ src/secp256k1.c | 44 ++++++++++++++++++++++++++++++++++++++++++++ src/tests.c | 38 ++++++++++++++++++++++++++++++++++++++ 3 files changed, 100 
insertions(+) diff --git a/include/secp256k1.h b/include/secp256k1.h index beda1e7b5d..c006ad4912 100644 --- a/include/secp256k1.h +++ b/include/secp256k1.h @@ -217,6 +217,24 @@ SECP256K1_WARN_UNUSED_RESULT int secp256k1_ecdsa_recover_compact( int recid ) SECP256K1_ARG_NONNULL(1) SECP256K1_ARG_NONNULL(2) SECP256K1_ARG_NONNULL(3) SECP256K1_ARG_NONNULL(4) SECP256K1_ARG_NONNULL(5); +/** Compute an EC Diffie-Hellman secret in constant time + * Returns: 1: exponentiation was successful + * -1: scalar was zero + * -2: scalar overflow + * -3: invalid input point + * In: scalar: a 32-byte scalar with which to multiply the point + * point: pointer to 33 or 65 byte array containing an EC point + * pointlen: length of the point array + * Out: result: a 32-byte array which will be populated by an ECDH + * secret computed from the point and scalar + */ +SECP256K1_WARN_UNUSED_RESULT int secp256k1_ecdh( + unsigned char *result, + unsigned char *point, + int *pointlen, + const unsigned char *scalar +) SECP256K1_ARG_NONNULL(1) SECP256K1_ARG_NONNULL(2) SECP256K1_ARG_NONNULL(3) SECP256K1_ARG_NONNULL(4); + /** Verify an ECDSA secret key. 
* Returns: 1: secret key is valid * 0: secret key is invalid diff --git a/src/secp256k1.c b/src/secp256k1.c index 784bddb2ad..0db661172b 100644 --- a/src/secp256k1.c +++ b/src/secp256k1.c @@ -225,6 +225,50 @@ int secp256k1_ecdsa_recover_compact(const secp256k1_context_t* ctx, const unsign return ret; } +int secp256k1_ecdh(unsigned char *result, unsigned char *point, int *pointlen, const unsigned char *scalar) { + int ret = 0; + int overflow = 0; + secp256k1_gej_t res; + secp256k1_ge_t pt; + secp256k1_scalar_t s; + DEBUG_CHECK(point != NULL); + DEBUG_CHECK(pointlen != NULL); + DEBUG_CHECK(scalar != NULL); + + if (secp256k1_eckey_pubkey_parse(&pt, point, *pointlen)) { + secp256k1_scalar_set_b32(&s, scalar, &overflow); + if (secp256k1_scalar_is_zero(&s)) { + ret = -1; + } else if (overflow) { + ret = -2; + } else { + unsigned char x[32]; + unsigned char y[1]; + secp256k1_sha256_t sha; + + secp256k1_point_multiply(&res, &pt, &s); + secp256k1_ge_set_gej(&pt, &res); + /* Compute a hash of the point in compressed form + * Note we cannot use secp256k1_eckey_pubkey_serialize here since it does not + * expect its output to be secret and has a timing sidechannel. 
*/ + secp256k1_fe_normalize(&pt.x); + secp256k1_fe_normalize(&pt.y); + secp256k1_fe_get_b32(x, &pt.x); + y[0] = 0x02 | secp256k1_fe_is_odd(&pt.y); + + secp256k1_sha256_initialize(&sha); + secp256k1_sha256_write(&sha, y, sizeof(y)); + secp256k1_sha256_write(&sha, x, sizeof(x)); + secp256k1_sha256_finalize(&sha, result); + ret = 1; + } + } else { + ret = -3; + } + secp256k1_scalar_clear(&s); + return ret; +} + int secp256k1_ec_seckey_verify(const secp256k1_context_t* ctx, const unsigned char *seckey) { secp256k1_scalar_t sec; int ret; diff --git a/src/tests.c b/src/tests.c index a1a2d130a4..75a73b738b 100644 --- a/src/tests.c +++ b/src/tests.c @@ -1348,6 +1348,39 @@ void ecdh_chain_multiply(void) { ge_equals_gej(&res, &expected_point); } +void ecdh_generator_basepoint(void) { + secp256k1_ge_t gen = secp256k1_ge_const_g; + unsigned char point[33]; + unsigned char point2[33]; + int pointlen = sizeof(point), point2len = sizeof(point2); + int i; + + /* Check against pubkey creation when the basepoint is the generator */ + for (i = 0; i < 100; ++i) { + secp256k1_sha256_t sha; + unsigned char s_b32[32]; + unsigned char output_ecdh[32]; + unsigned char output_ser[32]; + secp256k1_scalar_t s; + + random_scalar_order(&s); + secp256k1_scalar_get_b32(s_b32, &s); + + /* compute using ECDH function */ + secp256k1_eckey_pubkey_serialize(&gen, point, &pointlen, 1); + CHECK(secp256k1_ecdh(output_ecdh, point, &pointlen, s_b32) == 1); + /* compute "explicitly" */ + secp256k1_eckey_pubkey_serialize(&gen, point2, &point2len, 1); + CHECK(secp256k1_ec_pubkey_create(ctx, point2, &point2len, s_b32, 1) == 1); + + secp256k1_sha256_initialize(&sha); + secp256k1_sha256_write(&sha, point2, sizeof(point2)); + secp256k1_sha256_finalize(&sha, output_ser); + /* compare */ + CHECK(memcmp(output_ecdh, output_ser, sizeof(output_ser)) == 0); + } +} + void run_ecdh_tests(void) { ecdh_mult_zero_one(); ecdh_random_mult(); @@ -1355,6 +1388,10 @@ void run_ecdh_tests(void) { ecdh_chain_multiply(); } +void 
run_ecdh_api_tests(void) { + ecdh_generator_basepoint(); +} + /***** ECMULT TESTS *****/ void run_ecmult_chain(void) { @@ -2448,6 +2485,7 @@ int main(int argc, char **argv) { /* ecdh tests */ run_ecdh_tests(); + run_ecdh_api_tests(); /* ecdsa tests */ run_random_pubkeys(); From c641e0609831accb10533a06a7f4e8fc5fa71a2d Mon Sep 17 00:00:00 2001 From: Andrew Poelstra Date: Fri, 15 May 2015 14:46:08 -0500 Subject: [PATCH 08/13] Add benchmark for ECDH multiplication --- .gitignore | 1 + Makefile.am | 6 +++++- src/bench_ecdh.c | 50 ++++++++++++++++++++++++++++++++++++++++++++ src/bench_internal.c | 12 +++++++++++ 4 files changed, 68 insertions(+), 1 deletion(-) create mode 100644 src/bench_ecdh.c diff --git a/.gitignore b/.gitignore index 076ff1295f..a697a794cc 100644 --- a/.gitignore +++ b/.gitignore @@ -1,4 +1,5 @@ bench_inv +bench_ecdh bench_sign bench_verify bench_recover diff --git a/Makefile.am b/Makefile.am index 5adba36fe6..ea9770e011 100644 --- a/Makefile.am +++ b/Makefile.am @@ -51,7 +51,7 @@ libsecp256k1_la_LIBADD = $(SECP_LIBS) noinst_PROGRAMS = if USE_BENCHMARK -noinst_PROGRAMS += bench_verify bench_recover bench_sign bench_internal +noinst_PROGRAMS += bench_verify bench_recover bench_sign bench_internal bench_ecdh bench_verify_SOURCES = src/bench_verify.c bench_verify_LDADD = libsecp256k1.la $(SECP_LIBS) bench_verify_LDFLAGS = -static @@ -65,6 +65,10 @@ bench_internal_SOURCES = src/bench_internal.c bench_internal_LDADD = $(SECP_LIBS) bench_internal_LDFLAGS = -static bench_internal_CPPFLAGS = $(SECP_INCLUDES) +bench_ecdh_SOURCES = src/bench_ecdh.c +bench_ecdh_LDADD = libsecp256k1.la $(SECP_LIBS) +bench_ecdh_LDFLAGS = -static +bench_ecdh_CPPFLAGS = $(SECP_INCLUDES) endif if USE_TESTS diff --git a/src/bench_ecdh.c b/src/bench_ecdh.c new file mode 100644 index 0000000000..0b4cd83177 --- /dev/null +++ b/src/bench_ecdh.c @@ -0,0 +1,50 @@ +/********************************************************************** + * Copyright (c) 2015 Pieter Wuille, Andrew Poelstra 
* + * Distributed under the MIT software license, see the accompanying * + * file COPYING or http://www.opensource.org/licenses/mit-license.php.* + **********************************************************************/ + +#include + +#include "include/secp256k1.h" +#include "util.h" +#include "bench.h" + +typedef struct { + unsigned char point[33]; + int pointlen; + unsigned char scalar[32]; +} bench_multiply_t; + +static void bench_multiply_setup(void* arg) { + int i; + bench_multiply_t *data = (bench_multiply_t*)arg; + const unsigned char point[] = { + 0x03, + 0x54, 0x94, 0xc1, 0x5d, 0x32, 0x09, 0x97, 0x06, + 0xc2, 0x39, 0x5f, 0x94, 0x34, 0x87, 0x45, 0xfd, + 0x75, 0x7c, 0xe3, 0x0e, 0x4e, 0x8c, 0x90, 0xfb, + 0xa2, 0xba, 0xd1, 0x84, 0xf8, 0x83, 0xc6, 0x9f + }; + + for (i = 0; i < 32; i++) data->scalar[i] = i + 1; + data->pointlen = sizeof(point); + memcpy(data->point, point, data->pointlen); +} + +static void bench_multiply(void* arg) { + int i; + unsigned char res[32]; + bench_multiply_t *data = (bench_multiply_t*)arg; + + for (i = 0; i < 20000; i++) { + CHECK(secp256k1_ecdh(res, data->point, &data->pointlen, data->scalar) == 1); + } +} + +int main(void) { + bench_multiply_t data; + + run_benchmark("ecdh_mult", bench_multiply, bench_multiply_setup, NULL, &data, 10, 20000); + return 0; +} diff --git a/src/bench_internal.c b/src/bench_internal.c index bbbc7c3fc7..b33985a1a3 100644 --- a/src/bench_internal.c +++ b/src/bench_internal.c @@ -13,6 +13,7 @@ #include "field_impl.h" #include "group_impl.h" #include "scalar_impl.h" +#include "ecdh_impl.h" #include "ecmult_impl.h" #include "bench.h" @@ -234,6 +235,16 @@ void bench_ecmult_wnaf(void* arg) { } } +void bench_ecdh_wnaf(void* arg) { + int i; + bench_inv_t *data = (bench_inv_t*)arg; + + for (i = 0; i < 20000; i++) { + secp256k1_ecdh_wnaf(data->wnaf, &data->scalar_x, WINDOW_A); + secp256k1_scalar_add(&data->scalar_x, &data->scalar_x, &data->scalar_y); + } +} + void bench_sha256(void* arg) { int i; @@ -309,6 +320,7 
@@ int main(int argc, char **argv) { if (have_flag(argc, argv, "group") || have_flag(argc, argv, "add")) run_benchmark("group_add_affine", bench_group_add_affine, bench_setup, NULL, &data, 10, 200000); if (have_flag(argc, argv, "group") || have_flag(argc, argv, "add")) run_benchmark("group_add_affine_var", bench_group_add_affine_var, bench_setup, NULL, &data, 10, 200000); + if (have_flag(argc, argv, "ecdh") || have_flag(argc, argv, "wnaf")) run_benchmark("ecdh_wnaf", bench_ecdh_wnaf, bench_setup, NULL, &data, 10, 20000); if (have_flag(argc, argv, "ecmult") || have_flag(argc, argv, "wnaf")) run_benchmark("ecmult_wnaf", bench_ecmult_wnaf, bench_setup, NULL, &data, 10, 20000); if (have_flag(argc, argv, "hash") || have_flag(argc, argv, "sha256")) run_benchmark("hash_sha256", bench_sha256, bench_setup, NULL, &data, 10, 20000); From a35d91b13d96653c89f5a3e1bbb04d0099d01d5a Mon Sep 17 00:00:00 2001 From: Andrew Poelstra Date: Fri, 22 May 2015 11:51:51 -0500 Subject: [PATCH 09/13] Make `secp256k1_scalar_add_bit` conditional; make `secp256k1_scalar_split_lambda_var` constant time This has the effect of making `secp256k1_scalar_mul_shift_var` constant time in both input scalars. Keep the _var name because it is NOT constant time in the shift amount. As used in `secp256k1_scalar_split_lambda_var`, the shift is always the constant 272, so this function becomes constant time, and it loses the `_var` suffix. 
--- src/bench_internal.c | 2 +- src/ecmult_impl.h | 4 ++-- src/scalar.h | 6 +++--- src/scalar_4x64_impl.h | 7 +++---- src/scalar_8x32_impl.h | 7 +++---- src/scalar_impl.h | 3 ++- src/tests.c | 7 +++++-- 7 files changed, 19 insertions(+), 17 deletions(-) diff --git a/src/bench_internal.c b/src/bench_internal.c index b33985a1a3..00080d4bdf 100644 --- a/src/bench_internal.c +++ b/src/bench_internal.c @@ -97,7 +97,7 @@ void bench_scalar_split(void* arg) { for (i = 0; i < 20000; i++) { secp256k1_scalar_t l, r; - secp256k1_scalar_split_lambda_var(&l, &r, &data->scalar_x); + secp256k1_scalar_split_lambda(&l, &r, &data->scalar_x); secp256k1_scalar_add(&data->scalar_x, &data->scalar_x, &data->scalar_y); } } diff --git a/src/ecmult_impl.h b/src/ecmult_impl.h index d6aa2ea7db..fa30378507 100644 --- a/src/ecmult_impl.h +++ b/src/ecmult_impl.h @@ -242,7 +242,7 @@ static int secp256k1_ecmult_wnaf(int *wnaf, const secp256k1_scalar_t *a, int w) } word = secp256k1_scalar_get_bits_var(&s, bit, now); if (word & (1 << (w-1))) { - secp256k1_scalar_add_bit(&s, bit + w); + secp256k1_scalar_cadd_bit(&s, bit + w, 1); wnaf[set_bits++] = sign * (word - (1 << w)); } else { wnaf[set_bits++] = sign * word; @@ -280,7 +280,7 @@ static void secp256k1_ecmult(const secp256k1_ecmult_context_t *ctx, secp256k1_ge #ifdef USE_ENDOMORPHISM /* split na into na_1 and na_lam (where na = na_1 + na_lam*lambda, and na_1 and na_lam are ~128 bit) */ - secp256k1_scalar_split_lambda_var(&na_1, &na_lam, na); + secp256k1_scalar_split_lambda(&na_1, &na_lam, na); /* build wnaf representation for na_1 and na_lam. */ bits_na_1 = secp256k1_ecmult_wnaf(wnaf_na_1, &na_1, WINDOW_A); diff --git a/src/scalar.h b/src/scalar.h index f33e72a7c9..72e12b86fc 100644 --- a/src/scalar.h +++ b/src/scalar.h @@ -42,8 +42,8 @@ static void secp256k1_scalar_get_b32(unsigned char *bin, const secp256k1_scalar_ /** Add two scalars together (modulo the group order). Returns whether it overflowed. 
*/ static int secp256k1_scalar_add(secp256k1_scalar_t *r, const secp256k1_scalar_t *a, const secp256k1_scalar_t *b); -/** Add a power of two to a scalar. The result is not allowed to overflow. */ -static void secp256k1_scalar_add_bit(secp256k1_scalar_t *r, unsigned int bit); +/** Conditionally add a power of two to a scalar. The result is not allowed to overflow. */ +static void secp256k1_scalar_cadd_bit(secp256k1_scalar_t *r, unsigned int bit, int flag); /** Multiply two scalars (modulo the group order). */ static void secp256k1_scalar_mul(secp256k1_scalar_t *r, const secp256k1_scalar_t *a, const secp256k1_scalar_t *b); @@ -95,7 +95,7 @@ static int secp256k1_scalar_eq(const secp256k1_scalar_t *a, const secp256k1_scal /** Find r1 and r2 such that r1+r2*2^128 = a. */ static void secp256k1_scalar_split_128(secp256k1_scalar_t *r1, secp256k1_scalar_t *r2, const secp256k1_scalar_t *a); /** Find r1 and r2 such that r1+r2*lambda = a, and r1 and r2 are maximum 128 bits long (see secp256k1_gej_mul_lambda). */ -static void secp256k1_scalar_split_lambda_var(secp256k1_scalar_t *r1, secp256k1_scalar_t *r2, const secp256k1_scalar_t *a); +static void secp256k1_scalar_split_lambda(secp256k1_scalar_t *r1, secp256k1_scalar_t *r2, const secp256k1_scalar_t *a); #endif /** Multiply a and b (without taking the modulus!), divide by 2**shift, and round to the nearest integer. Shift must be at least 256. 
*/ diff --git a/src/scalar_4x64_impl.h b/src/scalar_4x64_impl.h index 147229dab9..09431e0df7 100644 --- a/src/scalar_4x64_impl.h +++ b/src/scalar_4x64_impl.h @@ -96,9 +96,10 @@ static int secp256k1_scalar_add(secp256k1_scalar_t *r, const secp256k1_scalar_t return overflow; } -static void secp256k1_scalar_add_bit(secp256k1_scalar_t *r, unsigned int bit) { +static void secp256k1_scalar_cadd_bit(secp256k1_scalar_t *r, unsigned int bit, int flag) { uint128_t t; VERIFY_CHECK(bit < 256); + bit += ((uint32_t) flag - 1) & 0x100; /* forcing (bit >> 6) > 3 makes this a noop */ t = (uint128_t)r->d[0] + (((uint64_t)((bit >> 6) == 0)) << (bit & 0x3F)); r->d[0] = t & 0xFFFFFFFFFFFFFFFFULL; t >>= 64; t += (uint128_t)r->d[1] + (((uint64_t)((bit >> 6) == 1)) << (bit & 0x3F)); @@ -940,9 +941,7 @@ SECP256K1_INLINE static void secp256k1_scalar_mul_shift_var(secp256k1_scalar_t * r->d[1] = shift < 448 ? (l[1 + shiftlimbs] >> shiftlow | (shift < 384 && shiftlow ? (l[2 + shiftlimbs] << shifthigh) : 0)) : 0; r->d[2] = shift < 384 ? (l[2 + shiftlimbs] >> shiftlow | (shift < 320 && shiftlow ? (l[3 + shiftlimbs] << shifthigh) : 0)) : 0; r->d[3] = shift < 320 ? 
(l[3 + shiftlimbs] >> shiftlow) : 0; - if ((l[(shift - 1) >> 6] >> ((shift - 1) & 0x3f)) & 1) { - secp256k1_scalar_add_bit(r, 0); - } + secp256k1_scalar_cadd_bit(r, 0, (l[(shift - 1) >> 6] >> ((shift - 1) & 0x3f)) & 1); } #endif diff --git a/src/scalar_8x32_impl.h b/src/scalar_8x32_impl.h index 0ad2423db0..54923eb45f 100644 --- a/src/scalar_8x32_impl.h +++ b/src/scalar_8x32_impl.h @@ -136,9 +136,10 @@ static int secp256k1_scalar_add(secp256k1_scalar_t *r, const secp256k1_scalar_t return overflow; } -static void secp256k1_scalar_add_bit(secp256k1_scalar_t *r, unsigned int bit) { +static void secp256k1_scalar_cadd_bit(secp256k1_scalar_t *r, unsigned int bit, int flag) { uint64_t t; VERIFY_CHECK(bit < 256); + bit += ((uint32_t) flag - 1) & 0x100; /* forcing (bit >> 5) > 7 makes this a noop */ t = (uint64_t)r->d[0] + (((uint32_t)((bit >> 5) == 0)) << (bit & 0x1F)); r->d[0] = t & 0xFFFFFFFFULL; t >>= 32; t += (uint64_t)r->d[1] + (((uint32_t)((bit >> 5) == 1)) << (bit & 0x1F)); @@ -714,9 +715,7 @@ SECP256K1_INLINE static void secp256k1_scalar_mul_shift_var(secp256k1_scalar_t * r->d[5] = shift < 352 ? (l[5 + shiftlimbs] >> shiftlow | (shift < 320 && shiftlow ? (l[6 + shiftlimbs] << shifthigh) : 0)) : 0; r->d[6] = shift < 320 ? (l[6 + shiftlimbs] >> shiftlow | (shift < 288 && shiftlow ? (l[7 + shiftlimbs] << shifthigh) : 0)) : 0; r->d[7] = shift < 288 ? (l[7 + shiftlimbs] >> shiftlow) : 0; - if ((l[(shift - 1) >> 5] >> ((shift - 1) & 0x1f)) & 1) { - secp256k1_scalar_add_bit(r, 0); - } + secp256k1_scalar_cadd_bit(r, 0, (l[(shift - 1) >> 5] >> ((shift - 1) & 0x1f)) & 1); } #endif diff --git a/src/scalar_impl.h b/src/scalar_impl.h index 6ed8865441..dda103160e 100644 --- a/src/scalar_impl.h +++ b/src/scalar_impl.h @@ -295,7 +295,7 @@ static void secp256k1_scalar_inverse_var(secp256k1_scalar_t *r, const secp256k1_ * The function below splits a in r1 and r2, such that r1 + lambda * r2 == a (mod order). 
*/ -static void secp256k1_scalar_split_lambda_var(secp256k1_scalar_t *r1, secp256k1_scalar_t *r2, const secp256k1_scalar_t *a) { +static void secp256k1_scalar_split_lambda(secp256k1_scalar_t *r1, secp256k1_scalar_t *r2, const secp256k1_scalar_t *a) { secp256k1_scalar_t c1, c2; static const secp256k1_scalar_t minus_lambda = SECP256K1_SCALAR_CONST( 0xAC9C52B3UL, 0x3FA3CF1FUL, 0x5AD9E3FDUL, 0x77ED9BA4UL, @@ -319,6 +319,7 @@ static void secp256k1_scalar_split_lambda_var(secp256k1_scalar_t *r1, secp256k1_ ); VERIFY_CHECK(r1 != a); VERIFY_CHECK(r2 != a); + /* these _var calls are constant time since the shift amount is constant */ secp256k1_scalar_mul_shift_var(&c1, a, &g1, 272); secp256k1_scalar_mul_shift_var(&c2, a, &g2, 272); secp256k1_scalar_mul(&c1, &c1, &minus_b1); diff --git a/src/tests.c b/src/tests.c index 75a73b738b..c524607dbe 100644 --- a/src/tests.c +++ b/src/tests.c @@ -562,7 +562,10 @@ void scalar_test(void) { r2 = s1; if (!secp256k1_scalar_add(&r1, &r1, &b)) { /* No overflow happened. 
*/ - secp256k1_scalar_add_bit(&r2, bit); + secp256k1_scalar_cadd_bit(&r2, bit, 1); + CHECK(secp256k1_scalar_eq(&r1, &r2)); + /* cadd is a noop when flag is zero */ + secp256k1_scalar_cadd_bit(&r2, bit, 0); CHECK(secp256k1_scalar_eq(&r1, &r2)); } } @@ -1701,7 +1704,7 @@ void test_scalar_split(void) { unsigned char tmp[32]; random_scalar_order_test(&full); - secp256k1_scalar_split_lambda_var(&s1, &slam, &full); + secp256k1_scalar_split_lambda(&s1, &slam, &full); /* check that both are <= 128 bits in size */ if (secp256k1_scalar_is_high(&s1)) From 0436dfcf62f5f47c3b8dacd49fba8de436f56030 Mon Sep 17 00:00:00 2001 From: Andrew Poelstra Date: Fri, 22 May 2015 12:09:36 -0500 Subject: [PATCH 10/13] Implement endomorphism optimization for secp256k1_ecdh_point_multiply --- src/bench_internal.c | 2 +- src/ecdh_impl.h | 125 +++++++++++++++++++++++++++++++++++++++---- src/tests.c | 19 ++++++- 3 files changed, 132 insertions(+), 14 deletions(-) diff --git a/src/bench_internal.c b/src/bench_internal.c index 00080d4bdf..a6ae9dd235 100644 --- a/src/bench_internal.c +++ b/src/bench_internal.c @@ -240,7 +240,7 @@ void bench_ecdh_wnaf(void* arg) { bench_inv_t *data = (bench_inv_t*)arg; for (i = 0; i < 20000; i++) { - secp256k1_ecdh_wnaf(data->wnaf, &data->scalar_x, WINDOW_A); + secp256k1_ecdh_wnaf(data->wnaf, data->scalar_x, WINDOW_A); secp256k1_scalar_add(&data->scalar_x, &data->scalar_x, &data->scalar_y); } } diff --git a/src/ecdh_impl.h b/src/ecdh_impl.h index 624ec76dbb..d07030dadd 100644 --- a/src/ecdh_impl.h +++ b/src/ecdh_impl.h @@ -12,7 +12,11 @@ #include "ecdh.h" #include "ecmult_impl.h" -#define WNAF_BITS 256 +#ifdef USE_ENDOMORPHISM + #define WNAF_BITS 128 +#else + #define WNAF_BITS 256 +#endif #define WNAF_SIZE(w) ((WNAF_BITS + (w) - 1) / (w)) /** Convert a number to WNAF notation. The number becomes represented by sum(2^{wi} * wnaf[i], i=0..return_val) @@ -27,17 +31,47 @@ * * Numbers reference steps of `Algorithm SPA-resistant Width-w NAF with Odd Scalar` on pp. 
335 */ -static void secp256k1_ecdh_wnaf(int *wnaf, const secp256k1_scalar_t *a, int w) { - secp256k1_scalar_t s = *a; - /* Negate to force oddness */ - int is_even = secp256k1_scalar_is_even(&s); - int global_sign = secp256k1_scalar_cond_negate(&s, is_even); - +static int secp256k1_ecdh_wnaf(int *wnaf, secp256k1_scalar_t s, int w) { + int global_sign = 1; + int skew = 0; int word = 0; /* 1 2 3 */ - int u_last = secp256k1_scalar_shr_int(&s, w); + int u_last; int u; + +#ifdef USE_ENDOMORPHISM + /* If we are using the endomorphism, we cannot handle even numbers by negating + * them, since we are working with 128-bit numbers whose negations would be 256 + * bits, eliminating the performance advantage. Instead we use a technique from + * Section 4.2 of the Okeya/Tagaki paper, which is to add either 1 (for even) + * or 2 (for odd) to the number we are encoding, then compensating after the + * multiplication. */ + /* Negative 128-bit numbers will be negated, since otherwise they are 256-bit */ + int flip = secp256k1_scalar_is_high(&s); + /* We add 1 to even numbers, 2 to odd ones, noting that negation flips parity */ + int bit = flip ^ (s.d[0] & 1); + /* We check for negative one, since adding 2 to it will cause an overflow */ + secp256k1_scalar_t neg_s; + int not_neg_one; + secp256k1_scalar_negate(&neg_s, &s); + not_neg_one = !secp256k1_scalar_is_one(&neg_s); + secp256k1_scalar_cadd_bit(&s, bit, not_neg_one); + /* If we had negative one, flip == 1, s.d[0] == 0, bit == 1, so caller expects + * that we added two to it and flipped it. In fact for -1 these operations are + * identical. We only flipped, but since skewing is required (in the sense that + * the skew must be 1 or 2, never zero) and flipping is not, we need to change + * our flags to claim that we only skewed. 
*/ + global_sign = secp256k1_scalar_cond_negate(&s, flip); + global_sign *= not_neg_one * 2 - 1; + skew = 1 << bit; +#else + /* Otherwise, we just negate to force oddness */ + int is_even = secp256k1_scalar_is_even(&s); + global_sign = secp256k1_scalar_cond_negate(&s, is_even); +#endif + /* 4 */ + u_last = secp256k1_scalar_shr_int(&s, w); while (word * w < WNAF_BITS) { int sign; int even; @@ -59,6 +93,7 @@ static void secp256k1_ecdh_wnaf(int *wnaf, const secp256k1_scalar_t *a, int w) { VERIFY_CHECK(secp256k1_scalar_is_zero(&s)); VERIFY_CHECK(word == WNAF_SIZE(w)); + return skew; } @@ -67,17 +102,37 @@ static void secp256k1_point_multiply(secp256k1_gej_t *r, const secp256k1_ge_t *a secp256k1_ge_t tmpa; secp256k1_fe_t Z; +#ifdef USE_ENDOMORPHISM + secp256k1_ge_t pre_a_lam[ECMULT_TABLE_SIZE(WINDOW_A)]; + int wnaf_1[1 + WNAF_SIZE(WINDOW_A - 1)]; + int wnaf_lam[1 + WNAF_SIZE(WINDOW_A - 1)]; + int skew_1; + int skew_lam; + secp256k1_scalar_t q_1, q_lam; +#else int wnaf[1 + WNAF_SIZE(WINDOW_A - 1)]; +#endif int i; - int is_zero = secp256k1_scalar_is_zero(scalar); secp256k1_scalar_t sc = *scalar; + + /* build wnaf representation for q. */ +#ifdef USE_ENDOMORPHISM + /* split q into q_1 and q_lam (where q = q_1 + q_lam*lambda, and q_1 and q_lam are ~128 bit) */ + secp256k1_scalar_split_lambda(&q_1, &q_lam, &sc); + /* no need for zero correction when using endomorphism since even + * numbers have one added to them anyway */ + skew_1 = secp256k1_ecdh_wnaf(wnaf_1, q_1, WINDOW_A - 1); + skew_lam = secp256k1_ecdh_wnaf(wnaf_lam, q_lam, WINDOW_A - 1); +#else + int is_zero = secp256k1_scalar_is_zero(scalar); /* the wNAF ladder cannot handle zero, so bump this to one .. we will * correct the result after the fact */ sc.d[0] += is_zero; + VERIFY_CHECK(!secp256k1_scalar_is_zero(&sc)); - /* build wnaf representation for q. */ - secp256k1_ecdh_wnaf(wnaf, &sc, WINDOW_A - 1); + secp256k1_ecdh_wnaf(wnaf, sc, WINDOW_A - 1); +#endif /* Calculate odd multiples of a. 
* All multiples are brought to the same Z 'denominator', which is stored @@ -87,6 +142,11 @@ static void secp256k1_point_multiply(secp256k1_gej_t *r, const secp256k1_ge_t *a */ secp256k1_gej_set_ge(r, a); secp256k1_ecmult_odd_multiples_table_globalz_windowa(pre_a, &Z, r); +#ifdef USE_ENDOMORPHISM + for (i = 0; i < ECMULT_TABLE_SIZE(WINDOW_A); i++) { + secp256k1_ge_mul_lambda(&pre_a_lam[i], &pre_a[i]); + } +#endif secp256k1_gej_set_infinity(r); for (i = WNAF_SIZE(WINDOW_A - 1); i >= 0; i--) { @@ -99,18 +159,61 @@ static void secp256k1_point_multiply(secp256k1_gej_t *r, const secp256k1_ge_t *a * that iteration, so this does not result in a timing leak. */ secp256k1_gej_double_var(r, r, NULL); } +#ifdef USE_ENDOMORPHISM + n = wnaf_1[i]; + ECMULT_TABLE_GET_GE(&tmpa, pre_a, n, WINDOW_A); + VERIFY_CHECK(n != 0); + secp256k1_gej_add_ge(r, r, &tmpa); + + n = wnaf_lam[i]; + ECMULT_TABLE_GET_GE(&tmpa, pre_a_lam, n, WINDOW_A); + VERIFY_CHECK(n != 0); + secp256k1_gej_add_ge(r, r, &tmpa); +#else n = wnaf[i]; VERIFY_CHECK(n != 0); ECMULT_TABLE_GET_GE(&tmpa, pre_a, n, WINDOW_A); secp256k1_gej_add_ge(r, r, &tmpa); +#endif } if (!r->infinity) { secp256k1_fe_mul(&r->z, &r->z, &Z); } +#ifdef USE_ENDOMORPHISM + { + /* Correct for wNAF skew */ + secp256k1_ge_t correction = *a; + secp256k1_ge_storage_t correction_1_stor; + secp256k1_ge_storage_t correction_lam_stor; + secp256k1_ge_storage_t a2_stor; + secp256k1_gej_t tmpj; + secp256k1_gej_set_ge(&tmpj, &correction); + secp256k1_gej_double_var(&tmpj, &tmpj, NULL); + secp256k1_ge_set_gej(&correction, &tmpj); + secp256k1_ge_to_storage(&correction_1_stor, a); + secp256k1_ge_to_storage(&correction_lam_stor, a); + secp256k1_ge_to_storage(&a2_stor, &correction); + + /* For odd numbers this is 2a (so replace it), for even ones a (so no-op) */ + secp256k1_ge_storage_cmov(&correction_1_stor, &a2_stor, skew_1 == 2); + secp256k1_ge_storage_cmov(&correction_lam_stor, &a2_stor, skew_lam == 2); + + /* Apply the correction */ + 
secp256k1_ge_from_storage(&correction, &correction_1_stor); + secp256k1_ge_neg(&correction, &correction); + secp256k1_gej_add_ge(r, r, &correction); + + secp256k1_ge_from_storage(&correction, &correction_lam_stor); + secp256k1_ge_neg(&correction, &correction); + secp256k1_ge_mul_lambda(&correction, &correction); + secp256k1_gej_add_ge(r, r, &correction); + } +#else /* correct for zero */ r->infinity |= is_zero; +#endif } #endif diff --git a/src/tests.c b/src/tests.c index c524607dbe..3eb59a5662 100644 --- a/src/tests.c +++ b/src/tests.c @@ -1571,10 +1571,21 @@ void test_constant_wnaf(const secp256k1_scalar_t *number, int w) { secp256k1_scalar_t x, shift; int wnaf[256] = {0}; int i; +#ifdef USE_ENDOMORPHISM + int skew; +#endif + secp256k1_scalar_t num = *number; secp256k1_scalar_set_int(&x, 0); secp256k1_scalar_set_int(&shift, 1 << w); - secp256k1_ecdh_wnaf(wnaf, number, w); + /* With USE_ENDOMORPHISM on we only consider 128-bit numbers */ +#ifdef USE_ENDOMORPHISM + for (i = 0; i < 16; ++i) + secp256k1_scalar_shr_int(&num, 8); + skew = secp256k1_ecdh_wnaf(wnaf, num, w); +#else + secp256k1_ecdh_wnaf(wnaf, num, w); +#endif for (i = WNAF_SIZE(w); i >= 0; --i) { secp256k1_scalar_t t; @@ -1593,7 +1604,11 @@ void test_constant_wnaf(const secp256k1_scalar_t *number, int w) { } secp256k1_scalar_add(&x, &x, &t); } - CHECK(secp256k1_scalar_eq(&x, number)); +#ifdef USE_ENDOMORPHISM + /* Skew num because when encoding 128-bit numbers as odd we use an offset */ + secp256k1_scalar_cadd_bit(&num, skew == 2, 1); +#endif + CHECK(secp256k1_scalar_eq(&x, &num)); } void run_wnaf(void) { From 2a5914ac1a25cf84c7e140cd9001a4dce01277a8 Mon Sep 17 00:00:00 2001 From: Peter Dettman Date: Wed, 1 Jul 2015 19:20:46 +0930 Subject: [PATCH 11/13] Demo code for x-only ECDH --- .gitignore | 1 + Makefile.am | 6 +++++- include/secp256k1.h | 6 ++++++ src/bench_ecdh_xo.c | 50 +++++++++++++++++++++++++++++++++++++++++++++ src/group.h | 2 ++ src/group_impl.h | 15 ++++++++++++++ src/secp256k1.c | 41 
+++++++++++++++++++++++++++++++++++++ src/tests.c | 13 ++++++++++++ 8 files changed, 133 insertions(+), 1 deletion(-) create mode 100644 src/bench_ecdh_xo.c diff --git a/.gitignore b/.gitignore index a697a794cc..904bdb26eb 100644 --- a/.gitignore +++ b/.gitignore @@ -1,5 +1,6 @@ bench_inv bench_ecdh +bench_ecdh_xo bench_sign bench_verify bench_recover diff --git a/Makefile.am b/Makefile.am index ea9770e011..54c3743036 100644 --- a/Makefile.am +++ b/Makefile.am @@ -51,7 +51,7 @@ libsecp256k1_la_LIBADD = $(SECP_LIBS) noinst_PROGRAMS = if USE_BENCHMARK -noinst_PROGRAMS += bench_verify bench_recover bench_sign bench_internal bench_ecdh +noinst_PROGRAMS += bench_verify bench_recover bench_sign bench_internal bench_ecdh bench_ecdh_xo bench_verify_SOURCES = src/bench_verify.c bench_verify_LDADD = libsecp256k1.la $(SECP_LIBS) bench_verify_LDFLAGS = -static @@ -69,6 +69,10 @@ bench_ecdh_SOURCES = src/bench_ecdh.c bench_ecdh_LDADD = libsecp256k1.la $(SECP_LIBS) bench_ecdh_LDFLAGS = -static bench_ecdh_CPPFLAGS = $(SECP_INCLUDES) +bench_ecdh_xo_SOURCES = src/bench_ecdh_xo.c +bench_ecdh_xo_LDADD = libsecp256k1.la $(SECP_LIBS) +bench_ecdh_xo_LDFLAGS = -static +bench_ecdh_xo_CPPFLAGS = $(SECP_INCLUDES) endif if USE_TESTS diff --git a/include/secp256k1.h b/include/secp256k1.h index c006ad4912..7c4ff801a3 100644 --- a/include/secp256k1.h +++ b/include/secp256k1.h @@ -235,6 +235,12 @@ SECP256K1_WARN_UNUSED_RESULT int secp256k1_ecdh( const unsigned char *scalar ) SECP256K1_ARG_NONNULL(1) SECP256K1_ARG_NONNULL(2) SECP256K1_ARG_NONNULL(3) SECP256K1_ARG_NONNULL(4); +SECP256K1_WARN_UNUSED_RESULT int secp256k1_ecdh_xo( + unsigned char *result, + const unsigned char *x, + const unsigned char *scalar +) SECP256K1_ARG_NONNULL(1) SECP256K1_ARG_NONNULL(2) SECP256K1_ARG_NONNULL(3); + /** Verify an ECDSA secret key. 
* Returns: 1: secret key is valid * 0: secret key is invalid diff --git a/src/bench_ecdh_xo.c b/src/bench_ecdh_xo.c new file mode 100644 index 0000000000..7a8c42b3c6 --- /dev/null +++ b/src/bench_ecdh_xo.c @@ -0,0 +1,50 @@ +/********************************************************************** + * Copyright (c) 2015 Pieter Wuille, Andrew Poelstra * + * Distributed under the MIT software license, see the accompanying * + * file COPYING or http://www.opensource.org/licenses/mit-license.php.* + **********************************************************************/ + +#include + +#include "include/secp256k1.h" +#include "util.h" +#include "bench.h" + +typedef struct { + unsigned char point[33]; + int pointlen; + unsigned char scalar[32]; +} bench_multiply_t; + +static void bench_multiply_setup(void* arg) { + int i; + bench_multiply_t *data = (bench_multiply_t*)arg; + const unsigned char point[] = { + 0x03, + 0x54, 0x94, 0xc1, 0x5d, 0x32, 0x09, 0x97, 0x06, + 0xc2, 0x39, 0x5f, 0x94, 0x34, 0x87, 0x45, 0xfd, + 0x75, 0x7c, 0xe3, 0x0e, 0x4e, 0x8c, 0x90, 0xfb, + 0xa2, 0xba, 0xd1, 0x84, 0xf8, 0x83, 0xc6, 0x9f + }; + + for (i = 0; i < 32; i++) data->scalar[i] = i + 1; + data->pointlen = sizeof(point); + memcpy(data->point, point, data->pointlen); +} + +static void bench_multiply(void* arg) { + int i; + unsigned char res[32]; + bench_multiply_t *data = (bench_multiply_t*)arg; + + for (i = 0; i < 20000; i++) { + CHECK(secp256k1_ecdh_xo(res, data->point+1, data->scalar) == 1); + } +} + +int main(void) { + bench_multiply_t data; + + run_benchmark("ecdh_mult_xo", bench_multiply, bench_multiply_setup, NULL, &data, 10, 20000); + return 0; +} diff --git a/src/group.h b/src/group.h index 1d9ef9d2aa..fcca4a6985 100644 --- a/src/group.h +++ b/src/group.h @@ -48,6 +48,8 @@ static void secp256k1_ge_set_xy(secp256k1_ge_t *r, const secp256k1_fe_t *x, cons * for Y. Return value indicates whether the result is valid. 
*/ static int secp256k1_ge_set_xo_var(secp256k1_ge_t *r, const secp256k1_fe_t *x, int odd); +static int secp256k1_ge_set_xo_iso_var(secp256k1_ge_t *r, secp256k1_fe_t *rk, const secp256k1_fe_t *x); + /** Check whether a group element is the point at infinity. */ static int secp256k1_ge_is_infinity(const secp256k1_ge_t *a); diff --git a/src/group_impl.h b/src/group_impl.h index 2da8909793..43b2ec7403 100644 --- a/src/group_impl.h +++ b/src/group_impl.h @@ -196,6 +196,21 @@ static int secp256k1_ge_set_xo_var(secp256k1_ge_t *r, const secp256k1_fe_t *x, i return 1; } +static int secp256k1_ge_set_xo_iso_var(secp256k1_ge_t *r, secp256k1_fe_t *rk, const secp256k1_fe_t *x) { + secp256k1_fe_t t; + secp256k1_fe_sqr(&t, x); + secp256k1_fe_mul(&t, &t, x); + secp256k1_fe_set_int(rk, 7); + secp256k1_fe_add(rk, &t); /* K = X^3 + 7 (2) */ + + /* TODO Jacobi symbol test to make sure K is a square */ + + r->infinity = 0; + secp256k1_fe_mul(&r->x, rk, x); /* r->x = K*X (1) */ + secp256k1_fe_sqr(&r->y, rk); /* r->y = K^2 (1) */ + return 1; +} + static void secp256k1_gej_set_ge(secp256k1_gej_t *r, const secp256k1_ge_t *a) { r->infinity = a->infinity; r->x = a->x; diff --git a/src/secp256k1.c b/src/secp256k1.c index 0db661172b..2904c9a7a5 100644 --- a/src/secp256k1.c +++ b/src/secp256k1.c @@ -269,6 +269,47 @@ int secp256k1_ecdh(unsigned char *result, unsigned char *point, int *pointlen, c return ret; } +int secp256k1_ecdh_xo(unsigned char *result, const unsigned char *x, const unsigned char *scalar) { + int ret = 0; + int overflow = 0; + secp256k1_fe_t k, t; + secp256k1_gej_t res; + secp256k1_ge_t pt; + secp256k1_scalar_t s; + unsigned char input[32]; + secp256k1_sha256_t sha; + DEBUG_CHECK(result != NULL); + DEBUG_CHECK(x != NULL); + DEBUG_CHECK(scalar != NULL); + + secp256k1_scalar_set_b32(&s, scalar, &overflow); + if (secp256k1_scalar_is_zero(&s)) { + ret = -1; + } else if (overflow) { + ret = -2; + } else if (secp256k1_fe_set_b32(&t, x) && secp256k1_ge_set_xo_iso_var(&pt, &k, &t)) { 
+ secp256k1_point_multiply(&res, &pt, &s); + if (!res.infinity) { + secp256k1_fe_sqr(&t, &res.z); + secp256k1_fe_mul(&t, &t, &k); + secp256k1_fe_inv(&k, &t); + secp256k1_fe_mul(&t, &res.x, &k); + secp256k1_fe_normalize(&t); + + /* secp256k1_fe_get_b32(result, &t); */ + secp256k1_fe_get_b32(input, &t); + secp256k1_sha256_initialize(&sha); + secp256k1_sha256_write(&sha, input, sizeof(input)); + secp256k1_sha256_finalize(&sha, result); + ret = 1; + } + } else { + ret = -3; + } + secp256k1_scalar_clear(&s); + return ret; +} + int secp256k1_ec_seckey_verify(const secp256k1_context_t* ctx, const unsigned char *seckey) { secp256k1_scalar_t sec; int ret; diff --git a/src/tests.c b/src/tests.c index 3eb59a5662..ba5762e10c 100644 --- a/src/tests.c +++ b/src/tests.c @@ -1381,6 +1381,19 @@ void ecdh_generator_basepoint(void) { secp256k1_sha256_finalize(&sha, output_ser); /* compare */ CHECK(memcmp(output_ecdh, output_ser, sizeof(output_ser)) == 0); + + memset(output_ecdh, 0, sizeof(output_ecdh)); + memset(output_ser, 0, sizeof(output_ser)); + + /* compute using x-only ECDH function */ + secp256k1_eckey_pubkey_serialize(&gen, point, &pointlen, 1); + CHECK(secp256k1_ecdh_xo(output_ecdh, point+1, s_b32) == 1); + /* compute "explicitly" */ + secp256k1_sha256_initialize(&sha); + secp256k1_sha256_write(&sha, point2+1, sizeof(point2)-1); + secp256k1_sha256_finalize(&sha, output_ser); + /* compare */ + CHECK(memcmp(output_ecdh, output_ser, sizeof(output_ser)) == 0); } } From d23d8c559242c4774b834aadefadb415a505b37f Mon Sep 17 00:00:00 2001 From: Peter Dettman Date: Fri, 3 Jul 2015 21:51:52 +0930 Subject: [PATCH 12/13] Add Jacobi symbol test via GMP --- src/group_impl.h | 20 +++++++++++++++++++- src/num.h | 3 +++ src/num_gmp_impl.h | 22 ++++++++++++++++++++++ 3 files changed, 44 insertions(+), 1 deletion(-) diff --git a/src/group_impl.h b/src/group_impl.h index 43b2ec7403..f13d0c9609 100644 --- a/src/group_impl.h +++ b/src/group_impl.h @@ -198,12 +198,30 @@ static int 
secp256k1_ge_set_xo_var(secp256k1_ge_t *r, const secp256k1_fe_t *x, i static int secp256k1_ge_set_xo_iso_var(secp256k1_ge_t *r, secp256k1_fe_t *rk, const secp256k1_fe_t *x) { secp256k1_fe_t t; + secp256k1_num_t a, p; + unsigned char b[32]; + + /* secp256k1 field prime, value p defined in "Standards for Efficient Cryptography" (SEC2) 2.7.1. */ + static const unsigned char prime[32] = { + 0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF, + 0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF, + 0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF, + 0xFF,0xFF,0xFF,0xFE,0xFF,0xFF,0xFC,0x2F + }; + secp256k1_fe_sqr(&t, x); secp256k1_fe_mul(&t, &t, x); secp256k1_fe_set_int(rk, 7); secp256k1_fe_add(rk, &t); /* K = X^3 + 7 (2) */ - /* TODO Jacobi symbol test to make sure K is a square */ + /* Perform a Jacobi symbol test on K to verify that it's a non-zero quadratic residue */ + secp256k1_fe_normalize_var(rk); + secp256k1_fe_get_b32(b, rk); + secp256k1_num_set_bin(&a, b, 32); + secp256k1_num_set_bin(&p, prime, 32); + if (secp256k1_num_jacobi(&a, &p) != 1) { + return 0; + } r->infinity = 0; secp256k1_fe_mul(&r->x, rk, x); /* r->x = K*X (1) */ diff --git a/src/num.h b/src/num.h index 339b6bb6ec..d4263c14d6 100644 --- a/src/num.h +++ b/src/num.h @@ -32,6 +32,9 @@ static void secp256k1_num_set_bin(secp256k1_num_t *r, const unsigned char *a, un /** Compute a modular inverse. The input must be less than the modulus. */ static void secp256k1_num_mod_inverse(secp256k1_num_t *r, const secp256k1_num_t *a, const secp256k1_num_t *m); +/** Compute the jacobi symbol (a|b). b must be positive and odd. */ +static int secp256k1_num_jacobi(const secp256k1_num_t *a, const secp256k1_num_t *b); + /** Compare the absolute value of two numbers. 
*/ static int secp256k1_num_cmp(const secp256k1_num_t *a, const secp256k1_num_t *b); diff --git a/src/num_gmp_impl.h b/src/num_gmp_impl.h index dbbc458d5d..a0f312cb49 100644 --- a/src/num_gmp_impl.h +++ b/src/num_gmp_impl.h @@ -142,6 +142,28 @@ static void secp256k1_num_mod_inverse(secp256k1_num_t *r, const secp256k1_num_t memset(v, 0, sizeof(v)); } +static int secp256k1_num_jacobi(const secp256k1_num_t *a, const secp256k1_num_t *b) { + int ret; + mpz_t ga, gb; + secp256k1_num_sanity(a); + secp256k1_num_sanity(b); + VERIFY_CHECK(!b->neg && (b->limbs > 0) && (b->data[0] & 1)); + + mpz_inits(ga, gb, NULL); + + mpz_import(gb, b->limbs, -1, sizeof(mp_limb_t), 0, 0, b->data); + mpz_import(ga, a->limbs, -1, sizeof(mp_limb_t), 0, 0, a->data); + if (a->neg) { + mpz_neg(ga, ga); + } + + ret = mpz_jacobi(ga, gb); + + mpz_clears(ga, gb, NULL); + + return ret; +} + static int secp256k1_num_is_zero(const secp256k1_num_t *a) { return (a->limbs == 1 && a->data[0] == 0); } From a5eaa21565025be3b8132433f7be52a0371ae735 Mon Sep 17 00:00:00 2001 From: Peter Dettman Date: Sat, 4 Jul 2015 18:09:47 +0930 Subject: [PATCH 13/13] Fallback to _set_xo_var when USE_NUM_NONE defined --- src/group_impl.h | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/group_impl.h b/src/group_impl.h index f13d0c9609..1c841c327a 100644 --- a/src/group_impl.h +++ b/src/group_impl.h @@ -197,6 +197,10 @@ static int secp256k1_ge_set_xo_var(secp256k1_ge_t *r, const secp256k1_fe_t *x, i } static int secp256k1_ge_set_xo_iso_var(secp256k1_ge_t *r, secp256k1_fe_t *rk, const secp256k1_fe_t *x) { +#ifdef USE_NUM_NONE + secp256k1_fe_set_int(rk, 1); + return secp256k1_ge_set_xo_var(r, x, 0); +#else secp256k1_fe_t t; secp256k1_num_t a, p; unsigned char b[32]; @@ -227,6 +231,7 @@ static int secp256k1_ge_set_xo_iso_var(secp256k1_ge_t *r, secp256k1_fe_t *rk, co secp256k1_fe_mul(&r->x, rk, x); /* r->x = K*X (1) */ secp256k1_fe_sqr(&r->y, rk); /* r->y = K^2 (1) */ return 1; +#endif } static void 
secp256k1_gej_set_ge(secp256k1_gej_t *r, const secp256k1_ge_t *a) {