Skip to content

Commit

Permalink
Optimize ScalarMult using endomorphism
Browse files Browse the repository at this point in the history
This implements a speedup to ScalarMult using the endomorphism available to secp256k1.

Note the constants lambda, beta, a1, b1, a2 and b2 are from here:

https://bitcointalk.org/index.php?topic=3238.0

Preliminary tests indicate a speedup of between 17% and 20% (BenchScalarMult).

Further speedup can probably be achieved once splitK stops using big.Int and instead uses a representation similar to fieldVal. Unfortunately, fieldVal cannot be reused directly, because the modulus for this math is the order of G (N), not the field prime P.

Note the NAF optimization was specifically not done as that's the purview of another issue.

This closes btcsuite#1
  • Loading branch information
jimmysong committed Sep 26, 2014
1 parent 2b32cce commit b4cadcb
Show file tree
Hide file tree
Showing 2 changed files with 173 additions and 25 deletions.
172 changes: 147 additions & 25 deletions btcec.go
Original file line number Diff line number Diff line change
Expand Up @@ -37,8 +37,22 @@ var (
// interface from crypto/elliptic.
type KoblitzCurve struct {
*elliptic.CurveParams
q *big.Int
H int // cofactor of the curve.
q *big.Int
H int // cofactor of the curve.

// The next 6 values are used specifically for endomorphism optimizations
// in ScalarMult.

// lambda should fulfill lambda^3 = 1 mod N where N is the order of G
lambda *big.Int
// beta should fulfill beta^3 = 1 mod P where P is the prime field of the curve
beta *fieldVal
// a1, b1, a2 and b2 are explained in detail in Guide To Elliptical Curve
// Cryptography (Hankerson, Menezes, Vanstone) in Algorithm 3.74
a1 *big.Int
b1 *big.Int
a2 *big.Int
b2 *big.Int
bytePoints *[32][256][3]fieldVal
}

Expand Down Expand Up @@ -594,29 +608,126 @@ func (curve *KoblitzCurve) Double(x1, y1 *big.Int) (*big.Int, *big.Int) {
return curve.fieldJacobianToBigAffine(fx3, fy3, fz3)
}

// splitK decomposes the scalar k into a balanced length-two representation
// (k1, k2). It returns, in order: the magnitude bytes of k1, the magnitude
// bytes of k2, the sign of k1 and the sign of k2 (as reported by
// big.Int.Sign).
//
// This is Algorithm 3.74 from Guide to Elliptic Curve Cryptography
// (Hankerson, Menezes, Vanstone), with two deviations: the quotients in
// step 4 are not rounded, and c2 is computed with its sign reversed.
func (curve *KoblitzCurve) splitK(k []byte) ([]byte, []byte, int, int) {
	// Everything here is done with big.Int, which is slow. If this turns
	// out to be a bottleneck, a fieldVal-like type using N rather than P as
	// the modulus would be worth writing.
	scalar := new(big.Int).SetBytes(k)

	// Step 4: c1 = round(b2 * k / n). The rounding is not really necessary
	// and costs too much, so the quotient from Div is used as-is.
	c1 := new(big.Int).Mul(curve.b2, scalar)
	c1.Div(c1, curve.N)

	// Step 4: c2 = round(b1 * k / n), with the sign reversed relative to
	// the book to save one step. Rounding is skipped here as well.
	c2 := new(big.Int).Mul(curve.b1, scalar)
	c2.Div(c2, curve.N)

	// Step 5: k1 = k - c1*a1 - c2*a2. Because c2's sign is reversed, the
	// last term is added instead of subtracted.
	c1a1 := new(big.Int).Mul(c1, curve.a1)
	c2a2 := new(big.Int).Mul(c2, curve.a2)
	k1 := new(big.Int).Sub(scalar, c1a1)
	k1.Add(k1, c2a2)

	// Step 5: k2 = -c1*b1 - c2*b2. With c2's sign reversed this becomes
	// c2*b2 - c1*b1.
	c1b1 := new(big.Int).Mul(c1, curve.b1)
	c2b2 := new(big.Int).Mul(c2, curve.b2)
	k2 := new(big.Int).Sub(c2b2, c1b1)

	// Bytes() discards the sign, yet k1 and/or k2 can be negative, so the
	// signs are handed back separately.
	return k1.Bytes(), k2.Bytes(), k1.Sign(), k2.Sign()
}

// moduloReduce shrinks a scalar that is longer than curve.BitSize/8 bytes by
// taking it modulo curve.N; scalars that already fit are returned untouched
// (the very same slice, not a copy).
//
// The reduction is permitted because the order of G is curve.N (G^N = 1),
// and thus any other valid point on the elliptic curve has the same order.
func (curve *KoblitzCurve) moduloReduce(k []byte) []byte {
	// Short enough already: nothing to do.
	if len(k) <= curve.BitSize/8 {
		return k
	}

	// Too long: use the much smaller equivalent scalar k mod N.
	reduced := new(big.Int).SetBytes(k)
	reduced.Mod(reduced, curve.N)
	return reduced.Bytes()
}

// ScalarMult returns k*(Bx, By) where k is a big endian integer.
// Part of the elliptic.Curve interface.
func (curve *KoblitzCurve) ScalarMult(Bx, By *big.Int, k []byte) (*big.Int, *big.Int) {
// This uses the left to right binary method for point multiplication:
k1, k2, signK1, signK2 := curve.splitK(curve.moduloReduce(k))
m := len(k1)
if len(k2) > m {
m = len(k2)
}

// Point Q = ∞ (point at infinity).
qx, qy, qz := new(fieldVal), new(fieldVal), new(fieldVal)

// Point P = the point to multiply the scalar with.
px, py := curve.bigAffineToField(Bx, By)
pz := new(fieldVal).SetInt(1)
// The main equation here to remember is
// k * P = k1 * P + k2 * ϕ(P)
// P1 below is P in the equation, P2 below is ϕ(P) in the equation
p1x, p1y := curve.bigAffineToField(Bx, By)
p1z := new(fieldVal).SetInt(1)
// Note ϕ(x,y) = (βx,y), the Jacobian z coordinate is 1, so this math
// goes through.
p2x := new(fieldVal).Set(p1x).Mul(curve.beta)
p2y := new(fieldVal).Set(p1y)
p2z := new(fieldVal).SetInt(1)

// If k1 or k2 are negative, we only need to flip the y of the respective
// Jacobian point. In ECC terms, we're reflecting the point over the
// x-axis which is guaranteed to still be on the curve.
if signK1 == -1 {
p1y.Negate(1)
}
if signK2 == -1 {
p2y.Negate(1)
}

// Double and add as necessary depending on the bits set in the scalar.
for _, byteVal := range k {
// We use the left to right binary addition method.
// At each bit of k1 and k2, we add the current part of the
// k * P = k1 * P + k2 * ϕ(P) equation (that is, P1 and P2) and double.
// A further optimization using NAF is possible here but unimplemented.
var byteVal1, byteVal2 byte
for i := 0; i < m; i++ {
// Note that if k1 or k2 has less than the max number of bytes, we
// want to ignore the bytes at the front since we're going left to
// right.
if i < m-len(k1) {
byteVal1 = 0
} else {
byteVal1 = k1[i-m+len(k1)]
}
if i < m-len(k2) {
byteVal2 = 0
} else {
byteVal2 = k2[i-m+len(k2)]
}
for bitNum := 0; bitNum < 8; bitNum++ {
// Q = 2*Q
curve.doubleJacobian(qx, qy, qz, qx, qy, qz)
if byteVal&0x80 == 0x80 {
// Q = Q + P
curve.addJacobian(qx, qy, qz, px, py, pz, qx,
qy, qz)
if byteVal1&0x80 == 0x80 {
// Q = Q + P1
curve.addJacobian(qx, qy, qz, p1x, p1y, p1z, qx, qy, qz)
}
if byteVal2&0x80 == 0x80 {
// Q = Q + P2
curve.addJacobian(qx, qy, qz, p2x, p2y, p2z, qx, qy, qz)
}
byteVal <<= 1
byteVal1 <<= 1
byteVal2 <<= 1
}
}

Expand All @@ -629,18 +740,7 @@ func (curve *KoblitzCurve) ScalarMult(Bx, By *big.Int, k []byte) (*big.Int, *big
// Part of the elliptic.Curve interface.
func (curve *KoblitzCurve) ScalarBaseMult(k []byte) (*big.Int, *big.Int) {

var newK []byte
// Since the order of G is curve.N, we can use a much smaller number
// by doing modulo curve.N
if len(k) > len(curve.bytePoints) {
// reduce k by performing modulo curve.N
tmpK := big.NewInt(0)
tmpK.SetBytes(k)
tmpK.Mod(tmpK, curve.N)
newK = tmpK.Bytes()
} else {
newK = k
}
newK := curve.moduloReduce(k)

diff := len(curve.bytePoints) - len(newK)

Expand Down Expand Up @@ -685,6 +785,28 @@ func initS256() {
secp256k1.H = 1
secp256k1.q = new(big.Int).Div(new(big.Int).Add(secp256k1.P,
big.NewInt(1)), big.NewInt(4))

// Next 6 constants are from Hal Finney's bitcointalk.org post:
// https://bitcointalk.org/index.php?topic=3238.msg45565#msg45565
// May he rest in peace.
secp256k1.lambda, _ = new(big.Int).SetString("5363AD4CC05C30E0A5261C028812645A122E22EA20816678DF02967C1B23BD72", 16)
secp256k1.beta = new(fieldVal).SetHex("7AE96A2B657C07106E64479EAC3434E99CF0497512F58995C1396C28719501EE")
secp256k1.a1, _ = new(big.Int).SetString("3086D221A7D46BCDE86C90E49284EB15", 16)
secp256k1.b1, _ = new(big.Int).SetString("-E4437ED6010E88286F547FA90ABFE4C3", 16)
secp256k1.a2, _ = new(big.Int).SetString("114CA50F7A8E2F3F657C1108D9D44CFD8", 16)
secp256k1.b2, _ = new(big.Int).SetString("3086D221A7D46BCDE86C90E49284EB15", 16)

// Alternatively, we can use the parameters below, however, they seem
// to be about 8% slower.
// λ = AC9C52B33FA3CF1F5AD9E3FD77ED9BA4A880B9FC8EC739C2E0CFC810B51283CE
// β = 851695D49A83F8EF919BB86153CBCB16630FB68AED0A766A3EC693D68E6AFA40
// secp256k1.lambda, _ = new(big.Int).SetString("AC9C52B33FA3CF1F5AD9E3FD77ED9BA4A880B9FC8EC739C2E0CFC810B51283CE", 16)
// secp256k1.beta = new(fieldVal).SetHex("851695D49A83F8EF919BB86153CBCB16630FB68AED0A766A3EC693D68E6AFA40")
// secp256k1.a1, _ = new(big.Int).SetString("E4437ED6010E88286F547FA90ABFE4C3", 16)
// secp256k1.b1, _ = new(big.Int).SetString("-3086D221A7D46BCDE86C90E49284EB15", 16)
// secp256k1.a2, _ = new(big.Int).SetString("3086D221A7D46BCDE86C90E49284EB15", 16)
// secp256k1.b2, _ = new(big.Int).SetString("114CA50F7A8E2F3F657C1108D9D44CFD8", 16)

secp256k1.bytePoints = &secp256k1BytePoints
}

Expand Down
26 changes: 26 additions & 0 deletions btcec_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -577,6 +577,32 @@ func TestBaseMultVerify(t *testing.T) {
}
}

// TestScalarMult cross-checks ScalarMult against ScalarBaseMult.
//
// Strategy:
//   - Start from the generator point and multiply it by a random scalar.
//   - The result becomes the input point for the next iteration, which
//     multiplies it by another random scalar.
//   - In parallel, keep the running product of all scalars (mod N) and
//     verify each step by feeding that product to ScalarBaseMult, which
//     must land on the same point.
func TestScalarMult(t *testing.T) {
	s256 := btcec.S256()
	x, y := s256.Gx, s256.Gy
	exponent := big.NewInt(1)
	for i := 0; i < 1024; i++ {
		data := make([]byte, 32)
		// Fatalf stops the test immediately, so no further control flow
		// is needed after it.
		if _, err := rand.Read(data); err != nil {
			t.Fatalf("failed to read random data at %d: %v", i, err)
		}

		// Advance the running point and the running exponent together.
		x, y = s256.ScalarMult(x, y, data)
		exponent.Mul(exponent, new(big.Int).SetBytes(data))
		exponent.Mod(exponent, s256.N)

		xWant, yWant := s256.ScalarBaseMult(exponent.Bytes())
		if x.Cmp(xWant) != 0 || y.Cmp(yWant) != 0 {
			t.Errorf("%d: bad output for %X: got (%X, %X), want (%X, %X)", i, data, x, y, xWant, yWant)
		}
	}
}

//TODO: test more curves?
func BenchmarkBaseMult(b *testing.B) {
b.ResetTimer()
Expand Down

0 comments on commit b4cadcb

Please sign in to comment.