feat: direct base table mul + projective-table GLV

this repo has no description

- extend G_TABLE from [16][256] to [32][256] for full 256-bit scalar coverage
- u1*G: direct byte-at-a-time lookup, no GLV split, zero doublings
- u2*Q: precompute tables stay in Jacobian form, so the batchToAffine field inversion is skipped

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

zzstoatzz.io 2 weeks ago 629313e6 24bba1cd

+43 -65

2 changed files

expand all

unified split

src

affine.zig

verify.zig

+10 -9

src/affine.zig

··· 53 54 /// Build a byte-indexed precomputed table for scalar multiplication. 55 /// table[i][j] = j * 256^i * base, stored as AffinePoint26. 56 /// table[i][0] is the identity element (unused in lookups). 57 - pub fn buildByteTable(base: Secp256k1) [16][256]AffinePoint26 { 58 @setEvalBranchQuota(100_000_000); 59 60 - // Phase 1: compute all 16*256 points in Jacobian Fe26 61 - var flat: [16 * 256]JacobianPoint = undefined; 62 63 var cur_base_affine = AffinePoint26.fromStdlib(base.affineCoordinates()); 64 - for (0..16) |sub| { 65 flat[sub * 256] = JacobianPoint.identity; 66 flat[sub * 256 + 1] = JacobianPoint.fromAffine(cur_base_affine); 67 for (2..256) |j| { 68 flat[sub * 256 + j] = flat[sub * 256 + j - 1].addMixed(cur_base_affine); 69 } 70 - if (sub < 15) { 71 // next subtable base = 256 * current base (8 doublings) 72 var next = JacobianPoint.fromAffine(cur_base_affine); 73 next = next.dbl().dbl().dbl().dbl().dbl().dbl().dbl().dbl(); ··· 78 } 79 80 // Phase 2: batch convert to affine 81 - const affine_flat = batchToAffine(16 * 256, flat); 82 83 - // Reshape to [16][256] 84 - var result: [16][256]AffinePoint26 = undefined; 85 - for (0..16) |sub| { 86 for (0..256) |j| { 87 result[sub][j] = affine_flat[sub * 256 + j]; 88 }

··· 53 54 /// Build a byte-indexed precomputed table for scalar multiplication. 55 /// table[i][j] = j * 256^i * base, stored as AffinePoint26. 56 + /// 32 subtables × 256 entries = full 256-bit scalar coverage. 57 /// table[i][0] is the identity element (unused in lookups). 58 + pub fn buildByteTable(base: Secp256k1) [32][256]AffinePoint26 { 59 @setEvalBranchQuota(100_000_000); 60 61 + // Phase 1: compute all 32*256 points in Jacobian Fe26 62 + var flat: [32 * 256]JacobianPoint = undefined; 63 64 var cur_base_affine = AffinePoint26.fromStdlib(base.affineCoordinates()); 65 + for (0..32) |sub| { 66 flat[sub * 256] = JacobianPoint.identity; 67 flat[sub * 256 + 1] = JacobianPoint.fromAffine(cur_base_affine); 68 for (2..256) |j| { 69 flat[sub * 256 + j] = flat[sub * 256 + j - 1].addMixed(cur_base_affine); 70 } 71 + if (sub < 31) { 72 // next subtable base = 256 * current base (8 doublings) 73 var next = JacobianPoint.fromAffine(cur_base_affine); 74 next = next.dbl().dbl().dbl().dbl().dbl().dbl().dbl().dbl(); ··· 79 } 80 81 // Phase 2: batch convert to affine 82 + const affine_flat = batchToAffine(32 * 256, flat); 83 84 + // Reshape to [32][256] 85 + var result: [32][256]AffinePoint26 = undefined; 86 + for (0..32) |sub| { 87 for (0..256) |j| { 88 result[sub][j] = affine_flat[sub * 256 + j]; 89 }

+33 -56

src/verify.zig

··· 15 error{SignatureVerificationFailed}; 16 17 /// Precomputed base point table: G_TABLE[i][j] = j * 256^i * G in Fe26 affine. 18 - /// 16 subtables × 256 entries = 4096 affine points. 19 /// Enables u1*G computation via byte-at-a-time lookup with zero doublings. 20 - const G_TABLE: [16][256]AffinePoint26 = blk: { 21 @setEvalBranchQuota(100_000_000); 22 break :blk affine_mod.buildByteTable(Secp256k1.basePoint); 23 }; ··· 33 /// 34 /// Optimizations: 35 /// 1. u1*G via precomputed byte table: ~32 mixed adds, zero doublings 36 - /// 2. u2*Q via 2-way Jacobian Shamir: cheaper dbl (2M+5S) and mixed add (7M+4S) 37 - /// 3. Endomorphism + Jacobian comparison (no field inversion) 38 /// 4. All arithmetic in Fe26 (10×26-bit) — no Montgomery form overhead 39 pub fn verify(sig_r: [32]u8, sig_s: [32]u8, msg_hash: [32]u8, public_key: Secp256k1) VerifyError!void { 40 // parse and validate r, s ··· 45 // scalar_u1 = z * s^-1, scalar_u2 = r * s^-1 46 const z = reduceToScalar(msg_hash); 47 const s_inv = s_sc.invert(); 48 - const scalar_u1 = z.mul(s_inv).toBytes(.little); 49 const scalar_u2 = r_sc.mul(s_inv).toBytes(.little); 50 51 - // GLV split: u = r1 + r2*lambda 52 - var split_u1 = endo.splitScalar(scalar_u1, .little) catch return error.SignatureVerificationFailed; 53 - var split_u2 = endo.splitScalar(scalar_u2, .little) catch return error.SignatureVerificationFailed; 54 55 - // handle negative half-scalars: negate and track sign 56 const zero_s = scalar.Scalar.zero.toBytes(.little); 57 - 58 - var neg_g = false; 59 - var neg_g_phi = false; 60 var neg_p = false; 61 var neg_p_phi = false; 62 - 63 - if (split_u1.r1[16] != 0) { 64 - split_u1.r1 = scalar.neg(split_u1.r1, .little) catch zero_s; 65 - neg_g = true; 66 - } 67 - if (split_u1.r2[16] != 0) { 68 - split_u1.r2 = scalar.neg(split_u1.r2, .little) catch zero_s; 69 - neg_g_phi = true; 70 - } 71 if (split_u2.r1[16] != 0) { 72 split_u2.r1 = scalar.neg(split_u2.r1, .little) catch zero_s; 73 neg_p = true; ··· 76 split_u2.r2 = 
scalar.neg(split_u2.r2, .little) catch zero_s; 77 neg_p_phi = true; 78 } 79 - 80 - // 1. u1*G via precomputed byte table (zero doublings) 81 - const r1 = basePointMul(split_u1.r1, neg_g, split_u1.r2, neg_g_phi); 82 - 83 - // 2. u2*Q via 2-way Jacobian Shamir 84 const pk_affine26 = AffinePoint26.fromStdlib(public_key.affineCoordinates()); 85 - const pk_jac = point.precompute(pk_affine26, 8); 86 - const pk_affine = affine_mod.batchToAffine(9, pk_jac); 87 - const pk_phi_affine = point.phiTableAffine(9, pk_affine); 88 - const r2 = publicKeyMul(split_u2, neg_p, neg_p_phi, pk_affine, pk_phi_affine); 89 90 // 3. combine results in Jacobian, compare without inversion 91 const q = r1.add(r2); ··· 101 } 102 103 /// Compute u1*G using the precomputed byte table. 104 - /// Decomposes the two 128-bit half-scalars into bytes and does direct lookups. 105 /// Cost: ~32 mixed Jacobian-affine additions, zero doublings. 106 - fn basePointMul(a1: [32]u8, neg1: bool, a2: [32]u8, neg2: bool) JacobianPoint { 107 var acc = JacobianPoint.identity; 108 109 - // a1 half: direct table lookup 110 - for (0..16) |i| { 111 - const byte = a1[i]; 112 - if (byte != 0) { 113 - acc = if (neg1) acc.subMixed(G_TABLE[i][byte]) else acc.addMixed(G_TABLE[i][byte]); 114 - } 115 - } 116 - 117 - // a2 half: table lookup + phi on the fly (1 field mul per lookup) 118 - for (0..16) |i| { 119 - const byte = a2[i]; 120 if (byte != 0) { 121 - const p = endo.phiAffine(G_TABLE[i][byte]); 122 - acc = if (neg2) acc.subMixed(p) else acc.addMixed(p); 123 } 124 } 125 126 return acc; 127 } 128 129 - /// Compute u2*Q using 2-way Jacobian Shamir with windowed scalars. 130 - /// Cost: 128 Jacobian doublings + ~44 mixed additions. 
131 - fn publicKeyMul( 132 split: endo.SplitScalar, 133 neg_p: bool, 134 neg_p_phi: bool, 135 - pk_affine: [9]AffinePoint26, 136 - pk_phi_affine: [9]AffinePoint26, 137 ) JacobianPoint { 138 const e1 = point.slide(split.r1); 139 const e2 = point.slide(split.r2); 140 141 var q = JacobianPoint.identity; 142 var pos: usize = 32; // 128-bit half-scalars → 32 nybbles + carry 143 while (true) : (pos -= 1) { 144 - q = addSlot(q, &pk_affine, e1[pos], neg_p); 145 - q = addSlot(q, &pk_phi_affine, e2[pos], neg_p_phi); 146 if (pos == 0) break; 147 q = q.dbl().dbl().dbl().dbl(); 148 } 149 return q; 150 } 151 152 - /// Add/subtract a table entry based on signed digit and negation flag. 153 /// Variable-time: branches on digit value (safe for public verification). 154 - inline fn addSlot(q: JacobianPoint, table: *const [9]AffinePoint26, slot: i8, negate: bool) JacobianPoint { 155 var s = slot; 156 if (negate) s = -s; 157 if (s > 0) { 158 - return q.addMixed(table[@intCast(s)]); 159 } else if (s < 0) { 160 - return q.subMixed(table[@intCast(-s)]); 161 } 162 return q; 163 }

··· 15 error{SignatureVerificationFailed}; 16 17 /// Precomputed base point table: G_TABLE[i][j] = j * 256^i * G in Fe26 affine. 18 + /// 32 subtables × 256 entries = 8192 affine points. 19 /// Enables u1*G computation via byte-at-a-time lookup with zero doublings. 20 + const G_TABLE: [32][256]AffinePoint26 = blk: { 21 @setEvalBranchQuota(100_000_000); 22 break :blk affine_mod.buildByteTable(Secp256k1.basePoint); 23 }; ··· 33 /// 34 /// Optimizations: 35 /// 1. u1*G via precomputed byte table: ~32 mixed adds, zero doublings 36 + /// 2. u2*Q via projective-table GLV: no field inversion, 4-bit windowed 37 + /// 3. Jacobian comparison (no field inversion for final check) 38 /// 4. All arithmetic in Fe26 (10×26-bit) — no Montgomery form overhead 39 pub fn verify(sig_r: [32]u8, sig_s: [32]u8, msg_hash: [32]u8, public_key: Secp256k1) VerifyError!void { 40 // parse and validate r, s ··· 45 // scalar_u1 = z * s^-1, scalar_u2 = r * s^-1 46 const z = reduceToScalar(msg_hash); 47 const s_inv = s_sc.invert(); 48 + const scalar_u1 = z.mul(s_inv).toBytes(.big); 49 const scalar_u2 = r_sc.mul(s_inv).toBytes(.little); 50 51 + // 1. u1*G via precomputed byte table — direct 32-byte lookup, no GLV 52 + const r1 = basePointMul(scalar_u1); 53 54 + // 2. u2*Q via projective-table GLV — no field inversion 55 + var split_u2 = endo.splitScalar(scalar_u2, .little) catch return error.SignatureVerificationFailed; 56 const zero_s = scalar.Scalar.zero.toBytes(.little); 57 var neg_p = false; 58 var neg_p_phi = false; 59 if (split_u2.r1[16] != 0) { 60 split_u2.r1 = scalar.neg(split_u2.r1, .little) catch zero_s; 61 neg_p = true; ··· 64 split_u2.r2 = scalar.neg(split_u2.r2, .little) catch zero_s; 65 neg_p_phi = true; 66 } 67 const pk_affine26 = AffinePoint26.fromStdlib(public_key.affineCoordinates()); 68 + const r2 = publicKeyMulProjective(split_u2, neg_p, neg_p_phi, pk_affine26); 69 70 // 3. 
combine results in Jacobian, compare without inversion 71 const q = r1.add(r2); ··· 81 } 82 83 /// Compute u1*G using the precomputed byte table. 84 + /// Direct 32-byte lookup: G_TABLE[i][scalar[i]], no GLV decomposition. 85 /// Cost: ~32 mixed Jacobian-affine additions, zero doublings. 86 + fn basePointMul(scalar_u1: [32]u8) JacobianPoint { 87 var acc = JacobianPoint.identity; 88 89 + // G_TABLE[i] corresponds to byte position i (little-endian). 90 + // scalar_u1 is big-endian, so byte 31 is least significant. 91 + // G_TABLE[i] ↔ scalar_u1[31-i] 92 + for (0..32) |i| { 93 + const byte = scalar_u1[31 - i]; 94 if (byte != 0) { 95 + acc = acc.addMixed(G_TABLE[i][byte]); 96 } 97 } 98 99 return acc; 100 } 101 102 + /// Compute u2*Q using 2-way Jacobian Shamir with projective tables. 103 + /// Tables stay in Jacobian form — no batchToAffine field inversion. 104 + /// Cost: 128 Jacobian doublings + ~44 Jacobian additions. 105 + fn publicKeyMulProjective( 106 split: endo.SplitScalar, 107 neg_p: bool, 108 neg_p_phi: bool, 109 + pk_affine: AffinePoint26, 110 ) JacobianPoint { 111 + // Build 9-entry Jacobian tables: [identity, 1P, 2P, ..., 8P] 112 + const pk_table = point.precompute(pk_affine, 8); 113 + const pk_phi_table = point.phiTableJacobian(9, pk_table); 114 + 115 const e1 = point.slide(split.r1); 116 const e2 = point.slide(split.r2); 117 118 var q = JacobianPoint.identity; 119 var pos: usize = 32; // 128-bit half-scalars → 32 nybbles + carry 120 while (true) : (pos -= 1) { 121 + q = addSlotJ(q, &pk_table, e1[pos], neg_p); 122 + q = addSlotJ(q, &pk_phi_table, e2[pos], neg_p_phi); 123 if (pos == 0) break; 124 q = q.dbl().dbl().dbl().dbl(); 125 } 126 return q; 127 } 128 129 + /// Add/subtract a Jacobian table entry based on signed digit and negation flag. 130 /// Variable-time: branches on digit value (safe for public verification). 
131 + inline fn addSlotJ(q: JacobianPoint, table: *const [9]JacobianPoint, slot: i8, negate: bool) JacobianPoint { 132 var s = slot; 133 if (negate) s = -s; 134 if (s > 0) { 135 + return q.add(table[@intCast(s)]); 136 } else if (s < 0) { 137 + return q.add(table[@intCast(-s)].negY()); 138 } 139 return q; 140 }