This repository has no description.

feat: direct base table mul + projective-table GLV

- extend G_TABLE from [16][256] to [32][256] for full 256-bit scalar coverage
- u1*G: direct byte-at-a-time lookup, no GLV split, zero doublings
- u2*Q: Jacobian precompute tables, no batchToAffine field inversion

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

+43 -65
+10 -9
src/affine.zig
··· 53 53 54 54 /// Build a byte-indexed precomputed table for scalar multiplication. 55 55 /// table[i][j] = j * 256^i * base, stored as AffinePoint26. 56 + /// 32 subtables × 256 entries = full 256-bit scalar coverage. 56 57 /// table[i][0] is the identity element (unused in lookups). 57 - pub fn buildByteTable(base: Secp256k1) [16][256]AffinePoint26 { 58 + pub fn buildByteTable(base: Secp256k1) [32][256]AffinePoint26 { 58 59 @setEvalBranchQuota(100_000_000); 59 60 60 - // Phase 1: compute all 16*256 points in Jacobian Fe26 61 - var flat: [16 * 256]JacobianPoint = undefined; 61 + // Phase 1: compute all 32*256 points in Jacobian Fe26 62 + var flat: [32 * 256]JacobianPoint = undefined; 62 63 63 64 var cur_base_affine = AffinePoint26.fromStdlib(base.affineCoordinates()); 64 - for (0..16) |sub| { 65 + for (0..32) |sub| { 65 66 flat[sub * 256] = JacobianPoint.identity; 66 67 flat[sub * 256 + 1] = JacobianPoint.fromAffine(cur_base_affine); 67 68 for (2..256) |j| { 68 69 flat[sub * 256 + j] = flat[sub * 256 + j - 1].addMixed(cur_base_affine); 69 70 } 70 - if (sub < 15) { 71 + if (sub < 31) { 71 72 // next subtable base = 256 * current base (8 doublings) 72 73 var next = JacobianPoint.fromAffine(cur_base_affine); 73 74 next = next.dbl().dbl().dbl().dbl().dbl().dbl().dbl().dbl(); ··· 78 79 } 79 80 80 81 // Phase 2: batch convert to affine 81 - const affine_flat = batchToAffine(16 * 256, flat); 82 + const affine_flat = batchToAffine(32 * 256, flat); 82 83 83 - // Reshape to [16][256] 84 - var result: [16][256]AffinePoint26 = undefined; 85 - for (0..16) |sub| { 84 + // Reshape to [32][256] 85 + var result: [32][256]AffinePoint26 = undefined; 86 + for (0..32) |sub| { 86 87 for (0..256) |j| { 87 88 result[sub][j] = affine_flat[sub * 256 + j]; 88 89 }
+33 -56
src/verify.zig
··· 15 15 error{SignatureVerificationFailed}; 16 16 17 17 /// Precomputed base point table: G_TABLE[i][j] = j * 256^i * G in Fe26 affine. 18 - /// 16 subtables × 256 entries = 4096 affine points. 18 + /// 32 subtables × 256 entries = 8192 affine points. 19 19 /// Enables u1*G computation via byte-at-a-time lookup with zero doublings. 20 - const G_TABLE: [16][256]AffinePoint26 = blk: { 20 + const G_TABLE: [32][256]AffinePoint26 = blk: { 21 21 @setEvalBranchQuota(100_000_000); 22 22 break :blk affine_mod.buildByteTable(Secp256k1.basePoint); 23 23 }; ··· 33 33 /// 34 34 /// Optimizations: 35 35 /// 1. u1*G via precomputed byte table: ~32 mixed adds, zero doublings 36 - /// 2. u2*Q via 2-way Jacobian Shamir: cheaper dbl (2M+5S) and mixed add (7M+4S) 37 - /// 3. Endomorphism + Jacobian comparison (no field inversion) 36 + /// 2. u2*Q via projective-table GLV: no field inversion, 4-bit windowed 37 + /// 3. Jacobian comparison (no field inversion for final check) 38 38 /// 4. All arithmetic in Fe26 (10×26-bit) — no Montgomery form overhead 39 39 pub fn verify(sig_r: [32]u8, sig_s: [32]u8, msg_hash: [32]u8, public_key: Secp256k1) VerifyError!void { 40 40 // parse and validate r, s ··· 45 45 // scalar_u1 = z * s^-1, scalar_u2 = r * s^-1 46 46 const z = reduceToScalar(msg_hash); 47 47 const s_inv = s_sc.invert(); 48 - const scalar_u1 = z.mul(s_inv).toBytes(.little); 48 + const scalar_u1 = z.mul(s_inv).toBytes(.big); 49 49 const scalar_u2 = r_sc.mul(s_inv).toBytes(.little); 50 50 51 - // GLV split: u = r1 + r2*lambda 52 - var split_u1 = endo.splitScalar(scalar_u1, .little) catch return error.SignatureVerificationFailed; 53 - var split_u2 = endo.splitScalar(scalar_u2, .little) catch return error.SignatureVerificationFailed; 51 + // 1. u1*G via precomputed byte table — direct 32-byte lookup, no GLV 52 + const r1 = basePointMul(scalar_u1); 54 53 55 - // handle negative half-scalars: negate and track sign 54 + // 2. 
u2*Q via projective-table GLV — no field inversion 55 + var split_u2 = endo.splitScalar(scalar_u2, .little) catch return error.SignatureVerificationFailed; 56 56 const zero_s = scalar.Scalar.zero.toBytes(.little); 57 - 58 - var neg_g = false; 59 - var neg_g_phi = false; 60 57 var neg_p = false; 61 58 var neg_p_phi = false; 62 - 63 - if (split_u1.r1[16] != 0) { 64 - split_u1.r1 = scalar.neg(split_u1.r1, .little) catch zero_s; 65 - neg_g = true; 66 - } 67 - if (split_u1.r2[16] != 0) { 68 - split_u1.r2 = scalar.neg(split_u1.r2, .little) catch zero_s; 69 - neg_g_phi = true; 70 - } 71 59 if (split_u2.r1[16] != 0) { 72 60 split_u2.r1 = scalar.neg(split_u2.r1, .little) catch zero_s; 73 61 neg_p = true; ··· 76 64 split_u2.r2 = scalar.neg(split_u2.r2, .little) catch zero_s; 77 65 neg_p_phi = true; 78 66 } 79 - 80 - // 1. u1*G via precomputed byte table (zero doublings) 81 - const r1 = basePointMul(split_u1.r1, neg_g, split_u1.r2, neg_g_phi); 82 - 83 - // 2. u2*Q via 2-way Jacobian Shamir 84 67 const pk_affine26 = AffinePoint26.fromStdlib(public_key.affineCoordinates()); 85 - const pk_jac = point.precompute(pk_affine26, 8); 86 - const pk_affine = affine_mod.batchToAffine(9, pk_jac); 87 - const pk_phi_affine = point.phiTableAffine(9, pk_affine); 88 - const r2 = publicKeyMul(split_u2, neg_p, neg_p_phi, pk_affine, pk_phi_affine); 68 + const r2 = publicKeyMulProjective(split_u2, neg_p, neg_p_phi, pk_affine26); 89 69 90 70 // 3. combine results in Jacobian, compare without inversion 91 71 const q = r1.add(r2); ··· 101 81 } 102 82 103 83 /// Compute u1*G using the precomputed byte table. 104 - /// Decomposes the two 128-bit half-scalars into bytes and does direct lookups. 84 + /// Direct 32-byte lookup: G_TABLE[i][scalar[i]], no GLV decomposition. 105 85 /// Cost: ~32 mixed Jacobian-affine additions, zero doublings. 
106 - fn basePointMul(a1: [32]u8, neg1: bool, a2: [32]u8, neg2: bool) JacobianPoint { 86 + fn basePointMul(scalar_u1: [32]u8) JacobianPoint { 107 87 var acc = JacobianPoint.identity; 108 88 109 - // a1 half: direct table lookup 110 - for (0..16) |i| { 111 - const byte = a1[i]; 112 - if (byte != 0) { 113 - acc = if (neg1) acc.subMixed(G_TABLE[i][byte]) else acc.addMixed(G_TABLE[i][byte]); 114 - } 115 - } 116 - 117 - // a2 half: table lookup + phi on the fly (1 field mul per lookup) 118 - for (0..16) |i| { 119 - const byte = a2[i]; 89 + // G_TABLE[i] corresponds to byte position i (little-endian). 90 + // scalar_u1 is big-endian, so byte 31 is least significant. 91 + // G_TABLE[i] ↔ scalar_u1[31-i] 92 + for (0..32) |i| { 93 + const byte = scalar_u1[31 - i]; 120 94 if (byte != 0) { 121 - const p = endo.phiAffine(G_TABLE[i][byte]); 122 - acc = if (neg2) acc.subMixed(p) else acc.addMixed(p); 95 + acc = acc.addMixed(G_TABLE[i][byte]); 123 96 } 124 97 } 125 98 126 99 return acc; 127 100 } 128 101 129 - /// Compute u2*Q using 2-way Jacobian Shamir with windowed scalars. 130 - /// Cost: 128 Jacobian doublings + ~44 mixed additions. 131 - fn publicKeyMul( 102 + /// Compute u2*Q using 2-way Jacobian Shamir with projective tables. 103 + /// Tables stay in Jacobian form — no batchToAffine field inversion. 104 + /// Cost: 128 Jacobian doublings + ~44 Jacobian additions. 
105 + fn publicKeyMulProjective( 132 106 split: endo.SplitScalar, 133 107 neg_p: bool, 134 108 neg_p_phi: bool, 135 - pk_affine: [9]AffinePoint26, 136 - pk_phi_affine: [9]AffinePoint26, 109 + pk_affine: AffinePoint26, 137 110 ) JacobianPoint { 111 + // Build 9-entry Jacobian tables: [identity, 1P, 2P, ..., 8P] 112 + const pk_table = point.precompute(pk_affine, 8); 113 + const pk_phi_table = point.phiTableJacobian(9, pk_table); 114 + 138 115 const e1 = point.slide(split.r1); 139 116 const e2 = point.slide(split.r2); 140 117 141 118 var q = JacobianPoint.identity; 142 119 var pos: usize = 32; // 128-bit half-scalars → 32 nybbles + carry 143 120 while (true) : (pos -= 1) { 144 - q = addSlot(q, &pk_affine, e1[pos], neg_p); 145 - q = addSlot(q, &pk_phi_affine, e2[pos], neg_p_phi); 121 + q = addSlotJ(q, &pk_table, e1[pos], neg_p); 122 + q = addSlotJ(q, &pk_phi_table, e2[pos], neg_p_phi); 146 123 if (pos == 0) break; 147 124 q = q.dbl().dbl().dbl().dbl(); 148 125 } 149 126 return q; 150 127 } 151 128 152 - /// Add/subtract a table entry based on signed digit and negation flag. 129 + /// Add/subtract a Jacobian table entry based on signed digit and negation flag. 153 130 /// Variable-time: branches on digit value (safe for public verification). 154 - inline fn addSlot(q: JacobianPoint, table: *const [9]AffinePoint26, slot: i8, negate: bool) JacobianPoint { 131 + inline fn addSlotJ(q: JacobianPoint, table: *const [9]JacobianPoint, slot: i8, negate: bool) JacobianPoint { 155 132 var s = slot; 156 133 if (negate) s = -s; 157 134 if (s > 0) { 158 - return q.addMixed(table[@intCast(s)]); 135 + return q.add(table[@intCast(s)]); 159 136 } else if (s < 0) { 160 - return q.subMixed(table[@intCast(-s)]); 137 + return q.add(table[@intCast(-s)].negY()); 161 138 } 162 139 return q; 163 140 }