Fast implementation of Git in pure Go

hash: Use a hashAlgorithmDetails struct for single source of truth

hashAlgorithm values are assumed to be valid; a method called on an invalid
hashAlgorithm will panic with an out-of-bounds read when the value is not found
in hashAlgorithmTable, and that is expected and intended.

runxiyu.tngl.sh 60024855 888f4af8

verified
+87 -72
+70 -42
hash.go
··· 6 6 "encoding/hex" 7 7 ) 8 8 9 - // maxHashSize MUST be equal to (or larger than) the size of the 10 - // largest hash supported in hashFuncs. 9 + // maxHashSize MUST be >= the largest supported algorithm size. 11 10 const maxHashSize = sha256.Size 12 11 13 12 // hashAlgorithm identifies the hash algorithm used for Git object IDs. 14 13 type hashAlgorithm uint8 15 14 16 - // hashFuncs maps hash algorithm to hash function. 17 - var hashFuncs = map[hashAlgorithm]hashFunc{ 18 - hashAlgoSHA1: func(data []byte) Hash { 19 - sum := sha1.Sum(data) 20 - var h Hash 21 - copy(h.data[:], sum[:]) 22 - h.algo = hashAlgoSHA1 23 - return h 24 - }, 25 - hashAlgoSHA256: func(data []byte) Hash { 26 - sum := sha256.Sum256(data) 27 - var h Hash 28 - copy(h.data[:], sum[:]) 29 - h.algo = hashAlgoSHA256 30 - return h 31 - }, 32 - } 33 - 34 15 const ( 35 16 hashAlgoUnknown hashAlgorithm = iota 36 17 hashAlgoSHA1 37 18 hashAlgoSHA256 38 19 ) 39 20 40 - // size returns the hash size in bytes. 41 - func (algo hashAlgorithm) size() int { 42 - switch algo { 43 - case hashAlgoSHA1: 44 - return sha1.Size 45 - case hashAlgoSHA256: 46 - return sha256.Size 47 - default: 48 - return 0 49 - } 21 + type hashAlgorithmDetails struct { 22 + name string 23 + size int 24 + sum func([]byte) Hash 25 + } 26 + 27 + var hashAlgorithmTable = [...]hashAlgorithmDetails{ 28 + hashAlgoUnknown: {}, 29 + hashAlgoSHA1: { 30 + name: "sha1", 31 + size: sha1.Size, 32 + sum: func(data []byte) Hash { 33 + sum := sha1.Sum(data) 34 + var h Hash 35 + copy(h.data[:], sum[:]) 36 + h.algo = hashAlgoSHA1 37 + return h 38 + }, 39 + }, 40 + hashAlgoSHA256: { 41 + name: "sha256", 42 + size: sha256.Size, 43 + sum: func(data []byte) Hash { 44 + sum := sha256.Sum256(data) 45 + var h Hash 46 + copy(h.data[:], sum[:]) 47 + h.algo = hashAlgoSHA256 48 + return h 49 + }, 50 + }, 51 + } 52 + 53 + func (algo hashAlgorithm) info() hashAlgorithmDetails { 54 + return hashAlgorithmTable[algo] 55 + } 56 + 57 + // Size returns the hash size in 
bytes. 58 + func (algo hashAlgorithm) Size() int { 59 + return algo.info().size 50 60 } 51 61 52 62 // String returns the canonical name of the hash algorithm. 53 63 func (algo hashAlgorithm) String() string { 54 - switch algo { 55 - case hashAlgoSHA1: 56 - return "sha1" 57 - case hashAlgoSHA256: 58 - return "sha256" 59 - default: 64 + inf := algo.info() 65 + if inf.name == "" { 60 66 return "unknown" 61 67 } 68 + return inf.name 69 + } 70 + 71 + func (algo hashAlgorithm) HexLen() int { 72 + return algo.Size() * 2 73 + } 74 + 75 + func (algo hashAlgorithm) Sum(data []byte) Hash { 76 + return algo.info().sum(data) 62 77 } 63 78 64 79 // Hash represents a Git object ID. ··· 67 82 data [maxHashSize]byte 68 83 } 69 84 70 - // hashFunc is a function that computes a hash from input data. 71 - type hashFunc func([]byte) Hash 72 - 73 85 // String returns a hexadecimal string representation of the hash. 74 86 func (hash Hash) String() string { 75 - size := hash.algo.size() 87 + size := hash.algo.Size() 76 88 if size == 0 { 77 89 return "" 78 90 } ··· 81 93 82 94 // Bytes returns a copy of the hash's bytes. 83 95 func (hash Hash) Bytes() []byte { 84 - size := hash.algo.size() 96 + size := hash.algo.Size() 85 97 if size == 0 { 86 98 return nil 87 99 } ··· 90 102 91 103 // Size returns the hash size. 92 104 func (hash Hash) Size() int { 93 - return hash.algo.size() 105 + return hash.algo.Size() 106 + } 107 + 108 + var algoByName = map[string]hashAlgorithm{} 109 + 110 + func init() { 111 + for algo, info := range hashAlgorithmTable { 112 + if info.name == "" { 113 + continue 114 + } 115 + algoByName[info.name] = hashAlgorithm(algo) 116 + } 117 + } 118 + 119 + func parseHashAlgorithm(s string) (hashAlgorithm, bool) { 120 + algo, ok := algoByName[s] 121 + return algo, ok 94 122 }
+1 -1
hash_test.go
··· 18 18 19 19 var validHash string 20 20 var expectedSize int 21 - if repo.hashAlgo.size() == 32 { 21 + if repo.hashAlgo.Size() == 32 { 22 22 validHash = "0123456789abcdef0123456789abcdef0123456789abcdef0123456789abcdef" 23 23 expectedSize = 32 24 24 } else {
+3 -3
obj_tree.go
··· 78 78 nameBytes := body[i : i+nul] 79 79 i += nul + 1 80 80 81 - if i+repo.hashAlgo.size() > len(body) { 81 + if i+repo.hashAlgo.Size() > len(body) { 82 82 return nil, errors.New("furgit: tree: truncated child hash") 83 83 } 84 84 var child Hash 85 - copy(child.data[:], body[i:i+repo.hashAlgo.size()]) 85 + copy(child.data[:], body[i:i+repo.hashAlgo.Size()]) 86 86 child.algo = repo.hashAlgo 87 - i += repo.hashAlgo.size() 87 + i += repo.hashAlgo.Size() 88 88 89 89 mode, err := strconv.ParseUint(string(modeBytes), 8, 32) 90 90 if err != nil {
+3 -3
pack_idx.go
··· 163 163 nobj := int(readBE32(pi.fanout[len(pi.fanout)-4:])) 164 164 165 165 namesStart := fanoutEnd 166 - namesEnd := namesStart + nobj*pi.repo.hashAlgo.size() 166 + namesEnd := namesStart + nobj*pi.repo.hashAlgo.Size() 167 167 if namesEnd > len(buf) { 168 168 return ErrInvalidObject 169 169 } ··· 183 183 pi.offset32 = buf[off32Start:off32End] 184 184 185 185 off64Start := off32End 186 - trailerStart := len(buf) - 2*pi.repo.hashAlgo.size() 186 + trailerStart := len(buf) - 2*pi.repo.hashAlgo.Size() 187 187 if trailerStart < off64Start { 188 188 return ErrInvalidObject 189 189 } ··· 253 253 lo = int(pi.fanoutEntry(first - 1)) 254 254 } 255 255 hi := int(pi.fanoutEntry(first)) 256 - idx, found := bsearchHash(pi.names, pi.repo.hashAlgo.size(), lo, hi, id) 256 + idx, found := bsearchHash(pi.names, pi.repo.hashAlgo.Size(), lo, hi, id) 257 257 if !found { 258 258 return packlocation{}, ErrNotFound 259 259 }
+2 -2
pack_pack.go
··· 176 176 case ObjectTypeCommit, ObjectTypeTree, ObjectTypeBlob, ObjectTypeTag: 177 177 return ty, declaredSize, nil 178 178 case ObjectTypeRefDelta: 179 - hashEnd := dataStart + uint64(repo.hashAlgo.size()) 179 + hashEnd := dataStart + uint64(repo.hashAlgo.Size()) 180 180 if hashEnd > uint64(len(pf.data)) { 181 181 return ObjectTypeInvalid, 0, io.ErrUnexpectedEOF 182 182 } ··· 273 273 resultTy = ty 274 274 resolved = true 275 275 case ObjectTypeRefDelta: 276 - hashEnd := dataStart + uint64(repo.hashAlgo.size()) 276 + hashEnd := dataStart + uint64(repo.hashAlgo.Size()) 277 277 if hashEnd > uint64(len(pf.data)) { 278 278 return fail(io.ErrUnexpectedEOF) 279 279 }
+2 -2
refs.go
··· 70 70 } 71 71 72 72 sp := bytes.IndexByte(line, ' ') 73 - if sp != repo.hashAlgo.size()*2 { 73 + if sp != repo.hashAlgo.Size()*2 { 74 74 continue 75 75 } 76 76 ··· 428 428 } 429 429 430 430 sp := bytes.IndexByte(line, ' ') 431 - if sp != repo.hashAlgo.size()*2 { 431 + if sp != repo.hashAlgo.Size()*2 { 432 432 lastIdx = -1 433 433 continue 434 434 }
+5 -18
repo.go
··· 63 63 algo = "sha1" 64 64 } 65 65 66 - var hashAlgo hashAlgorithm 67 - switch algo { 68 - case "sha1": 69 - hashAlgo = hashAlgoSHA1 70 - case "sha256": 71 - hashAlgo = hashAlgoSHA256 72 - default: 73 - return nil, fmt.Errorf("furgit: unsupported hash algorithm %q", algo) 74 - } 75 - 76 - if hashAlgo.size() == 0 { 66 + hashAlgo, ok := parseHashAlgorithm(algo) 67 + if !ok { 77 68 return nil, fmt.Errorf("furgit: unsupported hash algorithm %q", algo) 78 - } 79 - if _, ok := hashFuncs[hashAlgo]; !ok { 80 - return nil, fmt.Errorf("furgit: hash algorithm %q is not supported by the hash functions provided by this build", algo) 81 69 } 82 70 83 71 return &Repository{ ··· 130 118 if len(s)%2 != 0 { 131 119 return id, fmt.Errorf("furgit: invalid hash length %d, it has to be even at the very least", len(s)) 132 120 } 133 - expectedLen := repo.hashAlgo.size() * 2 121 + expectedLen := repo.hashAlgo.Size() * 2 134 122 if len(s) != expectedLen { 135 - return id, fmt.Errorf("furgit: hash length mismatch: got %d chars, expected %d for hash size %d", len(s), expectedLen, repo.hashAlgo.size()) 123 + return id, fmt.Errorf("furgit: hash length mismatch: got %d chars, expected %d for hash size %d", len(s), expectedLen, repo.hashAlgo.Size()) 136 124 } 137 125 data, err := hex.DecodeString(s) 138 126 if err != nil { ··· 145 133 146 134 // computeRawHash computes a hash from raw data using the repository's hash algorithm. 147 135 func (repo *Repository) computeRawHash(data []byte) Hash { 148 - hashFunc := hashFuncs[repo.hashAlgo] 149 - return hashFunc(data) 136 + return repo.hashAlgo.Sum(data) 150 137 } 151 138 152 139 // verifyRawObject verifies a raw object against its expected hash.
+1 -1
repo_test.go
··· 17 17 if repo.rootPath != repoPath { 18 18 t.Errorf("rootPath: got %q, want %q", repo.rootPath, repoPath) 19 19 } 20 - hashSize := repo.hashAlgo.size() 20 + hashSize := repo.hashAlgo.Size() 21 21 if hashSize != 32 && hashSize != 20 { 22 22 t.Errorf("hashSize: got %d, want 32 (SHA-256) or 20 (SHA-1)", hashSize) 23 23 }