Mirror of @tangled.org/core. Running on a Raspberry Pi Zero 2

knotserver/git: improve lastCommit calculations for dirs and files

Larger portions of the last-commit logic for directories and files are now shared.

Signed-off-by: oppiliappan <me@oppi.li>

oppi.li c356948c 54b89d3f

verified
+93 -66
+93 -66
knotserver/git/last_commit.go
··· 6 6 "crypto/sha256" 7 7 "fmt" 8 8 "io" 9 + "iter" 9 10 "os/exec" 10 11 "path" 12 + "strconv" 11 13 "strings" 12 14 "time" 13 15 14 16 "github.com/dgraph-io/ristretto" 15 17 "github.com/go-git/go-git/v5/plumbing" 16 - "github.com/go-git/go-git/v5/plumbing/object" 18 + "tangled.org/core/sets" 17 19 "tangled.org/core/types" 18 20 ) 19 21 ··· 75 73 type commit struct { 76 74 hash plumbing.Hash 77 75 when time.Time 78 - files []string 76 + files sets.Set[string] 79 77 message string 78 + } 79 + 80 + func newCommit() commit { 81 + return commit{ 82 + files: sets.New[string](), 83 + } 84 + } 85 + 86 + type lastCommitDir struct { 87 + dir string 88 + entries []string 89 + } 90 + 91 + func (l lastCommitDir) children() iter.Seq[string] { 92 + return func(yield func(string) bool) { 93 + for _, child := range l.entries { 94 + if !yield(path.Join(l.dir, child)) { 95 + return 96 + } 97 + } 98 + } 80 99 } 81 100 82 101 func cacheKey(g *GitRepo, path string) string { ··· 106 83 return fmt.Sprintf("%x", hash) 107 84 } 108 85 109 - func (g *GitRepo) calculateCommitTimeIn(ctx context.Context, subtree *object.Tree, parent string, timeout time.Duration) (map[string]commit, error) { 86 + func (g *GitRepo) lastCommitDirIn(ctx context.Context, parent lastCommitDir, timeout time.Duration) (map[string]commit, error) { 110 87 ctx, cancel := context.WithTimeout(ctx, timeout) 111 88 defer cancel() 112 - return g.calculateCommitTime(ctx, subtree, parent) 89 + return g.lastCommitDir(ctx, parent) 113 90 } 114 91 115 - func (g *GitRepo) calculateCommitTime(ctx context.Context, subtree *object.Tree, parent string) (map[string]commit, error) { 116 - filesToDo := make(map[string]struct{}) 92 + func (g *GitRepo) lastCommitDir(ctx context.Context, parent lastCommitDir) (map[string]commit, error) { 93 + filesToDo := sets.Collect(parent.children()) 117 94 filesDone := make(map[string]commit) 118 - for _, e := range subtree.Entries { 119 - fpath := path.Clean(path.Join(parent, e.Name)) 120 - 
filesToDo[fpath] = struct{}{} 121 - } 122 95 123 - for _, e := range subtree.Entries { 124 - f := path.Clean(path.Join(parent, e.Name)) 125 - cacheKey := cacheKey(g, f) 96 + for p := range filesToDo.All() { 97 + cacheKey := cacheKey(g, p) 126 98 if cached, ok := commitCache.Get(cacheKey); ok { 127 - filesDone[f] = cached.(commit) 128 - delete(filesToDo, f) 99 + filesDone[p] = cached.(commit) 100 + filesToDo.Remove(p) 129 101 } else { 130 - filesToDo[f] = struct{}{} 102 + filesToDo.Insert(p) 131 103 } 132 104 } 133 105 134 - if len(filesToDo) == 0 { 106 + if filesToDo.IsEmpty() { 135 107 return filesDone, nil 136 108 } 137 109 ··· 134 116 defer cancel() 135 117 136 118 pathSpec := "." 137 - if parent != "" { 138 - pathSpec = parent 119 + if parent.dir != "" { 120 + pathSpec = parent.dir 139 121 } 140 - output, err := g.streamingGitLog(ctx, "--pretty=format:%H,%ad,%s", "--date=iso", "--name-only", "--", pathSpec) 122 + if filesToDo.Len() == 1 { 123 + // this is an optimization for the scenario where we want to calculate 124 + // the last commit for just one path, we can directly set the pathspec to that path 125 + for s := range filesToDo.All() { 126 + pathSpec = s 127 + } 128 + } 129 + 130 + output, err := g.streamingGitLog(ctx, "--pretty=format:%H,%ad,%s", "--date=unix", "--name-only", "--", pathSpec) 141 131 if err != nil { 142 132 return nil, err 143 133 } 144 134 defer output.Close() // Ensure the git process is properly cleaned up 145 135 146 136 reader := bufio.NewReader(output) 147 - var current commit 137 + current := newCommit() 148 138 for { 149 139 line, err := reader.ReadString('\n') 150 140 if err != nil && err != io.EOF { ··· 163 137 if line == "" { 164 138 if !current.hash.IsZero() { 165 139 // we have a fully parsed commit 166 - for _, f := range current.files { 167 - if _, ok := filesToDo[f]; ok { 140 + for f := range current.files.All() { 141 + if filesToDo.Contains(f) { 168 142 filesDone[f] = current 169 - delete(filesToDo, f) 143 + 
filesToDo.Remove(f) 170 144 commitCache.Set(cacheKey(g, f), current, 0) 171 145 } 172 146 } 173 147 174 - if len(filesToDo) == 0 { 175 - cancel() 148 + if filesToDo.IsEmpty() { 176 149 break 177 150 } 178 - current = commit{} 151 + current = newCommit() 179 152 } 180 153 } else if current.hash.IsZero() { 181 154 parts := strings.SplitN(line, ",", 3) 182 155 if len(parts) == 3 { 183 156 current.hash = plumbing.NewHash(parts[0]) 184 - current.when, _ = time.Parse("2006-01-02 15:04:05 -0700", parts[1]) 157 + epochTime, _ := strconv.ParseInt(parts[1], 10, 64) 158 + current.when = time.Unix(epochTime, 0) 185 159 current.message = parts[2] 186 160 } 187 161 } else { 188 162 // all ancestors along this path should also be included 189 163 file := path.Clean(line) 190 - ancestors := ancestors(file) 191 - current.files = append(current.files, file) 192 - current.files = append(current.files, ancestors...) 164 + current.files.Insert(file) 165 + for _, a := range ancestors(file) { 166 + current.files.Insert(a) 167 + } 193 168 } 194 169 195 170 if err == io.EOF { ··· 199 172 } 200 173 201 174 return filesDone, nil 175 + } 176 + 177 + // LastCommitFile returns the last commit information for a specific file path 178 + func (g *GitRepo) LastCommitFile(ctx context.Context, filePath string) (*types.LastCommitInfo, error) { 179 + parent, child := path.Split(filePath) 180 + parent = path.Clean(parent) 181 + if parent == "." 
{ 182 + parent = "" 183 + } 184 + 185 + lastCommitDir := lastCommitDir{ 186 + dir: parent, 187 + entries: []string{child}, 188 + } 189 + 190 + times, err := g.lastCommitDirIn(ctx, lastCommitDir, 2*time.Second) 191 + if err != nil { 192 + return nil, fmt.Errorf("calculate commit time: %w", err) 193 + } 194 + 195 + // extract the only element of the map, the commit info of the current path 196 + var commitInfo *commit 197 + for _, c := range times { 198 + commitInfo = &c 199 + } 200 + 201 + if commitInfo == nil { 202 + return nil, fmt.Errorf("no commit found for path: %s", filePath) 203 + } 204 + 205 + return &types.LastCommitInfo{ 206 + Hash: commitInfo.hash, 207 + Message: commitInfo.message, 208 + When: commitInfo.when, 209 + }, nil 202 210 } 203 211 204 212 func ancestors(p string) []string { ··· 247 185 ancestors = append(ancestors, p) 248 186 } 249 187 return ancestors 250 - } 251 - 252 - // GetLastCommitForPath returns the last commit information for a specific file path 253 - func (g *GitRepo) GetLastCommitForPath(ctx context.Context, filePath string) (*types.LastCommitInfo, error) { 254 - c, err := g.r.CommitObject(g.h) 255 - if err != nil { 256 - return nil, fmt.Errorf("commit object: %w", err) 257 - } 258 - 259 - tree, err := c.Tree() 260 - if err != nil { 261 - return nil, fmt.Errorf("file tree: %w", err) 262 - } 263 - 264 - // parent directory for calculateCommitTime 265 - parent := path.Dir(filePath) 266 - if parent == "." { 267 - parent = "" 268 - } 269 - 270 - times, err := g.calculateCommitTimeIn(ctx, tree, parent, 2*time.Second) 271 - if err != nil { 272 - return nil, fmt.Errorf("calculate commit time: %w", err) 273 - } 274 - 275 - commitInfo, ok := times[filePath] 276 - if !ok { 277 - return nil, fmt.Errorf("no commit found for path: %s", filePath) 278 - } 279 - 280 - return &types.LastCommitInfo{ 281 - Hash: commitInfo.hash, 282 - Message: commitInfo.message, 283 - When: commitInfo.when, 284 - }, nil 285 188 }