Monorepo for Tangled

knotserver/git: improve lastCommit calculations for dirs and files

Larger portions of the last-commit logic are now shared between directories and files.

Signed-off-by: oppiliappan <me@oppi.li>

+87 -60
+87 -60
knotserver/git/last_commit.go
··· 6 6 "crypto/sha256" 7 7 "fmt" 8 8 "io" 9 + "iter" 9 10 "os/exec" 10 11 "path" 12 + "strconv" 11 13 "strings" 12 14 "time" 13 15 14 16 "github.com/dgraph-io/ristretto" 15 17 "github.com/go-git/go-git/v5/plumbing" 16 - "github.com/go-git/go-git/v5/plumbing/object" 18 + "tangled.org/core/sets" 17 19 "tangled.org/core/types" 18 20 ) 19 21 ··· 73 75 type commit struct { 74 76 hash plumbing.Hash 75 77 when time.Time 76 - files []string 78 + files sets.Set[string] 77 79 message string 78 80 } 79 81 82 + func newCommit() commit { 83 + return commit{ 84 + files: sets.New[string](), 85 + } 86 + } 87 + 88 + type lastCommitDir struct { 89 + dir string 90 + entries []string 91 + } 92 + 93 + func (l lastCommitDir) children() iter.Seq[string] { 94 + return func(yield func(string) bool) { 95 + for _, child := range l.entries { 96 + if !yield(path.Join(l.dir, child)) { 97 + return 98 + } 99 + } 100 + } 101 + } 102 + 80 103 func cacheKey(g *GitRepo, path string) string { 81 104 sep := byte(':') 82 105 hash := sha256.Sum256(fmt.Append([]byte{}, g.path, sep, g.h.String(), sep, path)) 83 106 return fmt.Sprintf("%x", hash) 84 107 } 85 108 86 - func (g *GitRepo) calculateCommitTimeIn(ctx context.Context, subtree *object.Tree, parent string, timeout time.Duration) (map[string]commit, error) { 109 + func (g *GitRepo) lastCommitDirIn(ctx context.Context, parent lastCommitDir, timeout time.Duration) (map[string]commit, error) { 87 110 ctx, cancel := context.WithTimeout(ctx, timeout) 88 111 defer cancel() 89 - return g.calculateCommitTime(ctx, subtree, parent) 112 + return g.lastCommitDir(ctx, parent) 90 113 } 91 114 92 - func (g *GitRepo) calculateCommitTime(ctx context.Context, subtree *object.Tree, parent string) (map[string]commit, error) { 93 - filesToDo := make(map[string]struct{}) 115 + func (g *GitRepo) lastCommitDir(ctx context.Context, parent lastCommitDir) (map[string]commit, error) { 116 + filesToDo := sets.Collect(parent.children()) 94 117 filesDone := make(map[string]commit) 
95 - for _, e := range subtree.Entries { 96 - fpath := path.Clean(path.Join(parent, e.Name)) 97 - filesToDo[fpath] = struct{}{} 98 - } 99 118 100 - for _, e := range subtree.Entries { 101 - f := path.Clean(path.Join(parent, e.Name)) 102 - cacheKey := cacheKey(g, f) 119 + for p := range filesToDo.All() { 120 + cacheKey := cacheKey(g, p) 103 121 if cached, ok := commitCache.Get(cacheKey); ok { 104 - filesDone[f] = cached.(commit) 105 - delete(filesToDo, f) 122 + filesDone[p] = cached.(commit) 123 + filesToDo.Remove(p) 106 124 } else { 107 - filesToDo[f] = struct{}{} 125 + filesToDo.Insert(p) 108 126 } 109 127 } 110 128 111 - if len(filesToDo) == 0 { 129 + if filesToDo.IsEmpty() { 112 130 return filesDone, nil 113 131 } 114 132 ··· 116 134 defer cancel() 117 135 118 136 pathSpec := "." 119 - if parent != "" { 120 - pathSpec = parent 137 + if parent.dir != "" { 138 + pathSpec = parent.dir 139 + } 140 + if filesToDo.Len() == 1 { 141 + // this is an optimization for the scenario where we want to calculate 142 + // the last commit for just one path, we can directly set the pathspec to that path 143 + for s := range filesToDo.All() { 144 + pathSpec = s 145 + } 121 146 } 122 - output, err := g.streamingGitLog(ctx, "--pretty=format:%H,%ad,%s", "--date=iso", "--name-only", "--", pathSpec) 147 + 148 + output, err := g.streamingGitLog(ctx, "--pretty=format:%H,%ad,%s", "--date=unix", "--name-only", "--", pathSpec) 123 149 if err != nil { 124 150 return nil, err 125 151 } 126 152 defer output.Close() // Ensure the git process is properly cleaned up 127 153 128 154 reader := bufio.NewReader(output) 129 - var current commit 155 + current := newCommit() 130 156 for { 131 157 line, err := reader.ReadString('\n') 132 158 if err != nil && err != io.EOF { ··· 137 163 if line == "" { 138 164 if !current.hash.IsZero() { 139 165 // we have a fully parsed commit 140 - for _, f := range current.files { 141 - if _, ok := filesToDo[f]; ok { 166 + for f := range current.files.All() { 167 + if 
filesToDo.Contains(f) { 142 168 filesDone[f] = current 143 - delete(filesToDo, f) 169 + filesToDo.Remove(f) 144 170 commitCache.Set(cacheKey(g, f), current, 0) 145 171 } 146 172 } 147 173 148 - if len(filesToDo) == 0 { 149 - cancel() 174 + if filesToDo.IsEmpty() { 150 175 break 151 176 } 152 - current = commit{} 177 + current = newCommit() 153 178 } 154 179 } else if current.hash.IsZero() { 155 180 parts := strings.SplitN(line, ",", 3) 156 181 if len(parts) == 3 { 157 182 current.hash = plumbing.NewHash(parts[0]) 158 - current.when, _ = time.Parse("2006-01-02 15:04:05 -0700", parts[1]) 183 + epochTime, _ := strconv.ParseInt(parts[1], 10, 64) 184 + current.when = time.Unix(epochTime, 0) 159 185 current.message = parts[2] 160 186 } 161 187 } else { 162 188 // all ancestors along this path should also be included 163 189 file := path.Clean(line) 164 - ancestors := ancestors(file) 165 - current.files = append(current.files, file) 166 - current.files = append(current.files, ancestors...) 190 + current.files.Insert(file) 191 + for _, a := range ancestors(file) { 192 + current.files.Insert(a) 193 + } 167 194 } 168 195 169 196 if err == io.EOF { ··· 174 201 return filesDone, nil 175 202 } 176 203 177 - func ancestors(p string) []string { 178 - var ancestors []string 179 - 180 - for { 181 - p = path.Dir(p) 182 - if p == "." || p == "/" { 183 - break 184 - } 185 - ancestors = append(ancestors, p) 204 + // LastCommitFile returns the last commit information for a specific file path 205 + func (g *GitRepo) LastCommitFile(ctx context.Context, filePath string) (*types.LastCommitInfo, error) { 206 + parent, child := path.Split(filePath) 207 + parent = path.Clean(parent) 208 + if parent == "." 
{ 209 + parent = "" 186 210 } 187 - return ancestors 188 - } 189 211 190 - // GetLastCommitForPath returns the last commit information for a specific file path 191 - func (g *GitRepo) GetLastCommitForPath(ctx context.Context, filePath string) (*types.LastCommitInfo, error) { 192 - c, err := g.r.CommitObject(g.h) 193 - if err != nil { 194 - return nil, fmt.Errorf("commit object: %w", err) 212 + lastCommitDir := lastCommitDir{ 213 + dir: parent, 214 + entries: []string{child}, 195 215 } 196 216 197 - tree, err := c.Tree() 217 + times, err := g.lastCommitDirIn(ctx, lastCommitDir, 2*time.Second) 198 218 if err != nil { 199 - return nil, fmt.Errorf("file tree: %w", err) 200 - } 201 - 202 - // parent directory for calculateCommitTime 203 - parent := path.Dir(filePath) 204 - if parent == "." { 205 - parent = "" 219 + return nil, fmt.Errorf("calculate commit time: %w", err) 206 220 } 207 221 208 - times, err := g.calculateCommitTimeIn(ctx, tree, parent, 2*time.Second) 209 - if err != nil { 210 - return nil, fmt.Errorf("calculate commit time: %w", err) 222 + // extract the only element of the map, the commit info of the current path 223 + var commitInfo *commit 224 + for _, c := range times { 225 + commitInfo = &c 211 226 } 212 227 213 - commitInfo, ok := times[filePath] 214 - if !ok { 228 + if commitInfo == nil { 215 229 return nil, fmt.Errorf("no commit found for path: %s", filePath) 216 230 } 217 231 ··· 221 235 When: commitInfo.when, 222 236 }, nil 223 237 } 238 + 239 + func ancestors(p string) []string { 240 + var ancestors []string 241 + 242 + for { 243 + p = path.Dir(p) 244 + if p == "." || p == "/" { 245 + break 246 + } 247 + ancestors = append(ancestors, p) 248 + } 249 + return ancestors 250 + }