forked from
tangled.org/core
Monorepo for Tangled
1package git
2
3import (
4 "bufio"
5 "context"
6 "crypto/sha256"
7 "fmt"
8 "io"
9 "os/exec"
10 "path"
11 "strings"
12 "time"
13
14 "github.com/dgraph-io/ristretto"
15 "github.com/go-git/go-git/v5/plumbing"
16 "github.com/go-git/go-git/v5/plumbing/object"
17 "tangled.org/core/types"
18)
19
20var (
21 commitCache *ristretto.Cache
22)
23
24func init() {
25 cache, _ := ristretto.NewCache(&ristretto.Config{
26 NumCounters: 1e7,
27 MaxCost: 1 << 30,
28 BufferItems: 64,
29 TtlTickerDurationInSec: 120,
30 })
31 commitCache = cache
32}
33
// processReader wraps the stdout of a started command and ensures the
// associated process is cleaned up when the reader is closed.
type processReader struct {
	io.Reader
	cmd    *exec.Cmd     // the started command that produces the stream
	stdout io.ReadCloser // the command's stdout pipe; closed by Close
}

// Close closes the stdout pipe and then waits for the process to exit.
//
// The process is always waited for, even when closing the pipe fails;
// returning early would leave the child un-reaped. A pipe close error takes
// precedence over the error returned by Wait.
func (pr *processReader) Close() error {
	closeErr := pr.stdout.Close()
	waitErr := pr.cmd.Wait()
	if closeErr != nil {
		return closeErr
	}
	return waitErr
}
47
48func (g *GitRepo) streamingGitLog(ctx context.Context, extraArgs ...string) (io.ReadCloser, error) {
49 args := []string{}
50 args = append(args, "log")
51 args = append(args, g.h.String())
52 args = append(args, extraArgs...)
53
54 cmd := exec.CommandContext(ctx, "git", args...)
55 cmd.Dir = g.path
56
57 stdout, err := cmd.StdoutPipe()
58 if err != nil {
59 return nil, err
60 }
61
62 if err := cmd.Start(); err != nil {
63 return nil, err
64 }
65
66 return &processReader{
67 Reader: stdout,
68 cmd: cmd,
69 stdout: stdout,
70 }, nil
71}
72
73type commit struct {
74 hash plumbing.Hash
75 when time.Time
76 files []string
77 message string
78}
79
80func cacheKey(g *GitRepo, path string) string {
81 sep := byte(':')
82 hash := sha256.Sum256(fmt.Append([]byte{}, g.path, sep, g.h.String(), sep, path))
83 return fmt.Sprintf("%x", hash)
84}
85
86func (g *GitRepo) calculateCommitTimeIn(ctx context.Context, subtree *object.Tree, parent string, timeout time.Duration) (map[string]commit, error) {
87 ctx, cancel := context.WithTimeout(ctx, timeout)
88 defer cancel()
89 return g.calculateCommitTime(ctx, subtree, parent)
90}
91
92func (g *GitRepo) calculateCommitTime(ctx context.Context, subtree *object.Tree, parent string) (map[string]commit, error) {
93 filesToDo := make(map[string]struct{})
94 filesDone := make(map[string]commit)
95 for _, e := range subtree.Entries {
96 fpath := path.Clean(path.Join(parent, e.Name))
97 filesToDo[fpath] = struct{}{}
98 }
99
100 for _, e := range subtree.Entries {
101 f := path.Clean(path.Join(parent, e.Name))
102 cacheKey := cacheKey(g, f)
103 if cached, ok := commitCache.Get(cacheKey); ok {
104 filesDone[f] = cached.(commit)
105 delete(filesToDo, f)
106 } else {
107 filesToDo[f] = struct{}{}
108 }
109 }
110
111 if len(filesToDo) == 0 {
112 return filesDone, nil
113 }
114
115 ctx, cancel := context.WithCancel(ctx)
116 defer cancel()
117
118 pathSpec := "."
119 if parent != "" {
120 pathSpec = parent
121 }
122 output, err := g.streamingGitLog(ctx, "--pretty=format:%H,%ad,%s", "--date=iso", "--name-only", "--", pathSpec)
123 if err != nil {
124 return nil, err
125 }
126 defer output.Close() // Ensure the git process is properly cleaned up
127
128 reader := bufio.NewReader(output)
129 var current commit
130 for {
131 line, err := reader.ReadString('\n')
132 if err != nil && err != io.EOF {
133 return nil, err
134 }
135 line = strings.TrimSpace(line)
136
137 if line == "" {
138 if !current.hash.IsZero() {
139 // we have a fully parsed commit
140 for _, f := range current.files {
141 if _, ok := filesToDo[f]; ok {
142 filesDone[f] = current
143 delete(filesToDo, f)
144 commitCache.Set(cacheKey(g, f), current, 0)
145 }
146 }
147
148 if len(filesToDo) == 0 {
149 cancel()
150 break
151 }
152 current = commit{}
153 }
154 } else if current.hash.IsZero() {
155 parts := strings.SplitN(line, ",", 3)
156 if len(parts) == 3 {
157 current.hash = plumbing.NewHash(parts[0])
158 current.when, _ = time.Parse("2006-01-02 15:04:05 -0700", parts[1])
159 current.message = parts[2]
160 }
161 } else {
162 // all ancestors along this path should also be included
163 file := path.Clean(line)
164 ancestors := ancestors(file)
165 current.files = append(current.files, file)
166 current.files = append(current.files, ancestors...)
167 }
168
169 if err == io.EOF {
170 break
171 }
172 }
173
174 return filesDone, nil
175}
176
// ancestors lists the parent directories of p, nearest first. The walk
// stops before "." or "/", so neither root marker is ever included; a path
// without a directory component yields a nil slice.
func ancestors(p string) []string {
	var dirs []string
	for dir := path.Dir(p); dir != "." && dir != "/"; dir = path.Dir(dir) {
		dirs = append(dirs, dir)
	}
	return dirs
}
189
190// GetLastCommitForPath returns the last commit information for a specific file path
191func (g *GitRepo) GetLastCommitForPath(ctx context.Context, filePath string) (*types.LastCommitInfo, error) {
192 c, err := g.r.CommitObject(g.h)
193 if err != nil {
194 return nil, fmt.Errorf("commit object: %w", err)
195 }
196
197 tree, err := c.Tree()
198 if err != nil {
199 return nil, fmt.Errorf("file tree: %w", err)
200 }
201
202 // parent directory for calculateCommitTime
203 parent := path.Dir(filePath)
204 if parent == "." {
205 parent = ""
206 }
207
208 times, err := g.calculateCommitTimeIn(ctx, tree, parent, 2*time.Second)
209 if err != nil {
210 return nil, fmt.Errorf("calculate commit time: %w", err)
211 }
212
213 commitInfo, ok := times[filePath]
214 if !ok {
215 return nil, fmt.Errorf("no commit found for path: %s", filePath)
216 }
217
218 return &types.LastCommitInfo{
219 Hash: commitInfo.hash,
220 Message: commitInfo.message,
221 When: commitInfo.when,
222 }, nil
223}