[mirror] Scalable static site server for Git forges (like GitHub Pages)
1package git_pages
2
3import (
4 "archive/tar"
5 "archive/zip"
6 "bytes"
7 "compress/gzip"
8 "context"
9 "errors"
10 "fmt"
11 "io"
12 "math"
13 "os"
14 "path"
15 "strings"
16
17 "github.com/c2h5oh/datasize"
18 "github.com/go-git/go-git/v6/plumbing"
19 "github.com/klauspost/compress/zstd"
20)
21
// ErrArchiveTooLarge is returned when an uploaded archive (or its declared
// decompressed contents) exceeds the configured maximum site size.
var ErrArchiveTooLarge = errors.New("archive too large")
23
24func boundArchiveStream(reader io.Reader) io.Reader {
25 return ReadAtMost(reader, int64(config.Limits.MaxSiteSize.Bytes()),
26 fmt.Errorf("%w: %s limit exceeded", ErrArchiveTooLarge, config.Limits.MaxSiteSize.HR()))
27}
28
29func ExtractGzip(
30 ctx context.Context, reader io.Reader,
31 next func(context.Context, io.Reader) (*Manifest, error),
32) (*Manifest, error) {
33 stream, err := gzip.NewReader(reader)
34 if err != nil {
35 return nil, err
36 }
37 defer stream.Close()
38
39 return next(ctx, boundArchiveStream(stream))
40}
41
42func ExtractZstd(
43 ctx context.Context, reader io.Reader,
44 next func(context.Context, io.Reader) (*Manifest, error),
45) (*Manifest, error) {
46 stream, err := zstd.NewReader(reader)
47 if err != nil {
48 return nil, err
49 }
50 defer stream.Close()
51
52 return next(ctx, boundArchiveStream(stream))
53}
54
// BlobReferencePrefix marks a symlink target as a reference to an existing
// git blob rather than an ordinary symlink (see addSymlinkOrBlobReference).
const BlobReferencePrefix = "/git/blobs/"
56
// UnresolvedRefError reports blob references in an uploaded archive that
// could not be resolved against the previous manifest.
type UnresolvedRefError struct {
	missing []string
}

// Error implements the error interface, summarizing how many blob
// references were left unresolved.
func (err UnresolvedRefError) Error() string {
	count := len(err.missing)
	return fmt.Sprintf("%d unresolved blob references", count)
}
64
// normalizeArchiveMemberName converts an archive member name into a clean
// path relative to the site root: redundant segments are collapsed, the
// leading slash is dropped, and the root itself maps to "".
func normalizeArchiveMemberName(fileName string) string {
	// Collapse ".", "..", and duplicate slashes first, then make relative.
	fileName = path.Clean(fileName)
	fileName = strings.TrimPrefix(fileName, "/")
	// path.Clean keeps leading ".." segments in relative paths (e.g.
	// "../x" stays "../x"); strip them so a crafted archive member can
	// never name anything outside the site root (zip-slip hardening).
	for strings.HasPrefix(fileName, "../") {
		fileName = fileName[len("../"):]
	}
	if fileName == "." || fileName == ".." {
		fileName = ""
	}
	return fileName
}
74
75// Returns a map of git hash to entry. If `manifest` is nil, returns an empty map.
76func indexManifestByGitHash(manifest *Manifest) map[string]*Entry {
77 index := map[string]*Entry{}
78 for _, entry := range manifest.GetContents() {
79 if hash := entry.GetGitHash(); hash != "" {
80 if _, ok := plumbing.FromHex(hash); ok {
81 index[hash] = entry
82 } else {
83 panic(fmt.Errorf("index: malformed hash: %s", hash))
84 }
85 }
86 }
87 return index
88}
89
90func addSymlinkOrBlobReference(
91 manifest *Manifest, fileName string, target string,
92 index map[string]*Entry, missing *[]string,
93) *Entry {
94 if hash, found := strings.CutPrefix(target, BlobReferencePrefix); found {
95 if entry, found := index[hash]; found {
96 manifest.Contents[fileName] = entry
97 return entry
98 } else {
99 *missing = append(*missing, hash)
100 return nil
101 }
102 } else {
103 return AddSymlink(manifest, fileName, target)
104 }
105}
106
107func ExtractTar(ctx context.Context, reader io.Reader, oldManifest *Manifest) (*Manifest, error) {
108 archive := tar.NewReader(reader)
109
110 var dataBytesRecycled int64
111 var dataBytesTransferred int64
112
113 index := indexManifestByGitHash(oldManifest)
114 missing := []string{}
115 manifest := NewManifest()
116 for {
117 header, err := archive.Next()
118 if err == io.EOF {
119 break
120 } else if err != nil {
121 return nil, err
122 }
123
124 fileName := normalizeArchiveMemberName(header.Name)
125 if fileName == "" {
126 // This must be the root directory. It will be filled in by EnsureLeadingDirectories.
127 continue
128 }
129
130 switch header.Typeflag {
131 case tar.TypeReg:
132 fileData, err := io.ReadAll(archive)
133 if err != nil {
134 return nil, fmt.Errorf("tar: %s: %w", fileName, err)
135 }
136 AddFile(manifest, fileName, fileData)
137 dataBytesTransferred += int64(len(fileData))
138 case tar.TypeSymlink:
139 entry := addSymlinkOrBlobReference(
140 manifest, fileName, header.Linkname, index, &missing)
141 dataBytesRecycled += entry.GetOriginalSize()
142 case tar.TypeDir:
143 AddDirectory(manifest, fileName)
144 default:
145 AddProblem(manifest, fileName, "tar: unsupported type '%c'", header.Typeflag)
146 continue
147 }
148 }
149
150 if len(missing) > 0 {
151 return nil, UnresolvedRefError{missing}
152 }
153
154 // Ensure parent directories exist for all entries.
155 EnsureLeadingDirectories(manifest)
156
157 logc.Printf(ctx,
158 "reuse: %s recycled, %s transferred\n",
159 datasize.ByteSize(dataBytesRecycled).HR(),
160 datasize.ByteSize(dataBytesTransferred).HR(),
161 )
162
163 return manifest, nil
164}
165
// zstdDecomp handles zstd decompression inside zip files; a single
// decompressor is shared across all ExtractZip calls, as the library
// recommends sharing one instance.
var zstdDecomp = zstd.ZipDecompressor()
168
169func ExtractZip(ctx context.Context, reader io.Reader, oldManifest *Manifest) (*Manifest, error) {
170 data, err := io.ReadAll(reader)
171 if err != nil {
172 return nil, err
173 }
174
175 archive, err := zip.NewReader(bytes.NewReader(data), int64(len(data)))
176 if err != nil {
177 return nil, err
178 }
179
180 // Support zstd compression inside zip files.
181 archive.RegisterDecompressor(zstd.ZipMethodWinZip, zstdDecomp)
182 archive.RegisterDecompressor(zstd.ZipMethodPKWare, zstdDecomp)
183
184 // Detect and defuse zipbombs.
185 var totalSize uint64
186 for _, file := range archive.File {
187 if totalSize+file.UncompressedSize64 < totalSize {
188 // Would overflow
189 totalSize = math.MaxUint64
190 break
191 }
192 totalSize += file.UncompressedSize64
193 }
194 if totalSize > config.Limits.MaxSiteSize.Bytes() {
195 return nil, fmt.Errorf("%w: decompressed size %s exceeds %s limit",
196 ErrArchiveTooLarge,
197 datasize.ByteSize(totalSize).HR(),
198 config.Limits.MaxSiteSize.HR(),
199 )
200 }
201
202 var dataBytesRecycled int64
203 var dataBytesTransferred int64
204
205 index := indexManifestByGitHash(oldManifest)
206 missing := []string{}
207 manifest := NewManifest()
208 for _, file := range archive.File {
209 normalizedName := normalizeArchiveMemberName(file.Name)
210 if strings.HasSuffix(file.Name, "/") {
211 AddDirectory(manifest, normalizedName)
212 } else {
213 fileReader, err := file.Open()
214 if err != nil {
215 return nil, err
216 }
217 defer fileReader.Close()
218
219 fileData, err := io.ReadAll(fileReader)
220 if err != nil {
221 return nil, fmt.Errorf("zip: %s: %w", file.Name, err)
222 }
223
224 if file.Mode()&os.ModeSymlink != 0 {
225 entry := addSymlinkOrBlobReference(
226 manifest, normalizedName, string(fileData), index, &missing)
227 dataBytesRecycled += entry.GetOriginalSize()
228 } else {
229 AddFile(manifest, normalizedName, fileData)
230 dataBytesTransferred += int64(len(fileData))
231 }
232 }
233 }
234
235 if len(missing) > 0 {
236 return nil, UnresolvedRefError{missing}
237 }
238
239 // Ensure parent directories exist for all entries.
240 EnsureLeadingDirectories(manifest)
241
242 logc.Printf(ctx,
243 "reuse: %s recycled, %s transferred\n",
244 datasize.ByteSize(dataBytesRecycled).HR(),
245 datasize.ByteSize(dataBytesTransferred).HR(),
246 )
247
248 return manifest, nil
249}