[mirror] Scalable static site server for Git forges (like GitHub Pages)
at main 249 lines 6.3 kB view raw
1package git_pages 2 3import ( 4 "archive/tar" 5 "archive/zip" 6 "bytes" 7 "compress/gzip" 8 "context" 9 "errors" 10 "fmt" 11 "io" 12 "math" 13 "os" 14 "path" 15 "strings" 16 17 "github.com/c2h5oh/datasize" 18 "github.com/go-git/go-git/v6/plumbing" 19 "github.com/klauspost/compress/zstd" 20) 21 22var ErrArchiveTooLarge = errors.New("archive too large") 23 24func boundArchiveStream(reader io.Reader) io.Reader { 25 return ReadAtMost(reader, int64(config.Limits.MaxSiteSize.Bytes()), 26 fmt.Errorf("%w: %s limit exceeded", ErrArchiveTooLarge, config.Limits.MaxSiteSize.HR())) 27} 28 29func ExtractGzip( 30 ctx context.Context, reader io.Reader, 31 next func(context.Context, io.Reader) (*Manifest, error), 32) (*Manifest, error) { 33 stream, err := gzip.NewReader(reader) 34 if err != nil { 35 return nil, err 36 } 37 defer stream.Close() 38 39 return next(ctx, boundArchiveStream(stream)) 40} 41 42func ExtractZstd( 43 ctx context.Context, reader io.Reader, 44 next func(context.Context, io.Reader) (*Manifest, error), 45) (*Manifest, error) { 46 stream, err := zstd.NewReader(reader) 47 if err != nil { 48 return nil, err 49 } 50 defer stream.Close() 51 52 return next(ctx, boundArchiveStream(stream)) 53} 54 55const BlobReferencePrefix = "/git/blobs/" 56 57type UnresolvedRefError struct { 58 missing []string 59} 60 61func (err UnresolvedRefError) Error() string { 62 return fmt.Sprintf("%d unresolved blob references", len(err.missing)) 63} 64 65func normalizeArchiveMemberName(fileName string) string { 66 // Strip the leading slash and any extraneous path segments. 67 fileName = path.Clean(fileName) 68 fileName = strings.TrimPrefix(fileName, "/") 69 if fileName == "." { 70 fileName = "" 71 } 72 return fileName 73} 74 75// Returns a map of git hash to entry. If `manifest` is nil, returns an empty map. 76func indexManifestByGitHash(manifest *Manifest) map[string]*Entry { 77 index := map[string]*Entry{} 78 for _, entry := range manifest.GetContents() { 79 if hash := entry.GetGitHash(); hash != "" { 80 if _, ok := plumbing.FromHex(hash); ok { 81 index[hash] = entry 82 } else { 83 panic(fmt.Errorf("index: malformed hash: %s", hash)) 84 } 85 } 86 } 87 return index 88} 89 90func addSymlinkOrBlobReference( 91 manifest *Manifest, fileName string, target string, 92 index map[string]*Entry, missing *[]string, 93) *Entry { 94 if hash, found := strings.CutPrefix(target, BlobReferencePrefix); found { 95 if entry, found := index[hash]; found { 96 manifest.Contents[fileName] = entry 97 return entry 98 } else { 99 *missing = append(*missing, hash) 100 return nil 101 } 102 } else { 103 return AddSymlink(manifest, fileName, target) 104 } 105} 106 107func ExtractTar(ctx context.Context, reader io.Reader, oldManifest *Manifest) (*Manifest, error) { 108 archive := tar.NewReader(reader) 109 110 var dataBytesRecycled int64 111 var dataBytesTransferred int64 112 113 index := indexManifestByGitHash(oldManifest) 114 missing := []string{} 115 manifest := NewManifest() 116 for { 117 header, err := archive.Next() 118 if err == io.EOF { 119 break 120 } else if err != nil { 121 return nil, err 122 } 123 124 fileName := normalizeArchiveMemberName(header.Name) 125 if fileName == "" { 126 // This must be the root directory. It will be filled in by EnsureLeadingDirectories. 127 continue 128 } 129 130 switch header.Typeflag { 131 case tar.TypeReg: 132 fileData, err := io.ReadAll(archive) 133 if err != nil { 134 return nil, fmt.Errorf("tar: %s: %w", fileName, err) 135 } 136 AddFile(manifest, fileName, fileData) 137 dataBytesTransferred += int64(len(fileData)) 138 case tar.TypeSymlink: 139 entry := addSymlinkOrBlobReference( 140 manifest, fileName, header.Linkname, index, &missing) 141 dataBytesRecycled += entry.GetOriginalSize() 142 case tar.TypeDir: 143 AddDirectory(manifest, fileName) 144 default: 145 AddProblem(manifest, fileName, "tar: unsupported type '%c'", header.Typeflag) 146 continue 147 } 148 } 149 150 if len(missing) > 0 { 151 return nil, UnresolvedRefError{missing} 152 } 153 154 // Ensure parent directories exist for all entries. 155 EnsureLeadingDirectories(manifest) 156 157 logc.Printf(ctx, 158 "reuse: %s recycled, %s transferred\n", 159 datasize.ByteSize(dataBytesRecycled).HR(), 160 datasize.ByteSize(dataBytesTransferred).HR(), 161 ) 162 163 return manifest, nil 164} 165 166// Used for zstd decompression inside zip files, it is recommended to share this. 167var zstdDecomp = zstd.ZipDecompressor() 168 169func ExtractZip(ctx context.Context, reader io.Reader, oldManifest *Manifest) (*Manifest, error) { 170 data, err := io.ReadAll(reader) 171 if err != nil { 172 return nil, err 173 } 174 175 archive, err := zip.NewReader(bytes.NewReader(data), int64(len(data))) 176 if err != nil { 177 return nil, err 178 } 179 180 // Support zstd compression inside zip files. 181 archive.RegisterDecompressor(zstd.ZipMethodWinZip, zstdDecomp) 182 archive.RegisterDecompressor(zstd.ZipMethodPKWare, zstdDecomp) 183 184 // Detect and defuse zipbombs. 185 var totalSize uint64 186 for _, file := range archive.File { 187 if totalSize+file.UncompressedSize64 < totalSize { 188 // Would overflow 189 totalSize = math.MaxUint64 190 break 191 } 192 totalSize += file.UncompressedSize64 193 } 194 if totalSize > config.Limits.MaxSiteSize.Bytes() { 195 return nil, fmt.Errorf("%w: decompressed size %s exceeds %s limit", 196 ErrArchiveTooLarge, 197 datasize.ByteSize(totalSize).HR(), 198 config.Limits.MaxSiteSize.HR(), 199 ) 200 } 201 202 var dataBytesRecycled int64 203 var dataBytesTransferred int64 204 205 index := indexManifestByGitHash(oldManifest) 206 missing := []string{} 207 manifest := NewManifest() 208 for _, file := range archive.File { 209 normalizedName := normalizeArchiveMemberName(file.Name) 210 if strings.HasSuffix(file.Name, "/") { 211 AddDirectory(manifest, normalizedName) 212 } else { 213 fileReader, err := file.Open() 214 if err != nil { 215 return nil, err 216 } 217 defer fileReader.Close() 218 219 fileData, err := io.ReadAll(fileReader) 220 if err != nil { 221 return nil, fmt.Errorf("zip: %s: %w", file.Name, err) 222 } 223 224 if file.Mode()&os.ModeSymlink != 0 { 225 entry := addSymlinkOrBlobReference( 226 manifest, normalizedName, string(fileData), index, &missing) 227 dataBytesRecycled += entry.GetOriginalSize() 228 } else { 229 AddFile(manifest, normalizedName, fileData) 230 dataBytesTransferred += int64(len(fileData)) 231 } 232 } 233 } 234 235 if len(missing) > 0 { 236 return nil, UnresolvedRefError{missing} 237 } 238 239 // Ensure parent directories exist for all entries. 240 EnsureLeadingDirectories(manifest) 241 242 logc.Printf(ctx, 243 "reuse: %s recycled, %s transferred\n", 244 datasize.ByteSize(dataBytesRecycled).HR(), 245 datasize.ByteSize(dataBytesTransferred).HR(), 246 ) 247 248 return manifest, nil 249}