A tool for backing up ATProto-related data to S3

Improve and fix the Tangled Knot backup: instead of streaming the archive directly to S3, write it to a local file first and then upload that file.

authored by willdot.net and committed by

Tangled 079a1de0 3ba0b875

+134 -26
+2 -2
.env.example
··· 4 4 S3_BUCKET_NAME="my-super-duper-bucket" 5 5 DID="the-did-to-backup" 6 6 PDS_HOST="https://your-pds.com" 7 - TANGLED_KNOT_DATABASE_DIRECTORY="/path/to/database/directory" 8 - TANGLED_KNOT_REPOSITORY_DIRECTORY="/path/to/repository/directory" 7 + TANGLED_KNOT_DATABASE_DIRECTORY="./tangled/path/to/database/directory" # ./tangled is from the docker-compose volume 8 + TANGLED_KNOT_REPOSITORY_DIRECTORY="./tangled/path/to/repository/directory" # ./tangled is from the docker-compose volume 9 9 BUGSNAG_API_KEY="enter-api-key-to-enable" 10 10 BLOB_DIR="./blobs"
+2 -1
docker-compose.yaml
··· 3 3 container_name: back-at-it 4 4 image: willdot/back-at-it:latest 5 5 environment: 6 - ENV_LOCATION: "/app/data/ back-at-it.env" 6 + ENV_LOCATION: "/app/data/back-at-it.env" 7 7 volumes: 8 8 - ./data:/app/data 9 + - /home/will/apps/tangled:/app/tangled:ro 9 10 restart: always
+3 -3
go.mod
··· 1 1 module tangled.sh/willdot.net/backatit 2 2 3 - go 1.25.0 3 + go 1.26.0 4 4 5 5 require ( 6 + github.com/bugsnag/bugsnag-go/v2 v2.6.2 6 7 github.com/joho/godotenv v1.5.1 7 8 github.com/minio/minio-go/v7 v7.0.95 9 + github.com/robfig/cron v1.2.0 8 10 ) 9 11 10 12 require ( 11 - github.com/bugsnag/bugsnag-go/v2 v2.6.2 // indirect 12 13 github.com/bugsnag/panicwrap v1.3.4 // indirect 13 14 github.com/dustin/go-humanize v1.0.1 // indirect 14 15 github.com/go-ini/ini v1.67.0 // indirect ··· 21 22 github.com/minio/md5-simd v1.1.2 // indirect 22 23 github.com/philhofer/fwd v1.2.0 // indirect 23 24 github.com/pkg/errors v0.9.1 // indirect 24 - github.com/robfig/cron v1.2.0 // indirect 25 25 github.com/rs/xid v1.6.0 // indirect 26 26 github.com/stretchr/testify v1.10.0 // indirect 27 27 github.com/tinylib/msgp v1.3.0 // indirect
+1
go.sum
··· 1 + github.com/bitly/go-simplejson v0.5.1 h1:xgwPbetQScXt1gh9BmoJ6j9JMr3TElvuIyjR8pgdoow= 1 2 github.com/bitly/go-simplejson v0.5.1/go.mod h1:YOPVLzCfwK14b4Sff3oP1AmGhI9T9Vsg84etUnlyp+Q= 2 3 github.com/bugsnag/bugsnag-go/v2 v2.6.2 h1:gGjr8txMtPYWKovEBC+4o6tthYveuE7fjzu6XYVIApg= 3 4 github.com/bugsnag/bugsnag-go/v2 v2.6.2/go.mod h1:S9njhE7l6XCiKycOZ2zp0x1zoEE5nL3HjROCSsKc/3c=
+12 -2
main.go
··· 5 5 "log/slog" 6 6 "net/http" 7 7 "os" 8 + "os/signal" 8 9 "time" 9 10 10 11 "github.com/bugsnag/bugsnag-go/v2" ··· 24 25 } 25 26 26 27 func main() { 27 - ctx := context.Background() 28 + ctx, stop := signal.NotifyContext(context.Background(), os.Interrupt, os.Kill) 29 + defer stop() 28 30 29 - err := godotenv.Load(".env") 31 + envLocation := os.Getenv("ENV_LOCATION") 32 + if envLocation == "" { 33 + envLocation = ".env" 34 + } 35 + 36 + err := godotenv.Load(envLocation) 30 37 if err != nil { 31 38 if !os.IsNotExist(err) { 32 39 slog.Error("load env", "error", err) ··· 83 90 }) 84 91 85 92 c.Start() 93 + defer c.Stop() 94 + 95 + <-ctx.Done() 86 96 } 87 97 88 98 func createMinioClient() (*minio.Client, error) {
+45 -1
pds.go
··· 9 9 "log/slog" 10 10 "net/http" 11 11 "os" 12 + "path" 12 13 "path/filepath" 13 14 "time" 14 15 ··· 54 55 55 56 defer resp.Body.Close() 56 57 57 - _, err = s.minioClient.PutObject(ctx, s.bucketName, "pds-repo", resp.Body, -1, minio.PutObjectOptions{}) 58 + b, err := io.ReadAll(resp.Body) 59 + if err != nil { 60 + return fmt.Errorf("reading repo response body: %w", err) 61 + } 62 + 63 + filename := path.Join(s.blobDir, fmt.Sprintf("%s-%d-repo.car", s.did, time.Now().UnixMilli())) 64 + f, err := os.Create(filename) 65 + if err != nil { 66 + return fmt.Errorf("creating temp file: %w", err) 67 + } 68 + defer func() { 69 + f.Close() 70 + 71 + err = os.Remove(filename) 72 + if err != nil { 73 + slog.Error("failed to delete pds repo file after uploading", "error", err, "filename", f.Name()) 74 + } 75 + }() 76 + 77 + zipWriter := zip.NewWriter(f) 78 + zipFile, err := zipWriter.Create("repo.car") 79 + if err != nil { 80 + return fmt.Errorf("create zip file: %w", err) 81 + } 82 + 83 + _, err = zipFile.Write(b) 84 + if err != nil { 85 + return fmt.Errorf("write repo to file: %w", err) 86 + } 87 + 88 + zipWriter.Close() 89 + 90 + // reset the reader back to the start so that the minio upload can read the data that's been written. 91 + _, err = f.Seek(0, 0) 92 + if err != nil { 93 + return fmt.Errorf("setting seek on written file: %w", err) 94 + } 95 + 96 + fi, err := f.Stat() 97 + if err != nil { 98 + return fmt.Errorf("stat file: %w", err) 99 + } 100 + 101 + _, err = s.minioClient.PutObject(ctx, s.bucketName, "pds-repo.zip", f, fi.Size(), minio.PutObjectOptions{}) 58 102 if err != nil { 59 103 return fmt.Errorf("stream repo to bucket: %w", err) 60 104 }
+4 -2
readme.md
··· 18 18 19 19 For PDS data backup you need to ensure that `DID` and `PDS_HOST` are populated. (You can run this tool on any machine to back PDS data up) 20 20 21 - For Knot data backup you need to ensure that `TANGLED_KNOT_DATABASE_DIRECTORY` and `TANGLED_KNOT_REPOSITORY_DIRECTORY` are populated. (You need to run this tool on your Knot server to back up Knot data) 21 + For Knot data backup you need to ensure that `TANGLED_KNOT_DATABASE_DIRECTORY` and `TANGLED_KNOT_REPOSITORY_DIRECTORY` are populated. (You need to run this tool on your Knot server to back up Knot data). 22 22 23 - Run `go run .` 23 + If using Docker, set the volume mount in the `docker-compose.yaml` file so that it points to the directory on your host machine that contains your running Tangled Knot application (it is mounted read-only, don't worry). Then, inside the `.env` file, set the `TANGLED_KNOT_DATABASE_DIRECTORY` and `TANGLED_KNOT_REPOSITORY_DIRECTORY` envs to the directories inside that Knot application volume. 24 + 25 + Run `go run .` or use Docker. 24 26 25 27 ### Todo 26 28
+65 -15
tangled_knot.go
··· 4 4 "archive/tar" 5 5 "compress/gzip" 6 6 "context" 7 + "fmt" 7 8 "io" 8 9 "log/slog" 9 10 "os" 11 + "path" 10 12 "path/filepath" 13 + "time" 11 14 12 15 "github.com/bugsnag/bugsnag-go/v2" 13 16 "github.com/minio/minio-go/v7" ··· 26 29 slog.Info("TANGLED_KNOT_DATABASE_DIRECTORY env not set - skipping knot DB backup") 27 30 } 28 31 29 - pipeReader, pipeWriter := io.Pipe() 30 - defer pipeReader.Close() 32 + filename := path.Join(s.blobDir, fmt.Sprintf("%d-knot.zip", time.Now().UnixMilli())) 33 + f, err := os.Create(filename) 34 + if err != nil { 35 + slog.Error("creating temp file", "error", err) 36 + return 37 + } 38 + defer func() { 39 + f.Close() 31 40 32 - go compress(dir, pipeWriter) 41 + err = os.Remove(filename) 42 + if err != nil { 43 + slog.Error("failed to delete knot db zip file after uploading", "error", err, "filename", f.Name()) 44 + } 45 + }() 33 46 34 - _, err := s.minioClient.PutObject(ctx, s.bucketName, "knot-db.zip", pipeReader, -1, minio.PutObjectOptions{}) 47 + compress(dir, f) 48 + 49 + // reset the reader back to the start so that the minio upload can read the data that's been written. 
50 + _, err = f.Seek(0, 0) 51 + if err != nil { 52 + slog.Error("setting seek on written file", "error", err) 53 + return 54 + } 55 + 56 + fi, err := f.Stat() 57 + if err != nil { 58 + slog.Error("failed to stat knot db zip file", "error", err) 59 + return 60 + } 61 + 62 + _, err = s.minioClient.PutObject(ctx, s.bucketName, "knot-db.zip", f, fi.Size(), minio.PutObjectOptions{}) 35 63 if err != nil { 36 64 slog.Error("stream knot DB to bucket", "error", err) 37 65 bugsnag.Notify(err) ··· 44 72 slog.Info("TANGLED_KNOT_REPOSITORY_DIRECTORY env not set - skipping knot repo backup") 45 73 } 46 74 47 - pipeReader, pipeWriter := io.Pipe() 48 - defer pipeReader.Close() 75 + filename := path.Join(s.blobDir, fmt.Sprintf("%d-knot-repos.zip", time.Now().UnixMilli())) 76 + f, err := os.Create(filename) 77 + if err != nil { 78 + slog.Error("creating temp file", "error", err) 79 + return 80 + } 81 + defer func() { 82 + f.Close() 49 83 50 - go compress(dir, pipeWriter) 84 + err = os.Remove(filename) 85 + if err != nil { 86 + slog.Error("failed to delete knot repos zip file after uploading", "error", err, "filename", f.Name()) 87 + } 88 + }() 51 89 52 - _, err := s.minioClient.PutObject(ctx, s.bucketName, "knot-repos.zip", pipeReader, -1, minio.PutObjectOptions{}) 90 + compress(dir, f) 91 + 92 + // reset the reader back to the start so that the minio upload can read the data that's been written. 
93 + _, err = f.Seek(0, 0) 53 94 if err != nil { 54 - slog.Error("stream knot repos to bucket", "error", err) 95 + slog.Error("setting seek on written file", "error", err) 96 + return 97 + } 98 + 99 + fi, err := f.Stat() 100 + if err != nil { 101 + slog.Error("failed to stat knot db zip file", "error", err) 102 + return 103 + } 104 + 105 + _, err = s.minioClient.PutObject(ctx, s.bucketName, "knot-repos.zip", f, fi.Size(), minio.PutObjectOptions{}) 106 + if err != nil { 107 + slog.Error("write knot repos to bucket", "error", err) 55 108 bugsnag.Notify(err) 56 109 } 57 110 } 58 111 59 - func compress(src string, writer io.WriteCloser) error { 112 + func compress(src string, writer io.Writer) error { 60 113 zipWriter := gzip.NewWriter(writer) 61 114 tarWriter := tar.NewWriter(zipWriter) 62 - 63 - defer writer.Close() 64 - defer zipWriter.Close() 65 - defer tarWriter.Close() 66 115 67 116 filepath.Walk(src, func(file string, fi os.FileInfo, err error) error { 68 117 header, err := tar.FileInfoHeader(fi, file) ··· 87 136 return err 88 137 } 89 138 } 139 + 90 140 return nil 91 141 }) 92 142 ··· 98 148 if err := zipWriter.Close(); err != nil { 99 149 return err 100 150 } 101 - // 151 + 102 152 return nil 103 153 }