Fast implementation of Git in pure Go
at legacy 168 lines 4.8 kB view raw
1package furgit 2 3import ( 4 "bytes" 5 "errors" 6 "fmt" 7 "strconv" 8) 9 10// ObjectType mirrors Git's object type tags. 11type ObjectType uint8 12 13const ( 14 // An invalid object. 15 ObjectTypeInvalid ObjectType = 0 16 // A commit object. 17 ObjectTypeCommit ObjectType = 1 18 // A tree object. 19 ObjectTypeTree ObjectType = 2 20 // A blob object. 21 ObjectTypeBlob ObjectType = 3 22 // An annotated tag object. 23 ObjectTypeTag ObjectType = 4 24 // An object type reserved for future use. 25 ObjectTypeFuture ObjectType = 5 26 // A packfile offset delta object. This is not typically exposed. 27 ObjectTypeOfsDelta ObjectType = 6 28 // A packfile reference delta object. This is not typically exposed. 29 ObjectTypeRefDelta ObjectType = 7 30) 31 32const ( 33 objectTypeNameBlob = "blob" 34 objectTypeNameTree = "tree" 35 objectTypeNameCommit = "commit" 36 objectTypeNameTag = "tag" 37) 38 39// Object represents a Git object. 40type Object interface { 41 // ObjectType returns the object's type. 42 ObjectType() ObjectType 43 // Serialize renders the object into its raw byte representation, 44 // including the header (i.e., "type size\0"). 45 Serialize() ([]byte, error) 46} 47 48// StoredObject describes a Git object with a known hash, such as 49// one read from storage. 50type StoredObject interface { 51 Object 52 // Hash returns the object's hash. 53 Hash() Hash 54} 55 56func headerForType(ty ObjectType, body []byte) ([]byte, error) { 57 var tyStr string 58 switch ty { 59 case ObjectTypeBlob: 60 tyStr = objectTypeNameBlob 61 case ObjectTypeTree: 62 tyStr = objectTypeNameTree 63 case ObjectTypeCommit: 64 tyStr = objectTypeNameCommit 65 case ObjectTypeTag: 66 tyStr = objectTypeNameTag 67 case ObjectTypeInvalid, ObjectTypeFuture, ObjectTypeOfsDelta, ObjectTypeRefDelta: 68 return nil, fmt.Errorf("furgit: object: unsupported type %d", ty) 69 default: 70 return nil, fmt.Errorf("furgit: object: unsupported type %d", ty) 71 } 72 size := strconv.Itoa(len(body)) 73 var buf bytes.Buffer 74 buf.Grow(len(tyStr) + len(size) + 1) 75 buf.WriteString(tyStr) 76 buf.WriteByte(' ') 77 buf.WriteString(size) 78 buf.WriteByte(0) 79 return buf.Bytes(), nil 80} 81 82func parseObjectBody(ty ObjectType, id Hash, body []byte, repo *Repository) (StoredObject, error) { 83 switch ty { 84 case ObjectTypeBlob: 85 return parseBlob(id, body) 86 case ObjectTypeTree: 87 return parseTree(id, body, repo) 88 case ObjectTypeCommit: 89 return parseCommit(id, body, repo) 90 case ObjectTypeTag: 91 return parseTag(id, body, repo) 92 case ObjectTypeInvalid, ObjectTypeFuture, ObjectTypeOfsDelta, ObjectTypeRefDelta: 93 return nil, fmt.Errorf("furgit: object: unsupported type %d", ty) 94 default: 95 return nil, fmt.Errorf("furgit: object: unknown type %d", ty) 96 } 97} 98 99// ReadObject resolves an ID. 100func (repo *Repository) ReadObject(id Hash) (StoredObject, error) { 101 ty, body, err := repo.looseRead(id) 102 if err == nil { 103 obj, parseErr := parseObjectBody(ty, id, body.Bytes(), repo) 104 body.Release() 105 return obj, parseErr 106 } 107 if !errors.Is(err, ErrNotFound) { 108 return nil, err 109 } 110 ty, body, err = repo.packRead(id) 111 if errors.Is(err, ErrNotFound) { 112 return nil, ErrNotFound 113 } 114 if err != nil { 115 return nil, err 116 } 117 obj, parseErr := parseObjectBody(ty, id, body.Bytes(), repo) 118 body.Release() 119 return obj, parseErr 120} 121 122// ReadObjectTypeRaw reads the object type and raw body. 123func (repo *Repository) ReadObjectTypeRaw(id Hash) (ObjectType, []byte, error) { 124 ty, body, err := repo.looseRead(id) 125 if err == nil { 126 return ty, body.Bytes(), nil 127 } 128 if !errors.Is(err, ErrNotFound) { 129 return ObjectTypeInvalid, nil, err 130 } 131 ty, body, err = repo.packRead(id) 132 if errors.Is(err, ErrNotFound) { 133 return ObjectTypeInvalid, nil, ErrNotFound 134 } 135 if err != nil { 136 return ObjectTypeInvalid, nil, err 137 } 138 return ty, body.Bytes(), nil 139 // note to self: It always feels wrong to not call .Release in places like 140 // this but this is actually correct; we're returning the underlying buffer 141 // to the user who should not be aware of our internal buffer pooling. 142 // Releasing this buffer back to the pool would lead to a use-after-free; 143 // not releasing it as we do here, means it gets GC'ed. 144 // Copying into a newly allocated buffer is even worse as it incurs 145 // unnecessary copy overhead. 146} 147 148// ReadObjectTypeSize reports the object type and size. 149// 150// Typicall, this is more efficient than reading the full object, 151// as it avoids decompressing the entire object body. 152func (repo *Repository) ReadObjectTypeSize(id Hash) (ObjectType, int64, error) { 153 ty, size, err := repo.looseTypeSize(id) 154 if err == nil { 155 return ty, size, nil 156 } 157 if !errors.Is(err, ErrNotFound) { 158 return ObjectTypeInvalid, 0, err 159 } 160 loc, err := repo.packIndexFind(id) 161 if err != nil { 162 if errors.Is(err, ErrNotFound) { 163 return ObjectTypeInvalid, 0, ErrInvalidObject 164 } 165 return ObjectTypeInvalid, 0, err 166 } 167 return repo.packTypeSizeAtLocation(loc, nil) 168}