/* object-file.c — from a Git fork (reftables-rust branch); loose object handling. */
/*
 * GIT - The information manager from hell
 *
 * Copyright (C) Linus Torvalds, 2005
 *
 * This handles basic git object files - packing, unpacking,
 * creation etc.
 */

#define USE_THE_REPOSITORY_VARIABLE

#include "git-compat-util.h"
#include "convert.h"
#include "dir.h"
#include "environment.h"
#include "fsck.h"
#include "gettext.h"
#include "hex.h"
#include "loose.h"
#include "object-file-convert.h"
#include "object-file.h"
#include "odb.h"
#include "oidtree.h"
#include "pack.h"
#include "packfile.h"
#include "path.h"
#include "read-cache-ll.h"
#include "setup.h"
#include "streaming.h"
#include "tempfile.h"
#include "tmp-objdir.h"

/* The maximum size for an object header. */
#define MAX_HEADER_LEN 32

/*
 * Map the INDEX_* hashing flags onto the convert-layer CONV_* flags used
 * when normalizing blob contents (EOL handling etc.).
 */
static int get_conv_flags(unsigned flags)
{
	if (flags & INDEX_RENORMALIZE)
		return CONV_EOL_RENORMALIZE;
	else if (flags & INDEX_WRITE_OBJECT)
		return global_conv_flags_eol | CONV_WRITE_OBJECT;
	else
		return 0;
}

/*
 * Append the fan-out path for "oid" to "buf": first byte in hex, a '/',
 * then the remaining bytes in hex ("xx/yyyy...").
 */
static void fill_loose_path(struct strbuf *buf,
			    const struct object_id *oid,
			    const struct git_hash_algo *algop)
{
	for (size_t i = 0; i < algop->rawsz; i++) {
		static char hex[] = "0123456789abcdef";
		unsigned int val = oid->hash[i];
		strbuf_addch(buf, hex[val >> 4]);
		strbuf_addch(buf, hex[val & 0xf]);
		if (!i)
			strbuf_addch(buf, '/');
	}
}

/*
 * Compute the full loose-object path of "oid" within "source". "buf" is
 * reset first; the returned pointer aliases buf->buf.
 */
const char *odb_loose_path(struct odb_source *source,
			   struct strbuf *buf,
			   const struct object_id *oid)
{
	strbuf_reset(buf);
	strbuf_addstr(buf, source->path);
	strbuf_addch(buf, '/');
	fill_loose_path(buf, oid, source->odb->repo->hash_algo);
	return buf->buf;
}

/* Returns 1 if we have successfully freshened the file, 0 otherwise. */
static int freshen_file(const char *fn)
{
	return !utime(fn, NULL);
}

/*
 * All of the check_and_freshen functions return 1 if the file exists and was
 * freshened (if freshening was requested), 0 otherwise. If they return
 * 0, you should not assume that it is safe to skip a write of the object (it
 * either does not exist on disk, or has a stale mtime and may be subject to
 * pruning).
 */
int check_and_freshen_file(const char *fn, int freshen)
{
	if (access(fn, F_OK))
		return 0;
	if (freshen && !freshen_file(fn))
		return 0;
	return 1;
}

static int check_and_freshen_source(struct odb_source *source,
				    const struct object_id *oid,
				    int freshen)
{
	/* NOTE: static buffer — not reentrant or thread-safe. */
	static struct strbuf path = STRBUF_INIT;
	odb_loose_path(source, &path, oid);
	return check_and_freshen_file(path.buf, freshen);
}

int has_loose_object(struct odb_source *source,
		     const struct object_id *oid)
{
	return check_and_freshen_source(source, oid, 0);
}

/*
 * Format the "<type> <size>" loose-object header into "str". Returns the
 * header length including the trailing NUL that xsnprintf() wrote.
 */
int format_object_header(char *str, size_t size, enum object_type type,
			 size_t objsize)
{
	const char *name = type_name(type);

	if (!name)
		BUG("could not get a type name for 'enum object_type' value %d", type);

	return xsnprintf(str, size, "%s %"PRIuMAX, name, (uintmax_t)objsize) + 1;
}

/*
 * Re-hash the given buffer (header + contents) and compare against "oid".
 * Returns 0 on match, -1 on mismatch.
 */
int check_object_signature(struct repository *r, const struct object_id *oid,
			   void *buf, unsigned long size,
			   enum object_type type)
{
	const struct git_hash_algo *algo =
		oid->algo ? &hash_algos[oid->algo] : r->hash_algo;
	struct object_id real_oid;

	hash_object_file(algo, buf, size, type, &real_oid);

	return !oideq(oid, &real_oid) ? -1 : 0;
}

/*
 * Like check_object_signature(), but streams the object contents instead
 * of requiring them all in memory. Returns 0 on match, -1 otherwise.
 */
int stream_object_signature(struct repository *r, const struct object_id *oid)
{
	struct object_id real_oid;
	unsigned long size;
	enum object_type obj_type;
	struct git_istream *st;
	struct git_hash_ctx c;
	char hdr[MAX_HEADER_LEN];
	int hdrlen;

	st = open_istream(r, oid, &obj_type, &size, NULL);
	if (!st)
		return -1;

	/* Generate the header */
	hdrlen = format_object_header(hdr, sizeof(hdr), obj_type, size);

	/*
	 * Sha1..
	 *
	 * NOTE(review): unlike check_object_signature() above, this always
	 * hashes with r->hash_algo and ignores oid->algo — presumably
	 * callers never pass a compat-algorithm oid here; confirm.
	 */
	r->hash_algo->init_fn(&c);
	git_hash_update(&c, hdr, hdrlen);
	for (;;) {
		char buf[1024 * 16];
		ssize_t readlen = read_istream(st, buf, sizeof(buf));

		if (readlen < 0) {
			close_istream(st);
			return -1;
		}
		if (!readlen)
			break;
		git_hash_update(&c, buf, readlen);
	}
	git_hash_final_oid(&real_oid, &c);
	close_istream(st);
	return !oideq(oid, &real_oid) ? -1 : 0;
}

/*
 * Find "oid" as a loose object in the local repository or in an alternate.
 * Returns 0 on success, negative on failure.
 *
 * The "path" out-parameter will give the path of the object we found (if any).
 * Note that it may point to static storage and is only valid until another
 * call to stat_loose_object().
 */
static int stat_loose_object(struct repository *r, const struct object_id *oid,
			     struct stat *st, const char **path)
{
	struct odb_source *source;
	static struct strbuf buf = STRBUF_INIT;

	odb_prepare_alternates(r->objects);
	for (source = r->objects->sources; source; source = source->next) {
		*path = odb_loose_path(source, &buf, oid);
		if (!lstat(*path, st))
			return 0;
	}

	return -1;
}

/*
 * Like stat_loose_object(), but actually open the object and return the
 * descriptor. See the caveats on the "path" parameter above.
 */
static int open_loose_object(struct repository *r,
			     const struct object_id *oid, const char **path)
{
	int fd;
	struct odb_source *source;
	int most_interesting_errno = ENOENT;
	static struct strbuf buf = STRBUF_INIT;

	odb_prepare_alternates(r->objects);
	for (source = r->objects->sources; source; source = source->next) {
		*path = odb_loose_path(source, &buf, oid);
		fd = git_open(*path);
		if (fd >= 0)
			return fd;

		/* remember the first "interesting" (non-ENOENT) failure */
		if (most_interesting_errno == ENOENT)
			most_interesting_errno = errno;
	}
	errno = most_interesting_errno;
	return -1;
}

/*
 * Cheap existence check consulting only the per-source loose-object
 * cache; never stats the filesystem. Returns 1 if cached, 0 otherwise.
 */
static int quick_has_loose(struct repository *r,
			   const struct object_id *oid)
{
	struct odb_source *source;

	odb_prepare_alternates(r->objects);
	for (source = r->objects->sources; source; source = source->next) {
		if (oidtree_contains(odb_loose_cache(source, oid), oid))
			return 1;
	}
	return 0;
}
/*
 * Map and close the given loose object fd. The path argument is used for
 * error reporting.
 */
static void *map_fd(int fd, const char *path, unsigned long *size)
{
	void *map = NULL;
	struct stat st;

	if (!fstat(fd, &st)) {
		*size = xsize_t(st.st_size);
		if (!*size) {
			/* mmap() is forbidden on empty files */
			error(_("object file %s is empty"), path);
			close(fd);
			return NULL;
		}
		map = xmmap(NULL, *size, PROT_READ, MAP_PRIVATE, fd, 0);
	}
	/* fd is always closed, whether fstat/mmap succeeded or not */
	close(fd);
	return map;
}

/* Open and mmap the loose object "oid"; returns NULL if not found. */
void *map_loose_object(struct repository *r,
		       const struct object_id *oid,
		       unsigned long *size)
{
	const char *p;
	int fd = open_loose_object(r, oid, &p);

	if (fd < 0)
		return NULL;
	return map_fd(fd, p, size);
}

/*
 * Inflate the start of a loose object into "buffer" and verify that the
 * whole "<type> <size>\0" header fit into this first round of output.
 */
enum unpack_loose_header_result unpack_loose_header(git_zstream *stream,
						    unsigned char *map,
						    unsigned long mapsize,
						    void *buffer,
						    unsigned long bufsiz)
{
	int status;

	/* Get the data stream */
	memset(stream, 0, sizeof(*stream));
	stream->next_in = map;
	stream->avail_in = mapsize;
	stream->next_out = buffer;
	stream->avail_out = bufsiz;

	git_inflate_init(stream);
	/* drop the obj-read lock while zlib does the heavy lifting */
	obj_read_unlock();
	status = git_inflate(stream, 0);
	obj_read_lock();
	if (status != Z_OK && status != Z_STREAM_END)
		return ULHR_BAD;

	/*
	 * Check if entire header is unpacked in the first iteration.
	 */
	if (memchr(buffer, '\0', stream->next_out - (unsigned char *)buffer))
		return ULHR_OK;

	/*
	 * We have a header longer than MAX_HEADER_LEN.
	 */
	return ULHR_TOO_LONG;
}

/*
 * Inflate the remainder of a loose object (after the header consumed by
 * unpack_loose_header()) into a freshly allocated buffer of exactly
 * "size" bytes. Returns NULL on corruption or trailing garbage.
 */
static void *unpack_loose_rest(git_zstream *stream,
			       void *buffer, unsigned long size,
			       const struct object_id *oid)
{
	size_t bytes = strlen(buffer) + 1, n;
	unsigned char *buf = xmallocz(size);
	int status = Z_OK;

	/* copy any body bytes already inflated along with the header */
	n = stream->total_out - bytes;
	if (n > size)
		n = size;
	memcpy(buf, (char *) buffer + bytes, n);
	bytes = n;
	if (bytes <= size) {
		/*
		 * The above condition must be (bytes <= size), not
		 * (bytes < size). In other words, even though we
		 * expect no more output and set avail_out to zero,
		 * the input zlib stream may have bytes that express
		 * "this concludes the stream", and we *do* want to
		 * eat that input.
		 *
		 * Otherwise we would not be able to test that we
		 * consumed all the input to reach the expected size;
		 * we also want to check that zlib tells us that all
		 * went well with status == Z_STREAM_END at the end.
		 */
		stream->next_out = buf + bytes;
		stream->avail_out = size - bytes;
		while (status == Z_OK) {
			obj_read_unlock();
			status = git_inflate(stream, Z_FINISH);
			obj_read_lock();
		}
	}

	if (status != Z_STREAM_END) {
		error(_("corrupt loose object '%s'"), oid_to_hex(oid));
		FREE_AND_NULL(buf);
	} else if (stream->avail_in) {
		error(_("garbage at end of loose object '%s'"),
		      oid_to_hex(oid));
		FREE_AND_NULL(buf);
	}

	return buf;
}

/*
 * We used to just use "sscanf()", but that's actually way
 * too permissive for what we want to check. So do an anal
 * object header parse by hand.
 */
int parse_loose_header(const char *hdr, struct object_info *oi)
{
	const char *type_buf = hdr;
	size_t size;
	int type, type_len = 0;

	/*
	 * The type can be of any size but is followed by
	 * a space.
	 */
	for (;;) {
		char c = *hdr++;
		if (!c)
			return -1;
		if (c == ' ')
			break;
		type_len++;
	}

	type = type_from_string_gently(type_buf, type_len, 1);
	if (oi->typep)
		*oi->typep = type;

	/*
	 * The length must follow immediately, and be in canonical
	 * decimal format (ie "010" is not valid).
	 */
	size = *hdr++ - '0';
	if (size > 9)
		return -1;
	if (size) {
		for (;;) {
			unsigned long c = *hdr - '0';
			if (c > 9)
				break;
			hdr++;
			/* st_add/st_mult die on size_t overflow */
			size = st_add(st_mult(size, 10), c);
		}
	}

	if (oi->sizep)
		*oi->sizep = cast_size_t_to_ulong(size);

	/*
	 * The length must be followed by a zero byte
	 */
	if (*hdr)
		return -1;

	/*
	 * The format is valid, but the type may still be bogus. The
	 * Caller needs to check its oi->typep.
	 */
	return 0;
}

/*
 * Fill "oi" for a loose object: type, size, disk size and (optionally)
 * the inflated contents. Returns 0 on success, negative on failure
 * (or dies when OBJECT_INFO_DIE_IF_CORRUPT is set and the object is bad).
 */
int loose_object_info(struct repository *r,
		      const struct object_id *oid,
		      struct object_info *oi, int flags)
{
	int status = 0;
	int fd;
	unsigned long mapsize;
	const char *path;
	void *map;
	git_zstream stream;
	char hdr[MAX_HEADER_LEN];
	unsigned long size_scratch;
	enum object_type type_scratch;

	if (oi->delta_base_oid)
		oidclr(oi->delta_base_oid, r->hash_algo);

	/*
	 * If we don't care about type or size, then we don't
	 * need to look inside the object at all. Note that we
	 * do not optimize out the stat call, even if the
	 * caller doesn't care about the disk-size, since our
	 * return value implicitly indicates whether the
	 * object even exists.
	 */
	if (!oi->typep && !oi->sizep && !oi->contentp) {
		struct stat st;
		if (!oi->disk_sizep && (flags & OBJECT_INFO_QUICK))
			return quick_has_loose(r, oid) ? 0 : -1;
		if (stat_loose_object(r, oid, &st, &path) < 0)
			return -1;
		if (oi->disk_sizep)
			*oi->disk_sizep = st.st_size;
		return 0;
	}

	fd = open_loose_object(r, oid, &path);
	if (fd < 0) {
		if (errno != ENOENT)
			error_errno(_("unable to open loose object %s"), oid_to_hex(oid));
		return -1;
	}
	map = map_fd(fd, path, &mapsize);
	if (!map)
		return -1;

	/* point unset out-params at scratch so later code can deref freely */
	if (!oi->sizep)
		oi->sizep = &size_scratch;
	if (!oi->typep)
		oi->typep = &type_scratch;

	if (oi->disk_sizep)
		*oi->disk_sizep = mapsize;

	switch (unpack_loose_header(&stream, map, mapsize, hdr, sizeof(hdr))) {
	case ULHR_OK:
		if (parse_loose_header(hdr, oi) < 0)
			status = error(_("unable to parse %s header"), oid_to_hex(oid));
		else if (*oi->typep < 0)
			die(_("invalid object type"));

		if (!oi->contentp)
			break;
		*oi->contentp = unpack_loose_rest(&stream, hdr, *oi->sizep, oid);
		if (*oi->contentp)
			goto cleanup;

		status = -1;
		break;
	case ULHR_BAD:
		status = error(_("unable to unpack %s header"),
			       oid_to_hex(oid));
		break;
	case ULHR_TOO_LONG:
		status = error(_("header for %s too long, exceeds %d bytes"),
			       oid_to_hex(oid), MAX_HEADER_LEN);
		break;
	}

	if (status && (flags & OBJECT_INFO_DIE_IF_CORRUPT))
		die(_("loose object %s (stored in %s) is corrupt"),
		    oid_to_hex(oid), path);

cleanup:
	git_inflate_end(&stream);
	munmap(map, mapsize);
	if (oi->sizep == &size_scratch)
		oi->sizep = NULL;
	if (oi->typep == &type_scratch)
		oi->typep = NULL;
	oi->whence = OI_LOOSE;
	return status;
}

/* Hash "<hdr><buf>" with "algo" and store the digest in "oid". */
static void hash_object_body(const struct git_hash_algo *algo, struct git_hash_ctx *c,
			     const void *buf, unsigned long len,
			     struct object_id *oid,
			     char *hdr, int *hdrlen)
{
	algo->init_fn(c);
	git_hash_update(c, hdr, *hdrlen);
	git_hash_update(c, buf, len);
	git_hash_final_oid(oid, c);
}
/*
 * Format the object header for (type, len) into "hdr"/"hdrlen" and hash
 * header plus contents into "oid". On input *hdrlen is the capacity of
 * "hdr"; on output it is the actual header length (including NUL).
 */
static void write_object_file_prepare(const struct git_hash_algo *algo,
				      const void *buf, unsigned long len,
				      enum object_type type, struct object_id *oid,
				      char *hdr, int *hdrlen)
{
	struct git_hash_ctx c;

	/* Generate the header */
	*hdrlen = format_object_header(hdr, *hdrlen, type, len);

	/* Sha1.. */
	hash_object_body(algo, &c, buf, len, oid, hdr, hdrlen);
}

#define CHECK_COLLISION_DEST_VANISHED -2

/*
 * Compare "source" and "dest" byte-by-byte. Returns 0 if identical,
 * CHECK_COLLISION_DEST_VANISHED if "dest" disappeared while we were
 * looking (a racing writer), and a negative error otherwise — including
 * a content mismatch, which would indicate a hash collision.
 */
static int check_collision(const char *source, const char *dest)
{
	char buf_source[4096], buf_dest[4096];
	int fd_source = -1, fd_dest = -1;
	int ret = 0;

	fd_source = open(source, O_RDONLY);
	if (fd_source < 0) {
		ret = error_errno(_("unable to open %s"), source);
		goto out;
	}

	fd_dest = open(dest, O_RDONLY);
	if (fd_dest < 0) {
		if (errno != ENOENT)
			ret = error_errno(_("unable to open %s"), dest);
		else
			ret = CHECK_COLLISION_DEST_VANISHED;
		goto out;
	}

	while (1) {
		ssize_t sz_a, sz_b;

		sz_a = read_in_full(fd_source, buf_source, sizeof(buf_source));
		if (sz_a < 0) {
			ret = error_errno(_("unable to read %s"), source);
			goto out;
		}

		sz_b = read_in_full(fd_dest, buf_dest, sizeof(buf_dest));
		if (sz_b < 0) {
			ret = error_errno(_("unable to read %s"), dest);
			goto out;
		}

		if (sz_a != sz_b || memcmp(buf_source, buf_dest, sz_a)) {
			ret = error(_("files '%s' and '%s' differ in contents"),
				    source, dest);
			goto out;
		}

		/* a short read means both files hit EOF together */
		if ((size_t) sz_a < sizeof(buf_source))
			break;
	}

out:
	if (fd_source > -1)
		close(fd_source);
	if (fd_dest > -1)
		close(fd_dest);
	return ret;
}

/*
 * Move the just written object into its final resting place.
 */
int finalize_object_file(struct repository *repo,
			 const char *tmpfile, const char *filename)
{
	return finalize_object_file_flags(repo, tmpfile, filename, 0);
}

int finalize_object_file_flags(struct repository *repo,
			       const char *tmpfile, const char *filename,
			       enum finalize_object_file_flags flags)
{
	unsigned retries = 0;
	int ret;

retry:
	ret = 0;

	if (object_creation_mode == OBJECT_CREATION_USES_RENAMES)
		goto try_rename;
	else if (link(tmpfile, filename))
		ret = errno;
	else
		unlink_or_warn(tmpfile);

	/*
	 * Coda hack - coda doesn't like cross-directory links,
	 * so we fall back to a rename, which will mean that it
	 * won't be able to check collisions, but that's not a
	 * big deal.
	 *
	 * The same holds for FAT formatted media.
	 *
	 * When this succeeds, we just return. We have nothing
	 * left to unlink.
	 */
	if (ret && ret != EEXIST) {
		struct stat st;

try_rename:
		if (!stat(filename, &st))
			ret = EEXIST;
		else if (!rename(tmpfile, filename))
			goto out;
		else
			ret = errno;
	}
	if (ret) {
		if (ret != EEXIST) {
			int saved_errno = errno;
			unlink_or_warn(tmpfile);
			errno = saved_errno;
			return error_errno(_("unable to write file %s"), filename);
		}
		/*
		 * EEXIST usually just means another process wrote the same
		 * object; verify the contents really match before trusting it.
		 */
		if (!(flags & FOF_SKIP_COLLISION_CHECK)) {
			ret = check_collision(tmpfile, filename);
			if (ret == CHECK_COLLISION_DEST_VANISHED) {
				if (retries++ > 5)
					return error(_("unable to write repeatedly vanishing file %s"),
						     filename);
				goto retry;
			}
			else if (ret)
				return -1;
		}
		unlink_or_warn(tmpfile);
	}

out:
	if (adjust_shared_perm(repo, filename))
		return error(_("unable to set permission to '%s'"), filename);
	return 0;
}
/* Hash "buf" as an object of "type", storing the result in "oid". */
void hash_object_file(const struct git_hash_algo *algo, const void *buf,
		      unsigned long len, enum object_type type,
		      struct object_id *oid)
{
	char hdr[MAX_HEADER_LEN];
	int hdrlen = sizeof(hdr);

	write_object_file_prepare(algo, buf, len, type, oid, hdr, &hdrlen);
}

/* State for a packfile being written as part of an ODB transaction. */
struct transaction_packfile {
	char *pack_tmp_name;
	struct hashfile *f;
	off_t offset;
	struct pack_idx_option pack_idx_opts;

	struct pack_idx_entry **written;
	uint32_t alloc_written;
	uint32_t nr_written;
};

struct odb_transaction {
	struct object_database *odb;

	struct tmp_objdir *objdir;
	struct transaction_packfile packfile;
};

static void prepare_loose_object_transaction(struct odb_transaction *transaction)
{
	/*
	 * We lazily create the temporary object directory
	 * the first time an object might be added, since
	 * callers may not know whether any objects will be
	 * added at the time they call object_file_transaction_begin.
	 */
	if (!transaction || transaction->objdir)
		return;

	transaction->objdir = tmp_objdir_create(transaction->odb->repo, "bulk-fsync");
	if (transaction->objdir)
		tmp_objdir_replace_primary_odb(transaction->objdir, 0);
}

static void fsync_loose_object_transaction(struct odb_transaction *transaction,
					   int fd, const char *filename)
{
	/*
	 * If we have an active ODB transaction, we issue a call that
	 * cleans the filesystem page cache but avoids a hardware flush
	 * command. Later on we will issue a single hardware flush
	 * before renaming the objects to their final names as part of
	 * flush_batch_fsync.
	 */
	if (!transaction || !transaction->objdir ||
	    git_fsync(fd, FSYNC_WRITEOUT_ONLY) < 0) {
		if (errno == ENOSYS)
			warning(_("core.fsyncMethod = batch is unsupported on this platform"));
		fsync_or_die(fd, filename);
	}
}

/*
 * Cleanup after batch-mode fsync_object_files.
 */
static void flush_loose_object_transaction(struct odb_transaction *transaction)
{
	struct strbuf temp_path = STRBUF_INIT;
	struct tempfile *temp;

	if (!transaction->objdir)
		return;

	/*
	 * Issue a full hardware flush against a temporary file to ensure
	 * that all objects are durable before any renames occur. The code in
	 * fsync_loose_object_transaction has already issued a writeout
	 * request, but it has not flushed any writeback cache in the storage
	 * hardware or any filesystem logs. This fsync call acts as a barrier
	 * to ensure that the data in each new object file is durable before
	 * the final name is visible.
	 */
	strbuf_addf(&temp_path, "%s/bulk_fsync_XXXXXX",
		    repo_get_object_directory(transaction->odb->repo));
	temp = xmks_tempfile(temp_path.buf);
	fsync_or_die(get_tempfile_fd(temp), get_tempfile_path(temp));
	delete_tempfile(&temp);
	strbuf_release(&temp_path);

	/*
	 * Make the object files visible in the primary ODB after their data is
	 * fully durable.
	 */
	tmp_objdir_migrate(transaction->objdir);
	transaction->objdir = NULL;
}
/* Finalize a file on disk, and close it. */
static void close_loose_object(struct odb_source *source,
			       int fd, const char *filename)
{
	/* a source about to be destroyed does not need durability */
	if (source->will_destroy)
		goto out;

	if (batch_fsync_enabled(FSYNC_COMPONENT_LOOSE_OBJECT))
		fsync_loose_object_transaction(source->odb->transaction, fd, filename);
	else if (fsync_object_files > 0)
		fsync_or_die(fd, filename);
	else
		fsync_component_or_die(FSYNC_COMPONENT_LOOSE_OBJECT, fd,
				       filename);

out:
	if (close(fd) != 0)
		die_errno(_("error when closing loose object file"));
}

/* Size of directory component, including the ending '/' */
static inline int directory_size(const char *filename)
{
	const char *s = strrchr(filename, '/');
	if (!s)
		return 0;
	return s - filename + 1;
}

/*
 * This creates a temporary file in the same directory as the final
 * 'filename'
 *
 * We want to avoid cross-directory filename renames, because those
 * can have problems on various filesystems (FAT, NFS, Coda).
 */
static int create_tmpfile(struct repository *repo,
			  struct strbuf *tmp, const char *filename)
{
	int fd, dirlen = directory_size(filename);

	strbuf_reset(tmp);
	strbuf_add(tmp, filename, dirlen);
	strbuf_addstr(tmp, "tmp_obj_XXXXXX");
	fd = git_mkstemp_mode(tmp->buf, 0444);
	if (fd < 0 && dirlen && errno == ENOENT) {
		/*
		 * Make sure the directory exists; note that the contents
		 * of the buffer are undefined after mkstemp returns an
		 * error, so we have to rewrite the whole buffer from
		 * scratch.
		 */
		strbuf_reset(tmp);
		strbuf_add(tmp, filename, dirlen - 1);
		if (mkdir(tmp->buf, 0777) && errno != EEXIST)
			return -1;
		if (adjust_shared_perm(repo, tmp->buf))
			return -1;

		/* Try again */
		strbuf_addstr(tmp, "/tmp_obj_XXXXXX");
		fd = git_mkstemp_mode(tmp->buf, 0444);
	}
	return fd;
}

/**
 * Common steps for loose object writers to start writing loose
 * objects:
 *
 * - Create tmpfile for the loose object.
 * - Setup zlib stream for compression.
 * - Start to feed header to zlib stream.
 *
 * Returns a "fd", which should later be provided to
 * end_loose_object_common().
 */
static int start_loose_object_common(struct odb_source *source,
				     struct strbuf *tmp_file,
				     const char *filename, unsigned flags,
				     git_zstream *stream,
				     unsigned char *buf, size_t buflen,
				     struct git_hash_ctx *c, struct git_hash_ctx *compat_c,
				     char *hdr, int hdrlen)
{
	const struct git_hash_algo *algo = source->odb->repo->hash_algo;
	const struct git_hash_algo *compat = source->odb->repo->compat_hash_algo;
	int fd;

	fd = create_tmpfile(source->odb->repo, tmp_file, filename);
	if (fd < 0) {
		if (flags & WRITE_OBJECT_SILENT)
			return -1;
		else if (errno == EACCES)
			return error(_("insufficient permission for adding "
				       "an object to repository database %s"),
				     source->path);
		else
			return error_errno(
				_("unable to create temporary file"));
	}

	/* Setup zlib stream for compression */
	git_deflate_init(stream, zlib_compression_level);
	stream->next_out = buf;
	stream->avail_out = buflen;
	algo->init_fn(c);
	if (compat && compat_c)
		compat->init_fn(compat_c);

	/* Start to feed header to zlib stream */
	stream->next_in = (unsigned char *)hdr;
	stream->avail_in = hdrlen;
	while (git_deflate(stream, 0) == Z_OK)
		; /* nothing */
	git_hash_update(c, hdr, hdrlen);
	if (compat && compat_c)
		git_hash_update(compat_c, hdr, hdrlen);

	return fd;
}

/**
 * Common steps for the inner git_deflate() loop for writing loose
 * objects. Returns what git_deflate() returns.
 */
static int write_loose_object_common(struct odb_source *source,
				     struct git_hash_ctx *c, struct git_hash_ctx *compat_c,
				     git_zstream *stream, const int flush,
				     unsigned char *in0, const int fd,
				     unsigned char *compressed,
				     const size_t compressed_len)
{
	const struct git_hash_algo *compat = source->odb->repo->compat_hash_algo;
	int ret;

	ret = git_deflate(stream, flush ? Z_FINISH : 0);
	/* hash exactly the bytes zlib consumed this round */
	git_hash_update(c, in0, stream->next_in - in0);
	if (compat && compat_c)
		git_hash_update(compat_c, in0, stream->next_in - in0);
	if (write_in_full(fd, compressed, stream->next_out - compressed) < 0)
		die_errno(_("unable to write loose object file"));
	stream->next_out = compressed;
	stream->avail_out = compressed_len;

	return ret;
}
/**
 * Common steps for loose object writers to end writing loose objects:
 *
 * - End the compression of zlib stream.
 * - Get the calculated oid to "oid".
 */
static int end_loose_object_common(struct odb_source *source,
				   struct git_hash_ctx *c, struct git_hash_ctx *compat_c,
				   git_zstream *stream, struct object_id *oid,
				   struct object_id *compat_oid)
{
	const struct git_hash_algo *compat = source->odb->repo->compat_hash_algo;
	int ret;

	ret = git_deflate_end_gently(stream);
	if (ret != Z_OK)
		return ret;
	git_hash_final_oid(oid, c);
	if (compat && compat_c)
		git_hash_final_oid(compat_oid, compat_c);

	return Z_OK;
}

/*
 * Deflate an in-memory object (header + contents) into a tmpfile and
 * move it into its final location. A non-zero "mtime" is applied before
 * the final move. Returns 0 on success, -1 (or dies) on failure.
 */
static int write_loose_object(struct odb_source *source,
			      const struct object_id *oid, char *hdr,
			      int hdrlen, const void *buf, unsigned long len,
			      time_t mtime, unsigned flags)
{
	int fd, ret;
	unsigned char compressed[4096];
	git_zstream stream;
	struct git_hash_ctx c;
	struct object_id parano_oid;
	/* NOTE: static buffers — not reentrant or thread-safe. */
	static struct strbuf tmp_file = STRBUF_INIT;
	static struct strbuf filename = STRBUF_INIT;

	if (batch_fsync_enabled(FSYNC_COMPONENT_LOOSE_OBJECT))
		prepare_loose_object_transaction(source->odb->transaction);

	odb_loose_path(source, &filename, oid);

	fd = start_loose_object_common(source, &tmp_file, filename.buf, flags,
				       &stream, compressed, sizeof(compressed),
				       &c, NULL, hdr, hdrlen);
	if (fd < 0)
		return -1;

	/* Then the data itself.. */
	stream.next_in = (void *)buf;
	stream.avail_in = len;
	do {
		unsigned char *in0 = stream.next_in;

		ret = write_loose_object_common(source, &c, NULL, &stream, 1, in0, fd,
						compressed, sizeof(compressed));
	} while (ret == Z_OK);

	if (ret != Z_STREAM_END)
		die(_("unable to deflate new object %s (%d)"), oid_to_hex(oid),
		    ret);
	ret = end_loose_object_common(source, &c, NULL, &stream, &parano_oid, NULL);
	if (ret != Z_OK)
		die(_("deflateEnd on object %s failed (%d)"), oid_to_hex(oid),
		    ret);
	/* the re-hashed contents must match the oid we were asked to write */
	if (!oideq(oid, &parano_oid))
		die(_("confused by unstable object source data for %s"),
		    oid_to_hex(oid));

	close_loose_object(source, fd, tmp_file.buf);

	if (mtime) {
		struct utimbuf utb;
		utb.actime = mtime;
		utb.modtime = mtime;
		if (utime(tmp_file.buf, &utb) < 0 &&
		    !(flags & WRITE_OBJECT_SILENT))
			warning_errno(_("failed utime() on %s"), tmp_file.buf);
	}

	return finalize_object_file_flags(source->odb->repo, tmp_file.buf, filename.buf,
					  FOF_SKIP_COLLISION_CHECK);
}

/* Freshen a loose copy of "oid" in any source; returns 1 on success. */
static int freshen_loose_object(struct object_database *odb,
				const struct object_id *oid)
{
	odb_prepare_alternates(odb);
	for (struct odb_source *source = odb->sources; source; source = source->next)
		if (check_and_freshen_source(source, oid, 1))
			return 1;
	return 0;
}

/*
 * Freshen the packfile containing "oid", if any. Cruft packs are skipped
 * so their objects remain eligible for pruning. Returns 1 if freshened.
 */
static int freshen_packed_object(struct object_database *odb,
				 const struct object_id *oid)
{
	struct pack_entry e;
	if (!find_pack_entry(odb->repo, oid, &e))
		return 0;
	if (e.p->is_cruft)
		return 0;
	if (e.p->freshened)
		return 1;
	if (!freshen_file(e.p->pack_name))
		return 0;
	e.p->freshened = 1;
	return 1;
}
object_id compat_oid; 1021 int fd, ret, err = 0, flush = 0; 1022 unsigned char compressed[4096]; 1023 git_zstream stream; 1024 struct git_hash_ctx c, compat_c; 1025 struct strbuf tmp_file = STRBUF_INIT; 1026 struct strbuf filename = STRBUF_INIT; 1027 int dirlen; 1028 char hdr[MAX_HEADER_LEN]; 1029 int hdrlen; 1030 1031 if (batch_fsync_enabled(FSYNC_COMPONENT_LOOSE_OBJECT)) 1032 prepare_loose_object_transaction(source->odb->transaction); 1033 1034 /* Since oid is not determined, save tmp file to odb path. */ 1035 strbuf_addf(&filename, "%s/", source->path); 1036 hdrlen = format_object_header(hdr, sizeof(hdr), OBJ_BLOB, len); 1037 1038 /* 1039 * Common steps for write_loose_object and stream_loose_object to 1040 * start writing loose objects: 1041 * 1042 * - Create tmpfile for the loose object. 1043 * - Setup zlib stream for compression. 1044 * - Start to feed header to zlib stream. 1045 */ 1046 fd = start_loose_object_common(source, &tmp_file, filename.buf, 0, 1047 &stream, compressed, sizeof(compressed), 1048 &c, &compat_c, hdr, hdrlen); 1049 if (fd < 0) { 1050 err = -1; 1051 goto cleanup; 1052 } 1053 1054 /* Then the data itself.. */ 1055 do { 1056 unsigned char *in0 = stream.next_in; 1057 1058 if (!stream.avail_in && !in_stream->is_finished) { 1059 const void *in = in_stream->read(in_stream, &stream.avail_in); 1060 stream.next_in = (void *)in; 1061 in0 = (unsigned char *)in; 1062 /* All data has been read. */ 1063 if (in_stream->is_finished) 1064 flush = 1; 1065 } 1066 ret = write_loose_object_common(source, &c, &compat_c, &stream, flush, in0, fd, 1067 compressed, sizeof(compressed)); 1068 /* 1069 * Unlike write_loose_object(), we do not have the entire 1070 * buffer. If we get Z_BUF_ERROR due to too few input bytes, 1071 * then we'll replenish them in the next input_stream->read() 1072 * call when we loop. 
1073 */ 1074 } while (ret == Z_OK || ret == Z_BUF_ERROR); 1075 1076 if (stream.total_in != len + hdrlen) 1077 die(_("write stream object %ld != %"PRIuMAX), stream.total_in, 1078 (uintmax_t)len + hdrlen); 1079 1080 /* 1081 * Common steps for write_loose_object and stream_loose_object to 1082 * end writing loose object: 1083 * 1084 * - End the compression of zlib stream. 1085 * - Get the calculated oid. 1086 */ 1087 if (ret != Z_STREAM_END) 1088 die(_("unable to stream deflate new object (%d)"), ret); 1089 ret = end_loose_object_common(source, &c, &compat_c, &stream, oid, &compat_oid); 1090 if (ret != Z_OK) 1091 die(_("deflateEnd on stream object failed (%d)"), ret); 1092 close_loose_object(source, fd, tmp_file.buf); 1093 1094 if (freshen_packed_object(source->odb, oid) || 1095 freshen_loose_object(source->odb, oid)) { 1096 unlink_or_warn(tmp_file.buf); 1097 goto cleanup; 1098 } 1099 1100 odb_loose_path(source, &filename, oid); 1101 1102 /* We finally know the object path, and create the missing dir. 
 */
	dirlen = directory_size(filename.buf);
	if (dirlen) {
		struct strbuf dir = STRBUF_INIT;
		strbuf_add(&dir, filename.buf, dirlen);

		/* EEXIST just means another process created the fan-out dir. */
		if (safe_create_dir_in_gitdir(source->odb->repo, dir.buf) &&
		    errno != EEXIST) {
			err = error_errno(_("unable to create directory %s"), dir.buf);
			strbuf_release(&dir);
			goto cleanup;
		}
		strbuf_release(&dir);
	}

	err = finalize_object_file_flags(source->odb->repo, tmp_file.buf, filename.buf,
					 FOF_SKIP_COLLISION_CHECK);
	if (!err && compat)
		err = repo_add_loose_object_map(source, oid, &compat_oid);
cleanup:
	strbuf_release(&tmp_file);
	strbuf_release(&filename);
	return err;
}

/*
 * Hash the buffer as an object of the given type and, unless the
 * object already exists (packed or loose, in which case it is merely
 * freshened), write it out as a loose object. When the repository has
 * a compatibility hash algorithm, the mapping between the two object
 * names is recorded as well. Returns 0 on success (including when
 * writing was skipped), negative on error.
 */
int write_object_file(struct odb_source *source,
		      const void *buf, unsigned long len,
		      enum object_type type, struct object_id *oid,
		      struct object_id *compat_oid_in, unsigned flags)
{
	const struct git_hash_algo *algo = source->odb->repo->hash_algo;
	const struct git_hash_algo *compat = source->odb->repo->compat_hash_algo;
	struct object_id compat_oid;
	char hdr[MAX_HEADER_LEN];
	int hdrlen = sizeof(hdr);

	/*
	 * Generate compat_oid: use the caller-provided one if any, hash
	 * blobs directly, and convert all other object types to the
	 * compat representation before hashing.
	 */
	if (compat) {
		if (compat_oid_in)
			oidcpy(&compat_oid, compat_oid_in);
		else if (type == OBJ_BLOB)
			hash_object_file(compat, buf, len, type, &compat_oid);
		else {
			struct strbuf converted = STRBUF_INIT;
			convert_object_file(source->odb->repo, &converted, algo, compat,
					    buf, len, type, 0);
			hash_object_file(compat, converted.buf, converted.len,
					 type, &compat_oid);
			strbuf_release(&converted);
		}
	}

	/* Normally if we have it in the pack then we do not bother writing
	 * it out into .git/objects/??/?{38} file.
	 */
	write_object_file_prepare(algo, buf, len, type, oid, hdr, &hdrlen);
	if (freshen_packed_object(source->odb, oid) ||
	    freshen_loose_object(source->odb, oid))
		return 0;
	if (write_loose_object(source, oid, hdr, hdrlen, buf, len, 0, flags))
		return -1;
	if (compat)
		return repo_add_loose_object_map(source, oid, &compat_oid);
	return 0;
}

/*
 * Ensure a loose copy of the object exists, with the given mtime. If
 * any source already has a loose copy we are done; otherwise the
 * contents are read back from the object database and written out as
 * a loose object. Returns 0 on success, negative on error.
 */
int force_object_loose(struct odb_source *source,
		       const struct object_id *oid, time_t mtime)
{
	const struct git_hash_algo *compat = source->odb->repo->compat_hash_algo;
	void *buf;
	unsigned long len;
	struct object_info oi = OBJECT_INFO_INIT;
	struct object_id compat_oid;
	enum object_type type;
	char hdr[MAX_HEADER_LEN];
	int hdrlen;
	int ret;

	for (struct odb_source *s = source->odb->sources; s; s = s->next)
		if (has_loose_object(s, oid))
			return 0;

	oi.typep = &type;
	oi.sizep = &len;
	oi.contentp = &buf;
	if (odb_read_object_info_extended(source->odb, oid, &oi, 0))
		return error(_("cannot read object for %s"), oid_to_hex(oid));
	if (compat) {
		if (repo_oid_to_algop(source->odb->repo, oid, compat, &compat_oid))
			return error(_("cannot map object %s to %s"),
				     oid_to_hex(oid), compat->name);
	}
	hdrlen = format_object_header(hdr, sizeof(hdr), type, len);
	ret = write_loose_object(source, oid, hdr, hdrlen, buf, len, mtime, 0);
	if (!ret && compat)
		ret = repo_add_loose_object_map(source, oid, &compat_oid);
	free(buf);

	return ret;
}

/*
 * We can't use the normal fsck_error_function() for index_mem(),
 * because we don't yet have a valid oid for it to report. Instead,
 * report the minimal fsck error here, and rely on the caller to
 * give more context.
 */
static int hash_format_check_report(struct fsck_options *opts UNUSED,
				    void *fsck_report UNUSED,
				    enum fsck_msg_type msg_type UNUSED,
				    enum fsck_msg_id msg_id UNUSED,
				    const char *message)
{
	error(_("object fails fsck: %s"), message);
	return 1;
}

/*
 * Hash an in-memory buffer as an object of the given type (defaulting
 * to a blob) and, with INDEX_WRITE_OBJECT, write it to the object
 * database. Blobs that have a path are run through convert_to_git()
 * first; with INDEX_FORMAT_CHECK the buffer is fsck'ed and we die on
 * malformed input. Returns 0 on success or the error from
 * odb_write_object().
 */
static int index_mem(struct index_state *istate,
		     struct object_id *oid,
		     const void *buf, size_t size,
		     enum object_type type,
		     const char *path, unsigned flags)
{
	struct strbuf nbuf = STRBUF_INIT;
	int ret = 0;
	int write_object = flags & INDEX_WRITE_OBJECT;

	if (!type)
		type = OBJ_BLOB;

	/*
	 * Convert blobs to git internal format
	 */
	if ((type == OBJ_BLOB) && path) {
		if (convert_to_git(istate, path, buf, size, &nbuf,
				   get_conv_flags(flags))) {
			/* The filter produced new contents; hash those instead. */
			buf = nbuf.buf;
			size = nbuf.len;
		}
	}
	if (flags & INDEX_FORMAT_CHECK) {
		struct fsck_options opts = FSCK_OPTIONS_DEFAULT;

		opts.strict = 1;
		opts.error_func = hash_format_check_report;
		/* No oid exists yet, so fsck against the null oid. */
		if (fsck_buffer(null_oid(istate->repo->hash_algo), type, buf, size, &opts))
			die(_("refusing to create malformed object"));
		fsck_finish(&opts);
	}

	if (write_object)
		ret = odb_write_object(istate->repo->objects, buf, size, type, oid);
	else
		hash_object_file(istate->repo->hash_algo, buf, size, type, oid);

	strbuf_release(&nbuf);
	return ret;
}

/*
 * Filter the contents of fd through the configured conversion filters
 * and hash (and with INDEX_WRITE_OBJECT, store) the result as a blob.
 * Only valid when would_convert_to_git_filter_fd() is true for path.
 */
static int index_stream_convert_blob(struct index_state *istate,
				     struct object_id *oid,
				     int fd,
				     const char *path,
				     unsigned flags)
{
	int ret = 0;
	const int write_object = flags & INDEX_WRITE_OBJECT;
	struct strbuf sbuf = STRBUF_INIT;

	assert(path);
	ASSERT(would_convert_to_git_filter_fd(istate, path));

	convert_to_git_filter_fd(istate, path, fd, &sbuf,
				 get_conv_flags(flags));

	if (write_object)
		ret = odb_write_object(istate->repo->objects, sbuf.buf, sbuf.len, OBJ_BLOB,
				       oid);
	else
		hash_object_file(istate->repo->hash_algo, sbuf.buf, sbuf.len, OBJ_BLOB,
				 oid);
	strbuf_release(&sbuf);
	return ret;
}

/*
 * Slurp all of fd into memory (used for pipes and other non-regular
 * files whose size is unknown up front) and index it via index_mem().
 */
static int index_pipe(struct index_state *istate, struct object_id *oid,
		      int fd, enum object_type type,
		      const char *path, unsigned flags)
{
	struct strbuf sbuf = STRBUF_INIT;
	int ret;

	if (strbuf_read(&sbuf, fd, 4096) >= 0)
		ret = index_mem(istate, oid, sbuf.buf, sbuf.len, type, path, flags);
	else
		ret = -1;
	strbuf_release(&sbuf);
	return ret;
}

#define SMALL_FILE_SIZE (32*1024)

/*
 * Index a regular file of known size: empty files skip I/O entirely,
 * small files are read into a malloc'd buffer, and anything larger is
 * mmap'ed to avoid the copy.
 */
static int index_core(struct index_state *istate,
		      struct object_id *oid, int fd, size_t size,
		      enum object_type type, const char *path,
		      unsigned flags)
{
	int ret;

	if (!size) {
		ret = index_mem(istate, oid, "", size, type, path, flags);
	} else if (size <= SMALL_FILE_SIZE) {
		char *buf = xmalloc(size);
		ssize_t read_result = read_in_full(fd, buf, size);
		if (read_result < 0)
			ret = error_errno(_("read error while indexing %s"),
					  path ? path : "<unknown>");
		else if ((size_t) read_result != size)
			ret = error(_("short read while indexing %s"),
				    path ?
path : "<unknown>");
		else
			ret = index_mem(istate, oid, buf, size, type, path, flags);
		free(buf);
	} else {
		void *buf = xmmap(NULL, size, PROT_READ, MAP_PRIVATE, fd, 0);
		ret = index_mem(istate, oid, buf, size, type, path, flags);
		munmap(buf, size);
	}
	return ret;
}

/*
 * Return 1 if the object already exists in the repository or has
 * already been written as part of this transaction, 0 otherwise.
 */
static int already_written(struct odb_transaction *transaction,
			   struct object_id *oid)
{
	/* The object may already exist in the repository */
	if (odb_has_object(transaction->odb, oid,
			   HAS_OBJECT_RECHECK_PACKED | HAS_OBJECT_FETCH_PROMISOR))
		return 1;

	/* Might want to keep the list sorted */
	for (uint32_t i = 0; i < transaction->packfile.nr_written; i++)
		if (oideq(&transaction->packfile.written[i]->oid, oid))
			return 1;

	/* This is a new object we need to keep */
	return 0;
}

/* Lazily create backing packfile for the state */
static void prepare_packfile_transaction(struct odb_transaction *transaction,
					 unsigned flags)
{
	struct transaction_packfile *state = &transaction->packfile;
	if (!(flags & INDEX_WRITE_OBJECT) || state->f)
		return;

	state->f = create_tmp_packfile(transaction->odb->repo,
				       &state->pack_tmp_name);
	reset_pack_idx_option(&state->pack_idx_opts);

	/*
	 * Pretend we are going to write only one object; the real count
	 * is fixed up later by flush_packfile_transaction().
	 */
	state->offset = write_pack_header(state->f, 1);
	if (!state->offset)
		die_errno("unable to write pack header");
}

/*
 * Read the contents from fd for size bytes, streaming it to the
 * packfile in state while updating the hash in ctx. Signal a failure
 * by returning a negative value when the resulting pack would exceed
 * the pack size limit and this is not the first object in the pack,
 * so that the caller can discard what we wrote from the current pack
 * by truncating it and opening a new one. The caller will then call
 * us again after rewinding the input fd.
 *
 * The already_hashed_to pointer is kept untouched by the caller to
 * make sure we do not hash the same byte when we are called
 * again. This way, the caller does not have to checkpoint its hash
 * status before calling us just in case we ask it to call us again
 * with a new pack.
 */
static int stream_blob_to_pack(struct transaction_packfile *state,
			       struct git_hash_ctx *ctx, off_t *already_hashed_to,
			       int fd, size_t size, const char *path,
			       unsigned flags)
{
	git_zstream s;
	unsigned char ibuf[16384];
	unsigned char obuf[16384];
	unsigned hdrlen;
	int status = Z_OK;
	int write_object = (flags & INDEX_WRITE_OBJECT);
	off_t offset = 0;

	git_deflate_init(&s, pack_compression_level);

	hdrlen = encode_in_pack_object_header(obuf, sizeof(obuf), OBJ_BLOB, size);
	s.next_out = obuf + hdrlen;
	s.avail_out = sizeof(obuf) - hdrlen;

	while (status != Z_STREAM_END) {
		if (size && !s.avail_in) {
			size_t rsize = size < sizeof(ibuf) ? size : sizeof(ibuf);
			ssize_t read_result = read_in_full(fd, ibuf, rsize);
			if (read_result < 0)
				die_errno("failed to read from '%s'", path);
			if ((size_t)read_result != rsize)
				die("failed to read %u bytes from '%s'",
				    (unsigned)rsize, path);
			offset += rsize;
			/*
			 * Only hash the bytes we have not hashed during a
			 * previous attempt (before a pack-split retry).
			 */
			if (*already_hashed_to < offset) {
				size_t hsize = offset - *already_hashed_to;
				if (rsize < hsize)
					hsize = rsize;
				if (hsize)
					git_hash_update(ctx, ibuf, hsize);
				*already_hashed_to = offset;
			}
			s.next_in = ibuf;
			s.avail_in = rsize;
			size -= rsize;
		}

		status = git_deflate(&s, size ? 0 : Z_FINISH);

		if (!s.avail_out || status == Z_STREAM_END) {
			if (write_object) {
				size_t written = s.next_out - obuf;

				/* would we bust the size limit? */
				if (state->nr_written &&
				    pack_size_limit_cfg &&
				    pack_size_limit_cfg < state->offset + written) {
					git_deflate_abort(&s);
					return -1;
				}

				hashwrite(state->f, obuf, written);
				state->offset += written;
			}
			s.next_out = obuf;
			s.avail_out = sizeof(obuf);
		}

		switch (status) {
		case Z_OK:
		case Z_BUF_ERROR:
		case Z_STREAM_END:
			continue;
		default:
			die("unexpected deflate failure: %d", status);
		}
	}
	git_deflate_end(&s);
	return 0;
}

/*
 * Finalize the transaction's backing packfile: delete it if no object
 * was written, otherwise fix up its header/footer for the actual
 * object count, stage the pack plus index under their final names,
 * and re-prepare the object database so the new pack becomes visible.
 */
static void flush_packfile_transaction(struct odb_transaction *transaction)
{
	struct transaction_packfile *state = &transaction->packfile;
	struct repository *repo = transaction->odb->repo;
	unsigned char hash[GIT_MAX_RAWSZ];
	struct strbuf packname = STRBUF_INIT;
	char *idx_tmp_name = NULL;

	if (!state->f)
		return;

	if (state->nr_written == 0) {
		/* Nothing was written; drop the temporary pack. */
		close(state->f->fd);
		free_hashfile(state->f);
		unlink(state->pack_tmp_name);
		goto clear_exit;
	} else if (state->nr_written == 1) {
		/* The header already claims one object; just finalize. */
		finalize_hashfile(state->f, hash, FSYNC_COMPONENT_PACK,
				  CSUM_HASH_IN_STREAM | CSUM_FSYNC | CSUM_CLOSE);
	} else {
		/* Rewrite the object count in the header and re-checksum. */
		int fd = finalize_hashfile(state->f, hash, FSYNC_COMPONENT_PACK, 0);
		fixup_pack_header_footer(repo->hash_algo, fd, hash, state->pack_tmp_name,
					 state->nr_written, hash,
					 state->offset);
		close(fd);
	}

	strbuf_addf(&packname, "%s/pack/pack-%s.",
		    repo_get_object_directory(transaction->odb->repo),
		    hash_to_hex_algop(hash, repo->hash_algo));

	stage_tmp_packfiles(repo, &packname, state->pack_tmp_name,
			    state->written, state->nr_written, NULL,
			    &state->pack_idx_opts, hash, &idx_tmp_name);
	rename_tmp_packfile_idx(repo, &packname, &idx_tmp_name);

	for (uint32_t i = 0; i < state->nr_written; i++)
		free(state->written[i]);

clear_exit:
	free(idx_tmp_name);
free(state->pack_tmp_name);
	free(state->written);
	memset(state, 0, sizeof(*state));

	strbuf_release(&packname);
	/* Make objects we just wrote available to ourselves */
	odb_reprepare(repo->objects);
}

/*
 * This writes the specified object to a packfile. Objects written here
 * during the same transaction are written to the same packfile. The
 * packfile is not flushed until the transaction is flushed. The caller
 * is expected to ensure a valid transaction is setup for objects to be
 * recorded to.
 *
 * This also bypasses the usual "convert-to-git" dance, and that is on
 * purpose. We could write a streaming version of the converting
 * functions and insert that before feeding the data to fast-import
 * (or equivalent in-core API described above). However, that is
 * somewhat complicated, as we do not know the size of the filter
 * result, which we need to know beforehand when writing a git object.
 * Since the primary motivation for trying to stream from the working
 * tree file and to avoid mmaping it in core is to deal with large
 * binary blobs, they generally do not want to get any conversion, and
 * callers should avoid this code path when filters are requested.
 */
static int index_blob_packfile_transaction(struct odb_transaction *transaction,
					   struct object_id *result_oid, int fd,
					   size_t size, const char *path,
					   unsigned flags)
{
	struct transaction_packfile *state = &transaction->packfile;
	off_t seekback, already_hashed_to;
	struct git_hash_ctx ctx;
	unsigned char obuf[16384];
	unsigned header_len;
	struct hashfile_checkpoint checkpoint;
	struct pack_idx_entry *idx = NULL;

	/* Remember the input position so we can rewind on a pack split. */
	seekback = lseek(fd, 0, SEEK_CUR);
	if (seekback == (off_t)-1)
		return error("cannot find the current offset");

	header_len = format_object_header((char *)obuf, sizeof(obuf),
					  OBJ_BLOB, size);
	transaction->odb->repo->hash_algo->init_fn(&ctx);
	git_hash_update(&ctx, obuf, header_len);

	/* Note: idx is non-NULL when we are writing */
	if ((flags & INDEX_WRITE_OBJECT) != 0) {
		CALLOC_ARRAY(idx, 1);

		prepare_packfile_transaction(transaction, flags);
		hashfile_checkpoint_init(state->f, &checkpoint);
	}

	already_hashed_to = 0;

	while (1) {
		prepare_packfile_transaction(transaction, flags);
		if (idx) {
			hashfile_checkpoint(state->f, &checkpoint);
			idx->offset = state->offset;
			crc32_begin(state->f);
		}
		if (!stream_blob_to_pack(state, &ctx, &already_hashed_to,
					 fd, size, path, flags))
			break;
		/*
		 * Writing this object to the current pack will make
		 * it too big; we need to truncate it, start a new
		 * pack, and write into it.
1574 */ 1575 if (!idx) 1576 BUG("should not happen"); 1577 hashfile_truncate(state->f, &checkpoint); 1578 state->offset = checkpoint.offset; 1579 flush_packfile_transaction(transaction); 1580 if (lseek(fd, seekback, SEEK_SET) == (off_t)-1) 1581 return error("cannot seek back"); 1582 } 1583 git_hash_final_oid(result_oid, &ctx); 1584 if (!idx) 1585 return 0; 1586 1587 idx->crc32 = crc32_end(state->f); 1588 if (already_written(transaction, result_oid)) { 1589 hashfile_truncate(state->f, &checkpoint); 1590 state->offset = checkpoint.offset; 1591 free(idx); 1592 } else { 1593 oidcpy(&idx->oid, result_oid); 1594 ALLOC_GROW(state->written, 1595 state->nr_written + 1, 1596 state->alloc_written); 1597 state->written[state->nr_written++] = idx; 1598 } 1599 return 0; 1600} 1601 1602int index_fd(struct index_state *istate, struct object_id *oid, 1603 int fd, struct stat *st, 1604 enum object_type type, const char *path, unsigned flags) 1605{ 1606 int ret; 1607 1608 /* 1609 * Call xsize_t() only when needed to avoid potentially unnecessary 1610 * die() for large files. 
1611 */ 1612 if (type == OBJ_BLOB && path && would_convert_to_git_filter_fd(istate, path)) { 1613 ret = index_stream_convert_blob(istate, oid, fd, path, flags); 1614 } else if (!S_ISREG(st->st_mode)) { 1615 ret = index_pipe(istate, oid, fd, type, path, flags); 1616 } else if ((st->st_size >= 0 && 1617 (size_t)st->st_size <= repo_settings_get_big_file_threshold(istate->repo)) || 1618 type != OBJ_BLOB || 1619 (path && would_convert_to_git(istate, path))) { 1620 ret = index_core(istate, oid, fd, xsize_t(st->st_size), 1621 type, path, flags); 1622 } else { 1623 struct odb_transaction *transaction; 1624 1625 transaction = odb_transaction_begin(the_repository->objects); 1626 ret = index_blob_packfile_transaction(the_repository->objects->transaction, 1627 oid, fd, 1628 xsize_t(st->st_size), 1629 path, flags); 1630 odb_transaction_commit(transaction); 1631 } 1632 1633 close(fd); 1634 return ret; 1635} 1636 1637int index_path(struct index_state *istate, struct object_id *oid, 1638 const char *path, struct stat *st, unsigned flags) 1639{ 1640 int fd; 1641 struct strbuf sb = STRBUF_INIT; 1642 int rc = 0; 1643 1644 switch (st->st_mode & S_IFMT) { 1645 case S_IFREG: 1646 fd = open(path, O_RDONLY); 1647 if (fd < 0) 1648 return error_errno("open(\"%s\")", path); 1649 if (index_fd(istate, oid, fd, st, OBJ_BLOB, path, flags) < 0) 1650 return error(_("%s: failed to insert into database"), 1651 path); 1652 break; 1653 case S_IFLNK: 1654 if (strbuf_readlink(&sb, path, st->st_size)) 1655 return error_errno("readlink(\"%s\")", path); 1656 if (!(flags & INDEX_WRITE_OBJECT)) 1657 hash_object_file(istate->repo->hash_algo, sb.buf, sb.len, 1658 OBJ_BLOB, oid); 1659 else if (odb_write_object(istate->repo->objects, sb.buf, sb.len, OBJ_BLOB, oid)) 1660 rc = error(_("%s: failed to insert into database"), path); 1661 strbuf_release(&sb); 1662 break; 1663 case S_IFDIR: 1664 return repo_resolve_gitlink_ref(istate->repo, path, "HEAD", oid); 1665 default: 1666 return error(_("%s: unsupported file 
type"), path); 1667 } 1668 return rc; 1669} 1670 1671int read_pack_header(int fd, struct pack_header *header) 1672{ 1673 if (read_in_full(fd, header, sizeof(*header)) != sizeof(*header)) 1674 /* "eof before pack header was fully read" */ 1675 return PH_ERROR_EOF; 1676 1677 if (header->hdr_signature != htonl(PACK_SIGNATURE)) 1678 /* "protocol error (pack signature mismatch detected)" */ 1679 return PH_ERROR_PACK_SIGNATURE; 1680 if (!pack_version_ok(header->hdr_version)) 1681 /* "protocol error (pack version unsupported)" */ 1682 return PH_ERROR_PROTOCOL; 1683 return 0; 1684} 1685 1686static int for_each_file_in_obj_subdir(unsigned int subdir_nr, 1687 struct strbuf *path, 1688 const struct git_hash_algo *algop, 1689 each_loose_object_fn obj_cb, 1690 each_loose_cruft_fn cruft_cb, 1691 each_loose_subdir_fn subdir_cb, 1692 void *data) 1693{ 1694 size_t origlen, baselen; 1695 DIR *dir; 1696 struct dirent *de; 1697 int r = 0; 1698 struct object_id oid; 1699 1700 if (subdir_nr > 0xff) 1701 BUG("invalid loose object subdirectory: %x", subdir_nr); 1702 1703 origlen = path->len; 1704 strbuf_complete(path, '/'); 1705 strbuf_addf(path, "%02x", subdir_nr); 1706 1707 dir = opendir(path->buf); 1708 if (!dir) { 1709 if (errno != ENOENT) 1710 r = error_errno(_("unable to open %s"), path->buf); 1711 strbuf_setlen(path, origlen); 1712 return r; 1713 } 1714 1715 oid.hash[0] = subdir_nr; 1716 strbuf_addch(path, '/'); 1717 baselen = path->len; 1718 1719 while ((de = readdir_skip_dot_and_dotdot(dir))) { 1720 size_t namelen; 1721 1722 namelen = strlen(de->d_name); 1723 strbuf_setlen(path, baselen); 1724 strbuf_add(path, de->d_name, namelen); 1725 if (namelen == algop->hexsz - 2 && 1726 !hex_to_bytes(oid.hash + 1, de->d_name, 1727 algop->rawsz - 1)) { 1728 oid_set_algo(&oid, algop); 1729 memset(oid.hash + algop->rawsz, 0, 1730 GIT_MAX_RAWSZ - algop->rawsz); 1731 if (obj_cb) { 1732 r = obj_cb(&oid, path->buf, data); 1733 if (r) 1734 break; 1735 } 1736 continue; 1737 } 1738 1739 if (cruft_cb) 
{
			r = cruft_cb(de->d_name, path->buf, data);
			if (r)
				break;
		}
	}
	closedir(dir);

	strbuf_setlen(path, baselen - 1);
	if (!r && subdir_cb)
		r = subdir_cb(subdir_nr, path->buf, data);

	strbuf_setlen(path, origlen);

	return r;
}

/*
 * Iterate over every loose-object file in one object source by
 * visiting all 256 fan-out subdirectories. Stops early and returns
 * any non-zero value propagated from the callbacks.
 */
int for_each_loose_file_in_source(struct odb_source *source,
				  each_loose_object_fn obj_cb,
				  each_loose_cruft_fn cruft_cb,
				  each_loose_subdir_fn subdir_cb,
				  void *data)
{
	struct strbuf buf = STRBUF_INIT;
	int r;

	strbuf_addstr(&buf, source->path);
	for (int i = 0; i < 256; i++) {
		r = for_each_file_in_obj_subdir(i, &buf, source->odb->repo->hash_algo,
						obj_cb, cruft_cb, subdir_cb, data);
		if (r)
			break;
	}

	strbuf_release(&buf);
	return r;
}

/*
 * Iterate over loose objects in all sources of the object database,
 * or only the first source with FOR_EACH_OBJECT_LOCAL_ONLY.
 */
int for_each_loose_object(struct object_database *odb,
			  each_loose_object_fn cb, void *data,
			  enum for_each_object_flags flags)
{
	struct odb_source *source;

	odb_prepare_alternates(odb);
	for (source = odb->sources; source; source = source->next) {
		int r = for_each_loose_file_in_source(source, cb, NULL,
						      NULL, data);
		if (r)
			return r;

		if (flags & FOR_EACH_OBJECT_LOCAL_ONLY)
			break;
	}

	return 0;
}

/* Callback for odb_loose_cache(): collect each oid into the oidtree. */
static int append_loose_object(const struct object_id *oid,
			       const char *path UNUSED,
			       void *data)
{
	oidtree_insert(data, oid);
	return 0;
}

/*
 * Return (creating and populating on demand) the cache of loose
 * objects in the fan-out subdirectory that would hold oid. A per-source
 * bitmap of already-scanned subdirectories avoids rescanning the same
 * directory on later lookups.
 */
struct oidtree *odb_loose_cache(struct odb_source *source,
				const struct object_id *oid)
{
	int subdir_nr = oid->hash[0];
	struct strbuf buf = STRBUF_INIT;
	size_t word_bits = bitsizeof(source->loose_objects_subdir_seen[0]);
	size_t word_index = subdir_nr / word_bits;
	size_t mask = (size_t)1u << (subdir_nr % word_bits);
	uint32_t *bitmap;

	if (subdir_nr < 0 ||
	    (size_t) subdir_nr >= bitsizeof(source->loose_objects_subdir_seen))
		BUG("subdir_nr out of range");

	bitmap = &source->loose_objects_subdir_seen[word_index];
	if (*bitmap & mask)
		return source->loose_objects_cache;
	if (!source->loose_objects_cache) {
		ALLOC_ARRAY(source->loose_objects_cache, 1);
		oidtree_init(source->loose_objects_cache);
	}
	strbuf_addstr(&buf, source->path);
	for_each_file_in_obj_subdir(subdir_nr, &buf,
				    source->odb->repo->hash_algo,
				    append_loose_object,
				    NULL, NULL,
				    source->loose_objects_cache);
	*bitmap |= mask;
	strbuf_release(&buf);
	return source->loose_objects_cache;
}

/* Drop the loose-object cache and forget which subdirs were scanned. */
void odb_clear_loose_cache(struct odb_source *source)
{
	oidtree_clear(source->loose_objects_cache);
	FREE_AND_NULL(source->loose_objects_cache);
	memset(&source->loose_objects_subdir_seen, 0,
	       sizeof(source->loose_objects_subdir_seen));
}

/*
 * Stream-inflate a loose object whose header was already parsed into
 * hdr, hashing the content without keeping it all in memory, and
 * verify the result matches expected_oid. Returns 0 on success, -1 on
 * corruption, trailing garbage, or hash mismatch.
 */
static int check_stream_oid(git_zstream *stream,
			    const char *hdr,
			    unsigned long size,
			    const char *path,
			    const struct object_id *expected_oid,
			    const struct git_hash_algo *algop)
{
	struct git_hash_ctx c;
	struct object_id real_oid;
	unsigned char buf[4096];
	unsigned long total_read;
	int status = Z_OK;

	algop->init_fn(&c);
	git_hash_update(&c, hdr, stream->total_out);

	/*
	 * We already read some bytes into hdr, but the ones up to the NUL
	 * do not count against the object's content size.
	 */
	total_read = stream->total_out - strlen(hdr) - 1;

	/*
	 * This size comparison must be "<=" to read the final zlib packets;
	 * see the comment in unpack_loose_rest for details.
 */
	while (total_read <= size &&
	       (status == Z_OK ||
		(status == Z_BUF_ERROR && !stream->avail_out))) {
		stream->next_out = buf;
		stream->avail_out = sizeof(buf);
		if (size - total_read < stream->avail_out)
			stream->avail_out = size - total_read;
		status = git_inflate(stream, Z_FINISH);
		git_hash_update(&c, buf, stream->next_out - buf);
		total_read += stream->next_out - buf;
	}

	if (status != Z_STREAM_END) {
		error(_("corrupt loose object '%s'"), oid_to_hex(expected_oid));
		return -1;
	}
	if (stream->avail_in) {
		error(_("garbage at end of loose object '%s'"),
		      oid_to_hex(expected_oid));
		return -1;
	}

	git_hash_final_oid(&real_oid, &c);
	if (!oideq(expected_oid, &real_oid)) {
		error(_("hash mismatch for %s (expected %s)"), path,
		      oid_to_hex(expected_oid));
		return -1;
	}

	return 0;
}

/*
 * Open and validate the loose object file at path: map it, parse its
 * header into oi, and verify that its content hashes to expected_oid.
 * Blobs above the big-file threshold are verified in a streaming
 * fashion without retaining the contents; smaller objects are fully
 * inflated into *contents with their computed name in *real_oid.
 * Returns 0 when everything checks out, -1 otherwise.
 */
int read_loose_object(struct repository *repo,
		      const char *path,
		      const struct object_id *expected_oid,
		      struct object_id *real_oid,
		      void **contents,
		      struct object_info *oi)
{
	int ret = -1;
	int fd;
	void *map = NULL;
	unsigned long mapsize;
	git_zstream stream;
	char hdr[MAX_HEADER_LEN];
	unsigned long *size = oi->sizep;

	fd = git_open(path);
	if (fd >= 0)
		map = map_fd(fd, path, &mapsize);
	if (!map) {
		error_errno(_("unable to mmap %s"), path);
		goto out;
	}

	if (unpack_loose_header(&stream, map, mapsize, hdr, sizeof(hdr)) != ULHR_OK) {
		error(_("unable to unpack header of %s"), path);
		goto out_inflate;
	}

	if (parse_loose_header(hdr, oi) < 0) {
		error(_("unable to parse header of %s"), path);
		goto out_inflate;
	}

	if (*oi->typep < 0) {
		error(_("unable to parse type from header '%s' of %s"),
		      hdr, path);
		goto out_inflate;
	}

	if (*oi->typep == OBJ_BLOB &&
	    *size > repo_settings_get_big_file_threshold(repo)) {
		/* Too big to hold in memory; verify the hash streamingly. */
		if (check_stream_oid(&stream, hdr, *size, path, expected_oid,
				     repo->hash_algo) < 0)
			goto out_inflate;
	} else {
		*contents = unpack_loose_rest(&stream, hdr, *size, expected_oid);
		if (!*contents) {
			error(_("unable to unpack contents of %s"), path);
			goto out_inflate;
		}
		hash_object_file(repo->hash_algo,
				 *contents, *size,
				 *oi->typep, real_oid);
		if (!oideq(expected_oid, real_oid))
			goto out_inflate;
	}

	ret = 0; /* everything checks out */

out_inflate:
	git_inflate_end(&stream);
out:
	if (map)
		munmap(map, mapsize);
	return ret;
}

/*
 * Begin an object database transaction. Returns NULL when a
 * transaction is already in progress (nested use); the caller's
 * matching commit then becomes a no-op and the outer transaction
 * stays in charge.
 */
struct odb_transaction *object_file_transaction_begin(struct odb_source *source)
{
	struct object_database *odb = source->odb;

	if (odb->transaction)
		return NULL;

	CALLOC_ARRAY(odb->transaction, 1);
	odb->transaction->odb = odb;

	return odb->transaction;
}

/*
 * Commit (flush) a pending transaction: write out any batched loose
 * objects and the transaction packfile, then tear the transaction
 * down. Passing NULL is a no-op, pairing with the NULL returned by a
 * nested begin.
 */
void object_file_transaction_commit(struct odb_transaction *transaction)
{
	if (!transaction)
		return;

	/*
	 * Ensure the transaction ending matches the pending transaction.
	 */
	ASSERT(transaction == transaction->odb->transaction);

	flush_loose_object_transaction(transaction);
	flush_packfile_transaction(transaction);
	transaction->odb->transaction = NULL;
	free(transaction);
}