Git fork
at reftables-rust 2361 lines 62 kB view raw
1#define DISABLE_SIGN_COMPARE_WARNINGS 2 3#include "git-compat-util.h" 4#include "environment.h" 5#include "gettext.h" 6#include "hex.h" 7#include "list.h" 8#include "pack.h" 9#include "repository.h" 10#include "dir.h" 11#include "mergesort.h" 12#include "packfile.h" 13#include "delta.h" 14#include "hash-lookup.h" 15#include "commit.h" 16#include "object.h" 17#include "tag.h" 18#include "trace.h" 19#include "tree-walk.h" 20#include "tree.h" 21#include "object-file.h" 22#include "odb.h" 23#include "midx.h" 24#include "commit-graph.h" 25#include "pack-revindex.h" 26#include "promisor-remote.h" 27#include "pack-mtimes.h" 28 29char *odb_pack_name(struct repository *r, struct strbuf *buf, 30 const unsigned char *hash, const char *ext) 31{ 32 strbuf_reset(buf); 33 strbuf_addf(buf, "%s/pack/pack-%s.%s", repo_get_object_directory(r), 34 hash_to_hex_algop(hash, r->hash_algo), ext); 35 return buf->buf; 36} 37 38static unsigned int pack_used_ctr; 39static unsigned int pack_mmap_calls; 40static unsigned int peak_pack_open_windows; 41static unsigned int pack_open_windows; 42static unsigned int pack_open_fds; 43static unsigned int pack_max_fds; 44static size_t peak_pack_mapped; 45static size_t pack_mapped; 46 47#define SZ_FMT PRIuMAX 48static inline uintmax_t sz_fmt(size_t s) { return s; } 49 50void pack_report(struct repository *repo) 51{ 52 fprintf(stderr, 53 "pack_report: getpagesize() = %10" SZ_FMT "\n" 54 "pack_report: core.packedGitWindowSize = %10" SZ_FMT "\n" 55 "pack_report: core.packedGitLimit = %10" SZ_FMT "\n", 56 sz_fmt(getpagesize()), 57 sz_fmt(repo->settings.packed_git_window_size), 58 sz_fmt(repo->settings.packed_git_limit)); 59 fprintf(stderr, 60 "pack_report: pack_used_ctr = %10u\n" 61 "pack_report: pack_mmap_calls = %10u\n" 62 "pack_report: pack_open_windows = %10u / %10u\n" 63 "pack_report: pack_mapped = " 64 "%10" SZ_FMT " / %10" SZ_FMT "\n", 65 pack_used_ctr, 66 pack_mmap_calls, 67 pack_open_windows, peak_pack_open_windows, 68 sz_fmt(pack_mapped), 
sz_fmt(peak_pack_mapped)); 69} 70 71/* 72 * Open and mmap the index file at path, perform a couple of 73 * consistency checks, then record its information to p. Return 0 on 74 * success. 75 */ 76static int check_packed_git_idx(const char *path, struct packed_git *p) 77{ 78 void *idx_map; 79 size_t idx_size; 80 int fd = git_open(path), ret; 81 struct stat st; 82 const unsigned int hashsz = p->repo->hash_algo->rawsz; 83 84 if (fd < 0) 85 return -1; 86 if (fstat(fd, &st)) { 87 close(fd); 88 return -1; 89 } 90 idx_size = xsize_t(st.st_size); 91 if (idx_size < 4 * 256 + hashsz + hashsz) { 92 close(fd); 93 return error("index file %s is too small", path); 94 } 95 idx_map = xmmap(NULL, idx_size, PROT_READ, MAP_PRIVATE, fd, 0); 96 close(fd); 97 98 ret = load_idx(path, hashsz, idx_map, idx_size, p); 99 100 if (ret) 101 munmap(idx_map, idx_size); 102 103 return ret; 104} 105 106int load_idx(const char *path, const unsigned int hashsz, void *idx_map, 107 size_t idx_size, struct packed_git *p) 108{ 109 struct pack_idx_header *hdr = idx_map; 110 uint32_t version, nr, i, *index; 111 112 if (idx_size < 4 * 256 + hashsz + hashsz) 113 return error("index file %s is too small", path); 114 if (!idx_map) 115 return error("empty data"); 116 117 if (hdr->idx_signature == htonl(PACK_IDX_SIGNATURE)) { 118 version = ntohl(hdr->idx_version); 119 if (version < 2 || version > 2) 120 return error("index file %s is version %"PRIu32 121 " and is not supported by this binary" 122 " (try upgrading GIT to a newer version)", 123 path, version); 124 } else 125 version = 1; 126 127 nr = 0; 128 index = idx_map; 129 if (version > 1) 130 index += 2; /* skip index header */ 131 for (i = 0; i < 256; i++) { 132 uint32_t n = ntohl(index[i]); 133 if (n < nr) 134 return error("non-monotonic index %s", path); 135 nr = n; 136 } 137 138 if (version == 1) { 139 /* 140 * Total size: 141 * - 256 index entries 4 bytes each 142 * - 24-byte entries * nr (object ID + 4-byte offset) 143 * - hash of the packfile 144 * - 
file checksum 145 */ 146 if (idx_size != st_add(4 * 256 + hashsz + hashsz, st_mult(nr, hashsz + 4))) 147 return error("wrong index v1 file size in %s", path); 148 } else if (version == 2) { 149 /* 150 * Minimum size: 151 * - 8 bytes of header 152 * - 256 index entries 4 bytes each 153 * - object ID entry * nr 154 * - 4-byte crc entry * nr 155 * - 4-byte offset entry * nr 156 * - hash of the packfile 157 * - file checksum 158 * And after the 4-byte offset table might be a 159 * variable sized table containing 8-byte entries 160 * for offsets larger than 2^31. 161 */ 162 size_t min_size = st_add(8 + 4*256 + hashsz + hashsz, st_mult(nr, hashsz + 4 + 4)); 163 size_t max_size = min_size; 164 if (nr) 165 max_size = st_add(max_size, st_mult(nr - 1, 8)); 166 if (idx_size < min_size || idx_size > max_size) 167 return error("wrong index v2 file size in %s", path); 168 if (idx_size != min_size && 169 /* 170 * make sure we can deal with large pack offsets. 171 * 31-bit signed offset won't be enough, neither 172 * 32-bit unsigned one will be. 
173 */ 174 (sizeof(off_t) <= 4)) 175 return error("pack too large for current definition of off_t in %s", path); 176 p->crc_offset = st_add(8 + 4 * 256, st_mult(nr, hashsz)); 177 } 178 179 p->index_version = version; 180 p->index_data = idx_map; 181 p->index_size = idx_size; 182 p->num_objects = nr; 183 return 0; 184} 185 186int open_pack_index(struct packed_git *p) 187{ 188 char *idx_name; 189 size_t len; 190 int ret; 191 192 if (p->index_data) 193 return 0; 194 195 if (!strip_suffix(p->pack_name, ".pack", &len)) 196 BUG("pack_name does not end in .pack"); 197 idx_name = xstrfmt("%.*s.idx", (int)len, p->pack_name); 198 ret = check_packed_git_idx(idx_name, p); 199 free(idx_name); 200 return ret; 201} 202 203uint32_t get_pack_fanout(struct packed_git *p, uint32_t value) 204{ 205 const uint32_t *level1_ofs = p->index_data; 206 207 if (!level1_ofs) { 208 if (open_pack_index(p)) 209 return 0; 210 level1_ofs = p->index_data; 211 } 212 213 if (p->index_version > 1) { 214 level1_ofs += 2; 215 } 216 217 return ntohl(level1_ofs[value]); 218} 219 220static struct packed_git *alloc_packed_git(struct repository *r, int extra) 221{ 222 struct packed_git *p = xmalloc(st_add(sizeof(*p), extra)); 223 memset(p, 0, sizeof(*p)); 224 p->pack_fd = -1; 225 p->repo = r; 226 return p; 227} 228 229static char *pack_path_from_idx(const char *idx_path) 230{ 231 size_t len; 232 if (!strip_suffix(idx_path, ".idx", &len)) 233 BUG("idx path does not end in .idx: %s", idx_path); 234 return xstrfmt("%.*s.pack", (int)len, idx_path); 235} 236 237struct packed_git *parse_pack_index(struct repository *r, unsigned char *sha1, 238 const char *idx_path) 239{ 240 char *path = pack_path_from_idx(idx_path); 241 size_t alloc = st_add(strlen(path), 1); 242 struct packed_git *p = alloc_packed_git(r, alloc); 243 244 memcpy(p->pack_name, path, alloc); /* includes NUL */ 245 free(path); 246 hashcpy(p->hash, sha1, p->repo->hash_algo); 247 if (check_packed_git_idx(idx_path, p)) { 248 free(p); 249 return NULL; 250 } 
251 252 return p; 253} 254 255static void scan_windows(struct packed_git *p, 256 struct packed_git **lru_p, 257 struct pack_window **lru_w, 258 struct pack_window **lru_l) 259{ 260 struct pack_window *w, *w_l; 261 262 for (w_l = NULL, w = p->windows; w; w = w->next) { 263 if (!w->inuse_cnt) { 264 if (!*lru_w || w->last_used < (*lru_w)->last_used) { 265 *lru_p = p; 266 *lru_w = w; 267 *lru_l = w_l; 268 } 269 } 270 w_l = w; 271 } 272} 273 274static int unuse_one_window(struct packed_git *current) 275{ 276 struct packed_git *p, *lru_p = NULL; 277 struct pack_window *lru_w = NULL, *lru_l = NULL; 278 279 if (current) 280 scan_windows(current, &lru_p, &lru_w, &lru_l); 281 for (p = current->repo->objects->packfiles->packs; p; p = p->next) 282 scan_windows(p, &lru_p, &lru_w, &lru_l); 283 if (lru_p) { 284 munmap(lru_w->base, lru_w->len); 285 pack_mapped -= lru_w->len; 286 if (lru_l) 287 lru_l->next = lru_w->next; 288 else 289 lru_p->windows = lru_w->next; 290 free(lru_w); 291 pack_open_windows--; 292 return 1; 293 } 294 return 0; 295} 296 297void close_pack_windows(struct packed_git *p) 298{ 299 while (p->windows) { 300 struct pack_window *w = p->windows; 301 302 if (w->inuse_cnt) 303 die("pack '%s' still has open windows to it", 304 p->pack_name); 305 munmap(w->base, w->len); 306 pack_mapped -= w->len; 307 pack_open_windows--; 308 p->windows = w->next; 309 free(w); 310 } 311} 312 313int close_pack_fd(struct packed_git *p) 314{ 315 if (p->pack_fd < 0) 316 return 0; 317 318 close(p->pack_fd); 319 pack_open_fds--; 320 p->pack_fd = -1; 321 322 return 1; 323} 324 325void close_pack_index(struct packed_git *p) 326{ 327 if (p->index_data) { 328 munmap((void *)p->index_data, p->index_size); 329 p->index_data = NULL; 330 } 331} 332 333static void close_pack_revindex(struct packed_git *p) 334{ 335 if (!p->revindex_map) 336 return; 337 338 munmap((void *)p->revindex_map, p->revindex_size); 339 p->revindex_map = NULL; 340 p->revindex_data = NULL; 341} 342 343static void 
close_pack_mtimes(struct packed_git *p) 344{ 345 if (!p->mtimes_map) 346 return; 347 348 munmap((void *)p->mtimes_map, p->mtimes_size); 349 p->mtimes_map = NULL; 350} 351 352void close_pack(struct packed_git *p) 353{ 354 close_pack_windows(p); 355 close_pack_fd(p); 356 close_pack_index(p); 357 close_pack_revindex(p); 358 close_pack_mtimes(p); 359 oidset_clear(&p->bad_objects); 360} 361 362void close_object_store(struct object_database *o) 363{ 364 struct odb_source *source; 365 366 packfile_store_close(o->packfiles); 367 368 for (source = o->sources; source; source = source->next) { 369 if (source->midx) 370 close_midx(source->midx); 371 source->midx = NULL; 372 } 373 374 close_commit_graph(o); 375} 376 377void unlink_pack_path(const char *pack_name, int force_delete) 378{ 379 static const char *exts[] = {".idx", ".pack", ".rev", ".keep", ".bitmap", ".promisor", ".mtimes"}; 380 int i; 381 struct strbuf buf = STRBUF_INIT; 382 size_t plen; 383 384 strbuf_addstr(&buf, pack_name); 385 strip_suffix_mem(buf.buf, &buf.len, ".pack"); 386 plen = buf.len; 387 388 if (!force_delete) { 389 strbuf_addstr(&buf, ".keep"); 390 if (!access(buf.buf, F_OK)) { 391 strbuf_release(&buf); 392 return; 393 } 394 } 395 396 for (i = 0; i < ARRAY_SIZE(exts); i++) { 397 strbuf_setlen(&buf, plen); 398 strbuf_addstr(&buf, exts[i]); 399 unlink(buf.buf); 400 } 401 402 strbuf_release(&buf); 403} 404 405/* 406 * The LRU pack is the one with the oldest MRU window, preferring packs 407 * with no used windows, or the oldest mtime if it has no windows allocated. 408 */ 409static void find_lru_pack(struct packed_git *p, struct packed_git **lru_p, struct pack_window **mru_w, int *accept_windows_inuse) 410{ 411 struct pack_window *w, *this_mru_w; 412 int has_windows_inuse = 0; 413 414 /* 415 * Reject this pack if it has windows and the previously selected 416 * one does not. If this pack does not have windows, reject 417 * it if the pack file is newer than the previously selected one. 
418 */ 419 if (*lru_p && !*mru_w && (p->windows || p->mtime > (*lru_p)->mtime)) 420 return; 421 422 for (w = this_mru_w = p->windows; w; w = w->next) { 423 /* 424 * Reject this pack if any of its windows are in use, 425 * but the previously selected pack did not have any 426 * inuse windows. Otherwise, record that this pack 427 * has windows in use. 428 */ 429 if (w->inuse_cnt) { 430 if (*accept_windows_inuse) 431 has_windows_inuse = 1; 432 else 433 return; 434 } 435 436 if (w->last_used > this_mru_w->last_used) 437 this_mru_w = w; 438 439 /* 440 * Reject this pack if it has windows that have been 441 * used more recently than the previously selected pack. 442 * If the previously selected pack had windows inuse and 443 * we have not encountered a window in this pack that is 444 * inuse, skip this check since we prefer a pack with no 445 * inuse windows to one that has inuse windows. 446 */ 447 if (*mru_w && *accept_windows_inuse == has_windows_inuse && 448 this_mru_w->last_used > (*mru_w)->last_used) 449 return; 450 } 451 452 /* 453 * Select this pack. 
454 */ 455 *mru_w = this_mru_w; 456 *lru_p = p; 457 *accept_windows_inuse = has_windows_inuse; 458} 459 460static int close_one_pack(struct repository *r) 461{ 462 struct packed_git *p, *lru_p = NULL; 463 struct pack_window *mru_w = NULL; 464 int accept_windows_inuse = 1; 465 466 for (p = r->objects->packfiles->packs; p; p = p->next) { 467 if (p->pack_fd == -1) 468 continue; 469 find_lru_pack(p, &lru_p, &mru_w, &accept_windows_inuse); 470 } 471 472 if (lru_p) 473 return close_pack_fd(lru_p); 474 475 return 0; 476} 477 478static unsigned int get_max_fd_limit(void) 479{ 480#ifdef RLIMIT_NOFILE 481 { 482 struct rlimit lim; 483 484 if (!getrlimit(RLIMIT_NOFILE, &lim)) 485 return lim.rlim_cur; 486 } 487#endif 488 489#ifdef _SC_OPEN_MAX 490 { 491 long open_max = sysconf(_SC_OPEN_MAX); 492 if (0 < open_max) 493 return open_max; 494 /* 495 * Otherwise, we got -1 for one of the two 496 * reasons: 497 * 498 * (1) sysconf() did not understand _SC_OPEN_MAX 499 * and signaled an error with -1; or 500 * (2) sysconf() said there is no limit. 501 * 502 * We _could_ clear errno before calling sysconf() to 503 * tell these two cases apart and return a huge number 504 * in the latter case to let the caller cap it to a 505 * value that is not so selfish, but letting the 506 * fallback OPEN_MAX codepath take care of these cases 507 * is a lot simpler. 508 */ 509 } 510#endif 511 512#ifdef OPEN_MAX 513 return OPEN_MAX; 514#else 515 return 1; /* see the caller ;-) */ 516#endif 517} 518 519const char *pack_basename(struct packed_git *p) 520{ 521 const char *ret = strrchr(p->pack_name, '/'); 522 if (ret) 523 ret = ret + 1; /* skip past slash */ 524 else 525 ret = p->pack_name; /* we only have a base */ 526 return ret; 527} 528 529/* 530 * Do not call this directly as this leaks p->pack_fd on error return; 531 * call open_packed_git() instead. 
532 */ 533static int open_packed_git_1(struct packed_git *p) 534{ 535 struct stat st; 536 struct pack_header hdr; 537 unsigned char hash[GIT_MAX_RAWSZ]; 538 unsigned char *idx_hash; 539 ssize_t read_result; 540 const unsigned hashsz = p->repo->hash_algo->rawsz; 541 542 if (open_pack_index(p)) 543 return error("packfile %s index unavailable", p->pack_name); 544 545 if (!pack_max_fds) { 546 unsigned int max_fds = get_max_fd_limit(); 547 548 /* Save 3 for stdin/stdout/stderr, 22 for work */ 549 if (25 < max_fds) 550 pack_max_fds = max_fds - 25; 551 else 552 pack_max_fds = 1; 553 } 554 555 while (pack_max_fds <= pack_open_fds && close_one_pack(p->repo)) 556 ; /* nothing */ 557 558 p->pack_fd = git_open(p->pack_name); 559 if (p->pack_fd < 0 || fstat(p->pack_fd, &st)) 560 return -1; 561 pack_open_fds++; 562 563 /* If we created the struct before we had the pack we lack size. */ 564 if (!p->pack_size) { 565 if (!S_ISREG(st.st_mode)) 566 return error("packfile %s not a regular file", p->pack_name); 567 p->pack_size = st.st_size; 568 } else if (p->pack_size != st.st_size) 569 return error("packfile %s size changed", p->pack_name); 570 571 /* Verify we recognize this pack file format. */ 572 read_result = read_in_full(p->pack_fd, &hdr, sizeof(hdr)); 573 if (read_result < 0) 574 return error_errno("error reading from %s", p->pack_name); 575 if (read_result != sizeof(hdr)) 576 return error("file %s is far too short to be a packfile", p->pack_name); 577 if (hdr.hdr_signature != htonl(PACK_SIGNATURE)) 578 return error("file %s is not a GIT packfile", p->pack_name); 579 if (!pack_version_ok(hdr.hdr_version)) 580 return error("packfile %s is version %"PRIu32" and not" 581 " supported (try upgrading GIT to a newer version)", 582 p->pack_name, ntohl(hdr.hdr_version)); 583 584 /* Verify the pack matches its index. 
*/ 585 if (p->num_objects != ntohl(hdr.hdr_entries)) 586 return error("packfile %s claims to have %"PRIu32" objects" 587 " while index indicates %"PRIu32" objects", 588 p->pack_name, ntohl(hdr.hdr_entries), 589 p->num_objects); 590 read_result = pread_in_full(p->pack_fd, hash, hashsz, 591 p->pack_size - hashsz); 592 if (read_result < 0) 593 return error_errno("error reading from %s", p->pack_name); 594 if (read_result != hashsz) 595 return error("packfile %s signature is unavailable", p->pack_name); 596 idx_hash = ((unsigned char *)p->index_data) + p->index_size - hashsz * 2; 597 if (!hasheq(hash, idx_hash, p->repo->hash_algo)) 598 return error("packfile %s does not match index", p->pack_name); 599 return 0; 600} 601 602static int open_packed_git(struct packed_git *p) 603{ 604 if (!open_packed_git_1(p)) 605 return 0; 606 close_pack_fd(p); 607 return -1; 608} 609 610static int in_window(struct repository *r, struct pack_window *win, 611 off_t offset) 612{ 613 /* We must promise at least one full hash after the 614 * offset is available from this window, otherwise the offset 615 * is not actually in this window and a different window (which 616 * has that one hash excess) must be used. This is to support 617 * the object header and delta base parsing routines below. 618 */ 619 off_t win_off = win->offset; 620 return win_off <= offset 621 && (offset + r->hash_algo->rawsz) <= (win_off + win->len); 622} 623 624unsigned char *use_pack(struct packed_git *p, 625 struct pack_window **w_cursor, 626 off_t offset, 627 unsigned long *left) 628{ 629 struct pack_window *win = *w_cursor; 630 631 /* Since packfiles end in a hash of their content and it's 632 * pointless to ask for an offset into the middle of that 633 * hash, and the in_window function above wouldn't match 634 * don't allow an offset too close to the end of the file. 
635 */ 636 if (!p->pack_size && p->pack_fd == -1 && open_packed_git(p)) 637 die("packfile %s cannot be accessed", p->pack_name); 638 if (offset > (p->pack_size - p->repo->hash_algo->rawsz)) 639 die("offset beyond end of packfile (truncated pack?)"); 640 if (offset < 0) 641 die(_("offset before end of packfile (broken .idx?)")); 642 643 if (!win || !in_window(p->repo, win, offset)) { 644 if (win) 645 win->inuse_cnt--; 646 for (win = p->windows; win; win = win->next) { 647 if (in_window(p->repo, win, offset)) 648 break; 649 } 650 if (!win) { 651 size_t window_align; 652 off_t len; 653 struct repo_settings *settings; 654 655 /* lazy load the settings in case it hasn't been setup */ 656 prepare_repo_settings(p->repo); 657 settings = &p->repo->settings; 658 659 window_align = settings->packed_git_window_size / 2; 660 661 if (p->pack_fd == -1 && open_packed_git(p)) 662 die("packfile %s cannot be accessed", p->pack_name); 663 664 CALLOC_ARRAY(win, 1); 665 win->offset = (offset / window_align) * window_align; 666 len = p->pack_size - win->offset; 667 if (len > settings->packed_git_window_size) 668 len = settings->packed_git_window_size; 669 win->len = (size_t)len; 670 pack_mapped += win->len; 671 672 while (settings->packed_git_limit < pack_mapped 673 && unuse_one_window(p)) 674 ; /* nothing */ 675 win->base = xmmap_gently(NULL, win->len, 676 PROT_READ, MAP_PRIVATE, 677 p->pack_fd, win->offset); 678 if (win->base == MAP_FAILED) 679 die_errno(_("packfile %s cannot be mapped%s"), 680 p->pack_name, mmap_os_err()); 681 if (!win->offset && win->len == p->pack_size 682 && !p->do_not_close) 683 close_pack_fd(p); 684 pack_mmap_calls++; 685 pack_open_windows++; 686 if (pack_mapped > peak_pack_mapped) 687 peak_pack_mapped = pack_mapped; 688 if (pack_open_windows > peak_pack_open_windows) 689 peak_pack_open_windows = pack_open_windows; 690 win->next = p->windows; 691 p->windows = win; 692 } 693 } 694 if (win != *w_cursor) { 695 win->last_used = pack_used_ctr++; 696 win->inuse_cnt++; 
697 *w_cursor = win; 698 } 699 offset -= win->offset; 700 if (left) 701 *left = win->len - xsize_t(offset); 702 return win->base + offset; 703} 704 705void unuse_pack(struct pack_window **w_cursor) 706{ 707 struct pack_window *w = *w_cursor; 708 if (w) { 709 w->inuse_cnt--; 710 *w_cursor = NULL; 711 } 712} 713 714struct packed_git *add_packed_git(struct repository *r, const char *path, 715 size_t path_len, int local) 716{ 717 struct stat st; 718 size_t alloc; 719 struct packed_git *p; 720 struct object_id oid; 721 722 /* 723 * Make sure a corresponding .pack file exists and that 724 * the index looks sane. 725 */ 726 if (!strip_suffix_mem(path, &path_len, ".idx")) 727 return NULL; 728 729 /* 730 * ".promisor" is long enough to hold any suffix we're adding (and 731 * the use xsnprintf double-checks that) 732 */ 733 alloc = st_add3(path_len, strlen(".promisor"), 1); 734 p = alloc_packed_git(r, alloc); 735 memcpy(p->pack_name, path, path_len); 736 737 /* 738 * Note that we have to check auxiliary data structures before we check 739 * for the ".pack" file to exist to avoid races with a packfile that is 740 * in the process of being deleted. The ".pack" file is unlinked before 741 * its auxiliary data structures, so we know that we either get a 742 * consistent snapshot of all data structures or that we'll fail to 743 * stat(3p) the packfile itself and thus return `NULL`. 744 * 745 * As such, we cannot bail out before the access(3p) calls in case the 746 * packfile doesn't exist without doing two stat(3p) calls for it. 
747 */ 748 xsnprintf(p->pack_name + path_len, alloc - path_len, ".keep"); 749 if (!access(p->pack_name, F_OK)) 750 p->pack_keep = 1; 751 752 xsnprintf(p->pack_name + path_len, alloc - path_len, ".promisor"); 753 if (!access(p->pack_name, F_OK)) 754 p->pack_promisor = 1; 755 756 xsnprintf(p->pack_name + path_len, alloc - path_len, ".mtimes"); 757 if (!access(p->pack_name, F_OK)) 758 p->is_cruft = 1; 759 760 xsnprintf(p->pack_name + path_len, alloc - path_len, ".pack"); 761 if (stat(p->pack_name, &st) || !S_ISREG(st.st_mode)) { 762 free(p); 763 return NULL; 764 } 765 766 /* ok, it looks sane as far as we can check without 767 * actually mapping the pack file. 768 */ 769 p->pack_size = st.st_size; 770 p->pack_local = local; 771 p->mtime = st.st_mtime; 772 if (path_len < r->hash_algo->hexsz || 773 get_oid_hex_algop(path + path_len - r->hash_algo->hexsz, &oid, 774 r->hash_algo)) 775 hashclr(p->hash, r->hash_algo); 776 else 777 hashcpy(p->hash, oid.hash, r->hash_algo); 778 779 return p; 780} 781 782void packfile_store_add_pack(struct packfile_store *store, 783 struct packed_git *pack) 784{ 785 if (pack->pack_fd != -1) 786 pack_open_fds++; 787 788 pack->next = store->packs; 789 store->packs = pack; 790 791 hashmap_entry_init(&pack->packmap_ent, strhash(pack->pack_name)); 792 hashmap_add(&store->map, &pack->packmap_ent); 793} 794 795struct packed_git *packfile_store_load_pack(struct packfile_store *store, 796 const char *idx_path, int local) 797{ 798 struct strbuf key = STRBUF_INIT; 799 struct packed_git *p; 800 801 /* 802 * We're being called with the path to the index file, but `pack_map` 803 * holds the path to the packfile itself. 
804 */ 805 strbuf_addstr(&key, idx_path); 806 strbuf_strip_suffix(&key, ".idx"); 807 strbuf_addstr(&key, ".pack"); 808 809 p = hashmap_get_entry_from_hash(&store->map, strhash(key.buf), key.buf, 810 struct packed_git, packmap_ent); 811 if (!p) { 812 p = add_packed_git(store->odb->repo, idx_path, 813 strlen(idx_path), local); 814 if (p) 815 packfile_store_add_pack(store, p); 816 } 817 818 strbuf_release(&key); 819 return p; 820} 821 822void (*report_garbage)(unsigned seen_bits, const char *path); 823 824static void report_helper(const struct string_list *list, 825 int seen_bits, int first, int last) 826{ 827 if (seen_bits == (PACKDIR_FILE_PACK|PACKDIR_FILE_IDX)) 828 return; 829 830 for (; first < last; first++) 831 report_garbage(seen_bits, list->items[first].string); 832} 833 834static void report_pack_garbage(struct string_list *list) 835{ 836 int i, baselen = -1, first = 0, seen_bits = 0; 837 838 if (!report_garbage) 839 return; 840 841 string_list_sort(list); 842 843 for (i = 0; i < list->nr; i++) { 844 const char *path = list->items[i].string; 845 if (baselen != -1 && 846 strncmp(path, list->items[first].string, baselen)) { 847 report_helper(list, seen_bits, first, i); 848 baselen = -1; 849 seen_bits = 0; 850 } 851 if (baselen == -1) { 852 const char *dot = strrchr(path, '.'); 853 if (!dot) { 854 report_garbage(PACKDIR_FILE_GARBAGE, path); 855 continue; 856 } 857 baselen = dot - path + 1; 858 first = i; 859 } 860 if (!strcmp(path + baselen, "pack")) 861 seen_bits |= 1; 862 else if (!strcmp(path + baselen, "idx")) 863 seen_bits |= 2; 864 } 865 report_helper(list, seen_bits, first, list->nr); 866} 867 868void for_each_file_in_pack_subdir(const char *objdir, 869 const char *subdir, 870 each_file_in_pack_dir_fn fn, 871 void *data) 872{ 873 struct strbuf path = STRBUF_INIT; 874 size_t dirnamelen; 875 DIR *dir; 876 struct dirent *de; 877 878 strbuf_addstr(&path, objdir); 879 strbuf_addstr(&path, "/pack"); 880 if (subdir) 881 strbuf_addf(&path, "/%s", subdir); 882 dir 
= opendir(path.buf); 883 if (!dir) { 884 if (errno != ENOENT) 885 error_errno("unable to open object pack directory: %s", 886 path.buf); 887 strbuf_release(&path); 888 return; 889 } 890 strbuf_addch(&path, '/'); 891 dirnamelen = path.len; 892 while ((de = readdir_skip_dot_and_dotdot(dir)) != NULL) { 893 strbuf_setlen(&path, dirnamelen); 894 strbuf_addstr(&path, de->d_name); 895 896 fn(path.buf, path.len, de->d_name, data); 897 } 898 899 closedir(dir); 900 strbuf_release(&path); 901} 902 903void for_each_file_in_pack_dir(const char *objdir, 904 each_file_in_pack_dir_fn fn, 905 void *data) 906{ 907 for_each_file_in_pack_subdir(objdir, NULL, fn, data); 908} 909 910struct prepare_pack_data { 911 struct repository *r; 912 struct string_list *garbage; 913 int local; 914 struct multi_pack_index *m; 915}; 916 917static void prepare_pack(const char *full_name, size_t full_name_len, 918 const char *file_name, void *_data) 919{ 920 struct prepare_pack_data *data = (struct prepare_pack_data *)_data; 921 size_t base_len = full_name_len; 922 923 if (strip_suffix_mem(full_name, &base_len, ".idx") && 924 !(data->m && midx_contains_pack(data->m, file_name))) { 925 char *trimmed_path = xstrndup(full_name, full_name_len); 926 packfile_store_load_pack(data->r->objects->packfiles, 927 trimmed_path, data->local); 928 free(trimmed_path); 929 } 930 931 if (!report_garbage) 932 return; 933 934 if (!strcmp(file_name, "multi-pack-index") || 935 !strcmp(file_name, "multi-pack-index.d")) 936 return; 937 if (starts_with(file_name, "multi-pack-index") && 938 (ends_with(file_name, ".bitmap") || ends_with(file_name, ".rev"))) 939 return; 940 if (ends_with(file_name, ".idx") || 941 ends_with(file_name, ".rev") || 942 ends_with(file_name, ".pack") || 943 ends_with(file_name, ".bitmap") || 944 ends_with(file_name, ".keep") || 945 ends_with(file_name, ".promisor") || 946 ends_with(file_name, ".mtimes")) 947 string_list_append(data->garbage, full_name); 948 else 949 report_garbage(PACKDIR_FILE_GARBAGE, 
full_name); 950} 951 952static void prepare_packed_git_one(struct odb_source *source) 953{ 954 struct string_list garbage = STRING_LIST_INIT_DUP; 955 struct prepare_pack_data data = { 956 .m = source->midx, 957 .r = source->odb->repo, 958 .garbage = &garbage, 959 .local = source->local, 960 }; 961 962 for_each_file_in_pack_dir(source->path, prepare_pack, &data); 963 964 report_pack_garbage(data.garbage); 965 string_list_clear(data.garbage, 0); 966} 967 968DEFINE_LIST_SORT(static, sort_packs, struct packed_git, next); 969 970static int sort_pack(const struct packed_git *a, const struct packed_git *b) 971{ 972 int st; 973 974 /* 975 * Local packs tend to contain objects specific to our 976 * variant of the project than remote ones. In addition, 977 * remote ones could be on a network mounted filesystem. 978 * Favor local ones for these reasons. 979 */ 980 st = a->pack_local - b->pack_local; 981 if (st) 982 return -st; 983 984 /* 985 * Younger packs tend to contain more recent objects, 986 * and more recent objects tend to get accessed more 987 * often. 
988 */ 989 if (a->mtime < b->mtime) 990 return 1; 991 else if (a->mtime == b->mtime) 992 return 0; 993 return -1; 994} 995 996static void packfile_store_prepare_mru(struct packfile_store *store) 997{ 998 struct packed_git *p; 999 1000 INIT_LIST_HEAD(&store->mru); 1001 1002 for (p = store->packs; p; p = p->next) 1003 list_add_tail(&p->mru, &store->mru); 1004} 1005 1006void packfile_store_prepare(struct packfile_store *store) 1007{ 1008 struct odb_source *source; 1009 1010 if (store->initialized) 1011 return; 1012 1013 odb_prepare_alternates(store->odb); 1014 for (source = store->odb->sources; source; source = source->next) { 1015 prepare_multi_pack_index_one(source); 1016 prepare_packed_git_one(source); 1017 } 1018 sort_packs(&store->packs, sort_pack); 1019 1020 packfile_store_prepare_mru(store); 1021 store->initialized = true; 1022} 1023 1024void packfile_store_reprepare(struct packfile_store *store) 1025{ 1026 store->initialized = false; 1027 packfile_store_prepare(store); 1028} 1029 1030struct packed_git *packfile_store_get_packs(struct packfile_store *store) 1031{ 1032 packfile_store_prepare(store); 1033 return store->packs; 1034} 1035 1036struct packed_git *packfile_store_get_all_packs(struct packfile_store *store) 1037{ 1038 packfile_store_prepare(store); 1039 1040 for (struct odb_source *source = store->odb->sources; source; source = source->next) { 1041 struct multi_pack_index *m = source->midx; 1042 if (!m) 1043 continue; 1044 for (uint32_t i = 0; i < m->num_packs + m->num_packs_in_base; i++) 1045 prepare_midx_pack(m, i); 1046 } 1047 1048 return store->packs; 1049} 1050 1051struct list_head *packfile_store_get_packs_mru(struct packfile_store *store) 1052{ 1053 packfile_store_prepare(store); 1054 return &store->mru; 1055} 1056 1057/* 1058 * Give a fast, rough count of the number of objects in the repository. This 1059 * ignores loose objects completely. 
If you have a lot of them, then either 1060 * you should repack because your performance will be awful, or they are 1061 * all unreachable objects about to be pruned, in which case they're not really 1062 * interesting as a measure of repo size in the first place. 1063 */ 1064unsigned long repo_approximate_object_count(struct repository *r) 1065{ 1066 if (!r->objects->approximate_object_count_valid) { 1067 struct odb_source *source; 1068 unsigned long count = 0; 1069 struct packed_git *p; 1070 1071 packfile_store_prepare(r->objects->packfiles); 1072 1073 for (source = r->objects->sources; source; source = source->next) { 1074 struct multi_pack_index *m = get_multi_pack_index(source); 1075 if (m) 1076 count += m->num_objects; 1077 } 1078 1079 for (p = r->objects->packfiles->packs; p; p = p->next) { 1080 if (open_pack_index(p)) 1081 continue; 1082 count += p->num_objects; 1083 } 1084 r->objects->approximate_object_count = count; 1085 r->objects->approximate_object_count_valid = 1; 1086 } 1087 return r->objects->approximate_object_count; 1088} 1089 1090unsigned long unpack_object_header_buffer(const unsigned char *buf, 1091 unsigned long len, enum object_type *type, unsigned long *sizep) 1092{ 1093 unsigned shift; 1094 size_t size, c; 1095 unsigned long used = 0; 1096 1097 c = buf[used++]; 1098 *type = (c >> 4) & 7; 1099 size = c & 15; 1100 shift = 4; 1101 while (c & 0x80) { 1102 if (len <= used || (bitsizeof(long) - 7) < shift) { 1103 error("bad object header"); 1104 size = used = 0; 1105 break; 1106 } 1107 c = buf[used++]; 1108 size = st_add(size, st_left_shift(c & 0x7f, shift)); 1109 shift += 7; 1110 } 1111 *sizep = cast_size_t_to_ulong(size); 1112 return used; 1113} 1114 1115unsigned long get_size_from_delta(struct packed_git *p, 1116 struct pack_window **w_curs, 1117 off_t curpos) 1118{ 1119 const unsigned char *data; 1120 unsigned char delta_head[20], *in; 1121 git_zstream stream; 1122 int st; 1123 1124 memset(&stream, 0, sizeof(stream)); 1125 stream.next_out = 
delta_head; 1126 stream.avail_out = sizeof(delta_head); 1127 1128 git_inflate_init(&stream); 1129 do { 1130 in = use_pack(p, w_curs, curpos, &stream.avail_in); 1131 stream.next_in = in; 1132 /* 1133 * Note: the window section returned by use_pack() must be 1134 * available throughout git_inflate()'s unlocked execution. To 1135 * ensure no other thread will modify the window in the 1136 * meantime, we rely on the packed_window.inuse_cnt. This 1137 * counter is incremented before window reading and checked 1138 * before window disposal. 1139 * 1140 * Other worrying sections could be the call to close_pack_fd(), 1141 * which can close packs even with in-use windows, and to 1142 * odb_reprepare(). Regarding the former, mmap doc says: 1143 * "closing the file descriptor does not unmap the region". And 1144 * for the latter, it won't re-open already available packs. 1145 */ 1146 obj_read_unlock(); 1147 st = git_inflate(&stream, Z_FINISH); 1148 obj_read_lock(); 1149 curpos += stream.next_in - in; 1150 } while ((st == Z_OK || st == Z_BUF_ERROR) && 1151 stream.total_out < sizeof(delta_head)); 1152 git_inflate_end(&stream); 1153 if ((st != Z_STREAM_END) && stream.total_out != sizeof(delta_head)) { 1154 error("delta data unpack-initial failed"); 1155 return 0; 1156 } 1157 1158 /* Examine the initial part of the delta to figure out 1159 * the result size. 1160 */ 1161 data = delta_head; 1162 1163 /* ignore base size */ 1164 get_delta_hdr_size(&data, delta_head+sizeof(delta_head)); 1165 1166 /* Read the result size */ 1167 return get_delta_hdr_size(&data, delta_head+sizeof(delta_head)); 1168} 1169 /* Decode the object header located at *curpos in pack p. The size is stored through sizep by unpack_object_header_buffer(). Returns the object type and advances *curpos past the header; if the decoder reports 0 bytes used, returns OBJ_BAD and leaves *curpos unchanged. */ 1170int unpack_object_header(struct packed_git *p, 1171 struct pack_window **w_curs, 1172 off_t *curpos, 1173 unsigned long *sizep) 1174{ 1175 unsigned char *base; 1176 unsigned long left; 1177 unsigned long used; 1178 enum object_type type; 1179 1180 /* use_pack() assures us we have [base, base + 20) available 1181 * as a range that we can look at. 
(It's actually the hash 1182 * size that is assured.) With our object header encoding 1183 * the maximum deflated object size is 2^137, which is just 1184 * insane, so we know we won't exceed what we have been given. 1185 */ 1186 base = use_pack(p, w_curs, *curpos, &left); 1187 used = unpack_object_header_buffer(base, left, &type, sizep); 1188 if (!used) { 1189 type = OBJ_BAD; 1190 } else 1191 *curpos += used; 1192 1193 return type; 1194} 1195 /* Insert oid into p->bad_objects, the per-pack set consulted by has_packed_and_bad(). */ 1196void mark_bad_packed_object(struct packed_git *p, const struct object_id *oid) 1197{ 1198 oidset_insert(&p->bad_objects, oid); 1199} 1200 /* Return the first pack on r->objects->packfiles->packs whose bad_objects set contains oid, or NULL if no pack has it marked. */ 1201const struct packed_git *has_packed_and_bad(struct repository *r, 1202 const struct object_id *oid) 1203{ 1204 struct packed_git *p; 1205 1206 for (p = r->objects->packfiles->packs; p; p = p->next) 1207 if (oidset_contains(&p->bad_objects, oid)) 1208 return p; 1209 return NULL; 1210} 1211 1212off_t get_delta_base(struct packed_git *p, 1213 struct pack_window **w_curs, 1214 off_t *curpos, 1215 enum object_type type, 1216 off_t delta_obj_offset) 1217{ 1218 unsigned char *base_info = use_pack(p, w_curs, *curpos, NULL); 1219 off_t base_offset; 1220 1221 /* use_pack() assured us we have [base_info, base_info + 20) 1222 * as a range that we can look at without walking off the 1223 * end of the mapped window. It's actually the hash size 1224 * that is assured. An OFS_DELTA longer than the hash size 1225 * is stupid, as then a REF_DELTA would be smaller to store. 
1226 */ 1227 if (type == OBJ_OFS_DELTA) { 1228 unsigned used = 0; 1229 unsigned char c = base_info[used++]; 1230 base_offset = c & 127; 1231 while (c & 128) { 1232 base_offset += 1; 1233 if (!base_offset || MSB(base_offset, 7)) 1234 return 0; /* overflow */ 1235 c = base_info[used++]; 1236 base_offset = (base_offset << 7) + (c & 127); 1237 } 1238 base_offset = delta_obj_offset - base_offset; 1239 if (base_offset <= 0 || base_offset >= delta_obj_offset) 1240 return 0; /* out of bound */ 1241 *curpos += used; 1242 } else if (type == OBJ_REF_DELTA) { 1243 /* The base entry _must_ be in the same pack */ 1244 struct object_id oid; 1245 oidread(&oid, base_info, p->repo->hash_algo); 1246 base_offset = find_pack_entry_one(&oid, p); 1247 *curpos += p->repo->hash_algo->rawsz; 1248 } else 1249 die("I am totally screwed"); 1250 return base_offset; 1251} 1252 1253/* 1254 * Like get_delta_base above, but we return the sha1 instead of the pack 1255 * offset. This means it is cheaper for REF deltas (we do not have to do 1256 * the final object lookup), but more expensive for OFS deltas (we 1257 * have to load the revidx to convert the offset back into a sha1). 
1258 */ 1259static int get_delta_base_oid(struct packed_git *p, 1260 struct pack_window **w_curs, 1261 off_t curpos, 1262 struct object_id *oid, 1263 enum object_type type, 1264 off_t delta_obj_offset) 1265{ 1266 if (type == OBJ_REF_DELTA) { 1267 unsigned char *base = use_pack(p, w_curs, curpos, NULL); 1268 oidread(oid, base, p->repo->hash_algo); 1269 return 0; 1270 } else if (type == OBJ_OFS_DELTA) { 1271 uint32_t base_pos; 1272 off_t base_offset = get_delta_base(p, w_curs, &curpos, 1273 type, delta_obj_offset); 1274 1275 if (!base_offset) 1276 return -1; 1277 1278 if (offset_to_pack_pos(p, base_offset, &base_pos) < 0) 1279 return -1; 1280 1281 return nth_packed_object_id(oid, p, 1282 pack_pos_to_index(p, base_pos)); 1283 } else 1284 return -1; 1285} 1286 1287static int retry_bad_packed_offset(struct repository *r, 1288 struct packed_git *p, 1289 off_t obj_offset) 1290{ 1291 int type; 1292 uint32_t pos; 1293 struct object_id oid; 1294 if (offset_to_pack_pos(p, obj_offset, &pos) < 0) 1295 return OBJ_BAD; 1296 nth_packed_object_id(&oid, p, pack_pos_to_index(p, pos)); 1297 mark_bad_packed_object(p, &oid); 1298 type = odb_read_object_info(r->objects, &oid, NULL); 1299 if (type <= OBJ_NONE) 1300 return OBJ_BAD; 1301 return type; 1302} 1303 1304#define POI_STACK_PREALLOC 64 1305 1306static enum object_type packed_to_object_type(struct repository *r, 1307 struct packed_git *p, 1308 off_t obj_offset, 1309 enum object_type type, 1310 struct pack_window **w_curs, 1311 off_t curpos) 1312{ 1313 off_t small_poi_stack[POI_STACK_PREALLOC]; 1314 off_t *poi_stack = small_poi_stack; 1315 int poi_stack_nr = 0, poi_stack_alloc = POI_STACK_PREALLOC; 1316 1317 while (type == OBJ_OFS_DELTA || type == OBJ_REF_DELTA) { 1318 off_t base_offset; 1319 unsigned long size; 1320 /* Push the object we're going to leave behind */ 1321 if (poi_stack_nr >= poi_stack_alloc && poi_stack == small_poi_stack) { 1322 poi_stack_alloc = alloc_nr(poi_stack_nr); 1323 ALLOC_ARRAY(poi_stack, poi_stack_alloc); 
1324 COPY_ARRAY(poi_stack, small_poi_stack, poi_stack_nr); 1325 } else { 1326 ALLOC_GROW(poi_stack, poi_stack_nr+1, poi_stack_alloc); 1327 } 1328 poi_stack[poi_stack_nr++] = obj_offset; 1329 /* If parsing the base offset fails, just unwind */ 1330 base_offset = get_delta_base(p, w_curs, &curpos, type, obj_offset); 1331 if (!base_offset) 1332 goto unwind; 1333 curpos = obj_offset = base_offset; 1334 type = unpack_object_header(p, w_curs, &curpos, &size); 1335 if (type <= OBJ_NONE) { 1336 /* If getting the base itself fails, we first 1337 * retry the base, otherwise unwind */ 1338 type = retry_bad_packed_offset(r, p, base_offset); 1339 if (type > OBJ_NONE) 1340 goto out; 1341 goto unwind; 1342 } 1343 } 1344 1345 switch (type) { 1346 case OBJ_BAD: 1347 case OBJ_COMMIT: 1348 case OBJ_TREE: 1349 case OBJ_BLOB: 1350 case OBJ_TAG: 1351 break; 1352 default: 1353 error("unknown object type %i at offset %"PRIuMAX" in %s", 1354 type, (uintmax_t)obj_offset, p->pack_name); 1355 type = OBJ_BAD; 1356 } 1357 1358out: 1359 if (poi_stack != small_poi_stack) 1360 free(poi_stack); 1361 return type; 1362 1363unwind: 1364 while (poi_stack_nr) { 1365 obj_offset = poi_stack[--poi_stack_nr]; 1366 type = retry_bad_packed_offset(r, p, obj_offset); 1367 if (type > OBJ_NONE) 1368 goto out; 1369 } 1370 type = OBJ_BAD; 1371 goto out; 1372} 1373 1374static struct hashmap delta_base_cache; 1375static size_t delta_base_cached; 1376 1377static LIST_HEAD(delta_base_cache_lru); 1378 1379struct delta_base_cache_key { 1380 struct packed_git *p; 1381 off_t base_offset; 1382}; 1383 1384struct delta_base_cache_entry { 1385 struct hashmap_entry ent; 1386 struct delta_base_cache_key key; 1387 struct list_head lru; 1388 void *data; 1389 unsigned long size; 1390 enum object_type type; 1391}; 1392 1393static unsigned int pack_entry_hash(struct packed_git *p, off_t base_offset) 1394{ 1395 unsigned int hash; 1396 1397 hash = (unsigned int)(intptr_t)p + (unsigned int)base_offset; 1398 hash += (hash >> 8) + (hash 
>> 16); 1399 return hash; 1400} 1401 1402static struct delta_base_cache_entry * 1403get_delta_base_cache_entry(struct packed_git *p, off_t base_offset) 1404{ 1405 struct hashmap_entry entry, *e; 1406 struct delta_base_cache_key key; 1407 1408 if (!delta_base_cache.cmpfn) 1409 return NULL; 1410 1411 hashmap_entry_init(&entry, pack_entry_hash(p, base_offset)); 1412 key.p = p; 1413 key.base_offset = base_offset; 1414 e = hashmap_get(&delta_base_cache, &entry, &key); 1415 return e ? container_of(e, struct delta_base_cache_entry, ent) : NULL; 1416} 1417 1418static int delta_base_cache_key_eq(const struct delta_base_cache_key *a, 1419 const struct delta_base_cache_key *b) 1420{ 1421 return a->p == b->p && a->base_offset == b->base_offset; 1422} 1423 1424static int delta_base_cache_hash_cmp(const void *cmp_data UNUSED, 1425 const struct hashmap_entry *va, 1426 const struct hashmap_entry *vb, 1427 const void *vkey) 1428{ 1429 const struct delta_base_cache_entry *a, *b; 1430 const struct delta_base_cache_key *key = vkey; 1431 1432 a = container_of(va, const struct delta_base_cache_entry, ent); 1433 b = container_of(vb, const struct delta_base_cache_entry, ent); 1434 1435 if (key) 1436 return !delta_base_cache_key_eq(&a->key, key); 1437 else 1438 return !delta_base_cache_key_eq(&a->key, &b->key); 1439} 1440 1441static int in_delta_base_cache(struct packed_git *p, off_t base_offset) 1442{ 1443 return !!get_delta_base_cache_entry(p, base_offset); 1444} 1445 1446/* 1447 * Remove the entry from the cache, but do _not_ free the associated 1448 * entry data. The caller takes ownership of the "data" buffer, and 1449 * should copy out any fields it wants before detaching. 
1450 */ 1451static void detach_delta_base_cache_entry(struct delta_base_cache_entry *ent) 1452{ 1453 hashmap_remove(&delta_base_cache, &ent->ent, &ent->key); 1454 list_del(&ent->lru); 1455 delta_base_cached -= ent->size; 1456 free(ent); 1457} 1458 1459static void *cache_or_unpack_entry(struct repository *r, struct packed_git *p, 1460 off_t base_offset, unsigned long *base_size, 1461 enum object_type *type) 1462{ 1463 struct delta_base_cache_entry *ent; 1464 1465 ent = get_delta_base_cache_entry(p, base_offset); 1466 if (!ent) 1467 return unpack_entry(r, p, base_offset, type, base_size); 1468 1469 if (type) 1470 *type = ent->type; 1471 if (base_size) 1472 *base_size = ent->size; 1473 return xmemdupz(ent->data, ent->size); 1474} 1475 1476static inline void release_delta_base_cache(struct delta_base_cache_entry *ent) 1477{ 1478 free(ent->data); 1479 detach_delta_base_cache_entry(ent); 1480} 1481 1482void clear_delta_base_cache(void) 1483{ 1484 struct list_head *lru, *tmp; 1485 list_for_each_safe(lru, tmp, &delta_base_cache_lru) { 1486 struct delta_base_cache_entry *entry = 1487 list_entry(lru, struct delta_base_cache_entry, lru); 1488 release_delta_base_cache(entry); 1489 } 1490} 1491 1492static void add_delta_base_cache(struct packed_git *p, off_t base_offset, 1493 void *base, unsigned long base_size, 1494 unsigned long delta_base_cache_limit, 1495 enum object_type type) 1496{ 1497 struct delta_base_cache_entry *ent; 1498 struct list_head *lru, *tmp; 1499 1500 /* 1501 * Check required to avoid redundant entries when more than one thread 1502 * is unpacking the same object, in unpack_entry() (since its phases I 1503 * and III might run concurrently across multiple threads). 
1504 */ 1505 if (in_delta_base_cache(p, base_offset)) { 1506 free(base); 1507 return; 1508 } 1509 1510 delta_base_cached += base_size; 1511 1512 list_for_each_safe(lru, tmp, &delta_base_cache_lru) { 1513 struct delta_base_cache_entry *f = 1514 list_entry(lru, struct delta_base_cache_entry, lru); 1515 if (delta_base_cached <= delta_base_cache_limit) 1516 break; 1517 release_delta_base_cache(f); 1518 } 1519 1520 ent = xmalloc(sizeof(*ent)); 1521 ent->key.p = p; 1522 ent->key.base_offset = base_offset; 1523 ent->type = type; 1524 ent->data = base; 1525 ent->size = base_size; 1526 list_add_tail(&ent->lru, &delta_base_cache_lru); 1527 1528 if (!delta_base_cache.cmpfn) 1529 hashmap_init(&delta_base_cache, delta_base_cache_hash_cmp, NULL, 0); 1530 hashmap_entry_init(&ent->ent, pack_entry_hash(p, base_offset)); 1531 hashmap_add(&delta_base_cache, &ent->ent); 1532} 1533 1534int packed_object_info(struct repository *r, struct packed_git *p, 1535 off_t obj_offset, struct object_info *oi) 1536{ 1537 struct pack_window *w_curs = NULL; 1538 unsigned long size; 1539 off_t curpos = obj_offset; 1540 enum object_type type; 1541 1542 /* 1543 * We always get the representation type, but only convert it to 1544 * a "real" type later if the caller is interested. 
1545 */ 1546 if (oi->contentp) { 1547 *oi->contentp = cache_or_unpack_entry(r, p, obj_offset, oi->sizep, 1548 &type); 1549 if (!*oi->contentp) 1550 type = OBJ_BAD; 1551 } else { 1552 type = unpack_object_header(p, &w_curs, &curpos, &size); 1553 } 1554 1555 if (!oi->contentp && oi->sizep) { 1556 if (type == OBJ_OFS_DELTA || type == OBJ_REF_DELTA) { 1557 off_t tmp_pos = curpos; 1558 off_t base_offset = get_delta_base(p, &w_curs, &tmp_pos, 1559 type, obj_offset); 1560 if (!base_offset) { 1561 type = OBJ_BAD; 1562 goto out; 1563 } 1564 *oi->sizep = get_size_from_delta(p, &w_curs, tmp_pos); 1565 if (*oi->sizep == 0) { 1566 type = OBJ_BAD; 1567 goto out; 1568 } 1569 } else { 1570 *oi->sizep = size; 1571 } 1572 } 1573 1574 if (oi->disk_sizep) { 1575 uint32_t pos; 1576 if (offset_to_pack_pos(p, obj_offset, &pos) < 0) { 1577 error("could not find object at offset %"PRIuMAX" " 1578 "in pack %s", (uintmax_t)obj_offset, p->pack_name); 1579 type = OBJ_BAD; 1580 goto out; 1581 } 1582 1583 *oi->disk_sizep = pack_pos_to_offset(p, pos + 1) - obj_offset; 1584 } 1585 1586 if (oi->typep) { 1587 enum object_type ptot; 1588 ptot = packed_to_object_type(r, p, obj_offset, 1589 type, &w_curs, curpos); 1590 if (oi->typep) 1591 *oi->typep = ptot; 1592 if (ptot < 0) { 1593 type = OBJ_BAD; 1594 goto out; 1595 } 1596 } 1597 1598 if (oi->delta_base_oid) { 1599 if (type == OBJ_OFS_DELTA || type == OBJ_REF_DELTA) { 1600 if (get_delta_base_oid(p, &w_curs, curpos, 1601 oi->delta_base_oid, 1602 type, obj_offset) < 0) { 1603 type = OBJ_BAD; 1604 goto out; 1605 } 1606 } else 1607 oidclr(oi->delta_base_oid, p->repo->hash_algo); 1608 } 1609 1610 oi->whence = in_delta_base_cache(p, obj_offset) ? 
OI_DBCACHED : 1611 OI_PACKED; 1612 1613out: 1614 unuse_pack(&w_curs); 1615 return type; 1616} 1617 1618static void *unpack_compressed_entry(struct packed_git *p, 1619 struct pack_window **w_curs, 1620 off_t curpos, 1621 unsigned long size) 1622{ 1623 int st; 1624 git_zstream stream; 1625 unsigned char *buffer, *in; 1626 1627 buffer = xmallocz_gently(size); 1628 if (!buffer) 1629 return NULL; 1630 memset(&stream, 0, sizeof(stream)); 1631 stream.next_out = buffer; 1632 stream.avail_out = size + 1; 1633 1634 git_inflate_init(&stream); 1635 do { 1636 in = use_pack(p, w_curs, curpos, &stream.avail_in); 1637 stream.next_in = in; 1638 /* 1639 * Note: we must ensure the window section returned by 1640 * use_pack() will be available throughout git_inflate()'s 1641 * unlocked execution. Please refer to the comment at 1642 * get_size_from_delta() to see how this is done. 1643 */ 1644 obj_read_unlock(); 1645 st = git_inflate(&stream, Z_FINISH); 1646 obj_read_lock(); 1647 if (!stream.avail_out) 1648 break; /* the payload is larger than it should be */ 1649 curpos += stream.next_in - in; 1650 } while (st == Z_OK || st == Z_BUF_ERROR); 1651 git_inflate_end(&stream); 1652 if ((st != Z_STREAM_END) || stream.total_out != size) { 1653 free(buffer); 1654 return NULL; 1655 } 1656 1657 /* versions of zlib can clobber unconsumed portion of outbuf */ 1658 buffer[size] = '\0'; 1659 1660 return buffer; 1661} 1662 1663static void write_pack_access_log(struct packed_git *p, off_t obj_offset) 1664{ 1665 static struct trace_key pack_access = TRACE_KEY_INIT(PACK_ACCESS); 1666 trace_printf_key(&pack_access, "%s %"PRIuMAX"\n", 1667 p->pack_name, (uintmax_t)obj_offset); 1668} 1669 1670int do_check_packed_object_crc; 1671 1672#define UNPACK_ENTRY_STACK_PREALLOC 64 1673struct unpack_entry_stack_ent { 1674 off_t obj_offset; 1675 off_t curpos; 1676 unsigned long size; 1677}; 1678 1679void *unpack_entry(struct repository *r, struct packed_git *p, off_t obj_offset, 1680 enum object_type *final_type, 
unsigned long *final_size) 1681{ 1682 struct pack_window *w_curs = NULL; 1683 off_t curpos = obj_offset; 1684 void *data = NULL; 1685 unsigned long size; 1686 enum object_type type; 1687 struct unpack_entry_stack_ent small_delta_stack[UNPACK_ENTRY_STACK_PREALLOC]; 1688 struct unpack_entry_stack_ent *delta_stack = small_delta_stack; 1689 int delta_stack_nr = 0, delta_stack_alloc = UNPACK_ENTRY_STACK_PREALLOC; 1690 int base_from_cache = 0; 1691 1692 prepare_repo_settings(p->repo); 1693 1694 write_pack_access_log(p, obj_offset); 1695 1696 /* PHASE 1: drill down to the innermost base object */ 1697 for (;;) { 1698 off_t base_offset; 1699 int i; 1700 struct delta_base_cache_entry *ent; 1701 1702 ent = get_delta_base_cache_entry(p, curpos); 1703 if (ent) { 1704 type = ent->type; 1705 data = ent->data; 1706 size = ent->size; 1707 detach_delta_base_cache_entry(ent); 1708 base_from_cache = 1; 1709 break; 1710 } 1711 1712 if (do_check_packed_object_crc && p->index_version > 1) { 1713 uint32_t pack_pos, index_pos; 1714 off_t len; 1715 1716 if (offset_to_pack_pos(p, obj_offset, &pack_pos) < 0) { 1717 error("could not find object at offset %"PRIuMAX" in pack %s", 1718 (uintmax_t)obj_offset, p->pack_name); 1719 data = NULL; 1720 goto out; 1721 } 1722 1723 len = pack_pos_to_offset(p, pack_pos + 1) - obj_offset; 1724 index_pos = pack_pos_to_index(p, pack_pos); 1725 if (check_pack_crc(p, &w_curs, obj_offset, len, index_pos)) { 1726 struct object_id oid; 1727 nth_packed_object_id(&oid, p, index_pos); 1728 error("bad packed object CRC for %s", 1729 oid_to_hex(&oid)); 1730 mark_bad_packed_object(p, &oid); 1731 data = NULL; 1732 goto out; 1733 } 1734 } 1735 1736 type = unpack_object_header(p, &w_curs, &curpos, &size); 1737 if (type != OBJ_OFS_DELTA && type != OBJ_REF_DELTA) 1738 break; 1739 1740 base_offset = get_delta_base(p, &w_curs, &curpos, type, obj_offset); 1741 if (!base_offset) { 1742 error("failed to validate delta base reference " 1743 "at offset %"PRIuMAX" from %s", 1744 
(uintmax_t)curpos, p->pack_name); 1745 /* bail to phase 2, in hopes of recovery */ 1746 data = NULL; 1747 break; 1748 } 1749 1750 /* push object, proceed to base */ 1751 if (delta_stack_nr >= delta_stack_alloc 1752 && delta_stack == small_delta_stack) { 1753 delta_stack_alloc = alloc_nr(delta_stack_nr); 1754 ALLOC_ARRAY(delta_stack, delta_stack_alloc); 1755 COPY_ARRAY(delta_stack, small_delta_stack, 1756 delta_stack_nr); 1757 } else { 1758 ALLOC_GROW(delta_stack, delta_stack_nr+1, delta_stack_alloc); 1759 } 1760 i = delta_stack_nr++; 1761 delta_stack[i].obj_offset = obj_offset; 1762 delta_stack[i].curpos = curpos; 1763 delta_stack[i].size = size; 1764 1765 curpos = obj_offset = base_offset; 1766 } 1767 1768 /* PHASE 2: handle the base */ 1769 switch (type) { 1770 case OBJ_OFS_DELTA: 1771 case OBJ_REF_DELTA: 1772 if (data) 1773 BUG("unpack_entry: left loop at a valid delta"); 1774 break; 1775 case OBJ_COMMIT: 1776 case OBJ_TREE: 1777 case OBJ_BLOB: 1778 case OBJ_TAG: 1779 if (!base_from_cache) 1780 data = unpack_compressed_entry(p, &w_curs, curpos, size); 1781 break; 1782 default: 1783 data = NULL; 1784 error("unknown object type %i at offset %"PRIuMAX" in %s", 1785 type, (uintmax_t)obj_offset, p->pack_name); 1786 } 1787 1788 /* PHASE 3: apply deltas in order */ 1789 1790 /* invariants: 1791 * 'data' holds the base data, or NULL if there was corruption 1792 */ 1793 while (delta_stack_nr) { 1794 void *delta_data; 1795 void *base = data; 1796 void *external_base = NULL; 1797 unsigned long delta_size, base_size = size; 1798 int i; 1799 off_t base_obj_offset = obj_offset; 1800 1801 data = NULL; 1802 1803 if (!base) { 1804 /* 1805 * We're probably in deep shit, but let's try to fetch 1806 * the required base anyway from another pack or loose. 1807 * This is costly but should happen only in the presence 1808 * of a corrupted pack, and is better than failing outright. 
1809 */ 1810 uint32_t pos; 1811 struct object_id base_oid; 1812 if (!(offset_to_pack_pos(p, obj_offset, &pos))) { 1813 struct object_info oi = OBJECT_INFO_INIT; 1814 1815 nth_packed_object_id(&base_oid, p, 1816 pack_pos_to_index(p, pos)); 1817 error("failed to read delta base object %s" 1818 " at offset %"PRIuMAX" from %s", 1819 oid_to_hex(&base_oid), (uintmax_t)obj_offset, 1820 p->pack_name); 1821 mark_bad_packed_object(p, &base_oid); 1822 1823 oi.typep = &type; 1824 oi.sizep = &base_size; 1825 oi.contentp = &base; 1826 if (odb_read_object_info_extended(r->objects, &base_oid, 1827 &oi, 0) < 0) 1828 base = NULL; 1829 1830 external_base = base; 1831 } 1832 } 1833 1834 i = --delta_stack_nr; 1835 obj_offset = delta_stack[i].obj_offset; 1836 curpos = delta_stack[i].curpos; 1837 delta_size = delta_stack[i].size; 1838 1839 if (!base) 1840 continue; 1841 1842 delta_data = unpack_compressed_entry(p, &w_curs, curpos, delta_size); 1843 1844 if (!delta_data) { 1845 error("failed to unpack compressed delta " 1846 "at offset %"PRIuMAX" from %s", 1847 (uintmax_t)curpos, p->pack_name); 1848 data = NULL; 1849 } else { 1850 data = patch_delta(base, base_size, delta_data, 1851 delta_size, &size); 1852 1853 /* 1854 * We could not apply the delta; warn the user, but 1855 * keep going. Our failure will be noticed either in 1856 * the next iteration of the loop, or if this is the 1857 * final delta, in the caller when we return NULL. 1858 * Those code paths will take care of making a more 1859 * explicit warning and retrying with another copy of 1860 * the object. 1861 */ 1862 if (!data) 1863 error("failed to apply delta"); 1864 } 1865 1866 /* 1867 * We delay adding `base` to the cache until the end of the loop 1868 * because unpack_compressed_entry() momentarily releases the 1869 * obj_read_mutex, giving another thread the chance to access 1870 * the cache. Therefore, if `base` was already there, this other 1871 * thread could free() it (e.g. 
to make space for another entry) 1872 * before we are done using it. 1873 */ 1874 if (!external_base) 1875 add_delta_base_cache(p, base_obj_offset, base, base_size, 1876 p->repo->settings.delta_base_cache_limit, 1877 type); 1878 1879 free(delta_data); 1880 free(external_base); 1881 } 1882 1883 if (final_type) 1884 *final_type = type; 1885 if (final_size) 1886 *final_size = size; 1887 1888out: 1889 unuse_pack(&w_curs); 1890 1891 if (delta_stack != small_delta_stack) 1892 free(delta_stack); 1893 1894 return data; 1895} 1896 1897int bsearch_pack(const struct object_id *oid, const struct packed_git *p, uint32_t *result) 1898{ 1899 const unsigned char *index_fanout = p->index_data; 1900 const unsigned char *index_lookup; 1901 const unsigned int hashsz = p->repo->hash_algo->rawsz; 1902 int index_lookup_width; 1903 1904 if (!index_fanout) 1905 BUG("bsearch_pack called without a valid pack-index"); 1906 1907 index_lookup = index_fanout + 4 * 256; 1908 if (p->index_version == 1) { 1909 index_lookup_width = hashsz + 4; 1910 index_lookup += 4; 1911 } else { 1912 index_lookup_width = hashsz; 1913 index_fanout += 8; 1914 index_lookup += 8; 1915 } 1916 1917 return bsearch_hash(oid->hash, (const uint32_t*)index_fanout, 1918 index_lookup, index_lookup_width, result); 1919} 1920 1921int nth_packed_object_id(struct object_id *oid, 1922 struct packed_git *p, 1923 uint32_t n) 1924{ 1925 const unsigned char *index = p->index_data; 1926 const unsigned int hashsz = p->repo->hash_algo->rawsz; 1927 if (!index) { 1928 if (open_pack_index(p)) 1929 return -1; 1930 index = p->index_data; 1931 } 1932 if (n >= p->num_objects) 1933 return -1; 1934 index += 4 * 256; 1935 if (p->index_version == 1) { 1936 oidread(oid, index + st_add(st_mult(hashsz + 4, n), 4), 1937 p->repo->hash_algo); 1938 } else { 1939 index += 8; 1940 oidread(oid, index + st_mult(hashsz, n), p->repo->hash_algo); 1941 } 1942 return 0; 1943} 1944 1945void check_pack_index_ptr(const struct packed_git *p, const void *vptr) 1946{ 
1947 const unsigned char *ptr = vptr; 1948 const unsigned char *start = p->index_data; 1949 const unsigned char *end = start + p->index_size; 1950 if (ptr < start) 1951 die(_("offset before start of pack index for %s (corrupt index?)"), 1952 p->pack_name); 1953 /* No need to check for underflow; .idx files must be at least 8 bytes */ 1954 if (ptr >= end - 8) 1955 die(_("offset beyond end of pack index for %s (truncated index?)"), 1956 p->pack_name); 1957} 1958 1959off_t nth_packed_object_offset(const struct packed_git *p, uint32_t n) 1960{ 1961 const unsigned char *index = p->index_data; 1962 const unsigned int hashsz = p->repo->hash_algo->rawsz; 1963 index += 4 * 256; 1964 if (p->index_version == 1) { 1965 return ntohl(*((uint32_t *)(index + st_mult(hashsz + 4, n)))); 1966 } else { 1967 uint32_t off; 1968 index += st_add(8, st_mult(p->num_objects, hashsz + 4)); 1969 off = ntohl(*((uint32_t *)(index + st_mult(4, n)))); 1970 if (!(off & 0x80000000)) 1971 return off; 1972 index += st_add(st_mult(p->num_objects, 4), 1973 st_mult(off & 0x7fffffff, 8)); 1974 check_pack_index_ptr(p, index); 1975 return get_be64(index); 1976 } 1977} 1978 1979off_t find_pack_entry_one(const struct object_id *oid, 1980 struct packed_git *p) 1981{ 1982 const unsigned char *index = p->index_data; 1983 uint32_t result; 1984 1985 if (!index) { 1986 if (open_pack_index(p)) 1987 return 0; 1988 } 1989 1990 if (bsearch_pack(oid, p, &result)) 1991 return nth_packed_object_offset(p, result); 1992 return 0; 1993} 1994 1995int is_pack_valid(struct packed_git *p) 1996{ 1997 /* An already open pack is known to be valid. */ 1998 if (p->pack_fd != -1) 1999 return 1; 2000 2001 /* If the pack has one window completely covering the 2002 * file size, the pack is known to be valid even if 2003 * the descriptor is not currently open. 
2004 */ 2005 if (p->windows) { 2006 struct pack_window *w = p->windows; 2007 2008 if (!w->offset && w->len == p->pack_size) 2009 return 1; 2010 } 2011 2012 /* Force the pack to open to prove it's valid. */ 2013 return !open_packed_git(p); 2014} 2015 /* Walk the list starting at packs and return the first pack for which find_pack_entry_one() yields a non-zero offset for oid, or NULL if none does. */ 2016struct packed_git *find_oid_pack(const struct object_id *oid, 2017 struct packed_git *packs) 2018{ 2019 struct packed_git *p; 2020 2021 for (p = packs; p; p = p->next) { 2022 if (find_pack_entry_one(oid, p)) 2023 return p; 2024 } 2025 return NULL; 2026 2027} 2028 /* Look up oid in pack p and, when found in a usable pack, record the pack and offset in e. Returns 1 on success and 0 otherwise. */ 2029static int fill_pack_entry(const struct object_id *oid, 2030 struct pack_entry *e, 2031 struct packed_git *p) 2032{ 2033 off_t offset; 2034 /* Objects already recorded as bad in this pack are never reported; the size check skips the lookup when the set is empty. */ 2035 if (oidset_size(&p->bad_objects) && 2036 oidset_contains(&p->bad_objects, oid)) 2037 return 0; 2038 2039 offset = find_pack_entry_one(oid, p); 2040 if (!offset) 2041 return 0; 2042 2043 /* 2044 * We are about to tell the caller where they can locate the 2045 * requested object. We better make sure the packfile is 2046 * still here and can be accessed before supplying that 2047 * answer, as it may have been deleted since the index was 2048 * loaded! 
2049 */ 2050 if (!is_pack_valid(p)) 2051 return 0; 2052 e->offset = offset; 2053 e->p = p; 2054 return 1; 2055} 2056 2057int find_pack_entry(struct repository *r, const struct object_id *oid, struct pack_entry *e) 2058{ 2059 struct list_head *pos; 2060 2061 packfile_store_prepare(r->objects->packfiles); 2062 2063 for (struct odb_source *source = r->objects->sources; source; source = source->next) 2064 if (source->midx && fill_midx_entry(source->midx, oid, e)) 2065 return 1; 2066 2067 if (!r->objects->packfiles->packs) 2068 return 0; 2069 2070 list_for_each(pos, &r->objects->packfiles->mru) { 2071 struct packed_git *p = list_entry(pos, struct packed_git, mru); 2072 if (!p->multi_pack_index && fill_pack_entry(oid, e, p)) { 2073 list_move(&p->mru, &r->objects->packfiles->mru); 2074 return 1; 2075 } 2076 } 2077 return 0; 2078} 2079 2080static void maybe_invalidate_kept_pack_cache(struct repository *r, 2081 unsigned flags) 2082{ 2083 if (!r->objects->packfiles->kept_cache.packs) 2084 return; 2085 if (r->objects->packfiles->kept_cache.flags == flags) 2086 return; 2087 FREE_AND_NULL(r->objects->packfiles->kept_cache.packs); 2088 r->objects->packfiles->kept_cache.flags = 0; 2089} 2090 2091struct packed_git **kept_pack_cache(struct repository *r, unsigned flags) 2092{ 2093 maybe_invalidate_kept_pack_cache(r, flags); 2094 2095 if (!r->objects->packfiles->kept_cache.packs) { 2096 struct packed_git **packs = NULL; 2097 size_t nr = 0, alloc = 0; 2098 struct packed_git *p; 2099 2100 /* 2101 * We want "all" packs here, because we need to cover ones that 2102 * are used by a midx, as well. We need to look in every one of 2103 * them (instead of the midx itself) to cover duplicates. It's 2104 * possible that an object is found in two packs that the midx 2105 * covers, one kept and one not kept, but the midx returns only 2106 * the non-kept version. 
2107 */ 2108 for (p = packfile_store_get_all_packs(r->objects->packfiles); p; p = p->next) { 2109 if ((p->pack_keep && (flags & ON_DISK_KEEP_PACKS)) || 2110 (p->pack_keep_in_core && (flags & IN_CORE_KEEP_PACKS))) { 2111 ALLOC_GROW(packs, nr + 1, alloc); 2112 packs[nr++] = p; 2113 } 2114 } 2115 ALLOC_GROW(packs, nr + 1, alloc); 2116 packs[nr] = NULL; 2117 2118 r->objects->packfiles->kept_cache.packs = packs; 2119 r->objects->packfiles->kept_cache.flags = flags; 2120 } 2121 2122 return r->objects->packfiles->kept_cache.packs; 2123} 2124 2125int find_kept_pack_entry(struct repository *r, 2126 const struct object_id *oid, 2127 unsigned flags, 2128 struct pack_entry *e) 2129{ 2130 struct packed_git **cache; 2131 2132 for (cache = kept_pack_cache(r, flags); *cache; cache++) { 2133 struct packed_git *p = *cache; 2134 if (fill_pack_entry(oid, e, p)) 2135 return 1; 2136 } 2137 2138 return 0; 2139} 2140 2141int has_object_pack(struct repository *r, const struct object_id *oid) 2142{ 2143 struct pack_entry e; 2144 return find_pack_entry(r, oid, &e); 2145} 2146 2147int has_object_kept_pack(struct repository *r, const struct object_id *oid, 2148 unsigned flags) 2149{ 2150 struct pack_entry e; 2151 return find_kept_pack_entry(r, oid, flags, &e); 2152} 2153 2154int for_each_object_in_pack(struct packed_git *p, 2155 each_packed_object_fn cb, void *data, 2156 enum for_each_object_flags flags) 2157{ 2158 uint32_t i; 2159 int r = 0; 2160 2161 if (flags & FOR_EACH_OBJECT_PACK_ORDER) { 2162 if (load_pack_revindex(p->repo, p)) 2163 return -1; 2164 } 2165 2166 for (i = 0; i < p->num_objects; i++) { 2167 uint32_t index_pos; 2168 struct object_id oid; 2169 2170 /* 2171 * We are iterating "i" from 0 up to num_objects, but its 2172 * meaning may be different, depending on the requested output 2173 * order: 2174 * 2175 * - in object-name order, it is the same as the index order 2176 * used by nth_packed_object_id(), so we can pass it 2177 * directly 2178 * 2179 * - in pack-order, it is pack 
position, which we must 2180 * convert to an index position in order to get the oid. 2181 */ 2182 if (flags & FOR_EACH_OBJECT_PACK_ORDER) 2183 index_pos = pack_pos_to_index(p, i); 2184 else 2185 index_pos = i; 2186 2187 if (nth_packed_object_id(&oid, p, index_pos) < 0) 2188 return error("unable to get sha1 of object %u in %s", 2189 index_pos, p->pack_name); 2190 2191 r = cb(&oid, p, index_pos, data); 2192 if (r) 2193 break; 2194 } 2195 return r; 2196} 2197 /* Run cb over the objects of every pack returned by packfile_store_get_all_packs() that passes the flag filters below. Iteration stops at the first non-zero result from for_each_object_in_pack(), which is returned as is; otherwise the return value is 1 if any pack index failed to open and 0 if none did. */ 2198int for_each_packed_object(struct repository *repo, each_packed_object_fn cb, 2199 void *data, enum for_each_object_flags flags) 2200{ 2201 struct packed_git *p; 2202 int r = 0; 2203 int pack_errors = 0; 2204 2205 for (p = packfile_store_get_all_packs(repo->objects->packfiles); p; p = p->next) { 2206 if ((flags & FOR_EACH_OBJECT_LOCAL_ONLY) && !p->pack_local) 2207 continue; 2208 if ((flags & FOR_EACH_OBJECT_PROMISOR_ONLY) && 2209 !p->pack_promisor) 2210 continue; 2211 if ((flags & FOR_EACH_OBJECT_SKIP_IN_CORE_KEPT_PACKS) && 2212 p->pack_keep_in_core) 2213 continue; 2214 if ((flags & FOR_EACH_OBJECT_SKIP_ON_DISK_KEPT_PACKS) && 2215 p->pack_keep) 2216 continue; 2217 if (open_pack_index(p)) { 2218 pack_errors = 1; 2219 continue; 2220 } 2221 r = for_each_object_in_pack(p, cb, data, flags); 2222 if (r) 2223 break; 2224 } 2225 return r ? r : pack_errors; 2226} 2227 /* Callback passed to for_each_packed_object() by is_promisor_object(): inserts oid into the oidset given as set_, together with the objects it directly references (tree entries, a commit's tree and parents, a tag's target). Returns 1 when the object cannot be obtained, which ends the iteration, and 0 otherwise. */ 2228static int add_promisor_object(const struct object_id *oid, 2229 struct packed_git *pack, 2230 uint32_t pos UNUSED, 2231 void *set_) 2232{ 2233 struct oidset *set = set_; 2234 struct object *obj; 2235 int we_parsed_object; 2236 2237 obj = lookup_object(pack->repo, oid); 2238 if (obj && obj->parsed) { 2239 we_parsed_object = 0; 2240 } else { 2241 we_parsed_object = 1; 2242 obj = parse_object(pack->repo, oid); 2243 } 2244 2245 if (!obj) 2246 return 1; 2247 2248 oidset_insert(set, oid); 2249 2250 /* 2251 * If this is a tree, commit, or tag, the objects it refers 2252 * to are also promisor objects. 
(Blobs refer to no objects.) 2253 */ 2254 if (obj->type == OBJ_TREE) { 2255 struct tree *tree = (struct tree *)obj; 2256 struct tree_desc desc; 2257 struct name_entry entry; 2258 if (init_tree_desc_gently(&desc, &tree->object.oid, 2259 tree->buffer, tree->size, 0)) 2260 /* 2261 * Error messages are given when packs are 2262 * verified, so do not print any here. 2263 */ 2264 return 0; 2265 while (tree_entry_gently(&desc, &entry)) 2266 oidset_insert(set, &entry.oid); 2267 if (we_parsed_object) 2268 free_tree_buffer(tree); 2269 } else if (obj->type == OBJ_COMMIT) { 2270 struct commit *commit = (struct commit *) obj; 2271 struct commit_list *parents = commit->parents; 2272 2273 oidset_insert(set, get_commit_tree_oid(commit)); 2274 for (; parents; parents = parents->next) 2275 oidset_insert(set, &parents->item->object.oid); 2276 } else if (obj->type == OBJ_TAG) { 2277 struct tag *tag = (struct tag *) obj; 2278 oidset_insert(set, get_tagged_oid(tag)); 2279 } 2280 return 0; 2281} 2282 /* Return non-zero if oid is in the promisor-object set. The set and its "prepared" flag are function-local statics: the set is filled on the first call only, and only when repo_has_promisor_remote(r) is true, so later calls reuse it whichever repository they pass. */ 2283int is_promisor_object(struct repository *r, const struct object_id *oid) 2284{ 2285 static struct oidset promisor_objects; 2286 static int promisor_objects_prepared; 2287 2288 if (!promisor_objects_prepared) { 2289 if (repo_has_promisor_remote(r)) { 2290 for_each_packed_object(r, add_promisor_object, 2291 &promisor_objects, 2292 FOR_EACH_OBJECT_PROMISOR_ONLY | 2293 FOR_EACH_OBJECT_PACK_ORDER); 2294 } 2295 promisor_objects_prepared = 1; 2296 } 2297 return oidset_contains(&promisor_objects, oid); 2298} 2299 /* Parse in as two base-10 numbers separated by ',' and write three 4-byte fields to out with put_be32(): PACK_SIGNATURE, then the two numbers (presumably pack version and object count; confirm against callers). On success stores the number of bytes written (12) in *len and returns 0. Returns -1 if the separator is missing or text follows the second number; out may already be partly written and *len is not set in that case. */ 2300int parse_pack_header_option(const char *in, unsigned char *out, unsigned int *len) 2301{ 2302 unsigned char *hdr; 2303 char *c; 2304 2305 hdr = out; 2306 put_be32(hdr, PACK_SIGNATURE); 2307 hdr += 4; 2308 put_be32(hdr, strtoul(in, &c, 10)); 2309 hdr += 4; 2310 if (*c != ',') 2311 return -1; 2312 put_be32(hdr, strtoul(c + 1, &c, 10)); 2313 hdr += 4; 2314 if (*c) 2315 return -1; 2316 *len = hdr - out; 2317 return 0; 2318} 2319 2320static int pack_map_entry_cmp(const void *cmp_data 
UNUSED, 2321 const struct hashmap_entry *entry, 2322 const struct hashmap_entry *entry2, 2323 const void *keydata) 2324{ 2325 const char *key = keydata; 2326 const struct packed_git *pg1, *pg2; 2327 2328 pg1 = container_of(entry, const struct packed_git, packmap_ent); 2329 pg2 = container_of(entry2, const struct packed_git, packmap_ent); 2330 /* Entries are compared by pack name; when keydata is given it is used in place of entry2's name. The result is strcmp()'s, so 0 means a match. */ 2331 return strcmp(pg1->pack_name, key ? key : pg2->pack_name); 2332} 2333 /* Allocate a zero-initialized packfile_store bound to odb, with an empty MRU list and a hashmap that compares entries via pack_map_entry_cmp(). The caller receives ownership of the returned store. */ 2334struct packfile_store *packfile_store_new(struct object_database *odb) 2335{ 2336 struct packfile_store *store; 2337 CALLOC_ARRAY(store, 1); 2338 store->odb = odb; 2339 INIT_LIST_HEAD(&store->mru); 2340 hashmap_init(&store->map, pack_map_entry_cmp, NULL, 0); 2341 return store; 2342} 2343 /* Release the store: free() each packed_git on store->packs (the next pointer is saved before the node is freed), clear the hashmap, then free the store itself. NOTE(review): close_pack() is not called here; presumably callers run packfile_store_close() beforehand - confirm. */ 2344void packfile_store_free(struct packfile_store *store) 2345{ 2346 for (struct packed_git *p = store->packs, *next; p; p = next) { 2347 next = p->next; 2348 free(p); 2349 } 2350 hashmap_clear(&store->map); 2351 free(store); 2352} 2353 /* Call close_pack() on every pack on store->packs. Finding a pack with do_not_close set is reported through BUG(). */ 2354void packfile_store_close(struct packfile_store *store) 2355{ 2356 for (struct packed_git *p = store->packs; p; p = p->next) { 2357 if (p->do_not_close) 2358 BUG("want to close pack marked 'do-not-close'"); 2359 close_pack(p); 2360 } 2361}