/* Source: Git fork, at reftables-rust (original listing: 693 lines, 17 kB). */
1#define USE_THE_REPOSITORY_VARIABLE 2#define DISABLE_SIGN_COMPARE_WARNINGS 3 4#include "builtin.h" 5#include "config.h" 6#include "environment.h" 7#include "gettext.h" 8#include "git-zlib.h" 9#include "hex.h" 10#include "object-file.h" 11#include "odb.h" 12#include "object.h" 13#include "delta.h" 14#include "pack.h" 15#include "blob.h" 16#include "replace-object.h" 17#include "strbuf.h" 18#include "progress.h" 19#include "decorate.h" 20#include "fsck.h" 21#include "packfile.h" 22 23static int dry_run, quiet, recover, has_errors, strict; 24static const char unpack_usage[] = "git unpack-objects [-n] [-q] [-r] [--strict]"; 25 26/* We always read in 4kB chunks. */ 27static unsigned char buffer[4096]; 28static unsigned int offset, len; 29static off_t consumed_bytes; 30static off_t max_input_size; 31static struct git_hash_ctx ctx; 32static struct fsck_options fsck_options = FSCK_OPTIONS_STRICT; 33static struct progress *progress; 34 35/* 36 * When running under --strict mode, objects whose reachability are 37 * suspect are kept in core without getting written in the object 38 * store. 39 */ 40struct obj_buffer { 41 char *buffer; 42 unsigned long size; 43}; 44 45static struct decoration obj_decorate; 46 47static struct obj_buffer *lookup_object_buffer(struct object *base) 48{ 49 return lookup_decoration(&obj_decorate, base); 50} 51 52static void add_object_buffer(struct object *object, char *buffer, unsigned long size) 53{ 54 struct obj_buffer *obj; 55 CALLOC_ARRAY(obj, 1); 56 obj->buffer = buffer; 57 obj->size = size; 58 if (add_decoration(&obj_decorate, object, obj)) 59 die("object %s tried to add buffer twice!", oid_to_hex(&object->oid)); 60} 61 62/* 63 * Make sure at least "min" bytes are available in the buffer, and 64 * return the pointer to the buffer. 
65 */ 66static void *fill(int min) 67{ 68 if (min <= len) 69 return buffer + offset; 70 if (min > sizeof(buffer)) 71 die("cannot fill %d bytes", min); 72 if (offset) { 73 git_hash_update(&ctx, buffer, offset); 74 memmove(buffer, buffer + offset, len); 75 offset = 0; 76 } 77 do { 78 ssize_t ret = xread(0, buffer + len, sizeof(buffer) - len); 79 if (ret <= 0) { 80 if (!ret) 81 die("early EOF"); 82 die_errno("read error on input"); 83 } 84 len += ret; 85 } while (len < min); 86 return buffer; 87} 88 89static void use(int bytes) 90{ 91 if (bytes > len) 92 die("used more bytes than were available"); 93 len -= bytes; 94 offset += bytes; 95 96 /* make sure off_t is sufficiently large not to wrap */ 97 if (signed_add_overflows(consumed_bytes, bytes)) 98 die("pack too large for current definition of off_t"); 99 consumed_bytes += bytes; 100 if (max_input_size && consumed_bytes > max_input_size) 101 die(_("pack exceeds maximum allowed size")); 102 display_throughput(progress, consumed_bytes); 103} 104 105/* 106 * Decompress zstream from the standard input into a newly 107 * allocated buffer of specified size and return the buffer. 108 * The caller is responsible to free the returned buffer. 109 * 110 * But for dry_run mode, "get_data()" is only used to check the 111 * integrity of data, and the returned buffer is not used at all. 112 * Therefore, in dry_run mode, "get_data()" will release the small 113 * allocated buffer which is reused to hold temporary zstream output 114 * and return NULL instead of returning garbage data. 115 */ 116static void *get_data(unsigned long size) 117{ 118 git_zstream stream; 119 unsigned long bufsize = dry_run && size > 8192 ? 
8192 : size; 120 void *buf = xmallocz(bufsize); 121 122 memset(&stream, 0, sizeof(stream)); 123 124 stream.next_out = buf; 125 stream.avail_out = bufsize; 126 stream.next_in = fill(1); 127 stream.avail_in = len; 128 git_inflate_init(&stream); 129 130 for (;;) { 131 int ret = git_inflate(&stream, 0); 132 use(len - stream.avail_in); 133 if (stream.total_out == size && ret == Z_STREAM_END) 134 break; 135 if (ret != Z_OK) { 136 error("inflate returned %d", ret); 137 FREE_AND_NULL(buf); 138 if (!recover) 139 exit(1); 140 has_errors = 1; 141 break; 142 } 143 stream.next_in = fill(1); 144 stream.avail_in = len; 145 if (dry_run) { 146 /* reuse the buffer in dry_run mode */ 147 stream.next_out = buf; 148 stream.avail_out = bufsize > size - stream.total_out ? 149 size - stream.total_out : 150 bufsize; 151 } 152 } 153 git_inflate_end(&stream); 154 if (dry_run) 155 FREE_AND_NULL(buf); 156 return buf; 157} 158 159struct delta_info { 160 struct object_id base_oid; 161 unsigned nr; 162 off_t base_offset; 163 unsigned long size; 164 void *delta; 165 struct delta_info *next; 166}; 167 168static struct delta_info *delta_list; 169 170static void add_delta_to_list(unsigned nr, const struct object_id *base_oid, 171 off_t base_offset, 172 void *delta, unsigned long size) 173{ 174 struct delta_info *info = xmalloc(sizeof(*info)); 175 176 oidcpy(&info->base_oid, base_oid); 177 info->base_offset = base_offset; 178 info->size = size; 179 info->delta = delta; 180 info->nr = nr; 181 info->next = delta_list; 182 delta_list = info; 183} 184 185struct obj_info { 186 off_t offset; 187 struct object_id oid; 188 struct object *obj; 189}; 190 191/* Remember to update object flag allocation in object.h */ 192#define FLAG_OPEN (1u<<20) 193#define FLAG_WRITTEN (1u<<21) 194 195static struct obj_info *obj_list; 196static unsigned nr_objects; 197 198/* 199 * Called only from check_object() after it verified this object 200 * is Ok. 
201 */ 202static void write_cached_object(struct object *obj, struct obj_buffer *obj_buf) 203{ 204 struct object_id oid; 205 206 if (odb_write_object(the_repository->objects, obj_buf->buffer, obj_buf->size, 207 obj->type, &oid) < 0) 208 die("failed to write object %s", oid_to_hex(&obj->oid)); 209 obj->flags |= FLAG_WRITTEN; 210} 211 212/* 213 * At the very end of the processing, write_rest() scans the objects 214 * that have reachability requirements and calls this function. 215 * Verify its reachability and validity recursively and write it out. 216 */ 217static int check_object(struct object *obj, enum object_type type, 218 void *data UNUSED, 219 struct fsck_options *options UNUSED) 220{ 221 struct obj_buffer *obj_buf; 222 223 if (!obj) 224 return 1; 225 226 if (obj->flags & FLAG_WRITTEN) 227 return 0; 228 229 if (type != OBJ_ANY && obj->type != type) 230 die("object type mismatch"); 231 232 if (!(obj->flags & FLAG_OPEN)) { 233 unsigned long size; 234 int type = odb_read_object_info(the_repository->objects, &obj->oid, &size); 235 if (type != obj->type || type <= 0) 236 die("object of unexpected type"); 237 obj->flags |= FLAG_WRITTEN; 238 return 0; 239 } 240 241 obj_buf = lookup_object_buffer(obj); 242 if (!obj_buf) 243 die("Whoops! 
Cannot find object '%s'", oid_to_hex(&obj->oid)); 244 if (fsck_object(obj, obj_buf->buffer, obj_buf->size, &fsck_options)) 245 die("fsck error in packed object"); 246 fsck_options.walk = check_object; 247 if (fsck_walk(obj, NULL, &fsck_options)) 248 die("Error on reachable objects of %s", oid_to_hex(&obj->oid)); 249 write_cached_object(obj, obj_buf); 250 return 0; 251} 252 253static void write_rest(void) 254{ 255 unsigned i; 256 for (i = 0; i < nr_objects; i++) { 257 if (obj_list[i].obj) 258 check_object(obj_list[i].obj, OBJ_ANY, NULL, NULL); 259 } 260} 261 262static void added_object(unsigned nr, enum object_type type, 263 void *data, unsigned long size); 264 265/* 266 * Write out nr-th object from the list, now we know the contents 267 * of it. Under --strict, this buffers structured objects in-core, 268 * to be checked at the end. 269 */ 270static void write_object(unsigned nr, enum object_type type, 271 void *buf, unsigned long size) 272{ 273 if (!strict) { 274 if (odb_write_object(the_repository->objects, buf, size, type, 275 &obj_list[nr].oid) < 0) 276 die("failed to write object"); 277 added_object(nr, type, buf, size); 278 free(buf); 279 obj_list[nr].obj = NULL; 280 } else if (type == OBJ_BLOB) { 281 struct blob *blob; 282 if (odb_write_object(the_repository->objects, buf, size, type, 283 &obj_list[nr].oid) < 0) 284 die("failed to write object"); 285 added_object(nr, type, buf, size); 286 free(buf); 287 288 blob = lookup_blob(the_repository, &obj_list[nr].oid); 289 if (blob) 290 blob->object.flags |= FLAG_WRITTEN; 291 else 292 die("invalid blob object"); 293 obj_list[nr].obj = NULL; 294 } else { 295 struct object *obj; 296 int eaten; 297 hash_object_file(the_hash_algo, buf, size, type, 298 &obj_list[nr].oid); 299 added_object(nr, type, buf, size); 300 obj = parse_object_buffer(the_repository, &obj_list[nr].oid, 301 type, size, buf, 302 &eaten); 303 if (!obj) 304 die("invalid %s", type_name(type)); 305 add_object_buffer(obj, buf, size); 306 obj->flags |= 
FLAG_OPEN; 307 obj_list[nr].obj = obj; 308 } 309} 310 311static void resolve_delta(unsigned nr, enum object_type type, 312 void *base, unsigned long base_size, 313 void *delta, unsigned long delta_size) 314{ 315 void *result; 316 unsigned long result_size; 317 318 result = patch_delta(base, base_size, 319 delta, delta_size, 320 &result_size); 321 if (!result) 322 die("failed to apply delta"); 323 free(delta); 324 write_object(nr, type, result, result_size); 325} 326 327/* 328 * We now know the contents of an object (which is nr-th in the pack); 329 * resolve all the deltified objects that are based on it. 330 */ 331static void added_object(unsigned nr, enum object_type type, 332 void *data, unsigned long size) 333{ 334 struct delta_info **p = &delta_list; 335 struct delta_info *info; 336 337 while ((info = *p) != NULL) { 338 if (oideq(&info->base_oid, &obj_list[nr].oid) || 339 info->base_offset == obj_list[nr].offset) { 340 *p = info->next; 341 p = &delta_list; 342 resolve_delta(info->nr, type, data, size, 343 info->delta, info->size); 344 free(info); 345 continue; 346 } 347 p = &info->next; 348 } 349} 350 351static void unpack_non_delta_entry(enum object_type type, unsigned long size, 352 unsigned nr) 353{ 354 void *buf = get_data(size); 355 356 if (buf) 357 write_object(nr, type, buf, size); 358} 359 360struct input_zstream_data { 361 git_zstream *zstream; 362 unsigned char buf[8192]; 363 int status; 364}; 365 366static const void *feed_input_zstream(struct input_stream *in_stream, 367 unsigned long *readlen) 368{ 369 struct input_zstream_data *data = in_stream->data; 370 git_zstream *zstream = data->zstream; 371 void *in = fill(1); 372 373 if (in_stream->is_finished) { 374 *readlen = 0; 375 return NULL; 376 } 377 378 zstream->next_out = data->buf; 379 zstream->avail_out = sizeof(data->buf); 380 zstream->next_in = in; 381 zstream->avail_in = len; 382 383 data->status = git_inflate(zstream, 0); 384 385 in_stream->is_finished = data->status != Z_OK; 386 use(len - 
zstream->avail_in); 387 *readlen = sizeof(data->buf) - zstream->avail_out; 388 389 return data->buf; 390} 391 392static void stream_blob(unsigned long size, unsigned nr) 393{ 394 git_zstream zstream = { 0 }; 395 struct input_zstream_data data = { 0 }; 396 struct input_stream in_stream = { 397 .read = feed_input_zstream, 398 .data = &data, 399 }; 400 struct obj_info *info = &obj_list[nr]; 401 402 data.zstream = &zstream; 403 git_inflate_init(&zstream); 404 405 if (stream_loose_object(the_repository->objects->sources, 406 &in_stream, size, &info->oid)) 407 die(_("failed to write object in stream")); 408 409 if (data.status != Z_STREAM_END) 410 die(_("inflate returned (%d)"), data.status); 411 git_inflate_end(&zstream); 412 413 if (strict) { 414 struct blob *blob = lookup_blob(the_repository, &info->oid); 415 416 if (!blob) 417 die(_("invalid blob object from stream")); 418 blob->object.flags |= FLAG_WRITTEN; 419 } 420 info->obj = NULL; 421} 422 423static int resolve_against_held(unsigned nr, const struct object_id *base, 424 void *delta_data, unsigned long delta_size) 425{ 426 struct object *obj; 427 struct obj_buffer *obj_buffer; 428 obj = lookup_object(the_repository, base); 429 if (!obj) 430 return 0; 431 obj_buffer = lookup_object_buffer(obj); 432 if (!obj_buffer) 433 return 0; 434 resolve_delta(nr, obj->type, obj_buffer->buffer, 435 obj_buffer->size, delta_data, delta_size); 436 return 1; 437} 438 439static void unpack_delta_entry(enum object_type type, unsigned long delta_size, 440 unsigned nr) 441{ 442 void *delta_data, *base; 443 unsigned long base_size; 444 struct object_id base_oid; 445 446 if (type == OBJ_REF_DELTA) { 447 oidread(&base_oid, fill(the_hash_algo->rawsz), the_repository->hash_algo); 448 use(the_hash_algo->rawsz); 449 delta_data = get_data(delta_size); 450 if (!delta_data) 451 return; 452 if (odb_has_object(the_repository->objects, &base_oid, 453 HAS_OBJECT_RECHECK_PACKED | HAS_OBJECT_FETCH_PROMISOR)) 454 ; /* Ok we have this one */ 455 else if 
(resolve_against_held(nr, &base_oid, 456 delta_data, delta_size)) 457 return; /* we are done */ 458 else { 459 /* cannot resolve yet --- queue it */ 460 oidclr(&obj_list[nr].oid, the_repository->hash_algo); 461 add_delta_to_list(nr, &base_oid, 0, delta_data, delta_size); 462 return; 463 } 464 } else { 465 unsigned base_found = 0; 466 unsigned char *pack, c; 467 off_t base_offset; 468 unsigned lo, mid, hi; 469 470 pack = fill(1); 471 c = *pack; 472 use(1); 473 base_offset = c & 127; 474 while (c & 128) { 475 base_offset += 1; 476 if (!base_offset || MSB(base_offset, 7)) 477 die("offset value overflow for delta base object"); 478 pack = fill(1); 479 c = *pack; 480 use(1); 481 base_offset = (base_offset << 7) + (c & 127); 482 } 483 base_offset = obj_list[nr].offset - base_offset; 484 if (base_offset <= 0 || base_offset >= obj_list[nr].offset) 485 die("offset value out of bound for delta base object"); 486 487 delta_data = get_data(delta_size); 488 if (!delta_data) 489 return; 490 lo = 0; 491 hi = nr; 492 while (lo < hi) { 493 mid = lo + (hi - lo) / 2; 494 if (base_offset < obj_list[mid].offset) { 495 hi = mid; 496 } else if (base_offset > obj_list[mid].offset) { 497 lo = mid + 1; 498 } else { 499 oidcpy(&base_oid, &obj_list[mid].oid); 500 base_found = !is_null_oid(&base_oid); 501 break; 502 } 503 } 504 if (!base_found) { 505 /* 506 * The delta base object is itself a delta that 507 * has not been resolved yet. 
508 */ 509 oidclr(&obj_list[nr].oid, the_repository->hash_algo); 510 add_delta_to_list(nr, null_oid(the_hash_algo), base_offset, 511 delta_data, delta_size); 512 return; 513 } 514 } 515 516 if (resolve_against_held(nr, &base_oid, delta_data, delta_size)) 517 return; 518 519 base = odb_read_object(the_repository->objects, &base_oid, 520 &type, &base_size); 521 if (!base) { 522 error("failed to read delta-pack base object %s", 523 oid_to_hex(&base_oid)); 524 if (!recover) 525 exit(1); 526 has_errors = 1; 527 return; 528 } 529 resolve_delta(nr, type, base, base_size, delta_data, delta_size); 530 free(base); 531} 532 533static void unpack_one(unsigned nr) 534{ 535 unsigned shift; 536 unsigned char *pack; 537 unsigned long size, c; 538 enum object_type type; 539 540 obj_list[nr].offset = consumed_bytes; 541 542 pack = fill(1); 543 c = *pack; 544 use(1); 545 type = (c >> 4) & 7; 546 size = (c & 15); 547 shift = 4; 548 while (c & 0x80) { 549 pack = fill(1); 550 c = *pack; 551 use(1); 552 size += (c & 0x7f) << shift; 553 shift += 7; 554 } 555 556 switch (type) { 557 case OBJ_BLOB: 558 if (!dry_run && 559 size > repo_settings_get_big_file_threshold(the_repository)) { 560 stream_blob(size, nr); 561 return; 562 } 563 /* fallthrough */ 564 case OBJ_COMMIT: 565 case OBJ_TREE: 566 case OBJ_TAG: 567 unpack_non_delta_entry(type, size, nr); 568 return; 569 case OBJ_REF_DELTA: 570 case OBJ_OFS_DELTA: 571 unpack_delta_entry(type, size, nr); 572 return; 573 default: 574 error("bad object type %d", type); 575 has_errors = 1; 576 if (recover) 577 return; 578 exit(1); 579 } 580} 581 582static void unpack_all(void) 583{ 584 int i; 585 unsigned char *hdr = fill(sizeof(struct pack_header)); 586 struct odb_transaction *transaction; 587 588 if (get_be32(hdr) != PACK_SIGNATURE) 589 die("bad pack file"); 590 hdr += 4; 591 if (!pack_version_ok_native(get_be32(hdr))) 592 die("unknown pack file version %"PRIu32, 593 get_be32(hdr)); 594 hdr += 4; 595 nr_objects = get_be32(hdr); 596 
use(sizeof(struct pack_header)); 597 598 if (!quiet) 599 progress = start_progress(the_repository, 600 _("Unpacking objects"), nr_objects); 601 CALLOC_ARRAY(obj_list, nr_objects); 602 transaction = odb_transaction_begin(the_repository->objects); 603 for (i = 0; i < nr_objects; i++) { 604 unpack_one(i); 605 display_progress(progress, i + 1); 606 } 607 odb_transaction_commit(transaction); 608 stop_progress(&progress); 609 610 if (delta_list) 611 die("unresolved deltas left after unpacking"); 612} 613 614int cmd_unpack_objects(int argc, 615 const char **argv, 616 const char *prefix UNUSED, 617 struct repository *repo UNUSED) 618{ 619 int i; 620 struct object_id oid; 621 struct git_hash_ctx tmp_ctx; 622 623 disable_replace_refs(); 624 625 repo_config(the_repository, git_default_config, NULL); 626 627 quiet = !isatty(2); 628 629 show_usage_if_asked(argc, argv, unpack_usage); 630 631 for (i = 1 ; i < argc; i++) { 632 const char *arg = argv[i]; 633 634 if (*arg == '-') { 635 if (!strcmp(arg, "-n")) { 636 dry_run = 1; 637 continue; 638 } 639 if (!strcmp(arg, "-q")) { 640 quiet = 1; 641 continue; 642 } 643 if (!strcmp(arg, "-r")) { 644 recover = 1; 645 continue; 646 } 647 if (!strcmp(arg, "--strict")) { 648 strict = 1; 649 continue; 650 } 651 if (skip_prefix(arg, "--strict=", &arg)) { 652 strict = 1; 653 fsck_set_msg_types(&fsck_options, arg); 654 continue; 655 } 656 if (skip_prefix(arg, "--pack_header=", &arg)) { 657 if (parse_pack_header_option(arg, 658 buffer, &len) < 0) 659 die(_("bad --pack_header: %s"), arg); 660 continue; 661 } 662 if (skip_prefix(arg, "--max-input-size=", &arg)) { 663 max_input_size = strtoumax(arg, NULL, 10); 664 continue; 665 } 666 usage(unpack_usage); 667 } 668 669 /* We don't take any non-flag arguments now.. 
Maybe some day */ 670 usage(unpack_usage); 671 } 672 the_hash_algo->init_fn(&ctx); 673 unpack_all(); 674 git_hash_update(&ctx, buffer, offset); 675 the_hash_algo->init_fn(&tmp_ctx); 676 git_hash_clone(&tmp_ctx, &ctx); 677 git_hash_final_oid(&oid, &tmp_ctx); 678 if (strict) { 679 write_rest(); 680 if (fsck_finish(&fsck_options)) 681 die(_("fsck error in pack objects")); 682 } 683 if (!hasheq(fill(the_hash_algo->rawsz), oid.hash, 684 the_repository->hash_algo)) 685 die("final sha1 did not match"); 686 use(the_hash_algo->rawsz); 687 688 /* Write the last part of the buffer to stdout */ 689 write_in_full(1, buffer + offset, len); 690 691 /* All done */ 692 return has_errors; 693}