/* Git fork */
1#define USE_THE_REPOSITORY_VARIABLE
2#define DISABLE_SIGN_COMPARE_WARNINGS
3
4#include "builtin.h"
5#include "environment.h"
6#include "gettext.h"
7#include "hex.h"
8#include "config.h"
9#include "attr.h"
10#include "object.h"
11#include "commit.h"
12#include "tag.h"
13#include "delta.h"
14#include "pack.h"
15#include "pack-revindex.h"
16#include "csum-file.h"
17#include "tree-walk.h"
18#include "diff.h"
19#include "revision.h"
20#include "list-objects.h"
21#include "list-objects-filter-options.h"
22#include "pack-objects.h"
23#include "progress.h"
24#include "refs.h"
25#include "streaming.h"
26#include "thread-utils.h"
27#include "pack-bitmap.h"
28#include "delta-islands.h"
29#include "reachable.h"
30#include "oid-array.h"
31#include "strvec.h"
32#include "list.h"
33#include "packfile.h"
34#include "object-file.h"
35#include "odb.h"
36#include "replace-object.h"
37#include "dir.h"
38#include "midx.h"
39#include "trace2.h"
40#include "shallow.h"
41#include "promisor-remote.h"
42#include "pack-mtimes.h"
43#include "parse-options.h"
44#include "blob.h"
45#include "tree.h"
46#include "path-walk.h"
47#include "trace2.h"
48
49/*
50 * Objects we are going to pack are collected in the `to_pack` structure.
51 * It contains an array (dynamically expanded) of the object data, and a map
52 * that can resolve SHA1s to their position in the array.
53 */
54static struct packing_data to_pack;
55
56static inline struct object_entry *oe_delta(
57 const struct packing_data *pack,
58 const struct object_entry *e)
59{
60 if (!e->delta_idx)
61 return NULL;
62 if (e->ext_base)
63 return &pack->ext_bases[e->delta_idx - 1];
64 else
65 return &pack->objects[e->delta_idx - 1];
66}
67
68static inline unsigned long oe_delta_size(struct packing_data *pack,
69 const struct object_entry *e)
70{
71 if (e->delta_size_valid)
72 return e->delta_size_;
73
74 /*
75 * pack->delta_size[] can't be NULL because oe_set_delta_size()
76 * must have been called when a new delta is saved with
77 * oe_set_delta().
78 * If oe_delta() returns NULL (i.e. default state, which means
79 * delta_size_valid is also false), then the caller must never
80 * call oe_delta_size().
81 */
82 return pack->delta_size[e - pack->objects];
83}
84
85unsigned long oe_get_size_slow(struct packing_data *pack,
86 const struct object_entry *e);
87
88static inline unsigned long oe_size(struct packing_data *pack,
89 const struct object_entry *e)
90{
91 if (e->size_valid)
92 return e->size_;
93
94 return oe_get_size_slow(pack, e);
95}
96
97static inline void oe_set_delta(struct packing_data *pack,
98 struct object_entry *e,
99 struct object_entry *delta)
100{
101 if (delta)
102 e->delta_idx = (delta - pack->objects) + 1;
103 else
104 e->delta_idx = 0;
105}
106
107static inline struct object_entry *oe_delta_sibling(
108 const struct packing_data *pack,
109 const struct object_entry *e)
110{
111 if (e->delta_sibling_idx)
112 return &pack->objects[e->delta_sibling_idx - 1];
113 return NULL;
114}
115
116static inline struct object_entry *oe_delta_child(
117 const struct packing_data *pack,
118 const struct object_entry *e)
119{
120 if (e->delta_child_idx)
121 return &pack->objects[e->delta_child_idx - 1];
122 return NULL;
123}
124
125static inline void oe_set_delta_child(struct packing_data *pack,
126 struct object_entry *e,
127 struct object_entry *delta)
128{
129 if (delta)
130 e->delta_child_idx = (delta - pack->objects) + 1;
131 else
132 e->delta_child_idx = 0;
133}
134
135static inline void oe_set_delta_sibling(struct packing_data *pack,
136 struct object_entry *e,
137 struct object_entry *delta)
138{
139 if (delta)
140 e->delta_sibling_idx = (delta - pack->objects) + 1;
141 else
142 e->delta_sibling_idx = 0;
143}
144
145static inline void oe_set_size(struct packing_data *pack,
146 struct object_entry *e,
147 unsigned long size)
148{
149 if (size < pack->oe_size_limit) {
150 e->size_ = size;
151 e->size_valid = 1;
152 } else {
153 e->size_valid = 0;
154 if (oe_get_size_slow(pack, e) != size)
155 BUG("'size' is supposed to be the object size!");
156 }
157}
158
/*
 * Record the delta size for `e`, either in the compact inline field or,
 * when it does not fit, in a lazily-allocated out-of-line array.
 */
static inline void oe_set_delta_size(struct packing_data *pack,
				     struct object_entry *e,
				     unsigned long size)
{
	if (size < pack->oe_delta_size_limit) {
		e->delta_size_ = size;
		e->delta_size_valid = 1;
	} else {
		/*
		 * Allocate the spill array under the pack lock — delta
		 * search can run multi-threaded. Only the allocation needs
		 * the lock; each thread writes a distinct slot below.
		 */
		packing_data_lock(pack);
		if (!pack->delta_size)
			ALLOC_ARRAY(pack->delta_size, pack->nr_alloc);
		packing_data_unlock(pack);

		pack->delta_size[e - pack->objects] = size;
		e->delta_size_valid = 0;
	}
}
176
/*
 * Convenience wrappers around the oe_*() accessors, all operating on the
 * file-global `to_pack` packing data.
 */
#define IN_PACK(obj) oe_in_pack(&to_pack, obj)
#define SIZE(obj) oe_size(&to_pack, obj)
#define SET_SIZE(obj,size) oe_set_size(&to_pack, obj, size)
#define DELTA_SIZE(obj) oe_delta_size(&to_pack, obj)
#define DELTA(obj) oe_delta(&to_pack, obj)
#define DELTA_CHILD(obj) oe_delta_child(&to_pack, obj)
#define DELTA_SIBLING(obj) oe_delta_sibling(&to_pack, obj)
#define SET_DELTA(obj, val) oe_set_delta(&to_pack, obj, val)
#define SET_DELTA_EXT(obj, oid) oe_set_delta_ext(&to_pack, obj, oid)
#define SET_DELTA_SIZE(obj, val) oe_set_delta_size(&to_pack, obj, val)
#define SET_DELTA_CHILD(obj, val) oe_set_delta_child(&to_pack, obj, val)
#define SET_DELTA_SIBLING(obj, val) oe_set_delta_sibling(&to_pack, obj, val)
189
/* Usage text shown for `git pack-objects -h` (consumed by parse-options). */
static const char *const pack_usage[] = {
	N_("git pack-objects [-q | --progress | --all-progress] [--all-progress-implied]\n"
	   "                 [--no-reuse-delta] [--delta-base-offset] [--non-empty]\n"
	   "                 [--local] [--incremental] [--window=<n>] [--depth=<n>]\n"
	   "                 [--revs [--unpacked | --all]] [--keep-pack=<pack-name>]\n"
	   "                 [--cruft] [--cruft-expiration=<time>]\n"
	   "                 [--stdout [--filter=<filter-spec>] | <base-name>]\n"
	   "                 [--shallow] [--keep-true-parents] [--[no-]sparse]\n"
	   "                 [--name-hash-version=<n>] [--path-walk] < <object-list>"),
	NULL
};
201
/* Objects written so far, in write order; used to build the pack index. */
static struct pack_idx_entry **written_list;
/* Object counters: expected results, written so far, seen during listing. */
static uint32_t nr_result, nr_written, nr_seen;
static struct bitmap_index *bitmap_git;
/* Current delta-island layer being emitted by compute_write_order(). */
static uint32_t write_layer;

/* Command-line toggles and limits (see pack_usage above). */
static int non_empty;
static int reuse_delta = 1, reuse_object = 1;
static int keep_unreachable, unpack_unreachable, include_tag;
static timestamp_t unpack_unreachable_expiration;
static int pack_loose_unreachable;
static int cruft;
static int shallow = 0;
static timestamp_t cruft_expiration;
static int local;
static int have_non_local_packs;
static int incremental;
static int ignore_packed_keep_on_disk;
static int ignore_packed_keep_in_core;
static int ignore_packed_keep_in_core_has_cruft;
static int allow_ofs_delta;
static struct pack_idx_option pack_idx_opts;
static const char *base_name;
static int progress = 1;
static int window = 10;
static unsigned long pack_size_limit;
static int depth = 50;
static int delta_search_threads;
static int pack_to_stdout;
static int sparse;
static int thin;
static int path_walk = -1;
static int num_preferred_base;
static struct progress *progress_state;

/* State for verbatim reuse of objects out of existing (bitmapped) packs. */
static struct bitmapped_pack *reuse_packfiles;
static size_t reuse_packfiles_nr;
static size_t reuse_packfiles_used_nr;
static uint32_t reuse_packfile_objects;
static struct bitmap *reuse_packfile_bitmap;

static int use_bitmap_index_default = 1;
static int use_bitmap_index = -1;
/* How aggressively existing packs may be reused wholesale. */
static enum {
	NO_PACK_REUSE = 0,
	SINGLE_PACK_REUSE,
	MULTI_PACK_REUSE,
} allow_pack_reuse = SINGLE_PACK_REUSE;
/* Whether (and how loudly) to write a .bitmap alongside the pack. */
static enum {
	WRITE_BITMAP_FALSE = 0,
	WRITE_BITMAP_QUIET,
	WRITE_BITMAP_TRUE,
} write_bitmap_index;
static uint16_t write_bitmap_options = BITMAP_OPT_HASH_CACHE;

static int exclude_promisor_objects;
static int exclude_promisor_objects_best_effort;

static int use_delta_islands;

/* Cached-delta memory accounting and limits. */
static unsigned long delta_cache_size = 0;
static unsigned long max_delta_cache_size = DEFAULT_DELTA_CACHE_SIZE;
static unsigned long cache_max_small_delta_size = 1000;

static unsigned long window_memory_limit = 0;

static struct string_list uri_protocols = STRING_LIST_INIT_NODUP;

/* Policy for objects that are missing from the local object store. */
enum missing_action {
	MA_ERROR = 0,      /* fail if any missing objects are encountered */
	MA_ALLOW_ANY,      /* silently allow ALL missing objects */
	MA_ALLOW_PROMISOR, /* silently allow all missing PROMISOR objects */
};
static enum missing_action arg_missing_action;
static show_object_fn fn_show_object;

/* An object excluded via configuration, with its replacement pack URI. */
struct configured_exclusion {
	struct oidmap_entry e;
	char *pack_hash_hex;
	char *uri;
};
static struct oidmap configured_exclusions;

static struct oidset excluded_by_config;
/* Name-hash algorithm version; -1 until chosen (config/option/default). */
static int name_hash_version = -1;

/* How object names arriving on stdin should be interpreted. */
enum stdin_packs_mode {
	STDIN_PACKS_MODE_NONE,
	STDIN_PACKS_MODE_STANDARD,
	STDIN_PACKS_MODE_FOLLOW,
};
292
293/**
294 * Check whether the name_hash_version chosen by user input is appropriate,
295 * and also validate whether it is compatible with other features.
296 */
297static void validate_name_hash_version(void)
298{
299 if (name_hash_version < 1 || name_hash_version > 2)
300 die(_("invalid --name-hash-version option: %d"), name_hash_version);
301 if (write_bitmap_index && name_hash_version != 1) {
302 warning(_("currently, --write-bitmap-index requires --name-hash-version=1"));
303 name_hash_version = 1;
304 }
305}
306
/*
 * Hash a path name with the configured name-hash algorithm. The version
 * must stay fixed for the lifetime of the process: mixing hashes computed
 * with different versions would corrupt delta heuristics, so the first
 * version seen is latched and any later change is a BUG.
 */
static inline uint32_t pack_name_hash_fn(const char *name)
{
	/* Latches the version used on first call; guards against changes. */
	static int seen_version = -1;

	if (seen_version < 0)
		seen_version = name_hash_version;
	else if (seen_version != name_hash_version)
		BUG("name hash version changed from %d to %d mid-process",
		    seen_version, name_hash_version);

	switch (name_hash_version) {
	case 1:
		return pack_name_hash(name);

	case 2:
		return pack_name_hash_v2((const unsigned char *)name);

	default:
		BUG("invalid name-hash version: %d", name_hash_version);
	}
}
328
329/*
330 * stats
331 */
332static uint32_t written, written_delta;
333static uint32_t reused, reused_delta;
334
335/*
336 * Indexed commits
337 */
338static struct commit **indexed_commits;
339static unsigned int indexed_commits_nr;
340static unsigned int indexed_commits_alloc;
341
342static void index_commit_for_bitmap(struct commit *commit)
343{
344 if (indexed_commits_nr >= indexed_commits_alloc) {
345 indexed_commits_alloc = (indexed_commits_alloc + 32) * 2;
346 REALLOC_ARRAY(indexed_commits, indexed_commits_alloc);
347 }
348
349 indexed_commits[indexed_commits_nr++] = commit;
350}
351
/*
 * Recompute the delta for `entry` against its recorded base. Used when a
 * previously computed delta was evicted from the cache; the caller owns
 * the returned buffer. Dies if either object cannot be read.
 */
static void *get_delta(struct object_entry *entry)
{
	unsigned long size, base_size, delta_size;
	void *buf, *base_buf, *delta_buf;
	enum object_type type;

	buf = odb_read_object(the_repository->objects, &entry->idx.oid,
			      &type, &size);
	if (!buf)
		die(_("unable to read %s"), oid_to_hex(&entry->idx.oid));
	base_buf = odb_read_object(the_repository->objects,
				   &DELTA(entry)->idx.oid, &type,
				   &base_size);
	if (!base_buf)
		die("unable to read %s",
		    oid_to_hex(&DELTA(entry)->idx.oid));
	delta_buf = diff_delta(base_buf, base_size,
			       buf, size, &delta_size, 0);
	/*
	 * We successfully computed this delta once but dropped it for
	 * memory reasons. Something is very wrong if this time we
	 * recompute and create a different delta.
	 */
	if (!delta_buf || delta_size != DELTA_SIZE(entry))
		BUG("delta size changed");
	free(buf);
	free(base_buf);
	return delta_buf;
}
381
/*
 * Deflate the buffer at *pptr in one shot. On return, *pptr points to a
 * freshly allocated buffer holding the compressed bytes (the original
 * buffer is freed); the compressed length is returned.
 */
static unsigned long do_compress(void **pptr, unsigned long size)
{
	git_zstream stream;
	void *in, *out;
	unsigned long maxsize;

	git_deflate_init(&stream, pack_compression_level);
	/* Worst-case output size, so a single deflate pass suffices. */
	maxsize = git_deflate_bound(&stream, size);

	in = *pptr;
	out = xmalloc(maxsize);
	*pptr = out;

	stream.next_in = in;
	stream.avail_in = size;
	stream.next_out = out;
	stream.avail_out = maxsize;
	while (git_deflate(&stream, Z_FINISH) == Z_OK)
		; /* nothing */
	git_deflate_end(&stream);

	free(in);
	return stream.total_out;
}
406
/*
 * Stream a large blob through zlib in fixed-size windows, writing the
 * compressed bytes to `f` without ever holding the whole blob in memory.
 * Returns the number of compressed bytes written; dies on read or
 * deflate errors.
 */
static unsigned long write_large_blob_data(struct git_istream *st, struct hashfile *f,
					   const struct object_id *oid)
{
	git_zstream stream;
	unsigned char ibuf[1024 * 16];
	unsigned char obuf[1024 * 16];
	unsigned long olen = 0;

	git_deflate_init(&stream, pack_compression_level);

	for (;;) {
		ssize_t readlen;
		int zret = Z_OK;
		readlen = read_istream(st, ibuf, sizeof(ibuf));
		if (readlen == -1)
			die(_("unable to read %s"), oid_to_hex(oid));

		stream.next_in = ibuf;
		stream.avail_in = readlen;
		/*
		 * Drain this input window; at EOF (readlen == 0) keep
		 * flushing with Z_FINISH until the stream ends.
		 */
		while ((stream.avail_in || readlen == 0) &&
		       (zret == Z_OK || zret == Z_BUF_ERROR)) {
			stream.next_out = obuf;
			stream.avail_out = sizeof(obuf);
			zret = git_deflate(&stream, readlen ? 0 : Z_FINISH);
			hashwrite(f, obuf, stream.next_out - obuf);
			olen += stream.next_out - obuf;
		}
		if (stream.avail_in)
			die(_("deflate error (%d)"), zret);
		if (readlen == 0) {
			if (zret != Z_STREAM_END)
				die(_("deflate error (%d)"), zret);
			break;
		}
	}
	git_deflate_end(&stream);
	return olen;
}
445
446/*
447 * we are going to reuse the existing object data as is. make
448 * sure it is not corrupt.
449 */
450static int check_pack_inflate(struct packed_git *p,
451 struct pack_window **w_curs,
452 off_t offset,
453 off_t len,
454 unsigned long expect)
455{
456 git_zstream stream;
457 unsigned char fakebuf[4096], *in;
458 int st;
459
460 memset(&stream, 0, sizeof(stream));
461 git_inflate_init(&stream);
462 do {
463 in = use_pack(p, w_curs, offset, &stream.avail_in);
464 stream.next_in = in;
465 stream.next_out = fakebuf;
466 stream.avail_out = sizeof(fakebuf);
467 st = git_inflate(&stream, Z_FINISH);
468 offset += stream.next_in - in;
469 } while (st == Z_OK || st == Z_BUF_ERROR);
470 git_inflate_end(&stream);
471 return (st == Z_STREAM_END &&
472 stream.total_out == expect &&
473 stream.total_in == len) ? 0 : -1;
474}
475
476static void copy_pack_data(struct hashfile *f,
477 struct packed_git *p,
478 struct pack_window **w_curs,
479 off_t offset,
480 off_t len)
481{
482 unsigned char *in;
483 unsigned long avail;
484
485 while (len) {
486 in = use_pack(p, w_curs, offset, &avail);
487 if (avail > len)
488 avail = (unsigned long)len;
489 hashwrite(f, in, avail);
490 offset += avail;
491 len -= avail;
492 }
493}
494
495static inline int oe_size_greater_than(struct packing_data *pack,
496 const struct object_entry *lhs,
497 unsigned long rhs)
498{
499 if (lhs->size_valid)
500 return lhs->size_ > rhs;
501 if (rhs < pack->oe_size_limit) /* rhs < 2^x <= lhs ? */
502 return 1;
503 return oe_get_size_slow(pack, lhs) > rhs;
504}
505
/*
 * Write `entry` into the pack from scratch (no verbatim reuse): either
 * as a full object or, when `usable_delta`, as an OFS/REF delta against
 * its recorded base. Very large blobs are streamed. Returns the number
 * of bytes written, or 0 if writing would bust the pack-size `limit`.
 */
static unsigned long write_no_reuse_object(struct hashfile *f, struct object_entry *entry,
					   unsigned long limit, int usable_delta)
{
	unsigned long size, datalen;
	unsigned char header[MAX_PACK_OBJECT_HEADER],
		      dheader[MAX_PACK_OBJECT_HEADER];
	unsigned hdrlen;
	enum object_type type;
	void *buf;
	struct git_istream *st = NULL;
	const unsigned hashsz = the_hash_algo->rawsz;

	if (!usable_delta) {
		/* Big blobs get streamed instead of read fully into memory. */
		if (oe_type(entry) == OBJ_BLOB &&
		    oe_size_greater_than(&to_pack, entry,
					 repo_settings_get_big_file_threshold(the_repository)) &&
		    (st = open_istream(the_repository, &entry->idx.oid, &type,
				       &size, NULL)) != NULL)
			buf = NULL;
		else {
			buf = odb_read_object(the_repository->objects,
					      &entry->idx.oid, &type,
					      &size);
			if (!buf)
				die(_("unable to read %s"),
				    oid_to_hex(&entry->idx.oid));
		}
		/*
		 * make sure no cached delta data remains from a
		 * previous attempt before a pack split occurred.
		 */
		FREE_AND_NULL(entry->delta_data);
		entry->z_delta_size = 0;
	} else if (entry->delta_data) {
		/* Cached delta available; take ownership of the buffer. */
		size = DELTA_SIZE(entry);
		buf = entry->delta_data;
		entry->delta_data = NULL;
		type = (allow_ofs_delta && DELTA(entry)->idx.offset) ?
			OBJ_OFS_DELTA : OBJ_REF_DELTA;
	} else {
		/* Delta was evicted from the cache; recompute it. */
		buf = get_delta(entry);
		size = DELTA_SIZE(entry);
		type = (allow_ofs_delta && DELTA(entry)->idx.offset) ?
			OBJ_OFS_DELTA : OBJ_REF_DELTA;
	}

	if (st)	/* large blob case, just assume we don't compress well */
		datalen = size;
	else if (entry->z_delta_size)
		datalen = entry->z_delta_size;
	else
		datalen = do_compress(&buf, size);

	/*
	 * The object header is a byte of 'type' followed by zero or
	 * more bytes of length.
	 */
	hdrlen = encode_in_pack_object_header(header, sizeof(header),
					      type, size);

	if (type == OBJ_OFS_DELTA) {
		/*
		 * Deltas with relative base contain an additional
		 * encoding of the relative offset for the delta
		 * base from this object's position in the pack.
		 */
		off_t ofs = entry->idx.offset - DELTA(entry)->idx.offset;
		unsigned pos = sizeof(dheader) - 1;
		dheader[pos] = ofs & 127;
		while (ofs >>= 7)
			dheader[--pos] = 128 | (--ofs & 127);
		if (limit && hdrlen + sizeof(dheader) - pos + datalen + hashsz >= limit) {
			if (st)
				close_istream(st);
			free(buf);
			return 0;
		}
		hashwrite(f, header, hdrlen);
		hashwrite(f, dheader + pos, sizeof(dheader) - pos);
		hdrlen += sizeof(dheader) - pos;
	} else if (type == OBJ_REF_DELTA) {
		/*
		 * Deltas with a base reference contain
		 * additional bytes for the base object ID.
		 */
		if (limit && hdrlen + hashsz + datalen + hashsz >= limit) {
			if (st)
				close_istream(st);
			free(buf);
			return 0;
		}
		hashwrite(f, header, hdrlen);
		hashwrite(f, DELTA(entry)->idx.oid.hash, hashsz);
		hdrlen += hashsz;
	} else {
		if (limit && hdrlen + datalen + hashsz >= limit) {
			if (st)
				close_istream(st);
			free(buf);
			return 0;
		}
		hashwrite(f, header, hdrlen);
	}
	if (st) {
		datalen = write_large_blob_data(st, f, &entry->idx.oid);
		close_istream(st);
	} else {
		hashwrite(f, buf, datalen);
		free(buf);
	}

	return hdrlen + datalen;
}
620
/*
 * Copy `entry` verbatim from the pack it already lives in, after
 * validating its CRC (v2 indexes) or re-inflating it (v1 indexes).
 * Falls back to write_no_reuse_object() on corruption. Returns bytes
 * written, or 0 if writing would bust the pack-size `limit`.
 */
static off_t write_reuse_object(struct hashfile *f, struct object_entry *entry,
				unsigned long limit, int usable_delta)
{
	struct packed_git *p = IN_PACK(entry);
	struct pack_window *w_curs = NULL;
	uint32_t pos;
	off_t offset;
	enum object_type type = oe_type(entry);
	off_t datalen;
	unsigned char header[MAX_PACK_OBJECT_HEADER],
		      dheader[MAX_PACK_OBJECT_HEADER];
	unsigned hdrlen;
	const unsigned hashsz = the_hash_algo->rawsz;
	unsigned long entry_size = SIZE(entry);

	if (DELTA(entry))
		type = (allow_ofs_delta && DELTA(entry)->idx.offset) ?
			OBJ_OFS_DELTA : OBJ_REF_DELTA;
	hdrlen = encode_in_pack_object_header(header, sizeof(header),
					      type, entry_size);

	offset = entry->in_pack_offset;
	if (offset_to_pack_pos(p, offset, &pos) < 0)
		die(_("write_reuse_object: could not locate %s, expected at "
		      "offset %"PRIuMAX" in pack %s"),
		    oid_to_hex(&entry->idx.oid), (uintmax_t)offset,
		    p->pack_name);
	/* Entry length = distance to the next object in the source pack. */
	datalen = pack_pos_to_offset(p, pos + 1) - offset;
	if (!pack_to_stdout && p->index_version > 1 &&
	    check_pack_crc(p, &w_curs, offset, datalen,
			   pack_pos_to_index(p, pos))) {
		error(_("bad packed object CRC for %s"),
		      oid_to_hex(&entry->idx.oid));
		unuse_pack(&w_curs);
		return write_no_reuse_object(f, entry, limit, usable_delta);
	}

	/* Skip the source pack's own header; we write a fresh one. */
	offset += entry->in_pack_header_size;
	datalen -= entry->in_pack_header_size;

	if (!pack_to_stdout && p->index_version == 1 &&
	    check_pack_inflate(p, &w_curs, offset, datalen, entry_size)) {
		error(_("corrupt packed object for %s"),
		      oid_to_hex(&entry->idx.oid));
		unuse_pack(&w_curs);
		return write_no_reuse_object(f, entry, limit, usable_delta);
	}

	if (type == OBJ_OFS_DELTA) {
		off_t ofs = entry->idx.offset - DELTA(entry)->idx.offset;
		unsigned pos = sizeof(dheader) - 1;
		dheader[pos] = ofs & 127;
		while (ofs >>= 7)
			dheader[--pos] = 128 | (--ofs & 127);
		if (limit && hdrlen + sizeof(dheader) - pos + datalen + hashsz >= limit) {
			unuse_pack(&w_curs);
			return 0;
		}
		hashwrite(f, header, hdrlen);
		hashwrite(f, dheader + pos, sizeof(dheader) - pos);
		hdrlen += sizeof(dheader) - pos;
		reused_delta++;
	} else if (type == OBJ_REF_DELTA) {
		if (limit && hdrlen + hashsz + datalen + hashsz >= limit) {
			unuse_pack(&w_curs);
			return 0;
		}
		hashwrite(f, header, hdrlen);
		hashwrite(f, DELTA(entry)->idx.oid.hash, hashsz);
		hdrlen += hashsz;
		reused_delta++;
	} else {
		if (limit && hdrlen + datalen + hashsz >= limit) {
			unuse_pack(&w_curs);
			return 0;
		}
		hashwrite(f, header, hdrlen);
	}
	copy_pack_data(f, p, &w_curs, offset, datalen);
	unuse_pack(&w_curs);
	reused++;
	return hdrlen + datalen;
}
705
/*
 * Write a single object at `write_offset`, deciding whether its delta
 * is usable and whether its existing packed representation can be
 * reused verbatim. Returns bytes written, or 0 if we will bust the
 * pack-size limit.
 */
static off_t write_object(struct hashfile *f,
			  struct object_entry *entry,
			  off_t write_offset)
{
	unsigned long limit;
	off_t len;
	int usable_delta, to_reuse;

	if (!pack_to_stdout)
		crc32_begin(f);

	/* apply size limit if limited packsize and not first object */
	if (!pack_size_limit || !nr_written)
		limit = 0;
	else if (pack_size_limit <= write_offset)
		/*
		 * the earlier object did not fit the limit; avoid
		 * mistaking this with unlimited (i.e. limit = 0).
		 */
		limit = 1;
	else
		limit = pack_size_limit - write_offset;

	/* A delta is only usable if its base ends up in this very pack. */
	if (!DELTA(entry))
		usable_delta = 0;	/* no delta */
	else if (!pack_size_limit)
	       usable_delta = 1;	/* unlimited packfile */
	else if (DELTA(entry)->idx.offset == (off_t)-1)
		usable_delta = 0;	/* base was written to another pack */
	else if (DELTA(entry)->idx.offset)
		usable_delta = 1;	/* base already exists in this pack */
	else
		usable_delta = 0;	/* base could end up in another pack */

	if (!reuse_object)
		to_reuse = 0;	/* explicit */
	else if (!IN_PACK(entry))
		to_reuse = 0;	/* can't reuse what we don't have */
	else if (oe_type(entry) == OBJ_REF_DELTA ||
		 oe_type(entry) == OBJ_OFS_DELTA)
				/* check_object() decided it for us ... */
		to_reuse = usable_delta;
				/* ... but pack split may override that */
	else if (oe_type(entry) != entry->in_pack_type)
		to_reuse = 0;	/* pack has delta which is unusable */
	else if (DELTA(entry))
		to_reuse = 0;	/* we want to pack afresh */
	else
		to_reuse = 1;	/* we have it in-pack undeltified,
				 * and we do not need to deltify it.
				 */

	if (!to_reuse)
		len = write_no_reuse_object(f, entry, limit, usable_delta);
	else
		len = write_reuse_object(f, entry, limit, usable_delta);
	if (!len)
		return 0;

	if (usable_delta)
		written_delta++;
	written++;
	if (!pack_to_stdout)
		entry->idx.crc32 = crc32_end(f);
	return len;
}
773
/* Result of attempting to write one object (see write_one()). */
enum write_one_status {
	WRITE_ONE_SKIP = -1, /* already written */
	WRITE_ONE_BREAK = 0, /* writing this will bust the limit; not written */
	WRITE_ONE_WRITTEN = 1, /* normal */
	WRITE_ONE_RECURSIVE = 2 /* already scheduled to be written */
};
780
/*
 * Write object `e` (writing its delta base first if needed), advancing
 * *offset by the bytes written. Uses idx.offset == 1 as an in-band
 * "currently being written" marker to detect delta cycles.
 */
static enum write_one_status write_one(struct hashfile *f,
				       struct object_entry *e,
				       off_t *offset)
{
	off_t size;
	int recursing;

	/*
	 * we set offset to 1 (which is an impossible value) to mark
	 * the fact that this object is involved in "write its base
	 * first before writing a deltified object" recursion.
	 */
	recursing = (e->idx.offset == 1);
	if (recursing) {
		warning(_("recursive delta detected for object %s"),
			oid_to_hex(&e->idx.oid));
		return WRITE_ONE_RECURSIVE;
	} else if (e->idx.offset || e->preferred_base) {
		/* offset is non zero if object is written already. */
		return WRITE_ONE_SKIP;
	}

	/* if we are deltified, write out base object first. */
	if (DELTA(e)) {
		e->idx.offset = 1; /* now recurse */
		switch (write_one(f, DELTA(e), offset)) {
		case WRITE_ONE_RECURSIVE:
			/* we cannot depend on this one */
			SET_DELTA(e, NULL);
			break;
		default:
			break;
		case WRITE_ONE_BREAK:
			/* restore the marker-free state and give up */
			e->idx.offset = recursing;
			return WRITE_ONE_BREAK;
		}
	}

	e->idx.offset = *offset;
	size = write_object(f, e, *offset);
	if (!size) {
		e->idx.offset = recursing;
		return WRITE_ONE_BREAK;
	}
	written_list[nr_written++] = &e->idx;

	/* make sure off_t is sufficiently large not to wrap */
	if (signed_add_overflows(*offset, size))
		die(_("pack too large for current definition of off_t"));
	*offset += size;
	return WRITE_ONE_WRITTEN;
}
833
834static int mark_tagged(const char *path UNUSED, const char *referent UNUSED, const struct object_id *oid,
835 int flag UNUSED, void *cb_data UNUSED)
836{
837 struct object_id peeled;
838 struct object_entry *entry = packlist_find(&to_pack, oid);
839
840 if (entry)
841 entry->tagged = 1;
842 if (!peel_iterated_oid(the_repository, oid, &peeled)) {
843 entry = packlist_find(&to_pack, &peeled);
844 if (entry)
845 entry->tagged = 1;
846 }
847 return 0;
848}
849
850static inline unsigned char oe_layer(struct packing_data *pack,
851 struct object_entry *e)
852{
853 if (!pack->layer)
854 return 0;
855 return pack->layer[e - pack->objects];
856}
857
858static inline void add_to_write_order(struct object_entry **wo,
859 unsigned int *endp,
860 struct object_entry *e)
861{
862 if (e->filled || oe_layer(&to_pack, e) != write_layer)
863 return;
864 wo[(*endp)++] = e;
865 e->filled = 1;
866}
867
/*
 * Emit `e` and its entire delta subtree (children, grandchildren, and
 * all of their siblings) into the write order. The traversal is
 * iterative — following delta_child/delta_sibling links down and the
 * delta (parent) link back up — to avoid unbounded recursion on long
 * delta chains. Fixed a stray empty statement (`};`) that previously
 * followed the loop.
 */
static void add_descendants_to_write_order(struct object_entry **wo,
					   unsigned int *endp,
					   struct object_entry *e)
{
	int add_to_order = 1;
	while (e) {
		if (add_to_order) {
			struct object_entry *s;
			/* add this node... */
			add_to_write_order(wo, endp, e);
			/* all its siblings... */
			for (s = DELTA_SIBLING(e); s; s = DELTA_SIBLING(s)) {
				add_to_write_order(wo, endp, s);
			}
		}
		/* drop down a level to add left subtree nodes if possible */
		if (DELTA_CHILD(e)) {
			add_to_order = 1;
			e = DELTA_CHILD(e);
		} else {
			add_to_order = 0;
			/* our sibling might have some children, it is next */
			if (DELTA_SIBLING(e)) {
				e = DELTA_SIBLING(e);
				continue;
			}
			/* go back to our parent node */
			e = DELTA(e);
			while (e && !DELTA_SIBLING(e)) {
				/* we're on the right side of a subtree, keep
				 * going up until we can go right again */
				e = DELTA(e);
			}
			if (!e) {
				/* done- we hit our original root node */
				return;
			}
			/* pass it off to sibling at this level */
			e = DELTA_SIBLING(e);
		}
	}
}
910
/* Emit the whole delta family of `e`, rooted at its undeltified base. */
static void add_family_to_write_order(struct object_entry **wo,
				      unsigned int *endp,
				      struct object_entry *e)
{
	struct object_entry *root = e;

	/* Climb the delta chain to the root, then dump the full subtree. */
	while (DELTA(root))
		root = DELTA(root);
	add_descendants_to_write_order(wo, endp, root);
}
921
/*
 * Append this layer's objects to the write order `wo`, advancing
 * *wo_end. Order within the layer: untagged tips, tagged tips, then
 * commits and tags, then trees, then everything else grouped by delta
 * family.
 */
static void compute_layer_order(struct object_entry **wo, unsigned int *wo_end)
{
	unsigned int i, last_untagged;
	struct object_entry *objects = to_pack.objects;

	/* The leading untagged run goes first, in original order. */
	for (i = 0; i < to_pack.nr_objects; i++) {
		if (objects[i].tagged)
			break;
		add_to_write_order(wo, wo_end, &objects[i]);
	}
	last_untagged = i;

	/*
	 * Then fill all the tagged tips.
	 */
	for (; i < to_pack.nr_objects; i++) {
		if (objects[i].tagged)
			add_to_write_order(wo, wo_end, &objects[i]);
	}

	/*
	 * And then all remaining commits and tags.
	 */
	for (i = last_untagged; i < to_pack.nr_objects; i++) {
		if (oe_type(&objects[i]) != OBJ_COMMIT &&
		    oe_type(&objects[i]) != OBJ_TAG)
			continue;
		add_to_write_order(wo, wo_end, &objects[i]);
	}

	/*
	 * And then all the trees.
	 */
	for (i = last_untagged; i < to_pack.nr_objects; i++) {
		if (oe_type(&objects[i]) != OBJ_TREE)
			continue;
		add_to_write_order(wo, wo_end, &objects[i]);
	}

	/*
	 * Finally all the rest in really tight order
	 */
	for (i = last_untagged; i < to_pack.nr_objects; i++) {
		if (!objects[i].filled && oe_layer(&to_pack, &objects[i]) == write_layer)
			add_family_to_write_order(wo, wo_end, &objects[i]);
	}
}
969
/*
 * Build the order in which objects are written to the pack: reset
 * per-object flags, wire up the delta child/sibling links, mark tag
 * tips, then emit each delta-island layer in turn. Returns a
 * caller-owned array of to_pack.nr_objects entries; dies if any object
 * was left out.
 */
static struct object_entry **compute_write_order(void)
{
	uint32_t max_layers = 1;
	unsigned int i, wo_end;

	struct object_entry **wo;
	struct object_entry *objects = to_pack.objects;

	for (i = 0; i < to_pack.nr_objects; i++) {
		objects[i].tagged = 0;
		objects[i].filled = 0;
		SET_DELTA_CHILD(&objects[i], NULL);
		SET_DELTA_SIBLING(&objects[i], NULL);
	}

	/*
	 * Fully connect delta_child/delta_sibling network.
	 * Make sure delta_sibling is sorted in the original
	 * recency order.
	 */
	for (i = to_pack.nr_objects; i > 0;) {
		struct object_entry *e = &objects[--i];
		if (!DELTA(e))
			continue;
		/* Mark me as the first child */
		e->delta_sibling_idx = DELTA(e)->delta_child_idx;
		SET_DELTA_CHILD(DELTA(e), e);
	}

	/*
	 * Mark objects that are at the tip of tags.
	 */
	refs_for_each_tag_ref(get_main_ref_store(the_repository), mark_tagged,
			      NULL);

	if (use_delta_islands) {
		max_layers = compute_pack_layers(&to_pack);
		free_island_marks();
	}

	ALLOC_ARRAY(wo, to_pack.nr_objects);
	wo_end = 0;

	/* Emit layer 0 first, then each subsequent delta-island layer. */
	for (; write_layer < max_layers; ++write_layer)
		compute_layer_order(wo, &wo_end);

	if (wo_end != to_pack.nr_objects)
		die(_("ordered %u objects, expected %"PRIu32),
		    wo_end, to_pack.nr_objects);

	return wo;
}
1022
1023
1024/*
1025 * A reused set of objects. All objects in a chunk have the same
1026 * relative position in the original packfile and the generated
1027 * packfile.
1028 */
1029
1030static struct reused_chunk {
1031 /* The offset of the first object of this chunk in the original
1032 * packfile. */
1033 off_t original;
1034 /* The difference for "original" minus the offset of the first object of
1035 * this chunk in the generated packfile. */
1036 off_t difference;
1037} *reused_chunks;
1038static int reused_chunks_nr;
1039static int reused_chunks_alloc;
1040
1041static void record_reused_object(off_t where, off_t offset)
1042{
1043 if (reused_chunks_nr && reused_chunks[reused_chunks_nr-1].difference == offset)
1044 return;
1045
1046 ALLOC_GROW(reused_chunks, reused_chunks_nr + 1,
1047 reused_chunks_alloc);
1048 reused_chunks[reused_chunks_nr].original = where;
1049 reused_chunks[reused_chunks_nr].difference = offset;
1050 reused_chunks_nr++;
1051}
1052
1053/*
1054 * Binary search to find the chunk that "where" is in. Note
1055 * that we're not looking for an exact match, just the first
1056 * chunk that contains it (which implicitly ends at the start
1057 * of the next chunk.
1058 */
1059static off_t find_reused_offset(off_t where)
1060{
1061 int lo = 0, hi = reused_chunks_nr;
1062 while (lo < hi) {
1063 int mi = lo + ((hi - lo) / 2);
1064 if (where == reused_chunks[mi].original)
1065 return reused_chunks[mi].difference;
1066 if (where < reused_chunks[mi].original)
1067 hi = mi;
1068 else
1069 lo = mi + 1;
1070 }
1071
1072 /*
1073 * The first chunk starts at zero, so we can't have gone below
1074 * there.
1075 */
1076 assert(lo);
1077 return reused_chunks[lo-1].difference;
1078}
1079
/*
 * Copy the object at pack position `pos` from `reuse_packfile` into
 * `out`. Non-delta objects and REF deltas go through verbatim; OFS
 * deltas may need their base offset rewritten (when surrounding objects
 * were dropped) or conversion to REF delta (when OFS deltas are not
 * allowed).
 */
static void write_reused_pack_one(struct packed_git *reuse_packfile,
				  size_t pos, struct hashfile *out,
				  off_t pack_start,
				  struct pack_window **w_curs)
{
	off_t offset, next, cur;
	enum object_type type;
	unsigned long size;

	offset = pack_pos_to_offset(reuse_packfile, pos);
	next = pack_pos_to_offset(reuse_packfile, pos + 1);

	/* Remember how far this object moved relative to the source pack. */
	record_reused_object(offset,
			     offset - (hashfile_total(out) - pack_start));

	cur = offset;
	type = unpack_object_header(reuse_packfile, w_curs, &cur, &size);
	assert(type >= 0);

	if (type == OBJ_OFS_DELTA) {
		off_t base_offset;
		off_t fixup;

		unsigned char header[MAX_PACK_OBJECT_HEADER];
		unsigned len;

		base_offset = get_delta_base(reuse_packfile, w_curs, &cur, type, offset);
		assert(base_offset != 0);

		/* Convert to REF_DELTA if we must... */
		if (!allow_ofs_delta) {
			uint32_t base_pos;
			struct object_id base_oid;

			if (offset_to_pack_pos(reuse_packfile, base_offset, &base_pos) < 0)
				die(_("expected object at offset %"PRIuMAX" "
				      "in pack %s"),
				    (uintmax_t)base_offset,
				    reuse_packfile->pack_name);

			nth_packed_object_id(&base_oid, reuse_packfile,
					     pack_pos_to_index(reuse_packfile, base_pos));

			len = encode_in_pack_object_header(header, sizeof(header),
							   OBJ_REF_DELTA, size);
			hashwrite(out, header, len);
			hashwrite(out, base_oid.hash, the_hash_algo->rawsz);
			copy_pack_data(out, reuse_packfile, w_curs, cur, next - cur);
			return;
		}

		/* Otherwise see if we need to rewrite the offset... */
		fixup = find_reused_offset(offset) -
			find_reused_offset(base_offset);
		if (fixup) {
			unsigned char ofs_header[MAX_PACK_OBJECT_HEADER];
			unsigned i, ofs_len;
			off_t ofs = offset - base_offset - fixup;

			len = encode_in_pack_object_header(header, sizeof(header),
							   OBJ_OFS_DELTA, size);

			/* Re-encode the adjusted base offset (7 bits/byte). */
			i = sizeof(ofs_header) - 1;
			ofs_header[i] = ofs & 127;
			while (ofs >>= 7)
				ofs_header[--i] = 128 | (--ofs & 127);

			ofs_len = sizeof(ofs_header) - i;

			hashwrite(out, header, len);
			hashwrite(out, ofs_header + sizeof(ofs_header) - ofs_len, ofs_len);
			copy_pack_data(out, reuse_packfile, w_curs, cur, next - cur);
			return;
		}

		/* ...otherwise we have no fixup, and can write it verbatim */
	}

	copy_pack_data(out, reuse_packfile, w_curs, offset, next - offset);
}
1160
/*
 * Copy as large a leading run of "reuse_packfile" as possible into "out"
 * verbatim, i.e. whole eword_t's worth of all-ones bitmap words at a
 * time.  Returns the number of bitmap words consumed; the caller
 * continues bit-by-bit from that word in write_reused_pack().
 */
static size_t write_reused_pack_verbatim(struct bitmapped_pack *reuse_packfile,
					 struct hashfile *out,
					 struct pack_window **w_curs)
{
	size_t pos = 0;
	size_t end;

	if (reuse_packfile->bitmap_pos) {
		/*
		 * We can't reuse whole chunks verbatim out of
		 * non-preferred packs since we can't guarantee that
		 * all duplicate objects were resolved in favor of
		 * that pack.
		 *
		 * Even if we have a whole eword_t worth of bits that
		 * could be reused, there may be objects between the
		 * objects corresponding to the first and last bit of
		 * that word which were selected from a different
		 * pack, causing us to send duplicate or unwanted
		 * objects.
		 *
		 * Handle non-preferred packs from within
		 * write_reused_pack(), which inspects and reuses
		 * individual bits.
		 */
		return reuse_packfile->bitmap_pos / BITS_IN_EWORD;
	}

	/*
	 * Only read through the last word whose bits all correspond
	 * to objects in the given packfile, since we must stop at a
	 * word boundary.
	 *
	 * If there is no whole word to read (i.e. the packfile
	 * contains fewer than BITS_IN_EWORD objects), then we'll
	 * inspect bits one-by-one in write_reused_pack().
	 */
	end = reuse_packfile->bitmap_nr / BITS_IN_EWORD;
	if (reuse_packfile_bitmap->word_alloc < end)
		BUG("fewer words than expected in reuse_packfile_bitmap");

	/* Advance over the leading run of completely-set words. */
	while (pos < end && reuse_packfile_bitmap->words[pos] == (eword_t)~0)
		pos++;

	if (pos) {
		off_t to_write;

		written = (pos * BITS_IN_EWORD);
		to_write = pack_pos_to_offset(reuse_packfile->p, written)
			- sizeof(struct pack_header);

		/* We're recording one chunk, not one object. */
		record_reused_object(sizeof(struct pack_header), 0);
		hashflush(out);
		copy_pack_data(out, reuse_packfile->p, w_curs,
			       sizeof(struct pack_header), to_write);

		display_progress(progress_state, written);
	}
	return pos;
}
1222
/*
 * Write every object of "reuse_packfile" selected by the global
 * reuse_packfile_bitmap into "f".  A preferred pack may first be copied
 * verbatim in whole-word chunks; remaining (or non-preferred) objects
 * are copied one at a time via write_reused_pack_one().
 */
static void write_reused_pack(struct bitmapped_pack *reuse_packfile,
			      struct hashfile *f)
{
	size_t i = reuse_packfile->bitmap_pos / BITS_IN_EWORD;
	uint32_t offset;
	off_t pack_start = hashfile_total(f) - sizeof(struct pack_header);
	struct pack_window *w_curs = NULL;

	if (allow_ofs_delta)
		i = write_reused_pack_verbatim(reuse_packfile, f, &w_curs);

	for (; i < reuse_packfile_bitmap->word_alloc; ++i) {
		eword_t word = reuse_packfile_bitmap->words[i];
		size_t pos = (i * BITS_IN_EWORD);

		for (offset = 0; offset < BITS_IN_EWORD; ++offset) {
			uint32_t pack_pos;
			if ((word >> offset) == 0)
				break;

			/* Jump straight to the next set bit in this word. */
			offset += ewah_bit_ctz64(word >> offset);
			if (pos + offset < reuse_packfile->bitmap_pos)
				continue;
			if (pos + offset >= reuse_packfile->bitmap_pos + reuse_packfile->bitmap_nr)
				goto done;

			if (reuse_packfile->bitmap_pos) {
				/*
				 * When doing multi-pack reuse on a
				 * non-preferred pack, translate bit positions
				 * from the MIDX pseudo-pack order back to their
				 * pack-relative positions before attempting
				 * reuse.
				 */
				struct multi_pack_index *m = reuse_packfile->from_midx;
				uint32_t midx_pos;
				off_t pack_ofs;

				if (!m)
					BUG("non-zero bitmap position without MIDX");

				midx_pos = pack_pos_to_midx(m, pos + offset);
				pack_ofs = nth_midxed_offset(m, midx_pos);

				if (offset_to_pack_pos(reuse_packfile->p,
						       pack_ofs, &pack_pos) < 0)
					BUG("could not find expected object at offset %"PRIuMAX" in pack %s",
					    (uintmax_t)pack_ofs,
					    pack_basename(reuse_packfile->p));
			} else {
				/*
				 * Can use bit positions directly, even for MIDX
				 * bitmaps. See comment in try_partial_reuse()
				 * for why.
				 */
				pack_pos = pos + offset;
			}

			write_reused_pack_one(reuse_packfile->p, pack_pos, f,
					      pack_start, &w_curs);
			display_progress(progress_state, ++written);
		}
	}

done:
	unuse_pack(&w_curs);
}
1290
1291static void write_excluded_by_configs(void)
1292{
1293 struct oidset_iter iter;
1294 const struct object_id *oid;
1295
1296 oidset_iter_init(&excluded_by_config, &iter);
1297 while ((oid = oidset_iter_next(&iter))) {
1298 struct configured_exclusion *ex =
1299 oidmap_get(&configured_exclusions, oid);
1300
1301 if (!ex)
1302 BUG("configured exclusion wasn't configured");
1303 write_in_full(1, ex->pack_hash_hex, strlen(ex->pack_hash_hex));
1304 write_in_full(1, " ", 1);
1305 write_in_full(1, ex->uri, strlen(ex->uri));
1306 write_in_full(1, "\n", 1);
1307 }
1308}
1309
1310static const char no_split_warning[] = N_(
1311"disabling bitmap writing, packs are split due to pack.packSizeLimit"
1312);
1313
/*
 * Write out the selected objects as one or more packs.  When writing to
 * stdout, reused packs are streamed first and the result is a single
 * pack; otherwise each pack is written to a temporary file, its index
 * (and optional bitmap/mtimes) staged, and the final name printed.  The
 * do/while loop produces multiple packs when write_one() signals
 * WRITE_ONE_BREAK before all objects are written (e.g. the configured
 * pack size limit was reached).
 */
static void write_pack_file(void)
{
	uint32_t i = 0, j;
	struct hashfile *f;
	off_t offset;
	uint32_t nr_remaining = nr_result;
	time_t last_mtime = 0;
	struct object_entry **write_order;

	/*
	 * Show progress when it was explicitly forced, or when implied
	 * and we are not streaming the pack to stdout.
	 */
	if (progress > pack_to_stdout)
		progress_state = start_progress(the_repository,
						_("Writing objects"), nr_result);
	ALLOC_ARRAY(written_list, to_pack.nr_objects);
	write_order = compute_write_order();

	do {
		unsigned char hash[GIT_MAX_RAWSZ];
		char *pack_tmp_name = NULL;

		if (pack_to_stdout)
			f = hashfd_throughput(the_repository->hash_algo, 1,
					      "<stdout>", progress_state);
		else
			f = create_tmp_packfile(the_repository, &pack_tmp_name);

		offset = write_pack_header(f, nr_remaining);

		/* Stream any fully/partially reused packs first. */
		if (reuse_packfiles_nr) {
			assert(pack_to_stdout);
			for (j = 0; j < reuse_packfiles_nr; j++) {
				reused_chunks_nr = 0;
				write_reused_pack(&reuse_packfiles[j], f);
				if (reused_chunks_nr)
					reuse_packfiles_used_nr++;
			}
			offset = hashfile_total(f);
		}

		/* Then the remaining objects, in computed write order. */
		nr_written = 0;
		for (; i < to_pack.nr_objects; i++) {
			struct object_entry *e = write_order[i];
			if (write_one(f, e, &offset) == WRITE_ONE_BREAK)
				break;
			display_progress(progress_state, written);
		}

		if (pack_to_stdout) {
			/*
			 * We never fsync when writing to stdout since we may
			 * not be writing to an actual pack file. For instance,
			 * the upload-pack code passes a pipe here. Calling
			 * fsync on a pipe results in unnecessary
			 * synchronization with the reader on some platforms.
			 */
			finalize_hashfile(f, hash, FSYNC_COMPONENT_NONE,
					  CSUM_HASH_IN_STREAM | CSUM_CLOSE);
		} else if (nr_written == nr_remaining) {
			finalize_hashfile(f, hash, FSYNC_COMPONENT_PACK,
					  CSUM_HASH_IN_STREAM | CSUM_FSYNC | CSUM_CLOSE);
		} else {
			/*
			 * If we wrote the wrong number of entries in the
			 * header, rewrite it like in fast-import.
			 */

			int fd = finalize_hashfile(f, hash, FSYNC_COMPONENT_PACK, 0);
			fixup_pack_header_footer(the_hash_algo, fd, hash,
						 pack_tmp_name, nr_written,
						 hash, offset);
			close(fd);
			if (write_bitmap_index) {
				if (write_bitmap_index != WRITE_BITMAP_QUIET)
					warning(_(no_split_warning));
				write_bitmap_index = 0;
			}
		}

		if (!pack_to_stdout) {
			struct stat st;
			struct strbuf tmpname = STRBUF_INIT;
			struct bitmap_writer bitmap_writer;
			char *idx_tmp_name = NULL;

			/*
			 * Packs are runtime accessed in their mtime
			 * order since newer packs are more likely to contain
			 * younger objects. So if we are creating multiple
			 * packs then we should modify the mtime of later ones
			 * to preserve this property.
			 */
			if (stat(pack_tmp_name, &st) < 0) {
				warning_errno(_("failed to stat %s"), pack_tmp_name);
			} else if (!last_mtime) {
				last_mtime = st.st_mtime;
			} else {
				struct utimbuf utb;
				utb.actime = st.st_atime;
				utb.modtime = --last_mtime;
				if (utime(pack_tmp_name, &utb) < 0)
					warning_errno(_("failed utime() on %s"), pack_tmp_name);
			}

			strbuf_addf(&tmpname, "%s-%s.", base_name,
				    hash_to_hex(hash));

			if (write_bitmap_index) {
				bitmap_writer_init(&bitmap_writer,
						   the_repository, &to_pack,
						   NULL);
				bitmap_writer_set_checksum(&bitmap_writer, hash);
				bitmap_writer_build_type_index(&bitmap_writer,
							       written_list);
			}

			if (cruft)
				pack_idx_opts.flags |= WRITE_MTIMES;

			stage_tmp_packfiles(the_repository, &tmpname,
					    pack_tmp_name, written_list,
					    nr_written, &to_pack,
					    &pack_idx_opts, hash,
					    &idx_tmp_name);

			if (write_bitmap_index) {
				size_t tmpname_len = tmpname.len;

				strbuf_addstr(&tmpname, "bitmap");
				stop_progress(&progress_state);

				bitmap_writer_show_progress(&bitmap_writer,
							    progress);
				bitmap_writer_select_commits(&bitmap_writer,
							     indexed_commits,
							     indexed_commits_nr);
				if (bitmap_writer_build(&bitmap_writer) < 0)
					die(_("failed to write bitmap index"));
				bitmap_writer_finish(&bitmap_writer,
						     written_list,
						     tmpname.buf, write_bitmap_options);
				bitmap_writer_free(&bitmap_writer);
				write_bitmap_index = 0;
				strbuf_setlen(&tmpname, tmpname_len);
			}

			rename_tmp_packfile_idx(the_repository, &tmpname, &idx_tmp_name);

			free(idx_tmp_name);
			strbuf_release(&tmpname);
			free(pack_tmp_name);
			/* Report the final pack name (hash) to the caller. */
			puts(hash_to_hex(hash));
		}

		/* mark written objects as written to previous pack */
		for (j = 0; j < nr_written; j++) {
			written_list[j]->offset = (off_t)-1;
		}
		nr_remaining -= nr_written;
	} while (nr_remaining && i < to_pack.nr_objects);

	free(written_list);
	free(write_order);
	stop_progress(&progress_state);
	if (written != nr_result)
		die(_("wrote %"PRIu32" objects while expecting %"PRIu32),
		    written, nr_result);
	trace2_data_intmax("pack-objects", the_repository,
			   "write_pack_file/wrote", nr_result);
}
1482
1483static int no_try_delta(const char *path)
1484{
1485 static struct attr_check *check;
1486
1487 if (!check)
1488 check = attr_check_initl("delta", NULL);
1489 git_check_attr(the_repository->index, path, check);
1490 if (ATTR_FALSE(check->items[0].value))
1491 return 1;
1492 return 0;
1493}
1494
1495/*
1496 * When adding an object, check whether we have already added it
1497 * to our packing list. If so, we can skip. However, if we are
1498 * being asked to excludei t, but the previous mention was to include
1499 * it, make sure to adjust its flags and tweak our numbers accordingly.
1500 *
1501 * As an optimization, we pass out the index position where we would have
1502 * found the item, since that saves us from having to look it up again a
1503 * few lines later when we want to add the new entry.
1504 */
1505static int have_duplicate_entry(const struct object_id *oid,
1506 int exclude)
1507{
1508 struct object_entry *entry;
1509
1510 if (reuse_packfile_bitmap &&
1511 bitmap_walk_contains(bitmap_git, reuse_packfile_bitmap, oid))
1512 return 1;
1513
1514 entry = packlist_find(&to_pack, oid);
1515 if (!entry)
1516 return 0;
1517
1518 if (exclude) {
1519 if (!entry->preferred_base)
1520 nr_result--;
1521 entry->preferred_base = 1;
1522 }
1523
1524 return 1;
1525}
1526
/*
 * Decide whether an object with modification time "mtime" still needs
 * packing, given the kept packs selected by "flags".  Returns 0 when an
 * existing copy (in a non-cruft pack, or in a cruft pack with an equal
 * or newer mtime) makes packing it unnecessary, and -1 when no such
 * copy was found.
 */
static int want_cruft_object_mtime(struct repository *r,
				   const struct object_id *oid,
				   unsigned flags, uint32_t mtime)
{
	struct packed_git **cache;

	for (cache = kept_pack_cache(r, flags); *cache; cache++) {
		struct packed_git *p = *cache;
		off_t ofs;
		uint32_t candidate_mtime;

		ofs = find_pack_entry_one(oid, p);
		if (!ofs)
			continue;

		/*
		 * We have a copy of the object 'oid' in a non-cruft
		 * pack. We can avoid packing an additional copy
		 * regardless of what the existing copy's mtime is since
		 * it is outside of a cruft pack.
		 */
		if (!p->is_cruft)
			return 0;

		/*
		 * If we have a copy of the object 'oid' in a cruft
		 * pack, then either read the cruft pack's mtime for
		 * that object, or, if that can't be loaded, assume the
		 * pack's mtime itself.
		 */
		if (!load_pack_mtimes(p)) {
			uint32_t pos;
			if (offset_to_pack_pos(p, ofs, &pos) < 0)
				continue;
			candidate_mtime = nth_packed_mtime(p, pos);
		} else {
			candidate_mtime = p->mtime;
		}

		/*
		 * We have a surviving copy of the object in a cruft
		 * pack whose mtime is greater than or equal to the one
		 * we are considering. We can thus avoid packing an
		 * additional copy of that object.
		 */
		if (mtime <= candidate_mtime)
			return 0;
	}

	return -1;
}
1578
/*
 * Given that "oid" was found in pack "p", decide whether it belongs in
 * the pack we are generating: returns 1 for yes, 0 for no, and -1 when
 * this pack alone cannot decide and the caller must consult more packs.
 * "mtime" is only used when retained cruft packs force an mtime
 * comparison (see want_cruft_object_mtime()).
 */
static int want_found_object(const struct object_id *oid, int exclude,
			     struct packed_git *p, uint32_t mtime)
{
	if (exclude)
		return 1;
	if (incremental)
		return 0;

	if (!is_pack_valid(p))
		return -1;

	/*
	 * When asked to do --local (do not include an object that appears in a
	 * pack we borrow from elsewhere) or --honor-pack-keep (do not include
	 * an object that appears in a pack marked with .keep), finding a pack
	 * that matches the criteria is sufficient for us to decide to omit it.
	 * However, even if this pack does not satisfy the criteria, we need to
	 * make sure no copy of this object appears in _any_ pack that makes us
	 * to omit the object, so we need to check all the packs.
	 *
	 * We can however first check whether these options can possibly matter;
	 * if they do not matter we know we want the object in generated pack.
	 * Otherwise, we signal "-1" at the end to tell the caller that we do
	 * not know either way, and it needs to check more packs.
	 */

	/*
	 * Objects in packs borrowed from elsewhere are discarded regardless of
	 * if they appear in other packs that weren't borrowed.
	 */
	if (local && !p->pack_local)
		return 0;

	/*
	 * Then handle .keep first, as we have a fast(er) path there.
	 */
	if (ignore_packed_keep_on_disk || ignore_packed_keep_in_core) {
		/*
		 * Set the flags for the kept-pack cache to be the ones we want
		 * to ignore.
		 *
		 * That is, if we are ignoring objects in on-disk keep packs,
		 * then we want to search through the on-disk keep and ignore
		 * the in-core ones.
		 */
		unsigned flags = 0;
		if (ignore_packed_keep_on_disk)
			flags |= ON_DISK_KEEP_PACKS;
		if (ignore_packed_keep_in_core)
			flags |= IN_CORE_KEEP_PACKS;

		/*
		 * If the object is in a pack that we want to ignore, *and* we
		 * don't have any cruft packs that are being retained, we can
		 * abort quickly.
		 */
		if (!ignore_packed_keep_in_core_has_cruft) {
			if (ignore_packed_keep_on_disk && p->pack_keep)
				return 0;
			if (ignore_packed_keep_in_core && p->pack_keep_in_core)
				return 0;
			if (has_object_kept_pack(p->repo, oid, flags))
				return 0;
		} else {
			/*
			 * But if there is at least one cruft pack which
			 * is being kept, we only want to include the
			 * provided object if it has a strictly greater
			 * mtime than any existing cruft copy.
			 */
			if (!want_cruft_object_mtime(p->repo, oid, flags,
						     mtime))
				return 0;
		}
	}

	/*
	 * At this point we know definitively that either we don't care about
	 * keep-packs, or the object is not in one. Keep checking other
	 * conditions...
	 */
	if (!local || !have_non_local_packs)
		return 1;

	/* we don't know yet; keep looking for more packs */
	return -1;
}
1666
1667static int want_object_in_pack_one(struct packed_git *p,
1668 const struct object_id *oid,
1669 int exclude,
1670 struct packed_git **found_pack,
1671 off_t *found_offset,
1672 uint32_t found_mtime)
1673{
1674 off_t offset;
1675
1676 if (p == *found_pack)
1677 offset = *found_offset;
1678 else
1679 offset = find_pack_entry_one(oid, p);
1680
1681 if (offset) {
1682 if (!*found_pack) {
1683 if (!is_pack_valid(p))
1684 return -1;
1685 *found_offset = offset;
1686 *found_pack = p;
1687 }
1688 return want_found_object(oid, exclude, p, found_mtime);
1689 }
1690 return -1;
1691}
1692
1693/*
1694 * Check whether we want the object in the pack (e.g., we do not want
1695 * objects found in non-local stores if the "--local" option was used).
1696 *
1697 * If the caller already knows an existing pack it wants to take the object
1698 * from, that is passed in *found_pack and *found_offset; otherwise this
1699 * function finds if there is any pack that has the object and returns the pack
1700 * and its offset in these variables.
1701 */
1702static int want_object_in_pack_mtime(const struct object_id *oid,
1703 int exclude,
1704 struct packed_git **found_pack,
1705 off_t *found_offset,
1706 uint32_t found_mtime)
1707{
1708 int want;
1709 struct odb_source *source;
1710 struct list_head *pos;
1711
1712 if (!exclude && local) {
1713 /*
1714 * Note that we start iterating at `sources->next` so that we
1715 * skip the local object source.
1716 */
1717 struct odb_source *source = the_repository->objects->sources->next;
1718 for (; source; source = source->next)
1719 if (has_loose_object(source, oid))
1720 return 0;
1721 }
1722
1723 /*
1724 * If we already know the pack object lives in, start checks from that
1725 * pack - in the usual case when neither --local was given nor .keep files
1726 * are present we will determine the answer right now.
1727 */
1728 if (*found_pack) {
1729 want = want_found_object(oid, exclude, *found_pack,
1730 found_mtime);
1731 if (want != -1)
1732 return want;
1733
1734 *found_pack = NULL;
1735 *found_offset = 0;
1736 }
1737
1738 odb_prepare_alternates(the_repository->objects);
1739
1740 for (source = the_repository->objects->sources; source; source = source->next) {
1741 struct multi_pack_index *m = get_multi_pack_index(source);
1742 struct pack_entry e;
1743
1744 if (m && fill_midx_entry(m, oid, &e)) {
1745 want = want_object_in_pack_one(e.p, oid, exclude, found_pack, found_offset, found_mtime);
1746 if (want != -1)
1747 return want;
1748 }
1749 }
1750
1751 list_for_each(pos, packfile_store_get_packs_mru(the_repository->objects->packfiles)) {
1752 struct packed_git *p = list_entry(pos, struct packed_git, mru);
1753 want = want_object_in_pack_one(p, oid, exclude, found_pack, found_offset, found_mtime);
1754 if (!exclude && want > 0)
1755 list_move(&p->mru,
1756 packfile_store_get_packs_mru(the_repository->objects->packfiles));
1757 if (want != -1)
1758 return want;
1759 }
1760
1761 if (uri_protocols.nr) {
1762 struct configured_exclusion *ex =
1763 oidmap_get(&configured_exclusions, oid);
1764 int i;
1765 const char *p;
1766
1767 if (ex) {
1768 for (i = 0; i < uri_protocols.nr; i++) {
1769 if (skip_prefix(ex->uri,
1770 uri_protocols.items[i].string,
1771 &p) &&
1772 *p == ':') {
1773 oidset_insert(&excluded_by_config, oid);
1774 return 0;
1775 }
1776 }
1777 }
1778 }
1779
1780 return 1;
1781}
1782
/*
 * Convenience wrapper around want_object_in_pack_mtime() for callers
 * that have no mtime to take into account (passes 0).
 */
static inline int want_object_in_pack(const struct object_id *oid,
				      int exclude,
				      struct packed_git **found_pack,
				      off_t *found_offset)
{
	return want_object_in_pack_mtime(oid, exclude, found_pack, found_offset,
					 0);
}
1791
1792static struct object_entry *create_object_entry(const struct object_id *oid,
1793 enum object_type type,
1794 uint32_t hash,
1795 int exclude,
1796 int no_try_delta,
1797 struct packed_git *found_pack,
1798 off_t found_offset)
1799{
1800 struct object_entry *entry;
1801
1802 entry = packlist_alloc(&to_pack, oid);
1803 entry->hash = hash;
1804 oe_set_type(entry, type);
1805 if (exclude)
1806 entry->preferred_base = 1;
1807 else
1808 nr_result++;
1809 if (found_pack) {
1810 oe_set_in_pack(&to_pack, entry, found_pack);
1811 entry->in_pack_offset = found_offset;
1812 }
1813
1814 entry->no_try_delta = no_try_delta;
1815
1816 return entry;
1817}
1818
1819static const char no_closure_warning[] = N_(
1820"disabling bitmap writing, as some objects are not being packed"
1821);
1822
1823static int add_object_entry(const struct object_id *oid, enum object_type type,
1824 const char *name, int exclude)
1825{
1826 struct packed_git *found_pack = NULL;
1827 off_t found_offset = 0;
1828
1829 display_progress(progress_state, ++nr_seen);
1830
1831 if (have_duplicate_entry(oid, exclude))
1832 return 0;
1833
1834 if (!want_object_in_pack(oid, exclude, &found_pack, &found_offset)) {
1835 /* The pack is missing an object, so it will not have closure */
1836 if (write_bitmap_index) {
1837 if (write_bitmap_index != WRITE_BITMAP_QUIET)
1838 warning(_(no_closure_warning));
1839 write_bitmap_index = 0;
1840 }
1841 return 0;
1842 }
1843
1844 create_object_entry(oid, type, pack_name_hash_fn(name),
1845 exclude, name && no_try_delta(name),
1846 found_pack, found_offset);
1847 return 1;
1848}
1849
1850static int add_object_entry_from_bitmap(const struct object_id *oid,
1851 enum object_type type,
1852 int flags UNUSED, uint32_t name_hash,
1853 struct packed_git *pack, off_t offset,
1854 void *payload UNUSED)
1855{
1856 display_progress(progress_state, ++nr_seen);
1857
1858 if (have_duplicate_entry(oid, 0))
1859 return 0;
1860
1861 if (!want_object_in_pack(oid, 0, &pack, &offset))
1862 return 0;
1863
1864 create_object_entry(oid, type, name_hash, 0, 0, pack, offset);
1865 return 1;
1866}
1867
/*
 * A cached, uncompressed tree object used while adding preferred-base
 * objects (see pbase_tree_get()/pbase_tree_put()).
 */
struct pbase_tree_cache {
	struct object_id oid;		/* name of the cached tree */
	int ref;			/* active users of this entry */
	int temporary;			/* not in the table; freed on put */
	void *tree_data;		/* raw tree contents */
	unsigned long tree_size;	/* size of tree_data in bytes */
};

/* Small open-addressed table of cached trees, probed linearly. */
static struct pbase_tree_cache *(pbase_tree_cache[256]);

/* Preferred table slot for "oid", from its first hash byte. */
static int pbase_tree_cache_ix(const struct object_id *oid)
{
	return oid->hash[0] % ARRAY_SIZE(pbase_tree_cache);
}

/* Next slot to probe, wrapping around the table. */
static int pbase_tree_cache_ix_incr(int ix)
{
	return (ix+1) % ARRAY_SIZE(pbase_tree_cache);
}
1885
/* Singly-linked list of toplevel preferred-base trees. */
static struct pbase_tree {
	struct pbase_tree *next;
	/* This is a phony "cache" entry; we are not
	 * going to evict it or find it through _get()
	 * mechanism -- this is for the toplevel node that
	 * would almost always change with any commit.
	 */
	struct pbase_tree_cache pcache;
} *pbase_tree;
1895
/*
 * Return the cached tree named "oid", reading it from the object
 * database on a cache miss.  The returned entry is reference-counted
 * (or marked temporary when the cache is full) and must be released
 * with pbase_tree_put().  Returns NULL when "oid" cannot be read or is
 * not a tree.
 */
static struct pbase_tree_cache *pbase_tree_get(const struct object_id *oid)
{
	struct pbase_tree_cache *ent, *nent;
	void *data;
	unsigned long size;
	enum object_type type;
	int neigh;
	int my_ix = pbase_tree_cache_ix(oid);
	int available_ix = -1;

	/* pbase-tree-cache acts as a limited hashtable.
	 * your object will be found at your index or within a few
	 * slots after that slot if it is cached.
	 */
	for (neigh = 0; neigh < 8; neigh++) {
		ent = pbase_tree_cache[my_ix];
		if (ent && oideq(&ent->oid, oid)) {
			ent->ref++;
			return ent;
		}
		/*
		 * Remember an empty or unreferenced slot as a candidate
		 * for (re)use.
		 *
		 * NOTE(review): the second half of this condition
		 * re-tests a slot that was already chosen; it is kept
		 * as-is to preserve historical behavior — confirm
		 * before changing.
		 */
		else if (((available_ix < 0) && (!ent || !ent->ref)) ||
			 ((0 <= available_ix) &&
			  (!ent && pbase_tree_cache[available_ix])))
			available_ix = my_ix;
		if (!ent)
			break;
		my_ix = pbase_tree_cache_ix_incr(my_ix);
	}

	/* Did not find one. Either we got a bogus request or
	 * we need to read and perhaps cache.
	 */
	data = odb_read_object(the_repository->objects, oid, &type, &size);
	if (!data)
		return NULL;
	if (type != OBJ_TREE) {
		free(data);
		return NULL;
	}

	/* We need to either cache or return a throwaway copy */

	if (available_ix < 0)
		ent = NULL;
	else {
		ent = pbase_tree_cache[available_ix];
		my_ix = available_ix;
	}

	if (!ent) {
		nent = xmalloc(sizeof(*nent));
		nent->temporary = (available_ix < 0);
	}
	else {
		/* evict and reuse */
		free(ent->tree_data);
		nent = ent;
	}
	oidcpy(&nent->oid, oid);
	nent->tree_data = data;
	nent->tree_size = size;
	nent->ref = 1;
	if (!nent->temporary)
		pbase_tree_cache[my_ix] = nent;
	return nent;
}
1962
1963static void pbase_tree_put(struct pbase_tree_cache *cache)
1964{
1965 if (!cache->temporary) {
1966 cache->ref--;
1967 return;
1968 }
1969 free(cache->tree_data);
1970 free(cache);
1971}
1972
/*
 * Length of the leading path component of "name": the number of bytes
 * before the first '/' or newline (or the full string length when
 * neither occurs).
 */
static size_t name_cmp_len(const char *name)
{
	const char *end = name;

	while (*end && *end != '\n' && *end != '/')
		end++;
	return (size_t)(end - name);
}
1977
/*
 * Scan "tree" for the entry whose name matches the first "cmplen" bytes
 * of "name".  A full match adds the entry as a preferred-base
 * (excluded) object under "fullname"; when "name" continues with a '/',
 * recurse into the matching subdirectory with the remaining path.
 * Relies on tree entries being sorted, so scanning can stop early.
 */
static void add_pbase_object(struct tree_desc *tree,
			     const char *name,
			     size_t cmplen,
			     const char *fullname)
{
	struct name_entry entry;
	int cmp;

	while (tree_entry(tree,&entry)) {
		if (S_ISGITLINK(entry.mode))
			continue;
		cmp = tree_entry_len(&entry) != cmplen ? 1 :
		      memcmp(name, entry.path, cmplen);
		if (cmp > 0)
			continue;
		if (cmp < 0)
			/* Sorted order: the name cannot appear later. */
			return;
		if (name[cmplen] != '/') {
			add_object_entry(&entry.oid,
					 object_type(entry.mode),
					 fullname, 1);
			return;
		}
		if (S_ISDIR(entry.mode)) {
			struct tree_desc sub;
			struct pbase_tree_cache *tree;
			const char *down = name+cmplen+1;
			size_t downlen = name_cmp_len(down);

			tree = pbase_tree_get(&entry.oid);
			if (!tree)
				return;
			init_tree_desc(&sub, &tree->oid,
				       tree->tree_data, tree->tree_size);

			add_pbase_object(&sub, down, downlen, fullname);
			pbase_tree_put(tree);
		}
	}
}
2018
/*
 * Name-hashes of paths whose preferred-base objects were already added,
 * kept sorted in DESCENDING order (see check_pbase_path()).
 */
static unsigned *done_pbase_paths;
static int done_pbase_paths_num;
static int done_pbase_paths_alloc;

/*
 * Binary search for "hash" in the descending done_pbase_paths[] array.
 * Returns its index when found, or -(insertion point)-1 when absent.
 */
static int done_pbase_path_pos(unsigned hash)
{
	int begin = 0, end = done_pbase_paths_num;

	while (begin < end) {
		int mid = begin + (end - begin) / 2;
		unsigned probe = done_pbase_paths[mid];

		if (probe == hash)
			return mid;
		if (probe < hash)
			end = mid;	/* larger values lie to the left */
		else
			begin = mid + 1;
	}
	return -begin - 1;
}
2037
/*
 * Record that the path with name-hash "hash" has been handled.  Returns
 * 1 when it was already seen; otherwise inserts it (keeping the array's
 * descending order) and returns 0.
 */
static int check_pbase_path(unsigned hash)
{
	int pos = done_pbase_path_pos(hash);
	if (0 <= pos)
		return 1;
	/* Decode the "-(insertion point)-1" return convention. */
	pos = -pos - 1;
	ALLOC_GROW(done_pbase_paths,
		   done_pbase_paths_num + 1,
		   done_pbase_paths_alloc);
	done_pbase_paths_num++;
	/* Shift the tail up to make room at the insertion point. */
	if (pos < done_pbase_paths_num)
		MOVE_ARRAY(done_pbase_paths + pos + 1, done_pbase_paths + pos,
			   done_pbase_paths_num - pos - 1);
	done_pbase_paths[pos] = hash;
	return 0;
}
2054
/*
 * If path "name" exists in any of the preferred-base trees, add the
 * objects along that path as preferred-base (excluded) objects.  The
 * name-hash bookkeeping in check_pbase_path() skips paths that were
 * already processed.
 */
static void add_preferred_base_object(const char *name)
{
	struct pbase_tree *it;
	size_t cmplen;
	unsigned hash = pack_name_hash_fn(name);

	if (!num_preferred_base || check_pbase_path(hash))
		return;

	cmplen = name_cmp_len(name);
	for (it = pbase_tree; it; it = it->next) {
		/* An empty name refers to the toplevel tree itself. */
		if (cmplen == 0) {
			add_object_entry(&it->pcache.oid, OBJ_TREE, NULL, 1);
		}
		else {
			struct tree_desc tree;
			init_tree_desc(&tree, &it->pcache.oid,
				       it->pcache.tree_data, it->pcache.tree_size);
			add_pbase_object(&tree, name, cmplen, name);
		}
	}
}
2077
/*
 * Remember the tree that "oid" peels to as a preferred base.  Only the
 * first "window" requests take effect; later ones are counted but
 * ignored.  Duplicate trees are also ignored.
 */
static void add_preferred_base(struct object_id *oid)
{
	struct pbase_tree *it;
	void *data;
	unsigned long size;
	struct object_id tree_oid;

	if (window <= num_preferred_base++)
		return;

	data = odb_read_object_peeled(the_repository->objects, oid,
				      OBJ_TREE, &size, &tree_oid);
	if (!data)
		return;

	/* Already have this tree on the list? */
	for (it = pbase_tree; it; it = it->next) {
		if (oideq(&it->pcache.oid, &tree_oid)) {
			free(data);
			return;
		}
	}

	CALLOC_ARRAY(it, 1);
	it->next = pbase_tree;
	pbase_tree = it;

	oidcpy(&it->pcache.oid, &tree_oid);
	it->pcache.tree_data = data;
	it->pcache.tree_size = size;
}
2108
2109static void cleanup_preferred_base(void)
2110{
2111 struct pbase_tree *it;
2112 unsigned i;
2113
2114 it = pbase_tree;
2115 pbase_tree = NULL;
2116 while (it) {
2117 struct pbase_tree *tmp = it;
2118 it = tmp->next;
2119 free(tmp->pcache.tree_data);
2120 free(tmp);
2121 }
2122
2123 for (i = 0; i < ARRAY_SIZE(pbase_tree_cache); i++) {
2124 if (!pbase_tree_cache[i])
2125 continue;
2126 free(pbase_tree_cache[i]->tree_data);
2127 FREE_AND_NULL(pbase_tree_cache[i]);
2128 }
2129
2130 FREE_AND_NULL(done_pbase_paths);
2131 done_pbase_paths_num = done_pbase_paths_alloc = 0;
2132}
2133
2134/*
2135 * Return 1 iff the object specified by "delta" can be sent
2136 * literally as a delta against the base in "base_sha1". If
2137 * so, then *base_out will point to the entry in our packing
2138 * list, or NULL if we must use the external-base list.
2139 *
2140 * Depth value does not matter - find_deltas() will
2141 * never consider reused delta as the base object to
2142 * deltify other objects against, in order to avoid
2143 * circular deltas.
2144 */
2145static int can_reuse_delta(const struct object_id *base_oid,
2146 struct object_entry *delta,
2147 struct object_entry **base_out)
2148{
2149 struct object_entry *base;
2150
2151 /*
2152 * First see if we're already sending the base (or it's explicitly in
2153 * our "excluded" list).
2154 */
2155 base = packlist_find(&to_pack, base_oid);
2156 if (base) {
2157 if (!in_same_island(&delta->idx.oid, &base->idx.oid))
2158 return 0;
2159 *base_out = base;
2160 return 1;
2161 }
2162
2163 /*
2164 * Otherwise, reachability bitmaps may tell us if the receiver has it,
2165 * even if it was buried too deep in history to make it into the
2166 * packing list.
2167 */
2168 if (thin && bitmap_has_oid_in_uninteresting(bitmap_git, base_oid)) {
2169 if (use_delta_islands) {
2170 if (!in_same_island(&delta->idx.oid, base_oid))
2171 return 0;
2172 }
2173 *base_out = NULL;
2174 return 1;
2175 }
2176
2177 return 0;
2178}
2179
2180static void prefetch_to_pack(uint32_t object_index_start) {
2181 struct oid_array to_fetch = OID_ARRAY_INIT;
2182 uint32_t i;
2183
2184 for (i = object_index_start; i < to_pack.nr_objects; i++) {
2185 struct object_entry *entry = to_pack.objects + i;
2186
2187 if (!odb_read_object_info_extended(the_repository->objects,
2188 &entry->idx.oid,
2189 NULL,
2190 OBJECT_INFO_FOR_PREFETCH))
2191 continue;
2192 oid_array_append(&to_fetch, &entry->idx.oid);
2193 }
2194 promisor_remote_get_direct(the_repository,
2195 to_fetch.oid, to_fetch.nr);
2196 oid_array_clear(&to_fetch);
2197}
2198
/*
 * Decide how "entry" will be represented in the output pack. If it already
 * lives in a packfile, try to reuse its on-disk representation (including a
 * delta, via can_reuse_delta()); otherwise fall back to asking the object
 * database for its type and size. object_index is the entry's position in
 * to_pack.objects and is only used as the starting point for prefetching
 * missing promisor objects.
 */
static void check_object(struct object_entry *entry, uint32_t object_index)
{
	unsigned long canonical_size;
	enum object_type type;
	struct object_info oi = {.typep = &type, .sizep = &canonical_size};

	if (IN_PACK(entry)) {
		struct packed_git *p = IN_PACK(entry);
		struct pack_window *w_curs = NULL;
		int have_base = 0;
		struct object_id base_ref;
		struct object_entry *base_entry;
		unsigned long used, used_0;
		unsigned long avail;
		off_t ofs;
		unsigned char *buf, c;
		enum object_type type; /* NOTE: intentionally shadows the outer "type" */
		unsigned long in_pack_size;

		buf = use_pack(p, &w_curs, entry->in_pack_offset, &avail);

		/*
		 * We want in_pack_type even if we do not reuse delta
		 * since non-delta representations could still be reused.
		 */
		used = unpack_object_header_buffer(buf, avail,
						   &type,
						   &in_pack_size);
		if (used == 0)
			goto give_up;

		if (type < 0)
			BUG("invalid type %d", type);
		entry->in_pack_type = type;

		/*
		 * Determine if this is a delta and if so whether we can
		 * reuse it or not. Otherwise let's find out as cheaply as
		 * possible what the actual type and size for this object is.
		 */
		switch (entry->in_pack_type) {
		default:
			/* Not a delta hence we've already got all we need. */
			oe_set_type(entry, entry->in_pack_type);
			SET_SIZE(entry, in_pack_size);
			entry->in_pack_header_size = used;
			if (oe_type(entry) < OBJ_COMMIT || oe_type(entry) > OBJ_BLOB)
				goto give_up;
			unuse_pack(&w_curs);
			return;
		case OBJ_REF_DELTA:
			/* the base's full oid follows the object header */
			if (reuse_delta && !entry->preferred_base) {
				oidread(&base_ref,
					use_pack(p, &w_curs,
						 entry->in_pack_offset + used,
						 NULL),
					the_repository->hash_algo);
				have_base = 1;
			}
			entry->in_pack_header_size = used + the_hash_algo->rawsz;
			break;
		case OBJ_OFS_DELTA:
			/* decode the variable-length negative offset to the base */
			buf = use_pack(p, &w_curs,
				       entry->in_pack_offset + used, NULL);
			used_0 = 0;
			c = buf[used_0++];
			ofs = c & 127;
			while (c & 128) {
				ofs += 1;
				if (!ofs || MSB(ofs, 7)) {
					error(_("delta base offset overflow in pack for %s"),
					      oid_to_hex(&entry->idx.oid));
					goto give_up;
				}
				c = buf[used_0++];
				ofs = (ofs << 7) + (c & 127);
			}
			ofs = entry->in_pack_offset - ofs;
			if (ofs <= 0 || ofs >= entry->in_pack_offset) {
				error(_("delta base offset out of bound for %s"),
				      oid_to_hex(&entry->idx.oid));
				goto give_up;
			}
			if (reuse_delta && !entry->preferred_base) {
				uint32_t pos;
				if (offset_to_pack_pos(p, ofs, &pos) < 0)
					goto give_up;
				if (!nth_packed_object_id(&base_ref, p,
							  pack_pos_to_index(p, pos)))
					have_base = 1;
			}
			entry->in_pack_header_size = used + used_0;
			break;
		}

		if (have_base &&
		    can_reuse_delta(&base_ref, entry, &base_entry)) {
			oe_set_type(entry, entry->in_pack_type);
			SET_SIZE(entry, in_pack_size); /* delta size */
			SET_DELTA_SIZE(entry, in_pack_size);

			if (base_entry) {
				/* link into the base's delta-child list */
				SET_DELTA(entry, base_entry);
				entry->delta_sibling_idx = base_entry->delta_child_idx;
				SET_DELTA_CHILD(base_entry, entry);
			} else {
				/* base is not packed; record it as an external base */
				SET_DELTA_EXT(entry, &base_ref);
			}

			unuse_pack(&w_curs);
			return;
		}

		if (oe_type(entry)) {
			off_t delta_pos;

			/*
			 * This must be a delta and we already know what the
			 * final object type is. Let's extract the actual
			 * object size from the delta header.
			 */
			delta_pos = entry->in_pack_offset + entry->in_pack_header_size;
			canonical_size = get_size_from_delta(p, &w_curs, delta_pos);
			if (canonical_size == 0)
				goto give_up;
			SET_SIZE(entry, canonical_size);
			unuse_pack(&w_curs);
			return;
		}

		/*
		 * No choice but to fall back to the recursive delta walk
		 * with odb_read_object_info() to find about the object type
		 * at this point...
		 */
	give_up:
		unuse_pack(&w_curs);
	}

	/* a failed first read may be fixed by prefetching from a promisor remote */
	if (odb_read_object_info_extended(the_repository->objects, &entry->idx.oid, &oi,
					  OBJECT_INFO_SKIP_FETCH_OBJECT | OBJECT_INFO_LOOKUP_REPLACE) < 0) {
		if (repo_has_promisor_remote(the_repository)) {
			prefetch_to_pack(object_index);
			if (odb_read_object_info_extended(the_repository->objects, &entry->idx.oid, &oi,
							  OBJECT_INFO_SKIP_FETCH_OBJECT | OBJECT_INFO_LOOKUP_REPLACE) < 0)
				type = -1;
		} else {
			type = -1;
		}
	}
	oe_set_type(entry, type);
	if (entry->type_valid) {
		SET_SIZE(entry, canonical_size);
	} else {
		/*
		 * Bad object type is checked in prepare_pack(). This is
		 * to permit a missing preferred base object to be ignored
		 * as a preferred base. Doing so can result in a larger
		 * pack file, but the transfer will still take place.
		 */
	}
}
2361
2362static int pack_offset_sort(const void *_a, const void *_b)
2363{
2364 const struct object_entry *a = *(struct object_entry **)_a;
2365 const struct object_entry *b = *(struct object_entry **)_b;
2366 const struct packed_git *a_in_pack = IN_PACK(a);
2367 const struct packed_git *b_in_pack = IN_PACK(b);
2368
2369 /* avoid filesystem trashing with loose objects */
2370 if (!a_in_pack && !b_in_pack)
2371 return oidcmp(&a->idx.oid, &b->idx.oid);
2372
2373 if (a_in_pack < b_in_pack)
2374 return -1;
2375 if (a_in_pack > b_in_pack)
2376 return 1;
2377 return a->in_pack_offset < b->in_pack_offset ? -1 :
2378 (a->in_pack_offset > b->in_pack_offset);
2379}
2380
2381/*
2382 * Drop an on-disk delta we were planning to reuse. Naively, this would
2383 * just involve blanking out the "delta" field, but we have to deal
2384 * with some extra book-keeping:
2385 *
2386 * 1. Removing ourselves from the delta_sibling linked list.
2387 *
2388 * 2. Updating our size/type to the non-delta representation. These were
2389 * either not recorded initially (size) or overwritten with the delta type
2390 * (type) when check_object() decided to reuse the delta.
2391 *
2392 * 3. Resetting our delta depth, as we are now a base object.
2393 */
static void drop_reused_delta(struct object_entry *entry)
{
	/* head of our base's delta-child list; we must unlink ourselves */
	unsigned *idx = &to_pack.objects[entry->delta_idx - 1].delta_child_idx;
	struct object_info oi = OBJECT_INFO_INIT;
	enum object_type type;
	unsigned long size;

	/* walk the sibling chain; splice this entry out when found */
	while (*idx) {
		struct object_entry *oe = &to_pack.objects[*idx - 1];

		if (oe == entry)
			*idx = oe->delta_sibling_idx;
		else
			idx = &oe->delta_sibling_idx;
	}
	SET_DELTA(entry, NULL);
	entry->depth = 0;

	/* recover the non-delta type/size, since the delta ones were recorded */
	oi.sizep = &size;
	oi.typep = &type;
	if (packed_object_info(the_repository, IN_PACK(entry), entry->in_pack_offset, &oi) < 0) {
		/*
		 * We failed to get the info from this pack for some reason;
		 * fall back to odb_read_object_info, which may find another copy.
		 * And if that fails, the error will be recorded in oe_type(entry)
		 * and dealt with in prepare_pack().
		 */
		oe_set_type(entry,
			    odb_read_object_info(the_repository->objects,
						 &entry->idx.oid, &size));
	} else {
		oe_set_type(entry, type);
	}
	SET_SIZE(entry, size);
}
2429
2430/*
2431 * Follow the chain of deltas from this entry onward, throwing away any links
2432 * that cause us to hit a cycle (as determined by the DFS state flags in
2433 * the entries).
2434 *
2435 * We also detect too-long reused chains that would violate our --depth
2436 * limit.
2437 */
static void break_delta_chains(struct object_entry *entry)
{
	/*
	 * The actual depth of each object we will write is stored as an int,
	 * as it cannot exceed our int "depth" limit. But before we break
	 * chains based on that limit, we may potentially go as deep as the
	 * number of objects, which is elsewhere bounded to a uint32_t.
	 */
	uint32_t total_depth;
	struct object_entry *cur, *next;

	/* first pass: walk down the chain, breaking any cycle we hit */
	for (cur = entry, total_depth = 0;
	     cur;
	     cur = DELTA(cur), total_depth++) {
		if (cur->dfs_state == DFS_DONE) {
			/*
			 * We've already seen this object and know it isn't
			 * part of a cycle. We do need to append its depth
			 * to our count.
			 */
			total_depth += cur->depth;
			break;
		}

		/*
		 * We break cycles before looping, so an ACTIVE state (or any
		 * other cruft which made its way into the state variable)
		 * is a bug.
		 */
		if (cur->dfs_state != DFS_NONE)
			BUG("confusing delta dfs state in first pass: %d",
			    cur->dfs_state);

		/*
		 * Now we know this is the first time we've seen the object. If
		 * it's not a delta, we're done traversing, but we'll mark it
		 * done to save time on future traversals.
		 */
		if (!DELTA(cur)) {
			cur->dfs_state = DFS_DONE;
			break;
		}

		/*
		 * Mark ourselves as active and see if the next step causes
		 * us to cycle to another active object. It's important to do
		 * this _before_ we loop, because it impacts where we make the
		 * cut, and thus how our total_depth counter works.
		 * E.g., We may see a partial loop like:
		 *
		 *   A -> B -> C -> D -> B
		 *
		 * Cutting B->C breaks the cycle. But now the depth of A is
		 * only 1, and our total_depth counter is at 3. The size of the
		 * error is always one less than the size of the cycle we
		 * broke. Commits C and D were "lost" from A's chain.
		 *
		 * If we instead cut D->B, then the depth of A is correct at 3.
		 * We keep all commits in the chain that we examined.
		 */
		cur->dfs_state = DFS_ACTIVE;
		if (DELTA(cur)->dfs_state == DFS_ACTIVE) {
			drop_reused_delta(cur);
			cur->dfs_state = DFS_DONE;
			break;
		}
	}

	/*
	 * And now that we've gone all the way to the bottom of the chain, we
	 * need to clear the active flags and set the depth fields as
	 * appropriate. Unlike the loop above, which can quit when it drops a
	 * delta, we need to keep going to look for more depth cuts. So we need
	 * an extra "next" pointer to keep going after we reset cur->delta.
	 */
	for (cur = entry; cur; cur = next) {
		next = DELTA(cur);

		/*
		 * We should have a chain of zero or more ACTIVE states down to
		 * a final DONE. We can quit after the DONE, because either it
		 * has no bases, or we've already handled them in a previous
		 * call.
		 */
		if (cur->dfs_state == DFS_DONE)
			break;
		else if (cur->dfs_state != DFS_ACTIVE)
			BUG("confusing delta dfs state in second pass: %d",
			    cur->dfs_state);

		/*
		 * If the total_depth is more than depth, then we need to snip
		 * the chain into two or more smaller chains that don't exceed
		 * the maximum depth. Most of the resulting chains will contain
		 * (depth + 1) entries (i.e., depth deltas plus one base), and
		 * the last chain (i.e., the one containing entry) will contain
		 * whatever entries are left over, namely
		 * (total_depth % (depth + 1)) of them.
		 *
		 * Since we are iterating towards decreasing depth, we need to
		 * decrement total_depth as we go, and we need to write to the
		 * entry what its final depth will be after all of the
		 * snipping. Since we're snipping into chains of length (depth
		 * + 1) entries, the final depth of an entry will be its
		 * original depth modulo (depth + 1). Any time we encounter an
		 * entry whose final depth is supposed to be zero, we snip it
		 * from its delta base, thereby making it so.
		 */
		cur->depth = (total_depth--) % (depth + 1);
		if (!cur->depth)
			drop_reused_delta(cur);

		cur->dfs_state = DFS_DONE;
	}
}
2553
2554static void get_object_details(void)
2555{
2556 uint32_t i;
2557 struct object_entry **sorted_by_offset;
2558
2559 if (progress)
2560 progress_state = start_progress(the_repository,
2561 _("Counting objects"),
2562 to_pack.nr_objects);
2563
2564 CALLOC_ARRAY(sorted_by_offset, to_pack.nr_objects);
2565 for (i = 0; i < to_pack.nr_objects; i++)
2566 sorted_by_offset[i] = to_pack.objects + i;
2567 QSORT(sorted_by_offset, to_pack.nr_objects, pack_offset_sort);
2568
2569 for (i = 0; i < to_pack.nr_objects; i++) {
2570 struct object_entry *entry = sorted_by_offset[i];
2571 check_object(entry, i);
2572 if (entry->type_valid &&
2573 oe_size_greater_than(&to_pack, entry,
2574 repo_settings_get_big_file_threshold(the_repository)))
2575 entry->no_try_delta = 1;
2576 display_progress(progress_state, i + 1);
2577 }
2578 stop_progress(&progress_state);
2579
2580 /*
2581 * This must happen in a second pass, since we rely on the delta
2582 * information for the whole list being completed.
2583 */
2584 for (i = 0; i < to_pack.nr_objects; i++)
2585 break_delta_chains(&to_pack.objects[i]);
2586
2587 free(sorted_by_offset);
2588}
2589
/*
 * We search for deltas in a list sorted by type, by filename hash, and then
 * by size, so that we see progressively smaller and smaller files.
 * That's because we prefer deltas to be from the bigger file
 * to the smaller -- deletes are potentially cheaper, but perhaps
 * more importantly, the bigger file is likely the more recent
 * one. The deepest deltas are therefore the oldest objects, which are
 * less likely to be accessed often.
 */
2599static int type_size_sort(const void *_a, const void *_b)
2600{
2601 const struct object_entry *a = *(struct object_entry **)_a;
2602 const struct object_entry *b = *(struct object_entry **)_b;
2603 const enum object_type a_type = oe_type(a);
2604 const enum object_type b_type = oe_type(b);
2605 const unsigned long a_size = SIZE(a);
2606 const unsigned long b_size = SIZE(b);
2607
2608 if (a_type > b_type)
2609 return -1;
2610 if (a_type < b_type)
2611 return 1;
2612 if (a->hash > b->hash)
2613 return -1;
2614 if (a->hash < b->hash)
2615 return 1;
2616 if (a->preferred_base > b->preferred_base)
2617 return -1;
2618 if (a->preferred_base < b->preferred_base)
2619 return 1;
2620 if (use_delta_islands) {
2621 const int island_cmp = island_delta_cmp(&a->idx.oid, &b->idx.oid);
2622 if (island_cmp)
2623 return island_cmp;
2624 }
2625 if (a_size > b_size)
2626 return -1;
2627 if (a_size < b_size)
2628 return 1;
2629 return a < b ? -1 : (a > b); /* newest first */
2630}
2631
/*
 * Per-slot state for the sliding delta-search window in find_deltas().
 */
struct unpacked {
	struct object_entry *entry; /* object currently occupying this slot */
	void *data;                 /* inflated object contents, loaded lazily */
	struct delta_index *index;  /* delta index over "data", built lazily */
	unsigned depth;             /* delta-chain depth of "entry" so far */
};
2638
2639static int delta_cacheable(unsigned long src_size, unsigned long trg_size,
2640 unsigned long delta_size)
2641{
2642 if (max_delta_cache_size && delta_cache_size + delta_size > max_delta_cache_size)
2643 return 0;
2644
2645 if (delta_size < cache_max_small_delta_size)
2646 return 1;
2647
2648 /* cache delta, if objects are large enough compared to delta size */
2649 if ((src_size >> 20) + (trg_size >> 21) > (delta_size >> 10))
2650 return 1;
2651
2652 return 0;
2653}
2654
/* Protect delta_cache_size (shared by all delta-search threads) */
static pthread_mutex_t cache_mutex;
#define cache_lock()		pthread_mutex_lock(&cache_mutex)
#define cache_unlock()		pthread_mutex_unlock(&cache_mutex)

/*
 * Protect object list partitioning (e.g. struct thread_param) and
 * progress_state
 */
static pthread_mutex_t progress_mutex;
#define progress_lock()		pthread_mutex_lock(&progress_mutex)
#define progress_unlock()	pthread_mutex_unlock(&progress_mutex)

/*
 * Access to struct object_entry is unprotected since each thread owns
 * a portion of the main object list. Just don't access object entries
 * ahead in the list because they can be stolen and would need
 * progress_mutex for protection.
 */
2674
2675static inline int oe_size_less_than(struct packing_data *pack,
2676 const struct object_entry *lhs,
2677 unsigned long rhs)
2678{
2679 if (lhs->size_valid)
2680 return lhs->size_ < rhs;
2681 if (rhs < pack->oe_size_limit) /* rhs < 2^x <= lhs ? */
2682 return 0;
2683 return oe_get_size_slow(pack, lhs) < rhs;
2684}
2685
/*
 * Record the tree depth for an entry, lazily allocating the side array
 * (parallel to pack->objects) on first use.
 */
static inline void oe_set_tree_depth(struct packing_data *pack,
				     struct object_entry *e,
				     unsigned int tree_depth)
{
	if (!pack->tree_depth)
		CALLOC_ARRAY(pack->tree_depth, pack->nr_alloc);
	pack->tree_depth[e - pack->objects] = tree_depth;
}
2694
/*
 * Return the size of the object without doing any delta
 * reconstruction (so non-deltas are true object sizes, but deltas
 * return the size of the delta data).
 *
 * Takes the packing-data lock around odb/pack access, so it is safe to
 * call from the delta-search worker threads.
 */
unsigned long oe_get_size_slow(struct packing_data *pack,
			       const struct object_entry *e)
{
	struct packed_git *p;
	struct pack_window *w_curs;
	unsigned char *buf;
	enum object_type type;
	unsigned long used, avail, size;

	/* non-delta: ask the object database for the canonical size */
	if (e->type_ != OBJ_OFS_DELTA && e->type_ != OBJ_REF_DELTA) {
		packing_data_lock(&to_pack);
		if (odb_read_object_info(the_repository->objects,
					 &e->idx.oid, &size) < 0)
			die(_("unable to get size of %s"),
			    oid_to_hex(&e->idx.oid));
		packing_data_unlock(&to_pack);
		return size;
	}

	p = oe_in_pack(pack, e);
	if (!p)
		BUG("when e->type is a delta, it must belong to a pack");

	/* delta: parse the in-pack header to get the stored delta size */
	packing_data_lock(&to_pack);
	w_curs = NULL;
	buf = use_pack(p, &w_curs, e->in_pack_offset, &avail);
	used = unpack_object_header_buffer(buf, avail, &type, &size);
	if (used == 0)
		die(_("unable to parse object header of %s"),
		    oid_to_hex(&e->idx.oid));

	unuse_pack(&w_curs);
	packing_data_unlock(&to_pack);
	return size;
}
2735
/*
 * Try to deltify the target window entry "trg" against the candidate base
 * "src". Returns 1 if a better delta was found and recorded on trg->entry,
 * 0 if this pair was skipped or produced nothing better, and -1 to tell the
 * caller to stop scanning the window (types no longer match).
 *
 * *mem_usage is increased by any object data or delta index this call
 * loads into the window.
 */
static int try_delta(struct unpacked *trg, struct unpacked *src,
		     unsigned max_depth, unsigned long *mem_usage)
{
	struct object_entry *trg_entry = trg->entry;
	struct object_entry *src_entry = src->entry;
	unsigned long trg_size, src_size, delta_size, sizediff, max_size, sz;
	unsigned ref_depth;
	enum object_type type;
	void *delta_buf;

	/* Don't bother doing diffs between different types */
	if (oe_type(trg_entry) != oe_type(src_entry))
		return -1;

	/*
	 * We do not bother to try a delta that we discarded on an
	 * earlier try, but only when reusing delta data.  Note that
	 * src_entry that is marked as the preferred_base should always
	 * be considered, as even if we produce a suboptimal delta against
	 * it, we will still save the transfer cost, as we already know
	 * the other side has it and we won't send src_entry at all.
	 */
	if (reuse_delta && IN_PACK(trg_entry) &&
	    IN_PACK(trg_entry) == IN_PACK(src_entry) &&
	    !src_entry->preferred_base &&
	    trg_entry->in_pack_type != OBJ_REF_DELTA &&
	    trg_entry->in_pack_type != OBJ_OFS_DELTA)
		return 0;

	/* Let's not bust the allowed depth. */
	if (src->depth >= max_depth)
		return 0;

	/* Now some size filtering heuristics. */
	trg_size = SIZE(trg_entry);
	if (!DELTA(trg_entry)) {
		max_size = trg_size/2 - the_hash_algo->rawsz;
		ref_depth = 1;
	} else {
		max_size = DELTA_SIZE(trg_entry);
		ref_depth = trg->depth;
	}
	/* scale the size budget down the deeper the resulting chain would be */
	max_size = (uint64_t)max_size * (max_depth - src->depth) /
						(max_depth - ref_depth + 1);
	if (max_size == 0)
		return 0;
	src_size = SIZE(src_entry);
	sizediff = src_size < trg_size ? trg_size - src_size : 0;
	if (sizediff >= max_size)
		return 0;
	if (trg_size < src_size / 32)
		return 0;

	if (!in_same_island(&trg->entry->idx.oid, &src->entry->idx.oid))
		return 0;

	/* Load data if not already done */
	if (!trg->data) {
		packing_data_lock(&to_pack);
		trg->data = odb_read_object(the_repository->objects,
					    &trg_entry->idx.oid, &type,
					    &sz);
		packing_data_unlock(&to_pack);
		if (!trg->data)
			die(_("object %s cannot be read"),
			    oid_to_hex(&trg_entry->idx.oid));
		if (sz != trg_size)
			die(_("object %s inconsistent object length (%"PRIuMAX" vs %"PRIuMAX")"),
			    oid_to_hex(&trg_entry->idx.oid), (uintmax_t)sz,
			    (uintmax_t)trg_size);
		*mem_usage += sz;
	}
	if (!src->data) {
		packing_data_lock(&to_pack);
		src->data = odb_read_object(the_repository->objects,
					    &src_entry->idx.oid, &type,
					    &sz);
		packing_data_unlock(&to_pack);
		if (!src->data) {
			if (src_entry->preferred_base) {
				static int warned = 0;
				if (!warned++)
					warning(_("object %s cannot be read"),
						oid_to_hex(&src_entry->idx.oid));
				/*
				 * Those objects are not included in the
				 * resulting pack.  Be resilient and ignore
				 * them if they can't be read, in case the
				 * pack could be created nevertheless.
				 */
				return 0;
			}
			die(_("object %s cannot be read"),
			    oid_to_hex(&src_entry->idx.oid));
		}
		if (sz != src_size)
			die(_("object %s inconsistent object length (%"PRIuMAX" vs %"PRIuMAX")"),
			    oid_to_hex(&src_entry->idx.oid), (uintmax_t)sz,
			    (uintmax_t)src_size);
		*mem_usage += sz;
	}
	if (!src->index) {
		src->index = create_delta_index(src->data, src_size);
		if (!src->index) {
			static int warned = 0;
			if (!warned++)
				warning(_("suboptimal pack - out of memory"));
			return 0;
		}
		*mem_usage += sizeof_delta_index(src->index);
	}

	delta_buf = create_delta(src->index, trg->data, trg_size, &delta_size, max_size);
	if (!delta_buf)
		return 0;

	if (DELTA(trg_entry)) {
		/* Prefer only shallower same-sized deltas. */
		if (delta_size == DELTA_SIZE(trg_entry) &&
		    src->depth + 1 >= trg->depth) {
			free(delta_buf);
			return 0;
		}
	}

	/*
	 * Handle memory allocation outside of the cache
	 * accounting lock.  Compiler will optimize the strangeness
	 * away when NO_PTHREADS is defined.
	 */
	free(trg_entry->delta_data);
	cache_lock();
	if (trg_entry->delta_data) {
		delta_cache_size -= DELTA_SIZE(trg_entry);
		trg_entry->delta_data = NULL;
	}
	if (delta_cacheable(src_size, trg_size, delta_size)) {
		delta_cache_size += delta_size;
		cache_unlock();
		trg_entry->delta_data = xrealloc(delta_buf, delta_size);
	} else {
		cache_unlock();
		free(delta_buf);
	}

	SET_DELTA(trg_entry, src_entry);
	SET_DELTA_SIZE(trg_entry, delta_size);
	trg->depth = src->depth + 1;

	return 1;
}
2887
/*
 * Return the maximum chain depth reachable among all delta descendants of
 * "me", where "me" itself sits at depth n.
 */
static unsigned int check_delta_limit(struct object_entry *me, unsigned int n)
{
	unsigned int deepest = n;
	struct object_entry *child;

	for (child = DELTA_CHILD(me); child; child = DELTA_SIBLING(child)) {
		const unsigned int d = check_delta_limit(child, n + 1);

		if (d > deepest)
			deepest = d;
	}
	return deepest;
}
2900
2901static unsigned long free_unpacked(struct unpacked *n)
2902{
2903 unsigned long freed_mem = sizeof_delta_index(n->index);
2904 free_delta_index(n->index);
2905 n->index = NULL;
2906 if (n->data) {
2907 freed_mem += SIZE(n->entry);
2908 FREE_AND_NULL(n->data);
2909 }
2910 n->entry = NULL;
2911 n->depth = 0;
2912 return freed_mem;
2913}
2914
/*
 * Core delta search: slide a window of up to "window" recently seen objects
 * over *list (which is sorted by type_size_sort) and try to deltify each
 * entry against every other object in the window, up to "depth" chain
 * length. *list_size is decremented as entries are consumed, so the main
 * thread can steal the untouched tail for idle workers; *processed counts
 * finished non-preferred-base entries for progress reporting.
 */
static void find_deltas(struct object_entry **list, unsigned *list_size,
			int window, int depth, unsigned *processed)
{
	uint32_t i, idx = 0, count = 0;
	struct unpacked *array;
	unsigned long mem_usage = 0;

	CALLOC_ARRAY(array, window);

	for (;;) {
		struct object_entry *entry;
		struct unpacked *n = array + idx;
		int j, max_depth, best_base = -1;

		/* pop the next entry under the lock; the list may be resized by stealing */
		progress_lock();
		if (!*list_size) {
			progress_unlock();
			break;
		}
		entry = *list++;
		(*list_size)--;
		if (!entry->preferred_base) {
			(*processed)++;
			display_progress(progress_state, *processed);
		}
		progress_unlock();

		mem_usage -= free_unpacked(n);
		n->entry = entry;

		/* evict oldest slots until we are back under the memory limit */
		while (window_memory_limit &&
		       mem_usage > window_memory_limit &&
		       count > 1) {
			const uint32_t tail = (idx + window - count) % window;
			mem_usage -= free_unpacked(array + tail);
			count--;
		}

		/* We do not compute delta to *create* objects we are not
		 * going to pack.
		 */
		if (entry->preferred_base)
			goto next;

		/*
		 * If the current object is at pack edge, take the depth the
		 * objects that depend on the current object into account
		 * otherwise they would become too deep.
		 */
		max_depth = depth;
		if (DELTA_CHILD(entry)) {
			max_depth -= check_delta_limit(entry, 0);
			if (max_depth <= 0)
				goto next;
		}

		/* try every other occupied window slot as a delta base */
		j = window;
		while (--j > 0) {
			int ret;
			uint32_t other_idx = idx + j;
			struct unpacked *m;
			if (other_idx >= window)
				other_idx -= window;
			m = array + other_idx;
			if (!m->entry)
				break;
			ret = try_delta(n, m, max_depth, &mem_usage);
			if (ret < 0)
				break;
			else if (ret > 0)
				best_base = other_idx;
		}

		/*
		 * If we decided to cache the delta data, then it is best
		 * to compress it right away.  First because we have to do
		 * it anyway, and doing it here while we're threaded will
		 * save a lot of time in the non threaded write phase,
		 * as well as allow for caching more deltas within
		 * the same cache size limit.
		 * ...
		 * But only if not writing to stdout, since in that case
		 * the network is most likely throttling writes anyway,
		 * and therefore it is best to go to the write phase ASAP
		 * instead, as we can afford spending more time compressing
		 * between writes at that moment.
		 */
		if (entry->delta_data && !pack_to_stdout) {
			unsigned long size;

			size = do_compress(&entry->delta_data, DELTA_SIZE(entry));
			if (size < (1U << OE_Z_DELTA_BITS)) {
				entry->z_delta_size = size;
				cache_lock();
				delta_cache_size -= DELTA_SIZE(entry);
				delta_cache_size += entry->z_delta_size;
				cache_unlock();
			} else {
				FREE_AND_NULL(entry->delta_data);
				entry->z_delta_size = 0;
			}
		}

		/* if we made n a delta, and if n is already at max
		 * depth, leaving it in the window is pointless.  we
		 * should evict it first.
		 */
		if (DELTA(entry) && max_depth <= n->depth)
			continue;

		/*
		 * Move the best delta base up in the window, after the
		 * currently deltified object, to keep it longer.  It will
		 * be the first base object to be attempted next.
		 */
		if (DELTA(entry)) {
			struct unpacked swap = array[best_base];
			int dist = (window + idx - best_base) % window;
			int dst = best_base;
			while (dist--) {
				int src = (dst + 1) % window;
				array[dst] = array[src];
				dst = src;
			}
			array[dst] = swap;
		}

		next:
		idx++;
		if (count + 1 < window)
			count++;
		if (idx >= window)
			idx = 0;
	}

	for (i = 0; i < window; ++i) {
		free_delta_index(array[i].index);
		free(array[i].data);
	}
	free(array);
}
3056
3057/*
3058 * The main object list is split into smaller lists, each is handed to
3059 * one worker.
3060 *
3061 * The main thread waits on the condition that (at least) one of the workers
3062 * has stopped working (which is indicated in the .working member of
3063 * struct thread_params).
3064 *
3065 * When a work thread has completed its work, it sets .working to 0 and
3066 * signals the main thread and waits on the condition that .data_ready
3067 * becomes 1.
3068 *
3069 * The main thread steals half of the work from the worker that has
3070 * most work left to hand it to the idle worker.
3071 */
3072
struct thread_params {
	pthread_t thread;               /* worker thread handle */
	struct object_entry **list;     /* this worker's slice of the object list */
	struct packing_region *regions;
	unsigned list_size;             /* entries assigned to this worker */
	unsigned remaining;             /* entries not yet processed (stealable) */
	int window;
	int depth;
	int working;                    /* 0 once the worker goes idle */
	int data_ready;                 /* set by main thread when new work arrives */
	pthread_mutex_t mutex;          /* protects data_ready */
	pthread_cond_t cond;            /* signaled when data_ready flips to 1 */
	unsigned *processed;            /* shared progress counter */
};

/* signaled (under progress_mutex) whenever a worker finishes its slice */
static pthread_cond_t progress_cond;
3089
3090/*
3091 * Mutex and conditional variable can't be statically-initialized on Windows.
3092 */
3093static void init_threaded_search(void)
3094{
3095 pthread_mutex_init(&cache_mutex, NULL);
3096 pthread_mutex_init(&progress_mutex, NULL);
3097 pthread_cond_init(&progress_cond, NULL);
3098}
3099
3100static void cleanup_threaded_search(void)
3101{
3102 pthread_cond_destroy(&progress_cond);
3103 pthread_mutex_destroy(&cache_mutex);
3104 pthread_mutex_destroy(&progress_mutex);
3105}
3106
/*
 * Worker thread body: repeatedly run find_deltas() over the slice assigned
 * in our thread_params, then announce idleness and block until the main
 * thread hands us stolen work (data_ready) or leaves "remaining" at 0.
 */
static void *threaded_find_deltas(void *arg)
{
	struct thread_params *me = arg;

	progress_lock();
	while (me->remaining) {
		progress_unlock();

		find_deltas(me->list, &me->remaining,
			    me->window, me->depth, me->processed);

		/* tell the main thread we are idle and may receive new work */
		progress_lock();
		me->working = 0;
		pthread_cond_signal(&progress_cond);
		progress_unlock();

		/*
		 * We must not set ->data_ready before we wait on the
		 * condition because the main thread may have set it to 1
		 * before we get here. In order to be sure that new
		 * work is available if we see 1 in ->data_ready, it
		 * was initialized to 0 before this thread was spawned
		 * and we reset it to 0 right away.
		 */
		pthread_mutex_lock(&me->mutex);
		while (!me->data_ready)
			pthread_cond_wait(&me->cond, &me->mutex);
		me->data_ready = 0;
		pthread_mutex_unlock(&me->mutex);

		progress_lock();
	}
	progress_unlock();
	/* leave ->working 1 so that this doesn't get more work assigned */
	return NULL;
}
3143
/*
 * Multi-threaded driver for find_deltas(): partition the sorted object list
 * across delta_search_threads workers, then rebalance by stealing half of
 * the largest remaining slice whenever a worker goes idle. Falls back to a
 * single-threaded search when threading is disabled.
 */
static void ll_find_deltas(struct object_entry **list, unsigned list_size,
			   int window, int depth, unsigned *processed)
{
	struct thread_params *p;
	int i, ret, active_threads = 0;

	init_threaded_search();

	if (delta_search_threads <= 1) {
		find_deltas(list, &list_size, window, depth, processed);
		cleanup_threaded_search();
		return;
	}
	if (progress > pack_to_stdout)
		fprintf_ln(stderr, _("Delta compression using up to %d threads"),
			   delta_search_threads);
	CALLOC_ARRAY(p, delta_search_threads);

	/* Partition the work amongst work threads. */
	for (i = 0; i < delta_search_threads; i++) {
		unsigned sub_size = list_size / (delta_search_threads - i);

		/* don't use too small segments or no deltas will be found */
		if (sub_size < 2*window && i+1 < delta_search_threads)
			sub_size = 0;

		p[i].window = window;
		p[i].depth = depth;
		p[i].processed = processed;
		p[i].working = 1;
		p[i].data_ready = 0;

		/* try to split chunks on "path" boundaries */
		while (sub_size && sub_size < list_size &&
		       list[sub_size]->hash &&
		       list[sub_size]->hash == list[sub_size-1]->hash)
			sub_size++;

		p[i].list = list;
		p[i].list_size = sub_size;
		p[i].remaining = sub_size;

		list += sub_size;
		list_size -= sub_size;
	}

	/* Start work threads. */
	for (i = 0; i < delta_search_threads; i++) {
		if (!p[i].list_size)
			continue;
		pthread_mutex_init(&p[i].mutex, NULL);
		pthread_cond_init(&p[i].cond, NULL);
		ret = pthread_create(&p[i].thread, NULL,
				     threaded_find_deltas, &p[i]);
		if (ret)
			die(_("unable to create thread: %s"), strerror(ret));
		active_threads++;
	}

	/*
	 * Now let's wait for work completion.  Each time a thread is done
	 * with its work, we steal half of the remaining work from the
	 * thread with the largest number of unprocessed objects and give
	 * it to that newly idle thread.  This ensure good load balancing
	 * until the remaining object list segments are simply too short
	 * to be worth splitting anymore.
	 */
	while (active_threads) {
		struct thread_params *target = NULL;
		struct thread_params *victim = NULL;
		unsigned sub_size = 0;

		/* wait for some worker to go idle */
		progress_lock();
		for (;;) {
			for (i = 0; !target && i < delta_search_threads; i++)
				if (!p[i].working)
					target = &p[i];
			if (target)
				break;
			pthread_cond_wait(&progress_cond, &progress_mutex);
		}

		/* pick the busiest worker still worth splitting */
		for (i = 0; i < delta_search_threads; i++)
			if (p[i].remaining > 2*window &&
			    (!victim || victim->remaining < p[i].remaining))
				victim = &p[i];
		if (victim) {
			sub_size = victim->remaining / 2;
			list = victim->list + victim->list_size - sub_size;
			while (sub_size && list[0]->hash &&
			       list[0]->hash == list[-1]->hash) {
				list++;
				sub_size--;
			}
			if (!sub_size) {
				/*
				 * It is possible for some "paths" to have
				 * so many objects that no hash boundary
				 * might be found.  Let's just steal the
				 * exact half in that case.
				 */
				sub_size = victim->remaining / 2;
				list -= sub_size;
			}
			target->list = list;
			victim->list_size -= sub_size;
			victim->remaining -= sub_size;
		}
		target->list_size = sub_size;
		target->remaining = sub_size;
		target->working = 1;
		progress_unlock();

		/* wake the idle worker; sub_size == 0 means it should exit */
		pthread_mutex_lock(&target->mutex);
		target->data_ready = 1;
		pthread_cond_signal(&target->cond);
		pthread_mutex_unlock(&target->mutex);

		if (!sub_size) {
			pthread_join(target->thread, NULL);
			pthread_cond_destroy(&target->cond);
			pthread_mutex_destroy(&target->mutex);
			active_threads--;
		}
	}
	cleanup_threaded_search();
	free(p);
}
3272
3273static int obj_is_packed(const struct object_id *oid)
3274{
3275 return packlist_find(&to_pack, oid) ||
3276 (reuse_packfile_bitmap &&
3277 bitmap_walk_contains(bitmap_git, reuse_packfile_bitmap, oid));
3278}
3279
3280static void add_tag_chain(const struct object_id *oid)
3281{
3282 struct tag *tag;
3283
3284 /*
3285 * We catch duplicates already in add_object_entry(), but we'd
3286 * prefer to do this extra check to avoid having to parse the
3287 * tag at all if we already know that it's being packed (e.g., if
3288 * it was included via bitmaps, we would not have parsed it
3289 * previously).
3290 */
3291 if (obj_is_packed(oid))
3292 return;
3293
3294 tag = lookup_tag(the_repository, oid);
3295 while (1) {
3296 if (!tag || parse_tag(tag) || !tag->tagged)
3297 die(_("unable to pack objects reachable from tag %s"),
3298 oid_to_hex(oid));
3299
3300 add_object_entry(&tag->object.oid, OBJ_TAG, NULL, 0);
3301
3302 if (tag->tagged->type != OBJ_TAG)
3303 return;
3304
3305 tag = (struct tag *)tag->tagged;
3306 }
3307}
3308
3309static int add_ref_tag(const char *tag UNUSED, const char *referent UNUSED, const struct object_id *oid,
3310 int flag UNUSED, void *cb_data UNUSED)
3311{
3312 struct object_id peeled;
3313
3314 if (!peel_iterated_oid(the_repository, oid, &peeled) && obj_is_packed(&peeled))
3315 add_tag_chain(oid);
3316 return 0;
3317}
3318
3319static int should_attempt_deltas(struct object_entry *entry)
3320{
3321 if (DELTA(entry))
3322 /* This happens if we decided to reuse existing
3323 * delta from a pack. "reuse_delta &&" is implied.
3324 */
3325 return 0;
3326
3327 if (!entry->type_valid ||
3328 oe_size_less_than(&to_pack, entry, 50))
3329 return 0;
3330
3331 if (entry->no_try_delta)
3332 return 0;
3333
3334 if (!entry->preferred_base) {
3335 if (oe_type(entry) < 0)
3336 die(_("unable to get type of object %s"),
3337 oid_to_hex(&entry->idx.oid));
3338 } else if (oe_type(entry) < 0) {
3339 /*
3340 * This object is not found, but we
3341 * don't have to include it anyway.
3342 */
3343 return 0;
3344 }
3345
3346 return 1;
3347}
3348
3349static void find_deltas_for_region(struct object_entry *list,
3350 struct packing_region *region,
3351 unsigned int *processed)
3352{
3353 struct object_entry **delta_list;
3354 unsigned int delta_list_nr = 0;
3355
3356 ALLOC_ARRAY(delta_list, region->nr);
3357 for (size_t i = 0; i < region->nr; i++) {
3358 struct object_entry *entry = list + region->start + i;
3359 if (should_attempt_deltas(entry))
3360 delta_list[delta_list_nr++] = entry;
3361 }
3362
3363 QSORT(delta_list, delta_list_nr, type_size_sort);
3364 find_deltas(delta_list, &delta_list_nr, window, depth, processed);
3365 free(delta_list);
3366}
3367
3368static void find_deltas_by_region(struct object_entry *list,
3369 struct packing_region *regions,
3370 size_t start, size_t nr)
3371{
3372 unsigned int processed = 0;
3373 size_t progress_nr;
3374
3375 if (!nr)
3376 return;
3377
3378 progress_nr = regions[nr - 1].start + regions[nr - 1].nr;
3379
3380 if (progress)
3381 progress_state = start_progress(the_repository,
3382 _("Compressing objects by path"),
3383 progress_nr);
3384
3385 while (nr--)
3386 find_deltas_for_region(list,
3387 ®ions[start++],
3388 &processed);
3389
3390 display_progress(progress_state, progress_nr);
3391 stop_progress(&progress_state);
3392}
3393
/*
 * Thread body for path-based delta search. Drains the thread's
 * assigned region list, then announces itself idle and sleeps until
 * the main thread either hands it stolen work or wakes it with no
 * remaining regions (termination).
 */
static void *threaded_find_deltas_by_path(void *arg)
{
	struct thread_params *me = arg;

	progress_lock();
	while (me->remaining) {
		while (me->remaining) {
			/* Drop the lock while compressing one region. */
			progress_unlock();
			find_deltas_for_region(to_pack.objects,
					       me->regions,
					       me->processed);
			progress_lock();
			me->remaining--;
			me->regions++;
		}

		/* Advertise idleness so the main thread can refill us. */
		me->working = 0;
		pthread_cond_signal(&progress_cond);
		progress_unlock();

		/*
		 * We must not set ->data_ready before we wait on the
		 * condition because the main thread may have set it to 1
		 * before we get here. In order to be sure that new
		 * work is available if we see 1 in ->data_ready, it
		 * was initialized to 0 before this thread was spawned
		 * and we reset it to 0 right away.
		 */
		pthread_mutex_lock(&me->mutex);
		while (!me->data_ready)
			pthread_cond_wait(&me->cond, &me->mutex);
		me->data_ready = 0;
		pthread_mutex_unlock(&me->mutex);

		progress_lock();
	}
	progress_unlock();
	/* leave ->working 1 so that this doesn't get more work assigned */
	return NULL;
}
3434
/*
 * Multi-threaded driver for path-based ("region") delta search.
 * Partitions `regions` among delta_search_threads workers, then
 * load-balances by stealing half of the remaining regions from the
 * busiest thread whenever a worker runs dry. Falls back to the
 * single-threaded find_deltas_by_region() when threading is disabled.
 */
static void ll_find_deltas_by_region(struct object_entry *list,
				     struct packing_region *regions,
				     uint32_t start, uint32_t nr)
{
	struct thread_params *p;
	int i, ret, active_threads = 0;
	unsigned int processed = 0;
	uint32_t progress_nr;
	init_threaded_search();

	/*
	 * NOTE(review): this early return skips cleanup_threaded_search()
	 * even though init_threaded_search() already ran — confirm the
	 * asymmetry is intentional.
	 */
	if (!nr)
		return;

	progress_nr = regions[nr - 1].start + regions[nr - 1].nr;
	if (delta_search_threads <= 1) {
		find_deltas_by_region(list, regions, start, nr);
		cleanup_threaded_search();
		return;
	}

	if (progress > pack_to_stdout)
		fprintf_ln(stderr,
			   Q_("Path-based delta compression using up to %d thread",
			      "Path-based delta compression using up to %d threads",
			      delta_search_threads),
			   delta_search_threads);
	CALLOC_ARRAY(p, delta_search_threads);

	if (progress)
		progress_state = start_progress(the_repository,
						_("Compressing objects by path"),
						progress_nr);
	/* Partition the work amongst work threads. */
	for (i = 0; i < delta_search_threads; i++) {
		/* Even split of the regions still unassigned. */
		unsigned sub_size = nr / (delta_search_threads - i);

		p[i].window = window;
		p[i].depth = depth;
		p[i].processed = &processed;
		p[i].working = 1;
		p[i].data_ready = 0;

		p[i].regions = regions;
		p[i].list_size = sub_size;
		p[i].remaining = sub_size;

		regions += sub_size;
		nr -= sub_size;
	}

	/* Start work threads. */
	for (i = 0; i < delta_search_threads; i++) {
		if (!p[i].list_size)
			continue;
		pthread_mutex_init(&p[i].mutex, NULL);
		pthread_cond_init(&p[i].cond, NULL);
		ret = pthread_create(&p[i].thread, NULL,
				     threaded_find_deltas_by_path, &p[i]);
		if (ret)
			die(_("unable to create thread: %s"), strerror(ret));
		active_threads++;
	}

	/*
	 * Now let's wait for work completion. Each time a thread is done
	 * with its work, we steal half of the remaining work from the
	 * thread with the largest number of unprocessed objects and give
	 * it to that newly idle thread. This ensure good load balancing
	 * until the remaining object list segments are simply too short
	 * to be worth splitting anymore.
	 */
	while (active_threads) {
		struct thread_params *target = NULL;
		struct thread_params *victim = NULL;
		unsigned sub_size = 0;

		/* Wait for any worker to go idle. */
		progress_lock();
		for (;;) {
			for (i = 0; !target && i < delta_search_threads; i++)
				if (!p[i].working)
					target = &p[i];
			if (target)
				break;
			pthread_cond_wait(&progress_cond, &progress_mutex);
		}

		/* Steal from the thread with the most regions left. */
		for (i = 0; i < delta_search_threads; i++)
			if (p[i].remaining > 2*window &&
			    (!victim || victim->remaining < p[i].remaining))
				victim = &p[i];
		if (victim) {
			sub_size = victim->remaining / 2;
			/* Take the tail half of the victim's region queue. */
			target->regions = victim->regions + victim->remaining - sub_size;
			victim->list_size -= sub_size;
			victim->remaining -= sub_size;
		}
		target->list_size = sub_size;
		target->remaining = sub_size;
		target->working = 1;
		progress_unlock();

		/* Wake the idle thread; sub_size == 0 means "terminate". */
		pthread_mutex_lock(&target->mutex);
		target->data_ready = 1;
		pthread_cond_signal(&target->cond);
		pthread_mutex_unlock(&target->mutex);

		if (!sub_size) {
			pthread_join(target->thread, NULL);
			pthread_cond_destroy(&target->cond);
			pthread_mutex_destroy(&target->mutex);
			active_threads--;
		}
	}
	cleanup_threaded_search();
	free(p);

	display_progress(progress_state, progress_nr);
	stop_progress(&progress_state);
}
3554
/*
 * Run delta compression for everything collected in to_pack: resolve
 * delta islands, load object details, optionally do the path-walk
 * ("region") pass first, then the classic type/size-sorted global
 * delta search over all eligible entries.
 */
static void prepare_pack(int window, int depth)
{
	struct object_entry **delta_list;
	uint32_t i, nr_deltas;
	unsigned n;

	if (use_delta_islands)
		resolve_tree_islands(the_repository, progress, &to_pack);

	get_object_details();

	/*
	 * If we're locally repacking then we need to be doubly careful
	 * from now on in order to make sure no stealth corruption gets
	 * propagated to the new pack. Clients receiving streamed packs
	 * should validate everything they get anyway so no need to incur
	 * the additional cost here in that case.
	 */
	if (!pack_to_stdout)
		do_check_packed_object_crc = 1;

	if (!to_pack.nr_objects || !window || !depth)
		return;

	/* Path-based pass: deltify objects grouped by path first. */
	if (path_walk)
		ll_find_deltas_by_region(to_pack.objects, to_pack.regions,
					 0, to_pack.nr_regions);

	ALLOC_ARRAY(delta_list, to_pack.nr_objects);
	nr_deltas = n = 0;

	for (i = 0; i < to_pack.nr_objects; i++) {
		struct object_entry *entry = to_pack.objects + i;

		if (!should_attempt_deltas(entry))
			continue;

		/* Preferred bases join the search but are not themselves packed. */
		if (!entry->preferred_base)
			nr_deltas++;

		delta_list[n++] = entry;
	}

	if (nr_deltas && n > 1) {
		unsigned nr_done = 0;

		if (progress)
			progress_state = start_progress(the_repository,
							_("Compressing objects"),
							nr_deltas);
		QSORT(delta_list, n, type_size_sort);
		ll_find_deltas(delta_list, n, window+1, depth, &nr_done);
		stop_progress(&progress_state);
		if (nr_done != nr_deltas)
			die(_("inconsistency with delta count"));
	}
	free(delta_list);
}
3613
3614static int git_pack_config(const char *k, const char *v,
3615 const struct config_context *ctx, void *cb)
3616{
3617 if (!strcmp(k, "pack.window")) {
3618 window = git_config_int(k, v, ctx->kvi);
3619 return 0;
3620 }
3621 if (!strcmp(k, "pack.windowmemory")) {
3622 window_memory_limit = git_config_ulong(k, v, ctx->kvi);
3623 return 0;
3624 }
3625 if (!strcmp(k, "pack.depth")) {
3626 depth = git_config_int(k, v, ctx->kvi);
3627 return 0;
3628 }
3629 if (!strcmp(k, "pack.deltacachesize")) {
3630 max_delta_cache_size = git_config_int(k, v, ctx->kvi);
3631 return 0;
3632 }
3633 if (!strcmp(k, "pack.deltacachelimit")) {
3634 cache_max_small_delta_size = git_config_int(k, v, ctx->kvi);
3635 return 0;
3636 }
3637 if (!strcmp(k, "pack.writebitmaphashcache")) {
3638 if (git_config_bool(k, v))
3639 write_bitmap_options |= BITMAP_OPT_HASH_CACHE;
3640 else
3641 write_bitmap_options &= ~BITMAP_OPT_HASH_CACHE;
3642 }
3643
3644 if (!strcmp(k, "pack.writebitmaplookuptable")) {
3645 if (git_config_bool(k, v))
3646 write_bitmap_options |= BITMAP_OPT_LOOKUP_TABLE;
3647 else
3648 write_bitmap_options &= ~BITMAP_OPT_LOOKUP_TABLE;
3649 }
3650
3651 if (!strcmp(k, "pack.usebitmaps")) {
3652 use_bitmap_index_default = git_config_bool(k, v);
3653 return 0;
3654 }
3655 if (!strcmp(k, "pack.allowpackreuse")) {
3656 int res = git_parse_maybe_bool_text(v);
3657 if (res < 0) {
3658 if (!strcasecmp(v, "single"))
3659 allow_pack_reuse = SINGLE_PACK_REUSE;
3660 else if (!strcasecmp(v, "multi"))
3661 allow_pack_reuse = MULTI_PACK_REUSE;
3662 else
3663 die(_("invalid pack.allowPackReuse value: '%s'"), v);
3664 } else if (res) {
3665 allow_pack_reuse = SINGLE_PACK_REUSE;
3666 } else {
3667 allow_pack_reuse = NO_PACK_REUSE;
3668 }
3669 return 0;
3670 }
3671 if (!strcmp(k, "pack.threads")) {
3672 delta_search_threads = git_config_int(k, v, ctx->kvi);
3673 if (delta_search_threads < 0)
3674 die(_("invalid number of threads specified (%d)"),
3675 delta_search_threads);
3676 if (!HAVE_THREADS && delta_search_threads != 1) {
3677 warning(_("no threads support, ignoring %s"), k);
3678 delta_search_threads = 0;
3679 }
3680 return 0;
3681 }
3682 if (!strcmp(k, "pack.indexversion")) {
3683 pack_idx_opts.version = git_config_int(k, v, ctx->kvi);
3684 if (pack_idx_opts.version > 2)
3685 die(_("bad pack.indexVersion=%"PRIu32),
3686 pack_idx_opts.version);
3687 return 0;
3688 }
3689 if (!strcmp(k, "pack.writereverseindex")) {
3690 if (git_config_bool(k, v))
3691 pack_idx_opts.flags |= WRITE_REV;
3692 else
3693 pack_idx_opts.flags &= ~WRITE_REV;
3694 return 0;
3695 }
3696 if (!strcmp(k, "uploadpack.blobpackfileuri")) {
3697 struct configured_exclusion *ex;
3698 const char *oid_end, *pack_end;
3699 /*
3700 * Stores the pack hash. This is not a true object ID, but is
3701 * of the same form.
3702 */
3703 struct object_id pack_hash;
3704
3705 if (!v)
3706 return config_error_nonbool(k);
3707
3708 ex = xmalloc(sizeof(*ex));
3709 if (parse_oid_hex(v, &ex->e.oid, &oid_end) ||
3710 *oid_end != ' ' ||
3711 parse_oid_hex(oid_end + 1, &pack_hash, &pack_end) ||
3712 *pack_end != ' ')
3713 die(_("value of uploadpack.blobpackfileuri must be "
3714 "of the form '<object-hash> <pack-hash> <uri>' (got '%s')"), v);
3715 if (oidmap_get(&configured_exclusions, &ex->e.oid))
3716 die(_("object already configured in another "
3717 "uploadpack.blobpackfileuri (got '%s')"), v);
3718 ex->pack_hash_hex = xcalloc(1, pack_end - oid_end);
3719 memcpy(ex->pack_hash_hex, oid_end + 1, pack_end - oid_end - 1);
3720 ex->uri = xstrdup(pack_end + 1);
3721 oidmap_put(&configured_exclusions, ex);
3722 }
3723 return git_default_config(k, v, ctx, cb);
3724}
3725
/* Counters for trace2 output when in --stdin-packs mode. */
static int stdin_packs_found_nr;	/* objects enumerated from the included packs */
static int stdin_packs_hints_nr;	/* name-hash hints filled in during traversal */
3729
/*
 * for_each_object_in_pack() callback for --stdin-packs: queue the
 * object for packing, and additionally seed the revision walk
 * (cb data) with any commit found so the later traversal can fill in
 * name-hash hints.
 */
static int add_object_entry_from_pack(const struct object_id *oid,
				      struct packed_git *p,
				      uint32_t pos,
				      void *_data)
{
	off_t ofs;
	enum object_type type = OBJ_NONE;

	display_progress(progress_state, ++nr_seen);

	if (have_duplicate_entry(oid, 0))
		return 0;

	/* want_object_in_pack() may redirect us to a different pack/offset. */
	ofs = nth_packed_object_offset(p, pos);
	if (!want_object_in_pack(oid, 0, &p, &ofs))
		return 0;

	if (p) {
		struct object_info oi = OBJECT_INFO_INIT;

		oi.typep = &type;
		if (packed_object_info(the_repository, p, ofs, &oi) < 0) {
			die(_("could not get type of object %s in pack %s"),
			    oid_to_hex(oid), p->pack_name);
		} else if (type == OBJ_COMMIT) {
			struct rev_info *revs = _data;
			/*
			 * commits in included packs are used as starting points for the
			 * subsequent revision walk
			 */
			add_pending_oid(revs, NULL, oid, 0);
		}

		stdin_packs_found_nr++;
	}

	create_object_entry(oid, type, 0, 0, 0, p, ofs);

	return 0;
}
3770
3771static void show_object_pack_hint(struct object *object, const char *name,
3772 void *data)
3773{
3774 enum stdin_packs_mode mode = *(enum stdin_packs_mode *)data;
3775 if (mode == STDIN_PACKS_MODE_FOLLOW) {
3776 if (object->type == OBJ_BLOB &&
3777 !odb_has_object(the_repository->objects, &object->oid, 0))
3778 return;
3779 add_object_entry(&object->oid, object->type, name, 0);
3780 } else {
3781 struct object_entry *oe = packlist_find(&to_pack, &object->oid);
3782 if (!oe)
3783 return;
3784
3785 /*
3786 * Our 'to_pack' list was constructed by iterating all
3787 * objects packed in included packs, and so doesn't have
3788 * a non-zero hash field that you would typically pick
3789 * up during a reachability traversal.
3790 *
3791 * Make a best-effort attempt to fill in the ->hash and
3792 * ->no_try_delta fields here in order to perhaps
3793 * improve the delta selection process.
3794 */
3795 oe->hash = pack_name_hash_fn(name);
3796 oe->no_try_delta = name && no_try_delta(name);
3797
3798 stdin_packs_hints_nr++;
3799 }
3800}
3801
3802static void show_commit_pack_hint(struct commit *commit, void *data)
3803{
3804 enum stdin_packs_mode mode = *(enum stdin_packs_mode *)data;
3805
3806 if (mode == STDIN_PACKS_MODE_FOLLOW) {
3807 show_object_pack_hint((struct object *)commit, "", data);
3808 return;
3809 }
3810
3811 /* nothing to do; commits don't have a namehash */
3812
3813}
3814
3815static int pack_mtime_cmp(const void *_a, const void *_b)
3816{
3817 struct packed_git *a = ((const struct string_list_item*)_a)->util;
3818 struct packed_git *b = ((const struct string_list_item*)_b)->util;
3819
3820 /*
3821 * order packs by descending mtime so that objects are laid out
3822 * roughly as newest-to-oldest
3823 */
3824 if (a->mtime < b->mtime)
3825 return 1;
3826 else if (b->mtime < a->mtime)
3827 return -1;
3828 else
3829 return 0;
3830}
3831
/*
 * Parse the --stdin-packs input: one pack basename per line, with a
 * leading '^' marking packs to exclude. Included packs have all of
 * their objects queued for packing (oldest pack first); excluded packs
 * are marked kept-in-core so their objects are dropped later.
 */
static void read_packs_list_from_stdin(struct rev_info *revs)
{
	struct packfile_store *packs = the_repository->objects->packfiles;
	struct strbuf buf = STRBUF_INIT;
	struct string_list include_packs = STRING_LIST_INIT_DUP;
	struct string_list exclude_packs = STRING_LIST_INIT_DUP;
	struct string_list_item *item = NULL;

	struct packed_git *p;

	while (strbuf_getline(&buf, stdin) != EOF) {
		if (!buf.len)
			continue;

		if (*buf.buf == '^')
			string_list_append(&exclude_packs, buf.buf + 1);
		else
			string_list_append(&include_packs, buf.buf);

		strbuf_reset(&buf);
	}

	/* Sort (for lookup below) and drop repeated pack names. */
	string_list_sort(&include_packs);
	string_list_remove_duplicates(&include_packs, 0);
	string_list_sort(&exclude_packs);
	string_list_remove_duplicates(&exclude_packs, 0);

	/* Match each known pack to the requested names via item->util. */
	for (p = packfile_store_get_all_packs(packs); p; p = p->next) {
		const char *pack_name = pack_basename(p);

		if ((item = string_list_lookup(&include_packs, pack_name)))
			item->util = p;
		if ((item = string_list_lookup(&exclude_packs, pack_name)))
			item->util = p;
	}

	/*
	 * Arguments we got on stdin may not even be packs. First
	 * check that to avoid segfaulting later on in
	 * e.g. pack_mtime_cmp(), excluded packs are handled below.
	 *
	 * Since we first parsed our STDIN and then sorted the input
	 * lines the pack we error on will be whatever line happens to
	 * sort first. This is lazy, it's enough that we report one
	 * bad case here, we don't need to report the first/last one,
	 * or all of them.
	 */
	for_each_string_list_item(item, &include_packs) {
		struct packed_git *p = item->util;
		if (!p)
			die(_("could not find pack '%s'"), item->string);
		if (!is_pack_valid(p))
			die(_("packfile %s cannot be accessed"), p->pack_name);
	}

	/*
	 * Then, handle all of the excluded packs, marking them as
	 * kept in-core so that later calls to add_object_entry()
	 * discards any objects that are also found in excluded packs.
	 */
	for_each_string_list_item(item, &exclude_packs) {
		struct packed_git *p = item->util;
		if (!p)
			die(_("could not find pack '%s'"), item->string);
		p->pack_keep_in_core = 1;
	}

	/*
	 * Order packs by ascending mtime; use QSORT directly to access the
	 * string_list_item's ->util pointer, which string_list_sort() does not
	 * provide.
	 */
	QSORT(include_packs.items, include_packs.nr, pack_mtime_cmp);

	for_each_string_list_item(item, &include_packs) {
		struct packed_git *p = item->util;
		for_each_object_in_pack(p,
					add_object_entry_from_pack,
					revs,
					FOR_EACH_OBJECT_PACK_ORDER);
	}

	strbuf_release(&buf);
	string_list_clear(&include_packs, 0);
	string_list_clear(&exclude_packs, 0);
}
3918
3919static void add_unreachable_loose_objects(struct rev_info *revs);
3920
/*
 * Top-level driver for --stdin-packs: read the pack list from stdin,
 * queue objects from the included packs, then run a revision walk to
 * fill in name-hash hints (and, with --unpacked, pick up unreachable
 * loose objects too). Emits trace2 counters at the end.
 */
static void read_stdin_packs(enum stdin_packs_mode mode, int rev_list_unpacked)
{
	struct rev_info revs;

	repo_init_revisions(the_repository, &revs, NULL);
	/*
	 * Use a revision walk to fill in the namehash of objects in the include
	 * packs. To save time, we'll avoid traversing through objects that are
	 * in excluded packs.
	 *
	 * That may cause us to avoid populating all of the namehash fields of
	 * all included objects, but our goal is best-effort, since this is only
	 * an optimization during delta selection.
	 */
	revs.no_kept_objects = 1;
	revs.keep_pack_cache_flags |= IN_CORE_KEEP_PACKS;
	revs.blob_objects = 1;
	revs.tree_objects = 1;
	revs.tag_objects = 1;
	revs.ignore_missing_links = 1;

	/* avoids adding objects in excluded packs */
	ignore_packed_keep_in_core = 1;
	read_packs_list_from_stdin(&revs);
	if (rev_list_unpacked)
		add_unreachable_loose_objects(&revs);

	if (prepare_revision_walk(&revs))
		die(_("revision walk setup failed"));
	traverse_commit_list(&revs,
			     show_commit_pack_hint,
			     show_object_pack_hint,
			     &mode);

	trace2_data_intmax("pack-objects", the_repository, "stdin_packs_found",
			   stdin_packs_found_nr);
	trace2_data_intmax("pack-objects", the_repository, "stdin_packs_hints",
			   stdin_packs_hints_nr);
}
3960
3961static void add_cruft_object_entry(const struct object_id *oid, enum object_type type,
3962 struct packed_git *pack, off_t offset,
3963 const char *name, uint32_t mtime)
3964{
3965 struct object_entry *entry;
3966
3967 display_progress(progress_state, ++nr_seen);
3968
3969 entry = packlist_find(&to_pack, oid);
3970 if (entry) {
3971 if (name) {
3972 entry->hash = pack_name_hash_fn(name);
3973 entry->no_try_delta = no_try_delta(name);
3974 }
3975 } else {
3976 if (!want_object_in_pack_mtime(oid, 0, &pack, &offset, mtime))
3977 return;
3978 if (!pack && type == OBJ_BLOB) {
3979 struct odb_source *source = the_repository->objects->sources;
3980 int found = 0;
3981
3982 for (; !found && source; source = source->next)
3983 if (has_loose_object(source, oid))
3984 found = 1;
3985
3986 /*
3987 * If a traversed tree has a missing blob then we want
3988 * to avoid adding that missing object to our pack.
3989 *
3990 * This only applies to missing blobs, not trees,
3991 * because the traversal needs to parse sub-trees but
3992 * not blobs.
3993 *
3994 * Note we only perform this check when we couldn't
3995 * already find the object in a pack, so we're really
3996 * limited to "ensure non-tip blobs which don't exist in
3997 * packs do exist via loose objects". Confused?
3998 */
3999 if (!found)
4000 return;
4001 }
4002
4003 entry = create_object_entry(oid, type, pack_name_hash_fn(name),
4004 0, name && no_try_delta(name),
4005 pack, offset);
4006 }
4007
4008 if (mtime > oe_cruft_mtime(&to_pack, entry))
4009 oe_set_cruft_mtime(&to_pack, entry, mtime);
4010 return;
4011}
4012
/*
 * Traversal callback: record a reachable-from-recent object in the
 * cruft pack with the expiration cutoff as its fallback mtime.
 */
static void show_cruft_object(struct object *obj, const char *name, void *data UNUSED)
{
	/*
	 * if we did not record it earlier, it's at least as old as our
	 * expiration value. Rather than find it exactly, just use that
	 * value. This may bump it forward from its real mtime, but it
	 * will still be "too old" next time we run with the same
	 * expiration.
	 *
	 * if obj does appear in the packing list, this call is a noop (or may
	 * set the namehash).
	 */
	add_cruft_object_entry(&obj->oid, obj->type, NULL, 0, name, cruft_expiration);
}
4027
/* Commit variant of show_cruft_object(); commits carry no path name. */
static void show_cruft_commit(struct commit *commit, void *data)
{
	show_cruft_object((struct object*)commit, NULL, data);
}
4032
/*
 * Traversal filter: only include objects that are NOT in an in-core
 * kept pack (i.e. not already covered by a fresh pack).
 */
static int cruft_include_check_obj(struct object *obj, void *data UNUSED)
{
	return !has_object_kept_pack(to_pack.repo, &obj->oid, IN_CORE_KEEP_PACKS);
}
4037
/* Commit variant of cruft_include_check_obj(). */
static int cruft_include_check(struct commit *commit, void *data)
{
	return cruft_include_check_obj((struct object*)commit, data);
}
4042
/*
 * Callback for add_unseen_recent_objects_to_traversal(): queue the
 * object in the cruft pack with the mtime discovered for it.
 */
static void set_cruft_mtime(const struct object *object,
			    struct packed_git *pack,
			    off_t offset, time_t mtime)
{
	add_cruft_object_entry(&object->oid, object->type, pack, offset, NULL,
			       mtime);
}
4050
4051static void mark_pack_kept_in_core(struct string_list *packs, unsigned keep)
4052{
4053 struct string_list_item *item = NULL;
4054 for_each_string_list_item(item, packs) {
4055 struct packed_git *p = item->util;
4056 if (!p)
4057 die(_("could not find pack '%s'"), item->string);
4058 if (p->is_cruft && keep)
4059 ignore_packed_keep_in_core_has_cruft = 1;
4060 p->pack_keep_in_core = keep;
4061 }
4062}
4063
4064static void add_objects_in_unpacked_packs(void);
4065
/*
 * No-expiration cruft mode: every object in a non-kept pack, plus
 * every unreachable loose object, goes into the cruft pack.
 */
static void enumerate_cruft_objects(void)
{
	if (progress)
		progress_state = start_progress(the_repository,
						_("Enumerating cruft objects"), 0);

	add_objects_in_unpacked_packs();
	add_unreachable_loose_objects(NULL);

	stop_progress(&progress_state);
}
4077
/*
 * Expiring cruft mode: collect recent unreachable objects as traversal
 * tips, then walk from them so that anything reachable from a recent
 * object is also kept in the cruft pack (with its mtime recorded via
 * set_cruft_mtime()).
 */
static void enumerate_and_traverse_cruft_objects(struct string_list *fresh_packs)
{
	struct packfile_store *packs = the_repository->objects->packfiles;
	struct packed_git *p;
	struct rev_info revs;
	int ret;

	repo_init_revisions(the_repository, &revs, NULL);

	revs.tag_objects = 1;
	revs.tree_objects = 1;
	revs.blob_objects = 1;

	/* Skip objects already covered by fresh (kept) packs. */
	revs.include_check = cruft_include_check;
	revs.include_check_obj = cruft_include_check_obj;

	revs.ignore_missing_links = 1;

	if (progress)
		progress_state = start_progress(the_repository,
						_("Enumerating cruft objects"), 0);
	ret = add_unseen_recent_objects_to_traversal(&revs, cruft_expiration,
						     set_cruft_mtime, 1);
	stop_progress(&progress_state);

	if (ret)
		die(_("unable to add cruft objects"));

	/*
	 * Re-mark only the fresh packs as kept so that objects in
	 * unknown packs do not halt the reachability traversal early.
	 */
	for (p = packfile_store_get_all_packs(packs); p; p = p->next)
		p->pack_keep_in_core = 0;
	mark_pack_kept_in_core(fresh_packs, 1);

	if (prepare_revision_walk(&revs))
		die(_("revision walk setup failed"));
	if (progress)
		progress_state = start_progress(the_repository,
						_("Traversing cruft objects"), 0);
	nr_seen = 0;
	traverse_commit_list(&revs, show_cruft_commit, show_cruft_object, NULL);

	stop_progress(&progress_state);
}
4124
/*
 * Parse the --cruft stdin protocol: one pack basename per line, with a
 * leading '-' marking packs about to be discarded; the rest are
 * "fresh". Unknown packs are temporarily marked kept so their objects
 * are ignored during enumeration. Then enumerate (and, if an
 * expiration is set, traverse) the cruft objects.
 */
static void read_cruft_objects(void)
{
	struct packfile_store *packs = the_repository->objects->packfiles;
	struct strbuf buf = STRBUF_INIT;
	struct string_list discard_packs = STRING_LIST_INIT_DUP;
	struct string_list fresh_packs = STRING_LIST_INIT_DUP;
	struct packed_git *p;

	ignore_packed_keep_in_core = 1;

	while (strbuf_getline(&buf, stdin) != EOF) {
		if (!buf.len)
			continue;

		if (*buf.buf == '-')
			string_list_append(&discard_packs, buf.buf + 1);
		else
			string_list_append(&fresh_packs, buf.buf);
	}

	/* Sorted so string_list_lookup() below can binary-search. */
	string_list_sort(&discard_packs);
	string_list_sort(&fresh_packs);

	for (p = packfile_store_get_all_packs(packs); p; p = p->next) {
		const char *pack_name = pack_basename(p);
		struct string_list_item *item;

		item = string_list_lookup(&fresh_packs, pack_name);
		if (!item)
			item = string_list_lookup(&discard_packs, pack_name);

		if (item) {
			item->util = p;
		} else {
			/*
			 * This pack wasn't mentioned in either the "fresh" or
			 * "discard" list, so the caller didn't know about it.
			 *
			 * Mark it as kept so that its objects are ignored by
			 * add_unseen_recent_objects_to_traversal(). We'll
			 * unmark it before starting the traversal so it doesn't
			 * halt the traversal early.
			 */
			p->pack_keep_in_core = 1;
		}
	}

	mark_pack_kept_in_core(&fresh_packs, 1);
	mark_pack_kept_in_core(&discard_packs, 0);

	if (cruft_expiration)
		enumerate_and_traverse_cruft_objects(&fresh_packs);
	else
		enumerate_cruft_objects();

	strbuf_release(&buf);
	string_list_clear(&discard_packs, 0);
	string_list_clear(&fresh_packs, 0);
}
4184
/*
 * Read the plain object list from stdin: one object ID per line,
 * optionally followed by a path name; a leading '-' marks a preferred
 * base (edge) object. Retries fgets() on EINTR.
 */
static void read_object_list_from_stdin(void)
{
	char line[GIT_MAX_HEXSZ + 1 + PATH_MAX + 2];
	struct object_id oid;
	const char *p;

	for (;;) {
		if (!fgets(line, sizeof(line), stdin)) {
			if (feof(stdin))
				break;
			if (!ferror(stdin))
				BUG("fgets returned NULL, not EOF, not error!");
			/* Only an interrupted read is retried. */
			if (errno != EINTR)
				die_errno("fgets");
			clearerr(stdin);
			continue;
		}
		if (line[0] == '-') {
			if (get_oid_hex(line+1, &oid))
				die(_("expected edge object ID, got garbage:\n %s"),
				    line);
			add_preferred_base(&oid);
			continue;
		}
		if (parse_oid_hex(line, &oid, &p))
			die(_("expected object ID, got garbage:\n %s"), line);

		/* p points just past the oid; p + 1 skips the separator. */
		add_preferred_base_object(p + 1);
		add_object_entry(&oid, OBJ_NONE, p + 1, 0);
	}
}
4216
/*
 * Traversal callback: queue a commit for packing, and feed it to the
 * bitmap index and delta-island machinery when those are in use.
 */
static void show_commit(struct commit *commit, void *data UNUSED)
{
	add_object_entry(&commit->object.oid, OBJ_COMMIT, NULL, 0);

	if (write_bitmap_index)
		index_commit_for_bitmap(commit);

	if (use_delta_islands)
		propagate_island_marks(the_repository, commit);
}
4227
4228static void show_object(struct object *obj, const char *name,
4229 void *data UNUSED)
4230{
4231 add_preferred_base_object(name);
4232 add_object_entry(&obj->oid, obj->type, name, 0);
4233
4234 if (use_delta_islands) {
4235 const char *p;
4236 unsigned depth;
4237 struct object_entry *ent;
4238
4239 /* the empty string is a root tree, which is depth 0 */
4240 depth = *name ? 1 : 0;
4241 for (p = strchr(name, '/'); p; p = strchr(p + 1, '/'))
4242 depth++;
4243
4244 ent = packlist_find(&to_pack, &obj->oid);
4245 if (ent && depth > oe_tree_depth(&to_pack, ent))
4246 oe_set_tree_depth(&to_pack, ent, depth);
4247 }
4248}
4249
/*
 * show_object variant for --missing=allow-any: silently skip any
 * object that is absent from the object store.
 */
static void show_object__ma_allow_any(struct object *obj, const char *name, void *data)
{
	assert(arg_missing_action == MA_ALLOW_ANY);

	/*
	 * Quietly ignore ALL missing objects. This avoids problems with
	 * staging them now and getting an odd error later.
	 */
	if (!odb_has_object(the_repository->objects, &obj->oid, 0))
		return;

	show_object(obj, name, data);
}
4263
/*
 * show_object variant for --missing=allow-promisor: silently skip
 * missing objects, but only when a promisor remote is expected to
 * provide them.
 */
static void show_object__ma_allow_promisor(struct object *obj, const char *name, void *data)
{
	assert(arg_missing_action == MA_ALLOW_PROMISOR);

	/*
	 * Quietly ignore EXPECTED missing objects. This avoids problems with
	 * staging them now and getting an odd error later.
	 */
	if (!odb_has_object(the_repository->objects, &obj->oid, 0) &&
	    is_promisor_object(to_pack.repo, &obj->oid))
		return;

	show_object(obj, name, data);
}
4278
4279static int option_parse_missing_action(const struct option *opt UNUSED,
4280 const char *arg, int unset)
4281{
4282 assert(arg);
4283 assert(!unset);
4284
4285 if (!strcmp(arg, "error")) {
4286 arg_missing_action = MA_ERROR;
4287 fn_show_object = show_object;
4288 return 0;
4289 }
4290
4291 if (!strcmp(arg, "allow-any")) {
4292 arg_missing_action = MA_ALLOW_ANY;
4293 fetch_if_missing = 0;
4294 fn_show_object = show_object__ma_allow_any;
4295 return 0;
4296 }
4297
4298 if (!strcmp(arg, "allow-promisor")) {
4299 arg_missing_action = MA_ALLOW_PROMISOR;
4300 fetch_if_missing = 0;
4301 fn_show_object = show_object__ma_allow_promisor;
4302 return 0;
4303 }
4304
4305 die(_("invalid value for '%s': '%s'"), "--missing", arg);
4306 return 0;
4307}
4308
4309static void show_edge(struct commit *commit)
4310{
4311 add_preferred_base(&commit->object.oid);
4312}
4313
4314static int add_object_in_unpacked_pack(const struct object_id *oid,
4315 struct packed_git *pack,
4316 uint32_t pos,
4317 void *data UNUSED)
4318{
4319 if (cruft) {
4320 off_t offset;
4321 time_t mtime;
4322
4323 if (pack->is_cruft) {
4324 if (load_pack_mtimes(pack) < 0)
4325 die(_("could not load cruft pack .mtimes"));
4326 mtime = nth_packed_mtime(pack, pos);
4327 } else {
4328 mtime = pack->mtime;
4329 }
4330 offset = nth_packed_object_offset(pack, pos);
4331
4332 add_cruft_object_entry(oid, OBJ_NONE, pack, offset,
4333 NULL, mtime);
4334 } else {
4335 add_object_entry(oid, OBJ_NONE, "", 0);
4336 }
4337 return 0;
4338}
4339
4340static void add_objects_in_unpacked_packs(void)
4341{
4342 if (for_each_packed_object(to_pack.repo,
4343 add_object_in_unpacked_pack,
4344 NULL,
4345 FOR_EACH_OBJECT_PACK_ORDER |
4346 FOR_EACH_OBJECT_LOCAL_ONLY |
4347 FOR_EACH_OBJECT_SKIP_IN_CORE_KEPT_PACKS |
4348 FOR_EACH_OBJECT_SKIP_ON_DISK_KEPT_PACKS))
4349 die(_("cannot open pack index"));
4350}
4351
4352static int add_loose_object(const struct object_id *oid, const char *path,
4353 void *data)
4354{
4355 struct rev_info *revs = data;
4356 enum object_type type = odb_read_object_info(the_repository->objects, oid, NULL);
4357
4358 if (type < 0) {
4359 warning(_("loose object at %s could not be examined"), path);
4360 return 0;
4361 }
4362
4363 if (cruft) {
4364 struct stat st;
4365 if (stat(path, &st) < 0) {
4366 if (errno == ENOENT)
4367 return 0;
4368 return error_errno("unable to stat %s", oid_to_hex(oid));
4369 }
4370
4371 add_cruft_object_entry(oid, type, NULL, 0, NULL,
4372 st.st_mtime);
4373 } else {
4374 add_object_entry(oid, type, "", 0);
4375 }
4376
4377 if (revs && type == OBJ_COMMIT)
4378 add_pending_oid(revs, NULL, oid, 0);
4379
4380 return 0;
4381}
4382
4383/*
4384 * We actually don't even have to worry about reachability here.
4385 * add_object_entry will weed out duplicates, so we just add every
4386 * loose object we find.
4387 */
4388static void add_unreachable_loose_objects(struct rev_info *revs)
4389{
4390 for_each_loose_file_in_source(the_repository->objects->sources,
4391 add_loose_object, NULL, NULL, revs);
4392}
4393
/*
 * Does any kept (.keep file or in-core kept) or non-local pack contain
 * the given object?  A static cursor remembers the pack that matched
 * last time and probes it first, since consecutive lookups tend to hit
 * the same pack; the sentinel (void *)1 means "no previous hit".
 */
static int has_sha1_pack_kept_or_nonlocal(const struct object_id *oid)
{
	struct packfile_store *packs = the_repository->objects->packfiles;
	static struct packed_git *last_found = (void *)1;
	struct packed_git *p;

	/* start at the cached pack, or at the head of the pack list */
	p = (last_found != (void *)1) ? last_found :
					packfile_store_get_all_packs(packs);

	while (p) {
		if ((!p->pack_local || p->pack_keep ||
		     p->pack_keep_in_core) &&
		    find_pack_entry_one(oid, p)) {
			last_found = p;
			return 1;
		}
		/*
		 * After the cached pack, continue from the list head;
		 * skip over the cached pack (already checked) when the
		 * walk reaches it again.
		 */
		if (p == last_found)
			p = packfile_store_get_all_packs(packs);
		else
			p = p->next;
		if (p == last_found)
			p = p->next;
	}
	return 0;
}
4419
4420/*
4421 * Store a list of sha1s that are should not be discarded
4422 * because they are either written too recently, or are
4423 * reachable from another object that was.
4424 *
4425 * This is filled by get_object_list.
4426 */
4427static struct oid_array recent_objects;
4428
4429static int loosened_object_can_be_discarded(const struct object_id *oid,
4430 timestamp_t mtime)
4431{
4432 if (!unpack_unreachable_expiration)
4433 return 0;
4434 if (mtime > unpack_unreachable_expiration)
4435 return 0;
4436 if (oid_array_lookup(&recent_objects, oid) >= 0)
4437 return 0;
4438 return 1;
4439}
4440
4441static void loosen_unused_packed_objects(void)
4442{
4443 struct packfile_store *packs = the_repository->objects->packfiles;
4444 struct packed_git *p;
4445 uint32_t i;
4446 uint32_t loosened_objects_nr = 0;
4447 struct object_id oid;
4448
4449 for (p = packfile_store_get_all_packs(packs); p; p = p->next) {
4450 if (!p->pack_local || p->pack_keep || p->pack_keep_in_core)
4451 continue;
4452
4453 if (open_pack_index(p))
4454 die(_("cannot open pack index"));
4455
4456 for (i = 0; i < p->num_objects; i++) {
4457 nth_packed_object_id(&oid, p, i);
4458 if (!packlist_find(&to_pack, &oid) &&
4459 !has_sha1_pack_kept_or_nonlocal(&oid) &&
4460 !loosened_object_can_be_discarded(&oid, p->mtime)) {
4461 if (force_object_loose(the_repository->objects->sources,
4462 &oid, p->mtime))
4463 die(_("unable to force loose object"));
4464 loosened_objects_nr++;
4465 }
4466 }
4467 }
4468
4469 trace2_data_intmax("pack-objects", the_repository,
4470 "loosen_unused_packed_objects/loosened", loosened_objects_nr);
4471}
4472
4473/*
4474 * This tracks any options which pack-reuse code expects to be on, or which a
4475 * reader of the pack might not understand, and which would therefore prevent
4476 * blind reuse of what we have on disk.
4477 */
4478static int pack_options_allow_reuse(void)
4479{
4480 return allow_pack_reuse != NO_PACK_REUSE &&
4481 pack_to_stdout &&
4482 !ignore_packed_keep_on_disk &&
4483 !ignore_packed_keep_in_core &&
4484 (!local || !have_non_local_packs) &&
4485 !incremental;
4486}
4487
4488static int get_object_list_from_bitmap(struct rev_info *revs)
4489{
4490 if (!(bitmap_git = prepare_bitmap_walk(revs, 0)))
4491 return -1;
4492
4493 /*
4494 * For now, force the name-hash version to be 1 since that
4495 * is the version implied by the bitmap format. Later, the
4496 * format can include this version explicitly in its format,
4497 * allowing readers to know the version that was used during
4498 * the bitmap write.
4499 */
4500 name_hash_version = 1;
4501
4502 if (pack_options_allow_reuse())
4503 reuse_partial_packfile_from_bitmap(bitmap_git,
4504 &reuse_packfiles,
4505 &reuse_packfiles_nr,
4506 &reuse_packfile_bitmap,
4507 allow_pack_reuse == MULTI_PACK_REUSE);
4508
4509 if (reuse_packfiles) {
4510 reuse_packfile_objects = bitmap_popcount(reuse_packfile_bitmap);
4511 if (!reuse_packfile_objects)
4512 BUG("expected non-empty reuse bitmap");
4513
4514 nr_result += reuse_packfile_objects;
4515 nr_seen += reuse_packfile_objects;
4516 display_progress(progress_state, nr_seen);
4517 }
4518
4519 traverse_bitmap_commit_list(bitmap_git, revs,
4520 &add_object_entry_from_bitmap);
4521 return 0;
4522}
4523
4524static void record_recent_object(struct object *obj,
4525 const char *name UNUSED,
4526 void *data UNUSED)
4527{
4528 oid_array_append(&recent_objects, &obj->oid);
4529}
4530
4531static void record_recent_commit(struct commit *commit, void *data UNUSED)
4532{
4533 oid_array_append(&recent_objects, &commit->object.oid);
4534}
4535
4536static int mark_bitmap_preferred_tip(const char *refname,
4537 const char *referent UNUSED,
4538 const struct object_id *oid,
4539 int flags UNUSED,
4540 void *data UNUSED)
4541{
4542 struct object_id peeled;
4543 struct object *object;
4544
4545 if (!peel_iterated_oid(the_repository, oid, &peeled))
4546 oid = &peeled;
4547
4548 object = parse_object_or_die(the_repository, oid, refname);
4549 if (object->type == OBJ_COMMIT)
4550 object->flags |= NEEDS_BITMAP;
4551
4552 return 0;
4553}
4554
4555static void mark_bitmap_preferred_tips(void)
4556{
4557 struct string_list_item *item;
4558 const struct string_list *preferred_tips;
4559
4560 preferred_tips = bitmap_preferred_tips(the_repository);
4561 if (!preferred_tips)
4562 return;
4563
4564 for_each_string_list_item(item, preferred_tips) {
4565 refs_for_each_ref_in(get_main_ref_store(the_repository),
4566 item->string, mark_bitmap_preferred_tip,
4567 NULL);
4568 }
4569}
4570
4571static inline int is_oid_uninteresting(struct repository *repo,
4572 struct object_id *oid)
4573{
4574 struct object *o = lookup_object(repo, oid);
4575 return !o || (o->flags & UNINTERESTING);
4576}
4577
4578static int add_objects_by_path(const char *path,
4579 struct oid_array *oids,
4580 enum object_type type,
4581 void *data)
4582{
4583 size_t oe_start = to_pack.nr_objects;
4584 size_t oe_end;
4585 unsigned int *processed = data;
4586
4587 /*
4588 * First, add all objects to the packing data, including the ones
4589 * marked UNINTERESTING (translated to 'exclude') as they can be
4590 * used as delta bases.
4591 */
4592 for (size_t i = 0; i < oids->nr; i++) {
4593 int exclude;
4594 struct object_info oi = OBJECT_INFO_INIT;
4595 struct object_id *oid = &oids->oid[i];
4596
4597 /* Skip objects that do not exist locally. */
4598 if ((exclude_promisor_objects || arg_missing_action != MA_ERROR) &&
4599 odb_read_object_info_extended(the_repository->objects, oid, &oi,
4600 OBJECT_INFO_FOR_PREFETCH) < 0)
4601 continue;
4602
4603 exclude = is_oid_uninteresting(the_repository, oid);
4604
4605 if (exclude && !thin)
4606 continue;
4607
4608 add_object_entry(oid, type, path, exclude);
4609 }
4610
4611 oe_end = to_pack.nr_objects;
4612
4613 /* We can skip delta calculations if it is a no-op. */
4614 if (oe_end == oe_start || !window)
4615 return 0;
4616
4617 ALLOC_GROW(to_pack.regions,
4618 to_pack.nr_regions + 1,
4619 to_pack.nr_regions_alloc);
4620
4621 to_pack.regions[to_pack.nr_regions].start = oe_start;
4622 to_pack.regions[to_pack.nr_regions].nr = oe_end - oe_start;
4623 to_pack.nr_regions++;
4624
4625 *processed += oids->nr;
4626 display_progress(progress_state, *processed);
4627
4628 return 0;
4629}
4630
4631static void get_object_list_path_walk(struct rev_info *revs)
4632{
4633 struct path_walk_info info = PATH_WALK_INFO_INIT;
4634 unsigned int processed = 0;
4635 int result;
4636
4637 info.revs = revs;
4638 info.path_fn = add_objects_by_path;
4639 info.path_fn_data = &processed;
4640
4641 /*
4642 * Allow the --[no-]sparse option to be interesting here, if only
4643 * for testing purposes. Paths with no interesting objects will not
4644 * contribute to the resulting pack, but only create noisy preferred
4645 * base objects.
4646 */
4647 info.prune_all_uninteresting = sparse;
4648 info.edge_aggressive = shallow;
4649
4650 trace2_region_enter("pack-objects", "path-walk", revs->repo);
4651 result = walk_objects_by_path(&info);
4652 trace2_region_leave("pack-objects", "path-walk", revs->repo);
4653
4654 if (result)
4655 die(_("failed to pack objects via path-walk"));
4656}
4657
/*
 * Drive object enumeration for the internal rev-list code path.
 *
 * Sets up the revision walk from "argv", then reads further revision
 * arguments (plus the "--not" and "--shallow <oid>" pseudo-options)
 * from stdin until an empty line or EOF.  The objects are then
 * gathered via bitmaps, the path-walk API, or a regular traversal,
 * and unreachable objects are kept, packed or loosened as requested.
 */
static void get_object_list(struct rev_info *revs, struct strvec *argv)
{
	struct setup_revision_opt s_r_opt = {
		.allow_exclude_promisor_objects = 1,
	};
	char line[1000];
	int flags = 0;
	int save_warning;

	save_commit_buffer = 0;
	setup_revisions_from_strvec(argv, revs, &s_r_opt);

	/* make sure shallows are read */
	is_repository_shallow(the_repository);

	/* stdin lines may name raw objects; silence ambiguity warnings */
	save_warning = warn_on_object_refname_ambiguity;
	warn_on_object_refname_ambiguity = 0;

	while (fgets(line, sizeof(line), stdin) != NULL) {
		int len = strlen(line);
		if (len && line[len - 1] == '\n')
			line[--len] = 0;
		if (!len)
			break;
		if (*line == '-') {
			if (!strcmp(line, "--not")) {
				/* toggle: following revs flip polarity */
				flags ^= UNINTERESTING;
				write_bitmap_index = 0;
				continue;
			}
			if (starts_with(line, "--shallow ")) {
				struct object_id oid;
				if (get_oid_hex(line + 10, &oid))
					die("not an object name '%s'", line + 10);
				register_shallow(the_repository, &oid);
				use_bitmap_index = 0;
				continue;
			}
			die(_("not a rev '%s'"), line);
		}
		if (handle_revision_arg(line, revs, flags, REVARG_CANNOT_BE_FILENAME))
			die(_("bad revision '%s'"), line);
	}

	warn_on_object_refname_ambiguity = save_warning;

	/* the bitmap walk, when possible, does the whole enumeration */
	if (use_bitmap_index && !get_object_list_from_bitmap(revs))
		return;

	if (use_delta_islands)
		load_delta_islands(the_repository, progress);

	if (write_bitmap_index)
		mark_bitmap_preferred_tips();

	if (!fn_show_object)
		fn_show_object = show_object;

	if (path_walk) {
		get_object_list_path_walk(revs);
	} else {
		if (prepare_revision_walk(revs))
			die(_("revision walk setup failed"));
		mark_edges_uninteresting(revs, show_edge, sparse);
		traverse_commit_list(revs,
				     show_commit, fn_show_object,
				     NULL);
	}

	if (unpack_unreachable_expiration) {
		/*
		 * Collect recently-written unreachable objects into
		 * recent_objects so loosen_unused_packed_objects()
		 * does not discard them.
		 */
		revs->ignore_missing_links = 1;
		if (add_unseen_recent_objects_to_traversal(revs,
				unpack_unreachable_expiration, NULL, 0))
			die(_("unable to add recent objects"));
		if (prepare_revision_walk(revs))
			die(_("revision walk setup failed"));
		traverse_commit_list(revs, record_recent_commit,
				     record_recent_object, NULL);
	}

	if (keep_unreachable)
		add_objects_in_unpacked_packs();
	if (pack_loose_unreachable)
		add_unreachable_loose_objects(NULL);
	if (unpack_unreachable)
		loosen_unused_packed_objects();

	oid_array_clear(&recent_objects);
}
4747
4748static void add_extra_kept_packs(const struct string_list *names)
4749{
4750 struct packfile_store *packs = the_repository->objects->packfiles;
4751 struct packed_git *p;
4752
4753 if (!names->nr)
4754 return;
4755
4756 for (p = packfile_store_get_all_packs(packs); p; p = p->next) {
4757 const char *name = basename(p->pack_name);
4758 int i;
4759
4760 if (!p->pack_local)
4761 continue;
4762
4763 for (i = 0; i < names->nr; i++)
4764 if (!fspathcmp(name, names->items[i].string))
4765 break;
4766
4767 if (i < names->nr) {
4768 p->pack_keep_in_core = 1;
4769 ignore_packed_keep_in_core = 1;
4770 continue;
4771 }
4772 }
4773}
4774
4775static int option_parse_quiet(const struct option *opt, const char *arg,
4776 int unset)
4777{
4778 int *val = opt->value;
4779
4780 BUG_ON_OPT_ARG(arg);
4781
4782 if (!unset)
4783 *val = 0;
4784 else if (!*val)
4785 *val = 1;
4786 return 0;
4787}
4788
4789static int option_parse_index_version(const struct option *opt,
4790 const char *arg, int unset)
4791{
4792 struct pack_idx_option *popts = opt->value;
4793 char *c;
4794 const char *val = arg;
4795
4796 BUG_ON_OPT_NEG(unset);
4797
4798 popts->version = strtoul(val, &c, 10);
4799 if (popts->version > 2)
4800 die(_("unsupported index version %s"), val);
4801 if (*c == ',' && c[1])
4802 popts->off32_limit = strtoul(c+1, &c, 0);
4803 if (*c || popts->off32_limit & 0x80000000)
4804 die(_("bad index version '%s'"), val);
4805 return 0;
4806}
4807
4808static int option_parse_unpack_unreachable(const struct option *opt UNUSED,
4809 const char *arg, int unset)
4810{
4811 if (unset) {
4812 unpack_unreachable = 0;
4813 unpack_unreachable_expiration = 0;
4814 }
4815 else {
4816 unpack_unreachable = 1;
4817 if (arg)
4818 unpack_unreachable_expiration = approxidate(arg);
4819 }
4820 return 0;
4821}
4822
4823static int option_parse_cruft_expiration(const struct option *opt UNUSED,
4824 const char *arg, int unset)
4825{
4826 if (unset) {
4827 cruft = 0;
4828 cruft_expiration = 0;
4829 } else {
4830 cruft = 1;
4831 if (arg)
4832 cruft_expiration = approxidate(arg);
4833 }
4834 return 0;
4835}
4836
4837static int is_not_in_promisor_pack_obj(struct object *obj, void *data UNUSED)
4838{
4839 struct object_info info = OBJECT_INFO_INIT;
4840 if (odb_read_object_info_extended(the_repository->objects, &obj->oid, &info, 0))
4841 BUG("should_include_obj should only be called on existing objects");
4842 return info.whence != OI_PACKED || !info.u.packed.pack->pack_promisor;
4843}
4844
/* include_check wrapper of the above for commit objects. */
static int is_not_in_promisor_pack(struct commit *commit, void *data)
{
	return is_not_in_promisor_pack_obj((struct object *)commit, data);
}
4848
4849static int parse_stdin_packs_mode(const struct option *opt, const char *arg,
4850 int unset)
4851{
4852 enum stdin_packs_mode *mode = opt->value;
4853
4854 if (unset)
4855 *mode = STDIN_PACKS_MODE_NONE;
4856 else if (!arg || !*arg)
4857 *mode = STDIN_PACKS_MODE_STANDARD;
4858 else if (!strcmp(arg, "follow"))
4859 *mode = STDIN_PACKS_MODE_FOLLOW;
4860 else
4861 die(_("invalid value for '%s': '%s'"), opt->long_name, arg);
4862
4863 return 0;
4864}
4865
4866int cmd_pack_objects(int argc,
4867 const char **argv,
4868 const char *prefix,
4869 struct repository *repo UNUSED)
4870{
4871 int use_internal_rev_list = 0;
4872 int all_progress_implied = 0;
4873 struct strvec rp = STRVEC_INIT;
4874 int rev_list_unpacked = 0, rev_list_all = 0, rev_list_reflog = 0;
4875 int rev_list_index = 0;
4876 enum stdin_packs_mode stdin_packs = STDIN_PACKS_MODE_NONE;
4877 struct string_list keep_pack_list = STRING_LIST_INIT_NODUP;
4878 struct list_objects_filter_options filter_options =
4879 LIST_OBJECTS_FILTER_INIT;
4880
4881 struct option pack_objects_options[] = {
4882 OPT_CALLBACK_F('q', "quiet", &progress, NULL,
4883 N_("do not show progress meter"),
4884 PARSE_OPT_NOARG, option_parse_quiet),
4885 OPT_SET_INT(0, "progress", &progress,
4886 N_("show progress meter"), 1),
4887 OPT_SET_INT(0, "all-progress", &progress,
4888 N_("show progress meter during object writing phase"), 2),
4889 OPT_BOOL(0, "all-progress-implied",
4890 &all_progress_implied,
4891 N_("similar to --all-progress when progress meter is shown")),
4892 OPT_CALLBACK_F(0, "index-version", &pack_idx_opts, N_("<version>[,<offset>]"),
4893 N_("write the pack index file in the specified idx format version"),
4894 PARSE_OPT_NONEG, option_parse_index_version),
4895 OPT_UNSIGNED(0, "max-pack-size", &pack_size_limit,
4896 N_("maximum size of each output pack file")),
4897 OPT_BOOL(0, "local", &local,
4898 N_("ignore borrowed objects from alternate object store")),
4899 OPT_BOOL(0, "incremental", &incremental,
4900 N_("ignore packed objects")),
4901 OPT_INTEGER(0, "window", &window,
4902 N_("limit pack window by objects")),
4903 OPT_UNSIGNED(0, "window-memory", &window_memory_limit,
4904 N_("limit pack window by memory in addition to object limit")),
4905 OPT_INTEGER(0, "depth", &depth,
4906 N_("maximum length of delta chain allowed in the resulting pack")),
4907 OPT_BOOL(0, "reuse-delta", &reuse_delta,
4908 N_("reuse existing deltas")),
4909 OPT_BOOL(0, "reuse-object", &reuse_object,
4910 N_("reuse existing objects")),
4911 OPT_BOOL(0, "delta-base-offset", &allow_ofs_delta,
4912 N_("use OFS_DELTA objects")),
4913 OPT_INTEGER(0, "threads", &delta_search_threads,
4914 N_("use threads when searching for best delta matches")),
4915 OPT_BOOL(0, "non-empty", &non_empty,
4916 N_("do not create an empty pack output")),
4917 OPT_BOOL(0, "revs", &use_internal_rev_list,
4918 N_("read revision arguments from standard input")),
4919 OPT_SET_INT_F(0, "unpacked", &rev_list_unpacked,
4920 N_("limit the objects to those that are not yet packed"),
4921 1, PARSE_OPT_NONEG),
4922 OPT_SET_INT_F(0, "all", &rev_list_all,
4923 N_("include objects reachable from any reference"),
4924 1, PARSE_OPT_NONEG),
4925 OPT_SET_INT_F(0, "reflog", &rev_list_reflog,
4926 N_("include objects referred by reflog entries"),
4927 1, PARSE_OPT_NONEG),
4928 OPT_SET_INT_F(0, "indexed-objects", &rev_list_index,
4929 N_("include objects referred to by the index"),
4930 1, PARSE_OPT_NONEG),
4931 OPT_CALLBACK_F(0, "stdin-packs", &stdin_packs, N_("mode"),
4932 N_("read packs from stdin"),
4933 PARSE_OPT_OPTARG, parse_stdin_packs_mode),
4934 OPT_BOOL(0, "stdin-packs", &stdin_packs,
4935 N_("read packs from stdin")),
4936 OPT_BOOL(0, "stdout", &pack_to_stdout,
4937 N_("output pack to stdout")),
4938 OPT_BOOL(0, "include-tag", &include_tag,
4939 N_("include tag objects that refer to objects to be packed")),
4940 OPT_BOOL(0, "keep-unreachable", &keep_unreachable,
4941 N_("keep unreachable objects")),
4942 OPT_BOOL(0, "pack-loose-unreachable", &pack_loose_unreachable,
4943 N_("pack loose unreachable objects")),
4944 OPT_CALLBACK_F(0, "unpack-unreachable", NULL, N_("time"),
4945 N_("unpack unreachable objects newer than <time>"),
4946 PARSE_OPT_OPTARG, option_parse_unpack_unreachable),
4947 OPT_BOOL(0, "cruft", &cruft, N_("create a cruft pack")),
4948 OPT_CALLBACK_F(0, "cruft-expiration", NULL, N_("time"),
4949 N_("expire cruft objects older than <time>"),
4950 PARSE_OPT_OPTARG, option_parse_cruft_expiration),
4951 OPT_BOOL(0, "sparse", &sparse,
4952 N_("use the sparse reachability algorithm")),
4953 OPT_BOOL(0, "thin", &thin,
4954 N_("create thin packs")),
4955 OPT_BOOL(0, "path-walk", &path_walk,
4956 N_("use the path-walk API to walk objects when possible")),
4957 OPT_BOOL(0, "shallow", &shallow,
4958 N_("create packs suitable for shallow fetches")),
4959 OPT_BOOL(0, "honor-pack-keep", &ignore_packed_keep_on_disk,
4960 N_("ignore packs that have companion .keep file")),
4961 OPT_STRING_LIST(0, "keep-pack", &keep_pack_list, N_("name"),
4962 N_("ignore this pack")),
4963 OPT_INTEGER(0, "compression", &pack_compression_level,
4964 N_("pack compression level")),
4965 OPT_BOOL(0, "keep-true-parents", &grafts_keep_true_parents,
4966 N_("do not hide commits by grafts")),
4967 OPT_BOOL(0, "use-bitmap-index", &use_bitmap_index,
4968 N_("use a bitmap index if available to speed up counting objects")),
4969 OPT_SET_INT(0, "write-bitmap-index", &write_bitmap_index,
4970 N_("write a bitmap index together with the pack index"),
4971 WRITE_BITMAP_TRUE),
4972 OPT_SET_INT_F(0, "write-bitmap-index-quiet",
4973 &write_bitmap_index,
4974 N_("write a bitmap index if possible"),
4975 WRITE_BITMAP_QUIET, PARSE_OPT_HIDDEN),
4976 OPT_PARSE_LIST_OBJECTS_FILTER(&filter_options),
4977 OPT_CALLBACK_F(0, "missing", NULL, N_("action"),
4978 N_("handling for missing objects"), PARSE_OPT_NONEG,
4979 option_parse_missing_action),
4980 OPT_BOOL(0, "exclude-promisor-objects", &exclude_promisor_objects,
4981 N_("do not pack objects in promisor packfiles")),
4982 OPT_BOOL(0, "exclude-promisor-objects-best-effort",
4983 &exclude_promisor_objects_best_effort,
4984 N_("implies --missing=allow-any")),
4985 OPT_BOOL(0, "delta-islands", &use_delta_islands,
4986 N_("respect islands during delta compression")),
4987 OPT_STRING_LIST(0, "uri-protocol", &uri_protocols,
4988 N_("protocol"),
4989 N_("exclude any configured uploadpack.blobpackfileuri with this protocol")),
4990 OPT_INTEGER(0, "name-hash-version", &name_hash_version,
4991 N_("use the specified name-hash function to group similar objects")),
4992 OPT_END(),
4993 };
4994
4995 if (DFS_NUM_STATES > (1 << OE_DFS_STATE_BITS))
4996 BUG("too many dfs states, increase OE_DFS_STATE_BITS");
4997
4998 disable_replace_refs();
4999
5000 sparse = git_env_bool("GIT_TEST_PACK_SPARSE", -1);
5001 if (the_repository->gitdir) {
5002 prepare_repo_settings(the_repository);
5003 if (sparse < 0)
5004 sparse = the_repository->settings.pack_use_sparse;
5005 if (the_repository->settings.pack_use_multi_pack_reuse)
5006 allow_pack_reuse = MULTI_PACK_REUSE;
5007 }
5008
5009 reset_pack_idx_option(&pack_idx_opts);
5010 pack_idx_opts.flags |= WRITE_REV;
5011 repo_config(the_repository, git_pack_config, NULL);
5012 if (git_env_bool(GIT_TEST_NO_WRITE_REV_INDEX, 0))
5013 pack_idx_opts.flags &= ~WRITE_REV;
5014
5015 progress = isatty(2);
5016 argc = parse_options(argc, argv, prefix, pack_objects_options,
5017 pack_usage, 0);
5018
5019 if (argc) {
5020 base_name = argv[0];
5021 argc--;
5022 }
5023 if (pack_to_stdout != !base_name || argc)
5024 usage_with_options(pack_usage, pack_objects_options);
5025
5026 if (path_walk < 0) {
5027 if (use_bitmap_index > 0 ||
5028 !use_internal_rev_list)
5029 path_walk = 0;
5030 else if (the_repository->gitdir &&
5031 the_repository->settings.pack_use_path_walk)
5032 path_walk = 1;
5033 else
5034 path_walk = git_env_bool("GIT_TEST_PACK_PATH_WALK", 0);
5035 }
5036
5037 if (depth < 0)
5038 depth = 0;
5039 if (depth >= (1 << OE_DEPTH_BITS)) {
5040 warning(_("delta chain depth %d is too deep, forcing %d"),
5041 depth, (1 << OE_DEPTH_BITS) - 1);
5042 depth = (1 << OE_DEPTH_BITS) - 1;
5043 }
5044 if (cache_max_small_delta_size >= (1U << OE_Z_DELTA_BITS)) {
5045 warning(_("pack.deltaCacheLimit is too high, forcing %d"),
5046 (1U << OE_Z_DELTA_BITS) - 1);
5047 cache_max_small_delta_size = (1U << OE_Z_DELTA_BITS) - 1;
5048 }
5049 if (window < 0)
5050 window = 0;
5051
5052 strvec_push(&rp, "pack-objects");
5053
5054 if (path_walk) {
5055 const char *option = NULL;
5056 if (filter_options.choice)
5057 option = "--filter";
5058 else if (use_delta_islands)
5059 option = "--delta-islands";
5060
5061 if (option) {
5062 warning(_("cannot use %s with %s"),
5063 option, "--path-walk");
5064 path_walk = 0;
5065 }
5066 }
5067 if (path_walk) {
5068 strvec_push(&rp, "--boundary");
5069 /*
5070 * We must disable the bitmaps because we are removing
5071 * the --objects / --objects-edge[-aggressive] options.
5072 */
5073 use_bitmap_index = 0;
5074 } else if (thin) {
5075 use_internal_rev_list = 1;
5076 strvec_push(&rp, shallow
5077 ? "--objects-edge-aggressive"
5078 : "--objects-edge");
5079 } else
5080 strvec_push(&rp, "--objects");
5081
5082 if (rev_list_all) {
5083 use_internal_rev_list = 1;
5084 strvec_push(&rp, "--all");
5085 }
5086 if (rev_list_reflog) {
5087 use_internal_rev_list = 1;
5088 strvec_push(&rp, "--reflog");
5089 }
5090 if (rev_list_index) {
5091 use_internal_rev_list = 1;
5092 strvec_push(&rp, "--indexed-objects");
5093 }
5094 if (rev_list_unpacked && !stdin_packs) {
5095 use_internal_rev_list = 1;
5096 strvec_push(&rp, "--unpacked");
5097 }
5098
5099 die_for_incompatible_opt2(exclude_promisor_objects,
5100 "--exclude-promisor-objects",
5101 exclude_promisor_objects_best_effort,
5102 "--exclude-promisor-objects-best-effort");
5103 if (exclude_promisor_objects) {
5104 use_internal_rev_list = 1;
5105 fetch_if_missing = 0;
5106 strvec_push(&rp, "--exclude-promisor-objects");
5107 } else if (exclude_promisor_objects_best_effort) {
5108 use_internal_rev_list = 1;
5109 fetch_if_missing = 0;
5110 option_parse_missing_action(NULL, "allow-any", 0);
5111 /* revs configured below */
5112 }
5113 if (unpack_unreachable || keep_unreachable || pack_loose_unreachable)
5114 use_internal_rev_list = 1;
5115
5116 if (!reuse_object)
5117 reuse_delta = 0;
5118 if (pack_compression_level == -1)
5119 pack_compression_level = Z_DEFAULT_COMPRESSION;
5120 else if (pack_compression_level < 0 || pack_compression_level > Z_BEST_COMPRESSION)
5121 die(_("bad pack compression level %d"), pack_compression_level);
5122
5123 if (!delta_search_threads) /* --threads=0 means autodetect */
5124 delta_search_threads = online_cpus();
5125
5126 if (!HAVE_THREADS && delta_search_threads != 1)
5127 warning(_("no threads support, ignoring --threads"));
5128 if (!pack_to_stdout && !pack_size_limit)
5129 pack_size_limit = pack_size_limit_cfg;
5130 if (pack_to_stdout && pack_size_limit)
5131 die(_("--max-pack-size cannot be used to build a pack for transfer"));
5132 if (pack_size_limit && pack_size_limit < 1024*1024) {
5133 warning(_("minimum pack size limit is 1 MiB"));
5134 pack_size_limit = 1024*1024;
5135 }
5136
5137 if (!pack_to_stdout && thin)
5138 die(_("--thin cannot be used to build an indexable pack"));
5139
5140 die_for_incompatible_opt2(keep_unreachable, "--keep-unreachable",
5141 unpack_unreachable, "--unpack-unreachable");
5142 if (!rev_list_all || !rev_list_reflog || !rev_list_index)
5143 unpack_unreachable_expiration = 0;
5144
5145 die_for_incompatible_opt2(stdin_packs, "--stdin-packs",
5146 filter_options.choice, "--filter");
5147
5148
5149 if (stdin_packs && use_internal_rev_list)
5150 die(_("cannot use internal rev list with --stdin-packs"));
5151
5152 if (cruft) {
5153 if (use_internal_rev_list)
5154 die(_("cannot use internal rev list with --cruft"));
5155 die_for_incompatible_opt2(stdin_packs, "--stdin-packs",
5156 cruft, "--cruft");
5157 }
5158
5159 /*
5160 * "soft" reasons not to use bitmaps - for on-disk repack by default we want
5161 *
5162 * - to produce good pack (with bitmap index not-yet-packed objects are
5163 * packed in suboptimal order).
5164 *
5165 * - to use more robust pack-generation codepath (avoiding possible
5166 * bugs in bitmap code and possible bitmap index corruption).
5167 */
5168 if (!pack_to_stdout)
5169 use_bitmap_index_default = 0;
5170
5171 if (use_bitmap_index < 0)
5172 use_bitmap_index = use_bitmap_index_default;
5173
5174 /* "hard" reasons not to use bitmaps; these just won't work at all */
5175 if (!use_internal_rev_list || (!pack_to_stdout && write_bitmap_index) || is_repository_shallow(the_repository))
5176 use_bitmap_index = 0;
5177
5178 if (pack_to_stdout || !rev_list_all)
5179 write_bitmap_index = 0;
5180
5181 if (name_hash_version < 0)
5182 name_hash_version = (int)git_env_ulong("GIT_TEST_NAME_HASH_VERSION", 1);
5183
5184 validate_name_hash_version();
5185
5186 if (use_delta_islands)
5187 strvec_push(&rp, "--topo-order");
5188
5189 if (progress && all_progress_implied)
5190 progress = 2;
5191
5192 add_extra_kept_packs(&keep_pack_list);
5193 if (ignore_packed_keep_on_disk) {
5194 struct packfile_store *packs = the_repository->objects->packfiles;
5195 struct packed_git *p;
5196
5197 for (p = packfile_store_get_all_packs(packs); p; p = p->next)
5198 if (p->pack_local && p->pack_keep)
5199 break;
5200 if (!p) /* no keep-able packs found */
5201 ignore_packed_keep_on_disk = 0;
5202 }
5203 if (local) {
5204 /*
5205 * unlike ignore_packed_keep_on_disk above, we do not
5206 * want to unset "local" based on looking at packs, as
5207 * it also covers non-local objects
5208 */
5209 struct packfile_store *packs = the_repository->objects->packfiles;
5210 struct packed_git *p;
5211
5212 for (p = packfile_store_get_all_packs(packs); p; p = p->next) {
5213 if (!p->pack_local) {
5214 have_non_local_packs = 1;
5215 break;
5216 }
5217 }
5218 }
5219
5220 trace2_region_enter("pack-objects", "enumerate-objects",
5221 the_repository);
5222 prepare_packing_data(the_repository, &to_pack);
5223
5224 if (progress && !cruft)
5225 progress_state = start_progress(the_repository,
5226 _("Enumerating objects"), 0);
5227 if (stdin_packs) {
5228 read_stdin_packs(stdin_packs, rev_list_unpacked);
5229 } else if (cruft) {
5230 read_cruft_objects();
5231 } else if (!use_internal_rev_list) {
5232 read_object_list_from_stdin();
5233 } else {
5234 struct rev_info revs;
5235
5236 repo_init_revisions(the_repository, &revs, NULL);
5237 list_objects_filter_copy(&revs.filter, &filter_options);
5238 if (exclude_promisor_objects_best_effort) {
5239 revs.include_check = is_not_in_promisor_pack;
5240 revs.include_check_obj = is_not_in_promisor_pack_obj;
5241 }
5242 get_object_list(&revs, &rp);
5243 release_revisions(&revs);
5244 }
5245 cleanup_preferred_base();
5246 if (include_tag && nr_result)
5247 refs_for_each_tag_ref(get_main_ref_store(the_repository),
5248 add_ref_tag, NULL);
5249 stop_progress(&progress_state);
5250 trace2_region_leave("pack-objects", "enumerate-objects",
5251 the_repository);
5252
5253 if (non_empty && !nr_result)
5254 goto cleanup;
5255 if (nr_result) {
5256 trace2_region_enter("pack-objects", "prepare-pack",
5257 the_repository);
5258 prepare_pack(window, depth);
5259 trace2_region_leave("pack-objects", "prepare-pack",
5260 the_repository);
5261 }
5262
5263 trace2_region_enter("pack-objects", "write-pack-file", the_repository);
5264 write_excluded_by_configs();
5265 write_pack_file();
5266 trace2_region_leave("pack-objects", "write-pack-file", the_repository);
5267
5268 if (progress)
5269 fprintf_ln(stderr,
5270 _("Total %"PRIu32" (delta %"PRIu32"),"
5271 " reused %"PRIu32" (delta %"PRIu32"),"
5272 " pack-reused %"PRIu32" (from %"PRIuMAX")"),
5273 written, written_delta, reused, reused_delta,
5274 reuse_packfile_objects,
5275 (uintmax_t)reuse_packfiles_used_nr);
5276
5277 trace2_data_intmax("pack-objects", the_repository, "written", written);
5278 trace2_data_intmax("pack-objects", the_repository, "written/delta", written_delta);
5279 trace2_data_intmax("pack-objects", the_repository, "reused", reused);
5280 trace2_data_intmax("pack-objects", the_repository, "reused/delta", reused_delta);
5281 trace2_data_intmax("pack-objects", the_repository, "pack-reused", reuse_packfile_objects);
5282 trace2_data_intmax("pack-objects", the_repository, "packs-reused", reuse_packfiles_used_nr);
5283
5284cleanup:
5285 clear_packing_data(&to_pack);
5286 list_objects_filter_release(&filter_options);
5287 string_list_clear(&keep_pack_list, 0);
5288 strvec_clear(&rp);
5289
5290 return 0;
5291}