/*
 * read-cache.c — captured from the "reftables-rust" Git fork
 * (full file: 3983 lines, 112 kB; web-viewer chrome removed,
 * source reconstructed below).
 */
1/* 2 * GIT - The information manager from hell 3 * 4 * Copyright (C) Linus Torvalds, 2005 5 */ 6 7#define USE_THE_REPOSITORY_VARIABLE 8#define DISABLE_SIGN_COMPARE_WARNINGS 9 10#include "git-compat-util.h" 11#include "config.h" 12#include "date.h" 13#include "diff.h" 14#include "diffcore.h" 15#include "hex.h" 16#include "tempfile.h" 17#include "lockfile.h" 18#include "cache-tree.h" 19#include "refs.h" 20#include "dir.h" 21#include "object-file.h" 22#include "odb.h" 23#include "oid-array.h" 24#include "tree.h" 25#include "commit.h" 26#include "environment.h" 27#include "gettext.h" 28#include "mem-pool.h" 29#include "name-hash.h" 30#include "object-name.h" 31#include "path.h" 32#include "preload-index.h" 33#include "read-cache.h" 34#include "repository.h" 35#include "resolve-undo.h" 36#include "revision.h" 37#include "strbuf.h" 38#include "trace2.h" 39#include "varint.h" 40#include "split-index.h" 41#include "symlinks.h" 42#include "utf8.h" 43#include "fsmonitor.h" 44#include "thread-utils.h" 45#include "progress.h" 46#include "sparse-index.h" 47#include "csum-file.h" 48#include "promisor-remote.h" 49#include "hook.h" 50 51/* Mask for the name length in ce_flags in the on-disk index */ 52 53#define CE_NAMEMASK (0x0fff) 54 55/* Index extensions. 56 * 57 * The first letter should be 'A'..'Z' for extensions that are not 58 * necessary for a correct operation (i.e. optimization data). 59 * When new extensions are added that _needs_ to be understood in 60 * order to correctly interpret the index file, pick character that 61 * is outside the range, to cause the reader to abort. 
62 */ 63 64#define CACHE_EXT(s) ( (s[0]<<24)|(s[1]<<16)|(s[2]<<8)|(s[3]) ) 65#define CACHE_EXT_TREE 0x54524545 /* "TREE" */ 66#define CACHE_EXT_RESOLVE_UNDO 0x52455543 /* "REUC" */ 67#define CACHE_EXT_LINK 0x6c696e6b /* "link" */ 68#define CACHE_EXT_UNTRACKED 0x554E5452 /* "UNTR" */ 69#define CACHE_EXT_FSMONITOR 0x46534D4E /* "FSMN" */ 70#define CACHE_EXT_ENDOFINDEXENTRIES 0x454F4945 /* "EOIE" */ 71#define CACHE_EXT_INDEXENTRYOFFSETTABLE 0x49454F54 /* "IEOT" */ 72#define CACHE_EXT_SPARSE_DIRECTORIES 0x73646972 /* "sdir" */ 73 74/* changes that can be kept in $GIT_DIR/index (basically all extensions) */ 75#define EXTMASK (RESOLVE_UNDO_CHANGED | CACHE_TREE_CHANGED | \ 76 CE_ENTRY_ADDED | CE_ENTRY_REMOVED | CE_ENTRY_CHANGED | \ 77 SPLIT_INDEX_ORDERED | UNTRACKED_CHANGED | FSMONITOR_CHANGED) 78 79 80/* 81 * This is an estimate of the pathname length in the index. We use 82 * this for V4 index files to guess the un-deltafied size of the index 83 * in memory because of pathname deltafication. This is not required 84 * for V2/V3 index formats because their pathnames are not compressed. 85 * If the initial amount of memory set aside is not sufficient, the 86 * mem pool will allocate extra memory. 
87 */ 88#define CACHE_ENTRY_PATH_LENGTH 80 89 90enum index_search_mode { 91 NO_EXPAND_SPARSE = 0, 92 EXPAND_SPARSE = 1 93}; 94 95static inline struct cache_entry *mem_pool__ce_alloc(struct mem_pool *mem_pool, size_t len) 96{ 97 struct cache_entry *ce; 98 ce = mem_pool_alloc(mem_pool, cache_entry_size(len)); 99 ce->mem_pool_allocated = 1; 100 return ce; 101} 102 103static inline struct cache_entry *mem_pool__ce_calloc(struct mem_pool *mem_pool, size_t len) 104{ 105 struct cache_entry * ce; 106 ce = mem_pool_calloc(mem_pool, 1, cache_entry_size(len)); 107 ce->mem_pool_allocated = 1; 108 return ce; 109} 110 111static struct mem_pool *find_mem_pool(struct index_state *istate) 112{ 113 struct mem_pool **pool_ptr; 114 115 if (istate->split_index && istate->split_index->base) 116 pool_ptr = &istate->split_index->base->ce_mem_pool; 117 else 118 pool_ptr = &istate->ce_mem_pool; 119 120 if (!*pool_ptr) { 121 *pool_ptr = xmalloc(sizeof(**pool_ptr)); 122 mem_pool_init(*pool_ptr, 0); 123 } 124 125 return *pool_ptr; 126} 127 128static const char *alternate_index_output; 129 130static void set_index_entry(struct index_state *istate, int nr, struct cache_entry *ce) 131{ 132 if (S_ISSPARSEDIR(ce->ce_mode)) 133 istate->sparse_index = INDEX_COLLAPSED; 134 135 istate->cache[nr] = ce; 136 add_name_hash(istate, ce); 137} 138 139static void replace_index_entry(struct index_state *istate, int nr, struct cache_entry *ce) 140{ 141 struct cache_entry *old = istate->cache[nr]; 142 143 replace_index_entry_in_base(istate, old, ce); 144 remove_name_hash(istate, old); 145 discard_cache_entry(old); 146 ce->ce_flags &= ~CE_HASHED; 147 set_index_entry(istate, nr, ce); 148 ce->ce_flags |= CE_UPDATE_IN_BASE; 149 mark_fsmonitor_invalid(istate, ce); 150 istate->cache_changed |= CE_ENTRY_CHANGED; 151} 152 153void rename_index_entry_at(struct index_state *istate, int nr, const char *new_name) 154{ 155 struct cache_entry *old_entry = istate->cache[nr], *new_entry, *refreshed; 156 int namelen = 
strlen(new_name); 157 158 new_entry = make_empty_cache_entry(istate, namelen); 159 copy_cache_entry(new_entry, old_entry); 160 new_entry->ce_flags &= ~CE_HASHED; 161 new_entry->ce_namelen = namelen; 162 new_entry->index = 0; 163 memcpy(new_entry->name, new_name, namelen + 1); 164 165 cache_tree_invalidate_path(istate, old_entry->name); 166 untracked_cache_remove_from_index(istate, old_entry->name); 167 remove_index_entry_at(istate, nr); 168 169 /* 170 * Refresh the new index entry. Using 'refresh_cache_entry' ensures 171 * we only update stat info if the entry is otherwise up-to-date (i.e., 172 * the contents/mode haven't changed). This ensures that we reflect the 173 * 'ctime' of the rename in the index without (incorrectly) updating 174 * the cached stat info to reflect unstaged changes on disk. 175 */ 176 refreshed = refresh_cache_entry(istate, new_entry, CE_MATCH_REFRESH); 177 if (refreshed && refreshed != new_entry) { 178 add_index_entry(istate, refreshed, ADD_CACHE_OK_TO_ADD|ADD_CACHE_OK_TO_REPLACE); 179 discard_cache_entry(new_entry); 180 } else 181 add_index_entry(istate, new_entry, ADD_CACHE_OK_TO_ADD|ADD_CACHE_OK_TO_REPLACE); 182} 183 184/* 185 * This only updates the "non-critical" parts of the directory 186 * cache, ie the parts that aren't tracked by GIT, and only used 187 * to validate the cache. 188 */ 189void fill_stat_cache_info(struct index_state *istate, struct cache_entry *ce, struct stat *st) 190{ 191 fill_stat_data(&ce->ce_stat_data, st); 192 193 if (assume_unchanged) 194 ce->ce_flags |= CE_VALID; 195 196 if (S_ISREG(st->st_mode)) { 197 ce_mark_uptodate(ce); 198 mark_fsmonitor_valid(istate, ce); 199 } 200} 201 202static unsigned int st_mode_from_ce(const struct cache_entry *ce) 203{ 204 extern int trust_executable_bit, has_symlinks; 205 206 switch (ce->ce_mode & S_IFMT) { 207 case S_IFLNK: 208 return has_symlinks ? S_IFLNK : (S_IFREG | 0644); 209 case S_IFREG: 210 return (ce->ce_mode & (trust_executable_bit ? 
0755 : 0644)) | S_IFREG; 211 case S_IFGITLINK: 212 return S_IFDIR | 0755; 213 case S_IFDIR: 214 return ce->ce_mode; 215 default: 216 BUG("unsupported ce_mode: %o", ce->ce_mode); 217 } 218} 219 220int fake_lstat(const struct cache_entry *ce, struct stat *st) 221{ 222 fake_lstat_data(&ce->ce_stat_data, st); 223 st->st_mode = st_mode_from_ce(ce); 224 225 /* always succeed as lstat() replacement */ 226 return 0; 227} 228 229static int ce_compare_data(struct index_state *istate, 230 const struct cache_entry *ce, 231 struct stat *st) 232{ 233 int match = -1; 234 int fd = git_open_cloexec(ce->name, O_RDONLY); 235 236 if (fd >= 0) { 237 struct object_id oid; 238 if (!index_fd(istate, &oid, fd, st, OBJ_BLOB, ce->name, 0)) 239 match = !oideq(&oid, &ce->oid); 240 /* index_fd() closed the file descriptor already */ 241 } 242 return match; 243} 244 245static int ce_compare_link(const struct cache_entry *ce, size_t expected_size) 246{ 247 int match = -1; 248 void *buffer; 249 unsigned long size; 250 enum object_type type; 251 struct strbuf sb = STRBUF_INIT; 252 253 if (strbuf_readlink(&sb, ce->name, expected_size)) 254 return -1; 255 256 buffer = odb_read_object(the_repository->objects, &ce->oid, &type, &size); 257 if (buffer) { 258 if (size == sb.len) 259 match = memcmp(buffer, sb.buf, size); 260 free(buffer); 261 } 262 strbuf_release(&sb); 263 return match; 264} 265 266static int ce_compare_gitlink(const struct cache_entry *ce) 267{ 268 struct object_id oid; 269 270 /* 271 * We don't actually require that the .git directory 272 * under GITLINK directory be a valid git directory. It 273 * might even be missing (in case nobody populated that 274 * sub-project). 275 * 276 * If so, we consider it always to match. 
277 */ 278 if (repo_resolve_gitlink_ref(the_repository, ce->name, 279 "HEAD", &oid) < 0) 280 return 0; 281 return !oideq(&oid, &ce->oid); 282} 283 284static int ce_modified_check_fs(struct index_state *istate, 285 const struct cache_entry *ce, 286 struct stat *st) 287{ 288 switch (st->st_mode & S_IFMT) { 289 case S_IFREG: 290 if (ce_compare_data(istate, ce, st)) 291 return DATA_CHANGED; 292 break; 293 case S_IFLNK: 294 if (ce_compare_link(ce, xsize_t(st->st_size))) 295 return DATA_CHANGED; 296 break; 297 case S_IFDIR: 298 if (S_ISGITLINK(ce->ce_mode)) 299 return ce_compare_gitlink(ce) ? DATA_CHANGED : 0; 300 /* else fallthrough */ 301 default: 302 return TYPE_CHANGED; 303 } 304 return 0; 305} 306 307static int ce_match_stat_basic(const struct cache_entry *ce, struct stat *st) 308{ 309 unsigned int changed = 0; 310 311 if (ce->ce_flags & CE_REMOVE) 312 return MODE_CHANGED | DATA_CHANGED | TYPE_CHANGED; 313 314 switch (ce->ce_mode & S_IFMT) { 315 case S_IFREG: 316 changed |= !S_ISREG(st->st_mode) ? TYPE_CHANGED : 0; 317 /* We consider only the owner x bit to be relevant for 318 * "mode changes" 319 */ 320 if (trust_executable_bit && 321 (0100 & (ce->ce_mode ^ st->st_mode))) 322 changed |= MODE_CHANGED; 323 break; 324 case S_IFLNK: 325 if (!S_ISLNK(st->st_mode) && 326 (has_symlinks || !S_ISREG(st->st_mode))) 327 changed |= TYPE_CHANGED; 328 break; 329 case S_IFGITLINK: 330 /* We ignore most of the st_xxx fields for gitlinks */ 331 if (!S_ISDIR(st->st_mode)) 332 changed |= TYPE_CHANGED; 333 else if (ce_compare_gitlink(ce)) 334 changed |= DATA_CHANGED; 335 return changed; 336 default: 337 BUG("unsupported ce_mode: %o", ce->ce_mode); 338 } 339 340 changed |= match_stat_data(&ce->ce_stat_data, st); 341 342 /* Racily smudged entry? 
*/ 343 if (!ce->ce_stat_data.sd_size) { 344 if (!is_empty_blob_oid(&ce->oid, the_repository->hash_algo)) 345 changed |= DATA_CHANGED; 346 } 347 348 return changed; 349} 350 351static int is_racy_stat(const struct index_state *istate, 352 const struct stat_data *sd) 353{ 354 return (istate->timestamp.sec && 355#ifdef USE_NSEC 356 /* nanosecond timestamped files can also be racy! */ 357 (istate->timestamp.sec < sd->sd_mtime.sec || 358 (istate->timestamp.sec == sd->sd_mtime.sec && 359 istate->timestamp.nsec <= sd->sd_mtime.nsec)) 360#else 361 istate->timestamp.sec <= sd->sd_mtime.sec 362#endif 363 ); 364} 365 366int is_racy_timestamp(const struct index_state *istate, 367 const struct cache_entry *ce) 368{ 369 return (!S_ISGITLINK(ce->ce_mode) && 370 is_racy_stat(istate, &ce->ce_stat_data)); 371} 372 373int match_stat_data_racy(const struct index_state *istate, 374 const struct stat_data *sd, struct stat *st) 375{ 376 if (is_racy_stat(istate, sd)) 377 return MTIME_CHANGED; 378 return match_stat_data(sd, st); 379} 380 381int ie_match_stat(struct index_state *istate, 382 const struct cache_entry *ce, struct stat *st, 383 unsigned int options) 384{ 385 unsigned int changed; 386 int ignore_valid = options & CE_MATCH_IGNORE_VALID; 387 int ignore_skip_worktree = options & CE_MATCH_IGNORE_SKIP_WORKTREE; 388 int assume_racy_is_modified = options & CE_MATCH_RACY_IS_DIRTY; 389 int ignore_fsmonitor = options & CE_MATCH_IGNORE_FSMONITOR; 390 391 if (!ignore_fsmonitor) 392 refresh_fsmonitor(istate); 393 /* 394 * If it's marked as always valid in the index, it's 395 * valid whatever the checked-out copy says. 
396 * 397 * skip-worktree has the same effect with higher precedence 398 */ 399 if (!ignore_skip_worktree && ce_skip_worktree(ce)) 400 return 0; 401 if (!ignore_valid && (ce->ce_flags & CE_VALID)) 402 return 0; 403 if (!ignore_fsmonitor && (ce->ce_flags & CE_FSMONITOR_VALID)) 404 return 0; 405 406 /* 407 * Intent-to-add entries have not been added, so the index entry 408 * by definition never matches what is in the work tree until it 409 * actually gets added. 410 */ 411 if (ce_intent_to_add(ce)) 412 return DATA_CHANGED | TYPE_CHANGED | MODE_CHANGED; 413 414 changed = ce_match_stat_basic(ce, st); 415 416 /* 417 * Within 1 second of this sequence: 418 * echo xyzzy >file && git-update-index --add file 419 * running this command: 420 * echo frotz >file 421 * would give a falsely clean cache entry. The mtime and 422 * length match the cache, and other stat fields do not change. 423 * 424 * We could detect this at update-index time (the cache entry 425 * being registered/updated records the same time as "now") 426 * and delay the return from git-update-index, but that would 427 * effectively mean we can make at most one commit per second, 428 * which is not acceptable. Instead, we check cache entries 429 * whose mtime are the same as the index file timestamp more 430 * carefully than others. 
431 */ 432 if (!changed && is_racy_timestamp(istate, ce)) { 433 if (assume_racy_is_modified) 434 changed |= DATA_CHANGED; 435 else 436 changed |= ce_modified_check_fs(istate, ce, st); 437 } 438 439 return changed; 440} 441 442int ie_modified(struct index_state *istate, 443 const struct cache_entry *ce, 444 struct stat *st, unsigned int options) 445{ 446 int changed, changed_fs; 447 448 changed = ie_match_stat(istate, ce, st, options); 449 if (!changed) 450 return 0; 451 /* 452 * If the mode or type has changed, there's no point in trying 453 * to refresh the entry - it's not going to match 454 */ 455 if (changed & (MODE_CHANGED | TYPE_CHANGED)) 456 return changed; 457 458 /* 459 * Immediately after read-tree or update-index --cacheinfo, 460 * the length field is zero, as we have never even read the 461 * lstat(2) information once, and we cannot trust DATA_CHANGED 462 * returned by ie_match_stat() which in turn was returned by 463 * ce_match_stat_basic() to signal that the filesize of the 464 * blob changed. We have to actually go to the filesystem to 465 * see if the contents match, and if so, should answer "unchanged". 466 * 467 * The logic does not apply to gitlinks, as ce_match_stat_basic() 468 * already has checked the actual HEAD from the filesystem in the 469 * subproject. If ie_match_stat() already said it is different, 470 * then we know it is. 
471 */ 472 if ((changed & DATA_CHANGED) && 473 (S_ISGITLINK(ce->ce_mode) || ce->ce_stat_data.sd_size != 0)) 474 return changed; 475 476 changed_fs = ce_modified_check_fs(istate, ce, st); 477 if (changed_fs) 478 return changed | changed_fs; 479 return 0; 480} 481 482static int cache_name_stage_compare(const char *name1, int len1, int stage1, 483 const char *name2, int len2, int stage2) 484{ 485 int cmp; 486 487 cmp = name_compare(name1, len1, name2, len2); 488 if (cmp) 489 return cmp; 490 491 if (stage1 < stage2) 492 return -1; 493 if (stage1 > stage2) 494 return 1; 495 return 0; 496} 497 498int cmp_cache_name_compare(const void *a_, const void *b_) 499{ 500 const struct cache_entry *ce1, *ce2; 501 502 ce1 = *((const struct cache_entry **)a_); 503 ce2 = *((const struct cache_entry **)b_); 504 return cache_name_stage_compare(ce1->name, ce1->ce_namelen, ce_stage(ce1), 505 ce2->name, ce2->ce_namelen, ce_stage(ce2)); 506} 507 508static int index_name_stage_pos(struct index_state *istate, 509 const char *name, int namelen, 510 int stage, 511 enum index_search_mode search_mode) 512{ 513 int first, last; 514 515 first = 0; 516 last = istate->cache_nr; 517 while (last > first) { 518 int next = first + ((last - first) >> 1); 519 struct cache_entry *ce = istate->cache[next]; 520 int cmp = cache_name_stage_compare(name, namelen, stage, ce->name, ce_namelen(ce), ce_stage(ce)); 521 if (!cmp) 522 return next; 523 if (cmp < 0) { 524 last = next; 525 continue; 526 } 527 first = next+1; 528 } 529 530 if (search_mode == EXPAND_SPARSE && istate->sparse_index && 531 first > 0) { 532 /* Note: first <= istate->cache_nr */ 533 struct cache_entry *ce = istate->cache[first - 1]; 534 535 /* 536 * If we are in a sparse-index _and_ the entry before the 537 * insertion position is a sparse-directory entry that is 538 * an ancestor of 'name', then we need to expand the index 539 * and search again. This will only trigger once, because 540 * thereafter the index is fully expanded. 
541 */ 542 if (S_ISSPARSEDIR(ce->ce_mode) && 543 ce_namelen(ce) < namelen && 544 !strncmp(name, ce->name, ce_namelen(ce))) { 545 ensure_full_index(istate); 546 return index_name_stage_pos(istate, name, namelen, stage, search_mode); 547 } 548 } 549 550 return -first-1; 551} 552 553int index_name_pos(struct index_state *istate, const char *name, int namelen) 554{ 555 return index_name_stage_pos(istate, name, namelen, 0, EXPAND_SPARSE); 556} 557 558int index_name_pos_sparse(struct index_state *istate, const char *name, int namelen) 559{ 560 return index_name_stage_pos(istate, name, namelen, 0, NO_EXPAND_SPARSE); 561} 562 563int index_entry_exists(struct index_state *istate, const char *name, int namelen) 564{ 565 return index_name_stage_pos(istate, name, namelen, 0, NO_EXPAND_SPARSE) >= 0; 566} 567 568int remove_index_entry_at(struct index_state *istate, int pos) 569{ 570 struct cache_entry *ce = istate->cache[pos]; 571 572 record_resolve_undo(istate, ce); 573 remove_name_hash(istate, ce); 574 save_or_free_index_entry(istate, ce); 575 istate->cache_changed |= CE_ENTRY_REMOVED; 576 istate->cache_nr--; 577 if (pos >= istate->cache_nr) 578 return 0; 579 MOVE_ARRAY(istate->cache + pos, istate->cache + pos + 1, 580 istate->cache_nr - pos); 581 return 1; 582} 583 584/* 585 * Remove all cache entries marked for removal, that is where 586 * CE_REMOVE is set in ce_flags. This is much more effective than 587 * calling remove_index_entry_at() for each entry to be removed. 
588 */ 589void remove_marked_cache_entries(struct index_state *istate, int invalidate) 590{ 591 struct cache_entry **ce_array = istate->cache; 592 unsigned int i, j; 593 594 for (i = j = 0; i < istate->cache_nr; i++) { 595 if (ce_array[i]->ce_flags & CE_REMOVE) { 596 if (invalidate) { 597 cache_tree_invalidate_path(istate, 598 ce_array[i]->name); 599 untracked_cache_remove_from_index(istate, 600 ce_array[i]->name); 601 } 602 remove_name_hash(istate, ce_array[i]); 603 save_or_free_index_entry(istate, ce_array[i]); 604 } 605 else 606 ce_array[j++] = ce_array[i]; 607 } 608 if (j == istate->cache_nr) 609 return; 610 istate->cache_changed |= CE_ENTRY_REMOVED; 611 istate->cache_nr = j; 612} 613 614int remove_file_from_index(struct index_state *istate, const char *path) 615{ 616 int pos = index_name_pos(istate, path, strlen(path)); 617 if (pos < 0) 618 pos = -pos-1; 619 cache_tree_invalidate_path(istate, path); 620 untracked_cache_remove_from_index(istate, path); 621 while (pos < istate->cache_nr && !strcmp(istate->cache[pos]->name, path)) 622 remove_index_entry_at(istate, pos); 623 return 0; 624} 625 626static int compare_name(struct cache_entry *ce, const char *path, int namelen) 627{ 628 return namelen != ce_namelen(ce) || memcmp(path, ce->name, namelen); 629} 630 631static int index_name_pos_also_unmerged(struct index_state *istate, 632 const char *path, int namelen) 633{ 634 int pos = index_name_pos(istate, path, namelen); 635 struct cache_entry *ce; 636 637 if (pos >= 0) 638 return pos; 639 640 /* maybe unmerged? 
*/ 641 pos = -1 - pos; 642 if (pos >= istate->cache_nr || 643 compare_name((ce = istate->cache[pos]), path, namelen)) 644 return -1; 645 646 /* order of preference: stage 2, 1, 3 */ 647 if (ce_stage(ce) == 1 && pos + 1 < istate->cache_nr && 648 ce_stage((ce = istate->cache[pos + 1])) == 2 && 649 !compare_name(ce, path, namelen)) 650 pos++; 651 return pos; 652} 653 654static int different_name(struct cache_entry *ce, struct cache_entry *alias) 655{ 656 int len = ce_namelen(ce); 657 return ce_namelen(alias) != len || memcmp(ce->name, alias->name, len); 658} 659 660/* 661 * If we add a filename that aliases in the cache, we will use the 662 * name that we already have - but we don't want to update the same 663 * alias twice, because that implies that there were actually two 664 * different files with aliasing names! 665 * 666 * So we use the CE_ADDED flag to verify that the alias was an old 667 * one before we accept it as 668 */ 669static struct cache_entry *create_alias_ce(struct index_state *istate, 670 struct cache_entry *ce, 671 struct cache_entry *alias) 672{ 673 int len; 674 struct cache_entry *new_entry; 675 676 if (alias->ce_flags & CE_ADDED) 677 die(_("will not add file alias '%s' ('%s' already exists in index)"), 678 ce->name, alias->name); 679 680 /* Ok, create the new entry using the name of the existing alias */ 681 len = ce_namelen(alias); 682 new_entry = make_empty_cache_entry(istate, len); 683 memcpy(new_entry->name, alias->name, len); 684 copy_cache_entry(new_entry, ce); 685 save_or_free_index_entry(istate, ce); 686 return new_entry; 687} 688 689void set_object_name_for_intent_to_add_entry(struct cache_entry *ce) 690{ 691 struct object_id oid; 692 if (odb_write_object(the_repository->objects, "", 0, OBJ_BLOB, &oid)) 693 die(_("cannot create an empty blob in the object database")); 694 oidcpy(&ce->oid, &oid); 695} 696 697int add_to_index(struct index_state *istate, const char *path, struct stat *st, int flags) 698{ 699 int namelen, was_same; 700 
mode_t st_mode = st->st_mode; 701 struct cache_entry *ce, *alias = NULL; 702 unsigned ce_option = CE_MATCH_IGNORE_VALID|CE_MATCH_IGNORE_SKIP_WORKTREE|CE_MATCH_RACY_IS_DIRTY; 703 int verbose = flags & (ADD_CACHE_VERBOSE | ADD_CACHE_PRETEND); 704 int pretend = flags & ADD_CACHE_PRETEND; 705 int intent_only = flags & ADD_CACHE_INTENT; 706 int add_option = (ADD_CACHE_OK_TO_ADD|ADD_CACHE_OK_TO_REPLACE| 707 (intent_only ? ADD_CACHE_NEW_ONLY : 0)); 708 unsigned hash_flags = pretend ? 0 : INDEX_WRITE_OBJECT; 709 struct object_id oid; 710 711 if (flags & ADD_CACHE_RENORMALIZE) 712 hash_flags |= INDEX_RENORMALIZE; 713 714 if (!S_ISREG(st_mode) && !S_ISLNK(st_mode) && !S_ISDIR(st_mode)) 715 return error(_("%s: can only add regular files, symbolic links or git-directories"), path); 716 717 namelen = strlen(path); 718 if (S_ISDIR(st_mode)) { 719 if (repo_resolve_gitlink_ref(the_repository, path, "HEAD", &oid) < 0) 720 return error(_("'%s' does not have a commit checked out"), path); 721 while (namelen && path[namelen-1] == '/') 722 namelen--; 723 } 724 ce = make_empty_cache_entry(istate, namelen); 725 memcpy(ce->name, path, namelen); 726 ce->ce_namelen = namelen; 727 if (!intent_only) 728 fill_stat_cache_info(istate, ce, st); 729 else 730 ce->ce_flags |= CE_INTENT_TO_ADD; 731 732 733 if (trust_executable_bit && has_symlinks) { 734 ce->ce_mode = create_ce_mode(st_mode); 735 } else { 736 /* If there is an existing entry, pick the mode bits and type 737 * from it, otherwise assume unexecutable regular file. 738 */ 739 struct cache_entry *ent; 740 int pos = index_name_pos_also_unmerged(istate, path, namelen); 741 742 ent = (0 <= pos) ? istate->cache[pos] : NULL; 743 ce->ce_mode = ce_mode_from_stat(ent, st_mode); 744 } 745 746 /* When core.ignorecase=true, determine if a directory of the same name but differing 747 * case already exists within the Git repository. 
If it does, ensure the directory 748 * case of the file being added to the repository matches (is folded into) the existing 749 * entry's directory case. 750 */ 751 if (ignore_case) { 752 adjust_dirname_case(istate, ce->name); 753 } 754 if (!(flags & ADD_CACHE_RENORMALIZE)) { 755 alias = index_file_exists(istate, ce->name, 756 ce_namelen(ce), ignore_case); 757 if (alias && 758 !ce_stage(alias) && 759 !ie_match_stat(istate, alias, st, ce_option)) { 760 /* Nothing changed, really */ 761 if (!S_ISGITLINK(alias->ce_mode)) 762 ce_mark_uptodate(alias); 763 alias->ce_flags |= CE_ADDED; 764 765 discard_cache_entry(ce); 766 return 0; 767 } 768 } 769 if (!intent_only) { 770 if (index_path(istate, &ce->oid, path, st, hash_flags)) { 771 discard_cache_entry(ce); 772 return error(_("unable to index file '%s'"), path); 773 } 774 } else 775 set_object_name_for_intent_to_add_entry(ce); 776 777 if (ignore_case && alias && different_name(ce, alias)) 778 ce = create_alias_ce(istate, ce, alias); 779 ce->ce_flags |= CE_ADDED; 780 781 /* It was suspected to be racily clean, but it turns out to be Ok */ 782 was_same = (alias && 783 !ce_stage(alias) && 784 oideq(&alias->oid, &ce->oid) && 785 ce->ce_mode == alias->ce_mode); 786 787 if (pretend) 788 discard_cache_entry(ce); 789 else if (add_index_entry(istate, ce, add_option)) { 790 discard_cache_entry(ce); 791 return error(_("unable to add '%s' to index"), path); 792 } 793 if (verbose && !was_same) 794 printf("add '%s'\n", path); 795 return 0; 796} 797 798int add_file_to_index(struct index_state *istate, const char *path, int flags) 799{ 800 struct stat st; 801 if (lstat(path, &st)) 802 die_errno(_("unable to stat '%s'"), path); 803 return add_to_index(istate, path, &st, flags); 804} 805 806struct cache_entry *make_empty_cache_entry(struct index_state *istate, size_t len) 807{ 808 return mem_pool__ce_calloc(find_mem_pool(istate), len); 809} 810 811struct cache_entry *make_empty_transient_cache_entry(size_t len, 812 struct mem_pool 
*ce_mem_pool) 813{ 814 if (ce_mem_pool) 815 return mem_pool__ce_calloc(ce_mem_pool, len); 816 return xcalloc(1, cache_entry_size(len)); 817} 818 819enum verify_path_result { 820 PATH_OK, 821 PATH_INVALID, 822 PATH_DIR_WITH_SEP, 823}; 824 825static enum verify_path_result verify_path_internal(const char *, unsigned); 826 827int verify_path(const char *path, unsigned mode) 828{ 829 return verify_path_internal(path, mode) == PATH_OK; 830} 831 832struct cache_entry *make_cache_entry(struct index_state *istate, 833 unsigned int mode, 834 const struct object_id *oid, 835 const char *path, 836 int stage, 837 unsigned int refresh_options) 838{ 839 struct cache_entry *ce, *ret; 840 int len; 841 842 if (verify_path_internal(path, mode) == PATH_INVALID) { 843 error(_("invalid path '%s'"), path); 844 return NULL; 845 } 846 847 len = strlen(path); 848 ce = make_empty_cache_entry(istate, len); 849 850 oidcpy(&ce->oid, oid); 851 memcpy(ce->name, path, len); 852 ce->ce_flags = create_ce_flags(stage); 853 ce->ce_namelen = len; 854 ce->ce_mode = create_ce_mode(mode); 855 856 ret = refresh_cache_entry(istate, ce, refresh_options); 857 if (ret != ce) 858 discard_cache_entry(ce); 859 return ret; 860} 861 862struct cache_entry *make_transient_cache_entry(unsigned int mode, 863 const struct object_id *oid, 864 const char *path, 865 int stage, 866 struct mem_pool *ce_mem_pool) 867{ 868 struct cache_entry *ce; 869 int len; 870 871 if (!verify_path(path, mode)) { 872 error(_("invalid path '%s'"), path); 873 return NULL; 874 } 875 876 len = strlen(path); 877 ce = make_empty_transient_cache_entry(len, ce_mem_pool); 878 879 oidcpy(&ce->oid, oid); 880 memcpy(ce->name, path, len); 881 ce->ce_flags = create_ce_flags(stage); 882 ce->ce_namelen = len; 883 ce->ce_mode = create_ce_mode(mode); 884 885 return ce; 886} 887 888/* 889 * Chmod an index entry with either +x or -x. 
890 * 891 * Returns -1 if the chmod for the particular cache entry failed (if it's 892 * not a regular file), -2 if an invalid flip argument is passed in, 0 893 * otherwise. 894 */ 895int chmod_index_entry(struct index_state *istate, struct cache_entry *ce, 896 char flip) 897{ 898 if (!S_ISREG(ce->ce_mode)) 899 return -1; 900 switch (flip) { 901 case '+': 902 ce->ce_mode |= 0111; 903 break; 904 case '-': 905 ce->ce_mode &= ~0111; 906 break; 907 default: 908 return -2; 909 } 910 cache_tree_invalidate_path(istate, ce->name); 911 ce->ce_flags |= CE_UPDATE_IN_BASE; 912 mark_fsmonitor_invalid(istate, ce); 913 istate->cache_changed |= CE_ENTRY_CHANGED; 914 915 return 0; 916} 917 918int ce_same_name(const struct cache_entry *a, const struct cache_entry *b) 919{ 920 int len = ce_namelen(a); 921 return ce_namelen(b) == len && !memcmp(a->name, b->name, len); 922} 923 924/* 925 * We fundamentally don't like some paths: we don't want 926 * dot or dot-dot anywhere, and for obvious reasons don't 927 * want to recurse into ".git" either. 928 * 929 * Also, we don't want double slashes or slashes at the 930 * end that can make pathnames ambiguous. 931 */ 932static int verify_dotfile(const char *rest, unsigned mode) 933{ 934 /* 935 * The first character was '.', but that 936 * has already been discarded, we now test 937 * the rest. 938 */ 939 940 /* "." is not allowed */ 941 if (*rest == '\0' || is_dir_sep(*rest)) 942 return 0; 943 944 switch (*rest) { 945 /* 946 * ".git" followed by NUL or slash is bad. Note that we match 947 * case-insensitively here, even if ignore_case is not set. 948 * This outlaws ".GIT" everywhere out of an abundance of caution, 949 * since there's really no good reason to allow it. 950 * 951 * Once we've seen ".git", we can also find ".gitmodules", etc (also 952 * case-insensitively). 
953 */ 954 case 'g': 955 case 'G': 956 if (rest[1] != 'i' && rest[1] != 'I') 957 break; 958 if (rest[2] != 't' && rest[2] != 'T') 959 break; 960 if (rest[3] == '\0' || is_dir_sep(rest[3])) 961 return 0; 962 if (S_ISLNK(mode)) { 963 rest += 3; 964 if (skip_iprefix(rest, "modules", &rest) && 965 (*rest == '\0' || is_dir_sep(*rest))) 966 return 0; 967 } 968 break; 969 case '.': 970 if (rest[1] == '\0' || is_dir_sep(rest[1])) 971 return 0; 972 } 973 return 1; 974} 975 976static enum verify_path_result verify_path_internal(const char *path, 977 unsigned mode) 978{ 979 char c = 0; 980 981 if (has_dos_drive_prefix(path)) 982 return PATH_INVALID; 983 984 if (!is_valid_path(path)) 985 return PATH_INVALID; 986 987 goto inside; 988 for (;;) { 989 if (!c) 990 return PATH_OK; 991 if (is_dir_sep(c)) { 992inside: 993 if (protect_hfs) { 994 995 if (is_hfs_dotgit(path)) 996 return PATH_INVALID; 997 if (S_ISLNK(mode)) { 998 if (is_hfs_dotgitmodules(path)) 999 return PATH_INVALID; 1000 } 1001 } 1002 if (protect_ntfs) { 1003#if defined GIT_WINDOWS_NATIVE || defined __CYGWIN__ 1004 if (c == '\\') 1005 return PATH_INVALID; 1006#endif 1007 if (is_ntfs_dotgit(path)) 1008 return PATH_INVALID; 1009 if (S_ISLNK(mode)) { 1010 if (is_ntfs_dotgitmodules(path)) 1011 return PATH_INVALID; 1012 } 1013 } 1014 1015 c = *path++; 1016 if ((c == '.' && !verify_dotfile(path, mode)) || 1017 is_dir_sep(c)) 1018 return PATH_INVALID; 1019 /* 1020 * allow terminating directory separators for 1021 * sparse directory entries. 1022 */ 1023 if (c == '\0') 1024 return S_ISDIR(mode) ? PATH_DIR_WITH_SEP : 1025 PATH_INVALID; 1026 } else if (c == '\\' && protect_ntfs) { 1027 if (is_ntfs_dotgit(path)) 1028 return PATH_INVALID; 1029 if (S_ISLNK(mode)) { 1030 if (is_ntfs_dotgitmodules(path)) 1031 return PATH_INVALID; 1032 } 1033 } 1034 1035 c = *path++; 1036 } 1037} 1038 1039/* 1040 * Do we have another file that has the beginning components being a 1041 * proper superset of the name we're trying to add? 
 */
static int has_file_name(struct index_state *istate,
			 const struct cache_entry *ce, int pos, int ok_to_replace)
{
	int retval = 0;
	int len = ce_namelen(ce);
	int stage = ce_stage(ce);
	const char *name = ce->name;

	/*
	 * Entries sort by name, so all candidates are contiguous at
	 * and after 'pos'.  Returns -1 if a conflicting file exists
	 * (removing it when ok_to_replace), 0 otherwise.
	 */
	while (pos < istate->cache_nr) {
		struct cache_entry *p = istate->cache[pos++];

		if (len >= ce_namelen(p))
			break;
		if (memcmp(name, p->name, len))
			break;
		if (ce_stage(p) != stage)
			continue;
		if (p->name[len] != '/')
			continue;
		if (p->ce_flags & CE_REMOVE)
			continue;
		retval = -1;
		if (!ok_to_replace)
			break;
		remove_index_entry_at(istate, --pos);
	}
	return retval;
}


/*
 * Like strcmp(), but also return the offset of the first change.
 * If strings are equal, return the length.
 */
int strcmp_offset(const char *s1, const char *s2, size_t *first_change)
{
	size_t k;

	if (!first_change)
		return strcmp(s1, s2);

	for (k = 0; s1[k] == s2[k]; k++)
		if (s1[k] == '\0')
			break;

	*first_change = k;
	return (unsigned char)s1[k] - (unsigned char)s2[k];
}

/*
 * Do we have another file with a pathname that is a proper
 * subset of the name we're trying to add?
 *
 * That is, is there another file in the index with a path
 * that matches a sub-directory in the given entry?
 */
static int has_dir_name(struct index_state *istate,
			const struct cache_entry *ce, int pos, int ok_to_replace)
{
	int retval = 0;
	int stage = ce_stage(ce);
	const char *name = ce->name;
	const char *slash = name + ce_namelen(ce);
	size_t len_eq_last;
	int cmp_last = 0;

	/*
	 * We are frequently called during an iteration on a sorted
	 * list of pathnames and while building a new index.  Therefore,
	 * there is a high probability that this entry will eventually
	 * be appended to the index, rather than inserted in the middle.
	 * If we can confirm that, we can avoid binary searches on the
	 * components of the pathname.
	 *
	 * Compare the entry's full path with the last path in the index.
	 */
	if (!istate->cache_nr)
		return 0;

	cmp_last = strcmp_offset(name,
				 istate->cache[istate->cache_nr - 1]->name,
				 &len_eq_last);
	if (cmp_last > 0 && name[len_eq_last] != '/')
		/*
		 * The entry sorts AFTER the last one in the
		 * index and their paths have no common prefix,
		 * so there cannot be a F/D conflict.
		 */
		return 0;

	/* Walk each leading directory of 'name', longest first. */
	for (;;) {
		size_t len;

		for (;;) {
			if (*--slash == '/')
				break;
			if (slash <= ce->name)
				return retval;
		}
		len = slash - name;

		pos = index_name_stage_pos(istate, name, len, stage, EXPAND_SPARSE);
		if (pos >= 0) {
			/*
			 * Found one, but not so fast.  This could
			 * be a marker that says "I was here, but
			 * I am being removed".  Such an entry is
			 * not a part of the resulting tree, and
			 * it is Ok to have a directory at the same
			 * path.
			 */
			if (!(istate->cache[pos]->ce_flags & CE_REMOVE)) {
				retval = -1;
				if (!ok_to_replace)
					break;
				remove_index_entry_at(istate, pos);
				continue;
			}
		}
		else
			pos = -pos-1;

		/*
		 * Trivial optimization: if we find an entry that
		 * already matches the sub-directory, then we know
		 * we're ok, and we can exit.
		 */
		while (pos < istate->cache_nr) {
			struct cache_entry *p = istate->cache[pos];
			if ((ce_namelen(p) <= len) ||
			    (p->name[len] != '/') ||
			    memcmp(p->name, name, len))
				break; /* not our subdirectory */
			if (ce_stage(p) == stage && !(p->ce_flags & CE_REMOVE))
				/*
				 * p is at the same stage as our entry, and
				 * is a subdirectory of what we are looking
				 * at, so we cannot have conflicts at our
				 * level or anything shorter.
				 */
				return retval;
			pos++;
		}
	}
	return retval;
}

/* We may be in a situation where we already have path/file and path
 * is being added, or we already have path and path/file is being
 * added.  Either one would result in a nonsense tree that has path
 * twice when git-write-tree tries to write it out.  Prevent it.
 *
 * If ok-to-replace is specified, we remove the conflicting entries
 * from the cache so the caller should recompute the insert position.
 * When this happens, we return non-zero.
 */
static int check_file_directory_conflict(struct index_state *istate,
					 const struct cache_entry *ce,
					 int pos, int ok_to_replace)
{
	int retval;

	/*
	 * When ce is an "I am going away" entry, we allow it to be added
	 */
	if (ce->ce_flags & CE_REMOVE)
		return 0;

	/*
	 * We check if the path is a sub-path of a subsequent pathname
	 * first, since removing those will not change the position
	 * in the array.
	 */
	retval = has_file_name(istate, ce, pos, ok_to_replace);

	/*
	 * Then check if the path might have a clashing sub-directory
	 * before it.
	 */
	return retval + has_dir_name(istate, ce, pos, ok_to_replace);
}

/*
 * Validate 'ce' and find where it belongs.  Returns 0 when an existing
 * entry was replaced in place, a negative value on error, or the
 * desired insert position plus one.
 */
static int add_index_entry_with_check(struct index_state *istate, struct cache_entry *ce, int option)
{
	int pos;
	int ok_to_add = option & ADD_CACHE_OK_TO_ADD;
	int ok_to_replace = option & ADD_CACHE_OK_TO_REPLACE;
	int skip_df_check = option & ADD_CACHE_SKIP_DFCHECK;
	int new_only = option & ADD_CACHE_NEW_ONLY;

	/*
	 * If this entry's path sorts after the last entry in the index,
	 * we can avoid searching for it.
	 */
	if (istate->cache_nr > 0 &&
	    strcmp(ce->name, istate->cache[istate->cache_nr - 1]->name) > 0)
		pos = index_pos_to_insert_pos(istate->cache_nr);
	else
		pos = index_name_stage_pos(istate, ce->name, ce_namelen(ce), ce_stage(ce), EXPAND_SPARSE);

	/*
	 * Cache tree path should be invalidated only after index_name_stage_pos,
	 * in case it expands a sparse index.
	 */
	if (!(option & ADD_CACHE_KEEP_CACHE_TREE))
		cache_tree_invalidate_path(istate, ce->name);

	/* existing match? Just replace it. */
	if (pos >= 0) {
		if (!new_only)
			replace_index_entry(istate, pos, ce);
		return 0;
	}
	/* negative result encodes the insert position as -pos-1 */
	pos = -pos-1;

	if (!(option & ADD_CACHE_KEEP_CACHE_TREE))
		untracked_cache_add_to_index(istate, ce->name);

	/*
	 * Inserting a merged entry ("stage 0") into the index
	 * will always replace all non-merged entries..
	 */
	if (pos < istate->cache_nr && ce_stage(ce) == 0) {
		while (ce_same_name(istate->cache[pos], ce)) {
			ok_to_add = 1;
			if (!remove_index_entry_at(istate, pos))
				break;
		}
	}

	if (!ok_to_add)
		return -1;
	if (verify_path_internal(ce->name, ce->ce_mode) == PATH_INVALID)
		return error(_("invalid path '%s'"), ce->name);

	if (!skip_df_check &&
	    check_file_directory_conflict(istate, ce, pos, ok_to_replace)) {
		if (!ok_to_replace)
			return error(_("'%s' appears as both a file and as a directory"),
				     ce->name);
		/* conflicting entries were removed; recompute the position */
		pos = index_name_stage_pos(istate, ce->name, ce_namelen(ce), ce_stage(ce), EXPAND_SPARSE);
		pos = -pos-1;
	}
	return pos + 1;
}

/*
 * Add 'ce' to the index, keeping the array sorted (unless
 * ADD_CACHE_JUST_APPEND is given).  Returns 0 on success, a negative
 * value if the entry could not be added.
 */
int add_index_entry(struct index_state *istate, struct cache_entry *ce, int option)
{
	int pos;

	if (option & ADD_CACHE_JUST_APPEND)
		pos = istate->cache_nr;
	else {
		int ret;
		ret = add_index_entry_with_check(istate, ce, option);
		if (ret <= 0)
			return ret;
		pos = ret - 1;
	}

	/* Make sure the array is big enough .. */
	ALLOC_GROW(istate->cache, istate->cache_nr + 1, istate->cache_alloc);

	/* Add it in.. */
	istate->cache_nr++;
	if (istate->cache_nr > pos + 1)
		MOVE_ARRAY(istate->cache + pos + 1, istate->cache + pos,
			   istate->cache_nr - pos - 1);
	set_index_entry(istate, pos, ce);
	istate->cache_changed |= CE_ENTRY_ADDED;
	return 0;
}

/*
 * "refresh" does not calculate a new sha1 file or bring the
 * cache up-to-date for mode/content changes. But what it
 * _does_ do is to "re-match" the stat information of a file
 * with the cache, so that you can refresh the cache for a
 * file that hasn't been changed but where the stat entry is
 * out of date.
 *
 * For example, you'd want to do this after doing a "git-read-tree",
 * to link up the stat cache details with the proper files.
 */
/*
 * Refresh the stat information of a single cache entry.  Returns 'ce'
 * itself when it is (or is marked) up to date, a newly allocated entry
 * with refreshed stat data when the file is unchanged but the cached
 * stat data is stale, or NULL (with *err set) when the file is missing
 * or modified.  *t2_did_lstat / *t2_did_scan report work done for
 * trace2 statistics.
 */
static struct cache_entry *refresh_cache_ent(struct index_state *istate,
					     struct cache_entry *ce,
					     unsigned int options, int *err,
					     int *changed_ret,
					     int *t2_did_lstat,
					     int *t2_did_scan)
{
	struct stat st;
	struct cache_entry *updated;
	int changed;
	int refresh = options & CE_MATCH_REFRESH;
	int ignore_valid = options & CE_MATCH_IGNORE_VALID;
	int ignore_skip_worktree = options & CE_MATCH_IGNORE_SKIP_WORKTREE;
	int ignore_missing = options & CE_MATCH_IGNORE_MISSING;
	int ignore_fsmonitor = options & CE_MATCH_IGNORE_FSMONITOR;

	if (!refresh || ce_uptodate(ce))
		return ce;

	if (!ignore_fsmonitor)
		refresh_fsmonitor(istate);
	/*
	 * CE_VALID or CE_SKIP_WORKTREE means the user promised us
	 * that the change to the work tree does not matter and told
	 * us not to worry.
	 */
	if (!ignore_skip_worktree && ce_skip_worktree(ce)) {
		ce_mark_uptodate(ce);
		return ce;
	}
	if (!ignore_valid && (ce->ce_flags & CE_VALID)) {
		ce_mark_uptodate(ce);
		return ce;
	}
	if (!ignore_fsmonitor && (ce->ce_flags & CE_FSMONITOR_VALID)) {
		ce_mark_uptodate(ce);
		return ce;
	}

	/* a symlink in the leading path makes the file effectively absent */
	if (has_symlink_leading_path(ce->name, ce_namelen(ce))) {
		if (ignore_missing)
			return ce;
		if (err)
			*err = ENOENT;
		return NULL;
	}

	if (t2_did_lstat)
		*t2_did_lstat = 1;
	if (lstat(ce->name, &st) < 0) {
		if (ignore_missing && errno == ENOENT)
			return ce;
		if (err)
			*err = errno;
		return NULL;
	}

	changed = ie_match_stat(istate, ce, &st, options);
	if (changed_ret)
		*changed_ret = changed;
	if (!changed) {
		/*
		 * The path is unchanged.  If we were told to ignore
		 * valid bit, then we did the actual stat check and
		 * found that the entry is unmodified.  If the entry
		 * is not marked VALID, this is the place to mark it
		 * valid again, under "assume unchanged" mode.
		 */
		if (ignore_valid && assume_unchanged &&
		    !(ce->ce_flags & CE_VALID))
			; /* mark this one VALID again */
		else {
			/*
			 * We do not mark the index itself "modified"
			 * because CE_UPTODATE flag is in-core only;
			 * we are not going to write this change out.
			 */
			if (!S_ISGITLINK(ce->ce_mode)) {
				ce_mark_uptodate(ce);
				mark_fsmonitor_valid(istate, ce);
			}
			return ce;
		}
	}

	if (t2_did_scan)
		*t2_did_scan = 1;
	if (ie_modified(istate, ce, &st, options)) {
		if (err)
			*err = EINVAL;
		return NULL;
	}

	updated = make_empty_cache_entry(istate, ce_namelen(ce));
	copy_cache_entry(updated, ce);
	memcpy(updated->name, ce->name, ce->ce_namelen + 1);
	fill_stat_cache_info(istate, updated, &st);
	/*
	 * If ignore_valid is not set, we should leave CE_VALID bit
	 * alone.  Otherwise, paths marked with --no-assume-unchanged
	 * (i.e. things to be edited) will reacquire CE_VALID bit
	 * automatically, which is not really what we want.
	 */
	if (!ignore_valid && assume_unchanged &&
	    !(ce->ce_flags & CE_VALID))
		updated->ce_flags &= ~CE_VALID;

	/* istate->cache_changed is updated in the caller */
	return updated;
}

/*
 * Print one path per 'fmt'; in porcelain mode, emit 'header_msg' once
 * before the first path.
 */
static void show_file(const char * fmt, const char * name, int in_porcelain,
		      int * first, const char *header_msg)
{
	if (in_porcelain && *first && header_msg) {
		printf("%s\n", header_msg);
		*first = 0;
	}
	printf(fmt, name);
}

/*
 * Lock, refresh and write back the repository's index.  With 'gentle'
 * a failure to take the lock is not fatal.  Returns 0 on success, 1 if
 * refresh reported problems, -1 on lock/write failure.
 */
int repo_refresh_and_write_index(struct repository *repo,
				 unsigned int refresh_flags,
				 unsigned int write_flags,
				 int gentle,
				 const struct pathspec *pathspec,
				 char *seen, const char *header_msg)
{
	struct lock_file lock_file = LOCK_INIT;
	int fd, ret = 0;

	fd = repo_hold_locked_index(repo, &lock_file,
				    gentle ? 0 : LOCK_REPORT_ON_ERROR);
	if (!gentle && fd < 0)
		return -1;
	if (refresh_index(repo->index, refresh_flags, pathspec, seen, header_msg))
		ret = 1;
	if (0 <= fd && write_locked_index(repo->index, &lock_file, COMMIT_LOCK | write_flags))
		ret = -1;
	return ret;
}


/*
 * Refresh the stat information of every entry in 'istate' (optionally
 * limited by 'pathspec'), reporting paths that need update/merge
 * according to 'flags'.  Returns non-zero when any path could not be
 * refreshed.
 */
int refresh_index(struct index_state *istate, unsigned int flags,
		  const struct pathspec *pathspec,
		  char *seen, const char *header_msg)
{
	int i;
	int has_errors = 0;
	int really = (flags & REFRESH_REALLY) != 0;
	int allow_unmerged = (flags & REFRESH_UNMERGED) != 0;
	int quiet = (flags & REFRESH_QUIET) != 0;
	int not_new = (flags & REFRESH_IGNORE_MISSING) != 0;
	int ignore_submodules = (flags & REFRESH_IGNORE_SUBMODULES) != 0;
	int ignore_skip_worktree = (flags & REFRESH_IGNORE_SKIP_WORKTREE) != 0;
	int first = 1;
	int in_porcelain = (flags & REFRESH_IN_PORCELAIN);
	unsigned int options = (CE_MATCH_REFRESH |
				(really ? CE_MATCH_IGNORE_VALID : 0) |
				(not_new ? CE_MATCH_IGNORE_MISSING : 0));
	const char *modified_fmt;
	const char *deleted_fmt;
	const char *typechange_fmt;
	const char *added_fmt;
	const char *unmerged_fmt;
	struct progress *progress = NULL;
	int t2_sum_lstat = 0;
	int t2_sum_scan = 0;

	if (flags & REFRESH_PROGRESS && isatty(2))
		progress = start_delayed_progress(the_repository,
						  _("Refresh index"),
						  istate->cache_nr);

	trace_performance_enter();
	modified_fmt = in_porcelain ? "M\t%s\n" : "%s: needs update\n";
	deleted_fmt = in_porcelain ? "D\t%s\n" : "%s: needs update\n";
	typechange_fmt = in_porcelain ? "T\t%s\n" : "%s: needs update\n";
	added_fmt = in_porcelain ? "A\t%s\n" : "%s: needs update\n";
	unmerged_fmt = in_porcelain ? "U\t%s\n" : "%s: needs merge\n";
	/*
	 * Use the multi-threaded preload_index() to refresh most of the
	 * cache entries quickly then in the single threaded loop below,
	 * we only have to do the special cases that are left.
	 */
	preload_index(istate, pathspec, 0);
	trace2_region_enter("index", "refresh", NULL);

	for (i = 0; i < istate->cache_nr; i++) {
		struct cache_entry *ce, *new_entry;
		int cache_errno = 0;
		int changed = 0;
		int filtered = 0;
		int t2_did_lstat = 0;
		int t2_did_scan = 0;

		ce = istate->cache[i];
		if (ignore_submodules && S_ISGITLINK(ce->ce_mode))
			continue;
		if (ignore_skip_worktree && ce_skip_worktree(ce))
			continue;

		/*
		 * If this entry is a sparse directory, then there isn't
		 * any stat() information to update. Ignore the entry.
		 */
		if (S_ISSPARSEDIR(ce->ce_mode))
			continue;

		if (pathspec && !ce_path_match(istate, ce, pathspec, seen))
			filtered = 1;

		if (ce_stage(ce)) {
			/* skip over all the stages of this unmerged path */
			while ((i < istate->cache_nr) &&
			       !strcmp(istate->cache[i]->name, ce->name))
				i++;
			i--;
			if (allow_unmerged)
				continue;
			if (!filtered)
				show_file(unmerged_fmt, ce->name, in_porcelain,
					  &first, header_msg);
			has_errors = 1;
			continue;
		}

		if (filtered)
			continue;

		new_entry = refresh_cache_ent(istate, ce, options,
					      &cache_errno, &changed,
					      &t2_did_lstat, &t2_did_scan);
		t2_sum_lstat += t2_did_lstat;
		t2_sum_scan += t2_did_scan;
		if (new_entry == ce)
			continue;
		display_progress(progress, i);
		if (!new_entry) {
			const char *fmt;

			if (really && cache_errno == EINVAL) {
				/* If we are doing --really-refresh that
				 * means the index is not valid anymore.
				 */
				ce->ce_flags &= ~CE_VALID;
				ce->ce_flags |= CE_UPDATE_IN_BASE;
				mark_fsmonitor_invalid(istate, ce);
				istate->cache_changed |= CE_ENTRY_CHANGED;
			}
			if (quiet)
				continue;

			if (cache_errno == ENOENT)
				fmt = deleted_fmt;
			else if (ce_intent_to_add(ce))
				fmt = added_fmt; /* must be before other checks */
			else if (changed & TYPE_CHANGED)
				fmt = typechange_fmt;
			else
				fmt = modified_fmt;
			show_file(fmt,
				  ce->name, in_porcelain, &first, header_msg);
			has_errors = 1;
			continue;
		}

		replace_index_entry(istate, i, new_entry);
	}
	trace2_data_intmax("index", NULL, "refresh/sum_lstat", t2_sum_lstat);
	trace2_data_intmax("index", NULL, "refresh/sum_scan", t2_sum_scan);
	trace2_region_leave("index", "refresh", NULL);
	display_progress(progress, istate->cache_nr);
	stop_progress(&progress);
	trace_performance_leave("refresh index");
	return has_errors;
}

/* Public wrapper around refresh_cache_ent() without trace2 bookkeeping. */
struct cache_entry *refresh_cache_entry(struct index_state *istate,
					struct cache_entry *ce,
					unsigned int options)
{
	return refresh_cache_ent(istate, ce, options, NULL, NULL, NULL, NULL);
}

/*****************************************************************
 * Index File I/O
 *****************************************************************/

#define INDEX_FORMAT_DEFAULT 3

/*
 * Decide which on-disk index version to write.  The GIT_INDEX_VERSION
 * environment variable, when set, takes precedence over the
 * index.version repo setting; invalid values fall back to
 * INDEX_FORMAT_DEFAULT with a warning.
 */
static unsigned int get_index_format_default(struct repository *r)
{
	char *envversion = getenv("GIT_INDEX_VERSION");
	char *endp;
	unsigned int version = INDEX_FORMAT_DEFAULT;

	if (!envversion) {
		prepare_repo_settings(r);

		if (r->settings.index_version >= 0)
			version = r->settings.index_version;
		if (version < INDEX_FORMAT_LB || INDEX_FORMAT_UB < version) {
			warning(_("index.version set, but the value is invalid.\n"
				  "Using version %i"), INDEX_FORMAT_DEFAULT);
			return INDEX_FORMAT_DEFAULT;
		}
		return version;
	}

	version = strtoul(envversion, &endp, 10);
	if (*endp ||
	    version < INDEX_FORMAT_LB || INDEX_FORMAT_UB < version) {
		warning(_("GIT_INDEX_VERSION set, but the value is invalid.\n"
			  "Using version %i"), INDEX_FORMAT_DEFAULT);
		version = INDEX_FORMAT_DEFAULT;
	}
	return version;
}

/*
 * dev/ino/uid/gid/size are also just tracked to the low 32 bits
 * Again - this is just a (very strong in practice) heuristic that
 * the inode hasn't changed.
 *
 * We save the fields in big-endian order to allow using the
 * index file over NFS transparently.
 */
struct ondisk_cache_entry {
	struct cache_time ctime;
	struct cache_time mtime;
	uint32_t dev;
	uint32_t ino;
	uint32_t mode;
	uint32_t uid;
	uint32_t gid;
	uint32_t size;
	/*
	 * unsigned char hash[hashsz];
	 * uint16_t flags;
	 * if (flags & CE_EXTENDED)
	 *	uint16_t flags2;
	 */
	unsigned char data[GIT_MAX_RAWSZ + 2 * sizeof(uint16_t)];
	char name[FLEX_ARRAY];
};

/* These are only used for v3 or lower */
#define align_padding_size(size, len) ((size + (len) + 8) & ~7) - (size + len)
#define align_flex_name(STRUCT,len) ((offsetof(struct STRUCT,data) + (len) + 8) & ~7)
#define ondisk_cache_entry_size(len) align_flex_name(ondisk_cache_entry,len)
#define ondisk_data_size(flags, len) (the_hash_algo->rawsz + \
				     ((flags & CE_EXTENDED) ? 2 : 1) * sizeof(uint16_t) + len)
#define ondisk_data_size_max(len) (ondisk_data_size(CE_EXTENDED, len))
#define ondisk_ce_size(ce) (ondisk_cache_entry_size(ondisk_data_size((ce)->ce_flags, ce_namelen(ce))))

/* Allow fsck to force verification of the index checksum. */
int verify_index_checksum;

/* Allow fsck to force verification of the cache entry order.
 */
int verify_ce_order;

/*
 * Sanity-check the index header: magic signature, supported version,
 * and (when verify_index_checksum is set) the trailing hash over the
 * whole file.  A null trailing hash is accepted as "not checksummed".
 * Returns 0 on success, negative on error.
 */
static int verify_hdr(const struct cache_header *hdr, unsigned long size)
{
	struct git_hash_ctx c;
	unsigned char hash[GIT_MAX_RAWSZ];
	int hdr_version;
	unsigned char *start, *end;
	struct object_id oid;

	if (hdr->hdr_signature != htonl(CACHE_SIGNATURE))
		return error(_("bad signature 0x%08x"), hdr->hdr_signature);
	hdr_version = ntohl(hdr->hdr_version);
	if (hdr_version < INDEX_FORMAT_LB || INDEX_FORMAT_UB < hdr_version)
		return error(_("bad index version %d"), hdr_version);

	if (!verify_index_checksum)
		return 0;

	end = (unsigned char *)hdr + size;
	start = end - the_hash_algo->rawsz;
	oidread(&oid, start, the_repository->hash_algo);
	if (oideq(&oid, null_oid(the_hash_algo)))
		return 0;

	the_hash_algo->init_fn(&c);
	git_hash_update(&c, hdr, size - the_hash_algo->rawsz);
	git_hash_final(hash, &c);
	if (!hasheq(hash, start, the_repository->hash_algo))
		return error(_("bad index file sha1 signature"));
	return 0;
}

/*
 * Dispatch a single index extension to its reader.  Unknown extensions
 * whose first letter is 'A'..'Z' are optional and merely ignored with
 * a note; anything else is mandatory and causes an error.
 */
static int read_index_extension(struct index_state *istate,
				const char *ext, const char *data, unsigned long sz)
{
	switch (CACHE_EXT(ext)) {
	case CACHE_EXT_TREE:
		istate->cache_tree = cache_tree_read(data, sz);
		break;
	case CACHE_EXT_RESOLVE_UNDO:
		istate->resolve_undo = resolve_undo_read(data, sz, the_hash_algo);
		break;
	case CACHE_EXT_LINK:
		if (read_link_extension(istate, data, sz))
			return -1;
		break;
	case CACHE_EXT_UNTRACKED:
		istate->untracked = read_untracked_extension(data, sz);
		break;
	case CACHE_EXT_FSMONITOR:
		read_fsmonitor_extension(istate, data, sz);
		break;
	case CACHE_EXT_ENDOFINDEXENTRIES:
	case CACHE_EXT_INDEXENTRYOFFSETTABLE:
		/* already handled in do_read_index() */
		break;
	case CACHE_EXT_SPARSE_DIRECTORIES:
		/* no content, only an indicator */
		istate->sparse_index = INDEX_COLLAPSED;
		break;
	default:
		if (*ext < 'A' || 'Z' < *ext)
			return error(_("index uses %.4s extension, which we do not understand"),
				     ext);
		fprintf_ln(stderr, _("ignoring %.4s extension"), ext);
		break;
	}
	return 0;
}

/*
 * Parses the contents of the cache entry contained within the 'ondisk' buffer
 * into a new incore 'cache_entry'.
 *
 * Note that 'char *ondisk' may not be aligned to a 4-byte address interval in
 * index v4, so we cannot cast it to 'struct ondisk_cache_entry *' and access
 * its members. Instead, we use the byte offsets of members within the struct to
 * identify where 'get_be16()', 'get_be32()', and 'oidread()' (which can all
 * read from an unaligned memory buffer) should read from the 'ondisk' buffer
 * into the corresponding incore 'cache_entry' members.
 */
static struct cache_entry *create_from_disk(struct mem_pool *ce_mem_pool,
					    unsigned int version,
					    const char *ondisk,
					    unsigned long *ent_size,
					    const struct cache_entry *previous_ce)
{
	struct cache_entry *ce;
	size_t len;
	const char *name;
	const unsigned hashsz = the_hash_algo->rawsz;
	const char *flagsp = ondisk + offsetof(struct ondisk_cache_entry, data) + hashsz;
	unsigned int flags;
	size_t copy_len = 0;
	/*
	 * Adjacent cache entries tend to share the leading paths, so it makes
	 * sense to only store the differences in later entries.  In the v4
	 * on-disk format of the index, each on-disk cache entry stores the
	 * number of bytes to be stripped from the end of the previous name,
	 * and the bytes to append to the result, to come up with its name.
	 */
	int expand_name_field = version == 4;

	/* On-disk flags are just 16 bits */
	flags = get_be16(flagsp);
	len = flags & CE_NAMEMASK;

	if (flags & CE_EXTENDED) {
		int extended_flags;
		extended_flags = get_be16(flagsp + sizeof(uint16_t)) << 16;
		/* We do not yet understand any bit out of CE_EXTENDED_FLAGS */
		if (extended_flags & ~CE_EXTENDED_FLAGS)
			die(_("unknown index entry format 0x%08x"), extended_flags);
		flags |= extended_flags;
		name = (const char *)(flagsp + 2 * sizeof(uint16_t));
	}
	else
		name = (const char *)(flagsp + sizeof(uint16_t));

	if (expand_name_field) {
		const unsigned char *cp = (const unsigned char *)name;
		uint64_t strip_len, previous_len;

		/* If we're at the beginning of a block, ignore the previous name */
		strip_len = decode_varint(&cp);
		if (previous_ce) {
			previous_len = previous_ce->ce_namelen;
			if (previous_len < strip_len)
				die(_("malformed name field in the index, near path '%s'"),
				    previous_ce->name);
			copy_len = previous_len - strip_len;
		}
		name = (const char *)cp;
	}

	/* CE_NAMEMASK means "name too long for the flag field"; measure it */
	if (len == CE_NAMEMASK) {
		len = strlen(name);
		if (expand_name_field)
			len += copy_len;
	}

	ce = mem_pool__ce_alloc(ce_mem_pool, len);

	/*
	 * NEEDSWORK: using 'offsetof()' is cumbersome and should be replaced
	 * with something more akin to 'load_bitmap_entries_v1()'s use of
	 * 'read_be16'/'read_be32'. For consistency with the corresponding
	 * ondisk entry write function ('copy_cache_entry_to_ondisk()'), this
	 * should be done at the same time as removing references to
	 * 'ondisk_cache_entry' there.
	 */
	ce->ce_stat_data.sd_ctime.sec = get_be32(ondisk + offsetof(struct ondisk_cache_entry, ctime)
						 + offsetof(struct cache_time, sec));
	ce->ce_stat_data.sd_mtime.sec = get_be32(ondisk + offsetof(struct ondisk_cache_entry, mtime)
						 + offsetof(struct cache_time, sec));
	ce->ce_stat_data.sd_ctime.nsec = get_be32(ondisk + offsetof(struct ondisk_cache_entry, ctime)
						  + offsetof(struct cache_time, nsec));
	ce->ce_stat_data.sd_mtime.nsec = get_be32(ondisk + offsetof(struct ondisk_cache_entry, mtime)
						  + offsetof(struct cache_time, nsec));
	ce->ce_stat_data.sd_dev = get_be32(ondisk + offsetof(struct ondisk_cache_entry, dev));
	ce->ce_stat_data.sd_ino = get_be32(ondisk + offsetof(struct ondisk_cache_entry, ino));
	ce->ce_mode = get_be32(ondisk + offsetof(struct ondisk_cache_entry, mode));
	ce->ce_stat_data.sd_uid = get_be32(ondisk + offsetof(struct ondisk_cache_entry, uid));
	ce->ce_stat_data.sd_gid = get_be32(ondisk + offsetof(struct ondisk_cache_entry, gid));
	ce->ce_stat_data.sd_size = get_be32(ondisk + offsetof(struct ondisk_cache_entry, size));
	ce->ce_flags = flags & ~CE_NAMEMASK;
	ce->ce_namelen = len;
	ce->index = 0;
	oidread(&ce->oid, (const unsigned char *)ondisk + offsetof(struct ondisk_cache_entry, data),
		the_repository->hash_algo);

	if (expand_name_field) {
		if (copy_len)
			memcpy(ce->name, previous_ce->name, copy_len);
		memcpy(ce->name + copy_len, name, len + 1 - copy_len);
		*ent_size = (name - ((char *)ondisk)) + len + 1 - copy_len;
	} else {
		memcpy(ce->name, name, len + 1);
		*ent_size = ondisk_ce_size(ce);
	}
	return ce;
}

/*
 * Optionally (see verify_ce_order) die if cache entries are not sorted
 * by name and, for identical names, by stage.
 */
static void check_ce_order(struct index_state *istate)
{
	unsigned int i;

	if (!verify_ce_order)
		return;

	for (i = 1; i < istate->cache_nr; i++) {
		struct cache_entry *ce = istate->cache[i - 1];
		struct cache_entry *next_ce = istate->cache[i];
		int name_compare = strcmp(ce->name, next_ce->name);

		if (0 < name_compare)
			die(_("unordered stage entries in index"));
		if (!name_compare) {
			if (!ce_stage(ce))
				die(_("multiple stage entries for merged file '%s'"),
				    ce->name);
			if (ce_stage(ce) > ce_stage(next_ce))
				die(_("unordered stage entries for '%s'"),
				    ce->name);
		}
	}
}

/* Apply the core.untrackedCache setting to a freshly read index. */
static void tweak_untracked_cache(struct index_state *istate)
{
	struct repository *r = the_repository;

	prepare_repo_settings(r);

	switch (r->settings.core_untracked_cache) {
	case UNTRACKED_CACHE_REMOVE:
		remove_untracked_cache(istate);
		break;
	case UNTRACKED_CACHE_WRITE:
		add_untracked_cache(istate);
		break;
	case UNTRACKED_CACHE_KEEP:
		/*
		 * Either an explicit "core.untrackedCache=keep", the
		 * default if "core.untrackedCache" isn't configured,
		 * or a fallback on an unknown "core.untrackedCache"
		 * value.
		 */
		break;
	}
}

/* Apply the core.splitIndex setting to a freshly read index. */
static void tweak_split_index(struct index_state *istate)
{
	switch (repo_config_get_split_index(the_repository)) {
	case -1: /* unset: do nothing */
		break;
	case 0: /* false */
		remove_split_index(istate);
		break;
	case 1: /* true */
		add_split_index(istate);
		break;
	default: /* unknown value: do nothing */
		break;
	}
}

/* Post-processing that applies to every successfully read index. */
static void post_read_index_from(struct index_state *istate)
{
	check_ce_order(istate);
	tweak_untracked_cache(istate);
	tweak_split_index(istate);
	tweak_fsmonitor(istate);
}

/*
 * Estimate in-core memory needed for a v4 (path-compressed) index;
 * see the comment on CACHE_ENTRY_PATH_LENGTH.
 */
static size_t estimate_cache_size_from_compressed(unsigned int entries)
{
	return entries * (sizeof(struct cache_entry) + CACHE_ENTRY_PATH_LENGTH);
}

/* Estimate in-core memory needed for a v2/v3 index from its file size. */
static size_t estimate_cache_size(size_t ondisk_size, unsigned int entries)
{
	long per_entry = sizeof(struct cache_entry) - sizeof(struct ondisk_cache_entry);

	/*
	 * Account for potential alignment differences.
	 */
	per_entry += align_padding_size(per_entry, 0);
	return ondisk_size + entries * per_entry;
}

struct index_entry_offset
{
	/* starting byte offset into index file, count of index entries in this block */
	int offset, nr;
};

struct index_entry_offset_table
{
	int nr;
	struct index_entry_offset entries[FLEX_ARRAY];
};

static struct index_entry_offset_table *read_ieot_extension(const char *mmap, size_t mmap_size, size_t offset);
static void write_ieot_extension(struct strbuf *sb, struct index_entry_offset_table *ieot);

static size_t read_eoie_extension(const char *mmap, size_t mmap_size);
static void write_eoie_extension(struct strbuf *sb, struct git_hash_ctx *eoie_context, size_t offset);

struct load_index_extensions
{
	pthread_t pthread;
	struct index_state *istate;
	const char *mmap;
	size_t mmap_size;
	unsigned long src_offset;
};

/*
 * Read all extensions that follow the cache entries; also usable as a
 * pthread entry point (see do_read_index()), hence the void* protocol.
 */
static void *load_index_extensions(void *_data)
{
	struct load_index_extensions *p = _data;
	unsigned long src_offset = p->src_offset;

	while (src_offset <= p->mmap_size - the_hash_algo->rawsz - 8) {
		/* After an array of active_nr index entries,
		 * there can be arbitrary number of extended
		 * sections, each of which is prefixed with
		 * extension name (4-byte) and section length
		 * in 4-byte network byte order.
		 */
		uint32_t extsize = get_be32(p->mmap + src_offset + 4);
		if (read_index_extension(p->istate,
					 p->mmap + src_offset,
					 p->mmap + src_offset + 8,
					 extsize) < 0) {
			munmap((void *)p->mmap, p->mmap_size);
			die(_("index file corrupt"));
		}
		src_offset += 8;
		src_offset += extsize;
	}

	return NULL;
}

/*
 * A helper function that will load the specified range of cache entries
 * from the memory mapped file and add them to the given index.
 */
static unsigned long load_cache_entry_block(struct index_state *istate,
		struct mem_pool *ce_mem_pool, int offset, int nr, const char *mmap,
		unsigned long start_offset, const struct cache_entry *previous_ce)
{
	int i;
	unsigned long src_offset = start_offset;

	for (i = offset; i < offset + nr; i++) {
		struct cache_entry *ce;
		unsigned long consumed;

		ce = create_from_disk(ce_mem_pool, istate->version,
				      mmap + src_offset,
				      &consumed, previous_ce);
		set_index_entry(istate, i, ce);

		src_offset += consumed;
		/* v4 entries are delta-encoded against the previous name */
		previous_ce = ce;
	}
	/* number of bytes consumed from the mmap'd file */
	return src_offset - start_offset;
}

/* Single-threaded fallback: load every cache entry in one pass. */
static unsigned long load_all_cache_entries(struct index_state *istate,
			const char *mmap, size_t mmap_size, unsigned long src_offset)
{
	unsigned long consumed;

	istate->ce_mem_pool = xmalloc(sizeof(*istate->ce_mem_pool));
	if (istate->version == 4) {
		mem_pool_init(istate->ce_mem_pool,
				estimate_cache_size_from_compressed(istate->cache_nr));
	} else {
		mem_pool_init(istate->ce_mem_pool,
				estimate_cache_size(mmap_size, istate->cache_nr));
	}

	consumed = load_cache_entry_block(istate, istate->ce_mem_pool,
					0, istate->cache_nr, mmap, src_offset, NULL);
	return consumed;
}

/*
 * Mostly randomly chosen maximum thread counts: we
 * cap the parallelism to online_cpus() threads, and we want
 * to have at least 10000 cache entries per thread for it to
 * be worth starting a thread.
 */

#define THREAD_COST (10000)

struct load_cache_entries_thread_data
{
	pthread_t pthread;
	struct index_state *istate;
	struct mem_pool *ce_mem_pool;
	int offset;
	const char *mmap;
	struct index_entry_offset_table *ieot;
	int ieot_start;		/* starting index into the ieot array */
	int ieot_blocks;	/* count of ieot entries to process */
	unsigned long consumed;	/* return # of bytes in index file processed */
};

/*
 * A thread proc to run the load_cache_entries() computation
 * across multiple background threads.
 */
static void *load_cache_entries_thread(void *_data)
{
	struct load_cache_entries_thread_data *p = _data;
	int i;

	/* iterate across all ieot blocks assigned to this thread */
	for (i = p->ieot_start; i < p->ieot_start + p->ieot_blocks; i++) {
		p->consumed += load_cache_entry_block(p->istate, p->ce_mem_pool,
						      p->offset, p->ieot->entries[i].nr, p->mmap, p->ieot->entries[i].offset, NULL);
		p->offset += p->ieot->entries[i].nr;
	}
	return NULL;
}

/*
 * Load the cache entries in parallel, partitioning the IEOT blocks
 * across up to 'nr_threads' threads.  Each thread fills its own
 * mem_pool, which is merged into the index's pool after join.
 * Returns the total number of bytes consumed from the mmap'd file.
 */
static unsigned long load_cache_entries_threaded(struct index_state *istate, const char *mmap, size_t mmap_size,
						 int nr_threads, struct index_entry_offset_table *ieot)
{
	int i, offset, ieot_blocks, ieot_start, err;
	struct load_cache_entries_thread_data *data;
	unsigned long consumed = 0;

	/* a little sanity checking */
	if (istate->name_hash_initialized)
		BUG("the name hash isn't thread safe");

	istate->ce_mem_pool = xmalloc(sizeof(*istate->ce_mem_pool));
	mem_pool_init(istate->ce_mem_pool, 0);

	/* ensure we have no more threads than we have blocks to process */
	if (nr_threads > ieot->nr)
		nr_threads = ieot->nr;
	CALLOC_ARRAY(data, nr_threads);

	offset = ieot_start = 0;
	ieot_blocks = DIV_ROUND_UP(ieot->nr, nr_threads);
	for (i = 0; i < nr_threads; i++) {
		struct load_cache_entries_thread_data *p = &data[i];
		int nr, j;

		/* the last thread may get a short block */
		if (ieot_start + ieot_blocks > ieot->nr)
			ieot_blocks = ieot->nr - ieot_start;

		p->istate = istate;
		p->offset = offset;
		p->mmap = mmap;
		p->ieot = ieot;
		p->ieot_start = ieot_start;
		p->ieot_blocks = ieot_blocks;

		/* create a mem_pool for each thread */
		nr = 0;
		for (j = p->ieot_start; j < p->ieot_start + p->ieot_blocks; j++)
			nr += p->ieot->entries[j].nr;
		p->ce_mem_pool = xmalloc(sizeof(*istate->ce_mem_pool));
		if (istate->version == 4) {
			mem_pool_init(p->ce_mem_pool,
				estimate_cache_size_from_compressed(nr));
		} else {
			mem_pool_init(p->ce_mem_pool,
				estimate_cache_size(mmap_size, nr));
		}

		err = pthread_create(&p->pthread, NULL, load_cache_entries_thread, p);
		if (err)
			die(_("unable to create load_cache_entries thread: %s"), strerror(err));

		/* increment by the number of cache entries in the ieot block being processed */
		for (j = 0; j < ieot_blocks; j++)
			offset += ieot->entries[ieot_start + j].nr;
		ieot_start += ieot_blocks;
	}

	for (i = 0; i < nr_threads; i++) {
		struct load_cache_entries_thread_data *p = &data[i];

		err = pthread_join(p->pthread, NULL);
		if (err)
			die(_("unable to join load_cache_entries thread: %s"), strerror(err));
		mem_pool_combine(istate->ce_mem_pool, p->ce_mem_pool);
		free(p->ce_mem_pool);
		consumed += p->consumed;
	}

	free(data);

	return consumed;
}

static void set_new_index_sparsity(struct index_state *istate)
{
	/*
	 * If the index's repo exists, mark it sparse according to
	 * repo settings.
	 */
	prepare_repo_settings(istate->repo);
	if (!istate->repo->settings.command_requires_full_index &&
	    is_sparse_index_allowed(istate, 0))
		istate->sparse_index = 1;
}

/* remember to discard_cache() before reading a different cache! */
int do_read_index(struct index_state *istate, const char *path, int must_exist)
{
	int fd;
	struct stat st;
	unsigned long src_offset;
	const struct cache_header *hdr;
	const char *mmap;
	size_t mmap_size;
	struct load_index_extensions p;
	size_t extension_offset = 0;
	int nr_threads, cpus;
	struct index_entry_offset_table *ieot = NULL;

	if (istate->initialized)
		return istate->cache_nr;

	istate->timestamp.sec = 0;
	istate->timestamp.nsec = 0;
	fd = open(path, O_RDONLY);
	if (fd < 0) {
		/* a missing index file is only fatal when the caller insists */
		if (!must_exist && errno == ENOENT) {
			set_new_index_sparsity(istate);
			istate->initialized = 1;
			return 0;
		}
		die_errno(_("%s: index file open failed"), path);
	}

	if (fstat(fd, &st))
		die_errno(_("%s: cannot stat the open index"), path);

	mmap_size = xsize_t(st.st_size);
	/* must at least hold a header plus the trailing checksum */
	if (mmap_size < sizeof(struct cache_header) + the_hash_algo->rawsz)
		die(_("%s: index file smaller than expected"), path);

	mmap = xmmap_gently(NULL, mmap_size, PROT_READ, MAP_PRIVATE, fd, 0);
	if (mmap == MAP_FAILED)
		die_errno(_("%s: unable to map index file%s"), path,
			mmap_os_err());
	close(fd);

	hdr = (const struct cache_header *)mmap;
	if (verify_hdr(hdr, mmap_size) < 0)
		goto unmap;

	/* the index checksum is the last rawsz bytes of the file */
	oidread(&istate->oid, (const unsigned char *)hdr + mmap_size - the_hash_algo->rawsz,
		the_repository->hash_algo);
	istate->version = ntohl(hdr->hdr_version);
	istate->cache_nr = ntohl(hdr->hdr_entries);
	istate->cache_alloc = alloc_nr(istate->cache_nr);
	CALLOC_ARRAY(istate->cache, istate->cache_alloc);
	istate->initialized = 1;

	p.istate = istate;
	p.mmap = mmap;
	p.mmap_size = mmap_size;

	src_offset = sizeof(*hdr);

	if (repo_config_get_index_threads(the_repository, &nr_threads))
		nr_threads = 1;

	/* TODO: does creating more threads than cores help? */
	if (!nr_threads) {
		nr_threads = istate->cache_nr / THREAD_COST;
		cpus = online_cpus();
		if (nr_threads > cpus)
			nr_threads = cpus;
	}

	if (!HAVE_THREADS)
		nr_threads = 1;

	if (nr_threads > 1) {
		extension_offset = read_eoie_extension(mmap, mmap_size);
		if (extension_offset) {
			int err;

			/* overlap extension parsing with entry loading */
			p.src_offset = extension_offset;
			err = pthread_create(&p.pthread, NULL, load_index_extensions, &p);
			if (err)
				die(_("unable to create load_index_extensions thread: %s"), strerror(err));

			/* one thread is now busy with extensions */
			nr_threads--;
		}
	}

	/*
	 * Locate and read the index entry offset table so that we can use it
	 * to multi-thread the reading of the cache entries.
	 */
	if (extension_offset && nr_threads > 1)
		ieot = read_ieot_extension(mmap, mmap_size, extension_offset);

	if (ieot) {
		src_offset += load_cache_entries_threaded(istate, mmap, mmap_size, nr_threads, ieot);
		free(ieot);
	} else {
		src_offset += load_all_cache_entries(istate, mmap, mmap_size, src_offset);
	}

	istate->timestamp.sec = st.st_mtime;
	istate->timestamp.nsec = ST_MTIME_NSEC(st);

	/* if we created a thread, join it otherwise load the extensions on the primary thread */
	if (extension_offset) {
		int ret = pthread_join(p.pthread, NULL);
		if (ret)
			die(_("unable to join load_index_extensions thread: %s"), strerror(ret));
	} else {
		p.src_offset = src_offset;
		load_index_extensions(&p);
	}
	munmap((void *)mmap, mmap_size);

	/*
	 * TODO trace2: replace "the_repository" with the actual repo instance
	 * that is associated with the given "istate".
	 */
	trace2_data_intmax("index", the_repository, "read/version",
			   istate->version);
	trace2_data_intmax("index", the_repository, "read/cache_nr",
			   istate->cache_nr);

	/*
	 * If the command explicitly requires a full index, force it
	 * to be full. Otherwise, correct the sparsity based on repository
	 * settings and other properties of the index (if necessary).
	 */
	prepare_repo_settings(istate->repo);
	if (istate->repo->settings.command_requires_full_index)
		ensure_full_index(istate);
	else
		ensure_correct_sparsity(istate);

	return istate->cache_nr;

unmap:
	munmap((void *)mmap, mmap_size);
	die(_("index file corrupt"));
}

/*
 * Signal that the shared index is used by updating its mtime.
 *
 * This way, shared index can be removed if they have not been used
 * for some time.
 */
static void freshen_shared_index(const char *shared_index, int warn)
{
	if (!check_and_freshen_file(shared_index, 1) && warn)
		warning(_("could not freshen shared index '%s'"), shared_index);
}

int read_index_from(struct index_state *istate, const char *path,
		    const char *gitdir)
{
	struct split_index *split_index;
	int ret;
	char *base_oid_hex;
	char *base_path;

	/* istate->initialized covers both .git/index and .git/sharedindex.xxx */
	if (istate->initialized)
		return istate->cache_nr;

	/*
	 * TODO trace2: replace "the_repository" with the actual repo instance
	 * that is associated with the given "istate".
	 */
	trace2_region_enter_printf("index", "do_read_index", the_repository,
				   "%s", path);
	trace_performance_enter();
	ret = do_read_index(istate, path, 0);
	trace_performance_leave("read cache %s", path);
	trace2_region_leave_printf("index", "do_read_index", the_repository,
				   "%s", path);

	/* without a split index (or with a null base) we are already done */
	split_index = istate->split_index;
	if (!split_index || is_null_oid(&split_index->base_oid)) {
		post_read_index_from(istate);
		return ret;
	}

	trace_performance_enter();
	if (split_index->base)
		release_index(split_index->base);
	else
		ALLOC_ARRAY(split_index->base, 1);
	index_state_init(split_index->base, istate->repo);

	base_oid_hex = oid_to_hex(&split_index->base_oid);
	base_path = xstrfmt("%s/sharedindex.%s", gitdir, base_oid_hex);
	if (file_exists(base_path)) {
		trace2_region_enter_printf("index", "shared/do_read_index",
					   the_repository, "%s", base_path);

		ret = do_read_index(split_index->base, base_path, 0);
		trace2_region_leave_printf("index", "shared/do_read_index",
					   the_repository, "%s", base_path);
	} else {
		/* fall back to looking next to the primary index file */
		char *path_copy = xstrdup(path);
		char *base_path2 = xstrfmt("%s/sharedindex.%s",
					   dirname(path_copy), base_oid_hex);
		free(path_copy);
		trace2_region_enter_printf("index", "shared/do_read_index",
					   the_repository, "%s", base_path2);
		ret = do_read_index(split_index->base, base_path2, 1);
		trace2_region_leave_printf("index", "shared/do_read_index",
					   the_repository, "%s", base_path2);
		free(base_path2);
	}
	if (!oideq(&split_index->base_oid, &split_index->base->oid))
		die(_("broken index, expect %s in %s, got %s"),
		    base_oid_hex, base_path,
		    oid_to_hex(&split_index->base->oid));

	freshen_shared_index(base_path, 0);
	merge_base_index(istate);
	post_read_index_from(istate);
	trace_performance_leave("read cache %s", base_path);
	free(base_path);
	return ret;
}

/* An index is "unborn" when it has no entries and no on-disk timestamp. */
int is_index_unborn(struct index_state *istate)
{
	return (!istate->cache_nr && !istate->timestamp.sec);
}

/* Reset `istate` to a pristine state associated with repository `r`. */
void index_state_init(struct index_state *istate, struct repository *r)
{
	struct index_state blank = INDEX_STATE_INIT(r);
	memcpy(istate, &blank, sizeof(*istate));
}

void release_index(struct index_state *istate)
{
	/*
	 * Cache entries in istate->cache[] should have been allocated
	 * from the memory pool associated with this index, or from an
	 * associated split_index. There is no need to free individual
	 * cache entries. validate_cache_entries can detect when this
	 * assertion does not hold.
	 */
	validate_cache_entries(istate);

	resolve_undo_clear_index(istate);
	free_name_hash(istate);
	cache_tree_free(&(istate->cache_tree));
	free(istate->fsmonitor_last_update);
	free(istate->cache);
	discard_split_index(istate);
	free_untracked_cache(istate->untracked);

	if (istate->sparse_checkout_patterns) {
		clear_pattern_list(istate->sparse_checkout_patterns);
		FREE_AND_NULL(istate->sparse_checkout_patterns);
	}

	if (istate->ce_mem_pool) {
		mem_pool_discard(istate->ce_mem_pool, should_validate_cache_entries());
		FREE_AND_NULL(istate->ce_mem_pool);
	}
}

/* Release all resources, then reinitialize the index so it can be reused. */
void discard_index(struct index_state *istate)
{
	release_index(istate);
	index_state_init(istate, istate->repo);
}

/*
 * Validate the cache entries of this index.
 * All cache entries associated with this index
 * should have been allocated by the memory pool
 * associated with this index, or by a referenced
 * split index.
2464 */ 2465void validate_cache_entries(const struct index_state *istate) 2466{ 2467 int i; 2468 2469 if (!should_validate_cache_entries() ||!istate || !istate->initialized) 2470 return; 2471 2472 for (i = 0; i < istate->cache_nr; i++) { 2473 if (!istate) { 2474 BUG("cache entry is not allocated from expected memory pool"); 2475 } else if (!istate->ce_mem_pool || 2476 !mem_pool_contains(istate->ce_mem_pool, istate->cache[i])) { 2477 if (!istate->split_index || 2478 !istate->split_index->base || 2479 !istate->split_index->base->ce_mem_pool || 2480 !mem_pool_contains(istate->split_index->base->ce_mem_pool, istate->cache[i])) { 2481 BUG("cache entry is not allocated from expected memory pool"); 2482 } 2483 } 2484 } 2485 2486 if (istate->split_index) 2487 validate_cache_entries(istate->split_index->base); 2488} 2489 2490int unmerged_index(const struct index_state *istate) 2491{ 2492 int i; 2493 for (i = 0; i < istate->cache_nr; i++) { 2494 if (ce_stage(istate->cache[i])) 2495 return 1; 2496 } 2497 return 0; 2498} 2499 2500int repo_index_has_changes(struct repository *repo, 2501 struct tree *tree, 2502 struct strbuf *sb) 2503{ 2504 struct index_state *istate = repo->index; 2505 struct object_id cmp; 2506 int i; 2507 2508 if (tree) 2509 cmp = tree->object.oid; 2510 if (tree || !repo_get_oid_tree(repo, "HEAD", &cmp)) { 2511 struct diff_options opt; 2512 2513 repo_diff_setup(repo, &opt); 2514 opt.flags.exit_with_status = 1; 2515 if (!sb) 2516 opt.flags.quick = 1; 2517 diff_setup_done(&opt); 2518 do_diff_cache(&cmp, &opt); 2519 diffcore_std(&opt); 2520 for (i = 0; sb && i < diff_queued_diff.nr; i++) { 2521 if (i) 2522 strbuf_addch(sb, ' '); 2523 strbuf_addstr(sb, diff_queued_diff.queue[i]->two->path); 2524 } 2525 diff_flush(&opt); 2526 return opt.flags.has_changes != 0; 2527 } else { 2528 /* TODO: audit for interaction with sparse-index. 
*/ 2529 ensure_full_index(istate); 2530 for (i = 0; sb && i < istate->cache_nr; i++) { 2531 if (i) 2532 strbuf_addch(sb, ' '); 2533 strbuf_addstr(sb, istate->cache[i]->name); 2534 } 2535 return !!istate->cache_nr; 2536 } 2537} 2538 2539static int write_index_ext_header(struct hashfile *f, 2540 struct git_hash_ctx *eoie_f, 2541 unsigned int ext, 2542 unsigned int sz) 2543{ 2544 hashwrite_be32(f, ext); 2545 hashwrite_be32(f, sz); 2546 2547 if (eoie_f) { 2548 ext = htonl(ext); 2549 sz = htonl(sz); 2550 git_hash_update(eoie_f, &ext, sizeof(ext)); 2551 git_hash_update(eoie_f, &sz, sizeof(sz)); 2552 } 2553 return 0; 2554} 2555 2556static void ce_smudge_racily_clean_entry(struct index_state *istate, 2557 struct cache_entry *ce) 2558{ 2559 /* 2560 * The only thing we care about in this function is to smudge the 2561 * falsely clean entry due to touch-update-touch race, so we leave 2562 * everything else as they are. We are called for entries whose 2563 * ce_stat_data.sd_mtime match the index file mtime. 2564 * 2565 * Note that this actually does not do much for gitlinks, for 2566 * which ce_match_stat_basic() always goes to the actual 2567 * contents. The caller checks with is_racy_timestamp() which 2568 * always says "no" for gitlinks, so we are not called for them ;-) 2569 */ 2570 struct stat st; 2571 2572 if (lstat(ce->name, &st) < 0) 2573 return; 2574 if (ce_match_stat_basic(ce, &st)) 2575 return; 2576 if (ce_modified_check_fs(istate, ce, &st)) { 2577 /* This is "racily clean"; smudge it. Note that this 2578 * is a tricky code. At first glance, it may appear 2579 * that it can break with this sequence: 2580 * 2581 * $ echo xyzzy >frotz 2582 * $ git-update-index --add frotz 2583 * $ : >frotz 2584 * $ sleep 3 2585 * $ echo filfre >nitfol 2586 * $ git-update-index --add nitfol 2587 * 2588 * but it does not. 
		 * When the second update-index runs,
		 * it notices that the entry "frotz" has the same timestamp
		 * as index, and if we were to smudge it by resetting its
		 * size to zero here, then the object name recorded
		 * in index is the 6-byte file but the cached stat information
		 * becomes zero --- which would then match what we would
		 * obtain from the filesystem next time we stat("frotz").
		 *
		 * However, the second update-index, before calling
		 * this function, notices that the cached size is 6
		 * bytes and what is on the filesystem is an empty
		 * file, and never calls us, so the cached size information
		 * for "frotz" stays 6 which does not match the filesystem.
		 */
		ce->ce_stat_data.sd_size = 0;
	}
}

/* Copy miscellaneous fields but not the name */
static void copy_cache_entry_to_ondisk(struct ondisk_cache_entry *ondisk,
				       struct cache_entry *ce)
{
	short flags;
	const unsigned hashsz = the_hash_algo->rawsz;
	/* the flag words live immediately after the object hash on disk */
	uint16_t *flagsp = (uint16_t *)(ondisk->data + hashsz);

	/* all stat fields are stored in network byte order */
	ondisk->ctime.sec = htonl(ce->ce_stat_data.sd_ctime.sec);
	ondisk->mtime.sec = htonl(ce->ce_stat_data.sd_mtime.sec);
	ondisk->ctime.nsec = htonl(ce->ce_stat_data.sd_ctime.nsec);
	ondisk->mtime.nsec = htonl(ce->ce_stat_data.sd_mtime.nsec);
	ondisk->dev = htonl(ce->ce_stat_data.sd_dev);
	ondisk->ino = htonl(ce->ce_stat_data.sd_ino);
	ondisk->mode = htonl(ce->ce_mode);
	ondisk->uid = htonl(ce->ce_stat_data.sd_uid);
	ondisk->gid = htonl(ce->ce_stat_data.sd_gid);
	ondisk->size = htonl(ce->ce_stat_data.sd_size);
	hashcpy(ondisk->data, ce->oid.hash, the_repository->hash_algo);

	/* the name length saturates at CE_NAMEMASK in the on-disk flags */
	flags = ce->ce_flags & ~CE_NAMEMASK;
	flags |= (ce_namelen(ce) >= CE_NAMEMASK ? CE_NAMEMASK : ce_namelen(ce));
	flagsp[0] = htons(flags);
	if (ce->ce_flags & CE_EXTENDED) {
		flagsp[1] = htons((ce->ce_flags & CE_EXTENDED_FLAGS) >> 16);
	}
}

/*
 * Serialize one cache entry to the index being written.  `previous_name`
 * is non-NULL only for V4 indexes, where entry names are prefix-compressed
 * against the preceding entry.
 */
static int ce_write_entry(struct hashfile *f, struct cache_entry *ce,
			  struct strbuf *previous_name, struct ondisk_cache_entry *ondisk)
{
	int size;
	unsigned int saved_namelen;
	int stripped_name = 0;
	static unsigned char padding[8] = { 0x00 };

	/* CE_STRIP_NAME: write the entry with an empty name, then restore it */
	if (ce->ce_flags & CE_STRIP_NAME) {
		saved_namelen = ce_namelen(ce);
		ce->ce_namelen = 0;
		stripped_name = 1;
	}

	size = offsetof(struct ondisk_cache_entry,data) + ondisk_data_size(ce->ce_flags, 0);

	if (!previous_name) {
		/* V2/V3: full name, NUL-padded to an 8-byte boundary */
		int len = ce_namelen(ce);
		copy_cache_entry_to_ondisk(ondisk, ce);
		hashwrite(f, ondisk, size);
		hashwrite(f, ce->name, len);
		hashwrite(f, padding, align_padding_size(size, len));
	} else {
		/* V4: varint "strip count" followed by the non-shared suffix */
		int common, to_remove;
		uint8_t prefix_size;
		unsigned char to_remove_vi[16];

		for (common = 0;
		     (common < previous_name->len &&
		      ce->name[common] &&
		      ce->name[common] == previous_name->buf[common]);
		     common++)
			; /* still matching */
		to_remove = previous_name->len - common;
		prefix_size = encode_varint(to_remove, to_remove_vi);

		copy_cache_entry_to_ondisk(ondisk, ce);
		hashwrite(f, ondisk, size);
		hashwrite(f, to_remove_vi, prefix_size);
		hashwrite(f, ce->name + common, ce_namelen(ce) - common);
		hashwrite(f, padding, 1);

		/* remember this name for the next entry's prefix compression */
		strbuf_splice(previous_name, common, to_remove,
			      ce->name + common, ce_namelen(ce) - common);
	}
	if (stripped_name) {
		ce->ce_namelen = saved_namelen;
		ce->ce_flags &= ~CE_STRIP_NAME;
	}

	return 0;
}

/*
 * This function verifies if index_state has the correct sha1 of the
 * index file. Don't die if we have any other failure, just return 0.
 */
static int verify_index_from(const struct index_state *istate, const char *path)
{
	int fd;
	ssize_t n;
	struct stat st;
	unsigned char hash[GIT_MAX_RAWSZ];

	if (!istate->initialized)
		return 0;

	fd = open(path, O_RDONLY);
	if (fd < 0)
		return 0;

	if (fstat(fd, &st))
		goto out;

	if (st.st_size < sizeof(struct cache_header) + the_hash_algo->rawsz)
		goto out;

	/* read the trailing checksum and compare with the one we hold */
	n = pread_in_full(fd, hash, the_hash_algo->rawsz, st.st_size - the_hash_algo->rawsz);
	if (n != the_hash_algo->rawsz)
		goto out;

	if (!hasheq(istate->oid.hash, hash, the_repository->hash_algo))
		goto out;

	close(fd);
	return 1;

out:
	close(fd);
	return 0;
}

static int repo_verify_index(struct repository *repo)
{
	return verify_index_from(repo->index, repo->index_file);
}

/* Return 1 if any entry has a stat mtime that races with the index mtime. */
int has_racy_timestamp(struct index_state *istate)
{
	int entries = istate->cache_nr;
	int i;

	for (i = 0; i < entries; i++) {
		struct cache_entry *ce = istate->cache[i];
		if (is_racy_timestamp(istate, ce))
			return 1;
	}
	return 0;
}

/*
 * Opportunistically rewrite the index: only when something changed (or a
 * racy timestamp needs refreshing) and the on-disk file still matches the
 * in-core state; otherwise drop the lock without writing.
 */
void repo_update_index_if_able(struct repository *repo,
			       struct lock_file *lockfile)
{
	if ((repo->index->cache_changed ||
	     has_racy_timestamp(repo->index)) &&
	    repo_verify_index(repo))
		write_locked_index(repo->index, lockfile, COMMIT_LOCK);
	else
		rollback_lock_file(lockfile);
}

/* Should the end-of-index-entries (EOIE) extension be written? */
static int record_eoie(void)
{
	int val;

	if (!repo_config_get_bool(the_repository, "index.recordendofindexentries", &val))
		return val;

	/*
	 * As a convenience, the end of index entries extension
	 * used for threading is written by default if the user
	 * explicitly requested threaded index reads.
	 */
	return !repo_config_get_index_threads(the_repository, &val) && val != 1;
}

/* Should the index-entry offset table (IEOT) extension be written? */
static int record_ieot(void)
{
	int val;

	if (!repo_config_get_bool(the_repository, "index.recordoffsettable", &val))
		return val;

	/*
	 * As a convenience, the offset table used for threading is
	 * written by default if the user explicitly requested
	 * threaded index reads.
	 */
	return !repo_config_get_index_threads(the_repository, &val) && val != 1;
}

/* Bitmask selecting which optional extensions do_write_index() emits. */
enum write_extensions {
	WRITE_NO_EXTENSION = 0,
	WRITE_SPLIT_INDEX_EXTENSION = 1<<0,
	WRITE_CACHE_TREE_EXTENSION = 1<<1,
	WRITE_RESOLVE_UNDO_EXTENSION = 1<<2,
	WRITE_UNTRACKED_CACHE_EXTENSION = 1<<3,
	WRITE_FSMONITOR_EXTENSION = 1<<4,
};
#define WRITE_ALL_EXTENSIONS ((enum write_extensions)-1)

/*
 * On success, `tempfile` is closed. If it is the temporary file
 * of a `struct lock_file`, we will therefore effectively perform
 * a 'close_lock_file_gently()`. Since that is an implementation
 * detail of lockfiles, callers of `do_write_index()` should not
 * rely on it.
 */
static int do_write_index(struct index_state *istate, struct tempfile *tempfile,
			  enum write_extensions write_extensions, unsigned flags)
{
	uint64_t start = getnanotime();
	struct hashfile *f;
	struct git_hash_ctx *eoie_c = NULL;
	struct cache_header hdr;
	int i, err = 0, removed, extended, hdr_version;
	struct cache_entry **cache = istate->cache;
	int entries = istate->cache_nr;
	struct stat st;
	struct ondisk_cache_entry ondisk;
	struct strbuf previous_name_buf = STRBUF_INIT, *previous_name;
	int drop_cache_tree = istate->drop_cache_tree;
	off_t offset;
	int csum_fsync_flag;
	int ieot_entries = 1;
	struct index_entry_offset_table *ieot = NULL;
	struct repository *r = istate->repo;
	struct strbuf sb = STRBUF_INIT;
	int nr, nr_threads, ret;

	f = hashfd(the_repository->hash_algo, tempfile->fd, tempfile->filename.buf);

	prepare_repo_settings(r);
	f->skip_hash = r->settings.index_skip_hash;

	/* count removed entries and mark entries needing the extended format */
	for (i = removed = extended = 0; i < entries; i++) {
		if (cache[i]->ce_flags & CE_REMOVE)
			removed++;

		/* reduce extended entries if possible */
		cache[i]->ce_flags &= ~CE_EXTENDED;
		if (cache[i]->ce_flags & CE_EXTENDED_FLAGS) {
			extended++;
			cache[i]->ce_flags |= CE_EXTENDED;
		}
	}

	if (!istate->version)
		istate->version = get_index_format_default(r);

	/* demote version 3 to version 2 when the latter suffices */
	if (istate->version == 3 || istate->version == 2)
		istate->version = extended ? 3 : 2;

	hdr_version = istate->version;

	hdr.hdr_signature = htonl(CACHE_SIGNATURE);
	hdr.hdr_version = htonl(hdr_version);
	hdr.hdr_entries = htonl(entries - removed);

	hashwrite(f, &hdr, sizeof(hdr));

	if (!HAVE_THREADS || repo_config_get_index_threads(the_repository, &nr_threads))
		nr_threads = 1;

	if (nr_threads != 1 && record_ieot()) {
		int ieot_blocks, cpus;

		/*
		 * ensure default number of ieot blocks maps evenly to the
		 * default number of threads that will process them leaving
		 * room for the thread to load the index extensions.
		 */
		if (!nr_threads) {
			ieot_blocks = istate->cache_nr / THREAD_COST;
			cpus = online_cpus();
			if (ieot_blocks > cpus - 1)
				ieot_blocks = cpus - 1;
		} else {
			ieot_blocks = nr_threads;
			if (ieot_blocks > istate->cache_nr)
				ieot_blocks = istate->cache_nr;
		}

		/*
		 * no reason to write out the IEOT extension if we don't
		 * have enough blocks to utilize multi-threading
		 */
		if (ieot_blocks > 1) {
			ieot = xcalloc(1, sizeof(struct index_entry_offset_table)
				+ (ieot_blocks * sizeof(struct index_entry_offset)));
			ieot_entries = DIV_ROUND_UP(entries, ieot_blocks);
		}
	}

	offset = hashfile_total(f);

	nr = 0;
	/* only V4 prefix-compresses entry names */
	previous_name = (hdr_version == 4) ? &previous_name_buf : NULL;

	for (i = 0; i < entries; i++) {
		struct cache_entry *ce = cache[i];
		if (ce->ce_flags & CE_REMOVE)
			continue;
		if (!ce_uptodate(ce) && is_racy_timestamp(istate, ce))
			ce_smudge_racily_clean_entry(istate, ce);
		if (is_null_oid(&ce->oid)) {
			static const char msg[] = "cache entry has null sha1: %s";
			static int allow = -1;

			if (allow < 0)
				allow = git_env_bool("GIT_ALLOW_NULL_SHA1", 0);
			if (allow)
				warning(msg, ce->name);
			else
				err = error(msg, ce->name);

			drop_cache_tree = 1;
		}
		if (ieot && i && (i % ieot_entries == 0)) {
			/* close out the current IEOT block and start a new one */
			ieot->entries[ieot->nr].nr = nr;
			ieot->entries[ieot->nr].offset = offset;
			ieot->nr++;
			/*
			 * If we have a V4 index, set the first byte to an invalid
			 * character to ensure there is nothing common with the previous
			 * entry
			 */
			if (previous_name)
				previous_name->buf[0] = 0;
			nr = 0;

			offset = hashfile_total(f);
		}
		if (ce_write_entry(f, ce, previous_name, (struct ondisk_cache_entry *)&ondisk) < 0)
			err = -1;

		if (err)
			break;
		nr++;
	}
	/* flush the final (partial) IEOT block */
	if (ieot && nr) {
		ieot->entries[ieot->nr].nr = nr;
		ieot->entries[ieot->nr].offset = offset;
		ieot->nr++;
	}
	strbuf_release(&previous_name_buf);

	if (err) {
		ret = err;
		goto out;
	}

	offset = hashfile_total(f);

	/*
	 * The extension headers must be hashed on their own for the
	 * EOIE extension. Create a hashfile here to compute that hash.
	 */
	if (offset && record_eoie()) {
		CALLOC_ARRAY(eoie_c, 1);
		the_hash_algo->init_fn(eoie_c);
	}

	/*
	 * Lets write out CACHE_EXT_INDEXENTRYOFFSETTABLE first so that we
	 * can minimize the number of extensions we have to scan through to
	 * find it during load. Write it out regardless of the
	 * strip_extensions parameter as we need it when loading the shared
	 * index.
	 */
	if (ieot) {
		strbuf_reset(&sb);

		write_ieot_extension(&sb, ieot);
		err = write_index_ext_header(f, eoie_c, CACHE_EXT_INDEXENTRYOFFSETTABLE, sb.len) < 0;
		hashwrite(f, sb.buf, sb.len);
		if (err) {
			ret = -1;
			goto out;
		}
	}

	if (write_extensions & WRITE_SPLIT_INDEX_EXTENSION &&
	    istate->split_index) {
		strbuf_reset(&sb);

		if (istate->sparse_index)
			die(_("cannot write split index for a sparse index"));

		err = write_link_extension(&sb, istate) < 0 ||
			write_index_ext_header(f, eoie_c, CACHE_EXT_LINK,
					       sb.len) < 0;
		hashwrite(f, sb.buf, sb.len);
		if (err) {
			ret = -1;
			goto out;
		}
	}
	if (write_extensions & WRITE_CACHE_TREE_EXTENSION &&
	    !drop_cache_tree && istate->cache_tree) {
		strbuf_reset(&sb);

		cache_tree_write(&sb, istate->cache_tree);
		err = write_index_ext_header(f, eoie_c, CACHE_EXT_TREE, sb.len) < 0;
		hashwrite(f, sb.buf, sb.len);
		if (err) {
			ret = -1;
			goto out;
		}
	}
	if (write_extensions & WRITE_RESOLVE_UNDO_EXTENSION &&
	    istate->resolve_undo) {
		strbuf_reset(&sb);

		resolve_undo_write(&sb, istate->resolve_undo, the_hash_algo);
		err = write_index_ext_header(f, eoie_c, CACHE_EXT_RESOLVE_UNDO,
					     sb.len) < 0;
		hashwrite(f, sb.buf, sb.len);
		if (err) {
			ret = -1;
			goto out;
		}
	}
	if (write_extensions & WRITE_UNTRACKED_CACHE_EXTENSION &&
	    istate->untracked) {
		strbuf_reset(&sb);

		write_untracked_extension(&sb, istate->untracked);
		err = write_index_ext_header(f, eoie_c, CACHE_EXT_UNTRACKED,
					     sb.len) < 0;
		hashwrite(f, sb.buf, sb.len);
		if (err) {
			ret = -1;
			goto out;
		}
	}
	if (write_extensions & WRITE_FSMONITOR_EXTENSION &&
	    istate->fsmonitor_last_update) {
		strbuf_reset(&sb);

		write_fsmonitor_extension(&sb, istate);
		err = write_index_ext_header(f, eoie_c, CACHE_EXT_FSMONITOR, sb.len) < 0;
		hashwrite(f, sb.buf, sb.len);
		if (err) {
			ret = -1;
			goto out;
		}
	}
	/* "sdir" is a zero-payload marker extension */
	if (istate->sparse_index) {
		if (write_index_ext_header(f, eoie_c, CACHE_EXT_SPARSE_DIRECTORIES, 0) < 0) {
			ret = -1;
			goto out;
		}
	}

	/*
	 * CACHE_EXT_ENDOFINDEXENTRIES must be written as the last entry before the SHA1
	 * so that it can be found and processed before all the index entries are
	 * read. Write it out regardless of the strip_extensions parameter as we need it
	 * when loading the shared index.
	 */
	if (eoie_c) {
		strbuf_reset(&sb);

		write_eoie_extension(&sb, eoie_c, offset);
		err = write_index_ext_header(f, NULL, CACHE_EXT_ENDOFINDEXENTRIES, sb.len) < 0;
		hashwrite(f, sb.buf, sb.len);
		if (err) {
			ret = -1;
			goto out;
		}
	}

	csum_fsync_flag = 0;
	if (!alternate_index_output && (flags & COMMIT_LOCK))
		csum_fsync_flag = CSUM_FSYNC;

	finalize_hashfile(f, istate->oid.hash, FSYNC_COMPONENT_INDEX,
			  CSUM_HASH_IN_STREAM | csum_fsync_flag);
	f = NULL;

	if (close_tempfile_gently(tempfile)) {
		ret = error(_("could not close '%s'"), get_tempfile_path(tempfile));
		goto out;
	}
	if (stat(get_tempfile_path(tempfile), &st)) {
		ret = -1;
		goto out;
	}
	/* remember the mtime of what we just wrote for racy-timestamp checks */
	istate->timestamp.sec = (unsigned int)st.st_mtime;
	istate->timestamp.nsec = ST_MTIME_NSEC(st);
	trace_performance_since(start, "write index, changed mask = %x", istate->cache_changed);

	/*
	 * TODO trace2: replace "the_repository" with the actual repo instance
	 * that is associated with the given "istate".
	 */
	trace2_data_intmax("index", the_repository, "write/version",
			   istate->version);
	trace2_data_intmax("index", the_repository, "write/cache_nr",
			   istate->cache_nr);

	ret = 0;

out:
	if (f)
		free_hashfile(f);
	strbuf_release(&sb);
	free(eoie_c);
	free(ieot);
	return ret;
}

void set_alternate_index_output(const char *name)
{
	alternate_index_output = name;
}

static int commit_locked_index(struct lock_file *lk)
{
	if (alternate_index_output)
		return commit_lock_file_to(lk, alternate_index_output);
	else
		return commit_lock_file(lk);
}

/*
 * Write the in-core index to the lockfile's tempfile, then either commit
 * the lock (COMMIT_LOCK) or just close it, and run the post-index-change
 * hook.  Restores full-index state if the write converted to sparse.
 */
static int do_write_locked_index(struct index_state *istate,
				 struct lock_file *lock,
				 unsigned flags,
				 enum write_extensions write_extensions)
{
	int ret;
	int was_full = istate->sparse_index == INDEX_EXPANDED;

	ret = convert_to_sparse(istate, 0);

	if (ret) {
		warning(_("failed to convert to a sparse-index"));
		return ret;
	}

	/*
	 * TODO trace2: replace "the_repository" with the actual repo instance
	 * that is associated with the given "istate".
	 */
	trace2_region_enter_printf("index", "do_write_index", the_repository,
				   "%s", get_lock_file_path(lock));
	ret = do_write_index(istate, lock->tempfile, write_extensions, flags);
	trace2_region_leave_printf("index", "do_write_index", the_repository,
				   "%s", get_lock_file_path(lock));

	if (was_full)
		ensure_full_index(istate);

	if (ret)
		return ret;
	if (flags & COMMIT_LOCK)
		ret = commit_locked_index(lock);
	else
		ret = close_lock_file_gently(lock);

	run_hooks_l(the_repository, "post-index-change",
		    istate->updated_workdir ? "1" : "0",
		    istate->updated_skipworktree ?
"1" : "0", NULL); 3159 istate->updated_workdir = 0; 3160 istate->updated_skipworktree = 0; 3161 3162 return ret; 3163} 3164 3165static int write_split_index(struct index_state *istate, 3166 struct lock_file *lock, 3167 unsigned flags) 3168{ 3169 int ret; 3170 prepare_to_write_split_index(istate); 3171 ret = do_write_locked_index(istate, lock, flags, WRITE_ALL_EXTENSIONS); 3172 finish_writing_split_index(istate); 3173 return ret; 3174} 3175 3176static unsigned long get_shared_index_expire_date(void) 3177{ 3178 static unsigned long shared_index_expire_date; 3179 static int shared_index_expire_date_prepared; 3180 3181 if (!shared_index_expire_date_prepared) { 3182 const char *shared_index_expire = "2.weeks.ago"; 3183 char *value = NULL; 3184 3185 repo_config_get_expiry(the_repository, "splitindex.sharedindexexpire", 3186 &value); 3187 if (value) 3188 shared_index_expire = value; 3189 3190 shared_index_expire_date = approxidate(shared_index_expire); 3191 shared_index_expire_date_prepared = 1; 3192 3193 free(value); 3194 } 3195 3196 return shared_index_expire_date; 3197} 3198 3199static int should_delete_shared_index(const char *shared_index_path) 3200{ 3201 struct stat st; 3202 unsigned long expiration; 3203 3204 /* Check timestamp */ 3205 expiration = get_shared_index_expire_date(); 3206 if (!expiration) 3207 return 0; 3208 if (stat(shared_index_path, &st)) 3209 return error_errno(_("could not stat '%s'"), shared_index_path); 3210 if (st.st_mtime > expiration) 3211 return 0; 3212 3213 return 1; 3214} 3215 3216static int clean_shared_index_files(const char *current_hex) 3217{ 3218 struct dirent *de; 3219 DIR *dir = opendir(repo_get_git_dir(the_repository)); 3220 3221 if (!dir) 3222 return error_errno(_("unable to open git dir: %s"), 3223 repo_get_git_dir(the_repository)); 3224 3225 while ((de = readdir(dir)) != NULL) { 3226 const char *sha1_hex; 3227 char *shared_index_path; 3228 if (!skip_prefix(de->d_name, "sharedindex.", &sha1_hex)) 3229 continue; 3230 if 
(!strcmp(sha1_hex, current_hex))
			continue;

		shared_index_path = repo_git_path(the_repository, "%s", de->d_name);
		if (should_delete_shared_index(shared_index_path) > 0 &&
		    unlink(shared_index_path))
			warning_errno(_("unable to unlink: %s"), shared_index_path);

		free(shared_index_path);
	}
	closedir(dir);

	return 0;
}

/*
 * Write the shared (base) index to "temp", then rename it into place as
 * $GIT_DIR/sharedindex.<hash>. On success the base oid is recorded in
 * the split_index and stale shared index files are expired.
 */
static int write_shared_index(struct index_state *istate,
			      struct tempfile **temp, unsigned flags)
{
	struct split_index *si = istate->split_index;
	int ret, was_full = !istate->sparse_index;
	char *path;

	move_cache_to_base_index(istate);
	convert_to_sparse(istate, 0);

	trace2_region_enter_printf("index", "shared/do_write_index",
				   the_repository, "%s", get_tempfile_path(*temp));
	/* the shared index never carries extensions; they live in the split file */
	ret = do_write_index(si->base, *temp, WRITE_NO_EXTENSION, flags);
	trace2_region_leave_printf("index", "shared/do_write_index",
				   the_repository, "%s", get_tempfile_path(*temp));

	if (was_full)
		ensure_full_index(istate);

	if (ret)
		return ret;
	ret = adjust_shared_perm(the_repository, get_tempfile_path(*temp));
	if (ret) {
		error(_("cannot fix permission bits on '%s'"), get_tempfile_path(*temp));
		return ret;
	}

	path = repo_git_path(the_repository, "sharedindex.%s", oid_to_hex(&si->base->oid));
	ret = rename_tempfile(temp, path);
	if (!ret) {
		oidcpy(&si->base_oid, &si->base->oid);
		clean_shared_index_files(oid_to_hex(&si->base->oid));
	}

	free(path);
	return ret;
}

/* Default for splitIndex.maxPercentChange when it is absent or invalid. */
static const int default_max_percent_split_change = 20;

/*
 * Return 1 if the fraction of entries not backed by the shared index
 * exceeds the configured percentage threshold, i.e. a new shared index
 * should be written.
 */
static int too_many_not_shared_entries(struct index_state *istate)
{
	int i, not_shared = 0;
	int max_split = repo_config_get_max_percent_split_change(the_repository);

	switch (max_split) {
	case -1:
		/* not or badly configured: use the default value */
		max_split = default_max_percent_split_change;
		break;
	case 0:
		return 1; /* 0% means always write a new shared index */
	case 100:
		return 0; /* 100% means never write a new shared index */
	default:
		break; /* just use the configured value */
	}

	/* Count not shared entries */
	for (i = 0; i < istate->cache_nr; i++) {
		struct cache_entry *ce = istate->cache[i];
		if (!ce->index)
			not_shared++;
	}

	/* 64-bit multiplication avoids overflow for very large indexes */
	return (int64_t)istate->cache_nr * max_split < (int64_t)not_shared * 100;
}

/*
 * Write the in-memory index to "lock". With a split index this may first
 * write a fresh shared (base) index file and then the split index on
 * top; otherwise the whole index is written as one file. Honors
 * SKIP_IF_UNCHANGED and COMMIT_LOCK in "flags".
 */
int write_locked_index(struct index_state *istate, struct lock_file *lock,
		       unsigned flags)
{
	int new_shared_index, ret, test_split_index_env;
	struct split_index *si = istate->split_index;

	if (git_env_bool("GIT_TEST_CHECK_CACHE_TREE", 0) &&
	    cache_tree_verify(the_repository, istate) < 0)
		return -1;

	if ((flags & SKIP_IF_UNCHANGED) && !istate->cache_changed) {
		if (flags & COMMIT_LOCK)
			rollback_lock_file(lock);
		return 0;
	}

	if (istate->fsmonitor_last_update)
		fill_fsmonitor_bitmap(istate);

	test_split_index_env = git_env_bool("GIT_TEST_SPLIT_INDEX", 0);

	/*
	 * No split index configured, an alternate output path, or changes
	 * beyond what extensions can express: write a monolithic index.
	 */
	if ((!si && !test_split_index_env) ||
	    alternate_index_output ||
	    (istate->cache_changed & ~EXTMASK)) {
		ret = do_write_locked_index(istate, lock, flags,
					    ~WRITE_SPLIT_INDEX_EXTENSION);
		goto out;
	}

	if (test_split_index_env) {
		if (!si) {
			si = init_split_index(istate);
			istate->cache_changed |= SPLIT_INDEX_ORDERED;
		} else {
			/* pseudo-randomly refresh the shared index in tests */
			int v = si->base_oid.hash[0];
			if ((v & 15) < 6)
				istate->cache_changed |= SPLIT_INDEX_ORDERED;
		}
	}
	if (too_many_not_shared_entries(istate))
		istate->cache_changed |= SPLIT_INDEX_ORDERED;

	new_shared_index = istate->cache_changed & SPLIT_INDEX_ORDERED;

	if (new_shared_index) {
		struct tempfile *temp;
		int saved_errno;
		char *path;

		/* Same initial permissions as the main .git/index file */
		path = repo_git_path(the_repository, "sharedindex_XXXXXX");
		temp = mks_tempfile_sm(path, 0, 0666);
		free(path);
		if (!temp) {
			/* cannot create a tempfile: fall back to one file */
			ret = do_write_locked_index(istate, lock, flags,
						    ~WRITE_SPLIT_INDEX_EXTENSION);
			goto out;
		}
		ret = write_shared_index(istate, &temp, flags);

		/* preserve errno across the tempfile cleanup */
		saved_errno = errno;
		if (is_tempfile_active(temp))
			delete_tempfile(&temp);
		errno = saved_errno;

		if (ret)
			goto out;
	}

	ret = write_split_index(istate, lock, flags);

	/* Freshen the shared index only if the split-index was written */
	if (!ret && !new_shared_index && !is_null_oid(&si->base_oid)) {
		char *shared_index = repo_git_path(the_repository, "sharedindex.%s",
						   oid_to_hex(&si->base_oid));
		freshen_shared_index(shared_index, 1);
		free(shared_index);
	}

out:
	/* a no-op if the lock has already been committed */
	if (flags & COMMIT_LOCK)
		rollback_lock_file(lock);
	return ret;
}

/*
 * Read the index file that is potentially unmerged into given
 * index_state, dropping any unmerged entries to stage #0 (potentially
 * resulting in a path appearing as both a file and a directory in the
 * index; the caller is responsible to clear out the extra entries
 * before writing the index to a tree). Returns true if the index is
 * unmerged. Callers who want to refuse to work from an unmerged
 * state can call this and check its return value, instead of calling
 * read_cache().
 */
int repo_read_index_unmerged(struct repository *repo)
{
	struct index_state *istate;
	int i;
	int unmerged = 0;

	repo_read_index(repo);
	istate = repo->index;
	for (i = 0; i < istate->cache_nr; i++) {
		struct cache_entry *ce = istate->cache[i];
		struct cache_entry *new_ce;
		int len;

		if (!ce_stage(ce))
			continue;
		unmerged = 1;
		len = ce_namelen(ce);
		new_ce = make_empty_cache_entry(istate, len);
		memcpy(new_ce->name, ce->name, len);
		/* stage #0, but flagged so callers still know it was a conflict */
		new_ce->ce_flags = create_ce_flags(0) | CE_CONFLICTED;
		new_ce->ce_namelen = len;
		new_ce->ce_mode = ce->ce_mode;
		if (add_index_entry(istate, new_ce, ADD_CACHE_SKIP_DFCHECK))
			return error(_("%s: cannot drop to stage #0"),
				     new_ce->name);
	}
	return unmerged;
}

/*
 * Returns 1 if the path is an "other" path with respect to
 * the index; that is, the path is not mentioned in the index at all,
 * either as a file, a directory with some files in the index,
 * or as an unmerged entry.
 *
 * We helpfully remove a trailing "/" from directories so that
 * the output of read_directory can be used as-is.
 */
int index_name_is_other(struct index_state *istate, const char *name,
			int namelen)
{
	int pos;
	if (namelen && name[namelen - 1] == '/')
		namelen--;
	pos = index_name_pos(istate, name, namelen);
	if (0 <= pos)
		return 0;	/* exact match */
	pos = -pos - 1;
	if (pos < istate->cache_nr) {
		struct cache_entry *ce = istate->cache[pos];
		if (ce_namelen(ce) == namelen &&
		    !memcmp(ce->name, name, namelen))
			return 0; /* Yup, this one exists unmerged */
	}
	return 1;
}

/*
 * Read the blob contents for "path" from the index; during a merge the
 * stage #2 ("ours") entry is preferred. Returns a newly allocated buffer
 * (caller frees) and stores its length in *size when non-NULL, or NULL
 * if the path is absent or the object is not a blob.
 */
void *read_blob_data_from_index(struct index_state *istate,
				const char *path, unsigned long *size)
{
	int pos, len;
	unsigned long sz;
	enum object_type type;
	void *data;

	len = strlen(path);
	pos = index_name_pos(istate, path, len);
	if (pos < 0) {
		/*
		 * We might be in the middle of a merge, in which
		 * case we would read stage #2 (ours).
		 */
		int i;
		for (i = -pos - 1;
		     (pos < 0 && i < istate->cache_nr &&
		      !strcmp(istate->cache[i]->name, path));
		     i++)
			if (ce_stage(istate->cache[i]) == 2)
				pos = i;
	}
	if (pos < 0)
		return NULL;
	data = odb_read_object(the_repository->objects, &istate->cache[pos]->oid,
			       &type, &sz);
	if (!data || type != OBJ_BLOB) {
		free(data);
		return NULL;
	}
	if (size)
		*size = sz;
	return data;
}

/* Transfer ownership of the untracked cache and cache tree from src to dst. */
void move_index_extensions(struct index_state *dst, struct index_state *src)
{
	dst->untracked = src->untracked;
	src->untracked = NULL;
	dst->cache_tree = src->cache_tree;
	src->cache_tree = NULL;
}

/*
 * Duplicate a cache entry into storage owned by "istate", preserving the
 * copy's own mem-pool flag (which memcpy would otherwise clobber) so the
 * copy is freed -- or not -- correctly later.
 */
struct cache_entry *dup_cache_entry(const struct cache_entry *ce,
				    struct index_state *istate)
{
	unsigned int size = ce_size(ce);
	int mem_pool_allocated;
	struct cache_entry *new_entry = make_empty_cache_entry(istate, ce_namelen(ce));
	mem_pool_allocated = new_entry->mem_pool_allocated;

	memcpy(new_entry, ce, size);
	new_entry->mem_pool_allocated = mem_pool_allocated;
	return new_entry;
}

/*
 * Release a cache entry unless it came from a mem pool (pool memory is
 * reclaimed in bulk). When validation is enabled the memory is poisoned
 * first so stale use shows up quickly.
 */
void discard_cache_entry(struct cache_entry *ce)
{
	if (ce && should_validate_cache_entries())
		memset(ce, 0xCD, cache_entry_size(ce->ce_namelen));

	if (ce && ce->mem_pool_allocated)
		return;

	free(ce);
}

/*
 * Whether discarded cache entries should be poisoned; controlled by the
 * GIT_TEST_VALIDATE_INDEX_CACHE_ENTRIES environment variable and cached
 * after the first call.
 */
int should_validate_cache_entries(void)
{
	static int validate_index_cache_entries = -1;

	if (validate_index_cache_entries < 0) {
		if (getenv("GIT_TEST_VALIDATE_INDEX_CACHE_ENTRIES"))
			validate_index_cache_entries = 1;
		else
			validate_index_cache_entries = 0;
	}

	return validate_index_cache_entries;
}

#define EOIE_SIZE (4 + GIT_SHA1_RAWSZ) /* <4-byte offset> + <20-byte hash> */
#define EOIE_SIZE_WITH_HEADER (4 + 4 + EOIE_SIZE) /* <4-byte signature> + <4-byte length> + EOIE_SIZE */

static size_t read_eoie_extension(const char *mmap, size_t mmap_size)
{
	/*
	 * The end of index entries (EOIE) extension is guaranteed to be last
	 * so that it can be found by scanning backwards from the EOF.
	 *
	 * "EOIE"
	 * <4-byte length>
	 * <4-byte offset>
	 * <20-byte hash>
	 */
	const char *index, *eoie;
	uint32_t extsize;
	size_t offset, src_offset;
	unsigned char hash[GIT_MAX_RAWSZ];
	struct git_hash_ctx c;

	/* ensure we have an index big enough to contain an EOIE extension */
	if (mmap_size < sizeof(struct cache_header) + EOIE_SIZE_WITH_HEADER + the_hash_algo->rawsz)
		return 0;

	/* validate the extension signature */
	index = eoie = mmap + mmap_size - EOIE_SIZE_WITH_HEADER - the_hash_algo->rawsz;
	if (CACHE_EXT(index) != CACHE_EXT_ENDOFINDEXENTRIES)
		return 0;
	index += sizeof(uint32_t);

	/* validate the extension size */
	extsize = get_be32(index);
	if (extsize != EOIE_SIZE)
		return 0;
	index += sizeof(uint32_t);

	/*
	 * Validate the offset we're going to look for the first extension
	 * signature is after the index header and before the eoie extension.
	 */
	offset = get_be32(index);
	if (mmap + offset < mmap + sizeof(struct cache_header))
		return 0;
	if (mmap + offset >= eoie)
		return 0;
	index += sizeof(uint32_t);

	/*
	 * The hash is computed over extension types and their sizes (but not
	 * their contents). E.g. if we have "TREE" extension that is N-bytes
	 * long, "REUC" extension that is M-bytes long, followed by "EOIE",
	 * then the hash would be:
	 *
	 * SHA-1("TREE" + <binary representation of N> +
	 *	 "REUC" + <binary representation of M>)
	 */
	src_offset = offset;
	the_hash_algo->init_fn(&c);
	while (src_offset < mmap_size - the_hash_algo->rawsz - EOIE_SIZE_WITH_HEADER) {
		/* After an array of active_nr index entries,
		 * there can be arbitrary number of extended
		 * sections, each of which is prefixed with
		 * extension name (4-byte) and section length
		 * in 4-byte network byte order.
		 */
		uint32_t extsize;
		memcpy(&extsize, mmap + src_offset + 4, 4);
		extsize = ntohl(extsize);

		/* verify the extension size isn't so large it will wrap around */
		if (src_offset + 8 + extsize < src_offset)
			return 0;

		/* hash only the 8-byte type+length header, not the contents */
		git_hash_update(&c, mmap + src_offset, 8);

		src_offset += 8;
		src_offset += extsize;
	}
	git_hash_final(hash, &c);
	if (!hasheq(hash, (const unsigned char *)index, the_repository->hash_algo))
		return 0;

	/* Validate that the extension offsets returned us back to the eoie extension. */
	if (src_offset != mmap_size - the_hash_algo->rawsz - EOIE_SIZE_WITH_HEADER)
		return 0;

	return offset;
}

/* Serialize the EOIE extension payload: <4-byte offset> followed by the hash. */
static void write_eoie_extension(struct strbuf *sb, struct git_hash_ctx *eoie_context, size_t offset)
{
	uint32_t buffer;
	unsigned char hash[GIT_MAX_RAWSZ];

	/* offset */
	put_be32(&buffer, offset);
	strbuf_add(sb, &buffer, sizeof(uint32_t));

	/* hash */
	git_hash_final(hash, eoie_context);
	strbuf_add(sb, hash, the_hash_algo->rawsz);
}

#define IEOT_VERSION (1)

/*
 * Scan the extension area starting at "offset" for the Index Entry
 * Offset Table ("IEOT") extension and parse it. Returns a malloc'd
 * table (caller frees) or NULL if the extension is absent or malformed.
 */
static struct index_entry_offset_table *read_ieot_extension(const char *mmap, size_t mmap_size, size_t offset)
{
	const char *index = NULL;
	uint32_t extsize, ext_version;
	struct index_entry_offset_table *ieot;
	int i, nr;

	/* find the IEOT extension */
	if (!offset)
		return NULL;
	while (offset <= mmap_size - the_hash_algo->rawsz - 8) {
		extsize = get_be32(mmap + offset + 4);
		if (CACHE_EXT((mmap + offset)) == CACHE_EXT_INDEXENTRYOFFSETTABLE) {
			index = mmap + offset + 4 + 4;
			break;
		}
		offset += 8;
		offset += extsize;
	}
	if (!index)
		return NULL;

	/* validate the version is IEOT_VERSION */
	ext_version = get_be32(index);
	if (ext_version != IEOT_VERSION) {
		error("invalid IEOT version %d", ext_version);
		return NULL;
	}
	index += sizeof(uint32_t);

	/* extension size - version bytes / bytes per entry */
	nr = (extsize - sizeof(uint32_t)) / (sizeof(uint32_t) + sizeof(uint32_t));
	if (!nr) {
		error("invalid number of IEOT entries %d", nr);
		return NULL;
	}
	ieot = xmalloc(sizeof(struct index_entry_offset_table)
		       + (nr * sizeof(struct index_entry_offset)));
	ieot->nr = nr;
	for (i = 0; i < nr; i++) {
		ieot->entries[i].offset = get_be32(index);
		index += sizeof(uint32_t);
		ieot->entries[i].nr = get_be32(index);
		index += sizeof(uint32_t);
	}

	return ieot;
}

/* Serialize the IEOT extension: a version word then (offset, count) pairs. */
static void write_ieot_extension(struct strbuf *sb, struct index_entry_offset_table *ieot)
{
	uint32_t buffer;
	int i;

	/* version */
	put_be32(&buffer, IEOT_VERSION);
	strbuf_add(sb, &buffer, sizeof(uint32_t));

	/* ieot */
	for (i = 0; i < ieot->nr; i++) {

		/* offset */
		put_be32(&buffer, ieot->entries[i].offset);
		strbuf_add(sb, &buffer, sizeof(uint32_t));

		/* count */
		put_be32(&buffer, ieot->entries[i].nr);
		strbuf_add(sb, &buffer, sizeof(uint32_t));
	}
}

/*
 * Batch-fetch from the promisor remote the objects selected by
 * "must_prefetch" that are not already available locally; gitlink
 * entries are skipped.
 */
void prefetch_cache_entries(const struct index_state *istate,
			    must_prefetch_predicate must_prefetch)
{
	int i;
	struct oid_array to_fetch = OID_ARRAY_INIT;

	for (i = 0; i < istate->cache_nr; i++) {
		struct cache_entry *ce = istate->cache[i];

		if (S_ISGITLINK(ce->ce_mode) || !must_prefetch(ce))
			continue;
		/* already present locally? nothing to fetch */
		if (!odb_read_object_info_extended(the_repository->objects,
						   &ce->oid, NULL,
						   OBJECT_INFO_FOR_PREFETCH))
			continue;
		oid_array_append(&to_fetch, &ce->oid);
	}
	promisor_remote_get_direct(the_repository,
				   to_fetch.oid, to_fetch.nr);
	oid_array_clear(&to_fetch);
}

/*
 * Build a cache entry for one tree entry (prefix in "base", name in
 * "pathname") and add it to the index with the given add_index_entry()
 * option flags; directories are recursed into by the tree walker.
 */
static int read_one_entry_opt(struct index_state *istate,
			      const struct object_id *oid,
			      struct strbuf *base,
			      const char *pathname,
			      unsigned mode, int opt)
{
	int len;
	struct cache_entry *ce;

	if (S_ISDIR(mode))
		return READ_TREE_RECURSIVE;

	len = strlen(pathname);
	ce = make_empty_cache_entry(istate, base->len + len);

	ce->ce_mode = create_ce_mode(mode);
	/* entries from the tree go to stage #1 */
	ce->ce_flags = create_ce_flags(1);
	ce->ce_namelen = base->len + len;
	memcpy(ce->name, base->buf, base->len);
	memcpy(ce->name + base->len, pathname, len+1);
	oidcpy(&ce->oid, oid);
	return add_index_entry(istate, ce, opt);
}

/*
 * read_tree() callback that checks each added entry against existing
 * ones (directory/file conflicts are skipped).
 */
static int read_one_entry(const struct object_id *oid, struct strbuf *base,
			  const char *pathname, unsigned mode,
			  void *context)
{
	struct index_state *istate = context;
	return read_one_entry_opt(istate, oid, base, pathname,
				  mode,
				  ADD_CACHE_OK_TO_ADD|ADD_CACHE_SKIP_DFCHECK);
}

/*
 * This is used when the caller knows there is no existing entries at
 * the stage that will conflict with the entry being added.
 */
static int read_one_entry_quick(const struct object_id *oid, struct strbuf *base,
				const char *pathname, unsigned mode,
				void *context)
{
	struct index_state *istate = context;
	return read_one_entry_opt(istate, oid, base, pathname,
				  mode, ADD_CACHE_JUST_APPEND);
}

/*
 * Read the tree specified with --with-tree option
 * (typically, HEAD) into stage #1 and then
 * squash them down to stage #0. This is used for
 * --error-unmatch to list and check the path patterns
 * that were given from the command line. We are not
 * going to write this index out.
 */
void overlay_tree_on_index(struct index_state *istate,
			   const char *tree_name, const char *prefix)
{
	struct tree *tree;
	struct object_id oid;
	struct pathspec pathspec;
	struct cache_entry *last_stage0 = NULL;
	int i;
	read_tree_fn_t fn = NULL;
	int err;

	if (repo_get_oid(the_repository, tree_name, &oid))
		die("tree-ish %s not found.", tree_name);
	tree = parse_tree_indirect(&oid);
	if (!tree)
		die("bad tree-ish %s", tree_name);

	/* Hoist the unmerged entries up to stage #3 to make room */
	/* TODO: audit for interaction with sparse-index. */
	ensure_full_index(istate);
	for (i = 0; i < istate->cache_nr; i++) {
		struct cache_entry *ce = istate->cache[i];
		if (!ce_stage(ce))
			continue;
		ce->ce_flags |= CE_STAGEMASK;
	}

	if (prefix) {
		static const char *(matchbuf[1]);
		matchbuf[0] = NULL;
		parse_pathspec(&pathspec, PATHSPEC_ALL_MAGIC,
			       PATHSPEC_PREFER_CWD, prefix, matchbuf);
	} else
		memset(&pathspec, 0, sizeof(pathspec));

	/*
	 * See if we have cache entry at the stage. If so,
	 * do it the original slow way, otherwise, append and then
	 * sort at the end.
	 */
	for (i = 0; !fn && i < istate->cache_nr; i++) {
		const struct cache_entry *ce = istate->cache[i];
		if (ce_stage(ce) == 1)
			fn = read_one_entry;
	}

	if (!fn)
		fn = read_one_entry_quick;
	err = read_tree(the_repository, tree, &pathspec, fn, istate);
	clear_pathspec(&pathspec);
	if (err)
		die("unable to read tree entries %s", tree_name);

	/*
	 * Sort the cache entry -- we need to nuke the cache tree, though.
	 */
	if (fn == read_one_entry_quick) {
		cache_tree_free(&istate->cache_tree);
		QSORT(istate->cache, istate->cache_nr, cmp_cache_name_compare);
	}

	for (i = 0; i < istate->cache_nr; i++) {
		struct cache_entry *ce = istate->cache[i];
		switch (ce_stage(ce)) {
		case 0:
			last_stage0 = ce;
			/* fallthru */
		default:
			continue;
		case 1:
			/*
			 * If there is stage #0 entry for this, we do not
			 * need to show it. We use CE_UPDATE bit to mark
			 * such an entry.
			 */
			if (last_stage0 &&
			    !strcmp(last_stage0->name, ce->name))
				ce->ce_flags |= CE_UPDATE;
		}
	}
}

/* State shared with update_callback() while diffing the working tree. */
struct update_callback_data {
	struct index_state *index;
	int include_sparse;
	int flags;      /* ADD_CACHE_* flags from the caller */
	int add_errors; /* number of paths that failed to be added */
};

/*
 * Map an unmerged diff pair onto the status update_callback() should act
 * on: a path missing from the working tree is treated as a deletion
 * unless removals are being ignored.
 */
static int fix_unmerged_status(struct diff_filepair *p,
			       struct update_callback_data *data)
{
	if (p->status != DIFF_STATUS_UNMERGED)
		return p->status;
	if (!(data->flags & ADD_CACHE_IGNORE_REMOVAL) && !p->two->mode)
		/*
		 * This is not an explicit add request, and the
		 * path is missing from the working tree (deleted)
		 */
		return DIFF_STATUS_DELETED;
	else
		/*
		 * Either an explicit add request, or path exists
		 * in the working tree. An attempt to explicitly
		 * add a path that does not exist in the working tree
		 * will be caught as an error by the caller immediately.
		 */
		return DIFF_STATUS_MODIFIED;
}

/*
 * Diff-queue callback: stage modified/type-changed paths into the index
 * and remove deleted ones, honoring the sparse checkout and the
 * ADD_CACHE_* flags carried in the callback data.
 */
static void update_callback(struct diff_queue_struct *q,
			    struct diff_options *opt UNUSED, void *cbdata)
{
	int i;
	struct update_callback_data *data = cbdata;

	for (i = 0; i < q->nr; i++) {
		struct diff_filepair *p = q->queue[i];
		const char *path = p->one->path;

		if (!data->include_sparse &&
		    !path_in_sparse_checkout(path, data->index))
			continue;

		switch (fix_unmerged_status(p, data)) {
		default:
			die(_("unexpected diff status %c"), p->status);
		case DIFF_STATUS_MODIFIED:
		case DIFF_STATUS_TYPE_CHANGED:
			if (add_file_to_index(data->index, path, data->flags)) {
				if (!(data->flags & ADD_CACHE_IGNORE_ERRORS))
					die(_("updating files failed"));
				data->add_errors++;
			}
			break;
		case DIFF_STATUS_DELETED:
			if (data->flags & ADD_CACHE_IGNORE_REMOVAL)
				break;
			if (!(data->flags & ADD_CACHE_PRETEND))
				remove_file_from_index(data->index, path);
			if (data->flags & (ADD_CACHE_PRETEND|ADD_CACHE_VERBOSE))
				printf(_("remove '%s'\n"), path);
			break;
		}
	}
}

/*
 * Diff the working tree against the index and stage the resulting
 * additions and removals. Returns 1 if any path failed to be added,
 * 0 otherwise.
 */
int add_files_to_cache(struct repository *repo, const char *prefix,
		       const struct pathspec *pathspec, char *ps_matched,
		       int include_sparse, int flags)
{
	struct odb_transaction *transaction;
	struct update_callback_data data;
	struct rev_info rev;

	memset(&data, 0, sizeof(data));
	data.index = repo->index;
	data.include_sparse = include_sparse;
	data.flags = flags;

	repo_init_revisions(repo, &rev, prefix);
	setup_revisions(0, NULL, &rev, NULL);
	if (pathspec) {
		copy_pathspec(&rev.prune_data, pathspec);
		rev.ps_matched = ps_matched;
	}
	rev.diffopt.output_format = DIFF_FORMAT_CALLBACK;
	rev.diffopt.format_callback = update_callback;
	rev.diffopt.format_callback_data = &data;
	rev.diffopt.flags.override_submodule_config = 1;
	rev.max_count = 0; /* do not compare unmerged paths with stage #2 */

	/*
	 * Use an ODB transaction to optimize adding multiple objects.
	 * This function is invoked from commands other than 'add', which
	 * may not have their own transaction active.
	 */
	transaction = odb_transaction_begin(repo->objects);
	run_diff_files(&rev, DIFF_RACY_IS_MODIFIED);
	odb_transaction_commit(transaction);

	release_revisions(&rev);
	return !!data.add_errors;
}