Git fork

commit: convert pop_most_recent_commit() to prio_queue

pop_most_recent_commit() calls commit_list_insert_by_date() for each
parent commit, and is itself called in a loop by its callers. Because
every insertion into the date-sorted list is a linear scan, this can
lead to quadratic complexity if there are many merges. Replace the
commit_list with a prio_queue to ensure logarithmic worst-case
complexity per operation, and convert all three users.

Add a performance test that exercises one of them using a pathological
history that consists of 50% merges and 50% root commits to demonstrate
the speedup:

Test                          v2.50.1           HEAD
----------------------------------------------------------------------
1501.2: rev-parse ':/65535'   2.48(2.47+0.00)   0.20(0.19+0.00) -91.9%

Alas, sane histories don't benefit from the conversion much, and
traversing Git's own history takes a 1% performance hit on my machine:

$ hyperfine -w3 -L git ./git_2.50.1,./git '{git} rev-parse :/^Initial.revision'
Benchmark 1: ./git_2.50.1 rev-parse :/^Initial.revision
  Time (mean ± σ):      1.071 s ±  0.004 s    [User: 1.052 s, System: 0.017 s]
  Range (min … max):    1.067 s …  1.078 s    10 runs

Benchmark 2: ./git rev-parse :/^Initial.revision
  Time (mean ± σ):      1.079 s ±  0.003 s    [User: 1.060 s, System: 0.017 s]
  Range (min … max):    1.074 s …  1.083 s    10 runs

Summary
  ./git_2.50.1 rev-parse :/^Initial.revision ran
    1.01 ± 0.00 times faster than ./git rev-parse :/^Initial.revision

Signed-off-by: René Scharfe <l.s.r@web.de>
Signed-off-by: Junio C Hamano <gitster@pobox.com>

Authored by René Scharfe and committed by Junio C Hamano
d6ec0878 16bd9f20

+100 -21
+4 -3
commit.c
··· 31 31 #include "parse.h" 32 32 #include "object-file.h" 33 33 #include "object-file-convert.h" 34 + #include "prio-queue.h" 34 35 35 36 static struct commit_extra_header *read_commit_extra_header_lines(const char *buf, size_t len, const char **); 36 37 ··· 738 739 commit_list_sort(list, commit_list_compare_by_date); 739 740 } 740 741 741 - struct commit *pop_most_recent_commit(struct commit_list **list, 742 + struct commit *pop_most_recent_commit(struct prio_queue *queue, 742 743 unsigned int mark) 743 744 { 744 - struct commit *ret = pop_commit(list); 745 + struct commit *ret = prio_queue_get(queue); 745 746 struct commit_list *parents = ret->parents; 746 747 747 748 while (parents) { 748 749 struct commit *commit = parents->item; 749 750 if (!repo_parse_commit(the_repository, commit) && !(commit->object.flags & mark)) { 750 751 commit->object.flags |= mark; 751 - commit_list_insert_by_date(commit, list); 752 + prio_queue_put(queue, commit); 752 753 } 753 754 parents = parents->next; 754 755 }
+4 -4
commit.h
··· 201 201 202 202 const char *skip_blank_lines(const char *msg); 203 203 204 - /** Removes the first commit from a list sorted by date, and adds all 205 - * of its parents. 206 - **/ 207 - struct commit *pop_most_recent_commit(struct commit_list **list, 204 + struct prio_queue; 205 + 206 + /* Removes the first commit from a prio_queue and adds its parents. */ 207 + struct commit *pop_most_recent_commit(struct prio_queue *queue, 208 208 unsigned int mark); 209 209 210 210 struct commit *pop_commit(struct commit_list **stack);
+8 -5
fetch-pack.c
··· 34 34 #include "commit-graph.h" 35 35 #include "sigchain.h" 36 36 #include "mergesort.h" 37 + #include "prio-queue.h" 37 38 38 39 static int transfer_unpack_limit = -1; 39 40 static int fetch_unpack_limit = -1; ··· 600 601 return count ? retval : 0; 601 602 } 602 603 603 - static struct commit_list *complete; 604 + static struct prio_queue complete = { compare_commits_by_commit_date }; 604 605 605 606 static int mark_complete(const struct object_id *oid) 606 607 { ··· 608 609 609 610 if (commit && !(commit->object.flags & COMPLETE)) { 610 611 commit->object.flags |= COMPLETE; 611 - commit_list_insert(commit, &complete); 612 + prio_queue_put(&complete, commit); 612 613 } 613 614 return 0; 614 615 } ··· 625 626 static void mark_recent_complete_commits(struct fetch_pack_args *args, 626 627 timestamp_t cutoff) 627 628 { 628 - while (complete && cutoff <= complete->item->date) { 629 + while (complete.nr) { 630 + struct commit *item = prio_queue_peek(&complete); 631 + if (item->date < cutoff) 632 + break; 629 633 print_verbose(args, _("Marking %s as complete"), 630 - oid_to_hex(&complete->item->object.oid)); 634 + oid_to_hex(&item->object.oid)); 631 635 pop_most_recent_commit(&complete, COMPLETE); 632 636 } 633 637 } ··· 797 801 refs_for_each_rawref(get_main_ref_store(the_repository), 798 802 mark_complete_oid, NULL); 799 803 for_each_cached_alternate(NULL, mark_alternate_complete); 800 - commit_list_sort_by_date(&complete); 801 804 if (cutoff) 802 805 mark_recent_complete_commits(args, cutoff); 803 806 }
+5 -5
object-name.c
··· 28 28 #include "commit-reach.h" 29 29 #include "date.h" 30 30 #include "object-file-convert.h" 31 + #include "prio-queue.h" 31 32 32 33 static int get_oid_oneline(struct repository *r, const char *, struct object_id *, 33 34 const struct commit_list *); ··· 1457 1458 const char *prefix, struct object_id *oid, 1458 1459 const struct commit_list *list) 1459 1460 { 1460 - struct commit_list *copy = NULL, **copy_tail = &copy; 1461 + struct prio_queue copy = { compare_commits_by_commit_date }; 1461 1462 const struct commit_list *l; 1462 1463 int found = 0; 1463 1464 int negative = 0; ··· 1479 1480 1480 1481 for (l = list; l; l = l->next) { 1481 1482 l->item->object.flags |= ONELINE_SEEN; 1482 - copy_tail = &commit_list_insert(l->item, copy_tail)->next; 1483 + prio_queue_put(&copy, l->item); 1483 1484 } 1484 - while (copy) { 1485 + while (copy.nr) { 1485 1486 const char *p, *buf; 1486 1487 struct commit *commit; 1487 1488 int matches; ··· 1503 1504 regfree(&regex); 1504 1505 for (l = list; l; l = l->next) 1505 1506 clear_commit_marks(l->item, ONELINE_SEEN); 1506 - free_commit_list(copy); 1507 + clear_prio_queue(&copy); 1507 1508 return found ? 0 : -1; 1508 1509 } 1509 1510 ··· 2057 2058 cb.list = &list; 2058 2059 refs_for_each_ref(get_main_ref_store(repo), handle_one_ref, &cb); 2059 2060 refs_head_ref(get_main_ref_store(repo), handle_one_ref, &cb); 2060 - commit_list_sort_by_date(&list); 2061 2061 ret = get_oid_oneline(repo, name + 2, oid, list); 2062 2062 2063 2063 free_commit_list(list);
+1
t/meson.build
··· 1117 1117 'perf/p1450-fsck.sh', 1118 1118 'perf/p1451-fsck-skip-list.sh', 1119 1119 'perf/p1500-graph-walks.sh', 1120 + 'perf/p1501-rev-parse-oneline.sh', 1120 1121 'perf/p2000-sparse-operations.sh', 1121 1122 'perf/p3400-rebase.sh', 1122 1123 'perf/p3404-rebase-interactive.sh',
+71
t/perf/p1501-rev-parse-oneline.sh
··· 1 + #!/bin/sh 2 + 3 + test_description='Test :/ object name notation' 4 + 5 + . ./perf-lib.sh 6 + 7 + test_perf_fresh_repo 8 + 9 + # 10 + # Creates lots of merges to make history traversal costly. In 11 + # particular it creates 2^($max_level-1)-1 2-way merges on top of 12 + # 2^($max_level-1) root commits. E.g., the commit history looks like 13 + # this for a $max_level of 3: 14 + # 15 + # _1_ 16 + # / \ 17 + # 2 3 18 + # / \ / \ 19 + # 4 5 6 7 20 + # 21 + # The numbers are the fast-import marks, which also are the commit 22 + # messages. 1 is the HEAD commit and a merge, 2 and 3 are also merges, 23 + # 4-7 are the root commits. 24 + # 25 + build_history () { 26 + local max_level="$1" && 27 + local level="${2:-1}" && 28 + local mark="${3:-1}" && 29 + if test $level -eq $max_level 30 + then 31 + echo "reset refs/heads/master" && 32 + echo "from $ZERO_OID" && 33 + echo "commit refs/heads/master" && 34 + echo "mark :$mark" && 35 + echo "committer C <c@example.com> 1234567890 +0000" && 36 + echo "data <<EOF" && 37 + echo "$mark" && 38 + echo "EOF" 39 + else 40 + local level1=$((level+1)) && 41 + local mark1=$((2*mark)) && 42 + local mark2=$((2*mark+1)) && 43 + build_history $max_level $level1 $mark1 && 44 + build_history $max_level $level1 $mark2 && 45 + echo "commit refs/heads/master" && 46 + echo "mark :$mark" && 47 + echo "committer C <c@example.com> 1234567890 +0000" && 48 + echo "data <<EOF" && 49 + echo "$mark" && 50 + echo "EOF" && 51 + echo "from :$mark1" && 52 + echo "merge :$mark2" 53 + fi 54 + } 55 + 56 + test_expect_success 'setup' ' 57 + build_history 16 | git fast-import && 58 + git log --format="%H %s" --reverse >commits && 59 + sed -n -e "s/ .*$//p" -e "q" <commits >expect && 60 + sed -n -e "s/^.* //p" -e "q" <commits >needle 61 + ' 62 + 63 + test_perf "rev-parse :/$(cat needle)" ' 64 + git rev-parse :/$(cat needle) >actual 65 + ' 66 + 67 + test_expect_success 'verify result' ' 68 + test_cmp expect actual 69 + ' 70 + 71 + test_done
+7 -4
walker.c
··· 14 14 #include "blob.h" 15 15 #include "refs.h" 16 16 #include "progress.h" 17 + #include "prio-queue.h" 17 18 18 19 static struct object_id current_commit_oid; 19 20 ··· 78 79 #define SEEN (1U << 1) 79 80 #define TO_SCAN (1U << 2) 80 81 81 - static struct commit_list *complete = NULL; 82 + static struct prio_queue complete = { compare_commits_by_commit_date }; 82 83 83 84 static int process_commit(struct walker *walker, struct commit *commit) 84 85 { ··· 87 88 if (repo_parse_commit(the_repository, commit)) 88 89 return -1; 89 90 90 - while (complete && complete->item->date >= commit->date) { 91 + while (complete.nr) { 92 + struct commit *item = prio_queue_peek(&complete); 93 + if (item->date < commit->date) 94 + break; 91 95 pop_most_recent_commit(&complete, COMPLETE); 92 96 } 93 97 ··· 233 237 234 238 if (commit) { 235 239 commit->object.flags |= COMPLETE; 236 - commit_list_insert(commit, &complete); 240 + prio_queue_put(&complete, commit); 237 241 } 238 242 return 0; 239 243 } ··· 302 306 if (!walker->get_recover) { 303 307 refs_for_each_ref(get_main_ref_store(the_repository), 304 308 mark_complete, NULL); 305 - commit_list_sort_by_date(&complete); 306 309 } 307 310 308 311 for (i = 0; i < targets; i++) {