Git fork

Merge branch 'rs/pop-recent-commit-with-prio-queue'

The pop_most_recent_commit() function can have quite expensive
worst-case performance characteristics, which have been optimized by
using the prio-queue data structure.

* rs/pop-recent-commit-with-prio-queue:
commit: use prio_queue_replace() in pop_most_recent_commit()
prio-queue: add prio_queue_replace()
commit: convert pop_most_recent_commit() to prio_queue

+170 -34
+11 -3
commit.c
··· 31 31 #include "parse.h" 32 32 #include "object-file.h" 33 33 #include "object-file-convert.h" 34 + #include "prio-queue.h" 34 35 35 36 static struct commit_extra_header *read_commit_extra_header_lines(const char *buf, size_t len, const char **); 36 37 ··· 739 740 commit_list_sort(list, commit_list_compare_by_date); 740 741 } 741 742 742 - struct commit *pop_most_recent_commit(struct commit_list **list, 743 + struct commit *pop_most_recent_commit(struct prio_queue *queue, 743 744 unsigned int mark) 744 745 { 745 - struct commit *ret = pop_commit(list); 746 + struct commit *ret = prio_queue_peek(queue); 747 + int get_pending = 1; 746 748 struct commit_list *parents = ret->parents; 747 749 748 750 while (parents) { 749 751 struct commit *commit = parents->item; 750 752 if (!repo_parse_commit(the_repository, commit) && !(commit->object.flags & mark)) { 751 753 commit->object.flags |= mark; 752 - commit_list_insert_by_date(commit, list); 754 + if (get_pending) 755 + prio_queue_replace(queue, commit); 756 + else 757 + prio_queue_put(queue, commit); 758 + get_pending = 0; 753 759 } 754 760 parents = parents->next; 755 761 } 762 + if (get_pending) 763 + prio_queue_get(queue); 756 764 return ret; 757 765 } 758 766
+4 -4
commit.h
··· 201 201 202 202 const char *skip_blank_lines(const char *msg); 203 203 204 - /** Removes the first commit from a list sorted by date, and adds all 205 - * of its parents. 206 - **/ 207 - struct commit *pop_most_recent_commit(struct commit_list **list, 204 + struct prio_queue; 205 + 206 + /* Removes the first commit from a prio_queue and adds its parents. */ 207 + struct commit *pop_most_recent_commit(struct prio_queue *queue, 208 208 unsigned int mark); 209 209 210 210 struct commit *pop_commit(struct commit_list **stack);
+8 -5
fetch-pack.c
··· 34 34 #include "commit-graph.h" 35 35 #include "sigchain.h" 36 36 #include "mergesort.h" 37 + #include "prio-queue.h" 37 38 38 39 static int transfer_unpack_limit = -1; 39 40 static int fetch_unpack_limit = -1; ··· 601 602 return count ? retval : 0; 602 603 } 603 604 604 - static struct commit_list *complete; 605 + static struct prio_queue complete = { compare_commits_by_commit_date }; 605 606 606 607 static int mark_complete(const struct object_id *oid) 607 608 { ··· 609 610 610 611 if (commit && !(commit->object.flags & COMPLETE)) { 611 612 commit->object.flags |= COMPLETE; 612 - commit_list_insert(commit, &complete); 613 + prio_queue_put(&complete, commit); 613 614 } 614 615 return 0; 615 616 } ··· 626 627 static void mark_recent_complete_commits(struct fetch_pack_args *args, 627 628 timestamp_t cutoff) 628 629 { 629 - while (complete && cutoff <= complete->item->date) { 630 + while (complete.nr) { 631 + struct commit *item = prio_queue_peek(&complete); 632 + if (item->date < cutoff) 633 + break; 630 634 print_verbose(args, _("Marking %s as complete"), 631 - oid_to_hex(&complete->item->object.oid)); 635 + oid_to_hex(&item->object.oid)); 632 636 pop_most_recent_commit(&complete, COMPLETE); 633 637 } 634 638 } ··· 798 802 refs_for_each_rawref(get_main_ref_store(the_repository), 799 803 mark_complete_oid, NULL); 800 804 for_each_cached_alternate(NULL, mark_alternate_complete); 801 - commit_list_sort_by_date(&complete); 802 805 if (cutoff) 803 806 mark_recent_complete_commits(args, cutoff); 804 807 }
+5 -5
object-name.c
··· 28 28 #include "commit-reach.h" 29 29 #include "date.h" 30 30 #include "object-file-convert.h" 31 + #include "prio-queue.h" 31 32 32 33 static int get_oid_oneline(struct repository *r, const char *, struct object_id *, 33 34 const struct commit_list *); ··· 1461 1462 const char *prefix, struct object_id *oid, 1462 1463 const struct commit_list *list) 1463 1464 { 1464 - struct commit_list *copy = NULL, **copy_tail = &copy; 1465 + struct prio_queue copy = { compare_commits_by_commit_date }; 1465 1466 const struct commit_list *l; 1466 1467 int found = 0; 1467 1468 int negative = 0; ··· 1483 1484 1484 1485 for (l = list; l; l = l->next) { 1485 1486 l->item->object.flags |= ONELINE_SEEN; 1486 - copy_tail = &commit_list_insert(l->item, copy_tail)->next; 1487 + prio_queue_put(&copy, l->item); 1487 1488 } 1488 - while (copy) { 1489 + while (copy.nr) { 1489 1490 const char *p, *buf; 1490 1491 struct commit *commit; 1491 1492 int matches; ··· 1507 1508 regfree(&regex); 1508 1509 for (l = list; l; l = l->next) 1509 1510 clear_commit_marks(l->item, ONELINE_SEEN); 1510 - free_commit_list(copy); 1511 + clear_prio_queue(&copy); 1511 1512 return found ? 0 : -1; 1512 1513 } 1513 1514 ··· 2061 2062 cb.list = &list; 2062 2063 refs_for_each_ref(get_main_ref_store(repo), handle_one_ref, &cb); 2063 2064 refs_head_ref(get_main_ref_store(repo), handle_one_ref, &cb); 2064 - commit_list_sort_by_date(&list); 2065 2065 ret = get_oid_oneline(repo, name + 2, oid, list); 2066 2066 2067 2067 free_commit_list(list);
+32 -13
prio-queue.c
··· 58 58 } 59 59 } 60 60 61 - void *prio_queue_get(struct prio_queue *queue) 61 + static void sift_down_root(struct prio_queue *queue) 62 62 { 63 - void *result; 64 63 size_t ix, child; 65 64 66 - if (!queue->nr) 67 - return NULL; 68 - if (!queue->compare) 69 - return queue->array[--queue->nr].data; /* LIFO */ 70 - 71 - result = queue->array[0].data; 72 - if (!--queue->nr) 73 - return result; 74 - 75 - queue->array[0] = queue->array[queue->nr]; 76 - 77 65 /* Push down the one at the root */ 78 66 for (ix = 0; ix * 2 + 1 < queue->nr; ix = child) { 79 67 child = ix * 2 + 1; /* left */ ··· 86 74 87 75 swap(queue, child, ix); 88 76 } 77 + } 78 + 79 + void *prio_queue_get(struct prio_queue *queue) 80 + { 81 + void *result; 82 + 83 + if (!queue->nr) 84 + return NULL; 85 + if (!queue->compare) 86 + return queue->array[--queue->nr].data; /* LIFO */ 87 + 88 + result = queue->array[0].data; 89 + if (!--queue->nr) 90 + return result; 91 + 92 + queue->array[0] = queue->array[queue->nr]; 93 + sift_down_root(queue); 89 94 return result; 90 95 } 91 96 ··· 97 102 return queue->array[queue->nr - 1].data; 98 103 return queue->array[0].data; 99 104 } 105 + 106 + void prio_queue_replace(struct prio_queue *queue, void *thing) 107 + { 108 + if (!queue->nr) { 109 + prio_queue_put(queue, thing); 110 + } else if (!queue->compare) { 111 + queue->array[queue->nr - 1].ctr = queue->insertion_ctr++; 112 + queue->array[queue->nr - 1].data = thing; 113 + } else { 114 + queue->array[0].ctr = queue->insertion_ctr++; 115 + queue->array[0].data = thing; 116 + sift_down_root(queue); 117 + } 118 + }
+8
prio-queue.h
··· 52 52 */ 53 53 void *prio_queue_peek(struct prio_queue *); 54 54 55 + /* 56 + * Replace the "thing" that compares the smallest with a new "thing", 57 + * like prio_queue_get()+prio_queue_put() would do, but in a more 58 + * efficient way. Does the same as prio_queue_put() if the queue is 59 + * empty. 60 + */ 61 + void prio_queue_replace(struct prio_queue *queue, void *thing); 62 + 55 63 void clear_prio_queue(struct prio_queue *); 56 64 57 65 /* Reverse the LIFO elements */
+1
t/meson.build
··· 1116 1116 'perf/p1450-fsck.sh', 1117 1117 'perf/p1451-fsck-skip-list.sh', 1118 1118 'perf/p1500-graph-walks.sh', 1119 + 'perf/p1501-rev-parse-oneline.sh', 1119 1120 'perf/p2000-sparse-operations.sh', 1120 1121 'perf/p3400-rebase.sh', 1121 1122 'perf/p3404-rebase-interactive.sh',
+71
t/perf/p1501-rev-parse-oneline.sh
··· 1 + #!/bin/sh 2 + 3 + test_description='Test :/ object name notation' 4 + 5 + . ./perf-lib.sh 6 + 7 + test_perf_fresh_repo 8 + 9 + # 10 + # Creates lots of merges to make history traversal costly. In 11 + # particular it creates 2^($max_level-1)-1 2-way merges on top of 12 + # 2^($max_level-1) root commits. E.g., the commit history looks like 13 + # this for a $max_level of 3: 14 + # 15 + # _1_ 16 + # / \ 17 + # 2 3 18 + # / \ / \ 19 + # 4 5 6 7 20 + # 21 + # The numbers are the fast-import marks, which also are the commit 22 + # messages. 1 is the HEAD commit and a merge, 2 and 3 are also merges, 23 + # 4-7 are the root commits. 24 + # 25 + build_history () { 26 + local max_level="$1" && 27 + local level="${2:-1}" && 28 + local mark="${3:-1}" && 29 + if test $level -eq $max_level 30 + then 31 + echo "reset refs/heads/master" && 32 + echo "from $ZERO_OID" && 33 + echo "commit refs/heads/master" && 34 + echo "mark :$mark" && 35 + echo "committer C <c@example.com> 1234567890 +0000" && 36 + echo "data <<EOF" && 37 + echo "$mark" && 38 + echo "EOF" 39 + else 40 + local level1=$((level+1)) && 41 + local mark1=$((2*mark)) && 42 + local mark2=$((2*mark+1)) && 43 + build_history $max_level $level1 $mark1 && 44 + build_history $max_level $level1 $mark2 && 45 + echo "commit refs/heads/master" && 46 + echo "mark :$mark" && 47 + echo "committer C <c@example.com> 1234567890 +0000" && 48 + echo "data <<EOF" && 49 + echo "$mark" && 50 + echo "EOF" && 51 + echo "from :$mark1" && 52 + echo "merge :$mark2" 53 + fi 54 + } 55 + 56 + test_expect_success 'setup' ' 57 + build_history 16 | git fast-import && 58 + git log --format="%H %s" --reverse >commits && 59 + sed -n -e "s/ .*$//p" -e "q" <commits >expect && 60 + sed -n -e "s/^.* //p" -e "q" <commits >needle 61 + ' 62 + 63 + test_perf "rev-parse :/$(cat needle)" ' 64 + git rev-parse :/$(cat needle) >actual 65 + ' 66 + 67 + test_expect_success 'verify result' ' 68 + test_cmp expect actual 69 + ' 70 + 71 + test_done
+23
t/unit-tests/u-prio-queue.c
··· 13 13 #define STACK -3 14 14 #define GET -4 15 15 #define REVERSE -5 16 + #define REPLACE -6 16 17 17 18 static int show(int *v) 18 19 { ··· 51 52 case REVERSE: 52 53 prio_queue_reverse(&pq); 53 54 break; 55 + case REPLACE: 56 + peek = prio_queue_peek(&pq); 57 + cl_assert(i + 1 < input_size); 58 + cl_assert(input[i + 1] >= 0); 59 + cl_assert(j < result_size); 60 + cl_assert_equal_i(result[j], show(peek)); 61 + j++; 62 + prio_queue_replace(&pq, &input[++i]); 63 + break; 54 64 default: 55 65 prio_queue_put(&pq, &input[i]); 56 66 break; ··· 81 91 ((int []){ 1, 2, MISSING, 1, 2, MISSING })); 82 92 } 83 93 94 + void test_prio_queue__replace(void) 95 + { 96 + TEST_INPUT(((int []){ REPLACE, 6, 2, 4, REPLACE, 5, 7, GET, 97 + REPLACE, 1, DUMP }), 98 + ((int []){ MISSING, 2, 4, 5, 1, 6, 7 })); 99 + } 100 + 84 101 void test_prio_queue__stack(void) 85 102 { 86 103 TEST_INPUT(((int []){ STACK, 8, 1, 5, 4, 6, 2, 3, DUMP }), ··· 92 109 TEST_INPUT(((int []){ STACK, 1, 2, 3, 4, 5, 6, REVERSE, DUMP }), 93 110 ((int []){ 1, 2, 3, 4, 5, 6 })); 94 111 } 112 + 113 + void test_prio_queue__replace_stack(void) 114 + { 115 + TEST_INPUT(((int []){ STACK, 8, 1, 5, REPLACE, 4, 6, 2, 3, DUMP }), 116 + ((int []){ 5, 3, 2, 6, 4, 1, 8 })); 117 + }
+7 -4
walker.c
··· 14 14 #include "blob.h" 15 15 #include "refs.h" 16 16 #include "progress.h" 17 + #include "prio-queue.h" 17 18 18 19 static struct object_id current_commit_oid; 19 20 ··· 78 79 #define SEEN (1U << 1) 79 80 #define TO_SCAN (1U << 2) 80 81 81 - static struct commit_list *complete = NULL; 82 + static struct prio_queue complete = { compare_commits_by_commit_date }; 82 83 83 84 static int process_commit(struct walker *walker, struct commit *commit) 84 85 { ··· 87 88 if (repo_parse_commit(the_repository, commit)) 88 89 return -1; 89 90 90 - while (complete && complete->item->date >= commit->date) { 91 + while (complete.nr) { 92 + struct commit *item = prio_queue_peek(&complete); 93 + if (item->date < commit->date) 94 + break; 91 95 pop_most_recent_commit(&complete, COMPLETE); 92 96 } 93 97 ··· 233 237 234 238 if (commit) { 235 239 commit->object.flags |= COMPLETE; 236 - commit_list_insert(commit, &complete); 240 + prio_queue_put(&complete, commit); 237 241 } 238 242 return 0; 239 243 } ··· 302 306 if (!walker->get_recover) { 303 307 refs_for_each_ref(get_main_ref_store(the_repository), 304 308 mark_complete, NULL); 305 - commit_list_sort_by_date(&complete); 306 309 } 307 310 308 311 for (i = 0; i < targets; i++) {