Git fork
at reftables-rust 170 lines 5.0 kB view raw
1#ifndef BLOOM_H 2#define BLOOM_H 3 4struct commit; 5struct repository; 6struct commit_graph; 7 8struct bloom_filter_settings { 9 /* 10 * The version of the hashing technique being used. 11 * The newest version is 2, which is 12 * the seeded murmur3 hashing technique implemented 13 * in bloom.c. Bloom filters of version 1 were created 14 * with prior versions of Git, which had a bug in the 15 * implementation of the hash function. 16 */ 17 uint32_t hash_version; 18 19 /* 20 * The number of times a path is hashed, i.e. the 21 * number of bit positions that cumulatively 22 * determine whether a path is present in the 23 * Bloom filter. 24 */ 25 uint32_t num_hashes; 26 27 /* 28 * The minimum number of bits per entry in the Bloom 29 * filter. If the filter contains 'n' entries, then 30 * filter size is the minimum number of 8-bit words 31 * that contain n*b bits. 32 */ 33 uint32_t bits_per_entry; 34 35 /* 36 * The maximum number of changed paths per commit 37 * before declaring a Bloom filter to be too-large. 38 * 39 * Not written to the commit-graph file. 40 */ 41 uint32_t max_changed_paths; 42}; 43 44#define DEFAULT_BLOOM_MAX_CHANGES 512 45#define DEFAULT_BLOOM_FILTER_SETTINGS { 1, 7, 10, DEFAULT_BLOOM_MAX_CHANGES } 46#define BITS_PER_WORD 8 47#define BLOOMDATA_CHUNK_HEADER_SIZE 3 * sizeof(uint32_t) 48 49/* 50 * A bloom_filter struct represents a data segment to 51 * use when testing hash values. The 'len' member 52 * dictates how many entries are stored in 53 * 'data'. 54 */ 55struct bloom_filter { 56 unsigned char *data; 57 size_t len; 58 int version; 59 60 void *to_free; 61}; 62 63/* 64 * A bloom_key represents the k hash values for a 65 * given string. These can be precomputed and 66 * stored in a bloom_key for re-use when testing 67 * against a bloom_filter. The number of hashes is 68 * given by the Bloom filter settings and is the same 69 * for all Bloom filters and keys interacting with 70 * the loaded version of the commit graph file and 71 * the Bloom data chunks. 72 */ 73struct bloom_key { 74 uint32_t *hashes; 75}; 76 77/* 78 * A bloom_keyvec is a vector of bloom_keys, which 79 * can be used to store multiple keys for a single 80 * pathspec item. 81 */ 82struct bloom_keyvec { 83 size_t count; 84 struct bloom_key key[FLEX_ARRAY]; 85}; 86 87int load_bloom_filter_from_graph(struct commit_graph *g, 88 struct bloom_filter *filter, 89 uint32_t graph_pos); 90 91void bloom_key_fill(struct bloom_key *key, const char *data, size_t len, 92 const struct bloom_filter_settings *settings); 93void bloom_key_clear(struct bloom_key *key); 94 95/* 96 * bloom_keyvec_new - Allocate and populate a bloom_keyvec with keys for the 97 * given path. 98 * 99 * This function splits the input path by '/' and generates a bloom key for each 100 * prefix, in reverse order of specificity. For example, given the input 101 * "a/b/c", it will generate bloom keys for: 102 * - "a/b/c" 103 * - "a/b" 104 * - "a" 105 * 106 * The resulting keys are stored in a newly allocated bloom_keyvec. 107 */ 108struct bloom_keyvec *bloom_keyvec_new(const char *path, size_t len, 109 const struct bloom_filter_settings *settings); 110void bloom_keyvec_free(struct bloom_keyvec *vec); 111 112void add_key_to_filter(const struct bloom_key *key, 113 struct bloom_filter *filter, 114 const struct bloom_filter_settings *settings); 115 116void init_bloom_filters(void); 117void deinit_bloom_filters(void); 118 119enum bloom_filter_computed { 120 BLOOM_NOT_COMPUTED = (1 << 0), 121 BLOOM_COMPUTED = (1 << 1), 122 BLOOM_TRUNC_LARGE = (1 << 2), 123 BLOOM_TRUNC_EMPTY = (1 << 3), 124 BLOOM_UPGRADED = (1 << 4), 125}; 126 127struct bloom_filter *get_or_compute_bloom_filter(struct repository *r, 128 struct commit *c, 129 int compute_if_not_present, 130 const struct bloom_filter_settings *settings, 131 enum bloom_filter_computed *computed); 132 133/* 134 * Find the Bloom filter associated with the given commit "c". 135 * 136 * If any of the following are true 137 * 138 * - the repository does not have a commit-graph, or 139 * - the repository disables reading from the commit-graph, or 140 * - the given commit does not have a Bloom filter computed, or 141 * - there is a Bloom filter for commit "c", but it cannot be read 142 * because the filter uses an incompatible version of murmur3 143 * 144 * , then `get_bloom_filter()` will return NULL. Otherwise, the corresponding 145 * Bloom filter will be returned. 146 * 147 * For callers who wish to inspect Bloom filters with incompatible hash 148 * versions, use get_or_compute_bloom_filter(). 149 */ 150struct bloom_filter *get_bloom_filter(struct repository *r, struct commit *c); 151 152int bloom_filter_contains(const struct bloom_filter *filter, 153 const struct bloom_key *key, 154 const struct bloom_filter_settings *settings); 155 156/* 157 * bloom_filter_contains_vec - Check if all keys in a key vector are in the 158 * Bloom filter. 159 * 160 * Returns 1 if **all** keys in the vector are present in the filter, 161 * 0 if **any** key is not present. 162 */ 163int bloom_filter_contains_vec(const struct bloom_filter *filter, 164 const struct bloom_keyvec *v, 165 const struct bloom_filter_settings *settings); 166 167uint32_t test_bloom_murmur3_seeded(uint32_t seed, const char *data, size_t len, 168 int version); 169 170#endif