6
6
#include "commit-graph.h"
7
7
#include "commit.h"
8
8
#include "commit-slab.h"
9
+ #include "tree.h"
10
+ #include "tree-walk.h"
11
+ #include "config.h"
9
12
#include "repository.h"
10
13
11
14
define_commit_slab (bloom_filter_slab , struct bloom_filter );
@@ -49,9 +52,9 @@ static int check_bloom_offset(struct commit_graph *g, uint32_t pos,
49
52
return -1 ;
50
53
}
51
54
52
- static int load_bloom_filter_from_graph (struct commit_graph * g ,
53
- struct bloom_filter * filter ,
54
- uint32_t graph_pos )
55
+ int load_bloom_filter_from_graph (struct commit_graph * g ,
56
+ struct bloom_filter * filter ,
57
+ uint32_t graph_pos )
55
58
{
56
59
uint32_t lex_pos , start_index , end_index ;
57
60
@@ -89,6 +92,8 @@ static int load_bloom_filter_from_graph(struct commit_graph *g,
89
92
filter -> data = (unsigned char * )(g -> chunk_bloom_data +
90
93
sizeof (unsigned char ) * start_index +
91
94
BLOOMDATA_CHUNK_HEADER_SIZE );
95
+ filter -> version = g -> bloom_filter_settings -> hash_version ;
96
+ filter -> to_free = NULL ;
92
97
93
98
return 1 ;
94
99
}
@@ -100,7 +105,64 @@ static int load_bloom_filter_from_graph(struct commit_graph *g,
100
105
* Not considered to be cryptographically secure.
101
106
* Implemented as described in https://en.wikipedia.org/wiki/MurmurHash#Algorithm
102
107
*/
103
- uint32_t murmur3_seeded (uint32_t seed , const char * data , size_t len )
108
/*
 * Compute the 32-bit MurmurHash3 of the `len` bytes at `data`, starting
 * from `seed`, and return the finalized hash.
 *
 * Every input byte is read as an unsigned char before it is widened, so
 * bytes with the high bit set contribute their value 0x80..0xff rather
 * than a sign-extended one. Four-byte blocks are assembled least
 * significant byte first, independent of host endianness.
 *
 * Block counting and indexing use size_t so that the loop bound cannot be
 * narrowed or turn negative for large `len`. Only the low 32 bits of
 * `len` are mixed into the final hash.
 */
uint32_t murmur3_seeded_v2(uint32_t seed, const char *data, size_t len)
{
	const uint32_t c1 = 0xcc9e2d51;
	const uint32_t c2 = 0x1b873593;
	const uint32_t mix_mul = 5;
	const uint32_t mix_add = 0xe6546b64;
	const unsigned char *bytes = (const unsigned char *)data;
	const size_t nblocks = len / sizeof(uint32_t);
	const unsigned char *tail = bytes + nblocks * sizeof(uint32_t);
	uint32_t k1 = 0;
	size_t i;

	/* Body: consume the input one 4-byte block at a time. */
	for (i = 0; i < nblocks; i++) {
		const unsigned char *p = bytes + i * sizeof(uint32_t);
		uint32_t k = (uint32_t)p[0] |
			     ((uint32_t)p[1] << 8) |
			     ((uint32_t)p[2] << 16) |
			     ((uint32_t)p[3] << 24);

		k *= c1;
		k = (k << 15) | (k >> 17);	/* rotate left by 15 */
		k *= c2;

		seed ^= k;
		/* rotate left by 13, then the multiply-add mixing step */
		seed = ((seed << 13) | (seed >> 19)) * mix_mul + mix_add;
	}

	/* Tail: fold in the 1..3 bytes that did not fill a whole block. */
	switch (len & (sizeof(uint32_t) - 1)) {
	case 3:
		k1 ^= (uint32_t)tail[2] << 16;
		/*-fallthrough*/
	case 2:
		k1 ^= (uint32_t)tail[1] << 8;
		/*-fallthrough*/
	case 1:
		k1 ^= (uint32_t)tail[0];
		k1 *= c1;
		k1 = (k1 << 15) | (k1 >> 17);	/* rotate left by 15 */
		k1 *= c2;
		seed ^= k1;
		break;
	default:
		/* length is a multiple of four: nothing left to mix */
		break;
	}

	/* Finalization: mix in the length and avalanche the bits. */
	seed ^= (uint32_t)len;
	seed ^= (seed >> 16);
	seed *= 0x85ebca6b;
	seed ^= (seed >> 13);
	seed *= 0xc2b2ae35;
	seed ^= (seed >> 16);

	return seed;
}
164
+
165
+ static uint32_t murmur3_seeded_v1 (uint32_t seed , const char * data , size_t len )
104
166
{
105
167
const uint32_t c1 = 0xcc9e2d51 ;
106
168
const uint32_t c2 = 0x1b873593 ;
@@ -165,8 +227,14 @@ void fill_bloom_key(const char *data,
165
227
int i ;
166
228
const uint32_t seed0 = 0x293ae76f ;
167
229
const uint32_t seed1 = 0x7e646e2c ;
168
- const uint32_t hash0 = murmur3_seeded (seed0 , data , len );
169
- const uint32_t hash1 = murmur3_seeded (seed1 , data , len );
230
+ uint32_t hash0 , hash1 ;
231
+ if (settings -> hash_version == 2 ) {
232
+ hash0 = murmur3_seeded_v2 (seed0 , data , len );
233
+ hash1 = murmur3_seeded_v2 (seed1 , data , len );
234
+ } else {
235
+ hash0 = murmur3_seeded_v1 (seed0 , data , len );
236
+ hash1 = murmur3_seeded_v1 (seed1 , data , len );
237
+ }
170
238
171
239
key -> hashes = (uint32_t * )xcalloc (settings -> num_hashes , sizeof (uint32_t ));
172
240
for (i = 0 ; i < settings -> num_hashes ; i ++ )
@@ -198,6 +266,18 @@ void init_bloom_filters(void)
198
266
init_bloom_filter_slab (& bloom_filters );
199
267
}
200
268
269
+ static void free_one_bloom_filter (struct bloom_filter * filter )
270
+ {
271
+ if (!filter )
272
+ return ;
273
+ free (filter -> to_free );
274
+ }
275
+
276
+ void deinit_bloom_filters (void )
277
+ {
278
+ deep_clear_bloom_filter_slab (& bloom_filters , free_one_bloom_filter );
279
+ }
280
+
201
281
static int pathmap_cmp (const void * hashmap_cmp_fn_data UNUSED ,
202
282
const struct hashmap_entry * eptr ,
203
283
const struct hashmap_entry * entry_or_key ,
@@ -211,11 +291,97 @@ static int pathmap_cmp(const void *hashmap_cmp_fn_data UNUSED,
211
291
return strcmp (e1 -> path , e2 -> path );
212
292
}
213
293
214
- static void init_truncated_large_filter (struct bloom_filter * filter )
294
+ static void init_truncated_large_filter (struct bloom_filter * filter ,
295
+ int version )
215
296
{
216
- filter -> data = xmalloc (1 );
297
+ filter -> data = filter -> to_free = xmalloc (1 );
217
298
filter -> data [0 ] = 0xFF ;
218
299
filter -> len = 1 ;
300
+ filter -> version = version ;
301
+ }
302
+
303
/*
 * Object flag bits used by has_entries_with_high_bit() to cache its result
 * on a tree: VISITED marks a tree that has already been scanned, HIGH_BITS
 * marks a tree found to contain a path byte with the 0x80 bit set.
 */
#define VISITED   (1u << 21)
#define HIGH_BITS (1u << 22)
305
+
306
+ static int has_entries_with_high_bit (struct repository * r , struct tree * t )
307
+ {
308
+ if (parse_tree (t ))
309
+ return 1 ;
310
+
311
+ if (!(t -> object .flags & VISITED )) {
312
+ struct tree_desc desc ;
313
+ struct name_entry entry ;
314
+
315
+ init_tree_desc (& desc , & t -> object .oid , t -> buffer , t -> size );
316
+ while (tree_entry (& desc , & entry )) {
317
+ size_t i ;
318
+ for (i = 0 ; i < entry .pathlen ; i ++ ) {
319
+ if (entry .path [i ] & 0x80 ) {
320
+ t -> object .flags |= HIGH_BITS ;
321
+ goto done ;
322
+ }
323
+ }
324
+
325
+ if (S_ISDIR (entry .mode )) {
326
+ struct tree * sub = lookup_tree (r , & entry .oid );
327
+ if (sub && has_entries_with_high_bit (r , sub )) {
328
+ t -> object .flags |= HIGH_BITS ;
329
+ goto done ;
330
+ }
331
+ }
332
+
333
+ }
334
+
335
+ done :
336
+ t -> object .flags |= VISITED ;
337
+ }
338
+
339
+ return !!(t -> object .flags & HIGH_BITS );
340
+ }
341
+
342
/*
 * Report whether the root tree of commit `c` contains any path with a
 * high-bit byte, as determined by has_entries_with_high_bit().
 *
 * Returns 1 as well when the commit cannot be parsed or has no tree, so
 * that failures are treated like a positive answer.
 */
static int commit_tree_has_high_bit_paths(struct repository *r,
					  struct commit *c)
{
	struct tree *root;

	if (repo_parse_commit(r, c))
		return 1;

	root = repo_get_commit_tree(r, c);
	return root ? has_entries_with_high_bit(r, root) : 1;
}
353
+
354
+ static struct bloom_filter * upgrade_filter (struct repository * r , struct commit * c ,
355
+ struct bloom_filter * filter ,
356
+ int hash_version )
357
+ {
358
+ struct commit_list * p = c -> parents ;
359
+ if (commit_tree_has_high_bit_paths (r , c ))
360
+ return NULL ;
361
+
362
+ if (p && commit_tree_has_high_bit_paths (r , p -> item ))
363
+ return NULL ;
364
+
365
+ filter -> version = hash_version ;
366
+
367
+ return filter ;
368
+ }
369
+
370
+ struct bloom_filter * get_bloom_filter (struct repository * r , struct commit * c )
371
+ {
372
+ struct bloom_filter * filter ;
373
+ int hash_version ;
374
+
375
+ filter = get_or_compute_bloom_filter (r , c , 0 , NULL , NULL );
376
+ if (!filter )
377
+ return NULL ;
378
+
379
+ prepare_repo_settings (r );
380
+ hash_version = r -> settings .commit_graph_changed_paths_version ;
381
+
382
+ if (!(hash_version == -1 || hash_version == filter -> version ))
383
+ return NULL ; /* unusable filter */
384
+ return filter ;
219
385
}
220
386
221
387
struct bloom_filter * get_or_compute_bloom_filter (struct repository * r ,
@@ -243,8 +409,23 @@ struct bloom_filter *get_or_compute_bloom_filter(struct repository *r,
243
409
filter , graph_pos );
244
410
}
245
411
246
- if (filter -> data && filter -> len )
247
- return filter ;
412
+ if (filter -> data && filter -> len ) {
413
+ struct bloom_filter * upgrade ;
414
+ if (!settings || settings -> hash_version == filter -> version )
415
+ return filter ;
416
+
417
+ /* version mismatch, see if we can upgrade */
418
+ if (compute_if_not_present &&
419
+ git_env_bool ("GIT_TEST_UPGRADE_BLOOM_FILTERS" , 1 )) {
420
+ upgrade = upgrade_filter (r , c , filter ,
421
+ settings -> hash_version );
422
+ if (upgrade ) {
423
+ if (computed )
424
+ * computed |= BLOOM_UPGRADED ;
425
+ return upgrade ;
426
+ }
427
+ }
428
+ }
248
429
if (!compute_if_not_present )
249
430
return NULL ;
250
431
@@ -300,19 +481,22 @@ struct bloom_filter *get_or_compute_bloom_filter(struct repository *r,
300
481
}
301
482
302
483
if (hashmap_get_size (& pathmap ) > settings -> max_changed_paths ) {
303
- init_truncated_large_filter (filter );
484
+ init_truncated_large_filter (filter ,
485
+ settings -> hash_version );
304
486
if (computed )
305
487
* computed |= BLOOM_TRUNC_LARGE ;
306
488
goto cleanup ;
307
489
}
308
490
309
491
filter -> len = (hashmap_get_size (& pathmap ) * settings -> bits_per_entry + BITS_PER_WORD - 1 ) / BITS_PER_WORD ;
492
+ filter -> version = settings -> hash_version ;
310
493
if (!filter -> len ) {
311
494
if (computed )
312
495
* computed |= BLOOM_TRUNC_EMPTY ;
313
496
filter -> len = 1 ;
314
497
}
315
498
CALLOC_ARRAY (filter -> data , filter -> len );
499
+ filter -> to_free = filter -> data ;
316
500
317
501
hashmap_for_each_entry (& pathmap , & iter , e , entry ) {
318
502
struct bloom_key key ;
@@ -326,7 +510,7 @@ struct bloom_filter *get_or_compute_bloom_filter(struct repository *r,
326
510
} else {
327
511
for (i = 0 ; i < diff_queued_diff .nr ; i ++ )
328
512
diff_free_filepair (diff_queued_diff .queue [i ]);
329
- init_truncated_large_filter (filter );
513
+ init_truncated_large_filter (filter , settings -> hash_version );
330
514
331
515
if (computed )
332
516
* computed |= BLOOM_TRUNC_LARGE ;
0 commit comments