|
7 | 7 | #include <stdlib.h>
|
8 | 8 | #include <string.h>
|
9 | 9 |
|
| 10 | +#ifndef XOR_SORT_ITERATIONS |
| 11 | +#define XOR_SORT_ITERATIONS 10 // when the construction loop reaches this iteration count (default 10), the keys are sorted and duplicates are removed |
| 12 | +#endif |
| 13 | + |
10 | 14 | #ifndef XOR_MAX_ITERATIONS
|
11 | 15 | #define XOR_MAX_ITERATIONS 100 // probability of success should always be > 0.5 so 100 iterations is highly unlikely
|
12 | 16 | #endif
|
13 | 17 |
|
| 18 | + |
/**
 * qsort() comparator that orders two uint64_t values in ascending order.
 *
 * @param a pointer to the first uint64_t
 * @param b pointer to the second uint64_t
 * @return -1 if *a < *b, 0 if they are equal, 1 if *a > *b
 *
 * The result is built from comparisons instead of a subtraction: the
 * difference of two 64-bit unsigned values does not fit in an int, so
 * converting it can flip the sign or produce 0 for distinct keys
 * (e.g. 0 and 2^32), which violates the ordering qsort() requires.
 */
static int xor_cmpfunc(const void *a, const void *b) {
  const uint64_t lhs = *(const uint64_t *)a;
  const uint64_t rhs = *(const uint64_t *)b;
  return (lhs > rhs) - (lhs < rhs);
}
| 22 | + |
| 23 | +static size_t xor_sort_and_remove_dup(uint64_t* keys, size_t length) { |
| 24 | + qsort(keys, length, sizeof(uint64_t), xor_cmpfunc); |
| 25 | + size_t j = 0; |
| 26 | + for(size_t i = 1; i < length; i++) { |
| 27 | + if(keys[i] != keys[i-1]) { |
| 28 | + keys[j] = keys[i]; |
| 29 | + j++; |
| 30 | + } |
| 31 | + } |
| 32 | + return j+1; |
| 33 | +} |
14 | 34 | /**
|
15 | 35 | * We assume that you have a large set of 64-bit integers
|
16 | 36 | * and you want a data structure to do membership tests using
|
@@ -421,10 +441,10 @@ static inline uint32_t xor_flushone_decrement_buffer(xor_setbuffer_t *buffer,
|
421 | 441 |
|
422 | 442 | // Construct the filter, returns true on success, false on failure.
|
423 | 443 | // The algorithm fails when there is insufficient memory.
|
424 |
| -// The caller is responsable for calling binary_fuse8_allocate(size,filter) |
| 444 | +// The caller is responsible for calling xor8_allocate(size,filter) |
425 | 445 | // before. For best performance, the caller should ensure that there are not too
|
426 | 446 | // many duplicated keys.
|
427 |
| -static inline bool xor8_buffered_populate(const uint64_t *keys, uint32_t size, xor8_t *filter) { |
| 447 | +static inline bool xor8_buffered_populate(uint64_t *keys, uint32_t size, xor8_t *filter) { |
428 | 448 | if(size == 0) { return false; }
|
429 | 449 | uint64_t rng_counter = 1;
|
430 | 450 | filter->seed = xor_rng_splitmix64(&rng_counter);
|
@@ -470,12 +490,12 @@ static inline bool xor8_buffered_populate(const uint64_t *keys, uint32_t size, x
|
470 | 490 |
|
471 | 491 | while (true) {
|
472 | 492 | iterations ++;
|
| 493 | + if(iterations == XOR_SORT_ITERATIONS) { |
| 494 | + size = xor_sort_and_remove_dup(keys, size); |
| 495 | + } |
473 | 496 | if(iterations > XOR_MAX_ITERATIONS) {
|
474 | 497 | // The probability of this happening is lower than the
|
475 |
| - // the cosmic-ray probability (i.e., a cosmic ray corrupts your system), |
476 |
| - // but if it happens, we just fill the fingerprint with ones which |
477 |
| - // will flag all possible keys as 'possible', ensuring a correct result. |
478 |
| - memset(filter->fingerprints, ~0, 3 * filter->blockLength); |
| 498 | + // cosmic-ray probability (i.e., a cosmic ray corrupts your system). |
479 | 499 | xor_free_buffer(&buffer0);
|
480 | 500 | xor_free_buffer(&buffer1);
|
481 | 501 | xor_free_buffer(&buffer2);
|
@@ -632,10 +652,10 @@ static inline bool xor8_buffered_populate(const uint64_t *keys, uint32_t size, x
|
632 | 652 |
|
633 | 653 | // Construct the filter, returns true on success, false on failure.
|
634 | 654 | // The algorithm fails when there is insufficient memory.
|
635 |
| -// The caller is responsable for calling binary_fuse8_allocate(size,filter) |
| 655 | +// The caller is responsible for calling xor8_allocate(size,filter) |
636 | 656 | // before. For best performance, the caller should ensure that there are not too
|
637 | 657 | // many duplicated keys.
|
638 |
| -static inline bool xor8_populate(const uint64_t *keys, uint32_t size, xor8_t *filter) { |
| 658 | +static inline bool xor8_populate(uint64_t *keys, uint32_t size, xor8_t *filter) { |
639 | 659 | if(size == 0) { return false; }
|
640 | 660 | uint64_t rng_counter = 1;
|
641 | 661 | filter->seed = xor_rng_splitmix64(&rng_counter);
|
@@ -668,12 +688,12 @@ static inline bool xor8_populate(const uint64_t *keys, uint32_t size, xor8_t *fi
|
668 | 688 |
|
669 | 689 | while (true) {
|
670 | 690 | iterations ++;
|
| 691 | + if(iterations == XOR_SORT_ITERATIONS) { |
| 692 | + size = xor_sort_and_remove_dup(keys, size); |
| 693 | + } |
671 | 694 | if(iterations > XOR_MAX_ITERATIONS) {
|
672 | 695 | // The probability of this happening is lower than the
|
673 |
| - // the cosmic-ray probability (i.e., a cosmic ray corrupts your system), |
674 |
| - // but if it happens, we just fill the fingerprint with ones which |
675 |
| - // will flag all possible keys as 'possible', ensuring a correct result. |
676 |
| - memset(filter->fingerprints, ~0, 3 * filter->blockLength); |
| 696 | + // cosmic-ray probability (i.e., a cosmic ray corrupts your system). |
677 | 697 | free(sets);
|
678 | 698 | free(Q);
|
679 | 699 | free(stack);
|
@@ -839,10 +859,10 @@ static inline bool xor8_populate(const uint64_t *keys, uint32_t size, xor8_t *fi
|
839 | 859 |
|
840 | 860 | // Construct the filter, returns true on success, false on failure.
|
841 | 861 | // The algorithm fails when there is insufficient memory.
|
842 |
| -// The caller is responsable for calling binary_fuse8_allocate(size,filter) |
| 862 | +// The caller is responsible for calling xor16_allocate(size,filter) |
843 | 863 | // before. For best performance, the caller should ensure that there are not too
|
844 | 864 | // many duplicated keys.
|
845 |
| -static inline bool xor16_buffered_populate(const uint64_t *keys, uint32_t size, xor16_t *filter) { |
| 865 | +static inline bool xor16_buffered_populate(uint64_t *keys, uint32_t size, xor16_t *filter) { |
846 | 866 | if(size == 0) { return false; }
|
847 | 867 | uint64_t rng_counter = 1;
|
848 | 868 | filter->seed = xor_rng_splitmix64(&rng_counter);
|
@@ -888,12 +908,12 @@ static inline bool xor16_buffered_populate(const uint64_t *keys, uint32_t size,
|
888 | 908 |
|
889 | 909 | while (true) {
|
890 | 910 | iterations ++;
|
| 911 | + if(iterations == XOR_SORT_ITERATIONS) { |
| 912 | + size = xor_sort_and_remove_dup(keys, size); |
| 913 | + } |
891 | 914 | if(iterations > XOR_MAX_ITERATIONS) {
|
892 | 915 | // The probability of this happening is lower than the
|
893 |
| - // the cosmic-ray probability (i.e., a cosmic ray corrupts your system), |
894 |
| - // but if it happens, we just fill the fingerprint with ones which |
895 |
| - // will flag all possible keys as 'possible', ensuring a correct result. |
896 |
| - memset(filter->fingerprints, ~0, 3 * filter->blockLength * sizeof(uint16_t)); |
| 916 | + // cosmic-ray probability (i.e., a cosmic ray corrupts your system). |
897 | 917 | xor_free_buffer(&buffer0);
|
898 | 918 | xor_free_buffer(&buffer1);
|
899 | 919 | xor_free_buffer(&buffer2);
|
@@ -1053,10 +1073,10 @@ static inline bool xor16_buffered_populate(const uint64_t *keys, uint32_t size,
|
1053 | 1073 |
|
1054 | 1074 | // Construct the filter, returns true on success, false on failure.
|
1055 | 1075 | // The algorithm fails when there is insufficient memory.
|
1056 |
| -// The caller is responsable for calling binary_fuse8_allocate(size,filter) |
| 1076 | +// The caller is responsible for calling xor16_allocate(size,filter) |
1057 | 1077 | // before. For best performance, the caller should ensure that there are not too
|
1058 | 1078 | // many duplicated keys.
|
1059 |
| -static inline bool xor16_populate(const uint64_t *keys, uint32_t size, xor16_t *filter) { |
| 1079 | +static inline bool xor16_populate(uint64_t *keys, uint32_t size, xor16_t *filter) { |
1060 | 1080 | if(size == 0) { return false; }
|
1061 | 1081 | uint64_t rng_counter = 1;
|
1062 | 1082 | filter->seed = xor_rng_splitmix64(&rng_counter);
|
@@ -1090,16 +1110,16 @@ static inline bool xor16_populate(const uint64_t *keys, uint32_t size, xor16_t *
|
1090 | 1110 |
|
1091 | 1111 | while (true) {
|
1092 | 1112 | iterations ++;
|
| 1113 | + if(iterations == XOR_SORT_ITERATIONS) { |
| 1114 | + size = xor_sort_and_remove_dup(keys, size); |
| 1115 | + } |
1093 | 1116 | if(iterations > XOR_MAX_ITERATIONS) {
|
1094 | 1117 | // The probability of this happening is lower than the
|
1095 |
| - // the cosmic-ray probability (i.e., a cosmic ray corrupts your system), |
1096 |
| - // but if it happens, we just fill the fingerprint with ones which |
1097 |
| - // will flag all possible keys as 'possible', ensuring a correct result. |
1098 |
| - memset(filter->fingerprints, ~0, 3 * filter->blockLength * sizeof(uint16_t)); |
| 1118 | + // cosmic-ray probability (i.e., a cosmic ray corrupts your system). |
1099 | 1119 | free(sets);
|
1100 | 1120 | free(Q);
|
1101 | 1121 | free(stack);
|
1102 |
| - return true; |
| 1122 | + return false; |
1103 | 1123 | }
|
1104 | 1124 |
|
1105 | 1125 | memset(sets, 0, sizeof(xor_xorset_t) * arrayLength);
|
|
0 commit comments