commit 39119945196c7b7275db1f3c641bbdc05fb35e0e
parent 6f32334f82c55b554d23e5a9ed5388d028677977
Author: olikru <olikru@tkruger.se>
Date: Mon, 2 Sep 2024 16:29:32 +0200
working bloomer
Diffstat:
| M | Makefile | | | 8 | ++++++-- |
| M | bloom.c | | | 71 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++---------------- |
| A | bloom.h | | | 25 | +++++++++++++++++++++++++ |
| M | test_angrepp.c | | | 62 | ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
| A | tools/bloomer.c | | | 129 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
5 files changed, 277 insertions(+), 18 deletions(-)
diff --git a/Makefile b/Makefile
@@ -31,7 +31,7 @@ fmpzio.o\
wiener.o\
prodtree.o\
batchgcd.o\
-bsgs.o
+bloom.o
TOOLS=\
tools/hnpsolve\
tools/lcgfloyd\
@@ -40,7 +40,8 @@ tools/pierre\
tools/varmkorv\
tools/small\
tools/batchgcd\
-tools/pelofskegcd
+tools/pelofskegcd\
+tools/bloomer
PRECOMPUTERS=\
precomputers/header
TEST=test
@@ -89,6 +90,9 @@ tools/batchgcd: $(OBJ) $(@:S/$/.c/)
tools/pelofskegcd: $(OBJ) $(@:S/$/.c/)
$(CC) -I. $(CFLAGS) -o $@ $(TOOLS_DIR)/pelofskegcd.c $(OBJ) $(LDFLAGS)
+tools/bloomer: $(OBJ) $(@:S/$/.c/)
+ $(CC) -I. $(CFLAGS) -o $@ $(TOOLS_DIR)/bloomer.c $(OBJ) $(LDFLAGS)
+
tools/dbtool: $(OBJ) $(@:S/$/.c/)
$(CC) -I. $(CFLAGS) -o $@ $(TOOLS_DIR)/dbtool.c $(OBJ) $(LDFLAGS) -lsqlite3
diff --git a/bloom.c b/bloom.c
@@ -1,9 +1,6 @@
-typedef struct {
- size_t n;
- uint8_t* buffer;
-} bitvec_t;
+#include "bloom.h"
-void
+static inline void
bitvec_init(bitvec_t *bv, const size_t size)
{
assert(size < (0xffffffff - 7));
@@ -14,13 +11,13 @@ bitvec_init(bitvec_t *bv, const size_t size)
bv->buffer = calloc(size_bytes, sizeof(*bv->buffer));
}
-void
+static inline void
bitvec_clear(bitvec_t *bv)
{
free(bv->buffer);
}
-void
+static inline void
bitvec_set(bitvec_t *bv, const size_t index)
{
assert(index < bv->n);
@@ -32,6 +29,7 @@ bitvec_set(bitvec_t *bv, const size_t index)
bv->buffer[byte_index] |= mask;
}
+/*
void
bitvec_unset(bitvec_t *bv, const size_t index)
{
@@ -44,17 +42,18 @@ bitvec_unset(bitvec_t *bv, const size_t index)
bv->buffer[byte_index] &= mask;
}
+*/
-int
+static int
bitvec_is_set(const bitvec_t *bv, const size_t index)
{
assert(index < bv->n);
const size_t byte_index = index >> 3;
- const size_t bit_index = index % 7;
+ const size_t bit_index = index % 8;
const uint8_t mask = 0x80 >> bit_index;
- if (bv->buffer[byte_index] & mask != 0) {
+ if ((bv->buffer[byte_index] & mask) != 0) {
return 1;
}
@@ -73,21 +72,61 @@ mix64(uint64_t *out, uint64_t in)
}
static inline uint64_t
-hash_fmpz(uint64_t seed, fmpz_t n)
+hash_fmpz(uint64_t seed, const fmpz_t n)
{
size_t i;
uint64_t ret = seed;
mpz_ptr p;
- mp_limb_t *limbs;
+ mp_srcptr limbs;
+ size_t msize;
- if(COEFF_IS_MPZ(n[0]) != 0) {
+ if (COEFF_IS_MPZ(n[0]) != 0) {
p = COEFF_TO_PTR(n[0]);
msize = mpz_size(p);
limbs = mpz_limbs_read(p);
- for(i = 0; i < msize; i++) {
- mix64(&ret, (uint64_t) limbs[i]);
+ for (i = 0; i < msize; i++) {
+ mix64(&ret, (uint64_t)limbs[i]);
}
} else {
- mix64(&ret, (uint64_t) n[0]);
+ mix64(&ret, (uint64_t)n[0]);
}
+
+ return ret;
+}
+
+void
+bloom_init(bloom_t *bloom, const size_t k, const size_t m)
+{
+ bloom->k = k;
+ bitvec_init(&bloom->bv, m);
+}
+
+void
+bloom_clear(bloom_t *bloom)
+{
+ bitvec_clear(&bloom->bv);
+}
+
+void
+bloom_insert(bloom_t *bloom, const fmpz_t n)
+{
+ uint64_t i;
+
+ for (i = 0; i < bloom->k; i++) {
+ bitvec_set(&bloom->bv, hash_fmpz(i, n) % bloom->bv.n);
+ }
+}
+
+int
+bloom_prob_contains(const bloom_t *bloom, const fmpz_t n)
+{
+ uint64_t i;
+
+ for (i = 0; i < bloom->k; i++) {
+ if (!bitvec_is_set(&bloom->bv, hash_fmpz(i, n) % bloom->bv.n)) {
+ return 0;
+ }
+ }
+
+ return 1;
}
diff --git a/bloom.h b/bloom.h
@@ -0,0 +1,25 @@
+#ifndef BLOOM_H
+#define BLOOM_H
+
+#include <assert.h>
+#include <flint.h>
+#include <fmpz.h>
+#include <stdint.h>
+#include <stdlib.h>
+
+typedef struct {
+ size_t n;
+ uint8_t *buffer;
+} bitvec_t;
+
+typedef struct {
+ size_t k; // number of hash functions
+ bitvec_t bv;
+} bloom_t;
+
+void bloom_init(bloom_t *bloom, const size_t k, const size_t m);
+void bloom_clear(bloom_t *bloom);
+void bloom_insert(bloom_t *bloom, const fmpz_t n);
+int bloom_prob_contains(const bloom_t *bloom, const fmpz_t n);
+
+#endif
diff --git a/test_angrepp.c b/test_angrepp.c
@@ -12,6 +12,7 @@
#include "wiener.h"
#include "hnp.h"
#include "context.h"
+#include "bloom.h"
static uint64_t
test_f(uint64_t x)
@@ -186,6 +187,66 @@ test_hidden_number_problem(void)
fmpz_clear(B);
}
+static void
+test_bloom(void)
+{
+ const uint64_t k = 10;
+ const uint64_t m = 1000000;
+ const uint64_t n = 800;
+ size_t i;
+ bloom_t bloom;
+
+ bloom_init(&bloom, k, m);
+
+ flint_rand_t state;
+ flint_randinit(state);
+
+ fmpz *v = _fmpz_vec_init(n);
+ for (i = 0; i < n; i++) {
+ fmpz_randtest_unsigned(&v[i], state, 4096);
+ }
+
+ for (i = 0; i < n; i++) {
+ bloom_insert(&bloom, &v[i]);
+ }
+
+ for (i = 0; i < n; i++) {
+ assert(bloom_prob_contains(&bloom, &v[i]));
+ }
+
+ fmpz_t t;
+ fmpz_init(t);
+
+ size_t loops = 0;
+ int inlist;
+ while (loops < 10000) {
+ fmpz_randtest_unsigned(t, state, 4096);
+ inlist = 0;
+ for (i = 0; i < n; i++) {
+ if (fmpz_cmp(t, &v[i]) == 0) {
+ inlist = 1;
+ break;
+ }
+ }
+
+ if (inlist) {
+ continue;
+ }
+
+ if (bloom_prob_contains(&bloom, t)) {
+ fprintf(stderr, "An 1.0694557203850693e-29 per cent likely event occured, you must be very unlucky.\n");
+ }
+
+ loops++;
+ }
+
+ fmpz_clear(t);
+ flint_randclear(state);
+
+ bloom_clear(&bloom);
+ _fmpz_vec_clear(v, n);
+}
+
int
main(void)
{
@@ -196,6 +257,7 @@ main(void)
test_smallfactor_euclidean();
test_prodtree();
test_hidden_number_problem();
+ test_bloom();
printf("test ok\n");
}
diff --git a/tools/bloomer.c b/tools/bloomer.c
@@ -0,0 +1,129 @@
+#include <errno.h>
+#include <flint.h>
+#include <fmpz.h>
+#include <fmpz_vec.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "bloom.h"
+
+#define BIT_SIZE 4096
+
+static inline void
+print_usage(char *prog)
+{
+ printf("Usage: %s k m n\n"
+ " k: number of hash functions\n"
+ " m: size of bit vector\n"
+ " n: number of entries to insert\n");
+}
+
+int
+main(int argc, char *argv[])
+{
+ uint64_t k, m, n;
+ size_t i;
+ char *ep;
+ bloom_t bloom;
+
+ if (argc != 4) {
+ print_usage(argv[0]);
+ return 1;
+ }
+
+ errno = 0;
+ k = strtoull(argv[1], &ep, 10);
+ if (argv[1][0] == '\0' || *ep != '\0' ||
+ (k == ULLONG_MAX && errno == ERANGE)) {
+ fprintf(stderr,
+ "Could not interpret k as a 64 bit integer in base 10.");
+ return 1;
+ }
+
+ errno = 0;
+ m = strtoull(argv[2], &ep, 10);
+ if (argv[2][0] == '\0' || *ep != '\0' ||
+ (m == ULLONG_MAX && errno == ERANGE)) {
+ fprintf(stderr,
+ "Could not interpret m as a 64 bit integer in base 10.");
+ return 1;
+ }
+
+ errno = 0;
+ n = strtoull(argv[3], &ep, 10);
+ if (argv[3][0] == '\0' || *ep != '\0' ||
+ (n == ULLONG_MAX && errno == ERANGE)) {
+ fprintf(stderr,
+ "Could not interpret n as a 64 bit integer in base 10.");
+ return 1;
+ }
+
+ printf("[+] Running, with\n");
+ printf(" k = %llu, m = %llu, n = %llu\n", k, m, n);
+
+ bloom_init(&bloom, k, m);
+
+ printf("[+] Generating n random %lld bit integers...\n", BIT_SIZE);
+ flint_rand_t state;
+ flint_randinit(state);
+
+ fmpz *v = _fmpz_vec_init(n);
+ for (i = 0; i < n; i++) {
+ fmpz_randtest_unsigned(&v[i], state, BIT_SIZE);
+ }
+
+ printf("[+] Inserting the numbers in the Bloom filter...\n");
+ for (i = 0; i < n; i++) {
+ bloom_insert(&bloom, &v[i]);
+ }
+
+ printf("[+] Testing for the inserted numbers...\n");
+ for (i = 0; i < n; i++) {
+ if (!bloom_prob_contains(&bloom, &v[i])) {
+ fprintf(stderr, "This is not a bloom filter, %zu\n", i);
+ return 1;
+ }
+ }
+
+ fmpz_t t;
+ fmpz_init(t);
+
+ printf("[+] Testing the rate of false positives...\n");
+ size_t loops = 0;
+ size_t false_pos = 0;
+ int inlist;
+ while (1) {
+ fmpz_randtest_unsigned(t, state, BIT_SIZE);
+ inlist = 0;
+ for (i = 0; i < n; i++) {
+ if (fmpz_cmp(t, &v[i]) == 0) {
+ inlist = 1;
+ break;
+ }
+ }
+
+ if (inlist) {
+ continue;
+ }
+
+ if (bloom_prob_contains(&bloom, t)) {
+ false_pos++;
+ }
+
+ loops++;
+ if (loops % 100000 == 0) {
+ double x = (double)false_pos / (double)loops;
+ printf("%zu out of %zu (%llf per cent) false positive\n",
+ false_pos, loops, x * 100.0);
+ }
+ }
+
+ fmpz_clear(t);
+ flint_randclear(state);
+
+ bloom_clear(&bloom);
+ _fmpz_vec_clear(v, n);
+
+ return 0;
+}