smallset

Tools for subsets of [n], where n is not big
git clone git://www.tkruger.se/set_u64.git
Log | Files | Refs | README

commit b7eee3cedc9e9df775610e6f323909cc8b914a3a
Author: olikru <olikru@tkruger.se>
Date:   Mon,  8 Jan 2024 13:56:04 +0100

initial

Diffstat:
AMakefile | 40++++++++++++++++++++++++++++++++++++++++
AREADME | 12++++++++++++
Asmallset.c | 100+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Asmallset.h | 132+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Atest_smallset.c | 224+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
5 files changed, 508 insertions(+), 0 deletions(-)

diff --git a/Makefile b/Makefile @@ -0,0 +1,40 @@ +.SUFFIXES: .c .o .so +CC=clang +CFLAGS+=-std=c99 -pedantic -Wall -Werror -Wstrict-prototypes +CFLAGS+=-Wmissing-prototypes -Wmissing-declarations -Wshadow +CFLAGS+=-Wpointer-arith -Wcast-qual -Wsign-compare +CFLAGS+=-O2 -g +CFLAGS+=-fstack-protector-all -Wtype-limits -fno-common +CFLAGS+=-fno-builtin +CFLAGS+=-I/usr/local/include + +INSTALL_PATH=$(HOME)/.local +BUILD=build + +TEST_SOURCE=test_smallset.c +HEADER=smallset.h +OBJS=smallset.o +SHARED=smallset.so +LIBSHARED=libsmallset.so + +all: build $(OBJS) $(SHARED) test + +.c.o: + $(CC) $(CFLAGS) -c $< -o $(BUILD)/$@ + +.o.so: + $(CC) -shared -fPIC $(BUILD)/$< -o $(BUILD)/$@ + +test: $(TEST_SOURCE) + $(CC) $(CFLAGS) -o $(BUILD)/test $(TEST_SOURCE) $(BUILD)/$(OBJS) + +build: + mkdir -p $(BUILD) + +install: + cp $(BUILD)/$(SHARED) $(INSTALL_PATH)/lib/$(LIBSHARED) + chmod 644 $(INSTALL_PATH)/lib/$(LIBSHARED) + cp $(HEADER) $(INSTALL_PATH)/include/ + +clean: + rm -rf $(BUILD) diff --git a/README b/README @@ -0,0 +1,12 @@ +stack_u64 +========= + +Implementation of subsets of {0,1,...,n-1} where n is small. This is +implemented as a n-element bit vector. + + typedef struct { + size_t nmemb; // number of entries in stack + size_t alloc; // number of entries currently allocated for + + uint64_t *data; // array of entries + } stack_u64_t; diff --git a/smallset.c b/smallset.c @@ -0,0 +1,100 @@ +#include "smallset.h" + +#define BIDX(x) (x / 8) +#define BMSK(x) (0x80 >> (x % 8)) +#define SIZE(b) ((b + 7) >> 3) + +void smallset_init(smallset_t *s, const size_t bits) { + s->bits = bits; + s->data = calloc(SIZE(bits), sizeof(*s->data)); + assert(sizeof(*s->data) == 1); +} + +void smallset_clear(smallset_t *s) { + s->bits = 0; + free(s->data); +} + +void smallset_insert(smallset_t *s, const uint64_t x) { + assert(x < s->bits); + s->data[BIDX(x)] |= BMSK(x); +} + +int smallset_lookup(smallset_t *s, const uint64_t x) { + assert(x < s->bits); + return s->data[BIDX(x)] & BMSK(x); +} + +void smallset_intersection(smallset_t *out, smallset_t *a, smallset_t *b) { + assert(a->bits == b->bits); + assert(out->bits == a->bits); + + size_t i; + for (i = 0; i < SIZE(out->bits); i++) { + out->data[i] = a->data[i] & b->data[i]; + } +} + +void smallset_minus(smallset_t *out, smallset_t *a, smallset_t *b) { + assert(a->bits == b->bits); + assert(out->bits == a->bits); + + size_t i; + for (i = 0; i < SIZE(out->bits); i++) { + out->data[i] = a->data[i] - (a->data[i] & b->data[i]); + } +} + +void smallset_tonstr(char *str, size_t n, smallset_t *s) { + int first = 1; + int u = snprintf(str, n, "{"); + + size_t i; + for (i = 0; i < s->bits; i++) { + if (smallset_lookup(s, i)) { + if (!first) { + u += snprintf(&str[u], n - u, ", %zu", i); + } else { + u += snprintf(&str[u], n - u, "%zu", i); + first = !first; + } + } + } + + snprintf(&str[u], n - u, "}"); +} + +uint64_t smallset_getone(smallset_t *s) { + uint64_t i; + for (i = 0; i < s->bits; i++) { + if (smallset_lookup(s, i)) + return i; + } + + return SMALLSET_IS_EMPTY; +} + +void smallset_empty(smallset_t *s) { memset(s->data, 0, SIZE(s->bits)); } + +uint64_t smallset_cardinality(smallset_t *s) { + uint64_t a; + uint64_t r = 0; + size_t i; + for (i = 0; i < SIZE(s->bits) - 8; i += 8) { + __asm__ volatile("POPCNT %1, %0" + : "=r"(a) + : "r"(*((uint64_t *)&s->data[i])) + :); + r += a; + } + + uint64_t tmp = 0; + while (i < SIZE(s->bits)) { + tmp = (tmp << 8) ^ ((uint64_t)s->data[i]); + i++; + } + __asm__ volatile("POPCNT %1, %0" : "=r"(a) : "r"(tmp) :); + r += a; + + return r; +} diff --git a/smallset.h b/smallset.h @@ -0,0 +1,132 @@ +/** + * Implementation of "small sets" (subsets of [n], where n is not big). + */ +#ifndef SMALLSET_H_ +#define SMALLSET_H_ + +#include <assert.h> +#include <stdint.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#define SMALLSET_IS_EMPTY UINT64_MAX + +typedef struct { + size_t bits; + uint8_t *data; +} smallset_t; + +/** + * Intializes a small set. + * + * The set is a subset of some fixed [n]. + * Allocates memory that needs to be smallset_clear'd. + * + * @param s pointer to the smallset_t to be inited + * @param bits specifies that the set is a subset of [bits] + */ +void smallset_init(smallset_t *s, const size_t bits); + +/** + * Clears a set. + * + * @param s pointer to the smallset_t to clear + */ +void smallset_clear(smallset_t *s); + +/** + * Insert an element into a set. + * + * Inserts an element x \in [n] into a smallset. If the + * element is already in the set the set is unchanged. + * Assertions will fail of x \not\in [n]. + * + * @param s pointer to the set to insert into + * @param x element to insert into the set + */ +void smallset_insert(smallset_t *s, const uint64_t x); + +/** + * Check if an element is in a set. + * + * Checks if the element x is in the set pointed to by + * s. Assumes that x \in [n] (or assertions will fail). + * + * @param s pointer to the set to check in + * @param x the element to check for + * @returns a nonzero value iff x is in the set + */ +int smallset_lookup(smallset_t *s, const uint64_t x); + +/** + * Compute the intersection of two sets. + * + * This assumes that all three sets are subsets of the + * same [n]. Otherwise assertions will fail. Note that + * any of the pointers are allowed to be equal. + * + * @param out pointer to the set where to write intersection + * @param a pointer to one of the intersect sets + * @param b pointer to the other intersect set + */ +void smallset_intersection(smallset_t *out, smallset_t *a, smallset_t *b); + +/** + * Compute the set subtraction of two sets. + * + * This assumes that all three sets are subsets of the + * same [n]. Otherwise assertions will fail. Note that + * any of the pointers are allowed to be equal. + * + * @param out pointer to the set where to write difference + * @param a pointer to set to subtract from + * @param b pointer to set to subtract + */ +void smallset_minus(smallset_t *out, smallset_t *a, smallset_t *b); + +/** + * Writes the smallset_t to a string. + * + * The string buffer must already be allocated, and the + * parameter n indicates the maximum number of bytes that + * may be written to the buffer, including termingating \0. + * + * String is reprensented in trad notation: e.g. + * {}, {1,3}, {0,16,19} + * + * @param str pointer to the first char of the write buffer + * @param n limit on the number of bytes to be written + * @param s pointer to the smallset + */ +void smallset_tonstr(char *str, size_t n, smallset_t *s); + +/** + * Get an element that is in the set. + * + * Returns the smallest element from the set. If no element + * is in the set (i.e. it is empty) then this returns the + * SMALLSET_IS_EMPTY value (== UINT64_MAX). + * + * @param s pointer to the smallset_t to get elem from + */ +uint64_t smallset_getone(smallset_t *s); + +/** + * Empty a set. + * + * Empties a set by removing all elements. + * + * @param s pointer to the smallset_t to empty + */ +void smallset_empty(smallset_t *s); + +/** + * Get the cardinality of a set. + * + * @param s the set to get the cardinality of + * @return the cardinality + */ +uint64_t smallset_cardinality(smallset_t *s); + +#endif diff --git a/test_smallset.c b/test_smallset.c @@ -0,0 +1,224 @@ +#include <stdlib.h> +#include <stdio.h> +#include <assert.h> +#include "smallset.h" + +static void test_smallset_init() { + smallset_t s; + smallset_init(&s, 17); + + size_t i; + for (i = 0; i < (17 + 7) / 8; i++) { + assert(s.data[i] == 0); + } + + smallset_clear(&s); +} + +static void test_smallset_insert() { + smallset_t s; + smallset_init(&s, 17); + + smallset_insert(&s, 3); + + assert(s.data[0] == 0x80 >> 3); + assert(s.data[1] == 0x00); + assert(s.data[2] == 0x00); + + smallset_clear(&s); +} + +static void test_smallset_lookup() { + smallset_t s; + smallset_init(&s, 17); + + smallset_insert(&s, 3); + smallset_insert(&s, 15); + smallset_insert(&s, 4); + + assert(smallset_lookup(&s, 0) == 0); + assert(smallset_lookup(&s, 1) == 0); + assert(smallset_lookup(&s, 2) == 0); + assert(smallset_lookup(&s, 3) != 0); + assert(smallset_lookup(&s, 4) != 0); + assert(smallset_lookup(&s, 5) == 0); + assert(smallset_lookup(&s, 6) == 0); + assert(smallset_lookup(&s, 7) == 0); + assert(smallset_lookup(&s, 8) == 0); + assert(smallset_lookup(&s, 9) == 0); + assert(smallset_lookup(&s, 10) == 0); + assert(smallset_lookup(&s, 11) == 0); + assert(smallset_lookup(&s, 12) == 0); + assert(smallset_lookup(&s, 13) == 0); + assert(smallset_lookup(&s, 14) == 0); + assert(smallset_lookup(&s, 15) != 0); + assert(smallset_lookup(&s, 16) == 0); + + smallset_clear(&s); +} + +static void test_smallset_intersection() { + smallset_t a, b, s; + smallset_init(&a, 17); + smallset_init(&b, 17); + smallset_init(&s, 17); + + smallset_insert(&a, 3); + smallset_insert(&a, 4); + smallset_insert(&a, 15); + smallset_insert(&a, 5); + smallset_insert(&a, 7); + + smallset_insert(&b, 3); + smallset_insert(&b, 4); + smallset_insert(&b, 15); + smallset_insert(&b, 6); + smallset_insert(&b, 9); + smallset_insert(&b, 10); + + smallset_intersection(&s, &a, &b); + + assert(smallset_lookup(&s, 0) == 0); + assert(smallset_lookup(&s, 1) == 0); + assert(smallset_lookup(&s, 2) == 0); + assert(smallset_lookup(&s, 3) != 0); + assert(smallset_lookup(&s, 4) != 0); + assert(smallset_lookup(&s, 5) == 0); + assert(smallset_lookup(&s, 6) == 0); + assert(smallset_lookup(&s, 7) == 0); + assert(smallset_lookup(&s, 8) == 0); + assert(smallset_lookup(&s, 9) == 0); + assert(smallset_lookup(&s, 10) == 0); + assert(smallset_lookup(&s, 11) == 0); + assert(smallset_lookup(&s, 12) == 0); + assert(smallset_lookup(&s, 13) == 0); + assert(smallset_lookup(&s, 14) == 0); + assert(smallset_lookup(&s, 15) != 0); + assert(smallset_lookup(&s, 16) == 0); + + smallset_clear(&s); +} + +static void test_smallset_minus() { + smallset_t a, b, s; + smallset_init(&a, 17); + smallset_init(&b, 17); + smallset_init(&s, 17); + + smallset_insert(&a, 3); + smallset_insert(&a, 4); + smallset_insert(&a, 15); + smallset_insert(&a, 5); + smallset_insert(&a, 7); + + smallset_insert(&b, 3); + smallset_insert(&b, 4); + smallset_insert(&b, 15); + smallset_insert(&b, 6); + smallset_insert(&b, 9); + smallset_insert(&b, 10); + + smallset_minus(&s, &a, &b); + + assert(smallset_lookup(&s, 0) == 0); + assert(smallset_lookup(&s, 1) == 0); + assert(smallset_lookup(&s, 2) == 0); + assert(smallset_lookup(&s, 3) == 0); + assert(smallset_lookup(&s, 4) == 0); + assert(smallset_lookup(&s, 5) != 0); + assert(smallset_lookup(&s, 6) == 0); + assert(smallset_lookup(&s, 7) != 0); + assert(smallset_lookup(&s, 8) == 0); + assert(smallset_lookup(&s, 9) == 0); + assert(smallset_lookup(&s, 10) == 0); + assert(smallset_lookup(&s, 11) == 0); + assert(smallset_lookup(&s, 12) == 0); + assert(smallset_lookup(&s, 13) == 0); + assert(smallset_lookup(&s, 14) == 0); + assert(smallset_lookup(&s, 15) == 0); + assert(smallset_lookup(&s, 16) == 0); + + smallset_clear(&s); +} + +static void test_smallset_tonstr() { + smallset_t s; + smallset_init(&s, 25); + + char buffer[256]; + + smallset_tonstr(buffer, 256, &s); + assert(strncmp(buffer, "{}", 2) == 0); + + smallset_insert(&s, 1); + smallset_tonstr(buffer, 256, &s); + assert(strncmp(buffer, "{1}", 3) == 0); + + smallset_insert(&s, 12); + smallset_tonstr(buffer, 256, &s); + assert(strncmp(buffer, "{1, 12}", 6) == 0); + + smallset_clear(&s); +} + +static void test_smallset_getone() { + smallset_t s; + smallset_init(&s, 17); + + assert(smallset_getone(&s) == SMALLSET_IS_EMPTY); + + smallset_insert(&s, 3); + smallset_insert(&s, 15); + smallset_insert(&s, 4); + + assert(smallset_getone(&s) == 3); + + smallset_clear(&s); +} + +static void test_smallset_empty() { + smallset_t s; + smallset_init(&s, 17); + + assert(smallset_getone(&s) == SMALLSET_IS_EMPTY); + + smallset_insert(&s, 3); + smallset_insert(&s, 15); + smallset_insert(&s, 4); + + assert(smallset_getone(&s) == 3); + + smallset_empty(&s); + + assert(smallset_getone(&s) == SMALLSET_IS_EMPTY); + + smallset_clear(&s); +} + +static void test_smallset_cardinality() { + smallset_t s; + smallset_init(&s, 7 * 50); + + uint64_t i, j; + for (i = 0; i < 50; i++) { + for (j = 0; j < 7; j++) { + smallset_insert(&s, i * 7 + j); + } + assert(smallset_cardinality(&s) == 7 * (i + 1)); + } + smallset_clear(&s); +} + +int main() { + test_smallset_init(); + test_smallset_insert(); + test_smallset_lookup(); + test_smallset_intersection(); + test_smallset_minus(); + test_smallset_tonstr(); + test_smallset_getone(); + test_smallset_empty(); + test_smallset_cardinality(); + + printf("test ok\n"); +}