reading

Tools for reading input
git clone git://www.tkruger.se/reading.git
Log | Files | Refs | README

commit 5c6586f1703bc59e2a6c5b97ffa213bdce524db5
Author: olikru <olikru@tkruger.se>
Date:   Mon,  8 Jan 2024 12:02:43 +0100

initial

Diffstat:
AMakefile | 40++++++++++++++++++++++++++++++++++++++++
AREADME | 5+++++
Areading.c | 158+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Areading.h | 82+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Atest_reading.c | 73+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Atestfiles/testfile1.txt | 3+++
Atestfiles/testfile2.txt | 2++
7 files changed, 363 insertions(+), 0 deletions(-)

diff --git a/Makefile b/Makefile @@ -0,0 +1,40 @@ +.SUFFIXES: .c .o .so +CC=clang +CFLAGS+=-std=c99 -pedantic -Wall -Werror -Wstrict-prototypes +CFLAGS+=-Wmissing-prototypes -Wmissing-declarations -Wshadow +CFLAGS+=-Wpointer-arith -Wcast-qual -Wsign-compare +CFLAGS+=-O2 -g +CFLAGS+=-fstack-protector-all -Wtype-limits -fno-common +CFLAGS+=-fno-builtin +CFLAGS+=-I/usr/local/include + +INSTALL_PATH=$(HOME)/.local +BUILD=build + +TEST_SOURCE=test_reading.c +HEADER=reading.h +OBJS=reading.o +SHARED=reading.so +LIBSHARED=libreading.so + +all: build $(OBJS) $(SHARED) test + +.c.o: + $(CC) $(CFLAGS) -c $< -o $(BUILD)/$@ + +.o.so: + $(CC) -shared -fPIC $(BUILD)/$< -o $(BUILD)/$@ + +test: $(TEST_SOURCE) + $(CC) $(CFLAGS) -o $(BUILD)/test $(TEST_SOURCE) $(BUILD)/$(OBJS) + +build: + mkdir -p $(BUILD) + +install: + cp $(BUILD)/$(SHARED) $(INSTALL_PATH)/lib/$(LIBSHARED) + chmod 644 $(INSTALL_PATH)/lib/$(LIBSHARED) + cp $(HEADER) $(INSTALL_PATH)/include/ + +clean: + rm -rf $(BUILD) diff --git a/README b/README @@ -0,0 +1,5 @@ +reading +======= + +Small library for input-related functions. Basically a quick way to +get input from a file. 
/*
 * reading -- small library for input-related helper functions.
 *
 * Formatted listing of reading.h, reading.c and test_reading.c. The
 * three parts are separated by banner comments; the test driver is only
 * compiled when READING_SELF_TEST is defined, and the contents of the
 * two fixture files it reads are recorded in the comment at the end.
 */

/* ------------------------------------------------------------------ */
/* reading.h                                                          */
/* ------------------------------------------------------------------ */
#ifndef READING_H
#define READING_H

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define MAXIMUM_FILESIZE_BYTES 256000
#define MAXIMUM_LINE_BYTES 256000
#define LINES_ALLOC_MIN 2048

/**
 * Read all chars from a file.
 *
 * Reads an entire file (assuming its size is < MAXIMUM_FILESIZE_BYTES).
 * The result is null-terminated and heap-allocated; the caller must
 * free() it. On any failure (cannot open, read error, file too big,
 * out of memory) a message is printed to stderr and the process exits
 * with EXIT_FAILURE.
 *
 * @param output pointer to the address where the allocated string
 * should be written
 * @param filename the name of the file
 * @return the number of bytes read (excluding final null byte)
 */
size_t readall(char **output, const char *filename);

/**
 * Read all lines from a file.
 *
 * Reads a file line by line with fgets(), so each stored line keeps its
 * trailing newline if it had one. A line longer than
 * MAXIMUM_LINE_BYTES - 1 chars is split over several entries.
 * Both the returned array and every line in it are heap-allocated and
 * must be free()'d by the caller (the array is non-NULL even when the
 * file is empty). On failure to open the file or to allocate memory a
 * message is printed to stderr and the process exits with EXIT_FAILURE.
 *
 * @param lines pointer to the address where the list of pointers
 * should be written
 * @param filename the name of the file
 * @return the number of lines read
 */
size_t readlines(char ***lines, const char *filename);

/**
 * Read the next uint64_t from FILE ptr.
 *
 * Skips forward to the next decimal digit and reads the run of digits
 * starting there. The first character after the run (if any) is
 * consumed as well. There is no overflow check: a value that does not
 * fit wraps around modulo 2^64. Returns EOF, leaving *n untouched, if
 * EOF is found before any digit.
 *
 * @param n pointer to where to write result
 * @param fp file pointer to read from
 * @returns the number of characters read as part of u64
 */
int read_next_u64(uint64_t *n, FILE *fp);

/**
 * Read the next uint64_t from string.
 *
 * This assumes that the string is a well-formatted null-terminated
 * string. Returns a pointer to the first char after the uint64 in
 * the string, or NULL if this is not a part of the string (the number
 * ran up to the terminator; *n is still written) or if it failed to
 * read an uint64_t (*n is left untouched). Values that do not fit wrap
 * around modulo 2^64.
 *
 * @param n pointer to where to write the result
 * @param s the string
 * @returns pointer to next char in string, or NULL
 */
char *sread_next_u64(uint64_t *n, char *s);

/**
 * Read the next int64_t from string.
 *
 * This assumes that the string is a well-formatted null-terminated
 * string. Returns a pointer to the first char after the int64 in
 * the string, or NULL if this is not a part of the string or if it
 * failed to read an int64_t. Substrings '-' that are not followed
 * by a digit are interpreted as 0. The full range INT64_MIN..INT64_MAX
 * is parsed correctly; values outside it wrap around (two's
 * complement) instead of invoking undefined behaviour.
 *
 * @param n pointer to where to write the result
 * @param s the string
 * @returns pointer to next char in string, or NULL
 */
char *sread_next_i64(int64_t *n, char *s);

#endif

/* ------------------------------------------------------------------ */
/* reading.c                                                          */
/* ------------------------------------------------------------------ */

/* Report a failed library call (message + errno text) and terminate. */
static void reading_die_errno(const char *msg) {
  perror(msg);
  exit(EXIT_FAILURE);
}

/* Report a failure that has no meaningful errno value and terminate. */
static void reading_die(const char *msg) {
  fprintf(stderr, "%s\n", msg);
  exit(EXIT_FAILURE);
}

/* Locale-independent test for an ASCII decimal digit. */
static int reading_is_digit(int c) { return '0' <= c && c <= '9'; }

size_t readall(char **output, const char *filename) {
  FILE *f = fopen(filename, "r");
  if (f == NULL) {
    reading_die_errno("Error when opening file");
  }

  // The scratch buffer lives on the heap: 256 kB is too large to place
  // on the stack safely.
  char *buffer = malloc(MAXIMUM_FILESIZE_BYTES);
  if (buffer == NULL) {
    reading_die_errno("Error when allocating read buffer");
  }

  size_t read_size = fread(buffer, 1, MAXIMUM_FILESIZE_BYTES, f);
  int read_failed = ferror(f);
  fclose(f);

  if (read_failed) {
    reading_die("Error when reading file");
  }
  // A completely filled buffer means the file may have been truncated
  // (and leaves no room for the terminator). This is not an errno
  // condition, so perror() would print a misleading message here.
  if (read_size == MAXIMUM_FILESIZE_BYTES) {
    reading_die("File is too big");
  }

  buffer[read_size] = '\0';

  // Give back the unused tail; if shrinking fails the big buffer is
  // still valid, so just hand that one out.
  char *shrunk = realloc(buffer, read_size + 1);
  *output = (shrunk != NULL) ? shrunk : buffer;

  return read_size;
}

size_t readlines(char ***lines, const char *filename) {
  FILE *f = fopen(filename, "r");
  if (f == NULL) {
    reading_die_errno("Error when opening file");
  }

  size_t capacity = LINES_ALLOC_MIN;
  size_t count = 0;
  char **table = calloc(capacity, sizeof *table);
  char *line = malloc(MAXIMUM_LINE_BYTES);
  if (table == NULL || line == NULL) {
    reading_die_errno("Error when allocating line storage");
  }

  while (fgets(line, MAXIMUM_LINE_BYTES, f) != NULL) {
    if (count == capacity) {
      // Grow through a temporary so the old table is not lost if
      // realloc() fails.
      capacity *= 2;
      char **grown = realloc(table, capacity * sizeof *table);
      if (grown == NULL) {
        reading_die_errno("Error when growing line table");
      }
      table = grown;
    }

    // Hand-rolled copy instead of strdup(): strdup is POSIX and is not
    // declared when compiling with -std=c99 -pedantic.
    size_t nbytes = strlen(line) + 1;
    char *copy = malloc(nbytes);
    if (copy == NULL) {
      reading_die_errno("Error when allocating line");
    }
    memcpy(copy, line, nbytes);
    table[count++] = copy;
  }

  free(line);
  fclose(f);

  // Trim the table to size. Skipped for an empty file because
  // realloc(ptr, 0) is not portable; the caller then still gets a
  // valid pointer it can free().
  if (count > 0) {
    char **shrunk = realloc(table, count * sizeof *table);
    if (shrunk != NULL) {
      table = shrunk;
    }
  }
  *lines = table;

  return count;
}

int read_next_u64(uint64_t *n, FILE *fp) {
  int ch;

  // skip to next numerical char
  do {
    ch = fgetc(fp);
  } while (ch != EOF && !reading_is_digit(ch));
  if (ch == EOF) {
    return EOF;
  }

  // read an unsigned int; the first non-digit after it is consumed
  uint64_t value = 0;
  int ndigits = 0;
  do {
    value = value * 10u + (uint64_t)(ch - '0');
    ndigits++;
    ch = fgetc(fp);
  } while (reading_is_digit(ch));

  *n = value;
  return ndigits;
}

char *sread_next_u64(uint64_t *n, char *s) {
  char *p = s;

  // skip to next numerical char
  while (*p != '\0' && !reading_is_digit(*p)) {
    p++;
  }
  if (*p == '\0') {
    return NULL;
  }

  // read an unsigned int; the terminator is a non-digit, so the loop
  // stops at the end of the string on its own
  uint64_t value = 0;
  while (reading_is_digit(*p)) {
    value = value * 10u + (uint64_t)(*p - '0');
    p++;
  }
  *n = value;

  return (*p == '\0') ? NULL : p;
}

char *sread_next_i64(int64_t *n, char *s) {
  char *p = s;

  // skip to the next numerical char or -
  while (*p != '\0' && !reading_is_digit(*p) && *p != '-') {
    p++;
  }
  if (*p == '\0') {
    return NULL;
  }

  int negative = 0;
  if (*p == '-') {
    negative = 1;
    p++;
    // if no following digit this is interpreted as -0 == 0.
    if (!reading_is_digit(*p)) {
      *n = 0;
      return (*p == '\0') ? NULL : p;
    }
  }

  // Accumulate the magnitude as unsigned: this keeps overflow well
  // defined and makes INT64_MIN (whose magnitude does not fit in an
  // int64_t) representable.
  uint64_t magnitude = 0;
  while (reading_is_digit(*p)) {
    magnitude = magnitude * 10u + (uint64_t)(*p - '0');
    p++;
  }

  // apply sign; int64_t is two's complement by definition, so copying
  // the bit pattern yields the wrapped signed value without relying on
  // an implementation-defined conversion
  uint64_t bits = negative ? (uint64_t)0 - magnitude : magnitude;
  memcpy(n, &bits, sizeof *n);

  return (*p == '\0') ? NULL : p;
}

/* ------------------------------------------------------------------ */
/* test_reading.c (built only with -DREADING_SELF_TEST)                */
/* ------------------------------------------------------------------ */
#ifdef READING_SELF_TEST

#include <assert.h>

static void test_reading_readall(void) {
  char *text;
  size_t n = readall(&text, "testfiles/testfile1.txt");

  assert(n == 49);
  assert(
      strcmp(text, "Hello, this is a test file.\nIt contains two rows.") == 0);

  free(text);
}

static void test_reading_readlines(void) {
  char **rows;
  size_t n = readlines(&rows, "testfiles/testfile1.txt");

  assert(n == 2);
  assert(strcmp(rows[0], "Hello, this is a test file.\n") == 0);
  assert(strcmp(rows[1], "It contains two rows.") == 0);

  size_t i;
  for (i = 0; i < n; i++)
    free(rows[i]);
  free(rows);
}

static void test_reading_read_next_u64(void) {
  FILE *fp = fopen("testfiles/testfile2.txt", "r");
  assert(fp != NULL);

  uint64_t r;
  assert(read_next_u64(&r, fp) == 4);
  assert(r == 4711);
  assert(read_next_u64(&r, fp) == 2);
  assert(r == 65);
  assert(read_next_u64(&r, fp) == 4);
  assert(r == 1337);
  assert(read_next_u64(&r, fp) == 1);
  assert(r == 1);
  assert(read_next_u64(&r, fp) == 1);
  assert(r == 0);

  fclose(fp);
}

static void test_reading_sread_next_u64(void) {
  char tst[] = "foobarm4711asd65,1337.1\n0";
  char *n = tst;
  uint64_t r;

  assert((n = sread_next_u64(&r, n)) != NULL);
  assert(r == 4711);
  assert((n = sread_next_u64(&r, n)) != NULL);
  assert(r == 65);
  assert((n = sread_next_u64(&r, n)) != NULL);
  assert(r == 1337);
  assert((n = sread_next_u64(&r, n)) != NULL);
  assert(r == 1);
  assert((n = sread_next_u64(&r, n)) == NULL);
  assert(r == 0);
}

int main(void) {
  test_reading_readall();
  test_reading_readlines();
  test_reading_read_next_u64();
  test_reading_sread_next_u64();

  printf("test ok\n");
}

#endif /* READING_SELF_TEST */

/*
 * Fixture files read by the self-test.
 *
 * testfiles/testfile1.txt (no newline at end of file):
 *
 *     Hello, this is a test file.
 *     It contains two rows.
 *
 * testfiles/testfile2.txt:
 *
 *     4711ashdha65---1337asd
 *     addasda1asdl0asd
 */