From 3019338ae9ed53fc41020535e2f0fc36dec42603 Mon Sep 17 00:00:00 2001 From: norly Date: Sun, 14 Jul 2019 23:42:54 +0200 Subject: [PATCH] lookup: Clean up code a bit --- Makefile | 2 +- src/lookup-incoming.c | 135 ++++++++++++++++++++---------------------- 2 files changed, 65 insertions(+), 72 deletions(-) diff --git a/Makefile b/Makefile index 960fcd7..0dd478f 100644 --- a/Makefile +++ b/Makefile @@ -13,7 +13,7 @@ lookuptools: $(BINDIR)/lookup-incoming $(BINDIR)/lookup-incoming: $(SRCDIR)/lookup-incoming.c mkdir -p $(BINDIR) - $(GCC) -o $@ $< + $(GCC) -Wall -o $@ $< diff --git a/src/lookup-incoming.c b/src/lookup-incoming.c index 2d3fa9c..77ffda4 100644 --- a/src/lookup-incoming.c +++ b/src/lookup-incoming.c @@ -6,9 +6,11 @@ #include +/* + * Bitfield + */ #define ROUNDUP(x,y) ((((x)+(y)-1)/(y))*(y)) - typedef __uint32_t bitfield_type; #define BITS_PER_BITFIELD 32 #define BITS_PER_BITFIELD_LOG 5 @@ -30,8 +32,11 @@ int ANY_BITS_SET(bitfield_type *set, size_t elems) } -typedef __uint32_t art_id; +/* + * Misc helpers + */ +typedef __uint32_t art_id; static int @@ -41,24 +46,52 @@ cmpstring_p_pp(const void *p1, const void *p2) } +void* load_file(char *path) +{ + FILE *in_file; + void *blob; + size_t blob_len; + + in_file = fopen(path, "rb"); + + fseek(in_file, 0, SEEK_END); + blob_len = ftell(in_file); + rewind(in_file); + printf(" blob size: %zd bytes.\n", blob_len); + + blob = malloc(blob_len); + if (!blob) { + printf("Failed to allocate memory.\n"); + return NULL; + } + if (blob_len != fread(blob, 1, blob_len, in_file)) { + printf("Failed to read entire file in one go.\n"); + fclose(in_file); + return NULL; + } + printf(" blob read :%zd bytes.\n", ftell(in_file)); + fclose(in_file); + return blob; +} + + + +/* + * Main + */ int main(int argc, char **argv) { - FILE *in_file; - FILE *out_file; + art_id *link_blob; + art_id **linki; + art_id *linkis; char *title_blob; - size_t title_blob_len; char **title = NULL; art_id titles; art_id titles_read = 0; - art_id *link_blob; - size_t link_blob_len; - art_id **linki; - art_id *linkis; - char **cur_title; art_id title_id; @@ -79,29 +112,12 @@ int main(int argc, char **argv) } - /* - * Read all incoming links into memory - */ - - in_file = fopen("links-incoming.bin", "rb"); - fseek(in_file, 0, SEEK_END); - link_blob_len = ftell(in_file); - rewind(in_file); - printf("Link blob size: %zd bytes.\n", link_blob_len); - - link_blob = malloc(link_blob_len); - if (!link_blob) { - printf("Failed to allocate memory.\n"); - return 1; - } - - if (link_blob_len != fread(link_blob, 1, link_blob_len, in_file)) { - printf("Failed to read entire file in one go.\n"); + /* Read all incoming links into memory */ + link_blob = load_file("links-incoming.bin"); + if (!link_blob) return 1; - } - printf("Link blob read (%zd bytes).\n", ftell(in_file)); - fclose(in_file); + /* Parse blob */ titles = *link_blob; link_blob++; @@ -109,62 +125,39 @@ int main(int argc, char **argv) linkis = malloc(titles * sizeof(art_id)); for (i = 0; i < titles; i++) { - art_id j; - linkis[i] = *link_blob; link_blob++; linki[i] = link_blob; link_blob += linkis[i]; } - printf("Incoming links prepared.\n"); - - - - - - /* - * Read all titles into memory - */ - - + printf("Incoming links parsed: %d\n", titles); - in_file = fopen("titles-sorted.txt", "r"); - fseek(in_file, 0, SEEK_END); - title_blob_len = ftell(in_file); - rewind(in_file); - printf("Title blob size: %zd bytes.\n", title_blob_len); - title_blob = malloc(title_blob_len + 1); - if (!title_blob) { - printf("Failed to allocate memory.\n"); + /* Read all titles into memory */ + title_blob = load_file("titles-sorted.txt"); + if (!title_blob) return 1; - } - - if (title_blob_len != fread(title_blob, 1, title_blob_len, in_file)) { - printf("Failed to read entire file in one go.\n"); - return 1; - } - printf("Title blob read (%zd bytes).\n", ftell(in_file)); - fclose(in_file); - - title_blob[title_blob_len] = '\0'; + /* Parse blob */ title = malloc((titles + 1) * sizeof(title[0])); title[0] = title_blob; titles_read = 1; - while (title_blob = strchr(title_blob, '\n')) { + while ((title_blob = strchr(title_blob, '\n'))) { title_blob[0] = '\0'; title_blob++; + + if (title_blob[0] == '\0') + /* Reached empty line at end of file */ + continue; + title[titles_read] = title_blob; titles_read++; } /* Last title will be an empty string, as the file ends with a newline. */ - printf("Titles prepared: %d of %d.\n", titles_read - 1, titles); - - + printf("Titles parsed: %d of %d.\n", titles_read, titles); /* Look up article ID */ @@ -190,13 +183,13 @@ int main(int argc, char **argv) #endif - - printf("\n\n\n\nBuilding table of distances...\n\n"); + /* Main part */ + printf("\n\n\n\nFlooding graph...\n\n"); titles_bitfield_elems = ROUNDUP(titles / BITS_PER_BITFIELD, BITS_PER_BITFIELD); titles_bitfield_bytes = titles_bitfield_elems * sizeof(bitfield_type); -BENCHMARK: +//BENCHMARK: titles_seen = calloc(titles_bitfield_bytes, 1); titles_prev_round = NULL; @@ -235,11 +228,11 @@ BENCHMARK: } } - /* Result is in titles_prev_round */ + /* Result is now in titles_prev_round */ cur_dist -= 1; - printf("\n\n\nThe articles with the distance of %zd are:\n\n", cur_dist); + printf("\n\n\nThe articles with a distance of %d are:\n\n", cur_dist); for (i = 0; i < titles; i++) { if (BIT_ISSET(titles_prev_round, i)) { printf(" %s\n", title[i]); -- 2.30.2