diff options
author | norly <ny-git@enpas.org> | 2019-07-14 17:28:47 +0200 |
---|---|---|
committer | norly <ny-git@enpas.org> | 2019-07-14 17:28:47 +0200 |
commit | f0f54296b5b445c6ce0e47486bcdcb0deca582ff (patch) | |
tree | 35c3858bab40f4bf8f6c57e2d5522f17d2928511 /lookup-incoming.c | |
parent | 64907e38005ada5b2a545ae58f05d0fd616ffa79 (diff) |
Move to Makefile and .gitignore
Diffstat (limited to 'lookup-incoming.c')
-rw-r--r-- | lookup-incoming.c | 257 |
1 files changed, 0 insertions, 257 deletions
diff --git a/lookup-incoming.c b/lookup-incoming.c deleted file mode 100644 index 2d3fa9c..0000000 --- a/lookup-incoming.c +++ /dev/null @@ -1,257 +0,0 @@ -#include <assert.h> -#include <ctype.h> -#include <stdio.h> -#include <stdlib.h> -#include <string.h> -#include <sys/types.h> - - -#define ROUNDUP(x,y) ((((x)+(y)-1)/(y))*(y)) - - -typedef __uint32_t bitfield_type; -#define BITS_PER_BITFIELD 32 -#define BITS_PER_BITFIELD_LOG 5 - -#define BIT_SET(set, d) \ - ( (void) ( set[d >> BITS_PER_BITFIELD_LOG] |= (1 << (d & (BITS_PER_BITFIELD-1))) ) ) -#define BIT_ISSET(set, d) \ - (!(! ( set[d >> BITS_PER_BITFIELD_LOG] & (1 << (d & (BITS_PER_BITFIELD-1))) ) )) - -int ANY_BITS_SET(bitfield_type *set, size_t elems) -{ - size_t i; - - for (i = 0; i < elems; i++) - if (set[i] != 0) - return 1; - - return 0; -} - - -typedef __uint32_t art_id; - - - -static int -cmpstring_p_pp(const void *p1, const void *p2) -{ - return strcasecmp((char * const) p1, * (char * const *) p2); -} - - - - -int main(int argc, char **argv) -{ - FILE *in_file; - FILE *out_file; - - char *title_blob; - size_t title_blob_len; - char **title = NULL; - art_id titles; - art_id titles_read = 0; - - art_id *link_blob; - size_t link_blob_len; - art_id **linki; - art_id *linkis; - - char **cur_title; - art_id title_id; - - art_id cur_dist; - - art_id i; - - bitfield_type *titles_seen; - bitfield_type *titles_prev_round; - bitfield_type *titles_this_round; - size_t titles_bitfield_elems; - size_t titles_bitfield_bytes; - - - if (argc < 2) { - printf("Supply article name to look up articles linking to it.\n"); - return 1; - } - - - /* - * Read all incoming links into memory - */ - - in_file = fopen("links-incoming.bin", "rb"); - fseek(in_file, 0, SEEK_END); - link_blob_len = ftell(in_file); - rewind(in_file); - printf("Link blob size: %zd bytes.\n", link_blob_len); - - link_blob = malloc(link_blob_len); - if (!link_blob) { - printf("Failed to allocate memory.\n"); - return 1; - } - - if (link_blob_len != fread(link_blob, 1, link_blob_len, in_file)) { - printf("Failed to read entire file in one go.\n"); - return 1; - } - printf("Link blob read (%zd bytes).\n", ftell(in_file)); - fclose(in_file); - - titles = *link_blob; - link_blob++; - - linki = malloc(titles * sizeof(art_id*)); - linkis = malloc(titles * sizeof(art_id)); - - for (i = 0; i < titles; i++) { - art_id j; - - linkis[i] = *link_blob; - link_blob++; - - linki[i] = link_blob; - link_blob += linkis[i]; - } - printf("Incoming links prepared.\n"); - - - - - - /* - * Read all titles into memory - */ - - - - in_file = fopen("titles-sorted.txt", "r"); - fseek(in_file, 0, SEEK_END); - title_blob_len = ftell(in_file); - rewind(in_file); - printf("Title blob size: %zd bytes.\n", title_blob_len); - - title_blob = malloc(title_blob_len + 1); - if (!title_blob) { - printf("Failed to allocate memory.\n"); - return 1; - } - - if (title_blob_len != fread(title_blob, 1, title_blob_len, in_file)) { - printf("Failed to read entire file in one go.\n"); - return 1; - } - printf("Title blob read (%zd bytes).\n", ftell(in_file)); - fclose(in_file); - - title_blob[title_blob_len] = '\0'; - - title = malloc((titles + 1) * sizeof(title[0])); - - title[0] = title_blob; - titles_read = 1; - while (title_blob = strchr(title_blob, '\n')) { - title_blob[0] = '\0'; - title_blob++; - title[titles_read] = title_blob; - titles_read++; - } - - /* Last title will be an empty string, as the file ends with a newline. */ - printf("Titles prepared: %d of %d.\n", titles_read - 1, titles); - - - - - /* Look up article ID */ - cur_title = bsearch(argv[1], title, - titles, sizeof(title[0]), - cmpstring_p_pp); - - if (!cur_title) { - printf("TITLE NOT FOUND: %s\n", argv[1]); - return 1; - } - - title_id = cur_title - title; - - - #if 0 - /* Print first degree incoming edges */ - printf("Article %zd (%s) is linked from %zd articles:\n", title_id, title[title_id], linkis[title_id]); - for (i = 0; i < linkis[title_id]; i++) { - art_id x = linki[title_id][i]; - printf(" %s\n", title[x]); - } - #endif - - - - printf("\n\n\n\nBuilding table of distances...\n\n"); - - titles_bitfield_elems = ROUNDUP(titles / BITS_PER_BITFIELD, BITS_PER_BITFIELD); - titles_bitfield_bytes = titles_bitfield_elems * sizeof(bitfield_type); - -BENCHMARK: - - titles_seen = calloc(titles_bitfield_bytes, 1); - titles_prev_round = NULL; - titles_this_round = calloc(titles_bitfield_bytes, 1); - - cur_dist = 0; - - BIT_SET(titles_seen, title_id); - BIT_SET(titles_this_round, title_id); - - while (ANY_BITS_SET(titles_this_round, titles_bitfield_elems)) { - art_id i; - - cur_dist++; - printf("Checking distance %d\n", cur_dist); - - free(titles_prev_round); /* free is safe to call on NULL */ - titles_prev_round = titles_this_round; - titles_this_round = calloc(titles_bitfield_bytes, 1); - - for (i = 0; i < titles; i++) { - int j; - - if (!BIT_ISSET(titles_prev_round, i)) - continue; - - for (j = 0; j < linkis[i]; j++) { - art_id x = linki[i][j]; - BIT_SET(titles_this_round, x); - } - } - - for (i = 0; i < titles_bitfield_elems; i++) { - titles_this_round[i] &= ~titles_seen[i]; - titles_seen[i] |= titles_this_round[i]; - } - } - - /* Result is in titles_prev_round */ - - cur_dist -= 1; - - printf("\n\n\nThe articles with the distance of %zd are:\n\n", cur_dist); - for (i = 0; i < titles; i++) { - if (BIT_ISSET(titles_prev_round, i)) { - printf(" %s\n", title[i]); - } - } - - - free(titles_seen); - free(titles_prev_round); - free(titles_this_round); - - //goto BENCHMARK; - - return 0; -} |