summaryrefslogtreecommitdiff
path: root/lookup-incoming.c
diff options
context:
space:
mode:
authornorly <ny-git@enpas.org>2019-07-14 17:28:47 +0200
committernorly <ny-git@enpas.org>2019-07-14 17:28:47 +0200
commitf0f54296b5b445c6ce0e47486bcdcb0deca582ff (patch)
tree35c3858bab40f4bf8f6c57e2d5522f17d2928511 /lookup-incoming.c
parent64907e38005ada5b2a545ae58f05d0fd616ffa79 (diff)
Move to Makefile and .gitignore
Diffstat (limited to 'lookup-incoming.c')
-rw-r--r--lookup-incoming.c257
1 files changed, 0 insertions, 257 deletions
diff --git a/lookup-incoming.c b/lookup-incoming.c
deleted file mode 100644
index 2d3fa9c..0000000
--- a/lookup-incoming.c
+++ /dev/null
@@ -1,257 +0,0 @@
-#include <assert.h>
-#include <ctype.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <sys/types.h>
-
-
-#define ROUNDUP(x,y) ((((x)+(y)-1)/(y))*(y))
-
-
-typedef __uint32_t bitfield_type;
-#define BITS_PER_BITFIELD 32
-#define BITS_PER_BITFIELD_LOG 5
-
-#define BIT_SET(set, d) \
- ( (void) ( set[d >> BITS_PER_BITFIELD_LOG] |= (1 << (d & (BITS_PER_BITFIELD-1))) ) )
-#define BIT_ISSET(set, d) \
- (!(! ( set[d >> BITS_PER_BITFIELD_LOG] & (1 << (d & (BITS_PER_BITFIELD-1))) ) ))
-
-int ANY_BITS_SET(bitfield_type *set, size_t elems)
-{
- size_t i;
-
- for (i = 0; i < elems; i++)
- if (set[i] != 0)
- return 1;
-
- return 0;
-}
-
-
-typedef __uint32_t art_id;
-
-
-
-static int
-cmpstring_p_pp(const void *p1, const void *p2)
-{
- return strcasecmp((char * const) p1, * (char * const *) p2);
-}
-
-
-
-
-int main(int argc, char **argv)
-{
- FILE *in_file;
- FILE *out_file;
-
- char *title_blob;
- size_t title_blob_len;
- char **title = NULL;
- art_id titles;
- art_id titles_read = 0;
-
- art_id *link_blob;
- size_t link_blob_len;
- art_id **linki;
- art_id *linkis;
-
- char **cur_title;
- art_id title_id;
-
- art_id cur_dist;
-
- art_id i;
-
- bitfield_type *titles_seen;
- bitfield_type *titles_prev_round;
- bitfield_type *titles_this_round;
- size_t titles_bitfield_elems;
- size_t titles_bitfield_bytes;
-
-
- if (argc < 2) {
- printf("Supply article name to look up articles linking to it.\n");
- return 1;
- }
-
-
- /*
- * Read all incoming links into memory
- */
-
- in_file = fopen("links-incoming.bin", "rb");
- fseek(in_file, 0, SEEK_END);
- link_blob_len = ftell(in_file);
- rewind(in_file);
- printf("Link blob size: %zd bytes.\n", link_blob_len);
-
- link_blob = malloc(link_blob_len);
- if (!link_blob) {
- printf("Failed to allocate memory.\n");
- return 1;
- }
-
- if (link_blob_len != fread(link_blob, 1, link_blob_len, in_file)) {
- printf("Failed to read entire file in one go.\n");
- return 1;
- }
- printf("Link blob read (%zd bytes).\n", ftell(in_file));
- fclose(in_file);
-
- titles = *link_blob;
- link_blob++;
-
- linki = malloc(titles * sizeof(art_id*));
- linkis = malloc(titles * sizeof(art_id));
-
- for (i = 0; i < titles; i++) {
- art_id j;
-
- linkis[i] = *link_blob;
- link_blob++;
-
- linki[i] = link_blob;
- link_blob += linkis[i];
- }
- printf("Incoming links prepared.\n");
-
-
-
-
-
- /*
- * Read all titles into memory
- */
-
-
-
- in_file = fopen("titles-sorted.txt", "r");
- fseek(in_file, 0, SEEK_END);
- title_blob_len = ftell(in_file);
- rewind(in_file);
- printf("Title blob size: %zd bytes.\n", title_blob_len);
-
- title_blob = malloc(title_blob_len + 1);
- if (!title_blob) {
- printf("Failed to allocate memory.\n");
- return 1;
- }
-
- if (title_blob_len != fread(title_blob, 1, title_blob_len, in_file)) {
- printf("Failed to read entire file in one go.\n");
- return 1;
- }
- printf("Title blob read (%zd bytes).\n", ftell(in_file));
- fclose(in_file);
-
- title_blob[title_blob_len] = '\0';
-
- title = malloc((titles + 1) * sizeof(title[0]));
-
- title[0] = title_blob;
- titles_read = 1;
- while (title_blob = strchr(title_blob, '\n')) {
- title_blob[0] = '\0';
- title_blob++;
- title[titles_read] = title_blob;
- titles_read++;
- }
-
- /* Last title will be an empty string, as the file ends with a newline. */
- printf("Titles prepared: %d of %d.\n", titles_read - 1, titles);
-
-
-
-
- /* Look up article ID */
- cur_title = bsearch(argv[1], title,
- titles, sizeof(title[0]),
- cmpstring_p_pp);
-
- if (!cur_title) {
- printf("TITLE NOT FOUND: %s\n", argv[1]);
- return 1;
- }
-
- title_id = cur_title - title;
-
-
- #if 0
- /* Print first degree incoming edges */
- printf("Article %zd (%s) is linked from %zd articles:\n", title_id, title[title_id], linkis[title_id]);
- for (i = 0; i < linkis[title_id]; i++) {
- art_id x = linki[title_id][i];
- printf(" %s\n", title[x]);
- }
- #endif
-
-
-
- printf("\n\n\n\nBuilding table of distances...\n\n");
-
- titles_bitfield_elems = ROUNDUP(titles / BITS_PER_BITFIELD, BITS_PER_BITFIELD);
- titles_bitfield_bytes = titles_bitfield_elems * sizeof(bitfield_type);
-
-BENCHMARK:
-
- titles_seen = calloc(titles_bitfield_bytes, 1);
- titles_prev_round = NULL;
- titles_this_round = calloc(titles_bitfield_bytes, 1);
-
- cur_dist = 0;
-
- BIT_SET(titles_seen, title_id);
- BIT_SET(titles_this_round, title_id);
-
- while (ANY_BITS_SET(titles_this_round, titles_bitfield_elems)) {
- art_id i;
-
- cur_dist++;
- printf("Checking distance %d\n", cur_dist);
-
- free(titles_prev_round); /* free is safe to call on NULL */
- titles_prev_round = titles_this_round;
- titles_this_round = calloc(titles_bitfield_bytes, 1);
-
- for (i = 0; i < titles; i++) {
- int j;
-
- if (!BIT_ISSET(titles_prev_round, i))
- continue;
-
- for (j = 0; j < linkis[i]; j++) {
- art_id x = linki[i][j];
- BIT_SET(titles_this_round, x);
- }
- }
-
- for (i = 0; i < titles_bitfield_elems; i++) {
- titles_this_round[i] &= ~titles_seen[i];
- titles_seen[i] |= titles_this_round[i];
- }
- }
-
- /* Result is in titles_prev_round */
-
- cur_dist -= 1;
-
- printf("\n\n\nThe articles with the distance of %zd are:\n\n", cur_dist);
- for (i = 0; i < titles; i++) {
- if (BIT_ISSET(titles_prev_round, i)) {
- printf(" %s\n", title[i]);
- }
- }
-
-
- free(titles_seen);
- free(titles_prev_round);
- free(titles_this_round);
-
- //goto BENCHMARK;
-
- return 0;
-}