summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
author: norly <ny-git@enpas.org> 2019-07-14 23:42:54 +0200
committer: norly <ny-git@enpas.org> 2019-07-14 23:43:35 +0200
commit: 3019338ae9ed53fc41020535e2f0fc36dec42603 (patch)
tree: 1ad99173586169833779c2392b5e3bda92502928 /src
parent: b754222ce45b041a96137d65c793efa7d19d0950 (diff)
lookup: Clean up code a bit (HEAD, master)
Diffstat (limited to 'src')
-rw-r--r-- src/lookup-incoming.c 135
1 files changed, 64 insertions, 71 deletions
diff --git a/src/lookup-incoming.c b/src/lookup-incoming.c
index 2d3fa9c..77ffda4 100644
--- a/src/lookup-incoming.c
+++ b/src/lookup-incoming.c
@@ -6,9 +6,11 @@
#include <sys/types.h>
+/*
+ * Bitfield
+ */
#define ROUNDUP(x,y) ((((x)+(y)-1)/(y))*(y))
-
typedef __uint32_t bitfield_type;
#define BITS_PER_BITFIELD 32
#define BITS_PER_BITFIELD_LOG 5
@@ -30,8 +32,11 @@ int ANY_BITS_SET(bitfield_type *set, size_t elems)
}
-typedef __uint32_t art_id;
+/*
+ * Misc helpers
+ */
+typedef __uint32_t art_id;
static int
@@ -41,24 +46,52 @@ cmpstring_p_pp(const void *p1, const void *p2)
}
/*
 * Read the whole file at `path` into a freshly malloc()ed buffer.
 *
 * The buffer is one byte larger than the file, and that extra byte is set
 * to '\0'. Text files can therefore be handed straight to string functions
 * such as strchr(); callers treating the data as binary can ignore the
 * trailing byte.
 *
 * Progress and error messages are printed to stdout.
 *
 * Returns the buffer (the caller owns it and must free() it), or NULL if
 * the file could not be opened, measured, allocated for, or read in full.
 * No resources are leaked on any failure path.
 */
void* load_file(char *path)
{
	FILE *in_file;
	char *blob = NULL;
	long file_len;
	size_t blob_len;
	size_t bytes_read;

	in_file = fopen(path, "rb");
	if (!in_file) {
		printf("Failed to open file: %s\n", path);
		return NULL;
	}

	/* Determine the file size by seeking to the end and back. */
	if (fseek(in_file, 0, SEEK_END) != 0
	    || (file_len = ftell(in_file)) < 0
	    || fseek(in_file, 0, SEEK_SET) != 0) {
		printf("Failed to determine file size.\n");
		goto fail;
	}
	blob_len = (size_t)file_len;
	printf(" blob size: %zu bytes.\n", blob_len);

	/* One extra byte for the terminating NUL. */
	blob = malloc(blob_len + 1);
	if (!blob) {
		printf("Failed to allocate memory.\n");
		goto fail;
	}

	bytes_read = fread(blob, 1, blob_len, in_file);
	if (bytes_read != blob_len) {
		printf("Failed to read entire file in one go.\n");
		goto fail;
	}
	blob[blob_len] = '\0';
	printf(" blob read :%zu bytes.\n", bytes_read);

	fclose(in_file);
	return blob;

fail:
	/* free(NULL) is a no-op, so this is safe before allocation too. */
	free(blob);
	fclose(in_file);
	return NULL;
}
+
+
+
+/*
+ * Main
+ */
int main(int argc, char **argv)
{
- FILE *in_file;
- FILE *out_file;
+ art_id *link_blob;
+ art_id **linki;
+ art_id *linkis;
char *title_blob;
- size_t title_blob_len;
char **title = NULL;
art_id titles;
art_id titles_read = 0;
- art_id *link_blob;
- size_t link_blob_len;
- art_id **linki;
- art_id *linkis;
-
char **cur_title;
art_id title_id;
@@ -79,29 +112,12 @@ int main(int argc, char **argv)
}
- /*
- * Read all incoming links into memory
- */
-
- in_file = fopen("links-incoming.bin", "rb");
- fseek(in_file, 0, SEEK_END);
- link_blob_len = ftell(in_file);
- rewind(in_file);
- printf("Link blob size: %zd bytes.\n", link_blob_len);
-
- link_blob = malloc(link_blob_len);
- if (!link_blob) {
- printf("Failed to allocate memory.\n");
- return 1;
- }
-
- if (link_blob_len != fread(link_blob, 1, link_blob_len, in_file)) {
- printf("Failed to read entire file in one go.\n");
+ /* Read all incoming links into memory */
+ link_blob = load_file("links-incoming.bin");
+ if (!link_blob)
return 1;
- }
- printf("Link blob read (%zd bytes).\n", ftell(in_file));
- fclose(in_file);
+ /* Parse blob */
titles = *link_blob;
link_blob++;
@@ -109,62 +125,39 @@ int main(int argc, char **argv)
linkis = malloc(titles * sizeof(art_id));
for (i = 0; i < titles; i++) {
- art_id j;
-
linkis[i] = *link_blob;
link_blob++;
linki[i] = link_blob;
link_blob += linkis[i];
}
- printf("Incoming links prepared.\n");
-
-
-
-
-
- /*
- * Read all titles into memory
- */
-
-
+ printf("Incoming links parsed: %d\n", titles);
- in_file = fopen("titles-sorted.txt", "r");
- fseek(in_file, 0, SEEK_END);
- title_blob_len = ftell(in_file);
- rewind(in_file);
- printf("Title blob size: %zd bytes.\n", title_blob_len);
- title_blob = malloc(title_blob_len + 1);
- if (!title_blob) {
- printf("Failed to allocate memory.\n");
+ /* Read all titles into memory */
+ title_blob = load_file("titles-sorted.txt");
+ if (!title_blob)
return 1;
- }
-
- if (title_blob_len != fread(title_blob, 1, title_blob_len, in_file)) {
- printf("Failed to read entire file in one go.\n");
- return 1;
- }
- printf("Title blob read (%zd bytes).\n", ftell(in_file));
- fclose(in_file);
-
- title_blob[title_blob_len] = '\0';
+ /* Parse blob */
title = malloc((titles + 1) * sizeof(title[0]));
title[0] = title_blob;
titles_read = 1;
- while (title_blob = strchr(title_blob, '\n')) {
+ while ((title_blob = strchr(title_blob, '\n'))) {
title_blob[0] = '\0';
title_blob++;
+
+ if (title_blob[0] == '\0')
+ /* Reached empty line at end of file */
+ continue;
+
title[titles_read] = title_blob;
titles_read++;
}
/* Last title will be an empty string, as the file ends with a newline. */
- printf("Titles prepared: %d of %d.\n", titles_read - 1, titles);
-
-
+ printf("Titles parsed: %d of %d.\n", titles_read, titles);
/* Look up article ID */
@@ -190,13 +183,13 @@ int main(int argc, char **argv)
#endif
-
- printf("\n\n\n\nBuilding table of distances...\n\n");
+ /* Main part */
+ printf("\n\n\n\nFlooding graph...\n\n");
titles_bitfield_elems = ROUNDUP(titles / BITS_PER_BITFIELD, BITS_PER_BITFIELD);
titles_bitfield_bytes = titles_bitfield_elems * sizeof(bitfield_type);
-BENCHMARK:
+//BENCHMARK:
titles_seen = calloc(titles_bitfield_bytes, 1);
titles_prev_round = NULL;
@@ -235,11 +228,11 @@ BENCHMARK:
}
}
- /* Result is in titles_prev_round */
+ /* Result is now in titles_prev_round */
cur_dist -= 1;
- printf("\n\n\nThe articles with the distance of %zd are:\n\n", cur_dist);
+ printf("\n\n\nThe articles with a distance of %d are:\n\n", cur_dist);
for (i = 0; i < titles; i++) {
if (BIT_ISSET(titles_prev_round, i)) {
printf(" %s\n", title[i]);