diff options
Diffstat (limited to 'lookup-incoming.c')
-rw-r--r-- | lookup-incoming.c | 181 |
1 files changed, 181 insertions, 0 deletions
diff --git a/lookup-incoming.c b/lookup-incoming.c new file mode 100644 index 0000000..50f2dbc --- /dev/null +++ b/lookup-incoming.c @@ -0,0 +1,181 @@ +#include <assert.h> +#include <ctype.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + + +static int +cmpstring_p_pp(const void *p1, const void *p2) +{ + return strcasecmp((char * const) p1, * (char * const *) p2); +} + + + + +int main(int argc, char **argv) +{ + FILE *in_file; + FILE *out_file; + + char **title = NULL; + size_t titles; + size_t titles_read = 0; + + size_t **linki; + size_t *linkis; + + char **cur_title; + size_t title_id; + + size_t *dist_table; + size_t cur_dist; + int cur_dist_is_not_last; + + size_t i; + + + if (argc < 2) { + printf("Supply article name to look up articles linking to it.\n"); + return 1; + } + + + /* + * Read all incoming links into memory + */ + + in_file = fopen("links-incoming.bin", "rb"); + + fread(&titles, sizeof(titles), 1, in_file); + + linki = malloc(titles * sizeof(size_t*)); + linkis = malloc(titles * sizeof(size_t)); + + for (i = 0; i < titles; i++) { + size_t j; + + fread(&linkis[i], sizeof(linkis[i]), 1, in_file); + //printf("linkis[%zd] = %zd\n", i, linkis[i]); + + linki[i] = malloc(linkis[i] * sizeof(linki[i][0])); + + j = fread(linki[i], sizeof(linki[i][0]), linkis[i], in_file); + assert(j == linkis[i]); + //for (j = 0; j < linkis[i]; j++) { + // fread(&linki[i][j], sizeof(linki[i][j]), 1, in_file); + //} + } + printf("Incoming links read (%zd bytes).\n", ftell(in_file)); + fclose(in_file); + + + + + /* + * Read all titles into memory + */ + + title = malloc(titles * sizeof(title[0])); + + in_file = fopen("titles-sorted.txt", "r"); + while (!feof(in_file)) { + char *in_line = NULL; + ssize_t in_line_len = 0; + size_t zero = 0; + + in_line_len = getline(&in_line, &zero, in_file); + + /* Ignore empty lines and errors */ + if (in_line_len < 2) { + continue; + } + + /* Delete trailing newline */ + in_line[in_line_len - 1] = '\0'; + + title[titles_read] = in_line; + titles_read++; + } + fclose(in_file); + + printf("Titles read.\n"); + + + + + /* Look up article ID */ + cur_title = bsearch(argv[1], title, + titles, sizeof(title[0]), + cmpstring_p_pp); + + if (!cur_title) { + printf("TITLE NOT FOUND: %s\n", argv[1]); + return 1; + } + + title_id = cur_title - title; + + printf("Article %zd (%s) is linked from %zd articles:\n", title_id, title[title_id], linkis[title_id]); + + for (i = 0; i < linkis[title_id]; i++) { + size_t x = linki[title_id][i]; + printf(" %s\n", title[x]); + } + + + + printf("\n\n\n\nBuilding table of distances...\n\n"); + + dist_table = malloc(titles * sizeof(dist_table[0])); + + dist_table[title_id] = 0xdeadbeef; + + for (i = 0; i < linkis[title_id]; i++) { + size_t x = linki[title_id][i]; + dist_table[x] = 1; + } + + cur_dist = 1; + cur_dist_is_not_last = 1; + + while (cur_dist_is_not_last) { + size_t articles_found = 0; + + cur_dist_is_not_last = 0; + + for (i = 0; i < titles; i++) { + if (dist_table[i] == cur_dist) { + size_t j; + + for (j = 0; j < linkis[i]; j++) { + size_t x = linki[i][j]; + + if (!dist_table[x]) { + dist_table[x] = cur_dist + 1; + } + } + + cur_dist_is_not_last = 1; + + articles_found++; + } + } + + printf("Distance %zd: Found %zd articles.\n", cur_dist, articles_found); + cur_dist++; + } + + cur_dist -= 2; + + printf("\n\n\nThe articles with the distance of %zd are:\n\n", cur_dist); + for (i = 0; i < titles; i++) { + if (dist_table[i] == cur_dist) { + printf(" %s\n", title[i]); + } + } + + + return 0; +} |