summaryrefslogtreecommitdiff
path: root/lookup-incoming.c
diff options
context:
space:
mode:
Diffstat (limited to 'lookup-incoming.c')
-rw-r--r--lookup-incoming.c181
1 files changed, 181 insertions, 0 deletions
diff --git a/lookup-incoming.c b/lookup-incoming.c
new file mode 100644
index 0000000..50f2dbc
--- /dev/null
+++ b/lookup-incoming.c
@@ -0,0 +1,181 @@
+#include <assert.h>
+#include <ctype.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+
+static int
+cmpstring_p_pp(const void *p1, const void *p2)
+{
+ return strcasecmp((char * const) p1, * (char * const *) p2);
+}
+
+
+
+
+int main(int argc, char **argv)
+{
+ FILE *in_file;
+ FILE *out_file;
+
+ char **title = NULL;
+ size_t titles;
+ size_t titles_read = 0;
+
+ size_t **linki;
+ size_t *linkis;
+
+ char **cur_title;
+ size_t title_id;
+
+ size_t *dist_table;
+ size_t cur_dist;
+ int cur_dist_is_not_last;
+
+ size_t i;
+
+
+ if (argc < 2) {
+ printf("Supply article name to look up articles linking to it.\n");
+ return 1;
+ }
+
+
+ /*
+ * Read all incoming links into memory
+ */
+
+ in_file = fopen("links-incoming.bin", "rb");
+
+ fread(&titles, sizeof(titles), 1, in_file);
+
+ linki = malloc(titles * sizeof(size_t*));
+ linkis = malloc(titles * sizeof(size_t));
+
+ for (i = 0; i < titles; i++) {
+ size_t j;
+
+ fread(&linkis[i], sizeof(linkis[i]), 1, in_file);
+ //printf("linkis[%zd] = %zd\n", i, linkis[i]);
+
+ linki[i] = malloc(linkis[i] * sizeof(linki[i][0]));
+
+ j = fread(linki[i], sizeof(linki[i][0]), linkis[i], in_file);
+ assert(j == linkis[i]);
+ //for (j = 0; j < linkis[i]; j++) {
+ // fread(&linki[i][j], sizeof(linki[i][j]), 1, in_file);
+ //}
+ }
+ printf("Incoming links read (%zd bytes).\n", ftell(in_file));
+ fclose(in_file);
+
+
+
+
+ /*
+ * Read all titles into memory
+ */
+
+ title = malloc(titles * sizeof(title[0]));
+
+ in_file = fopen("titles-sorted.txt", "r");
+ while (!feof(in_file)) {
+ char *in_line = NULL;
+ ssize_t in_line_len = 0;
+ size_t zero = 0;
+
+ in_line_len = getline(&in_line, &zero, in_file);
+
+ /* Ignore empty lines and errors */
+ if (in_line_len < 2) {
+ continue;
+ }
+
+ /* Delete trailing newline */
+ in_line[in_line_len - 1] = '\0';
+
+ title[titles_read] = in_line;
+ titles_read++;
+ }
+ fclose(in_file);
+
+ printf("Titles read.\n");
+
+
+
+
+ /* Look up article ID */
+ cur_title = bsearch(argv[1], title,
+ titles, sizeof(title[0]),
+ cmpstring_p_pp);
+
+ if (!cur_title) {
+ printf("TITLE NOT FOUND: %s\n", argv[1]);
+ return 1;
+ }
+
+ title_id = cur_title - title;
+
+ printf("Article %zd (%s) is linked from %zd articles:\n", title_id, title[title_id], linkis[title_id]);
+
+ for (i = 0; i < linkis[title_id]; i++) {
+ size_t x = linki[title_id][i];
+ printf(" %s\n", title[x]);
+ }
+
+
+
+ printf("\n\n\n\nBuilding table of distances...\n\n");
+
+ dist_table = malloc(titles * sizeof(dist_table[0]));
+
+ dist_table[title_id] = 0xdeadbeef;
+
+ for (i = 0; i < linkis[title_id]; i++) {
+ size_t x = linki[title_id][i];
+ dist_table[x] = 1;
+ }
+
+ cur_dist = 1;
+ cur_dist_is_not_last = 1;
+
+ while (cur_dist_is_not_last) {
+ size_t articles_found = 0;
+
+ cur_dist_is_not_last = 0;
+
+ for (i = 0; i < titles; i++) {
+ if (dist_table[i] == cur_dist) {
+ size_t j;
+
+ for (j = 0; j < linkis[i]; j++) {
+ size_t x = linki[i][j];
+
+ if (!dist_table[x]) {
+ dist_table[x] = cur_dist + 1;
+ }
+ }
+
+ cur_dist_is_not_last = 1;
+
+ articles_found++;
+ }
+ }
+
+ printf("Distance %zd: Found %zd articles.\n", cur_dist, articles_found);
+ cur_dist++;
+ }
+
+ cur_dist -= 2;
+
+ printf("\n\n\nThe articles with the distance of %zd are:\n\n", cur_dist);
+ for (i = 0; i < titles; i++) {
+ if (dist_table[i] == cur_dist) {
+ printf(" %s\n", title[i]);
+ }
+ }
+
+
+ return 0;
+}