#include #include #include #include #include #include typedef __uint32_t art_id; static int cmpstring_p_pp(const void *p1, const void *p2) { return strcasecmp((char * const) p1, * (char * const *) p2); } int main(int argc, char **argv) { FILE *in_file; FILE *out_file; char **title = NULL; art_id titles; art_id titles_read = 0; art_id **linki; art_id *linkis; char **cur_title; art_id title_id; art_id *dist_table; art_id cur_dist; int cur_dist_is_not_last; art_id i; if (argc < 2) { printf("Supply article name to look up articles linking to it.\n"); return 1; } /* * Read all incoming links into memory */ in_file = fopen("links-incoming.bin", "rb"); fread(&titles, sizeof(titles), 1, in_file); linki = malloc(titles * sizeof(art_id*)); linkis = malloc(titles * sizeof(art_id)); for (i = 0; i < titles; i++) { art_id j; fread(&linkis[i], sizeof(linkis[i]), 1, in_file); //printf("linkis[%zd] = %zd\n", i, linkis[i]); linki[i] = malloc(linkis[i] * sizeof(linki[i][0])); j = fread(linki[i], sizeof(linki[i][0]), linkis[i], in_file); assert(j == linkis[i]); //for (j = 0; j < linkis[i]; j++) { // fread(&linki[i][j], sizeof(linki[i][j]), 1, in_file); //} } printf("Incoming links read (%zd bytes).\n", ftell(in_file)); fclose(in_file); /* * Read all titles into memory */ title = malloc(titles * sizeof(title[0])); in_file = fopen("titles-sorted.txt", "r"); while (!feof(in_file)) { char *in_line = NULL; ssize_t in_line_len = 0; size_t zero = 0; in_line_len = getline(&in_line, &zero, in_file); /* Ignore empty lines and errors */ if (in_line_len < 2) { continue; } /* Delete trailing newline */ in_line[in_line_len - 1] = '\0'; title[titles_read] = in_line; titles_read++; } fclose(in_file); printf("Titles read.\n"); /* Look up article ID */ cur_title = bsearch(argv[1], title, titles, sizeof(title[0]), cmpstring_p_pp); if (!cur_title) { printf("TITLE NOT FOUND: %s\n", argv[1]); return 1; } title_id = cur_title - title; printf("Article %zd (%s) is linked from %zd articles:\n", title_id, title[title_id], linkis[title_id]); for (i = 0; i < linkis[title_id]; i++) { art_id x = linki[title_id][i]; printf(" %s\n", title[x]); } printf("\n\n\n\nBuilding table of distances...\n\n"); dist_table = malloc(titles * sizeof(dist_table[0])); dist_table[title_id] = 0xdeadbeef; for (i = 0; i < linkis[title_id]; i++) { art_id x = linki[title_id][i]; dist_table[x] = 1; } cur_dist = 1; cur_dist_is_not_last = 1; while (cur_dist_is_not_last) { art_id articles_found = 0; cur_dist_is_not_last = 0; for (i = 0; i < titles; i++) { if (dist_table[i] == cur_dist) { art_id j; for (j = 0; j < linkis[i]; j++) { art_id x = linki[i][j]; if (!dist_table[x]) { dist_table[x] = cur_dist + 1; } } cur_dist_is_not_last = 1; articles_found++; } } printf("Distance %zd: Found %zd articles.\n", cur_dist, articles_found); cur_dist++; } cur_dist -= 2; printf("\n\n\nThe articles with the distance of %zd are:\n\n", cur_dist); for (i = 0; i < titles; i++) { if (dist_table[i] == cur_dist) { printf(" %s\n", title[i]); } } return 0; }