/*
 * Comparison callback for qsort() over an array of C-string pointers.
 *
 * p1, p2: each points at an array element, i.e. at a `char *`.
 * Returns <0, 0 or >0 as the first string sorts before, equal to or after
 * the second one, ignoring case (strcasecmp() semantics).
 */
int
cmpstring_pp_pp(const void *p1, const void *p2)
{
    /* Typed views of the element pointers; no qualifiers are cast away. */
    const char *const *lhs = p1;
    const char *const *rhs = p2;

    return strcasecmp(*lhs, *rhs);
}
/*
 * Comparison callback for bsearch() when the key is passed as the string
 * itself while the array holds pointers to strings.
 *
 * p1: the search key, a NUL-terminated string (NOT a pointer to a pointer).
 * p2: points at an array element, i.e. at a `char *`.
 * Returns <0, 0 or >0 as the key sorts before, equal to or after the
 * element, ignoring case (strcasecmp() semantics).
 */
int
cmpstring_p_pp(const void *p1, const void *p2)
{
    /* Keep the pointee const instead of casting it away from const void *. */
    const char *key = p1;
    const char *const *elem = p2;

    return strcasecmp(key, *elem);
}
/*
 * Fragment of the main processing routine; the enclosing function header,
 * most declarations and a number of interior lines are not visible here.
 * Visible steps:
 *   1. read "titles.txt" into the title[] array,
 *   2. sort title[] case-insensitively,
 *   3. read "enwiki-links-plain.txt" and resolve links to title indices,
 *   4. write "titles-sorted.txt" and "links-outgoing.bin".
 */
/* Capacity of title[] in elements; grown in steps of 100000 below. */
30 size_t titles_alloc = 0;
/* Progress counter, reported every 100000; its increment is not visible here. */
35 size_t link_titles_done = 0;
41 * Read all titles into memory
/* NOTE(review): the fopen() result is not checked in the visible lines. */
44 in_file = fopen("titles.txt", "r");
/*
 * NOTE(review): feof() only becomes true after a read has failed, so the
 * final iteration relies on the getline() result check further down.
 */
45 while (!feof(in_file)) {
47 ssize_t in_line_len = 0;
/* Array is full: extend it by another 100000 slots. */
50 if (titles == titles_alloc) {
51 titles_alloc += 100000;
/*
 * NOTE(review): assigning realloc() straight back to title loses the old
 * array on failure, and no NULL check is visible.
 */
52 title = realloc(title, titles_alloc * sizeof(title[0]));
/*
 * in_line and zero are declared in lines not shown; presumably they are
 * reset to NULL/0 each iteration so getline() allocates a fresh buffer,
 * since the buffer is stored in title[] below -- TODO confirm.
 */
55 in_line_len = getline(&in_line, &zero, in_file);
57 /* Ignore empty lines and errors */
/* getline() yields -1 on error/EOF and 1 for a bare newline; both land here. */
58 if (in_line_len < 2) {
62 /* Delete trailing newline */
/*
 * NOTE(review): overwrites the last byte unconditionally; if the final line
 * of the file has no trailing newline this drops a real character.
 */
63 in_line[in_line_len - 1] = '\0';
/* Keep the getline() buffer as the title; the increment of titles is not visible here. */
65 title[titles] = in_line;
/* Case-insensitive sort (comparator uses strcasecmp) so bsearch() can be used below. */
70 qsort(title, titles, sizeof(title[0]), cmpstring_pp_pp);
72 printf("Sorting done.\n");
/*
 * link[i]  : array of target title indices for title i (grown with realloc below).
 * links[i] : number of entries in link[i].
 * NOTE(review): malloc() does not zero memory and its result is not checked
 * here. link[i] is later passed to realloc() and links[i] is used as a
 * count, so both arrays must be zeroed in lines not shown -- TODO confirm.
 */
76 link = malloc(titles * sizeof(size_t*));
77 links = malloc(titles * sizeof(size_t));
/* NOTE(review): the fopen() result is not checked in the visible lines. */
79 in_file = fopen("enwiki-links-plain.txt", "r");
80 while (!feof(in_file)) {
82 ssize_t in_line_len = 0;
85 in_line_len = getline(&in_line, &zero, in_file);
87 /* Ignore empty lines and errors */
88 if (in_line_len < 2) {
89 //printf("%d\n", links[i]);
95 /* Delete trailing newline */
96 in_line[in_line_len - 1] = '\0';
/*
 * A line starting with the 4-byte marker "~~~~" names the source title; the
 * text after the marker is looked up. The first half of this condition is
 * not visible here.
 */
99 && !memcmp(in_line, "~~~~", 4)) {
/*
 * The key is passed as the string itself, so the comparator (argument not
 * visible) presumably is cmpstring_p_pp -- TODO confirm.
 */
101 char **cur_title = bsearch(&in_line[4], title,
102 titles, sizeof(title[0]),
/* Presumably reached when bsearch() returned NULL; the test is not visible here. */
106 printf("TITLE NOT FOUND: %s\n", in_line);
/* Index of the current source title within the sorted title[] array. */
110 i = cur_title - title;
111 //printf("%s\n", title[i]);
114 if (0 == (link_titles_done % 100000)) {
/* NOTE(review): link_titles_done is a size_t but is printed with %d; %zu is the matching specifier. */
115 printf("%d\n", link_titles_done);
120 /* Delete trailing anchor */
/*
 * strtok() writes a NUL over the first '#' that follows the start of the
 * token, cutting off the anchor. NOTE(review): leading '#' characters are
 * skipped by strtok(), so a line that begins with '#' is left unchanged.
 */
121 strtok(in_line, "#");
/* Look up the link target; the comparator argument is not visible here. */
123 char **cur_link = bsearch(&in_line[0], title,
124 titles, sizeof(title[0]),
128 //printf("LINK NOT FOUND: %s\n", in_line);
/*
 * Resize the per-title list to links[i] entries; the increment of links[i]
 * is presumably on a preceding line that is not visible.
 * NOTE(review): the realloc() result is not checked.
 */
134 link[i] = realloc(link[i], links[i] * sizeof(size_t));
/* Store the target's index in title[] in the last slot. */
136 link[i][links[i] - 1] = cur_link - title;
138 //printf("%s -- %d\n", title[i], links[i]);
145 printf("Links identified.\n");
/* Write the sorted titles as text, one per line. */
149 out_file = fopen("titles-sorted.txt", "w");
150 for (i = 0; i < titles; i++) {
151 fputs(title[i], out_file);
152 fputs("\n", out_file);
156 printf("Titles written.\n");
/*
 * Binary output layout, as written below: the title count, then for each
 * title its link count followed by that many target indices. Every value
 * is written as its raw in-memory object, so the file depends on the
 * producing platform's integer width and byte order.
 * NOTE(review): fopen()/fwrite() results are not checked in the visible lines.
 */
160 out_file = fopen("links-outgoing.bin", "wb");
161 fwrite(&titles, sizeof(titles), 1, out_file);
162 for (i = 0; i < titles; i++) {
165 fwrite(&links[i], sizeof(links[i]), 1, out_file);
167 for (j = 0; j < links[i]; j++) {
168 fwrite(&link[i][j], sizeof(link[i][j]), 1, out_file);
173 printf("Links written.\n");