9 #define ROUNDUP(x,y) ((((x)+(y)-1)/(y))*(y))
11 #define BIT_SET(set, d) \
12 ( (void) ( set[d >> 3] |= (1 << (d & 7)) ) )
13 #define BIT_ISSET(set, d) \
14 (!(! ( set[d >> 3] & (1 << (d & 7)) ) ))
16 int ANY_BITS_SET(unsigned char *set, size_t bytes)
20 for (i = 0; i < bytes; i++)
28 typedef __uint32_t art_id;
33 cmpstring_p_pp(const void *p1, const void *p2)
35 return strcasecmp((char * const) p1, * (char * const *) p2);
41 int main(int argc, char **argv)
48 art_id titles_read = 0;
60 unsigned char *titles_seen;
61 unsigned char *titles_prev_round;
62 unsigned char *titles_this_round;
63 size_t titles_bitfield_len;
67 printf("Supply article name to look up articles linking to it.\n");
73 * Read all incoming links into memory
76 in_file = fopen("links-incoming.bin", "rb");
78 fread(&titles, sizeof(titles), 1, in_file);
80 linki = malloc(titles * sizeof(art_id*));
81 linkis = malloc(titles * sizeof(art_id));
83 for (i = 0; i < titles; i++) {
86 fread(&linkis[i], sizeof(linkis[i]), 1, in_file);
87 //printf("linkis[%zd] = %zd\n", i, linkis[i]);
89 linki[i] = malloc(linkis[i] * sizeof(linki[i][0]));
91 j = fread(linki[i], sizeof(linki[i][0]), linkis[i], in_file);
92 assert(j == linkis[i]);
93 //for (j = 0; j < linkis[i]; j++) {
94 // fread(&linki[i][j], sizeof(linki[i][j]), 1, in_file);
97 printf("Incoming links read (%zd bytes).\n", ftell(in_file));
104 * Read all titles into memory
107 title = malloc(titles * sizeof(title[0]));
109 in_file = fopen("titles-sorted.txt", "r");
110 while (!feof(in_file)) {
111 char *in_line = NULL;
112 ssize_t in_line_len = 0;
115 in_line_len = getline(&in_line, &zero, in_file);
117 /* Ignore empty lines and errors */
118 if (in_line_len < 2) {
122 /* Delete trailing newline */
123 in_line[in_line_len - 1] = '\0';
125 title[titles_read] = in_line;
130 printf("Titles read.\n");
135 /* Look up article ID */
136 cur_title = bsearch(argv[1], title,
137 titles, sizeof(title[0]),
141 printf("TITLE NOT FOUND: %s\n", argv[1]);
145 title_id = cur_title - title;
149 /* Print first degree incoming edges */
150 printf("Article %zd (%s) is linked from %zd articles:\n", title_id, title[title_id], linkis[title_id]);
151 for (i = 0; i < linkis[title_id]; i++) {
152 art_id x = linki[title_id][i];
153 printf(" %s\n", title[x]);
159 printf("\n\n\n\nBuilding table of distances...\n\n");
161 titles_bitfield_len = ROUNDUP(titles / 8, sizeof(size_t));
163 titles_seen = calloc(titles_bitfield_len, 1);
164 titles_prev_round = NULL;
165 titles_this_round = calloc(titles_bitfield_len, 1);
169 BIT_SET(titles_seen, title_id);
170 BIT_SET(titles_this_round, title_id);
172 while (ANY_BITS_SET(titles_this_round, titles_bitfield_len)) {
176 printf("Checking distance %d\n", cur_dist);
178 free(titles_prev_round); /* free is safe to call on NULL */
179 titles_prev_round = titles_this_round;
180 titles_this_round = calloc(titles_bitfield_len, 1);
182 for (i = 0; i < titles; i++) {
185 if (!BIT_ISSET(titles_prev_round, i))
188 for (j = 0; j < linkis[i]; j++) {
189 art_id x = linki[i][j];
190 BIT_SET(titles_this_round, x);
194 for (i = 0; i < titles_bitfield_len; i++) {
195 titles_this_round[i] &= ~titles_seen[i];
196 titles_seen[i] |= titles_this_round[i];
200 /* Result is in titles_prev_round */
204 printf("\n\n\nThe articles with the distance of %zd are:\n\n", cur_dist);
205 for (i = 0; i < titles; i++) {
206 if (BIT_ISSET(titles_prev_round, i)) {
207 printf(" %s\n", title[i]);