lookup: Move to bitfield operations
authornorly <ny-git@enpas.org>
Sun, 14 Jul 2019 12:22:11 +0000 (14:22 +0200)
committernorly <ny-git@enpas.org>
Sun, 14 Jul 2019 12:22:11 +0000 (14:22 +0200)
lookup-incoming.c

index d1467f8e7f7d47db1690121d4aeb0f26d04413d9..ecb6a3e2686a3cdc3dd995caae90e3597f0724b0 100644 (file)
@@ -6,9 +6,29 @@
 #include <sys/types.h>
 
 
+#define ROUNDUP(x,y) ((((x)+(y)-1)/(y))*(y))
+
+#define BIT_SET(set, d) \
+  ( (void) ( set[d >> 3] |= (1 << (d & 7)) ) )
+#define BIT_ISSET(set, d) \
+  (!(! ( set[d >> 3] & (1 << (d & 7)) ) ))
+
+int ANY_BITS_SET(unsigned char *set, size_t bytes)
+{
+       size_t i;
+
+       for (i = 0; i < bytes; i++)
+               if (set[i] != 0)
+                       return 1;
+
+       return 0;
+}
+
+
 typedef __uint32_t art_id;
 
 
+
 static int
 cmpstring_p_pp(const void *p1, const void *p2)
 {
@@ -33,12 +53,15 @@ int main(int argc, char **argv)
        char **cur_title;
        art_id title_id;
 
-       art_id *dist_table;
        art_id cur_dist;
-       int cur_dist_is_not_last;
 
        art_id i;
 
+       unsigned char *titles_seen;
+       unsigned char *titles_prev_round;
+       unsigned char *titles_this_round;
+       size_t titles_bitfield_len;
+
 
        if (argc < 2) {
                printf("Supply article name to look up articles linking to it.\n");
@@ -121,61 +144,66 @@ int main(int argc, char **argv)
 
        title_id = cur_title - title;
 
-       printf("Article %zd (%s) is linked from %zd articles:\n", title_id, title[title_id], linkis[title_id]);
 
+       #if 0
+       /* Print first degree incoming edges */
+       printf("Article %zd (%s) is linked from %zd articles:\n", title_id, title[title_id], linkis[title_id]);
        for (i = 0; i < linkis[title_id]; i++) {
                art_id x = linki[title_id][i];
                printf("  %s\n", title[x]);
        }
+       #endif
 
 
 
        printf("\n\n\n\nBuilding table of distances...\n\n");
 
-       dist_table = malloc(titles * sizeof(dist_table[0]));
-
-       dist_table[title_id] = 0xdeadbeef;
+       titles_bitfield_len = ROUNDUP(titles / 8, sizeof(size_t));
 
-       for (i = 0; i < linkis[title_id]; i++) {
-               art_id x = linki[title_id][i];
-               dist_table[x] = 1;
-       }
+       titles_seen = calloc(titles_bitfield_len, 1);
+       titles_prev_round = NULL;
+       titles_this_round = calloc(titles_bitfield_len, 1);
 
-       cur_dist = 1;
-       cur_dist_is_not_last = 1;
+       cur_dist = 0;
 
-       while (cur_dist_is_not_last) {
-               art_id articles_found = 0;
+       BIT_SET(titles_seen, title_id);
+       BIT_SET(titles_this_round, title_id);
 
-               cur_dist_is_not_last = 0;
+       while (ANY_BITS_SET(titles_this_round, titles_bitfield_len)) {
+               art_id i;
 
-               for (i = 0; i < titles; i++) {
-                       if (dist_table[i] == cur_dist) {
-                               art_id j;
+               cur_dist++;
+               printf("Checking distance %d\n", cur_dist);
 
-                               for (j = 0; j < linkis[i]; j++) {
-                                       art_id x = linki[i][j];
+               free(titles_prev_round); /* free is safe to call on NULL */
+               titles_prev_round = titles_this_round;
+               titles_this_round = calloc(titles_bitfield_len, 1);
 
-                                       if (!dist_table[x]) {
-                                               dist_table[x] = cur_dist + 1;
-                                       }
-                               }
+               for (i = 0; i < titles; i++) {
+                       int j;
 
-                               cur_dist_is_not_last = 1;
+                       if (!BIT_ISSET(titles_prev_round, i))
+                               continue;
 
-                               articles_found++;
+                       for (j = 0; j < linkis[i]; j++) {
+                               art_id x = linki[i][j];
+                               BIT_SET(titles_this_round, x);
                        }
                }
 
-               printf("Distance %zd: Found %zd articles.\n", cur_dist, articles_found);
-               cur_dist++;
+               for (i = 0; i < titles_bitfield_len; i++) {
+                       titles_this_round[i] &= ~titles_seen[i];
+                       titles_seen[i] |= titles_this_round[i];
+               }
        }
 
-       cur_dist -= 2;
+       /* Result is in titles_prev_round */
+
+       cur_dist -= 1;
 
        printf("\n\n\nThe articles with the distance of %zd are:\n\n", cur_dist);
        for (i = 0; i < titles; i++) {
-               if (dist_table[i] == cur_dist) {
+               if (BIT_ISSET(titles_prev_round, i)) {
                        printf("  %s\n", title[i]);
                }
        }