From b7e63e5c4f703f50eafba6b945eab6ee88df457b Mon Sep 17 00:00:00 2001 From: norly Date: Sun, 14 Jul 2019 14:22:11 +0200 Subject: [PATCH] lookup: Move to bitfield operations --- lookup-incoming.c | 88 +++++++++++++++++++++++++++++++---------------- 1 file changed, 58 insertions(+), 30 deletions(-) diff --git a/lookup-incoming.c b/lookup-incoming.c index d1467f8..ecb6a3e 100644 --- a/lookup-incoming.c +++ b/lookup-incoming.c @@ -6,9 +6,29 @@ #include +#define ROUNDUP(x,y) ((((x)+(y)-1)/(y))*(y)) + +#define BIT_SET(set, d) \ + ( (void) ( set[d >> 3] |= (1 << (d & 7)) ) ) +#define BIT_ISSET(set, d) \ + (!(! ( set[d >> 3] & (1 << (d & 7)) ) )) + +int ANY_BITS_SET(unsigned char *set, size_t bytes) +{ + size_t i; + + for (i = 0; i < bytes; i++) + if (set[i] != 0) + return 1; + + return 0; +} + + typedef __uint32_t art_id; + static int cmpstring_p_pp(const void *p1, const void *p2) { @@ -33,12 +53,15 @@ int main(int argc, char **argv) char **cur_title; art_id title_id; - art_id *dist_table; art_id cur_dist; - int cur_dist_is_not_last; art_id i; + unsigned char *titles_seen; + unsigned char *titles_prev_round; + unsigned char *titles_this_round; + size_t titles_bitfield_len; + if (argc < 2) { printf("Supply article name to look up articles linking to it.\n"); @@ -121,61 +144,66 @@ int main(int argc, char **argv) title_id = cur_title - title; - printf("Article %zd (%s) is linked from %zd articles:\n", title_id, title[title_id], linkis[title_id]); + #if 0 + /* Print first degree incoming edges */ + printf("Article %zd (%s) is linked from %zd articles:\n", title_id, title[title_id], linkis[title_id]); for (i = 0; i < linkis[title_id]; i++) { art_id x = linki[title_id][i]; printf(" %s\n", title[x]); } + #endif printf("\n\n\n\nBuilding table of distances...\n\n"); - dist_table = malloc(titles * sizeof(dist_table[0])); - - dist_table[title_id] = 0xdeadbeef; + titles_bitfield_len = ROUNDUP(titles / 8, sizeof(size_t)); - for (i = 0; i < linkis[title_id]; i++) { - art_id x = linki[title_id][i]; - dist_table[x] = 1; - } + titles_seen = calloc(titles_bitfield_len, 1); + titles_prev_round = NULL; + titles_this_round = calloc(titles_bitfield_len, 1); - cur_dist = 1; - cur_dist_is_not_last = 1; + cur_dist = 0; - while (cur_dist_is_not_last) { - art_id articles_found = 0; + BIT_SET(titles_seen, title_id); + BIT_SET(titles_this_round, title_id); - cur_dist_is_not_last = 0; + while (ANY_BITS_SET(titles_this_round, titles_bitfield_len)) { + art_id i; - for (i = 0; i < titles; i++) { - if (dist_table[i] == cur_dist) { - art_id j; + cur_dist++; + printf("Checking distance %d\n", cur_dist); - for (j = 0; j < linkis[i]; j++) { - art_id x = linki[i][j]; + free(titles_prev_round); /* free is safe to call on NULL */ + titles_prev_round = titles_this_round; + titles_this_round = calloc(titles_bitfield_len, 1); - if (!dist_table[x]) { - dist_table[x] = cur_dist + 1; - } - } + for (i = 0; i < titles; i++) { + int j; - cur_dist_is_not_last = 1; + if (!BIT_ISSET(titles_prev_round, i)) + continue; - articles_found++; + for (j = 0; j < linkis[i]; j++) { + art_id x = linki[i][j]; + BIT_SET(titles_this_round, x); } } - printf("Distance %zd: Found %zd articles.\n", cur_dist, articles_found); - cur_dist++; + for (i = 0; i < titles_bitfield_len; i++) { + titles_this_round[i] &= ~titles_seen[i]; + titles_seen[i] |= titles_this_round[i]; + } } - cur_dist -= 2; + /* Result is in titles_prev_round */ + + cur_dist -= 1; printf("\n\n\nThe articles with the distance of %zd are:\n\n", cur_dist); for (i = 0; i < titles; i++) { - if (dist_table[i] == cur_dist) { + if (BIT_ISSET(titles_prev_round, i)) { printf(" %s\n", title[i]); } } -- 2.30.2