From: norly Date: Sun, 14 Jul 2019 12:22:16 +0000 (+0200) Subject: lookup: Accelerate reading link database X-Git-Url: https://git.enpas.org/?p=enwiki-links-graph.git;a=commitdiff_plain;h=1027dcef94de9ee437ac4a5da846c2ddc70a31f9 lookup: Accelerate reading link database --- diff --git a/lookup-incoming.c b/lookup-incoming.c index ecb6a3e..4222d8f 100644 --- a/lookup-incoming.c +++ b/lookup-incoming.c @@ -47,6 +47,8 @@ int main(int argc, char **argv) art_id titles; art_id titles_read = 0; + art_id *link_blob; + size_t link_blob_len; art_id **linki; art_id *linkis; @@ -74,8 +76,26 @@ int main(int argc, char **argv) */ in_file = fopen("links-incoming.bin", "rb"); + fseek(in_file, 0, SEEK_END); + link_blob_len = ftell(in_file); + rewind(in_file); + printf("Link blob size: %zd bytes.\n", link_blob_len); + + link_blob = malloc(link_blob_len); + if (!link_blob) { + printf("Failed to allocate memory.\n"); + return 1; + } - fread(&titles, sizeof(titles), 1, in_file); + if (link_blob_len != fread(link_blob, 1, link_blob_len, in_file)) { + printf("Failed to read entire file in one go.\n"); + return 1; + } + printf("Link blob read (%zd bytes).\n", ftell(in_file)); + fclose(in_file); + + titles = *link_blob; + link_blob++; linki = malloc(titles * sizeof(art_id*)); linkis = malloc(titles * sizeof(art_id)); @@ -83,19 +103,14 @@ int main(int argc, char **argv) for (i = 0; i < titles; i++) { art_id j; - fread(&linkis[i], sizeof(linkis[i]), 1, in_file); - //printf("linkis[%zd] = %zd\n", i, linkis[i]); - - linki[i] = malloc(linkis[i] * sizeof(linki[i][0])); + linkis[i] = *link_blob; + link_blob++; - j = fread(linki[i], sizeof(linki[i][0]), linkis[i], in_file); - assert(j == linkis[i]); - //for (j = 0; j < linkis[i]; j++) { - // fread(&linki[i][j], sizeof(linki[i][j]), 1, in_file); - //} + linki[i] = link_blob; + link_blob += linkis[i]; } - printf("Incoming links read (%zd bytes).\n", ftell(in_file)); - fclose(in_file); + printf("Incoming links prepared.\n"); + @@ -116,6 +131,7 @@ int main(int argc, char **argv) /* Ignore empty lines and errors */ if (in_line_len < 2) { + free(in_line); continue; }