projects
/
enwiki-links-graph.git
/ commitdiff
commit
grep
author
committer
pickaxe
?
search:
re
summary
|
shortlog
|
log
|
commit
| commitdiff |
tree
raw
|
patch
|
inline
| side by side (parent:
b7e63e5
)
lookup: Accelerate reading link database
author
norly
<ny-git@enpas.org>
Sun, 14 Jul 2019 12:22:16 +0000
(14:22 +0200)
committer
norly
<ny-git@enpas.org>
Sun, 14 Jul 2019 12:22:16 +0000
(14:22 +0200)
lookup-incoming.c
patch
|
blob
|
history
diff --git a/lookup-incoming.c b/lookup-incoming.c
index ecb6a3e2686a3cdc3dd995caae90e3597f0724b0..4222d8f91f5abedbf37fca29220847d3d405c318 100644
(file)
--- a/lookup-incoming.c
+++ b/lookup-incoming.c
@@ -47,6 +47,8 @@ int main(int argc, char **argv)
art_id titles;
art_id titles_read = 0;
art_id titles;
art_id titles_read = 0;
+ art_id *link_blob;
+ size_t link_blob_len;
art_id **linki;
art_id *linkis;
art_id **linki;
art_id *linkis;
@@ -74,8 +76,26 @@ int main(int argc, char **argv)
*/
in_file = fopen("links-incoming.bin", "rb");
*/
in_file = fopen("links-incoming.bin", "rb");
+ fseek(in_file, 0, SEEK_END);
+ link_blob_len = ftell(in_file);
+ rewind(in_file);
+ printf("Link blob size: %zd bytes.\n", link_blob_len);
+
+ link_blob = malloc(link_blob_len);
+ if (!link_blob) {
+ printf("Failed to allocate memory.\n");
+ return 1;
+ }
- fread(&titles, sizeof(titles), 1, in_file);
+ if (link_blob_len != fread(link_blob, 1, link_blob_len, in_file)) {
+ printf("Failed to read entire file in one go.\n");
+ return 1;
+ }
+ printf("Link blob read (%zd bytes).\n", ftell(in_file));
+ fclose(in_file);
+
+ titles = *link_blob;
+ link_blob++;
linki = malloc(titles * sizeof(art_id*));
linkis = malloc(titles * sizeof(art_id));
linki = malloc(titles * sizeof(art_id*));
linkis = malloc(titles * sizeof(art_id));
@@ -83,19 +103,14 @@ int main(int argc, char **argv)
for (i = 0; i < titles; i++) {
art_id j;
for (i = 0; i < titles; i++) {
art_id j;
- fread(&linkis[i], sizeof(linkis[i]), 1, in_file);
- //printf("linkis[%zd] = %zd\n", i, linkis[i]);
-
- linki[i] = malloc(linkis[i] * sizeof(linki[i][0]));
+ linkis[i] = *link_blob;
+ link_blob++;
- j = fread(linki[i], sizeof(linki[i][0]), linkis[i], in_file);
- assert(j == linkis[i]);
- //for (j = 0; j < linkis[i]; j++) {
- // fread(&linki[i][j], sizeof(linki[i][j]), 1, in_file);
- //}
+ linki[i] = link_blob;
+ link_blob += linkis[i];
}
}
- printf("Incoming links read (%zd bytes).\n", ftell(in_file));
- fclose(in_file);
+ printf("Incoming links prepared.\n");
+
@@ -116,6 +131,7 @@ int main(int argc, char **argv)
/* Ignore empty lines and errors */
if (in_line_len < 2) {
/* Ignore empty lines and errors */
if (in_line_len < 2) {
+ free(in_line);
continue;
}
continue;
}