projects
/
enwiki-links-graph.git
/ commitdiff
commit
grep
author
committer
pickaxe
?
search:
re
summary
|
shortlog
|
log
|
commit
| commitdiff |
tree
raw
|
patch
|
inline
| side by side (parent:
1027dce
)
lookup: Accelerate reading titles
author
norly
<ny-git@enpas.org>
Sun, 14 Jul 2019 12:22:21 +0000
(14:22 +0200)
committer
norly
<ny-git@enpas.org>
Sun, 14 Jul 2019 12:22:21 +0000
(14:22 +0200)
lookup-incoming.c
patch
|
blob
|
history
diff --git a/lookup-incoming.c b/lookup-incoming.c
index 4222d8f91f5abedbf37fca29220847d3d405c318..c31251873c72f9f0d3503177da8896025c7327a0 100644
(file)
--- a/lookup-incoming.c
+++ b/lookup-incoming.c
@@ -43,6 +43,8 @@ int main(int argc, char **argv)
FILE *in_file;
FILE *out_file;
FILE *in_file;
FILE *out_file;
+ char *title_blob;
+ size_t title_blob_len;
char **title = NULL;
art_id titles;
art_id titles_read = 0;
char **title = NULL;
art_id titles;
art_id titles_read = 0;
@@ -119,31 +121,42 @@ int main(int argc, char **argv)
* Read all titles into memory
*/
* Read all titles into memory
*/
- title = malloc(titles * sizeof(title[0]));
+
in_file = fopen("titles-sorted.txt", "r");
in_file = fopen("titles-sorted.txt", "r");
- while (!feof(in_file)) {
-
char *in_line = NULL
;
-
ssize_t in_line_len = 0
;
-
size_t zero = 0
;
+ fseek(in_file, 0, SEEK_END);
+
title_blob_len = ftell(in_file)
;
+
rewind(in_file)
;
+
printf("Title blob size: %zd bytes.\n", title_blob_len)
;
- in_line_len = getline(&in_line, &zero, in_file);
+ title_blob = malloc(title_blob_len + 1);
+ if (!title_blob) {
+ printf("Failed to allocate memory.\n");
+ return 1;
+ }
- /* Ignore empty lines and errors */
- if (in_line_len < 2) {
- free(in_line);
- continue;
- }
+ if (title_blob_len != fread(title_blob, 1, title_blob_len, in_file)) {
+ printf("Failed to read entire file in one go.\n");
+ return 1;
+ }
+ printf("Title blob read (%zd bytes).\n", ftell(in_file));
+ fclose(in_file);
+
+ title_blob[title_blob_len] = '\0';
- /* Delete trailing newline */
- in_line[in_line_len - 1] = '\0';
+ title = malloc((titles + 1) * sizeof(title[0]));
- title[titles_read] = in_line;
+ title[0] = title_blob;
+ titles_read = 1;
+ while (title_blob = strchr(title_blob, '\n')) {
+ title_blob[0] = '\0';
+ title_blob++;
+ title[titles_read] = title_blob;
titles_read++;
}
titles_read++;
}
- fclose(in_file);
- printf("Titles read.\n");
+ /* Last title will be an empty string, as the file ends with a newline. */
+ printf("Titles prepared: %d of %d.\n", titles_read - 1, titles);