lookup: Accelerate reading titles
author norly <ny-git@enpas.org>
Sun, 14 Jul 2019 12:22:21 +0000 (14:22 +0200)
committer norly <ny-git@enpas.org>
Sun, 14 Jul 2019 12:22:21 +0000 (14:22 +0200)
lookup-incoming.c

index 4222d8f91f5abedbf37fca29220847d3d405c318..c31251873c72f9f0d3503177da8896025c7327a0 100644 (file)
@@ -43,6 +43,8 @@ int main(int argc, char **argv)
        FILE *in_file;
        FILE *out_file;
 
+       char *title_blob;
+       size_t title_blob_len;
        char **title = NULL;
        art_id titles;
        art_id titles_read = 0;
@@ -119,31 +121,42 @@ int main(int argc, char **argv)
         * Read all titles into memory
         */
 
-       title = malloc(titles * sizeof(title[0]));
+
 
        in_file = fopen("titles-sorted.txt", "r");
-       while (!feof(in_file)) {
-               char *in_line = NULL;
-               ssize_t in_line_len = 0;
-               size_t zero = 0;
+       fseek(in_file, 0, SEEK_END);
+       title_blob_len = ftell(in_file);
+       rewind(in_file);
+       printf("Title blob size: %zd bytes.\n", title_blob_len);
 
-               in_line_len = getline(&in_line, &zero, in_file);
+       title_blob = malloc(title_blob_len + 1);
+       if (!title_blob) {
+               printf("Failed to allocate memory.\n");
+               return 1;
+       }
 
-               /* Ignore empty lines and errors */
-               if (in_line_len < 2) {
-                       free(in_line);
-                       continue;
-               }
+       if (title_blob_len != fread(title_blob, 1, title_blob_len, in_file)) {
+               printf("Failed to read entire file in one go.\n");
+               return 1;
+       }
+       printf("Title blob read (%zd bytes).\n", ftell(in_file));
+       fclose(in_file);
+
+       title_blob[title_blob_len] = '\0';
 
-               /* Delete trailing newline */
-               in_line[in_line_len - 1] = '\0';
+       title = malloc((titles + 1) * sizeof(title[0]));
 
-               title[titles_read] = in_line;
+       title[0] = title_blob;
+       titles_read = 1;
+       while (title_blob = strchr(title_blob, '\n')) {
+               title_blob[0] = '\0';
+               title_blob++;
+               title[titles_read] = title_blob;
                titles_read++;
        }
-       fclose(in_file);
 
-       printf("Titles read.\n");
+       /* Last title will be an empty string, as the file ends with a newline. */
+       printf("Titles prepared: %d of %d.\n", titles_read - 1, titles);