summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- links-outgoing-to-incoming.c 33
-rw-r--r-- links-plain-to-binary.c 24
-rw-r--r-- lookup-incoming.c 36
3 files changed, 52 insertions, 41 deletions
diff --git a/links-outgoing-to-incoming.c b/links-outgoing-to-incoming.c
index 8a23ca7..2fd853a 100644
--- a/links-outgoing-to-incoming.c
+++ b/links-outgoing-to-incoming.c
@@ -3,25 +3,28 @@
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
+#include <sys/types.h>
+typedef __uint32_t art_id;
+
int main()
{
FILE *in_file;
FILE *out_file;
- size_t titles;
+ art_id titles;
- size_t **linko;
- size_t *linkos;
+ art_id **linko;
+ art_id *linkos;
- size_t **linki;
- size_t *linkis;
+ art_id **linki;
+ art_id *linkis;
- size_t link_titles_done = 0;
+ art_id link_titles_done = 0;
- size_t i;
+ art_id i;
/*
@@ -32,11 +35,11 @@ int main()
fread(&titles, sizeof(titles), 1, in_file);
- linko = malloc(titles * sizeof(size_t*));
- linkos = malloc(titles * sizeof(size_t));
+ linko = malloc(titles * sizeof(art_id*));
+ linkos = malloc(titles * sizeof(art_id));
for (i = 0; i < titles; i++) {
- size_t j;
+ art_id j;
fread(&linkos[i], sizeof(linkos[i]), 1, in_file);
@@ -52,14 +55,14 @@ int main()
- linki = malloc(titles * sizeof(size_t*));
- linkis = malloc(titles * sizeof(size_t));
+ linki = malloc(titles * sizeof(art_id*));
+ linkis = malloc(titles * sizeof(art_id));
for (i = 0; i < titles; i++) {
- size_t j;
+ art_id j;
for (j = 0; j < linkos[i]; j++) {
- size_t x = linko[i][j];
+ art_id x = linko[i][j];
linkis[x]++;
linki[x] = realloc(linki[x], linkis[x] * sizeof(linki[x][0]));
@@ -75,7 +78,7 @@ int main()
out_file = fopen("links-incoming.bin", "wb");
fwrite(&titles, sizeof(titles), 1, out_file);
for (i = 0; i < titles; i++) {
- size_t j;
+ art_id j;
fwrite(&linkis[i], sizeof(linkis[i]), 1, out_file);
diff --git a/links-plain-to-binary.c b/links-plain-to-binary.c
index 13ded29..005a496 100644
--- a/links-plain-to-binary.c
+++ b/links-plain-to-binary.c
@@ -3,6 +3,10 @@
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
+#include <sys/types.h>
+
+
+typedef __uint32_t art_id;
static int
@@ -26,15 +30,15 @@ int main()
FILE *out_file;
char **title = NULL;
- size_t titles = 0;
- size_t titles_alloc = 0;
+ art_id titles = 0;
+ art_id titles_alloc = 0;
- size_t **link;
- size_t *links;
+ art_id **link;
+ art_id *links;
- size_t link_titles_done = 0;
+ art_id link_titles_done = 0;
- size_t i;
+ art_id i;
/*
@@ -73,8 +77,8 @@ int main()
- link = malloc(titles * sizeof(size_t*));
- links = malloc(titles * sizeof(size_t));
+ link = malloc(titles * sizeof(art_id*));
+ links = malloc(titles * sizeof(art_id));
in_file = fopen("enwiki-links-plain.txt", "r");
while (!feof(in_file)) {
@@ -131,7 +135,7 @@ int main()
}
links[i]++;
- link[i] = realloc(link[i], links[i] * sizeof(size_t));
+ link[i] = realloc(link[i], links[i] * sizeof(art_id));
link[i][links[i] - 1] = cur_link - title;
@@ -160,7 +164,7 @@ int main()
out_file = fopen("links-outgoing.bin", "wb");
fwrite(&titles, sizeof(titles), 1, out_file);
for (i = 0; i < titles; i++) {
- size_t j;
+ art_id j;
fwrite(&links[i], sizeof(links[i]), 1, out_file);
diff --git a/lookup-incoming.c b/lookup-incoming.c
index 50f2dbc..d1467f8 100644
--- a/lookup-incoming.c
+++ b/lookup-incoming.c
@@ -3,6 +3,10 @@
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
+#include <sys/types.h>
+
+
+typedef __uint32_t art_id;
static int
@@ -20,20 +24,20 @@ int main(int argc, char **argv)
FILE *out_file;
char **title = NULL;
- size_t titles;
- size_t titles_read = 0;
+ art_id titles;
+ art_id titles_read = 0;
- size_t **linki;
- size_t *linkis;
+ art_id **linki;
+ art_id *linkis;
char **cur_title;
- size_t title_id;
+ art_id title_id;
- size_t *dist_table;
- size_t cur_dist;
+ art_id *dist_table;
+ art_id cur_dist;
int cur_dist_is_not_last;
- size_t i;
+ art_id i;
if (argc < 2) {
@@ -50,11 +54,11 @@ int main(int argc, char **argv)
fread(&titles, sizeof(titles), 1, in_file);
- linki = malloc(titles * sizeof(size_t*));
- linkis = malloc(titles * sizeof(size_t));
+ linki = malloc(titles * sizeof(art_id*));
+ linkis = malloc(titles * sizeof(art_id));
for (i = 0; i < titles; i++) {
- size_t j;
+ art_id j;
fread(&linkis[i], sizeof(linkis[i]), 1, in_file);
//printf("linkis[%zd] = %zd\n", i, linkis[i]);
@@ -120,7 +124,7 @@ int main(int argc, char **argv)
printf("Article %zd (%s) is linked from %zd articles:\n", title_id, title[title_id], linkis[title_id]);
for (i = 0; i < linkis[title_id]; i++) {
- size_t x = linki[title_id][i];
+ art_id x = linki[title_id][i];
printf(" %s\n", title[x]);
}
@@ -133,7 +137,7 @@ int main(int argc, char **argv)
dist_table[title_id] = 0xdeadbeef;
for (i = 0; i < linkis[title_id]; i++) {
- size_t x = linki[title_id][i];
+ art_id x = linki[title_id][i];
dist_table[x] = 1;
}
@@ -141,16 +145,16 @@ int main(int argc, char **argv)
cur_dist_is_not_last = 1;
while (cur_dist_is_not_last) {
- size_t articles_found = 0;
+ art_id articles_found = 0;
cur_dist_is_not_last = 0;
for (i = 0; i < titles; i++) {
if (dist_table[i] == cur_dist) {
- size_t j;
+ art_id j;
for (j = 0; j < linkis[i]; j++) {
- size_t x = linki[i][j];
+ art_id x = linki[i][j];
if (!dist_table[x]) {
dist_table[x] = cur_dist + 1;