summary refs log tree commit diff
path: root/Makefile
diff options
context:
space:
mode:
Diffstat (limited to 'Makefile')
-rw-r--r-- Makefile 61
1 files changed, 61 insertions, 0 deletions
diff --git a/Makefile b/Makefile
new file mode 100644
index 0000000..0182eff
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,61 @@
+GCC := gcc -O3 -g
+BINDIR := bin
+SRCDIR := src
+
+.PHONY: all lookuptools preptools dbfiles  # aggregate goals, not files on disk
+all: lookuptools preptools dbfiles
+
+# Create the directory that receives the compiled tools.
+$(BINDIR):
+	mkdir -p $@
+
+lookuptools: $(BINDIR)/lookup-incoming  # aggregate goal for the lookup tool binary
+
+# Compile the lookup tool from its single C source. The output directory is an
+# order-only prerequisite, created by the $(BINDIR) rule instead of an inline mkdir.
+$(BINDIR)/lookup-incoming: $(SRCDIR)/lookup-incoming.c | $(BINDIR)
+	$(GCC) -o $@ $<
+
+
+# Aggregate goal for the three preprocessing tool binaries, all named
+# $(BINDIR)/links-<step>; the list order is the order they are considered in.
+preptools: $(addprefix $(BINDIR)/links-,xml-to-plain outgoing-to-incoming plain-to-binary)
+
+# Compile flex program to regex-convert the XML to plain text: flex writes the
+# generated C scanner next to the binary, which is then linked against libfl.
+# See https://unix.stackexchange.com/a/413684
+$(BINDIR)/links-xml-to-plain: $(SRCDIR)/links-xml-to-plain.l | $(BINDIR)
+	flex -o $@.c $<
+	$(GCC) -o $@ $@.c -lfl
+
+# NOTE(review): titles.txt is a prerequisite although only the C file ($<) is compiled; confirm it is intended.
+$(BINDIR)/links-plain-to-binary: $(SRCDIR)/links-plain-to-binary.c titles.txt | $(BINDIR)
+	$(GCC) -o $@ $<
+
+# Compile the outgoing-to-incoming converter from its single C source.
+# The output directory is an order-only prerequisite, created by the $(BINDIR) rule.
+$(BINDIR)/links-outgoing-to-incoming: $(SRCDIR)/links-outgoing-to-incoming.c | $(BINDIR)
+	$(GCC) -o $@ $<
+
+
+dbfiles: links-incoming.bin titles-sorted.txt  # aggregate goal for the generated data files
+
+# Decompress the dump, skip its first 44 bytes, convert to plain text; the output is renamed into place only if the converter succeeds.
+enwiki-links-plain.txt: $(BINDIR)/links-xml-to-plain enwiki-links.xml.lzo
+	lzop -dc enwiki-links.xml.lzo | pv | tail -c +45 | $< > $@.tmp && mv -f $@.tmp $@
+
+titles.txt: enwiki-links-plain.txt
+	# Keep only the lines starting with "~~~~", strip that marker, and sort them
+	grep ^~~~~ $< | sed "s/^~~~~//g;te;d;:e" | sort > $@
+
+links-outgoing.bin: $(BINDIR)/links-plain-to-binary titles.txt
+	# Run the converter without arguments; it also produces titles-sorted.txt
+	$<
+	# Make titles-sorted.txt at least as new as links-outgoing.bin so make treats it as up to date
+	touch titles-sorted.txt
+
+titles-sorted.txt: links-outgoing.bin  # by-product of the links-outgoing.bin recipe; nothing to build here
+	test -f $@ || { echo "$@ is missing; remove $< and re-run make" >&2; exit 1; }
+
+links-incoming.bin: $(BINDIR)/links-outgoing-to-incoming links-outgoing.bin  # run the converter without arguments
+	$<