Move to Makefile and .gitignore
[enwiki-links-graph.git] / Makefile
diff --git a/Makefile b/Makefile
new file mode 100644 (file)
index 0000000..0182eff
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,61 @@
+# Compiler command line used by every C build rule in this file.
+GCC=gcc -O3 -g
+# Directory that receives the compiled tools.
+BINDIR=bin
+# Directory holding the C and flex sources.
+SRCDIR=src
+
+# Delete a target whose recipe exits non-zero, so a half-written output file
+# is never mistaken for an up-to-date one on the next run.
+.DELETE_ON_ERROR:
+
+# Default goal: build lookuptools, preptools and dbfiles.
+# Declared phony because it names a group of targets, not a file to create;
+# a stray file called "all" must not make this goal look up to date.
+.PHONY: all
+all: lookuptools preptools dbfiles
+
+
+# Create the output directory for compiled tools.
+# None of the rules in this file name $(BINDIR) as a prerequisite (the tool
+# rules create the directory themselves), so this only runs when the
+# directory is requested as a goal.
+$(BINDIR):
+       mkdir -p $@
+
+
+# Convenience goal: build the lookup-incoming tool.
+# Phony: no file named "lookuptools" is ever produced.
+.PHONY: lookuptools
+lookuptools: $(BINDIR)/lookup-incoming
+
+# Compile the lookup-incoming tool from its single C source file.
+# $(@D) is the directory part of the target, i.e. $(BINDIR).
+$(BINDIR)/lookup-incoming: $(SRCDIR)/lookup-incoming.c
+       mkdir -p $(@D)
+       $(GCC) -o $@ $<
+
+
+
+# Convenience goal: build the three data-preparation tools.
+# Phony: no file named "preptools" is ever produced.
+.PHONY: preptools
+preptools: $(BINDIR)/links-xml-to-plain \
+       $(BINDIR)/links-outgoing-to-incoming \
+       $(BINDIR)/links-plain-to-binary
+
+# Build the XML-to-plain-text converter in two steps: flex generates a C
+# scanner next to the target ($@.c), then that C file is compiled and linked
+# with -lfl.
+$(BINDIR)/links-xml-to-plain: $(SRCDIR)/links-xml-to-plain.l
+       mkdir -p $(@D)
+       # Compile flex program to regex-convert the XML to plain text
+       #https://unix.stackexchange.com/a/413684
+       flex -o $@.c $<
+       $(GCC) -o $@ $@.c -lfl
+
+# Compile the plain-text-to-binary converter from its single C source file.
+# Only the C source is a prerequisite: the recipe compiles $< alone, so the
+# binary does not need rebuilding when titles.txt changes. The data dependency
+# on titles.txt is carried by links-outgoing.bin, the rule that runs this tool.
+$(BINDIR)/links-plain-to-binary: $(SRCDIR)/links-plain-to-binary.c
+       mkdir -p $(BINDIR)
+       $(GCC) -o $@ $<
+
+# Compile the outgoing-to-incoming converter from its single C source file.
+# $(@D) is the directory part of the target, i.e. $(BINDIR).
+$(BINDIR)/links-outgoing-to-incoming: $(SRCDIR)/links-outgoing-to-incoming.c
+       mkdir -p $(@D)
+       $(GCC) -o $@ $<
+
+
+
+# Convenience goal: produce the two generated data files.
+# Phony: no file named "dbfiles" is ever produced.
+.PHONY: dbfiles
+dbfiles: links-incoming.bin titles-sorted.txt
+
+# Decompress the link dump, skip its first 44 bytes (tail -c +45) and feed the
+# rest through the XML-to-plain converter; pv sits in the pipe to report
+# progress.
+# The output goes to a temporary file that is renamed into place only after
+# the pipeline succeeds, so a failed conversion cannot leave a truncated
+# enwiki-links-plain.txt that make would treat as up to date.
+# NOTE(review): the pipeline's exit status is that of the converter (the last
+# command); a failing lzop is not detected here.
+enwiki-links-plain.txt: $(BINDIR)/links-xml-to-plain enwiki-links.xml.lzo
+       # Convert to plain text
+       lzop -dc enwiki-links.xml.lzo | pv | tail -c +45 | $(BINDIR)/links-xml-to-plain > $@.tmp
+       mv -f $@.tmp $@
+
+# Build the title list: keep the lines of the plain-text dump that start with
+# "~~~~", strip that marker, and sort the result.
+# NOTE(review): sort uses the current locale's collation; confirm that this
+# matches what the consumer of titles.txt expects.
+titles.txt: enwiki-links-plain.txt
+       # Extract titles
+       grep ^~~~~ $< | sed "s/^~~~~//g;te;d;:e" | sort > $@
+
+# Run the plain-to-binary converter ($< is the tool itself). It is invoked
+# without arguments, so its input and output file names are presumably fixed
+# inside the program; that source is not visible here.
+# The final touch makes titles-sorted.txt at least as new as
+# links-outgoing.bin, so the titles-sorted.txt rule sees its target as up to
+# date.
+links-outgoing.bin: $(BINDIR)/links-plain-to-binary titles.txt
+       # This also produces titles-sorted.txt
+       $<
+       # Update timestamp so make does not rebuild links-outgoing.bin
+       touch titles-sorted.txt
+
+# titles-sorted.txt is written by the links-outgoing.bin recipe, so this rule
+# only records that dependency. Its recipe is a single shell comment, i.e. a
+# no-op: when it runs, nothing here creates or refreshes titles-sorted.txt.
+titles-sorted.txt: links-outgoing.bin
+       # Generated in the same step as links-outgoing.bin
+
+# Run the outgoing-to-incoming converter ($< is the tool itself). It is invoked
+# without arguments; presumably it reads links-outgoing.bin and writes
+# links-incoming.bin, but the file names live inside the tool and are not
+# visible here.
+links-incoming.bin: $(BINDIR)/links-outgoing-to-incoming links-outgoing.bin
+       $<