diff options
Diffstat (limited to 'Makefile')
-rw-r--r-- | Makefile | 61 |
1 files changed, 61 insertions, 0 deletions
# Builds the lookup/preprocessing tools into $(BINDIR) and derives the link
# database files from the compressed dump enwiki-links.xml.lzo.

# Compiler command line shared by every C build step.
GCC=gcc -O3 -g
BINDIR=bin
SRCDIR=src

# Delete a target whose recipe failed, so a truncated file left behind by a
# shell redirection is never mistaken for an up-to-date result.
.DELETE_ON_ERROR:

# Grouping targets only; none of them names a file that gets created.
.PHONY: all lookuptools preptools dbfiles

all: lookuptools preptools dbfiles


# Output directory for the compiled tools. It is used as an order-only
# prerequisite (after '|') below: it must exist, but its timestamp never
# triggers a rebuild.
$(BINDIR):
	mkdir -p $@


lookuptools: $(BINDIR)/lookup-incoming

$(BINDIR)/lookup-incoming: $(SRCDIR)/lookup-incoming.c | $(BINDIR)
	$(GCC) -o $@ $<



preptools: $(BINDIR)/links-xml-to-plain \
           $(BINDIR)/links-outgoing-to-incoming \
           $(BINDIR)/links-plain-to-binary

# Compile flex program to regex-convert the XML to plain text.
# The generated C scanner is written into $(BINDIR) next to the binary and
# linked against the flex runtime library (-lfl).
# https://unix.stackexchange.com/a/413684
$(BINDIR)/links-xml-to-plain: $(SRCDIR)/links-xml-to-plain.l | $(BINDIR)
	flex -o $(BINDIR)/links-xml-to-plain.c $<
	$(GCC) -o $@ $(BINDIR)/links-xml-to-plain.c -lfl

# NOTE(review): titles.txt is a prerequisite of the *binary* here, so building
# preptools runs the extraction pipeline first and the tool is recompiled
# whenever titles.txt changes. Kept as in the original -- confirm it is intended.
$(BINDIR)/links-plain-to-binary: $(SRCDIR)/links-plain-to-binary.c titles.txt | $(BINDIR)
	$(GCC) -o $@ $<

$(BINDIR)/links-outgoing-to-incoming: $(SRCDIR)/links-outgoing-to-incoming.c | $(BINDIR)
	$(GCC) -o $@ $<



dbfiles: links-incoming.bin titles-sorted.txt

# Convert to plain text: decompress the dump to stdout, pipe it through pv,
# start output at byte 45 of the stream (tail -c +45), and feed the rest to
# the converter.
enwiki-links-plain.txt: $(BINDIR)/links-xml-to-plain enwiki-links.xml.lzo
	lzop -dc enwiki-links.xml.lzo | pv | tail -c +45 | $(BINDIR)/links-xml-to-plain > $@

# Extract titles: keep the lines that start with "~~~~", strip that marker
# (sed deletes any line on which the substitution did not apply), and sort.
titles.txt: enwiki-links-plain.txt
	grep ^~~~~ $< | sed "s/^~~~~//g;te;d;:e" | sort > $@

# Running the converter also produces titles-sorted.txt. Its timestamp is
# refreshed afterwards so that it is newer than links-outgoing.bin and make
# treats it as up to date.
links-outgoing.bin: $(BINDIR)/links-plain-to-binary titles.txt
	$(BINDIR)/links-plain-to-binary
	touch titles-sorted.txt

# Generated in the same step as links-outgoing.bin; the explicit empty recipe
# (';') tells make there is nothing further to run for this file.
titles-sorted.txt: links-outgoing.bin ;

links-incoming.bin: $(BINDIR)/links-outgoing-to-incoming links-outgoing.bin
	$(BINDIR)/links-outgoing-to-incoming