lookup: Clean up code a bit
[enwiki-links-graph.git] / Makefile
# Compiler command (with optimisation and debug flags) used by every C build rule.
GCC := gcc -O3 -g
# Directory that receives the compiled tools.
BINDIR := bin
# Directory that holds the C and flex sources.
SRCDIR := src
4
# Default goal: build the lookup tool, the preprocessing tools and the
# database files. Declared phony so that a stray file named "all" can never
# make this goal look up to date.
.PHONY: all
all: lookuptools preptools dbfiles
6
7
# Create the output directory for compiled tools; -p makes this a no-op when
# the directory already exists.
$(BINDIR):
	mkdir -p $@
10
11
# Convenience target: build the lookup tool. Declared phony because it names
# a group of binaries, not a file that the build produces.
.PHONY: lookuptools
lookuptools: $(BINDIR)/lookup-incoming
13
# Compile the lookup-incoming tool from its single C source, with all
# warnings enabled. $(@D) is the directory part of the target, i.e. the
# output directory, which is created first if needed.
$(BINDIR)/lookup-incoming: $(SRCDIR)/lookup-incoming.c
	mkdir -p $(@D)
	$(GCC) -Wall -o $@ $<
17
18
19
# Convenience target: build every preprocessing tool. Declared phony because
# it names a group of binaries, not a file that the build produces.
.PHONY: preptools
preptools: $(BINDIR)/links-xml-to-plain \
           $(BINDIR)/links-outgoing-to-incoming \
           $(BINDIR)/links-plain-to-binary
23
# Build the XML-to-plain-text converter in two steps: flex turns the .l
# scanner description into C (written next to the binary as $@.c), then the
# generated C is compiled and linked against the flex runtime library (-lfl).
$(BINDIR)/links-xml-to-plain: $(SRCDIR)/links-xml-to-plain.l
	mkdir -p $(@D)
	# Compile flex program to regex-convert the XML to plain text
	#https://unix.stackexchange.com/a/413684
	flex -o $@.c $<
	$(GCC) -o $@ $@.c -lfl
30
# Compile the plain-text-to-binary converter.
# Only the C source is a prerequisite: the compile command reads nothing but
# $<. titles.txt used to be listed here as well, which forced the whole
# dump-extraction pipeline to run (and the binary to be recompiled whenever
# the data changed) just to build the tool. The links-outgoing.bin rule,
# which actually runs the tool, already depends on titles.txt.
$(BINDIR)/links-plain-to-binary: $(SRCDIR)/links-plain-to-binary.c
	mkdir -p $(BINDIR)
	$(GCC) -o $@ $<
34
# Compile the outgoing-to-incoming converter from its single C source.
# $(@D) is the directory part of the target; it is created first if needed.
$(BINDIR)/links-outgoing-to-incoming: $(SRCDIR)/links-outgoing-to-incoming.c
	mkdir -p $(@D)
	$(GCC) -o $@ $<
38
39
40
# Convenience target: build the generated database files. Declared phony
# because it names a group of outputs, not a file that the build produces.
.PHONY: dbfiles
dbfiles: links-incoming.bin titles-sorted.txt
42
# Convert the compressed XML link dump to plain text.
# Pipeline: decompress to stdout, show progress with pv, skip the first 44
# bytes of the stream (tail -c +45 starts output at byte 45 -- presumably a
# fixed header; TODO confirm), then run the converter.
# The result is written to a temporary file and renamed only when the
# converter exits successfully, so a failed run can no longer leave a
# truncated target behind that make would treat as up to date.
# NOTE: with the default shell only the exit status of the last pipeline
# stage (the converter) is checked.
enwiki-links-plain.txt: $(BINDIR)/links-xml-to-plain enwiki-links.xml.xz
	unxz -c enwiki-links.xml.xz | pv | tail -c +45 | $(BINDIR)/links-xml-to-plain > $@.tmp
	mv -f $@.tmp $@
46
# Extract the titles: keep the lines of the plain-text dump that start with
# "~~~~", strip that marker, and sort the result.
# grep pre-filters the lines; the sed script removes the marker and deletes
# any line on which no substitution happened.
# The result is written to a temporary file and renamed only when sort
# succeeds, so a failed run cannot leave a truncated titles.txt that make
# would treat as up to date.
titles.txt: enwiki-links-plain.txt
	grep ^~~~~ $< | sed "s/^~~~~//g;te;d;:e" | sort > $@.tmp
	mv -f $@.tmp $@
50
# Run the plain-text-to-binary converter (the first prerequisite, hence $<).
# It is invoked without arguments, so it chooses its own input and output
# file names. According to the recipe notes it writes titles-sorted.txt in
# the same run; that file is touched afterwards so its timestamp is not older
# than links-outgoing.bin, on which the titles-sorted.txt rule depends.
links-outgoing.bin: $(BINDIR)/links-plain-to-binary titles.txt
	# This also produces titles-sorted.txt
	$<
	# Update timestamp so make does not rebuild links-outgoing.bin
	touch titles-sorted.txt
56
# titles-sorted.txt is generated in the same step as links-outgoing.bin, so
# there is nothing to build here. The recipe only verifies that the file
# really exists: previously a missing titles-sorted.txt (with
# links-outgoing.bin still present) was silently reported as built. Now the
# build stops with an explanation instead.
titles-sorted.txt: links-outgoing.bin
	@test -f $@ || { echo "$@ is missing: delete $< and re-run make to regenerate both" >&2; exit 1; }
59
# Run the outgoing-to-incoming converter (the first prerequisite, hence $<).
# It is invoked without arguments, so it chooses its own input and output
# file names -- presumably reading links-outgoing.bin and writing this
# target; verify against the tool's source.
links-incoming.bin: $(BINDIR)/links-outgoing-to-incoming links-outgoing.bin
	$<