# Makefile (path: root/Makefile)
# blob: 0182effae4942099568989788d199e12058c8fab
# Build configuration. Each value can be overridden on the command line,
# e.g. `make GCC="clang -O2"`.
#   GCC    - compiler command plus the flags used to build every tool
#   BINDIR - directory that receives the compiled tools
#   SRCDIR - directory holding the C and flex sources
GCC := gcc -O3 -g
BINDIR := bin
SRCDIR := src

# Delete a target whose recipe exits with an error after modifying it, so a
# partially written output is never mistaken for an up-to-date file.
.DELETE_ON_ERROR:

# Default goal: build the lookup tool, the preprocessing tools and the
# database files. Declared phony so that a stray file named `all` cannot
# make this goal look up to date.
.PHONY: all
all: lookuptools preptools dbfiles


# Create the directory that holds the compiled tools (no error if it
# already exists).
$(BINDIR):
	mkdir -p $@


# Convenience goal: build the lookup program. Declared phony because it
# names a group of files, not a file of its own.
.PHONY: lookuptools
lookuptools: $(BINDIR)/lookup-incoming

# Compile the lookup tool from its single C source.
# $(BINDIR) is an order-only prerequisite (after the `|`): the directory is
# created by its own rule before compiling, but its timestamp never forces
# a recompile.
$(BINDIR)/lookup-incoming: $(SRCDIR)/lookup-incoming.c | $(BINDIR)
	$(GCC) -o $@ $<



# Convenience goal: build the three preprocessing tools. Declared phony
# because it names a group of files, not a file of its own.
.PHONY: preptools
preptools: $(BINDIR)/links-xml-to-plain \
	$(BINDIR)/links-outgoing-to-incoming \
	$(BINDIR)/links-plain-to-binary

# Build the flex program that regex-converts the XML to plain text.
# See https://unix.stackexchange.com/a/413684
# Step 1: flex generates a C scanner next to the binary (target name + .c).
# Step 2: the scanner is compiled and linked with the flex library (-lfl).
# $(BINDIR) is an order-only prerequisite, so the directory exists before
# flex writes into it without its timestamp forcing a rebuild.
$(BINDIR)/links-xml-to-plain: $(SRCDIR)/links-xml-to-plain.l | $(BINDIR)
	flex -o $@.c $<
	$(GCC) -o $@ $@.c -lfl

# Compile the plain-to-binary converter from its single C source.
# Only the C file is a prerequisite: the recipe compiles nothing else, so
# the data file titles.txt is no longer listed here. It remains a
# prerequisite of links-outgoing.bin, the rule that actually runs this tool.
# Listing it here forced a recompile whenever the data changed and made
# building the tools require the input dump.
# $(BINDIR) is an order-only prerequisite (directory must exist; its
# timestamp is ignored).
$(BINDIR)/links-plain-to-binary: $(SRCDIR)/links-plain-to-binary.c | $(BINDIR)
	$(GCC) -o $@ $<

# Compile the outgoing-to-incoming converter from its single C source.
# $(BINDIR) is an order-only prerequisite (after the `|`): the directory is
# created by its own rule before compiling, but its timestamp never forces
# a recompile.
$(BINDIR)/links-outgoing-to-incoming: $(SRCDIR)/links-outgoing-to-incoming.c | $(BINDIR)
	$(GCC) -o $@ $<



# Convenience goal: produce the two database files. Declared phony because
# it names a group of files, not a file of its own.
.PHONY: dbfiles
dbfiles: links-incoming.bin titles-sorted.txt

# Convert the compressed XML link dump to plain text.
# Pipeline: lzop decompresses to stdout, pv reports progress, tail starts
# output at byte 45 (dropping the first 44 bytes), and the flex-built tool
# (the first prerequisite) performs the conversion.
# The output goes to a temporary file that is renamed onto the target only
# after the pipeline succeeds, so a failed conversion cannot leave a
# truncated target that looks up to date.
# NOTE(review): the shell reports only the last command's exit status, so a
# failing lzop is still not detected by this recipe.
enwiki-links-plain.txt: $(BINDIR)/links-xml-to-plain enwiki-links.xml.lzo
	lzop -dc enwiki-links.xml.lzo | pv | tail -c +45 | $< > $@.tmp
	mv -f $@.tmp $@

# Extract titles from the plain-text link file.
# grep keeps the lines that begin with the tilde marker, sed strips that
# marker (printing only lines where the substitution happened), and sort
# orders the result.
# The output goes to a temporary file that is renamed onto the target only
# after the pipeline succeeds, so a failed sort cannot leave a truncated
# titles.txt that looks up to date.
# NOTE(review): sort order depends on the current locale - confirm that the
# consumers of titles.txt expect that ordering.
titles.txt: enwiki-links-plain.txt
	grep ^~~~~ $< | sed "s/^~~~~//g;te;d;:e" | sort > $@.tmp
	mv -f $@.tmp $@

# Run the plain-to-binary converter (the first prerequisite). It is invoked
# without arguments; per the recipe note, the same run also writes
# titles-sorted.txt. That file is touched afterwards so its timestamp is
# not older than links-outgoing.bin.
links-outgoing.bin: $(BINDIR)/links-plain-to-binary titles.txt
	# This also produces titles-sorted.txt
	$<
	# Update timestamp so make does not rebuild links-outgoing.bin
	touch titles-sorted.txt

# titles-sorted.txt is generated in the same step as links-outgoing.bin, so
# there is nothing to run here. The recipe only verifies that the file is
# really present: if it was deleted while links-outgoing.bin still exists,
# make would otherwise report success without the file existing.
titles-sorted.txt: links-outgoing.bin
	@test -f $@ || { echo "$@ is missing; delete $< and re-run make to regenerate both" >&2; exit 1; }

# Run the outgoing-to-incoming converter (the first prerequisite) once
# links-outgoing.bin is up to date. The tool is invoked without arguments,
# so the files it reads and writes are presumably fixed inside the
# program - confirm in its source.
links-incoming.bin: $(BINDIR)/links-outgoing-to-incoming links-outgoing.bin
	$<