# Stop at the first failing step so that later stages never run against a
# stale binary or a partially written intermediate file.
set -e
# pipefail is not implemented by every /bin/sh; enable it only where the
# running shell accepts it, so a failure in any pipeline stage is detected.
if (set -o pipefail) 2>/dev/null; then
  set -o pipefail
fi

# Compile flex program to regex-convert the XML to plain text
# Reference: https://unix.stackexchange.com/a/413684
flex -o links-xml-to-plain.c links-xml-to-plain.l
gcc -O3 -o links-xml-to-plain links-xml-to-plain.c -lfl

# Convert to plain text: decompress the dump, show progress with pv, and feed
# the stream to the converter built above.
# `tail -c +45` starts output at byte 45, i.e. it drops the first 44 bytes of
# the decompressed stream.
# NOTE(review): presumably a fixed-size header the converter must not see --
# confirm against the input file.
lzop -dc enwiki-links.xml.lzo \
  | pv \
  | tail -c +45 \
  | ./links-xml-to-plain > enwiki-links-plain.txt

# Collect every line that starts with the `~~~~` marker, strip the marker, and
# write the sorted result to titles.txt. A single `sed -n ... p` both selects
# and strips, so no separate grep pass is needed.
# NOTE(review): presumably the marker lines are page titles (judging by the
# output file name) -- confirm against links-xml-to-plain.l.
# NOTE(review): `sort` uses the current locale's collation; if a consumer
# expects byte order, run this step with LC_ALL=C -- verify before changing.
sed -n 's/^~~~~//p' enwiki-links-plain.txt | sort > titles.txt

# Build (optimized, with debug symbols) and run the plain-to-binary step,
# reporting how long it takes. The program is invoked without arguments.
gcc -O3 -g -o links-plain-to-binary links-plain-to-binary.c
time ./links-plain-to-binary

# Build (optimized, with debug symbols) and run the outgoing-to-incoming step,
# reporting how long it takes. The program is invoked without arguments.
gcc -O3 -g -o links-outgoing-to-incoming links-outgoing-to-incoming.c
time ./links-outgoing-to-incoming