summary refs log tree commit diff
path: root/convert-to-plain.sh
diff options
context:
space:
mode:
author norly <ny-git@enpas.org> 2019-07-14 17:28:47 +0200
committer norly <ny-git@enpas.org> 2019-07-14 17:28:47 +0200
commit f0f54296b5b445c6ce0e47486bcdcb0deca582ff (patch)
tree 35c3858bab40f4bf8f6c57e2d5522f17d2928511 /convert-to-plain.sh
parent 64907e38005ada5b2a545ae58f05d0fd616ffa79 (diff)
Move to Makefile and .gitignore
Diffstat (limited to 'convert-to-plain.sh')
-rw-r--r-- convert-to-plain.sh 20
1 files changed, 0 insertions, 20 deletions
diff --git a/convert-to-plain.sh b/convert-to-plain.sh
deleted file mode 100644
index e7bd378..0000000
--- a/convert-to-plain.sh
+++ /dev/null
@@ -1,20 +0,0 @@
-#!/bin/bash
-
-# Compile flex program to regex-convert the XML to plain text
-#https://unix.stackexchange.com/a/413684
-flex -o links-xml-to-plain.c links-xml-to-plain.l
-gcc -O3 -o links-xml-to-plain links-xml-to-plain.c -lfl
-
-# Convert to plain text
-lzop -dc enwiki-links.xml.lzo | pv | tail -c +45 | ./links-xml-to-plain > enwiki-links-plain.txt
-
-# Extract titles
-grep ^~~~~ enwiki-links-plain.txt | sed "s/^~~~~//g;te;d;:e" | sort > titles.txt
-
-
-
-gcc -O3 -g -o links-plain-to-binary links-plain-to-binary.c
-time ./links-plain-to-binary
-
-gcc -O3 -g -o links-outgoing-to-incoming links-outgoing-to-incoming.c
-time ./links-outgoing-to-incoming