From f0f54296b5b445c6ce0e47486bcdcb0deca582ff Mon Sep 17 00:00:00 2001
From: norly
Date: Sun, 14 Jul 2019 17:28:47 +0200
Subject: [PATCH] Move to Makefile and .gitignore

---
 .gitignore                                 |  6 ++
 Makefile                                   | 77 +++++++++++++++++++
 convert-to-plain.sh                        | 20 -----
 .../links-outgoing-to-incoming.c           |  0
 .../links-plain-to-binary.c                |  0
 .../links-xml-to-plain.l                   |  0
 lookup-incoming.c => src/lookup-incoming.c |  0
 7 files changed, 83 insertions(+), 20 deletions(-)
 create mode 100644 .gitignore
 create mode 100644 Makefile
 delete mode 100644 convert-to-plain.sh
 rename links-outgoing-to-incoming.c => src/links-outgoing-to-incoming.c (100%)
 rename links-plain-to-binary.c => src/links-plain-to-binary.c (100%)
 rename links-xml-to-plain.l => src/links-xml-to-plain.l (100%)
 rename lookup-incoming.c => src/lookup-incoming.c (100%)

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..20bdc9e
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,6 @@
+bin/
+enwiki-links-plain.txt
+titles.txt
+titles-sorted.txt
+links-outgoing.bin
+links-incoming.bin
diff --git a/Makefile b/Makefile
new file mode 100644
index 0000000..0182eff
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,77 @@
+# Builds the lookup tools, the preparation tools and the database files.
+#
+# Requires GNU Make (3.82 or newer for .SHELLFLAGS). Recipes run under bash
+# with pipefail so that a failure anywhere in a pipeline (e.g. lzop or pv
+# missing) fails the build instead of leaving an empty or truncated output
+# file that looks up to date.
+SHELL := bash
+.SHELLFLAGS := -o pipefail -c
+
+# Remove a target whose recipe failed, so it is rebuilt on the next run.
+.DELETE_ON_ERROR:
+
+GCC=gcc -O3 -g
+BINDIR=bin
+SRCDIR=src
+
+# Aggregate targets; none of them names a file.
+.PHONY: all lookuptools preptools dbfiles
+
+all: lookuptools preptools dbfiles
+
+
+# Output directory for compiled tools. Used as an order-only prerequisite
+# (after "|") so its changing timestamp never triggers a rebuild.
+$(BINDIR):
+	mkdir -p $@
+
+
+lookuptools: $(BINDIR)/lookup-incoming
+
+$(BINDIR)/lookup-incoming: $(SRCDIR)/lookup-incoming.c | $(BINDIR)
+	$(GCC) -o $@ $<
+
+
+
+preptools: $(BINDIR)/links-xml-to-plain \
+		$(BINDIR)/links-outgoing-to-incoming \
+		$(BINDIR)/links-plain-to-binary
+
+$(BINDIR)/links-xml-to-plain: $(SRCDIR)/links-xml-to-plain.l | $(BINDIR)
+	# Compile flex program to regex-convert the XML to plain text
+	#https://unix.stackexchange.com/a/413684
+	flex -o $(BINDIR)/links-xml-to-plain.c $<
+	$(GCC) -o $@ $(BINDIR)/links-xml-to-plain.c -lfl
+
+# NOTE(review): titles.txt is not passed to the compiler (only $< is); it looks
+# like a run-time input of the tool rather than a build input. Confirm before
+# dropping it from this rule.
+$(BINDIR)/links-plain-to-binary: $(SRCDIR)/links-plain-to-binary.c titles.txt | $(BINDIR)
+	$(GCC) -o $@ $<
+
+$(BINDIR)/links-outgoing-to-incoming: $(SRCDIR)/links-outgoing-to-incoming.c | $(BINDIR)
+	$(GCC) -o $@ $<
+
+
+
+dbfiles: links-incoming.bin titles-sorted.txt
+
+enwiki-links-plain.txt: $(BINDIR)/links-xml-to-plain enwiki-links.xml.lzo
+	# Convert to plain text
+	lzop -dc enwiki-links.xml.lzo | pv | tail -c +45 | $(BINDIR)/links-xml-to-plain > $@
+
+titles.txt: enwiki-links-plain.txt
+	# Extract titles
+	grep ^~~~~ $< | sed "s/^~~~~//g;te;d;:e" | sort > $@
+
+links-outgoing.bin: $(BINDIR)/links-plain-to-binary titles.txt
+	# This also produces titles-sorted.txt
+	$(BINDIR)/links-plain-to-binary
+	# Update timestamp so make does not rebuild links-outgoing.bin
+	touch titles-sorted.txt
+
+titles-sorted.txt: links-outgoing.bin
+	# Generated in the same step as links-outgoing.bin
+
+links-incoming.bin: $(BINDIR)/links-outgoing-to-incoming links-outgoing.bin
+	$(BINDIR)/links-outgoing-to-incoming
diff --git a/convert-to-plain.sh b/convert-to-plain.sh
deleted file mode 100644
index e7bd378..0000000
--- a/convert-to-plain.sh
+++ /dev/null
@@ -1,20 +0,0 @@
-#!/bin/bash
-
-# Compile flex program to regex-convert the XML to plain text
-#https://unix.stackexchange.com/a/413684
-flex -o links-xml-to-plain.c links-xml-to-plain.l
-gcc -O3 -o links-xml-to-plain links-xml-to-plain.c -lfl
-
-# Convert to plain text
-lzop -dc enwiki-links.xml.lzo | pv | tail -c +45 | ./links-xml-to-plain > enwiki-links-plain.txt
-
-# Extract titles
-grep ^~~~~ enwiki-links-plain.txt | sed "s/^~~~~//g;te;d;:e" | sort > titles.txt
-
-
-
-gcc -O3 -g -o links-plain-to-binary links-plain-to-binary.c
-time ./links-plain-to-binary
-
-gcc -O3 -g -o links-outgoing-to-incoming links-outgoing-to-incoming.c
-time ./links-outgoing-to-incoming
diff --git a/links-outgoing-to-incoming.c b/src/links-outgoing-to-incoming.c
similarity index 100%
rename from links-outgoing-to-incoming.c
rename to src/links-outgoing-to-incoming.c
diff --git a/links-plain-to-binary.c b/src/links-plain-to-binary.c
similarity index 100%
rename from links-plain-to-binary.c
rename to src/links-plain-to-binary.c
diff --git a/links-xml-to-plain.l b/src/links-xml-to-plain.l
similarity index 100%
rename from links-xml-to-plain.l
rename to src/links-xml-to-plain.l
diff --git a/lookup-incoming.c b/src/lookup-incoming.c
similarity index 100%
rename from lookup-incoming.c
rename to src/lookup-incoming.c
-- 
2.30.2