From 99011e7fcebeccc26a3da591e3445a93ffadad3c Mon Sep 17 00:00:00 2001
From: Luke Shumaker
Date: Fri, 30 Jun 2017 20:52:56 -0400
Subject: more

---
NOTE(review): the line structure of this patch was reconstructed from a
whitespace-collapsed copy. Recipe lines were given a leading tab and five
blank context lines were re-inserted so that the hunk matches its
"-1,30 +1,28" header; their exact positions are inferred -- confirm against
blobs eb8ae8e/262d3af. The grep pattern in the "fix" recipe was split across
lines in the copy and may be missing characters between the opening quote
and "503"; the author e-mail address was likewise absent. Verify before
applying.

 Makefile   | 24 +++++++++++-------------
 bin/cdxcat |  0
 bin/cdxcut |  0
 3 files changed, 11 insertions(+), 13 deletions(-)
 delete mode 100644 bin/cdxcat
 delete mode 100644 bin/cdxcut

diff --git a/Makefile b/Makefile
index eb8ae8e..262d3af 100644
--- a/Makefile
+++ b/Makefile
@@ -1,30 +1,28 @@
+SHELL=bash -o pipefail
 PATH:=$(CURDIR)/bin:$(PATH)
 export PATH
 
-all: each-cdx
+all: dat/index.txt
+
+fix:
+	grep -rl '503' dat | xargs rm -fv --
 
 dat:
 	mkdir -p $@
-dat/ftp.txt: | dat
-	cdxget 'url=ftp.unicode.org/Public/*' 'fl=urlkey' 'filter=statuscode:200' 'filter=urlkey:.*(cvt|convert)utf.*' > $@
-dat/www.txt: | dat
+dat/cdxindex.txt: | dat
 	cdxget 'url=www.unicode.org/Public/*' 'fl=urlkey' 'filter=statuscode:200' 'filter=urlkey:.*(cvt|convert)utf.*' > $@
-dat/urlkeys.txt: dat/ftp.txt dat/www.txt
-	cat $^ | cut -d '?' -f1 | sed 's/,ftp)/)/' | sort -u > $@
+dat/urlkeys.txt: dat/cdxindex.txt
+	cat $^ | cut -d '?' -f1 | sort -u > $@
 dat/urlkeys.mk: dat/urlkeys.txt
 	cat $^ | sed 's/^/urlkeys+=/' < $< > $@
 
 -include dat/urlkeys.mk
-rp = )
-c = ,
-all_urlkeys = $(urlkeys) $(subst $(rp),$(c)ftp$(rp),$(urlkeys))
 
 dat/each-cdx/%.txt:
-	mkdir -p '$(@D)'
+	@mkdir -p '$(@D)'
 	cdxget "url=$$(urlkey2url '$*')" 'filter=statuscode:200' 'collapse=digest' 'fl=timestamp,original' > '$@'
-
-each-cdx: $(addprefix dat/each-cdx/,$(addsuffix .txt,$(all_urlkeys)))
-.PHONY: each-cdx
+dat/index.txt: $(addprefix dat/each-cdx/,$(addsuffix .txt,$(urlkeys)))
+	cat -- $(foreach c,$^,'$c') | sort > $@
 
 .DELETE_ON_ERROR:
 .SECONDARY:
diff --git a/bin/cdxcat b/bin/cdxcat
deleted file mode 100644
index e69de29..0000000
diff --git a/bin/cdxcut b/bin/cdxcut
deleted file mode 100644
index e69de29..0000000
-- 
cgit v1.1-4-g5e80