From 838a73363333bd2eda5aaf08e41701760406987a Mon Sep 17 00:00:00 2001
From: Luke Shumaker
Date: Sat, 1 Jul 2017 19:55:21 -0400
Subject: Almost there

Now it just needs to figure out that it needs to synthesize directory
listings for dat/pools/files/*/*.1 files.
---
 bin/cdxget       | 2 +-
 bin/fmt-metadata | 5 ++++-
 bin/poolify      | 9 +++++++++
 3 files changed, 14 insertions(+), 2 deletions(-)

(limited to 'bin')

diff --git a/bin/cdxget b/bin/cdxget
index a54612d..46d56c4 100755
--- a/bin/cdxget
+++ b/bin/cdxget
@@ -4,4 +4,4 @@ url='http://web.archive.org/cdx/search/cdx?'
 for arg in "$@"; do
 	url+="$s${arg%%=*}=$(printf '%s' "${arg#*=}"|urlencode)&"
 done
-curl -s "$url"
+curl -sL "$url"
diff --git a/bin/fmt-metadata b/bin/fmt-metadata
index 7867d63..c92419b 100755
--- a/bin/fmt-metadata
+++ b/bin/fmt-metadata
@@ -19,5 +19,8 @@ $stdin.each_line do |line|
 		datetime = Time.parse("#{datetime} +00:00").utc.strftime('%Y-%m-%d %H:%M')
 	end
 
-	puts ("%-22s %s %3s%s" % [ name, datetime, size_numb, size_unit ])
+	# discard the size, I guess. The number of digits precision was
+	# inconsistent over the years.
+	#puts ("%-22s %s %3s%s" % [ name, datetime, size_numb, size_unit ])
+	puts ("%-22s %s" % [ name, datetime ])
 end
diff --git a/bin/poolify b/bin/poolify
index af8bf40..e33821b 100755
--- a/bin/poolify
+++ b/bin/poolify
@@ -13,6 +13,7 @@ main() {
 	set -euE -o pipefail
 	shopt -s nullglob
 
+	echo '# Pass 1'
 	while read -r snap name date time size; do
 		dirpart="${name%/*}"
 		filepart="${name##*/}"
@@ -22,6 +23,7 @@ main() {
 		ln -sr "$filedir/$filepart" "$snapdir"
 	done < "$1"
 
+	echo '# Pass 2'
 	while read -r time url; do
 		name="${url##*/Public/}"
 		dirpart="${name%/*}"
@@ -63,6 +65,13 @@ main() {
 			i+=1
 		done
 	done < "$2"
+
+	echo '# Pass 3'
+	while read -r missing; do
+		if [[ -f "${missing/.OLD}/${missing##*_}" ]]; then
+			ln -sr "${missing/.OLD}/${missing##*_}" "$missing"
+		fi
+	done < <(find dat/pools/files/*-PROGRAMS_CVTUTF.OLD_* -type d -empty)
 }
 
 main "$@"
-- 
cgit v1.2.3-2-g168b