From 0f34be118ae3b89a707eca452ede80cb05bbc589 Mon Sep 17 00:00:00 2001 From: "Luke T. Shumaker" Date: Sat, 14 Oct 2023 16:02:04 -0600 Subject: cdxget: Tidy URL separators --- bin/cdxget | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'bin') diff --git a/bin/cdxget b/bin/cdxget index 46d56c4..1d465b2 100755 --- a/bin/cdxget +++ b/bin/cdxget @@ -1,7 +1,9 @@ #!/usr/bin/env bash -url='http://web.archive.org/cdx/search/cdx?' +url='http://web.archive.org/cdx/search/cdx' +s='?' for arg in "$@"; do - url+="$s${arg%%=*}=$(printf '%s' "${arg#*=}"|urlencode)&" + url+="$s${arg%%=*}=$(printf '%s' "${arg#*=}"|urlencode)" + s='&' done curl -sL "$url" -- cgit v1.2.3-2-g168b From b15e310abe81a952624c3e96dd117699de7359e1 Mon Sep 17 00:00:00 2001 From: "Luke T. Shumaker" Date: Thu, 12 Oct 2023 18:33:50 -0600 Subject: Always pass -f (--fail) to curl --- bin/cdxget | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'bin') diff --git a/bin/cdxget b/bin/cdxget index 1d465b2..6844fa6 100755 --- a/bin/cdxget +++ b/bin/cdxget @@ -6,4 +6,4 @@ for arg in "$@"; do url+="$s${arg%%=*}=$(printf '%s' "${arg#*=}"|urlencode)" s='&' done -curl -sL "$url" +curl -sfL "$url" -- cgit v1.2.3-2-g168b From 8722fff2231107011d4b87104b7a024e48a6e0a3 Mon Sep 17 00:00:00 2001 From: "Luke T. Shumaker" Date: Thu, 12 Oct 2023 18:34:46 -0600 Subject: Ditch bin/wayfore --- bin/wayfore | 4 ---- 1 file changed, 4 deletions(-) delete mode 100755 bin/wayfore (limited to 'bin') diff --git a/bin/wayfore b/bin/wayfore deleted file mode 100755 index b0bde8a..0000000 --- a/bin/wayfore +++ /dev/null @@ -1,4 +0,0 @@ -#!/usr/bin/sed -zrf -# The opposite of 'wayback' -s/(<[hH][eE][aA][dD]>).*/\1/ -s/.*// -- cgit v1.2.3-2-g168b From 84d09b97bf85096e98b8f6f7e95008788ab15f5f Mon Sep 17 00:00:00 2001 From: "Luke T. Shumaker" Date: Sat, 14 Oct 2023 17:41:48 -0600 Subject: sed -r is deprecated in favor of -E --- bin/gitify | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'bin') diff --git a/bin/gitify b/bin/gitify index 1e5d43d..7282dc3 100755 --- a/bin/gitify +++ b/bin/gitify @@ -82,7 +82,7 @@ main() { HACK_NAME='Luke Shumaker' HACK_EMAIL='lukeshu@lukeshu.com' - gitdate="$(sed -r 's/(....)(..)(..)(..)(..)(..)/\1-\2-\3T\4:\5:\6 '"$HACK_TZ"'/' <<<"$time")" + gitdate="$(sed -E 's/(....)(..)(..)(..)(..)(..)/\1-\2-\3T\4:\5:\6 '"$HACK_TZ"'/' <<<"$time")" git add . -- cgit v1.2.3-2-g168b From 402abe1085460e03de15d9dbca53487b930bb721 Mon Sep 17 00:00:00 2001 From: "Luke T. Shumaker" Date: Sat, 14 Oct 2023 17:44:00 -0600 Subject: gitify: Squelch the filter-branch warning --- bin/gitify | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'bin') diff --git a/bin/gitify b/bin/gitify index 7282dc3..b245f3e 100755 --- a/bin/gitify +++ b/bin/gitify @@ -99,11 +99,11 @@ main() { lastbranch="$branch" fi if [[ "$branch" == PROGRAMS/CVTUTF ]] && git log -n1 --stat|grep -qF 'ExpectedOutput.txt'; then - git filter-branch -f --parent-filter 'cat; echo " -p BETA/CVTUTF-1-3"' HEAD^..HEAD + FILTER_BRANCH_SQUELCH_WARNING=1 git filter-branch -f --parent-filter 'cat; echo " -p BETA/CVTUTF-1-3"' HEAD^..HEAD git update-ref -d refs/original/refs/heads/"$branch" fi if [[ "$branch" == PROGRAMS/CVTUTF.OLD ]] && git log -n1 --stat|grep -qi '.*\.c\s'; then - git filter-branch -f --parent-filter 'cat; echo " -p PROGRAMS/CVTUTF^"' HEAD^..HEAD + FILTER_BRANCH_SQUELCH_WARNING=1 git filter-branch -f --parent-filter 'cat; echo " -p PROGRAMS/CVTUTF^"' HEAD^..HEAD git update-ref -d refs/original/refs/heads/"$branch" fi fi -- cgit v1.2.3-2-g168b From 6af424ea20392be0487ec204591c93b0c65d4cb2 Mon Sep 17 00:00:00 2001 From: "Luke T. Shumaker" Date: Sat, 14 Oct 2023 18:41:45 -0600 Subject: poolify: Improve readability --- bin/poolify | 48 ++++++++++++++++++++++++++++++------------------ 1 file changed, 30 insertions(+), 18 deletions(-) (limited to 'bin') diff --git a/bin/poolify b/bin/poolify index e256157..649b71b 100755 --- a/bin/poolify +++ b/bin/poolify @@ -13,14 +13,27 @@ main() { set -euE -o pipefail shopt -s nullglob - echo '# Pass 1' - declare -A rewrite - rewrite[200109261739]=200303310700 - while read -r snap name date time size; do + arg_metadata_txt=$1 + arg_index_txt=$2 + + # Overrides ############################################################ + + declare -A override_datetime + override_datetime[200109261739]=200303310700 + + override_synthetic_listings=( + #YYYYMMDDHHMM branch_name newfiles + '200307291500 ALPHA/CVTUTF-1-1 ExpectedOutput.txt readme.txt' + ) + + # Main ################################################################# + + echo '# Pass 1 (initialize snapshots from $arg_metadata_txt)' + while read -r snap name date time; do dirpart="${name%/*}" filepart="${name##*/}" datetime="${date//-/}${time//:/}" - datetime="${rewrite[$datetime]:-$datetime}" + datetime="${override_datetime[$datetime]:-$datetime}" filedir=dat/pools/files/"${datetime}-${name//\//_}" snapdir=dat/pools/snaps/"${snap}-${dirpart//\//_}" if [[ -d "${filedir/.OLD/}" ]]; then @@ -28,9 +41,9 @@ main() { fi mkdir -p -- "$filedir" "$snapdir" ln -sr "$filedir/$filepart" "$snapdir" - done < "$1" + done < "$arg_metadata_txt" - echo '# Pass 1.5' + echo '# Pass 1.5 (initialize synthetic snapshots)' # Looking at the data, there are 3 revisions that we DON'T # have directory listings for. So we need to synthesize # those. @@ -39,7 +52,8 @@ main() { # synthesizing anything, then looking for files ending in # ".1". They are created during pass 2 if we have a file with # no matching listing. - while read -r datetime dirpart newfiles; do + for line in "${override_synthetic_listings[@]}"; do + read -r datetime dirpart newfiles <<<"$line" # We need to figure out which files to put in the # directory listing. We're going to do that by # mimicking the previous listing with that dirpart. @@ -65,12 +79,14 @@ main() { rm -- "$snapdir/$filepart" ln -sr "$filedir/$filepart" "$snapdir" done - done < <(printf '%s\n' \ - '200307291500 ALPHA/CVTUTF-1-1 ExpectedOutput.txt readme.txt' \ - ) + done - echo '# Pass 2' + echo '# Pass 2 (resolve files)' while read -r time url; do + if [[ "$url" == */ ]]; then + # Skip directories + continue + fi if [[ "$url" == */2.0-Update/* ]]; then # Gross hack continue @@ -79,10 +95,6 @@ main() { dirpart="${name%/*}" filepart="${name##*/}" - if [[ -z "$filepart" ]]; then - continue - fi - pools=(dat/pools/files/*-"${name//\//_}") if [[ "$name" = *.OLD* ]]; then pname="${name//\//_}" @@ -119,9 +131,9 @@ main() { fi i+=1 done - done < "$2" + done < "$arg_index_txt" - echo '# Pass 3' + echo '# Pass 3 (resolve missing files)' while read -r missing; do if [[ -f "${missing/.OLD}/${missing##*_}" ]]; then ln -sr "${missing/.OLD}/${missing##*_}" "$missing" -- cgit v1.2.3-2-g168b