summary refs log tree commit diff
diff options
context:
space:
mode:
author Luke Shumaker <lukeshu@lukeshu.com> 2017-07-02 18:36:07 -0400
committer Luke Shumaker <lukeshu@lukeshu.com> 2017-07-02 18:36:07 -0400
commit 71e3ca00e35e26fc7738ed955bb7178fac4c919d (patch)
tree b515646f1242f7c15c7ea9b3ec80f5dc71fda097
parent 3f1ea48182ef0972e3df162a446cbcbbfc1e2a4e (diff)
Use pools in git
-rw-r--r-- Makefile 7
-rwxr-xr-x bin/gitify 80
-rwxr-xr-x bin/gitthing 71
-rwxr-xr-x bin/poolify 2
4 files changed, 84 insertions, 76 deletions
diff --git a/Makefile b/Makefile
index 0272edb..2ee9a42 100644
--- a/Makefile
+++ b/Makefile
@@ -9,7 +9,7 @@ dirfail = ( r=$$?; mv -- '$@'{,.bak}; exit $$r; )
all:
$(MAKE) dat/urlkeys.mk
$(MAKE) dat/index.mk
- $(MAKE) dat/pools dat/git
+ $(MAKE) dat/git
fix:
grep -rl '<html><body><h1>503' dat | xargs rm -fv --
@@ -69,10 +69,9 @@ dat/pools.txt: $(addsuffix metadata.txt,$(content-dir)) dat/index.txt
dat/pools: $(download) dat/pools.txt dat/index.txt
rm -rf -- $@ $@.bak
poolify dat/pools.txt dat/index.txt || $(dirfail)
-
-dat/git: $(download) dat/index.txt
+dat/git: dat/pools $(addsuffix readme.txt,$(content-dir)) $(addsuffix metadata.txt,$(content-dir))
rm -rf -- $@ $@.bak
- gitthing dat/git < dat/index.txt || $(dirfail)
+ gitify $@ || $(dirfail)
################################################################################
endif
diff --git a/bin/gitify b/bin/gitify
new file mode 100755
index 0000000..55ddff8
--- /dev/null
+++ b/bin/gitify
@@ -0,0 +1,80 @@
+#!/usr/bin/env bash
+
+empty() {
+ [[ $(stat -c %s "$1") -eq 0 ]]
+}
+
+url2murl() {
+ local x
+ x=$1
+ x=${x//'^'/'^5E'}
+ x=${x//':'/'^3A'}
+ x=${x//'%'/'^25'}
+ printf '%s' "$x"
+}
+
+murl2url() {
+ local x
+ x=$1
+ x=${x//'^25'/'%'}
+ x=${x//'^3A'/':'}
+ x=${x//'^5E'/'^'}
+ printf '%s' "$x"
+}
+
+main() {
+ set -euE -o pipefail
+ shopt -s nullglob
+ shopt -s globstar
+
+ top=$PWD
+ mkdir -p "$1"
+ cd "$1"
+ git init
+ echo 'ref: refs/heads/PROGRAMS/CVTUTF' > .git/HEAD
+ git commit --allow-empty -m 'initial commit'
+
+ for snapshot in "$top"/dat/pools/snaps/*; do
+ snapname="${snapshot##*/}"
+ time="${snapname%%-*}"
+ dirpart="${snapname#*-}"
+ dirpart="${dirpart//_/\/}"
+
+ branch=$dirpart
+ git checkout PROGRAMS/CVTUTF
+ git checkout -b "$branch" || true
+ git checkout "$branch"
+
+ rm -f -- * .metadata.txt
+ cp -- "$snapshot"/* .
+
+ if [[ "$time" != *99 ]]; then
+ listingdir=("$top/dat/content-dir/$time/"**"/Public/$dirpart")
+ if [[ ${#listingdir[@]} != 1 ]]; then
+ >&2 printf 'Cannot find wayback listing for %s' "$snapname"
+ false
+ fi
+ if ! [[ -f readme.txt ]] && ! empty "$listingdir/readme.txt"; then
+ cp "$listingdir/readme.txt" .
+ fi
+ cp "$listingdir/metadata.txt" .metadata.txt
+ fi
+
+ if [[ -n "$(git status -s .)" ]]; then
+ if [[ "$time" = *99 ]]; then
+ msg="Synthesized listing: ${time} ${dirpart}"
+ time="${time%99}00"
+ else
+ waurl="http://web.archive.org/web/$(murl2url "${listingdir##*/content-dir/}/")"
+ msg="$waurl"
+ fi
+ gitdate="$(sed -r 's/(....)(..)(..)(..)(..)(..)/\1-\2-\3T\4:\5:\6/' <<<"$time")"
+ git add .
+ export GIT_AUTHOR_DATE=$gitdate
+ export GIT_COMMITTER_DATE=$gitdate
+ git commit -m "$msg"
+ fi
+ done
+}
+
+main "$@"
diff --git a/bin/gitthing b/bin/gitthing
deleted file mode 100755
index ff7b5ac..0000000
--- a/bin/gitthing
+++ /dev/null
@@ -1,71 +0,0 @@
-#!/usr/bin/env bash
-
-empty() {
- [[ $(stat -c %s "$1") -eq 0 ]]
-}
-
-url2murl() {
- local x
- x=$1
- x=${x//'^'/'^5E'}
- x=${x//':'/'^3A'}
- x=${x//'%'/'^25'}
- printf '%s' "$x"
-}
-
-murl2url() {
- local x
- x=$1
- x=${x//'^25'/'%'}
- x=${x//'^3A'/':'}
- x=${x//'^5E'/'^'}
- printf '%s' "$x"
-}
-
-main() {
- set -euE -o pipefail
- top=$PWD
-
- mkdir -p "$1"
- cd "$1"
- git init
- echo 'ref: refs/heads/PROGRAMS/CVTUTF' > .git/HEAD
- git commit --allow-empty -m 'initial commit'
-
- while read -r time url; do
- suffix="${url##*/Public/}"
- dirpart="${suffix%/*}"
- filepart="${suffix##*/}"
-
- branch=$dirpart
-
- git checkout PROGRAMS/CVTUTF
- git checkout -b "$branch" || true
- git checkout "$branch"
-
- if [[ -n "$filepart" ]]; then
- file="$top/dat/content-file/$time/$(url2murl "$url")"
- cp "$file" .
- else
- dir="$top/dat/content-dir/$time/$(url2murl "$url")"
- comm -23 \
- <(git ls-files) \
- <(< "$dir/metadata.txt" awk '{print $1}') \
- | xargs -r0 rm -f --
- if ! empty "$dir/readme.txt"; then
- cp "$dir/readme.txt" .
- fi
- cp "$dir/metadata.txt" .metadata.txt
- fi
-
- if [[ -n "$(git status -s .)" ]]; then
- gitdate="$(sed -r 's/(....)(..)(..)(..)(..)(..)/\1-\2-\3T\4:\5:\6/' <<<"$time")"
- git add .
- export GIT_AUTHOR_DATE=$gitdate
- export GIT_COMMITTER_DATE=$gitdate
- git commit -m "$time $url"
- fi
- done
-}
-
-main "$@"
diff --git a/bin/poolify b/bin/poolify
index 48dc828..f47f2f3 100755
--- a/bin/poolify
+++ b/bin/poolify
@@ -48,7 +48,7 @@ main() {
false
fi
# Ok, now copy that snapshot
- snapdir=dat/pools/snaps/"${datetime}00-${dirpart//\//_}"
+ snapdir=dat/pools/snaps/"${datetime}99-${dirpart//\//_}"
cp -aT dat/pools/snaps/"$prevsnap" "$snapdir"
# And touch file files we need to change
for filepart in $newfiles; do