summaryrefslogtreecommitdiff
path: root/bin/poolify
blob: e2561573291e7d165b4ab217897c071a0a0da054 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
#!/usr/bin/env bash

# url2murl URL
#
# Print URL on stdout (without a trailing newline) after replacing
# three characters with a caret followed by their two-digit hex code:
#   '^' -> '^5E',  ':' -> '^3A',  '%' -> '^25'
url2murl() {
	local murl=$1
	local rule
	# The '^' rule has to run first: the later rules insert new '^'
	# characters, and those must not be escaped a second time.
	for rule in '^ ^5E' ': ^3A' '% ^25'; do
		# Each rule is "<char> <replacement>"; the quoted pattern is
		# matched literally, not as a glob.
		murl=${murl//"${rule% *}"/"${rule#* }"}
	done
	printf '%s' "$murl"
}

# main SNAPLIST URLLIST
#
# Build the symlink "pools" under dat/pools/ (all paths are relative
# to the current working directory).
#
#   $1  file whose lines are whitespace-separated fields
#       "snap name date time size"
#   $2  file whose lines are whitespace-separated fields "time url"
#
# Pass 1    creates dat/pools/files/<datetime>-<name> and
#           dat/pools/snaps/<snap>-<dir> for every line of $1 and
#           symlinks the pooled file into the snap directory.
# Pass 1.5  synthesizes listings that $1 does not contain by copying
#           the previous snapshot of the same directory.
# Pass 2    symlinks dat/content-file/<time>/<murl> for every line of
#           $2 into the last pool (in sorted order) whose timestamp
#           is not later than <time>.
# Pass 3    fills still-empty *-PROGRAMS_CVTUTF.OLD_* pools with a
#           link to the matching non-.OLD pool entry, if that exists.
#
# Enables `set -euE -o pipefail` and `nullglob` for the whole shell;
# any failing step therefore aborts the script.
main() {
	set -euE -o pipefail
	shopt -s nullglob

	local snap name date time size url
	local dirpart filepart datetime filedir snapdir
	local newfiles prevsnap isnap
	local pname pool pooltime mypool file link a b missing
	local -a pools

	echo '# Pass 1'
	# Timestamps that must be filed under a different pool timestamp.
	declare -A rewrite
	rewrite[200109261739]=200303310700
	while read -r snap name date time size; do
		dirpart="${name%/*}"
		filepart="${name##*/}"
		# "YYYY-MM-DD" + "HH:MM" -> "YYYYMMDDHHMM"
		datetime="${date//-/}${time//:/}"
		datetime="${rewrite[$datetime]:-$datetime}"
		filedir=dat/pools/files/"${datetime}-${name//\//_}"
		snapdir=dat/pools/snaps/"${snap}-${dirpart//\//_}"
		# Share the pool with the non-.OLD name when that pool
		# already exists.
		if [[ -d "${filedir/.OLD/}" ]]; then
			filedir="${filedir/.OLD/}"
		fi
		mkdir -p -- "$filedir" "$snapdir"
		ln -sr "$filedir/$filepart" "$snapdir"
	done < "$1"

	echo '# Pass 1.5'
	# Looking at the data, there are 3 revisions that we DON'T
	# have directory listings for.  So we need to synthesize
	# those.
	#
	# I created the list of listings to synthesize by not
	# synthesizing anything, then looking for files ending in
	# ".1".  They are created during pass 2 if we have a file with
	# no matching listing.
	while read -r datetime dirpart newfiles; do
		# We need to figure out which files to put in the
		# directory listing.  We're going to do that by
		# mimicking the previous listing with that dirpart.
		prevsnap=''
		for isnap in dat/pools/snaps/*-"${dirpart//\//_}"; do
			isnap=${isnap##*/}
			# The glob is sorted, so the last match wins.
			if [[ "${isnap%%-*}" -lt "${datetime}00" ]]; then
				prevsnap=$isnap
			fi
		done
		if [[ -z "$prevsnap" ]]; then
			>&2 printf 'Could not find listing of %s before %s\n' "$dirpart" "$datetime"
			false
		fi
		# Ok, now copy that snapshot
		snapdir=dat/pools/snaps/"${datetime}99-${dirpart//\//_}"
		cp -aT dat/pools/snaps/"$prevsnap" "$snapdir"
		# And replace the links for the files we need to change.
		# $newfiles is deliberately unquoted: it is a
		# space-separated list of file names.
		for filepart in $newfiles; do
			name="$dirpart/$filepart"
			filedir=dat/pools/files/"${datetime}-${name//\//_}"
			mkdir -p -- "$filedir"
			rm -- "$snapdir/$filepart"
			ln -sr "$filedir/$filepart" "$snapdir"
		done
	done < <(printf '%s\n' \
			'200307291500 ALPHA/CVTUTF-1-1    ExpectedOutput.txt readme.txt' \
		)

	echo '# Pass 2'
	while read -r time url; do
		if [[ "$url" == */2.0-Update/* ]]; then
			# Gross hack
			continue
		fi
		name="${url##*/Public/}"
		dirpart="${name%/*}"
		filepart="${name##*/}"

		# A URL ending in '/' names a directory, not a file.
		if [[ -z "$filepart" ]]; then
			continue
		fi

		pools=(dat/pools/files/*-"${name//\//_}")
		if [[ "$name" = *.OLD* ]]; then
			# .OLD files may have been pooled under the non-.OLD
			# name in pass 1; consider both, in sorted order.
			pname="${name//\//_}"
			pools+=(dat/pools/files/*-"${pname/.OLD/}")
			readarray -t pools < <(printf '%s\n' "${pools[@]}" | sort)
		fi
		pools=("${pools[@]##*/}")

		# Pick the last pool that is not newer than this content.
		mypool=''
		for pool in "${pools[@]}"; do
			pooltime="${pool%%-*}"
			if [[ "${pooltime}00" -le "$time" ]]; then
				mypool=$pool
			fi
		done
		if [[ -z "$mypool" ]]; then
			>&2 printf 'Could not find pool for %s %s\n' "$time" "$url"
			false
		fi

		# Quote the URL so it is neither word-split nor glob-expanded.
		file="dat/content-file/$time/$(url2murl "$url")"

		# Try "<filepart>", then "<filepart>.1", "<filepart>.2", ...
		# until a slot either already holds identical content or is
		# free to take the new link.
		declare -i i=0
		while true; do
			link="dat/pools/files/$mypool/$filepart.$i"
			link="${link%.0}"
			# Resolving the slot is allowed to fail.
			a="$(readlink -f "$link")" || true
			b="$(readlink -f "$file")"
			if cmp -s -- "$a" "$b"; then
				break
			fi
			if ln -sr "$b" "$link"; then
				break
			fi
			i+=1
		done
	done < "$2"

	echo '# Pass 3'
	while read -r missing; do
		# ${missing##*_} is the text after the last '_' in the pool
		# directory name.
		if [[ -f "${missing/.OLD}/${missing##*_}" ]]; then
			ln -sr "${missing/.OLD}/${missing##*_}" "$missing"
		fi
	done < <(find dat/pools/files/*-PROGRAMS_CVTUTF.OLD_* -type d -empty)
}

# Script entry point: forward all command-line arguments to main.
main "$@"