1 files changed, 63 insertions, 0 deletions
diff --git a/Makefile b/Makefile
index ed1dbad..63fc135 100644
--- a/Makefile
+++ b/Makefile
@@ -6,13 +6,28 @@ url2murl = $(subst %,^25,$(subst :,^3A,$(subst ^,^5E,$1)))
 murl2url = $(subst ^5E,^,$(subst ^3A,:,$(subst ^25,%,$1)))
 dirfail = ( r=$$?; mv -- '$@'{,.bak}; exit $$r; )
 
+# The build is split into stages because GNU Make must sometimes
+# decide what the build tree looks like based on the output of a
+# previous stage.  That is: the stages exist for a technical GNU Make
+# reason, not for human comprehensibility, so their sizes are
+# lopsided: the first two are very small, and almost everything is
+# in the third stage.
 all:
+	# Stage 1 ######################################################################
 	$(MAKE) dat/urlkeys.mk
+	# Stage 2 ######################################################################
 	$(MAKE) dat/index.mk
+	# Stage 3 ######################################################################
 	$(MAKE) dat/git
 .PHONY: all
 
 # Stage 1 ######################################################################
+#
+# Fetch a listing of all relevant URLs.
+#
+# - `dat/cdxindex.txt`
+# - `dat/urlkeys.txt`
+# - `dat/urlkeys.mk`
 
 dat:
 	mkdir -p $@
@@ -24,6 +39,17 @@ dat/urlkeys.mk: dat/urlkeys.txt
 	< $< sed 's/^/urlkeys+=/' > $@
 
 # Stage 2 ######################################################################
+#
+# Fetch the history for each relevant URL.
+#
+# - `dat/each-cdx/$(urlkey).txt` (for each urlkey in `dat/urlkeys.mk`)
+#
+# - `dat/index.txt`
+#   has a line for each relevant URL:
+#
+#       ${wayback_timestamp:YYYYmmddHHMMSS} ${url}
+#
+# - `dat/index.mk`
 ifneq ($(wildcard dat/urlkeys.mk),)
 include dat/urlkeys.mk
 
@@ -36,9 +62,22 @@ dat/index.mk: dat/index.txt
 	< $< sed -e 's,^,index+=,' -e 's, ,/,' > $@
 
 # Stage 3 ######################################################################
+#
+# The main stage.
 ifneq ($(wildcard dat/index.mk),)
 -include dat/index.mk
 
+# Part 1: Directory indexes:
+#
+# - `dat/content-dir/$(wayback_timestamp:YYYYmmddHHMMSS)/$(dir_murl)/index.html`
+#
+# - `dat/content-dir/$(wayback_timestamp:YYYYmmddHHMMSS)/$(dir_murl)/readme.txt`
+#
+# - `dat/content-dir/$(wayback_timestamp:YYYYmmddHHMMSS)/$(dir_murl)/metadata.txt`
+#   has a line for each file mentioned in index.html (this format is
+#   controlled by `bin/fmt-metadata`):
+#
+#          ${file_name} ${file_timestamp:YYYY-mm-dd HH:MM}
 dat/content-dir/%/index.html:
 	@mkdir -p '$(@D)'
 	curl -sfL 'http://web.archive.org/web/$(call murl2url,$(subst /http,id_/http,$*))' > $@
@@ -49,20 +88,44 @@ dat/content-dir/%/metadata.txt: dat/content-dir/%/index.html
 content-dir = $(foreach u,$(filter %/,$(index)),dat/content-dir/$(call url2murl,$(u)))
 download += $(addsuffix readme.txt,$(content-dir)) $(addsuffix metadata.txt,$(content-dir))
 
+# Part 2: File contents:
+# - `dat/content-file/$(wayback_timestamp:YYYYmmddHHMMSS)/$(file_murl)`
 dat/content-file/%:
 	@mkdir -p '$(@D)'
 	curl -sfL 'http://web.archive.org/web/$(call murl2url,$(subst /http,id_/http,$*))' > $@
 content-file = $(foreach u,$(filter-out %/,$(index)),dat/content-file/$(call url2murl,$(u)))
 download += $(content-file)
 
+# `download` is a convenience target to download files without
+# processing them.  It isn't depended on by anything.
 download: $(download)
 .PHONY: download
 
+# Part 3: Aggregate:
+# - `dat/metadata.txt`
+#   has a line for each file mentioned in any index.html:
+#
+#          ${dirindex_wayback_timestamp:YYYYmmddHHMMSS} ${branch_name}/${file_name} ${file_html_timestamp:YYYY-mm-dd HH:MM}
+#
+#   where ${dirindex_wayback_timestamp} and ${branch_name} are
+#   determined from the path to the relevant index.html.
+#
+# - `dat/pools/`
+#   + pass 1 and pass 1.5:
+#     * `dat/pools/files/${file_html_timestamp:YYYYmmddHHMM}-${branch_name}_${file_name}/`
+#     * `dat/pools/snaps/${dirindex_wayback_timestamp:YYYYmmddHHMMSS}-${branch_name}/${file_name}` (symlink to the /files/ file)
+#   + pass 2 and pass 3:
+#     * `dat/pools/files/${file_html_timestamp:YYYYmmddHHMM}-${branch_name}_${file_name}/${file_name}` (for each existing /files/ dir)
+#
 dat/metadata.txt: $(addsuffix metadata.txt,$(content-dir)) dat/index.txt
 	grep ^ $(foreach c,$(filter %/metadata.txt,$^),'$c') | sed -E -e 's,^dat/content-dir/,,' -e 's,/.*/Public/, ,' -e 's,/metadata\.txt:,/,' -e 's,\s+, ,g' | sort -u > $@
 dat/pools: $(download) dat/metadata.txt dat/index.txt
 	rm -rf -- $@ $@.bak
 	poolify dat/metadata.txt dat/index.txt || $(dirfail)
+
+# Part 4: Turn each `dat/pools/snaps/*` directory into a Git commit:
+#
+# - `dat/git/`
 dat/git: dat/pools $(addsuffix readme.txt,$(content-dir)) $(addsuffix metadata.txt,$(content-dir))
 	rm -rf -- $@ $@.bak
 	gitify $@ || $(dirfail)