3 files changed, 331 insertions, 0 deletions
diff --git a/db-import b/db-import
new file mode 100755
index 0000000..7bc822f
--- /dev/null
+++ b/db-import
@@ -0,0 +1,293 @@
+#!/bin/bash
+# Imports Arch-like repos, running them through a blacklist
+# License: GPLv3
+
+# Shell options are set here rather than on the shebang line so that they
+# still apply when the script is started as `bash db-import`.
+set -euE
+
+. "$(dirname -- "$0")/config"
+. "$(dirname -- "$0")/db-import.conf"
+. "$(librelib messages)"
+. "$(librelib blacklist)"
+
+# DBs = pacman DataBases
+
+# This replaces two scripts:
+#  - abslibre : imported ABS tree from Arch
+#  - db-sync  : imported pacman DBs from Arch
+
+# The flow here is:
+#  1. "${IMPORTDIR}/cache/${name}/dbs/"  # Download the pacman databases
+#  2. "${IMPORTDIR}/cache/${name}/abs/"  # Download the ABS tree
+#  3. "${IMPORTDIR}/clean/${name}/dbs/"  # Run the pacman DBs through the blacklist
+#  4. Download all the package files mentioned in "clean/${name}/dbs/"
+#  5. "${STAGING}-importer-${tag}"       # Copy all the package files we just downloaded to here
+#  6. Run db-update on "${STAGING}-importer-${tag}"
+
+# Options shared by every rsync call. Kept as a plain string because the
+# callers expand it unquoted to split it into separate options.
+SYNCARGS='-mrtvlH --no-motd --no-p --no-o --no-g'
+# Usage: main
+# Entry point. For every entry of the `imports` array, fetch the remote
+# databases and ABS tree, filter them through the blacklist, download the
+# remaining package files and publish the result.
+#
+# Each entry of `imports` is one whitespace-separated string:
+#   name pkgmirror absmirror repo-arch...
+#
+# Any failing step aborts the script through `set -e`.
+main() {
+  blacklist-update
+
+  local importStr
+  for importStr in "${imports[@]}"; do
+    # Intentionally unquoted: split the entry into its fields.
+    local importAry=($importStr)
+    local name=${importAry[0]}
+    # fetch_pkgs reads $pkgmirror from this scope rather than as an argument.
+    local pkgmirror=${importAry[1]}
+    local absmirror=${importAry[2]}
+    local tags=("${importAry[@]:3}")
+
+    msg "Fetching remote package source: %s" "$name"
+    fetch_dbs "$name" "$pkgmirror"
+    fetch_abs "$name" "$absmirror" "${tags[@]}"
+
+    msg "Filtering blacklisted packages from remote package source: %s" "$name"
+    clean_dbs "$name" "${tags[@]}"
+    # Step 4 of the flow: publish_tag expects the package files to be present
+    # under "${IMPORTDIR}/clean/${name}/pkgs/".
+    fetch_pkgs "$name" "${tags[@]}"
+
+    msg "Publishing changes from remote package source: %s" "$name"
+    publish "$name" "${tags[@]}"
+  done
+
+  # No `return $r` here: $r was never assigned, which is fatal under `set -u`.
+  return 0
+}
+# Usage: fetch_dbs name pkgmirror
+# Mirrors the pacman database files offered at "rsync://${pkgmirror}/" into
+# "${IMPORTDIR}/cache/${name}/dbs", leaving every other file behind.
+fetch_dbs() {
+  local name=$1
+  local pkgmirror=$2
+  local dest="${IMPORTDIR}/cache/${name}/dbs"
+
+  msg2 'Synchronizing package databases...'
+  mkdir -p -- "$dest"
+
+  # Grab just the .db files from $pkgmirror. The rules are order-sensitive:
+  # keep directories and database files, then drop everything else.
+  local filters=(
+    --include="*/"
+    --include="*.db"
+    --include="*${DBEXT}"
+    --exclude="*"
+  )
+  # $SYNCARGS is deliberately unquoted so that it splits into options.
+  rsync $SYNCARGS --delete-after "${filters[@]}" \
+    "rsync://${pkgmirror}/" "$dest"
+}
+# Usage: fetch_abs name absmirror repo-arch...
+# Runs `abs` once per architecture mentioned in the "repo-arch" pairs,
+# storing each tree in "${IMPORTDIR}/cache/${name}/abs/${arch}".
+#
+# `abs` is configured through "~/.abs.conf", so $HOME is temporarily pointed
+# at "${IMPORTDIR}/homes/${name}/${arch}" and restored after the loop.
+fetch_abs() {
+  local name=$1
+  local absmirror=$2
+  local tags=("${@:3}")
+
+  local saved_home=$HOME
+  local arch absroot
+
+  # Sync the ABS tree from $absmirror
+  for arch in $(list_arches "${tags[@]}"); do
+    msg2 'Synchronizing %s ABS tree...' "$arch"
+
+    absroot="${IMPORTDIR}/cache/${name}/abs/${arch}"
+    mkdir -p -- "$absroot"
+
+    # Configure `abs` for this mirror
+    export HOME="${IMPORTDIR}/homes/${name}/${arch}"
+    mkdir -p -- "$HOME"
+    {
+      printf "ABSROOT='%s'\nSYNCSERVER='%s'\nARCH='%s'\nREPOS=(\n" \
+        "$absroot" "$absmirror" "$arch"
+      list_repos "$arch" "${tags[@]}"
+      printf ')\n'
+    } > "${HOME}/.abs.conf"
+
+    # Run `abs`
+    abs
+  done
+
+  export HOME=$saved_home
+}
+# Usage: clean_dbs name repo-arch...
+# Rebuilds "${IMPORTDIR}/clean/${name}" from scratch: for each "repo-arch"
+# pair the cached database is copied over and the blacklisted packages are
+# removed from the copy with repo-remove.
+clean_dbs() {
+  local name=$1
+  local tags=("${@:2}")
+  local cleanroot="${IMPORTDIR}/clean/$name"
+
+  rm -rf -- "$cleanroot"
+
+  local tag
+  for tag in "${tags[@]}"; do
+    msg2 'Creating clean version of %s package database...' "$tag"
+
+    local relpath=$(db_file "$tag")
+    local cached="${IMPORTDIR}/cache/$name/dbs/${relpath}"
+    local cleaned="${cleanroot}/dbs/${relpath}"
+
+    install -Dm644 "$cached" "$cleaned"
+    # Unquoted on purpose: one argument per blacklisted package name.
+    repo-remove "$cleaned" $(blacklist-cat|blacklist-get-pkg)
+  done
+}
+# Usage: fetch_pkgs name repo-arch...
+# Downloads the package files listed in the clean databases of $name.
+#
+# For each "repo-arch" tag, the package list of the clean database is written
+# to "${IMPORTDIR}/clean/${name}/dbs/${tag}.whitelist" and turned into an
+# rsync include list twice: once for architecture-specific files (stored in
+# "pkgs/${tag}/") and once for "any" files (stored in "pkgs/${repo}-any/").
+#
+# NOTE: $pkgmirror is not a parameter; it is read from the caller's scope.
+fetch_pkgs() {
+  local name=$1
+  local tags=("${@:2}")
+  local pkgroot="${IMPORTDIR}/clean/${name}/pkgs"
+
+  local repo arch dbfile whitelist
+  local tag
+  for tag in "${tags[@]}"; do
+    msg2 'Syncronizing package files for %s...' "$tag"
+    repo=${tag%-*}
+    arch=${tag##*-}
+
+    dbfile="${IMPORTDIR}/clean/$name/dbs/$(db_file "$tag")"
+    whitelist="${IMPORTDIR}/clean/$name/dbs/$tag.whitelist"
+    list_pkgs "$dbfile" > "$whitelist"
+
+    # rsync creates only the last component of the destination path.
+    mkdir -p -- "${pkgroot}/${tag}" "${pkgroot}/${repo}-any"
+
+    # The patterns must spell out ".pkg.tar.?z": publish_tag looks for
+    # "<pkgname>-<version>-<arch>.pkg.tar.?z".
+
+    # fetch the architecture-specific packages
+    rsync $SYNCARGS --delete-after --delete-excluded \
+      --delay-updates \
+      --include-from=<(sed "s|\$|-${arch}.pkg.tar.?z|" "$whitelist") \
+      --exclude='*' \
+      "rsync://${pkgmirror}/$(db_dir "$tag")/" \
+      "${pkgroot}/${tag}/"
+
+    # fetch the architecture-independent packages
+    rsync $SYNCARGS --delete-after --delete-excluded \
+      --delay-updates \
+      --include-from=<(sed "s|\$|-any.pkg.tar.?z|" "$whitelist") \
+      --exclude='*' \
+      "rsync://${pkgmirror}/$(db_dir "$tag")/" \
+      "${pkgroot}/${repo}-any/"
+  done
+}
+# Usage: publish name repo-arch...
+# Announces and publishes each "repo-arch" tag of import source $name, one
+# publish_tag call per tag, in the order given.
+publish() {
+  local name=$1
+  shift
+
+  local tag
+  for tag; do
+    msg2 'Publishing changes to %s...' "$tag"
+    publish_tag "$name" "$tag"
+  done
+}
+# Usage: publish_tag name repo-arch
+# Publishes the changes of one "repo-arch" tag of import source $name:
+#  1. find the package file of every added/updated package below
+#     "${IMPORTDIR}/clean/${name}/pkgs/",
+#  2. copy those files to "${STAGING}-importer-${tag}/${repo}/",
+#  3. run db-update with STAGING pointing at that directory,
+#  4. run db-remove for the removed packages.
+# Steps 2-3 are skipped when nothing was added, step 4 when nothing was
+# removed.
+#
+# Returns 1, after reporting every missing file, if any package file cannot
+# be found.
+publish_tag() {
+  local name=$1
+  local tag=$2
+  local repo=${tag%-*}
+  local arch=${tag##*-}
+  local pkgroot="${IMPORTDIR}/clean/${name}/pkgs"
+
+  local missing=false
+  local files=()
+  local pkgid pkgarch pkgdir candidate found
+
+  while IFS= read -r pkgid; do
+    [[ -n $pkgid ]] || continue
+    found=false
+    for pkgarch in "$arch" any; do
+      # Architecture-specific files live in "pkgs/${tag}", architecture-
+      # independent ones in "pkgs/${repo}-any" (see fetch_pkgs).
+      if [[ $pkgarch == any ]]; then
+        pkgdir="${pkgroot}/${repo}-any"
+      else
+        pkgdir="${pkgroot}/${tag}"
+      fi
+      # The compression wildcard has to be expanded as a word list; inside a
+      # plain assignment the pattern would stay literal.
+      for candidate in "${pkgdir}/${pkgid}-${pkgarch}".pkg.tar.?z; do
+        if [[ -r $candidate ]]; then
+          files+=("$candidate")
+          found=true
+          break 2
+        fi
+      done
+    done
+    if ! $found; then
+      error 'Could not find package file for %s' "$pkgid"
+      missing=true
+    fi
+  done < <(list_added_pkgs "$name" "$tag")
+
+  if $missing; then
+    error 'Quitting...'
+    return 1
+  fi
+
+  # Keep the per-tag staging directory out of the global STAGING: exporting
+  # it would make every later tag append one more "-importer-<tag>" suffix.
+  local staging="${STAGING}-importer-${tag}"
+  if (( ${#files[@]} > 0 )); then
+    mkdir -p -- "${staging}/${repo}"
+    cp -a -- "${files[@]}" "${staging}/${repo}/"
+    STAGING="$staging" db-update
+  fi
+
+  local removed=()
+  mapfile -t removed < <(list_removed_pkgs "$name" "$tag")
+  if (( ${#removed[@]} > 0 )); then
+    # XXX: db-remove wants pkgbase, not pkgname
+    db-remove "$repo" "$arch" "${removed[@]}"
+  fi
+}
+# Usage: list_arches repo-arch...
+# Prints, one per line and without duplicates, the architecture part (the
+# text after the last '-') of every "repo-arch" pair given.
+list_arches() {
+  printf '%s\n' "${@##*-}" | sort -u
+}
+# Usage: list_repos arch repo-arch...
+# Prints, one per line, the repository part of every "repo-arch" pair whose
+# architecture is $arch; pairs for other architectures are dropped.
+list_repos() {
+  local arch=$1
+  shift
+
+  # Strip the "-<arch>" suffix and print only the lines where it was present.
+  local strip_arch="s/-$arch\$//p"
+  printf '%s\n' "$@" | sed -n "$strip_arch"
+}
+# Usage: db_dir repo-arch
+# Prints the directory of a "repo-arch" pair, relative to the root of a
+# mirror: "<repo>/os/<arch>". The pair is split at its last '-'.
+db_dir() {
+  echo "${1%-*}/os/${1##*-}"
+}
+# Usage: db_file repo-arch
+# Prints the path of the database file of a "repo-arch" pair, relative to the
+# root of a mirror: "<repo>/os/<arch>/<repo>${DBEXT}".
+db_file() {
+  local repo=${1%-*}
+  echo "${repo}/os/${1##*-}/${repo}${DBEXT}"
+}
+# Usage: list_pkgs dbfile
+# Prints "$pkgname-$(get_full_version "$pkgname")" once for every package in
+# $dbfile, sorted.
+list_pkgs() {
+  local dbfile=$1
+  # One package can contribute several archive members (its directory and
+  # the files inside it), so the first path component repeats; collapse the
+  # repeats. The sorted output is also what comm(1) and join(1) need.
+  bsdtar tf "$dbfile" | cut -d/ -f1 | sort -u
+}
+# Usage: list_pkgs | sep_ver
+# Filter: on every line of stdin, turns the '-' in front of the last two
+# '-'-separated fields (the version and release) into a ' ', so that the
+# pkgname and the full version become two separate columns.
+sep_ver() {
+  local split_version='s/-([^-]*-[^-]*)$/ \1/'
+  sed -r -e "$split_version"
+}
+# Usage: list_removed_pkgs importsrc repo-arch
+# Prints "$pkgname-$(get_full_version "$pkgname")" for every removed package,
+# i.e. every package that is in the published database
+# ("${FTP_BASE}/<db_file>") but no longer in the clean imported one.
+list_removed_pkgs() {
+  local name=$1
+  local tag=$2
+  local old="${FTP_BASE}/$(db_file "$tag")"
+  local new="${IMPORTDIR}/clean/$name/dbs/$(db_file "$tag")"
+
+  # join(1) produces one line per package of the old database:
+  #   pkgname oldver[ newver]
+  # Lines that carry a newver (two spaces) belong to packages that still
+  # exist, so they are dropped; the remaining pkgname and version are then
+  # re-combined. The '|' must end each line for the pipeline to continue.
+  join -a1 \
+    <(list_pkgs "$old" | sep_ver | sort -u) \
+    <(list_pkgs "$new" | sep_ver | sort -u) |
+    grep -v ' .* ' |
+    sed 's/ /-/'
+}
+# Usage: list_added_pkgs importsrc repo-arch
+# slightly a misnomer; added and updated
+# Prints "$pkgname-$(get_full_version "$pkgname")" for every added or updated
+# package, i.e. every entry of the clean imported database that is not in the
+# published one ("${FTP_BASE}/<db_file>").
+list_added_pkgs() {
+  local name=$1
+  local tag=$2
+  local old="${FTP_BASE}/$(db_file "$tag")"
+  local new="${IMPORTDIR}/clean/$name/dbs/$(db_file "$tag")"
+
+  # comm(1) only works on sorted input; -13 keeps the lines unique to $new.
+  comm -13 <(list_pkgs "$old" | sort -u) <(list_pkgs "$new" | sort -u)
+}
+# Run the import; the command-line arguments are handed to main unchanged.
+main "$@"
diff --git a/db-import.conf b/db-import.conf
new file mode 100644
index 0000000..1ba50ba
--- /dev/null
+++ b/db-import.conf
@@ -0,0 +1,14 @@
+#!/bin/bash # as a hint to text editors
+
+# "repo-arch" pairs to import from Arch; brace expansion yields one array
+# element per pair.
+_archrepos=(
+  {core,extra,testing}-{i686,x86_64}
+  community{,-testing}-{i686,x86_64}
+  multilib{,-testing}-x86_64
+)
+
+# db-pick-mirror takes a protocol and the URL of a mirror-status JSON
+# document, and prints the URL of the mirror it picked.
+# NOTE(review): the URL argument was missing here; this is presumably Arch's
+# mirror status endpoint - confirm.
+_archpkgmirror=$(db-pick-mirror rsync 'https://www.archlinux.org/mirrors/status/json/')
+# db-import prepends "rsync://" and appends "/" itself.
+# NOTE(review): assumes db-pick-mirror prints a full "rsync://host/path/"
+# URL - confirm.
+_archpkgmirror=${_archpkgmirror#rsync://}
+_archpkgmirror=${_archpkgmirror%/}
+
+# NOTE(review): the absmirror field was missing from `imports`; this is
+# presumably the rsync server of Arch's ABS tree - confirm.
+_archabsmirror=rsync.archlinux.org
+
+# name pkgmirror absmirror repo-arch...
+imports=("archlinux ${_archpkgmirror} ${_archabsmirror} ${_archrepos[*]}")
diff --git a/db-pick-mirror b/db-pick-mirror
new file mode 100755
index 0000000..7cbc032
--- /dev/null
+++ b/db-pick-mirror
@@ -0,0 +1,24 @@
+#!/usr/bin/env ruby
+# Usage: db-pick-mirror PROTOCOL JSONURL
+# Downloads the mirror-status JSON document at JSONURL and prints the URL of
+# the mirror that uses PROTOCOL and has the lowest delay/completion ratio.
+# Exits with status 1, with a message on stderr, when the arguments are
+# missing, the document is not format version 3, or no mirror qualifies.
+
+require 'json'
+require 'rest_client'
+
+protocol = ARGV[0]
+jsonurl  = ARGV[1]
+if protocol.nil? || jsonurl.nil?
+  warn "Usage: db-pick-mirror PROTOCOL JSONURL"
+  exit 1
+end
+
+data = JSON::parse(RestClient.get(jsonurl))
+
+if data["version"] != 3
+  # stdout is captured by the caller as the mirror URL, so diagnostics go to
+  # stderr.
+  warn "Data format version != 3"
+  exit 1
+end
+
+urls = data["urls"]
+rsync_urls = urls.select{|a| a["protocol"]==protocol}
+
+# By score ( (delay+speed)/completion )
+#best = rsync_urls.sort{|a,b| (a["score"] || Float::INFINITY) <=> (b["score"] || Float::INFINITY) }.first
+
+# By delay/completion ; hopefully this gives us a tier 1 mirror
+# Entries without a delay or with a zero completion have no usable ratio.
+usable = rsync_urls.select{|a| a["delay"] && a["completion_pct"].to_f > 0}
+best = usable.min_by{|a| a["delay"]/a["completion_pct"].to_f}
+
+if best.nil?
+  warn "No usable #{protocol} mirror found"
+  exit 1
+end
+
+puts best["url"]