From 8bb0c5aa9d00dfe16101329d62691d3f60cf134b Mon Sep 17 00:00:00 2001 From: Nicolas Reynolds Date: Fri, 28 Oct 2011 12:18:29 -0300 Subject: Sync package databases first * Get all available packages * Remove unfree from the sync list * Sync everything whitelisted --- db-sync | 108 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 108 insertions(+) create mode 100644 db-sync (limited to 'db-sync') diff --git a/db-sync b/db-sync new file mode 100644 index 0000000..0baf497 --- /dev/null +++ b/db-sync @@ -0,0 +1,108 @@ +#!/bin/bash +# Syncs Arch repos based on info contained in repo.db files +# License: GPLv3 + +# Principles +# * Get repo.db from an Arch-like repo +# * Generate a list of available packages +# * Create sync whitelist (based on package blacklist) +# * Get packages +# * Check package signatures +# * Check database signatures +# * Sync repo => repo + + +# eval this +BASEURL="ftp://ftp.archlinux.org/\$repo/os/\$arch/\$file" + +# Generates an URL from BASE_URL +# _Params_ +# * repo +# * arch +# * file +eval_url() { + repo="$1" + arch="$2" + file="$3" + + eval "${BASE_URL}" +} + +# Returns contents of a repo +get_repos() { + rsync -av --include="*.db*" --exclude="*" rsync://${mirror}/${mirror_path}/ cache/ +} + +get_repo_content() { +# Return all contents + bsdtar tf cache/$1/os/$2/$1.db.tar.* | \ + cut -d "/" -f 1 | \ + sort -u +} + +# Get the database compression as an extension +get_repo_ext() { + file "$1" | tr A-Z a-z | sed -e "s/^[^:]\+: *\(.z\).*$/.tar.\1/" -e "s/bz/&2" +} + +# Prints blacklisted packages +get_blacklist() { + cut -d ':' -f 1 "${BLACKLIST_FILE}" +} + +# repo +# arch +get_repo_file() { + [ ! -f "cache/${1}/os/${2}/${1}.db.tar.*" ] && return 1 + + echo cache/${1}/os/${2}/${1}.db.tar.* +} + +# Process the databases and get the libre packages +init() { +# Fail on every error + set -E + + source $(dirname $0)/config + source $(dirname $0)/local_config + source $(dirname $0)/libremessages + +# Get the blacklisted packages + blacklist=($(get_blacklist)) + +# Sync the repos databases + get_repos + +# Traverse all repo-arch pairs + for _arch in ${ARCHARCHES[@]}; do + for _repo in ${ARCHREPOS[@]}; do + msg "Processing ${_repo}-${_arch}" + + repo_file=$(get_repo_file ${_repo} ${_arch}) + +# Remove blacklisted packages and count them + msg2 "Removing blacklisted packages: $( + LC_ALL=C repo-remove ${repo_file} ${blacklist[@]} 2>&1 | \ + grep "\-> Removing" 2>/dev/null| wc -l)" + +# Get db contents + db=($(get_repo ${_repo} ${_arch})) + + msg2 "Process clean db for syncing..." + +# Create a whitelist + echo ${db[@]} | tr ' ' "\n" | sed "s|$|*|g" > /tmp/${_repo}-${_arch}.whitelist + +# Sync excluding everything but blacklist + rsync -av --include-from=/tmp/${_repo}-${_arch}.whitelist --exclude="*" + + +# Cleanup + unset db + done + done + +# Cleanup + unset blacklist _arch _repo repo_file +} + -- cgit v1.2.3-2-g168b From 666f2b153c4bd06259124dcc6641db6b21495787 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nicol=C3=A1s=20Reynolds?= Date: Sun, 30 Oct 2011 20:34:34 -0300 Subject: Seems to work, entering test status. --- db-sync | 115 +++++++++++++++++++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 100 insertions(+), 15 deletions(-) mode change 100644 => 100755 db-sync (limited to 'db-sync') diff --git a/db-sync b/db-sync old mode 100644 new mode 100755 index 0baf497..4c5dd7a --- a/db-sync +++ b/db-sync @@ -11,8 +11,13 @@ # * Check database signatures # * Sync repo => repo +# TODO +# * verbose mode +# * make a tarball of files used for forensics +# * get files db # eval this +# *not needed* BASEURL="ftp://ftp.archlinux.org/\$repo/os/\$arch/\$file" # Generates an URL from BASE_URL @@ -30,12 +35,18 @@ eval_url() { # Returns contents of a repo get_repos() { - rsync -av --include="*.db*" --exclude="*" rsync://${mirror}/${mirror_path}/ cache/ +# Exclude everything but db files + rsync -avm --include="*/" \ + --include="*.db" \ + --include="*${DBEXT}" \ + --exclude="*" \ + --delete-after \ + rsync://${mirror}/${mirrorpath}/ cache/ } get_repo_content() { # Return all contents - bsdtar tf cache/$1/os/$2/$1.db.tar.* | \ + bsdtar tf ${1} | \ cut -d "/" -f 1 | \ sort -u } @@ -53,22 +64,20 @@ get_blacklist() { # repo # arch get_repo_file() { - [ ! -f "cache/${1}/os/${2}/${1}.db.tar.*" ] && return 1 + [ ! -r "cache/${1}/os/${2}/${1}${DBEXT}" ] && return 1 - echo cache/${1}/os/${2}/${1}.db.tar.* + echo "cache/${1}/os/${2}/${1}${DBEXT}" } # Process the databases and get the libre packages init() { -# Fail on every error - set -E - - source $(dirname $0)/config - source $(dirname $0)/local_config - source $(dirname $0)/libremessages # Get the blacklisted packages blacklist=($(get_blacklist)) +# Store all the whitelist files + whitelists=() + + msg "${#blacklist[@]} packages in blacklist" # Sync the repos databases get_repos @@ -81,28 +90,104 @@ init() { repo_file=$(get_repo_file ${_repo} ${_arch}) # Remove blacklisted packages and count them +# TODO capture all removed packages for printing on debug mode msg2 "Removing blacklisted packages: $( LC_ALL=C repo-remove ${repo_file} ${blacklist[@]} 2>&1 | \ grep "\-> Removing" 2>/dev/null| wc -l)" # Get db contents - db=($(get_repo ${_repo} ${_arch})) + db=($(get_repo_content ${repo_file})) msg2 "Process clean db for syncing..." -# Create a whitelist +# Create a whitelist, add * wildcard to end +# TODO due to lack of -arch suffix, the pool sync retrieves every arch even if +# we aren't syncing them echo ${db[@]} | tr ' ' "\n" | sed "s|$|*|g" > /tmp/${_repo}-${_arch}.whitelist -# Sync excluding everything but blacklist - rsync -av --include-from=/tmp/${_repo}-${_arch}.whitelist --exclude="*" + msg2 "$(wc -l /tmp/${_repo}-${_arch}.whitelist | cut -d' ' -f1) packages in whitelist" + +# Sync excluding everything but whitelist +# We delete here for cleanup + rsync -vrtlH \ + --delete-after \ + --safe-links \ + --delay-updates \ + --max-delete=1000 \ + --include-from=/tmp/${_repo}-${_arch}.whitelist \ + --exclude="*" \ + rsync://${mirror}/${mirrorpath}/${_repo}/os/${_arch}/ \ + ${FTP_BASE}/${_repo}/os/${_arch}/ +# Add a new whitelist + whitelists+=(/tmp/${_repo}-${_arch}.whitelist) # Cleanup unset db done done + msg "Putting databases back in place" + rsync -vrtlH \ + --delay-updates \ + --safe-links \ + cache/ \ + ${FTP_BASE}/ + + msg "Syncing package pool" +# Concatenate all whitelists + cat ${whitelists[@]} | sort -u > /tmp/any.whitelist + + msg2 "Retrieving $(wc -l /tmp/any.whitelist | cut -d' ' -f1) packages from pool" + +# Sync +# *Don't delete-after*, this is the job of cleanup scripts. It will remove our +# packages too + rsync -vrtlH \ + --delay-updates \ + --safe-links \ + --include-from=/tmp/any.whitelist \ + --exclude="*" \ + rsync://${mirror}/${mirrorpath}/${PKGPOOL}/ \ + ${FTP_BASE}/${PKGPOOL}/ + +# Sync sources + msg "Syncing source pool" + #sed "s|\.pkg\.tar\.|.src.tar.|" /tmp/any.whitelist > /tmp/any-src.whitelist + + #msg2 "Retrieving $(wc -l /tmp/any-src.whitelist | cut -d' ' -f1) sources from pool" +# Sync +# *Don't delete-after*, this is the job of cleanup scripts. It will remove our +# packages too + rsync -vrtlH \ + --delay-updates \ + --safe-links \ + --include-from=/tmp/any.whitelist \ + --exclude="*" \ + rsync://${mirror}/${mirrorpath}/${SRCPOOL}/ \ + ${FTP_BASE}/${SRCPOOL}/ + + # Cleanup - unset blacklist _arch _repo repo_file + unset blacklist whitelists _arch _repo repo_file +} + +trap_exit() { + echo + error "$@" + exit 1 } + +source $(dirname $0)/config +source $(dirname $0)/local_config +source $(dirname $0)/libremessages + +# From makepkg +set -E + +trap 'trap_exit "$(gettext "TERM signal caught. Exiting...")"' TERM HUP QUIT +trap 'trap_exit "$(gettext "Aborted by user! Exiting...")"' INT +trap 'trap_exit "$(gettext "An unknown error has occurred. Exiting...")"' ERR + +init -- cgit v1.2.3-2-g168b From c60d06e050e929d454b447a3cac3263a83d0a445 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nicol=C3=A1s=20Reynolds?= Date: Thu, 3 Nov 2011 15:42:12 -0300 Subject: Removed unneeded code --- db-sync | 22 ---------------------- 1 file changed, 22 deletions(-) (limited to 'db-sync') diff --git a/db-sync b/db-sync index 4c5dd7a..0bb79b7 100755 --- a/db-sync +++ b/db-sync @@ -16,23 +16,6 @@ # * make a tarball of files used for forensics # * get files db -# eval this -# *not needed* -BASEURL="ftp://ftp.archlinux.org/\$repo/os/\$arch/\$file" - -# Generates an URL from BASE_URL -# _Params_ -# * repo -# * arch -# * file -eval_url() { - repo="$1" - arch="$2" - file="$3" - - eval "${BASE_URL}" -} - # Returns contents of a repo get_repos() { # Exclude everything but db files @@ -51,11 +34,6 @@ get_repo_content() { sort -u } -# Get the database compression as an extension -get_repo_ext() { - file "$1" | tr A-Z a-z | sed -e "s/^[^:]\+: *\(.z\).*$/.tar.\1/" -e "s/bz/&2" -} - # Prints blacklisted packages get_blacklist() { cut -d ':' -f 1 "${BLACKLIST_FILE}" -- cgit v1.2.3-2-g168b