From e1b7cc252a63de40135cf6ed4ce1c96d9abca303 Mon Sep 17 00:00:00 2001 From: Luke Shumaker Date: Sat, 3 Nov 2012 00:32:01 -0400 Subject: improve performance of pbs-absrepo-convert's collect-data --- pbs-absrepo-convert | 34 ++++++++++++++++++---------------- pbs-absrepo-convert--filterlist | 7 ++----- 2 files changed, 20 insertions(+), 21 deletions(-) diff --git a/pbs-absrepo-convert b/pbs-absrepo-convert index 7b0bdaf..29f749f 100755 --- a/pbs-absrepo-convert +++ b/pbs-absrepo-convert @@ -15,23 +15,17 @@ abort() { cleanup } -_collect-data-gc() { - local base=$1 - local files=`echo $base.new*` - cat -- $files 2>>/dev/null | sort -u > $base.tmp - rm -f -- $files - mv $base.tmp $base.new -} - collect-data-gc() {( cd "$TMPDIR" + local files while [[ ! -f collect-data-filter-branch.done ]]; do - _collect-data-gc packages - _collect-data-gc repo-arch + files=`echo find.new*` + cat -- $files 2>>/dev/null | sort -u > find.tmp + rm -f -- $files + mv find.tmp find.new sleep 10 done - mv packages.new packages - mv repo-arch.new repo-arch + mv find.new find rm -f collect-data-filter-branch.done )} @@ -42,25 +36,33 @@ collect-data-gc() {( # - git branch "master" exists and is untouched # - TMPDIR is set and exists # Effects: +# - creates file "${TMPDIR}/find" # - creates file "${TMPDIR}/packages" -# - creates file "${TMPDIR}/repo-arch" # - creates file "${TMPDIR}/architectures" ## collect-data() { msg "$(gettext "Collecting package data...")" [[ $# = 0 ]] || { usage; return 1; } + + # main data collection ################################################# collect-data-gc & git filter-branch --tree-filter pbs-absrepo-convert--filterlist master # notify collect-data-gc that filter-branch is done touch "${TMPDIR}/collect-data-filter-branch.done" # wait for collect-data-gc to finish - while [[ ! -f "${TMPDIR}/packages" ]]; do + while [[ ! -f "${TMPDIR}/find" ]]; do sleep 10; done - # extract some data from those files + + # extract some things ################################################## + # packages + { + < "${TMPDIR}/find" sed -r 's|^\./([^/]*)/.*|\1|' + } | sort -u > "${TMPDIR}/packages" + # architectures { echo master - < "${TMPDIR}/repo-arch" sed -e 's/.*/-/' -e '/^any$/d' + < "${TMPDIR}/find" sed -e 's/.*/-/' -e '/^any$/d' } | sort -u > "${TMPDIR}/architectures" } diff --git a/pbs-absrepo-convert--filterlist b/pbs-absrepo-convert--filterlist index 645c351..df24066 100755 --- a/pbs-absrepo-convert--filterlist +++ b/pbs-absrepo-convert--filterlist @@ -1,7 +1,4 @@ #!/bin/bash -printf '%s\n' * > "${TMPDIR}/packages.tmp.${GIT_COMMIT}" -find */repos/* -type d -printf '%f\n' >> "${TMPDIR}/repo-arch.tmp.${GIT_COMMIT}" - -mv "${TMPDIR}/packages.tmp.${GIT_COMMIT}" "${TMPDIR}/packages.new.${GIT_COMMIT}" -mv "${TMPDIR}/repo-arch.tmp.${GIT_COMMIT}" "${TMPDIR}/repo-arch.new.${GIT_COMMIT}" +find . -mindepth 3 -maxdepth 3| fgrep /repos/ > "${TMPDIR}/find.tmp.${GIT_COMMIT}" +mv "${TMPDIR}/find.tmp.${GIT_COMMIT}" "${TMPDIR}/find.new.${GIT_COMMIT}" -- cgit v1.1-4-g5e80