From d2ef0c1c9925ec7f4d7494dd20a9c73451587592 Mon Sep 17 00:00:00 2001
From: Luke Shumaker
Date: Sat, 3 Nov 2012 00:45:44 -0400
Subject: improve performance of pbs-absrepo-convert's collect-data

---
 pbs-absrepo-convert             | 33 ++++++++++-----------------------
 pbs-absrepo-convert--filterlist |  4 ----
 2 files changed, 10 insertions(+), 27 deletions(-)
 delete mode 100755 pbs-absrepo-convert--filterlist

diff --git a/pbs-absrepo-convert b/pbs-absrepo-convert
index 29f749f..901b924 100755
--- a/pbs-absrepo-convert
+++ b/pbs-absrepo-convert
@@ -15,20 +15,6 @@ abort() {
 	cleanup
 }
 
-collect-data-gc() {(
-	cd "$TMPDIR"
-	local files
-	while [[ ! -f collect-data-filter-branch.done ]]; do
-		files=`echo find.new*`
-		cat -- $files 2>>/dev/null | sort -u > find.tmp
-		rm -f -- $files
-		mv find.tmp find.new
-		sleep 10
-	done
-	mv find.new find
-	rm -f collect-data-filter-branch.done
-)}
-
 ##
 # Usage: collect-data
 # Assumptions:
@@ -36,6 +22,7 @@ collect-data-gc() {(
 # - git branch "master" exists and is untouched
 # - TMPDIR is set and exists
 # Effects:
+# - creates file "${TMPDIR}/commits"
 # - creates file "${TMPDIR}/find"
 # - creates file "${TMPDIR}/packages"
 # - creates file "${TMPDIR}/architectures"
@@ -44,15 +31,15 @@ collect-data() {
 	msg "$(gettext "Collecting package data...")"
 	[[ $# = 0 ]] || { usage; return 1; }
 
-	# main data collection #################################################
-	collect-data-gc &
-	git filter-branch --tree-filter pbs-absrepo-convert--filterlist master
-	# notify collect-data-gc that filter-branch is done
-	touch "${TMPDIR}/collect-data-filter-branch.done"
-	# wait for collect-data-gc to finish
-	while [[ ! -f "${TMPDIR}/find" ]]; do
-		sleep 10;
-	done
+	git log --pretty=format:'%H' master > "${TMPDIR}/commits"
+
+	# actual data collection ###############################################
+	local count="$(wc -l < "${TMPDIR}/commits")"
+	cat -n "${TMPDIR}/commits" | while read n commit; do
+		printf '\rscanning commit %s (%d/%d)' "$commit" "$n" "$count" >> /dev/tty
+		git ls-tree -rd --name-only "$commit"
+	done | fgrep /repos/ | sort -u > "${TMPDIR}/find"
+	echo # newline
 
 	# extract some things ##################################################
 	# packages
diff --git a/pbs-absrepo-convert--filterlist b/pbs-absrepo-convert--filterlist
deleted file mode 100755
index df24066..0000000
--- a/pbs-absrepo-convert--filterlist
+++ /dev/null
@@ -1,4 +0,0 @@
-#!/bin/bash
-
-find . -mindepth 3 -maxdepth 3| fgrep /repos/ > "${TMPDIR}/find.tmp.${GIT_COMMIT}"
-mv "${TMPDIR}/find.tmp.${GIT_COMMIT}" "${TMPDIR}/find.new.${GIT_COMMIT}"
-- 
cgit v1.1-4-g5e80