summary refs log tree commit diff
path: root/go/src/cow-dedupe
diff options
context:
space:
mode:
author Luke Shumaker <lukeshu@beefcake.parabola.nu> 2018-06-03 13:20:19 -0400
committer Luke Shumaker <lukeshu@beefcake.parabola.nu> 2018-06-03 14:08:27 -0400
commit 4af19e8a4c5c1363872ca4c37f181a19e67da4c0 (patch)
tree 67154418dc133b1bd2ffe523519bd7333d166445 /go/src/cow-dedupe
parent 10aee1432c2c925bf65a194e87671291d27d786e (diff)
cow-dedupe: Shorter status line; better statistics
Diffstat (limited to 'go/src/cow-dedupe')
-rw-r--r-- go/src/cow-dedupe/dedupe.go 37
1 files changed, 17 insertions, 20 deletions
diff --git a/go/src/cow-dedupe/dedupe.go b/go/src/cow-dedupe/dedupe.go
index 5717e30..3fc2254 100644
--- a/go/src/cow-dedupe/dedupe.go
+++ b/go/src/cow-dedupe/dedupe.go
@@ -176,19 +176,17 @@ func dedupe(srcFile string, dupFiles []string) error {
return err
}
-// [ 0s ] size-set[n/d]->c :: sha256-set[t+n/d]->c :: (summed=n deduped=n) :: verb[m/n]
+// [ 0s ] set[n/(d+p-m)]->size :: summed=N deduped={Nmaps:Nfiles->N} errs=N :: verb[m/n]
type fancyStatus struct {
- sizeN, sizeD, sizeC int
- sumT, sumN, sumD, sumC int
- summed, errs int
- deduped struct{ srcs, dstMaps, dstFiles int }
- verb string
+ set struct{ n, d, p, m, size int }
+ summed, errs int
+ deduped struct{ srcs, dstMaps, dstFiles int }
+ verb string
}
func (s fancyStatus) String() string {
- return fmt.Sprintf("size-set[%d/%d]->%d :: sha256-set[%d+%d/%d]->%d :: (summed=%d deduped={%dmaps:%dfiles->%d} errs=%d) :: %s",
- s.sizeN, s.sizeD, s.sizeC,
- s.sumT, s.sumN, s.sumD, s.sumC,
+ return fmt.Sprintf("set[%d/(%d+%d-%d)]->%d :: summed=%d deduped={%dmaps:%dfiles->%d} errs=%d :: %s",
+ s.set.n, s.set.d, s.set.p, s.set.m, s.set.size,
s.summed,
s.deduped.dstMaps+s.deduped.srcs, s.deduped.dstFiles+s.deduped.srcs, s.deduped.srcs,
s.errs,
@@ -236,9 +234,9 @@ func main() {
sl = myStatusLine()
var status fancyStatus
- status.sizeD = len(size2filenames)
+ status.set.d = len(size2filenames)
for _, filenames := range size2filenames {
- status.sizeC = len(filenames)
+ status.set.size = len(filenames)
// Now do strict hashing, instead of the incredibly
// sloppy (but fast) size-bucketing.
status.verb = "sha256sum[%v/%v]"
@@ -248,9 +246,13 @@ func main() {
sl.Put(status.String())
pruneSingles(checksum2filenames)
// And loop over the smaller, precise buckets
- status.sumD = len(checksum2filenames)
+ if len(checksum2filenames) == 0 {
+ status.set.m++
+ } else {
+ status.set.p += len(checksum2filenames) - 1
+ }
for _, filenames := range checksum2filenames {
- status.sumC = len(filenames)
+ status.set.size = len(filenames)
status.verb = "prep"
sl.Put(status.String())
var fiemaps []string
@@ -285,7 +287,7 @@ func main() {
if len(name) > 21 {
name = name[:20] + "…"
}
- status.verb = fmt.Sprintf("dedupe %q (and %d more)", name, len(dupFiles))
+ status.verb = fmt.Sprintf("dedupe %q<-[%d]file{…}", name, len(dupFiles))
sl.Put(status.String())
err := dedupe(srcFile, dupFiles)
if err != nil {
@@ -295,13 +297,8 @@ func main() {
status.deduped.dstMaps += len(fiemaps) - 1
status.deduped.dstFiles += len(dupFiles)
}
- status.sumN++
+ status.set.n++
}
- status.sumT += len(checksum2filenames)
- status.sumN = 0
- status.sumD = 0
- status.sumC = 0
- status.sizeN++
}
status.verb = "done"
sl.Put(status.String())