summaryrefslogtreecommitdiff
path: root/go/src/cow-dedupe/dedupe.go
diff options
context:
space:
mode:
Diffstat (limited to 'go/src/cow-dedupe/dedupe.go')
-rw-r--r--go/src/cow-dedupe/dedupe.go119
1 files changed, 39 insertions, 80 deletions
diff --git a/go/src/cow-dedupe/dedupe.go b/go/src/cow-dedupe/dedupe.go
index f28d2f4..9fee970 100644
--- a/go/src/cow-dedupe/dedupe.go
+++ b/go/src/cow-dedupe/dedupe.go
@@ -10,6 +10,8 @@ import (
"runtime"
"strconv"
"strings"
+
+ "lib/statusline"
)
//#include <unistd.h>
@@ -23,100 +25,62 @@ func errhandle(err error) {
}
}
-func findLikelyDups(paths []string) map[string][]string {
- ret := map[string][]string{}
+func getFiemaps(paths []string) map[string][]string {
var err error
for i := range paths {
paths[i], err = filepath.Abs(paths[i])
errhandle(err)
}
- cmd := exec.Command("find", append(paths, "-type", "f", "-printf", "%s %p\\0")...)
+
+ ret := map[string][]string{}
+
+ sl := statusline.NewStatusLine(os.Stderr)
+ cnt := 0
+ sl.Put("Mapping extents...")
+
+ cmd := exec.Command("find", append(paths, "-type", "f", "-exec", "./cow-extent-map", "-m", "--", "{}", "+")...)
stdout, err := cmd.StdoutPipe()
errhandle(err)
cmd.Stderr = os.Stderr
+
errhandle(cmd.Start())
rd := bufio.NewReader(stdout)
for {
- line, err := rd.ReadString('\x00')
- if line == "" && err == io.EOF {
+ filename, err := rd.ReadString('\x00')
+ if filename == "" && err == io.EOF {
break
}
- errhandle(err)
- parts := strings.SplitN(strings.TrimSuffix(line, "\x00"), " ", 2)
- if len(parts) != 2 {
- panic("wut")
- }
- size := parts[0]
- filename := parts[1]
- basename := filepath.Base(filename)
- key := size + " " + basename
- ret[key] = append(ret[key], filename)
- }
- errhandle(cmd.Wait())
- for key := range ret {
- if len(ret[key]) < 2 {
- delete(ret, key)
+ filename = strings.TrimSuffix(filename, "\x00")
+ if !strings.HasPrefix(filename, "/") {
+ panic("ugly filename")
}
- }
- return ret
-}
-
-func getFiemaps(paths []string) map[string][]string {
- ret := map[string][]string{}
- fmt.Fprintf(os.Stderr, "Getting fiemaps for %d files...\n", len(paths))
-
- cnt := 0
- for len(paths) > 0 {
- _paths := paths
- arg_len := 0
- for i := range _paths {
- arg_len += len(_paths[i]) + 1
- if arg_len > arg_max/2 {
- _paths = _paths[:i-1]
- break
- }
+ errhandle(err)
+ fiemap, err := rd.ReadString('\x00')
+ fiemap = strings.TrimSuffix(fiemap, "\x00")
+ if !(strings.HasPrefix(fiemap, "logical=") || fiemap == "") {
+ panic("ugly fiemap")
}
- paths = paths[len(_paths):]
-
- cmd := exec.Command("./cow-extent-map", append([]string{"-m", "--"}, _paths...)...)
- stdout, err := cmd.StdoutPipe()
errhandle(err)
- cmd.Stderr = os.Stderr
- errhandle(cmd.Start())
- rd := bufio.NewReader(stdout)
- for {
- filename, err := rd.ReadString('\x00')
- if filename == "" && err == io.EOF {
- break
- }
- filename = strings.TrimSuffix(filename, "\x00")
- if !strings.HasPrefix(filename, "/") {
- panic("ugly filename")
- }
- errhandle(err)
- fiemap, err := rd.ReadString('\x00')
- fiemap = strings.TrimSuffix(fiemap, "\x00")
- if !(strings.HasPrefix(fiemap, "logical=") || fiemap == "") {
- panic("ugly fiemap")
- }
- errhandle(err)
- ret[fiemap] = append(ret[fiemap], filename)
- cnt++
- fmt.Fprintf(os.Stderr, "\r%d ", cnt)
- }
- errhandle(cmd.Wait())
+ ret[fiemap] = append(ret[fiemap], filename)
+ cnt++
+ sl.Put(fmt.Sprintf("Mapping extents... %d", cnt))
}
+ errhandle(cmd.Wait())
- fmt.Fprintf(os.Stderr, "\r...done \n")
+ sl.Put(fmt.Sprintf("Mapping extents... done; mapped %d files", cnt))
+ sl.End()
+ io.WriteString(os.Stderr, "\n")
return ret
}
func getChecksums(paths []string) map[string][]string {
ret := map[string][]string{}
- fmt.Fprintf(os.Stderr, "Generating checksums for %d files...\n", len(paths))
+ sl := statusline.NewStatusLine(os.Stderr)
cnt := 0
+ sl.Put(fmt.Sprintf("Generating checksums for files... %d/%d\n", cnt, len(paths)))
+
for len(paths) > 0 {
_paths := paths
arg_len := 0
@@ -150,28 +114,23 @@ func getChecksums(paths []string) map[string][]string {
ret[checksum] = append(ret[checksum], filename)
cnt++
- fmt.Fprintf(os.Stderr, "\r%d ", cnt)
+ sl.Put(fmt.Sprintf("Generating checksums for files... %d/%d\n", cnt, len(paths)))
}
errhandle(cmd.Wait())
}
- fmt.Fprintf(os.Stderr, "\r...done \n")
+ sl.Put(fmt.Sprintf("Generating checksums for files... done; summed %d files\n", cnt))
+ sl.End()
+ io.WriteString(os.Stderr, "\n")
return ret
}
func main() {
- // we have no parallelism, don't let syscalls fan-out weird on
- // many-core systems
+ // we have low parallelism, don't let syscalls fan-out weird
+ // on many-core systems
runtime.GOMAXPROCS(1)
- likely := findLikelyDups(os.Args[1:])
-
- var flatLikely []string
- for _, filenames := range likely {
- flatLikely = append(flatLikely, filenames...)
- }
-
- fiemap2filenames := getFiemaps(flatLikely)
+ fiemap2filenames := getFiemaps(os.Args[1:])
filename2fiemap := map[string]string{}
for fiemap, filenames := range fiemap2filenames {