diff options
Diffstat (limited to 'go/src/cow-dedupe/dedupe.go')
-rw-r--r-- | go/src/cow-dedupe/dedupe.go | 119 |
1 files changed, 39 insertions, 80 deletions
diff --git a/go/src/cow-dedupe/dedupe.go b/go/src/cow-dedupe/dedupe.go index f28d2f4..9fee970 100644 --- a/go/src/cow-dedupe/dedupe.go +++ b/go/src/cow-dedupe/dedupe.go @@ -10,6 +10,8 @@ import ( "runtime" "strconv" "strings" + + "lib/statusline" ) //#include <unistd.h> @@ -23,100 +25,62 @@ func errhandle(err error) { } } -func findLikelyDups(paths []string) map[string][]string { - ret := map[string][]string{} +func getFiemaps(paths []string) map[string][]string { var err error for i := range paths { paths[i], err = filepath.Abs(paths[i]) errhandle(err) } - cmd := exec.Command("find", append(paths, "-type", "f", "-printf", "%s %p\\0")...) + + ret := map[string][]string{} + + sl := statusline.NewStatusLine(os.Stderr) + cnt := 0 + sl.Put("Mapping extents...") + + cmd := exec.Command("find", append(paths, "-type", "f", "-exec", "./cow-extent-map", "-m", "--", "{}", "+")...) stdout, err := cmd.StdoutPipe() errhandle(err) cmd.Stderr = os.Stderr + errhandle(cmd.Start()) rd := bufio.NewReader(stdout) for { - line, err := rd.ReadString('\x00') - if line == "" && err == io.EOF { + filename, err := rd.ReadString('\x00') + if filename == "" && err == io.EOF { break } - errhandle(err) - parts := strings.SplitN(strings.TrimSuffix(line, "\x00"), " ", 2) - if len(parts) != 2 { - panic("wut") - } - size := parts[0] - filename := parts[1] - basename := filepath.Base(filename) - key := size + " " + basename - ret[key] = append(ret[key], filename) - } - errhandle(cmd.Wait()) - for key := range ret { - if len(ret[key]) < 2 { - delete(ret, key) + filename = strings.TrimSuffix(filename, "\x00") + if !strings.HasPrefix(filename, "/") { + panic("ugly filename") } - } - return ret -} - -func getFiemaps(paths []string) map[string][]string { - ret := map[string][]string{} - fmt.Fprintf(os.Stderr, "Getting fiemaps for %d files...\n", len(paths)) - - cnt := 0 - for len(paths) > 0 { - _paths := paths - arg_len := 0 - for i := range _paths { - arg_len += len(_paths[i]) + 1 - if arg_len > arg_max/2 { - _paths = _paths[:i-1] - break - } + errhandle(err) + fiemap, err := rd.ReadString('\x00') + fiemap = strings.TrimSuffix(fiemap, "\x00") + if !(strings.HasPrefix(fiemap, "logical=") || fiemap == "") { + panic("ugly fiemap") } - paths = paths[len(_paths):] - - cmd := exec.Command("./cow-extent-map", append([]string{"-m", "--"}, _paths...)...) - stdout, err := cmd.StdoutPipe() errhandle(err) - cmd.Stderr = os.Stderr - errhandle(cmd.Start()) - rd := bufio.NewReader(stdout) - for { - filename, err := rd.ReadString('\x00') - if filename == "" && err == io.EOF { - break - } - filename = strings.TrimSuffix(filename, "\x00") - if !strings.HasPrefix(filename, "/") { - panic("ugly filename") - } - errhandle(err) - fiemap, err := rd.ReadString('\x00') - fiemap = strings.TrimSuffix(fiemap, "\x00") - if !(strings.HasPrefix(fiemap, "logical=") || fiemap == "") { - panic("ugly fiemap") - } - errhandle(err) - ret[fiemap] = append(ret[fiemap], filename) - cnt++ - fmt.Fprintf(os.Stderr, "\r%d ", cnt) - } - errhandle(cmd.Wait()) + ret[fiemap] = append(ret[fiemap], filename) + cnt++ + sl.Put(fmt.Sprintf("Mapping extents... %d", cnt)) } + errhandle(cmd.Wait()) - fmt.Fprintf(os.Stderr, "\r...done \n") + sl.Put(fmt.Sprintf("Mapping extents... done; mapped %d files", cnt)) + sl.End() + io.WriteString(os.Stderr, "\n") return ret } func getChecksums(paths []string) map[string][]string { ret := map[string][]string{} - fmt.Fprintf(os.Stderr, "Generating checksums for %d files...\n", len(paths)) + sl := statusline.NewStatusLine(os.Stderr) cnt := 0 + sl.Put(fmt.Sprintf("Generating checksums for files... %d/%d\n", cnt, len(paths))) + for len(paths) > 0 { _paths := paths arg_len := 0 @@ -150,28 +114,23 @@ func getChecksums(paths []string) map[string][]string { ret[checksum] = append(ret[checksum], filename) cnt++ - fmt.Fprintf(os.Stderr, "\r%d ", cnt) + sl.Put(fmt.Sprintf("Generating checksums for files... %d/%d\n", cnt, len(paths))) } errhandle(cmd.Wait()) } - fmt.Fprintf(os.Stderr, "\r...done \n") + sl.Put(fmt.Sprintf("Generating checksums for files... done; summed %d files\n", cnt)) + sl.End() + io.WriteString(os.Stderr, "\n") return ret } func main() { - // we have no parallelism, don't let syscalls fan-out weird on - // many-core systems + // we have low parallelism, don't let syscalls fan-out weird + // on many-core systems runtime.GOMAXPROCS(1) - likely := findLikelyDups(os.Args[1:]) - - var flatLikely []string - for _, filenames := range likely { - flatLikely = append(flatLikely, filenames...) - } - - fiemap2filenames := getFiemaps(flatLikely) + fiemap2filenames := getFiemaps(os.Args[1:]) filename2fiemap := map[string]string{} for fiemap, filenames := range fiemap2filenames { |