From 436e1681c9fcda246c6d84526fc79c87adc7b28d Mon Sep 17 00:00:00 2001 From: Luke Shumaker Date: Wed, 13 Jul 2022 21:31:23 -0600 Subject: Move lib/kmp to lib/diskio --- lib/diskio/kmp.go | 85 ++++++++++++++++++++++ lib/diskio/kmp_test.go | 62 ++++++++++++++++ ...6820e3f98383aebadd2af3a22aa248546a228b08451c30d | 3 + ...14e1f3425bfa6ccd6bfd42824a26e8cbd63b5117934e600 | 3 + ...293dbf423c1a816b697e2763a29d7c37aa476d6eef6fd60 | 3 + ...3b8e08d58d0f69cc22aefb12fd9b5931e49ff0e41953738 | 3 + ...7d7b2fc0237d369082a6bf5dfe39e05c113b42ff6218d5d | 3 + lib/kmp/kmp.go | 83 --------------------- lib/kmp/kmp_test.go | 62 ---------------- ...6820e3f98383aebadd2af3a22aa248546a228b08451c30d | 3 - ...14e1f3425bfa6ccd6bfd42824a26e8cbd63b5117934e600 | 3 - ...293dbf423c1a816b697e2763a29d7c37aa476d6eef6fd60 | 3 - ...3b8e08d58d0f69cc22aefb12fd9b5931e49ff0e41953738 | 3 - ...7d7b2fc0237d369082a6bf5dfe39e05c113b42ff6218d5d | 3 - 14 files changed, 162 insertions(+), 160 deletions(-) create mode 100644 lib/diskio/kmp.go create mode 100644 lib/diskio/kmp_test.go create mode 100644 lib/diskio/testdata/fuzz/FuzzFindAll/593a5dd328ee86bac6820e3f98383aebadd2af3a22aa248546a228b08451c30d create mode 100644 lib/diskio/testdata/fuzz/FuzzFindAll/6bd9babbebf7eb78814e1f3425bfa6ccd6bfd42824a26e8cbd63b5117934e600 create mode 100644 lib/diskio/testdata/fuzz/FuzzFindAll/84ed65595ad05a58e293dbf423c1a816b697e2763a29d7c37aa476d6eef6fd60 create mode 100644 lib/diskio/testdata/fuzz/FuzzFindAll/9be40f71bc49b1b5c3b8e08d58d0f69cc22aefb12fd9b5931e49ff0e41953738 create mode 100644 lib/diskio/testdata/fuzz/FuzzFindAll/e43317ec61b0da9627d7b2fc0237d369082a6bf5dfe39e05c113b42ff6218d5d delete mode 100644 lib/kmp/kmp.go delete mode 100644 lib/kmp/kmp_test.go delete mode 100644 lib/kmp/testdata/fuzz/FuzzFindAll/593a5dd328ee86bac6820e3f98383aebadd2af3a22aa248546a228b08451c30d delete mode 100644 lib/kmp/testdata/fuzz/FuzzFindAll/6bd9babbebf7eb78814e1f3425bfa6ccd6bfd42824a26e8cbd63b5117934e600 delete mode 100644 lib/kmp/testdata/fuzz/FuzzFindAll/84ed65595ad05a58e293dbf423c1a816b697e2763a29d7c37aa476d6eef6fd60 delete mode 100644 lib/kmp/testdata/fuzz/FuzzFindAll/9be40f71bc49b1b5c3b8e08d58d0f69cc22aefb12fd9b5931e49ff0e41953738 delete mode 100644 lib/kmp/testdata/fuzz/FuzzFindAll/e43317ec61b0da9627d7b2fc0237d369082a6bf5dfe39e05c113b42ff6218d5d (limited to 'lib') diff --git a/lib/diskio/kmp.go b/lib/diskio/kmp.go new file mode 100644 index 0000000..4c0f531 --- /dev/null +++ b/lib/diskio/kmp.go @@ -0,0 +1,85 @@ +// Copyright (C) 2022 Luke Shumaker +// +// SPDX-License-Identifier: GPL-2.0-or-later + +package diskio + +import ( + "errors" + "io" +) + +// buildKMPTable takes the string 'substr', and returns a table such +// that 'table[matchLen-1]' is the largest value 'val' for which 'val < matchLen' and +// 'substr[:val] == substr[matchLen-val:matchLen]'. +func buildKMPTable(substr []byte) []int { + table := make([]int, len(substr)) + for j := range table { + if j == 0 { + // First entry must always be 0 (in order to + // satisfy 'val < matchLen'). + continue + } + val := table[j-1] + // not a match; go back + for val > 0 && substr[j] != substr[val] { + val = table[val-1] + } + // is a match; go forward + if substr[val] == substr[j] { + val++ + } + table[j] = val + } + return table +} + +// FindAll returns the starting-position of all possibly-overlapping +// occurances of 'substr' in the 'r' stream. +// +// Will panic if len(substr)==0. +// +// Uses the Knuth-Morris-Pratt algorithm. +func FindAll(r io.ByteReader, substr []byte) ([]int64, error) { + if len(substr) == 0 { + panic(errors.New("diskio.FindAll: empty substring")) + } + table := buildKMPTable(substr) + + var matches []int64 + var curMatchBeg int64 + var curMatchLen int + + pos := int64(-1) // if 'r' were a slice; define 'pos' such that 'chr=r[pos]' + for { + // I/O + var chr byte + chr, err := r.ReadByte() + if err != nil { + if errors.Is(err, io.EOF) { + err = nil + } + return matches, err + } + pos++ + + // Consider 'chr' + for curMatchLen > 0 && chr != substr[curMatchLen] { // shorten the match + overlap := table[curMatchLen-1] + curMatchBeg += int64(curMatchLen - overlap) + curMatchLen = overlap + } + if chr == substr[curMatchLen] { // lengthen the match + if curMatchLen == 0 { + curMatchBeg = pos + } + curMatchLen++ + if curMatchLen == len(substr) { + matches = append(matches, curMatchBeg) + overlap := table[curMatchLen-1] + curMatchBeg += int64(curMatchLen - overlap) + curMatchLen = overlap + } + } + } +} diff --git a/lib/diskio/kmp_test.go b/lib/diskio/kmp_test.go new file mode 100644 index 0000000..6c6ef78 --- /dev/null +++ b/lib/diskio/kmp_test.go @@ -0,0 +1,62 @@ +// Copyright (C) 2022 Luke Shumaker +// +// SPDX-License-Identifier: GPL-2.0-or-later + +package diskio + +import ( + "bytes" + "testing" + + "github.com/stretchr/testify/assert" +) + +func TestBuildKMPTable(t *testing.T) { + substr := []byte("ababaa") + table := buildKMPTable(substr) + assert.Equal(t, + []int{0, 0, 1, 2, 3, 1}, + table) + for j, val := range table { + matchLen := j + 1 + assert.Equalf(t, substr[:val], substr[matchLen-val:matchLen], + "for table[%d]=%d", j, val) + } +} + +func FuzzBuildKMPTable(f *testing.F) { + f.Add([]byte("ababaa")) + f.Fuzz(func(t *testing.T, substr []byte) { + table := buildKMPTable(substr) + assert.Equal(t, len(substr), len(table), "length") + for j, val := range table { + matchLen := j + 1 + assert.Equalf(t, substr[:val], substr[matchLen-val:matchLen], + "for table[%d]=%d", j, val) + } + }) +} + +func NaiveFindAll(str, substr []byte) []int64 { + var matches []int64 + for i := range str { + if bytes.HasPrefix(str[i:], substr) { + matches = append(matches, int64(i)) + } + } + return matches +} + +func FuzzFindAll(f *testing.F) { + f.Fuzz(func(t *testing.T, str, substr []byte) { + if len(substr) == 0 { + t.Skip() + } + t.Logf("str =%q", str) + t.Logf("substr=%q", substr) + exp := NaiveFindAll(str, substr) + act, err := FindAll(bytes.NewReader(str), substr) + assert.NoError(t, err) + assert.Equal(t, exp, act) + }) +} diff --git a/lib/diskio/testdata/fuzz/FuzzFindAll/593a5dd328ee86bac6820e3f98383aebadd2af3a22aa248546a228b08451c30d b/lib/diskio/testdata/fuzz/FuzzFindAll/593a5dd328ee86bac6820e3f98383aebadd2af3a22aa248546a228b08451c30d new file mode 100644 index 0000000..9d14adf --- /dev/null +++ b/lib/diskio/testdata/fuzz/FuzzFindAll/593a5dd328ee86bac6820e3f98383aebadd2af3a22aa248546a228b08451c30d @@ -0,0 +1,3 @@ +go test fuzz v1 +[]byte("0") +[]byte("1") diff --git a/lib/diskio/testdata/fuzz/FuzzFindAll/6bd9babbebf7eb78814e1f3425bfa6ccd6bfd42824a26e8cbd63b5117934e600 b/lib/diskio/testdata/fuzz/FuzzFindAll/6bd9babbebf7eb78814e1f3425bfa6ccd6bfd42824a26e8cbd63b5117934e600 new file mode 100644 index 0000000..269f061 --- /dev/null +++ b/lib/diskio/testdata/fuzz/FuzzFindAll/6bd9babbebf7eb78814e1f3425bfa6ccd6bfd42824a26e8cbd63b5117934e600 @@ -0,0 +1,3 @@ +go test fuzz v1 +[]byte("0") +[]byte("0") diff --git a/lib/diskio/testdata/fuzz/FuzzFindAll/84ed65595ad05a58e293dbf423c1a816b697e2763a29d7c37aa476d6eef6fd60 b/lib/diskio/testdata/fuzz/FuzzFindAll/84ed65595ad05a58e293dbf423c1a816b697e2763a29d7c37aa476d6eef6fd60 new file mode 100644 index 0000000..b8f1562 --- /dev/null +++ b/lib/diskio/testdata/fuzz/FuzzFindAll/84ed65595ad05a58e293dbf423c1a816b697e2763a29d7c37aa476d6eef6fd60 @@ -0,0 +1,3 @@ +go test fuzz v1 +[]byte("0") +[]byte("") diff --git a/lib/diskio/testdata/fuzz/FuzzFindAll/9be40f71bc49b1b5c3b8e08d58d0f69cc22aefb12fd9b5931e49ff0e41953738 b/lib/diskio/testdata/fuzz/FuzzFindAll/9be40f71bc49b1b5c3b8e08d58d0f69cc22aefb12fd9b5931e49ff0e41953738 new file mode 100644 index 0000000..be67506 --- /dev/null +++ b/lib/diskio/testdata/fuzz/FuzzFindAll/9be40f71bc49b1b5c3b8e08d58d0f69cc22aefb12fd9b5931e49ff0e41953738 @@ -0,0 +1,3 @@ +go test fuzz v1 +[]byte("\xde\xdb!") +[]byte("\xde\xdb") diff --git a/lib/diskio/testdata/fuzz/FuzzFindAll/e43317ec61b0da9627d7b2fc0237d369082a6bf5dfe39e05c113b42ff6218d5d b/lib/diskio/testdata/fuzz/FuzzFindAll/e43317ec61b0da9627d7b2fc0237d369082a6bf5dfe39e05c113b42ff6218d5d new file mode 100644 index 0000000..c3bfa37 --- /dev/null +++ b/lib/diskio/testdata/fuzz/FuzzFindAll/e43317ec61b0da9627d7b2fc0237d369082a6bf5dfe39e05c113b42ff6218d5d @@ -0,0 +1,3 @@ +go test fuzz v1 +[]byte("\x10\x10\x15") +[]byte("\x10\x15") diff --git a/lib/kmp/kmp.go b/lib/kmp/kmp.go deleted file mode 100644 index 66b5516..0000000 --- a/lib/kmp/kmp.go +++ /dev/null @@ -1,83 +0,0 @@ -// Copyright (C) 2022 Luke Shumaker -// -// SPDX-License-Identifier: GPL-2.0-or-later - -package kmp - -import ( - "errors" - "io" -) - -// buildTable takes the string 'substr', and returns a table such -// that 'table[matchLen-1]' is the largest value 'val' for which 'val < matchLen' and -// 'substr[:val] == substr[matchLen-val:matchLen]'. -func buildTable(substr []byte) []int { - table := make([]int, len(substr)) - for j := range table { - if j == 0 { - // First entry must always be 0 (in order to - // satisfy 'val < matchLen'). - continue - } - val := table[j-1] - // not a match; go back - for val > 0 && substr[j] != substr[val] { - val = table[val-1] - } - // is a match; go forward - if substr[val] == substr[j] { - val++ - } - table[j] = val - } - return table -} - -// FindAll returns the starting-position of all possibly-overlapping -// occurances of 'substr' in the 'r' stream. -// -// Will panic if len(substr)==0. -func FindAll(r io.ByteReader, substr []byte) ([]int64, error) { - if len(substr) == 0 { - panic(errors.New("kmp.FindAll: empty substring")) - } - table := buildTable(substr) - - var matches []int64 - var curMatchBeg int64 - var curMatchLen int - - pos := int64(-1) // if 'r' were a slice; define 'pos' such that 'chr=r[pos]' - for { - // I/O - var chr byte - chr, err := r.ReadByte() - if err != nil { - if errors.Is(err, io.EOF) { - err = nil - } - return matches, err - } - pos++ - - // Consider 'chr' - for curMatchLen > 0 && chr != substr[curMatchLen] { // shorten the match - overlap := table[curMatchLen-1] - curMatchBeg += int64(curMatchLen - overlap) - curMatchLen = overlap - } - if chr == substr[curMatchLen] { // lengthen the match - if curMatchLen == 0 { - curMatchBeg = pos - } - curMatchLen++ - if curMatchLen == len(substr) { - matches = append(matches, curMatchBeg) - overlap := table[curMatchLen-1] - curMatchBeg += int64(curMatchLen - overlap) - curMatchLen = overlap - } - } - } -} diff --git a/lib/kmp/kmp_test.go b/lib/kmp/kmp_test.go deleted file mode 100644 index 6a11b50..0000000 --- a/lib/kmp/kmp_test.go +++ /dev/null @@ -1,62 +0,0 @@ -// Copyright (C) 2022 Luke Shumaker -// -// SPDX-License-Identifier: GPL-2.0-or-later - -package kmp - -import ( - "bytes" - "testing" - - "github.com/stretchr/testify/assert" -) - -func TestBuildTable(t *testing.T) { - substr := []byte("ababaa") - table := buildTable(substr) - assert.Equal(t, - []int{0, 0, 1, 2, 3, 1}, - table) - for j, val := range table { - matchLen := j + 1 - assert.Equalf(t, substr[:val], substr[matchLen-val:matchLen], - "for table[%d]=%d", j, val) - } -} - -func FuzzBuildTable(f *testing.F) { - f.Add([]byte("ababaa")) - f.Fuzz(func(t *testing.T, substr []byte) { - table := buildTable(substr) - assert.Equal(t, len(substr), len(table), "length") - for j, val := range table { - matchLen := j + 1 - assert.Equalf(t, substr[:val], substr[matchLen-val:matchLen], - "for table[%d]=%d", j, val) - } - }) -} - -func NaiveFindAll(str, substr []byte) []int64 { - var matches []int64 - for i := range str { - if bytes.HasPrefix(str[i:], substr) { - matches = append(matches, int64(i)) - } - } - return matches -} - -func FuzzFindAll(f *testing.F) { - f.Fuzz(func(t *testing.T, str, substr []byte) { - if len(substr) == 0 { - t.Skip() - } - t.Logf("str =%q", str) - t.Logf("substr=%q", substr) - exp := NaiveFindAll(str, substr) - act, err := FindAll(bytes.NewReader(str), substr) - assert.NoError(t, err) - assert.Equal(t, exp, act) - }) -} diff --git a/lib/kmp/testdata/fuzz/FuzzFindAll/593a5dd328ee86bac6820e3f98383aebadd2af3a22aa248546a228b08451c30d b/lib/kmp/testdata/fuzz/FuzzFindAll/593a5dd328ee86bac6820e3f98383aebadd2af3a22aa248546a228b08451c30d deleted file mode 100644 index 9d14adf..0000000 --- a/lib/kmp/testdata/fuzz/FuzzFindAll/593a5dd328ee86bac6820e3f98383aebadd2af3a22aa248546a228b08451c30d +++ /dev/null @@ -1,3 +0,0 @@ -go test fuzz v1 -[]byte("0") -[]byte("1") diff --git a/lib/kmp/testdata/fuzz/FuzzFindAll/6bd9babbebf7eb78814e1f3425bfa6ccd6bfd42824a26e8cbd63b5117934e600 b/lib/kmp/testdata/fuzz/FuzzFindAll/6bd9babbebf7eb78814e1f3425bfa6ccd6bfd42824a26e8cbd63b5117934e600 deleted file mode 100644 index 269f061..0000000 --- a/lib/kmp/testdata/fuzz/FuzzFindAll/6bd9babbebf7eb78814e1f3425bfa6ccd6bfd42824a26e8cbd63b5117934e600 +++ /dev/null @@ -1,3 +0,0 @@ -go test fuzz v1 -[]byte("0") -[]byte("0") diff --git a/lib/kmp/testdata/fuzz/FuzzFindAll/84ed65595ad05a58e293dbf423c1a816b697e2763a29d7c37aa476d6eef6fd60 b/lib/kmp/testdata/fuzz/FuzzFindAll/84ed65595ad05a58e293dbf423c1a816b697e2763a29d7c37aa476d6eef6fd60 deleted file mode 100644 index b8f1562..0000000 --- a/lib/kmp/testdata/fuzz/FuzzFindAll/84ed65595ad05a58e293dbf423c1a816b697e2763a29d7c37aa476d6eef6fd60 +++ /dev/null @@ -1,3 +0,0 @@ -go test fuzz v1 -[]byte("0") -[]byte("") diff --git a/lib/kmp/testdata/fuzz/FuzzFindAll/9be40f71bc49b1b5c3b8e08d58d0f69cc22aefb12fd9b5931e49ff0e41953738 b/lib/kmp/testdata/fuzz/FuzzFindAll/9be40f71bc49b1b5c3b8e08d58d0f69cc22aefb12fd9b5931e49ff0e41953738 deleted file mode 100644 index be67506..0000000 --- a/lib/kmp/testdata/fuzz/FuzzFindAll/9be40f71bc49b1b5c3b8e08d58d0f69cc22aefb12fd9b5931e49ff0e41953738 +++ /dev/null @@ -1,3 +0,0 @@ -go test fuzz v1 -[]byte("\xde\xdb!") -[]byte("\xde\xdb") diff --git a/lib/kmp/testdata/fuzz/FuzzFindAll/e43317ec61b0da9627d7b2fc0237d369082a6bf5dfe39e05c113b42ff6218d5d b/lib/kmp/testdata/fuzz/FuzzFindAll/e43317ec61b0da9627d7b2fc0237d369082a6bf5dfe39e05c113b42ff6218d5d deleted file mode 100644 index c3bfa37..0000000 --- a/lib/kmp/testdata/fuzz/FuzzFindAll/e43317ec61b0da9627d7b2fc0237d369082a6bf5dfe39e05c113b42ff6218d5d +++ /dev/null @@ -1,3 +0,0 @@ -go test fuzz v1 -[]byte("\x10\x10\x15") -[]byte("\x10\x15") -- cgit v1.2.3-2-g168b