Skip to content

Commit a52d8bf

Browse files
authored
Reactivate horspool (TheAlgorithms#663)
1 parent 4460ba0 commit a52d8bf

File tree

2 files changed

+123
-90
lines changed

2 files changed

+123
-90
lines changed

strings/horspool/horspool.go

+55-90
Original file line numberDiff line numberDiff line change
@@ -1,95 +1,60 @@
1+
// Package horspool implements the
2+
// [Boyer–Moore–Horspool algorithm](https://en.wikipedia.org/wiki/Boyer%E2%80%93Moore%E2%80%93Horspool_algorithm)
3+
14
package horspool
25

3-
// User defined.
4-
// Set to true to read input from two command line arguments
5-
// Set to false to read input from two files "pattern.txt" and "text.txt"
6-
// const commandLineInput bool = false
6+
import "errors"
7+
8+
// ErrNotFound is the error returned when the pattern does not occur in the
// input string.
var ErrNotFound = errors.New("pattern was not found in the input string")
9+
10+
func Horspool(t, p string) (int, error) {
11+
// in order to handle multy-byte character properly
12+
// the input is converted into rune arrays
13+
return horspool([]rune(t), []rune(p))
14+
}
15+
16+
func horspool(t, p []rune) (int, error) {
17+
shiftMap := computeShiftMap(t, p)
18+
pos := 0
19+
for pos <= len(t)-len(p) {
20+
if isMatch(pos, t, p) {
21+
return pos, nil
22+
}
23+
if pos+len(p) >= len(t) {
24+
// because the remaining length of the input string
25+
// is the same as the length of the pattern
26+
// and it does not match the pattern
27+
// it is impossible to find the pattern
28+
break
29+
}
30+
31+
// because of the check above
32+
// t[pos+len(p)] is defined
33+
pos += shiftMap[t[pos+len(p)]]
34+
}
735

8-
// Implementation of Boyer-Moore-Horspool algorithm (Suffix based approach).
9-
// Requires either a two command line arguments separated by a single space,
10-
// or two files in the same folder: "pattern.txt" containing the string to
11-
// be searched for, "text.txt" containing the text to be searched in.
12-
// func main() {
13-
// if commandLineInput == true { // case of command line input
14-
// args := os.Args
15-
// if len(args) <= 2 {
16-
// log.Fatal("Not enough arguments. Two string arguments separated by spaces are required!")
17-
// }
18-
// pattern := args[1]
19-
// s := args[2]
20-
// for i := 3; i < len(args); i++ {
21-
// s = s + " " + args[i]
22-
// }
23-
// if len(args[1]) > len(s) {
24-
// log.Fatal("Pattern is longer than text!")
25-
// }
26-
// fmt.Printf("\nRunning: Horspool algorithm.\n\n")
27-
// fmt.Printf("Search word (%d chars long): %q.\n", len(args[1]), pattern)
28-
// fmt.Printf("Text (%d chars long): %q.\n\n", len(s), s)
29-
// horspool(s, pattern)
30-
// } else if commandLineInput == false { // case of file line input
31-
// patFile, err := ioutil.ReadFile("pattern.txt")
32-
// if err != nil {
33-
// log.Fatal(err)
34-
// }
35-
// textFile, err := ioutil.ReadFile("text.txt")
36-
// if err != nil {
37-
// log.Fatal(err)
38-
// }
39-
// if len(patFile) > len(textFile) {
40-
// log.Fatal("Pattern is longer than text!")
41-
// }
42-
// fmt.Printf("\nRunning: Horspool algorithm.\n\n")
43-
// fmt.Printf("Search word (%d chars long): %q.\n", len(patFile), patFile)
44-
// fmt.Printf("Text (%d chars long): %q.\n\n", len(textFile), textFile)
45-
// horspool(string(textFile), string(patFile))
46-
// }
47-
// }
36+
return -1, ErrNotFound
37+
}
4838

49-
// // Function horspool performing the Horspool algorithm.
50-
// // Prints whether the word/pattern was found and on what position in the text or not.
51-
// func horspool(t, p string) {
52-
// m, n, c, pos := len(p), len(t), 0, 0
53-
// //Preprocessing
54-
// d := preprocess(t, p)
55-
// //Map output
56-
// fmt.Printf("Precomputed shifts per symbol: ")
57-
// for key, value := range d {
58-
// fmt.Printf("%c:%d; ", key, value)
59-
// }
60-
// fmt.Println()
61-
// //Searching
62-
// for pos <= n-m {
63-
// j := m
64-
// if t[pos+j-1] != p[j-1] {
65-
// fmt.Printf("\n comparing characters %c %c at positions %d %d", t[pos+j-1], p[j-1], pos+j-1, j-1)
66-
// c++
67-
// }
68-
// for j > 0 && t[pos+j-1] == p[j-1] {
69-
// fmt.Printf("\n comparing characters %c %c at positions %d %d", t[pos+j-1], p[j-1], pos+j-1, j-1)
70-
// c++
71-
// fmt.Printf(" - match")
72-
// j--
73-
// }
74-
// if j == 0 {
75-
// fmt.Printf("\n\nWord %q was found at position %d in %q. \n%d comparisons were done.", p, pos, t, c)
76-
// return
77-
// }
78-
// pos = pos + d[t[pos+m]]
79-
// }
80-
// fmt.Printf("\n\nWord was not found.\n%d comparisons were done.", c)
81-
// return
82-
// }
39+
// isMatch reports whether p occurs in t starting at index pos, that is,
// whether t[pos:pos+len(p)] equals p. It returns false when that window does
// not lie completely inside t. An empty p matches at every position from 0
// to len(t).
//
// Note that the comparison iterates backward, from the last rune of p to the
// first.
// There are [other](https://en.wikipedia.org/wiki/Boyer%E2%80%93Moore%E2%80%93Horspool_algorithm#Tuning_the_comparison_loop)
// approaches possible.
func isMatch(pos int, t, p []rune) bool {
	// Reject windows that start before t or run past its end instead of
	// panicking with an index out of range.
	if pos < 0 || pos > len(t)-len(p) {
		return false
	}
	for j := len(p) - 1; j >= 0; j-- {
		if t[pos+j] != p[j] {
			return false
		}
	}
	return true
}
8350

84-
// // Function that pre-computes map with Key: uint8 (char) Value: int.
85-
// // Values determine safe shifting of search window.
86-
// func preprocess(t, p string) (d map[uint8]int) {
87-
// d = make(map[uint8]int)
88-
// for i := 0; i < len(t); i++ {
89-
// d[t[i]] = len(p)
90-
// }
91-
// for i := 0; i < len(p); i++ {
92-
// d[p[i]] = len(p) - i
93-
// }
94-
// return d
95-
// }
51+
// computeShiftMap builds the table of window shifts used during the search.
// Every rune occurring in t first receives the default shift len(p). Each
// rune of p at index i is then assigned len(p)-i, so for a rune that is
// repeated in p the value of its last occurrence wins.
func computeShiftMap(t, p []rune) (res map[rune]int) {
	patternLen := len(p)
	shifts := map[rune]int{}

	// Default shift for everything that appears in the text.
	for i := 0; i < len(t); i++ {
		shifts[t[i]] = patternLen
	}

	// Runes of the pattern override the default; later indices overwrite
	// earlier ones.
	for offset := 0; offset < patternLen; offset++ {
		shifts[p[offset]] = patternLen - offset
	}

	return shifts
}

strings/horspool/horspool_test.go

+68
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,68 @@
1+
// horspool_test.go
2+
// description: Tests for horspool
3+
// see horspool.go
4+
5+
package horspool
6+
7+
import "testing"
8+
import "fmt"
9+
10+
func TestLHorspool(t *testing.T) {
11+
testCases := []struct {
12+
input string
13+
pattern string
14+
expected int
15+
}{
16+
{"aaaaXaaa", "X", 4},
17+
{"aaaaXXaa", "XX", 4},
18+
{"Xaaab", "X", 0},
19+
{"XYaab", "XY", 0},
20+
{"abcefghXYZ", "XYZ", 7},
21+
{"abcefgh€YZ⌘", "€YZ", 7},
22+
{"⌘bcefgh€YZ⌘", "€YZ", 7},
23+
{"abc", "abc", 0},
24+
{"", "", 0},
25+
{"a", "", 0},
26+
{"a", "a", 0},
27+
{"aa", "a", 0},
28+
{"aa", "aa", 0},
29+
}
30+
for _, tc := range testCases {
31+
t.Run(fmt.Sprint("test with ", tc.input, " ", tc.pattern), func(t *testing.T) {
32+
result, curError := Horspool(tc.input, tc.pattern)
33+
if curError != nil {
34+
t.Fatalf("Got unexpected error")
35+
}
36+
if tc.expected != result {
37+
t.Fatalf("expected %d, got %d", tc.expected, result)
38+
}
39+
})
40+
}
41+
}
42+
43+
func TestLHorspoolNotExisintPattern(t *testing.T) {
44+
testCases := []struct {
45+
input string
46+
pattern string
47+
}{
48+
{"", "X"},
49+
{"X", "Y"},
50+
{"X", "XX"},
51+
{"aaaaaaaXaXaaaa", "XXX"},
52+
{"aaaaaaaXaX", "XXX"},
53+
{"XaX", "XXX"},
54+
{"XaX", "XXX"},
55+
{"\xe2\x8c\x98", "\x98"},
56+
}
57+
for _, tc := range testCases {
58+
t.Run(fmt.Sprint("test with ", tc.input, " ", tc.pattern), func(t *testing.T) {
59+
result, curError := Horspool(tc.input, tc.pattern)
60+
if curError != ErrNotFound {
61+
t.Fatalf("Got unexpected error")
62+
}
63+
if result != -1 {
64+
t.Fatalf("expected -1, got %d", result)
65+
}
66+
})
67+
}
68+
}

0 commit comments

Comments
 (0)