|
| 1 | +// Implementation of the |
| 2 | +// [Boyer–Moore–Horspool algorithm](https://en.wikipedia.org/wiki/Boyer%E2%80%93Moore%E2%80%93Horspool_algorithm) |
| 3 | + |
1 | 4 | package horspool
|
2 | 5 |
|
3 |
| -// User defined. |
4 |
| -// Set to true to read input from two command line arguments |
5 |
| -// Set to false to read input from two files "pattern.txt" and "text.txt" |
6 |
| -// const commandLineInput bool = false |
| 6 | +import "errors" |
| 7 | + |
// ErrNotFound is the sentinel error returned by Horspool when the pattern
// does not occur in the input string. Compare against it with errors.Is.
| 8 | +var ErrNotFound = errors.New("pattern was not found in the input string")
| 9 | + |
| 10 | +func Horspool(t, p string) (int, error) { |
| 11 | + // in order to handle multy-byte character properly |
| 12 | + // the input is converted into rune arrays |
| 13 | + return horspool([]rune(t), []rune(p)) |
| 14 | +} |
| 15 | + |
| 16 | +func horspool(t, p []rune) (int, error) { |
| 17 | + shiftMap := computeShiftMap(t, p) |
| 18 | + pos := 0 |
| 19 | + for pos <= len(t)-len(p) { |
| 20 | + if isMatch(pos, t, p) { |
| 21 | + return pos, nil |
| 22 | + } |
| 23 | + if pos+len(p) >= len(t) { |
| 24 | + // because the remaining length of the input string |
| 25 | + // is the same as the length of the pattern |
| 26 | + // and it does not match the pattern |
| 27 | + // it is impossible to find the pattern |
| 28 | + break |
| 29 | + } |
| 30 | + |
| 31 | + // because of the check above |
| 32 | + // t[pos+len(p)] is defined |
| 33 | + pos += shiftMap[t[pos+len(p)]] |
| 34 | + } |
7 | 35 |
|
8 |
| -// Implementation of Boyer-Moore-Horspool algorithm (Suffix based approach). |
9 |
| -// Requires either a two command line arguments separated by a single space, |
10 |
| -// or two files in the same folder: "pattern.txt" containing the string to |
11 |
| -// be searched for, "text.txt" containing the text to be searched in. |
12 |
| -// func main() { |
13 |
| -// if commandLineInput == true { // case of command line input |
14 |
| -// args := os.Args |
15 |
| -// if len(args) <= 2 { |
16 |
| -// log.Fatal("Not enough arguments. Two string arguments separated by spaces are required!") |
17 |
| -// } |
18 |
| -// pattern := args[1] |
19 |
| -// s := args[2] |
20 |
| -// for i := 3; i < len(args); i++ { |
21 |
| -// s = s + " " + args[i] |
22 |
| -// } |
23 |
| -// if len(args[1]) > len(s) { |
24 |
| -// log.Fatal("Pattern is longer than text!") |
25 |
| -// } |
26 |
| -// fmt.Printf("\nRunning: Horspool algorithm.\n\n") |
27 |
| -// fmt.Printf("Search word (%d chars long): %q.\n", len(args[1]), pattern) |
28 |
| -// fmt.Printf("Text (%d chars long): %q.\n\n", len(s), s) |
29 |
| -// horspool(s, pattern) |
30 |
| -// } else if commandLineInput == false { // case of file line input |
31 |
| -// patFile, err := ioutil.ReadFile("pattern.txt") |
32 |
| -// if err != nil { |
33 |
| -// log.Fatal(err) |
34 |
| -// } |
35 |
| -// textFile, err := ioutil.ReadFile("text.txt") |
36 |
| -// if err != nil { |
37 |
| -// log.Fatal(err) |
38 |
| -// } |
39 |
| -// if len(patFile) > len(textFile) { |
40 |
| -// log.Fatal("Pattern is longer than text!") |
41 |
| -// } |
42 |
| -// fmt.Printf("\nRunning: Horspool algorithm.\n\n") |
43 |
| -// fmt.Printf("Search word (%d chars long): %q.\n", len(patFile), patFile) |
44 |
| -// fmt.Printf("Text (%d chars long): %q.\n\n", len(textFile), textFile) |
45 |
| -// horspool(string(textFile), string(patFile)) |
46 |
| -// } |
47 |
| -// } |
| 36 | + return -1, ErrNotFound |
| 37 | +} |
48 | 38 |
|
49 |
| -// // Function horspool performing the Horspool algorithm. |
50 |
| -// // Prints whether the word/pattern was found and on what position in the text or not. |
51 |
| -// func horspool(t, p string) { |
52 |
| -// m, n, c, pos := len(p), len(t), 0, 0 |
53 |
| -// //Perprocessing |
54 |
| -// d := preprocess(t, p) |
55 |
| -// //Map output |
56 |
| -// fmt.Printf("Precomputed shifts per symbol: ") |
57 |
| -// for key, value := range d { |
58 |
| -// fmt.Printf("%c:%d; ", key, value) |
59 |
| -// } |
60 |
| -// fmt.Println() |
61 |
| -// //Searching |
62 |
| -// for pos <= n-m { |
63 |
| -// j := m |
64 |
| -// if t[pos+j-1] != p[j-1] { |
65 |
| -// fmt.Printf("\n comparing characters %c %c at positions %d %d", t[pos+j-1], p[j-1], pos+j-1, j-1) |
66 |
| -// c++ |
67 |
| -// } |
68 |
| -// for j > 0 && t[pos+j-1] == p[j-1] { |
69 |
| -// fmt.Printf("\n comparing characters %c %c at positions %d %d", t[pos+j-1], p[j-1], pos+j-1, j-1) |
70 |
| -// c++ |
71 |
| -// fmt.Printf(" - match") |
72 |
| -// j-- |
73 |
| -// } |
74 |
| -// if j == 0 { |
75 |
| -// fmt.Printf("\n\nWord %q was found at position %d in %q. \n%d comparisons were done.", p, pos, t, c) |
76 |
| -// return |
77 |
| -// } |
78 |
| -// pos = pos + d[t[pos+m]] |
79 |
| -// } |
80 |
| -// fmt.Printf("\n\nWord was not found.\n%d comparisons were done.", c) |
81 |
| -// return |
82 |
| -// } |
// isMatch reports whether p equals the sub-slice of t that starts at pos
// and has the same length as p.
//
// The runes are compared from the last one to the first one. Other
// comparison orders are possible, see
// https://en.wikipedia.org/wiki/Boyer%E2%80%93Moore%E2%80%93Horspool_algorithm#Tuning_the_comparison_loop
//
// The caller must ensure that pos+len(p) does not exceed len(t).
func isMatch(pos int, t, p []rune) bool {
	for i := len(p) - 1; i >= 0; i-- {
		if t[pos+i] != p[i] {
			return false
		}
	}
	return true
}
83 | 50 |
|
84 |
| -// // Function that pre-computes map with Key: uint8 (char) Value: int. |
85 |
| -// // Values determine safe shifting of search window. |
86 |
| -// func preprocess(t, p string) (d map[uint8]int) { |
87 |
| -// d = make(map[uint8]int) |
88 |
| -// for i := 0; i < len(t); i++ { |
89 |
| -// d[t[i]] = len(p) |
90 |
| -// } |
91 |
| -// for i := 0; i < len(p); i++ { |
92 |
| -// d[p[i]] = len(p) - i |
93 |
| -// } |
94 |
| -// return d |
95 |
| -// } |
// computeShiftMap builds the table of window shifts used by the search.
//
// The table is meant to be indexed with the rune of t that lies immediately
// after the current window:
//
//   - for a rune that occurs in p, the shift is len(p)-i where i is the index
//     of its last occurrence in p, which aligns that occurrence with the rune;
//   - for a rune of t that does not occur in p, the shift is len(p)+1, because
//     every window that still covers that rune is guaranteed to mismatch.
//
// Every rune of t receives an entry, and every entry is at least 1 whenever
// p is non-empty, so a lookup with a rune taken from t always advances.
func computeShiftMap(t, p []rune) (res map[rune]int) {
	m := len(p)
	res = make(map[rune]int, m)

	// Later occurrences overwrite earlier ones, leaving the smallest shift.
	for i, pCode := range p {
		res[pCode] = m - i
	}

	// Runes that only appear in the text allow skipping past them entirely.
	for _, tCode := range t {
		if _, inPattern := res[tCode]; !inPattern {
			res[tCode] = m + 1
		}
	}
	return res
}
0 commit comments