Skip to content

feat: add boyer moore algorithm implementation #2441

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 34 commits into from
Jun 16, 2023
Merged
Changes from 9 commits
Commits
Show all changes
34 commits
Select commit Hold shift + click to select a range
1ca23c7
add boyer moore algorithm implementation
stoychoX Mar 12, 2023
6a561c1
add a one-line description of what the library/header is for
stoychoX Mar 16, 2023
859385e
fix comments pattern and make tests static
stoychoX Mar 16, 2023
86fa7ce
Merge branch 'TheAlgorithms:master' into master
stoychoX Mar 30, 2023
01c8049
add documentation
stoychoX Mar 30, 2023
e5444da
add namespaces
stoychoX Mar 30, 2023
f6fb91d
Merge branch 'TheAlgorithms:master' into master
stoychoX Apr 1, 2023
718284f
fix all warnings for clang-tidy.exe <filename>
stoychoX Apr 1, 2023
dd98f84
Change lib from limits to climits (CHAR_MAX macro)
stoychoX Apr 10, 2023
507c0f3
Merge branch 'TheAlgorithms:master' into master
stoychoX Apr 19, 2023
3f438ff
Add brief description of boyer-moore algorithm
stoychoX Apr 19, 2023
845d8a1
Fix styling
stoychoX Apr 19, 2023
a46de77
Merge branch 'TheAlgorithms:master' into master
stoychoX May 1, 2023
d87a3e9
Add needed documentation
stoychoX May 1, 2023
87fffc9
my commit
stoychoX May 10, 2023
97b38ad
fix type of index_pattern
stoychoX May 13, 2023
c42f5f6
Fix clang-warnings
stoychoX May 13, 2023
47784c9
Merge branch 'master' into master
Panquesito7 May 26, 2023
bc5e346
chore: apply suggestions from code review
Panquesito7 May 26, 2023
959075a
chore: add print message after tests
Panquesito7 May 26, 2023
5cb9c0a
Update strings/boyer_moore.cpp
stoychoX May 30, 2023
259f53c
Update strings/boyer_moore.cpp
stoychoX May 30, 2023
7115d63
Update strings/boyer_moore.cpp
stoychoX May 30, 2023
a0bb48e
Update strings/boyer_moore.cpp
stoychoX May 30, 2023
44b4ffa
Update strings/boyer_moore.cpp
stoychoX May 30, 2023
45effd1
Update strings/boyer_moore.cpp
stoychoX May 30, 2023
4039f01
Update strings/boyer_moore.cpp
stoychoX May 30, 2023
e6677b8
Update strings/boyer_moore.cpp
stoychoX May 30, 2023
961a35b
Update strings/boyer_moore.cpp
stoychoX May 31, 2023
a07b77c
Merge branch 'TheAlgorithms:master' into master
stoychoX May 31, 2023
611cfb4
fix: variable name
stoychoX May 31, 2023
5800dd8
Update strings/boyer_moore.cpp
stoychoX Jun 9, 2023
6192838
Merge branch 'master' into master
realstealthninja Jun 16, 2023
0de51d6
Merge branch 'master' into master
Panquesito7 Jun 16, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
222 changes: 222 additions & 0 deletions strings/boyer_moore.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,222 @@
/**
* @file
* @brief Boyer-Moore algorithm for finding all occurrences of a pattern in a
* given text.
* @author [Stoycho Kyosev](https://github.com/stoychoX)
*/
#include <algorithm> /// for std::max
#include <cassert> /// for assert
#include <climits> /// for CHAR_MAX and UCHAR_MAX macros
#include <cstring> /// for strlen
#include <string> /// for std::string
#include <vector> /// for std::vector

/// number of symbols in the alphabet we use: one slot for each possible
/// `unsigned char` value (0..UCHAR_MAX). Using CHAR_MAX here would leave the
/// table one entry short, so the highest character code would index past it.
#define APLHABET_SIZE (UCHAR_MAX + 1)

/**
* @namespace
* @brief String algorithms
*/
namespace strings {
/**
 * @namespace
 * @brief Functions for the [Boyer
 * Moore](https://en.wikipedia.org/wiki/Boyer%E2%80%93Moore_string-search_algorithm)
 * algorithm implementation
 */
namespace boyer_moore {
/// Number of slots in the bad character table: one per `unsigned char` value,
/// so that every byte of the text is a valid index.
constexpr size_t alphabet_size = static_cast<size_t>(UCHAR_MAX) + 1;

/**
 * @brief A structure representing all the data we need to search the
 * preprocessed pattern in text.
 */
struct pattern {
    std::string pat;  ///< the pattern itself

    /// @brief bad char table used in Bad Character Heuristic
    /// [https://www.geeksforgeeks.org/boyer-moore-algorithm-for-pattern-searching/]
    /// bad_char[c] is the distance from the last occurrence of byte c in the
    /// pattern to the end of the pattern (pattern length when c is absent).
    std::vector<size_t> bad_char;

    /// @brief good suffix table used for Good Suffix heuristic
    /// [https://www.geeksforgeeks.org/boyer-moore-algorithm-good-suffix-heuristic/?ref=rp]
    /// good_suffix[i + 1] is the shift to apply after a mismatch at pattern
    /// index i; good_suffix[0] is the shift to apply after a full match.
    std::vector<size_t> good_suffix;
};

/**
 * @brief A function that preprocesses the good suffix table
 *
 * @param str The string being preprocessed
 * @param arg The good suffix table; any previous content is discarded and it
 * ends up with str.size() + 1 entries, all of them at least 1
 * @return void
 */
void init_good_suffix(const std::string& str, std::vector<size_t>& arg) {
    const size_t length = str.length();

    // assign (not resize) so stale entries never survive a re-initialization
    arg.assign(length + 1, 0);

    // border_pos[i] - the start index of the widest border of the suffix
    // str[i..], i.e. of its longest proper suffix which is also its prefix.
    std::vector<size_t> border_pos(length + 1, 0);

    size_t current_char = length;
    size_t border_index = length + 1;
    border_pos[current_char] = border_index;

    // Case 1: the matched suffix occurs somewhere else in the pattern,
    // preceded by a different character.
    while (current_char > 0) {
        while (border_index <= length &&
               str[current_char - 1] != str[border_index - 1]) {
            // the border cannot be extended to the left: record the shift
            // (only the first, i.e. smallest, shift per position is kept)
            if (arg[border_index] == 0) {
                arg[border_index] = border_index - current_char;
            }

            border_index = border_pos[border_index];
        }

        --current_char;
        --border_index;
        border_pos[current_char] = border_index;
    }

    // Case 2: only a part of the matched suffix occurs at the beginning of
    // the pattern. Every still empty slot gets the widest usable border.
    size_t largest_border_index = border_pos[0];

    // `<=` so that the last slot (mismatch on the very last pattern
    // character) is filled as well and no entry is left at 0.
    for (size_t i = 0; i <= length; i++) {
        if (arg[i] == 0) {
            arg[i] = largest_border_index;
        }

        // If we go past the largest border we find the next one as we iterate
        if (i == largest_border_index) {
            largest_border_index = border_pos[largest_border_index];
        }
    }
}

/**
 * @brief A function that preprocesses the bad char table
 *
 * @param str The string being preprocessed
 * @param arg The bad char table; any previous content is discarded and it
 * ends up with one entry per `unsigned char` value
 * @return void
 */
void init_bad_char(const std::string& str, std::vector<size_t>& arg) {
    // characters that do not occur in the pattern allow a full-length shift
    arg.assign(alphabet_size, str.length());

    for (size_t i = 0; i < str.length(); i++) {
        // index through unsigned char: plain char may be signed, which would
        // turn bytes >= 0x80 into out-of-range indexes
        arg[static_cast<unsigned char>(str[i])] = str.length() - i - 1;
    }
}

/**
 * @brief A function that initializes pattern
 *
 * @param str Text used for initialization
 * @param arg Initialized structure
 * @return void
 */
void init_pattern(const std::string& str, pattern& arg) {
    arg.pat = str;
    init_bad_char(str, arg.bad_char);
    init_good_suffix(str, arg.good_suffix);
}

/**
 * @brief A function that implements Boyer-Moore's algorithm.
 *
 * @param str Text we are searching in.
 * @param arg pattern structure containing the preprocessed pattern (see
 * init_pattern)
 * @return (vector of) indexes of the occurrences of pattern in text, in
 * increasing order; empty when the pattern is empty or longer than the text
 */
std::vector<size_t> search(const std::string& str, const pattern& arg) {
    std::vector<size_t> index_storage;

    const size_t pattern_length = arg.pat.size();
    if (pattern_length == 0 || pattern_length > str.size()) {
        return index_storage;
    }

    // index in the text aligned with the last character of the pattern
    size_t index_position = pattern_length - 1;

    while (index_position < str.length()) {
        // Compare right to left. Counting matched characters (instead of
        // decrementing an unsigned pattern index below zero) keeps all the
        // arithmetic free of wrap-around.
        size_t matched = 0;
        while (matched < pattern_length &&
               str[index_position - matched] ==
                   arg.pat[pattern_length - 1 - matched]) {
            ++matched;
        }

        if (matched == pattern_length) {
            index_storage.push_back(index_position + 1 - pattern_length);
            index_position += arg.good_suffix[0];
            continue;
        }

        const size_t index_pattern = pattern_length - 1 - matched;
        const auto mismatched =
            static_cast<unsigned char>(str[index_position - matched]);

        // bad_char is measured from the end of the pattern, while the
        // mismatch happened `matched` characters before the end, so that
        // offset has to be taken off. A non-positive result means the bad
        // character rule gives no useful shift here.
        const size_t from_end = arg.bad_char[mismatched];
        const size_t bad_char_shift =
            from_end > matched ? from_end - matched : 0;

        // good suffix entries are always >= 1, so the search always advances
        index_position +=
            std::max(bad_char_shift, arg.good_suffix[index_pattern + 1]);
    }

    return index_storage;
}

/**
 * @brief Check if pat is prefix of str.
 *
 * @param str pointer to some part of the input text.
 * @param pat the searched pattern.
 * @param len length of the searched pattern
 * @return true if pat is prefix of str. false otherwise.
 */
bool is_prefix(const char* str, const char* pat, size_t len) {
    for (size_t i = 0; i < len; i++) {
        // Reaching the end of str means it is shorter than len. Checking it
        // here avoids measuring the whole remaining text on every call.
        if (str[i] == '\0' || str[i] != pat[i]) {
            return false;
        }
    }

    return true;
}
}  // namespace boyer_moore
}  // namespace strings

void and_test(const char* text) {
strings::boyer_moore::pattern ands;
strings::boyer_moore::init_pattern("and", ands);
std::vector<size_t> indexes = strings::boyer_moore::search(text, ands);

assert(indexes.size() == 2);
assert(strings::boyer_moore::is_prefix(text + indexes[0], "and", 3));
assert(strings::boyer_moore::is_prefix(text + indexes[1], "and", 3));
}

void pat_test(const char* text) {
strings::boyer_moore::pattern pat;
strings::boyer_moore::init_pattern("pat", pat);
std::vector<size_t> indexes = strings::boyer_moore::search(text, pat);

assert(indexes.size() == 6);

for (const auto& idx : indexes) {
assert(strings::boyer_moore::is_prefix(text + idx, "pat", 3));
}
}
/**
* @brief Self-test implementations: runs and_test() and pat_test() against one
* fixed sample sentence.
* @return void
*/
static void tests() {
// Sample text shared by both checks: and_test() asserts 2 occurrences of
// "and" in it, pat_test() asserts 6 occurrences of "pat". The literal is
// spread over several lines with backslash line continuations.
const char* text =
"When pat Mr. and Mrs. pat Dursley woke up on the dull, gray \
Tuesday our story starts, \
there was nothing about pat the cloudy sky outside to pat suggest that\
strange and \
mysterious things would pat soon be happening all pat over the \
country.";

and_test(text);
pat_test(text);
}

/**
 * @brief Program entry point
 *
 * Runs the self-tests; a failing assertion aborts before the return.
 *
 * @return 0 on exit
 */
int main() {
    tests();

    return 0;
}