Skip to content

Commit 418bf15

Browse files
authored
Improve Word Break (#804)
* ref: improve word break
* ref: refactor implementation
  - take `word_dict` as `&[&str]` so a borrowed slice can be passed without needing to own the vector
  - adjust the size of the memoization vector to `s.len() + 1`, which allows us to mark the `start == s.len()` case explicitly in the vector
* feat(tests): update tests with edge cases
1 parent be27f2c commit 418bf15

File tree

1 file changed

+55
-48
lines changed

1 file changed

+55
-48
lines changed

src/dynamic_programming/word_break.rs

+55-48
Original file line number | Diff line number | Diff line change
@@ -1,27 +1,38 @@
1-
// Given a string and a list of words, return true if the string can be
2-
// segmented into a space-separated sequence of one or more words.
3-
4-
// Note that the same word may be reused
5-
// multiple times in the segmentation.
6-
7-
// Implementation notes: Trie + Dynamic programming up -> down.
8-
// The Trie will be used to store the words. It will be useful for scanning
9-
// available words for the current position in the string.
10-
111
use crate::data_structures::Trie;
122

13-
pub fn word_break(s: &str, word_dict: Vec<&str>) -> bool {
3+
/// Checks if a string can be segmented into a space-separated sequence
4+
/// of one or more words from the given dictionary.
5+
///
6+
/// # Arguments
7+
/// * `s` - The input string to be segmented.
8+
/// * `word_dict` - A slice of words forming the dictionary.
9+
///
10+
/// # Returns
11+
/// * `bool` - `true` if the string can be segmented, `false` otherwise.
12+
pub fn word_break(s: &str, word_dict: &[&str]) -> bool {
1413
let mut trie = Trie::new();
15-
for word in word_dict {
16-
trie.insert(word.chars(), true); // Insert each word with a value `true`
14+
for &word in word_dict {
15+
trie.insert(word.chars(), true);
1716
}
1817

19-
let mut memo = vec![None; s.len()];
18+
// Memoization vector: one extra slot so that index `s.len()` (the end-of-string case) stays in bounds.
19+
let mut memo = vec![None; s.len() + 1];
2020
search(&trie, s, 0, &mut memo)
2121
}
2222

23+
/// Recursively checks if the substring starting from `start` can be segmented
24+
/// using words in the trie and memoizes the results.
25+
///
26+
/// # Arguments
27+
/// * `trie` - The Trie containing the dictionary words.
28+
/// * `s` - The input string.
29+
/// * `start` - The starting index for the current substring.
30+
/// * `memo` - A vector for memoization to store intermediate results.
31+
///
32+
/// # Returns
33+
/// * `bool` - `true` if the substring can be segmented, `false` otherwise.
2334
fn search(trie: &Trie<char, bool>, s: &str, start: usize, memo: &mut Vec<Option<bool>>) -> bool {
24-
if start >= s.len() {
35+
if start == s.len() {
2536
return true;
2637
}
2738

@@ -30,7 +41,6 @@ fn search(trie: &Trie<char, bool>, s: &str, start: usize, memo: &mut Vec<Option<
3041
}
3142

3243
for end in start + 1..=s.len() {
33-
// Using trie.get to check if a substring is a word
3444
if trie.get(s[start..end].chars()).is_some() && search(trie, s, end, memo) {
3545
memo[start] = Some(true);
3646
return true;
@@ -43,40 +53,37 @@ fn search(trie: &Trie<char, bool>, s: &str, start: usize, memo: &mut Vec<Option<
4353

4454
#[cfg(test)]
4555
mod tests {
46-
use super::word_break;
47-
48-
#[test]
49-
fn typical_cases() {
50-
assert!(word_break("applepenapple", vec!["apple", "pen"]));
51-
assert!(!word_break(
52-
"catsandog",
53-
vec!["cats", "dog", "sand", "and", "cat"]
54-
));
55-
assert!(word_break("cars", vec!["car", "ca", "rs"]));
56-
}
56+
use super::*;
5757

58-
#[test]
59-
fn edge_cases() {
60-
assert!(!word_break("abc", vec![]));
61-
assert!(word_break("a", vec!["a"]));
62-
}
63-
64-
#[test]
65-
fn repeated_words() {
66-
assert!(word_break("aabb", vec!["a", "b"]));
67-
assert!(word_break("aaaaaaa", vec!["a", "aa", "aaa"]));
68-
}
69-
70-
#[test]
71-
fn no_solution() {
72-
assert!(!word_break("abcdef", vec!["ab", "abc", "cd"]));
73-
assert!(!word_break("xyz", vec!["a", "b", "c"]));
58+
macro_rules! test_cases {
59+
($($name:ident: $test_case:expr,)*) => {
60+
$(
61+
#[test]
62+
fn $name() {
63+
let (input, dict, expected) = $test_case;
64+
assert_eq!(word_break(input, &dict), expected);
65+
}
66+
)*
67+
}
7468
}
7569

76-
#[test]
77-
fn long_string() {
78-
let long_string = "a".repeat(100);
79-
let words = vec!["a", "aa", "aaa", "aaaa"];
80-
assert!(word_break(&long_string, words));
70+
test_cases! {
71+
typical_case_1: ("applepenapple", vec!["apple", "pen"], true),
72+
typical_case_2: ("catsandog", vec!["cats", "dog", "sand", "and", "cat"], false),
73+
typical_case_3: ("cars", vec!["car", "ca", "rs"], true),
74+
edge_case_empty_string: ("", vec!["apple", "pen"], true),
75+
edge_case_empty_dict: ("apple", vec![], false),
76+
edge_case_single_char_in_dict: ("a", vec!["a"], true),
77+
edge_case_single_char_not_in_dict: ("b", vec!["a"], false),
78+
edge_case_all_words_larger_than_input: ("a", vec!["apple", "banana"], false),
79+
edge_case_no_solution_large_string: ("abcdefghijklmnoqrstuv", vec!["a", "bc", "def", "ghij", "klmno", "pqrst"], false),
80+
successful_segmentation_large_string: ("abcdefghijklmnopqrst", vec!["a", "bc", "def", "ghij", "klmno", "pqrst"], true),
81+
long_string_repeated_pattern: (&"ab".repeat(100), vec!["a", "b", "ab"], true),
82+
long_string_no_solution: (&"a".repeat(100), vec!["b"], false),
83+
mixed_size_dict_1: ("pineapplepenapple", vec!["apple", "pen", "applepen", "pine", "pineapple"], true),
84+
mixed_size_dict_2: ("catsandog", vec!["cats", "dog", "sand", "and", "cat"], false),
85+
mixed_size_dict_3: ("abcd", vec!["a", "abc", "b", "cd"], true),
86+
performance_stress_test_large_valid: (&"abc".repeat(1000), vec!["a", "ab", "abc"], true),
87+
performance_stress_test_large_invalid: (&"x".repeat(1000), vec!["a", "ab", "abc"], false),
8188
}
8289
}

0 commit comments

Comments
 (0)