Improve Word Break (#804)

sozelfist · web-flow · commit 418bf15bbb6b · 2024-10-04T11:36:08.000+02:00
* ref: improve word break

* ref: refactor implementation

- take `word_dict` as `&[&str]` so callers can pass a borrowed slice
without giving up ownership of the vector
- size the memoization vector as `s.len() + 1`, so that the
`start == s.len()` case has its own explicit slot in the vector.

* feat(tests): update tests with edge cases
diff --git a/src/dynamic_programming/word_break.rs b/src/dynamic_programming/word_break.rs
@@ -1,27 +1,38 @@
-// Given a string and a list of words, return true if the string can be
-// segmented into a space-separated sequence of one or more words.
-
-// Note that the same word may be reused
-// multiple times in the segmentation.
-
-// Implementation notes: Trie + Dynamic programming up -> down.
-// The Trie will be used to store the words. It will be useful for scanning
-// available words for the current position in the string.
-
 use crate::data_structures::Trie;
 
-pub fn word_break(s: &str, word_dict: Vec<&str>) -> bool {
+/// Checks if a string can be segmented into a space-separated sequence
+/// of one or more words from the given dictionary.
+///
+/// # Arguments
+/// * `s` - The input string to be segmented.
+/// * `word_dict` - A slice of words forming the dictionary.
+///
+/// # Returns
+/// * `bool` - `true` if the string can be segmented, `false` otherwise.
+pub fn word_break(s: &str, word_dict: &[&str]) -> bool {
     let mut trie = Trie::new();
-    for word in word_dict {
-        trie.insert(word.chars(), true); // Insert each word with a value `true`
+    for &word in word_dict {
+        trie.insert(word.chars(), true);
     }
 
-    let mut memo = vec![None; s.len()];
+    // Memoization vector: one extra space to handle out-of-bound end case.
+    let mut memo = vec![None; s.len() + 1];
     search(&trie, s, 0, &mut memo)
 }
 
+/// Recursively checks if the substring starting from `start` can be segmented
+/// using words in the trie and memoizes the results.
+///
+/// # Arguments
+/// * `trie` - The Trie containing the dictionary words.
+/// * `s` - The input string.
+/// * `start` - The starting index for the current substring.
+/// * `memo` - A vector for memoization to store intermediate results.
+///
+/// # Returns
+/// * `bool` - `true` if the substring can be segmented, `false` otherwise.
 fn search(trie: &Trie<char, bool>, s: &str, start: usize, memo: &mut Vec<Option<bool>>) -> bool {
-    if start >= s.len() {
+    if start == s.len() {
         return true;
     }
 
@@ -30,7 +41,6 @@ fn search(trie: &Trie<char, bool>, s: &str, start: usize, memo: &mut Vec<Option<
     }
 
     for end in start + 1..=s.len() {
-        // Using trie.get to check if a substring is a word
         if trie.get(s[start..end].chars()).is_some() && search(trie, s, end, memo) {
             memo[start] = Some(true);
             return true;
@@ -43,40 +53,37 @@ fn search(trie: &Trie<char, bool>, s: &str, start: usize, memo: &mut Vec<Option<
 
 #[cfg(test)]
 mod tests {
-    use super::word_break;
-
-    #[test]
-    fn typical_cases() {
-        assert!(word_break("applepenapple", vec!["apple", "pen"]));
-        assert!(!word_break(
-            "catsandog",
-            vec!["cats", "dog", "sand", "and", "cat"]
-        ));
-        assert!(word_break("cars", vec!["car", "ca", "rs"]));
-    }
+    use super::*;
 
-    #[test]
-    fn edge_cases() {
-        assert!(!word_break("abc", vec![]));
-        assert!(word_break("a", vec!["a"]));
-    }
-
-    #[test]
-    fn repeated_words() {
-        assert!(word_break("aabb", vec!["a", "b"]));
-        assert!(word_break("aaaaaaa", vec!["a", "aa", "aaa"]));
-    }
-
-    #[test]
-    fn no_solution() {
-        assert!(!word_break("abcdef", vec!["ab", "abc", "cd"]));
-        assert!(!word_break("xyz", vec!["a", "b", "c"]));
+    macro_rules! test_cases {
+        ($($name:ident: $test_case:expr,)*) => {
+            $(
+                #[test]
+                fn $name() {
+                    let (input, dict, expected) = $test_case;
+                    assert_eq!(word_break(input, &dict), expected);
+                }
+            )*
+        }
     }
 
-    #[test]
-    fn long_string() {
-        let long_string = "a".repeat(100);
-        let words = vec!["a", "aa", "aaa", "aaaa"];
-        assert!(word_break(&long_string, words));
+    test_cases! {
+        typical_case_1: ("applepenapple", vec!["apple", "pen"], true),
+        typical_case_2: ("catsandog", vec!["cats", "dog", "sand", "and", "cat"], false),
+        typical_case_3: ("cars", vec!["car", "ca", "rs"], true),
+        edge_case_empty_string: ("", vec!["apple", "pen"], true),
+        edge_case_empty_dict: ("apple", vec![], false),
+        edge_case_single_char_in_dict: ("a", vec!["a"], true),
+        edge_case_single_char_not_in_dict: ("b", vec!["a"], false),
+        edge_case_all_words_larger_than_input: ("a", vec!["apple", "banana"], false),
+        edge_case_no_solution_large_string: ("abcdefghijklmnoqrstuv", vec!["a", "bc", "def", "ghij", "klmno", "pqrst"], false),
+        successful_segmentation_large_string: ("abcdefghijklmnopqrst", vec!["a", "bc", "def", "ghij", "klmno", "pqrst"], true),
+        long_string_repeated_pattern: (&"ab".repeat(100), vec!["a", "b", "ab"], true),
+        long_string_no_solution: (&"a".repeat(100), vec!["b"], false),
+        mixed_size_dict_1: ("pineapplepenapple", vec!["apple", "pen", "applepen", "pine", "pineapple"], true),
+        mixed_size_dict_2: ("catsandog", vec!["cats", "dog", "sand", "and", "cat"], false),
+        mixed_size_dict_3: ("abcd", vec!["a", "abc", "b", "cd"], true),
+        performance_stress_test_large_valid: (&"abc".repeat(1000), vec!["a", "ab", "abc"], true),
+        performance_stress_test_large_invalid: (&"x".repeat(1000), vec!["a", "ab", "abc"], false),
     }
 }