From 6ea41811f7b9837dfbb727b0bf39b319de8ff29e Mon Sep 17 00:00:00 2001
From: konbraphat51 <brightray48@zeus.eonet.ne.jp>
Date: Fri, 10 Nov 2023 00:09:55 +0900
Subject: [PATCH 01/36] documentation

---
 pythainlp/util/normalize.py | 25 ++++++++++++++++++++++++-
 1 file changed, 24 insertions(+), 1 deletion(-)

diff --git a/pythainlp/util/normalize.py b/pythainlp/util/normalize.py
index a8cacae22..bffe6d278 100644
--- a/pythainlp/util/normalize.py
+++ b/pythainlp/util/normalize.py
@@ -23,8 +23,10 @@
 from pythainlp import thai_follow_vowels as follow_v
 from pythainlp import thai_lead_vowels as lead_v
 from pythainlp import thai_tonemarks as tonemarks
+from pythainlp import thai_consonants as consonants
 from pythainlp.tokenize import word_tokenize
-
+from pythainlp.corpus import thai_words
+from pythainlp.util.trie import Trie
 
 _DANGLING_CHARS = f"{above_v}{below_v}{tonemarks}\u0e3a\u0e4c\u0e4d\u0e4e"
 _RE_REMOVE_DANGLINGS = re.compile(f"^[{_DANGLING_CHARS}]+")
@@ -218,6 +220,27 @@ def remove_repeat_vowels(text: str) -> str:
     return text
 
 
+def remove_repeat_consonants(text: str, dictionary: Trie = None) -> str:
+    """
+    Remove repeating consonants at the last of the sentence.
+
+    This function will remove the repeating consonants
+    before a whitespace or new line until the last word matches
+    a word in the given dictionary.
+    If there is no match, the repeating consonants will be
+    reduced to one.
+    Since this function uses a dictionary, the result may differs
+    depending on the dictionary used.
+    Plus, it is recommended to use normalize() to have a better result.
+
+    :param str text: input text
+    :param Trie dictionary: Trie dictionary to check the last word.
+    If None, pythainlp.corpus.thai_words() will be used
+    :return: text without repeating Thai consonants
+    :rtype: str
+    """
+
+
 def normalize(text: str) -> str:
     """
     Normalize and clean Thai text with normalizing rules as follows:

From be29c00551da590560b752e3c865523d6ab1ffac Mon Sep 17 00:00:00 2001
From: konbraphat51 <brightray48@zeus.eonet.ne.jp>
Date: Fri, 10 Nov 2023 01:22:57 +0900
Subject: [PATCH 02/36] Add: implementation

tested
---
 pythainlp/util/normalize.py | 99 ++++++++++++++++++++++++++++++++++++-
 1 file changed, 97 insertions(+), 2 deletions(-)

diff --git a/pythainlp/util/normalize.py b/pythainlp/util/normalize.py
index bffe6d278..2beac8a38 100644
--- a/pythainlp/util/normalize.py
+++ b/pythainlp/util/normalize.py
@@ -26,6 +26,7 @@
 from pythainlp import thai_consonants as consonants
 from pythainlp.tokenize import word_tokenize
 from pythainlp.corpus import thai_words
+from pythainlp.util import isthaichar
 from pythainlp.util.trie import Trie
 
 _DANGLING_CHARS = f"{above_v}{below_v}{tonemarks}\u0e3a\u0e4c\u0e4d\u0e4e"
@@ -225,10 +226,11 @@ def remove_repeat_consonants(text: str, dictionary: Trie = None) -> str:
     Remove repeating consonants at the last of the sentence.
 
     This function will remove the repeating consonants
-    before a whitespace or new line until the last word matches
-    a word in the given dictionary.
+    before a whitespace, new line or at the last 
+    so that the last word matches a word in the given dictionary.
     If there is no match, the repeating consonants will be
     reduced to one.
+    If there are several match, the longest word will be used.
     Since this function uses a dictionary, the result may differs
     depending on the dictionary used.
     Plus, it is recommended to use normalize() to have a better result.
@@ -238,7 +240,100 @@ def remove_repeat_consonants(text: str, dictionary: Trie = None) -> str:
     If None, pythainlp.corpus.thai_words() will be used
     :return: text without repeating Thai consonants
     :rtype: str
+
+    :Example:
+    ::
+
+        from pythainlp.util import remove_repeat_consonants
+        from pythainlp.util import dict_trie
+
+        # use default dictionary (pythainlp.corpus.thai_words())
+        remove_repeat_consonants('เริ่ดดดดดดดด')
+        # output: เริ่ด
+
+        remove_repeat_consonants('อืมมมมมมมมมมมมมมม')
+        # output: อืมมม
+        # "อืมมม" is in the default dictionary
+
+        # use custom dictionary
+        custom_dictionary = dict_trie(["อืมมมมม"])
+        remove_repeat_consonants('อืมมมมมมมมมมมมมมม', custom_dictionary)
+        # output: อืมมมมม
+
+        # long text
+        remove_repeat_consonants('อืมมมมมมมมมมมมม คุณมีบุคลิกที่เริ่ดดดดด '\
+        'ฉันจะให้เกรดดีกับคุณณณ\nนี่เป็นความลับบบบบ')
+        # output: อืมมม คุณมีบุคลิกที่เริ่ด ฉันจะให้เกรดดีกับคุณ\nนี่เป็นความลับ
     """
+    # use default dictionary if not given
+    if dictionary is None:
+        dictionary = thai_words()
+
+    # seperate by newline
+    modified_lines = []
+    for line in text.split("\n"):
+        segments = line.split(" ")
+
+        for segment in segments:
+            # skip if the segment is not the target
+            if (not
+                ((len(segment) > 1)  # the segment is long enough
+                 and (isthaichar(segment[-1]))   # the last is Thai
+                 and (segment[-1] == segment[-2]))):  # has repiitition
+
+                # skip
+                continue
+
+            # duplicating character
+            dup = segment[-1]
+
+            # find the words that has 2 or more duplication of
+            # this character at the end.
+            # TODO: This maybe slow if the dictionary is large.
+            #       If the dictionary not changed, this could be done
+            #       only once in the kernel.
+            #       But it will requires a global variable.
+            repeaters = []
+            for word in dictionary:
+                if (len(word) > 1) and (word[-1] == word[-2] == dup):
+                    repeaters.append(word)
+
+            # remove all of the last repeating character
+            segment_head = segment
+            while ((len(segment) > 0) and (segment[-1] == dup)):
+                segment = segment[:-1]
+
+            # find the longest word that matches the segment
+            longest_word = ""
+            repetition = 0
+            for repeater in repeaters:
+                # remove all of the last repeating character
+                repeater_head = repeater
+                while ((len(repeater) > 0) and (repeater[-1] == dup)):
+                    repeater = repeater[:-1]
+
+                # check match
+                if ((len(segment) >= len(repeater))
+                        and (segment[-len(repeater):] == repeater)):
+                    # matched
+                    if len(repeater) > len(longest_word):
+                        longest_word = repeater
+
+            if len(longest_word) > 0:
+                # if there is a match, use it
+                segment = segment_head + (dup * repetition)
+            else:
+                # if none found, make the repition to once
+                segment = segment_head + (dup * 1)
+
+        # revert spaces
+        modified_line = " ".join(segments)
+        modified_lines.append(modified_line)
+
+    # revert newlines
+    modified_text = "\n".join(modified_lines)
+
+    return modified_text
 
 
 def normalize(text: str) -> str:

From 3e94234cfaa9ea876a8e7eaa804cd3bcaae7867d Mon Sep 17 00:00:00 2001
From: konbraphat51 <brightray48@zeus.eonet.ne.jp>
Date: Fri, 10 Nov 2023 01:32:37 +0900
Subject: [PATCH 03/36] Add: test code

---
 tests/test_util.py | 41 +++++++++++++++++++++++++++++++++++------
 1 file changed, 35 insertions(+), 6 deletions(-)

diff --git a/tests/test_util.py b/tests/test_util.py
index 1840e2dc0..b3165e25a 100644
--- a/tests/test_util.py
+++ b/tests/test_util.py
@@ -60,6 +60,7 @@
     ipa_to_rtgs,
     remove_tone_ipa,
     tis620_to_utf8,
+    remove_repeat_consonants
 )
 from pythainlp.util.spell_words import spell_word
 
@@ -832,7 +833,8 @@ def test_convert_years(self):
         self.assertEqual(convert_years("242", src="re", target="ad"), "2023")
         self.assertEqual(convert_years("242", src="re", target="ah"), "1444")
         with self.assertRaises(NotImplementedError):
-            self.assertIsNotNone(convert_years("2023", src="cat", target="dog"))
+            self.assertIsNotNone(convert_years(
+                "2023", src="cat", target="dog"))
 
     def test_nectec_to_ipa(self):
         self.assertEqual(nectec_to_ipa("kl-uua-j^-2"), 'kl uua j ˥˩')
@@ -846,17 +848,44 @@ def test_remove_tone_ipa(self):
         self.assertEqual(remove_tone_ipa("laː˦˥.sa˨˩.maj˩˩˦"), "laː.sa.maj")
 
     def test_tis620_to_utf8(self):
-        self.assertEqual(tis620_to_utf8("¡ÃÐ·ÃÇ§ÍØµÊÒË¡ÃÃÁ"), "กระทรวงอุตสาหกรรม")
+        self.assertEqual(tis620_to_utf8(
+            "¡ÃÐ·ÃÇ§ÍØµÊÒË¡ÃÃÁ"), "กระทรวงอุตสาหกรรม")
 
     def test_spell_word(self):
-        self.assertEqual(spell_word("เสือ"),['สอ', 'เอือ', 'เสือ'])
-        self.assertEqual(spell_word("เสื้อ"),['สอ', 'เอือ', 'ไม้โท', 'เสื้อ'])
-        self.assertEqual(spell_word("คน"),['คอ', 'นอ', 'คน'])
-        self.assertEqual(spell_word("คนดี"),['คอ', 'นอ', 'คน', 'ดอ', 'อี', 'ดี', 'คนดี'])
+        self.assertEqual(spell_word("เสือ"), ['สอ', 'เอือ', 'เสือ'])
+        self.assertEqual(spell_word("เสื้อ"), ['สอ', 'เอือ', 'ไม้โท', 'เสื้อ'])
+        self.assertEqual(spell_word("คน"), ['คอ', 'นอ', 'คน'])
+        self.assertEqual(spell_word("คนดี"), [
+                         'คอ', 'นอ', 'คน', 'ดอ', 'อี', 'ดี', 'คนดี'])
 
     def test_rhyme(self):
         self.assertIsInstance(rhyme("แมว"), list)
         self.assertTrue(len(rhyme("แมว")) > 2)
 
+    def test_remove_repeat_consonants(self):
+        # update of pythainlp.copus.thai_words() able to break this
+        self.assertEqual(
+            remove_repeat_consonants('เริ่ดดดดดดดด'),
+            'เริ่ด'
+        )
+        self.assertEqual(
+            remove_repeat_consonants('อืมมมมมมมมมมมมมมม'),
+            'อืมมม'
+        )
+
+        custom_dictionary = dict_trie(["อืมมมมม"])
+        self.assertEqual(
+            remove_repeat_consonants('อืมมมมมมมมมมมมมมม', custom_dictionary),
+            'อืมมมมม'
+        )
+
+        self.assertEqual(
+            remove_repeat_consonants(
+                'อืมมมมมมมมมมมมม คุณมีบุคลิกที่เริ่ดดดดด '
+                'ฉันจะให้เกรดดีกับคุณณณ\nนี่เป็นความลับบบบบ'
+            ),
+            'ออืมมม คุณมีบุคลิกที่เริ่ด ฉันจะให้เกรดดีกับคุณ\nนี่เป็นความลับ'
+        )
+
     # def test_abbreviation_to_full_text(self):
     #     self.assertIsInstance(abbreviation_to_full_text("รร.ของเราน่าอยู่", list))

From ca6cd941d9e9edfc43dc6fcaf5ac9678d9cc9d03 Mon Sep 17 00:00:00 2001
From: Konbraphat <101827492+konbraphat51@users.noreply.github.com>
Date: Fri, 10 Nov 2023 01:35:21 +0900
Subject: [PATCH 04/36] Add: remove_repeat_consonants()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add a function that collapses repeated trailing consonants, e.g.:
เริ่ดดดดดดดด -> เริ่ด

implementation + test code written.
Test passed
---
 pythainlp/util/normalize.py | 120 +++++++++++++++++++++++++++++++++++-
 tests/test_util.py          |  41 ++++++++++--
 2 files changed, 154 insertions(+), 7 deletions(-)

diff --git a/pythainlp/util/normalize.py b/pythainlp/util/normalize.py
index a8cacae22..2beac8a38 100644
--- a/pythainlp/util/normalize.py
+++ b/pythainlp/util/normalize.py
@@ -23,8 +23,11 @@
 from pythainlp import thai_follow_vowels as follow_v
 from pythainlp import thai_lead_vowels as lead_v
 from pythainlp import thai_tonemarks as tonemarks
+from pythainlp import thai_consonants as consonants
 from pythainlp.tokenize import word_tokenize
-
+from pythainlp.corpus import thai_words
+from pythainlp.util import isthaichar
+from pythainlp.util.trie import Trie
 
 _DANGLING_CHARS = f"{above_v}{below_v}{tonemarks}\u0e3a\u0e4c\u0e4d\u0e4e"
 _RE_REMOVE_DANGLINGS = re.compile(f"^[{_DANGLING_CHARS}]+")
@@ -218,6 +221,121 @@ def remove_repeat_vowels(text: str) -> str:
     return text
 
 
+def remove_repeat_consonants(text: str, dictionary: Trie = None) -> str:
+    """
+    Remove repeating consonants at the last of the sentence.
+
+    This function will remove the repeating consonants
+    before a whitespace, new line or at the last 
+    so that the last word matches a word in the given dictionary.
+    If there is no match, the repeating consonants will be
+    reduced to one.
+    If there are several match, the longest word will be used.
+    Since this function uses a dictionary, the result may differs
+    depending on the dictionary used.
+    Plus, it is recommended to use normalize() to have a better result.
+
+    :param str text: input text
+    :param Trie dictionary: Trie dictionary to check the last word.
+    If None, pythainlp.corpus.thai_words() will be used
+    :return: text without repeating Thai consonants
+    :rtype: str
+
+    :Example:
+    ::
+
+        from pythainlp.util import remove_repeat_consonants
+        from pythainlp.util import dict_trie
+
+        # use default dictionary (pythainlp.corpus.thai_words())
+        remove_repeat_consonants('เริ่ดดดดดดดด')
+        # output: เริ่ด
+
+        remove_repeat_consonants('อืมมมมมมมมมมมมมมม')
+        # output: อืมมม
+        # "อืมมม" is in the default dictionary
+
+        # use custom dictionary
+        custom_dictionary = dict_trie(["อืมมมมม"])
+        remove_repeat_consonants('อืมมมมมมมมมมมมมมม', custom_dictionary)
+        # output: อืมมมมม
+
+        # long text
+        remove_repeat_consonants('อืมมมมมมมมมมมมม คุณมีบุคลิกที่เริ่ดดดดด '\
+        'ฉันจะให้เกรดดีกับคุณณณ\nนี่เป็นความลับบบบบ')
+        # output: อืมมม คุณมีบุคลิกที่เริ่ด ฉันจะให้เกรดดีกับคุณ\nนี่เป็นความลับ
+    """
+    # use default dictionary if not given
+    if dictionary is None:
+        dictionary = thai_words()
+
+    # seperate by newline
+    modified_lines = []
+    for line in text.split("\n"):
+        segments = line.split(" ")
+
+        for segment in segments:
+            # skip if the segment is not the target
+            if (not
+                ((len(segment) > 1)  # the segment is long enough
+                 and (isthaichar(segment[-1]))   # the last is Thai
+                 and (segment[-1] == segment[-2]))):  # has repiitition
+
+                # skip
+                continue
+
+            # duplicating character
+            dup = segment[-1]
+
+            # find the words that has 2 or more duplication of
+            # this character at the end.
+            # TODO: This maybe slow if the dictionary is large.
+            #       If the dictionary not changed, this could be done
+            #       only once in the kernel.
+            #       But it will requires a global variable.
+            repeaters = []
+            for word in dictionary:
+                if (len(word) > 1) and (word[-1] == word[-2] == dup):
+                    repeaters.append(word)
+
+            # remove all of the last repeating character
+            segment_head = segment
+            while ((len(segment) > 0) and (segment[-1] == dup)):
+                segment = segment[:-1]
+
+            # find the longest word that matches the segment
+            longest_word = ""
+            repetition = 0
+            for repeater in repeaters:
+                # remove all of the last repeating character
+                repeater_head = repeater
+                while ((len(repeater) > 0) and (repeater[-1] == dup)):
+                    repeater = repeater[:-1]
+
+                # check match
+                if ((len(segment) >= len(repeater))
+                        and (segment[-len(repeater):] == repeater)):
+                    # matched
+                    if len(repeater) > len(longest_word):
+                        longest_word = repeater
+
+            if len(longest_word) > 0:
+                # if there is a match, use it
+                segment = segment_head + (dup * repetition)
+            else:
+                # if none found, make the repition to once
+                segment = segment_head + (dup * 1)
+
+        # revert spaces
+        modified_line = " ".join(segments)
+        modified_lines.append(modified_line)
+
+    # revert newlines
+    modified_text = "\n".join(modified_lines)
+
+    return modified_text
+
+
 def normalize(text: str) -> str:
     """
     Normalize and clean Thai text with normalizing rules as follows:
diff --git a/tests/test_util.py b/tests/test_util.py
index 1840e2dc0..b3165e25a 100644
--- a/tests/test_util.py
+++ b/tests/test_util.py
@@ -60,6 +60,7 @@
     ipa_to_rtgs,
     remove_tone_ipa,
     tis620_to_utf8,
+    remove_repeat_consonants
 )
 from pythainlp.util.spell_words import spell_word
 
@@ -832,7 +833,8 @@ def test_convert_years(self):
         self.assertEqual(convert_years("242", src="re", target="ad"), "2023")
         self.assertEqual(convert_years("242", src="re", target="ah"), "1444")
         with self.assertRaises(NotImplementedError):
-            self.assertIsNotNone(convert_years("2023", src="cat", target="dog"))
+            self.assertIsNotNone(convert_years(
+                "2023", src="cat", target="dog"))
 
     def test_nectec_to_ipa(self):
         self.assertEqual(nectec_to_ipa("kl-uua-j^-2"), 'kl uua j ˥˩')
@@ -846,17 +848,44 @@ def test_remove_tone_ipa(self):
         self.assertEqual(remove_tone_ipa("laː˦˥.sa˨˩.maj˩˩˦"), "laː.sa.maj")
 
     def test_tis620_to_utf8(self):
-        self.assertEqual(tis620_to_utf8("¡ÃÐ·ÃÇ§ÍØµÊÒË¡ÃÃÁ"), "กระทรวงอุตสาหกรรม")
+        self.assertEqual(tis620_to_utf8(
+            "¡ÃÐ·ÃÇ§ÍØµÊÒË¡ÃÃÁ"), "กระทรวงอุตสาหกรรม")
 
     def test_spell_word(self):
-        self.assertEqual(spell_word("เสือ"),['สอ', 'เอือ', 'เสือ'])
-        self.assertEqual(spell_word("เสื้อ"),['สอ', 'เอือ', 'ไม้โท', 'เสื้อ'])
-        self.assertEqual(spell_word("คน"),['คอ', 'นอ', 'คน'])
-        self.assertEqual(spell_word("คนดี"),['คอ', 'นอ', 'คน', 'ดอ', 'อี', 'ดี', 'คนดี'])
+        self.assertEqual(spell_word("เสือ"), ['สอ', 'เอือ', 'เสือ'])
+        self.assertEqual(spell_word("เสื้อ"), ['สอ', 'เอือ', 'ไม้โท', 'เสื้อ'])
+        self.assertEqual(spell_word("คน"), ['คอ', 'นอ', 'คน'])
+        self.assertEqual(spell_word("คนดี"), [
+                         'คอ', 'นอ', 'คน', 'ดอ', 'อี', 'ดี', 'คนดี'])
 
     def test_rhyme(self):
         self.assertIsInstance(rhyme("แมว"), list)
         self.assertTrue(len(rhyme("แมว")) > 2)
 
+    def test_remove_repeat_consonants(self):
+        # update of pythainlp.copus.thai_words() able to break this
+        self.assertEqual(
+            remove_repeat_consonants('เริ่ดดดดดดดด'),
+            'เริ่ด'
+        )
+        self.assertEqual(
+            remove_repeat_consonants('อืมมมมมมมมมมมมมมม'),
+            'อืมมม'
+        )
+
+        custom_dictionary = dict_trie(["อืมมมมม"])
+        self.assertEqual(
+            remove_repeat_consonants('อืมมมมมมมมมมมมมมม', custom_dictionary),
+            'อืมมมมม'
+        )
+
+        self.assertEqual(
+            remove_repeat_consonants(
+                'อืมมมมมมมมมมมมม คุณมีบุคลิกที่เริ่ดดดดด '
+                'ฉันจะให้เกรดดีกับคุณณณ\nนี่เป็นความลับบบบบ'
+            ),
+            'ออืมมม คุณมีบุคลิกที่เริ่ด ฉันจะให้เกรดดีกับคุณ\nนี่เป็นความลับ'
+        )
+
     # def test_abbreviation_to_full_text(self):
     #     self.assertIsInstance(abbreviation_to_full_text("รร.ของเราน่าอยู่", list))

From 702be9a3cd60629b045808b803e39e39532b23a1 Mon Sep 17 00:00:00 2001
From: konbraphat51 <brightray48@zeus.eonet.ne.jp>
Date: Fri, 10 Nov 2023 01:48:41 +0900
Subject: [PATCH 05/36] Fix: push miss

this is the right commit
---
 pythainlp/util/normalize.py | 26 +++++++++++++++++---------
 1 file changed, 17 insertions(+), 9 deletions(-)

diff --git a/pythainlp/util/normalize.py b/pythainlp/util/normalize.py
index 2beac8a38..a2614aa21 100644
--- a/pythainlp/util/normalize.py
+++ b/pythainlp/util/normalize.py
@@ -274,7 +274,9 @@ def remove_repeat_consonants(text: str, dictionary: Trie = None) -> str:
     for line in text.split("\n"):
         segments = line.split(" ")
 
-        for segment in segments:
+        for cnt in range(len(segments)):
+            segment = segments[cnt]
+
             # skip if the segment is not the target
             if (not
                 ((len(segment) > 1)  # the segment is long enough
@@ -300,32 +302,38 @@ def remove_repeat_consonants(text: str, dictionary: Trie = None) -> str:
 
             # remove all of the last repeating character
             segment_head = segment
-            while ((len(segment) > 0) and (segment[-1] == dup)):
-                segment = segment[:-1]
+            while ((len(segment_head) > 0) and (segment_head[-1] == dup)):
+                segment_head = segment_head[:-1]
 
             # find the longest word that matches the segment
             longest_word = ""
-            repetition = 0
+            repetition = 0  # how much the last character is repeated correctly
             for repeater in repeaters:
                 # remove all of the last repeating character
                 repeater_head = repeater
-                while ((len(repeater) > 0) and (repeater[-1] == dup)):
-                    repeater = repeater[:-1]
+                while ((len(repeater_head) > 0) and (repeater_head[-1] == dup)):
+                    repeater_head = repeater_head[:-1]
 
                 # check match
-                if ((len(segment) >= len(repeater))
-                        and (segment[-len(repeater):] == repeater)):
+                if ((len(segment_head) >= len(repeater_head))
+                        and (segment_head[-len(repeater_head):] == repeater_head)):
                     # matched
                     if len(repeater) > len(longest_word):
                         longest_word = repeater
+                        repetition = len(repeater) - len(repeater_head)
 
             if len(longest_word) > 0:
                 # if there is a match, use it
                 segment = segment_head + (dup * repetition)
             else:
-                # if none found, make the repition to once
+                # if none found, the chance is that the correct is one character,
+                # or it's not in the dictionary.
+
+                # make the repition to once
                 segment = segment_head + (dup * 1)
 
+            segments[cnt] = segment
+
         # revert spaces
         modified_line = " ".join(segments)
         modified_lines.append(modified_line)

From 130b1ecf4e069e5887872bb009b309d967375ae0 Mon Sep 17 00:00:00 2001
From: konbraphat51 <brightray48@zeus.eonet.ne.jp>
Date: Fri, 10 Nov 2023 01:53:23 +0900
Subject: [PATCH 06/36] Fix: wrap lines exceeding the line-length limit

---
 pythainlp/util/normalize.py | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/pythainlp/util/normalize.py b/pythainlp/util/normalize.py
index a2614aa21..c338e8105 100644
--- a/pythainlp/util/normalize.py
+++ b/pythainlp/util/normalize.py
@@ -263,7 +263,8 @@ def remove_repeat_consonants(text: str, dictionary: Trie = None) -> str:
         # long text
         remove_repeat_consonants('อืมมมมมมมมมมมมม คุณมีบุคลิกที่เริ่ดดดดด '\
         'ฉันจะให้เกรดดีกับคุณณณ\nนี่เป็นความลับบบบบ')
-        # output: อืมมม คุณมีบุคลิกที่เริ่ด ฉันจะให้เกรดดีกับคุณ\nนี่เป็นความลับ
+        # output: อืมมม คุณมีบุคลิกที่เริ่ด ฉันจะให้เกรดดีกับคุณ
+        #         นี่เป็นความลับ
     """
     # use default dictionary if not given
     if dictionary is None:
@@ -311,12 +312,14 @@ def remove_repeat_consonants(text: str, dictionary: Trie = None) -> str:
             for repeater in repeaters:
                 # remove all of the last repeating character
                 repeater_head = repeater
-                while ((len(repeater_head) > 0) and (repeater_head[-1] == dup)):
+                while ((len(repeater_head) > 0)
+                        and (repeater_head[-1] == dup)):
                     repeater_head = repeater_head[:-1]
 
                 # check match
                 if ((len(segment_head) >= len(repeater_head))
-                        and (segment_head[-len(repeater_head):] == repeater_head)):
+                        and (segment_head[-len(repeater_head):]
+                             == repeater_head)):
                     # matched
                     if len(repeater) > len(longest_word):
                         longest_word = repeater
@@ -326,7 +329,8 @@ def remove_repeat_consonants(text: str, dictionary: Trie = None) -> str:
                 # if there is a match, use it
                 segment = segment_head + (dup * repetition)
             else:
-                # if none found, the chance is that the correct is one character,
+                # if none found,
+                # the chance is that the correct is one character,
                 # or it's not in the dictionary.
 
                 # make the repition to once

From ef8ac0fd700da8934e18bb7617753201efdd045f Mon Sep 17 00:00:00 2001
From: konbraphat51 <brightray48@zeus.eonet.ne.jp>
Date: Fri, 10 Nov 2023 01:56:28 +0900
Subject: [PATCH 07/36] Refac: remove trailing whitespace

---
 pythainlp/util/normalize.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pythainlp/util/normalize.py b/pythainlp/util/normalize.py
index c338e8105..d9d7c309e 100644
--- a/pythainlp/util/normalize.py
+++ b/pythainlp/util/normalize.py
@@ -226,7 +226,7 @@ def remove_repeat_consonants(text: str, dictionary: Trie = None) -> str:
     Remove repeating consonants at the last of the sentence.
 
     This function will remove the repeating consonants
-    before a whitespace, new line or at the last 
+    before a whitespace, new line or at the last
     so that the last word matches a word in the given dictionary.
     If there is no match, the repeating consonants will be
     reduced to one.

From 2df4d371644c99916bf77c839bacf786c0970ebf Mon Sep 17 00:00:00 2001
From: konbraphat51 <brightray48@zeus.eonet.ne.jp>
Date: Fri, 10 Nov 2023 01:58:13 +0900
Subject: [PATCH 08/36] Fix: restrict only to consonants

---
 pythainlp/util/normalize.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pythainlp/util/normalize.py b/pythainlp/util/normalize.py
index d9d7c309e..788f173c1 100644
--- a/pythainlp/util/normalize.py
+++ b/pythainlp/util/normalize.py
@@ -281,7 +281,7 @@ def remove_repeat_consonants(text: str, dictionary: Trie = None) -> str:
             # skip if the segment is not the target
             if (not
                 ((len(segment) > 1)  # the segment is long enough
-                 and (isthaichar(segment[-1]))   # the last is Thai
+                 and (segment[-1] in consonants)   # last is Thai consonant
                  and (segment[-1] == segment[-2]))):  # has repiitition
 
                 # skip

From 16c3154aa2893236553ac74d836421f82b8768d2 Mon Sep 17 00:00:00 2001
From: konbraphat51 <brightray48@zeus.eonet.ne.jp>
Date: Fri, 10 Nov 2023 01:59:21 +0900
Subject: [PATCH 09/36] Refac: Remove unused import

---
 pythainlp/util/normalize.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/pythainlp/util/normalize.py b/pythainlp/util/normalize.py
index 788f173c1..259760759 100644
--- a/pythainlp/util/normalize.py
+++ b/pythainlp/util/normalize.py
@@ -26,7 +26,6 @@
 from pythainlp import thai_consonants as consonants
 from pythainlp.tokenize import word_tokenize
 from pythainlp.corpus import thai_words
-from pythainlp.util import isthaichar
 from pythainlp.util.trie import Trie
 
 _DANGLING_CHARS = f"{above_v}{below_v}{tonemarks}\u0e3a\u0e4c\u0e4d\u0e4e"

From cc62a95ba5a696935b1f18a3cb83ca9d20d95cd2 Mon Sep 17 00:00:00 2001
From: konbraphat51 <brightray48@zeus.eonet.ne.jp>
Date: Fri, 10 Nov 2023 08:34:14 +0900
Subject: [PATCH 10/36] Refac: Use enumerate

pointed out by codacy
---
 pythainlp/util/normalize.py | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/pythainlp/util/normalize.py b/pythainlp/util/normalize.py
index 259760759..8e97995ab 100644
--- a/pythainlp/util/normalize.py
+++ b/pythainlp/util/normalize.py
@@ -274,9 +274,7 @@ def remove_repeat_consonants(text: str, dictionary: Trie = None) -> str:
     for line in text.split("\n"):
         segments = line.split(" ")
 
-        for cnt in range(len(segments)):
-            segment = segments[cnt]
-
+        for cnt, segment in enumerate(segments):
             # skip if the segment is not the target
             if (not
                 ((len(segment) > 1)  # the segment is long enough

From d74af323a11865ebd083eddecfb00f4ba7fa2561 Mon Sep 17 00:00:00 2001
From: konbraphat51 <brightray48@zeus.eonet.ne.jp>
Date: Fri, 10 Nov 2023 08:36:13 +0900
Subject: [PATCH 11/36] Fix: add the function in init

---
 pythainlp/util/__init__.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/pythainlp/util/__init__.py b/pythainlp/util/__init__.py
index 2b2ff40e4..05432ecea 100644
--- a/pythainlp/util/__init__.py
+++ b/pythainlp/util/__init__.py
@@ -102,6 +102,7 @@
     remove_tonemark,
     remove_zw,
     reorder_vowels,
+    remove_repeat_consonants
 )
 from pythainlp.util.numtoword import bahttext, num_to_thaiword
 from pythainlp.util.strftime import thai_strftime

From 5bfa50d552b962ba915c2c70e6ee357e510dfa0f Mon Sep 17 00:00:00 2001
From: konbraphat51 <brightray48@zeus.eonet.ne.jp>
Date: Fri, 10 Nov 2023 08:48:50 +0900
Subject: [PATCH 12/36] Refac: use black

---
 pythainlp/util/normalize.py | 24 +++++++++++++-----------
 1 file changed, 13 insertions(+), 11 deletions(-)

diff --git a/pythainlp/util/normalize.py b/pythainlp/util/normalize.py
index 8e97995ab..858477f61 100644
--- a/pythainlp/util/normalize.py
+++ b/pythainlp/util/normalize.py
@@ -276,11 +276,14 @@ def remove_repeat_consonants(text: str, dictionary: Trie = None) -> str:
 
         for cnt, segment in enumerate(segments):
             # skip if the segment is not the target
-            if (not
-                ((len(segment) > 1)  # the segment is long enough
-                 and (segment[-1] in consonants)   # last is Thai consonant
-                 and (segment[-1] == segment[-2]))):  # has repiitition
-
+            if not (
+                # the segment is long enough
+                (len(segment) > 1)
+                # last is Thai consonant
+                and (segment[-1] in consonants)
+                # has repiitition
+                and (segment[-1] == segment[-2])
+            ):
                 # skip
                 continue
 
@@ -300,7 +303,7 @@ def remove_repeat_consonants(text: str, dictionary: Trie = None) -> str:
 
             # remove all of the last repeating character
             segment_head = segment
-            while ((len(segment_head) > 0) and (segment_head[-1] == dup)):
+            while (len(segment_head) > 0) and (segment_head[-1] == dup):
                 segment_head = segment_head[:-1]
 
             # find the longest word that matches the segment
@@ -309,14 +312,13 @@ def remove_repeat_consonants(text: str, dictionary: Trie = None) -> str:
             for repeater in repeaters:
                 # remove all of the last repeating character
                 repeater_head = repeater
-                while ((len(repeater_head) > 0)
-                        and (repeater_head[-1] == dup)):
+                while (len(repeater_head) > 0) and (repeater_head[-1] == dup):
                     repeater_head = repeater_head[:-1]
 
                 # check match
-                if ((len(segment_head) >= len(repeater_head))
-                        and (segment_head[-len(repeater_head):]
-                             == repeater_head)):
+                if (len(segment_head) >= len(repeater_head)) and (
+                    segment_head[-len(repeater_head):] == repeater_head
+                ):
                     # matched
                     if len(repeater) > len(longest_word):
                         longest_word = repeater

From 28b6006693ccd0b4e9cb5b0c28b8119e51a32d27 Mon Sep 17 00:00:00 2001
From: konbraphat51 <brightray48@zeus.eonet.ne.jp>
Date: Fri, 10 Nov 2023 08:52:24 +0900
Subject: [PATCH 13/36] Refac: re-run black

black now reports "1 file left unchanged."
---
 pythainlp/util/normalize.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pythainlp/util/normalize.py b/pythainlp/util/normalize.py
index 858477f61..9746689a6 100644
--- a/pythainlp/util/normalize.py
+++ b/pythainlp/util/normalize.py
@@ -317,7 +317,7 @@ def remove_repeat_consonants(text: str, dictionary: Trie = None) -> str:
 
                 # check match
                 if (len(segment_head) >= len(repeater_head)) and (
-                    segment_head[-len(repeater_head):] == repeater_head
+                    segment_head[-len(repeater_head) :] == repeater_head
                 ):
                     # matched
                     if len(repeater) > len(longest_word):

From c6b564dad962caa3fda7a7d6ee0f1da41a6fd44f Mon Sep 17 00:00:00 2001
From: konbraphat51 <brightray48@zeus.eonet.ne.jp>
Date: Fri, 10 Nov 2023 09:01:01 +0900
Subject: [PATCH 14/36] Refac: resolve nested if

pointed out by codeclimate
---
 pythainlp/util/normalize.py | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

diff --git a/pythainlp/util/normalize.py b/pythainlp/util/normalize.py
index 9746689a6..0fcfb9764 100644
--- a/pythainlp/util/normalize.py
+++ b/pythainlp/util/normalize.py
@@ -316,13 +316,14 @@ def remove_repeat_consonants(text: str, dictionary: Trie = None) -> str:
                     repeater_head = repeater_head[:-1]
 
                 # check match
-                if (len(segment_head) >= len(repeater_head)) and (
-                    segment_head[-len(repeater_head) :] == repeater_head
+                if (
+                    (len(segment_head) >= len(repeater_head))
+                    and (segment_head[-len(repeater_head):] == repeater_head)
+                    # matched confirmed, check it's longer
+                    and (len(repeater) > len(longest_word))
                 ):
-                    # matched
-                    if len(repeater) > len(longest_word):
-                        longest_word = repeater
-                        repetition = len(repeater) - len(repeater_head)
+                    longest_word = repeater
+                    repetition = len(repeater) - len(repeater_head)
 
             if len(longest_word) > 0:
                 # if there is a match, use it

From 8d09323ac8b4d64e3ccdf8442cccc8ad5543bc28 Mon Sep 17 00:00:00 2001
From: konbraphat51 <brightray48@zeus.eonet.ne.jp>
Date: Fri, 10 Nov 2023 10:45:38 +0900
Subject: [PATCH 15/36] Fix test case

---
 tests/test_util.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/test_util.py b/tests/test_util.py
index b3165e25a..3c1618201 100644
--- a/tests/test_util.py
+++ b/tests/test_util.py
@@ -884,7 +884,7 @@ def test_remove_repeat_consonants(self):
                 'อืมมมมมมมมมมมมม คุณมีบุคลิกที่เริ่ดดดดด '
                 'ฉันจะให้เกรดดีกับคุณณณ\nนี่เป็นความลับบบบบ'
             ),
-            'ออืมมม คุณมีบุคลิกที่เริ่ด ฉันจะให้เกรดดีกับคุณ\nนี่เป็นความลับ'
+            'อืมมม คุณมีบุคลิกที่เริ่ด ฉันจะให้เกรดดีกับคุณ\nนี่เป็นความลับ'
         )
 
     # def test_abbreviation_to_full_text(self):

From 946f59c0ea61934a4291610c2345e407356b605e Mon Sep 17 00:00:00 2001
From: konbraphat51 <brightray48@zeus.eonet.ne.jp>
Date: Fri, 10 Nov 2023 10:56:39 +0900
Subject: [PATCH 16/36] Refac: separate function

Cognitive complexity pointed out by CodeClimate

Black used
---
 pythainlp/util/normalize.py | 144 ++++++++++++++++++++----------------
 1 file changed, 79 insertions(+), 65 deletions(-)

diff --git a/pythainlp/util/normalize.py b/pythainlp/util/normalize.py
index 0fcfb9764..b3d36693a 100644
--- a/pythainlp/util/normalize.py
+++ b/pythainlp/util/normalize.py
@@ -51,9 +51,7 @@
 ]
 
 # VOWELS + Phinthu, Thanthakhat, Nikhahit, Yamakkan
-_NOREPEAT_CHARS = (
-    f"{follow_v}{lead_v}{above_v}{below_v}\u0e3a\u0e4c\u0e4d\u0e4e"
-)
+_NOREPEAT_CHARS = f"{follow_v}{lead_v}{above_v}{below_v}\u0e3a\u0e4c\u0e4d\u0e4e"
 _NOREPEAT_PAIRS = list(
     zip([f"({ch}[ ]*)+{ch}" for ch in _NOREPEAT_CHARS], _NOREPEAT_CHARS)
 )
@@ -275,68 +273,8 @@ def remove_repeat_consonants(text: str, dictionary: Trie = None) -> str:
         segments = line.split(" ")
 
         for cnt, segment in enumerate(segments):
-            # skip if the segment is not the target
-            if not (
-                # the segment is long enough
-                (len(segment) > 1)
-                # last is Thai consonant
-                and (segment[-1] in consonants)
-                # has repiitition
-                and (segment[-1] == segment[-2])
-            ):
-                # skip
-                continue
-
-            # duplicating character
-            dup = segment[-1]
-
-            # find the words that has 2 or more duplication of
-            # this character at the end.
-            # TODO: This maybe slow if the dictionary is large.
-            #       If the dictionary not changed, this could be done
-            #       only once in the kernel.
-            #       But it will requires a global variable.
-            repeaters = []
-            for word in dictionary:
-                if (len(word) > 1) and (word[-1] == word[-2] == dup):
-                    repeaters.append(word)
-
-            # remove all of the last repeating character
-            segment_head = segment
-            while (len(segment_head) > 0) and (segment_head[-1] == dup):
-                segment_head = segment_head[:-1]
-
-            # find the longest word that matches the segment
-            longest_word = ""
-            repetition = 0  # how much the last character is repeated correctly
-            for repeater in repeaters:
-                # remove all of the last repeating character
-                repeater_head = repeater
-                while (len(repeater_head) > 0) and (repeater_head[-1] == dup):
-                    repeater_head = repeater_head[:-1]
-
-                # check match
-                if (
-                    (len(segment_head) >= len(repeater_head))
-                    and (segment_head[-len(repeater_head):] == repeater_head)
-                    # matched confirmed, check it's longer
-                    and (len(repeater) > len(longest_word))
-                ):
-                    longest_word = repeater
-                    repetition = len(repeater) - len(repeater_head)
-
-            if len(longest_word) > 0:
-                # if there is a match, use it
-                segment = segment_head + (dup * repetition)
-            else:
-                # if none found,
-                # the chance is that the correct is one character,
-                # or it's not in the dictionary.
-
-                # make the repition to once
-                segment = segment_head + (dup * 1)
-
-            segments[cnt] = segment
+            segments[cnt] = _remove_repeat_consonants_from_segment(
+                segment, dictionary)
 
         # revert spaces
         modified_line = " ".join(segments)
@@ -348,6 +286,82 @@ def remove_repeat_consonants(text: str, dictionary: Trie = None) -> str:
     return modified_text
 
 
+def _remove_repeat_consonants_from_segment(segment: str, dictionary: Trie) -> str:
+    """
+    Remove repeating consonants at the last of the segment.
+
+    This function process only at the last of the given text.
+    Details is same as remove_repeat_consonants().
+
+    :param str segment: segment of text
+    :param Trie dictionary: Trie dictionary to check the last word.
+    :return: segment without repeating Thai consonants
+    :rtype: str
+    """
+    # skip if the segment is not the target
+    if not (
+        # the segment is long enough
+        (len(segment) > 1)
+        # last is Thai consonant
+        and (segment[-1] in consonants)
+        # has repiitition
+        and (segment[-1] == segment[-2])
+    ):
+        # no need to process
+        return segment
+
+    # duplicating character
+    dup = segment[-1]
+
+    # find the words that has 2 or more duplication of
+    # this character at the end.
+    # TODO: This maybe slow if the dictionary is large.
+    #       If the dictionary not changed, this could be done
+    #       only once in the kernel.
+    #       But it will requires a global variable.
+    repeaters = []
+    for word in dictionary:
+        if (len(word) > 1) and (word[-1] == word[-2] == dup):
+            repeaters.append(word)
+
+    # remove all of the last repeating character
+    segment_head = segment
+    while (len(segment_head) > 0) and (segment_head[-1] == dup):
+        segment_head = segment_head[:-1]
+
+    # find the longest word that matches the segment
+    longest_word = ""
+    repetition = 0  # how much the last character is repeated correctly
+    for repeater in repeaters:
+        # remove all of the last repeating character
+        repeater_head = repeater
+        while (len(repeater_head) > 0) and (repeater_head[-1] == dup):
+            repeater_head = repeater_head[:-1]
+
+        # check match
+        if (
+            (len(segment_head) >= len(repeater_head))
+            and (segment_head[-len(repeater_head):] == repeater_head)
+            # matched confirmed, check it's longer
+            and (len(repeater) > len(longest_word))
+        ):
+            longest_word = repeater
+            repetition = len(repeater) - len(repeater_head)
+
+    if len(longest_word) > 0:
+        # if there is a match, use it
+        segment = segment_head + (dup * repetition)
+    else:
+        # if none found,
+        # the chance is that the correct is one character,
+        # or it's not in the dictionary.
+
+        # make the repition to once
+        segment = segment_head + (dup * 1)
+
+    return segment
+
+
 def normalize(text: str) -> str:
     """
     Normalize and clean Thai text with normalizing rules as follows:

From a5153e0b57f56f040423352d3aacb5a38ff6b4d4 Mon Sep 17 00:00:00 2001
From: konbraphat51 <brightray48@zeus.eonet.ne.jp>
Date: Fri, 10 Nov 2023 10:58:45 +0900
Subject: [PATCH 17/36] Refac: reduce line length

pointed out by Lint
black used
---
 pythainlp/util/normalize.py | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/pythainlp/util/normalize.py b/pythainlp/util/normalize.py
index b3d36693a..2c56f813c 100644
--- a/pythainlp/util/normalize.py
+++ b/pythainlp/util/normalize.py
@@ -51,7 +51,9 @@
 ]
 
 # VOWELS + Phinthu, Thanthakhat, Nikhahit, Yamakkan
-_NOREPEAT_CHARS = f"{follow_v}{lead_v}{above_v}{below_v}\u0e3a\u0e4c\u0e4d\u0e4e"
+_NOREPEAT_CHARS = (
+    f"{follow_v}{lead_v}{above_v}{below_v}\u0e3a\u0e4c\u0e4d\u0e4e"
+)
 _NOREPEAT_PAIRS = list(
     zip([f"({ch}[ ]*)+{ch}" for ch in _NOREPEAT_CHARS], _NOREPEAT_CHARS)
 )
@@ -274,7 +276,8 @@ def remove_repeat_consonants(text: str, dictionary: Trie = None) -> str:
 
         for cnt, segment in enumerate(segments):
             segments[cnt] = _remove_repeat_consonants_from_segment(
-                segment, dictionary)
+                segment, dictionary
+            )
 
         # revert spaces
         modified_line = " ".join(segments)
@@ -286,7 +289,9 @@ def remove_repeat_consonants(text: str, dictionary: Trie = None) -> str:
     return modified_text
 
 
-def _remove_repeat_consonants_from_segment(segment: str, dictionary: Trie) -> str:
+def _remove_repeat_consonants_from_segment(
+    segment: str, dictionary: Trie
+) -> str:
     """
     Remove repeating consonants at the last of the segment.
 

From 43dfd25f742fee532c4ef2a366761447d9389f91 Mon Sep 17 00:00:00 2001
From: konbraphat51 <brightray48@zeus.eonet.ne.jp>
Date: Fri, 10 Nov 2023 11:08:19 +0900
Subject: [PATCH 18/36] Refac: separate 2 functions

---
 pythainlp/util/normalize.py | 54 ++++++++++++++++++++++++++++++-------
 1 file changed, 44 insertions(+), 10 deletions(-)

diff --git a/pythainlp/util/normalize.py b/pythainlp/util/normalize.py
index 2c56f813c..67948cce6 100644
--- a/pythainlp/util/normalize.py
+++ b/pythainlp/util/normalize.py
@@ -324,24 +324,17 @@ def _remove_repeat_consonants_from_segment(
     #       If the dictionary not changed, this could be done
     #       only once in the kernel.
     #       But it will requires a global variable.
-    repeaters = []
-    for word in dictionary:
-        if (len(word) > 1) and (word[-1] == word[-2] == dup):
-            repeaters.append(word)
+    repeaters = _get_all_last_consonant_repeaters(dup, dictionary)
 
     # remove all of the last repeating character
-    segment_head = segment
-    while (len(segment_head) > 0) and (segment_head[-1] == dup):
-        segment_head = segment_head[:-1]
+    segment_head = _get_repitition_head(segment, dup)
 
     # find the longest word that matches the segment
     longest_word = ""
     repetition = 0  # how much the last character is repeated correctly
     for repeater in repeaters:
         # remove all of the last repeating character
-        repeater_head = repeater
-        while (len(repeater_head) > 0) and (repeater_head[-1] == dup):
-            repeater_head = repeater_head[:-1]
+        repeater_head = _get_repitition_head(repeater, dup)
 
         # check match
         if (
@@ -367,6 +360,47 @@ def _remove_repeat_consonants_from_segment(
     return segment
 
 
+def _get_repitition_head(text: str, dup: str) -> str:
+    """
+    Reduce repeating characters at the end of the text.
+
+    This function will remove the repeating characters at the last.
+    The text just before the repeating characters will be returned.
+
+    :param str text: input text
+    :param str dup: repeating character to be removed
+    :return: text without repeating characters at the end
+    :rtype: str
+    """
+    head = text
+    while (len(head) > 0) and (head[-1] == dup):
+        head = head[:-1]
+
+    return head
+
+
+def _get_all_last_consonant_repeaters(
+    consonant: str, dictionary: Trie
+) -> List[str]:
+    """
+    Get all words that has repeating consonants at the end from the dictionary.
+
+    Search all words in the dictionary that has more than 1 given consonants
+    repeating at the end.
+
+    :param str consonant: consonant to be searched
+    :param Trie dictionary: Trie dictionary to search
+    :return: list of words that has repeating consonants at the end
+    :rtype: List[str]
+    """
+    repeaters = []
+    for word in dictionary:
+        if (len(word) > 1) and (word[-1] == word[-2] == consonant):
+            repeaters.append(word)
+
+    return repeaters
+
+
 def normalize(text: str) -> str:
     """
     Normalize and clean Thai text with normalizing rules as follows:

From d9ae5343a7069e02de73121528555e54f6d07327 Mon Sep 17 00:00:00 2001
From: konbraphat51 <brightray48@zeus.eonet.ne.jp>
Date: Fri, 10 Nov 2023 11:11:05 +0900
Subject: [PATCH 19/36] Refac: use black

VS Code's autopep8 and black have been conflicting, so autopep8 was cut.
---
 pythainlp/util/normalize.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pythainlp/util/normalize.py b/pythainlp/util/normalize.py
index 67948cce6..c25b6c58a 100644
--- a/pythainlp/util/normalize.py
+++ b/pythainlp/util/normalize.py
@@ -339,7 +339,7 @@ def _remove_repeat_consonants_from_segment(
         # check match
         if (
             (len(segment_head) >= len(repeater_head))
-            and (segment_head[-len(repeater_head):] == repeater_head)
+            and (segment_head[-len(repeater_head) :] == repeater_head)
             # matched confirmed, check it's longer
             and (len(repeater) > len(longest_word))
         ):

From 844c21d8f09ad357216ff5d6fea08d801737a662 Mon Sep 17 00:00:00 2001
From: konbraphat51 <brightray48@zeus.eonet.ne.jp>
Date: Fri, 10 Nov 2023 11:27:04 +0900
Subject: [PATCH 20/36] Refac: separate match-finding method

cognitive complexity pointed out by CodeClimate.
Black used.
---
 pythainlp/util/normalize.py | 58 +++++++++++++++++++++++++++----------
 1 file changed, 42 insertions(+), 16 deletions(-)

diff --git a/pythainlp/util/normalize.py b/pythainlp/util/normalize.py
index c25b6c58a..76dc8f3a0 100644
--- a/pythainlp/util/normalize.py
+++ b/pythainlp/util/normalize.py
@@ -16,7 +16,7 @@
 Text normalization
 """
 import re
-from typing import List, Union
+from typing import List, Tuple, Union
 
 from pythainlp import thai_above_vowels as above_v
 from pythainlp import thai_below_vowels as below_v
@@ -330,21 +330,9 @@ def _remove_repeat_consonants_from_segment(
     segment_head = _get_repitition_head(segment, dup)
 
     # find the longest word that matches the segment
-    longest_word = ""
-    repetition = 0  # how much the last character is repeated correctly
-    for repeater in repeaters:
-        # remove all of the last repeating character
-        repeater_head = _get_repitition_head(repeater, dup)
-
-        # check match
-        if (
-            (len(segment_head) >= len(repeater_head))
-            and (segment_head[-len(repeater_head) :] == repeater_head)
-            # matched confirmed, check it's longer
-            and (len(repeater) > len(longest_word))
-        ):
-            longest_word = repeater
-            repetition = len(repeater) - len(repeater_head)
+    longest_word, repetition = _find_longest_consonant_repeaters_match(
+        segment_head, repeaters
+    )
 
     if len(longest_word) > 0:
         # if there is a match, use it
@@ -401,6 +389,44 @@ def _get_all_last_consonant_repeaters(
     return repeaters
 
 
+def _find_longest_consonant_repeaters_match(
+    segment_head: str, repeaters: List[str]
+) -> Tuple[str, int]:
+    """
+    Find the longest word that matches the segment.
+
+    Find the longest word that matches the last
+    of the segment from the given repeaters list.
+    This returns the word and
+    how much the last character is repeated correctly.
+
+    :param str segment: segment of text
+    :param List[str] repeaters: list of words
+    that has repeating consonants at the end
+    :return: "tuple of the word" and
+    "how much the last character is repeated correctly"
+    If none, ("", 0) will be returned.
+    :rtype: Tuple[str, int]
+    """
+    longest_word = ""  # the longest word that matches the segment
+    repetition = 0  # how much the last character is repeated correctly
+    for repeater in repeaters:
+        # remove all of the last repeating character
+        repeater_head = _get_repitition_head(repeater, repeater[-1])
+
+        # check match
+        if (
+            (len(segment_head) >= len(repeater_head))
+            and (segment_head[-len(repeater_head) :] == repeater_head)
+            # matched confirmed, check it's longer
+            and (len(repeater) > len(longest_word))
+        ):
+            longest_word = repeater
+            repetition = len(repeater) - len(repeater_head)
+
+    return longest_word, repetition
+
+
 def normalize(text: str) -> str:
     """
     Normalize and clean Thai text with normalizing rules as follows:

From 1e1631f3378cb0965d6730eb3c7da62fdd7cd825 Mon Sep 17 00:00:00 2001
From: konbraphat51 <brightray48@zeus.eonet.ne.jp>
Date: Fri, 10 Nov 2023 11:43:32 +0900
Subject: [PATCH 21/36] Improve: save consonant repeaters to improve speed

TODO resolved, black used, test passed
---
 pythainlp/util/normalize.py | 55 +++++++++++++++++++++++++------------
 1 file changed, 37 insertions(+), 18 deletions(-)

diff --git a/pythainlp/util/normalize.py b/pythainlp/util/normalize.py
index 76dc8f3a0..4939ad062 100644
--- a/pythainlp/util/normalize.py
+++ b/pythainlp/util/normalize.py
@@ -33,6 +33,14 @@
 
 _ZERO_WIDTH_CHARS = "\u200b\u200c"  # ZWSP, ZWNJ
 
+# used by remove_repeat_consonants()
+# contains all words that has repeating consonants at the end
+# for each consonant
+# when dictionary updated, this should be updated too
+# key: consonant
+# value: list of words that has repeating consonants at the end
+consonants_repeaters = {}
+
 _REORDER_PAIRS = [
     ("\u0e40\u0e40", "\u0e41"),  # Sara E + Sara E -> Sara Ae
     (
@@ -220,7 +228,9 @@ def remove_repeat_vowels(text: str) -> str:
     return text
 
 
-def remove_repeat_consonants(text: str, dictionary: Trie = None) -> str:
+def remove_repeat_consonants(
+    text: str, dictionary: Trie = None, dictionary_updated: bool = True
+) -> str:
     """
     Remove repeating consonants at the last of the sentence.
 
@@ -237,6 +247,9 @@ def remove_repeat_consonants(text: str, dictionary: Trie = None) -> str:
     :param str text: input text
     :param Trie dictionary: Trie dictionary to check the last word.
     If None, pythainlp.corpus.thai_words() will be used
+    :param bool dictionary_updated: If the dictionary is updated 
+    or the first time using in the kernel, set this true.
+    If not, set this false to save time.
     :return: text without repeating Thai consonants
     :rtype: str
 
@@ -269,6 +282,10 @@ def remove_repeat_consonants(text: str, dictionary: Trie = None) -> str:
     if dictionary is None:
         dictionary = thai_words()
 
+    # update repeaters dictionary if not updated
+    if dictionary_updated:
+        _update_consonant_repeaters(dictionary)
+
     # seperate by newline
     modified_lines = []
     for line in text.split("\n"):
@@ -320,11 +337,7 @@ def _remove_repeat_consonants_from_segment(
 
     # find the words that has 2 or more duplication of
     # this character at the end.
-    # TODO: This maybe slow if the dictionary is large.
-    #       If the dictionary not changed, this could be done
-    #       only once in the kernel.
-    #       But it will requires a global variable.
-    repeaters = _get_all_last_consonant_repeaters(dup, dictionary)
+    repeaters = consonants_repeaters[dup]
 
     # remove all of the last repeating character
     segment_head = _get_repitition_head(segment, dup)
@@ -367,26 +380,32 @@ def _get_repitition_head(text: str, dup: str) -> str:
     return head
 
 
-def _get_all_last_consonant_repeaters(
-    consonant: str, dictionary: Trie
-) -> List[str]:
+def _update_consonant_repeaters(dictionary: Trie) -> None:
     """
-    Get all words that has repeating consonants at the end from the dictionary.
+    Update dictionary of all words that has
+    repeating consonants at the end from the dictionary.
 
-    Search all words in the dictionary that has more than 1 given consonants
-    repeating at the end.
+    Search all words in the dictionary that has more than 1 consonants
+    repeating at the end and store them in the global dictionary.
 
     :param str consonant: consonant to be searched
     :param Trie dictionary: Trie dictionary to search
-    :return: list of words that has repeating consonants at the end
-    :rtype: List[str]
+    :rtype: None
     """
-    repeaters = []
+    # initialize dictionary
+    for consonant in list(consonants):
+        consonants_repeaters[consonant] = []
+
+    # register
     for word in dictionary:
-        if (len(word) > 1) and (word[-1] == word[-2] == consonant):
-            repeaters.append(word)
+        if (
+            (len(word) > 1)
+            and (word[-1] == word[-2])
+            and (word[-1] in consonants)
+        ):
+            consonants_repeaters[word[-1]].append(word)
 
-    return repeaters
+    return
 
 
 def _find_longest_consonant_repeaters_match(

From ceb9d76a4fe935e4bd8f46dabebaccfdfa9938f0 Mon Sep 17 00:00:00 2001
From: konbraphat51 <brightray48@zeus.eonet.ne.jp>
Date: Fri, 10 Nov 2023 11:51:40 +0900
Subject: [PATCH 22/36] Refac: make repeater checking function

Code complexity pointed out by CodeClimate, black used
---
 pythainlp/util/normalize.py | 22 +++++++++++++++++-----
 1 file changed, 17 insertions(+), 5 deletions(-)

diff --git a/pythainlp/util/normalize.py b/pythainlp/util/normalize.py
index 4939ad062..9c47f407e 100644
--- a/pythainlp/util/normalize.py
+++ b/pythainlp/util/normalize.py
@@ -398,16 +398,28 @@ def _update_consonant_repeaters(dictionary: Trie) -> None:
 
     # register
     for word in dictionary:
-        if (
-            (len(word) > 1)
-            and (word[-1] == word[-2])
-            and (word[-1] in consonants)
-        ):
+        if _is_consonant_repeater(word):
             consonants_repeaters[word[-1]].append(word)
 
     return
 
 
+def _is_consonant_repeater(word: str) -> bool:
+    """
+    Check if the word has repeating consonants at the end.
+
+    This function checks if the word has
+    more than 1 repeating consonants at the end.
+
+    :param str word: word to be checked
+    :return: True if the word has repeating consonants at the end.
+    :rtype: bool
+    """
+    return (
+        (len(word) > 1) and (word[-1] == word[-2]) and (word[-1] in consonants)
+    )
+
+
 def _find_longest_consonant_repeaters_match(
     segment_head: str, repeaters: List[str]
 ) -> Tuple[str, int]:

From 6509e0da3478bf01a2cf248d96f00b8030c6fae9 Mon Sep 17 00:00:00 2001
From: konbraphat51 <konbraphat@gmail.com>
Date: Sat, 11 Nov 2023 23:11:16 +0900
Subject: [PATCH 23/36] Refac: separate function

---
 pythainlp/util/__init__.py               |   2 +-
 pythainlp/util/normalize.py              | 248 +---------------------
 pythainlp/util/removerepeatconsonants.py | 253 +++++++++++++++++++++++
 3 files changed, 257 insertions(+), 246 deletions(-)
 create mode 100644 pythainlp/util/removerepeatconsonants.py

diff --git a/pythainlp/util/__init__.py b/pythainlp/util/__init__.py
index 05432ecea..99bc46621 100644
--- a/pythainlp/util/__init__.py
+++ b/pythainlp/util/__init__.py
@@ -102,8 +102,8 @@
     remove_tonemark,
     remove_zw,
     reorder_vowels,
-    remove_repeat_consonants
 )
+from pythainlp.util.removerepeatconsonants import remove_repeat_consonants
 from pythainlp.util.numtoword import bahttext, num_to_thaiword
 from pythainlp.util.strftime import thai_strftime
 from pythainlp.util.thai import (
diff --git a/pythainlp/util/normalize.py b/pythainlp/util/normalize.py
index 9c47f407e..825ed79eb 100644
--- a/pythainlp/util/normalize.py
+++ b/pythainlp/util/normalize.py
@@ -16,31 +16,21 @@
 Text normalization
 """
 import re
-from typing import List, Tuple, Union
+from typing import List, Union
 
 from pythainlp import thai_above_vowels as above_v
 from pythainlp import thai_below_vowels as below_v
 from pythainlp import thai_follow_vowels as follow_v
 from pythainlp import thai_lead_vowels as lead_v
 from pythainlp import thai_tonemarks as tonemarks
-from pythainlp import thai_consonants as consonants
 from pythainlp.tokenize import word_tokenize
-from pythainlp.corpus import thai_words
-from pythainlp.util.trie import Trie
+
 
 _DANGLING_CHARS = f"{above_v}{below_v}{tonemarks}\u0e3a\u0e4c\u0e4d\u0e4e"
 _RE_REMOVE_DANGLINGS = re.compile(f"^[{_DANGLING_CHARS}]+")
 
 _ZERO_WIDTH_CHARS = "\u200b\u200c"  # ZWSP, ZWNJ
 
-# used by remove_repeat_consonants()
-# contains all words that has repeating consonants at the end
-# for each consonant
-# when dictionary updated, this should be updated too
-# key: consonant
-# value: list of words that has repeating consonants at the end
-consonants_repeaters = {}
-
 _REORDER_PAIRS = [
     ("\u0e40\u0e40", "\u0e41"),  # Sara E + Sara E -> Sara Ae
     (
@@ -59,9 +49,7 @@
 ]
 
 # VOWELS + Phinthu, Thanthakhat, Nikhahit, Yamakkan
-_NOREPEAT_CHARS = (
-    f"{follow_v}{lead_v}{above_v}{below_v}\u0e3a\u0e4c\u0e4d\u0e4e"
-)
+_NOREPEAT_CHARS = f"{follow_v}{lead_v}{above_v}{below_v}\u0e3a\u0e4c\u0e4d\u0e4e"
 _NOREPEAT_PAIRS = list(
     zip([f"({ch}[ ]*)+{ch}" for ch in _NOREPEAT_CHARS], _NOREPEAT_CHARS)
 )
@@ -228,236 +216,6 @@ def remove_repeat_vowels(text: str) -> str:
     return text
 
 
-def remove_repeat_consonants(
-    text: str, dictionary: Trie = None, dictionary_updated: bool = True
-) -> str:
-    """
-    Remove repeating consonants at the last of the sentence.
-
-    This function will remove the repeating consonants
-    before a whitespace, new line or at the last
-    so that the last word matches a word in the given dictionary.
-    If there is no match, the repeating consonants will be
-    reduced to one.
-    If there are several match, the longest word will be used.
-    Since this function uses a dictionary, the result may differs
-    depending on the dictionary used.
-    Plus, it is recommended to use normalize() to have a better result.
-
-    :param str text: input text
-    :param Trie dictionary: Trie dictionary to check the last word.
-    If None, pythainlp.corpus.thai_words() will be used
-    :param bool dictionary_updated: If the dictionary is updated 
-    or the first time using in the kernel, set this true.
-    If not, set this false to save time.
-    :return: text without repeating Thai consonants
-    :rtype: str
-
-    :Example:
-    ::
-
-        from pythainlp.util import remove_repeat_consonants
-        from pythainlp.util import dict_trie
-
-        # use default dictionary (pythainlp.corpus.thai_words())
-        remove_repeat_consonants('เริ่ดดดดดดดด')
-        # output: เริ่ด
-
-        remove_repeat_consonants('อืมมมมมมมมมมมมมมม')
-        # output: อืมมม
-        # "อืมมม" is in the default dictionary
-
-        # use custom dictionary
-        custom_dictionary = dict_trie(["อืมมมมม"])
-        remove_repeat_consonants('อืมมมมมมมมมมมมมมม', custom_dictionary)
-        # output: อืมมมมม
-
-        # long text
-        remove_repeat_consonants('อืมมมมมมมมมมมมม คุณมีบุคลิกที่เริ่ดดดดด '\
-        'ฉันจะให้เกรดดีกับคุณณณ\nนี่เป็นความลับบบบบ')
-        # output: อืมมม คุณมีบุคลิกที่เริ่ด ฉันจะให้เกรดดีกับคุณ
-        #         นี่เป็นความลับ
-    """
-    # use default dictionary if not given
-    if dictionary is None:
-        dictionary = thai_words()
-
-    # update repeaters dictionary if not updated
-    if dictionary_updated:
-        _update_consonant_repeaters(dictionary)
-
-    # seperate by newline
-    modified_lines = []
-    for line in text.split("\n"):
-        segments = line.split(" ")
-
-        for cnt, segment in enumerate(segments):
-            segments[cnt] = _remove_repeat_consonants_from_segment(
-                segment, dictionary
-            )
-
-        # revert spaces
-        modified_line = " ".join(segments)
-        modified_lines.append(modified_line)
-
-    # revert newlines
-    modified_text = "\n".join(modified_lines)
-
-    return modified_text
-
-
-def _remove_repeat_consonants_from_segment(
-    segment: str, dictionary: Trie
-) -> str:
-    """
-    Remove repeating consonants at the last of the segment.
-
-    This function process only at the last of the given text.
-    Details is same as remove_repeat_consonants().
-
-    :param str segment: segment of text
-    :param Trie dictionary: Trie dictionary to check the last word.
-    :return: segment without repeating Thai consonants
-    :rtype: str
-    """
-    # skip if the segment is not the target
-    if not (
-        # the segment is long enough
-        (len(segment) > 1)
-        # last is Thai consonant
-        and (segment[-1] in consonants)
-        # has repiitition
-        and (segment[-1] == segment[-2])
-    ):
-        # no need to process
-        return segment
-
-    # duplicating character
-    dup = segment[-1]
-
-    # find the words that has 2 or more duplication of
-    # this character at the end.
-    repeaters = consonants_repeaters[dup]
-
-    # remove all of the last repeating character
-    segment_head = _get_repitition_head(segment, dup)
-
-    # find the longest word that matches the segment
-    longest_word, repetition = _find_longest_consonant_repeaters_match(
-        segment_head, repeaters
-    )
-
-    if len(longest_word) > 0:
-        # if there is a match, use it
-        segment = segment_head + (dup * repetition)
-    else:
-        # if none found,
-        # the chance is that the correct is one character,
-        # or it's not in the dictionary.
-
-        # make the repition to once
-        segment = segment_head + (dup * 1)
-
-    return segment
-
-
-def _get_repitition_head(text: str, dup: str) -> str:
-    """
-    Reduce repeating characters at the end of the text.
-
-    This function will remove the repeating characters at the last.
-    The text just before the repeating characters will be returned.
-
-    :param str text: input text
-    :param str dup: repeating character to be removed
-    :return: text without repeating characters at the end
-    :rtype: str
-    """
-    head = text
-    while (len(head) > 0) and (head[-1] == dup):
-        head = head[:-1]
-
-    return head
-
-
-def _update_consonant_repeaters(dictionary: Trie) -> None:
-    """
-    Update dictionary of all words that has
-    repeating consonants at the end from the dictionary.
-
-    Search all words in the dictionary that has more than 1 consonants
-    repeating at the end and store them in the global dictionary.
-
-    :param str consonant: consonant to be searched
-    :param Trie dictionary: Trie dictionary to search
-    :rtype: None
-    """
-    # initialize dictionary
-    for consonant in list(consonants):
-        consonants_repeaters[consonant] = []
-
-    # register
-    for word in dictionary:
-        if _is_consonant_repeater(word):
-            consonants_repeaters[word[-1]].append(word)
-
-    return
-
-
-def _is_consonant_repeater(word: str) -> bool:
-    """
-    Check if the word has repeating consonants at the end.
-
-    This function checks if the word has
-    more than 1 repeating consonants at the end.
-
-    :param str word: word to be checked
-    :return: True if the word has repeating consonants at the end.
-    :rtype: bool
-    """
-    return (
-        (len(word) > 1) and (word[-1] == word[-2]) and (word[-1] in consonants)
-    )
-
-
-def _find_longest_consonant_repeaters_match(
-    segment_head: str, repeaters: List[str]
-) -> Tuple[str, int]:
-    """
-    Find the longest word that matches the segment.
-
-    Find the longest word that matches the last
-    of the segment from the given repeaters list.
-    This returns the word and
-    how much the last character is repeated correctly.
-
-    :param str segment: segment of text
-    :param List[str] repeaters: list of words
-    that has repeating consonants at the end
-    :return: "tuple of the word" and
-    "how much the last character is repeated correctly"
-    If none, ("", 0) will be returned.
-    :rtype: Tuple[str, int]
-    """
-    longest_word = ""  # the longest word that matches the segment
-    repetition = 0  # how much the last character is repeated correctly
-    for repeater in repeaters:
-        # remove all of the last repeating character
-        repeater_head = _get_repitition_head(repeater, repeater[-1])
-
-        # check match
-        if (
-            (len(segment_head) >= len(repeater_head))
-            and (segment_head[-len(repeater_head) :] == repeater_head)
-            # matched confirmed, check it's longer
-            and (len(repeater) > len(longest_word))
-        ):
-            longest_word = repeater
-            repetition = len(repeater) - len(repeater_head)
-
-    return longest_word, repetition
-
-
 def normalize(text: str) -> str:
     """
     Normalize and clean Thai text with normalizing rules as follows:
diff --git a/pythainlp/util/removerepeatconsonants.py b/pythainlp/util/removerepeatconsonants.py
new file mode 100644
index 000000000..966712b37
--- /dev/null
+++ b/pythainlp/util/removerepeatconsonants.py
@@ -0,0 +1,253 @@
+# -*- coding: utf-8 -*-
+# Copyright (C) 2016-2023 PyThaiNLP Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""
+Removement of repeated consonants
+"""
+from pythainlp.corpus import thai_words
+from pythainlp.util.trie import Trie
+from pythainlp import thai_consonants as consonants
+from typing import Tuple, List
+
+# used by remove_repeat_consonants()
+# contains all words that has repeating consonants at the end
+# for each consonant
+# when dictionary updated, this should be updated too
+# key: consonan
+# value: list of words that has repeating consonants at the end
+consonants_repeaters = {}
+
+
+def remove_repeat_consonants(
+    text: str, dictionary: Trie = None, dictionary_updated: bool = True
+) -> str:
+    """
+    Remove repeating consonants at the last of the sentence.
+
+    This function will remove the repeating consonants
+    before a whitespace, new line or at the last
+    so that the last word matches a word in the given dictionary.
+    If there is no match, the repeating consonants will be
+    reduced to one.
+    If there are several matches, the longest word will be used.
+    Since this function uses a dictionary, the result may differ
+    depending on the dictionary used.
+    Plus, it is recommended to use normalize() to have a better result.
+
+    :param str text: input text
+    :param Trie dictionary: Trie dictionary to check the last word.
+    If None, pythainlp.corpus.thai_words() will be used
+    :param bool dictionary_updated: If the dictionary is updated 
+    or the first time using in the kernel, set this true.
+    If not, set this false to save time.
+    :return: text without repeating Thai consonants
+    :rtype: str
+
+    :Example:
+    ::
+
+        from pythainlp.util import remove_repeat_consonants
+        from pythainlp.util import dict_trie
+
+        # use default dictionary (pythainlp.corpus.thai_words())
+        remove_repeat_consonants('เริ่ดดดดดดดด')
+        # output: เริ่ด
+
+        remove_repeat_consonants('อืมมมมมมมมมมมมมมม')
+        # output: อืมมม
+        # "อืมมม" is in the default dictionary
+
+        # use custom dictionary
+        custom_dictionary = dict_trie(["อืมมมมม"])
+        remove_repeat_consonants('อืมมมมมมมมมมมมมมม', custom_dictionary)
+        # output: อืมมมมม
+
+        # long text
+        remove_repeat_consonants('อืมมมมมมมมมมมมม คุณมีบุคลิกที่เริ่ดดดดด '\
+        'ฉันจะให้เกรดดีกับคุณณณ\nนี่เป็นความลับบบบบ')
+        # output: อืมมม คุณมีบุคลิกที่เริ่ด ฉันจะให้เกรดดีกับคุณ
+        #         นี่เป็นความลับ
+    """
+    # use default dictionary if not given
+    if dictionary is None:
+        dictionary = thai_words()
+
+    # update repeaters dictionary if not updated
+    if dictionary_updated:
+        _update_consonant_repeaters(dictionary)
+
+    # seperate by newline
+    modified_lines = []
+    for line in text.split("\n"):
+        segments = line.split(" ")
+
+        for cnt, segment in enumerate(segments):
+            segments[cnt] = _remove_repeat_consonants_from_segment(segment, dictionary)
+
+        # revert spaces
+        modified_line = " ".join(segments)
+        modified_lines.append(modified_line)
+
+    # revert newlines
+    modified_text = "\n".join(modified_lines)
+
+    return modified_text
+
+
+def _remove_repeat_consonants_from_segment(segment: str, dictionary: Trie) -> str:
+    """
+    Remove repeating consonants at the last of the segment.
+
+    This function processes only the end of the given text.
+    Details is same as remove_repeat_consonants().
+
+    :param str segment: segment of text
+    :param Trie dictionary: Trie dictionary to check the last word.
+    :return: segment without repeating Thai consonants
+    :rtype: str
+    """
+    # skip if the segment is not the target
+    if not (
+        # the segment is long enough
+        (len(segment) > 1)
+        # last is Thai consonant
+        and (segment[-1] in consonants)
+        # has repetition
+        and (segment[-1] == segment[-2])
+    ):
+        # no need to process
+        return segment
+
+    # duplicating character
+    dup = segment[-1]
+
+    # find the words that has 2 or more duplication of
+    # this character at the end.
+    repeaters = consonants_repeaters[dup]
+
+    # remove all of the last repeating character
+    segment_head = _get_repitition_head(segment, dup)
+
+    # find the longest word that matches the segment
+    longest_word, repetition = _find_longest_consonant_repeaters_match(
+        segment_head, repeaters
+    )
+
+    if len(longest_word) > 0:
+        # if there is a match, use it
+        segment = segment_head + (dup * repetition)
+    else:
+        # if none found,
+        # the chance is that the correct is one character,
+        # or it's not in the dictionary.
+
+        # reduce the repetition to a single character
+        segment = segment_head + (dup * 1)
+
+    return segment
+
+
+def _get_repitition_head(text: str, dup: str) -> str:
+    """
+    Reduce repeating characters at the end of the text.
+
+    This function will remove the repeating characters at the last.
+    The text just before the repeating characters will be returned.
+
+    :param str text: input text
+    :param str dup: repeating character to be removed
+    :return: text without repeating characters at the end
+    :rtype: str
+    """
+    head = text
+    while (len(head) > 0) and (head[-1] == dup):
+        head = head[:-1]
+
+    return head
+
+
+def _update_consonant_repeaters(dictionary: Trie) -> None:
+    """
+    Update dictionary of all words that has
+    repeating consonants at the end from the dictionary.
+
+    Search all words in the dictionary that has more than 1 consonants
+    repeating at the end and store them in the global dictionary.
+
+    :param str consonant: consonant to be searched
+    :param Trie dictionary: Trie dictionary to search
+    :rtype: None
+    """
+    # initialize dictionary
+    for consonant in list(consonants):
+        consonants_repeaters[consonant] = []
+
+    # register
+    for word in dictionary:
+        if _is_consonant_repeater(word):
+            consonants_repeaters[word[-1]].append(word)
+
+    return
+
+
+def _is_consonant_repeater(word: str) -> bool:
+    """
+    Check if the word has repeating consonants at the end.
+
+    This function checks if the word has
+    more than 1 repeating consonants at the end.
+
+    :param str word: word to be checked
+    :return: True if the word has repeating consonants at the end.
+    :rtype: bool
+    """
+    return (len(word) > 1) and (word[-1] == word[-2]) and (word[-1] in consonants)
+
+
+def _find_longest_consonant_repeaters_match(
+    segment_head: str, repeaters: List[str]
+) -> Tuple[str, int]:
+    """
+    Find the longest word that matches the segment.
+
+    Find the longest word that matches the last
+    of the segment from the given repeaters list.
+    This returns the word and
+    how much the last character is repeated correctly.
+
+    :param str segment_head: segment with its trailing repeats removed
+    :param List[str] repeaters: list of words
+    that has repeating consonants at the end
+    :return: "tuple of the word" and
+    "how much the last character is repeated correctly"
+    If none, ("", 0) will be returned.
+    :rtype: Tuple[str, int]
+    """
+    longest_word = ""  # the longest word that matches the segment
+    repetition = 0  # how much the last character is repeated correctly
+    for repeater in repeaters:
+        # remove all of the last repeating character
+        repeater_head = _get_repitition_head(repeater, repeater[-1])
+
+        # check match
+        if (
+            (len(segment_head) >= len(repeater_head))
+            and (segment_head[-len(repeater_head) :] == repeater_head)
+            # matched confirmed, check it's longer
+            and (len(repeater) > len(longest_word))
+        ):
+            longest_word = repeater
+            repetition = len(repeater) - len(repeater_head)
+
+    return longest_word, repetition

From 9c1a34ca39a3c11d6d27ca8c254c7fd92c93a2d6 Mon Sep 17 00:00:00 2001
From: konbraphat51 <konbraphat@gmail.com>
Date: Sat, 11 Nov 2023 23:15:59 +0900
Subject: [PATCH 24/36] Improve: Rename method

suggested by https://github.com/PyThaiNLP/pythainlp/pull/862#issuecomment-1805830606
---
 pythainlp/util/__init__.py               |  2 +-
 pythainlp/util/removerepeatconsonants.py |  2 +-
 tests/test_util.py                       | 10 +++++-----
 3 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/pythainlp/util/__init__.py b/pythainlp/util/__init__.py
index 99bc46621..3b03f8367 100644
--- a/pythainlp/util/__init__.py
+++ b/pythainlp/util/__init__.py
@@ -103,7 +103,7 @@
     remove_zw,
     reorder_vowels,
 )
-from pythainlp.util.removerepeatconsonants import remove_repeat_consonants
+from pythainlp.util.removerepeatconsonants import remove_trailing_repeat_consonants
 from pythainlp.util.numtoword import bahttext, num_to_thaiword
 from pythainlp.util.strftime import thai_strftime
 from pythainlp.util.thai import (
diff --git a/pythainlp/util/removerepeatconsonants.py b/pythainlp/util/removerepeatconsonants.py
index 966712b37..32b0cf472 100644
--- a/pythainlp/util/removerepeatconsonants.py
+++ b/pythainlp/util/removerepeatconsonants.py
@@ -29,7 +29,7 @@
 consonants_repeaters = {}
 
 
-def remove_repeat_consonants(
+def remove_trailing_repeat_consonants(
     text: str, dictionary: Trie = None, dictionary_updated: bool = True
 ) -> str:
     """
diff --git a/tests/test_util.py b/tests/test_util.py
index 3c1618201..e45319c99 100644
--- a/tests/test_util.py
+++ b/tests/test_util.py
@@ -60,7 +60,7 @@
     ipa_to_rtgs,
     remove_tone_ipa,
     tis620_to_utf8,
-    remove_repeat_consonants
+    remove_trailing_repeat_consonants
 )
 from pythainlp.util.spell_words import spell_word
 
@@ -865,22 +865,22 @@ def test_rhyme(self):
     def test_remove_repeat_consonants(self):
         # update of pythainlp.copus.thai_words() able to break this
         self.assertEqual(
-            remove_repeat_consonants('เริ่ดดดดดดดด'),
+            remove_trailing_repeat_consonants('เริ่ดดดดดดดด'),
             'เริ่ด'
         )
         self.assertEqual(
-            remove_repeat_consonants('อืมมมมมมมมมมมมมมม'),
+            remove_trailing_repeat_consonants('อืมมมมมมมมมมมมมมม'),
             'อืมมม'
         )
 
         custom_dictionary = dict_trie(["อืมมมมม"])
         self.assertEqual(
-            remove_repeat_consonants('อืมมมมมมมมมมมมมมม', custom_dictionary),
+            remove_trailing_repeat_consonants('อืมมมมมมมมมมมมมมม', custom_dictionary),
             'อืมมมมม'
         )
 
         self.assertEqual(
-            remove_repeat_consonants(
+            remove_trailing_repeat_consonants(
                 'อืมมมมมมมมมมมมม คุณมีบุคลิกที่เริ่ดดดดด '
                 'ฉันจะให้เกรดดีกับคุณณณ\nนี่เป็นความลับบบบบ'
             ),

From 24c30500ab3616c4a74ef204b6b86884c55bcb97 Mon Sep 17 00:00:00 2001
From: konbraphat51 <konbraphat@gmail.com>
Date: Sat, 11 Nov 2023 23:27:58 +0900
Subject: [PATCH 25/36] Refac: make names more clear

---
 pythainlp/util/removerepeatconsonants.py | 39 ++++++++++++------------
 1 file changed, 20 insertions(+), 19 deletions(-)

diff --git a/pythainlp/util/removerepeatconsonants.py b/pythainlp/util/removerepeatconsonants.py
index 32b0cf472..aad8f2638 100644
--- a/pythainlp/util/removerepeatconsonants.py
+++ b/pythainlp/util/removerepeatconsonants.py
@@ -26,11 +26,11 @@
 # when dictionary updated, this should be updated too
 # key: consonan
 # value: list of words that has repeating consonants at the end
-consonants_repeaters = {}
+last_consonants_repeaters = {}
 
 
 def remove_trailing_repeat_consonants(
-    text: str, dictionary: Trie = None, dictionary_updated: bool = True
+    text: str, dictionary: Trie = None, has_dictionary_updated: bool = True
 ) -> str:
     """
     Remove repeating consonants at the last of the sentence.
@@ -48,7 +48,7 @@ def remove_trailing_repeat_consonants(
     :param str text: input text
     :param Trie dictionary: Trie dictionary to check the last word.
     If None, pythainlp.corpus.thai_words() will be used
-    :param bool dictionary_updated: If the dictionary is updated 
+    :param bool has_dictionary_updated: If the dictionary is updated 
     or the first time using in the kernel, set this true.
     If not, set this false to save time.
     :return: text without repeating Thai consonants
@@ -84,7 +84,7 @@ def remove_trailing_repeat_consonants(
         dictionary = thai_words()
 
     # update repeaters dictionary if not updated
-    if dictionary_updated:
+    if has_dictionary_updated:
         _update_consonant_repeaters(dictionary)
 
     # seperate by newline
@@ -93,7 +93,9 @@ def remove_trailing_repeat_consonants(
         segments = line.split(" ")
 
         for cnt, segment in enumerate(segments):
-            segments[cnt] = _remove_repeat_consonants_from_segment(segment, dictionary)
+            segments[cnt] = _remove_repeat_trailing_consonants_from_segment(
+                segment, dictionary
+            )
 
         # revert spaces
         modified_line = " ".join(segments)
@@ -105,7 +107,7 @@ def remove_trailing_repeat_consonants(
     return modified_text
 
 
-def _remove_repeat_consonants_from_segment(segment: str, dictionary: Trie) -> str:
+def _remove_repeat_trailing_consonants_from_segment(segment: str) -> str:
     """
     Remove repeating consonants at the last of the segment.
 
@@ -113,7 +115,6 @@ def _remove_repeat_consonants_from_segment(segment: str, dictionary: Trie) -> st
     Details is same as remove_repeat_consonants().
 
     :param str segment: segment of text
-    :param Trie dictionary: Trie dictionary to check the last word.
     :return: segment without repeating Thai consonants
     :rtype: str
     """
@@ -134,10 +135,10 @@ def _remove_repeat_consonants_from_segment(segment: str, dictionary: Trie) -> st
 
     # find the words that has 2 or more duplication of
     # this character at the end.
-    repeaters = consonants_repeaters[dup]
+    repeaters = last_consonants_repeaters[dup]
 
     # remove all of the last repeating character
-    segment_head = _get_repitition_head(segment, dup)
+    segment_head = _remove_all_last_consonants(segment, dup)
 
     # find the longest word that matches the segment
     longest_word, repetition = _find_longest_consonant_repeaters_match(
@@ -158,7 +159,7 @@ def _remove_repeat_consonants_from_segment(segment: str, dictionary: Trie) -> st
     return segment
 
 
-def _get_repitition_head(text: str, dup: str) -> str:
+def _remove_all_last_consonants(text: str, dup: str) -> str:
     """
     Reduce repeating characters at the end of the text.
 
@@ -170,11 +171,11 @@ def _get_repitition_head(text: str, dup: str) -> str:
     :return: text without repeating characters at the end
     :rtype: str
     """
-    head = text
-    while (len(head) > 0) and (head[-1] == dup):
-        head = head[:-1]
+    removed = text
+    while (len(removed) > 0) and (removed[-1] == dup):
+        removed = removed[:-1]
 
-    return head
+    return removed
 
 
 def _update_consonant_repeaters(dictionary: Trie) -> None:
@@ -191,17 +192,17 @@ def _update_consonant_repeaters(dictionary: Trie) -> None:
     """
     # initialize dictionary
     for consonant in list(consonants):
-        consonants_repeaters[consonant] = []
+        last_consonants_repeaters[consonant] = []
 
     # register
     for word in dictionary:
-        if _is_consonant_repeater(word):
-            consonants_repeaters[word[-1]].append(word)
+        if _is_last_consonant_repeater(word):
+            last_consonants_repeaters[word[-1]].append(word)
 
     return
 
 
-def _is_consonant_repeater(word: str) -> bool:
+def _is_last_consonant_repeater(word: str) -> bool:
     """
     Check if the word has repeating consonants at the end.
 
@@ -238,7 +239,7 @@ def _find_longest_consonant_repeaters_match(
     repetition = 0  # how much the last character is repeated correctly
     for repeater in repeaters:
         # remove all of the last repeating character
-        repeater_head = _get_repitition_head(repeater, repeater[-1])
+        repeater_head = _remove_all_last_consonants(repeater, repeater[-1])
 
         # check match
         if (

From 13cf54ad1657e820f7bd9d34105abbcfc51b3cbf Mon Sep 17 00:00:00 2001
From: konbraphat51 <konbraphat@gmail.com>
Date: Sat, 11 Nov 2023 23:38:15 +0900
Subject: [PATCH 26/36] Refac: reflect method name change

---
 pythainlp/util/__init__.py                             |  2 +-
 ...sonants.py => remove_trailing_repeat_consonants.py} | 10 +++++-----
 2 files changed, 6 insertions(+), 6 deletions(-)
 rename pythainlp/util/{removerepeatconsonants.py => remove_trailing_repeat_consonants.py} (94%)

diff --git a/pythainlp/util/__init__.py b/pythainlp/util/__init__.py
index 3b03f8367..d05d15c3a 100644
--- a/pythainlp/util/__init__.py
+++ b/pythainlp/util/__init__.py
@@ -103,7 +103,7 @@
     remove_zw,
     reorder_vowels,
 )
-from pythainlp.util.removerepeatconsonants import remove_trailing_repeat_consonants
+from pythainlp.util.remove_trailing_repeat_consonants import remove_trailing_repeat_consonants
 from pythainlp.util.numtoword import bahttext, num_to_thaiword
 from pythainlp.util.strftime import thai_strftime
 from pythainlp.util.thai import (
diff --git a/pythainlp/util/removerepeatconsonants.py b/pythainlp/util/remove_trailing_repeat_consonants.py
similarity index 94%
rename from pythainlp/util/removerepeatconsonants.py
rename to pythainlp/util/remove_trailing_repeat_consonants.py
index aad8f2638..33ca1c692 100644
--- a/pythainlp/util/removerepeatconsonants.py
+++ b/pythainlp/util/remove_trailing_repeat_consonants.py
@@ -57,24 +57,24 @@ def remove_trailing_repeat_consonants(
     :Example:
     ::
 
-        from pythainlp.util import remove_repeat_consonants
+        from pythainlp.util import remove_trailing_repeat_consonants
         from pythainlp.util import dict_trie
 
         # use default dictionary (pythainlp.corpus.thai_words())
-        remove_repeat_consonants('เริ่ดดดดดดดด')
+        remove_trailing_repeat_consonants('เริ่ดดดดดดดด')
         # output: เริ่ด
 
-        remove_repeat_consonants('อืมมมมมมมมมมมมมมม')
+        remove_trailing_repeat_consonants('อืมมมมมมมมมมมมมมม')
         # output: อืมมม
         # "อืมมม" is in the default dictionary
 
         # use custom dictionary
         custom_dictionary = dict_trie(["อืมมมมม"])
-        remove_repeat_consonants('อืมมมมมมมมมมมมมมม', custom_dictionary)
+        remove_trailing_repeat_consonants('อืมมมมมมมมมมมมมมม', custom_dictionary)
         # output: อืมมมมม
 
         # long text
-        remove_repeat_consonants('อืมมมมมมมมมมมมม คุณมีบุคลิกที่เริ่ดดดดด '\
+        remove_trailing_repeat_consonants('อืมมมมมมมมมมมมม คุณมีบุคลิกที่เริ่ดดดดด '\
         'ฉันจะให้เกรดดีกับคุณณณ\nนี่เป็นความลับบบบบ')
         # output: อืมมม คุณมีบุคลิกที่เริ่ด ฉันจะให้เกรดดีกับคุณ
         #         นี่เป็นความลับ

From a94fccbd8bcab9db092a2bdc12f0d854d6f8f0f2 Mon Sep 17 00:00:00 2001
From: konbraphat51 <konbraphat@gmail.com>
Date: Sat, 11 Nov 2023 23:39:22 +0900
Subject: [PATCH 27/36] Fix: argument inconsistency

---
 pythainlp/util/remove_trailing_repeat_consonants.py | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/pythainlp/util/remove_trailing_repeat_consonants.py b/pythainlp/util/remove_trailing_repeat_consonants.py
index 33ca1c692..6f2d92c39 100644
--- a/pythainlp/util/remove_trailing_repeat_consonants.py
+++ b/pythainlp/util/remove_trailing_repeat_consonants.py
@@ -93,9 +93,7 @@ def remove_trailing_repeat_consonants(
         segments = line.split(" ")
 
         for cnt, segment in enumerate(segments):
-            segments[cnt] = _remove_repeat_trailing_consonants_from_segment(
-                segment, dictionary
-            )
+            segments[cnt] = _remove_repeat_trailing_consonants_from_segment(segment)
 
         # revert spaces
         modified_line = " ".join(segments)

From 832d28c2767f91f7881243dbf0bc1084b2b4508a Mon Sep 17 00:00:00 2001
From: konbraphat51 <konbraphat@gmail.com>
Date: Sat, 11 Nov 2023 23:46:13 +0900
Subject: [PATCH 28/36] Refac: revert to the first place

because this PR's implementation was separated from normalize.py
---
 pythainlp/util/normalize.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/pythainlp/util/normalize.py b/pythainlp/util/normalize.py
index 825ed79eb..b7e0f558b 100644
--- a/pythainlp/util/normalize.py
+++ b/pythainlp/util/normalize.py
@@ -49,7 +49,9 @@
 ]
 
 # VOWELS + Phinthu, Thanthakhat, Nikhahit, Yamakkan
-_NOREPEAT_CHARS = f"{follow_v}{lead_v}{above_v}{below_v}\u0e3a\u0e4c\u0e4d\u0e4e"
+_NOREPEAT_CHARS = (
+    f"{follow_v}{lead_v}{above_v}{below_v}\u0e3a\u0e4c\u0e4d\u0e4e"
+)
 _NOREPEAT_PAIRS = list(
     zip([f"({ch}[ ]*)+{ch}" for ch in _NOREPEAT_CHARS], _NOREPEAT_CHARS)
 )
@@ -297,4 +299,4 @@ def maiyamok(sent: Union[str, List[str]]) -> List[str]:
             i += 1
         _list_word.append(text)
         i += 1
-    return _list_word
+    return _list_word
\ No newline at end of file

From 95761ea55f88466095d1f114a97a9f1680fe8615 Mon Sep 17 00:00:00 2001
From: konbraphat51 <konbraphat@gmail.com>
Date: Sat, 11 Nov 2023 23:46:33 +0900
Subject: [PATCH 29/36] Refac: use black

line-length=79
---
 pythainlp/util/remove_trailing_repeat_consonants.py | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/pythainlp/util/remove_trailing_repeat_consonants.py b/pythainlp/util/remove_trailing_repeat_consonants.py
index 6f2d92c39..7ccec361e 100644
--- a/pythainlp/util/remove_trailing_repeat_consonants.py
+++ b/pythainlp/util/remove_trailing_repeat_consonants.py
@@ -93,7 +93,9 @@ def remove_trailing_repeat_consonants(
         segments = line.split(" ")
 
         for cnt, segment in enumerate(segments):
-            segments[cnt] = _remove_repeat_trailing_consonants_from_segment(segment)
+            segments[cnt] = _remove_repeat_trailing_consonants_from_segment(
+                segment
+            )
 
         # revert spaces
         modified_line = " ".join(segments)
@@ -211,7 +213,9 @@ def _is_last_consonant_repeater(word: str) -> bool:
     :return: True if the word has repeating consonants at the end.
     :rtype: bool
     """
-    return (len(word) > 1) and (word[-1] == word[-2]) and (word[-1] in consonants)
+    return (
+        (len(word) > 1) and (word[-1] == word[-2]) and (word[-1] in consonants)
+    )
 
 
 def _find_longest_consonant_repeaters_match(

From cefc4e7246114867f8c5ec0827e236161fdbbafa Mon Sep 17 00:00:00 2001
From: konbraphat51 <konbraphat@gmail.com>
Date: Sat, 11 Nov 2023 23:54:28 +0900
Subject: [PATCH 30/36] Refac: reduce col length

used black line-length=79
---
 pythainlp/util/__init__.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/pythainlp/util/__init__.py b/pythainlp/util/__init__.py
index d05d15c3a..f6f5e373c 100644
--- a/pythainlp/util/__init__.py
+++ b/pythainlp/util/__init__.py
@@ -103,7 +103,9 @@
     remove_zw,
     reorder_vowels,
 )
-from pythainlp.util.remove_trailing_repeat_consonants import remove_trailing_repeat_consonants
+from pythainlp.util.remove_trailing_repeat_consonants import (
+    remove_trailing_repeat_consonants,
+)
 from pythainlp.util.numtoword import bahttext, num_to_thaiword
 from pythainlp.util.strftime import thai_strftime
 from pythainlp.util.thai import (

From fd2896b0cc1aa889b21e461687f23bbfbc08c9d4 Mon Sep 17 00:00:00 2001
From: Konbraphat <101827492+konbraphat51@users.noreply.github.com>
Date: Mon, 13 Nov 2023 10:32:03 +0900
Subject: [PATCH 31/36] Refac: add last new line

---
 pythainlp/util/normalize.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pythainlp/util/normalize.py b/pythainlp/util/normalize.py
index b7e0f558b..a8cacae22 100644
--- a/pythainlp/util/normalize.py
+++ b/pythainlp/util/normalize.py
@@ -299,4 +299,4 @@ def maiyamok(sent: Union[str, List[str]]) -> List[str]:
             i += 1
         _list_word.append(text)
         i += 1
-    return _list_word
\ No newline at end of file
+    return _list_word

From ee492f14c4ec992c152869c595c8eea624416aac Mon Sep 17 00:00:00 2001
From: Konbraphat <101827492+konbraphat51@users.noreply.github.com>
Date: Mon, 13 Nov 2023 10:48:17 +0900
Subject: [PATCH 32/36] Update commentation

Update responding to method rename
---
 pythainlp/util/remove_trailing_repeat_consonants.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pythainlp/util/remove_trailing_repeat_consonants.py b/pythainlp/util/remove_trailing_repeat_consonants.py
index 7ccec361e..60634b236 100644
--- a/pythainlp/util/remove_trailing_repeat_consonants.py
+++ b/pythainlp/util/remove_trailing_repeat_consonants.py
@@ -20,7 +20,7 @@
 from pythainlp import thai_consonants as consonants
 from typing import Tuple, List
 
-# used by remove_repeat_consonants()
+# used by remove_trailing_repeat_consonants()
 # contains all words that has repeating consonants at the end
 # for each consonant
 # when dictionary updated, this should be updated too

From 4212ff3e1c4a776735f0dfc91ad19291302bfc8e Mon Sep 17 00:00:00 2001
From: Konbraphat <101827492+konbraphat51@users.noreply.github.com>
Date: Mon, 13 Nov 2023 10:51:32 +0900
Subject: [PATCH 33/36] Refac: clearify commentation

---
 pythainlp/util/remove_trailing_repeat_consonants.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pythainlp/util/remove_trailing_repeat_consonants.py b/pythainlp/util/remove_trailing_repeat_consonants.py
index 60634b236..51e269575 100644
--- a/pythainlp/util/remove_trailing_repeat_consonants.py
+++ b/pythainlp/util/remove_trailing_repeat_consonants.py
@@ -13,7 +13,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 """
-Removement of repeated consonants
+Removement of repeated consonants at the end of words
 """
 from pythainlp.corpus import thai_words
 from pythainlp.util.trie import Trie

From abd47025d7ff4ecc7bdc40df16cd5bea6075e458 Mon Sep 17 00:00:00 2001
From: Konbraphat <101827492+konbraphat51@users.noreply.github.com>
Date: Mon, 13 Nov 2023 10:55:04 +0900
Subject: [PATCH 34/36] Refac: fix typo

---
 pythainlp/util/remove_trailing_repeat_consonants.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pythainlp/util/remove_trailing_repeat_consonants.py b/pythainlp/util/remove_trailing_repeat_consonants.py
index 51e269575..7aae7e519 100644
--- a/pythainlp/util/remove_trailing_repeat_consonants.py
+++ b/pythainlp/util/remove_trailing_repeat_consonants.py
@@ -24,7 +24,7 @@
 # contains all words that has repeating consonants at the end
 # for each consonant
 # when dictionary updated, this should be updated too
-# key: consonan
+# key: consonant
 # value: list of words that has repeating consonants at the end
 last_consonants_repeaters = {}
 

From 740c5e5e4b46176819049745f8529cf413e28e9b Mon Sep 17 00:00:00 2001
From: Konbraphat <101827492+konbraphat51@users.noreply.github.com>
Date: Mon, 13 Nov 2023 10:58:16 +0900
Subject: [PATCH 35/36] Add: __all__

---
 pythainlp/util/__init__.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/pythainlp/util/__init__.py b/pythainlp/util/__init__.py
index f6f5e373c..ddcb9b62b 100644
--- a/pythainlp/util/__init__.py
+++ b/pythainlp/util/__init__.py
@@ -69,6 +69,7 @@
     "remove_tone_ipa",
     "tis620_to_utf8",
     "spell_words",
+    "remove_trailing_repeat_consonants",
 ]
 
 from pythainlp.util.collate import collate

From 3315cb026fc2e2b8b4518c3ae3a9b04c50738b10 Mon Sep 17 00:00:00 2001
From: Arthit Suriyawongkul <arthit@gmail.com>
Date: Mon, 13 Nov 2023 07:49:54 +0000
Subject: [PATCH 36/36] Sort export names in __all__

---
 pythainlp/util/__init__.py | 24 ++++++++++++------------
 1 file changed, 12 insertions(+), 12 deletions(-)

diff --git a/pythainlp/util/__init__.py b/pythainlp/util/__init__.py
index ddcb9b62b..55302507b 100644
--- a/pythainlp/util/__init__.py
+++ b/pythainlp/util/__init__.py
@@ -21,19 +21,21 @@
     "abbreviation_to_full_text",
     "arabic_digit_to_thai_digit",
     "bahttext",
-    "convert_years",
     "collate",
-    "countthai",
+    "convert_years",
     "count_thai_chars",
+    "countthai",
     "dict_trie",
     "digit_to_text",
     "display_thai_char",
     "emoji_to_thai",
     "eng_to_thai",
     "find_keyword",
+    "ipa_to_rtgs",
     "is_native_thai",
     "isthai",
     "isthaichar",
+    "nectec_to_ipa",
     "normalize",
     "now_reign_year",
     "num_to_thaiword",
@@ -42,11 +44,18 @@
     "remove_dangling",
     "remove_dup_spaces",
     "remove_repeat_vowels",
+    "remove_tone_ipa",
     "remove_tonemark",
+    "remove_trailing_repeat_consonants",
     "remove_zw",
     "reorder_vowels",
     "rhyme",
+    "sound_syllable",
+    "spell_words",
+    "syllable_length",
+    "syllable_open_close_detector",
     "text_to_arabic_digit",
+    "text_to_num",
     "text_to_thai_digit",
     "thai_digit_to_arabic_digit",
     "thai_keyboard_dist",
@@ -58,18 +67,9 @@
     "thaiword_to_num",
     "thaiword_to_time",
     "time_to_thaiword",
-    "text_to_num",
+    "tis620_to_utf8",
     "tone_detector",
     "words_to_num",
-    "sound_syllable",
-    "syllable_length",
-    "syllable_open_close_detector",
-    "nectec_to_ipa",
-    "ipa_to_rtgs",
-    "remove_tone_ipa",
-    "tis620_to_utf8",
-    "spell_words",
-    "remove_trailing_repeat_consonants",
 ]
 
 from pythainlp.util.collate import collate