TheAlgorithms · cclauss · Dec 28, 2022 · Dec 27, 2022 · Dec 27, 2022 · Dec 27, 2022
diff --git a/compression/lz77.py b/compression/lz77.py
@@ -32,6 +32,20 @@
 __version__ = "0.1"
 __author__ = "Lucia Harcekova"
 
+from typing import List
+
+
+class Token:
+    """
+    Plain class representing a triplet, called a token, consisting of offset,
+    length and indicator. This triplet is used during LZ77 compression.
+    """
+
+    def __init__(self, offset: int, length: int, indicator: str) -> None:
+        self.offset = offset
+        self.length = length
+        self.indicator = indicator
+
 
 class LZ77Compressor:
     """
@@ -43,21 +57,14 @@ def __init__(self, window_size: int = 13, lookahead_buffer_size: int = 6) -> Non
         self.lookahead_buffer_size = lookahead_buffer_size
         self.search_buffer_size = self.window_size - self.lookahead_buffer_size
 
-    def compress(self, text: str) -> list:
+    def compress(self, text: str) -> List[Token]:
         """This method compresses given string text using LZ77 compression algorithm.
 
         Args:
             text (str): string that's going to be compressed
 
         Returns:
-            output (list): the compressed text
-
-        Tests:
-            >>> lz77_compressor = LZ77Compressor(13, 6)
-            >>> lz77_compressor.compress("ababcbababaa")
-            [(0, 0, 'a'), (0, 0, 'b'), (2, 2, 'c'), (4, 3, 'a'), (2, 2, 'a')]
-            >>> lz77_compressor.compress("aacaacabcabaaac")
-            [(0, 0, 'a'), (1, 1, 'c'), (3, 4, 'b'), (3, 3, 'a'), (1, 2, 'c')]
+            output (List[Token]): the compressed text
         """
 
         output = []
@@ -68,79 +75,80 @@ def compress(self, text: str) -> list:
 
             # find the next encoding phrase
             # - triplet with offset, length, indicator (the next encoding character)
-            (offset, length, indicator) = self._find_encoding_token(text, search_buffer)
+            token = self._find_encoding_token(text, search_buffer)
 
             # update the search buffer:
             # - add new characters from text into it
             # - check if size exceed the max search buffer size, if so, drop the
             #   oldest elements
-            search_buffer += text[: length + 1]
+            search_buffer += text[: token.length + 1]
             if len(search_buffer) > self.search_buffer_size:
                 search_buffer = search_buffer[-self.search_buffer_size :]
 
             # update the text
-            text = text[length + 1 :]
+            text = text[token.length + 1 :]
 
             # append the token to output
-            output.append((offset, length, indicator))
+            output.append(token)
 
         return output
 
-    def decompress(self, tokens: list) -> str:
-        """This method turns the list of tokens consisting of triplets of the form
+    def decompress(self, tokens: List[Token]) -> str:
+        """This method turns the list of tokens consisting of triplets of the form
         (offset, length, char), into an output string.
 
         Args:
-            tokens (list): Tokens (offset, length, char)
+            tokens (List[Token]): Tokens (offset, length, char)
 
         Returns:
             output (str): The decompressed text
 
         Tests:
             >>> lz77_compressor = LZ77Compressor(13, 6)
-            >>> lz77_compressor.decompress([(0, 0, 'c'), (0, 0, 'a'), (0, 0, 'b'), \
-                    (0, 0, 'r'), (3, 1, 'c'), (2, 1, 'd'), (7, 4, 'r'), (3, 5, 'd')])
+            >>> lz77_compressor.decompress([Token(0, 0, 'c'), Token(0, 0, 'a'), \
+                Token(0, 0, 'b'), Token(0, 0, 'r'), Token(3, 1, 'c'), \
+                Token(2, 1, 'd'), Token(7, 4, 'r'), Token(3, 5, 'd')])
             'cabracadabrarrarrad'
-            >>> lz77_compressor.decompress([(0, 0, 'a'), (0, 0, 'b'), (2, 2, 'c'), \
-                    (4, 3, 'a'), (2, 2, 'a')])
+            >>> lz77_compressor.decompress([Token(0, 0, 'a'), Token(0, 0, 'b'), \
+                Token(2, 2, 'c'), Token(4, 3, 'a'), Token(2, 2, 'a')])
             'ababcbababaa'
-            >>> lz77_compressor.decompress([(0, 0, 'a'), (1, 1, 'c'), (3, 4, 'b'), \
-                    (3, 3, 'a'), (1, 2, 'c')])
+            >>> lz77_compressor.decompress([Token(0, 0, 'a'), Token(1, 1, 'c'), \
+                Token(3, 4, 'b'), Token(3, 3, 'a'), Token(1, 2, 'c')])
             'aacaacabcabaaac'
         """
 
         output = ""
 
-        for (offset, length, indicator) in tokens:
-            for _ in range(length):
-                output += output[-offset]
-            output += indicator
+        for token in tokens:
+            for _ in range(token.length):
+                output += output[-token.offset]
+            output += token.indicator
 
         return output
 
-    def _find_encoding_token(self, text: str, search_buffer: str) -> tuple:
+    def _find_encoding_token(self, text: str, search_buffer: str) -> Token:
         """Finds the encoding token for the first character in the text.
 
         Args:
             text (str)
             search_buffer (str)
 
         Returns:
-            tuple: Token
+            token (Token): the (offset, length, indicator) triplet
 
         Tests:
             >>> lz77_compressor = LZ77Compressor(13, 6)
-            >>> lz77_compressor._find_encoding_token("abrarrarrad", "abracad")
-            (7, 4, 'r')
-            >>> lz77_compressor._find_encoding_token("adabrarrarrad", "cabrac")
-            (2, 1, 'd')
+            >>> lz77_compressor._find_encoding_token("abrarrarrad", "abracad").offset
+            7
+            >>> lz77_compressor._find_encoding_token("adabrarrarrad", "cabrac").length
+            1
         """
 
         # Initialise result parameters to default values
         length, offset = 0, 0
 
         if search_buffer == "":
-            return offset, length, text[length]
+            return Token(offset, length, text[length])
 
         for i, character in enumerate(search_buffer):
             found_offset = len(search_buffer) - i
@@ -151,7 +159,7 @@ def _find_encoding_token(self, text: str, search_buffer: str) -> tuple:
                 if found_length >= length:
                     offset, length = found_offset, found_length
 
-        return offset, length, text[length]
+        return Token(offset, length, text[length])
 
     def _match_length_from_index(
         self, text: str, window: str, text_index: int, window_index: int
@@ -192,4 +200,4 @@ def _match_length_from_index(
     TEXT = "cabracadabrarrarrad"
     compressed_text = lz77_compressor.compress(TEXT)
     decompressed_text = lz77_compressor.decompress(compressed_text)
-    assert decompressed_text == TEXT, "The LZ77 agirithm returned the invalid result."
+    assert decompressed_text == TEXT, "The LZ77 algorithm returned the invalid result."