Skip to content

Commit 26f185b

Browse files
author
Mateusz Bronisław Wasilewski
committed
Merge branch 'hypothesis_tests' into 'main'
Hypothesis tests See merge request mwasilew/2023-ZPRP!18
2 parents e8df6df + 72491e0 commit 26f185b

File tree

8 files changed

+367
-28
lines changed

8 files changed

+367
-28
lines changed

.gitignore

+2-1
Original file line numberDiff line numberDiff line change
@@ -3,4 +3,5 @@ venv/
33
__pycache__/
44
.vscode/*
55
.pytest_cache/
6-
.tox/
6+
.tox/
7+
.hypothesis/

Makefile

+5-1
Original file line numberDiff line numberDiff line change
@@ -15,11 +15,15 @@ venv/bin/activate: requirements.txt
1515
. ./venv/bin/activate
1616
$(PIP) install -r requirements.txt
1717

18-
setup: venv/bin/activate
18+
setup_venv: venv/bin/activate
19+
20+
setup:
21+
$(PIP) install -r requirements.txt
1922

2023
clean:
2124
rm -rf __pycache__
2225
rm -rf venv
2326
rm -rf .tox
27+
rm -rf .hypothesis
2428

2529
.PHONY: tests clean pre_commit

image_formatter/error_handler/errors.py

+17
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
from image_formatter.lexer.token import TokenType
2+
from typing import List
23

34

45
class UnexpectedTagException(Exception):
@@ -15,3 +16,19 @@ def __eq__(self, other):
1516
return False
1617

1718
return (self.expected == other.expected) and (self.actual == other.actual)
19+
20+
21+
class InvalidConfigCharacterError(Exception):
    """Raised when a Lexer configuration option contains a character outside its allowed set.

    Attributes:
        invalid_char: the offending character that was found
        valid_chars: the collection of characters that are allowed for the option
    """

    def __init__(self, invalid_char: str, valid_chars: List[str]):
        super().__init__()
        self.invalid_char = invalid_char
        self.valid_chars = valid_chars

    def __str__(self):
        return f"{self.__class__}: invalid character found: {self.invalid_char} when list of valid chars is: {self.valid_chars}"

    def __eq__(self, other):
        if other.__class__ != self.__class__:
            return False

        # BUG FIX: the original evaluated set(self.valid_chars == other.valid_chars),
        # i.e. set() over a bool, which raises TypeError. The intent (visible from the
        # set() wrapping) was an order-insensitive comparison of valid_chars.
        return self.invalid_char == other.invalid_char and set(self.valid_chars) == set(other.valid_chars)

image_formatter/image_properties_tag_replacer/image_properties_tag_replacer.py

+3-3
Original file line numberDiff line numberDiff line change
@@ -27,9 +27,9 @@ def __init__(self, lex: Lexer, image_tags_properties: dict, error_handler: Error
2727
self.image_tags_properties = image_tags_properties
2828
self.error_handler = error_handler
2929

30-
@staticmethod
31-
def name() -> str:
32-
return __class__.__name__
30+
@classmethod
31+
def name(cls) -> str:
32+
return cls.__name__
3333

3434
def next_token(self):
3535
self.curr_token = self.lexer.get_token()

image_formatter/lexer/lexer.py

+103-5
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,11 @@
11
from image_formatter.lexer.token import Token, TokenType, IntegerToken
22
from image_formatter.lexer.position import Position
3+
from image_formatter.error_handler.errors import InvalidConfigCharacterError
34
import io
45
import sys
56
from mkdocs.plugins import get_plugin_logger
67
from copy import deepcopy
8+
from typing import Tuple, List
79

810
log = get_plugin_logger(__name__)
911

@@ -21,10 +23,10 @@ def __init__(
2123
fp: io.TextIOWrapper,
2224
*,
2325
max_int: int = sys.maxsize,
24-
special_signs: tuple = ("-", "_"),
26+
special_signs: Tuple[str] = ("-", "_"),
2527
tag: str = "@",
26-
newline_characters: tuple = ("\n", "\r"),
27-
additional_path_signs: tuple = ("/", "."),
28+
newline_characters: Tuple[str] = ("\n", "\r"),
29+
additional_path_signs: Tuple[str] = ("/", "."),
2830
):
2931
"""
3032
Args:
@@ -40,6 +42,7 @@ def __init__(
4042
Attributes:
4143
running: defines if lexer should still go through the characters or EOF was encountered
4244
"""
45+
Lexer.verify_config(special_signs, tag, newline_characters, additional_path_signs)
4346
self.fp = fp
4447
self.running = True
4548
self.current_char = ""
@@ -50,9 +53,104 @@ def __init__(
5053
self.newline_characters = newline_characters # @TODO add hypothesis tests
5154
self.additional_path_signs = additional_path_signs
5255

56+
@classmethod
57+
def name(cls) -> str:
58+
return cls.__name__
59+
60+
@classmethod
61+
def verify_config(
62+
cls,
63+
special_signs: Tuple[str],
64+
tag: str,
65+
newline_characters: Tuple[str],
66+
additional_path_signs: Tuple[str],
67+
) -> bool:
68+
"""
69+
Verifies if provided to Lexer configuration is valid. Upon failure on any of the verification steps, the function returns immediately with fail reason.
70+
71+
Returns:
72+
True when configuration is valid
73+
Raises:
74+
InvalidConfigCharacterError: when invalid character is found
75+
Exception: when there is different reason of validation fail
76+
"""
77+
# configurations must be mutually exclusive
78+
flat_list = [*set(special_signs), tag, *set(newline_characters), *set(additional_path_signs)]
79+
if len(flat_list) != len(set(flat_list)):
80+
raise Exception("Characters cannot repeat across configuration options")
81+
82+
Lexer.verify_special_signs(special_signs)
83+
Lexer.verify_tag(tag)
84+
Lexer.verify_newline_characters(newline_characters)
85+
Lexer.verify_additional_path_signs(additional_path_signs)
86+
5387
@staticmethod
54-
def name() -> str:
55-
return __class__.__name__
88+
def find_invalid_char(valid_chars: List[str], check_chars: Tuple[str]) -> str:
89+
invalid_char = next(filter(lambda x: x not in valid_chars, check_chars), None)
90+
return invalid_char
91+
92+
@classmethod
93+
def verify_special_signs(cls, signs: Tuple[str]) -> bool:
94+
"""
95+
Verifies if all characters in the list are valid special signs characters
96+
97+
Returns:
98+
True when configuration is valid
99+
Raises:
100+
InvalidConfigCharacterError: when invalid character is found
101+
"""
102+
invalid_chars = [" ", "(", ")"]
103+
if any([sign in invalid_chars for sign in signs]):
104+
raise InvalidConfigCharacterError("<space>", [])
105+
return True
106+
107+
@classmethod
108+
def verify_tag(cls, tag: str) -> bool:
109+
"""
110+
Verifies if tag is valid
111+
112+
Returns:
113+
True when tag is valid
114+
Raises:
115+
InvalidConfigCharacterError: when invalid character is found
116+
"""
117+
valid_tags = "@#$%&~>?+=:"
118+
invalid_char = Lexer.find_invalid_char(valid_tags, (tag))
119+
if invalid_char:
120+
raise InvalidConfigCharacterError(invalid_char, valid_tags)
121+
return True
122+
123+
@classmethod
124+
def verify_newline_characters(cls, chars: Tuple[str]) -> bool:
125+
"""
126+
Verifies if all characters in the list are valid new line characters
127+
128+
Returns:
129+
True when configuration is valid
130+
Raises:
131+
InvalidConfigCharacterError: when invalid character is found
132+
"""
133+
valid_newline_chars = ["\n", "\r", "\r\n", "\x0b", "\v", "\f"]
134+
invalid_char = Lexer.find_invalid_char(valid_newline_chars, chars)
135+
if invalid_char:
136+
raise InvalidConfigCharacterError(invalid_char, valid_newline_chars)
137+
return True
138+
139+
@classmethod
140+
def verify_additional_path_signs(cls, signs: Tuple[str]) -> bool:
141+
"""
142+
Verifies if all characters in the list are valid additional path signs
143+
144+
Returns:
145+
True when configuration is valid
146+
Raises:
147+
InvalidConfigCharacterError: when invalid character is found
148+
"""
149+
valid_additional_path_signs = "-_.~:/?#[]@!$&'()*+,;=%"
150+
invalid_char = Lexer.find_invalid_char(valid_additional_path_signs, signs)
151+
if invalid_char:
152+
raise InvalidConfigCharacterError(invalid_char, valid_additional_path_signs)
153+
return True
56154

57155
def is_character(self) -> bool:
58156
"""

requirements.txt

+27-5
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,21 @@
1+
attrs==23.1.0
2+
Babel==2.13.1
13
black==23.10.1
24
cachetools==5.3.2
5+
certifi==2023.11.17
36
chardet==5.2.0
7+
charset-normalizer==3.3.2
48
click==8.1.7
59
colorama==0.4.6
6-
flake8==6.1.0
7-
ghp-import==2.1.0
810
cssutils==2.9.0
911
distlib==0.3.7
1012
exceptiongroup==1.1.3
1113
filelock==3.13.1
14+
flake8==6.1.0
15+
ghp-import==2.1.0
16+
griffe==0.38.0
17+
hypothesis==6.89.0
18+
idna==3.4
1219
importlib-metadata==6.8.0
1320
iniconfig==2.0.0
1421
Jinja2==3.1.2
@@ -17,22 +24,37 @@ MarkupSafe==2.1.3
1724
mccabe==0.7.0
1825
mergedeep==1.3.4
1926
mkdocs==1.5.3
27+
mkdocs-autorefs==0.5.0
2028
mkdocs-material==9.4.9
29+
mkdocs-material-extensions==1.3
2130
mkdocstrings==0.24.0
2231
mkdocstrings-python==1.7.4
2332
mock==5.1.0
2433
mypy-extensions==1.0.0
2534
packaging==23.2
35+
paginate==0.5.6
2636
pathspec==0.11.2
2737
platformdirs==3.11.0
2838
pluggy==1.3.0
2939
pycodestyle==2.11.1
3040
pyflakes==3.1.0
41+
Pygments==2.16.1
42+
pymdown-extensions==10.4
43+
pyproject-api==1.6.1
3144
pytest==7.4.3
32-
setuptools~=65.5.1
3345
python-dateutil==2.8.2
46+
pytz==2023.3.post1
3447
PyYAML==6.0.1
35-
pyyaml_env_tag==0.1
48+
pyyaml-env-tag==0.1
49+
regex==2023.10.3
50+
requests==2.31.0
51+
setuptools==65.5.1
3652
six==1.16.0
53+
sortedcontainers==2.4.0
54+
tomli==2.0.1
55+
tox==4.11.3
56+
typing-extensions==4.8.0
57+
urllib3==2.1.0
58+
virtualenv==20.24.6
3759
watchdog==3.0.0
38-
pyproject-api==1.6.1
60+
zipp==3.17.0

tests/lexer/test_hypothesis_lexer.py

+96
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,96 @@
1+
from image_formatter.lexer.lexer import Lexer
2+
from image_formatter.lexer.token import TokenType
3+
from image_formatter.lexer.position import Position
4+
from tests.test_helpers import get_all_tokens
5+
import io
6+
from hypothesis import strategies as st
7+
from hypothesis import given
8+
9+
"""
10+
Lexer has some configurations that depend on user input (tag, special_signs, newline_characters and
11+
additional_path_signs). Most of them have strict list of available symbols. Special_signs doesn't
12+
have that. The following tests are to check if such freedom doesn't break lexer's logic.
13+
They are similar to unit tests in test_unit_lexer.py, but we decided to keep both, because one
14+
might want to execute unit tests without hypothesis tests as they are a bit more time consuming.
15+
"""
16+
17+
18+
def special_sign():
    """Strategy yielding a single character usable as a Lexer special sign.

    Excludes characters that already carry meaning in the default Lexer
    configuration (newlines, the "@" tag, path signs) plus the characters
    rejected by verify_special_signs and "&", which the tests use as a
    plain T_CHAR.
    """
    excluded = ["\n", "\r", "@", "/", ".", " ", "(", ")", "&"]
    return st.text(min_size=1, max_size=1).filter(lambda sign: all(ch not in sign for ch in excluded))
21+
22+
23+
def special_sign_tuples():
    """Strategy yielding a 3-tuple of independently drawn special-sign characters."""
    sign = special_sign()
    return st.tuples(sign, sign, sign)
25+
26+
27+
@given(special_sign_tuples())
def test_given_text_when_tags_not_separated_by_spaces_then_tokens_returned(
    special_signs,
):
    """Adjacent tags built with random special signs tokenize into the expected sequence."""
    s0, s1, s2 = special_signs
    text = f"@tag1(url1.png)@one{s0}more{s1}tag&and{s2}word"
    lexer = Lexer(io.StringIO(text), special_signs=special_signs)
    tokens = get_all_tokens(lexer)
    expected = [
        (TokenType.T_IMAGE_SIZE_TAG, Position(1, 1)),
        (TokenType.T_IMAGE_URL, Position(1, 6)),
        (TokenType.T_IMAGE_SIZE_TAG, Position(1, 16)),
        (TokenType.T_CHAR, Position(1, 29)),
        (TokenType.T_LITERAL, Position(1, 30)),
    ]
    assert [(token.type, token.position) for token in tokens] == expected
49+
50+
51+
@given(special_sign_tuples())
def test_given_complex_text_with_special_chars_then_sequence_of_tokens_is_returned(
    special_signs,
):
    """A mixed text (literals, lone chars, whitespace, tags across lines) tokenizes as expected."""
    text = f"word1& word2 && @tag1{special_signs[0]}tag \n\n @tag2(start{special_signs[1]}of/url.png)"
    # Expected (type, position) pairs, in token order.
    expected = [
        (TokenType.T_LITERAL, Position(1, 1)),
        (TokenType.T_CHAR, Position(1, 6)),
        (TokenType.T_WHITE_CHAR, Position(1, 7)),
        (TokenType.T_LITERAL, Position(1, 8)),
        (TokenType.T_WHITE_CHAR, Position(1, 13)),
        (TokenType.T_CHAR, Position(1, 14)),
        (TokenType.T_CHAR, Position(1, 15)),
        (TokenType.T_WHITE_CHAR, Position(1, 16)),
        (TokenType.T_IMAGE_SIZE_TAG, Position(1, 17)),
        (TokenType.T_WHITE_CHAR, Position(1, 26)),
        (TokenType.T_WHITE_CHAR, Position(1, 27)),
        (TokenType.T_WHITE_CHAR, Position(2, 1)),
        (TokenType.T_WHITE_CHAR, Position(3, 1)),
        (TokenType.T_IMAGE_SIZE_TAG, Position(3, 2)),
        (TokenType.T_IMAGE_URL, Position(3, 7)),
    ]
    lexer = Lexer(io.StringIO(text), special_signs=special_signs)
    tokens = get_all_tokens(lexer)
    assert len(tokens) == len(expected)
    assert [(token.type, token.position) for token in tokens] == expected

0 commit comments

Comments
 (0)