|
| 1 | +from lexer.token import Token, TokenType |
| 2 | + |
# Non-alphanumeric characters that may appear inside a literal (after its
# first character).
SPECIAL_SIGNS = ["-", "_"]
# Character that introduces an image-size tag.
TAG_CHAR = "@"


class Lexer:
    """Character-at-a-time tokenizer reading from a file-like object.

    The lexer keeps a single character of look-ahead in ``curr_char`` and
    produces tags (``@name``), parenthesised URLs (``(path.ext)``) and plain
    literals.  The ``build_*`` helpers return the int ``0`` (not ``None``)
    when the current input does not start the token they recognise.

    NOTE: there is no push-back.  A builder that fails part-way through has
    already consumed the characters it looked at.
    """

    # Class-level default so instances created without __init__ still have
    # an (empty) look-ahead character.
    curr_char = ""

    def __init__(self, fp):
        """Create a lexer over *fp*, an object exposing ``read(1)``."""
        self.fp = fp
        self.running = True
        self.curr_char = ""

    @staticmethod
    def is_character(char: str) -> bool:
        """Return True if *char* may appear inside a literal.

        The empty string (end of input) is never a literal character.
        """
        return char.isalnum() or char in SPECIAL_SIGNS

    # TODO: a better way of taking next characters?
    def next_char(self) -> str:
        """Read one character into ``curr_char`` and return it.

        When ``fp.read(1)`` yields an empty value, ``running`` is cleared
        to signal end of input.
        """
        self.curr_char = self.fp.read(1)
        if not self.curr_char:
            self.running = False
        return self.curr_char

    def build_literal(self) -> "Token | int":
        """Build a T_LITERAL token starting at the current character.

        A literal starts with an alphabetic character and continues while
        ``is_character`` holds.  Returns ``0`` if the current character is
        not alphabetic (nothing is consumed in that case).
        """
        if not self.curr_char.isalpha():
            return 0
        chars = [self.curr_char]
        self.next_char()
        # is_character("") is False, so this loop also stops at end of input.
        while Lexer.is_character(self.curr_char):
            chars.append(self.curr_char)
            self.next_char()
        return Token(TokenType.T_LITERAL, "".join(chars))

    def build_tag(self) -> "Token | int":
        """Build a T_IMAGE_SIZE_TAG token of the form ``@literal``.

        Returns ``0`` if the current character is not ``TAG_CHAR`` or if no
        literal follows it.  In the latter case the ``TAG_CHAR`` has already
        been consumed.
        """
        if self.curr_char != TAG_CHAR:
            return 0
        self.next_char()
        token = self.build_literal()
        # build_literal signals failure with the int 0, which has no .type
        # attribute, so the falsy check must come first.
        if not token or token.type != TokenType.T_LITERAL:
            return 0
        return Token(TokenType.T_IMAGE_SIZE_TAG, token.string)

    def get_url_ending(self, string):
        """Append the ``.`` and everything after it to *string*.

        Expects the current character to be ``"."``; returns ``0`` otherwise.
        After the dot, literal characters, ``/`` and ``.`` are consumed and
        appended.  Returns the extended string.
        """
        if self.curr_char != ".":
            return 0
        parts = [string, self.curr_char]
        self.next_char()
        # Tuple membership (not substring search) so that the empty string
        # read at end of input terminates the loop.
        while Lexer.is_character(self.curr_char) or self.curr_char in ("/", "."):
            parts.append(self.curr_char)
            self.next_char()
        return "".join(parts)

    def build_url(self) -> "Token | int":
        """Build a T_IMAGE_URL token of the form ``(path.ending)``.

        Returns ``0`` if the current character is not ``"("``, if the path is
        not followed by a ``.`` ending, or if the closing ``")"`` is missing.
        Characters examined before a failure have already been consumed.
        """
        if self.curr_char != "(":
            return 0
        self.next_char()
        chars = []
        while Lexer.is_character(self.curr_char) or self.curr_char == "/":
            chars.append(self.curr_char)
            self.next_char()
        url = self.get_url_ending("".join(chars))
        if not url:
            return 0
        if self.curr_char != ")":
            return 0
        self.next_char()
        return Token(TokenType.T_IMAGE_URL, url)

    def get_token(self) -> "Token | None":
        """Return the next token, a T_EOF token at end of input, or None.

        ``None`` is returned when input remains but the current character
        does not start a tag, URL or literal.  The character is not skipped
        in that case.
        """
        # Lazily read the first character.  While running, curr_char is only
        # empty before the first read, so this is a no-op for callers that
        # already primed the lexer with next_char().
        if self.running and not self.curr_char:
            self.next_char()
        if not self.running:
            return Token(TokenType.T_EOF)
        token = self.build_tag() or self.build_url() or self.build_literal()
        return token if token else None
0 commit comments