Skip to content

Commit 19d60bd

Browse files
committed
feat: [lexer] Added first basic implementation
1 parent 2833c23 commit 19d60bd

File tree

8 files changed

+124
-0
lines changed

8 files changed

+124
-0
lines changed

.gitignore

+1
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
venv/

lexer/__init__.py

Whitespace-only changes.

lexer/lexer.py

+78
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,78 @@
1+
from lexer.token import Token, TokenType
2+
3+
# Characters allowed inside a literal in addition to alphanumerics.
# A frozenset makes the membership test in Lexer.is_character O(1) and
# guards the constant against accidental mutation.
SPECIAL_SIGNS = frozenset({"-", "_"})
# Prefix character that introduces an image-size tag (e.g. "@small").
TAG_CHAR = "@"
5+
6+
7+
class Lexer:
    """Reads characters one at a time from a file-like object and builds Tokens.

    Usage: construct with an open text stream, call next_char() once to load
    the first lookahead character, then call get_token() repeatedly until it
    returns a T_EOF token. A build_* method returns None when the current
    character does not start that kind of token.
    """

    # Current one-character lookahead ("" once the input is exhausted).
    curr_char = ""

    def __init__(self, fp):
        # fp: any object with a read(size) method (e.g. an open text file).
        self.fp = fp
        self.curr_char = ""
        # Becomes False once read() returns "" (end of input).
        self.running = True

    @staticmethod
    def is_character(char: str) -> bool:
        """Return True if *char* may appear inside a literal."""
        return char.isalnum() or char in SPECIAL_SIGNS

    # TODO: a better way of taking next characters?
    def next_char(self) -> None:
        """Advance the lookahead by one character; flip ``running`` at EOF."""
        self.curr_char = self.fp.read(1)
        if not self.curr_char:
            # read(1) returns "" exactly at end of file.
            self.running = False

    def build_literal(self):
        """Build a T_LITERAL token, or return None if one does not start here.

        A literal starts with a letter and continues with alphanumerics or
        SPECIAL_SIGNS; the lookahead is left on the first non-literal char.
        """
        if not self.curr_char.isalpha():
            return None
        literal = self.curr_char
        self.next_char()
        while Lexer.is_character(self.curr_char):
            literal += self.curr_char
            self.next_char()
        return Token(TokenType.T_LITERAL, literal)

    def build_tag(self):
        """Build a T_IMAGE_SIZE_TAG token ("@" + literal), or return None."""
        if self.curr_char != TAG_CHAR:
            return None
        self.next_char()
        token = self.build_literal()
        # Bug fix: build_literal() returns a falsy sentinel when "@" is not
        # followed by a letter; the old code dereferenced token.type on that
        # sentinel and crashed with AttributeError.
        if token is None:
            return None
        return Token(TokenType.T_IMAGE_SIZE_TAG, token.string)

    def get_url_ending(self, string):
        """Consume the dotted tail of a URL (".com", ".co/uk", ...).

        Returns *string* extended with the consumed characters, or None if
        the URL body is not followed by a ".".
        """
        if self.curr_char != ".":
            return None
        string += self.curr_char
        self.next_char()
        while Lexer.is_character(self.curr_char) or self.curr_char in ("/", "."):
            string += self.curr_char
            self.next_char()
        return string

    def build_url(self):
        """Build a T_IMAGE_URL token: "(" path "." ending ")" — or return None."""
        if self.curr_char != "(":
            return None
        self.next_char()
        string = ""
        while Lexer.is_character(self.curr_char) or self.curr_char == "/":
            string += self.curr_char
            self.next_char()
        # The URL must contain a dotted ending such as ".com".
        if not (string := self.get_url_ending(string)):
            return None
        if self.curr_char != ")":
            return None
        self.next_char()
        return Token(TokenType.T_IMAGE_URL, string)

    def get_token(self):
        """Return the next Token, a T_EOF token at end of input, or None.

        None means the current character starts no known token; the caller
        is expected to skip it via next_char() and try again.
        """
        if not self.running:
            return Token(TokenType.T_EOF)
        # Try each token builder in order of specificity; every builder
        # returns None (falsy) on no-match, so "or" picks the first hit.
        return self.build_tag() or self.build_url() or self.build_literal()

lexer/token.py

+14
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
from enum import Enum
2+
3+
4+
class TokenType(Enum):
    """Kinds of tokens the lexer can produce."""

    # A word: a letter followed by alphanumerics / allowed special signs.
    T_LITERAL = 0
    # A parenthesized URL, e.g. "(some/url.com)".
    T_IMAGE_URL = 1
    # An "@"-prefixed size tag, e.g. "@small".
    T_IMAGE_SIZE_TAG = 2
    # End of input.
    T_EOF = 3
9+
10+
11+
class Token:
    """A single lexical unit: a token kind plus the matched text."""

    def __init__(self, type: TokenType, string: str = ""):
        # NOTE: the parameter name "type" shadows the builtin, but it is
        # kept unchanged for backward compatibility with existing callers.
        self.type = type
        self.string = string

    def __repr__(self) -> str:
        # Makes tokens readable when printed while debugging the lexer.
        return f"Token({self.type!r}, {self.string!r})"

    def __eq__(self, other) -> bool:
        # Tokens compare equal when both the kind and the text match.
        if not isinstance(other, Token):
            return NotImplemented
        return self.type == other.type and self.string == other.string

    def __hash__(self) -> int:
        # Paired with __eq__ so tokens stay usable in sets/dict keys.
        return hash((self.type, self.string))

lexer_examples/test1.txt

+2
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
hello
2+
second

lexer_examples/test2.txt

+2
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
hello @small
2+
(some/url.com)+word

main.py

+16
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
from lexer.lexer import Lexer
2+
3+
def _run(path: str) -> None:
    """Tokenize the file at *path*, printing every recognized token."""
    with open(path) as source:
        lexer = Lexer(source)
        lexer.next_char()
        count = 0
        while lexer.running:
            token = lexer.get_token()
            if not token:
                # Unrecognized character: skip it and keep scanning.
                lexer.next_char()
                continue
            print(f"{count}. Token: {token.type.name}, value: {token.string}")
            count += 1


if __name__ == "__main__":
    # _run("lexer_examples/test1.txt")
    _run("lexer_examples/test2.txt")

requirements.txt

+11
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
black==23.10.1
2+
click==8.1.7
3+
colorama==0.4.6
4+
flake8==6.1.0
5+
mccabe==0.7.0
6+
mypy-extensions==1.0.0
7+
packaging==23.2
8+
pathspec==0.11.2
9+
platformdirs==3.11.0
10+
pycodestyle==2.11.1
11+
pyflakes==3.1.0

0 commit comments

Comments
 (0)