Skip to content

Commit 9869527

Browse files
authored
Support f-strings in Basilisp (#1246)
Fixes #922
1 parent 32a18dc commit 9869527

File tree

4 files changed

+168
-12
lines changed

4 files changed

+168
-12
lines changed

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
1010
* Added the `basilisp.url` namespace for structured URL manipulation (#1239)
1111
* Added support for proxies (#425)
1212
* Added a `:slots` meta flag for `deftype` to disable creation of `__slots__` on created types (#1241)
13+
* Added support for f-strings (#922)
1314

1415
### Changed
1516
* Removed implicit support for single-use iterables in sequences, and introduced `iterator-seq` to explicitly handle them (#1192)

docs/reader.rst

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -155,6 +155,28 @@ Their meanings match the equivalent escape sequences supported in `Python string
155155

156156
:ref:`strings_and_byte_strings`
157157

158+
.. _reader_f_strings:
159+
160+
f-strings
161+
^^^^^^^^^
162+
163+
::
164+
165+
basilisp.user=> #f ""
166+
""
167+
basilisp.user=> (let [a 1] #f "this is a string with {(inc a)}")
168+
"this is a string with 2"
169+
basilisp.user=> (let [a 1] #f "this is a string with \{(inc a)}")
170+
"this is a string with {(inc a)}"
171+
172+
f-strings are denoted as a series of characters enclosed by ``"`` quotation marks and preceded by a ``#f``.
173+
Expressions may be interpolated in the string enclosed in ``{}``.
174+
Each interpolation must contain exactly 1 expression and may be surrounded by optional whitespace characters which will not be included in the final string.
175+
Any valid expression may appear in a string interpolation, including another string.
176+
To include a literal opening ``{`` character in the string, escape it as ``\{``; a closing ``}`` outside of an interpolation needs no escaping.
177+
178+
f-strings are otherwise identical to standard :ref:`string literals <reader_strings>`.
179+
158180
.. _reader_byte_strings:
159181

160182
Byte Strings

src/basilisp/lang/reader.py

Lines changed: 60 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -105,6 +105,7 @@
105105
_HASH_SET = sym.symbol("hash-set", ns="basilisp.core")
106106
_LIST = sym.symbol("list", ns="basilisp.core")
107107
_SEQ = sym.symbol("seq", ns="basilisp.core")
108+
_STR = sym.symbol("str", ns="basilisp.core")
108109
_UNQUOTE = sym.symbol("unquote", ns="basilisp.core")
109110
_UNQUOTE_SPLICING = sym.symbol("unquote-splicing", ns="basilisp.core")
110111
_VECTOR = sym.symbol("vector", ns="basilisp.core")
@@ -599,6 +600,14 @@ def with_lineno_and_col(ctx, **kwargs):
599600
return cast(W, with_lineno_and_col)
600601

601602

603+
def _consume_whitespace(ctx: ReaderContext) -> str:
    """Advance the reader past a run of whitespace characters.

    Starting from the character the reader is currently positioned on, keep
    advancing while that character matches ``whitespace_chars``. Return the
    first character which does not match, i.e. the character at which the
    scan stopped (the reader is not advanced beyond it).
    """
    stream = ctx.reader
    current = stream.peek()
    while whitespace_chars.match(current):
        current = stream.next_char()
    return current
609+
610+
602611
def _read_namespaced(
603612
ctx: ReaderContext, allowed_suffix: Optional[str] = None
604613
) -> tuple[Optional[str], str]:
@@ -831,9 +840,7 @@ def _read_namespaced_map(ctx: ReaderContext) -> lmap.PersistentMap:
831840
"be specified as keywords without namespaces"
832841
)
833842

834-
char = ctx.reader.peek()
835-
while whitespace_chars.match(char):
836-
char = ctx.reader.next_char()
843+
_consume_whitespace(ctx)
837844

838845
return _read_map(ctx, namespace=map_ns)
839846

@@ -966,6 +973,49 @@ def _read_str(ctx: ReaderContext, allow_arbitrary_escapes: bool = False) -> str:
966973
s.append(char)
967974

968975

976+
def _read_fstr(ctx: ReaderContext) -> Union[str, llist.PersistentList]:
    """Read an f-string (``#f "..."``) from the input stream.

    Literal text is accumulated into string fragments and each ``{...}``
    interpolation is read as a single form using ``_read_next``. Whitespace
    between the interpolated form and the closing ``}`` is skipped.

    If every collected element is a string (there were no interpolations, or
    every interpolated form was itself a string literal), the fragments are
    joined and returned as a plain ``str``. Otherwise a list form
    ``(basilisp.core/str fragment expr fragment ...)`` is returned.

    A syntax error is raised if the first non-whitespace character is not an
    opening ``"``, if an unknown escape sequence is encountered, or if an
    interpolation is not closed by ``}`` after exactly one form. An EOF error
    is raised if the input ends before the closing quote.
    """
    elems: list[LispReaderForm] = []
    s: list[str] = []
    reader = ctx.reader

    # As with byte strings, the reader macro must be followed by a string
    # literal. Without this check the first loop iteration below would silently
    # discard whatever character happens to be here (e.g. `#f xyz"` => "yz").
    char = _consume_whitespace(ctx)
    if char != '"':
        raise ctx.syntax_error(f"Expected '\"'; got '{char}' instead")

    while True:
        char = reader.next_char()
        if char == "":
            raise ctx.eof_error("Unexpected EOF in string")
        if char == "\\":
            char = reader.next_char()
            escape_char = _STR_ESCAPE_CHARS.get(char, None)
            if escape_char:
                s.append(escape_char)
                continue
            if char == "{":
                # `\{` is a literal brace rather than the start of an
                # interpolation
                s.append(char)
                continue
            raise ctx.syntax_error(f"Unknown escape sequence: \\{char}")
        if char == '"':
            # Step past the closing quote before returning
            reader.next_char()
            elems.append("".join(s))
            if all(isinstance(elem, str) for elem in elems):
                return "".join(cast(list[str], elems))
            else:
                return llist.list([_STR, *elems])
        if char == "{":
            # Step past the opening brace, flush the literal text collected so
            # far, then read exactly one form
            reader.next_char()
            elems.append("".join(s))
            s = []
            expr = _read_next(ctx)
            elems.append(expr)
            char = _consume_whitespace(ctx)
            if char != "}":
                raise ctx.syntax_error("Expected single expression in f-string")
            continue

        s.append(char)
1018+
9691019
_BYTES_ESCAPE_CHARS = {
9701020
'"': b'"',
9711021
"\\": b"\\",
@@ -1000,9 +1050,7 @@ def _read_byte_str(ctx: ReaderContext) -> bytes:
10001050
"""
10011051
reader = ctx.reader
10021052

1003-
char = reader.peek()
1004-
while whitespace_chars.match(char):
1005-
char = reader.next_char()
1053+
char = _consume_whitespace(ctx)
10061054

10071055
if char != '"':
10081056
raise ctx.syntax_error(f"Expected '\"'; got '{char}' instead")
@@ -1681,8 +1729,11 @@ def _read_reader_macro(ctx: ReaderContext) -> LispReaderForm:
16811729
elif ns_name_chars.match(char):
16821730
s = _read_sym(ctx, is_reader_macro_sym=True)
16831731
assert isinstance(s, sym.Symbol)
1684-
if s.ns is None and s.name == "b":
1685-
return _read_byte_str(ctx)
1732+
if s.ns is None:
1733+
if s.name == "b":
1734+
return _read_byte_str(ctx)
1735+
elif s.name == "f":
1736+
return _read_fstr(ctx)
16861737

16871738
v = _read_next_consuming_comment(ctx)
16881739

@@ -1724,10 +1775,7 @@ def _read_next_consuming_comment(ctx: ReaderContext) -> RawReaderForm:
17241775

17251776
def _read_next_consuming_whitespace(ctx: ReaderContext) -> LispReaderForm:
17261777
"""Read the next full form from the input stream, consuming any whitespace."""
1727-
reader = ctx.reader
1728-
char = reader.peek()
1729-
while whitespace_chars.match(char):
1730-
char = reader.next_char()
1778+
_consume_whitespace(ctx)
17311779
return _read_next(ctx)
17321780

17331781

tests/basilisp/reader_test.py

Lines changed: 85 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -746,6 +746,91 @@ def test_missing_terminating_quote(self):
746746
read_str_first('"Start of a string')
747747

748748

749+
class TestFormatString:
    """Reader tests for ``#f "..."`` format string literals."""

    def test_must_include_quote(self):
        with pytest.raises(reader.SyntaxError):
            read_str_first(r"#f []")

    @pytest.mark.parametrize(
        "v,raw",
        [
            ("", '#f ""'),
            ('"', r'#f "\""'),
            ("\\", r'#f "\\"'),
            ("\a", r'#f "\a"'),
            ("\b", r'#f "\b"'),
            ("\f", r'#f "\f"'),
            ("\n", r'#f "\n"'),
            ("\r", r'#f "\r"'),
            ("\t", r'#f "\t"'),
            ("\v", r'#f "\v"'),
            ("Hello,\nmy name is\tChris.", r'#f "Hello,\nmy name is\tChris."'),
            ("Regular string", '#f "Regular string"'),
            ("String with 'inner string'", "#f \"String with 'inner string'\""),
            ('String with "inner string"', r'#f "String with \"inner string\""'),
        ],
    )
    def test_legal_string_is_legal_fstring(self, v: str, raw: str):
        # An f-string without interpolations reads as a plain string
        assert v == read_str_first(raw)

    # Each raw input is a single, properly terminated f-string; expected values
    # are either a plain string or a `(basilisp.core/str ...)` list form.
    @pytest.mark.parametrize(
        "v,raw",
        [
            (
                llist.l(
                    reader._STR, "[", kw.keyword("whitespace", ns="surrounded.by"), "]"
                ),
                '#f "[{ :surrounded.by/whitespace }]"',
            ),
            (llist.l(reader._STR, "[", None, "]"), '#f "[{nil}]"'),
            (llist.l(reader._STR, "[", True, "]"), '#f "[{true}]"'),
            (llist.l(reader._STR, "[", False, "]"), '#f "[{false}]"'),
            (llist.l(reader._STR, "[", 0, "]"), '#f "[{0}]"'),
            (llist.l(reader._STR, "[", 0.1, "]"), '#f "[{0.1}]"'),
            (llist.l(reader._STR, "[", kw.keyword("a"), "]"), '#f "[{:a}]"'),
            (llist.l(reader._STR, "[", sym.symbol("sym"), "]"), '#f "[{sym}]"'),
            (
                llist.l(
                    reader._STR, "[", llist.l(reader._QUOTE, sym.symbol("sym")), "]"
                ),
                '#f "[{\'sym}]"',
            ),
            (llist.l(reader._STR, "[", vec.EMPTY, "]"), '#f "[{[]}]"'),
            (llist.l(reader._STR, "[", vec.v("string"), "]"), '#f "[{["string"]}]"'),
            (llist.l(reader._STR, "[", llist.EMPTY, "]"), '#f "[{()}]"'),
            (llist.l(reader._STR, "[", llist.l("string"), "]"), '#f "[{("string")}]"'),
            (llist.l(reader._STR, "[", lset.EMPTY, "]"), '#f "[{#{}}]"'),
            (llist.l(reader._STR, "[", lset.s("string"), "]"), '#f "[{#{"string"}}]"'),
            (llist.l(reader._STR, "[", lmap.EMPTY, "]"), '#f "[{{}}]"'),
            (
                llist.l(reader._STR, "[", lmap.map({kw.keyword("a"): "string"}), "]"),
                '#f "[{{:a "string"}}]"',
            ),
            ("{}", r'#f "\{}"'),
            ("{(inc 1)}", r'#f "\{(inc 1)}"'),
            ("[inner]", '#f "[{"inner"}]"'),
        ],
    )
    def test_legal_fstring(self, v: object, raw: str):
        assert v == read_str_first(raw)

    def test_only_one_expr_allowed(self):
        with pytest.raises(reader.SyntaxError):
            read_str_first(r'#f "one {(+ 1 2) :a} three"')

    def test_invalid_escape(self):
        with pytest.raises(reader.SyntaxError):
            read_str_first(r'#f "\q"')

    def test_missing_expression(self):
        with pytest.raises(reader.SyntaxError):
            read_str_first('#f "some val {} with no expr"')

    def test_missing_terminating_quote(self):
        with pytest.raises(reader.SyntaxError):
            read_str_first('#f "Start of a format string')
832+
833+
749834
class TestByteString:
750835
def test_must_include_quote(self):
751836
with pytest.raises(reader.SyntaxError):

0 commit comments

Comments
 (0)