From 8caea859be9eb17822b5381c41355af7d3cc14df Mon Sep 17 00:00:00 2001 From: sabiwara Date: Fri, 23 Feb 2024 10:27:43 +0900 Subject: [PATCH 1/4] Fix charlist formatting issue on '\"' --- lib/elixir/lib/code/formatter.ex | 13 +++++++++++-- .../test/elixir/code_formatter/literals_test.exs | 4 ++++ 2 files changed, 15 insertions(+), 2 deletions(-) diff --git a/lib/elixir/lib/code/formatter.ex b/lib/elixir/lib/code/formatter.ex index 1d4aa84f66d..0ec3a9753f2 100644 --- a/lib/elixir/lib/code/formatter.ex +++ b/lib/elixir/lib/code/formatter.ex @@ -375,7 +375,7 @@ defmodule Code.Formatter do ~s['] -> {opener, quotes} = get_charlist_quotes(false, state) - string = list |> List.to_string() |> escape_string(quotes) + string = list |> List.to_string() |> escape_charlist_string(quotes) {opener |> concat(string) |> concat(quotes), state} _other -> @@ -1306,7 +1306,7 @@ defmodule Code.Formatter do defp list_interpolation_to_algebra([entry | entries], escape, state, acc, last) when is_binary(entry) do - acc = concat(acc, escape_string(entry, escape)) + acc = concat(acc, escape_charlist_string(entry, escape)) list_interpolation_to_algebra(entries, escape, state, acc, last) end @@ -1658,6 +1658,15 @@ defmodule Code.Formatter do heredoc_to_algebra(["" | String.split(string, "\n")]) end + defp escape_charlist_string(string, escape = @double_quote) do + # prevent double escapes, e.g. '\"' -> ~c"\"" + string + |> String.replace("\\" <> escape, escape) + |> escape_string(escape) + end + + defp escape_charlist_string(string, escape), do: escape_string(string, escape) + defp escape_string(string, <<_, _, _>> = escape) do string = String.replace(string, escape, "\\" <> escape) heredoc_to_algebra(String.split(string, "\n")) diff --git a/lib/elixir/test/elixir/code_formatter/literals_test.exs b/lib/elixir/test/elixir/code_formatter/literals_test.exs index ec614b6949e..e8a7e24d088 100644 --- a/lib/elixir/test/elixir/code_formatter/literals_test.exs +++ b/lib/elixir/test/elixir/code_formatter/literals_test.exs @@ -211,9 +211,11 @@ defmodule Code.Formatter.LiteralsTest do assert_format ~S['f\a\b\ro'], ~S[~c"f\a\b\ro"] assert_format ~S['single \' quote'], ~S[~c"single ' quote"] assert_format ~S['double " quote'], ~S[~c"double \" quote"] + assert_format ~S['escaped \" quote'], ~S[~c"escaped \" quote"] assert_same ~S['f\a\b\ro'], @keep_charlists assert_same ~S['single \' quote'], @keep_charlists + assert_same ~S['escaped \" quote'], @keep_charlists end test "keeps literal new lines" do @@ -235,8 +237,10 @@ defmodule Code.Formatter.LiteralsTest do test "with interpolation" do assert_format ~S['one #{2} three'], ~S[~c"one #{2} three"] + assert_format ~S['#{1}\n \\ " \"'], ~S[~c"#{1}\n \\ \" \""] assert_same ~S['one #{2} three'], @keep_charlists + assert_same ~S['#{1}\n \\ " \"'], @keep_charlists end test "with escape and interpolation" do From 7b13759379946d1ab857b10971eb3f9e808403e9 Mon Sep 17 00:00:00 2001 From: sabiwara Date: Fri, 23 Feb 2024 18:55:45 +0900 Subject: [PATCH 2/4] Test for extra edge case --- lib/elixir/test/elixir/code_formatter/literals_test.exs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/lib/elixir/test/elixir/code_formatter/literals_test.exs b/lib/elixir/test/elixir/code_formatter/literals_test.exs index e8a7e24d088..13de5a2cdc9 100644 --- a/lib/elixir/test/elixir/code_formatter/literals_test.exs +++ b/lib/elixir/test/elixir/code_formatter/literals_test.exs @@ -212,10 +212,12 @@ defmodule Code.Formatter.LiteralsTest do assert_format ~S['single \' quote'], ~S[~c"single ' quote"] assert_format ~S['double " quote'], ~S[~c"double \" quote"] assert_format ~S['escaped \" quote'], ~S[~c"escaped \" quote"] + assert_format ~S['\\"'], ~S[~c"\\\""] assert_same ~S['f\a\b\ro'], @keep_charlists assert_same ~S['single \' quote'], @keep_charlists assert_same ~S['escaped \" quote'], @keep_charlists + assert_same ~S['\\"'], @keep_charlists end test "keeps literal new lines" do From a91b626a6749008c736d106886f7cb85200d9735 Mon Sep 17 00:00:00 2001 From: sabiwara Date: Fri, 23 Feb 2024 18:56:07 +0900 Subject: [PATCH 3/4] Alternative implementation fixing new edge cases --- lib/elixir/lib/code/formatter.ex | 44 +++++++++++++++++++++----------- 1 file changed, 29 insertions(+), 15 deletions(-) diff --git a/lib/elixir/lib/code/formatter.ex b/lib/elixir/lib/code/formatter.ex index 0ec3a9753f2..eb942d1fcf5 100644 --- a/lib/elixir/lib/code/formatter.ex +++ b/lib/elixir/lib/code/formatter.ex @@ -375,7 +375,7 @@ defmodule Code.Formatter do ~s['] -> {opener, quotes} = get_charlist_quotes(false, state) - string = list |> List.to_string() |> escape_charlist_string(quotes) + string = list |> List.to_string() |> escape_string(quotes) {opener |> concat(string) |> concat(quotes), state} _other -> @@ -1306,7 +1306,7 @@ defmodule Code.Formatter do defp list_interpolation_to_algebra([entry | entries], escape, state, acc, last) when is_binary(entry) do - acc = concat(acc, escape_charlist_string(entry, escape)) + acc = concat(acc, escape_string(entry, escape)) list_interpolation_to_algebra(entries, escape, state, acc, last) end @@ -1658,15 +1658,6 @@ defmodule Code.Formatter do heredoc_to_algebra(["" | String.split(string, "\n")]) end - defp escape_charlist_string(string, escape = @double_quote) do - # prevent double escapes, e.g. '\"' -> ~c"\"" - string - |> String.replace("\\" <> escape, escape) - |> escape_string(escape) - end - - defp escape_charlist_string(string, escape), do: escape_string(string, escape) - defp escape_string(string, <<_, _, _>> = escape) do string = String.replace(string, escape, "\\" <> escape) heredoc_to_algebra(String.split(string, "\n")) @@ -1674,13 +1665,36 @@ defmodule Code.Formatter do defp escape_string(string, escape) when is_binary(escape) do string - |> String.replace(escape, "\\" <> escape) - |> String.split("\n") - |> Enum.reverse() - |> Enum.map(&string/1) + |> do_escape_string(escape, [], []) + |> Enum.map(fn reversed_chars -> + Enum.reverse(reversed_chars) |> IO.chardata_to_string() |> string() + end) |> Enum.reduce(&concat(&1, concat(nest(line(), :reset), &2))) end + defp do_escape_string(string, escape, current, acc) do + case string do + <<>> -> + [current | acc] + + "\n" <> rest -> + do_escape_string(rest, escape, [], [current | acc]) + + "\\\\" <> rest -> + do_escape_string(rest, escape, [?\\, ?\\ | current], acc) + + # prevent double escapes, e.g. '\"' -> ~c"\"" + "\\" <> ^escape <> rest -> + do_escape_string(rest, escape, [escape, ?\\ | current], acc) + + ^escape <> rest -> + do_escape_string(rest, escape, [escape, ?\\ | current], acc) + + <> -> + do_escape_string(rest, escape, [char | current], acc) + end + end + defp heredoc_to_algebra([string]) do string(string) end From 5c11f6a22501e52f52388f91d8bd21f2210f37bf Mon Sep 17 00:00:00 2001 From: sabiwara Date: Fri, 23 Feb 2024 19:40:59 +0900 Subject: [PATCH 4/4] Escape as single quotes if double quotes --- lib/elixir/lib/code/formatter.ex | 65 +++++++------------ .../elixir/code_formatter/literals_test.exs | 11 ++-- 2 files changed, 30 insertions(+), 46 deletions(-) diff --git a/lib/elixir/lib/code/formatter.ex b/lib/elixir/lib/code/formatter.ex index eb942d1fcf5..ed2466f3796 100644 --- a/lib/elixir/lib/code/formatter.ex +++ b/lib/elixir/lib/code/formatter.ex @@ -6,7 +6,8 @@ defmodule Code.Formatter do @double_heredoc "\"\"\"" @single_quote "'" @single_heredoc "'''" - @sigil_c "~c\"" + @sigil_c_double "~c\"" + @sigil_c_single "~c'" @sigil_c_heredoc "~c\"\"\"" @newlines 2 @min_line 0 @@ -300,7 +301,7 @@ defmodule Code.Formatter do remote_to_algebra(quoted, context, state) meta[:delimiter] == ~s['''] -> - {opener, quotes} = get_charlist_quotes(true, state) + {opener, quotes} = get_charlist_quotes(:heredoc, state) {doc, state} = entries @@ -310,7 +311,7 @@ defmodule Code.Formatter do {force_unfit(doc), state} true -> - {opener, quotes} = get_charlist_quotes(false, state) + {opener, quotes} = get_charlist_quotes({:regular, entries}, state) list_interpolation_to_algebra(entries, quotes, state, opener, quotes) end end @@ -369,13 +370,14 @@ defmodule Code.Formatter do defp quoted_to_algebra({:__block__, meta, [list]}, _context, state) when is_list(list) do case meta[:delimiter] do ~s['''] -> - {opener, quotes} = get_charlist_quotes(true, state) + {opener, quotes} = get_charlist_quotes(:heredoc, state) string = list |> List.to_string() |> escape_heredoc(quotes) {opener |> concat(string) |> concat(quotes) |> force_unfit(), state} ~s['] -> - {opener, quotes} = get_charlist_quotes(false, state) - string = list |> List.to_string() |> escape_string(quotes) + string = list |> List.to_string() + {opener, quotes} = get_charlist_quotes({:regular, [string]}, state) + string = escape_string(string, quotes) {opener |> concat(string) |> concat(quotes), state} _other -> @@ -1665,36 +1667,13 @@ defmodule Code.Formatter do defp escape_string(string, escape) when is_binary(escape) do string - |> do_escape_string(escape, [], []) - |> Enum.map(fn reversed_chars -> - Enum.reverse(reversed_chars) |> IO.chardata_to_string() |> string() - end) + |> String.replace(escape, "\\" <> escape) + |> String.split("\n") + |> Enum.reverse() + |> Enum.map(&string/1) |> Enum.reduce(&concat(&1, concat(nest(line(), :reset), &2))) end - defp do_escape_string(string, escape, current, acc) do - case string do - <<>> -> - [current | acc] - - "\n" <> rest -> - do_escape_string(rest, escape, [], [current | acc]) - - "\\\\" <> rest -> - do_escape_string(rest, escape, [?\\, ?\\ | current], acc) - - # prevent double escapes, e.g. '\"' -> ~c"\"" - "\\" <> ^escape <> rest -> - do_escape_string(rest, escape, [escape, ?\\ | current], acc) - - ^escape <> rest -> - do_escape_string(rest, escape, [escape, ?\\ | current], acc) - - <> -> - do_escape_string(rest, escape, [char | current], acc) - end - end - defp heredoc_to_algebra([string]) do string(string) end @@ -2445,19 +2424,23 @@ defmodule Code.Formatter do {left, right} end - defp get_charlist_quotes(_heredoc = false, state) do + defp get_charlist_quotes(:heredoc, state) do if state.normalize_charlists_as_sigils do - {@sigil_c, @double_quote} + {@sigil_c_heredoc, @double_heredoc} else - {@single_quote, @single_quote} + {@single_heredoc, @single_heredoc} end end - defp get_charlist_quotes(_heredoc = true, state) do - if state.normalize_charlists_as_sigils do - {@sigil_c_heredoc, @double_heredoc} - else - {@single_heredoc, @single_heredoc} + defp get_charlist_quotes({:regular, chunks}, state) do + cond do + !state.normalize_charlists_as_sigils -> {@single_quote, @single_quote} + Enum.any?(chunks, &has_double_quote?/1) -> {@sigil_c_single, @single_quote} + true -> {@sigil_c_double, @double_quote} end end + + defp has_double_quote?(chunk) do + is_binary(chunk) and chunk =~ @double_quote + end end diff --git a/lib/elixir/test/elixir/code_formatter/literals_test.exs b/lib/elixir/test/elixir/code_formatter/literals_test.exs index 13de5a2cdc9..7f5a7632faf 100644 --- a/lib/elixir/test/elixir/code_formatter/literals_test.exs +++ b/lib/elixir/test/elixir/code_formatter/literals_test.exs @@ -210,12 +210,13 @@ defmodule Code.Formatter.LiteralsTest do test "with escapes" do assert_format ~S['f\a\b\ro'], ~S[~c"f\a\b\ro"] assert_format ~S['single \' quote'], ~S[~c"single ' quote"] - assert_format ~S['double " quote'], ~S[~c"double \" quote"] - assert_format ~S['escaped \" quote'], ~S[~c"escaped \" quote"] - assert_format ~S['\\"'], ~S[~c"\\\""] + assert_format ~S['double " quote'], ~S[~c'double " quote'] + assert_format ~S['escaped \" quote'], ~S[~c'escaped \" quote'] + assert_format ~S['\\"'], ~S[~c'\\"'] assert_same ~S['f\a\b\ro'], @keep_charlists assert_same ~S['single \' quote'], @keep_charlists + assert_same ~S['double " quote'], @keep_charlists assert_same ~S['escaped \" quote'], @keep_charlists assert_same ~S['\\"'], @keep_charlists end @@ -239,7 +240,7 @@ defmodule Code.Formatter.LiteralsTest do test "with interpolation" do assert_format ~S['one #{2} three'], ~S[~c"one #{2} three"] - assert_format ~S['#{1}\n \\ " \"'], ~S[~c"#{1}\n \\ \" \""] + assert_format ~S['#{1}\n \\ " \"'], ~S[~c'#{1}\n \\ " \"'] assert_same ~S['one #{2} three'], @keep_charlists assert_same ~S['#{1}\n \\ " \"'], @keep_charlists @@ -247,7 +248,7 @@ defmodule Code.Formatter.LiteralsTest do test "with escape and interpolation" do assert_format ~S['one\n\'#{2}\'\nthree'], ~S[~c"one\n'#{2}'\nthree"] - assert_format ~S['one\n"#{2}"\nthree'], ~S[~c"one\n\"#{2}\"\nthree"] + assert_format ~S['one\n"#{2}"\nthree'], ~S[~c'one\n"#{2}"\nthree'] assert_same ~S['one\n\'#{2}\'\nthree'], @keep_charlists end