Skip to content

Commit 9395744

Browse files
authored
feat: ranked fuzzy match search of workspace symbols (#212)
This is an implementation of a sequential fuzzy string matching algorithm, similar to those used in code editors like Sublime Text. It is based on Forrest Smith's work on https://github.com/forrestthewoods/lib_fts/ and his blog post https://www.forrestthewoods.com/blog/reverse_engineering_sublime_texts_fuzzy_match/.
1 parent d91bd01 commit 9395744

File tree

2 files changed

+201
-63
lines changed

2 files changed

+201
-63
lines changed

lib/next_ls.ex

+126-42
Original file line numberDiff line numberDiff line change
@@ -235,14 +235,7 @@ defmodule NextLS do
235235
end
236236

237237
def handle_request(%WorkspaceSymbol{params: %{query: query}}, lsp) do
238-
filter = fn sym ->
239-
if query == "" do
240-
true
241-
else
242-
# TODO: sqlite has a regexp feature, this can be done in sql most likely
243-
to_string(sym) =~ query
244-
end
245-
end
238+
case_sensitive? = String.downcase(query) != query
246239

247240
symbols = fn pid ->
248241
rows =
@@ -270,32 +263,35 @@ defmodule NextLS do
270263

271264
symbols =
272265
dispatch(lsp.assigns.registry, :databases, fn entries ->
273-
for {pid, _} <- entries, symbol <- symbols.(pid), filter.(symbol.name) do
274-
name =
275-
if symbol.type != "defstruct" do
276-
"#{symbol.type} #{symbol.name}"
277-
else
278-
"#{symbol.name}"
279-
end
266+
filtered_symbols =
267+
for {pid, _} <- entries, symbol <- symbols.(pid), score = fuzzy_match(symbol.name, query, case_sensitive?) do
268+
name =
269+
if symbol.type != "defstruct" do
270+
"#{symbol.type} #{symbol.name}"
271+
else
272+
"#{symbol.name}"
273+
end
274+
275+
{%SymbolInformation{
276+
name: name,
277+
kind: elixir_kind_to_lsp_kind(symbol.type),
278+
location: %Location{
279+
uri: "file://#{symbol.file}",
280+
range: %Range{
281+
start: %Position{
282+
line: symbol.line - 1,
283+
character: symbol.column - 1
284+
},
285+
end: %Position{
286+
line: symbol.line - 1,
287+
character: symbol.column - 1
288+
}
289+
}
290+
}
291+
}, score}
292+
end
280293

281-
%SymbolInformation{
282-
name: name,
283-
kind: elixir_kind_to_lsp_kind(symbol.type),
284-
location: %Location{
285-
uri: "file://#{symbol.file}",
286-
range: %Range{
287-
start: %Position{
288-
line: symbol.line - 1,
289-
character: symbol.column - 1
290-
},
291-
end: %Position{
292-
line: symbol.line - 1,
293-
character: symbol.column - 1
294-
}
295-
}
296-
}
297-
}
298-
end
294+
filtered_symbols |> List.keysort(1, :desc) |> Enum.map(&elem(&1, 0))
299295
end)
300296

301297
{:reply, symbols, lsp}
@@ -706,15 +702,14 @@ defmodule NextLS do
706702
end
707703

708704
defp symbol_info(file, line, col, database) do
709-
definition_query =
710-
~Q"""
711-
SELECT module, type, name
712-
FROM "symbols" sym
713-
WHERE sym.file = ?
714-
AND sym.line = ?
715-
ORDER BY sym.id ASC
716-
LIMIT 1
717-
"""
705+
definition_query = ~Q"""
706+
SELECT module, type, name
707+
FROM "symbols" sym
708+
WHERE sym.file = ?
709+
AND sym.line = ?
710+
ORDER BY sym.id ASC
711+
LIMIT 1
712+
"""
718713

719714
reference_query = ~Q"""
720715
SELECT identifier, type, module
@@ -757,4 +752,93 @@ defmodule NextLS do
757752
end
758753

759754
defp clamp(line), do: max(line, 0)
755+
756+
# This is an implementation of a sequential fuzzy string matching algorithm,
757+
# similar to those used in code editors like Sublime Text.
758+
# It is based on Forrest Smith's work on lib_fts (https://github.com/forrestthewoods/lib_fts/)
759+
# and his blog post https://www.forrestthewoods.com/blog/reverse_engineering_sublime_texts_fuzzy_match/.
760+
#
761+
# The function checks whether the letters of the query are present in the source in the correct order.
762+
# It calculates a match score only for sources that match.
763+
764+
# Fuzzy-matches `query` against `source`.
#
# Returns `false` when the letters of `query` do not all occur in `source` in
# order, otherwise an integer score where a higher value means a better match.
# An empty query matches every source with a score of 1.
#
# When `case_sensitive` is falsy the source is downcased before the membership
# check. The score is always computed from the original source letters and the
# downcased query letters.
defp fuzzy_match(_source, "", _case_sensitive), do: 1

defp fuzzy_match(source, query, case_sensitive) do
  comparable_letters =
    if case_sensitive do
      String.codepoints(source)
    else
      source |> String.downcase() |> String.codepoints()
    end

  if do_fuzzy_match?(comparable_letters, String.codepoints(query)) do
    original_letters = String.codepoints(source)
    lowercase_query_letters = query |> String.downcase() |> String.codepoints()

    # Starting with `separator: true` makes a match on the very first source
    # letter earn the same bonus as a match right after a separator.
    initial_traits = %{leading: true, separator: true}

    calc_match_score(original_letters, lowercase_query_letters, initial_traits, 0)
  else
    false
  end
end
780+
781+
# Returns `true` when every query letter can be found in the source letters in
# the same order (not necessarily adjacent), and `false` otherwise.
#
# The source is consumed left to right: each query letter is searched for only
# in what remains after the previous match. An empty query always matches.
defp do_fuzzy_match?(source_letters, query_letters) do
  leftover =
    Enum.reduce_while(query_letters, source_letters, fn letter, remaining ->
      case match_letter(remaining, letter) do
        :no_match -> {:halt, :no_match}
        rest -> {:cont, rest}
      end
    end)

  leftover != :no_match
end
789+
790+
# Finds the first occurrence of `query_letter` in `source_letters`.
#
# Returns the letters that follow the match, or `:no_match` when the letter
# does not occur at all.
defp match_letter(source_letters, query_letter) do
  case Enum.drop_while(source_letters, &(&1 != query_letter)) do
    [] -> :no_match
    [_matched | remaining] -> remaining
  end
end
795+
796+
# Accumulates the match score over all query letters.
#
# Each query letter is scored by `calc_letter_score/4`, which also returns the
# unconsumed source letters and the traits of the last inspected source letter;
# both are carried over to the next query letter. Returns the final score, or
# `score` unchanged when there are no query letters.
defp calc_match_score(source_letters, query_letters, traits, score) do
  {_unconsumed, _last_traits, total} =
    Enum.reduce(query_letters, {source_letters, traits, score}, fn letter, {remaining, prev_traits, running} ->
      calc_letter_score(remaining, letter, prev_traits, running)
    end)

  total
end
803+
804+
# Scans the source letters for `query_letter` and updates the score on the way.
#
# Every skipped source letter costs a penalty (`calc_unmatched_penalty/2`); the
# matching letter may earn a bonus (`calc_matched_bonus/3`) depending on the
# traits of the letter before it. Comparison is case-insensitive: each source
# letter is downcased on its own and `query_letter` is expected to be downcased
# already.
#
# Returns `{remaining_source_letters, traits_of_last_inspected_letter, score}`.
#
# The first clause handles running out of source letters. The caller has
# already established a match, but that check compares the whole-string
# downcased (or raw) source, while this function downcases one codepoint at a
# time. `String.downcase/1` may expand a single codepoint into several
# (e.g. "İ"), so the two passes can disagree. Without this clause that case
# raised a FunctionClauseError; now scoring simply stops with the score so far.
defp calc_letter_score([], _query_letter, traits, score), do: {[], traits, score}

defp calc_letter_score([source_letter | source_rest], query_letter, traits, score) do
  separator? = source_letter in ["_", ".", "-", "/", " "]
  source_letter_downcase = String.downcase(source_letter)
  upper? = source_letter_downcase != source_letter

  if query_letter == source_letter_downcase do
    # A match ends the "leading" phase, so later skipped letters only cost the
    # regular penalty.
    new_traits = %{matched: true, leading: false, separator: separator?, upper: upper?}
    new_score = calc_matched_bonus(score, traits, new_traits)

    {source_rest, new_traits, new_score}
  else
    new_traits = %{
      matched: false,
      separator: separator?,
      upper: upper?,
      leading: traits.leading
    }

    # The penalty depends on the traits of the previous letter, not on the
    # letter being skipped.
    new_score = calc_unmatched_penalty(score, traits)

    calc_letter_score(source_rest, query_letter, new_traits, new_score)
  end
end
827+
828+
# Adds the bonus earned by a matched letter to `score`.
#
# `traits` describes the previous source letter and `new_traits` the letter
# that just matched. Only the first applicable rule is applied:
#
#   * +30 when the match follows a separator (or is the first letter, because
#     the initial traits are flagged as a separator)
#   * +30 when the match is uppercase and the previous letter was lowercase
#   * +15 when the previous letter was matched as well (adjacent matches)
#   * no bonus otherwise
defp calc_matched_bonus(score, traits, new_traits) do
  cond do
    match?(%{separator: true}, traits) ->
      score + 30

    match?(%{upper: false}, traits) and match?(%{upper: true}, new_traits) ->
      score + 30

    match?(%{matched: true}, traits) ->
      score + 15

    true ->
      score
  end
end
838+
839+
# Subtracts the penalty for a skipped (unmatched) source letter from `score`.
#
# Letters skipped before the first match cost 5 points each for as long as the
# score is still above -15; every other skipped letter costs 1 point.
defp calc_unmatched_penalty(score, traits) do
  if match?(%{leading: true}, traits) and score > -15 do
    score - 5
  else
    score - 1
  end
end
760844
end

test/next_ls_test.exs

+75-21
Original file line numberDiff line numberDiff line change
@@ -334,7 +334,40 @@ defmodule NextLSTest do
334334
id: 2,
335335
jsonrpc: "2.0",
336336
params: %{
337-
query: "fo"
337+
query: "Project"
338+
}
339+
}
340+
341+
assert_result 2, symbols
342+
343+
assert [
344+
%{
345+
"kind" => 2,
346+
"location" => %{
347+
"range" => %{
348+
"end" => %{"character" => 0, "line" => 0},
349+
"start" => %{"character" => 0, "line" => 0}
350+
},
351+
"uri" => "file://#{cwd}/my_proj/lib/project.ex"
352+
},
353+
"name" => "defmodule Project"
354+
}
355+
] == symbols
356+
end
357+
358+
test "workspace symbols with query fuzzy search", %{client: client, cwd: cwd} = context do
359+
assert :ok == notify(client, %{method: "initialized", jsonrpc: "2.0", params: %{}})
360+
assert_request(client, "client/registerCapability", fn _params -> nil end)
361+
362+
assert_is_ready(context, "my_proj")
363+
assert_notification "$/progress", %{"value" => %{"kind" => "end", "message" => "Finished indexing!"}}
364+
365+
request client, %{
366+
method: "workspace/symbol",
367+
id: 2,
368+
jsonrpc: "2.0",
369+
params: %{
370+
query: "heo"
338371
}
339372
}
340373

@@ -345,35 +378,56 @@ defmodule NextLSTest do
345378
"kind" => 12,
346379
"location" => %{
347380
"range" => %{
348-
"start" => %{
349-
"line" => 4,
350-
"character" => 0
351-
},
352-
"end" => %{
353-
"line" => 4,
354-
"character" => 0
355-
}
381+
"end" => %{"character" => 0, "line" => 1},
382+
"start" => %{"character" => 0, "line" => 1}
383+
},
384+
"uri" => "file://#{cwd}/my_proj/lib/project.ex"
385+
},
386+
"name" => "def hello"
387+
}
388+
] == symbols
389+
end
390+
391+
test "workspace symbols with query case sensitive fuzzy search", %{client: client, cwd: cwd} = context do
392+
assert :ok == notify(client, %{method: "initialized", jsonrpc: "2.0", params: %{}})
393+
assert_request(client, "client/registerCapability", fn _params -> nil end)
394+
395+
assert_is_ready(context, "my_proj")
396+
assert_notification "$/progress", %{"value" => %{"kind" => "end", "message" => "Finished indexing!"}}
397+
398+
request client, %{
399+
method: "workspace/symbol",
400+
id: 2,
401+
jsonrpc: "2.0",
402+
params: %{
403+
query: "Ct"
404+
}
405+
}
406+
407+
assert_result 2, symbols
408+
409+
assert [
410+
%{
411+
"kind" => 2,
412+
"location" => %{
413+
"range" => %{
414+
"end" => %{"character" => 0, "line" => 3},
415+
"start" => %{"character" => 0, "line" => 3}
356416
},
357417
"uri" => "file://#{cwd}/my_proj/lib/code_action.ex"
358418
},
359-
"name" => "def foo"
419+
"name" => "defmodule Foo.CodeAction.NestedMod"
360420
},
361421
%{
362-
"kind" => 12,
422+
"kind" => 2,
363423
"location" => %{
364424
"range" => %{
365-
"start" => %{
366-
"line" => 3,
367-
"character" => 0
368-
},
369-
"end" => %{
370-
"line" => 3,
371-
"character" => 0
372-
}
425+
"end" => %{"character" => 0, "line" => 0},
426+
"start" => %{"character" => 0, "line" => 0}
373427
},
374-
"uri" => "file://#{cwd}/my_proj/lib/bar.ex"
428+
"uri" => "file://#{cwd}/my_proj/lib/code_action.ex"
375429
},
376-
"name" => "def foo"
430+
"name" => "defmodule Foo.CodeAction"
377431
}
378432
] == symbols
379433
end

0 commit comments

Comments
 (0)