Skip to content

Commit 12c8b49

Browse files
committed
Parallel fuzzy filtering (#2225)
* Revert "Inline Text.Fuzzy to add INLINABLE pragmas (#2215)" This reverts commit 2869077. * Fuzz in parallel * Efficiently with vectors * use mapMaybe for compat. with older versions * switch to stable sort * clean ups
1 parent b89a4f9 commit 12c8b49

File tree

4 files changed

+132
-13
lines changed

4 files changed

+132
-13
lines changed

Diff for: ghcide/ghcide.cabal

+4
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,7 @@ library
6464
hiedb == 0.4.0.*,
6565
lsp-types >= 1.3.0.1 && < 1.4,
6666
lsp == 1.2.*,
67+
monoid-subclasses,
6768
mtl,
6869
network-uri,
6970
optparse-applicative,
@@ -86,6 +87,7 @@ library
8687
unordered-containers >= 0.2.10.0,
8788
utf8-string,
8889
vector,
90+
vector-algorithms,
8991
hslogger,
9092
Diff ^>=0.4.0,
9193
vector,
@@ -215,6 +217,8 @@ library
215217
Development.IDE.Plugin.Completions.Logic
216218
Development.IDE.Session.VersionCheck
217219
Development.IDE.Types.Action
220+
Text.Fuzzy.Parallel
221+
218222
ghc-options: -Wall -Wno-name-shadowing -Wincomplete-uni-patterns -Wno-unticked-promoted-constructors
219223

220224
if flag(ghc-patched-unboxed-bytecode)

Diff for: ghcide/src/Development/IDE/Plugin/Completions/Logic.hs

+18-11
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ import Data.Maybe (fromMaybe, isJust,
2424
listToMaybe,
2525
mapMaybe)
2626
import qualified Data.Text as T
27-
import qualified Text.Fuzzy as Fuzzy
27+
import qualified Text.Fuzzy.Parallel as Fuzzy
2828

2929
import HscTypes
3030
import Name
@@ -42,8 +42,8 @@ import Data.Aeson (ToJSON (toJSON))
4242
import Data.Either (fromRight)
4343
import Data.Functor
4444
import qualified Data.HashMap.Strict as HM
45-
import qualified Data.Set as Set
4645
import qualified Data.HashSet as HashSet
46+
import qualified Data.Set as Set
4747
import Development.IDE.Core.Compile
4848
import Development.IDE.Core.PositionMapping
4949
import Development.IDE.GHC.Compat as GHC
@@ -66,6 +66,10 @@ import qualified Language.LSP.VFS as VFS
6666
import Outputable (Outputable)
6767
import TyCoRep
6868

69+
-- Chunk size used for parallelizing fuzzy matching
70+
chunkSize :: Int
71+
chunkSize = 1000
72+
6973
-- From haskell-ide-engine/hie-plugin-api/Haskell/Ide/Engine/Context.hs
7074

7175
-- | A context of a declaration in the program
@@ -503,14 +507,14 @@ ppr :: Outputable a => a -> T.Text
503507
ppr = T.pack . prettyPrint
504508

505509
toggleSnippets :: ClientCapabilities -> CompletionsConfig -> CompletionItem -> CompletionItem
506-
toggleSnippets ClientCapabilities {_textDocument} (CompletionsConfig with _) =
507-
removeSnippetsWhen (not $ with && supported)
510+
toggleSnippets ClientCapabilities {_textDocument} CompletionsConfig{..} =
511+
removeSnippetsWhen (not $ enableSnippets && supported)
508512
where
509513
supported =
510514
Just True == (_textDocument >>= _completion >>= _completionItem >>= _snippetSupport)
511515

512516
toggleAutoExtend :: CompletionsConfig -> CompItem -> CompItem
513-
toggleAutoExtend (CompletionsConfig _ False) x = x {additionalTextEdits = Nothing}
517+
toggleAutoExtend CompletionsConfig{enableAutoExtend=False} x = x {additionalTextEdits = Nothing}
514518
toggleAutoExtend _ x = x
515519

516520
removeSnippetsWhen :: Bool -> CompletionItem -> CompletionItem
@@ -548,12 +552,14 @@ getCompletions plId ideOpts CC {allModNamesAsNS, anyQualCompls, unqualCompls, qu
548552
-}
549553
pos = VFS.cursorPos prefixInfo
550554

555+
maxC = maxCompletions config
556+
551557
filtModNameCompls =
552558
map mkModCompl
553559
$ mapMaybe (T.stripPrefix enteredQual)
554-
$ Fuzzy.simpleFilter fullPrefix allModNamesAsNS
560+
$ Fuzzy.simpleFilter chunkSize fullPrefix allModNamesAsNS
555561

556-
filtCompls = map Fuzzy.original $ Fuzzy.filter prefixText ctxCompls "" "" label False
562+
filtCompls = map Fuzzy.original $ Fuzzy.filter chunkSize prefixText ctxCompls "" "" label False
557563
where
558564

559565
mcc = case maybe_parsed of
@@ -600,7 +606,7 @@ getCompletions plId ideOpts CC {allModNamesAsNS, anyQualCompls, unqualCompls, qu
600606

601607
filtListWith f list =
602608
[ f label
603-
| label <- Fuzzy.simpleFilter fullPrefix list
609+
| label <- Fuzzy.simpleFilter chunkSize fullPrefix list
604610
, enteredQual `T.isPrefixOf` label
605611
]
606612

@@ -611,8 +617,8 @@ getCompletions plId ideOpts CC {allModNamesAsNS, anyQualCompls, unqualCompls, qu
611617
| otherwise = []
612618

613619
if
614-
-- TODO: handle multiline imports
615-
| "import " `T.isPrefixOf` fullLine
620+
-- TODO: handle multiline imports
621+
| "import " `T.isPrefixOf` fullLine
616622
&& (List.length (words (T.unpack fullLine)) >= 2)
617623
&& "(" `isInfixOf` T.unpack fullLine
618624
-> do
@@ -628,7 +634,8 @@ getCompletions plId ideOpts CC {allModNamesAsNS, anyQualCompls, unqualCompls, qu
628634
-> return []
629635
| otherwise -> do
630636
-- assumes that nubOrdBy is stable
631-
let uniqueFiltCompls = nubOrdBy uniqueCompl filtCompls
637+
-- nubOrd is very slow - take 10x the maximum configured
638+
let uniqueFiltCompls = nubOrdBy uniqueCompl $ take (maxC*10) filtCompls
632639
let compls = map (mkCompl plId ideOpts) uniqueFiltCompls
633640
return $ filtModNameCompls
634641
++ filtKeywordCompls

Diff for: ghcide/src/Development/IDE/Plugin/Completions/Types.hs

+5-2
Original file line numberDiff line numberDiff line change
@@ -16,8 +16,9 @@ import Data.Text (Text)
1616
import Development.IDE.Spans.Common
1717
import GHC.Generics (Generic)
1818
import Ide.Plugin.Config (Config)
19+
import qualified Ide.Plugin.Config as Config
1920
import Ide.Plugin.Properties
20-
import Ide.PluginUtils (usePropertyLsp)
21+
import Ide.PluginUtils (getClientConfig, usePropertyLsp)
2122
import Ide.Types (PluginId)
2223
import Language.LSP.Server (MonadLsp)
2324
import Language.LSP.Types (CompletionItemKind (..), Uri)
@@ -46,11 +47,13 @@ getCompletionsConfig pId =
4647
CompletionsConfig
4748
<$> usePropertyLsp #snippetsOn pId properties
4849
<*> usePropertyLsp #autoExtendOn pId properties
50+
<*> (Config.maxCompletions <$> getClientConfig)
4951

5052

5153
data CompletionsConfig = CompletionsConfig {
5254
enableSnippets :: Bool,
53-
enableAutoExtend :: Bool
55+
enableAutoExtend :: Bool,
56+
maxCompletions :: Int
5457
}
5558

5659
data ExtendImport = ExtendImport

Diff for: ghcide/src/Text/Fuzzy/Parallel.hs

+105
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,105 @@
1+
-- | Parallel versions of 'filter' and 'simpleFilter'
2+
module Text.Fuzzy.Parallel
3+
( filter,
4+
simpleFilter,
5+
-- reexports
6+
Fuzzy(..),
7+
match
8+
) where
9+
10+
import Control.Monad.ST (runST)
11+
import Control.Parallel.Strategies (Eval, Strategy, evalTraversable,
12+
parTraversable, rseq, using)
13+
import Data.Function (on)
14+
import Data.Monoid.Textual (TextualMonoid)
15+
import Data.Ord (Down (Down))
16+
import Data.Vector (Vector, (!))
17+
import qualified Data.Vector as V
18+
-- need to use a stable sort
19+
import Data.Bifunctor (second)
20+
import qualified Data.Vector.Algorithms.Tim as VA
21+
import Prelude hiding (filter)
22+
import Text.Fuzzy (Fuzzy (..), match)
23+
24+
-- | The function to filter a list of values by fuzzy search on the text extracted from them.
25+
--
26+
-- >>> length $ filter 1000 200 "ML" (concat $ replicate 10000 [("Standard ML", 1990),("OCaml",1996),("Scala",2003)]) "<" ">" fst False
27+
-- 200
28+
filter :: (TextualMonoid s)
29+
=> Int -- ^ Chunk size. 1000 works well.
30+
-> s -- ^ Pattern.
31+
-> [t] -- ^ The list of values containing the text to search in.
32+
-> s -- ^ The text to add before each match.
33+
-> s -- ^ The text to add after each match.
34+
-> (t -> s) -- ^ The function to extract the text from the container.
35+
-> Bool -- ^ Case sensitivity.
36+
-> [Fuzzy t s] -- ^ The list of results, sorted, highest score first.
37+
filter chunkSize pattern ts pre post extract caseSen = runST $ do
38+
let v = (V.mapMaybe id
39+
(V.map (\t -> match pattern t pre post extract caseSen) (V.fromList ts)
40+
`using`
41+
parVectorChunk chunkSize (evalTraversable forceScore)))
42+
v' <- V.unsafeThaw v
43+
VA.sortBy (compare `on` (Down . score)) v'
44+
v'' <- V.unsafeFreeze v'
45+
return $ V.toList v''
46+
47+
-- | Return all elements of the list that have a fuzzy
48+
-- match against the pattern. Runs with default settings where
49+
-- nothing is added around the matches, as case insensitive.
50+
--
51+
-- >>> simpleFilter "vm" ["vim", "emacs", "virtual machine"]
52+
-- ["vim","virtual machine"]
53+
{-# INLINABLE simpleFilter #-}
54+
simpleFilter :: (TextualMonoid s)
55+
=> Int -- ^ Chunk size. 1000 works well.
56+
-> s -- ^ Pattern to look for.
57+
-> [s] -- ^ List of texts to check.
58+
-> [s] -- ^ The ones that match.
59+
simpleFilter chunk pattern xs =
60+
map original $ filter chunk pattern xs mempty mempty id False
61+
62+
--------------------------------------------------------------------------------
63+
64+
-- | Evaluation that forces the 'score' field
65+
forceScore :: TextualMonoid s => Fuzzy t s -> Eval(Fuzzy t s)
66+
forceScore it@Fuzzy{score} = do
67+
score' <- rseq score
68+
return it{score = score'}
69+
70+
--------------------------------------------------------------------------------
71+
72+
-- | Divides a vector in chunks, applies the strategy in parallel to each chunk.
73+
parVectorChunk :: Int -> Strategy a -> Vector a -> Eval (Vector a)
74+
parVectorChunk chunkSize st v =
75+
V.concat <$> parTraversable (evalTraversable st) (chunkVector chunkSize v)
76+
77+
-- >>> chunkVector 3 (V.fromList [0..10])
78+
-- >>> chunkVector 3 (V.fromList [0..11])
79+
-- >>> chunkVector 3 (V.fromList [0..12])
80+
-- [[0,1,2],[3,4,5],[6,7,8],[9,10]]
81+
-- [[0,1,2],[3,4,5],[6,7,8],[9,10,11]]
82+
-- [[0,1,2],[3,4,5],[6,7,8],[9,10,11],[12]]
83+
chunkVector :: Int -> Vector a -> [Vector a]
84+
chunkVector chunkSize v = do
85+
let indices = chunkIndices chunkSize (0,l)
86+
l = V.length v
87+
[V.fromListN (h-l+1) [v ! j | j <- [l .. h]]
88+
| (l,h) <- indices]
89+
90+
-- >>> chunkIndices 3 (0,9)
91+
-- >>> chunkIndices 3 (0,10)
92+
-- >>> chunkIndices 3 (0,11)
93+
-- [(0,2),(3,5),(6,8)]
94+
-- [(0,2),(3,5),(6,8),(9,9)]
95+
-- [(0,2),(3,5),(6,8),(9,10)]
96+
chunkIndices :: Int -> (Int,Int) -> [(Int,Int)]
97+
chunkIndices chunkSize (from,to) =
98+
map (second pred) $
99+
pairwise $
100+
[from, from+chunkSize .. to-1] ++ [to]
101+
102+
pairwise :: [a] -> [(a,a)]
103+
pairwise [] = []
104+
pairwise [_] = []
105+
pairwise (x:y:xs) = (x,y) : pairwise (y:xs)

0 commit comments

Comments
 (0)