Skip to content

Parallel fuzzy filtering #2225

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 6 commits into from
Sep 22, 2021
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions ghcide/ghcide.cabal
Original file line number Diff line number Diff line change
@@ -64,6 +64,7 @@ library
hiedb == 0.4.1.*,
lsp-types >= 1.3.0.1 && < 1.4,
lsp == 1.2.*,
monoid-subclasses,
mtl,
network-uri,
optparse-applicative,
@@ -86,6 +87,7 @@ library
unordered-containers >= 0.2.10.0,
utf8-string,
vector,
vector-algorithms,
hslogger,
Diff ^>=0.4.0,
vector,
@@ -208,6 +210,8 @@ library
Development.IDE.Plugin.Completions.Logic
Development.IDE.Session.VersionCheck
Development.IDE.Types.Action
Text.Fuzzy.Parallel

ghc-options: -Wall -Wno-name-shadowing -Wincomplete-uni-patterns -Wno-unticked-promoted-constructors

if flag(ghc-patched-unboxed-bytecode)
23 changes: 15 additions & 8 deletions ghcide/src/Development/IDE/Plugin/Completions/Logic.hs
Original file line number Diff line number Diff line change
@@ -24,7 +24,7 @@ import Data.Maybe (fromMaybe, isJust,
listToMaybe,
mapMaybe)
import qualified Data.Text as T
import qualified Text.Fuzzy as Fuzzy
import qualified Text.Fuzzy.Parallel as Fuzzy

import Control.Monad
import Data.Aeson (ToJSON (toJSON))
@@ -53,6 +53,10 @@ import Language.LSP.Types
import Language.LSP.Types.Capabilities
import qualified Language.LSP.VFS as VFS

-- Chunk size used for parallelizing fuzzy matching
chunkSize :: Int
chunkSize = 1000

-- From haskell-ide-engine/hie-plugin-api/Haskell/Ide/Engine/Context.hs

-- | A context of a declaration in the program
@@ -490,14 +494,14 @@ ppr :: Outputable a => a -> T.Text
ppr = T.pack . prettyPrint

toggleSnippets :: ClientCapabilities -> CompletionsConfig -> CompletionItem -> CompletionItem
toggleSnippets ClientCapabilities {_textDocument} (CompletionsConfig with _) =
removeSnippetsWhen (not $ with && supported)
toggleSnippets ClientCapabilities {_textDocument} CompletionsConfig{..} =
removeSnippetsWhen (not $ enableSnippets && supported)
where
supported =
Just True == (_textDocument >>= _completion >>= _completionItem >>= _snippetSupport)

toggleAutoExtend :: CompletionsConfig -> CompItem -> CompItem
toggleAutoExtend (CompletionsConfig _ False) x = x {additionalTextEdits = Nothing}
toggleAutoExtend CompletionsConfig{enableAutoExtend=False} x = x {additionalTextEdits = Nothing}
toggleAutoExtend _ x = x

removeSnippetsWhen :: Bool -> CompletionItem -> CompletionItem
@@ -535,12 +539,14 @@ getCompletions plId ideOpts CC {allModNamesAsNS, anyQualCompls, unqualCompls, qu
-}
pos = VFS.cursorPos prefixInfo

maxC = maxCompletions config

filtModNameCompls =
map mkModCompl
$ mapMaybe (T.stripPrefix enteredQual)
$ Fuzzy.simpleFilter fullPrefix allModNamesAsNS
$ Fuzzy.simpleFilter chunkSize fullPrefix allModNamesAsNS

filtCompls = map Fuzzy.original $ Fuzzy.filter prefixText ctxCompls "" "" label False
filtCompls = map Fuzzy.original $ Fuzzy.filter chunkSize prefixText ctxCompls "" "" label False
where

mcc = case maybe_parsed of
@@ -587,7 +593,7 @@ getCompletions plId ideOpts CC {allModNamesAsNS, anyQualCompls, unqualCompls, qu

filtListWith f list =
[ f label
| label <- Fuzzy.simpleFilter fullPrefix list
| label <- Fuzzy.simpleFilter chunkSize fullPrefix list
, enteredQual `T.isPrefixOf` label
]

@@ -615,7 +621,8 @@ getCompletions plId ideOpts CC {allModNamesAsNS, anyQualCompls, unqualCompls, qu
-> return []
| otherwise -> do
-- assumes that nubOrdBy is stable
let uniqueFiltCompls = nubOrdBy uniqueCompl filtCompls
-- nubOrd is very slow - take 10x the maximum configured
let uniqueFiltCompls = nubOrdBy uniqueCompl $ take (maxC*10) filtCompls
let compls = map (mkCompl plId ideOpts) uniqueFiltCompls
return $ filtModNameCompls
++ filtKeywordCompls
9 changes: 6 additions & 3 deletions ghcide/src/Development/IDE/Plugin/Completions/Types.hs
Original file line number Diff line number Diff line change
@@ -12,12 +12,13 @@ import qualified Data.Text as T

import Data.Aeson (FromJSON, ToJSON)
import Data.Text (Text)
import Development.IDE.Spans.Common
import Development.IDE.GHC.Compat
import Development.IDE.Spans.Common
import GHC.Generics (Generic)
import Ide.Plugin.Config (Config)
import qualified Ide.Plugin.Config as Config
import Ide.Plugin.Properties
import Ide.PluginUtils (usePropertyLsp)
import Ide.PluginUtils (getClientConfig, usePropertyLsp)
import Ide.Types (PluginId)
import Language.LSP.Server (MonadLsp)
import Language.LSP.Types (CompletionItemKind (..), Uri)
@@ -46,11 +47,13 @@ getCompletionsConfig pId =
CompletionsConfig
<$> usePropertyLsp #snippetsOn pId properties
<*> usePropertyLsp #autoExtendOn pId properties
<*> (Config.maxCompletions <$> getClientConfig)


data CompletionsConfig = CompletionsConfig {
enableSnippets :: Bool,
enableAutoExtend :: Bool
enableAutoExtend :: Bool,
maxCompletions :: Int
}

data ExtendImport = ExtendImport
105 changes: 105 additions & 0 deletions ghcide/src/Text/Fuzzy/Parallel.hs
Original file line number Diff line number Diff line change
@@ -0,0 +1,105 @@
-- | Parallel versions of 'filter' and 'simpleFilter'
module Text.Fuzzy.Parallel
( filter,
simpleFilter,
-- reexports
Fuzzy(..),
match
) where

import Control.Monad.ST (runST)
import Control.Parallel.Strategies (Eval, Strategy, evalTraversable,
parTraversable, rseq, using)
import Data.Function (on)
import Data.Monoid.Textual (TextualMonoid)
import Data.Ord (Down (Down))
import Data.Vector (Vector, (!))
import qualified Data.Vector as V
-- need to use a stable sort
import Data.Bifunctor (second)
import qualified Data.Vector.Algorithms.Tim as VA
import Prelude hiding (filter)
import Text.Fuzzy (Fuzzy (..), match)

-- | The function to filter a list of values by fuzzy search on the text extracted from them.
--
-- >>> length $ filter 1000 200 "ML" (concat $ replicate 10000 [("Standard ML", 1990),("OCaml",1996),("Scala",2003)]) "<" ">" fst False
-- 200
filter :: (TextualMonoid s)
=> Int -- ^ Chunk size. 1000 works well.
-> s -- ^ Pattern.
-> [t] -- ^ The list of values containing the text to search in.
-> s -- ^ The text to add before each match.
-> s -- ^ The text to add after each match.
-> (t -> s) -- ^ The function to extract the text from the container.
-> Bool -- ^ Case sensitivity.
-> [Fuzzy t s] -- ^ The list of results, sorted, highest score first.
filter chunkSize pattern ts pre post extract caseSen = runST $ do
let v = (V.mapMaybe id
(V.map (\t -> match pattern t pre post extract caseSen) (V.fromList ts)
`using`
parVectorChunk chunkSize (evalTraversable forceScore)))
v' <- V.unsafeThaw v
VA.sortBy (compare `on` (Down . score)) v'
v'' <- V.unsafeFreeze v'
return $ V.toList v''

-- | Return all elements of the list that have a fuzzy
-- match against the pattern. Runs with default settings where
-- nothing is added around the matches, as case insensitive.
--
-- >>> simpleFilter "vm" ["vim", "emacs", "virtual machine"]
-- ["vim","virtual machine"]
{-# INLINABLE simpleFilter #-}
simpleFilter :: (TextualMonoid s)
=> Int -- ^ Chunk size. 1000 works well.
-> s -- ^ Pattern to look for.
-> [s] -- ^ List of texts to check.
-> [s] -- ^ The ones that match.
simpleFilter chunk pattern xs =
map original $ filter chunk pattern xs mempty mempty id False

--------------------------------------------------------------------------------

-- | Evaluation that forces the 'score' field
forceScore :: TextualMonoid s => Fuzzy t s -> Eval(Fuzzy t s)
forceScore it@Fuzzy{score} = do
score' <- rseq score
return it{score = score'}

--------------------------------------------------------------------------------

-- | Divides a vector in chunks, applies the strategy in parallel to each chunk.
parVectorChunk :: Int -> Strategy a -> Vector a -> Eval (Vector a)
parVectorChunk chunkSize st v =
V.concat <$> parTraversable (evalTraversable st) (chunkVector chunkSize v)

-- >>> chunkVector 3 (V.fromList [0..10])
-- >>> chunkVector 3 (V.fromList [0..11])
-- >>> chunkVector 3 (V.fromList [0..12])
-- [[0,1,2],[3,4,5],[6,7,8],[9,10]]
-- [[0,1,2],[3,4,5],[6,7,8],[9,10,11]]
-- [[0,1,2],[3,4,5],[6,7,8],[9,10,11],[12]]
chunkVector :: Int -> Vector a -> [Vector a]
chunkVector chunkSize v = do
let indices = chunkIndices chunkSize (0,l)
l = V.length v
[V.fromListN (h-l+1) [v ! j | j <- [l .. h]]
| (l,h) <- indices]

-- >>> chunkIndices 3 (0,9)
-- >>> chunkIndices 3 (0,10)
-- >>> chunkIndices 3 (0,11)
-- [(0,2),(3,5),(6,8)]
-- [(0,2),(3,5),(6,8),(9,9)]
-- [(0,2),(3,5),(6,8),(9,10)]
chunkIndices :: Int -> (Int,Int) -> [(Int,Int)]
chunkIndices chunkSize (from,to) =
map (second pred) $
pairwise $
[from, from+chunkSize .. to-1] ++ [to]

pairwise :: [a] -> [(a,a)]
pairwise [] = []
pairwise [_] = []
pairwise (x:y:xs) = (x,y) : pairwise (y:xs)