|
| 1 | +-- Copyright (c) 2015 Joomy Korkut |
| 2 | +-- Forked from https://github.com/joom/fuzzy/commit/eecbdd04e86c48c964544dbede2665f72fe1f923 |
| 3 | +-- temporarily for https://github.com/joom/fuzzy/pull/3 |
| 4 | + |
| 5 | +{-# LANGUAGE FlexibleContexts #-} |
| 6 | + |
| 7 | + |
| 8 | +-- | Fuzzy string search in Haskell. |
| 9 | +-- Uses 'TextualMonoid' to be able to run on different types of strings. |
| 10 | +module Text.Fuzzy where |
| 11 | + |
| 12 | +import Prelude hiding (filter) |
| 13 | +import qualified Prelude as P |
| 14 | + |
| 15 | +import Data.Char (toLower) |
| 16 | +import Data.List (sortOn) |
| 17 | +import Data.Maybe (isJust, mapMaybe) |
| 18 | +import Data.Monoid (mempty, (<>)) |
| 19 | +import Data.Ord |
| 20 | +import Data.String |
| 21 | +import Data.Text (Text) |
| 22 | + |
| 23 | +import qualified Data.Monoid.Textual as T |
| 24 | + |
| 25 | +-- | Included in the return type of @'match'@ and @'filter'@. |
| 26 | +-- Contains the original value given, the rendered string |
| 27 | +-- and the matching score. |
| 28 | +data (T.TextualMonoid s) => Fuzzy t s = |
| 29 | + Fuzzy { original :: t |
| 30 | + , rendered :: s |
| 31 | + , score :: Int |
| 32 | + } deriving (Show, Eq) |
| 33 | + |
| 34 | +-- | Returns the rendered output and the |
| 35 | +-- matching score for a pattern and a text. |
| 36 | +-- Two examples are given below: |
| 37 | +-- |
| 38 | +-- >>> match "fnt" "infinite" "" "" id True |
| 39 | +-- Just ("infinite",3) |
| 40 | +-- |
| 41 | +-- >>> match "hsk" ("Haskell",1995) "<" ">" fst False |
| 42 | +-- Just ("<h>a<s><k>ell",5) |
| 43 | +-- |
| 44 | +match :: (T.TextualMonoid s) |
| 45 | + => s -- ^ Pattern. |
| 46 | + -> t -- ^ The value containing the text to search in. |
| 47 | + -> s -- ^ The text to add before each match. |
| 48 | + -> s -- ^ The text to add after each match. |
| 49 | + -> (t -> s) -- ^ The function to extract the text from the container. |
| 50 | + -> Bool -- ^ Case sensitivity. |
| 51 | + -> Maybe (Fuzzy t s) -- ^ The original value, rendered string and score. |
| 52 | +match pattern t pre post extract caseSensitive = |
| 53 | + if null pat then Just (Fuzzy t result totalScore) else Nothing |
| 54 | + where |
| 55 | + null :: (T.TextualMonoid s) => s -> Bool |
| 56 | + null = not . T.any (const True) |
| 57 | + |
| 58 | + s = extract t |
| 59 | + (s', pattern') = let f = T.map toLower in |
| 60 | + if caseSensitive then (s, pattern) else (f s, f pattern) |
| 61 | + |
| 62 | + (totalScore, currScore, result, pat) = |
| 63 | + T.foldl' |
| 64 | + undefined |
| 65 | + (\(tot, cur, res, pat) c -> |
| 66 | + case T.splitCharacterPrefix pat of |
| 67 | + Nothing -> (tot, 0, res <> T.singleton c, pat) |
| 68 | + Just (x, xs) -> |
| 69 | + if x == c then |
| 70 | + let cur' = cur * 2 + 1 in |
| 71 | + (tot + cur', cur', res <> pre <> T.singleton c <> post, xs) |
| 72 | + else (tot, 0, res <> T.singleton c, pat) |
| 73 | + ) (0, 0, mempty, pattern') s' |
| 74 | + |
| 75 | +-- | The function to filter a list of values by fuzzy search on the text extracted from them. |
| 76 | +-- |
| 77 | +-- >>> filter "ML" [("Standard ML", 1990),("OCaml",1996),("Scala",2003)] "<" ">" fst False |
| 78 | +-- [Fuzzy {original = ("Standard ML",1990), rendered = "standard <m><l>", score = 4},Fuzzy {original = ("OCaml",1996), rendered = "oca<m><l>", score = 4}] |
| 79 | +filter :: (T.TextualMonoid s) |
| 80 | + => s -- ^ Pattern. |
| 81 | + -> [t] -- ^ The list of values containing the text to search in. |
| 82 | + -> s -- ^ The text to add before each match. |
| 83 | + -> s -- ^ The text to add after each match. |
| 84 | + -> (t -> s) -- ^ The function to extract the text from the container. |
| 85 | + -> Bool -- ^ Case sensitivity. |
| 86 | + -> [Fuzzy t s] -- ^ The list of results, sorted, highest score first. |
| 87 | +filter pattern ts pre post extract caseSen = |
| 88 | + sortOn (Down . score) |
| 89 | + (mapMaybe (\t -> match pattern t pre post extract caseSen) ts) |
| 90 | + |
| 91 | +-- | Return all elements of the list that have a fuzzy |
| 92 | +-- match against the pattern. Runs with default settings where |
| 93 | +-- nothing is added around the matches, as case insensitive. |
| 94 | +-- |
| 95 | +-- >>> simpleFilter "vm" ["vim", "emacs", "virtual machine"] |
| 96 | +-- ["vim","virtual machine"] |
| 97 | +simpleFilter :: (T.TextualMonoid s) |
| 98 | + => s -- ^ Pattern to look for. |
| 99 | + -> [s] -- ^ List of texts to check. |
| 100 | + -> [s] -- ^ The ones that match. |
| 101 | +simpleFilter pattern xs = |
| 102 | + map original $ filter pattern xs mempty mempty id False |
| 103 | + |
| 104 | +-- | Returns false if the pattern and the text do not match at all. |
| 105 | +-- Returns true otherwise. |
| 106 | +-- |
| 107 | +-- >>> test "brd" "bread" |
| 108 | +-- True |
| 109 | +test :: (T.TextualMonoid s) |
| 110 | + => s -> s -> Bool |
| 111 | +test p s = isJust (match p s mempty mempty id False) |
| 112 | + |
| 113 | + |
| 114 | +{-# INLINABLE match #-} |
| 115 | +{-# INLINABLE filter #-} |
| 116 | +{-# INLINABLE simpleFilter #-} |
0 commit comments