|
2 | 2 | import json as _json
|
3 | 3 | import sys
|
4 | 4 | import re
|
| 5 | +from functools import reduce |
5 | 6 |
|
6 | 7 | from _plotly_utils.optional_imports import get_module
|
7 | 8 | from _plotly_utils.basevalidators import ImageUriValidator
|
|
10 | 11 | PY36_OR_LATER = sys.version_info >= (3, 6)
|
11 | 12 |
|
12 | 13 |
|
def cumsum(x):
    """
    Custom cumsum to avoid a numpy import.

    Example:

    >>> cumsum([1, 2, 3, 4])
    [1, 3, 6, 10]
    >>> cumsum([])
    []

    :param (iterable) x: The values to accumulate. Any values supporting ``+``
        with each other are accepted.
    :return (list): A list whose k-th entry is the sum of the first k+1
        entries of x. The first entry is the first element of x itself (it is
        not added to an initial 0).
    """
    it = iter(x)
    try:
        running = next(it)
    except StopIteration:
        # Nothing to accumulate.
        return []
    totals = [running]
    for value in it:
        # Use `running + value` (not `+=`) so mutable elements such as lists
        # are never modified in place.
        running = running + value
        totals.append(running)
    return totals
| 26 | + |
| 27 | + |
13 | 28 | class PlotlyJSONEncoder(_json.JSONEncoder):
|
14 | 29 | """
|
15 | 30 | Meant to be passed as the `cls` kwarg to json.dumps(obj, cls=..)
|
@@ -256,3 +271,170 @@ def _get_int_type():
|
256 | 271 | else:
|
257 | 272 | int_type = (int,)
|
258 | 273 | return int_type
|
| 274 | + |
| 275 | + |
def split_multichar(ss, chars):
    """
    Split all the strings in ss at any of the characters in chars.
    Example:

    >>> ss = ["a.string[0].with_separators"]
    >>> chars = list(".[]_")
    >>> split_multichar(ss, chars)
    ['a', 'string', '0', '', 'with', 'separators']

    :param (list) ss: A list of strings.
    :param (list) chars: The separators to split at. Any iterable of
        separators is accepted; it is not modified.
    :return (list): The pieces of every string in ss, in order. ss itself is
        returned when chars is empty.
    """
    pieces = ss
    # Work on a copy so the caller's `chars` is left untouched. Separators are
    # applied last-to-first.
    for sep in reversed(list(chars)):
        pieces = [part for s in pieces for part in s.split(sep)]
    return pieces
| 294 | + |
| 295 | + |
def split_string_positions(ss):
    """
    Given a list of strings split using split_multichar, return a list of
    integers representing the indices of the first character of every string in
    the original string.
    Example:

    >>> ss = ["a.string[0].with_separators"]
    >>> chars = list(".[]_")
    >>> ss_split = split_multichar(ss, chars)
    >>> ss_split
    ['a', 'string', '0', '', 'with', 'separators']
    >>> split_string_positions(ss_split)
    [0, 2, 9, 11, 12, 17]

    :param (list) ss: A list of strings.
    """
    # Total length of all pieces that precede each piece.
    preceding_lengths = [len(piece) for piece in ss[:-1]]
    text_offsets = cumsum([0] + preceding_lengths)
    # Piece number k is shifted by a further k positions: one per piece
    # boundary that precedes it.
    return [
        index + text_offset
        for index, text_offset in zip(range(len(ss)), text_offsets)
    ]
| 319 | + |
| 320 | + |
def display_string_positions(p, i=None, offset=0, length=1, char="^", trim=True):
    """
    Return a string that is whitespace except at p[i] which is replaced with char.
    If i is None then all the indices of the string in p are replaced with char.

    Example:

    >>> ss = ["a.string[0].with_separators"]
    >>> chars = list(".[]_")
    >>> ss_split = split_multichar(ss, chars)
    >>> ss_split
    ['a', 'string', '0', '', 'with', 'separators']
    >>> ss_pos = split_string_positions(ss_split)
    >>> ss[0]
    'a.string[0].with_separators'
    >>> display_string_positions(ss_pos,4)
    '            ^'
    >>> display_string_positions(ss_pos,4,offset=1,length=3,char="~",trim=False)
    '             ~~~      '
    >>> display_string_positions(ss_pos)
    '^ ^      ^ ^^    ^'

    :param (list) p: A list of integers.
    :param (integer|None) i: Optional index of p to display.
    :param (integer) offset: Allows adding a number of spaces to the replacement.
    :param (integer) length: Allows adding a replacement that is the char
        repeated length times.
    :param (str) char: allows customizing the replacement character.
    :param (boolean) trim: trims the remaining whitespace if True.
    :return (str): The marker string. An empty string is returned when p is
        empty and i is None.
    """
    if i is None and len(p) == 0:
        # No positions to mark; avoid calling max() on an empty sequence.
        return ""
    canvas = [" "] * (max(p) + 1 + offset + length)
    starts = p if i is None else [p[i]]
    # Track the right-most marked cell (not merely the last one written) so
    # trimming is correct even when p is not sorted.
    last_marked = 0
    for start in starts:
        for k in range(length):
            addr = start + offset + k
            canvas[addr] = char
            last_marked = max(last_marked, addr)
    ret = "".join(canvas)
    if trim:
        ret = ret[: last_marked + 1]
    return ret
| 365 | + |
| 366 | + |
def chomp_empty_strings(strings, c, reverse=False):
    """
    Given a list of strings, some of which are the empty string "", replace the
    empty strings with c and combine them with the closest non-empty string on
    the left or "" if it is the first string.
    Examples:
    for c="_"
    ['hey', '', 'why', '', '', 'whoa', '', ''] -> ['hey_', 'why__', 'whoa__']
    ['', 'hi', '', "I'm", 'bob', '', ''] -> ['_', 'hi_', "I'm", 'bob__']
    ['hi', "i'm", 'a', 'good', 'string'] -> ['hi', "i'm", 'a', 'good', 'string']
    Some special cases are:
    [] -> []
    [''] -> ['']
    ['', ''] -> ['_']
    ['', '', '', ''] -> ['___']
    If reverse is true, empty strings are combined with closest non-empty string
    on the right or "" if it is the last string.

    :param (list) strings: A list of strings.
    :param (str) c: The string substituted for every empty string.
    :param (boolean) reverse: Attach replacements to the string on the right
        instead of the one on the left.
    :return (list): The combined strings, with no empty entries except in the
        [''] special case above.
    """

    def _rev(l):
        # Mirror the list and every string in it.
        return [s[::-1] for s in l][::-1]

    if reverse:
        # Run the left-attaching algorithm on the mirrored input. c is mirrored
        # as well so that a multi-character c reads correctly once the result
        # is mirrored back.
        return _rev(chomp_empty_strings(_rev(strings), c[::-1]))
    if not len(strings):
        return strings
    if sum(map(len, strings)) == 0:
        # Only empty strings: n of them collapse to n - 1 copies of c.
        return [c * (len(strings) - 1)]

    # Seed with "" so leading empty strings have something to attach to; the
    # seed is dropped at the end if it was never extended.
    merged = [""]
    for s in strings:
        if len(s) == 0:
            merged[-1] = merged[-1] + c
        else:
            merged.append(s)
    return [s for s in merged if len(s)]
| 411 | + |
| 412 | + |
# taken from
# https://en.wikibooks.org/wiki/Algorithm_Implementation/Strings/Levenshtein_distance#Python
def levenshtein(s1, s2):
    """
    Return the Levenshtein (edit) distance between s1 and s2, i.e. the minimum
    number of single-element insertions, deletions and substitutions needed to
    turn one into the other.

    :param (str) s1: First string.
    :param (str) s2: Second string.
    :return (int): The edit distance.
    """
    # Make s1 the longer of the two so each row is as short as possible.
    if len(s1) < len(s2):
        s1, s2 = s2, s1
    if len(s2) == 0:
        return len(s1)
    # Only two rows of the distance table are kept at any time.
    above = range(len(s2) + 1)
    for row_number, ch1 in enumerate(s1, 1):
        row = [row_number]
        for col, ch2 in enumerate(s2):
            # Rows are one entry longer than s2, hence col + 1 for the cell
            # directly above the one being filled in.
            cost_insert = above[col + 1] + 1
            cost_delete = row[col] + 1
            cost_substitute = above[col] + (ch1 != ch2)
            row.append(min(cost_insert, cost_delete, cost_substitute))
        above = row
    return above[-1]
| 432 | + |
| 433 | + |
def find_closest_string(string, strings):
    """
    Return the entry of strings with the smallest Levenshtein distance to
    string.

    :param (str) string: The string to compare against.
    :param (iterable) strings: The candidate strings.
    :return (str): The closest candidate.
    """
    # Rank by (distance, candidate): candidates at the same distance are
    # ordered lexicographically, so the result does not depend on the order of
    # `strings`.
    ranked = sorted(
        strings, key=lambda candidate: (levenshtein(candidate, string), candidate)
    )
    return ranked[0]
0 commit comments