-
Notifications
You must be signed in to change notification settings - Fork 510
/
Copy pathword.js
60 lines (53 loc) · 2.25 KB
/
word.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
import Diff from './base';
import {generateOptions} from '../util/params';
// Based on https://en.wikipedia.org/wiki/Latin_script_in_Unicode
//
// Ranges and exceptions:
// Latin-1 Supplement, 0080–00FF
// - U+00D7 × Multiplication sign
// - U+00F7 ÷ Division sign
// Latin Extended-A, 0100–017F
// Latin Extended-B, 0180–024F
// IPA Extensions, 0250–02AF
// Spacing Modifier Letters, 02B0–02FF
// - U+02C7 ˇ Caron
// - U+02D8 ˘ Breve
// - U+02D9 ˙ Dot Above
// - U+02DA ˚ Ring Above
// - U+02DB ˛ Ogonek
// - U+02DC ˜ Small Tilde
// - U+02DD ˝ Double Acute Accent
// Latin Extended Additional, 1E00–1EFF
// Matches a string made up entirely of Latin-script letters: ASCII letters,
// the letters of the Latin-1 Supplement (U+00C0–U+00FF), Latin Extended-A/B,
// IPA Extensions, Spacing Modifier Letters and Latin Extended Additional.
// The Latin-1 range is split around U+00D7 (×) and U+00F7 (÷), and the
// Spacing Modifier range skips U+02C7 and U+02D8–U+02DD, so that those
// symbols are NOT treated as word characters. (The first range previously
// ran U+00C0–U+00FF, which silently re-included × and ÷.)
const extendedWordChars = /^[a-zA-Z\u{C0}-\u{D6}\u{D8}-\u{F6}\u{F8}-\u{2C6}\u{2C8}-\u{2D7}\u{2DE}-\u{2FF}\u{1E00}-\u{1EFF}]+$/u;
// Despite the name, this matches any single NON-whitespace character, so
// `!reWhitespace.test(s)` is true when `s` is empty or whitespace-only.
const reWhitespace = /\S/;
// Differ instance used for word-level comparison; `equals` and `tokenize`
// are overridden on it in this file.
export const wordDiff = new Diff();

/**
 * Decides whether two tokens should be treated as equal.
 *
 * Reads `ignoreCase` and `ignoreWhitespace` from `this.options`.
 *
 * @param {string} left - Token from one side of the comparison.
 * @param {string} right - Token from the other side.
 * @returns {*} `true` when the tokens are identical (after lower-casing both
 *   if `ignoreCase` is set). Otherwise a truthy value only when
 *   `ignoreWhitespace` is set and neither token contains a non-whitespace
 *   character; the result may be a falsy non-boolean such as `undefined`.
 */
wordDiff.equals = function(left, right) {
  const {ignoreCase, ignoreWhitespace} = this.options;
  const lhs = ignoreCase ? left.toLowerCase() : left;
  const rhs = ignoreCase ? right.toLowerCase() : right;

  if (lhs === rhs) {
    return true;
  }

  // Two blank (empty or whitespace-only) tokens are interchangeable when
  // whitespace is being ignored.
  return ignoreWhitespace && !reWhitespace.test(lhs) && !reWhitespace.test(rhs);
};
/**
 * Splits `value` into tokens, then re-joins word fragments that the split
 * separated even though they belong to a single word.
 *
 * @param {string} value - Text to tokenize.
 * @returns {string[]} The result of `value.split(...)` with adjacent
 *   extended-Latin word fragments merged back together.
 */
wordDiff.tokenize = function(value) {
  // A tokenizer supplied through the options takes precedence. The default
  // splits on whitespace runs, brackets/quotes and word boundaries; because
  // the pattern is one capture group, the separators are kept in the output.
  const splitter = this.options.tokenizer || /(\s+|[()[\]{}'"]|\b)/;
  const tokens = value.split(splitter);

  // `\b` only recognises ASCII word characters, so the default pattern also
  // splits inside words containing extended Latin letters, leaving an empty
  // separator between the two halves. Undo those splits.
  let index = 0;
  while (index < tokens.length - 1) {
    const separator = tokens[index + 1];
    const following = tokens[index + 2];
    const splitsOneWord = !separator && following
      && extendedWordChars.test(tokens[index])
      && extendedWordChars.test(following);

    if (splitsOneWord) {
      // Absorb the next fragment and drop both it and the empty separator.
      // Stay on the same index: the merged token may join with what follows.
      tokens[index] += following;
      tokens.splice(index + 1, 2);
    } else {
      index++;
    }
  }

  return tokens;
};
/**
 * Diffs two strings with `wordDiff`, passing `{ignoreWhitespace: true}` to
 * `generateOptions` alongside the caller's options.
 *
 * NOTE(review): presumably `generateOptions` treats its second argument as
 * defaults that the caller's `options` can extend or override — confirm in
 * ../util/params.
 *
 * @param {string} oldStr - Original text.
 * @param {string} newStr - Revised text.
 * @param {*} [options] - Forwarded to `generateOptions`.
 * @returns {*} Whatever `wordDiff.diff` returns.
 */
export function diffWords(oldStr, newStr, options) {
  const wordOptions = generateOptions(options, {ignoreWhitespace: true});
  return wordDiff.diff(oldStr, newStr, wordOptions);
}
/**
 * Diffs two strings with `wordDiff`, forwarding `options` untouched. Unlike
 * `diffWords`, no `ignoreWhitespace` setting is added here.
 *
 * @param {string} oldStr - Original text.
 * @param {string} newStr - Revised text.
 * @param {*} [options] - Passed straight through to `wordDiff.diff`.
 * @returns {*} Whatever `wordDiff.diff` returns.
 */
export function diffWordsWithSpace(oldStr, newStr, options) {
  const result = wordDiff.diff(oldStr, newStr, options);
  return result;
}