Skip to content

Commit 8733340

Browse files
committed
Strip diacritics from non-latin alphabets (#659, #660)
1 parent: e23261c · commit: 8733340

File tree

2 files changed

+15
-10
lines changed

2 files changed

+15
-10
lines changed

src/__tests__/utils/stripDiacritics.test.js

+7
Original file line number | Diff line number | Diff line change
@@ -12,6 +12,13 @@ describe('stripDiacritics', () => {
1212
expect(stripDiacritics(string)).toBe(result);
1313
});
1414

15+
it('works for non-latin alphabets', () => {
16+
const string = 'ΆΈΉΊΪΌΎΫΏάέίϊΐόύϋΰ';
17+
const result = 'ΑΕΗΙΙΟΥΥΩαειιιουυυ';
18+
19+
expect(stripDiacritics(string)).toBe(result);
20+
});
21+
1522
it('removes combining diacritical marks from a string', () => {
1623
const alphaRange = ['a', 'b', 'c', 'd', 'e', 'f'];
1724
const numRange = range(30, 37);

src/utils/stripDiacritics.js

+8-10
Original file line number | Diff line number | Diff line change
@@ -104,22 +104,20 @@ const map = [
104104
{ base: 'x', letters: '\u0078\u24E7\uFF58\u1E8B\u1E8D' },
105105
{ base: 'y', letters: '\u0079\u24E8\uFF59\u1EF3\u00FD\u0177\u1EF9\u0233\u1E8F\u00FF\u1EF7\u1E99\u1EF5\u01B4\u024F\u1EFF' },
106106
{ base: 'z', letters: '\u007A\u24E9\uFF5A\u017A\u1E91\u017C\u017E\u1E93\u1E95\u01B6\u0225\u0240\u2C6C\uA763' },
107-
];
108-
109-
const diacriticsMap = {};
110-
for (let ii = 0; ii < map.length; ii++) {
111-
const { letters } = map[ii];
112-
for (let jj = 0; jj < letters.length; jj++) {
113-
diacriticsMap[letters[jj]] = map[ii].base;
114-
}
115-
}
107+
].reduce((acc, { base, letters }) => {
108+
letters.split('').forEach((letter) => {
109+
acc[letter] = base;
110+
});
111+
return acc;
112+
}, {});
116113

117114
// "what?" version ... http://jsperf.com/diacritics/12
118115
export default function stripDiacritics(str: string): string {
119116
return (
120117
str
118+
.normalize('NFD')
121119
.replace(/[\u0300-\u036F]/g, '') // Remove combining diacritics
122120
/* eslint-disable-next-line no-control-regex */
123-
.replace(/[^\u0000-\u007E]/g, (a) => diacriticsMap[a] || a)
121+
.replace(/[^\u0000-\u007E]/g, (a) => map[a] || a)
124122
);
125123
}

0 commit comments

Comments
 (0)