Skip to content

Commit 3b56f6d

Browse files
Mark U+A8FA DEVANAGARI CARET as zero-width
1 parent 934c875 commit 3b56f6d

File tree

4 files changed

+11 -3 lines changed

4 files changed

+11 -3 lines changed

scripts/unicode.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -245,6 +245,10 @@ def load_zero_widths() -> "list[bool]":
245245
# This is a `Prepended_Concatenation_Mark`, but unlike the others it's zero-width
246246
zw_map[0x070F] = True
247247

248+
# U+A8FA DEVANAGARI CARET
249+
# https://www.unicode.org/versions/Unicode15.0.0/ch12.pdf#G667447
250+
zw_map[0xA8FA] = True
251+
248252
return zw_map
249253

250254

src/lib.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,7 @@
5858
//! - [Characters](https://util.unicode.org/UnicodeJsps/list-unicodeset.jsp?a=%5Cp%7BHangul_Syllable_Type%3DV%7D%5Cp%7BHangul_Syllable_Type%3DT%7D)
5959
//! with a [`Hangul_Syllable_Type`] of `Vowel_Jamo` (`V`) or `Trailing_Jamo` (`T`).
6060
//! - `'\u{070F}'` [SYRIAC] ABBREVIATION MARK.
61+
//! - [`'\u{A8FA}'` DEVANAGARI CARET](https://util.unicode.org/UnicodeJsps/character.jsp?a=A8FA).
6162
//! 7. [Characters](https://util.unicode.org/UnicodeJsps/list-unicodeset.jsp?a=%5Cp%7BEast_Asian_Width%3DF%7D%5Cp%7BEast_Asian_Width%3DW%7D)
6263
//! with an [`East_Asian_Width`] of [`Fullwidth`] or [`Wide`] have width 2.
6364
//! 8. [Characters](https://util.unicode.org/UnicodeJsps/list-unicodeset.jsp?a=%5Cp%7BEast_Asian_Width%3DA%7D)

src/tables.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -438,7 +438,7 @@ pub mod charwidth {
438438
0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55,
439439
0x55, 0x55, 0x50, 0x55, 0x55, 0x55, 0x45, 0x45, 0x15, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55,
440440
0x41, 0x55, 0x54, 0x55, 0x55, 0x55, 0x55, 0x55, 0x50, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55,
441-
0x00, 0x00, 0x00, 0x00, 0x50, 0x55, 0x55, 0x15, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55,
441+
0x00, 0x00, 0x00, 0x00, 0x50, 0x55, 0x45, 0x15, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55,
442442
0x55, 0x55, 0x05, 0x00, 0x50, 0x55, 0x55, 0x55, 0x55, 0x55, 0x15, 0x00, 0x00, 0x50, 0x55,
443443
0x55, 0x55, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0x56, 0x40, 0x55, 0x55, 0x55, 0x55,
444444
0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x15, 0x05, 0x50, 0x50, 0x55, 0x55, 0x55, 0x55,

tests/tests.rs

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -107,10 +107,8 @@ fn test_prepended_concatenation_marks() {
107107
#[test]
108108
fn test_syriac_abbreviation_mark() {
109109
assert_eq!('\u{070F}'.width(), Some(0));
110-
assert_eq!("\u{070F}".width(), 0);
111110
}
112111

113-
114112
#[test]
115113
fn test_interlinear_annotation_chars() {
116114
assert_eq!('\u{FFF9}'.width(), Some(1));
@@ -137,6 +135,11 @@ fn test_marks() {
137135
assert_eq!('\u{09BE}'.width(), Some(0));
138136
}
139137

138+
#[test]
139+
fn test_devanagari_caret() {
140+
assert_eq!('\u{A8FA}'.width(), Some(0));
141+
}
142+
140143
#[test]
141144
fn test_canonical_equivalence() {
142145
let norm_file = BufReader::new(

0 commit comments

Comments (0)