Mark U+A8FA DEVANAGARI CARET as zero-width

Jules-Bertholet · Jules-Bertholet · commit 3b56f6ddcb37 · 2024-05-21T15:33:35.000-04:00
diff --git a/scripts/unicode.py b/scripts/unicode.py
@@ -245,6 +245,10 @@ def load_zero_widths() -> "list[bool]":
     # This is a `Prepended_Concatenation_Mark`, but unlike the others it's zero-width
     zw_map[0x070F] = True
 
+    # U+A8FA DEVANAGARI CARET is treated as zero-width; see the Unicode Standard, ch. 12:
+    # https://www.unicode.org/versions/Unicode15.0.0/ch12.pdf#G667447
+    zw_map[0xA8FA] = True
+
     return zw_map
 
 
diff --git a/src/lib.rs b/src/lib.rs
@@ -58,6 +58,7 @@
 //!    - [Characters](https://util.unicode.org/UnicodeJsps/list-unicodeset.jsp?a=%5Cp%7BHangul_Syllable_Type%3DV%7D%5Cp%7BHangul_Syllable_Type%3DT%7D)
 //!       with a [`Hangul_Syllable_Type`] of `Vowel_Jamo` (`V`) or `Trailing_Jamo` (`T`).
 //!    - `'\u{070F}'` [SYRIAC] ABBREVIATION MARK.
+//!    - [`'\u{A8FA}'` DEVANAGARI CARET](https://util.unicode.org/UnicodeJsps/character.jsp?a=A8FA).
 //! 7. [Characters](https://util.unicode.org/UnicodeJsps/list-unicodeset.jsp?a=%5Cp%7BEast_Asian_Width%3DF%7D%5Cp%7BEast_Asian_Width%3DW%7D)
 //!    with an [`East_Asian_Width`] of [`Fullwidth`] or [`Wide`] have width 2.
 //! 8. [Characters](https://util.unicode.org/UnicodeJsps/list-unicodeset.jsp?a=%5Cp%7BEast_Asian_Width%3DA%7D)
diff --git a/src/tables.rs b/src/tables.rs
@@ -438,7 +438,7 @@ pub mod charwidth {
         0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55,
         0x55, 0x55, 0x50, 0x55, 0x55, 0x55, 0x45, 0x45, 0x15, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55,
         0x41, 0x55, 0x54, 0x55, 0x55, 0x55, 0x55, 0x55, 0x50, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55,
-        0x00, 0x00, 0x00, 0x00, 0x50, 0x55, 0x55, 0x15, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55,
+        0x00, 0x00, 0x00, 0x00, 0x50, 0x55, 0x45, 0x15, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55,
         0x55, 0x55, 0x05, 0x00, 0x50, 0x55, 0x55, 0x55, 0x55, 0x55, 0x15, 0x00, 0x00, 0x50, 0x55,
         0x55, 0x55, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0x56, 0x40, 0x55, 0x55, 0x55, 0x55,
         0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x15, 0x05, 0x50, 0x50, 0x55, 0x55, 0x55, 0x55,
diff --git a/tests/tests.rs b/tests/tests.rs
@@ -107,10 +107,8 @@ fn test_prepended_concatenation_marks() {
 #[test]
 fn test_syriac_abbreviation_mark() {
     assert_eq!('\u{070F}'.width(), Some(0));
-    assert_eq!("\u{070F}".width(), 0);
 }
 
-
 #[test]
 fn test_interlinear_annotation_chars() {
     assert_eq!('\u{FFF9}'.width(), Some(1));
@@ -137,6 +135,11 @@ fn test_marks() {
     assert_eq!('\u{09BE}'.width(), Some(0));
 }
 
+#[test]
+fn test_devanagari_caret() {
+    assert_eq!('\u{A8FA}'.width(), Some(0));
+}
+
 #[test]
 fn test_canonical_equivalence() {
     let norm_file = BufReader::new(