40
40
//! 3. The sequence `"\r\n"` has width 1.
41
41
//! 4. [Lisu tone letter] combinations consisting of a character in the range `'\u{A4F8}'..='\u{A4FB}'`
42
42
//! followed by a character in the range `'\u{A4FC}'..='\u{A4FD}'` have width 1.
43
- //! 5. [`'\u{115F}'` HANGUL CHOSEONG FILLER](https://util.unicode.org/UnicodeJsps/character.jsp?a=115F) has width 2.
44
- //! 6. The following have width 0:
43
+ //! 5. In an East Asian context only, `<`, `=`, or `>` have width 2 when followed by [`'\u{0338}'` COMBINING LONG SOLIDUS OVERLAY].
44
+ //! 6. [`'\u{115F}'` HANGUL CHOSEONG FILLER](https://util.unicode.org/UnicodeJsps/character.jsp?a=115F) has width 2.
45
+ //! 7. The following have width 0:
45
46
//! - [Characters](https://util.unicode.org/UnicodeJsps/list-unicodeset.jsp?a=%5Cp%7BDefault_Ignorable_Code_Point%7D)
46
47
//! with the [`Default_Ignorable_Code_Point`] property.
47
48
//! - [Characters](https://util.unicode.org/UnicodeJsps/list-unicodeset.jsp?a=%5Cp%7BGrapheme_Extend%7D)
64
65
//! - [`'\u{0891}'` PIASTRE MARK ABOVE](https://util.unicode.org/UnicodeJsps/character.jsp?a=0891), and
65
66
//! - [`'\u{08E2}'` DISPUTED END OF AYAH](https://util.unicode.org/UnicodeJsps/character.jsp?a=08E2).
66
67
//! - [`'\u{A8FA}'` DEVANAGARI CARET](https://util.unicode.org/UnicodeJsps/character.jsp?a=A8FA).
67
- //! 7. [Characters](https://util.unicode.org/UnicodeJsps/list-unicodeset.jsp?a=%5Cp%7BEast_Asian_Width%3DF%7D%5Cp%7BEast_Asian_Width%3DW%7D)
68
+ //! 8. [Characters](https://util.unicode.org/UnicodeJsps/list-unicodeset.jsp?a=%5Cp%7BEast_Asian_Width%3DF%7D%5Cp%7BEast_Asian_Width%3DW%7D)
68
69
//! with an [`East_Asian_Width`] of [`Fullwidth`] or [`Wide`] have width 2.
69
- //! 8. [Characters](https://util.unicode.org/UnicodeJsps/list-unicodeset.jsp?a=%5Cp%7BEast_Asian_Width%3DA%7D)
70
- //! with an [`East_Asian_Width`] of [`Ambiguous`] have width 2 in an East Asian context, and width 1 otherwise.
71
- //! 9. All other characters have width 1.
70
+ //! 9. Characters fulfilling all of the following conditions have width 2 in an East Asian context, and width 1 otherwise:
71
+ //! - Has an [`East_Asian_Width`] of [`Ambiguous`], or
72
+ //! has a canonical decomposition to an [`Ambiguous`] character followed by [`'\u{0338}'` COMBINING LONG SOLIDUS OVERLAY], or
73
+ //! is [`'\u{0387}'` GREEK ANO TELEIA](https://util.unicode.org/UnicodeJsps/character.jsp?a=0387), and
74
+ //! - Does not have a [`General_Category`] of `Modifier_Symbol`, and
75
+ //! - Does not have a [`Script`] of `Latin`, `Greek`, or `Cyrillic`, or is a Roman numeral in the range `'\u{2160}'..='\u{217F}'`.
76
+ //! 10. All other characters have width 1.
77
+ //!
78
+ //! [`'\u{0338}'` COMBINING LONG SOLIDUS OVERLAY]: https://util.unicode.org/UnicodeJsps/character.jsp?a=0338
72
79
//!
73
80
//! [`Default_Ignorable_Code_Point`]: https://www.unicode.org/versions/Unicode15.0.0/ch05.pdf#G40095
74
81
//! [`East_Asian_Width`]: https://www.unicode.org/reports/tr11/#ED1
75
82
//! [`Emoji_Presentation`]: https://unicode.org/reports/tr51/#def_emoji_presentation
83
+ //! [`General_Category`]: https://www.unicode.org/versions/Unicode15.0.0/ch04.pdf#G124142
76
84
//! [`Grapheme_Extend`]: https://www.unicode.org/versions/Unicode15.0.0/ch03.pdf#G52443
77
85
//! [`Hangul_Syllable_Type`]: https://www.unicode.org/versions/Unicode15.0.0/ch03.pdf#G45593
78
86
//! [`Prepended_Concatenation_Mark`]: https://www.unicode.org/versions/Unicode15.0.0/ch23.pdf#G37908
87
+ //! [`Script`]: https://www.unicode.org/reports/tr24/#Script
79
88
//!
80
89
//! [`Fullwidth`]: https://www.unicode.org/reports/tr11/#ED2
81
90
//! [`Wide`]: https://www.unicode.org/reports/tr11/#ED4
84
93
//! [Emoji presentation sequences]: https://unicode.org/reports/tr51/#def_emoji_presentation_sequence
85
94
//! [text presentation sequences]: https://unicode.org/reports/tr51/#def_text_presentation_sequence
86
95
//!
87
- //! [Enclosed Ideographic Supplement]: https://unicode.org/charts/PDF/U1F200.pdf
96
+ //! [Enclosed Ideographic Supplement]: https://unicode.org/charts/nameslist/n_1F200.html
88
97
//!
89
98
//! [Lisu tone letter]: https://www.unicode.org/versions/Unicode15.0.0/ch18.pdf#G42078
90
99
//!
91
100
//! ## Canonical equivalence
92
101
//!
93
- //! The non-CJK width methods guarantee that canonically equivalent strings are assigned the same width.
94
- //! However, this guarantee does not currently hold for the CJK width variants.
102
+ //! Canonically equivalent strings are assigned the same width by both the CJK and non-CJK width methods.
95
103
96
104
#![ forbid( unsafe_code) ]
97
105
#![ deny( missing_docs) ]
@@ -198,14 +206,17 @@ enum NextCharInfo {
198
206
#[ default]
199
207
Default ,
200
208
/// `'\n'`
201
- LineFeed = 0x0A ,
209
+ LineFeed ,
210
+ /// `'\u{0338}'`
211
+ /// (tracked to preserve canonical equivalence in the CJK width methods)
212
+ CombiningLongSolidusOverlay ,
202
213
/// `'\u{A4FC}'..='\u{A4FD}'`
203
214
/// <https://www.unicode.org/versions/Unicode15.0.0/ch18.pdf#G42078>
204
215
TrailingLisuToneLetter ,
205
216
/// `'\u{FE0E}'`
206
- Vs15 = 0x0E ,
217
+ Vs15 ,
207
218
/// `'\u{FE0F}'`
208
- Vs16 = 0x0F ,
219
+ Vs16 ,
209
220
}
210
221
211
222
fn str_width ( s : & str , is_cjk : bool ) -> usize {
@@ -222,7 +233,11 @@ fn str_width(s: &str, is_cjk: bool) -> usize {
222
233
/// they're treated as single width.
223
234
#[ inline]
224
235
fn width_in_str ( c : char , is_cjk : bool , next_info : NextCharInfo ) -> ( usize , NextCharInfo ) {
225
- if next_info == NextCharInfo :: Vs16 && cw:: starts_emoji_presentation_seq ( c) {
236
+ if ( is_cjk
237
+ && next_info == NextCharInfo :: CombiningLongSolidusOverlay
238
+ && matches ! ( c, '<' | '=' | '>' ) )
239
+ || ( next_info == NextCharInfo :: Vs16 && cw:: starts_emoji_presentation_seq ( c) )
240
+ {
226
241
( 2 , NextCharInfo :: Default )
227
242
} else if c <= '\u{A0}' {
228
243
match c {
@@ -235,6 +250,7 @@ fn width_in_str(c: char, is_cjk: bool, next_info: NextCharInfo) -> (usize, NextC
235
250
( '\u{A4F8}' ..='\u{A4FB}' , NextCharInfo :: TrailingLisuToneLetter ) => {
236
251
( 0 , NextCharInfo :: Default )
237
252
}
253
+ ( '\u{0338}' , _) => ( 0 , NextCharInfo :: CombiningLongSolidusOverlay ) ,
238
254
( '\u{A4FC}' ..='\u{A4FD}' , _) => ( 1 , NextCharInfo :: TrailingLisuToneLetter ) ,
239
255
( '\u{FE0E}' , _) => ( 0 , NextCharInfo :: Vs15 ) ,
240
256
( '\u{FE0F}' , _) => ( 0 , NextCharInfo :: Vs16 ) ,
0 commit comments