@@ -95,7 +95,7 @@ pub enum TokenKind {
95
95
Literal { kind : LiteralKind , suffix_start : u32 } ,
96
96
97
97
/// "'a"
98
- Lifetime { starts_with_number : bool } ,
98
+ Lifetime { starts_with_number : bool , contains_emoji : bool } ,
99
99
100
100
// One-char tokens:
101
101
/// ";"
@@ -630,7 +630,13 @@ impl Cursor<'_> {
630
630
// If the first symbol is valid for identifier, it can be a lifetime.
631
631
// Also check if it's a number for better error reporting (so '0 will
632
632
// be reported as invalid lifetime and not as unterminated char literal).
633
- is_id_start ( self . first ( ) ) || self . first ( ) . is_digit ( 10 )
633
+ // We also have to account for potential `'🐱` emojis to avoid reporting
634
+ // them as unterminated char literals.
635
+ is_id_start ( self . first ( ) )
636
+ || self . first ( ) . is_digit ( 10 )
637
+ // FIXME(#108019): `unic-emoji-char` seems to have data tables only up to Unicode
638
+ // 5.0, but Unicode is already newer than this.
639
+ || unic_emoji_char:: is_emoji ( self . first ( ) )
634
640
} ;
635
641
636
642
if !can_be_a_lifetime {
@@ -643,16 +649,33 @@ impl Cursor<'_> {
643
649
return Literal { kind, suffix_start } ;
644
650
}
645
651
646
- // Either a lifetime or a character literal with
647
- // length greater than 1.
652
+ // Either a lifetime or a character literal.
648
653
649
654
let starts_with_number = self . first ( ) . is_digit ( 10 ) ;
655
+ let mut contains_emoji = false ;
650
656
651
- // Skip the literal contents.
652
- // First symbol can be a number (which isn't a valid identifier start),
653
- // so skip it without any checks.
654
- self . bump ( ) ;
655
- self . eat_while ( is_id_continue) ;
657
+ // FIXME(#108019): `unic-emoji-char` seems to have data tables only up to Unicode
658
+ // 5.0, but Unicode is already newer than this.
659
+ if unic_emoji_char:: is_emoji ( self . first ( ) ) {
660
+ contains_emoji = true ;
661
+ } else {
662
+ // Skip the literal contents.
663
+ // First symbol can be a number (which isn't a valid identifier start),
664
+ // so skip it without any checks.
665
+ self . bump ( ) ;
666
+ }
667
+ self . eat_while ( |c| {
668
+ if is_id_continue ( c) {
669
+ true
670
+ // FIXME(#108019): `unic-emoji-char` seems to have data tables only up to Unicode
671
+ // 5.0, but Unicode is already newer than this.
672
+ } else if unic_emoji_char:: is_emoji ( c) {
673
+ contains_emoji = true ;
674
+ true
675
+ } else {
676
+ false
677
+ }
678
+ } ) ;
656
679
657
680
// Check if after skipping literal contents we've met a closing
658
681
// single quote (which means that user attempted to create a
@@ -662,7 +685,7 @@ impl Cursor<'_> {
662
685
let kind = Char { terminated : true } ;
663
686
Literal { kind, suffix_start : self . pos_within_token ( ) }
664
687
} else {
665
- Lifetime { starts_with_number }
688
+ Lifetime { starts_with_number, contains_emoji }
666
689
}
667
690
}
668
691
0 commit comments