@@ -1635,6 +1635,42 @@ pub enum Look {
1635
1635
WordUnicode = 1 << 8 ,
1636
1636
/// Match a Unicode-aware negation of a word boundary.
1637
1637
WordUnicodeNegate = 1 << 9 ,
1638
+ /// Match the start of an ASCII-only word boundary. That is, this matches a
1639
+ /// position at either the beginning of the haystack or where the previous
1640
+ /// character is not a word character and the following character is a word
1641
+ /// character.
1642
+ WordStartAscii = 1 << 10 ,
1643
+ /// Match the end of an ASCII-only word boundary. That is, this matches
1644
+ /// a position at either the end of the haystack or where the previous
1645
+ /// character is a word character and the following character is not a word
1646
+ /// character.
1647
+ WordEndAscii = 1 << 11 ,
1648
+ /// Match the start of a Unicode word boundary. That is, this matches a
1649
+ /// position at either the beginning of the haystack or where the previous
1650
+ /// character is not a word character and the following character is a word
1651
+ /// character.
1652
+ WordStartUnicode = 1 << 12 ,
1653
+ /// Match the end of a Unicode word boundary. That is, this matches a
1654
+ /// position at either the end of the haystack or where the previous
1655
+ /// character is a word character and the following character is not a word
1656
+ /// character.
1657
+ WordEndUnicode = 1 << 13 ,
1658
+ /// Match the start half of an ASCII-only word boundary. That is, this
1659
+ /// matches a position at either the beginning of the haystack or where the
1660
+ /// previous character is not a word character.
1661
+ WordStartHalfAscii = 1 << 14 ,
1662
+ /// Match the end half of an ASCII-only word boundary. That is, this
1663
+ /// matches a position at either the end of the haystack or where the
1664
+ /// following character is not a word character.
1665
+ WordEndHalfAscii = 1 << 15 ,
1666
+ /// Match the start half of a Unicode word boundary. That is, this matches
1667
+ /// a position at either the beginning of the haystack or where the
1668
+ /// previous character is not a word character.
1669
+ WordStartHalfUnicode = 1 << 16 ,
1670
+ /// Match the end half of a Unicode word boundary. That is, this matches
1671
+ /// a position at either the end of the haystack or where the following
1672
+ /// character is not a word character.
1673
+ WordEndHalfUnicode = 1 << 17 ,
1638
1674
}
1639
1675
1640
1676
impl Look {
@@ -1656,6 +1692,14 @@ impl Look {
1656
1692
Look :: WordAsciiNegate => Look :: WordAsciiNegate ,
1657
1693
Look :: WordUnicode => Look :: WordUnicode ,
1658
1694
Look :: WordUnicodeNegate => Look :: WordUnicodeNegate ,
1695
+ Look :: WordStartAscii => Look :: WordEndAscii ,
1696
+ Look :: WordEndAscii => Look :: WordStartAscii ,
1697
+ Look :: WordStartUnicode => Look :: WordEndUnicode ,
1698
+ Look :: WordEndUnicode => Look :: WordStartUnicode ,
1699
+ Look :: WordStartHalfAscii => Look :: WordEndHalfAscii ,
1700
+ Look :: WordEndHalfAscii => Look :: WordStartHalfAscii ,
1701
+ Look :: WordStartHalfUnicode => Look :: WordEndHalfUnicode ,
1702
+ Look :: WordEndHalfUnicode => Look :: WordStartHalfUnicode ,
1659
1703
}
1660
1704
}
1661
1705
@@ -1676,16 +1720,24 @@ impl Look {
1676
1720
#[ inline]
1677
1721
pub const fn from_repr ( repr : u32 ) -> Option < Look > {
1678
1722
match repr {
1679
- 0b00_0000_0001 => Some ( Look :: Start ) ,
1680
- 0b00_0000_0010 => Some ( Look :: End ) ,
1681
- 0b00_0000_0100 => Some ( Look :: StartLF ) ,
1682
- 0b00_0000_1000 => Some ( Look :: EndLF ) ,
1683
- 0b00_0001_0000 => Some ( Look :: StartCRLF ) ,
1684
- 0b00_0010_0000 => Some ( Look :: EndCRLF ) ,
1685
- 0b00_0100_0000 => Some ( Look :: WordAscii ) ,
1686
- 0b00_1000_0000 => Some ( Look :: WordAsciiNegate ) ,
1687
- 0b01_0000_0000 => Some ( Look :: WordUnicode ) ,
1688
- 0b10_0000_0000 => Some ( Look :: WordUnicodeNegate ) ,
1723
+ 0b00_0000_0000_0000_0001 => Some ( Look :: Start ) ,
1724
+ 0b00_0000_0000_0000_0010 => Some ( Look :: End ) ,
1725
+ 0b00_0000_0000_0000_0100 => Some ( Look :: StartLF ) ,
1726
+ 0b00_0000_0000_0000_1000 => Some ( Look :: EndLF ) ,
1727
+ 0b00_0000_0000_0001_0000 => Some ( Look :: StartCRLF ) ,
1728
+ 0b00_0000_0000_0010_0000 => Some ( Look :: EndCRLF ) ,
1729
+ 0b00_0000_0000_0100_0000 => Some ( Look :: WordAscii ) ,
1730
+ 0b00_0000_0000_1000_0000 => Some ( Look :: WordAsciiNegate ) ,
1731
+ 0b00_0000_0001_0000_0000 => Some ( Look :: WordUnicode ) ,
1732
+ 0b00_0000_0010_0000_0000 => Some ( Look :: WordUnicodeNegate ) ,
1733
+ 0b00_0000_0100_0000_0000 => Some ( Look :: WordStartAscii ) ,
1734
+ 0b00_0000_1000_0000_0000 => Some ( Look :: WordEndAscii ) ,
1735
+ 0b00_0001_0000_0000_0000 => Some ( Look :: WordStartUnicode ) ,
1736
+ 0b00_0010_0000_0000_0000 => Some ( Look :: WordEndUnicode ) ,
1737
+ 0b00_0100_0000_0000_0000 => Some ( Look :: WordStartHalfAscii ) ,
1738
+ 0b00_1000_0000_0000_0000 => Some ( Look :: WordEndHalfAscii ) ,
1739
+ 0b01_0000_0000_0000_0000 => Some ( Look :: WordStartHalfUnicode ) ,
1740
+ 0b10_0000_0000_0000_0000 => Some ( Look :: WordEndHalfUnicode ) ,
1689
1741
_ => None ,
1690
1742
}
1691
1743
}
@@ -1710,6 +1762,14 @@ impl Look {
1710
1762
Look :: WordAsciiNegate => 'B' ,
1711
1763
Look :: WordUnicode => '𝛃' ,
1712
1764
Look :: WordUnicodeNegate => '𝚩' ,
1765
+ Look :: WordStartAscii => '<' ,
1766
+ Look :: WordEndAscii => '>' ,
1767
+ Look :: WordStartUnicode => '〈' ,
1768
+ Look :: WordEndUnicode => '〉' ,
1769
+ Look :: WordStartHalfAscii => '◁' ,
1770
+ Look :: WordEndHalfAscii => '▷' ,
1771
+ Look :: WordStartHalfUnicode => '◀' ,
1772
+ Look :: WordEndHalfUnicode => '▶' ,
1713
1773
}
1714
1774
}
1715
1775
}
@@ -2703,13 +2763,22 @@ impl LookSet {
2703
2763
pub fn contains_word_unicode ( self ) -> bool {
2704
2764
self . contains ( Look :: WordUnicode )
2705
2765
|| self . contains ( Look :: WordUnicodeNegate )
2766
+ || self . contains ( Look :: WordStartUnicode )
2767
+ || self . contains ( Look :: WordEndUnicode )
2768
+ || self . contains ( Look :: WordStartHalfUnicode )
2769
+ || self . contains ( Look :: WordEndHalfUnicode )
2706
2770
}
2707
2771
2708
2772
/// Returns true if and only if this set contains any ASCII word boundary
2709
2773
/// or negated ASCII word boundary assertions.
2710
2774
#[ inline]
2711
2775
pub fn contains_word_ascii ( self ) -> bool {
2712
- self . contains ( Look :: WordAscii ) || self . contains ( Look :: WordAsciiNegate )
2776
+ self . contains ( Look :: WordAscii )
2777
+ || self . contains ( Look :: WordAsciiNegate )
2778
+ || self . contains ( Look :: WordStartAscii )
2779
+ || self . contains ( Look :: WordEndAscii )
2780
+ || self . contains ( Look :: WordStartHalfAscii )
2781
+ || self . contains ( Look :: WordEndHalfAscii )
2713
2782
}
2714
2783
2715
2784
/// Returns an iterator over all of the look-around assertions in this set.
@@ -3769,7 +3838,7 @@ mod tests {
3769
3838
assert_eq ! ( 0 , set. iter( ) . count( ) ) ;
3770
3839
3771
3840
let set = LookSet :: full ( ) ;
3772
- assert_eq ! ( 10 , set. iter( ) . count( ) ) ;
3841
+ assert_eq ! ( 18 , set. iter( ) . count( ) ) ;
3773
3842
3774
3843
let set =
3775
3844
LookSet :: empty ( ) . insert ( Look :: StartLF ) . insert ( Look :: WordUnicode ) ;
@@ -3787,6 +3856,6 @@ mod tests {
3787
3856
let res = format ! ( "{:?}" , LookSet :: empty( ) ) ;
3788
3857
assert_eq ! ( "∅" , res) ;
3789
3858
let res = format ! ( "{:?}" , LookSet :: full( ) ) ;
3790
- assert_eq ! ( "Az^$rRbB𝛃𝚩" , res) ;
3859
+ assert_eq ! ( "Az^$rRbB𝛃𝚩<>〈〉◁▷◀▶ " , res) ;
3791
3860
}
3792
3861
}
0 commit comments