@@ -680,7 +680,7 @@ impl CharClass {
680
680
self . canonicalize ( )
681
681
}
682
682
683
- /// Canonicalze any sequence of ranges.
683
+ /// Canonicalize any sequence of ranges.
684
684
///
685
685
/// This is responsible for enforcing the canonical format invariants
686
686
/// as described on the docs for the `CharClass` type.
@@ -703,6 +703,41 @@ impl CharClass {
703
703
ordered
704
704
}
705
705
706
+ /// Calculate the intersection of two canonical character classes.
707
+ ///
708
+ /// The returned intersection is canonical.
709
+ fn intersection ( & self , other : & CharClass ) -> CharClass {
710
+ if self . ranges . is_empty ( ) || other. ranges . is_empty ( ) {
711
+ return CharClass :: empty ( ) ;
712
+ }
713
+
714
+ let mut intersection = CharClass :: empty ( ) ;
715
+
716
+ let mut iter_a = self . ranges . iter ( ) ;
717
+ let mut iter_b = other. ranges . iter ( ) ;
718
+ let mut a = iter_a. next ( ) . unwrap ( ) ;
719
+ let mut b = iter_b. next ( ) . unwrap ( ) ;
720
+ loop {
721
+ if let Some ( i) = a. intersection ( & b) {
722
+ intersection. ranges . push ( i) ;
723
+ }
724
+
725
+ // If the range with the smaller end didn't match this time,
726
+ // it won't ever match, so move on to the next one.
727
+ let ( iter, item) = if a. end < b. end {
728
+ ( & mut iter_a, & mut a)
729
+ } else {
730
+ ( & mut iter_b, & mut b)
731
+ } ;
732
+ match iter. next ( ) {
733
+ Some ( v) => * item = v,
734
+ None => break , // no more ranges to check, done
735
+ }
736
+ }
737
+
738
+ intersection. canonicalize ( )
739
+ }
740
+
706
741
/// Negates the character class.
707
742
///
708
743
/// For all `c` where `c` is a Unicode scalar value, `c` matches `self`
@@ -801,6 +836,18 @@ impl ClassRange {
801
836
max ( self . start , other. start ) <= inc_char ( min ( self . end , other. end ) )
802
837
}
803
838
839
+ /// Returns the intersection of the two ranges if they have common
840
+ /// characters, `None` otherwise.
841
+ fn intersection ( & self , other : & ClassRange ) -> Option < ClassRange > {
842
+ let start = max ( self . start , other. start ) ;
843
+ let end = min ( self . end , other. end ) ;
844
+ if start <= end {
845
+ Some ( ClassRange :: new ( start, end) )
846
+ } else {
847
+ None
848
+ }
849
+ }
850
+
804
851
/// Creates a new range representing the union of `self` and `other`.
805
852
fn merge ( self , other : ClassRange ) -> ClassRange {
806
853
ClassRange {
@@ -1907,6 +1954,108 @@ mod tests {
1907
1954
] ) ) ;
1908
1955
}
1909
1956
1957
// An empty class intersected with a single-character class is empty.
#[test]
fn class_intersection_empty() {
    let lhs = class(&[]);
    let rhs = class(&[('a', 'a')]);
    let want = class(&[]);
    assert_intersection(lhs, rhs, want);
}
1963
+
1964
// Two identical single-character classes intersect to that character.
#[test]
fn class_intersection_single_equal() {
    let lhs = class(&[('a', 'a')]);
    let rhs = class(&[('a', 'a')]);
    let want = class(&[('a', 'a')]);
    assert_intersection(lhs, rhs, want);
}
1970
+
1971
// Two different single-character classes have nothing in common.
#[test]
fn class_intersection_single_unequal() {
    let lhs = class(&[('a', 'a')]);
    let rhs = class(&[('b', 'b')]);
    let want = class(&[]);
    assert_intersection(lhs, rhs, want);
}
1977
+
1978
// A single character that lies inside the other class's range survives.
#[test]
fn class_intersection_single_in_other() {
    let lhs = class(&[('a', 'a')]);
    let rhs = class(&[('a', 'c')]);
    let want = class(&[('a', 'a')]);
    assert_intersection(lhs, rhs, want);
}
1984
+
1985
// A range sharing its start with a longer range is returned unchanged.
#[test]
fn class_intersection_range_in_other() {
    let lhs = class(&[('a', 'b')]);
    let rhs = class(&[('a', 'c')]);
    let want = class(&[('a', 'b')]);
    assert_intersection(lhs, rhs, want);
}
1991
+
1992
// Ranges that overlap in exactly one character yield just that character.
#[test]
fn class_intersection_range_intersection() {
    let lhs = class(&[('a', 'b')]);
    let rhs = class(&[('b', 'c')]);
    let want = class(&[('b', 'b')]);
    assert_intersection(lhs, rhs, want);
}
1998
+
1999
// Ranges that touch ('b' is followed by 'c') but do not overlap share
// no characters.
#[test]
fn class_intersection_only_adjacent() {
    let lhs = class(&[('a', 'b')]);
    let rhs = class(&[('c', 'd')]);
    let want = class(&[]);
    assert_intersection(lhs, rhs, want);
}
2005
+
2006
// A range strictly inside a wider range is returned unchanged.
#[test]
fn class_intersection_range_subset() {
    let lhs = class(&[('b', 'c')]);
    let rhs = class(&[('a', 'd')]);
    let want = class(&[('b', 'c')]);
    assert_intersection(lhs, rhs, want);
}
2012
+
2013
// Several small ranges all covered by one large range come back as-is.
#[test]
fn class_intersection_many_ranges_in_one_big() {
    let lhs = class(&[('a', 'b'), ('d', 'e'), ('g', 'h')]);
    let rhs = class(&[('a', 'h')]);
    let want = class(&[('a', 'b'), ('d', 'e'), ('g', 'h')]);
    assert_intersection(lhs, rhs, want);
}
2021
+
2022
// Intersecting a multi-range class with an identical class yields the
// same ranges.
#[test]
fn class_intersection_many_ranges_same() {
    let lhs = class(&[('a', 'b'), ('d', 'e'), ('g', 'h')]);
    let rhs = class(&[('a', 'b'), ('d', 'e'), ('g', 'h')]);
    let want = class(&[('a', 'b'), ('d', 'e'), ('g', 'h')]);
    assert_intersection(lhs, rhs, want);
}
2030
+
2031
// Interleaved ranges with gaps between them never overlap.
#[test]
fn class_intersection_multiple_non_intersecting() {
    let lhs = class(&[('a', 'b'), ('g', 'h')]);
    let rhs = class(&[('d', 'e'), ('k', 'l')]);
    let want = class(&[]);
    assert_intersection(lhs, rhs, want);
}
2037
+
2038
// The only overlap is with the last range on one side; the earlier
// non-overlapping ranges must be skipped without losing it.
#[test]
fn class_intersection_non_intersecting_then_intersecting() {
    let lhs = class(&[('a', 'b'), ('d', 'e'), ('g', 'h')]);
    let rhs = class(&[('h', 'h')]);
    let want = class(&[('h', 'h')]);
    assert_intersection(lhs, rhs, want);
}
2044
+
2045
// Ranges that alternate between the two inputs, each touching the next
// without overlapping, produce an empty result.
#[test]
fn class_intersection_adjacent_alternating() {
    let lhs = class(&[('a', 'b'), ('e', 'f'), ('i', 'j')]);
    let rhs = class(&[('c', 'd'), ('g', 'h'), ('k', 'l')]);
    let want = class(&[]);
    assert_intersection(lhs, rhs, want);
}
2051
+
2052
// Inputs whose ranges overlap each other in a staggered fashion; the
// expected result is the single range 'b'..='f'.
#[test]
fn class_intersection_overlapping_alternating() {
    let lhs = class(&[('a', 'b'), ('c', 'd'), ('e', 'f')]);
    let rhs = class(&[('b', 'c'), ('d', 'e'), ('f', 'g')]);
    let want = class(&[('b', 'f')]);
    assert_intersection(lhs, rhs, want);
}
2058
+
1910
2059
#[ test]
1911
2060
fn class_canon_overlap_many_case_fold ( ) {
1912
2061
let cls = class ( & [
@@ -2056,4 +2205,10 @@ mod tests {
2056
2205
let expr = e ( "(?-u)[-./]" ) ;
2057
2206
assert_eq ! ( "(?-u:[-\\ .-/])" , expr. to_string( ) ) ;
2058
2207
}
2208
+
2209
+ fn assert_intersection ( cls1 : CharClass , cls2 : CharClass , expected : CharClass ) {
2210
+ // intersection operation should be commutative
2211
+ assert_eq ! ( cls1. intersection( & cls2) , expected) ;
2212
+ assert_eq ! ( cls2. intersection( & cls1) , expected) ;
2213
+ }
2059
2214
}
0 commit comments