@@ -992,8 +992,8 @@ impl<'a> DFA<&'a [u8]> {
992
992
// (by trying to decode every state) and start state ID list below. If
993
993
// either validation fails, then we return an error.
994
994
let ( dfa, nread) = unsafe { DFA :: from_bytes_unchecked ( slice) ? } ;
995
- dfa. tt . validate ( & dfa. special ) ?;
996
- dfa. st . validate ( & dfa. special , & dfa . tt ) ?;
995
+ let seen = dfa. tt . validate ( & dfa. special ) ?;
996
+ dfa. st . validate ( & dfa. special , & seen ) ?;
997
997
// N.B. dfa.special doesn't have a way to do unchecked deserialization,
998
998
// so it has already been validated.
999
999
Ok ( ( dfa, nread) )
@@ -1388,63 +1388,8 @@ impl<T: AsRef<[u8]>> Transitions<T> {
1388
1388
///
1389
1389
/// That is, every state ID can be used to correctly index a state in this
1390
1390
/// table.
1391
- fn validate ( & self , sp : & Special ) -> Result < ( ) , DeserializeError > {
1392
- // In order to validate everything, we not only need to make sure we
1393
- // can decode every state, but that every transition in every state
1394
- // points to a valid state. There are many duplicative transitions, so
1395
- // we record state IDs that we've verified so that we don't redo the
1396
- // decoding work.
1397
- //
1398
- // Except, when in no_std mode, we don't have dynamic memory allocation
1399
- // available to us, so we skip this optimization. It's not clear
1400
- // whether doing something more clever is worth it just yet. If you're
1401
- // profiling this code and need it to run faster, please file an issue.
1402
- //
1403
- // OK, so we also use this to record the set of valid state IDs. Since
1404
- // it is possible for a transition to point to an invalid state ID that
1405
- // still (somehow) deserializes to a valid state. So we need to make
1406
- // sure our transitions are limited to actually correct state IDs.
1407
- // The problem is, I'm not sure how to do this verification step in
1408
- // no-std no-alloc mode. I think we'd *have* to store the set of valid
1409
- // state IDs in the DFA itself. For now, we don't do this verification
1410
- // in no-std no-alloc mode. The worst thing that can happen is an
1411
- // incorrect result. But no panics or memory safety problems should
1412
- // result. Because we still do validate that the state itself is
1413
- // "valid" in the sense that everything it points to actually exists.
1414
- //
1415
- // ---AG
1416
- struct Seen {
1417
- #[ cfg( feature = "alloc" ) ]
1418
- set : alloc:: collections:: BTreeSet < StateID > ,
1419
- #[ cfg( not( feature = "alloc" ) ) ]
1420
- set : core:: marker:: PhantomData < StateID > ,
1421
- }
1422
-
1423
- #[ cfg( feature = "alloc" ) ]
1424
- impl Seen {
1425
- fn new ( ) -> Seen {
1426
- Seen { set : alloc:: collections:: BTreeSet :: new ( ) }
1427
- }
1428
- fn insert ( & mut self , id : StateID ) {
1429
- self . set . insert ( id) ;
1430
- }
1431
- fn contains ( & self , id : & StateID ) -> bool {
1432
- self . set . contains ( id)
1433
- }
1434
- }
1435
-
1436
- #[ cfg( not( feature = "alloc" ) ) ]
1437
- impl Seen {
1438
- fn new ( ) -> Seen {
1439
- Seen { set : core:: marker:: PhantomData }
1440
- }
1441
- fn insert ( & mut self , _id : StateID ) { }
1442
- fn contains ( & self , _id : & StateID ) -> bool {
1443
- false
1444
- }
1445
- }
1446
-
1447
- let mut verified: Seen = Seen :: new ( ) ;
1391
+ fn validate ( & self , sp : & Special ) -> Result < Seen , DeserializeError > {
1392
+ let mut verified = Seen :: new ( ) ;
1448
1393
// We need to make sure that we decode the correct number of states.
1449
1394
// Otherwise, an empty set of transitions would validate even if the
1450
1395
// recorded state length is non-empty.
@@ -1521,7 +1466,7 @@ impl<T: AsRef<[u8]>> Transitions<T> {
1521
1466
"mismatching sparse state length" ,
1522
1467
) ) ;
1523
1468
}
1524
- Ok ( ( ) )
1469
+ Ok ( verified )
1525
1470
}
1526
1471
1527
1472
/// Converts these transitions to a borrowed value.
@@ -1659,7 +1604,7 @@ impl<T: AsRef<[u8]>> Transitions<T> {
1659
1604
let state = & state[ nr..] ;
1660
1605
if npats == 0 {
1661
1606
return Err ( DeserializeError :: generic (
1662
- "state marked as a match, but has no pattern IDs " ,
1607
+ "state marked as a match, but pattern length is zero " ,
1663
1608
) ) ;
1664
1609
}
1665
1610
@@ -1681,6 +1626,21 @@ impl<T: AsRef<[u8]>> Transitions<T> {
1681
1626
} else {
1682
1627
( & [ ] [ ..] , state)
1683
1628
} ;
1629
+ if is_match && pattern_ids. is_empty ( ) {
1630
+ return Err ( DeserializeError :: generic (
1631
+ "state marked as a match, but has no pattern IDs" ,
1632
+ ) ) ;
1633
+ }
1634
+ if sp. is_match_state ( id) && pattern_ids. is_empty ( ) {
1635
+ return Err ( DeserializeError :: generic (
1636
+ "state marked special as a match, but has no pattern IDs" ,
1637
+ ) ) ;
1638
+ }
1639
+ if sp. is_match_state ( id) != is_match {
1640
+ return Err ( DeserializeError :: generic (
1641
+ "whether state is a match or not is inconsistent" ,
1642
+ ) ) ;
1643
+ }
1684
1644
1685
1645
// Now read this state's accelerator info. The first byte is the length
1686
1646
// of the accelerator, which is typically 0 (for no acceleration) but
@@ -2061,28 +2021,19 @@ impl<T: AsRef<[u8]>> StartTable<T> {
2061
2021
fn validate (
2062
2022
& self ,
2063
2023
sp : & Special ,
2064
- trans : & Transitions < T > ,
2024
+ seen : & Seen ,
2065
2025
) -> Result < ( ) , DeserializeError > {
2066
2026
for ( id, _, _) in self . iter ( ) {
2027
+ if !seen. contains ( & id) {
2028
+ return Err ( DeserializeError :: generic (
2029
+ "found invalid start state ID" ,
2030
+ ) ) ;
2031
+ }
2067
2032
if sp. is_match_state ( id) {
2068
2033
return Err ( DeserializeError :: generic (
2069
2034
"start states cannot be match states" ,
2070
2035
) ) ;
2071
2036
}
2072
- // Confirm that the start state points to a valid state.
2073
- let state = trans. try_state ( sp, id) ?;
2074
- // And like for the transition table, confirm that the transitions
2075
- // on all start states themselves point to a valid state.
2076
- //
2077
- // It'd probably be better to integrate this validation with the
2078
- // transition table, or otherwise store a sorted sequence of all
2079
- // valid state IDs in the sparse DFA itself. That way, we could
2080
- // check that every pointer to a state corresponds precisely to a
2081
- // correct and valid state.
2082
- for i in 0 ..state. ntrans {
2083
- let to = state. next_at ( i) ;
2084
- let _ = trans. try_state ( sp, to) ?;
2085
- }
2086
2037
}
2087
2038
Ok ( ( ) )
2088
2039
}
@@ -2537,6 +2488,62 @@ impl<'a> fmt::Debug for StateMut<'a> {
2537
2488
}
2538
2489
}
2539
2490
2491
+ // In order to validate everything, we not only need to make sure we
2492
+ // can decode every state, but that every transition in every state
2493
+ // points to a valid state. There are many duplicative transitions, so
2494
+ // we record state IDs that we've verified so that we don't redo the
2495
+ // decoding work.
2496
+ //
2497
+ // Except, when in no_std mode, we don't have dynamic memory allocation
2498
+ // available to us, so we skip this optimization. It's not clear
2499
+ // whether doing something more clever is worth it just yet. If you're
2500
+ // profiling this code and need it to run faster, please file an issue.
2501
+ //
2502
+ // OK, so we also use this to record the set of valid state IDs. Since
2503
+ // it is possible for a transition to point to an invalid state ID that
2504
+ // still (somehow) deserializes to a valid state. So we need to make
2505
+ // sure our transitions are limited to actually correct state IDs.
2506
+ // The problem is, I'm not sure how to do this verification step in
2507
+ // no-std no-alloc mode. I think we'd *have* to store the set of valid
2508
+ // state IDs in the DFA itself. For now, we don't do this verification
2509
+ // in no-std no-alloc mode. The worst thing that can happen is an
2510
+ // incorrect result. But no panics or memory safety problems should
2511
+ // result. Because we still do validate that the state itself is
2512
+ // "valid" in the sense that everything it points to actually exists.
2513
+ //
2514
+ // ---AG
2515
+ #[ derive( Debug ) ]
2516
+ struct Seen {
2517
+ #[ cfg( feature = "alloc" ) ]
2518
+ set : alloc:: collections:: BTreeSet < StateID > ,
2519
+ #[ cfg( not( feature = "alloc" ) ) ]
2520
+ set : core:: marker:: PhantomData < StateID > ,
2521
+ }
2522
+
2523
+ #[ cfg( feature = "alloc" ) ]
2524
+ impl Seen {
2525
+ fn new ( ) -> Seen {
2526
+ Seen { set : alloc:: collections:: BTreeSet :: new ( ) }
2527
+ }
2528
+ fn insert ( & mut self , id : StateID ) {
2529
+ self . set . insert ( id) ;
2530
+ }
2531
+ fn contains ( & self , id : & StateID ) -> bool {
2532
+ self . set . contains ( id)
2533
+ }
2534
+ }
2535
+
2536
+ #[ cfg( not( feature = "alloc" ) ) ]
2537
+ impl Seen {
2538
+ fn new ( ) -> Seen {
2539
+ Seen { set : core:: marker:: PhantomData }
2540
+ }
2541
+ fn insert ( & mut self , _id : StateID ) { }
2542
+ fn contains ( & self , _id : & StateID ) -> bool {
2543
+ false
2544
+ }
2545
+ }
2546
+
2540
2547
/*
2541
2548
/// A binary search routine specialized specifically to a sparse DFA state's
2542
2549
/// transitions. Specifically, the transitions are defined as a set of pairs
0 commit comments