Skip to content

Commit 91409d6

Browse files
djc, valenting
authored and committed
idna: split validity criteria into more specific error variants
1 parent 28a5be6 commit 91409d6

File tree

1 file changed

+46
-24
lines changed

1 file changed

+46
-24
lines changed

idna/src/uts46.rs

+46 -24
Original file line number | Diff line number | Diff line change
@@ -279,11 +279,11 @@ fn passes_bidi(label: &str, is_bidi_domain: bool) -> bool {
279279
/// V1 (NFC) and V8 (Bidi) are checked inside `processing()` to prevent doing duplicate work.
280280
///
281281
/// http://www.unicode.org/reports/tr46/#Validity_Criteria
282-
fn is_valid(label: &str, config: Config) -> bool {
282+
fn check_validity(label: &str, config: Config, errors: &mut Errors) {
283283
let first_char = label.chars().next();
284284
if first_char == None {
285285
// Empty string, pass
286-
return true;
286+
return;
287287
}
288288

289289
// V2: No U+002D HYPHEN-MINUS in both third and fourth positions.
@@ -294,7 +294,8 @@ fn is_valid(label: &str, config: Config) -> bool {
294294

295295
// V3: neither begin nor end with a U+002D HYPHEN-MINUS
296296
if config.check_hyphens && (label.starts_with('-') || label.ends_with('-')) {
297-
return false;
297+
errors.check_hyphens = true;
298+
return;
298299
}
299300

300301
// V4: not contain a U+002E FULL STOP
@@ -303,7 +304,8 @@ fn is_valid(label: &str, config: Config) -> bool {
303304

304305
// V5: not begin with a GC=Mark
305306
if is_combining_mark(first_char.unwrap()) {
306-
return false;
307+
errors.start_combining_mark = true;
308+
return;
307309
}
308310

309311
// V6: Check against Mapping Table
@@ -313,15 +315,15 @@ fn is_valid(label: &str, config: Config) -> bool {
313315
Mapping::DisallowedStd3Valid => config.use_std3_ascii_rules,
314316
_ => true,
315317
}) {
316-
return false;
318+
errors.invalid_mapping = true;
319+
return;
317320
}
318321

319322
// V7: ContextJ rules
320323
//
321324
// TODO: Implement rules and add *CheckJoiners* flag.
322325

323326
// V8: Bidi rules are checked inside `processing()`
324-
true
325327
}
326328

327329
/// http://www.unicode.org/reports/tr46/#Processing
@@ -384,7 +386,7 @@ fn processing(
384386

385387
let mut decoder = punycode::Decoder::default();
386388
let non_transitional = config.transitional_processing(false);
387-
let (mut first, mut valid, mut has_bidi_labels) = (true, true, false);
389+
let (mut first, mut has_bidi_labels) = (true, false);
388390
for label in normalized.split('.') {
389391
if !first {
390392
output.push('.');
@@ -401,10 +403,12 @@ fn processing(
401403
has_bidi_labels |= is_bidi_domain(decoded_label);
402404
}
403405

404-
if valid
405-
&& (!is_nfc(&decoded_label) || !is_valid(decoded_label, non_transitional))
406-
{
407-
valid = false;
406+
if !errors.is_err() {
407+
if !is_nfc(&decoded_label) {
408+
errors.nfc = true;
409+
} else {
410+
check_validity(decoded_label, non_transitional, &mut errors);
411+
}
408412
}
409413
}
410414
Err(()) => {
@@ -418,7 +422,7 @@ fn processing(
418422
}
419423

420424
// `normalized` is already `NFC` so we can skip that check
421-
valid &= is_valid(label, config);
425+
check_validity(label, config, &mut errors);
422426
output.push_str(label)
423427
}
424428
}
@@ -428,15 +432,11 @@ fn processing(
428432
//
429433
// TODO: Add *CheckBidi* flag
430434
if !passes_bidi(label, has_bidi_labels) {
431-
valid = false;
435+
errors.check_bidi = true;
432436
break;
433437
}
434438
}
435439

436-
if !valid {
437-
errors.validity_criteria = true;
438-
}
439-
440440
errors
441441
}
442442

@@ -589,8 +589,11 @@ fn is_bidi_domain(s: &str) -> bool {
589589
#[derive(Default)]
590590
pub struct Errors {
591591
punycode: bool,
592-
// https://unicode.org/reports/tr46/#Validity_Criteria
593-
validity_criteria: bool,
592+
check_hyphens: bool,
593+
check_bidi: bool,
594+
start_combining_mark: bool,
595+
invalid_mapping: bool,
596+
nfc: bool,
594597
disallowed_by_std3_ascii_rules: bool,
595598
disallowed_mapped_in_std3: bool,
596599
disallowed_character: bool,
@@ -602,15 +605,23 @@ impl Errors {
602605
fn is_err(&self) -> bool {
603606
let Errors {
604607
punycode,
605-
validity_criteria,
608+
check_hyphens,
609+
check_bidi,
610+
start_combining_mark,
611+
invalid_mapping,
612+
nfc,
606613
disallowed_by_std3_ascii_rules,
607614
disallowed_mapped_in_std3,
608615
disallowed_character,
609616
too_long_for_dns,
610617
too_short_for_dns,
611618
} = *self;
612619
punycode
613-
|| validity_criteria
620+
|| check_hyphens
621+
|| check_bidi
622+
|| start_combining_mark
623+
|| invalid_mapping
624+
|| nfc
614625
|| disallowed_by_std3_ascii_rules
615626
|| disallowed_mapped_in_std3
616627
|| disallowed_character
@@ -623,7 +634,11 @@ impl fmt::Debug for Errors {
623634
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
624635
let Errors {
625636
punycode,
626-
validity_criteria,
637+
check_hyphens,
638+
check_bidi,
639+
start_combining_mark,
640+
invalid_mapping,
641+
nfc,
627642
disallowed_by_std3_ascii_rules,
628643
disallowed_mapped_in_std3,
629644
disallowed_character,
@@ -633,8 +648,15 @@ impl fmt::Debug for Errors {
633648

634649
let fields = [
635650
("punycode", punycode),
636-
("validity_criteria", validity_criteria),
637-
("disallowed_by_std3_ascii_rules", disallowed_by_std3_ascii_rules),
651+
("check_hyphens", check_hyphens),
652+
("check_bidi", check_bidi),
653+
("start_combining_mark", start_combining_mark),
654+
("invalid_mapping", invalid_mapping),
655+
("nfc", nfc),
656+
(
657+
"disallowed_by_std3_ascii_rules",
658+
disallowed_by_std3_ascii_rules,
659+
),
638660
("disallowed_mapped_in_std3", disallowed_mapped_in_std3),
639661
("disallowed_character", disallowed_character),
640662
("too_long_for_dns", too_long_for_dns),

0 commit comments

Comments
 (0)