Skip to content

Commit c219eb8

Browse files
committed
[idna] Preserve leading dots in host
A retry of #171 This diff changes the behavior of ToASCII step to match the spec and prevent failures on some cases when a domain name starts with leading dots (FULL STOPs), as requested in #166. The change in the code results in a few failures for test cases of the Conformance Testing data provided with UTS #46. But, as the header of the test data file (IdnaTest.txt) says: "If the file does not indicate an error, then the implementation must either have an error, or must have a matching result." Therefore, failing on those test cases does not break conformance with UTS #46, and to some level, anticipated. As mentioned in #166, a feedback is submitted for this inconsistency and the test logic can be improved later if the data file addresses the comments. Until then, we can throw less errors and maintain passing conformance tests with this diff. To keep the side-effects of ignoring errors during test runs as minimum as possible, I have separated `TooShortForDns` error from `TooLongForDns`. The `Error` struct has been kept private, so the change won't affect any library users. Fix #166
1 parent 780410c commit c219eb8

File tree

3 files changed

+36
-5
lines changed

3 files changed

+36
-5
lines changed

idna/src/uts46.rs

+19-5
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
use self::Mapping::*;
1313
use punycode;
1414
use std::ascii::AsciiExt;
15+
use std::slice::Iter;
1516
use unicode_normalization::UnicodeNormalization;
1617
use unicode_normalization::char::is_combining_mark;
1718
use unicode_bidi::{BidiClass, bidi_class};
@@ -237,10 +238,12 @@ fn processing(domain: &str, flags: Flags, errors: &mut Vec<Error>) -> String {
237238
}
238239
let normalized: String = mapped.nfc().collect();
239240
let mut validated = String::new();
241+
let mut first = true;
240242
for label in normalized.split('.') {
241-
if validated.len() > 0 {
243+
if !first {
242244
validated.push('.');
243245
}
246+
first = false;
244247
if label.starts_with("xn--") {
245248
match punycode::decode_to_string(&label["xn--".len()..]) {
246249
Some(decoded_label) => {
@@ -266,13 +269,14 @@ pub struct Flags {
266269
}
267270

268271
#[derive(PartialEq, Eq, Clone, Copy, Debug)]
269-
enum Error {
272+
pub enum Error {
270273
PunycodeError,
271274
ValidityCriteria,
272275
DissallowedByStd3AsciiRules,
273276
DissallowedMappedInStd3,
274277
DissallowedCharacter,
275278
TooLongForDns,
279+
TooShortForDns,
276280
}
277281

278282
/// Errors recorded during UTS #46 processing.
@@ -282,14 +286,22 @@ enum Error {
282286
#[derive(Debug)]
283287
pub struct Errors(Vec<Error>);
284288

289+
impl Errors {
290+
pub fn iter(&self) -> Iter<Error> {
291+
self.0.iter()
292+
}
293+
}
294+
285295
/// http://www.unicode.org/reports/tr46/#ToASCII
286296
pub fn to_ascii(domain: &str, flags: Flags) -> Result<String, Errors> {
287297
let mut errors = Vec::new();
288298
let mut result = String::new();
299+
let mut first = true;
289300
for label in processing(domain, flags, &mut errors).split('.') {
290-
if result.len() > 0 {
301+
if !first {
291302
result.push('.');
292303
}
304+
first = false;
293305
if label.is_ascii() {
294306
result.push_str(label);
295307
} else {
@@ -305,8 +317,10 @@ pub fn to_ascii(domain: &str, flags: Flags) -> Result<String, Errors> {
305317

306318
if flags.verify_dns_length {
307319
let domain = if result.ends_with(".") { &result[..result.len()-1] } else { &*result };
308-
if domain.len() < 1 || domain.len() > 253 ||
309-
domain.split('.').any(|label| label.len() < 1 || label.len() > 63) {
320+
if domain.len() < 1 || domain.split('.').any(|label| label.len() < 1) {
321+
errors.push(Error::TooShortForDns)
322+
}
323+
if domain.len() > 253 || domain.split('.').any(|label| label.len() > 63) {
310324
errors.push(Error::TooLongForDns)
311325
}
312326
}

idna/tests/uts46.rs

+10
Original file line numberDiff line numberDiff line change
@@ -84,6 +84,16 @@ pub fn collect_tests<F: FnMut(String, TestFn)>(add_test: &mut F) {
8484
return;
8585
}
8686

87+
if let Err(ref errors) = result {
88+
if errors.iter().any(|err| *err == uts46::Error::TooShortForDns) {
89+
// Conformant implementations may produce error where test file has strings.
90+
// Test file drop all leading dots (leading empty labels), in contrast to spec.
91+
// So, we allo TooShortForDns failers on test cases.
92+
// https://github.com/servo/rust-url/issues/166
93+
return;
94+
}
95+
}
96+
8797
assert!(result.is_ok(), "Couldn't parse {} | original: {} | error: {:?}",
8898
source, original, result.err());
8999
let output = result.ok().unwrap();

tests/unit.rs

+7
Original file line numberDiff line numberDiff line change
@@ -344,6 +344,13 @@ fn test_set_host() {
344344
assert_eq!(url.as_str(), "foobar:/hello");
345345
}
346346

347+
#[test]
348+
// https://github.com/servo/rust-url/issues/166
349+
fn test_leading_dots() {
350+
assert_eq!(Host::parse(".org").unwrap(), Host::Domain(".org".to_owned()));
351+
assert_eq!(Url::parse("file://./foo").unwrap().domain(), Some("."));
352+
}
353+
347354
// This is testing that the macro produces buildable code when invoked
348355
// inside both a module and a function
349356
#[test]

0 commit comments

Comments
 (0)