Skip to content

Commit 81c8992

Browse files
committed
[idna] Preserve leading dots in host
A retry of servo#171 This diff changes the behavior of ToASCII step to match the spec and prevent failures on some cases when a domain name starts with leading dots (FULL STOPs), as requested in servo#166. The change in the code results in a few failures for test cases of the Conformance Testing data provided with UTS servo#46. But, as the header of the test data file (IdnaTest.txt) says: "If the file does not indicate an error, then the implementation must either have an error, or must have a matching result." Therefore, failing on those test cases does not break conformance with UTS servo#46, and to some level, anticipated. As mentioned in servo#166, a feedback is submitted for this inconsistency and the test logic can be improved later if the data file addresses the comments. Until then, we can throw less errors and maintain passing conformance tests with this diff.
1 parent 780410c commit 81c8992

File tree

3 files changed

+36
-5
lines changed

3 files changed

+36
-5
lines changed

idna/src/uts46.rs

Lines changed: 19 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
use self::Mapping::*;
1313
use punycode;
1414
use std::ascii::AsciiExt;
15+
use std::slice::Iter;
1516
use unicode_normalization::UnicodeNormalization;
1617
use unicode_normalization::char::is_combining_mark;
1718
use unicode_bidi::{BidiClass, bidi_class};
@@ -237,10 +238,12 @@ fn processing(domain: &str, flags: Flags, errors: &mut Vec<Error>) -> String {
237238
}
238239
let normalized: String = mapped.nfc().collect();
239240
let mut validated = String::new();
241+
let mut first = true;
240242
for label in normalized.split('.') {
241-
if validated.len() > 0 {
243+
if !first {
242244
validated.push('.');
243245
}
246+
first = false;
244247
if label.starts_with("xn--") {
245248
match punycode::decode_to_string(&label["xn--".len()..]) {
246249
Some(decoded_label) => {
@@ -266,13 +269,14 @@ pub struct Flags {
266269
}
267270

268271
#[derive(PartialEq, Eq, Clone, Copy, Debug)]
269-
enum Error {
272+
pub enum Error {
270273
PunycodeError,
271274
ValidityCriteria,
272275
DissallowedByStd3AsciiRules,
273276
DissallowedMappedInStd3,
274277
DissallowedCharacter,
275278
TooLongForDns,
279+
TooShortForDns,
276280
}
277281

278282
/// Errors recorded during UTS #46 processing.
@@ -282,14 +286,22 @@ enum Error {
282286
#[derive(Debug)]
283287
pub struct Errors(Vec<Error>);
284288

289+
impl Errors {
290+
pub fn iter(&self) -> Iter<Error> {
291+
self.0.iter()
292+
}
293+
}
294+
285295
/// http://www.unicode.org/reports/tr46/#ToASCII
286296
pub fn to_ascii(domain: &str, flags: Flags) -> Result<String, Errors> {
287297
let mut errors = Vec::new();
288298
let mut result = String::new();
299+
let mut first = true;
289300
for label in processing(domain, flags, &mut errors).split('.') {
290-
if result.len() > 0 {
301+
if !first {
291302
result.push('.');
292303
}
304+
first = false;
293305
if label.is_ascii() {
294306
result.push_str(label);
295307
} else {
@@ -305,8 +317,10 @@ pub fn to_ascii(domain: &str, flags: Flags) -> Result<String, Errors> {
305317

306318
if flags.verify_dns_length {
307319
let domain = if result.ends_with(".") { &result[..result.len()-1] } else { &*result };
308-
if domain.len() < 1 || domain.len() > 253 ||
309-
domain.split('.').any(|label| label.len() < 1 || label.len() > 63) {
320+
if domain.len() < 1 || domain.split('.').any(|label| label.len() < 1) {
321+
errors.push(Error::TooShortForDns)
322+
}
323+
if domain.len() > 253 || domain.split('.').any(|label| label.len() > 63) {
310324
errors.push(Error::TooLongForDns)
311325
}
312326
}

idna/tests/uts46.rs

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -84,6 +84,16 @@ pub fn collect_tests<F: FnMut(String, TestFn)>(add_test: &mut F) {
8484
return;
8585
}
8686

87+
if let Err(ref errors) = result {
88+
if errors.iter().any(|err| *err == uts46::Error::TooShortForDns) {
89+
// Conformant implementations may produce error where test file has strings.
90+
// Test file drop all leading dots (leading empty labels), in contrast to spec.
91+
// So, we allo TooShortForDns failers on test cases.
92+
// https://github.com/servo/rust-url/issues/166
93+
return;
94+
}
95+
}
96+
8797
assert!(result.is_ok(), "Couldn't parse {} | original: {} | error: {:?}",
8898
source, original, result.err());
8999
let output = result.ok().unwrap();

tests/unit.rs

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -344,6 +344,13 @@ fn test_set_host() {
344344
assert_eq!(url.as_str(), "foobar:/hello");
345345
}
346346

347+
#[test]
348+
// https://github.com/servo/rust-url/issues/166
349+
fn test_leading_dots() {
350+
assert_eq!(Host::parse(".org").unwrap(), Host::Domain(".org".to_owned()));
351+
assert_eq!(Url::parse("file://./foo").unwrap().domain(), Some("."));
352+
}
353+
347354
// This is testing that the macro produces buildable code when invoked
348355
// inside both a module and a function
349356
#[test]

0 commit comments

Comments
 (0)