Skip to content

Commit f6fc6a8

Browse files
committed
[idna] Preserve leading dots in host
A retry of #171 This diff changes the behavior of ToASCII step to match the spec and prevent failures on some cases when a domain name starts with leading dots (FULL STOPs), as requested in #166. The change in the code results in a few failures for test cases of the Conformance Testing data provided with UTS #46. But, as the header of the test data file (IdnaTest.txt) says: "If the file does not indicate an error, then the implementation must either have an error, or must have a matching result." Therefore, failing on those test cases does not break conformance with UTS #46, and to some level, anticipated. As mentioned in #166, a feedback is submitted for this inconsistency and the test logic can be improved later if the data file addresses the comments. Until then, we can throw less errors and maintain passing conformance tests with this diff. To keep the side-effects of ignoring errors during test runs as minimum as possible, I have separated `TooShortForDns` error from `TooLongForDns`. The `Error` struct has been kept private, so the change won't affect any library users. Fix #166
1 parent 03e0c0f commit f6fc6a8

File tree

3 files changed

+35
-5
lines changed

3 files changed

+35
-5
lines changed

idna/src/uts46.rs

+18-5
Original file line numberDiff line numberDiff line change
@@ -237,10 +237,12 @@ fn processing(domain: &str, flags: Flags, errors: &mut Vec<Error>) -> String {
237237
}
238238
let normalized: String = mapped.nfc().collect();
239239
let mut validated = String::new();
240+
let mut first = true;
240241
for label in normalized.split('.') {
241-
if validated.len() > 0 {
242+
if !first {
242243
validated.push('.');
243244
}
245+
first = false;
244246
if label.starts_with("xn--") {
245247
match punycode::decode_to_string(&label["xn--".len()..]) {
246248
Some(decoded_label) => {
@@ -266,13 +268,14 @@ pub struct Flags {
266268
}
267269

268270
#[derive(PartialEq, Eq, Clone, Copy, Debug)]
269-
enum Error {
271+
pub enum Error {
270272
PunycodeError,
271273
ValidityCriteria,
272274
DissallowedByStd3AsciiRules,
273275
DissallowedMappedInStd3,
274276
DissallowedCharacter,
275277
TooLongForDns,
278+
TooShortForDns,
276279
}
277280

278281
/// Errors recorded during UTS #46 processing.
@@ -282,14 +285,22 @@ enum Error {
282285
#[derive(Debug)]
283286
pub struct Errors(Vec<Error>);
284287

288+
impl PartialEq<Vec<Error>> for Errors {
289+
fn eq(&self, other: &Vec<Error>) -> bool {
290+
self.0 == *other
291+
}
292+
}
293+
285294
/// http://www.unicode.org/reports/tr46/#ToASCII
286295
pub fn to_ascii(domain: &str, flags: Flags) -> Result<String, Errors> {
287296
let mut errors = Vec::new();
288297
let mut result = String::new();
298+
let mut first = true;
289299
for label in processing(domain, flags, &mut errors).split('.') {
290-
if result.len() > 0 {
300+
if !first {
291301
result.push('.');
292302
}
303+
first = false;
293304
if label.is_ascii() {
294305
result.push_str(label);
295306
} else {
@@ -305,8 +316,10 @@ pub fn to_ascii(domain: &str, flags: Flags) -> Result<String, Errors> {
305316

306317
if flags.verify_dns_length {
307318
let domain = if result.ends_with(".") { &result[..result.len()-1] } else { &*result };
308-
if domain.len() < 1 || domain.len() > 253 ||
309-
domain.split('.').any(|label| label.len() < 1 || label.len() > 63) {
319+
if domain.len() < 1 || domain.split('.').any(|label| label.len() < 1) {
320+
errors.push(Error::TooShortForDns)
321+
}
322+
if domain.len() > 253 || domain.split('.').any(|label| label.len() > 63) {
310323
errors.push(Error::TooLongForDns)
311324
}
312325
}

idna/tests/uts46.rs

+10
Original file line numberDiff line numberDiff line change
@@ -84,6 +84,16 @@ pub fn collect_tests<F: FnMut(String, TestFn)>(add_test: &mut F) {
8484
return;
8585
}
8686

87+
if let Err(ref errors) = result {
88+
if errors.eq(&vec![uts46::Error::TooShortForDns]) {
89+
// Conformant implementations may produce error where test file has strings.
90+
// Test file drop all leading dots (leading empty labels), in contrast to spec.
91+
// So, we allo TooShortForDns failers on test cases.
92+
// https://github.com/servo/rust-url/issues/166
93+
return;
94+
}
95+
}
96+
8797
assert!(result.is_ok(), "Couldn't parse {} | original: {} | error: {:?}",
8898
source, original, result.err());
8999
let output = result.ok().unwrap();

tests/unit.rs

+7
Original file line numberDiff line numberDiff line change
@@ -344,6 +344,13 @@ fn test_set_host() {
344344
assert_eq!(url.as_str(), "foobar:/hello");
345345
}
346346

347+
#[test]
348+
// https://github.com/servo/rust-url/issues/166
349+
fn test_leading_dots() {
350+
assert_eq!(Host::parse(".org").unwrap(), Host::Domain(".org".to_owned()));
351+
assert_eq!(Url::parse("file://./foo").unwrap().domain(), Some("."));
352+
}
353+
347354
// This is testing that the macro produces buildable code when invoked
348355
// inside both a module and a function
349356
#[test]

0 commit comments

Comments
 (0)