From a8d78fec52aec04254ec602851c4fd7c8469c592 Mon Sep 17 00:00:00 2001 From: Thalia Archibald Date: Thu, 27 Feb 2025 23:38:19 -0800 Subject: [PATCH 1/2] Specialize OsString::push for strings When concatenating two WTF-8 strings, surrogate pairs at the boundaries need to be joined. However, since UTF-8 strings cannot contain surrogate halves, this check can be skipped when one string is UTF-8. Specialize `OsString::push` to use a more efficient concatenation in this case. Unfortunately, a specialization for `T: AsRef` conflicts with `T: AsRef`, so stamp out string types with a macro. --- library/std/src/ffi/os_str.rs | 25 ++++++++++++++++++++++++- library/std/src/sys/os_str/bytes.rs | 5 +++++ library/std/src/sys/os_str/wtf8.rs | 5 +++++ 3 files changed, 34 insertions(+), 1 deletion(-) diff --git a/library/std/src/ffi/os_str.rs b/library/std/src/ffi/os_str.rs index f4a02802336d5..c0e659db307af 100644 --- a/library/std/src/ffi/os_str.rs +++ b/library/std/src/ffi/os_str.rs @@ -257,7 +257,30 @@ impl OsString { #[inline] #[rustc_confusables("append", "put")] pub fn push>(&mut self, s: T) { - self.inner.push_slice(&s.as_ref().inner) + trait SpecPushTo { + fn spec_push_to(&self, buf: &mut OsString); + } + + impl> SpecPushTo for T { + #[inline] + default fn spec_push_to(&self, buf: &mut OsString) { + buf.inner.push_slice(&self.as_ref().inner); + } + } + + // Use a more efficient implementation when the string is UTF-8. + macro spec_str($T:ty) { + impl SpecPushTo for $T { + #[inline] + fn spec_push_to(&self, buf: &mut OsString) { + buf.inner.push_str(self); + } + } + } + spec_str!(str); + spec_str!(String); + + s.spec_push_to(self) } /// Creates a new `OsString` with at least the given capacity. diff --git a/library/std/src/sys/os_str/bytes.rs b/library/std/src/sys/os_str/bytes.rs index 1d337694944bc..dfff2d3e5d31d 100644 --- a/library/std/src/sys/os_str/bytes.rs +++ b/library/std/src/sys/os_str/bytes.rs @@ -139,6 +139,11 @@ impl Buf { self.inner.extend_from_slice(&s.inner) } + #[inline] + pub fn push_str(&mut self, s: &str) { + self.inner.extend_from_slice(s.as_bytes()); + } + #[inline] pub fn reserve(&mut self, additional: usize) { self.inner.reserve(additional) diff --git a/library/std/src/sys/os_str/wtf8.rs b/library/std/src/sys/os_str/wtf8.rs index 8acec6f949fc5..a32f5d40f6a9c 100644 --- a/library/std/src/sys/os_str/wtf8.rs +++ b/library/std/src/sys/os_str/wtf8.rs @@ -116,6 +116,11 @@ impl Buf { self.inner.push_wtf8(&s.inner) } + #[inline] + pub fn push_str(&mut self, s: &str) { + self.inner.push_str(s); + } + #[inline] pub fn reserve(&mut self, additional: usize) { self.inner.reserve(additional) From 83407b828bcf5c883b2ab50b958edbcba0738f6a Mon Sep 17 00:00:00 2001 From: Thalia Archibald Date: Fri, 28 Feb 2025 13:31:15 -0800 Subject: [PATCH 2/2] Specialize constructing OsString from strings The WTF-8 version of `OsString` tracks whether it is known to be valid UTF-8 with its `is_known_utf8` field. Specialize `From>` so this can be set for UTF-8 string types. --- library/std/src/ffi/os_str.rs | 25 ++++++++++++++++++++++++- 1 file changed, 24 insertions(+), 1 deletion(-) diff --git a/library/std/src/ffi/os_str.rs b/library/std/src/ffi/os_str.rs index c0e659db307af..aa25ff5293c71 100644 --- a/library/std/src/ffi/os_str.rs +++ b/library/std/src/ffi/os_str.rs @@ -610,7 +610,30 @@ impl> From<&T> for OsString { /// Copies any value implementing [AsRef]<[OsStr]> /// into a newly allocated [`OsString`]. fn from(s: &T) -> OsString { - s.as_ref().to_os_string() + trait SpecToOsString { + fn spec_to_os_string(&self) -> OsString; + } + + impl> SpecToOsString for T { + #[inline] + default fn spec_to_os_string(&self) -> OsString { + self.as_ref().to_os_string() + } + } + + // Preserve the known-UTF-8 property for strings. + macro spec_str($T:ty) { + impl SpecToOsString for $T { + #[inline] + fn spec_to_os_string(&self) -> OsString { + OsString::from(String::from(self)) + } + } + } + spec_str!(str); + spec_str!(String); + + s.spec_to_os_string() } }