@@ -714,6 +714,156 @@ impl String {
714
714
. collect ( )
715
715
}
716
716
717
+ /// Decode a UTF-16LE–encoded vector `v` into a `String`, returning [`Err`]
718
+ /// if `v` contains any invalid data.
719
+ ///
720
+ /// # Examples
721
+ ///
722
+ /// Basic usage:
723
+ ///
724
+ /// ```
725
+ /// #![feature(str_from_utf16_endian)]
726
+ /// // 𝄞music
727
+ /// let v = &[0x34, 0xD8, 0x1E, 0xDD, 0x6d, 0x00, 0x75, 0x00,
728
+ /// 0x73, 0x00, 0x69, 0x00, 0x63, 0x00];
729
+ /// assert_eq!(String::from("𝄞music"),
730
+ /// String::from_utf16le(v).unwrap());
731
+ ///
732
+ /// // 𝄞mu<invalid>ic
733
+ /// let v = &[0x34, 0xD8, 0x1E, 0xDD, 0x6d, 0x00, 0x75, 0x00,
734
+ /// 0x00, 0xD8, 0x69, 0x00, 0x63, 0x00];
735
+ /// assert!(String::from_utf16le(v).is_err());
736
+ /// ```
737
+ #[ cfg( not( no_global_oom_handling) ) ]
738
+ #[ unstable( feature = "str_from_utf16_endian" , issue = "116258" ) ]
739
+ pub fn from_utf16le ( v : & [ u8 ] ) -> Result < String , FromUtf16Error > {
740
+ if v. len ( ) % 2 != 0 {
741
+ return Err ( FromUtf16Error ( ( ) ) ) ;
742
+ }
743
+ match ( cfg ! ( target_endian = "little" ) , unsafe { v. align_to :: < u16 > ( ) } ) {
744
+ ( true , ( [ ] , v, [ ] ) ) => Self :: from_utf16 ( v) ,
745
+ _ => char:: decode_utf16 ( v. array_chunks :: < 2 > ( ) . copied ( ) . map ( u16:: from_le_bytes) )
746
+ . collect :: < Result < _ , _ > > ( )
747
+ . map_err ( |_| FromUtf16Error ( ( ) ) ) ,
748
+ }
749
+ }
750
+
751
+ /// Decode a UTF-16LE–encoded slice `v` into a `String`, replacing
752
+ /// invalid data with [the replacement character (`U+FFFD`)][U+FFFD].
753
+ ///
754
+ /// Unlike [`from_utf8_lossy`] which returns a [`Cow<'a, str>`],
755
+ /// `from_utf16le_lossy` returns a `String` since the UTF-16 to UTF-8
756
+ /// conversion requires a memory allocation.
757
+ ///
758
+ /// [`from_utf8_lossy`]: String::from_utf8_lossy
759
+ /// [`Cow<'a, str>`]: crate::borrow::Cow "borrow::Cow"
760
+ /// [U+FFFD]: core::char::REPLACEMENT_CHARACTER
761
+ ///
762
+ /// # Examples
763
+ ///
764
+ /// Basic usage:
765
+ ///
766
+ /// ```
767
+ /// #![feature(str_from_utf16_endian)]
768
+ /// // 𝄞mus<invalid>ic<invalid>
769
+ /// let v = &[0x34, 0xD8, 0x1E, 0xDD, 0x6d, 0x00, 0x75, 0x00,
770
+ /// 0x73, 0x00, 0x1E, 0xDD, 0x69, 0x00, 0x63, 0x00,
771
+ /// 0x34, 0xD8];
772
+ ///
773
+ /// assert_eq!(String::from("𝄞mus\u{FFFD}ic\u{FFFD}"),
774
+ /// String::from_utf16le_lossy(v));
775
+ /// ```
776
+ #[ cfg( not( no_global_oom_handling) ) ]
777
+ #[ unstable( feature = "str_from_utf16_endian" , issue = "116258" ) ]
778
+ pub fn from_utf16le_lossy ( v : & [ u8 ] ) -> String {
779
+ match ( cfg ! ( target_endian = "little" ) , unsafe { v. align_to :: < u16 > ( ) } ) {
780
+ ( true , ( [ ] , v, [ ] ) ) => Self :: from_utf16_lossy ( v) ,
781
+ ( true , ( [ ] , v, [ _remainder] ) ) => Self :: from_utf16_lossy ( v) + "\u{FFFD} " ,
782
+ _ => {
783
+ let mut iter = v. array_chunks :: < 2 > ( ) ;
784
+ let string = char:: decode_utf16 ( iter. by_ref ( ) . copied ( ) . map ( u16:: from_le_bytes) )
785
+ . map ( |r| r. unwrap_or ( char:: REPLACEMENT_CHARACTER ) )
786
+ . collect ( ) ;
787
+ if iter. remainder ( ) . is_empty ( ) { string } else { string + "\u{FFFD} " }
788
+ }
789
+ }
790
+ }
791
+
792
+ /// Decode a UTF-16BE–encoded vector `v` into a `String`, returning [`Err`]
793
+ /// if `v` contains any invalid data.
794
+ ///
795
+ /// # Examples
796
+ ///
797
+ /// Basic usage:
798
+ ///
799
+ /// ```
800
+ /// #![feature(str_from_utf16_endian)]
801
+ /// // 𝄞music
802
+ /// let v = &[0xD8, 0x34, 0xDD, 0x1E, 0x00, 0x6d, 0x00, 0x75,
803
+ /// 0x00, 0x73, 0x00, 0x69, 0x00, 0x63];
804
+ /// assert_eq!(String::from("𝄞music"),
805
+ /// String::from_utf16be(v).unwrap());
806
+ ///
807
+ /// // 𝄞mu<invalid>ic
808
+ /// let v = &[0xD8, 0x34, 0xDD, 0x1E, 0x00, 0x6d, 0x00, 0x75,
809
+ /// 0xD8, 0x00, 0x00, 0x69, 0x00, 0x63];
810
+ /// assert!(String::from_utf16be(v).is_err());
811
+ /// ```
812
+ #[ cfg( not( no_global_oom_handling) ) ]
813
+ #[ unstable( feature = "str_from_utf16_endian" , issue = "116258" ) ]
814
+ pub fn from_utf16be ( v : & [ u8 ] ) -> Result < String , FromUtf16Error > {
815
+ if v. len ( ) % 2 != 0 {
816
+ return Err ( FromUtf16Error ( ( ) ) ) ;
817
+ }
818
+ match ( cfg ! ( target_endian = "big" ) , unsafe { v. align_to :: < u16 > ( ) } ) {
819
+ ( true , ( [ ] , v, [ ] ) ) => Self :: from_utf16 ( v) ,
820
+ _ => char:: decode_utf16 ( v. array_chunks :: < 2 > ( ) . copied ( ) . map ( u16:: from_be_bytes) )
821
+ . collect :: < Result < _ , _ > > ( )
822
+ . map_err ( |_| FromUtf16Error ( ( ) ) ) ,
823
+ }
824
+ }
825
+
826
+ /// Decode a UTF-16BE–encoded slice `v` into a `String`, replacing
827
+ /// invalid data with [the replacement character (`U+FFFD`)][U+FFFD].
828
+ ///
829
+ /// Unlike [`from_utf8_lossy`] which returns a [`Cow<'a, str>`],
830
+ /// `from_utf16le_lossy` returns a `String` since the UTF-16 to UTF-8
831
+ /// conversion requires a memory allocation.
832
+ ///
833
+ /// [`from_utf8_lossy`]: String::from_utf8_lossy
834
+ /// [`Cow<'a, str>`]: crate::borrow::Cow "borrow::Cow"
835
+ /// [U+FFFD]: core::char::REPLACEMENT_CHARACTER
836
+ ///
837
+ /// # Examples
838
+ ///
839
+ /// Basic usage:
840
+ ///
841
+ /// ```
842
+ /// #![feature(str_from_utf16_endian)]
843
+ /// // 𝄞mus<invalid>ic<invalid>
844
+ /// let v = &[0xD8, 0x34, 0xDD, 0x1E, 0x00, 0x6d, 0x00, 0x75,
845
+ /// 0x00, 0x73, 0xDD, 0x1E, 0x00, 0x69, 0x00, 0x63,
846
+ /// 0xD8, 0x34];
847
+ ///
848
+ /// assert_eq!(String::from("𝄞mus\u{FFFD}ic\u{FFFD}"),
849
+ /// String::from_utf16be_lossy(v));
850
+ /// ```
851
+ #[ cfg( not( no_global_oom_handling) ) ]
852
+ #[ unstable( feature = "str_from_utf16_endian" , issue = "116258" ) ]
853
+ pub fn from_utf16be_lossy ( v : & [ u8 ] ) -> String {
854
+ match ( cfg ! ( target_endian = "big" ) , unsafe { v. align_to :: < u16 > ( ) } ) {
855
+ ( true , ( [ ] , v, [ ] ) ) => Self :: from_utf16_lossy ( v) ,
856
+ ( true , ( [ ] , v, [ _remainder] ) ) => Self :: from_utf16_lossy ( v) + "\u{FFFD} " ,
857
+ _ => {
858
+ let mut iter = v. array_chunks :: < 2 > ( ) ;
859
+ let string = char:: decode_utf16 ( iter. by_ref ( ) . copied ( ) . map ( u16:: from_be_bytes) )
860
+ . map ( |r| r. unwrap_or ( char:: REPLACEMENT_CHARACTER ) )
861
+ . collect ( ) ;
862
+ if iter. remainder ( ) . is_empty ( ) { string } else { string + "\u{FFFD} " }
863
+ }
864
+ }
865
+ }
866
+
717
867
/// Decomposes a `String` into its raw components.
718
868
///
719
869
/// Returns the raw pointer to the underlying data, the length of
0 commit comments