@@ -269,10 +269,10 @@ pub trait CharExt {
269
269
fn len_utf8 ( self ) -> usize ;
270
270
#[ stable( feature = "core" , since = "1.6.0" ) ]
271
271
fn len_utf16 ( self ) -> usize ;
272
- #[ stable ( feature = "core " , since = "1.6.0 " ) ]
273
- fn encode_utf8 ( self , dst : & mut [ u8 ] ) -> Option < usize > ;
274
- #[ stable ( feature = "core " , since = "1.6.0 " ) ]
275
- fn encode_utf16 ( self , dst : & mut [ u16 ] ) -> Option < usize > ;
272
+ #[ unstable ( feature = "unicode " , issue = "27784 " ) ]
273
+ fn encode_utf8 ( self ) -> EncodeUtf8 ;
274
+ #[ unstable ( feature = "unicode " , issue = "27784 " ) ]
275
+ fn encode_utf16 ( self ) -> EncodeUtf16 ;
276
276
}
277
277
278
278
#[ stable( feature = "core" , since = "1.6.0" ) ]
@@ -336,75 +336,47 @@ impl CharExt for char {
336
336
}
337
337
338
338
#[ inline]
339
- fn encode_utf8 ( self , dst : & mut [ u8 ] ) -> Option < usize > {
340
- encode_utf8_raw ( self as u32 , dst)
339
+ fn encode_utf8 ( self ) -> EncodeUtf8 {
340
+ let code = self as u32 ;
341
+ let mut buf = [ 0 ; 4 ] ;
342
+ let pos = if code < MAX_ONE_B {
343
+ buf[ 3 ] = code as u8 ;
344
+ 3
345
+ } else if code < MAX_TWO_B {
346
+ buf[ 2 ] = ( code >> 6 & 0x1F ) as u8 | TAG_TWO_B ;
347
+ buf[ 3 ] = ( code & 0x3F ) as u8 | TAG_CONT ;
348
+ 2
349
+ } else if code < MAX_THREE_B {
350
+ buf[ 1 ] = ( code >> 12 & 0x0F ) as u8 | TAG_THREE_B ;
351
+ buf[ 2 ] = ( code >> 6 & 0x3F ) as u8 | TAG_CONT ;
352
+ buf[ 3 ] = ( code & 0x3F ) as u8 | TAG_CONT ;
353
+ 1
354
+ } else {
355
+ buf[ 0 ] = ( code >> 18 & 0x07 ) as u8 | TAG_FOUR_B ;
356
+ buf[ 1 ] = ( code >> 12 & 0x3F ) as u8 | TAG_CONT ;
357
+ buf[ 2 ] = ( code >> 6 & 0x3F ) as u8 | TAG_CONT ;
358
+ buf[ 3 ] = ( code & 0x3F ) as u8 | TAG_CONT ;
359
+ 0
360
+ } ;
361
+ EncodeUtf8 { buf : buf, pos : pos }
341
362
}
342
363
343
364
#[ inline]
344
- fn encode_utf16 ( self , dst : & mut [ u16 ] ) -> Option < usize > {
345
- encode_utf16_raw ( self as u32 , dst)
346
- }
347
- }
348
-
349
- /// Encodes a raw u32 value as UTF-8 into the provided byte buffer,
350
- /// and then returns the number of bytes written.
351
- ///
352
- /// If the buffer is not large enough, nothing will be written into it
353
- /// and a `None` will be returned.
354
- #[ inline]
355
- #[ unstable( feature = "char_internals" ,
356
- reason = "this function should not be exposed publicly" ,
357
- issue = "0" ) ]
358
- #[ doc( hidden) ]
359
- pub fn encode_utf8_raw ( code : u32 , dst : & mut [ u8 ] ) -> Option < usize > {
360
- // Marked #[inline] to allow llvm optimizing it away
361
- if code < MAX_ONE_B && !dst. is_empty ( ) {
362
- dst[ 0 ] = code as u8 ;
363
- Some ( 1 )
364
- } else if code < MAX_TWO_B && dst. len ( ) >= 2 {
365
- dst[ 0 ] = ( code >> 6 & 0x1F ) as u8 | TAG_TWO_B ;
366
- dst[ 1 ] = ( code & 0x3F ) as u8 | TAG_CONT ;
367
- Some ( 2 )
368
- } else if code < MAX_THREE_B && dst. len ( ) >= 3 {
369
- dst[ 0 ] = ( code >> 12 & 0x0F ) as u8 | TAG_THREE_B ;
370
- dst[ 1 ] = ( code >> 6 & 0x3F ) as u8 | TAG_CONT ;
371
- dst[ 2 ] = ( code & 0x3F ) as u8 | TAG_CONT ;
372
- Some ( 3 )
373
- } else if dst. len ( ) >= 4 {
374
- dst[ 0 ] = ( code >> 18 & 0x07 ) as u8 | TAG_FOUR_B ;
375
- dst[ 1 ] = ( code >> 12 & 0x3F ) as u8 | TAG_CONT ;
376
- dst[ 2 ] = ( code >> 6 & 0x3F ) as u8 | TAG_CONT ;
377
- dst[ 3 ] = ( code & 0x3F ) as u8 | TAG_CONT ;
378
- Some ( 4 )
379
- } else {
380
- None
381
- }
382
- }
383
-
384
- /// Encodes a raw u32 value as UTF-16 into the provided `u16` buffer,
385
- /// and then returns the number of `u16`s written.
386
- ///
387
- /// If the buffer is not large enough, nothing will be written into it
388
- /// and a `None` will be returned.
389
- #[ inline]
390
- #[ unstable( feature = "char_internals" ,
391
- reason = "this function should not be exposed publicly" ,
392
- issue = "0" ) ]
393
- #[ doc( hidden) ]
394
- pub fn encode_utf16_raw ( mut ch : u32 , dst : & mut [ u16 ] ) -> Option < usize > {
395
- // Marked #[inline] to allow llvm optimizing it away
396
- if ( ch & 0xFFFF ) == ch && !dst. is_empty ( ) {
397
- // The BMP falls through (assuming non-surrogate, as it should)
398
- dst[ 0 ] = ch as u16 ;
399
- Some ( 1 )
400
- } else if dst. len ( ) >= 2 {
401
- // Supplementary planes break into surrogates.
402
- ch -= 0x1_0000 ;
403
- dst[ 0 ] = 0xD800 | ( ( ch >> 10 ) as u16 ) ;
404
- dst[ 1 ] = 0xDC00 | ( ( ch as u16 ) & 0x3FF ) ;
405
- Some ( 2 )
406
- } else {
407
- None
365
+ fn encode_utf16 ( self ) -> EncodeUtf16 {
366
+ let mut buf = [ 0 ; 2 ] ;
367
+ let mut code = self as u32 ;
368
+ let pos = if ( code & 0xFFFF ) == code {
369
+ // The BMP falls through (assuming non-surrogate, as it should)
370
+ buf[ 1 ] = code as u16 ;
371
+ 1
372
+ } else {
373
+ // Supplementary planes break into surrogates.
374
+ code -= 0x1_0000 ;
375
+ buf[ 0 ] = 0xD800 | ( ( code >> 10 ) as u16 ) ;
376
+ buf[ 1 ] = 0xDC00 | ( ( code as u16 ) & 0x3FF ) ;
377
+ 0
378
+ } ;
379
+ EncodeUtf16 { buf : buf, pos : pos }
408
380
}
409
381
}
410
382
@@ -583,3 +555,80 @@ impl Iterator for EscapeDefault {
583
555
}
584
556
}
585
557
}
558
+
559
+ /// An iterator over `u8` entries representing the UTF-8 encoding of a `char`
560
+ /// value.
561
+ ///
562
+ /// Constructed via the `.encode_utf8()` method on `char`.
563
+ #[ unstable( feature = "unicode" , issue = "27784" ) ]
564
+ #[ derive( Debug ) ]
565
+ pub struct EncodeUtf8 {
566
+ buf : [ u8 ; 4 ] ,
567
+ pos : usize ,
568
+ }
569
+
570
+ impl EncodeUtf8 {
571
+ /// Returns the remaining bytes of this iterator as a slice.
572
+ #[ unstable( feature = "unicode" , issue = "27784" ) ]
573
+ pub fn as_slice ( & self ) -> & [ u8 ] {
574
+ & self . buf [ self . pos ..]
575
+ }
576
+ }
577
+
578
+ #[ unstable( feature = "unicode" , issue = "27784" ) ]
579
+ impl Iterator for EncodeUtf8 {
580
+ type Item = u8 ;
581
+
582
+ fn next ( & mut self ) -> Option < u8 > {
583
+ if self . pos == self . buf . len ( ) {
584
+ None
585
+ } else {
586
+ let ret = Some ( self . buf [ self . pos ] ) ;
587
+ self . pos += 1 ;
588
+ ret
589
+ }
590
+ }
591
+
592
+ fn size_hint ( & self ) -> ( usize , Option < usize > ) {
593
+ self . as_slice ( ) . iter ( ) . size_hint ( )
594
+ }
595
+ }
596
+
597
+ /// An iterator over `u16` entries representing the UTF-16 encoding of a `char`
598
+ /// value.
599
+ ///
600
+ /// Constructed via the `.encode_utf16()` method on `char`.
601
+ #[ unstable( feature = "unicode" , issue = "27784" ) ]
602
+ #[ derive( Debug ) ]
603
+ pub struct EncodeUtf16 {
604
+ buf : [ u16 ; 2 ] ,
605
+ pos : usize ,
606
+ }
607
+
608
+ impl EncodeUtf16 {
609
+ /// Returns the remaining `u16` code units of this iterator as a slice.
610
+ #[ unstable( feature = "unicode" , issue = "27784" ) ]
611
+ pub fn as_slice ( & self ) -> & [ u16 ] {
612
+ & self . buf [ self . pos ..]
613
+ }
614
+ }
615
+
616
+
617
+ #[ unstable( feature = "unicode" , issue = "27784" ) ]
618
+ impl Iterator for EncodeUtf16 {
619
+ type Item = u16 ;
620
+
621
+ fn next ( & mut self ) -> Option < u16 > {
622
+ if self . pos == self . buf . len ( ) {
623
+ None
624
+ } else {
625
+ let ret = Some ( self . buf [ self . pos ] ) ;
626
+ self . pos += 1 ;
627
+ ret
628
+ }
629
+ }
630
+
631
+ fn size_hint ( & self ) -> ( usize , Option < usize > ) {
632
+ self . as_slice ( ) . iter ( ) . size_hint ( )
633
+ }
634
+ }
0 commit comments