@@ -8,6 +8,7 @@ import { webByteUtils } from '../../src/utils/web_byte_utils';
8
8
import * as sinon from 'sinon' ;
9
9
import { loadCJSModuleBSON , loadReactNativeCJSModuleBSON , loadESModuleBSON } from '../load_bson' ;
10
10
import * as crypto from 'node:crypto' ;
11
+ import { BSONError , BSONUTF8Error } from '../../src/error' ;
11
12
12
13
type ByteUtilTest < K extends keyof ByteUtils > = {
13
14
name : string ;
@@ -399,6 +400,8 @@ const fromUTF8Tests: ByteUtilTest<'encodeUTF8Into'>[] = [
399
400
}
400
401
}
401
402
] ;
403
+
404
+
402
405
const toUTF8Tests : ByteUtilTest < 'toUTF8' > [ ] = [
403
406
{
404
407
name : 'should create utf8 string from buffer input' ,
@@ -416,6 +419,14 @@ const toUTF8Tests: ByteUtilTest<'toUTF8'>[] = [
416
419
expect ( output ) . to . be . a ( 'string' ) . with . lengthOf ( 0 ) ;
417
420
}
418
421
} ,
422
+ {
423
+ name : 'should insert replacement character fatal is false and string is invalid' ,
424
+ inputs : [ Buffer . from ( '616263f09fa4' , 'hex' ) , 0 , 7 , false ] ,
425
+ expectation ( { error, output } ) {
426
+ expect ( error ) . to . not . exist ;
427
+ expect ( output ) . to . equal ( 'abc\uFFFD' ) ;
428
+ }
429
+ } ,
419
430
{
420
431
name : 'should throw an error if fatal is set and string is invalid' ,
421
432
inputs : [ Buffer . from ( '616263f09fa4' , 'hex' ) , 0 , 7 , true ] ,
@@ -424,14 +435,168 @@ const toUTF8Tests: ByteUtilTest<'toUTF8'>[] = [
424
435
}
425
436
} ,
426
437
{
427
- name : 'should insert replacement character fatal is false and string is invalid' ,
428
- inputs : [ Buffer . from ( '616263f09fa4' , 'hex' ) , 0 , 7 , false ] ,
429
- expectation ( { error, output } ) {
430
- expect ( error ) . to . not . exist ;
431
- expect ( output ) . to . equal ( 'abc\uFFFD' ) ;
438
+ name : 'throw an error if fatal is set and string contains overlong encoding' ,
439
+ inputs : [ Buffer . from ( '11000000025f0005000000f08282ac0000' , 'hex' ) , 0 , 18 , true ] ,
440
+ expectation ( { error } ) {
441
+ expect ( error ) . to . match ( / I n v a l i d U T F - 8 s t r i n g i n B S O N d o c u m e n t / i) ;
442
+ }
443
+ } ,
444
+ {
445
+ name : 'throw an error if fatal is set and string contains invalid bytes' ,
446
+ inputs : [ Buffer . from ( 'abcff' , 'hex' ) , 0 , 2 , true ] ,
447
+ expectation ( { error } ) {
448
+ expect ( error ) . to . match ( / I n v a l i d U T F - 8 s t r i n g i n B S O N d o c u m e n t / i) ;
449
+ }
450
+ } ,
451
+ {
452
+ name : 'throw an error if fatal is set and string contains an unexpected continuation byte' ,
453
+ inputs : [ Buffer . from ( '7F80' , 'hex' ) , 0 , 2 , true ] ,
454
+ expectation ( { error } ) {
455
+ expect ( error ) . to . match ( / I n v a l i d U T F - 8 s t r i n g i n B S O N d o c u m e n t / i) ;
456
+ }
457
+ } ,
458
// Malformed UTF-8 inputs that must be rejected when `fatal` is true.
// NOTE: Buffer.from(str, 'hex') stops decoding at the first non-hex character,
// so the hex strings must NOT carry a `0x` prefix (that would yield an empty buffer
// and the malformed bytes would never reach toUTF8). The `end` argument equals the
// number of bytes encoded by each hex string.
{
  name: 'throws when provided with invalid code',
  inputs: [Buffer.from('FF', 'hex'), 0, 1, true],
  expectation({ error }) {
    expect(error).to.match(/Invalid UTF-8 string in BSON document/i);
  }
},
{
  name: 'throws when provided with ends early',
  inputs: [Buffer.from('C0', 'hex'), 0, 1, true],
  expectation({ error }) {
    expect(error).to.match(/Invalid UTF-8 string in BSON document/i);
  }
},
{
  name: 'throws when provided with ends early 2',
  inputs: [Buffer.from('E0', 'hex'), 0, 1, true],
  expectation({ error }) {
    expect(error).to.match(/Invalid UTF-8 string in BSON document/i);
  }
},
{
  name: 'throws when provided with invalid trail',
  inputs: [Buffer.from('C000', 'hex'), 0, 2, true],
  expectation({ error }) {
    expect(error).to.match(/Invalid UTF-8 string in BSON document/i);
  }
},
{
  name: 'throws when provided with invalid trail 2',
  inputs: [Buffer.from('C0C0', 'hex'), 0, 2, true],
  expectation({ error }) {
    expect(error).to.match(/Invalid UTF-8 string in BSON document/i);
  }
},
{
  name: 'throws when provided with invalid trail 3',
  inputs: [Buffer.from('E000', 'hex'), 0, 2, true],
  expectation({ error }) {
    expect(error).to.match(/Invalid UTF-8 string in BSON document/i);
  }
},
{
  name: 'throws when provided with invalid trail 4',
  inputs: [Buffer.from('E0C0', 'hex'), 0, 2, true],
  expectation({ error }) {
    expect(error).to.match(/Invalid UTF-8 string in BSON document/i);
  }
},
{
  name: 'throws when provided with invalid trail 5',
  inputs: [Buffer.from('E08000', 'hex'), 0, 3, true],
  expectation({ error }) {
    expect(error).to.match(/Invalid UTF-8 string in BSON document/i);
  }
},
{
  name: 'throws when provided with invalid trail 6',
  inputs: [Buffer.from('E080C0', 'hex'), 0, 3, true],
  expectation({ error }) {
    expect(error).to.match(/Invalid UTF-8 string in BSON document/i);
  }
},
{
  name: 'throws when provided with > 0x10FFFF',
  inputs: [Buffer.from('FC8080808080', 'hex'), 0, 6, true],
  expectation({ error }) {
    expect(error).to.match(/Invalid UTF-8 string in BSON document/i);
  }
},
{
  name: 'throws when provided with obsolete lead byte',
  inputs: [Buffer.from('FE8080808080', 'hex'), 0, 6, true],
  expectation({ error }) {
    expect(error).to.match(/Invalid UTF-8 string in BSON document/i);
  }
},

// Overlong encodings
{
  name: 'throws when provided with overlong U+0000 - 2 bytes',
  inputs: [Buffer.from('C080', 'hex'), 0, 2, true],
  expectation({ error }) {
    expect(error).to.match(/Invalid UTF-8 string in BSON document/i);
  }
},
{
  name: 'throws when provided with overlong U+0000 - 3 bytes',
  inputs: [Buffer.from('E08080', 'hex'), 0, 3, true],
  expectation({ error }) {
    expect(error).to.match(/Invalid UTF-8 string in BSON document/i);
  }
},
{
  name: 'throws when provided with overlong U+0000 - 4 bytes',
  inputs: [Buffer.from('F0808080', 'hex'), 0, 4, true],
  expectation({ error }) {
    expect(error).to.match(/Invalid UTF-8 string in BSON document/i);
  }
},
{
  name: 'throws when provided with overlong U+0000 - 5 bytes',
  inputs: [Buffer.from('F880808080', 'hex'), 0, 5, true],
  expectation({ error }) {
    expect(error).to.match(/Invalid UTF-8 string in BSON document/i);
  }
},
{
  name: 'throws when provided with overlong U+0000 - 6 bytes',
  inputs: [Buffer.from('FC8080808080', 'hex'), 0, 6, true],
  expectation({ error }) {
    expect(error).to.match(/Invalid UTF-8 string in BSON document/i);
  }
},

{
  name: 'throws when provided with overlong U+007F - 2 bytes',
  inputs: [Buffer.from('C1BF', 'hex'), 0, 2, true],
  expectation({ error }) {
    expect(error).to.match(/Invalid UTF-8 string in BSON document/i);
  }
},
{
  name: 'throws when provided with overlong U+007F - 3 bytes',
  inputs: [Buffer.from('E081BF', 'hex'), 0, 3, true],
  expectation({ error }) {
    expect(error).to.match(/Invalid UTF-8 string in BSON document/i);
  }
},
{
  name: 'throws when provided with overlong U+007F - 4 bytes',
  inputs: [Buffer.from('F08081BF', 'hex'), 0, 4, true],
  expectation({ error }) {
    expect(error).to.match(/Invalid UTF-8 string in BSON document/i);
  }
},
{
  name: 'throws when provided with overlong U+007F - 5 bytes',
  inputs: [Buffer.from('F8808081BF', 'hex'), 0, 5, true],
  expectation({ error }) {
    expect(error).to.match(/Invalid UTF-8 string in BSON document/i);
  }
},
{
  name: 'throws when provided with overlong U+007F - 6 bytes',
  inputs: [Buffer.from('FC80808081BF', 'hex'), 0, 6, true],
  expectation({ error }) {
    expect(error).to.match(/Invalid UTF-8 string in BSON document/i);
  }
},

{
  name: 'throws when provided with overlong U+07FF - 3 bytes',
  inputs: [Buffer.from('E09FBF', 'hex'), 0, 3, true],
  expectation({ error }) {
    expect(error).to.match(/Invalid UTF-8 string in BSON document/i);
  }
},
{
  name: 'throws when provided with overlong U+07FF - 4 bytes',
  inputs: [Buffer.from('F0809FBF', 'hex'), 0, 4, true],
  expectation({ error }) {
    expect(error).to.match(/Invalid UTF-8 string in BSON document/i);
  }
},
{
  name: 'throws when provided with overlong U+07FF - 5 bytes',
  inputs: [Buffer.from('F880809FBF', 'hex'), 0, 5, true],
  expectation({ error }) {
    expect(error).to.match(/Invalid UTF-8 string in BSON document/i);
  }
},
{
  name: 'throws when provided with overlong U+07FF - 6 bytes',
  inputs: [Buffer.from('FC8080809FBF', 'hex'), 0, 6, true],
  expectation({ error }) {
    expect(error).to.match(/Invalid UTF-8 string in BSON document/i);
  }
},

{
  name: 'throws when provided with overlong U+FFFF - 4 bytes',
  inputs: [Buffer.from('F08FBFBF', 'hex'), 0, 4, true],
  expectation({ error }) {
    expect(error).to.match(/Invalid UTF-8 string in BSON document/i);
  }
},
{
  name: 'throws when provided with overlong U+FFFF - 5 bytes',
  inputs: [Buffer.from('F8808FBFBF', 'hex'), 0, 5, true],
  expectation({ error }) {
    expect(error).to.match(/Invalid UTF-8 string in BSON document/i);
  }
},
{
  name: 'throws when provided with overlong U+FFFF - 6 bytes',
  inputs: [Buffer.from('FC80808FBFBF', 'hex'), 0, 6, true],
  expectation({ error }) {
    expect(error).to.match(/Invalid UTF-8 string in BSON document/i);
  }
},

{
  name: 'throws when provided with overlong U+10FFFF - 5 bytes',
  inputs: [Buffer.from('F8848FBFBF', 'hex'), 0, 5, true],
  expectation({ error }) {
    expect(error).to.match(/Invalid UTF-8 string in BSON document/i);
  }
},
{
  name: 'throws when provided with overlong U+10FFFF - 6 bytes',
  inputs: [Buffer.from('FC80848FBFBF', 'hex'), 0, 6, true],
  expectation({ error }) {
    expect(error).to.match(/Invalid UTF-8 string in BSON document/i);
  }
},

// UTF-16 surrogates encoded as code points in UTF-8
{
  name: 'throws when provided with lead surrogate',
  inputs: [Buffer.from('EDA080', 'hex'), 0, 3, true],
  expectation({ error }) {
    expect(error).to.match(/Invalid UTF-8 string in BSON document/i);
  }
},
{
  name: 'throws when provided with trail surrogate',
  inputs: [Buffer.from('EDB080', 'hex'), 0, 3, true],
  expectation({ error }) {
    expect(error).to.match(/Invalid UTF-8 string in BSON document/i);
  }
},
{
  name: 'throws when provided with surrogate pair',
  inputs: [Buffer.from('EDA080EDB080', 'hex'), 0, 6, true],
  expectation({ error }) {
    expect(error).to.match(/Invalid UTF-8 string in BSON document/i);
  }
}
434
598
] ;
599
+
435
600
const utf8ByteLengthTests : ByteUtilTest < 'utf8ByteLength' > [ ] = [
436
601
{
437
602
name : 'should return zero for empty string' ,
@@ -801,4 +966,54 @@ describe('ByteUtils', () => {
801
966
} ) ;
802
967
}
803
968
}
969
+
970
// Byte sequences that are not well-formed UTF-8. Each one must make
// nodeJsByteUtils.toUTF8 throw a BSONError when called with fatal = true.
const bad = [
  { encoding: 'utf-8', input: [0xff], name: 'invalid code' },
  { encoding: 'utf-8', input: [0xc0], name: 'ends early' },
  { encoding: 'utf-8', input: [0xe0], name: 'ends early 2' },
  { encoding: 'utf-8', input: [0xc0, 0x00], name: 'invalid trail' },
  { encoding: 'utf-8', input: [0xc0, 0xc0], name: 'invalid trail 2' },
  { encoding: 'utf-8', input: [0xe0, 0x00], name: 'invalid trail 3' },
  { encoding: 'utf-8', input: [0xe0, 0xc0], name: 'invalid trail 4' },
  { encoding: 'utf-8', input: [0xe0, 0x80, 0x00], name: 'invalid trail 5' },
  { encoding: 'utf-8', input: [0xe0, 0x80, 0xc0], name: 'invalid trail 6' },
  { encoding: 'utf-8', input: [0xfc, 0x80, 0x80, 0x80, 0x80, 0x80], name: '> 0x10FFFF' },
  { encoding: 'utf-8', input: [0xfe, 0x80, 0x80, 0x80, 0x80, 0x80], name: 'obsolete lead byte' },

  // Overlong encodings
  { encoding: 'utf-8', input: [0xc0, 0x80], name: 'overlong U+0000 - 2 bytes' },
  { encoding: 'utf-8', input: [0xe0, 0x80, 0x80], name: 'overlong U+0000 - 3 bytes' },
  { encoding: 'utf-8', input: [0xf0, 0x80, 0x80, 0x80], name: 'overlong U+0000 - 4 bytes' },
  { encoding: 'utf-8', input: [0xf8, 0x80, 0x80, 0x80, 0x80], name: 'overlong U+0000 - 5 bytes' },
  { encoding: 'utf-8', input: [0xfc, 0x80, 0x80, 0x80, 0x80, 0x80], name: 'overlong U+0000 - 6 bytes' },

  { encoding: 'utf-8', input: [0xc1, 0xbf], name: 'overlong U+007F - 2 bytes' },
  { encoding: 'utf-8', input: [0xe0, 0x81, 0xbf], name: 'overlong U+007F - 3 bytes' },
  { encoding: 'utf-8', input: [0xf0, 0x80, 0x81, 0xbf], name: 'overlong U+007F - 4 bytes' },
  { encoding: 'utf-8', input: [0xf8, 0x80, 0x80, 0x81, 0xbf], name: 'overlong U+007F - 5 bytes' },
  { encoding: 'utf-8', input: [0xfc, 0x80, 0x80, 0x80, 0x81, 0xbf], name: 'overlong U+007F - 6 bytes' },

  { encoding: 'utf-8', input: [0xe0, 0x9f, 0xbf], name: 'overlong U+07FF - 3 bytes' },
  { encoding: 'utf-8', input: [0xf0, 0x80, 0x9f, 0xbf], name: 'overlong U+07FF - 4 bytes' },
  { encoding: 'utf-8', input: [0xf8, 0x80, 0x80, 0x9f, 0xbf], name: 'overlong U+07FF - 5 bytes' },
  { encoding: 'utf-8', input: [0xfc, 0x80, 0x80, 0x80, 0x9f, 0xbf], name: 'overlong U+07FF - 6 bytes' },

  { encoding: 'utf-8', input: [0xf0, 0x8f, 0xbf, 0xbf], name: 'overlong U+FFFF - 4 bytes' },
  { encoding: 'utf-8', input: [0xf8, 0x80, 0x8f, 0xbf, 0xbf], name: 'overlong U+FFFF - 5 bytes' },
  { encoding: 'utf-8', input: [0xfc, 0x80, 0x80, 0x8f, 0xbf, 0xbf], name: 'overlong U+FFFF - 6 bytes' },

  { encoding: 'utf-8', input: [0xf8, 0x84, 0x8f, 0xbf, 0xbf], name: 'overlong U+10FFFF - 5 bytes' },
  { encoding: 'utf-8', input: [0xfc, 0x80, 0x84, 0x8f, 0xbf, 0xbf], name: 'overlong U+10FFFF - 6 bytes' },

  // UTF-16 surrogates encoded as code points in UTF-8
  { encoding: 'utf-8', input: [0xed, 0xa0, 0x80], name: 'lead surrogate' },
  { encoding: 'utf-8', input: [0xed, 0xb0, 0x80], name: 'trail surrogate' },
  { encoding: 'utf-8', input: [0xed, 0xa0, 0x80, 0xed, 0xb0, 0x80], name: 'surrogate pair' }
];

// One test per malformed sequence. Registered with plain `it` (not `it.only`)
// so the rest of the suite still runs.
for (const test of bad) {
  it(`${test.name}`, () => {
    expect(() =>
      nodeJsByteUtils.toUTF8(Uint8Array.from(test.input), 0, test.input.length, true)
    ).to.throw(BSONError);
  });
}
804
1019
} ) ;
0 commit comments