@@ -81,8 +81,10 @@ bufnr([{buf} [, {create}]]) Number Number of the buffer {buf}
81
81
bufwinid({buf} ) Number window ID of buffer {buf}
82
82
bufwinnr({buf} ) Number window number of buffer {buf}
83
83
byte2line({byte} ) Number line number at byte count {byte}
84
- byteidx({expr} , {nr} ) Number byte index of {nr} 'th char in {expr}
85
- byteidxcomp({expr} , {nr} ) Number byte index of {nr} 'th char in {expr}
84
+ byteidx({expr} , {nr} [, {utf16} ])
85
+ Number byte index of {nr} 'th char in {expr}
86
+ byteidxcomp({expr} , {nr} [, {utf16} ])
87
+ Number byte index of {nr} 'th char in {expr}
86
88
call({func} , {arglist} [, {dict} ])
87
89
any call {func} with arguments {arglist}
88
90
ceil({expr} ) Float round {expr} up
@@ -117,7 +119,7 @@ changenr() Number current change number
117
119
char2nr({expr} [, {utf8} ]) Number ASCII/UTF-8 value of first char in {expr}
118
120
charclass({string} ) Number character class of {string}
119
121
charcol({expr} [, {winid} ]) Number column number of cursor or mark
120
- charidx({string} , {idx} [, {countcc} ])
122
+ charidx({string} , {idx} [, {countcc} [, {utf16} ] ])
121
123
Number char index of byte {idx} in {string}
122
124
chdir({dir} ) String change current working directory
123
125
cindent({lnum} ) Number C indent for line {lnum}
@@ -604,6 +606,8 @@ strptime({format}, {timestring})
604
606
strridx({haystack} , {needle} [, {start} ])
605
607
Number last index of {needle} in {haystack}
606
608
strtrans({expr} ) String translate string to make it printable
609
+ strutf16len({string} [, {countcc} ])
610
+ Number number of UTF-16 code units in {string}
607
611
strwidth({expr} ) Number display cell length of the String {expr}
608
612
submatch({nr} [, {list} ]) String or List
609
613
specific match in ":s" or substitute()
@@ -704,6 +708,8 @@ undofile({name}) String undo file name for {name}
704
708
undotree() List undo file tree
705
709
uniq({list} [, {func} [, {dict} ]])
706
710
List remove adjacent duplicates from a list
711
+ utf16idx({string} , {idx} [, {countcc} [, {charidx} ]])
712
+ Number UTF-16 index of byte {idx} in {string}
707
713
values({dict} ) List values in {dict}
708
714
virtcol({expr} [, {list} ]) Number or List
709
715
screen column of cursor or mark
@@ -1363,7 +1369,7 @@ byte2line({byte}) *byte2line()*
1363
1369
< {not available when compiled without the | +byte_offset |
1364
1370
feature}
1365
1371
1366
- byteidx({expr} , {nr} ) *byteidx()*
1372
+ byteidx({expr} , {nr} [, {utf16} ]) *byteidx()*
1367
1373
Return byte index of the {nr} 'th character in the String
1368
1374
{expr} . Use zero for the first character; it then returns
1369
1375
zero.
@@ -1373,6 +1379,13 @@ byteidx({expr}, {nr}) *byteidx()*
1373
1379
length is added to the preceding base character. See
1374
1380
| byteidxcomp() | below for counting composing characters
1375
1381
separately.
1382
+ When {utf16} is TRUE, {nr} is used as the UTF-16 index in the
1383
+ String {expr} instead of as the character index. The UTF-16
1384
+ index is the index in the string when it is encoded with
1385
+ 16-bit words. If the specified UTF-16 index is in the middle
1386
+ of a character (e.g. in a 4-byte character), then the byte
1387
+ index of the first byte in the character is returned.
1388
+ Refer to | string-offset-encoding | for more information.
1376
1389
Example : >
1377
1390
echo matchstr(str, ".", byteidx(str, 3))
1378
1391
< will display the fourth character. Another way to do the
@@ -1384,11 +1397,17 @@ byteidx({expr}, {nr}) *byteidx()*
1384
1397
If there are fewer than {nr} characters, -1 is returned.
1385
1398
If there are exactly {nr} characters the length of the string
1386
1399
in bytes is returned.
1387
-
1400
+ See | charidx() | and | utf16idx() | for getting the character and
1401
+ UTF-16 index respectively from the byte index.
1402
+ Examples: >
1403
+ echo byteidx('a😊😊', 2) returns 5
1404
+ echo byteidx('a😊😊', 2, 1) returns 1
1405
+ echo byteidx('a😊😊', 3, 1) returns 5
1406
+ <
1388
1407
Can also be used as a | method | : >
1389
1408
GetName()->byteidx(idx)
1390
1409
1391
- byteidxcomp({expr} , {nr} ) *byteidxcomp()*
1410
+ byteidxcomp({expr} , {nr} [, {utf16} ]) *byteidxcomp()*
1392
1411
Like byteidx(), except that a composing character is counted
1393
1412
as a separate character. Example: >
1394
1413
let s = 'e' .. nr2char(0x301)
@@ -1493,7 +1512,7 @@ charcol({expr} [, {winid}]) *charcol()*
1493
1512
GetPos()->col()
1494
1513
<
1495
1514
*charidx()*
1496
- charidx({string} , {idx} [, {countcc} ])
1515
+ charidx({string} , {idx} [, {countcc} [, {utf16} ] ])
1497
1516
Return the character index of the byte at {idx} in {string} .
1498
1517
The index of the first character is zero.
1499
1518
If there are no multibyte characters the returned value is
@@ -1503,17 +1522,22 @@ charidx({string}, {idx} [, {countcc}])
1503
1522
added to the preceding base character.
1504
1523
When {countcc} is | TRUE | , then composing characters are
1505
1524
counted as separate characters.
1525
+ When {utf16} is TRUE, {idx} is used as the UTF-16 index in the
1526
+ String {string} instead of as the byte index.
1506
1527
Returns -1 if the arguments are invalid or if {idx} is greater
1507
1528
than the index of the last byte in {string} . An error is
1508
1529
given if the first argument is not a string, the second
1509
1530
argument is not a number or when the third argument is present
1510
1531
and is not zero or one.
1511
1532
See | byteidx() | and | byteidxcomp() | for getting the byte index
1512
- from the character index.
1533
+ from the character index and | utf16idx() | for getting the
1534
+ UTF-16 index from the character index.
1535
+ Refer to | string-offset-encoding | for more information.
1513
1536
Examples: >
1514
1537
echo charidx('áb́ć', 3) returns 1
1515
1538
echo charidx('áb́ć', 6, 1) returns 4
1516
1539
echo charidx('áb́ć', 16) returns -1
1540
+ echo charidx('a😊😊', 4, 0, 1) returns 2
1517
1541
<
1518
1542
Can also be used as a | method | : >
1519
1543
GetName()->charidx(idx)
@@ -9243,6 +9267,27 @@ strtrans({string}) *strtrans()*
9243
9267
Can also be used as a | method | : >
9244
9268
GetString()->strtrans()
9245
9269
9270
+ strutf16len({string} [, {countcc} ]) *strutf16len()*
9271
+ The result is a Number, which is the number of UTF-16 code
9272
+ units in String {string} .
9273
+ When {countcc} is TRUE, composing characters are counted
9274
+ separately.
9275
+ When {countcc} is omitted or FALSE, composing characters are
9276
+ ignored.
9277
+
9278
+ Returns zero on error.
9279
+
9280
+ Also see | strlen() | and | strcharlen() | .
9281
+ Examples: >
9282
+ echo strutf16len('a') returns 1
9283
+ echo strutf16len('©') returns 1
9284
+ echo strutf16len('😊') returns 2
9285
+ echo strutf16len('ą́') returns 1
9286
+ echo strutf16len('ą́', v:true) returns 3
9287
+
9288
+ Can also be used as a |method|: >
9289
+ GetText()->strutf16len()
9290
+ <
9246
9291
strwidth({string} ) *strwidth()*
9247
9292
The result is a Number, which is the number of display cells
9248
9293
String {string} occupies. A Tab character is counted as one
@@ -10058,6 +10103,31 @@ uniq({list} [, {func} [, {dict}]]) *uniq()* *E882*
10058
10103
10059
10104
Can also be used as a | method | : >
10060
10105
mylist->uniq()
10106
+ <
10107
+ *utf16idx()*
10108
+ utf16idx({string} , {idx} [, {countcc} [, {charidx} ]])
10109
+ Same as | charidx() | but returns the UTF-16 index of the byte
10110
+ at {idx} in {string} .
10111
+ When {charidx} is TRUE, {idx} is used as the character index
10112
+ in the String {string} instead of as the byte index.
10113
+ An {idx} in the middle of a UTF-8 sequence is rounded upwards
10114
+ to the end of that sequence.
10115
+ See | byteidx() | and | byteidxcomp() | for getting the byte index
10116
+ from the UTF-16 index and | charidx() | for getting the
10117
+ character index from the UTF-16 index.
10118
+ Refer to | string-offset-encoding | for more information.
10119
+ Examples: >
10120
+ echo utf16idx('a😊😊', 3) returns 2
10121
+ echo utf16idx('a😊😊', 7) returns 4
10122
+ echo utf16idx('a😊😊', 1, 0, 1) returns 2
10123
+ echo utf16idx('a😊😊', 2, 0, 1) returns 4
10124
+ echo utf16idx('aą́c', 6) returns 2
10125
+ echo utf16idx('aą́c', 6, 1) returns 4
10126
+ echo utf16idx('a😊😊', 9) returns -1
10127
+ <
10128
+ Can also be used as a | method | : >
10129
+ GetName()->utf16idx(idx)
10130
+
10061
10131
10062
10132
values({dict} ) *values()*
10063
10133
Return a | List | with all the values of {dict} . The | List | is
0 commit comments