@@ -81,8 +81,10 @@ bufnr([{buf} [, {create}]]) Number Number of the buffer {buf}
81
81
bufwinid({buf} ) Number window ID of buffer {buf}
82
82
bufwinnr({buf} ) Number window number of buffer {buf}
83
83
byte2line({byte} ) Number line number at byte count {byte}
84
- byteidx({expr} , {nr} ) Number byte index of {nr} 'th char in {expr}
85
- byteidxcomp({expr} , {nr} ) Number byte index of {nr} 'th char in {expr}
84
+ byteidx({expr} , {nr} [, {utf16} ])
85
+ Number byte index of {nr} 'th char in {expr}
86
+ byteidxcomp({expr} , {nr} [, {utf16} ])
87
+ Number byte index of {nr} 'th char in {expr}
86
88
call({func} , {arglist} [, {dict} ])
87
89
any call {func} with arguments {arglist}
88
90
ceil({expr} ) Float round {expr} up
@@ -117,7 +119,7 @@ changenr() Number current change number
117
119
char2nr({expr} [, {utf8} ]) Number ASCII/UTF-8 value of first char in {expr}
118
120
charclass({string} ) Number character class of {string}
119
121
charcol({expr} [, {winid} ]) Number column number of cursor or mark
120
- charidx({string} , {idx} [, {countcc} ])
122
+ charidx({string} , {idx} [, {countcc} [, {utf16} ] ])
121
123
Number char index of byte {idx} in {string}
122
124
chdir({dir} ) String change current working directory
123
125
cindent({lnum} ) Number C indent for line {lnum}
@@ -604,6 +606,8 @@ strptime({format}, {timestring})
604
606
strridx({haystack} , {needle} [, {start} ])
605
607
Number last index of {needle} in {haystack}
606
608
strtrans({expr} ) String translate string to make it printable
609
+ strutf16len({string} [, {countcc} ])
610
+ Number number of UTF-16 code units in {string}
607
611
strwidth({expr} ) Number display cell length of the String {expr}
608
612
submatch({nr} [, {list} ]) String or List
609
613
specific match in ":s" or substitute()
@@ -704,6 +708,8 @@ undofile({name}) String undo file name for {name}
704
708
undotree() List undo file tree
705
709
uniq({list} [, {func} [, {dict} ]])
706
710
List remove adjacent duplicates from a list
711
+ utf16idx({string} , {idx} [, {countcc} [, {charidx} ]])
712
+ Number UTF-16 index of byte {idx} in {string}
707
713
values({dict} ) List values in {dict}
708
714
virtcol({expr} [, {list} ]) Number or List
709
715
screen column of cursor or mark
@@ -1363,7 +1369,7 @@ byte2line({byte}) *byte2line()*
1363
1369
< {not available when compiled without the | +byte_offset |
1364
1370
feature}
1365
1371
1366
- byteidx({expr} , {nr} ) *byteidx()*
1372
+ byteidx({expr} , {nr} [, {utf16} ]) *byteidx()*
1367
1373
Return byte index of the {nr} 'th character in the String
1368
1374
{expr} . Use zero for the first character; it then returns
1369
1375
zero.
@@ -1373,6 +1379,13 @@ byteidx({expr}, {nr}) *byteidx()*
1373
1379
length is added to the preceding base character. See
1374
1380
| byteidxcomp() | below for counting composing characters
1375
1381
separately.
1382
+ When {utf16} is present and TRUE, {nr} is used as the UTF-16
1383
+ index in the String {expr} instead of as the character index.
1384
+ The UTF-16 index is the index in the string when it is encoded
1385
+ with 16-bit words. If the specified UTF-16 index is in the
1386
+ middle of a character (e.g. in a 4-byte character), then the
1387
+ byte index of the first byte in the character is returned.
1388
+ Refer to | string-offset-encoding | for more information.
1376
1389
Example : >
1377
1390
echo matchstr(str, ".", byteidx(str, 3))
1378
1391
< will display the fourth character. Another way to do the
@@ -1384,11 +1397,17 @@ byteidx({expr}, {nr}) *byteidx()*
1384
1397
If there are fewer than {nr} characters, -1 is returned.
1385
1398
If there are exactly {nr} characters the length of the string
1386
1399
in bytes is returned.
1387
-
1400
+ See | charidx() | and | utf16idx() | for getting the character and
1401
+ UTF-16 index respectively from the byte index.
1402
+ Examples: >
1403
+ echo byteidx('a😊😊', 2) returns 5
1404
+ echo byteidx('a😊😊', 2, 1) returns 1
1405
+ echo byteidx('a😊😊', 3, 1) returns 5
1406
+ <
1388
1407
Can also be used as a | method | : >
1389
1408
GetName()->byteidx(idx)
1390
1409
1391
- byteidxcomp({expr} , {nr} ) *byteidxcomp()*
1410
+ byteidxcomp({expr} , {nr} [, {utf16} ]) *byteidxcomp()*
1392
1411
Like byteidx(), except that a composing character is counted
1393
1412
as a separate character. Example: >
1394
1413
let s = 'e' .. nr2char(0x301)
@@ -1493,27 +1512,36 @@ charcol({expr} [, {winid}]) *charcol()*
1493
1512
GetPos()->col()
1494
1513
<
1495
1514
*charidx()*
1496
- charidx({string} , {idx} [, {countcc} ])
1515
+ charidx({string} , {idx} [, {countcc} [, {utf16} ] ])
1497
1516
Return the character index of the byte at {idx} in {string} .
1498
1517
The index of the first character is zero.
1499
1518
If there are no multibyte characters the returned value is
1500
1519
equal to {idx} .
1520
+
1501
1521
When {countcc} is omitted or | FALSE | , then composing characters
1502
- are not counted separately, their byte length is
1503
- added to the preceding base character.
1522
+ are not counted separately, their byte length is added to the
1523
+ preceding base character.
1504
1524
When {countcc} is | TRUE | , then composing characters are
1505
1525
counted as separate characters.
1526
+
1527
+ When {utf16} is present and TRUE, {idx} is used as the UTF-16
1528
+ index in the String {string} instead of as the byte index.
1529
+
1506
1530
Returns -1 if the arguments are invalid or if {idx} is greater
1507
1531
than the index of the last byte in {string} . An error is
1508
1532
given if the first argument is not a string, the second
1509
1533
argument is not a number or when the third argument is present
1510
1534
and is not zero or one.
1535
+
1511
1536
See | byteidx() | and | byteidxcomp() | for getting the byte index
1512
- from the character index.
1537
+ from the character index and | utf16idx() | for getting the
1538
+ UTF-16 index from the character index.
1539
+ Refer to | string-offset-encoding | for more information.
1513
1540
Examples: >
1514
1541
echo charidx('áb́ć', 3) returns 1
1515
1542
echo charidx('áb́ć', 6, 1) returns 4
1516
1543
echo charidx('áb́ć', 16) returns -1
1544
+ echo charidx('a😊😊', 4, 0, 1) returns 2
1517
1545
<
1518
1546
Can also be used as a | method | : >
1519
1547
GetName()->charidx(idx)
@@ -9244,6 +9272,28 @@ strtrans({string}) *strtrans()*
9244
9272
Can also be used as a | method | : >
9245
9273
GetString()->strtrans()
9246
9274
9275
+ strutf16len({string} [, {countcc} ]) *strutf16len()*
9276
+ The result is a Number, which is the number of UTF-16 code
9277
+ units in String {string} (after converting it to UTF-16).
9278
+
9279
+ When {countcc} is TRUE, composing characters are counted
9280
+ separately.
9281
+ When {countcc} is omitted or FALSE, composing characters are
9282
+ ignored.
9283
+
9284
+ Returns zero on error.
9285
+
9286
+ Also see | strlen() | and | strcharlen() | .
9287
+ Examples: >
9288
+ echo strutf16len('a') returns 1
9289
+ echo strutf16len('©') returns 1
9290
+ echo strutf16len('😊') returns 2
9291
+ echo strutf16len('ą́') returns 1
9292
+ echo strutf16len('ą́', v:true) returns 3
9293
+ <
9294
+ Can also be used as a |method|: >
9295
+ GetText()->strutf16len()
9296
+ <
9247
9297
strwidth({string} ) *strwidth()*
9248
9298
The result is a Number, which is the number of display cells
9249
9299
String {string} occupies. A Tab character is counted as one
@@ -10059,6 +10109,34 @@ uniq({list} [, {func} [, {dict}]]) *uniq()* *E882*
10059
10109
10060
10110
Can also be used as a | method | : >
10061
10111
mylist->uniq()
10112
+ <
10113
+ *utf16idx()*
10114
+ utf16idx({string} , {idx} [, {countcc} [, {charidx} ]])
10115
+ Same as | charidx() | but returns the UTF-16 index of the byte
10116
+ at {idx} in {string} (after converting it to UTF-16).
10117
+
10118
+ When {charidx} is present and TRUE, {idx} is used as the
10119
+ character index in the String {string} instead of as the byte
10120
+ index.
10121
+ An {idx} in the middle of a UTF-8 sequence is rounded upwards
10122
+ to the end of that sequence.
10123
+
10124
+ See | byteidx() | and | byteidxcomp() | for getting the byte index
10125
+ from the UTF-16 index and | charidx() | for getting the
10126
+ character index from the UTF-16 index.
10127
+ Refer to | string-offset-encoding | for more information.
10128
+ Examples: >
10129
+ echo utf16idx('a😊😊', 3) returns 2
10130
+ echo utf16idx('a😊😊', 7) returns 4
10131
+ echo utf16idx('a😊😊', 1, 0, 1) returns 2
10132
+ echo utf16idx('a😊😊', 2, 0, 1) returns 4
10133
+ echo utf16idx('aą́c', 6) returns 2
10134
+ echo utf16idx('aą́c', 6, 1) returns 4
10135
+ echo utf16idx('a😊😊', 9) returns -1
10136
+ <
10137
+ Can also be used as a | method | : >
10138
+ GetName()->utf16idx(idx)
10139
+
10062
10140
10063
10141
values({dict} ) *values()*
10064
10142
Return a | List | with all the values of {dict} . The | List | is
0 commit comments