Skip to content

Commit 0b69d72

Browse files
committed
YQLSUPPORT-5228: Fix String::RemoveAll for non-ASCII input
A signed char was used as the index value in the loops that collect and look up the "to-be-removed" character set. As a result, every non-ASCII byte (i.e. a byte with a value greater than 127) was treated as a negative index, which led to an invalid result. This patch changes the type of the index value to an unsigned one (ui8). Follows up #2836.
1 parent c42eeee commit 0b69d72

File tree

9 files changed

+99
-4
lines changed

9 files changed

+99
-4
lines changed

ydb/library/yql/udfs/common/string/string_udf.cpp

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -538,11 +538,11 @@ namespace {
538538
std::string input(args[0].AsStringRef());
539539
const std::string_view remove(args[1].AsStringRef());
540540
std::array<bool, 256> chars{};
541-
for (const char c : remove) {
541+
for (const ui8 c : remove) {
542542
chars[c] = true;
543543
}
544544
size_t tpos = 0;
545-
for (const char c : input) {
545+
for (const ui8 c : input) {
546546
if (!chars[c]) {
547547
input[tpos++] = c;
548548
}
@@ -562,11 +562,11 @@ namespace {
562562
std::string input(arg1.AsStringRef());
563563
const std::string_view remove(arg2.AsStringRef());
564564
std::array<bool, 256> chars{};
565-
for (const char c : remove) {
565+
for (const ui8 c : remove) {
566566
chars[c] = true;
567567
}
568568
size_t tpos = 0;
569-
for (const char c : input) {
569+
for (const ui8 c : input) {
570570
if (!chars[c]) {
571571
input[tpos++] = c;
572572
}

ydb/library/yql/udfs/common/string/test/canondata/test.test_BlockFind_/results.txt

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,11 @@
5656
"";
5757
%false;
5858
"2"
59+
];
60+
[
61+
"`\xD0\x9F\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82, \xD0\xBC\xD0\xB8\xD1\x80!`";
62+
%false;
63+
"23"
5964
]
6065
]
6166
}

ydb/library/yql/udfs/common/string/test/canondata/test.test_BlockRemove_/results.txt

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -116,6 +116,28 @@
116116
"";
117117
"";
118118
""
119+
];
120+
[
121+
"`\xD0\x9F\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82, \xD0\xBC\xD0\xB8\xD1\x80!`";
122+
"`\xD0\x9F\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82, \xD0\xBC\xD0\xB8\xD1\x80!`";
123+
[
124+
"YNCf0YDQ0LLQtdGCLCDQvNC40YAhYA=="
125+
];
126+
[
127+
"YNCf0YDQuNCy0LXRgiwg0LzQ0YAhYA=="
128+
];
129+
[
130+
"YNCf0dC40LLQtdGCLCDQvNC40YAhYA=="
131+
];
132+
[
133+
"YNCf0YDQuNCy0LXRgiwg0LzQuNEhYA=="
134+
];
135+
[
136+
"YNCf0dC40LLQtdGCLCDQvNC40YAhYA=="
137+
];
138+
[
139+
"YNCf0YDQuNCy0LXRgiwg0LzQuNEhYA=="
140+
]
119141
]
120142
]
121143
}

ydb/library/yql/udfs/common/string/test/canondata/test.test_BlockReplace_/results.txt

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -116,6 +116,16 @@
116116
"";
117117
"";
118118
""
119+
];
120+
[
121+
"`\xD0\x9F\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82, \xD0\xBC\xD0\xB8\xD1\x80!`";
122+
"`\xD0\x9F\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82, \xD0\xBC\xD0\xB8\xD1\x80!`";
123+
"`\xD0\x9F\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82, \xD0\xBC\xD0\xB8\xD1\x80!`";
124+
"`\xD0\x9F\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82, \xD0\xBC\xD0\xB8\xD1\x80!`";
125+
"`\xD0\x9F\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82, \xD0\xBC\xD0\xB8\xD1\x80!`";
126+
"`\xD0\x9F\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82, \xD0\xBC\xD0\xB8\xD1\x80!`";
127+
"`\xD0\x9F\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82, \xD0\xBC\xD0\xB8\xD1\x80!`";
128+
"`\xD0\x9F\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82, \xD0\xBC\xD0\xB8\xD1\x80!`"
119129
]
120130
]
121131
}

ydb/library/yql/udfs/common/string/test/canondata/test.test_Find_/results.txt

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -128,6 +128,17 @@
128128
"-1";
129129
"-1";
130130
"2"
131+
];
132+
[
133+
"`\xD0\x9F\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82, \xD0\xBC\xD0\xB8\xD1\x80!`";
134+
%false;
135+
%false;
136+
%false;
137+
%false;
138+
%false;
139+
"-1";
140+
"-1";
141+
"23"
131142
]
132143
]
133144
}

ydb/library/yql/udfs/common/string/test/canondata/test.test_Remove_/results.txt

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,13 @@
2121
"String"
2222
]
2323
];
24+
[
25+
"hwru";
26+
[
27+
"DataType";
28+
"String"
29+
]
30+
];
2431
[
2532
"first";
2633
[
@@ -70,6 +77,7 @@
7077
[
7178
"fdsa";
7279
"fd";
80+
"fdsa";
7381
"fds";
7482
"fds";
7583
"fda";
@@ -80,6 +88,7 @@
8088
[
8189
"aswedfg";
8290
"wedfg";
91+
"aswedfg";
8392
"swedfg";
8493
"swedfg";
8594
"swedfg";
@@ -90,6 +99,7 @@
9099
[
91100
"asdadsaasd";
92101
"ddd";
102+
"asdadsaasd";
93103
"sdadsaasd";
94104
"asdadsasd";
95105
"sdadsaasd";
@@ -100,6 +110,7 @@
100110
[
101111
"gdsfsassas";
102112
"gdf";
113+
"gdsfsassas";
103114
"gdsfsssas";
104115
"gdsfsasss";
105116
"gdfsassas";
@@ -115,7 +126,31 @@
115126
"";
116127
"";
117128
"";
129+
"";
118130
""
131+
];
132+
[
133+
"`\xD0\x9F\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82, \xD0\xBC\xD0\xB8\xD1\x80!`";
134+
"`\xD0\x9F\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82, \xD0\xBC\xD0\xB8\xD1\x80!`";
135+
"\xD0\x9F\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82, \xD0\xBC\xD0\xB8\xD1\x80!";
136+
[
137+
"YNCf0YDQ0LLQtdGCLCDQvNC40YAhYA=="
138+
];
139+
[
140+
"YNCf0YDQuNCy0LXRgiwg0LzQ0YAhYA=="
141+
];
142+
[
143+
"YNCf0YDQ0LLQtdGCLCDQvNC40YAhYA=="
144+
];
145+
[
146+
"YNCf0YDQuNCy0LXRgiwg0LzQ0YAhYA=="
147+
];
148+
[
149+
"YNCf0YDQ0LLQtdGCLCDQvNC40YAhYA=="
150+
];
151+
[
152+
"YNCf0YDQuNCy0LXRgiwg0LzQ0YAhYA=="
153+
]
119154
]
120155
]
121156
}

ydb/library/yql/udfs/common/string/test/canondata/test.test_Replace_/results.txt

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -116,6 +116,16 @@
116116
"";
117117
"";
118118
""
119+
];
120+
[
121+
"`\xD0\x9F\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82, \xD0\xBC\xD0\xB8\xD1\x80!`";
122+
"`\xD0\x9F\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82, \xD0\xBC\xD0\xB8\xD1\x80!`";
123+
"`\xD0\x9F\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82, \xD0\xBC\xD0\xB8\xD1\x80!`";
124+
"`\xD0\x9F\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82, \xD0\xBC\xD0\xB8\xD1\x80!`";
125+
"`\xD0\x9F\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82, \xD0\xBC\xD0\xB8\xD1\x80!`";
126+
"`\xD0\x9F\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82, \xD0\xBC\xD0\xB8\xD1\x80!`";
127+
"`\xD0\x9F\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82, \xD0\xBC\xD0\xB8\xD1\x80!`";
128+
"`\xD0\x9F\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82, \xD0\xBC\xD0\xB8\xD1\x80!`"
119129
]
120130
]
121131
}

ydb/library/yql/udfs/common/string/test/cases/Remove.sql

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
SELECT
33
value,
44
String::RemoveAll(value, "as") AS all,
5+
String::RemoveAll(value, "`") AS hwru,
56
String::RemoveFirst(value, "a") AS first,
67
String::RemoveLast(value, "a") AS last,
78
String::RemoveFirst(value, "as") AS first2,

ydb/library/yql/udfs/common/string/test/cases/default.in

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,3 +3,4 @@
33
{"key"="3";"subkey"="3";"value"="asdadsaasd"};
44
{"key"="4";"subkey"="4";"value"="gdsfsassas"};
55
{"key"="5";"subkey"="5";"value"=""};
6+
{"key"="6";"subkey"="6";"value"="`Привет, мир!`"};

0 commit comments

Comments
 (0)