Skip to content

Commit 36d2225

Browse files
committed
YQLSUPPORT-5228: Fix Remove UDFs for NonASCII input
The signed char was used as the index value in the loop, collecting "to-be-removed" charset. As a result, processing all NonASCII bytes (i.e. greater than 127) leads to an invalid result. The patch changes the index value type to the unsigned one. Follows up #2836
1 parent 39a4da3 commit 36d2225

File tree

10 files changed

+175
-10
lines changed

10 files changed

+175
-10
lines changed

ydb/library/yql/udfs/common/string/string_udf.cpp

+8-8
Original file line numberDiff line numberDiff line change
@@ -538,11 +538,11 @@ namespace {
538538
std::string input(args[0].AsStringRef());
539539
const std::string_view remove(args[1].AsStringRef());
540540
std::array<bool, 256> chars{};
541-
for (const char c : remove) {
541+
for (const ui8 c : remove) {
542542
chars[c] = true;
543543
}
544544
size_t tpos = 0;
545-
for (const char c : input) {
545+
for (const ui8 c : input) {
546546
if (!chars[c]) {
547547
input[tpos++] = c;
548548
}
@@ -562,11 +562,11 @@ namespace {
562562
std::string input(arg1.AsStringRef());
563563
const std::string_view remove(arg2.AsStringRef());
564564
std::array<bool, 256> chars{};
565-
for (const char c : remove) {
565+
for (const ui8 c : remove) {
566566
chars[c] = true;
567567
}
568568
size_t tpos = 0;
569-
for (const char c : input) {
569+
for (const ui8 c : input) {
570570
if (!chars[c]) {
571571
input[tpos++] = c;
572572
}
@@ -586,7 +586,7 @@ namespace {
586586
std::string input(args[0].AsStringRef());
587587
const std::string_view remove(args[1].AsStringRef());
588588
std::array<bool, 256> chars{};
589-
for (const char c : remove) {
589+
for (const ui8 c : remove) {
590590
chars[c] = true;
591591
}
592592
for (auto it = input.cbegin(); it != input.cend(); ++it) {
@@ -606,7 +606,7 @@ namespace {
606606
std::string input(arg1.AsStringRef());
607607
const std::string_view remove(arg2.AsStringRef());
608608
std::array<bool, 256> chars{};
609-
for (const char c : remove) {
609+
for (const ui8 c : remove) {
610610
chars[c] = true;
611611
}
612612
for (auto it = input.cbegin(); it != input.cend(); ++it) {
@@ -626,7 +626,7 @@ namespace {
626626
std::string input(args[0].AsStringRef());
627627
const std::string_view remove(args[1].AsStringRef());
628628
std::array<bool, 256> chars{};
629-
for (const char c : remove) {
629+
for (const ui8 c : remove) {
630630
chars[c] = true;
631631
}
632632
for (auto it = input.crbegin(); it != input.crend(); ++it) {
@@ -646,7 +646,7 @@ namespace {
646646
std::string input(arg1.AsStringRef());
647647
const std::string_view remove(arg2.AsStringRef());
648648
std::array<bool, 256> chars{};
649-
for (const char c : remove) {
649+
for (const ui8 c : remove) {
650650
chars[c] = true;
651651
}
652652
for (auto it = input.crbegin(); it != input.crend(); ++it) {

ydb/library/yql/udfs/common/string/test/canondata/test.test_BlockFind_/results.txt

+5
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,11 @@
5656
"";
5757
%false;
5858
"2"
59+
];
60+
[
61+
"`\xD0\x9F\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82, \xD0\xBC\xD0\xB8\xD1\x80!`";
62+
%false;
63+
"23"
5964
]
6065
]
6166
}

ydb/library/yql/udfs/common/string/test/canondata/test.test_BlockRemove_/results.txt

+61
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,27 @@
6262
"DataType";
6363
"String"
6464
]
65+
];
66+
[
67+
"hwruall";
68+
[
69+
"DataType";
70+
"String"
71+
]
72+
];
73+
[
74+
"hwrufirst";
75+
[
76+
"DataType";
77+
"String"
78+
]
79+
];
80+
[
81+
"hwrulast";
82+
[
83+
"DataType";
84+
"String"
85+
]
6586
]
6687
]
6788
]
@@ -75,6 +96,9 @@
7596
"fda";
7697
"fds";
7798
"fdsa";
99+
"fdsa";
100+
"fdsa";
101+
"fdsa";
78102
"fdsa"
79103
];
80104
[
@@ -85,6 +109,9 @@
85109
"swedfg";
86110
"awedfg";
87111
"aswedfg";
112+
"aswedfg";
113+
"aswedfg";
114+
"aswedfg";
88115
"aswedfg"
89116
];
90117
[
@@ -95,6 +122,9 @@
95122
"sdadsaasd";
96123
"asdadsaad";
97124
"asdadsaasd";
125+
"asdadsaasd";
126+
"asdadsaasd";
127+
"asdadsaasd";
98128
"asdadsaasd"
99129
];
100130
[
@@ -105,6 +135,9 @@
105135
"gdfsassas";
106136
"gdsfsassa";
107137
"gdsfsassas";
138+
"gdsfsassas";
139+
"gdsfsassas";
140+
"gdsfsassas";
108141
"gdsfsassas"
109142
];
110143
[
@@ -115,7 +148,35 @@
115148
"";
116149
"";
117150
"";
151+
"";
152+
"";
153+
"";
118154
""
155+
];
156+
[
157+
"`\xD0\x9F\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82, \xD0\xBC\xD0\xB8\xD1\x80!`";
158+
"`\xD0\x9F\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82, \xD0\xBC\xD0\xB8\xD1\x80!`";
159+
[
160+
"YNCfgNC40LLQtdGCLCDQvNC40YAhYA=="
161+
];
162+
[
163+
"YNCf0YDQuNCy0LXRgiwg0LzQ0YAhYA=="
164+
];
165+
[
166+
"YNCfgNC40LLQtdGCLCDQvNC40YAhYA=="
167+
];
168+
[
169+
"YNCf0YDQuNCy0LXRgiwg0LzQuNEhYA=="
170+
];
171+
[
172+
"YNCf0dC40LLQtdGCLCDQvNC40YAhYA=="
173+
];
174+
[
175+
"YNCf0YDQuNCy0LXRgiwg0LzQuNEhYA=="
176+
];
177+
"\xD0\x9F\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82, \xD0\xBC\xD0\xB8\xD1\x80!";
178+
"\xD0\x9F\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82, \xD0\xBC\xD0\xB8\xD1\x80!`";
179+
"`\xD0\x9F\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82, \xD0\xBC\xD0\xB8\xD1\x80!"
119180
]
120181
]
121182
}

ydb/library/yql/udfs/common/string/test/canondata/test.test_BlockReplace_/results.txt

+10
Original file line numberDiff line numberDiff line change
@@ -116,6 +116,16 @@
116116
"";
117117
"";
118118
""
119+
];
120+
[
121+
"`\xD0\x9F\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82, \xD0\xBC\xD0\xB8\xD1\x80!`";
122+
"`\xD0\x9F\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82, \xD0\xBC\xD0\xB8\xD1\x80!`";
123+
"`\xD0\x9F\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82, \xD0\xBC\xD0\xB8\xD1\x80!`";
124+
"`\xD0\x9F\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82, \xD0\xBC\xD0\xB8\xD1\x80!`";
125+
"`\xD0\x9F\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82, \xD0\xBC\xD0\xB8\xD1\x80!`";
126+
"`\xD0\x9F\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82, \xD0\xBC\xD0\xB8\xD1\x80!`";
127+
"`\xD0\x9F\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82, \xD0\xBC\xD0\xB8\xD1\x80!`";
128+
"`\xD0\x9F\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82, \xD0\xBC\xD0\xB8\xD1\x80!`"
119129
]
120130
]
121131
}

ydb/library/yql/udfs/common/string/test/canondata/test.test_Find_/results.txt

+11
Original file line numberDiff line numberDiff line change
@@ -128,6 +128,17 @@
128128
"-1";
129129
"-1";
130130
"2"
131+
];
132+
[
133+
"`\xD0\x9F\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82, \xD0\xBC\xD0\xB8\xD1\x80!`";
134+
%false;
135+
%false;
136+
%false;
137+
%false;
138+
%false;
139+
"-1";
140+
"-1";
141+
"23"
131142
]
132143
]
133144
}

ydb/library/yql/udfs/common/string/test/canondata/test.test_Remove_/results.txt

+61
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,27 @@
6262
"DataType";
6363
"String"
6464
]
65+
];
66+
[
67+
"hwruall";
68+
[
69+
"DataType";
70+
"String"
71+
]
72+
];
73+
[
74+
"hwrufirst";
75+
[
76+
"DataType";
77+
"String"
78+
]
79+
];
80+
[
81+
"hwrulast";
82+
[
83+
"DataType";
84+
"String"
85+
]
6586
]
6687
]
6788
]
@@ -75,6 +96,9 @@
7596
"fda";
7697
"fds";
7798
"fdsa";
99+
"fdsa";
100+
"fdsa";
101+
"fdsa";
78102
"fdsa"
79103
];
80104
[
@@ -85,6 +109,9 @@
85109
"swedfg";
86110
"awedfg";
87111
"aswedfg";
112+
"aswedfg";
113+
"aswedfg";
114+
"aswedfg";
88115
"aswedfg"
89116
];
90117
[
@@ -95,6 +122,9 @@
95122
"sdadsaasd";
96123
"asdadsaad";
97124
"asdadsaasd";
125+
"asdadsaasd";
126+
"asdadsaasd";
127+
"asdadsaasd";
98128
"asdadsaasd"
99129
];
100130
[
@@ -105,6 +135,9 @@
105135
"gdfsassas";
106136
"gdsfsassa";
107137
"gdsfsassas";
138+
"gdsfsassas";
139+
"gdsfsassas";
140+
"gdsfsassas";
108141
"gdsfsassas"
109142
];
110143
[
@@ -115,7 +148,35 @@
115148
"";
116149
"";
117150
"";
151+
"";
152+
"";
153+
"";
118154
""
155+
];
156+
[
157+
"`\xD0\x9F\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82, \xD0\xBC\xD0\xB8\xD1\x80!`";
158+
"`\xD0\x9F\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82, \xD0\xBC\xD0\xB8\xD1\x80!`";
159+
[
160+
"YNCfgNC40LLQtdGCLCDQvNC40YAhYA=="
161+
];
162+
[
163+
"YNCf0YDQuNCy0LXRgiwg0LzQuIAhYA=="
164+
];
165+
[
166+
"YNCfgNC40LLQtdGCLCDQvNC40YAhYA=="
167+
];
168+
[
169+
"YNCf0YDQuNCy0LXRgiwg0LzQuIAhYA=="
170+
];
171+
[
172+
"YNCfgNC40LLQtdGCLCDQvNC40YAhYA=="
173+
];
174+
[
175+
"YNCf0YDQuNCy0LXRgiwg0LzQuIAhYA=="
176+
];
177+
"\xD0\x9F\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82, \xD0\xBC\xD0\xB8\xD1\x80!";
178+
"\xD0\x9F\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82, \xD0\xBC\xD0\xB8\xD1\x80!`";
179+
"`\xD0\x9F\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82, \xD0\xBC\xD0\xB8\xD1\x80!"
119180
]
120181
]
121182
}

ydb/library/yql/udfs/common/string/test/canondata/test.test_Replace_/results.txt

+10
Original file line numberDiff line numberDiff line change
@@ -116,6 +116,16 @@
116116
"";
117117
"";
118118
""
119+
];
120+
[
121+
"`\xD0\x9F\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82, \xD0\xBC\xD0\xB8\xD1\x80!`";
122+
"`\xD0\x9F\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82, \xD0\xBC\xD0\xB8\xD1\x80!`";
123+
"`\xD0\x9F\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82, \xD0\xBC\xD0\xB8\xD1\x80!`";
124+
"`\xD0\x9F\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82, \xD0\xBC\xD0\xB8\xD1\x80!`";
125+
"`\xD0\x9F\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82, \xD0\xBC\xD0\xB8\xD1\x80!`";
126+
"`\xD0\x9F\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82, \xD0\xBC\xD0\xB8\xD1\x80!`";
127+
"`\xD0\x9F\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82, \xD0\xBC\xD0\xB8\xD1\x80!`";
128+
"`\xD0\x9F\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82, \xD0\xBC\xD0\xB8\xD1\x80!`"
119129
]
120130
]
121131
}

ydb/library/yql/udfs/common/string/test/cases/BlockRemove.sql

+4-1
Original file line numberDiff line numberDiff line change
@@ -9,5 +9,8 @@ SELECT
99
String::RemoveFirst(value, "as") AS first2,
1010
String::RemoveLast(value, "as") AS last2,
1111
String::RemoveFirst(value, "") AS first3,
12-
String::RemoveLast(value, "") AS last3
12+
String::RemoveLast(value, "") AS last3,
13+
String::RemoveAll(value, "`") AS hwruall,
14+
String::RemoveFirst(value, "`") AS hwrufirst,
15+
String::RemoveLast(value, "`") AS hwrulast,
1316
FROM Input;

ydb/library/yql/udfs/common/string/test/cases/Remove.sql

+4-1
Original file line numberDiff line numberDiff line change
@@ -7,5 +7,8 @@ SELECT
77
String::RemoveFirst(value, "as") AS first2,
88
String::RemoveLast(value, "as") AS last2,
99
String::RemoveFirst(value, "") AS first3,
10-
String::RemoveLast(value, "") AS last3
10+
String::RemoveLast(value, "") AS last3,
11+
String::RemoveAll(value, "`") AS hwruall,
12+
String::RemoveFirst(value, "`") AS hwrufirst,
13+
String::RemoveLast(value, "`") AS hwrulast,
1114
FROM Input;

ydb/library/yql/udfs/common/string/test/cases/default.in

+1
Original file line numberDiff line numberDiff line change
@@ -3,3 +3,4 @@
33
{"key"="3";"subkey"="3";"value"="asdadsaasd"};
44
{"key"="4";"subkey"="4";"value"="gdsfsassas"};
55
{"key"="5";"subkey"="5";"value"=""};
6+
{"key"="6";"subkey"="6";"value"="`Привет, мир!`"};

0 commit comments

Comments
 (0)