Skip to content

Commit 2ed4009

Browse files
authored
YQL-18464: Fix Remove UDFs for NonASCII input (#4789)
1 parent 95371ef commit 2ed4009

File tree

10 files changed

+155
-14
lines changed

10 files changed

+155
-14
lines changed

ydb/library/yql/udfs/common/string/string_udf.cpp

+12-12
Original file line numberDiff line numberDiff line change
@@ -538,11 +538,11 @@ namespace {
538538
std::string input(args[0].AsStringRef());
539539
const std::string_view remove(args[1].AsStringRef());
540540
std::array<bool, 256> chars{};
541-
for (const char c : remove) {
541+
for (const ui8 c : remove) {
542542
chars[c] = true;
543543
}
544544
size_t tpos = 0;
545-
for (const char c : input) {
545+
for (const ui8 c : input) {
546546
if (!chars[c]) {
547547
input[tpos++] = c;
548548
}
@@ -562,11 +562,11 @@ namespace {
562562
std::string input(arg1.AsStringRef());
563563
const std::string_view remove(arg2.AsStringRef());
564564
std::array<bool, 256> chars{};
565-
for (const char c : remove) {
565+
for (const ui8 c : remove) {
566566
chars[c] = true;
567567
}
568568
size_t tpos = 0;
569-
for (const char c : input) {
569+
for (const ui8 c : input) {
570570
if (!chars[c]) {
571571
input[tpos++] = c;
572572
}
@@ -586,11 +586,11 @@ namespace {
586586
std::string input(args[0].AsStringRef());
587587
const std::string_view remove(args[1].AsStringRef());
588588
std::array<bool, 256> chars{};
589-
for (const char c : remove) {
589+
for (const ui8 c : remove) {
590590
chars[c] = true;
591591
}
592592
for (auto it = input.cbegin(); it != input.cend(); ++it) {
593-
if (chars[*it]) {
593+
if (chars[static_cast<ui8>(*it)]) {
594594
input.erase(it);
595595
return valueBuilder->NewString(input);
596596
}
@@ -606,11 +606,11 @@ namespace {
606606
std::string input(arg1.AsStringRef());
607607
const std::string_view remove(arg2.AsStringRef());
608608
std::array<bool, 256> chars{};
609-
for (const char c : remove) {
609+
for (const ui8 c : remove) {
610610
chars[c] = true;
611611
}
612612
for (auto it = input.cbegin(); it != input.cend(); ++it) {
613-
if (chars[*it]) {
613+
if (chars[static_cast<ui8>(*it)]) {
614614
input.erase(it);
615615
return sink(TBlockItem(input));
616616
}
@@ -626,11 +626,11 @@ namespace {
626626
std::string input(args[0].AsStringRef());
627627
const std::string_view remove(args[1].AsStringRef());
628628
std::array<bool, 256> chars{};
629-
for (const char c : remove) {
629+
for (const ui8 c : remove) {
630630
chars[c] = true;
631631
}
632632
for (auto it = input.crbegin(); it != input.crend(); ++it) {
633-
if (chars[*it]) {
633+
if (chars[static_cast<ui8>(*it)]) {
634634
input.erase(input.crend() - it - 1, 1);
635635
return valueBuilder->NewString(input);
636636
}
@@ -646,11 +646,11 @@ namespace {
646646
std::string input(arg1.AsStringRef());
647647
const std::string_view remove(arg2.AsStringRef());
648648
std::array<bool, 256> chars{};
649-
for (const char c : remove) {
649+
for (const ui8 c : remove) {
650650
chars[c] = true;
651651
}
652652
for (auto it = input.crbegin(); it != input.crend(); ++it) {
653-
if (chars[*it]) {
653+
if (chars[static_cast<ui8>(*it)]) {
654654
input.erase(input.crend() - it - 1, 1);
655655
return sink(TBlockItem(input));
656656
}

ydb/library/yql/udfs/common/string/test/canondata/test.test_BlockFind_/results.txt

+5
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,11 @@
5656
"";
5757
%false;
5858
"2"
59+
];
60+
[
61+
"`\xD0\x9F\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82, \xD0\xBC\xD0\xB8\xD1\x80!`";
62+
%false;
63+
"23"
5964
]
6065
]
6166
}

ydb/library/yql/udfs/common/string/test/canondata/test.test_BlockRemove_/results.txt

+49
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,27 @@
6262
"DataType";
6363
"String"
6464
]
65+
];
66+
[
67+
"hwruall";
68+
[
69+
"DataType";
70+
"String"
71+
]
72+
];
73+
[
74+
"hwrufirst";
75+
[
76+
"DataType";
77+
"String"
78+
]
79+
];
80+
[
81+
"hwrulast";
82+
[
83+
"DataType";
84+
"String"
85+
]
6586
]
6687
]
6788
]
@@ -75,6 +96,9 @@
7596
"fda";
7697
"fds";
7798
"fdsa";
99+
"fdsa";
100+
"fdsa";
101+
"fdsa";
78102
"fdsa"
79103
];
80104
[
@@ -85,6 +109,9 @@
85109
"swedfg";
86110
"awedfg";
87111
"aswedfg";
112+
"aswedfg";
113+
"aswedfg";
114+
"aswedfg";
88115
"aswedfg"
89116
];
90117
[
@@ -95,6 +122,9 @@
95122
"sdadsaasd";
96123
"asdadsaad";
97124
"asdadsaasd";
125+
"asdadsaasd";
126+
"asdadsaasd";
127+
"asdadsaasd";
98128
"asdadsaasd"
99129
];
100130
[
@@ -105,6 +135,9 @@
105135
"gdfsassas";
106136
"gdsfsassa";
107137
"gdsfsassas";
138+
"gdsfsassas";
139+
"gdsfsassas";
140+
"gdsfsassas";
108141
"gdsfsassas"
109142
];
110143
[
@@ -115,7 +148,23 @@
115148
"";
116149
"";
117150
"";
151+
"";
152+
"";
153+
"";
118154
""
155+
];
156+
[
157+
"`\xD0\x9F\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82, \xD0\xBC\xD0\xB8\xD1\x80!`";
158+
"`\xD0\x9F\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82, \xD0\xBC\xD0\xB8\xD1\x80!`";
159+
"`\xD0\x9F\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82, \xD0\xBC\xD0\xB8\xD1\x80!`";
160+
"`\xD0\x9F\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82, \xD0\xBC\xD0\xB8\xD1\x80!`";
161+
"`\xD0\x9F\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82, \xD0\xBC\xD0\xB8\xD1\x80!`";
162+
"`\xD0\x9F\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82, \xD0\xBC\xD0\xB8\xD1\x80!`";
163+
"`\xD0\x9F\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82, \xD0\xBC\xD0\xB8\xD1\x80!`";
164+
"`\xD0\x9F\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82, \xD0\xBC\xD0\xB8\xD1\x80!`";
165+
"\xD0\x9F\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82, \xD0\xBC\xD0\xB8\xD1\x80!";
166+
"\xD0\x9F\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82, \xD0\xBC\xD0\xB8\xD1\x80!`";
167+
"`\xD0\x9F\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82, \xD0\xBC\xD0\xB8\xD1\x80!"
119168
]
120169
]
121170
}

ydb/library/yql/udfs/common/string/test/canondata/test.test_BlockReplace_/results.txt

+10
Original file line numberDiff line numberDiff line change
@@ -116,6 +116,16 @@
116116
"";
117117
"";
118118
""
119+
];
120+
[
121+
"`\xD0\x9F\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82, \xD0\xBC\xD0\xB8\xD1\x80!`";
122+
"`\xD0\x9F\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82, \xD0\xBC\xD0\xB8\xD1\x80!`";
123+
"`\xD0\x9F\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82, \xD0\xBC\xD0\xB8\xD1\x80!`";
124+
"`\xD0\x9F\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82, \xD0\xBC\xD0\xB8\xD1\x80!`";
125+
"`\xD0\x9F\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82, \xD0\xBC\xD0\xB8\xD1\x80!`";
126+
"`\xD0\x9F\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82, \xD0\xBC\xD0\xB8\xD1\x80!`";
127+
"`\xD0\x9F\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82, \xD0\xBC\xD0\xB8\xD1\x80!`";
128+
"`\xD0\x9F\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82, \xD0\xBC\xD0\xB8\xD1\x80!`"
119129
]
120130
]
121131
}

ydb/library/yql/udfs/common/string/test/canondata/test.test_Find_/results.txt

+11
Original file line numberDiff line numberDiff line change
@@ -128,6 +128,17 @@
128128
"-1";
129129
"-1";
130130
"2"
131+
];
132+
[
133+
"`\xD0\x9F\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82, \xD0\xBC\xD0\xB8\xD1\x80!`";
134+
%false;
135+
%false;
136+
%false;
137+
%false;
138+
%false;
139+
"-1";
140+
"-1";
141+
"23"
131142
]
132143
]
133144
}

ydb/library/yql/udfs/common/string/test/canondata/test.test_Remove_/results.txt

+49
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,27 @@
6262
"DataType";
6363
"String"
6464
]
65+
];
66+
[
67+
"hwruall";
68+
[
69+
"DataType";
70+
"String"
71+
]
72+
];
73+
[
74+
"hwrufirst";
75+
[
76+
"DataType";
77+
"String"
78+
]
79+
];
80+
[
81+
"hwrulast";
82+
[
83+
"DataType";
84+
"String"
85+
]
6586
]
6687
]
6788
]
@@ -75,6 +96,9 @@
7596
"fda";
7697
"fds";
7798
"fdsa";
99+
"fdsa";
100+
"fdsa";
101+
"fdsa";
78102
"fdsa"
79103
];
80104
[
@@ -85,6 +109,9 @@
85109
"swedfg";
86110
"awedfg";
87111
"aswedfg";
112+
"aswedfg";
113+
"aswedfg";
114+
"aswedfg";
88115
"aswedfg"
89116
];
90117
[
@@ -95,6 +122,9 @@
95122
"sdadsaasd";
96123
"asdadsaad";
97124
"asdadsaasd";
125+
"asdadsaasd";
126+
"asdadsaasd";
127+
"asdadsaasd";
98128
"asdadsaasd"
99129
];
100130
[
@@ -105,6 +135,9 @@
105135
"gdfsassas";
106136
"gdsfsassa";
107137
"gdsfsassas";
138+
"gdsfsassas";
139+
"gdsfsassas";
140+
"gdsfsassas";
108141
"gdsfsassas"
109142
];
110143
[
@@ -115,7 +148,23 @@
115148
"";
116149
"";
117150
"";
151+
"";
152+
"";
153+
"";
118154
""
155+
];
156+
[
157+
"`\xD0\x9F\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82, \xD0\xBC\xD0\xB8\xD1\x80!`";
158+
"`\xD0\x9F\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82, \xD0\xBC\xD0\xB8\xD1\x80!`";
159+
"`\xD0\x9F\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82, \xD0\xBC\xD0\xB8\xD1\x80!`";
160+
"`\xD0\x9F\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82, \xD0\xBC\xD0\xB8\xD1\x80!`";
161+
"`\xD0\x9F\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82, \xD0\xBC\xD0\xB8\xD1\x80!`";
162+
"`\xD0\x9F\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82, \xD0\xBC\xD0\xB8\xD1\x80!`";
163+
"`\xD0\x9F\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82, \xD0\xBC\xD0\xB8\xD1\x80!`";
164+
"`\xD0\x9F\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82, \xD0\xBC\xD0\xB8\xD1\x80!`";
165+
"\xD0\x9F\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82, \xD0\xBC\xD0\xB8\xD1\x80!";
166+
"\xD0\x9F\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82, \xD0\xBC\xD0\xB8\xD1\x80!`";
167+
"`\xD0\x9F\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82, \xD0\xBC\xD0\xB8\xD1\x80!"
119168
]
120169
]
121170
}

ydb/library/yql/udfs/common/string/test/canondata/test.test_Replace_/results.txt

+10
Original file line numberDiff line numberDiff line change
@@ -116,6 +116,16 @@
116116
"";
117117
"";
118118
""
119+
];
120+
[
121+
"`\xD0\x9F\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82, \xD0\xBC\xD0\xB8\xD1\x80!`";
122+
"`\xD0\x9F\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82, \xD0\xBC\xD0\xB8\xD1\x80!`";
123+
"`\xD0\x9F\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82, \xD0\xBC\xD0\xB8\xD1\x80!`";
124+
"`\xD0\x9F\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82, \xD0\xBC\xD0\xB8\xD1\x80!`";
125+
"`\xD0\x9F\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82, \xD0\xBC\xD0\xB8\xD1\x80!`";
126+
"`\xD0\x9F\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82, \xD0\xBC\xD0\xB8\xD1\x80!`";
127+
"`\xD0\x9F\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82, \xD0\xBC\xD0\xB8\xD1\x80!`";
128+
"`\xD0\x9F\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82, \xD0\xBC\xD0\xB8\xD1\x80!`"
119129
]
120130
]
121131
}

ydb/library/yql/udfs/common/string/test/cases/BlockRemove.sql

+4-1
Original file line numberDiff line numberDiff line change
@@ -9,5 +9,8 @@ SELECT
99
String::RemoveFirst(value, "as") AS first2,
1010
String::RemoveLast(value, "as") AS last2,
1111
String::RemoveFirst(value, "") AS first3,
12-
String::RemoveLast(value, "") AS last3
12+
String::RemoveLast(value, "") AS last3,
13+
String::RemoveAll(value, "`") AS hwruall,
14+
String::RemoveFirst(value, "`") AS hwrufirst,
15+
String::RemoveLast(value, "`") AS hwrulast,
1316
FROM Input;

ydb/library/yql/udfs/common/string/test/cases/Remove.sql

+4-1
Original file line numberDiff line numberDiff line change
@@ -7,5 +7,8 @@ SELECT
77
String::RemoveFirst(value, "as") AS first2,
88
String::RemoveLast(value, "as") AS last2,
99
String::RemoveFirst(value, "") AS first3,
10-
String::RemoveLast(value, "") AS last3
10+
String::RemoveLast(value, "") AS last3,
11+
String::RemoveAll(value, "`") AS hwruall,
12+
String::RemoveFirst(value, "`") AS hwrufirst,
13+
String::RemoveLast(value, "`") AS hwrulast,
1114
FROM Input;

ydb/library/yql/udfs/common/string/test/cases/default.in

+1
Original file line numberDiff line numberDiff line change
@@ -3,3 +3,4 @@
33
{"key"="3";"subkey"="3";"value"="asdadsaasd"};
44
{"key"="4";"subkey"="4";"value"="gdsfsassas"};
55
{"key"="5";"subkey"="5";"value"=""};
6+
{"key"="6";"subkey"="6";"value"="`Привет, мир!`"};

0 commit comments

Comments
 (0)