Skip to content

Commit 6e8e556

Browse files
authored
Moved csv parsing in import file cmd to YDB CLI and supported pg-types (only row tables) (#514)
* Fixed parsing JSON from pg types * Fixed bugs * Moved csv parsing in import file cmd to YDB CLI and supported pg-types (only row tables) * Fixed bugs * Fixed bug * Fixed bugs * Fixed test * Added column table validation * Fixed bugs
1 parent 8918b0f commit 6e8e556

File tree

11 files changed

+319
-172
lines changed

11 files changed

+319
-172
lines changed

ydb/public/lib/json_value/ydb_json_value.cpp

+13-15
Original file line numberDiff line numberDiff line change
@@ -677,22 +677,20 @@ namespace {
677677
ValueBuilder.Decimal(jsonValue.GetString());
678678
break;
679679

680-
case TTypeParser::ETypeKind::Pg: {
681-
TPgType pgType(""); // TODO: correct type?
682-
if (jsonValue.GetType() == NJson::JSON_STRING) {
683-
ValueBuilder.Pg(TPgValue(TPgValue::VK_TEXT, jsonValue.GetString(), pgType));
684-
} else if (jsonValue.GetType() == NJson::JSON_NULL) {
685-
ValueBuilder.Pg(TPgValue(TPgValue::VK_NULL, {}, pgType));
686-
} else {
687-
EnsureType(jsonValue, NJson::JSON_ARRAY);
688-
if (jsonValue.GetArray().size() != 1) {
689-
ThrowFatalError(TStringBuilder() << "Pg type should be encoded as array with size 1, but not " << jsonValue.GetArray().size());
690-
}
691-
auto& innerJsonValue = jsonValue.GetArray().at(0);
692-
EnsureType(innerJsonValue, NJson::JSON_STRING);
693-
auto binary = JsonStringToBinaryString(innerJsonValue.GetString());
694-
ValueBuilder.Pg(TPgValue(TPgValue::VK_BINARY, binary, pgType));
680+
case TTypeParser::ETypeKind::Pg:
681+
if (jsonValue.GetType() == NJson::JSON_STRING) {
682+
ValueBuilder.Pg(TPgValue(TPgValue::VK_TEXT, jsonValue.GetString(), TypeParser.GetPg()));
683+
} else if (jsonValue.GetType() == NJson::JSON_NULL) {
684+
ValueBuilder.Pg(TPgValue(TPgValue::VK_NULL, {}, TypeParser.GetPg()));
685+
} else {
686+
EnsureType(jsonValue, NJson::JSON_ARRAY);
687+
if (jsonValue.GetArray().size() != 1) {
688+
ThrowFatalError(TStringBuilder() << "Pg type should be encoded as array with size 1, but not " << jsonValue.GetArray().size());
695689
}
690+
auto& innerJsonValue = jsonValue.GetArray().at(0);
691+
EnsureType(innerJsonValue, NJson::JSON_STRING);
692+
auto binary = JsonStringToBinaryString(innerJsonValue.GetString());
693+
ValueBuilder.Pg(TPgValue(TPgValue::VK_BINARY, binary, TypeParser.GetPg()));
696694
}
697695
break;
698696

ydb/public/lib/ydb_cli/commands/ydb_service_import.cpp

+3-1
Original file line numberDiff line numberDiff line change
@@ -244,7 +244,9 @@ int TCommandImportFromCsv::Run(TConfig& config) {
244244
settings.Header(Header);
245245
settings.NewlineDelimited(NewlineDelimited);
246246
settings.HeaderRow(HeaderRow);
247-
settings.NullValue(NullValue);
247+
if (config.ParseResult->Has("null-value")) {
248+
settings.NullValue(NullValue);
249+
}
248250

249251
if (Delimiter.size() != 1) {
250252
throw TMisuseException()

ydb/public/lib/ydb_cli/common/csv_parser.cpp

+100-36
Original file line numberDiff line numberDiff line change
@@ -10,8 +10,9 @@ namespace {
1010

1111
class TCsvToYdbConverter {
1212
public:
13-
explicit TCsvToYdbConverter(TTypeParser& parser)
13+
explicit TCsvToYdbConverter(TTypeParser& parser, const std::optional<TString>& nullValue)
1414
: Parser(parser)
15+
, NullValue(nullValue)
1516
{
1617
}
1718

@@ -40,12 +41,12 @@ class TCsvToYdbConverter {
4041
size_t cnt;
4142
try {
4243
auto value = StringToArithmetic<T>(token, cnt);
43-
if (cnt != token.Size() || value < std::numeric_limits<T>::min() || value > std::numeric_limits<T>::max()) {
44+
if (cnt != token.Size() || value < std::numeric_limits<T>::lowest() || value > std::numeric_limits<T>::max()) {
4445
throw yexception();
4546
}
4647
return static_cast<T>(value);
4748
} catch (std::exception& e) {
48-
throw TMisuseException() << "Expected " << Parser.GetPrimitive() << " value, recieved: \"" << token << "\".";
49+
throw TMisuseException() << "Expected " << Parser.GetPrimitive() << " value, recieved: \"" << token << "\".";
4950
}
5051
}
5152

@@ -105,15 +106,30 @@ class TCsvToYdbConverter {
105106
case EPrimitiveType::DyNumber:
106107
Builder.DyNumber(token);
107108
break;
108-
case EPrimitiveType::Date:
109-
Builder.Date(TInstant::Days(GetArithmetic<ui16>(token)));
109+
case EPrimitiveType::Date: {
110+
TInstant date;
111+
if (!TInstant::TryParseIso8601(token, date)) {
112+
date = TInstant::Days(GetArithmetic<ui16>(token));
113+
}
114+
Builder.Date(date);
110115
break;
111-
case EPrimitiveType::Datetime:
112-
Builder.Datetime(TInstant::Seconds(GetArithmetic<ui32>(token)));
116+
}
117+
case EPrimitiveType::Datetime: {
118+
TInstant datetime;
119+
if (!TInstant::TryParseIso8601(token, datetime)) {
120+
datetime = TInstant::Seconds(GetArithmetic<ui32>(token));
121+
}
122+
Builder.Datetime(datetime);
113123
break;
114-
case EPrimitiveType::Timestamp:
115-
Builder.Timestamp(TInstant::MicroSeconds(GetArithmetic<ui64>(token)));
124+
}
125+
case EPrimitiveType::Timestamp: {
126+
TInstant timestamp;
127+
if (!TInstant::TryParseIso8601(token, timestamp)) {
128+
timestamp = TInstant::MicroSeconds(GetArithmetic<ui64>(token));
129+
}
130+
Builder.Timestamp(timestamp);
116131
break;
132+
}
117133
case EPrimitiveType::Interval:
118134
Builder.Interval(GetArithmetic<i64>(token));
119135
break;
@@ -133,17 +149,17 @@ class TCsvToYdbConverter {
133149

134150
void BuildValue(TStringBuf token) {
135151
switch (Parser.GetKind()) {
136-
case TTypeParser::ETypeKind::Primitive:
152+
case TTypeParser::ETypeKind::Primitive: {
137153
BuildPrimitive(TString(token));
138154
break;
139-
140-
case TTypeParser::ETypeKind::Decimal:
155+
}
156+
case TTypeParser::ETypeKind::Decimal: {
141157
Builder.Decimal(TString(token));
142158
break;
143-
144-
case TTypeParser::ETypeKind::Optional:
159+
}
160+
case TTypeParser::ETypeKind::Optional: {
145161
Parser.OpenOptional();
146-
if (token == NullValue) {
162+
if (NullValue && token == NullValue) {
147163
Builder.EmptyOptional(GetType());
148164
} else {
149165
Builder.BeginOptional();
@@ -152,23 +168,31 @@ class TCsvToYdbConverter {
152168
}
153169
Parser.CloseOptional();
154170
break;
155-
156-
case TTypeParser::ETypeKind::Null:
171+
}
172+
case TTypeParser::ETypeKind::Null: {
157173
EnsureNull(token);
158174
break;
159-
160-
case TTypeParser::ETypeKind::Void:
175+
}
176+
case TTypeParser::ETypeKind::Void: {
161177
EnsureNull(token);
162178
break;
163-
164-
case TTypeParser::ETypeKind::Tagged:
179+
}
180+
case TTypeParser::ETypeKind::Tagged: {
165181
Parser.OpenTagged();
166182
Builder.BeginTagged(Parser.GetTag());
167183
BuildValue(token);
168184
Builder.EndTagged();
169185
Parser.CloseTagged();
170186
break;
171-
187+
}
188+
case TTypeParser::ETypeKind::Pg: {
189+
if (NullValue && token == NullValue) {
190+
Builder.Pg(TPgValue(TPgValue::VK_NULL, {}, Parser.GetPg()));
191+
} else {
192+
Builder.Pg(TPgValue(TPgValue::VK_TEXT, TString(token), Parser.GetPg()));
193+
}
194+
break;
195+
}
172196
default:
173197
throw TMisuseException() << "Unsupported type kind: " << Parser.GetKind();
174198
}
@@ -200,6 +224,10 @@ class TCsvToYdbConverter {
200224
Parser.CloseTagged();
201225
break;
202226

227+
case TTypeParser::ETypeKind::Pg:
228+
typeBuilder.Pg(Parser.GetPg());
229+
break;
230+
203231
default:
204232
throw TMisuseException() << "Unsupported type kind: " << Parser.GetKind();
205233
}
@@ -222,6 +250,9 @@ class TCsvToYdbConverter {
222250
}
223251

224252
void EnsureNull(TStringBuf token) const {
253+
if (!NullValue) {
254+
throw TMisuseException() << "Expected null value instead of \"" << token << "\", but null value is not set.";
255+
}
225256
if (token != NullValue) {
226257
throw TMisuseException() << "Expected null value: \"" << NullValue << "\", recieved: \"" << token << "\".";
227258
}
@@ -234,28 +265,42 @@ class TCsvToYdbConverter {
234265

235266
private:
236267
TTypeParser& Parser;
237-
const TString NullValue = "";
268+
const std::optional<TString> NullValue = "";
238269
TValueBuilder Builder;
239270
};
240271

241272
}
242273

243-
TCsvParser::TCsvParser(TString&& headerRow, const char delimeter, const std::map<TString, TType>& paramTypes, const std::map<TString, TString>& paramSources)
274+
// Builds a parser from a raw, still-delimited header line.
//
// The line is moved into HeaderRow and then split on `delimeter`; the resulting
// column names are stored in Header. `nullValue` is copied; `paramTypes` and
// `paramSources` are stored as plain pointers and are not owned by the parser.
TCsvParser::TCsvParser(TString&& headerRow,
                       const char delimeter,
                       const std::optional<TString>& nullValue,
                       const std::map<TString, TType>* paramTypes,
                       const std::map<TString, TString>* paramSources)
    : HeaderRow(std::move(headerRow))
    , Delimeter(delimeter)
    , NullValue(nullValue)
    , ParamTypes(paramTypes)
    , ParamSources(paramSources)
{
    // Split the stored member rather than the (already moved-from) argument.
    NCsvFormat::CsvSplitter headerSplitter(HeaderRow, Delimeter);
    Header = static_cast<TVector<TString>>(headerSplitter);
}
252286

253-
TValue TCsvParser::FieldToValue(TTypeParser& parser, TStringBuf token) {
254-
TCsvToYdbConverter converter(parser);
287+
// Builds a parser from column names that have already been split.
//
// `header` is moved into Header; HeaderRow is not initialized here and so stays
// a default-constructed (empty) string. `nullValue` is copied; `paramTypes` and
// `paramSources` are stored as plain pointers and are not owned by the parser.
TCsvParser::TCsvParser(TVector<TString>&& header,
                       const char delimeter,
                       const std::optional<TString>& nullValue,
                       const std::map<TString, TType>* paramTypes,
                       const std::map<TString, TString>* paramSources)
    : Header(std::move(header))
    , Delimeter(delimeter)
    , NullValue(nullValue)
    , ParamTypes(paramTypes)
    , ParamSources(paramSources)
{
}
297+
298+
// Converts a single CSV field into a TValue by handing `parser` and this
// parser's NullValue to a one-shot TCsvToYdbConverter and converting `token`.
TValue TCsvParser::FieldToValue(TTypeParser& parser, TStringBuf token) const {
    return TCsvToYdbConverter(parser, NullValue).Convert(token);
}
257302

258-
void TCsvParser::GetParams(TString&& data, TParamsBuilder& builder) {
303+
void TCsvParser::GetParams(TString&& data, TParamsBuilder& builder) const {
259304
NCsvFormat::CsvSplitter splitter(data, Delimeter);
260305
auto headerIt = Header.begin();
261306
do {
@@ -264,14 +309,16 @@ void TCsvParser::GetParams(TString&& data, TParamsBuilder& builder) {
264309
throw TMisuseException() << "Header contains less fields than data. Header: \"" << HeaderRow << "\", data: \"" << data << "\"";
265310
}
266311
TString fullname = "$" + *headerIt;
267-
auto paramIt = ParamTypes.find(fullname);
268-
if (paramIt == ParamTypes.end()) {
312+
auto paramIt = ParamTypes->find(fullname);
313+
if (paramIt == ParamTypes->end()) {
269314
++headerIt;
270315
continue;
271316
}
272-
auto paramSource = ParamSources.find(fullname);
273-
if (paramSource != ParamSources.end()) {
274-
throw TMisuseException() << "Parameter " << fullname << " value found in more than one source: stdin, " << paramSource->second << ".";
317+
if (ParamSources) {
318+
auto paramSource = ParamSources->find(fullname);
319+
if (paramSource != ParamSources->end()) {
320+
throw TMisuseException() << "Parameter " << fullname << " value found in more than one source: stdin, " << paramSource->second << ".";
321+
}
275322
}
276323
TTypeParser parser(paramIt->second);
277324
builder.AddParam(fullname, FieldToValue(parser, token));
@@ -283,27 +330,30 @@ void TCsvParser::GetParams(TString&& data, TParamsBuilder& builder) {
283330
}
284331
}
285332

286-
void TCsvParser::GetValue(TString&& data, const TType& type, TValueBuilder& builder) {
333+
void TCsvParser::GetValue(TString&& data, TValueBuilder& builder, const TType& type) const {
287334
NCsvFormat::CsvSplitter splitter(data, Delimeter);
288-
auto headerIt = Header.begin();
335+
auto headerIt = Header.cbegin();
289336
std::map<TString, TStringBuf> fields;
290337
do {
291338
TStringBuf token = splitter.Consume();
292-
if (headerIt == Header.end()) {
339+
if (headerIt == Header.cend()) {
293340
throw TMisuseException() << "Header contains less fields than data. Header: \"" << HeaderRow << "\", data: \"" << data << "\"";
294341
}
295342
fields[*headerIt] = token;
296343
++headerIt;
297344
} while (splitter.Step());
298345

299-
if (headerIt != Header.end()) {
346+
if (headerIt != Header.cend()) {
300347
throw TMisuseException() << "Header contains more fields than data. Header: \"" << HeaderRow << "\", data: \"" << data << "\"";
301348
}
302349
builder.BeginStruct();
303350
TTypeParser parser(type);
304351
parser.OpenStruct();
305352
while (parser.TryNextMember()) {
306353
TString name = parser.GetMemberName();
354+
if (name == "__ydb_skip_column_name") {
355+
continue;
356+
}
307357
auto fieldIt = fields.find(name);
308358
if (fieldIt == fields.end()) {
309359
throw TMisuseException() << "No member \"" << name << "\" in csv string for YDB struct type";
@@ -314,5 +364,19 @@ void TCsvParser::GetValue(TString&& data, const TType& type, TValueBuilder& buil
314364
builder.EndStruct();
315365
}
316366

367+
// Builds a struct type with one member per header column, in header order.
//
// A column whose name is a key of ParamTypes becomes a member with that name
// and the mapped type. Any other column is added under the placeholder member
// name "__ydb_skip_column_name" with the type produced by an empty
// TTypeBuilder; GetValue skips members carrying that name.
//
// Throws TMisuseException if the parser was constructed without a column type
// map (ParamTypes is null), since there is nothing to build the struct from.
TType TCsvParser::GetColumnsType() const {
    // Both constructors default paramTypes to nullptr, so guard the dereference.
    if (!ParamTypes) {
        throw TMisuseException() << "Column types are not set for csv parser.";
    }

    TTypeBuilder builder;
    builder.BeginStruct();
    for (const auto& colName : Header) {
        // Single lookup instead of find() followed by at().
        const auto typeIt = ParamTypes->find(colName);
        if (typeIt != ParamTypes->end()) {
            builder.AddMember(colName, typeIt->second);
        } else {
            builder.AddMember("__ydb_skip_column_name", TTypeBuilder().Build());
        }
    }
    builder.EndStruct();
    return builder.Build();
}
380+
317381
}
318382
}

ydb/public/lib/ydb_cli/common/csv_parser.h

+22-7
Original file line numberDiff line numberDiff line change
@@ -9,19 +9,34 @@ namespace NConsoleClient {
99

1010
class TCsvParser {
1111
public:
12-
TCsvParser(TString&& headerRow, const char delimeter, const std::map<TString, TType>& paramTypes, const std::map<TString, TString>& paramSources);
12+
TCsvParser() = default;
1313

14-
void GetParams(TString&& data, TParamsBuilder& builder);
15-
void GetValue(TString&& data, const TType& type, TValueBuilder& builder);
14+
TCsvParser(const TCsvParser&) = delete;
15+
TCsvParser(TCsvParser&&) = default;
16+
TCsvParser& operator=(const TCsvParser&) = delete;
17+
TCsvParser& operator=(TCsvParser&&) = default;
18+
~TCsvParser() = default;
19+
20+
TCsvParser(TString&& headerRow, const char delimeter, const std::optional<TString>& nullValue,
21+
const std::map<TString, TType>* paramTypes = nullptr,
22+
const std::map<TString, TString>* paramSources = nullptr);
23+
TCsvParser(TVector<TString>&& header, const char delimeter, const std::optional<TString>& nullValue,
24+
const std::map<TString, TType>* paramTypes = nullptr,
25+
const std::map<TString, TString>* paramSources = nullptr);
26+
27+
void GetParams(TString&& data, TParamsBuilder& builder) const;
28+
void GetValue(TString&& data, TValueBuilder& builder, const TType& type) const;
29+
TType GetColumnsType() const;
1630

1731
private:
18-
TValue FieldToValue(TTypeParser& parser, TStringBuf token);
32+
TValue FieldToValue(TTypeParser& parser, TStringBuf token) const;
1933

2034
TVector<TString> Header;
2135
TString HeaderRow;
22-
const char Delimeter;
23-
const std::map<TString, TType>& ParamTypes;
24-
const std::map<TString, TString>& ParamSources;
36+
char Delimeter;
37+
std::optional<TString> NullValue;
38+
const std::map<TString, TType>* ParamTypes;
39+
const std::map<TString, TString>* ParamSources;
2540
};
2641

2742
}

0 commit comments

Comments
 (0)