@@ -79,6 +79,43 @@ TStatus WaitForQueue(const size_t maxQueueSize, std::vector<TAsyncStatus>& inFli
79
79
return MakeStatus ();
80
80
}
81
81
82
+ void InitCsvParser (TCsvParser& parser,
83
+ bool & removeLastDelimiter,
84
+ TString&& defaultHeader,
85
+ const TImportFileSettings& settings,
86
+ const std::map<TString, TType>* columnTypes,
87
+ const NTable::TTableDescription* dbTableInfo) {
88
+ if (settings.Header_ || settings.HeaderRow_ ) {
89
+ TString headerRow;
90
+ if (settings.Header_ ) {
91
+ headerRow = std::move (defaultHeader);
92
+ }
93
+ if (settings.HeaderRow_ ) {
94
+ headerRow = settings.HeaderRow_ ;
95
+ }
96
+ if (headerRow.EndsWith (" \r\n " )) {
97
+ headerRow.erase (headerRow.Size () - 2 );
98
+ }
99
+ if (headerRow.EndsWith (" \n " )) {
100
+ headerRow.erase (headerRow.Size () - 1 );
101
+ }
102
+ if (headerRow.EndsWith (settings.Delimiter_ )) {
103
+ removeLastDelimiter = true ;
104
+ headerRow.erase (headerRow.Size () - settings.Delimiter_ .Size ());
105
+ }
106
+ parser = TCsvParser (std::move (headerRow), settings.Delimiter_ [0 ], settings.NullValue_ , columnTypes);
107
+ return ;
108
+ }
109
+
110
+ TVector<TString> columns;
111
+ Y_ENSURE_BT (dbTableInfo);
112
+ for (const auto & column : dbTableInfo->GetColumns ()) {
113
+ columns.push_back (column.Name );
114
+ }
115
+ parser = TCsvParser (std::move (columns), settings.Delimiter_ [0 ], settings.NullValue_ , columnTypes);
116
+ return ;
117
+ }
118
+
82
119
FHANDLE GetStdinFileno () {
83
120
#if defined(_win32_)
84
121
return GetStdHandle (STD_INPUT_HANDLE);
@@ -222,9 +259,8 @@ class TCsvFileReader {
222
259
} // namespace
223
260
224
261
TImportFileClient::TImportFileClient (const TDriver& driver, const TClientCommand::TConfig& rootConfig)
225
- : OperationClient (std::make_shared<NOperation::TOperationClient >(driver))
262
+ : TableClient (std::make_shared<NTable::TTableClient >(driver))
226
263
, SchemeClient(std::make_shared<NScheme::TSchemeClient>(driver))
227
- , TableClient(std::make_shared<NTable::TTableClient>(driver))
228
264
{
229
265
RetrySettings
230
266
.MaxRetries (TImportFileSettings::MaxRetries)
@@ -239,11 +275,25 @@ TStatus TImportFileClient::Import(const TVector<TString>& filePaths, const TStri
239
275
TStringBuilder () << " Illegal delimiter for TSV format, only tab is allowed" );
240
276
}
241
277
242
- auto result = NDump::DescribePath (*SchemeClient, dbPath);
243
- auto resultStatus = result.GetStatus ();
244
- if (resultStatus != EStatus::SUCCESS) {
245
- return MakeStatus (EStatus::SCHEME_ERROR,
246
- TStringBuilder () << result.GetIssues ().ToString () << dbPath);
278
+ auto resultStatus = TableClient->RetryOperationSync (
279
+ [this , dbPath](NTable::TSession session) {
280
+ auto result = session.DescribeTable (dbPath).ExtractValueSync ();
281
+ if (result.IsSuccess ()) {
282
+ DbTableInfo = std::make_unique<const NTable::TTableDescription>(result.GetTableDescription ());
283
+ }
284
+ return result;
285
+ }, NTable::TRetryOperationSettings{RetrySettings}.MaxRetries (10 ));
286
+
287
+ if (!resultStatus.IsSuccess ()) {
288
+ // / TODO: Remove this after server fix: https://github.com/ydb-platform/ydb/issues/7791
289
+ if (resultStatus.GetStatus () == EStatus::SCHEME_ERROR) {
290
+ auto describePathResult = NDump::DescribePath (*SchemeClient, dbPath);
291
+ if (describePathResult.GetStatus () != EStatus::SUCCESS) {
292
+ return MakeStatus (EStatus::SCHEME_ERROR,
293
+ TStringBuilder () << describePathResult.GetIssues ().ToString () << dbPath);
294
+ }
295
+ }
296
+ return resultStatus;
247
297
}
248
298
249
299
UpsertSettings
@@ -374,45 +424,13 @@ TStatus TImportFileClient::UpsertCsv(IInputStream& input, const TString& dbPath,
374
424
375
425
TCountingInput countInput (&input);
376
426
NCsvFormat::TLinesSplitter splitter (countInput);
377
- TCsvParser parser;
378
- bool RemoveLastDelimiter = false ;
379
427
380
- NTable::TCreateSessionResult sessionResult = TableClient->GetSession (NTable::TCreateSessionSettings ()).GetValueSync ();
381
- if (!sessionResult.IsSuccess ())
382
- return sessionResult;
383
- NTable::TDescribeTableResult tableResult = sessionResult.GetSession ().DescribeTable (dbPath).GetValueSync ();
384
- if (!tableResult.IsSuccess ())
385
- return tableResult;
428
+ auto columnTypes = GetColumnTypes ();
429
+ ValidateTValueUpsertTable ();
386
430
387
- auto columnTypes = GetColumnTypes (tableResult.GetTableDescription ());
388
- ValidateTable (tableResult.GetTableDescription ());
389
-
390
- if (settings.Header_ || settings.HeaderRow_ ) {
391
- TString headerRow;
392
- if (settings.Header_ ) {
393
- headerRow = splitter.ConsumeLine ();
394
- }
395
- if (settings.HeaderRow_ ) {
396
- headerRow = settings.HeaderRow_ ;
397
- }
398
- if (headerRow.EndsWith (" \r\n " )) {
399
- headerRow.erase (headerRow.Size () - 2 );
400
- }
401
- if (headerRow.EndsWith (" \n " )) {
402
- headerRow.erase (headerRow.Size () - 1 );
403
- }
404
- if (headerRow.EndsWith (settings.Delimiter_ )) {
405
- RemoveLastDelimiter = true ;
406
- headerRow.erase (headerRow.Size () - settings.Delimiter_ .Size ());
407
- }
408
- parser = TCsvParser (std::move (headerRow), settings.Delimiter_ [0 ], settings.NullValue_ , &columnTypes);
409
- } else {
410
- TVector<TString> columns;
411
- for (const auto & column : tableResult.GetTableDescription ().GetColumns ()) {
412
- columns.push_back (column.Name );
413
- }
414
- parser = TCsvParser (std::move (columns), settings.Delimiter_ [0 ], settings.NullValue_ , &columnTypes);
415
- }
431
+ TCsvParser parser;
432
+ bool removeLastDelimiter = false ;
433
+ InitCsvParser (parser, removeLastDelimiter, splitter.ConsumeLine (), settings, &columnTypes, DbTableInfo.get ());
416
434
417
435
for (ui32 i = 0 ; i < settings.SkipRows_ ; ++i) {
418
436
splitter.ConsumeLine ();
@@ -450,7 +468,7 @@ TStatus TImportFileClient::UpsertCsv(IInputStream& input, const TString& dbPath,
450
468
readBytes += line.Size ();
451
469
batchBytes += line.Size ();
452
470
453
- if (RemoveLastDelimiter ) {
471
+ if (removeLastDelimiter ) {
454
472
if (!line.EndsWith (settings.Delimiter_ )) {
455
473
return MakeStatus (EStatus::BAD_REQUEST,
456
474
" According to the header, lines should end with a delimiter" );
@@ -498,42 +516,14 @@ TStatus TImportFileClient::UpsertCsv(IInputStream& input, const TString& dbPath,
498
516
TStatus TImportFileClient::UpsertCsvByBlocks (const TString& filePath, const TString& dbPath, const TImportFileSettings& settings) {
499
517
TMaxInflightGetter inFlightGetter (settings.MaxInFlightRequests_ , FilesCount);
500
518
TString headerRow;
501
- TCsvParser parser;
502
519
TCsvFileReader splitter (filePath, settings, headerRow, inFlightGetter);
503
- bool RemoveLastDelimiter = false ;
504
-
505
- NTable::TCreateSessionResult sessionResult = TableClient->GetSession (NTable::TCreateSessionSettings ()).GetValueSync ();
506
- if (!sessionResult.IsSuccess ())
507
- return sessionResult;
508
- NTable::TDescribeTableResult tableResult = sessionResult.GetSession ().DescribeTable (dbPath).GetValueSync ();
509
- if (!tableResult.IsSuccess ())
510
- return tableResult;
511
520
512
- auto columnTypes = GetColumnTypes (tableResult. GetTableDescription () );
513
- ValidateTable (tableResult. GetTableDescription () );
521
+ auto columnTypes = GetColumnTypes ();
522
+ ValidateTValueUpsertTable ( );
514
523
515
- if (settings.Header_ || settings.HeaderRow_ ) {
516
- if (settings.HeaderRow_ ) {
517
- headerRow = settings.HeaderRow_ ;
518
- }
519
- if (headerRow.EndsWith (" \r\n " )) {
520
- headerRow.erase (headerRow.Size () - 2 );
521
- }
522
- if (headerRow.EndsWith (" \n " )) {
523
- headerRow.erase (headerRow.Size () - 1 );
524
- }
525
- if (headerRow.EndsWith (settings.Delimiter_ )) {
526
- RemoveLastDelimiter = true ;
527
- headerRow.erase (headerRow.Size () - settings.Delimiter_ .Size ());
528
- }
529
- parser = TCsvParser (std::move (headerRow), settings.Delimiter_ [0 ], settings.NullValue_ , &columnTypes);
530
- } else {
531
- TVector<TString> columns;
532
- for (const auto & column : tableResult.GetTableDescription ().GetColumns ()) {
533
- columns.push_back (column.Name );
534
- }
535
- parser = TCsvParser (std::move (columns), settings.Delimiter_ [0 ], settings.NullValue_ , &columnTypes);
536
- }
524
+ TCsvParser parser;
525
+ bool removeLastDelimiter = false ;
526
+ InitCsvParser (parser, removeLastDelimiter, std::move (headerRow), settings, &columnTypes, DbTableInfo.get ());
537
527
538
528
TType lineType = parser.GetColumnsType ();
539
529
@@ -565,7 +555,7 @@ TStatus TImportFileClient::UpsertCsvByBlocks(const TString& filePath, const TStr
565
555
}
566
556
readBytes += line.size ();
567
557
batchBytes += line.size ();
568
- if (RemoveLastDelimiter ) {
558
+ if (removeLastDelimiter ) {
569
559
if (!line.EndsWith (settings.Delimiter_ )) {
570
560
return MakeStatus (EStatus::BAD_REQUEST,
571
561
" According to the header, lines should end with a delimiter" );
@@ -611,15 +601,8 @@ TStatus TImportFileClient::UpsertCsvByBlocks(const TString& filePath, const TStr
611
601
612
602
TStatus TImportFileClient::UpsertJson (IInputStream& input, const TString& dbPath, const TImportFileSettings& settings,
613
603
std::optional<ui64> inputSizeHint, ProgressCallbackFunc & progressCallback) {
614
- NTable::TCreateSessionResult sessionResult = TableClient->GetSession (NTable::TCreateSessionSettings ()).GetValueSync ();
615
- if (!sessionResult.IsSuccess ())
616
- return sessionResult;
617
- NTable::TDescribeTableResult tableResult = sessionResult.GetSession ().DescribeTable (dbPath).GetValueSync ();
618
- if (!tableResult.IsSuccess ())
619
- return tableResult;
620
-
621
- const TType tableType = GetTableType (tableResult.GetTableDescription ());
622
- ValidateTable (tableResult.GetTableDescription ());
604
+ const TType tableType = GetTableType ();
605
+ ValidateTValueUpsertTable ();
623
606
const NYdb::EBinaryStringEncoding stringEncoding =
624
607
(settings.Format_ == EOutputFormat::JsonBase64) ? NYdb::EBinaryStringEncoding::Base64 :
625
608
NYdb::EBinaryStringEncoding::Unicode;
@@ -818,36 +801,39 @@ TAsyncStatus TImportFileClient::UpsertParquetBuffer(const TString& dbPath, const
818
801
return TableClient->RetryOperation (upsert, RetrySettings);
819
802
}
820
803
821
- TType TImportFileClient::GetTableType (const NTable::TTableDescription& tableDescription ) {
804
+ TType TImportFileClient::GetTableType () {
822
805
TTypeBuilder typeBuilder;
823
806
typeBuilder.BeginStruct ();
824
- const auto & columns = tableDescription.GetTableColumns ();
807
+ Y_ENSURE_BT (DbTableInfo);
808
+ const auto & columns = DbTableInfo->GetTableColumns ();
825
809
for (auto it = columns.begin (); it != columns.end (); it++) {
826
810
typeBuilder.AddMember ((*it).Name , (*it).Type );
827
811
}
828
812
typeBuilder.EndStruct ();
829
813
return typeBuilder.Build ();
830
814
}
831
815
832
- std::map<TString, TType> TImportFileClient::GetColumnTypes (const NTable::TTableDescription& tableDescription ) {
816
+ std::map<TString, TType> TImportFileClient::GetColumnTypes () {
833
817
std::map<TString, TType> columnTypes;
834
- const auto & columns = tableDescription.GetTableColumns ();
818
+ Y_ENSURE_BT (DbTableInfo);
819
+ const auto & columns = DbTableInfo->GetTableColumns ();
835
820
for (auto it = columns.begin (); it != columns.end (); it++) {
836
821
columnTypes.insert ({(*it).Name , (*it).Type });
837
822
}
838
823
return columnTypes;
839
824
}
840
825
841
- void TImportFileClient::ValidateTable ( const NTable::TTableDescription& tableDescription ) {
842
- auto columnTypes = GetColumnTypes (tableDescription );
826
+ void TImportFileClient::ValidateTValueUpsertTable ( ) {
827
+ auto columnTypes = GetColumnTypes ();
843
828
bool hasPgType = false ;
844
829
for (const auto & [_, type] : columnTypes) {
845
830
if (TTypeParser (type).GetKind () == TTypeParser::ETypeKind::Pg) {
846
831
hasPgType = true ;
847
832
break ;
848
833
}
849
834
}
850
- if (tableDescription.GetStoreType () == NTable::EStoreType::Column && hasPgType) {
835
+ Y_ENSURE_BT (DbTableInfo);
836
+ if (DbTableInfo->GetStoreType () == NTable::EStoreType::Column && hasPgType) {
851
837
throw TMisuseException () << " Import into column table with Pg type columns in not supported" ;
852
838
}
853
839
}
0 commit comments