
Commit 543ba14

Merge branch 'main' of https://github.com/ydb-platform/ydb into llvm14
2 parents: a3bf393 + cb5d0e3

1,631 files changed: +36,696 −12,966 lines


.github/actions/update_changelog/action.yaml

Lines changed: 1 addition & 1 deletion
@@ -40,5 +40,5 @@ runs:
       run: |
         git config --local user.email "[email protected]"
         git config --local user.name "GitHub Action"
-        git config --local github.token ${{ env.UPDATE_REPO_TOKEN }}
+        git config --local github.token ${{ env.YDBOT_TOKEN }}
         python ${{ github.action_path }}/update_changelog.py pr_data.txt "${{ inputs.changelog_path }}" "${{ inputs.base_branch }}" "${{ inputs.suffix }}"

.github/actions/update_changelog/update_changelog.py

Lines changed: 5 additions & 5 deletions
@@ -15,6 +15,8 @@
 CATEGORY_PREFIX = "### "
 ITEM_PREFIX = "* "

+YDBOT_TOKEN = os.getenv("YDBOT_TOKEN")
+
 @functools.cache
 def get_github_api_url():
     return os.getenv('GITHUB_REPOSITORY')
@@ -150,7 +152,7 @@ def fetch_pr_details(pr_id):
     url = f"https://api.github.com/repos/{get_github_api_url()}/pulls/{pr_id}"
     headers = {
         "Accept": "application/vnd.github.v3+json",
-        "Authorization": f"token {GITHUB_TOKEN}"
+        "Authorization": f"token {YDBOT_TOKEN}"
     }
     response = requests.get(url, headers=headers)
     response.raise_for_status()
@@ -160,7 +162,7 @@ def fetch_user_details(username):
     url = f"https://api.github.com/users/{username}"
     headers = {
         "Accept": "application/vnd.github.v3+json",
-        "Authorization": f"token {GITHUB_TOKEN}"
+        "Authorization": f"token {YDBOT_TOKEN}"
     }
     response = requests.get(url, headers=headers)
     response.raise_for_status()
@@ -175,8 +177,6 @@ def fetch_user_details(username):
     changelog_path = sys.argv[2]
     base_branch = sys.argv[3]
     suffix = sys.argv[4]
-
-    GITHUB_TOKEN = os.getenv("UPDATE_REPO_TOKEN")

     try:
         with open(pr_data_file, 'r') as file:
@@ -204,7 +204,7 @@ def fetch_user_details(username):

     update_changelog(changelog_path, pr_data)

-    base_branch_name = f"changelog-for-{base_branch}-{suffix}"
+    base_branch_name = f"dev-changelog-{base_branch}-{suffix}"
     branch_name = base_branch_name
     index = 1
     while branch_exists(branch_name):
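
Together, these two files move the changelog automation off UPDATE_REPO_TOKEN and onto the bot token YDBOT_TOKEN, read once at module scope instead of inside the __main__ block. A minimal standalone sketch of the resulting request pattern (names mirror the patch; the repository slug is passed explicitly here rather than read from GITHUB_REPOSITORY, and the token is assumed to be exported by the calling workflow):

import os
import requests

# Assumed to be exported by the workflow that runs the action (see action.yaml above).
YDBOT_TOKEN = os.getenv("YDBOT_TOKEN")

def fetch_pr_details(repo, pr_id):
    # repo is an "owner/name" slug, e.g. "ydb-platform/ydb"
    url = f"https://api.github.com/repos/{repo}/pulls/{pr_id}"
    headers = {
        "Accept": "application/vnd.github.v3+json",
        "Authorization": f"token {YDBOT_TOKEN}",
    }
    response = requests.get(url, headers=headers)
    response.raise_for_status()  # surfaces a 401 if the token is missing or invalid
    return response.json()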

.github/actions/validate_pr_description/validate_pr_description.py

Lines changed: 2 additions & 2 deletions
@@ -3,9 +3,9 @@
 from typing import Tuple

 issue_patterns = [
-    r"https://github.com/ydb-platform/ydb/issues/\d+",
+    r"https://github.com/ydb-platform/[a-z\-]+/issues/\d+",
     r"https://st.yandex-team.ru/[a-zA-Z]+-\d+",
-    r"#+d+",
+    r"#\d+",
     r"[a-zA-Z]+-\d+"
 ]
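
Two fixes here: the GitHub pattern now accepts any ydb-platform repository, and the previously broken r"#+d+" (which matched a literal run of "d" characters, never an issue number) becomes r"#\d+". A quick sanity check with hypothetical sample references:

import re

issue_patterns = [
    r"https://github.com/ydb-platform/[a-z\-]+/issues/\d+",
    r"https://st.yandex-team.ru/[a-zA-Z]+-\d+",
    r"#\d+",
    r"[a-zA-Z]+-\d+"
]

# Hypothetical references a PR description might contain.
samples = [
    "#1234",                                                # now matched by r"#\d+"
    "KIKIMR-12345",                                         # tracker-style id
    "https://github.com/ydb-platform/ydb/issues/42",  # any ydb-platform repo
]
for sample in samples:
    assert any(re.search(pattern, sample) for pattern in issue_patterns)

# The old pattern only matched degenerate strings like "#d":
assert re.search(r"#+d+", "#d") and not re.search(r"#+d+", "#1234")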

.github/config/muted_ya.txt

Lines changed: 4 additions & 4 deletions
@@ -9,7 +9,6 @@ ydb/core/blobstorage/ut_vdisk TBsLocalRecovery.WriteRestartReadHugeDecreased
 ydb/core/blobstorage/ut_vdisk TBsVDiskGC.GCPutKeepBarrierSync
 ydb/core/blobstorage/ut_vdisk TBsVDiskManyPutGet.ManyPutRangeGetCompactionIndexOnly
 ydb/core/blobstorage/ut_vdisk [*/*] chunk chunk
-ydb/core/blobstorage/ut_vdisk2 VDiskTest.HugeBlobWrite
 ydb/core/cms/ut_sentinel_unstable TSentinelUnstableTests.BSControllerCantChangeStatus
 ydb/core/cms/ut_sentinel_unstable sole chunk chunk
 ydb/core/cms/ut_sentinel_unstable sole+chunk+chunk
@@ -62,10 +61,8 @@ ydb/core/kqp/ut/tx KqpSnapshotIsolation.TConflictReadWriteOltpNoSink
 ydb/core/kqp/ut/tx KqpSnapshotIsolation.TConflictWriteOlap
 ydb/core/kqp/ut/tx KqpSnapshotIsolation.TConflictWriteOltp
 ydb/core/kqp/ut/tx KqpSnapshotIsolation.TConflictWriteOltpNoSink
-ydb/core/kqp/ut/tx KqpSnapshotIsolation.TReadOnlyOlap
 ydb/core/kqp/ut/tx KqpSnapshotIsolation.TReadOnlyOltp
 ydb/core/kqp/ut/tx KqpSnapshotIsolation.TReadOnlyOltpNoSink
-ydb/core/kqp/ut/tx KqpSnapshotIsolation.TSimpleOlap
 ydb/core/kqp/ut/tx KqpSnapshotIsolation.TSimpleOltp
 ydb/core/kqp/ut/tx KqpSnapshotIsolation.TSimpleOltpNoSink
 ydb/core/kqp/ut/yql KqpScripting.StreamExecuteYqlScriptScanOperationTmeoutBruteForce
@@ -91,6 +88,8 @@ ydb/library/actors/http/ut sole+chunk+chunk
 ydb/library/actors/interconnect/ut_huge_cluster HugeCluster.AllToAll
 ydb/library/actors/interconnect/ut_huge_cluster sole chunk chunk
 ydb/library/yql/providers/generic/connector/tests/datasource/ydb test.py.test_select_positive[column_selection_col2_COL1-kqprun]
+ydb/library/yql/providers/generic/connector/tests/join test.py.test_join[join_ch_ch-kqprun]
+ydb/library/yql/providers/generic/connector/tests/join test.py.test_join[join_pg_pg-kqprun]
 ydb/services/keyvalue/ut sole chunk chunk
 ydb/services/keyvalue/ut sole+chunk+chunk
 ydb/services/persqueue_v1/ut TPersQueueCommonTest.TestLimiterLimitsWithBlobsRateLimit
@@ -245,15 +244,16 @@ ydb/tests/postgres_integrations/go-libpq docker_wrapper_test.py.test_pg_generate
 ydb/tests/postgres_integrations/go-libpq docker_wrapper_test.py.test_pg_generated[TestTxOptions]
 ydb/tests/sql sole chunk chunk
 ydb/tests/sql/large sole chunk chunk
+ydb/tests/sql/large test_insert_delete_duplicate_records.py.TestConcurrentInsertDeleteAndRead.test_bulkupsert_delete_and_read_tpch
 ydb/tests/sql/large test_insert_delete_duplicate_records.py.TestConcurrentInsertDeleteAndRead.test_insert_delete_and_read_simple_tx
 ydb/tests/sql/large test_insert_delete_duplicate_records.py.TestConcurrentInsertDeleteAndRead.test_upsert_delete_and_read_tpch_tx
 ydb/tests/sql/large test_insertinto_selectfrom.py.TestConcurrentInsertAndCount.test_concurrent_upsert_and_count
+ydb/tests/sql/large test_insertinto_selectfrom.py.TestConcurrentInsertAndCount.test_concurrent_upsert_and_count_tx
 ydb/tests/sql/large test_tiering.py.TestYdbS3TTL.test_basic_tiering_operations
 ydb/tests/sql/large test_workload_manager.py.TestWorkloadManager.test_pool_classifier_init[False]
 ydb/tests/sql/large test_workload_manager.py.TestWorkloadManager.test_resource_pool_queue_resource_weight[1-False]
 ydb/tests/sql/large test_workload_manager.py.TestWorkloadManager.test_resource_pool_queue_resource_weight[1-True]
 ydb/tests/sql/large test_workload_manager.py.TestWorkloadManager.test_resource_pool_queue_size_limit
-ydb/tests/stress/kv/tests test_workload.py.TestYdbKvWorkload.test[column]
 ydb/tests/stress/log/tests test_workload.py.TestYdbLogWorkload.test[column]
 ydb/tests/stress/log/tests test_workload.py.TestYdbLogWorkload.test[row]
 ydb/tools/stress_tool/ut TDeviceTestTool.PDiskTestLogWrite
Lines changed: 187 additions & 0 deletions
@@ -0,0 +1,187 @@
#!/usr/bin/env python3

# --query_path .github/scripts/analytics/data_mart_queries/perfomance_olap_mart.sql --table_path perfomance/olap/fast_results --store_type column --partition_keys Run_start_timestamp --primary_keys Db Suite Test Branch Run_start_timestamp --ttl_min 43200 --ttl_key Run_start_timestamp
import argparse
import ydb
import configparser
import os
import time

# Load configuration
dir = os.path.dirname(__file__)
config = configparser.ConfigParser()
config_file_path = f"{dir}/../../config/ydb_qa_db.ini"
config.read(config_file_path)
repo_path = os.path.abspath(f"{dir}/../../../")

DATABASE_ENDPOINT = config["QA_DB"]["DATABASE_ENDPOINT"]
DATABASE_PATH = config["QA_DB"]["DATABASE_PATH"]

def get_data_from_query_with_metadata(driver, query):
    results = []
    scan_query = ydb.ScanQuery(query, {})
    it = driver.table_client.scan_query(scan_query)
    print("Executing query")
    start_time = time.time()
    column_types = None
    while True:
        try:
            result = next(it)
            # Capture column metadata from the first result chunk
            if column_types is None:
                column_types = [(col.name, col.type) for col in result.result_set.columns]

            results.extend(result.result_set.rows)

        except StopIteration:
            break

    end_time = time.time()
    print(f'Captured {len(results)} rows, duration: {end_time - start_time}s')
    return results, column_types

def ydb_type_to_str(ydb_type, store_type='ROW'):
    # Converts YDB type to string representation for table creation
    is_optional = False
    if ydb_type.HasField('optional_type'):
        is_optional = True
        base_type = ydb_type.optional_type.item
    else:
        base_type = ydb_type

    # Find the matching primitive type by its proto type id
    for type in ydb.PrimitiveType:
        if type.proto.type_id == base_type.type_id:
            break
    if is_optional:
        result_type = ydb.OptionalType(type)
        name = result_type._repr
    else:
        result_type = type
        name = result_type.name

    # Column-store path: represent Bool as Uint8
    if name.upper() == 'BOOL' and store_type.upper() == 'COLUMN':
        if is_optional:
            result_type = ydb.OptionalType(ydb.PrimitiveType.Uint8)
        else:
            result_type = ydb.PrimitiveType.Uint8
        name = 'Uint8'
    return result_type, name

def create_table(session, table_path, column_types, store_type, partition_keys, primary_keys, ttl_min, ttl_key):
    """Create table based on the structure of the provided column types."""
    if not column_types:
        raise ValueError("No column types to create table from.")

    columns_sql = []
    for column_name, column_ydb_type in column_types:
        column_type_obj, column_type_str = ydb_type_to_str(column_ydb_type, store_type.upper())
        if column_name in primary_keys:
            columns_sql.append(f"`{column_name}` {column_type_str.replace('?','')} NOT NULL")
        else:
            columns_sql.append(f"`{column_name}` {column_type_str.replace('?','')}")

    partition_keys_sql = ", ".join([f"`{key}`" for key in partition_keys])
    primary_keys_sql = ", ".join([f"`{key}`" for key in primary_keys])

    # Add TTL only if both arguments are set
    ttl_clause = ""
    if ttl_min and ttl_key:
        ttl_clause = f' TTL = Interval("PT{ttl_min}M") ON {ttl_key}'

    create_table_sql = f"""
    CREATE TABLE IF NOT EXISTS `{table_path}` (
        {', '.join(columns_sql)},
        PRIMARY KEY ({primary_keys_sql})
    )
    PARTITION BY HASH({partition_keys_sql})
    WITH (
        {"STORE = COLUMN" if store_type.upper() == 'COLUMN' else ''}
        {',' if store_type and ttl_clause else ''}
        {ttl_clause}
    )
    """

    print(f"Creating table with query: {create_table_sql}")
    session.execute_scheme(create_table_sql)

def create_table_if_not_exists(session, table_path, column_types, store_type, partition_keys, primary_keys, ttl_min, ttl_key):
    """Create table if it does not already exist, based on column types."""
    try:
        session.describe_table(table_path)
        print(f"Table '{table_path}' already exists.")
    except ydb.Error:
        print(f"Table '{table_path}' does not exist. Creating table...")
        create_table(session, table_path, column_types, store_type, partition_keys, primary_keys, ttl_min, ttl_key)

def bulk_upsert(table_client, table_path, rows, column_types, store_type='ROW'):
    print(f"> Bulk upsert into: {table_path}")

    column_types_map = ydb.BulkUpsertColumns()
    for column_name, column_ydb_type in column_types:
        column_type_obj, column_type_str = ydb_type_to_str(column_ydb_type, store_type.upper())
        column_types_map.add_column(column_name, column_type_obj)

    table_client.bulk_upsert(table_path, rows, column_types_map)

def parse_args():
    parser = argparse.ArgumentParser(description="YDB Table Manager")
    parser.add_argument("--table_path", required=True, help="Table path and name")
    parser.add_argument("--query_path", required=True, help="Path to the SQL query file")
    parser.add_argument("--store_type", choices=["column", "row"], required=True, help="Table store type (column or row)")
    parser.add_argument("--partition_keys", nargs="+", required=True, help="List of partition keys")
    parser.add_argument("--primary_keys", nargs="+", required=True, help="List of primary keys")
    parser.add_argument("--ttl_min", type=int, help="TTL in minutes")
    parser.add_argument("--ttl_key", help="TTL key column name")

    return parser.parse_args()

def main():
    args = parse_args()

    if "CI_YDB_SERVICE_ACCOUNT_KEY_FILE_CREDENTIALS" not in os.environ:
        print("Error: Env variable CI_YDB_SERVICE_ACCOUNT_KEY_FILE_CREDENTIALS is missing, skipping")
        return 1
    else:
        os.environ["YDB_SERVICE_ACCOUNT_KEY_FILE_CREDENTIALS"] = os.environ[
            "CI_YDB_SERVICE_ACCOUNT_KEY_FILE_CREDENTIALS"
        ]

    table_path = args.table_path
    batch_size = 50000  # rows per bulk-upsert batch

    # Read SQL query from file
    sql_query_path = os.path.join(repo_path, args.query_path)
    print(f'Query found: {sql_query_path}')
    with open(sql_query_path, 'r') as file:
        sql_query = file.read()

    with ydb.Driver(
        endpoint=DATABASE_ENDPOINT,
        database=DATABASE_PATH,
        credentials=ydb.credentials_from_env_variables()
    ) as driver:
        driver.wait(timeout=10, fail_fast=True)
        with ydb.SessionPool(driver) as pool:
            # Run query to get sample data and column types
            results, column_types = get_data_from_query_with_metadata(driver, sql_query)
            if not results:
                print("No data to create table from.")
                return

            # Create table if not exists based on sample column types
            pool.retry_operation_sync(
                lambda session: create_table_if_not_exists(
                    session, f'{DATABASE_PATH}/{table_path}', column_types, args.store_type,
                    args.partition_keys, args.primary_keys, args.ttl_min, args.ttl_key
                )
            )

            print(f'Preparing to upsert: {len(results)} rows')
            for start in range(0, len(results), batch_size):
                batch_rows = results[start:start + batch_size]
                print(f'Upserting: {start}-{start + len(batch_rows)}/{len(results)} rows')
                bulk_upsert(driver.table_client, f'{DATABASE_PATH}/{table_path}', batch_rows, column_types, args.store_type)
            print('Data uploaded')


if __name__ == "__main__":
    main()
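
For orientation, with the arguments from the script's own header comment (--store_type column, --ttl_min 43200, --ttl_key Run_start_timestamp), create_table() renders DDL roughly like the sketch below; the non-key columns and key types are derived from the scan-query result at run time, so they are elided, and 43200 minutes equals 30 days:

# Illustration only: approximate output of create_table() for the
# header-comment invocation; column definitions are omitted because
# they are inferred from the query result at run time.
example_ddl = """
CREATE TABLE IF NOT EXISTS `perfomance/olap/fast_results` (
    -- columns inferred from the scan-query result set ...
    PRIMARY KEY (`Db`, `Suite`, `Test`, `Branch`, `Run_start_timestamp`)
)
PARTITION BY HASH(`Run_start_timestamp`)
WITH (
    STORE = COLUMN,
    TTL = Interval("PT43200M") ON Run_start_timestamp
)
"""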
