Skip to content

Commit ff2ec3a

Browse files
authored
docs: add sample to run DML query (#591)
* docs: add sample to run DML query * cleanup leftover datasets before test run * fix import order
1 parent 8089bdb commit ff2ec3a

File tree

4 files changed

+168
-0
lines changed

4 files changed

+168
-0
lines changed

samples/snippets/conftest.py

+40
Original file line numberDiff line numberDiff line change
@@ -12,10 +12,35 @@
1212
# See the License for the specific language governing permissions and
1313
# limitations under the License.
1414

15+
import datetime
16+
import random
17+
1518
from google.cloud import bigquery
1619
import pytest
1720

1821

22+
RESOURCE_PREFIX = "python_bigquery_samples_snippets"
23+
24+
25+
def resource_prefix() -> str:
26+
timestamp = datetime.datetime.utcnow().strftime("%Y%m%d_%H%M%S")
27+
random_string = hex(random.randrange(1000000))[2:]
28+
return f"{RESOURCE_PREFIX}_{timestamp}_{random_string}"
29+
30+
31+
@pytest.fixture(scope="session", autouse=True)
32+
def cleanup_datasets(bigquery_client: bigquery.Client):
33+
yesterday = datetime.datetime.utcnow() - datetime.timedelta(days=1)
34+
for dataset in bigquery_client.list_datasets():
35+
if (
36+
dataset.dataset_id.startswith(RESOURCE_PREFIX)
37+
and dataset.created < yesterday
38+
):
39+
bigquery_client.delete_dataset(
40+
dataset, delete_contents=True, not_found_ok=True
41+
)
42+
43+
1944
@pytest.fixture(scope="session")
2045
def bigquery_client():
2146
bigquery_client = bigquery.Client()
@@ -25,3 +50,18 @@ def bigquery_client():
2550
@pytest.fixture(scope="session")
2651
def project_id(bigquery_client):
2752
return bigquery_client.project
53+
54+
55+
@pytest.fixture(scope="session")
56+
def dataset_id(bigquery_client: bigquery.Client, project_id: str):
57+
dataset_id = resource_prefix()
58+
full_dataset_id = f"{project_id}.{dataset_id}"
59+
dataset = bigquery.Dataset(full_dataset_id)
60+
bigquery_client.create_dataset(dataset)
61+
yield dataset_id
62+
bigquery_client.delete_dataset(dataset, delete_contents=True, not_found_ok=True)
63+
64+
65+
@pytest.fixture
66+
def bigquery_client_patch(monkeypatch, bigquery_client):
67+
monkeypatch.setattr(bigquery, "Client", lambda: bigquery_client)
+36
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
# Copyright 2021 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# https://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
from google.cloud import bigquery
16+
import pytest
17+
18+
from conftest import resource_prefix
19+
import update_with_dml
20+
21+
22+
@pytest.fixture
23+
def table_id(bigquery_client: bigquery.Client, project_id: str, dataset_id: str):
24+
table_id = f"{resource_prefix()}_update_with_dml"
25+
yield table_id
26+
full_table_id = f"{project_id}.{dataset_id}.{table_id}"
27+
bigquery_client.delete_table(full_table_id, not_found_ok=True)
28+
29+
30+
def test_update_with_dml(bigquery_client_patch, dataset_id, table_id):
31+
override_values = {
32+
"dataset_id": dataset_id,
33+
"table_id": table_id,
34+
}
35+
num_rows = update_with_dml.run_sample(override_values=override_values)
36+
assert num_rows > 0

samples/snippets/update_with_dml.py

+82
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,82 @@
1+
# Copyright 2021 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# https://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
# [START bigquery_update_with_dml]
16+
import pathlib
17+
18+
from google.cloud import bigquery
19+
from google.cloud.bigquery import enums
20+
21+
22+
def load_from_newline_delimited_json(
23+
client: bigquery.Client,
24+
filepath: pathlib.Path,
25+
project_id: str,
26+
dataset_id: str,
27+
table_id: str,
28+
):
29+
full_table_id = f"{project_id}.{dataset_id}.{table_id}"
30+
job_config = bigquery.LoadJobConfig()
31+
job_config.source_format = enums.SourceFormat.NEWLINE_DELIMITED_JSON
32+
job_config.schema = [
33+
bigquery.SchemaField("id", enums.SqlTypeNames.STRING),
34+
bigquery.SchemaField("user_id", enums.SqlTypeNames.INTEGER),
35+
bigquery.SchemaField("login_time", enums.SqlTypeNames.TIMESTAMP),
36+
bigquery.SchemaField("logout_time", enums.SqlTypeNames.TIMESTAMP),
37+
bigquery.SchemaField("ip_address", enums.SqlTypeNames.STRING),
38+
]
39+
40+
with open(filepath, "rb") as json_file:
41+
load_job = client.load_table_from_file(
42+
json_file, full_table_id, job_config=job_config
43+
)
44+
45+
# Wait for load job to finish.
46+
load_job.result()
47+
48+
49+
def update_with_dml(
50+
client: bigquery.Client, project_id: str, dataset_id: str, table_id: str
51+
):
52+
query_text = f"""
53+
UPDATE `{project_id}.{dataset_id}.{table_id}`
54+
SET ip_address = REGEXP_REPLACE(ip_address, r"(\\.[0-9]+)$", ".0")
55+
WHERE TRUE
56+
"""
57+
query_job = client.query(query_text)
58+
59+
# Wait for query job to finish.
60+
query_job.result()
61+
62+
print(f"DML query modified {query_job.num_dml_affected_rows} rows.")
63+
return query_job.num_dml_affected_rows
64+
65+
66+
def run_sample(override_values={}):
67+
client = bigquery.Client()
68+
filepath = pathlib.Path(__file__).parent / "user_sessions_data.json"
69+
project_id = client.project
70+
dataset_id = "sample_db"
71+
table_id = "UserSessions"
72+
# [END bigquery_update_with_dml]
73+
# To facilitate testing, we replace values with alternatives
74+
# provided by the testing harness.
75+
dataset_id = override_values.get("dataset_id", dataset_id)
76+
table_id = override_values.get("table_id", table_id)
77+
# [START bigquery_update_with_dml]
78+
load_from_newline_delimited_json(client, filepath, project_id, dataset_id, table_id)
79+
return update_with_dml(client, project_id, dataset_id, table_id)
80+
81+
82+
# [END bigquery_update_with_dml]
+10
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
{"id":"2ad525d6-c832-4c3d-b7fe-59d104885519","user_id":"38","login_time":"1.47766087E9","logout_time":"1.477661109E9","ip_address":"192.0.2.12"}
2+
{"id":"53d65e20-6ea9-4650-98d9-a2111fbd1122","user_id":"88","login_time":"1.47707544E9","logout_time":"1.477075519E9","ip_address":"192.0.2.88"}
3+
{"id":"5e6c3021-d5e7-4ccd-84b2-adfa9176d13d","user_id":"39","login_time":"1.474022869E9","logout_time":"1.474022961E9","ip_address":"203.0.113.52"}
4+
{"id":"6196eefa-1498-4567-8ef0-498845b888d9","user_id":"52","login_time":"1.478604612E9","logout_time":"1.478604691E9","ip_address":"203.0.113.169"}
5+
{"id":"70656dc5-7e0f-49cf-9e00-f06ed93c1f5b","user_id":"46","login_time":"1.474089924E9","logout_time":"1.474090227E9","ip_address":"192.0.2.10"}
6+
{"id":"aafa5eef-ad49-49a7-9a0f-fbc7fd639bd3","user_id":"40","login_time":"1.478031161E9","logout_time":"1.478031388E9","ip_address":"203.0.113.18"}
7+
{"id":"d2792fc2-24dd-4260-9456-3fbe6cdfdd90","user_id":"5","login_time":"1.481259081E9","logout_time":"1.481259247E9","ip_address":"192.0.2.140"}
8+
{"id":"d835dc49-32f9-4790-b4eb-dddee62e0dcc","user_id":"62","login_time":"1.478892977E9","logout_time":"1.478893219E9","ip_address":"203.0.113.83"}
9+
{"id":"f4a0d3c7-351f-471c-8e11-e093e7a6ce75","user_id":"89","login_time":"1.459031555E9","logout_time":"1.459031831E9","ip_address":"203.0.113.233"}
10+
{"id":"f6e9f526-5b22-4679-9c3e-56a636e815bb","user_id":"97","login_time":"1.482426034E9","logout_time":"1.482426415E9","ip_address":"203.0.113.167"}

0 commit comments

Comments
 (0)