
Commit 4f7d4b1

emar-kar authored and tswast committed
refactor(bigquery): update code samples to use strings for table and dataset IDs (#9974)
* load_and_query_partitioned_table
* remove client_query_legacy_sql from snippets
* client_query_w_named_params
* client_query_w_positional_params
* client_query_w_timestamp_params
* client_query_w_array_params
* client_query_w_struct_params
* query_no_cache
* query_external_gcs_temporary_table
* unify test_update_table_require_partition_filter
* Update test_copy_table_multiple_source.py
* Update client_query_add_column.py
* Update client_query_relax_column.py
* flake8 correction
* fix queries.rst file
* import reformat + comma deletion
1 parent fe9deb1 commit 4f7d4b1

24 files changed: +610, −365 lines
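
The pattern driving the diff below: samples that built DatasetReference and TableReference objects with client.dataset() now pass fully qualified ID strings directly to client methods. A minimal before/after sketch (the "my-project.my_dataset.my_table" ID is a placeholder, not a value from this commit):

    # from google.cloud import bigquery
    # client = bigquery.Client()

    # Before: build references explicitly.
    dataset_ref = client.dataset("my_dataset")  # DatasetReference
    table_ref = dataset_ref.table("my_table")  # TableReference
    table = client.get_table(table_ref)  # API request

    # After: pass one fully qualified ID string; the client parses it.
    table = client.get_table("my-project.my_dataset.my_table")  # API request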

bigquery/docs/snippets.py

Lines changed: 0 additions & 346 deletions
@@ -228,78 +228,6 @@ def test_create_partitioned_table(client, to_delete):
     assert table.time_partitioning.expiration_ms == 7776000000


-def test_load_and_query_partitioned_table(client, to_delete):
-    dataset_id = "load_partitioned_table_dataset_{}".format(_millis())
-    dataset = bigquery.Dataset(client.dataset(dataset_id))
-    client.create_dataset(dataset)
-    to_delete.append(dataset)
-
-    # [START bigquery_load_table_partitioned]
-    # from google.cloud import bigquery
-    # client = bigquery.Client()
-    # dataset_id = 'my_dataset'
-    table_id = "us_states_by_date"
-
-    dataset_ref = client.dataset(dataset_id)
-    job_config = bigquery.LoadJobConfig()
-    job_config.schema = [
-        bigquery.SchemaField("name", "STRING"),
-        bigquery.SchemaField("post_abbr", "STRING"),
-        bigquery.SchemaField("date", "DATE"),
-    ]
-    job_config.skip_leading_rows = 1
-    job_config.time_partitioning = bigquery.TimePartitioning(
-        type_=bigquery.TimePartitioningType.DAY,
-        field="date",  # name of column to use for partitioning
-        expiration_ms=7776000000,
-    )  # 90 days
-    uri = "gs://cloud-samples-data/bigquery/us-states/us-states-by-date.csv"
-
-    load_job = client.load_table_from_uri(
-        uri, dataset_ref.table(table_id), job_config=job_config
-    )  # API request
-
-    assert load_job.job_type == "load"
-
-    load_job.result()  # Waits for table load to complete.
-
-    table = client.get_table(dataset_ref.table(table_id))
-    print("Loaded {} rows to table {}".format(table.num_rows, table_id))
-    # [END bigquery_load_table_partitioned]
-    assert table.num_rows == 50
-
-    project_id = client.project
-
-    # [START bigquery_query_partitioned_table]
-    import datetime
-
-    # from google.cloud import bigquery
-    # client = bigquery.Client()
-    # project_id = 'my-project'
-    # dataset_id = 'my_dataset'
-    table_id = "us_states_by_date"
-
-    sql_template = """
-        SELECT *
-        FROM `{}.{}.{}`
-        WHERE date BETWEEN @start_date AND @end_date
-    """
-    sql = sql_template.format(project_id, dataset_id, table_id)
-    job_config = bigquery.QueryJobConfig()
-    job_config.query_parameters = [
-        bigquery.ScalarQueryParameter("start_date", "DATE", datetime.date(1800, 1, 1)),
-        bigquery.ScalarQueryParameter("end_date", "DATE", datetime.date(1899, 12, 31)),
-    ]
-
-    # API request
-    query_job = client.query(sql, job_config=job_config)
-
-    rows = list(query_job)
-    print("{} states were admitted to the US in the 1800s".format(len(rows)))
-    # [END bigquery_query_partitioned_table]
-    assert len(rows) == 29
-
-
 @pytest.mark.skip(
     reason=(
         "update_table() is flaky "
@@ -1327,35 +1255,6 @@ def test_extract_table_compressed(client, to_delete):
     to_delete.insert(0, blob)


-def test_client_query_legacy_sql(client):
-    """Run a query with Legacy SQL explicitly set"""
-    # [START bigquery_query_legacy]
-    # from google.cloud import bigquery
-    # client = bigquery.Client()
-
-    query = (
-        "SELECT name FROM [bigquery-public-data:usa_names.usa_1910_2013] "
-        'WHERE state = "TX" '
-        "LIMIT 100"
-    )
-
-    # Set use_legacy_sql to True to use legacy SQL syntax.
-    job_config = bigquery.QueryJobConfig()
-    job_config.use_legacy_sql = True
-
-    query_job = client.query(
-        query,
-        # Location must match that of the dataset(s) referenced in the query.
-        location="US",
-        job_config=job_config,
-    )  # API request - starts the query
-
-    # Print the results.
-    for row in query_job:  # API request - fetches results
-        print(row)
-    # [END bigquery_query_legacy]
-
-
 def test_client_query_total_rows(client, capsys):
     """Run a query and just check for how many rows."""
     # [START bigquery_query_total_rows]
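
Per the commit message, the legacy-SQL snippet is removed from snippets.py rather than rewritten here. If it survives as a standalone sample, it plausibly condenses to the following sketch (the keyword-argument QueryJobConfig is an assumption, matching the newer samples):

    # from google.cloud import bigquery
    # client = bigquery.Client()

    query = (
        "SELECT name FROM [bigquery-public-data:usa_names.usa_1910_2013] "
        'WHERE state = "TX" '
        "LIMIT 100"
    )

    # Set use_legacy_sql to True to use legacy SQL syntax.
    job_config = bigquery.QueryJobConfig(use_legacy_sql=True)

    query_job = client.query(query, job_config=job_config)  # API request

    for row in query_job:  # API request - fetches results
        print(row)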
@@ -1420,251 +1319,6 @@ def test_manage_job(client):
     # [END bigquery_get_job]


-def test_client_query_w_named_params(client, capsys):
-    """Run a query using named query parameters"""
-
-    # [START bigquery_query_params_named]
-    # from google.cloud import bigquery
-    # client = bigquery.Client()
-
-    query = """
-        SELECT word, word_count
-        FROM `bigquery-public-data.samples.shakespeare`
-        WHERE corpus = @corpus
-        AND word_count >= @min_word_count
-        ORDER BY word_count DESC;
-    """
-    query_params = [
-        bigquery.ScalarQueryParameter("corpus", "STRING", "romeoandjuliet"),
-        bigquery.ScalarQueryParameter("min_word_count", "INT64", 250),
-    ]
-    job_config = bigquery.QueryJobConfig()
-    job_config.query_parameters = query_params
-    query_job = client.query(
-        query,
-        # Location must match that of the dataset(s) referenced in the query.
-        location="US",
-        job_config=job_config,
-    )  # API request - starts the query
-
-    # Print the results
-    for row in query_job:
-        print("{}: \t{}".format(row.word, row.word_count))
-
-    assert query_job.state == "DONE"
-    # [END bigquery_query_params_named]
-
-    out, _ = capsys.readouterr()
-    assert "the" in out
-
-
-def test_client_query_w_positional_params(client, capsys):
-    """Run a query using query parameters"""
-
-    # [START bigquery_query_params_positional]
-    # from google.cloud import bigquery
-    # client = bigquery.Client()
-
-    query = """
-        SELECT word, word_count
-        FROM `bigquery-public-data.samples.shakespeare`
-        WHERE corpus = ?
-        AND word_count >= ?
-        ORDER BY word_count DESC;
-    """
-    # Set the name to None to use positional parameters.
-    # Note that you cannot mix named and positional parameters.
-    query_params = [
-        bigquery.ScalarQueryParameter(None, "STRING", "romeoandjuliet"),
-        bigquery.ScalarQueryParameter(None, "INT64", 250),
-    ]
-    job_config = bigquery.QueryJobConfig()
-    job_config.query_parameters = query_params
-    query_job = client.query(
-        query,
-        # Location must match that of the dataset(s) referenced in the query.
-        location="US",
-        job_config=job_config,
-    )  # API request - starts the query
-
-    # Print the results
-    for row in query_job:
-        print("{}: \t{}".format(row.word, row.word_count))
-
-    assert query_job.state == "DONE"
-    # [END bigquery_query_params_positional]
-
-    out, _ = capsys.readouterr()
-    assert "the" in out
-
-
-def test_client_query_w_timestamp_params(client, capsys):
-    """Run a query using query parameters"""
-
-    # [START bigquery_query_params_timestamps]
-    # from google.cloud import bigquery
-    # client = bigquery.Client()
-
-    import datetime
-    import pytz
-
-    query = "SELECT TIMESTAMP_ADD(@ts_value, INTERVAL 1 HOUR);"
-    query_params = [
-        bigquery.ScalarQueryParameter(
-            "ts_value",
-            "TIMESTAMP",
-            datetime.datetime(2016, 12, 7, 8, 0, tzinfo=pytz.UTC),
-        )
-    ]
-    job_config = bigquery.QueryJobConfig()
-    job_config.query_parameters = query_params
-    query_job = client.query(
-        query,
-        # Location must match that of the dataset(s) referenced in the query.
-        location="US",
-        job_config=job_config,
-    )  # API request - starts the query
-
-    # Print the results
-    for row in query_job:
-        print(row)
-
-    assert query_job.state == "DONE"
-    # [END bigquery_query_params_timestamps]
-
-    out, _ = capsys.readouterr()
-    assert "2016, 12, 7, 9, 0" in out
-
-
-def test_client_query_w_array_params(client, capsys):
-    """Run a query using array query parameters"""
-    # [START bigquery_query_params_arrays]
-    # from google.cloud import bigquery
-    # client = bigquery.Client()
-
-    query = """
-        SELECT name, sum(number) as count
-        FROM `bigquery-public-data.usa_names.usa_1910_2013`
-        WHERE gender = @gender
-        AND state IN UNNEST(@states)
-        GROUP BY name
-        ORDER BY count DESC
-        LIMIT 10;
-    """
-    query_params = [
-        bigquery.ScalarQueryParameter("gender", "STRING", "M"),
-        bigquery.ArrayQueryParameter("states", "STRING", ["WA", "WI", "WV", "WY"]),
-    ]
-    job_config = bigquery.QueryJobConfig()
-    job_config.query_parameters = query_params
-    query_job = client.query(
-        query,
-        # Location must match that of the dataset(s) referenced in the query.
-        location="US",
-        job_config=job_config,
-    )  # API request - starts the query
-
-    # Print the results
-    for row in query_job:
-        print("{}: \t{}".format(row.name, row.count))
-
-    assert query_job.state == "DONE"
-    # [END bigquery_query_params_arrays]
-
-    out, _ = capsys.readouterr()
-    assert "James" in out
-
-
-def test_client_query_w_struct_params(client, capsys):
-    """Run a query using struct query parameters"""
-    # [START bigquery_query_params_structs]
-    # from google.cloud import bigquery
-    # client = bigquery.Client()
-
-    query = "SELECT @struct_value AS s;"
-    query_params = [
-        bigquery.StructQueryParameter(
-            "struct_value",
-            bigquery.ScalarQueryParameter("x", "INT64", 1),
-            bigquery.ScalarQueryParameter("y", "STRING", "foo"),
-        )
-    ]
-    job_config = bigquery.QueryJobConfig()
-    job_config.query_parameters = query_params
-    query_job = client.query(
-        query,
-        # Location must match that of the dataset(s) referenced in the query.
-        location="US",
-        job_config=job_config,
-    )  # API request - starts the query
-
-    # Print the results
-    for row in query_job:
-        print(row.s)
-
-    assert query_job.state == "DONE"
-    # [END bigquery_query_params_structs]
-
-    out, _ = capsys.readouterr()
-    assert "1" in out
-    assert "foo" in out
-
-
-def test_query_no_cache(client):
-    # [START bigquery_query_no_cache]
-    # from google.cloud import bigquery
-    # client = bigquery.Client()
-
-    job_config = bigquery.QueryJobConfig()
-    job_config.use_query_cache = False
-    sql = """
-        SELECT corpus
-        FROM `bigquery-public-data.samples.shakespeare`
-        GROUP BY corpus;
-    """
-    query_job = client.query(
-        sql,
-        # Location must match that of the dataset(s) referenced in the query.
-        location="US",
-        job_config=job_config,
-    )  # API request
-
-    # Print the results.
-    for row in query_job:  # API request - fetches results
-        print(row)
-    # [END bigquery_query_no_cache]
-
-
-def test_query_external_gcs_temporary_table(client):
-    # [START bigquery_query_external_gcs_temp]
-    # from google.cloud import bigquery
-    # client = bigquery.Client()
-
-    # Configure the external data source and query job
-    external_config = bigquery.ExternalConfig("CSV")
-    external_config.source_uris = [
-        "gs://cloud-samples-data/bigquery/us-states/us-states.csv"
-    ]
-    external_config.schema = [
-        bigquery.SchemaField("name", "STRING"),
-        bigquery.SchemaField("post_abbr", "STRING"),
-    ]
-    external_config.options.skip_leading_rows = 1  # optionally skip header row
-    table_id = "us_states"
-    job_config = bigquery.QueryJobConfig()
-    job_config.table_definitions = {table_id: external_config}
-
-    # Example query to find states starting with 'W'
-    sql = 'SELECT * FROM `{}` WHERE name LIKE "W%"'.format(table_id)
-
-    query_job = client.query(sql, job_config=job_config)  # API request
-
-    w_states = list(query_job)  # Waits for query to finish
-    print("There are {} states with names starting with W.".format(len(w_states)))
-    # [END bigquery_query_external_gcs_temp]
-    assert len(w_states) == 4
-
-
 def test_query_external_gcs_permanent_table(client, to_delete):
     dataset_id = "query_external_gcs_{}".format(_millis())
     dataset = bigquery.Dataset(client.dataset(dataset_id))
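
As one example of where these snippets land, a sketch of the relocated no-cache sample (the file name samples/query_no_cache.py is inferred from the commit message, and the keyword-argument QueryJobConfig is an assumption):

    # from google.cloud import bigquery
    # client = bigquery.Client()

    job_config = bigquery.QueryJobConfig(use_query_cache=False)
    sql = """
        SELECT corpus
        FROM `bigquery-public-data.samples.shakespeare`
        GROUP BY corpus;
    """
    query_job = client.query(sql, job_config=job_config)  # API request

    for row in query_job:  # API request - fetches results
        print(row)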

bigquery/docs/usage/queries.rst

Lines changed: 1 addition & 1 deletion
@@ -43,7 +43,7 @@ Run a query using a named query parameter
 See BigQuery documentation for more information on
 `parameterized queries <https://cloud.google.com/bigquery/docs/parameterized-queries>`_.

-.. literalinclude:: ../snippets.py
+.. literalinclude:: ../samples/client_query_w_named_params.py
    :language: python
    :dedent: 4
    :start-after: [START bigquery_query_params_named]
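
The docs now literalinclude the bigquery_query_params_named region from samples/client_query_w_named_params.py. Based on the snippet deleted above, that file plausibly contains the following (a sketch; the keyword-argument QueryJobConfig is assumed, not confirmed by this diff):

    # from google.cloud import bigquery
    # client = bigquery.Client()

    query = """
        SELECT word, word_count
        FROM `bigquery-public-data.samples.shakespeare`
        WHERE corpus = @corpus
        AND word_count >= @min_word_count
        ORDER BY word_count DESC;
    """
    job_config = bigquery.QueryJobConfig(
        query_parameters=[
            bigquery.ScalarQueryParameter("corpus", "STRING", "romeoandjuliet"),
            bigquery.ScalarQueryParameter("min_word_count", "INT64", 250),
        ]
    )
    query_job = client.query(query, job_config=job_config)  # API request

    for row in query_job:
        print("{}: \t{}".format(row.word, row.word_count))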
