fix: address bigquery/bqml test failures #2920

Merged · 3 commits merged on Feb 15, 2020
Changes from 2 commits
29 changes: 17 additions & 12 deletions bigquery/bqml/data_scientist_tutorial_test.py
@@ -16,29 +16,34 @@
from google.cloud import bigquery
# [END bqml_data_scientist_tutorial_import_and_client]
import pytest
import uuid

# [START bqml_data_scientist_tutorial_import_and_client]
client = bigquery.Client()
# We use a unique dataset ID for this example to avoid collisions with
# other invocations of this tutorial. In practice, you could leverage
# a persistent dataset and not create/destroy it with each invocation.
dataset_id = "bqml_tutorial_{}".format(str(uuid.uuid4().hex))
# [END bqml_data_scientist_tutorial_import_and_client]


@pytest.fixture
def delete_dataset():
yield
client.delete_dataset(
client.dataset('bqml_tutorial'), delete_contents=True)
client.dataset(dataset_id), delete_contents=True)


def test_data_scientist_tutorial(delete_dataset):
# [START bqml_data_scientist_tutorial_create_dataset]
dataset = bigquery.Dataset(client.dataset('bqml_tutorial'))
dataset = bigquery.Dataset(client.dataset(dataset_id))
dataset.location = 'US'
client.create_dataset(dataset)
# [END bqml_data_scientist_tutorial_create_dataset]

# [START bqml_data_scientist_tutorial_create_model]
sql = """
CREATE OR REPLACE MODEL `bqml_tutorial.sample_model`
CREATE OR REPLACE MODEL `{}.sample_model`
OPTIONS(model_type='logistic_reg') AS
SELECT
IF(totals.transactions IS NULL, 0, 1) AS label,
@@ -50,7 +55,7 @@ def test_data_scientist_tutorial(delete_dataset):
`bigquery-public-data.google_analytics_sample.ga_sessions_*`
WHERE
_TABLE_SUFFIX BETWEEN '20160801' AND '20170630'
"""
""".format(dataset_id)
df = client.query(sql).to_dataframe()
print(df)
# [END bqml_data_scientist_tutorial_create_model]
@@ -60,8 +65,8 @@ def test_data_scientist_tutorial(delete_dataset):
SELECT
*
FROM
ML.TRAINING_INFO(MODEL `bqml_tutorial.sample_model`)
"""
ML.TRAINING_INFO(MODEL `{}.sample_model`)
""".format(dataset_id)
df = client.query(sql).to_dataframe()
print(df)
# [END bqml_data_scientist_tutorial_get_training_statistics]
@@ -70,7 +75,7 @@ def test_data_scientist_tutorial(delete_dataset):
sql = """
SELECT
*
FROM ML.EVALUATE(MODEL `bqml_tutorial.sample_model`, (
FROM ML.EVALUATE(MODEL `{}.sample_model`, (
SELECT
IF(totals.transactions IS NULL, 0, 1) AS label,
IFNULL(device.operatingSystem, "") AS os,
@@ -81,7 +86,7 @@ def test_data_scientist_tutorial(delete_dataset):
`bigquery-public-data.google_analytics_sample.ga_sessions_*`
WHERE
_TABLE_SUFFIX BETWEEN '20170701' AND '20170801'))
"""
""".format(dataset_id)
df = client.query(sql).to_dataframe()
print(df)
# [END bqml_data_scientist_tutorial_evaluate_model]
@@ -91,7 +96,7 @@ def test_data_scientist_tutorial(delete_dataset):
SELECT
country,
SUM(predicted_label) as total_predicted_purchases
FROM ML.PREDICT(MODEL `bqml_tutorial.sample_model`, (
FROM ML.PREDICT(MODEL `{}.sample_model`, (
SELECT
IFNULL(device.operatingSystem, "") AS os,
device.isMobile AS is_mobile,
@@ -104,7 +109,7 @@ def test_data_scientist_tutorial(delete_dataset):
GROUP BY country
ORDER BY total_predicted_purchases DESC
LIMIT 10
"""
""".format(dataset_id)
df = client.query(sql).to_dataframe()
print(df)
# [END bqml_data_scientist_tutorial_predict_transactions]
@@ -114,7 +119,7 @@ def test_data_scientist_tutorial(delete_dataset):
SELECT
fullVisitorId,
SUM(predicted_label) as total_predicted_purchases
FROM ML.PREDICT(MODEL `bqml_tutorial.sample_model`, (
FROM ML.PREDICT(MODEL `{}.sample_model`, (
SELECT
IFNULL(device.operatingSystem, "") AS os,
device.isMobile AS is_mobile,
@@ -128,7 +133,7 @@ def test_data_scientist_tutorial(delete_dataset):
GROUP BY fullVisitorId
ORDER BY total_predicted_purchases DESC
LIMIT 10
"""
""".format(dataset_id)
df = client.query(sql).to_dataframe()
print(df)
# [END bqml_data_scientist_tutorial_predict_purchases]
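
The core of the change above is the throwaway-dataset pattern. The following is a minimal sketch, not part of this diff, showing that pattern in isolation with the same google-cloud-bigquery calls the test uses; the fixture name temp_dataset_id is an illustrative name introduced here, not one from the PR.

# Sketch only: each invocation gets its own uuid-suffixed dataset, so
# parallel test runs cannot collide on a shared `bqml_tutorial` dataset,
# and teardown removes the trained model together with the dataset.
import uuid

import pytest
from google.cloud import bigquery

client = bigquery.Client()


@pytest.fixture
def temp_dataset_id():
    # e.g. "bqml_tutorial_9f2c4e..." -- unique per run.
    dataset_id = "bqml_tutorial_{}".format(uuid.uuid4().hex)
    dataset = bigquery.Dataset(client.dataset(dataset_id))
    dataset.location = 'US'
    client.create_dataset(dataset)
    yield dataset_id
    # delete_contents=True also drops the sample_model created inside the dataset.
    client.delete_dataset(
        client.dataset(dataset_id), delete_contents=True)
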
47 changes: 23 additions & 24 deletions bigquery/bqml/ncaa_tutorial_test.py
@@ -14,6 +14,7 @@

import io
import os
import uuid

# [START bqml_ncaa_tutorial_import_and_client]
from google.cloud import bigquery
@@ -22,43 +23,41 @@

# [START bqml_ncaa_tutorial_import_and_client]
client = bigquery.Client()
# We use a unique dataset ID for this example to avoid collisions with
# other invocations of this tutorial. In practice, you could leverage
# a persistent dataset and not create/destroy it with each invocation.
dataset_id = "bqml_tutorial_{}".format(str(uuid.uuid4().hex))
# [END bqml_ncaa_tutorial_import_and_client]


@pytest.fixture
def delete_dataset():
yield
client.delete_dataset(
client.dataset('bqml_tutorial'), delete_contents=True)
client.dataset(dataset_id), delete_contents=True)


def test_ncaa_tutorial(delete_dataset):
# [START bqml_ncaa_tutorial_create_dataset]
dataset = bigquery.Dataset(client.dataset('bqml_tutorial'))
dataset = bigquery.Dataset(client.dataset(dataset_id))
dataset.location = 'US'
client.create_dataset(dataset)
# [END bqml_ncaa_tutorial_create_dataset]

# Create the tables used by the tutorial
# Note: the queries are saved to a file. This should be updated to use the
# saved queries once the library supports running saved queries.
query_filepath_to_table_name = {
'feature_input_query.sql': 'cume_games',
'training_data_query.sql': 'wide_games'
}
query_files = ['feature_input_query.sql', 'training_data_query.sql']
resources_directory = os.path.join(os.path.dirname(__file__), 'resources')
for query_filepath, table_name in query_filepath_to_table_name.items():
table_ref = dataset.table(table_name)
job_config = bigquery.QueryJobConfig()
job_config.destination = table_ref
for fname in query_files:
query_filepath = os.path.join(
resources_directory, query_filepath)
sql = io.open(query_filepath, 'r', encoding='utf-8').read()
client.query(sql, job_config=job_config).result()
resources_directory, fname)
sql = io.open(query_filepath, 'r', encoding='utf-8').read().format(dataset_id)
client.query(sql).result()

# [START bqml_ncaa_tutorial_create_model]
sql = """
CREATE OR REPLACE MODEL `bqml_tutorial.ncaa_model`
CREATE OR REPLACE MODEL `{0}.ncaa_model`
OPTIONS (
model_type='linear_reg',
max_iteration=50 ) AS
@@ -69,11 +68,11 @@ def test_ncaa_tutorial(delete_dataset):
total_three_points_att),
total_three_points_att as label
FROM
`bqml_tutorial.wide_games`
`{0}.wide_games`
WHERE
# remove the game to predict
game_id != 'f1063e80-23c7-486b-9a5e-faa52beb2d83'
"""
""".format(dataset_id)
df = client.query(sql).to_dataframe()
print(df)
# [END bqml_ncaa_tutorial_create_model]
@@ -83,8 +82,8 @@ def test_ncaa_tutorial(delete_dataset):
SELECT
*
FROM
ML.TRAINING_INFO(MODEL `bqml_tutorial.ncaa_model`)
"""
ML.TRAINING_INFO(MODEL `{}.ncaa_model`)
""".format(dataset_id)
df = client.query(sql).to_dataframe()
print(df)
# [END bqml_ncaa_tutorial_get_training_statistics]
@@ -96,13 +95,13 @@ def test_ncaa_tutorial(delete_dataset):
*,
total_three_points_att AS label
FROM
`bqml_tutorial.wide_games` )
`{0}.wide_games` )
SELECT
*
FROM
ML.EVALUATE(MODEL `bqml_tutorial.ncaa_model`,
ML.EVALUATE(MODEL `{0}.ncaa_model`,
TABLE eval_table)
"""
""".format(dataset_id)
df = client.query(sql).to_dataframe()
print(df)
# [END bqml_ncaa_tutorial_evaluate_model]
@@ -113,7 +112,7 @@ def test_ncaa_tutorial(delete_dataset):
SELECT
*
FROM
`bqml_tutorial.wide_games`
`{0}.wide_games`
WHERE
game_id='f1063e80-23c7-486b-9a5e-faa52beb2d83' )
SELECT
@@ -125,7 +124,7 @@ def test_ncaa_tutorial(delete_dataset):
game_id,
predicted_label AS predicted_total_three_points_att
FROM
ML.PREDICT(MODEL `bqml_tutorial.ncaa_model`,
ML.PREDICT(MODEL `{0}.ncaa_model`,
table game_to_predict) ) AS predict
JOIN (
SELECT
@@ -135,7 +134,7 @@ def test_ncaa_tutorial(delete_dataset):
game_to_predict) AS truth
ON
predict.game_id = truth.game_id
"""
""".format(dataset_id)
df = client.query(sql).to_dataframe()
print(df)
# [END bqml_ncaa_tutorial_predict_outcomes]
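
After this change the ncaa test no longer wires a destination table through QueryJobConfig; each .sql resource file instead begins with CREATE OR REPLACE TABLE `{0}....` and the test substitutes the per-run dataset ID with str.format. A hedged sketch of that flow, assuming a resources/feature_input_query.sql shaped like the one in this PR:

import io
import os

from google.cloud import bigquery

client = bigquery.Client()
dataset_id = "bqml_tutorial_example"  # hypothetical; the tests build this from uuid4().hex

query_path = os.path.join(
    os.path.dirname(__file__), 'resources', 'feature_input_query.sql')
# The file starts with "CREATE OR REPLACE TABLE `{0}.cume_games` AS ...",
# so format() points the DDL at the per-run dataset.
sql = io.open(query_path, 'r', encoding='utf-8').read().format(dataset_id)
client.query(sql).result()  # block until the table exists

Using a positional {0} token rather than a bare {} lets the same dataset ID be substituted more than once in a file, which training_data_query.sql relies on for its self-join on cume_games.
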
3 changes: 2 additions & 1 deletion bigquery/bqml/requirements.txt
@@ -1,3 +1,4 @@
google-cloud-bigquery[pandas]==1.20.0
pandas==0.22
google-cloud-bigquery>=1.24.0
flaky==3.6.1
mock==3.0.5
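
The dependency pin is loosened to google-cloud-bigquery>=1.24.0, and pandas is now listed explicitly rather than pulled in through the [pandas] extra; the tests still need it because every query result is materialised with to_dataframe(). A trivial illustration, not part of the PR:

from google.cloud import bigquery

client = bigquery.Client()
# to_dataframe() requires pandas at runtime, hence the explicit pandas pin.
print(client.query("SELECT 1 AS x").to_dataframe())
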
6 changes: 5 additions & 1 deletion bigquery/bqml/resources/feature_input_query.sql
@@ -1,4 +1,8 @@
#standardSQL
# This query creates a sample table using
# the ncaa_basketball public dataset. It
# uses a format string token for setting
# the destination dataset.
CREATE OR REPLACE TABLE `{0}.cume_games` AS
SELECT
game_id,
season,
5 changes: 3 additions & 2 deletions bigquery/bqml/resources/training_data_query.sql
@@ -1,4 +1,5 @@
#standardSQL
CREATE OR REPLACE TABLE `{0}.wide_games` AS
SELECT
team.game_id AS game_id,
team.season AS season,
@@ -768,9 +769,9 @@ SELECT
opponent.opp_possessions_std_last_5 AS opponent_opp_possessions_std_last_5,
opponent.opp_possessions_std_last_10 AS opponent_opp_possessions_std_last_10
FROM
`bqml_tutorial.cume_games` AS team
`{0}.cume_games` AS team
JOIN
`bqml_tutorial.cume_games` AS opponent
`{0}.cume_games` AS opponent
ON
team.game_id = opponent.game_id AND team.team_id != opponent.team_id
WHERE