14
14
15
15
import io
16
16
import os
17
+ import uuid
17
18
18
19
# [START bqml_ncaa_tutorial_import_and_client]
19
20
from google .cloud import bigquery
22
23
23
24
# [START bqml_ncaa_tutorial_import_and_client]
24
25
client = bigquery .Client ()
26
+ # We use a unique dataset ID for this example to avoid collisions with
27
+ # other invocations of this tutorial. In practice, you could leverage
28
+ # a persistent dataset and not create/destroy it with each invocation.
29
+ dataset_id = "bqml_tutorial_{}" .format (str (uuid .uuid4 ().hex ))
25
30
# [END bqml_ncaa_tutorial_import_and_client]
26
31
27
32
28
33
@pytest .fixture
29
34
def delete_dataset ():
30
35
yield
31
36
client .delete_dataset (
32
- client .dataset ('bqml_tutorial' ), delete_contents = True )
37
+ client .dataset (dataset_id ), delete_contents = True )
33
38
34
39
35
40
def test_ncaa_tutorial (delete_dataset ):
36
41
# [START bqml_ncaa_tutorial_create_dataset]
37
- dataset = bigquery .Dataset (client .dataset ('bqml_tutorial' ))
42
+ dataset = bigquery .Dataset (client .dataset (dataset_id ))
38
43
dataset .location = 'US'
39
44
client .create_dataset (dataset )
40
45
# [END bqml_ncaa_tutorial_create_dataset]
41
46
42
47
# Create the tables used by the tutorial
43
48
# Note: the queries are saved to a file. This should be updated to use the
44
49
# saved queries once the library supports running saved queries.
45
- query_filepath_to_table_name = {
46
- 'feature_input_query.sql' : 'cume_games' ,
47
- 'training_data_query.sql' : 'wide_games'
48
- }
50
+ query_files = ['feature_input_query.sql' , 'training_data_query.sql' ]
49
51
resources_directory = os .path .join (os .path .dirname (__file__ ), 'resources' )
50
- for query_filepath , table_name in query_filepath_to_table_name .items ():
51
- table_ref = dataset .table (table_name )
52
- job_config = bigquery .QueryJobConfig ()
53
- job_config .destination = table_ref
52
+ for fname in query_files :
54
53
query_filepath = os .path .join (
55
- resources_directory , query_filepath )
56
- sql = io .open (query_filepath , 'r' , encoding = 'utf-8' ).read ()
57
- client .query (sql , job_config = job_config ).result ()
54
+ resources_directory , fname )
55
+ sql = io .open (query_filepath , 'r' , encoding = 'utf-8' ).read (). format ( dataset_id )
56
+ client .query (sql ).result ()
58
57
59
58
# [START bqml_ncaa_tutorial_create_model]
60
59
sql = """
61
- CREATE OR REPLACE MODEL `bqml_tutorial .ncaa_model`
60
+ CREATE OR REPLACE MODEL `{0} .ncaa_model`
62
61
OPTIONS (
63
62
model_type='linear_reg',
64
63
max_iteration=50 ) AS
@@ -69,11 +68,11 @@ def test_ncaa_tutorial(delete_dataset):
69
68
total_three_points_att),
70
69
total_three_points_att as label
71
70
FROM
72
- `bqml_tutorial .wide_games`
71
+ `{0} .wide_games`
73
72
WHERE
74
73
# remove the game to predict
75
74
game_id != 'f1063e80-23c7-486b-9a5e-faa52beb2d83'
76
- """
75
+ """ . format ( dataset_id )
77
76
df = client .query (sql ).to_dataframe ()
78
77
print (df )
79
78
# [END bqml_ncaa_tutorial_create_model]
@@ -83,8 +82,8 @@ def test_ncaa_tutorial(delete_dataset):
83
82
SELECT
84
83
*
85
84
FROM
86
- ML.TRAINING_INFO(MODEL `bqml_tutorial .ncaa_model`)
87
- """
85
+ ML.TRAINING_INFO(MODEL `{} .ncaa_model`)
86
+ """ . format ( dataset_id )
88
87
df = client .query (sql ).to_dataframe ()
89
88
print (df )
90
89
# [END bqml_ncaa_tutorial_get_training_statistics]
@@ -96,13 +95,13 @@ def test_ncaa_tutorial(delete_dataset):
96
95
*,
97
96
total_three_points_att AS label
98
97
FROM
99
- `bqml_tutorial .wide_games` )
98
+ `{0} .wide_games` )
100
99
SELECT
101
100
*
102
101
FROM
103
- ML.EVALUATE(MODEL `bqml_tutorial .ncaa_model`,
102
+ ML.EVALUATE(MODEL `{0} .ncaa_model`,
104
103
TABLE eval_table)
105
- """
104
+ """ . format ( dataset_id )
106
105
df = client .query (sql ).to_dataframe ()
107
106
print (df )
108
107
# [END bqml_ncaa_tutorial_evaluate_model]
@@ -113,7 +112,7 @@ def test_ncaa_tutorial(delete_dataset):
113
112
SELECT
114
113
*
115
114
FROM
116
- `bqml_tutorial .wide_games`
115
+ `{0} .wide_games`
117
116
WHERE
118
117
game_id='f1063e80-23c7-486b-9a5e-faa52beb2d83' )
119
118
SELECT
@@ -125,7 +124,7 @@ def test_ncaa_tutorial(delete_dataset):
125
124
game_id,
126
125
predicted_label AS predicted_total_three_points_att
127
126
FROM
128
- ML.PREDICT(MODEL `bqml_tutorial .ncaa_model`,
127
+ ML.PREDICT(MODEL `{0} .ncaa_model`,
129
128
table game_to_predict) ) AS predict
130
129
JOIN (
131
130
SELECT
@@ -135,7 +134,7 @@ def test_ncaa_tutorial(delete_dataset):
135
134
game_to_predict) AS truth
136
135
ON
137
136
predict.game_id = truth.game_id
138
- """
137
+ """ . format ( dataset_id )
139
138
df = client .query (sql ).to_dataframe ()
140
139
print (df )
141
140
# [END bqml_ncaa_tutorial_predict_outcomes]
0 commit comments