From 1645dea60c0a661ff30a6a2dd0ffa390fdf1c4d9 Mon Sep 17 00:00:00 2001 From: Leah Cole Date: Fri, 26 Feb 2021 12:15:07 -0800 Subject: [PATCH 1/3] workaround for pyspark connector breaking change --- data-science-onramp/data-cleaning/clean_test.py | 4 +++- data-science-onramp/data-ingestion/setup_test.py | 4 +++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/data-science-onramp/data-cleaning/clean_test.py b/data-science-onramp/data-cleaning/clean_test.py index 75f9a394db5..96499cb2e41 100644 --- a/data-science-onramp/data-cleaning/clean_test.py +++ b/data-science-onramp/data-cleaning/clean_test.py @@ -69,7 +69,9 @@ "pyspark_job": { "main_python_file_uri": f"gs://{BUCKET_NAME}/{BUCKET_BLOB}", "args": [BUCKET_NAME, BQ_TABLE, "--dry-run"], - "jar_file_uris": ["gs://spark-lib/bigquery/spark-bigquery-latest_2.12.jar"], + # Temporarily pin jar version due to breaking release + #"jar_file_uris": ["gs://spark-lib/bigquery/spark-bigquery-latest_2.12.jar"], + "jar_file_uris":["gs://spark-lib/bigquery/spark-bigquery-with-dependencies_2.12-0.18.1.jar"], }, } diff --git a/data-science-onramp/data-ingestion/setup_test.py b/data-science-onramp/data-ingestion/setup_test.py index 2a826b96749..ddd82158061 100644 --- a/data-science-onramp/data-ingestion/setup_test.py +++ b/data-science-onramp/data-ingestion/setup_test.py @@ -54,7 +54,9 @@ "pyspark_job": { "main_python_file_uri": f"gs://{BUCKET_NAME}/{BUCKET_BLOB}", "args": [BUCKET_NAME, BQ_DATASET, "--test"], - "jar_file_uris": ["gs://spark-lib/bigquery/spark-bigquery-latest_2.12.jar"], + # Temporarily pin jar version due to breaking release + # "jar_file_uris": ["gs://spark-lib/bigquery/spark-bigquery-latest_2.12.jar"], + "jar_file_uris":["gs://spark-lib/bigquery/spark-bigquery-with-dependencies_2.12-0.18.1.jar"], }, } From b094bf8382ea27b6adad8bc3cfda5f2e75ab8d21 Mon Sep 17 00:00:00 2001 From: Leah Cole Date: Fri, 26 Feb 2021 12:17:42 -0800 Subject: [PATCH 2/3] add missing license header --- data-science-onramp/data-ingestion/setup_test.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/data-science-onramp/data-ingestion/setup_test.py b/data-science-onramp/data-ingestion/setup_test.py index ddd82158061..885dd585846 100644 --- a/data-science-onramp/data-ingestion/setup_test.py +++ b/data-science-onramp/data-ingestion/setup_test.py @@ -1,3 +1,16 @@ +# Copyright 2021 Google LLC + +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at + +# https://www.apache.org/licenses/LICENSE-2.0 + +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. """Test file for the setup job in the Data Science Onramp sample application Creates a test Dataproc cluster and runs the job with a --test flag. The job uploads a subset of the data to BigQuery. Then, data is pulled from BigQuery and checks are made to see if the data is dirty. From 7bc3252f947d00b4017df1d19c0f775805074996 Mon Sep 17 00:00:00 2001 From: Leah Cole Date: Fri, 26 Feb 2021 12:22:55 -0800 Subject: [PATCH 3/3] fix lint --- data-science-onramp/data-cleaning/clean_test.py | 4 ++-- data-science-onramp/data-ingestion/setup_test.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/data-science-onramp/data-cleaning/clean_test.py b/data-science-onramp/data-cleaning/clean_test.py index 96499cb2e41..61c0a59912f 100644 --- a/data-science-onramp/data-cleaning/clean_test.py +++ b/data-science-onramp/data-cleaning/clean_test.py @@ -70,8 +70,8 @@ "main_python_file_uri": f"gs://{BUCKET_NAME}/{BUCKET_BLOB}", "args": [BUCKET_NAME, BQ_TABLE, "--dry-run"], # Temporarily pin jar version due to breaking release - #"jar_file_uris": ["gs://spark-lib/bigquery/spark-bigquery-latest_2.12.jar"], - "jar_file_uris":["gs://spark-lib/bigquery/spark-bigquery-with-dependencies_2.12-0.18.1.jar"], + # "jar_file_uris": ["gs://spark-lib/bigquery/spark-bigquery-latest_2.12.jar"], + "jar_file_uris": ["gs://spark-lib/bigquery/spark-bigquery-with-dependencies_2.12-0.18.1.jar"], }, } diff --git a/data-science-onramp/data-ingestion/setup_test.py b/data-science-onramp/data-ingestion/setup_test.py index 885dd585846..d7032d3238b 100644 --- a/data-science-onramp/data-ingestion/setup_test.py +++ b/data-science-onramp/data-ingestion/setup_test.py @@ -69,7 +69,7 @@ "args": [BUCKET_NAME, BQ_DATASET, "--test"], # Temporarily pin jar version due to breaking release # "jar_file_uris": ["gs://spark-lib/bigquery/spark-bigquery-latest_2.12.jar"], - "jar_file_uris":["gs://spark-lib/bigquery/spark-bigquery-with-dependencies_2.12-0.18.1.jar"], + "jar_file_uris": ["gs://spark-lib/bigquery/spark-bigquery-with-dependencies_2.12-0.18.1.jar"], }, }