Skip to content

Commit 3dce8d5

Browse files
authored
workaround for pyspark connector breaking change (#5420)
* workaround for pyspark connector breaking change
* add missing license header
* fix lint
1 parent b6044b2 commit 3dce8d5

File tree

2 files changed

+19
-2
lines changed

2 files changed

+19
-2
lines changed

data-science-onramp/data-cleaning/clean_test.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -69,7 +69,9 @@
6969
"pyspark_job": {
7070
"main_python_file_uri": f"gs://{BUCKET_NAME}/{BUCKET_BLOB}",
7171
"args": [BUCKET_NAME, BQ_TABLE, "--dry-run"],
72-
"jar_file_uris": ["gs://spark-lib/bigquery/spark-bigquery-latest_2.12.jar"],
72+
# Temporarily pin jar version due to breaking release
73+
# "jar_file_uris": ["gs://spark-lib/bigquery/spark-bigquery-latest_2.12.jar"],
74+
"jar_file_uris": ["gs://spark-lib/bigquery/spark-bigquery-with-dependencies_2.12-0.18.1.jar"],
7375
},
7476
}
7577

data-science-onramp/data-ingestion/setup_test.py

Lines changed: 16 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,16 @@
1+
# Copyright 2021 Google LLC
2+
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
7+
# https://www.apache.org/licenses/LICENSE-2.0
8+
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
114
"""Test file for the setup job in the Data Science Onramp sample application. Creates a test Dataproc cluster and runs the job with a --test flag.
215
The job uploads a subset of the data to BigQuery.
316
Then, data is pulled from BigQuery and checks are made to see if the data is dirty.
@@ -54,7 +67,9 @@
5467
"pyspark_job": {
5568
"main_python_file_uri": f"gs://{BUCKET_NAME}/{BUCKET_BLOB}",
5669
"args": [BUCKET_NAME, BQ_DATASET, "--test"],
57-
"jar_file_uris": ["gs://spark-lib/bigquery/spark-bigquery-latest_2.12.jar"],
70+
# Temporarily pin jar version due to breaking release
71+
# "jar_file_uris": ["gs://spark-lib/bigquery/spark-bigquery-latest_2.12.jar"],
72+
"jar_file_uris": ["gs://spark-lib/bigquery/spark-bigquery-with-dependencies_2.12-0.18.1.jar"],
5873
},
5974
}
6075

0 commit comments

Comments
 (0)