Skip to content

Commit e6ddfc9

Browse files
authored
Upgrade dbt generate artifact types (#2598)
* upgrade dbt-bigquery and re-generate artifact types (#2545)
* upgrade dbt-bigquery
* create gen_artifacts.sh and use it
* wip on horrific code
* continue subclassing/patching generated models
* start working on mypy compatibility
* remove empty test block
* continue working on mypy passing
* make mypy pass
* fixes post-upgrade
* address comments
* upgrade dbt to 1.6 preview, and fix data access model full refresh
* just determine partitions dynamically
* allow extra fields in generated artifact models
* make mypy pass
1 parent 34ff5db commit e6ddfc9

17 files changed

+2019
-454
lines changed

warehouse/Dockerfile

+1
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@ RUN poetry export -f requirements.txt --without-hashes --output requirements.txt
2121

2222
COPY ./dbt_project.yml /app/dbt_project.yml
2323
COPY ./packages.yml /app/packages.yml
24+
COPY ./profiles.yml /app/profiles.yml
2425
RUN dbt deps
2526

2627
COPY . /app

warehouse/models/mart/transit_database/_mart_transit_database.yml

-1
Original file line numberDiff line numberDiff line change
@@ -386,7 +386,6 @@ models:
386386
For best results, join with reference to a specific date, and make sure to choose a specific
387387
output grain (organizations, services, customer-facing vs. not); you will likely need to filter
388388
or group to get the desired output.
389-
tests:
390389
columns:
391390
- *key
392391
- name: service_key

warehouse/models/staging/audit/stg_audit__cloudaudit_googleapis_com_data_access.sql

+10-9
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,6 @@
77
'data_type': 'date',
88
'granularity': 'day',
99
},
10-
partitions=['current_date()'],
1110
cluster_by='job_type',
1211
)
1312
}}
@@ -21,13 +20,15 @@ WITH latest AS (
2120
FROM cal-itp-data-infra.audit.cloudaudit_googleapis_com_data_access_{{ yesterday.strftime('%Y%m%d') }}
2221
),
2322

24-
everything AS (
25-
{% set start_date = modules.datetime.date(year=2022, month=4, day=11) %}
26-
{% set days = (modules.datetime.date.today() - start_date).days + 1 %}
23+
everything AS ( -- noqa: ST03
24+
-- without this limited lookback, we'd eventually exhaust query resources on full refreshes
25+
-- since we might end up unioning hundreds of tables
26+
-- technically we have data back to 2022-04-11
27+
{% set days = 90 %}
2728

28-
{% for add in range(days) %}
29+
{% for day in range(days) %}
2930

30-
{% set current = start_date + modules.datetime.timedelta(days=add) %}
31+
{% set current = modules.datetime.date.today() - modules.datetime.timedelta(days=day) %}
3132

3233
SELECT *
3334
FROM cal-itp-data-infra.audit.cloudaudit_googleapis_com_data_access_{{ current.strftime('%Y%m%d') }}
@@ -79,9 +80,9 @@ stg_audit__cloudaudit_googleapis_com_data_access AS (
7980
SECOND
8081
) AS duration_in_seconds,
8182
JSON_VALUE_ARRAY(job, '$.jobStats.queryStats.referencedTables') as referenced_tables,
82-
CAST(JSON_VALUE(job, '$.jobStats.queryStats.totalBilledBytes') AS INT64) AS total_billed_bytes,
83-
5.0 * CAST(JSON_VALUE(job, '$.jobStats.queryStats.totalBilledBytes') AS INT64) / POWER(2, 40) AS estimated_cost_usd, -- $5/TB
84-
CAST(JSON_VALUE(job, '$.jobStats.totalSlotMs') AS INT64) / 1000 AS total_slots_seconds,
83+
CAST(JSON_VALUE(job, '$.jobStats.queryStats.totalBilledBytes') AS int64) AS total_billed_bytes,
84+
5.0 * CAST(JSON_VALUE(job, '$.jobStats.queryStats.totalBilledBytes') AS int64) / POWER(2, 40) AS estimated_cost_usd, -- $5/TB
85+
CAST(JSON_VALUE(job, '$.jobStats.totalSlotMs') AS int64) / 1000 AS total_slots_seconds,
8586

8687
JSON_VALUE(metadata, '$.tableDataRead.jobName') as table_data_read_job_name,
8788

warehouse/mypy.ini

+3
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
[mypy]
2+
plugins = pydantic.mypy, sqlmypy
3+
disable_error_code = assignment

warehouse/poetry.lock

+53-148
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

warehouse/pyproject.toml

+2-5
Original file line numberDiff line numberDiff line change
@@ -29,12 +29,11 @@ networkx = {version = "<3", extras = ["default"]}
2929
# export CFLAGS="-I $(brew --prefix graphviz)/include"
3030
# export LDFLAGS="-L $(brew --prefix graphviz)/lib"
3131
pygraphviz = "^1.10"
32-
dbt-bigquery = "^1.4.3"
3332
palettable = "^3.3.0"
33+
dbt-bigquery = "1.6.0b1"
3434

3535
[tool.poetry.group.dev.dependencies]
3636
black = "^22.12.0"
37-
mypy = "^0.991"
3837
isort = "^5.11.4"
3938
types-tqdm = "^4.64.7"
4039
types-requests = "^2.28.11"
@@ -46,10 +45,8 @@ datamodel-code-generator = "^0.17.1"
4645
sqlfluff = "^2.0.2"
4746
sqlfluff-templater-dbt = "^2.0.2"
4847
ipdb = "^0.13.13"
48+
mypy = "^1.2.0"
4949

5050
[build-system]
5151
requires = ["poetry-core>=1.0.0"]
5252
build-backend = "poetry.core.masonry.api"
53-
54-
[tool.mypy]
55-
plugins = "sqlmypy"

0 commit comments

Comments
 (0)