Skip to content

Commit 8a5cca9

Browse files
committed
Exclude version_downloads from the database dumps.
The current implementation of the database dumps does not work for the partitioned version_downloads table. This change excludes the version_downloads table and all its partitions from the dumps.
1 parent b9d691d commit 8a5cca9

File tree

5 files changed

+93
-127
lines changed

5 files changed

+93
-127
lines changed

Cargo.lock

+7
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

+1
Original file line numberDiff line numberDiff line change
@@ -90,6 +90,7 @@ hyper-tls = "0.3"
9090
lazy_static = "1.0"
9191
tokio-core = "0.1"
9292
diesel_migrations = { version = "1.3.0", features = ["postgres"] }
93+
glob = "0.3"
9394

9495
[build-dependencies]
9596
dotenv = "0.15"

src/tasks/dump_db/dump-db.toml

+55-111
Original file line numberDiff line numberDiff line change
@@ -2,47 +2,49 @@
22
# database table, we set which columns are included in the dump, and optionally
33
# how to filter the rows.
44
#
5-
# <table_name>.columns - a TOML dictionary determining what columns to include.
6-
# possible values are "private" (not included) and "public" (included).
5+
# tables.<table_name>.columns - a TOML dictionary determining what columns to
6+
# include. Possible values are "private" (not included) and "public"
7+
# (included).
78
#
8-
# <table_name>.filter - a string that is a valid SQL expression, which is used
9-
# in a WHERE clause to filter the rows of the table.
9+
# tables.<table_name>.filter - a string that is a valid SQL expression, which
10+
# is used in a WHERE clause to filter the rows of the table.
1011
#
11-
# <table_name>.dependencies - an array of table names, used to determine the
12-
# order of the tables in the generated import script. All tables referred
13-
# to by public columns in the current table should be listed, to make sure
14-
# they are imported before this table.
12+
# tables.<table_name>.dependencies - an array of table names, used to determine
13+
# the order of the tables in the generated import script. All tables
14+
# referred to by public columns in the current table should be listed, to
15+
# make sure they are imported before this table.
1516
#
16-
# <table_name>.columns_defaults - a TOML dictionary mapping column names to a
17-
# raw SQL expression that is used as the default value for the column on
18-
# import. This is useful for private columns that are not nullable and do
19-
# not have a default.
20-
21-
[api_tokens.columns]
22-
id = "private"
23-
user_id = "private"
24-
token = "private"
25-
name = "private"
26-
created_at = "private"
27-
last_used_at = "private"
28-
revoked = "private"
29-
30-
[background_jobs.columns]
31-
id = "private"
32-
job_type = "private"
33-
data = "private"
34-
retries = "private"
35-
last_retry = "private"
36-
created_at = "private"
37-
38-
[badges]
17+
# tables.<table_name>.column_defaults - a TOML dictionary mapping column names
18+
# to a raw SQL expression that is used as the default value for the column
19+
# on import. This is useful for private columns that are not nullable and
20+
# do not have a default.
21+
#
22+
# private_tables - an array of tables to consider as completely private. This is
23+
# a shortcut for marking all columns of a table as private.
24+
25+
private_tables = [
26+
"__diesel_schema_migrations",
27+
"api_tokens",
28+
"background_jobs",
29+
"crate_owner_invitations",
30+
"emails",
31+
"follows",
32+
"publish_limit_buckets",
33+
"publish_rate_overrides",
34+
"readme_renderings",
35+
"version_owner_actions",
36+
"versions_published_by",
37+
"version_downloads*",
38+
]
39+
40+
[tables.badges]
3941
dependencies = ["crates"]
40-
[badges.columns]
42+
[tables.badges.columns]
4143
crate_id = "public"
4244
badge_type = "public"
4345
attributes = "public"
4446

45-
[categories.columns]
47+
[tables.categories.columns]
4648
id = "public"
4749
category = "public"
4850
slug = "public"
@@ -51,18 +53,10 @@ crates_cnt = "public"
5153
created_at = "public"
5254
path = "public"
5355

54-
[crate_owner_invitations.columns]
55-
invited_user_id = "private"
56-
invited_by_user_id = "private"
57-
crate_id = "private"
58-
created_at = "private"
59-
token = "private"
60-
token_generated_at = "private"
61-
62-
[crate_owners]
56+
[tables.crate_owners]
6357
dependencies = ["crates", "users"]
6458
filter = "NOT deleted"
65-
[crate_owners.columns]
59+
[tables.crate_owners.columns]
6660
crate_id = "public"
6761
owner_id = "public"
6862
created_at = "public"
@@ -72,7 +66,7 @@ updated_at = "private"
7266
owner_kind = "public"
7367
email_notifications = "private"
7468

75-
[crates.columns]
69+
[tables.crates.columns]
7670
id = "public"
7771
name = "public"
7872
updated_at = "public"
@@ -86,21 +80,21 @@ textsearchable_index_col = "public"
8680
repository = "public"
8781
max_upload_size = "public"
8882

89-
[crates_categories]
83+
[tables.crates_categories]
9084
dependencies = ["categories", "crates"]
91-
[crates_categories.columns]
85+
[tables.crates_categories.columns]
9286
crate_id = "public"
9387
category_id = "public"
9488

95-
[crates_keywords]
89+
[tables.crates_keywords]
9690
dependencies = ["crates", "keywords"]
97-
[crates_keywords.columns]
91+
[tables.crates_keywords.columns]
9892
crate_id = "public"
9993
keyword_id = "public"
10094

101-
[dependencies]
95+
[tables.dependencies]
10296
dependencies = ["crates", "versions"]
103-
[dependencies.columns]
97+
[tables.dependencies.columns]
10498
id = "public"
10599
version_id = "public"
106100
crate_id = "public"
@@ -111,99 +105,53 @@ features = "public"
111105
target = "public"
112106
kind = "public"
113107

114-
[__diesel_schema_migrations.columns]
115-
version = "private"
116-
run_on = "private"
117-
118-
[emails.columns]
119-
id = "private"
120-
user_id = "private"
121-
email = "private"
122-
verified = "private"
123-
token = "private"
124-
token_generated_at = "private"
125-
126-
[follows.columns]
127-
user_id = "private"
128-
crate_id = "private"
129-
130-
[keywords.columns]
108+
[tables.keywords.columns]
131109
id = "public"
132110
keyword = "public"
133111
crates_cnt = "public"
134112
created_at = "public"
135113

136-
[metadata.columns]
114+
[tables.metadata.columns]
137115
total_downloads = "public"
138116

139-
[publish_limit_buckets.columns]
140-
user_id = "private"
141-
tokens = "private"
142-
last_refill = "private"
143-
144-
[publish_rate_overrides.columns]
145-
user_id = "private"
146-
burst = "private"
147-
148-
[readme_renderings.columns]
149-
version_id = "private"
150-
rendered_at = "private"
151-
152-
[reserved_crate_names.columns]
117+
[tables.reserved_crate_names.columns]
153118
name = "public"
154119

155-
[teams.columns]
120+
[tables.teams.columns]
156121
id = "public"
157122
login = "public"
158123
github_id = "public"
159124
name = "public"
160125
avatar = "public"
161126

162-
[users]
127+
[tables.users]
163128
filter = """
164129
id in (
165130
SELECT owner_id AS user_id FROM crate_owners WHERE NOT deleted AND owner_kind = 0
166131
UNION
167132
SELECT published_by as user_id FROM versions
168133
)"""
169-
[users.columns]
134+
[tables.users.columns]
170135
id = "public"
171136
gh_access_token = "private"
172137
gh_login = "public"
173138
name = "public"
174139
gh_avatar = "public"
175140
gh_id = "public"
176-
[users.column_defaults]
141+
[tables.users.column_defaults]
177142
gh_access_token = "''"
178143

179-
[version_authors]
144+
[tables.version_authors]
180145
dependencies = ["versions"]
181-
[version_authors.columns]
146+
[tables.version_authors.columns]
182147
id = "public"
183148
version_id = "public"
184149
user_id = "private"
185150
name = "public"
186151

187-
[version_downloads]
188-
dependencies = ["versions"]
189-
[version_downloads.columns]
190-
version_id = "public"
191-
downloads = "public"
192-
counted = "private"
193-
date = "public"
194-
processed = "private"
195-
196-
[version_owner_actions.columns]
197-
id = "private"
198-
version_id = "private"
199-
user_id = "private"
200-
api_token_id = "private"
201-
action = "private"
202-
time = "private"
203-
204-
[versions]
152+
[tables.versions]
205153
dependencies = ["crates", "users"]
206-
[versions.columns]
154+
[tables.versions.columns]
207155
id = "public"
208156
crate_id = "public"
209157
num = "public"
@@ -215,7 +163,3 @@ yanked = "public"
215163
license = "public"
216164
crate_size = "public"
217165
published_by = "public"
218-
219-
[versions_published_by.columns]
220-
version_id = "private"
221-
email = "private"

src/tasks/dump_db/gen_scripts.rs

+30-15
Original file line numberDiff line numberDiff line change
@@ -75,10 +75,15 @@ impl TableConfig {
7575
}
7676
}
7777

78-
/// Maps table names to the respective configurations. Used to load `dump_db.toml`.
78+
/// Representation of the configuration file dump-db.toml.
79+
///
80+
/// tables – maps table names to the respective configurations.
81+
/// private_tables – names of tables to treat as completely private.
7982
#[derive(Clone, Debug, Default, Deserialize)]
80-
#[serde(transparent)]
81-
struct VisibilityConfig(BTreeMap<String, TableConfig>);
83+
struct VisibilityConfig {
84+
tables: BTreeMap<String, TableConfig>,
85+
private_tables: Vec<String>,
86+
}
8287

8388
/// Subset of the configuration data to be passed on to the Handlebars template.
8489
#[derive(Debug, Serialize)]
@@ -94,7 +99,7 @@ impl VisibilityConfig {
9499
let mut result = Vec::new();
95100
let mut num_deps = BTreeMap::new();
96101
let mut rev_deps: BTreeMap<_, Vec<_>> = BTreeMap::new();
97-
for (table, config) in self.0.iter() {
102+
for (table, config) in self.tables.iter() {
98103
num_deps.insert(table.as_str(), config.dependencies.len());
99104
for dep in &config.dependencies {
100105
rev_deps
@@ -118,7 +123,7 @@ impl VisibilityConfig {
118123
}
119124
}
120125
assert_eq!(
121-
self.0.len(),
126+
self.tables.len(),
122127
result.len(),
123128
"circular dependencies in database dump configuration detected",
124129
);
@@ -129,7 +134,7 @@ impl VisibilityConfig {
129134
let tables = self
130135
.topological_sort()
131136
.into_iter()
132-
.filter_map(|table| self.0[table].handlebars_context(table))
137+
.filter_map(|table| self.tables[table].handlebars_context(table))
133138
.collect();
134139
HandlebarsContext { tables }
135140
}
@@ -161,18 +166,28 @@ mod tests {
161166
use crate::test_util::pg_connection;
162167
use diesel::prelude::*;
163168
use std::collections::HashSet;
164-
use std::iter::FromIterator;
165169

166170
/// Test whether the visibility configuration matches the schema of the
167171
/// test database.
168172
#[test]
169-
#[should_panic]
170173
fn check_visibility_config() {
171174
let conn = pg_connection();
172-
let db_columns = HashSet::<Column>::from_iter(get_db_columns(&conn));
173-
let vis_columns = toml::from_str::<VisibilityConfig>(include_str!("dump-db.toml"))
174-
.unwrap()
175-
.0
175+
let config: VisibilityConfig = toml::from_str(include_str!("dump-db.toml")).unwrap();
176+
let private_patterns: Vec<_> = config
177+
.private_tables
178+
.iter()
179+
.map(|s| glob::Pattern::new(s).unwrap())
180+
.collect();
181+
let db_columns: HashSet<Column> = get_db_columns(&conn)
182+
.into_iter()
183+
.filter(|column| {
184+
!private_patterns
185+
.iter()
186+
.any(|pattern| pattern.matches(&column.table_name))
187+
})
188+
.collect();
189+
let vis_columns = config
190+
.tables
176191
.iter()
177192
.flat_map(|(table, config)| {
178193
config.columns.iter().map(move |(column, _)| Column {
@@ -246,19 +261,19 @@ mod tests {
246261
#[test]
247262
fn test_topological_sort() {
248263
let mut config = VisibilityConfig::default();
249-
let tables = &mut config.0;
264+
let tables = &mut config.tables;
250265
tables.insert("a".to_owned(), table_config_with_deps(&["b", "c"]));
251266
tables.insert("b".to_owned(), table_config_with_deps(&["c", "d"]));
252267
tables.insert("c".to_owned(), table_config_with_deps(&["d"]));
253-
config.0.insert("d".to_owned(), table_config_with_deps(&[]));
268+
tables.insert("d".to_owned(), table_config_with_deps(&[]));
254269
assert_eq!(config.topological_sort(), ["d", "c", "b", "a"]);
255270
}
256271

257272
#[test]
258273
#[should_panic]
259274
fn topological_sort_panics_for_cyclic_dependency() {
260275
let mut config = VisibilityConfig::default();
261-
let tables = &mut config.0;
276+
let tables = &mut config.tables;
262277
tables.insert("a".to_owned(), table_config_with_deps(&["b"]));
263278
tables.insert("b".to_owned(), table_config_with_deps(&["a"]));
264279
config.topological_sort();

0 commit comments

Comments
 (0)