Skip to content

Commit 879e56c

Browse files
authored
Unrolled build for rust-lang#139016
Rollup merge of rust-lang#139016 - Kobzol:post-merge-analysis-durations, r=marcoieni Add job duration changes to post-merge analysis report This should help us observe large regressions in job duration changes. I would also like to add quick links to GH jobs/workflow to the post-merge workflow, but for that I first need to store some CI metadata to the bootstrap metrics, to make it easier to lookup the corresponding GH workflows (otherwise we'd need to look them up by commit SHA, which would be much more complicated). The last commit adds this metadata. Once this PR is merged, and the metadata will be available in the metrics stored on S3, I'll send a follow-up PR that uses the metadata to add links to job names in the post-merge workflow report. r? `@marcoieni`
2 parents 217693a + 27cca0a commit 879e56c

File tree

8 files changed

+120
-9
lines changed

8 files changed

+120
-9
lines changed

.github/workflows/ci.yml

+2
Original file line numberDiff line numberDiff line change
@@ -69,6 +69,8 @@ jobs:
6969
env:
7070
CI_JOB_NAME: ${{ matrix.name }}
7171
CI_JOB_DOC_URL: ${{ matrix.doc_url }}
72+
GITHUB_WORKFLOW_RUN_ID: ${{ github.run_id }}
73+
GITHUB_REPOSITORY: ${{ github.repository }}
7274
CARGO_REGISTRIES_CRATES_IO_PROTOCOL: sparse
7375
# commit of PR sha or commit sha. `GITHUB_SHA` is not accurate for PRs.
7476
HEAD_SHA: ${{ github.event.pull_request.head.sha || github.sha }}

src/bootstrap/src/utils/metrics.rs

+19-3
Original file line numberDiff line numberDiff line change
@@ -9,9 +9,10 @@ use std::fs::File;
99
use std::io::BufWriter;
1010
use std::time::{Duration, Instant, SystemTime};
1111

12+
use build_helper::ci::CiEnv;
1213
use build_helper::metrics::{
13-
JsonInvocation, JsonInvocationSystemStats, JsonNode, JsonRoot, JsonStepSystemStats, Test,
14-
TestOutcome, TestSuite, TestSuiteMetadata,
14+
CiMetadata, JsonInvocation, JsonInvocationSystemStats, JsonNode, JsonRoot, JsonStepSystemStats,
15+
Test, TestOutcome, TestSuite, TestSuiteMetadata,
1516
};
1617
use sysinfo::{CpuRefreshKind, RefreshKind, System};
1718

@@ -217,7 +218,12 @@ impl BuildMetrics {
217218
children: steps.into_iter().map(|step| self.prepare_json_step(step)).collect(),
218219
});
219220

220-
let json = JsonRoot { format_version: CURRENT_FORMAT_VERSION, system_stats, invocations };
221+
let json = JsonRoot {
222+
format_version: CURRENT_FORMAT_VERSION,
223+
system_stats,
224+
invocations,
225+
ci_metadata: get_ci_metadata(CiEnv::current()),
226+
};
221227

222228
t!(std::fs::create_dir_all(dest.parent().unwrap()));
223229
let mut file = BufWriter::new(t!(File::create(&dest)));
@@ -245,6 +251,16 @@ impl BuildMetrics {
245251
}
246252
}
247253

254+
fn get_ci_metadata(ci_env: CiEnv) -> Option<CiMetadata> {
255+
if ci_env != CiEnv::GitHubActions {
256+
return None;
257+
}
258+
let workflow_run_id =
259+
std::env::var("GITHUB_WORKFLOW_RUN_ID").ok().and_then(|id| id.parse::<u64>().ok())?;
260+
let repository = std::env::var("GITHUB_REPOSITORY").ok()?;
261+
Some(CiMetadata { workflow_run_id, repository })
262+
}
263+
248264
struct MetricsState {
249265
finished_steps: Vec<StepMetrics>,
250266
running_steps: Vec<StepMetrics>,

src/build_helper/src/metrics.rs

+13
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,19 @@ pub struct JsonRoot {
99
pub format_version: usize,
1010
pub system_stats: JsonInvocationSystemStats,
1111
pub invocations: Vec<JsonInvocation>,
12+
#[serde(default)]
13+
pub ci_metadata: Option<CiMetadata>,
14+
}
15+
16+
/// Represents metadata about bootstrap's execution in CI.
17+
#[derive(Serialize, Deserialize)]
18+
pub struct CiMetadata {
19+
/// GitHub run ID of the workflow where bootstrap was executed.
20+
/// Note that the run ID will be shared amongst all jobs executed in that workflow.
21+
pub workflow_run_id: u64,
22+
/// Full name of a GitHub repository where bootstrap was executed in CI.
23+
/// e.g. `rust-lang-ci/rust`.
24+
pub repository: String,
1225
}
1326

1427
#[derive(Serialize, Deserialize)]

src/ci/citool/src/analysis.rs

+61-1
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
use std::collections::{BTreeMap, HashMap, HashSet};
22
use std::fmt::Debug;
3+
use std::time::Duration;
34

45
use build_helper::metrics::{
56
BuildStep, JsonRoot, TestOutcome, TestSuite, TestSuiteMetadata, escape_step_name,
@@ -184,11 +185,70 @@ fn render_table(suites: BTreeMap<String, TestSuiteRecord>) -> String {
184185
}
185186

186187
/// Outputs a report of test differences between the `parent` and `current` commits.
187-
pub fn output_test_diffs(job_metrics: HashMap<JobName, JobMetrics>) {
188+
pub fn output_test_diffs(job_metrics: &HashMap<JobName, JobMetrics>) {
188189
let aggregated_test_diffs = aggregate_test_diffs(&job_metrics);
189190
report_test_diffs(aggregated_test_diffs);
190191
}
191192

193+
/// Prints the ten largest differences in bootstrap durations.
194+
pub fn output_largest_duration_changes(job_metrics: &HashMap<JobName, JobMetrics>) {
195+
struct Entry<'a> {
196+
job: &'a JobName,
197+
before: Duration,
198+
after: Duration,
199+
change: f64,
200+
}
201+
202+
let mut changes: Vec<Entry> = vec![];
203+
for (job, metrics) in job_metrics {
204+
if let Some(parent) = &metrics.parent {
205+
let duration_before = parent
206+
.invocations
207+
.iter()
208+
.map(|i| BuildStep::from_invocation(i).duration)
209+
.sum::<Duration>();
210+
let duration_after = metrics
211+
.current
212+
.invocations
213+
.iter()
214+
.map(|i| BuildStep::from_invocation(i).duration)
215+
.sum::<Duration>();
216+
let pct_change = duration_after.as_secs_f64() / duration_before.as_secs_f64();
217+
let pct_change = pct_change * 100.0;
218+
// Normalize around 100, to get + for regression and - for improvements
219+
let pct_change = pct_change - 100.0;
220+
changes.push(Entry {
221+
job,
222+
before: duration_before,
223+
after: duration_after,
224+
change: pct_change,
225+
});
226+
}
227+
}
228+
changes.sort_by(|e1, e2| e1.change.partial_cmp(&e2.change).unwrap().reverse());
229+
230+
println!("# Job duration changes");
231+
for (index, entry) in changes.into_iter().take(10).enumerate() {
232+
println!(
233+
"{}. `{}`: {:.1}s -> {:.1}s ({:.1}%)",
234+
index + 1,
235+
entry.job,
236+
entry.before.as_secs_f64(),
237+
entry.after.as_secs_f64(),
238+
entry.change
239+
);
240+
}
241+
242+
println!();
243+
output_details("How to interpret the job duration changes?", || {
244+
println!(
245+
r#"Job durations can vary a lot, based on the actual runner instance
246+
that executed the job, system noise, invalidated caches, etc. The table above is provided
247+
mostly for t-infra members, for simpler debugging of potential CI slow-downs."#
248+
);
249+
});
250+
}
251+
192252
#[derive(Default)]
193253
struct TestSuiteRecord {
194254
passed: u64,

src/ci/citool/src/main.rs

+4-3
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ use clap::Parser;
1515
use jobs::JobDatabase;
1616
use serde_yaml::Value;
1717

18-
use crate::analysis::output_test_diffs;
18+
use crate::analysis::{output_largest_duration_changes, output_test_diffs};
1919
use crate::cpu_usage::load_cpu_usage;
2020
use crate::datadog::upload_datadog_metric;
2121
use crate::jobs::RunType;
@@ -160,7 +160,7 @@ fn postprocess_metrics(
160160
job_name,
161161
JobMetrics { parent: Some(parent_metrics), current: metrics },
162162
)]);
163-
output_test_diffs(job_metrics);
163+
output_test_diffs(&job_metrics);
164164
return Ok(());
165165
}
166166
Err(error) => {
@@ -180,7 +180,8 @@ fn post_merge_report(db: JobDatabase, current: String, parent: String) -> anyhow
180180
let metrics = download_auto_job_metrics(&db, &parent, &current)?;
181181

182182
println!("\nComparing {parent} (parent) -> {current} (this PR)\n");
183-
output_test_diffs(metrics);
183+
output_test_diffs(&metrics);
184+
output_largest_duration_changes(&metrics);
184185

185186
Ok(())
186187
}

src/ci/citool/src/metrics.rs

+19-1
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
use std::collections::HashMap;
2-
use std::path::Path;
2+
use std::path::{Path, PathBuf};
33

44
use anyhow::Context;
55
use build_helper::metrics::{JsonNode, JsonRoot, TestSuite};
@@ -74,6 +74,17 @@ Maybe it was newly added?"#,
7474
}
7575

7676
pub fn download_job_metrics(job_name: &str, sha: &str) -> anyhow::Result<JsonRoot> {
77+
// Best effort cache to speed-up local re-executions of citool
78+
let cache_path = PathBuf::from(".citool-cache").join(sha).join(format!("{job_name}.json"));
79+
if cache_path.is_file() {
80+
if let Ok(metrics) = std::fs::read_to_string(&cache_path)
81+
.map_err(|err| err.into())
82+
.and_then(|data| anyhow::Ok::<JsonRoot>(serde_json::from_str::<JsonRoot>(&data)?))
83+
{
84+
return Ok(metrics);
85+
}
86+
}
87+
7788
let url = get_metrics_url(job_name, sha);
7889
let mut response = ureq::get(&url).call()?;
7990
if !response.status().is_success() {
@@ -87,6 +98,13 @@ pub fn download_job_metrics(job_name: &str, sha: &str) -> anyhow::Result<JsonRoo
8798
.body_mut()
8899
.read_json()
89100
.with_context(|| anyhow::anyhow!("cannot deserialize metrics from {url}"))?;
101+
102+
if let Ok(_) = std::fs::create_dir_all(cache_path.parent().unwrap()) {
103+
if let Ok(data) = serde_json::to_string(&data) {
104+
let _ = std::fs::write(cache_path, data);
105+
}
106+
}
107+
90108
Ok(data)
91109
}
92110

src/ci/citool/src/utils.rs

-1
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,6 @@ where
2323
println!(
2424
r"<details>
2525
<summary>{summary}</summary>
26-
2726
"
2827
);
2928
func();

src/ci/docker/run.sh

+2
Original file line numberDiff line numberDiff line change
@@ -355,6 +355,8 @@ docker \
355355
--env GITHUB_ACTIONS \
356356
--env GITHUB_REF \
357357
--env GITHUB_STEP_SUMMARY="/checkout/obj/${SUMMARY_FILE}" \
358+
--env GITHUB_WORKFLOW_RUN_ID \
359+
--env GITHUB_REPOSITORY \
358360
--env RUST_BACKTRACE \
359361
--env TOOLSTATE_REPO_ACCESS_TOKEN \
360362
--env TOOLSTATE_REPO \

0 commit comments

Comments
 (0)