Skip to content

Commit 4ffe6eb

Browse files
committed
feat: Add support for statistics and additional performance information.
1 parent a158d22 commit 4ffe6eb

File tree

8 files changed

+106
-38
lines changed

8 files changed

+106
-38
lines changed

Diff for: Cargo.lock

+1
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Diff for: gitoxide-core/src/repository/blame.rs

+11-1
Original file line numberDiff line numberDiff line change
@@ -1,19 +1,29 @@
11
use gix::bstr::BStr;
22
use std::ffi::OsStr;
33

4-
pub fn blame_file(mut repo: gix::Repository, file: &OsStr, out: impl std::io::Write) -> anyhow::Result<()> {
4+
pub fn blame_file(
5+
mut repo: gix::Repository,
6+
file: &OsStr,
7+
out: impl std::io::Write,
8+
err: Option<&mut dyn std::io::Write>,
9+
) -> anyhow::Result<()> {
510
repo.object_cache_size_if_unset(repo.compute_object_cache_size_for_tree_diffs(&**repo.index_or_empty()?));
611

712
let suspect = repo.head()?.peel_to_commit_in_place()?;
813
let traverse =
914
gix::traverse::commit::topo::Builder::from_iters(&repo.objects, [suspect.id], None::<Vec<gix::ObjectId>>)
15+
.with_commit_graph(repo.commit_graph_if_enabled()?)
1016
.build()?;
1117
let mut resource_cache = repo.diff_resource_cache_for_tree_diff()?;
1218
let file_path: &BStr = gix::path::os_str_into_bstr(file)?;
1319

1420
let outcome = gix::blame::file(&repo.objects, traverse, &mut resource_cache, file_path)?;
21+
let statistics = outcome.statistics;
1522
write_blame_entries(out, outcome)?;
1623

24+
if let Some(err) = err {
25+
writeln!(err, "{statistics:#?}")?;
26+
}
1727
Ok(())
1828
}
1929

Diff for: gix-blame/Cargo.toml

+1
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@ rust-version = "1.65"
1414
doctest = false
1515

1616
[dependencies]
17+
gix-trace = { version = "^0.1.11", path = "../gix-trace" }
1718
gix-diff = { version = "^0.49.0", path = "../gix-diff", default-features = false, features = ["blob"] }
1819
gix-object = { version = "^0.46.0", path = "../gix-object" }
1920
gix-hash = { version = "^0.15.0", path = "../gix-hash" }

Diff for: gix-blame/src/file/function.rs

+65-34
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
use super::{process_changes, Change, Offset, UnblamedHunk};
2-
use crate::{BlameEntry, Outcome};
2+
use crate::{BlameEntry, Outcome, Statistics};
33
use gix_diff::blob::intern::TokenSource;
44
use gix_hash::ObjectId;
55
use gix_object::{bstr::BStr, FindExt};
@@ -62,9 +62,12 @@ pub fn file<E>(
6262
let Some(Ok(suspect)) = traverse.peek().map(|res| res.as_ref().map(|item| item.id)) else {
6363
todo!("return actual error");
6464
};
65+
let _span = gix_trace::coarse!("gix_blame::file()", ?file_path, ?suspect);
6566

66-
let (mut buf, mut buf2) = (Vec::new(), Vec::new());
67-
let original_file_entry = find_path_entry_in_commit(&odb, &suspect, file_path, &mut buf, &mut buf2).unwrap();
67+
let mut stats = Statistics::default();
68+
let (mut buf, mut buf2, mut buf3) = (Vec::new(), Vec::new(), Vec::new());
69+
let original_file_entry =
70+
find_path_entry_in_commit(&odb, &suspect, file_path, &mut buf, &mut buf2, &mut stats).unwrap();
6871
let original_file_blob = odb.find_blob(&original_file_entry.oid, &mut buf).unwrap().data.to_vec();
6972
let num_lines_in_original = {
7073
let mut interner = gix_diff::blob::intern::Interner::new(original_file_blob.len() / 100);
@@ -81,9 +84,11 @@ pub fn file<E>(
8184
)];
8285

8386
let mut out = Vec::new();
87+
let mut diff_state = gix_diff::tree::State::default();
8488
'outer: for item in traverse {
8589
let item = item?;
8690
let suspect = item.id;
91+
stats.commits_traversed += 1;
8792

8893
let mut parent_ids = item.parent_ids;
8994
if parent_ids.is_empty() {
@@ -102,13 +107,15 @@ pub fn file<E>(
102107
break;
103108
}
104109

105-
let Some(entry) = find_path_entry_in_commit(&odb, &suspect, file_path, &mut buf, &mut buf2) else {
110+
let Some(entry) = find_path_entry_in_commit(&odb, &suspect, file_path, &mut buf, &mut buf2, &mut stats) else {
106111
continue;
107112
};
108113

109114
if parent_ids.len() == 1 {
110115
let parent_id = parent_ids.pop().expect("just validated there is exactly one");
111-
if let Some(parent_entry) = find_path_entry_in_commit(&odb, &parent_id, file_path, &mut buf, &mut buf2) {
116+
if let Some(parent_entry) =
117+
find_path_entry_in_commit(&odb, &parent_id, file_path, &mut buf, &mut buf2, &mut stats)
118+
{
112119
if entry.oid == parent_entry.oid {
113120
// The blobs storing the blamed file in `entry` and `parent_entry` are identical
114121
// which is why we can pass blame to the parent without further checks.
@@ -119,7 +126,17 @@ pub fn file<E>(
119126
}
120127
}
121128

122-
let Some(modification) = tree_diff_at_file_path(&odb, file_path, item.id, parent_id) else {
129+
let Some(modification) = tree_diff_at_file_path(
130+
&odb,
131+
file_path,
132+
item.id,
133+
parent_id,
134+
&mut stats,
135+
&mut diff_state,
136+
&mut buf,
137+
&mut buf2,
138+
&mut buf3,
139+
) else {
123140
// None of the changes affected the file we’re currently blaming. Pass blame to parent.
124141
for unblamed_hunk in &mut hunks_to_blame {
125142
unblamed_hunk.pass_blame(suspect, parent_id);
@@ -142,8 +159,7 @@ pub fn file<E>(
142159
}
143160
gix_diff::tree::recorder::Change::Deletion { .. } => todo!(),
144161
gix_diff::tree::recorder::Change::Modification { previous_oid, oid, .. } => {
145-
let changes = blob_changes(&odb, resource_cache, oid, previous_oid, file_path);
146-
162+
let changes = blob_changes(&odb, resource_cache, oid, previous_oid, file_path, &mut stats);
147163
hunks_to_blame = process_changes(&mut out, hunks_to_blame, changes, suspect);
148164
for unblamed_hunk in &mut hunks_to_blame {
149165
unblamed_hunk.pass_blame(suspect, parent_id);
@@ -152,7 +168,8 @@ pub fn file<E>(
152168
}
153169
} else {
154170
for parent_id in &parent_ids {
155-
if let Some(parent_entry) = find_path_entry_in_commit(&odb, &parent_id, file_path, &mut buf, &mut buf2)
171+
if let Some(parent_entry) =
172+
find_path_entry_in_commit(&odb, parent_id, file_path, &mut buf, &mut buf2, &mut stats)
156173
{
157174
if entry.oid == parent_entry.oid {
158175
// The blobs storing the blamed file in `entry` and `parent_entry` are
@@ -167,7 +184,17 @@ pub fn file<E>(
167184
}
168185

169186
for parent_id in parent_ids {
170-
let changes_for_file_path = tree_diff_at_file_path(&odb, file_path, item.id, parent_id);
187+
let changes_for_file_path = tree_diff_at_file_path(
188+
&odb,
189+
file_path,
190+
item.id,
191+
parent_id,
192+
&mut stats,
193+
&mut diff_state,
194+
&mut buf,
195+
&mut buf2,
196+
&mut buf3,
197+
);
171198
let Some(modification) = changes_for_file_path else {
172199
// None of the changes affected the file we’re currently blaming. Pass blame
173200
// to parent.
@@ -188,8 +215,7 @@ pub fn file<E>(
188215
}
189216
gix_diff::tree::recorder::Change::Deletion { .. } => todo!(),
190217
gix_diff::tree::recorder::Change::Modification { previous_oid, oid, .. } => {
191-
let changes = blob_changes(&odb, resource_cache, oid, previous_oid, file_path);
192-
218+
let changes = blob_changes(&odb, resource_cache, oid, previous_oid, file_path, &mut stats);
193219
hunks_to_blame = process_changes(&mut out, hunks_to_blame, changes, suspect);
194220
for unblamed_hunk in &mut hunks_to_blame {
195221
unblamed_hunk.pass_blame(suspect, parent_id);
@@ -215,6 +241,7 @@ pub fn file<E>(
215241
Ok(Outcome {
216242
entries: coalesce_blame_entries(out),
217243
blob: original_file_blob,
244+
statistics: stats,
218245
})
219246
}
220247

@@ -262,42 +289,37 @@ fn coalesce_blame_entries(lines_blamed: Vec<BlameEntry>) -> Vec<BlameEntry> {
262289
})
263290
}
264291

292+
#[allow(clippy::too_many_arguments)]
265293
fn tree_diff_at_file_path(
266294
odb: impl gix_object::Find + gix_object::FindHeader,
267295
file_path: &BStr,
268296
id: ObjectId,
269297
parent_id: ObjectId,
298+
stats: &mut Statistics,
299+
state: &mut gix_diff::tree::State,
300+
commit_buf: &mut Vec<u8>,
301+
lhs_tree_buf: &mut Vec<u8>,
302+
rhs_tree_buf: &mut Vec<u8>,
270303
) -> Option<gix_diff::tree::recorder::Change> {
271-
let mut buffer = Vec::new();
304+
let parent_tree = odb.find_commit(&parent_id, commit_buf).unwrap().tree();
305+
stats.commits_to_tree += 1;
272306

273-
let parent = odb.find_commit(&parent_id, &mut buffer).unwrap();
274-
275-
let mut buffer = Vec::new();
276307
let parent_tree_iter = odb
277-
.find(&parent.tree(), &mut buffer)
308+
.find(&parent_tree, lhs_tree_buf)
278309
.unwrap()
279310
.try_into_tree_iter()
280311
.unwrap();
312+
stats.trees_decoded += 1;
281313

282-
let mut buffer = Vec::new();
283-
let commit = odb.find_commit(&id, &mut buffer).unwrap();
314+
let tree_id = odb.find_commit(&id, commit_buf).unwrap().tree();
315+
stats.commits_to_tree += 1;
284316

285-
let mut buffer = Vec::new();
286-
let tree_iter = odb
287-
.find(&commit.tree(), &mut buffer)
288-
.unwrap()
289-
.try_into_tree_iter()
290-
.unwrap();
317+
let tree_iter = odb.find(&tree_id, rhs_tree_buf).unwrap().try_into_tree_iter().unwrap();
318+
stats.trees_decoded += 1;
291319

292320
let mut recorder = gix_diff::tree::Recorder::default();
293-
gix_diff::tree(
294-
parent_tree_iter,
295-
tree_iter,
296-
gix_diff::tree::State::default(),
297-
&odb,
298-
&mut recorder,
299-
)
300-
.unwrap();
321+
gix_diff::tree(parent_tree_iter, tree_iter, state, &odb, &mut recorder).unwrap();
322+
stats.trees_diffed += 1;
301323

302324
recorder.records.into_iter().find(|change| match change {
303325
gix_diff::tree::recorder::Change::Modification { path, .. } => path == file_path,
@@ -312,6 +334,7 @@ fn blob_changes(
312334
oid: ObjectId,
313335
previous_oid: ObjectId,
314336
file_path: &BStr,
337+
stats: &mut Statistics,
315338
) -> Vec<Change> {
316339
/// Record all [`Change`]s to learn about additions, deletions and unchanged portions of a *Blamed File*.
317340
struct ChangeRecorder {
@@ -391,6 +414,7 @@ fn blob_changes(
391414
let number_of_lines_in_destination = input.after.len();
392415
let change_recorder = ChangeRecorder::new(number_of_lines_in_destination.try_into().unwrap());
393416

417+
stats.blobs_diffed += 1;
394418
gix_diff::blob::diff(gix_diff::blob::Algorithm::Histogram, &input, change_recorder)
395419
}
396420

@@ -400,12 +424,19 @@ fn find_path_entry_in_commit(
400424
file_path: &BStr,
401425
buf: &mut Vec<u8>,
402426
buf2: &mut Vec<u8>,
427+
stats: &mut Statistics,
403428
) -> Option<gix_object::tree::Entry> {
404429
let commit_id = odb.find_commit(commit, buf).unwrap().tree();
405430
let tree_iter = odb.find_tree_iter(&commit_id, buf).unwrap();
431+
stats.commits_to_tree += 1;
432+
stats.trees_decoded += 1;
406433

407434
tree_iter
408-
.lookup_entry(odb, buf2, file_path.split(|b| *b == b'/'))
435+
.lookup_entry(
436+
odb,
437+
buf2,
438+
file_path.split(|b| *b == b'/').inspect(|_| stats.trees_decoded += 1),
439+
)
409440
.unwrap()
410441
}
411442

Diff for: gix-blame/src/lib.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@
1515
#![forbid(unsafe_code)]
1616

1717
mod types;
18-
pub use types::{BlameEntry, Outcome};
18+
pub use types::{BlameEntry, Outcome, Statistics};
1919

2020
mod file;
2121
pub use file::function::file;

Diff for: gix-blame/src/types.rs

+19
Original file line numberDiff line numberDiff line change
@@ -8,12 +8,31 @@ use gix_hash::ObjectId;
88
use gix_object::bstr::BString;
99

1010
/// The outcome of [`file()`](crate::file()).
11+
#[derive(Debug, Clone)]
1112
pub struct Outcome {
1213
/// One entry in sequential order, to associate a hunk in the original file with the commit (and its lines)
1314
/// that introduced it.
1415
pub entries: Vec<BlameEntry>,
1516
/// A buffer with the file content of the *Original File*, ready for tokenization.
1617
pub blob: Vec<u8>,
18+
/// Additional information about the amount of work performed to produce the blame.
19+
pub statistics: Statistics,
20+
}
21+
22+
/// Additional information about the performed operations.
23+
#[derive(Debug, Default, Copy, Clone)]
24+
pub struct Statistics {
25+
/// The amount of commits it traversed until the blame was complete.
26+
pub commits_traversed: usize,
27+
/// The amount of commits whose trees were extracted.
28+
pub commits_to_tree: usize,
29+
/// The amount of trees that were decoded to find the entry of the file to blame.
30+
pub trees_decoded: usize,
31+
/// The amount of fully-fledged tree-diffs to see if the filepath was added, deleted or modified.
32+
pub trees_diffed: usize,
33+
/// The amount of blobs that were compared to each other to learn what changed between commits.
34+
/// Note that in order to diff a blob, one needs to load both versions from the database.
35+
pub blobs_diffed: usize,
1736
}
1837

1938
impl Outcome {

Diff for: src/plumbing/main.rs

+4-2
Original file line numberDiff line numberDiff line change
@@ -1533,14 +1533,16 @@ pub fn main() -> Result<()> {
15331533
},
15341534
),
15351535
},
1536-
Subcommands::Blame { file } => prepare_and_run(
1536+
Subcommands::Blame { statistics, file } => prepare_and_run(
15371537
"blame",
15381538
trace,
15391539
verbose,
15401540
progress,
15411541
progress_keep_open,
15421542
None,
1543-
move |_progress, out, _err| core::repository::blame::blame_file(repository(Mode::Lenient)?, &file, out),
1543+
move |_progress, out, err| {
1544+
core::repository::blame::blame_file(repository(Mode::Lenient)?, &file, out, statistics.then_some(err))
1545+
},
15441546
),
15451547
Subcommands::Completions { shell, out_dir } => {
15461548
let mut app = Args::command();

Diff for: src/plumbing/options/mod.rs

+4
Original file line numberDiff line numberDiff line change
@@ -153,6 +153,10 @@ pub enum Subcommands {
153153
Free(free::Subcommands),
154154
/// Blame lines in a file
155155
Blame {
156+
/// Print additional statistics to help understand performance.
157+
#[clap(long, short = 's')]
158+
statistics: bool,
159+
/// The file to create the blame information for.
156160
file: std::ffi::OsString,
157161
},
158162
/// Generate shell completions to stdout or a directory.

0 commit comments

Comments
 (0)