Skip to content

Commit 6ed9976

Browse files
authored Dec 25, 2024
Merge pull request #1453 from cruessler/gix-blame
Explore gix APIs, experiment with gix-blame API
2 parents 7659a65 + e951e7d commit 6ed9976

File tree

19 files changed

+3178
-28
lines changed

19 files changed

+3178
-28
lines changed
 

Diff for: ‎Cargo.lock

+13
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Diff for: ‎README.md

+1
Original file line numberDiff line numberDiff line change
@@ -139,6 +139,7 @@ is usable to some extent.
139139
* [gix-shallow](https://github.com/GitoxideLabs/gitoxide/blob/main/crate-status.md#gix-shallow)
140140
* `gitoxide-core`
141141
* **very early** _(possibly without any documentation and many rough edges)_
142+
* [gix-blame](https://github.com/GitoxideLabs/gitoxide/blob/main/crate-status.md#gix-blame)
142143
* **idea** _(just a name placeholder)_
143144
* [gix-note](https://github.com/GitoxideLabs/gitoxide/blob/main/crate-status.md#gix-note)
144145
* [gix-fetchhead](https://github.com/GitoxideLabs/gitoxide/blob/main/crate-status.md#gix-fetchhead)

Diff for: ‎crate-status.md

+20-1
Original file line numberDiff line numberDiff line change
@@ -293,7 +293,7 @@ The top-level crate that acts as hub to all functionality provided by the `gix-*
293293
* [x] safe with cycles and recursive configurations
294294
* [x] multi-line with comments and quotes
295295
* **promisor**
296-
* It's vague, but these seems to be like index files allowing to fetch objects from a server on demand.
296+
* It's vague, but these seem to be like index files allowing to fetch objects from a server on demand.
297297
* [x] API documentation
298298
* [ ] Some examples
299299

@@ -361,6 +361,25 @@ Check out the [performance discussion][gix-diff-performance] as well.
361361
* [x] API documentation
362362
* [ ] Examples
363363

364+
### gix-blame
365+
366+
* [x] commit-annotations for a single file
367+
- [ ] progress
368+
- [ ] interruptibility
369+
- [ ] streaming
370+
- [ ] support for worktree changes (creates virtual commit on top of `HEAD`)
371+
- [ ] shallow-history support
372+
- [ ] rename tracking (track different paths through history)
373+
- [ ] commits to ignore
374+
- [ ] pass all blame-cornercases (from Git)
375+
* **Performance-Improvements**
376+
* Without the following the performance isn't competitive with Git.
377+
1. Implement custom graph walk which won't run down parents that don't have the path in question.
378+
2. Implement access of trees from commit-graph and fill that information into the traversal info by default.
379+
3. commit-graph with bloom filter, used to quickly check if a commit has a path.
380+
* [x] API documentation
381+
* [ ] Examples
382+
364383
### gix-traverse
365384

366385
Check out the [performance discussion][gix-traverse-performance] as well.

Diff for: ‎gitoxide-core/Cargo.toml

+1-1
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,7 @@ serde = ["gix/serde", "dep:serde_json", "dep:serde", "bytesize/serde"]
4949

5050
[dependencies]
5151
# deselect everything else (like "performance") as this should be controllable by the parent application.
52-
gix = { version = "^0.69.1", path = "../gix", default-features = false, features = ["merge", "blob-diff", "revision", "mailmap", "excludes", "attributes", "worktree-mutation", "credentials", "interrupt", "status", "dirwalk"] }
52+
gix = { version = "^0.69.1", path = "../gix", default-features = false, features = ["merge", "blob-diff", "blame", "revision", "mailmap", "excludes", "attributes", "worktree-mutation", "credentials", "interrupt", "status", "dirwalk"] }
5353
gix-pack-for-configuration-only = { package = "gix-pack", version = "^0.56.0", path = "../gix-pack", default-features = false, features = ["pack-cache-lru-dynamic", "pack-cache-lru-static", "generate", "streaming-input"] }
5454
gix-transport-configuration-only = { package = "gix-transport", version = "^0.44.0", path = "../gix-transport", default-features = false }
5555
gix-archive-for-configuration-only = { package = "gix-archive", version = "^0.18.0", path = "../gix-archive", optional = true, features = ["tar", "tar_gz"] }

Diff for: ‎gitoxide-core/src/repository/blame.rs

+71
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,71 @@
1+
use gix::bstr::ByteSlice;
2+
use gix::config::tree;
3+
use std::ffi::OsStr;
4+
5+
/// Blame `file` starting at the commit that `HEAD` of `repo` resolves to, and write the result to `out`.
///
/// * `repo` - the repository to operate on; its in-memory configuration and object cache size
///   are adjusted before the blame is computed.
/// * `file` - the path of the file to blame. It is turned into a pathspec first to obtain a normalized path.
/// * `out` - receives one output line per blamed line, see `write_blame_entries()`.
/// * `err` - if set, the statistics of the blame operation are pretty-printed (`{:#?}`) to it.
///
/// # Errors
///
/// Any failure of the operations used along the way is returned, i.e. setting the configuration value,
/// reading the index, converting or parsing the path, resolving `HEAD`, setting up the traversal,
/// performing the blame, or writing to `out` or `err`.
///
/// # Panics
///
/// If the pathspec search yields no pattern at all for `file`.
pub fn blame_file(
6+
mut repo: gix::Repository,
7+
file: &OsStr,
8+
out: impl std::io::Write,
9+
err: Option<&mut dyn std::io::Write>,
10+
) -> anyhow::Result<()> {
11+
// Default the delta-base cache limit to "100m", but only if the configuration doesn't set it already.
// The snapshot is confined to its own scope so the mutable borrow of `repo` ends here.
{
12+
let mut config = repo.config_snapshot_mut();
13+
if config.string(&tree::Core::DELTA_BASE_CACHE_LIMIT).is_none() {
14+
config.set_value(&tree::Core::DELTA_BASE_CACHE_LIMIT, "100m")?;
15+
}
16+
}
17+
let index = repo.index_or_empty()?;
18+
repo.object_cache_size_if_unset(repo.compute_object_cache_size_for_tree_diffs(&index));
19+
20+
let file = gix::path::os_str_into_bstr(file)?;
21+
// Build a pathspec from the single input path; it is only used to normalize the path (see TODO below).
let specs = repo.pathspec(
22+
false,
23+
[file],
24+
true,
25+
&index,
26+
gix::worktree::stack::state::attributes::Source::WorktreeThenIdMapping.adjust_for_bare(repo.is_bare()),
27+
)?;
28+
// TODO: there should be a way to normalize paths without going through patterns, at least in this case maybe?
29+
// `Search` actually sorts patterns by excluding or not, all that can lead to strange results.
30+
let file = specs
31+
.search()
32+
.patterns()
33+
.map(|p| p.path().to_owned())
34+
.next()
35+
.expect("exactly one pattern");
36+
37+
// The traversal starts at the commit `HEAD` points to and has no explicit end.
let suspect = repo.head()?.peel_to_commit_in_place()?;
38+
let traverse =
39+
gix::traverse::commit::topo::Builder::from_iters(&repo.objects, [suspect.id], None::<Vec<gix::ObjectId>>)
40+
.with_commit_graph(repo.commit_graph_if_enabled()?)
41+
.build()?;
42+
let mut resource_cache = repo.diff_resource_cache_for_tree_diff()?;
43+
let outcome = gix::blame::file(&repo.objects, traverse, &mut resource_cache, file.as_bstr())?;
44+
// Copy the statistics out before `outcome` is moved into `write_blame_entries()`.
let statistics = outcome.statistics;
45+
write_blame_entries(out, outcome)?;
46+
47+
if let Some(err) = err {
48+
writeln!(err, "{statistics:#?}")?;
49+
}
50+
Ok(())
51+
}
52+
53+
/// Write every blamed line in `outcome` to `out`, formatted as
/// `<commit id, 8 hex characters> <line number in blamed file> <line number in source file> <line content>`.
///
/// Both line numbers are printed one-based.
///
/// # Errors
///
/// Fails if writing to `out` fails.
fn write_blame_entries(mut out: impl std::io::Write, outcome: gix::blame::Outcome) -> Result<(), std::io::Error> {
54+
for (entry, lines_in_hunk) in outcome.entries_with_lines() {
55+
// Walk the blamed-file range, the source-file range and the hunk's lines in lockstep.
for ((actual_lno, source_lno), line) in entry
56+
.range_in_blamed_file()
57+
.zip(entry.range_in_source_file())
58+
.zip(lines_in_hunk)
59+
{
60+
// `write!` appends no newline - presumably each `line` still carries its own
// line terminator from tokenization; TODO confirm.
write!(
61+
out,
62+
"{short_id} {line_no} {src_line_no} {line}",
63+
// The ranges are zero-based, hence the `+ 1`.
line_no = actual_lno + 1,
64+
src_line_no = source_lno + 1,
65+
short_id = entry.commit_id.to_hex_with_len(8),
66+
)?;
67+
}
68+
}
69+
70+
Ok(())
71+
}

Diff for: ‎gitoxide-core/src/repository/mod.rs

+1
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@ pub enum PathsOrPatterns {
2121
pub mod archive;
2222
pub mod cat;
2323
pub use cat::function::cat;
24+
pub mod blame;
2425
pub mod commit;
2526
pub mod config;
2627
mod credential;

Diff for: ‎gix-blame/Cargo.toml

+14-1
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ name = "gix-blame"
55
version = "0.0.0"
66
repository = "https://github.com/GitoxideLabs/gitoxide"
77
license = "MIT OR Apache-2.0"
8-
description = "A crate of the gitoxide project dedicated implementing a 'blame' algorithm"
8+
description = "A crate of the gitoxide project dedicated to implementing a 'blame' algorithm"
99
authors = ["Christoph Rüßler <christoph.ruessler@mailbox.org>", "Sebastian Thiel <sebastian.thiel@icloud.com>"]
1010
edition = "2021"
1111
rust-version = "1.65"
@@ -14,6 +14,19 @@ rust-version = "1.65"
1414
doctest = false
1515

1616
[dependencies]
17+
gix-trace = { version = "^0.1.11", path = "../gix-trace" }
18+
gix-diff = { version = "^0.49.0", path = "../gix-diff", default-features = false, features = ["blob"] }
19+
gix-object = { version = "^0.46.0", path = "../gix-object" }
20+
gix-hash = { version = "^0.15.0", path = "../gix-hash" }
21+
gix-worktree = { version = "^0.38.0", path = "../gix-worktree", default-features = false, features = ["attributes"] }
22+
gix-traverse = { version = "^0.43.0", path = "../gix-traverse" }
23+
24+
thiserror = "2.0.0"
1725

1826
[dev-dependencies]
27+
gix-ref = { version = "^0.49.0", path = "../gix-ref" }
28+
gix-filter = { version = "^0.16.0", path = "../gix-filter" }
29+
gix-fs = { version = "^0.12.0", path = "../gix-fs" }
30+
gix-index = { version = "^0.37.0", path = "../gix-index" }
31+
gix-odb = { version = "^0.66.0", path = "../gix-odb" }
1932
gix-testtools = { path = "../tests/tools" }

Diff for: ‎gix-blame/src/error.rs

+30
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
use gix_object::bstr::BString;
2+
3+
/// The error returned by [file()](crate::file()).
4+
#[derive(Debug, thiserror::Error)]
5+
#[allow(missing_docs)]
6+
pub enum Error {
7+
#[error("No commit was given")]
8+
EmptyTraversal,
9+
#[error(transparent)]
10+
BlobDiffSetResource(#[from] gix_diff::blob::platform::set_resource::Error),
11+
#[error(transparent)]
12+
BlobDiffPrepare(#[from] gix_diff::blob::platform::prepare_diff::Error),
13+
#[error("The file to blame at '{file_path}' wasn't found in the first commit at {commit_id}")]
14+
FileMissing {
15+
/// The file-path to the object to blame.
16+
file_path: BString,
17+
/// The commit whose tree didn't contain `file_path`.
18+
commit_id: gix_hash::ObjectId,
19+
},
20+
#[error("Couldn't find commit or tree in the object database")]
21+
FindObject(#[from] gix_object::find::Error),
22+
#[error("Could not find existing blob or commit")]
23+
FindExistingObject(#[from] gix_object::find::existing_object::Error),
24+
#[error("Could not find existing iterator over a tree")]
25+
FindExistingIter(#[from] gix_object::find::existing_iter::Error),
26+
#[error("Failed to obtain the next commit in the commit-graph traversal")]
27+
Traverse(#[source] Box<dyn std::error::Error + Send + Sync>),
28+
#[error(transparent)]
29+
DiffTree(#[from] gix_diff::tree::Error),
30+
}

Diff for: ‎gix-blame/src/file/function.rs

+451
Large diffs are not rendered by default.

Diff for: ‎gix-blame/src/file/mod.rs

+490
Large diffs are not rendered by default.

Diff for: ‎gix-blame/src/file/tests.rs

+1,366
Large diffs are not rendered by default.

Diff for: ‎gix-blame/src/lib.rs

+20-7
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,23 @@
11
//! A crate to implement an algorithm to annotate lines in tracked files with the commits that changed them.
2-
#![deny(rust_2018_idioms)]
2+
//!
3+
//! ### Terminology
4+
//!
5+
//! * **Blamed File**
6+
//! - The file as it exists in `HEAD`.
7+
//! - the initial state with all lines that we need to associate with a *Source File*.
8+
//! * **Source File**
9+
//! - A file at a version (i.e. commit) that introduces hunks into the final 'image'.
10+
//! * **Suspects**
11+
//! - The versions of the files that can contain hunks that we could use in the final 'image'
12+
//! - multiple at the same time as the commit-graph may split up.
13+
//! - turns into *Source File* once we have found an association into the *Blamed File*.
14+
#![deny(rust_2018_idioms, missing_docs)]
315
#![forbid(unsafe_code)]
416

5-
#[cfg(test)]
6-
mod tests {
7-
#[test]
8-
#[ignore]
9-
fn it_works() {}
10-
}
17+
mod error;
18+
pub use error::Error;
19+
mod types;
20+
pub use types::{BlameEntry, Outcome, Statistics};
21+
22+
mod file;
23+
pub use file::function::file;

Diff for: ‎gix-blame/src/types.rs

+207
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,207 @@
1+
use crate::file::function::tokens_for_diffing;
2+
use gix_hash::ObjectId;
3+
use gix_object::bstr::BString;
4+
use std::num::NonZeroU32;
5+
use std::{
6+
collections::BTreeMap,
7+
ops::{AddAssign, Range, SubAssign},
8+
};
9+
10+
/// The outcome of [`file()`](crate::file()).
11+
#[derive(Debug, Default, Clone)]
12+
pub struct Outcome {
13+
/// One entry in sequential order, to associate a hunk in the blamed file with the source commit (and its lines)
14+
/// that introduced it.
15+
pub entries: Vec<BlameEntry>,
16+
/// A buffer with the file content of the *Blamed File*, ready for tokenization.
17+
pub blob: Vec<u8>,
18+
/// Additional information about the amount of work performed to produce the blame.
19+
pub statistics: Statistics,
20+
}
21+
22+
/// Additional information about the performed operations.
23+
#[derive(Debug, Default, Copy, Clone)]
24+
pub struct Statistics {
25+
/// The amount of commits it traversed until the blame was complete.
26+
pub commits_traversed: usize,
27+
/// The amount of commits whose trees were extracted.
28+
pub commits_to_tree: usize,
29+
/// The amount of trees that were decoded to find the entry of the file to blame.
30+
pub trees_decoded: usize,
31+
/// The amount of fully-fledged tree-diffs to see if the filepath was added, deleted or modified.
32+
pub trees_diffed: usize,
33+
/// The amount of blobs that were compared to each other to learn what changed between commits.
34+
/// Note that in order to diff a blob, one needs to load both versions from the database.
35+
pub blobs_diffed: usize,
36+
}
37+
38+
impl Outcome {
39+
/// Return an iterator over each entry in [`Self::entries`], along with its lines, line by line.
40+
///
41+
/// Note that [`Self::blob`] must be tokenized in exactly the same way as the tokenizer that was used
42+
/// to perform the diffs, which is what this method assures.
43+
pub fn entries_with_lines(&self) -> impl Iterator<Item = (BlameEntry, Vec<BString>)> + '_ {
44+
use gix_diff::blob::intern::TokenSource;
45+
let mut interner = gix_diff::blob::intern::Interner::new(self.blob.len() / 100);
46+
let lines_as_tokens: Vec<_> = tokens_for_diffing(&self.blob)
47+
.tokenize()
48+
.map(|token| interner.intern(token))
49+
.collect();
50+
self.entries.iter().map(move |e| {
51+
(
52+
e.clone(),
53+
lines_as_tokens[e.range_in_blamed_file()]
54+
.iter()
55+
.map(|token| BString::new(interner[*token].into()))
56+
.collect(),
57+
)
58+
})
59+
}
60+
}
61+
62+
/// Describes the offset of a particular hunk relative to the *Blamed File*.
63+
#[derive(Clone, Copy, Debug, PartialEq)]
64+
pub enum Offset {
65+
/// The amount of lines to add.
66+
Added(u32),
67+
/// The amount of lines to remove.
68+
Deleted(u32),
69+
}
70+
71+
impl Offset {
72+
/// Shift the given `range` according to our offset.
73+
pub fn shifted_range(&self, range: &Range<u32>) -> Range<u32> {
74+
match self {
75+
Offset::Added(added) => {
76+
debug_assert!(range.start >= *added, "{self:?} {range:?}");
77+
Range {
78+
start: range.start - added,
79+
end: range.end - added,
80+
}
81+
}
82+
Offset::Deleted(deleted) => Range {
83+
start: range.start + deleted,
84+
end: range.end + deleted,
85+
},
86+
}
87+
}
88+
}
89+
90+
impl AddAssign<u32> for Offset {
91+
fn add_assign(&mut self, rhs: u32) {
92+
match self {
93+
Self::Added(added) => *self = Self::Added(*added + rhs),
94+
Self::Deleted(deleted) => {
95+
if rhs > *deleted {
96+
*self = Self::Added(rhs - *deleted);
97+
} else {
98+
*self = Self::Deleted(*deleted - rhs);
99+
}
100+
}
101+
}
102+
}
103+
}
104+
105+
impl SubAssign<u32> for Offset {
106+
fn sub_assign(&mut self, rhs: u32) {
107+
match self {
108+
Self::Added(added) => {
109+
if rhs > *added {
110+
*self = Self::Deleted(rhs - *added);
111+
} else {
112+
*self = Self::Added(*added - rhs);
113+
}
114+
}
115+
Self::Deleted(deleted) => *self = Self::Deleted(*deleted + rhs),
116+
}
117+
}
118+
}
119+
120+
/// A mapping of a section of the *Blamed File* to the section in a *Source File* that introduced it.
121+
///
122+
/// Both ranges are of the same size, but may use different [starting points](Range::start). Naturally,
123+
/// they have the same content, which is the reason they are in what is returned by [`file()`](crate::file()).
124+
#[derive(Clone, Debug, PartialEq)]
125+
pub struct BlameEntry {
126+
/// The index of the token in the *Blamed File* (typically lines) where this entry begins.
127+
pub start_in_blamed_file: u32,
128+
/// The index of the token in the *Source File* (typically lines) where this entry begins.
129+
///
130+
/// This is possibly offset compared to `start_in_blamed_file`.
131+
pub start_in_source_file: u32,
132+
/// The amount of lines the hunk is spanning.
133+
pub len: NonZeroU32,
134+
/// The commit that introduced the section into the *Source File*.
135+
pub commit_id: ObjectId,
136+
}
137+
138+
impl BlameEntry {
139+
/// Create a new instance.
140+
pub fn new(range_in_blamed_file: Range<u32>, range_in_source_file: Range<u32>, commit_id: ObjectId) -> Self {
141+
debug_assert!(
142+
range_in_blamed_file.end > range_in_blamed_file.start,
143+
"{range_in_blamed_file:?}"
144+
);
145+
debug_assert!(
146+
range_in_source_file.end > range_in_source_file.start,
147+
"{range_in_source_file:?}"
148+
);
149+
debug_assert_eq!(range_in_source_file.len(), range_in_blamed_file.len());
150+
151+
Self {
152+
start_in_blamed_file: range_in_blamed_file.start,
153+
start_in_source_file: range_in_source_file.start,
154+
len: NonZeroU32::new(range_in_blamed_file.len() as u32).expect("BUG: hunks are never empty"),
155+
commit_id,
156+
}
157+
}
158+
}
159+
160+
impl BlameEntry {
161+
/// Return the range of tokens this entry spans in the *Blamed File*.
162+
pub fn range_in_blamed_file(&self) -> Range<usize> {
163+
let start = self.start_in_blamed_file as usize;
164+
start..start + self.len.get() as usize
165+
}
166+
/// Return the range of tokens this entry spans in the *Source File*.
167+
pub fn range_in_source_file(&self) -> Range<usize> {
168+
let start = self.start_in_source_file as usize;
169+
start..start + self.len.get() as usize
170+
}
171+
}
172+
173+
/// Implemented by range types that can be shifted by an [`Offset`].
pub(crate) trait LineRange {
174+
/// Return a new value of the same type that is `self` shifted by `offset`.
fn shift_by(&self, offset: Offset) -> Self;
175+
}
176+
177+
impl LineRange for Range<u32> {
178+
/// Shift this range by delegating to [`Offset::shifted_range()`].
fn shift_by(&self, offset: Offset) -> Self {
179+
offset.shifted_range(self)
180+
}
181+
}
182+
183+
/// Tracks the hunks in the *Blamed File* that are not yet associated with the commit that introduced them.
184+
#[derive(Debug, PartialEq)]
185+
pub struct UnblamedHunk {
186+
/// The range in the file that is being blamed that this hunk represents.
187+
pub range_in_blamed_file: Range<u32>,
188+
/// Maps a commit to the range in a source file (i.e. *Blamed File* at a revision) that is equal to `range_in_blamed_file`.
189+
pub suspects: BTreeMap<ObjectId, Range<u32>>,
190+
}
191+
192+
/// A value that is either a `T` ([`Either::Left`]) or a `U` ([`Either::Right`]).
#[derive(Debug)]
193+
pub(crate) enum Either<T, U> {
194+
/// The first alternative.
Left(T),
195+
/// The second alternative.
Right(U),
196+
}
197+
198+
/// A single change between two blobs, or an unchanged region.
199+
#[derive(Debug, PartialEq)]
200+
pub enum Change {
201+
/// A range of tokens that wasn't changed.
202+
Unchanged(Range<u32>),
203+
/// `(added_line_range, num_deleted_in_before)`
204+
AddedOrReplaced(Range<u32>, u32),
205+
/// `(line_to_start_deletion_at, num_deleted_in_before)`
206+
Deleted(u32, u32),
207+
}

Diff for: ‎gix-blame/tests/blame.rs

+269-2
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,271 @@
1+
use std::path::PathBuf;
2+
3+
use gix_hash::ObjectId;
4+
use gix_object::bstr;
5+
6+
/// A parser for baseline files, which hold `git blame` porcelain output.
///
/// It is an iterator (implemented in the `baseline` module) that yields one `BlameEntry` per group of lines.
struct Baseline<'a> {
7+
/// The lines of the baseline file that haven't been consumed yet.
lines: bstr::Lines<'a>,
8+
}
9+
10+
mod baseline {
11+
use std::path::Path;
12+
13+
use gix_blame::BlameEntry;
14+
use gix_hash::ObjectId;
15+
use gix_ref::bstr::ByteSlice;
16+
17+
use super::Baseline;
18+
19+
// These fields are used by `git` in its porcelain output.
20+
const HEADER_FIELDS: [&str; 12] = [
21+
// https://github.com/git/git/blob/6258f68c3c1092c901337895c864073dcdea9213/builtin/blame.c#L256-L280
22+
"author",
23+
"author-mail",
24+
"author-time",
25+
"author-tz",
26+
"committer",
27+
"committer-mail",
28+
"committer-time",
29+
"committer-tz",
30+
"summary",
31+
"boundary",
32+
// https://github.com/git/git/blob/6258f68c3c1092c901337895c864073dcdea9213/builtin/blame.c#L239-L248
33+
"previous",
34+
"filename",
35+
];
36+
37+
/// Return `true` if `field` is one of the names listed in [`HEADER_FIELDS`].
fn is_known_header_field(field: &&str) -> bool {
38+
HEADER_FIELDS.contains(field)
39+
}
40+
41+
impl Baseline<'_> {
42+
/// Read the file at `baseline_path` into memory and parse all of it into a list of `BlameEntry` values.
///
/// # Errors
///
/// Fails if the file can't be read. Problems with the file's *content* cause a panic instead,
/// as parsing happens in the `Iterator` implementation.
pub fn collect(baseline_path: impl AsRef<Path>) -> std::io::Result<Vec<BlameEntry>> {
43+
let content = std::fs::read(baseline_path)?;
44+
45+
// This is `Iterator::collect()`, driven by the `Iterator` implementation of `Baseline`.
Ok(Baseline { lines: content.lines() }.collect())
46+
}
47+
}
48+
49+
impl Iterator for Baseline<'_> {
50+
type Item = BlameEntry;
51+
52+
/// Consume lines until one complete group (a header followed by its tab-prefixed content lines)
/// was seen and return it as `BlameEntry`, or `None` if the remaining lines contain no group header.
///
/// # Panics
///
/// * if a line is not valid UTF-8
/// * if one of the numeric fields of a group header can't be parsed as `u32`
/// * if a second group header is seen before the current group is complete
/// * if a line is neither a group header, a known header field, nor starts with an object id
fn next(&mut self) -> Option<Self::Item> {
53+
let mut ranges = None;
54+
// Placeholder that is overwritten once a group header was parsed.
let mut commit_id = gix_hash::Kind::Sha1.null();
55+
// The number of tab-prefixed lines that still belong to the current group.
let mut skip_lines: u32 = 0;
56+
57+
for line in self.lines.by_ref() {
58+
if line.starts_with(b"\t") {
59+
// Each group consists of a header and one or more lines. We break from the
60+
// loop, thus returning a `BlameEntry` from `next` once we have seen the number
61+
// of lines starting with "\t" as indicated in the group’s header.
// NOTE(review): this subtraction would underflow if a tab-prefixed line showed up before
// any group header was parsed - presumably baseline files never do that; confirm.
62+
skip_lines -= 1;
63+
64+
if skip_lines == 0 {
65+
break;
66+
} else {
67+
continue;
68+
}
69+
}
70+
71+
let fields: Vec<&str> = line.to_str().unwrap().split(' ').collect();
72+
if fields.len() == 4 {
73+
// We’re possibly dealing with a group header.
74+
// If we can’t parse the first field as an `ObjectId`, we know this is not a
75+
// group header, so we continue. This can yield false positives, but for
76+
// testing purposes, we don’t bother.
77+
commit_id = match ObjectId::from_hex(fields[0].as_bytes()) {
78+
Ok(id) => id,
79+
Err(_) => continue,
80+
};
81+
82+
let line_number_in_source_file = fields[1].parse::<u32>().unwrap();
83+
let line_number_in_final_file = fields[2].parse::<u32>().unwrap();
84+
// The last field indicates the number of lines this group contains info for
85+
// (this is not equal to the number of lines in git blame’s porcelain output).
86+
let number_of_lines_in_group = fields[3].parse::<u32>().unwrap();
87+
88+
skip_lines = number_of_lines_in_group;
89+
90+
// The line numbers in the file are one-based, the ranges built here are zero-based and half-open.
let source_range =
91+
(line_number_in_source_file - 1)..(line_number_in_source_file + number_of_lines_in_group - 1);
92+
let blame_range =
93+
(line_number_in_final_file - 1)..(line_number_in_final_file + number_of_lines_in_group - 1);
94+
assert!(ranges.is_none(), "should not overwrite existing ranges");
95+
ranges = Some((blame_range, source_range));
96+
} else if !is_known_header_field(&fields[0]) && ObjectId::from_hex(fields[0].as_bytes()).is_err() {
97+
panic!("unexpected line: '{:?}'", line.as_bstr());
98+
}
99+
}
100+
101+
let Some((range_in_blamed_file, range_in_source_file)) = ranges else {
102+
// No new lines were parsed, so we assume the iterator is finished.
103+
return None;
104+
};
105+
Some(BlameEntry::new(range_in_blamed_file, range_in_source_file, commit_id))
106+
}
107+
}
108+
}
109+
110+
/// The inputs that the tests below pass to `gix_blame::file()`, prepared from a fixture repository.
struct Fixture {
111+
/// The object database opened at `.git/objects` of the fixture repository.
odb: gix_odb::Handle,
112+
/// The blob-diff platform that is handed to `gix_blame::file()` as its resource cache.
resource_cache: gix_diff::blob::Platform,
113+
/// The eagerly collected items of a topological commit traversal that starts at `HEAD`.
commits: Vec<Result<gix_traverse::commit::Info, gix_traverse::commit::topo::Error>>,
114+
}
115+
116+
impl Fixture {
117+
/// Create a fixture for the repository at [`fixture_path()`].
fn new() -> gix_testtools::Result<Fixture> {
118+
Self::for_worktree_path(fixture_path())
119+
}
120+
121+
/// Create a fixture for the repository whose worktree is at `worktree_path`, with its git directory
/// expected at `worktree_path/.git`.
///
/// This opens the ref store and the object database, resolves `HEAD` to an object id, collects the
/// topological traversal starting there, and sets up a `gix_diff::blob::Platform` backed by the
/// repository's index.
fn for_worktree_path(worktree_path: PathBuf) -> gix_testtools::Result<Fixture> {
122+
use gix_ref::store::WriteReflog;
123+
124+
let store = gix_ref::file::Store::at(
125+
worktree_path.join(".git"),
126+
gix_ref::store::init::Options {
127+
write_reflog: WriteReflog::Disable,
128+
..Default::default()
129+
},
130+
);
131+
let odb = gix_odb::at(worktree_path.join(".git/objects"))?;
132+
133+
let mut reference = gix_ref::file::Store::find(&store, "HEAD")?;
134+
135+
// Needed for `peel_to_id_in_place`.
136+
use gix_ref::file::ReferenceExt;
137+
138+
let head_id = reference.peel_to_id_in_place(&store, &odb)?;
139+
140+
// Errors of individual traversal steps are kept as items; only a failing `build()` is propagated here.
let commits: Vec<_> = gix_traverse::commit::topo::Builder::from_iters(&odb, [head_id], None::<Vec<ObjectId>>)
141+
.build()?
142+
.collect();
143+
144+
let git_dir = worktree_path.join(".git");
145+
let index = gix_index::File::at(git_dir.join("index"), gix_hash::Kind::Sha1, false, Default::default())?;
146+
// An attributes-and-ignore stack with default settings, using paths from the index.
let stack = gix_worktree::Stack::from_state_and_ignore_case(
147+
worktree_path.clone(),
148+
false,
149+
gix_worktree::stack::State::AttributesAndIgnoreStack {
150+
attributes: Default::default(),
151+
ignore: Default::default(),
152+
},
153+
&index,
154+
index.path_backing(),
155+
);
156+
let capabilities = gix_fs::Capabilities::probe(&git_dir);
157+
let resource_cache = gix_diff::blob::Platform::new(
158+
Default::default(),
159+
gix_diff::blob::Pipeline::new(
160+
// No worktree roots are configured for either side of the diff.
gix_diff::blob::pipeline::WorktreeRoots {
161+
old_root: None,
162+
new_root: None,
163+
},
164+
gix_filter::Pipeline::new(Default::default(), Default::default()),
165+
vec![],
166+
gix_diff::blob::pipeline::Options {
167+
large_file_threshold_bytes: 0,
168+
fs: capabilities,
169+
},
170+
),
171+
gix_diff::blob::pipeline::Mode::ToGit,
172+
stack,
173+
);
174+
Ok(Fixture {
175+
odb,
176+
resource_cache,
177+
commits,
178+
})
179+
}
180+
}
181+
182+
/// Generate a test function called `$name` that blames the file `<$case>.txt` in the fixture repository
/// and compares the result with the baseline parsed from `.git/<$case>.baseline`.
///
/// Despite its name, `$number_of_lines` is compared against the number of blame *entries*, both of
/// our result and of the baseline.
macro_rules! mktest {
183+
($name:ident, $case:expr, $number_of_lines:literal) => {
184+
#[test]
185+
fn $name() -> gix_testtools::Result<()> {
186+
let Fixture {
187+
odb,
188+
mut resource_cache,
189+
commits,
190+
} = Fixture::new()?;
191+
192+
let lines_blamed = gix_blame::file(
193+
&odb,
194+
commits,
195+
&mut resource_cache,
196+
format!("{}.txt", $case).as_str().into(),
197+
)?
198+
.entries;
199+
200+
assert_eq!(lines_blamed.len(), $number_of_lines);
201+
202+
// The baseline files are stored inside the git directory of the fixture repository.
let git_dir = fixture_path().join(".git");
203+
let baseline = Baseline::collect(git_dir.join(format!("{}.baseline", $case)))?;
204+
205+
assert_eq!(baseline.len(), $number_of_lines);
206+
assert_eq!(lines_blamed, baseline);
207+
Ok(())
208+
}
209+
};
210+
}
211+
212+
mktest!(simple_case, "simple", 4);
213+
mktest!(multiline_hunks, "multiline-hunks", 3);
214+
mktest!(deleted_lines, "deleted-lines", 1);
215+
mktest!(deleted_lines_multiple_hunks, "deleted-lines-multiple-hunks", 2);
216+
mktest!(changed_lines, "changed-lines", 1);
217+
mktest!(
218+
changed_line_between_unchanged_lines,
219+
"changed-line-between-unchanged-lines",
220+
3
221+
);
222+
mktest!(added_lines, "added-lines", 2);
223+
mktest!(added_lines_around, "added-lines-around", 3);
224+
mktest!(switched_lines, "switched-lines", 4);
225+
mktest!(added_line_before_changed_line, "added-line-before-changed-line", 3);
226+
mktest!(same_line_changed_twice, "same-line-changed-twice", 2);
227+
mktest!(coalesce_adjacent_hunks, "coalesce-adjacent-hunks", 1);
228+
229+
mktest!(resolved_conflict, "resolved-conflict", 2);
230+
mktest!(file_in_one_chain_of_ancestors, "file-in-one-chain-of-ancestors", 1);
231+
mktest!(
232+
different_file_in_another_chain_of_ancestors,
233+
"different-file-in-another-chain-of-ancestors",
234+
1
235+
);
236+
mktest!(file_only_changed_in_branch, "file-only-changed-in-branch", 2);
237+
238+
/// As of 2024-09-24, these tests are expected to fail.
239+
///
240+
/// Context: https://github.com/Byron/gitoxide/pull/1453#issuecomment-2371013904
1241
#[test]
2-
fn it_works() {
3-
let _worktree = gix_testtools::scripted_fixture_read_only("make_blame_repo.sh").unwrap();
242+
#[should_panic = "empty-lines-myers"]
243+
fn diff_disparity() {
244+
for case in ["empty-lines-myers", "empty-lines-histogram"] {
245+
let Fixture {
246+
odb,
247+
mut resource_cache,
248+
commits,
249+
} = Fixture::new().unwrap();
250+
251+
let lines_blamed = gix_blame::file(
252+
&odb,
253+
commits,
254+
&mut resource_cache,
255+
format!("{case}.txt").as_str().into(),
256+
)
257+
.unwrap()
258+
.entries;
259+
260+
assert_eq!(lines_blamed.len(), 5);
261+
262+
let git_dir = fixture_path().join(".git");
263+
let baseline = Baseline::collect(git_dir.join(format!("{case}.baseline"))).unwrap();
264+
265+
assert_eq!(lines_blamed, baseline, "{case}");
266+
}
267+
}
268+
269+
/// Return the path to the read-only fixture produced by the `make_blame_repo.sh` script.
///
/// # Panics
///
/// If the fixture can't be provided.
fn fixture_path() -> PathBuf {
270+
gix_testtools::scripted_fixture_read_only("make_blame_repo.sh").unwrap()
4271
}

Diff for: ‎gix-blame/tests/fixtures/make_blame_repo.sh

+199-16
Original file line numberDiff line numberDiff line change
@@ -1,23 +1,206 @@
11
#!/usr/bin/env bash
22
set -eu -o pipefail
33

4-
54
git init -q
5+
git config --local diff.algorithm histogram
6+
67
git config merge.ff false
78

89
git checkout -q -b main
9-
git commit -q --allow-empty -m c1
10-
git tag at-c1
11-
git commit -q --allow-empty -m c2
12-
git commit -q --allow-empty -m c3
13-
git commit -q --allow-empty -m c4
14-
15-
git checkout -q -b branch1
16-
git commit -q --allow-empty -m b1c1
17-
git tag at-b1c1
18-
git commit -q --allow-empty -m b1c2
19-
20-
git checkout -q main
21-
git commit -q --allow-empty -m c5
22-
git tag at-c5
23-
git merge branch1 -m m1b1
10+
11+
echo "line 1" >> simple.txt
12+
git add simple.txt
13+
git commit -q -m c1
14+
15+
echo -e "line 1\nline 2\nline 3" >> multiline-hunks.txt
16+
git add multiline-hunks.txt
17+
git commit -q -m c1.1
18+
19+
echo -e "line 1\nline 2" > changed-lines.txt
20+
echo -e "line 1\nline 2\nline 3\nline 4\nline 5\nline 6" >> changed-line-between-unchanged-lines.txt
21+
git add changed-lines.txt
22+
git add changed-line-between-unchanged-lines.txt
23+
git commit -q -m c1.2
24+
25+
echo "line 2" >> added-lines.txt
26+
echo "line 2" >> added-lines-around.txt
27+
echo -e "line 1\nline 2" > coalesce-adjacent-hunks.txt
28+
git add added-lines.txt
29+
git add added-lines-around.txt
30+
git add coalesce-adjacent-hunks.txt
31+
git commit -q -m c1.3
32+
33+
echo "line 2" >> simple.txt
34+
git add simple.txt
35+
git commit -q -m c2
36+
37+
echo -e "line 4\nline 5\nline 6" >> multiline-hunks.txt
38+
git add multiline-hunks.txt
39+
git commit -q -m c2.1
40+
41+
echo -e "line 1\nline 2\nline 3\nline 4\nline 5\nline 6" >> deleted-lines.txt
42+
echo -e "line 1\nline 2\nline 3\nline 4\nline 5\nline 6" >> deleted-lines-multiple-hunks.txt
43+
git add deleted-lines.txt
44+
git add deleted-lines-multiple-hunks.txt
45+
git commit -q -m c2.2
46+
47+
echo -e "line 1\nline 2\nline 3" > added-line-before-changed-line.txt
48+
git add added-line-before-changed-line.txt
49+
git commit -q -m c2.3
50+
51+
echo -e "line 1\nline 2" > same-line-changed-twice.txt
52+
echo -e "line 1\nline in between\nline 2" > coalesce-adjacent-hunks.txt
53+
git add same-line-changed-twice.txt
54+
git add coalesce-adjacent-hunks.txt
55+
git commit -q -m c2.4
56+
57+
echo "line 3" >> simple.txt
58+
git add simple.txt
59+
git commit -q -m c3
60+
61+
echo -e "line 3\nline 4" > deleted-lines.txt
62+
echo -e "line 2\nline 4" > deleted-lines-multiple-hunks.txt
63+
git add deleted-lines.txt
64+
git add deleted-lines-multiple-hunks.txt
65+
git commit -q -m c3.1
66+
67+
echo -e "line 3\nline 4" > changed-lines.txt
68+
echo -e "line 1\nline 2\nline 3 changed\nline 4\nline 5\nline 6" > changed-line-between-unchanged-lines.txt
69+
git add changed-lines.txt
70+
git add changed-line-between-unchanged-lines.txt
71+
git commit -q -m c3.2
72+
73+
echo -e "line 2\nline 3" > added-line-before-changed-line.txt
74+
echo -e "line 1\nline 2" > coalesce-adjacent-hunks.txt
75+
git add added-line-before-changed-line.txt
76+
git add coalesce-adjacent-hunks.txt
77+
git commit -q -m c3.3
78+
79+
echo -e "line 1\nline 2 changed" > same-line-changed-twice.txt
80+
git add same-line-changed-twice.txt
81+
git commit -q -m c3.4
82+
83+
echo "line 4" >> simple.txt
84+
git add simple.txt
85+
git commit -q -m c4
86+
87+
echo -e "line 7\nline 8\nline 9" >> multiline-hunks.txt
88+
git add multiline-hunks.txt
89+
git commit -q -m c4.1
90+
91+
echo -e "line 1\nline 3\nline 2\nline 4" > switched-lines.txt
92+
git add switched-lines.txt
93+
git commit -q -m c4.2
94+
95+
echo -e "line 2 changed\nline 3" > added-line-before-changed-line.txt
96+
git add added-line-before-changed-line.txt
97+
git commit -q -m c4.3
98+
99+
echo -e "line 1\nline 2 changed a second time" > same-line-changed-twice.txt
100+
git add same-line-changed-twice.txt
101+
git commit -q -m c4.4
102+
103+
echo -e " line 1\n\n line 2\n\n line 3" > empty-lines-histogram.txt
104+
cp empty-lines-histogram.txt empty-lines-myers.txt
105+
git add empty-lines-histogram.txt empty-lines-myers.txt
106+
git commit -q -m c4.5
107+
108+
echo -e "line 0\nline 1\nline 2" > added-lines.txt
109+
echo -e "line 0\nline 1\nline 2\nline 3" > added-lines-around.txt
110+
git add added-lines.txt
111+
git add added-lines-around.txt
112+
git commit -q -m c5
113+
114+
echo -e "line 4" > deleted-lines.txt
115+
git add deleted-lines.txt
116+
git commit -q -m c5.1
117+
118+
echo -e "line 1\nline 2\nline 3\nline 4" > switched-lines.txt
119+
git add switched-lines.txt
120+
git commit -q -m c5.2
121+
122+
echo -e "line 1\nline 2 changed\nline 3" > added-line-before-changed-line.txt
123+
git add added-line-before-changed-line.txt
124+
git commit -q -m c5.3
125+
126+
echo -e " line 1\n\n line in between\n\n line 2\n\n line in between\n\n line 3" > empty-lines-histogram.txt
127+
cp empty-lines-histogram.txt empty-lines-myers.txt
128+
git add empty-lines-histogram.txt empty-lines-myers.txt
129+
git commit -q -m c5.4
130+
131+
# The commit history created by the commits above this line is linear: it only
132+
# contains commits that have exactly one parent.
133+
# Below this line, there are also commits that have more than one parent.
134+
135+
echo -e "line 1 original\nline 2\n line 3" > resolved-conflict.txt
136+
git add resolved-conflict.txt
137+
git commit -q -m c6
138+
139+
echo -e "line 1 changed\nline 2\n line 3" > resolved-conflict.txt
140+
git add resolved-conflict.txt
141+
git commit -q -m c7
142+
143+
git checkout -b different-branch-to-create-a-conflict
144+
git reset --hard HEAD~1
145+
146+
echo -e "line 1 changed in a different way\nline 2\n line 3" > resolved-conflict.txt
147+
git add resolved-conflict.txt
148+
git commit -q -m c8
149+
150+
git checkout main
151+
git merge different-branch-to-create-a-conflict || true
152+
153+
echo -e "line 1 conflict resolved\nline 2\n line 3" > resolved-conflict.txt
154+
git add resolved-conflict.txt
155+
git commit -q -m c9
156+
157+
echo -e "line 1\nline 2\n line 3" > file-in-one-chain-of-ancestors.txt
158+
git add file-in-one-chain-of-ancestors.txt
159+
git commit -q -m c10
160+
161+
git checkout -b different-branch-that-does-not-contain-file
162+
git reset --hard HEAD~1
163+
164+
echo -e "line 4\nline 5\n line 6" > different-file-in-another-chain-of-ancestors.txt
165+
git add different-file-in-another-chain-of-ancestors.txt
166+
git commit -q -m c11
167+
168+
git checkout main
169+
git merge different-branch-that-does-not-contain-file || true
170+
171+
echo -e "line 1\nline 2\n line 3" > file-only-changed-in-branch.txt
172+
git add file-only-changed-in-branch.txt
173+
git commit -q -m c12
174+
175+
git checkout -b branch-that-has-one-commit
176+
177+
echo -e "line 1 changed\nline 2\n line 3" > file-only-changed-in-branch.txt
178+
git add file-only-changed-in-branch.txt
179+
git commit -q -m c13
180+
181+
git checkout main
182+
git merge branch-that-has-one-commit || true
183+
184+
git blame --porcelain simple.txt > .git/simple.baseline
185+
git blame --porcelain multiline-hunks.txt > .git/multiline-hunks.baseline
186+
git blame --porcelain deleted-lines.txt > .git/deleted-lines.baseline
187+
git blame --porcelain deleted-lines-multiple-hunks.txt > .git/deleted-lines-multiple-hunks.baseline
188+
git blame --porcelain changed-lines.txt > .git/changed-lines.baseline
189+
git blame --porcelain changed-line-between-unchanged-lines.txt > .git/changed-line-between-unchanged-lines.baseline
190+
git blame --porcelain added-lines.txt > .git/added-lines.baseline
191+
git blame --porcelain added-lines-around.txt > .git/added-lines-around.baseline
192+
git blame --porcelain switched-lines.txt > .git/switched-lines.baseline
193+
git blame --porcelain added-line-before-changed-line.txt > .git/added-line-before-changed-line.baseline
194+
git blame --porcelain same-line-changed-twice.txt > .git/same-line-changed-twice.baseline
195+
git blame --porcelain coalesce-adjacent-hunks.txt > .git/coalesce-adjacent-hunks.baseline
196+
197+
git blame --porcelain resolved-conflict.txt > .git/resolved-conflict.baseline
198+
git blame --porcelain file-in-one-chain-of-ancestors.txt > .git/file-in-one-chain-of-ancestors.baseline
199+
git blame --porcelain different-file-in-another-chain-of-ancestors.txt > .git/different-file-in-another-chain-of-ancestors.baseline
200+
git blame --porcelain file-only-changed-in-branch.txt > .git/file-only-changed-in-branch.baseline
201+
202+
git blame --porcelain empty-lines-histogram.txt > .git/empty-lines-histogram.baseline
203+
204+
git config --local diff.algorithm myers
205+
206+
git blame --porcelain empty-lines-myers.txt > .git/empty-lines-myers.baseline

Diff for: ‎gix/Cargo.toml

+4
Original file line numberDiff line numberDiff line change
@@ -141,6 +141,9 @@ blob-diff = ["gix-diff/blob", "attributes"]
141141
## Add functions to specifically merge files, using the standard three-way merge that git offers.
142142
merge = ["tree-editor", "blob-diff", "dep:gix-merge", "attributes"]
143143

144+
## Add blame command similar to `git blame`.
145+
blame = ["dep:gix-blame"]
146+
144147
## Make it possible to turn a tree into a stream of bytes, which can be decoded to entries and turned into various other formats.
145148
worktree-stream = ["gix-worktree-stream", "attributes"]
146149

@@ -371,6 +374,7 @@ gix-command = { version = "^0.4.0", path = "../gix-command", optional = true }
371374

372375
gix-worktree-stream = { version = "^0.18.0", path = "../gix-worktree-stream", optional = true }
373376
gix-archive = { version = "^0.18.0", path = "../gix-archive", default-features = false, optional = true }
377+
gix-blame = { version= "^0.0.0", path ="../gix-blame", optional = true }
374378

375379
# For communication with remotes
376380
gix-protocol = { version = "^0.47.0", path = "../gix-protocol" }

Diff for: ‎gix/src/lib.rs

+2
Original file line numberDiff line numberDiff line change
@@ -95,6 +95,8 @@
9595
pub use gix_actor as actor;
9696
#[cfg(feature = "attributes")]
9797
pub use gix_attributes as attrs;
98+
#[cfg(feature = "blame")]
99+
pub use gix_blame as blame;
98100
#[cfg(feature = "command")]
99101
pub use gix_command as command;
100102
pub use gix_commitgraph as commitgraph;

Diff for: ‎src/plumbing/main.rs

+11
Original file line numberDiff line numberDiff line change
@@ -1533,6 +1533,17 @@ pub fn main() -> Result<()> {
15331533
},
15341534
),
15351535
},
1536+
Subcommands::Blame { statistics, file } => prepare_and_run(
1537+
"blame",
1538+
trace,
1539+
verbose,
1540+
progress,
1541+
progress_keep_open,
1542+
None,
1543+
move |_progress, out, err| {
1544+
core::repository::blame::blame_file(repository(Mode::Lenient)?, &file, out, statistics.then_some(err))
1545+
},
1546+
),
15361547
Subcommands::Completions { shell, out_dir } => {
15371548
let mut app = Args::command();
15381549

Diff for: ‎src/plumbing/options/mod.rs

+8
Original file line numberDiff line numberDiff line change
@@ -151,6 +151,14 @@ pub enum Subcommands {
151151
/// Subcommands that need no git repository to run.
152152
#[clap(subcommand)]
153153
Free(free::Subcommands),
154+
/// Blame lines in a file
155+
Blame {
156+
/// Print additional statistics to help understand performance.
157+
#[clap(long, short = 's')]
158+
statistics: bool,
159+
/// The file to create the blame information for.
160+
file: std::ffi::OsString,
161+
},
154162
/// Generate shell completions to stdout or a directory.
155163
#[clap(visible_alias = "generate-completions", visible_alias = "shell-completions")]
156164
Completions {

0 commit comments

Comments
 (0)
Please sign in to comment.