Skip to content

Commit e1c4e66

Browse files
committed
Add tree::Editor::cursor() to allow speedier creation of sub-trees.
1 parent 14dfcf0 commit e1c4e66

File tree

3 files changed

+345
-88
lines changed

3 files changed

+345
-88
lines changed

Diff for: gix-object/src/tree/editor.rs

+168-79
Original file line numberDiff line numberDiff line change
@@ -1,45 +1,29 @@
1-
use crate::tree::EntryKind;
1+
use crate::tree::{Editor, EntryKind};
22
use crate::{tree, Tree};
33
use bstr::{BStr, BString, ByteSlice, ByteVec};
44
use gix_hash::ObjectId;
55
use gix_hashtable::hash_map::Entry;
66
use std::cmp::Ordering;
77

8-
/// The state needed to apply edits instantly to in-memory trees.
9-
///
10-
/// It's made so that each tree is looked at in the object database at most once, and held in memory for
11-
/// all edits until everything is flushed to write all changed trees.
12-
///
13-
/// The editor is optimized to edit existing trees, but can deal with building entirely new trees as well
14-
/// with some penalties.
15-
///
16-
/// ### Note
17-
///
18-
/// For reasons of efficiency, internally a SHA1 based hashmap is used to avoid having to store full paths
19-
/// to each edited tree. The chance of collision is low, but could be engineered to overwrite or write into
20-
/// an unintended tree.
21-
#[doc(alias = "TreeUpdateBuilder", alias = "git2")]
22-
pub struct Editor<'a> {
23-
/// A way to lookup trees.
24-
find: &'a dyn crate::FindExt,
25-
/// All trees we currently hold in memory. Each of these may change while adding and removing entries.
26-
/// null-object-ids mark tree-entries whose value we don't know yet, they are placeholders that will be
27-
/// dropped when writing at the latest.
28-
trees: gix_hashtable::HashMap<ObjectId, Tree>,
29-
/// A buffer to build up paths when finding the tree to edit.
30-
path_buf: BString,
31-
/// Our buffer for storing tree-data in, right before decoding it.
32-
tree_buf: Vec<u8>,
8+
/// A way to constrain all [tree-edits](Editor) to a given subtree.
9+
pub struct Cursor<'a, 'find> {
10+
/// The underlying editor
11+
parent: &'a mut Editor<'find>,
12+
/// Our own location, used as prefix for all operations.
13+
/// Note that it's assumed to always contain a tree.
14+
prefix: BString,
3315
}
3416

3517
/// Lifecycle
3618
impl<'a> Editor<'a> {
3719
/// Create a new editor that uses `root` as base for all edits. Use `find` to lookup existing
3820
/// trees when edits are made. Each tree will only be looked-up once and then edited in place from
3921
/// that point on.
40-
pub fn new(root: Tree, find: &'a dyn crate::FindExt) -> Self {
22+
/// `object_hash` denotes the kind of hash to create.
23+
pub fn new(root: Tree, find: &'a dyn crate::FindExt, object_hash: gix_hash::Kind) -> Self {
4124
Editor {
4225
find,
26+
object_hash,
4327
trees: gix_hashtable::HashMap::from_iter(Some((empty_path_hash(), root))),
4428
path_buf: Vec::with_capacity(256).into(),
4529
tree_buf: Vec::with_capacity(512),
@@ -60,15 +44,63 @@ impl<'a> Editor<'a> {
6044
/// Future calls to [`upsert`](Self::upsert) or similar will keep working on the last seen state of the
6145
/// just-written root-tree.
6246
/// If this is not desired, use [set_root()](Self::set_root()).
63-
pub fn write<E>(&mut self, mut out: impl FnMut(&Tree) -> Result<ObjectId, E>) -> Result<ObjectId, E> {
47+
pub fn write<E>(&mut self, out: impl FnMut(&Tree) -> Result<ObjectId, E>) -> Result<ObjectId, E> {
48+
self.path_buf.clear();
49+
self.write_at_pathbuf(out, WriteMode::Normal)
50+
}
51+
52+
/// Remove the entry at `rela_path`, loading all trees on the path accordingly.
53+
/// It's no error if the entry doesn't exist, or if `rela_path` doesn't lead to an existing entry at all.
54+
pub fn remove<I, C>(&mut self, rela_path: I) -> Result<&mut Self, crate::find::existing_object::Error>
55+
where
56+
I: IntoIterator<Item = C>,
57+
C: AsRef<BStr>,
58+
{
59+
self.path_buf.clear();
60+
self.upsert_or_remove_at_pathbuf(rela_path, None)
61+
}
62+
63+
/// Insert a new entry of `kind` with `id` at `rela_path`, an iterator over each path component in the tree,
64+
/// like `a/b/c`. Names are matched case-sensitively.
65+
///
66+
/// Existing leaf-entries will be overwritten unconditionally, and it is assumed that `id` is available in the object database
67+
/// or will be made available at a later point to assure the integrity of the produced tree.
68+
///
69+
/// Intermediate trees will be created if they don't exist in the object database, otherwise they will be loaded and entries
70+
/// will be inserted into them instead.
71+
///
72+
/// Note that `id` can be [null](ObjectId::null()) to create a placeholder. These will not be written, and paths leading
73+
/// through them will not be considered a problem.
74+
///
75+
/// `id` can also be an empty tree, along with [the respective `kind`](EntryKind::Tree), even though that's normally not allowed
76+
/// in Git trees.
77+
pub fn upsert<I, C>(
78+
&mut self,
79+
rela_path: I,
80+
kind: EntryKind,
81+
id: ObjectId,
82+
) -> Result<&mut Self, crate::find::existing_object::Error>
83+
where
84+
I: IntoIterator<Item = C>,
85+
C: AsRef<BStr>,
86+
{
87+
self.path_buf.clear();
88+
self.upsert_or_remove_at_pathbuf(rela_path, Some((kind, id, UpsertMode::Normal)))
89+
}
90+
91+
fn write_at_pathbuf<E>(
92+
&mut self,
93+
mut out: impl FnMut(&Tree) -> Result<ObjectId, E>,
94+
mode: WriteMode,
95+
) -> Result<ObjectId, E> {
6496
assert_ne!(self.trees.len(), 0, "there is at least the root tree");
6597

6698
// back is for children, front is for parents.
6799
let mut parents = vec![(
68100
None::<usize>,
69101
BString::default(),
70102
self.trees
71-
.remove(&empty_path_hash())
103+
.remove(&path_hash(&self.path_buf))
72104
.expect("root tree is always present"),
73105
)];
74106
let mut children = Vec::new();
@@ -106,8 +138,13 @@ impl<'a> Editor<'a> {
106138

107139
// There may be left-over trees if they are replaced with blobs for example.
108140
let root_tree_id = out(&tree)?;
109-
self.trees.clear();
110-
self.trees.insert(empty_path_hash(), tree);
141+
match mode {
142+
WriteMode::Normal => {
143+
self.trees.clear();
144+
}
145+
WriteMode::FromCursor => {}
146+
}
147+
self.trees.insert(path_hash(&self.path_buf), tree);
111148
return Ok(root_tree_id);
112149
} else if !tree.entries.is_empty() {
113150
out(&tree)?;
@@ -120,56 +157,21 @@ impl<'a> Editor<'a> {
120157
unreachable!("we exit as soon as everything is consumed")
121158
}
122159

123-
/// Remove the entry at `rela_path`, loading all trees on the path accordingly.
124-
/// It's no error if the entry doesn't exist, or if `rela_path` doesn't lead to an existing entry at all.
125-
pub fn remove<I, C>(&mut self, rela_path: I) -> Result<&mut Self, crate::find::existing_object::Error>
126-
where
127-
I: IntoIterator<Item = C>,
128-
C: AsRef<BStr>,
129-
{
130-
self.upsert_or_remove(rela_path, None)
131-
}
132-
133-
/// Insert a new entry of `kind` with `id` at `rela_path`, an iterator over each path component in the tree,
134-
/// like `a/b/c`. Names are matched case-sensitively.
135-
///
136-
/// Existing leaf-entries will be overwritten unconditionally, and it is assumed that `id` is available in the object database
137-
/// or will be made available at a later point to assure the integrity of the produced tree.
138-
///
139-
/// Intermediate trees will be created if they don't exist in the object database, otherwise they will be loaded and entries
140-
/// will be inserted into them instead.
141-
///
142-
/// Note that `id` can be [null](ObjectId::null()) to create a placeholder. These will not be written, and paths leading
143-
/// through them will not be considered a problem.
144-
///
145-
/// `id` can also be an empty tree, along with [the respective `kind`](EntryKind::Tree), even though that's normally not allowed
146-
/// in Git trees.
147-
pub fn upsert<I, C>(
160+
fn upsert_or_remove_at_pathbuf<I, C>(
148161
&mut self,
149162
rela_path: I,
150-
kind: EntryKind,
151-
id: ObjectId,
163+
kind_and_id: Option<(EntryKind, ObjectId, UpsertMode)>,
152164
) -> Result<&mut Self, crate::find::existing_object::Error>
153165
where
154166
I: IntoIterator<Item = C>,
155167
C: AsRef<BStr>,
156168
{
157-
self.upsert_or_remove(rela_path, Some((kind, id)))
158-
}
159-
160-
fn upsert_or_remove<I, C>(
161-
&mut self,
162-
rela_path: I,
163-
kind_and_id: Option<(EntryKind, ObjectId)>,
164-
) -> Result<&mut Self, crate::find::existing_object::Error>
165-
where
166-
I: IntoIterator<Item = C>,
167-
C: AsRef<BStr>,
168-
{
169-
let mut cursor = self.trees.get_mut(&empty_path_hash()).expect("root is always present");
170-
self.path_buf.clear();
169+
let mut cursor = self
170+
.trees
171+
.get_mut(&path_hash(&self.path_buf))
172+
.expect("root is always present");
171173
let mut rela_path = rela_path.into_iter().peekable();
172-
let new_kind_is_tree = kind_and_id.map_or(false, |(kind, _)| kind == EntryKind::Tree);
174+
let new_kind_is_tree = kind_and_id.map_or(false, |(kind, _, _)| kind == EntryKind::Tree);
173175
while let Some(name) = rela_path.next() {
174176
let name = name.as_ref();
175177
let is_last = rela_path.peek().is_none();
@@ -206,7 +208,7 @@ impl<'a> Editor<'a> {
206208
}
207209
}
208210
}
209-
Some((kind, id)) => {
211+
Some((kind, id, _mode)) => {
210212
let entry = &mut cursor.entries[idx];
211213
if is_last {
212214
// unconditionally overwrite what's there.
@@ -229,7 +231,7 @@ impl<'a> Editor<'a> {
229231
}
230232
Err(insertion_idx) => match kind_and_id {
231233
None => break,
232-
Some((kind, id)) => {
234+
Some((kind, id, _mode)) => {
233235
cursor.entries.insert(
234236
insertion_idx,
235237
tree::Entry {
@@ -238,17 +240,14 @@ impl<'a> Editor<'a> {
238240
oid: if is_last { id } else { id.kind().null() },
239241
},
240242
);
241-
if is_last {
242-
break;
243-
}
244243
None
245244
}
246245
},
247246
};
248247
if needs_sorting {
249248
cursor.entries.sort();
250249
}
251-
if is_last {
250+
if is_last && kind_and_id.map_or(false, |(_, _, mode)| mode == UpsertMode::Normal) {
252251
break;
253252
}
254253
push_path_component(&mut self.path_buf, name);
@@ -279,6 +278,96 @@ impl<'a> Editor<'a> {
279278
}
280279
}
281280

281+
mod cursor {
282+
use crate::tree::editor::{Cursor, UpsertMode, WriteMode};
283+
use crate::tree::{Editor, EntryKind};
284+
use crate::Tree;
285+
use bstr::{BStr, BString};
286+
use gix_hash::ObjectId;
287+
288+
/// Cursor handling
289+
impl<'a> Editor<'a> {
290+
/// Turn ourselves into a cursor, which points to the same tree as the editor.
291+
///
292+
/// This is useful if a method takes a [`Cursor`], not an [`Editor`].
293+
pub fn to_cursor(&mut self) -> Cursor<'_, 'a> {
294+
Cursor {
295+
parent: self,
296+
prefix: BString::default(),
297+
}
298+
}
299+
300+
/// Create a cursor at the given `rela_path`, which must be a tree or is turned into a tree as its own edit.
301+
///
302+
/// The returned cursor will then allow applying edits to the tree at `rela_path` as root.
303+
/// If `rela_path` is a single empty string, it is equivalent to using the current instance itself.
304+
pub fn cursor_at<I, C>(&mut self, rela_path: I) -> Result<Cursor<'_, 'a>, crate::find::existing_object::Error>
305+
where
306+
I: IntoIterator<Item = C>,
307+
C: AsRef<BStr>,
308+
{
309+
self.path_buf.clear();
310+
self.upsert_or_remove_at_pathbuf(
311+
rela_path,
312+
Some((EntryKind::Tree, self.object_hash.null(), UpsertMode::AssureTreeOnly)),
313+
)?;
314+
Ok(Cursor {
315+
prefix: self.path_buf.clone(), /* set during the upsert call */
316+
parent: self,
317+
})
318+
}
319+
}
320+
321+
impl<'a, 'find> Cursor<'a, 'find> {
322+
/// Like [`Editor::upsert()`], but with the constraint of only editing in this cursor's tree.
323+
pub fn upsert<I, C>(
324+
&mut self,
325+
rela_path: I,
326+
kind: EntryKind,
327+
id: ObjectId,
328+
) -> Result<&mut Self, crate::find::existing_object::Error>
329+
where
330+
I: IntoIterator<Item = C>,
331+
C: AsRef<BStr>,
332+
{
333+
self.parent.path_buf.clone_from(&self.prefix);
334+
self.parent
335+
.upsert_or_remove_at_pathbuf(rela_path, Some((kind, id, UpsertMode::Normal)))?;
336+
Ok(self)
337+
}
338+
339+
/// Like [`Editor::remove()`], but with the constraint of only editing in this cursor's tree.
340+
pub fn remove<I, C>(&mut self, rela_path: I) -> Result<&mut Self, crate::find::existing_object::Error>
341+
where
342+
I: IntoIterator<Item = C>,
343+
C: AsRef<BStr>,
344+
{
345+
self.parent.path_buf.clone_from(&self.prefix);
346+
self.parent.upsert_or_remove_at_pathbuf(rela_path, None)?;
347+
Ok(self)
348+
}
349+
350+
/// Like [`Editor::write()`], but will write only the subtree of the cursor.
351+
pub fn write<E>(&mut self, out: impl FnMut(&Tree) -> Result<ObjectId, E>) -> Result<ObjectId, E> {
352+
self.parent.path_buf.clone_from(&self.prefix);
353+
self.parent.write_at_pathbuf(out, WriteMode::FromCursor)
354+
}
355+
}
356+
}
357+
358+
#[derive(Copy, Clone, Eq, PartialEq)]
359+
enum UpsertMode {
360+
Normal,
361+
/// Only make sure there is a tree at the given location (requires kind tree and null-id)
362+
AssureTreeOnly,
363+
}
364+
365+
enum WriteMode {
366+
Normal,
367+
/// Perform less cleanup to assure parent-editor still stays intact
368+
FromCursor,
369+
}
370+
282371
fn cmp_entry_with_name(a: &tree::Entry, filename: &BStr, is_tree: bool) -> Ordering {
283372
let common = a.filename.len().min(filename.len());
284373
a.filename[..common].cmp(&filename[..common]).then_with(|| {

Diff for: gix-object/src/tree/mod.rs

+35-5
Original file line numberDiff line numberDiff line change
@@ -1,17 +1,47 @@
1-
use std::cmp::Ordering;
2-
31
use crate::{
42
bstr::{BStr, BString},
5-
tree,
3+
tree, Tree,
64
};
5+
use gix_hash::ObjectId;
6+
use std::cmp::Ordering;
77

8-
mod editor;
9-
pub use editor::Editor;
8+
///
9+
pub mod editor;
1010

1111
mod ref_iter;
1212
///
1313
pub mod write;
1414

15+
/// The state needed to apply edits instantly to in-memory trees.
16+
///
17+
/// It's made so that each tree is looked at in the object database at most once, and held in memory for
18+
/// all edits until everything is flushed to write all changed trees.
19+
///
20+
/// The editor is optimized to edit existing trees, but can deal with building entirely new trees as well
21+
/// with some penalties.
22+
///
23+
/// ### Note
24+
///
25+
/// For reasons of efficiency, internally a SHA1 based hashmap is used to avoid having to store full paths
26+
/// to each edited tree. The chance of collision is low, but could be engineered to overwrite or write into
27+
/// an unintended tree.
28+
#[doc(alias = "TreeUpdateBuilder", alias = "git2")]
29+
#[derive(Clone)]
30+
pub struct Editor<'a> {
31+
/// A way to lookup trees.
32+
find: &'a dyn crate::FindExt,
33+
/// The kind of hashes to produce
34+
object_hash: gix_hash::Kind,
35+
/// All trees we currently hold in memory. Each of these may change while adding and removing entries.
36+
/// null-object-ids mark tree-entries whose value we don't know yet, they are placeholders that will be
37+
/// dropped when writing at the latest.
38+
trees: gix_hashtable::HashMap<ObjectId, Tree>,
39+
/// A buffer to build up paths when finding the tree to edit.
40+
path_buf: BString,
41+
/// Our buffer for storing tree-data in, right before decoding it.
42+
tree_buf: Vec<u8>,
43+
}
44+
1545
/// The mode of items storable in a tree, similar to the file mode on a unix file system.
1646
///
1747
/// Used in [`mutable::Entry`][crate::tree::Entry] and [`EntryRef`].

0 commit comments

Comments
 (0)