Skip to content

Commit d9e74ff

Browse files
committed
a basic command to perform a corpus run
1 parent 12f4cec commit d9e74ff

File tree

6 files changed

+162
-5
lines changed

6 files changed

+162
-5
lines changed

Diff for: Cargo.toml

+7-4
Original file line numberDiff line numberDiff line change
@@ -44,12 +44,12 @@ max = ["max-control", "fast", "gitoxide-core-blocking-client", "http-client-curl
4444
max-pure = ["max-control", "gix-features/rustsha1", "gix-features/zlib-rust-backend", "http-client-reqwest", "gitoxide-core-blocking-client" ]
4545

4646
## Like `max`, but with more control for configuration. See the *Package Maintainers* headline for more information.
47-
max-control = ["fast-safe", "pretty-cli", "gitoxide-core-tools-query", "gitoxide-core-tools", "prodash-render-line", "prodash-render-tui", "prodash/render-line-autoconfigure", "gix/regex" ]
47+
max-control = ["fast-safe", "pretty-cli", "gitoxide-core-tools-query", "gitoxide-core-tools-corpus", "gitoxide-core-tools", "prodash-render-line", "prodash-render-tui", "prodash/render-line-autoconfigure", "gix/regex" ]
4848

4949
## All of the good stuff, with less fanciness for smaller binaries.
5050
##
5151
## As fast as possible, progress line rendering, all transports based on their most mature implementation (HTTP), all `ein` tools, CLI colors and local-time support, JSON output.
52-
lean = ["fast", "pretty-cli", "http-client-curl", "gitoxide-core-tools-query", "gitoxide-core-tools", "gitoxide-core-blocking-client", "prodash-render-line" ]
52+
lean = ["fast", "pretty-cli", "http-client-curl", "gitoxide-core-tools-query", "gitoxide-core-tools-corpus", "gitoxide-core-tools", "gitoxide-core-blocking-client", "prodash-render-line" ]
5353

5454
## The smallest possible build, best suited for small single-core machines.
5555
##
@@ -67,7 +67,7 @@ small = ["pretty-cli", "gix-features/rustsha1", "gix-features/zlib-rust-backend"
6767
##
6868
## Due to async client-networking not being implemented for most transports, this one supports only the 'git+tcp' and HTTP transport.
6969
## It uses, however, a fully asynchronous networking implementation which can serve a real-world example on how to implement custom async transports.
70-
lean-async = ["fast", "pretty-cli", "gitoxide-core-tools", "gitoxide-core-tools-query", "gitoxide-core-async-client", "prodash-render-line"]
70+
lean-async = ["fast", "pretty-cli", "gitoxide-core-tools", "gitoxide-core-tools-query", "gitoxide-core-tools-corpus", "gitoxide-core-async-client", "prodash-render-line"]
7171

7272
#! ### Package Maintainers
7373
#! `*-control` features leave it to you to configure C libraries, involving choices for `zlib`, hashing and transport implementation.
@@ -128,7 +128,10 @@ cache-efficiency-debug = ["gix-features/cache-efficiency-debug"]
128128
gitoxide-core-tools = ["gitoxide-core/organize", "gitoxide-core/estimate-hours"]
129129

130130
## A program to perform analytics on a `git` repository, using an auto-maintained sqlite database
131-
gitoxide-core-tools-query = ["gitoxide-core-tools", "gitoxide-core/query"]
131+
gitoxide-core-tools-query = ["gitoxide-core/query"]
132+
133+
## A program to run algorithms on a corpus of repositories, recording each run for later comparison.
134+
gitoxide-core-tools-corpus = ["gitoxide-core/corpus"]
132135

133136
#! ### Building Blocks for mutually exclusive networking
134137
#! Blocking and async features are mutually exclusive and cause a compile-time error. This also means that `cargo … --all-features` will fail.

Diff for: gitoxide-core/Cargo.toml

+3-1
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,8 @@ organize = ["dep:gix-url", "dep:jwalk"]
2121
estimate-hours = ["dep:itertools", "dep:fs-err", "dep:crossbeam-channel", "dep:smallvec"]
2222
## Gather information about repositories and store it in a database for easy querying.
2323
query = ["dep:rusqlite"]
24+
## Run algorithms on a corpus of repositories and store their results for later comparison and intelligence gathering.
25+
corpus = ["dep:rusqlite"]
2426

2527
#! ### Mutually Exclusive Networking
2628
#! If both are set, _blocking-client_ will take precedence, allowing `--all-features` to be used.
@@ -66,7 +68,7 @@ fs-err = { version = "2.6.0", optional = true }
6668
crossbeam-channel = { version = "0.5.6", optional = true }
6769
smallvec = { version = "1.10.0", optional = true }
6870

69-
# for 'query'
71+
# for 'query' and 'corpus'
7072
rusqlite = { version = "0.29.0", optional = true, features = ["bundled"] }
7173

7274
# for svg graph output

Diff for: gitoxide-core/src/corpus/mod.rs

+107
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,107 @@
1+
pub struct Engine<P> {
2+
progress: P,
3+
con: rusqlite::Connection,
4+
}
5+
6+
pub mod engine {
7+
use crate::corpus::Engine;
8+
use anyhow::Context;
9+
use std::path::PathBuf;
10+
11+
impl<P> Engine<P>
12+
where
13+
P: gix::Progress,
14+
{
15+
/// Open the corpus DB or create it.
16+
pub fn open_or_create(db: PathBuf, progress: P) -> anyhow::Result<Engine<P>> {
17+
let con = crate::corpus::db::create(db).context("Could not open or create database")?;
18+
Ok(Engine { progress, con })
19+
}
20+
21+
/// Run on the existing set of repositories we have already seen or obtain them from `path` if there is none yet.
22+
pub fn run(&self, _path: PathBuf) -> anyhow::Result<()> {
23+
todo!()
24+
}
25+
}
26+
}
27+
28+
pub mod db {
29+
use anyhow::bail;
30+
use rusqlite::{params, OptionalExtension};
31+
32+
/// A version to be incremented whenever the database layout is changed, to refresh it automatically.
33+
const VERSION: usize = 1;
34+
35+
pub fn create(path: impl AsRef<std::path::Path>) -> anyhow::Result<rusqlite::Connection> {
36+
let path = path.as_ref();
37+
let con = rusqlite::Connection::open(path)?;
38+
let meta_table = r#"
39+
CREATE TABLE if not exists meta(
40+
version int
41+
)"#;
42+
con.execute_batch(meta_table)?;
43+
let version: Option<usize> = con.query_row("SELECT version FROM meta", [], |r| r.get(0)).optional()?;
44+
match version {
45+
None => {
46+
con.execute("INSERT into meta(version) values(?)", params![VERSION])?;
47+
}
48+
Some(version) if version != VERSION => match con.close() {
49+
Ok(()) => {
50+
bail!("Cannot handle database with version {version}, cannot yet migrate to {VERSION} - maybe migrate by hand?");
51+
}
52+
Err((_, err)) => return Err(err.into()),
53+
},
54+
_ => {}
55+
}
56+
con.execute_batch(
57+
r#"
58+
CREATE TABLE if not exists runner(
59+
vendor text,
60+
brand text,
61+
host_name text, -- this is just to help ID the runner
62+
UNIQUE (vendor, brand)
63+
)
64+
"#,
65+
)?;
66+
con.execute_batch(
67+
r#"
68+
CREATE TABLE if not exists corpus(
69+
root text UNIQUE -- the root path of all repositories we want to consider, as canonicalized path
70+
)
71+
"#,
72+
)?;
73+
con.execute_batch(
74+
r#"
75+
CREATE TABLE if not exists repository(
76+
rela_path text UNIQUE, -- the path to the repository on disk, relative to the corpus root path, without leading `./` or `.\`
77+
corpus integer,
78+
FOREIGN KEY (corpus) REFERENCES corpus (rowid)
79+
)
80+
"#,
81+
)?;
82+
con.execute_batch(
83+
r#"
84+
CREATE TABLE if not exists gix_version(
85+
version text UNIQUE -- the unique git version via gix describe
86+
)
87+
"#,
88+
)?;
89+
con.execute_batch(
90+
r#"
91+
CREATE TABLE if not exists run(
92+
repository integer,
93+
runner integer,
94+
gix_version integer,
95+
start_time integer,
96+
end_time integer, -- or NULL if not yet finished (either successfully or with failure)
97+
error text, -- or NULL if there was no error
98+
FOREIGN KEY (repository) REFERENCES repository (rowid),
99+
FOREIGN KEY (runner) REFERENCES runner (rowid),
100+
FOREIGN KEY (gix_version) REFERENCES gix_version (rowid)
101+
)
102+
"#,
103+
)?;
104+
105+
Ok(con)
106+
}
107+
}

Diff for: gitoxide-core/src/lib.rs

+2
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,8 @@ impl FromStr for OutputFormat {
6464
}
6565

6666
pub mod commitgraph;
67+
#[cfg(feature = "corpus")]
68+
pub mod corpus;
6769
pub mod net;
6870

6971
#[cfg(feature = "estimate-hours")]

Diff for: src/plumbing/main.rs

+14
Original file line numberDiff line numberDiff line change
@@ -128,6 +128,20 @@ pub fn main() -> Result<()> {
128128
})?;
129129

130130
match cmd {
131+
#[cfg(feature = "gitoxide-core-tools-corpus")]
132+
Subcommands::Corpus(crate::plumbing::options::corpus::Platform { db, path, cmd }) => prepare_and_run(
133+
"corpus",
134+
auto_verbose,
135+
progress,
136+
progress_keep_open,
137+
None,
138+
move |progress, _out, _err| {
139+
let engine = core::corpus::Engine::open_or_create(db, progress)?;
140+
match cmd {
141+
crate::plumbing::options::corpus::SubCommands::Run => engine.run(path),
142+
}
143+
},
144+
),
131145
Subcommands::CommitGraph(cmd) => match cmd {
132146
commitgraph::Subcommands::List { spec } => prepare_and_run(
133147
"commitgraph-list",

Diff for: src/plumbing/options/mod.rs

+29
Original file line numberDiff line numberDiff line change
@@ -117,11 +117,40 @@ pub enum Subcommands {
117117
/// Show which git configuration values are used or planned.
118118
ConfigTree,
119119
Config(config::Platform),
120+
#[cfg(feature = "gitoxide-core-tools-corpus")]
121+
Corpus(corpus::Platform),
120122
/// Subcommands that need no git repository to run.
121123
#[clap(subcommand)]
122124
Free(free::Subcommands),
123125
}
124126

127+
#[cfg(feature = "gitoxide-core-tools-corpus")]
128+
pub mod corpus {
129+
use std::path::PathBuf;
130+
131+
#[derive(Debug, clap::Parser)]
132+
#[command(
133+
about = "run algorithms on a corpus of git repositories and store their results for later analysis",
134+
version = clap::crate_version!(), // TODO: make this an actual version that is git describe, leverage `gix`
135+
)]
136+
pub struct Platform {
137+
/// The path to the database to read and write depending on the sub-command.
138+
#[arg(long, default_value = "corpus.db")]
139+
pub db: PathBuf,
140+
/// The path to the root of the corpus to search repositories in.
141+
#[arg(long, short = 'p', default_value = ".")]
142+
pub path: PathBuf,
143+
#[clap(subcommand)]
144+
pub cmd: SubCommands,
145+
}
146+
147+
#[derive(Debug, clap::Subcommand)]
148+
pub enum SubCommands {
149+
/// Perform a corpus run on all registered repositories.
150+
Run,
151+
}
152+
}
153+
125154
pub mod config {
126155

127156
use gix::bstr::BString;

0 commit comments

Comments
 (0)