Skip to content

Commit

Permalink
feat: ein tool query - a git analytics engine.
Browse files Browse the repository at this point in the history
A tool to build and efficiently maintain a database of information contained
in a git repository, preferably the kind of information that is expensive to obtain,
in order to facilitate queries that would be prohibitive without an accelerating
data structure.
  • Loading branch information
Byron committed Feb 24, 2023
1 parent 1d3d22d commit f8cc623
Show file tree
Hide file tree
Showing 19 changed files with 1,062 additions and 130 deletions.
64 changes: 62 additions & 2 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

9 changes: 6 additions & 3 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -43,14 +43,14 @@ prodash-render-line-crossterm = ["prodash-render-line", "prodash/render-line-cro
#! These combine common choices of the above features to represent typical builds

## *fast* + *prodash-render-tui-crossterm* + *prodash-render-line-crossterm* + *http* + *gitoxide-core-tools* + *client-networking*
max = ["fast", "pretty-cli", "http-client-curl", "gitoxide-core-tools", "gitoxide-core-blocking-client", "prodash-render-line", "prodash-render-tui", "prodash/render-line-autoconfigure", "gix/regex" ]
max = ["fast", "pretty-cli", "http-client-curl", "gitoxide-core-tools-query", "gitoxide-core-tools", "gitoxide-core-blocking-client", "prodash-render-line", "prodash-render-tui", "prodash/render-line-autoconfigure", "gix/regex" ]

## *fast* + *prodash-render-line-crossterm* + *gitoxide-core-tools* + *client-networking*.
lean = ["fast", "pretty-cli", "http-client-curl", "gitoxide-core-tools", "gitoxide-core-blocking-client", "prodash-render-line" ]
lean = ["fast", "pretty-cli", "http-client-curl", "gitoxide-core-tools-query", "gitoxide-core-tools", "gitoxide-core-blocking-client", "prodash-render-line" ]
## *fast* + *prodash-render-line-crossterm* + *gitoxide-core-tools* + *client-async-networking*.
## Due to async client-networking not being implemented for most transports, this one supports only the 'git' transport.
## It uses, however, a fully asynchronous networking implementation which can serve a real-world example on how to implement custom async transports.
lean-async = ["fast", "pretty-cli", "gitoxide-core-tools", "gitoxide-core-async-client", "prodash-render-line"]
lean-async = ["fast", "pretty-cli", "gitoxide-core-tools", "gitoxide-core-tools-query", "gitoxide-core-async-client", "prodash-render-line"]

## As small as it can possibly be, no threading, no fast sha1, line progress only, rust based zlib implementation.
## no networking, local operations only.
Expand All @@ -64,6 +64,9 @@ max-pure = ["pretty-cli", "gix-features/rustsha1", "gix-features/zlib-rust-backe
## A way to enable all `gitoxide-core` tools found in `gix tools`
gitoxide-core-tools = ["gitoxide-core/organize", "gitoxide-core/estimate-hours"]

## A program to perform analytics on a git repository, using an auto-maintained sqlite database
gitoxide-core-tools-query = ["gitoxide-core-tools", "gitoxide-core/query"]

#! #### Mutually Exclusive Networking
#! If both are set a compile error is triggered. This also means that `cargo … --all-features` will fail.

Expand Down
10 changes: 7 additions & 3 deletions gitoxide-core/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -16,9 +16,11 @@ default = []

#! ### Tools
## Discover all git repositories within a directory. Particularly useful with [skim](https://github.com/lotabout/skim).
organize = ["gix-url", "jwalk"]
organize = ["dep:gix-url", "dep:jwalk"]
## Derive the amount of time invested into a git repository akin to [git-hours](https://github.com/kimmobrunfeldt/git-hours).
estimate-hours = ["itertools", "fs-err", "num_cpus", "crossbeam-channel", "mime_guess"]
estimate-hours = ["dep:itertools", "dep:fs-err", "dep:crossbeam-channel", "dep:mime_guess"]
## Gather information about repositories and store it in a database for easy querying.
query = ["dep:rusqlite"]

#! ### Mutually Exclusive Networking
#! If both are set, _blocking-client_ will take precedence, allowing `--all-features` to be used.
Expand Down Expand Up @@ -62,10 +64,12 @@ jwalk = { version = "0.8.0", optional = true }
# for 'hours'
itertools = { version = "0.10.1", optional = true }
fs-err = { version = "2.6.0", optional = true }
num_cpus = { version = "1.13.1", optional = true }
crossbeam-channel = { version = "0.5.6", optional = true }
mime_guess = { version = "2.0.4", optional = true }

# for 'query'
rusqlite = { version = "0.28.0", optional = true, features = ["bundled"] }

document-features = { version = "0.2.0", optional = true }

[package.metadata.docs.rs]
Expand Down
7 changes: 3 additions & 4 deletions gitoxide-core/src/hours/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -57,10 +57,7 @@ where
let commit_id = repo.rev_parse_single(rev_spec)?.detach();
let mut string_heap = BTreeSet::<&'static [u8]>::new();
let needs_stats = file_stats || line_stats;
let threads = {
let t = threads.unwrap_or_else(num_cpus::get);
(t == 0).then(num_cpus::get_physical).unwrap_or(t)
};
let threads = gix::features::parallel::num_threads(threads);

let (commit_authors, stats, is_shallow, skipped_merge_commits) = {
let stat_progress = needs_stats.then(|| progress.add_child("extract stats")).map(|mut p| {
Expand Down Expand Up @@ -236,6 +233,8 @@ where
(true, true) => {
files.modified += 1;
if line_stats {
// TODO: replace this with proper git-attributes - this isn't
// really working, can't see shell scripts for example.
let is_text_file = mime_guess::from_path(
gix::path::from_bstr(change.location)
.as_ref(),
Expand Down
2 changes: 2 additions & 0 deletions gitoxide-core/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,8 @@ pub mod mailmap;
#[cfg(feature = "organize")]
pub mod organize;
pub mod pack;
#[cfg(feature = "query")]
pub mod query;
pub mod repository;

#[cfg(all(feature = "async-client", feature = "blocking-client"))]
Expand Down
68 changes: 68 additions & 0 deletions gitoxide-core/src/query/db.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
use anyhow::Context;
use rusqlite::{params, OptionalExtension};

/// The layout version of the database, as stored in the single `version` column of its `meta` table.
///
/// Increment it whenever the database layout is changed: `create()` compares the stored value
/// against this constant and, if they differ, deletes the database file and builds a fresh one,
/// which refreshes outdated databases automatically.
const VERSION: usize = 1;

/// Open the sqlite database at `path` and make sure it contains all tables in the current layout.
///
/// The `meta` table records the layout version the file was written with:
///
/// * if no version is recorded yet, [`VERSION`] is written to it,
/// * if the recorded version differs from [`VERSION`], the connection is closed, the file is
///   removed and a new database is started in its place,
/// * if it matches, the database is used as is.
///
/// Afterwards the `commits`, `files` and `commit_file` tables are created unless they exist already.
///
/// # Errors
///
/// Fails if the database cannot be opened, closed or queried, if any statement fails to execute,
/// or if an incompatible database file cannot be removed.
pub fn create(path: impl AsRef<std::path::Path>) -> anyhow::Result<rusqlite::Connection> {
    let path = path.as_ref();
    let meta_table = r#"
CREATE TABLE if not exists meta(
version int
)"#;

    let mut con = rusqlite::Connection::open(path)?;
    con.execute_batch(meta_table)?;
    let stored_version: Option<usize> = con
        .query_row("SELECT version FROM meta", [], |row| row.get(0))
        .optional()?;

    // Decide whether the version row still has to be written, replacing the database file
    // first if it was written with a different layout.
    let needs_version_row = match stored_version {
        Some(stored) if stored == VERSION => false,
        Some(_) => {
            // The file can only be removed once the connection is closed; the connection handed
            // back alongside a close error is dropped and only the error is propagated.
            con.close().map_err(|(_, err)| err)?;
            std::fs::remove_file(path)
                .with_context(|| format!("Failed to remove incompatible database file at {path:?}"))?;
            con = rusqlite::Connection::open(path)?;
            con.execute_batch(meta_table)?;
            true
        }
        None => true,
    };
    if needs_version_row {
        con.execute("INSERT into meta(version) values(?)", params![VERSION])?;
    }

    let commits_table = r#"
CREATE TABLE if not exists commits(
hash blob(20) NOT NULL PRIMARY KEY
)
"#;
    // Files are stored as paths which also have an id for referencing purposes
    let files_table = r#"
CREATE TABLE if not exists files(
file_id integer NOT NULL PRIMARY KEY,
file_path text UNIQUE
)
"#;
    let commit_file_table = r#"
CREATE TABLE if not exists commit_file(
hash blob(20),
file_id text,
has_diff boolean NOT NULL,
lines_added integer NOT NULL,
lines_removed integer NOT NULL,
lines_before integer NOT NULL,
lines_after integer NOT NULL,
mode integer,
source_file_id integer,
FOREIGN KEY (hash) REFERENCES commits (hash),
FOREIGN KEY (file_id) REFERENCES files (file_id),
PRIMARY KEY (hash, file_id)
)
"#;
    // Referenced tables come first, in the same order as they are declared above.
    for table in [commits_table, files_table, commit_file_table] {
        con.execute_batch(table)?;
    }

    Ok(con)
}
Loading

0 comments on commit f8cc623

Please sign in to comment.