diff --git a/.db_file.agdb b/.db_file.agdb deleted file mode 100644 index e69de29bb..000000000 diff --git a/README.md b/README.md index 20836b928..a765111b5 100644 --- a/README.md +++ b/README.md @@ -2,63 +2,79 @@ [![Crates.io](https://img.shields.io/crates/v/agdb)](https://crates.io/crates/agdb) [![release](https://github.com/agnesoft/agdb/actions/workflows/release.yaml/badge.svg)](https://github.com/agnesoft/agdb/actions/workflows/release.yaml) [![coverage](https://github.com/agnesoft/agdb/actions/workflows/coverage.yaml/badge.svg)](https://github.com/agnesoft/agdb/actions/workflows/coverage.yaml) [![codecov](https://codecov.io/gh/agnesoft/agdb/branch/main/graph/badge.svg?token=Z6YO8C3XGU)](https://codecov.io/gh/agnesoft/agdb) -The Agnesoft Graph Database (aka _agdb_) is persistent memory mapped graph database using purely 'no-text' programmatic queries. It can be used as a main persistent storage as well as fast in-memory cache. Its typed but schema-less data store allows for seamless data updates with no downtime or costly migrations. All queries are constructed via a builder pattern (or directly as objects) with no special language or text parsing. +The Agnesoft Graph Database (aka _agdb_) is persistent memory mapped graph database using object 'no-text' queries. It can be used as a main persistent storage, data analytics platform as well as fast in-memory cache. Its typed schema-less data store allows for flexible and seamless data updates with no downtime or costly migrations. All queries are constructed via a builder pattern (or directly as objects) with no special language or text parsing. 
# Key Features +- Data plotted on a graph +- Typed key-value properties of graph elements (nodes & edges) - Persistent file based storage -- Memory mapped for fast querying - ACID compliant -- Programmatic queries (no text, no query language) -- Typed schema-less key-value data store +- Object queries with builder pattern (no text, no query language) +- Memory mapped for fast querying +- _No dependencies_ # Quickstart -Add `agdb` as a dependency to your project: - ``` cargo add agdb ``` -Basic usage demonstrating creating a database, insert the graph elements with data and querying them back (select and search): - -``` -use agdb::Db; -use agdb::Comparison; - -fn main() { - let mut db = Db::new("db_file.agdb").unwrap(); - - //create a nodes with data - db.exec_mut(&QueryBuilder::insert().nodes().aliases(&["users".into()]).query()).unwrap(); - let users = db.exec_mut(&QueryBuilder::insert().nodes().values(&[ - &[("id", 1).into(), ("username", "user_1").into()], - &[("id", 2).into(), ("username", "user_2").into()], - &[("id", 3).into(), ("username", "user_3").into()]] - ).query()).unwrap(); +Basic usage demonstrating creating a database, inserting graph elements with data and querying them back with select and search. 
The function using this code must handle `agdb::DbError` and [`agdb::QueryError`](docs/queries.md#queryerror) error types for operator `?` to work: - //connect nodes - db.exec_mut(&QueryBuilder::insert().edges().from(&["users".into()]).to(&users.ids()).query()).unwrap(); +```Rust +let mut db = Db::new("user_db.agdb")?; - //select nodes - let user_elements = db.exec(&QueryBuilder::select().ids(&users.ids()).query()).unwrap(); +db.exec_mut(&QueryBuilder::insert().nodes().aliases("users").query())?; +let users = db.exec_mut(&QueryBuilder::insert() + .nodes() + .values(vec![vec![("username", "Alice").into(), ("joined", 2023).into()], + vec![("username", "Bob").into(), ("joined", 2015).into()], + vec![("username", "John").into()]]) + .query())?; +db.exec_mut(&QueryBuilder::insert().edges().from("users").to(users.ids()).query())?; +``` - for element in user_elements.elements { - println!("{:?}: {:?}", element.id, element.values); - } +This code creates a database called `user_db.agdb` with a simple graph of 4 nodes. The first node is aliased `users` and 3 user nodes for Alice, Bob and John are then connected with edges to the `users` node. The arbitrary `username` and sometimes `joined` properties are attached to the user nodes. 
- // DbId(2): [DbKeyValue { key: String("id"), value: Int(1) }, DbKeyValue { key: String("username"), value: String("user_1") }] - // DbId(3): [DbKeyValue { key: String("id"), value: Int(2) }, DbKeyValue { key: String("username"), value: String("user_2") }] - // DbId(4): [DbKeyValue { key: String("id"), value: Int(3) }, DbKeyValue { key: String("username"), value: String("user_3") }] +You can select the graph elements (both nodes & edges) with their ids to get them back with their associated data (key-value properties): - //search with conditions - let user_id = db.exec(&QueryBuilder::search().from("users").where_().key("username").value(Comparison::Equal("user_2".into())).query()).unwrap(); +```Rust +let user_elements = db.exec(&QueryBuilder::select().ids(users.ids()).query())?; +println!("{:?}", user_elements); +// QueryResult { +// result: 3, +// elements: [ +// DbElement { id: DbId(2), values: [DbKeyValue { key: String("username"), value: String("Alice") }, DbKeyValue { key: String("joined"), value: Int(2023) }] }, +// DbElement { id: DbId(3), values: [DbKeyValue { key: String("username"), value: String("Bob") }, DbKeyValue { key: String("joined"), value: Int(2015) }] }, +// DbElement { id: DbId(4), values: [DbKeyValue { key: String("username"), value: String("John") }] } +// ] } +``` - println!("{:?}", user_id.elements); - //[DbElement { id: DbId(3), values: [] }] -} +You can also search through the graph to get back only the elements you want: + +```Rust +let user = db.exec(&QueryBuilder::select() + .search(QueryBuilder::search() + .from("users") + .where_() + .key("username") + .value(Comparison::Equal("John".into())) + .query()) + .query())?; +println!("{:?}", user); +// QueryResult { +// result: 1, +// elements: [ +// DbElement { id: DbId(4), values: [DbKeyValue { key: String("username"), value: String("John") }] } +// ] } ``` +For comprehensive overview of all queries see the [queries](docs/queries.md) reference or continue with more in-depth 
[guide](docs/guide.md). + # Reference -TBD +- [Concepts](docs/concepts.md) +- [Queries](docs/queries.md) +- [Guide](docs/guide.md) +- [But why?](docs/but_why.md) diff --git a/docs/but_why.md b/docs/but_why.md new file mode 100644 index 000000000..75bd4ce75 --- /dev/null +++ b/docs/but_why.md @@ -0,0 +1,73 @@ +The following items provide explanations for some of the design choices of `agdb`. All of them are based on research and extensive testing of various approaches and options. For example unlike most graph implementations out there the `agdb` is using pure contiguous vectors instead of linked lists. Curious to learn why? Read on! + +- [Why graph?](#why-graph) +- [Why not use an existing graph database?](#why-not-use-an-existing-graph-database) +- [Why object queries?](#why-object-queries) +- [Why single file?](#why-single-file) +- [What about sharding, replication and performance at scale?](#what-about-sharding-replication-and-performance-at-scale) + +# Why graph? + +The database area is dominated by relational database systems (tables) and text queries since the 1970s. However the issues with the relational database systems are numerous and they even gave rise to the regular SW profession - database engineer. This is because contrary to their name they are very awkward at representing actual relations between data which is always demanded by the real world applications. They typically use foreign keys and/or proxy tables to represent them. Additionally the tables naturally enforce fixed immutable data schema upon the data they store. To change the schema one needs to create a new database with the changed schema and copy the data over (this is called database migration). Such an operation is very costly and most database systems fare poorly when there are foreign keys involved (requiring them to be disabled for the migration to happen). As it turns out nowadays no database schema is truly immutable. 
New and changed requirements happen so often that the database schemas usually need updating (migrating) nearly every time there is an update to the systems using it. + +There is no solution to this "schema" issue because it is the inherent feature of representing data in tabular form. It can be only mitigated to some degree but your mileage will vary greatly when using these techniques many of which are considered anti-patterns. Things like indexes, indirection (storing data with varied length), storing blob data, data with internal format unknown to the database itself (e.g. JSON) are all ways to prevent the need for database migration at the cost of efficiency. While there are good reasons for representing data in tabular form (lookup speed and space efficiency) the costs very often far exceed the benefits. Plus as it turns out it is not even that efficient! + +The tables are represented as fixed size records (rows) one after another (this is what makes the schema immutable). This representation is the most efficient when we are reading entire rows at a time (all columns) which is very rarely the case. Most often we want only some of the columns which means we are discarding some (or most) of the row when reading it. This is the same problem the CPU itself has when using memory. It reads it using cache lines. If we happen to read only some of the line the rest is wasted and another line needs to be fetched for the next item(s) (this is called a `cache miss`). This is why contiguous collections (like a `vector`) are almost always the most efficient because they minimize the cache misses. Chandler Carruth had numerous talks at CppCon on this subject demonstrating that by far the biggest performance impact on software are the cache misses (over 50 % and up to 80 % !!!) with everything else being dwarfed in comparison. + +Beside trying to optimize the tables the most prominent "solution" are the NoSQL databases. 
They typically use a different way to store data, often in a "schema-less" manner, to cater to the above use cases - easing database migrations (or eliminating them) and providing more efficient data lookup. They typically choose some combination of key-value representation, document representation or a graph representation to scaffold the data instead of tables. They often trade in ACID properties, use write only - never delete "big tables" and other techniques. + +Of NoSQL databases the graph databases stand out in particular because by definition they actually store the relations between the data in the database. How the values are then "attached" to the graph varies but the graph itself serves as an "index" as well as a "map" to be efficiently searched and reasoned about. The sparse graph (not all nodes are connected to all others) representation is then actually the most flexible and accurate way to store and represent the sparse data (as mentioned nearly all real world data is sparse). + +There are two key properties of representing data as a graph that directly relate to the aforementioned issues of schema and data searching. Firstly the graph itself is the schema that can change freely as needed at any time without any restrictions eliminating the schema issue entirely. You do not need to be clairvoyant and agonize over the right database schema. You can do what works now and change your mind later without any issues. Secondly the graph allows accurately representing any actual relations between the data allowing the most efficient native traversal and lookup of data (vaguely resembling traditional indexing) making the lookup constantly efficient regardless of the data set size. Where table performance will deteriorate as it grows the graph will stay constantly efficient if you can traverse only the subset of the nodes via their relations even if the graph itself contained billions of nodes. 
+ +That is in a nutshell why the graph database is the best choice for most problem domains and data sets out there and why `agdb` is the graph database. + +**Costs** + +Everything has a cost and graph databases are no exception. Some operations and some data representations may be costlier in them as opposed to table based databases. For example if you had immutable schema that never updates then table based database might be a better fit as the representation in form of tables is more storage efficient. Or if you always read the whole table or whole rows then once again the table based databases might be more performant. Typically though these are uncommon edge cases unlikely to be found in the real world applications. The data is almost always sparse and diverse in nature, the schema is never truly stable etc. On the other hand most use cases benefit greatly from graph based representation and thus such a database is well worth it despite some (often more theoretical) costs. + +# Why not use an existing graph database? + +The following is the list of requirements for an ideal graph database: + +- Free license +- Faster than table based databases in most common use cases +- No new language for querying +- No text based queries +- Rust and/or C++ driver +- Resource efficient (storage & memory) + +Surprisingly there is no database that would fit the bill. Even the most popular graph databases such as `Neo4J` or `OrientDB` fall short on several of these requirements. They do have their own text based language (e.g. Cypher for Neo4J). They lack the drivers for C++/Rust. They are not particularly efficient (being mostly written in Java). Even the recent addition built in Rust - `SurrealDb` - is using text based SQL queries. Quite incomprehensibly its driver support for Rust itself is not very mature so far and was added only later despite the system being written in Rust. Something which is oddly common in the database world, e.g. 
`RethinkDb`, itself a document database, written mostly in C++, has no C++ support but does officially support for example Ruby. On top of these issues they often do not leverage the graph structure very well (except for Neo4J which does a great job at this) still leaning heavily towards tables. + +# Why object queries? + +The most ubiquitous database query language is SQL which is a text based language created in the 1970s. Its biggest advantage is that being text based it can be used from any language to communicate with the database. However just like relational (table) based databases from the same era it has some major flaws: + +- It needs to be parsed and interpreted by the database during runtime leading to common syntax errors that are hard or impossible to statically check. +- Being a separate programming language from the client coding language increases cognitive load on the programmer. +- It opens up the database to attacks from SQL-injection where the attacker is trying to force the interpreter to treat the user input (e.g. table or column names) as SQL code itself issuing malicious commands such as stealing or damaging the data. +- Being "Turing-complete" and a complex language in itself means it can lead (and often leads) to incredibly complex and unmaintainable queries. + +The last point is particularly troublesome because it partially stems from the `schema` issue discussed in the previous points. One common way to avoid changing the schema is to transform the data via queries. This is not only less efficient than representing the data in the correct form directly but also increases the complexity of queries significantly. + +The solutions include heavily sanitizing the user inputs in an attempt to prevent SQL injection attacks, wrapping the constructing of SQL in a builder-pattern to prevent syntax errors and easing the cognitive load by letting programmers create their queries in their main coding language. 
The complexity is often being reduced by the use of stored SQL procedures (pre-created queries). However all of these options can only mitigate the issues SQL has. + +Using native objects representing the queries eliminates all of the SQL issues sacrificing the portability between languages. However that can relatively easily be made up for via already very mature (de)serialization of native objects available in most languages. Using the builder pattern to construct these objects further improves their correctness and readability. Native objects carry no additional cognitive load on the programmer and can be easily used just like any other code. + +# Why single file? + +All operating systems have a fairly low limit on the number of open file descriptors for a program and for all programs in total making this system resource one of the rarest. Furthermore operating over multiple files does not seem to bring in any substantial benefit for the database while it complicates its implementation significantly. The graph database typically needs to have access to the full graph at all times unlike say key-value stores or document databases. Splitting the data into multiple files would therefore be actually detrimental. Lastly overall storage taken by the multiple files would not actually change as the amount of data would be the same. + +Conversely using just a single file (with a second temporary write ahead log file) makes everything simpler and easier. You can for example easily transfer the data to a different machine - it is just one file. The database can also operate on the file directly if memory mapping was turned off to save RAM at the cost of performance. The program would not need to juggle multiple files and consume valuable system resources. + +The one file is the database and the data. + +# What about sharding, replication and performance at scale? + +Most databases tackle the issue of (poor) performance at scale by scaling up using replication/sharding strategies. 
While these techniques are definitely useful and they are planned for `agdb` they should be avoided as much as possible. The increase in complexity when using replication or sharding is dramatic and it is only worth it if there is no other choice. + +The `agdb` is designed so that it performs well regardless of its size. Most read operations are O(1) and there is no limit on concurrency on them. Most write operations are O(1) amortized. The O(n) complexities are limited to individual node traversals, e.g. reading 1000 connected nodes will take 1000 O(1) operations = O(n) same as reading 1000 rows in a table. However if you structure your data well (meaning you do not blindly connect everything to everything) you can have as large a data set as your hardware can fit without issues if you can query only a subset of the graph (subgraph) since your query will have performance based on that subgraph and not all the data stored in the database. + +The point here is that you will need to scale out only when your database starts exceeding limits of a single machine. Adding data replication/backup will be a relatively easy feature. Sharding would be only slightly harder but the database has been written in a way that it can be used relatively easily. The obvious downside is the huge performance dip for such a setup. To alleviate this the local caches could be used but as mentioned this only further adds to complexity. + +So while features "at scale" are definitely coming you should avoid using them as much as possible. diff --git a/docs/concepts.md b/docs/concepts.md new file mode 100644 index 000000000..f89901b3b --- /dev/null +++ b/docs/concepts.md @@ -0,0 +1,74 @@ +# Concepts + +- [Concepts](#concepts) + - [Graph](#graph) + - [Query](#query) + - [Transaction](#transaction) + - [Storage](#storage) + +## Graph + +_Related:_ [Why graph?](but_why.md#why-graph) + +Graph is a set of nodes (also vertices, points) that are connected to each other through edges (also arcs, links). 
In `agdb` the data is plotted on directed graphs and there are no restrictions on their structure. They can be cyclic (forming a cycle), acyclic (being open ended), sparse (having only some connections between nodes), disjointed (thus forming multiple graphs), having self-referential edges (nodes being connected to themselves), having multiple edges to the same node (even itself) and/or in the same direction. + +Nodes and edges are `graph elements` and each can have key-value pairs associated with them usually referred to as `values`. Each graph element has a signed integer id (db id) - nodes having positive values while edges negative values. Note that upon removal of a graph element its id is freed and can be reused by subsequent inserts of new graph elements. + +**Terminology:** + +- Graph (set of nodes and edges) +- Node (point on a graph) +- Edge (connection between two nodes) +- Graph elements (nodes & edges) +- db id (graph element id, positive for nodes, negative for edges) +- Values (key-value pairs associated with a node or an edge) + +## Query + +_Related:_ [Why object queries?](but_why.md#why-object-queries), [Queries](queries.md) + +Query is a request to retrieve or manipulate data in a database (both the graph structure and `values` associated with the nodes and edges). In `agdb` queries are not texts (like in SQL) but rather objects that contain details about what is being requested. These objects are typically constructed via a query builder but it is also possible to create them like any other object. The builder steps resemble, and often indeed are, direct translations of well known SQL equivalents (e.g. `QueryBuilder::select() == SELECT`, `QueryBuilder::insert() == INSERT INTO`). + +Queries are executed by the database directly. The `agdb` distinguishes between `immutable` (retrieving data) and `mutable` (manipulating data) queries. Each query execution produces either a result or an error. 
In `agdb` there is a single `result` object containing a numerical result (i.e. number of affected elements or values) and a list of database elements. Each element in a result is comprised of a database id and a list of `values` (associated key-value pairs). + +In case of a failure the database execution yields an error detailing what went wrong instead of a result. + +See dedicated [queries](queries.md) documentation for details. + +**Terminology:** + +- Query (request to retrieve or manipulate data) +- Immutable query (request to retrieve data) +- Mutable query (request to manipulate data) +- Result (result of a query) + +## Transaction + +_Related_: [Queries](queries.md) + +Transactions are a way to provide atomicity, isolation and data consistency in a database (three of [ACID](https://en.wikipedia.org/wiki/ACID) properties). In `agdb` every query is a transaction but it is also possible to execute multiple queries as a single transaction. Just like `queries` transactions are immutable or mutable. One important rule is borrowed directly from Rust and enforced on the type level: + +_"There can be either unlimited number of concurrent immutable transactions or exactly one mutable transaction"_ + +In multithreaded environment you can easily synchronize the access to the database by using [`RwLock`](https://doc.rust-lang.org/std/sync/struct.RwLock.html). Furthermore unlike traditional transactions implemented in other database systems the `agdb` transactions are immediately executed requiring a closure containing (minimum) amount of code and queries required for the transaction to be performed. This forces the client to optimize their transactions and reduce the time the database is locked, which is particularly important for mutable transactions as they lock the entire database for their execution. 
+ +**Terminology:** + +- Transaction (set of queries to be executed atomically against a database wrapped in a closure) +- Mutable transaction (set of mutable & immutable queries wrapped in a closure) +- Immutable transaction (set of immutable queries wrapped in a closure) + +## Storage + +_Related_: [Why single file?](but_why.md#why-single-file) + +Every persistent database eventually stores its data somewhere on disk in one or more files. The `agdb` stores its data in a single file (that is being shadowed by another temporary write ahead log file). Its internal structure is very similar to that of memory which makes it very easy to map between the two. The file format is fully platform agnostic and the file can be safely transferred to another machine and loaded there. Similarly the `agdb` is by default a memory mapped database but it could just as easily operate purely on the file itself at the cost of read performance (might be implemented as a feature in the future). + +The database durability is provided by the write ahead log (WAL) file which records the reverse of every operation to be performed on the main file before it actually happens. In case of any catastrophic failure the main database file is repaired from the WAL on loading the database. + +Just like the memory the main database file will get fragmented over time. Sectors of the file used for the data that was later reallocated will remain unused (fragmented) until the database file is defragmented. That operation is performed automatically on database object instance drop. 
+ +**Terminology:** + +- File storage (underlying single data file) +- Write ahead log (WAL, shadowing file storage to provide durability) diff --git a/docs/guide.md b/docs/guide.md new file mode 100644 index 000000000..d2557ff70 --- /dev/null +++ b/docs/guide.md @@ -0,0 +1 @@ +# Implementation diff --git a/docs/queries.md b/docs/queries.md new file mode 100644 index 000000000..b10c993ba --- /dev/null +++ b/docs/queries.md @@ -0,0 +1,664 @@ +- [QueryResult](#queryresult) +- [QueryError](#queryerror) +- [Transactions](#transactions) +- [QueryIds \& QueryId](#queryids--queryid) +- [QueryValues](#queryvalues) +- [Mutable queries](#mutable-queries) + - [Insert](#insert) + - [Insert nodes](#insert-nodes) + - [Insert edges](#insert-edges) + - [Inserted aliases](#inserted-aliases) + - [Insert values](#insert-values) + - [Remove](#remove) + - [Remove elements](#remove-elements) + - [Remove aliases](#remove-aliases) + - [Remove values](#remove-values) +- [Immutable queries](#immutable-queries) + - [Select](#select) + - [Select elements](#select-elements) + - [Select values](#select-values) + - [Select keys](#select-keys) + - [Select key count](#select-key-count) + - [Select aliases](#select-aliases) + - [Select all aliases](#select-all-aliases) + - [Search](#search) + - [Conditions](#conditions) + +All interactions with the `agdb` are realized through queries. There are two kinds of queries: + +- Immutable queries +- Mutable queries + +Immutable queries read the data from the database through `select` and `search` queries. Mutable queries write to or delete from the database through `insert` and `remove` queries. All queries follow the Rust rules about borrowing: + +``` +There can be unlimited number of immutable concurrent queries or exactly one mutable query running against the database. 
+``` + +The queries are executed against the database by calling the corresponding method on the database object: + +```Rust +impl Db { + // immutable queries only + pub fn exec<T: Query>(&self, query: &T) -> Result<QueryResult, QueryError> + + // mutable queries only + pub fn exec_mut<T: QueryMut>(&mut self, query: &T) -> Result<QueryResult, QueryError> +} +``` + +Alternatively you can run a series of queries as a [transaction](#transactions). + +All queries return `Result<QueryResult, QueryError>`. The [`QueryResult`](#queryresult) is the universal data structure holding results of all queries in a uniform structure. The [`QueryError`](#queryerror) is the singular error type holding information of any failure or problem encountered when running the query. + +# QueryResult + +The `QueryResult` is the universal result type for all successful queries. It looks like: + +```Rust +pub struct QueryResult { + pub result: i64, + pub elements: Vec<DbElement>, +} +``` + +The `result` field holds the numerical result of the query. It typically returns the number of database items affected. For example when selecting from the database it will hold a positive number of elements returned. When removing from the database it will hold a negative number of elements deleted from the database. + +The `elements` field holds the [database elements](concepts.md#graph) returned. Each element looks like: + +```Rust +pub struct DbElement { + pub id: DbId, + pub values: Vec<DbKeyValue>, +} +``` + +The `id` (i.e. `pub struct DbId(pub i64)`) is a numerical identifier of a database element. Positive number means the element is a `node` while negative number means the element is an `edge`. The value `0` is a special value signifying no valid element and is used when certain queries return data not related to any particular element, e.g. aliases. 
+ +The values are `key-value` pairs (properties) associated with the given element: + +```Rust +pub struct DbKeyValue { + pub key: DbKey, + pub value: DbValue, +} +``` + +The `DbKey` is an alias of `DbValue` and the value itself is an enum of valid types: + +```Rust +pub enum DbValue { + Bytes(Vec<u8>), + Int(i64), + Uint(u64), + Float(DbFloat), + String(String), + VecInt(Vec<i64>), + VecUint(Vec<u64>), + VecFloat(Vec<DbFloat>), + VecString(Vec<String>), +} +``` + +Note the `DbFloat` type (i.e. `pub struct DbFloat(f64)`) which is a convenient wrapper of `f64` to provide opinionated implementation of some of the operations that are not floating type friendly like comparisons. In `agdb` the float type is using [`total_cmp` standard library function](https://doc.rust-lang.org/std/primitive.f64.html#method.total_cmp). Please see its documentation for important details about possible limits or issues on certain platforms. + +# QueryError + +Failure when running a query is reported through a single `QueryError` object which can optionally hold internal error (or chain of errors) that led to the failure. Most commonly it will represent **data error** or **logic error** in your query. Less commonly it may also report a failure to perform the requested operation due to underlying infrastructure issue (e.g. out of memory). It is up to the client code to handle the error. + +# Transactions + +You can run a series of queries as a transaction invoking corresponding methods on the database object: + +```Rust +impl Db { + // immutable transactions + pub fn transaction<T, E>(&self, f: impl Fn(&Transaction) -> Result<T, E>) -> Result<T, E> + + // mutable transactions + pub fn transaction_mut<T, E: From<QueryError>>(&mut self, f: impl Fn(&mut TransactionMut) -> Result<T, E>) -> Result<T, E> +} +``` + +The transaction methods take a closure that itself takes a transaction object as an argument. This is to prevent long lived transactions and force them to be as concise as possible. 
The transaction objects implement much the same methods as the `Db` itself (`exec` / `exec_mut`). It is not possible to nest transactions but you can run immutable queries within a mutable transaction `TransactionMut`. + +Note that you cannot manually abort, rollback or commit the transaction. These are handled by the database itself based on the result of the closure. If it's `Ok` the transaction will be committed (in case of `mutable` queries as there is nothing to commit for `immutable` queries). If the result is `Err` the transaction will be rolled back. + +In both cases the result will be returned and the signature of the transaction methods allows for custom mapping of the default `Result<QueryResult, QueryError>` to an arbitrary `<T, E>` result-error pair. + +Worth noting is that regular `exec / exec_mut` methods on the `Db` object are actually implemented as transactions. + +# QueryIds & QueryId + +Most queries operate over a set of database ids. The `QueryIds` type is actually an enum: + +```Rust +pub enum QueryIds { + Ids(Vec<QueryId>), + Search(SearchQuery), +} +``` + +It represents either a set of actual `ids` or a `search` query that will be executed as part of the larger query and its results fed as ids to the larger query. The `QueryId` is defined as another enum: + +```Rust +pub enum QueryId { + Id(DbId), + Alias(String), +} +``` + +This is because you can refer to the database elements via their numerical identifier or by the `string` alias (name). The `DbId` is then just a wrapper type: `pub struct DbId(pub i64)`. Both `QueryIds` and `QueryId` can be constructed from a large number of different types like raw `i64`, `&str`, `String` or vectors of those etc. 
+ +# QueryValues + +The `QueryValues` is an enum type that makes a distinction between singular and multiple values like so: + +```Rust +pub enum QueryValues { + Single(Vec<DbKeyValue>), + Multi(Vec<Vec<DbKeyValue>>), +} +``` + +This is especially important because it can change the meaning of a query making use of this type. For example when inserting elements into the database and supplying `QueryValues::Single` all the elements will have a copy of the single set of properties associated with them. Conversely `QueryValues::Multi` will initialize each element with a different provided set of properties but the number of inserted elements and the number of property sets must then match (it would be a query logic error if they did not match and the query would fail with such an error). + +# Mutable queries + +Mutable queries are the way to modify the data in the database. Remember there can only be one mutable query running against the database at any one time, preventing all other mutable or immutable queries from running concurrently. There are two types of mutable queries: + +- insert +- remove + +The `insert` queries are used for both inserting and updating data while `remove` queries are used to delete data from the database.
+ +## Insert + +There are 4 distinct insert queries: + +- insert nodes +- insert edges +- insert aliases +- insert values + +### Insert nodes + +```Rust +pub struct InsertNodesQuery { + pub count: u64, + pub values: QueryValues, + pub aliases: Vec<String>, +} +``` + +Builder pattern: + +```Rust +QueryBuilder::insert().nodes().count(2).query(); +QueryBuilder::insert().nodes().count(2).values_uniform(vec![("k", "v").into(), (1, 10).into()]).query(); +QueryBuilder::insert().nodes().aliases(vec!["a", "b"]).query(); +QueryBuilder::insert().nodes().aliases(vec!["a", "b"]).values(vec![vec![("k", 1).into()], vec![("k", 2).into()]]).query(); +QueryBuilder::insert().nodes().aliases(vec!["a", "b"]).values_uniform(vec![("k", "v").into(), (1, 10).into()]).query(); +QueryBuilder::insert().nodes().values(vec![vec![("k", 1).into()], vec![("k", 2).into()]]).query(); +``` + +The `count` is the number of nodes to be inserted into the database. It can be omitted (left `0`) if either `values` or `aliases` (or both) are provided. If the `values` is [`QueryValues::Single`](#queryvalues) you must provide either `count` or `aliases`. It is a logic error if the count cannot be inferred and is set to `0`. If both `values` [`QueryValues::Multi`](#queryvalues) and `aliases` are provided their lengths must match, otherwise it will result in a logic error. Empty alias (`""`) are not allowed. 
+ +The result will contain: + +- number of nodes inserted +- list of elements inserted with their ids (positive) but without the inserted values or aliases + +### Insert edges + +```Rust +pub struct InsertEdgesQuery { + pub from: QueryIds, + pub to: QueryIds, + pub values: QueryValues, + pub each: bool, +} +``` + +Builder pattern: + +```Rust +QueryBuilder::insert().edges().from(1).to(2).query(); +QueryBuilder::insert().edges().from("a").to("b").query(); +QueryBuilder::insert().edges().from("a").to(vec![1, 2]).query(); +QueryBuilder::insert().edges().from(vec![1, 2]).to(vec![2, 3]).query(); +QueryBuilder::insert().edges().from(vec![1, 2]).to(vec![2, 3]).each().query(); +QueryBuilder::insert().edges().from("a").to(vec![1, 2]).values(vec![vec![("k", 1).into()], vec![("k", 2).into()]]).query(); +QueryBuilder::insert().edges().from("a").to(vec![1, 2]).values_uniform(vec![("k", "v").into(), (1, 10).into()]).query(); +QueryBuilder::insert().edges().from_search(QueryBuilder::search().from("a").where_().node().query()).to_search(QueryBuilder::search().from("b").where_().node().query()).query(); +QueryBuilder::insert().edges().from_search(QueryBuilder::search().from("a").where_().node().query()).to_search(QueryBuilder::search().from("b").where_().node().query()).values(vec![vec![("k", 1).into()], vec![("k", 2).into()]]).query(); +QueryBuilder::insert().edges().from_search(QueryBuilder::search().from("a").where_().node().query()).to_search(QueryBuilder::search().from("b").where_().node().query()).values_uniform(vec![("k", "v").into(), (1, 10).into()]).query(); +``` + +The `from` and `to` represents list of origins and destinations of the edges to be inserted. As per [`QueryIds`](#queryids--queryid) it can be a list, single value, search query or even a result of another query (e.g. [insert nodes](#insert-nodes)) through the call of convenient `QueryResult::ids()` method. All ids must be `node`s and all must exist in the database otherwise data error will occur. 
If the `values` is [`QueryValues::Single`](#queryvalues) all edges will be associated with the copy of the same properties. If `values` is [`QueryValues::Multi`](#queryvalues) then the number of edges being inserted must match the provided values otherwise a logic error will occur. By default the `from` and `to` are expected to be of equal length specifying at each index the pair of nodes to connect with an edge. If all-to-all is desired set the `each` flag to `true`. The rule about the `values` [`QueryValues::Multi`](#queryvalues) still applies though so there must be enough values for all edges resulting from the combination. + +The result will contain: + +- number of edges inserted +- list of elements inserted with their ids (negative) but without the inserted values + +### Insert aliases + +```Rust +pub struct InsertAliasesQuery { + pub ids: QueryIds, + pub aliases: Vec<String>, +} +``` + +Builder pattern: + +```Rust +QueryBuilder::insert().aliases("a").of(1).query(); +QueryBuilder::insert().aliases("a").of("b").query(); +QueryBuilder::insert().aliases(vec!["a", "b"]).of(vec![1, 2]).query(); +``` + +Inserts or updates aliases of existing nodes (and only nodes, edges cannot have aliases) through this query. It takes `ids` [`QueryIds`](#queryids--queryid) and list of `aliases` as arguments. The number of aliases must match the `ids` (even if they are a search query). Empty aliases (`""`) are not allowed. + +Note that this query is used also for updating existing aliases. By inserting a different alias for an id that already has one, the alias will be overwritten with the new one.
+ +The result will contain: + +- number of aliases inserted or updated +- empty list of elements + +### Insert values + +```Rust +pub struct InsertValuesQuery { + pub ids: QueryIds, + pub values: QueryValues, +} +``` + +Builder pattern: + +```Rust +QueryBuilder::insert().values(vec![vec![("k", "v").into(), (1, 10).into()], vec![("k", 2).into()]]).ids(vec![1, 2]).query(); +QueryBuilder::insert().values(vec![vec![("k", "v").into(), (1, 10).into()], vec![("k", 2).into()]]).search(QueryBuilder::search().from("a").query()).query(); +QueryBuilder::insert().values_uniform(vec![("k", "v").into(), (1, 10).into()]).ids(vec![1, 2]).query(); +QueryBuilder::insert().values_uniform(vec![("k", "v").into(), (1, 10).into()]).search(QueryBuilder::search().from("a").query()).query(); +``` + +Inserts or updates key-value pairs (properties) of existing elements. You need to specify the `ids` [`QueryIds`](#queryids--queryid) and the list of `values`. The `values` can be either [`QueryValues::Single`](#queryvalues) that will insert the single set of properties to all elements identified by `ids` or [`QueryValues::Multi`](#queryvalues) that will insert to each `id` its own set of properties but their number must match the number of `ids`. + +Note that this query is used also for updating existing values. By inserting the same `key` its old value will be overwritten with the new one. 
+ +The result will contain: + +- number of key-value pairs (properties) inserted +- empty list of elements + +## Remove + +There are 3 distinct remove queries: + +- remove (elements) +- remove aliases +- remove values + +### Remove elements + +```Rust +pub struct RemoveQuery(pub QueryIds); +``` + +Builder pattern: + +```Rust +QueryBuilder::remove().ids(1).query(); +QueryBuilder::remove().ids("a").query(); +QueryBuilder::remove().ids(vec![1, 2]).query(); +QueryBuilder::remove().ids(vec!["a", "b"]).query(); +QueryBuilder::remove().search(QueryBuilder::search().from("a").query()).query(); +``` + +The elements identified by [`QueryIds`](#queryids--queryid) will be removed from the database if they exist. It is NOT an error if the elements to be removed do not exist in the database. All associated properties (key-value pairs) are also removed from all elements. Removing nodes will also remove all their edges (incoming and outgoing) and their properties. + +The result will contain: + +- negative number of elements removed (edges not explicitly listed or those listed but removed as part of one of their node's removal do not contribute to the result counter) +- empty list of elements + +### Remove aliases + +```Rust +pub struct RemoveAliasesQuery(pub Vec<String>); +``` + +Builder pattern: + +```Rust +QueryBuilder::remove().aliases("a").query(); +QueryBuilder::remove().aliases(vec!["a", "b"]).query(); +``` + +The aliases listed will be removed from the database if they exist. It is NOT an error if the aliases do not exist in the database. + +The result will contain: + +- negative number of aliases removed +- empty list of elements + +### Remove values + +```Rust +pub struct RemoveValuesQuery(pub SelectValuesQuery); +``` + +NOTE: See [`SelectValuesQuery`](#select-values) for more details. 
+ +Builder pattern: + +```Rust +QueryBuilder::remove().values(vec!["k1".into(), "k2".into()]).ids(vec![1, 2]).query(); +QueryBuilder::remove().values(vec!["k1".into(), "k2".into()]).search(QueryBuilder::search().from("a").query()).query(); +``` + +The properties (key-value pairs) identified by `keys` and associated with `ids` [`QueryIds`](#queryids--queryid) will be removed from the database if they exist. It is a data error if any of the `ids` do not exist in the database but it is NOT an error if any of the keys does not exist or is not associated as property to any of the `ids`. + +The result will contain: + +- Number of actually removed key-value pairs +- empty list of elements + +# Immutable queries + +Immutable queries read the data from the database and there can be unlimited number of concurrent queries running against the database at the same time. There are two types of immutable queries: + +- select +- search + +The `select` queries are used to read the data from the database using known `id`s of elements. The `search` queries are used to find the `id`s and the result of search queries is thus often combined with the `select` queries. + +## Select + +There are 6 select queries: + +- select (elements) +- select values +- select keys +- select key count +- select aliases +- select all aliases + +### Select elements + +```Rust +pub struct SelectQuery(pub QueryIds); +``` + +Builder pattern: + +```Rust +QueryBuilder::select().ids("a").query(); +QueryBuilder::select().ids(vec![1, 2]).query(); +QueryBuilder::select().search(QueryBuilder::search().from(1).query()).query(); +``` + +Selects elements identified by `ids` [`QueryIds`](#queryids--queryid) or search query with all their properties. If any of the ids does not exist in the database running the query will return an error. The search query is most commonly used to find, filter or otherwise limit what elements to select. 
+ +The result will contain: + +- number of returned elements +- list of elements with all properties + +### Select values + +```Rust +pub struct SelectValuesQuery { + pub keys: Vec<DbKey>, + pub ids: QueryIds, +} +``` + +Builder pattern: + +```Rust +QueryBuilder::select().values(vec!["k".into(), "k2".into()]).ids("a").query(); +QueryBuilder::select().values(vec!["k".into(), "k2".into()]).ids(vec![1, 2]).query(); +QueryBuilder::select().values(vec!["k".into(), "k2".into()]).search(QueryBuilder::search().from(1).query()).query(); +``` + +Selects elements identified by `ids` [`QueryIds`](#queryids--queryid) or search query with only selected properties (identified by the list of keys). If any of the ids does not exist in the database or does not have all the keys associated with it then running the query will return an error. While the search query is most commonly used to find, filter or otherwise limit what elements to select, using this particular query can limit what properties will be returned. + +The result will contain: + +- number of returned elements +- list of elements with only selected properties + +### Select keys + +```Rust +pub struct SelectKeysQuery(pub QueryIds); +``` + +Builder pattern: + +```Rust +QueryBuilder::select().keys().ids("a").query(); +QueryBuilder::select().keys().ids(vec![1, 2]).query(); +QueryBuilder::select().keys().search(QueryBuilder::search().from(1).query()).query(); +``` + +Selects elements identified by `ids` [`QueryIds`](#queryids--queryid) or search query with only keys returned. If any of the ids does not exist in the database running the query will return an error. This query is most commonly used for establishing what data is available on the graph elements (e.g. when transforming the data into a table this query could be used to populate the column names).
+ +The result will contain: + +- number of returned elements +- list of elements with only keys and default (empty `Int(0)` values) + +### Select key count + +```Rust +pub struct SelectKeyCountQuery(pub QueryIds); +``` + +Builder pattern: + +```Rust +QueryBuilder::select().key_count().ids("a").query(); +QueryBuilder::select().key_count().ids(vec![1, 2]).query(); +QueryBuilder::select().key_count().search(QueryBuilder::search().from(1).query()).query(); +``` + +Selects elements identified by `ids` [`QueryIds`](#queryids--queryid) or search query with only key count returned. If any of the ids does not exist in the database running the query will return an error. This query is most commonly used for establishing how many properties there are associated with the graph elements. + +The result will contain: + +- number of returned elements +- list of elements each with a single property (`String("key_count")`: `u64`) + +### Select aliases + +```Rust +pub struct SelectAliasesQuery(pub QueryIds); +``` + +Builder pattern: + +```Rust +QueryBuilder::select().aliases().ids(vec![1, 2]).query(); +QueryBuilder::select().aliases().search(QueryBuilder::search().from(1).query()).query(); +``` + +Selects aliases of the `ids` [`QueryIds`](#queryids--queryid) or a search. If any of the ids does not have an alias running the query will return an error. + +The result will contain: + +- number of returned elements +- list of elements each with a single property (`String("alias")`: `String`) + +### Select all aliases + +```Rust +pub struct SelectAllAliases {} +``` + +Builder pattern: + +```Rust +QueryBuilder::select().aliases().query() +``` + +Selects all aliases in the database. + +The result will contain: + +- number of elements with aliases +- list of elements with an alias each with a single property (`String("alias"): String`) + +## Search + +There is only a single search query that provides the ability to search the graph examining connected elements and their properties. 
While it is possible to construct the search queries manually, specifying conditions manually in particular can be excessively difficult and therefore **using the builder pattern is recommended**. + +```Rust +pub struct SearchQuery { + pub origin: QueryId, + pub destination: QueryId, + pub limit: u64, + pub offset: u64, + pub order_by: Vec<DbKeyOrder>, + pub conditions: Vec<QueryCondition>, +} + +pub enum DbKeyOrder { + Asc(DbKey), + Desc(DbKey), +} +``` + +Builder pattern: + +```Rust +QueryBuilder::search().from("a").query(); +QueryBuilder::search().to(1).query(); //reverse search +QueryBuilder::search().from("a").to("b").query(); //path search, A* + +//limit, offset and order_by can be applied similarly to all the search variants +QueryBuilder::search().from(1).order_by(vec![DbKeyOrder::Desc("age".into()), DbKeyOrder::Asc("name".into())]).query() +QueryBuilder::search().from(1).offset(10).query(); +QueryBuilder::search().from(1).limit(5).query(); +QueryBuilder::search().from(1).order_by(vec![DbKeyOrder::Desc("k".into())]).offset(10).query(); +QueryBuilder::search().from(1).order_by(vec![DbKeyOrder::Desc("k".into())]).limit(5).query(); +QueryBuilder::search().from(1).order_by(vec![DbKeyOrder::Desc("k".into())]).offset(10).limit(5).query(); +QueryBuilder::search().from(1).offset(10).limit(5).query(); +``` + +The search query is made up of the `origin` and `destination` of the search. Specifying only `origin` (from) will result in breadth first search along `from->to` edges. Specifying only `destination` will result in the reverse breadth first search along the `to<-from` edges. When both `origin` and `destination` are specified the search algorithm becomes a path search and the algorithm is switched to `A*`. Optionally you can specify a `limit` (0 = unlimited) and `offset` (0 = no offset) to the returned list of graph element ids. If specified (!= 0) the `origin` and the `destination` must exist in the database, otherwise an error will be returned. 
The elements can be optionally ordered with `order_by` list of keys allowing ascending/descending ordering based on multiple properties. + +Finally, the list of `conditions` specifies what each examined graph element must satisfy to be included in the result (and subjected to the `limit` and `offset`). + +**NOTE:** When both `origin` and `destination` are specified and the algorithm is switched to the `A*` the `limit` and `offset` are applied differently. In regular (open-ended) search the search will end when the `limit` is reached but with the path search (A\*) the `destination` must be reached first before they are applied. + +### Conditions + +The currently supported conditions are: + +- Where (opens nested list of conditions) +- Edge (if the element is an `edge`) +- Node (if the element is a `node`) +- Distance (if the current distance of the search satisfies the numerical comparison, each graph element away from the start increases the distance, including edges, i.e. second node from start is at distance `2`) +- EdgeCount (if the element is a node and total number of edges (in and out) satisfies the numerical comparison - self-referential edges are counted twice) +- EdgeCountFrom (if the element is a node and total number of outgoing edges satisfies the numerical comparison) +- EdgeCountTo (if the element is a node and total number of incoming edges satisfies the numerical comparison) +- Ids (if the element id is in the list) +- KeyValue (if the element's property has the `key` and its value satisfies `value` comparison) +- Keys (if the element has all the `keys` regardless of their values) +- EndWhere (closes nested list of conditions) + +All conditions can be further modified as follows: + +- Not (reverses the condition result) +- NotBeyond (stops the search beyond this element) + +The conditions can be chained with logic operators: + +- And (logical `and`) +- Or (logical `or`) + +```Rust +pub struct QueryCondition { + pub logic: QueryConditionLogic, + pub modifier:
QueryConditionModifier, + pub data: QueryConditionData, +} + +pub enum QueryConditionLogic { + And, + Or, +} + +pub enum QueryConditionModifier { + None, + Not, + NotBeyond, +} + +pub enum QueryConditionData { + Distance(CountComparison), + Edge, + EdgeCount(CountComparison), + EdgeCountFrom(CountComparison), + EdgeCountTo(CountComparison), + Ids(Vec<QueryId>), + KeyValue { key: DbKey, value: Comparison }, + Keys(Vec<DbKey>), + Node, + Where(Vec<QueryCondition>), +} + +pub enum CountComparison { + Equal(u64), + GreaterThan(u64), + GreaterThanOrEqual(u64), + LessThan(u64), + LessThanOrEqual(u64), + NotEqual(u64), +} + +pub enum Comparison { + Equal(DbValue), + GreaterThan(DbValue), + GreaterThanOrEqual(DbValue), + LessThan(DbValue), + LessThanOrEqual(DbValue), + NotEqual(DbValue), +} +``` + +Builder pattern: + +```Rust +//the where_() can be applied to any of the basic search queries after order_by/offset/limit +//not() and not_beyond() can be applied to all conditions including nested where_() +QueryBuilder::search().from(1).where_().distance(CountComparison::LessThan(3)).query(); +QueryBuilder::search().from(1).where_().edge().query(); +QueryBuilder::search().from(1).where_().edge_count(CountComparison::GreaterThan(2)).query(); +QueryBuilder::search().from(1).where_().edge_count_from(CountComparison::Equal(1)).query(); +QueryBuilder::search().from(1).where_().edge_count_to(CountComparison::NotEqual(1)).query(); +QueryBuilder::search().from(1).where_().node().query(); +QueryBuilder::search().from(1).where_().key("k").value(Comparison::Equal(1.into())).query(); +QueryBuilder::search().from(1).where_().keys(vec!["k1".into(), "k2".into()]).query(); +QueryBuilder::search().from(1).where_().not().keys(vec!["k1".into(), "k2".into()]).query(); +QueryBuilder::search().from(1).where_().ids(vec![1, 2]).query(); +QueryBuilder::search().from(1).where_().not().ids(vec![1, 2]).query(); +QueryBuilder::search().from(1).where_().not_beyond().ids("a").query();
+QueryBuilder::search().from(1).where_().node().or().edge().query(); +QueryBuilder::search().from(1).where_().node().and().distance(CountComparison::GreaterThanOrEqual(3)).query(); +QueryBuilder::search().from(1).where_().node().or().where_().edge().and().key("k").value(Comparison::Equal(1.into())).end_where().query(); +``` + +NOTE: The use of `where_` with an underscore as the method name is necessary to avoid conflict with the Rust keyword. + +The conditions are applied one at a time to each visited element and chained using logic operators `AND` and `OR`. They can be nested using `where_` and `end_where` (in place of brackets). The condition evaluator supports short-circuiting, not evaluating conditions further if the logical outcome cannot change. + +The condition `Distance` and the condition modifier `NotBeyond` are particularly important because they can directly influence the search. The former (`Distance`) can limit the depth of the search and can help with constructing more elaborate queries (or sequence thereof) extracting only fine grained elements (e.g. nodes whose edges have particular properties or are connected to other nodes with some properties). The latter (`NotBeyond`) can limit search to only certain areas of an otherwise larger graph. Its most basic usage would be with condition `ids` to flat out stop the search at certain elements. + +For further examples and use cases see the [in-depth guide](guide.md).
diff --git a/src/agdb/db.rs b/src/agdb/db.rs index 28572d0b4..abee144c2 100644 --- a/src/agdb/db.rs +++ b/src/agdb/db.rs @@ -566,30 +566,30 @@ impl Db { condition: &QueryConditionData, ) -> Result<SearchControl, DbError> { match condition { - QueryConditionData::Distance { value } => Ok(value.compare(distance)), + QueryConditionData::Distance(value) => Ok(value.compare(distance)), QueryConditionData::Edge => Ok(SearchControl::Continue(index.is_edge())), - QueryConditionData::EdgeCount { value } => { + QueryConditionData::EdgeCount(value) => { Ok(if let Some(node) = self.graph.node(index) { value.compare(node.edge_count()) } else { SearchControl::Continue(false) }) } - QueryConditionData::EdgeCountFrom { value } => { + QueryConditionData::EdgeCountFrom(value) => { Ok(if let Some(node) = self.graph.node(index) { value.compare(node.edge_count_from()) } else { SearchControl::Continue(false) }) } - QueryConditionData::EdgeCountTo { value } => { + QueryConditionData::EdgeCountTo(value) => { Ok(if let Some(node) = self.graph.node(index) { value.compare(node.edge_count_to()) } else { SearchControl::Continue(false) }) } - QueryConditionData::Ids { values } => { + QueryConditionData::Ids(values) => { Ok(SearchControl::Continue(values.iter().any(|id| { index.0 == match id { @@ -615,7 +615,7 @@ impl Db { false }, )), - QueryConditionData::Keys { values } => { + QueryConditionData::Keys(values) => { let keys = self .values .iter_key(&DbId(index.0)) @@ -626,7 +626,7 @@ impl Db { )) } QueryConditionData::Node => Ok(SearchControl::Continue(index.is_node())), - QueryConditionData::Where { conditions } => { + QueryConditionData::Where(conditions) => { self.evaluate_conditions(index, distance, conditions) } } diff --git a/src/agdb/query/query_condition.rs b/src/agdb/query/query_condition.rs index 8695532cd..ff275a751 100644 --- a/src/agdb/query/query_condition.rs +++ b/src/agdb/query/query_condition.rs @@ -18,16 +18,16 @@ pub enum QueryConditionModifier { #[derive(Debug, Clone, 
PartialEq)] pub enum QueryConditionData { - Distance { value: CountComparison }, + Distance(CountComparison), Edge, - EdgeCount { value: CountComparison }, - EdgeCountFrom { value: CountComparison }, - EdgeCountTo { value: CountComparison }, - Ids { values: Vec<QueryId> }, + EdgeCount(CountComparison), + EdgeCountFrom(CountComparison), + EdgeCountTo(CountComparison), + Ids(Vec<QueryId>), KeyValue { key: DbKey, value: Comparison }, - Keys { values: Vec<DbKey> }, + Keys(Vec<DbKey>), Node, - Where { conditions: Vec<QueryCondition> }, + Where(Vec<QueryCondition>), } #[derive(Debug, Clone, PartialEq)] diff --git a/src/agdb/query/query_ids.rs b/src/agdb/query/query_ids.rs index bb9bc7125..7e368fd6d 100644 --- a/src/agdb/query/query_ids.rs +++ b/src/agdb/query/query_ids.rs @@ -8,6 +8,15 @@ pub enum QueryIds { Search(SearchQuery), } +impl QueryIds { + pub(crate) fn get_ids(self) -> Vec<QueryId> { + match self { + QueryIds::Ids(ids) => ids, + QueryIds::Search(_) => vec![], + } + } +} + impl From<Vec<QueryId>> for QueryIds { fn from(value: Vec<QueryId>) -> Self { QueryIds::Ids(value) @@ -86,4 +95,19 @@ mod tests { fn derived_from_debug() { format!("{:?}", QueryIds::Ids(vec![QueryId::from(0)])); } + + #[test] + fn get_ids_from_search() { + let ids = QueryIds::Search(SearchQuery { + origin: QueryId::Id(DbId(0)), + destination: QueryId::Id(DbId(0)), + limit: 0, + offset: 0, + order_by: vec![], + conditions: vec![], + }) + .get_ids(); + + assert_eq!(ids, vec![]); + } } diff --git a/src/agdb/query/remove_aliases_query.rs b/src/agdb/query/remove_aliases_query.rs index fe26813e4..b21f294a7 100644 --- a/src/agdb/query/remove_aliases_query.rs +++ b/src/agdb/query/remove_aliases_query.rs @@ -3,13 +3,11 @@ use crate::Db; use crate::QueryError; use crate::QueryResult; -pub struct RemoveAliasesQuery { - pub aliases: Vec<String>, -} +pub struct RemoveAliasesQuery(pub Vec<String>); impl QueryMut for RemoveAliasesQuery { fn process(&self, db: &mut Db, result: &mut QueryResult) -> Result<(), 
QueryError> { - for alias in &self.aliases { + for alias in &self.0 { if db.remove_alias(alias)? { result.result -= 1; } diff --git a/src/agdb/query/select_aliases_query.rs b/src/agdb/query/select_aliases_query.rs index 296102613..b3465e595 100644 --- a/src/agdb/query/select_aliases_query.rs +++ b/src/agdb/query/select_aliases_query.rs @@ -6,13 +6,11 @@ use crate::Query; use crate::QueryError; use crate::QueryResult; -pub struct SelectAliasesQuery { - pub ids: QueryIds, -} +pub struct SelectAliasesQuery(pub QueryIds); impl Query for SelectAliasesQuery { fn process(&self, db: &Db, result: &mut QueryResult) -> Result<(), QueryError> { - match &self.ids { + match &self.0 { QueryIds::Ids(ids) => { result.elements.reserve(ids.len()); result.result += ids.len() as i64; diff --git a/src/agdb/query_builder/remove.rs b/src/agdb/query_builder/remove.rs index 64d06f8f0..a0009d427 100644 --- a/src/agdb/query_builder/remove.rs +++ b/src/agdb/query_builder/remove.rs @@ -14,9 +14,7 @@ pub struct Remove {} impl Remove { pub fn aliases<T: Into<QueryAliases>>(self, names: T) -> RemoveAliases { - RemoveAliases(RemoveAliasesQuery { - aliases: Into::<QueryAliases>::into(names).0, - }) + RemoveAliases(RemoveAliasesQuery(Into::<QueryAliases>::into(names).0)) } pub fn ids<T: Into<QueryIds>>(self, ids: T) -> RemoveIds { diff --git a/src/agdb/query_builder/search.rs b/src/agdb/query_builder/search.rs index df66a543d..58e4132ec 100644 --- a/src/agdb/query_builder/search.rs +++ b/src/agdb/query_builder/search.rs @@ -52,8 +52,8 @@ impl SearchFrom { SelectOffset(self.0) } - pub fn order_by(mut self, keys: &[DbKeyOrder]) -> SearchOrderBy { - self.0.order_by = keys.to_vec(); + pub fn order_by(mut self, keys: Vec<DbKeyOrder>) -> SearchOrderBy { + self.0.order_by = keys; SearchOrderBy(self.0) } diff --git a/src/agdb/query_builder/select.rs b/src/agdb/query_builder/select.rs index 02ddfc047..63e79cea8 100644 --- a/src/agdb/query_builder/select.rs +++ b/src/agdb/query_builder/select.rs @@ -16,9 +16,7 
@@ pub struct Select {} impl Select { pub fn aliases(self) -> SelectAliases { - SelectAliases(SelectAliasesQuery { - ids: QueryIds::Ids(vec![]), - }) + SelectAliases(SelectAliasesQuery(QueryIds::Ids(vec![]))) } pub fn ids<T: Into<QueryIds>>(self, ids: T) -> SelectIds { diff --git a/src/agdb/query_builder/select_aliases.rs b/src/agdb/query_builder/select_aliases.rs index ffdab15d8..dbf35e8d2 100644 --- a/src/agdb/query_builder/select_aliases.rs +++ b/src/agdb/query_builder/select_aliases.rs @@ -7,21 +7,15 @@ pub struct SelectAliases(pub SelectAliasesQuery); pub struct SelectAliasesIds(pub SelectAliasesQuery); -impl SelectAliasesIds { - pub fn query(self) -> SelectAliasesQuery { - self.0 - } -} - impl SelectAliases { pub fn ids<T: Into<QueryIds>>(mut self, ids: T) -> SelectAliasesIds { - self.0.ids = ids.into(); + self.0 .0 = ids.into(); SelectAliasesIds(self.0) } pub fn search(mut self, query: SearchQuery) -> SelectAliasesIds { - self.0.ids = QueryIds::Search(query); + self.0 .0 = QueryIds::Search(query); SelectAliasesIds(self.0) } @@ -30,3 +24,9 @@ impl SelectAliases { SelectAllAliases {} } } + +impl SelectAliasesIds { + pub fn query(self) -> SelectAliasesQuery { + self.0 + } +} diff --git a/src/agdb/query_builder/where_.rs b/src/agdb/query_builder/where_.rs index 1e952b26a..05d95f5b5 100644 --- a/src/agdb/query_builder/where_.rs +++ b/src/agdb/query_builder/where_.rs @@ -3,10 +3,11 @@ use crate::query::query_condition::QueryCondition; use crate::query::query_condition::QueryConditionData; use crate::query::query_condition::QueryConditionLogic; use crate::query::query_condition::QueryConditionModifier; -use crate::query::query_id::QueryId; +use crate::query::query_values::QueryKeys; use crate::query::search_query::SearchQuery; use crate::Comparison; use crate::DbKey; +use crate::QueryIds; pub struct Where { logic: QueryConditionLogic, @@ -36,7 +37,7 @@ impl Where { self.add_condition(QueryCondition { logic: self.logic, modifier: self.modifier, - data: 
QueryConditionData::Distance { value: comparison }, + data: QueryConditionData::Distance(comparison), }); WhereLogicOperator(self) @@ -56,7 +57,7 @@ impl Where { self.add_condition(QueryCondition { logic: self.logic, modifier: self.modifier, - data: QueryConditionData::EdgeCount { value: comparison }, + data: QueryConditionData::EdgeCount(comparison), }); WhereLogicOperator(self) @@ -66,7 +67,7 @@ impl Where { self.add_condition(QueryCondition { logic: self.logic, modifier: self.modifier, - data: QueryConditionData::EdgeCountFrom { value: comparison }, + data: QueryConditionData::EdgeCountFrom(comparison), }); WhereLogicOperator(self) @@ -76,19 +77,17 @@ impl Where { self.add_condition(QueryCondition { logic: self.logic, modifier: self.modifier, - data: QueryConditionData::EdgeCountTo { value: comparison }, + data: QueryConditionData::EdgeCountTo(comparison), }); WhereLogicOperator(self) } - pub fn ids(mut self, ids: &[QueryId]) -> WhereLogicOperator { + pub fn ids<T: Into<QueryIds>>(mut self, ids: T) -> WhereLogicOperator { self.add_condition(QueryCondition { logic: self.logic, modifier: self.modifier, - data: QueryConditionData::Ids { - values: ids.to_vec(), - }, + data: QueryConditionData::Ids(Into::<QueryIds>::into(ids).get_ids()), }); WhereLogicOperator(self) @@ -101,13 +100,11 @@ impl Where { } } - pub fn keys(mut self, names: &[DbKey]) -> WhereLogicOperator { + pub fn keys<T: Into<QueryKeys>>(mut self, keys: T) -> WhereLogicOperator { self.add_condition(QueryCondition { logic: self.logic, modifier: self.modifier, - data: QueryConditionData::Keys { - values: names.to_vec(), - }, + data: QueryConditionData::Keys(Into::<QueryKeys>::into(keys).0), }); WhereLogicOperator(self) @@ -139,7 +136,7 @@ impl Where { self.add_condition(QueryCondition { logic: self.logic, modifier: self.modifier, - data: QueryConditionData::Where { conditions: vec![] }, + data: QueryConditionData::Where(vec![]), }); self.conditions.push(vec![]); @@ -163,7 +160,7 @@ impl Where { if let 
Some(QueryCondition { logic: _, modifier: _, - data: QueryConditionData::Where { conditions }, + data: QueryConditionData::Where(conditions), }) = current_conditions.last_mut() { *conditions = last_conditions; @@ -226,6 +223,7 @@ impl WhereLogicOperator { mod test { use super::*; use crate::DbId; + use crate::QueryId; #[test] fn invalid_collapse() { diff --git a/tests/db_test.rs b/tests/db_test.rs index cb1b8f1f6..550ed7347 100644 --- a/tests/db_test.rs +++ b/tests/db_test.rs @@ -1,6 +1,5 @@ mod test_db; -use agdb::Comparison; use agdb::Db; use agdb::DbElement; use agdb::DbId; @@ -258,39 +257,3 @@ fn optimize_on_drop() { assert!(optimized_file_size < db_file_size); } - -#[rustfmt::skip] -#[test] -fn quickstart() { - let _ = std::fs::remove_file("db_file.agdb"); - let mut db = Db::new("db_file.agdb").unwrap(); - - //create a nodes with data - db.exec_mut(&QueryBuilder::insert().nodes().aliases("users").query()).unwrap(); - let users = db.exec_mut(&QueryBuilder::insert().nodes().values(vec![ - vec![("id", 1).into(), ("username", "user_1").into()], - vec![("id", 2).into(), ("username", "user_2").into()], - vec![("id", 3).into(), ("username", "user_3").into()]] - ).query()).unwrap(); - - //connect nodes - db.exec_mut(&QueryBuilder::insert().edges().from("users").to(users.ids()).query()).unwrap(); - - //select nodes - let user_elements = db.exec(&QueryBuilder::select().ids(users.ids()).query()).unwrap(); - - for element in user_elements.elements { - println!("{:?}: {:?}", element.id, element.values); - } - - // DbId(2): [DbKeyValue { key: String("id"), value: Int(1) }, DbKeyValue { key: String("username"), value: String("user_1") }] - // DbId(3): [DbKeyValue { key: String("id"), value: Int(2) }, DbKeyValue { key: String("username"), value: String("user_2") }] - // DbId(4): [DbKeyValue { key: String("id"), value: Int(3) }, DbKeyValue { key: String("username"), value: String("user_3") }] - - //search with conditions - let user_id = 
db.exec(&QueryBuilder::search().from("users").where_().key("username").value(Comparison::Equal("user_2".into())).query()).unwrap(); - - println!("{:?}", user_id.elements); - //[DbElement { id: DbId(3), values: [] }] - let _ = std::fs::remove_file("db_file.agdb"); -} diff --git a/tests/examples.rs b/tests/examples.rs new file mode 100644 index 000000000..05d91dd09 --- /dev/null +++ b/tests/examples.rs @@ -0,0 +1,68 @@ +mod test_db; + +use crate::test_db::TestFile; +use agdb::Comparison; +use agdb::Db; +use agdb::QueryBuilder; + +#[test] +fn quickstart() { + let _test_file = TestFile::from("db_file.agdb"); + + let mut db = Db::new("db_file.agdb").unwrap(); + let insert_users_root = QueryBuilder::insert().nodes().aliases("users").query(); + db.exec_mut(&insert_users_root).unwrap(); + + let user_values = vec![ + vec![("id", 1).into(), ("username", "user_1").into()], + vec![("id", 2).into(), ("username", "user_2").into()], + vec![("id", 3).into(), ("username", "user_3").into()], + ]; + let users = db + .exec_mut(&QueryBuilder::insert().nodes().values(user_values).query()) + .unwrap(); + + db.exec_mut( + &QueryBuilder::insert() + .edges() + .from("users") + .to(users.ids()) + .query(), + ) + .unwrap(); + + let user_elements = db + .exec(&QueryBuilder::select().ids(users.ids()).query()) + .unwrap(); + + println!("{:?}", user_elements); + // QueryResult { + // result: 3, + // elements: [ + // DbElement { id: DbId(2), values: [DbKeyValue { key: String("id"), value: Int(1) }, DbKeyValue { key: String("username"), value: String("user_1") }] }, + // DbElement { id: DbId(3), values: [DbKeyValue { key: String("id"), value: Int(2) }, DbKeyValue { key: String("username"), value: String("user_2") }] }, + // DbElement { id: DbId(4), values: [DbKeyValue { key: String("id"), value: Int(3) }, DbKeyValue { key: String("username"), value: String("user_3") }] } + // ] } + + let user_id = db + .exec( + &QueryBuilder::select() + .search( + QueryBuilder::search() + .from("users") + .where_() 
+ .key("username") + .value(Comparison::Equal("user_2".into())) + .query(), + ) + .query(), + ) + .unwrap(); + + println!("{:?}", user_id); + // QueryResult { + // result: 1, + // elements: [ + // DbElement { id: DbId(3), values: [DbKeyValue { key: String("id"), value: Int(2) }, DbKeyValue { key: String("username"), value: String("user_2") }] } + // ] } +} diff --git a/tests/search_test.rs b/tests/search_test.rs index 412e0291d..488e6dc0c 100644 --- a/tests/search_test.rs +++ b/tests/search_test.rs @@ -326,7 +326,7 @@ fn search_from_ordered_by() { .search( QueryBuilder::search() .from("users") - .order_by(&[ + .order_by(vec![ DbKeyOrder::Desc("age".into()), DbKeyOrder::Asc("name".into()), ]) diff --git a/tests/search_where_test.rs b/tests/search_where_test.rs index 0e7f6d01b..a459589f8 100644 --- a/tests/search_where_test.rs +++ b/tests/search_where_test.rs @@ -139,7 +139,7 @@ fn search_from_where_keys() { QueryBuilder::search() .from("users") .where_() - .keys(&["username".into(), "id".into()]) + .keys(vec!["username".into(), "id".into()]) .query(), &[16, 15, 14, 13, 12], ); @@ -243,10 +243,10 @@ fn search_from_where_node_edge() { .node() .and() .not() - .ids(&[1.into(), 2.into(), 3.into()]) + .ids(vec![1, 2, 3]) .and() .not_beyond() - .ids(&["users".into()]) + .ids("users") .query(), &[8, 7, 6], ); @@ -263,10 +263,10 @@ fn search_from_where_ids() { QueryBuilder::search() .from(1) .where_() - .ids(&["docs".into(), "users".into()]) + .ids(vec!["docs", "users"]) .and() .not_beyond() - .ids(&["docs".into(), "users".into()]) + .ids(vec!["docs", "users"]) .query(), &[3, 2], ); @@ -278,7 +278,7 @@ fn search_from_where_key_value() { db.exec_ids( QueryBuilder::search() .from("users") - .order_by(&[DbKeyOrder::Asc("id".into())]) + .order_by(vec![DbKeyOrder::Asc("id".into())]) .where_() .key("active") .value(Comparison::Equal(1.into())) @@ -288,7 +288,7 @@ fn search_from_where_key_value() { db.exec_ids( QueryBuilder::search() .from("users") - 
.order_by(&[DbKeyOrder::Asc("id".into())]) + .order_by(vec![DbKeyOrder::Asc("id".into())]) .where_() .key("active") .value(Comparison::NotEqual(1.into())) @@ -298,7 +298,7 @@ fn search_from_where_key_value() { db.exec_ids( QueryBuilder::search() .from("users") - .order_by(&[DbKeyOrder::Asc("id".into())]) + .order_by(vec![DbKeyOrder::Asc("id".into())]) .where_() .key("active") .value(Comparison::LessThan(1.into())) @@ -308,7 +308,7 @@ fn search_from_where_key_value() { db.exec_ids( QueryBuilder::search() .from("users") - .order_by(&[DbKeyOrder::Asc("id".into())]) + .order_by(vec![DbKeyOrder::Asc("id".into())]) .where_() .key("active") .value(Comparison::LessThanOrEqual(1.into())) @@ -318,7 +318,7 @@ fn search_from_where_key_value() { db.exec_ids( QueryBuilder::search() .from("users") - .order_by(&[DbKeyOrder::Desc("id".into())]) + .order_by(vec![DbKeyOrder::Desc("id".into())]) .where_() .key("active") .value(Comparison::GreaterThan(0.into())) @@ -328,7 +328,7 @@ fn search_from_where_key_value() { db.exec_ids( QueryBuilder::search() .from("users") - .order_by(&[DbKeyOrder::Desc("id".into())]) + .order_by(vec![DbKeyOrder::Desc("id".into())]) .where_() .key("active") .value(Comparison::GreaterThanOrEqual(0.into())) @@ -373,10 +373,10 @@ fn search_from_limit_offset_where() { .node() .and() .not() - .ids(&[1.into(), 2.into(), 3.into()]) + .ids(vec![1, 2, 3]) .and() .not_beyond() - .ids(&["users".into()]) + .ids("users") .query(), &[8, 7], ); @@ -388,10 +388,10 @@ fn search_from_limit_offset_where() { .node() .and() .not() - .ids(&[1.into(), 2.into(), 3.into()]) + .ids(vec![1, 2, 3]) .and() .not_beyond() - .ids(&["users".into()]) + .ids("users") .query(), &[7, 6], ); @@ -404,10 +404,10 @@ fn search_from_limit_offset_where() { .node() .and() .not() - .ids(&[1.into(), 2.into(), 3.into()]) + .ids(vec![1, 2, 3]) .and() .not_beyond() - .ids(&["users".into()]) + .ids("users") .query(), &[7], ); @@ -423,9 +423,9 @@ fn search_from_to_where() { .to(7) .where_() .not_beyond() - 
.ids(&["docs".into()]) + .ids("docs") .and() - .keys(&["id".into()]) + .keys(vec!["id".into()]) .query(), &[12.into()], ) diff --git a/tests/test_db/mod.rs b/tests/test_db/mod.rs index 8f618e528..bb556de08 100644 --- a/tests/test_db/mod.rs +++ b/tests/test_db/mod.rs @@ -8,7 +8,7 @@ use agdb::QueryError; use agdb::QueryMut; use agdb::QueryResult; use agdb::TransactionMut; -use test_file::TestFile; +pub use test_file::TestFile; pub struct TestDb { _test_file: TestFile,