From 4f9a004c55cfa7686308282a27963f267b5c9389 Mon Sep 17 00:00:00 2001 From: Cool Developer Date: Tue, 1 Nov 2022 05:57:11 -0400 Subject: [PATCH 01/24] add adr --- docs/architecture/README.md | 2 + .../adr-001-node-key-refactoring.md | 81 +++++++++++++++++++ 2 files changed, 83 insertions(+) create mode 100644 docs/architecture/adr-001-node-key-refactoring.md diff --git a/docs/architecture/README.md b/docs/architecture/README.md index 052093828..a4def9afc 100644 --- a/docs/architecture/README.md +++ b/docs/architecture/README.md @@ -20,3 +20,5 @@ If recorded decisions turned out to be lacking, convene a discussion, record the and then modify the code to match. ## ADR Table of Contents + +- [ADR 001: Node Key Refactoring](./adr-001-node-key-refactoring.md) diff --git a/docs/architecture/adr-001-node-key-refactoring.md b/docs/architecture/adr-001-node-key-refactoring.md new file mode 100644 index 000000000..d41754aee --- /dev/null +++ b/docs/architecture/adr-001-node-key-refactoring.md @@ -0,0 +1,81 @@ +# ADR ADR-001: Node Key Refactoring + +## Changelog + +- 2022-10-31: First draft + +## Status + +Proposed + +## Context + +The original node key of IAVL is a hash of the node and it does not take advantage of data locality on disk. The nodes are stored in a random location of the disk due to the random hash value, so it needs to do a random search of the disk to find the node. + +The `orphans` are used to manage the removed nodes in the current version and allow to deletion of the removed nodes for the specific version from the disk through the `DeleteVersion`. It needs to track every time when updating the tree and also requires extra storage to store `orphans`, but there are not many use cases of `DeleteVersion`. There are two use cases, the first one is the rollback of the tree and the second one is to remove the unnecessary old nodes. + +## Decision + +- Use the sequenced integer ID as a node key like `bigendian(nodeKey)` format. 
+- Remove the `version` field from the node structure. +- Remove the `orphans` from the tree. + +New node structure + +```go +type Node struct { + key []byte + value []byte + hash []byte + leftHash []byte + rightHash []byte + nodeKey int64 // new field, use as a key + leftNodeKey int64 // new field, need to store + rightNodeKey int64 // new field, need to store + version int64 // will remove + size int64 + leftNode *Node + rightNode *Node + subtreeHeight int8 + persisted bool +} +``` + +New tree structure + +```go +type MutableTree struct { + *ImmutableTree + lastSaved *ImmutableTree + nonce int64 // new field to track the current ID + orphans map[int64]int64 // will remove + versions map[int64]bool + allRootLoaded bool + unsavedFastNodeAdditions map[string]*fastnode.Node + unsavedFastNodeRemovals map[string]interface{} + ndb *nodeDB + skipFastStorageUpgrade bool + + mtx sync.Mutex +} +``` + +## Consequences + +### Positive + +Using the sequenced integer ID, we take advantage of data locality in the bTree and it leads to performance improvements. + +Removing orphans also provides performance improvements including memory and storage saving. Also, it makes it easy to rollback the tree. Because we will keep the sequenced segment IDs for the specific version, and we can remove all nodes for which the `nodeKey` is greater than the specified integer value. + +### Negative + +It requires extra storage to store the node because it should keep `leftNodeKey` and `rightNodeKey` to iterate the tree. Instead, we can delete the `version` in the node and reduce the key size. + +It can't delete the old nodes for the specific version due to removing orphans. But it makes `rollback` easier and it makes it possible to remove old nodes through `import` and `export` functionalities. The `export` will restruct the tree to make node IDs to a sequenced segment like (1 ... node_sieze). 
+ +## References + +- https://github.com/cosmos/iavl/issues/548 +- https://github.com/cosmos/iavl/issues/137 +- https://github.com/cosmos/iavl/issues/571 From 8646a9a5a3164880f9855dcb5d5809ff920b04d7 Mon Sep 17 00:00:00 2001 From: Cool Developer Date: Tue, 1 Nov 2022 07:10:36 -0400 Subject: [PATCH 02/24] small fix --- docs/architecture/adr-001-node-key-refactoring.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/architecture/adr-001-node-key-refactoring.md b/docs/architecture/adr-001-node-key-refactoring.md index d41754aee..ef3ce0130 100644 --- a/docs/architecture/adr-001-node-key-refactoring.md +++ b/docs/architecture/adr-001-node-key-refactoring.md @@ -46,8 +46,8 @@ New tree structure ```go type MutableTree struct { *ImmutableTree - lastSaved *ImmutableTree - nonce int64 // new field to track the current ID + lastSaved *ImmutableTree + nonce int64 // new field to track the current ID orphans map[int64]int64 // will remove versions map[int64]bool allRootLoaded bool From 6e5b081b98d48659a225c4c8a0459d14d40a5c0e Mon Sep 17 00:00:00 2001 From: Cool Developer Date: Tue, 1 Nov 2022 07:52:30 -0400 Subject: [PATCH 03/24] remove child hashes --- docs/architecture/adr-001-node-key-refactoring.md | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/docs/architecture/adr-001-node-key-refactoring.md b/docs/architecture/adr-001-node-key-refactoring.md index ef3ce0130..336202ef8 100644 --- a/docs/architecture/adr-001-node-key-refactoring.md +++ b/docs/architecture/adr-001-node-key-refactoring.md @@ -17,6 +17,7 @@ The `orphans` are used to manage the removed nodes in the current version and al ## Decision - Use the sequenced integer ID as a node key like `bigendian(nodeKey)` format. +- Remove the `leftHash` and `rightHash` fields, and instead store `hash` field. - Remove the `version` field from the node structure. - Remove the `orphans` from the tree. 
@@ -26,10 +27,10 @@ New node structure type Node struct { key []byte value []byte - hash []byte - leftHash []byte - rightHash []byte - nodeKey int64 // new field, use as a key + hash []byte // keep this field + leftHash []byte // will remove + rightHash []byte // will remove + nodeKey int64 // new field, use as a node key leftNodeKey int64 // new field, need to store rightNodeKey int64 // new field, need to store version int64 // will remove @@ -64,13 +65,13 @@ type MutableTree struct { ### Positive -Using the sequenced integer ID, we take advantage of data locality in the bTree and it leads to performance improvements. +Using the sequenced integer ID, we take advantage of data locality in the bTree and it leads to performance improvements. Also it can reduce the node size in the storage. Removing orphans also provides performance improvements including memory and storage saving. Also, it makes it easy to rollback the tree. Because we will keep the sequenced segment IDs for the specific version, and we can remove all nodes for which the `nodeKey` is greater than the specified integer value. ### Negative -It requires extra storage to store the node because it should keep `leftNodeKey` and `rightNodeKey` to iterate the tree. Instead, we can delete the `version` in the node and reduce the key size. +It requires extra storage to store the node because it should keep `leftNodeKey` and `rightNodeKey` to iterate the tree. Instead, we can delete the `version`, `leftHash` and `rightHash` fields in the node and reduce the key size. It can't delete the old nodes for the specific version due to removing orphans. But it makes `rollback` easier and it makes it possible to remove old nodes through `import` and `export` functionalities. The `export` will restruct the tree to make node IDs to a sequenced segment like (1 ... node_sieze). 
From 4ee7804b27b9960515d331974b9f546cadf0db9c Mon Sep 17 00:00:00 2001 From: Cool Developer Date: Tue, 1 Nov 2022 07:57:37 -0400 Subject: [PATCH 04/24] small fix --- docs/architecture/adr-001-node-key-refactoring.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/architecture/adr-001-node-key-refactoring.md b/docs/architecture/adr-001-node-key-refactoring.md index 336202ef8..8dce541fb 100644 --- a/docs/architecture/adr-001-node-key-refactoring.md +++ b/docs/architecture/adr-001-node-key-refactoring.md @@ -27,12 +27,12 @@ New node structure type Node struct { key []byte value []byte - hash []byte // keep this field + hash []byte // keep this field in the storage leftHash []byte // will remove rightHash []byte // will remove nodeKey int64 // new field, use as a node key - leftNodeKey int64 // new field, need to store - rightNodeKey int64 // new field, need to store + leftNodeKey int64 // new field, need to store in the storage + rightNodeKey int64 // new field, need to store in the storage version int64 // will remove size int64 leftNode *Node From 9468ee2838703a4828952383e783a15d9958e9f7 Mon Sep 17 00:00:00 2001 From: Cool Developer Date: Tue, 1 Nov 2022 08:19:53 -0400 Subject: [PATCH 05/24] add migration --- docs/architecture/adr-001-node-key-refactoring.md | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/docs/architecture/adr-001-node-key-refactoring.md b/docs/architecture/adr-001-node-key-refactoring.md index 8dce541fb..89fd1f600 100644 --- a/docs/architecture/adr-001-node-key-refactoring.md +++ b/docs/architecture/adr-001-node-key-refactoring.md @@ -63,6 +63,15 @@ type MutableTree struct { ## Consequences +### Migration + +We can migrate nodes one by one through iterating the version. + +- Iterate the version in order, and get the root node for the specific version. +- Iterate the tree and assign the `nodeKey` to nodes which the node version equals. + +We will implement the `Import` functionality for the original version. 
+ ### Positive Using the sequenced integer ID, we take advantage of data locality in the bTree and it leads to performance improvements. Also it can reduce the node size in the storage. From 0c2d610c14d73a1e9ffa30e6d8fb991e632641ee Mon Sep 17 00:00:00 2001 From: Cool Developer Date: Tue, 1 Nov 2022 08:49:14 -0400 Subject: [PATCH 06/24] add pruning --- docs/architecture/adr-001-node-key-refactoring.md | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/docs/architecture/adr-001-node-key-refactoring.md b/docs/architecture/adr-001-node-key-refactoring.md index 89fd1f600..a844d00ec 100644 --- a/docs/architecture/adr-001-node-key-refactoring.md +++ b/docs/architecture/adr-001-node-key-refactoring.md @@ -65,7 +65,7 @@ type MutableTree struct { ### Migration -We can migrate nodes one by one through iterating the version. +We can migrate nodes one by one by iterating the version. - Iterate the version in order, and get the root node for the specific version. - Iterate the tree and assign the `nodeKey` to nodes which the node version equals. @@ -74,15 +74,20 @@ We will implement the `Import` functionality for the original version. ### Positive -Using the sequenced integer ID, we take advantage of data locality in the bTree and it leads to performance improvements. Also it can reduce the node size in the storage. +Using the sequenced integer ID, we take advantage of data locality in the bTree and it leads to performance improvements. Also, it can reduce the node size in the storage. Removing orphans also provides performance improvements including memory and storage saving. Also, it makes it easy to rollback the tree. Because we will keep the sequenced segment IDs for the specific version, and we can remove all nodes for which the `nodeKey` is greater than the specified integer value. ### Negative -It requires extra storage to store the node because it should keep `leftNodeKey` and `rightNodeKey` to iterate the tree. 
Instead, we can delete the `version`, `leftHash` and `rightHash` fields in the node and reduce the key size. +It requires extra storage to store the node because it should keep `leftNodeKey` and `rightNodeKey` to iterate the tree. Instead, we can delete the `version`, `leftHash`, and `rightHash` fields in the node and reduce the key size. -It can't delete the old nodes for the specific version due to removing orphans. But it makes `rollback` easier and it makes it possible to remove old nodes through `import` and `export` functionalities. The `export` will restruct the tree to make node IDs to a sequenced segment like (1 ... node_sieze). +It can't delete the old nodes for the specific version due to removing orphans. We introduce a new way to prune old versions. + +For example, when a user wants to prune the previous 500 versions every 1000 blocks +- We assume the pruning is completed for `n`th version and the last nonce of `n`th version is `x`. +- We iterate the tree from the `n+501`th root node and pick only nodes which the nodeKey is in `[(n+1)th version first nonce, (n+500)th version the last nonce]`. +- For those nodes, we re-assign the nodeKey from `x+1` in order. 
## References From 1459a18da26f310540a6eea942c18655b863e362 Mon Sep 17 00:00:00 2001 From: cool-developer <51834436+cool-develope@users.noreply.github.com> Date: Tue, 1 Nov 2022 11:44:17 -0400 Subject: [PATCH 07/24] Update docs/architecture/adr-001-node-key-refactoring.md Co-authored-by: Aleksandr Bezobchuk --- docs/architecture/adr-001-node-key-refactoring.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/architecture/adr-001-node-key-refactoring.md b/docs/architecture/adr-001-node-key-refactoring.md index a844d00ec..9f694acf6 100644 --- a/docs/architecture/adr-001-node-key-refactoring.md +++ b/docs/architecture/adr-001-node-key-refactoring.md @@ -10,7 +10,7 @@ Proposed ## Context -The original node key of IAVL is a hash of the node and it does not take advantage of data locality on disk. The nodes are stored in a random location of the disk due to the random hash value, so it needs to do a random search of the disk to find the node. +The original key format of IAVL nodes is a hash of the node. It does not take advantage of data locality on disk. Nodes are stored in random locations on disk due to the random hash value, so it needs to scan the disk to find the corresponding node which can be very inefficient. The `orphans` are used to manage the removed nodes in the current version and allow to deletion of the removed nodes for the specific version from the disk through the `DeleteVersion`. It needs to track every time when updating the tree and also requires extra storage to store `orphans`, but there are not many use cases of `DeleteVersion`. There are two use cases, the first one is the rollback of the tree and the second one is to remove the unnecessary old nodes. 
From d8d0bf9aaf9d80fbb3a39f2ebec39edbb0f504fb Mon Sep 17 00:00:00 2001 From: cool-developer <51834436+cool-develope@users.noreply.github.com> Date: Tue, 1 Nov 2022 11:44:30 -0400 Subject: [PATCH 08/24] Update docs/architecture/adr-001-node-key-refactoring.md Co-authored-by: Aleksandr Bezobchuk --- docs/architecture/adr-001-node-key-refactoring.md | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/docs/architecture/adr-001-node-key-refactoring.md b/docs/architecture/adr-001-node-key-refactoring.md index 9f694acf6..b1421bc4f 100644 --- a/docs/architecture/adr-001-node-key-refactoring.md +++ b/docs/architecture/adr-001-node-key-refactoring.md @@ -12,7 +12,10 @@ Proposed The original key format of IAVL nodes is a hash of the node. It does not take advantage of data locality on disk. Nodes are stored in random locations on disk due to the random hash value, so it needs to scan the disk to find the corresponding node which can be very inefficient. -The `orphans` are used to manage the removed nodes in the current version and allow to deletion of the removed nodes for the specific version from the disk through the `DeleteVersion`. It needs to track every time when updating the tree and also requires extra storage to store `orphans`, but there are not many use cases of `DeleteVersion`. There are two use cases, the first one is the rollback of the tree and the second one is to remove the unnecessary old nodes. +The `orphans` are used to manage node removal in the current design and allow deletion of removed nodes for the specific version from the disk through the `DeleteVersion` API. It needs to track every time when updating the tree and also requires extra storage to store `orphans`, but there are not many use cases of `DeleteVersion`. There are two use cases: + +1. Rollback of the tree to a previous version +2. 
Remove unnecessary old nodes ## Decision From 88885f1331e676316f5b4b7961c85fe45409dafa Mon Sep 17 00:00:00 2001 From: Cool Developer Date: Tue, 1 Nov 2022 11:56:56 -0400 Subject: [PATCH 09/24] suggestions --- .../adr-001-node-key-refactoring.md | 30 ++++++++++--------- 1 file changed, 16 insertions(+), 14 deletions(-) diff --git a/docs/architecture/adr-001-node-key-refactoring.md b/docs/architecture/adr-001-node-key-refactoring.md index b1421bc4f..aed1a2186 100644 --- a/docs/architecture/adr-001-node-key-refactoring.md +++ b/docs/architecture/adr-001-node-key-refactoring.md @@ -21,9 +21,10 @@ The `orphans` are used to manage node removal in the current design and allow de - Use the sequenced integer ID as a node key like `bigendian(nodeKey)` format. - Remove the `leftHash` and `rightHash` fields, and instead store `hash` field. -- Remove the `version` field from the node structure. - Remove the `orphans` from the tree. +Theoretically, we can also remove the `version` field in the node structure but it leads to breaking the ics23 proof mechanism. We will revisit it later. + New node structure ```go @@ -31,12 +32,10 @@ type Node struct { key []byte value []byte hash []byte // keep this field in the storage - leftHash []byte // will remove - rightHash []byte // will remove nodeKey int64 // new field, use as a node key leftNodeKey int64 // new field, need to store in the storage rightNodeKey int64 // new field, need to store in the storage - version int64 // will remove + version int64 size int64 leftNode *Node rightNode *Node @@ -52,7 +51,6 @@ type MutableTree struct { *ImmutableTree lastSaved *ImmutableTree nonce int64 // new field to track the current ID - orphans map[int64]int64 // will remove versions map[int64]bool allRootLoaded bool unsavedFastNodeAdditions map[string]*fastnode.Node @@ -64,8 +62,6 @@ type MutableTree struct { } ``` -## Consequences - ### Migration We can migrate nodes one by one by iterating the version. 
@@ -75,6 +71,17 @@ We can migrate nodes one by one by iterating the version. We will implement the `Import` functionality for the original version. +### Pruning + +We introduce a new way to prune old versions. + +For example, when a user wants to prune the previous 500 versions every 1000 blocks +- We assume the pruning is completed for `n`th version and the last nonce of `n`th version is `x`. +- We iterate the tree from the `n+501`th root node and pick only nodes which the nodeKey is in `[(n+1)th version first nonce, (n+500)th version the last nonce]`. +- We can delete missing nodes instantly or re-assign the nodeKey from `x+1` in order for those nodes. Re-assign should be done after stopping the node but it can lead to improving the data locality. + +## Consequences + ### Positive Using the sequenced integer ID, we take advantage of data locality in the bTree and it leads to performance improvements. Also, it can reduce the node size in the storage. @@ -83,14 +90,9 @@ Removing orphans also provides performance improvements including memory and sto ### Negative -It requires extra storage to store the node because it should keep `leftNodeKey` and `rightNodeKey` to iterate the tree. Instead, we can delete the `version`, `leftHash`, and `rightHash` fields in the node and reduce the key size. - -It can't delete the old nodes for the specific version due to removing orphans. We introduce a new way to prune old versions. +It requires extra storage to store the node because it should keep `leftNodeKey` and `rightNodeKey` to iterate the tree. Instead, we can delete`leftHash` and `rightHash` fields in the node and reduce the key size. -For example, when a user wants to prune the previous 500 versions every 1000 blocks -- We assume the pruning is completed for `n`th version and the last nonce of `n`th version is `x`. 
-- We iterate the tree from the `n+501`th root node and pick only nodes which the nodeKey is in `[(n+1)th version first nonce, (n+500)th version the last nonce]`. -- For those nodes, we re-assign the nodeKey from `x+1` in order. +It can't delete the old nodes for the specific version due to removing orphans. ## References From 5272de4f0d3fbab53994a7048ac81b84aef067ce Mon Sep 17 00:00:00 2001 From: Cool Developer Date: Thu, 3 Nov 2022 10:59:34 -0400 Subject: [PATCH 10/24] suggestions --- .../adr-001-node-key-refactoring.md | 22 +++++++++---------- 1 file changed, 10 insertions(+), 12 deletions(-) diff --git a/docs/architecture/adr-001-node-key-refactoring.md b/docs/architecture/adr-001-node-key-refactoring.md index aed1a2186..eebe54ddb 100644 --- a/docs/architecture/adr-001-node-key-refactoring.md +++ b/docs/architecture/adr-001-node-key-refactoring.md @@ -19,12 +19,11 @@ The `orphans` are used to manage node removal in the current design and allow de ## Decision -- Use the sequenced integer ID as a node key like `bigendian(nodeKey)` format. +- Use the version and the sequenced integer ID as a node key like `bigendian(version) | bigendian(nonce)` format. +- Remove the `version` field from node body writes. - Remove the `leftHash` and `rightHash` fields, and instead store `hash` field. - Remove the `orphans` from the tree. -Theoretically, we can also remove the `version` field in the node structure but it leads to breaking the ics23 proof mechanism. We will revisit it later. 
- New node structure ```go @@ -32,10 +31,10 @@ type Node struct { key []byte value []byte hash []byte // keep this field in the storage - nodeKey int64 // new field, use as a node key + nonce int64 // new field, the sequenced integer ID within the specific version leftNodeKey int64 // new field, need to store in the storage rightNodeKey int64 // new field, need to store in the storage - version int64 + version int64 size int64 leftNode *Node rightNode *Node @@ -50,7 +49,7 @@ New tree structure type MutableTree struct { *ImmutableTree lastSaved *ImmutableTree - nonce int64 // new field to track the current ID + nonce int64 // new field to track the integer ID of the current version versions map[int64]bool allRootLoaded bool unsavedFastNodeAdditions map[string]*fastnode.Node @@ -76,23 +75,22 @@ We will implement the `Import` functionality for the original version. We introduce a new way to prune old versions. For example, when a user wants to prune the previous 500 versions every 1000 blocks -- We assume the pruning is completed for `n`th version and the last nonce of `n`th version is `x`. -- We iterate the tree from the `n+501`th root node and pick only nodes which the nodeKey is in `[(n+1)th version first nonce, (n+500)th version the last nonce]`. -- We can delete missing nodes instantly or re-assign the nodeKey from `x+1` in order for those nodes. Re-assign should be done after stopping the node but it can lead to improving the data locality. +- We iterate the tree based on the `n+501`th root and pick only nodes which the version is in `[(n+1), (n+500)]`. +- We can delete missing nodes (which don't appear in the above traversing) instantly or re-assign the nonce in order for those nodes. Re-assign should be done after stopping the node but it can lead to improving the data locality. ## Consequences ### Positive -Using the sequenced integer ID, we take advantage of data locality in the bTree and it leads to performance improvements. 
Also, it can reduce the node size in the storage. +Using the version and the sequenced integer ID, we take advantage of data locality in the LSM tree. Since we commit the sorted data, it can reduce compactions and makes easy to find the key. Also, it can reduce the key and node size in the storage. -Removing orphans also provides performance improvements including memory and storage saving. Also, it makes it easy to rollback the tree. Because we will keep the sequenced segment IDs for the specific version, and we can remove all nodes for which the `nodeKey` is greater than the specified integer value. +Removing orphans also provides performance improvements including memory and storage saving. Also, it makes it easy to rollback the tree. Because we can remove all nodes for which the `nodeKey` is greater than the specified version. ### Negative It requires extra storage to store the node because it should keep `leftNodeKey` and `rightNodeKey` to iterate the tree. Instead, we can delete`leftHash` and `rightHash` fields in the node and reduce the key size. -It can't delete the old nodes for the specific version due to removing orphans. +Removing old nodes for the specific version requires some extra iterations due to removing orphans. 
## References From 006d76c9f2c9399256d3fa3506578d086d47557e Mon Sep 17 00:00:00 2001 From: Cool Developer Date: Fri, 4 Nov 2022 09:04:47 -0400 Subject: [PATCH 11/24] update the struct --- .../adr-001-node-key-refactoring.md | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/docs/architecture/adr-001-node-key-refactoring.md b/docs/architecture/adr-001-node-key-refactoring.md index eebe54ddb..dbfd39549 100644 --- a/docs/architecture/adr-001-node-key-refactoring.md +++ b/docs/architecture/adr-001-node-key-refactoring.md @@ -27,17 +27,21 @@ The `orphans` are used to manage node removal in the current design and allow de New node structure ```go +type NodeKey struct { + version int64 + nonce int32 +} + type Node struct { key []byte value []byte - hash []byte // keep this field in the storage - nonce int64 // new field, the sequenced integer ID within the specific version - leftNodeKey int64 // new field, need to store in the storage - rightNodeKey int64 // new field, need to store in the storage - version int64 - size int64 + hash []byte // keep it in the storage instead of leftHash and rightHash + nodeKey NodeKey // new field, the key in the storage + leftNodeKey NodeKey // new field, need to store in the storage + rightNodeKey NodeKey // new field, need to store in the storage leftNode *Node rightNode *Node + size int64 subtreeHeight int8 persisted bool } @@ -49,7 +53,6 @@ New tree structure type MutableTree struct { *ImmutableTree lastSaved *ImmutableTree - nonce int64 // new field to track the integer ID of the current version versions map[int64]bool allRootLoaded bool unsavedFastNodeAdditions map[string]*fastnode.Node @@ -61,6 +64,8 @@ type MutableTree struct { } ``` +We will restruct the nonce when save the current version. It will reduce unnecessary checks in CRUD operations of the tree and keep sorted the order of insertion in the LSM tree. + ### Migration We can migrate nodes one by one by iterating the version. 
From 7cc7280c37e881b2bac427bebd20c9a13c241e4f Mon Sep 17 00:00:00 2001 From: cool-developer <51834436+cool-develope@users.noreply.github.com> Date: Tue, 8 Nov 2022 09:23:15 -0500 Subject: [PATCH 12/24] Update docs/architecture/adr-001-node-key-refactoring.md Co-authored-by: Robert Zaremba --- docs/architecture/adr-001-node-key-refactoring.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/architecture/adr-001-node-key-refactoring.md b/docs/architecture/adr-001-node-key-refactoring.md index dbfd39549..88593da7d 100644 --- a/docs/architecture/adr-001-node-key-refactoring.md +++ b/docs/architecture/adr-001-node-key-refactoring.md @@ -12,7 +12,7 @@ Proposed The original key format of IAVL nodes is a hash of the node. It does not take advantage of data locality on disk. Nodes are stored in random locations on disk due to the random hash value, so it needs to scan the disk to find the corresponding node which can be very inefficient. -The `orphans` are used to manage node removal in the current design and allow deletion of removed nodes for the specific version from the disk through the `DeleteVersion` API. It needs to track every time when updating the tree and also requires extra storage to store `orphans`, but there are not many use cases of `DeleteVersion`. There are two use cases: +The `orphans` are used to manage node removal in the current design and allow deletion of removed nodes for the specific version from the disk through the `DeleteVersion` API. It needs to track every time when updating the tree and also requires extra storage to store `orphans`. But there are only 2 use cases for `DeleteVersion`: 1. Rollback of the tree to a previous version 2. 
Remove unnecessary old nodes From 4e6044fc190f5f38b6c171ddae1b7295cc227761 Mon Sep 17 00:00:00 2001 From: cool-developer <51834436+cool-develope@users.noreply.github.com> Date: Tue, 8 Nov 2022 09:28:19 -0500 Subject: [PATCH 13/24] Update adr-001-node-key-refactoring.md --- docs/architecture/adr-001-node-key-refactoring.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/architecture/adr-001-node-key-refactoring.md b/docs/architecture/adr-001-node-key-refactoring.md index 88593da7d..d6a2bd3a0 100644 --- a/docs/architecture/adr-001-node-key-refactoring.md +++ b/docs/architecture/adr-001-node-key-refactoring.md @@ -64,7 +64,7 @@ type MutableTree struct { } ``` -We will restruct the nonce when save the current version. It will reduce unnecessary checks in CRUD operations of the tree and keep sorted the order of insertion in the LSM tree. +We will assign the nonce in order when save the current version in `SaveBranch`. It will reduce unnecessary checks in CRUD operations of the tree and keep sorted the order of insertion in the LSM tree. ### Migration From 0bf748681eae957fd4ac159b0ef36cba3c0fa1ce Mon Sep 17 00:00:00 2001 From: Cool Developer Date: Wed, 9 Nov 2022 16:29:50 -0500 Subject: [PATCH 14/24] orphans --- .../adr-001-node-key-refactoring.md | 68 +++++++++++++------ 1 file changed, 47 insertions(+), 21 deletions(-) diff --git a/docs/architecture/adr-001-node-key-refactoring.md b/docs/architecture/adr-001-node-key-refactoring.md index d6a2bd3a0..a59d2ae5b 100644 --- a/docs/architecture/adr-001-node-key-refactoring.md +++ b/docs/architecture/adr-001-node-key-refactoring.md @@ -10,9 +10,9 @@ Proposed ## Context -The original key format of IAVL nodes is a hash of the node. It does not take advantage of data locality on disk. Nodes are stored in random locations on disk due to the random hash value, so it needs to scan the disk to find the corresponding node which can be very inefficient. 
+The original key format of IAVL nodes is a hash of the node. It does not take advantage of data locality on disk. Nodes are stored in random locations on the disk due to the random hash value, so it needs to scan the disk to find the corresponding node which can be very inefficient. -The `orphans` are used to manage node removal in the current design and allow deletion of removed nodes for the specific version from the disk through the `DeleteVersion` API. It needs to track every time when updating the tree and also requires extra storage to store `orphans`. But there are only 2 use cases for `DeleteVersion`: +The `orphans` are used to manage node removal in the current design and allow the deletion of removed nodes for the specific version from the disk through the `DeleteVersion` API. It needs to track every time when updating the tree and also requires extra storage to store `orphans`. But there are only 2 use cases for `DeleteVersion`: 1. Rollback of the tree to a previous version 2. Remove unnecessary old nodes @@ -21,8 +21,9 @@ The `orphans` are used to manage node removal in the current design and allow de - Use the version and the sequenced integer ID as a node key like `bigendian(version) | bigendian(nonce)` format. - Remove the `version` field from node body writes. -- Remove the `leftHash` and `rightHash` fields, and instead store `hash` field. -- Remove the `orphans` from the tree. +- Remove the `leftHash` and `rightHash` fields, and instead store `hash` field in the node body. +- Separate the `orphans` from the tree CRUD operations, and refactor the orphan store like `bigendian(to_version) | bigendian(from_version) -> nonce`. +- Remove the `root` store and prefix identifier which is used to identify the root, node, and orphan. 
New node structure @@ -36,14 +37,13 @@ type Node struct { key []byte value []byte hash []byte // keep it in the storage instead of leftHash and rightHash - nodeKey NodeKey // new field, the key in the storage - leftNodeKey NodeKey // new field, need to store in the storage - rightNodeKey NodeKey // new field, need to store in the storage + nodeKey *NodeKey // new field, the key in the storage + leftNodeKey *NodeKey // new field, need to store in the storage + rightNodeKey *NodeKey // new field, need to store in the storage leftNode *Node rightNode *Node size int64 subtreeHeight int8 - persisted bool } ``` @@ -53,6 +53,7 @@ New tree structure type MutableTree struct { *ImmutableTree lastSaved *ImmutableTree + nonce int32 versions map[int64]bool allRootLoaded bool unsavedFastNodeAdditions map[string]*fastnode.Node @@ -64,38 +65,63 @@ type MutableTree struct { } ``` -We will assign the nonce in order when save the current version in `SaveBranch`. It will reduce unnecessary checks in CRUD operations of the tree and keep sorted the order of insertion in the LSM tree. +We will assign the nonce in order when saving the current version in `SaveVersion`. It will reduce unnecessary checks in CRUD operations of the tree and keep sorted the order of insertion in the LSM tree. ### Migration We can migrate nodes one by one by iterating the version. - Iterate the version in order, and get the root node for the specific version. -- Iterate the tree and assign the `nodeKey` to nodes which the node version equals. - -We will implement the `Import` functionality for the original version. +- Iterate the tree based on the root and pick only nodes the node version is the same as the given version. +- Store them using the new node key format. ### Pruning -We introduce a new way to prune old versions. +We introduce a new way to struct `orphans` in the `SaveVersion`, not in the `Set` or `Remove`. + +- Get the previous root from the `lastSaved`. 
+- Iterate the tree until `leftNode` or `rightNode` is not `nil` based on the previous root. + +The above node group would be removed in the current version because having children means it is updated in the current CRUD operations. -For example, when a user wants to prune the previous 500 versions every 1000 blocks -- We iterate the tree based on the `n+501`th root and pick only nodes which the version is in `[(n+1), (n+500)]`. -- We can delete missing nodes (which don't appear in the above traversing) instantly or re-assign the nonce in order for those nodes. Re-assign should be done after stopping the node but it can lead to improving the data locality. +### Rollback + +When we want to rollback to the specific version `n` + +- Iterate the version from `n+1`. +- Traverse key-value through `traversePrefix` with `prefix=bigendian(version)`. +- Remove data (it will include `orphans` and `nodes` data). ## Consequences ### Positive -Using the version and the sequenced integer ID, we take advantage of data locality in the LSM tree. Since we commit the sorted data, it can reduce compactions and makes easy to find the key. Also, it can reduce the key and node size in the storage. +Using the version and the sequenced integer ID, we take advantage of data locality in the LSM tree. Since we commit the sorted data, it can reduce compactions and makes it easy to find the key. Also, it can reduce the key and node size in the storage. + +``` +# node body -Removing orphans also provides performance improvements including memory and storage saving. Also, it makes it easy to rollback the tree. Because we can remove all nodes for which the `nodeKey` is greater than the specified version. 
+add `hash`: +32 byte +add `leftNodeKey`, `rightNodeKey`: max (8+4)*2= +24 byte +remove `leftHash`, `rightHash`: -64 byte +remove `version`: max -8 byte +------------------------------------------------------------ + total save 16 byte -### Negative +# node key + +remove `hash`: -32 byte +add `version|nonce`: +12 byte +------------------------------------ + total save 20 byte +``` + +Separating orphans also provides performance improvements including memory and storage saving. -It requires extra storage to store the node because it should keep `leftNodeKey` and `rightNodeKey` to iterate the tree. Instead, we can delete`leftHash` and `rightHash` fields in the node and reduce the key size. +### Negative -Remvoing old nodes for the specific version requires some extra iterations due to removing orphans. +The `Update` operation will require extra DB access because we need to take children to calculate the hash of updated nodes. +It doesn't require more access in other cases including `Set`, `Remove`, and `Proof`. ## References From ee11dff3f9977def0a5af7acebfabbb4adfb9e12 Mon Sep 17 00:00:00 2001 From: Cool Developer Date: Thu, 10 Nov 2022 11:31:05 -0500 Subject: [PATCH 15/24] revert removing root store --- docs/architecture/adr-001-node-key-refactoring.md | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/docs/architecture/adr-001-node-key-refactoring.md b/docs/architecture/adr-001-node-key-refactoring.md index a59d2ae5b..920484951 100644 --- a/docs/architecture/adr-001-node-key-refactoring.md +++ b/docs/architecture/adr-001-node-key-refactoring.md @@ -22,8 +22,7 @@ The `orphans` are used to manage node removal in the current design and allow th - Use the version and the sequenced integer ID as a node key like `bigendian(version) | bigendian(nonce)` format. - Remove the `version` field from node body writes. - Remove the `leftHash` and `rightHash` fields, and instead store `hash` field in the node body. 
-- Separate the `orphans` from the tree CRUD operations, and refactor the orphan store like `bigendian(to_version) | bigendian(from_version) -> nonce`. -- Remove the `root` store and prefix identifier which is used to identify the root, node, and orphan. +- Separate the `orphans` from the tree CRUD operations, and refactor the orphan store like `bigendian(to_version) | bigendian(from_version) | bigendian(nonce)`. New node structure From d9f7d2e0d28093d6556cb9d43a81b5fd62f09815 Mon Sep 17 00:00:00 2001 From: Cool Developer Date: Wed, 30 Nov 2022 13:16:19 -0500 Subject: [PATCH 16/24] path update --- .../adr-001-node-key-refactoring.md | 67 ++++++++++++++----- 1 file changed, 50 insertions(+), 17 deletions(-) diff --git a/docs/architecture/adr-001-node-key-refactoring.md b/docs/architecture/adr-001-node-key-refactoring.md index 920484951..06f067ea4 100644 --- a/docs/architecture/adr-001-node-key-refactoring.md +++ b/docs/architecture/adr-001-node-key-refactoring.md @@ -19,23 +19,41 @@ The `orphans` are used to manage node removal in the current design and allow th ## Decision -- Use the version and the sequenced integer ID as a node key like `bigendian(version) | bigendian(nonce)` format. +- Use the version and the sequenced integer ID as a node key like `bigendian(version) | byte array(path)` format. Here the `path` is a binary expression of the path from the root to the current node. + ``` + `10101` : (right, left, right, left, right) -> [0x15] + ``` +- Store only the child node key for the below version in node body writes. Because it is possible to get the child path for the same version. 
+ ```go + func (node *Node) getLeftNode() (*Node, error) { + if node.leftNode != nil { + return node.leftNode + } + if node.leftNodeKey != nil { + return getNode(node.leftNodeKey) // get the node from the storage + } + return getNode(&NodeKey{ + version: node.nodeKey.version, + path: node.nodeKey.path + '0', // it will be more complicated in the real implementation + }) + } + ``` - Remove the `version` field from node body writes. - Remove the `leftHash` and `rightHash` fields, and instead store `hash` field in the node body. -- Separate the `orphans` from the tree CRUD operations, and refactor the orphan store like `bigendian(to_version) | bigendian(from_version) | bigendian(nonce)`. +- Remove the `orphans` completely from both tree and storage. New node structure ```go type NodeKey struct { version int64 - nonce int32 + path []byte } type Node struct { key []byte value []byte - hash []byte // keep it in the storage instead of leftHash and rightHash + hash []byte // keep it in the storage instead of leftHash and rightHash nodeKey *NodeKey // new field, the key in the storage leftNodeKey *NodeKey // new field, need to store in the storage rightNodeKey *NodeKey // new field, need to store in the storage @@ -52,7 +70,6 @@ New tree structure type MutableTree struct { *ImmutableTree lastSaved *ImmutableTree - nonce int32 versions map[int64]bool allRootLoaded bool unsavedFastNodeAdditions map[string]*fastnode.Node @@ -64,7 +81,7 @@ type MutableTree struct { } ``` -We will assign the nonce in order when saving the current version in `SaveVersion`. It will reduce unnecessary checks in CRUD operations of the tree and keep sorted the order of insertion in the LSM tree. +We will assign the `nodeKey` when saving the current version in `SaveVersion`. It will reduce unnecessary checks in CRUD operations of the tree and keep sorted the order of insertion in the LSM tree. ### Migration @@ -76,12 +93,25 @@ We can migrate nodes one by one by iterating the version. 
### Pruning -We introduce a new way to struct `orphans` in the `SaveVersion`, not in the `Set` or `Remove`. +We assume keeping only the range versions of `fromVersion` to `toVersion`. Refer to [this issue](https://github.com/cosmos/cosmos-sdk/issues/12989). -- Get the previous root from the `lastSaved`. -- Iterate the tree until `leftNode` or `rightNode` is not `nil` based on the previous root. +When we want to prune all versions up to the specific version `n` -The above node group would be removed in the current version because having children means it is updated in the current CRUD operations. +- Iterate the tree based on the root of `n+1`th version. +- Iterate the node until visiting the node the version is below `fromVersion` and don't visit further deeply. +- Apply `DeletePath` for all visited nodes the version is below `n+1`. + +```go +func DeletePath(nk *NodeKey) error { + DeleteNode(node) + if nk.path is not root { + DeletePath(&NodeKey{ + version: nk.version, + path: parent(nk.path), // it looks like removing the last binary + }) + } +} +``` ### Rollback @@ -89,41 +119,44 @@ When we want to rollback to the specific version `n` - Iterate the version from `n+1`. - Traverse key-value through `traversePrefix` with `prefix=bigendian(version)`. -- Remove data (it will include `orphans` and `nodes` data). +- Remove all iterated nodes. ## Consequences ### Positive -Using the version and the sequenced integer ID, we take advantage of data locality in the LSM tree. Since we commit the sorted data, it can reduce compactions and makes it easy to find the key. Also, it can reduce the key and node size in the storage. +Using the version and the path, we take advantage of data locality in the LSM tree. Since we commit the sorted data, it can reduce compactions and makes it easy to find the key. Also, it can reduce the key and node size in the storage. 
``` # node body add `hash`: +32 byte -add `leftNodeKey`, `rightNodeKey`: max (8+4)*2= +24 byte +add `leftNodeKey`, `rightNodeKey`: max 8 + 8 = +16 byte remove `leftHash`, `rightHash`: -64 byte remove `version`: max -8 byte ------------------------------------------------------------ - total save 16 byte + total save 24 byte # node key remove `hash`: -32 byte -add `version|nonce`: +12 byte +add `version|path`: +16 byte ------------------------------------ - total save 20 byte + total save 16 byte ``` -Separating orphans also provides performance improvements including memory and storage saving. +Removing orphans also provides performance improvements including memory and storage saving. ### Negative The `Update` operation will require extra DB access because we need to take children to calculate the hash of updated nodes. It doesn't require more access in other cases including `Set`, `Remove`, and `Proof`. +It is impossible to remove the individual version. The new design requires more restrict pruning strategies. + ## References - https://github.com/cosmos/iavl/issues/548 - https://github.com/cosmos/iavl/issues/137 - https://github.com/cosmos/iavl/issues/571 +- https://github.com/cosmos/cosmos-sdk/issues/12989 From 10184ac03b1acc06f052af3ae2693f3a7aaa2ba2 Mon Sep 17 00:00:00 2001 From: Cool Developer Date: Wed, 30 Nov 2022 13:17:43 -0500 Subject: [PATCH 17/24] small fix --- docs/architecture/adr-001-node-key-refactoring.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/architecture/adr-001-node-key-refactoring.md b/docs/architecture/adr-001-node-key-refactoring.md index 06f067ea4..5de55f63f 100644 --- a/docs/architecture/adr-001-node-key-refactoring.md +++ b/docs/architecture/adr-001-node-key-refactoring.md @@ -19,7 +19,7 @@ The `orphans` are used to manage node removal in the current design and allow th ## Decision -- Use the version and the sequenced integer ID as a node key like `bigendian(version) | byte array(path)` format. 
Here the `path` is a binary expression of the path from the root to the current node. +- Use the version and the path as a node key like `bigendian(version) | byte array(path)` format. Here the `path` is a binary expression of the path from the root to the current node. ``` `10101` : (right, left, right, left, right) -> [0x15] ``` From 5f948447f9a4ff3b736465b5036f15ab053c0cae Mon Sep 17 00:00:00 2001 From: cool-developer <51834436+cool-develope@users.noreply.github.com> Date: Wed, 30 Nov 2022 13:24:53 -0500 Subject: [PATCH 18/24] Update adr-001-node-key-refactoring.md --- .../adr-001-node-key-refactoring.md | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/docs/architecture/adr-001-node-key-refactoring.md b/docs/architecture/adr-001-node-key-refactoring.md index 5de55f63f..7f179a118 100644 --- a/docs/architecture/adr-001-node-key-refactoring.md +++ b/docs/architecture/adr-001-node-key-refactoring.md @@ -27,7 +27,7 @@ The `orphans` are used to manage node removal in the current design and allow th ```go func (node *Node) getLeftNode() (*Node, error) { if node.leftNode != nil { - return node.leftNode + return node.leftNode, nil } if node.leftNodeKey != nil { return getNode(node.leftNodeKey) // get the node from the storage @@ -47,7 +47,7 @@ New node structure ```go type NodeKey struct { version int64 - path []byte + path []byte } type Node struct { @@ -59,7 +59,7 @@ type Node struct { rightNodeKey *NodeKey // new field, need to store in the storage leftNode *Node rightNode *Node - size int64 + size int64 subtreeHeight int8 } ``` @@ -130,19 +130,19 @@ Using the version and the path, we take advantage of data locality in the LSM tr ``` # node body -add `hash`: +32 byte -add `leftNodeKey`, `rightNodeKey`: max 8 + 8 = +16 byte -remove `leftHash`, `rightHash`: -64 byte -remove `version`: max -8 byte +add `hash`: +32 byte +add `leftNodeKey`, `rightNodeKey`: max 8 + 8 = +16 byte +remove `leftHash`, `rightHash`: -64 byte +remove `version`: 
max -8 byte ------------------------------------------------------------ - total save 24 byte + total save 24 byte # node key remove `hash`: -32 byte add `version|path`: +16 byte ------------------------------------ - total save 16 byte + total save 16 byte ``` Removing orphans also provides performance improvements including memory and storage saving. From 8fb87b670f035b7dbec56c650c97b2f7e27ed9fa Mon Sep 17 00:00:00 2001 From: Cool Developer Date: Fri, 2 Dec 2022 08:51:55 -0500 Subject: [PATCH 19/24] small fix --- docs/architecture/adr-001-node-key-refactoring.md | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/docs/architecture/adr-001-node-key-refactoring.md b/docs/architecture/adr-001-node-key-refactoring.md index 7f179a118..695ebe346 100644 --- a/docs/architecture/adr-001-node-key-refactoring.md +++ b/docs/architecture/adr-001-node-key-refactoring.md @@ -103,13 +103,17 @@ When we want to prune all versions up to the specific version `n` ```go func DeletePath(nk *NodeKey) error { - DeleteNode(node) if nk.path is not root { - DeletePath(&NodeKey{ + pnk := &NodeKey{ version: nk.version, path: parent(nk.path), // it looks like removing the last binary - }) + } + if err != DeleteNode(pnk); err != nil { + return err + } + return DeletePath(pnk) } + return nil } ``` From 6bbf7f94e120587e0874e2097e35563575d66cb4 Mon Sep 17 00:00:00 2001 From: Cool Developer Date: Fri, 2 Dec 2022 14:30:40 -0500 Subject: [PATCH 20/24] add prune method --- .../adr-001-node-key-refactoring.md | 26 +++++-------------- 1 file changed, 7 insertions(+), 19 deletions(-) diff --git a/docs/architecture/adr-001-node-key-refactoring.md b/docs/architecture/adr-001-node-key-refactoring.md index 695ebe346..71de4dc7f 100644 --- a/docs/architecture/adr-001-node-key-refactoring.md +++ b/docs/architecture/adr-001-node-key-refactoring.md @@ -95,27 +95,15 @@ We can migrate nodes one by one by iterating the version. 
We assume keeping only the range versions of `fromVersion` to `toVersion`. Refer to [this issue](https://github.com/cosmos/cosmos-sdk/issues/12989). -When we want to prune all versions up to the specific version `n` +Here we are introducing a new way how to get orphaned nodes which remove in the `n+1`th version updates. -- Iterate the tree based on the root of `n+1`th version. -- Iterate the node until visiting the node the version is below `fromVersion` and don't visit further deeply. -- Apply `DeletePath` for all visited nodes the version is below `n+1`. +- Traverse the tree in-order way based on the root of `n+1`th version. +- If we visit the lower version node, pick the node and don't visit further deeply. Pay attention to the order of these nodes. +- Traverse the tree in-order way based on the root of `n`th version. +- Iterate the tree until meet the first node among the above nodes(stack) and delete all visited nodes so far from `n`th tree. +- Pop the first node from the stack and iterate again. -```go -func DeletePath(nk *NodeKey) error { - if nk.path is not root { - pnk := &NodeKey{ - version: nk.version, - path: parent(nk.path), // it looks like removing the last binary - } - if err != DeleteNode(pnk); err != nil { - return err - } - return DeletePath(pnk) - } - return nil -} -``` +If we assume `1 to (n-1)` versions already been removed, when we want to remove the `n`th version, we can just remove the above orphaned nodes. 
### Rollback From 204d881886c430189040cbb3b5da1b02b3c7b4ec Mon Sep 17 00:00:00 2001 From: cool-developer <51834436+cool-develope@users.noreply.github.com> Date: Wed, 18 Jan 2023 14:11:01 -0500 Subject: [PATCH 21/24] Update docs/architecture/adr-001-node-key-refactoring.md Co-authored-by: Aaron Craelius --- docs/architecture/adr-001-node-key-refactoring.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/architecture/adr-001-node-key-refactoring.md b/docs/architecture/adr-001-node-key-refactoring.md index 71de4dc7f..51a15b487 100644 --- a/docs/architecture/adr-001-node-key-refactoring.md +++ b/docs/architecture/adr-001-node-key-refactoring.md @@ -23,7 +23,7 @@ The `orphans` are used to manage node removal in the current design and allow th ``` `10101` : (right, left, right, left, right) -> [0x15] ``` -- Store only the child node key for the below version in node body writes. Because it is possible to get the child path for the same version. +- Store the child node key in the node body only when the child node is from an earlier version. For the same version, it is possible to guess the child node key and thus it doesn't need to be explicitly stored. ```go func (node *Node) getLeftNode() (*Node, error) { if node.leftNode != nil { From bf85d923cbcc5d08226ab39c0036fca89c51fbdd Mon Sep 17 00:00:00 2001 From: Cool Developer Date: Fri, 17 Feb 2023 08:29:40 -0500 Subject: [PATCH 22/24] resolve conflicts --- .../adr-001-node-key-refactoring.md | 74 ++++++++----------- 1 file changed, 30 insertions(+), 44 deletions(-) diff --git a/docs/architecture/adr-001-node-key-refactoring.md b/docs/architecture/adr-001-node-key-refactoring.md index 51a15b487..977fc37a3 100644 --- a/docs/architecture/adr-001-node-key-refactoring.md +++ b/docs/architecture/adr-001-node-key-refactoring.md @@ -10,7 +10,7 @@ Proposed ## Context -The original key format of IAVL nodes is a hash of the node. It does not take advantage of data locality on disk. 
Nodes are stored in random locations on the disk due to the random hash value, so it needs to scan the disk to find the corresponding node which can be very inefficient. +The original key format of IAVL nodes is a hash of the node. It does not take advantage of data locality on LSM-Tree. Nodes are stored with the random hash value, so it increases the number of compactions and makes it difficult to find the node. The new key format will take advantage of data locality in the LSM tree and reduce the number of compactions. The `orphans` are used to manage node removal in the current design and allow the deletion of removed nodes for the specific version from the disk through the `DeleteVersion` API. It needs to track every time when updating the tree and also requires extra storage to store `orphans`. But there are only 2 use cases for `DeleteVersion`: @@ -19,35 +19,18 @@ The `orphans` are used to manage node removal in the current design and allow th ## Decision -- Use the version and the path as a node key like `bigendian(version) | byte array(path)` format. Here the `path` is a binary expression of the path from the root to the current node. - ``` - `10101` : (right, left, right, left, right) -> [0x15] - ``` -- Store the child node key in the node body only when the child node is from an earlier version. For the same version, it is possible to guess the child node key and thus it doesn't need to be explicitly stored. - ```go - func (node *Node) getLeftNode() (*Node, error) { - if node.leftNode != nil { - return node.leftNode, nil - } - if node.leftNodeKey != nil { - return getNode(node.leftNodeKey) // get the node from the storage - } - return getNode(&NodeKey{ - version: node.nodeKey.version, - path: node.nodeKey.path + '0', // it will be more complicated in the real implementation - }) - } - ``` -- Remove the `version` field from node body writes. -- Remove the `leftHash` and `rightHash` fields, and instead store `hash` field in the node body. 
+- Use the version and the local nonce as a node key like `bigendian(version) | bigendian(nonce)` format. Here the `nonce` is a local sequence id for the same version. + - Store the children node keys (`leftNodeKey` and `rightNodeKey`) in the node body. + - Remove the `version` field from node body writes. + - Remove the `leftHash` and `rightHash` fields, and instead store `hash` field in the node body. - Remove the `orphans` completely from both tree and storage. New node structure ```go type NodeKey struct { - version int64 - path []byte + version int64 + nonce int32 } type Node struct { @@ -59,7 +42,9 @@ type Node struct { rightNodeKey *NodeKey // new field, need to store in the storage leftNode *Node rightNode *Node - size int64 + size int64 + leftNode *Node + rightNode *Node subtreeHeight int8 } ``` @@ -68,15 +53,13 @@ New tree structure ```go type MutableTree struct { - *ImmutableTree - lastSaved *ImmutableTree - versions map[int64]bool - allRootLoaded bool - unsavedFastNodeAdditions map[string]*fastnode.Node - unsavedFastNodeRemovals map[string]interface{} + *ImmutableTree // The current, working tree. + lastSaved *ImmutableTree // The most recently saved tree. + unsavedFastNodeAdditions map[string]*fastnode.Node // FastNodes that have not yet been saved to disk + unsavedFastNodeRemovals map[string]interface{} // FastNodes that have not yet been removed from disk ndb *nodeDB - skipFastStorageUpgrade bool - + skipFastStorageUpgrade bool // If true, the tree will work like no fast storage and always not upgrade fast storage + mtx sync.Mutex } ``` @@ -85,11 +68,12 @@ We will assign the `nodeKey` when saving the current version in `SaveVersion`. I ### Migration -We can migrate nodes one by one by iterating the version. +We can migrate nodes through the following steps: -- Iterate the version in order, and get the root node for the specific version. -- Iterate the tree based on the root and pick only nodes the node version is the same as the given version. 
-- Store them using the new node key format. +- Export the snapshot of the tree from the original version. +- Import the snapshot to the new version. + - Track the nonce for the same version using int32 array of the version length. + - Assign the `nodeKey` when saving the node. ### Pruning @@ -122,19 +106,19 @@ Using the version and the path, we take advantage of data locality in the LSM tr ``` # node body -add `hash`: +32 byte -add `leftNodeKey`, `rightNodeKey`: max 8 + 8 = +16 byte -remove `leftHash`, `rightHash`: -64 byte -remove `version`: max -8 byte +add `hash`: +32 byte +add `leftNodeKey`, `rightNodeKey`: max (8 + 4) * 2 = +24 byte +remove `leftHash`, `rightHash`: -64 byte +remove `version`: max -8 byte ------------------------------------------------------------ - total save 24 byte + total save 16 byte # node key remove `hash`: -32 byte -add `version|path`: +16 byte +add `version|nonce`: +12 byte ------------------------------------ - total save 16 byte + total save 20 byte ``` Removing orphans also provides performance improvements including memory and storage saving. @@ -146,6 +130,8 @@ It doesn't require more access in other cases including `Set`, `Remove`, and `Pr It is impossible to remove the individual version. The new design requires more restrict pruning strategies. +When importing the tree, it may require more memory because of int32 array of the version length. We will introduce the new importing strategy to reduce the memory usage. 
+ ## References - https://github.com/cosmos/iavl/issues/548 From b064ede008a4aaf677c856747f500e60aebb8e00 Mon Sep 17 00:00:00 2001 From: cool-developer <51834436+cool-develope@users.noreply.github.com> Date: Fri, 17 Feb 2023 05:34:07 -0800 Subject: [PATCH 23/24] Update adr-001-node-key-refactoring.md --- docs/architecture/adr-001-node-key-refactoring.md | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/docs/architecture/adr-001-node-key-refactoring.md b/docs/architecture/adr-001-node-key-refactoring.md index 977fc37a3..6eb3d66a6 100644 --- a/docs/architecture/adr-001-node-key-refactoring.md +++ b/docs/architecture/adr-001-node-key-refactoring.md @@ -42,7 +42,7 @@ type Node struct { rightNodeKey *NodeKey // new field, need to store in the storage leftNode *Node rightNode *Node - size int64 + size int64 leftNode *Node rightNode *Node subtreeHeight int8 @@ -106,19 +106,19 @@ Using the version and the path, we take advantage of data locality in the LSM tr ``` # node body -add `hash`: +32 byte +add `hash`: +32 byte add `leftNodeKey`, `rightNodeKey`: max (8 + 4) * 2 = +24 byte -remove `leftHash`, `rightHash`: -64 byte -remove `version`: max -8 byte +remove `leftHash`, `rightHash`: -64 byte +remove `version`: max -8 byte ------------------------------------------------------------ - total save 16 byte + total save 16 byte # node key -remove `hash`: -32 byte +remove `hash`: -32 byte add `version|nonce`: +12 byte ------------------------------------ - total save 20 byte + total save 20 byte ``` Removing orphans also provides performance improvements including memory and storage saving. 
From fb0f182360af45c7ba8231c103cdb41438c2e57f Mon Sep 17 00:00:00 2001 From: Cool Developer Date: Tue, 21 Feb 2023 08:26:16 -0500 Subject: [PATCH 24/24] comments --- .../adr-001-node-key-refactoring.md | 46 ++++++++++--------- 1 file changed, 24 insertions(+), 22 deletions(-) diff --git a/docs/architecture/adr-001-node-key-refactoring.md b/docs/architecture/adr-001-node-key-refactoring.md index 6eb3d66a6..62bb4cdd5 100644 --- a/docs/architecture/adr-001-node-key-refactoring.md +++ b/docs/architecture/adr-001-node-key-refactoring.md @@ -77,9 +77,11 @@ We can migrate nodes through the following steps: ### Pruning -We assume keeping only the range versions of `fromVersion` to `toVersion`. Refer to [this issue](https://github.com/cosmos/cosmos-sdk/issues/12989). +The current pruning strategies allow for intermediate versions to exist. With the adoption of this ADR we are migrating to allowing only versions to exist between a range (50-100 instead of 1,25,50-100). -Here we are introducing a new way how to get orphaned nodes which remove in the `n+1`th version updates. +Here we are introducing a new way to get the orphaned nodes which are removed in the `n+1`th version updates without storing orphans in the storage. + +When we want to remove the `n+1`th version - Traverse the tree in-order way based on the root of `n+1`th version. - If we visit the lower version node, pick the node and don't visit further deeply. Pay attention to the order of these nodes. @@ -101,36 +103,36 @@ When we want to rollback to the specific version `n` ### Positive -Using the version and the path, we take advantage of data locality in the LSM tree. Since we commit the sorted data, it can reduce compactions and makes it easy to find the key. Also, it can reduce the key and node size in the storage. +* Using the version and a local nonce, we take advantage of data locality in the LSM tree. Since we commit the sorted data, it can reduce compactions and makes it easy to find the key.
Also, it can reduce the key and node size in the storage. -``` -# node body + ``` + # node body -add `hash`: +32 byte -add `leftNodeKey`, `rightNodeKey`: max (8 + 4) * 2 = +24 byte -remove `leftHash`, `rightHash`: -64 byte -remove `version`: max -8 byte ------------------------------------------------------------- - total save 16 byte + add `hash`: +32 byte + add `leftNodeKey`, `rightNodeKey`: max (8 + 4) * 2 = +24 byte + remove `leftHash`, `rightHash`: -64 byte + remove `version`: max -8 byte + ------------------------------------------------------------ + total save 16 byte -# node key + # node key -remove `hash`: -32 byte -add `version|nonce`: +12 byte ------------------------------------- - total save 20 byte -``` + remove `hash`: -32 byte + add `version|nonce`: +12 byte + ------------------------------------ + total save 20 byte + ``` -Removing orphans also provides performance improvements including memory and storage saving. +* Removing orphans also provides performance improvements including memory and storage saving. ### Negative -The `Update` operation will require extra DB access because we need to take children to calculate the hash of updated nodes. -It doesn't require more access in other cases including `Set`, `Remove`, and `Proof`. +* `Update` operations will require extra DB access because we need to take children to calculate the hash of updated nodes. + * It doesn't require more access in other cases including `Set`, `Remove`, and `Proof`. -It is impossible to remove the individual version. The new design requires more restrict pruning strategies. +* It is impossible to remove the individual version. The new design requires more restrict pruning strategies. -When importing the tree, it may require more memory because of int32 array of the version length. We will introduce the new importing strategy to reduce the memory usage. +* When importing the tree, it may require more memory because of int32 array of the version length. 
We will introduce a new importing strategy to reduce memory usage. ## References