From 0278e3530aa109e329f55a38900af567f9e35b29 Mon Sep 17 00:00:00 2001 From: harry <53987565+h5law@users.noreply.github.com> Date: Sat, 2 Dec 2023 04:57:51 +0000 Subject: [PATCH] chore: tidy up documentation and github workflows/templates (#29) --- .github/ISSUE_TEMPLATE/issue.md | 65 +++-- .../pull_request_template.md | 65 ----- .github/pull_request_template.md | 59 ++++ .github/workflows/test.yml | 60 ++-- .gitignore | 2 +- README.md | 151 ++++++---- docs/KVStore.md | 78 ++++-- docs/MerkleSumTree.md | 152 ++++++---- docs/SMT.md | 259 +++++++++++++----- godoc.go | 8 + proofs.go | 3 +- 11 files changed, 587 insertions(+), 315 deletions(-) delete mode 100644 .github/PULL_REQUEST_TEMPLATE/pull_request_template.md create mode 100644 .github/pull_request_template.md create mode 100644 godoc.go diff --git a/.github/ISSUE_TEMPLATE/issue.md b/.github/ISSUE_TEMPLATE/issue.md index 12a1602..63a2c99 100644 --- a/.github/ISSUE_TEMPLATE/issue.md +++ b/.github/ISSUE_TEMPLATE/issue.md @@ -2,38 +2,40 @@ name: Issue about: General purpose issue for the SMT libarary title: "[REPLACE ME] With a descriptive title" -labels: '' -assignees: '' - +labels: "" +assignees: "" --- - ## Objective -[ What? Issue description] +[ What? Describe the issue in 1-2 sentences] ## Origin Document -[ Why? Issue justification and/or link to another document] +[ Why? Justify the issue in 1-2 sentences. Consider adding a link or a screenshot.] ## Goals - - Goal #1 - Goal #2 - ... -## Deliverable +## Deliverables - - [ ] Deliverable #1 @@ -42,34 +44,31 @@ assignees: '' ## Non-goals / Non-deliverables - -- Nongoal #1 -- Nongoal #2 +- Non-goal #1 +- Non-deliverable #2 - ... -## General issue deliverables +## General deliverables - -- [ ] Update any relevant README(s) -- [ ] Add or update any relevant or supporting [mermaid](https://mermaid-js.github.io/mermaid/) diagrams +- [ ] **Comments**: Add/update TODOs and comments alongside the source code so it is easier to follow. 
+- [ ] **Testing**: Add new tests (unit/fuzz/benchmarks) to the test suite. +- [ ] **Makefile**: Add new targets to the Makefile to make the new functionality easier to use. +- [ ] **Documentation**: Update architectural or development READMEs; use [mermaid](https://mermaid-js.github.io/mermaid/) diagrams where appropriate. -## Testing Methodology +--- - -- [ ] **Task specific tests or benchmarks**: `go test ...` -- [ ] **New tests or benchmarks**: `go test ...` -- [ ] **All tests**: `go test -v` - ---- - -**Creator**: [github username of the creator] -**Co-Owners**: [optional - github usernames of the co-owner(s)] +**Creator**: [GitHub handle of issue owner] +**Co-Owners**: [OPTIONAL - GitHub handle of co-owner(s)] diff --git a/.github/PULL_REQUEST_TEMPLATE/pull_request_template.md b/.github/PULL_REQUEST_TEMPLATE/pull_request_template.md deleted file mode 100644 index 8dc653f..0000000 --- a/.github/PULL_REQUEST_TEMPLATE/pull_request_template.md +++ /dev/null @@ -1,65 +0,0 @@ - - -## Description - - -reviewpad:summary - -## Issue - -Fixes # - -## Type of change - -Please mark the relevant option(s): - -- [ ] New feature, functionality or library -- [ ] Bug fix -- [ ] Code health or cleanup -- [ ] Major breaking change -- [ ] Documentation -- [ ] Other - -## List of changes - - - -- Change #1 -- Change #2 -- ... 
- -## Testing - -- [ ] **Task specific tests or benchmarks**: `go test ...` -- [ ] **New tests or benchmarks**: `go test ...` -- [ ] **All tests**: `go test -v` - - - -## Required Checklist - -- [ ] I have performed a self-review of my own code -- [ ] I have commented my code, particularly in hard-to-understand areas -- [ ] I have added, or updated, [`godoc` format comments](https://go.dev/blog/godoc) on touched members (see: [tip.golang.org/doc/comment](https://tip.golang.org/doc/comment)) -- [ ] I have tested my changes using the available tooling -- [ ] I have updated the corresponding CHANGELOG - -### If Applicable Checklist - -- [ ] Update any relevant README(s) -- [ ] Add or update any relevant or supporting [mermaid](https://mermaid-js.github.io/mermaid/) diagrams -- [ ] I have added tests that prove my fix is effective or that my feature works diff --git a/.github/pull_request_template.md b/.github/pull_request_template.md new file mode 100644 index 0000000..dbd3b4b --- /dev/null +++ b/.github/pull_request_template.md @@ -0,0 +1,59 @@ + + +## Summary + + + +### Human Summary + +### AI Summary + +reviewpad:summary + +## Issue + +Fixes # + +[Explain the reasoning for the PR in 1-2 sentences. Consider adding a link or a screenshot.] 
+ +## Type of change + +Please mark the relevant option(s): + +- [ ] New feature, functionality or library +- [ ] Bug fix +- [ ] Code health or cleanup +- [ ] Documentation +- [ ] Other (specify) + + +## Testing + +- [ ] **Run all unit tests**: `make test_all` +- [ ] **Run all/relevant benchmarks (if optimising)**: `make benchmark_{all | suite name}` + + + +## Required Checklist + +- [ ] I have tested my changes using the available tooling +- [ ] I have performed a self-review of my own code +- [ ] I have commented my code ([`godoc` format comments](https://go.dev/blog/godoc) see: [tip.golang.org/doc/comment](https://tip.golang.org/doc/comment)) + +### If Applicable Checklist + +- [ ] I have added tests that prove my fix is effective or that my feature works +- [ ] I have updated any relevant README(s)/documentation and left TODOs throughout the codebase +- [ ] Add or update any relevant or supporting [mermaid](https://mermaid-js.github.io/mermaid/) diagrams diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 08dca81..161589b 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -7,74 +7,70 @@ on: - main - release/** +concurrency: + group: ${{ github.workflow }}-${{ github.head_ref || github.ref_name }} + cancel-in-progress: true + env: - # Even though we can test against multiple versions, this one is considered a target version. 
- TARGET_GOLANG_VERSION: "1.20" + TARGET_GOLANG_VERSION: "1.20.11" jobs: tests: runs-on: ubuntu-latest strategy: fail-fast: false - matrix: - go: ["1.20"] - name: Go Tests steps: - uses: actions/checkout@v3 + with: + fetch-depth: "0" # Per https://github.com/ignite/cli/issues/1674#issuecomment-1144619147 + - name: Setup go - uses: actions/setup-go@v3 + uses: actions/setup-go@v4 with: - go-version: ${{ matrix.go }} - - name: Setup Golang caches - uses: actions/cache@v3 + go-version: ${{ env.TARGET_GOLANG_VERSION }} + + - name: golangci-lint + uses: golangci/golangci-lint-action@v3 with: - path: | - ~/.cache/go-build - ~/go/pkg/mod - key: ${{ runner.os }}-golang-${{ matrix.go }}-${{ hashFiles('**/go.sum') }} - restore-keys: | - ${{ runner.os }}-golang-${{ matrix.go }}- + version: latest + args: --timeout=10m + skip-cache: true + only-new-issues: true + - name: Create coverage report and run tests run: | set -euo pipefail GODEBUG=netdns=cgo go test -p 1 -json ./ -mod=readonly -timeout 8m -race -coverprofile=coverage.txt -covermode=atomic 2>&1 | tee test_results.json + - name: Sanitize test results # We're utilizing `tee` above which can capture non-json stdout output so we need to remove non-json lines before additional parsing and submitting it to the external github action. - if: ${{ always() && env.TARGET_GOLANG_VERSION == matrix.go }} run: cat test_results.json | jq -c -R 'fromjson? | select(type == "object")' > tmp.json && mv tmp.json test_results.json + - name: Output test failures # Makes it easier to find failed tests so no need to scroll through the whole log. 
- if: ${{ failure() && env.TARGET_GOLANG_VERSION == matrix.go }} + if: ${{ failure() }} run: | jq --argjson fail_tests "$(jq -c -r 'select(.Action == "fail") | select(.Test) | .Test' test_results.json | jq -R -s -c -r 'split("\n") | map(select(length>0))')" 'select(.Test as $t | ($fail_tests | arrays)[] | select($t == .)) | select(.Output) | .Output' test_results.json | jq -r | sed ':a;N;$!ba;s/\n\n/\n/g' > test_failures.json cat test_failures.json exit 1 + - name: Upload test results - if: ${{ always() && env.TARGET_GOLANG_VERSION == matrix.go }} uses: actions/upload-artifact@v3 with: name: test-results path: | test_*.json + - name: Annotate tests on GitHub # Only annotate if the test failed on target version to avoid duplicated annotations on GitHub. - if: ${{ always() && env.TARGET_GOLANG_VERSION == matrix.go }} uses: guyarb/golang-test-annotations@v0.5.1 with: test-results: test_results.json + - name: Upload coverage to Codecov - if: ${{ always() && env.TARGET_GOLANG_VERSION == matrix.go }} uses: codecov/codecov-action@v3 with: files: ./coverage.txt - - name: golangci-lint - if: ${{ always() && env.TARGET_GOLANG_VERSION == matrix.go }} - uses: golangci/golangci-lint-action@v3 - with: - version: latest - args: --timeout=10m - skip-cache: true - only-new-issues: true build: runs-on: ubuntu-latest @@ -83,20 +79,22 @@ jobs: fail-fast: false matrix: goarch: ["arm64", "amd64"] - go: ["1.20"] timeout-minutes: 5 name: Build for ${{ matrix.goarch }} steps: - - uses: actions/setup-go@v3 + - uses: actions/setup-go@v4 with: - go-version: ${{ matrix.go }} + go-version: ${{ env.TARGET_GOLANG_VERSION }} + - uses: actions/checkout@v3 + - uses: technote-space/get-diff-action@v4 with: PATTERNS: | **/**.go go.mod go.sum + - name: Go build run: GOOS=linux GOARCH=${{ matrix.goarch }} go build if: env.GIT_DIFF diff --git a/.gitignore b/.gitignore index 9f11b75..e43b0f9 100644 --- a/.gitignore +++ b/.gitignore @@ -1 +1 @@ -.idea/ +.DS_Store diff --git a/README.md b/README.md index 
02fe2c1..2e3f7d8 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -# smt +# smt [![Tag](https://img.shields.io/github/v/tag/pokt-network/smt.svg?sort=semver)](https://img.shields.io/github/v/tag/pokt-network/smt.svg?sort=semver) [![GoDoc](https://godoc.org/github.com/pokt-network/smt?status.svg)](https://godoc.org/github.com/pokt-network/smt) @@ -8,27 +8,56 @@ Note: **Requires Go 1.20+** + + - [Overview](#overview) - [Documentation](#documentation) +- [Tests](#tests) - [Benchmarks](#benchmarks) + - [Definitions](#definitions) + - [Bytes/Operation (B/op)](#bytesoperation-bop) + - [Commit](#commit) + - [Sizing](#sizing) - [SMT](#smt) - [Fill](#fill) - [Operations](#operations) - [SMST](#smst) - [Fill](#fill-1) - [Operations](#operations-1) + - [Proofs](#proofs) + - [SMT](#smt-1) + - [SMST](#smst-1) + + ## Overview -This is a Go library that implements a Sparse Merkle tree for a key-value map. The tree implements the same optimisations specified in the [Libra whitepaper][libra whitepaper], to reduce the number of hash operations required per tree operation to O(k) where k is the number of non-empty elements in the tree. +This is a Go library that implements a Sparse Merkle tree for a key-value map. +The tree implements the same optimisations specified in the [Libra whitepaper], +to reduce the number of hash operations required per tree operation to $O(k)$ +where $k$ is the number of non-empty elements in the tree. It is implemented +in a similar way to the [JMT whitepaper], with additional features and proof +mechanics. ## Documentation -Documentation for the different aspects of this library can be found in the [docs](./docs/) directory. +Documentation for the different aspects of this library can be found in the +[docs](./docs/) directory. 
+ +## Tests + +To run all the unit tests in the repo (excluding fuzz tests and benchmarks) +simply run the following command: + +```sh +make test_all +``` ## Benchmarks -Benchmarks for the different aspects of this SMT library can be found in [benchmarks](./benchmarks/). In order to run the entire benchmarking suite use the following command: +Benchmarks for the different aspects of this SMT library can be found in +[benchmarks](./benchmarks/). In order to run the entire benchmarking suite use +the following command: ```sh make benchmark_all @@ -36,26 +65,38 @@ make benchmark_all ### Definitions -Below is a list of terms used in the benchmarks' results that may need clarification. +Below is a list of terms used in the benchmarks' results that may need +clarification. #### Bytes/Operation (B/op) - - This refers to the number of bytes allocated for each operation. + +- This refers to the number of bytes allocated for each operation. #### Commit - - The `Commit` term refers to the `Commit` method of the tree. This takes all changes (which are made in memory) to the tree and writes them to the underlying database. + +- The `Commit` term refers to the `Commit` method of the tree. This takes all + changes (which are made in memory) to the tree and writes them to the + underlying database. #### Sizing - - The tests use the following sizes: 0.1M, 0.5M, 1M, 5M, 10M. The `M` refers to millions hence: - - 0.1M = 100,000 (One hundred thousand) - - 0.5M = 500,000 (Five hundred thousand) - - 1M = 1,000,000 (One million) - - 5M = 5,000,000 (Five million) - - 10M = 10,000,000 (Ten million) - - These sizes refer to the number of key-value pairs or key-value-sum triples inserted into the tree either beforehand or during the benchmark depending on which benchmark it is. -_NOTE: Unless otherwise stated the benchmarks in this document were ran on a 2023 14-inch Macbook Pro M2 Max with 32GB of RAM. 
The trees tested are using the `sha256.New()` hasher._ +- The tests use the following sizes: 0.1M, 0.5M, 1M, 5M, 10M. The `M` refers to + millions hence: + - 0.1M = 100,000 (One hundred thousand) + - 0.5M = 500,000 (Five hundred thousand) + - 1M = 1,000,000 (One million) + - 5M = 5,000,000 (Five million) + - 10M = 10,000,000 (Ten million) +- These sizes refer to the number of key-value pairs or key-value-sum triples + inserted into the tree either beforehand or during the benchmark depending on + which benchmark it is. -_TODO: There is an opportunity to do a fuzz test where we commit every `N` updates, if this ever becomes a bottlneck_ +_NOTE: Unless otherwise stated the benchmarks in this document were run on a +2023 14-inch MacBook Pro M2 Max with 32GB of RAM. The trees tested are using the +`sha256.New()` hasher._ + +_TODO: There is an opportunity to do a fuzz test where we commit every `N` +updates, if this ever becomes a bottleneck_ ### SMT @@ -67,7 +108,10 @@ make benchmark_smt #### Fill -The "fill" benchmarks cover the time taken to insert `N` key-value pairs into the SMT, as well as how long it takes to do this and commit these changes to disk. This gives us an insight into how long it takes to build a tree of a certain size. +The "fill" benchmarks cover the time taken to insert `N` key-value pairs into +the SMT, as well as how long it takes to do this and commit these changes to +disk. This gives us an insight into how long it takes to build a tree of a +certain size. 
In order to run the SMT filling benchmarks use the following command: @@ -75,22 +119,25 @@ In order to run the SMT filling benchmarks use the following command: make benchmark_smt_fill ``` -| Benchmark | # Values | Iterations | Time (s/op) | Bytes (B/op) | Allocations (allocs/op) | -| --------------- |----------| ---------- | --------------- | --------------- | ----------------------- | -| Fill | 0.1M | 10 | 0.162967196 | 159,479,499 | 2,371,598 | -| Fill & Commit | 0.1M | 10 | 2.877307858 | 972,961,486 | 15,992,605 | -| Fill | 0.5M | 10 | 0.926864771 | 890,408,326 | 13,021,258 | -| Fill & Commit | 0.5M | 10 | 16.043430012 | 5,640,034,396 | 82,075,720 | -| Fill | 1M | 10 | 2.033616088 | 1,860,523,968 | 27,041,639 | -| Fill & Commit | 1M | 10 | 32.617249642 | 12,655,347,004 | 166,879,661 | -| Fill | 5M | 10 | 12.502309738 | 10,229,139,731 | 146,821,675 | -| Fill & Commit | 5M | 10 | 175.421250979 | 78,981,342,709 | 870,235,579 | -| Fill | 10M | 10 | 29.718092496 | 21,255,245,031 | 303,637,210 | -| Fill & Commit | 10M | 10 | 396.142675962 | 173,053,933,624 | 1,775,304,977 | +| Benchmark | # Values | Iterations | Time (s/op) | Bytes (B/op) | Allocations (allocs/op) | +| ------------- | -------- | ---------- | ------------- | --------------- | ----------------------- | +| Fill | 0.1M | 10 | 0.162967196 | 159,479,499 | 2,371,598 | +| Fill & Commit | 0.1M | 10 | 2.877307858 | 972,961,486 | 15,992,605 | +| Fill | 0.5M | 10 | 0.926864771 | 890,408,326 | 13,021,258 | +| Fill & Commit | 0.5M | 10 | 16.043430012 | 5,640,034,396 | 82,075,720 | +| Fill | 1M | 10 | 2.033616088 | 1,860,523,968 | 27,041,639 | +| Fill & Commit | 1M | 10 | 32.617249642 | 12,655,347,004 | 166,879,661 | +| Fill | 5M | 10 | 12.502309738 | 10,229,139,731 | 146,821,675 | +| Fill & Commit | 5M | 10 | 175.421250979 | 78,981,342,709 | 870,235,579 | +| Fill | 10M | 10 | 29.718092496 | 21,255,245,031 | 303,637,210 | +| Fill & Commit | 10M | 10 | 396.142675962 | 173,053,933,624 | 1,775,304,977 | #### 
Operations -The "operations" benchmarks cover the time taken to perform a single operation on an SMT of a given size, and also how long doing this operation followed by a commit would take. This gives us insight into how the SMT operates when filled to differing degrees. +The "operations" benchmarks cover the time taken to perform a single operation +on an SMT of a given size, and also how long doing this operation followed by a +commit would take. This gives us insight into how the SMT operates when filled +to differing degrees. In order to run the SMT operation benchmarks use the following command: @@ -99,7 +146,7 @@ make benchmark_smt_ops ``` | Benchmark | Prefilled Values | Iterations | Time (ns/op) | Bytes (B/op) | Allocations (allocs/op) | -| --------------- |------------------| ---------- | ------------ | ------------ | ----------------------- | +| --------------- | ---------------- | ---------- | ------------ | ------------ | ----------------------- | | Update | 0.1M | 740,618 | 1,350 | 1,753 | 25 | | Update & Commit | 0.1M | 21,022 | 54,665 | 13,110 | 281 | | Update | 0.5M | 605,348 | 1,682 | 1,957 | 26 | @@ -141,7 +188,10 @@ make benchmark_smst #### Fill -The "fill" benchmarks cover the time taken to insert `N` key-value-sum triples into the SMST, as well as how long it takes to do this and commit these changes to disk. This gives us an insight into how long it takes to build a tree of a certain size. +The "fill" benchmarks cover the time taken to insert `N` key-value-sum triples +into the SMST, as well as how long it takes to do this and commit these changes +to disk. This gives us an insight into how long it takes to build a tree of a +certain size. 
In order to run the SMST filling benchmarks use the following command: @@ -149,22 +199,25 @@ In order to run the SMST filling benchmarks use the following command: make benchmark_smst_fill ``` -| Benchmark | # Values | Iterations | Time (s/op) | Bytes (B/op) | Allocations (allocs/op) | -| --------------- |----------| ---------- | --------------- | --------------- | ----------------------- | -| Fill | 0.1M | 10 | 0.157951888 | 165,878,234 | 2,471,593 | -| Fill & Commit | 0.1M | 10 | 3.011097462 | 1,058,069,050 | 16,664,811 | -| Fill | 0.5M | 10 | 0.927521862 | 922,408,350 | 13,521,259 | -| Fill & Commit | 0.5M | 10 | 15.338199979 | 6,533,439,773 | 85,880,046 | -| Fill | 1M | 10 | 1.982756162 | 1,924,516,467 | 28,041,610 | -| Fill & Commit | 1M | 10 | 31.197517821 | 14,874,342,889 | 175,474,251 | -| Fill | 5M | 10 | 12.054370871 | 10,549,075,488 | 151,821,423 | -| Fill & Commit | 5M | 10 | 176.912009238 | 89,667,234,678 | 914,653,740 | -| Fill | 10M | 10 | 26.859672362 | 21,894,837,504 | 313,635,611 | -| Fill & Commit | 10M | 10 | 490.805535617 | 197,997,807,905 | 1,865,882,489 | +| Benchmark | # Values | Iterations | Time (s/op) | Bytes (B/op) | Allocations (allocs/op) | +| ------------- | -------- | ---------- | ------------- | --------------- | ----------------------- | +| Fill | 0.1M | 10 | 0.157951888 | 165,878,234 | 2,471,593 | +| Fill & Commit | 0.1M | 10 | 3.011097462 | 1,058,069,050 | 16,664,811 | +| Fill | 0.5M | 10 | 0.927521862 | 922,408,350 | 13,521,259 | +| Fill & Commit | 0.5M | 10 | 15.338199979 | 6,533,439,773 | 85,880,046 | +| Fill | 1M | 10 | 1.982756162 | 1,924,516,467 | 28,041,610 | +| Fill & Commit | 1M | 10 | 31.197517821 | 14,874,342,889 | 175,474,251 | +| Fill | 5M | 10 | 12.054370871 | 10,549,075,488 | 151,821,423 | +| Fill & Commit | 5M | 10 | 176.912009238 | 89,667,234,678 | 914,653,740 | +| Fill | 10M | 10 | 26.859672362 | 21,894,837,504 | 313,635,611 | +| Fill & Commit | 10M | 10 | 490.805535617 | 197,997,807,905 | 1,865,882,489 | #### 
Operations -The "operations" benchmarks cover the time taken to perform a single operation on an SMST of a given size, and also how long doing this operation followed by a commit would take. This gives us insight into how the SMST operates when filled to differing degrees. +The "operations" benchmarks cover the time taken to perform a single operation +on an SMST of a given size, and also how long doing this operation followed by +a commit would take. This gives us insight into how the SMST operates when +filled to differing degrees. In order to run the SMST operation benchmarks use the following command: @@ -207,7 +260,8 @@ make benchmark_smst_ops ### Proofs -To run the tests to average the proof size for numerous prefilled trees use the following command: +To run the tests to average the proof size for numerous prefilled trees use the +following command: ```sh make benchmark_proof_sizes @@ -216,7 +270,7 @@ make benchmark_proof_sizes #### SMT | Prefilled Size | Average Serialised Proof Size (bytes) | Min (bytes) | Max (bytes) | Average Serialised Compacted Proof Size (bytes) | Min (bytes) | Max (bytes) | -|----------------|---------------------------------------|-------------|-------------|-------------------------------------------------|-------------|-------------| +| -------------- | ------------------------------------- | ----------- | ----------- | ----------------------------------------------- | ----------- | ----------- | | 100,000 | 780 | 650 | 1310 | 790 | 692 | 925 | | 500,000 | 856 | 716 | 1475 | 866 | 758 | 1024 | | 1,000,000 | 890 | 716 | 1475 | 900 | 758 | 1057 | @@ -226,11 +280,12 @@ make benchmark_proof_sizes #### SMST | Prefilled Size | Average Serialised Proof Size (bytes) | Min (bytes) | Max (bytes) | Average Serialised Compacted Proof Size (bytes) | Min (bytes) | Max (bytes) | -|----------------|---------------------------------------|-------------|-------------|-------------------------------------------------|-------------|-------------| +| 
-------------- | ------------------------------------- | ----------- | ----------- | ----------------------------------------------- | ----------- | ----------- | | 100,000 | 935 | 780 | 1590 | 937 | 822 | 1101 | | 500,000 | 1030 | 862 | 1795 | 1032 | 904 | 1224 | | 1,000,000 | 1071 | 868 | 1795 | 1073 | 910 | 1265 | | 5,000,000 | 1166 | 975 | 2123 | 1169 | 1018 | 1388 | | 10,000,000 | 1207 | 1026 | 2123 | 1210 | 1059 | 1429 | +[jmt whitepaper]: https://developers.diem.com/papers/jellyfish-merkle-tree/2021-01-14.pdf [libra whitepaper]: https://diem-developers-components.netlify.app/papers/the-diem-blockchain/2020-05-26.pdf diff --git a/docs/KVStore.md b/docs/KVStore.md index 9fe78f5..3fcda9f 100644 --- a/docs/KVStore.md +++ b/docs/KVStore.md @@ -1,4 +1,6 @@ -# KVStore +# KVStore + + - [Overview](#overview) - [Implementation](#implementation) @@ -13,35 +15,54 @@ - [Clear All Key-Value Pairs](#clear-all-key-value-pairs) - [Len](#len) + + ## Overview -The `KVStore` interface is a key-value store that is used by the `SMT` and `SMST` as its underlying database for its nodes. However, it is an independent key-value store that can be used for any purpose. +The `KVStore` interface is a key-value store that is used by the `SMT` and +`SMST` as its underlying database for its nodes. However, it is an independent +key-value store that can be used for any purpose. ## Implementation -The `KVStore` is implemented in [`kvstore.go`](../kvstore.go) and is a wrapper around the [BadgerDB](https://github.com/dgraph-io/badger) key-value database. +The `KVStore` is implemented in [`kvstore.go`](../kvstore.go) and is a wrapper +around the [BadgerDB](https://github.com/dgraph-io/badger) key-value database. -The interface defines simple key-value store accessor methods as well as other methods desired from a key-value database in general, this can be found in [`kvstore.go`](../kvstore.go). 
+The interface defines simple key-value store accessor methods as well as other +methods desired from a key-value database in general, this can be found in +[`kvstore.go`](../kvstore.go). -_NOTE: The `KVStore` interface can be implemented by any key-value store that satisfies the interface and used as the underlying database store for the `SM(S)T`_ +_NOTE: The `KVStore` interface can be implemented by any key-value store that +satisfies the interface and used as the underlying database store for the +`SM(S)T`_ ### In-Memory and Persistent -The `KVStore` implementation can be used as an in-memory or persistent key-value store. The `NewKVStore` function takes a `path` argument that can be used to specify a path to a directory to store the database files. If the `path` is an empty string, the database will be stored in-memory. +The `KVStore` implementation can be used as an in-memory or persistent key-value +store. The `NewKVStore` function takes a `path` argument that can be used to +specify a path to a directory to store the database files. If the `path` is an +empty string, the database will be stored in-memory. -_NOTE: When providing a path for a persistent database, the directory must exist and be writeable by the user running the application._ +_NOTE: When providing a path for a persistent database, the directory must exist +and be writeable by the user running the application._ ### Store methods -As a key-value store the `KVStore` interface defines the simple `Get`, `Set` and `Delete` methods to access and modify the underlying database. +As a key-value store the `KVStore` interface defines the simple `Get`, `Set` and +`Delete` methods to access and modify the underlying database. ### Lifecycle Methods -The `Stop` method **must** be called when the `KVStore` is no longer needed. This method closes the underlying database and frees up any resources used by the `KVStore`. +The `Stop` method **must** be called when the `KVStore` is no longer needed. 
+This method closes the underlying database and frees up any resources used by +the `KVStore`. -For persistent databases, the `Stop` method should be called when the application no longer needs to access the database. For in-memory databases, the `Stop` method should be called when the `KVStore` is no longer needed. +For persistent databases, the `Stop` method should be called when the +application no longer needs to access the database. For in-memory databases, the +`Stop` method should be called when the `KVStore` is no longer needed. -_NOTE: A persistent `KVStore` that is not stopped will stop another `KVStore` from opening the database._ +_NOTE: A persistent `KVStore` that is not stopped will stop another `KVStore` +from opening the database._ ### Data Methods @@ -49,34 +70,49 @@ The `KVStore` interface provides two methods to allow backups and restorations. #### Backups -The `Backup` method takes an `io.Writer` and a `bool` to indicate whether the backup should be incremental or not. The `io.Writer` is then filled with the contents of the database in an opaque format used by the underlying database for this purpose. +The `Backup` method takes an `io.Writer` and a `bool` to indicate whether the +backup should be incremental or not. The `io.Writer` is then filled with the +contents of the database in an opaque format used by the underlying database for +this purpose. -When the `incremental` bool is `false` a full backup will be performed, otherwise an incremental backup will be performed. This is enabled by the `KVStore` keeping the timestamp of its last backup and only backing up data that has been modified since the last backup. +When the `incremental` bool is `false` a full backup will be performed, +otherwise an incremental backup will be performed. This is enabled by the +`KVStore` keeping the timestamp of its last backup and only backing up data that +has been modified since the last backup. 
#### Restorations -The `Restore` method takes an `io.Reader` and restores the database from this reader. +The `Restore` method takes an `io.Reader` and restores the database from this +reader. -The `KVStore` calling the `Restore` method is expected to be initialised and open, otherwise the restore will fail. +The `KVStore` calling the `Restore` method is expected to be initialised and +open, otherwise the restore will fail. -_NOTE: Any data contained in the `KVStore` when calling restore will be overwritten._ +_NOTE: Any data contained in the `KVStore` when calling restore will be +overwritten._ ### Accessor Methods -The accessor methods enable simpler access to the underlying database for certain tasks that are desirable in a key-value store. +The accessor methods enable simpler access to the underlying database for +certain tasks that are desirable in a key-value store. #### Prefixed and Sorted Get All -The `GetAll` method supports the retrieval of all keys and values, where the key has a specific prefix. The `descending` bool indicates whether the keys should be returned in descending order or not. +The `GetAll` method supports the retrieval of all keys and values, where the key +has a specific prefix. The `descending` bool indicates whether the keys should +be returned in descending order or not. -_NOTE: In order to retrieve all keys and values the empty prefix `[]byte{}` should be used to match all keys_ +_NOTE: In order to retrieve all keys and values the empty prefix `[]byte{}` +should be used to match all keys_ #### Clear All Key-Value Pairs The `ClearAll` method removes all key-value pairs from the database. 
-_NOTE: The `ClearAll` method is intended to debug purposes and should not be used in production unless necessary_ +_NOTE: The `ClearAll` method is intended for debugging purposes and should not be +used in production unless necessary_ #### Len -The `Len` method returns the number of keys in the database, similarly to how the `len` function can return the length of a map. +The `Len` method returns the number of keys in the database, similarly to how +the `len` function can return the length of a map. diff --git a/docs/MerkleSumTree.md b/docs/MerkleSumTree.md index 8592a82..78afaaa 100644 --- a/docs/MerkleSumTree.md +++ b/docs/MerkleSumTree.md @@ -1,4 +1,6 @@ -# Sparse Merkle Sum Tree (smst) +# Sparse Merkle Sum Tree (smst) + + - [Overview](#overview) - [Implementation](#implementation) @@ -11,31 +13,61 @@ - [Nil Values](#nil-values) - [Example](#example) + + ## Overview -Merkle Sum trees function very similarly to regular Merkle trees, with the primary difference being that each leaf node in a Merkle sum tree includes a `sum` in addition to its value. This allows for the entire tree's total sum to be calculated easily, as the sum of any branch is the sum of its children. Thus the sum of the root node is the sum of the entire tree. Like a normal Merkle tree, the Merkle sum tree allows for the efficient verification of its members, proving non-membership / membership of certain elements and generally functions the same. +Merkle Sum trees function very similarly to regular Merkle trees, with the +primary difference being that each leaf node in a Merkle sum tree includes a +`sum` in addition to its value. This allows for the entire tree's total sum to +be calculated easily, as the sum of any branch is the sum of its children. Thus +the sum of the root node is the sum of the entire tree. Like a normal Merkle +tree, the Merkle sum tree allows for the efficient verification of its members, +proving inclusion/exclusion of certain elements and generally functions the same. 
-Merkle sum trees can be very useful for blockchain applications in that they can easily track accounts balances and, thus, the total balance of all accounts. They can be very useful in proof of reserve systems whereby one needs to prove the membership of an element that is a component of the total sum, along with a verifiable total sum of all elements. +Merkle sum trees can be very useful for blockchain applications in that they can +easily track account balances and, thus, the total balance of all accounts. +They can be very useful in proof of reserve systems whereby one needs to prove +the inclusion of an element that is a component of the total sum, along with a +verifiable total sum of all elements. ## Implementation -The implementation of the Sparse Merkle Sum Tree (SMST) follows, in principle, the same implementation as the [Plasma Core Merkle Sum tree][plasma core docs]. The main differences with the current SMT implementation are outlined below. The primary difference lies in the encoding of node data within the tree to accommodate for the sum. +The implementation of the Sparse Merkle Sum Tree (SMST) follows, in principle, +the same implementation as the [Plasma Core Merkle Sum tree][plasma core docs]. +The main differences with the current SMT implementation are outlined below. +The primary difference lies in the encoding of node data within the tree to +accommodate the sum. + +_NOTE: The Plasma Core Merkle Sum tree uses a 16 byte hex string to encode the +sum whereas this SMST implementation uses an 8 byte binary representation of the +`uint64` sum._ -In practice the SMST is a wrapper around the SMT with a new field added to the `TreeSpec`: `sumTree bool` this determines whether the SMT should follow its regular encoding of that of the sum tree. +In practice the SMST is a wrapper around the SMT with a new field added to the +`TreeSpec`: `sumTree bool`; this determines whether the SMT should follow its +regular encoding or that of the sum tree. 
-_Note_: The Plasma Core Merkle Sum tree uses a 16 byte hex string to encode the sum whereas this SMST implementation uses an 8 byte binary representation of the `uint64` sum. +See the [SMT documentation](./SMT.md) for the details on how the SMT works. The majority of the code relating to the SMST can be found in: - [smst.go](../smst.go) - main SMT wrapper functionality - [hasher.go](../hasher.go) - SMST encoding functions -- [types.go](../types.go) - SMST interfaces and node serialisation/hashing functions +- [types.go](../types.go) - SMST interfaces and node serialisation/hashing + functions ### Sum Encoding -The sum for any node is encoded in a byte array with a fixed size (`[8]byte`) this allows for the sum to fully represent a `uint64` value in binary form. The golang `encoding/binary` package is used to encode the sum with `binary.BigEndian.PutUint64(sumBz[:], sum)` into a byte array `sumBz`. +The sum for any node is encoded in a byte array with a fixed size (`[8]byte`); +this allows for the sum to fully represent a `uint64` value in binary form. +The golang `encoding/binary` package is used to encode the sum with +`binary.BigEndian.PutUint64(sumBz[:], sum)` into a byte array `sumBz`. -In order for the SMST to include the sum into a leaf node the SMT the SMST initialises the SMT with the `WithValueHasher(nil)` option so that the SMT does **not** hash any values. The SMST will then hash the value and append the sum bytes to the end of the hashed value, using whatever `ValueHasher` was given to the SMST on initialisation. +In order for the SMST to include the sum into a leaf node of the SMT, the SMST +initialises the SMT with the `WithValueHasher(nil)` option so that the SMT does +**not** hash any values. The SMST will then hash the value and append the sum +bytes to the end of the hashed value, using whatever `ValueHasher` was given to +the SMST on initialisation.
```mermaid graph TD @@ -47,25 +79,28 @@ graph TD subgraph SMST[SMST] SS1[ValueHasher: SHA256] subgraph SUM["SMST.Update()"] - SU1["ValueHash = ValueHasher(Value)"] + SU1["valueHash = ValueHasher(Value)"] SU2["sumBytes = binary(Sum)"] - SU3["ValueHash = append(valueHash, sumBytes...)"] + SU3["valueHash = append(valueHash, sumBytes...)"] end end subgraph SMT[SMT] SM1[ValueHasher: nil] subgraph UPD["SMT.Update()"] - U1["valueHash = value"] - U2["SMT.nodeStore.Set(Key, ValueHash)"] + U2["SMT.nodeStore.Set(Key, valueHash)"] end end - KVS -->Key,Value,Sum--> SMST - SMST -->Key,ValueHash--> SMT + KVS --"Key + Value + Sum"--> SMST + SMST --"Key + valueHash"--> SMT ``` ### Digests -The digest for any node in the SMST is calculated in partially the same manner as the regular SMT. The main differences are that the sum is included in the digest `preimage` - meaning the hash of any node's data includes **BOTH** its data _and_ sum. In addition to this the sum is appended to the hash producing digests like so: +The digest for any node in the SMST is calculated in partially the same manner +as the regular SMT. The main differences are that the sum is included in the +digest `preimage` - meaning the hash of any node's data includes **BOTH** its +data _and_ sum. 
In addition to this the sum is appended to the hash producing +digests like so: `digest = [node hash]+[8 byte sum]` @@ -83,20 +118,27 @@ Therefore for the following node types, the digests are computed as follows: - Prefix: `[]byte{0}` - `sumBytes = binary(sum)` - `digest = hash([]byte{0} + path + valueHash) + sumBytes` - - **Note**: as mentioned above the `valueHash` is already appended with the `sumBytes` prior to insertion in the underlying SMT + - **Note**: as mentioned above the `valueHash` is already appended with the + `sumBytes` prior to insertion in the underlying SMT - **Lazy Nodes** - - Prefix of the actual node type is stored in the persisted digest as determined above + - Prefix of the actual node type is stored in the persisted digest as + determined above - `digest = persistedDigest` -This means that with a hasher such as `sha256.New()` whose hash size is `32 bytes`, the digest of any node will be `40 bytes` in length. +This means that with a hasher such as `sha256.New()` whose hash size is +`32 bytes`, the digest of any node will be `40 bytes` in length. ### Visualisations -The following diagrams are representations of how the tree and its components can be visualised. +The following diagrams are representations of how the tree and its components +can be visualised. #### General Tree Structure -None of the nodes have a different structure to the regular SMT, but the digests of nodes now include their sum as described above and the sum is included in the leaf node's value. For the purposes of visualization, the sum is included in all nodes as an extra field. +None of the nodes have a different structure to the regular SMT, but the digests +of nodes now include their sum as described above and the sum is included in the +leaf node's value. For the purposes of visualization, the sum is included in all +nodes as an extra field. 
```mermaid graph TB @@ -143,7 +185,10 @@ graph TB #### Binary Sum Digests -The following diagram shows the structure of the digests of the nodes within the tree in a simplified manner, again none of the nodes have a `sum` field, but for visualisation purposes the sum is included in all nodes with the exception of the leaf nodes where the sum is shown as part of its value. +The following diagram shows the structure of the digests of the nodes within +the tree in a simplified manner, again none of the nodes have a `sum` field, +but for visualisation purposes the sum is included in all nodes with the +exception of the leaf nodes where the sum is shown as part of its value. ```mermaid graph TB @@ -201,11 +246,16 @@ graph TB ## Sum -The `Sum()` function adds functionality to easily retrieve the tree's current sum as a `uint64`. +The `Sum()` function adds functionality to easily retrieve the tree's current +sum as a `uint64`. ## Nil Values -A `nil` value and `0` weight is the same as the placeholder value and default sum in the SMST and as such inserting a key with a `nil` value has specific behaviours. Although the insertion of a key-value-weight grouping with a `nil` value and `0` weight will alter the root hash, a proof will not recognise the key as being in the tree. +A `nil` value and `0` weight is the same as the placeholder value and default +sum in the SMST and as such inserting a key with a `nil` value has specific +behaviours. Although the insertion of a key-value-weight grouping with a `nil` +value and `0` weight will alter the root hash, a proof will not recognise the +key as being in the tree. 
Assume `(key, value, weight)` groupings as follows: @@ -226,44 +276,44 @@ Assume `(key, value, weight)` groupings as follows: package main import ( - "crypto/sha256" - "fmt" + "crypto/sha256" + "fmt" - "github.com/pokt-network/smt" + "github.com/pokt-network/smt" ) func main() { - // Initialise a new in-memory key-value store to store the nodes of the tree - // (Note: the tree only stores hashed values, not raw value data) - nodeStore := smt.NewKVStore("") + // Initialise a new in-memory key-value store to store the nodes of the tree + // (Note: the tree only stores hashed values, not raw value data) + nodeStore := smt.NewKVStore("") - // Ensure the database connection closes - defer nodeStore.Stop() + // Ensure the database connection closes + defer nodeStore.Stop() - // Initialise the tree - tree := smt.NewSparseMerkleSumTree(nodeStore, sha256.New()) + // Initialise the tree + tree := smt.NewSparseMerkleSumTree(nodeStore, sha256.New()) - // Update tree with keys, values and their sums - _ = tree.Update([]byte("foo"), []byte("oof"), 10) - _ = tree.Update([]byte("baz"), []byte("zab"), 7) - _ = tree.Update([]byte("bin"), []byte("nib"), 3) + // Update tree with keys, values and their sums + _ = tree.Update([]byte("foo"), []byte("oof"), 10) + _ = tree.Update([]byte("baz"), []byte("zab"), 7) + _ = tree.Update([]byte("bin"), []byte("nib"), 3) - // Commit the changes to the nodeStore - _ = tree.Commit() + // Commit the changes to the nodeStore + _ = tree.Commit() - sum := tree.Sum() - fmt.Println(sum == 20) // true + sum := tree.Sum() + fmt.Println(sum == 20) // true - // Generate a Merkle proof for "foo" - proof, _ := tree.Prove([]byte("foo")) - root := tree.Root() // We also need the current tree root for the proof + // Generate a Merkle proof for "foo" + proof, _ := tree.Prove([]byte("foo")) + root := tree.Root() // We also need the current tree root for the proof - // Verify the Merkle proof for "foo"="oof" where "foo" has a sum of 10 - if valid := 
smt.VerifySumProof(proof, root, []byte("foo"), []byte("oof"), 10, tree.Spec()); valid { - fmt.Println("Proof verification succeeded.") - } else { - fmt.Println("Proof verification failed.") - } + // Verify the Merkle proof for "foo"="oof" where "foo" has a sum of 10 + if valid := smt.VerifySumProof(proof, root, []byte("foo"), []byte("oof"), 10, tree.Spec()); valid { + fmt.Println("Proof verification succeeded.") + } else { + fmt.Println("Proof verification failed.") + } } ``` diff --git a/docs/SMT.md b/docs/SMT.md index 062c7f7..facc955 100644 --- a/docs/SMT.md +++ b/docs/SMT.md @@ -1,4 +1,6 @@ -# smt +# smt + + - [Overview](#overview) - [Implementation](#implementation) @@ -14,17 +16,29 @@ - [Visualisation](#visualisation) - [Values](#values) - [Nil values](#nil-values) -- [Hashers \& Digests](#hashers--digests) +- [Hashers & Digests](#hashers--digests) - [Proofs](#proofs) - [Verification](#verification) + - [Closest Proof](#closest-proof) + - [Compression](#compression) + - [Serialisation](#serialisation) - [Database](#database) - [Data Loss](#data-loss) - [Sparse Merkle Sum Tree](#sparse-merkle-sum-tree) - [Example](#example) + + ## Overview -Sparse Merkle Trees (SMTs) are efficient and secure data structures for storing key-value pairs. They use a hash-based tree structure to represent the data sparsely, saving memory. Cryptographic hash functions ensure data integrity and authenticity. SMTs enable users to prove the existence or non-existence of specific key-value pairs by constructing cryptographic proofs. These properties make SMTs valuable in applications like blockchains, decentralized databases, and authenticated data structures, providing optimized and trustworthy data storage and verification. +Sparse Merkle Trees (SMTs) are efficient and secure data structures for storing +key-value pairs. They use a hash-based tree structure to represent the data +sparsely, saving memory. Cryptographic hash functions ensure data integrity and +authenticity. 
SMTs enable users to prove the existence or non-existence of +specific key-value pairs by constructing cryptographic proofs. These properties +make SMTs valuable in applications like blockchains, decentralized databases, +and authenticated data structures, providing optimized and trustworthy data +storage and verification. See [smt.go](../smt.go) for more details on the implementation. @@ -42,42 +56,69 @@ The SMT has 4 node types that are used to construct the tree: - Prefixed `[]byte{0}` - `digest = hash([]byte{0} + path + value)` - Lazy Nodes - - Prefix of the actual node type is stored in the persisted digest as determined above + - Prefix of the actual node type is stored in the persisted digest as + determined above - `digest = persistedDigest` ### Inner Nodes -Inner nodes represent a branch in the tree with two **non-nil** child nodes. The inner node has an internal `digest` which represents the hash of the child nodes concatenated hashes. +Inner nodes represent a branch in the tree with two **non-nil** child nodes. +The inner node has an internal `digest` which represents the hash of the child +nodes concatenated hashes. ### Extension Nodes -Extension nodes represent a singly linked chain of inner nodes, with a single child. They are used to represent a common path in the tree and as such contain the path and bounds of the path they represent. The `digest` of an extension node is the hash of its path bounds, the path itself and the child nodes digest concatenated. +Extension nodes represent a singly linked chain of inner nodes, with a single +child. They are used to represent a common path in the tree and as such contain +the path and bounds of the path they represent. The `digest` of an extension +node is the hash of its path bounds, the path itself and the child nodes digest +concatenated. ### Leaf Nodes -Leaf nodes store the full path which they represent and also the hash of the value they store. 
The `digest` of a leaf node is the hash of the leaf nodes path and value concatenated. +Leaf nodes store the full path which they represent and also the hash of the +value they store. The `digest` of a leaf node is the hash of the leaf nodes path +and value concatenated. -The SMT stores only the hashes of the values in the tree, not the raw values themselves. In order to store the raw values in the underlying database the option `WithValueHasher(nil)` must be passed into the `NewSparseMerkleTree` constructor. +The SMT stores only the hashes of the values in the tree, not the raw values +themselves. In order to store the raw values in the underlying database the +option `WithValueHasher(nil)` must be passed into the `NewSparseMerkleTree` +constructor. ### Lazy Nodes -Lazy nodes represent uncached, persisted nodes, and as such only store the `digest` of the node. When a lazy node is accessed the node type will be determined and the full node type will be populated with any relevant fields such as its children and path. +Lazy nodes represent uncached, persisted nodes, and as such only store the +`digest` of the node. When a lazy node is accessed the node type will be +determined and the full node type will be populated with any relevant fields +such as its children and path. ### Lazy Loading -This library uses a cached, lazy-loaded tree structure to optimize performance. It optimises performance by not reading from/writing to the underlying database on each operation, deferring any underlying changes until the `Commit()` function is called. +This library uses a cached, lazy-loaded tree structure to optimize performance. +It optimises performance by not reading from/writing to the underlying database +on each operation, deferring any underlying changes until the `Commit()` +function is called. -All nodes have a `persisted` field which signals whether they have been persisted to the underlying database or not. 
In practice this gives a large performance optimisation by working on cached data and not reading from/writing to the database on each operation. If a node is deleted from the tree it is marked as `orphaned` and will be deleted from the database when the `Commit()` function is called. +All nodes have a `persisted` field which signals whether they have been +persisted to the underlying database or not. In practice this gives a large +performance optimisation by working on cached data and not reading from/writing +to the database on each operation. If a node is deleted from the tree it is +marked as `orphaned` and will be deleted from the database when the `Commit()` +function is called. -Once the `Commit()` function is called the tree will delete any orphaned nodes from the database and write the key-value pairs of all the unpersisted leaf nodes' hashes and their values to the database. +Once the `Commit()` function is called the tree will delete any orphaned nodes +from the database and write the key-value pairs of all the unpersisted leaf +nodes' hashes and their values to the database. ### Visualisations -The following diagrams are representations of how the tree and its components can be visualised. +The following diagrams are representations of how the tree and its components +can be visualised. #### General Tree Structure -The different nodes types described above make the tree have a structure similar to the following: +The different nodes types described above make the tree have a structure similar +to the following: ```mermaid graph TD @@ -116,7 +157,10 @@ graph TD #### Lazy Nodes -When importing a tree via `ImportSparseMerkleTree` the tree will be lazily loaded from the root hash provided. As such the initial tree structure would contain just a single lazy node, until the tree is used and nodes have to be resolved from the database, whose digest is the root hash of the tree. 
+When importing a tree via `ImportSparseMerkleTree` the tree will be lazily +loaded from the root hash provided. As such the initial tree structure would +contain just a single lazy node, until the tree is used and nodes have to be +resolved from the database, whose digest is the root hash of the tree. ```mermaid graph TD @@ -128,7 +172,8 @@ graph TD end ``` -If we were to resolve just this root node, we could have the following tree structure: +If we were to resolve just this root node, we could have the following tree +structure: ```mermaid graph TD @@ -153,14 +198,21 @@ Where `Hash(Hash1 + Hash2)` is the same root hash as the previous example. Paths are **only** stored in two types of nodes: Leaf nodes and Extension nodes. -- Extension nodes contain not only the path they represent but also the path bounds (ie. the start and end of the path they cover). -- Leaf nodes contain the full path which they represent, as well as the value stored at that path. +- Extension nodes contain not only the path they represent but also the path + bounds (ie. the start and end of the path they cover). +- Leaf nodes contain the full path which they represent, as well as the value + stored at that path. -Inner nodes do **not** contain a path, as they represent a branch in the tree and not a path. As such their children, _if they are extension nodes or leaf nodes_, will hold a path value. +Inner nodes do **not** contain a path, as they represent a branch in the tree +and not a path. As such their children, _if they are extension nodes or leaf +nodes_, will hold a path value. ### Visualisation -The following diagram shows how paths are stored in the different nodes of the tree. In the actual SMT paths are not 8 bit binary strings but are instead the returned values of the `PathHasher` (discussed below). These are then used to calculate the path bit (`0` or `1`) at any index of the path byte slice. +The following diagram shows how paths are stored in the different nodes of the +tree. 
In the actual SMT paths are not 8 bit binary strings but are instead the +returned values of the `PathHasher` (discussed below). These are then used to +calculate the path bit (`0` or `1`) at any index of the path byte slice. ```mermaid graph LR @@ -204,15 +256,24 @@ graph LR ## Values -By default the SMT will use the `hasher` passed into `NewSparseMerkleTree` to hash both the keys into their paths in the tree, as well as the values. This means the data stored in a leaf node will be the hash of the value, not the value itself. +By default the SMT will use the `hasher` passed into `NewSparseMerkleTree` to +hash both the keys into their paths in the tree, as well as the values. This +means the data stored in a leaf node will be the hash of the value, not the +value itself. -However, if this is not desired, the two option functions `WithPathHasher` and `WithValueHasher` can be used to change the hashing function used for the keys and values respectively. +However, if this is not desired, the two option functions `WithPathHasher` and +`WithValueHasher` can be used to change the hashing function used for the keys +and values respectively. -If `nil` is passed into `WithValueHasher` functions, it will act as identity hasher and store the values unaltered in the tree. +If `nil` is passed into `WithValueHasher` functions, it will act as identity +hasher and store the values unaltered in the tree. ### Nil values -A `nil` value is the same as the placeholder value in the SMT and as such inserting a key with a `nil` value has specific behaviours. Although the insertion of a key-value pair with a `nil` value will alter the root hash, a proof will not recognise the key as being in the tree. +A `nil` value is the same as the placeholder value in the SMT and as such +inserting a key with a `nil` value has specific behaviours. Although the +insertion of a key-value pair with a `nil` value will alter the root hash, a +proof will not recognise the key as being in the tree. 
Assume `(key, value)` pairs as follows: @@ -223,15 +284,27 @@ Assume `(key, value)` pairs as follows: ## Hashers & Digests -When creating a new SMT or importing one a `hasher` is provided, typically this would be `sha256.New()` but could be any hasher implementing the go `hash.Hash` interface. By default this hasher, referred to as the `TreeHasher` will be used on both keys (to create paths) and values (to store). But separate hashers can be passed in via the option functions mentioned above. +When creating a new SMT or importing one a `hasher` is provided, typically this +would be `sha256.New()` but could be any hasher implementing the go `hash.Hash` +interface. By default this hasher, referred to as the `TreeHasher` will be used +on both keys (to create paths) and values (to store). But separate hashers can +be passed in via the option functions mentioned above. -Whenever we do an operation on the tree, the `PathHasher` is used to hash the key and return its digest - the path. When we store a value in a leaf node we hash it using the `ValueHasher`. These digests are calculated by writing to the hasher and then calculating the checksum by calling `Sum(nil)`. +Whenever we do an operation on the tree, the `PathHasher` is used to hash the +key and return its digest - the path. When we store a value in a leaf node we +hash it using the `ValueHasher`. These digests are calculated by writing to the +hasher and then calculating the checksum by calling `Sum(nil)`. -The digests of all nodes, regardless of the `PathHasher` and `ValueHasher`s being used, will be the result of writing to the `TreeHasher` and calculating the `Sum`. The exact data hashed will depend on the type of node, this is described in the [implementation](#implementation) section. +The digests of all nodes, regardless of the `PathHasher` and `ValueHasher`s +being used, will be the result of writing to the `TreeHasher` and calculating +the `Sum`. 
The exact data hashed will depend on the type of node, this is +described in the [implementation](#implementation) section. -The following diagram represents the creation of a leaf node in an abstracted and simplified manner. +The following diagram represents the creation of a leaf node in an abstracted +and simplified manner. -_Note_: This diagram is not entirely accurate regarding the process of creating a leaf node, but is a good representation of the process. +_Note: This diagram is not entirely accurate regarding the process of creating +a leaf node, but is a good representation of the process._ ```mermaid graph TD @@ -268,38 +341,98 @@ graph TD ## Proofs -The `SparseMerkleProof` type contains the information required for membership and non-membership proofs, depending on the key provided to the tree method `Prove(key []byte)` either a membership or non-membership proof will be generated. +The `SparseMerkleProof` type contains the information required for inclusion +and exclusion proofs, depending on the key provided to the tree method +`Prove(key []byte)` either an inclusion or exclusion proof will be generated. -The `SparseMerkleProof` type contains the relevant information required to rebuild the root hash of the tree from the given key. This information is: +_NOTE: The inclusion and exclusion proof are the same type, just constructed +differently_ + +The `SparseMerkleProof` type contains the relevant information required to +rebuild the root hash of the tree from the given key. This information is: - Any side nodes - Data of the sibling node - Data for the unrelated leaf at the path - - This is `nil` for membership proofs, and only used for non-membership proofs - -`SparseMerkleProof`s can be compressed into `SparseCompactMerkleProof` objects, which are smaller and more efficient to store. These can be created by calling `CompactProof()` with a `SparseMerkleProof`. 
+ - This is `nil` for inclusion proofs, and only used for exclusion proofs ### Verification -In order to verify a `SparseMerkleProof` the `VerifyProof` method is called with the proof, tree spec, root hash as well as the key and value (if a membership proof) that the proof is for. +In order to verify a `SparseMerkleProof` the `VerifyProof` method is called with +the proof, tree spec, root hash as well as the key and value that the proof is +for. When verifying an exclusion proof the value provided should be `nil`. + +The verification step simply uses the proof data to recompute the root hash with +the data provided and the digests stored in the proof. If the root hash matches +the one provided then the proof is valid, otherwise it is an invalid proof. + +### Closest Proof + +The `SparseMerkleClosestProof` is a novel proof mechanism, which can provide a +proof of inclusion for a sentinel leaf in the tree with the most bits in common +with the hash provided to the `ProveClosest()` method. This works by traversing +the tree according to the path of the hash provided and if encountering a `nil` +node then backstepping and flipping the path bit for that depth in the path. + +This backstepping process allows the traversal to continue until it reaches a +sentinel leaf that has the longest common prefix and most bits in common with +the provided hash, up to the depth of the leaf found. + +This method guarantees a proof of inclusion in all cases and can be verified by +using the `VerifyClosestProof` function which requires the proof and root hash +of the tree. + +### Compression + +Both proof types have compression and decompression functions available to +reduce their size, for more efficient storage.
These can be created by calling: + +- `CompactProof(SparseMerkleProof)` to produce a `SparseCompactMerkleProof` +- `CompactClosestProof(SparseMerkleClosestProof)` to produce a + `SparseCompactMerkleClosestProof` + +These compacted proof types can then be decompressed by calling: + +- `DecompactProof(SparseCompactMerkleProof)` to produce the corresponding + `SparseMerkleProof` +- `DecompactClosestProof(SparseCompactMerkleClosestProof)` to produce the + corresponding `SparseMerkleClosestProof` + +### Serialisation -The verification step simply uses the proof data to recompute the root hash with the data provided and the digests stored in the proof. If the root hash matches the one provided then the proof is valid, otherwise it is an invalid proof. +All proof types are serialisable in both their regular and compressed forms. +This is done through the `encoding/gob` package that provides optimisations +around marshalling and unmarshalling custom go types compared to other encoding +schemes. ## Database -This library defines the `KVStore` interface which by default is implemented using [BadgerDB](https://github.com/dgraph-io/badger), however any database that implements this interface can be used as a drop in replacement. The `KVStore` allows for both in memory and persisted databases to be used to store the nodes for the SMT. +This library defines the `KVStore` interface which by default is implemented +using [BadgerDB](https://github.com/dgraph-io/badger), however any database that +implements this interface can be used as a drop in replacement. The `KVStore` +allows for both in memory and persisted databases to be used to store the nodes +for the SMT. -When changes are committed to the underlying database using `Commit()` the digests of the leaf nodes are stored at their respective paths. If retrieved manually from the database the returned value will be the digest of the leaf node, **not** the leaf node's value, even when `WithValueHasher(nil)` is used. 
The node value can be parsed from this value, as the tree `Get` function does by removing the prefix and path bytes from the returned value. +When changes are committed to the underlying database using `Commit()` the +digests of the leaf nodes are stored at their respective paths. If retrieved +manually from the database the returned value will be the digest of the leaf +node, **not** the leaf node's value, even when `WithValueHasher(nil)` is used. +The node value can be parsed from this value, as the tree `Get` function does +by removing the prefix and path bytes from the returned value. See [KVStore.md](./KVStore.md) for the details of the implementation. ### Data Loss -In the event of a system crash or unexpected failure of the program utilising the SMT, if the `Commit()` function has not been called, any changes to the tree will be lost. This is due to the underlying database not being changed **until** the `Commit()` function is called and changes are persisted. +In the event of a system crash or unexpected failure of the program utilising +the SMT, if the `Commit()` function has not been called, any changes to the tree +will be lost. This is due to the underlying database not being changed **until** +the `Commit()` function is called and changes are persisted. ## Sparse Merkle Sum Tree -This library also implements a Sparse Merkle Sum Tree (SMST), the documentation for which can be found [here](./MerkleSumTree.md). +This library also implements a Sparse Merkle Sum Tree (SMST), the documentation +for which can be found [here](./MerkleSumTree.md). 
## Example @@ -307,38 +440,38 @@ This library also implements a Sparse Merkle Sum Tree (SMST), the documentation package main import ( - "crypto/sha256" - "fmt" + "crypto/sha256" + "fmt" - "github.com/pokt-network/smt" + "github.com/pokt-network/smt" ) func main() { - // Initialise a new in-memory key-value store to store the nodes of the tree - // (Note: the tree only stores hashed values, not raw value data) - nodeStore := smt.NewKVStore("") + // Initialise a new in-memory key-value store to store the nodes of the tree + // (Note: the tree only stores hashed values, not raw value data) + nodeStore := smt.NewKVStore("") - // Ensure the database connection closes - defer nodeStore.Stop() + // Ensure the database connection closes + defer nodeStore.Stop() - // Initialise the tree - tree := smt.NewSparseMerkleTree(nodeStore, sha256.New()) + // Initialise the tree + tree := smt.NewSparseMerkleTree(nodeStore, sha256.New()) - // Update the key "foo" with the value "bar" - _ = tree.Update([]byte("foo"), []byte("bar")) + // Update the key "foo" with the value "bar" + _ = tree.Update([]byte("foo"), []byte("bar")) - // Commit the changes to the node store - _ = tree.Commit() + // Commit the changes to the node store + _ = tree.Commit() - // Generate a Merkle proof for "foo" - proof, _ := tree.Prove([]byte("foo")) - root := tree.Root() // We also need the current tree root for the proof + // Generate a Merkle proof for "foo" + proof, _ := tree.Prove([]byte("foo")) + root := tree.Root() // We also need the current tree root for the proof - // Verify the Merkle proof for "foo"="bar" - if smt.VerifyProof(proof, root, []byte("foo"), []byte("bar"), tree.Spec()) { - fmt.Println("Proof verification succeeded.") - } else { - fmt.Println("Proof verification failed.") - } + // Verify the Merkle proof for "foo"="bar" + if smt.VerifyProof(proof, root, []byte("foo"), []byte("bar"), tree.Spec()) { + fmt.Println("Proof verification succeeded.") + } else { + fmt.Println("Proof verification 
failed.") + } } ``` diff --git a/godoc.go b/godoc.go new file mode 100644 index 0000000..0c45f12 --- /dev/null +++ b/godoc.go @@ -0,0 +1,8 @@ +// Package smt provides an implementation of a Sparse Merkle tree for a +// key-value map. The tree implements the same optimisations specified in the +// Libra whitepaper to reduce the number of hash operations required per tree +// operation to O(k) where k is the number of non-empty elements in the tree. +// It is implemented in a similar way to the JMT whitepaper, with additional +// features and proof mechanics, such as a Sparse Merkle Sum Tree and new +// ClosestProof mechanics. +package smt diff --git a/proofs.go b/proofs.go index f67822e..580a29f 100644 --- a/proofs.go +++ b/proofs.go @@ -1,4 +1,3 @@ -// TODO(#22): Define protobufs for all proof types and enable deterministic serialisation package smt import ( @@ -21,7 +20,7 @@ func init() { var ErrBadProof = errors.New("bad proof") // SparseMerkleProof is a Merkle proof for an element in a SparseMerkleTree. -// TODO: Research whether the SiblingData is required and remove it if not +// TODO: Look into whether the SiblingData is required and remove it if not type SparseMerkleProof struct { // SideNodes is an array of the sibling nodes leading up to the leaf of the proof. SideNodes [][]byte