From 2bd8878726120a69273f07e177e933734c2bf4db Mon Sep 17 00:00:00 2001
From: patrickwoodhead
Date: Fri, 19 Nov 2021 10:07:05 +0000
Subject: [PATCH] feat: add traversal/walk function

Closes: https://github.com/multiformats/js-multiformats/issues/118

Basic traversal functionality for deterministic DAG walking with no repeat
block visits and support for block skipping.

User supplies a block loader, which can be used to watch the block ordering
of the walk.
---
 README.md              |  37 ++++++++++
 package.json           |   4 +
 src/traversal.js       |  44 +++++++++++
 test/test-traversal.js | 160 ++++++++++++++++++++++++++++++++++++++++
 4 files changed, 245 insertions(+)
 create mode 100644 src/traversal.js
 create mode 100644 test/test-traversal.js

diff --git a/README.md b/README.md
index ae402d9b..1436f2d8 100644
--- a/README.md
+++ b/README.md
@@ -5,6 +5,7 @@
   * [Multibase Encoders / Decoders / Codecs](#multibase-encoders--decoders--codecs)
   * [Multicodec Encoders / Decoders / Codecs](#multicodec-encoders--decoders--codecs)
   * [Multihash Hashers](#multihash-hashers)
+  * [Traversal](#traversal)
 * [Legacy interface](#legacy-interface)
 * [Implementations](#implementations)
   * [Multibase codecs](#multibase-codecs)
@@ -137,6 +138,42 @@ CID.create(1, json.code, hash)
 //> CID(bagaaierasords4njcts6vs7qvdjfcvgnume4hqohf65zsfguprqphs3icwea)
 ```
 
+### Traversal
+
+This library contains higher-order functions for traversing graphs of data easily.
+
+`walk()` walks through the links in each block of a DAG, calling a user-supplied loader function for each one, in depth-first order with no duplicate block visits. The loader should return a `Block` object and can be used to inspect and collect block ordering for a full DAG walk. The loader should `throw` on error, and return `null` if a block should be skipped by `walk()`.
+
+```js
+import { walk } from 'multiformats/traversal'
+import * as Block from 'multiformats/block'
+import * as codec from 'multiformats/codecs/json'
+import { sha256 as hasher } from 'multiformats/hashes/sha2'
+
+// build a DAG (a single block for this simple example)
+const value = { hello: 'world' }
+const block = await Block.encode({ value, codec, hasher })
+const { cid } = block
+console.log(cid)
+//> CID(bagaaierasords4njcts6vs7qvdjfcvgnume4hqohf65zsfguprqphs3icwea)
+
+// create a loader function that also collects CIDs of blocks in
+// their traversal order
+const load = (blocks) => async (cid) => {
+  // fetch a block using its cid
+  // e.g.: const block = await fetchBlockByCID(cid)
+  blocks.push(cid)
+  return block
+}
+
+// collect blocks in this DAG starting from the root `cid`
+const blocks = []
+await walk({ cid, load: load(blocks) })
+
+console.log(blocks)
+//> [CID(bagaaierasords4njcts6vs7qvdjfcvgnume4hqohf65zsfguprqphs3icwea)]
+```
+
 ## Legacy interface
 
 [`blockcodec-to-ipld-format`](https://github.com/ipld/js-blockcodec-to-ipld-format) converts a multiformats [`BlockCodec`](https://github.com/multiformats/js-multiformats/blob/master/src/codecs/interface.ts#L21) into an
diff --git a/package.json b/package.json
index 1d108929..63e7c69a 100644
--- a/package.json
+++ b/package.json
@@ -48,6 +48,9 @@
     "./block": {
       "import": "./src/block.js"
     },
+    "./traversal": {
+      "import": "./src/traversal.js"
+    },
     "./bases/identity": {
       "import": "./src/bases/identity.js"
     },
@@ -96,6 +99,7 @@
     }
   },
   "devDependencies": {
+    "@ipld/dag-pb": "^2.1.14",
     "@types/node": "^16.7.10",
     "@typescript-eslint/eslint-plugin": "^4.30.0",
     "@typescript-eslint/parser": "^4.30.0",
diff --git a/src/traversal.js b/src/traversal.js
new file mode 100644
index 00000000..9d2f4018
--- /dev/null
+++ b/src/traversal.js
@@ -0,0 +1,44 @@
+import { base58btc } from 'multiformats/bases/base58'
+
+/**
+ * @typedef {import('./cid.js').CID} CID
+ */
+
+/**
+ * @template T
+ * @typedef {import('./block.js').Block<T>} Block
+ */
+
+/**
+ * Walk the DAG rooted at `cid` depth-first, awaiting `load` once for each
+ * distinct CID that is reached.
+ *
+ * @template T
+ * @param {Object} options
+ * @param {CID} options.cid - root of the DAG to walk
+ * @param {(cid: CID) => Promise<Block<T>|null>} options.load - resolves to the
+ *   block for `cid`, or to `null` to skip it (its links are not followed)
+ * @param {Set<string>|null} [options.seen] - base58btc strings of the CIDs
+ *   visited so far; created on the first call and shared with recursive calls
+ * @returns {Promise<void>}
+ */
+const walk = async ({ cid, load, seen }) => {
+  seen = seen || new Set()
+  const b58Cid = cid.toString(base58btc)
+  if (seen.has(b58Cid)) {
+    return
+  }
+
+  const block = await load(cid)
+  seen.add(b58Cid)
+
+  if (block === null) { // the loader signals with `null` that we should skip this block
+    return
+  }
+
+  for (const [, cid] of block.links()) {
+    await walk({ cid, load, seen })
+  }
+}
+
+export { walk }
diff --git a/test/test-traversal.js b/test/test-traversal.js
new file mode 100644
index 00000000..ee7697bb
--- /dev/null
+++ b/test/test-traversal.js
@@ -0,0 +1,160 @@
+/* globals before, describe, it */
+import * as codec from 'multiformats/codecs/json'
+import * as dagPB from '@ipld/dag-pb'
+import { sha256 as hasher } from 'multiformats/hashes/sha2'
+import * as main from 'multiformats/block'
+import { walk } from 'multiformats/traversal'
+import { deepStrictEqual as same } from 'assert'
+
+const test = it
+const { createLink, createNode } = dagPB
+const utf8 = new TextEncoder()
+
+describe('traversal', () => {
+  describe('walk', () => {
+    // Forming the following DAG for testing
+    //           A
+    //          / \
+    //         B   C
+    //        / \ / \
+    //       D  D D  E
+    let blockA, blockB, blockC, blockD, blockE
+    let cidA, cidB, cidC, cidD, cidE
+
+    // mocha does not wait for a promise returned from a `describe` callback,
+    // so the async fixture setup has to run in a hook
+    before(async () => {
+      const linksE = []
+      const valueE = createNode(utf8.encode('string E qacdswa'), linksE)
+      blockE = await main.encode({ value: valueE, codec, hasher })
+      cidE = blockE.cid
+
+      const linksD = []
+      const valueD = createNode(utf8.encode('string D zasa'), linksD)
+      blockD = await main.encode({ value: valueD, codec, hasher })
+      cidD = blockD.cid
+
+      const linksC = [createLink('link1', 100, cidD), createLink('link2', 100, cidE)]
+      const valueC = createNode(utf8.encode('string C zxc'), linksC)
+      blockC = await main.encode({ value: valueC, codec, hasher })
+      cidC = blockC.cid
+
+      const linksB = [createLink('link1', 100, cidD), createLink('link2', 100, cidD)]
+      const valueB = createNode(utf8.encode('string B lpokjiasd'), linksB)
+      blockB = await main.encode({ value: valueB, codec, hasher })
+      cidB = blockB.cid
+
+      const linksA = [createLink('link1', 100, cidB), createLink('link2', 100, cidC)]
+      const valueA = createNode(utf8.encode('string A qwertcfdgshaa'), linksA)
+      blockA = await main.encode({ value: valueA, codec, hasher })
+      cidA = blockA.cid
+    })
+
+    const load = async (cid) => {
+      if (cid.equals(cidE)) {
+        return blockE
+      }
+      if (cid.equals(cidD)) {
+        return blockD
+      }
+      if (cid.equals(cidC)) {
+        return blockC
+      }
+      if (cid.equals(cidB)) {
+        return blockB
+      }
+      if (cid.equals(cidA)) {
+        return blockA
+      }
+      return null
+    }
+
+    const loadWrapper = (load, arr = []) => (cid) => {
+      arr.push(cid.toString())
+      return load(cid)
+    }
+
+    test('block with no links', async () => {
+      // Test Case 1
+      // Input DAG
+      //            D
+      //
+      // Expect load to be called with D
+      const expectedCallArray = [cidD.toString()]
+      const callArray = []
+
+      await walk({ cid: cidD, load: loadWrapper(load, callArray) })
+
+      same(callArray, expectedCallArray)
+    })
+
+    test('block with links', async () => {
+      // Test Case 2
+      // Input
+      //            C
+      //           / \
+      //          D   E
+      //
+      // Expect load to be called with C, then D, then E
+      const expectedCallArray = [cidC.toString(), cidD.toString(), cidE.toString()]
+      const callArray = []
+
+      await walk({ cid: cidC, load: loadWrapper(load, callArray) })
+
+      same(callArray, expectedCallArray)
+    })
+
+    test('block with matching links', async () => {
+      // Test Case 3
+      // Input
+      //            B
+      //           / \
+      //          D   D
+      //
+      // Expect load to be called with B, then D
+      const expectedCallArray = [cidB.toString(), cidD.toString()]
+      const callArray = []
+
+      await walk({ cid: cidB, load: loadWrapper(load, callArray) })
+
+      same(callArray, expectedCallArray)
+    })
+
+    test('depth first with duplicated block', async () => {
+      // Test Case 4
+      // Input
+      //           A
+      //          / \
+      //         B   C
+      //        / \ / \
+      //       D  D D  E
+      //
+      // Expect load to be called with A, then B, then D, then C, then E
+      const expectedCallArray = [
+        cidA.toString(),
+        cidB.toString(),
+        cidD.toString(),
+        cidC.toString(),
+        cidE.toString()
+      ]
+      const callArray = []
+
+      await walk({ cid: cidA, load: loadWrapper(load, callArray) })
+
+      same(callArray, expectedCallArray)
+    })
+
+    test('null return', async () => {
+      const links = []
+      const value = createNode(utf8.encode('test'), links)
+      const block = await main.encode({ value: value, codec, hasher })
+      const cid = block.cid
+      const expectedCallArray = [cid.toString()]
+      const callArray = []
+
+      await walk({ cid, load: loadWrapper(load, callArray) })
+
+      same(callArray, expectedCallArray)
+    })
+  })
+})