Skip to content
This repository has been archived by the owner on Jun 27, 2023. It is now read-only.

[WIP] remove UnixfsNode from trickledag(part2) #54

Merged
15 changes: 12 additions & 3 deletions importer/balanced/builder.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,15 @@
// mentioned. This is the only scenario where the root can be of a type different
// than the UnixFS node.
//
// Notes:
// 1. In the implementation, the `FSNodeOverDag` structure is used for representing
// the UnixFS node encoded inside the DAG node.
// (see https://github.com/ipfs/go-ipfs/pull/5118.)
// 2. `TFile` is used for backwards-compatibility. A bug was causing the leaf
// nodes to be generated with this type instead of `TRaw`. The latter
// should be used (like the trickle builder does).
// (See https://github.com/ipfs/go-ipfs/pull/5120.)
//
// +-------------+
// | Root 4 |
// +-------------+
Expand Down Expand Up @@ -123,7 +132,7 @@ import (
func Layout(db *h.DagBuilderHelper) (ipld.Node, error) {
if db.Done() {
// No data, return just an empty node.
root, err := db.NewLeafNode(nil)
root, err := db.NewLeafNode(nil, ft.TFile)
if err != nil {
return nil, err
}
Expand All @@ -137,7 +146,7 @@ func Layout(db *h.DagBuilderHelper) (ipld.Node, error) {
// (corner case), after that subsequent `root` nodes will
// always be internal nodes (with a depth > 0) that can
// be handled by the loop.
root, fileSize, err := db.NewLeafDataNode()
root, fileSize, err := db.NewLeafDataNode(ft.TFile)
if err != nil {
return nil, err
}
Expand Down Expand Up @@ -224,7 +233,7 @@ func fillNodeRec(db *h.DagBuilderHelper, node *h.FSNodeOverDag, depth int) (fill

if depth == 1 {
// Base case: add leaf node with data.
childNode, childFileSize, err = db.NewLeafDataNode()
childNode, childFileSize, err = db.NewLeafDataNode(ft.TFile)
if err != nil {
return nil, 0, err
}
Expand Down
179 changes: 71 additions & 108 deletions importer/helpers/dagbuilder.go
Original file line number Diff line number Diff line change
Expand Up @@ -134,59 +134,15 @@ func (db *DagBuilderHelper) GetDagServ() ipld.DAGService {
return db.dserv
}

// NewUnixfsNode builds an empty `UnixfsNode` whose UnixFS layer has type
// `ft.TFile` and whose DAG layer is a fresh `dag.ProtoNode`, then applies
// the helper's CID builder to it.
func (db *DagBuilderHelper) NewUnixfsNode() *UnixfsNode {
	fileNode := new(UnixfsNode)
	fileNode.node = new(dag.ProtoNode)
	fileNode.ufmt = ft.NewFSNode(ft.TFile)
	fileNode.SetCidBuilder(db.cidBuilder)
	return fileNode
}

// GetCidBuilder returns the `cid.Builder` stored in the helper's
// `cidBuilder` field.
func (db *DagBuilderHelper) GetCidBuilder() cid.Builder {
	builder := db.cidBuilder
	return builder
}

// NewLeaf creates a leaf node holding `data`.
//
// It fails with `ErrSizeLimitExceeded` when `data` is longer than
// `BlockSizeLimit`. Otherwise:
//   - if rawLeaves is set, a raw leaf is returned (built with the helper's
//     CID builder when one is configured);
//   - if `data` is nil, the node comes from `NewUnixfsNode`, i.e. its type
//     field is `TFile` (kept for backwards compatibility);
//   - if `data` is defined (but possibly empty), the type field is `TRaw`.
func (db *DagBuilderHelper) NewLeaf(data []byte) (*UnixfsNode, error) {
	if len(data) > BlockSizeLimit {
		return nil, ErrSizeLimitExceeded
	}

	switch {
	case db.rawLeaves && db.cidBuilder == nil:
		// Raw leaf without a custom CID builder.
		return &UnixfsNode{rawnode: dag.NewRawNode(data), raw: true}, nil

	case db.rawLeaves:
		// Raw leaf whose CID is derived from the configured builder.
		rawLeaf, err := dag.NewRawNodeWPrefix(data, db.cidBuilder)
		if err != nil {
			return nil, err
		}
		return &UnixfsNode{rawnode: rawLeaf, raw: true}, nil

	case data == nil:
		return db.NewUnixfsNode(), nil
	}

	leaf := db.newUnixfsBlock()
	leaf.SetData(data)
	return leaf, nil
}

// NewLeafNode is a variation from `NewLeaf` (see its description) that
// returns an `ipld.Node` instead.
func (db *DagBuilderHelper) NewLeafNode(data []byte) (ipld.Node, error) {
// NewLeafNode creates a leaf node filled with data. If rawLeaves is
// defined then a raw leaf will be returned. Otherwise, it will create
// and return `FSNodeOverDag` with `fsNodeType`.
func (db *DagBuilderHelper) NewLeafNode(data []byte, fsNodeType pb.Data_DataType) (ipld.Node, error) {
if len(data) > BlockSizeLimit {
return nil, ErrSizeLimitExceeded
}
Expand All @@ -204,7 +160,7 @@ func (db *DagBuilderHelper) NewLeafNode(data []byte) (ipld.Node, error) {
}

// Encapsulate the data in UnixFS node (instead of a raw node).
fsNodeOverDag := db.NewFSNodeOverDag(ft.TFile)
fsNodeOverDag := db.NewFSNodeOverDag(fsNodeType)
fsNodeOverDag.SetFileData(data)
node, err := fsNodeOverDag.Commit()
if err != nil {
Expand All @@ -213,75 +169,50 @@ func (db *DagBuilderHelper) NewLeafNode(data []byte) (ipld.Node, error) {
// TODO: Encapsulate this sequence of calls into a function that
// just returns the final `ipld.Node` avoiding going through
// `FSNodeOverDag`.
// TODO: Using `TFile` for backwards-compatibility, a bug in the
// balanced builder was causing the leaf nodes to be generated
// with this type instead of `TRaw`, the one that should be used
// (like the trickle builder does).
// (See https://github.com/ipfs/go-ipfs/pull/5120.)

return node, nil
}

// newUnixfsBlock builds an empty `UnixfsNode` meant to hold a raw data
// block: its UnixFS layer has type `ft.TRaw`, its DAG layer is a fresh
// `dag.ProtoNode`, and the helper's CID builder is applied to it.
func (db *DagBuilderHelper) newUnixfsBlock() *UnixfsNode {
	block := new(UnixfsNode)
	block.node = new(dag.ProtoNode)
	block.ufmt = ft.NewFSNode(ft.TRaw)
	block.SetCidBuilder(db.cidBuilder)
	return block
}

// FillNodeLayer will add datanodes as children to the given node until
// at most db.indirSize nodes are added.
func (db *DagBuilderHelper) FillNodeLayer(node *UnixfsNode) error {
// it is full in this layer or no more data.
// NOTE: This function creates raw data nodes so it only works
// for the `trickle.Layout`.
func (db *DagBuilderHelper) FillNodeLayer(node *FSNodeOverDag) error {

// while we have room AND we're not done
for node.NumChildren() < db.maxlinks && !db.Done() {
child, err := db.GetNextDataNode()
child, childFileSize, err := db.NewLeafDataNode(ft.TRaw)
if err != nil {
return err
}

if err := node.AddChild(child, db); err != nil {
if err := node.AddChild(child, childFileSize, db); err != nil {
return err
}
}
node.Commit()
// TODO: Do we need to commit here? The caller who created the
// `FSNodeOverDag` should be in charge of that.

return nil
}

// GetNextDataNode pulls the next chunk of data via `db.Next()` and wraps it
// in a `UnixfsNode` through `NewLeaf`, so the constraints applied there
// (BlockSizeLimit, RawLeaves) hold for the result.
//
// It returns `(nil, nil)` when `db.Next()` yields nil data with no error,
// and forwards any error reported by `db.Next()` or `NewLeaf`.
func (db *DagBuilderHelper) GetNextDataNode() (*UnixfsNode, error) {
	chunk, err := db.Next()
	switch {
	case err != nil:
		return nil, err
	case chunk == nil:
		// No more data: signal it with a nil node and no error.
		return nil, nil
	default:
		return db.NewLeaf(chunk)
	}
}

// NewLeafDataNode is a variation of `GetNextDataNode` that returns
// an `ipld.Node` instead. It builds the `node` with the data obtained
// from the Splitter and returns it with the `dataSize` (that will be
// used to keep track of the DAG file size). The size of the data is
// computed here because after that it will be hidden by `NewLeafNode`
// inside a generic `ipld.Node` representation.
func (db *DagBuilderHelper) NewLeafDataNode() (node ipld.Node, dataSize uint64, err error) {
// NewLeafDataNode builds the `node` with the data obtained from the
// Splitter with the given constraints (BlockSizeLimit, RawLeaves)
// specified when creating the DagBuilderHelper. It returns
// `ipld.Node` with the `dataSize` (that will be used to keep track of
// the DAG file size). The size of the data is computed here because
// after that it will be hidden by `NewLeafNode` inside a generic
// `ipld.Node` representation.
func (db *DagBuilderHelper) NewLeafDataNode(fsNodeType pb.Data_DataType) (node ipld.Node, dataSize uint64, err error) {
fileData, err := db.Next()
if err != nil {
return nil, 0, err
}
dataSize = uint64(len(fileData))

// Create a new leaf node containing the file chunk data.
node, err = db.NewLeafNode(fileData)
node, err = db.NewLeafNode(fileData, fsNodeType)
if err != nil {
return nil, 0, err
}
Expand Down Expand Up @@ -326,21 +257,6 @@ func (db *DagBuilderHelper) ProcessFileStore(node ipld.Node, dataSize uint64) ip
return node
}

// AddUnixfsNode converts `node` to its DAG node form, stores it in the
// helper's DAGService and returns the stored `ipld.Node`. Errors from the
// conversion or from the DAGService are returned unchanged.
func (db *DagBuilderHelper) AddUnixfsNode(node *UnixfsNode) (ipld.Node, error) {
	dagNode, err := node.GetDagNode()
	if err != nil {
		return nil, err
	}

	if err := db.dserv.Add(context.TODO(), dagNode); err != nil {
		return nil, err
	}
	return dagNode, nil
}

// Add inserts the given node in the DAGService.
func (db *DagBuilderHelper) Add(node ipld.Node) error {
return db.dserv.Add(context.TODO(), node)
Expand Down Expand Up @@ -388,6 +304,24 @@ func (db *DagBuilderHelper) NewFSNodeOverDag(fsNodeType pb.Data_DataType) *FSNod
return node
}

// NewFSNFromDag reconstructs a FSNodeOverDag node from a given dag node
func (db *DagBuilderHelper) NewFSNFromDag(nd *dag.ProtoNode) (*FSNodeOverDag, error) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: can we expand all the newFSN to newFSNode names to be consistent with the current terminology?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

NewFSNFromDag should really be NewFSNodeOverDagFromDag, but that felt too long to me, so I simply truncated it at the time. Do you have a better name in mind?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Agreed, drop the OverDag, I was thinking of expanding just the FSN to FSNode part: NewFSNodeFromDag.

return NewFSNFromDag(nd)
}

// NewFSNFromDag rebuilds a `FSNodeOverDag` from an existing DAG node: the
// UnixFS layer is decoded from the bytes in `nd.Data()` and `nd` itself is
// kept as the DAG layer. A decoding failure is returned as the error.
func NewFSNFromDag(nd *dag.ProtoNode) (*FSNodeOverDag, error) {
	fsNode, err := ft.FSNodeFromBytes(nd.Data())
	if err != nil {
		return nil, err
	}

	rebuilt := &FSNodeOverDag{dag: nd, file: fsNode}
	return rebuilt, nil
}

// AddChild adds a `child` `ipld.Node` to both node layers. The
// `dag.ProtoNode` creates a link to the child node while the
// `ft.FSNode` stores its file size (that is, not the size of the
Expand All @@ -404,11 +338,17 @@ func (n *FSNodeOverDag) AddChild(child ipld.Node, fileSize uint64, db *DagBuilde
return db.Add(child)
}

// RemoveChild drops the child at position `index` from both layers: its
// block size is removed from the UnixFS node and its link is removed from
// the DAG node. `index` must address an existing link, otherwise the slice
// expressions below panic. `dbh` is not used by this method.
func (n *FSNodeOverDag) RemoveChild(index int, dbh *DagBuilderHelper) {
	n.file.RemoveBlockSize(index)

	links := n.dag.Links()
	remaining := append(links[:index], links[index+1:]...)
	n.dag.SetLinks(remaining)
}

// Commit unifies (resolves) the cache nodes into a single `ipld.Node`
// that represents them: the `ft.FSNode` is encoded inside the
// `dag.ProtoNode`.
//
// TODO: Evaluate making it read-only after committing.
// TODO: Make it read-only after committing, allow to commit only once.
func (n *FSNodeOverDag) Commit() (ipld.Node, error) {
fileData, err := n.file.GetBytes()
if err != nil {
Expand Down Expand Up @@ -436,3 +376,26 @@ func (n *FSNodeOverDag) FileSize() uint64 {
func (n *FSNodeOverDag) SetFileData(fileData []byte) {
// Only the UnixFS (`file`) layer is updated here; this function does not
// write anything to the `dag` layer.
n.file.SetData(fileData)
}

// GetDagNode returns the DAG-layer node currently held by this
// `FSNodeOverDag`. It performs no encoding itself and always returns a
// nil error.
// TODO: Check if we have committed (passed the UnixFS information
// to the DAG layer) before returning this.
func (n *FSNodeOverDag) GetDagNode() (ipld.Node, error) {
	var dagNode ipld.Node = n.dag
	return dagNode, nil
}

// GetChild resolves the i-th link of this node through the DAGService `ds`
// and wraps the fetched node in a `FSNodeOverDag`.
//
// It returns the lookup error if the link cannot be resolved, and
// `dag.ErrNotProtobuf` if the fetched node is not a `*dag.ProtoNode`.
// `i` must be a valid index into the node's links; it is not range-checked.
func (n *FSNodeOverDag) GetChild(ctx context.Context, i int, ds ipld.DAGService) (*FSNodeOverDag, error) {
	link := n.dag.Links()[i]

	fetched, err := link.GetNode(ctx, ds)
	if err != nil {
		return nil, err
	}

	if protoChild, isProto := fetched.(*dag.ProtoNode); isProto {
		return NewFSNFromDag(protoChild)
	}
	return nil, dag.ErrNotProtobuf
}
Loading