Simplify log Layout type #2532

Merged: 3 commits, Jun 9, 2021. Changes shown are from 2 commits.
30 changes: 6 additions & 24 deletions storage/cache/subtree_cache.go
@@ -43,11 +43,6 @@ type GetSubtreesFunc func(ids [][]byte) ([]*storagepb.SubtreeProto, error)
// SetSubtreesFunc describes a function which can store a collection of Subtrees into storage.
type SetSubtreesFunc func(ctx context.Context, s []*storagepb.SubtreeProto) error

-// maxSupportedTreeDepth is the maximum depth a tree can reach. Note that log
-// trees are further limited to a depth of 63 by the use of signed 64 bit leaf
-// indices.
-const maxSupportedTreeDepth = 64
-
// SubtreeCache provides a caching access to Subtree storage. Currently there are assumptions
// in the code that all subtrees are multiple of 8 in depth and that log subtrees are always
// of depth 8. It is not possible to just change the constants above and have things still
@@ -57,7 +52,6 @@ const maxSupportedTreeDepth = 64
// 1. Parallel readers/writers working on non-intersecting subsets of subtrees/nodes.
// 2. Subtrees/nodes are rarely written, and mostly read.
type SubtreeCache struct {
-	layout *tree.Layout
	hasher hashers.LogHasher

	// subtrees contains the Subtree data read from storage, and is updated by
@@ -72,24 +66,12 @@ type SubtreeCache struct {
}

// NewLogSubtreeCache creates and returns a SubtreeCache appropriate for use with a log
-// tree. The caller must supply the strata depths to be used and a suitable LogHasher.
-func NewLogSubtreeCache(strataDepths []int, hasher hashers.LogHasher) *SubtreeCache {
-	// TODO(al): pass this in
-	maxTreeDepth := maxSupportedTreeDepth
-	glog.V(1).Infof("Creating new subtree cache maxDepth=%d strataDepths=%v", maxTreeDepth, strataDepths)
-	layout := tree.NewLayout(strataDepths)
-
-	// TODO(al): This needs to be passed in, particularly for Map use cases where
-	// we need to know it matches the number of bits in the chosen hash function.
-	if got, want := layout.Height, maxTreeDepth; got != want {
-		panic(fmt.Errorf("strata indicate tree of depth %d, but expected %d", got, want))
-	}
-
+// tree. The caller must supply a suitable LogHasher.
+func NewLogSubtreeCache(hasher hashers.LogHasher) *SubtreeCache {
	if *populateConcurrency <= 0 {
		panic(fmt.Errorf("populate_subtree_concurrency must be set to >= 1"))
	}
	return &SubtreeCache{
-		layout:              layout,
		hasher:              hasher,
		populateConcurrency: *populateConcurrency,
	}
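For callers, the migration is just dropping the strata argument: the depth-8 log tile layout over a 64-level tree is now implied. A minimal before/after sketch of a call site (mirroring the storage backend changes further down in this diff):

```go
// Before: strata depths had to be supplied and were validated at runtime.
stCache := cache.NewLogSubtreeCache([]int{8, 8, 8, 8, 8, 8, 8, 8}, rfc6962.DefaultHasher)

// After: logs always use depth-8 tiles, so only the hasher remains.
stCache := cache.NewLogSubtreeCache(rfc6962.DefaultHasher)
```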
@@ -102,7 +84,7 @@ func (s *SubtreeCache) preload(ids []compact.NodeID, getSubtrees GetSubtreesFunc
	// Figure out the set of subtrees we need.
	want := make(map[string]bool)
	for _, id := range ids {
-		subID := string(s.layout.GetTileID(id))
+		subID := string(tree.GetTileID(id))
		if _, ok := want[subID]; ok {
			// No need to check s.subtrees map twice.
			continue
@@ -239,7 +221,7 @@ func (s *SubtreeCache) prefixIsDirty(prefixKey string) bool {

// getNodeHash returns a single node hash from the cache.
func (s *SubtreeCache) getNodeHash(id compact.NodeID, getSubtree GetSubtreeFunc) ([]byte, error) {
-	subID, sx := s.layout.Split(id)
+	subID, sx := tree.Split(id)
	c := s.getCachedSubtree(subID)
	if c == nil {
		glog.V(2).Infof("Cache miss for %x so we'll try to fetch from storage", subID)
@@ -283,7 +265,7 @@ func (s *SubtreeCache) getNodeHash(id compact.NodeID, getSubtree GetSubtreeFunc)

// SetNodeHash sets a node hash in the cache.
func (s *SubtreeCache) SetNodeHash(id compact.NodeID, h []byte, getSubtree GetSubtreeFunc) error {
-	subID, sx := s.layout.Split(id)
+	subID, sx := tree.Split(id)
	c := s.getCachedSubtree(subID)
	if c == nil {
		// TODO(al): This is ok, IFF *all* leaves in the subtree are being set,
@@ -373,7 +355,7 @@ func (s *SubtreeCache) Flush(ctx context.Context, setSubtrees SetSubtreesFunc) e

// newEmptySubtree creates an empty subtree for the passed-in ID.
func (s *SubtreeCache) newEmptySubtree(id []byte) *storagepb.SubtreeProto {
-	height := s.layout.TileHeight(len(id) * 8)
+	const height = 8
if glog.V(2) {
glog.Infof("Creating new empty subtree for %x, with height %d", id, height)
}
26 changes: 12 additions & 14 deletions storage/cache/subtree_cache_test.go
@@ -26,12 +26,11 @@ import (
	"github.com/google/trillian/merkle/compact"
	rfc6962 "github.com/google/trillian/merkle/rfc6962/hasher"
	"github.com/google/trillian/storage/storagepb"
+	"github.com/google/trillian/storage/tree"

	"github.com/golang/mock/gomock"
)

-var defaultLogStrata = []int{8, 8, 8, 8, 8, 8, 8, 8}
-
func ancestor(id compact.NodeID, levelsUp uint) compact.NodeID {
	return compact.NewNodeID(id.Level+levelsUp, id.Index>>levelsUp)
}
@@ -51,7 +50,7 @@ func TestCacheFillOnlyReadsSubtrees(t *testing.T) {
	defer mockCtrl.Finish()

	m := NewMockNodeStorage(mockCtrl)
-	c := NewLogSubtreeCache(defaultLogStrata, rfc6962.DefaultHasher)
+	c := NewLogSubtreeCache(rfc6962.DefaultHasher)

	id := compact.NewNodeID(28, 0x112233445)
	// When we loop around asking for all parents of the above NodeID, we should
@@ -76,7 +75,7 @@ func TestCacheGetNodesReadsSubtrees(t *testing.T) {
	defer mockCtrl.Finish()

	m := NewMockNodeStorage(mockCtrl)
-	c := NewLogSubtreeCache(defaultLogStrata, rfc6962.DefaultHasher)
+	c := NewLogSubtreeCache(rfc6962.DefaultHasher)

	ids := []compact.NodeID{
		compact.NewNodeID(0, 0x1234),
@@ -137,7 +136,7 @@ func TestCacheFlush(t *testing.T) {
	defer mockCtrl.Finish()

	m := NewMockNodeStorage(mockCtrl)
-	c := NewLogSubtreeCache(defaultLogStrata, rfc6962.DefaultHasher)
+	c := NewLogSubtreeCache(rfc6962.DefaultHasher)

	id := compact.NewNodeID(0, 12345)
	expectedSetIDs := make(map[string]string)
@@ -152,7 +151,7 @@
	}
	m.EXPECT().SetSubtrees(gomock.Any(), gomock.Any()).Do(func(ctx context.Context, trees []*storagepb.SubtreeProto) {
		for _, s := range trees {
-			if got, want := s.Depth, c.layout.TileHeight(len(s.Prefix)*8); got != int32(want) {
+			if got, want := s.Depth, int32(8); got != want {
				t.Errorf("Got subtree with depth %d, expected %d for prefix %x", got, want, s.Prefix)
			}
			state, ok := expectedSetIDs[string(s.Prefix)]
@@ -201,13 +200,12 @@ func TestRepopulateLogSubtree(t *testing.T) {
	cmtStorage := storagepb.SubtreeProto{
		Leaves:        make(map[string][]byte),
		InternalNodes: make(map[string][]byte),
-		Depth:         int32(defaultLogStrata[0]),
+		Depth:         8,
	}
	s := storagepb.SubtreeProto{
		Leaves: make(map[string][]byte),
-		Depth:  int32(defaultLogStrata[0]),
+		Depth:  8,
	}
-	c := NewLogSubtreeCache(defaultLogStrata, rfc6962.DefaultHasher)
	for numLeaves := int64(1); numLeaves <= 256; numLeaves++ {
		// clear internal nodes
		s.InternalNodes = make(map[string][]byte)
@@ -217,7 +215,7 @@ func TestRepopulateLogSubtree(t *testing.T) {
		store := func(id compact.NodeID, hash []byte) {
			// Don't store leaves or the subtree root in InternalNodes
			if id.Level > 0 && id.Level < 8 {
-				_, sfx := c.layout.Split(id)
+				_, sfx := tree.Split(id)
				cmtStorage.InternalNodes[sfx.String()] = hash
			}
		}
@@ -227,7 +225,7 @@ func TestRepopulateLogSubtree(t *testing.T) {

		sfxKey := toSuffix(compact.NewNodeID(0, uint64(numLeaves)-1))
		s.Leaves[sfxKey] = leafHash
-		if numLeaves == 1<<uint(defaultLogStrata[0]) {
+		if numLeaves == 256 {
			s.InternalNodeCount = uint32(len(cmtStorage.InternalNodes))
		} else {
			s.InternalNodeCount = 0
@@ -247,7 +245,7 @@ func TestRepopulateLogSubtree(t *testing.T) {

		// Repopulation should only have happened with a full subtree, otherwise the internal nodes map
		// should be empty
-		if numLeaves != 1<<uint(defaultLogStrata[0]) {
+		if numLeaves != 256 {
			if len(s.InternalNodes) != 0 {
				t.Fatalf("(it %d) internal nodes should be empty but got: %v", numLeaves, s.InternalNodes)
			}
@@ -261,7 +259,7 @@ func BenchmarkRepopulateLogSubtree(b *testing.B) {
	hasher := rfc6962.DefaultHasher
	s := storagepb.SubtreeProto{
		Leaves:            make(map[string][]byte),
-		Depth:             int32(defaultLogStrata[0]),
+		Depth:             8,
		InternalNodeCount: 254,
	}
	for i := 0; i < 256; i++ {
@@ -326,7 +324,7 @@ func TestIdempotentWrites(t *testing.T) {
	// We should see many reads, but only the first call to SetNodeHash should
	// result in an actual write being flushed through to storage.
	for i := 0; i < 10; i++ {
-		c := NewLogSubtreeCache(defaultLogStrata, rfc6962.DefaultHasher)
+		c := NewLogSubtreeCache(rfc6962.DefaultHasher)
		if _, err := c.getNodeHash(id, m.GetSubtree); err != nil {
			t.Fatalf("%d: failed to get node hash: %v", i, err)
		}
23 changes: 0 additions & 23 deletions storage/cloudspanner/default_strata.go

This file was deleted.

2 changes: 1 addition & 1 deletion storage/cloudspanner/log_storage.go
@@ -158,7 +158,7 @@ func (ls *logStorage) Snapshot(ctx context.Context) (storage.ReadOnlyLogTX, erro
}

func newLogCache(tree *trillian.Tree) (*cache.SubtreeCache, error) {
-	return cache.NewLogSubtreeCache(defLogStrata, rfc6962.DefaultHasher), nil
+	return cache.NewLogSubtreeCache(rfc6962.DefaultHasher), nil
}

func (ls *logStorage) begin(ctx context.Context, tree *trillian.Tree, readonly bool, stx spanRead) (*logTX, error) {
4 changes: 1 addition & 3 deletions storage/memory/log_storage.go
@@ -39,8 +39,6 @@ import (
const logIDLabel = "logid"

var (
-	defaultLogStrata = []int{8, 8, 8, 8, 8, 8, 8, 8}
-
	once            sync.Once
	queuedCounter   monitoring.Counter
	dequeuedCounter monitoring.Counter
@@ -153,7 +151,7 @@ func (m *memoryLogStorage) beginInternal(ctx context.Context, tree *trillian.Tre
		createMetrics(m.metricFactory)
	})

-	stCache := cache.NewLogSubtreeCache(defaultLogStrata, rfc6962.DefaultHasher)
+	stCache := cache.NewLogSubtreeCache(rfc6962.DefaultHasher)
	ttx, err := m.TreeStorage.beginTreeTX(ctx, tree.TreeId, rfc6962.DefaultHasher.Size(), stCache, readonly)
	if err != nil {
		return nil, err
4 changes: 1 addition & 3 deletions storage/mysql/log_storage.go
@@ -82,8 +82,6 @@ const (
)

var (
-	defaultLogStrata = []int{8, 8, 8, 8, 8, 8, 8, 8}
-
	once             sync.Once
	queuedCounter    monitoring.Counter
	queuedDupCounter monitoring.Counter
@@ -228,7 +226,7 @@ func (m *mySQLLogStorage) beginInternal(ctx context.Context, tree *trillian.Tree
		createMetrics(m.metricFactory)
	})

-	stCache := cache.NewLogSubtreeCache(defaultLogStrata, rfc6962.DefaultHasher)
+	stCache := cache.NewLogSubtreeCache(rfc6962.DefaultHasher)
	ttx, err := m.beginTreeTx(ctx, tree, rfc6962.DefaultHasher.Size(), stCache)
	if err != nil && err != storage.ErrTreeNeedsInit {
		return nil, err
107 changes: 17 additions & 90 deletions storage/tree/layout.go
@@ -16,114 +16,41 @@ package tree

import (
	"encoding/binary"
-	"fmt"

	"github.com/google/trillian/merkle/compact"
)

-const (
-	// depthQuantum defines the smallest supported tile height, which all tile
-	// heights must also be a multiple of.
-	//
-	// WARNING: The Layout type breaks if this value is not a multiple of 8,
-	// because it uses NodeID byte representation directly.
-	depthQuantum = 8
-)
-
-// Layout defines the mapping between tree node IDs and tile IDs.
-type Layout struct {
-	// sIndex contains stratum info for each multiple-of-depthQuantum node depth.
-	// Note that if a stratum spans multiple depthQuantum heights then it will be
-	// present in this slice the corresponding number of times.
-	// This index is used for fast mapping from node IDs to strata IDs.
-	sIndex []stratumInfo
-	// Height is the height of the tree. It defines the maximal bit-length of a
-	// node ID that the tree can contain.
-	Height int
-}
-
-// NewLayout creates a tree layout based on the passed-in strata heights.
-func NewLayout(heights []int) *Layout {
-	// Compute the total tree height.
-	height := 0
-	for i, h := range heights {
-		// Verify the stratum height is valid.
-		if h <= 0 {
-			panic(fmt.Errorf("invalid stratum height[%d]: %d; must be > 0", i, h))
-		}
-		if h%depthQuantum != 0 {
-			panic(fmt.Errorf("invalid stratum height[%d]: %d; must be a multiple of %d", i, h, depthQuantum))
-		}
-		height += h
-	}
-
-	// Build the strata information index.
-	sIndex := make([]stratumInfo, 0, height/depthQuantum)
-	for _, h := range heights {
-		// Assign the same stratum info to depth quants that this stratum spans.
-		info := stratumInfo{idBytes: len(sIndex), height: h}
-		for d := 0; d < h; d += depthQuantum {
-			sIndex = append(sIndex, info)
-		}
-	}
-
-	return &Layout{sIndex: sIndex, Height: height}
-}

-// GetTileID returns the path from the tree root to the tile that the given
-// node belongs to. All the bits of the returned byte slice are used, because
-// Layout enforces tile heights to be multiples of 8.
+// GetTileID returns the path from the "virtual" root at level 64 to the root
+// of the tile that the given node belongs to. All the bits of the returned
+// slice are significant because all tile heights are 8.
//
-// Note that nodes located at strata boundaries normally belong to tiles rooted
-// above them. However, the topmost node is the root for its own tile since
-// there is nothing above it.
-func (l *Layout) GetTileID(id compact.NodeID) []byte {
-	if int(id.Level) >= l.Height {
+// Note that a root of a tile belongs to a tile above it (as its leaf node).
+// The exception is the "virtual" root which belongs to its own "pseudo" tile.
+func GetTileID(id compact.NodeID) []byte {
+	if id.Level >= 64 {
return []byte{} // Note: Not nil, so that storage/SQL doesn't use NULL.
}

-	info := l.getStratumAt(l.Height - int(id.Level) - 1)
-	level := uint(l.Height - info.idBytes*8)
-	index := id.Index >> (level - id.Level)
+	index := id.Index >> (8 - id.Level%8)
+	bytesCount := (64 - id.Level - 1) / 8

var bytes [8]byte
binary.BigEndian.PutUint64(bytes[:], index)
-	return bytes[8-info.idBytes:]
+	return bytes[8-bytesCount:]
}
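The new arithmetic is easiest to check on a concrete node. Here is a hypothetical walkthrough (not part of this PR) using the level-28 node that appears in TestCacheFillOnlyReadsSubtrees; it assumes nothing beyond the post-change tree and compact packages:

```go
package main

import (
	"fmt"

	"github.com/google/trillian/merkle/compact"
	"github.com/google/trillian/storage/tree"
)

func main() {
	// A node at level 28 lives in the depth-8 tile rooted at level 32, the
	// next multiple of 8 above it.
	id := compact.NewNodeID(28, 0x112233445)

	// GetTileID drops the 8 - 28%8 = 4 levels between the node and its tile
	// root (0x112233445 >> 4 == 0x11223344), then keeps the
	// (64 - 28 - 1) / 8 == 4 bytes that address the tile root from the
	// "virtual" root at level 64.
	fmt.Printf("%x\n", tree.GetTileID(id)) // prints: 11223344
}
```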

-// Split returns the path from the tree root to the tile that the given node
-// belongs to, and the corresponding local address within this tile.
-func (l *Layout) Split(id compact.NodeID) ([]byte, *Suffix) {
-	if int(id.Level) >= l.Height {
+// Split returns the path from the "virtual" root at level 64 to the root of
+// the tile that the given node belongs to, and the corresponding local address
+// of this node within this tile.
+func Split(id compact.NodeID) ([]byte, *Suffix) {
+	if id.Level >= 64 {
return []byte{}, EmptySuffix
}
-	tileID := l.GetTileID(id)
+	tileID := GetTileID(id)

var bytes [8]byte
-	bits := l.Height - int(id.Level) - len(tileID)*8
+	bits := 64 - id.Level - uint(len(tileID)*8)
binary.BigEndian.PutUint64(bytes[:], id.Index<<(64-bits))
suffix := NewSuffix(uint8(bits), bytes[:])

return tileID, suffix
}
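Continuing the sketch above for Split: the same tile ID comes back, and the leftover low-order bits of the node index become its local address within the tile (again a hypothetical illustration, using only the API in this diff):

```go
// bits == 64 - 28 - 4*8 == 4: the node sits 4 levels below its tile root,
// and its local index is the low 4 bits of 0x112233445, i.e. 5, carried
// left-aligned in the suffix bytes (first byte 0x50) via NewSuffix.
tileID, sfx := tree.Split(compact.NewNodeID(28, 0x112233445))
fmt.Println(len(tileID), sfx.String()) // a 4-byte tile ID plus a 4-bit suffix
```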

-// TileHeight returns the height of a tile with its root located at the
-// specified depth from the tree root. The result is not defined if rootDepth
-// is not a tile boundary.
-//
-// TODO(pavelkalinnikov, v2): Use "type-safe" structured argument like NodeID2.
-func (l *Layout) TileHeight(rootDepth int) int {
-	return l.getStratumAt(rootDepth).height
-}
-
-func (l *Layout) getStratumAt(depth int) stratumInfo {
-	return l.sIndex[depth/depthQuantum]
-}
-
-// stratumInfo describes a single stratum across the tree.
-type stratumInfo struct {
-	// idBytes is the byte length of IDs for this stratum.
-	idBytes int
-	// height is the number of tree levels in this stratum.
-	height int
-}