From 21ef4b2a1e41839f8b98e110a9b6c5229d40a929 Mon Sep 17 00:00:00 2001 From: Rob Stradling Date: Tue, 1 Oct 2024 12:19:42 +0100 Subject: [PATCH 01/62] Duplicate MySQL files to new PostgreSQL directories, preserving git line history --- .../{mysqlqm/mysql_quota.go => postgresqlqm/postgresql_quota.go} | 0 .../mysql_quota_test.go => postgresqlqm/postgresql_quota_test.go} | 0 quota/{mysqlqm => postgresqlqm}/quota_provider.go | 0 storage/{mysql => postgresql}/admin_storage.go | 0 storage/{mysql => postgresql}/admin_storage_test.go | 0 storage/{mysql => postgresql}/drop_storage.sql | 0 storage/{mysql => postgresql}/errors.go | 0 storage/{mysql => postgresql}/log_storage.go | 0 storage/{mysql => postgresql}/log_storage_test.go | 0 storage/{mysql/mysqlpb => postgresql/postgresqlpb}/gen.go | 0 storage/{mysql/mysqlpb => postgresql/postgresqlpb}/options.pb.go | 0 storage/{mysql/mysqlpb => postgresql/postgresqlpb}/options.proto | 0 storage/{mysql => postgresql}/provider.go | 0 storage/{mysql => postgresql}/provider_test.go | 0 storage/{mysql => postgresql}/queue.go | 0 storage/{mysql => postgresql}/queue_batching.go | 0 storage/{mysql => postgresql}/schema/storage.sql | 0 storage/{mysql => postgresql}/sql.go | 0 storage/{mysql => postgresql}/storage_test.go | 0 storage/{testdb/testdb.go => postgresql/testdbpgx/testdbpgx.go} | 0 .../testdb_test.go => postgresql/testdbpgx/testdbpgx_test.go} | 0 storage/{mysql => postgresql}/tree_storage.go | 0 22 files changed, 0 insertions(+), 0 deletions(-) rename quota/{mysqlqm/mysql_quota.go => postgresqlqm/postgresql_quota.go} (100%) rename quota/{mysqlqm/mysql_quota_test.go => postgresqlqm/postgresql_quota_test.go} (100%) rename quota/{mysqlqm => postgresqlqm}/quota_provider.go (100%) rename storage/{mysql => postgresql}/admin_storage.go (100%) rename storage/{mysql => postgresql}/admin_storage_test.go (100%) rename storage/{mysql => postgresql}/drop_storage.sql (100%) rename storage/{mysql => postgresql}/errors.go (100%) rename storage/{mysql => postgresql}/log_storage.go (100%) rename storage/{mysql => postgresql}/log_storage_test.go (100%) rename storage/{mysql/mysqlpb => postgresql/postgresqlpb}/gen.go (100%) rename storage/{mysql/mysqlpb => postgresql/postgresqlpb}/options.pb.go (100%) rename storage/{mysql/mysqlpb => postgresql/postgresqlpb}/options.proto (100%) rename storage/{mysql => postgresql}/provider.go (100%) rename storage/{mysql => postgresql}/provider_test.go (100%) rename storage/{mysql => postgresql}/queue.go (100%) rename storage/{mysql => postgresql}/queue_batching.go (100%) rename storage/{mysql => postgresql}/schema/storage.sql (100%) rename storage/{mysql => postgresql}/sql.go (100%) rename storage/{mysql => postgresql}/storage_test.go (100%) rename storage/{testdb/testdb.go => postgresql/testdbpgx/testdbpgx.go} (100%) rename storage/{testdb/testdb_test.go => postgresql/testdbpgx/testdbpgx_test.go} (100%) rename storage/{mysql => postgresql}/tree_storage.go (100%) diff --git a/quota/mysqlqm/mysql_quota.go b/quota/postgresqlqm/postgresql_quota.go similarity index 100% rename from quota/mysqlqm/mysql_quota.go rename to quota/postgresqlqm/postgresql_quota.go diff --git a/quota/mysqlqm/mysql_quota_test.go b/quota/postgresqlqm/postgresql_quota_test.go similarity index 100% rename from quota/mysqlqm/mysql_quota_test.go rename to quota/postgresqlqm/postgresql_quota_test.go diff --git a/quota/mysqlqm/quota_provider.go b/quota/postgresqlqm/quota_provider.go similarity index 100% rename from quota/mysqlqm/quota_provider.go rename to 
quota/postgresqlqm/quota_provider.go diff --git a/storage/mysql/admin_storage.go b/storage/postgresql/admin_storage.go similarity index 100% rename from storage/mysql/admin_storage.go rename to storage/postgresql/admin_storage.go diff --git a/storage/mysql/admin_storage_test.go b/storage/postgresql/admin_storage_test.go similarity index 100% rename from storage/mysql/admin_storage_test.go rename to storage/postgresql/admin_storage_test.go diff --git a/storage/mysql/drop_storage.sql b/storage/postgresql/drop_storage.sql similarity index 100% rename from storage/mysql/drop_storage.sql rename to storage/postgresql/drop_storage.sql diff --git a/storage/mysql/errors.go b/storage/postgresql/errors.go similarity index 100% rename from storage/mysql/errors.go rename to storage/postgresql/errors.go diff --git a/storage/mysql/log_storage.go b/storage/postgresql/log_storage.go similarity index 100% rename from storage/mysql/log_storage.go rename to storage/postgresql/log_storage.go diff --git a/storage/mysql/log_storage_test.go b/storage/postgresql/log_storage_test.go similarity index 100% rename from storage/mysql/log_storage_test.go rename to storage/postgresql/log_storage_test.go diff --git a/storage/mysql/mysqlpb/gen.go b/storage/postgresql/postgresqlpb/gen.go similarity index 100% rename from storage/mysql/mysqlpb/gen.go rename to storage/postgresql/postgresqlpb/gen.go diff --git a/storage/mysql/mysqlpb/options.pb.go b/storage/postgresql/postgresqlpb/options.pb.go similarity index 100% rename from storage/mysql/mysqlpb/options.pb.go rename to storage/postgresql/postgresqlpb/options.pb.go diff --git a/storage/mysql/mysqlpb/options.proto b/storage/postgresql/postgresqlpb/options.proto similarity index 100% rename from storage/mysql/mysqlpb/options.proto rename to storage/postgresql/postgresqlpb/options.proto diff --git a/storage/mysql/provider.go b/storage/postgresql/provider.go similarity index 100% rename from storage/mysql/provider.go rename to storage/postgresql/provider.go diff --git a/storage/mysql/provider_test.go b/storage/postgresql/provider_test.go similarity index 100% rename from storage/mysql/provider_test.go rename to storage/postgresql/provider_test.go diff --git a/storage/mysql/queue.go b/storage/postgresql/queue.go similarity index 100% rename from storage/mysql/queue.go rename to storage/postgresql/queue.go diff --git a/storage/mysql/queue_batching.go b/storage/postgresql/queue_batching.go similarity index 100% rename from storage/mysql/queue_batching.go rename to storage/postgresql/queue_batching.go diff --git a/storage/mysql/schema/storage.sql b/storage/postgresql/schema/storage.sql similarity index 100% rename from storage/mysql/schema/storage.sql rename to storage/postgresql/schema/storage.sql diff --git a/storage/mysql/sql.go b/storage/postgresql/sql.go similarity index 100% rename from storage/mysql/sql.go rename to storage/postgresql/sql.go diff --git a/storage/mysql/storage_test.go b/storage/postgresql/storage_test.go similarity index 100% rename from storage/mysql/storage_test.go rename to storage/postgresql/storage_test.go diff --git a/storage/testdb/testdb.go b/storage/postgresql/testdbpgx/testdbpgx.go similarity index 100% rename from storage/testdb/testdb.go rename to storage/postgresql/testdbpgx/testdbpgx.go diff --git a/storage/testdb/testdb_test.go b/storage/postgresql/testdbpgx/testdbpgx_test.go similarity index 100% rename from storage/testdb/testdb_test.go rename to storage/postgresql/testdbpgx/testdbpgx_test.go diff --git a/storage/mysql/tree_storage.go 
b/storage/postgresql/tree_storage.go similarity index 100% rename from storage/mysql/tree_storage.go rename to storage/postgresql/tree_storage.go From 78653c25a3383d8617dd0ca3fcbc4fe0dae38c38 Mon Sep 17 00:00:00 2001 From: Rob Stradling Date: Tue, 1 Oct 2024 12:19:42 +0100 Subject: [PATCH 02/62] Restore MySQL files --- quota/mysqlqm/mysql_quota.go | 164 ++++++ quota/mysqlqm/mysql_quota_test.go | 325 +++++++++++ quota/mysqlqm/quota_provider.go | 48 ++ storage/mysql/admin_storage.go | 479 ++++++++++++++++ storage/mysql/admin_storage_test.go | 342 +++++++++++ storage/mysql/drop_storage.sql | 9 + storage/mysql/errors.go | 50 ++ storage/mysql/log_storage.go | 857 ++++++++++++++++++++++++++++ storage/mysql/log_storage_test.go | 846 +++++++++++++++++++++++++++ storage/mysql/mysqlpb/gen.go | 18 + storage/mysql/mysqlpb/options.pb.go | 165 ++++++ storage/mysql/mysqlpb/options.proto | 27 + storage/mysql/provider.go | 145 +++++ storage/mysql/provider_test.go | 46 ++ storage/mysql/queue.go | 146 +++++ storage/mysql/queue_batching.go | 150 +++++ storage/mysql/schema/storage.sql | 137 +++++ storage/mysql/sql.go | 157 +++++ storage/mysql/storage_test.go | 363 ++++++++++++ storage/mysql/tree_storage.go | 436 ++++++++++++++ storage/testdb/testdb.go | 301 ++++++++++ storage/testdb/testdb_test.go | 27 + 22 files changed, 5238 insertions(+) create mode 100644 quota/mysqlqm/mysql_quota.go create mode 100644 quota/mysqlqm/mysql_quota_test.go create mode 100644 quota/mysqlqm/quota_provider.go create mode 100644 storage/mysql/admin_storage.go create mode 100644 storage/mysql/admin_storage_test.go create mode 100644 storage/mysql/drop_storage.sql create mode 100644 storage/mysql/errors.go create mode 100644 storage/mysql/log_storage.go create mode 100644 storage/mysql/log_storage_test.go create mode 100644 storage/mysql/mysqlpb/gen.go create mode 100644 storage/mysql/mysqlpb/options.pb.go create mode 100644 storage/mysql/mysqlpb/options.proto create mode 100644 storage/mysql/provider.go create mode 100644 storage/mysql/provider_test.go create mode 100644 storage/mysql/queue.go create mode 100644 storage/mysql/queue_batching.go create mode 100644 storage/mysql/schema/storage.sql create mode 100644 storage/mysql/sql.go create mode 100644 storage/mysql/storage_test.go create mode 100644 storage/mysql/tree_storage.go create mode 100644 storage/testdb/testdb.go create mode 100644 storage/testdb/testdb_test.go diff --git a/quota/mysqlqm/mysql_quota.go b/quota/mysqlqm/mysql_quota.go new file mode 100644 index 0000000000..ca55613909 --- /dev/null +++ b/quota/mysqlqm/mysql_quota.go @@ -0,0 +1,164 @@ +// Copyright 2017 Google LLC. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Package mysqlqm defines a MySQL-based quota.Manager implementation. +package mysqlqm + +import ( + "context" + "database/sql" + "errors" + "fmt" + + "github.com/google/trillian/quota" + "k8s.io/klog/v2" +) + +const ( + // DefaultMaxUnsequenced is a suggested value for MaxUnsequencedRows. 
+ // Note that this is a Global/Write quota suggestion, so it applies across trees. + DefaultMaxUnsequenced = 500000 // About 2h of non-stop signing at 70QPS. + + countFromInformationSchemaQuery = ` + SELECT table_rows + FROM information_schema.tables + WHERE table_schema = schema() + AND table_name = ? + AND table_type = ?` + countFromUnsequencedQuery = "SELECT COUNT(*) FROM Unsequenced" +) + +// ErrTooManyUnsequencedRows is returned when tokens are requested but Unsequenced has grown +// beyond the configured limit. +var ErrTooManyUnsequencedRows = errors.New("too many unsequenced rows") + +// QuotaManager is a MySQL-based quota.Manager implementation. +// +// It has two working modes: one queries the information schema for the number of Unsequenced rows, +// the other does a select count(*) on the Unsequenced table. Information schema queries are +// default, even though they are approximate, as they're constant time (select count(*) on InnoDB +// based MySQL needs to traverse the index and may take quite a while to complete). +// +// QuotaManager only implements Global/Write quotas, which is based on the number of Unsequenced +// rows (to be exact, tokens = MaxUnsequencedRows - actualUnsequencedRows). +// Other quotas are considered infinite. +type QuotaManager struct { + DB *sql.DB + MaxUnsequencedRows int + UseSelectCount bool +} + +// GetTokens implements quota.Manager.GetTokens. +// It doesn't actually reserve or retrieve tokens, instead it allows access based on the number of +// rows in the Unsequenced table. +func (m *QuotaManager) GetTokens(ctx context.Context, numTokens int, specs []quota.Spec) error { + for _, spec := range specs { + if spec.Group != quota.Global || spec.Kind != quota.Write { + continue + } + // Only allow global writes if Unsequenced is under the expected limit + count, err := m.countUnsequenced(ctx) + if err != nil { + return err + } + if count+numTokens > m.MaxUnsequencedRows { + return ErrTooManyUnsequencedRows + } + } + return nil +} + +// PutTokens implements quota.Manager.PutTokens. +// It's a noop for QuotaManager. +func (m *QuotaManager) PutTokens(ctx context.Context, numTokens int, specs []quota.Spec) error { + return nil +} + +// ResetQuota implements quota.Manager.ResetQuota. +// It's a noop for QuotaManager. +func (m *QuotaManager) ResetQuota(ctx context.Context, specs []quota.Spec) error { + return nil +} + +func (m *QuotaManager) countUnsequenced(ctx context.Context) (int, error) { + if m.UseSelectCount { + return countFromTable(ctx, m.DB) + } + return countFromInformationSchema(ctx, m.DB) +} + +func countFromInformationSchema(ctx context.Context, db *sql.DB) (int, error) { + // turn off statistics caching for MySQL 8 + if err := turnOffInformationSchemaCache(ctx, db); err != nil { + return 0, err + } + // information_schema.tables doesn't have an explicit PK, so let's play it safe and ensure + // the cursor returns a single row. 
+	rows, err := db.QueryContext(ctx, countFromInformationSchemaQuery, "Unsequenced", "BASE TABLE")
+	if err != nil {
+		return 0, err
+	}
+	defer func() {
+		if err := rows.Close(); err != nil {
+			klog.Errorf("Close(): %v", err)
+		}
+	}()
+	if !rows.Next() {
+		return 0, errors.New("cursor has no rows after information_schema query")
+	}
+	var count int
+	if err := rows.Scan(&count); err != nil {
+		return 0, err
+	}
+	if rows.Next() {
+		return 0, errors.New("too many rows returned from information_schema query")
+	}
+	return count, nil
+}
+
+func countFromTable(ctx context.Context, db *sql.DB) (int, error) {
+	var count int
+	if err := db.QueryRowContext(ctx, countFromUnsequencedQuery).Scan(&count); err != nil {
+		return 0, err
+	}
+	return count, nil
+}
+
+// turnOffInformationSchemaCache turns off statistics caching for MySQL 8.
+// To always retrieve the latest statistics directly from the storage engine and bypass cached values, set information_schema_stats_expiry to 0.
+// See https://dev.mysql.com/doc/refman/8.0/en/server-system-variables.html#sysvar_information_schema_stats_expiry
+// MySQL versions prior to 8 will fail safely.
+func turnOffInformationSchemaCache(ctx context.Context, db *sql.DB) error {
+	opt := "information_schema_stats_expiry"
+	res := db.QueryRowContext(ctx, "SHOW VARIABLES LIKE '"+opt+"'")
+	var none string
+	var expiry int
+
+	if err := res.Scan(&none, &expiry); err != nil {
+		// fail safely for all versions of MySQL prior to 8
+		if errors.Is(err, sql.ErrNoRows) {
+			return nil
+		}
+
+		return fmt.Errorf("failed to get variable %q: %v", opt, err)
+	}
+
+	if expiry != 0 {
+		if _, err := db.ExecContext(ctx, "SET SESSION "+opt+"=0"); err != nil {
+			return fmt.Errorf("failed to set variable %q: %v", opt, err)
+		}
+	}
+
+	return nil
+}
diff --git a/quota/mysqlqm/mysql_quota_test.go b/quota/mysqlqm/mysql_quota_test.go
new file mode 100644
index 0000000000..dabfd131dd
--- /dev/null
+++ b/quota/mysqlqm/mysql_quota_test.go
@@ -0,0 +1,325 @@
+// Copyright 2017 Google LLC. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
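As a minimal sketch of how the mysqlqm quota manager restored above is wired up by a caller (illustrative only, not part of this patch; the DSN and the logging are placeholders, and only the Global/Write spec is checked since all other quotas are treated as infinite):

// Illustrative sketch only; assumes a reachable MySQL instance.
package main

import (
	"context"
	"database/sql"
	"log"

	_ "github.com/go-sql-driver/mysql"
	"github.com/google/trillian/quota"
	"github.com/google/trillian/quota/mysqlqm"
)

func main() {
	db, err := sql.Open("mysql", "user:password@tcp(127.0.0.1:3306)/trillian")
	if err != nil {
		log.Fatalf("sql.Open(): %v", err)
	}
	qm := &mysqlqm.QuotaManager{
		DB:                 db,
		MaxUnsequencedRows: mysqlqm.DefaultMaxUnsequenced,
		// Leaving UseSelectCount false uses the approximate but constant-time
		// information_schema row estimate; set it to true for an exact
		// COUNT(*) on the Unsequenced table.
	}
	// Only Global/Write is enforced; other specs are treated as infinite.
	specs := []quota.Spec{{Group: quota.Global, Kind: quota.Write}}
	if err := qm.GetTokens(context.Background(), 1 /* numTokens */, specs); err != nil {
		// mysqlqm.ErrTooManyUnsequencedRows signals that the Unsequenced
		// backlog has exceeded MaxUnsequencedRows.
		log.Printf("global write quota exhausted: %v", err)
	}
}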
+ +package mysqlqm_test + +import ( + "context" + "crypto" + "database/sql" + "fmt" + "testing" + "time" + + "github.com/google/trillian" + "github.com/google/trillian/quota" + "github.com/google/trillian/quota/mysqlqm" + "github.com/google/trillian/storage" + "github.com/google/trillian/storage/mysql" + "github.com/google/trillian/storage/testdb" + "github.com/google/trillian/types" + + stestonly "github.com/google/trillian/storage/testonly" +) + +func TestQuotaManager_GetTokens(t *testing.T) { + testdb.SkipIfNoMySQL(t) + ctx := context.Background() + + db, done, err := testdb.NewTrillianDB(ctx, testdb.DriverMySQL) + if err != nil { + t.Fatalf("GetTestDB() returned err = %v", err) + } + defer done(ctx) + + tree, err := createTree(ctx, db) + if err != nil { + t.Fatalf("createTree() returned err = %v", err) + } + + tests := []struct { + desc string + unsequencedRows, maxUnsequencedRows, numTokens int + specs []quota.Spec + wantErr bool + }{ + { + desc: "globalWriteSingleToken", + unsequencedRows: 10, + maxUnsequencedRows: 20, + numTokens: 1, + specs: []quota.Spec{{Group: quota.Global, Kind: quota.Write}}, + }, + { + desc: "globalWriteMultiToken", + unsequencedRows: 10, + maxUnsequencedRows: 20, + numTokens: 5, + specs: []quota.Spec{{Group: quota.Global, Kind: quota.Write}}, + }, + { + desc: "globalWriteOverQuota1", + unsequencedRows: 20, + maxUnsequencedRows: 20, + numTokens: 1, + specs: []quota.Spec{{Group: quota.Global, Kind: quota.Write}}, + wantErr: true, + }, + { + desc: "globalWriteOverQuota2", + unsequencedRows: 15, + maxUnsequencedRows: 20, + numTokens: 10, + specs: []quota.Spec{{Group: quota.Global, Kind: quota.Write}}, + wantErr: true, + }, + { + desc: "unlimitedQuotas", + numTokens: 10, + specs: []quota.Spec{ + {Group: quota.User, Kind: quota.Read, User: "dylan"}, + {Group: quota.Tree, Kind: quota.Read, TreeID: tree.TreeId}, + {Group: quota.Global, Kind: quota.Read}, + {Group: quota.User, Kind: quota.Write, User: "dylan"}, + {Group: quota.Tree, Kind: quota.Write, TreeID: tree.TreeId}, + }, + }, + } + + for _, test := range tests { + if err := setUnsequencedRows(ctx, db, tree, test.unsequencedRows); err != nil { + t.Errorf("setUnsequencedRows() returned err = %v", err) + continue + } + + // Test general cases using select count(*) to avoid flakiness / allow for more + // precise assertions. + // See TestQuotaManager_GetTokens_InformationSchema for information schema tests. + qm := &mysqlqm.QuotaManager{DB: db, MaxUnsequencedRows: test.maxUnsequencedRows, UseSelectCount: true} + err := qm.GetTokens(ctx, test.numTokens, test.specs) + if hasErr := err == mysqlqm.ErrTooManyUnsequencedRows; hasErr != test.wantErr { + t.Errorf("%v: GetTokens() returned err = %q, wantErr = %v", test.desc, err, test.wantErr) + } + } +} + +func TestQuotaManager_GetTokens_InformationSchema(t *testing.T) { + testdb.SkipIfNoMySQL(t) + ctx := context.Background() + + maxUnsequenced := 20 + globalWriteSpec := []quota.Spec{{Group: quota.Global, Kind: quota.Write}} + + // Make both variants go through the test. 
+ tests := []struct { + useSelectCount bool + }{ + {useSelectCount: true}, + {useSelectCount: false}, + } + for _, test := range tests { + desc := fmt.Sprintf("useSelectCount = %v", test.useSelectCount) + t.Run(desc, func(t *testing.T) { + db, done, err := testdb.NewTrillianDB(ctx, testdb.DriverMySQL) + if err != nil { + t.Fatalf("NewTrillianDB() returned err = %v", err) + } + defer done(ctx) + + tree, err := createTree(ctx, db) + if err != nil { + t.Fatalf("createTree() returned err = %v", err) + } + + qm := &mysqlqm.QuotaManager{DB: db, MaxUnsequencedRows: maxUnsequenced, UseSelectCount: test.useSelectCount} + + // All GetTokens() calls where leaves < maxUnsequenced should succeed: + // information_schema may be outdated, but it should refer to a valid point in the + // past. + for i := 0; i < maxUnsequenced-1; i++ { + if err := queueLeaves(ctx, db, tree, i /* firstID */, 1 /* num */); err != nil { + t.Fatalf("queueLeaves() returned err = %v", err) + } + if err := qm.GetTokens(ctx, 1 /* numTokens */, globalWriteSpec); err != nil { + t.Errorf("GetTokens() returned err = %v (%v leaves)", err, i+1) + } + } + + // Make leaves = maxUnsequenced + if err := queueLeaves(ctx, db, tree, maxUnsequenced-1 /* firstID */, 1 /* num */); err != nil { + t.Fatalf("queueLeaves() returned err = %v", err) + } + + // Allow some time for information_schema to "catch up". + stop := false + timeout := time.After(1 * time.Second) + for !stop { + select { + case <-timeout: + t.Errorf("timed out") + stop = true + default: + // An error means that GetTokens is working correctly + stop = qm.GetTokens(ctx, 1 /* numTokens */, globalWriteSpec) == mysqlqm.ErrTooManyUnsequencedRows + } + } + }) + } +} + +func TestQuotaManager_Noops(t *testing.T) { + testdb.SkipIfNoMySQL(t) + ctx := context.Background() + + db, done, err := testdb.NewTrillianDB(ctx, testdb.DriverMySQL) + if err != nil { + t.Fatalf("GetTestDB() returned err = %v", err) + } + defer done(ctx) + + qm := &mysqlqm.QuotaManager{DB: db, MaxUnsequencedRows: 1000} + specs := allSpecs(ctx, qm, 10 /* treeID */) + + tests := []struct { + desc string + fn func() error + }{ + { + desc: "PutTokens", + fn: func() error { + return qm.PutTokens(ctx, 10 /* numTokens */, specs) + }, + }, + { + desc: "ResetQuota", + fn: func() error { + return qm.ResetQuota(ctx, specs) + }, + }, + } + for _, test := range tests { + if err := test.fn(); err != nil { + t.Errorf("%v: got err = %v", test.desc, err) + } + } +} + +func allSpecs(_ context.Context, _ quota.Manager, treeID int64) []quota.Spec { + return []quota.Spec{ + {Group: quota.User, Kind: quota.Read, User: "florence"}, + {Group: quota.Tree, Kind: quota.Read, TreeID: treeID}, + {Group: quota.Global, Kind: quota.Read}, + {Group: quota.User, Kind: quota.Write, User: "florence"}, + {Group: quota.Tree, Kind: quota.Write, TreeID: treeID}, + {Group: quota.Global, Kind: quota.Write}, + } +} + +func countUnsequenced(ctx context.Context, db *sql.DB) (int, error) { + var count int + if err := db.QueryRowContext(ctx, "SELECT COUNT(*) FROM Unsequenced").Scan(&count); err != nil { + return 0, err + } + return count, nil +} + +func createTree(ctx context.Context, db *sql.DB) (*trillian.Tree, error) { + var tree *trillian.Tree + + { + as := mysql.NewAdminStorage(db) + err := as.ReadWriteTransaction(ctx, func(ctx context.Context, tx storage.AdminTX) error { + var err error + tree, err = tx.CreateTree(ctx, stestonly.LogTree) + return err + }) + if err != nil { + return nil, err + } + } + + { + ls := mysql.NewLogStorage(db, nil) + err := 
ls.ReadWriteTransaction(ctx, tree, func(ctx context.Context, tx storage.LogTreeTX) error { + logRoot, err := (&types.LogRootV1{RootHash: []byte{0}}).MarshalBinary() + if err != nil { + return err + } + slr := &trillian.SignedLogRoot{LogRoot: logRoot} + return tx.StoreSignedLogRoot(ctx, slr) + }) + if err != nil { + return nil, err + } + } + + return tree, nil +} + +func queueLeaves(ctx context.Context, db *sql.DB, tree *trillian.Tree, firstID, num int) error { + hasher := crypto.SHA256.New() + + leaves := []*trillian.LogLeaf{} + for i := 0; i < num; i++ { + value := []byte(fmt.Sprintf("leaf-%v", firstID+i)) + hasher.Reset() + if _, err := hasher.Write(value); err != nil { + return err + } + hash := hasher.Sum(nil) + leaves = append(leaves, &trillian.LogLeaf{ + MerkleLeafHash: hash, + LeafValue: value, + ExtraData: []byte("extra data"), + LeafIdentityHash: hash, + }) + } + + ls := mysql.NewLogStorage(db, nil) + _, err := ls.QueueLeaves(ctx, tree, leaves, time.Now()) + return err +} + +func setUnsequencedRows(ctx context.Context, db *sql.DB, tree *trillian.Tree, wantRows int) error { + count, err := countUnsequenced(ctx, db) + if err != nil { + return err + } + if count == wantRows { + return nil + } + + // Clear the tables and re-create leaves from scratch. It's easier than having to reason + // about duplicate entries. + if _, err := db.ExecContext(ctx, "DELETE FROM LeafData"); err != nil { + return err + } + if _, err := db.ExecContext(ctx, "DELETE FROM Unsequenced"); err != nil { + return err + } + if err := queueLeaves(ctx, db, tree, 0 /* firstID */, wantRows); err != nil { + return err + } + + // Sanity check the final count + count, err = countUnsequenced(ctx, db) + if err != nil { + return err + } + if count != wantRows { + return fmt.Errorf("got %v unsequenced rows, want = %v", count, wantRows) + } + + return nil +} diff --git a/quota/mysqlqm/quota_provider.go b/quota/mysqlqm/quota_provider.go new file mode 100644 index 0000000000..008f13b6a0 --- /dev/null +++ b/quota/mysqlqm/quota_provider.go @@ -0,0 +1,48 @@ +// Copyright 2018 Google LLC. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package mysqlqm + +import ( + "flag" + + "github.com/google/trillian/quota" + "github.com/google/trillian/storage/mysql" + "k8s.io/klog/v2" +) + +// QuotaManagerName identifies the MySQL quota implementation. +const QuotaManagerName = "mysql" + +var maxUnsequencedRows = flag.Int("max_unsequenced_rows", DefaultMaxUnsequenced, "Max number of unsequenced rows before rate limiting kicks in. 
"+ + "Only effective for quota_system=mysql.") + +func init() { + if err := quota.RegisterProvider(QuotaManagerName, newMySQLQuotaManager); err != nil { + klog.Fatalf("Failed to register quota manager %v: %v", QuotaManagerName, err) + } +} + +func newMySQLQuotaManager() (quota.Manager, error) { + db, err := mysql.GetDatabase() + if err != nil { + return nil, err + } + qm := &QuotaManager{ + DB: db, + MaxUnsequencedRows: *maxUnsequencedRows, + } + klog.Info("Using MySQL QuotaManager") + return qm, nil +} diff --git a/storage/mysql/admin_storage.go b/storage/mysql/admin_storage.go new file mode 100644 index 0000000000..0b267f6732 --- /dev/null +++ b/storage/mysql/admin_storage.go @@ -0,0 +1,479 @@ +// Copyright 2017 Google LLC. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package mysql + +import ( + "bytes" + "context" + "database/sql" + "encoding/gob" + "fmt" + "sync" + "time" + + "github.com/google/trillian" + "github.com/google/trillian/storage" + "github.com/google/trillian/storage/mysql/mysqlpb" + "google.golang.org/grpc/codes" + "google.golang.org/grpc/status" + "google.golang.org/protobuf/proto" + "google.golang.org/protobuf/types/known/anypb" + "google.golang.org/protobuf/types/known/timestamppb" + "k8s.io/klog/v2" +) + +const ( + defaultSequenceIntervalSeconds = 60 + + nonDeletedWhere = " WHERE (Deleted IS NULL OR Deleted = 'false')" + + selectTrees = ` + SELECT + TreeId, + TreeState, + TreeType, + HashStrategy, + HashAlgorithm, + SignatureAlgorithm, + DisplayName, + Description, + CreateTimeMillis, + UpdateTimeMillis, + PrivateKey, -- Unused + PublicKey, -- Used to store StorageSettings + MaxRootDurationMillis, + Deleted, + DeleteTimeMillis + FROM Trees` + selectNonDeletedTrees = selectTrees + nonDeletedWhere + selectTreeByID = selectTrees + " WHERE TreeId = ?" + + updateTreeSQL = `UPDATE Trees + SET TreeState = ?, TreeType = ?, DisplayName = ?, Description = ?, UpdateTimeMillis = ?, MaxRootDurationMillis = ?, PrivateKey = ? + WHERE TreeId = ?` +) + +// NewAdminStorage returns a MySQL storage.AdminStorage implementation backed by DB. 
+func NewAdminStorage(db *sql.DB) *mysqlAdminStorage { + return &mysqlAdminStorage{db} +} + +// mysqlAdminStorage implements storage.AdminStorage +type mysqlAdminStorage struct { + db *sql.DB +} + +func (s *mysqlAdminStorage) Snapshot(ctx context.Context) (storage.ReadOnlyAdminTX, error) { + return s.beginInternal(ctx) +} + +func (s *mysqlAdminStorage) beginInternal(ctx context.Context) (storage.AdminTX, error) { + tx, err := s.db.BeginTx(ctx, nil /* opts */) + if err != nil { + return nil, err + } + return &adminTX{tx: tx}, nil +} + +func (s *mysqlAdminStorage) ReadWriteTransaction(ctx context.Context, f storage.AdminTXFunc) error { + tx, err := s.beginInternal(ctx) + if err != nil { + return err + } + defer func() { + if err := tx.Close(); err != nil { + klog.Errorf("tx.Close(): %v", err) + } + }() + if err := f(ctx, tx); err != nil { + return err + } + return tx.Commit() +} + +func (s *mysqlAdminStorage) CheckDatabaseAccessible(ctx context.Context) error { + return s.db.PingContext(ctx) +} + +type adminTX struct { + tx *sql.Tx + + // mu guards reads/writes on closed, which happen on Commit/Close methods. + // + // We don't check closed on methods apart from the ones above, as we trust tx + // to keep tabs on its state, and hence fail to do queries after closed. + mu sync.RWMutex + closed bool +} + +func (t *adminTX) Commit() error { + t.mu.Lock() + defer t.mu.Unlock() + t.closed = true + return t.tx.Commit() +} + +func (t *adminTX) Close() error { + t.mu.Lock() + defer t.mu.Unlock() + if t.closed { + return nil + } + t.closed = true + return t.tx.Rollback() +} + +func (t *adminTX) GetTree(ctx context.Context, treeID int64) (*trillian.Tree, error) { + stmt, err := t.tx.PrepareContext(ctx, selectTreeByID) + if err != nil { + return nil, err + } + defer func() { + if err := stmt.Close(); err != nil { + klog.Errorf("stmt.Close(): %v", err) + } + }() + + // GetTree is an entry point for most RPCs, let's provide somewhat nicer error messages. + tree, err := readTree(stmt.QueryRowContext(ctx, treeID)) + switch { + case err == sql.ErrNoRows: + // ErrNoRows doesn't provide useful information, so we don't forward it. + return nil, status.Errorf(codes.NotFound, "tree %v not found", treeID) + case err != nil: + return nil, fmt.Errorf("error reading tree %v: %v", treeID, err) + } + return tree, nil +} + +func (t *adminTX) ListTrees(ctx context.Context, includeDeleted bool) ([]*trillian.Tree, error) { + var query string + if includeDeleted { + query = selectTrees + } else { + query = selectNonDeletedTrees + } + + stmt, err := t.tx.PrepareContext(ctx, query) + if err != nil { + return nil, err + } + defer func() { + if err := stmt.Close(); err != nil { + klog.Errorf("stmt.Close(): %v", err) + } + }() + rows, err := stmt.QueryContext(ctx) + if err != nil { + return nil, err + } + defer func() { + if err := rows.Close(); err != nil { + klog.Errorf("rows.Close(): %v", err) + } + }() + trees := []*trillian.Tree{} + for rows.Next() { + tree, err := readTree(rows) + if err != nil { + return nil, err + } + trees = append(trees, tree) + } + return trees, nil +} + +func (t *adminTX) CreateTree(ctx context.Context, tree *trillian.Tree) (*trillian.Tree, error) { + if err := storage.ValidateTreeForCreation(ctx, tree); err != nil { + return nil, err + } + if err := validateStorageSettings(tree); err != nil { + return nil, err + } + + id, err := storage.NewTreeID() + if err != nil { + return nil, err + } + + // Use the time truncated-to-millis throughout, as that's what's stored. 
+ nowMillis := toMillisSinceEpoch(time.Now()) + now := fromMillisSinceEpoch(nowMillis) + + newTree := proto.Clone(tree).(*trillian.Tree) + newTree.TreeId = id + newTree.CreateTime = timestamppb.New(now) + if err := newTree.CreateTime.CheckValid(); err != nil { + return nil, fmt.Errorf("failed to build create time: %w", err) + } + newTree.UpdateTime = timestamppb.New(now) + if err := newTree.UpdateTime.CheckValid(); err != nil { + return nil, fmt.Errorf("failed to build update time: %w", err) + } + if err := newTree.MaxRootDuration.CheckValid(); err != nil { + return nil, fmt.Errorf("could not parse MaxRootDuration: %w", err) + } + rootDuration := newTree.MaxRootDuration.AsDuration() + + // When creating a new tree we automatically add StorageSettings to allow us to + // determine that this tree can support newer storage features. When reading + // trees that do not have this StorageSettings populated, it must be assumed that + // the tree was created with the oldest settings. + // The gist of this code is super simple: create a new StorageSettings with the most + // modern defaults if the created tree does not have one, and then create a struct that + // represents this to store in the DB. Unfortunately because this involves anypb, struct + // copies, marshalling, and proper error handling this turns into a scary amount of code. + if tree.StorageSettings != nil { + newTree.StorageSettings = proto.Clone(tree.StorageSettings).(*anypb.Any) + } else { + o := &mysqlpb.StorageOptions{ + SubtreeRevisions: false, // Default behaviour for new trees is to skip writing subtree revisions. + } + a, err := anypb.New(o) + if err != nil { + return nil, fmt.Errorf("failed to create new StorageOptions: %v", err) + } + newTree.StorageSettings = a + } + o := &mysqlpb.StorageOptions{} + if err := anypb.UnmarshalTo(newTree.StorageSettings, o, proto.UnmarshalOptions{}); err != nil { + return nil, fmt.Errorf("failed to unmarshal StorageOptions: %v", err) + } + ss := storageSettings{ + Revisioned: o.SubtreeRevisions, + } + buff := &bytes.Buffer{} + enc := gob.NewEncoder(buff) + if err := enc.Encode(ss); err != nil { + return nil, fmt.Errorf("failed to encode storageSettings: %v", err) + } + + insertTreeStmt, err := t.tx.PrepareContext( + ctx, + `INSERT INTO Trees( + TreeId, + TreeState, + TreeType, + HashStrategy, + HashAlgorithm, + SignatureAlgorithm, + DisplayName, + Description, + CreateTimeMillis, + UpdateTimeMillis, + PrivateKey, -- Unused + PublicKey, -- Used to store StorageSettings + MaxRootDurationMillis) + VALUES(?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`) + if err != nil { + return nil, err + } + defer func() { + if err := insertTreeStmt.Close(); err != nil { + klog.Errorf("insertTreeStmt.Close(): %v", err) + } + }() + + _, err = insertTreeStmt.ExecContext( + ctx, + newTree.TreeId, + newTree.TreeState.String(), + newTree.TreeType.String(), + "RFC6962_SHA256", // Unused, filling in for backward compatibility. + "SHA256", // Unused, filling in for backward compatibility. + "ECDSA", // Unused, filling in for backward compatibility. + newTree.DisplayName, + newTree.Description, + nowMillis, + nowMillis, + []byte{}, // PrivateKey: Unused, filling in for backward compatibility. + buff.Bytes(), // Using the otherwise unused PublicKey for storing StorageSettings. + rootDuration/time.Millisecond, + ) + if err != nil { + return nil, err + } + + // MySQL silently truncates data when running in non-strict mode. + // We shouldn't be using non-strict modes, but let's guard against it + // anyway. 
+ if _, err := t.GetTree(ctx, newTree.TreeId); err != nil { + // GetTree will fail for truncated enums (they get recorded as + // empty strings, which will not match any known value). + return nil, fmt.Errorf("enum truncated: %v", err) + } + + insertControlStmt, err := t.tx.PrepareContext( + ctx, + `INSERT INTO TreeControl( + TreeId, + SigningEnabled, + SequencingEnabled, + SequenceIntervalSeconds) + VALUES(?, ?, ?, ?)`) + if err != nil { + return nil, err + } + defer func() { + if err := insertControlStmt.Close(); err != nil { + klog.Errorf("insertControlStmt.Close(): %v", err) + } + }() + _, err = insertControlStmt.ExecContext( + ctx, + newTree.TreeId, + true, /* SigningEnabled */ + true, /* SequencingEnabled */ + defaultSequenceIntervalSeconds, + ) + if err != nil { + return nil, err + } + + return newTree, nil +} + +func (t *adminTX) UpdateTree(ctx context.Context, treeID int64, updateFunc func(*trillian.Tree)) (*trillian.Tree, error) { + tree, err := t.GetTree(ctx, treeID) + if err != nil { + return nil, err + } + + beforeUpdate := proto.Clone(tree).(*trillian.Tree) + updateFunc(tree) + if err := storage.ValidateTreeForUpdate(ctx, beforeUpdate, tree); err != nil { + return nil, err + } + if err := validateStorageSettings(tree); err != nil { + return nil, err + } + + // TODO(pavelkalinnikov): When switching TreeType from PREORDERED_LOG to LOG, + // ensure all entries in SequencedLeafData are integrated. + + // Use the time truncated-to-millis throughout, as that's what's stored. + nowMillis := toMillisSinceEpoch(time.Now()) + now := fromMillisSinceEpoch(nowMillis) + tree.UpdateTime = timestamppb.New(now) + if err != nil { + return nil, fmt.Errorf("failed to build update time: %v", err) + } + if err := tree.MaxRootDuration.CheckValid(); err != nil { + return nil, fmt.Errorf("could not parse MaxRootDuration: %w", err) + } + rootDuration := tree.MaxRootDuration.AsDuration() + + stmt, err := t.tx.PrepareContext(ctx, updateTreeSQL) + if err != nil { + return nil, err + } + defer func() { + if err := stmt.Close(); err != nil { + klog.Errorf("stmt.Close(): %v", err) + } + }() + + if _, err = stmt.ExecContext( + ctx, + tree.TreeState.String(), + tree.TreeType.String(), + tree.DisplayName, + tree.Description, + nowMillis, + rootDuration/time.Millisecond, + []byte{}, // PrivateKey: Unused, filling in for backward compatibility. + // PublicKey should not be updated with any storageSettings here without + // a lot of thought put into it. At the moment storageSettings are inferred + // when reading the tree, even if no value is stored in the database. + tree.TreeId); err != nil { + return nil, err + } + + return tree, nil +} + +func (t *adminTX) SoftDeleteTree(ctx context.Context, treeID int64) (*trillian.Tree, error) { + return t.updateDeleted(ctx, treeID, true /* deleted */, toMillisSinceEpoch(time.Now()) /* deleteTimeMillis */) +} + +func (t *adminTX) UndeleteTree(ctx context.Context, treeID int64) (*trillian.Tree, error) { + return t.updateDeleted(ctx, treeID, false /* deleted */, nil /* deleteTimeMillis */) +} + +// updateDeleted updates the Deleted and DeleteTimeMillis fields of the specified tree. +// deleteTimeMillis must be either an int64 (in millis since epoch) or nil. +func (t *adminTX) updateDeleted(ctx context.Context, treeID int64, deleted bool, deleteTimeMillis interface{}) (*trillian.Tree, error) { + if err := validateDeleted(ctx, t.tx, treeID, !deleted); err != nil { + return nil, err + } + if _, err := t.tx.ExecContext( + ctx, + "UPDATE Trees SET Deleted = ?, DeleteTimeMillis = ? 
WHERE TreeId = ?", + deleted, deleteTimeMillis, treeID); err != nil { + return nil, err + } + return t.GetTree(ctx, treeID) +} + +func (t *adminTX) HardDeleteTree(ctx context.Context, treeID int64) error { + if err := validateDeleted(ctx, t.tx, treeID, true /* wantDeleted */); err != nil { + return err + } + + // TreeControl didn't have "ON DELETE CASCADE" on previous versions, so let's hit it explicitly + if _, err := t.tx.ExecContext(ctx, "DELETE FROM TreeControl WHERE TreeId = ?", treeID); err != nil { + return err + } + _, err := t.tx.ExecContext(ctx, "DELETE FROM Trees WHERE TreeId = ?", treeID) + return err +} + +func validateDeleted(ctx context.Context, tx *sql.Tx, treeID int64, wantDeleted bool) error { + var nullDeleted sql.NullBool + switch err := tx.QueryRowContext(ctx, "SELECT Deleted FROM Trees WHERE TreeId = ?", treeID).Scan(&nullDeleted); { + case err == sql.ErrNoRows: + return status.Errorf(codes.NotFound, "tree %v not found", treeID) + case err != nil: + return err + } + + switch deleted := nullDeleted.Valid && nullDeleted.Bool; { + case wantDeleted && !deleted: + return status.Errorf(codes.FailedPrecondition, "tree %v is not soft deleted", treeID) + case !wantDeleted && deleted: + return status.Errorf(codes.FailedPrecondition, "tree %v already soft deleted", treeID) + } + return nil +} + +func validateStorageSettings(tree *trillian.Tree) error { + if tree.StorageSettings.MessageIs(&mysqlpb.StorageOptions{}) { + return nil + } + if tree.StorageSettings == nil { + // No storage settings is OK, we'll just use the defaults for new trees + return nil + } + return fmt.Errorf("storage_settings must be nil or mysqlpb.StorageOptions, but got %v", tree.StorageSettings) +} + +// storageSettings allows us to persist storage settings to the DB. +// It is a tempting trap to use protos for this, but the way they encode +// makes it impossible to tell the difference between no value ever written +// and a value that was written with the default values for each field. +// Using an explicit struct and gob encoding allows us to tell the difference. +type storageSettings struct { + Revisioned bool +} diff --git a/storage/mysql/admin_storage_test.go b/storage/mysql/admin_storage_test.go new file mode 100644 index 0000000000..d1a7cfd81d --- /dev/null +++ b/storage/mysql/admin_storage_test.go @@ -0,0 +1,342 @@ +// Copyright 2017 Google LLC. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package mysql + +import ( + "bytes" + "context" + "database/sql" + "encoding/gob" + "fmt" + "testing" + + "github.com/google/trillian" + "github.com/google/trillian/storage" + "github.com/google/trillian/storage/mysql/mysqlpb" + "github.com/google/trillian/storage/testonly" + "google.golang.org/protobuf/proto" + "google.golang.org/protobuf/types/known/anypb" +) + +const selectTreeControlByID = "SELECT SigningEnabled, SequencingEnabled, SequenceIntervalSeconds FROM TreeControl WHERE TreeId = ?" 
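For a concrete view of the StorageSettings handling in CreateTree above, a short sketch (illustrative only, not part of this patch; createRevisionedTree is a hypothetical helper and an already-open *sql.DB is assumed):

// Sketch: create a tree that explicitly opts back in to revisioned subtrees.
package example

import (
	"context"
	"database/sql"

	"github.com/google/trillian"
	"github.com/google/trillian/storage"
	"github.com/google/trillian/storage/mysql"
	"github.com/google/trillian/storage/mysql/mysqlpb"
	"github.com/google/trillian/storage/testonly"
	"google.golang.org/protobuf/proto"
	"google.golang.org/protobuf/types/known/anypb"
)

// createRevisionedTree is a hypothetical helper, not part of the patch.
func createRevisionedTree(ctx context.Context, db *sql.DB) (*trillian.Tree, error) {
	// Without explicit settings, CreateTree defaults new trees to
	// SubtreeRevisions: false; setting it to true keeps the older
	// revisioned-subtree layout.
	opts, err := anypb.New(&mysqlpb.StorageOptions{SubtreeRevisions: true})
	if err != nil {
		return nil, err
	}
	tree := proto.Clone(testonly.LogTree).(*trillian.Tree)
	tree.StorageSettings = opts
	// Any StorageSettings type other than nil or mysqlpb.StorageOptions is
	// rejected by validateStorageSettings (see TestAdminTX_StorageSettings).
	return storage.CreateTree(ctx, mysql.NewAdminStorage(db), tree)
}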
+ +func TestMysqlAdminStorage(t *testing.T) { + tester := &testonly.AdminStorageTester{NewAdminStorage: func() storage.AdminStorage { + cleanTestDB(DB) + return NewAdminStorage(DB) + }} + tester.RunAllTests(t) +} + +func TestAdminTX_CreateTree_InitializesStorageStructures(t *testing.T) { + cleanTestDB(DB) + s := NewAdminStorage(DB) + ctx := context.Background() + + tree, err := storage.CreateTree(ctx, s, testonly.LogTree) + if err != nil { + t.Fatalf("CreateTree() failed: %v", err) + } + + // Check if TreeControl is correctly written. + var signingEnabled, sequencingEnabled bool + var sequenceIntervalSeconds int + if err := DB.QueryRowContext(ctx, selectTreeControlByID, tree.TreeId).Scan(&signingEnabled, &sequencingEnabled, &sequenceIntervalSeconds); err != nil { + t.Fatalf("Failed to read TreeControl: %v", err) + } + // We don't mind about specific values, defaults change, but let's check + // that important numbers are not zeroed. + if sequenceIntervalSeconds <= 0 { + t.Errorf("sequenceIntervalSeconds = %v, want > 0", sequenceIntervalSeconds) + } +} + +func TestCreateTreeInvalidStates(t *testing.T) { + cleanTestDB(DB) + s := NewAdminStorage(DB) + ctx := context.Background() + + states := []trillian.TreeState{trillian.TreeState_DRAINING, trillian.TreeState_FROZEN} + + for _, state := range states { + inTree := proto.Clone(testonly.LogTree).(*trillian.Tree) + inTree.TreeState = state + if _, err := storage.CreateTree(ctx, s, inTree); err == nil { + t.Errorf("CreateTree() state: %v got: nil want: err", state) + } + } +} + +func TestAdminTX_TreeWithNulls(t *testing.T) { + cleanTestDB(DB) + s := NewAdminStorage(DB) + ctx := context.Background() + + // Setup: create a tree and set all nullable columns to null. + // Some columns have to be manually updated, as it's not possible to set + // some proto fields to nil. + tree, err := storage.CreateTree(ctx, s, testonly.LogTree) + if err != nil { + t.Fatalf("CreateTree() failed: %v", err) + } + treeID := tree.TreeId + + if err := setNulls(ctx, DB, treeID); err != nil { + t.Fatalf("setNulls() = %v, want = nil", err) + } + + tests := []struct { + desc string + fn storage.AdminTXFunc + }{ + { + desc: "GetTree", + fn: func(ctx context.Context, tx storage.AdminTX) error { + _, err := tx.GetTree(ctx, treeID) + return err + }, + }, + { + desc: "ListTrees", + fn: func(ctx context.Context, tx storage.AdminTX) error { + trees, err := tx.ListTrees(ctx, false /* includeDeleted */) + if err != nil { + return err + } + for _, tree := range trees { + if tree.TreeId == treeID { + return nil + } + } + return fmt.Errorf("ID not found: %v", treeID) + }, + }, + } + for _, test := range tests { + if err := s.ReadWriteTransaction(ctx, test.fn); err != nil { + t.Errorf("%v: err = %v, want = nil", test.desc, err) + } + } +} + +func TestAdminTX_StorageSettings(t *testing.T) { + cleanTestDB(DB) + s := NewAdminStorage(DB) + ctx := context.Background() + + badSettings, err := anypb.New(&trillian.Tree{}) + if err != nil { + t.Fatalf("Error marshaling proto: %v", err) + } + goodSettings, err := anypb.New(&mysqlpb.StorageOptions{}) + if err != nil { + t.Fatalf("Error marshaling proto: %v", err) + } + + tests := []struct { + desc string + // fn attempts to either create or update a tree with a non-nil, valid Any proto + // on Tree.StorageSettings. It's expected to return an error. 
+ fn func(storage.AdminStorage) error + wantErr bool + }{ + { + desc: "CreateTree Bad Settings", + fn: func(s storage.AdminStorage) error { + tree := proto.Clone(testonly.LogTree).(*trillian.Tree) + tree.StorageSettings = badSettings + _, err := storage.CreateTree(ctx, s, tree) + return err + }, + wantErr: true, + }, + { + desc: "CreateTree nil Settings", + fn: func(s storage.AdminStorage) error { + tree := proto.Clone(testonly.LogTree).(*trillian.Tree) + tree.StorageSettings = nil + _, err := storage.CreateTree(ctx, s, tree) + return err + }, + wantErr: false, + }, + { + desc: "CreateTree StorageOptions Settings", + fn: func(s storage.AdminStorage) error { + tree := proto.Clone(testonly.LogTree).(*trillian.Tree) + tree.StorageSettings = goodSettings + _, err := storage.CreateTree(ctx, s, tree) + return err + }, + wantErr: false, + }, + { + desc: "UpdateTree", + fn: func(s storage.AdminStorage) error { + tree, err := storage.CreateTree(ctx, s, testonly.LogTree) + if err != nil { + t.Fatalf("CreateTree() failed with err = %v", err) + } + _, err = storage.UpdateTree(ctx, s, tree.TreeId, func(tree *trillian.Tree) { tree.StorageSettings = badSettings }) + return err + }, + wantErr: true, + }, + } + for _, test := range tests { + if err := test.fn(s); (err != nil) != test.wantErr { + t.Errorf("err: %v, wantErr = %v", err, test.wantErr) + } + } +} + +// Test reading variants of trees that could have been created by old versions +// of Trillian to check we infer the correct storage options. +func TestAdminTX_GetTreeLegacies(t *testing.T) { + cleanTestDB(DB) + s := NewAdminStorage(DB) + ctx := context.Background() + + serializedStorageSettings := func(revisioned bool) []byte { + ss := storageSettings{ + Revisioned: revisioned, + } + buff := &bytes.Buffer{} + enc := gob.NewEncoder(buff) + if err := enc.Encode(ss); err != nil { + t.Fatalf("failed to encode storageSettings: %v", err) + } + return buff.Bytes() + } + tests := []struct { + desc string + key []byte + wantRevisioned bool + }{ + { + desc: "No data", + key: []byte{}, + wantRevisioned: true, + }, + { + desc: "Public key", + key: []byte("trustmethatthisisapublickey"), + wantRevisioned: true, + }, + { + desc: "StorageOptions revisioned", + key: serializedStorageSettings(true), + wantRevisioned: true, + }, + { + desc: "StorageOptions revisionless", + key: serializedStorageSettings(false), + wantRevisioned: false, + }, + } + for _, tC := range tests { + // Create a tree with default settings, and then reach into the DB to override + // whatever was written into the persisted settings to align with the test case. + tree, err := storage.CreateTree(ctx, s, testonly.LogTree) + if err != nil { + t.Fatal(err) + } + // We are reaching really into the internals here, but it's the only way to set up + // archival state. Going through the Create/Update methods will change the storage + // options. + tx, err := s.db.BeginTx(ctx, nil /* opts */) + if err != nil { + t.Fatal(err) + } + if _, err := tx.Exec("UPDATE Trees SET PublicKey = ? 
WHERE TreeId = ?", tC.key, tree.TreeId); err != nil { + t.Fatal(err) + } + if err := tx.Commit(); err != nil { + t.Fatal(err) + } + readTree, err := storage.GetTree(ctx, s, tree.TreeId) + if err != nil { + t.Fatal(err) + } + o := &mysqlpb.StorageOptions{} + if err := anypb.UnmarshalTo(readTree.StorageSettings, o, proto.UnmarshalOptions{}); err != nil { + t.Fatal(err) + } + if got, want := o.SubtreeRevisions, tC.wantRevisioned; got != want { + t.Errorf("%s SubtreeRevisions: got %t, wanted %t", tC.desc, got, want) + } + } +} + +func TestAdminTX_HardDeleteTree(t *testing.T) { + cleanTestDB(DB) + s := NewAdminStorage(DB) + ctx := context.Background() + + tree, err := storage.CreateTree(ctx, s, testonly.LogTree) + if err != nil { + t.Fatalf("CreateTree() returned err = %v", err) + } + + if err := s.ReadWriteTransaction(ctx, func(ctx context.Context, tx storage.AdminTX) error { + if _, err := tx.SoftDeleteTree(ctx, tree.TreeId); err != nil { + return err + } + return tx.HardDeleteTree(ctx, tree.TreeId) + }); err != nil { + t.Fatalf("ReadWriteTransaction() returned err = %v", err) + } + + // Unlike the HardDelete tests on AdminStorageTester, here we have the chance to poke inside the + // database and check that the rows are gone, so let's do just that. + // If there's no record on Trees, then there can be no record in any of the dependent tables. + var name string + if err := DB.QueryRowContext(ctx, "SELECT DisplayName FROM Trees WHERE TreeId = ?", tree.TreeId).Scan(&name); err != sql.ErrNoRows { + t.Errorf("QueryRowContext() returned err = %v, want = %v", err, sql.ErrNoRows) + } +} + +func TestCheckDatabaseAccessible_Fails(t *testing.T) { + ctx := context.Background() + + // Pass in a closed database to provoke a failure. + db, done := openTestDBOrDie() + cleanTestDB(db) + s := NewAdminStorage(db) + done(ctx) + + if err := s.CheckDatabaseAccessible(ctx); err == nil { + t.Error("TestCheckDatabaseAccessible_Fails got: nil, want: err") + } +} + +func TestCheckDatabaseAccessible_OK(t *testing.T) { + cleanTestDB(DB) + s := NewAdminStorage(DB) + ctx := context.Background() + if err := s.CheckDatabaseAccessible(ctx); err != nil { + t.Errorf("TestCheckDatabaseAccessible_OK got: %v, want: nil", err) + } +} + +func setNulls(ctx context.Context, db *sql.DB, treeID int64) error { + stmt, err := db.PrepareContext(ctx, "UPDATE Trees SET DisplayName = NULL, Description = NULL WHERE TreeId = ?") + if err != nil { + return err + } + defer func() { _ = stmt.Close() }() + _, err = stmt.ExecContext(ctx, treeID) + return err +} diff --git a/storage/mysql/drop_storage.sql b/storage/mysql/drop_storage.sql new file mode 100644 index 0000000000..6e407b5d40 --- /dev/null +++ b/storage/mysql/drop_storage.sql @@ -0,0 +1,9 @@ +-- Caution - this removes all tables in our schema + +DROP TABLE IF EXISTS Unsequenced; +DROP TABLE IF EXISTS Subtree; +DROP TABLE IF EXISTS SequencedLeafData; +DROP TABLE IF EXISTS TreeHead; +DROP TABLE IF EXISTS LeafData; +DROP TABLE IF EXISTS TreeControl; +DROP TABLE IF EXISTS Trees; diff --git a/storage/mysql/errors.go b/storage/mysql/errors.go new file mode 100644 index 0000000000..e457fa40fc --- /dev/null +++ b/storage/mysql/errors.go @@ -0,0 +1,50 @@ +// Copyright 2021 Google LLC. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package mysql + +import ( + "github.com/go-sql-driver/mysql" + "google.golang.org/grpc/codes" + "google.golang.org/grpc/status" +) + +const ( + // ER_DUP_ENTRY: Error returned by driver when inserting a duplicate row. + errNumDuplicate = 1062 + // ER_LOCK_DEADLOCK: Error returned when there was a deadlock. + errNumDeadlock = 1213 +) + +// mysqlToGRPC converts some types of MySQL errors to GRPC errors. This gives +// clients more signal when the operation can be retried. +func mysqlToGRPC(err error) error { + mysqlErr, ok := err.(*mysql.MySQLError) + if !ok { + return err + } + if mysqlErr.Number == errNumDeadlock { + return status.Errorf(codes.Aborted, "MySQL: %v", mysqlErr) + } + return err +} + +func isDuplicateErr(err error) bool { + switch err := err.(type) { + case *mysql.MySQLError: + return err.Number == errNumDuplicate + default: + return false + } +} diff --git a/storage/mysql/log_storage.go b/storage/mysql/log_storage.go new file mode 100644 index 0000000000..b3f10f7818 --- /dev/null +++ b/storage/mysql/log_storage.go @@ -0,0 +1,857 @@ +// Copyright 2016 Google LLC. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package mysql + +import ( + "bytes" + "context" + "database/sql" + "errors" + "fmt" + "sort" + "strconv" + "sync" + "time" + + "github.com/google/trillian" + "github.com/google/trillian/monitoring" + "github.com/google/trillian/storage" + "github.com/google/trillian/storage/cache" + "github.com/google/trillian/storage/tree" + "github.com/google/trillian/types" + "github.com/transparency-dev/merkle/compact" + "github.com/transparency-dev/merkle/rfc6962" + "google.golang.org/grpc/codes" + "google.golang.org/grpc/status" + "google.golang.org/protobuf/types/known/timestamppb" + "k8s.io/klog/v2" +) + +const ( + valuesPlaceholder5 = "(?,?,?,?,?)" + + insertLeafDataSQL = "INSERT INTO LeafData(TreeId,LeafIdentityHash,LeafValue,ExtraData,QueueTimestampNanos) VALUES" + valuesPlaceholder5 + insertSequencedLeafSQL = "INSERT INTO SequencedLeafData(TreeId,LeafIdentityHash,MerkleLeafHash,SequenceNumber,IntegrateTimestampNanos) VALUES" + + selectNonDeletedTreeIDByTypeAndStateSQL = ` + SELECT TreeId FROM Trees + WHERE TreeType IN(?,?) + AND TreeState IN(?,?) + AND (Deleted IS NULL OR Deleted = 'false')` + + selectLatestSignedLogRootSQL = `SELECT TreeHeadTimestamp,TreeSize,RootHash,TreeRevision,RootSignature + FROM TreeHead WHERE TreeId=? 
+ ORDER BY TreeHeadTimestamp DESC LIMIT 1` + + selectLeavesByRangeSQL = `SELECT s.MerkleLeafHash,l.LeafIdentityHash,l.LeafValue,s.SequenceNumber,l.ExtraData,l.QueueTimestampNanos,s.IntegrateTimestampNanos + FROM LeafData l,SequencedLeafData s + WHERE l.LeafIdentityHash = s.LeafIdentityHash + AND s.SequenceNumber >= ? AND s.SequenceNumber < ? AND l.TreeId = ? AND s.TreeId = l.TreeId` + orderBySequenceNumberSQL + + // These statements need to be expanded to provide the correct number of parameter placeholders. + selectLeavesByMerkleHashSQL = `SELECT s.MerkleLeafHash,l.LeafIdentityHash,l.LeafValue,s.SequenceNumber,l.ExtraData,l.QueueTimestampNanos,s.IntegrateTimestampNanos + FROM LeafData l,SequencedLeafData s + WHERE l.LeafIdentityHash = s.LeafIdentityHash + AND s.MerkleLeafHash IN (` + placeholderSQL + `) AND l.TreeId = ? AND s.TreeId = l.TreeId` + // TODO(#1548): rework the code so the dummy hash isn't needed (e.g. this assumes hash size is 32) + dummyMerkleLeafHash = "00000000000000000000000000000000" + // This statement returns a dummy Merkle leaf hash value (which must be + // of the right size) so that its signature matches that of the other + // leaf-selection statements. + selectLeavesByLeafIdentityHashSQL = `SELECT '` + dummyMerkleLeafHash + `',l.LeafIdentityHash,l.LeafValue,-1,l.ExtraData,l.QueueTimestampNanos,s.IntegrateTimestampNanos + FROM LeafData l LEFT JOIN SequencedLeafData s ON (l.LeafIdentityHash = s.LeafIdentityHash AND l.TreeID = s.TreeID) + WHERE l.LeafIdentityHash IN (` + placeholderSQL + `) AND l.TreeId = ?` + + // Same as above except with leaves ordered by sequence so we only incur this cost when necessary + orderBySequenceNumberSQL = " ORDER BY s.SequenceNumber" + selectLeavesByMerkleHashOrderedBySequenceSQL = selectLeavesByMerkleHashSQL + orderBySequenceNumberSQL + + logIDLabel = "logid" +) + +var ( + once sync.Once + queuedCounter monitoring.Counter + queuedDupCounter monitoring.Counter + dequeuedCounter monitoring.Counter + + queueLatency monitoring.Histogram + queueInsertLatency monitoring.Histogram + queueReadLatency monitoring.Histogram + queueInsertLeafLatency monitoring.Histogram + queueInsertEntryLatency monitoring.Histogram + dequeueLatency monitoring.Histogram + dequeueSelectLatency monitoring.Histogram + dequeueRemoveLatency monitoring.Histogram +) + +func createMetrics(mf monitoring.MetricFactory) { + queuedCounter = mf.NewCounter("mysql_queued_leaves", "Number of leaves queued", logIDLabel) + queuedDupCounter = mf.NewCounter("mysql_queued_dup_leaves", "Number of duplicate leaves queued", logIDLabel) + dequeuedCounter = mf.NewCounter("mysql_dequeued_leaves", "Number of leaves dequeued", logIDLabel) + + queueLatency = mf.NewHistogram("mysql_queue_leaves_latency", "Latency of queue leaves operation in seconds", logIDLabel) + queueInsertLatency = mf.NewHistogram("mysql_queue_leaves_latency_insert", "Latency of insertion part of queue leaves operation in seconds", logIDLabel) + queueReadLatency = mf.NewHistogram("mysql_queue_leaves_latency_read_dups", "Latency of read-duplicates part of queue leaves operation in seconds", logIDLabel) + queueInsertLeafLatency = mf.NewHistogram("mysql_queue_leaf_latency_leaf", "Latency of insert-leaf part of queue (single) leaf operation in seconds", logIDLabel) + queueInsertEntryLatency = mf.NewHistogram("mysql_queue_leaf_latency_entry", "Latency of insert-entry part of queue (single) leaf operation in seconds", logIDLabel) + + dequeueLatency = mf.NewHistogram("mysql_dequeue_leaves_latency", "Latency of dequeue leaves 
operation in seconds", logIDLabel) + dequeueSelectLatency = mf.NewHistogram("mysql_dequeue_leaves_latency_select", "Latency of selection part of dequeue leaves operation in seconds", logIDLabel) + dequeueRemoveLatency = mf.NewHistogram("mysql_dequeue_leaves_latency_remove", "Latency of removal part of dequeue leaves operation in seconds", logIDLabel) +} + +func labelForTX(t *logTreeTX) string { + return strconv.FormatInt(t.treeID, 10) +} + +func observe(hist monitoring.Histogram, duration time.Duration, label string) { + hist.Observe(duration.Seconds(), label) +} + +type mySQLLogStorage struct { + *mySQLTreeStorage + admin storage.AdminStorage + metricFactory monitoring.MetricFactory +} + +// NewLogStorage creates a storage.LogStorage instance for the specified MySQL URL. +// It assumes storage.AdminStorage is backed by the same MySQL database as well. +func NewLogStorage(db *sql.DB, mf monitoring.MetricFactory) storage.LogStorage { + if mf == nil { + mf = monitoring.InertMetricFactory{} + } + return &mySQLLogStorage{ + admin: NewAdminStorage(db), + mySQLTreeStorage: newTreeStorage(db), + metricFactory: mf, + } +} + +func (m *mySQLLogStorage) CheckDatabaseAccessible(ctx context.Context) error { + return m.db.PingContext(ctx) +} + +func (m *mySQLLogStorage) getLeavesByMerkleHashStmt(ctx context.Context, num int, orderBySequence bool) (*sql.Stmt, error) { + if orderBySequence { + return m.getStmt(ctx, selectLeavesByMerkleHashOrderedBySequenceSQL, num, "?", "?") + } + + return m.getStmt(ctx, selectLeavesByMerkleHashSQL, num, "?", "?") +} + +func (m *mySQLLogStorage) getLeavesByLeafIdentityHashStmt(ctx context.Context, num int) (*sql.Stmt, error) { + return m.getStmt(ctx, selectLeavesByLeafIdentityHashSQL, num, "?", "?") +} + +func (m *mySQLLogStorage) GetActiveLogIDs(ctx context.Context) ([]int64, error) { + // Include logs that are DRAINING in the active list as we're still + // integrating leaves into them. 
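+ // The query below therefore matches trees of type LOG or PREORDERED_LOG that
+ // are in either the ACTIVE or DRAINING state, and skips soft-deleted trees.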
+ rows, err := m.db.QueryContext( + ctx, selectNonDeletedTreeIDByTypeAndStateSQL, + trillian.TreeType_LOG.String(), trillian.TreeType_PREORDERED_LOG.String(), + trillian.TreeState_ACTIVE.String(), trillian.TreeState_DRAINING.String()) + if err != nil { + return nil, err + } + defer func() { + if err := rows.Close(); err != nil { + klog.Errorf("rows.Close(): %v", err) + } + }() + ids := []int64{} + for rows.Next() { + var treeID int64 + if err := rows.Scan(&treeID); err != nil { + return nil, err + } + ids = append(ids, treeID) + } + return ids, rows.Err() +} + +func (m *mySQLLogStorage) beginInternal(ctx context.Context, tree *trillian.Tree) (*logTreeTX, error) { + once.Do(func() { + createMetrics(m.metricFactory) + }) + + stCache := cache.NewLogSubtreeCache(rfc6962.DefaultHasher) + ttx, err := m.beginTreeTx(ctx, tree, rfc6962.DefaultHasher.Size(), stCache) + if err != nil && err != storage.ErrTreeNeedsInit { + return nil, err + } + + ltx := &logTreeTX{ + treeTX: ttx, + ls: m, + dequeued: make(map[string]dequeuedLeaf), + } + ltx.slr, ltx.readRev, err = ltx.fetchLatestRoot(ctx) + if err == storage.ErrTreeNeedsInit { + ltx.treeTX.writeRevision = 0 + return ltx, err + } else if err != nil { + if err := ttx.Close(); err != nil { + klog.Errorf("ttx.Close(): %v", err) + } + return nil, err + } + + if err := ltx.root.UnmarshalBinary(ltx.slr.LogRoot); err != nil { + if err := ttx.Close(); err != nil { + klog.Errorf("ttx.Close(): %v", err) + } + return nil, err + } + + ltx.treeTX.writeRevision = ltx.readRev + 1 + return ltx, nil +} + +// TODO(pavelkalinnikov): This and many other methods of this storage +// implementation can leak a specific sql.ErrTxDone all the way to the client, +// if the transaction is rolled back as a result of a canceled context. It must +// return "generic" errors, and only log the specific ones for debugging. +func (m *mySQLLogStorage) ReadWriteTransaction(ctx context.Context, tree *trillian.Tree, f storage.LogTXFunc) error { + tx, err := m.beginInternal(ctx, tree) + if err != nil && err != storage.ErrTreeNeedsInit { + return err + } + defer func() { + if err := tx.Close(); err != nil { + klog.Errorf("tx.Close(): %v", err) + } + }() + if err := f(ctx, tx); err != nil { + return err + } + return tx.Commit(ctx) +} + +func (m *mySQLLogStorage) AddSequencedLeaves(ctx context.Context, tree *trillian.Tree, leaves []*trillian.LogLeaf, timestamp time.Time) ([]*trillian.QueuedLogLeaf, error) { + tx, err := m.beginInternal(ctx, tree) + if tx != nil { + // Ensure we don't leak the transaction. For example if we get an + // ErrTreeNeedsInit from beginInternal() or if AddSequencedLeaves fails + // below. + defer func() { + if err := tx.Close(); err != nil { + klog.Errorf("tx.Close(): %v", err) + } + }() + } + if err != nil { + return nil, err + } + res, err := tx.AddSequencedLeaves(ctx, leaves, timestamp) + if err != nil { + return nil, err + } + if err := tx.Commit(ctx); err != nil { + return nil, err + } + return res, nil +} + +func (m *mySQLLogStorage) SnapshotForTree(ctx context.Context, tree *trillian.Tree) (storage.ReadOnlyLogTreeTX, error) { + tx, err := m.beginInternal(ctx, tree) + if err != nil && err != storage.ErrTreeNeedsInit { + return nil, err + } + return tx, err +} + +func (m *mySQLLogStorage) QueueLeaves(ctx context.Context, tree *trillian.Tree, leaves []*trillian.LogLeaf, queueTimestamp time.Time) ([]*trillian.QueuedLogLeaf, error) { + tx, err := m.beginInternal(ctx, tree) + if tx != nil { + // Ensure we don't leak the transaction. 
For example if we get an + // ErrTreeNeedsInit from beginInternal() or if QueueLeaves fails + // below. + defer func() { + if err := tx.Close(); err != nil { + klog.Errorf("tx.Close(): %v", err) + } + }() + } + if err != nil { + return nil, err + } + existing, err := tx.QueueLeaves(ctx, leaves, queueTimestamp) + if err != nil { + return nil, err + } + + if err := tx.Commit(ctx); err != nil { + return nil, err + } + + ret := make([]*trillian.QueuedLogLeaf, len(leaves)) + for i, e := range existing { + if e != nil { + ret[i] = &trillian.QueuedLogLeaf{ + Leaf: e, + Status: status.Newf(codes.AlreadyExists, "leaf already exists: %v", e.LeafIdentityHash).Proto(), + } + continue + } + ret[i] = &trillian.QueuedLogLeaf{Leaf: leaves[i]} + } + return ret, nil +} + +type logTreeTX struct { + treeTX + ls *mySQLLogStorage + root types.LogRootV1 + readRev int64 + slr *trillian.SignedLogRoot + dequeued map[string]dequeuedLeaf +} + +// GetMerkleNodes returns the requested nodes at the read revision. +func (t *logTreeTX) GetMerkleNodes(ctx context.Context, ids []compact.NodeID) ([]tree.Node, error) { + t.treeTX.mu.Lock() + defer t.treeTX.mu.Unlock() + return t.subtreeCache.GetNodes(ids, t.getSubtreesAtRev(ctx, t.readRev)) +} + +func (t *logTreeTX) DequeueLeaves(ctx context.Context, limit int, cutoffTime time.Time) ([]*trillian.LogLeaf, error) { + t.treeTX.mu.Lock() + defer t.treeTX.mu.Unlock() + + if t.treeType == trillian.TreeType_PREORDERED_LOG { + // TODO(pavelkalinnikov): Optimize this by fetching only the required + // fields of LogLeaf. We can avoid joining with LeafData table here. + return t.getLeavesByRangeInternal(ctx, int64(t.root.TreeSize), int64(limit)) + } + + start := time.Now() + stx, err := t.tx.PrepareContext(ctx, selectQueuedLeavesSQL) + if err != nil { + klog.Warningf("Failed to prepare dequeue select: %s", err) + return nil, err + } + defer func() { + if err := stx.Close(); err != nil { + klog.Errorf("stx.Close(): %v", err) + } + }() + + leaves := make([]*trillian.LogLeaf, 0, limit) + rows, err := stx.QueryContext(ctx, t.treeID, cutoffTime.UnixNano(), limit) + if err != nil { + klog.Warningf("Failed to select rows for work: %s", err) + return nil, err + } + defer func() { + if err := rows.Close(); err != nil { + klog.Errorf("rows.Close(): %v", err) + } + }() + + for rows.Next() { + leaf, dqInfo, err := t.dequeueLeaf(rows) + if err != nil { + klog.Warningf("Error dequeuing leaf: %v", err) + return nil, err + } + + if len(leaf.LeafIdentityHash) != t.hashSizeBytes { + return nil, errors.New("dequeued a leaf with incorrect hash size") + } + + k := string(leaf.LeafIdentityHash) + if _, ok := t.dequeued[k]; ok { + // dupe, user probably called DequeueLeaves more than once. + continue + } + t.dequeued[k] = dqInfo + leaves = append(leaves, leaf) + } + + if rows.Err() != nil { + return nil, rows.Err() + } + label := labelForTX(t) + observe(dequeueSelectLatency, time.Since(start), label) + observe(dequeueLatency, time.Since(start), label) + dequeuedCounter.Add(float64(len(leaves)), label) + + return leaves, nil +} + +// sortLeavesForInsert returns a slice containing the passed in leaves sorted +// by LeafIdentityHash, and paired with their original positions. +// QueueLeaves and AddSequencedLeaves use this to make the order that LeafData +// row locks are acquired deterministic and reduce the chance of deadlocks. 
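+// For example, three leaves whose identity hashes start 0x0c..., 0x0a... and
+// 0x0b... at input positions 0, 1 and 2 come back as the pairs (0x0a..., 1),
+// (0x0b..., 2), (0x0c..., 0), so callers can insert in hash order while still
+// reporting results at the leaves' original indices.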
+func sortLeavesForInsert(leaves []*trillian.LogLeaf) []leafAndPosition { + ordLeaves := make([]leafAndPosition, len(leaves)) + for i, leaf := range leaves { + ordLeaves[i] = leafAndPosition{leaf: leaf, idx: i} + } + sort.Sort(byLeafIdentityHashWithPosition(ordLeaves)) + return ordLeaves +} + +func (t *logTreeTX) QueueLeaves(ctx context.Context, leaves []*trillian.LogLeaf, queueTimestamp time.Time) ([]*trillian.LogLeaf, error) { + t.treeTX.mu.Lock() + defer t.treeTX.mu.Unlock() + + // Don't accept batches if any of the leaves are invalid. + for _, leaf := range leaves { + if len(leaf.LeafIdentityHash) != t.hashSizeBytes { + return nil, fmt.Errorf("queued leaf must have a leaf ID hash of length %d", t.hashSizeBytes) + } + leaf.QueueTimestamp = timestamppb.New(queueTimestamp) + if err := leaf.QueueTimestamp.CheckValid(); err != nil { + return nil, fmt.Errorf("got invalid queue timestamp: %w", err) + } + } + start := time.Now() + label := labelForTX(t) + + ordLeaves := sortLeavesForInsert(leaves) + existingCount := 0 + existingLeaves := make([]*trillian.LogLeaf, len(leaves)) + + for _, ol := range ordLeaves { + i, leaf := ol.idx, ol.leaf + + leafStart := time.Now() + if err := leaf.QueueTimestamp.CheckValid(); err != nil { + return nil, fmt.Errorf("got invalid queue timestamp: %w", err) + } + qTimestamp := leaf.QueueTimestamp.AsTime() + _, err := t.tx.ExecContext(ctx, insertLeafDataSQL, t.treeID, leaf.LeafIdentityHash, leaf.LeafValue, leaf.ExtraData, qTimestamp.UnixNano()) + insertDuration := time.Since(leafStart) + observe(queueInsertLeafLatency, insertDuration, label) + if isDuplicateErr(err) { + // Remember the duplicate leaf, using the requested leaf for now. + existingLeaves[i] = leaf + existingCount++ + queuedDupCounter.Inc(label) + continue + } + if err != nil { + klog.Warningf("Error inserting %d into LeafData: %s", i, err) + return nil, mysqlToGRPC(err) + } + + // Create the work queue entry + args := []interface{}{ + t.treeID, + leaf.LeafIdentityHash, + leaf.MerkleLeafHash, + } + args = append(args, queueArgs(t.treeID, leaf.LeafIdentityHash, qTimestamp)...) + _, err = t.tx.ExecContext( + ctx, + insertUnsequencedEntrySQL, + args..., + ) + if err != nil { + klog.Warningf("Error inserting into Unsequenced: %s", err) + return nil, mysqlToGRPC(err) + } + leafDuration := time.Since(leafStart) + observe(queueInsertEntryLatency, (leafDuration - insertDuration), label) + } + insertDuration := time.Since(start) + observe(queueInsertLatency, insertDuration, label) + queuedCounter.Add(float64(len(leaves)), label) + + if existingCount == 0 { + return existingLeaves, nil + } + + // For existing leaves, we need to retrieve the contents. 
First collate the desired LeafIdentityHash values + // We deduplicate the hashes to address https://github.com/google/trillian/issues/3603 but will be mapped + // back to the existingLeaves slice below + uniqueLeafMap := make(map[string]struct{}, len(existingLeaves)) + var toRetrieve [][]byte + for _, existing := range existingLeaves { + if existing != nil { + key := string(existing.LeafIdentityHash) + if _, ok := uniqueLeafMap[key]; !ok { + uniqueLeafMap[key] = struct{}{} + toRetrieve = append(toRetrieve, existing.LeafIdentityHash) + } + } + } + results, err := t.getLeafDataByIdentityHash(ctx, toRetrieve) + if err != nil { + return nil, fmt.Errorf("failed to retrieve existing leaves: %v", err) + } + if len(results) != len(toRetrieve) { + return nil, fmt.Errorf("failed to retrieve all existing leaves: got %d, want %d", len(results), len(toRetrieve)) + } + // Replace the requested leaves with the actual leaves. + for i, requested := range existingLeaves { + if requested == nil { + continue + } + found := false + for _, result := range results { + if bytes.Equal(result.LeafIdentityHash, requested.LeafIdentityHash) { + existingLeaves[i] = result + found = true + break + } + } + if !found { + return nil, fmt.Errorf("failed to find existing leaf for hash %x", requested.LeafIdentityHash) + } + } + totalDuration := time.Since(start) + readDuration := totalDuration - insertDuration + observe(queueReadLatency, readDuration, label) + observe(queueLatency, totalDuration, label) + + return existingLeaves, nil +} + +func (t *logTreeTX) AddSequencedLeaves(ctx context.Context, leaves []*trillian.LogLeaf, timestamp time.Time) ([]*trillian.QueuedLogLeaf, error) { + t.treeTX.mu.Lock() + defer t.treeTX.mu.Unlock() + + res := make([]*trillian.QueuedLogLeaf, len(leaves)) + ok := status.New(codes.OK, "OK").Proto() + + // Leaves in this transaction are inserted in two tables. For each leaf, if + // one of the two inserts fails, we remove the side effect by rolling back to + // a savepoint installed before the first insert of the two. + const savepoint = "SAVEPOINT AddSequencedLeaves" + if _, err := t.tx.ExecContext(ctx, savepoint); err != nil { + klog.Errorf("Error adding savepoint: %s", err) + return nil, mysqlToGRPC(err) + } + // TODO(pavelkalinnikov): Consider performance implication of executing this + // extra SAVEPOINT, especially for 1-entry batches. Optimize if necessary. + + // Note: LeafData inserts are presumably protected from deadlocks due to + // sorting, but the order of the corresponding SequencedLeafData inserts + // becomes indeterministic. However, in a typical case when leaves are + // supplied in contiguous non-intersecting batches, the chance of having + // circular dependencies between transactions is significantly lower. + ordLeaves := sortLeavesForInsert(leaves) + for _, ol := range ordLeaves { + i, leaf := ol.idx, ol.leaf + + // This should fail on insert, but catch it early. + if got, want := len(leaf.LeafIdentityHash), t.hashSizeBytes; got != want { + return nil, status.Errorf(codes.FailedPrecondition, "leaves[%d] has incorrect hash size %d, want %d", i, got, want) + } + + if _, err := t.tx.ExecContext(ctx, savepoint); err != nil { + klog.Errorf("Error updating savepoint: %s", err) + return nil, mysqlToGRPC(err) + } + + res[i] = &trillian.QueuedLogLeaf{Status: ok} + + // TODO(pavelkalinnikov): Measure latencies. 
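+ // First of the two inserts: the leaf contents go into LeafData. A duplicate
+ // LeafIdentityHash is reported per leaf as FailedPrecondition below rather
+ // than failing the whole batch.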
+ _, err := t.tx.ExecContext(ctx, insertLeafDataSQL, + t.treeID, leaf.LeafIdentityHash, leaf.LeafValue, leaf.ExtraData, timestamp.UnixNano()) + // TODO(pavelkalinnikov): Detach PREORDERED_LOG integration latency metric. + + // TODO(pavelkalinnikov): Support opting out from duplicates detection. + if isDuplicateErr(err) { + res[i].Status = status.New(codes.FailedPrecondition, "conflicting LeafIdentityHash").Proto() + // Note: No rolling back to savepoint because there is no side effect. + continue + } else if err != nil { + klog.Errorf("Error inserting leaves[%d] into LeafData: %s", i, err) + return nil, mysqlToGRPC(err) + } + + _, err = t.tx.ExecContext(ctx, insertSequencedLeafSQL+valuesPlaceholder5, + t.treeID, leaf.LeafIdentityHash, leaf.MerkleLeafHash, leaf.LeafIndex, 0) + // TODO(pavelkalinnikov): Update IntegrateTimestamp on integrating the leaf. + + if isDuplicateErr(err) { + res[i].Status = status.New(codes.FailedPrecondition, "conflicting LeafIndex").Proto() + if _, err := t.tx.ExecContext(ctx, "ROLLBACK TO "+savepoint); err != nil { + klog.Errorf("Error rolling back to savepoint: %s", err) + return nil, mysqlToGRPC(err) + } + } else if err != nil { + klog.Errorf("Error inserting leaves[%d] into SequencedLeafData: %s", i, err) + return nil, mysqlToGRPC(err) + } + + // TODO(pavelkalinnikov): Load LeafData for conflicting entries. + } + + if _, err := t.tx.ExecContext(ctx, "RELEASE "+savepoint); err != nil { + klog.Errorf("Error releasing savepoint: %s", err) + return nil, mysqlToGRPC(err) + } + + return res, nil +} + +func (t *logTreeTX) GetLeavesByRange(ctx context.Context, start, count int64) ([]*trillian.LogLeaf, error) { + t.treeTX.mu.Lock() + defer t.treeTX.mu.Unlock() + return t.getLeavesByRangeInternal(ctx, start, count) +} + +func (t *logTreeTX) getLeavesByRangeInternal(ctx context.Context, start, count int64) ([]*trillian.LogLeaf, error) { + if count <= 0 { + return nil, status.Errorf(codes.InvalidArgument, "invalid count %d, want > 0", count) + } + if start < 0 { + return nil, status.Errorf(codes.InvalidArgument, "invalid start %d, want >= 0", start) + } + + if t.treeType == trillian.TreeType_LOG { + treeSize := int64(t.root.TreeSize) + if treeSize <= 0 { + return nil, status.Errorf(codes.OutOfRange, "empty tree") + } else if start >= treeSize { + return nil, status.Errorf(codes.OutOfRange, "invalid start %d, want < TreeSize(%d)", start, treeSize) + } + // Ensure no entries queried/returned beyond the tree. + if maxCount := treeSize - start; count > maxCount { + count = maxCount + } + } + // TODO(pavelkalinnikov): Further clip `count` to a safe upper bound like 64k. + + args := []interface{}{start, start + count, t.treeID} + rows, err := t.tx.QueryContext(ctx, selectLeavesByRangeSQL, args...) 
+ if err != nil { + klog.Warningf("Failed to get leaves by range: %s", err) + return nil, err + } + defer func() { + if err := rows.Close(); err != nil { + klog.Errorf("rows.Close(): %v", err) + } + }() + + ret := make([]*trillian.LogLeaf, 0, count) + for wantIndex := start; rows.Next(); wantIndex++ { + leaf := &trillian.LogLeaf{} + var qTimestamp, iTimestamp int64 + if err := rows.Scan( + &leaf.MerkleLeafHash, + &leaf.LeafIdentityHash, + &leaf.LeafValue, + &leaf.LeafIndex, + &leaf.ExtraData, + &qTimestamp, + &iTimestamp); err != nil { + klog.Warningf("Failed to scan merkle leaves: %s", err) + return nil, err + } + if leaf.LeafIndex != wantIndex { + if wantIndex < int64(t.root.TreeSize) { + return nil, fmt.Errorf("got unexpected index %d, want %d", leaf.LeafIndex, wantIndex) + } + break + } + leaf.QueueTimestamp = timestamppb.New(time.Unix(0, qTimestamp)) + if err := leaf.QueueTimestamp.CheckValid(); err != nil { + return nil, fmt.Errorf("got invalid queue timestamp: %w", err) + } + leaf.IntegrateTimestamp = timestamppb.New(time.Unix(0, iTimestamp)) + if err := leaf.IntegrateTimestamp.CheckValid(); err != nil { + return nil, fmt.Errorf("got invalid integrate timestamp: %w", err) + } + ret = append(ret, leaf) + } + if err := rows.Err(); err != nil { + klog.Warningf("Failed to read returned leaves: %s", err) + return nil, err + } + + return ret, nil +} + +func (t *logTreeTX) GetLeavesByHash(ctx context.Context, leafHashes [][]byte, orderBySequence bool) ([]*trillian.LogLeaf, error) { + t.treeTX.mu.Lock() + defer t.treeTX.mu.Unlock() + + tmpl, err := t.ls.getLeavesByMerkleHashStmt(ctx, len(leafHashes), orderBySequence) + if err != nil { + return nil, err + } + + return t.getLeavesByHashInternal(ctx, leafHashes, tmpl, "merkle") +} + +// getLeafDataByIdentityHash retrieves leaf data by LeafIdentityHash, returned +// as a slice of LogLeaf objects for convenience. However, note that the +// returned LogLeaf objects will not have a valid MerkleLeafHash, LeafIndex, or IntegrateTimestamp. +func (t *logTreeTX) getLeafDataByIdentityHash(ctx context.Context, leafHashes [][]byte) ([]*trillian.LogLeaf, error) { + tmpl, err := t.ls.getLeavesByLeafIdentityHashStmt(ctx, len(leafHashes)) + if err != nil { + return nil, err + } + return t.getLeavesByHashInternal(ctx, leafHashes, tmpl, "leaf-identity") +} + +func (t *logTreeTX) LatestSignedLogRoot(ctx context.Context) (*trillian.SignedLogRoot, error) { + t.treeTX.mu.Lock() + defer t.treeTX.mu.Unlock() + + if t.slr == nil { + return nil, storage.ErrTreeNeedsInit + } + + return t.slr, nil +} + +// fetchLatestRoot reads the latest root and the revision from the DB. +func (t *logTreeTX) fetchLatestRoot(ctx context.Context) (*trillian.SignedLogRoot, int64, error) { + var timestamp, treeSize, treeRevision int64 + var rootHash, rootSignatureBytes []byte + if err := t.tx.QueryRowContext( + ctx, selectLatestSignedLogRootSQL, t.treeID).Scan( + ×tamp, &treeSize, &rootHash, &treeRevision, &rootSignatureBytes, + ); err == sql.ErrNoRows { + // It's possible there are no roots for this tree yet + return nil, 0, storage.ErrTreeNeedsInit + } + + // Put logRoot back together. Fortunately LogRoot has a deterministic serialization. 
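+ // Only the timestamp, tree size and root hash are needed to rebuild it; the
+ // RootSignature read above is not carried into the returned SignedLogRoot.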
+ logRoot, err := (&types.LogRootV1{ + RootHash: rootHash, + TimestampNanos: uint64(timestamp), + TreeSize: uint64(treeSize), + }).MarshalBinary() + if err != nil { + return nil, 0, err + } + + return &trillian.SignedLogRoot{LogRoot: logRoot}, treeRevision, nil +} + +func (t *logTreeTX) StoreSignedLogRoot(ctx context.Context, root *trillian.SignedLogRoot) error { + t.treeTX.mu.Lock() + defer t.treeTX.mu.Unlock() + + var logRoot types.LogRootV1 + if err := logRoot.UnmarshalBinary(root.LogRoot); err != nil { + klog.Warningf("Failed to parse log root: %x %v", root.LogRoot, err) + return err + } + if len(logRoot.Metadata) != 0 { + return fmt.Errorf("unimplemented: mysql storage does not support log root metadata") + } + + res, err := t.tx.ExecContext( + ctx, + insertTreeHeadSQL, + t.treeID, + logRoot.TimestampNanos, + logRoot.TreeSize, + logRoot.RootHash, + t.treeTX.writeRevision, + []byte{}) + if err != nil { + klog.Warningf("Failed to store signed root: %s", err) + } + + return checkResultOkAndRowCountIs(res, err, 1) +} + +func (t *logTreeTX) getLeavesByHashInternal(ctx context.Context, leafHashes [][]byte, tmpl *sql.Stmt, desc string) ([]*trillian.LogLeaf, error) { + stx := t.tx.StmtContext(ctx, tmpl) + defer func() { + if err := stx.Close(); err != nil { + klog.Errorf("stx.Close(): %v", err) + } + }() + + var args []interface{} + for _, hash := range leafHashes { + args = append(args, []byte(hash)) + } + args = append(args, t.treeID) + rows, err := stx.QueryContext(ctx, args...) + if err != nil { + klog.Warningf("Query() %s hash = %v", desc, err) + return nil, err + } + defer func() { + if err := rows.Close(); err != nil { + klog.Errorf("rows.Close(): %v", err) + } + }() + + // The tree could include duplicates so we don't know how many results will be returned + var ret []*trillian.LogLeaf + for rows.Next() { + leaf := &trillian.LogLeaf{} + // We might be using a LEFT JOIN in our statement, so leaves which are + // queued but not yet integrated will have a NULL IntegrateTimestamp + // when there's no corresponding entry in SequencedLeafData, even though + // the table definition forbids that, so we use a nullable type here and + // check its validity below. + var integrateTS sql.NullInt64 + var queueTS int64 + + if err := rows.Scan(&leaf.MerkleLeafHash, &leaf.LeafIdentityHash, &leaf.LeafValue, &leaf.LeafIndex, &leaf.ExtraData, &queueTS, &integrateTS); err != nil { + klog.Warningf("LogID: %d Scan() %s = %s", t.treeID, desc, err) + return nil, err + } + leaf.QueueTimestamp = timestamppb.New(time.Unix(0, queueTS)) + if err := leaf.QueueTimestamp.CheckValid(); err != nil { + return nil, fmt.Errorf("got invalid queue timestamp: %w", err) + } + if integrateTS.Valid { + leaf.IntegrateTimestamp = timestamppb.New(time.Unix(0, integrateTS.Int64)) + if err := leaf.IntegrateTimestamp.CheckValid(); err != nil { + return nil, fmt.Errorf("got invalid integrate timestamp: %w", err) + } + } + + if got, want := len(leaf.MerkleLeafHash), t.hashSizeBytes; got != want { + return nil, fmt.Errorf("LogID: %d Scanned leaf %s does not have hash length %d, got %d", t.treeID, desc, want, got) + } + + ret = append(ret, leaf) + } + if err := rows.Err(); err != nil { + klog.Warningf("Failed to read returned leaves: %s", err) + return nil, err + } + + return ret, nil +} + +// leafAndPosition records original position before sort. 
+type leafAndPosition struct { + leaf *trillian.LogLeaf + idx int +} + +// byLeafIdentityHashWithPosition allows sorting (as above), but where we need +// to remember the original position +type byLeafIdentityHashWithPosition []leafAndPosition + +func (l byLeafIdentityHashWithPosition) Len() int { + return len(l) +} + +func (l byLeafIdentityHashWithPosition) Swap(i, j int) { + l[i], l[j] = l[j], l[i] +} + +func (l byLeafIdentityHashWithPosition) Less(i, j int) bool { + return bytes.Compare(l[i].leaf.LeafIdentityHash, l[j].leaf.LeafIdentityHash) == -1 +} diff --git a/storage/mysql/log_storage_test.go b/storage/mysql/log_storage_test.go new file mode 100644 index 0000000000..b67035bd30 --- /dev/null +++ b/storage/mysql/log_storage_test.go @@ -0,0 +1,846 @@ +// Copyright 2016 Google LLC. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package mysql + +import ( + "bytes" + "context" + "crypto/sha256" + "database/sql" + "fmt" + "sort" + "testing" + "time" + + "github.com/google/go-cmp/cmp" + "github.com/google/trillian" + "github.com/google/trillian/integration/storagetest" + "github.com/google/trillian/storage" + "github.com/google/trillian/storage/testonly" + "github.com/google/trillian/types" + "google.golang.org/protobuf/proto" + "google.golang.org/protobuf/types/known/timestamppb" + "k8s.io/klog/v2" + + _ "github.com/go-sql-driver/mysql" +) + +var allTables = []string{"Unsequenced", "TreeHead", "SequencedLeafData", "LeafData", "Subtree", "TreeControl", "Trees"} + +// Must be 32 bytes to match sha256 length if it was a real hash +var ( + dummyHash = []byte("hashxxxxhashxxxxhashxxxxhashxxxx") + dummyRawHash = []byte("xxxxhashxxxxhashxxxxhashxxxxhash") + dummyHash2 = []byte("HASHxxxxhashxxxxhashxxxxhashxxxx") +) + +// Time we will queue all leaves at +var fakeQueueTime = time.Date(2016, 11, 10, 15, 16, 27, 0, time.UTC) + +// Time we will integrate all leaves at +var fakeIntegrateTime = time.Date(2016, 11, 10, 15, 16, 30, 0, time.UTC) + +// Time we'll request for guard cutoff in tests that don't test this (should include all above) +var fakeDequeueCutoffTime = time.Date(2016, 11, 10, 15, 16, 30, 0, time.UTC) + +// Used for tests involving extra data +var someExtraData = []byte("Some extra data") + +const ( + leavesToInsert = 5 + sequenceNumber int64 = 237 +) + +// Tests that access the db should each use a distinct log ID to prevent lock contention when +// run in parallel or race conditions / unexpected interactions. Tests that pass should hold +// no locks afterwards. 
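+ // Most tests below therefore provision their own tree before exercising the
+ // storage layer. A minimal sketch of that setup, using the helpers defined
+ // elsewhere in this package, is:
+ //
+ //	cleanTestDB(DB)
+ //	as := NewAdminStorage(DB)
+ //	tree := mustCreateTree(ctx, t, as, testonly.LogTree)
+ //	s := NewLogStorage(DB, nil)
+ //	mustSignAndStoreLogRoot(ctx, t, s, tree, 0)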
+ +func createFakeLeaf(ctx context.Context, db *sql.DB, logID int64, rawHash, hash, data, extraData []byte, seq int64, t *testing.T) *trillian.LogLeaf { + t.Helper() + queuedAtNanos := fakeQueueTime.UnixNano() + integratedAtNanos := fakeIntegrateTime.UnixNano() + _, err := db.ExecContext(ctx, "INSERT INTO LeafData(TreeId, LeafIdentityHash, LeafValue, ExtraData, QueueTimestampNanos) VALUES(?,?,?,?,?)", logID, rawHash, data, extraData, queuedAtNanos) + _, err2 := db.ExecContext(ctx, "INSERT INTO SequencedLeafData(TreeId, SequenceNumber, LeafIdentityHash, MerkleLeafHash, IntegrateTimestampNanos) VALUES(?,?,?,?,?)", logID, seq, rawHash, hash, integratedAtNanos) + + if err != nil || err2 != nil { + t.Fatalf("Failed to create test leaves: %v %v", err, err2) + } + queueTimestamp := timestamppb.New(fakeQueueTime) + integrateTimestamp := timestamppb.New(fakeIntegrateTime) + return &trillian.LogLeaf{ + MerkleLeafHash: hash, + LeafValue: data, + ExtraData: extraData, + LeafIndex: seq, + LeafIdentityHash: rawHash, + QueueTimestamp: queueTimestamp, + IntegrateTimestamp: integrateTimestamp, + } +} + +func checkLeafContents(leaf *trillian.LogLeaf, seq int64, rawHash, hash, data, extraData []byte, t *testing.T) { + t.Helper() + if got, want := leaf.MerkleLeafHash, hash; !bytes.Equal(got, want) { + t.Fatalf("Wrong leaf hash in returned leaf got\n%v\nwant:\n%v", got, want) + } + + if got, want := leaf.LeafIdentityHash, rawHash; !bytes.Equal(got, want) { + t.Fatalf("Wrong raw leaf hash in returned leaf got\n%v\nwant:\n%v", got, want) + } + + if got, want := seq, leaf.LeafIndex; got != want { + t.Fatalf("Bad sequence number in returned leaf got: %d, want:%d", got, want) + } + + if got, want := leaf.LeafValue, data; !bytes.Equal(got, want) { + t.Fatalf("Unxpected data in returned leaf. got:\n%v\nwant:\n%v", got, want) + } + + if got, want := leaf.ExtraData, extraData; !bytes.Equal(got, want) { + t.Fatalf("Unxpected data in returned leaf. got:\n%v\nwant:\n%v", got, want) + } + + iTime := leaf.IntegrateTimestamp.AsTime() + if got, want := iTime.UnixNano(), fakeIntegrateTime.UnixNano(); got != want { + t.Errorf("Wrong IntegrateTimestamp: got %v, want %v", got, want) + } +} + +func TestLogSuite(t *testing.T) { + storageFactory := func(context.Context, *testing.T) (storage.LogStorage, storage.AdminStorage) { + t.Cleanup(func() { cleanTestDB(DB) }) + return NewLogStorage(DB, nil), NewAdminStorage(DB) + } + + storagetest.RunLogStorageTests(t, storageFactory) +} + +func TestQueueDuplicateLeaf(t *testing.T) { + ctx := context.Background() + cleanTestDB(DB) + as := NewAdminStorage(DB) + tree := mustCreateTree(ctx, t, as, testonly.LogTree) + s := NewLogStorage(DB, nil) + mustSignAndStoreLogRoot(ctx, t, s, tree, 0) + + count := 15 + leaves := createTestLeaves(int64(count), 10) + leaves2 := createTestLeaves(int64(count), 12) + leaves3 := createTestLeaves(3, 100) + leaves4 := createTestLeaves(3, 105) + + // Note that tests accumulate queued leaves on top of each other. + tests := []struct { + desc string + leaves []*trillian.LogLeaf + want []*trillian.LogLeaf + }{ + { + desc: "[10, 11, 12, ...]", + leaves: leaves, + want: make([]*trillian.LogLeaf, count), + }, + { + desc: "[12, 13, 14, ...] 
so first (count-2) are duplicates", + leaves: leaves2, + want: append(leaves[2:], nil, nil), + }, + { + desc: "[10, 100, 11, 101, 102] so [dup, new, dup, new, dup]", + leaves: []*trillian.LogLeaf{leaves[0], leaves3[0], leaves[1], leaves3[1], leaves[2]}, + want: []*trillian.LogLeaf{leaves[0], nil, leaves[1], nil, leaves[2]}, + }, + { + // we explictly reuse tests that have already been integrated to test issue 3603 + desc: "[100, 100, 106, 101, 107]", + leaves: []*trillian.LogLeaf{leaves3[0], leaves3[0], leaves4[1], leaves3[1], leaves4[2]}, + want: []*trillian.LogLeaf{leaves3[0], leaves3[0], leaves4[1], leaves3[1], leaves4[2]}, + }, + } + + for _, test := range tests { + t.Run(test.desc, func(t *testing.T) { + existing, err := s.QueueLeaves(ctx, tree, test.leaves, fakeQueueTime) + if err != nil { + t.Fatalf("Failed to queue leaves: %v", err) + } + + if len(existing) != len(test.want) { + t.Fatalf("|QueueLeaves()|=%d; want %d", len(existing), len(test.want)) + } + for i, want := range test.want { + got := existing[i] + if want == nil { + if got.Status != nil { + t.Errorf("QueueLeaves()[%d].Code: %v; want %v", i, got, want) + } + return + } + if got == nil { + t.Fatalf("QueueLeaves()[%d]=nil; want non-nil", i) + } else if !bytes.Equal(got.Leaf.LeafIdentityHash, want.LeafIdentityHash) { + t.Fatalf("QueueLeaves()[%d].LeafIdentityHash=%x; want %x", i, got.Leaf.LeafIdentityHash, want.LeafIdentityHash) + } + } + }) + } +} + +func TestQueueLeaves(t *testing.T) { + ctx := context.Background() + + cleanTestDB(DB) + as := NewAdminStorage(DB) + tree := mustCreateTree(ctx, t, as, testonly.LogTree) + s := NewLogStorage(DB, nil) + mustSignAndStoreLogRoot(ctx, t, s, tree, 0) + + leaves := createTestLeaves(leavesToInsert, 20) + if _, err := s.QueueLeaves(ctx, tree, leaves, fakeQueueTime); err != nil { + t.Fatalf("Failed to queue leaves: %v", err) + } + + // Should see the leaves in the database. There is no API to read from the unsequenced data. + var count int + if err := DB.QueryRowContext(ctx, "SELECT COUNT(*) FROM Unsequenced WHERE TreeID=?", tree.TreeId).Scan(&count); err != nil { + t.Fatalf("Could not query row count: %v", err) + } + if leavesToInsert != count { + t.Fatalf("Expected %d unsequenced rows but got: %d", leavesToInsert, count) + } + + // Additional check on timestamp being set correctly in the database + var queueTimestamp int64 + if err := DB.QueryRowContext(ctx, "SELECT DISTINCT QueueTimestampNanos FROM Unsequenced WHERE TreeID=?", tree.TreeId).Scan(&queueTimestamp); err != nil { + t.Fatalf("Could not query timestamp: %v", err) + } + if got, want := queueTimestamp, fakeQueueTime.UnixNano(); got != want { + t.Fatalf("Incorrect queue timestamp got: %d want: %d", got, want) + } +} + +func TestQueueLeavesDuplicateBigBatch(t *testing.T) { + t.Skip("Known Issue: https://github.com/google/trillian/issues/1845") + ctx := context.Background() + + cleanTestDB(DB) + as := NewAdminStorage(DB) + tree := mustCreateTree(ctx, t, as, testonly.LogTree) + s := NewLogStorage(DB, nil) + mustSignAndStoreLogRoot(ctx, t, s, tree, 0) + + const leafCount = 999 + 1 + leaves := createTestLeaves(leafCount, 20) + + if _, err := s.QueueLeaves(ctx, tree, leaves, fakeQueueTime); err != nil { + t.Fatalf("Failed to queue leaves: %v", err) + } + + if _, err := s.QueueLeaves(ctx, tree, leaves, fakeQueueTime); err != nil { + t.Fatalf("Failed to queue leaves: %v", err) + } + + // Should see the leaves in the database. There is no API to read from the unsequenced data. 
+ var count int + if err := DB.QueryRowContext(ctx, "SELECT COUNT(*) FROM Unsequenced WHERE TreeID=?", tree.TreeId).Scan(&count); err != nil { + t.Fatalf("Could not query row count: %v", err) + } + if leafCount != count { + t.Fatalf("Expected %d unsequenced rows but got: %d", leafCount, count) + } +} + +// ----------------------------------------------------------------------------- + +func TestDequeueLeavesHaveQueueTimestamp(t *testing.T) { + ctx := context.Background() + cleanTestDB(DB) + as := NewAdminStorage(DB) + tree := mustCreateTree(ctx, t, as, testonly.LogTree) + s := NewLogStorage(DB, nil) + mustSignAndStoreLogRoot(ctx, t, s, tree, 0) + + leaves := createTestLeaves(leavesToInsert, 20) + if _, err := s.QueueLeaves(ctx, tree, leaves, fakeDequeueCutoffTime); err != nil { + t.Fatalf("Failed to queue leaves: %v", err) + } + + { + // Now try to dequeue them + runLogTX(s, tree, t, func(ctx context.Context, tx2 storage.LogTreeTX) error { + leaves2, err := tx2.DequeueLeaves(ctx, 99, fakeDequeueCutoffTime) + if err != nil { + t.Fatalf("Failed to dequeue leaves: %v", err) + } + if len(leaves2) != leavesToInsert { + t.Fatalf("Dequeued %d leaves but expected to get %d", len(leaves2), leavesToInsert) + } + ensureLeavesHaveQueueTimestamp(t, leaves2, fakeDequeueCutoffTime) + return nil + }) + } +} + +// Queues leaves and attempts to dequeue before the guard cutoff allows it. This should +// return nothing. Then retry with an inclusive guard cutoff and ensure the leaves +// are returned. +func TestDequeueLeavesGuardInterval(t *testing.T) { + ctx := context.Background() + cleanTestDB(DB) + as := NewAdminStorage(DB) + tree := mustCreateTree(ctx, t, as, testonly.LogTree) + s := NewLogStorage(DB, nil) + mustSignAndStoreLogRoot(ctx, t, s, tree, 0) + + leaves := createTestLeaves(leavesToInsert, 20) + if _, err := s.QueueLeaves(ctx, tree, leaves, fakeQueueTime); err != nil { + t.Fatalf("Failed to queue leaves: %v", err) + } + + { + // Now try to dequeue them using a cutoff that means we should get none + runLogTX(s, tree, t, func(ctx context.Context, tx2 storage.LogTreeTX) error { + leaves2, err := tx2.DequeueLeaves(ctx, 99, fakeQueueTime.Add(-time.Second)) + if err != nil { + t.Fatalf("Failed to dequeue leaves: %v", err) + } + if len(leaves2) != 0 { + t.Fatalf("Dequeued %d leaves when they all should be in guard interval", len(leaves2)) + } + + // Try to dequeue again using a cutoff that should include them + leaves2, err = tx2.DequeueLeaves(ctx, 99, fakeQueueTime.Add(time.Second)) + if err != nil { + t.Fatalf("Failed to dequeue leaves: %v", err) + } + if len(leaves2) != leavesToInsert { + t.Fatalf("Dequeued %d leaves but expected to get %d", len(leaves2), leavesToInsert) + } + ensureAllLeavesDistinct(leaves2, t) + return nil + }) + } +} + +func TestDequeueLeavesTimeOrdering(t *testing.T) { + // Queue two small batches of leaves at different timestamps. Do two separate dequeue + // transactions and make sure the returned leaves are respecting the time ordering of the + // queue. 
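+ // The second batch below is queued with an earlier timestamp, so the first
+ // dequeue transaction is expected to return it ahead of the first batch.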
+ ctx := context.Background() + cleanTestDB(DB) + as := NewAdminStorage(DB) + tree := mustCreateTree(ctx, t, as, testonly.LogTree) + s := NewLogStorage(DB, nil) + mustSignAndStoreLogRoot(ctx, t, s, tree, 0) + + batchSize := 2 + leaves := createTestLeaves(int64(batchSize), 0) + leaves2 := createTestLeaves(int64(batchSize), int64(batchSize)) + + if _, err := s.QueueLeaves(ctx, tree, leaves, fakeQueueTime); err != nil { + t.Fatalf("QueueLeaves(1st batch) = %v", err) + } + // These are one second earlier so should be dequeued first + if _, err := s.QueueLeaves(ctx, tree, leaves2, fakeQueueTime.Add(-time.Second)); err != nil { + t.Fatalf("QueueLeaves(2nd batch) = %v", err) + } + + { + // Now try to dequeue two leaves and we should get the second batch + runLogTX(s, tree, t, func(ctx context.Context, tx2 storage.LogTreeTX) error { + dequeue1, err := tx2.DequeueLeaves(ctx, batchSize, fakeQueueTime) + if err != nil { + t.Fatalf("DequeueLeaves(1st) = %v", err) + } + if got, want := len(dequeue1), batchSize; got != want { + t.Fatalf("Dequeue count mismatch (1st) got: %d, want: %d", got, want) + } + ensureAllLeavesDistinct(dequeue1, t) + + // Ensure this is the second batch queued by comparing leaf hashes (must be distinct as + // the leaf data was). + if !leafInBatch(dequeue1[0], leaves2) || !leafInBatch(dequeue1[1], leaves2) { + t.Fatalf("Got leaf from wrong batch (1st dequeue): %v", dequeue1) + } + iTimestamp := timestamppb.Now() + for i, l := range dequeue1 { + l.IntegrateTimestamp = iTimestamp + l.LeafIndex = int64(i) + } + if err := tx2.UpdateSequencedLeaves(ctx, dequeue1); err != nil { + t.Fatalf("UpdateSequencedLeaves(): %v", err) + } + + return nil + }) + + // Try to dequeue again and we should get the batch that was queued first, though at a later time + runLogTX(s, tree, t, func(ctx context.Context, tx3 storage.LogTreeTX) error { + dequeue2, err := tx3.DequeueLeaves(ctx, batchSize, fakeQueueTime) + if err != nil { + t.Fatalf("DequeueLeaves(2nd) = %v", err) + } + if got, want := len(dequeue2), batchSize; got != want { + t.Fatalf("Dequeue count mismatch (2nd) got: %d, want: %d", got, want) + } + ensureAllLeavesDistinct(dequeue2, t) + + // Ensure this is the first batch by comparing leaf hashes. 
+ if !leafInBatch(dequeue2[0], leaves) || !leafInBatch(dequeue2[1], leaves) { + t.Fatalf("Got leaf from wrong batch (2nd dequeue): %v", dequeue2) + } + return nil + }) + } +} + +func TestGetLeavesByHashNotPresent(t *testing.T) { + ctx := context.Background() + cleanTestDB(DB) + as := NewAdminStorage(DB) + tree := mustCreateTree(ctx, t, as, testonly.LogTree) + s := NewLogStorage(DB, nil) + + runLogTX(s, tree, t, func(ctx context.Context, tx storage.LogTreeTX) error { + hashes := [][]byte{[]byte("thisdoesn'texist")} + leaves, err := tx.GetLeavesByHash(ctx, hashes, false) + if err != nil { + t.Fatalf("Error getting leaves by hash: %v", err) + } + if len(leaves) != 0 { + t.Fatalf("Expected no leaves returned but got %d", len(leaves)) + } + return nil + }) +} + +func TestGetLeavesByHash(t *testing.T) { + ctx := context.Background() + + // Create fake leaf as if it had been sequenced + cleanTestDB(DB) + as := NewAdminStorage(DB) + tree := mustCreateTree(ctx, t, as, testonly.LogTree) + s := NewLogStorage(DB, nil) + + data := []byte("some data") + createFakeLeaf(ctx, DB, tree.TreeId, dummyRawHash, dummyHash, data, someExtraData, sequenceNumber, t) + + runLogTX(s, tree, t, func(ctx context.Context, tx storage.LogTreeTX) error { + hashes := [][]byte{dummyHash} + leaves, err := tx.GetLeavesByHash(ctx, hashes, false) + if err != nil { + t.Fatalf("Unexpected error getting leaf by hash: %v", err) + } + if len(leaves) != 1 { + t.Fatalf("Got %d leaves but expected one", len(leaves)) + } + checkLeafContents(leaves[0], sequenceNumber, dummyRawHash, dummyHash, data, someExtraData, t) + return nil + }) +} + +func TestGetLeavesByHashBigBatch(t *testing.T) { + t.Skip("Known Issue: https://github.com/google/trillian/issues/1845") + ctx := context.Background() + + // Create fake leaf as if it had been sequenced + cleanTestDB(DB) + as := NewAdminStorage(DB) + tree := mustCreateTree(ctx, t, as, testonly.LogTree) + s := NewLogStorage(DB, nil) + + const leafCount = 999 + 1 + hashes := make([][]byte, leafCount) + for i := 0; i < leafCount; i++ { + data := []byte(fmt.Sprintf("data %d", i)) + hash := sha256.Sum256(data) + hashes[i] = hash[:] + createFakeLeaf(ctx, DB, tree.TreeId, hash[:], hash[:], data, someExtraData, sequenceNumber+int64(i), t) + } + + runLogTX(s, tree, t, func(ctx context.Context, tx storage.LogTreeTX) error { + leaves, err := tx.GetLeavesByHash(ctx, hashes, false) + if err != nil { + t.Fatalf("Unexpected error getting leaf by hash: %v", err) + } + if got, want := len(leaves), leafCount; got != want { + t.Fatalf("Got %d leaves, expected %d", got, want) + } + return nil + }) +} + +func TestGetLeafDataByIdentityHash(t *testing.T) { + ctx := context.Background() + + // Create fake leaf as if it had been sequenced + cleanTestDB(DB) + as := NewAdminStorage(DB) + tree := mustCreateTree(ctx, t, as, testonly.LogTree) + s := NewLogStorage(DB, nil) + data := []byte("some data") + leaf := createFakeLeaf(ctx, DB, tree.TreeId, dummyRawHash, dummyHash, data, someExtraData, sequenceNumber, t) + leaf.LeafIndex = -1 + leaf.MerkleLeafHash = []byte(dummyMerkleLeafHash) + leaf2 := createFakeLeaf(ctx, DB, tree.TreeId, dummyHash2, dummyHash2, data, someExtraData, sequenceNumber+1, t) + leaf2.LeafIndex = -1 + leaf2.MerkleLeafHash = []byte(dummyMerkleLeafHash) + + tests := []struct { + hashes [][]byte + want []*trillian.LogLeaf + }{ + { + hashes: [][]byte{dummyRawHash}, + want: []*trillian.LogLeaf{leaf}, + }, + { + hashes: [][]byte{{0x01, 0x02}}, + }, + { + hashes: [][]byte{ + dummyRawHash, + {0x01, 0x02}, + dummyHash2, + 
{0x01, 0x02}, + }, + // Note: leaves not necessarily returned in order requested. + want: []*trillian.LogLeaf{leaf2, leaf}, + }, + } + for i, test := range tests { + t.Run(fmt.Sprintf("%d", i), func(t *testing.T) { + runLogTX(s, tree, t, func(ctx context.Context, tx storage.LogTreeTX) error { + leaves, err := tx.(*logTreeTX).getLeafDataByIdentityHash(ctx, test.hashes) + if err != nil { + t.Fatalf("getLeavesByIdentityHash(_) = (_,%v); want (_,nil)", err) + } + + if len(leaves) != len(test.want) { + t.Fatalf("getLeavesByIdentityHash(_) = (|%d|,nil); want (|%d|,nil)", len(leaves), len(test.want)) + } + leavesEquivalent(t, leaves, test.want) + return nil + }) + }) + } +} + +func leavesEquivalent(t *testing.T, gotLeaves, wantLeaves []*trillian.LogLeaf) { + t.Helper() + want := make(map[string]*trillian.LogLeaf) + for _, w := range wantLeaves { + k := sha256.Sum256([]byte(w.String())) + want[string(k[:])] = w + } + got := make(map[string]*trillian.LogLeaf) + for _, g := range gotLeaves { + k := sha256.Sum256([]byte(g.String())) + got[string(k[:])] = g + } + if diff := cmp.Diff(want, got, cmp.Comparer(proto.Equal)); diff != "" { + t.Errorf("leaves not equivalent: diff -want,+got:\n%v", diff) + } +} + +// ----------------------------------------------------------------------------- + +func TestLatestSignedRootNoneWritten(t *testing.T) { + ctx := context.Background() + + cleanTestDB(DB) + as := NewAdminStorage(DB) + tree := mustCreateTree(ctx, t, as, testonly.LogTree) + s := NewLogStorage(DB, nil) + + tx, err := s.SnapshotForTree(ctx, tree) + if err != storage.ErrTreeNeedsInit { + t.Fatalf("SnapshotForTree gave %v, want %v", err, storage.ErrTreeNeedsInit) + } + commit(ctx, tx, t) +} + +func SignLogRoot(root *types.LogRootV1) (*trillian.SignedLogRoot, error) { + logRoot, err := root.MarshalBinary() + if err != nil { + return nil, err + } + return &trillian.SignedLogRoot{LogRoot: logRoot}, nil +} + +func TestLatestSignedLogRoot(t *testing.T) { + ctx := context.Background() + cleanTestDB(DB) + as := NewAdminStorage(DB) + tree := mustCreateTree(ctx, t, as, testonly.LogTree) + s := NewLogStorage(DB, nil) + + root, err := SignLogRoot(&types.LogRootV1{ + TimestampNanos: 98765, + TreeSize: 16, + RootHash: []byte(dummyHash), + }) + if err != nil { + t.Fatalf("SignLogRoot(): %v", err) + } + + runLogTX(s, tree, t, func(ctx context.Context, tx storage.LogTreeTX) error { + if err := tx.StoreSignedLogRoot(ctx, root); err != nil { + t.Fatalf("Failed to store signed root: %v", err) + } + return nil + }) + + { + runLogTX(s, tree, t, func(ctx context.Context, tx2 storage.LogTreeTX) error { + root2, err := tx2.LatestSignedLogRoot(ctx) + if err != nil { + t.Fatalf("Failed to read back new log root: %v", err) + } + if !proto.Equal(root, root2) { + t.Fatalf("Root round trip failed: <%v> and: <%v>", root, root2) + } + return nil + }) + } +} + +func TestDuplicateSignedLogRoot(t *testing.T) { + ctx := context.Background() + cleanTestDB(DB) + as := NewAdminStorage(DB) + tree := mustCreateTree(ctx, t, as, testonly.LogTree) + s := NewLogStorage(DB, nil) + + root, err := SignLogRoot(&types.LogRootV1{ + TimestampNanos: 98765, + TreeSize: 16, + RootHash: []byte(dummyHash), + }) + if err != nil { + t.Fatalf("SignLogRoot(): %v", err) + } + + runLogTX(s, tree, t, func(ctx context.Context, tx storage.LogTreeTX) error { + if err := tx.StoreSignedLogRoot(ctx, root); err != nil { + t.Fatalf("Failed to store signed root: %v", err) + } + // Shouldn't be able to do it again + if err := tx.StoreSignedLogRoot(ctx, root); err == nil { + 
t.Fatal("Allowed duplicate signed root") + } + return nil + }) +} + +func TestLogRootUpdate(t *testing.T) { + ctx := context.Background() + // Write two roots for a log and make sure the one with the newest timestamp supersedes + cleanTestDB(DB) + as := NewAdminStorage(DB) + tree := mustCreateTree(ctx, t, as, testonly.LogTree) + s := NewLogStorage(DB, nil) + + root, err := SignLogRoot(&types.LogRootV1{ + TimestampNanos: 98765, + TreeSize: 16, + RootHash: []byte(dummyHash), + }) + if err != nil { + t.Fatalf("SignLogRoot(): %v", err) + } + root2, err := SignLogRoot(&types.LogRootV1{ + TimestampNanos: 98766, + TreeSize: 16, + RootHash: []byte(dummyHash), + }) + if err != nil { + t.Fatalf("SignLogRoot(): %v", err) + } + + runLogTX(s, tree, t, func(ctx context.Context, tx storage.LogTreeTX) error { + return tx.StoreSignedLogRoot(ctx, root) + }) + runLogTX(s, tree, t, func(ctx context.Context, tx storage.LogTreeTX) error { + return tx.StoreSignedLogRoot(ctx, root2) + }) + + runLogTX(s, tree, t, func(ctx context.Context, tx2 storage.LogTreeTX) error { + root3, err := tx2.LatestSignedLogRoot(ctx) + if err != nil { + t.Fatalf("Failed to read back new log root: %v", err) + } + if !proto.Equal(root2, root3) { + t.Fatalf("Root round trip failed: <%v> and: <%v>", root, root2) + } + return nil + }) +} + +func TestGetActiveLogIDs(t *testing.T) { + ctx := context.Background() + + cleanTestDB(DB) + admin := NewAdminStorage(DB) + + // Create a few test trees + log1 := proto.Clone(testonly.LogTree).(*trillian.Tree) + log2 := proto.Clone(testonly.LogTree).(*trillian.Tree) + log3 := proto.Clone(testonly.PreorderedLogTree).(*trillian.Tree) + drainingLog := proto.Clone(testonly.LogTree).(*trillian.Tree) + frozenLog := proto.Clone(testonly.LogTree).(*trillian.Tree) + deletedLog := proto.Clone(testonly.LogTree).(*trillian.Tree) + for _, tree := range []**trillian.Tree{&log1, &log2, &log3, &drainingLog, &frozenLog, &deletedLog} { + newTree, err := storage.CreateTree(ctx, admin, *tree) + if err != nil { + t.Fatalf("CreateTree(%+v) returned err = %v", tree, err) + } + *tree = newTree + } + + // FROZEN is not a valid initial state, so we have to update it separately. + if _, err := storage.UpdateTree(ctx, admin, frozenLog.TreeId, func(t *trillian.Tree) { + t.TreeState = trillian.TreeState_FROZEN + }); err != nil { + t.Fatalf("UpdateTree() returned err = %v", err) + } + // DRAINING is not a valid initial state, so we have to update it separately. + if _, err := storage.UpdateTree(ctx, admin, drainingLog.TreeId, func(t *trillian.Tree) { + t.TreeState = trillian.TreeState_DRAINING + }); err != nil { + t.Fatalf("UpdateTree() returned err = %v", err) + } + + // Update deleted trees accordingly + updateDeletedStmt, err := DB.PrepareContext(ctx, "UPDATE Trees SET Deleted = ? 
WHERE TreeId = ?") + if err != nil { + t.Fatalf("PrepareContext() returned err = %v", err) + } + defer func() { + if err := updateDeletedStmt.Close(); err != nil { + klog.Errorf("updateDeletedStmt.Close(): %v", err) + } + }() + for _, treeID := range []int64{deletedLog.TreeId} { + if _, err := updateDeletedStmt.ExecContext(ctx, true, treeID); err != nil { + t.Fatalf("ExecContext(%v) returned err = %v", treeID, err) + } + } + + s := NewLogStorage(DB, nil) + got, err := s.GetActiveLogIDs(ctx) + if err != nil { + t.Fatalf("GetActiveLogIDs() returns err = %v", err) + } + + want := []int64{log1.TreeId, log2.TreeId, log3.TreeId, drainingLog.TreeId} + sort.Slice(got, func(i, j int) bool { return got[i] < got[j] }) + sort.Slice(want, func(i, j int) bool { return want[i] < want[j] }) + if diff := cmp.Diff(got, want); diff != "" { + t.Errorf("post-GetActiveLogIDs diff (-got +want):\n%v", diff) + } +} + +func TestGetActiveLogIDsEmpty(t *testing.T) { + ctx := context.Background() + + cleanTestDB(DB) + s := NewLogStorage(DB, nil) + + ids, err := s.GetActiveLogIDs(ctx) + if err != nil { + t.Fatalf("GetActiveLogIDs() = (_, %v), want = (_, nil)", err) + } + + if got, want := len(ids), 0; got != want { + t.Errorf("GetActiveLogIDs(): got %v IDs, want = %v", got, want) + } +} + +func ensureAllLeavesDistinct(leaves []*trillian.LogLeaf, t *testing.T) { + t.Helper() + // All the leaf value hashes should be distinct because the leaves were created with distinct + // leaf data. If only we had maps with slices as keys or sets or pretty much any kind of usable + // data structures we could do this properly. + for i := range leaves { + for j := range leaves { + if i != j && bytes.Equal(leaves[i].LeafIdentityHash, leaves[j].LeafIdentityHash) { + t.Fatalf("Unexpectedly got a duplicate leaf hash: %v %v", + leaves[i].LeafIdentityHash, leaves[j].LeafIdentityHash) + } + } + } +} + +func ensureLeavesHaveQueueTimestamp(t *testing.T, leaves []*trillian.LogLeaf, want time.Time) { + t.Helper() + for _, leaf := range leaves { + gotQTimestamp := leaf.QueueTimestamp.AsTime() + if got, want := gotQTimestamp.UnixNano(), want.UnixNano(); got != want { + t.Errorf("Got leaf with QueueTimestampNanos = %v, want %v: %v", got, want, leaf) + } + } +} + +// Creates some test leaves with predictable data +func createTestLeaves(n, startSeq int64) []*trillian.LogLeaf { + var leaves []*trillian.LogLeaf + for l := int64(0); l < n; l++ { + lv := fmt.Sprintf("Leaf %d", l+startSeq) + h := sha256.New() + h.Write([]byte(lv)) + leafHash := h.Sum(nil) + leaf := &trillian.LogLeaf{ + LeafIdentityHash: leafHash, + MerkleLeafHash: leafHash, + LeafValue: []byte(lv), + ExtraData: []byte(fmt.Sprintf("Extra %d", l)), + LeafIndex: int64(startSeq + l), + } + leaves = append(leaves, leaf) + } + + return leaves +} + +// Convenience methods to avoid copying out "if err != nil { blah }" all over the place +func runLogTX(s storage.LogStorage, tree *trillian.Tree, t *testing.T, f storage.LogTXFunc) { + t.Helper() + if err := s.ReadWriteTransaction(context.Background(), tree, f); err != nil { + t.Fatalf("Failed to run log tx: %v", err) + } +} + +type committableTX interface { + Commit(ctx context.Context) error +} + +func commit(ctx context.Context, tx committableTX, t *testing.T) { + t.Helper() + if err := tx.Commit(ctx); err != nil { + t.Errorf("Failed to commit tx: %v", err) + } +} + +func leafInBatch(leaf *trillian.LogLeaf, batch []*trillian.LogLeaf) bool { + for _, bl := range batch { + if bytes.Equal(bl.LeafIdentityHash, leaf.LeafIdentityHash) { + return true + } 
+ } + + return false +} diff --git a/storage/mysql/mysqlpb/gen.go b/storage/mysql/mysqlpb/gen.go new file mode 100644 index 0000000000..875c14cbf4 --- /dev/null +++ b/storage/mysql/mysqlpb/gen.go @@ -0,0 +1,18 @@ +// Copyright 2023 Google LLC. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Package mysqlpb contains protobuf definitions used by the mysql implementation. +package mysqlpb + +//go:generate protoc -I=. --go_out=paths=source_relative:. options.proto diff --git a/storage/mysql/mysqlpb/options.pb.go b/storage/mysql/mysqlpb/options.pb.go new file mode 100644 index 0000000000..4f7ed42cda --- /dev/null +++ b/storage/mysql/mysqlpb/options.pb.go @@ -0,0 +1,165 @@ +// Copyright 2023 Google LLC. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Code generated by protoc-gen-go. DO NOT EDIT. +// versions: +// protoc-gen-go v1.34.2 +// protoc v3.20.1 +// source: options.proto + +package mysqlpb + +import ( + protoreflect "google.golang.org/protobuf/reflect/protoreflect" + protoimpl "google.golang.org/protobuf/runtime/protoimpl" + reflect "reflect" + sync "sync" +) + +const ( + // Verify that this generated code is sufficiently up-to-date. + _ = protoimpl.EnforceVersion(20 - protoimpl.MinVersion) + // Verify that runtime/protoimpl is sufficiently up-to-date. + _ = protoimpl.EnforceVersion(protoimpl.MaxVersion - 20) +) + +// StorageOptions contains configuration parameters for MySQL implementation +// of the storage backend. This is envisioned only to be used for changes that +// would be breaking, but need to support old behaviour for backwards compatibility. +type StorageOptions struct { + state protoimpl.MessageState + sizeCache protoimpl.SizeCache + unknownFields protoimpl.UnknownFields + + // subtreeRevisions being explicitly set to false will skip writing subtree revisions. 
+ // https://github.com/google/trillian/pull/3201 + SubtreeRevisions bool `protobuf:"varint,1,opt,name=subtreeRevisions,proto3" json:"subtreeRevisions,omitempty"` +} + +func (x *StorageOptions) Reset() { + *x = StorageOptions{} + if protoimpl.UnsafeEnabled { + mi := &file_options_proto_msgTypes[0] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) + } +} + +func (x *StorageOptions) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*StorageOptions) ProtoMessage() {} + +func (x *StorageOptions) ProtoReflect() protoreflect.Message { + mi := &file_options_proto_msgTypes[0] + if protoimpl.UnsafeEnabled && x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use StorageOptions.ProtoReflect.Descriptor instead. +func (*StorageOptions) Descriptor() ([]byte, []int) { + return file_options_proto_rawDescGZIP(), []int{0} +} + +func (x *StorageOptions) GetSubtreeRevisions() bool { + if x != nil { + return x.SubtreeRevisions + } + return false +} + +var File_options_proto protoreflect.FileDescriptor + +var file_options_proto_rawDesc = []byte{ + 0x0a, 0x0d, 0x6f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x12, + 0x07, 0x6d, 0x79, 0x73, 0x71, 0x6c, 0x70, 0x62, 0x22, 0x3c, 0x0a, 0x0e, 0x53, 0x74, 0x6f, 0x72, + 0x61, 0x67, 0x65, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x12, 0x2a, 0x0a, 0x10, 0x73, 0x75, + 0x62, 0x74, 0x72, 0x65, 0x65, 0x52, 0x65, 0x76, 0x69, 0x73, 0x69, 0x6f, 0x6e, 0x73, 0x18, 0x01, + 0x20, 0x01, 0x28, 0x08, 0x52, 0x10, 0x73, 0x75, 0x62, 0x74, 0x72, 0x65, 0x65, 0x52, 0x65, 0x76, + 0x69, 0x73, 0x69, 0x6f, 0x6e, 0x73, 0x42, 0x32, 0x5a, 0x30, 0x67, 0x69, 0x74, 0x68, 0x75, 0x62, + 0x2e, 0x63, 0x6f, 0x6d, 0x2f, 0x67, 0x6f, 0x6f, 0x67, 0x6c, 0x65, 0x2f, 0x74, 0x72, 0x69, 0x6c, + 0x6c, 0x69, 0x61, 0x6e, 0x2f, 0x73, 0x74, 0x6f, 0x72, 0x61, 0x67, 0x65, 0x2f, 0x6d, 0x79, 0x73, + 0x71, 0x6c, 0x2f, 0x6d, 0x79, 0x73, 0x71, 0x6c, 0x70, 0x62, 0x62, 0x06, 0x70, 0x72, 0x6f, 0x74, + 0x6f, 0x33, +} + +var ( + file_options_proto_rawDescOnce sync.Once + file_options_proto_rawDescData = file_options_proto_rawDesc +) + +func file_options_proto_rawDescGZIP() []byte { + file_options_proto_rawDescOnce.Do(func() { + file_options_proto_rawDescData = protoimpl.X.CompressGZIP(file_options_proto_rawDescData) + }) + return file_options_proto_rawDescData +} + +var file_options_proto_msgTypes = make([]protoimpl.MessageInfo, 1) +var file_options_proto_goTypes = []any{ + (*StorageOptions)(nil), // 0: mysqlpb.StorageOptions +} +var file_options_proto_depIdxs = []int32{ + 0, // [0:0] is the sub-list for method output_type + 0, // [0:0] is the sub-list for method input_type + 0, // [0:0] is the sub-list for extension type_name + 0, // [0:0] is the sub-list for extension extendee + 0, // [0:0] is the sub-list for field type_name +} + +func init() { file_options_proto_init() } +func file_options_proto_init() { + if File_options_proto != nil { + return + } + if !protoimpl.UnsafeEnabled { + file_options_proto_msgTypes[0].Exporter = func(v any, i int) any { + switch v := v.(*StorageOptions); i { + case 0: + return &v.state + case 1: + return &v.sizeCache + case 2: + return &v.unknownFields + default: + return nil + } + } + } + type x struct{} + out := protoimpl.TypeBuilder{ + File: protoimpl.DescBuilder{ + GoPackagePath: reflect.TypeOf(x{}).PkgPath(), + RawDescriptor: file_options_proto_rawDesc, + NumEnums: 0, 
+ NumMessages: 1, + NumExtensions: 0, + NumServices: 0, + }, + GoTypes: file_options_proto_goTypes, + DependencyIndexes: file_options_proto_depIdxs, + MessageInfos: file_options_proto_msgTypes, + }.Build() + File_options_proto = out.File + file_options_proto_rawDesc = nil + file_options_proto_goTypes = nil + file_options_proto_depIdxs = nil +} diff --git a/storage/mysql/mysqlpb/options.proto b/storage/mysql/mysqlpb/options.proto new file mode 100644 index 0000000000..2ebdc670ae --- /dev/null +++ b/storage/mysql/mysqlpb/options.proto @@ -0,0 +1,27 @@ +// Copyright 2023 Google LLC. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +syntax = "proto3"; +option go_package = "github.com/google/trillian/storage/mysql/mysqlpb"; + +package mysqlpb; + +// StorageOptions contains configuration parameters for MySQL implementation +// of the storage backend. This is envisioned only to be used for changes that +// would be breaking, but need to support old behaviour for backwards compatibility. +message StorageOptions { + // subtreeRevisions being explicitly set to false will skip writing subtree revisions. + // https://github.com/google/trillian/pull/3201 + bool subtreeRevisions = 1; +} diff --git a/storage/mysql/provider.go b/storage/mysql/provider.go new file mode 100644 index 0000000000..6c1a4c5e4e --- /dev/null +++ b/storage/mysql/provider.go @@ -0,0 +1,145 @@ +// Copyright 2018 Google LLC. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
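// A hedged, illustrative sketch (not part of the patch): opting a new tree out of
// subtree-revision writing by packing the mysqlpb.StorageOptions message defined
// above into Tree.StorageSettings, the same pattern the storage tests later in this
// series use. The package and helper names below are invented for illustration.
package sketch

import (
	"github.com/google/trillian"
	"github.com/google/trillian/storage/mysql/mysqlpb"
	"google.golang.org/protobuf/types/known/anypb"
)

// newTreeWithoutRevisions is a hypothetical helper showing how StorageSettings
// would be populated; it is not a Trillian API.
func newTreeWithoutRevisions() (*trillian.Tree, error) {
	opts, err := anypb.New(&mysqlpb.StorageOptions{SubtreeRevisions: false})
	if err != nil {
		return nil, err
	}
	return &trillian.Tree{
		TreeState:       trillian.TreeState_ACTIVE,
		TreeType:        trillian.TreeType_LOG,
		StorageSettings: opts,
	}, nil
}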
+ +package mysql + +import ( + "crypto/tls" + "crypto/x509" + "database/sql" + "errors" + "flag" + "os" + "sync" + + "github.com/google/trillian/monitoring" + "github.com/google/trillian/storage" + "k8s.io/klog/v2" + + // Load MySQL driver + "github.com/go-sql-driver/mysql" +) + +var ( + mySQLURI = flag.String("mysql_uri", "test:zaphod@tcp(127.0.0.1:3306)/test", "Connection URI for MySQL database") + maxConns = flag.Int("mysql_max_conns", 0, "Maximum connections to the database") + maxIdle = flag.Int("mysql_max_idle_conns", -1, "Maximum idle database connections in the connection pool") + mySQLTLSCA = flag.String("mysql_tls_ca", "", "Path to the CA certificate file for MySQL TLS connection ") + mySQLServerName = flag.String("mysql_server_name", "", "Name of the MySQL server to be used as the Server Name in the TLS configuration") + + mysqlMu sync.Mutex + mysqlErr error + mysqlDB *sql.DB + mysqlStorageInstance *mysqlProvider +) + +// GetDatabase returns an instance of MySQL database, or creates one. +// +// TODO(pavelkalinnikov): Make the dependency of MySQL quota provider from +// MySQL storage provider explicit. +func GetDatabase() (*sql.DB, error) { + mysqlMu.Lock() + defer mysqlMu.Unlock() + return getMySQLDatabaseLocked() +} + +func init() { + if err := storage.RegisterProvider("mysql", newMySQLStorageProvider); err != nil { + klog.Fatalf("Failed to register storage provider mysql: %v", err) + } +} + +type mysqlProvider struct { + db *sql.DB + mf monitoring.MetricFactory +} + +func newMySQLStorageProvider(mf monitoring.MetricFactory) (storage.Provider, error) { + mysqlMu.Lock() + defer mysqlMu.Unlock() + if mysqlStorageInstance == nil { + db, err := getMySQLDatabaseLocked() + if err != nil { + return nil, err + } + mysqlStorageInstance = &mysqlProvider{ + db: db, + mf: mf, + } + } + return mysqlStorageInstance, nil +} + +// getMySQLDatabaseLocked returns an instance of MySQL database, or creates +// one. Requires mysqlMu to be locked. +func getMySQLDatabaseLocked() (*sql.DB, error) { + if mysqlDB != nil || mysqlErr != nil { + return mysqlDB, mysqlErr + } + dsn := *mySQLURI + if *mySQLTLSCA != "" { + if err := registerMySQLTLSConfig(); err != nil { + return nil, err + } + dsn += "?tls=custom" + } + db, err := OpenDB(dsn) + if err != nil { + mysqlErr = err + return nil, err + } + if *maxConns > 0 { + db.SetMaxOpenConns(*maxConns) + } + if *maxIdle >= 0 { + db.SetMaxIdleConns(*maxIdle) + } + mysqlDB, mysqlErr = db, nil + return db, nil +} + +func (s *mysqlProvider) LogStorage() storage.LogStorage { + return NewLogStorage(s.db, s.mf) +} + +func (s *mysqlProvider) AdminStorage() storage.AdminStorage { + return NewAdminStorage(s.db) +} + +func (s *mysqlProvider) Close() error { + return s.db.Close() +} + +// registerMySQLTLSConfig registers a custom TLS config for MySQL using a provided CA certificate and optional server name. +// Returns an error if the CA certificate can't be read or added to the root cert pool, or when the registration of the TLS config fails. 
+func registerMySQLTLSConfig() error { + if *mySQLTLSCA == "" { + return nil + } + rootCertPool := x509.NewCertPool() + pem, err := os.ReadFile(*mySQLTLSCA) + if err != nil { + return err + } + if ok := rootCertPool.AppendCertsFromPEM(pem); !ok { + return errors.New("failed to append PEM") + } + tlsConfig := &tls.Config{ + RootCAs: rootCertPool, + } + if *mySQLServerName != "" { + tlsConfig.ServerName = *mySQLServerName + } + return mysql.RegisterTLSConfig("custom", tlsConfig) +} diff --git a/storage/mysql/provider_test.go b/storage/mysql/provider_test.go new file mode 100644 index 0000000000..ad20f408b6 --- /dev/null +++ b/storage/mysql/provider_test.go @@ -0,0 +1,46 @@ +// Copyright 2018 Google LLC. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package mysql + +import ( + "flag" + "testing" + + "github.com/google/trillian/storage" + "github.com/google/trillian/testonly/flagsaver" +) + +func TestMySQLStorageProviderErrorPersistence(t *testing.T) { + defer flagsaver.Save().MustRestore() + if err := flag.Set("mysql_uri", "&bogus*:::?"); err != nil { + t.Errorf("Failed to set flag: %v", err) + } + + // First call: This should fail due to the Database URL being garbage. + _, err1 := storage.NewProvider("mysql", nil) + if err1 == nil { + t.Fatalf("Expected 'storage.NewProvider' to fail") + } + + // Second call: This should fail with the same error. + _, err2 := storage.NewProvider("mysql", nil) + if err2 == nil { + t.Fatalf("Expected second call to 'storage.NewProvider' to fail") + } + + if err2 != err1 { + t.Fatalf("Expected second call to 'storage.NewProvider' to fail with %q, instead got: %q", err1, err2) + } +} diff --git a/storage/mysql/queue.go b/storage/mysql/queue.go new file mode 100644 index 0000000000..53ff1071a6 --- /dev/null +++ b/storage/mysql/queue.go @@ -0,0 +1,146 @@ +//go:build !batched_queue +// +build !batched_queue + +// Copyright 2017 Google LLC. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package mysql + +import ( + "context" + "database/sql" + "errors" + "fmt" + "time" + + "github.com/google/trillian" + "google.golang.org/protobuf/types/known/timestamppb" + "k8s.io/klog/v2" +) + +const ( + // If this statement ORDER BY clause is changed refer to the comment in removeSequencedLeaves + selectQueuedLeavesSQL = `SELECT LeafIdentityHash,MerkleLeafHash,QueueTimestampNanos + FROM Unsequenced + WHERE TreeID=? + AND Bucket=0 + AND QueueTimestampNanos<=? 
+ ORDER BY QueueTimestampNanos,LeafIdentityHash ASC LIMIT ?` + insertUnsequencedEntrySQL = `INSERT INTO Unsequenced(TreeId,Bucket,LeafIdentityHash,MerkleLeafHash,QueueTimestampNanos) + VALUES(?,0,?,?,?)` + deleteUnsequencedSQL = "DELETE FROM Unsequenced WHERE TreeId=? AND Bucket=0 AND QueueTimestampNanos=? AND LeafIdentityHash=?" +) + +type dequeuedLeaf struct { + queueTimestampNanos int64 + leafIdentityHash []byte +} + +func dequeueInfo(leafIDHash []byte, queueTimestamp int64) dequeuedLeaf { + return dequeuedLeaf{queueTimestampNanos: queueTimestamp, leafIdentityHash: leafIDHash} +} + +func (t *logTreeTX) dequeueLeaf(rows *sql.Rows) (*trillian.LogLeaf, dequeuedLeaf, error) { + var leafIDHash []byte + var merkleHash []byte + var queueTimestamp int64 + + err := rows.Scan(&leafIDHash, &merkleHash, &queueTimestamp) + if err != nil { + klog.Warningf("Error scanning work rows: %s", err) + return nil, dequeuedLeaf{}, err + } + + // Note: the LeafData and ExtraData being nil here is OK as this is only used by the + // sequencer. The sequencer only writes to the SequencedLeafData table and the client + // supplied data was already written to LeafData as part of queueing the leaf. + queueTimestampProto := timestamppb.New(time.Unix(0, queueTimestamp)) + if err := queueTimestampProto.CheckValid(); err != nil { + return nil, dequeuedLeaf{}, fmt.Errorf("got invalid queue timestamp: %w", err) + } + leaf := &trillian.LogLeaf{ + LeafIdentityHash: leafIDHash, + MerkleLeafHash: merkleHash, + QueueTimestamp: queueTimestampProto, + } + return leaf, dequeueInfo(leafIDHash, queueTimestamp), nil +} + +func queueArgs(_ int64, _ []byte, queueTimestamp time.Time) []interface{} { + return []interface{}{queueTimestamp.UnixNano()} +} + +func (t *logTreeTX) UpdateSequencedLeaves(ctx context.Context, leaves []*trillian.LogLeaf) error { + dequeuedLeaves := make([]dequeuedLeaf, 0, len(leaves)) + for _, leaf := range leaves { + // This should fail on insert but catch it early + if len(leaf.LeafIdentityHash) != t.hashSizeBytes { + return errors.New("sequenced leaf has incorrect hash size") + } + + if err := leaf.IntegrateTimestamp.CheckValid(); err != nil { + return fmt.Errorf("got invalid integrate timestamp: %w", err) + } + iTimestamp := leaf.IntegrateTimestamp.AsTime() + _, err := t.tx.ExecContext( + ctx, + insertSequencedLeafSQL+valuesPlaceholder5, + t.treeID, + leaf.LeafIdentityHash, + leaf.MerkleLeafHash, + leaf.LeafIndex, + iTimestamp.UnixNano()) + if err != nil { + klog.Warningf("Failed to update sequenced leaves: %s", err) + return err + } + + qe, ok := t.dequeued[string(leaf.LeafIdentityHash)] + if !ok { + return fmt.Errorf("attempting to update leaf that wasn't dequeued. IdentityHash: %x", leaf.LeafIdentityHash) + } + dequeuedLeaves = append(dequeuedLeaves, qe) + } + + return t.removeSequencedLeaves(ctx, dequeuedLeaves) +} + +// removeSequencedLeaves removes the passed in leaves slice (which may be +// modified as part of the operation). +func (t *logTreeTX) removeSequencedLeaves(ctx context.Context, leaves []dequeuedLeaf) error { + start := time.Now() + // Don't need to re-sort because the query ordered by leaf hash. If that changes because + // the query is expensive then the sort will need to be done here. See comment in + // QueueLeaves. 
+ stx, err := t.tx.PrepareContext(ctx, deleteUnsequencedSQL) + if err != nil { + klog.Warningf("Failed to prep delete statement for sequenced work: %v", err) + return err + } + defer func() { + if err := stx.Close(); err != nil { + klog.Errorf("stx.Close(): %v", err) + } + }() + for _, dql := range leaves { + result, err := stx.ExecContext(ctx, t.treeID, dql.queueTimestampNanos, dql.leafIdentityHash) + err = checkResultOkAndRowCountIs(result, err, int64(1)) + if err != nil { + return err + } + } + + observe(dequeueRemoveLatency, time.Since(start), labelForTX(t)) + return nil +} diff --git a/storage/mysql/queue_batching.go b/storage/mysql/queue_batching.go new file mode 100644 index 0000000000..d5317aa197 --- /dev/null +++ b/storage/mysql/queue_batching.go @@ -0,0 +1,150 @@ +//go:build batched_queue +// +build batched_queue + +// Copyright 2017 Google LLC. All Rights Reserved. + +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package mysql + +import ( + "context" + "crypto/sha256" + "database/sql" + "encoding/binary" + "fmt" + "strings" + "time" + + "github.com/google/trillian" + "google.golang.org/protobuf/types/known/timestamppb" + "k8s.io/klog/v2" +) + +const ( + // If this statement ORDER BY clause is changed refer to the comment in removeSequencedLeaves + selectQueuedLeavesSQL = `SELECT LeafIdentityHash,MerkleLeafHash,QueueTimestampNanos,QueueID + FROM Unsequenced + WHERE TreeID=? + AND Bucket=0 + AND QueueTimestampNanos<=? + ORDER BY QueueTimestampNanos,LeafIdentityHash ASC LIMIT ?` + insertUnsequencedEntrySQL = `INSERT INTO Unsequenced(TreeId,Bucket,LeafIdentityHash,MerkleLeafHash,QueueTimestampNanos,QueueID) VALUES(?,0,?,?,?,?)` + deleteUnsequencedSQL = "DELETE FROM Unsequenced WHERE QueueID IN (<placeholder>)" +) + +type dequeuedLeaf []byte + +func dequeueInfo(_ []byte, queueID []byte) dequeuedLeaf { + return dequeuedLeaf(queueID) +} + +func (t *logTreeTX) dequeueLeaf(rows *sql.Rows) (*trillian.LogLeaf, dequeuedLeaf, error) { + var leafIDHash []byte + var merkleHash []byte + var queueTimestamp int64 + var queueID []byte + + err := rows.Scan(&leafIDHash, &merkleHash, &queueTimestamp, &queueID) + if err != nil { + klog.Warningf("Error scanning work rows: %s", err) + return nil, nil, err + } + + queueTimestampProto := timestamppb.New(time.Unix(0, queueTimestamp)) + if err := queueTimestampProto.CheckValid(); err != nil { + return nil, dequeuedLeaf{}, fmt.Errorf("got invalid queue timestamp: %w", err) + } + // Note: the LeafData and ExtraData being nil here is OK as this is only used by the + // sequencer. The sequencer only writes to the SequencedLeafData table and the client + // supplied data was already written to LeafData as part of queueing the leaf.
+ leaf := &trillian.LogLeaf{ + LeafIdentityHash: leafIDHash, + MerkleLeafHash: merkleHash, + QueueTimestamp: queueTimestampProto, + } + return leaf, dequeueInfo(leafIDHash, queueID), nil +} + +func generateQueueID(treeID int64, leafIdentityHash []byte, timestamp int64) []byte { + h := sha256.New() + b := make([]byte, 10) + binary.PutVarint(b, treeID) + h.Write(b) + b = make([]byte, 10) + binary.PutVarint(b, timestamp) + h.Write(b) + h.Write(leafIdentityHash) + return h.Sum(nil) +} + +func queueArgs(treeID int64, identityHash []byte, queueTimestamp time.Time) []interface{} { + timestamp := queueTimestamp.UnixNano() + return []interface{}{timestamp, generateQueueID(treeID, identityHash, timestamp)} +} + +func (t *logTreeTX) UpdateSequencedLeaves(ctx context.Context, leaves []*trillian.LogLeaf) error { + querySuffix := []string{} + args := []interface{}{} + dequeuedLeaves := make([]dequeuedLeaf, 0, len(leaves)) + for _, leaf := range leaves { + if err := leaf.IntegrateTimestamp.CheckValid(); err != nil { + return fmt.Errorf("got invalid integrate timestamp: %w", err) + } + iTimestamp := leaf.IntegrateTimestamp.AsTime() + querySuffix = append(querySuffix, valuesPlaceholder5) + args = append(args, t.treeID, leaf.LeafIdentityHash, leaf.MerkleLeafHash, leaf.LeafIndex, iTimestamp.UnixNano()) + qe, ok := t.dequeued[string(leaf.LeafIdentityHash)] + if !ok { + return fmt.Errorf("attempting to update leaf that wasn't dequeued. IdentityHash: %x", leaf.LeafIdentityHash) + } + dequeuedLeaves = append(dequeuedLeaves, qe) + } + result, err := t.tx.ExecContext(ctx, insertSequencedLeafSQL+strings.Join(querySuffix, ","), args...) + if err != nil { + klog.Warningf("Failed to update sequenced leaves: %s", err) + } + if err := checkResultOkAndRowCountIs(result, err, int64(len(leaves))); err != nil { + return err + } + + return t.removeSequencedLeaves(ctx, dequeuedLeaves) +} + +func (m *mySQLLogStorage) getDeleteUnsequencedStmt(ctx context.Context, num int) (*sql.Stmt, error) { + return m.getStmt(ctx, deleteUnsequencedSQL, num, "?", "?") +} + +// removeSequencedLeaves removes the passed in leaves slice (which may be +// modified as part of the operation). +func (t *logTreeTX) removeSequencedLeaves(ctx context.Context, queueIDs []dequeuedLeaf) error { + // Don't need to re-sort because the query ordered by leaf hash. If that changes because + // the query is expensive then the sort will need to be done here. See comment in + // QueueLeaves. + tmpl, err := t.ls.getDeleteUnsequencedStmt(ctx, len(queueIDs)) + if err != nil { + klog.Warningf("Failed to get delete statement for sequenced work: %s", err) + return err + } + stx := t.tx.StmtContext(ctx, tmpl) + args := make([]interface{}, len(queueIDs)) + for i, q := range queueIDs { + args[i] = []byte(q) + } + result, err := stx.ExecContext(ctx, args...) + if err != nil { + // Error is handled by checkResultOkAndRowCountIs() below + klog.Warningf("Failed to delete sequenced work: %s", err) + } + return checkResultOkAndRowCountIs(result, err, int64(len(queueIDs))) +} diff --git a/storage/mysql/schema/storage.sql b/storage/mysql/schema/storage.sql new file mode 100644 index 0000000000..0d571b24fa --- /dev/null +++ b/storage/mysql/schema/storage.sql @@ -0,0 +1,137 @@ +# MySQL / MariaDB version of the tree schema + +-- --------------------------------------------- +-- Tree stuff here +-- --------------------------------------------- + +-- Tree parameters should not be changed after creation. Doing so can +-- render the data in the tree unusable or inconsistent. 
+CREATE TABLE IF NOT EXISTS Trees( + TreeId BIGINT NOT NULL, + TreeState ENUM('ACTIVE', 'FROZEN', 'DRAINING') NOT NULL, + TreeType ENUM('LOG', 'MAP', 'PREORDERED_LOG') NOT NULL, + HashStrategy ENUM('RFC6962_SHA256', 'TEST_MAP_HASHER', 'OBJECT_RFC6962_SHA256', 'CONIKS_SHA512_256', 'CONIKS_SHA256') NOT NULL, + HashAlgorithm ENUM('SHA256') NOT NULL, + SignatureAlgorithm ENUM('ECDSA', 'RSA', 'ED25519') NOT NULL, + DisplayName VARCHAR(20), + Description VARCHAR(200), + CreateTimeMillis BIGINT NOT NULL, + UpdateTimeMillis BIGINT NOT NULL, + MaxRootDurationMillis BIGINT NOT NULL, + PrivateKey MEDIUMBLOB NOT NULL, -- Unused. + PublicKey MEDIUMBLOB NOT NULL, -- This is now used to store settings. + Deleted BOOLEAN, + DeleteTimeMillis BIGINT, + PRIMARY KEY(TreeId) +); + +-- This table contains tree parameters that can be changed at runtime such as for +-- administrative purposes. +CREATE TABLE IF NOT EXISTS TreeControl( + TreeId BIGINT NOT NULL, + SigningEnabled BOOLEAN NOT NULL, + SequencingEnabled BOOLEAN NOT NULL, + SequenceIntervalSeconds INTEGER NOT NULL, + PRIMARY KEY(TreeId), + FOREIGN KEY(TreeId) REFERENCES Trees(TreeId) ON DELETE CASCADE +); + +CREATE TABLE IF NOT EXISTS Subtree( + TreeId BIGINT NOT NULL, + SubtreeId VARBINARY(255) NOT NULL, + Nodes MEDIUMBLOB NOT NULL, + SubtreeRevision INTEGER NOT NULL, + -- Key columns must be in ASC order in order to benefit from group-by/min-max + -- optimization in MySQL. + PRIMARY KEY(TreeId, SubtreeId, SubtreeRevision), + FOREIGN KEY(TreeId) REFERENCES Trees(TreeId) ON DELETE CASCADE +); + +-- The TreeHeadRevisionIdx is used to enforce that there is only one STH at any +-- tree revision +CREATE TABLE IF NOT EXISTS TreeHead( + TreeId BIGINT NOT NULL, + TreeHeadTimestamp BIGINT, + TreeSize BIGINT, + RootHash VARBINARY(255) NOT NULL, + RootSignature VARBINARY(1024) NOT NULL, + TreeRevision BIGINT, + PRIMARY KEY(TreeId, TreeHeadTimestamp), + FOREIGN KEY(TreeId) REFERENCES Trees(TreeId) ON DELETE CASCADE +); + +CREATE UNIQUE INDEX TreeHeadRevisionIdx + ON TreeHead(TreeId, TreeRevision); + +-- --------------------------------------------- +-- Log specific stuff here +-- --------------------------------------------- + +-- Creating index at same time as table allows some storage engines to better +-- optimize physical storage layout. Most engines allow multiple nulls in a +-- unique index but some may not. + +-- A leaf that has not been sequenced has a row in this table. If duplicate leaves +-- are allowed they will all reference this row. +CREATE TABLE IF NOT EXISTS LeafData( + TreeId BIGINT NOT NULL, + -- This is a personality specific hash of some subset of the leaf data. + -- Its only purpose is to allow Trillian to identify duplicate entries in + -- the context of the personality. + LeafIdentityHash VARBINARY(255) NOT NULL, + -- This is the data stored in the leaf for example in CT it contains a DER encoded + -- X.509 certificate but is application dependent + LeafValue LONGBLOB NOT NULL, + -- This is extra data that the application can associate with the leaf should it wish to. + -- This data is not included in signing and hashing. + ExtraData LONGBLOB, + -- The timestamp from when this leaf data was first queued for inclusion. + QueueTimestampNanos BIGINT NOT NULL, + PRIMARY KEY(TreeId, LeafIdentityHash), + FOREIGN KEY(TreeId) REFERENCES Trees(TreeId) ON DELETE CASCADE +); + +-- When a leaf is sequenced a row is added to this table. If logs allow duplicates then +-- multiple rows will exist with different sequence numbers.
The signed timestamp +-- will be communicated via the unsequenced table as this might need to be unique, depending +-- on the log parameters and we can't insert into this table until we have the sequence number +-- which is not available at the time we queue the entry. We need both hashes because the +-- LeafData table is keyed by the raw data hash. +CREATE TABLE IF NOT EXISTS SequencedLeafData( + TreeId BIGINT NOT NULL, + SequenceNumber BIGINT UNSIGNED NOT NULL, + -- This is a personality specific hash of some subset of the leaf data. + -- Its only purpose is to allow Trillian to identify duplicate entries in + -- the context of the personality. + LeafIdentityHash VARBINARY(255) NOT NULL, + -- This is a MerkleLeafHash as defined by the treehasher that the log uses. For example for + -- CT this hash will include the leaf prefix byte as well as the leaf data. + MerkleLeafHash VARBINARY(255) NOT NULL, + IntegrateTimestampNanos BIGINT NOT NULL, + PRIMARY KEY(TreeId, SequenceNumber), + FOREIGN KEY(TreeId) REFERENCES Trees(TreeId) ON DELETE CASCADE, + FOREIGN KEY(TreeId, LeafIdentityHash) REFERENCES LeafData(TreeId, LeafIdentityHash) ON DELETE CASCADE +); + +CREATE INDEX SequencedLeafMerkleIdx + ON SequencedLeafData(TreeId, MerkleLeafHash); + +CREATE TABLE IF NOT EXISTS Unsequenced( + TreeId BIGINT NOT NULL, + -- The bucket field is to allow the use of time based ring bucketed schemes if desired. If + -- unused this should be set to zero for all entries. + Bucket INTEGER NOT NULL, + -- This is a personality specific hash of some subset of the leaf data. + -- Its only purpose is to allow Trillian to identify duplicate entries in + -- the context of the personality. + LeafIdentityHash VARBINARY(255) NOT NULL, + -- This is a MerkleLeafHash as defined by the treehasher that the log uses. For example for + -- CT this hash will include the leaf prefix byte as well as the leaf data. + MerkleLeafHash VARBINARY(255) NOT NULL, + QueueTimestampNanos BIGINT NOT NULL, + -- This is a SHA256 hash of the TreeID, LeafIdentityHash and QueueTimestampNanos. It is used + -- for batched deletes from the table when trillian_log_server and trillian_log_signer are + -- built with the batched_queue tag. + QueueID VARBINARY(32) DEFAULT NULL UNIQUE, + PRIMARY KEY (TreeId, Bucket, QueueTimestampNanos, LeafIdentityHash) +); diff --git a/storage/mysql/sql.go b/storage/mysql/sql.go new file mode 100644 index 0000000000..a48e2a554f --- /dev/null +++ b/storage/mysql/sql.go @@ -0,0 +1,157 @@ +// Copyright 2018 Google LLC. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License.
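// A hedged, illustrative sketch (not part of the patch): loading the schema file
// above into a local MySQL instance by splitting it into individual statements,
// mirroring the sanitize-and-split approach used by storage/testdb later in this
// series. The package name and applySchema helper are invented for illustration.
package sketch

import (
	"context"
	"database/sql"
	"os"
	"strings"

	_ "github.com/go-sql-driver/mysql" // MySQL driver, as imported elsewhere in this patch
)

func applySchema(ctx context.Context, db *sql.DB, path string) error {
	raw, err := os.ReadFile(path)
	if err != nil {
		return err
	}
	// Drop blank lines and the '#' / '--' comments used in storage.sql so each
	// remaining ";"-terminated statement can be executed on its own.
	var b strings.Builder
	for _, line := range strings.Split(string(raw), "\n") {
		line = strings.TrimSpace(line)
		if line == "" || strings.HasPrefix(line, "#") || strings.HasPrefix(line, "--") {
			continue
		}
		b.WriteString(line)
		b.WriteString("\n")
	}
	for _, stmt := range strings.Split(b.String(), ";") {
		stmt = strings.TrimSpace(stmt)
		if stmt == "" {
			continue
		}
		if _, err := db.ExecContext(ctx, stmt); err != nil {
			return err
		}
	}
	return nil
}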
+ +package mysql + +import ( + "bytes" + "database/sql" + "encoding/gob" + "fmt" + "time" + + "github.com/google/trillian" + "github.com/google/trillian/storage/mysql/mysqlpb" + "google.golang.org/protobuf/types/known/anypb" + "google.golang.org/protobuf/types/known/durationpb" + "google.golang.org/protobuf/types/known/timestamppb" +) + +// toMillisSinceEpoch converts a timestamp into milliseconds since epoch +func toMillisSinceEpoch(t time.Time) int64 { + return t.UnixNano() / 1000000 +} + +// fromMillisSinceEpoch converts +func fromMillisSinceEpoch(ts int64) time.Time { + return time.Unix(0, ts*1000000) +} + +// setNullStringIfValid assigns src to dest if src is Valid. +func setNullStringIfValid(src sql.NullString, dest *string) { + if src.Valid { + *dest = src.String + } +} + +// row defines a common interface between sql.Row and sql.Rows(!) +type row interface { + Scan(dest ...interface{}) error +} + +// readTree takes a sql row and returns a tree +func readTree(r row) (*trillian.Tree, error) { + tree := &trillian.Tree{} + + // Enums and Datetimes need an extra conversion step + var treeState, treeType, hashStrategy, hashAlgorithm, signatureAlgorithm string + var createMillis, updateMillis, maxRootDurationMillis int64 + var displayName, description sql.NullString + var privateKey, publicKey []byte + var deleted sql.NullBool + var deleteMillis sql.NullInt64 + err := r.Scan( + &tree.TreeId, + &treeState, + &treeType, + &hashStrategy, + &hashAlgorithm, + &signatureAlgorithm, + &displayName, + &description, + &createMillis, + &updateMillis, + &privateKey, + &publicKey, + &maxRootDurationMillis, + &deleted, + &deleteMillis, + ) + if err != nil { + return nil, err + } + + setNullStringIfValid(displayName, &tree.DisplayName) + setNullStringIfValid(description, &tree.Description) + + // Convert all things! 
+ if ts, ok := trillian.TreeState_value[treeState]; ok { + tree.TreeState = trillian.TreeState(ts) + } else { + return nil, fmt.Errorf("unknown TreeState: %v", treeState) + } + if tt, ok := trillian.TreeType_value[treeType]; ok { + tree.TreeType = trillian.TreeType(tt) + } else { + return nil, fmt.Errorf("unknown TreeType: %v", treeType) + } + if hashStrategy != "RFC6962_SHA256" { + return nil, fmt.Errorf("unknown HashStrategy: %v", hashStrategy) + } + + // Let's make sure we didn't mismatch any of the casts above + ok := tree.TreeState.String() == treeState && + tree.TreeType.String() == treeType + if !ok { + return nil, fmt.Errorf( + "mismatched enum: tree = %v, enums = [%v, %v, %v, %v, %v]", + tree, + treeState, treeType, hashStrategy, hashAlgorithm, signatureAlgorithm) + } + + tree.CreateTime = timestamppb.New(fromMillisSinceEpoch(createMillis)) + if err := tree.CreateTime.CheckValid(); err != nil { + return nil, fmt.Errorf("failed to parse create time: %w", err) + } + tree.UpdateTime = timestamppb.New(fromMillisSinceEpoch(updateMillis)) + if err := tree.UpdateTime.CheckValid(); err != nil { + return nil, fmt.Errorf("failed to parse update time: %w", err) + } + tree.MaxRootDuration = durationpb.New(time.Duration(maxRootDurationMillis * int64(time.Millisecond))) + + tree.Deleted = deleted.Valid && deleted.Bool + if tree.Deleted && deleteMillis.Valid { + tree.DeleteTime = timestamppb.New(fromMillisSinceEpoch(deleteMillis.Int64)) + if err := tree.DeleteTime.CheckValid(); err != nil { + return nil, fmt.Errorf("failed to parse delete time: %w", err) + } + } + + // We're going to try to interpret PublicKey as storageSettings, but it could be a + // public key from a really old tree, or an empty column from a tree created in the + // period between Trillian key material being removed and this column being used for + // storing settings. + buff := bytes.NewBuffer(publicKey) + dec := gob.NewDecoder(buff) + ss := &storageSettings{} + var o *mysqlpb.StorageOptions + if err := dec.Decode(ss); err != nil { + // If there are no storageSettings then this tree was created before settings + // were supported, and thus we have to populate the settings with the oldest + // settings for features. + o = &mysqlpb.StorageOptions{ + SubtreeRevisions: true, + } + } else { + o = &mysqlpb.StorageOptions{ + SubtreeRevisions: ss.Revisioned, + } + } + tree.StorageSettings, err = anypb.New(o) + if err != nil { + return nil, fmt.Errorf("failed to put StorageSettings into tree: %w", err) + } + + return tree, nil +} diff --git a/storage/mysql/storage_test.go b/storage/mysql/storage_test.go new file mode 100644 index 0000000000..c08aa757f2 --- /dev/null +++ b/storage/mysql/storage_test.go @@ -0,0 +1,363 @@ +// Copyright 2016 Google LLC. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
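// A hedged, illustrative sketch (not part of the patch): what the repurposed
// PublicKey column is expected to hold — gob-encoded settings that readTree above
// decodes. The settingsMirror type below only mirrors the single Revisioned field
// that readTree consults; the real unexported storageSettings type is defined
// elsewhere in the mysql package, so this is an assumption for illustration only.
package sketch

import (
	"bytes"
	"encoding/gob"
)

type settingsMirror struct {
	Revisioned bool
}

// encodeSettings produces gob bytes with a matching Revisioned field, which a
// field-name-based gob decode like readTree's should be able to consume.
func encodeSettings(revisioned bool) ([]byte, error) {
	var buf bytes.Buffer
	if err := gob.NewEncoder(&buf).Encode(settingsMirror{Revisioned: revisioned}); err != nil {
		return nil, err
	}
	return buf.Bytes(), nil
}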
+ +package mysql + +import ( + "bytes" + "context" + "crypto" + "crypto/sha256" + "database/sql" + "errors" + "flag" + "fmt" + "os" + "testing" + "time" + + "github.com/google/trillian" + "github.com/google/trillian/storage" + "github.com/google/trillian/storage/mysql/mysqlpb" + "github.com/google/trillian/storage/testdb" + storageto "github.com/google/trillian/storage/testonly" + stree "github.com/google/trillian/storage/tree" + "github.com/google/trillian/types" + "github.com/transparency-dev/merkle/compact" + "github.com/transparency-dev/merkle/rfc6962" + "google.golang.org/protobuf/types/known/anypb" + "google.golang.org/protobuf/types/known/durationpb" + "k8s.io/klog/v2" +) + +var ( + // LogTree is a valid, LOG-type trillian.Tree for tests. + // This tree is configured to write revisions for each subtree. + // This matches the legacy behaviour before revisions were removed. + RevisionedLogTree = &trillian.Tree{ + TreeState: trillian.TreeState_ACTIVE, + TreeType: trillian.TreeType_LOG, + DisplayName: "Llamas Log", + Description: "Registry of publicly-owned llamas", + MaxRootDuration: durationpb.New(0 * time.Millisecond), + StorageSettings: mustCreateRevisionedStorage(), + } +) + +func mustCreateRevisionedStorage() *anypb.Any { + o := &mysqlpb.StorageOptions{ + SubtreeRevisions: true, + } + a, err := anypb.New(o) + if err != nil { + panic(err) + } + return a +} + +func TestNodeRoundTrip(t *testing.T) { + nodes := createSomeNodes(256) + nodeIDs := make([]compact.NodeID, len(nodes)) + for i := range nodes { + nodeIDs[i] = nodes[i].ID + } + + for _, tc := range []struct { + desc string + store []stree.Node + read []compact.NodeID + want []stree.Node + wantErr bool + }{ + {desc: "store-4-read-4", store: nodes[:4], read: nodeIDs[:4], want: nodes[:4]}, + {desc: "store-4-read-1", store: nodes[:4], read: nodeIDs[:1], want: nodes[:1]}, + {desc: "store-2-read-4", store: nodes[:2], read: nodeIDs[:4], want: nodes[:2]}, + {desc: "store-none-read-all", store: nil, read: nodeIDs, wantErr: true}, + {desc: "store-all-read-all", store: nodes, read: nodeIDs, want: nodes}, + {desc: "store-all-read-none", store: nodes, read: nil, want: nil}, + } { + testbody := func(treeDef *trillian.Tree) { + ctx := context.Background() + cleanTestDB(DB) + as := NewAdminStorage(DB) + tree := mustCreateTree(ctx, t, as, treeDef) + s := NewLogStorage(DB, nil) + + const writeRev = int64(100) + runLogTX(s, tree, t, func(ctx context.Context, tx storage.LogTreeTX) error { + forceWriteRevision(writeRev, tx) + if err := tx.SetMerkleNodes(ctx, tc.store); err != nil { + t.Fatalf("Failed to store nodes: %s", err) + } + return storeLogRoot(ctx, tx, uint64(len(tc.store)), uint64(writeRev), []byte{1, 2, 3}) + }) + + runLogTX(s, tree, t, func(ctx context.Context, tx storage.LogTreeTX) error { + readNodes, err := tx.GetMerkleNodes(ctx, tc.read) + if err != nil && !tc.wantErr { + t.Fatalf("Failed to retrieve nodes: %s", err) + } else if err == nil && tc.wantErr { + t.Fatal("Retrieving nodes succeeded unexpectedly") + } + if err := nodesAreEqual(readNodes, tc.want); err != nil { + t.Fatalf("Read back different nodes from the ones stored: %s", err) + } + return nil + }) + } + t.Run(tc.desc+"-norevisions", func(t *testing.T) { + testbody(storageto.LogTree) + }) + t.Run(tc.desc+"-revisions", func(t *testing.T) { + testbody(RevisionedLogTree) + }) + } +} + +// This test ensures that node writes cross subtree boundaries so this edge case in the subtree +// cache gets exercised. Any tree size > 256 will do this. 
+func TestLogNodeRoundTripMultiSubtree(t *testing.T) { + testCases := []struct { + desc string + tree *trillian.Tree + }{ + { + desc: "Revisionless", + tree: storageto.LogTree, + }, + { + desc: "Revisions", + tree: RevisionedLogTree, + }, + } + for _, tC := range testCases { + t.Run(tC.desc, func(t *testing.T) { + ctx := context.Background() + cleanTestDB(DB) + as := NewAdminStorage(DB) + tree := mustCreateTree(ctx, t, as, tC.tree) + s := NewLogStorage(DB, nil) + + const writeRev = int64(100) + const size = 871 + nodesToStore, err := createLogNodesForTreeAtSize(t, size, writeRev) + if err != nil { + t.Fatalf("failed to create test tree: %v", err) + } + nodeIDsToRead := make([]compact.NodeID, len(nodesToStore)) + for i := range nodesToStore { + nodeIDsToRead[i] = nodesToStore[i].ID + } + + { + runLogTX(s, tree, t, func(ctx context.Context, tx storage.LogTreeTX) error { + forceWriteRevision(writeRev, tx) + if err := tx.SetMerkleNodes(ctx, nodesToStore); err != nil { + t.Fatalf("Failed to store nodes: %s", err) + } + return storeLogRoot(ctx, tx, uint64(size), uint64(writeRev), []byte{1, 2, 3}) + }) + } + + { + runLogTX(s, tree, t, func(ctx context.Context, tx storage.LogTreeTX) error { + readNodes, err := tx.GetMerkleNodes(ctx, nodeIDsToRead) + if err != nil { + t.Fatalf("Failed to retrieve nodes: %s", err) + } + if err := nodesAreEqual(readNodes, nodesToStore); err != nil { + missing, extra := diffNodes(readNodes, nodesToStore) + for _, n := range missing { + t.Errorf("Missing: %v", n.ID) + } + for _, n := range extra { + t.Errorf("Extra : %v", n.ID) + } + t.Fatalf("Read back different nodes from the ones stored: %s", err) + } + return nil + }) + } + }) + } +} + +func forceWriteRevision(rev int64, tx storage.LogTreeTX) { + mtx, ok := tx.(*logTreeTX) + if !ok { + panic(nil) + } + mtx.treeTX.writeRevision = rev +} + +func createSomeNodes(count int) []stree.Node { + r := make([]stree.Node, count) + for i := range r { + r[i].ID = compact.NewNodeID(0, uint64(i)) + h := sha256.Sum256([]byte{byte(i)}) + r[i].Hash = h[:] + klog.V(3).Infof("Node to store: %v", r[i].ID) + } + return r +} + +func createLogNodesForTreeAtSize(t *testing.T, ts, rev int64) ([]stree.Node, error) { + hasher := rfc6962.New(crypto.SHA256) + fact := compact.RangeFactory{Hash: hasher.HashChildren} + cr := fact.NewEmptyRange(0) + + nodeMap := make(map[compact.NodeID][]byte) + store := func(id compact.NodeID, hash []byte) { nodeMap[id] = hash } + + for l := 0; l < int(ts); l++ { + hash := hasher.HashLeaf([]byte(fmt.Sprintf("Leaf %d", l))) + // Store the new leaf node, and all new perfect nodes. + if err := cr.Append(hash, store); err != nil { + return nil, err + } + } + + // Unroll the map, which has deduped the updates for us and retained the latest + nodes := make([]stree.Node, 0, len(nodeMap)) + for id, hash := range nodeMap { + nodes = append(nodes, stree.Node{ID: id, Hash: hash}) + } + return nodes, nil +} + +// TODO(pavelkalinnikov): Allow nodes to be out of order. 
+func nodesAreEqual(lhs, rhs []stree.Node) error { + if ls, rs := len(lhs), len(rhs); ls != rs { + return fmt.Errorf("different number of nodes, %d vs %d", ls, rs) + } + for i := range lhs { + if l, r := lhs[i].ID, rhs[i].ID; l != r { + return fmt.Errorf("NodeIDs are not the same,\nlhs = %v,\nrhs = %v", l, r) + } + if l, r := lhs[i].Hash, rhs[i].Hash; !bytes.Equal(l, r) { + return fmt.Errorf("Hashes are not the same for %v,\nlhs = %v,\nrhs = %v", lhs[i].ID, l, r) + } + } + return nil +} + +func diffNodes(got, want []stree.Node) ([]stree.Node, []stree.Node) { + var missing []stree.Node + gotMap := make(map[compact.NodeID]stree.Node) + for _, n := range got { + gotMap[n.ID] = n + } + for _, n := range want { + _, ok := gotMap[n.ID] + if !ok { + missing = append(missing, n) + } + delete(gotMap, n.ID) + } + // Unpack the extra nodes to return both as slices + extra := make([]stree.Node, 0, len(gotMap)) + for _, v := range gotMap { + extra = append(extra, v) + } + return missing, extra +} + +func openTestDBOrDie() (*sql.DB, func(context.Context)) { + db, done, err := testdb.NewTrillianDB(context.TODO(), testdb.DriverMySQL) + if err != nil { + panic(err) + } + return db, done +} + +// cleanTestDB deletes all the entries in the database. +func cleanTestDB(db *sql.DB) { + for _, table := range allTables { + if _, err := db.ExecContext(context.TODO(), fmt.Sprintf("DELETE FROM %s", table)); err != nil { + panic(fmt.Sprintf("Failed to delete rows in %s: %v", table, err)) + } + } +} + +func getVersion(db *sql.DB) (string, error) { + rows, err := db.QueryContext(context.TODO(), "SELECT @@GLOBAL.version") + if err != nil { + return "", fmt.Errorf("getVersion: failed to perform query: %v", err) + } + defer func() { _ = rows.Close() }() + if !rows.Next() { + return "", errors.New("getVersion: cursor has no rows") + } + var v string + if err := rows.Scan(&v); err != nil { + return "", err + } + if rows.Next() { + return "", errors.New("getVersion: too many rows returned") + } + return v, nil +} + +func mustSignAndStoreLogRoot(ctx context.Context, t *testing.T, l storage.LogStorage, tree *trillian.Tree, treeSize uint64) { + t.Helper() + if err := l.ReadWriteTransaction(ctx, tree, func(ctx context.Context, tx storage.LogTreeTX) error { + return storeLogRoot(ctx, tx, treeSize, 0, []byte{0}) + }); err != nil { + t.Fatalf("ReadWriteTransaction: %v", err) + } +} + +func storeLogRoot(ctx context.Context, tx storage.LogTreeTX, size, rev uint64, hash []byte) error { + logRoot, err := (&types.LogRootV1{TreeSize: size, RootHash: hash}).MarshalBinary() + if err != nil { + return fmt.Errorf("error marshaling new LogRoot: %v", err) + } + root := &trillian.SignedLogRoot{LogRoot: logRoot} + if err := tx.StoreSignedLogRoot(ctx, root); err != nil { + return fmt.Errorf("error storing new SignedLogRoot: %v", err) + } + return nil +} + +// mustCreateTree creates the specified tree using AdminStorage. +func mustCreateTree(ctx context.Context, t *testing.T, s storage.AdminStorage, tree *trillian.Tree) *trillian.Tree { + t.Helper() + tree, err := storage.CreateTree(ctx, s, tree) + if err != nil { + t.Fatalf("storage.CreateTree(): %v", err) + } + return tree +} + +// DB is the database used for tests. It's initialized and closed by TestMain(). 
+var DB *sql.DB + +func TestMain(m *testing.M) { + flag.Parse() + if !testdb.MySQLAvailable() { + klog.Errorf("MySQL not available, skipping all MySQL storage tests") + return + } + + var done func(context.Context) + + DB, done = openTestDBOrDie() + + if v, err := getVersion(DB); err == nil { + klog.Infof("MySQL version '%v'", v) + } + status := m.Run() + done(context.Background()) + os.Exit(status) +} diff --git a/storage/mysql/tree_storage.go b/storage/mysql/tree_storage.go new file mode 100644 index 0000000000..7fb5ed8d0d --- /dev/null +++ b/storage/mysql/tree_storage.go @@ -0,0 +1,436 @@ +// Copyright 2016 Google LLC. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Package mysql provides a MySQL-based storage layer implementation. +package mysql + +import ( + "context" + "database/sql" + "encoding/base64" + "fmt" + "runtime/debug" + "strings" + "sync" + + "github.com/google/trillian" + "github.com/google/trillian/storage/cache" + "github.com/google/trillian/storage/mysql/mysqlpb" + "github.com/google/trillian/storage/storagepb" + "github.com/google/trillian/storage/tree" + "google.golang.org/protobuf/proto" + "google.golang.org/protobuf/types/known/anypb" + "k8s.io/klog/v2" +) + +// These statements are fixed +const ( + insertSubtreeMultiSQL = `INSERT INTO Subtree(TreeId, SubtreeId, Nodes, SubtreeRevision) ` + placeholderSQL + ` ON DUPLICATE KEY UPDATE Nodes=VALUES(Nodes)` + insertTreeHeadSQL = `INSERT INTO TreeHead(TreeId,TreeHeadTimestamp,TreeSize,RootHash,TreeRevision,RootSignature) + VALUES(?,?,?,?,?,?)` + + selectSubtreeSQL = ` + SELECT x.SubtreeId, Subtree.Nodes + FROM ( + SELECT n.TreeId, n.SubtreeId, max(n.SubtreeRevision) AS MaxRevision + FROM Subtree n + WHERE n.SubtreeId IN (` + placeholderSQL + `) AND + n.TreeId = ? AND n.SubtreeRevision <= ? + GROUP BY n.TreeId, n.SubtreeId + ) AS x + INNER JOIN Subtree + ON Subtree.SubtreeId = x.SubtreeId + AND Subtree.SubtreeRevision = x.MaxRevision + AND Subtree.TreeId = x.TreeId + AND Subtree.TreeId = ?` + + selectSubtreeSQLNoRev = ` + SELECT SubtreeId, Subtree.Nodes + FROM Subtree + WHERE Subtree.TreeId = ? + AND SubtreeId IN (` + placeholderSQL + `)` + placeholderSQL = "<placeholder>" +) + +// mySQLTreeStorage is shared between the mySQLLog- and (forthcoming) mySQLMap- +// Storage implementations, and contains functionality which is common to both. +type mySQLTreeStorage struct { + db *sql.DB + + // Must hold the mutex before manipulating the statement map. Sharing a lock because + // it only needs to be held while the statements are built, not while they execute and + // this will be a short time. These maps are from the number of placeholder '?' + // in the query to the statement that should be used. + statementMutex sync.Mutex + statements map[string]map[int]*sql.Stmt +} + +// OpenDB opens a database connection for all MySQL-based storage implementations.
+func OpenDB(dbURL string) (*sql.DB, error) { + db, err := sql.Open("mysql", dbURL) + if err != nil { + // Don't log uri as it could contain credentials + klog.Warningf("Could not open MySQL database, check config: %s", err) + return nil, err + } + + if _, err := db.ExecContext(context.TODO(), "SET sql_mode = 'STRICT_ALL_TABLES'"); err != nil { + klog.Warningf("Failed to set strict mode on mysql db: %s", err) + return nil, err + } + + return db, nil +} + +func newTreeStorage(db *sql.DB) *mySQLTreeStorage { + return &mySQLTreeStorage{ + db: db, + statements: make(map[string]map[int]*sql.Stmt), + } +} + +// expandPlaceholderSQL expands an sql statement by adding a specified number of '?' +// placeholder slots. At most one placeholder will be expanded. +func expandPlaceholderSQL(sql string, num int, first, rest string) string { + if num <= 0 { + panic(fmt.Errorf("trying to expand SQL placeholder with <= 0 parameters: %s", sql)) + } + + parameters := first + strings.Repeat(","+rest, num-1) + + return strings.Replace(sql, placeholderSQL, parameters, 1) +} + +// getStmt creates and caches sql.Stmt structs based on the passed in statement +// and number of bound arguments. +// TODO(al,martin): consider pulling this all out as a separate unit for reuse +// elsewhere. +func (m *mySQLTreeStorage) getStmt(ctx context.Context, statement string, num int, first, rest string) (*sql.Stmt, error) { + m.statementMutex.Lock() + defer m.statementMutex.Unlock() + + if m.statements[statement] != nil { + if m.statements[statement][num] != nil { + // TODO(al,martin): we'll possibly need to expire Stmts from the cache, + // e.g. when DB connections break etc. + return m.statements[statement][num], nil + } + } else { + m.statements[statement] = make(map[int]*sql.Stmt) + } + + s, err := m.db.PrepareContext(ctx, expandPlaceholderSQL(statement, num, first, rest)) + if err != nil { + klog.Warningf("Failed to prepare statement %d: %s", num, err) + return nil, err + } + + m.statements[statement][num] = s + + return s, nil +} + +func (m *mySQLTreeStorage) getSubtreeStmt(ctx context.Context, subtreeRevs bool, num int) (*sql.Stmt, error) { + if subtreeRevs { + return m.getStmt(ctx, selectSubtreeSQL, num, "?", "?") + } else { + return m.getStmt(ctx, selectSubtreeSQLNoRev, num, "?", "?") + } +} + +func (m *mySQLTreeStorage) setSubtreeStmt(ctx context.Context, num int) (*sql.Stmt, error) { + return m.getStmt(ctx, insertSubtreeMultiSQL, num, "VALUES(?, ?, ?, ?)", "(?, ?, ?, ?)") +} + +func (m *mySQLTreeStorage) beginTreeTx(ctx context.Context, tree *trillian.Tree, hashSizeBytes int, subtreeCache *cache.SubtreeCache) (treeTX, error) { + t, err := m.db.BeginTx(ctx, nil /* opts */) + if err != nil { + klog.Warningf("Could not start tree TX: %s", err) + return treeTX{}, err + } + var subtreeRevisions bool + o := &mysqlpb.StorageOptions{} + if err := anypb.UnmarshalTo(tree.StorageSettings, o, proto.UnmarshalOptions{}); err != nil { + return treeTX{}, fmt.Errorf("failed to unmarshal StorageSettings: %v", err) + } + subtreeRevisions = o.SubtreeRevisions + return treeTX{ + tx: t, + mu: &sync.Mutex{}, + ts: m, + treeID: tree.TreeId, + treeType: tree.TreeType, + hashSizeBytes: hashSizeBytes, + subtreeCache: subtreeCache, + writeRevision: -1, + subtreeRevs: subtreeRevisions, + }, nil +} + +type treeTX struct { + // mu ensures that tx can only be used for one query/exec at a time. 
+ mu *sync.Mutex + closed bool + tx *sql.Tx + ts *mySQLTreeStorage + treeID int64 + treeType trillian.TreeType + hashSizeBytes int + subtreeCache *cache.SubtreeCache + writeRevision int64 + subtreeRevs bool +} + +func (t *treeTX) getSubtrees(ctx context.Context, treeRevision int64, ids [][]byte) ([]*storagepb.SubtreeProto, error) { + klog.V(2).Infof("getSubtrees(len(ids)=%d)", len(ids)) + klog.V(4).Infof("getSubtrees(") + if len(ids) == 0 { + return nil, nil + } + + tmpl, err := t.ts.getSubtreeStmt(ctx, t.subtreeRevs, len(ids)) + if err != nil { + return nil, err + } + stx := t.tx.StmtContext(ctx, tmpl) + defer func() { + if err := stx.Close(); err != nil { + klog.Errorf("stx.Close(): %v", err) + } + }() + + var args []interface{} + if t.subtreeRevs { + args = make([]interface{}, 0, len(ids)+3) + // populate args with ids. + for _, id := range ids { + klog.V(4).Infof(" id: %x", id) + args = append(args, id) + } + args = append(args, t.treeID) + args = append(args, treeRevision) + args = append(args, t.treeID) + } else { + args = make([]interface{}, 0, len(ids)+1) + args = append(args, t.treeID) + + // populate args with ids. + for _, id := range ids { + klog.V(4).Infof(" id: %x", id) + args = append(args, id) + } + } + + rows, err := stx.QueryContext(ctx, args...) + if err != nil { + klog.Warningf("Failed to get merkle subtrees: %s", err) + return nil, err + } + defer func() { + if err := rows.Close(); err != nil { + klog.Errorf("rows.Close(): %v", err) + } + }() + + if rows.Err() != nil { + // Nothing from the DB + klog.Warningf("Nothing from DB: %s", rows.Err()) + return nil, rows.Err() + } + + ret := make([]*storagepb.SubtreeProto, 0, len(ids)) + + for rows.Next() { + var subtreeIDBytes []byte + var nodesRaw []byte + if err := rows.Scan(&subtreeIDBytes, &nodesRaw); err != nil { + klog.Warningf("Failed to scan merkle subtree: %s", err) + return nil, err + } + var subtree storagepb.SubtreeProto + if err := proto.Unmarshal(nodesRaw, &subtree); err != nil { + klog.Warningf("Failed to unmarshal SubtreeProto: %s", err) + return nil, err + } + if subtree.Prefix == nil { + subtree.Prefix = []byte{} + } + ret = append(ret, &subtree) + + if klog.V(4).Enabled() { + klog.Infof(" subtree: NID: %x, prefix: %x, depth: %d", + subtreeIDBytes, subtree.Prefix, subtree.Depth) + for k, v := range subtree.Leaves { + b, err := base64.StdEncoding.DecodeString(k) + if err != nil { + klog.Errorf("base64.DecodeString(%v): %v", k, err) + } + klog.Infof(" %x: %x", b, v) + } + } + } + + if err := rows.Err(); err != nil { + return nil, err + } + + // The InternalNodes cache is possibly nil here, but the SubtreeCache (which called + // this method) will re-populate it. + return ret, nil +} + +func (t *treeTX) storeSubtrees(ctx context.Context, subtrees []*storagepb.SubtreeProto) error { + klog.V(2).Infof("storeSubtrees(len(subtrees)=%d)", len(subtrees)) + if klog.V(4).Enabled() { + klog.Infof("storeSubtrees(") + for _, s := range subtrees { + klog.Infof(" prefix: %x, depth: %d", s.Prefix, s.Depth) + for k, v := range s.Leaves { + b, err := base64.StdEncoding.DecodeString(k) + if err != nil { + klog.Errorf("base64.DecodeString(%v): %v", k, err) + } + klog.Infof(" %x: %x", b, v) + } + } + } + if len(subtrees) == 0 { + return nil + } + + // TODO(al): probably need to be able to batch this in the case where we have + // a really large number of subtrees to store. + args := make([]interface{}, 0, len(subtrees)) + + // If not using subtree revisions then default value of 0 is fine. 
There is no + // significance to this value, other than it cannot be NULL in the DB. + var subtreeRev int64 + if t.subtreeRevs { + // We're using subtree revisions, so ensure we write at the correct revision + subtreeRev = t.writeRevision + } + for _, s := range subtrees { + s := s + if s.Prefix == nil { + panic(fmt.Errorf("nil prefix on %v", s)) + } + subtreeBytes, err := proto.Marshal(s) + if err != nil { + return err + } + args = append(args, t.treeID) + args = append(args, s.Prefix) + args = append(args, subtreeBytes) + args = append(args, subtreeRev) + } + + tmpl, err := t.ts.setSubtreeStmt(ctx, len(subtrees)) + if err != nil { + return err + } + stx := t.tx.StmtContext(ctx, tmpl) + defer func() { + if err := stx.Close(); err != nil { + klog.Errorf("stx.Close(): %v", err) + } + }() + + r, err := stx.ExecContext(ctx, args...) + if err != nil { + klog.Warningf("Failed to set merkle subtrees: %s", err) + return err + } + _, _ = r.RowsAffected() + return nil +} + +func checkResultOkAndRowCountIs(res sql.Result, err error, count int64) error { + // The Exec() might have just failed + if err != nil { + return mysqlToGRPC(err) + } + + // Otherwise we have to look at the result of the operation + rowsAffected, rowsError := res.RowsAffected() + + if rowsError != nil { + return mysqlToGRPC(rowsError) + } + + if rowsAffected != count { + return fmt.Errorf("expected %d row(s) to be affected but saw: %d", count, + rowsAffected) + } + + return nil +} + +// getSubtreesAtRev returns a GetSubtreesFunc which reads at the passed in rev. +func (t *treeTX) getSubtreesAtRev(ctx context.Context, rev int64) cache.GetSubtreesFunc { + return func(ids [][]byte) ([]*storagepb.SubtreeProto, error) { + return t.getSubtrees(ctx, rev, ids) + } +} + +func (t *treeTX) SetMerkleNodes(ctx context.Context, nodes []tree.Node) error { + t.mu.Lock() + defer t.mu.Unlock() + rev := t.writeRevision - 1 + return t.subtreeCache.SetNodes(nodes, t.getSubtreesAtRev(ctx, rev)) +} + +func (t *treeTX) Commit(ctx context.Context) error { + t.mu.Lock() + defer t.mu.Unlock() + + if t.writeRevision > -1 { + tiles, err := t.subtreeCache.UpdatedTiles() + if err != nil { + klog.Warningf("SubtreeCache updated tiles error: %v", err) + return err + } + if err := t.storeSubtrees(ctx, tiles); err != nil { + klog.Warningf("TX commit flush error: %v", err) + return err + } + } + t.closed = true + if err := t.tx.Commit(); err != nil { + klog.Warningf("TX commit error: %s, stack:\n%s", err, string(debug.Stack())) + return err + } + return nil +} + +func (t *treeTX) rollbackInternal() error { + t.closed = true + if err := t.tx.Rollback(); err != nil { + klog.Warningf("TX rollback error: %s, stack:\n%s", err, string(debug.Stack())) + return err + } + return nil +} + +func (t *treeTX) Close() error { + t.mu.Lock() + defer t.mu.Unlock() + if t.closed { + return nil + } + err := t.rollbackInternal() + if err != nil { + klog.Warningf("Rollback error on Close(): %v", err) + } + return err +} diff --git a/storage/testdb/testdb.go b/storage/testdb/testdb.go new file mode 100644 index 0000000000..da3c0bc3df --- /dev/null +++ b/storage/testdb/testdb.go @@ -0,0 +1,301 @@ +// Copyright 2017 Google LLC. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Package testdb creates new databases for tests. +package testdb + +import ( + "bytes" + "context" + "database/sql" + "fmt" + "log" + "net/url" + "os" + "strings" + "testing" + "time" + + "github.com/google/trillian/testonly" + "golang.org/x/sys/unix" + "k8s.io/klog/v2" + + _ "github.com/go-sql-driver/mysql" // mysql driver + _ "github.com/lib/pq" // postgres driver +) + +const ( + // MySQLURIEnv is the name of the ENV variable checked for the test MySQL + // instance URI to use. The value must have a trailing slash. + MySQLURIEnv = "TEST_MYSQL_URI" + + // Note: sql.Open requires the URI to end with a slash. + defaultTestMySQLURI = "root@tcp(127.0.0.1)/" + + // CockroachDBURIEnv is the name of the ENV variable checked for the test CockroachDB + // instance URI to use. The value must have a trailing slash. + CockroachDBURIEnv = "TEST_COCKROACHDB_URI" + + defaultTestCockroachDBURI = "postgres://root@localhost:26257/?sslmode=disable" +) + +type storageDriverInfo struct { + sqlDriverName string + schema string + uriFunc func(paths ...string) string +} + +var ( + trillianMySQLSchema = testonly.RelativeToPackage("../mysql/schema/storage.sql") + trillianCRDBSchema = testonly.RelativeToPackage("../crdb/schema/storage.sql") +) + +// DriverName is the name of a database driver. +type DriverName string + +const ( + // DriverMySQL is the identifier for the MySQL storage driver. + DriverMySQL DriverName = "mysql" + // DriverCockroachDB is the identifier for the CockroachDB storage driver. + DriverCockroachDB DriverName = "cockroachdb" +) + +var driverMapping = map[DriverName]storageDriverInfo{ + DriverMySQL: { + sqlDriverName: "mysql", + schema: trillianMySQLSchema, + uriFunc: mysqlURI, + }, + DriverCockroachDB: { + sqlDriverName: "postgres", + schema: trillianCRDBSchema, + uriFunc: crdbURI, + }, +} + +// mysqlURI returns the MySQL connection URI to use for tests. It returns the +// value in the ENV variable defined by MySQLURIEnv. If the value is empty, +// returns defaultTestMySQLURI. +// +// We use an ENV variable, rather than a flag, for flexibility. Only a subset +// of the tests in this repo require a database and import this package. With a +// flag, it would be necessary to distinguish "go test" invocations that need a +// database, and those that don't. ENV allows to "blanket apply" this setting. +func mysqlURI(dbRef ...string) string { + var stringurl string + if e := os.Getenv(MySQLURIEnv); len(e) > 0 { + stringurl = e + } else { + stringurl = defaultTestMySQLURI + } + + for _, ref := range dbRef { + separator := "/" + if strings.HasSuffix(stringurl, "/") { + separator = "" + } + stringurl = strings.Join([]string{stringurl, ref}, separator) + } + + return stringurl +} + +// crdbURI returns the CockroachDB connection URI to use for tests. It returns the +// value in the ENV variable defined by CockroachDBURIEnv. If the value is empty, +// returns defaultTestCockroachDBURI. 
+func crdbURI(dbRef ...string) string { + var uri *url.URL + if e := os.Getenv(CockroachDBURIEnv); len(e) > 0 { + uri = getURL(e) + } else { + uri = getURL(defaultTestCockroachDBURI) + } + + return addPathToURI(uri, dbRef...) +} + +func addPathToURI(uri *url.URL, paths ...string) string { + if len(paths) > 0 { + for _, ref := range paths { + currentPaths := uri.Path + // If the path is the root path, we don't want to append a slash. + if currentPaths == "/" { + currentPaths = "" + } + uri.Path = strings.Join([]string{currentPaths, ref}, "/") + } + } + return uri.String() +} + +func getURL(unparsedurl string) *url.URL { + //nolint:errcheck // We're not expecting an error here. + u, _ := url.Parse(unparsedurl) + return u +} + +// MySQLAvailable indicates whether the configured MySQL database is available. +func MySQLAvailable() bool { + return dbAvailable(DriverMySQL) +} + +// CockroachDBAvailable indicates whether the configured CockroachDB database is available. +func CockroachDBAvailable() bool { + return dbAvailable(DriverCockroachDB) +} + +func dbAvailable(driver DriverName) bool { + driverName := driverMapping[driver].sqlDriverName + uri := driverMapping[driver].uriFunc() + db, err := sql.Open(driverName, uri) + if err != nil { + log.Printf("sql.Open(): %v", err) + return false + } + defer func() { + if err := db.Close(); err != nil { + log.Printf("db.Close(): %v", err) + } + }() + if err := db.Ping(); err != nil { + log.Printf("db.Ping(): %v", err) + return false + } + return true +} + +// SetFDLimit sets the soft limit on the maximum number of open file descriptors. +// See http://man7.org/linux/man-pages/man2/setrlimit.2.html +func SetFDLimit(uLimit uint64) error { + var rLimit unix.Rlimit + if err := unix.Getrlimit(unix.RLIMIT_NOFILE, &rLimit); err != nil { + return err + } + if uLimit > rLimit.Max { + return fmt.Errorf("Could not set FD limit to %v. Must be less than the hard limit %v", uLimit, rLimit.Max) + } + rLimit.Cur = uLimit + return unix.Setrlimit(unix.RLIMIT_NOFILE, &rLimit) +} + +// newEmptyDB creates a new, empty database. +// It returns the database handle and a clean-up function, or an error. +// The returned clean-up function should be called once the caller is finished +// using the DB, the caller should not continue to use the returned DB after +// calling this function as it may, for example, delete the underlying +// instance. +func newEmptyDB(ctx context.Context, driver DriverName) (*sql.DB, func(context.Context), error) { + if err := SetFDLimit(2048); err != nil { + return nil, nil, err + } + + inf, gotinf := driverMapping[driver] + if !gotinf { + return nil, nil, fmt.Errorf("unknown driver %q", driver) + } + + db, err := sql.Open(inf.sqlDriverName, inf.uriFunc()) + if err != nil { + return nil, nil, err + } + + // Create a randomly-named database and then connect using the new name. 
+ name := fmt.Sprintf("trl_%v", time.Now().UnixNano()) + + stmt := fmt.Sprintf("CREATE DATABASE %v", name) + if _, err := db.ExecContext(ctx, stmt); err != nil { + return nil, nil, fmt.Errorf("error running statement %q: %v", stmt, err) + } + + if err := db.Close(); err != nil { + return nil, nil, fmt.Errorf("failed to close DB: %v", err) + } + uri := inf.uriFunc(name) + db, err = sql.Open(inf.sqlDriverName, uri) + if err != nil { + return nil, nil, err + } + + done := func(ctx context.Context) { + defer func() { + if err := db.Close(); err != nil { + klog.Errorf("db.Close(): %v", err) + } + }() + if _, err := db.ExecContext(ctx, fmt.Sprintf("DROP DATABASE %v", name)); err != nil { + klog.Warningf("Failed to drop test database %q: %v", name, err) + } + } + + return db, done, db.Ping() +} + +// NewTrillianDB creates an empty database with the Trillian schema. The database name is randomly +// generated. +// NewTrillianDB is equivalent to Default().NewTrillianDB(ctx). +func NewTrillianDB(ctx context.Context, driver DriverName) (*sql.DB, func(context.Context), error) { + db, done, err := newEmptyDB(ctx, driver) + if err != nil { + return nil, nil, err + } + + schema := driverMapping[driver].schema + + sqlBytes, err := os.ReadFile(schema) + if err != nil { + return nil, nil, err + } + + for _, stmt := range strings.Split(sanitize(string(sqlBytes)), ";") { + stmt = strings.TrimSpace(stmt) + if stmt == "" { + continue + } + if _, err := db.ExecContext(ctx, stmt); err != nil { + return nil, nil, fmt.Errorf("error running statement %q: %v", stmt, err) + } + } + return db, done, nil +} + +func sanitize(script string) string { + buf := &bytes.Buffer{} + for _, line := range strings.Split(string(script), "\n") { + line = strings.TrimSpace(line) + if line == "" || line[0] == '#' || strings.Index(line, "--") == 0 { + continue // skip empty lines and comments + } + buf.WriteString(line) + buf.WriteString("\n") + } + return buf.String() +} + +// SkipIfNoMySQL is a test helper that skips tests that require a local MySQL. +func SkipIfNoMySQL(t *testing.T) { + t.Helper() + if !MySQLAvailable() { + t.Skip("Skipping test as MySQL not available") + } + t.Logf("Test MySQL available at %q", mysqlURI()) +} + +// SkipIfNoCockroachDB is a test helper that skips tests that require a local CockroachDB. +func SkipIfNoCockroachDB(t *testing.T) { + t.Helper() + if !CockroachDBAvailable() { + t.Skip("Skipping test as CockroachDB not available") + } + t.Logf("Test CockroachDB available at %q", crdbURI()) +} diff --git a/storage/testdb/testdb_test.go b/storage/testdb/testdb_test.go new file mode 100644 index 0000000000..dd0b520bd8 --- /dev/null +++ b/storage/testdb/testdb_test.go @@ -0,0 +1,27 @@ +// Copyright 2018 Google LLC. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package testdb + +import ( + "testing" + + _ "k8s.io/klog/v2" +) + +func TestMySQLWarning(t *testing.T) { + if !MySQLAvailable() { + t.Error("Deliberate test failure as a reminder that all storage-related tests are being skipped due to absent MySQL") + } +} From 9f48d081ef0cb6c6abad5de11db23ce365b937a4 Mon Sep 17 00:00:00 2001 From: Rob Stradling Date: Tue, 1 Oct 2024 12:19:43 +0100 Subject: [PATCH 03/62] Replace 'MySQL' references with 'PostgreSQL', preserving case of each reference --- quota/postgresqlqm/postgresql_quota.go | 18 ++-- quota/postgresqlqm/postgresql_quota_test.go | 34 +++---- quota/postgresqlqm/quota_provider.go | 18 ++-- storage/postgresql/admin_storage.go | 32 +++---- storage/postgresql/admin_storage_test.go | 10 +- storage/postgresql/errors.go | 16 ++-- storage/postgresql/log_storage.go | 74 +++++++------- storage/postgresql/log_storage_test.go | 4 +- storage/postgresql/postgresqlpb/gen.go | 4 +- storage/postgresql/postgresqlpb/options.pb.go | 6 +- storage/postgresql/postgresqlpb/options.proto | 6 +- storage/postgresql/provider.go | 96 +++++++++---------- storage/postgresql/provider_test.go | 10 +- storage/postgresql/queue.go | 2 +- storage/postgresql/queue_batching.go | 4 +- storage/postgresql/schema/storage.sql | 4 +- storage/postgresql/sql.go | 10 +- storage/postgresql/storage_test.go | 14 +-- storage/postgresql/testdbpgx/testdbpgx.go | 50 +++++----- .../postgresql/testdbpgx/testdbpgx_test.go | 6 +- storage/postgresql/tree_storage.go | 38 ++++---- 21 files changed, 228 insertions(+), 228 deletions(-) diff --git a/quota/postgresqlqm/postgresql_quota.go b/quota/postgresqlqm/postgresql_quota.go index ca55613909..47922bcf8d 100644 --- a/quota/postgresqlqm/postgresql_quota.go +++ b/quota/postgresqlqm/postgresql_quota.go @@ -12,8 +12,8 @@ // See the License for the specific language governing permissions and // limitations under the License. -// Package mysqlqm defines a MySQL-based quota.Manager implementation. -package mysqlqm +// Package postgresqlqm defines a PostgreSQL-based quota.Manager implementation. +package postgresqlqm import ( "context" @@ -43,12 +43,12 @@ const ( // beyond the configured limit. var ErrTooManyUnsequencedRows = errors.New("too many unsequenced rows") -// QuotaManager is a MySQL-based quota.Manager implementation. +// QuotaManager is a PostgreSQL-based quota.Manager implementation. // // It has two working modes: one queries the information schema for the number of Unsequenced rows, // the other does a select count(*) on the Unsequenced table. Information schema queries are // default, even though they are approximate, as they're constant time (select count(*) on InnoDB -// based MySQL needs to traverse the index and may take quite a while to complete). +// based PostgreSQL needs to traverse the index and may take quite a while to complete). // // QuotaManager only implements Global/Write quotas, which is based on the number of Unsequenced // rows (to be exact, tokens = MaxUnsequencedRows - actualUnsequencedRows). 
@@ -99,7 +99,7 @@ func (m *QuotaManager) countUnsequenced(ctx context.Context) (int, error) { } func countFromInformationSchema(ctx context.Context, db *sql.DB) (int, error) { - // turn off statistics caching for MySQL 8 + // turn off statistics caching for PostgreSQL 8 if err := turnOffInformationSchemaCache(ctx, db); err != nil { return 0, err } @@ -135,10 +135,10 @@ func countFromTable(ctx context.Context, db *sql.DB) (int, error) { return count, nil } -// turnOffInformationSchemaCache turn off statistics caching for MySQL 8 +// turnOffInformationSchemaCache turn off statistics caching for PostgreSQL 8 // To always retrieve the latest statistics directly from the storage engine and bypass cached values, set information_schema_stats_expiry to 0. -// See https://dev.mysql.com/doc/refman/8.0/en/server-system-variables.html#sysvar_information_schema_stats_expiry -// MySQL versions prior to 8 will fail safely. +// See https://dev.postgresql.com/doc/refman/8.0/en/server-system-variables.html#sysvar_information_schema_stats_expiry +// PostgreSQL versions prior to 8 will fail safely. func turnOffInformationSchemaCache(ctx context.Context, db *sql.DB) error { opt := "information_schema_stats_expiry" res := db.QueryRowContext(ctx, "SHOW VARIABLES LIKE '"+opt+"'") @@ -146,7 +146,7 @@ func turnOffInformationSchemaCache(ctx context.Context, db *sql.DB) error { var expiry int if err := res.Scan(&none, &expiry); err != nil { - // fail safely for all versions of MySQL prior to 8 + // fail safely for all versions of PostgreSQL prior to 8 if errors.Is(err, sql.ErrNoRows) { return nil } diff --git a/quota/postgresqlqm/postgresql_quota_test.go b/quota/postgresqlqm/postgresql_quota_test.go index dabfd131dd..8d03cd2778 100644 --- a/quota/postgresqlqm/postgresql_quota_test.go +++ b/quota/postgresqlqm/postgresql_quota_test.go @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. -package mysqlqm_test +package postgresqlqm_test import ( "context" @@ -24,9 +24,9 @@ import ( "github.com/google/trillian" "github.com/google/trillian/quota" - "github.com/google/trillian/quota/mysqlqm" + "github.com/google/trillian/quota/postgresqlqm" "github.com/google/trillian/storage" - "github.com/google/trillian/storage/mysql" + "github.com/google/trillian/storage/postgresql" "github.com/google/trillian/storage/testdb" "github.com/google/trillian/types" @@ -34,10 +34,10 @@ import ( ) func TestQuotaManager_GetTokens(t *testing.T) { - testdb.SkipIfNoMySQL(t) + testdb.SkipIfNoPostgreSQL(t) ctx := context.Background() - db, done, err := testdb.NewTrillianDB(ctx, testdb.DriverMySQL) + db, done, err := testdb.NewTrillianDB(ctx, testdb.DriverPostgreSQL) if err != nil { t.Fatalf("GetTestDB() returned err = %v", err) } @@ -106,16 +106,16 @@ func TestQuotaManager_GetTokens(t *testing.T) { // Test general cases using select count(*) to avoid flakiness / allow for more // precise assertions. // See TestQuotaManager_GetTokens_InformationSchema for information schema tests. 
- qm := &mysqlqm.QuotaManager{DB: db, MaxUnsequencedRows: test.maxUnsequencedRows, UseSelectCount: true} + qm := &postgresqlqm.QuotaManager{DB: db, MaxUnsequencedRows: test.maxUnsequencedRows, UseSelectCount: true} err := qm.GetTokens(ctx, test.numTokens, test.specs) - if hasErr := err == mysqlqm.ErrTooManyUnsequencedRows; hasErr != test.wantErr { + if hasErr := err == postgresqlqm.ErrTooManyUnsequencedRows; hasErr != test.wantErr { t.Errorf("%v: GetTokens() returned err = %q, wantErr = %v", test.desc, err, test.wantErr) } } } func TestQuotaManager_GetTokens_InformationSchema(t *testing.T) { - testdb.SkipIfNoMySQL(t) + testdb.SkipIfNoPostgreSQL(t) ctx := context.Background() maxUnsequenced := 20 @@ -131,7 +131,7 @@ func TestQuotaManager_GetTokens_InformationSchema(t *testing.T) { for _, test := range tests { desc := fmt.Sprintf("useSelectCount = %v", test.useSelectCount) t.Run(desc, func(t *testing.T) { - db, done, err := testdb.NewTrillianDB(ctx, testdb.DriverMySQL) + db, done, err := testdb.NewTrillianDB(ctx, testdb.DriverPostgreSQL) if err != nil { t.Fatalf("NewTrillianDB() returned err = %v", err) } @@ -142,7 +142,7 @@ func TestQuotaManager_GetTokens_InformationSchema(t *testing.T) { t.Fatalf("createTree() returned err = %v", err) } - qm := &mysqlqm.QuotaManager{DB: db, MaxUnsequencedRows: maxUnsequenced, UseSelectCount: test.useSelectCount} + qm := &postgresqlqm.QuotaManager{DB: db, MaxUnsequencedRows: maxUnsequenced, UseSelectCount: test.useSelectCount} // All GetTokens() calls where leaves < maxUnsequenced should succeed: // information_schema may be outdated, but it should refer to a valid point in the @@ -171,7 +171,7 @@ func TestQuotaManager_GetTokens_InformationSchema(t *testing.T) { stop = true default: // An error means that GetTokens is working correctly - stop = qm.GetTokens(ctx, 1 /* numTokens */, globalWriteSpec) == mysqlqm.ErrTooManyUnsequencedRows + stop = qm.GetTokens(ctx, 1 /* numTokens */, globalWriteSpec) == postgresqlqm.ErrTooManyUnsequencedRows } } }) @@ -179,16 +179,16 @@ func TestQuotaManager_GetTokens_InformationSchema(t *testing.T) { } func TestQuotaManager_Noops(t *testing.T) { - testdb.SkipIfNoMySQL(t) + testdb.SkipIfNoPostgreSQL(t) ctx := context.Background() - db, done, err := testdb.NewTrillianDB(ctx, testdb.DriverMySQL) + db, done, err := testdb.NewTrillianDB(ctx, testdb.DriverPostgreSQL) if err != nil { t.Fatalf("GetTestDB() returned err = %v", err) } defer done(ctx) - qm := &mysqlqm.QuotaManager{DB: db, MaxUnsequencedRows: 1000} + qm := &postgresqlqm.QuotaManager{DB: db, MaxUnsequencedRows: 1000} specs := allSpecs(ctx, qm, 10 /* treeID */) tests := []struct { @@ -238,7 +238,7 @@ func createTree(ctx context.Context, db *sql.DB) (*trillian.Tree, error) { var tree *trillian.Tree { - as := mysql.NewAdminStorage(db) + as := postgresql.NewAdminStorage(db) err := as.ReadWriteTransaction(ctx, func(ctx context.Context, tx storage.AdminTX) error { var err error tree, err = tx.CreateTree(ctx, stestonly.LogTree) @@ -250,7 +250,7 @@ func createTree(ctx context.Context, db *sql.DB) (*trillian.Tree, error) { } { - ls := mysql.NewLogStorage(db, nil) + ls := postgresql.NewLogStorage(db, nil) err := ls.ReadWriteTransaction(ctx, tree, func(ctx context.Context, tx storage.LogTreeTX) error { logRoot, err := (&types.LogRootV1{RootHash: []byte{0}}).MarshalBinary() if err != nil { @@ -286,7 +286,7 @@ func queueLeaves(ctx context.Context, db *sql.DB, tree *trillian.Tree, firstID, }) } - ls := mysql.NewLogStorage(db, nil) + ls := postgresql.NewLogStorage(db, nil) _, err := 
ls.QueueLeaves(ctx, tree, leaves, time.Now()) return err } diff --git a/quota/postgresqlqm/quota_provider.go b/quota/postgresqlqm/quota_provider.go index 008f13b6a0..e6d599d728 100644 --- a/quota/postgresqlqm/quota_provider.go +++ b/quota/postgresqlqm/quota_provider.go @@ -12,30 +12,30 @@ // See the License for the specific language governing permissions and // limitations under the License. -package mysqlqm +package postgresqlqm import ( "flag" "github.com/google/trillian/quota" - "github.com/google/trillian/storage/mysql" + "github.com/google/trillian/storage/postgresql" "k8s.io/klog/v2" ) -// QuotaManagerName identifies the MySQL quota implementation. -const QuotaManagerName = "mysql" +// QuotaManagerName identifies the PostgreSQL quota implementation. +const QuotaManagerName = "postgresql" var maxUnsequencedRows = flag.Int("max_unsequenced_rows", DefaultMaxUnsequenced, "Max number of unsequenced rows before rate limiting kicks in. "+ - "Only effective for quota_system=mysql.") + "Only effective for quota_system=postgresql.") func init() { - if err := quota.RegisterProvider(QuotaManagerName, newMySQLQuotaManager); err != nil { + if err := quota.RegisterProvider(QuotaManagerName, newPostgreSQLQuotaManager); err != nil { klog.Fatalf("Failed to register quota manager %v: %v", QuotaManagerName, err) } } -func newMySQLQuotaManager() (quota.Manager, error) { - db, err := mysql.GetDatabase() +func newPostgreSQLQuotaManager() (quota.Manager, error) { + db, err := postgresql.GetDatabase() if err != nil { return nil, err } @@ -43,6 +43,6 @@ func newMySQLQuotaManager() (quota.Manager, error) { DB: db, MaxUnsequencedRows: *maxUnsequencedRows, } - klog.Info("Using MySQL QuotaManager") + klog.Info("Using PostgreSQL QuotaManager") return qm, nil } diff --git a/storage/postgresql/admin_storage.go b/storage/postgresql/admin_storage.go index 0b267f6732..7c71de6981 100644 --- a/storage/postgresql/admin_storage.go +++ b/storage/postgresql/admin_storage.go @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. -package mysql +package postgresql import ( "bytes" @@ -25,7 +25,7 @@ import ( "github.com/google/trillian" "github.com/google/trillian/storage" - "github.com/google/trillian/storage/mysql/mysqlpb" + "github.com/google/trillian/storage/postgresql/postgresqlpb" "google.golang.org/grpc/codes" "google.golang.org/grpc/status" "google.golang.org/protobuf/proto" @@ -65,21 +65,21 @@ const ( WHERE TreeId = ?` ) -// NewAdminStorage returns a MySQL storage.AdminStorage implementation backed by DB. -func NewAdminStorage(db *sql.DB) *mysqlAdminStorage { - return &mysqlAdminStorage{db} +// NewAdminStorage returns a PostgreSQL storage.AdminStorage implementation backed by DB. 
+func NewAdminStorage(db *sql.DB) *postgresqlAdminStorage { + return &postgresqlAdminStorage{db} } -// mysqlAdminStorage implements storage.AdminStorage -type mysqlAdminStorage struct { +// postgresqlAdminStorage implements storage.AdminStorage +type postgresqlAdminStorage struct { db *sql.DB } -func (s *mysqlAdminStorage) Snapshot(ctx context.Context) (storage.ReadOnlyAdminTX, error) { +func (s *postgresqlAdminStorage) Snapshot(ctx context.Context) (storage.ReadOnlyAdminTX, error) { return s.beginInternal(ctx) } -func (s *mysqlAdminStorage) beginInternal(ctx context.Context) (storage.AdminTX, error) { +func (s *postgresqlAdminStorage) beginInternal(ctx context.Context) (storage.AdminTX, error) { tx, err := s.db.BeginTx(ctx, nil /* opts */) if err != nil { return nil, err @@ -87,7 +87,7 @@ func (s *mysqlAdminStorage) beginInternal(ctx context.Context) (storage.AdminTX, return &adminTX{tx: tx}, nil } -func (s *mysqlAdminStorage) ReadWriteTransaction(ctx context.Context, f storage.AdminTXFunc) error { +func (s *postgresqlAdminStorage) ReadWriteTransaction(ctx context.Context, f storage.AdminTXFunc) error { tx, err := s.beginInternal(ctx) if err != nil { return err @@ -103,7 +103,7 @@ func (s *mysqlAdminStorage) ReadWriteTransaction(ctx context.Context, f storage. return tx.Commit() } -func (s *mysqlAdminStorage) CheckDatabaseAccessible(ctx context.Context) error { +func (s *postgresqlAdminStorage) CheckDatabaseAccessible(ctx context.Context) error { return s.db.PingContext(ctx) } @@ -238,7 +238,7 @@ func (t *adminTX) CreateTree(ctx context.Context, tree *trillian.Tree) (*trillia if tree.StorageSettings != nil { newTree.StorageSettings = proto.Clone(tree.StorageSettings).(*anypb.Any) } else { - o := &mysqlpb.StorageOptions{ + o := &postgresqlpb.StorageOptions{ SubtreeRevisions: false, // Default behaviour for new trees is to skip writing subtree revisions. } a, err := anypb.New(o) @@ -247,7 +247,7 @@ func (t *adminTX) CreateTree(ctx context.Context, tree *trillian.Tree) (*trillia } newTree.StorageSettings = a } - o := &mysqlpb.StorageOptions{} + o := &postgresqlpb.StorageOptions{} if err := anypb.UnmarshalTo(newTree.StorageSettings, o, proto.UnmarshalOptions{}); err != nil { return nil, fmt.Errorf("failed to unmarshal StorageOptions: %v", err) } @@ -306,7 +306,7 @@ func (t *adminTX) CreateTree(ctx context.Context, tree *trillian.Tree) (*trillia return nil, err } - // MySQL silently truncates data when running in non-strict mode. + // PostgreSQL silently truncates data when running in non-strict mode. // We shouldn't be using non-strict modes, but let's guard against it // anyway. if _, err := t.GetTree(ctx, newTree.TreeId); err != nil { @@ -459,14 +459,14 @@ func validateDeleted(ctx context.Context, tx *sql.Tx, treeID int64, wantDeleted } func validateStorageSettings(tree *trillian.Tree) error { - if tree.StorageSettings.MessageIs(&mysqlpb.StorageOptions{}) { + if tree.StorageSettings.MessageIs(&postgresqlpb.StorageOptions{}) { return nil } if tree.StorageSettings == nil { // No storage settings is OK, we'll just use the defaults for new trees return nil } - return fmt.Errorf("storage_settings must be nil or mysqlpb.StorageOptions, but got %v", tree.StorageSettings) + return fmt.Errorf("storage_settings must be nil or postgresqlpb.StorageOptions, but got %v", tree.StorageSettings) } // storageSettings allows us to persist storage settings to the DB. 
diff --git a/storage/postgresql/admin_storage_test.go b/storage/postgresql/admin_storage_test.go index d1a7cfd81d..7f613c1029 100644 --- a/storage/postgresql/admin_storage_test.go +++ b/storage/postgresql/admin_storage_test.go @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. -package mysql +package postgresql import ( "bytes" @@ -24,7 +24,7 @@ import ( "github.com/google/trillian" "github.com/google/trillian/storage" - "github.com/google/trillian/storage/mysql/mysqlpb" + "github.com/google/trillian/storage/postgresql/postgresqlpb" "github.com/google/trillian/storage/testonly" "google.golang.org/protobuf/proto" "google.golang.org/protobuf/types/known/anypb" @@ -32,7 +32,7 @@ import ( const selectTreeControlByID = "SELECT SigningEnabled, SequencingEnabled, SequenceIntervalSeconds FROM TreeControl WHERE TreeId = ?" -func TestMysqlAdminStorage(t *testing.T) { +func TestPostgresqlAdminStorage(t *testing.T) { tester := &testonly.AdminStorageTester{NewAdminStorage: func() storage.AdminStorage { cleanTestDB(DB) return NewAdminStorage(DB) @@ -140,7 +140,7 @@ func TestAdminTX_StorageSettings(t *testing.T) { if err != nil { t.Fatalf("Error marshaling proto: %v", err) } - goodSettings, err := anypb.New(&mysqlpb.StorageOptions{}) + goodSettings, err := anypb.New(&postgresqlpb.StorageOptions{}) if err != nil { t.Fatalf("Error marshaling proto: %v", err) } @@ -270,7 +270,7 @@ func TestAdminTX_GetTreeLegacies(t *testing.T) { if err != nil { t.Fatal(err) } - o := &mysqlpb.StorageOptions{} + o := &postgresqlpb.StorageOptions{} if err := anypb.UnmarshalTo(readTree.StorageSettings, o, proto.UnmarshalOptions{}); err != nil { t.Fatal(err) } diff --git a/storage/postgresql/errors.go b/storage/postgresql/errors.go index e457fa40fc..0282e6ee86 100644 --- a/storage/postgresql/errors.go +++ b/storage/postgresql/errors.go @@ -12,10 +12,10 @@ // See the License for the specific language governing permissions and // limitations under the License. -package mysql +package postgresql import ( - "github.com/go-sql-driver/mysql" + "github.com/go-sql-driver/postgresql" "google.golang.org/grpc/codes" "google.golang.org/grpc/status" ) @@ -27,22 +27,22 @@ const ( errNumDeadlock = 1213 ) -// mysqlToGRPC converts some types of MySQL errors to GRPC errors. This gives +// postgresqlToGRPC converts some types of PostgreSQL errors to GRPC errors. This gives // clients more signal when the operation can be retried. -func mysqlToGRPC(err error) error { - mysqlErr, ok := err.(*mysql.MySQLError) +func postgresqlToGRPC(err error) error { + postgresqlErr, ok := err.(*postgresql.PostgreSQLError) if !ok { return err } - if mysqlErr.Number == errNumDeadlock { - return status.Errorf(codes.Aborted, "MySQL: %v", mysqlErr) + if postgresqlErr.Number == errNumDeadlock { + return status.Errorf(codes.Aborted, "PostgreSQL: %v", postgresqlErr) } return err } func isDuplicateErr(err error) bool { switch err := err.(type) { - case *mysql.MySQLError: + case *postgresql.PostgreSQLError: return err.Number == errNumDuplicate default: return false diff --git a/storage/postgresql/log_storage.go b/storage/postgresql/log_storage.go index b3f10f7818..b98da3deba 100644 --- a/storage/postgresql/log_storage.go +++ b/storage/postgresql/log_storage.go @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-package mysql +package postgresql import ( "bytes" @@ -98,19 +98,19 @@ var ( ) func createMetrics(mf monitoring.MetricFactory) { - queuedCounter = mf.NewCounter("mysql_queued_leaves", "Number of leaves queued", logIDLabel) - queuedDupCounter = mf.NewCounter("mysql_queued_dup_leaves", "Number of duplicate leaves queued", logIDLabel) - dequeuedCounter = mf.NewCounter("mysql_dequeued_leaves", "Number of leaves dequeued", logIDLabel) + queuedCounter = mf.NewCounter("postgresql_queued_leaves", "Number of leaves queued", logIDLabel) + queuedDupCounter = mf.NewCounter("postgresql_queued_dup_leaves", "Number of duplicate leaves queued", logIDLabel) + dequeuedCounter = mf.NewCounter("postgresql_dequeued_leaves", "Number of leaves dequeued", logIDLabel) - queueLatency = mf.NewHistogram("mysql_queue_leaves_latency", "Latency of queue leaves operation in seconds", logIDLabel) - queueInsertLatency = mf.NewHistogram("mysql_queue_leaves_latency_insert", "Latency of insertion part of queue leaves operation in seconds", logIDLabel) - queueReadLatency = mf.NewHistogram("mysql_queue_leaves_latency_read_dups", "Latency of read-duplicates part of queue leaves operation in seconds", logIDLabel) - queueInsertLeafLatency = mf.NewHistogram("mysql_queue_leaf_latency_leaf", "Latency of insert-leaf part of queue (single) leaf operation in seconds", logIDLabel) - queueInsertEntryLatency = mf.NewHistogram("mysql_queue_leaf_latency_entry", "Latency of insert-entry part of queue (single) leaf operation in seconds", logIDLabel) + queueLatency = mf.NewHistogram("postgresql_queue_leaves_latency", "Latency of queue leaves operation in seconds", logIDLabel) + queueInsertLatency = mf.NewHistogram("postgresql_queue_leaves_latency_insert", "Latency of insertion part of queue leaves operation in seconds", logIDLabel) + queueReadLatency = mf.NewHistogram("postgresql_queue_leaves_latency_read_dups", "Latency of read-duplicates part of queue leaves operation in seconds", logIDLabel) + queueInsertLeafLatency = mf.NewHistogram("postgresql_queue_leaf_latency_leaf", "Latency of insert-leaf part of queue (single) leaf operation in seconds", logIDLabel) + queueInsertEntryLatency = mf.NewHistogram("postgresql_queue_leaf_latency_entry", "Latency of insert-entry part of queue (single) leaf operation in seconds", logIDLabel) - dequeueLatency = mf.NewHistogram("mysql_dequeue_leaves_latency", "Latency of dequeue leaves operation in seconds", logIDLabel) - dequeueSelectLatency = mf.NewHistogram("mysql_dequeue_leaves_latency_select", "Latency of selection part of dequeue leaves operation in seconds", logIDLabel) - dequeueRemoveLatency = mf.NewHistogram("mysql_dequeue_leaves_latency_remove", "Latency of removal part of dequeue leaves operation in seconds", logIDLabel) + dequeueLatency = mf.NewHistogram("postgresql_dequeue_leaves_latency", "Latency of dequeue leaves operation in seconds", logIDLabel) + dequeueSelectLatency = mf.NewHistogram("postgresql_dequeue_leaves_latency_select", "Latency of selection part of dequeue leaves operation in seconds", logIDLabel) + dequeueRemoveLatency = mf.NewHistogram("postgresql_dequeue_leaves_latency_remove", "Latency of removal part of dequeue leaves operation in seconds", logIDLabel) } func labelForTX(t *logTreeTX) string { @@ -121,30 +121,30 @@ func observe(hist monitoring.Histogram, duration time.Duration, label string) { hist.Observe(duration.Seconds(), label) } -type mySQLLogStorage struct { - *mySQLTreeStorage +type postgreSQLLogStorage struct { + *postgreSQLTreeStorage admin storage.AdminStorage 
metricFactory monitoring.MetricFactory } -// NewLogStorage creates a storage.LogStorage instance for the specified MySQL URL. -// It assumes storage.AdminStorage is backed by the same MySQL database as well. +// NewLogStorage creates a storage.LogStorage instance for the specified PostgreSQL URL. +// It assumes storage.AdminStorage is backed by the same PostgreSQL database as well. func NewLogStorage(db *sql.DB, mf monitoring.MetricFactory) storage.LogStorage { if mf == nil { mf = monitoring.InertMetricFactory{} } - return &mySQLLogStorage{ + return &postgreSQLLogStorage{ admin: NewAdminStorage(db), - mySQLTreeStorage: newTreeStorage(db), + postgreSQLTreeStorage: newTreeStorage(db), metricFactory: mf, } } -func (m *mySQLLogStorage) CheckDatabaseAccessible(ctx context.Context) error { +func (m *postgreSQLLogStorage) CheckDatabaseAccessible(ctx context.Context) error { return m.db.PingContext(ctx) } -func (m *mySQLLogStorage) getLeavesByMerkleHashStmt(ctx context.Context, num int, orderBySequence bool) (*sql.Stmt, error) { +func (m *postgreSQLLogStorage) getLeavesByMerkleHashStmt(ctx context.Context, num int, orderBySequence bool) (*sql.Stmt, error) { if orderBySequence { return m.getStmt(ctx, selectLeavesByMerkleHashOrderedBySequenceSQL, num, "?", "?") } @@ -152,11 +152,11 @@ func (m *mySQLLogStorage) getLeavesByMerkleHashStmt(ctx context.Context, num int return m.getStmt(ctx, selectLeavesByMerkleHashSQL, num, "?", "?") } -func (m *mySQLLogStorage) getLeavesByLeafIdentityHashStmt(ctx context.Context, num int) (*sql.Stmt, error) { +func (m *postgreSQLLogStorage) getLeavesByLeafIdentityHashStmt(ctx context.Context, num int) (*sql.Stmt, error) { return m.getStmt(ctx, selectLeavesByLeafIdentityHashSQL, num, "?", "?") } -func (m *mySQLLogStorage) GetActiveLogIDs(ctx context.Context) ([]int64, error) { +func (m *postgreSQLLogStorage) GetActiveLogIDs(ctx context.Context) ([]int64, error) { // Include logs that are DRAINING in the active list as we're still // integrating leaves into them. rows, err := m.db.QueryContext( @@ -182,7 +182,7 @@ func (m *mySQLLogStorage) GetActiveLogIDs(ctx context.Context) ([]int64, error) return ids, rows.Err() } -func (m *mySQLLogStorage) beginInternal(ctx context.Context, tree *trillian.Tree) (*logTreeTX, error) { +func (m *postgreSQLLogStorage) beginInternal(ctx context.Context, tree *trillian.Tree) (*logTreeTX, error) { once.Do(func() { createMetrics(m.metricFactory) }) @@ -224,7 +224,7 @@ func (m *mySQLLogStorage) beginInternal(ctx context.Context, tree *trillian.Tree // implementation can leak a specific sql.ErrTxDone all the way to the client, // if the transaction is rolled back as a result of a canceled context. It must // return "generic" errors, and only log the specific ones for debugging. 
-func (m *mySQLLogStorage) ReadWriteTransaction(ctx context.Context, tree *trillian.Tree, f storage.LogTXFunc) error { +func (m *postgreSQLLogStorage) ReadWriteTransaction(ctx context.Context, tree *trillian.Tree, f storage.LogTXFunc) error { tx, err := m.beginInternal(ctx, tree) if err != nil && err != storage.ErrTreeNeedsInit { return err @@ -240,7 +240,7 @@ func (m *mySQLLogStorage) ReadWriteTransaction(ctx context.Context, tree *trilli return tx.Commit(ctx) } -func (m *mySQLLogStorage) AddSequencedLeaves(ctx context.Context, tree *trillian.Tree, leaves []*trillian.LogLeaf, timestamp time.Time) ([]*trillian.QueuedLogLeaf, error) { +func (m *postgreSQLLogStorage) AddSequencedLeaves(ctx context.Context, tree *trillian.Tree, leaves []*trillian.LogLeaf, timestamp time.Time) ([]*trillian.QueuedLogLeaf, error) { tx, err := m.beginInternal(ctx, tree) if tx != nil { // Ensure we don't leak the transaction. For example if we get an @@ -265,7 +265,7 @@ func (m *mySQLLogStorage) AddSequencedLeaves(ctx context.Context, tree *trillian return res, nil } -func (m *mySQLLogStorage) SnapshotForTree(ctx context.Context, tree *trillian.Tree) (storage.ReadOnlyLogTreeTX, error) { +func (m *postgreSQLLogStorage) SnapshotForTree(ctx context.Context, tree *trillian.Tree) (storage.ReadOnlyLogTreeTX, error) { tx, err := m.beginInternal(ctx, tree) if err != nil && err != storage.ErrTreeNeedsInit { return nil, err @@ -273,7 +273,7 @@ func (m *mySQLLogStorage) SnapshotForTree(ctx context.Context, tree *trillian.Tr return tx, err } -func (m *mySQLLogStorage) QueueLeaves(ctx context.Context, tree *trillian.Tree, leaves []*trillian.LogLeaf, queueTimestamp time.Time) ([]*trillian.QueuedLogLeaf, error) { +func (m *postgreSQLLogStorage) QueueLeaves(ctx context.Context, tree *trillian.Tree, leaves []*trillian.LogLeaf, queueTimestamp time.Time) ([]*trillian.QueuedLogLeaf, error) { tx, err := m.beginInternal(ctx, tree) if tx != nil { // Ensure we don't leak the transaction. For example if we get an @@ -313,7 +313,7 @@ func (m *mySQLLogStorage) QueueLeaves(ctx context.Context, tree *trillian.Tree, type logTreeTX struct { treeTX - ls *mySQLLogStorage + ls *postgreSQLLogStorage root types.LogRootV1 readRev int64 slr *trillian.SignedLogRoot @@ -446,7 +446,7 @@ func (t *logTreeTX) QueueLeaves(ctx context.Context, leaves []*trillian.LogLeaf, } if err != nil { klog.Warningf("Error inserting %d into LeafData: %s", i, err) - return nil, mysqlToGRPC(err) + return nil, postgresqlToGRPC(err) } // Create the work queue entry @@ -463,7 +463,7 @@ func (t *logTreeTX) QueueLeaves(ctx context.Context, leaves []*trillian.LogLeaf, ) if err != nil { klog.Warningf("Error inserting into Unsequenced: %s", err) - return nil, mysqlToGRPC(err) + return nil, postgresqlToGRPC(err) } leafDuration := time.Since(leafStart) observe(queueInsertEntryLatency, (leafDuration - insertDuration), label) @@ -535,7 +535,7 @@ func (t *logTreeTX) AddSequencedLeaves(ctx context.Context, leaves []*trillian.L const savepoint = "SAVEPOINT AddSequencedLeaves" if _, err := t.tx.ExecContext(ctx, savepoint); err != nil { klog.Errorf("Error adding savepoint: %s", err) - return nil, mysqlToGRPC(err) + return nil, postgresqlToGRPC(err) } // TODO(pavelkalinnikov): Consider performance implication of executing this // extra SAVEPOINT, especially for 1-entry batches. Optimize if necessary. 
@@ -556,7 +556,7 @@ func (t *logTreeTX) AddSequencedLeaves(ctx context.Context, leaves []*trillian.L if _, err := t.tx.ExecContext(ctx, savepoint); err != nil { klog.Errorf("Error updating savepoint: %s", err) - return nil, mysqlToGRPC(err) + return nil, postgresqlToGRPC(err) } res[i] = &trillian.QueuedLogLeaf{Status: ok} @@ -573,7 +573,7 @@ func (t *logTreeTX) AddSequencedLeaves(ctx context.Context, leaves []*trillian.L continue } else if err != nil { klog.Errorf("Error inserting leaves[%d] into LeafData: %s", i, err) - return nil, mysqlToGRPC(err) + return nil, postgresqlToGRPC(err) } _, err = t.tx.ExecContext(ctx, insertSequencedLeafSQL+valuesPlaceholder5, @@ -584,11 +584,11 @@ func (t *logTreeTX) AddSequencedLeaves(ctx context.Context, leaves []*trillian.L res[i].Status = status.New(codes.FailedPrecondition, "conflicting LeafIndex").Proto() if _, err := t.tx.ExecContext(ctx, "ROLLBACK TO "+savepoint); err != nil { klog.Errorf("Error rolling back to savepoint: %s", err) - return nil, mysqlToGRPC(err) + return nil, postgresqlToGRPC(err) } } else if err != nil { klog.Errorf("Error inserting leaves[%d] into SequencedLeafData: %s", i, err) - return nil, mysqlToGRPC(err) + return nil, postgresqlToGRPC(err) } // TODO(pavelkalinnikov): Load LeafData for conflicting entries. @@ -596,7 +596,7 @@ func (t *logTreeTX) AddSequencedLeaves(ctx context.Context, leaves []*trillian.L if _, err := t.tx.ExecContext(ctx, "RELEASE "+savepoint); err != nil { klog.Errorf("Error releasing savepoint: %s", err) - return nil, mysqlToGRPC(err) + return nil, postgresqlToGRPC(err) } return res, nil @@ -750,7 +750,7 @@ func (t *logTreeTX) StoreSignedLogRoot(ctx context.Context, root *trillian.Signe return err } if len(logRoot.Metadata) != 0 { - return fmt.Errorf("unimplemented: mysql storage does not support log root metadata") + return fmt.Errorf("unimplemented: postgresql storage does not support log root metadata") } res, err := t.tx.ExecContext( diff --git a/storage/postgresql/log_storage_test.go b/storage/postgresql/log_storage_test.go index b67035bd30..9d3f144241 100644 --- a/storage/postgresql/log_storage_test.go +++ b/storage/postgresql/log_storage_test.go @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. -package mysql +package postgresql import ( "bytes" @@ -34,7 +34,7 @@ import ( "google.golang.org/protobuf/types/known/timestamppb" "k8s.io/klog/v2" - _ "github.com/go-sql-driver/mysql" + _ "github.com/go-sql-driver/postgresql" ) var allTables = []string{"Unsequenced", "TreeHead", "SequencedLeafData", "LeafData", "Subtree", "TreeControl", "Trees"} diff --git a/storage/postgresql/postgresqlpb/gen.go b/storage/postgresql/postgresqlpb/gen.go index 875c14cbf4..41d1119fb0 100644 --- a/storage/postgresql/postgresqlpb/gen.go +++ b/storage/postgresql/postgresqlpb/gen.go @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. -// Package mysqlpb contains protobuf definitions used by the mysql implementation. -package mysqlpb +// Package postgresqlpb contains protobuf definitions used by the postgresql implementation. +package postgresqlpb //go:generate protoc -I=. --go_out=paths=source_relative:. 
options.proto diff --git a/storage/postgresql/postgresqlpb/options.pb.go b/storage/postgresql/postgresqlpb/options.pb.go index 4f7ed42cda..76936064a7 100644 --- a/storage/postgresql/postgresqlpb/options.pb.go +++ b/storage/postgresql/postgresqlpb/options.pb.go @@ -18,7 +18,7 @@ // protoc v3.20.1 // source: options.proto -package mysqlpb +package postgresqlpb import ( protoreflect "google.golang.org/protobuf/reflect/protoreflect" @@ -34,7 +34,7 @@ const ( _ = protoimpl.EnforceVersion(protoimpl.MaxVersion - 20) ) -// StorageOptions contains configuration parameters for MySQL implementation +// StorageOptions contains configuration parameters for PostgreSQL implementation // of the storage backend. This is envisioned only to be used for changes that // would be breaking, but need to support old behaviour for backwards compatibility. type StorageOptions struct { @@ -115,7 +115,7 @@ func file_options_proto_rawDescGZIP() []byte { var file_options_proto_msgTypes = make([]protoimpl.MessageInfo, 1) var file_options_proto_goTypes = []any{ - (*StorageOptions)(nil), // 0: mysqlpb.StorageOptions + (*StorageOptions)(nil), // 0: postgresqlpb.StorageOptions } var file_options_proto_depIdxs = []int32{ 0, // [0:0] is the sub-list for method output_type diff --git a/storage/postgresql/postgresqlpb/options.proto b/storage/postgresql/postgresqlpb/options.proto index 2ebdc670ae..96c5859d3b 100644 --- a/storage/postgresql/postgresqlpb/options.proto +++ b/storage/postgresql/postgresqlpb/options.proto @@ -13,11 +13,11 @@ // limitations under the License. syntax = "proto3"; -option go_package = "github.com/google/trillian/storage/mysql/mysqlpb"; +option go_package = "github.com/google/trillian/storage/postgresql/postgresqlpb"; -package mysqlpb; +package postgresqlpb; -// StorageOptions contains configuration parameters for MySQL implementation +// StorageOptions contains configuration parameters for PostgreSQL implementation // of the storage backend. This is envisioned only to be used for changes that // would be breaking, but need to support old behaviour for backwards compatibility. message StorageOptions { diff --git a/storage/postgresql/provider.go b/storage/postgresql/provider.go index 6c1a4c5e4e..054b9a0be1 100644 --- a/storage/postgresql/provider.go +++ b/storage/postgresql/provider.go @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-package mysql +package postgresql import ( "crypto/tls" @@ -27,76 +27,76 @@ import ( "github.com/google/trillian/storage" "k8s.io/klog/v2" - // Load MySQL driver - "github.com/go-sql-driver/mysql" + // Load PostgreSQL driver + "github.com/go-sql-driver/postgresql" ) var ( - mySQLURI = flag.String("mysql_uri", "test:zaphod@tcp(127.0.0.1:3306)/test", "Connection URI for MySQL database") - maxConns = flag.Int("mysql_max_conns", 0, "Maximum connections to the database") - maxIdle = flag.Int("mysql_max_idle_conns", -1, "Maximum idle database connections in the connection pool") - mySQLTLSCA = flag.String("mysql_tls_ca", "", "Path to the CA certificate file for MySQL TLS connection ") - mySQLServerName = flag.String("mysql_server_name", "", "Name of the MySQL server to be used as the Server Name in the TLS configuration") + postgreSQLURI = flag.String("postgresql_uri", "test:zaphod@tcp(127.0.0.1:3306)/test", "Connection URI for PostgreSQL database") + maxConns = flag.Int("postgresql_max_conns", 0, "Maximum connections to the database") + maxIdle = flag.Int("postgresql_max_idle_conns", -1, "Maximum idle database connections in the connection pool") + postgreSQLTLSCA = flag.String("postgresql_tls_ca", "", "Path to the CA certificate file for PostgreSQL TLS connection ") + postgreSQLServerName = flag.String("postgresql_server_name", "", "Name of the PostgreSQL server to be used as the Server Name in the TLS configuration") - mysqlMu sync.Mutex - mysqlErr error - mysqlDB *sql.DB - mysqlStorageInstance *mysqlProvider + postgresqlMu sync.Mutex + postgresqlErr error + postgresqlDB *sql.DB + postgresqlStorageInstance *postgresqlProvider ) -// GetDatabase returns an instance of MySQL database, or creates one. +// GetDatabase returns an instance of PostgreSQL database, or creates one. // -// TODO(pavelkalinnikov): Make the dependency of MySQL quota provider from -// MySQL storage provider explicit. +// TODO(pavelkalinnikov): Make the dependency of PostgreSQL quota provider from +// PostgreSQL storage provider explicit. func GetDatabase() (*sql.DB, error) { - mysqlMu.Lock() - defer mysqlMu.Unlock() - return getMySQLDatabaseLocked() + postgresqlMu.Lock() + defer postgresqlMu.Unlock() + return getPostgreSQLDatabaseLocked() } func init() { - if err := storage.RegisterProvider("mysql", newMySQLStorageProvider); err != nil { - klog.Fatalf("Failed to register storage provider mysql: %v", err) + if err := storage.RegisterProvider("postgresql", newPostgreSQLStorageProvider); err != nil { + klog.Fatalf("Failed to register storage provider postgresql: %v", err) } } -type mysqlProvider struct { +type postgresqlProvider struct { db *sql.DB mf monitoring.MetricFactory } -func newMySQLStorageProvider(mf monitoring.MetricFactory) (storage.Provider, error) { - mysqlMu.Lock() - defer mysqlMu.Unlock() - if mysqlStorageInstance == nil { - db, err := getMySQLDatabaseLocked() +func newPostgreSQLStorageProvider(mf monitoring.MetricFactory) (storage.Provider, error) { + postgresqlMu.Lock() + defer postgresqlMu.Unlock() + if postgresqlStorageInstance == nil { + db, err := getPostgreSQLDatabaseLocked() if err != nil { return nil, err } - mysqlStorageInstance = &mysqlProvider{ + postgresqlStorageInstance = &postgresqlProvider{ db: db, mf: mf, } } - return mysqlStorageInstance, nil + return postgresqlStorageInstance, nil } -// getMySQLDatabaseLocked returns an instance of MySQL database, or creates -// one. Requires mysqlMu to be locked. 
-func getMySQLDatabaseLocked() (*sql.DB, error) { - if mysqlDB != nil || mysqlErr != nil { - return mysqlDB, mysqlErr +// getPostgreSQLDatabaseLocked returns an instance of PostgreSQL database, or creates +// one. Requires postgresqlMu to be locked. +func getPostgreSQLDatabaseLocked() (*sql.DB, error) { + if postgresqlDB != nil || postgresqlErr != nil { + return postgresqlDB, postgresqlErr } - dsn := *mySQLURI - if *mySQLTLSCA != "" { - if err := registerMySQLTLSConfig(); err != nil { + dsn := *postgreSQLURI + if *postgreSQLTLSCA != "" { + if err := registerPostgreSQLTLSConfig(); err != nil { return nil, err } dsn += "?tls=custom" } db, err := OpenDB(dsn) if err != nil { - mysqlErr = err + postgresqlErr = err return nil, err } if *maxConns > 0 { @@ -105,30 +105,30 @@ func getMySQLDatabaseLocked() (*sql.DB, error) { if *maxIdle >= 0 { db.SetMaxIdleConns(*maxIdle) } - mysqlDB, mysqlErr = db, nil + postgresqlDB, postgresqlErr = db, nil return db, nil } -func (s *mysqlProvider) LogStorage() storage.LogStorage { +func (s *postgresqlProvider) LogStorage() storage.LogStorage { return NewLogStorage(s.db, s.mf) } -func (s *mysqlProvider) AdminStorage() storage.AdminStorage { +func (s *postgresqlProvider) AdminStorage() storage.AdminStorage { return NewAdminStorage(s.db) } -func (s *mysqlProvider) Close() error { +func (s *postgresqlProvider) Close() error { return s.db.Close() } -// registerMySQLTLSConfig registers a custom TLS config for MySQL using a provided CA certificate and optional server name. +// registerPostgreSQLTLSConfig registers a custom TLS config for PostgreSQL using a provided CA certificate and optional server name. // Returns an error if the CA certificate can't be read or added to the root cert pool, or when the registration of the TLS config fails. -func registerMySQLTLSConfig() error { - if *mySQLTLSCA == "" { +func registerPostgreSQLTLSConfig() error { + if *postgreSQLTLSCA == "" { return nil } rootCertPool := x509.NewCertPool() - pem, err := os.ReadFile(*mySQLTLSCA) + pem, err := os.ReadFile(*postgreSQLTLSCA) if err != nil { return err } @@ -138,8 +138,8 @@ func registerMySQLTLSConfig() error { tlsConfig := &tls.Config{ RootCAs: rootCertPool, } - if *mySQLServerName != "" { - tlsConfig.ServerName = *mySQLServerName + if *postgreSQLServerName != "" { + tlsConfig.ServerName = *postgreSQLServerName } - return mysql.RegisterTLSConfig("custom", tlsConfig) + return postgresql.RegisterTLSConfig("custom", tlsConfig) } diff --git a/storage/postgresql/provider_test.go b/storage/postgresql/provider_test.go index ad20f408b6..15552437b0 100644 --- a/storage/postgresql/provider_test.go +++ b/storage/postgresql/provider_test.go @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. -package mysql +package postgresql import ( "flag" @@ -22,20 +22,20 @@ import ( "github.com/google/trillian/testonly/flagsaver" ) -func TestMySQLStorageProviderErrorPersistence(t *testing.T) { +func TestPostgreSQLStorageProviderErrorPersistence(t *testing.T) { defer flagsaver.Save().MustRestore() - if err := flag.Set("mysql_uri", "&bogus*:::?"); err != nil { + if err := flag.Set("postgresql_uri", "&bogus*:::?"); err != nil { t.Errorf("Failed to set flag: %v", err) } // First call: This should fail due to the Database URL being garbage. 
- _, err1 := storage.NewProvider("mysql", nil) + _, err1 := storage.NewProvider("postgresql", nil) if err1 == nil { t.Fatalf("Expected 'storage.NewProvider' to fail") } // Second call: This should fail with the same error. - _, err2 := storage.NewProvider("mysql", nil) + _, err2 := storage.NewProvider("postgresql", nil) if err2 == nil { t.Fatalf("Expected second call to 'storage.NewProvider' to fail") } diff --git a/storage/postgresql/queue.go b/storage/postgresql/queue.go index 53ff1071a6..e5d3436864 100644 --- a/storage/postgresql/queue.go +++ b/storage/postgresql/queue.go @@ -15,7 +15,7 @@ // See the License for the specific language governing permissions and // limitations under the License. -package mysql +package postgresql import ( "context" diff --git a/storage/postgresql/queue_batching.go b/storage/postgresql/queue_batching.go index d5317aa197..0c36c6c135 100644 --- a/storage/postgresql/queue_batching.go +++ b/storage/postgresql/queue_batching.go @@ -15,7 +15,7 @@ // See the License for the specific language governing permissions and // limitations under the License. -package mysql +package postgresql import ( "context" @@ -121,7 +121,7 @@ func (t *logTreeTX) UpdateSequencedLeaves(ctx context.Context, leaves []*trillia return t.removeSequencedLeaves(ctx, dequeuedLeaves) } -func (m *mySQLLogStorage) getDeleteUnsequencedStmt(ctx context.Context, num int) (*sql.Stmt, error) { +func (m *postgreSQLLogStorage) getDeleteUnsequencedStmt(ctx context.Context, num int) (*sql.Stmt, error) { return m.getStmt(ctx, deleteUnsequencedSQL, num, "?", "?") } diff --git a/storage/postgresql/schema/storage.sql b/storage/postgresql/schema/storage.sql index 0d571b24fa..e951876575 100644 --- a/storage/postgresql/schema/storage.sql +++ b/storage/postgresql/schema/storage.sql @@ -1,4 +1,4 @@ -# MySQL / MariaDB version of the tree schema +# PostgreSQL / MariaDB version of the tree schema -- --------------------------------------------- -- Tree stuff here @@ -42,7 +42,7 @@ CREATE TABLE IF NOT EXISTS Subtree( Nodes MEDIUMBLOB NOT NULL, SubtreeRevision INTEGER NOT NULL, -- Key columns must be in ASC order in order to benefit from group-by/min-max - -- optimization in MySQL. + -- optimization in PostgreSQL. PRIMARY KEY(TreeId, SubtreeId, SubtreeRevision), FOREIGN KEY(TreeId) REFERENCES Trees(TreeId) ON DELETE CASCADE ); diff --git a/storage/postgresql/sql.go b/storage/postgresql/sql.go index a48e2a554f..271f4fd5f0 100644 --- a/storage/postgresql/sql.go +++ b/storage/postgresql/sql.go @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. -package mysql +package postgresql import ( "bytes" @@ -22,7 +22,7 @@ import ( "time" "github.com/google/trillian" - "github.com/google/trillian/storage/mysql/mysqlpb" + "github.com/google/trillian/storage/postgresql/postgresqlpb" "google.golang.org/protobuf/types/known/anypb" "google.golang.org/protobuf/types/known/durationpb" "google.golang.org/protobuf/types/known/timestamppb" @@ -135,16 +135,16 @@ func readTree(r row) (*trillian.Tree, error) { buff := bytes.NewBuffer(publicKey) dec := gob.NewDecoder(buff) ss := &storageSettings{} - var o *mysqlpb.StorageOptions + var o *postgresqlpb.StorageOptions if err := dec.Decode(ss); err != nil { // If there are no storageSettings then this tree was created before settings // were supported, and thus we have to populate the settings with the oldest // settings for features. 
- o = &mysqlpb.StorageOptions{ + o = &postgresqlpb.StorageOptions{ SubtreeRevisions: true, } } else { - o = &mysqlpb.StorageOptions{ + o = &postgresqlpb.StorageOptions{ SubtreeRevisions: ss.Revisioned, } } diff --git a/storage/postgresql/storage_test.go b/storage/postgresql/storage_test.go index c08aa757f2..a5b7e91b9f 100644 --- a/storage/postgresql/storage_test.go +++ b/storage/postgresql/storage_test.go @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. -package mysql +package postgresql import ( "bytes" @@ -29,7 +29,7 @@ import ( "github.com/google/trillian" "github.com/google/trillian/storage" - "github.com/google/trillian/storage/mysql/mysqlpb" + "github.com/google/trillian/storage/postgresql/postgresqlpb" "github.com/google/trillian/storage/testdb" storageto "github.com/google/trillian/storage/testonly" stree "github.com/google/trillian/storage/tree" @@ -56,7 +56,7 @@ var ( ) func mustCreateRevisionedStorage() *anypb.Any { - o := &mysqlpb.StorageOptions{ + o := &postgresqlpb.StorageOptions{ SubtreeRevisions: true, } a, err := anypb.New(o) @@ -274,7 +274,7 @@ func diffNodes(got, want []stree.Node) ([]stree.Node, []stree.Node) { } func openTestDBOrDie() (*sql.DB, func(context.Context)) { - db, done, err := testdb.NewTrillianDB(context.TODO(), testdb.DriverMySQL) + db, done, err := testdb.NewTrillianDB(context.TODO(), testdb.DriverPostgreSQL) if err != nil { panic(err) } @@ -345,8 +345,8 @@ var DB *sql.DB func TestMain(m *testing.M) { flag.Parse() - if !testdb.MySQLAvailable() { - klog.Errorf("MySQL not available, skipping all MySQL storage tests") + if !testdb.PostgreSQLAvailable() { + klog.Errorf("PostgreSQL not available, skipping all PostgreSQL storage tests") return } @@ -355,7 +355,7 @@ func TestMain(m *testing.M) { DB, done = openTestDBOrDie() if v, err := getVersion(DB); err == nil { - klog.Infof("MySQL version '%v'", v) + klog.Infof("PostgreSQL version '%v'", v) } status := m.Run() done(context.Background()) diff --git a/storage/postgresql/testdbpgx/testdbpgx.go b/storage/postgresql/testdbpgx/testdbpgx.go index da3c0bc3df..40fb210aa5 100644 --- a/storage/postgresql/testdbpgx/testdbpgx.go +++ b/storage/postgresql/testdbpgx/testdbpgx.go @@ -31,17 +31,17 @@ import ( "golang.org/x/sys/unix" "k8s.io/klog/v2" - _ "github.com/go-sql-driver/mysql" // mysql driver + _ "github.com/go-sql-driver/postgresql" // postgresql driver _ "github.com/lib/pq" // postgres driver ) const ( - // MySQLURIEnv is the name of the ENV variable checked for the test MySQL + // PostgreSQLURIEnv is the name of the ENV variable checked for the test PostgreSQL // instance URI to use. The value must have a trailing slash. - MySQLURIEnv = "TEST_MYSQL_URI" + PostgreSQLURIEnv = "TEST_POSTGRESQL_URI" // Note: sql.Open requires the URI to end with a slash. - defaultTestMySQLURI = "root@tcp(127.0.0.1)/" + defaultTestPostgreSQLURI = "root@tcp(127.0.0.1)/" // CockroachDBURIEnv is the name of the ENV variable checked for the test CockroachDB // instance URI to use. The value must have a trailing slash. 
@@ -57,7 +57,7 @@ type storageDriverInfo struct { } var ( - trillianMySQLSchema = testonly.RelativeToPackage("../mysql/schema/storage.sql") + trillianPostgreSQLSchema = testonly.RelativeToPackage("../postgresql/schema/storage.sql") trillianCRDBSchema = testonly.RelativeToPackage("../crdb/schema/storage.sql") ) @@ -65,17 +65,17 @@ var ( type DriverName string const ( - // DriverMySQL is the identifier for the MySQL storage driver. - DriverMySQL DriverName = "mysql" + // DriverPostgreSQL is the identifier for the PostgreSQL storage driver. + DriverPostgreSQL DriverName = "postgresql" // DriverCockroachDB is the identifier for the CockroachDB storage driver. DriverCockroachDB DriverName = "cockroachdb" ) var driverMapping = map[DriverName]storageDriverInfo{ - DriverMySQL: { - sqlDriverName: "mysql", - schema: trillianMySQLSchema, - uriFunc: mysqlURI, + DriverPostgreSQL: { + sqlDriverName: "postgresql", + schema: trillianPostgreSQLSchema, + uriFunc: postgresqlURI, }, DriverCockroachDB: { sqlDriverName: "postgres", @@ -84,20 +84,20 @@ var driverMapping = map[DriverName]storageDriverInfo{ }, } -// mysqlURI returns the MySQL connection URI to use for tests. It returns the -// value in the ENV variable defined by MySQLURIEnv. If the value is empty, -// returns defaultTestMySQLURI. +// postgresqlURI returns the PostgreSQL connection URI to use for tests. It returns the +// value in the ENV variable defined by PostgreSQLURIEnv. If the value is empty, +// returns defaultTestPostgreSQLURI. // // We use an ENV variable, rather than a flag, for flexibility. Only a subset // of the tests in this repo require a database and import this package. With a // flag, it would be necessary to distinguish "go test" invocations that need a // database, and those that don't. ENV allows to "blanket apply" this setting. -func mysqlURI(dbRef ...string) string { +func postgresqlURI(dbRef ...string) string { var stringurl string - if e := os.Getenv(MySQLURIEnv); len(e) > 0 { + if e := os.Getenv(PostgreSQLURIEnv); len(e) > 0 { stringurl = e } else { - stringurl = defaultTestMySQLURI + stringurl = defaultTestPostgreSQLURI } for _, ref := range dbRef { @@ -145,9 +145,9 @@ func getURL(unparsedurl string) *url.URL { return u } -// MySQLAvailable indicates whether the configured MySQL database is available. -func MySQLAvailable() bool { - return dbAvailable(DriverMySQL) +// PostgreSQLAvailable indicates whether the configured PostgreSQL database is available. +func PostgreSQLAvailable() bool { + return dbAvailable(DriverPostgreSQL) } // CockroachDBAvailable indicates whether the configured CockroachDB database is available. @@ -282,13 +282,13 @@ func sanitize(script string) string { return buf.String() } -// SkipIfNoMySQL is a test helper that skips tests that require a local MySQL. -func SkipIfNoMySQL(t *testing.T) { +// SkipIfNoPostgreSQL is a test helper that skips tests that require a local PostgreSQL. +func SkipIfNoPostgreSQL(t *testing.T) { t.Helper() - if !MySQLAvailable() { - t.Skip("Skipping test as MySQL not available") + if !PostgreSQLAvailable() { + t.Skip("Skipping test as PostgreSQL not available") } - t.Logf("Test MySQL available at %q", mysqlURI()) + t.Logf("Test PostgreSQL available at %q", postgresqlURI()) } // SkipIfNoCockroachDB is a test helper that skips tests that require a local CockroachDB. 
diff --git a/storage/postgresql/testdbpgx/testdbpgx_test.go b/storage/postgresql/testdbpgx/testdbpgx_test.go index dd0b520bd8..cf2b72a549 100644 --- a/storage/postgresql/testdbpgx/testdbpgx_test.go +++ b/storage/postgresql/testdbpgx/testdbpgx_test.go @@ -20,8 +20,8 @@ import ( _ "k8s.io/klog/v2" ) -func TestMySQLWarning(t *testing.T) { - if !MySQLAvailable() { - t.Error("Deliberate test failure as a reminder that all storage-related tests are being skipped due to absent MySQL") +func TestPostgreSQLWarning(t *testing.T) { + if !PostgreSQLAvailable() { + t.Error("Deliberate test failure as a reminder that all storage-related tests are being skipped due to absent PostgreSQL") } } diff --git a/storage/postgresql/tree_storage.go b/storage/postgresql/tree_storage.go index 7fb5ed8d0d..0c120aed05 100644 --- a/storage/postgresql/tree_storage.go +++ b/storage/postgresql/tree_storage.go @@ -12,8 +12,8 @@ // See the License for the specific language governing permissions and // limitations under the License. -// Package mysql provides a MySQL-based storage layer implementation. -package mysql +// Package postgresql provides a PostgreSQL-based storage layer implementation. +package postgresql import ( "context" @@ -26,7 +26,7 @@ import ( "github.com/google/trillian" "github.com/google/trillian/storage/cache" - "github.com/google/trillian/storage/mysql/mysqlpb" + "github.com/google/trillian/storage/postgresql/postgresqlpb" "github.com/google/trillian/storage/storagepb" "github.com/google/trillian/storage/tree" "google.golang.org/protobuf/proto" @@ -63,9 +63,9 @@ const ( placeholderSQL = "" ) -// mySQLTreeStorage is shared between the mySQLLog- and (forthcoming) mySQLMap- +// postgreSQLTreeStorage is shared between the postgreSQLLog- and (forthcoming) postgreSQLMap- // Storage implementations, and contains functionality which is common to both, -type mySQLTreeStorage struct { +type postgreSQLTreeStorage struct { db *sql.DB // Must hold the mutex before manipulating the statement map. Sharing a lock because @@ -76,25 +76,25 @@ type mySQLTreeStorage struct { statements map[string]map[int]*sql.Stmt } -// OpenDB opens a database connection for all MySQL-based storage implementations. +// OpenDB opens a database connection for all PostgreSQL-based storage implementations. func OpenDB(dbURL string) (*sql.DB, error) { - db, err := sql.Open("mysql", dbURL) + db, err := sql.Open("postgresql", dbURL) if err != nil { // Don't log uri as it could contain credentials - klog.Warningf("Could not open MySQL database, check config: %s", err) + klog.Warningf("Could not open PostgreSQL database, check config: %s", err) return nil, err } if _, err := db.ExecContext(context.TODO(), "SET sql_mode = 'STRICT_ALL_TABLES'"); err != nil { - klog.Warningf("Failed to set strict mode on mysql db: %s", err) + klog.Warningf("Failed to set strict mode on postgresql db: %s", err) return nil, err } return db, nil } -func newTreeStorage(db *sql.DB) *mySQLTreeStorage { - return &mySQLTreeStorage{ +func newTreeStorage(db *sql.DB) *postgreSQLTreeStorage { + return &postgreSQLTreeStorage{ db: db, statements: make(map[string]map[int]*sql.Stmt), } @@ -116,7 +116,7 @@ func expandPlaceholderSQL(sql string, num int, first, rest string) string { // and number of bound arguments. // TODO(al,martin): consider pulling this all out as a separate unit for reuse // elsewhere. 
-func (m *mySQLTreeStorage) getStmt(ctx context.Context, statement string, num int, first, rest string) (*sql.Stmt, error) { +func (m *postgreSQLTreeStorage) getStmt(ctx context.Context, statement string, num int, first, rest string) (*sql.Stmt, error) { m.statementMutex.Lock() defer m.statementMutex.Unlock() @@ -141,7 +141,7 @@ func (m *mySQLTreeStorage) getStmt(ctx context.Context, statement string, num in return s, nil } -func (m *mySQLTreeStorage) getSubtreeStmt(ctx context.Context, subtreeRevs bool, num int) (*sql.Stmt, error) { +func (m *postgreSQLTreeStorage) getSubtreeStmt(ctx context.Context, subtreeRevs bool, num int) (*sql.Stmt, error) { if subtreeRevs { return m.getStmt(ctx, selectSubtreeSQL, num, "?", "?") } else { @@ -149,18 +149,18 @@ func (m *mySQLTreeStorage) getSubtreeStmt(ctx context.Context, subtreeRevs bool, } } -func (m *mySQLTreeStorage) setSubtreeStmt(ctx context.Context, num int) (*sql.Stmt, error) { +func (m *postgreSQLTreeStorage) setSubtreeStmt(ctx context.Context, num int) (*sql.Stmt, error) { return m.getStmt(ctx, insertSubtreeMultiSQL, num, "VALUES(?, ?, ?, ?)", "(?, ?, ?, ?)") } -func (m *mySQLTreeStorage) beginTreeTx(ctx context.Context, tree *trillian.Tree, hashSizeBytes int, subtreeCache *cache.SubtreeCache) (treeTX, error) { +func (m *postgreSQLTreeStorage) beginTreeTx(ctx context.Context, tree *trillian.Tree, hashSizeBytes int, subtreeCache *cache.SubtreeCache) (treeTX, error) { t, err := m.db.BeginTx(ctx, nil /* opts */) if err != nil { klog.Warningf("Could not start tree TX: %s", err) return treeTX{}, err } var subtreeRevisions bool - o := &mysqlpb.StorageOptions{} + o := &postgresqlpb.StorageOptions{} if err := anypb.UnmarshalTo(tree.StorageSettings, o, proto.UnmarshalOptions{}); err != nil { return treeTX{}, fmt.Errorf("failed to unmarshal StorageSettings: %v", err) } @@ -183,7 +183,7 @@ type treeTX struct { mu *sync.Mutex closed bool tx *sql.Tx - ts *mySQLTreeStorage + ts *postgreSQLTreeStorage treeID int64 treeType trillian.TreeType hashSizeBytes int @@ -358,14 +358,14 @@ func (t *treeTX) storeSubtrees(ctx context.Context, subtrees []*storagepb.Subtre func checkResultOkAndRowCountIs(res sql.Result, err error, count int64) error { // The Exec() might have just failed if err != nil { - return mysqlToGRPC(err) + return postgresqlToGRPC(err) } // Otherwise we have to look at the result of the operation rowsAffected, rowsError := res.RowsAffected() if rowsError != nil { - return mysqlToGRPC(rowsError) + return postgresqlToGRPC(rowsError) } if rowsAffected != count { From b0f7dfe1e17b41be156b795923231d716f6ef195 Mon Sep 17 00:00:00 2001 From: Rob Stradling Date: Tue, 1 Oct 2024 12:19:43 +0100 Subject: [PATCH 04/62] Build the PostgreSQL protobuf definitions --- storage/postgresql/postgresqlpb/options.pb.go | 27 ++++++++++--------- 1 file changed, 14 insertions(+), 13 deletions(-) diff --git a/storage/postgresql/postgresqlpb/options.pb.go b/storage/postgresql/postgresqlpb/options.pb.go index 76936064a7..1816d39b72 100644 --- a/storage/postgresql/postgresqlpb/options.pb.go +++ b/storage/postgresql/postgresqlpb/options.pb.go @@ -14,8 +14,8 @@ // Code generated by protoc-gen-go. DO NOT EDIT. 
// versions: -// protoc-gen-go v1.34.2 -// protoc v3.20.1 +// protoc-gen-go v1.32.0 +// protoc v4.22.3 // source: options.proto package postgresqlpb @@ -90,15 +90,16 @@ var File_options_proto protoreflect.FileDescriptor var file_options_proto_rawDesc = []byte{ 0x0a, 0x0d, 0x6f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x12, - 0x07, 0x6d, 0x79, 0x73, 0x71, 0x6c, 0x70, 0x62, 0x22, 0x3c, 0x0a, 0x0e, 0x53, 0x74, 0x6f, 0x72, - 0x61, 0x67, 0x65, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x12, 0x2a, 0x0a, 0x10, 0x73, 0x75, - 0x62, 0x74, 0x72, 0x65, 0x65, 0x52, 0x65, 0x76, 0x69, 0x73, 0x69, 0x6f, 0x6e, 0x73, 0x18, 0x01, - 0x20, 0x01, 0x28, 0x08, 0x52, 0x10, 0x73, 0x75, 0x62, 0x74, 0x72, 0x65, 0x65, 0x52, 0x65, 0x76, - 0x69, 0x73, 0x69, 0x6f, 0x6e, 0x73, 0x42, 0x32, 0x5a, 0x30, 0x67, 0x69, 0x74, 0x68, 0x75, 0x62, - 0x2e, 0x63, 0x6f, 0x6d, 0x2f, 0x67, 0x6f, 0x6f, 0x67, 0x6c, 0x65, 0x2f, 0x74, 0x72, 0x69, 0x6c, - 0x6c, 0x69, 0x61, 0x6e, 0x2f, 0x73, 0x74, 0x6f, 0x72, 0x61, 0x67, 0x65, 0x2f, 0x6d, 0x79, 0x73, - 0x71, 0x6c, 0x2f, 0x6d, 0x79, 0x73, 0x71, 0x6c, 0x70, 0x62, 0x62, 0x06, 0x70, 0x72, 0x6f, 0x74, - 0x6f, 0x33, + 0x0c, 0x70, 0x6f, 0x73, 0x74, 0x67, 0x72, 0x65, 0x73, 0x71, 0x6c, 0x70, 0x62, 0x22, 0x3c, 0x0a, + 0x0e, 0x53, 0x74, 0x6f, 0x72, 0x61, 0x67, 0x65, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x12, + 0x2a, 0x0a, 0x10, 0x73, 0x75, 0x62, 0x74, 0x72, 0x65, 0x65, 0x52, 0x65, 0x76, 0x69, 0x73, 0x69, + 0x6f, 0x6e, 0x73, 0x18, 0x01, 0x20, 0x01, 0x28, 0x08, 0x52, 0x10, 0x73, 0x75, 0x62, 0x74, 0x72, + 0x65, 0x65, 0x52, 0x65, 0x76, 0x69, 0x73, 0x69, 0x6f, 0x6e, 0x73, 0x42, 0x3c, 0x5a, 0x3a, 0x67, + 0x69, 0x74, 0x68, 0x75, 0x62, 0x2e, 0x63, 0x6f, 0x6d, 0x2f, 0x67, 0x6f, 0x6f, 0x67, 0x6c, 0x65, + 0x2f, 0x74, 0x72, 0x69, 0x6c, 0x6c, 0x69, 0x61, 0x6e, 0x2f, 0x73, 0x74, 0x6f, 0x72, 0x61, 0x67, + 0x65, 0x2f, 0x70, 0x6f, 0x73, 0x74, 0x67, 0x72, 0x65, 0x73, 0x71, 0x6c, 0x2f, 0x70, 0x6f, 0x73, + 0x74, 0x67, 0x72, 0x65, 0x73, 0x71, 0x6c, 0x70, 0x62, 0x62, 0x06, 0x70, 0x72, 0x6f, 0x74, 0x6f, + 0x33, } var ( @@ -114,7 +115,7 @@ func file_options_proto_rawDescGZIP() []byte { } var file_options_proto_msgTypes = make([]protoimpl.MessageInfo, 1) -var file_options_proto_goTypes = []any{ +var file_options_proto_goTypes = []interface{}{ (*StorageOptions)(nil), // 0: postgresqlpb.StorageOptions } var file_options_proto_depIdxs = []int32{ @@ -131,7 +132,7 @@ func file_options_proto_init() { return } if !protoimpl.UnsafeEnabled { - file_options_proto_msgTypes[0].Exporter = func(v any, i int) any { + file_options_proto_msgTypes[0].Exporter = func(v interface{}, i int) interface{} { switch v := v.(*StorageOptions); i { case 0: return &v.state From 18cf30646d6704b95a736381bc34fa0ca77644d6 Mon Sep 17 00:00:00 2001 From: Rob Stradling Date: Tue, 1 Oct 2024 12:19:43 +0100 Subject: [PATCH 05/62] Rename PostgreSQL 'testdb' package to 'testdbpgx' --- quota/postgresqlqm/postgresql_quota_test.go | 2 +- storage/postgresql/storage_test.go | 2 +- storage/postgresql/testdbpgx/testdbpgx.go | 2 +- storage/postgresql/testdbpgx/testdbpgx_test.go | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/quota/postgresqlqm/postgresql_quota_test.go b/quota/postgresqlqm/postgresql_quota_test.go index 8d03cd2778..ec5ad74a6c 100644 --- a/quota/postgresqlqm/postgresql_quota_test.go +++ b/quota/postgresqlqm/postgresql_quota_test.go @@ -27,7 +27,7 @@ import ( "github.com/google/trillian/quota/postgresqlqm" "github.com/google/trillian/storage" "github.com/google/trillian/storage/postgresql" - 
"github.com/google/trillian/storage/testdb" + testdb "github.com/google/trillian/storage/postgresql/testdbpgx" "github.com/google/trillian/types" stestonly "github.com/google/trillian/storage/testonly" diff --git a/storage/postgresql/storage_test.go b/storage/postgresql/storage_test.go index a5b7e91b9f..f6c5434548 100644 --- a/storage/postgresql/storage_test.go +++ b/storage/postgresql/storage_test.go @@ -30,7 +30,7 @@ import ( "github.com/google/trillian" "github.com/google/trillian/storage" "github.com/google/trillian/storage/postgresql/postgresqlpb" - "github.com/google/trillian/storage/testdb" + testdb "github.com/google/trillian/storage/postgresql/testdbpgx" storageto "github.com/google/trillian/storage/testonly" stree "github.com/google/trillian/storage/tree" "github.com/google/trillian/types" diff --git a/storage/postgresql/testdbpgx/testdbpgx.go b/storage/postgresql/testdbpgx/testdbpgx.go index 40fb210aa5..79cdd1a69e 100644 --- a/storage/postgresql/testdbpgx/testdbpgx.go +++ b/storage/postgresql/testdbpgx/testdbpgx.go @@ -13,7 +13,7 @@ // limitations under the License. // Package testdb creates new databases for tests. -package testdb +package testdbpgx import ( "bytes" diff --git a/storage/postgresql/testdbpgx/testdbpgx_test.go b/storage/postgresql/testdbpgx/testdbpgx_test.go index cf2b72a549..6cd0eea8be 100644 --- a/storage/postgresql/testdbpgx/testdbpgx_test.go +++ b/storage/postgresql/testdbpgx/testdbpgx_test.go @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. -package testdb +package testdbpgx import ( "testing" From e01fb0b5d446b8352c95a6442938a111fe6c2b37 Mon Sep 17 00:00:00 2001 From: Rob Stradling Date: Tue, 1 Oct 2024 12:19:43 +0100 Subject: [PATCH 06/62] Convert from database/sql API to jackc/pgx/v5 API --- quota/postgresqlqm/postgresql_quota.go | 16 +++++----- quota/postgresqlqm/postgresql_quota_test.go | 14 ++++----- storage/postgresql/admin_storage.go | 34 ++++++++++----------- storage/postgresql/admin_storage_test.go | 12 ++++---- storage/postgresql/log_storage.go | 34 ++++++++++----------- storage/postgresql/log_storage_test.go | 16 +++++----- storage/postgresql/provider.go | 8 ++--- storage/postgresql/queue.go | 6 ++-- storage/postgresql/queue_batching.go | 6 ++-- storage/postgresql/storage_test.go | 12 ++++---- storage/postgresql/testdbpgx/testdbpgx.go | 20 ++++++------ storage/postgresql/tree_storage.go | 20 ++++++------ 12 files changed, 99 insertions(+), 99 deletions(-) diff --git a/quota/postgresqlqm/postgresql_quota.go b/quota/postgresqlqm/postgresql_quota.go index 47922bcf8d..02a864dbdc 100644 --- a/quota/postgresqlqm/postgresql_quota.go +++ b/quota/postgresqlqm/postgresql_quota.go @@ -54,7 +54,7 @@ var ErrTooManyUnsequencedRows = errors.New("too many unsequenced rows") // rows (to be exact, tokens = MaxUnsequencedRows - actualUnsequencedRows). // Other quotas are considered infinite. 
type QuotaManager struct { - DB *sql.DB + DB *pgxpool.Pool MaxUnsequencedRows int UseSelectCount bool } @@ -98,14 +98,14 @@ func (m *QuotaManager) countUnsequenced(ctx context.Context) (int, error) { return countFromInformationSchema(ctx, m.DB) } -func countFromInformationSchema(ctx context.Context, db *sql.DB) (int, error) { +func countFromInformationSchema(ctx context.Context, db *pgxpool.Pool) (int, error) { // turn off statistics caching for PostgreSQL 8 if err := turnOffInformationSchemaCache(ctx, db); err != nil { return 0, err } // information_schema.tables doesn't have an explicit PK, so let's play it safe and ensure // the cursor returns a single row. - rows, err := db.QueryContext(ctx, countFromInformationSchemaQuery, "Unsequenced", "BASE TABLE") + rows, err := db.Query(ctx, countFromInformationSchemaQuery, "Unsequenced", "BASE TABLE") if err != nil { return 0, err } @@ -127,9 +127,9 @@ func countFromInformationSchema(ctx context.Context, db *sql.DB) (int, error) { return count, nil } -func countFromTable(ctx context.Context, db *sql.DB) (int, error) { +func countFromTable(ctx context.Context, db *pgxpool.Pool) (int, error) { var count int - if err := db.QueryRowContext(ctx, countFromUnsequencedQuery).Scan(&count); err != nil { + if err := db.QueryRow(ctx, countFromUnsequencedQuery).Scan(&count); err != nil { return 0, err } return count, nil @@ -139,9 +139,9 @@ func countFromTable(ctx context.Context, db *sql.DB) (int, error) { // To always retrieve the latest statistics directly from the storage engine and bypass cached values, set information_schema_stats_expiry to 0. // See https://dev.postgresql.com/doc/refman/8.0/en/server-system-variables.html#sysvar_information_schema_stats_expiry // PostgreSQL versions prior to 8 will fail safely. 
-func turnOffInformationSchemaCache(ctx context.Context, db *sql.DB) error { +func turnOffInformationSchemaCache(ctx context.Context, db *pgxpool.Pool) error { opt := "information_schema_stats_expiry" - res := db.QueryRowContext(ctx, "SHOW VARIABLES LIKE '"+opt+"'") + res := db.QueryRow(ctx, "SHOW VARIABLES LIKE '"+opt+"'") var none string var expiry int @@ -155,7 +155,7 @@ func turnOffInformationSchemaCache(ctx context.Context, db *sql.DB) error { } if expiry != 0 { - if _, err := db.ExecContext(ctx, "SET SESSION "+opt+"=0"); err != nil { + if _, err := db.Exec(ctx, "SET SESSION "+opt+"=0"); err != nil { return fmt.Errorf("failed to set variable %q: %v", opt, err) } } diff --git a/quota/postgresqlqm/postgresql_quota_test.go b/quota/postgresqlqm/postgresql_quota_test.go index ec5ad74a6c..ebcc3cd45d 100644 --- a/quota/postgresqlqm/postgresql_quota_test.go +++ b/quota/postgresqlqm/postgresql_quota_test.go @@ -226,15 +226,15 @@ func allSpecs(_ context.Context, _ quota.Manager, treeID int64) []quota.Spec { } } -func countUnsequenced(ctx context.Context, db *sql.DB) (int, error) { +func countUnsequenced(ctx context.Context, db *pgxpool.Pool) (int, error) { var count int - if err := db.QueryRowContext(ctx, "SELECT COUNT(*) FROM Unsequenced").Scan(&count); err != nil { + if err := db.QueryRow(ctx, "SELECT COUNT(*) FROM Unsequenced").Scan(&count); err != nil { return 0, err } return count, nil } -func createTree(ctx context.Context, db *sql.DB) (*trillian.Tree, error) { +func createTree(ctx context.Context, db *pgxpool.Pool) (*trillian.Tree, error) { var tree *trillian.Tree { @@ -267,7 +267,7 @@ func createTree(ctx context.Context, db *sql.DB) (*trillian.Tree, error) { return tree, nil } -func queueLeaves(ctx context.Context, db *sql.DB, tree *trillian.Tree, firstID, num int) error { +func queueLeaves(ctx context.Context, db *pgxpool.Pool, tree *trillian.Tree, firstID, num int) error { hasher := crypto.SHA256.New() leaves := []*trillian.LogLeaf{} @@ -291,7 +291,7 @@ func queueLeaves(ctx context.Context, db *sql.DB, tree *trillian.Tree, firstID, return err } -func setUnsequencedRows(ctx context.Context, db *sql.DB, tree *trillian.Tree, wantRows int) error { +func setUnsequencedRows(ctx context.Context, db *pgxpool.Pool, tree *trillian.Tree, wantRows int) error { count, err := countUnsequenced(ctx, db) if err != nil { return err @@ -302,10 +302,10 @@ func setUnsequencedRows(ctx context.Context, db *sql.DB, tree *trillian.Tree, wa // Clear the tables and re-create leaves from scratch. It's easier than having to reason // about duplicate entries. - if _, err := db.ExecContext(ctx, "DELETE FROM LeafData"); err != nil { + if _, err := db.Exec(ctx, "DELETE FROM LeafData"); err != nil { return err } - if _, err := db.ExecContext(ctx, "DELETE FROM Unsequenced"); err != nil { + if _, err := db.Exec(ctx, "DELETE FROM Unsequenced"); err != nil { return err } if err := queueLeaves(ctx, db, tree, 0 /* firstID */, wantRows); err != nil { diff --git a/storage/postgresql/admin_storage.go b/storage/postgresql/admin_storage.go index 7c71de6981..8a2ee73ef6 100644 --- a/storage/postgresql/admin_storage.go +++ b/storage/postgresql/admin_storage.go @@ -66,13 +66,13 @@ const ( ) // NewAdminStorage returns a PostgreSQL storage.AdminStorage implementation backed by DB. 
-func NewAdminStorage(db *sql.DB) *postgresqlAdminStorage { +func NewAdminStorage(db *pgxpool.Pool) *postgresqlAdminStorage { return &postgresqlAdminStorage{db} } // postgresqlAdminStorage implements storage.AdminStorage type postgresqlAdminStorage struct { - db *sql.DB + db *pgxpool.Pool } func (s *postgresqlAdminStorage) Snapshot(ctx context.Context) (storage.ReadOnlyAdminTX, error) { @@ -80,7 +80,7 @@ func (s *postgresqlAdminStorage) Snapshot(ctx context.Context) (storage.ReadOnly } func (s *postgresqlAdminStorage) beginInternal(ctx context.Context) (storage.AdminTX, error) { - tx, err := s.db.BeginTx(ctx, nil /* opts */) + tx, err := s.db.BeginTx(ctx, pgx.TxOptions{}) if err != nil { return nil, err } @@ -104,11 +104,11 @@ func (s *postgresqlAdminStorage) ReadWriteTransaction(ctx context.Context, f sto } func (s *postgresqlAdminStorage) CheckDatabaseAccessible(ctx context.Context) error { - return s.db.PingContext(ctx) + return s.db.Ping(ctx) } type adminTX struct { - tx *sql.Tx + tx pgx.Tx // mu guards reads/writes on closed, which happen on Commit/Close methods. // @@ -147,9 +147,9 @@ func (t *adminTX) GetTree(ctx context.Context, treeID int64) (*trillian.Tree, er }() // GetTree is an entry point for most RPCs, let's provide somewhat nicer error messages. - tree, err := readTree(stmt.QueryRowContext(ctx, treeID)) + tree, err := readTree(stmt.QueryRow(ctx, treeID)) switch { - case err == sql.ErrNoRows: + case err == pgx.ErrNoRows: // ErrNoRows doesn't provide useful information, so we don't forward it. return nil, status.Errorf(codes.NotFound, "tree %v not found", treeID) case err != nil: @@ -175,7 +175,7 @@ func (t *adminTX) ListTrees(ctx context.Context, includeDeleted bool) ([]*trilli klog.Errorf("stmt.Close(): %v", err) } }() - rows, err := stmt.QueryContext(ctx) + rows, err := stmt.Query(ctx) if err != nil { return nil, err } @@ -286,7 +286,7 @@ func (t *adminTX) CreateTree(ctx context.Context, tree *trillian.Tree) (*trillia } }() - _, err = insertTreeStmt.ExecContext( + _, err = insertTreeStmt.Exec( ctx, newTree.TreeId, newTree.TreeState.String(), @@ -331,7 +331,7 @@ func (t *adminTX) CreateTree(ctx context.Context, tree *trillian.Tree) (*trillia klog.Errorf("insertControlStmt.Close(): %v", err) } }() - _, err = insertControlStmt.ExecContext( + _, err = insertControlStmt.Exec( ctx, newTree.TreeId, true, /* SigningEnabled */ @@ -385,7 +385,7 @@ func (t *adminTX) UpdateTree(ctx context.Context, treeID int64, updateFunc func( } }() - if _, err = stmt.ExecContext( + if _, err = stmt.Exec( ctx, tree.TreeState.String(), tree.TreeType.String(), @@ -418,7 +418,7 @@ func (t *adminTX) updateDeleted(ctx context.Context, treeID int64, deleted bool, if err := validateDeleted(ctx, t.tx, treeID, !deleted); err != nil { return nil, err } - if _, err := t.tx.ExecContext( + if _, err := t.tx.Exec( ctx, "UPDATE Trees SET Deleted = ?, DeleteTimeMillis = ? 
WHERE TreeId = ?", deleted, deleteTimeMillis, treeID); err != nil { @@ -433,17 +433,17 @@ func (t *adminTX) HardDeleteTree(ctx context.Context, treeID int64) error { } // TreeControl didn't have "ON DELETE CASCADE" on previous versions, so let's hit it explicitly - if _, err := t.tx.ExecContext(ctx, "DELETE FROM TreeControl WHERE TreeId = ?", treeID); err != nil { + if _, err := t.tx.Exec(ctx, "DELETE FROM TreeControl WHERE TreeId = ?", treeID); err != nil { return err } - _, err := t.tx.ExecContext(ctx, "DELETE FROM Trees WHERE TreeId = ?", treeID) + _, err := t.tx.Exec(ctx, "DELETE FROM Trees WHERE TreeId = ?", treeID) return err } -func validateDeleted(ctx context.Context, tx *sql.Tx, treeID int64, wantDeleted bool) error { +func validateDeleted(ctx context.Context, tx pgx.Tx, treeID int64, wantDeleted bool) error { var nullDeleted sql.NullBool - switch err := tx.QueryRowContext(ctx, "SELECT Deleted FROM Trees WHERE TreeId = ?", treeID).Scan(&nullDeleted); { - case err == sql.ErrNoRows: + switch err := tx.QueryRow(ctx, "SELECT Deleted FROM Trees WHERE TreeId = ?", treeID).Scan(&nullDeleted); { + case err == pgx.ErrNoRows: return status.Errorf(codes.NotFound, "tree %v not found", treeID) case err != nil: return err diff --git a/storage/postgresql/admin_storage_test.go b/storage/postgresql/admin_storage_test.go index 7f613c1029..85661ebcc1 100644 --- a/storage/postgresql/admin_storage_test.go +++ b/storage/postgresql/admin_storage_test.go @@ -53,7 +53,7 @@ func TestAdminTX_CreateTree_InitializesStorageStructures(t *testing.T) { // Check if TreeControl is correctly written. var signingEnabled, sequencingEnabled bool var sequenceIntervalSeconds int - if err := DB.QueryRowContext(ctx, selectTreeControlByID, tree.TreeId).Scan(&signingEnabled, &sequencingEnabled, &sequenceIntervalSeconds); err != nil { + if err := DB.QueryRow(ctx, selectTreeControlByID, tree.TreeId).Scan(&signingEnabled, &sequencingEnabled, &sequenceIntervalSeconds); err != nil { t.Fatalf("Failed to read TreeControl: %v", err) } // We don't mind about specific values, defaults change, but let's check @@ -256,7 +256,7 @@ func TestAdminTX_GetTreeLegacies(t *testing.T) { // We are reaching really into the internals here, but it's the only way to set up // archival state. Going through the Create/Update methods will change the storage // options. - tx, err := s.db.BeginTx(ctx, nil /* opts */) + tx, err := s.db.BeginTx(ctx, pgx.TxOptions{}) if err != nil { t.Fatal(err) } @@ -303,8 +303,8 @@ func TestAdminTX_HardDeleteTree(t *testing.T) { // database and check that the rows are gone, so let's do just that. // If there's no record on Trees, then there can be no record in any of the dependent tables. 
var name string - if err := DB.QueryRowContext(ctx, "SELECT DisplayName FROM Trees WHERE TreeId = ?", tree.TreeId).Scan(&name); err != sql.ErrNoRows { - t.Errorf("QueryRowContext() returned err = %v, want = %v", err, sql.ErrNoRows) + if err := DB.QueryRow(ctx, "SELECT DisplayName FROM Trees WHERE TreeId = ?", tree.TreeId).Scan(&name); err != pgx.ErrNoRows { + t.Errorf("QueryRow() returned err = %v, want = %v", err, pgx.ErrNoRows) } } @@ -331,12 +331,12 @@ func TestCheckDatabaseAccessible_OK(t *testing.T) { } } -func setNulls(ctx context.Context, db *sql.DB, treeID int64) error { +func setNulls(ctx context.Context, db *pgxpool.Pool, treeID int64) error { stmt, err := db.PrepareContext(ctx, "UPDATE Trees SET DisplayName = NULL, Description = NULL WHERE TreeId = ?") if err != nil { return err } defer func() { _ = stmt.Close() }() - _, err = stmt.ExecContext(ctx, treeID) + _, err = stmt.Exec(ctx, treeID) return err } diff --git a/storage/postgresql/log_storage.go b/storage/postgresql/log_storage.go index b98da3deba..b8238400d3 100644 --- a/storage/postgresql/log_storage.go +++ b/storage/postgresql/log_storage.go @@ -129,7 +129,7 @@ type postgreSQLLogStorage struct { // NewLogStorage creates a storage.LogStorage instance for the specified PostgreSQL URL. // It assumes storage.AdminStorage is backed by the same PostgreSQL database as well. -func NewLogStorage(db *sql.DB, mf monitoring.MetricFactory) storage.LogStorage { +func NewLogStorage(db *pgxpool.Pool, mf monitoring.MetricFactory) storage.LogStorage { if mf == nil { mf = monitoring.InertMetricFactory{} } @@ -141,7 +141,7 @@ func NewLogStorage(db *sql.DB, mf monitoring.MetricFactory) storage.LogStorage { } func (m *postgreSQLLogStorage) CheckDatabaseAccessible(ctx context.Context) error { - return m.db.PingContext(ctx) + return m.db.Ping(ctx) } func (m *postgreSQLLogStorage) getLeavesByMerkleHashStmt(ctx context.Context, num int, orderBySequence bool) (*sql.Stmt, error) { @@ -159,7 +159,7 @@ func (m *postgreSQLLogStorage) getLeavesByLeafIdentityHashStmt(ctx context.Conte func (m *postgreSQLLogStorage) GetActiveLogIDs(ctx context.Context) ([]int64, error) { // Include logs that are DRAINING in the active list as we're still // integrating leaves into them. 
- rows, err := m.db.QueryContext( + rows, err := m.db.Query( ctx, selectNonDeletedTreeIDByTypeAndStateSQL, trillian.TreeType_LOG.String(), trillian.TreeType_PREORDERED_LOG.String(), trillian.TreeState_ACTIVE.String(), trillian.TreeState_DRAINING.String()) @@ -350,7 +350,7 @@ func (t *logTreeTX) DequeueLeaves(ctx context.Context, limit int, cutoffTime tim }() leaves := make([]*trillian.LogLeaf, 0, limit) - rows, err := stx.QueryContext(ctx, t.treeID, cutoffTime.UnixNano(), limit) + rows, err := stx.Query(ctx, t.treeID, cutoffTime.UnixNano(), limit) if err != nil { klog.Warningf("Failed to select rows for work: %s", err) return nil, err @@ -434,7 +434,7 @@ func (t *logTreeTX) QueueLeaves(ctx context.Context, leaves []*trillian.LogLeaf, return nil, fmt.Errorf("got invalid queue timestamp: %w", err) } qTimestamp := leaf.QueueTimestamp.AsTime() - _, err := t.tx.ExecContext(ctx, insertLeafDataSQL, t.treeID, leaf.LeafIdentityHash, leaf.LeafValue, leaf.ExtraData, qTimestamp.UnixNano()) + _, err := t.tx.Exec(ctx, insertLeafDataSQL, t.treeID, leaf.LeafIdentityHash, leaf.LeafValue, leaf.ExtraData, qTimestamp.UnixNano()) insertDuration := time.Since(leafStart) observe(queueInsertLeafLatency, insertDuration, label) if isDuplicateErr(err) { @@ -456,7 +456,7 @@ func (t *logTreeTX) QueueLeaves(ctx context.Context, leaves []*trillian.LogLeaf, leaf.MerkleLeafHash, } args = append(args, queueArgs(t.treeID, leaf.LeafIdentityHash, qTimestamp)...) - _, err = t.tx.ExecContext( + _, err = t.tx.Exec( ctx, insertUnsequencedEntrySQL, args..., @@ -533,7 +533,7 @@ func (t *logTreeTX) AddSequencedLeaves(ctx context.Context, leaves []*trillian.L // one of the two inserts fails, we remove the side effect by rolling back to // a savepoint installed before the first insert of the two. const savepoint = "SAVEPOINT AddSequencedLeaves" - if _, err := t.tx.ExecContext(ctx, savepoint); err != nil { + if _, err := t.tx.Exec(ctx, savepoint); err != nil { klog.Errorf("Error adding savepoint: %s", err) return nil, postgresqlToGRPC(err) } @@ -554,7 +554,7 @@ func (t *logTreeTX) AddSequencedLeaves(ctx context.Context, leaves []*trillian.L return nil, status.Errorf(codes.FailedPrecondition, "leaves[%d] has incorrect hash size %d, want %d", i, got, want) } - if _, err := t.tx.ExecContext(ctx, savepoint); err != nil { + if _, err := t.tx.Exec(ctx, savepoint); err != nil { klog.Errorf("Error updating savepoint: %s", err) return nil, postgresqlToGRPC(err) } @@ -562,7 +562,7 @@ func (t *logTreeTX) AddSequencedLeaves(ctx context.Context, leaves []*trillian.L res[i] = &trillian.QueuedLogLeaf{Status: ok} // TODO(pavelkalinnikov): Measure latencies. - _, err := t.tx.ExecContext(ctx, insertLeafDataSQL, + _, err := t.tx.Exec(ctx, insertLeafDataSQL, t.treeID, leaf.LeafIdentityHash, leaf.LeafValue, leaf.ExtraData, timestamp.UnixNano()) // TODO(pavelkalinnikov): Detach PREORDERED_LOG integration latency metric. @@ -576,13 +576,13 @@ func (t *logTreeTX) AddSequencedLeaves(ctx context.Context, leaves []*trillian.L return nil, postgresqlToGRPC(err) } - _, err = t.tx.ExecContext(ctx, insertSequencedLeafSQL+valuesPlaceholder5, + _, err = t.tx.Exec(ctx, insertSequencedLeafSQL+valuesPlaceholder5, t.treeID, leaf.LeafIdentityHash, leaf.MerkleLeafHash, leaf.LeafIndex, 0) // TODO(pavelkalinnikov): Update IntegrateTimestamp on integrating the leaf. 
if isDuplicateErr(err) { res[i].Status = status.New(codes.FailedPrecondition, "conflicting LeafIndex").Proto() - if _, err := t.tx.ExecContext(ctx, "ROLLBACK TO "+savepoint); err != nil { + if _, err := t.tx.Exec(ctx, "ROLLBACK TO "+savepoint); err != nil { klog.Errorf("Error rolling back to savepoint: %s", err) return nil, postgresqlToGRPC(err) } @@ -594,7 +594,7 @@ func (t *logTreeTX) AddSequencedLeaves(ctx context.Context, leaves []*trillian.L // TODO(pavelkalinnikov): Load LeafData for conflicting entries. } - if _, err := t.tx.ExecContext(ctx, "RELEASE "+savepoint); err != nil { + if _, err := t.tx.Exec(ctx, "RELEASE "+savepoint); err != nil { klog.Errorf("Error releasing savepoint: %s", err) return nil, postgresqlToGRPC(err) } @@ -631,7 +631,7 @@ func (t *logTreeTX) getLeavesByRangeInternal(ctx context.Context, start, count i // TODO(pavelkalinnikov): Further clip `count` to a safe upper bound like 64k. args := []interface{}{start, start + count, t.treeID} - rows, err := t.tx.QueryContext(ctx, selectLeavesByRangeSQL, args...) + rows, err := t.tx.Query(ctx, selectLeavesByRangeSQL, args...) if err != nil { klog.Warningf("Failed to get leaves by range: %s", err) return nil, err @@ -719,10 +719,10 @@ func (t *logTreeTX) LatestSignedLogRoot(ctx context.Context) (*trillian.SignedLo func (t *logTreeTX) fetchLatestRoot(ctx context.Context) (*trillian.SignedLogRoot, int64, error) { var timestamp, treeSize, treeRevision int64 var rootHash, rootSignatureBytes []byte - if err := t.tx.QueryRowContext( + if err := t.tx.QueryRow( ctx, selectLatestSignedLogRootSQL, t.treeID).Scan( &timestamp, &treeSize, &rootHash, &treeRevision, &rootSignatureBytes, - ); err == sql.ErrNoRows { + ); err == pgx.ErrNoRows { // It's possible there are no roots for this tree yet return nil, 0, storage.ErrTreeNeedsInit } @@ -753,7 +753,7 @@ func (t *logTreeTX) StoreSignedLogRoot(ctx context.Context, root *trillian.Signe return fmt.Errorf("unimplemented: postgresql storage does not support log root metadata") } - res, err := t.tx.ExecContext( + res, err := t.tx.Exec( ctx, insertTreeHeadSQL, t.treeID, @@ -782,7 +782,7 @@ func (t *logTreeTX) getLeavesByHashInternal(ctx context.Context, leafHashes [][] args = append(args, []byte(hash)) } args = append(args, t.treeID) - rows, err := stx.QueryContext(ctx, args...) + rows, err := stx.Query(ctx, args...) if err != nil { klog.Warningf("Query() %s hash = %v", desc, err) return nil, err diff --git a/storage/postgresql/log_storage_test.go b/storage/postgresql/log_storage_test.go index 9d3f144241..79043e98cd 100644 --- a/storage/postgresql/log_storage_test.go +++ b/storage/postgresql/log_storage_test.go @@ -67,12 +67,12 @@ const ( // run in parallel or race conditions / unexpected interactions. Tests that pass should hold // no locks afterwards.
-func createFakeLeaf(ctx context.Context, db *sql.DB, logID int64, rawHash, hash, data, extraData []byte, seq int64, t *testing.T) *trillian.LogLeaf { +func createFakeLeaf(ctx context.Context, db *pgxpool.Pool, logID int64, rawHash, hash, data, extraData []byte, seq int64, t *testing.T) *trillian.LogLeaf { t.Helper() queuedAtNanos := fakeQueueTime.UnixNano() integratedAtNanos := fakeIntegrateTime.UnixNano() - _, err := db.ExecContext(ctx, "INSERT INTO LeafData(TreeId, LeafIdentityHash, LeafValue, ExtraData, QueueTimestampNanos) VALUES(?,?,?,?,?)", logID, rawHash, data, extraData, queuedAtNanos) - _, err2 := db.ExecContext(ctx, "INSERT INTO SequencedLeafData(TreeId, SequenceNumber, LeafIdentityHash, MerkleLeafHash, IntegrateTimestampNanos) VALUES(?,?,?,?,?)", logID, seq, rawHash, hash, integratedAtNanos) + _, err := db.Exec(ctx, "INSERT INTO LeafData(TreeId, LeafIdentityHash, LeafValue, ExtraData, QueueTimestampNanos) VALUES(?,?,?,?,?)", logID, rawHash, data, extraData, queuedAtNanos) + _, err2 := db.Exec(ctx, "INSERT INTO SequencedLeafData(TreeId, SequenceNumber, LeafIdentityHash, MerkleLeafHash, IntegrateTimestampNanos) VALUES(?,?,?,?,?)", logID, seq, rawHash, hash, integratedAtNanos) if err != nil || err2 != nil { t.Fatalf("Failed to create test leaves: %v %v", err, err2) @@ -214,7 +214,7 @@ func TestQueueLeaves(t *testing.T) { // Should see the leaves in the database. There is no API to read from the unsequenced data. var count int - if err := DB.QueryRowContext(ctx, "SELECT COUNT(*) FROM Unsequenced WHERE TreeID=?", tree.TreeId).Scan(&count); err != nil { + if err := DB.QueryRow(ctx, "SELECT COUNT(*) FROM Unsequenced WHERE TreeID=?", tree.TreeId).Scan(&count); err != nil { t.Fatalf("Could not query row count: %v", err) } if leavesToInsert != count { @@ -223,7 +223,7 @@ func TestQueueLeaves(t *testing.T) { // Additional check on timestamp being set correctly in the database var queueTimestamp int64 - if err := DB.QueryRowContext(ctx, "SELECT DISTINCT QueueTimestampNanos FROM Unsequenced WHERE TreeID=?", tree.TreeId).Scan(&queueTimestamp); err != nil { + if err := DB.QueryRow(ctx, "SELECT DISTINCT QueueTimestampNanos FROM Unsequenced WHERE TreeID=?", tree.TreeId).Scan(&queueTimestamp); err != nil { t.Fatalf("Could not query timestamp: %v", err) } if got, want := queueTimestamp, fakeQueueTime.UnixNano(); got != want { @@ -254,7 +254,7 @@ func TestQueueLeavesDuplicateBigBatch(t *testing.T) { // Should see the leaves in the database. There is no API to read from the unsequenced data. 
var count int - if err := DB.QueryRowContext(ctx, "SELECT COUNT(*) FROM Unsequenced WHERE TreeID=?", tree.TreeId).Scan(&count); err != nil { + if err := DB.QueryRow(ctx, "SELECT COUNT(*) FROM Unsequenced WHERE TreeID=?", tree.TreeId).Scan(&count); err != nil { t.Fatalf("Could not query row count: %v", err) } if leafCount != count { @@ -735,8 +735,8 @@ func TestGetActiveLogIDs(t *testing.T) { } }() for _, treeID := range []int64{deletedLog.TreeId} { - if _, err := updateDeletedStmt.ExecContext(ctx, true, treeID); err != nil { - t.Fatalf("ExecContext(%v) returned err = %v", treeID, err) + if _, err := updateDeletedStmt.Exec(ctx, true, treeID); err != nil { + t.Fatalf("Exec(%v) returned err = %v", treeID, err) } } diff --git a/storage/postgresql/provider.go b/storage/postgresql/provider.go index 054b9a0be1..58c844a15b 100644 --- a/storage/postgresql/provider.go +++ b/storage/postgresql/provider.go @@ -40,7 +40,7 @@ var ( postgresqlMu sync.Mutex postgresqlErr error - postgresqlDB *sql.DB + postgresqlDB *pgxpool.Pool postgresqlStorageInstance *postgresqlProvider ) @@ -48,7 +48,7 @@ var ( // // TODO(pavelkalinnikov): Make the dependency of PostgreSQL quota provider from // PostgreSQL storage provider explicit. -func GetDatabase() (*sql.DB, error) { +func GetDatabase() (*pgxpool.Pool, error) { postgresqlMu.Lock() defer postgresqlMu.Unlock() return getPostgreSQLDatabaseLocked() @@ -61,7 +61,7 @@ func init() { } type postgresqlProvider struct { - db *sql.DB + db *pgxpool.Pool mf monitoring.MetricFactory } @@ -83,7 +83,7 @@ func newPostgreSQLStorageProvider(mf monitoring.MetricFactory) (storage.Provider // getPostgreSQLDatabaseLocked returns an instance of PostgreSQL database, or creates // one. Requires postgresqlMu to be locked. -func getPostgreSQLDatabaseLocked() (*sql.DB, error) { +func getPostgreSQLDatabaseLocked() (*pgxpool.Pool, error) { if postgresqlDB != nil || postgresqlErr != nil { return postgresqlDB, postgresqlErr } diff --git a/storage/postgresql/queue.go b/storage/postgresql/queue.go index e5d3436864..c054550abf 100644 --- a/storage/postgresql/queue.go +++ b/storage/postgresql/queue.go @@ -51,7 +51,7 @@ func dequeueInfo(leafIDHash []byte, queueTimestamp int64) dequeuedLeaf { return dequeuedLeaf{queueTimestampNanos: queueTimestamp, leafIdentityHash: leafIDHash} } -func (t *logTreeTX) dequeueLeaf(rows *sql.Rows) (*trillian.LogLeaf, dequeuedLeaf, error) { +func (t *logTreeTX) dequeueLeaf(rows pgx.Rows) (*trillian.LogLeaf, dequeuedLeaf, error) { var leafIDHash []byte var merkleHash []byte var queueTimestamp int64 @@ -93,7 +93,7 @@ func (t *logTreeTX) UpdateSequencedLeaves(ctx context.Context, leaves []*trillia return fmt.Errorf("got invalid integrate timestamp: %w", err) } iTimestamp := leaf.IntegrateTimestamp.AsTime() - _, err := t.tx.ExecContext( + _, err := t.tx.Exec( ctx, insertSequencedLeafSQL+valuesPlaceholder5, t.treeID, @@ -134,7 +134,7 @@ func (t *logTreeTX) removeSequencedLeaves(ctx context.Context, leaves []dequeued } }() for _, dql := range leaves { - result, err := stx.ExecContext(ctx, t.treeID, dql.queueTimestampNanos, dql.leafIdentityHash) + result, err := stx.Exec(ctx, t.treeID, dql.queueTimestampNanos, dql.leafIdentityHash) err = checkResultOkAndRowCountIs(result, err, int64(1)) if err != nil { return err diff --git a/storage/postgresql/queue_batching.go b/storage/postgresql/queue_batching.go index 0c36c6c135..582b6d2f66 100644 --- a/storage/postgresql/queue_batching.go +++ b/storage/postgresql/queue_batching.go @@ -49,7 +49,7 @@ func dequeueInfo(_ []byte, queueID []byte) 
dequeuedLeaf { return dequeuedLeaf(queueID) } -func (t *logTreeTX) dequeueLeaf(rows *sql.Rows) (*trillian.LogLeaf, dequeuedLeaf, error) { +func (t *logTreeTX) dequeueLeaf(rows pgx.Rows) (*trillian.LogLeaf, dequeuedLeaf, error) { var leafIDHash []byte var merkleHash []byte var queueTimestamp int64 @@ -110,7 +110,7 @@ func (t *logTreeTX) UpdateSequencedLeaves(ctx context.Context, leaves []*trillia } dequeuedLeaves = append(dequeuedLeaves, qe) } - result, err := t.tx.ExecContext(ctx, insertSequencedLeafSQL+strings.Join(querySuffix, ","), args...) + result, err := t.tx.Exec(ctx, insertSequencedLeafSQL+strings.Join(querySuffix, ","), args...) if err != nil { klog.Warningf("Failed to update sequenced leaves: %s", err) } @@ -141,7 +141,7 @@ func (t *logTreeTX) removeSequencedLeaves(ctx context.Context, queueIDs []dequeu for i, q := range queueIDs { args[i] = []byte(q) } - result, err := stx.ExecContext(ctx, args...) + result, err := stx.Exec(ctx, args...) if err != nil { // Error is handled by checkResultOkAndRowCountIs() below klog.Warningf("Failed to delete sequenced work: %s", err) diff --git a/storage/postgresql/storage_test.go b/storage/postgresql/storage_test.go index f6c5434548..2671d9e12a 100644 --- a/storage/postgresql/storage_test.go +++ b/storage/postgresql/storage_test.go @@ -273,7 +273,7 @@ func diffNodes(got, want []stree.Node) ([]stree.Node, []stree.Node) { return missing, extra } -func openTestDBOrDie() (*sql.DB, func(context.Context)) { +func openTestDBOrDie() (*pgxpool.Pool, func(context.Context)) { db, done, err := testdb.NewTrillianDB(context.TODO(), testdb.DriverPostgreSQL) if err != nil { panic(err) @@ -282,16 +282,16 @@ func openTestDBOrDie() (*sql.DB, func(context.Context)) { } // cleanTestDB deletes all the entries in the database. -func cleanTestDB(db *sql.DB) { +func cleanTestDB(db *pgxpool.Pool) { for _, table := range allTables { - if _, err := db.ExecContext(context.TODO(), fmt.Sprintf("DELETE FROM %s", table)); err != nil { + if _, err := db.Exec(context.TODO(), fmt.Sprintf("DELETE FROM %s", table)); err != nil { panic(fmt.Sprintf("Failed to delete rows in %s: %v", table, err)) } } } -func getVersion(db *sql.DB) (string, error) { - rows, err := db.QueryContext(context.TODO(), "SELECT @@GLOBAL.version") +func getVersion(db *pgxpool.Pool) (string, error) { + rows, err := db.Query(context.TODO(), "SELECT @@GLOBAL.version") if err != nil { return "", fmt.Errorf("getVersion: failed to perform query: %v", err) } @@ -341,7 +341,7 @@ func mustCreateTree(ctx context.Context, t *testing.T, s storage.AdminStorage, t } // DB is the database used for tests. It's initialized and closed by TestMain(). -var DB *sql.DB +var DB *pgxpool.Pool func TestMain(m *testing.M) { flag.Parse() diff --git a/storage/postgresql/testdbpgx/testdbpgx.go b/storage/postgresql/testdbpgx/testdbpgx.go index 79cdd1a69e..273b282103 100644 --- a/storage/postgresql/testdbpgx/testdbpgx.go +++ b/storage/postgresql/testdbpgx/testdbpgx.go @@ -40,7 +40,7 @@ const ( // instance URI to use. The value must have a trailing slash. PostgreSQLURIEnv = "TEST_POSTGRESQL_URI" - // Note: sql.Open requires the URI to end with a slash. + // Note: pgxpool.New requires the URI to end with a slash. 
defaultTestPostgreSQLURI = "root@tcp(127.0.0.1)/" // CockroachDBURIEnv is the name of the ENV variable checked for the test CockroachDB @@ -158,9 +158,9 @@ func CockroachDBAvailable() bool { func dbAvailable(driver DriverName) bool { driverName := driverMapping[driver].sqlDriverName uri := driverMapping[driver].uriFunc() - db, err := sql.Open(driverName, uri) + db, err := pgxpool.New(driverName, uri) if err != nil { - log.Printf("sql.Open(): %v", err) + log.Printf("pgxpool.New(): %v", err) return false } defer func() { @@ -195,7 +195,7 @@ func SetFDLimit(uLimit uint64) error { // using the DB, the caller should not continue to use the returned DB after // calling this function as it may, for example, delete the underlying // instance. -func newEmptyDB(ctx context.Context, driver DriverName) (*sql.DB, func(context.Context), error) { +func newEmptyDB(ctx context.Context, driver DriverName) (*pgxpool.Pool, func(context.Context), error) { if err := SetFDLimit(2048); err != nil { return nil, nil, err } @@ -205,7 +205,7 @@ func newEmptyDB(ctx context.Context, driver DriverName) (*sql.DB, func(context.C return nil, nil, fmt.Errorf("unknown driver %q", driver) } - db, err := sql.Open(inf.sqlDriverName, inf.uriFunc()) + db, err := pgxpool.New(inf.sqlDriverName, inf.uriFunc()) if err != nil { return nil, nil, err } @@ -214,7 +214,7 @@ func newEmptyDB(ctx context.Context, driver DriverName) (*sql.DB, func(context.C name := fmt.Sprintf("trl_%v", time.Now().UnixNano()) stmt := fmt.Sprintf("CREATE DATABASE %v", name) - if _, err := db.ExecContext(ctx, stmt); err != nil { + if _, err := db.Exec(ctx, stmt); err != nil { return nil, nil, fmt.Errorf("error running statement %q: %v", stmt, err) } @@ -222,7 +222,7 @@ func newEmptyDB(ctx context.Context, driver DriverName) (*sql.DB, func(context.C return nil, nil, fmt.Errorf("failed to close DB: %v", err) } uri := inf.uriFunc(name) - db, err = sql.Open(inf.sqlDriverName, uri) + db, err = pgxpool.New(inf.sqlDriverName, uri) if err != nil { return nil, nil, err } @@ -233,7 +233,7 @@ func newEmptyDB(ctx context.Context, driver DriverName) (*sql.DB, func(context.C klog.Errorf("db.Close(): %v", err) } }() - if _, err := db.ExecContext(ctx, fmt.Sprintf("DROP DATABASE %v", name)); err != nil { + if _, err := db.Exec(ctx, fmt.Sprintf("DROP DATABASE %v", name)); err != nil { klog.Warningf("Failed to drop test database %q: %v", name, err) } } @@ -244,7 +244,7 @@ func newEmptyDB(ctx context.Context, driver DriverName) (*sql.DB, func(context.C // NewTrillianDB creates an empty database with the Trillian schema. The database name is randomly // generated. // NewTrillianDB is equivalent to Default().NewTrillianDB(ctx). 
-func NewTrillianDB(ctx context.Context, driver DriverName) (*sql.DB, func(context.Context), error) { +func NewTrillianDB(ctx context.Context, driver DriverName) (*pgxpool.Pool, func(context.Context), error) { db, done, err := newEmptyDB(ctx, driver) if err != nil { return nil, nil, err @@ -262,7 +262,7 @@ func NewTrillianDB(ctx context.Context, driver DriverName) (*sql.DB, func(contex if stmt == "" { continue } - if _, err := db.ExecContext(ctx, stmt); err != nil { + if _, err := db.Exec(ctx, stmt); err != nil { return nil, nil, fmt.Errorf("error running statement %q: %v", stmt, err) } } diff --git a/storage/postgresql/tree_storage.go b/storage/postgresql/tree_storage.go index 0c120aed05..ac58149f64 100644 --- a/storage/postgresql/tree_storage.go +++ b/storage/postgresql/tree_storage.go @@ -66,7 +66,7 @@ const ( // postgreSQLTreeStorage is shared between the postgreSQLLog- and (forthcoming) postgreSQLMap- // Storage implementations, and contains functionality which is common to both, type postgreSQLTreeStorage struct { - db *sql.DB + db *pgxpool.Pool // Must hold the mutex before manipulating the statement map. Sharing a lock because // it only needs to be held while the statements are built, not while they execute and @@ -77,15 +77,15 @@ type postgreSQLTreeStorage struct { } // OpenDB opens a database connection for all PostgreSQL-based storage implementations. -func OpenDB(dbURL string) (*sql.DB, error) { - db, err := sql.Open("postgresql", dbURL) +func OpenDB(dbURL string) (*pgxpool.Pool, error) { + db, err := pgxpool.New("postgresql", dbURL) if err != nil { // Don't log uri as it could contain credentials klog.Warningf("Could not open PostgreSQL database, check config: %s", err) return nil, err } - if _, err := db.ExecContext(context.TODO(), "SET sql_mode = 'STRICT_ALL_TABLES'"); err != nil { + if _, err := db.Exec(context.TODO(), "SET sql_mode = 'STRICT_ALL_TABLES'"); err != nil { klog.Warningf("Failed to set strict mode on postgresql db: %s", err) return nil, err } @@ -93,7 +93,7 @@ func OpenDB(dbURL string) (*sql.DB, error) { return db, nil } -func newTreeStorage(db *sql.DB) *postgreSQLTreeStorage { +func newTreeStorage(db *pgxpool.Pool) *postgreSQLTreeStorage { return &postgreSQLTreeStorage{ db: db, statements: make(map[string]map[int]*sql.Stmt), @@ -154,7 +154,7 @@ func (m *postgreSQLTreeStorage) setSubtreeStmt(ctx context.Context, num int) (*s } func (m *postgreSQLTreeStorage) beginTreeTx(ctx context.Context, tree *trillian.Tree, hashSizeBytes int, subtreeCache *cache.SubtreeCache) (treeTX, error) { - t, err := m.db.BeginTx(ctx, nil /* opts */) + t, err := m.db.BeginTx(ctx, pgx.TxOptions{}) if err != nil { klog.Warningf("Could not start tree TX: %s", err) return treeTX{}, err @@ -182,7 +182,7 @@ type treeTX struct { // mu ensures that tx can only be used for one query/exec at a time. mu *sync.Mutex closed bool - tx *sql.Tx + tx pgx.Tx ts *postgreSQLTreeStorage treeID int64 treeType trillian.TreeType @@ -232,7 +232,7 @@ func (t *treeTX) getSubtrees(ctx context.Context, treeRevision int64, ids [][]by } } - rows, err := stx.QueryContext(ctx, args...) + rows, err := stx.Query(ctx, args...) if err != nil { klog.Warningf("Failed to get merkle subtrees: %s", err) return nil, err @@ -346,7 +346,7 @@ func (t *treeTX) storeSubtrees(ctx context.Context, subtrees []*storagepb.Subtre } }() - r, err := stx.ExecContext(ctx, args...) + r, err := stx.Exec(ctx, args...) 
if err != nil { klog.Warningf("Failed to set merkle subtrees: %s", err) return err @@ -355,7 +355,7 @@ func (t *treeTX) storeSubtrees(ctx context.Context, subtrees []*storagepb.Subtre return nil } -func checkResultOkAndRowCountIs(res sql.Result, err error, count int64) error { +func checkResultOkAndRowCountIs(res pgconn.CommandTag, err error, count int64) error { // The Exec() might have just failed if err != nil { return postgresqlToGRPC(err) From 33451e65a1ca5ecec0f27cc6035563e6819f5d62 Mon Sep 17 00:00:00 2001 From: Rob Stradling Date: Tue, 1 Oct 2024 12:20:59 +0100 Subject: [PATCH 07/62] Update imports --- go.mod | 4 +++- go.sum | 4 ++++ quota/postgresqlqm/postgresql_quota.go | 1 + quota/postgresqlqm/postgresql_quota_test.go | 2 +- storage/postgresql/admin_storage.go | 2 ++ storage/postgresql/admin_storage_test.go | 3 ++- storage/postgresql/errors.go | 1 - storage/postgresql/log_storage.go | 2 ++ storage/postgresql/log_storage_test.go | 4 ++-- storage/postgresql/provider.go | 3 +-- storage/postgresql/queue.go | 2 +- storage/postgresql/queue_batching.go | 1 + storage/postgresql/storage_test.go | 2 +- storage/postgresql/testdbpgx/testdbpgx.go | 6 +++--- storage/postgresql/tree_storage.go | 3 +++ 15 files changed, 27 insertions(+), 13 deletions(-) diff --git a/go.mod b/go.mod index 09b3ae3249..67d42f92d0 100644 --- a/go.mod +++ b/go.mod @@ -16,6 +16,8 @@ require ( github.com/google/go-cmp v0.6.0 github.com/google/go-licenses/v2 v2.0.0-alpha.1 github.com/grpc-ecosystem/go-grpc-middleware v1.4.0 + github.com/jackc/pgconn v1.14.3 + github.com/jackc/pgx/v5 v5.5.2 github.com/letsencrypt/pkcs11key/v4 v4.0.0 github.com/lib/pq v1.10.9 github.com/prometheus/client_golang v1.20.4 @@ -103,13 +105,13 @@ require ( github.com/imdario/mergo v0.3.16 // indirect github.com/inconshreveable/mousetrap v1.1.0 // indirect github.com/jackc/chunkreader/v2 v2.0.1 // indirect - github.com/jackc/pgconn v1.14.3 // indirect github.com/jackc/pgio v1.0.0 // indirect github.com/jackc/pgpassfile v1.0.0 // indirect github.com/jackc/pgproto3/v2 v2.3.3 // indirect github.com/jackc/pgservicefile v0.0.0-20231201235250-de7065d80cb9 // indirect github.com/jackc/pgtype v1.14.3 // indirect github.com/jackc/pgx/v4 v4.18.3 // indirect + github.com/jackc/puddle/v2 v2.2.1 // indirect github.com/jhump/protoreflect v1.16.0 // indirect github.com/jmespath/go-jmespath v0.4.1-0.20220621161143-b0104c826a24 // indirect github.com/jonboulle/clockwork v0.4.0 // indirect diff --git a/go.sum b/go.sum index 3feeb6fd13..bd094bb3c5 100644 --- a/go.sum +++ b/go.sum @@ -1009,11 +1009,15 @@ github.com/jackc/pgx/v4 v4.12.1-0.20210724153913-640aa07df17c/go.mod h1:1QD0+tgS github.com/jackc/pgx/v4 v4.18.2/go.mod h1:Ey4Oru5tH5sB6tV7hDmfWFahwF15Eb7DNXlRKx2CkVw= github.com/jackc/pgx/v4 v4.18.3 h1:dE2/TrEsGX3RBprb3qryqSV9Y60iZN1C6i8IrmW9/BA= github.com/jackc/pgx/v4 v4.18.3/go.mod h1:Ey4Oru5tH5sB6tV7hDmfWFahwF15Eb7DNXlRKx2CkVw= +github.com/jackc/pgx/v5 v5.5.2 h1:iLlpgp4Cp/gC9Xuscl7lFL1PhhW+ZLtXZcrfCt4C3tA= +github.com/jackc/pgx/v5 v5.5.2/go.mod h1:ez9gk+OAat140fv9ErkZDYFWmXLfV+++K0uAOiwgm1A= github.com/jackc/puddle v0.0.0-20190413234325-e4ced69a3a2b/go.mod h1:m4B5Dj62Y0fbyuIc15OsIqK0+JU8nkqQjsgx7dvjSWk= github.com/jackc/puddle v0.0.0-20190608224051-11cab39313c9/go.mod h1:m4B5Dj62Y0fbyuIc15OsIqK0+JU8nkqQjsgx7dvjSWk= github.com/jackc/puddle v1.1.3/go.mod h1:m4B5Dj62Y0fbyuIc15OsIqK0+JU8nkqQjsgx7dvjSWk= github.com/jackc/puddle v1.3.0 
h1:eHK/5clGOatcjX3oWGBO/MpxpbHzSwud5EWTSCI+MX0= github.com/jackc/puddle v1.3.0/go.mod h1:m4B5Dj62Y0fbyuIc15OsIqK0+JU8nkqQjsgx7dvjSWk= +github.com/jackc/puddle/v2 v2.2.1 h1:RhxXJtFG022u4ibrCSMSiu5aOq1i77R3OHKNJj77OAk= +github.com/jackc/puddle/v2 v2.2.1/go.mod h1:vriiEXHvEE654aYKXXjOvZM39qJ0q+azkZFrfEOc3H4= github.com/jhump/protoreflect v1.16.0 h1:54fZg+49widqXYQ0b+usAFHbMkBGR4PpXrsHc8+TBDg= github.com/jhump/protoreflect v1.16.0/go.mod h1:oYPd7nPvcBw/5wlDfm/AVmU9zH9BgqGCI469pGxfj/8= github.com/jmespath/go-jmespath v0.4.1-0.20220621161143-b0104c826a24 h1:liMMTbpW34dhU4az1GN0pTPADwNmvoRSeoZ6PItiqnY= diff --git a/quota/postgresqlqm/postgresql_quota.go b/quota/postgresqlqm/postgresql_quota.go index 02a864dbdc..ff537313b5 100644 --- a/quota/postgresqlqm/postgresql_quota.go +++ b/quota/postgresqlqm/postgresql_quota.go @@ -22,6 +22,7 @@ import ( "fmt" "github.com/google/trillian/quota" + "github.com/jackc/pgx/v5/pgxpool" "k8s.io/klog/v2" ) diff --git a/quota/postgresqlqm/postgresql_quota_test.go b/quota/postgresqlqm/postgresql_quota_test.go index ebcc3cd45d..7b3bbd2665 100644 --- a/quota/postgresqlqm/postgresql_quota_test.go +++ b/quota/postgresqlqm/postgresql_quota_test.go @@ -17,7 +17,6 @@ package postgresqlqm_test import ( "context" "crypto" - "database/sql" "fmt" "testing" "time" @@ -29,6 +28,7 @@ import ( "github.com/google/trillian/storage/postgresql" testdb "github.com/google/trillian/storage/postgresql/testdbpgx" "github.com/google/trillian/types" + "github.com/jackc/pgx/v5/pgxpool" stestonly "github.com/google/trillian/storage/testonly" ) diff --git a/storage/postgresql/admin_storage.go b/storage/postgresql/admin_storage.go index 8a2ee73ef6..344569ff06 100644 --- a/storage/postgresql/admin_storage.go +++ b/storage/postgresql/admin_storage.go @@ -26,6 +26,8 @@ import ( "github.com/google/trillian" "github.com/google/trillian/storage" "github.com/google/trillian/storage/postgresql/postgresqlpb" + "github.com/jackc/pgx/v5" + "github.com/jackc/pgx/v5/pgxpool" "google.golang.org/grpc/codes" "google.golang.org/grpc/status" "google.golang.org/protobuf/proto" diff --git a/storage/postgresql/admin_storage_test.go b/storage/postgresql/admin_storage_test.go index 85661ebcc1..6db79861d8 100644 --- a/storage/postgresql/admin_storage_test.go +++ b/storage/postgresql/admin_storage_test.go @@ -17,7 +17,6 @@ package postgresql import ( "bytes" "context" - "database/sql" "encoding/gob" "fmt" "testing" @@ -26,6 +25,8 @@ import ( "github.com/google/trillian/storage" "github.com/google/trillian/storage/postgresql/postgresqlpb" "github.com/google/trillian/storage/testonly" + "github.com/jackc/pgx/v5" + "github.com/jackc/pgx/v5/pgxpool" "google.golang.org/protobuf/proto" "google.golang.org/protobuf/types/known/anypb" ) diff --git a/storage/postgresql/errors.go b/storage/postgresql/errors.go index 0282e6ee86..1184cea194 100644 --- a/storage/postgresql/errors.go +++ b/storage/postgresql/errors.go @@ -15,7 +15,6 @@ package postgresql import ( - "github.com/go-sql-driver/postgresql" "google.golang.org/grpc/codes" "google.golang.org/grpc/status" ) diff --git a/storage/postgresql/log_storage.go b/storage/postgresql/log_storage.go index b8238400d3..1a13749b76 100644 --- a/storage/postgresql/log_storage.go +++ b/storage/postgresql/log_storage.go @@ -31,6 +31,8 @@ import ( "github.com/google/trillian/storage/cache" "github.com/google/trillian/storage/tree" "github.com/google/trillian/types" + "github.com/jackc/pgx/v5" + 
"github.com/jackc/pgx/v5/pgxpool" "github.com/transparency-dev/merkle/compact" "github.com/transparency-dev/merkle/rfc6962" "google.golang.org/grpc/codes" diff --git a/storage/postgresql/log_storage_test.go b/storage/postgresql/log_storage_test.go index 79043e98cd..50b406662e 100644 --- a/storage/postgresql/log_storage_test.go +++ b/storage/postgresql/log_storage_test.go @@ -18,7 +18,6 @@ import ( "bytes" "context" "crypto/sha256" - "database/sql" "fmt" "sort" "testing" @@ -34,7 +33,8 @@ import ( "google.golang.org/protobuf/types/known/timestamppb" "k8s.io/klog/v2" - _ "github.com/go-sql-driver/postgresql" + "github.com/jackc/pgx/v5/pgxpool" + _ "github.com/jackc/pgx/v5/pgxpool" ) var allTables = []string{"Unsequenced", "TreeHead", "SequencedLeafData", "LeafData", "Subtree", "TreeControl", "Trees"} diff --git a/storage/postgresql/provider.go b/storage/postgresql/provider.go index 58c844a15b..066c29dce7 100644 --- a/storage/postgresql/provider.go +++ b/storage/postgresql/provider.go @@ -17,7 +17,6 @@ package postgresql import ( "crypto/tls" "crypto/x509" - "database/sql" "errors" "flag" "os" @@ -28,7 +27,7 @@ import ( "k8s.io/klog/v2" // Load PostgreSQL driver - "github.com/go-sql-driver/postgresql" + "github.com/jackc/pgx/v5/pgxpool" ) var ( diff --git a/storage/postgresql/queue.go b/storage/postgresql/queue.go index c054550abf..822dbe8fbd 100644 --- a/storage/postgresql/queue.go +++ b/storage/postgresql/queue.go @@ -19,12 +19,12 @@ package postgresql import ( "context" - "database/sql" "errors" "fmt" "time" "github.com/google/trillian" + "github.com/jackc/pgx/v5" "google.golang.org/protobuf/types/known/timestamppb" "k8s.io/klog/v2" ) diff --git a/storage/postgresql/queue_batching.go b/storage/postgresql/queue_batching.go index 582b6d2f66..fb341b8810 100644 --- a/storage/postgresql/queue_batching.go +++ b/storage/postgresql/queue_batching.go @@ -27,6 +27,7 @@ import ( "time" "github.com/google/trillian" + "github.com/jackc/pgx/v5" "google.golang.org/protobuf/types/known/timestamppb" "k8s.io/klog/v2" ) diff --git a/storage/postgresql/storage_test.go b/storage/postgresql/storage_test.go index 2671d9e12a..f2206c17a3 100644 --- a/storage/postgresql/storage_test.go +++ b/storage/postgresql/storage_test.go @@ -19,7 +19,6 @@ import ( "context" "crypto" "crypto/sha256" - "database/sql" "errors" "flag" "fmt" @@ -34,6 +33,7 @@ import ( storageto "github.com/google/trillian/storage/testonly" stree "github.com/google/trillian/storage/tree" "github.com/google/trillian/types" + "github.com/jackc/pgx/v5/pgxpool" "github.com/transparency-dev/merkle/compact" "github.com/transparency-dev/merkle/rfc6962" "google.golang.org/protobuf/types/known/anypb" diff --git a/storage/postgresql/testdbpgx/testdbpgx.go b/storage/postgresql/testdbpgx/testdbpgx.go index 273b282103..a5e018db8a 100644 --- a/storage/postgresql/testdbpgx/testdbpgx.go +++ b/storage/postgresql/testdbpgx/testdbpgx.go @@ -18,7 +18,6 @@ package testdbpgx import ( "bytes" "context" - "database/sql" "fmt" "log" "net/url" @@ -31,8 +30,9 @@ import ( "golang.org/x/sys/unix" "k8s.io/klog/v2" - _ "github.com/go-sql-driver/postgresql" // postgresql driver - _ "github.com/lib/pq" // postgres driver + "github.com/jackc/pgx/v5/pgxpool" + _ "github.com/jackc/pgx/v5/pgxpool" // postgresql driver + _ "github.com/lib/pq" // postgres driver ) const ( diff --git a/storage/postgresql/tree_storage.go b/storage/postgresql/tree_storage.go index ac58149f64..bf604dacdd 100644 --- 
a/storage/postgresql/tree_storage.go +++ b/storage/postgresql/tree_storage.go @@ -29,6 +29,9 @@ import ( "github.com/google/trillian/storage/postgresql/postgresqlpb" "github.com/google/trillian/storage/storagepb" "github.com/google/trillian/storage/tree" + "github.com/jackc/pgconn" + "github.com/jackc/pgx/v5" + "github.com/jackc/pgx/v5/pgxpool" "google.golang.org/protobuf/proto" "google.golang.org/protobuf/types/known/anypb" "k8s.io/klog/v2" From 6d88880c50e84d58862aa5439e46c66ad09712b0 Mon Sep 17 00:00:00 2001 From: Rob Stradling Date: Tue, 1 Oct 2024 12:27:49 +0100 Subject: [PATCH 08/62] Convert schema --- storage/postgresql/schema/storage.sql | 70 ++++++++++++++++----------- 1 file changed, 43 insertions(+), 27 deletions(-) diff --git a/storage/postgresql/schema/storage.sql b/storage/postgresql/schema/storage.sql index e951876575..c8f414dd9a 100644 --- a/storage/postgresql/schema/storage.sql +++ b/storage/postgresql/schema/storage.sql @@ -1,4 +1,4 @@ -# PostgreSQL / MariaDB version of the tree schema +# PostgreSQL version of the tree schema. -- --------------------------------------------- -- Tree stuff here @@ -6,20 +6,26 @@ -- Tree parameters should not be changed after creation. Doing so can -- render the data in the tree unusable or inconsistent. +CREATE TYPE TreeState AS ENUM ('ACTIVE', 'FROZEN', 'DRAINING'); +CREATE TYPE TreeType AS ENUM ('LOG', 'MAP', 'PREORDERED_LOG'); +CREATE TYPE HashStrategy AS ENUM ('RFC6962_SHA256', 'TEST_MAP_HASHER', 'OBJECT_RFC6962_SHA256', 'CONIKS_SHA512_256', 'CONIKS_SHA256'); +CREATE TYPE HashAlgorithm AS ENUM ('SHA256'); +CREATE TYPE SignatureAlgorithm AS ENUM ('ECDSA', 'RSA', 'ED25519'); + CREATE TABLE IF NOT EXISTS Trees( TreeId BIGINT NOT NULL, - TreeState ENUM('ACTIVE', 'FROZEN', 'DRAINING') NOT NULL, - TreeType ENUM('LOG', 'MAP', 'PREORDERED_LOG') NOT NULL, - HashStrategy ENUM('RFC6962_SHA256', 'TEST_MAP_HASHER', 'OBJECT_RFC6962_SHA256', 'CONIKS_SHA512_256', 'CONIKS_SHA256') NOT NULL, - HashAlgorithm ENUM('SHA256') NOT NULL, - SignatureAlgorithm ENUM('ECDSA', 'RSA', 'ED25519') NOT NULL, + TreeState TreeState NOT NULL, + TreeType TreeType NOT NULL, + HashStrategy HashStrategy NOT NULL, + HashAlgorithm HashAlgorithm NOT NULL, + SignatureAlgorithm SignatureAlgorithm NOT NULL, DisplayName VARCHAR(20), Description VARCHAR(200), CreateTimeMillis BIGINT NOT NULL, UpdateTimeMillis BIGINT NOT NULL, MaxRootDurationMillis BIGINT NOT NULL, - PrivateKey MEDIUMBLOB NOT NULL, -- Unused. - PublicKey MEDIUMBLOB NOT NULL, -- This is now used to store settings. + PrivateKey BYTEA NOT NULL, -- Unused. + PublicKey BYTEA NOT NULL, -- This is now used to store settings. Deleted BOOLEAN, DeleteTimeMillis BIGINT, PRIMARY KEY(TreeId) @@ -38,13 +44,14 @@ CREATE TABLE IF NOT EXISTS TreeControl( CREATE TABLE IF NOT EXISTS Subtree( TreeId BIGINT NOT NULL, - SubtreeId VARBINARY(255) NOT NULL, - Nodes MEDIUMBLOB NOT NULL, + SubtreeId BYTEA NOT NULL, + Nodes BYTEA NOT NULL, SubtreeRevision INTEGER NOT NULL, -- Key columns must be in ASC order in order to benefit from group-by/min-max -- optimization in PostgreSQL. 
- PRIMARY KEY(TreeId, SubtreeId, SubtreeRevision), - FOREIGN KEY(TreeId) REFERENCES Trees(TreeId) ON DELETE CASCADE + PRIMARY KEY (TreeId, SubtreeId, SubtreeRevision), + FOREIGN KEY(TreeId) REFERENCES Trees(TreeId) ON DELETE CASCADE, + CHECK (length(SubtreeId) <= 255) ); -- The TreeRevisionIdx is used to enforce that there is only one STH at any @@ -53,11 +60,13 @@ CREATE TABLE IF NOT EXISTS TreeHead( TreeId BIGINT NOT NULL, TreeHeadTimestamp BIGINT, TreeSize BIGINT, - RootHash VARBINARY(255) NOT NULL, - RootSignature VARBINARY(1024) NOT NULL, + RootHash BYTEA NOT NULL, + RootSignature BYTEA NOT NULL, TreeRevision BIGINT, PRIMARY KEY(TreeId, TreeHeadTimestamp), - FOREIGN KEY(TreeId) REFERENCES Trees(TreeId) ON DELETE CASCADE + FOREIGN KEY(TreeId) REFERENCES Trees(TreeId) ON DELETE CASCADE, + CHECK (length(RootHash) <= 255), + CHECK (length(RootSignature) <= 1024) ); CREATE UNIQUE INDEX TreeHeadRevisionIdx @@ -78,17 +87,18 @@ CREATE TABLE IF NOT EXISTS LeafData( -- This is a personality specific has of some subset of the leaf data. -- It's only purpose is to allow Trillian to identify duplicate entries in -- the context of the personality. - LeafIdentityHash VARBINARY(255) NOT NULL, + LeafIdentityHash BYTEA NOT NULL, -- This is the data stored in the leaf for example in CT it contains a DER encoded -- X.509 certificate but is application dependent - LeafValue LONGBLOB NOT NULL, + LeafValue BYTEA NOT NULL, -- This is extra data that the application can associate with the leaf should it wish to. -- This data is not included in signing and hashing. - ExtraData LONGBLOB, + ExtraData BYTEA, -- The timestamp from when this leaf data was first queued for inclusion. QueueTimestampNanos BIGINT NOT NULL, PRIMARY KEY(TreeId, LeafIdentityHash), - FOREIGN KEY(TreeId) REFERENCES Trees(TreeId) ON DELETE CASCADE + FOREIGN KEY(TreeId) REFERENCES Trees(TreeId) ON DELETE CASCADE, + CHECK (length(LeafIdentityHash) <= 255) ); -- When a leaf is sequenced a row is added to this table. If logs allow duplicates then @@ -99,18 +109,21 @@ CREATE TABLE IF NOT EXISTS LeafData( -- LeafData table is keyed by the raw data hash. CREATE TABLE IF NOT EXISTS SequencedLeafData( TreeId BIGINT NOT NULL, - SequenceNumber BIGINT UNSIGNED NOT NULL, + SequenceNumber BIGINT NOT NULL, -- This is a personality specific has of some subset of the leaf data. -- It's only purpose is to allow Trillian to identify duplicate entries in -- the context of the personality. - LeafIdentityHash VARBINARY(255) NOT NULL, + LeafIdentityHash BYTEA NOT NULL, -- This is a MerkleLeafHash as defined by the treehasher that the log uses. For example for -- CT this hash will include the leaf prefix byte as well as the leaf data. - MerkleLeafHash VARBINARY(255) NOT NULL, + MerkleLeafHash BYTEA NOT NULL, IntegrateTimestampNanos BIGINT NOT NULL, PRIMARY KEY(TreeId, SequenceNumber), FOREIGN KEY(TreeId) REFERENCES Trees(TreeId) ON DELETE CASCADE, - FOREIGN KEY(TreeId, LeafIdentityHash) REFERENCES LeafData(TreeId, LeafIdentityHash) ON DELETE CASCADE + FOREIGN KEY(TreeId, LeafIdentityHash) REFERENCES LeafData(TreeId, LeafIdentityHash) ON DELETE CASCADE, + CHECK (SequenceNumber >= 0), + CHECK (length(LeafIdentityHash) <= 255), + CHECK (length(MerkleLeafHash) <= 255) ); CREATE INDEX SequencedLeafMerkleIdx @@ -124,14 +137,17 @@ CREATE TABLE IF NOT EXISTS Unsequenced( -- This is a personality specific hash of some subset of the leaf data. -- It's only purpose is to allow Trillian to identify duplicate entries in -- the context of the personality. 
- LeafIdentityHash VARBINARY(255) NOT NULL, + LeafIdentityHash BYTEA NOT NULL, -- This is a MerkleLeafHash as defined by the treehasher that the log uses. For example for -- CT this hash will include the leaf prefix byte as well as the leaf data. - MerkleLeafHash VARBINARY(255) NOT NULL, + MerkleLeafHash BYTEA NOT NULL, QueueTimestampNanos BIGINT NOT NULL, -- This is a SHA256 hash of the TreeID, LeafIdentityHash and QueueTimestampNanos. It is used -- for batched deletes from the table when trillian_log_server and trillian_log_signer are -- built with the batched_queue tag. - QueueID VARBINARY(32) DEFAULT NULL UNIQUE, - PRIMARY KEY (TreeId, Bucket, QueueTimestampNanos, LeafIdentityHash) + QueueID BYTEA DEFAULT NULL UNIQUE, + PRIMARY KEY (TreeId, Bucket, QueueTimestampNanos, LeafIdentityHash), + CHECK (length(LeafIdentityHash) <= 255), + CHECK (length(MerkleLeafHash) <= 255), + CHECK (length(QueueID) <= 32) ); From 74526b667642871db58b6f1509a496f03366b507 Mon Sep 17 00:00:00 2001 From: Rob Stradling Date: Tue, 1 Oct 2024 12:31:00 +0100 Subject: [PATCH 09/62] Use EXPLAIN output to estimate rows in PostgreSQL quota.Manager --- quota/postgresqlqm/postgresql_quota.go | 80 +++++--------------------- storage/postgresql/drop_storage.sql | 2 + storage/postgresql/schema/storage.sql | 16 ++++++ 3 files changed, 32 insertions(+), 66 deletions(-) diff --git a/quota/postgresqlqm/postgresql_quota.go b/quota/postgresqlqm/postgresql_quota.go index ff537313b5..87a65d51f4 100644 --- a/quota/postgresqlqm/postgresql_quota.go +++ b/quota/postgresqlqm/postgresql_quota.go @@ -17,13 +17,10 @@ package postgresqlqm import ( "context" - "database/sql" "errors" - "fmt" "github.com/google/trillian/quota" "github.com/jackc/pgx/v5/pgxpool" - "k8s.io/klog/v2" ) const ( @@ -31,13 +28,8 @@ const ( // Note that this is a Global/Write quota suggestion, so it applies across trees. DefaultMaxUnsequenced = 500000 // About 2h of non-stop signing at 70QPS. - countFromInformationSchemaQuery = ` - SELECT table_rows - FROM information_schema.tables - WHERE table_schema = schema() - AND table_name = ? - AND table_type = ?` - countFromUnsequencedQuery = "SELECT COUNT(*) FROM Unsequenced" + countFromExplainOutputQuery = "SELECT count_estimate($1)" + countFromUnsequencedQuery = "SELECT COUNT(*) FROM Unsequenced" ) // ErrTooManyUnsequencedRows is returned when tokens are requested but Unsequenced has grown @@ -46,10 +38,15 @@ var ErrTooManyUnsequencedRows = errors.New("too many unsequenced rows") // QuotaManager is a PostgreSQL-based quota.Manager implementation. // -// It has two working modes: one queries the information schema for the number of Unsequenced rows, -// the other does a select count(*) on the Unsequenced table. Information schema queries are -// default, even though they are approximate, as they're constant time (select count(*) on InnoDB -// based PostgreSQL needs to traverse the index and may take quite a while to complete). +// It has two working modes: one estimates the number of Unsequenced rows by collecting information +// from EXPLAIN output; the other does a select count(*) on the Unsequenced table. Estimates are +// default, even though they are approximate, as they're constant time (select count(*) on +// PostgreSQL needs to traverse the index and may take quite a while to complete). 
+// Other estimation methods exist (see https://wiki.postgresql.org/wiki/Count_estimate), but using +// EXPLAIN output is the most accurate because it "fetches the actual current number of pages in +// the table (this is a cheap operation, not requiring a table scan). If that is different from +// relpages then reltuples is scaled accordingly to arrive at a current number-of-rows estimate." +// (quoting https://www.postgresql.org/docs/current/row-estimation-examples.html) // // QuotaManager only implements Global/Write quotas, which is based on the number of Unsequenced // rows (to be exact, tokens = MaxUnsequencedRows - actualUnsequencedRows). @@ -96,35 +93,14 @@ func (m *QuotaManager) countUnsequenced(ctx context.Context) (int, error) { if m.UseSelectCount { return countFromTable(ctx, m.DB) } - return countFromInformationSchema(ctx, m.DB) + return countFromExplainOutput(ctx, m.DB) } -func countFromInformationSchema(ctx context.Context, db *pgxpool.Pool) (int, error) { - // turn off statistics caching for PostgreSQL 8 - if err := turnOffInformationSchemaCache(ctx, db); err != nil { - return 0, err - } - // information_schema.tables doesn't have an explicit PK, so let's play it safe and ensure - // the cursor returns a single row. - rows, err := db.Query(ctx, countFromInformationSchemaQuery, "Unsequenced", "BASE TABLE") - if err != nil { - return 0, err - } - defer func() { - if err := rows.Close(); err != nil { - klog.Errorf("Close(): %v", err) - } - }() - if !rows.Next() { - return 0, errors.New("cursor has no rows after information_schema query") - } +func countFromExplainOutput(ctx context.Context, db *pgxpool.Pool) (int, error) { var count int - if err := rows.Scan(&count); err != nil { + if err := db.QueryRow(ctx, countFromExplainOutputQuery, "Unsequenced").Scan(&count); err != nil { return 0, err } - if rows.Next() { - return 0, errors.New("too many rows returned from information_schema query") - } return count, nil } @@ -135,31 +111,3 @@ func countFromTable(ctx context.Context, db *pgxpool.Pool) (int, error) { } return count, nil } - -// turnOffInformationSchemaCache turn off statistics caching for PostgreSQL 8 -// To always retrieve the latest statistics directly from the storage engine and bypass cached values, set information_schema_stats_expiry to 0. -// See https://dev.postgresql.com/doc/refman/8.0/en/server-system-variables.html#sysvar_information_schema_stats_expiry -// PostgreSQL versions prior to 8 will fail safely. 
-func turnOffInformationSchemaCache(ctx context.Context, db *pgxpool.Pool) error { - opt := "information_schema_stats_expiry" - res := db.QueryRow(ctx, "SHOW VARIABLES LIKE '"+opt+"'") - var none string - var expiry int - - if err := res.Scan(&none, &expiry); err != nil { - // fail safely for all versions of PostgreSQL prior to 8 - if errors.Is(err, sql.ErrNoRows) { - return nil - } - - return fmt.Errorf("failed to get variable %q: %v", opt, err) - } - - if expiry != 0 { - if _, err := db.Exec(ctx, "SET SESSION "+opt+"=0"); err != nil { - return fmt.Errorf("failed to set variable %q: %v", opt, err) - } - } - - return nil -} diff --git a/storage/postgresql/drop_storage.sql b/storage/postgresql/drop_storage.sql index 6e407b5d40..defd04417d 100644 --- a/storage/postgresql/drop_storage.sql +++ b/storage/postgresql/drop_storage.sql @@ -7,3 +7,5 @@ DROP TABLE IF EXISTS TreeHead; DROP TABLE IF EXISTS LeafData; DROP TABLE IF EXISTS TreeControl; DROP TABLE IF EXISTS Trees; + +DROP FUNCTION IF EXISTS count_estimate; diff --git a/storage/postgresql/schema/storage.sql b/storage/postgresql/schema/storage.sql index c8f414dd9a..517abcb231 100644 --- a/storage/postgresql/schema/storage.sql +++ b/storage/postgresql/schema/storage.sql @@ -151,3 +151,19 @@ CREATE TABLE IF NOT EXISTS Unsequenced( CHECK (length(MerkleLeafHash) <= 255), CHECK (length(QueueID) <= 32) ); + +-- Adapted from https://wiki.postgresql.org/wiki/Count_estimate +CREATE OR REPLACE FUNCTION count_estimate( + table_name text +) RETURNS bigint +LANGUAGE plpgsql AS $$ +DECLARE + plan jsonb; +BEGIN + EXECUTE 'EXPLAIN (FORMAT JSON) SELECT * FROM ' || table_name INTO plan; + RETURN plan->0->'Plan'->'Plan Rows'; +EXCEPTION + WHEN OTHERS THEN + RETURN 0; +END; +$$; From e3583cd2412c0e0562abac91b1b847c1ee7ee0fe Mon Sep 17 00:00:00 2001 From: Rob Stradling Date: Tue, 1 Oct 2024 12:34:50 +0100 Subject: [PATCH 10/62] PostgreSQL doesn't have an equivalent of MySQL's strict mode --- storage/postgresql/admin_storage.go | 9 --------- storage/postgresql/tree_storage.go | 5 ----- 2 files changed, 14 deletions(-) diff --git a/storage/postgresql/admin_storage.go b/storage/postgresql/admin_storage.go index 344569ff06..653b36bda2 100644 --- a/storage/postgresql/admin_storage.go +++ b/storage/postgresql/admin_storage.go @@ -308,15 +308,6 @@ func (t *adminTX) CreateTree(ctx context.Context, tree *trillian.Tree) (*trillia return nil, err } - // PostgreSQL silently truncates data when running in non-strict mode. - // We shouldn't be using non-strict modes, but let's guard against it - // anyway. - if _, err := t.GetTree(ctx, newTree.TreeId); err != nil { - // GetTree will fail for truncated enums (they get recorded as - // empty strings, which will not match any known value). 
- return nil, fmt.Errorf("enum truncated: %v", err) - } - insertControlStmt, err := t.tx.PrepareContext( ctx, `INSERT INTO TreeControl( diff --git a/storage/postgresql/tree_storage.go b/storage/postgresql/tree_storage.go index bf604dacdd..a88645a936 100644 --- a/storage/postgresql/tree_storage.go +++ b/storage/postgresql/tree_storage.go @@ -88,11 +88,6 @@ func OpenDB(dbURL string) (*pgxpool.Pool, error) { return nil, err } - if _, err := db.Exec(context.TODO(), "SET sql_mode = 'STRICT_ALL_TABLES'"); err != nil { - klog.Warningf("Failed to set strict mode on postgresql db: %s", err) - return nil, err - } - return db, nil } From a167577d2b24bfc14d9f3746249eb05a88df94c7 Mon Sep 17 00:00:00 2001 From: Rob Stradling Date: Tue, 1 Oct 2024 12:37:25 +0100 Subject: [PATCH 11/62] Context required for pgx Commit() and Rollback() functions --- storage/postgresql/admin_storage.go | 4 ++-- storage/postgresql/admin_storage_test.go | 2 +- storage/postgresql/tree_storage.go | 4 ++-- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/storage/postgresql/admin_storage.go b/storage/postgresql/admin_storage.go index 653b36bda2..4d9f28d648 100644 --- a/storage/postgresql/admin_storage.go +++ b/storage/postgresql/admin_storage.go @@ -124,7 +124,7 @@ func (t *adminTX) Commit() error { t.mu.Lock() defer t.mu.Unlock() t.closed = true - return t.tx.Commit() + return t.tx.Commit(context.TODO()) } func (t *adminTX) Close() error { @@ -134,7 +134,7 @@ func (t *adminTX) Close() error { return nil } t.closed = true - return t.tx.Rollback() + return t.tx.Rollback(context.TODO()) } func (t *adminTX) GetTree(ctx context.Context, treeID int64) (*trillian.Tree, error) { diff --git a/storage/postgresql/admin_storage_test.go b/storage/postgresql/admin_storage_test.go index 6db79861d8..d09d4af305 100644 --- a/storage/postgresql/admin_storage_test.go +++ b/storage/postgresql/admin_storage_test.go @@ -264,7 +264,7 @@ func TestAdminTX_GetTreeLegacies(t *testing.T) { if _, err := tx.Exec("UPDATE Trees SET PublicKey = ? 
WHERE TreeId = ?", tC.key, tree.TreeId); err != nil { t.Fatal(err) } - if err := tx.Commit(); err != nil { + if err := tx.Commit(ctx); err != nil { t.Fatal(err) } readTree, err := storage.GetTree(ctx, s, tree.TreeId) diff --git a/storage/postgresql/tree_storage.go b/storage/postgresql/tree_storage.go index a88645a936..76895c7f98 100644 --- a/storage/postgresql/tree_storage.go +++ b/storage/postgresql/tree_storage.go @@ -404,7 +404,7 @@ func (t *treeTX) Commit(ctx context.Context) error { } } t.closed = true - if err := t.tx.Commit(); err != nil { + if err := t.tx.Commit(ctx); err != nil { klog.Warningf("TX commit error: %s, stack:\n%s", err, string(debug.Stack())) return err } @@ -413,7 +413,7 @@ func (t *treeTX) Commit(ctx context.Context) error { func (t *treeTX) rollbackInternal() error { t.closed = true - if err := t.tx.Rollback(); err != nil { + if err := t.tx.Rollback(context.TODO()); err != nil { klog.Warningf("TX rollback error: %s, stack:\n%s", err, string(debug.Stack())) return err } From c1279654c53e531cbe0811b9b55bd3ccf1368350 Mon Sep 17 00:00:00 2001 From: Rob Stradling Date: Tue, 1 Oct 2024 13:53:31 +0100 Subject: [PATCH 12/62] Remove statement map and PrepareContext() calls, because pgx automatically prepares and caches statements --- storage/postgresql/admin_storage.go | 65 +++--------------------- storage/postgresql/admin_storage_test.go | 7 +-- storage/postgresql/log_storage.go | 16 ++---- storage/postgresql/log_storage_test.go | 12 +---- storage/postgresql/queue.go | 12 +---- storage/postgresql/tree_storage.go | 12 ++--- 6 files changed, 18 insertions(+), 106 deletions(-) diff --git a/storage/postgresql/admin_storage.go b/storage/postgresql/admin_storage.go index 4d9f28d648..c44d3f1c6a 100644 --- a/storage/postgresql/admin_storage.go +++ b/storage/postgresql/admin_storage.go @@ -138,18 +138,8 @@ func (t *adminTX) Close() error { } func (t *adminTX) GetTree(ctx context.Context, treeID int64) (*trillian.Tree, error) { - stmt, err := t.tx.PrepareContext(ctx, selectTreeByID) - if err != nil { - return nil, err - } - defer func() { - if err := stmt.Close(); err != nil { - klog.Errorf("stmt.Close(): %v", err) - } - }() - // GetTree is an entry point for most RPCs, let's provide somewhat nicer error messages. - tree, err := readTree(stmt.QueryRow(ctx, treeID)) + tree, err := readTree(t.tx.QueryRow(ctx, selectTreeByID, treeID)) switch { case err == pgx.ErrNoRows: // ErrNoRows doesn't provide useful information, so we don't forward it. 
@@ -168,16 +158,7 @@ func (t *adminTX) ListTrees(ctx context.Context, includeDeleted bool) ([]*trilli query = selectNonDeletedTrees } - stmt, err := t.tx.PrepareContext(ctx, query) - if err != nil { - return nil, err - } - defer func() { - if err := stmt.Close(); err != nil { - klog.Errorf("stmt.Close(): %v", err) - } - }() - rows, err := stmt.Query(ctx) + rows, err := t.tx.Query(ctx, query) if err != nil { return nil, err } @@ -262,7 +243,7 @@ func (t *adminTX) CreateTree(ctx context.Context, tree *trillian.Tree) (*trillia return nil, fmt.Errorf("failed to encode storageSettings: %v", err) } - insertTreeStmt, err := t.tx.PrepareContext( + _, err = t.tx.Exec( ctx, `INSERT INTO Trees( TreeId, @@ -278,18 +259,7 @@ func (t *adminTX) CreateTree(ctx context.Context, tree *trillian.Tree) (*trillia PrivateKey, -- Unused PublicKey, -- Used to store StorageSettings MaxRootDurationMillis) - VALUES(?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`) - if err != nil { - return nil, err - } - defer func() { - if err := insertTreeStmt.Close(); err != nil { - klog.Errorf("insertTreeStmt.Close(): %v", err) - } - }() - - _, err = insertTreeStmt.Exec( - ctx, + VALUES(?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`, newTree.TreeId, newTree.TreeState.String(), newTree.TreeType.String(), @@ -308,24 +278,14 @@ func (t *adminTX) CreateTree(ctx context.Context, tree *trillian.Tree) (*trillia return nil, err } - insertControlStmt, err := t.tx.PrepareContext( + _, err = t.tx.Exec( ctx, `INSERT INTO TreeControl( TreeId, SigningEnabled, SequencingEnabled, SequenceIntervalSeconds) - VALUES(?, ?, ?, ?)`) - if err != nil { - return nil, err - } - defer func() { - if err := insertControlStmt.Close(); err != nil { - klog.Errorf("insertControlStmt.Close(): %v", err) - } - }() - _, err = insertControlStmt.Exec( - ctx, + VALUES(?, ?, ?, ?)`, newTree.TreeId, true, /* SigningEnabled */ true, /* SequencingEnabled */ @@ -368,18 +328,9 @@ func (t *adminTX) UpdateTree(ctx context.Context, treeID int64, updateFunc func( } rootDuration := tree.MaxRootDuration.AsDuration() - stmt, err := t.tx.PrepareContext(ctx, updateTreeSQL) - if err != nil { - return nil, err - } - defer func() { - if err := stmt.Close(); err != nil { - klog.Errorf("stmt.Close(): %v", err) - } - }() - - if _, err = stmt.Exec( + if _, err = t.tx.Exec( ctx, + updateTreeSQL, tree.TreeState.String(), tree.TreeType.String(), tree.DisplayName, diff --git a/storage/postgresql/admin_storage_test.go b/storage/postgresql/admin_storage_test.go index d09d4af305..e5e0b7862b 100644 --- a/storage/postgresql/admin_storage_test.go +++ b/storage/postgresql/admin_storage_test.go @@ -333,11 +333,6 @@ func TestCheckDatabaseAccessible_OK(t *testing.T) { } func setNulls(ctx context.Context, db *pgxpool.Pool, treeID int64) error { - stmt, err := db.PrepareContext(ctx, "UPDATE Trees SET DisplayName = NULL, Description = NULL WHERE TreeId = ?") - if err != nil { - return err - } - defer func() { _ = stmt.Close() }() - _, err = stmt.Exec(ctx, treeID) + _, err := db.Exec(ctx, "UPDATE Trees SET DisplayName = NULL, Description = NULL WHERE TreeId = ?", treeID) return err } diff --git a/storage/postgresql/log_storage.go b/storage/postgresql/log_storage.go index 1a13749b76..fc2b211894 100644 --- a/storage/postgresql/log_storage.go +++ b/storage/postgresql/log_storage.go @@ -136,9 +136,9 @@ func NewLogStorage(db *pgxpool.Pool, mf monitoring.MetricFactory) storage.LogSto mf = monitoring.InertMetricFactory{} } return &postgreSQLLogStorage{ - admin: NewAdminStorage(db), + admin: NewAdminStorage(db), 
postgreSQLTreeStorage: newTreeStorage(db), - metricFactory: mf, + metricFactory: mf, } } @@ -340,19 +340,9 @@ func (t *logTreeTX) DequeueLeaves(ctx context.Context, limit int, cutoffTime tim } start := time.Now() - stx, err := t.tx.PrepareContext(ctx, selectQueuedLeavesSQL) - if err != nil { - klog.Warningf("Failed to prepare dequeue select: %s", err) - return nil, err - } - defer func() { - if err := stx.Close(); err != nil { - klog.Errorf("stx.Close(): %v", err) - } - }() leaves := make([]*trillian.LogLeaf, 0, limit) - rows, err := stx.Query(ctx, t.treeID, cutoffTime.UnixNano(), limit) + rows, err := t.tx.Query(ctx, selectQueuedLeavesSQL, t.treeID, cutoffTime.UnixNano(), limit) if err != nil { klog.Warningf("Failed to select rows for work: %s", err) return nil, err diff --git a/storage/postgresql/log_storage_test.go b/storage/postgresql/log_storage_test.go index 50b406662e..001ef5965c 100644 --- a/storage/postgresql/log_storage_test.go +++ b/storage/postgresql/log_storage_test.go @@ -31,7 +31,6 @@ import ( "github.com/google/trillian/types" "google.golang.org/protobuf/proto" "google.golang.org/protobuf/types/known/timestamppb" - "k8s.io/klog/v2" "github.com/jackc/pgx/v5/pgxpool" _ "github.com/jackc/pgx/v5/pgxpool" @@ -725,17 +724,8 @@ func TestGetActiveLogIDs(t *testing.T) { } // Update deleted trees accordingly - updateDeletedStmt, err := DB.PrepareContext(ctx, "UPDATE Trees SET Deleted = ? WHERE TreeId = ?") - if err != nil { - t.Fatalf("PrepareContext() returned err = %v", err) - } - defer func() { - if err := updateDeletedStmt.Close(); err != nil { - klog.Errorf("updateDeletedStmt.Close(): %v", err) - } - }() for _, treeID := range []int64{deletedLog.TreeId} { - if _, err := updateDeletedStmt.Exec(ctx, true, treeID); err != nil { + if _, err := DB.Exec(ctx, "UPDATE Trees SET Deleted = ? WHERE TreeId = ?", true, treeID); err != nil { t.Fatalf("Exec(%v) returned err = %v", treeID, err) } } diff --git a/storage/postgresql/queue.go b/storage/postgresql/queue.go index 822dbe8fbd..4cf1fedc06 100644 --- a/storage/postgresql/queue.go +++ b/storage/postgresql/queue.go @@ -123,18 +123,8 @@ func (t *logTreeTX) removeSequencedLeaves(ctx context.Context, leaves []dequeued // Don't need to re-sort because the query ordered by leaf hash. If that changes because // the query is expensive then the sort will need to be done here. See comment in // QueueLeaves. - stx, err := t.tx.PrepareContext(ctx, deleteUnsequencedSQL) - if err != nil { - klog.Warningf("Failed to prep delete statement for sequenced work: %v", err) - return err - } - defer func() { - if err := stx.Close(); err != nil { - klog.Errorf("stx.Close(): %v", err) - } - }() for _, dql := range leaves { - result, err := stx.Exec(ctx, t.treeID, dql.queueTimestampNanos, dql.leafIdentityHash) + result, err := t.tx.Exec(ctx, deleteUnsequencedSQL, t.treeID, dql.queueTimestampNanos, dql.leafIdentityHash) err = checkResultOkAndRowCountIs(result, err, int64(1)) if err != nil { return err diff --git a/storage/postgresql/tree_storage.go b/storage/postgresql/tree_storage.go index 76895c7f98..9e1438a071 100644 --- a/storage/postgresql/tree_storage.go +++ b/storage/postgresql/tree_storage.go @@ -71,12 +71,9 @@ const ( type postgreSQLTreeStorage struct { db *pgxpool.Pool - // Must hold the mutex before manipulating the statement map. Sharing a lock because - // it only needs to be held while the statements are built, not while they execute and - // this will be a short time. These maps are from the number of placeholder '?' 
- // in the query to the statement that should be used. - statementMutex sync.Mutex - statements map[string]map[int]*sql.Stmt + // pgx automatically prepares and caches statements, so there is no need for + // a statement map in this struct. + // (See https://github.com/jackc/pgx/wiki/Automatic-Prepared-Statement-Caching) } // OpenDB opens a database connection for all PostgreSQL-based storage implementations. @@ -93,8 +90,7 @@ func OpenDB(dbURL string) (*pgxpool.Pool, error) { func newTreeStorage(db *pgxpool.Pool) *postgreSQLTreeStorage { return &postgreSQLTreeStorage{ - db: db, - statements: make(map[string]map[int]*sql.Stmt), + db: db, } } From cb7bda273f1040c609b2db87a201b39af3cbba3a Mon Sep 17 00:00:00 2001 From: Rob Stradling Date: Tue, 1 Oct 2024 13:54:38 +0100 Subject: [PATCH 13/62] Use ANY to bind arrays, instead of expanding placeholders --- storage/postgresql/log_storage.go | 49 +++--------- storage/postgresql/queue_batching.go | 19 +---- storage/postgresql/tree_storage.go | 107 ++------------------------- 3 files changed, 20 insertions(+), 155 deletions(-) diff --git a/storage/postgresql/log_storage.go b/storage/postgresql/log_storage.go index fc2b211894..2d14d847ab 100644 --- a/storage/postgresql/log_storage.go +++ b/storage/postgresql/log_storage.go @@ -62,11 +62,10 @@ const ( WHERE l.LeafIdentityHash = s.LeafIdentityHash AND s.SequenceNumber >= ? AND s.SequenceNumber < ? AND l.TreeId = ? AND s.TreeId = l.TreeId` + orderBySequenceNumberSQL - // These statements need to be expanded to provide the correct number of parameter placeholders. selectLeavesByMerkleHashSQL = `SELECT s.MerkleLeafHash,l.LeafIdentityHash,l.LeafValue,s.SequenceNumber,l.ExtraData,l.QueueTimestampNanos,s.IntegrateTimestampNanos FROM LeafData l,SequencedLeafData s WHERE l.LeafIdentityHash = s.LeafIdentityHash - AND s.MerkleLeafHash IN (` + placeholderSQL + `) AND l.TreeId = ? AND s.TreeId = l.TreeId` + AND s.MerkleLeafHash = ANY(?) AND l.TreeId = ? AND s.TreeId = l.TreeId` // TODO(#1548): rework the code so the dummy hash isn't needed (e.g. this assumes hash size is 32) dummyMerkleLeafHash = "00000000000000000000000000000000" // This statement returns a dummy Merkle leaf hash value (which must be @@ -74,7 +73,7 @@ const ( // leaf-selection statements. selectLeavesByLeafIdentityHashSQL = `SELECT '` + dummyMerkleLeafHash + `',l.LeafIdentityHash,l.LeafValue,-1,l.ExtraData,l.QueueTimestampNanos,s.IntegrateTimestampNanos FROM LeafData l LEFT JOIN SequencedLeafData s ON (l.LeafIdentityHash = s.LeafIdentityHash AND l.TreeID = s.TreeID) - WHERE l.LeafIdentityHash IN (` + placeholderSQL + `) AND l.TreeId = ?` + WHERE l.LeafIdentityHash = ANY(?) 
AND l.TreeId = ?` // Same as above except with leaves ordered by sequence so we only incur this cost when necessary orderBySequenceNumberSQL = " ORDER BY s.SequenceNumber" @@ -146,18 +145,6 @@ func (m *postgreSQLLogStorage) CheckDatabaseAccessible(ctx context.Context) erro return m.db.Ping(ctx) } -func (m *postgreSQLLogStorage) getLeavesByMerkleHashStmt(ctx context.Context, num int, orderBySequence bool) (*sql.Stmt, error) { - if orderBySequence { - return m.getStmt(ctx, selectLeavesByMerkleHashOrderedBySequenceSQL, num, "?", "?") - } - - return m.getStmt(ctx, selectLeavesByMerkleHashSQL, num, "?", "?") -} - -func (m *postgreSQLLogStorage) getLeavesByLeafIdentityHashStmt(ctx context.Context, num int) (*sql.Stmt, error) { - return m.getStmt(ctx, selectLeavesByLeafIdentityHashSQL, num, "?", "?") -} - func (m *postgreSQLLogStorage) GetActiveLogIDs(ctx context.Context) ([]int64, error) { // Include logs that are DRAINING in the active list as we're still // integrating leaves into them. @@ -677,23 +664,21 @@ func (t *logTreeTX) GetLeavesByHash(ctx context.Context, leafHashes [][]byte, or t.treeTX.mu.Lock() defer t.treeTX.mu.Unlock() - tmpl, err := t.ls.getLeavesByMerkleHashStmt(ctx, len(leafHashes), orderBySequence) - if err != nil { - return nil, err + var query string + if orderBySequence { + query = selectLeavesByMerkleHashOrderedBySequenceSQL + } else { + query = selectLeavesByMerkleHashSQL } - return t.getLeavesByHashInternal(ctx, leafHashes, tmpl, "merkle") + return t.getLeavesByHashInternal(ctx, leafHashes, query, "merkle") } // getLeafDataByIdentityHash retrieves leaf data by LeafIdentityHash, returned // as a slice of LogLeaf objects for convenience. However, note that the // returned LogLeaf objects will not have a valid MerkleLeafHash, LeafIndex, or IntegrateTimestamp. func (t *logTreeTX) getLeafDataByIdentityHash(ctx context.Context, leafHashes [][]byte) ([]*trillian.LogLeaf, error) { - tmpl, err := t.ls.getLeavesByLeafIdentityHashStmt(ctx, len(leafHashes)) - if err != nil { - return nil, err - } - return t.getLeavesByHashInternal(ctx, leafHashes, tmpl, "leaf-identity") + return t.getLeavesByHashInternal(ctx, leafHashes, selectLeavesByLeafIdentityHashSQL, "leaf-identity") } func (t *logTreeTX) LatestSignedLogRoot(ctx context.Context) (*trillian.SignedLogRoot, error) { @@ -761,20 +746,8 @@ func (t *logTreeTX) StoreSignedLogRoot(ctx context.Context, root *trillian.Signe return checkResultOkAndRowCountIs(res, err, 1) } -func (t *logTreeTX) getLeavesByHashInternal(ctx context.Context, leafHashes [][]byte, tmpl *sql.Stmt, desc string) ([]*trillian.LogLeaf, error) { - stx := t.tx.StmtContext(ctx, tmpl) - defer func() { - if err := stx.Close(); err != nil { - klog.Errorf("stx.Close(): %v", err) - } - }() - - var args []interface{} - for _, hash := range leafHashes { - args = append(args, []byte(hash)) - } - args = append(args, t.treeID) - rows, err := stx.Query(ctx, args...) 
+func (t *logTreeTX) getLeavesByHashInternal(ctx context.Context, leafHashes [][]byte, query string, desc string) ([]*trillian.LogLeaf, error) { + rows, err := t.tx.Query(ctx, query, leafHashes, t.treeID) if err != nil { klog.Warningf("Query() %s hash = %v", desc, err) return nil, err diff --git a/storage/postgresql/queue_batching.go b/storage/postgresql/queue_batching.go index fb341b8810..8b84e56d9f 100644 --- a/storage/postgresql/queue_batching.go +++ b/storage/postgresql/queue_batching.go @@ -20,7 +20,6 @@ package postgresql import ( "context" "crypto/sha256" - "database/sql" "encoding/binary" "fmt" "strings" @@ -41,7 +40,7 @@ const ( AND QueueTimestampNanos<=? ORDER BY QueueTimestampNanos,LeafIdentityHash ASC LIMIT ?` insertUnsequencedEntrySQL = `INSERT INTO Unsequenced(TreeId,Bucket,LeafIdentityHash,MerkleLeafHash,QueueTimestampNanos,QueueID) VALUES(?,0,?,?,?,?)` - deleteUnsequencedSQL = "DELETE FROM Unsequenced WHERE QueueID IN ()" + deleteUnsequencedSQL = "DELETE FROM Unsequenced WHERE QueueID = ANY(?)" ) type dequeuedLeaf []byte @@ -122,27 +121,13 @@ func (t *logTreeTX) UpdateSequencedLeaves(ctx context.Context, leaves []*trillia return t.removeSequencedLeaves(ctx, dequeuedLeaves) } -func (m *postgreSQLLogStorage) getDeleteUnsequencedStmt(ctx context.Context, num int) (*sql.Stmt, error) { - return m.getStmt(ctx, deleteUnsequencedSQL, num, "?", "?") -} - // removeSequencedLeaves removes the passed in leaves slice (which may be // modified as part of the operation). func (t *logTreeTX) removeSequencedLeaves(ctx context.Context, queueIDs []dequeuedLeaf) error { // Don't need to re-sort because the query ordered by leaf hash. If that changes because // the query is expensive then the sort will need to be done here. See comment in // QueueLeaves. - tmpl, err := t.ls.getDeleteUnsequencedStmt(ctx, len(queueIDs)) - if err != nil { - klog.Warningf("Failed to get delete statement for sequenced work: %s", err) - return err - } - stx := t.tx.StmtContext(ctx, tmpl) - args := make([]interface{}, len(queueIDs)) - for i, q := range queueIDs { - args[i] = []byte(q) - } - result, err := stx.Exec(ctx, args...) + result, err := t.tx.Exec(ctx, deleteUnsequencedSQL, queueIDs) if err != nil { // Error is handled by checkResultOkAndRowCountIs() below klog.Warningf("Failed to delete sequenced work: %s", err) diff --git a/storage/postgresql/tree_storage.go b/storage/postgresql/tree_storage.go index 9e1438a071..71401d2148 100644 --- a/storage/postgresql/tree_storage.go +++ b/storage/postgresql/tree_storage.go @@ -17,11 +17,9 @@ package postgresql import ( "context" - "database/sql" "encoding/base64" "fmt" "runtime/debug" - "strings" "sync" "github.com/google/trillian" @@ -48,7 +46,7 @@ const ( FROM ( SELECT n.TreeId, n.SubtreeId, max(n.SubtreeRevision) AS MaxRevision FROM Subtree n - WHERE n.SubtreeId IN (` + placeholderSQL + `) AND + WHERE n.SubtreeId = ANY(?) AND n.TreeId = ? AND n.SubtreeRevision <= ? GROUP BY n.TreeId, n.SubtreeId ) AS x @@ -62,7 +60,7 @@ const ( SELECT SubtreeId, Subtree.Nodes FROM Subtree WHERE Subtree.TreeId = ? - AND SubtreeId IN (` + placeholderSQL + `)` + AND SubtreeId = ANY(?)` placeholderSQL = "" ) @@ -94,59 +92,6 @@ func newTreeStorage(db *pgxpool.Pool) *postgreSQLTreeStorage { } } -// expandPlaceholderSQL expands an sql statement by adding a specified number of '?' -// placeholder slots. At most one placeholder will be expanded. 
-func expandPlaceholderSQL(sql string, num int, first, rest string) string { - if num <= 0 { - panic(fmt.Errorf("trying to expand SQL placeholder with <= 0 parameters: %s", sql)) - } - - parameters := first + strings.Repeat(","+rest, num-1) - - return strings.Replace(sql, placeholderSQL, parameters, 1) -} - -// getStmt creates and caches sql.Stmt structs based on the passed in statement -// and number of bound arguments. -// TODO(al,martin): consider pulling this all out as a separate unit for reuse -// elsewhere. -func (m *postgreSQLTreeStorage) getStmt(ctx context.Context, statement string, num int, first, rest string) (*sql.Stmt, error) { - m.statementMutex.Lock() - defer m.statementMutex.Unlock() - - if m.statements[statement] != nil { - if m.statements[statement][num] != nil { - // TODO(al,martin): we'll possibly need to expire Stmts from the cache, - // e.g. when DB connections break etc. - return m.statements[statement][num], nil - } - } else { - m.statements[statement] = make(map[int]*sql.Stmt) - } - - s, err := m.db.PrepareContext(ctx, expandPlaceholderSQL(statement, num, first, rest)) - if err != nil { - klog.Warningf("Failed to prepare statement %d: %s", num, err) - return nil, err - } - - m.statements[statement][num] = s - - return s, nil -} - -func (m *postgreSQLTreeStorage) getSubtreeStmt(ctx context.Context, subtreeRevs bool, num int) (*sql.Stmt, error) { - if subtreeRevs { - return m.getStmt(ctx, selectSubtreeSQL, num, "?", "?") - } else { - return m.getStmt(ctx, selectSubtreeSQLNoRev, num, "?", "?") - } -} - -func (m *postgreSQLTreeStorage) setSubtreeStmt(ctx context.Context, num int) (*sql.Stmt, error) { - return m.getStmt(ctx, insertSubtreeMultiSQL, num, "VALUES(?, ?, ?, ?)", "(?, ?, ?, ?)") -} - func (m *postgreSQLTreeStorage) beginTreeTx(ctx context.Context, tree *trillian.Tree, hashSizeBytes int, subtreeCache *cache.SubtreeCache) (treeTX, error) { t, err := m.db.BeginTx(ctx, pgx.TxOptions{}) if err != nil { @@ -193,40 +138,13 @@ func (t *treeTX) getSubtrees(ctx context.Context, treeRevision int64, ids [][]by return nil, nil } - tmpl, err := t.ts.getSubtreeStmt(ctx, t.subtreeRevs, len(ids)) - if err != nil { - return nil, err - } - stx := t.tx.StmtContext(ctx, tmpl) - defer func() { - if err := stx.Close(); err != nil { - klog.Errorf("stx.Close(): %v", err) - } - }() - - var args []interface{} + var rows pgx.Rows + var err error if t.subtreeRevs { - args = make([]interface{}, 0, len(ids)+3) - // populate args with ids. - for _, id := range ids { - klog.V(4).Infof(" id: %x", id) - args = append(args, id) - } - args = append(args, t.treeID) - args = append(args, treeRevision) - args = append(args, t.treeID) + rows, err = t.tx.Query(ctx, selectSubtreeSQL, ids, t.treeID, treeRevision, t.treeID) } else { - args = make([]interface{}, 0, len(ids)+1) - args = append(args, t.treeID) - - // populate args with ids. - for _, id := range ids { - klog.V(4).Infof(" id: %x", id) - args = append(args, id) - } + rows, err = t.tx.Query(ctx, selectSubtreeSQLNoRev, t.treeID, ids) } - - rows, err := stx.Query(ctx, args...) 
if err != nil { klog.Warningf("Failed to get merkle subtrees: %s", err) return nil, err @@ -329,18 +247,7 @@ func (t *treeTX) storeSubtrees(ctx context.Context, subtrees []*storagepb.Subtre args = append(args, subtreeRev) } - tmpl, err := t.ts.setSubtreeStmt(ctx, len(subtrees)) - if err != nil { - return err - } - stx := t.tx.StmtContext(ctx, tmpl) - defer func() { - if err := stx.Close(); err != nil { - klog.Errorf("stx.Close(): %v", err) - } - }() - - r, err := stx.Exec(ctx, args...) + r, err := t.tx.Exec(ctx, insertSubtreeMultiSQL, args...) if err != nil { klog.Warningf("Failed to set merkle subtrees: %s", err) return err From 52f2867d775f25313065225043103ce7fbf7d4b3 Mon Sep 17 00:00:00 2001 From: Rob Stradling Date: Tue, 1 Oct 2024 13:56:12 +0100 Subject: [PATCH 14/62] Use PostgreSQL's COPY interface and a temporary table to upsert subtrees --- storage/postgresql/tree_storage.go | 38 ++++++++++++++++++++++++------ 1 file changed, 31 insertions(+), 7 deletions(-) diff --git a/storage/postgresql/tree_storage.go b/storage/postgresql/tree_storage.go index 71401d2148..58eb6c7201 100644 --- a/storage/postgresql/tree_storage.go +++ b/storage/postgresql/tree_storage.go @@ -37,7 +37,14 @@ import ( // These statements are fixed const ( - insertSubtreeMultiSQL = `INSERT INTO Subtree(TreeId, SubtreeId, Nodes, SubtreeRevision) ` + placeholderSQL + ` ON DUPLICATE KEY UPDATE Nodes=VALUES(Nodes)` + createTempSubtreeTable = `CREATE TEMP TABLE TempSubtree ( + TreeId BIGINT, + SubtreeId BYTEA, + Nodes BYTEA, + SubtreeRevision INTEGER, + CONSTRAINT TempSubtree_pk PRIMARY KEY (TreeId,SubtreeId,SubtreeRevision) +) ON COMMIT DROP` + insertSubtreeMultiSQL = `INSERT INTO Subtree(TreeId, SubtreeId, Nodes, SubtreeRevision) SELECT TreeId, SubtreeId, Nodes, SubtreeRevision FROM TempSubtree ON CONFLICT ON CONSTRAINT TempSubtree_pk DO UPDATE Nodes=EXCLUDED.Nodes` insertTreeHeadSQL = `INSERT INTO TreeHead(TreeId,TreeHeadTimestamp,TreeSize,RootHash,TreeRevision,RootSignature) VALUES(?,?,?,?,?,?)` @@ -223,7 +230,7 @@ func (t *treeTX) storeSubtrees(ctx context.Context, subtrees []*storagepb.Subtre // TODO(al): probably need to be able to batch this in the case where we have // a really large number of subtrees to store. - args := make([]interface{}, 0, len(subtrees)) + rows := make([][]interface{}, 0, len(subtrees)) // If not using subtree revisions then default value of 0 is fine. There is no // significance to this value, other than it cannot be NULL in the DB. @@ -241,13 +248,30 @@ func (t *treeTX) storeSubtrees(ctx context.Context, subtrees []*storagepb.Subtre if err != nil { return err } - args = append(args, t.treeID) - args = append(args, s.Prefix) - args = append(args, subtreeBytes) - args = append(args, subtreeRev) + rows = append(rows, []interface{}{t.treeID, s.Prefix, subtreeBytes, subtreeRev}) } - r, err := t.tx.Exec(ctx, insertSubtreeMultiSQL, args...) + // Create temporary subtree table. + _, err := t.tx.Exec(ctx, createTempSubtreeTable) + if err != nil { + klog.Warningf("Failed to create temporary subtree table: %s", err) + return err + } + + // Copy subtrees to temporary table. + _, err = t.tx.CopyFrom( + ctx, + pgx.Identifier{"TempSubtree"}, + []string{"TreeId", "SubtreeId", "Nodes", "SubtreeRevision"}, + pgx.CopyFromRows(rows), + ) + if err != nil { + klog.Warningf("Failed to copy merkle subtrees: %s", err) + return err + } + + // Upsert the subtrees. 
+ _, err = t.tx.Exec(ctx, insertSubtreeMultiSQL) if err != nil { klog.Warningf("Failed to set merkle subtrees: %s", err) return err From 51def326acb53400362fd50a858ad8937f6876d0 Mon Sep 17 00:00:00 2001 From: Rob Stradling Date: Tue, 1 Oct 2024 13:58:49 +0100 Subject: [PATCH 15/62] Use PostgreSQL parameter placeholder syntax --- storage/postgresql/admin_storage.go | 18 +++++++++--------- storage/postgresql/admin_storage_test.go | 8 ++++---- storage/postgresql/log_storage.go | 14 +++++++------- storage/postgresql/log_storage_test.go | 12 ++++++------ storage/postgresql/queue.go | 10 +++++----- storage/postgresql/queue_batching.go | 10 +++++----- storage/postgresql/tree_storage.go | 13 ++++++------- 7 files changed, 42 insertions(+), 43 deletions(-) diff --git a/storage/postgresql/admin_storage.go b/storage/postgresql/admin_storage.go index c44d3f1c6a..d3007e4d50 100644 --- a/storage/postgresql/admin_storage.go +++ b/storage/postgresql/admin_storage.go @@ -60,11 +60,11 @@ const ( DeleteTimeMillis FROM Trees` selectNonDeletedTrees = selectTrees + nonDeletedWhere - selectTreeByID = selectTrees + " WHERE TreeId = ?" + selectTreeByID = selectTrees + " WHERE TreeId = $1" updateTreeSQL = `UPDATE Trees - SET TreeState = ?, TreeType = ?, DisplayName = ?, Description = ?, UpdateTimeMillis = ?, MaxRootDurationMillis = ?, PrivateKey = ? - WHERE TreeId = ?` + SET TreeState = $1, TreeType = $2, DisplayName = $3, Description = $4, UpdateTimeMillis = $5, MaxRootDurationMillis = $6, PrivateKey = $7 + WHERE TreeId = $8` ) // NewAdminStorage returns a PostgreSQL storage.AdminStorage implementation backed by DB. @@ -259,7 +259,7 @@ func (t *adminTX) CreateTree(ctx context.Context, tree *trillian.Tree) (*trillia PrivateKey, -- Unused PublicKey, -- Used to store StorageSettings MaxRootDurationMillis) - VALUES(?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`, + VALUES($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13)`, newTree.TreeId, newTree.TreeState.String(), newTree.TreeType.String(), @@ -285,7 +285,7 @@ func (t *adminTX) CreateTree(ctx context.Context, tree *trillian.Tree) (*trillia SigningEnabled, SequencingEnabled, SequenceIntervalSeconds) - VALUES(?, ?, ?, ?)`, + VALUES($1, $2, $3, $4)`, newTree.TreeId, true, /* SigningEnabled */ true, /* SequencingEnabled */ @@ -364,7 +364,7 @@ func (t *adminTX) updateDeleted(ctx context.Context, treeID int64, deleted bool, } if _, err := t.tx.Exec( ctx, - "UPDATE Trees SET Deleted = ?, DeleteTimeMillis = ? 
WHERE TreeId = ?", + "UPDATE Trees SET Deleted = $1, DeleteTimeMillis = $2 WHERE TreeId = $3", deleted, deleteTimeMillis, treeID); err != nil { return nil, err } @@ -377,16 +377,16 @@ func (t *adminTX) HardDeleteTree(ctx context.Context, treeID int64) error { } // TreeControl didn't have "ON DELETE CASCADE" on previous versions, so let's hit it explicitly - if _, err := t.tx.Exec(ctx, "DELETE FROM TreeControl WHERE TreeId = ?", treeID); err != nil { + if _, err := t.tx.Exec(ctx, "DELETE FROM TreeControl WHERE TreeId = $1", treeID); err != nil { return err } - _, err := t.tx.Exec(ctx, "DELETE FROM Trees WHERE TreeId = ?", treeID) + _, err := t.tx.Exec(ctx, "DELETE FROM Trees WHERE TreeId = $1", treeID) return err } func validateDeleted(ctx context.Context, tx pgx.Tx, treeID int64, wantDeleted bool) error { var nullDeleted sql.NullBool - switch err := tx.QueryRow(ctx, "SELECT Deleted FROM Trees WHERE TreeId = ?", treeID).Scan(&nullDeleted); { + switch err := tx.QueryRow(ctx, "SELECT Deleted FROM Trees WHERE TreeId = $1", treeID).Scan(&nullDeleted); { case err == pgx.ErrNoRows: return status.Errorf(codes.NotFound, "tree %v not found", treeID) case err != nil: diff --git a/storage/postgresql/admin_storage_test.go b/storage/postgresql/admin_storage_test.go index e5e0b7862b..2e6dca2497 100644 --- a/storage/postgresql/admin_storage_test.go +++ b/storage/postgresql/admin_storage_test.go @@ -31,7 +31,7 @@ import ( "google.golang.org/protobuf/types/known/anypb" ) -const selectTreeControlByID = "SELECT SigningEnabled, SequencingEnabled, SequenceIntervalSeconds FROM TreeControl WHERE TreeId = ?" +const selectTreeControlByID = "SELECT SigningEnabled, SequencingEnabled, SequenceIntervalSeconds FROM TreeControl WHERE TreeId = $1" func TestPostgresqlAdminStorage(t *testing.T) { tester := &testonly.AdminStorageTester{NewAdminStorage: func() storage.AdminStorage { @@ -261,7 +261,7 @@ func TestAdminTX_GetTreeLegacies(t *testing.T) { if err != nil { t.Fatal(err) } - if _, err := tx.Exec("UPDATE Trees SET PublicKey = ? WHERE TreeId = ?", tC.key, tree.TreeId); err != nil { + if _, err := tx.Exec("UPDATE Trees SET PublicKey = $1 WHERE TreeId = $2", tC.key, tree.TreeId); err != nil { t.Fatal(err) } if err := tx.Commit(ctx); err != nil { @@ -304,7 +304,7 @@ func TestAdminTX_HardDeleteTree(t *testing.T) { // database and check that the rows are gone, so let's do just that. // If there's no record on Trees, then there can be no record in any of the dependent tables. 
var name string - if err := DB.QueryRow(ctx, "SELECT DisplayName FROM Trees WHERE TreeId = ?", tree.TreeId).Scan(&name); err != pgx.ErrNoRows { + if err := DB.QueryRow(ctx, "SELECT DisplayName FROM Trees WHERE TreeId = $1", tree.TreeId).Scan(&name); err != pgx.ErrNoRows { t.Errorf("QueryRow() returned err = %v, want = %v", err, pgx.ErrNoRows) } } @@ -333,6 +333,6 @@ func TestCheckDatabaseAccessible_OK(t *testing.T) { } func setNulls(ctx context.Context, db *pgxpool.Pool, treeID int64) error { - _, err := db.Exec(ctx, "UPDATE Trees SET DisplayName = NULL, Description = NULL WHERE TreeId = ?", treeID) + _, err := db.Exec(ctx, "UPDATE Trees SET DisplayName = NULL, Description = NULL WHERE TreeId = $1", treeID) return err } diff --git a/storage/postgresql/log_storage.go b/storage/postgresql/log_storage.go index 2d14d847ab..1e85d50b6e 100644 --- a/storage/postgresql/log_storage.go +++ b/storage/postgresql/log_storage.go @@ -42,30 +42,30 @@ import ( ) const ( - valuesPlaceholder5 = "(?,?,?,?,?)" + valuesPlaceholder5 = "($1,$2,$3,$4,$5)" insertLeafDataSQL = "INSERT INTO LeafData(TreeId,LeafIdentityHash,LeafValue,ExtraData,QueueTimestampNanos) VALUES" + valuesPlaceholder5 insertSequencedLeafSQL = "INSERT INTO SequencedLeafData(TreeId,LeafIdentityHash,MerkleLeafHash,SequenceNumber,IntegrateTimestampNanos) VALUES" selectNonDeletedTreeIDByTypeAndStateSQL = ` SELECT TreeId FROM Trees - WHERE TreeType IN(?,?) - AND TreeState IN(?,?) + WHERE TreeType IN($1,$2) + AND TreeState IN($3,$4) AND (Deleted IS NULL OR Deleted = 'false')` selectLatestSignedLogRootSQL = `SELECT TreeHeadTimestamp,TreeSize,RootHash,TreeRevision,RootSignature - FROM TreeHead WHERE TreeId=? + FROM TreeHead WHERE TreeId=$1 ORDER BY TreeHeadTimestamp DESC LIMIT 1` selectLeavesByRangeSQL = `SELECT s.MerkleLeafHash,l.LeafIdentityHash,l.LeafValue,s.SequenceNumber,l.ExtraData,l.QueueTimestampNanos,s.IntegrateTimestampNanos FROM LeafData l,SequencedLeafData s WHERE l.LeafIdentityHash = s.LeafIdentityHash - AND s.SequenceNumber >= ? AND s.SequenceNumber < ? AND l.TreeId = ? AND s.TreeId = l.TreeId` + orderBySequenceNumberSQL + AND s.SequenceNumber >= $1 AND s.SequenceNumber < $2 AND l.TreeId = $3 AND s.TreeId = l.TreeId` + orderBySequenceNumberSQL selectLeavesByMerkleHashSQL = `SELECT s.MerkleLeafHash,l.LeafIdentityHash,l.LeafValue,s.SequenceNumber,l.ExtraData,l.QueueTimestampNanos,s.IntegrateTimestampNanos FROM LeafData l,SequencedLeafData s WHERE l.LeafIdentityHash = s.LeafIdentityHash - AND s.MerkleLeafHash = ANY(?) AND l.TreeId = ? AND s.TreeId = l.TreeId` + AND s.MerkleLeafHash = ANY($1) AND l.TreeId = $2 AND s.TreeId = l.TreeId` // TODO(#1548): rework the code so the dummy hash isn't needed (e.g. this assumes hash size is 32) dummyMerkleLeafHash = "00000000000000000000000000000000" // This statement returns a dummy Merkle leaf hash value (which must be @@ -73,7 +73,7 @@ const ( // leaf-selection statements. selectLeavesByLeafIdentityHashSQL = `SELECT '` + dummyMerkleLeafHash + `',l.LeafIdentityHash,l.LeafValue,-1,l.ExtraData,l.QueueTimestampNanos,s.IntegrateTimestampNanos FROM LeafData l LEFT JOIN SequencedLeafData s ON (l.LeafIdentityHash = s.LeafIdentityHash AND l.TreeID = s.TreeID) - WHERE l.LeafIdentityHash = ANY(?) 
AND l.TreeId = ?` + WHERE l.LeafIdentityHash = ANY($1) AND l.TreeId = $2` // Same as above except with leaves ordered by sequence so we only incur this cost when necessary orderBySequenceNumberSQL = " ORDER BY s.SequenceNumber" diff --git a/storage/postgresql/log_storage_test.go b/storage/postgresql/log_storage_test.go index 001ef5965c..cd1eac4c47 100644 --- a/storage/postgresql/log_storage_test.go +++ b/storage/postgresql/log_storage_test.go @@ -70,8 +70,8 @@ func createFakeLeaf(ctx context.Context, db *pgxpool.Pool, logID int64, rawHash, t.Helper() queuedAtNanos := fakeQueueTime.UnixNano() integratedAtNanos := fakeIntegrateTime.UnixNano() - _, err := db.Exec(ctx, "INSERT INTO LeafData(TreeId, LeafIdentityHash, LeafValue, ExtraData, QueueTimestampNanos) VALUES(?,?,?,?,?)", logID, rawHash, data, extraData, queuedAtNanos) - _, err2 := db.Exec(ctx, "INSERT INTO SequencedLeafData(TreeId, SequenceNumber, LeafIdentityHash, MerkleLeafHash, IntegrateTimestampNanos) VALUES(?,?,?,?,?)", logID, seq, rawHash, hash, integratedAtNanos) + _, err := db.Exec(ctx, "INSERT INTO LeafData(TreeId, LeafIdentityHash, LeafValue, ExtraData, QueueTimestampNanos) VALUES($1,$2,$3,$4,$5)", logID, rawHash, data, extraData, queuedAtNanos) + _, err2 := db.Exec(ctx, "INSERT INTO SequencedLeafData(TreeId, SequenceNumber, LeafIdentityHash, MerkleLeafHash, IntegrateTimestampNanos) VALUES($1,$2,$3,$4,$5)", logID, seq, rawHash, hash, integratedAtNanos) if err != nil || err2 != nil { t.Fatalf("Failed to create test leaves: %v %v", err, err2) @@ -213,7 +213,7 @@ func TestQueueLeaves(t *testing.T) { // Should see the leaves in the database. There is no API to read from the unsequenced data. var count int - if err := DB.QueryRow(ctx, "SELECT COUNT(*) FROM Unsequenced WHERE TreeID=?", tree.TreeId).Scan(&count); err != nil { + if err := DB.QueryRow(ctx, "SELECT COUNT(*) FROM Unsequenced WHERE TreeID=$1", tree.TreeId).Scan(&count); err != nil { t.Fatalf("Could not query row count: %v", err) } if leavesToInsert != count { @@ -222,7 +222,7 @@ func TestQueueLeaves(t *testing.T) { // Additional check on timestamp being set correctly in the database var queueTimestamp int64 - if err := DB.QueryRow(ctx, "SELECT DISTINCT QueueTimestampNanos FROM Unsequenced WHERE TreeID=?", tree.TreeId).Scan(&queueTimestamp); err != nil { + if err := DB.QueryRow(ctx, "SELECT DISTINCT QueueTimestampNanos FROM Unsequenced WHERE TreeID=$1", tree.TreeId).Scan(&queueTimestamp); err != nil { t.Fatalf("Could not query timestamp: %v", err) } if got, want := queueTimestamp, fakeQueueTime.UnixNano(); got != want { @@ -253,7 +253,7 @@ func TestQueueLeavesDuplicateBigBatch(t *testing.T) { // Should see the leaves in the database. There is no API to read from the unsequenced data. var count int - if err := DB.QueryRow(ctx, "SELECT COUNT(*) FROM Unsequenced WHERE TreeID=?", tree.TreeId).Scan(&count); err != nil { + if err := DB.QueryRow(ctx, "SELECT COUNT(*) FROM Unsequenced WHERE TreeID=$1", tree.TreeId).Scan(&count); err != nil { t.Fatalf("Could not query row count: %v", err) } if leafCount != count { @@ -725,7 +725,7 @@ func TestGetActiveLogIDs(t *testing.T) { // Update deleted trees accordingly for _, treeID := range []int64{deletedLog.TreeId} { - if _, err := DB.Exec(ctx, "UPDATE Trees SET Deleted = ? 
WHERE TreeId = ?", true, treeID); err != nil { + if _, err := DB.Exec(ctx, "UPDATE Trees SET Deleted = $1 WHERE TreeId = $2", true, treeID); err != nil { t.Fatalf("Exec(%v) returned err = %v", treeID, err) } } diff --git a/storage/postgresql/queue.go b/storage/postgresql/queue.go index 4cf1fedc06..9ddcb25448 100644 --- a/storage/postgresql/queue.go +++ b/storage/postgresql/queue.go @@ -33,13 +33,13 @@ const ( // If this statement ORDER BY clause is changed refer to the comment in removeSequencedLeaves selectQueuedLeavesSQL = `SELECT LeafIdentityHash,MerkleLeafHash,QueueTimestampNanos FROM Unsequenced - WHERE TreeID=? + WHERE TreeID=$1 AND Bucket=0 - AND QueueTimestampNanos<=? - ORDER BY QueueTimestampNanos,LeafIdentityHash ASC LIMIT ?` + AND QueueTimestampNanos<=$2 + ORDER BY QueueTimestampNanos,LeafIdentityHash ASC LIMIT $3` insertUnsequencedEntrySQL = `INSERT INTO Unsequenced(TreeId,Bucket,LeafIdentityHash,MerkleLeafHash,QueueTimestampNanos) - VALUES(?,0,?,?,?)` - deleteUnsequencedSQL = "DELETE FROM Unsequenced WHERE TreeId=? AND Bucket=0 AND QueueTimestampNanos=? AND LeafIdentityHash=?" + VALUES($1,0,$2,$3,$4)` + deleteUnsequencedSQL = "DELETE FROM Unsequenced WHERE TreeId=$1 AND Bucket=0 AND QueueTimestampNanos=$2 AND LeafIdentityHash=$3" ) type dequeuedLeaf struct { diff --git a/storage/postgresql/queue_batching.go b/storage/postgresql/queue_batching.go index 8b84e56d9f..b87542e82c 100644 --- a/storage/postgresql/queue_batching.go +++ b/storage/postgresql/queue_batching.go @@ -35,12 +35,12 @@ const ( // If this statement ORDER BY clause is changed refer to the comment in removeSequencedLeaves selectQueuedLeavesSQL = `SELECT LeafIdentityHash,MerkleLeafHash,QueueTimestampNanos,QueueID FROM Unsequenced - WHERE TreeID=? + WHERE TreeID=$1 AND Bucket=0 - AND QueueTimestampNanos<=? - ORDER BY QueueTimestampNanos,LeafIdentityHash ASC LIMIT ?` - insertUnsequencedEntrySQL = `INSERT INTO Unsequenced(TreeId,Bucket,LeafIdentityHash,MerkleLeafHash,QueueTimestampNanos,QueueID) VALUES(?,0,?,?,?,?)` - deleteUnsequencedSQL = "DELETE FROM Unsequenced WHERE QueueID = ANY(?)" + AND QueueTimestampNanos<=$2 + ORDER BY QueueTimestampNanos,LeafIdentityHash ASC LIMIT $3` + insertUnsequencedEntrySQL = `INSERT INTO Unsequenced(TreeId,Bucket,LeafIdentityHash,MerkleLeafHash,QueueTimestampNanos,QueueID) VALUES($1,0,$2,$3,$4,$5)` + deleteUnsequencedSQL = "DELETE FROM Unsequenced WHERE QueueID = ANY($1)" ) type dequeuedLeaf []byte diff --git a/storage/postgresql/tree_storage.go b/storage/postgresql/tree_storage.go index 58eb6c7201..eab7b9c711 100644 --- a/storage/postgresql/tree_storage.go +++ b/storage/postgresql/tree_storage.go @@ -46,29 +46,28 @@ const ( ) ON COMMIT DROP` insertSubtreeMultiSQL = `INSERT INTO Subtree(TreeId, SubtreeId, Nodes, SubtreeRevision) SELECT TreeId, SubtreeId, Nodes, SubtreeRevision FROM TempSubtree ON CONFLICT ON CONSTRAINT TempSubtree_pk DO UPDATE Nodes=EXCLUDED.Nodes` insertTreeHeadSQL = `INSERT INTO TreeHead(TreeId,TreeHeadTimestamp,TreeSize,RootHash,TreeRevision,RootSignature) - VALUES(?,?,?,?,?,?)` + VALUES($1,$2,$3,$4,$5,$6)` selectSubtreeSQL = ` SELECT x.SubtreeId, Subtree.Nodes FROM ( SELECT n.TreeId, n.SubtreeId, max(n.SubtreeRevision) AS MaxRevision FROM Subtree n - WHERE n.SubtreeId = ANY(?) AND - n.TreeId = ? AND n.SubtreeRevision <= ? 
+ WHERE n.SubtreeId = ANY($1) AND + n.TreeId = $2 AND n.SubtreeRevision <= $3 GROUP BY n.TreeId, n.SubtreeId ) AS x INNER JOIN Subtree ON Subtree.SubtreeId = x.SubtreeId AND Subtree.SubtreeRevision = x.MaxRevision AND Subtree.TreeId = x.TreeId - AND Subtree.TreeId = ?` + AND Subtree.TreeId = $4` selectSubtreeSQLNoRev = ` SELECT SubtreeId, Subtree.Nodes FROM Subtree - WHERE Subtree.TreeId = ? - AND SubtreeId = ANY(?)` - placeholderSQL = "" + WHERE Subtree.TreeId = $1 + AND SubtreeId = ANY($2)` ) // postgreSQLTreeStorage is shared between the postgreSQLLog- and (forthcoming) postgreSQLMap- From 22f955a57edabb67b855e498b55b2d2343bbcfb8 Mon Sep 17 00:00:00 2001 From: Rob Stradling Date: Tue, 1 Oct 2024 13:59:51 +0100 Subject: [PATCH 16/62] Format SQL queries consistently and without including unnecessary whitespace in the strings --- storage/postgresql/admin_storage.go | 65 +++++------------------- storage/postgresql/admin_storage_test.go | 10 ++-- storage/postgresql/log_storage.go | 51 +++++++++++-------- storage/postgresql/log_storage_test.go | 12 ++--- storage/postgresql/queue.go | 18 +++---- storage/postgresql/queue_batching.go | 17 ++++--- storage/postgresql/schema/storage.sql | 2 +- storage/postgresql/tree_storage.go | 60 +++++++++++----------- 8 files changed, 103 insertions(+), 132 deletions(-) diff --git a/storage/postgresql/admin_storage.go b/storage/postgresql/admin_storage.go index d3007e4d50..46858da61f 100644 --- a/storage/postgresql/admin_storage.go +++ b/storage/postgresql/admin_storage.go @@ -39,32 +39,14 @@ import ( const ( defaultSequenceIntervalSeconds = 60 - nonDeletedWhere = " WHERE (Deleted IS NULL OR Deleted = 'false')" - - selectTrees = ` - SELECT - TreeId, - TreeState, - TreeType, - HashStrategy, - HashAlgorithm, - SignatureAlgorithm, - DisplayName, - Description, - CreateTimeMillis, - UpdateTimeMillis, - PrivateKey, -- Unused - PublicKey, -- Used to store StorageSettings - MaxRootDurationMillis, - Deleted, - DeleteTimeMillis - FROM Trees` - selectNonDeletedTrees = selectTrees + nonDeletedWhere - selectTreeByID = selectTrees + " WHERE TreeId = $1" - - updateTreeSQL = `UPDATE Trees - SET TreeState = $1, TreeType = $2, DisplayName = $3, Description = $4, UpdateTimeMillis = $5, MaxRootDurationMillis = $6, PrivateKey = $7 - WHERE TreeId = $8` + selectTrees = "SELECT TreeId,TreeState,TreeType,HashStrategy,HashAlgorithm,SignatureAlgorithm,DisplayName,Description,CreateTimeMillis,UpdateTimeMillis,PrivateKey,PublicKey,MaxRootDurationMillis,Deleted,DeleteTimeMillis " + + "FROM Trees" // PrivateKey is unused; PublicKey is used to store StorageSettings. + selectNonDeletedTrees = selectTrees + " WHERE (Deleted IS NULL OR Deleted='false')" + selectTreeByID = selectTrees + " WHERE TreeId=$1" + + updateTreeSQL = "UPDATE Trees " + + "SET TreeState=$1,TreeType=$2,DisplayName=$3,Description=$4,UpdateTimeMillis=$5,MaxRootDurationMillis=$6,PrivateKey=$7 " + + "WHERE TreeId=$8" ) // NewAdminStorage returns a PostgreSQL storage.AdminStorage implementation backed by DB. 
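Note: the placeholder rewrites above reflect pgx's use of PostgreSQL's native positional parameters ($1, $2, ...) in place of MySQL's '?' markers; arguments are still passed positionally to Exec/Query/QueryRow. A minimal sketch of the calling pattern, assuming a pgxpool.Pool and the Unsequenced table from the schema (the package and helper names are illustrative only):

    package example

    import (
        "context"

        "github.com/jackc/pgx/v5/pgxpool"
    )

    // countUnsequenced shows $n parameter binding with pgx; with MySQL the same
    // query would have been written "... WHERE TreeId=?".
    func countUnsequenced(ctx context.Context, db *pgxpool.Pool, treeID int64) (int, error) {
        var count int
        err := db.QueryRow(ctx, "SELECT COUNT(*) FROM Unsequenced WHERE TreeId=$1", treeID).Scan(&count)
        return count, err
    }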
@@ -245,21 +227,7 @@ func (t *adminTX) CreateTree(ctx context.Context, tree *trillian.Tree) (*trillia _, err = t.tx.Exec( ctx, - `INSERT INTO Trees( - TreeId, - TreeState, - TreeType, - HashStrategy, - HashAlgorithm, - SignatureAlgorithm, - DisplayName, - Description, - CreateTimeMillis, - UpdateTimeMillis, - PrivateKey, -- Unused - PublicKey, -- Used to store StorageSettings - MaxRootDurationMillis) - VALUES($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13)`, + "INSERT INTO Trees(TreeId,TreeState,TreeType,HashStrategy,HashAlgorithm,SignatureAlgorithm,DisplayName,Description,CreateTimeMillis,UpdateTimeMillis,PrivateKey,PublicKey,MaxRootDurationMillis) VALUES($1,$2,$3,$4,$5,$6,$7,$8,$9,$10,$11,$12,$13)", newTree.TreeId, newTree.TreeState.String(), newTree.TreeType.String(), @@ -280,12 +248,7 @@ func (t *adminTX) CreateTree(ctx context.Context, tree *trillian.Tree) (*trillia _, err = t.tx.Exec( ctx, - `INSERT INTO TreeControl( - TreeId, - SigningEnabled, - SequencingEnabled, - SequenceIntervalSeconds) - VALUES($1, $2, $3, $4)`, + "INSERT INTO TreeControl(TreeId,SigningEnabled,SequencingEnabled,SequenceIntervalSeconds) VALUES($1,$2,$3,$4)", newTree.TreeId, true, /* SigningEnabled */ true, /* SequencingEnabled */ @@ -364,7 +327,7 @@ func (t *adminTX) updateDeleted(ctx context.Context, treeID int64, deleted bool, } if _, err := t.tx.Exec( ctx, - "UPDATE Trees SET Deleted = $1, DeleteTimeMillis = $2 WHERE TreeId = $3", + "UPDATE Trees SET Deleted=$1, DeleteTimeMillis=$2 WHERE TreeId=$3", deleted, deleteTimeMillis, treeID); err != nil { return nil, err } @@ -377,16 +340,16 @@ func (t *adminTX) HardDeleteTree(ctx context.Context, treeID int64) error { } // TreeControl didn't have "ON DELETE CASCADE" on previous versions, so let's hit it explicitly - if _, err := t.tx.Exec(ctx, "DELETE FROM TreeControl WHERE TreeId = $1", treeID); err != nil { + if _, err := t.tx.Exec(ctx, "DELETE FROM TreeControl WHERE TreeId=$1", treeID); err != nil { return err } - _, err := t.tx.Exec(ctx, "DELETE FROM Trees WHERE TreeId = $1", treeID) + _, err := t.tx.Exec(ctx, "DELETE FROM Trees WHERE TreeId=$1", treeID) return err } func validateDeleted(ctx context.Context, tx pgx.Tx, treeID int64, wantDeleted bool) error { var nullDeleted sql.NullBool - switch err := tx.QueryRow(ctx, "SELECT Deleted FROM Trees WHERE TreeId = $1", treeID).Scan(&nullDeleted); { + switch err := tx.QueryRow(ctx, "SELECT Deleted FROM Trees WHERE TreeId=$1", treeID).Scan(&nullDeleted); { case err == pgx.ErrNoRows: return status.Errorf(codes.NotFound, "tree %v not found", treeID) case err != nil: diff --git a/storage/postgresql/admin_storage_test.go b/storage/postgresql/admin_storage_test.go index 2e6dca2497..e9c0c1fa2c 100644 --- a/storage/postgresql/admin_storage_test.go +++ b/storage/postgresql/admin_storage_test.go @@ -31,7 +31,9 @@ import ( "google.golang.org/protobuf/types/known/anypb" ) -const selectTreeControlByID = "SELECT SigningEnabled, SequencingEnabled, SequenceIntervalSeconds FROM TreeControl WHERE TreeId = $1" +const selectTreeControlByID = "SELECT SigningEnabled,SequencingEnabled,SequenceIntervalSeconds " + + "FROM TreeControl " + + "WHERE TreeId=$1" func TestPostgresqlAdminStorage(t *testing.T) { tester := &testonly.AdminStorageTester{NewAdminStorage: func() storage.AdminStorage { @@ -261,7 +263,7 @@ func TestAdminTX_GetTreeLegacies(t *testing.T) { if err != nil { t.Fatal(err) } - if _, err := tx.Exec("UPDATE Trees SET PublicKey = $1 WHERE TreeId = $2", tC.key, tree.TreeId); err != nil { + if _, err := tx.Exec("UPDATE Trees 
SET PublicKey=$1 WHERE TreeId=$2", tC.key, tree.TreeId); err != nil { t.Fatal(err) } if err := tx.Commit(ctx); err != nil { @@ -304,7 +306,7 @@ func TestAdminTX_HardDeleteTree(t *testing.T) { // database and check that the rows are gone, so let's do just that. // If there's no record on Trees, then there can be no record in any of the dependent tables. var name string - if err := DB.QueryRow(ctx, "SELECT DisplayName FROM Trees WHERE TreeId = $1", tree.TreeId).Scan(&name); err != pgx.ErrNoRows { + if err := DB.QueryRow(ctx, "SELECT DisplayName FROM Trees WHERE TreeId=$1", tree.TreeId).Scan(&name); err != pgx.ErrNoRows { t.Errorf("QueryRow() returned err = %v, want = %v", err, pgx.ErrNoRows) } } @@ -333,6 +335,6 @@ func TestCheckDatabaseAccessible_OK(t *testing.T) { } func setNulls(ctx context.Context, db *pgxpool.Pool, treeID int64) error { - _, err := db.Exec(ctx, "UPDATE Trees SET DisplayName = NULL, Description = NULL WHERE TreeId = $1", treeID) + _, err := db.Exec(ctx, "UPDATE Trees SET DisplayName=NULL,Description=NULL WHERE TreeId=$1", treeID) return err } diff --git a/storage/postgresql/log_storage.go b/storage/postgresql/log_storage.go index 1e85d50b6e..f9c9774355 100644 --- a/storage/postgresql/log_storage.go +++ b/storage/postgresql/log_storage.go @@ -47,33 +47,40 @@ const ( insertLeafDataSQL = "INSERT INTO LeafData(TreeId,LeafIdentityHash,LeafValue,ExtraData,QueueTimestampNanos) VALUES" + valuesPlaceholder5 insertSequencedLeafSQL = "INSERT INTO SequencedLeafData(TreeId,LeafIdentityHash,MerkleLeafHash,SequenceNumber,IntegrateTimestampNanos) VALUES" - selectNonDeletedTreeIDByTypeAndStateSQL = ` - SELECT TreeId FROM Trees - WHERE TreeType IN($1,$2) - AND TreeState IN($3,$4) - AND (Deleted IS NULL OR Deleted = 'false')` - - selectLatestSignedLogRootSQL = `SELECT TreeHeadTimestamp,TreeSize,RootHash,TreeRevision,RootSignature - FROM TreeHead WHERE TreeId=$1 - ORDER BY TreeHeadTimestamp DESC LIMIT 1` - - selectLeavesByRangeSQL = `SELECT s.MerkleLeafHash,l.LeafIdentityHash,l.LeafValue,s.SequenceNumber,l.ExtraData,l.QueueTimestampNanos,s.IntegrateTimestampNanos - FROM LeafData l,SequencedLeafData s - WHERE l.LeafIdentityHash = s.LeafIdentityHash - AND s.SequenceNumber >= $1 AND s.SequenceNumber < $2 AND l.TreeId = $3 AND s.TreeId = l.TreeId` + orderBySequenceNumberSQL - - selectLeavesByMerkleHashSQL = `SELECT s.MerkleLeafHash,l.LeafIdentityHash,l.LeafValue,s.SequenceNumber,l.ExtraData,l.QueueTimestampNanos,s.IntegrateTimestampNanos - FROM LeafData l,SequencedLeafData s - WHERE l.LeafIdentityHash = s.LeafIdentityHash - AND s.MerkleLeafHash = ANY($1) AND l.TreeId = $2 AND s.TreeId = l.TreeId` + selectNonDeletedTreeIDByTypeAndStateSQL = "SELECT TreeId " + + "FROM Trees " + + "WHERE TreeType IN($1,$2)" + + " AND TreeState IN($3,$4)" + + " AND (Deleted IS NULL OR Deleted='false')" + + selectLatestSignedLogRootSQL = "SELECT TreeHeadTimestamp,TreeSize,RootHash,TreeRevision,RootSignature " + + "FROM TreeHead " + + "WHERE TreeId=$1 " + + "ORDER BY TreeHeadTimestamp DESC " + + "LIMIT 1" + + selectLeavesByRangeSQL = "SELECT s.MerkleLeafHash,l.LeafIdentityHash,l.LeafValue,s.SequenceNumber,l.ExtraData,l.QueueTimestampNanos,s.IntegrateTimestampNanos " + + "FROM SequencedLeafData s" + + " INNER JOIN LeafData l ON (s.LeafIdentityHash=l.LeafIdentityHash AND s.TreeId=l.TreeId) " + + "WHERE s.SequenceNumber>=$1" + + " AND s.SequenceNumber<$2" + + " AND l.TreeId=$3" + orderBySequenceNumberSQL + + selectLeavesByMerkleHashSQL = "SELECT 
s.MerkleLeafHash,l.LeafIdentityHash,l.LeafValue,s.SequenceNumber,l.ExtraData,l.QueueTimestampNanos,s.IntegrateTimestampNanos " + + "FROM SequencedLeafData s" + + " INNER JOIN LeafData l ON (s.LeafIdentityHash=l.LeafIdentityHash AND s.TreeId=l.TreeId) " + + "WHERE s.MerkleLeafHash=ANY($1)" + + " AND l.TreeId=$2" // TODO(#1548): rework the code so the dummy hash isn't needed (e.g. this assumes hash size is 32) dummyMerkleLeafHash = "00000000000000000000000000000000" // This statement returns a dummy Merkle leaf hash value (which must be // of the right size) so that its signature matches that of the other // leaf-selection statements. - selectLeavesByLeafIdentityHashSQL = `SELECT '` + dummyMerkleLeafHash + `',l.LeafIdentityHash,l.LeafValue,-1,l.ExtraData,l.QueueTimestampNanos,s.IntegrateTimestampNanos - FROM LeafData l LEFT JOIN SequencedLeafData s ON (l.LeafIdentityHash = s.LeafIdentityHash AND l.TreeID = s.TreeID) - WHERE l.LeafIdentityHash = ANY($1) AND l.TreeId = $2` + selectLeavesByLeafIdentityHashSQL = "SELECT E'\\\\x" + dummyMerkleLeafHash + "',l.LeafIdentityHash,l.LeafValue,-1,l.ExtraData,l.QueueTimestampNanos,s.IntegrateTimestampNanos " + + "FROM LeafData l" + + " LEFT JOIN SequencedLeafData s ON (l.LeafIdentityHash=s.LeafIdentityHash AND l.TreeId=s.TreeId) " + + "WHERE l.LeafIdentityHash=ANY($1)" + + " AND l.TreeId=$2" // Same as above except with leaves ordered by sequence so we only incur this cost when necessary orderBySequenceNumberSQL = " ORDER BY s.SequenceNumber" diff --git a/storage/postgresql/log_storage_test.go b/storage/postgresql/log_storage_test.go index cd1eac4c47..075770533e 100644 --- a/storage/postgresql/log_storage_test.go +++ b/storage/postgresql/log_storage_test.go @@ -70,8 +70,8 @@ func createFakeLeaf(ctx context.Context, db *pgxpool.Pool, logID int64, rawHash, t.Helper() queuedAtNanos := fakeQueueTime.UnixNano() integratedAtNanos := fakeIntegrateTime.UnixNano() - _, err := db.Exec(ctx, "INSERT INTO LeafData(TreeId, LeafIdentityHash, LeafValue, ExtraData, QueueTimestampNanos) VALUES($1,$2,$3,$4,$5)", logID, rawHash, data, extraData, queuedAtNanos) - _, err2 := db.Exec(ctx, "INSERT INTO SequencedLeafData(TreeId, SequenceNumber, LeafIdentityHash, MerkleLeafHash, IntegrateTimestampNanos) VALUES($1,$2,$3,$4,$5)", logID, seq, rawHash, hash, integratedAtNanos) + _, err := db.Exec(ctx, "INSERT INTO LeafData(TreeId,LeafIdentityHash,LeafValue,ExtraData,QueueTimestampNanos) VALUES($1,$2,$3,$4,$5)", logID, rawHash, data, extraData, queuedAtNanos) + _, err2 := db.Exec(ctx, "INSERT INTO SequencedLeafData(TreeId,SequenceNumber,LeafIdentityHash,MerkleLeafHash,IntegrateTimestampNanos) VALUES($1,$2,$3,$4,$5)", logID, seq, rawHash, hash, integratedAtNanos) if err != nil || err2 != nil { t.Fatalf("Failed to create test leaves: %v %v", err, err2) @@ -213,7 +213,7 @@ func TestQueueLeaves(t *testing.T) { // Should see the leaves in the database. There is no API to read from the unsequenced data. 
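Note: the queries above that filter with "=ANY($1)" rely on pgx binding a Go slice as a single PostgreSQL array parameter, which replaces the MySQL pattern of expanding an IN (?,?,...) list to match the number of values. A rough sketch, assuming a pgxpool.Pool and the Trees table (the package and helper names are illustrative only):

    package example

    import (
        "context"

        "github.com/jackc/pgx/v5/pgxpool"
    )

    // treeStates binds the []int64 argument as one array parameter, so no
    // per-element placeholders need to be generated.
    func treeStates(ctx context.Context, db *pgxpool.Pool, treeIDs []int64) (map[int64]string, error) {
        rows, err := db.Query(ctx, "SELECT TreeId,TreeState FROM Trees WHERE TreeId=ANY($1)", treeIDs)
        if err != nil {
            return nil, err
        }
        defer rows.Close()
        states := make(map[int64]string)
        for rows.Next() {
            var id int64
            var state string
            if err := rows.Scan(&id, &state); err != nil {
                return nil, err
            }
            states[id] = state
        }
        return states, rows.Err()
    }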
var count int - if err := DB.QueryRow(ctx, "SELECT COUNT(*) FROM Unsequenced WHERE TreeID=$1", tree.TreeId).Scan(&count); err != nil { + if err := DB.QueryRow(ctx, "SELECT COUNT(*) FROM Unsequenced WHERE TreeId=$1", tree.TreeId).Scan(&count); err != nil { t.Fatalf("Could not query row count: %v", err) } if leavesToInsert != count { @@ -222,7 +222,7 @@ func TestQueueLeaves(t *testing.T) { // Additional check on timestamp being set correctly in the database var queueTimestamp int64 - if err := DB.QueryRow(ctx, "SELECT DISTINCT QueueTimestampNanos FROM Unsequenced WHERE TreeID=$1", tree.TreeId).Scan(&queueTimestamp); err != nil { + if err := DB.QueryRow(ctx, "SELECT DISTINCT QueueTimestampNanos FROM Unsequenced WHERE TreeId=$1", tree.TreeId).Scan(&queueTimestamp); err != nil { t.Fatalf("Could not query timestamp: %v", err) } if got, want := queueTimestamp, fakeQueueTime.UnixNano(); got != want { @@ -253,7 +253,7 @@ func TestQueueLeavesDuplicateBigBatch(t *testing.T) { // Should see the leaves in the database. There is no API to read from the unsequenced data. var count int - if err := DB.QueryRow(ctx, "SELECT COUNT(*) FROM Unsequenced WHERE TreeID=$1", tree.TreeId).Scan(&count); err != nil { + if err := DB.QueryRow(ctx, "SELECT COUNT(*) FROM Unsequenced WHERE TreeId=$1", tree.TreeId).Scan(&count); err != nil { t.Fatalf("Could not query row count: %v", err) } if leafCount != count { @@ -725,7 +725,7 @@ func TestGetActiveLogIDs(t *testing.T) { // Update deleted trees accordingly for _, treeID := range []int64{deletedLog.TreeId} { - if _, err := DB.Exec(ctx, "UPDATE Trees SET Deleted = $1 WHERE TreeId = $2", true, treeID); err != nil { + if _, err := DB.Exec(ctx, "UPDATE Trees SET Deleted=$1 WHERE TreeId=$2", true, treeID); err != nil { t.Fatalf("Exec(%v) returned err = %v", treeID, err) } } diff --git a/storage/postgresql/queue.go b/storage/postgresql/queue.go index 9ddcb25448..0bc3de5989 100644 --- a/storage/postgresql/queue.go +++ b/storage/postgresql/queue.go @@ -31,15 +31,15 @@ import ( const ( // If this statement ORDER BY clause is changed refer to the comment in removeSequencedLeaves - selectQueuedLeavesSQL = `SELECT LeafIdentityHash,MerkleLeafHash,QueueTimestampNanos - FROM Unsequenced - WHERE TreeID=$1 - AND Bucket=0 - AND QueueTimestampNanos<=$2 - ORDER BY QueueTimestampNanos,LeafIdentityHash ASC LIMIT $3` - insertUnsequencedEntrySQL = `INSERT INTO Unsequenced(TreeId,Bucket,LeafIdentityHash,MerkleLeafHash,QueueTimestampNanos) - VALUES($1,0,$2,$3,$4)` - deleteUnsequencedSQL = "DELETE FROM Unsequenced WHERE TreeId=$1 AND Bucket=0 AND QueueTimestampNanos=$2 AND LeafIdentityHash=$3" + selectQueuedLeavesSQL = "SELECT LeafIdentityHash,MerkleLeafHash,QueueTimestampNanos " + + "FROM Unsequenced " + + "WHERE TreeId=$1" + + " AND Bucket=0" + + " AND QueueTimestampNanos<=$2 " + + "ORDER BY QueueTimestampNanos,LeafIdentityHash " + + "LIMIT $3" + insertUnsequencedEntrySQL = "INSERT INTO Unsequenced(TreeId,Bucket,LeafIdentityHash,MerkleLeafHash,QueueTimestampNanos) VALUES($1,0,$2,$3,$4)" + deleteUnsequencedSQL = "DELETE FROM Unsequenced WHERE TreeId=$1 AND Bucket=0 AND QueueTimestampNanos=$2 AND LeafIdentityHash=$3" ) type dequeuedLeaf struct { diff --git a/storage/postgresql/queue_batching.go b/storage/postgresql/queue_batching.go index b87542e82c..c9ee9d1868 100644 --- a/storage/postgresql/queue_batching.go +++ b/storage/postgresql/queue_batching.go @@ -33,14 +33,15 @@ import ( const ( // If this statement ORDER BY clause is changed refer to the comment in removeSequencedLeaves - 
selectQueuedLeavesSQL = `SELECT LeafIdentityHash,MerkleLeafHash,QueueTimestampNanos,QueueID - FROM Unsequenced - WHERE TreeID=$1 - AND Bucket=0 - AND QueueTimestampNanos<=$2 - ORDER BY QueueTimestampNanos,LeafIdentityHash ASC LIMIT $3` - insertUnsequencedEntrySQL = `INSERT INTO Unsequenced(TreeId,Bucket,LeafIdentityHash,MerkleLeafHash,QueueTimestampNanos,QueueID) VALUES($1,0,$2,$3,$4,$5)` - deleteUnsequencedSQL = "DELETE FROM Unsequenced WHERE QueueID = ANY($1)" + selectQueuedLeavesSQL = "SELECT LeafIdentityHash,MerkleLeafHash,QueueTimestampNanos,QueueID " + + "FROM Unsequenced " + + "WHERE TreeId=$1" + + " AND Bucket=0" + + " AND QueueTimestampNanos<=$2 " + + "ORDER BY QueueTimestampNanos,LeafIdentityHash " + + "LIMIT $3" + insertUnsequencedEntrySQL = "INSERT INTO Unsequenced(TreeId,Bucket,LeafIdentityHash,MerkleLeafHash,QueueTimestampNanos,QueueID) VALUES($1,0,$2,$3,$4,$5)" + deleteUnsequencedSQL = "DELETE FROM Unsequenced WHERE QueueID=ANY($1)" ) type dequeuedLeaf []byte diff --git a/storage/postgresql/schema/storage.sql b/storage/postgresql/schema/storage.sql index 517abcb231..4bf8256aa4 100644 --- a/storage/postgresql/schema/storage.sql +++ b/storage/postgresql/schema/storage.sql @@ -142,7 +142,7 @@ CREATE TABLE IF NOT EXISTS Unsequenced( -- CT this hash will include the leaf prefix byte as well as the leaf data. MerkleLeafHash BYTEA NOT NULL, QueueTimestampNanos BIGINT NOT NULL, - -- This is a SHA256 hash of the TreeID, LeafIdentityHash and QueueTimestampNanos. It is used + -- This is a SHA256 hash of the TreeId, LeafIdentityHash and QueueTimestampNanos. It is used -- for batched deletes from the table when trillian_log_server and trillian_log_signer are -- built with the batched_queue tag. QueueID BYTEA DEFAULT NULL UNIQUE, diff --git a/storage/postgresql/tree_storage.go b/storage/postgresql/tree_storage.go index eab7b9c711..e8e72b1f47 100644 --- a/storage/postgresql/tree_storage.go +++ b/storage/postgresql/tree_storage.go @@ -37,37 +37,35 @@ import ( // These statements are fixed const ( - createTempSubtreeTable = `CREATE TEMP TABLE TempSubtree ( - TreeId BIGINT, - SubtreeId BYTEA, - Nodes BYTEA, - SubtreeRevision INTEGER, - CONSTRAINT TempSubtree_pk PRIMARY KEY (TreeId,SubtreeId,SubtreeRevision) -) ON COMMIT DROP` - insertSubtreeMultiSQL = `INSERT INTO Subtree(TreeId, SubtreeId, Nodes, SubtreeRevision) SELECT TreeId, SubtreeId, Nodes, SubtreeRevision FROM TempSubtree ON CONFLICT ON CONSTRAINT TempSubtree_pk DO UPDATE Nodes=EXCLUDED.Nodes` - insertTreeHeadSQL = `INSERT INTO TreeHead(TreeId,TreeHeadTimestamp,TreeSize,RootHash,TreeRevision,RootSignature) - VALUES($1,$2,$3,$4,$5,$6)` - - selectSubtreeSQL = ` - SELECT x.SubtreeId, Subtree.Nodes - FROM ( - SELECT n.TreeId, n.SubtreeId, max(n.SubtreeRevision) AS MaxRevision - FROM Subtree n - WHERE n.SubtreeId = ANY($1) AND - n.TreeId = $2 AND n.SubtreeRevision <= $3 - GROUP BY n.TreeId, n.SubtreeId - ) AS x - INNER JOIN Subtree - ON Subtree.SubtreeId = x.SubtreeId - AND Subtree.SubtreeRevision = x.MaxRevision - AND Subtree.TreeId = x.TreeId - AND Subtree.TreeId = $4` - - selectSubtreeSQLNoRev = ` - SELECT SubtreeId, Subtree.Nodes - FROM Subtree - WHERE Subtree.TreeId = $1 - AND SubtreeId = ANY($2)` + createTempSubtreeTable = "CREATE TEMP TABLE TempSubtree (" + + " TreeId BIGINT," + + " SubtreeId BYTEA," + + " Nodes BYTEA," + + " SubtreeRevision INTEGER," + + " CONSTRAINT TempSubtree_pk PRIMARY KEY (TreeId,SubtreeId,SubtreeRevision)" + + ") ON COMMIT DROP" + insertSubtreeMultiSQL = "INSERT INTO 
Subtree(TreeId,SubtreeId,Nodes,SubtreeRevision) " + + "SELECT TreeId,SubtreeId,Nodes,SubtreeRevision " + + "FROM TempSubtree " + + "ON CONFLICT ON CONSTRAINT TempSubtree_pk DO UPDATE Nodes=EXCLUDED.Nodes" + insertTreeHeadSQL = "INSERT INTO TreeHead(TreeId,TreeHeadTimestamp,TreeSize,RootHash,TreeRevision,RootSignature) VALUES($1,$2,$3,$4,$5,$6)" + + selectSubtreeSQL = "SELECT x.SubtreeId,s.Nodes " + + "FROM (" + + "SELECT n.TreeId,n.SubtreeId,max(n.SubtreeRevision) AS MaxRevision " + + "FROM Subtree n " + + "WHERE n.SubtreeId=ANY($1)" + + " AND n.TreeId=$2" + + " AND n.SubtreeRevision<=$3 " + + "GROUP BY n.TreeId,n.SubtreeId" + + ") AS x" + + " INNER JOIN Subtree s ON (x.SubtreeId=s.SubtreeId AND x.MaxRevision=s.SubtreeRevision AND x.TreeId=s.TreeId) " + + "WHERE s.TreeId=$4" + + selectSubtreeSQLNoRev = "SELECT SubtreeId,Nodes " + + "FROM Subtree " + + "WHERE TreeId=$1" + + " AND SubtreeId=ANY($2)" ) // postgreSQLTreeStorage is shared between the postgreSQLLog- and (forthcoming) postgreSQLMap- From e8fdc37aa84ba75cc9266bbbd9c4fe3981b9d317 Mon Sep 17 00:00:00 2001 From: Rob Stradling Date: Tue, 1 Oct 2024 14:01:05 +0100 Subject: [PATCH 17/62] Remove explicit delete from TreeControl, because no previous versions without ON DELETE CASCADE exist --- storage/postgresql/admin_storage.go | 4 ---- 1 file changed, 4 deletions(-) diff --git a/storage/postgresql/admin_storage.go b/storage/postgresql/admin_storage.go index 46858da61f..aa5ac24f86 100644 --- a/storage/postgresql/admin_storage.go +++ b/storage/postgresql/admin_storage.go @@ -339,10 +339,6 @@ func (t *adminTX) HardDeleteTree(ctx context.Context, treeID int64) error { return err } - // TreeControl didn't have "ON DELETE CASCADE" on previous versions, so let's hit it explicitly - if _, err := t.tx.Exec(ctx, "DELETE FROM TreeControl WHERE TreeId=$1", treeID); err != nil { - return err - } _, err := t.tx.Exec(ctx, "DELETE FROM Trees WHERE TreeId=$1", treeID) return err } From 0f6f5937e63bcd577b05ebe8e3a9890785c94187 Mon Sep 17 00:00:00 2001 From: Rob Stradling Date: Tue, 1 Oct 2024 14:01:49 +0100 Subject: [PATCH 18/62] Adapt getVersion query to PostgreSQL --- storage/postgresql/storage_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/storage/postgresql/storage_test.go b/storage/postgresql/storage_test.go index f2206c17a3..47f90c8b38 100644 --- a/storage/postgresql/storage_test.go +++ b/storage/postgresql/storage_test.go @@ -291,7 +291,7 @@ func cleanTestDB(db *pgxpool.Pool) { } func getVersion(db *pgxpool.Pool) (string, error) { - rows, err := db.Query(context.TODO(), "SELECT @@GLOBAL.version") + rows, err := db.Query(context.TODO(), "SELECT version()") if err != nil { return "", fmt.Errorf("getVersion: failed to perform query: %v", err) } From 10d47f8fb8371f4493c14f045ed5393baa87be13 Mon Sep 17 00:00:00 2001 From: Rob Stradling Date: Tue, 1 Oct 2024 14:02:55 +0100 Subject: [PATCH 19/62] Context required for pgx Exec() and Ping() functions --- storage/postgresql/admin_storage_test.go | 2 +- storage/postgresql/testdbpgx/testdbpgx.go | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/storage/postgresql/admin_storage_test.go b/storage/postgresql/admin_storage_test.go index e9c0c1fa2c..25791e93f1 100644 --- a/storage/postgresql/admin_storage_test.go +++ b/storage/postgresql/admin_storage_test.go @@ -263,7 +263,7 @@ func TestAdminTX_GetTreeLegacies(t *testing.T) { if err != nil { t.Fatal(err) } - if _, err := tx.Exec("UPDATE Trees SET PublicKey=$1 WHERE TreeId=$2", tC.key, tree.TreeId); err != nil { + if 
_, err := tx.Exec(ctx, "UPDATE Trees SET PublicKey=$1 WHERE TreeId=$2", tC.key, tree.TreeId); err != nil { t.Fatal(err) } if err := tx.Commit(ctx); err != nil { diff --git a/storage/postgresql/testdbpgx/testdbpgx.go b/storage/postgresql/testdbpgx/testdbpgx.go index a5e018db8a..a46049c53f 100644 --- a/storage/postgresql/testdbpgx/testdbpgx.go +++ b/storage/postgresql/testdbpgx/testdbpgx.go @@ -168,7 +168,7 @@ func dbAvailable(driver DriverName) bool { log.Printf("db.Close(): %v", err) } }() - if err := db.Ping(); err != nil { + if err := db.Ping(context.TODO()); err != nil { log.Printf("db.Ping(): %v", err) return false } @@ -238,7 +238,7 @@ func newEmptyDB(ctx context.Context, driver DriverName) (*pgxpool.Pool, func(con } } - return db, done, db.Ping() + return db, done, db.Ping(ctx) } // NewTrillianDB creates an empty database with the Trillian schema. The database name is randomly From e617388067654b39f5e62be914f76ce847a8da09 Mon Sep 17 00:00:00 2001 From: Rob Stradling Date: Tue, 1 Oct 2024 14:03:29 +0100 Subject: [PATCH 20/62] No error returned by pgx RowsAffected() function --- storage/postgresql/tree_storage.go | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/storage/postgresql/tree_storage.go b/storage/postgresql/tree_storage.go index e8e72b1f47..360a42385e 100644 --- a/storage/postgresql/tree_storage.go +++ b/storage/postgresql/tree_storage.go @@ -273,7 +273,6 @@ func (t *treeTX) storeSubtrees(ctx context.Context, subtrees []*storagepb.Subtre klog.Warningf("Failed to set merkle subtrees: %s", err) return err } - _, _ = r.RowsAffected() return nil } @@ -284,12 +283,7 @@ func checkResultOkAndRowCountIs(res pgconn.CommandTag, err error, count int64) e } // Otherwise we have to look at the result of the operation - rowsAffected, rowsError := res.RowsAffected() - - if rowsError != nil { - return postgresqlToGRPC(rowsError) - } - + rowsAffected := res.RowsAffected() if rowsAffected != count { return fmt.Errorf("expected %d row(s) to be affected but saw: %d", count, rowsAffected) From a1ccf26a00fa258700cee19fbfafe1279ca9fcc3 Mon Sep 17 00:00:00 2001 From: Rob Stradling Date: Tue, 1 Oct 2024 14:05:06 +0100 Subject: [PATCH 21/62] Check Rows errors consistently and correctly --- storage/postgresql/admin_storage.go | 8 ++++++-- storage/postgresql/log_storage.go | 31 ++++++++++++++++++----------- storage/postgresql/storage_test.go | 10 +++++++++- storage/postgresql/tree_storage.go | 12 +++-------- 4 files changed, 37 insertions(+), 24 deletions(-) diff --git a/storage/postgresql/admin_storage.go b/storage/postgresql/admin_storage.go index aa5ac24f86..8def7b27cd 100644 --- a/storage/postgresql/admin_storage.go +++ b/storage/postgresql/admin_storage.go @@ -145,8 +145,9 @@ func (t *adminTX) ListTrees(ctx context.Context, includeDeleted bool) ([]*trilli return nil, err } defer func() { - if err := rows.Close(); err != nil { - klog.Errorf("rows.Close(): %v", err) + rows.Close() + if err := rows.Err(); err != nil { + klog.Errorf("rows.Err(): %v", err) } }() trees := []*trillian.Tree{} @@ -157,6 +158,9 @@ func (t *adminTX) ListTrees(ctx context.Context, includeDeleted bool) ([]*trilli } trees = append(trees, tree) } + if err = rows.Err(); err != nil { + return nil, err + } return trees, nil } diff --git a/storage/postgresql/log_storage.go b/storage/postgresql/log_storage.go index f9c9774355..34097c111d 100644 --- a/storage/postgresql/log_storage.go +++ b/storage/postgresql/log_storage.go @@ -163,8 +163,9 @@ func (m *postgreSQLLogStorage) GetActiveLogIDs(ctx context.Context) 
([]int64, er return nil, err } defer func() { - if err := rows.Close(); err != nil { - klog.Errorf("rows.Close(): %v", err) + rows.Close() + if err := rows.Err(); err != nil { + klog.Errorf("rows.Err(): %v", err) } }() ids := []int64{} @@ -175,7 +176,10 @@ func (m *postgreSQLLogStorage) GetActiveLogIDs(ctx context.Context) ([]int64, er } ids = append(ids, treeID) } - return ids, rows.Err() + if err = rows.Err(); err != nil { + return nil, err + } + return ids, nil } func (m *postgreSQLLogStorage) beginInternal(ctx context.Context, tree *trillian.Tree) (*logTreeTX, error) { @@ -342,8 +346,9 @@ func (t *logTreeTX) DequeueLeaves(ctx context.Context, limit int, cutoffTime tim return nil, err } defer func() { - if err := rows.Close(); err != nil { - klog.Errorf("rows.Close(): %v", err) + rows.Close() + if err := rows.Err(); err != nil { + klog.Errorf("rows.Err(): %v", err) } }() @@ -366,10 +371,10 @@ func (t *logTreeTX) DequeueLeaves(ctx context.Context, limit int, cutoffTime tim t.dequeued[k] = dqInfo leaves = append(leaves, leaf) } - - if rows.Err() != nil { - return nil, rows.Err() + if err = rows.Err(); err != nil { + return nil, err } + label := labelForTX(t) observe(dequeueSelectLatency, time.Since(start), label) observe(dequeueLatency, time.Since(start), label) @@ -623,8 +628,9 @@ func (t *logTreeTX) getLeavesByRangeInternal(ctx context.Context, start, count i return nil, err } defer func() { - if err := rows.Close(); err != nil { - klog.Errorf("rows.Close(): %v", err) + rows.Close() + if err := rows.Err(); err != nil { + klog.Errorf("rows.Err(): %v", err) } }() @@ -760,8 +766,9 @@ func (t *logTreeTX) getLeavesByHashInternal(ctx context.Context, leafHashes [][] return nil, err } defer func() { - if err := rows.Close(); err != nil { - klog.Errorf("rows.Close(): %v", err) + rows.Close() + if err := rows.Err(); err != nil { + klog.Errorf("rows.Err(): %v", err) } }() diff --git a/storage/postgresql/storage_test.go b/storage/postgresql/storage_test.go index 47f90c8b38..b370dd995d 100644 --- a/storage/postgresql/storage_test.go +++ b/storage/postgresql/storage_test.go @@ -295,7 +295,12 @@ func getVersion(db *pgxpool.Pool) (string, error) { if err != nil { return "", fmt.Errorf("getVersion: failed to perform query: %v", err) } - defer func() { _ = rows.Close() }() + defer func() { + rows.Close() + if err := rows.Err(); err != nil { + klog.Errorf("rows.Err(): %v", err) + } + }() if !rows.Next() { return "", errors.New("getVersion: cursor has no rows") } @@ -306,6 +311,9 @@ func getVersion(db *pgxpool.Pool) (string, error) { if rows.Next() { return "", errors.New("getVersion: too many rows returned") } + if err = rows.Err(); err != nil { + return "", err + } return v, nil } diff --git a/storage/postgresql/tree_storage.go b/storage/postgresql/tree_storage.go index 360a42385e..f3d857972f 100644 --- a/storage/postgresql/tree_storage.go +++ b/storage/postgresql/tree_storage.go @@ -154,17 +154,12 @@ func (t *treeTX) getSubtrees(ctx context.Context, treeRevision int64, ids [][]by return nil, err } defer func() { - if err := rows.Close(); err != nil { - klog.Errorf("rows.Close(): %v", err) + rows.Close() + if err := rows.Err(); err != nil { + klog.Errorf("rows.Err(): %v", err) } }() - if rows.Err() != nil { - // Nothing from the DB - klog.Warningf("Nothing from DB: %s", rows.Err()) - return nil, rows.Err() - } - ret := make([]*storagepb.SubtreeProto, 0, len(ids)) for rows.Next() { @@ -196,7 +191,6 @@ func (t *treeTX) getSubtrees(ctx context.Context, treeRevision int64, ids [][]by } } } - if err := 
rows.Err(); err != nil { return nil, err } From 66d2c7f7639262f2166c0b8762956816c5f1ad73 Mon Sep 17 00:00:00 2001 From: Rob Stradling Date: Tue, 1 Oct 2024 14:05:51 +0100 Subject: [PATCH 22/62] Adapt errors.go to PostgreSQL --- storage/postgresql/errors.go | 17 ++++++----------- 1 file changed, 6 insertions(+), 11 deletions(-) diff --git a/storage/postgresql/errors.go b/storage/postgresql/errors.go index 1184cea194..1c48c3df8f 100644 --- a/storage/postgresql/errors.go +++ b/storage/postgresql/errors.go @@ -15,25 +15,20 @@ package postgresql import ( + "github.com/jackc/pgerrcode" + "github.com/jackc/pgx/v5/pgconn" "google.golang.org/grpc/codes" "google.golang.org/grpc/status" ) -const ( - // ER_DUP_ENTRY: Error returned by driver when inserting a duplicate row. - errNumDuplicate = 1062 - // ER_LOCK_DEADLOCK: Error returned when there was a deadlock. - errNumDeadlock = 1213 -) - // postgresqlToGRPC converts some types of PostgreSQL errors to GRPC errors. This gives // clients more signal when the operation can be retried. func postgresqlToGRPC(err error) error { - postgresqlErr, ok := err.(*postgresql.PostgreSQLError) + postgresqlErr, ok := err.(*pgconn.PgError) if !ok { return err } - if postgresqlErr.Number == errNumDeadlock { + if postgresqlErr.Code == pgerrcode.DeadlockDetected { return status.Errorf(codes.Aborted, "PostgreSQL: %v", postgresqlErr) } return err @@ -41,8 +36,8 @@ func postgresqlToGRPC(err error) error { func isDuplicateErr(err error) bool { switch err := err.(type) { - case *postgresql.PostgreSQLError: - return err.Number == errNumDuplicate + case *pgconn.PgError: + return err.Code == pgerrcode.UniqueViolation default: return false } From 8218303750e30ffd83001a90a0a3e7132972644c Mon Sep 17 00:00:00 2001 From: Rob Stradling Date: Tue, 1 Oct 2024 14:06:44 +0100 Subject: [PATCH 23/62] Adapt connection pool opening to PostgreSQL --- storage/postgresql/testdbpgx/testdbpgx.go | 6 +++--- storage/postgresql/tree_storage.go | 10 ++++++++-- 2 files changed, 11 insertions(+), 5 deletions(-) diff --git a/storage/postgresql/testdbpgx/testdbpgx.go b/storage/postgresql/testdbpgx/testdbpgx.go index a46049c53f..414fee00ff 100644 --- a/storage/postgresql/testdbpgx/testdbpgx.go +++ b/storage/postgresql/testdbpgx/testdbpgx.go @@ -158,7 +158,7 @@ func CockroachDBAvailable() bool { func dbAvailable(driver DriverName) bool { driverName := driverMapping[driver].sqlDriverName uri := driverMapping[driver].uriFunc() - db, err := pgxpool.New(driverName, uri) + db, err := pgxpool.New(context.TODO(), uri) if err != nil { log.Printf("pgxpool.New(): %v", err) return false @@ -205,7 +205,7 @@ func newEmptyDB(ctx context.Context, driver DriverName) (*pgxpool.Pool, func(con return nil, nil, fmt.Errorf("unknown driver %q", driver) } - db, err := pgxpool.New(inf.sqlDriverName, inf.uriFunc()) + db, err := pgxpool.New(ctx, inf.uriFunc()) if err != nil { return nil, nil, err } @@ -222,7 +222,7 @@ func newEmptyDB(ctx context.Context, driver DriverName) (*pgxpool.Pool, func(con return nil, nil, fmt.Errorf("failed to close DB: %v", err) } uri := inf.uriFunc(name) - db, err = pgxpool.New(inf.sqlDriverName, uri) + db, err = pgxpool.New(ctx, uri) if err != nil { return nil, nil, err } diff --git a/storage/postgresql/tree_storage.go b/storage/postgresql/tree_storage.go index f3d857972f..4df71d8d16 100644 --- a/storage/postgresql/tree_storage.go +++ b/storage/postgresql/tree_storage.go @@ -78,9 +78,15 @@ type postgreSQLTreeStorage struct { // (See 
https://github.com/jackc/pgx/wiki/Automatic-Prepared-Statement-Caching) } -// OpenDB opens a database connection for all PostgreSQL-based storage implementations. +// OpenDB opens a database connection pool for all PostgreSQL-based storage implementations. func OpenDB(dbURL string) (*pgxpool.Pool, error) { - db, err := pgxpool.New("postgresql", dbURL) + pgxConfig, err := pgxpool.ParseConfig(dbURL) + if err != nil { + klog.Warningf("Could not parse PostgreSQL connection URI, check config: %s", err) + return nil, err + } + + db, err := pgxpool.NewWithConfig(context.TODO(), pgxConfig) if err != nil { // Don't log uri as it could contain credentials klog.Warningf("Could not open PostgreSQL database, check config: %s", err) From 92030b305cf1c6c4828a2f4ac18665597e03d7ef Mon Sep 17 00:00:00 2001 From: Rob Stradling Date: Tue, 1 Oct 2024 14:07:49 +0100 Subject: [PATCH 24/62] No error returned by pgxpool Close() function --- storage/postgresql/provider.go | 3 ++- storage/postgresql/testdbpgx/testdbpgx.go | 16 +++------------- 2 files changed, 5 insertions(+), 14 deletions(-) diff --git a/storage/postgresql/provider.go b/storage/postgresql/provider.go index 066c29dce7..b1027b512b 100644 --- a/storage/postgresql/provider.go +++ b/storage/postgresql/provider.go @@ -117,7 +117,8 @@ func (s *postgresqlProvider) AdminStorage() storage.AdminStorage { } func (s *postgresqlProvider) Close() error { - return s.db.Close() + s.db.Close() + return nil } // registerPostgreSQLTLSConfig registers a custom TLS config for PostgreSQL using a provided CA certificate and optional server name. diff --git a/storage/postgresql/testdbpgx/testdbpgx.go b/storage/postgresql/testdbpgx/testdbpgx.go index 414fee00ff..097d3c921c 100644 --- a/storage/postgresql/testdbpgx/testdbpgx.go +++ b/storage/postgresql/testdbpgx/testdbpgx.go @@ -163,11 +163,7 @@ func dbAvailable(driver DriverName) bool { log.Printf("pgxpool.New(): %v", err) return false } - defer func() { - if err := db.Close(); err != nil { - log.Printf("db.Close(): %v", err) - } - }() + defer db.Close() if err := db.Ping(context.TODO()); err != nil { log.Printf("db.Ping(): %v", err) return false @@ -218,9 +214,7 @@ func newEmptyDB(ctx context.Context, driver DriverName) (*pgxpool.Pool, func(con return nil, nil, fmt.Errorf("error running statement %q: %v", stmt, err) } - if err := db.Close(); err != nil { - return nil, nil, fmt.Errorf("failed to close DB: %v", err) - } + db.Close() uri := inf.uriFunc(name) db, err = pgxpool.New(ctx, uri) if err != nil { @@ -228,11 +222,7 @@ func newEmptyDB(ctx context.Context, driver DriverName) (*pgxpool.Pool, func(con } done := func(ctx context.Context) { - defer func() { - if err := db.Close(); err != nil { - klog.Errorf("db.Close(): %v", err) - } - }() + defer db.Close() if _, err := db.Exec(ctx, fmt.Sprintf("DROP DATABASE %v", name)); err != nil { klog.Warningf("Failed to drop test database %q: %v", name, err) } From f68b8b9fabf8a4f1c5a51ff18b2d71badf3c006a Mon Sep 17 00:00:00 2001 From: Rob Stradling Date: Tue, 1 Oct 2024 14:08:35 +0100 Subject: [PATCH 25/62] Pass arguments directly to Query in getLeavesByRangeInternal --- storage/postgresql/log_storage.go | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/storage/postgresql/log_storage.go b/storage/postgresql/log_storage.go index 34097c111d..9c53b8b60b 100644 --- a/storage/postgresql/log_storage.go +++ b/storage/postgresql/log_storage.go @@ -621,8 +621,7 @@ func (t *logTreeTX) getLeavesByRangeInternal(ctx context.Context, start, count i } // TODO(pavelkalinnikov): 
Further clip `count` to a safe upper bound like 64k. - args := []interface{}{start, start + count, t.treeID} - rows, err := t.tx.Query(ctx, selectLeavesByRangeSQL, args...) + rows, err := t.tx.Query(ctx, selectLeavesByRangeSQL, start, start+count, t.treeID) if err != nil { klog.Warningf("Failed to get leaves by range: %s", err) return nil, err From 1456b20e37dbf8be872e0614a5bf7e067d5de7a2 Mon Sep 17 00:00:00 2001 From: Rob Stradling Date: Tue, 1 Oct 2024 14:13:38 +0100 Subject: [PATCH 26/62] Adapt provider.go with plausible postgreSQLURI and drop calls to database/sql functions that pgx doesn't have --- storage/postgresql/provider.go | 14 ++------------ 1 file changed, 2 insertions(+), 12 deletions(-) diff --git a/storage/postgresql/provider.go b/storage/postgresql/provider.go index b1027b512b..63cacad5e5 100644 --- a/storage/postgresql/provider.go +++ b/storage/postgresql/provider.go @@ -24,16 +24,12 @@ import ( "github.com/google/trillian/monitoring" "github.com/google/trillian/storage" - "k8s.io/klog/v2" - - // Load PostgreSQL driver "github.com/jackc/pgx/v5/pgxpool" + "k8s.io/klog/v2" ) var ( - postgreSQLURI = flag.String("postgresql_uri", "test:zaphod@tcp(127.0.0.1:3306)/test", "Connection URI for PostgreSQL database") - maxConns = flag.Int("postgresql_max_conns", 0, "Maximum connections to the database") - maxIdle = flag.Int("postgresql_max_idle_conns", -1, "Maximum idle database connections in the connection pool") + postgreSQLURI = flag.String("postgresql_uri", "postgresql:///ctlog?host=localhost&user=ctlog", "Connection URI for PostgreSQL database") postgreSQLTLSCA = flag.String("postgresql_tls_ca", "", "Path to the CA certificate file for PostgreSQL TLS connection ") postgreSQLServerName = flag.String("postgresql_server_name", "", "Name of the PostgreSQL server to be used as the Server Name in the TLS configuration") @@ -98,12 +94,6 @@ func getPostgreSQLDatabaseLocked() (*pgxpool.Pool, error) { postgresqlErr = err return nil, err } - if *maxConns > 0 { - db.SetMaxOpenConns(*maxConns) - } - if *maxIdle >= 0 { - db.SetMaxIdleConns(*maxIdle) - } postgresqlDB, postgresqlErr = db, nil return db, nil } From aff11fcf7601ca408c52f15d63063376edb5ed21 Mon Sep 17 00:00:00 2001 From: Rob Stradling Date: Tue, 1 Oct 2024 14:15:55 +0100 Subject: [PATCH 27/62] Remove CockroachDB stuff from testdbpgx and specify plausible defaultTestPostgreSQLURI --- storage/postgresql/testdbpgx/testdbpgx.go | 88 +++-------------------- 1 file changed, 9 insertions(+), 79 deletions(-) diff --git a/storage/postgresql/testdbpgx/testdbpgx.go b/storage/postgresql/testdbpgx/testdbpgx.go index 097d3c921c..73e28d2ee4 100644 --- a/storage/postgresql/testdbpgx/testdbpgx.go +++ b/storage/postgresql/testdbpgx/testdbpgx.go @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. -// Package testdb creates new databases for tests. +// Package testdbpgx creates new PostgreSQL databases for tests. package testdbpgx import ( @@ -20,19 +20,15 @@ import ( "context" "fmt" "log" - "net/url" "os" "strings" "testing" "time" "github.com/google/trillian/testonly" + "github.com/jackc/pgx/v5/pgxpool" "golang.org/x/sys/unix" "k8s.io/klog/v2" - - "github.com/jackc/pgx/v5/pgxpool" - _ "github.com/jackc/pgx/v5/pgxpool" // postgresql driver - _ "github.com/lib/pq" // postgres driver ) const ( @@ -40,25 +36,16 @@ const ( // instance URI to use. The value must have a trailing slash. 
PostgreSQLURIEnv = "TEST_POSTGRESQL_URI" - // Note: pgxpool.New requires the URI to end with a slash. - defaultTestPostgreSQLURI = "root@tcp(127.0.0.1)/" - - // CockroachDBURIEnv is the name of the ENV variable checked for the test CockroachDB - // instance URI to use. The value must have a trailing slash. - CockroachDBURIEnv = "TEST_COCKROACHDB_URI" - - defaultTestCockroachDBURI = "postgres://root@localhost:26257/?sslmode=disable" + defaultTestPostgreSQLURI = "postgresql:///ctlog?host=localhost&user=ctlog" ) type storageDriverInfo struct { - sqlDriverName string - schema string - uriFunc func(paths ...string) string + schema string + uriFunc func(paths ...string) string } var ( trillianPostgreSQLSchema = testonly.RelativeToPackage("../postgresql/schema/storage.sql") - trillianCRDBSchema = testonly.RelativeToPackage("../crdb/schema/storage.sql") ) // DriverName is the name of a database driver. @@ -67,20 +54,12 @@ type DriverName string const ( // DriverPostgreSQL is the identifier for the PostgreSQL storage driver. DriverPostgreSQL DriverName = "postgresql" - // DriverCockroachDB is the identifier for the CockroachDB storage driver. - DriverCockroachDB DriverName = "cockroachdb" ) var driverMapping = map[DriverName]storageDriverInfo{ DriverPostgreSQL: { - sqlDriverName: "postgresql", - schema: trillianPostgreSQLSchema, - uriFunc: postgresqlURI, - }, - DriverCockroachDB: { - sqlDriverName: "postgres", - schema: trillianCRDBSchema, - uriFunc: crdbURI, + schema: trillianPostgreSQLSchema, + uriFunc: postgresqlURI, }, } @@ -101,8 +80,8 @@ func postgresqlURI(dbRef ...string) string { } for _, ref := range dbRef { - separator := "/" - if strings.HasSuffix(stringurl, "/") { + separator := "&" + if strings.HasSuffix(stringurl, "&") { separator = "" } stringurl = strings.Join([]string{stringurl, ref}, separator) @@ -111,52 +90,12 @@ func postgresqlURI(dbRef ...string) string { return stringurl } -// crdbURI returns the CockroachDB connection URI to use for tests. It returns the -// value in the ENV variable defined by CockroachDBURIEnv. If the value is empty, -// returns defaultTestCockroachDBURI. -func crdbURI(dbRef ...string) string { - var uri *url.URL - if e := os.Getenv(CockroachDBURIEnv); len(e) > 0 { - uri = getURL(e) - } else { - uri = getURL(defaultTestCockroachDBURI) - } - - return addPathToURI(uri, dbRef...) -} - -func addPathToURI(uri *url.URL, paths ...string) string { - if len(paths) > 0 { - for _, ref := range paths { - currentPaths := uri.Path - // If the path is the root path, we don't want to append a slash. - if currentPaths == "/" { - currentPaths = "" - } - uri.Path = strings.Join([]string{currentPaths, ref}, "/") - } - } - return uri.String() -} - -func getURL(unparsedurl string) *url.URL { - //nolint:errcheck // We're not expecting an error here. - u, _ := url.Parse(unparsedurl) - return u -} - // PostgreSQLAvailable indicates whether the configured PostgreSQL database is available. func PostgreSQLAvailable() bool { return dbAvailable(DriverPostgreSQL) } -// CockroachDBAvailable indicates whether the configured CockroachDB database is available. 
-func CockroachDBAvailable() bool { - return dbAvailable(DriverCockroachDB) -} - func dbAvailable(driver DriverName) bool { - driverName := driverMapping[driver].sqlDriverName uri := driverMapping[driver].uriFunc() db, err := pgxpool.New(context.TODO(), uri) if err != nil { @@ -280,12 +219,3 @@ func SkipIfNoPostgreSQL(t *testing.T) { } t.Logf("Test PostgreSQL available at %q", postgresqlURI()) } - -// SkipIfNoCockroachDB is a test helper that skips tests that require a local CockroachDB. -func SkipIfNoCockroachDB(t *testing.T) { - t.Helper() - if !CockroachDBAvailable() { - t.Skip("Skipping test as CockroachDB not available") - } - t.Logf("Test CockroachDB available at %q", crdbURI()) -} From bdd9fafee72fb6067c94b5c680220e4bf663f8f8 Mon Sep 17 00:00:00 2001 From: Rob Stradling Date: Tue, 1 Oct 2024 14:22:15 +0100 Subject: [PATCH 28/62] Tidy up / fix pgx imports --- go.mod | 3 ++- go.sum | 2 ++ storage/postgresql/log_storage_test.go | 4 +--- storage/postgresql/tree_storage.go | 2 +- 4 files changed, 6 insertions(+), 5 deletions(-) diff --git a/go.mod b/go.mod index 67d42f92d0..795f64d6bb 100644 --- a/go.mod +++ b/go.mod @@ -16,7 +16,7 @@ require ( github.com/google/go-cmp v0.6.0 github.com/google/go-licenses/v2 v2.0.0-alpha.1 github.com/grpc-ecosystem/go-grpc-middleware v1.4.0 - github.com/jackc/pgconn v1.14.3 + github.com/jackc/pgerrcode v0.0.0-20240316143900-6e2875d9b438 github.com/jackc/pgx/v5 v5.5.2 github.com/letsencrypt/pkcs11key/v4 v4.0.0 github.com/lib/pq v1.10.9 @@ -105,6 +105,7 @@ require ( github.com/imdario/mergo v0.3.16 // indirect github.com/inconshreveable/mousetrap v1.1.0 // indirect github.com/jackc/chunkreader/v2 v2.0.1 // indirect + github.com/jackc/pgconn v1.14.3 // indirect github.com/jackc/pgio v1.0.0 // indirect github.com/jackc/pgpassfile v1.0.0 // indirect github.com/jackc/pgproto3/v2 v2.3.3 // indirect diff --git a/go.sum b/go.sum index bd094bb3c5..b469c8eee7 100644 --- a/go.sum +++ b/go.sum @@ -974,6 +974,8 @@ github.com/jackc/pgconn v1.9.0/go.mod h1:YctiPyvzfU11JFxoXokUOOKQXQmDMoJL9vJzHH8 github.com/jackc/pgconn v1.9.1-0.20210724152538-d89c8390a530/go.mod h1:4z2w8XhRbP1hYxkpTuBjTS3ne3J48K83+u0zoyvg2pI= github.com/jackc/pgconn v1.14.3 h1:bVoTr12EGANZz66nZPkMInAV/KHD2TxH9npjXXgiB3w= github.com/jackc/pgconn v1.14.3/go.mod h1:RZbme4uasqzybK2RK5c65VsHxoyaml09lx3tXOcO/VM= +github.com/jackc/pgerrcode v0.0.0-20240316143900-6e2875d9b438 h1:Dj0L5fhJ9F82ZJyVOmBx6msDp/kfd1t9GRfny/mfJA0= +github.com/jackc/pgerrcode v0.0.0-20240316143900-6e2875d9b438/go.mod h1:a/s9Lp5W7n/DD0VrVoyJ00FbP2ytTPDVOivvn2bMlds= github.com/jackc/pgio v1.0.0 h1:g12B9UwVnzGhueNavwioyEEpAmqMe1E/BN9ES+8ovkE= github.com/jackc/pgio v1.0.0/go.mod h1:oP+2QK2wFfUWgr+gxjoBH9KGBb31Eio69xUb0w5bYf8= github.com/jackc/pgmock v0.0.0-20190831213851-13a1b77aafa2/go.mod h1:fGZlG77KXmcq05nJLRkk0+p82V8B8Dw8KN2/V9c/OAE= diff --git a/storage/postgresql/log_storage_test.go b/storage/postgresql/log_storage_test.go index 075770533e..94bce0845c 100644 --- a/storage/postgresql/log_storage_test.go +++ b/storage/postgresql/log_storage_test.go @@ -29,11 +29,9 @@ import ( "github.com/google/trillian/storage" "github.com/google/trillian/storage/testonly" "github.com/google/trillian/types" + "github.com/jackc/pgx/v5/pgxpool" "google.golang.org/protobuf/proto" "google.golang.org/protobuf/types/known/timestamppb" - - "github.com/jackc/pgx/v5/pgxpool" - _ "github.com/jackc/pgx/v5/pgxpool" ) var allTables = []string{"Unsequenced", "TreeHead", 
"SequencedLeafData", "LeafData", "Subtree", "TreeControl", "Trees"} diff --git a/storage/postgresql/tree_storage.go b/storage/postgresql/tree_storage.go index 4df71d8d16..0f1afb3b3d 100644 --- a/storage/postgresql/tree_storage.go +++ b/storage/postgresql/tree_storage.go @@ -27,8 +27,8 @@ import ( "github.com/google/trillian/storage/postgresql/postgresqlpb" "github.com/google/trillian/storage/storagepb" "github.com/google/trillian/storage/tree" - "github.com/jackc/pgconn" "github.com/jackc/pgx/v5" + "github.com/jackc/pgx/v5/pgconn" "github.com/jackc/pgx/v5/pgxpool" "google.golang.org/protobuf/proto" "google.golang.org/protobuf/types/known/anypb" From 7b0ae0aa695af88b77e299d8e4f5367e36c6eeb3 Mon Sep 17 00:00:00 2001 From: Rob Stradling Date: Tue, 1 Oct 2024 14:23:14 +0100 Subject: [PATCH 29/62] Plug PostgreSQL storage and quota providers into log_server and log_signer --- cmd/trillian_log_server/main.go | 2 ++ cmd/trillian_log_signer/main.go | 2 ++ 2 files changed, 4 insertions(+) diff --git a/cmd/trillian_log_server/main.go b/cmd/trillian_log_server/main.go index c1f2191455..7733d0302f 100644 --- a/cmd/trillian_log_server/main.go +++ b/cmd/trillian_log_server/main.go @@ -49,10 +49,12 @@ import ( _ "github.com/google/trillian/storage/cloudspanner" _ "github.com/google/trillian/storage/crdb" _ "github.com/google/trillian/storage/mysql" + _ "github.com/google/trillian/storage/postgresql" // Load quota providers _ "github.com/google/trillian/quota/crdbqm" _ "github.com/google/trillian/quota/mysqlqm" + _ "github.com/google/trillian/quota/postgresqlqm" ) var ( diff --git a/cmd/trillian_log_signer/main.go b/cmd/trillian_log_signer/main.go index 273099bbd6..16e081c9ad 100644 --- a/cmd/trillian_log_signer/main.go +++ b/cmd/trillian_log_signer/main.go @@ -55,10 +55,12 @@ import ( _ "github.com/google/trillian/storage/cloudspanner" _ "github.com/google/trillian/storage/crdb" _ "github.com/google/trillian/storage/mysql" + _ "github.com/google/trillian/storage/postgresql" // Load quota providers _ "github.com/google/trillian/quota/crdbqm" _ "github.com/google/trillian/quota/mysqlqm" + _ "github.com/google/trillian/quota/postgresqlqm" ) var ( From 2846d365ee4b296371fb32af38ea28bda521f1a0 Mon Sep 17 00:00:00 2001 From: Rob Stradling Date: Tue, 1 Oct 2024 14:24:47 +0100 Subject: [PATCH 30/62] Enable the testdbpgx tokenizer to handle PL/pgSQL functions correctly, plus some other testdbpgx fixes --- storage/postgresql/drop_storage.sql | 8 ++++++- storage/postgresql/schema/storage.sql | 10 +++++++- storage/postgresql/testdbpgx/testdbpgx.go | 28 +++++++++++++++++------ 3 files changed, 37 insertions(+), 9 deletions(-) diff --git a/storage/postgresql/drop_storage.sql b/storage/postgresql/drop_storage.sql index defd04417d..5f3beda0a0 100644 --- a/storage/postgresql/drop_storage.sql +++ b/storage/postgresql/drop_storage.sql @@ -1,5 +1,7 @@ -- Caution - this removes all tables in our schema +DROP FUNCTION IF EXISTS count_estimate; + DROP TABLE IF EXISTS Unsequenced; DROP TABLE IF EXISTS Subtree; DROP TABLE IF EXISTS SequencedLeafData; @@ -8,4 +10,8 @@ DROP TABLE IF EXISTS LeafData; DROP TABLE IF EXISTS TreeControl; DROP TABLE IF EXISTS Trees; -DROP FUNCTION IF EXISTS count_estimate; +DROP TYPE IF EXISTS SignatureAlgorithm; +DROP TYPE IF EXISTS HashAlgorithm; +DROP TYPE IF EXISTS HashStrategy; +DROP TYPE IF EXISTS TreeType; +DROP TYPE IF EXISTS TreeState; diff --git a/storage/postgresql/schema/storage.sql b/storage/postgresql/schema/storage.sql index 
4bf8256aa4..6735017b27 100644 --- a/storage/postgresql/schema/storage.sql +++ b/storage/postgresql/schema/storage.sql @@ -1,4 +1,7 @@ -# PostgreSQL version of the tree schema. +-- PostgreSQL version of the tree schema. +-- +-- Each statement must end with a semicolon, and there must be a blank line before the next statement. +-- This will ensure that the testdbpgx tokenizer will handle semicolons in the PL/pgSQL function correctly. -- --------------------------------------------- -- Tree stuff here @@ -7,11 +10,16 @@ -- Tree parameters should not be changed after creation. Doing so can -- render the data in the tree unusable or inconsistent. CREATE TYPE TreeState AS ENUM ('ACTIVE', 'FROZEN', 'DRAINING'); + CREATE TYPE TreeType AS ENUM ('LOG', 'MAP', 'PREORDERED_LOG'); + CREATE TYPE HashStrategy AS ENUM ('RFC6962_SHA256', 'TEST_MAP_HASHER', 'OBJECT_RFC6962_SHA256', 'CONIKS_SHA512_256', 'CONIKS_SHA256'); + CREATE TYPE HashAlgorithm AS ENUM ('SHA256'); + CREATE TYPE SignatureAlgorithm AS ENUM ('ECDSA', 'RSA', 'ED25519'); + CREATE TABLE IF NOT EXISTS Trees( TreeId BIGINT NOT NULL, TreeState TreeState NOT NULL, diff --git a/storage/postgresql/testdbpgx/testdbpgx.go b/storage/postgresql/testdbpgx/testdbpgx.go index 73e28d2ee4..bcaf6b5983 100644 --- a/storage/postgresql/testdbpgx/testdbpgx.go +++ b/storage/postgresql/testdbpgx/testdbpgx.go @@ -36,7 +36,7 @@ const ( // instance URI to use. The value must have a trailing slash. PostgreSQLURIEnv = "TEST_POSTGRESQL_URI" - defaultTestPostgreSQLURI = "postgresql:///ctlog?host=localhost&user=ctlog" + defaultTestPostgreSQLURI = "postgresql:///template1?host=localhost&user=postgres&password=postgres" ) type storageDriverInfo struct { @@ -45,7 +45,7 @@ type storageDriverInfo struct { } var ( - trillianPostgreSQLSchema = testonly.RelativeToPackage("../postgresql/schema/storage.sql") + trillianPostgreSQLSchema = testonly.RelativeToPackage("../schema/storage.sql") ) // DriverName is the name of a database driver. @@ -80,11 +80,20 @@ func postgresqlURI(dbRef ...string) string { } for _, ref := range dbRef { - separator := "&" - if strings.HasSuffix(stringurl, "&") { - separator = "" + if strings.Contains(ref, "=") { + separator := "&" + if strings.HasSuffix(stringurl, "&") { + separator = "" + } + stringurl = strings.Join([]string{stringurl, ref}, separator) + } else { + // No equals character, so use this string as the database name. + if s1 := strings.SplitN(stringurl, "//", 2); len(s1) == 2 { + if s2 := strings.SplitN(stringurl, "?", 2); len(s2) == 2 { + stringurl = s1[0] + "///" + ref + "?" + s2[1] + } + } } - stringurl = strings.Join([]string{stringurl, ref}, separator) } return stringurl @@ -161,6 +170,10 @@ func newEmptyDB(ctx context.Context, driver DriverName) (*pgxpool.Pool, func(con } done := func(ctx context.Context) { + db.Close() + if db, err = pgxpool.New(ctx, inf.uriFunc()); err != nil { + klog.Warningf("Failed to reconnect: %v", err) + } defer db.Close() if _, err := db.Exec(ctx, fmt.Sprintf("DROP DATABASE %v", name)); err != nil { klog.Warningf("Failed to drop test database %q: %v", name, err) @@ -186,7 +199,8 @@ func NewTrillianDB(ctx context.Context, driver DriverName) (*pgxpool.Pool, func( return nil, nil, err } - for _, stmt := range strings.Split(sanitize(string(sqlBytes)), ";") { + // Execute each statement in the schema file. Each statement must end with a semicolon, and there must be a blank line before the next statement. 
+ for _, stmt := range strings.Split(sanitize(string(sqlBytes)), ";\n\n") { stmt = strings.TrimSpace(stmt) if stmt == "" { continue From 012172080234e903c3e1396dfbb986f8d7b96c2c Mon Sep 17 00:00:00 2001 From: Rob Stradling Date: Tue, 1 Oct 2024 14:25:39 +0100 Subject: [PATCH 31/62] To pass postgresql_quota_test requires fresher row estimates, so ANALYZE the table just before estimating the rows --- storage/postgresql/schema/storage.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/storage/postgresql/schema/storage.sql b/storage/postgresql/schema/storage.sql index 6735017b27..0ba0eb3f2d 100644 --- a/storage/postgresql/schema/storage.sql +++ b/storage/postgresql/schema/storage.sql @@ -168,7 +168,7 @@ LANGUAGE plpgsql AS $$ DECLARE plan jsonb; BEGIN - EXECUTE 'EXPLAIN (FORMAT JSON) SELECT * FROM ' || table_name INTO plan; + EXECUTE 'ANALYZE (SKIP_LOCKED TRUE) ' || table_name || ';EXPLAIN (FORMAT JSON) SELECT * FROM ' || table_name INTO plan; RETURN plan->0->'Plan'->'Plan Rows'; EXCEPTION WHEN OTHERS THEN From ba74553f080447483a3b2817e8ac4d776d7f9cbc Mon Sep 17 00:00:00 2001 From: Rob Stradling Date: Tue, 1 Oct 2024 14:49:42 +0100 Subject: [PATCH 32/62] PostgreSQL connection strings support TLS natively --- storage/postgresql/provider.go | 40 ++-------------------------------- 1 file changed, 2 insertions(+), 38 deletions(-) diff --git a/storage/postgresql/provider.go b/storage/postgresql/provider.go index 63cacad5e5..84cdcd820e 100644 --- a/storage/postgresql/provider.go +++ b/storage/postgresql/provider.go @@ -15,11 +15,7 @@ package postgresql import ( - "crypto/tls" - "crypto/x509" - "errors" "flag" - "os" "sync" "github.com/google/trillian/monitoring" @@ -29,9 +25,7 @@ import ( ) var ( - postgreSQLURI = flag.String("postgresql_uri", "postgresql:///ctlog?host=localhost&user=ctlog", "Connection URI for PostgreSQL database") - postgreSQLTLSCA = flag.String("postgresql_tls_ca", "", "Path to the CA certificate file for PostgreSQL TLS connection ") - postgreSQLServerName = flag.String("postgresql_server_name", "", "Name of the PostgreSQL server to be used as the Server Name in the TLS configuration") + postgreSQLURI = flag.String("postgresql_uri", "postgresql:///ctlog?host=localhost&user=ctlog", "Connection URI for PostgreSQL database") postgresqlMu sync.Mutex postgresqlErr error @@ -82,14 +76,7 @@ func getPostgreSQLDatabaseLocked() (*pgxpool.Pool, error) { if postgresqlDB != nil || postgresqlErr != nil { return postgresqlDB, postgresqlErr } - dsn := *postgreSQLURI - if *postgreSQLTLSCA != "" { - if err := registerPostgreSQLTLSConfig(); err != nil { - return nil, err - } - dsn += "?tls=custom" - } - db, err := OpenDB(dsn) + db, err := OpenDB(*postgreSQLURI) if err != nil { postgresqlErr = err return nil, err @@ -110,26 +97,3 @@ func (s *postgresqlProvider) Close() error { s.db.Close() return nil } - -// registerPostgreSQLTLSConfig registers a custom TLS config for PostgreSQL using a provided CA certificate and optional server name. -// Returns an error if the CA certificate can't be read or added to the root cert pool, or when the registration of the TLS config fails. 
-func registerPostgreSQLTLSConfig() error { - if *postgreSQLTLSCA == "" { - return nil - } - rootCertPool := x509.NewCertPool() - pem, err := os.ReadFile(*postgreSQLTLSCA) - if err != nil { - return err - } - if ok := rootCertPool.AppendCertsFromPEM(pem); !ok { - return errors.New("failed to append PEM") - } - tlsConfig := &tls.Config{ - RootCAs: rootCertPool, - } - if *postgreSQLServerName != "" { - tlsConfig.ServerName = *postgreSQLServerName - } - return postgresql.RegisterTLSConfig("custom", tlsConfig) -} From 2c5598f63e454d37fe56317c1123557f980abad4 Mon Sep 17 00:00:00 2001 From: Rob Stradling Date: Tue, 1 Oct 2024 15:10:15 +0100 Subject: [PATCH 33/62] Rebuild the PostgreSQL protobuf definitions with updated protoc-gen-go --- storage/postgresql/postgresqlpb/options.pb.go | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/storage/postgresql/postgresqlpb/options.pb.go b/storage/postgresql/postgresqlpb/options.pb.go index 1816d39b72..111c119d63 100644 --- a/storage/postgresql/postgresqlpb/options.pb.go +++ b/storage/postgresql/postgresqlpb/options.pb.go @@ -14,7 +14,7 @@ // Code generated by protoc-gen-go. DO NOT EDIT. // versions: -// protoc-gen-go v1.32.0 +// protoc-gen-go v1.34.2 // protoc v4.22.3 // source: options.proto @@ -115,7 +115,7 @@ func file_options_proto_rawDescGZIP() []byte { } var file_options_proto_msgTypes = make([]protoimpl.MessageInfo, 1) -var file_options_proto_goTypes = []interface{}{ +var file_options_proto_goTypes = []any{ (*StorageOptions)(nil), // 0: postgresqlpb.StorageOptions } var file_options_proto_depIdxs = []int32{ @@ -132,7 +132,7 @@ func file_options_proto_init() { return } if !protoimpl.UnsafeEnabled { - file_options_proto_msgTypes[0].Exporter = func(v interface{}, i int) interface{} { + file_options_proto_msgTypes[0].Exporter = func(v any, i int) any { switch v := v.(*StorageOptions); i { case 0: return &v.state From b365aedbeb0995c7a793c4ba6bdc254cac30f50e Mon Sep 17 00:00:00 2001 From: Rob Stradling Date: Wed, 2 Oct 2024 13:46:04 +0100 Subject: [PATCH 34/62] Use PostgreSQL's COPY interface for updating sequenced leaves --- storage/postgresql/queue.go | 29 ++++++++++++++----------- storage/postgresql/queue_batching.go | 32 ++++++++++++++++++---------- storage/postgresql/tree_storage.go | 15 +++++++++++++ 3 files changed, 52 insertions(+), 24 deletions(-) diff --git a/storage/postgresql/queue.go b/storage/postgresql/queue.go index 0bc3de5989..64487c4693 100644 --- a/storage/postgresql/queue.go +++ b/storage/postgresql/queue.go @@ -82,6 +82,7 @@ func queueArgs(_ int64, _ []byte, queueTimestamp time.Time) []interface{} { } func (t *logTreeTX) UpdateSequencedLeaves(ctx context.Context, leaves []*trillian.LogLeaf) error { + rows := make([][]interface{}, 0, len(leaves)) dequeuedLeaves := make([]dequeuedLeaf, 0, len(leaves)) for _, leaf := range leaves { // This should fail on insert but catch it early @@ -93,19 +94,7 @@ func (t *logTreeTX) UpdateSequencedLeaves(ctx context.Context, leaves []*trillia return fmt.Errorf("got invalid integrate timestamp: %w", err) } iTimestamp := leaf.IntegrateTimestamp.AsTime() - _, err := t.tx.Exec( - ctx, - insertSequencedLeafSQL+valuesPlaceholder5, - t.treeID, - leaf.LeafIdentityHash, - leaf.MerkleLeafHash, - leaf.LeafIndex, - iTimestamp.UnixNano()) - if err != nil { - klog.Warningf("Failed to update sequenced leaves: %s", err) - return err - } - + rows = append(rows, []interface{}{t.treeID, leaf.LeafIdentityHash, leaf.MerkleLeafHash, leaf.LeafIndex, iTimestamp.UnixNano()}) qe, ok := 
t.dequeued[string(leaf.LeafIdentityHash)] if !ok { return fmt.Errorf("attempting to update leaf that wasn't dequeued. IdentityHash: %x", leaf.LeafIdentityHash) @@ -113,6 +102,20 @@ func (t *logTreeTX) UpdateSequencedLeaves(ctx context.Context, leaves []*trillia dequeuedLeaves = append(dequeuedLeaves, qe) } + // Copy sequenced leaves to SequencedLeafData table. + n, err := t.tx.CopyFrom( + ctx, + pgx.Identifier{"SequencedLeafData"}, + []string{"TreeId", "LeafIdentityHash", "MerkleLeafHash", "SequenceNumber", "IntegrateTimestampNanos"}, + pgx.CopyFromRows(rows), + ) + if err != nil { + klog.Warningf("Failed to copy sequenced leaves: %s", err) + } + if err := checkResultOkAndCopyCountIs(n, err, int64(len(leaves))); err != nil { + return err + } + return t.removeSequencedLeaves(ctx, dequeuedLeaves) } diff --git a/storage/postgresql/queue_batching.go b/storage/postgresql/queue_batching.go index c9ee9d1868..f54a9563de 100644 --- a/storage/postgresql/queue_batching.go +++ b/storage/postgresql/queue_batching.go @@ -21,8 +21,8 @@ import ( "context" "crypto/sha256" "encoding/binary" + "errors" "fmt" - "strings" "time" "github.com/google/trillian" @@ -62,13 +62,13 @@ func (t *logTreeTX) dequeueLeaf(rows pgx.Rows) (*trillian.LogLeaf, dequeuedLeaf, return nil, nil, err } + // Note: the LeafData and ExtraData being nil here is OK as this is only used by the + // sequencer. The sequencer only writes to the SequencedLeafData table and the client + // supplied data was already written to LeafData as part of queueing the leaf. queueTimestampProto := timestamppb.New(time.Unix(0, queueTimestamp)) if err := queueTimestampProto.CheckValid(); err != nil { return nil, dequeuedLeaf{}, fmt.Errorf("got invalid queue timestamp: %w", err) } - // Note: the LeafData and ExtraData being nil here is OK as this is only used by the - // sequencer. The sequencer only writes to the SequencedLeafData table and the client - // supplied data was already written to LeafData as part of queueing the leaf. leaf := &trillian.LogLeaf{ LeafIdentityHash: leafIDHash, MerkleLeafHash: merkleHash, @@ -95,27 +95,37 @@ func queueArgs(treeID int64, identityHash []byte, queueTimestamp time.Time) []in } func (t *logTreeTX) UpdateSequencedLeaves(ctx context.Context, leaves []*trillian.LogLeaf) error { - querySuffix := []string{} - args := []interface{}{} + rows := make([][]interface{}, 0, len(leaves)) dequeuedLeaves := make([]dequeuedLeaf, 0, len(leaves)) for _, leaf := range leaves { + // This should fail on insert but catch it early + if len(leaf.LeafIdentityHash) != t.hashSizeBytes { + return errors.New("sequenced leaf has incorrect hash size") + } + if err := leaf.IntegrateTimestamp.CheckValid(); err != nil { return fmt.Errorf("got invalid integrate timestamp: %w", err) } iTimestamp := leaf.IntegrateTimestamp.AsTime() - querySuffix = append(querySuffix, valuesPlaceholder5) - args = append(args, t.treeID, leaf.LeafIdentityHash, leaf.MerkleLeafHash, leaf.LeafIndex, iTimestamp.UnixNano()) + rows = append(rows, []interface{}{t.treeID, leaf.LeafIdentityHash, leaf.MerkleLeafHash, leaf.LeafIndex, iTimestamp.UnixNano()}) qe, ok := t.dequeued[string(leaf.LeafIdentityHash)] if !ok { return fmt.Errorf("attempting to update leaf that wasn't dequeued. IdentityHash: %x", leaf.LeafIdentityHash) } dequeuedLeaves = append(dequeuedLeaves, qe) } - result, err := t.tx.Exec(ctx, insertSequencedLeafSQL+strings.Join(querySuffix, ","), args...) + + // Copy sequenced leaves to SequencedLeafData table. 
+ n, err := t.tx.CopyFrom( + ctx, + pgx.Identifier{"SequencedLeafData"}, + []string{"TreeId", "LeafIdentityHash", "MerkleLeafHash", "SequenceNumber", "IntegrateTimestampNanos"}, + pgx.CopyFromRows(rows), + ) if err != nil { - klog.Warningf("Failed to update sequenced leaves: %s", err) + klog.Warningf("Failed to copy sequenced leaves: %s", err) } - if err := checkResultOkAndRowCountIs(result, err, int64(len(leaves))); err != nil { + if err := checkResultOkAndCopyCountIs(n, err, int64(len(leaves))); err != nil { return err } diff --git a/storage/postgresql/tree_storage.go b/storage/postgresql/tree_storage.go index 0f1afb3b3d..1564e1e1db 100644 --- a/storage/postgresql/tree_storage.go +++ b/storage/postgresql/tree_storage.go @@ -292,6 +292,21 @@ func checkResultOkAndRowCountIs(res pgconn.CommandTag, err error, count int64) e return nil } +func checkResultOkAndCopyCountIs(rowsAffected int64, err error, count int64) error { + // The Exec() might have just failed + if err != nil { + return postgresqlToGRPC(err) + } + + // Otherwise we have to look at the result of the operation + if rowsAffected != count { + return fmt.Errorf("expected %d row(s) to be affected but saw: %d", count, + rowsAffected) + } + + return nil +} + // getSubtreesAtRev returns a GetSubtreesFunc which reads at the passed in rev. func (t *treeTX) getSubtreesAtRev(ctx context.Context, rev int64) cache.GetSubtreesFunc { return func(ids [][]byte) ([]*storagepb.SubtreeProto, error) { From aa47b36c0af489c459c6349c7b73b2e85c15b312 Mon Sep 17 00:00:00 2001 From: Rob Stradling Date: Wed, 2 Oct 2024 14:04:22 +0100 Subject: [PATCH 35/62] CopyFrom seems to require all lower case for table and column names --- storage/postgresql/queue.go | 4 ++-- storage/postgresql/queue_batching.go | 4 ++-- storage/postgresql/tree_storage.go | 4 ++-- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/storage/postgresql/queue.go b/storage/postgresql/queue.go index 64487c4693..d15859f16f 100644 --- a/storage/postgresql/queue.go +++ b/storage/postgresql/queue.go @@ -105,8 +105,8 @@ func (t *logTreeTX) UpdateSequencedLeaves(ctx context.Context, leaves []*trillia // Copy sequenced leaves to SequencedLeafData table. n, err := t.tx.CopyFrom( ctx, - pgx.Identifier{"SequencedLeafData"}, - []string{"TreeId", "LeafIdentityHash", "MerkleLeafHash", "SequenceNumber", "IntegrateTimestampNanos"}, + pgx.Identifier{"sequencedleafdata"}, + []string{"treeid", "leafidentityhash", "merkleleafhash", "sequencenumber", "integratetimestampnanos"}, pgx.CopyFromRows(rows), ) if err != nil { diff --git a/storage/postgresql/queue_batching.go b/storage/postgresql/queue_batching.go index f54a9563de..49232c29e6 100644 --- a/storage/postgresql/queue_batching.go +++ b/storage/postgresql/queue_batching.go @@ -118,8 +118,8 @@ func (t *logTreeTX) UpdateSequencedLeaves(ctx context.Context, leaves []*trillia // Copy sequenced leaves to SequencedLeafData table. 
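
The likely reason the lower-case spellings are needed in this patch is identifier folding: storage.sql creates the tables with unquoted, mixed-case names, which PostgreSQL folds to lower case, whereas CopyFrom double-quotes whatever names it is given, making them case sensitive. A small illustration using pgx's Identifier type (a sketch only; no database needed):

    package main

    import (
        "fmt"

        "github.com/jackc/pgx/v5"
    )

    func main() {
        // Sanitize shows the quoted form sent to the server. The quoted mixed-case
        // spelling names a relation that does not exist, because the schema's
        // unquoted CREATE TABLE statements were folded to lower case on creation.
        fmt.Println(pgx.Identifier{"SequencedLeafData"}.Sanitize()) // "SequencedLeafData" -> no such relation
        fmt.Println(pgx.Identifier{"sequencedleafdata"}.Sanitize()) // "sequencedleafdata" -> matches the folded name
    }
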
n, err := t.tx.CopyFrom( ctx, - pgx.Identifier{"SequencedLeafData"}, - []string{"TreeId", "LeafIdentityHash", "MerkleLeafHash", "SequenceNumber", "IntegrateTimestampNanos"}, + pgx.Identifier{"sequencedleafdata"}, + []string{"treeid", "leafidentityhash", "merkleleafhash", "sequencenumber", "integratetimestampnanos"}, pgx.CopyFromRows(rows), ) if err != nil { diff --git a/storage/postgresql/tree_storage.go b/storage/postgresql/tree_storage.go index 1564e1e1db..c7d6280501 100644 --- a/storage/postgresql/tree_storage.go +++ b/storage/postgresql/tree_storage.go @@ -258,8 +258,8 @@ func (t *treeTX) storeSubtrees(ctx context.Context, subtrees []*storagepb.Subtre // Copy subtrees to temporary table. _, err = t.tx.CopyFrom( ctx, - pgx.Identifier{"TempSubtree"}, - []string{"TreeId", "SubtreeId", "Nodes", "SubtreeRevision"}, + pgx.Identifier{"tempsubtree"}, + []string{"treeid", "subtreeid", "nodes", "subtreerevision"}, pgx.CopyFromRows(rows), ) if err != nil { From 607c13ab20f682d7cca670dba804394552578e45 Mon Sep 17 00:00:00 2001 From: Rob Stradling Date: Mon, 7 Oct 2024 22:05:00 +0100 Subject: [PATCH 36/62] Use PostgreSQL's COPY interface and a temporary table to queue leaves and add sequenced leaves --- storage/postgresql/errors.go | 9 - storage/postgresql/log_storage.go | 251 +++++++++++++------------- storage/postgresql/queue.go | 2 +- storage/postgresql/schema/storage.sql | 61 ++++++- storage/postgresql/tree_storage.go | 4 +- 5 files changed, 194 insertions(+), 133 deletions(-) diff --git a/storage/postgresql/errors.go b/storage/postgresql/errors.go index 1c48c3df8f..785c913deb 100644 --- a/storage/postgresql/errors.go +++ b/storage/postgresql/errors.go @@ -33,12 +33,3 @@ func postgresqlToGRPC(err error) error { } return err } - -func isDuplicateErr(err error) bool { - switch err := err.(type) { - case *pgconn.PgError: - return err.Code == pgerrcode.UniqueViolation - default: - return false - } -} diff --git a/storage/postgresql/log_storage.go b/storage/postgresql/log_storage.go index 9c53b8b60b..8b02034579 100644 --- a/storage/postgresql/log_storage.go +++ b/storage/postgresql/log_storage.go @@ -42,10 +42,30 @@ import ( ) const ( - valuesPlaceholder5 = "($1,$2,$3,$4,$5)" - - insertLeafDataSQL = "INSERT INTO LeafData(TreeId,LeafIdentityHash,LeafValue,ExtraData,QueueTimestampNanos) VALUES" + valuesPlaceholder5 - insertSequencedLeafSQL = "INSERT INTO SequencedLeafData(TreeId,LeafIdentityHash,MerkleLeafHash,SequenceNumber,IntegrateTimestampNanos) VALUES" + createTempQueueLeavesTable = "CREATE TEMP TABLE TempQueueLeaves (" + + " TreeId BIGINT," + + " LeafIdentityHash BYTEA," + + " LeafValue BYTEA," + + " ExtraData BYTEA," + + " MerkleLeafHash BYTEA," + + " QueueTimestampNanos BIGINT," + + " QueueID BYTEA," + + " IsDuplicate BOOLEAN DEFAULT FALSE" + + ") ON COMMIT DROP" + queueLeavesSQL = "SELECT * FROM queue_leaves()" + + createTempAddSequencedLeavesTable = "CREATE TEMP TABLE TempAddSequencedLeaves (" + + " TreeId BIGINT," + + " LeafIdentityHash BYTEA," + + " LeafValue BYTEA," + + " ExtraData BYTEA," + + " MerkleLeafHash BYTEA," + + " QueueTimestampNanos BIGINT," + + " SequenceNumber BIGINT," + + " IsDuplicateLeafData BOOLEAN DEFAULT FALSE," + + " IsDuplicateSequencedLeafData BOOLEAN DEFAULT FALSE" + + ") ON COMMIT DROP" + addSequencedLeavesSQL = "SELECT * FROM add_sequenced_leaves()" selectNonDeletedTreeIDByTypeAndStateSQL = "SELECT TreeId " + "FROM Trees " + @@ -76,7 +96,7 @@ const ( // This statement returns a dummy Merkle leaf hash value (which must be // of the right size) so that its signature 
matches that of the other // leaf-selection statements. - selectLeavesByLeafIdentityHashSQL = "SELECT E'\\\\x" + dummyMerkleLeafHash + "',l.LeafIdentityHash,l.LeafValue,-1,l.ExtraData,l.QueueTimestampNanos,s.IntegrateTimestampNanos " + + selectLeavesByLeafIdentityHashSQL = "SELECT decode('" + dummyMerkleLeafHash + "','escape'),l.LeafIdentityHash,l.LeafValue,-1,l.ExtraData,l.QueueTimestampNanos,s.IntegrateTimestampNanos " + "FROM LeafData l" + " LEFT JOIN SequencedLeafData s ON (l.LeafIdentityHash=s.LeafIdentityHash AND l.TreeId=s.TreeId) " + "WHERE l.LeafIdentityHash=ANY($1)" + @@ -95,14 +115,9 @@ var ( queuedDupCounter monitoring.Counter dequeuedCounter monitoring.Counter - queueLatency monitoring.Histogram - queueInsertLatency monitoring.Histogram - queueReadLatency monitoring.Histogram - queueInsertLeafLatency monitoring.Histogram - queueInsertEntryLatency monitoring.Histogram - dequeueLatency monitoring.Histogram - dequeueSelectLatency monitoring.Histogram - dequeueRemoveLatency monitoring.Histogram + dequeueLatency monitoring.Histogram + dequeueSelectLatency monitoring.Histogram + dequeueRemoveLatency monitoring.Histogram ) func createMetrics(mf monitoring.MetricFactory) { @@ -110,12 +125,6 @@ func createMetrics(mf monitoring.MetricFactory) { queuedDupCounter = mf.NewCounter("postgresql_queued_dup_leaves", "Number of duplicate leaves queued", logIDLabel) dequeuedCounter = mf.NewCounter("postgresql_dequeued_leaves", "Number of leaves dequeued", logIDLabel) - queueLatency = mf.NewHistogram("postgresql_queue_leaves_latency", "Latency of queue leaves operation in seconds", logIDLabel) - queueInsertLatency = mf.NewHistogram("postgresql_queue_leaves_latency_insert", "Latency of insertion part of queue leaves operation in seconds", logIDLabel) - queueReadLatency = mf.NewHistogram("postgresql_queue_leaves_latency_read_dups", "Latency of read-duplicates part of queue leaves operation in seconds", logIDLabel) - queueInsertLeafLatency = mf.NewHistogram("postgresql_queue_leaf_latency_leaf", "Latency of insert-leaf part of queue (single) leaf operation in seconds", logIDLabel) - queueInsertEntryLatency = mf.NewHistogram("postgresql_queue_leaf_latency_entry", "Latency of insert-entry part of queue (single) leaf operation in seconds", logIDLabel) - dequeueLatency = mf.NewHistogram("postgresql_dequeue_leaves_latency", "Latency of dequeue leaves operation in seconds", logIDLabel) dequeueSelectLatency = mf.NewHistogram("postgresql_dequeue_leaves_latency_select", "Latency of selection part of dequeue leaves operation in seconds", logIDLabel) dequeueRemoveLatency = mf.NewHistogram("postgresql_dequeue_leaves_latency_remove", "Latency of removal part of dequeue leaves operation in seconds", logIDLabel) @@ -410,77 +419,79 @@ func (t *logTreeTX) QueueLeaves(ctx context.Context, leaves []*trillian.LogLeaf, return nil, fmt.Errorf("got invalid queue timestamp: %w", err) } } - start := time.Now() label := labelForTX(t) ordLeaves := sortLeavesForInsert(leaves) existingCount := 0 existingLeaves := make([]*trillian.LogLeaf, len(leaves)) + copyRows := make([][]interface{}, 0, len(ordLeaves)) + // Prepare rows to copy. 
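
A note on the ON COMMIT DROP clause used by the temporary staging tables introduced above: a PostgreSQL temporary table is private to the creating session, and ON COMMIT DROP additionally removes it when the enclosing transaction ends, so the code needs no explicit cleanup and concurrent transactions each stage into their own copy. A minimal lifecycle sketch, assuming the default test database URI used elsewhere in this series and an illustrative table name:

    package main

    import (
        "context"
        "log"

        "github.com/jackc/pgx/v5/pgxpool"
    )

    func main() {
        ctx := context.Background()
        pool, err := pgxpool.New(ctx, "postgresql:///template1?host=localhost&user=postgres&password=postgres")
        if err != nil {
            log.Fatal(err)
        }
        defer pool.Close()

        tx, err := pool.Begin(ctx)
        if err != nil {
            log.Fatal(err)
        }
        defer func() { _ = tx.Rollback(ctx) }()

        // The staging table only exists for the duration of this transaction.
        if _, err := tx.Exec(ctx, "CREATE TEMP TABLE StagingDemo (Id BIGINT) ON COMMIT DROP"); err != nil {
            log.Fatal(err)
        }
        if _, err := tx.Exec(ctx, "INSERT INTO StagingDemo VALUES (1)"); err != nil {
            log.Fatal(err)
        }
        if err := tx.Commit(ctx); err != nil {
            log.Fatal(err)
        }
        // After the commit, StagingDemo no longer exists in this session.
    }
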
for _, ol := range ordLeaves { - i, leaf := ol.idx, ol.leaf + leaf := ol.leaf - leafStart := time.Now() if err := leaf.QueueTimestamp.CheckValid(); err != nil { return nil, fmt.Errorf("got invalid queue timestamp: %w", err) } qTimestamp := leaf.QueueTimestamp.AsTime() - _, err := t.tx.Exec(ctx, insertLeafDataSQL, t.treeID, leaf.LeafIdentityHash, leaf.LeafValue, leaf.ExtraData, qTimestamp.UnixNano()) - insertDuration := time.Since(leafStart) - observe(queueInsertLeafLatency, insertDuration, label) - if isDuplicateErr(err) { - // Remember the duplicate leaf, using the requested leaf for now. - existingLeaves[i] = leaf - existingCount++ - queuedDupCounter.Inc(label) - continue - } - if err != nil { - klog.Warningf("Error inserting %d into LeafData: %s", i, err) - return nil, postgresqlToGRPC(err) - } + args := queueArgs(t.treeID, leaf.LeafIdentityHash, qTimestamp) + copyRows = append(copyRows, []interface{}{t.treeID, leaf.LeafIdentityHash, leaf.LeafValue, leaf.ExtraData, leaf.MerkleLeafHash, args[0], args[1]}) + } - // Create the work queue entry - args := []interface{}{ - t.treeID, - leaf.LeafIdentityHash, - leaf.MerkleLeafHash, + // Create temporary table. + _, err := t.tx.Exec(ctx, createTempQueueLeavesTable) + if err != nil { + klog.Warningf("Failed to create tempqueueleaves table: %s", err) + return nil, postgresqlToGRPC(err) + } + + // Copy rows to temporary table. + _, err = t.tx.CopyFrom( + ctx, + pgx.Identifier{"tempqueueleaves"}, + []string{"treeid", "leafidentityhash", "leafvalue", "extradata", "merkleleafhash", "queuetimestampnanos", "queueid"}, + pgx.CopyFromRows(copyRows), + ) + if err != nil { + klog.Warningf("Failed to copy queued leaves: %s", err) + return nil, postgresqlToGRPC(err) + } + + // Create the leaf data records, work queue entries, and obtain a deduplicated list of existing leaves. + var toRetrieve [][]byte + var leafIdentityHash []byte + if rows, err := t.tx.Query(ctx, queueLeavesSQL); err != nil { + klog.Warningf("Failed to queue leaves: %s", err) + return nil, postgresqlToGRPC(err) + } else { + defer rows.Close() + for rows.Next() { + if err = rows.Scan(&leafIdentityHash); err != nil { + klog.Warningf("Failed to scan row: %s", err) + return nil, postgresqlToGRPC(err) + } + + for i, leaf := range leaves { + if bytes.Equal(leaf.LeafIdentityHash, leafIdentityHash) { + // Remember the duplicate leaf, using the requested leaf for now. + existingLeaves[i] = leaf + existingCount++ + queuedDupCounter.Inc(label) + } + } + toRetrieve = append(toRetrieve, leafIdentityHash) } - args = append(args, queueArgs(t.treeID, leaf.LeafIdentityHash, qTimestamp)...) - _, err = t.tx.Exec( - ctx, - insertUnsequencedEntrySQL, - args..., - ) - if err != nil { - klog.Warningf("Error inserting into Unsequenced: %s", err) + if rows.Err() != nil { + klog.Errorf("Failed processing rows: %s", err) return nil, postgresqlToGRPC(err) } - leafDuration := time.Since(leafStart) - observe(queueInsertEntryLatency, (leafDuration - insertDuration), label) } - insertDuration := time.Since(start) - observe(queueInsertLatency, insertDuration, label) queuedCounter.Add(float64(len(leaves)), label) if existingCount == 0 { return existingLeaves, nil } - // For existing leaves, we need to retrieve the contents. 
First collate the desired LeafIdentityHash values - // We deduplicate the hashes to address https://github.com/google/trillian/issues/3603 but will be mapped - // back to the existingLeaves slice below - uniqueLeafMap := make(map[string]struct{}, len(existingLeaves)) - var toRetrieve [][]byte - for _, existing := range existingLeaves { - if existing != nil { - key := string(existing.LeafIdentityHash) - if _, ok := uniqueLeafMap[key]; !ok { - uniqueLeafMap[key] = struct{}{} - toRetrieve = append(toRetrieve, existing.LeafIdentityHash) - } - } - } results, err := t.getLeafDataByIdentityHash(ctx, toRetrieve) if err != nil { return nil, fmt.Errorf("failed to retrieve existing leaves: %v", err) @@ -505,10 +516,6 @@ func (t *logTreeTX) QueueLeaves(ctx context.Context, leaves []*trillian.LogLeaf, return nil, fmt.Errorf("failed to find existing leaf for hash %x", requested.LeafIdentityHash) } } - totalDuration := time.Since(start) - readDuration := totalDuration - insertDuration - observe(queueReadLatency, readDuration, label) - observe(queueLatency, totalDuration, label) return existingLeaves, nil } @@ -520,23 +527,10 @@ func (t *logTreeTX) AddSequencedLeaves(ctx context.Context, leaves []*trillian.L res := make([]*trillian.QueuedLogLeaf, len(leaves)) ok := status.New(codes.OK, "OK").Proto() - // Leaves in this transaction are inserted in two tables. For each leaf, if - // one of the two inserts fails, we remove the side effect by rolling back to - // a savepoint installed before the first insert of the two. - const savepoint = "SAVEPOINT AddSequencedLeaves" - if _, err := t.tx.Exec(ctx, savepoint); err != nil { - klog.Errorf("Error adding savepoint: %s", err) - return nil, postgresqlToGRPC(err) - } - // TODO(pavelkalinnikov): Consider performance implication of executing this - // extra SAVEPOINT, especially for 1-entry batches. Optimize if necessary. - - // Note: LeafData inserts are presumably protected from deadlocks due to - // sorting, but the order of the corresponding SequencedLeafData inserts - // becomes indeterministic. However, in a typical case when leaves are - // supplied in contiguous non-intersecting batches, the chance of having - // circular dependencies between transactions is significantly lower. ordLeaves := sortLeavesForInsert(leaves) + copyRows := make([][]interface{}, 0, len(ordLeaves)) + + // Prepare rows to copy. for _, ol := range ordLeaves { i, leaf := ol.idx, ol.leaf @@ -545,50 +539,65 @@ func (t *logTreeTX) AddSequencedLeaves(ctx context.Context, leaves []*trillian.L return nil, status.Errorf(codes.FailedPrecondition, "leaves[%d] has incorrect hash size %d, want %d", i, got, want) } - if _, err := t.tx.Exec(ctx, savepoint); err != nil { - klog.Errorf("Error updating savepoint: %s", err) - return nil, postgresqlToGRPC(err) - } - + copyRows = append(copyRows, []interface{}{t.treeID, leaf.LeafIdentityHash, leaf.LeafValue, leaf.ExtraData, leaf.MerkleLeafHash, timestamp.UnixNano(), leaf.LeafIndex}) res[i] = &trillian.QueuedLogLeaf{Status: ok} + } - // TODO(pavelkalinnikov): Measure latencies. - _, err := t.tx.Exec(ctx, insertLeafDataSQL, - t.treeID, leaf.LeafIdentityHash, leaf.LeafValue, leaf.ExtraData, timestamp.UnixNano()) - // TODO(pavelkalinnikov): Detach PREORDERED_LOG integration latency metric. - - // TODO(pavelkalinnikov): Support opting out from duplicates detection. - if isDuplicateErr(err) { - res[i].Status = status.New(codes.FailedPrecondition, "conflicting LeafIdentityHash").Proto() - // Note: No rolling back to savepoint because there is no side effect. 
- continue - } else if err != nil { - klog.Errorf("Error inserting leaves[%d] into LeafData: %s", i, err) - return nil, postgresqlToGRPC(err) - } + // Create temporary table. + _, err := t.tx.Exec(ctx, createTempAddSequencedLeavesTable) + if err != nil { + klog.Warningf("Failed to create tempaddsequencedleaves table: %s", err) + return nil, postgresqlToGRPC(err) + } - _, err = t.tx.Exec(ctx, insertSequencedLeafSQL+valuesPlaceholder5, - t.treeID, leaf.LeafIdentityHash, leaf.MerkleLeafHash, leaf.LeafIndex, 0) - // TODO(pavelkalinnikov): Update IntegrateTimestamp on integrating the leaf. + // Copy rows to temporary table. + _, err = t.tx.CopyFrom( + ctx, + pgx.Identifier{"tempaddsequencedleaves"}, + []string{"treeid", "leafidentityhash", "leafvalue", "extradata", "merkleleafhash", "queuetimestampnanos", "sequencenumber"}, + pgx.CopyFromRows(copyRows), + ) + if err != nil { + klog.Warningf("Failed to copy sequenced leaves: %s", err) + return nil, postgresqlToGRPC(err) + } - if isDuplicateErr(err) { - res[i].Status = status.New(codes.FailedPrecondition, "conflicting LeafIndex").Proto() - if _, err := t.tx.Exec(ctx, "ROLLBACK TO "+savepoint); err != nil { - klog.Errorf("Error rolling back to savepoint: %s", err) + // Create the leaf data records and sequenced leaf data records, returning details of which records already existed. + if rows, err := t.tx.Query(ctx, addSequencedLeavesSQL); err != nil { + klog.Warningf("Failed to add sequenced leaves: %s", err) + return nil, postgresqlToGRPC(err) + } else { + defer rows.Close() + for rows.Next() { + var leafIdentityHash []byte + var isDuplicateLeafData, isDuplicateSequencedLeafData bool + if err = rows.Scan(&leafIdentityHash, &isDuplicateLeafData, &isDuplicateSequencedLeafData); err != nil { + klog.Warningf("Failed to scan row: %s", err) return nil, postgresqlToGRPC(err) } - } else if err != nil { - klog.Errorf("Error inserting leaves[%d] into SequencedLeafData: %s", i, err) + + for _, ol := range ordLeaves { + i, leaf := ol.idx, ol.leaf + + if bytes.Equal(leaf.LeafIdentityHash, leafIdentityHash) { + if isDuplicateLeafData { + res[i].Status = status.New(codes.FailedPrecondition, "conflicting LeafIdentityHash").Proto() + } else if isDuplicateSequencedLeafData { + res[i].Status = status.New(codes.FailedPrecondition, "conflicting LeafIndex").Proto() + } + break + } + } + } + if rows.Err() != nil { + klog.Errorf("Error processing rows: %s", err) return nil, postgresqlToGRPC(err) } - - // TODO(pavelkalinnikov): Load LeafData for conflicting entries. } - if _, err := t.tx.Exec(ctx, "RELEASE "+savepoint); err != nil { - klog.Errorf("Error releasing savepoint: %s", err) - return nil, postgresqlToGRPC(err) - } + // TODO(pavelkalinnikov): Support opting out from duplicates detection. + // TODO(pavelkalinnikov): Update IntegrateTimestamp on integrating the leaf. + // TODO(pavelkalinnikov): Load LeafData for conflicting entries. 
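
Taken together, the rewritten QueueLeaves/AddSequencedLeaves path replaces many small INSERTs (and the savepoint dance) with one bulk COPY into a per-transaction staging table plus a single call to a server-side function (queue_leaves(), defined in the schema change further down) that performs the duplicate-aware inserts and reports conflicts. A condensed sketch of that shape for the queue_leaves() case — an outline of the pattern, not a drop-in replacement for the patched code:

    package sketch

    import (
        "context"

        "github.com/jackc/pgx/v5"
    )

    // queueLeavesSketch stages rows with COPY, then lets the queue_leaves() PL/pgSQL
    // function do the duplicate-aware inserts and return the LeafIdentityHash of each
    // duplicate it found.
    func queueLeavesSketch(ctx context.Context, tx pgx.Tx, rows [][]interface{}) ([][]byte, error) {
        if _, err := tx.Exec(ctx, "CREATE TEMP TABLE TempQueueLeaves ("+
            " TreeId BIGINT, LeafIdentityHash BYTEA, LeafValue BYTEA, ExtraData BYTEA,"+
            " MerkleLeafHash BYTEA, QueueTimestampNanos BIGINT, QueueID BYTEA,"+
            " IsDuplicate BOOLEAN DEFAULT FALSE) ON COMMIT DROP"); err != nil {
            return nil, err
        }
        if _, err := tx.CopyFrom(ctx,
            pgx.Identifier{"tempqueueleaves"},
            []string{"treeid", "leafidentityhash", "leafvalue", "extradata", "merkleleafhash", "queuetimestampnanos", "queueid"},
            pgx.CopyFromRows(rows)); err != nil {
            return nil, err
        }
        dupRows, err := tx.Query(ctx, "SELECT * FROM queue_leaves()")
        if err != nil {
            return nil, err
        }
        defer dupRows.Close()
        var dups [][]byte
        for dupRows.Next() {
            var h []byte
            if err := dupRows.Scan(&h); err != nil {
                return nil, err
            }
            dups = append(dups, h)
        }
        return dups, dupRows.Err()
    }
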
return res, nil } diff --git a/storage/postgresql/queue.go b/storage/postgresql/queue.go index d15859f16f..b83d37bcdc 100644 --- a/storage/postgresql/queue.go +++ b/storage/postgresql/queue.go @@ -78,7 +78,7 @@ func (t *logTreeTX) dequeueLeaf(rows pgx.Rows) (*trillian.LogLeaf, dequeuedLeaf, } func queueArgs(_ int64, _ []byte, queueTimestamp time.Time) []interface{} { - return []interface{}{queueTimestamp.UnixNano()} + return []interface{}{queueTimestamp.UnixNano(), nil} } func (t *logTreeTX) UpdateSequencedLeaves(ctx context.Context, leaves []*trillian.LogLeaf) error { diff --git a/storage/postgresql/schema/storage.sql b/storage/postgresql/schema/storage.sql index 0ba0eb3f2d..5a52fb0e6e 100644 --- a/storage/postgresql/schema/storage.sql +++ b/storage/postgresql/schema/storage.sql @@ -1,7 +1,7 @@ -- PostgreSQL version of the tree schema. -- -- Each statement must end with a semicolon, and there must be a blank line before the next statement. --- This will ensure that the testdbpgx tokenizer will handle semicolons in the PL/pgSQL function correctly. +-- This will ensure that the testdbpgx tokenizer will handle semicolons in the PL/pgSQL functions correctly. -- --------------------------------------------- -- Tree stuff here @@ -175,3 +175,62 @@ EXCEPTION RETURN 0; END; $$; + +CREATE OR REPLACE FUNCTION queue_leaves( +) RETURNS SETOF bytea +LANGUAGE plpgsql AS $$ +BEGIN + LOCK TABLE LeafData IN SHARE ROW EXCLUSIVE MODE; + LOCK TABLE Unsequenced IN SHARE ROW EXCLUSIVE MODE; + UPDATE TempQueueLeaves t + SET IsDuplicate = TRUE + FROM LeafData l + WHERE t.TreeId = l.TreeId + AND t.LeafIdentityHash = l.LeafIdentityHash; + INSERT INTO LeafData (TreeId,LeafIdentityHash,LeafValue,ExtraData,QueueTimestampNanos) + SELECT TreeId,LeafIdentityHash,LeafValue,ExtraData,QueueTimestampNanos + FROM TempQueueLeaves + WHERE NOT IsDuplicate; + INSERT INTO Unsequenced (TreeId,Bucket,LeafIdentityHash,MerkleLeafHash,QueueTimestampNanos,QueueID) + SELECT TreeId,0,LeafIdentityHash,MerkleLeafHash,QueueTimestampNanos,QueueID + FROM TempQueueLeaves + WHERE NOT IsDuplicate; + RETURN QUERY SELECT DISTINCT LeafIdentityHash + FROM TempQueueLeaves + WHERE IsDuplicate; + RETURN; +END; +$$; + +CREATE OR REPLACE FUNCTION add_sequenced_leaves( +) RETURNS TABLE(leaf_identity_hash bytea, is_duplicate_leaf_data boolean, is_duplicate_sequenced_leaf_data boolean) +LANGUAGE plpgsql AS $$ +BEGIN + LOCK TABLE LeafData IN SHARE ROW EXCLUSIVE MODE; + LOCK TABLE SequencedLeafData IN SHARE ROW EXCLUSIVE MODE; + UPDATE TempAddSequencedLeaves t + SET IsDuplicateLeafData = TRUE + FROM LeafData l + WHERE t.TreeId = l.TreeId + AND t.LeafIdentityHash = l.LeafIdentityHash; + UPDATE TempAddSequencedLeaves t + SET IsDuplicateSequencedLeafData = TRUE + FROM SequencedLeafData s + WHERE t.TreeId = s.TreeId + AND t.SequenceNumber = s.SequenceNumber; + INSERT INTO LeafData (TreeId,LeafIdentityHash,LeafValue,ExtraData,QueueTimestampNanos) + SELECT TreeId,LeafIdentityHash,LeafValue,ExtraData,QueueTimestampNanos + FROM TempAddSequencedLeaves + WHERE NOT IsDuplicateLeafData + AND NOT IsDuplicateSequencedLeafData; + INSERT INTO SequencedLeafData (TreeId,LeafIdentityHash,MerkleLeafHash,SequenceNumber,IntegrateTimestampNanos) + SELECT TreeId,LeafIdentityHash,MerkleLeafHash,SequenceNumber,0 + FROM TempAddSequencedLeaves + WHERE NOT IsDuplicateLeafData + AND NOT IsDuplicateSequencedLeafData; + RETURN QUERY SELECT LeafIdentityHash, IsDuplicateLeafData, IsDuplicateSequencedLeafData + FROM TempAddSequencedLeaves + ORDER BY LeafIdentityHash; + RETURN; +END; +$$; diff 
--git a/storage/postgresql/tree_storage.go b/storage/postgresql/tree_storage.go index c7d6280501..0587c3de35 100644 --- a/storage/postgresql/tree_storage.go +++ b/storage/postgresql/tree_storage.go @@ -48,7 +48,9 @@ const ( "SELECT TreeId,SubtreeId,Nodes,SubtreeRevision " + "FROM TempSubtree " + "ON CONFLICT ON CONSTRAINT TempSubtree_pk DO UPDATE Nodes=EXCLUDED.Nodes" - insertTreeHeadSQL = "INSERT INTO TreeHead(TreeId,TreeHeadTimestamp,TreeSize,RootHash,TreeRevision,RootSignature) VALUES($1,$2,$3,$4,$5,$6)" + insertTreeHeadSQL = "INSERT INTO TreeHead(TreeId,TreeHeadTimestamp,TreeSize,RootHash,TreeRevision,RootSignature) " + + "VALUES($1,$2,$3,$4,$5,$6) " + + "ON CONFLICT DO NOTHING" selectSubtreeSQL = "SELECT x.SubtreeId,s.Nodes " + "FROM (" + From 7a0df8805ecf112b7fece74ce969560323c1ef81 Mon Sep 17 00:00:00 2001 From: Rob Stradling Date: Tue, 8 Oct 2024 10:50:53 +0100 Subject: [PATCH 37/62] Fix conflict handling in multiple subtree insert query --- storage/postgresql/schema/storage.sql | 2 +- storage/postgresql/tree_storage.go | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/storage/postgresql/schema/storage.sql b/storage/postgresql/schema/storage.sql index 5a52fb0e6e..0e09d419d1 100644 --- a/storage/postgresql/schema/storage.sql +++ b/storage/postgresql/schema/storage.sql @@ -57,7 +57,7 @@ CREATE TABLE IF NOT EXISTS Subtree( SubtreeRevision INTEGER NOT NULL, -- Key columns must be in ASC order in order to benefit from group-by/min-max -- optimization in PostgreSQL. - PRIMARY KEY (TreeId, SubtreeId, SubtreeRevision), + CONSTRAINT Subtree_pk PRIMARY KEY (TreeId, SubtreeId, SubtreeRevision), FOREIGN KEY(TreeId) REFERENCES Trees(TreeId) ON DELETE CASCADE, CHECK (length(SubtreeId) <= 255) ); diff --git a/storage/postgresql/tree_storage.go b/storage/postgresql/tree_storage.go index 0587c3de35..9ad35b0d5a 100644 --- a/storage/postgresql/tree_storage.go +++ b/storage/postgresql/tree_storage.go @@ -47,7 +47,7 @@ const ( insertSubtreeMultiSQL = "INSERT INTO Subtree(TreeId,SubtreeId,Nodes,SubtreeRevision) " + "SELECT TreeId,SubtreeId,Nodes,SubtreeRevision " + "FROM TempSubtree " + - "ON CONFLICT ON CONSTRAINT TempSubtree_pk DO UPDATE Nodes=EXCLUDED.Nodes" + "ON CONFLICT ON CONSTRAINT Subtree_pk DO UPDATE SET Nodes=EXCLUDED.Nodes" insertTreeHeadSQL = "INSERT INTO TreeHead(TreeId,TreeHeadTimestamp,TreeSize,RootHash,TreeRevision,RootSignature) " + "VALUES($1,$2,$3,$4,$5,$6) " + "ON CONFLICT DO NOTHING" From 7ce02582c88846f3abbf7177246064677a65ed06 Mon Sep 17 00:00:00 2001 From: Rob Stradling Date: Tue, 8 Oct 2024 13:09:13 +0100 Subject: [PATCH 38/62] Unmarshal storage settings before beginning transaction in beginTreeTx, to avoid 'go test' hanging due to TestSnapshot:unknownSnapshot --- storage/postgresql/tree_storage.go | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/storage/postgresql/tree_storage.go b/storage/postgresql/tree_storage.go index 9ad35b0d5a..fbc5fb7d95 100644 --- a/storage/postgresql/tree_storage.go +++ b/storage/postgresql/tree_storage.go @@ -105,17 +105,19 @@ func newTreeStorage(db *pgxpool.Pool) *postgreSQLTreeStorage { } func (m *postgreSQLTreeStorage) beginTreeTx(ctx context.Context, tree *trillian.Tree, hashSizeBytes int, subtreeCache *cache.SubtreeCache) (treeTX, error) { - t, err := m.db.BeginTx(ctx, pgx.TxOptions{}) - if err != nil { - klog.Warningf("Could not start tree TX: %s", err) - return treeTX{}, err - } var subtreeRevisions bool o := &postgresqlpb.StorageOptions{} if err := anypb.UnmarshalTo(tree.StorageSettings, o, 
proto.UnmarshalOptions{}); err != nil { return treeTX{}, fmt.Errorf("failed to unmarshal StorageSettings: %v", err) } subtreeRevisions = o.SubtreeRevisions + + t, err := m.db.BeginTx(ctx, pgx.TxOptions{}) + if err != nil { + klog.Warningf("Could not start tree TX: %s", err) + return treeTX{}, err + } + return treeTX{ tx: t, mu: &sync.Mutex{}, From 55f06562875cb440ee5f161257700ca3fbb4db3e Mon Sep 17 00:00:00 2001 From: Rob Stradling Date: Tue, 8 Oct 2024 22:19:16 +0100 Subject: [PATCH 39/62] go get -u github.com/jackc/pgx/v5 --- go.mod | 12 ++++++------ go.sum | 10 ++++++++++ 2 files changed, 16 insertions(+), 6 deletions(-) diff --git a/go.mod b/go.mod index 795f64d6bb..09b19ff66a 100644 --- a/go.mod +++ b/go.mod @@ -17,7 +17,7 @@ require ( github.com/google/go-licenses/v2 v2.0.0-alpha.1 github.com/grpc-ecosystem/go-grpc-middleware v1.4.0 github.com/jackc/pgerrcode v0.0.0-20240316143900-6e2875d9b438 - github.com/jackc/pgx/v5 v5.5.2 + github.com/jackc/pgx/v5 v5.7.1 github.com/letsencrypt/pkcs11key/v4 v4.0.0 github.com/lib/pq v1.10.9 github.com/prometheus/client_golang v1.20.4 @@ -29,9 +29,9 @@ require ( go.etcd.io/etcd/server/v3 v3.5.16 go.etcd.io/etcd/v3 v3.5.16 go.opencensus.io v0.24.0 - golang.org/x/crypto v0.27.0 + golang.org/x/crypto v0.28.0 golang.org/x/sync v0.8.0 - golang.org/x/sys v0.25.0 + golang.org/x/sys v0.26.0 golang.org/x/tools v0.25.0 google.golang.org/api v0.199.0 google.golang.org/genproto v0.0.0-20240903143218-8af14fe29dc1 @@ -109,10 +109,10 @@ require ( github.com/jackc/pgio v1.0.0 // indirect github.com/jackc/pgpassfile v1.0.0 // indirect github.com/jackc/pgproto3/v2 v2.3.3 // indirect - github.com/jackc/pgservicefile v0.0.0-20231201235250-de7065d80cb9 // indirect + github.com/jackc/pgservicefile v0.0.0-20240606120523-5a60cdf6a761 // indirect github.com/jackc/pgtype v1.14.3 // indirect github.com/jackc/pgx/v4 v4.18.3 // indirect - github.com/jackc/puddle/v2 v2.2.1 // indirect + github.com/jackc/puddle/v2 v2.2.2 // indirect github.com/jhump/protoreflect v1.16.0 // indirect github.com/jmespath/go-jmespath v0.4.1-0.20220621161143-b0104c826a24 // indirect github.com/jonboulle/clockwork v0.4.0 // indirect @@ -174,7 +174,7 @@ require ( golang.org/x/mod v0.21.0 // indirect golang.org/x/net v0.29.0 // indirect golang.org/x/oauth2 v0.23.0 // indirect - golang.org/x/text v0.18.0 // indirect + golang.org/x/text v0.19.0 // indirect golang.org/x/time v0.6.0 // indirect gopkg.in/cheggaaa/pb.v1 v1.0.28 // indirect gopkg.in/natefinch/lumberjack.v2 v2.2.1 // indirect diff --git a/go.sum b/go.sum index b469c8eee7..58840a426a 100644 --- a/go.sum +++ b/go.sum @@ -997,6 +997,8 @@ github.com/jackc/pgservicefile v0.0.0-20200714003250-2b9c44734f2b/go.mod h1:vsD4 github.com/jackc/pgservicefile v0.0.0-20221227161230-091c0ba34f0a/go.mod h1:5TJZWKEWniPve33vlWYSoGYefn3gLQRzjfDlhSJ9ZKM= github.com/jackc/pgservicefile v0.0.0-20231201235250-de7065d80cb9 h1:L0QtFUgDarD7Fpv9jeVMgy/+Ec0mtnmYuImjTz6dtDA= github.com/jackc/pgservicefile v0.0.0-20231201235250-de7065d80cb9/go.mod h1:5TJZWKEWniPve33vlWYSoGYefn3gLQRzjfDlhSJ9ZKM= +github.com/jackc/pgservicefile v0.0.0-20240606120523-5a60cdf6a761 h1:iCEnooe7UlwOQYpKFhBabPMi4aNAfoODPEFNiAnClxo= +github.com/jackc/pgservicefile v0.0.0-20240606120523-5a60cdf6a761/go.mod h1:5TJZWKEWniPve33vlWYSoGYefn3gLQRzjfDlhSJ9ZKM= github.com/jackc/pgtype v0.0.0-20190421001408-4ed0de4755e0/go.mod h1:hdSHsc1V01CGwFsrv11mJRHWJ6aifDLfdV3aVjFF0zg= github.com/jackc/pgtype 
v0.0.0-20190824184912-ab885b375b90/go.mod h1:KcahbBH1nCMSo2DXpzsoWOAfFkdEtEJpPbVLq8eE+mc= github.com/jackc/pgtype v0.0.0-20190828014616-a8802b16cc59/go.mod h1:MWlu30kVJrUS8lot6TQqcg7mtthZ9T0EoIBFiJcmcyw= @@ -1013,6 +1015,8 @@ github.com/jackc/pgx/v4 v4.18.3 h1:dE2/TrEsGX3RBprb3qryqSV9Y60iZN1C6i8IrmW9/BA= github.com/jackc/pgx/v4 v4.18.3/go.mod h1:Ey4Oru5tH5sB6tV7hDmfWFahwF15Eb7DNXlRKx2CkVw= github.com/jackc/pgx/v5 v5.5.2 h1:iLlpgp4Cp/gC9Xuscl7lFL1PhhW+ZLtXZcrfCt4C3tA= github.com/jackc/pgx/v5 v5.5.2/go.mod h1:ez9gk+OAat140fv9ErkZDYFWmXLfV+++K0uAOiwgm1A= +github.com/jackc/pgx/v5 v5.7.1 h1:x7SYsPBYDkHDksogeSmZZ5xzThcTgRz++I5E+ePFUcs= +github.com/jackc/pgx/v5 v5.7.1/go.mod h1:e7O26IywZZ+naJtWWos6i6fvWK+29etgITqrqHLfoZA= github.com/jackc/puddle v0.0.0-20190413234325-e4ced69a3a2b/go.mod h1:m4B5Dj62Y0fbyuIc15OsIqK0+JU8nkqQjsgx7dvjSWk= github.com/jackc/puddle v0.0.0-20190608224051-11cab39313c9/go.mod h1:m4B5Dj62Y0fbyuIc15OsIqK0+JU8nkqQjsgx7dvjSWk= github.com/jackc/puddle v1.1.3/go.mod h1:m4B5Dj62Y0fbyuIc15OsIqK0+JU8nkqQjsgx7dvjSWk= @@ -1020,6 +1024,7 @@ github.com/jackc/puddle v1.3.0 h1:eHK/5clGOatcjX3oWGBO/MpxpbHzSwud5EWTSCI+MX0= github.com/jackc/puddle v1.3.0/go.mod h1:m4B5Dj62Y0fbyuIc15OsIqK0+JU8nkqQjsgx7dvjSWk= github.com/jackc/puddle/v2 v2.2.1 h1:RhxXJtFG022u4ibrCSMSiu5aOq1i77R3OHKNJj77OAk= github.com/jackc/puddle/v2 v2.2.1/go.mod h1:vriiEXHvEE654aYKXXjOvZM39qJ0q+azkZFrfEOc3H4= +github.com/jackc/puddle/v2 v2.2.2/go.mod h1:vriiEXHvEE654aYKXXjOvZM39qJ0q+azkZFrfEOc3H4= github.com/jhump/protoreflect v1.16.0 h1:54fZg+49widqXYQ0b+usAFHbMkBGR4PpXrsHc8+TBDg= github.com/jhump/protoreflect v1.16.0/go.mod h1:oYPd7nPvcBw/5wlDfm/AVmU9zH9BgqGCI469pGxfj/8= github.com/jmespath/go-jmespath v0.4.1-0.20220621161143-b0104c826a24 h1:liMMTbpW34dhU4az1GN0pTPADwNmvoRSeoZ6PItiqnY= @@ -1339,6 +1344,8 @@ golang.org/x/crypto v0.19.0/go.mod h1:Iy9bg/ha4yyC70EfRS8jz+B6ybOBKMaSxLj6P6oBDf golang.org/x/crypto v0.20.0/go.mod h1:Xwo95rrVNIoSMx9wa1JroENMToLWn3RNVrTBpLHgZPQ= golang.org/x/crypto v0.27.0 h1:GXm2NjJrPaiv/h1tb2UH8QfgC/hOf/+z0p6PT8o1w7A= golang.org/x/crypto v0.27.0/go.mod h1:1Xngt8kV6Dvbssa53Ziq6Eqn0HqbZi5Z6R0ZpwQzt70= +golang.org/x/crypto v0.28.0 h1:GBDwsMXVQi34v5CCYUm2jkJvu4cbtru2U4TN2PSyQnw= +golang.org/x/crypto v0.28.0/go.mod h1:rmgy+3RHxRZMyY0jjAJShp2zgEdOqj2AO7U0pYmeQ7U= golang.org/x/exp v0.0.0-20180321215751-8460e604b9de/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA= golang.org/x/exp v0.0.0-20180807140117-3d87b88a115f/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA= golang.org/x/exp v0.0.0-20190121172915-509febef88a4/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA= @@ -1609,6 +1616,7 @@ golang.org/x/sys v0.8.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.17.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= golang.org/x/sys v0.25.0 h1:r+8e+loiHxRqhXVl6ML1nO3l1+oFoWbnlu2Ehimmi34= golang.org/x/sys v0.25.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= +golang.org/x/sys v0.26.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= golang.org/x/term v0.0.0-20201117132131-f5c789dd3221/go.mod h1:Nr5EML6q2oocZ2LXRh80K7BxOlk5/8JxuGnuhpl+muw= golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8= @@ -1639,6 +1647,8 @@ golang.org/x/text v0.9.0/go.mod h1:e1OnstbJyHTd6l/uOt8jFFHp6TRDWZR/bV3emEE/zU8= golang.org/x/text v0.14.0/go.mod 
h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU= golang.org/x/text v0.18.0 h1:XvMDiNzPAl0jr17s6W9lcaIhGUfUORdGCNsuLmPG224= golang.org/x/text v0.18.0/go.mod h1:BuEKDfySbSR4drPmRPG/7iBdf8hvFMuRexcpahXilzY= +golang.org/x/text v0.19.0 h1:kTxAhCbGbxhK0IwgSKiMO5awPoDQ0RpfiVYBfK860YM= +golang.org/x/text v0.19.0/go.mod h1:BuEKDfySbSR4drPmRPG/7iBdf8hvFMuRexcpahXilzY= golang.org/x/time v0.0.0-20181108054448-85acf8d2951c/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= golang.org/x/time v0.0.0-20190308202827-9d24e82272b4/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= golang.org/x/time v0.0.0-20191024005414-555d28b269f0/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= From b64463033957b9d3d0985692aac7ff8763bdc00f Mon Sep 17 00:00:00 2001 From: Rob Stradling Date: Tue, 8 Oct 2024 22:46:52 +0100 Subject: [PATCH 40/62] go get github.com/google/trillian/storage/postgresql/testdbpgx --- go.sum | 2 ++ 1 file changed, 2 insertions(+) diff --git a/go.sum b/go.sum index 58840a426a..d8cb2300ba 100644 --- a/go.sum +++ b/go.sum @@ -1024,6 +1024,7 @@ github.com/jackc/puddle v1.3.0 h1:eHK/5clGOatcjX3oWGBO/MpxpbHzSwud5EWTSCI+MX0= github.com/jackc/puddle v1.3.0/go.mod h1:m4B5Dj62Y0fbyuIc15OsIqK0+JU8nkqQjsgx7dvjSWk= github.com/jackc/puddle/v2 v2.2.1 h1:RhxXJtFG022u4ibrCSMSiu5aOq1i77R3OHKNJj77OAk= github.com/jackc/puddle/v2 v2.2.1/go.mod h1:vriiEXHvEE654aYKXXjOvZM39qJ0q+azkZFrfEOc3H4= +github.com/jackc/puddle/v2 v2.2.2 h1:PR8nw+E/1w0GLuRFSmiioY6UooMp6KJv0/61nB7icHo= github.com/jackc/puddle/v2 v2.2.2/go.mod h1:vriiEXHvEE654aYKXXjOvZM39qJ0q+azkZFrfEOc3H4= github.com/jhump/protoreflect v1.16.0 h1:54fZg+49widqXYQ0b+usAFHbMkBGR4PpXrsHc8+TBDg= github.com/jhump/protoreflect v1.16.0/go.mod h1:oYPd7nPvcBw/5wlDfm/AVmU9zH9BgqGCI469pGxfj/8= @@ -1616,6 +1617,7 @@ golang.org/x/sys v0.8.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.17.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= golang.org/x/sys v0.25.0 h1:r+8e+loiHxRqhXVl6ML1nO3l1+oFoWbnlu2Ehimmi34= golang.org/x/sys v0.25.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= +golang.org/x/sys v0.26.0 h1:KHjCJyddX0LoSTb3J+vWpupP9p0oznkqVk/IfjymZbo= golang.org/x/sys v0.26.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= golang.org/x/term v0.0.0-20201117132131-f5c789dd3221/go.mod h1:Nr5EML6q2oocZ2LXRh80K7BxOlk5/8JxuGnuhpl+muw= golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= From 9ecaa589e5a7dd0f20b97620e5b36212b0031a73 Mon Sep 17 00:00:00 2001 From: Martin Hutchinson Date: Thu, 10 Oct 2024 10:14:57 +0100 Subject: [PATCH 41/62] Deleted all of the revisioned tree support This was a legacy slow read path. Removing this means we can remove the options to support either revisioned or revisionless, as well as the complex query. This change makes it so that the postgres does not support revisioned trees, which is a win. 
--- storage/postgresql/admin_storage.go | 61 +------ storage/postgresql/admin_storage_test.go | 153 ---------------- storage/postgresql/log_storage.go | 2 +- storage/postgresql/postgresqlpb/gen.go | 18 -- storage/postgresql/postgresqlpb/options.pb.go | 166 ------------------ storage/postgresql/postgresqlpb/options.proto | 27 --- storage/postgresql/sql.go | 29 --- storage/postgresql/storage_test.go | 51 +----- storage/postgresql/tree_storage.go | 49 +----- 9 files changed, 19 insertions(+), 537 deletions(-) delete mode 100644 storage/postgresql/postgresqlpb/gen.go delete mode 100644 storage/postgresql/postgresqlpb/options.pb.go delete mode 100644 storage/postgresql/postgresqlpb/options.proto diff --git a/storage/postgresql/admin_storage.go b/storage/postgresql/admin_storage.go index 8def7b27cd..476211ec9d 100644 --- a/storage/postgresql/admin_storage.go +++ b/storage/postgresql/admin_storage.go @@ -15,23 +15,19 @@ package postgresql import ( - "bytes" "context" "database/sql" - "encoding/gob" "fmt" "sync" "time" "github.com/google/trillian" "github.com/google/trillian/storage" - "github.com/google/trillian/storage/postgresql/postgresqlpb" "github.com/jackc/pgx/v5" "github.com/jackc/pgx/v5/pgxpool" "google.golang.org/grpc/codes" "google.golang.org/grpc/status" "google.golang.org/protobuf/proto" - "google.golang.org/protobuf/types/known/anypb" "google.golang.org/protobuf/types/known/timestamppb" "k8s.io/klog/v2" ) @@ -168,9 +164,6 @@ func (t *adminTX) CreateTree(ctx context.Context, tree *trillian.Tree) (*trillia if err := storage.ValidateTreeForCreation(ctx, tree); err != nil { return nil, err } - if err := validateStorageSettings(tree); err != nil { - return nil, err - } id, err := storage.NewTreeID() if err != nil { @@ -196,39 +189,6 @@ func (t *adminTX) CreateTree(ctx context.Context, tree *trillian.Tree) (*trillia } rootDuration := newTree.MaxRootDuration.AsDuration() - // When creating a new tree we automatically add StorageSettings to allow us to - // determine that this tree can support newer storage features. When reading - // trees that do not have this StorageSettings populated, it must be assumed that - // the tree was created with the oldest settings. - // The gist of this code is super simple: create a new StorageSettings with the most - // modern defaults if the created tree does not have one, and then create a struct that - // represents this to store in the DB. Unfortunately because this involves anypb, struct - // copies, marshalling, and proper error handling this turns into a scary amount of code. - if tree.StorageSettings != nil { - newTree.StorageSettings = proto.Clone(tree.StorageSettings).(*anypb.Any) - } else { - o := &postgresqlpb.StorageOptions{ - SubtreeRevisions: false, // Default behaviour for new trees is to skip writing subtree revisions. 
- } - a, err := anypb.New(o) - if err != nil { - return nil, fmt.Errorf("failed to create new StorageOptions: %v", err) - } - newTree.StorageSettings = a - } - o := &postgresqlpb.StorageOptions{} - if err := anypb.UnmarshalTo(newTree.StorageSettings, o, proto.UnmarshalOptions{}); err != nil { - return nil, fmt.Errorf("failed to unmarshal StorageOptions: %v", err) - } - ss := storageSettings{ - Revisioned: o.SubtreeRevisions, - } - buff := &bytes.Buffer{} - enc := gob.NewEncoder(buff) - if err := enc.Encode(ss); err != nil { - return nil, fmt.Errorf("failed to encode storageSettings: %v", err) - } - _, err = t.tx.Exec( ctx, "INSERT INTO Trees(TreeId,TreeState,TreeType,HashStrategy,HashAlgorithm,SignatureAlgorithm,DisplayName,Description,CreateTimeMillis,UpdateTimeMillis,PrivateKey,PublicKey,MaxRootDurationMillis) VALUES($1,$2,$3,$4,$5,$6,$7,$8,$9,$10,$11,$12,$13)", @@ -242,8 +202,8 @@ func (t *adminTX) CreateTree(ctx context.Context, tree *trillian.Tree) (*trillia newTree.Description, nowMillis, nowMillis, - []byte{}, // PrivateKey: Unused, filling in for backward compatibility. - buff.Bytes(), // Using the otherwise unused PublicKey for storing StorageSettings. + []byte{}, // PrivateKey: Unused, filling in for backward compatibility. + []byte{}, // PublicKey: Unused, filling in for backward compatibility. rootDuration/time.Millisecond, ) if err != nil { @@ -276,9 +236,6 @@ func (t *adminTX) UpdateTree(ctx context.Context, treeID int64, updateFunc func( if err := storage.ValidateTreeForUpdate(ctx, beforeUpdate, tree); err != nil { return nil, err } - if err := validateStorageSettings(tree); err != nil { - return nil, err - } // TODO(pavelkalinnikov): When switching TreeType from PREORDERED_LOG to LOG, // ensure all entries in SequencedLeafData are integrated. @@ -287,9 +244,6 @@ func (t *adminTX) UpdateTree(ctx context.Context, treeID int64, updateFunc func( nowMillis := toMillisSinceEpoch(time.Now()) now := fromMillisSinceEpoch(nowMillis) tree.UpdateTime = timestamppb.New(now) - if err != nil { - return nil, fmt.Errorf("failed to build update time: %v", err) - } if err := tree.MaxRootDuration.CheckValid(); err != nil { return nil, fmt.Errorf("could not parse MaxRootDuration: %w", err) } @@ -365,17 +319,6 @@ func validateDeleted(ctx context.Context, tx pgx.Tx, treeID int64, wantDeleted b return nil } -func validateStorageSettings(tree *trillian.Tree) error { - if tree.StorageSettings.MessageIs(&postgresqlpb.StorageOptions{}) { - return nil - } - if tree.StorageSettings == nil { - // No storage settings is OK, we'll just use the defaults for new trees - return nil - } - return fmt.Errorf("storage_settings must be nil or postgresqlpb.StorageOptions, but got %v", tree.StorageSettings) -} - // storageSettings allows us to persist storage settings to the DB. 
// It is a tempting trap to use protos for this, but the way they encode // makes it impossible to tell the difference between no value ever written diff --git a/storage/postgresql/admin_storage_test.go b/storage/postgresql/admin_storage_test.go index 25791e93f1..0540eaf4b4 100644 --- a/storage/postgresql/admin_storage_test.go +++ b/storage/postgresql/admin_storage_test.go @@ -15,20 +15,16 @@ package postgresql import ( - "bytes" "context" - "encoding/gob" "fmt" "testing" "github.com/google/trillian" "github.com/google/trillian/storage" - "github.com/google/trillian/storage/postgresql/postgresqlpb" "github.com/google/trillian/storage/testonly" "github.com/jackc/pgx/v5" "github.com/jackc/pgx/v5/pgxpool" "google.golang.org/protobuf/proto" - "google.golang.org/protobuf/types/known/anypb" ) const selectTreeControlByID = "SELECT SigningEnabled,SequencingEnabled,SequenceIntervalSeconds " + @@ -134,155 +130,6 @@ func TestAdminTX_TreeWithNulls(t *testing.T) { } } -func TestAdminTX_StorageSettings(t *testing.T) { - cleanTestDB(DB) - s := NewAdminStorage(DB) - ctx := context.Background() - - badSettings, err := anypb.New(&trillian.Tree{}) - if err != nil { - t.Fatalf("Error marshaling proto: %v", err) - } - goodSettings, err := anypb.New(&postgresqlpb.StorageOptions{}) - if err != nil { - t.Fatalf("Error marshaling proto: %v", err) - } - - tests := []struct { - desc string - // fn attempts to either create or update a tree with a non-nil, valid Any proto - // on Tree.StorageSettings. It's expected to return an error. - fn func(storage.AdminStorage) error - wantErr bool - }{ - { - desc: "CreateTree Bad Settings", - fn: func(s storage.AdminStorage) error { - tree := proto.Clone(testonly.LogTree).(*trillian.Tree) - tree.StorageSettings = badSettings - _, err := storage.CreateTree(ctx, s, tree) - return err - }, - wantErr: true, - }, - { - desc: "CreateTree nil Settings", - fn: func(s storage.AdminStorage) error { - tree := proto.Clone(testonly.LogTree).(*trillian.Tree) - tree.StorageSettings = nil - _, err := storage.CreateTree(ctx, s, tree) - return err - }, - wantErr: false, - }, - { - desc: "CreateTree StorageOptions Settings", - fn: func(s storage.AdminStorage) error { - tree := proto.Clone(testonly.LogTree).(*trillian.Tree) - tree.StorageSettings = goodSettings - _, err := storage.CreateTree(ctx, s, tree) - return err - }, - wantErr: false, - }, - { - desc: "UpdateTree", - fn: func(s storage.AdminStorage) error { - tree, err := storage.CreateTree(ctx, s, testonly.LogTree) - if err != nil { - t.Fatalf("CreateTree() failed with err = %v", err) - } - _, err = storage.UpdateTree(ctx, s, tree.TreeId, func(tree *trillian.Tree) { tree.StorageSettings = badSettings }) - return err - }, - wantErr: true, - }, - } - for _, test := range tests { - if err := test.fn(s); (err != nil) != test.wantErr { - t.Errorf("err: %v, wantErr = %v", err, test.wantErr) - } - } -} - -// Test reading variants of trees that could have been created by old versions -// of Trillian to check we infer the correct storage options. 
-func TestAdminTX_GetTreeLegacies(t *testing.T) { - cleanTestDB(DB) - s := NewAdminStorage(DB) - ctx := context.Background() - - serializedStorageSettings := func(revisioned bool) []byte { - ss := storageSettings{ - Revisioned: revisioned, - } - buff := &bytes.Buffer{} - enc := gob.NewEncoder(buff) - if err := enc.Encode(ss); err != nil { - t.Fatalf("failed to encode storageSettings: %v", err) - } - return buff.Bytes() - } - tests := []struct { - desc string - key []byte - wantRevisioned bool - }{ - { - desc: "No data", - key: []byte{}, - wantRevisioned: true, - }, - { - desc: "Public key", - key: []byte("trustmethatthisisapublickey"), - wantRevisioned: true, - }, - { - desc: "StorageOptions revisioned", - key: serializedStorageSettings(true), - wantRevisioned: true, - }, - { - desc: "StorageOptions revisionless", - key: serializedStorageSettings(false), - wantRevisioned: false, - }, - } - for _, tC := range tests { - // Create a tree with default settings, and then reach into the DB to override - // whatever was written into the persisted settings to align with the test case. - tree, err := storage.CreateTree(ctx, s, testonly.LogTree) - if err != nil { - t.Fatal(err) - } - // We are reaching really into the internals here, but it's the only way to set up - // archival state. Going through the Create/Update methods will change the storage - // options. - tx, err := s.db.BeginTx(ctx, pgx.TxOptions{}) - if err != nil { - t.Fatal(err) - } - if _, err := tx.Exec(ctx, "UPDATE Trees SET PublicKey=$1 WHERE TreeId=$2", tC.key, tree.TreeId); err != nil { - t.Fatal(err) - } - if err := tx.Commit(ctx); err != nil { - t.Fatal(err) - } - readTree, err := storage.GetTree(ctx, s, tree.TreeId) - if err != nil { - t.Fatal(err) - } - o := &postgresqlpb.StorageOptions{} - if err := anypb.UnmarshalTo(readTree.StorageSettings, o, proto.UnmarshalOptions{}); err != nil { - t.Fatal(err) - } - if got, want := o.SubtreeRevisions, tC.wantRevisioned; got != want { - t.Errorf("%s SubtreeRevisions: got %t, wanted %t", tC.desc, got, want) - } - } -} - func TestAdminTX_HardDeleteTree(t *testing.T) { cleanTestDB(DB) s := NewAdminStorage(DB) diff --git a/storage/postgresql/log_storage.go b/storage/postgresql/log_storage.go index 8b02034579..abf4101b1e 100644 --- a/storage/postgresql/log_storage.go +++ b/storage/postgresql/log_storage.go @@ -333,7 +333,7 @@ type logTreeTX struct { func (t *logTreeTX) GetMerkleNodes(ctx context.Context, ids []compact.NodeID) ([]tree.Node, error) { t.treeTX.mu.Lock() defer t.treeTX.mu.Unlock() - return t.subtreeCache.GetNodes(ids, t.getSubtreesAtRev(ctx, t.readRev)) + return t.subtreeCache.GetNodes(ids, t.getSubtreesFunc(ctx)) } func (t *logTreeTX) DequeueLeaves(ctx context.Context, limit int, cutoffTime time.Time) ([]*trillian.LogLeaf, error) { diff --git a/storage/postgresql/postgresqlpb/gen.go b/storage/postgresql/postgresqlpb/gen.go deleted file mode 100644 index 41d1119fb0..0000000000 --- a/storage/postgresql/postgresqlpb/gen.go +++ /dev/null @@ -1,18 +0,0 @@ -// Copyright 2023 Google LLC. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-// See the License for the specific language governing permissions and -// limitations under the License. - -// Package postgresqlpb contains protobuf definitions used by the postgresql implementation. -package postgresqlpb - -//go:generate protoc -I=. --go_out=paths=source_relative:. options.proto diff --git a/storage/postgresql/postgresqlpb/options.pb.go b/storage/postgresql/postgresqlpb/options.pb.go deleted file mode 100644 index 111c119d63..0000000000 --- a/storage/postgresql/postgresqlpb/options.pb.go +++ /dev/null @@ -1,166 +0,0 @@ -// Copyright 2023 Google LLC. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -// Code generated by protoc-gen-go. DO NOT EDIT. -// versions: -// protoc-gen-go v1.34.2 -// protoc v4.22.3 -// source: options.proto - -package postgresqlpb - -import ( - protoreflect "google.golang.org/protobuf/reflect/protoreflect" - protoimpl "google.golang.org/protobuf/runtime/protoimpl" - reflect "reflect" - sync "sync" -) - -const ( - // Verify that this generated code is sufficiently up-to-date. - _ = protoimpl.EnforceVersion(20 - protoimpl.MinVersion) - // Verify that runtime/protoimpl is sufficiently up-to-date. - _ = protoimpl.EnforceVersion(protoimpl.MaxVersion - 20) -) - -// StorageOptions contains configuration parameters for PostgreSQL implementation -// of the storage backend. This is envisioned only to be used for changes that -// would be breaking, but need to support old behaviour for backwards compatibility. -type StorageOptions struct { - state protoimpl.MessageState - sizeCache protoimpl.SizeCache - unknownFields protoimpl.UnknownFields - - // subtreeRevisions being explicitly set to false will skip writing subtree revisions. - // https://github.com/google/trillian/pull/3201 - SubtreeRevisions bool `protobuf:"varint,1,opt,name=subtreeRevisions,proto3" json:"subtreeRevisions,omitempty"` -} - -func (x *StorageOptions) Reset() { - *x = StorageOptions{} - if protoimpl.UnsafeEnabled { - mi := &file_options_proto_msgTypes[0] - ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) - ms.StoreMessageInfo(mi) - } -} - -func (x *StorageOptions) String() string { - return protoimpl.X.MessageStringOf(x) -} - -func (*StorageOptions) ProtoMessage() {} - -func (x *StorageOptions) ProtoReflect() protoreflect.Message { - mi := &file_options_proto_msgTypes[0] - if protoimpl.UnsafeEnabled && x != nil { - ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) - if ms.LoadMessageInfo() == nil { - ms.StoreMessageInfo(mi) - } - return ms - } - return mi.MessageOf(x) -} - -// Deprecated: Use StorageOptions.ProtoReflect.Descriptor instead. 
-func (*StorageOptions) Descriptor() ([]byte, []int) { - return file_options_proto_rawDescGZIP(), []int{0} -} - -func (x *StorageOptions) GetSubtreeRevisions() bool { - if x != nil { - return x.SubtreeRevisions - } - return false -} - -var File_options_proto protoreflect.FileDescriptor - -var file_options_proto_rawDesc = []byte{ - 0x0a, 0x0d, 0x6f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x12, - 0x0c, 0x70, 0x6f, 0x73, 0x74, 0x67, 0x72, 0x65, 0x73, 0x71, 0x6c, 0x70, 0x62, 0x22, 0x3c, 0x0a, - 0x0e, 0x53, 0x74, 0x6f, 0x72, 0x61, 0x67, 0x65, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x12, - 0x2a, 0x0a, 0x10, 0x73, 0x75, 0x62, 0x74, 0x72, 0x65, 0x65, 0x52, 0x65, 0x76, 0x69, 0x73, 0x69, - 0x6f, 0x6e, 0x73, 0x18, 0x01, 0x20, 0x01, 0x28, 0x08, 0x52, 0x10, 0x73, 0x75, 0x62, 0x74, 0x72, - 0x65, 0x65, 0x52, 0x65, 0x76, 0x69, 0x73, 0x69, 0x6f, 0x6e, 0x73, 0x42, 0x3c, 0x5a, 0x3a, 0x67, - 0x69, 0x74, 0x68, 0x75, 0x62, 0x2e, 0x63, 0x6f, 0x6d, 0x2f, 0x67, 0x6f, 0x6f, 0x67, 0x6c, 0x65, - 0x2f, 0x74, 0x72, 0x69, 0x6c, 0x6c, 0x69, 0x61, 0x6e, 0x2f, 0x73, 0x74, 0x6f, 0x72, 0x61, 0x67, - 0x65, 0x2f, 0x70, 0x6f, 0x73, 0x74, 0x67, 0x72, 0x65, 0x73, 0x71, 0x6c, 0x2f, 0x70, 0x6f, 0x73, - 0x74, 0x67, 0x72, 0x65, 0x73, 0x71, 0x6c, 0x70, 0x62, 0x62, 0x06, 0x70, 0x72, 0x6f, 0x74, 0x6f, - 0x33, -} - -var ( - file_options_proto_rawDescOnce sync.Once - file_options_proto_rawDescData = file_options_proto_rawDesc -) - -func file_options_proto_rawDescGZIP() []byte { - file_options_proto_rawDescOnce.Do(func() { - file_options_proto_rawDescData = protoimpl.X.CompressGZIP(file_options_proto_rawDescData) - }) - return file_options_proto_rawDescData -} - -var file_options_proto_msgTypes = make([]protoimpl.MessageInfo, 1) -var file_options_proto_goTypes = []any{ - (*StorageOptions)(nil), // 0: postgresqlpb.StorageOptions -} -var file_options_proto_depIdxs = []int32{ - 0, // [0:0] is the sub-list for method output_type - 0, // [0:0] is the sub-list for method input_type - 0, // [0:0] is the sub-list for extension type_name - 0, // [0:0] is the sub-list for extension extendee - 0, // [0:0] is the sub-list for field type_name -} - -func init() { file_options_proto_init() } -func file_options_proto_init() { - if File_options_proto != nil { - return - } - if !protoimpl.UnsafeEnabled { - file_options_proto_msgTypes[0].Exporter = func(v any, i int) any { - switch v := v.(*StorageOptions); i { - case 0: - return &v.state - case 1: - return &v.sizeCache - case 2: - return &v.unknownFields - default: - return nil - } - } - } - type x struct{} - out := protoimpl.TypeBuilder{ - File: protoimpl.DescBuilder{ - GoPackagePath: reflect.TypeOf(x{}).PkgPath(), - RawDescriptor: file_options_proto_rawDesc, - NumEnums: 0, - NumMessages: 1, - NumExtensions: 0, - NumServices: 0, - }, - GoTypes: file_options_proto_goTypes, - DependencyIndexes: file_options_proto_depIdxs, - MessageInfos: file_options_proto_msgTypes, - }.Build() - File_options_proto = out.File - file_options_proto_rawDesc = nil - file_options_proto_goTypes = nil - file_options_proto_depIdxs = nil -} diff --git a/storage/postgresql/postgresqlpb/options.proto b/storage/postgresql/postgresqlpb/options.proto deleted file mode 100644 index 96c5859d3b..0000000000 --- a/storage/postgresql/postgresqlpb/options.proto +++ /dev/null @@ -1,27 +0,0 @@ -// Copyright 2023 Google LLC. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -syntax = "proto3"; -option go_package = "github.com/google/trillian/storage/postgresql/postgresqlpb"; - -package postgresqlpb; - -// StorageOptions contains configuration parameters for PostgreSQL implementation -// of the storage backend. This is envisioned only to be used for changes that -// would be breaking, but need to support old behaviour for backwards compatibility. -message StorageOptions { - // subtreeRevisions being explicitly set to false will skip writing subtree revisions. - // https://github.com/google/trillian/pull/3201 - bool subtreeRevisions = 1; -} diff --git a/storage/postgresql/sql.go b/storage/postgresql/sql.go index 271f4fd5f0..25ecd33dd9 100644 --- a/storage/postgresql/sql.go +++ b/storage/postgresql/sql.go @@ -15,15 +15,11 @@ package postgresql import ( - "bytes" "database/sql" - "encoding/gob" "fmt" "time" "github.com/google/trillian" - "github.com/google/trillian/storage/postgresql/postgresqlpb" - "google.golang.org/protobuf/types/known/anypb" "google.golang.org/protobuf/types/known/durationpb" "google.golang.org/protobuf/types/known/timestamppb" ) @@ -128,30 +124,5 @@ func readTree(r row) (*trillian.Tree, error) { } } - // We're going to try to interpret PublicKey as storageSettings, but it could be a - // public key from a really old tree, or an empty column from a tree created in the - // period between Trillian key material being removed and this column being used for - // storing settings. - buff := bytes.NewBuffer(publicKey) - dec := gob.NewDecoder(buff) - ss := &storageSettings{} - var o *postgresqlpb.StorageOptions - if err := dec.Decode(ss); err != nil { - // If there are no storageSettings then this tree was created before settings - // were supported, and thus we have to populate the settings with the oldest - // settings for features. - o = &postgresqlpb.StorageOptions{ - SubtreeRevisions: true, - } - } else { - o = &postgresqlpb.StorageOptions{ - SubtreeRevisions: ss.Revisioned, - } - } - tree.StorageSettings, err = anypb.New(o) - if err != nil { - return nil, fmt.Errorf("failed to put StorageSettings into tree: %w", err) - } - return tree, nil } diff --git a/storage/postgresql/storage_test.go b/storage/postgresql/storage_test.go index b370dd995d..b9b484510e 100644 --- a/storage/postgresql/storage_test.go +++ b/storage/postgresql/storage_test.go @@ -24,11 +24,9 @@ import ( "fmt" "os" "testing" - "time" "github.com/google/trillian" "github.com/google/trillian/storage" - "github.com/google/trillian/storage/postgresql/postgresqlpb" testdb "github.com/google/trillian/storage/postgresql/testdbpgx" storageto "github.com/google/trillian/storage/testonly" stree "github.com/google/trillian/storage/tree" @@ -36,36 +34,9 @@ import ( "github.com/jackc/pgx/v5/pgxpool" "github.com/transparency-dev/merkle/compact" "github.com/transparency-dev/merkle/rfc6962" - "google.golang.org/protobuf/types/known/anypb" - "google.golang.org/protobuf/types/known/durationpb" "k8s.io/klog/v2" ) -var ( - // LogTree is a valid, LOG-type trillian.Tree for tests. 
- // This tree is configured to write revisions for each subtree. - // This matches the legacy behaviour before revisions were removed. - RevisionedLogTree = &trillian.Tree{ - TreeState: trillian.TreeState_ACTIVE, - TreeType: trillian.TreeType_LOG, - DisplayName: "Llamas Log", - Description: "Registry of publicly-owned llamas", - MaxRootDuration: durationpb.New(0 * time.Millisecond), - StorageSettings: mustCreateRevisionedStorage(), - } -) - -func mustCreateRevisionedStorage() *anypb.Any { - o := &postgresqlpb.StorageOptions{ - SubtreeRevisions: true, - } - a, err := anypb.New(o) - if err != nil { - panic(err) - } - return a -} - func TestNodeRoundTrip(t *testing.T) { nodes := createSomeNodes(256) nodeIDs := make([]compact.NodeID, len(nodes)) @@ -100,7 +71,7 @@ func TestNodeRoundTrip(t *testing.T) { if err := tx.SetMerkleNodes(ctx, tc.store); err != nil { t.Fatalf("Failed to store nodes: %s", err) } - return storeLogRoot(ctx, tx, uint64(len(tc.store)), uint64(writeRev), []byte{1, 2, 3}) + return storeLogRoot(ctx, tx, uint64(len(tc.store)), []byte{1, 2, 3}) }) runLogTX(s, tree, t, func(ctx context.Context, tx storage.LogTreeTX) error { @@ -119,9 +90,6 @@ func TestNodeRoundTrip(t *testing.T) { t.Run(tc.desc+"-norevisions", func(t *testing.T) { testbody(storageto.LogTree) }) - t.Run(tc.desc+"-revisions", func(t *testing.T) { - testbody(RevisionedLogTree) - }) } } @@ -136,10 +104,6 @@ func TestLogNodeRoundTripMultiSubtree(t *testing.T) { desc: "Revisionless", tree: storageto.LogTree, }, - { - desc: "Revisions", - tree: RevisionedLogTree, - }, } for _, tC := range testCases { t.Run(tC.desc, func(t *testing.T) { @@ -151,7 +115,7 @@ func TestLogNodeRoundTripMultiSubtree(t *testing.T) { const writeRev = int64(100) const size = 871 - nodesToStore, err := createLogNodesForTreeAtSize(t, size, writeRev) + nodesToStore, err := createLogNodesForTreeAtSize(t, size) if err != nil { t.Fatalf("failed to create test tree: %v", err) } @@ -166,7 +130,7 @@ func TestLogNodeRoundTripMultiSubtree(t *testing.T) { if err := tx.SetMerkleNodes(ctx, nodesToStore); err != nil { t.Fatalf("Failed to store nodes: %s", err) } - return storeLogRoot(ctx, tx, uint64(size), uint64(writeRev), []byte{1, 2, 3}) + return storeLogRoot(ctx, tx, uint64(size), []byte{1, 2, 3}) }) } @@ -196,7 +160,7 @@ func TestLogNodeRoundTripMultiSubtree(t *testing.T) { func forceWriteRevision(rev int64, tx storage.LogTreeTX) { mtx, ok := tx.(*logTreeTX) if !ok { - panic(nil) + panic(errors.New("uh oh")) } mtx.treeTX.writeRevision = rev } @@ -212,7 +176,8 @@ func createSomeNodes(count int) []stree.Node { return r } -func createLogNodesForTreeAtSize(t *testing.T, ts, rev int64) ([]stree.Node, error) { +func createLogNodesForTreeAtSize(t *testing.T, ts int64) ([]stree.Node, error) { + t.Helper() hasher := rfc6962.New(crypto.SHA256) fact := compact.RangeFactory{Hash: hasher.HashChildren} cr := fact.NewEmptyRange(0) @@ -320,13 +285,13 @@ func getVersion(db *pgxpool.Pool) (string, error) { func mustSignAndStoreLogRoot(ctx context.Context, t *testing.T, l storage.LogStorage, tree *trillian.Tree, treeSize uint64) { t.Helper() if err := l.ReadWriteTransaction(ctx, tree, func(ctx context.Context, tx storage.LogTreeTX) error { - return storeLogRoot(ctx, tx, treeSize, 0, []byte{0}) + return storeLogRoot(ctx, tx, treeSize, []byte{0}) }); err != nil { t.Fatalf("ReadWriteTransaction: %v", err) } } -func storeLogRoot(ctx context.Context, tx storage.LogTreeTX, size, rev uint64, hash []byte) error { +func storeLogRoot(ctx context.Context, tx storage.LogTreeTX, size 
uint64, hash []byte) error { logRoot, err := (&types.LogRootV1{TreeSize: size, RootHash: hash}).MarshalBinary() if err != nil { return fmt.Errorf("error marshaling new LogRoot: %v", err) diff --git a/storage/postgresql/tree_storage.go b/storage/postgresql/tree_storage.go index fbc5fb7d95..a073b4c3ce 100644 --- a/storage/postgresql/tree_storage.go +++ b/storage/postgresql/tree_storage.go @@ -24,14 +24,12 @@ import ( "github.com/google/trillian" "github.com/google/trillian/storage/cache" - "github.com/google/trillian/storage/postgresql/postgresqlpb" "github.com/google/trillian/storage/storagepb" "github.com/google/trillian/storage/tree" "github.com/jackc/pgx/v5" "github.com/jackc/pgx/v5/pgconn" "github.com/jackc/pgx/v5/pgxpool" "google.golang.org/protobuf/proto" - "google.golang.org/protobuf/types/known/anypb" "k8s.io/klog/v2" ) @@ -52,19 +50,7 @@ const ( "VALUES($1,$2,$3,$4,$5,$6) " + "ON CONFLICT DO NOTHING" - selectSubtreeSQL = "SELECT x.SubtreeId,s.Nodes " + - "FROM (" + - "SELECT n.TreeId,n.SubtreeId,max(n.SubtreeRevision) AS MaxRevision " + - "FROM Subtree n " + - "WHERE n.SubtreeId=ANY($1)" + - " AND n.TreeId=$2" + - " AND n.SubtreeRevision<=$3 " + - "GROUP BY n.TreeId,n.SubtreeId" + - ") AS x" + - " INNER JOIN Subtree s ON (x.SubtreeId=s.SubtreeId AND x.MaxRevision=s.SubtreeRevision AND x.TreeId=s.TreeId) " + - "WHERE s.TreeId=$4" - - selectSubtreeSQLNoRev = "SELECT SubtreeId,Nodes " + + selectSubtreeSQL = "SELECT SubtreeId,Nodes " + "FROM Subtree " + "WHERE TreeId=$1" + " AND SubtreeId=ANY($2)" @@ -105,13 +91,6 @@ func newTreeStorage(db *pgxpool.Pool) *postgreSQLTreeStorage { } func (m *postgreSQLTreeStorage) beginTreeTx(ctx context.Context, tree *trillian.Tree, hashSizeBytes int, subtreeCache *cache.SubtreeCache) (treeTX, error) { - var subtreeRevisions bool - o := &postgresqlpb.StorageOptions{} - if err := anypb.UnmarshalTo(tree.StorageSettings, o, proto.UnmarshalOptions{}); err != nil { - return treeTX{}, fmt.Errorf("failed to unmarshal StorageSettings: %v", err) - } - subtreeRevisions = o.SubtreeRevisions - t, err := m.db.BeginTx(ctx, pgx.TxOptions{}) if err != nil { klog.Warningf("Could not start tree TX: %s", err) @@ -127,7 +106,6 @@ func (m *postgreSQLTreeStorage) beginTreeTx(ctx context.Context, tree *trillian. hashSizeBytes: hashSizeBytes, subtreeCache: subtreeCache, writeRevision: -1, - subtreeRevs: subtreeRevisions, }, nil } @@ -142,10 +120,9 @@ type treeTX struct { hashSizeBytes int subtreeCache *cache.SubtreeCache writeRevision int64 - subtreeRevs bool } -func (t *treeTX) getSubtrees(ctx context.Context, treeRevision int64, ids [][]byte) ([]*storagepb.SubtreeProto, error) { +func (t *treeTX) getSubtrees(ctx context.Context, ids [][]byte) ([]*storagepb.SubtreeProto, error) { klog.V(2).Infof("getSubtrees(len(ids)=%d)", len(ids)) klog.V(4).Infof("getSubtrees(") if len(ids) == 0 { @@ -154,11 +131,7 @@ func (t *treeTX) getSubtrees(ctx context.Context, treeRevision int64, ids [][]by var rows pgx.Rows var err error - if t.subtreeRevs { - rows, err = t.tx.Query(ctx, selectSubtreeSQL, ids, t.treeID, treeRevision, t.treeID) - } else { - rows, err = t.tx.Query(ctx, selectSubtreeSQLNoRev, t.treeID, ids) - } + rows, err = t.tx.Query(ctx, selectSubtreeSQL, t.treeID, ids) if err != nil { klog.Warningf("Failed to get merkle subtrees: %s", err) return nil, err @@ -233,13 +206,8 @@ func (t *treeTX) storeSubtrees(ctx context.Context, subtrees []*storagepb.Subtre // a really large number of subtrees to store. 
rows := make([][]interface{}, 0, len(subtrees)) - // If not using subtree revisions then default value of 0 is fine. There is no - // significance to this value, other than it cannot be NULL in the DB. + // TODO(mhutchinson): continue deleting this throughout var subtreeRev int64 - if t.subtreeRevs { - // We're using subtree revisions, so ensure we write at the correct revision - subtreeRev = t.writeRevision - } for _, s := range subtrees { s := s if s.Prefix == nil { @@ -311,18 +279,17 @@ func checkResultOkAndCopyCountIs(rowsAffected int64, err error, count int64) err return nil } -// getSubtreesAtRev returns a GetSubtreesFunc which reads at the passed in rev. -func (t *treeTX) getSubtreesAtRev(ctx context.Context, rev int64) cache.GetSubtreesFunc { +// getSubtreesFunc returns a GetSubtreesFunc which reads at the passed in rev. +func (t *treeTX) getSubtreesFunc(ctx context.Context) cache.GetSubtreesFunc { return func(ids [][]byte) ([]*storagepb.SubtreeProto, error) { - return t.getSubtrees(ctx, rev, ids) + return t.getSubtrees(ctx, ids) } } func (t *treeTX) SetMerkleNodes(ctx context.Context, nodes []tree.Node) error { t.mu.Lock() defer t.mu.Unlock() - rev := t.writeRevision - 1 - return t.subtreeCache.SetNodes(nodes, t.getSubtreesAtRev(ctx, rev)) + return t.subtreeCache.SetNodes(nodes, t.getSubtreesFunc(ctx)) } func (t *treeTX) Commit(ctx context.Context) error { From 8e459da6e8e1f7576f494224e8df42d2a9baf59c Mon Sep 17 00:00:00 2001 From: Martin Hutchinson Date: Thu, 10 Oct 2024 10:20:50 +0100 Subject: [PATCH 42/62] Missed one --- storage/postgresql/admin_storage.go | 9 --------- 1 file changed, 9 deletions(-) diff --git a/storage/postgresql/admin_storage.go b/storage/postgresql/admin_storage.go index 476211ec9d..58c78236c5 100644 --- a/storage/postgresql/admin_storage.go +++ b/storage/postgresql/admin_storage.go @@ -318,12 +318,3 @@ func validateDeleted(ctx context.Context, tx pgx.Tx, treeID int64, wantDeleted b } return nil } - -// storageSettings allows us to persist storage settings to the DB. -// It is a tempting trap to use protos for this, but the way they encode -// makes it impossible to tell the difference between no value ever written -// and a value that was written with the default values for each field. -// Using an explicit struct and gob encoding allows us to tell the difference. -type storageSettings struct { - Revisioned bool -} From 6698903998afef3fae30a4d2cafaad98f9bdae7d Mon Sep 17 00:00:00 2001 From: Martin Hutchinson Date: Thu, 10 Oct 2024 10:25:40 +0100 Subject: [PATCH 43/62] Disambiguate flag name --- quota/postgresqlqm/quota_provider.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/quota/postgresqlqm/quota_provider.go b/quota/postgresqlqm/quota_provider.go index e6d599d728..c85ea5d125 100644 --- a/quota/postgresqlqm/quota_provider.go +++ b/quota/postgresqlqm/quota_provider.go @@ -25,7 +25,7 @@ import ( // QuotaManagerName identifies the PostgreSQL quota implementation. const QuotaManagerName = "postgresql" -var maxUnsequencedRows = flag.Int("max_unsequenced_rows", DefaultMaxUnsequenced, "Max number of unsequenced rows before rate limiting kicks in. "+ +var maxUnsequencedRows = flag.Int("pg_max_unsequenced_rows", DefaultMaxUnsequenced, "Max number of unsequenced rows before rate limiting kicks in. 
"+ "Only effective for quota_system=postgresql.") func init() { From 2d5c941e55f38dfdb5410e5bdc539f52bc9024d1 Mon Sep 17 00:00:00 2001 From: Rob Stradling Date: Fri, 25 Oct 2024 22:08:09 +0100 Subject: [PATCH 44/62] Fix integration and unit tests --- .github/workflows/test_pgdb.yaml | 70 +++++++++++++ integration/functions.sh | 5 + scripts/resetpgdb.sh | 114 ++++++++++++++++++++++ storage/postgresql/testdbpgx/testdbpgx.go | 2 +- 4 files changed, 190 insertions(+), 1 deletion(-) create mode 100644 .github/workflows/test_pgdb.yaml create mode 100755 scripts/resetpgdb.sh diff --git a/.github/workflows/test_pgdb.yaml b/.github/workflows/test_pgdb.yaml new file mode 100644 index 0000000000..d8b34b6791 --- /dev/null +++ b/.github/workflows/test_pgdb.yaml @@ -0,0 +1,70 @@ +--- +name: Test PostgreSQL +on: + push: + branches: + - master + pull_request: + workflow_dispatch: + +permissions: + contents: read + +jobs: + lint: + permissions: + contents: read # for actions/checkout to fetch code + pull-requests: read # for golangci/golangci-lint-action to fetch pull requests + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@eef61447b9ff4aafe5dcd4e0bbf5d482be7e7871 # v4.2.1 + + - uses: actions/setup-go@0a12ed9d6a96ab950c8f026ed9f722fe0da7ef32 # v5.0.2 + with: + go-version-file: go.mod + check-latest: true + cache: true + + - uses: golangci/golangci-lint-action@971e284b6050e8a5849b72094c50ab08da042db8 # v6.1.1 + with: + version: 'v1.55.1' + args: ./storage/postgresql + + integration-and-unit-tests: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@eef61447b9ff4aafe5dcd4e0bbf5d482be7e7871 # v4.2.1 + + - uses: actions/setup-go@0a12ed9d6a96ab950c8f026ed9f722fe0da7ef32 # v5.0.2 + with: + go-version-file: go.mod + check-latest: true + cache: true + + - name: Build before tests + run: go mod download && go build ./... + + - name: Run PostgreSQL + run: docker run --rm -d --name=pgsql -p 5432:5432 -e POSTGRES_HOST_AUTH_METHOD=trust postgres:latest + + - name: Wait for PostgreSQL + uses: nick-fields/retry@7152eba30c6575329ac0576536151aca5a72780e # v3.0.0 + with: + timeout_seconds: 15 + max_attempts: 3 + retry_on: error + command: docker exec pgsql psql -U postgres -c "SELECT 1" + + - name: Get PostgreSQL logs + run: docker logs pgsql 2>&1 + + - name: Run integration tests + run: ./integration/integration_test.sh + env: + TEST_POSTGRESQL_URI: postgresql:///defaultdb?host=localhost&user=postgres&password=postgres + POSTGRESQL_IN_CONTAINER: true + POSTGRESQL_CONTAINER_NAME: pgsql + + - name: Run unit tests + run: go test -v ./storage/postgresql/... ./quota/postgresqlqm/... 
+ \ No newline at end of file diff --git a/integration/functions.sh b/integration/functions.sh index cd4b90f4b1..ae202b8ec5 100755 --- a/integration/functions.sh +++ b/integration/functions.sh @@ -127,6 +127,8 @@ log_prep_test() { yes | bash "${TRILLIAN_PATH}/scripts/resetdb.sh" elif [[ "${TEST_COCKROACHDB_URI}" != "" ]]; then yes | bash "${TRILLIAN_PATH}/scripts/resetcrdb.sh" + elif [[ "${TEST_POSTGRESQL_URI}" != "" ]]; then + yes | bash "${TRILLIAN_PATH}/scripts/resetpgdb.sh" fi local logserver_opts='' @@ -139,6 +141,9 @@ log_prep_test() { elif [[ "${TEST_COCKROACHDB_URI}" != "" ]]; then logserver_opts+="--quota_system=crdb --storage_system=crdb --crdb_uri=${TEST_COCKROACHDB_URI}" logsigner_opts+="--quota_system=crdb --storage_system=crdb --crdb_uri=${TEST_COCKROACHDB_URI}" + elif [[ "${TEST_POSTGRESQL_URI}" != "" ]]; then + logserver_opts+="--quota_system=postgresql --storage_system=postgresql --postgresql_uri=${TEST_POSTGRESQL_URI}" + logsigner_opts+="--quota_system=postgresql --storage_system=postgresql --postgresql_uri=${TEST_POSTGRESQL_URI}" fi # Start a local etcd instance (if configured). diff --git a/scripts/resetpgdb.sh b/scripts/resetpgdb.sh new file mode 100755 index 0000000000..73c898445c --- /dev/null +++ b/scripts/resetpgdb.sh @@ -0,0 +1,114 @@ +#!/bin/bash + +set -e + +usage() { + cat < /dev/stderr + exit 1 +} + +collect_vars() { + # set unset environment variables to defaults + [ -z ${POSTGRESQL_ROOT_USER+x} ] && POSTGRESQL_ROOT_USER="postgres" + [ -z ${POSTGRESQL_HOST+x} ] && POSTGRESQL_HOST="localhost" + [ -z ${POSTGRESQL_PORT+x} ] && POSTGRESQL_PORT="5432" + [ -z ${POSTGRESQL_DATABASE+x} ] && POSTGRESQL_DATABASE="defaultdb" + [ -z ${POSTGRESQL_USER+x} ] && POSTGRESQL_USER="test" + [ -z ${POSTGRESQL_PASSWORD+x} ] && POSTGRESQL_PASSWORD="zaphod" + [ -z ${POSTGRESQL_USER_HOST+x} ] && POSTGRESQL_USER_HOST="localhost" + [ -z ${POSTGRESQL_INSECURE+x} ] && POSTGRESQL_INSECURE="true" + [ -z ${POSTGRESQL_IN_CONTAINER+x} ] && POSTGRESQL_IN_CONTAINER="false" + [ -z ${POSTGRESQL_CONTAINER_NAME+x} ] && POSTGRESQL_CONTAINER_NAME="pgsql" + FLAGS=() + + # handle flags + FORCE=false + VERBOSE=false + while [[ $# -gt 0 ]]; do + case "$1" in + --force) FORCE=true ;; + --verbose) VERBOSE=true ;; + --help) usage; exit ;; + *) FLAGS+=("$1") + esac + shift 1 + done + + FLAGS+=(-U "${POSTGRESQL_ROOT_USER}") + FLAGS+=(--host "${POSTGRESQL_HOST}") + FLAGS+=(--port "${POSTGRESQL_PORT}") + + # Useful for debugging + FLAGS+=(--echo-all) + + # Optionally print flags (before appending password) + [[ ${VERBOSE} = 'true' ]] && echo "- Using PostgreSQL Flags: ${FLAGS[@]}" + + # append password if supplied + [ -z ${POSTGRESQL_ROOT_PASSWORD+x} ] || FLAGS+=(-p"${POSTGRESQL_ROOT_PASSWORD}") + + if [[ ${POSTGRESQL_IN_CONTAINER} = 'true' ]]; then + CMD="docker exec -i ${POSTGRESQL_CONTAINER_NAME} psql" + else + CMD="psql" + fi +} + +main() { + collect_vars "$@" + + readonly TRILLIAN_PATH=$(go list -f '{{.Dir}}' github.com/google/trillian) + + echo "Warning: about to destroy and reset database '${POSTGRESQL_DATABASE}'" + + [[ ${FORCE} = true ]] || read -p "Are you sure? [Y/N]: " -n 1 -r + echo # Print newline following the above prompt + + if [ -z ${REPLY+x} ] || [[ $REPLY =~ ^[Yy]$ ]] + then + echo "Resetting DB..." + set -eux + $CMD "${FLAGS[@]}" -c "DROP DATABASE IF EXISTS ${POSTGRESQL_DATABASE};" || \ + die "Error: Failed to drop database '${POSTGRESQL_DATABASE}'." + $CMD "${FLAGS[@]}" -c "CREATE DATABASE ${POSTGRESQL_DATABASE};" || \ + die "Error: Failed to create database '${POSTGRESQL_DATABASE}'." 
+ if [[ ${POSTGRESQL_INSECURE} = 'true' ]]; then + $CMD "${FLAGS[@]}" -c "CREATE USER ${POSTGRESQL_USER};" || \ + die "Error: Failed to create user '${POSTGRESQL_USER}'." + else + $CMD "${FLAGS[@]}" -c "CREATE USER ${POSTGRESQL_USER} WITH PASSWORD '${POSTGRESQL_PASSWORD}';" || \ + die "Error: Failed to create user '${POSTGRESQL_USER}'." + fi + $CMD "${FLAGS[@]}" -c "GRANT ALL PRIVILEGES ON DATABASE ${POSTGRESQL_DATABASE} TO ${POSTGRESQL_USER} WITH GRANT OPTION" || \ + die "Error: Failed to grant '${POSTGRESQL_USER}' user all privileges on '${POSTGRESQL_DATABASE}'." + $CMD "${FLAGS[@]}" -d ${POSTGRESQL_DATABASE} < ${TRILLIAN_PATH}/storage/postgresql/schema/storage.sql || \ + die "Error: Failed to create tables in '${POSTGRESQL_DATABASE}' database." + echo "Reset Complete" + fi +} + +main "$@" diff --git a/storage/postgresql/testdbpgx/testdbpgx.go b/storage/postgresql/testdbpgx/testdbpgx.go index bcaf6b5983..bd42b28676 100644 --- a/storage/postgresql/testdbpgx/testdbpgx.go +++ b/storage/postgresql/testdbpgx/testdbpgx.go @@ -36,7 +36,7 @@ const ( // instance URI to use. The value must have a trailing slash. PostgreSQLURIEnv = "TEST_POSTGRESQL_URI" - defaultTestPostgreSQLURI = "postgresql:///template1?host=localhost&user=postgres&password=postgres" + defaultTestPostgreSQLURI = "postgresql:///defaultdb?host=localhost&user=postgres&password=postgres" ) type storageDriverInfo struct { From d1a21e18a627a8683177a70df16e0714083af011 Mon Sep 17 00:00:00 2001 From: Rob Stradling Date: Fri, 25 Oct 2024 22:42:19 +0100 Subject: [PATCH 45/62] Enable the BigBatch tests --- storage/postgresql/log_storage_test.go | 2 -- 1 file changed, 2 deletions(-) diff --git a/storage/postgresql/log_storage_test.go b/storage/postgresql/log_storage_test.go index 94bce0845c..43f8abc19d 100644 --- a/storage/postgresql/log_storage_test.go +++ b/storage/postgresql/log_storage_test.go @@ -229,7 +229,6 @@ func TestQueueLeaves(t *testing.T) { } func TestQueueLeavesDuplicateBigBatch(t *testing.T) { - t.Skip("Known Issue: https://github.com/google/trillian/issues/1845") ctx := context.Background() cleanTestDB(DB) @@ -450,7 +449,6 @@ func TestGetLeavesByHash(t *testing.T) { } func TestGetLeavesByHashBigBatch(t *testing.T) { - t.Skip("Known Issue: https://github.com/google/trillian/issues/1845") ctx := context.Background() // Create fake leaf as if it had been sequenced From 0eac2ab06754623f417e4b9f2624907a0ffb9c6b Mon Sep 17 00:00:00 2001 From: Rob Stradling Date: Mon, 28 Oct 2024 19:36:44 +0000 Subject: [PATCH 46/62] Hopefully fix trillian-pr-tests (trillian-opensource-ci) checks --- .../postgresql/testdbpgx/testdbpgx_test.go | 27 ------------------- 1 file changed, 27 deletions(-) delete mode 100644 storage/postgresql/testdbpgx/testdbpgx_test.go diff --git a/storage/postgresql/testdbpgx/testdbpgx_test.go b/storage/postgresql/testdbpgx/testdbpgx_test.go deleted file mode 100644 index 6cd0eea8be..0000000000 --- a/storage/postgresql/testdbpgx/testdbpgx_test.go +++ /dev/null @@ -1,27 +0,0 @@ -// Copyright 2018 Google LLC. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-// See the License for the specific language governing permissions and -// limitations under the License. - -package testdbpgx - -import ( - "testing" - - _ "k8s.io/klog/v2" -) - -func TestPostgreSQLWarning(t *testing.T) { - if !PostgreSQLAvailable() { - t.Error("Deliberate test failure as a reminder that all storage-related tests are being skipped due to absent PostgreSQL") - } -} From 965aa919f14f577ad1608d68e739ac562b7a4ea6 Mon Sep 17 00:00:00 2001 From: Rob Stradling Date: Tue, 29 Oct 2024 13:45:34 +0000 Subject: [PATCH 47/62] No need to sortLeavesForInsert --- storage/postgresql/log_storage.go | 105 +++++++------------------- storage/postgresql/schema/storage.sql | 3 +- 2 files changed, 29 insertions(+), 79 deletions(-) diff --git a/storage/postgresql/log_storage.go b/storage/postgresql/log_storage.go index abf4101b1e..1c73510dbe 100644 --- a/storage/postgresql/log_storage.go +++ b/storage/postgresql/log_storage.go @@ -18,9 +18,9 @@ import ( "bytes" "context" "database/sql" + "encoding/hex" "errors" "fmt" - "sort" "strconv" "sync" "time" @@ -392,25 +392,14 @@ func (t *logTreeTX) DequeueLeaves(ctx context.Context, limit int, cutoffTime tim return leaves, nil } -// sortLeavesForInsert returns a slice containing the passed in leaves sorted -// by LeafIdentityHash, and paired with their original positions. -// QueueLeaves and AddSequencedLeaves use this to make the order that LeafData -// row locks are acquired deterministic and reduce the chance of deadlocks. -func sortLeavesForInsert(leaves []*trillian.LogLeaf) []leafAndPosition { - ordLeaves := make([]leafAndPosition, len(leaves)) - for i, leaf := range leaves { - ordLeaves[i] = leafAndPosition{leaf: leaf, idx: i} - } - sort.Sort(byLeafIdentityHashWithPosition(ordLeaves)) - return ordLeaves -} - func (t *logTreeTX) QueueLeaves(ctx context.Context, leaves []*trillian.LogLeaf, queueTimestamp time.Time) ([]*trillian.LogLeaf, error) { t.treeTX.mu.Lock() defer t.treeTX.mu.Unlock() - // Don't accept batches if any of the leaves are invalid. - for _, leaf := range leaves { + // Prepare rows to copy, but don't accept batches if any of the leaves are invalid. + leafMap := make(map[string]int) + copyRows := make([][]interface{}, 0, len(leaves)) + for i, leaf := range leaves { if len(leaf.LeafIdentityHash) != t.hashSizeBytes { return nil, fmt.Errorf("queued leaf must have a leaf ID hash of length %d", t.hashSizeBytes) } @@ -418,25 +407,12 @@ func (t *logTreeTX) QueueLeaves(ctx context.Context, leaves []*trillian.LogLeaf, if err := leaf.QueueTimestamp.CheckValid(); err != nil { return nil, fmt.Errorf("got invalid queue timestamp: %w", err) } - } - label := labelForTX(t) - - ordLeaves := sortLeavesForInsert(leaves) - existingCount := 0 - existingLeaves := make([]*trillian.LogLeaf, len(leaves)) - copyRows := make([][]interface{}, 0, len(ordLeaves)) - - // Prepare rows to copy. - for _, ol := range ordLeaves { - leaf := ol.leaf - - if err := leaf.QueueTimestamp.CheckValid(); err != nil { - return nil, fmt.Errorf("got invalid queue timestamp: %w", err) - } qTimestamp := leaf.QueueTimestamp.AsTime() args := queueArgs(t.treeID, leaf.LeafIdentityHash, qTimestamp) copyRows = append(copyRows, []interface{}{t.treeID, leaf.LeafIdentityHash, leaf.LeafValue, leaf.ExtraData, leaf.MerkleLeafHash, args[0], args[1]}) + leafMap[hex.EncodeToString(leaf.LeafIdentityHash)] = i } + label := labelForTX(t) // Create temporary table. 
_, err := t.tx.Exec(ctx, createTempQueueLeavesTable) @@ -458,6 +434,8 @@ func (t *logTreeTX) QueueLeaves(ctx context.Context, leaves []*trillian.LogLeaf, } // Create the leaf data records, work queue entries, and obtain a deduplicated list of existing leaves. + existingCount := 0 + existingLeaves := make([]*trillian.LogLeaf, len(leaves)) var toRetrieve [][]byte var leafIdentityHash []byte if rows, err := t.tx.Query(ctx, queueLeavesSQL); err != nil { @@ -471,13 +449,14 @@ func (t *logTreeTX) QueueLeaves(ctx context.Context, leaves []*trillian.LogLeaf, return nil, postgresqlToGRPC(err) } - for i, leaf := range leaves { - if bytes.Equal(leaf.LeafIdentityHash, leafIdentityHash) { - // Remember the duplicate leaf, using the requested leaf for now. - existingLeaves[i] = leaf - existingCount++ - queuedDupCounter.Inc(label) - } + if i, ok := leafMap[hex.EncodeToString(leafIdentityHash)]; !ok { + klog.Warningf("Unexpected leafIdentityHash: %s", hex.EncodeToString(leafIdentityHash)) + return nil, postgresqlToGRPC(err) + } else { + // Remember the duplicate leaf, using the requested leaf for now. + existingLeaves[i] = leaves[i] + existingCount++ + queuedDupCounter.Inc(label) } toRetrieve = append(toRetrieve, leafIdentityHash) } @@ -527,19 +506,17 @@ func (t *logTreeTX) AddSequencedLeaves(ctx context.Context, leaves []*trillian.L res := make([]*trillian.QueuedLogLeaf, len(leaves)) ok := status.New(codes.OK, "OK").Proto() - ordLeaves := sortLeavesForInsert(leaves) - copyRows := make([][]interface{}, 0, len(ordLeaves)) - // Prepare rows to copy. - for _, ol := range ordLeaves { - i, leaf := ol.idx, ol.leaf - + leafMap := make(map[string]int) + copyRows := make([][]interface{}, 0, len(leaves)) + for i, leaf := range leaves { // This should fail on insert, but catch it early. if got, want := len(leaf.LeafIdentityHash), t.hashSizeBytes; got != want { return nil, status.Errorf(codes.FailedPrecondition, "leaves[%d] has incorrect hash size %d, want %d", i, got, want) } copyRows = append(copyRows, []interface{}{t.treeID, leaf.LeafIdentityHash, leaf.LeafValue, leaf.ExtraData, leaf.MerkleLeafHash, timestamp.UnixNano(), leaf.LeafIndex}) + leafMap[hex.EncodeToString(leaf.LeafIdentityHash)] = i res[i] = &trillian.QueuedLogLeaf{Status: ok} } @@ -576,17 +553,13 @@ func (t *logTreeTX) AddSequencedLeaves(ctx context.Context, leaves []*trillian.L return nil, postgresqlToGRPC(err) } - for _, ol := range ordLeaves { - i, leaf := ol.idx, ol.leaf - - if bytes.Equal(leaf.LeafIdentityHash, leafIdentityHash) { - if isDuplicateLeafData { - res[i].Status = status.New(codes.FailedPrecondition, "conflicting LeafIdentityHash").Proto() - } else if isDuplicateSequencedLeafData { - res[i].Status = status.New(codes.FailedPrecondition, "conflicting LeafIndex").Proto() - } - break - } + if i, ok := leafMap[hex.EncodeToString(leafIdentityHash)]; !ok { + klog.Warningf("Unexpected leafIdentityHash: %s", hex.EncodeToString(leafIdentityHash)) + return nil, postgresqlToGRPC(err) + } else if isDuplicateLeafData { + res[i].Status = status.New(codes.FailedPrecondition, "conflicting LeafIdentityHash").Proto() + } else if isDuplicateSequencedLeafData { + res[i].Status = status.New(codes.FailedPrecondition, "conflicting LeafIndex").Proto() } } if rows.Err() != nil { @@ -820,25 +793,3 @@ func (t *logTreeTX) getLeavesByHashInternal(ctx context.Context, leafHashes [][] return ret, nil } - -// leafAndPosition records original position before sort. 
-type leafAndPosition struct { - leaf *trillian.LogLeaf - idx int -} - -// byLeafIdentityHashWithPosition allows sorting (as above), but where we need -// to remember the original position -type byLeafIdentityHashWithPosition []leafAndPosition - -func (l byLeafIdentityHashWithPosition) Len() int { - return len(l) -} - -func (l byLeafIdentityHashWithPosition) Swap(i, j int) { - l[i], l[j] = l[j], l[i] -} - -func (l byLeafIdentityHashWithPosition) Less(i, j int) bool { - return bytes.Compare(l[i].leaf.LeafIdentityHash, l[j].leaf.LeafIdentityHash) == -1 -} diff --git a/storage/postgresql/schema/storage.sql b/storage/postgresql/schema/storage.sql index 0e09d419d1..5d56a048e7 100644 --- a/storage/postgresql/schema/storage.sql +++ b/storage/postgresql/schema/storage.sql @@ -229,8 +229,7 @@ BEGIN WHERE NOT IsDuplicateLeafData AND NOT IsDuplicateSequencedLeafData; RETURN QUERY SELECT LeafIdentityHash, IsDuplicateLeafData, IsDuplicateSequencedLeafData - FROM TempAddSequencedLeaves - ORDER BY LeafIdentityHash; + FROM TempAddSequencedLeaves; RETURN; END; $$; From 651b3b58fc362c6c10cb28a04e94bc1563e0f81c Mon Sep 17 00:00:00 2001 From: Rob Stradling Date: Wed, 30 Oct 2024 22:00:01 +0000 Subject: [PATCH 48/62] Update copyright details --- AUTHORS | 1 + CONTRIBUTORS | 1 + quota/postgresqlqm/postgresql_quota.go | 2 +- quota/postgresqlqm/postgresql_quota_test.go | 2 +- quota/postgresqlqm/quota_provider.go | 2 +- storage/postgresql/admin_storage.go | 2 +- storage/postgresql/admin_storage_test.go | 2 +- storage/postgresql/errors.go | 2 +- storage/postgresql/log_storage.go | 2 +- storage/postgresql/log_storage_test.go | 2 +- storage/postgresql/provider.go | 2 +- storage/postgresql/provider_test.go | 2 +- storage/postgresql/queue.go | 2 +- storage/postgresql/queue_batching.go | 2 +- storage/postgresql/sql.go | 2 +- storage/postgresql/storage_test.go | 2 +- storage/postgresql/testdbpgx/testdbpgx.go | 2 +- storage/postgresql/tree_storage.go | 2 +- 18 files changed, 18 insertions(+), 16 deletions(-) diff --git a/AUTHORS b/AUTHORS index f45549fade..a0ae0d7976 100644 --- a/AUTHORS +++ b/AUTHORS @@ -11,4 +11,5 @@ Antonio Marcedone Google LLC Internet Security Research Group +Sectigo Limited Vishal Kuo diff --git a/CONTRIBUTORS b/CONTRIBUTORS index 680a58b981..353c7b2a21 100644 --- a/CONTRIBUTORS +++ b/CONTRIBUTORS @@ -35,5 +35,6 @@ Paul Hadfield Pavel Kalinnikov Pierre Phaneuf Rob Percival +Rob Stradling Roger Ng Vishal Kuo diff --git a/quota/postgresqlqm/postgresql_quota.go b/quota/postgresqlqm/postgresql_quota.go index 87a65d51f4..76ad25062a 100644 --- a/quota/postgresqlqm/postgresql_quota.go +++ b/quota/postgresqlqm/postgresql_quota.go @@ -1,4 +1,4 @@ -// Copyright 2017 Google LLC. All Rights Reserved. +// Copyright 2024 Trillian Authors. All Rights Reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/quota/postgresqlqm/postgresql_quota_test.go b/quota/postgresqlqm/postgresql_quota_test.go index 7b3bbd2665..fc592b0d5f 100644 --- a/quota/postgresqlqm/postgresql_quota_test.go +++ b/quota/postgresqlqm/postgresql_quota_test.go @@ -1,4 +1,4 @@ -// Copyright 2017 Google LLC. All Rights Reserved. +// Copyright 2024 Trillian Authors. All Rights Reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
diff --git a/quota/postgresqlqm/quota_provider.go b/quota/postgresqlqm/quota_provider.go index c85ea5d125..cff23a9ace 100644 --- a/quota/postgresqlqm/quota_provider.go +++ b/quota/postgresqlqm/quota_provider.go @@ -1,4 +1,4 @@ -// Copyright 2018 Google LLC. All Rights Reserved. +// Copyright 2024 Trillian Authors. All Rights Reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/storage/postgresql/admin_storage.go b/storage/postgresql/admin_storage.go index 58c78236c5..e36f609652 100644 --- a/storage/postgresql/admin_storage.go +++ b/storage/postgresql/admin_storage.go @@ -1,4 +1,4 @@ -// Copyright 2017 Google LLC. All Rights Reserved. +// Copyright 2024 Trillian Authors. All Rights Reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/storage/postgresql/admin_storage_test.go b/storage/postgresql/admin_storage_test.go index 0540eaf4b4..c1c5a9adc3 100644 --- a/storage/postgresql/admin_storage_test.go +++ b/storage/postgresql/admin_storage_test.go @@ -1,4 +1,4 @@ -// Copyright 2017 Google LLC. All Rights Reserved. +// Copyright 2024 Trillian Authors. All Rights Reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/storage/postgresql/errors.go b/storage/postgresql/errors.go index 785c913deb..51ea4130b0 100644 --- a/storage/postgresql/errors.go +++ b/storage/postgresql/errors.go @@ -1,4 +1,4 @@ -// Copyright 2021 Google LLC. All Rights Reserved. +// Copyright 2024 Trillian Authors. All Rights Reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/storage/postgresql/log_storage.go b/storage/postgresql/log_storage.go index 1c73510dbe..b2002a9c91 100644 --- a/storage/postgresql/log_storage.go +++ b/storage/postgresql/log_storage.go @@ -1,4 +1,4 @@ -// Copyright 2016 Google LLC. All Rights Reserved. +// Copyright 2024 Trillian Authors. All Rights Reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/storage/postgresql/log_storage_test.go b/storage/postgresql/log_storage_test.go index 43f8abc19d..fa217ddab5 100644 --- a/storage/postgresql/log_storage_test.go +++ b/storage/postgresql/log_storage_test.go @@ -1,4 +1,4 @@ -// Copyright 2016 Google LLC. All Rights Reserved. +// Copyright 2024 Trillian Authors. All Rights Reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/storage/postgresql/provider.go b/storage/postgresql/provider.go index 84cdcd820e..1f3052129f 100644 --- a/storage/postgresql/provider.go +++ b/storage/postgresql/provider.go @@ -1,4 +1,4 @@ -// Copyright 2018 Google LLC. All Rights Reserved. +// Copyright 2024 Trillian Authors. All Rights Reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/storage/postgresql/provider_test.go b/storage/postgresql/provider_test.go index 15552437b0..92d85ba212 100644 --- a/storage/postgresql/provider_test.go +++ b/storage/postgresql/provider_test.go @@ -1,4 +1,4 @@ -// Copyright 2018 Google LLC. All Rights Reserved. 
+// Copyright 2024 Trillian Authors. All Rights Reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/storage/postgresql/queue.go b/storage/postgresql/queue.go index b83d37bcdc..349b96c62f 100644 --- a/storage/postgresql/queue.go +++ b/storage/postgresql/queue.go @@ -1,7 +1,7 @@ //go:build !batched_queue // +build !batched_queue -// Copyright 2017 Google LLC. All Rights Reserved. +// Copyright 2024 Trillian Authors. All Rights Reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/storage/postgresql/queue_batching.go b/storage/postgresql/queue_batching.go index 49232c29e6..909534546b 100644 --- a/storage/postgresql/queue_batching.go +++ b/storage/postgresql/queue_batching.go @@ -1,7 +1,7 @@ //go:build batched_queue // +build batched_queue -// Copyright 2017 Google LLC. All Rights Reserved. +// Copyright 2024 Trillian Authors. All Rights Reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/storage/postgresql/sql.go b/storage/postgresql/sql.go index 25ecd33dd9..2714786a3b 100644 --- a/storage/postgresql/sql.go +++ b/storage/postgresql/sql.go @@ -1,4 +1,4 @@ -// Copyright 2018 Google LLC. All Rights Reserved. +// Copyright 2024 Trillian Authors. All Rights Reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/storage/postgresql/storage_test.go b/storage/postgresql/storage_test.go index b9b484510e..6af115c1c8 100644 --- a/storage/postgresql/storage_test.go +++ b/storage/postgresql/storage_test.go @@ -1,4 +1,4 @@ -// Copyright 2016 Google LLC. All Rights Reserved. +// Copyright 2024 Trillian Authors. All Rights Reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/storage/postgresql/testdbpgx/testdbpgx.go b/storage/postgresql/testdbpgx/testdbpgx.go index bd42b28676..8a38a453ff 100644 --- a/storage/postgresql/testdbpgx/testdbpgx.go +++ b/storage/postgresql/testdbpgx/testdbpgx.go @@ -1,4 +1,4 @@ -// Copyright 2017 Google LLC. All Rights Reserved. +// Copyright 2024 Trillian Authors. All Rights Reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/storage/postgresql/tree_storage.go b/storage/postgresql/tree_storage.go index a073b4c3ce..8af54451c1 100644 --- a/storage/postgresql/tree_storage.go +++ b/storage/postgresql/tree_storage.go @@ -1,4 +1,4 @@ -// Copyright 2016 Google LLC. All Rights Reserved. +// Copyright 2024 Trillian Authors. All Rights Reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
From 72964d20a0db33b1be5715638a2de4b688522923 Mon Sep 17 00:00:00 2001 From: Rob Stradling Date: Wed, 30 Oct 2024 22:57:29 +0000 Subject: [PATCH 49/62] Remove vestigial Trees.PrivateKey and Trees.PublicKey fields --- storage/postgresql/admin_storage.go | 16 +++++----------- storage/postgresql/schema/storage.sql | 2 -- storage/postgresql/sql.go | 3 --- 3 files changed, 5 insertions(+), 16 deletions(-) diff --git a/storage/postgresql/admin_storage.go b/storage/postgresql/admin_storage.go index e36f609652..4a2b8f0eb2 100644 --- a/storage/postgresql/admin_storage.go +++ b/storage/postgresql/admin_storage.go @@ -35,14 +35,14 @@ import ( const ( defaultSequenceIntervalSeconds = 60 - selectTrees = "SELECT TreeId,TreeState,TreeType,HashStrategy,HashAlgorithm,SignatureAlgorithm,DisplayName,Description,CreateTimeMillis,UpdateTimeMillis,PrivateKey,PublicKey,MaxRootDurationMillis,Deleted,DeleteTimeMillis " + - "FROM Trees" // PrivateKey is unused; PublicKey is used to store StorageSettings. + selectTrees = "SELECT TreeId,TreeState,TreeType,HashStrategy,HashAlgorithm,SignatureAlgorithm,DisplayName,Description,CreateTimeMillis,UpdateTimeMillis,MaxRootDurationMillis,Deleted,DeleteTimeMillis " + + "FROM Trees" selectNonDeletedTrees = selectTrees + " WHERE (Deleted IS NULL OR Deleted='false')" selectTreeByID = selectTrees + " WHERE TreeId=$1" updateTreeSQL = "UPDATE Trees " + - "SET TreeState=$1,TreeType=$2,DisplayName=$3,Description=$4,UpdateTimeMillis=$5,MaxRootDurationMillis=$6,PrivateKey=$7 " + - "WHERE TreeId=$8" + "SET TreeState=$1,TreeType=$2,DisplayName=$3,Description=$4,UpdateTimeMillis=$5,MaxRootDurationMillis=$6 " + + "WHERE TreeId=$7" ) // NewAdminStorage returns a PostgreSQL storage.AdminStorage implementation backed by DB. @@ -191,7 +191,7 @@ func (t *adminTX) CreateTree(ctx context.Context, tree *trillian.Tree) (*trillia _, err = t.tx.Exec( ctx, - "INSERT INTO Trees(TreeId,TreeState,TreeType,HashStrategy,HashAlgorithm,SignatureAlgorithm,DisplayName,Description,CreateTimeMillis,UpdateTimeMillis,PrivateKey,PublicKey,MaxRootDurationMillis) VALUES($1,$2,$3,$4,$5,$6,$7,$8,$9,$10,$11,$12,$13)", + "INSERT INTO Trees(TreeId,TreeState,TreeType,HashStrategy,HashAlgorithm,SignatureAlgorithm,DisplayName,Description,CreateTimeMillis,UpdateTimeMillis,MaxRootDurationMillis) VALUES($1,$2,$3,$4,$5,$6,$7,$8,$9,$10,$11)", newTree.TreeId, newTree.TreeState.String(), newTree.TreeType.String(), @@ -202,8 +202,6 @@ func (t *adminTX) CreateTree(ctx context.Context, tree *trillian.Tree) (*trillia newTree.Description, nowMillis, nowMillis, - []byte{}, // PrivateKey: Unused, filling in for backward compatibility. - []byte{}, // PublicKey: Unused, filling in for backward compatibility. rootDuration/time.Millisecond, ) if err != nil { @@ -258,10 +256,6 @@ func (t *adminTX) UpdateTree(ctx context.Context, treeID int64, updateFunc func( tree.Description, nowMillis, rootDuration/time.Millisecond, - []byte{}, // PrivateKey: Unused, filling in for backward compatibility. - // PublicKey should not be updated with any storageSettings here without - // a lot of thought put into it. At the moment storageSettings are inferred - // when reading the tree, even if no value is stored in the database. 
tree.TreeId); err != nil { return nil, err } diff --git a/storage/postgresql/schema/storage.sql b/storage/postgresql/schema/storage.sql index 5d56a048e7..9ae52eeb43 100644 --- a/storage/postgresql/schema/storage.sql +++ b/storage/postgresql/schema/storage.sql @@ -32,8 +32,6 @@ CREATE TABLE IF NOT EXISTS Trees( CreateTimeMillis BIGINT NOT NULL, UpdateTimeMillis BIGINT NOT NULL, MaxRootDurationMillis BIGINT NOT NULL, - PrivateKey BYTEA NOT NULL, -- Unused. - PublicKey BYTEA NOT NULL, -- This is now used to store settings. Deleted BOOLEAN, DeleteTimeMillis BIGINT, PRIMARY KEY(TreeId) diff --git a/storage/postgresql/sql.go b/storage/postgresql/sql.go index 2714786a3b..e602aebeef 100644 --- a/storage/postgresql/sql.go +++ b/storage/postgresql/sql.go @@ -54,7 +54,6 @@ func readTree(r row) (*trillian.Tree, error) { var treeState, treeType, hashStrategy, hashAlgorithm, signatureAlgorithm string var createMillis, updateMillis, maxRootDurationMillis int64 var displayName, description sql.NullString - var privateKey, publicKey []byte var deleted sql.NullBool var deleteMillis sql.NullInt64 err := r.Scan( @@ -68,8 +67,6 @@ func readTree(r row) (*trillian.Tree, error) { &description, &createMillis, &updateMillis, - &privateKey, - &publicKey, &maxRootDurationMillis, &deleted, &deleteMillis, From 917f362c70edeb42a671b1efee99717eb82512ff Mon Sep 17 00:00:00 2001 From: Rob Stradling Date: Thu, 31 Oct 2024 09:55:06 +0000 Subject: [PATCH 50/62] Remove vestigial Trees.HashStrategy, Trees.HashAlgorithm, and Trees.SignatureAlgorithm fields --- storage/postgresql/admin_storage.go | 7 ++----- storage/postgresql/drop_storage.sql | 3 --- storage/postgresql/schema/storage.sql | 9 --------- storage/postgresql/sql.go | 12 +++--------- 4 files changed, 5 insertions(+), 26 deletions(-) diff --git a/storage/postgresql/admin_storage.go b/storage/postgresql/admin_storage.go index 4a2b8f0eb2..b7b42aaa64 100644 --- a/storage/postgresql/admin_storage.go +++ b/storage/postgresql/admin_storage.go @@ -35,7 +35,7 @@ import ( const ( defaultSequenceIntervalSeconds = 60 - selectTrees = "SELECT TreeId,TreeState,TreeType,HashStrategy,HashAlgorithm,SignatureAlgorithm,DisplayName,Description,CreateTimeMillis,UpdateTimeMillis,MaxRootDurationMillis,Deleted,DeleteTimeMillis " + + selectTrees = "SELECT TreeId,TreeState,TreeType,DisplayName,Description,CreateTimeMillis,UpdateTimeMillis,MaxRootDurationMillis,Deleted,DeleteTimeMillis " + "FROM Trees" selectNonDeletedTrees = selectTrees + " WHERE (Deleted IS NULL OR Deleted='false')" selectTreeByID = selectTrees + " WHERE TreeId=$1" @@ -191,13 +191,10 @@ func (t *adminTX) CreateTree(ctx context.Context, tree *trillian.Tree) (*trillia _, err = t.tx.Exec( ctx, - "INSERT INTO Trees(TreeId,TreeState,TreeType,HashStrategy,HashAlgorithm,SignatureAlgorithm,DisplayName,Description,CreateTimeMillis,UpdateTimeMillis,MaxRootDurationMillis) VALUES($1,$2,$3,$4,$5,$6,$7,$8,$9,$10,$11)", + "INSERT INTO Trees(TreeId,TreeState,TreeType,DisplayName,Description,CreateTimeMillis,UpdateTimeMillis,MaxRootDurationMillis) VALUES($1,$2,$3,$4,$5,$6,$7,$8)", newTree.TreeId, newTree.TreeState.String(), newTree.TreeType.String(), - "RFC6962_SHA256", // Unused, filling in for backward compatibility. - "SHA256", // Unused, filling in for backward compatibility. - "ECDSA", // Unused, filling in for backward compatibility. 
newTree.DisplayName, newTree.Description, nowMillis, diff --git a/storage/postgresql/drop_storage.sql b/storage/postgresql/drop_storage.sql index 5f3beda0a0..013eb476f8 100644 --- a/storage/postgresql/drop_storage.sql +++ b/storage/postgresql/drop_storage.sql @@ -10,8 +10,5 @@ DROP TABLE IF EXISTS LeafData; DROP TABLE IF EXISTS TreeControl; DROP TABLE IF EXISTS Trees; -DROP TYPE IF EXISTS SignatureAlgorithm; -DROP TYPE IF EXISTS HashAlgorithm; -DROP TYPE IF EXISTS HashStrategy; DROP TYPE IF EXISTS TreeType; DROP TYPE IF EXISTS TreeState; diff --git a/storage/postgresql/schema/storage.sql b/storage/postgresql/schema/storage.sql index 9ae52eeb43..096448c968 100644 --- a/storage/postgresql/schema/storage.sql +++ b/storage/postgresql/schema/storage.sql @@ -13,20 +13,11 @@ CREATE TYPE TreeState AS ENUM ('ACTIVE', 'FROZEN', 'DRAINING'); CREATE TYPE TreeType AS ENUM ('LOG', 'MAP', 'PREORDERED_LOG'); -CREATE TYPE HashStrategy AS ENUM ('RFC6962_SHA256', 'TEST_MAP_HASHER', 'OBJECT_RFC6962_SHA256', 'CONIKS_SHA512_256', 'CONIKS_SHA256'); - -CREATE TYPE HashAlgorithm AS ENUM ('SHA256'); - -CREATE TYPE SignatureAlgorithm AS ENUM ('ECDSA', 'RSA', 'ED25519'); - CREATE TABLE IF NOT EXISTS Trees( TreeId BIGINT NOT NULL, TreeState TreeState NOT NULL, TreeType TreeType NOT NULL, - HashStrategy HashStrategy NOT NULL, - HashAlgorithm HashAlgorithm NOT NULL, - SignatureAlgorithm SignatureAlgorithm NOT NULL, DisplayName VARCHAR(20), Description VARCHAR(200), CreateTimeMillis BIGINT NOT NULL, diff --git a/storage/postgresql/sql.go b/storage/postgresql/sql.go index e602aebeef..281d06b404 100644 --- a/storage/postgresql/sql.go +++ b/storage/postgresql/sql.go @@ -51,7 +51,7 @@ func readTree(r row) (*trillian.Tree, error) { tree := &trillian.Tree{} // Enums and Datetimes need an extra conversion step - var treeState, treeType, hashStrategy, hashAlgorithm, signatureAlgorithm string + var treeState, treeType string var createMillis, updateMillis, maxRootDurationMillis int64 var displayName, description sql.NullString var deleted sql.NullBool @@ -60,9 +60,6 @@ func readTree(r row) (*trillian.Tree, error) { &tree.TreeId, &treeState, &treeType, - &hashStrategy, - &hashAlgorithm, - &signatureAlgorithm, &displayName, &description, &createMillis, @@ -89,18 +86,15 @@ func readTree(r row) (*trillian.Tree, error) { } else { return nil, fmt.Errorf("unknown TreeType: %v", treeType) } - if hashStrategy != "RFC6962_SHA256" { - return nil, fmt.Errorf("unknown HashStrategy: %v", hashStrategy) - } // Let's make sure we didn't mismatch any of the casts above ok := tree.TreeState.String() == treeState && tree.TreeType.String() == treeType if !ok { return nil, fmt.Errorf( - "mismatched enum: tree = %v, enums = [%v, %v, %v, %v, %v]", + "mismatched enum: tree = %v, enums = [%v, %v]", tree, - treeState, treeType, hashStrategy, hashAlgorithm, signatureAlgorithm) + treeState, treeType) } tree.CreateTime = timestamppb.New(fromMillisSinceEpoch(createMillis)) From dd851398e87e4a28033aa8fa496e88e716a494c7 Mon Sep 17 00:00:00 2001 From: Rob Stradling Date: Thu, 31 Oct 2024 10:21:11 +0000 Subject: [PATCH 51/62] Add missing DROP FUNCTIONs. 
--- storage/postgresql/drop_storage.sql | 2 ++ 1 file changed, 2 insertions(+) diff --git a/storage/postgresql/drop_storage.sql b/storage/postgresql/drop_storage.sql index 013eb476f8..16cda524f1 100644 --- a/storage/postgresql/drop_storage.sql +++ b/storage/postgresql/drop_storage.sql @@ -1,6 +1,8 @@ -- Caution - this removes all tables in our schema DROP FUNCTION IF EXISTS count_estimate; +DROP FUNCTION IF EXISTS queue_leaves; +DROP FUNCTION IF EXISTS add_sequenced_leaves; DROP TABLE IF EXISTS Unsequenced; DROP TABLE IF EXISTS Subtree; From 32096c41d0f15b9a71fe3b96e6d93aacc7b55bcd Mon Sep 17 00:00:00 2001 From: Rob Stradling Date: Tue, 5 Nov 2024 15:44:18 +0000 Subject: [PATCH 52/62] Call out the caveats in the quota manager more explicitly and up-front --- quota/postgresqlqm/postgresql_quota.go | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/quota/postgresqlqm/postgresql_quota.go b/quota/postgresqlqm/postgresql_quota.go index 76ad25062a..d98e62e690 100644 --- a/quota/postgresqlqm/postgresql_quota.go +++ b/quota/postgresqlqm/postgresql_quota.go @@ -38,6 +38,12 @@ var ErrTooManyUnsequencedRows = errors.New("too many unsequenced rows") // QuotaManager is a PostgreSQL-based quota.Manager implementation. // +// QuotaManager only implements Global/Write quotas, which is based on the number of Unsequenced +// rows (to be exact, tokens = MaxUnsequencedRows - actualUnsequencedRows). +// Other quotas are considered infinite. In other words, it attempts to protect the MMD SLO of all +// logs in the instance, but it does not make any attempt to ensure fairness, whether per-tree, +// per-intermediate-CA (in the case of Certificate Transparency), or any other dimension. +// // It has two working modes: one estimates the number of Unsequenced rows by collecting information // from EXPLAIN output; the other does a select count(*) on the Unsequenced table. Estimates are // default, even though they are approximate, as they're constant time (select count(*) on @@ -47,10 +53,6 @@ var ErrTooManyUnsequencedRows = errors.New("too many unsequenced rows") // the table (this is a cheap operation, not requiring a table scan). If that is different from // relpages then reltuples is scaled accordingly to arrive at a current number-of-rows estimate." // (quoting https://www.postgresql.org/docs/current/row-estimation-examples.html) -// -// QuotaManager only implements Global/Write quotas, which is based on the number of Unsequenced -// rows (to be exact, tokens = MaxUnsequencedRows - actualUnsequencedRows). -// Other quotas are considered infinite. type QuotaManager struct { DB *pgxpool.Pool MaxUnsequencedRows int From 8c40e33a7ad92cf11b83af87e2d2d405785036b6 Mon Sep 17 00:00:00 2001 From: Rob Stradling Date: Tue, 5 Nov 2024 23:34:02 +0000 Subject: [PATCH 53/62] Add a README.md that provides an overview of the PostgreSQL storage implementation --- storage/postgresql/README.md | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) create mode 100644 storage/postgresql/README.md diff --git a/storage/postgresql/README.md b/storage/postgresql/README.md new file mode 100644 index 0000000000..fb0a2d1d4c --- /dev/null +++ b/storage/postgresql/README.md @@ -0,0 +1,30 @@ +# PostgreSQL storage implementation + +## Origin + +This storage implementation, added in [PR #3644](https://github.com/google/trillian/pull/3644), began as a fork of the MySQL storage implementation after [PR #3201](https://github.com/google/trillian/pull/3201) was merged. 
+ +## Motivation + +Sectigo operates Certificate Transparency logs that run Trillian with MariaDB, using the MySQL storage implementation. One log's MariaDB database [suffered unrecoverable corruption](https://groups.google.com/a/chromium.org/g/ct-policy/c/038B7F4g8cU/m/KsOJaEhnBgAJ) as a result of disk space exhaustion, and another log has [struggled to sequence entries quickly enough](https://groups.google.com/a/chromium.org/g/ct-policy/c/wVhEWVI7Xzo/m/0WiIEbZ_BgAJ). Sectigo has more experience with PostgreSQL, believes that PostgreSQL databases are not vulnerable to corruption due to disk space exhaustion, and anticipates that PostgreSQL can achieve significantly greater sequencing throughput. + +## Database driver + +The [pgx](https://github.com/jackc/pgx) driver is used directly. This offers faster performance than the standard `database/sql` interface, and provides access to a number of PostgreSQL-specific features such as `COPY`. + +## Major changes compared to the MySQL storage implementation + +- Implemented [bulk processing](#bulk-processing) to greatly improve performance, making use of `COPY`, temporary tables, and database functions. +- Removed SQL statement caching, because pgx does this itself automatically. +- Removed several vestigial features (e.g., pre-#3201 subtree revisions). +- Forked `storage/testdb` to `storage/postgresql/testdbpgx`, because the former only supports the `database/sql` interface. + +## Bulk processing + +The `QueueLeaves`, `AddSequencedLeaves`, `UpdateSequencedLeaves`, and `storeSubtrees` functions all operate on sets of records. The individual INSERT statements inherited from the MySQL storage implementation have been replaced by the use of PostgreSQL's `COPY` interface, which bulk-loads data more efficiently and in far fewer network round trips. + +`QueueLeaves`, `AddSequencedLeaves`, and `storeSubtrees` each bulk-load data into temporary tables that are bound to a single transaction. This approach enables each function to perform its processing efficiently, after which the processed data is written to the real tables. + +`QueueLeaves` and `AddSequencedLeaves` each use a corresponding PL/pgSQL function to perform multiple processing steps involving the temporary tables, including the leaf deduplication logic. This could all instead have been implemented as multiple SQL statements called from the Go code, but the approach taken reduces the number of network round trips and the amount of data being transferred to and from the database, and therefore improves performance. + +`AddSequencedLeaves` avoids having to use (and sometimes roll back) savepoints, which further improves performance compared to the equivalent MySQL implementation. From 28df5c0421ac780ca6c0b4a3a8415409b67fa068 Mon Sep 17 00:00:00 2001 From: Rob Stradling Date: Tue, 5 Nov 2024 23:39:16 +0000 Subject: [PATCH 54/62] Cover PostgreSQL in the mitigation for Issue #1297 in CODEOWNERS --- CODEOWNERS | 1 + 1 file changed, 1 insertion(+) diff --git a/CODEOWNERS b/CODEOWNERS index 0e627a2f17..b4ea069d52 100644 --- a/CODEOWNERS +++ b/CODEOWNERS @@ -19,3 +19,4 @@ # upgrade schema instances.
/storage/mysql/schema/* @mhutchinson @AlCutter /storage/cloudspanner/spanner.sdl @mhutchinson @AlCutter +/storage/postgresql/schema/* @robstradling @mhutchinson @AlCutter From 286e2789bcb61f4b430f0c651543dd884415cd3c Mon Sep 17 00:00:00 2001 From: Rob Stradling Date: Tue, 5 Nov 2024 23:55:23 +0000 Subject: [PATCH 55/62] Add PostgreSQL storage and quota implementations to the feature implementation matrix --- docs/Feature_Implementation_Matrix.md | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/docs/Feature_Implementation_Matrix.md b/docs/Feature_Implementation_Matrix.md index dfbfb29f99..6fa2f2a1e3 100644 --- a/docs/Feature_Implementation_Matrix.md +++ b/docs/Feature_Implementation_Matrix.md @@ -61,6 +61,7 @@ The Log storage implementations supporting the original Trillian log. | CloudSpanner | Beta | | Google maintains continuous-integration environment based on CloudSpanner. | | MySQL | GA | ✓ | | | CockroachDB | Alpha | | Supported by [Equinix Metal](https://deploy.equinix.com/). | +| PostgreSQL | Alpha | | Supported by [Rob Stradling](https://github.com/robstradling) at [Sectigo](https://github.com/sectigo) | ##### Spanner This is a Google-internal implementation, and is used by all of Google's current Trillian deployments. @@ -86,6 +87,12 @@ This implementation has been tested with CockroachDB 22.1.10. It's currently in alpha mode and is not yet in production use. +##### PostgreSQL + +This implementation has been tested with PostgreSQL 17.0. + +It's currently in alpha mode and is not yet in production use. + ### Monitoring Supported monitoring frameworks, allowing for production monitoring and alerting. @@ -115,6 +122,7 @@ Supported frameworks for quota management. | MySQL | Beta | ? | | | Redis | Alpha | ✓ | | | CockroachDB | Alpha | | Supported by [Equinix Metal](https://deploy.equinix.com/). | +| PostgreSQL | Alpha | | Supported by [Rob Stradling](https://github.com/robstradling) at [Sectigo](https://github.com/sectigo) | ### Key management From a4ee5cdc26b758280f02c976723ff31d95d2bc58 Mon Sep 17 00:00:00 2001 From: Rob Stradling Date: Tue, 5 Nov 2024 23:56:41 +0000 Subject: [PATCH 56/62] Full stops --- docs/Feature_Implementation_Matrix.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/Feature_Implementation_Matrix.md b/docs/Feature_Implementation_Matrix.md index 6fa2f2a1e3..36b709e9b8 100644 --- a/docs/Feature_Implementation_Matrix.md +++ b/docs/Feature_Implementation_Matrix.md @@ -61,7 +61,7 @@ The Log storage implementations supporting the original Trillian log. | CloudSpanner | Beta | | Google maintains continuous-integration environment based on CloudSpanner. | | MySQL | GA | ✓ | | | CockroachDB | Alpha | | Supported by [Equinix Metal](https://deploy.equinix.com/). | -| PostgreSQL | Alpha | | Supported by [Rob Stradling](https://github.com/robstradling) at [Sectigo](https://github.com/sectigo) | +| PostgreSQL | Alpha | | Supported by [Rob Stradling](https://github.com/robstradling) at [Sectigo](https://github.com/sectigo). | ##### Spanner This is a Google-internal implementation, and is used by all of Google's current Trillian deployments. @@ -122,7 +122,7 @@ Supported frameworks for quota management. | MySQL | Beta | ? | | | Redis | Alpha | ✓ | | | CockroachDB | Alpha | | Supported by [Equinix Metal](https://deploy.equinix.com/). 
| -| PostgreSQL | Alpha | | Supported by [Rob Stradling](https://github.com/robstradling) at [Sectigo](https://github.com/sectigo) | +| PostgreSQL | Alpha | | Supported by [Rob Stradling](https://github.com/robstradling) at [Sectigo](https://github.com/sectigo). | ### Key management From 0462f6201d6504a6066ab1c3765987800562b66e Mon Sep 17 00:00:00 2001 From: Rob Stradling Date: Wed, 6 Nov 2024 00:07:52 +0000 Subject: [PATCH 57/62] Update CHANGELOG.md --- CHANGELOG.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index f656e93330..de0ea9843f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,8 @@ ## HEAD +* Add PostgreSQL quota manager and storage backend by @robstradling in https://github.com/google/trillian/pull/3644 + ## v1.6.1 * Recommended go version for development: 1.22 From 6e3eb86c203c2b92bc59f61c70289d826522530f Mon Sep 17 00:00:00 2001 From: Rob Stradling Date: Wed, 6 Nov 2024 11:48:00 +0000 Subject: [PATCH 58/62] Remove Rob from CODEOWNERS --- CODEOWNERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CODEOWNERS b/CODEOWNERS index b4ea069d52..4b799a27e7 100644 --- a/CODEOWNERS +++ b/CODEOWNERS @@ -19,4 +19,4 @@ # upgrade schema instances. /storage/mysql/schema/* @mhutchinson @AlCutter /storage/cloudspanner/spanner.sdl @mhutchinson @AlCutter -/storage/postgresql/schema/* @robstradling @mhutchinson @AlCutter +/storage/postgresql/schema/* @mhutchinson @AlCutter From d41757ea5efca3f6282d5e6849546bc29b0bc6ca Mon Sep 17 00:00:00 2001 From: Rob Stradling Date: Wed, 6 Nov 2024 12:03:47 +0000 Subject: [PATCH 59/62] Transfer PostgreSQL TODOs to Rob --- storage/postgresql/admin_storage.go | 2 +- storage/postgresql/log_storage.go | 14 +++++++------- storage/postgresql/provider.go | 2 +- storage/postgresql/storage_test.go | 2 +- storage/postgresql/tree_storage.go | 3 +-- 5 files changed, 11 insertions(+), 12 deletions(-) diff --git a/storage/postgresql/admin_storage.go b/storage/postgresql/admin_storage.go index b7b42aaa64..a5ab7a6d33 100644 --- a/storage/postgresql/admin_storage.go +++ b/storage/postgresql/admin_storage.go @@ -232,7 +232,7 @@ func (t *adminTX) UpdateTree(ctx context.Context, treeID int64, updateFunc func( return nil, err } - // TODO(pavelkalinnikov): When switching TreeType from PREORDERED_LOG to LOG, + // TODO(robstradling): When switching TreeType from PREORDERED_LOG to LOG, // ensure all entries in SequencedLeafData are integrated. // Use the time truncated-to-millis throughout, as that's what's stored. diff --git a/storage/postgresql/log_storage.go b/storage/postgresql/log_storage.go index b2002a9c91..f672058192 100644 --- a/storage/postgresql/log_storage.go +++ b/storage/postgresql/log_storage.go @@ -91,7 +91,7 @@ const ( " INNER JOIN LeafData l ON (s.LeafIdentityHash=l.LeafIdentityHash AND s.TreeId=l.TreeId) " + "WHERE s.MerkleLeafHash=ANY($1)" + " AND l.TreeId=$2" - // TODO(#1548): rework the code so the dummy hash isn't needed (e.g. this assumes hash size is 32) + // TODO(robstradling): Per #1548, rework the code so the dummy hash isn't needed (e.g. 
this assumes hash size is 32) dummyMerkleLeafHash = "00000000000000000000000000000000" // This statement returns a dummy Merkle leaf hash value (which must be // of the right size) so that its signature matches that of the other @@ -229,7 +229,7 @@ func (m *postgreSQLLogStorage) beginInternal(ctx context.Context, tree *trillian return ltx, nil } -// TODO(pavelkalinnikov): This and many other methods of this storage +// TODO(robstradling): This and many other methods of this storage // implementation can leak a specific sql.ErrTxDone all the way to the client, // if the transaction is rolled back as a result of a canceled context. It must // return "generic" errors, and only log the specific ones for debugging. @@ -341,7 +341,7 @@ func (t *logTreeTX) DequeueLeaves(ctx context.Context, limit int, cutoffTime tim defer t.treeTX.mu.Unlock() if t.treeType == trillian.TreeType_PREORDERED_LOG { - // TODO(pavelkalinnikov): Optimize this by fetching only the required + // TODO(robstradling): Optimize this by fetching only the required // fields of LogLeaf. We can avoid joining with LeafData table here. return t.getLeavesByRangeInternal(ctx, int64(t.root.TreeSize), int64(limit)) } @@ -568,9 +568,9 @@ func (t *logTreeTX) AddSequencedLeaves(ctx context.Context, leaves []*trillian.L } } - // TODO(pavelkalinnikov): Support opting out from duplicates detection. - // TODO(pavelkalinnikov): Update IntegrateTimestamp on integrating the leaf. - // TODO(pavelkalinnikov): Load LeafData for conflicting entries. + // TODO(robstradling): Support opting out from duplicates detection. + // TODO(robstradling): Update IntegrateTimestamp on integrating the leaf. + // TODO(robstradling): Load LeafData for conflicting entries. return res, nil } @@ -601,7 +601,7 @@ func (t *logTreeTX) getLeavesByRangeInternal(ctx context.Context, start, count i count = maxCount } } - // TODO(pavelkalinnikov): Further clip `count` to a safe upper bound like 64k. + // TODO(robstradling): Further clip `count` to a safe upper bound like 64k. rows, err := t.tx.Query(ctx, selectLeavesByRangeSQL, start, start+count, t.treeID) if err != nil { diff --git a/storage/postgresql/provider.go b/storage/postgresql/provider.go index 1f3052129f..9d593c9704 100644 --- a/storage/postgresql/provider.go +++ b/storage/postgresql/provider.go @@ -35,7 +35,7 @@ var ( // GetDatabase returns an instance of PostgreSQL database, or creates one. // -// TODO(pavelkalinnikov): Make the dependency of PostgreSQL quota provider from +// TODO(robstradling): Make the dependency of PostgreSQL quota provider from // PostgreSQL storage provider explicit. func GetDatabase() (*pgxpool.Pool, error) { postgresqlMu.Lock() diff --git a/storage/postgresql/storage_test.go b/storage/postgresql/storage_test.go index 6af115c1c8..a857300eaa 100644 --- a/storage/postgresql/storage_test.go +++ b/storage/postgresql/storage_test.go @@ -201,7 +201,7 @@ func createLogNodesForTreeAtSize(t *testing.T, ts int64) ([]stree.Node, error) { return nodes, nil } -// TODO(pavelkalinnikov): Allow nodes to be out of order. +// TODO(robstradling): Allow nodes to be out of order. 
func nodesAreEqual(lhs, rhs []stree.Node) error { if ls, rs := len(lhs), len(rhs); ls != rs { return fmt.Errorf("different number of nodes, %d vs %d", ls, rs) diff --git a/storage/postgresql/tree_storage.go b/storage/postgresql/tree_storage.go index 8af54451c1..dca850c950 100644 --- a/storage/postgresql/tree_storage.go +++ b/storage/postgresql/tree_storage.go @@ -202,11 +202,10 @@ func (t *treeTX) storeSubtrees(ctx context.Context, subtrees []*storagepb.Subtre return nil } - // TODO(al): probably need to be able to batch this in the case where we have + // TODO(robstradling): probably need to be able to batch this in the case where we have // a really large number of subtrees to store. rows := make([][]interface{}, 0, len(subtrees)) - // TODO(mhutchinson): continue deleting this throughout var subtreeRev int64 for _, s := range subtrees { s := s From 378779f016e2e829f1abd1011c9a85cd4f23e466 Mon Sep 17 00:00:00 2001 From: Rob Stradling Date: Wed, 6 Nov 2024 15:47:47 +0000 Subject: [PATCH 60/62] Use the batched queue implementation, and remove the non-batched option --- storage/postgresql/README.md | 1 + storage/postgresql/queue.go | 61 ++++++----- storage/postgresql/queue_batching.go | 147 -------------------------- storage/postgresql/schema/storage.sql | 3 +- 4 files changed, 39 insertions(+), 173 deletions(-) delete mode 100644 storage/postgresql/queue_batching.go diff --git a/storage/postgresql/README.md b/storage/postgresql/README.md index fb0a2d1d4c..de38c54492 100644 --- a/storage/postgresql/README.md +++ b/storage/postgresql/README.md @@ -15,6 +15,7 @@ The [pgx](https://github.com/jackc/pgx) driver is used directly. This offers fas ## Major changes compared to the MySQL storage implementation - Implemented [bulk processing](#bulk-processing) to greatly improve performance, making use of `COPY`, temporary tables, and database functions. +- Switched to [batched queuing](https://github.com/google/trillian/pull/717), for further performance gains. - Removed SQL statement caching, because pgx does this itself automatically. - Removed several vestigial features (e.g., pre-#3201 subtree revisions). - Forked `storage/testdb` to `storage/postgresql/testdbpgx`, because the former only supports the `database/sql` interface. diff --git a/storage/postgresql/queue.go b/storage/postgresql/queue.go index 349b96c62f..fc137ab9ac 100644 --- a/storage/postgresql/queue.go +++ b/storage/postgresql/queue.go @@ -1,6 +1,3 @@ -//go:build !batched_queue -// +build !batched_queue - // Copyright 2024 Trillian Authors. All Rights Reserved. 
// // Licensed under the Apache License, Version 2.0 (the "License"); @@ -19,6 +16,8 @@ package postgresql import ( "context" + "crypto/sha256" + "encoding/binary" "errors" "fmt" "time" @@ -31,35 +30,33 @@ import ( const ( // If this statement ORDER BY clause is changed refer to the comment in removeSequencedLeaves - selectQueuedLeavesSQL = "SELECT LeafIdentityHash,MerkleLeafHash,QueueTimestampNanos " + + selectQueuedLeavesSQL = "SELECT LeafIdentityHash,MerkleLeafHash,QueueTimestampNanos,QueueID " + "FROM Unsequenced " + "WHERE TreeId=$1" + " AND Bucket=0" + " AND QueueTimestampNanos<=$2 " + "ORDER BY QueueTimestampNanos,LeafIdentityHash " + "LIMIT $3" - insertUnsequencedEntrySQL = "INSERT INTO Unsequenced(TreeId,Bucket,LeafIdentityHash,MerkleLeafHash,QueueTimestampNanos) VALUES($1,0,$2,$3,$4)" - deleteUnsequencedSQL = "DELETE FROM Unsequenced WHERE TreeId=$1 AND Bucket=0 AND QueueTimestampNanos=$2 AND LeafIdentityHash=$3" + insertUnsequencedEntrySQL = "INSERT INTO Unsequenced(TreeId,Bucket,LeafIdentityHash,MerkleLeafHash,QueueTimestampNanos,QueueID) VALUES($1,0,$2,$3,$4,$5)" + deleteUnsequencedSQL = "DELETE FROM Unsequenced WHERE QueueID=ANY($1)" ) -type dequeuedLeaf struct { - queueTimestampNanos int64 - leafIdentityHash []byte -} +type dequeuedLeaf []byte -func dequeueInfo(leafIDHash []byte, queueTimestamp int64) dequeuedLeaf { - return dequeuedLeaf{queueTimestampNanos: queueTimestamp, leafIdentityHash: leafIDHash} +func dequeueInfo(_ []byte, queueID []byte) dequeuedLeaf { + return dequeuedLeaf(queueID) } func (t *logTreeTX) dequeueLeaf(rows pgx.Rows) (*trillian.LogLeaf, dequeuedLeaf, error) { var leafIDHash []byte var merkleHash []byte var queueTimestamp int64 + var queueID []byte - err := rows.Scan(&leafIDHash, &merkleHash, &queueTimestamp) + err := rows.Scan(&leafIDHash, &merkleHash, &queueTimestamp, &queueID) if err != nil { klog.Warningf("Error scanning work rows: %s", err) - return nil, dequeuedLeaf{}, err + return nil, nil, err } // Note: the LeafData and ExtraData being nil here is OK as this is only used by the @@ -74,11 +71,24 @@ func (t *logTreeTX) dequeueLeaf(rows pgx.Rows) (*trillian.LogLeaf, dequeuedLeaf, MerkleLeafHash: merkleHash, QueueTimestamp: queueTimestampProto, } - return leaf, dequeueInfo(leafIDHash, queueTimestamp), nil + return leaf, dequeueInfo(leafIDHash, queueID), nil } -func queueArgs(_ int64, _ []byte, queueTimestamp time.Time) []interface{} { - return []interface{}{queueTimestamp.UnixNano(), nil} +func generateQueueID(treeID int64, leafIdentityHash []byte, timestamp int64) []byte { + h := sha256.New() + b := make([]byte, 10) + binary.PutVarint(b, treeID) + h.Write(b) + b = make([]byte, 10) + binary.PutVarint(b, timestamp) + h.Write(b) + h.Write(leafIdentityHash) + return h.Sum(nil) +} + +func queueArgs(treeID int64, identityHash []byte, queueTimestamp time.Time) []interface{} { + timestamp := queueTimestamp.UnixNano() + return []interface{}{timestamp, generateQueueID(treeID, identityHash, timestamp)} } func (t *logTreeTX) UpdateSequencedLeaves(ctx context.Context, leaves []*trillian.LogLeaf) error { @@ -121,17 +131,20 @@ func (t *logTreeTX) UpdateSequencedLeaves(ctx context.Context, leaves []*trillia // removeSequencedLeaves removes the passed in leaves slice (which may be // modified as part of the operation). 
-func (t *logTreeTX) removeSequencedLeaves(ctx context.Context, leaves []dequeuedLeaf) error { +func (t *logTreeTX) removeSequencedLeaves(ctx context.Context, queueIDs []dequeuedLeaf) error { start := time.Now() // Don't need to re-sort because the query ordered by leaf hash. If that changes because // the query is expensive then the sort will need to be done here. See comment in // QueueLeaves. - for _, dql := range leaves { - result, err := t.tx.Exec(ctx, deleteUnsequencedSQL, t.treeID, dql.queueTimestampNanos, dql.leafIdentityHash) - err = checkResultOkAndRowCountIs(result, err, int64(1)) - if err != nil { - return err - } + result, err := t.tx.Exec(ctx, deleteUnsequencedSQL, queueIDs) + if err != nil { + // Error is handled by checkResultOkAndRowCountIs() below + klog.Warningf("Failed to delete sequenced work: %s", err) + } + + err = checkResultOkAndRowCountIs(result, err, int64(len(queueIDs))) + if err != nil { + return err } observe(dequeueRemoveLatency, time.Since(start), labelForTX(t)) diff --git a/storage/postgresql/queue_batching.go b/storage/postgresql/queue_batching.go deleted file mode 100644 index 909534546b..0000000000 --- a/storage/postgresql/queue_batching.go +++ /dev/null @@ -1,147 +0,0 @@ -//go:build batched_queue -// +build batched_queue - -// Copyright 2024 Trillian Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package postgresql - -import ( - "context" - "crypto/sha256" - "encoding/binary" - "errors" - "fmt" - "time" - - "github.com/google/trillian" - "github.com/jackc/pgx/v5" - "google.golang.org/protobuf/types/known/timestamppb" - "k8s.io/klog/v2" -) - -const ( - // If this statement ORDER BY clause is changed refer to the comment in removeSequencedLeaves - selectQueuedLeavesSQL = "SELECT LeafIdentityHash,MerkleLeafHash,QueueTimestampNanos,QueueID " + - "FROM Unsequenced " + - "WHERE TreeId=$1" + - " AND Bucket=0" + - " AND QueueTimestampNanos<=$2 " + - "ORDER BY QueueTimestampNanos,LeafIdentityHash " + - "LIMIT $3" - insertUnsequencedEntrySQL = "INSERT INTO Unsequenced(TreeId,Bucket,LeafIdentityHash,MerkleLeafHash,QueueTimestampNanos,QueueID) VALUES($1,0,$2,$3,$4,$5)" - deleteUnsequencedSQL = "DELETE FROM Unsequenced WHERE QueueID=ANY($1)" -) - -type dequeuedLeaf []byte - -func dequeueInfo(_ []byte, queueID []byte) dequeuedLeaf { - return dequeuedLeaf(queueID) -} - -func (t *logTreeTX) dequeueLeaf(rows pgx.Rows) (*trillian.LogLeaf, dequeuedLeaf, error) { - var leafIDHash []byte - var merkleHash []byte - var queueTimestamp int64 - var queueID []byte - - err := rows.Scan(&leafIDHash, &merkleHash, &queueTimestamp, &queueID) - if err != nil { - klog.Warningf("Error scanning work rows: %s", err) - return nil, nil, err - } - - // Note: the LeafData and ExtraData being nil here is OK as this is only used by the - // sequencer. The sequencer only writes to the SequencedLeafData table and the client - // supplied data was already written to LeafData as part of queueing the leaf. 
- queueTimestampProto := timestamppb.New(time.Unix(0, queueTimestamp)) - if err := queueTimestampProto.CheckValid(); err != nil { - return nil, dequeuedLeaf{}, fmt.Errorf("got invalid queue timestamp: %w", err) - } - leaf := &trillian.LogLeaf{ - LeafIdentityHash: leafIDHash, - MerkleLeafHash: merkleHash, - QueueTimestamp: queueTimestampProto, - } - return leaf, dequeueInfo(leafIDHash, queueID), nil -} - -func generateQueueID(treeID int64, leafIdentityHash []byte, timestamp int64) []byte { - h := sha256.New() - b := make([]byte, 10) - binary.PutVarint(b, treeID) - h.Write(b) - b = make([]byte, 10) - binary.PutVarint(b, timestamp) - h.Write(b) - h.Write(leafIdentityHash) - return h.Sum(nil) -} - -func queueArgs(treeID int64, identityHash []byte, queueTimestamp time.Time) []interface{} { - timestamp := queueTimestamp.UnixNano() - return []interface{}{timestamp, generateQueueID(treeID, identityHash, timestamp)} -} - -func (t *logTreeTX) UpdateSequencedLeaves(ctx context.Context, leaves []*trillian.LogLeaf) error { - rows := make([][]interface{}, 0, len(leaves)) - dequeuedLeaves := make([]dequeuedLeaf, 0, len(leaves)) - for _, leaf := range leaves { - // This should fail on insert but catch it early - if len(leaf.LeafIdentityHash) != t.hashSizeBytes { - return errors.New("sequenced leaf has incorrect hash size") - } - - if err := leaf.IntegrateTimestamp.CheckValid(); err != nil { - return fmt.Errorf("got invalid integrate timestamp: %w", err) - } - iTimestamp := leaf.IntegrateTimestamp.AsTime() - rows = append(rows, []interface{}{t.treeID, leaf.LeafIdentityHash, leaf.MerkleLeafHash, leaf.LeafIndex, iTimestamp.UnixNano()}) - qe, ok := t.dequeued[string(leaf.LeafIdentityHash)] - if !ok { - return fmt.Errorf("attempting to update leaf that wasn't dequeued. IdentityHash: %x", leaf.LeafIdentityHash) - } - dequeuedLeaves = append(dequeuedLeaves, qe) - } - - // Copy sequenced leaves to SequencedLeafData table. - n, err := t.tx.CopyFrom( - ctx, - pgx.Identifier{"sequencedleafdata"}, - []string{"treeid", "leafidentityhash", "merkleleafhash", "sequencenumber", "integratetimestampnanos"}, - pgx.CopyFromRows(rows), - ) - if err != nil { - klog.Warningf("Failed to copy sequenced leaves: %s", err) - } - if err := checkResultOkAndCopyCountIs(n, err, int64(len(leaves))); err != nil { - return err - } - - return t.removeSequencedLeaves(ctx, dequeuedLeaves) -} - -// removeSequencedLeaves removes the passed in leaves slice (which may be -// modified as part of the operation). -func (t *logTreeTX) removeSequencedLeaves(ctx context.Context, queueIDs []dequeuedLeaf) error { - // Don't need to re-sort because the query ordered by leaf hash. If that changes because - // the query is expensive then the sort will need to be done here. See comment in - // QueueLeaves. - result, err := t.tx.Exec(ctx, deleteUnsequencedSQL, queueIDs) - if err != nil { - // Error is handled by checkResultOkAndRowCountIs() below - klog.Warningf("Failed to delete sequenced work: %s", err) - } - return checkResultOkAndRowCountIs(result, err, int64(len(queueIDs))) -} diff --git a/storage/postgresql/schema/storage.sql b/storage/postgresql/schema/storage.sql index 096448c968..11b8e13311 100644 --- a/storage/postgresql/schema/storage.sql +++ b/storage/postgresql/schema/storage.sql @@ -140,8 +140,7 @@ CREATE TABLE IF NOT EXISTS Unsequenced( MerkleLeafHash BYTEA NOT NULL, QueueTimestampNanos BIGINT NOT NULL, -- This is a SHA256 hash of the TreeId, LeafIdentityHash and QueueTimestampNanos. 
It is used - -- for batched deletes from the table when trillian_log_server and trillian_log_signer are - -- built with the batched_queue tag. + -- for batched deletes from the table. QueueID BYTEA DEFAULT NULL UNIQUE, PRIMARY KEY (TreeId, Bucket, QueueTimestampNanos, LeafIdentityHash), CHECK (length(LeafIdentityHash) <= 255), From 8102b6ade7aa508dd2ae255dd5e721b74f522541 Mon Sep 17 00:00:00 2001 From: Rob Stradling Date: Wed, 6 Nov 2024 16:38:00 +0000 Subject: [PATCH 61/62] Remove vestigial Subtree.SubtreeRevision field --- storage/postgresql/schema/storage.sql | 3 +-- storage/postgresql/tree_storage.go | 12 +++++------- 2 files changed, 6 insertions(+), 9 deletions(-) diff --git a/storage/postgresql/schema/storage.sql b/storage/postgresql/schema/storage.sql index 11b8e13311..2679b41f32 100644 --- a/storage/postgresql/schema/storage.sql +++ b/storage/postgresql/schema/storage.sql @@ -43,10 +43,9 @@ CREATE TABLE IF NOT EXISTS Subtree( TreeId BIGINT NOT NULL, SubtreeId BYTEA NOT NULL, Nodes BYTEA NOT NULL, - SubtreeRevision INTEGER NOT NULL, -- Key columns must be in ASC order in order to benefit from group-by/min-max -- optimization in PostgreSQL. - CONSTRAINT Subtree_pk PRIMARY KEY (TreeId, SubtreeId, SubtreeRevision), + CONSTRAINT Subtree_pk PRIMARY KEY (TreeId, SubtreeId), FOREIGN KEY(TreeId) REFERENCES Trees(TreeId) ON DELETE CASCADE, CHECK (length(SubtreeId) <= 255) ); diff --git a/storage/postgresql/tree_storage.go b/storage/postgresql/tree_storage.go index dca850c950..398e1d381e 100644 --- a/storage/postgresql/tree_storage.go +++ b/storage/postgresql/tree_storage.go @@ -39,11 +39,10 @@ const ( " TreeId BIGINT," + " SubtreeId BYTEA," + " Nodes BYTEA," + - " SubtreeRevision INTEGER," + - " CONSTRAINT TempSubtree_pk PRIMARY KEY (TreeId,SubtreeId,SubtreeRevision)" + + " CONSTRAINT TempSubtree_pk PRIMARY KEY (TreeId,SubtreeId)" + ") ON COMMIT DROP" - insertSubtreeMultiSQL = "INSERT INTO Subtree(TreeId,SubtreeId,Nodes,SubtreeRevision) " + - "SELECT TreeId,SubtreeId,Nodes,SubtreeRevision " + + insertSubtreeMultiSQL = "INSERT INTO Subtree(TreeId,SubtreeId,Nodes) " + + "SELECT TreeId,SubtreeId,Nodes " + "FROM TempSubtree " + "ON CONFLICT ON CONSTRAINT Subtree_pk DO UPDATE SET Nodes=EXCLUDED.Nodes" insertTreeHeadSQL = "INSERT INTO TreeHead(TreeId,TreeHeadTimestamp,TreeSize,RootHash,TreeRevision,RootSignature) " + @@ -206,7 +205,6 @@ func (t *treeTX) storeSubtrees(ctx context.Context, subtrees []*storagepb.Subtre // a really large number of subtrees to store. rows := make([][]interface{}, 0, len(subtrees)) - var subtreeRev int64 for _, s := range subtrees { s := s if s.Prefix == nil { @@ -216,7 +214,7 @@ func (t *treeTX) storeSubtrees(ctx context.Context, subtrees []*storagepb.Subtre if err != nil { return err } - rows = append(rows, []interface{}{t.treeID, s.Prefix, subtreeBytes, subtreeRev}) + rows = append(rows, []interface{}{t.treeID, s.Prefix, subtreeBytes}) } // Create temporary subtree table. 
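To make the bulk-processing pattern concrete (the same approach the README describes for `QueueLeaves` and `AddSequencedLeaves`), here is a condensed sketch of the `storeSubtrees` flow after the SubtreeRevision removal above: stage rows in a transaction-scoped temporary table via `COPY`, then upsert into `Subtree`. This is a simplified illustration rather than the literal code; the `CREATE TEMP TABLE` statement is assumed from context, and error handling, metrics, and the surrounding cache logic are omitted:

```go
package postgresql

import (
	"context"

	"github.com/jackc/pgx/v5"
)

// storeSubtreesSketch is an illustrative condensation of the real storeSubtrees.
// rows are pre-built as {treeID, subtreePrefix, serializedNodes}.
func storeSubtreesSketch(ctx context.Context, tx pgx.Tx, rows [][]interface{}) error {
	// Stage into a temporary table that disappears when the transaction ends.
	if _, err := tx.Exec(ctx, "CREATE TEMP TABLE TempSubtree(TreeId BIGINT, SubtreeId BYTEA, Nodes BYTEA, CONSTRAINT TempSubtree_pk PRIMARY KEY (TreeId,SubtreeId)) ON COMMIT DROP"); err != nil {
		return err
	}
	// One COPY round trip loads every staged subtree.
	if _, err := tx.CopyFrom(ctx, pgx.Identifier{"tempsubtree"}, []string{"treeid", "subtreeid", "nodes"}, pgx.CopyFromRows(rows)); err != nil {
		return err
	}
	// Upsert into the real table, overwriting Nodes for subtrees that already exist.
	_, err := tx.Exec(ctx, "INSERT INTO Subtree(TreeId,SubtreeId,Nodes) SELECT TreeId,SubtreeId,Nodes FROM TempSubtree ON CONFLICT ON CONSTRAINT Subtree_pk DO UPDATE SET Nodes=EXCLUDED.Nodes")
	return err
}
```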
@@ -230,7 +228,7 @@ func (t *treeTX) storeSubtrees(ctx context.Context, subtrees []*storagepb.Subtre _, err = t.tx.CopyFrom( ctx, pgx.Identifier{"tempsubtree"}, - []string{"treeid", "subtreeid", "nodes", "subtreerevision"}, + []string{"treeid", "subtreeid", "nodes"}, pgx.CopyFromRows(rows), ) if err != nil { From be9618289c8890e98a0aaf68c6f9bc20636343d6 Mon Sep 17 00:00:00 2001 From: Rob Stradling Date: Wed, 6 Nov 2024 17:12:52 +0000 Subject: [PATCH 62/62] Remove vestigial TreeHead.TreeRevision field --- storage/postgresql/log_storage.go | 24 ++++++++++-------------- storage/postgresql/schema/storage.sql | 6 ------ storage/postgresql/storage_test.go | 12 ------------ storage/postgresql/tree_storage.go | 24 ++++++++++-------------- 4 files changed, 20 insertions(+), 46 deletions(-) diff --git a/storage/postgresql/log_storage.go b/storage/postgresql/log_storage.go index f672058192..fa1b6381be 100644 --- a/storage/postgresql/log_storage.go +++ b/storage/postgresql/log_storage.go @@ -73,7 +73,7 @@ const ( " AND TreeState IN($3,$4)" + " AND (Deleted IS NULL OR Deleted='false')" - selectLatestSignedLogRootSQL = "SELECT TreeHeadTimestamp,TreeSize,RootHash,TreeRevision,RootSignature " + + selectLatestSignedLogRootSQL = "SELECT TreeHeadTimestamp,TreeSize,RootHash,RootSignature " + "FROM TreeHead " + "WHERE TreeId=$1 " + "ORDER BY TreeHeadTimestamp DESC " + @@ -207,9 +207,8 @@ func (m *postgreSQLLogStorage) beginInternal(ctx context.Context, tree *trillian ls: m, dequeued: make(map[string]dequeuedLeaf), } - ltx.slr, ltx.readRev, err = ltx.fetchLatestRoot(ctx) + ltx.slr, err = ltx.fetchLatestRoot(ctx) if err == storage.ErrTreeNeedsInit { - ltx.treeTX.writeRevision = 0 return ltx, err } else if err != nil { if err := ttx.Close(); err != nil { @@ -225,7 +224,6 @@ func (m *postgreSQLLogStorage) beginInternal(ctx context.Context, tree *trillian return nil, err } - ltx.treeTX.writeRevision = ltx.readRev + 1 return ltx, nil } @@ -324,12 +322,11 @@ type logTreeTX struct { treeTX ls *postgreSQLLogStorage root types.LogRootV1 - readRev int64 slr *trillian.SignedLogRoot dequeued map[string]dequeuedLeaf } -// GetMerkleNodes returns the requested nodes at the read revision. +// GetMerkleNodes returns the requested nodes. func (t *logTreeTX) GetMerkleNodes(ctx context.Context, ids []compact.NodeID) ([]tree.Node, error) { t.treeTX.mu.Lock() defer t.treeTX.mu.Unlock() @@ -686,16 +683,16 @@ func (t *logTreeTX) LatestSignedLogRoot(ctx context.Context) (*trillian.SignedLo return t.slr, nil } -// fetchLatestRoot reads the latest root and the revision from the DB. -func (t *logTreeTX) fetchLatestRoot(ctx context.Context) (*trillian.SignedLogRoot, int64, error) { - var timestamp, treeSize, treeRevision int64 +// fetchLatestRoot reads the latest root from the DB. +func (t *logTreeTX) fetchLatestRoot(ctx context.Context) (*trillian.SignedLogRoot, error) { + var timestamp, treeSize int64 var rootHash, rootSignatureBytes []byte if err := t.tx.QueryRow( ctx, selectLatestSignedLogRootSQL, t.treeID).Scan( - ×tamp, &treeSize, &rootHash, &treeRevision, &rootSignatureBytes, + ×tamp, &treeSize, &rootHash, &rootSignatureBytes, ); err == pgx.ErrNoRows { // It's possible there are no roots for this tree yet - return nil, 0, storage.ErrTreeNeedsInit + return nil, storage.ErrTreeNeedsInit } // Put logRoot back together. Fortunately LogRoot has a deterministic serialization. 
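The fetchLatestRoot changes above lean on `LogRootV1` having a deterministic binary serialization, which is why the tree head can be stored as just the timestamp, size, and hash columns alongside the marshalled root. A small illustrative round trip using `github.com/google/trillian/types` (the example values are made up, not taken from the patch):

```go
package main

import (
	"fmt"

	"github.com/google/trillian/types"
)

func main() {
	// Marshal a root from the fields the storage layer keeps...
	root := types.LogRootV1{
		TimestampNanos: 1730851200000000000,
		TreeSize:       42,
		RootHash:       make([]byte, 32),
	}
	b, err := root.MarshalBinary()
	if err != nil {
		panic(err)
	}

	// ...and recover the same fields from the stored bytes.
	var decoded types.LogRootV1
	if err := decoded.UnmarshalBinary(b); err != nil {
		panic(err)
	}
	fmt.Println(decoded.TreeSize, decoded.TimestampNanos) // 42 1730851200000000000
}
```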
@@ -705,10 +702,10 @@ func (t *logTreeTX) fetchLatestRoot(ctx context.Context) (*trillian.SignedLogRoo TreeSize: uint64(treeSize), }).MarshalBinary() if err != nil { - return nil, 0, err + return nil, err } - return &trillian.SignedLogRoot{LogRoot: logRoot}, treeRevision, nil + return &trillian.SignedLogRoot{LogRoot: logRoot}, nil } func (t *logTreeTX) StoreSignedLogRoot(ctx context.Context, root *trillian.SignedLogRoot) error { @@ -731,7 +728,6 @@ func (t *logTreeTX) StoreSignedLogRoot(ctx context.Context, root *trillian.Signe logRoot.TimestampNanos, logRoot.TreeSize, logRoot.RootHash, - t.treeTX.writeRevision, []byte{}) if err != nil { klog.Warningf("Failed to store signed root: %s", err) diff --git a/storage/postgresql/schema/storage.sql b/storage/postgresql/schema/storage.sql index 2679b41f32..6804a70619 100644 --- a/storage/postgresql/schema/storage.sql +++ b/storage/postgresql/schema/storage.sql @@ -50,24 +50,18 @@ CREATE TABLE IF NOT EXISTS Subtree( CHECK (length(SubtreeId) <= 255) ); --- The TreeRevisionIdx is used to enforce that there is only one STH at any --- tree revision CREATE TABLE IF NOT EXISTS TreeHead( TreeId BIGINT NOT NULL, TreeHeadTimestamp BIGINT, TreeSize BIGINT, RootHash BYTEA NOT NULL, RootSignature BYTEA NOT NULL, - TreeRevision BIGINT, PRIMARY KEY(TreeId, TreeHeadTimestamp), FOREIGN KEY(TreeId) REFERENCES Trees(TreeId) ON DELETE CASCADE, CHECK (length(RootHash) <= 255), CHECK (length(RootSignature) <= 1024) ); -CREATE UNIQUE INDEX TreeHeadRevisionIdx - ON TreeHead(TreeId, TreeRevision); - -- --------------------------------------------- -- Log specific stuff here -- --------------------------------------------- diff --git a/storage/postgresql/storage_test.go b/storage/postgresql/storage_test.go index a857300eaa..2cae7231e7 100644 --- a/storage/postgresql/storage_test.go +++ b/storage/postgresql/storage_test.go @@ -65,9 +65,7 @@ func TestNodeRoundTrip(t *testing.T) { tree := mustCreateTree(ctx, t, as, treeDef) s := NewLogStorage(DB, nil) - const writeRev = int64(100) runLogTX(s, tree, t, func(ctx context.Context, tx storage.LogTreeTX) error { - forceWriteRevision(writeRev, tx) if err := tx.SetMerkleNodes(ctx, tc.store); err != nil { t.Fatalf("Failed to store nodes: %s", err) } @@ -113,7 +111,6 @@ func TestLogNodeRoundTripMultiSubtree(t *testing.T) { tree := mustCreateTree(ctx, t, as, tC.tree) s := NewLogStorage(DB, nil) - const writeRev = int64(100) const size = 871 nodesToStore, err := createLogNodesForTreeAtSize(t, size) if err != nil { @@ -126,7 +123,6 @@ func TestLogNodeRoundTripMultiSubtree(t *testing.T) { { runLogTX(s, tree, t, func(ctx context.Context, tx storage.LogTreeTX) error { - forceWriteRevision(writeRev, tx) if err := tx.SetMerkleNodes(ctx, nodesToStore); err != nil { t.Fatalf("Failed to store nodes: %s", err) } @@ -157,14 +153,6 @@ func TestLogNodeRoundTripMultiSubtree(t *testing.T) { } } -func forceWriteRevision(rev int64, tx storage.LogTreeTX) { - mtx, ok := tx.(*logTreeTX) - if !ok { - panic(errors.New("uh oh")) - } - mtx.treeTX.writeRevision = rev -} - func createSomeNodes(count int) []stree.Node { r := make([]stree.Node, count) for i := range r { diff --git a/storage/postgresql/tree_storage.go b/storage/postgresql/tree_storage.go index 398e1d381e..b7ebb50f8f 100644 --- a/storage/postgresql/tree_storage.go +++ b/storage/postgresql/tree_storage.go @@ -45,8 +45,8 @@ const ( "SELECT TreeId,SubtreeId,Nodes " + "FROM TempSubtree " + "ON CONFLICT ON CONSTRAINT Subtree_pk DO UPDATE SET Nodes=EXCLUDED.Nodes" - insertTreeHeadSQL = "INSERT INTO 
TreeHead(TreeId,TreeHeadTimestamp,TreeSize,RootHash,TreeRevision,RootSignature) " + - "VALUES($1,$2,$3,$4,$5,$6) " + + insertTreeHeadSQL = "INSERT INTO TreeHead(TreeId,TreeHeadTimestamp,TreeSize,RootHash,RootSignature) " + + "VALUES($1,$2,$3,$4,$5) " + "ON CONFLICT DO NOTHING" selectSubtreeSQL = "SELECT SubtreeId,Nodes " + @@ -104,7 +104,6 @@ func (m *postgreSQLTreeStorage) beginTreeTx(ctx context.Context, tree *trillian. treeType: tree.TreeType, hashSizeBytes: hashSizeBytes, subtreeCache: subtreeCache, - writeRevision: -1, }, nil } @@ -118,7 +117,6 @@ type treeTX struct { treeType trillian.TreeType hashSizeBytes int subtreeCache *cache.SubtreeCache - writeRevision int64 } func (t *treeTX) getSubtrees(ctx context.Context, ids [][]byte) ([]*storagepb.SubtreeProto, error) { @@ -293,16 +291,14 @@ func (t *treeTX) Commit(ctx context.Context) error { t.mu.Lock() defer t.mu.Unlock() - if t.writeRevision > -1 { - tiles, err := t.subtreeCache.UpdatedTiles() - if err != nil { - klog.Warningf("SubtreeCache updated tiles error: %v", err) - return err - } - if err := t.storeSubtrees(ctx, tiles); err != nil { - klog.Warningf("TX commit flush error: %v", err) - return err - } + tiles, err := t.subtreeCache.UpdatedTiles() + if err != nil { + klog.Warningf("SubtreeCache updated tiles error: %v", err) + return err + } + if err := t.storeSubtrees(ctx, tiles); err != nil { + klog.Warningf("TX commit flush error: %v", err) + return err } t.closed = true if err := t.tx.Commit(ctx); err != nil {