Adds sql creation scripts for postgres (#1299)

Referencing #1298, this PR adds the SQL creation script for a postgres data store and a stripped down reset script. I've done my best to translate the MySQL <> Postgres types; however they may not be a perfect mapping and it's possible that there'll need to be further changes made but I figured it's a good starting point for now.
google · Sep 24, 2018 · 40fbfc9 · 40fbfc9
1 parent 08fd917
commit 40fbfc9
Show file tree

Hide file tree

Showing 2 changed files with 199 additions and 0 deletions.
diff --git a/scripts/postgres_resetdb.sh b/scripts/postgres_resetdb.sh
@@ -0,0 +1,60 @@
+#!/bin/bash
+
+set -e
+
+usage() {
+  cat <<EOF
+$(basename $0) [--force] [--verbose] ...
+All unrecognised arguments will be passed through to the 'postgres' command.
+Accepts environment variables:
+- PG_ROOT_USER: A user with sufficient rights to create/reset the Trillian
+  database (default: `postgres`).
+- PG_HOST: The hostname of the PG server (default: localhost).
+- PG_PORT: The port the PG server is listening on (default: 5432).
+- PG_DATABASE: The name to give to the new Trillian user and database
+  (default: test).
+EOF
+}
+
+die() {
+  echo "$*" > /dev/stderr
+  exit 1
+}
+
+collect_vars() {
+  # set unset environment variables to defaults
+  [ -z ${PG_ROOT_USER+x} ] && PG_ROOT_USER="postgres"
+  [ -z ${PG_HOST+x} ] && PG_HOST="localhost"
+  [ -z ${PG_PORT+x} ] && PG_PORT="5432"
+  [ -z ${PG_DATABASE+x} ] && PG_DATABASE="test"
+  FLAGS=()
+
+  FLAGS+=(-U "${PG_ROOT_USER}")
+  FLAGS+=(--host "${PG_HOST}")
+  FLAGS+=(--port "${PG_PORT}")
+}
+
+main() {
+  collect_vars "$@"
+
+  readonly TRILLIAN_PATH=$(go list -f '{{.Dir}}' github.com/google/trillian)
+
+  echo "Warning: about to destroy and reset database '${PG_DATABASE}'"
+
+  [[ ${FORCE} = true ]] || read -p "Are you sure? [Y/N]: " -n 1 -r
+  echo # Print newline following the above prompt
+
+  if [ -z ${REPLY+x} ] || [[ $REPLY =~ ^[Yy]$ ]]
+  then
+      echo "Resetting DB..."
+      psql "${FLAGS[@]}" -c "DROP DATABASE IF EXISTS ${PG_DATABASE};" || \
+        die "Error: Failed to drop database '${PG_DATABASE}'."
+      psql "${FLAGS[@]}" -c "CREATE DATABASE ${PG_DATABASE};" || \
+        die "Error: Failed to create database '${PG_DATABASE}'."
+      psql "${FLAGS[@]}" -d ${PG_DATABASE} -f ${TRILLIAN_PATH}/storage/postgres/storage.sql || \
+        die "Error: Failed to create tables in '${PG_DATABASE}' database."
+      echo "Reset Complete"
+  fi
+}
+
+main "$@"
diff --git a/storage/postgres/storage.sql b/storage/postgres/storage.sql
@@ -0,0 +1,139 @@
+-- Postgres impl of storage
+-- ---------------------------------------------
+-- Tree stuff here
+-- ---------------------------------------------
+
+-- Tree Enums
+CREATE TYPE E_TREE_STATE AS ENUM('ACTIVE', 'FROZEN', 'DRAINING');
+CREATE TYPE E_TREE_TYPE AS ENUM('LOG', 'MAP', 'PREORDERED_LOG');
+CREATE TYPE E_HASH_STRATEGY AS ENUM('RFC6962_SHA256', 'TEST_MAP_HASHER', 'OBJECT_RFC6962_SHA256', 'CONIKS_SHA512_256', 'CONIKS_SHA256');
+CREATE TYPE E_HASH_ALGORITHM AS ENUM('SHA256');
+CREATE TYPE E_SIGNATURE_ALGORITHM AS ENUM('ECDSA', 'RSA');
+
+-- Tree parameters should not be changed after creation. Doing so can
+-- render the data in the tree unusable or inconsistent.
+CREATE TABLE IF NOT EXISTS trees ( 
+  tree_id                  BIGINT NOT NULL, 
+  tree_state               E_TREE_STATE NOT NULL, 
+  tree_type                E_TREE_TYPE NOT NULL, 
+  hash_strategy            E_HASH_STRATEGY NOT NULL, 
+  hash_algorithm           E_HASH_ALGORITHM NOT NULL, 
+  signature_algorithm      E_SIGNATURE_ALGORITHM NOT NULL, 
+  display_name             VARCHAR(20), 
+  description              VARCHAR(200), 
+  create_time_millis       BIGINT NOT NULL, 
+  update_time_millis       BIGINT NOT NULL, 
+  max_root_duration_millis BIGINT NOT NULL, 
+  private_key              BYTEA NOT NULL, 
+  public_key               BYTEA NOT NULL, 
+  deleted                  BOOLEAN, 
+  delete_time_millis       BIGINT, 
+  PRIMARY KEY(tree_id) 
+); 
+
+-- This table contains tree parameters that can be changed at runtime such as for
+-- administrative purposes.
+CREATE TABLE IF NOT EXISTS tree_control(
+  tree_id                   BIGINT NOT NULL,
+  signing_enabled           BOOLEAN NOT NULL,
+  sequencing_enabled        BOOLEAN NOT NULL,
+  sequence_interval_seconds INTEGER NOT NULL,
+  PRIMARY KEY(tree_id),
+  FOREIGN KEY(tree_id) REFERENCES trees(tree_id) ON DELETE CASCADE
+);
+
+CREATE TABLE IF NOT EXISTS subtree(
+  tree_id               BIGINT NOT NULL,
+  subtree_id            BYTEA NOT NULL,
+  nodes                 BYTEA NOT NULL,
+  subtree_revision      INTEGER NOT NULL, 
+  PRIMARY KEY(tree_id, subtree_id, subtree_revision),
+  FOREIGN KEY(tree_id) REFERENCES Trees(tree_id) ON DELETE CASCADE
+);
+
+-- The TreeRevisionIdx is used to enforce that there is only one STH at any
+-- tree revision
+CREATE TABLE IF NOT EXISTS tree_head(
+  tree_id                BIGINT NOT NULL,
+  tree_head_timestamp    BIGINT,
+  tree_size              BIGINT,
+  root_hash              BYTEA NOT NULL,
+  root_signature         BYTEA NOT NULL,
+  tree_revision          BIGINT,
+  PRIMARY KEY(tree_id, tree_revision),
+  FOREIGN KEY(tree_id) REFERENCES trees(tree_id) ON DELETE CASCADE
+);
+
+-- TODO(vishal) benchmark this to see if it's a suitable replacement for not
+-- having a DESC scan on the primary key
+CREATE UNIQUE INDEX TreeHeadRevisionIdx ON tree_head(tree_id, tree_revision DESC);
+
+-- ---------------------------------------------
+-- Log specific stuff here
+-- ---------------------------------------------
+
+-- Creating index at same time as table allows some storage engines to better
+-- optimize physical storage layout. Most engines allow multiple nulls in a
+-- unique index but some may not.
+
+-- A leaf that has not been sequenced has a row in this table. If duplicate leaves
+-- are allowed they will all reference this row.
+CREATE TABLE IF NOT EXISTS leaf_data(
+  tree_id               BIGINT NOT NULL,
+  -- This is a personality specific hash of some subset of the leaf data.
+  -- It's only purpose is to allow Trillian to identify duplicate entries in
+  -- the context of the personality.
+  leaf_identity_hash     BYTEA NOT NULL,
+  -- This is the data stored in the leaf for example in CT it contains a DER encoded
+  -- X.509 certificate but is application dependent
+  leaf_value            BYTEA NOT NULL,
+  -- This is extra data that the application can associate with the leaf should it wish to.
+  -- This data is not included in signing and hashing.
+  extra_data            BYTEA,
+  -- The timestamp from when this leaf data was first queued for inclusion.
+  queue_timestamp_nanos  BIGINT NOT NULL,
+  PRIMARY KEY(tree_id, leaf_identity_hash),
+  FOREIGN KEY(tree_id) REFERENCES trees(tree_id) ON DELETE CASCADE
+);
+
+-- When a leaf is sequenced a row is added to this table. If logs allow duplicates then
+-- multiple rows will exist with different sequence numbers. The signed timestamp
+-- will be communicated via the unsequenced table as this might need to be unique, depending
+-- on the log parameters and we can't insert into this table until we have the sequence number
+-- which is not available at the time we queue the entry. We need both hashes because the
+-- LeafData table is keyed by the raw data hash.
+CREATE TABLE IF NOT EXISTS sequenced_leaf_data(
+  tree_id                   BIGINT NOT NULL,
+  sequence_number           BIGINT NOT NULL,
+  -- This is a personality specific has of some subset of the leaf data.
+  -- It's only purpose is to allow Trillian to identify duplicate entries in
+  -- the context of the personality.
+  leaf_identity_hash        BYTEA NOT NULL,
+  -- This is a MerkleLeafHash as defined by the treehasher that the log uses. For example for
+  -- CT this hash will include the leaf prefix byte as well as the leaf data.
+  merkle_leaf_hash          BYTEA NOT NULL,
+  integrate_timestamp_nanos BIGINT NOT NULL,
+  PRIMARY KEY(tree_id, sequence_number),
+  FOREIGN KEY(tree_id) REFERENCES trees(tree_id) ON DELETE CASCADE,
+  FOREIGN KEY(tree_id, leaf_identity_hash) REFERENCES leaf_data(tree_id, leaf_identity_hash) ON DELETE CASCADE
+);
+
+CREATE TABLE IF NOT EXISTS unsequenced(
+  tree_id               BIGINT NOT NULL,
+  -- The bucket field is to allow the use of time based ring bucketed schemes if desired. If
+  -- unused this should be set to zero for all entries.
+  bucket                INTEGER NOT NULL,
+  -- This is a personality specific hash of some subset of the leaf data.
+  -- It's only purpose is to allow Trillian to identify duplicate entries in
+  -- the context of the personality.
+  leaf_identity_hash    BYTEA NOT NULL,
+  -- This is a MerkleLeafHash as defined by the treehasher that the log uses. For example for
+  -- CT this hash will include the leaf prefix byte as well as the leaf data.
+  merkle_leaf_hash      BYTEA NOT NULL,
+  queue_timestamp_nanos BIGINT NOT NULL,
+  -- This is a SHA256 hash of the TreeID, LeafIdentityHash and QueueTimestampNanos. It is used
+  -- for batched deletes from the table when trillian_log_server and trillian_log_signer are
+  -- built with the batched_queue tag.
+  queue_id              BYTEA DEFAULT NULL UNIQUE,
+  PRIMARY KEY (tree_id, bucket, queue_timestamp_nanos, leaf_identity_hash)
+);