Skip to content

Commit

Permalink
Adds sql creation scripts for postgres (#1299)
Browse files Browse the repository at this point in the history
Referencing #1298, this PR adds the SQL creation script for a postgres data store and a stripped down reset script. I've done my best to translate the MySQL <> Postgres types; however they may not be a perfect mapping and it's possible that there'll need to be further changes made but I figured it's a good starting point for now.
  • Loading branch information
vishalkuo authored and AlCutter committed Sep 24, 2018
1 parent 08fd917 commit 40fbfc9
Show file tree
Hide file tree
Showing 2 changed files with 199 additions and 0 deletions.
60 changes: 60 additions & 0 deletions scripts/postgres_resetdb.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
#!/bin/bash

set -e

usage() {
cat <<EOF
$(basename $0) [--force] [--verbose] ...
All unrecognised arguments will be passed through to the 'postgres' command.
Accepts environment variables:
- PG_ROOT_USER: A user with sufficient rights to create/reset the Trillian
database (default: `postgres`).
- PG_HOST: The hostname of the PG server (default: localhost).
- PG_PORT: The port the PG server is listening on (default: 5432).
- PG_DATABASE: The name to give to the new Trillian user and database
(default: test).
EOF
}

die() {
echo "$*" > /dev/stderr
exit 1
}

collect_vars() {
# set unset environment variables to defaults
[ -z ${PG_ROOT_USER+x} ] && PG_ROOT_USER="postgres"
[ -z ${PG_HOST+x} ] && PG_HOST="localhost"
[ -z ${PG_PORT+x} ] && PG_PORT="5432"
[ -z ${PG_DATABASE+x} ] && PG_DATABASE="test"
FLAGS=()

FLAGS+=(-U "${PG_ROOT_USER}")
FLAGS+=(--host "${PG_HOST}")
FLAGS+=(--port "${PG_PORT}")
}

main() {
collect_vars "$@"

readonly TRILLIAN_PATH=$(go list -f '{{.Dir}}' github.com/google/trillian)

echo "Warning: about to destroy and reset database '${PG_DATABASE}'"

[[ ${FORCE} = true ]] || read -p "Are you sure? [Y/N]: " -n 1 -r
echo # Print newline following the above prompt

if [ -z ${REPLY+x} ] || [[ $REPLY =~ ^[Yy]$ ]]
then
echo "Resetting DB..."
psql "${FLAGS[@]}" -c "DROP DATABASE IF EXISTS ${PG_DATABASE};" || \
die "Error: Failed to drop database '${PG_DATABASE}'."
psql "${FLAGS[@]}" -c "CREATE DATABASE ${PG_DATABASE};" || \
die "Error: Failed to create database '${PG_DATABASE}'."
psql "${FLAGS[@]}" -d ${PG_DATABASE} -f ${TRILLIAN_PATH}/storage/postgres/storage.sql || \
die "Error: Failed to create tables in '${PG_DATABASE}' database."
echo "Reset Complete"
fi
}

main "$@"
139 changes: 139 additions & 0 deletions storage/postgres/storage.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,139 @@
-- Postgres impl of storage
-- ---------------------------------------------
-- Tree stuff here
-- ---------------------------------------------

-- Tree Enums
CREATE TYPE E_TREE_STATE AS ENUM('ACTIVE', 'FROZEN', 'DRAINING');
CREATE TYPE E_TREE_TYPE AS ENUM('LOG', 'MAP', 'PREORDERED_LOG');
CREATE TYPE E_HASH_STRATEGY AS ENUM('RFC6962_SHA256', 'TEST_MAP_HASHER', 'OBJECT_RFC6962_SHA256', 'CONIKS_SHA512_256', 'CONIKS_SHA256');
CREATE TYPE E_HASH_ALGORITHM AS ENUM('SHA256');
CREATE TYPE E_SIGNATURE_ALGORITHM AS ENUM('ECDSA', 'RSA');

-- Tree parameters should not be changed after creation. Doing so can
-- render the data in the tree unusable or inconsistent.
CREATE TABLE IF NOT EXISTS trees (
tree_id BIGINT NOT NULL,
tree_state E_TREE_STATE NOT NULL,
tree_type E_TREE_TYPE NOT NULL,
hash_strategy E_HASH_STRATEGY NOT NULL,
hash_algorithm E_HASH_ALGORITHM NOT NULL,
signature_algorithm E_SIGNATURE_ALGORITHM NOT NULL,
display_name VARCHAR(20),
description VARCHAR(200),
create_time_millis BIGINT NOT NULL,
update_time_millis BIGINT NOT NULL,
max_root_duration_millis BIGINT NOT NULL,
private_key BYTEA NOT NULL,
public_key BYTEA NOT NULL,
deleted BOOLEAN,
delete_time_millis BIGINT,
PRIMARY KEY(tree_id)
);

-- This table contains tree parameters that can be changed at runtime such as for
-- administrative purposes.
CREATE TABLE IF NOT EXISTS tree_control(
tree_id BIGINT NOT NULL,
signing_enabled BOOLEAN NOT NULL,
sequencing_enabled BOOLEAN NOT NULL,
sequence_interval_seconds INTEGER NOT NULL,
PRIMARY KEY(tree_id),
FOREIGN KEY(tree_id) REFERENCES trees(tree_id) ON DELETE CASCADE
);

CREATE TABLE IF NOT EXISTS subtree(
tree_id BIGINT NOT NULL,
subtree_id BYTEA NOT NULL,
nodes BYTEA NOT NULL,
subtree_revision INTEGER NOT NULL,
PRIMARY KEY(tree_id, subtree_id, subtree_revision),
FOREIGN KEY(tree_id) REFERENCES Trees(tree_id) ON DELETE CASCADE
);

-- The TreeRevisionIdx is used to enforce that there is only one STH at any
-- tree revision
CREATE TABLE IF NOT EXISTS tree_head(
tree_id BIGINT NOT NULL,
tree_head_timestamp BIGINT,
tree_size BIGINT,
root_hash BYTEA NOT NULL,
root_signature BYTEA NOT NULL,
tree_revision BIGINT,
PRIMARY KEY(tree_id, tree_revision),
FOREIGN KEY(tree_id) REFERENCES trees(tree_id) ON DELETE CASCADE
);

-- TODO(vishal) benchmark this to see if it's a suitable replacement for not
-- having a DESC scan on the primary key
CREATE UNIQUE INDEX TreeHeadRevisionIdx ON tree_head(tree_id, tree_revision DESC);

-- ---------------------------------------------
-- Log specific stuff here
-- ---------------------------------------------

-- Creating index at same time as table allows some storage engines to better
-- optimize physical storage layout. Most engines allow multiple nulls in a
-- unique index but some may not.

-- A leaf that has not been sequenced has a row in this table. If duplicate leaves
-- are allowed they will all reference this row.
CREATE TABLE IF NOT EXISTS leaf_data(
tree_id BIGINT NOT NULL,
-- This is a personality specific hash of some subset of the leaf data.
-- It's only purpose is to allow Trillian to identify duplicate entries in
-- the context of the personality.
leaf_identity_hash BYTEA NOT NULL,
-- This is the data stored in the leaf for example in CT it contains a DER encoded
-- X.509 certificate but is application dependent
leaf_value BYTEA NOT NULL,
-- This is extra data that the application can associate with the leaf should it wish to.
-- This data is not included in signing and hashing.
extra_data BYTEA,
-- The timestamp from when this leaf data was first queued for inclusion.
queue_timestamp_nanos BIGINT NOT NULL,
PRIMARY KEY(tree_id, leaf_identity_hash),
FOREIGN KEY(tree_id) REFERENCES trees(tree_id) ON DELETE CASCADE
);

-- When a leaf is sequenced a row is added to this table. If logs allow duplicates then
-- multiple rows will exist with different sequence numbers. The signed timestamp
-- will be communicated via the unsequenced table as this might need to be unique, depending
-- on the log parameters and we can't insert into this table until we have the sequence number
-- which is not available at the time we queue the entry. We need both hashes because the
-- LeafData table is keyed by the raw data hash.
CREATE TABLE IF NOT EXISTS sequenced_leaf_data(
tree_id BIGINT NOT NULL,
sequence_number BIGINT NOT NULL,
-- This is a personality specific has of some subset of the leaf data.
-- It's only purpose is to allow Trillian to identify duplicate entries in
-- the context of the personality.
leaf_identity_hash BYTEA NOT NULL,
-- This is a MerkleLeafHash as defined by the treehasher that the log uses. For example for
-- CT this hash will include the leaf prefix byte as well as the leaf data.
merkle_leaf_hash BYTEA NOT NULL,
integrate_timestamp_nanos BIGINT NOT NULL,
PRIMARY KEY(tree_id, sequence_number),
FOREIGN KEY(tree_id) REFERENCES trees(tree_id) ON DELETE CASCADE,
FOREIGN KEY(tree_id, leaf_identity_hash) REFERENCES leaf_data(tree_id, leaf_identity_hash) ON DELETE CASCADE
);

CREATE TABLE IF NOT EXISTS unsequenced(
tree_id BIGINT NOT NULL,
-- The bucket field is to allow the use of time based ring bucketed schemes if desired. If
-- unused this should be set to zero for all entries.
bucket INTEGER NOT NULL,
-- This is a personality specific hash of some subset of the leaf data.
-- It's only purpose is to allow Trillian to identify duplicate entries in
-- the context of the personality.
leaf_identity_hash BYTEA NOT NULL,
-- This is a MerkleLeafHash as defined by the treehasher that the log uses. For example for
-- CT this hash will include the leaf prefix byte as well as the leaf data.
merkle_leaf_hash BYTEA NOT NULL,
queue_timestamp_nanos BIGINT NOT NULL,
-- This is a SHA256 hash of the TreeID, LeafIdentityHash and QueueTimestampNanos. It is used
-- for batched deletes from the table when trillian_log_server and trillian_log_signer are
-- built with the batched_queue tag.
queue_id BYTEA DEFAULT NULL UNIQUE,
PRIMARY KEY (tree_id, bucket, queue_timestamp_nanos, leaf_identity_hash)
);

0 comments on commit 40fbfc9

Please sign in to comment.