From 20d7be3d8b262c5b34dea8e128d16d028a323210 Mon Sep 17 00:00:00 2001 From: Nathaniel Caza Date: Fri, 25 Feb 2022 14:32:45 -0600 Subject: [PATCH 001/225] apply migrations for next db at startup --- app/cmd.go | 33 +++++++++++++++++++++++---------- 1 file changed, 23 insertions(+), 10 deletions(-) diff --git a/app/cmd.go b/app/cmd.go index 8c1bb91688..1f40c86d5d 100644 --- a/app/cmd.go +++ b/app/cmd.go @@ -118,20 +118,37 @@ var RootCmd = &cobra.Command{ u.RawQuery = q.Encode() cfg.DBURL = u.String() - if cfg.APIOnly { - err = migrate.VerifyAll(log.WithDebug(ctx), cfg.DBURL) - if err != nil { - return errors.Wrap(err, "verify migrations") + doMigrations := func(url string) error { + if cfg.APIOnly { + err = migrate.VerifyAll(log.WithDebug(ctx), url) + if err != nil { + return errors.Wrap(err, "verify migrations") + } + return nil } - } else { + s := time.Now() - n, err := migrate.ApplyAll(log.WithDebug(ctx), cfg.DBURL) + n, err := migrate.ApplyAll(log.WithDebug(ctx), url) if err != nil { return errors.Wrap(err, "apply migrations") } if n > 0 { log.Logf(ctx, "Applied %d migrations in %s.", n, time.Since(s)) } + + return nil + } + + err = doMigrations(cfg.DBURL) + if err != nil { + return err + } + + if cfg.DBURLNext != "" { + err = doMigrations(cfg.DBURLNext) + if err != nil { + return errors.Wrap(err, "nextdb") + } } dbc, err := wrappedDriver.OpenConnector(cfg.DBURL) @@ -217,7 +234,6 @@ var ( Use: "version", Short: "Output the current version.", RunE: func(cmd *cobra.Command, args []string) error { - migrations := migrate.Names() fmt.Printf(`Version: %s @@ -472,7 +488,6 @@ Migration: %s (#%d) up := viper.GetString("up") if down != "" { n, err := migrate.Down(ctx, c.DBURL, down) - if err != nil { return errors.Wrap(err, "apply DOWN migrations") } @@ -483,7 +498,6 @@ Migration: %s (#%d) if up != "" || down == "" { n, err := migrate.Up(ctx, c.DBURL, up) - if err != nil { return errors.Wrap(err, "apply UP migrations") } @@ -500,7 +514,6 @@ Migration: %s (#%d) Use: 
"set-config", Short: "Sets current config values in the DB from stdin.", RunE: func(cmd *cobra.Command, args []string) error { - if viper.GetString("data-encryption-key") == "" && !viper.GetBool("allow-empty-data-encryption-key") { return validation.NewFieldError("data-encryption-key", "Must not be empty, or set --allow-empty-data-encryption-key") } From c5a0dff9268ae5c73dd570e58cf8d84df5a9352e Mon Sep 17 00:00:00 2001 From: Nathaniel Caza Date: Tue, 1 Mar 2022 10:39:27 -0600 Subject: [PATCH 002/225] add sqldrv and RetryDriver --- util/sqldrv/retryconnector.go | 37 +++++++++++++++++++++++++++ util/sqldrv/retrydriver.go | 48 +++++++++++++++++++++++++++++++++++ 2 files changed, 85 insertions(+) create mode 100644 util/sqldrv/retryconnector.go create mode 100644 util/sqldrv/retrydriver.go diff --git a/util/sqldrv/retryconnector.go b/util/sqldrv/retryconnector.go new file mode 100644 index 0000000000..a80ed1f1a0 --- /dev/null +++ b/util/sqldrv/retryconnector.go @@ -0,0 +1,37 @@ +package sqldrv + +import ( + "context" + "database/sql/driver" + "time" + + "github.com/target/goalert/retry" +) + +type retryConnector struct { + dbc driver.Connector + name string + drv *RetryDriver +} + +var _ driver.Connector = (*retryConnector)(nil) + +func (rc *retryConnector) Connect(ctx context.Context) (driver.Conn, error) { + var conn driver.Conn + var err error + err = retry.DoTemporaryError(func(_ int) error { + if rc.dbc == nil { + conn, err = rc.dbc.Connect(ctx) + } else { + conn, err = rc.drv.Open(rc.name) + } + return err + }, + retry.Log(ctx), + retry.Context(ctx), + retry.Limit(rc.drv.limit), + retry.FibBackoff(time.Second/2), + ) + return conn, err +} +func (c *retryConnector) Driver() driver.Driver { return c.drv } diff --git a/util/sqldrv/retrydriver.go b/util/sqldrv/retrydriver.go new file mode 100644 index 0000000000..54d3c1f5df --- /dev/null +++ b/util/sqldrv/retrydriver.go @@ -0,0 +1,48 @@ +package sqldrv + +import ( + "context" + "database/sql/driver" +) + +// 
RetryDriver will wrap a driver.Driver so that all new connections will be +// retried on temporary errors. +type RetryDriver struct { + drv driver.Driver + limit int +} + +var ( + _ driver.Driver = (*RetryDriver)(nil) + _ driver.DriverContext = (*RetryDriver)(nil) +) + +// NewRetryDriver returns a new RetryDriver with the provided connection retry limit. +func NewRetryDriver(drv driver.Driver, retryLimit int) *RetryDriver { + if retryLimit == 0 { + retryLimit = 10 + } + return &RetryDriver{drv: drv, limit: retryLimit} +} + +func (rd *RetryDriver) Open(name string) (driver.Conn, error) { + cn, err := rd.OpenConnector(name) + if err != nil { + return nil, err + } + + return cn.Connect(context.Background()) +} + +func (rd *RetryDriver) OpenConnector(name string) (driver.Connector, error) { + dbc, ok := rd.drv.(driver.DriverContext) + if !ok { + return &retryConnector{name: name, drv: rd}, nil + } + + cn, err := dbc.OpenConnector(name) + if err != nil { + return nil, err + } + return &retryConnector{dbc: cn, drv: rd}, nil +} From 575cacf03e4c56bec3fff083949ab000788b1705 Mon Sep 17 00:00:00 2001 From: Nathaniel Caza Date: Tue, 1 Mar 2022 10:40:13 -0600 Subject: [PATCH 003/225] remove trace package --- sqltrace/attributes.go | 21 ----- sqltrace/conn.go | 157 ------------------------------------ sqltrace/connector.go | 35 -------- sqltrace/driver.go | 58 ------------- sqltrace/rows.go | 20 ----- sqltrace/simpleconnector.go | 35 -------- sqltrace/stmt.go | 89 -------------------- sqltrace/tx.go | 31 ------- 8 files changed, 446 deletions(-) delete mode 100644 sqltrace/attributes.go delete mode 100644 sqltrace/conn.go delete mode 100644 sqltrace/connector.go delete mode 100644 sqltrace/driver.go delete mode 100644 sqltrace/rows.go delete mode 100644 sqltrace/simpleconnector.go delete mode 100644 sqltrace/stmt.go delete mode 100644 sqltrace/tx.go diff --git a/sqltrace/attributes.go b/sqltrace/attributes.go deleted file mode 100644 index 7c09d19102..0000000000 --- 
a/sqltrace/attributes.go +++ /dev/null @@ -1,21 +0,0 @@ -package sqltrace - -import ( - "net/url" - "strings" - - "go.opencensus.io/trace" -) - -func getConnAttributes(name string) ([]trace.Attribute, error) { - u, err := url.Parse(name) - if err != nil { - return nil, err - } - - return []trace.Attribute{ - trace.StringAttribute("sql.user", u.User.Username()), - trace.StringAttribute("sql.db", strings.TrimPrefix(u.Path, "/")), - trace.StringAttribute("sql.host", u.Host), - }, nil -} diff --git a/sqltrace/conn.go b/sqltrace/conn.go deleted file mode 100644 index 069297a237..0000000000 --- a/sqltrace/conn.go +++ /dev/null @@ -1,157 +0,0 @@ -package sqltrace - -import ( - "context" - "database/sql" - "database/sql/driver" - "fmt" - "strconv" - - "go.opencensus.io/trace" -) - -type _Conn struct { - conn driver.Conn - drv *_Driver - - span *trace.Span - - attrs []trace.Attribute -} - -var _ driver.Conn = &_Conn{} -var _ driver.ConnBeginTx = &_Conn{} -var _ driver.ConnPrepareContext = &_Conn{} -var _ driver.ExecerContext = &_Conn{} -var _ driver.QueryerContext = &_Conn{} - -func (c *_Conn) Prepare(query string) (driver.Stmt, error) { - return c.PrepareContext(context.Background(), query) -} -func (c *_Conn) PrepareContext(ctx context.Context, query string) (stmt driver.Stmt, err error) { - ctx, sp := c.startSpan(ctx, "SQL.Prepare") - defer sp.End() - c.annotateSpan(query, nil, sp) - - if cp, ok := c.conn.(driver.ConnPrepareContext); ok { - stmt, err = cp.PrepareContext(ctx, query) - } else { - stmt, err = c.conn.Prepare(query) - } - errSpan(err, sp) - if err != nil { - return nil, err - } - - return &_Stmt{ - query: query, - Stmt: stmt, - conn: c, - }, nil -} - -func (c *_Conn) startSpan(ctx context.Context, name string) (context.Context, *trace.Span) { - if c.span != nil { - return trace.StartSpanWithRemoteParent(ctx, name, c.span.SpanContext()) - } - - return trace.StartSpan(ctx, name) -} - -func (c *_Conn) Begin() (driver.Tx, error) { - return 
c.BeginTx(context.Background(), driver.TxOptions{}) -} - -func (c *_Conn) BeginTx(ctx context.Context, opts driver.TxOptions) (tx driver.Tx, err error) { - ctx, sp := c.startSpan(ctx, "SQL.Tx") - sp.AddAttributes( - trace.BoolAttribute("sql.tx.readOnly", opts.ReadOnly), - trace.Int64Attribute("sql.tx.isolation", int64(opts.Isolation)), - ) - - if cx, ok := c.conn.(driver.ConnBeginTx); ok { - if c.drv.pgxRRFix && sql.IsolationLevel(opts.Isolation) == sql.LevelRepeatableRead { - opts.Isolation = driver.IsolationLevel(sql.LevelSnapshot) - } - tx, err = cx.BeginTx(ctx, opts) - } else { - //lint:ignore SA1019 We have to fallback if the wrapped driver doesn't implement ConnBeginTx. - tx, err = c.conn.Begin() - } - errSpan(err, sp) - if err != nil { - sp.End() - return nil, err - } - c.span = sp - return &_Tx{conn: c, tx: tx, ctx: ctx}, nil -} -func (c *_Conn) Close() error { - return c.conn.Close() -} - -func (c *_Conn) annotateSpan(query string, args []driver.NamedValue, sp *trace.Span) { - sp.AddAttributes(c.attrs...) - if c.drv.includeQuery { - sp.AddAttributes( - trace.StringAttribute("sql.query", query), - ) - } - if c.drv.includeArgs && len(args) > 0 { - for _, arg := range args { - if arg.Name == "" { - arg.Name = "$" + strconv.Itoa(arg.Ordinal) - } - - sp.AddAttributes( - trace.StringAttribute("sql.arg["+strconv.Quote(arg.Name)+"]", fmt.Sprintf("%v", arg.Value)), - ) - } - } -} - -func (c *_Conn) ExecContext(ctx context.Context, query string, args []driver.NamedValue) (res driver.Result, err error) { - cec, cecOk := c.conn.(driver.ExecerContext) - //lint:ignore SA1019 We have to fallback if the wrapped driver doesn't implement ExecerContext. 
- ce, ceOk := c.conn.(driver.Execer) - if !cecOk && !ceOk { - return nil, driver.ErrSkip - } - - ctx, sp := c.startSpan(ctx, "SQL.Exec") - defer sp.End() - c.annotateSpan(query, args, sp) - - if cecOk { - res, err = cec.ExecContext(ctx, query, args) - } else { - res, err = ce.Exec(query, getValue(args)) - } - errSpan(err, sp) - - return res, err -} - -func (c *_Conn) QueryContext(ctx context.Context, query string, args []driver.NamedValue) (rows driver.Rows, err error) { - cqc, cqcOk := c.conn.(driver.QueryerContext) - //lint:ignore SA1019 We have to fallback if the wrapped driver doesn't implement QueryerContext. - cq, cqOk := c.conn.(driver.Queryer) - if !cqcOk && !cqOk { - return nil, driver.ErrSkip - } - - ctx, sp := c.startSpan(ctx, "SQL.Query") - c.annotateSpan(query, args, sp) - if cqcOk { - rows, err = cqc.QueryContext(ctx, query, args) - } else { - rows, err = cq.Query(query, getValue(args)) - } - errSpan(err, sp) - if err != nil { - sp.End() - return nil, err - } - - return &_Rows{Rows: rows, sp: sp}, nil -} diff --git a/sqltrace/connector.go b/sqltrace/connector.go deleted file mode 100644 index 08d87a88da..0000000000 --- a/sqltrace/connector.go +++ /dev/null @@ -1,35 +0,0 @@ -package sqltrace - -import ( - "context" - "database/sql/driver" - "time" - - "github.com/target/goalert/retry" - "go.opencensus.io/trace" -) - -type _Connector struct { - dbc driver.Connector - drv *_Driver - - attrs []trace.Attribute -} - -func (c *_Connector) Connect(ctx context.Context) (driver.Conn, error) { - var conn driver.Conn - var err error - err = retry.DoTemporaryError(func(_ int) error { - conn, err = c.dbc.Connect(ctx) - return err - }, - retry.Log(ctx), - retry.Context(ctx), - retry.Limit(10), - retry.FibBackoff(time.Second/2), - ) - return &_Conn{conn: conn, drv: c.drv, attrs: c.attrs}, err -} -func (c *_Connector) Driver() driver.Driver { - return c.drv -} diff --git a/sqltrace/driver.go b/sqltrace/driver.go deleted file mode 100644 index 78fde14f3b..0000000000 
--- a/sqltrace/driver.go +++ /dev/null @@ -1,58 +0,0 @@ -package sqltrace - -import ( - "database/sql/driver" - - "github.com/jackc/pgx/v4/stdlib" -) - -type _Driver struct { - drv driver.Driver - includeQuery bool - includeArgs bool - - // TODO: remove once pgx supports specifying `sql.LevelRepeatableRead` - // https://github.com/jackc/pgx/pull/572 - pgxRRFix bool -} - -// WrapOptions allow specifying additional information to include in the trace. -type WrapOptions struct { - Query bool // include the SQL query - Args bool // include the arguments passed -} - -// WrapDriver will wrap a database driver with tracing information. -func WrapDriver(drv driver.Driver, opts *WrapOptions) driver.DriverContext { - if opts == nil { - opts = &WrapOptions{} - } - - _, pgxRRFix := drv.(*stdlib.Driver) - return &_Driver{drv: drv, includeArgs: opts.Args, includeQuery: opts.Query, pgxRRFix: pgxRRFix} -} - -func (d *_Driver) Open(name string) (driver.Conn, error) { - attrs, err := getConnAttributes(name) - if err != nil { - return nil, err - } - c, err := d.drv.Open(name) - return &_Conn{conn: c, drv: d, attrs: attrs}, err -} - -func (d *_Driver) OpenConnector(name string) (driver.Connector, error) { - attrs, err := getConnAttributes(name) - if err != nil { - return nil, err - } - if dc, ok := d.drv.(driver.DriverContext); ok { - dbc, err := dc.OpenConnector(name) - return &_Connector{dbc: dbc, drv: d, attrs: attrs}, err - } - return newSimpleConnector(d, name) -} - -func (d *_Driver) Driver() driver.Driver { - return d -} diff --git a/sqltrace/rows.go b/sqltrace/rows.go deleted file mode 100644 index d3736204c1..0000000000 --- a/sqltrace/rows.go +++ /dev/null @@ -1,20 +0,0 @@ -package sqltrace - -import ( - "database/sql/driver" - - "go.opencensus.io/trace" -) - -type _Rows struct { - driver.Rows - sp *trace.Span -} - -func (r *_Rows) Next(dest []driver.Value) error { - return errSpan(r.Rows.Next(dest), r.sp) -} -func (r *_Rows) Close() error { - defer r.sp.End() - return 
errSpan(r.Rows.Close(), r.sp) -} diff --git a/sqltrace/simpleconnector.go b/sqltrace/simpleconnector.go deleted file mode 100644 index 408cb80490..0000000000 --- a/sqltrace/simpleconnector.go +++ /dev/null @@ -1,35 +0,0 @@ -package sqltrace - -import ( - "context" - "database/sql/driver" - "time" - - "github.com/target/goalert/retry" -) - -type simpleConnector struct { - name string - drv *_Driver -} - -func newSimpleConnector(drv *_Driver, name string) (*simpleConnector, error) { - return &simpleConnector{name: name, drv: drv}, nil -} -func (c *simpleConnector) Driver() driver.Driver { - return c.drv -} -func (c *simpleConnector) Connect(ctx context.Context) (driver.Conn, error) { - var conn driver.Conn - var err error - err = retry.DoTemporaryError(func(_ int) error { - conn, err = c.drv.Open(c.name) - return err - }, - retry.Log(ctx), - retry.Context(ctx), - retry.Limit(10), - retry.FibBackoff(time.Second), - ) - return conn, err -} diff --git a/sqltrace/stmt.go b/sqltrace/stmt.go deleted file mode 100644 index 77dc472b22..0000000000 --- a/sqltrace/stmt.go +++ /dev/null @@ -1,89 +0,0 @@ -package sqltrace - -import ( - "context" - "database/sql/driver" - "errors" - "io" - - "github.com/target/goalert/util/sqlutil" - "go.opencensus.io/trace" -) - -type _Stmt struct { - driver.Stmt - query string - conn *_Conn -} - -var _ driver.Stmt = &_Stmt{} -var _ driver.StmtExecContext = &_Stmt{} -var _ driver.StmtQueryContext = &_Stmt{} - -func getValue(args []driver.NamedValue) []driver.Value { - values := make([]driver.Value, len(args)) - for i, arg := range args { - values[i] = arg.Value - } - return values -} -func errSpan(err error, sp *trace.Span) error { - if err == nil { - return nil - } - if errors.Is(err, io.EOF) { - return err - } - - attrs := []trace.Attribute{trace.BoolAttribute("error", true)} - - if e := sqlutil.MapError(err); e != nil { - attrs = append(attrs, - trace.StringAttribute("sql.error.detail", e.Detail), - trace.StringAttribute("sql.error.hint", 
e.Hint), - trace.StringAttribute("sql.error.code", e.Code), - trace.StringAttribute("sql.error.table", e.TableName), - trace.StringAttribute("sql.error.constraint", e.ConstraintName), - trace.StringAttribute("sql.error.where", e.Where), - trace.StringAttribute("sql.error.column", e.ColumnName), - ) - } - sp.Annotate(attrs, err.Error()) - - return err -} - -func (s *_Stmt) ExecContext(ctx context.Context, args []driver.NamedValue) (res driver.Result, err error) { - ctx, sp := s.conn.startSpan(ctx, "SQL.Stmt.Exec") - defer sp.End() - s.conn.annotateSpan(s.query, args, sp) - - if sec, ok := s.Stmt.(driver.StmtExecContext); ok { - res, err = sec.ExecContext(ctx, args) - } else { - //lint:ignore SA1019 We have to fallback if the wrapped driver doesn't implement StmtExecContext. - res, err = s.Stmt.Exec(getValue(args)) - } - errSpan(err, sp) - - return res, err -} - -func (s *_Stmt) QueryContext(ctx context.Context, args []driver.NamedValue) (rows driver.Rows, err error) { - ctx, sp := s.conn.startSpan(ctx, "SQL.Stmt.Query") - s.conn.annotateSpan(s.query, args, sp) - - if sqc, ok := s.Stmt.(driver.StmtQueryContext); ok { - rows, err = sqc.QueryContext(ctx, args) - } else { - //lint:ignore SA1019 We have to fallback if the wrapped driver doesn't implement StmtQueryContext. 
- rows, err = s.Stmt.Query(getValue(args)) - } - errSpan(err, sp) - if err != nil { - sp.End() - return nil, err - } - - return &_Rows{Rows: rows, sp: sp}, nil -} diff --git a/sqltrace/tx.go b/sqltrace/tx.go deleted file mode 100644 index 12a4e3c2ad..0000000000 --- a/sqltrace/tx.go +++ /dev/null @@ -1,31 +0,0 @@ -package sqltrace - -import ( - "context" - "database/sql/driver" - - "go.opencensus.io/trace" -) - -type _Tx struct { - conn *_Conn - tx driver.Tx - ctx context.Context -} - -func (tx *_Tx) Rollback() error { - _, sp := trace.StartSpan(tx.ctx, "SQL.Tx.Rollback") - err := errSpan(tx.tx.Rollback(), sp) - sp.End() - tx.conn.span.End() - tx.conn.span = nil - return err -} -func (tx *_Tx) Commit() error { - _, sp := trace.StartSpan(tx.ctx, "SQL.Tx.Commit") - err := errSpan(tx.tx.Commit(), sp) - sp.End() - tx.conn.span.End() - tx.conn.span = nil - return err -} From f30aefd2fd845f156b9876920764ab45b455583a Mon Sep 17 00:00:00 2001 From: Nathaniel Caza Date: Tue, 1 Mar 2022 10:40:30 -0600 Subject: [PATCH 004/225] use retry wrapper --- app/cmd.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/app/cmd.go b/app/cmd.go index 1f40c86d5d..e50b0514d7 100644 --- a/app/cmd.go +++ b/app/cmd.go @@ -24,12 +24,12 @@ import ( "github.com/target/goalert/migrate" "github.com/target/goalert/permission" "github.com/target/goalert/remotemonitor" - "github.com/target/goalert/sqltrace" "github.com/target/goalert/switchover" "github.com/target/goalert/switchover/dbsync" "github.com/target/goalert/user" "github.com/target/goalert/util" "github.com/target/goalert/util/log" + "github.com/target/goalert/util/sqldrv" "github.com/target/goalert/validation" "github.com/target/goalert/version" "github.com/target/goalert/web" @@ -102,7 +102,7 @@ var RootCmd = &cobra.Command{ } }() - wrappedDriver := sqltrace.WrapDriver(&stdlib.Driver{}, &sqltrace.WrapOptions{Query: true}) + wrappedDriver := sqldrv.NewRetryDriver(&stdlib.Driver{}, 10) u, err := url.Parse(cfg.DBURL) if err 
!= nil { From 04ed284c9a5cd5b012ec6a898400d0b800a8bec3 Mon Sep 17 00:00:00 2001 From: Nathaniel Caza Date: Tue, 1 Mar 2022 11:48:16 -0600 Subject: [PATCH 005/225] new pkg --- app/cmd.go | 19 +- app/config.go | 4 + .../20220225095051-switchover-mk2.sql | 9 + swo/drvconn.go | 121 ++++++++++++ swo/drvconnector.go | 74 +++++++ swo/manager.go | 180 ++++++++++++++++++ swo/swomsg/log.go | 138 ++++++++++++++ swo/swomsg/messages.go | 86 +++++++++ swo/syncer.go | 9 + 9 files changed, 627 insertions(+), 13 deletions(-) create mode 100644 migrate/migrations/20220225095051-switchover-mk2.sql create mode 100644 swo/drvconn.go create mode 100644 swo/drvconnector.go create mode 100644 swo/manager.go create mode 100644 swo/swomsg/log.go create mode 100644 swo/swomsg/messages.go create mode 100644 swo/syncer.go diff --git a/app/cmd.go b/app/cmd.go index e50b0514d7..f15bea3a60 100644 --- a/app/cmd.go +++ b/app/cmd.go @@ -24,8 +24,8 @@ import ( "github.com/target/goalert/migrate" "github.com/target/goalert/permission" "github.com/target/goalert/remotemonitor" - "github.com/target/goalert/switchover" "github.com/target/goalert/switchover/dbsync" + "github.com/target/goalert/swo" "github.com/target/goalert/user" "github.com/target/goalert/util" "github.com/target/goalert/util/log" @@ -155,8 +155,8 @@ var RootCmd = &cobra.Command{ if err != nil { return errors.Wrap(err, "connect to postgres") } + var db *sql.DB - var h *switchover.Handler if cfg.DBURLNext != "" { u, err := url.Parse(cfg.DBURLNext) if err != nil { @@ -172,11 +172,12 @@ var RootCmd = &cobra.Command{ if err != nil { return errors.Wrap(err, "connect to postres (next)") } - h, err = switchover.NewHandler(ctx, l, dbc, dbcNext, cfg.DBURL, cfg.DBURLNext) + mgr, err := swo.NewManager(dbc, dbcNext, !cfg.APIOnly) if err != nil { return errors.Wrap(err, "init changeover handler") } - db = h.DB() + db = mgr.DB() + cfg.SWO = mgr } else { db = sql.OpenDB(dbc) } @@ -185,16 +186,8 @@ var RootCmd = &cobra.Command{ if err != nil { return 
errors.Wrap(err, "init app") } - if h != nil { - h.SetApp(app) - } - go handleShutdown(ctx, func(ctx context.Context) error { - if h != nil { - h.Abort() - } - return app.Shutdown(ctx) - }) + go handleShutdown(ctx, app.Shutdown) // trigger engine cycles by process signal trigCh := make(chan os.Signal, 1) diff --git a/app/config.go b/app/config.go index dc822ee7d9..befe375d58 100644 --- a/app/config.go +++ b/app/config.go @@ -6,6 +6,7 @@ import ( "github.com/target/goalert/config" "github.com/target/goalert/keyring" + "github.com/target/goalert/swo" "github.com/target/goalert/util/log" ) @@ -71,4 +72,7 @@ type Config struct { // InitialConfig will be pushed into the config store // if specified before the engine is started. InitialConfig *config.Config + + // SWO should be set to operate in switchover mode. + SWO *swo.Manager } diff --git a/migrate/migrations/20220225095051-switchover-mk2.sql b/migrate/migrations/20220225095051-switchover-mk2.sql new file mode 100644 index 0000000000..671bccce47 --- /dev/null +++ b/migrate/migrations/20220225095051-switchover-mk2.sql @@ -0,0 +1,9 @@ +-- +migrate Up +CREATE TABLE switchover_log ( + id BIGINT PRIMARY KEY, + timestamp timestamp with time zone NOT NULL DEFAULT now(), + data jsonb NOT NULL +); + +-- +migrate Down +DROP TABLE switchover_log; diff --git a/swo/drvconn.go b/swo/drvconn.go new file mode 100644 index 0000000000..07abe2b5b3 --- /dev/null +++ b/swo/drvconn.go @@ -0,0 +1,121 @@ +package swo + +import ( + "context" + "database/sql/driver" + "fmt" + "time" +) + +type Conn struct { + DBConn + + n *Notifier + locked bool +} + +var ErrDone = fmt.Errorf("switchover is already done") + +type DBConn interface { + driver.Conn + driver.Pinger + driver.ExecerContext + driver.QueryerContext + driver.ConnPrepareContext + driver.ConnBeginTx + driver.NamedValueChecker +} + +var ( + _ driver.SessionResetter = (*Conn)(nil) + _ driver.Validator = (*Conn)(nil) +) + +func (c *Conn) lock(ctx context.Context) error { + if c.n.IsDone() 
{ + return driver.ErrBadConn + } + if c.locked { + return nil + } + + _, err := c.ExecContext(ctx, "select pg_advisory_lock_shared(4369)", nil) + if err != nil { + return err + } + c.locked = true + + rows, err := c.QueryContext(ctx, "select current_state from switchover_state", nil) + if err != nil { + return err + } + + scan := make([]driver.Value, 1) + err = rows.Next(scan) + if err != nil { + return err + } + + var state string + switch t := scan[0].(type) { + case string: + state = t + case []byte: + state = string(t) + default: + return fmt.Errorf("expected string for current_state value, got %t", t) + } + err = rows.Close() + if err != nil { + return err + } + + if state == "use_next_db" { + c.n.Done() + return driver.ErrBadConn + } + + return nil +} + +func (c *Conn) unlock(ctx context.Context) error { + if !c.locked { + return nil + } + + _, err := c.ExecContext(ctx, "select pg_advisory_unlock_shared(4369)", nil) + if err != nil { + return err + } + + c.locked = false + + return nil +} + +func (c *Conn) ResetSession(ctx context.Context) error { + err := c.lock(ctx) + if err != nil { + return err + } + + if s, ok := c.DBConn.(driver.SessionResetter); ok { + return s.ResetSession(ctx) + } + + return nil +} + +func (c *Conn) IsValid() bool { + if c.n.IsDone() { + return false + } + ctx, cancel := context.WithTimeout(context.Background(), 3*time.Second) + defer cancel() + + if err := c.unlock(ctx); err != nil { + return false + } + + return true +} diff --git a/swo/drvconnector.go b/swo/drvconnector.go new file mode 100644 index 0000000000..cee8541e68 --- /dev/null +++ b/swo/drvconnector.go @@ -0,0 +1,74 @@ +package swo + +import ( + "context" + "database/sql/driver" + "errors" + "sync" +) + +type Connector struct { + dbcOld, dbcNew driver.Connector + + n *Notifier +} + +type Notifier struct { + doneCh chan struct{} + done sync.Once +} + +func NewNotifier() *Notifier { + return &Notifier{ + doneCh: make(chan struct{}), + } +} +func (n *Notifier) Done() { 
n.done.Do(func() { close(n.doneCh) }) } +func (n *Notifier) IsDone() bool { + select { + case <-n.doneCh: + return true + default: + return false + } +} + +var _ driver.Connector = (*Connector)(nil) + +func NewConnector(dbcOld, dbcNew driver.Connector) *Connector { + return &Connector{ + dbcOld: dbcOld, + dbcNew: dbcNew, + n: NewNotifier(), + } +} + +func (drv *Connector) Driver() driver.Driver { return nil } + +func (drv *Connector) Connect(ctx context.Context) (driver.Conn, error) { + if drv.n.IsDone() { + return drv.dbcNew.Connect(ctx) + } + + conn, err := drv.dbcOld.Connect(ctx) + if err != nil { + return nil, err + } + + drvConn := &Conn{ + DBConn: conn.(DBConn), + n: drv.n, + } + + err = drvConn.lock(ctx) + if err != nil { + conn.Close() + + if errors.Is(err, driver.ErrBadConn) { + return drv.dbcNew.Connect(ctx) + } + return nil, err + } + + return drvConn, nil +} diff --git a/swo/manager.go b/swo/manager.go new file mode 100644 index 0000000000..d5fd50add6 --- /dev/null +++ b/swo/manager.go @@ -0,0 +1,180 @@ +package swo + +import ( + "context" + "database/sql" + "database/sql/driver" + "fmt" + + "github.com/google/uuid" + "github.com/target/goalert/swo/swomsg" + "github.com/target/goalert/util/log" + "gorm.io/driver/postgres" + "gorm.io/gorm" +) + +type Manager struct { + id uuid.UUID + + dbOld, dbNew *gorm.DB + protectedDB *sql.DB + + s Syncer + + msgLog *swomsg.Log + nextMsgLog *swomsg.Log + + msgCh chan *swomsg.Message + nextMsgCh chan *swomsg.Message + errCh chan error + + nodes map[uuid.UUID]*Node + exec map[uuid.UUID]*swomsg.Message + + cancel func() + + canExec bool +} + +type Node struct { + ID uuid.UUID + + OldValid bool + NewValid bool +} + +func NewManager(dbcOld, dbcNew driver.Connector, canExec bool) (*Manager, error) { + gCfg := &gorm.Config{PrepareStmt: true} + gormOld, err := gorm.Open(postgres.New(postgres.Config{Conn: sql.OpenDB(dbcOld)}), gCfg) + if err != nil { + return nil, err + } + gormNew, err := 
gorm.Open(postgres.New(postgres.Config{Conn: sql.OpenDB(dbcNew)}), gCfg) + if err != nil { + return nil, err + } + + id := uuid.New() + msgLog, err := swomsg.NewLog(gormOld, id) + if err != nil { + return nil, err + } + + msgLogNext, err := swomsg.NewLog(gormNew, id) + if err != nil { + return nil, err + } + + ctx, cancel := context.WithCancel(context.Background()) + m := &Manager{ + dbOld: gormOld, + dbNew: gormNew, + + protectedDB: sql.OpenDB(NewConnector(dbcOld, dbcNew)), + + id: id, + msgLog: msgLog, + nextMsgLog: msgLogNext, + canExec: canExec, + msgCh: make(chan *swomsg.Message), + nextMsgCh: make(chan *swomsg.Message), + errCh: make(chan error, 10), + cancel: cancel, + nodes: make(map[uuid.UUID]*Node), + exec: make(map[uuid.UUID]*swomsg.Message), + } + + go func() { + for { + msg, err := m.msgLog.Next(ctx) + if err != nil { + m.errCh <- fmt.Errorf("read from log: %w", err) + return + } + m.msgCh <- msg + } + }() + go func() { + msg, err := m.nextMsgLog.Next(ctx) + if err != nil { + m.errCh <- fmt.Errorf("read from next log: %w", err) + return + } + m.nextMsgCh <- msg + }() + + go m.loop(ctx) + + return m, nil +} + +func (m *Manager) DB() *sql.DB { return m.protectedDB } + +func (m *Manager) processMessage(ctx context.Context, msg *swomsg.Message) { + appendLog := func(msg interface{}) { + err := m.msgLog.Append(ctx, msg) + if err != nil { + log.Log(ctx, err) + } + } + + switch { + case msg.Ping != nil: + appendLog(swomsg.Pong{IsNextDB: false}) + err := m.nextMsgLog.Append(ctx, swomsg.Pong{IsNextDB: true}) + if err != nil { + log.Log(ctx, err) + } + case msg.Reset != nil: + m.nodes = make(map[uuid.UUID]*Node) + m.exec = make(map[uuid.UUID]*swomsg.Message) + m.id = uuid.New() + } + + if !m.canExec { + // api-only node, don't process execute commands + return + } + + // any execute command needs to be claimed + switch { + case msg.Execute != nil: + m.exec[msg.ID] = msg + appendLog(swomsg.Claim{MsgID: msg.ID}) + case msg.Reset != nil: + m.exec[msg.ID] = msg + 
appendLog(swomsg.Claim{MsgID: msg.ID}) + case msg.Claim != nil: + execMsg := m.exec[msg.Claim.MsgID] + delete(m.exec, msg.Claim.MsgID) + if msg.NodeID != m.id { + // claimed by another node + return + } + + m.execute(execMsg) + } +} + +func (m *Manager) execute(msg *swomsg.Message) { + switch { + } +} + +func (m *Manager) loop(ctx context.Context) { + for { + select { + case <-ctx.Done(): + return + case msg := <-m.msgCh: + m.processMessage(ctx, msg) + case msg := <-m.nextMsgCh: + if msg.Pong != nil && msg.Pong.IsNextDB { + m.nodes[msg.NodeID].NewValid = true + } + case err := <-m.errCh: + log.Log(ctx, err) + m.msgLog.Append(ctx, swomsg.Error{Details: err.Error()}) + } + } +} diff --git a/swo/swomsg/log.go b/swo/swomsg/log.go new file mode 100644 index 0000000000..3c1b6ddd34 --- /dev/null +++ b/swo/swomsg/log.go @@ -0,0 +1,138 @@ +package swomsg + +import ( + "context" + "encoding/json" + "fmt" + "time" + + "github.com/google/uuid" + "github.com/target/goalert/util/sqlutil" + "gorm.io/gorm" +) + +type Log struct { + db *gorm.DB + id uuid.UUID + + readID int64 + + events chan []logEvent + lastLoad time.Time +} + +var ErrStaleLog = fmt.Errorf("cannot append until log is read") + +type logEvent struct { + ID int64 + Data []byte +} + +func NewLog(db *gorm.DB, id uuid.UUID) (*Log, error) { + l := &Log{ + id: id, + db: db.Table("switchover_log"), + events: make(chan []logEvent, 1), + } + l.events <- nil + + return l, nil +} + +func ctxSleep(ctx context.Context, d time.Duration) error { + if d <= 0 { + return nil + } + + t := time.NewTimer(d) + defer t.Stop() + + select { + case <-ctx.Done(): + return ctx.Err() + case <-t.C: + return nil + } +} + +func (l *Log) Next(ctx context.Context) (*Message, error) { + events := <-l.events + var err error + for len(events) == 0 { + events, err = l.loadEvents(ctx) + if err != nil { + l.events <- nil + return nil, err + } + } + + var w Message + err = json.Unmarshal(events[0].Data, &w) + if err != nil { + l.events <- events + return 
nil, err + } + + l.readID = events[0].ID + l.events <- events[1:] + + return &w, nil +} + +func (l *Log) loadEvents(ctx context.Context) ([]logEvent, error) { + err := ctxSleep(ctx, time.Second-time.Since(l.lastLoad)) + if err != nil { + return nil, err + } + l.lastLoad = time.Now() + + var events []logEvent + err = l.db. + Where("timestamp > now() - interval '1 minute'"). + Where("id > ?", l.readID). + Order("id asc"). + Limit(100). + Find(&events).Error + if err != nil { + return nil, err + } + + return events, nil +} + +func (l *Log) Append(ctx context.Context, v interface{}) error { + var msg Message + switch m := v.(type) { + case Ping: + msg.Ping = &m + case Pong: + msg.Pong = &m + case Reset: + msg.Reset = &m + case Error: + msg.Error = &m + case Execute: + msg.Execute = &m + case Claim: + msg.Claim = &m + default: + return fmt.Errorf("unknown message type %T", m) + } + + msg.ID = uuid.New() + msg.NodeID = l.id + msg.TS = time.Now() + data, err := json.Marshal(msg) + if err != nil { + return err + } + e := <-l.events + err = l.db.WithContext(ctx).Exec("insert into switchover_log (id, data) values ((select max(id)+1 from switchover_log), ?)", data).Error + l.events <- e + + if dbErr := sqlutil.MapError(err); dbErr != nil && dbErr.Code == "23505" { + return ErrStaleLog + } + + return err +} diff --git a/swo/swomsg/messages.go b/swo/swomsg/messages.go new file mode 100644 index 0000000000..69ab96ed64 --- /dev/null +++ b/swo/swomsg/messages.go @@ -0,0 +1,86 @@ +package swomsg + +import ( + "time" + + "github.com/google/uuid" +) + +type Message struct { + Header + + Ping *Ping `json:",omitempty"` + Pong *Pong `json:",omitempty"` + Reset *Reset `json:",omitempty"` + Error *Error `json:",omitempty"` + Claim *Claim `json:",omitempty"` + Execute *Execute `json:",omitempty"` +} + +type Header struct { + ID uuid.UUID + NodeID uuid.UUID + TS time.Time +} + +type ( + Ping struct{} + Pong struct{ IsNextDB bool } + + Reset struct{ ClaimDeadline time.Time } + Execute 
struct{ ClaimDeadline time.Time } + + Claim struct { + MsgID uuid.UUID + } + + Error struct{ Details string } + + Plan struct { + BeginAt time.Time + ConsensusDeadline time.Time + GlobalPauseAt time.Time + AbsoluteDeadline time.Time + } + ConfirmPlan struct{ MsgID uuid.UUID } + Progress struct { + Details string + } + + Done struct{} +) + +/* +UI + +{ Connections Section } + +{ Nodes section, with "Refresh" button} +Node ID | Ping Response Time | DB Calls/min (1m, 5m, 15m) | DB Resp. Avg (1m, 5m, 15m) + +States: Idle, Error, Active, Done +{ Status section (progress text here), with "Reset", "Execute" buttons} + +1. User goes to UI page + +2. User clicks "Refresh" button +3. Ping is sent +4. Pong is received from all nodes +5. UI updates + +6. User clicks "Execute" button +7. Execute is sent + +8. Execute is claimed by engine +9. Begins instrumenting, syncing, etc... sending Progress messages +10. UI updates with progress + +11. Engine sends out Plan message +12. All nodes ConfirmPlan by ConsensusDeadline +13. Engine performs switchover + +14. 
Engine sends Done message + +** if anything goes wrong, engine sends Error message and Reset is required by the user + +*/ diff --git a/swo/syncer.go b/swo/syncer.go new file mode 100644 index 0000000000..6e2ba4cc9b --- /dev/null +++ b/swo/syncer.go @@ -0,0 +1,9 @@ +package swo + +import "context" + +type Syncer interface { + Reset(context.Context) error + Setup(context.Context) error + Sync(ctx context.Context, progress func(float64)) error +} From fd9d43d7c3bb4a001e129bae755307d8ca4c1da3 Mon Sep 17 00:00:00 2001 From: Nathaniel Caza Date: Tue, 1 Mar 2022 11:53:16 -0600 Subject: [PATCH 006/225] fix bool --- util/sqldrv/retryconnector.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/util/sqldrv/retryconnector.go b/util/sqldrv/retryconnector.go index a80ed1f1a0..e4a8a30cf2 100644 --- a/util/sqldrv/retryconnector.go +++ b/util/sqldrv/retryconnector.go @@ -20,7 +20,7 @@ func (rc *retryConnector) Connect(ctx context.Context) (driver.Conn, error) { var conn driver.Conn var err error err = retry.DoTemporaryError(func(_ int) error { - if rc.dbc == nil { + if rc.dbc != nil { conn, err = rc.dbc.Connect(ctx) } else { conn, err = rc.drv.Open(rc.name) From c367ff32f20c599d53d8387a54a8c9a88ada694f Mon Sep 17 00:00:00 2001 From: Nathaniel Caza Date: Wed, 16 Mar 2022 10:16:14 -0500 Subject: [PATCH 007/225] simplify --- app/app.go | 21 ++--- app/cmd.go | 5 +- app/contextlocker.go | 164 --------------------------------------- app/inithttp.go | 4 - app/lifecycle/manager.go | 6 +- app/pause.go | 49 +++--------- app/shutdown.go | 1 - 7 files changed, 27 insertions(+), 223 deletions(-) delete mode 100644 app/contextlocker.go diff --git a/app/app.go b/app/app.go index 55e156060e..185e1841d6 100644 --- a/app/app.go +++ b/app/app.go @@ -75,9 +75,8 @@ type App struct { sysAPISrv *grpc.Server hSrv *health.Server - srv *http.Server - requestLock *contextLocker - startupErr error + srv *http.Server + startupErr error notificationManager *notification.Manager Engine 
*engine.Engine @@ -154,8 +153,16 @@ func NewApp(c Config, db *sql.DB) (*App, error) { db: db, cfg: c, doneCh: make(chan struct{}), + } - requestLock: newContextLocker(), + if c.SWO != nil { + c.SWO.SetPauseResumer(app) + log.Logf(app.LogBackgroundContext(), "SWO Enabled.") + go func() { + for range time.NewTicker(3 * time.Second).C { + log.Logf(app.LogBackgroundContext(), "SWO: %v", c.SWO.Stats()) + } + }() } gCfg := &gorm.Config{ @@ -234,9 +241,3 @@ func (a *App) URL() string { func (a *App) Status() lifecycle.Status { return a.mgr.Status() } - -// ActiveRequests returns the current number of active -// requests, not including pending ones during pause. -func (a *App) ActiveRequests() int { - return a.requestLock.RLockCount() -} diff --git a/app/cmd.go b/app/cmd.go index f15bea3a60..ff872850d2 100644 --- a/app/cmd.go +++ b/app/cmd.go @@ -172,9 +172,10 @@ var RootCmd = &cobra.Command{ if err != nil { return errors.Wrap(err, "connect to postres (next)") } - mgr, err := swo.NewManager(dbc, dbcNext, !cfg.APIOnly) + + mgr, err := swo.NewManager(swo.Config{OldDBC: dbc, NewDBC: dbcNext, CanExec: !cfg.APIOnly}) if err != nil { - return errors.Wrap(err, "init changeover handler") + return errors.Wrap(err, "init switchover handler") } db = mgr.DB() cfg.SWO = mgr diff --git a/app/contextlocker.go b/app/contextlocker.go deleted file mode 100644 index a8f1eea04b..0000000000 --- a/app/contextlocker.go +++ /dev/null @@ -1,164 +0,0 @@ -package app - -import ( - "context" - "errors" - "sync/atomic" -) - -type contextLocker struct { - readCount int64 - - lock chan lockReq - unlock chan chan struct{} - - rLock chan struct{} - rUnlock chan struct{} - rNotLocked chan struct{} - - isShutdown chan struct{} - shutdownCh chan struct{} -} -type lockReq struct { - cancel <-chan struct{} - ch chan bool -} - -func (c *contextLocker) Shutdown(ctx context.Context) error { - select { - case <-c.shutdownCh: - case <-ctx.Done(): - return ctx.Err() - } - return nil -} -func newContextLocker() 
*contextLocker { - c := &contextLocker{ - lock: make(chan lockReq), - unlock: make(chan chan struct{}, 1), - rLock: make(chan struct{}), - rUnlock: make(chan struct{}), - rNotLocked: make(chan struct{}), - isShutdown: make(chan struct{}), - shutdownCh: make(chan struct{}), - } - go c.loop() - return c -} -func (c *contextLocker) writeLock(req lockReq) { - for atomic.LoadInt64(&c.readCount) > 0 { - select { - case <-c.rUnlock: - atomic.AddInt64(&c.readCount, -1) - case <-req.cancel: - req.ch <- false - return - case c.shutdownCh <- struct{}{}: - close(c.isShutdown) - close(c.shutdownCh) - req.ch <- false - return - } - } - - ch := make(chan struct{}) - c.unlock <- ch - req.ch <- true - for { - select { - case <-ch: - return - case <-c.rNotLocked: - } - } -} - -func (c *contextLocker) loop() { - for { - select { - // request for write lock always takes precedence - case req := <-c.lock: - c.writeLock(req) - continue - default: - } - - if atomic.LoadInt64(&c.readCount) == 0 { - select { - case req := <-c.lock: - c.writeLock(req) - case <-c.rLock: - atomic.AddInt64(&c.readCount, 1) - case <-c.rNotLocked: - case c.shutdownCh <- struct{}{}: - close(c.isShutdown) - close(c.shutdownCh) - return - } - continue - } - - select { - case req := <-c.lock: - c.writeLock(req) - case <-c.rLock: - atomic.AddInt64(&c.readCount, 1) - case <-c.rUnlock: - atomic.AddInt64(&c.readCount, -1) - case c.shutdownCh <- struct{}{}: - close(c.isShutdown) - close(c.shutdownCh) - return - } - } -} -func (c *contextLocker) RLockCount() int { - return int(atomic.LoadInt64(&c.readCount)) -} - -// ErrLockerShutdown is returned when attempting to acquire a lock after being shutdown. 
-var ErrLockerShutdown = errors.New("context locker is already shutdown") - -func (c *contextLocker) Lock(ctx context.Context) error { - ch := make(chan bool) - select { - case <-ctx.Done(): - return ctx.Err() - case c.lock <- lockReq{cancel: ctx.Done(), ch: ch}: - case <-c.isShutdown: - return ErrLockerShutdown - } - - if <-ch { - return nil - } - - return ctx.Err() -} -func (c *contextLocker) Unlock() { - select { - case ch := <-c.unlock: - ch <- struct{}{} - default: - // safe to call, even if not write-locked (unlike RUnlock) - } -} -func (c *contextLocker) RLock(ctx context.Context) error { - select { - case <-ctx.Done(): - return ctx.Err() - case c.rLock <- struct{}{}: - case <-c.isShutdown: - return ErrLockerShutdown - } - - return nil -} -func (c *contextLocker) RUnlock() { - select { - case c.rUnlock <- struct{}{}: - case c.rNotLocked <- struct{}{}: - panic("not locked") - case <-c.isShutdown: - } -} diff --git a/app/inithttp.go b/app/inithttp.go index 8a3b5a58f6..1a09a3b0ce 100644 --- a/app/inithttp.go +++ b/app/inithttp.go @@ -134,9 +134,6 @@ func (app *App) initHTTP(ctx context.Context) error { // limit max request size maxBodySizeMiddleware(app.cfg.MaxReqBodyBytes), - // pause has to become before anything that uses the DB (like auth) - app.pauseHandler, - // DB access func(next http.Handler) http.Handler { return http.HandlerFunc(func(w http.ResponseWriter, req *http.Request) { @@ -256,7 +253,6 @@ func (app *App) initHTTP(ctx context.Context) error { *app.twilioConfig, ) return http.HandlerFunc(func(w http.ResponseWriter, req *http.Request) { - if strings.HasPrefix(req.URL.Path, "/api/v2/twilio/") { twilioHandler.ServeHTTP(w, req) return diff --git a/app/lifecycle/manager.go b/app/lifecycle/manager.go index 909529b614..33536b3cff 100644 --- a/app/lifecycle/manager.go +++ b/app/lifecycle/manager.go @@ -54,8 +54,10 @@ type Manager struct { isPausing bool } -var _ Pausable = &Manager{} -var _ PauseResumer = &Manager{} +var ( + _ Pausable = &Manager{} + _ 
PauseResumer = &Manager{} +) // NewManager will construct a new manager wrapping the provided // run and shutdown funcs. diff --git a/app/pause.go b/app/pause.go index 5f77d01e6f..d28863a83e 100644 --- a/app/pause.go +++ b/app/pause.go @@ -2,62 +2,31 @@ package app import ( "context" - "net/http" - "github.com/target/goalert/switchover" "github.com/target/goalert/util/log" - - "go.opencensus.io/trace" ) -func (app *App) pauseHandler(next http.Handler) http.Handler { - return http.HandlerFunc(func(w http.ResponseWriter, req *http.Request) { - ctx := req.Context() - err := app.requestLock.RLock(ctx) - if err != nil { - log.Log(ctx, err) - return - } - defer app.requestLock.RUnlock() - next.ServeHTTP(w, req) - }) -} - // LogBackgroundContext returns a context.Background with the application logger configured. func (app *App) LogBackgroundContext() context.Context { return app.cfg.Logger.BackgroundContext() } func (app *App) Pause(ctx context.Context) error { - ctx = log.WithLogger(ctx, app.cfg.Logger) - ctx, sp := trace.StartSpan(ctx, "App.Pause") - defer sp.End() - - err := app.mgr.Pause(ctx) - if err != nil { - return err - } - app.db.SetMaxIdleConns(0) - return nil + return app.mgr.Pause(log.WithLogger(ctx, app.cfg.Logger)) } -func (app *App) Resume() { - app.db.SetMaxIdleConns(app.cfg.DBMaxIdle) - app.mgr.Resume(app.LogBackgroundContext()) + +func (app *App) Resume(ctx context.Context) error { + return app.mgr.Resume(log.WithLogger(ctx, app.cfg.Logger)) } + func (app *App) _pause(ctx context.Context) error { + app.db.SetMaxIdleConns(0) app.events.Stop() - cfg := switchover.ConfigFromContext(ctx) - if cfg.NoPauseAPI { - return nil - } - err := app.requestLock.Lock(ctx) - if err != nil { - app.events.Start() - return err - } return nil } + func (app *App) _resume(ctx context.Context) error { + app.db.SetMaxIdleConns(app.cfg.DBMaxIdle) app.events.Start() - app.requestLock.Unlock() + return nil } diff --git a/app/shutdown.go b/app/shutdown.go index 
e5d041d2c3..b48dc91cb4 100644 --- a/app/shutdown.go +++ b/app/shutdown.go @@ -65,7 +65,6 @@ func (app *App) _Shutdown(ctx context.Context) error { shut(app.APIKeyring, "API keyring") shut(app.NonceStore, "nonce store") shut(app.ConfigStore, "config store") - shut(app.requestLock, "context locker") if len(errs) == 1 { return errs[0] From efbf82b54b7ae4b0d153c43a10969f83e91fc936 Mon Sep 17 00:00:00 2001 From: Nathaniel Caza Date: Wed, 16 Mar 2022 10:16:27 -0500 Subject: [PATCH 008/225] don't use truncate --- limit/store.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/limit/store.go b/limit/store.go index 19ea19548a..ad8ff9f546 100644 --- a/limit/store.go +++ b/limit/store.go @@ -32,7 +32,7 @@ func NewStore(ctx context.Context, db *sql.DB) (*Store, error) { on conflict (id) do update set max = $2 `), - resetAll: p.P(`truncate config_limits`), + resetAll: p.P(`delete from config_limits`), }, p.Err } From e6e25ccff336a4c9d209f6b6ee6ed88c5123fd60 Mon Sep 17 00:00:00 2001 From: Nathaniel Caza Date: Wed, 16 Mar 2022 10:16:37 -0500 Subject: [PATCH 009/225] describe values --- lock/global.go | 32 +++++++++++++++++++++++++++----- 1 file changed, 27 insertions(+), 5 deletions(-) diff --git a/lock/global.go b/lock/global.go index a1c431af72..0cc046b9e7 100644 --- a/lock/global.go +++ b/lock/global.go @@ -2,10 +2,32 @@ package lock // Defined global lock values. const ( - GlobalMigrate = uint32(0x1337) // 4919 - GlobalEngineProcessing = uint32(0x1234) // 4660 - GlobalMessageSending = uint32(0x1330) // 4912 + // Ensures only a single instance is performing migrations at a time. + GlobalMigrate = uint32(0x1337) // 4919 + + // Currently unused. + GlobalEngineProcessing = uint32(0x1234) // 4660 + + // Ensures only a single instance is sending messages, + // this includes out-of-transaction processes. + GlobalMessageSending = uint32(0x1330) // 4912 + + // Currently unused. 
RegionalEngineProcessing = uint32(0x1342) // 4930 - ModularEngineProcessing = uint32(0x1347) // 4935 - GlobalSwitchOver = uint32(0x1111) // 4369 + + // Currently unused. + ModularEngineProcessing = uint32(0x1347) // 4935 + + // A shared lock is grabbed by the application, and exclusive + // lock during the final sync as a stop-the-world lock for the + // atomic DB switch. + GlobalSwitchOver = uint32(0x1111) // 4369 + + // Used exclusively by engine instances to elect a leader. + // + // Only the instance and connection with this lock is allowed + // to perform trigger updates and synchronization. + // + // It must be acquired before the global switchover lock. + GlobalSwitchOverExec = uint32(0x1112) // 4370 ) From e0c2c39fe241175e6f7e44b82b7c1d5f177bd235 Mon Sep 17 00:00:00 2001 From: Nathaniel Caza Date: Mon, 21 Mar 2022 15:37:59 -0500 Subject: [PATCH 010/225] state handling --- app/app.go | 5 - ....sql => 20220316112851-switchover-mk2.sql} | 0 swo/README.md | 26 ++ swo/changelog.go | 65 +++++ swo/changelog_table.sql | 9 + swo/changelog_trigger.sql | 44 ++++ swo/drvconn.go | 121 --------- swo/drvconnector.go | 21 +- swo/execute.go | 57 +++++ swo/initialsync.go | 147 +++++++++++ swo/manager.go | 171 +++++++------ swo/preflightlocks.go | 98 ++++++++ swo/reset.go | 119 +++++++++ swo/scantables.go | 103 ++++++++ swo/scantables_column_list.sql | 11 + swo/scantables_fkey_refs.sql | 9 + swo/sessionstats.go | 96 ++++++++ swo/state.go | 231 ++++++++++++++++++ swo/swomsg/log.go | 23 +- swo/swomsg/messages.go | 68 ++++-- swo/table.go | 110 +++++++++ 21 files changed, 1283 insertions(+), 251 deletions(-) rename migrate/migrations/{20220225095051-switchover-mk2.sql => 20220316112851-switchover-mk2.sql} (100%) create mode 100644 swo/README.md create mode 100644 swo/changelog.go create mode 100644 swo/changelog_table.sql create mode 100644 swo/changelog_trigger.sql delete mode 100644 swo/drvconn.go create mode 100644 swo/execute.go create mode 100644 swo/initialsync.go create
mode 100644 swo/preflightlocks.go create mode 100644 swo/reset.go create mode 100644 swo/scantables.go create mode 100644 swo/scantables_column_list.sql create mode 100644 swo/scantables_fkey_refs.sql create mode 100644 swo/sessionstats.go create mode 100644 swo/state.go create mode 100644 swo/table.go diff --git a/app/app.go b/app/app.go index 3945845fbd..1010a982f8 100644 --- a/app/app.go +++ b/app/app.go @@ -182,11 +182,6 @@ func NewApp(c Config, db *sql.DB) (*App, error) { if c.SWO != nil { c.SWO.SetPauseResumer(app) log.Logf(app.LogBackgroundContext(), "SWO Enabled.") - go func() { - for range time.NewTicker(3 * time.Second).C { - log.Logf(app.LogBackgroundContext(), "SWO: %v", c.SWO.Stats()) - } - }() } gCfg := &gorm.Config{ diff --git a/migrate/migrations/20220225095051-switchover-mk2.sql b/migrate/migrations/20220316112851-switchover-mk2.sql similarity index 100% rename from migrate/migrations/20220225095051-switchover-mk2.sql rename to migrate/migrations/20220316112851-switchover-mk2.sql diff --git a/swo/README.md b/swo/README.md new file mode 100644 index 0000000000..0f22344f3f --- /dev/null +++ b/swo/README.md @@ -0,0 +1,26 @@ +# Switchover (SWO) + +Switchover (SWO) is a feature that allows a live system to safely switch from one database to another. + +## Theory of Operation + +During SWO, 2 DB url's are involved. "old" and "new". + +- All app-related DB connections acquire a shared advisory lock `GlobalSwitchOver` to the "old" DB, followed by checking switchover state is not `use_next_db`. These locks are at the session level and persist as long as the connections remain in the pool. +- If it is `use_next_db`, SWO is complete, the connection is closed, and future connections are made to the "new" DB without the lock. +- Once initiated, the first engine instance to acquire the `GlobalSwitchOverExec` lock (separate from `GlobalSwitchOver`) will begin the switch. 
+- When the switch is started, a `change_log` table is created and populated by triggers added to existing tables for INSERT/UPDATE/DELETE operations. +- An initial sync is performed, effectively copying a snapshot of all data from the "old" DB to the "new" DB. +- Subsequent syncs are performed by applying records from the `change_log` table to the "new" DB. +- After each sync, the synced rows are deleted from the `change_log` table, so that it always represents the diff between both DBs. +- This is repeated until the `change_log` table has fewer than 100 rows at the start of a sync. +- Once the DBs are relatively similar, SWO goes into "critical phase". +- In this phase, idle connections are disabled until a shared deadline (meaning each query requires the shared lock, as connections are not re-used). +- When the final sync begins, an exclusive `GlobalSwitchOver` lock is acquired and behaves as a stop-the-world lock. +- After the final sync, sequences are also copied from the "old" DB to the "new" DB. +- Finally, the `current_state` column is updated to `use_next_db`, and the `GlobalSwitchOver` lock is released. + +If deadlines are reached, or any error is encountered, the connection for the switchover is dropped, and syncing resumes. If a commit to the new DB succeeds, but fails on the old DB, an error state is entered. + +From an error state, only RESET can be performed, which wipes the "new" DB and recreates `change_log` and all triggers to begin again with another attempt.
+ diff --git a/swo/changelog.go b/swo/changelog.go new file mode 100644 index 0000000000..da86854a6f --- /dev/null +++ b/swo/changelog.go @@ -0,0 +1,65 @@ +package swo + +import ( + "context" + _ "embed" + "fmt" + + "github.com/jackc/pgx/v4" +) + +var ( + //go:embed changelog_table.sql + changelogTable string + + //go:embed changelog_trigger.sql + changelogTrigger string +) + +func EnableChangeLog(ctx context.Context, conn *pgx.Conn) error { + err := SwitchOverExecLock(ctx, conn) + if err != nil { + return fmt.Errorf("acquire lock: %w", err) + } + defer UnlockConn(ctx, conn) + + tables, err := ScanTables(ctx, conn) + if err != nil { + return fmt.Errorf("scan tables: %w", err) + } + + _, err = conn.Exec(ctx, changelogTable) + if err != nil { + return fmt.Errorf("create change_log table: %w", err) + } + _, err = conn.Exec(ctx, changelogTrigger) + if err != nil { + return fmt.Errorf("create change_log AFTER trigger: %w", err) + } + _, err = conn.Exec(ctx, `insert into change_log(id,table_name,op,row_id) values(0,'','INIT',0)`) + if err != nil { + return fmt.Errorf("create change_log INIT row: %w", err) + } + + // create triggers + for _, table := range tables { + if table.SkipSync() { + continue + } + + _, err = conn.Exec(ctx, fmt.Sprintf(` + CREATE TRIGGER %s AFTER INSERT OR UPDATE OR DELETE ON %s + FOR EACH ROW EXECUTE PROCEDURE fn_process_change_log() + `, table.QuotedChangeTriggerName(), table.QuotedName())) + if err != nil { + return fmt.Errorf("create trigger %s: %w", table.QuotedChangeTriggerName(), err) + } + } + + _, err = conn.Exec(ctx, "update switchover_state set current_state = 'in_progress' where current_state = 'idle'") + if err != nil { + return fmt.Errorf("update switchover_state to in_progress: %w", err) + } + + return nil +} diff --git a/swo/changelog_table.sql b/swo/changelog_table.sql new file mode 100644 index 0000000000..d06819f943 --- /dev/null +++ b/swo/changelog_table.sql @@ -0,0 +1,9 @@ +CREATE TABLE change_log ( + id BIGSERIAL PRIMARY KEY, 
+ op TEXT NOT NULL, + table_name TEXT NOT NULL, + row_id TEXT NOT NULL, + tx_id BIGINT, + cmd_id cid, + row_data JSONB +) diff --git a/swo/changelog_trigger.sql b/swo/changelog_trigger.sql new file mode 100644 index 0000000000..b9a617b24f --- /dev/null +++ b/swo/changelog_trigger.sql @@ -0,0 +1,44 @@ +CREATE +OR REPLACE FUNCTION fn_process_change_log() RETURNS TRIGGER AS $$ +DECLARE cur_state enum_switchover_state := 'idle'; + +BEGIN +SELECT INTO cur_state current_state +FROM switchover_state; + +IF cur_state != 'in_progress' THEN RETURN NEW; + +END IF; + +IF (TG_OP = 'DELETE') THEN +INSERT INTO change_log (op, table_name, row_id, tx_id, cmd_id) +VALUES ( + TG_OP, + TG_TABLE_NAME, + cast(OLD .id AS TEXT), + txid_current(), + OLD .cmax + ); + +RETURN OLD; + +ELSE +INSERT INTO change_log (op, table_name, row_id, tx_id, cmd_id, row_data) +VALUES ( + TG_OP, + TG_TABLE_NAME, + cast(NEW .id AS TEXT), + txid_current(), + NEW .cmin, + to_jsonb(NEW) + ); + +RETURN NEW; + +END IF; + +RETURN NULL; + +END; + +$$ LANGUAGE 'plpgsql' diff --git a/swo/drvconn.go b/swo/drvconn.go deleted file mode 100644 index 07abe2b5b3..0000000000 --- a/swo/drvconn.go +++ /dev/null @@ -1,121 +0,0 @@ -package swo - -import ( - "context" - "database/sql/driver" - "fmt" - "time" -) - -type Conn struct { - DBConn - - n *Notifier - locked bool -} - -var ErrDone = fmt.Errorf("switchover is already done") - -type DBConn interface { - driver.Conn - driver.Pinger - driver.ExecerContext - driver.QueryerContext - driver.ConnPrepareContext - driver.ConnBeginTx - driver.NamedValueChecker -} - -var ( - _ driver.SessionResetter = (*Conn)(nil) - _ driver.Validator = (*Conn)(nil) -) - -func (c *Conn) lock(ctx context.Context) error { - if c.n.IsDone() { - return driver.ErrBadConn - } - if c.locked { - return nil - } - - _, err := c.ExecContext(ctx, "select pg_advisory_lock_shared(4369)", nil) - if err != nil { - return err - } - c.locked = true - - rows, err := c.QueryContext(ctx, "select current_state from 
switchover_state", nil) - if err != nil { - return err - } - - scan := make([]driver.Value, 1) - err = rows.Next(scan) - if err != nil { - return err - } - - var state string - switch t := scan[0].(type) { - case string: - state = t - case []byte: - state = string(t) - default: - return fmt.Errorf("expected string for current_state value, got %t", t) - } - err = rows.Close() - if err != nil { - return err - } - - if state == "use_next_db" { - c.n.Done() - return driver.ErrBadConn - } - - return nil -} - -func (c *Conn) unlock(ctx context.Context) error { - if !c.locked { - return nil - } - - _, err := c.ExecContext(ctx, "select pg_advisory_unlock_shared(4369)", nil) - if err != nil { - return err - } - - c.locked = false - - return nil -} - -func (c *Conn) ResetSession(ctx context.Context) error { - err := c.lock(ctx) - if err != nil { - return err - } - - if s, ok := c.DBConn.(driver.SessionResetter); ok { - return s.ResetSession(ctx) - } - - return nil -} - -func (c *Conn) IsValid() bool { - if c.n.IsDone() { - return false - } - ctx, cancel := context.WithTimeout(context.Background(), 3*time.Second) - defer cancel() - - if err := c.unlock(ctx); err != nil { - return false - } - - return true -} diff --git a/swo/drvconnector.go b/swo/drvconnector.go index cee8541e68..4c24dcdd1b 100644 --- a/swo/drvconnector.go +++ b/swo/drvconnector.go @@ -10,7 +10,8 @@ import ( type Connector struct { dbcOld, dbcNew driver.Connector - n *Notifier + n *Notifier + sm *StatsManager } type Notifier struct { @@ -35,11 +36,12 @@ func (n *Notifier) IsDone() bool { var _ driver.Connector = (*Connector)(nil) -func NewConnector(dbcOld, dbcNew driver.Connector) *Connector { +func NewConnector(dbcOld, dbcNew driver.Connector, sm *StatsManager) *Connector { return &Connector{ dbcOld: dbcOld, dbcNew: dbcNew, n: NewNotifier(), + sm: sm, } } @@ -55,20 +57,15 @@ func (drv *Connector) Connect(ctx context.Context) (driver.Conn, error) { return nil, err } - drvConn := &Conn{ - DBConn: 
conn.(DBConn), - n: drv.n, + err = sessionLock(ctx, conn) + if errors.Is(err, errDone) { + drv.n.Done() + return drv.dbcNew.Connect(ctx) } - - err = drvConn.lock(ctx) if err != nil { conn.Close() - - if errors.Is(err, driver.ErrBadConn) { - return drv.dbcNew.Connect(ctx) - } return nil, err } - return drvConn, nil + return conn, nil } diff --git a/swo/execute.go b/swo/execute.go new file mode 100644 index 0000000000..18652ff226 --- /dev/null +++ b/swo/execute.go @@ -0,0 +1,57 @@ +package swo + +import ( + "context" + "fmt" + + "github.com/google/uuid" + "github.com/jackc/pgx/v4" +) + +func (m *Manager) SendProposal() (uuid.UUID, error) { + return uuid.Nil, nil +} + +func (m *Manager) ProposalIsValid() (bool, error) { + return false, nil +} + +func (m *Manager) Execute(ctx context.Context, oldConn, newConn *pgx.Conn) error { + /* + - initial sync + - loop until few changes + - send proposal + - loop until proposal is valid + - execute proposal + + */ + + return m.withConnFromBoth(ctx, func(ctx context.Context, oldConn, newConn *pgx.Conn) error { + err := EnableChangeLog(ctx, oldConn) + if err != nil { + return fmt.Errorf("enable change log: %w", err) + } + + err = m.InitialSync(ctx, oldConn, newConn) + if err != nil { + return fmt.Errorf("initial sync: %w", err) + } + + // sync in a loop until DB is up-to-date + // err = m.LoopSync(ctx, oldConn, newConn) + + return nil + }) +} + +func LoopSync(ctx context.Context, oldConn, newConn *pgx.Conn) error { + return nil +} + +func FinalSync(ctx context.Context, oldConn, newConn *pgx.Conn) error { + return nil +} + +func syncChanges(ctx context.Context, oldConn, newConn pgx.Tx) (int, error) { + return 0, nil +} diff --git a/swo/initialsync.go b/swo/initialsync.go new file mode 100644 index 0000000000..4572551cff --- /dev/null +++ b/swo/initialsync.go @@ -0,0 +1,147 @@ +package swo + +import ( + "bytes" + "context" + "fmt" + "io" + "sync" + "time" + + "github.com/jackc/pgx/v4" +) + +func (m *Manager) Progressf(ctx 
context.Context, format string, a ...interface{}) { +} + +func (m *Manager) InitialSync(ctx context.Context, oldConn, newConn *pgx.Conn) error { + m.Progressf(ctx, "scanning tables") + tables, err := ScanTables(ctx, oldConn) + if err != nil { + return fmt.Errorf("scan tables: %w", err) + } + + srcTx, err := oldConn.BeginTx(ctx, pgx.TxOptions{ + AccessMode: pgx.ReadOnly, + IsoLevel: pgx.Serializable, + }) + if err != nil { + return fmt.Errorf("begin src tx: %w", err) + } + defer srcTx.Rollback(ctx) + + dstTx, err := newConn.BeginTx(ctx, pgx.TxOptions{}) + if err != nil { + return fmt.Errorf("begin dst tx: %w", err) + } + defer dstTx.Rollback(ctx) + + for _, table := range tables { + if table.SkipSync() { + continue + } + + err = m.SyncTableInit(ctx, table, srcTx, dstTx) + if err != nil { + return fmt.Errorf("sync table %s: %w", table.Name, err) + } + } + + m.Progressf(ctx, "commit initial sync") + // Important to validate src commit, even though it's read-only. + // + // A failure here indicates the isolation level has been violated + // and we will need to try again. 
+ err = srcTx.Commit(ctx) + if err != nil { + return fmt.Errorf("commit src tx: %w", err) + } + + err = dstTx.Commit(ctx) + if err != nil { + return fmt.Errorf("commit dst tx: %w", err) + } + + return nil +} + +type lineCount struct { + n int + mx sync.Mutex +} + +func (lc *lineCount) Write(p []byte) (n int, err error) { + lc.mx.Lock() + lc.n += bytes.Count(p, []byte("\n")) + lc.mx.Unlock() + return len(p), nil +} + +func (lc *lineCount) Lines() int { + lc.mx.Lock() + defer lc.mx.Unlock() + return lc.n +} + +func (m *Manager) SyncTableInit(ctx context.Context, t Table, srcTx, dstTx pgx.Tx) error { + ctx, cancel := context.WithCancel(ctx) + defer cancel() + + var rowCount int + err := srcTx.QueryRow(ctx, fmt.Sprintf("select count(*) from %s", t.QuotedName())).Scan(&rowCount) + if err != nil { + return fmt.Errorf("count rows: %w", err) + } + + pr, pw := io.Pipe() + var lc lineCount + errCh := make(chan error, 3) + go func() { + prog := time.NewTimer(2 * time.Second) + defer prog.Stop() + for { + select { + case <-ctx.Done(): + errCh <- ctx.Err() + pw.CloseWithError(ctx.Err()) + pr.CloseWithError(ctx.Err()) + return + case <-prog.C: + } + + m.Progressf(ctx, "syncing table %s (%d/%d)", t.Name, lc.Lines(), rowCount) + } + }() + go func() { + defer cancel() + _, err := srcTx.Conn().PgConn().CopyTo(ctx, pw, fmt.Sprintf(`copy %s to stdout`, t.QuotedName())) + if err != nil { + errCh <- fmt.Errorf("read from src: %w", err) + pw.CloseWithError(err) + pr.CloseWithError(err) + } else { + errCh <- nil + } + }() + go func() { + defer cancel() + _, err := dstTx.Conn().PgConn().CopyFrom(ctx, io.TeeReader(pr, &lc), fmt.Sprintf(`copy %s from stdin`, t.QuotedName())) + if err != nil { + errCh <- fmt.Errorf("write to dst: %w", err) + pw.CloseWithError(err) + pr.CloseWithError(err) + } else { + errCh <- nil + } + }() + + // check first error, but wait for all to finish + err = <-errCh + <-errCh + <-errCh + if err != nil { + return err + } + + return nil +} diff --git a/swo/manager.go 
b/swo/manager.go index d5fd50add6..f7101f0b54 100644 --- a/swo/manager.go +++ b/swo/manager.go @@ -7,8 +7,10 @@ import ( "fmt" "github.com/google/uuid" + "github.com/jackc/pgx/v4" + "github.com/jackc/pgx/v4/stdlib" + "github.com/target/goalert/app/lifecycle" "github.com/target/goalert/swo/swomsg" - "github.com/target/goalert/util/log" "gorm.io/driver/postgres" "gorm.io/gorm" ) @@ -16,11 +18,16 @@ import ( type Manager struct { id uuid.UUID - dbOld, dbNew *gorm.DB - protectedDB *sql.DB + dbOld, dbNew *sql.DB + + protectedDB *sql.DB s Syncer + app lifecycle.PauseResumer + + stats *StatsManager + msgLog *swomsg.Log nextMsgLog *swomsg.Log @@ -28,8 +35,7 @@ type Manager struct { nextMsgCh chan *swomsg.Message errCh chan error - nodes map[uuid.UUID]*Node - exec map[uuid.UUID]*swomsg.Message + msgState *state cancel func() @@ -41,47 +47,61 @@ type Node struct { OldValid bool NewValid bool + CanExec bool + + Status string +} + +type Config struct { + OldDBC, NewDBC driver.Connector + CanExec bool } -func NewManager(dbcOld, dbcNew driver.Connector, canExec bool) (*Manager, error) { +func NewManager(cfg Config) (*Manager, error) { gCfg := &gorm.Config{PrepareStmt: true} - gormOld, err := gorm.Open(postgres.New(postgres.Config{Conn: sql.OpenDB(dbcOld)}), gCfg) + gormOld, err := gorm.Open(postgres.New(postgres.Config{Conn: sql.OpenDB(cfg.OldDBC)}), gCfg) if err != nil { - return nil, err + return nil, fmt.Errorf("open old database: %w", err) } - gormNew, err := gorm.Open(postgres.New(postgres.Config{Conn: sql.OpenDB(dbcNew)}), gCfg) + gormNew, err := gorm.Open(postgres.New(postgres.Config{Conn: sql.OpenDB(cfg.NewDBC)}), gCfg) if err != nil { - return nil, err + return nil, fmt.Errorf("open new database: %w", err) } id := uuid.New() msgLog, err := swomsg.NewLog(gormOld, id) if err != nil { - return nil, err + return nil, fmt.Errorf("create old message log: %w", err) } msgLogNext, err := swomsg.NewLog(gormNew, id) if err != nil { - return nil, err + return nil, 
fmt.Errorf("create new message log: %w", err) } + sm := NewStatsManager() ctx, cancel := context.WithCancel(context.Background()) m := &Manager{ - dbOld: gormOld, - dbNew: gormNew, + dbOld: sql.OpenDB(cfg.OldDBC), + dbNew: sql.OpenDB(cfg.NewDBC), - protectedDB: sql.OpenDB(NewConnector(dbcOld, dbcNew)), + protectedDB: sql.OpenDB(NewConnector(cfg.OldDBC, cfg.NewDBC, sm)), id: id, msgLog: msgLog, nextMsgLog: msgLogNext, - canExec: canExec, + canExec: cfg.CanExec, msgCh: make(chan *swomsg.Message), nextMsgCh: make(chan *swomsg.Message), errCh: make(chan error, 10), cancel: cancel, - nodes: make(map[uuid.UUID]*Node), - exec: make(map[uuid.UUID]*swomsg.Message), + + stats: sm, + } + + m.msgState, err = newState(ctx, m) + if err != nil { + return nil, fmt.Errorf("create state: %w", err) } go func() { @@ -91,7 +111,11 @@ func NewManager(dbcOld, dbcNew driver.Connector, canExec bool) (*Manager, error) m.errCh <- fmt.Errorf("read from log: %w", err) return } - m.msgCh <- msg + err = m.msgState.processFromOld(ctx, msg) + if err != nil { + m.errCh <- fmt.Errorf("process from old db log: %w", err) + return + } } }() go func() { @@ -100,81 +124,66 @@ func NewManager(dbcOld, dbcNew driver.Connector, canExec bool) (*Manager, error) m.errCh <- fmt.Errorf("read from next log: %w", err) return } - m.nextMsgCh <- msg + err = m.msgState.processFromNew(ctx, msg) + if err != nil { + m.errCh <- fmt.Errorf("process from new db log: %w", err) + return + } }() - go m.loop(ctx) - return m, nil } -func (m *Manager) DB() *sql.DB { return m.protectedDB } +func (m *Manager) SetPauseResumer(app lifecycle.PauseResumer) { m.app = app } -func (m *Manager) processMessage(ctx context.Context, msg *swomsg.Message) { - appendLog := func(msg interface{}) { - err := m.msgLog.Append(ctx, msg) - if err != nil { - log.Log(ctx, err) - } - } +// withConnFromOld allows performing operations with a raw connection to the old database. 
+func (m *Manager) withConnFromOld(ctx context.Context, f func(context.Context, *pgx.Conn) error) error { + return WithLockedConn(ctx, m.dbOld, f) +} - switch { - case msg.Ping != nil: - appendLog(swomsg.Pong{IsNextDB: false}) - err := m.nextMsgLog.Append(ctx, swomsg.Pong{IsNextDB: true}) - if err != nil { - log.Log(ctx, err) - } - case msg.Reset != nil: - m.nodes = make(map[uuid.UUID]*Node) - m.exec = make(map[uuid.UUID]*swomsg.Message) - m.id = uuid.New() - } +// withConnFromNew allows performing operations with a raw connection to the new database. +func (m *Manager) withConnFromNew(ctx context.Context, f func(context.Context, *pgx.Conn) error) error { + return WithLockedConn(ctx, m.dbNew, f) +} - if !m.canExec { - // api-only node, don't process execute commands - return +// withConnFromBoth allows performing operations with a raw connection to both databases. +func (m *Manager) withConnFromBoth(ctx context.Context, f func(ctx context.Context, oldConn, newConn *pgx.Conn) error) error { + // grab lock with old DB first + return WithLockedConn(ctx, m.dbOld, func(ctx context.Context, oldConn *pgx.Conn) error { + return WithLockedConn(ctx, m.dbNew, func(ctx context.Context, newConn *pgx.Conn) error { + return f(ctx, oldConn, newConn) + }) + }) +} + +func WithLockedConn(ctx context.Context, db *sql.DB, runFunc func(context.Context, *pgx.Conn) error) error { + conn, err := db.Conn(ctx) + if err != nil { + return err } + defer conn.Close() - // any execute command needs to be claimed - switch { - case msg.Execute != nil: - m.exec[msg.ID] = msg - appendLog(swomsg.Claim{MsgID: msg.ID}) - case msg.Reset != nil: - m.exec[msg.ID] = msg - appendLog(swomsg.Claim{MsgID: msg.ID}) - case msg.Claim != nil: - execMsg := m.exec[msg.Claim.MsgID] - delete(m.exec, msg.Claim.MsgID) - if msg.NodeID != m.id { - // claimed by another node - return + return conn.Raw(func(driverConn interface{}) error { + conn := driverConn.(*stdlib.Conn).Conn() + err := SwitchOverExecLock(ctx,
conn) + if err != nil { + return err } - m.execute(execMsg) - } + return runFunc(ctx, conn) + }) } -func (m *Manager) execute(msg *swomsg.Message) { - switch { - } -} +func (m *Manager) Status() *Status { return m.msgState.Status() } +func (m *Manager) DB() *sql.DB { return m.protectedDB } -func (m *Manager) loop(ctx context.Context) { - for { - select { - case <-ctx.Done(): - return - case msg := <-m.msgCh: - m.processMessage(ctx, msg) - case msg := <-m.nextMsgCh: - if msg.Pong != nil && msg.Pong.IsNextDB { - m.nodes[msg.NodeID].NewValid = true - } - case err := <-m.errCh: - log.Log(ctx, err) - m.msgLog.Append(ctx, swomsg.Error{Details: err.Error()}) - } - } +type Status struct { + Details string + Nodes []Node + + // IsDone is true if the switch has already been completed. + IsDone bool + + // IsIdle must be true before executing a switch-over. + IsIdle bool } diff --git a/swo/preflightlocks.go b/swo/preflightlocks.go new file mode 100644 index 0000000000..16231acb13 --- /dev/null +++ b/swo/preflightlocks.go @@ -0,0 +1,98 @@ +package swo + +import ( + "context" + "database/sql/driver" + "errors" + "fmt" + + "github.com/jackc/pgx/v4" + "github.com/target/goalert/lock" +) + +var ErrNoLock = errors.New("no lock") + +// SwitchOverExecLock will attempt to grab the GlobalSwitchOverExec lock. +// +// After acquiring the lock, it will ensure the switchover has not yet been +// completed. +// +// This lock should be acquired by an engine instance that is going to perform +// the sync & switchover. +func SwitchOverExecLock(ctx context.Context, conn *pgx.Conn) error { + var gotLock bool + err := conn.QueryRow(ctx, ` + select pg_try_advisory_lock($1) + from switchover_state + where current_state != 'use_next_db' + `, lock.GlobalSwitchOverExec).Scan(&gotLock) + if err != nil { + return err + } + + if !gotLock { + return ErrNoLock + } + + return nil +} + +// UnlockConn will release all session locks or close the connection. 
+func UnlockConn(ctx context.Context, conn *pgx.Conn) { + _, err := conn.Exec(ctx, `select pg_advisory_unlock_all()`) + if err != nil { + conn.Close(ctx) + } +} + +var errDone = errors.New("done") + +// sessionLock will get a shared advisory lock for the connection. +func sessionLock(ctx context.Context, conn driver.Conn) error { + type execQuery interface { + driver.ExecerContext + driver.QueryerContext + } + + c := conn.(execQuery) + + // Using literal here so we can avoid a prepared statement round trip. + // + // This will run for every new connection in SWO mode and for every + // query while idle connections are disabled during critical phase. + _, err := c.ExecContext(ctx, fmt.Sprintf("select pg_advisory_lock_shared(%d)", lock.GlobalSwitchOver), nil) + if err != nil { + return fmt.Errorf("get SWO shared session lock: %w", err) + } + + rows, err := c.QueryContext(ctx, "select current_state from switchover_state", nil) + if err != nil { + return fmt.Errorf("get current SWO state: %w", err) + } + + scan := make([]driver.Value, 1) + err = rows.Next(scan) + if err != nil { + return err + } + + var state string + switch t := scan[0].(type) { + case string: + state = t + case []byte: + state = string(t) + default: + return fmt.Errorf("get current SWO state: expected string for current_state value, got %t", t) + } + err = rows.Close() + if err != nil { + return err + } + + if state == "use_next_db" { + return errDone + } + + return nil +} diff --git a/swo/reset.go b/swo/reset.go new file mode 100644 index 0000000000..f3a0db4daf --- /dev/null +++ b/swo/reset.go @@ -0,0 +1,119 @@ +package swo + +import ( + "context" + "fmt" + + "github.com/jackc/pgx/v4" +) + +func (m *Manager) DoReset(ctx context.Context) error { + err := m.withConnFromOld(ctx, ResetOldDB) + if err != nil { + return fmt.Errorf("reset old db: %w", err) + } + + err = m.withConnFromNew(ctx, ResetNewDB) + if err != nil { + return fmt.Errorf("reset new db: %w", err) + } + + return nil +} + +// ResetNewDB 
will reset the new database to a clean state. +func ResetNewDB(ctx context.Context, conn *pgx.Conn) error { + err := SwitchOverExecLock(ctx, conn) + if err != nil { + return fmt.Errorf("failed to acquire lock: %w", err) + } + defer UnlockConn(ctx, conn) + + _, err = conn.Exec(ctx, "update switchover_state set current_state = 'idle' where current_state = 'in_progress'") + if err != nil { + return fmt.Errorf("set state to idle: %w", err) + } + + tables, err := ScanTables(ctx, conn) + if err != nil { + return fmt.Errorf("scan tables: %w", err) + } + + // truncate sync tables + for _, table := range tables { + if table.SkipSync() { + continue + } + + _, err = conn.Exec(ctx, fmt.Sprintf("truncate %s", table.QuotedName())) + if err != nil { + return fmt.Errorf("truncate %s: %w", table.QuotedName(), err) + } + } + + // drop the change_log table + _, err = conn.Exec(ctx, "drop table if exists change_log") + if err != nil { + return fmt.Errorf("drop change_log: %w", err) + } + + return nil +} + +// ResetOldDB will reset the old database to a clean state. +// +// It will remove all change triggers and cleanup switchover data. 
+func ResetOldDB(ctx context.Context, conn *pgx.Conn) error { + err := SwitchOverExecLock(ctx, conn) + if err != nil { + return fmt.Errorf("acquire lock: %w", err) + } + defer UnlockConn(ctx, conn) + + _, err = conn.Exec(ctx, "update switchover_state set current_state = 'idle' where current_state = 'in_progress'") + if err != nil { + return fmt.Errorf("set state to idle: %w", err) + } + + tables, err := ScanTables(ctx, conn) + if err != nil { + return fmt.Errorf("scan tables: %w", err) + } + + // drop change triggers + for _, table := range tables { + if table.SkipSync() { + continue + } + + _, err = conn.Exec(ctx, fmt.Sprintf("drop trigger if exists %s on %s", table.QuotedChangeTriggerName(), table.QuotedName())) + if err != nil { + return fmt.Errorf("drop trigger %s: %w", table.QuotedChangeTriggerName(), err) + } + + _, err = conn.Exec(ctx, fmt.Sprintf("drop trigger if exists %s on %s", table.QuotedLockTriggerName(), table.QuotedName())) + if err != nil { + return fmt.Errorf("drop trigger %s: %w", table.QuotedChangeTriggerName(), err) + } + + } + + // TODO: ensure no deps get missed + _, err = conn.Exec(ctx, "DROP FUNCTION IF EXISTS fn_switchover_change_log_lock()") + if err != nil { + return fmt.Errorf("drop fn_switchover_change_log_lock: %w", err) + } + + _, err = conn.Exec(ctx, "DROP FUNCTION IF EXISTS fn_process_change_log()") + if err != nil { + return fmt.Errorf("drop fn_process_change_log: %w", err) + } + + // drop the change_log table + _, err = conn.Exec(ctx, "drop table if exists change_log") + if err != nil { + return fmt.Errorf("drop change_log: %w", err) + } + + return nil +} diff --git a/swo/scantables.go b/swo/scantables.go new file mode 100644 index 0000000000..847a2de9d0 --- /dev/null +++ b/swo/scantables.go @@ -0,0 +1,103 @@ +package swo + +import ( + "context" + _ "embed" + "fmt" + "sort" + + "github.com/jackc/pgx/v4" +) + +type Column struct { + Name string + Type string + Ord int +} + +var ( + //go:embed scantables_column_list.sql + 
columnListQuery string + + //go:embed scantables_fkey_refs.sql + fkeyRefsQuery string +) + +// ScanTables scans the database for tables, their columns, and dependencies. +func ScanTables(ctx context.Context, conn *pgx.Conn) ([]Table, error) { + var cRow struct { + TableName string + Column + } + + tables := make(map[string]*Table) + _, err := conn.QueryFunc(ctx, columnListQuery, nil, + []interface{}{&cRow.TableName, &cRow.Column.Name, &cRow.Column.Type, &cRow.Column.Ord}, + func(pgx.QueryFuncRow) error { + if tables[cRow.TableName] == nil { + tables[cRow.TableName] = &Table{Name: cRow.TableName, deps: make(map[string]*Table)} + } + tables[cRow.TableName].Columns = append(tables[cRow.TableName].Columns, cRow.Column) + if cRow.Column.Name == "id" { + tables[cRow.TableName].IDCol = cRow.Column + } + return nil + }) + if err != nil { + return nil, fmt.Errorf("scanning table columns: %w", err) + } + + var fRow struct { + SrcName string + DstName string + } + _, err = conn.QueryFunc(ctx, fkeyRefsQuery, nil, []interface{}{&fRow.SrcName, &fRow.DstName}, + func(pgx.QueryFuncRow) error { + tables[fRow.SrcName].deps[fRow.DstName] = tables[fRow.DstName] + + return nil + }) + if err != nil { + return nil, err + } + + // resolve/flatten dependencies + var tableList []Table + for _, t := range tables { + tableList = append(tableList, *t) + for t.flattenDeps() > 0 { + } + + if _, ok := t.deps[t.Name]; ok { + return nil, fmt.Errorf("circular non-deferrable dependency detected: %s", t.Name) + } + } + + // sort columns by ordinal + for _, t := range tableList { + sort.Slice(t.Columns, func(i, j int) bool { + return t.Columns[i].Ord < t.Columns[j].Ord + }) + } + + sort.Slice(tableList, func(i, j int) bool { + if tableList[i].DependsOn(tableList[j].Name) { + return false + } + if tableList[j].DependsOn(tableList[i].Name) { + return true + } + + return tableList[i].Name < tableList[j].Name + }) + + return tableList, nil +} + +func (c Column) IsInteger() bool { + switch c.Type { + case 
"integer", "bigint": + return true + } + return false +} diff --git a/swo/scantables_column_list.sql b/swo/scantables_column_list.sql new file mode 100644 index 0000000000..e5cfa8fa21 --- /dev/null +++ b/swo/scantables_column_list.sql @@ -0,0 +1,11 @@ +SELECT col.table_name, + col.column_name, + col.data_type, + col.ordinal_position +FROM information_schema.columns col + JOIN information_schema.tables t ON t.table_catalog = col.table_catalog + AND t.table_schema = col.table_schema + AND t.table_name = col.table_name + AND t.table_type = 'BASE TABLE' +WHERE col.table_catalog = current_database() + AND col.table_schema = 'public' diff --git a/swo/scantables_fkey_refs.sql b/swo/scantables_fkey_refs.sql new file mode 100644 index 0000000000..4bb0bef65b --- /dev/null +++ b/swo/scantables_fkey_refs.sql @@ -0,0 +1,9 @@ +SELECT src.relname, + dst.relname +FROM pg_catalog.pg_constraint con + JOIN pg_namespace ns ON ns.nspname = 'public' + AND ns.oid = con.connamespace + JOIN pg_class src ON src.oid = con.conrelid + JOIN pg_class dst ON dst.oid = con.confrelid +WHERE con.contype = 'f' + AND NOT con.condeferrable diff --git a/swo/sessionstats.go b/swo/sessionstats.go new file mode 100644 index 0000000000..c0b4ecceb0 --- /dev/null +++ b/swo/sessionstats.go @@ -0,0 +1,96 @@ +package swo + +import "time" + +type StatsManager struct { + sessCh chan sessionRecord + statsCh chan Stats +} + +type Stats struct { + Last1Min TimeframeStats + Last5Min TimeframeStats + Last15Min TimeframeStats +} + +type TimeframeStats struct { + Count int + AvgTime time.Duration + MaxTime time.Duration +} + +type sessionRecord struct { + Dur time.Duration + End time.Time +} + +func NewStatsManager() *StatsManager { + sm := &StatsManager{ + sessCh: make(chan sessionRecord, 100), + statsCh: make(chan Stats), + } + go sm.loop() + + return sm +} + +func (sm *StatsManager) Start() (stop func()) { + start := time.Now() + return func() { + end := time.Now() + sm.sessCh <- sessionRecord{end.Sub(start), end} + } 
+} + +func (sm *StatsManager) Stats() Stats { return <-sm.statsCh } + +func (sm *StatsManager) loop() { + var sessRecs []sessionRecord + var stats Stats + t := time.NewTicker(time.Second) + defer t.Stop() + + for { + select { + case <-t.C: + sessRecs, stats = updateStats(sessRecs) + case sess := <-sm.sessCh: + sessRecs = append(sessRecs, sess) + case sm.statsCh <- stats: + } + } +} + +func statsForTime(sessRecs []sessionRecord, t time.Time) (s TimeframeStats) { + for _, sess := range sessRecs { + if !sess.End.After(t) { + continue + } + s.Count++ + s.AvgTime += sess.Dur + if sess.Dur > s.MaxTime { + s.MaxTime = sess.Dur + } + } + if s.Count == 0 { + return s + } + + s.AvgTime = s.AvgTime / time.Duration(s.Count) + + return s +} + +func updateStats(sessRecs []sessionRecord) ([]sessionRecord, Stats) { + n := time.Now() + + var s Stats + + s.Last15Min = statsForTime(sessRecs, n.Add(-15*time.Minute)) + sessRecs = sessRecs[len(sessRecs)-s.Last15Min.Count:] + + s.Last5Min = statsForTime(sessRecs, n.Add(-5*time.Minute)) + s.Last1Min = statsForTime(sessRecs[len(sessRecs)-s.Last5Min.Count:], n.Add(-time.Minute)) + + return sessRecs, s +} diff --git a/swo/state.go b/swo/state.go new file mode 100644 index 0000000000..f997937f51 --- /dev/null +++ b/swo/state.go @@ -0,0 +1,231 @@ +package swo + +import ( + "context" + "fmt" + "sync" + + "github.com/google/uuid" + "github.com/target/goalert/swo/swomsg" + "github.com/target/goalert/util/log" +) + +type state struct { + m *Manager + + stateName string + + status string + + nodes map[uuid.UUID]*Node + + taskID uuid.UUID + cancel func() + + stateFn StateFunc + + mx sync.Mutex +} + +func newState(ctx context.Context, m *Manager) (*state, error) { + s := &state{ + m: m, + nodes: make(map[uuid.UUID]*Node), + stateFn: StateIdle, + stateName: "idle", + cancel: func() {}, + } + + return s, s.hello(ctx) +} + +type StateFunc func(context.Context, *state, *swomsg.Message) StateFunc + +func (s *state) Status() *Status { + s.mx.Lock() + defer 
s.mx.Unlock() + + var nodes []Node + for _, n := range s.nodes { + nodes = append(nodes, *n) + } + + return &Status{ + Details: s.status, + Nodes: nodes, + + IsDone: s.stateName == "complete", + IsIdle: s.stateName == "idle", + } +} + +func (s *state) ackMessage(ctx context.Context, msgID uuid.UUID) { + err := s.m.msgLog.Append(ctx, swomsg.Ack{MsgID: msgID, Status: s.stateName}) + if err != nil { + log.Log(ctx, err) + } +} + +func (s *state) update(msg *swomsg.Message) { + s.mx.Lock() + defer s.mx.Unlock() + + n, ok := s.nodes[msg.NodeID] + if !ok { + n = &Node{ + ID: msg.NodeID, + } + s.nodes[msg.NodeID] = n + } + + switch { + case msg.Hello != nil: + n.OldValid = msg.Hello.IsOldDB + n.Status = msg.Hello.Status + case msg.Ack != nil: + n.Status = msg.Ack.Status + case msg.Progress != nil: + s.status = msg.Progress.Details + case msg.Error != nil: + s.status = "error: " + msg.Error.Details + case msg.Done != nil: + s.status = "" + } +} + +func (s *state) taskDone(ctx context.Context, err error) { + if err != nil { + err = s.m.msgLog.Append(ctx, swomsg.Error{MsgID: s.taskID, Details: err.Error()}) + } else { + err = s.m.msgLog.Append(ctx, swomsg.Done{MsgID: s.taskID}) + } + if err != nil { + log.Log(ctx, err) + } +} + +func (s *state) hello(ctx context.Context) error { + err := s.m.msgLog.Append(ctx, swomsg.Hello{IsOldDB: true, Status: s.stateName}) + if err != nil { + return err + } + err = s.m.nextMsgLog.Append(ctx, swomsg.Hello{IsOldDB: false, Status: s.stateName}) + if err != nil { + return err + } + return nil +} + +func (s *state) processFromNew(ctx context.Context, msg *swomsg.Message) error { + if msg.Hello == nil { + return fmt.Errorf("unexpected message to NEW DB: %v", msg) + } + + n, ok := s.nodes[msg.NodeID] + if !ok { + n = &Node{ + ID: msg.NodeID, + } + s.nodes[msg.NodeID] = n + } + n.NewValid = msg.Hello.IsOldDB == false + return nil +} + +func (s *state) processFromOld(ctx context.Context, msg *swomsg.Message) error { + s.update(msg) + + if msg.Reset 
!= nil { + s.cancel() + s.nodes = make(map[uuid.UUID]*Node) + s.m.app.Resume(ctx) + s.taskID = msg.ID + s.stateName = "reset-wait" + s.stateFn = StateResetWait + s.status = "performing reset" + return s.hello(ctx) + } + + s.stateFn = s.stateFn(ctx, s, msg) + if msg.Ping != nil { + s.ackMessage(ctx, msg.ID) + } + + return nil +} + +func (s *state) StartTask(task func(context.Context) error) { + ctx, cancel := context.WithCancel(context.Background()) + s.cancel = cancel + go func() { s.taskDone(ctx, task(ctx)) }() +} + +// StateIdle is the state when the node is idle. +func StateIdle(ctx context.Context, s *state, msg *swomsg.Message) StateFunc { + s.stateName = "idle" + + switch { + case msg.Execute != nil: + case msg.Plan != nil: + } + + return StateIdle +} + +// StateError is the state after a task failed. +func StateError(ctx context.Context, s *state, msg *swomsg.Message) StateFunc { + s.stateName = "error" + + return StateError +} + +// StateResetWait is the state when the node is waiting for a reset to be performed. +func StateResetWait(ctx context.Context, s *state, msg *swomsg.Message) StateFunc { + s.stateName = "reset-wait" + + switch { + case msg.Error != nil: + s.ackMessage(ctx, msg.ID) + return StateError + case msg.Done != nil: + s.ackMessage(ctx, msg.ID) + return StateIdle + case msg.Ack != nil && s.m.canExec: + if msg.Ack.MsgID != s.taskID { + // ack for a different message + break + } + if msg.NodeID != s.m.id { + // claimed by another node + s.taskID = uuid.Nil + break + } + s.StartTask(s.m.DoReset) + s.stateName = "reset-exec" + s.ackMessage(ctx, msg.ID) + return StateResetExec + } + + return StateResetWait +} + +// StateResetExec is the state when the current node is performing a reset. 
+func StateResetExec(ctx context.Context, s *state, msg *swomsg.Message) StateFunc { + s.stateName = "reset-exec" + + switch { + case msg.Error != nil: + s.cancel() + s.stateName = "error" + s.ackMessage(ctx, msg.ID) + return StateError + case msg.Done != nil: + // already done, make sure we still cancel the context though + s.cancel() + s.stateName = "idle" + s.ackMessage(ctx, msg.ID) + return StateIdle + } + + return StateResetExec +} diff --git a/swo/swomsg/log.go b/swo/swomsg/log.go index 3c1b6ddd34..9a11e4fbe8 100644 --- a/swo/swomsg/log.go +++ b/swo/swomsg/log.go @@ -24,8 +24,9 @@ type Log struct { var ErrStaleLog = fmt.Errorf("cannot append until log is read") type logEvent struct { - ID int64 - Data []byte + ID int64 + Timestamp time.Time + Data []byte } func NewLog(db *gorm.DB, id uuid.UUID) (*Log, error) { @@ -72,6 +73,7 @@ func (l *Log) Next(ctx context.Context) (*Message, error) { l.events <- events return nil, err } + w.TS = events[0].Timestamp l.readID = events[0].ID l.events <- events[1:] @@ -105,29 +107,34 @@ func (l *Log) Append(ctx context.Context, v interface{}) error { switch m := v.(type) { case Ping: msg.Ping = &m - case Pong: - msg.Pong = &m + case Ack: + msg.Ack = &m case Reset: msg.Reset = &m case Error: msg.Error = &m case Execute: msg.Execute = &m - case Claim: - msg.Claim = &m + case Plan: + msg.Plan = &m + case Progress: + msg.Progress = &m + case Done: + msg.Done = &m + case Hello: + msg.Hello = &m default: return fmt.Errorf("unknown message type %T", m) } msg.ID = uuid.New() msg.NodeID = l.id - msg.TS = time.Now() data, err := json.Marshal(msg) if err != nil { return err } e := <-l.events - err = l.db.WithContext(ctx).Exec("insert into switchover_log (id, data) values ((select max(id)+1 from switchover_log), ?)", data).Error + err = l.db.WithContext(ctx).Exec("insert into switchover_log (id, timestamp, data) values ((select max(id)+1 from switchover_log), now(), ?)", data).Error l.events <- e if dbErr := sqlutil.MapError(err); dbErr 
!= nil && dbErr.Code == "23505" { diff --git a/swo/swomsg/messages.go b/swo/swomsg/messages.go index 69ab96ed64..033f46c18a 100644 --- a/swo/swomsg/messages.go +++ b/swo/swomsg/messages.go @@ -9,45 +9,65 @@ import ( type Message struct { Header - Ping *Ping `json:",omitempty"` - Pong *Pong `json:",omitempty"` - Reset *Reset `json:",omitempty"` - Error *Error `json:",omitempty"` - Claim *Claim `json:",omitempty"` - Execute *Execute `json:",omitempty"` + Ping *Ping `json:",omitempty"` + Ack *Ack `json:",omitempty"` + Reset *Reset `json:",omitempty"` + Execute *Execute `json:",omitempty"` + Error *Error `json:",omitempty"` + Plan *Plan `json:",omitempty"` + Progress *Progress `json:",omitempty"` + Done *Done `json:",omitempty"` + Hello *Hello `json:",omitempty"` } type Header struct { ID uuid.UUID NodeID uuid.UUID - TS time.Time + TS time.Time `json:"-"` } type ( - Ping struct{} - Pong struct{ IsNextDB bool } - - Reset struct{ ClaimDeadline time.Time } - Execute struct{ ClaimDeadline time.Time } - - Claim struct { - MsgID uuid.UUID + // user commands + Ping struct{} + Reset struct{} + Execute struct{} + + Hello struct { + IsOldDB bool + Status string } - Error struct{ Details string } + Ack struct { + MsgID uuid.UUID + Exec bool `json:",omitempty"` + Status string + } - Plan struct { - BeginAt time.Time - ConsensusDeadline time.Time - GlobalPauseAt time.Time - AbsoluteDeadline time.Time + // task updates + Progress struct { + MsgID uuid.UUID + Details string } - ConfirmPlan struct{ MsgID uuid.UUID } - Progress struct { + Error struct { + MsgID uuid.UUID Details string } + Done struct{ MsgID uuid.UUID } - Done struct{} + Plan struct { + // Must receive Ack from all nodes before this time. + ConsensusDeadline time.Time + + // Must receive PlanStart or Error before this time, otherwise all + // nodes will Error. + StartAt time.Time + + // All nodes should disable idle connections after this time. 
+ DisableIdleAt time.Time + + // All nodes should re-enable idle connections after this time. + Deadline time.Time + } ) /* diff --git a/swo/table.go b/swo/table.go new file mode 100644 index 0000000000..c1d6019dfe --- /dev/null +++ b/swo/table.go @@ -0,0 +1,110 @@ +package swo + +import ( + "fmt" + "strings" + + "github.com/target/goalert/util/sqlutil" +) + +// Table describes a database table for a switchover operation. +type Table struct { + Name string + Columns []Column + IDCol Column + + deps map[string]*Table +} + +func (t *Table) DependsOn(name string) bool { + return t.deps[name] != nil +} + +func (t *Table) flattenDeps() int { + var n int + for _, tbl := range t.deps { + for name, dep := range tbl.deps { + if _, ok := t.deps[name]; ok { + continue + } + t.deps[name] = dep + n++ + } + } + + return n +} + +// SkipSync returns true if the table should not be synced or instrumented with triggers. +// +// This could be because the data comes from migration or is stateful/related +// to the switchover. 
+func (t Table) SkipSync() bool { + switch t.Name { + case "switchover_state", "switchover_log", "engine_processing_versions", "gorp_migrations", "change_log": + return true + } + + return false +} + +func (t Table) QuotedName() string { + return sqlutil.QuoteID(t.Name) +} + +func (t Table) QuotedChangeTriggerName() string { + return sqlutil.QuoteID(fmt.Sprintf("zz_99_change_log_%s", t.Name)) +} + +func (t Table) QuotedLockTriggerName() string { + return sqlutil.QuoteID(fmt.Sprintf("!_change_log_%s", t.Name)) +} + +func (t Table) ColumnNames() []string { + colNames := make([]string, len(t.Columns)) + for i, col := range t.Columns { + colNames[i] = col.Name + } + return colNames +} + +func (t Table) SelectOneRowQuery() string { + return fmt.Sprintf(`select * from %s where id = cast($1 as %s)`, t.QuotedName(), t.IDCol.Type) +} + +func (t Table) DeleteOneRowQuery() string { + return fmt.Sprintf(`delete from %s where id = cast($1 as %s)`, t.QuotedName(), t.IDCol.Type) +} + +func (t Table) InsertOneRowQuery() string { + return fmt.Sprintf(` + insert into %s + select * from + json_populate_record(null::%s, $1) + as data + `, + t.QuotedName(), + t.QuotedName(), + ) +} + +func (t Table) UpdateOneRowQuery() string { + cols := make([]string, 0, len(t.Columns)) + for _, col := range t.Columns { + if col.Name == "id" { + continue + } + cols = append(cols, fmt.Sprintf(`%s = data.%s`, sqlutil.QuoteID(col.Name), sqlutil.QuoteID(col.Name))) + } + + return fmt.Sprintf(` + update %s dst + set %s + from (select * from json_populate_record(null::%s, $2)) as data + where dst.id = $1 + `, + t.QuotedName(), + strings.Join(cols, ", "), + t.QuotedName(), + ) +} From 8a34be33e08d81a2a4cccd62f3e10863cfdca066 Mon Sep 17 00:00:00 2001 From: Nathaniel Cook Date: Mon, 21 Mar 2022 14:00:42 -0700 Subject: [PATCH 011/225] add switchover route --- web/src/app/admin/AdminRouter.js | 4 +- .../app/admin/switchover/AdminSwitchover.tsx | 52 +++++++++++++++++++ web/src/app/main/routes.js | 15 ++++-- 3 
files changed, 65 insertions(+), 6 deletions(-) create mode 100644 web/src/app/admin/switchover/AdminSwitchover.tsx diff --git a/web/src/app/admin/AdminRouter.js b/web/src/app/admin/AdminRouter.js index f6db592f2b..e447bff353 100644 --- a/web/src/app/admin/AdminRouter.js +++ b/web/src/app/admin/AdminRouter.js @@ -7,6 +7,7 @@ import AdminToolbox from './AdminToolbox' import AdminDebugMessagesLayout from './admin-message-logs/AdminDebugMessagesLayout' import { useSessionInfo } from '../util/RequireConfig' import Spinner from '../loading/components/Spinner' +import AdminSwitchover from './switchover/AdminSwitchover' function AdminRouter() { const { isAdmin, ready } = useSessionInfo() @@ -20,9 +21,10 @@ function AdminRouter() { return ( } /> + } /> + } /> } /> } /> - } /> } /> diff --git a/web/src/app/admin/switchover/AdminSwitchover.tsx b/web/src/app/admin/switchover/AdminSwitchover.tsx new file mode 100644 index 0000000000..e2011278df --- /dev/null +++ b/web/src/app/admin/switchover/AdminSwitchover.tsx @@ -0,0 +1,52 @@ +import React from 'react' +import Grid from '@mui/material/Grid' +import Typography from '@mui/material/Typography' +import makeStyles from '@mui/styles/makeStyles' +import { Theme } from '@mui/material/styles' + +const useStyles = makeStyles((theme: Theme) => ({ + gridContainer: { + [theme.breakpoints.up('md')]: { + justifyContent: 'center', + }, + }, + groupTitle: { + fontSize: '1.1rem', + }, + saveDisabled: { + color: 'rgba(255, 255, 255, 0.5)', + }, +})) + +export default function AdminSwitchover(): JSX.Element { + const classes = useStyles() + + return ( + + + + + Twilio Number Lookup + + + + + + + Send SMS + + + + + ) +} diff --git a/web/src/app/main/routes.js b/web/src/app/main/routes.js index 20d5ae8890..b4b1017648 100644 --- a/web/src/app/main/routes.js +++ b/web/src/app/main/routes.js @@ -121,6 +121,16 @@ export default [ path: '/config', element: AdminRouter, }, + { + title: 'Message Logs', + path: '/message-logs', + element: AdminRouter, + 
}, + { + title: 'Switchover', + path: '/switchover', + element: AdminRouter, + }, { title: 'System Limits', path: '/limits', @@ -131,11 +141,6 @@ export default [ path: '/toolbox', element: AdminRouter, }, - { - title: 'Message Logs', - path: '/message-logs', - element: AdminRouter, - }, ], }, { From cc332530a702294ea7f62d0690a04d12174f10a4 Mon Sep 17 00:00:00 2001 From: Nathaniel Cook Date: Mon, 21 Mar 2022 14:40:48 -0700 Subject: [PATCH 012/225] add switchover action buttons --- .../app/admin/switchover/AdminSwitchover.tsx | 94 +++++++++++-------- 1 file changed, 54 insertions(+), 40 deletions(-) diff --git a/web/src/app/admin/switchover/AdminSwitchover.tsx b/web/src/app/admin/switchover/AdminSwitchover.tsx index e2011278df..95ec55cf5d 100644 --- a/web/src/app/admin/switchover/AdminSwitchover.tsx +++ b/web/src/app/admin/switchover/AdminSwitchover.tsx @@ -1,51 +1,65 @@ import React from 'react' +import Button from '@mui/material/Button' import Grid from '@mui/material/Grid' -import Typography from '@mui/material/Typography' -import makeStyles from '@mui/styles/makeStyles' -import { Theme } from '@mui/material/styles' +import PingIcon from 'mdi-material-ui/SourceCommitStartNextLocal' +import RestartIcon from '@mui/icons-material/Refresh' +import ExecuteIcon from '@mui/icons-material/Start' +import { gql, useMutation, useQuery } from '@apollo/client' -const useStyles = makeStyles((theme: Theme) => ({ - gridContainer: { - [theme.breakpoints.up('md')]: { - justifyContent: 'center', - }, - }, - groupTitle: { - fontSize: '1.1rem', - }, - saveDisabled: { - color: 'rgba(255, 255, 255, 0.5)', - }, -})) +const query = gql` + query { + SwitchOverState { + actions + status + nodes { + id + status + } + } + } +` + +const mutation = gql` + mutation ($action: SwitchoverAction!) 
{ + switchoverAction(action: $action) + } +` export default function AdminSwitchover(): JSX.Element { - const classes = useStyles() + const queryRes = useQuery(query) + const [commit, mutationRes] = useMutation(mutation) return ( - - - - - Twilio Number Lookup - - + + + + + + - - - - Send SMS - - + + ) From f01fa4a7d062457dc1071035e0298ecf190ef127 Mon Sep 17 00:00:00 2001 From: Nathaniel Cook Date: Mon, 21 Mar 2022 14:49:53 -0700 Subject: [PATCH 013/225] show the status --- web/src/app/admin/switchover/AdminSwitchover.tsx | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/web/src/app/admin/switchover/AdminSwitchover.tsx b/web/src/app/admin/switchover/AdminSwitchover.tsx index 95ec55cf5d..0a53cf902c 100644 --- a/web/src/app/admin/switchover/AdminSwitchover.tsx +++ b/web/src/app/admin/switchover/AdminSwitchover.tsx @@ -1,6 +1,9 @@ import React from 'react' import Button from '@mui/material/Button' +import Card from '@mui/material/Card' +import CardContent from '@mui/material/CardContent' import Grid from '@mui/material/Grid' +import Typography from '@mui/material/Typography' import PingIcon from 'mdi-material-ui/SourceCommitStartNextLocal' import RestartIcon from '@mui/icons-material/Refresh' import ExecuteIcon from '@mui/icons-material/Start' @@ -28,9 +31,17 @@ const mutation = gql` export default function AdminSwitchover(): JSX.Element { const queryRes = useQuery(query) const [commit, mutationRes] = useMutation(mutation) + const s = queryRes.data return ( + + + + {s?.status ?? 'Some Status'} + + + @@ -97,19 +120,29 @@ export default function AdminSwitchover(): JSX.Element { disabled={data?.isDone} size='large' variant='outlined' - startIcon={} + sx={buttonSx} > + {data?.isDone ? 
( + + ) : ( + + )} Reset From 2b8700adf8f39ff98e563d3c9aea4106c18787f8 Mon Sep 17 00:00:00 2001 From: Nathaniel Cook Date: Tue, 22 Mar 2022 13:40:10 -0700 Subject: [PATCH 033/225] work on mutation --- .../app/admin/switchover/AdminSwitchover.tsx | 115 +++++++++++++++--- web/src/app/details/Notices.tsx | 2 +- 2 files changed, 98 insertions(+), 19 deletions(-) diff --git a/web/src/app/admin/switchover/AdminSwitchover.tsx b/web/src/app/admin/switchover/AdminSwitchover.tsx index 13346294aa..3f81a546e4 100644 --- a/web/src/app/admin/switchover/AdminSwitchover.tsx +++ b/web/src/app/admin/switchover/AdminSwitchover.tsx @@ -3,6 +3,7 @@ import Button from '@mui/material/Button' import Card from '@mui/material/Card' import CardHeader from '@mui/material/CardHeader' import Grid from '@mui/material/Grid' +import Skeleton from '@mui/material/Skeleton' import Typography from '@mui/material/Typography' import PingIcon from 'mdi-material-ui/DatabaseMarker' import NoResetIcon from 'mdi-material-ui/DatabaseRefreshOutline' @@ -11,10 +12,9 @@ import NoExecuteIcon from 'mdi-material-ui/DatabaseExportOutline' import ExecuteIcon from 'mdi-material-ui/DatabaseExport' import ErrorIcon from 'mdi-material-ui/DatabaseAlert' import IdlingIcon from 'mdi-material-ui/DatabaseSettings' -import NotIdlingIcon from 'mdi-material-ui/DatabaseEdit' -import DoneIcon from 'mdi-material-ui/DatabaseCheck' -import NotDoneIcon from 'mdi-material-ui/DatabaseRemove' +import InProgressIcon from 'mdi-material-ui/DatabaseEdit' import { gql, useMutation, useQuery } from '@apollo/client' +import Notices, { Notice } from '../../details/Notices' const query = gql` query { @@ -39,15 +39,15 @@ const mutation = gql` } ` +function cptlz(s: string): string { + return s.charAt(0).toUpperCase() + s.substring(1) +} + export default function AdminSwitchover(): JSX.Element { const { loading, error, data } = useQuery(query) - const [mutationResults, setMutationResults] = useState([]) - const [commit, mutationStatus] = 
useMutation(mutation, { - onError: (error) => { - setMutationResults([...mutationResults, error.message]) - }, - }) + const [statusNotices, setStatusNotices] = useState([]) + const [commit] = useMutation(mutation) function getIcon(): React.ReactNode { if (error) { @@ -55,10 +55,17 @@ export default function AdminSwitchover(): JSX.Element { } if (loading) { - return null // todo: use skeleton + return ( + + + + ) } // todo: in progress state icon + if (!data.isIdle && !data.isDone) { + return + } if (data.isIdle) { return @@ -66,9 +73,6 @@ export default function AdminSwitchover(): JSX.Element { } function getDetails(): React.ReactNode { - const cptlz = (s: string): string => - s.charAt(0).toUpperCase() + s.substring(1) - if (error) { return {cptlz(error.message)} } @@ -89,6 +93,12 @@ export default function AdminSwitchover(): JSX.Element { const iconSx = { justifySelf: 'center', height: '1.25em', width: '1.25em' } return ( + {statusNotices.length > 0 && ( + + + + )} + + + {data?.nodes.length > 0 && + data.nodes.map((node: SWONode, idx: number) => ( + + + + + + {node.canExec ? 'Executable' : 'Not Executable'} + + + {node.oldValid ? 'Old is valid' : 'Old is invalid'} + + + {node.newValid ? 
'New is valid' : 'New is invalid'} + + + + + ))} ) } From d61834d6a1759736b1054cb68076fd1e0c6cd13f Mon Sep 17 00:00:00 2001 From: Nathaniel Cook Date: Tue, 22 Mar 2022 14:31:08 -0700 Subject: [PATCH 041/225] remove notices on success --- .../app/admin/switchover/AdminSwitchover.tsx | 30 ------------------- 1 file changed, 30 deletions(-) diff --git a/web/src/app/admin/switchover/AdminSwitchover.tsx b/web/src/app/admin/switchover/AdminSwitchover.tsx index 8d8b8ab716..5099aa551c 100644 --- a/web/src/app/admin/switchover/AdminSwitchover.tsx +++ b/web/src/app/admin/switchover/AdminSwitchover.tsx @@ -118,16 +118,6 @@ export default function AdminSwitchover(): JSX.Element { onClick={() => commit({ variables: { action: 'ping' }, - onCompleted: () => { - setStatusNotices([ - ...statusNotices, - { - type: 'success', - message: 'Successfully pinged', - endNote: DateTime.local().toFormat('fff'), - }, - ]) - }, onError: (error) => { setStatusNotices([ ...statusNotices, @@ -154,16 +144,6 @@ export default function AdminSwitchover(): JSX.Element { onClick={() => commit({ variables: { action: 'reset' }, - onCompleted: () => { - setStatusNotices([ - ...statusNotices, - { - type: 'success', - message: 'Successfully reset', - endNote: DateTime.local().toFormat('fff'), - }, - ]) - }, onError: (error) => { setStatusNotices([ ...statusNotices, @@ -195,16 +175,6 @@ export default function AdminSwitchover(): JSX.Element { onClick={() => commit({ variables: { action: 'execute' }, - onCompleted: () => { - setStatusNotices([ - ...statusNotices, - { - type: 'success', - message: 'Successfully executed', - endNote: DateTime.local().toFormat('fff'), - }, - ]) - }, onError: (error) => { setStatusNotices([ ...statusNotices, From de78e8d501f367ce9738a485969b3081c5ad2caa Mon Sep 17 00:00:00 2001 From: Nathaniel Caza Date: Tue, 22 Mar 2022 17:24:41 -0500 Subject: [PATCH 042/225] use lock to ensure log order --- swo/execute.go | 25 ++++++++++++++++++++ swo/initialsync.go | 5 ++-- swo/scantables.go | 
57 ++++++++++++++++++++++++++++------------------ swo/swomsg/log.go | 12 ++++++---- swo/table.go | 21 +---------------- 5 files changed, 71 insertions(+), 49 deletions(-) diff --git a/swo/execute.go b/swo/execute.go index 99dcfe5d77..983c60ff5e 100644 --- a/swo/execute.go +++ b/swo/execute.go @@ -27,11 +27,19 @@ func (m *Manager) DoExecute(ctx context.Context) error { */ return m.withConnFromBoth(ctx, func(ctx context.Context, oldConn, newConn *pgx.Conn) error { + m.Progressf(ctx, "enabling change log") err := EnableChangeLog(ctx, oldConn) if err != nil { return fmt.Errorf("enable change log: %w", err) } + m.Progressf(ctx, "disabling triggers") + err = DisableTriggers(ctx, newConn) + if err != nil { + return fmt.Errorf("disable triggers: %w", err) + } + + m.Progressf(ctx, "performing initial sync") err = m.InitialSync(ctx, oldConn, newConn) if err != nil { return fmt.Errorf("initial sync: %w", err) @@ -44,6 +52,23 @@ func (m *Manager) DoExecute(ctx context.Context) error { }) } +// DisableTriggers will disable all triggers in the new DB. 
+func DisableTriggers(ctx context.Context, conn *pgx.Conn) error { + tables, err := ScanTables(ctx, conn) + if err != nil { + return fmt.Errorf("scan tables: %w", err) + } + + for _, table := range tables { + _, err := conn.Exec(ctx, fmt.Sprintf("ALTER TABLE %s DISABLE TRIGGER USER", table.QuotedName())) + if err != nil { + return fmt.Errorf("%s: %w", table.Name, err) + } + } + + return nil +} + func LoopSync(ctx context.Context, oldConn, newConn *pgx.Conn) error { return nil } diff --git a/swo/initialsync.go b/swo/initialsync.go index ccc2f86cf3..2b5860fcba 100644 --- a/swo/initialsync.go +++ b/swo/initialsync.go @@ -28,8 +28,9 @@ func (m *Manager) InitialSync(ctx context.Context, oldConn, newConn *pgx.Conn) e } srcTx, err := oldConn.BeginTx(ctx, pgx.TxOptions{ - AccessMode: pgx.ReadOnly, - IsoLevel: pgx.Serializable, + AccessMode: pgx.ReadOnly, + IsoLevel: pgx.Serializable, + DeferrableMode: pgx.Deferrable, }) if err != nil { return fmt.Errorf("begin src tx: %w", err) diff --git a/swo/scantables.go b/swo/scantables.go index 847a2de9d0..685ea3a6b8 100644 --- a/swo/scantables.go +++ b/swo/scantables.go @@ -35,7 +35,7 @@ func ScanTables(ctx context.Context, conn *pgx.Conn) ([]Table, error) { []interface{}{&cRow.TableName, &cRow.Column.Name, &cRow.Column.Type, &cRow.Column.Ord}, func(pgx.QueryFuncRow) error { if tables[cRow.TableName] == nil { - tables[cRow.TableName] = &Table{Name: cRow.TableName, deps: make(map[string]*Table)} + tables[cRow.TableName] = &Table{Name: cRow.TableName, deps: make(map[string]struct{})} } tables[cRow.TableName].Columns = append(tables[cRow.TableName].Columns, cRow.Column) if cRow.Column.Name == "id" { @@ -53,7 +53,7 @@ func ScanTables(ctx context.Context, conn *pgx.Conn) ([]Table, error) { } _, err = conn.QueryFunc(ctx, fkeyRefsQuery, nil, []interface{}{&fRow.SrcName, &fRow.DstName}, func(pgx.QueryFuncRow) error { - tables[fRow.SrcName].deps[fRow.DstName] = tables[fRow.DstName] + tables[fRow.SrcName].deps[fRow.DstName] = struct{}{} 
return nil }) @@ -61,37 +61,50 @@ func ScanTables(ctx context.Context, conn *pgx.Conn) ([]Table, error) { return nil, err } - // resolve/flatten dependencies - var tableList []Table + var tableList []*Table for _, t := range tables { - tableList = append(tableList, *t) - for t.flattenDeps() > 0 { - } - - if _, ok := t.deps[t.Name]; ok { - return nil, fmt.Errorf("circular non-deferrable dependency detected: %s", t.Name) - } - } - - // sort columns by ordinal - for _, t := range tableList { sort.Slice(t.Columns, func(i, j int) bool { return t.Columns[i].Ord < t.Columns[j].Ord }) + tableList = append(tableList, t) } + // sort tables by name sort.Slice(tableList, func(i, j int) bool { - if tableList[i].DependsOn(tableList[j].Name) { - return false + return tableList[i].Name < tableList[j].Name + }) + + remove := func(i int) *Table { + t := tableList[i] + tableList = append(tableList[:i], tableList[i+1:]...) + + // delete table name from all deps + for _, t2 := range tableList { + delete(t2.deps, t.Name) } - if tableList[j].DependsOn(tableList[i].Name) { - return true + + return t + } + next := func() *Table { + for i, t := range tableList { + if len(t.deps) == 0 { + return remove(i) + } } - return tableList[i].Name < tableList[j].Name - }) + return nil + } + + var result []Table + for { + t := next() + if t == nil { + break + } + result = append(result, *t) + } - return tableList, nil + return result, nil } func (c Column) IsInteger() bool { diff --git a/swo/swomsg/log.go b/swo/swomsg/log.go index 20c8863237..cc1bebac00 100644 --- a/swo/swomsg/log.go +++ b/swo/swomsg/log.go @@ -7,7 +7,6 @@ import ( "time" "github.com/google/uuid" - "github.com/target/goalert/util/sqlutil" "gorm.io/gorm" ) @@ -134,11 +133,14 @@ func (l *Log) Append(ctx context.Context, v interface{}) error { if err != nil { return err } - err = l.db.WithContext(ctx).Exec("insert into switchover_log (id, timestamp, data) values (coalesce((select max(id)+1 from switchover_log), 1), now(), ?)", data).Error 
+ l.db.WithContext(ctx).Transaction(func(db *gorm.DB) error { + err := db.Exec("lock switchover_log in exclusive mode").Error + if err != nil { + return err + } - if dbErr := sqlutil.MapError(err); dbErr != nil && dbErr.Code == "23505" { - return ErrStaleLog - } + return db.Exec("insert into switchover_log (id, timestamp, data) values (coalesce((select max(id)+1 from switchover_log), 1), now(), ?)", data).Error + }) return err } diff --git a/swo/table.go b/swo/table.go index c1d6019dfe..d93bf45df3 100644 --- a/swo/table.go +++ b/swo/table.go @@ -13,26 +13,7 @@ type Table struct { Columns []Column IDCol Column - deps map[string]*Table -} - -func (t *Table) DependsOn(name string) bool { - return t.deps[name] != nil -} - -func (t *Table) flattenDeps() int { - var n int - for _, tbl := range t.deps { - for name, dep := range tbl.deps { - if _, ok := t.deps[name]; ok { - continue - } - t.deps[name] = dep - n++ - } - } - - return n + deps map[string]struct{} } // SkipSync returns true if the table should not be synced or instrumented with triggers. 
From 2175061f17e6edfba539910c59500d4114a833cc Mon Sep 17 00:00:00 2001 From: Nathaniel Caza Date: Tue, 22 Mar 2022 17:36:03 -0500 Subject: [PATCH 043/225] delete synced changes --- swo/execute.go | 3 ++- swo/initialsync.go | 11 ++++++++++- 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/swo/execute.go b/swo/execute.go index 983c60ff5e..039c1b205f 100644 --- a/swo/execute.go +++ b/swo/execute.go @@ -2,6 +2,7 @@ package swo import ( "context" + "errors" "fmt" "github.com/google/uuid" @@ -48,7 +49,7 @@ func (m *Manager) DoExecute(ctx context.Context) error { // sync in a loop until DB is up-to-date // err = m.LoopSync(ctx, oldConn, newConn) - return nil + return errors.New("not implemented") }) } diff --git a/swo/initialsync.go b/swo/initialsync.go index 2b5860fcba..2f38532ebf 100644 --- a/swo/initialsync.go +++ b/swo/initialsync.go @@ -60,6 +60,13 @@ func (m *Manager) InitialSync(ctx context.Context, oldConn, newConn *pgx.Conn) e } } + var changeIDs []int + var id int + srcTx.QueryFunc(ctx, "select id from change_log", nil, []interface{}{&id}, func(r pgx.QueryFuncRow) error { + changeIDs = append(changeIDs, id) + return nil + }) + m.Progressf(ctx, "commit initial sync") // Important to validate src commit, even though it's read-only. 
// @@ -75,7 +82,9 @@ func (m *Manager) InitialSync(ctx context.Context, oldConn, newConn *pgx.Conn) e return fmt.Errorf("commit dst tx: %w", err) } - return nil + // delete synced changes after tx has been committed + _, err = oldConn.Exec(ctx, "delete from change_log where id = any($1)", changeIDs) + return err } type lineCount struct { From 3e445107c89416e2821a1b2e29d4f47f2cc6a7a0 Mon Sep 17 00:00:00 2001 From: Nathaniel Caza Date: Wed, 23 Mar 2022 09:58:08 -0500 Subject: [PATCH 044/225] fix context cancel and add vacuum --- swo/initialsync.go | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/swo/initialsync.go b/swo/initialsync.go index 2f38532ebf..111ff73d20 100644 --- a/swo/initialsync.go +++ b/swo/initialsync.go @@ -84,7 +84,18 @@ func (m *Manager) InitialSync(ctx context.Context, oldConn, newConn *pgx.Conn) e // delete synced changes after tx has been committed _, err = oldConn.Exec(ctx, "delete from change_log where id = any($1)", changeIDs) - return err + if err != nil { + return fmt.Errorf("delete change log: %w", err) + } + + // vacuum analyze new DB + m.Progressf(ctx, "vacuum analyze") + _, err = newConn.Exec(ctx, "vacuum analyze") + if err != nil { + return fmt.Errorf("vacuum analyze: %w", err) + } + + return nil } type lineCount struct { @@ -105,8 +116,8 @@ func (lc *lineCount) Lines() int { return lc.n } -func (m *Manager) SyncTableInit(ctx context.Context, t Table, srcTx, dstTx pgx.Tx) error { - ctx, cancel := context.WithCancel(ctx) +func (m *Manager) SyncTableInit(origCtx context.Context, t Table, srcTx, dstTx pgx.Tx) error { + ctx, cancel := context.WithCancel(origCtx) defer cancel() var rowCount int @@ -125,7 +136,7 @@ func (m *Manager) SyncTableInit(ctx context.Context, t Table, srcTx, dstTx pgx.T prog := time.NewTimer(2 * time.Second) defer prog.Stop() for { - m.Progressf(ctx, "syncing table %s (%d/%d)", t.Name, lc.Lines(), rowCount) + m.Progressf(origCtx, "syncing table %s (%d/%d)", t.Name, lc.Lines(), 
rowCount) select { case <-ctx.Done(): pw.CloseWithError(ctx.Err()) From a32caec5096755098deb0900595828be5c62e8cf Mon Sep 17 00:00:00 2001 From: Nathaniel Caza Date: Wed, 23 Mar 2022 13:12:44 -0500 Subject: [PATCH 045/225] scan tables once --- swo/changelog.go | 9 +--- swo/changelog_table.sql | 12 +++-- swo/execute.go | 115 +++++++++++++++++++++++++++++++++++----- 3 files changed, 112 insertions(+), 24 deletions(-) diff --git a/swo/changelog.go b/swo/changelog.go index 100780ac30..ba8927bec7 100644 --- a/swo/changelog.go +++ b/swo/changelog.go @@ -16,13 +16,8 @@ var ( changelogTrigger string ) -func EnableChangeLog(ctx context.Context, conn *pgx.Conn) error { - tables, err := ScanTables(ctx, conn) - if err != nil { - return fmt.Errorf("scan tables: %w", err) - } - - _, err = conn.Exec(ctx, changelogTable) +func EnableChangeLog(ctx context.Context, tables []Table, conn *pgx.Conn) error { + _, err := conn.Exec(ctx, changelogTable) if err != nil { return fmt.Errorf("create change_log table: %w", err) } diff --git a/swo/changelog_table.sql b/swo/changelog_table.sql index d06819f943..5face683d6 100644 --- a/swo/changelog_table.sql +++ b/swo/changelog_table.sql @@ -1,9 +1,15 @@ -CREATE TABLE change_log ( +CREATE UNLOGGED TABLE change_log ( id BIGSERIAL PRIMARY KEY, op TEXT NOT NULL, table_name TEXT NOT NULL, row_id TEXT NOT NULL, tx_id BIGINT, cmd_id cid, - row_data JSONB -) + row_data JSONB DEFAULT '{}' +); + +ALTER TABLE change_log +SET ( + autovacuum_enabled = FALSE, + toast.autovacuum_enabled = FALSE + ) diff --git a/swo/execute.go b/swo/execute.go index 039c1b205f..927504dd5a 100644 --- a/swo/execute.go +++ b/swo/execute.go @@ -4,6 +4,7 @@ import ( "context" "errors" "fmt" + "time" "github.com/google/uuid" "github.com/jackc/pgx/v4" @@ -28,14 +29,20 @@ func (m *Manager) DoExecute(ctx context.Context) error { */ return m.withConnFromBoth(ctx, func(ctx context.Context, oldConn, newConn *pgx.Conn) error { + m.Progressf(ctx, "scanning tables...") + tables, err := 
ScanTables(ctx, oldConn) + if err != nil { + return fmt.Errorf("scan tables: %w", err) + } + m.Progressf(ctx, "enabling change log") - err := EnableChangeLog(ctx, oldConn) + err = EnableChangeLog(ctx, tables, oldConn) if err != nil { return fmt.Errorf("enable change log: %w", err) } m.Progressf(ctx, "disabling triggers") - err = DisableTriggers(ctx, newConn) + err = DisableTriggers(ctx, tables, newConn) if err != nil { return fmt.Errorf("disable triggers: %w", err) } @@ -46,20 +53,31 @@ func (m *Manager) DoExecute(ctx context.Context) error { return fmt.Errorf("initial sync: %w", err) } - // sync in a loop until DB is up-to-date - // err = m.LoopSync(ctx, oldConn, newConn) + getTable := func(name string) *Table { + for _, t := range tables { + if t.Name == name { + return &t + } + } + return nil + } + + for { + // sync in a loop until DB is up-to-date + n, err := LoopSync(ctx, getTable, oldConn, newConn) + if err != nil { + return fmt.Errorf("loop sync: %w", err) + } + m.Progressf(ctx, "sync: %d changes", n) + time.Sleep(5 * time.Second) + } return errors.New("not implemented") }) } // DisableTriggers will disable all triggers in the new DB. 
-func DisableTriggers(ctx context.Context, conn *pgx.Conn) error { - tables, err := ScanTables(ctx, conn) - if err != nil { - return fmt.Errorf("scan tables: %w", err) - } - +func DisableTriggers(ctx context.Context, tables []Table, conn *pgx.Conn) error { for _, table := range tables { _, err := conn.Exec(ctx, fmt.Sprintf("ALTER TABLE %s DISABLE TRIGGER USER", table.QuotedName())) if err != nil { @@ -70,14 +88,83 @@ func DisableTriggers(ctx context.Context, conn *pgx.Conn) error { return nil } -func LoopSync(ctx context.Context, oldConn, newConn *pgx.Conn) error { - return nil +func LoopSync(ctx context.Context, getTable func(string) *Table, oldConn, newConn *pgx.Conn) (int, error) { + oldTx, newTx, err := syncTx(ctx, oldConn, newConn) + if err != nil { + return 0, fmt.Errorf("sync tx: %w", err) + } + defer oldTx.Rollback(ctx) + defer newTx.Rollback(ctx) + + n, err := syncChangeLog(ctx, getTable, oldTx, newTx) + if err != nil { + return 0, fmt.Errorf("sync change log: %w", err) + } + + err = newTx.Commit(ctx) + if err != nil { + return 0, fmt.Errorf("commit dst: %w", err) + } + + err = oldTx.Commit(ctx) + if err != nil { + return 0, fmt.Errorf("commit src: %w", err) + } + + return n, nil } func FinalSync(ctx context.Context, oldConn, newConn *pgx.Conn) error { return nil } -func syncChanges(ctx context.Context, oldConn, newConn pgx.Tx) (int, error) { - return 0, nil +func syncTx(ctx context.Context, oldConn, newConn *pgx.Conn) (old, new pgx.Tx, err error) { + srcTx, err := oldConn.BeginTx(ctx, pgx.TxOptions{ + AccessMode: pgx.ReadWrite, + IsoLevel: pgx.Serializable, + DeferrableMode: pgx.Deferrable, + }) + if err != nil { + return nil, nil, fmt.Errorf("begin src: %w", err) + } + + dstTx, err := newConn.BeginTx(ctx, pgx.TxOptions{}) + if err != nil { + srcTx.Rollback(ctx) + return nil, nil, fmt.Errorf("begin dst: %w", err) + } + + return srcTx, dstTx, nil +} + +func syncChangeLog(ctx context.Context, getTable func(string) *Table, oldConn, newConn pgx.Tx) (int, 
error) { + var b pgx.Batch + var rowID, table, op string + var data []byte + var n int + _, err := oldConn.QueryFunc(ctx, "delete from change_log returning table_name, op, row_id, row_data", nil, []interface{}{&table, &op, &rowID, &data}, func(pgx.QueryFuncRow) error { + t := getTable(table) + if t == nil { + return fmt.Errorf("unknown table: %s", table) + } + + switch op { + case "INSERT": + b.Queue(t.InsertOneRowQuery(), data) + case "UPDATE": + b.Queue(t.UpdateOneRowQuery(), rowID, data) + case "DELETE": + b.Queue(t.DeleteOneRowQuery(), rowID) + default: + return fmt.Errorf("unknown op: %s", op) + } + n++ + + return nil + }) + if err != nil { + return 0, fmt.Errorf("query changes: %w", err) + } + + return n, newConn.SendBatch(ctx, &b).Close() } From 9a8c44867582ee19d5fe9905cdd68929a3f2df4a Mon Sep 17 00:00:00 2001 From: Nathaniel Caza Date: Wed, 23 Mar 2022 13:30:41 -0500 Subject: [PATCH 046/225] old query --- swo/execute.go | 34 +++++++++++++++++++++++++++++----- 1 file changed, 29 insertions(+), 5 deletions(-) diff --git a/swo/execute.go b/swo/execute.go index 927504dd5a..67bf65210c 100644 --- a/swo/execute.go +++ b/swo/execute.go @@ -69,7 +69,9 @@ func (m *Manager) DoExecute(ctx context.Context) error { return fmt.Errorf("loop sync: %w", err) } m.Progressf(ctx, "sync: %d changes", n) - time.Sleep(5 * time.Second) + if n == 0 { + time.Sleep(time.Second) + } } return errors.New("not implemented") @@ -137,12 +139,27 @@ func syncTx(ctx context.Context, oldConn, newConn *pgx.Conn) (old, new pgx.Tx, e return srcTx, dstTx, nil } +const fetchQuery = ` +with tx_max_id as ( + select max(id), tx_id + from change_log + group by tx_id +) +select id, op, table_name, row_id, row_data +from change_log c +join tx_max_id max_id on max_id.tx_id = c.tx_id +order by + max_id.max, + cmd_id::text::int +` + func syncChangeLog(ctx context.Context, getTable func(string) *Table, oldConn, newConn pgx.Tx) (int, error) { var b pgx.Batch var rowID, table, op string var data []byte - var n 
int - _, err := oldConn.QueryFunc(ctx, "delete from change_log returning table_name, op, row_id, row_data", nil, []interface{}{&table, &op, &rowID, &data}, func(pgx.QueryFuncRow) error { + var toDelete []int + var id int + _, err := oldConn.QueryFunc(ctx, fetchQuery, nil, []interface{}{&id, &op, &table, &rowID, &data}, func(pgx.QueryFuncRow) error { t := getTable(table) if t == nil { return fmt.Errorf("unknown table: %s", table) @@ -158,7 +175,7 @@ func syncChangeLog(ctx context.Context, getTable func(string) *Table, oldConn, n default: return fmt.Errorf("unknown op: %s", op) } - n++ + toDelete = append(toDelete, id) return nil }) @@ -166,5 +183,12 @@ func syncChangeLog(ctx context.Context, getTable func(string) *Table, oldConn, n return 0, fmt.Errorf("query changes: %w", err) } - return n, newConn.SendBatch(ctx, &b).Close() + err = newConn.SendBatch(ctx, &b).Close() + if err != nil { + return 0, fmt.Errorf("send batch: %w", err) + } + + _, err = oldConn.Exec(ctx, "delete from change_log where id = any($1)", toDelete) + + return len(toDelete), err } From 9ffe993785650c29b94ee7f29e3cf84b8f4d2c1c Mon Sep 17 00:00:00 2001 From: Nathaniel Caza Date: Thu, 24 Mar 2022 13:46:28 -0500 Subject: [PATCH 047/225] fix deps --- Makefile.binaries.mk | 2 +- devtools/genmake/template.mk | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Makefile.binaries.mk b/Makefile.binaries.mk index 598704f561..e2698b5af5 100644 --- a/Makefile.binaries.mk +++ b/Makefile.binaries.mk @@ -5,7 +5,7 @@ BIN_DIR=bin GO_DEPS := Makefile.binaries.mk $(shell find . 
-path ./web/src -prune -o -path ./vendor -prune -o -path ./.git -prune -o -type f -name "*.go" -print) go.sum -GO_DEPS += migrate/migrations/ migrate/migrations/*.sql graphql2/graphqlapp/playground.html web/index.html graphql2/graphqlapp/slack.manifest.yaml +GO_DEPS += migrate/migrations/ migrate/migrations/*.sql graphql2/graphqlapp/playground.html web/index.html graphql2/graphqlapp/slack.manifest.yaml swo/*.sql GO_DEPS += graphql2/mapconfig.go graphql2/maplimit.go graphql2/generated.go graphql2/models_gen.go ifdef BUNDLE diff --git a/devtools/genmake/template.mk b/devtools/genmake/template.mk index 1e534bc720..a604c86d66 100644 --- a/devtools/genmake/template.mk +++ b/devtools/genmake/template.mk @@ -3,7 +3,7 @@ BIN_DIR=bin GO_DEPS := Makefile.binaries.mk $(shell find . -path ./web/src -prune -o -path ./vendor -prune -o -path ./.git -prune -o -type f -name "*.go" -print) go.sum -GO_DEPS += migrate/migrations/ migrate/migrations/*.sql graphql2/graphqlapp/playground.html web/index.html graphql2/graphqlapp/slack.manifest.yaml +GO_DEPS += migrate/migrations/ migrate/migrations/*.sql graphql2/graphqlapp/playground.html web/index.html graphql2/graphqlapp/slack.manifest.yaml swo/*.sql GO_DEPS += graphql2/mapconfig.go graphql2/maplimit.go graphql2/generated.go graphql2/models_gen.go ifdef BUNDLE From 52fdff02c9b1865a6b6baf85cb0efbbad9f32c81 Mon Sep 17 00:00:00 2001 From: Nathaniel Caza Date: Thu, 24 Mar 2022 13:50:49 -0500 Subject: [PATCH 048/225] wait for log poll before returning --- swo/manager.go | 4 ++++ swo/swomsg/log.go | 4 +++- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/swo/manager.go b/swo/manager.go index cdb025718a..77d492666a 100644 --- a/swo/manager.go +++ b/swo/manager.go @@ -5,6 +5,7 @@ import ( "database/sql" "database/sql/driver" "fmt" + "time" "github.com/google/uuid" "github.com/jackc/pgx/v4" @@ -191,6 +192,7 @@ func (m *Manager) Status() *Status { return m.msgState.Status() } // SendPing will ping all nodes in the cluster. 
func (m *Manager) SendPing(ctx context.Context) error { + defer time.Sleep(swomsg.PollInterval * 2) // wait for send & ack return m.msgLog.Append(ctx, swomsg.Ping{}) } @@ -199,6 +201,7 @@ func (m *Manager) SendReset(ctx context.Context) error { if m.Status().IsDone { return fmt.Errorf("cannot reset switch-over: switch-over is done") } + defer time.Sleep(swomsg.PollInterval * 2) // wait for send & ack return m.msgLog.Append(ctx, swomsg.Reset{}) } @@ -207,6 +210,7 @@ func (m *Manager) SendExecute(ctx context.Context) error { if !m.Status().IsIdle { return fmt.Errorf("cannot execute switch-over: switch-over is not idle") } + defer time.Sleep(swomsg.PollInterval * 3) // wait for send, ack, and start return m.msgLog.Append(ctx, swomsg.Execute{}) } diff --git a/swo/swomsg/log.go b/swo/swomsg/log.go index cc1bebac00..8dbdc9c106 100644 --- a/swo/swomsg/log.go +++ b/swo/swomsg/log.go @@ -10,6 +10,8 @@ import ( "gorm.io/gorm" ) +const PollInterval = time.Second + type Log struct { db *gorm.DB id uuid.UUID @@ -79,7 +81,7 @@ func (l *Log) Next(ctx context.Context) (*Message, error) { } func (l *Log) loadEvents(ctx context.Context) error { - err := ctxSleep(ctx, time.Second-time.Since(l.lastLoad)) + err := ctxSleep(ctx, PollInterval-time.Since(l.lastLoad)) if err != nil { return err } From 5808ca48ead4ab408c3984daed9ea8235f1d22dd Mon Sep 17 00:00:00 2001 From: Nathaniel Caza Date: Thu, 24 Mar 2022 14:16:06 -0500 Subject: [PATCH 049/225] fix reset state --- swo/manager.go | 20 +++++++++++--------- swo/state.go | 22 ++++++++++++++++------ 2 files changed, 27 insertions(+), 15 deletions(-) diff --git a/swo/manager.go b/swo/manager.go index 77d492666a..f40df24ad8 100644 --- a/swo/manager.go +++ b/swo/manager.go @@ -125,15 +125,17 @@ func NewManager(cfg Config) (*Manager, error) { }() go func() { <-m.ready - msg, err := m.nextMsgLog.Next(ctx) - if err != nil { - m.errCh <- fmt.Errorf("read from next log: %w", err) - return - } - err = m.msgState.processFromNew(ctx, msg) - if err != 
nil { - m.errCh <- fmt.Errorf("process from new db log: %w", err) - return + for { + msg, err := m.nextMsgLog.Next(ctx) + if err != nil { + m.errCh <- fmt.Errorf("read from next log: %w", err) + return + } + err = m.msgState.processFromNew(ctx, msg) + if err != nil { + m.errCh <- fmt.Errorf("process from new db log: %w", err) + return + } } }() diff --git a/swo/state.go b/swo/state.go index 005e3f221e..698d375741 100644 --- a/swo/state.go +++ b/swo/state.go @@ -4,6 +4,7 @@ import ( "context" "fmt" "sync" + "time" "github.com/google/uuid" "github.com/target/goalert/swo/swomsg" @@ -115,6 +116,10 @@ func (s *state) hello(ctx context.Context) error { if err != nil { return err } + + // wait for poll interval before sending to new DB, + // giving all nodes a chance to process + time.Sleep(swomsg.PollInterval) err = s.m.nextMsgLog.Append(ctx, swomsg.Hello{IsNewDB: true, Status: s.stateName, CanExec: s.m.canExec}) if err != nil { return err @@ -129,14 +134,19 @@ func (s *state) processFromNew(ctx context.Context, msg *swomsg.Message) error { s.mx.Lock() defer s.mx.Unlock() + n, ok := s.nodes[msg.NodeID] - if !ok { - n = &Node{ - ID: msg.NodeID, - } - s.nodes[msg.NodeID] = n + if ok { + n.NewValid = msg.Hello.IsNewDB + return nil + } + + s.nodes[msg.NodeID] = &Node{ + ID: msg.NodeID, + CanExec: msg.Hello.CanExec, + NewValid: msg.Hello.IsNewDB, + Status: msg.Hello.Status, } - n.NewValid = msg.Hello.IsNewDB return nil } From 592edcbaba054785e6ea9112b48ad13734efdcf8 Mon Sep 17 00:00:00 2001 From: Nathaniel Caza Date: Thu, 24 Mar 2022 14:30:48 -0500 Subject: [PATCH 050/225] consistent language --- app/cmd.go | 2 +- docs/getting-started.md | 2 +- swo/changelog_table.sql | 6 +++--- swo/changelog_trigger.sql | 38 ++++++++++++++++++++++++++++++-------- swo/manager.go | 12 ++++++------ swo/state.go | 2 +- 6 files changed, 42 insertions(+), 20 deletions(-) diff --git a/app/cmd.go b/app/cmd.go index 11070d7631..7585a96e9c 100644 --- a/app/cmd.go +++ b/app/cmd.go @@ -735,7 +735,7 @@ 
func init() { RootCmd.Flags().String("region-name", def.RegionName, "Name of region for message processing (case sensitive). Only one instance per-region-name will process outgoing messages.") RootCmd.PersistentFlags().String("db-url", def.DBURL, "Connection string for Postgres.") - RootCmd.PersistentFlags().String("db-url-next", def.DBURLNext, "Connection string for the *next* Postgres server (enables DB switch-over mode).") + RootCmd.PersistentFlags().String("db-url-next", def.DBURLNext, "Connection string for the *next* Postgres server (enables DB switchover mode).") RootCmd.Flags().String("jaeger-endpoint", def.JaegerEndpoint, "Jaeger HTTP Thrift endpoint") RootCmd.Flags().String("jaeger-agent-endpoint", def.JaegerAgentEndpoint, "Instructs Jaeger exporter to send spans to jaeger-agent at this address.") diff --git a/docs/getting-started.md b/docs/getting-started.md index a4d6494750..3e85554604 100644 --- a/docs/getting-started.md +++ b/docs/getting-started.md @@ -87,7 +87,7 @@ Global Flags: --data-encryption-key string Encryption key for sensitive data like signing keys. Used for encrypting new and decrypting existing data. --data-encryption-key-old string Fallback key. Used for decrypting existing data only. --db-url string Connection string for Postgres. - --db-url-next string Connection string for the *next* Postgres server (enables DB switch-over mode). + --db-url-next string Connection string for the *next* Postgres server (enables DB switchover mode). --json Log in JSON format. --stack-traces Enables stack traces with all error logs. -v, --verbose Enable verbose logging. 
diff --git a/swo/changelog_table.sql b/swo/changelog_table.sql index 5face683d6..877efae437 100644 --- a/swo/changelog_table.sql +++ b/swo/changelog_table.sql @@ -3,9 +3,9 @@ CREATE UNLOGGED TABLE change_log ( op TEXT NOT NULL, table_name TEXT NOT NULL, row_id TEXT NOT NULL, - tx_id BIGINT, - cmd_id cid, - row_data JSONB DEFAULT '{}' + row_data JSONB DEFAULT '{}', + old_hash bytea, + new_hash bytea ); ALTER TABLE change_log diff --git a/swo/changelog_trigger.sql b/swo/changelog_trigger.sql index b9a617b24f..0170f3c011 100644 --- a/swo/changelog_trigger.sql +++ b/swo/changelog_trigger.sql @@ -11,30 +11,52 @@ IF cur_state != 'in_progress' THEN RETURN NEW; END IF; IF (TG_OP = 'DELETE') THEN -INSERT INTO change_log (op, table_name, row_id, tx_id, cmd_id) +INSERT INTO change_log (op, table_name, row_id, old_hash) VALUES ( TG_OP, TG_TABLE_NAME, cast(OLD .id AS TEXT), - txid_current(), - OLD .cmax + sha256(OLD::TEXT::BYTEA) ); RETURN OLD; -ELSE -INSERT INTO change_log (op, table_name, row_id, tx_id, cmd_id, row_data) +ELSIF (TG_OP = 'UPDATE') THEN +INSERT INTO change_log ( + op, + table_name, + row_id, + row_data, + new_hash, + old_hash + ) VALUES ( TG_OP, TG_TABLE_NAME, cast(NEW .id AS TEXT), - txid_current(), - NEW .cmin, - to_jsonb(NEW) + to_jsonb(NEW), + sha256(NEW::TEXT::BYTEA), + sha256(OLD::TEXT::BYTEA) ); RETURN NEW; +ELSIF (TG_OP = 'INSERT') THEN +INSERT INTO change_log (op, table_name, row_id, row_data, new_hash) +VALUES ( + TG_OP, + TG_TABLE_NAME, + cast(NEW .id AS TEXT), + to_jsonb(NEW), + sha256(NEW::TEXT::BYTEA) + ); + +RETURN NEW; + +ELSE RAISE +EXCEPTION 'Unexpected operation in switchover mode: %', + TG_OP; + END IF; RETURN NULL; diff --git a/swo/manager.go b/swo/manager.go index f40df24ad8..a2f7929a36 100644 --- a/swo/manager.go +++ b/swo/manager.go @@ -189,7 +189,7 @@ func WithLockedConn(ctx context.Context, db *sql.DB, runFunc func(context.Contex }) } -// Status will return the current switch-over status. 
+// Status will return the current switchover status. func (m *Manager) Status() *Status { return m.msgState.Status() } // SendPing will ping all nodes in the cluster. @@ -198,19 +198,19 @@ func (m *Manager) SendPing(ctx context.Context) error { return m.msgLog.Append(ctx, swomsg.Ping{}) } -// SendReset will trigger a reset of the switch-over. +// SendReset will trigger a reset of the switchover. func (m *Manager) SendReset(ctx context.Context) error { if m.Status().IsDone { - return fmt.Errorf("cannot reset switch-over: switch-over is done") + return fmt.Errorf("cannot reset switchover: switchover is done") } defer time.Sleep(swomsg.PollInterval * 2) // wait for send & ack return m.msgLog.Append(ctx, swomsg.Reset{}) } -// SendExecute will trigger the switch-over to begin. +// SendExecute will trigger the switchover to begin. func (m *Manager) SendExecute(ctx context.Context) error { if !m.Status().IsIdle { - return fmt.Errorf("cannot execute switch-over: switch-over is not idle") + return fmt.Errorf("cannot execute switchover: switchover is not idle") } defer time.Sleep(swomsg.PollInterval * 3) // wait for send, ack, and start return m.msgLog.Append(ctx, swomsg.Execute{}) @@ -225,6 +225,6 @@ type Status struct { // IsDone is true if the switch has already been completed. IsDone bool - // IsIdle must be true before executing a switch-over. + // IsIdle must be true before executing a switchover. IsIdle bool } diff --git a/swo/state.go b/swo/state.go index 698d375741..5866f46b53 100644 --- a/swo/state.go +++ b/swo/state.go @@ -230,7 +230,7 @@ func StateExecWait(ctx context.Context, s *state, msg *swomsg.Message) StateFunc return StateExecWait } -// StateExecRun is the state when the current node is executing the switch-over. +// StateExecRun is the state when the current node is executing the switchover. 
func StateExecRun(ctx context.Context, s *state, msg *swomsg.Message) StateFunc { s.stateName = "exec-run" From 2c4c5347b3bf0dd9146047c92d87ae34ce5b5d81 Mon Sep 17 00:00:00 2001 From: Nathaniel Cook Date: Thu, 24 Mar 2022 12:50:00 -0700 Subject: [PATCH 051/225] consistent ui language --- web/src/app/main/routes.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/web/src/app/main/routes.js b/web/src/app/main/routes.js index b4b1017648..70f5043a1c 100644 --- a/web/src/app/main/routes.js +++ b/web/src/app/main/routes.js @@ -127,7 +127,7 @@ export default [ element: AdminRouter, }, { - title: 'Switchover', + title: 'DB Switchover', path: '/switchover', element: AdminRouter, }, From a7e557135515d375484cfc858e19a14fcd0b3dfb Mon Sep 17 00:00:00 2001 From: Nathaniel Cook Date: Thu, 24 Mar 2022 12:50:24 -0700 Subject: [PATCH 052/225] show db status idle/in prog/complete text in main card --- .../app/admin/switchover/AdminSwitchover.tsx | 84 ++++++++++++------- 1 file changed, 54 insertions(+), 30 deletions(-) diff --git a/web/src/app/admin/switchover/AdminSwitchover.tsx b/web/src/app/admin/switchover/AdminSwitchover.tsx index 5099aa551c..b6ca18ada5 100644 --- a/web/src/app/admin/switchover/AdminSwitchover.tsx +++ b/web/src/app/admin/switchover/AdminSwitchover.tsx @@ -6,13 +6,14 @@ import CardHeader from '@mui/material/CardHeader' import Grid from '@mui/material/Grid' import Skeleton from '@mui/material/Skeleton' import Typography from '@mui/material/Typography' +import { SvgIconProps, TypographyProps } from '@mui/material' import PingIcon from 'mdi-material-ui/DatabaseMarker' import NoResetIcon from 'mdi-material-ui/DatabaseRefreshOutline' import ResetIcon from 'mdi-material-ui/DatabaseRefresh' import NoExecuteIcon from 'mdi-material-ui/DatabaseExportOutline' import ExecuteIcon from 'mdi-material-ui/DatabaseExport' import ErrorIcon from 'mdi-material-ui/DatabaseAlert' -import IdlingIcon from 'mdi-material-ui/DatabaseSettings' +import IdleIcon from 
'mdi-material-ui/DatabaseSettings' import InProgressIcon from 'mdi-material-ui/DatabaseEdit' import { gql, useMutation, useQuery } from '@apollo/client' import Notices, { Notice } from '../../details/Notices' @@ -54,24 +55,49 @@ export default function AdminSwitchover(): JSX.Element { const [commit] = useMutation(mutation) function getIcon(): React.ReactNode { + const i: SvgIconProps = { color: 'primary', sx: { fontSize: '3.5rem' } } + const t: TypographyProps = { + variant: 'caption', + sx: { display: 'flex' }, + flexDirection: 'column', + } + if (error) { - return + return ( + + + Error + + ) } if (loading) { return ( - - - + + + + + Loading... + ) } if (!data.isIdle && !data.isDone) { - return + return ( + + + In Progress + + ) } if (data.isIdle) { - return + return ( + + + Idle + + ) } } @@ -80,10 +106,6 @@ export default function AdminSwitchover(): JSX.Element { return {cptlz(error.message)} } - if (loading) { - return Loading... - } - if (data?.details) { return {cptlz(data.details)} } @@ -202,25 +224,27 @@ export default function AdminSwitchover(): JSX.Element { - {data?.nodes.length > 0 && - data.nodes.map((node: SWONode, idx: number) => ( - - - - - - {node.canExec ? 'Executable' : 'Not Executable'} - - - {node.oldValid ? 'Old is valid' : 'Old is invalid'} - - - {node.newValid ? 'New is valid' : 'New is invalid'} - - - - - ))} + + {data?.nodes.length > 0 && + data.nodes.map((node: SWONode, idx: number) => ( + + + + + + {node.canExec ? 'Executable' : 'Not Executable'} + + + {node.oldValid ? 'Old is valid' : 'Old is invalid'} + + + {node.newValid ? 
'New is valid' : 'New is invalid'} + + + + + ))} + ) } From 7a47b4a7fe32359351c28cf0a61b66de59714c3d Mon Sep 17 00:00:00 2001 From: Nathaniel Cook Date: Thu, 24 Mar 2022 14:08:34 -0700 Subject: [PATCH 053/225] refactor layout --- .../app/admin/switchover/AdminSwitchover.tsx | 316 +++++++++--------- 1 file changed, 166 insertions(+), 150 deletions(-) diff --git a/web/src/app/admin/switchover/AdminSwitchover.tsx b/web/src/app/admin/switchover/AdminSwitchover.tsx index b6ca18ada5..2835ed030b 100644 --- a/web/src/app/admin/switchover/AdminSwitchover.tsx +++ b/web/src/app/admin/switchover/AdminSwitchover.tsx @@ -1,12 +1,18 @@ import React, { useState } from 'react' import Button from '@mui/material/Button' +import ButtonGroup from '@mui/material/ButtonGroup' import Card from '@mui/material/Card' import CardContent from '@mui/material/CardContent' import CardHeader from '@mui/material/CardHeader' +import Divider from '@mui/material/Divider' import Grid from '@mui/material/Grid' +import List from '@mui/material/List' +import ListItem from '@mui/material/ListItem' +import ListItemText from '@mui/material/ListItemText' +import ListItemSecondaryAction from '@mui/material/ListItemSecondaryAction' import Skeleton from '@mui/material/Skeleton' import Typography from '@mui/material/Typography' -import { SvgIconProps, TypographyProps } from '@mui/material' +import { useTheme, SvgIconProps } from '@mui/material' import PingIcon from 'mdi-material-ui/DatabaseMarker' import NoResetIcon from 'mdi-material-ui/DatabaseRefreshOutline' import ResetIcon from 'mdi-material-ui/DatabaseRefresh' @@ -16,6 +22,8 @@ import ErrorIcon from 'mdi-material-ui/DatabaseAlert' import IdleIcon from 'mdi-material-ui/DatabaseSettings' import InProgressIcon from 'mdi-material-ui/DatabaseEdit' import { gql, useMutation, useQuery } from '@apollo/client' +import TrueIcon from 'mdi-material-ui/CheckboxMarkedCircleOutline' +import FalseIcon from 'mdi-material-ui/CloseCircleOutline' import Notices, { Notice } 
from '../../details/Notices' import { DateTime } from 'luxon' import { SWONode } from '../../../schema' @@ -48,6 +56,7 @@ function cptlz(s: string): string { } export default function AdminSwitchover(): JSX.Element { + const theme = useTheme() const { loading, error, data: _data } = useQuery(query) const data = _data?.swoStatus @@ -56,66 +65,43 @@ export default function AdminSwitchover(): JSX.Element { function getIcon(): React.ReactNode { const i: SvgIconProps = { color: 'primary', sx: { fontSize: '3.5rem' } } - const t: TypographyProps = { - variant: 'caption', - sx: { display: 'flex' }, - flexDirection: 'column', - } if (error) { - return ( - - - Error - - ) + return } - if (loading) { return ( - - - - - Loading... - + + + ) } - if (!data.isIdle && !data.isDone) { - return ( - - - In Progress - - ) + return } - if (data.isIdle) { - return ( - - - Idle - - ) + return } } + function getSubheader(): React.ReactNode { + if (error) return 'Error' + if (loading) return 'Loading...' + if (!data.isIdle && !data.isDone) return 'In progress' + if (data.isIdle) return 'Idle' + return null + } + function getDetails(): React.ReactNode { if (error) { return {cptlz(error.message)} } - if (data?.details) { - return {cptlz(data.details)} + return cptlz(data.details) } - - return null + return 'Testing some details yeehaw' } - const minHeight = 90 - const buttonSx = { display: 'grid', minHeight, minWidth: minHeight } - const iconSx = { justifySelf: 'center', height: '1.25em', width: '1.25em' } return ( {statusNotices.length > 0 && ( @@ -124,126 +110,156 @@ export default function AdminSwitchover(): JSX.Element { )} - - + + + + {getDetails()} + + + + + + - - - - - - - - - - - + {data?.nodes.length > 0 && - data.nodes.map((node: SWONode, idx: number) => ( - - - - - - {node.canExec ? 'Executable' : 'Not Executable'} - - - {node.oldValid ? 'Old is valid' : 'Old is invalid'} - - - {node.newValid ? 
'New is valid' : 'New is invalid'} + data.nodes + .sort((a: SWONode, b: SWONode) => { + if (a.id < b.id) return 1 + if (a.id > b.id) return -1 + return 0 + }) + .map((node: SWONode, idx: number) => ( + + + + Node {idx + 1} - - - - ))} + + + + Status: {node.status} + + + + } + > + + + + {node.canExec ? ( + + ) : ( + + )} + + + + + + {node.oldValid ? ( + + ) : ( + + )} + + + + + + {node.newValid ? ( + + ) : ( + + )} + + + + + + ))} ) From a60ae5a79067a420ee9fc5b528f0e0561e542c42 Mon Sep 17 00:00:00 2001 From: Nathaniel Cook Date: Thu, 24 Mar 2022 14:13:25 -0700 Subject: [PATCH 054/225] reduce padding in main card --- web/src/app/admin/switchover/AdminSwitchover.tsx | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/web/src/app/admin/switchover/AdminSwitchover.tsx b/web/src/app/admin/switchover/AdminSwitchover.tsx index 2835ed030b..d5308d5a3f 100644 --- a/web/src/app/admin/switchover/AdminSwitchover.tsx +++ b/web/src/app/admin/switchover/AdminSwitchover.tsx @@ -117,10 +117,10 @@ export default function AdminSwitchover(): JSX.Element { titleTypographyProps={{ sx: { fontSize: '1.25rem' } }} avatar={getIcon()} subheader={getSubheader()} - sx={{ '& .MuiCardHeader-content': { alignSelf: 'start' } }} + sx={{ pb: 0 }} /> - {getDetails()} + {getDetails()} - - + From bfa6843d414b8ec229a25b6eedfef9375ab97d22 Mon Sep 17 00:00:00 2001 From: Nathaniel Caza Date: Thu, 24 Mar 2022 16:50:11 -0500 Subject: [PATCH 059/225] fix text --- web/src/app/admin/switchover/AdminSwitchover.tsx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/web/src/app/admin/switchover/AdminSwitchover.tsx b/web/src/app/admin/switchover/AdminSwitchover.tsx index 475ec29611..9c57af8b46 100644 --- a/web/src/app/admin/switchover/AdminSwitchover.tsx +++ b/web/src/app/admin/switchover/AdminSwitchover.tsx @@ -94,7 +94,7 @@ export default function AdminSwitchover(): JSX.Element { ) } if (data?.details) { - return cptlz(data.details) + return {cptlz(data.details)} } return null } From 
2175578b73362fc60327b2a39e405d5f146b2741 Mon Sep 17 00:00:00 2001 From: Nathaniel Caza Date: Thu, 24 Mar 2022 17:17:15 -0500 Subject: [PATCH 060/225] fix sync --- swo/execute.go | 8 +++----- swo/table.go | 12 +++++++++++- 2 files changed, 14 insertions(+), 6 deletions(-) diff --git a/swo/execute.go b/swo/execute.go index b6e4109e58..5a6f3e34e9 100644 --- a/swo/execute.go +++ b/swo/execute.go @@ -148,7 +148,7 @@ func syncChangeLog(ctx context.Context, rt *rowTracker, oldConn, newConn pgx.Tx) var r rowID changes := make(map[rowID]struct{}) rowIDs := make(map[string][]string) - _, err := oldConn.QueryFunc(ctx, "delete from change_log returning table_name, row_id", nil, []interface{}{&r.id, &r.table}, func(pgx.QueryFuncRow) error { + _, err := oldConn.QueryFunc(ctx, "delete from change_log returning table_name, row_id", nil, []interface{}{&r.table, &r.id}, func(pgx.QueryFuncRow) error { if _, ok := changes[r]; ok { return nil } @@ -168,7 +168,6 @@ func syncChangeLog(ctx context.Context, rt *rowTracker, oldConn, newConn pgx.Tx) var deletes []pendingDelete // go in insert order for fetching updates/inserts, note deleted rows - // then process deletes in reverse table order for _, table := range rt.tables { if len(rowIDs[table.Name]) == 0 { continue @@ -201,7 +200,7 @@ func syncChangeLog(ctx context.Context, rt *rowTracker, oldConn, newConn pgx.Tx) } } - return 0, nil + return len(changes), nil } func (rt *rowTracker) apply(ctx context.Context, newConn pgx.Tx, q string, rows []syncRow) error { @@ -215,11 +214,10 @@ func (rt *rowTracker) apply(ctx context.Context, newConn pgx.Tx, q string, rows rt.Insert(row.table, row.id) } - data, err := json.Marshal(rows) + data, err := json.Marshal(rowsData) if err != nil { return fmt.Errorf("marshal rows: %w", err) } - _, err = newConn.Exec(ctx, q, data) if err != nil { return fmt.Errorf("exec: %w", err) diff --git a/swo/table.go b/swo/table.go index 277d97b401..0eb7857852 100644 --- a/swo/table.go +++ b/swo/table.go @@ -2,6 +2,7 @@ 
package swo import ( "fmt" + "strings" "github.com/target/goalert/util/sqlutil" ) @@ -68,9 +69,18 @@ func (t Table) InsertRowsQuery() string { } func (t Table) UpdateRowsQuery() string { + var set strings.Builder + for i, col := range t.Columns { + if i > 0 { + set.WriteString(", ") + } + set.WriteString(fmt.Sprintf("%s = data.%s", col.Name, col.Name)) + } + return fmt.Sprintf(` update %s dst + set %s from json_populate_recordset(null::%s, $1) as data where dst.id = data.id - `, t.QuotedName(), t.QuotedName()) + `, t.QuotedName(), set.String(), t.QuotedName()) } From 4821563d38badc90498cad3a0c9ac4c373d15361 Mon Sep 17 00:00:00 2001 From: Nathaniel Caza Date: Thu, 24 Mar 2022 17:21:27 -0500 Subject: [PATCH 061/225] add isexec/reset --- graphql2/generated.go | 117 +++++++++++++++++++++++++++++++++++-- graphql2/graphqlapp/swo.go | 10 ++-- graphql2/models_gen.go | 10 ++-- graphql2/schema.graphql | 3 + swo/manager.go | 6 ++ swo/state.go | 15 ++++- 6 files changed, 147 insertions(+), 14 deletions(-) diff --git a/graphql2/generated.go b/graphql2/generated.go index 1a1d851a30..c564a41113 100644 --- a/graphql2/generated.go +++ b/graphql2/generated.go @@ -396,10 +396,12 @@ type ComplexityRoot struct { } SWOStatus struct { - Details func(childComplexity int) int - IsDone func(childComplexity int) int - IsIdle func(childComplexity int) int - Nodes func(childComplexity int) int + Details func(childComplexity int) int + IsDone func(childComplexity int) int + IsExecuting func(childComplexity int) int + IsIdle func(childComplexity int) int + IsResetting func(childComplexity int) int + Nodes func(childComplexity int) int } Schedule struct { @@ -2655,6 +2657,13 @@ func (e *executableSchema) Complexity(typeName, field string, childComplexity in return e.complexity.SWOStatus.IsDone(childComplexity), true + case "SWOStatus.isExecuting": + if e.complexity.SWOStatus.IsExecuting == nil { + break + } + + return e.complexity.SWOStatus.IsExecuting(childComplexity), true + case 
"SWOStatus.isIdle": if e.complexity.SWOStatus.IsIdle == nil { break @@ -2662,6 +2671,13 @@ func (e *executableSchema) Complexity(typeName, field string, childComplexity in return e.complexity.SWOStatus.IsIdle(childComplexity), true + case "SWOStatus.isResetting": + if e.complexity.SWOStatus.IsResetting == nil { + break + } + + return e.complexity.SWOStatus.IsResetting(childComplexity), true + case "SWOStatus.nodes": if e.complexity.SWOStatus.Nodes == nil { break @@ -3607,6 +3623,9 @@ type SWOStatus { isIdle: Boolean! isDone: Boolean! + isResetting: Boolean! + isExecuting: Boolean! + details: String! nodes: [SWONode!]! @@ -13931,6 +13950,76 @@ func (ec *executionContext) _SWOStatus_isDone(ctx context.Context, field graphql return ec.marshalNBoolean2bool(ctx, field.Selections, res) } +func (ec *executionContext) _SWOStatus_isResetting(ctx context.Context, field graphql.CollectedField, obj *SWOStatus) (ret graphql.Marshaler) { + defer func() { + if r := recover(); r != nil { + ec.Error(ctx, ec.Recover(ctx, r)) + ret = graphql.Null + } + }() + fc := &graphql.FieldContext{ + Object: "SWOStatus", + Field: field, + Args: nil, + IsMethod: false, + IsResolver: false, + } + + ctx = graphql.WithFieldContext(ctx, fc) + resTmp, err := ec.ResolverMiddleware(ctx, func(rctx context.Context) (interface{}, error) { + ctx = rctx // use context from middleware stack in children + return obj.IsResetting, nil + }) + if err != nil { + ec.Error(ctx, err) + return graphql.Null + } + if resTmp == nil { + if !graphql.HasFieldError(ctx, fc) { + ec.Errorf(ctx, "must not be null") + } + return graphql.Null + } + res := resTmp.(bool) + fc.Result = res + return ec.marshalNBoolean2bool(ctx, field.Selections, res) +} + +func (ec *executionContext) _SWOStatus_isExecuting(ctx context.Context, field graphql.CollectedField, obj *SWOStatus) (ret graphql.Marshaler) { + defer func() { + if r := recover(); r != nil { + ec.Error(ctx, ec.Recover(ctx, r)) + ret = graphql.Null + } + }() + fc := 
&graphql.FieldContext{ + Object: "SWOStatus", + Field: field, + Args: nil, + IsMethod: false, + IsResolver: false, + } + + ctx = graphql.WithFieldContext(ctx, fc) + resTmp, err := ec.ResolverMiddleware(ctx, func(rctx context.Context) (interface{}, error) { + ctx = rctx // use context from middleware stack in children + return obj.IsExecuting, nil + }) + if err != nil { + ec.Error(ctx, err) + return graphql.Null + } + if resTmp == nil { + if !graphql.HasFieldError(ctx, fc) { + ec.Errorf(ctx, "must not be null") + } + return graphql.Null + } + res := resTmp.(bool) + fc.Result = res + return ec.marshalNBoolean2bool(ctx, field.Selections, res) +} + func (ec *executionContext) _SWOStatus_details(ctx context.Context, field graphql.CollectedField, obj *SWOStatus) (ret graphql.Marshaler) { defer func() { if r := recover(); r != nil { @@ -25322,6 +25411,26 @@ func (ec *executionContext) _SWOStatus(ctx context.Context, sel ast.SelectionSet out.Values[i] = innerFunc(ctx) + if out.Values[i] == graphql.Null { + invalids++ + } + case "isResetting": + innerFunc := func(ctx context.Context) (res graphql.Marshaler) { + return ec._SWOStatus_isResetting(ctx, field, obj) + } + + out.Values[i] = innerFunc(ctx) + + if out.Values[i] == graphql.Null { + invalids++ + } + case "isExecuting": + innerFunc := func(ctx context.Context) (res graphql.Marshaler) { + return ec._SWOStatus_isExecuting(ctx, field, obj) + } + + out.Values[i] = innerFunc(ctx) + if out.Values[i] == graphql.Null { invalids++ } diff --git a/graphql2/graphqlapp/swo.go b/graphql2/graphqlapp/swo.go index c84cf3ebdb..c72309dc35 100644 --- a/graphql2/graphqlapp/swo.go +++ b/graphql2/graphqlapp/swo.go @@ -45,9 +45,11 @@ func (a *Query) SwoStatus(ctx context.Context) (*graphql2.SWOStatus, error) { } return &graphql2.SWOStatus{ - IsIdle: s.IsIdle, - IsDone: s.IsDone, - Details: s.Details, - Nodes: nodes, + IsIdle: s.IsIdle, + IsDone: s.IsDone, + Details: s.Details, + IsExecuting: s.IsExecuting, + IsResetting: s.IsResetting, + 
Nodes: nodes, }, nil } diff --git a/graphql2/models_gen.go b/graphql2/models_gen.go index d86b46dd48..2d080afa63 100644 --- a/graphql2/models_gen.go +++ b/graphql2/models_gen.go @@ -340,10 +340,12 @@ type SWONode struct { } type SWOStatus struct { - IsIdle bool `json:"isIdle"` - IsDone bool `json:"isDone"` - Details string `json:"details"` - Nodes []SWONode `json:"nodes"` + IsIdle bool `json:"isIdle"` + IsDone bool `json:"isDone"` + IsResetting bool `json:"isResetting"` + IsExecuting bool `json:"isExecuting"` + Details string `json:"details"` + Nodes []SWONode `json:"nodes"` } type ScheduleConnection struct { diff --git a/graphql2/schema.graphql b/graphql2/schema.graphql index 1983912024..8a8787697e 100644 --- a/graphql2/schema.graphql +++ b/graphql2/schema.graphql @@ -120,6 +120,9 @@ type SWOStatus { isIdle: Boolean! isDone: Boolean! + isResetting: Boolean! + isExecuting: Boolean! + details: String! nodes: [SWONode!]! diff --git a/swo/manager.go b/swo/manager.go index a2f7929a36..e8b3d3accf 100644 --- a/swo/manager.go +++ b/swo/manager.go @@ -227,4 +227,10 @@ type Status struct { // IsIdle must be true before executing a switchover. IsIdle bool + + // IsExecuting must be true while the switchover is executing. + IsExecuting bool + + // IsResetting must be true while the switchover is resetting. 
+ IsResetting bool } diff --git a/swo/state.go b/swo/state.go index 5866f46b53..b9ad1ed279 100644 --- a/swo/state.go +++ b/swo/state.go @@ -3,6 +3,7 @@ package swo import ( "context" "fmt" + "strings" "sync" "time" @@ -49,18 +50,28 @@ func (s *state) Status() *Status { isIdle := true isDone := true var nodes []Node + var isResetting bool + var isExecuting bool for _, n := range s.nodes { nodes = append(nodes, *n) isIdle = isIdle && n.Status == "idle" isDone = isDone && n.Status == "complete" + if strings.HasPrefix(n.Status, "reset-") { + isResetting = true + } + if strings.HasPrefix(n.Status, "exec-") { + isExecuting = true + } } return &Status{ Details: s.status, Nodes: nodes, - IsDone: isDone, - IsIdle: isIdle, + IsDone: isDone, + IsIdle: isIdle, + IsResetting: isResetting, + IsExecuting: isExecuting, } } From 70e7cc025ab62ed1526915e54ec54b49e5479fb5 Mon Sep 17 00:00:00 2001 From: Nathaniel Caza Date: Thu, 24 Mar 2022 17:33:46 -0500 Subject: [PATCH 062/225] persist spinners while action still in bg --- .../app/admin/switchover/AdminSwitchover.tsx | 106 ++++++++---------- 1 file changed, 49 insertions(+), 57 deletions(-) diff --git a/web/src/app/admin/switchover/AdminSwitchover.tsx b/web/src/app/admin/switchover/AdminSwitchover.tsx index 6ca4986cb4..d18c649081 100644 --- a/web/src/app/admin/switchover/AdminSwitchover.tsx +++ b/web/src/app/admin/switchover/AdminSwitchover.tsx @@ -27,6 +27,8 @@ const query = gql` swoStatus { isDone isIdle + isResetting + isExecuting details nodes { id @@ -52,10 +54,38 @@ function cptlz(s: string): string { export default function AdminSwitchover(): JSX.Element { const { loading, error, data: _data } = useQuery(query) const data = _data?.swoStatus - + const [lastAction, setLastAction] = useState('') const [statusNotices, setStatusNotices] = useState([]) const [commit, mutationStatus] = useMutation(mutation) + function actionHandler(action: 'ping' | 'reset' | 'execute'): () => void { + return () => { + setLastAction(action) + commit({ 
+ variables: { + action, + }, + onError: (error) => { + setStatusNotices([ + ...statusNotices, + { + type: 'error', + message: 'Failed to ' + action, + details: cptlz(error.message), + endNote: DateTime.local().toFormat('fff'), + }, + ]) + }, + }) + } + } + + const pingLoad = lastAction === 'ping' && mutationStatus.loading + const resetLoad = + data?.isResetting || (lastAction === 'reset' && mutationStatus.loading) + const executeLoad = + data?.isExecuting || (lastAction === 'execute' && mutationStatus.loading) + function getIcon(): React.ReactNode { const i: SvgIconProps = { color: 'primary', sx: { fontSize: '3.5rem' } } @@ -123,78 +153,40 @@ export default function AdminSwitchover(): JSX.Element { startIcon={} variant='outlined' size='large' - loading={mutationStatus.loading} + disabled={mutationStatus.loading} + loading={pingLoad} loadingPosition='start' - onClick={() => - commit({ - variables: { action: 'ping' }, - onError: (error) => { - setStatusNotices([ - ...statusNotices, - { - type: 'error', - message: 'Failed to ping', - details: cptlz(error.message), - endNote: DateTime.local().toFormat('fff'), - }, - ]) - }, - }) - } + onClick={actionHandler('ping')} > - Ping + {pingLoad ? 'Sending ping...' : 'Ping'} : } - disabled={data?.isDone} + disabled={data?.isDone || mutationStatus.loading} variant='outlined' size='large' - loading={mutationStatus.loading} - loadingPosition='start' - onClick={() => - commit({ - variables: { action: 'reset' }, - onError: (error) => { - setStatusNotices([ - ...statusNotices, - { - type: 'error', - message: 'Failed to reset', - details: cptlz(error.message), - endNote: DateTime.local().toFormat('fff'), - }, - ]) - }, - }) + loading={ + data?.isResetting || + (lastAction === 'reset' && mutationStatus.loading) } + loadingPosition='start' + onClick={actionHandler('reset')} > - Reset + {resetLoad ? 'Resetting...' 
: 'Reset'} : } - disabled={!data?.isIdle} + disabled={!data?.isIdle || mutationStatus.loading} variant='outlined' size='large' - loading={mutationStatus.loading} - loadingPosition='start' - onClick={() => - commit({ - variables: { action: 'execute' }, - onError: (error) => { - setStatusNotices([ - ...statusNotices, - { - type: 'error', - message: 'Failed to execute', - details: cptlz(error.message), - endNote: DateTime.local().toFormat('fff'), - }, - ]) - }, - }) + loading={ + data?.isExecuting || + (lastAction === 'execute' && mutationStatus.loading) } + loadingPosition='start' + onClick={actionHandler('execute')} > - Execute + {executeLoad ? 'Executing...' : 'Execute'} From c82aebef8b1fd64085cb9eba4a95dbde68c69a26 Mon Sep 17 00:00:00 2001 From: Nathaniel Caza Date: Thu, 24 Mar 2022 17:42:10 -0500 Subject: [PATCH 063/225] tweak subheader --- web/src/app/admin/switchover/AdminSwitchover.tsx | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/web/src/app/admin/switchover/AdminSwitchover.tsx b/web/src/app/admin/switchover/AdminSwitchover.tsx index d18c649081..da5be54df6 100644 --- a/web/src/app/admin/switchover/AdminSwitchover.tsx +++ b/web/src/app/admin/switchover/AdminSwitchover.tsx @@ -109,10 +109,11 @@ export default function AdminSwitchover(): JSX.Element { function getSubheader(): React.ReactNode { if (error) return 'Error' - if (loading && !data) return 'Loading...' - if (!data.isIdle && !data.isDone) return 'In progress' - if (data.isIdle) return 'Idle' - return null + if (!data) return 'Loading...' 
+ if (data.isDone) return 'Complete' + if (data.isIdle) return 'Ready' + if (!data.isExecuting && !data.isResetting) return 'Needs Reset' + return 'Busy' } function getDetails(): React.ReactNode { From 0c906387fab014b809c8134860e90ac80530fc5d Mon Sep 17 00:00:00 2001 From: Nathaniel Caza Date: Thu, 24 Mar 2022 18:26:17 -0500 Subject: [PATCH 064/225] non fatal sync error --- swo/execute.go | 16 +++++++++++++--- web/src/schema.d.ts | 2 ++ 2 files changed, 15 insertions(+), 3 deletions(-) diff --git a/swo/execute.go b/swo/execute.go index 5a6f3e34e9..4897a903ac 100644 --- a/swo/execute.go +++ b/swo/execute.go @@ -66,7 +66,8 @@ func (m *Manager) DoExecute(ctx context.Context) error { // sync in a loop until DB is up-to-date n, err := LoopSync(ctx, rt, oldConn, newConn) if err != nil { - return fmt.Errorf("loop sync: %w", err) + m.Progressf(ctx, "sync error: %s", err.Error()) + continue } m.Progressf(ctx, "sync: %d changes", n) if n == 0 { @@ -160,6 +161,15 @@ func syncChangeLog(ctx context.Context, rt *rowTracker, oldConn, newConn pgx.Tx) if err != nil { return 0, fmt.Errorf("fetch changes: %w", err) } + if len(changes) == 0 { + return 0, nil + } + + // defer all constraints + _, err = newConn.Exec(ctx, "SET CONSTRAINTS ALL DEFERRED") + if err != nil { + return 0, fmt.Errorf("defer constraints: %w", err) + } type pendingDelete struct { query string @@ -211,7 +221,6 @@ func (rt *rowTracker) apply(ctx context.Context, newConn pgx.Tx, q string, rows var rowsData []json.RawMessage for _, row := range rows { rowsData = append(rowsData, row.data) - rt.Insert(row.table, row.id) } data, err := json.Marshal(rowsData) @@ -272,6 +281,7 @@ func (rt *rowTracker) fetch(ctx context.Context, table Table, tx pgx.Tx, ids []s if rt.Exists(table.Name, id) { sd.toUpdate = append(sd.toUpdate, syncRow{table.Name, id, data}) } else { + rt.Insert(table.Name, id) sd.toInsert = append(sd.toInsert, syncRow{table.Name, id, data}) } } @@ -280,7 +290,7 @@ func (rt *rowTracker) fetch(ctx 
context.Context, table Table, tx pgx.Tx, ids []s if _, ok := exists[id]; ok { continue } - + rt.Delete(table.Name, id) sd.toDelete = append(sd.toDelete, id) } diff --git a/web/src/schema.d.ts b/web/src/schema.d.ts index 77993400a4..4ad13a1c23 100644 --- a/web/src/schema.d.ts +++ b/web/src/schema.d.ts @@ -41,6 +41,8 @@ export interface Query { export interface SWOStatus { isIdle: boolean isDone: boolean + isResetting: boolean + isExecuting: boolean details: string nodes: SWONode[] } From 5464b49af557002a5e7055a96bfd37ab18fc47f9 Mon Sep 17 00:00:00 2001 From: Nathaniel Caza Date: Thu, 24 Mar 2022 18:32:34 -0500 Subject: [PATCH 065/225] fix sort --- web/src/app/admin/switchover/AdminSwitchover.tsx | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/web/src/app/admin/switchover/AdminSwitchover.tsx b/web/src/app/admin/switchover/AdminSwitchover.tsx index da5be54df6..940e11f9d3 100644 --- a/web/src/app/admin/switchover/AdminSwitchover.tsx +++ b/web/src/app/admin/switchover/AdminSwitchover.tsx @@ -194,9 +194,10 @@ export default function AdminSwitchover(): JSX.Element { - + {data?.nodes.length > 0 && data.nodes + .slice() .sort((a: SWONodeType, b: SWONodeType) => { if (a.id < b.id) return 1 if (a.id > b.id) return -1 From 041c6ffc8aa0db5d9b1d0dd400927b371597ed41 Mon Sep 17 00:00:00 2001 From: Nathaniel Caza Date: Thu, 24 Mar 2022 19:06:05 -0500 Subject: [PATCH 066/225] better state display --- swo/state.go | 41 +++++++++++++------ .../app/admin/switchover/AdminSwitchover.tsx | 20 +++++++-- web/src/app/admin/switchover/SWONode.tsx | 13 +++--- 3 files changed, 51 insertions(+), 23 deletions(-) diff --git a/swo/state.go b/swo/state.go index b9ad1ed279..a784308260 100644 --- a/swo/state.go +++ b/swo/state.go @@ -33,7 +33,7 @@ func newState(ctx context.Context, m *Manager) (*state, error) { s := &state{ m: m, nodes: make(map[uuid.UUID]*Node), - stateFn: StateIdle, + stateFn: StateUnknown, stateName: "unknown", cancel: func() {}, } @@ -50,17 +50,18 @@ func (s 
*state) Status() *Status { isIdle := true isDone := true var nodes []Node - var isResetting bool - var isExecuting bool + isResetting := true + isExecuting := true + for _, n := range s.nodes { nodes = append(nodes, *n) isIdle = isIdle && n.Status == "idle" isDone = isDone && n.Status == "complete" - if strings.HasPrefix(n.Status, "reset-") { - isResetting = true + if !strings.HasPrefix(n.Status, "reset-") { + isResetting = false } - if strings.HasPrefix(n.Status, "exec-") { - isExecuting = true + if !strings.HasPrefix(n.Status, "exec-") { + isExecuting = false } } @@ -102,8 +103,6 @@ func (s *state) update(msg *swomsg.Message) { case msg.Ack != nil: n.Status = msg.Ack.Status n.CanExec = msg.Ack.Exec - case msg.Progress != nil: - s.status = msg.Progress.Details case msg.Error != nil: s.status = "error: " + msg.Error.Details case msg.Done != nil: @@ -205,7 +204,6 @@ func StateIdle(ctx context.Context, s *state, msg *swomsg.Message) StateFunc { s.stateName = "exec-wait" s.ackMessage(ctx, msg.ID) return StateExecWait - case msg.Plan != nil: } return StateIdle @@ -222,13 +220,15 @@ func StateExecWait(ctx context.Context, s *state, msg *swomsg.Message) StateFunc case msg.Done != nil: s.ackMessage(ctx, msg.ID) return StateIdle + case msg.Progress != nil: + s.status = msg.Progress.Details case msg.Ack != nil && s.m.canExec: if msg.Ack.MsgID != s.taskID { // ack for a different message break } - if msg.NodeID != s.m.id { - // claimed by another node + if msg.NodeID != s.m.id && msg.Ack.Exec { + // claimed by another exec node s.taskID = uuid.Nil break } @@ -257,6 +257,8 @@ func StateExecRun(ctx context.Context, s *state, msg *swomsg.Message) StateFunc s.stateName = "idle" s.ackMessage(ctx, msg.ID) return StateIdle + case msg.Progress != nil: + s.status = msg.Progress.Details } return StateExecRun @@ -269,23 +271,34 @@ func StateError(ctx context.Context, s *state, msg *swomsg.Message) StateFunc { return StateError } +// StateUnknown is the state after startup. 
+func StateUnknown(ctx context.Context, s *state, msg *swomsg.Message) StateFunc { + s.stateName = "unknown" + + return StateError +} + // StateResetWait is the state when the node is waiting for a reset to be performed. func StateResetWait(ctx context.Context, s *state, msg *swomsg.Message) StateFunc { s.stateName = "reset-wait" switch { case msg.Error != nil: + s.stateName = "error" s.ackMessage(ctx, msg.ID) return StateError case msg.Done != nil: + s.stateName = "idle" s.ackMessage(ctx, msg.ID) return StateIdle + case msg.Progress != nil: + s.status = msg.Progress.Details case msg.Ack != nil && s.m.canExec: if msg.Ack.MsgID != s.taskID { // ack for a different message break } - if msg.NodeID != s.m.id { + if msg.NodeID != s.m.id && msg.Ack.Exec { // claimed by another node s.taskID = uuid.Nil break @@ -315,6 +328,8 @@ func StateResetRun(ctx context.Context, s *state, msg *swomsg.Message) StateFunc s.stateName = "idle" s.ackMessage(ctx, msg.ID) return StateIdle + case msg.Progress != nil: + s.status = msg.Progress.Details } return StateResetRun diff --git a/web/src/app/admin/switchover/AdminSwitchover.tsx b/web/src/app/admin/switchover/AdminSwitchover.tsx index 940e11f9d3..55ac4cc549 100644 --- a/web/src/app/admin/switchover/AdminSwitchover.tsx +++ b/web/src/app/admin/switchover/AdminSwitchover.tsx @@ -41,6 +41,16 @@ const query = gql` } ` +let n = 1 +const names: { [key: string]: string } = {} + +function friendlyName(id: string): string { + if (!names[id]) { + names[id] = `Node ${n++}` + } + return names[id] +} + const mutation = gql` mutation ($action: SWOAction!) 
{ swoAction(action: $action) @@ -199,12 +209,14 @@ export default function AdminSwitchover(): JSX.Element { data.nodes .slice() .sort((a: SWONodeType, b: SWONodeType) => { - if (a.id < b.id) return 1 - if (a.id > b.id) return -1 + const aName = friendlyName(a.id) + const bName = friendlyName(b.id) + if (aName < bName) return -1 + if (aName > bName) return 1 return 0 }) - .map((node: SWONodeType, idx: number) => ( - + .map((node: SWONodeType) => ( + ))} diff --git a/web/src/app/admin/switchover/SWONode.tsx b/web/src/app/admin/switchover/SWONode.tsx index b6596ed532..78698f464c 100644 --- a/web/src/app/admin/switchover/SWONode.tsx +++ b/web/src/app/admin/switchover/SWONode.tsx @@ -10,21 +10,22 @@ import Typography from '@mui/material/Typography' import { useTheme } from '@mui/material' import TrueIcon from 'mdi-material-ui/CheckboxMarkedCircleOutline' import FalseIcon from 'mdi-material-ui/CloseCircleOutline' +import FalseOkIcon from 'mdi-material-ui/MinusCircleOutline' import { SWONode as SWONodeType } from '../../../schema' interface SWONodeProps { node: SWONodeType - index: number + name: string } -export default function SWONode({ node, index }: SWONodeProps): JSX.Element { +export default function SWONode({ node, name }: SWONodeProps): JSX.Element { const theme = useTheme() return ( - - + + - Node {index + 1} + {name} ) : ( - + )} From a4880f65bb27aec44a21121d83941a18036bde32 Mon Sep 17 00:00:00 2001 From: Nathaniel Caza Date: Thu, 24 Mar 2022 19:10:02 -0500 Subject: [PATCH 067/225] add comment about friendly name --- web/src/app/admin/switchover/AdminSwitchover.tsx | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/web/src/app/admin/switchover/AdminSwitchover.tsx b/web/src/app/admin/switchover/AdminSwitchover.tsx index 55ac4cc549..7386ca6648 100644 --- a/web/src/app/admin/switchover/AdminSwitchover.tsx +++ b/web/src/app/admin/switchover/AdminSwitchover.tsx @@ -44,6 +44,13 @@ const query = gql` let n = 1 const names: { [key: string]: string } = {} +// 
friendlyName will assign a persistent "friendly" name to the node. +// +// This ensures a specific ID will always refer to the same node. This +// is so that it is clear if a node disappears or a new one appears. +// +// Note: `Node 1` on one browser tab may not be the same node as `Node 1` +// on another browser tab. function friendlyName(id: string): string { if (!names[id]) { names[id] = `Node ${n++}` From 53ccf8af64adbff3d7e80f3868e13facf9198f8d Mon Sep 17 00:00:00 2001 From: Nathaniel Caza Date: Thu, 24 Mar 2022 19:27:42 -0500 Subject: [PATCH 068/225] fix a locking issue --- swo/manager.go | 3 ++- swo/state.go | 43 +++++++++++++++++++++++-------------------- 2 files changed, 25 insertions(+), 21 deletions(-) diff --git a/swo/manager.go b/swo/manager.go index e8b3d3accf..b142afdcf3 100644 --- a/swo/manager.go +++ b/swo/manager.go @@ -183,7 +183,8 @@ func WithLockedConn(ctx context.Context, db *sql.DB, runFunc func(context.Contex if err != nil { return err } - defer UnlockConn(ctx, conn) + defer conn.Close(context.Background()) + defer UnlockConn(context.Background(), conn) return runFunc(ctx, conn) }) diff --git a/swo/state.go b/swo/state.go index a784308260..3f7d282fc6 100644 --- a/swo/state.go +++ b/swo/state.go @@ -209,29 +209,41 @@ func StateIdle(ctx context.Context, s *state, msg *swomsg.Message) StateFunc { return StateIdle } +func (s *state) isExecAck(msg *swomsg.Message) bool { + if !s.m.canExec { + return false + } + if msg.Ack == nil || !msg.Ack.Exec { + return false + } + if msg.Ack.MsgID != s.taskID { + return false + } + + if msg.NodeID != s.m.id { + s.taskID = uuid.Nil + return false + } + + return true +} + // StateExecWait is the state when the node is waiting for execution to be performed. 
func StateExecWait(ctx context.Context, s *state, msg *swomsg.Message) StateFunc { s.stateName = "exec-wait" switch { case msg.Error != nil: + s.stateName = "error" s.ackMessage(ctx, msg.ID) return StateError case msg.Done != nil: + s.stateName = "idle" s.ackMessage(ctx, msg.ID) return StateIdle case msg.Progress != nil: s.status = msg.Progress.Details - case msg.Ack != nil && s.m.canExec: - if msg.Ack.MsgID != s.taskID { - // ack for a different message - break - } - if msg.NodeID != s.m.id && msg.Ack.Exec { - // claimed by another exec node - s.taskID = uuid.Nil - break - } + case s.isExecAck(msg): s.StartTask(s.m.DoExecute) s.stateName = "exec-run" s.ackMessage(ctx, msg.ID) @@ -293,16 +305,7 @@ func StateResetWait(ctx context.Context, s *state, msg *swomsg.Message) StateFun return StateIdle case msg.Progress != nil: s.status = msg.Progress.Details - case msg.Ack != nil && s.m.canExec: - if msg.Ack.MsgID != s.taskID { - // ack for a different message - break - } - if msg.NodeID != s.m.id && msg.Ack.Exec { - // claimed by another node - s.taskID = uuid.Nil - break - } + case s.isExecAck(msg): s.StartTask(s.m.DoReset) s.stateName = "reset-run" s.ackMessage(ctx, msg.ID) From c453fba8e155c225160220b39cc5b99eceaecb2e Mon Sep 17 00:00:00 2001 From: Nathaniel Caza Date: Thu, 24 Mar 2022 19:37:11 -0500 Subject: [PATCH 069/225] use methods to determine state --- graphql2/graphqlapp/swo.go | 8 +++--- swo/manager.go | 17 ++---------- swo/state.go | 56 ++++++++++++++++++++++++++------------ 3 files changed, 46 insertions(+), 35 deletions(-) diff --git a/graphql2/graphqlapp/swo.go b/graphql2/graphqlapp/swo.go index c72309dc35..965c72aa4f 100644 --- a/graphql2/graphqlapp/swo.go +++ b/graphql2/graphqlapp/swo.go @@ -45,11 +45,11 @@ func (a *Query) SwoStatus(ctx context.Context) (*graphql2.SWOStatus, error) { } return &graphql2.SWOStatus{ - IsIdle: s.IsIdle, - IsDone: s.IsDone, + IsIdle: s.IsIdle(), + IsDone: s.IsDone(), Details: s.Details, - IsExecuting: s.IsExecuting, - 
IsResetting: s.IsResetting, + IsExecuting: s.IsExecuting(), + IsResetting: s.IsResetting(), Nodes: nodes, }, nil } diff --git a/swo/manager.go b/swo/manager.go index b142afdcf3..9b09570913 100644 --- a/swo/manager.go +++ b/swo/manager.go @@ -179,6 +179,7 @@ func WithLockedConn(ctx context.Context, db *sql.DB, runFunc func(context.Contex return conn.Raw(func(driverConn interface{}) error { conn := driverConn.(*stdlib.Conn).Conn() + // TODO: still fails to release sometimes err := SwitchOverExecLock(ctx, conn) if err != nil { return err @@ -201,7 +202,7 @@ func (m *Manager) SendPing(ctx context.Context) error { // SendReset will trigger a reset of the switchover. func (m *Manager) SendReset(ctx context.Context) error { - if m.Status().IsDone { + if m.Status().IsDone() { return fmt.Errorf("cannot reset switchover: switchover is done") } defer time.Sleep(swomsg.PollInterval * 2) // wait for send & ack @@ -210,7 +211,7 @@ func (m *Manager) SendReset(ctx context.Context) error { // SendExecute will trigger the switchover to begin. func (m *Manager) SendExecute(ctx context.Context) error { - if !m.Status().IsIdle { + if !m.Status().IsIdle() { return fmt.Errorf("cannot execute switchover: switchover is not idle") } defer time.Sleep(swomsg.PollInterval * 3) // wait for send, ack, and start @@ -222,16 +223,4 @@ func (m *Manager) DB() *sql.DB { return m.protectedDB } type Status struct { Details string Nodes []Node - - // IsDone is true if the switch has already been completed. - IsDone bool - - // IsIdle must be true before executing a switchover. - IsIdle bool - - // IsExecuting must be true while the switchover is executing. - IsExecuting bool - - // IsResetting must be true while the switchover is resetting. 
- IsResetting bool } diff --git a/swo/state.go b/swo/state.go index 3f7d282fc6..22a2643078 100644 --- a/swo/state.go +++ b/swo/state.go @@ -47,33 +47,55 @@ func (s *state) Status() *Status { s.mx.Lock() defer s.mx.Unlock() - isIdle := true - isDone := true var nodes []Node - isResetting := true - isExecuting := true - for _, n := range s.nodes { nodes = append(nodes, *n) - isIdle = isIdle && n.Status == "idle" - isDone = isDone && n.Status == "complete" - if !strings.HasPrefix(n.Status, "reset-") { - isResetting = false - } - if !strings.HasPrefix(n.Status, "exec-") { - isExecuting = false - } } return &Status{ Details: s.status, Nodes: nodes, + } +} + +// IsIdle returns true before executing a switchover. +func (s Status) IsIdle() bool { + for _, n := range s.Nodes { + if n.Status != "idle" { + return false + } + } + return true +} - IsDone: isDone, - IsIdle: isIdle, - IsResetting: isResetting, - IsExecuting: isExecuting, +// IsDone returns true if the switchover has already been completed. +func (s Status) IsDone() bool { + for _, n := range s.Nodes { + if n.Status != "complete" { + return false + } + } + return true +} + +// IsResetting returns true while the switchover is resetting. +func (s Status) IsResetting() bool { + for _, n := range s.Nodes { + if strings.HasPrefix(n.Status, "reset-") { + return true + } + } + return false +} + +// IsExecuting returns true while the switchover is executing. 
+func (s Status) IsExecuting() bool { + for _, n := range s.Nodes { + if strings.HasPrefix(n.Status, "exec-") { + return true + } } + return false } func (s *state) ackMessage(ctx context.Context, msgID uuid.UUID) { From b865725091d7ca7feb046f40b6e80cd13ffbdedc Mon Sep 17 00:00:00 2001 From: Nathaniel Caza Date: Thu, 24 Mar 2022 19:45:33 -0500 Subject: [PATCH 070/225] fix lock hang --- swo/execute.go | 2 +- swo/manager.go | 1 - swo/state.go | 25 ++++++++++++++++--------- 3 files changed, 17 insertions(+), 11 deletions(-) diff --git a/swo/execute.go b/swo/execute.go index 4897a903ac..f560989e58 100644 --- a/swo/execute.go +++ b/swo/execute.go @@ -62,7 +62,7 @@ func (m *Manager) DoExecute(ctx context.Context) error { return fmt.Errorf("read row IDs: %w", err) } - for { + for ctx.Err() == nil { // sync in a loop until DB is up-to-date n, err := LoopSync(ctx, rt, oldConn, newConn) if err != nil { diff --git a/swo/manager.go b/swo/manager.go index 9b09570913..a5fe3a2296 100644 --- a/swo/manager.go +++ b/swo/manager.go @@ -179,7 +179,6 @@ func WithLockedConn(ctx context.Context, db *sql.DB, runFunc func(context.Contex return conn.Raw(func(driverConn interface{}) error { conn := driverConn.(*stdlib.Conn).Conn() - // TODO: still fails to release sometimes err := SwitchOverExecLock(ctx, conn) if err != nil { return err diff --git a/swo/state.go b/swo/state.go index 22a2643078..b836db65d1 100644 --- a/swo/state.go +++ b/swo/state.go @@ -21,6 +21,8 @@ type state struct { nodes map[uuid.UUID]*Node + taskClaimed bool + taskID uuid.UUID cancel func() @@ -190,6 +192,7 @@ func (s *state) processFromOld(ctx context.Context, msg *swomsg.Message) error { s.nodes = make(map[uuid.UUID]*Node) s.m.app.Resume(ctx) s.taskID = msg.ID + s.taskClaimed = false s.stateName = "reset-wait" s.stateFn = StateResetWait err := s.hello(ctx) @@ -224,6 +227,7 @@ func StateIdle(ctx context.Context, s *state, msg *swomsg.Message) StateFunc { case msg.Execute != nil: s.taskID = msg.ID s.stateName = 
"exec-wait" + s.taskClaimed = false s.ackMessage(ctx, msg.ID) return StateExecWait } @@ -241,9 +245,12 @@ func (s *state) isExecAck(msg *swomsg.Message) bool { if msg.Ack.MsgID != s.taskID { return false } + if s.taskClaimed { + return false + } if msg.NodeID != s.m.id { - s.taskID = uuid.Nil + s.taskClaimed = true return false } @@ -255,11 +262,11 @@ func StateExecWait(ctx context.Context, s *state, msg *swomsg.Message) StateFunc s.stateName = "exec-wait" switch { - case msg.Error != nil: + case msg.Error != nil && msg.Error.MsgID == s.taskID: s.stateName = "error" s.ackMessage(ctx, msg.ID) return StateError - case msg.Done != nil: + case msg.Done != nil && msg.Done.MsgID == s.taskID: s.stateName = "idle" s.ackMessage(ctx, msg.ID) return StateIdle @@ -280,12 +287,12 @@ func StateExecRun(ctx context.Context, s *state, msg *swomsg.Message) StateFunc s.stateName = "exec-run" switch { - case msg.Error != nil: + case msg.Error != nil && msg.Error.MsgID == s.taskID: s.cancel() s.stateName = "error" s.ackMessage(ctx, msg.ID) return StateError - case msg.Done != nil: + case msg.Done != nil && msg.Done.MsgID == s.taskID: // already done, make sure we still cancel the context though s.cancel() s.stateName = "idle" @@ -317,11 +324,11 @@ func StateResetWait(ctx context.Context, s *state, msg *swomsg.Message) StateFun s.stateName = "reset-wait" switch { - case msg.Error != nil: + case msg.Error != nil && msg.Error.MsgID == s.taskID: s.stateName = "error" s.ackMessage(ctx, msg.ID) return StateError - case msg.Done != nil: + case msg.Done != nil && msg.Done.MsgID == s.taskID: s.stateName = "idle" s.ackMessage(ctx, msg.ID) return StateIdle @@ -342,12 +349,12 @@ func StateResetRun(ctx context.Context, s *state, msg *swomsg.Message) StateFunc s.stateName = "reset-run" switch { - case msg.Error != nil: + case msg.Error != nil && msg.Error.MsgID == s.taskID: s.cancel() s.stateName = "error" s.ackMessage(ctx, msg.ID) return StateError - case msg.Done != nil: + case msg.Done != nil && 
msg.Done.MsgID == s.taskID: // already done, make sure we still cancel the context though s.cancel() s.stateName = "idle" From e851505c3aad2056f0024cd08fedb75221716086 Mon Sep 17 00:00:00 2001 From: Nathaniel Caza Date: Mon, 28 Mar 2022 09:45:41 -0500 Subject: [PATCH 071/225] fix missed rows --- swo/execute.go | 94 ++++++++++++++++++++++++++++------------------ swo/initialsync.go | 13 ------- swo/rowtracker.go | 37 ++++++++++++++++-- 3 files changed, 91 insertions(+), 53 deletions(-) diff --git a/swo/execute.go b/swo/execute.go index f560989e58..93361bf121 100644 --- a/swo/execute.go +++ b/swo/execute.go @@ -10,6 +10,7 @@ import ( "github.com/google/uuid" "github.com/jackc/pgx/v4" + "github.com/target/goalert/util/log" "github.com/target/goalert/util/sqlutil" ) @@ -62,16 +63,32 @@ func (m *Manager) DoExecute(ctx context.Context) error { return fmt.Errorf("read row IDs: %w", err) } + var lastNone bool for ctx.Err() == nil { // sync in a loop until DB is up-to-date - n, err := LoopSync(ctx, rt, oldConn, newConn) + n, pend, err := LoopSync(ctx, rt, oldConn, newConn) + if pend > 0 { + lastNone = false + m.Progressf(ctx, "sync: %d rows pending", pend) + } if err != nil { - m.Progressf(ctx, "sync error: %s", err.Error()) + log.Log(ctx, err) + rt.Rollback() + if n > 0 { + return fmt.Errorf("sync failure (commit without record): %w", err) + } continue } - m.Progressf(ctx, "sync: %d changes", n) + rt.Commit() if n == 0 { + if !lastNone { + lastNone = true + m.Progressf(ctx, "sync: waiting for changes") + } time.Sleep(time.Second) + } else { + lastNone = false + m.Progressf(ctx, "sync: %d rows replicated", n) } } @@ -91,38 +108,38 @@ func DisableTriggers(ctx context.Context, tables []Table, conn *pgx.Conn) error return nil } -func LoopSync(ctx context.Context, rt *rowTracker, oldConn, newConn *pgx.Conn) (int, error) { - oldTx, newTx, err := syncTx(ctx, oldConn, newConn) +func LoopSync(ctx context.Context, rt *rowTracker, srcConn, dstConn *pgx.Conn) (ok, pend int, err error) 
{ + srcTx, dstTx, err := syncTx(ctx, srcConn, dstConn) if err != nil { - return 0, fmt.Errorf("sync tx: %w", err) + return 0, 0, fmt.Errorf("sync tx: %w", err) } - defer oldTx.Rollback(ctx) - defer newTx.Rollback(ctx) + defer srcTx.Rollback(ctx) + defer dstTx.Rollback(ctx) - n, err := syncChangeLog(ctx, rt, oldTx, newTx) + n, err := syncChangeLog(ctx, rt, srcTx, dstTx) if err != nil { - return 0, fmt.Errorf("sync change log: %w", err) + return 0, n, fmt.Errorf("sync change log: %w", err) } - err = newTx.Commit(ctx) + err = dstTx.Commit(ctx) if err != nil { - return 0, fmt.Errorf("commit dst: %w", err) + return 0, n, fmt.Errorf("commit dst: %w", err) } - err = oldTx.Commit(ctx) + err = srcTx.Commit(ctx) if err != nil { - return 0, fmt.Errorf("commit src: %w", err) + return n, 0, fmt.Errorf("commit src: %w", err) } - return n, nil + return n, 0, nil } func FinalSync(ctx context.Context, oldConn, newConn *pgx.Conn) error { return nil } -func syncTx(ctx context.Context, oldConn, newConn *pgx.Conn) (old, new pgx.Tx, err error) { - srcTx, err := oldConn.BeginTx(ctx, pgx.TxOptions{ +func syncTx(ctx context.Context, srcConn, dstConn *pgx.Conn) (src, dst pgx.Tx, err error) { + srcTx, err := srcConn.BeginTx(ctx, pgx.TxOptions{ AccessMode: pgx.ReadWrite, IsoLevel: pgx.Serializable, DeferrableMode: pgx.Deferrable, @@ -131,7 +148,7 @@ func syncTx(ctx context.Context, oldConn, newConn *pgx.Conn) (old, new pgx.Tx, e return nil, nil, fmt.Errorf("begin src: %w", err) } - dstTx, err := newConn.BeginTx(ctx, pgx.TxOptions{}) + dstTx, err := dstConn.BeginTx(ctx, pgx.TxOptions{}) if err != nil { srcTx.Rollback(ctx) return nil, nil, fmt.Errorf("begin dst: %w", err) @@ -140,7 +157,7 @@ func syncTx(ctx context.Context, oldConn, newConn *pgx.Conn) (old, new pgx.Tx, e return srcTx, dstTx, nil } -func syncChangeLog(ctx context.Context, rt *rowTracker, oldConn, newConn pgx.Tx) (int, error) { +func syncChangeLog(ctx context.Context, rt *rowTracker, srcTx, dstTx pgx.Tx) (int, error) { type rowID 
struct { table string id string @@ -149,7 +166,7 @@ func syncChangeLog(ctx context.Context, rt *rowTracker, oldConn, newConn pgx.Tx) var r rowID changes := make(map[rowID]struct{}) rowIDs := make(map[string][]string) - _, err := oldConn.QueryFunc(ctx, "delete from change_log returning table_name, row_id", nil, []interface{}{&r.table, &r.id}, func(pgx.QueryFuncRow) error { + _, err := srcTx.QueryFunc(ctx, "delete from change_log returning table_name, row_id", nil, []interface{}{&r.table, &r.id}, func(pgx.QueryFuncRow) error { if _, ok := changes[r]; ok { return nil } @@ -166,9 +183,9 @@ func syncChangeLog(ctx context.Context, rt *rowTracker, oldConn, newConn pgx.Tx) } // defer all constraints - _, err = newConn.Exec(ctx, "SET CONSTRAINTS ALL DEFERRED") + _, err = dstTx.Exec(ctx, "SET CONSTRAINTS ALL DEFERRED") if err != nil { - return 0, fmt.Errorf("defer constraints: %w", err) + return len(changes), fmt.Errorf("defer constraints: %w", err) } type pendingDelete struct { @@ -183,37 +200,37 @@ func syncChangeLog(ctx context.Context, rt *rowTracker, oldConn, newConn pgx.Tx) continue } - sd, err := rt.fetch(ctx, table, oldConn, rowIDs[table.Name]) + sd, err := rt.fetch(ctx, table, srcTx, rowIDs[table.Name]) if err != nil { - return 0, fmt.Errorf("fetch changed rows: %w", err) + return len(changes), fmt.Errorf("fetch changed rows: %w", err) } if len(sd.toDelete) > 0 { deletes = append(deletes, pendingDelete{table.DeleteRowsQuery(), table.IDs(sd.toDelete)}) } - err = rt.apply(ctx, newConn, table.UpdateRowsQuery(), sd.toUpdate) + err = rt.apply(ctx, dstTx, table.UpdateRowsQuery(), sd.toUpdate) if err != nil { - return 0, fmt.Errorf("apply updates: %w", err) + return len(changes), fmt.Errorf("apply updates: %w", err) } - err = rt.apply(ctx, newConn, table.InsertRowsQuery(), sd.toInsert) + err = rt.apply(ctx, dstTx, table.InsertRowsQuery(), sd.toInsert) if err != nil { - return 0, fmt.Errorf("apply inserts: %w", err) + return len(changes), fmt.Errorf("apply inserts: %w", 
err) } } // handle pendingDeletes in reverse table order for i := len(deletes) - 1; i >= 0; i-- { - _, err = newConn.Exec(ctx, deletes[i].query, deletes[i].idArg) + _, err = dstTx.Exec(ctx, deletes[i].query, deletes[i].idArg) if err != nil { - return 0, fmt.Errorf("delete rows: %w", err) + return len(changes), fmt.Errorf("delete rows: %w", err) } } return len(changes), nil } -func (rt *rowTracker) apply(ctx context.Context, newConn pgx.Tx, q string, rows []syncRow) error { +func (rt *rowTracker) apply(ctx context.Context, dstTx pgx.Tx, q string, rows []syncRow) error { if len(rows) == 0 { return nil } @@ -227,7 +244,7 @@ func (rt *rowTracker) apply(ctx context.Context, newConn pgx.Tx, q string, rows if err != nil { return fmt.Errorf("marshal rows: %w", err) } - _, err = newConn.Exec(ctx, q, data) + _, err = dstTx.Exec(ctx, q, data) if err != nil { return fmt.Errorf("exec: %w", err) } @@ -258,8 +275,8 @@ type syncRow struct { data json.RawMessage } -func (rt *rowTracker) fetch(ctx context.Context, table Table, tx pgx.Tx, ids []string) (*syncData, error) { - rows, err := tx.Query(ctx, table.SelectRowsQuery(), table.IDs(ids)) +func (rt *rowTracker) fetch(ctx context.Context, table Table, srcTx pgx.Tx, ids []string) (*syncData, error) { + rows, err := srcTx.Query(ctx, table.SelectRowsQuery(), table.IDs(ids)) if errors.Is(err, pgx.ErrNoRows) { return &syncData{toDelete: ids}, nil } @@ -269,7 +286,7 @@ func (rt *rowTracker) fetch(ctx context.Context, table Table, tx pgx.Tx, ids []s } sd := syncData{t: table} - exists := make(map[string]struct{}) + existsInOld := make(map[string]struct{}) for rows.Next() { var id string var data []byte @@ -277,7 +294,7 @@ func (rt *rowTracker) fetch(ctx context.Context, table Table, tx pgx.Tx, ids []s if err != nil { return nil, fmt.Errorf("scan row: %w", err) } - exists[id] = struct{}{} + existsInOld[id] = struct{}{} if rt.Exists(table.Name, id) { sd.toUpdate = append(sd.toUpdate, syncRow{table.Name, id, data}) } else { @@ -287,7 +304,10 
@@ func (rt *rowTracker) fetch(ctx context.Context, table Table, tx pgx.Tx, ids []s } for _, id := range ids { - if _, ok := exists[id]; ok { + if _, ok := existsInOld[id]; ok { + continue + } + if !rt.Exists(table.Name, id) { continue } rt.Delete(table.Name, id) diff --git a/swo/initialsync.go b/swo/initialsync.go index 111ff73d20..2148e2bf3f 100644 --- a/swo/initialsync.go +++ b/swo/initialsync.go @@ -60,13 +60,6 @@ func (m *Manager) InitialSync(ctx context.Context, oldConn, newConn *pgx.Conn) e } } - var changeIDs []int - var id int - srcTx.QueryFunc(ctx, "select id from change_log", nil, []interface{}{&id}, func(r pgx.QueryFuncRow) error { - changeIDs = append(changeIDs, id) - return nil - }) - m.Progressf(ctx, "commit initial sync") // Important to validate src commit, even though it's read-only. // @@ -82,12 +75,6 @@ func (m *Manager) InitialSync(ctx context.Context, oldConn, newConn *pgx.Conn) e return fmt.Errorf("commit dst tx: %w", err) } - // delete synced changes after tx has been committed - _, err = oldConn.Exec(ctx, "delete from change_log where id = any($1)", changeIDs) - if err != nil { - return fmt.Errorf("delete change log: %w", err) - } - // vacuum analyze new DB m.Progressf(ctx, "vacuum analyze") _, err = newConn.Exec(ctx, "vacuum analyze") diff --git a/swo/rowtracker.go b/swo/rowtracker.go index fba72576ba..56771d311e 100644 --- a/swo/rowtracker.go +++ b/swo/rowtracker.go @@ -10,6 +10,13 @@ import ( type rowTracker struct { tables []Table rowIDs map[string]map[string]struct{} + + stagedInserts []stagedID + stagedDeletes []stagedID +} +type stagedID struct { + table string + id string } func newRowTracker(ctx context.Context, tables []Table, newConn *pgx.Conn) (*rowTracker, error) { @@ -35,13 +42,37 @@ func newRowTracker(ctx context.Context, tables []Table, newConn *pgx.Conn) (*row return nil, err } - rt.Insert(table.Name, id) + rt._Insert(table.Name, id) } } return rt, nil } -func (rt *rowTracker) Insert(table, id string) { rt.rowIDs[table][id] 
= struct{}{} } -func (rt *rowTracker) Delete(table, id string) { delete(rt.rowIDs[table], id) } +func (rt *rowTracker) Insert(table, id string) { + rt.stagedInserts = append(rt.stagedInserts, stagedID{table, id}) +} + +func (rt *rowTracker) Delete(table, id string) { + rt.stagedDeletes = append(rt.stagedDeletes, stagedID{table, id}) +} +func (rt *rowTracker) _Insert(table, id string) { rt.rowIDs[table][id] = struct{}{} } +func (rt *rowTracker) _Delete(table, id string) { delete(rt.rowIDs[table], id) } +func (rt *rowTracker) Rollback() { + rt.stagedDeletes = nil + rt.stagedInserts = nil +} + +func (rt *rowTracker) Commit() { + for _, staged := range rt.stagedInserts { + rt._Insert(staged.table, staged.id) + } + rt.stagedInserts = nil + + for _, staged := range rt.stagedDeletes { + rt._Delete(staged.table, staged.id) + } + rt.stagedDeletes = nil +} + func (rt *rowTracker) Exists(table, id string) bool { _, ok := rt.rowIDs[table][id]; return ok } From f0e6b3671cefa34c5908ea2955c1a1c77570db17 Mon Sep 17 00:00:00 2001 From: Nathaniel Caza Date: Wed, 6 Apr 2022 09:21:42 -0500 Subject: [PATCH 072/225] refactor --- app/app.go | 2 +- graphql2/generated.go | 54 +++ graphql2/models_gen.go | 1 + graphql2/schema.graphql | 1 + ....sql => 20220405163538-switchover-mk2.sql} | 0 swo/drvconnector.go | 46 +- swo/execute.go | 115 +++-- swo/initialsync.go | 24 +- swo/manager.go | 182 +++----- swo/preflightlocks.go | 10 +- swo/state.go | 368 ---------------- swo/swogrp/config.go | 21 + swo/swogrp/group.go | 404 ++++++++++++++++++ swo/swogrp/msgbuf.go | 42 ++ swo/swogrp/progressf.go | 34 ++ swo/swogrp/state.go | 12 + swo/swomsg/log.go | 152 ++++--- swo/swomsg/messages.go | 180 +++++--- .../app/admin/switchover/AdminSwitchover.tsx | 5 +- web/src/app/admin/switchover/SWONode.tsx | 2 +- web/src/schema.d.ts | 1 + 21 files changed, 929 insertions(+), 727 deletions(-) rename migrate/migrations/{20220316112851-switchover-mk2.sql => 20220405163538-switchover-mk2.sql} (100%) delete mode 100644 
swo/state.go create mode 100644 swo/swogrp/config.go create mode 100644 swo/swogrp/group.go create mode 100644 swo/swogrp/msgbuf.go create mode 100644 swo/swogrp/progressf.go create mode 100644 swo/swogrp/state.go diff --git a/app/app.go b/app/app.go index 1010a982f8..9415d35bc8 100644 --- a/app/app.go +++ b/app/app.go @@ -180,7 +180,7 @@ func NewApp(c Config, db *sql.DB) (*App, error) { } if c.SWO != nil { - c.SWO.SetPauseResumer(app) + c.SWO.Init(app) log.Logf(app.LogBackgroundContext(), "SWO Enabled.") } diff --git a/graphql2/generated.go b/graphql2/generated.go index c564a41113..c0906159af 100644 --- a/graphql2/generated.go +++ b/graphql2/generated.go @@ -390,6 +390,7 @@ type ComplexityRoot struct { SWONode struct { CanExec func(childComplexity int) int ID func(childComplexity int) int + IsLeader func(childComplexity int) int NewValid func(childComplexity int) int OldValid func(childComplexity int) int Status func(childComplexity int) int @@ -2622,6 +2623,13 @@ func (e *executableSchema) Complexity(typeName, field string, childComplexity in return e.complexity.SWONode.ID(childComplexity), true + case "SWONode.isLeader": + if e.complexity.SWONode.IsLeader == nil { + break + } + + return e.complexity.SWONode.IsLeader(childComplexity), true + case "SWONode.newValid": if e.complexity.SWONode.NewValid == nil { break @@ -3636,6 +3644,7 @@ type SWONode { oldValid: Boolean! newValid: Boolean! canExec: Boolean! + isLeader: Boolean! status: String! } @@ -13845,6 +13854,41 @@ func (ec *executionContext) _SWONode_canExec(ctx context.Context, field graphql. 
return ec.marshalNBoolean2bool(ctx, field.Selections, res) } +func (ec *executionContext) _SWONode_isLeader(ctx context.Context, field graphql.CollectedField, obj *SWONode) (ret graphql.Marshaler) { + defer func() { + if r := recover(); r != nil { + ec.Error(ctx, ec.Recover(ctx, r)) + ret = graphql.Null + } + }() + fc := &graphql.FieldContext{ + Object: "SWONode", + Field: field, + Args: nil, + IsMethod: false, + IsResolver: false, + } + + ctx = graphql.WithFieldContext(ctx, fc) + resTmp, err := ec.ResolverMiddleware(ctx, func(rctx context.Context) (interface{}, error) { + ctx = rctx // use context from middleware stack in children + return obj.IsLeader, nil + }) + if err != nil { + ec.Error(ctx, err) + return graphql.Null + } + if resTmp == nil { + if !graphql.HasFieldError(ctx, fc) { + ec.Errorf(ctx, "must not be null") + } + return graphql.Null + } + res := resTmp.(bool) + fc.Result = res + return ec.marshalNBoolean2bool(ctx, field.Selections, res) +} + func (ec *executionContext) _SWONode_status(ctx context.Context, field graphql.CollectedField, obj *SWONode) (ret graphql.Marshaler) { defer func() { if r := recover(); r != nil { @@ -25360,6 +25404,16 @@ func (ec *executionContext) _SWONode(ctx context.Context, sel ast.SelectionSet, out.Values[i] = innerFunc(ctx) + if out.Values[i] == graphql.Null { + invalids++ + } + case "isLeader": + innerFunc := func(ctx context.Context) (res graphql.Marshaler) { + return ec._SWONode_isLeader(ctx, field, obj) + } + + out.Values[i] = innerFunc(ctx) + if out.Values[i] == graphql.Null { invalids++ } diff --git a/graphql2/models_gen.go b/graphql2/models_gen.go index 2d080afa63..7342fbbe9f 100644 --- a/graphql2/models_gen.go +++ b/graphql2/models_gen.go @@ -336,6 +336,7 @@ type SWONode struct { OldValid bool `json:"oldValid"` NewValid bool `json:"newValid"` CanExec bool `json:"canExec"` + IsLeader bool `json:"isLeader"` Status string `json:"status"` } diff --git a/graphql2/schema.graphql b/graphql2/schema.graphql index 
8a8787697e..80c6b6b5fa 100644 --- a/graphql2/schema.graphql +++ b/graphql2/schema.graphql @@ -133,6 +133,7 @@ type SWONode { oldValid: Boolean! newValid: Boolean! canExec: Boolean! + isLeader: Boolean! status: String! } diff --git a/migrate/migrations/20220316112851-switchover-mk2.sql b/migrate/migrations/20220405163538-switchover-mk2.sql similarity index 100% rename from migrate/migrations/20220316112851-switchover-mk2.sql rename to migrate/migrations/20220405163538-switchover-mk2.sql diff --git a/swo/drvconnector.go b/swo/drvconnector.go index 4c24dcdd1b..e858b1fd48 100644 --- a/swo/drvconnector.go +++ b/swo/drvconnector.go @@ -5,61 +5,51 @@ import ( "database/sql/driver" "errors" "sync" + + "github.com/jackc/pgx/v4/stdlib" ) type Connector struct { dbcOld, dbcNew driver.Connector - n *Notifier - sm *StatsManager -} - -type Notifier struct { - doneCh chan struct{} - done sync.Once -} - -func NewNotifier() *Notifier { - return &Notifier{ - doneCh: make(chan struct{}), - } -} -func (n *Notifier) Done() { n.done.Do(func() { close(n.doneCh) }) } -func (n *Notifier) IsDone() bool { - select { - case <-n.doneCh: - return true - default: - return false - } + isDone bool + id int + mx sync.Mutex } var _ driver.Connector = (*Connector)(nil) -func NewConnector(dbcOld, dbcNew driver.Connector, sm *StatsManager) *Connector { +func NewConnector(dbcOld, dbcNew driver.Connector) *Connector { return &Connector{ dbcOld: dbcOld, dbcNew: dbcNew, - n: NewNotifier(), - sm: sm, } } func (drv *Connector) Driver() driver.Driver { return nil } func (drv *Connector) Connect(ctx context.Context) (driver.Conn, error) { - if drv.n.IsDone() { + drv.mx.Lock() + isDone := drv.isDone + drv.mx.Unlock() + + if isDone { return drv.dbcNew.Connect(ctx) } - conn, err := drv.dbcOld.Connect(ctx) + c, err := drv.dbcOld.Connect(ctx) if err != nil { return nil, err } + drv.id++ + conn := c.(*stdlib.Conn) + err = sessionLock(ctx, conn) if errors.Is(err, errDone) { - drv.n.Done() + drv.mx.Lock() + drv.isDone 
= true + drv.mx.Unlock() return drv.dbcNew.Connect(ctx) } if err != nil { diff --git a/swo/execute.go b/swo/execute.go index 93361bf121..449f54e16e 100644 --- a/swo/execute.go +++ b/swo/execute.go @@ -8,18 +8,34 @@ import ( "strconv" "time" - "github.com/google/uuid" "github.com/jackc/pgx/v4" + "github.com/target/goalert/swo/swogrp" "github.com/target/goalert/util/log" "github.com/target/goalert/util/sqlutil" ) -func (m *Manager) SendProposal() (uuid.UUID, error) { - return uuid.Nil, nil -} +func WaitForRunningTx(ctx context.Context, oldConn *pgx.Conn) error { + var now time.Time + err := oldConn.QueryRow(ctx, "select now()").Scan(&now) + if err != nil { + return fmt.Errorf("get current timestamp: %w", err) + } + + for { + var n int + err = oldConn.QueryRow(ctx, "select count(*) from pg_stat_activity where state <> 'idle' and xact_start <= $1", now).Scan(&n) + if err != nil { + return fmt.Errorf("get running tx count: %w", err) + } + if n == 0 { + break + } + + swogrp.Progressf(ctx, "waiting for %d transaction(s) to finish", n) + time.Sleep(time.Second) + } -func (m *Manager) ProposalIsValid() (bool, error) { - return false, nil + return nil } func (m *Manager) DoExecute(ctx context.Context) error { @@ -33,31 +49,37 @@ func (m *Manager) DoExecute(ctx context.Context) error { */ return m.withConnFromBoth(ctx, func(ctx context.Context, oldConn, newConn *pgx.Conn) error { - m.Progressf(ctx, "scanning tables...") + swogrp.Progressf(ctx, "scanning tables...") tables, err := ScanTables(ctx, oldConn) if err != nil { return fmt.Errorf("scan tables: %w", err) } - m.Progressf(ctx, "enabling change log") + swogrp.Progressf(ctx, "enabling change log") err = EnableChangeLog(ctx, tables, oldConn) if err != nil { return fmt.Errorf("enable change log: %w", err) } - m.Progressf(ctx, "disabling triggers") + swogrp.Progressf(ctx, "disabling triggers") err = DisableTriggers(ctx, tables, newConn) if err != nil { return fmt.Errorf("disable triggers: %w", err) } - m.Progressf(ctx, 
"performing initial sync") - err = m.InitialSync(ctx, oldConn, newConn) + swogrp.Progressf(ctx, "waiting for in-flight transactions to finish") + err = WaitForRunningTx(ctx, oldConn) + if err != nil { + return fmt.Errorf("wait for running tx: %w", err) + } + + swogrp.Progressf(ctx, "performing initial sync") + err = m.InitialSync(ctx, tables, oldConn, newConn) if err != nil { return fmt.Errorf("initial sync: %w", err) } - m.Progressf(ctx, "recording new DB state") + swogrp.Progressf(ctx, "recording new DB state") rt, err := newRowTracker(ctx, tables, newConn) if err != nil { return fmt.Errorf("read row IDs: %w", err) @@ -67,9 +89,11 @@ func (m *Manager) DoExecute(ctx context.Context) error { for ctx.Err() == nil { // sync in a loop until DB is up-to-date n, pend, err := LoopSync(ctx, rt, oldConn, newConn) + + fmt.Println("sync", n, "pending", pend) if pend > 0 { lastNone = false - m.Progressf(ctx, "sync: %d rows pending", pend) + swogrp.Progressf(ctx, "sync: %d rows pending", pend) } if err != nil { log.Log(ctx, err) @@ -83,12 +107,12 @@ func (m *Manager) DoExecute(ctx context.Context) error { if n == 0 { if !lastNone { lastNone = true - m.Progressf(ctx, "sync: waiting for changes") + swogrp.Progressf(ctx, "sync: waiting for changes") } - time.Sleep(time.Second) + time.Sleep(10 * time.Second) } else { lastNone = false - m.Progressf(ctx, "sync: %d rows replicated", n) + swogrp.Progressf(ctx, "sync: %d rows replicated", n) } } @@ -116,22 +140,27 @@ func LoopSync(ctx context.Context, rt *rowTracker, srcConn, dstConn *pgx.Conn) ( defer srcTx.Rollback(ctx) defer dstTx.Rollback(ctx) - n, err := syncChangeLog(ctx, rt, srcTx, dstTx) + ids, err := syncChangeLog(ctx, rt, srcTx, dstTx) + if err != nil { + return 0, len(ids), fmt.Errorf("sync change log: %w", err) + } + + err = srcTx.Commit(ctx) if err != nil { - return 0, n, fmt.Errorf("sync change log: %w", err) + return len(ids), 0, fmt.Errorf("commit src: %w", err) } err = dstTx.Commit(ctx) if err != nil { - return 0, n, 
fmt.Errorf("commit dst: %w", err) + return 0, len(ids), fmt.Errorf("commit dst: %w", err) } - err = srcTx.Commit(ctx) + _, err = srcConn.Exec(ctx, "DELETE FROM change_log WHERE id = any($1)", sqlutil.IntArray(ids)) if err != nil { - return n, 0, fmt.Errorf("commit src: %w", err) + return len(ids), 0, fmt.Errorf("update change log: %w", err) } - return n, 0, nil + return len(ids), 0, nil } func FinalSync(ctx context.Context, oldConn, newConn *pgx.Conn) error { @@ -140,7 +169,7 @@ func FinalSync(ctx context.Context, oldConn, newConn *pgx.Conn) error { func syncTx(ctx context.Context, srcConn, dstConn *pgx.Conn) (src, dst pgx.Tx, err error) { srcTx, err := srcConn.BeginTx(ctx, pgx.TxOptions{ - AccessMode: pgx.ReadWrite, + AccessMode: pgx.ReadOnly, IsoLevel: pgx.Serializable, DeferrableMode: pgx.Deferrable, }) @@ -157,77 +186,88 @@ func syncTx(ctx context.Context, srcConn, dstConn *pgx.Conn) (src, dst pgx.Tx, e return srcTx, dstTx, nil } -func syncChangeLog(ctx context.Context, rt *rowTracker, srcTx, dstTx pgx.Tx) (int, error) { +func syncChangeLog(ctx context.Context, rt *rowTracker, srcTx, dstTx pgx.Tx) ([]int, error) { type rowID struct { table string id string } var r rowID + var changeIDs []int + var changeID int changes := make(map[rowID]struct{}) rowIDs := make(map[string][]string) - _, err := srcTx.QueryFunc(ctx, "delete from change_log returning table_name, row_id", nil, []interface{}{&r.table, &r.id}, func(pgx.QueryFuncRow) error { + _, err := srcTx.QueryFunc(ctx, "select id, table_name, row_id from change_log", nil, []interface{}{&changeID, &r.table, &r.id}, func(pgx.QueryFuncRow) error { if _, ok := changes[r]; ok { return nil } changes[r] = struct{}{} rowIDs[r.table] = append(rowIDs[r.table], r.id) + changeIDs = append(changeIDs, changeID) return nil }) if err != nil { - return 0, fmt.Errorf("fetch changes: %w", err) + return nil, fmt.Errorf("fetch changes: %w", err) } if len(changes) == 0 { - return 0, nil + return nil, nil } // defer all constraints _, 
err = dstTx.Exec(ctx, "SET CONSTRAINTS ALL DEFERRED") if err != nil { - return len(changes), fmt.Errorf("defer constraints: %w", err) + return changeIDs, fmt.Errorf("defer constraints: %w", err) } type pendingDelete struct { query string idArg interface{} + count int } var deletes []pendingDelete // go in insert order for fetching updates/inserts, note deleted rows for _, table := range rt.tables { + if table.SkipSync() { + continue + } + if len(rowIDs[table.Name]) == 0 { continue } sd, err := rt.fetch(ctx, table, srcTx, rowIDs[table.Name]) if err != nil { - return len(changes), fmt.Errorf("fetch changed rows: %w", err) + return changeIDs, fmt.Errorf("fetch changed rows: %w", err) } if len(sd.toDelete) > 0 { - deletes = append(deletes, pendingDelete{table.DeleteRowsQuery(), table.IDs(sd.toDelete)}) + deletes = append(deletes, pendingDelete{table.DeleteRowsQuery(), table.IDs(sd.toDelete), len(sd.toDelete)}) } err = rt.apply(ctx, dstTx, table.UpdateRowsQuery(), sd.toUpdate) if err != nil { - return len(changes), fmt.Errorf("apply updates: %w", err) + return changeIDs, fmt.Errorf("apply updates: %w", err) } err = rt.apply(ctx, dstTx, table.InsertRowsQuery(), sd.toInsert) if err != nil { - return len(changes), fmt.Errorf("apply inserts: %w", err) + return changeIDs, fmt.Errorf("apply inserts: %w", err) } } // handle pendingDeletes in reverse table order for i := len(deletes) - 1; i >= 0; i-- { - _, err = dstTx.Exec(ctx, deletes[i].query, deletes[i].idArg) + t, err := dstTx.Exec(ctx, deletes[i].query, deletes[i].idArg) if err != nil { - return len(changes), fmt.Errorf("delete rows: %w", err) + return changeIDs, fmt.Errorf("delete rows: %w", err) + } + if t.RowsAffected() != int64(deletes[i].count) { + return changeIDs, fmt.Errorf("delete rows: got %d != expected %d", t.RowsAffected(), deletes[i].count) } } - return len(changes), nil + return changeIDs, nil } func (rt *rowTracker) apply(ctx context.Context, dstTx pgx.Tx, q string, rows []syncRow) error { @@ -244,10 
+284,13 @@ func (rt *rowTracker) apply(ctx context.Context, dstTx pgx.Tx, q string, rows [] if err != nil { return fmt.Errorf("marshal rows: %w", err) } - _, err = dstTx.Exec(ctx, q, data) + t, err := dstTx.Exec(ctx, q, data) if err != nil { return fmt.Errorf("exec: %w", err) } + if t.RowsAffected() != int64(len(rows)) { + return fmt.Errorf("mismatch: got %d rows affected; expected %d", t.RowsAffected(), len(rows)) + } return nil } diff --git a/swo/initialsync.go b/swo/initialsync.go index 2148e2bf3f..c679ec408e 100644 --- a/swo/initialsync.go +++ b/swo/initialsync.go @@ -9,24 +9,10 @@ import ( "time" "github.com/jackc/pgx/v4" - "github.com/target/goalert/swo/swomsg" - "github.com/target/goalert/util/log" + "github.com/target/goalert/swo/swogrp" ) -func (m *Manager) Progressf(ctx context.Context, format string, a ...interface{}) { - err := m.msgLog.Append(ctx, swomsg.Progress{MsgID: m.msgState.taskID, Details: fmt.Sprintf(format, a...)}) - if err != nil { - log.Log(ctx, err) - } -} - -func (m *Manager) InitialSync(ctx context.Context, oldConn, newConn *pgx.Conn) error { - m.Progressf(ctx, "scanning tables") - tables, err := ScanTables(ctx, oldConn) - if err != nil { - return fmt.Errorf("scan tables: %w", err) - } - +func (m *Manager) InitialSync(ctx context.Context, tables []Table, oldConn, newConn *pgx.Conn) error { srcTx, err := oldConn.BeginTx(ctx, pgx.TxOptions{ AccessMode: pgx.ReadOnly, IsoLevel: pgx.Serializable, @@ -60,7 +46,7 @@ func (m *Manager) InitialSync(ctx context.Context, oldConn, newConn *pgx.Conn) e } } - m.Progressf(ctx, "commit initial sync") + swogrp.Progressf(ctx, "commit initial sync") // Important to validate src commit, even though it's read-only. 
// // A failure here indicates the isolation level has been violated @@ -76,7 +62,7 @@ func (m *Manager) InitialSync(ctx context.Context, oldConn, newConn *pgx.Conn) e } // vacuum analyze new DB - m.Progressf(ctx, "vacuum analyze") + swogrp.Progressf(ctx, "vacuum analyze") _, err = newConn.Exec(ctx, "vacuum analyze") if err != nil { return fmt.Errorf("vacuum analyze: %w", err) @@ -123,7 +109,7 @@ func (m *Manager) SyncTableInit(origCtx context.Context, t Table, srcTx, dstTx p prog := time.NewTimer(2 * time.Second) defer prog.Stop() for { - m.Progressf(origCtx, "syncing table %s (%d/%d)", t.Name, lc.Lines(), rowCount) + swogrp.Progressf(origCtx, "syncing table %s (%d/%d)", t.Name, lc.Lines(), rowCount) select { case <-ctx.Done(): pw.CloseWithError(ctx.Err()) diff --git a/swo/manager.go b/swo/manager.go index a5fe3a2296..9490f4ba3c 100644 --- a/swo/manager.go +++ b/swo/manager.go @@ -4,45 +4,28 @@ import ( "context" "database/sql" "database/sql/driver" - "fmt" - "time" + "errors" "github.com/google/uuid" "github.com/jackc/pgx/v4" "github.com/jackc/pgx/v4/stdlib" "github.com/target/goalert/app/lifecycle" + "github.com/target/goalert/swo/swogrp" "github.com/target/goalert/swo/swomsg" - "gorm.io/driver/postgres" - "gorm.io/gorm" + "github.com/target/goalert/util/log" ) type Manager struct { - id uuid.UUID + // sql.DB instance safe for the application to use (instrumented for safe SWO operation) + dbApp *sql.DB + dbMain *sql.DB + dbNext *sql.DB - dbOld, dbNew *sql.DB + pauseResume lifecycle.PauseResumer - protectedDB *sql.DB + Config - s Syncer - - app lifecycle.PauseResumer - - stats *StatsManager - - msgLog *swomsg.Log - nextMsgLog *swomsg.Log - - msgCh chan *swomsg.Message - nextMsgCh chan *swomsg.Message - errCh chan error - - ready chan struct{} - - msgState *state - - cancel func() - - canExec bool + grp *swogrp.Group } type Node struct { @@ -58,114 +41,80 @@ type Node struct { type Config struct { OldDBC, NewDBC driver.Connector CanExec bool + Logger *log.Logger } 
func NewManager(cfg Config) (*Manager, error) { - gCfg := &gorm.Config{PrepareStmt: true} - gormOld, err := gorm.Open(postgres.New(postgres.Config{Conn: sql.OpenDB(cfg.OldDBC)}), gCfg) - if err != nil { - return nil, fmt.Errorf("open old database: %w", err) - } - gormNew, err := gorm.Open(postgres.New(postgres.Config{Conn: sql.OpenDB(cfg.NewDBC)}), gCfg) - if err != nil { - return nil, fmt.Errorf("open new database: %w", err) + m := &Manager{ + Config: cfg, + dbApp: sql.OpenDB(NewConnector(cfg.OldDBC, cfg.NewDBC)), + dbMain: sql.OpenDB(cfg.OldDBC), + dbNext: sql.OpenDB(cfg.NewDBC), } - id := uuid.New() - msgLog, err := swomsg.NewLog(gormOld, id) + ctx := cfg.Logger.BackgroundContext() + mainLog, err := swomsg.NewLog(ctx, m.dbMain) if err != nil { - return nil, fmt.Errorf("create old message log: %w", err) + return nil, err } - - msgLogNext, err := swomsg.NewLog(gormNew, id) + nextLog, err := swomsg.NewLog(ctx, m.dbNext) if err != nil { - return nil, fmt.Errorf("create new message log: %w", err) + return nil, err } - sm := NewStatsManager() - ctx, cancel := context.WithCancel(context.Background()) - m := &Manager{ - dbOld: sql.OpenDB(cfg.OldDBC), - dbNew: sql.OpenDB(cfg.NewDBC), - - protectedDB: sql.OpenDB(NewConnector(cfg.OldDBC, cfg.NewDBC, sm)), - - id: id, - msgLog: msgLog, - nextMsgLog: msgLogNext, - canExec: cfg.CanExec, - msgCh: make(chan *swomsg.Message), - nextMsgCh: make(chan *swomsg.Message), - errCh: make(chan error, 10), - cancel: cancel, - ready: make(chan struct{}), - - stats: sm, - } + m.grp = swogrp.NewGroup(swogrp.Config{ + CanExec: cfg.CanExec, - m.msgState, err = newState(ctx, m) - if err != nil { - return nil, fmt.Errorf("create state: %w", err) - } + Logger: cfg.Logger, + MainLog: mainLog, + NextLog: nextLog, - go func() { - <-m.ready - for { - msg, err := m.msgLog.Next(ctx) - if err != nil { - m.errCh <- fmt.Errorf("read from log: %w", err) - return - } - err = m.msgState.processFromOld(ctx, msg) - if err != nil { - m.errCh <- 
fmt.Errorf("process from old db log: %w", err) - return - } - } - }() - go func() { - <-m.ready - for { - msg, err := m.nextMsgLog.Next(ctx) - if err != nil { - m.errCh <- fmt.Errorf("read from next log: %w", err) - return - } - err = m.msgState.processFromNew(ctx, msg) - if err != nil { - m.errCh <- fmt.Errorf("process from new db log: %w", err) - return - } - } - }() + ResetFunc: m.DoReset, + ExecuteFunc: m.DoExecute, + PauseFunc: m.DoPause, + ResumeFunc: m.DoResume, + }) return m, nil } -func (m *Manager) SetPauseResumer(app lifecycle.PauseResumer) { - if m.app != nil { +func (m *Manager) DoPause(ctx context.Context) error { + if m.pauseResume == nil { + return errors.New("not initialized") + } + return m.pauseResume.Pause(ctx) +} + +func (m *Manager) DoResume(ctx context.Context) error { + if m.pauseResume == nil { + return errors.New("not initialized") + } + return m.pauseResume.Resume(ctx) +} + +func (m *Manager) Init(app lifecycle.PauseResumer) { + if m.pauseResume != nil { panic("already set") } - m.app = app - close(m.ready) + m.pauseResume = app } // withConnFromOld allows performing operations with a raw connection to the old database. func (m *Manager) withConnFromOld(ctx context.Context, f func(context.Context, *pgx.Conn) error) error { - return WithLockedConn(ctx, m.dbOld, f) + return WithLockedConn(ctx, m.dbMain, f) } // withConnFromNew allows performing operations with a raw connection to the new database. func (m *Manager) withConnFromNew(ctx context.Context, f func(context.Context, *pgx.Conn) error) error { - return WithLockedConn(ctx, m.dbNew, f) + return WithLockedConn(ctx, m.dbNext, f) } // withConnFromBoth allows performing operations with a raw connection to both databases database. 
func (m *Manager) withConnFromBoth(ctx context.Context, f func(ctx context.Context, oldConn, newConn *pgx.Conn) error) error { // grab lock with old DB first - return WithLockedConn(ctx, m.dbOld, func(ctx context.Context, oldConn *pgx.Conn) error { - return WithLockedConn(ctx, m.dbNew, func(ctx context.Context, newConn *pgx.Conn) error { - return f(ctx, oldConn, newConn) + return WithLockedConn(ctx, m.dbMain, func(ctx context.Context, connMain *pgx.Conn) error { + return WithLockedConn(ctx, m.dbNext, func(ctx context.Context, connNext *pgx.Conn) error { + return f(ctx, connMain, connNext) }) }) } @@ -191,33 +140,18 @@ func WithLockedConn(ctx context.Context, db *sql.DB, runFunc func(context.Contex } // Status will return the current switchover status. -func (m *Manager) Status() *Status { return m.msgState.Status() } +func (m *Manager) Status() swogrp.Status { return m.grp.Status() } // SendPing will ping all nodes in the cluster. -func (m *Manager) SendPing(ctx context.Context) error { - defer time.Sleep(swomsg.PollInterval * 2) // wait for send & ack - return m.msgLog.Append(ctx, swomsg.Ping{}) -} +func (m *Manager) SendPing(ctx context.Context) error { return m.grp.Ping(ctx) } // SendReset will trigger a reset of the switchover. -func (m *Manager) SendReset(ctx context.Context) error { - if m.Status().IsDone() { - return fmt.Errorf("cannot reset switchover: switchover is done") - } - defer time.Sleep(swomsg.PollInterval * 2) // wait for send & ack - return m.msgLog.Append(ctx, swomsg.Reset{}) -} +func (m *Manager) SendReset(ctx context.Context) error { return m.grp.Reset(ctx) } // SendExecute will trigger the switchover to begin. 
-func (m *Manager) SendExecute(ctx context.Context) error { - if !m.Status().IsIdle() { - return fmt.Errorf("cannot execute switchover: switchover is not idle") - } - defer time.Sleep(swomsg.PollInterval * 3) // wait for send, ack, and start - return m.msgLog.Append(ctx, swomsg.Execute{}) -} +func (m *Manager) SendExecute(ctx context.Context) error { return m.grp.Execute(ctx) } -func (m *Manager) DB() *sql.DB { return m.protectedDB } +func (m *Manager) DB() *sql.DB { return m.dbApp } type Status struct { Details string diff --git a/swo/preflightlocks.go b/swo/preflightlocks.go index 16231acb13..d01625a86c 100644 --- a/swo/preflightlocks.go +++ b/swo/preflightlocks.go @@ -7,6 +7,7 @@ import ( "fmt" "github.com/jackc/pgx/v4" + "github.com/jackc/pgx/v4/stdlib" "github.com/target/goalert/lock" ) @@ -48,14 +49,7 @@ func UnlockConn(ctx context.Context, conn *pgx.Conn) { var errDone = errors.New("done") // sessionLock will get a shared advisory lock for the connection. -func sessionLock(ctx context.Context, conn driver.Conn) error { - type execQuery interface { - driver.ExecerContext - driver.QueryerContext - } - - c := conn.(execQuery) - +func sessionLock(ctx context.Context, c *stdlib.Conn) error { // Using literal here so we can avoid a prepared statement round trip. 
// // This will run for every new connection in SWO mode and for every diff --git a/swo/state.go b/swo/state.go deleted file mode 100644 index b836db65d1..0000000000 --- a/swo/state.go +++ /dev/null @@ -1,368 +0,0 @@ -package swo - -import ( - "context" - "fmt" - "strings" - "sync" - "time" - - "github.com/google/uuid" - "github.com/target/goalert/swo/swomsg" - "github.com/target/goalert/util/log" -) - -type state struct { - m *Manager - - stateName string - - status string - - nodes map[uuid.UUID]*Node - - taskClaimed bool - - taskID uuid.UUID - cancel func() - - stateFn StateFunc - - mx sync.Mutex -} - -func newState(ctx context.Context, m *Manager) (*state, error) { - s := &state{ - m: m, - nodes: make(map[uuid.UUID]*Node), - stateFn: StateUnknown, - stateName: "unknown", - cancel: func() {}, - } - - return s, s.hello(ctx) -} - -type StateFunc func(context.Context, *state, *swomsg.Message) StateFunc - -func (s *state) Status() *Status { - s.mx.Lock() - defer s.mx.Unlock() - - var nodes []Node - for _, n := range s.nodes { - nodes = append(nodes, *n) - } - - return &Status{ - Details: s.status, - Nodes: nodes, - } -} - -// IsIdle returns true before executing a switchover. -func (s Status) IsIdle() bool { - for _, n := range s.Nodes { - if n.Status != "idle" { - return false - } - } - return true -} - -// IsDone returns true if the switchover has already been completed. -func (s Status) IsDone() bool { - for _, n := range s.Nodes { - if n.Status != "complete" { - return false - } - } - return true -} - -// IsResetting returns true while the switchover is resetting. -func (s Status) IsResetting() bool { - for _, n := range s.Nodes { - if strings.HasPrefix(n.Status, "reset-") { - return true - } - } - return false -} - -// IsExecuting returns true while the switchover is executing. 
-func (s Status) IsExecuting() bool { - for _, n := range s.Nodes { - if strings.HasPrefix(n.Status, "exec-") { - return true - } - } - return false -} - -func (s *state) ackMessage(ctx context.Context, msgID uuid.UUID) { - err := s.m.msgLog.Append(ctx, swomsg.Ack{MsgID: msgID, Status: s.stateName, Exec: s.m.canExec}) - if err != nil { - log.Log(ctx, err) - } -} - -func (s *state) update(msg *swomsg.Message) { - s.mx.Lock() - defer s.mx.Unlock() - - n, ok := s.nodes[msg.NodeID] - if !ok { - n = &Node{ - ID: msg.NodeID, - } - s.nodes[msg.NodeID] = n - } - - switch { - case msg.Hello != nil: - n.OldValid = msg.Hello.IsOldDB - n.Status = msg.Hello.Status - n.CanExec = msg.Hello.CanExec - case msg.Ack != nil: - n.Status = msg.Ack.Status - n.CanExec = msg.Ack.Exec - case msg.Error != nil: - s.status = "error: " + msg.Error.Details - case msg.Done != nil: - s.status = "" - } -} - -func (s *state) taskDone(ctx context.Context, err error) { - if err != nil { - err = s.m.msgLog.Append(ctx, swomsg.Error{MsgID: s.taskID, Details: err.Error()}) - } else { - err = s.m.msgLog.Append(ctx, swomsg.Done{MsgID: s.taskID}) - } - if err != nil { - log.Log(ctx, err) - } -} - -func (s *state) hello(ctx context.Context) error { - err := s.m.msgLog.Append(ctx, swomsg.Hello{IsOldDB: true, Status: s.stateName, CanExec: s.m.canExec}) - if err != nil { - return err - } - - // wait for poll interval before sending to new DB, - // giving all nodes a chance to process - time.Sleep(swomsg.PollInterval) - err = s.m.nextMsgLog.Append(ctx, swomsg.Hello{IsNewDB: true, Status: s.stateName, CanExec: s.m.canExec}) - if err != nil { - return err - } - return nil -} - -func (s *state) processFromNew(ctx context.Context, msg *swomsg.Message) error { - if msg.Hello == nil { - return fmt.Errorf("unexpected message to NEW DB: %v", msg) - } - - s.mx.Lock() - defer s.mx.Unlock() - - n, ok := s.nodes[msg.NodeID] - if ok { - n.NewValid = msg.Hello.IsNewDB - return nil - } - - s.nodes[msg.NodeID] = &Node{ - ID: 
msg.NodeID, - CanExec: msg.Hello.CanExec, - NewValid: msg.Hello.IsNewDB, - Status: msg.Hello.Status, - } - return nil -} - -func (s *state) processFromOld(ctx context.Context, msg *swomsg.Message) error { - s.update(msg) - - if msg.Reset != nil { - s.cancel() - s.nodes = make(map[uuid.UUID]*Node) - s.m.app.Resume(ctx) - s.taskID = msg.ID - s.taskClaimed = false - s.stateName = "reset-wait" - s.stateFn = StateResetWait - err := s.hello(ctx) - if err != nil { - return err - } - if s.m.canExec { - s.ackMessage(ctx, msg.ID) - } - return nil - } - - s.stateFn = s.stateFn(ctx, s, msg) - if msg.Ping != nil { - s.ackMessage(ctx, msg.ID) - } - - return nil -} - -func (s *state) StartTask(task func(context.Context) error) { - ctx, cancel := context.WithCancel(context.Background()) - s.cancel = cancel - go func() { s.taskDone(ctx, task(ctx)) }() -} - -// StateIdle is the state when the node is idle. -func StateIdle(ctx context.Context, s *state, msg *swomsg.Message) StateFunc { - s.stateName = "idle" - - switch { - case msg.Execute != nil: - s.taskID = msg.ID - s.stateName = "exec-wait" - s.taskClaimed = false - s.ackMessage(ctx, msg.ID) - return StateExecWait - } - - return StateIdle -} - -func (s *state) isExecAck(msg *swomsg.Message) bool { - if !s.m.canExec { - return false - } - if msg.Ack == nil || !msg.Ack.Exec { - return false - } - if msg.Ack.MsgID != s.taskID { - return false - } - if s.taskClaimed { - return false - } - - if msg.NodeID != s.m.id { - s.taskClaimed = true - return false - } - - return true -} - -// StateExecWait is the state when the node is waiting for execution to be performed. 
-func StateExecWait(ctx context.Context, s *state, msg *swomsg.Message) StateFunc { - s.stateName = "exec-wait" - - switch { - case msg.Error != nil && msg.Error.MsgID == s.taskID: - s.stateName = "error" - s.ackMessage(ctx, msg.ID) - return StateError - case msg.Done != nil && msg.Done.MsgID == s.taskID: - s.stateName = "idle" - s.ackMessage(ctx, msg.ID) - return StateIdle - case msg.Progress != nil: - s.status = msg.Progress.Details - case s.isExecAck(msg): - s.StartTask(s.m.DoExecute) - s.stateName = "exec-run" - s.ackMessage(ctx, msg.ID) - return StateResetRun - } - - return StateExecWait -} - -// StateExecRun is the state when the current node is executing the switchover. -func StateExecRun(ctx context.Context, s *state, msg *swomsg.Message) StateFunc { - s.stateName = "exec-run" - - switch { - case msg.Error != nil && msg.Error.MsgID == s.taskID: - s.cancel() - s.stateName = "error" - s.ackMessage(ctx, msg.ID) - return StateError - case msg.Done != nil && msg.Done.MsgID == s.taskID: - // already done, make sure we still cancel the context though - s.cancel() - s.stateName = "idle" - s.ackMessage(ctx, msg.ID) - return StateIdle - case msg.Progress != nil: - s.status = msg.Progress.Details - } - - return StateExecRun -} - -// StateError is the state after a task failed. -func StateError(ctx context.Context, s *state, msg *swomsg.Message) StateFunc { - s.stateName = "error" - - return StateError -} - -// StateUnknown is the state after startup. -func StateUnknown(ctx context.Context, s *state, msg *swomsg.Message) StateFunc { - s.stateName = "unknown" - - return StateError -} - -// StateResetWait is the state when the node is waiting for a reset to be performed. 
-func StateResetWait(ctx context.Context, s *state, msg *swomsg.Message) StateFunc { - s.stateName = "reset-wait" - - switch { - case msg.Error != nil && msg.Error.MsgID == s.taskID: - s.stateName = "error" - s.ackMessage(ctx, msg.ID) - return StateError - case msg.Done != nil && msg.Done.MsgID == s.taskID: - s.stateName = "idle" - s.ackMessage(ctx, msg.ID) - return StateIdle - case msg.Progress != nil: - s.status = msg.Progress.Details - case s.isExecAck(msg): - s.StartTask(s.m.DoReset) - s.stateName = "reset-run" - s.ackMessage(ctx, msg.ID) - return StateResetRun - } - - return StateResetWait -} - -// StateResetRun is the state when the current node is performing a reset. -func StateResetRun(ctx context.Context, s *state, msg *swomsg.Message) StateFunc { - s.stateName = "reset-run" - - switch { - case msg.Error != nil && msg.Error.MsgID == s.taskID: - s.cancel() - s.stateName = "error" - s.ackMessage(ctx, msg.ID) - return StateError - case msg.Done != nil && msg.Done.MsgID == s.taskID: - // already done, make sure we still cancel the context though - s.cancel() - s.stateName = "idle" - s.ackMessage(ctx, msg.ID) - return StateIdle - case msg.Progress != nil: - s.status = msg.Progress.Details - } - - return StateResetRun -} diff --git a/swo/swogrp/config.go b/swo/swogrp/config.go new file mode 100644 index 0000000000..afe1d64c31 --- /dev/null +++ b/swo/swogrp/config.go @@ -0,0 +1,21 @@ +package swogrp + +import ( + "context" + + "github.com/target/goalert/swo/swomsg" + "github.com/target/goalert/util/log" +) + +type Config struct { + CanExec bool + + Logger *log.Logger + MainLog *swomsg.Log + NextLog *swomsg.Log + + ResetFunc func(context.Context) error + ExecuteFunc func(context.Context) error + PauseFunc func(context.Context) error + ResumeFunc func(context.Context) error +} diff --git a/swo/swogrp/group.go b/swo/swogrp/group.go new file mode 100644 index 0000000000..e2b4bf3ebd --- /dev/null +++ b/swo/swogrp/group.go @@ -0,0 +1,404 @@ +package swogrp + +import ( 
+ "context" + "encoding/json" + "fmt" + "sync" + "time" + + "github.com/google/uuid" + "github.com/target/goalert/swo/swomsg" + "github.com/target/goalert/util/log" +) + +/* + +Input -> Send ping message, wait ack-all +Reset -> Send reset message, (no-wait), elect single node to run reset, track progress, signal done or err +Execute -> Send exec message, wait ack-all, elect single node to run exec, track progress, all sync TX refresh (send & wait), Plan (send & wait for all ack), continue work, signal done or err + +- Send Message +- Send Message and wait for all-ack (from user, from exec) +- Elect node +- Track progress + +reset part of leader, first hello with exec is leader + + +reset +hello, hello-exec, hello-next +start + + +*/ + +type Group struct { + Config + State + + nodeID uuid.UUID + reset bool + nodes map[uuid.UUID]*Node + tasks map[uuid.UUID]TaskInfo + leader bool + mx sync.Mutex + + nextDBNodes map[uuid.UUID]struct{} + + ackMsgs chan map[uuid.UUID]*ackWait +} + +type ackWait struct { + msgID uuid.UUID + waitAck map[uuid.UUID]struct{} + done chan error +} + +type TaskInfo struct { + ID uuid.UUID + Name string + Error string `json:",omitempty"` + Status string `json:",omitempty"` + + cancel func() +} + +type Node struct { + ID uuid.UUID + + IsLeader bool + CanExec bool + OldDBValid bool + NewDBValid bool + + Tasks []TaskInfo +} + +func NewGroup(cfg Config) *Group { + g := &Group{ + Config: cfg, + nodeID: uuid.New(), + nodes: make(map[uuid.UUID]*Node), + tasks: make(map[uuid.UUID]TaskInfo), + State: stateNeedsReset, + ackMsgs: make(chan map[uuid.UUID]*ackWait, 1), + nextDBNodes: make(map[uuid.UUID]struct{}), + } + g.ackMsgs <- make(map[uuid.UUID]*ackWait) + + go g.loopNextLog() + go g.loopMainLog() + + return g +} + +type Status struct { + Nodes []Node + State State +} + +func cloneTasks(in []TaskInfo) []TaskInfo { + out := make([]TaskInfo, len(in)) + copy(out, in) + return out +} + +func (g *Group) Status() Status { + g.mx.Lock() + defer g.mx.Unlock() + 
+ var nodes []Node + + for _, n := range g.nodes { + cpy := *n + cpy.Tasks = cloneTasks(n.Tasks) + nodes = append(nodes, cpy) + } + + return Status{ + Nodes: nodes, + State: g.State, + } +} + +func (g *Group) loopNextLog() { + for msg := range g.NextLog.Events() { + if msg.Type != "hello-next" { + // ignore + continue + } + + g.mx.Lock() + g.addNode(msg.Node, false, true, false) + g.mx.Unlock() + } +} + +func (g *Group) loopMainLog() { + buf := newMsgBuf() + go func() { + for msg := range buf.Next() { + err := g.processMessage(g.Logger.BackgroundContext(), msg) + if err != nil { + g.Logger.Error(context.Background(), fmt.Errorf("process message: %w", err)) + } + } + }() + + for msg := range g.MainLog.Events() { + if msg.Type == "ack" { + g.recordAck(msg) + continue + } + + buf.Append(msg) + } +} + +func (g *Group) startTask(ctx context.Context, name string, fn func(context.Context) error) error { + info := TaskInfo{ID: uuid.New(), Name: name} + err := g.sendMessage(ctx, "task-start", info, false) + if err != nil { + return err + } + + ctx = log.FromContext(ctx).BackgroundContext() + ctx, info.cancel = context.WithCancel(ctx) + g.tasks[info.ID] = info + go func() { + err := fn(withTask(ctx, g, info)) + if err != nil { + info.Error = err.Error() + } + + err = g.sendMessage(ctx, "task-end", info, false) + if err != nil { + log.Log(ctx, fmt.Errorf("send task-end: %w", err)) + } + + info.cancel() + g.mx.Lock() + delete(g.tasks, info.ID) + g.mx.Unlock() + }() + + return nil +} + +func (g *Group) resetState() { + for id := range g.nodes { + delete(g.nodes, id) + } + g.reset = true + g.leader = false + g.State = stateReset + for _, t := range g.tasks { + t.cancel() + } + + msgs := <-g.ackMsgs + for id, aw := range msgs { + aw.done <- fmt.Errorf("reset") + delete(msgs, id) + } + g.ackMsgs <- msgs +} + +// addNode adds a node to the group, returns true if we have become the leader node +// after a reset. 
+func (g *Group) addNode(id uuid.UUID, oldDB, newDB, exec bool) bool { + if g.State != stateReset { + g.State = stateNeedsReset + } + n := g.nodes[id] + if n == nil { + n = &Node{ID: id} + g.nodes[id] = n + } + n.NewDBValid = n.NewDBValid || newDB + n.OldDBValid = n.OldDBValid || oldDB + n.CanExec = n.CanExec || exec + + var isNewLeader bool + if g.reset && exec { + g.reset = false + g.leader = g.nodeID == id + n.IsLeader = true + isNewLeader = g.leader + } + + return isNewLeader +} + +func (g *Group) ack(ctx context.Context, msgID uuid.UUID) { + err := g.MainLog.Append(ctx, swomsg.Message{ + Type: "ack", + ID: uuid.New(), + Node: g.nodeID, + AckID: msgID, + }) + if err != nil { + log.Log(ctx, fmt.Errorf("send ack: %w", err)) + } +} + +func (g *Group) recordAck(msg swomsg.Message) { + msgs := <-g.ackMsgs + aw := msgs[msg.AckID] + if aw == nil { + g.ackMsgs <- msgs + return + } + + delete(aw.waitAck, msg.Node) + if len(aw.waitAck) == 0 { + aw.done <- nil + delete(msgs, msg.AckID) + } + g.ackMsgs <- msgs +} + +func (g *Group) updateTask(msg swomsg.Message, upsert bool) error { + n := g.nodes[msg.Node] + if n == nil { + return nil + } + var info TaskInfo + err := json.Unmarshal(msg.Data, &info) + if err != nil { + return err + } + filtered := n.Tasks[:0] + for _, t := range n.Tasks { + if t.ID == info.ID { + continue + } + filtered = append(filtered, t) + } + n.Tasks = filtered + if upsert { + n.Tasks = append(n.Tasks, info) + } else if info.Name == "reset-db" { + g.State = stateIdle + } + return nil +} + +func (g *Group) processMessage(ctx context.Context, msg swomsg.Message) error { + g.mx.Lock() + defer g.mx.Unlock() + + if msg.Ack { + defer g.ack(ctx, msg.ID) + } + + switch msg.Type { + case "hello-exec": + if g.addNode(msg.Node, true, false, true) { + // we are the new leader, perform DB reset + return g.startTask(ctx, "reset-db", g.ResetFunc) + } + case "task-end": + return g.updateTask(msg, false) + case "task-start": + return g.updateTask(msg, true) + case 
"task-progress": + return g.updateTask(msg, true) + case "hello": + g.addNode(msg.Node, true, false, false) + case "ping": + case "reset": + g.resetState() + + if err := g.startTask(ctx, "resume", g.ResumeFunc); err != nil { + return err + } + if err := g.sendMessageNext(ctx, "hello-next", nil, false); err != nil { + return err + } + + if g.CanExec { + return g.sendMessage(ctx, "hello-exec", nil, false) + } + + return g.sendMessage(ctx, "hello", nil, false) + case "exec": + if g.State != stateIdle { + break + } + g.State = stateExec + if g.leader { + return g.startTask(ctx, "exec", g.ExecuteFunc) + } + case "pause": + return g.startTask(ctx, "pause", g.PauseFunc) + default: + } + + return nil +} + +func (g *Group) sendMessageNext(ctx context.Context, msgType string, v interface{}, wait bool) error { + return g.sendMessageWith(ctx, g.NextLog, msgType, v, wait) +} + +func (g *Group) sendMessage(ctx context.Context, msgType string, v interface{}, wait bool) error { + return g.sendMessageWith(ctx, g.MainLog, msgType, v, wait) +} + +func (g *Group) sendMessageWith(ctx context.Context, log *swomsg.Log, msgType string, v interface{}, wait bool) error { + msg := swomsg.Message{ + Type: msgType, + ID: uuid.New(), + Node: g.nodeID, + Ack: wait, + } + if v != nil { + data, err := json.Marshal(v) + if err != nil { + return err + } + msg.Data = data + } + if err := log.Append(ctx, msg); err != nil { + return err + } + if !wait { + return nil + } + + m := make(map[uuid.UUID]struct{}) + for _, n := range g.nodes { + m[n.ID] = struct{}{} + } + + aw := &ackWait{ + msgID: msg.ID, + done: make(chan error, 1), + waitAck: m, + } + + acks := <-g.ackMsgs + acks[msg.ID] = aw + g.ackMsgs <- acks + + return <-aw.done +} + +func (g *Group) Reset(ctx context.Context) error { + defer time.Sleep(time.Second * 2) + return g.sendMessage(ctx, "reset", nil, false) +} + +func (g *Group) Ping(ctx context.Context) error { + return g.sendMessage(ctx, "ping", nil, true) +} + +func (g *Group) Execute(ctx 
context.Context) error { + if g.Status().State != stateIdle { + return fmt.Errorf("cannot execute, group is not idle") + } + + return g.sendMessage(ctx, "exec", nil, true) +} diff --git a/swo/swogrp/msgbuf.go b/swo/swogrp/msgbuf.go new file mode 100644 index 0000000000..652ed31fd1 --- /dev/null +++ b/swo/swogrp/msgbuf.go @@ -0,0 +1,42 @@ +package swogrp + +import "github.com/target/goalert/swo/swomsg" + +type msgBuf struct { + full chan []swomsg.Message + empty chan []swomsg.Message + + next chan swomsg.Message +} + +func (buf *msgBuf) Append(msg swomsg.Message) { + var msgs []swomsg.Message + select { + case msgs = <-buf.empty: + case msgs = <-buf.full: + } + msgs = append(msgs, msg) + buf.full <- msgs +} +func (buf *msgBuf) Next() <-chan swomsg.Message { return buf.next } + +func newMsgBuf() *msgBuf { + buf := &msgBuf{ + full: make(chan []swomsg.Message, 1), + empty: make(chan []swomsg.Message, 1), + next: make(chan swomsg.Message), + } + buf.empty <- nil + go func() { + for msgs := range buf.full { + msg := msgs[0] + if len(msgs) > 1 { + buf.full <- msgs[1:] + } else { + buf.empty <- msgs[1:] + } + buf.next <- msg + } + }() + return buf +} diff --git a/swo/swogrp/progressf.go b/swo/swogrp/progressf.go new file mode 100644 index 0000000000..1a6e8f0d86 --- /dev/null +++ b/swo/swogrp/progressf.go @@ -0,0 +1,34 @@ +package swogrp + +import ( + "context" + "fmt" +) + +type ctxKey int + +const ( + ctxKeyTask ctxKey = iota +) + +type taskCtx struct { + *Group + TaskInfo +} + +func withTask(ctx context.Context, grp *Group, info TaskInfo) context.Context { + return context.WithValue(ctx, ctxKeyTask, &taskCtx{Group: grp, TaskInfo: info}) +} + +func task(ctx context.Context) *taskCtx { + return ctx.Value(ctxKeyTask).(*taskCtx) +} + +func Progressf(ctx context.Context, format string, args ...interface{}) { + t := task(ctx) + t.TaskInfo.Status = fmt.Sprintf(format, args...) 
+ err := t.sendMessage(ctx, "task-progress", t.TaskInfo, false) + if err != nil { + t.Logger.Error(ctx, fmt.Errorf("send task-progress: %w", err)) + } +} diff --git a/swo/swogrp/state.go b/swo/swogrp/state.go new file mode 100644 index 0000000000..ca0e5d72cb --- /dev/null +++ b/swo/swogrp/state.go @@ -0,0 +1,12 @@ +package swogrp + +type State string + +const ( + stateNeedsReset State = "needs-reset" + stateIdle State = "idle" + stateReset State = "reset" + stateError State = "error" + stateExec State = "exec" + stateDone State = "done" +) diff --git a/swo/swomsg/log.go b/swo/swomsg/log.go index 8dbdc9c106..ad04f55882 100644 --- a/swo/swomsg/log.go +++ b/swo/swomsg/log.go @@ -2,24 +2,27 @@ package swomsg import ( "context" + "database/sql" "encoding/json" + "errors" "fmt" "time" - "github.com/google/uuid" - "gorm.io/gorm" + "github.com/jackc/pgx/v4" + "github.com/jackc/pgx/v4/stdlib" + "github.com/target/goalert/util/log" ) const PollInterval = time.Second type Log struct { - db *gorm.DB - id uuid.UUID + db *sql.DB readID int64 - events []logEvent lastLoad time.Time + + eventCh chan Message } var ErrStaleLog = fmt.Errorf("cannot append until log is read") @@ -30,16 +33,44 @@ type logEvent struct { Data []byte } -func NewLog(db *gorm.DB, id uuid.UUID) (*Log, error) { +func NewLog(ctx context.Context, db *sql.DB) (*Log, error) { + var lastID int64 + // only ever load new events + err := db.QueryRowContext(ctx, "select coalesce(max(id), 0) from switchover_log").Scan(&lastID) + if err != nil { + return nil, err + } + l := &Log{ - id: id, - db: db.Table("switchover_log"), + db: db, + eventCh: make(chan Message), } + go l.readLoop(log.FromContext(ctx).BackgroundContext(), lastID) + return l, nil +} - // only ever load new events - err := db.Table("switchover_log").Select("coalesce(max(id), 0)").Take(&l.readID).Error +func (l *Log) Events() <-chan Message { return l.eventCh } + +func (l *Log) readLoop(ctx context.Context, lastID int64) { + for { + events, err := 
l.loadEvents(ctx, lastID) + if err != nil { + log.Log(ctx, err) + continue + } - return l, err + for _, e := range events { + lastID = e.ID + var w Message + err = json.Unmarshal(e.Data, &w) + if err != nil { + log.Log(ctx, fmt.Errorf("error parsing event: %v", err)) + continue + } + w.TS = e.Timestamp + l.eventCh <- w + } + } } func ctxSleep(ctx context.Context, d time.Duration) error { @@ -58,91 +89,58 @@ func ctxSleep(ctx context.Context, d time.Duration) error { } } -func (l *Log) Next(ctx context.Context) (*Message, error) { - var err error - for len(l.events) == 0 { - err = l.loadEvents(ctx) - if err != nil { - return nil, err - } +func (l *Log) loadEvents(ctx context.Context, lastID int64) ([]logEvent, error) { + err := ctxSleep(ctx, PollInterval-time.Since(l.lastLoad)) + if err != nil { + return nil, err } + l.lastLoad = time.Now() - var w Message - err = json.Unmarshal(l.events[0].Data, &w) + rows, err := l.db.QueryContext(ctx, "select id, timestamp, data from switchover_log where id > $1 order by id asc limit 100", lastID) + if errors.Is(err, sql.ErrNoRows) { + return nil, nil + } if err != nil { return nil, err } - w.TS = l.events[0].Timestamp + defer rows.Close() - l.readID = l.events[0].ID - l.events = l.events[1:] + var events []logEvent + var r logEvent + for rows.Next() { + err := rows.Scan(&r.ID, &r.Timestamp, &r.Data) + if err != nil { + return nil, err + } + events = append(events, r) + } - return &w, nil + return events, nil } -func (l *Log) loadEvents(ctx context.Context) error { - err := ctxSleep(ctx, PollInterval-time.Since(l.lastLoad)) +func (l *Log) Append(ctx context.Context, msg Message) error { + data, err := json.Marshal(msg) if err != nil { return err } - l.lastLoad = time.Now() - var events []logEvent - err = l.db. - WithContext(ctx). - Where("timestamp > now() - interval '1 minute'"). - Where("id > ?", l.readID). - Order("id asc"). - Limit(100). 
- Find(&events).Error + var b pgx.Batch + b.Queue("begin") + b.Queue("lock table switchover_log in exclusive mode") + b.Queue("insert into switchover_log (id, timestamp, data) values (coalesce((select max(id)+1 from switchover_log), 1), now(), $1)", data) + b.Queue("commit") + b.Queue("rollback") + + conn, err := stdlib.AcquireConn(l.db) if err != nil { return err } + defer stdlib.ReleaseConn(l.db, conn) - l.events = append(l.events, events...) - - return nil -} - -func (l *Log) Append(ctx context.Context, v interface{}) error { - var msg Message - switch m := v.(type) { - case Ping: - msg.Ping = &m - case Ack: - msg.Ack = &m - case Reset: - msg.Reset = &m - case Error: - msg.Error = &m - case Execute: - msg.Execute = &m - case Plan: - msg.Plan = &m - case Progress: - msg.Progress = &m - case Done: - msg.Done = &m - case Hello: - msg.Hello = &m - default: - return fmt.Errorf("unknown message type %T", m) - } - - msg.ID = uuid.New() - msg.NodeID = l.id - data, err := json.Marshal(msg) + err = conn.SendBatch(ctx, &b).Close() if err != nil { return err } - l.db.WithContext(ctx).Transaction(func(db *gorm.DB) error { - err := db.Exec("lock switchover_log in exclusive mode").Error - if err != nil { - return err - } - return db.Exec("insert into switchover_log (id, timestamp, data) values (coalesce((select max(id)+1 from switchover_log), 1), now(), ?)", data).Error - }) - - return err + return nil } diff --git a/swo/swomsg/messages.go b/swo/swomsg/messages.go index 5b73e63e80..8fc2a6b58d 100644 --- a/swo/swomsg/messages.go +++ b/swo/swomsg/messages.go @@ -1,76 +1,132 @@ package swomsg import ( + "encoding/json" "time" "github.com/google/uuid" ) -type Message struct { - Header - - Ping *Ping `json:",omitempty"` - Ack *Ack `json:",omitempty"` - Reset *Reset `json:",omitempty"` - Execute *Execute `json:",omitempty"` - Error *Error `json:",omitempty"` - Plan *Plan `json:",omitempty"` - Progress *Progress `json:",omitempty"` - Done *Done `json:",omitempty"` - Hello *Hello 
`json:",omitempty"` -} +/* + + Idle + Reset -> Elect(Reset) + Elect(Reset) -> ResetRun,ResetWait -> Idle + Execute -> Elect(Execute) + Elect(Execute) -> ExecuteRun,ExecuteWait -> + -type Header struct { - ID uuid.UUID - NodeID uuid.UUID - TS time.Time `json:"-"` + +*/ + +type Message struct { + ID uuid.UUID + Node uuid.UUID + TS time.Time `json:"-"` + + Type string + Ack bool `json:",omitempty"` + AckID uuid.UUID `json:",omitempty"` + Data json.RawMessage `json:",omitempty"` } -type ( - // user commands - Ping struct{} - Reset struct{} - Execute struct{} - - Hello struct { - IsOldDB bool `json:",omitempty"` - IsNewDB bool `json:",omitempty"` - Status string - CanExec bool `json:",omitempty"` - } - - Ack struct { - MsgID uuid.UUID - Status string - Exec bool `json:",omitempty"` - } - - // task updates - Progress struct { - MsgID uuid.UUID - Details string - } - Error struct { - MsgID uuid.UUID - Details string - } - Done struct{ MsgID uuid.UUID } - - Plan struct { - // Must receive Ack from all nodes before this time. - ConsensusDeadline time.Time - - // Must receive PlanStart or Error before this time, otherwise all - // nodes will Error. - StartAt time.Time - - // All nodes should disable idle connections after this time. - DisableIdleAt time.Time - - // All nodes should re-enable idle connections after this time. - Deadline time.Time - } -) +// type Plan struct { +// // Must receive Ack from all nodes before this time. +// ConsensusDeadline time.Time + +// // Must receive PlanStart or Error before this time, otherwise all +// // nodes will Error. +// StartAt time.Time + +// // All nodes should disable idle connections after this time. +// DisableIdleAt time.Time + +// // All nodes should re-enable idle connections after this time. 
+// Deadline time.Time +// } + +// type Type int + +// const ( +// Unknown Type = iota +// Execute +// Ping +// ) + +// type msg struct { +// Header + +// Ping *Ping `json:",omitempty"` +// Reset *Reset `json:",omitempty"` +// Execute *Execute `json:",omitempty"` +// WaitTx *WaitTx `json:",omitempty"` +// Ack *Ack `json:",omitempty"` +// Error *Error `json:",omitempty"` +// Plan *Plan `json:",omitempty"` +// Progress *Progress `json:",omitempty"` +// Done *Done `json:",omitempty"` +// Hello *Hello `json:",omitempty"` +// } + +// type Header struct { +// ID uuid.UUID +// NodeID uuid.UUID +// TS time.Time `json:"-"` +// } + +// type ( +// Start struct { +// Header `json:"-"` + +// TaskName string +// TaskID uuid.UUID +// NodeID uuid.UUID `json:",omitempty"` +// } + +// // user commands +// Ping struct { +// Header `json:"-"` +// } + +// Hello struct { +// Header `json:"-"` +// IsOldDB bool `json:",omitempty"` +// IsNewDB bool `json:",omitempty"` +// Status string +// CanExec bool `json:",omitempty"` +// } + +// Ack struct { +// Header `json:"-"` +// MsgID uuid.UUID +// } + +// TaskStatus struct { +// Header `json:"-"` +// TaskID uuid.UUID +// Details string +// } +// TaskDone struct { +// Header `json:"-"` +// TaskID uuid.UUID +// Error string `json:",omitempty"` +// } + +// Plan struct { +// // Must receive Ack from all nodes before this time. +// ConsensusDeadline time.Time + +// // Must receive PlanStart or Error before this time, otherwise all +// // nodes will Error. +// StartAt time.Time + +// // All nodes should disable idle connections after this time. +// DisableIdleAt time.Time + +// // All nodes should re-enable idle connections after this time. 
+// Deadline time.Time +// } +// ) /* UI diff --git a/web/src/app/admin/switchover/AdminSwitchover.tsx b/web/src/app/admin/switchover/AdminSwitchover.tsx index 7386ca6648..8969e35c3d 100644 --- a/web/src/app/admin/switchover/AdminSwitchover.tsx +++ b/web/src/app/admin/switchover/AdminSwitchover.tsx @@ -36,6 +36,7 @@ const query = gql` canExec oldValid newValid + isLeader } } } @@ -69,7 +70,7 @@ function cptlz(s: string): string { } export default function AdminSwitchover(): JSX.Element { - const { loading, error, data: _data } = useQuery(query) + const { loading, error, data: _data } = useQuery(query, { pollInterval: 250 }) const data = _data?.swoStatus const [lastAction, setLastAction] = useState('') const [statusNotices, setStatusNotices] = useState([]) @@ -154,7 +155,6 @@ export default function AdminSwitchover(): JSX.Element { )} - - {data?.nodes.length > 0 && data.nodes diff --git a/web/src/app/admin/switchover/SWONode.tsx b/web/src/app/admin/switchover/SWONode.tsx index 78698f464c..785ce76d1f 100644 --- a/web/src/app/admin/switchover/SWONode.tsx +++ b/web/src/app/admin/switchover/SWONode.tsx @@ -23,7 +23,7 @@ export default function SWONode({ node, name }: SWONodeProps): JSX.Element { return ( - + {name} diff --git a/web/src/schema.d.ts b/web/src/schema.d.ts index 4ad13a1c23..f9a23d57f8 100644 --- a/web/src/schema.d.ts +++ b/web/src/schema.d.ts @@ -52,6 +52,7 @@ export interface SWONode { oldValid: boolean newValid: boolean canExec: boolean + isLeader: boolean status: string } From 5887afefac9e59f57280b851b346b0dbfa89c314 Mon Sep 17 00:00:00 2001 From: Nathaniel Caza Date: Wed, 6 Apr 2022 09:42:17 -0500 Subject: [PATCH 073/225] fix log --- swo/execute.go | 7 ++++--- swo/swogrp/group.go | 5 +++++ 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/swo/execute.go b/swo/execute.go index 449f54e16e..3746ef0010 100644 --- a/swo/execute.go +++ b/swo/execute.go @@ -88,9 +88,10 @@ func (m *Manager) DoExecute(ctx context.Context) error { var lastNone bool 
for ctx.Err() == nil { // sync in a loop until DB is up-to-date + s := time.Now() n, pend, err := LoopSync(ctx, rt, oldConn, newConn) + dur := time.Since(s) - fmt.Println("sync", n, "pending", pend) if pend > 0 { lastNone = false swogrp.Progressf(ctx, "sync: %d rows pending", pend) @@ -109,10 +110,10 @@ func (m *Manager) DoExecute(ctx context.Context) error { lastNone = true swogrp.Progressf(ctx, "sync: waiting for changes") } - time.Sleep(10 * time.Second) + time.Sleep(100 * time.Millisecond) } else { lastNone = false - swogrp.Progressf(ctx, "sync: %d rows replicated", n) + swogrp.Progressf(ctx, "sync: %d rows replicated in %s", n, dur.Truncate(time.Millisecond)) } } diff --git a/swo/swogrp/group.go b/swo/swogrp/group.go index e2b4bf3ebd..5cc1a2bf70 100644 --- a/swo/swogrp/group.go +++ b/swo/swogrp/group.go @@ -87,6 +87,11 @@ func NewGroup(cfg Config) *Group { } g.ackMsgs <- make(map[uuid.UUID]*ackWait) + err := g.sendMessage(cfg.Logger.BackgroundContext(), "hello", nil, false) + if err != nil { + cfg.Logger.Error(context.Background(), err) + } + go g.loopNextLog() go g.loopMainLog() From 931e6611ce509695f0e4a383fab109e427f02162 Mon Sep 17 00:00:00 2001 From: Nathaniel Caza Date: Wed, 6 Apr 2022 09:43:08 -0500 Subject: [PATCH 074/225] add skip support to pgdump-lite --- devtools/pgdump-lite/cmd/pgdump-lite/main.go | 4 +++- devtools/pgdump-lite/dumpdata.go | 16 +++++++++++++++- 2 files changed, 18 insertions(+), 2 deletions(-) diff --git a/devtools/pgdump-lite/cmd/pgdump-lite/main.go b/devtools/pgdump-lite/cmd/pgdump-lite/main.go index 35d9fd7436..12b70cfc56 100644 --- a/devtools/pgdump-lite/cmd/pgdump-lite/main.go +++ b/devtools/pgdump-lite/cmd/pgdump-lite/main.go @@ -5,6 +5,7 @@ import ( "flag" "log" "os" + "strings" "github.com/jackc/pgx/v4" "github.com/target/goalert/devtools/pgdump-lite" @@ -15,6 +16,7 @@ func main() { file := flag.String("f", "", "Output file (default is stdout).") db := flag.String("d", os.Getenv("DBURL"), "DB URL") // use same env var as 
pg_dump dataOnly := flag.Bool("a", false, "dump only the data, not the schema") + skip := flag.String("s", "", "skip tables") flag.Parse() out := os.Stdout @@ -40,7 +42,7 @@ func main() { } defer conn.Close(ctx) - err = pgdump.DumpData(ctx, conn, out) + err = pgdump.DumpData(ctx, conn, out, strings.Split(*skip, ",")) if err != nil { log.Fatalln("ERROR: dump data:", err) } diff --git a/devtools/pgdump-lite/dumpdata.go b/devtools/pgdump-lite/dumpdata.go index f63cfc2d51..847ab88c20 100644 --- a/devtools/pgdump-lite/dumpdata.go +++ b/devtools/pgdump-lite/dumpdata.go @@ -29,6 +29,7 @@ func sortColumns(columns []string) { return ci < cj }) } + func quoteNames(names []string) { for i, n := range names { names[i] = pgx.Identifier{n}.Sanitize() @@ -66,7 +67,16 @@ func (s *scannable) DecodeText(ci *pgtype.ConnInfo, src []byte) error { return nil } -func DumpData(ctx context.Context, conn *pgx.Conn, out io.Writer) error { +func contains(s []string, e string) bool { + for _, a := range s { + if a == e { + return true + } + } + return false +} + +func DumpData(ctx context.Context, conn *pgx.Conn, out io.Writer, skip []string) error { tx, err := conn.BeginTx(ctx, pgx.TxOptions{IsoLevel: pgx.RepeatableRead}) if err != nil { return fmt.Errorf("begin tx: %w", err) @@ -80,6 +90,10 @@ func DumpData(ctx context.Context, conn *pgx.Conn, out io.Writer) error { sort.Strings(tables) for _, table := range tables { + if contains(skip, table) { + continue + } + columns, err := queryStrings(ctx, tx, "select column_name from information_schema.columns where table_schema = 'public' and table_name = $1 order by ordinal_position", table) if err != nil { return fmt.Errorf("read columns for '%s': %w", table, err) From a402b1b464d3bf8d822a43d1f3f3fb0c2b9c4ab7 Mon Sep 17 00:00:00 2001 From: Nathaniel Caza Date: Wed, 6 Apr 2022 09:43:17 -0500 Subject: [PATCH 075/225] rr support to simpleproxy --- devtools/simpleproxy/main.go | 17 +++++++++++------ devtools/simpleproxy/roundrobin.go | 20 
++++++++++++++++++++ 2 files changed, 31 insertions(+), 6 deletions(-) create mode 100644 devtools/simpleproxy/roundrobin.go diff --git a/devtools/simpleproxy/main.go b/devtools/simpleproxy/main.go index 6ae1143e19..8b25ac1bde 100644 --- a/devtools/simpleproxy/main.go +++ b/devtools/simpleproxy/main.go @@ -22,13 +22,18 @@ func main() { parts = []string{"/", parts[0]} } - u, err := url.Parse(parts[1]) - if err != nil { - log.Fatalf("ERORR: parse %s: %v", parts[1], err) - } + var rr RR + hosts := strings.Split(parts[1], ",") + for _, host := range hosts { + + u, err := url.Parse(host) + if err != nil { + log.Fatalf("ERORR: parse %s: %v", host, err) + } - p := httputil.NewSingleHostReverseProxy(u) - h := http.Handler(p) + rr.h = append(rr.h, httputil.NewSingleHostReverseProxy(u)) + } + h := http.Handler(&rr) if *trim { h = http.StripPrefix(parts[0], h) } diff --git a/devtools/simpleproxy/roundrobin.go b/devtools/simpleproxy/roundrobin.go new file mode 100644 index 0000000000..35bdfa33f6 --- /dev/null +++ b/devtools/simpleproxy/roundrobin.go @@ -0,0 +1,20 @@ +package main + +import ( + "net/http" + "sync" +) + +type RR struct { + h []http.Handler + n int + mx sync.Mutex +} + +func (r *RR) ServeHTTP(w http.ResponseWriter, req *http.Request) { + r.mx.Lock() + handler := r.h[r.n] + r.n = (r.n + 1) % len(r.h) + r.mx.Unlock() + handler.ServeHTTP(w, req) +} From 096b5b6087723cb6ff8e28e85b40606112373c90 Mon Sep 17 00:00:00 2001 From: Nathaniel Caza Date: Wed, 6 Apr 2022 09:43:32 -0500 Subject: [PATCH 076/225] limit log cleanup num --- engine/cleanupmanager/db.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/engine/cleanupmanager/db.go b/engine/cleanupmanager/db.go index dbaf13acb4..959933832d 100644 --- a/engine/cleanupmanager/db.go +++ b/engine/cleanupmanager/db.go @@ -74,7 +74,7 @@ func NewDB(ctx context.Context, db *sql.DB) (*DB, error) { cleanupAlertLogs: p.P(` with - scope as (select id from alert_logs where id > $1 order by id limit 1000), + 
scope as (select id from alert_logs where id > $1 order by id limit 100), id_range as (select min(id), max(id) from scope), _delete as ( delete from alert_logs where id = any( @@ -85,7 +85,7 @@ func NewDB(ctx context.Context, db *sql.DB) (*DB, error) { for update skip locked ) ) - select id from scope offset 999 + select id from scope offset 99 `), cleanupOverrides: p.P(`DELETE FROM user_overrides WHERE id = ANY(SELECT id FROM user_overrides WHERE end_time < (now() - $1::interval) LIMIT 100 FOR UPDATE SKIP LOCKED)`), From 7e0ed9e2db9b10abb7d8f9d4c3e051f0fb0e3c56 Mon Sep 17 00:00:00 2001 From: Nathaniel Caza Date: Wed, 6 Apr 2022 09:43:44 -0500 Subject: [PATCH 077/225] don't log proc lock --- engine/statusupdatemanager/update.go | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/engine/statusupdatemanager/update.go b/engine/statusupdatemanager/update.go index baf09bd812..1a68f637cc 100644 --- a/engine/statusupdatemanager/update.go +++ b/engine/statusupdatemanager/update.go @@ -9,6 +9,7 @@ import ( "github.com/pkg/errors" "github.com/target/goalert/alert" "github.com/target/goalert/alert/alertlog" + "github.com/target/goalert/engine/processinglock" "github.com/target/goalert/permission" "github.com/target/goalert/util/log" ) @@ -23,13 +24,13 @@ func (db *DB) UpdateAll(ctx context.Context) error { log.Debugf(ctx, "Processing status updates.") _, err = db.lock.Exec(ctx, db.cmUnsub) - if err != nil { + if err != nil && !errors.Is(err, processinglock.ErrNoLock) { // okay to proceed log.Log(ctx, fmt.Errorf("delete status subscriptions for disabled contact methods: %w", err)) } _, err = db.lock.Exec(ctx, db.usrUnsub) - if err != nil { + if err != nil && !errors.Is(err, processinglock.ErrNoLock) { // okay to proceed log.Log(ctx, fmt.Errorf("delete status subscriptions for disabled users: %w", err)) } From 46c6924d6ee52ac2954426fac2af7410667247d9 Mon Sep 17 00:00:00 2001 From: Nathaniel Caza Date: Wed, 6 Apr 2022 09:44:07 -0500 Subject: [PATCH 078/225] 
return not found if rotation no longer exists --- graphql2/graphqlapp/rotation.go | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/graphql2/graphqlapp/rotation.go b/graphql2/graphqlapp/rotation.go index ffdaa3a60d..d74194764d 100644 --- a/graphql2/graphqlapp/rotation.go +++ b/graphql2/graphqlapp/rotation.go @@ -303,12 +303,14 @@ func (m *Mutation) updateRotationParticipants(ctx context.Context, tx *sql.Tx, r return err } return nil - } func (m *Mutation) UpdateRotation(ctx context.Context, input graphql2.UpdateRotationInput) (res bool, err error) { err = withContextTx(ctx, m.DB, func(ctx context.Context, tx *sql.Tx) error { result, err := m.RotationStore.FindRotationForUpdateTx(ctx, tx, input.ID) + if errors.Is(err, sql.ErrNoRows) { + return validation.NewFieldError("id", "Rotation not found") + } if err != nil { return err } @@ -346,7 +348,6 @@ func (m *Mutation) UpdateRotation(ctx context.Context, input graphql2.UpdateRota if update { err = m.RotationStore.UpdateRotationTx(ctx, tx, result) if err != nil { - return err } } From 10cde97e121e15f10a4f1c1b6e96b3a56c985c11 Mon Sep 17 00:00:00 2001 From: Nathaniel Caza Date: Wed, 6 Apr 2022 09:44:23 -0500 Subject: [PATCH 079/225] handle participant fkey --- util/errutil/maperror.go | 2 ++ 1 file changed, 2 insertions(+) diff --git a/util/errutil/maperror.go b/util/errutil/maperror.go index 10701422db..0da18a2929 100644 --- a/util/errutil/maperror.go +++ b/util/errutil/maperror.go @@ -35,6 +35,8 @@ func MapDBError(err error) error { return validation.NewFieldError("ServiceID", "service does not exist") case "schedule_rules_tgt_user_id_fkey": return validation.NewFieldError("TargetID", "user does not exist") + case "rotation_participants_user_id_fkey": + return validation.NewFieldError("UserID", "user does not exist") } case "23505": // unique constraint if dbErr.ConstraintName == "auth_basic_users_username_key" { From 9dd346a065462cef38e85110ebc8df29a1b7e8f6 Mon Sep 17 00:00:00 2001 From: Nathaniel 
Caza Date: Wed, 6 Apr 2022 09:44:38 -0500 Subject: [PATCH 080/225] schema changes for swo --- graphql2/graphqlapp/swo.go | 32 ++++++++++++++++++++++++-------- 1 file changed, 24 insertions(+), 8 deletions(-) diff --git a/graphql2/graphqlapp/swo.go b/graphql2/graphqlapp/swo.go index 965c72aa4f..47be81b38f 100644 --- a/graphql2/graphqlapp/swo.go +++ b/graphql2/graphqlapp/swo.go @@ -2,6 +2,7 @@ package graphqlapp import ( "context" + "strings" "github.com/target/goalert/graphql2" "github.com/target/goalert/validation" @@ -34,22 +35,37 @@ func (a *Query) SwoStatus(ctx context.Context) (*graphql2.SWOStatus, error) { s := a.SWO.Status() var nodes []graphql2.SWONode + var prog string for _, n := range s.Nodes { + var tasks []string + for _, t := range n.Tasks { + tasks = append(tasks, t.Name) + if t.Name == "reset-db" || t.Name == "exec" { + prog = t.Status + } + } + nodes = append(nodes, graphql2.SWONode{ ID: n.ID.String(), - OldValid: n.OldValid, - NewValid: n.NewValid, + OldValid: n.OldDBValid, + NewValid: n.NewDBValid, + IsLeader: n.IsLeader, CanExec: n.CanExec, - Status: n.Status, + Status: strings.Join(tasks, ","), }) } + status := string(s.State) + if prog != "" { + status += ": " + prog + } + return &graphql2.SWOStatus{ - IsIdle: s.IsIdle(), - IsDone: s.IsDone(), - Details: s.Details, - IsExecuting: s.IsExecuting(), - IsResetting: s.IsResetting(), + IsIdle: s.State == "idle", + IsDone: s.State == "done", + Details: status, + IsExecuting: strings.HasPrefix(string(s.State), "exec"), + IsResetting: strings.HasPrefix(string(s.State), "reset"), Nodes: nodes, }, nil } From 88043be93a3dce5dc12de7654e4407d854cb96d2 Mon Sep 17 00:00:00 2001 From: Nathaniel Caza Date: Wed, 6 Apr 2022 09:54:10 -0500 Subject: [PATCH 081/225] increase intervals --- swo/initialsync.go | 2 +- swo/swomsg/log.go | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/swo/initialsync.go b/swo/initialsync.go index c679ec408e..5f700cd8e3 100644 --- a/swo/initialsync.go +++ 
b/swo/initialsync.go @@ -106,7 +106,7 @@ func (m *Manager) SyncTableInit(origCtx context.Context, t Table, srcTx, dstTx p var lc lineCount go func() { defer wg.Done() - prog := time.NewTimer(2 * time.Second) + prog := time.NewTimer(500 * time.Millisecond) defer prog.Stop() for { swogrp.Progressf(origCtx, "syncing table %s (%d/%d)", t.Name, lc.Lines(), rowCount) diff --git a/swo/swomsg/log.go b/swo/swomsg/log.go index ad04f55882..290e0b514c 100644 --- a/swo/swomsg/log.go +++ b/swo/swomsg/log.go @@ -13,7 +13,7 @@ import ( "github.com/target/goalert/util/log" ) -const PollInterval = time.Second +const PollInterval = time.Second / 3 type Log struct { db *sql.DB From 17f1cbcd915eff8bd2f782879c7a6d07be3bf217 Mon Sep 17 00:00:00 2001 From: Nathaniel Caza Date: Wed, 6 Apr 2022 10:06:01 -0500 Subject: [PATCH 082/225] lower delay --- swo/swogrp/group.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/swo/swogrp/group.go b/swo/swogrp/group.go index 5cc1a2bf70..33689132bc 100644 --- a/swo/swogrp/group.go +++ b/swo/swogrp/group.go @@ -392,7 +392,7 @@ func (g *Group) sendMessageWith(ctx context.Context, log *swomsg.Log, msgType st } func (g *Group) Reset(ctx context.Context) error { - defer time.Sleep(time.Second * 2) + defer time.Sleep(time.Second) return g.sendMessage(ctx, "reset", nil, false) } From 694c06f57453b1e75045e196735f5f2aa674d158 Mon Sep 17 00:00:00 2001 From: Nathaniel Caza Date: Thu, 7 Apr 2022 10:17:33 -0500 Subject: [PATCH 083/225] add errors for swo ui --- graphql2/generated.go | 54 +++++ graphql2/graphqlapp/swo.go | 6 + graphql2/models_gen.go | 1 + graphql2/schema.graphql | 1 + swo/execute.go | 227 +++++++++++++++--- swo/swogrp/group.go | 44 +++- swo/swomsg/messages.go | 145 ----------- .../app/admin/switchover/AdminSwitchover.tsx | 12 +- web/src/schema.d.ts | 1 + 9 files changed, 305 insertions(+), 186 deletions(-) diff --git a/graphql2/generated.go b/graphql2/generated.go index c0906159af..4bba2135a4 100644 --- a/graphql2/generated.go +++ 
b/graphql2/generated.go @@ -398,6 +398,7 @@ type ComplexityRoot struct { SWOStatus struct { Details func(childComplexity int) int + Errors func(childComplexity int) int IsDone func(childComplexity int) int IsExecuting func(childComplexity int) int IsIdle func(childComplexity int) int @@ -2658,6 +2659,13 @@ func (e *executableSchema) Complexity(typeName, field string, childComplexity in return e.complexity.SWOStatus.Details(childComplexity), true + case "SWOStatus.errors": + if e.complexity.SWOStatus.Errors == nil { + break + } + + return e.complexity.SWOStatus.Errors(childComplexity), true + case "SWOStatus.isDone": if e.complexity.SWOStatus.IsDone == nil { break @@ -3635,6 +3643,7 @@ type SWOStatus { isExecuting: Boolean! details: String! + errors: [String!]! nodes: [SWONode!]! } @@ -14099,6 +14108,41 @@ func (ec *executionContext) _SWOStatus_details(ctx context.Context, field graphq return ec.marshalNString2string(ctx, field.Selections, res) } +func (ec *executionContext) _SWOStatus_errors(ctx context.Context, field graphql.CollectedField, obj *SWOStatus) (ret graphql.Marshaler) { + defer func() { + if r := recover(); r != nil { + ec.Error(ctx, ec.Recover(ctx, r)) + ret = graphql.Null + } + }() + fc := &graphql.FieldContext{ + Object: "SWOStatus", + Field: field, + Args: nil, + IsMethod: false, + IsResolver: false, + } + + ctx = graphql.WithFieldContext(ctx, fc) + resTmp, err := ec.ResolverMiddleware(ctx, func(rctx context.Context) (interface{}, error) { + ctx = rctx // use context from middleware stack in children + return obj.Errors, nil + }) + if err != nil { + ec.Error(ctx, err) + return graphql.Null + } + if resTmp == nil { + if !graphql.HasFieldError(ctx, fc) { + ec.Errorf(ctx, "must not be null") + } + return graphql.Null + } + res := resTmp.([]string) + fc.Result = res + return ec.marshalNString2ᚕstringᚄ(ctx, field.Selections, res) +} + func (ec *executionContext) _SWOStatus_nodes(ctx context.Context, field graphql.CollectedField, obj *SWOStatus) (ret 
graphql.Marshaler) { defer func() { if r := recover(); r != nil { @@ -25495,6 +25539,16 @@ func (ec *executionContext) _SWOStatus(ctx context.Context, sel ast.SelectionSet out.Values[i] = innerFunc(ctx) + if out.Values[i] == graphql.Null { + invalids++ + } + case "errors": + innerFunc := func(ctx context.Context) (res graphql.Marshaler) { + return ec._SWOStatus_errors(ctx, field, obj) + } + + out.Values[i] = innerFunc(ctx) + if out.Values[i] == graphql.Null { invalids++ } diff --git a/graphql2/graphqlapp/swo.go b/graphql2/graphqlapp/swo.go index 47be81b38f..ed7e680070 100644 --- a/graphql2/graphqlapp/swo.go +++ b/graphql2/graphqlapp/swo.go @@ -60,6 +60,11 @@ func (a *Query) SwoStatus(ctx context.Context) (*graphql2.SWOStatus, error) { status += ": " + prog } + var errs []string + for _, t := range s.Errors { + errs = append(errs, t.Name+": "+t.Error) + } + return &graphql2.SWOStatus{ IsIdle: s.State == "idle", IsDone: s.State == "done", @@ -67,5 +72,6 @@ func (a *Query) SwoStatus(ctx context.Context) (*graphql2.SWOStatus, error) { IsExecuting: strings.HasPrefix(string(s.State), "exec"), IsResetting: strings.HasPrefix(string(s.State), "reset"), Nodes: nodes, + Errors: errs, }, nil } diff --git a/graphql2/models_gen.go b/graphql2/models_gen.go index 7342fbbe9f..5d18312db5 100644 --- a/graphql2/models_gen.go +++ b/graphql2/models_gen.go @@ -346,6 +346,7 @@ type SWOStatus struct { IsResetting bool `json:"isResetting"` IsExecuting bool `json:"isExecuting"` Details string `json:"details"` + Errors []string `json:"errors"` Nodes []SWONode `json:"nodes"` } diff --git a/graphql2/schema.graphql b/graphql2/schema.graphql index 80c6b6b5fa..5137b84bb4 100644 --- a/graphql2/schema.graphql +++ b/graphql2/schema.graphql @@ -124,6 +124,7 @@ type SWOStatus { isExecuting: Boolean! details: String! + errors: [String!]! nodes: [SWONode!]! 
} diff --git a/swo/execute.go b/swo/execute.go index 3746ef0010..4248f27ac0 100644 --- a/swo/execute.go +++ b/swo/execute.go @@ -8,7 +8,9 @@ import ( "strconv" "time" + "github.com/jackc/pgconn" "github.com/jackc/pgx/v4" + "github.com/target/goalert/lock" "github.com/target/goalert/swo/swogrp" "github.com/target/goalert/util/log" "github.com/target/goalert/util/sqlutil" @@ -85,42 +87,84 @@ func (m *Manager) DoExecute(ctx context.Context) error { return fmt.Errorf("read row IDs: %w", err) } - var lastNone bool - for ctx.Err() == nil { - // sync in a loop until DB is up-to-date - s := time.Now() - n, pend, err := LoopSync(ctx, rt, oldConn, newConn) - dur := time.Since(s) + err = SyncChanges(ctx, rt, oldConn, newConn) + if err != nil { + return fmt.Errorf("sync changes: %w", err) + } - if pend > 0 { - lastNone = false - swogrp.Progressf(ctx, "sync: %d rows pending", pend) - } - if err != nil { - log.Log(ctx, err) - rt.Rollback() - if n > 0 { - return fmt.Errorf("sync failure (commit without record): %w", err) + swogrp.Progressf(ctx, "pausing") + err = m.grp.Pause(ctx) + if err != nil { + return fmt.Errorf("pause: %w", err) + } + + t := time.NewTicker(10 * time.Millisecond) + defer t.Stop() + for range t.C { + s := m.grp.Status() + var pausing, waiting int + for _, node := range s.Nodes { + for _, task := range node.Tasks { + if task.Name == "pause" { + pausing++ + } + if task.Name == "resume-after" { + waiting++ + } } - continue } - rt.Commit() - if n == 0 { - if !lastNone { - lastNone = true - swogrp.Progressf(ctx, "sync: waiting for changes") - } - time.Sleep(100 * time.Millisecond) - } else { - lastNone = false - swogrp.Progressf(ctx, "sync: %d rows replicated in %s", n, dur.Truncate(time.Millisecond)) + + if pausing == 0 && waiting == len(s.Nodes) { + break + } + if waiting == 0 { + return fmt.Errorf("pause failed") } } - return errors.New("not implemented") + swogrp.Progressf(ctx, "begin final sync") + err = FinalSync(ctx, rt, oldConn, newConn) + if err != nil { 
+ log.Log(ctx, err) + return fmt.Errorf("final sync: %w", err) + } + fmt.Println("DONE") + + return nil }) } +func SyncChanges(ctx context.Context, rt *rowTracker, oldConn, newConn *pgx.Conn) error { + for ctx.Err() == nil { + // sync in a loop until DB is up-to-date + s := time.Now() + n, pend, err := LoopSync(ctx, rt, oldConn, newConn) + dur := time.Since(s) + + if pend > 0 { + swogrp.Progressf(ctx, "sync: %d rows pending", pend) + } + if err != nil { + log.Log(ctx, err) + rt.Rollback() + if n > 0 { + return fmt.Errorf("sync failure (commit without record): %w", err) + } + continue + } + rt.Commit() + + if n != 0 { + swogrp.Progressf(ctx, "sync: %d rows replicated in %s", n, dur.Truncate(time.Millisecond)) + continue + } + + return nil + } + + return ctx.Err() +} + // DisableTriggers will disable all triggers in the new DB. func DisableTriggers(ctx context.Context, tables []Table, conn *pgx.Conn) error { for _, table := range tables { @@ -133,6 +177,120 @@ func DisableTriggers(ctx context.Context, tables []Table, conn *pgx.Conn) error return nil } +func FinalSync(ctx context.Context, rt *rowTracker, srcConn, dstConn *pgx.Conn) error { + var seqNames []string + var seqRead pgx.Batch + var name string + _, err := srcConn.QueryFunc(ctx, ` + select sequence_name + from information_schema.sequences + where + sequence_catalog = current_database() and + sequence_schema = 'public' + `, nil, []interface{}{&name}, func(r pgx.QueryFuncRow) error { + if name == "change_log_id_seq" { + // skip, as it does not exist in next db + return nil + } + seqRead.Queue("select last_value, is_called from " + sqlutil.QuoteID(name)) + seqNames = append(seqNames, name) + fmt.Println(name) + return nil + }) + if err != nil { + return fmt.Errorf("get sequence names: %w", err) + } + + if _, err = srcConn.Exec(ctx, "set idle_in_transaction_session_timeout = 1000"); err != nil { + return fmt.Errorf("set idle_in_transaction_session_timeout: %w", err) + } + if _, err = srcConn.Exec(ctx, "set 
lock_timeout = 3000"); err != nil { + return fmt.Errorf("set idle_in_transaction_session_timeout: %w", err) + } + + // catch up + if err = SyncChanges(ctx, rt, srcConn, dstConn); err != nil { + return fmt.Errorf("sync changes: %w", err) + } + + srcTx, err := srcConn.BeginTx(ctx, pgx.TxOptions{}) + if err != nil { + return fmt.Errorf("begin src: %w", err) + } + defer srcTx.Rollback(ctx) + + dstTx, err := dstConn.BeginTx(ctx, pgx.TxOptions{}) + if err != nil { + return fmt.Errorf("begin dst: %w", err) + } + defer dstTx.Rollback(ctx) + + swogrp.Progressf(ctx, "stop-the-world") + _, err = srcTx.Exec(ctx, fmt.Sprintf("select pg_advisory_xact_lock(%d)", lock.GlobalSwitchOver)) + if err != nil { + return fmt.Errorf("lock global switchover: %w", err) + } + + var stat string + err = srcConn.QueryRow(ctx, `select current_state from switchover_state nowait`).Scan(&stat) + if err != nil { + return fmt.Errorf("get switchover state: %w", err) + } + if stat == "use_next_db" { + return errDone + } + if stat == "idle" { + return errors.New("not running") + } + + go swogrp.Progressf(ctx, "last sync") + _, err = syncChangeLog(ctx, rt, srcTx, dstTx) + if err != nil { + return fmt.Errorf("sync change log: %w", err) + } + + res := srcTx.SendBatch(ctx, &seqRead) + var setSeq pgx.Batch + for _, name := range seqNames { + var last int64 + var called bool + err = res.QueryRow().Scan(&last, &called) + if err != nil { + return fmt.Errorf("get sequence %s: %w", name, err) + } + setSeq.Queue("select pg_catalog.setval($1, $2, $3)", name, last, called) + } + if err = res.Close(); err != nil { + return fmt.Errorf("close seq batch: %w", err) + } + + for _, t := range rt.tables { + setSeq.Queue("alter table " + t.QuotedName() + " enable trigger user") + } + + err = dstTx.SendBatch(ctx, &setSeq).Close() + if err != nil { + return fmt.Errorf("set sequences: %w", err) + } + + if err = dstTx.Commit(ctx); err != nil { + return fmt.Errorf("commit dst: %w", err) + } + + _, err = srcTx.Exec(ctx, "update 
switchover_state set current_state = 'use_next_db' where current_state = 'in_progress'") + if err != nil { + return fmt.Errorf("update switchover state: %w", err) + } + + err = srcTx.Commit(ctx) + if err != nil { + return fmt.Errorf("commit src: %w", err) + } + + swogrp.Progressf(ctx, "done") + return nil +} + func LoopSync(ctx context.Context, rt *rowTracker, srcConn, dstConn *pgx.Conn) (ok, pend int, err error) { srcTx, dstTx, err := syncTx(ctx, srcConn, dstConn) if err != nil { @@ -164,10 +322,6 @@ func LoopSync(ctx context.Context, rt *rowTracker, srcConn, dstConn *pgx.Conn) ( return len(ids), 0, nil } -func FinalSync(ctx context.Context, oldConn, newConn *pgx.Conn) error { - return nil -} - func syncTx(ctx context.Context, srcConn, dstConn *pgx.Conn) (src, dst pgx.Tx, err error) { srcTx, err := srcConn.BeginTx(ctx, pgx.TxOptions{ AccessMode: pgx.ReadOnly, @@ -187,7 +341,7 @@ func syncTx(ctx context.Context, srcConn, dstConn *pgx.Conn) (src, dst pgx.Tx, e return srcTx, dstTx, nil } -func syncChangeLog(ctx context.Context, rt *rowTracker, srcTx, dstTx pgx.Tx) ([]int, error) { +func syncChangeLog(ctx context.Context, rt *rowTracker, srcTx, dstTx pgxQueryer) ([]int, error) { type rowID struct { table string id string @@ -271,7 +425,7 @@ func syncChangeLog(ctx context.Context, rt *rowTracker, srcTx, dstTx pgx.Tx) ([] return changeIDs, nil } -func (rt *rowTracker) apply(ctx context.Context, dstTx pgx.Tx, q string, rows []syncRow) error { +func (rt *rowTracker) apply(ctx context.Context, dstTx pgxQueryer, q string, rows []syncRow) error { if len(rows) == 0 { return nil } @@ -318,8 +472,13 @@ type syncRow struct { id string data json.RawMessage } +type pgxQueryer interface { + Query(context.Context, string, ...interface{}) (pgx.Rows, error) + Exec(context.Context, string, ...interface{}) (pgconn.CommandTag, error) + QueryFunc(context.Context, string, []interface{}, []interface{}, func(pgx.QueryFuncRow) error) (pgconn.CommandTag, error) +} -func (rt *rowTracker) 
fetch(ctx context.Context, table Table, srcTx pgx.Tx, ids []string) (*syncData, error) { +func (rt *rowTracker) fetch(ctx context.Context, table Table, srcTx pgxQueryer, ids []string) (*syncData, error) { rows, err := srcTx.Query(ctx, table.SelectRowsQuery(), table.IDs(ids)) if errors.Is(err, pgx.ErrNoRows) { return &syncData{toDelete: ids}, nil diff --git a/swo/swogrp/group.go b/swo/swogrp/group.go index 33689132bc..8de8f8c8ab 100644 --- a/swo/swogrp/group.go +++ b/swo/swogrp/group.go @@ -37,6 +37,8 @@ type Group struct { Config State + failed []TaskInfo + nodeID uuid.UUID reset bool nodes map[uuid.UUID]*Node @@ -99,8 +101,9 @@ func NewGroup(cfg Config) *Group { } type Status struct { - Nodes []Node - State State + Nodes []Node + State State + Errors []TaskInfo } func cloneTasks(in []TaskInfo) []TaskInfo { @@ -121,9 +124,13 @@ func (g *Group) Status() Status { nodes = append(nodes, cpy) } + failed := make([]TaskInfo, len(g.failed)) + copy(failed, g.failed) + return Status{ - Nodes: nodes, - State: g.State, + Nodes: nodes, + State: g.State, + Errors: failed, } } @@ -179,6 +186,7 @@ func (g *Group) startTask(ctx context.Context, name string, fn func(context.Cont err = g.sendMessage(ctx, "task-end", info, false) if err != nil { + fmt.Print("TASK ERROR ", info.Name, "\n\n\n", err, "\n\n\n") log.Log(ctx, fmt.Errorf("send task-end: %w", err)) } @@ -195,6 +203,7 @@ func (g *Group) resetState() { for id := range g.nodes { delete(g.nodes, id) } + g.failed = nil g.reset = true g.leader = false g.State = stateReset @@ -287,6 +296,9 @@ func (g *Group) updateTask(msg swomsg.Message, upsert bool) error { } else if info.Name == "reset-db" { g.State = stateIdle } + if info.Error != "" { + g.failed = append(g.failed, info) + } return nil } @@ -337,7 +349,25 @@ func (g *Group) processMessage(ctx context.Context, msg swomsg.Message) error { return g.startTask(ctx, "exec", g.ExecuteFunc) } case "pause": - return g.startTask(ctx, "pause", g.PauseFunc) + err := g.startTask(ctx, 
"resume-after", func(ctx context.Context) error { + t := time.NewTimer(15 * time.Second) + defer t.Stop() + select { + case <-ctx.Done(): + return ctx.Err() + case <-t.C: + } + + return g.ResumeFunc(ctx) + }) + if err != nil { + return err + } + + err = g.startTask(ctx, "pause", g.PauseFunc) + if err != nil { + return err + } default: } @@ -407,3 +437,7 @@ func (g *Group) Execute(ctx context.Context) error { return g.sendMessage(ctx, "exec", nil, true) } + +func (g *Group) Pause(ctx context.Context) error { + return g.sendMessage(ctx, "pause", nil, true) +} diff --git a/swo/swomsg/messages.go b/swo/swomsg/messages.go index 8fc2a6b58d..40c3c768f8 100644 --- a/swo/swomsg/messages.go +++ b/swo/swomsg/messages.go @@ -7,18 +7,6 @@ import ( "github.com/google/uuid" ) -/* - - Idle - Reset -> Elect(Reset) - Elect(Reset) -> ResetRun,ResetWait -> Idle - Execute -> Elect(Execute) - Elect(Execute) -> ExecuteRun,ExecuteWait -> - - - -*/ - type Message struct { ID uuid.UUID Node uuid.UUID @@ -29,136 +17,3 @@ type Message struct { AckID uuid.UUID `json:",omitempty"` Data json.RawMessage `json:",omitempty"` } - -// type Plan struct { -// // Must receive Ack from all nodes before this time. -// ConsensusDeadline time.Time - -// // Must receive PlanStart or Error before this time, otherwise all -// // nodes will Error. -// StartAt time.Time - -// // All nodes should disable idle connections after this time. -// DisableIdleAt time.Time - -// // All nodes should re-enable idle connections after this time. 
-// Deadline time.Time -// } - -// type Type int - -// const ( -// Unknown Type = iota -// Execute -// Ping -// ) - -// type msg struct { -// Header - -// Ping *Ping `json:",omitempty"` -// Reset *Reset `json:",omitempty"` -// Execute *Execute `json:",omitempty"` -// WaitTx *WaitTx `json:",omitempty"` -// Ack *Ack `json:",omitempty"` -// Error *Error `json:",omitempty"` -// Plan *Plan `json:",omitempty"` -// Progress *Progress `json:",omitempty"` -// Done *Done `json:",omitempty"` -// Hello *Hello `json:",omitempty"` -// } - -// type Header struct { -// ID uuid.UUID -// NodeID uuid.UUID -// TS time.Time `json:"-"` -// } - -// type ( -// Start struct { -// Header `json:"-"` - -// TaskName string -// TaskID uuid.UUID -// NodeID uuid.UUID `json:",omitempty"` -// } - -// // user commands -// Ping struct { -// Header `json:"-"` -// } - -// Hello struct { -// Header `json:"-"` -// IsOldDB bool `json:",omitempty"` -// IsNewDB bool `json:",omitempty"` -// Status string -// CanExec bool `json:",omitempty"` -// } - -// Ack struct { -// Header `json:"-"` -// MsgID uuid.UUID -// } - -// TaskStatus struct { -// Header `json:"-"` -// TaskID uuid.UUID -// Details string -// } -// TaskDone struct { -// Header `json:"-"` -// TaskID uuid.UUID -// Error string `json:",omitempty"` -// } - -// Plan struct { -// // Must receive Ack from all nodes before this time. -// ConsensusDeadline time.Time - -// // Must receive PlanStart or Error before this time, otherwise all -// // nodes will Error. -// StartAt time.Time - -// // All nodes should disable idle connections after this time. -// DisableIdleAt time.Time - -// // All nodes should re-enable idle connections after this time. -// Deadline time.Time -// } -// ) - -/* -UI - -{ Connections Section } - -{ Nodes section, with "Refresh" button} -Node ID | Ping Response Time | DB Calls/min (1m, 5m, 15m) | DB Resp. 
Avg (1m, 5m, 15m) - -States: Idle, Error, Active, Done -{ Status section (progress text here), with "Reset", "Execute" buttons} - -1. User goes to UI page - -2. User clicks "Refresh" button -3. Ping is sent -4. Pong is received from all nodes -5. UI updates - -6. User clicks "Execute" button -7. Execute is sent - -8. Execute is claimed by engine -9. Begins instrumenting, syncing, etc... sending Progress messages -10. UI updates with progress - -11. Engine sends out Plan message -12. All nodes ConfirmPlan by ConsensusDeadline -13. Engine performs switchover - -14. Engine sends Done message - -** if anything goes wrong, engine sends Error message and Reset is required by the user - -*/ diff --git a/web/src/app/admin/switchover/AdminSwitchover.tsx b/web/src/app/admin/switchover/AdminSwitchover.tsx index 8969e35c3d..7c21e89fc1 100644 --- a/web/src/app/admin/switchover/AdminSwitchover.tsx +++ b/web/src/app/admin/switchover/AdminSwitchover.tsx @@ -30,6 +30,7 @@ const query = gql` isResetting isExecuting details + errors nodes { id status @@ -73,7 +74,7 @@ export default function AdminSwitchover(): JSX.Element { const { loading, error, data: _data } = useQuery(query, { pollInterval: 250 }) const data = _data?.swoStatus const [lastAction, setLastAction] = useState('') - const [statusNotices, setStatusNotices] = useState([]) + const [_statusNotices, setStatusNotices] = useState([]) const [commit, mutationStatus] = useMutation(mutation) function actionHandler(action: 'ping' | 'reset' | 'execute'): () => void { @@ -85,7 +86,7 @@ export default function AdminSwitchover(): JSX.Element { }, onError: (error) => { setStatusNotices([ - ...statusNotices, + ..._statusNotices, { type: 'error', message: 'Failed to ' + action, @@ -98,6 +99,13 @@ export default function AdminSwitchover(): JSX.Element { } } + const statusNotices = _statusNotices.concat( + (data?.errors ?? 
[]).map((message: string) => ({ + type: 'error', + message, + })), + ) + const pingLoad = lastAction === 'ping' && mutationStatus.loading const resetLoad = data?.isResetting || (lastAction === 'reset' && mutationStatus.loading) diff --git a/web/src/schema.d.ts b/web/src/schema.d.ts index f9a23d57f8..a8b584d2c8 100644 --- a/web/src/schema.d.ts +++ b/web/src/schema.d.ts @@ -44,6 +44,7 @@ export interface SWOStatus { isResetting: boolean isExecuting: boolean details: string + errors: string[] nodes: SWONode[] } From 064922c9d5fbc08b2e91798ed8437889192aad15 Mon Sep 17 00:00:00 2001 From: Nathaniel Caza Date: Thu, 7 Apr 2022 11:31:22 -0500 Subject: [PATCH 084/225] cleanup --- swo/drvconnector.go | 3 ++- swo/execute.go | 4 +-- swo/preflightlocks.go | 8 +++--- swo/reset.go | 19 +-------------- swo/swogrp/group.go | 57 ++++++++++++++++++++++++++++++++++++++++--- 5 files changed, 63 insertions(+), 28 deletions(-) diff --git a/swo/drvconnector.go b/swo/drvconnector.go index e858b1fd48..5ac422e0a5 100644 --- a/swo/drvconnector.go +++ b/swo/drvconnector.go @@ -7,6 +7,7 @@ import ( "sync" "github.com/jackc/pgx/v4/stdlib" + "github.com/target/goalert/swo/swogrp" ) type Connector struct { @@ -46,7 +47,7 @@ func (drv *Connector) Connect(ctx context.Context) (driver.Conn, error) { conn := c.(*stdlib.Conn) err = sessionLock(ctx, conn) - if errors.Is(err, errDone) { + if errors.Is(err, swogrp.ErrDone) { drv.mx.Lock() drv.isDone = true drv.mx.Unlock() diff --git a/swo/execute.go b/swo/execute.go index 4248f27ac0..51337c7619 100644 --- a/swo/execute.go +++ b/swo/execute.go @@ -128,7 +128,6 @@ func (m *Manager) DoExecute(ctx context.Context) error { log.Log(ctx, err) return fmt.Errorf("final sync: %w", err) } - fmt.Println("DONE") return nil }) @@ -194,7 +193,6 @@ func FinalSync(ctx context.Context, rt *rowTracker, srcConn, dstConn *pgx.Conn) } seqRead.Queue("select last_value, is_called from " + sqlutil.QuoteID(name)) seqNames = append(seqNames, name) - fmt.Println(name) return nil }) 
if err != nil { @@ -237,7 +235,7 @@ func FinalSync(ctx context.Context, rt *rowTracker, srcConn, dstConn *pgx.Conn) return fmt.Errorf("get switchover state: %w", err) } if stat == "use_next_db" { - return errDone + return swogrp.ErrDone } if stat == "idle" { return errors.New("not running") diff --git a/swo/preflightlocks.go b/swo/preflightlocks.go index d01625a86c..300e61f4dc 100644 --- a/swo/preflightlocks.go +++ b/swo/preflightlocks.go @@ -9,6 +9,7 @@ import ( "github.com/jackc/pgx/v4" "github.com/jackc/pgx/v4/stdlib" "github.com/target/goalert/lock" + "github.com/target/goalert/swo/swogrp" ) var ErrNoLock = errors.New("no lock") @@ -27,6 +28,9 @@ func SwitchOverExecLock(ctx context.Context, conn *pgx.Conn) error { from switchover_state where current_state != 'use_next_db' `, lock.GlobalSwitchOverExec).Scan(&gotLock) + if errors.Is(err, pgx.ErrNoRows) { + return swogrp.ErrDone + } if err != nil { return err } @@ -46,8 +50,6 @@ func UnlockConn(ctx context.Context, conn *pgx.Conn) { } } -var errDone = errors.New("done") - // sessionLock will get a shared advisory lock for the connection. func sessionLock(ctx context.Context, c *stdlib.Conn) error { // Using literal here so we can avoid a prepared statement round trip. @@ -85,7 +87,7 @@ func sessionLock(ctx context.Context, c *stdlib.Conn) error { } if state == "use_next_db" { - return errDone + return swogrp.ErrDone } return nil diff --git a/swo/reset.go b/swo/reset.go index 56196b1c38..6840d75e9b 100644 --- a/swo/reset.go +++ b/swo/reset.go @@ -24,17 +24,6 @@ func (m *Manager) DoReset(ctx context.Context) error { // ResetNewDB will reset the new database to a clean state. 
func ResetNewDB(ctx context.Context, conn *pgx.Conn) error { - err := SwitchOverExecLock(ctx, conn) - if err != nil { - return fmt.Errorf("failed to acquire lock: %w", err) - } - defer UnlockConn(ctx, conn) - - _, err = conn.Exec(ctx, "update switchover_state set current_state = 'idle' where current_state = 'in_progress'") - if err != nil { - return fmt.Errorf("set state to idle: %w", err) - } - tables, err := ScanTables(ctx, conn) if err != nil { return fmt.Errorf("scan tables: %w", err) @@ -67,13 +56,7 @@ func ResetNewDB(ctx context.Context, conn *pgx.Conn) error { // // It will remove all change triggers and cleanup switchover data. func ResetOldDB(ctx context.Context, conn *pgx.Conn) error { - err := SwitchOverExecLock(ctx, conn) - if err != nil { - return fmt.Errorf("acquire lock: %w", err) - } - defer UnlockConn(ctx, conn) - - _, err = conn.Exec(ctx, "update switchover_state set current_state = 'idle' where current_state = 'in_progress'") + _, err := conn.Exec(ctx, "update switchover_state set current_state = 'idle' where current_state = 'in_progress'") if err != nil { return fmt.Errorf("set state to idle: %w", err) } diff --git a/swo/swogrp/group.go b/swo/swogrp/group.go index 8de8f8c8ab..36eb32d12e 100644 --- a/swo/swogrp/group.go +++ b/swo/swogrp/group.go @@ -3,6 +3,7 @@ package swogrp import ( "context" "encoding/json" + "errors" "fmt" "sync" "time" @@ -49,6 +50,8 @@ type Group struct { nextDBNodes map[uuid.UUID]struct{} ackMsgs chan map[uuid.UUID]*ackWait + + resumeNow chan struct{} } type ackWait struct { @@ -180,13 +183,16 @@ func (g *Group) startTask(ctx context.Context, name string, fn func(context.Cont g.tasks[info.ID] = info go func() { err := fn(withTask(ctx, g, info)) + if errors.Is(err, ErrDone) { + g.sendMessage(g.Logger.BackgroundContext(), "done", nil, false) + err = nil + } if err != nil { info.Error = err.Error() } err = g.sendMessage(ctx, "task-end", info, false) if err != nil { - fmt.Print("TASK ERROR ", info.Name, "\n\n\n", err, 
"\n\n\n") log.Log(ctx, fmt.Errorf("send task-end: %w", err)) } @@ -203,6 +209,11 @@ func (g *Group) resetState() { for id := range g.nodes { delete(g.nodes, id) } + if g.resumeNow != nil { + close(g.resumeNow) + g.resumeNow = nil + } + g.ResumeFunc(g.Logger.BackgroundContext()) g.failed = nil g.reset = true g.leader = false @@ -293,15 +304,42 @@ func (g *Group) updateTask(msg swomsg.Message, upsert bool) error { n.Tasks = filtered if upsert { n.Tasks = append(n.Tasks, info) - } else if info.Name == "reset-db" { - g.State = stateIdle + } else { + switch info.Name { + case "exec": + if g.resumeNow != nil { + close(g.resumeNow) + g.resumeNow = nil + } + if g.State == stateDone { + break + } + if info.Error == "" { + g.State = stateDone + } else { + g.State = stateError + } + case "reset-db": + if g.State == stateDone { + break + } + if info.Error == "" { + g.State = stateIdle + } else { + g.State = stateError + } + } } + if info.Error != "" { g.failed = append(g.failed, info) } + return nil } +var ErrDone = errors.New("already done") + func (g *Group) processMessage(ctx context.Context, msg swomsg.Message) error { g.mx.Lock() defer g.mx.Unlock() @@ -325,7 +363,12 @@ func (g *Group) processMessage(ctx context.Context, msg swomsg.Message) error { case "hello": g.addNode(msg.Node, true, false, false) case "ping": + case "done": + g.State = stateDone case "reset": + if g.State == stateDone { + break + } g.resetState() if err := g.startTask(ctx, "resume", g.ResumeFunc); err != nil { @@ -349,6 +392,10 @@ func (g *Group) processMessage(ctx context.Context, msg swomsg.Message) error { return g.startTask(ctx, "exec", g.ExecuteFunc) } case "pause": + if g.resumeNow != nil { + close(g.resumeNow) + } + g.resumeNow = make(chan struct{}) err := g.startTask(ctx, "resume-after", func(ctx context.Context) error { t := time.NewTimer(15 * time.Second) defer t.Stop() @@ -356,6 +403,7 @@ func (g *Group) processMessage(ctx context.Context, msg swomsg.Message) error { case <-ctx.Done(): 
return ctx.Err() case <-t.C: + case <-g.resumeNow: } return g.ResumeFunc(ctx) @@ -422,6 +470,9 @@ func (g *Group) sendMessageWith(ctx context.Context, log *swomsg.Log, msgType st } func (g *Group) Reset(ctx context.Context) error { + if g.Status().State == stateDone { + return errors.New("cannot reset, already done") + } defer time.Sleep(time.Second) return g.sendMessage(ctx, "reset", nil, false) } From 91341e32a2dc9c6610b3e29998ef9966707429fc Mon Sep 17 00:00:00 2001 From: Nathaniel Caza Date: Thu, 7 Apr 2022 11:36:54 -0500 Subject: [PATCH 085/225] add page states for disabled and complete --- .../app/admin/switchover/AdminSwitchover.tsx | 41 +++++++++++++++++-- 1 file changed, 38 insertions(+), 3 deletions(-) diff --git a/web/src/app/admin/switchover/AdminSwitchover.tsx b/web/src/app/admin/switchover/AdminSwitchover.tsx index 7c21e89fc1..267f66bd7e 100644 --- a/web/src/app/admin/switchover/AdminSwitchover.tsx +++ b/web/src/app/admin/switchover/AdminSwitchover.tsx @@ -6,7 +6,7 @@ import CardHeader from '@mui/material/CardHeader' import Grid from '@mui/material/Grid' import Skeleton from '@mui/material/Skeleton' import Typography from '@mui/material/Typography' -import { SvgIconProps } from '@mui/material' +import { SvgIconProps, Tooltip } from '@mui/material' import PingIcon from 'mdi-material-ui/DatabaseMarker' import NoResetIcon from 'mdi-material-ui/DatabaseRefreshOutline' import ResetIcon from 'mdi-material-ui/DatabaseRefresh' @@ -17,10 +17,13 @@ import IdleIcon from 'mdi-material-ui/DatabaseSettings' import InProgressIcon from 'mdi-material-ui/DatabaseEdit' import { gql, useMutation, useQuery } from '@apollo/client' import { DateTime } from 'luxon' -import { SWONode as SWONodeType } from '../../../schema' +import { SWONode as SWONodeType, SWOStatus } from '../../../schema' import Notices, { Notice } from '../../details/Notices' import SWONode from './SWONode' import LoadingButton from '@mui/lab/LoadingButton' +import DatabaseOff from 
'mdi-material-ui/DatabaseOff' +import DatabaseCheck from 'mdi-material-ui/DatabaseCheck' +import { Info } from '@mui/icons-material' const query = gql` query { @@ -72,11 +75,43 @@ function cptlz(s: string): string { export default function AdminSwitchover(): JSX.Element { const { loading, error, data: _data } = useQuery(query, { pollInterval: 250 }) - const data = _data?.swoStatus + const data = _data?.swoStatus as SWOStatus const [lastAction, setLastAction] = useState('') const [_statusNotices, setStatusNotices] = useState([]) const [commit, mutationStatus] = useMutation(mutation) + if (error && error.message == 'not in SWO mode') { + return ( + + + + + Unavailable: Application is not in switchover mode.{' '} + + + + + + + ) + } + + if (data?.isDone) { + return ( + + + + + DB switchover is complete. + + + + ) + } + function actionHandler(action: 'ping' | 'reset' | 'execute'): () => void { return () => { setLastAction(action) From a03e030ca78c2f068ac3daf1d33212b968cc39c6 Mon Sep 17 00:00:00 2001 From: Nathaniel Caza Date: Thu, 7 Apr 2022 11:49:51 -0500 Subject: [PATCH 086/225] add connection list --- graphql2/generated.go | 238 +++++++++++++++++++++++++++++++++++++ graphql2/graphqlapp/swo.go | 25 +++- graphql2/models_gen.go | 20 ++-- graphql2/schema.graphql | 7 ++ 4 files changed, 282 insertions(+), 8 deletions(-) diff --git a/graphql2/generated.go b/graphql2/generated.go index 4bba2135a4..4bfeae4016 100644 --- a/graphql2/generated.go +++ b/graphql2/generated.go @@ -387,6 +387,11 @@ type ComplexityRoot struct { PageInfo func(childComplexity int) int } + SWOConnection struct { + Count func(childComplexity int) int + Name func(childComplexity int) int + } + SWONode struct { CanExec func(childComplexity int) int ID func(childComplexity int) int @@ -397,6 +402,7 @@ type ComplexityRoot struct { } SWOStatus struct { + Connections func(childComplexity int) int Details func(childComplexity int) int Errors func(childComplexity int) int IsDone func(childComplexity int) int 
@@ -2610,6 +2616,20 @@ func (e *executableSchema) Complexity(typeName, field string, childComplexity in return e.complexity.RotationConnection.PageInfo(childComplexity), true + case "SWOConnection.count": + if e.complexity.SWOConnection.Count == nil { + break + } + + return e.complexity.SWOConnection.Count(childComplexity), true + + case "SWOConnection.name": + if e.complexity.SWOConnection.Name == nil { + break + } + + return e.complexity.SWOConnection.Name(childComplexity), true + case "SWONode.canExec": if e.complexity.SWONode.CanExec == nil { break @@ -2652,6 +2672,13 @@ func (e *executableSchema) Complexity(typeName, field string, childComplexity in return e.complexity.SWONode.Status(childComplexity), true + case "SWOStatus.connections": + if e.complexity.SWOStatus.Connections == nil { + break + } + + return e.complexity.SWOStatus.Connections(childComplexity), true + case "SWOStatus.details": if e.complexity.SWOStatus.Details == nil { break @@ -3646,6 +3673,13 @@ type SWOStatus { errors: [String!]! nodes: [SWONode!]! + + connections: [SWOConnection!]! +} + +type SWOConnection { + name: String! + count: Int! 
} type SWONode { @@ -13723,6 +13757,76 @@ func (ec *executionContext) _RotationConnection_pageInfo(ctx context.Context, fi return ec.marshalNPageInfo2ᚖgithub.comᚋtargetᚋgoalertᚋgraphql2ᚐPageInfo(ctx, field.Selections, res) } +func (ec *executionContext) _SWOConnection_name(ctx context.Context, field graphql.CollectedField, obj *SWOConnection) (ret graphql.Marshaler) { + defer func() { + if r := recover(); r != nil { + ec.Error(ctx, ec.Recover(ctx, r)) + ret = graphql.Null + } + }() + fc := &graphql.FieldContext{ + Object: "SWOConnection", + Field: field, + Args: nil, + IsMethod: false, + IsResolver: false, + } + + ctx = graphql.WithFieldContext(ctx, fc) + resTmp, err := ec.ResolverMiddleware(ctx, func(rctx context.Context) (interface{}, error) { + ctx = rctx // use context from middleware stack in children + return obj.Name, nil + }) + if err != nil { + ec.Error(ctx, err) + return graphql.Null + } + if resTmp == nil { + if !graphql.HasFieldError(ctx, fc) { + ec.Errorf(ctx, "must not be null") + } + return graphql.Null + } + res := resTmp.(string) + fc.Result = res + return ec.marshalNString2string(ctx, field.Selections, res) +} + +func (ec *executionContext) _SWOConnection_count(ctx context.Context, field graphql.CollectedField, obj *SWOConnection) (ret graphql.Marshaler) { + defer func() { + if r := recover(); r != nil { + ec.Error(ctx, ec.Recover(ctx, r)) + ret = graphql.Null + } + }() + fc := &graphql.FieldContext{ + Object: "SWOConnection", + Field: field, + Args: nil, + IsMethod: false, + IsResolver: false, + } + + ctx = graphql.WithFieldContext(ctx, fc) + resTmp, err := ec.ResolverMiddleware(ctx, func(rctx context.Context) (interface{}, error) { + ctx = rctx // use context from middleware stack in children + return obj.Count, nil + }) + if err != nil { + ec.Error(ctx, err) + return graphql.Null + } + if resTmp == nil { + if !graphql.HasFieldError(ctx, fc) { + ec.Errorf(ctx, "must not be null") + } + return graphql.Null + } + res := resTmp.(int) + fc.Result = 
res + return ec.marshalNInt2int(ctx, field.Selections, res) +} + func (ec *executionContext) _SWONode_id(ctx context.Context, field graphql.CollectedField, obj *SWONode) (ret graphql.Marshaler) { defer func() { if r := recover(); r != nil { @@ -14178,6 +14282,41 @@ func (ec *executionContext) _SWOStatus_nodes(ctx context.Context, field graphql. return ec.marshalNSWONode2ᚕgithub.comᚋtargetᚋgoalertᚋgraphql2ᚐSWONodeᚄ(ctx, field.Selections, res) } +func (ec *executionContext) _SWOStatus_connections(ctx context.Context, field graphql.CollectedField, obj *SWOStatus) (ret graphql.Marshaler) { + defer func() { + if r := recover(); r != nil { + ec.Error(ctx, ec.Recover(ctx, r)) + ret = graphql.Null + } + }() + fc := &graphql.FieldContext{ + Object: "SWOStatus", + Field: field, + Args: nil, + IsMethod: false, + IsResolver: false, + } + + ctx = graphql.WithFieldContext(ctx, fc) + resTmp, err := ec.ResolverMiddleware(ctx, func(rctx context.Context) (interface{}, error) { + ctx = rctx // use context from middleware stack in children + return obj.Connections, nil + }) + if err != nil { + ec.Error(ctx, err) + return graphql.Null + } + if resTmp == nil { + if !graphql.HasFieldError(ctx, fc) { + ec.Errorf(ctx, "must not be null") + } + return graphql.Null + } + res := resTmp.([]SWOConnection) + fc.Result = res + return ec.marshalNSWOConnection2ᚕgithub.comᚋtargetᚋgoalertᚋgraphql2ᚐSWOConnectionᚄ(ctx, field.Selections, res) +} + func (ec *executionContext) _Schedule_id(ctx context.Context, field graphql.CollectedField, obj *schedule.Schedule) (ret graphql.Marshaler) { defer func() { if r := recover(); r != nil { @@ -25401,6 +25540,47 @@ func (ec *executionContext) _RotationConnection(ctx context.Context, sel ast.Sel return out } +var sWOConnectionImplementors = []string{"SWOConnection"} + +func (ec *executionContext) _SWOConnection(ctx context.Context, sel ast.SelectionSet, obj *SWOConnection) graphql.Marshaler { + fields := graphql.CollectFields(ec.OperationContext, sel, 
sWOConnectionImplementors) + out := graphql.NewFieldSet(fields) + var invalids uint32 + for i, field := range fields { + switch field.Name { + case "__typename": + out.Values[i] = graphql.MarshalString("SWOConnection") + case "name": + innerFunc := func(ctx context.Context) (res graphql.Marshaler) { + return ec._SWOConnection_name(ctx, field, obj) + } + + out.Values[i] = innerFunc(ctx) + + if out.Values[i] == graphql.Null { + invalids++ + } + case "count": + innerFunc := func(ctx context.Context) (res graphql.Marshaler) { + return ec._SWOConnection_count(ctx, field, obj) + } + + out.Values[i] = innerFunc(ctx) + + if out.Values[i] == graphql.Null { + invalids++ + } + default: + panic("unknown field " + strconv.Quote(field.Name)) + } + } + out.Dispatch() + if invalids > 0 { + return graphql.Null + } + return out +} + var sWONodeImplementors = []string{"SWONode"} func (ec *executionContext) _SWONode(ctx context.Context, sel ast.SelectionSet, obj *SWONode) graphql.Marshaler { @@ -25559,6 +25739,16 @@ func (ec *executionContext) _SWOStatus(ctx context.Context, sel ast.SelectionSet out.Values[i] = innerFunc(ctx) + if out.Values[i] == graphql.Null { + invalids++ + } + case "connections": + innerFunc := func(ctx context.Context) (res graphql.Marshaler) { + return ec._SWOStatus_connections(ctx, field, obj) + } + + out.Values[i] = innerFunc(ctx) + if out.Values[i] == graphql.Null { invalids++ } @@ -29206,6 +29396,54 @@ func (ec *executionContext) marshalNSWOAction2githubᚗcomᚋtargetᚋgoalertᚋ return v } +func (ec *executionContext) marshalNSWOConnection2githubᚗcomᚋtargetᚋgoalertᚋgraphql2ᚐSWOConnection(ctx context.Context, sel ast.SelectionSet, v SWOConnection) graphql.Marshaler { + return ec._SWOConnection(ctx, sel, &v) +} + +func (ec *executionContext) marshalNSWOConnection2ᚕgithub.comᚋtargetᚋgoalertᚋgraphql2ᚐSWOConnectionᚄ(ctx context.Context, sel ast.SelectionSet, v []SWOConnection) graphql.Marshaler { + ret := make(graphql.Array, len(v)) + var wg sync.WaitGroup + isLen1 := 
len(v) == 1 + if !isLen1 { + wg.Add(len(v)) + } + for i := range v { + i := i + fc := &graphql.FieldContext{ + Index: &i, + Result: &v[i], + } + ctx := graphql.WithFieldContext(ctx, fc) + f := func(i int) { + defer func() { + if r := recover(); r != nil { + ec.Error(ctx, ec.Recover(ctx, r)) + ret = nil + } + }() + if !isLen1 { + defer wg.Done() + } + ret[i] = ec.marshalNSWOConnection2githubᚗcomᚋtargetᚋgoalertᚋgraphql2ᚐSWOConnection(ctx, sel, v[i]) + } + if isLen1 { + f(i) + } else { + go f(i) + } + + } + wg.Wait() + + for _, e := range ret { + if e == graphql.Null { + return graphql.Null + } + } + + return ret +} + func (ec *executionContext) marshalNSWONode2githubᚗcomᚋtargetᚋgoalertᚋgraphql2ᚐSWONode(ctx context.Context, sel ast.SelectionSet, v SWONode) graphql.Marshaler { return ec._SWONode(ctx, sel, &v) } diff --git a/graphql2/graphqlapp/swo.go b/graphql2/graphqlapp/swo.go index ed7e680070..ce87ac387d 100644 --- a/graphql2/graphqlapp/swo.go +++ b/graphql2/graphqlapp/swo.go @@ -5,6 +5,8 @@ import ( "strings" "github.com/target/goalert/graphql2" + "github.com/target/goalert/permission" + "github.com/target/goalert/util/sqlutil" "github.com/target/goalert/validation" ) @@ -13,7 +15,11 @@ func (m *Mutation) SwoAction(ctx context.Context, action graphql2.SWOAction) (bo return false, validation.NewGenericError("not in SWO mode") } - var err error + err := permission.LimitCheckAny(ctx, permission.Admin) + if err != nil { + return false, err + } + switch action { case graphql2.SWOActionPing: err = m.SWO.SendPing(ctx) @@ -33,6 +39,22 @@ func (a *Query) SwoStatus(ctx context.Context) (*graphql2.SWOStatus, error) { return nil, validation.NewGenericError("not in SWO mode") } + err := permission.LimitCheckAny(ctx, permission.Admin) + if err != nil { + return nil, err + } + + var conns []graphql2.SWOConnection + err = sqlutil.FromContext(ctx). + Table("pg_stat_activity"). + Select("application_name as name, count(*)"). + Where("datname = current_database()"). + Group("name"). 
+ Find(&conns).Error + if err != nil { + return nil, err + } + s := a.SWO.Status() var nodes []graphql2.SWONode var prog string @@ -73,5 +95,6 @@ func (a *Query) SwoStatus(ctx context.Context) (*graphql2.SWOStatus, error) { IsResetting: strings.HasPrefix(string(s.State), "reset"), Nodes: nodes, Errors: errs, + Connections: conns, }, nil } diff --git a/graphql2/models_gen.go b/graphql2/models_gen.go index 5d18312db5..94e8febc96 100644 --- a/graphql2/models_gen.go +++ b/graphql2/models_gen.go @@ -331,6 +331,11 @@ type RotationSearchOptions struct { FavoritesFirst *bool `json:"favoritesFirst"` } +type SWOConnection struct { + Name string `json:"name"` + Count int `json:"count"` +} + type SWONode struct { ID string `json:"id"` OldValid bool `json:"oldValid"` @@ -341,13 +346,14 @@ type SWONode struct { } type SWOStatus struct { - IsIdle bool `json:"isIdle"` - IsDone bool `json:"isDone"` - IsResetting bool `json:"isResetting"` - IsExecuting bool `json:"isExecuting"` - Details string `json:"details"` - Errors []string `json:"errors"` - Nodes []SWONode `json:"nodes"` + IsIdle bool `json:"isIdle"` + IsDone bool `json:"isDone"` + IsResetting bool `json:"isResetting"` + IsExecuting bool `json:"isExecuting"` + Details string `json:"details"` + Errors []string `json:"errors"` + Nodes []SWONode `json:"nodes"` + Connections []SWOConnection `json:"connections"` } type ScheduleConnection struct { diff --git a/graphql2/schema.graphql b/graphql2/schema.graphql index 5137b84bb4..219ee9adc0 100644 --- a/graphql2/schema.graphql +++ b/graphql2/schema.graphql @@ -127,6 +127,13 @@ type SWOStatus { errors: [String!]! nodes: [SWONode!]! + + connections: [SWOConnection!]! +} + +type SWOConnection { + name: String! + count: Int! 
} type SWONode { From 23b3784d70ed36f27fccf0e5870f9a04da434f37 Mon Sep 17 00:00:00 2001 From: Nathaniel Caza Date: Thu, 7 Apr 2022 11:58:37 -0500 Subject: [PATCH 087/225] add connections to UI --- .../app/admin/switchover/AdminSwitchover.tsx | 151 +++++++++++------- web/src/schema.d.ts | 6 + 2 files changed, 103 insertions(+), 54 deletions(-) diff --git a/web/src/app/admin/switchover/AdminSwitchover.tsx b/web/src/app/admin/switchover/AdminSwitchover.tsx index 267f66bd7e..2d246615c4 100644 --- a/web/src/app/admin/switchover/AdminSwitchover.tsx +++ b/web/src/app/admin/switchover/AdminSwitchover.tsx @@ -24,6 +24,13 @@ import LoadingButton from '@mui/lab/LoadingButton' import DatabaseOff from 'mdi-material-ui/DatabaseOff' import DatabaseCheck from 'mdi-material-ui/DatabaseCheck' import { Info } from '@mui/icons-material' +import Table from '@mui/material/Table' +import TableBody from '@mui/material/TableBody' +import TableCell from '@mui/material/TableCell' +import TableContainer from '@mui/material/TableContainer' +import TableHead from '@mui/material/TableHead' +import TableRow from '@mui/material/TableRow' +import Paper from '@mui/material/Paper' const query = gql` query { @@ -34,6 +41,10 @@ const query = gql` isExecuting details errors + connections { + name + count + } nodes { id status @@ -198,60 +209,92 @@ export default function AdminSwitchover(): JSX.Element { )} - - - - - {getDetails()} - - } - variant='outlined' - size='large' - disabled={mutationStatus.loading} - loading={pingLoad} - loadingPosition='start' - onClick={actionHandler('ping')} - > - {pingLoad ? 'Sending ping...' : 'Ping'} - - : } - disabled={data?.isDone || mutationStatus.loading} - variant='outlined' - size='large' - loading={ - data?.isResetting || - (lastAction === 'reset' && mutationStatus.loading) - } - loadingPosition='start' - onClick={actionHandler('reset')} - > - {resetLoad ? 'Resetting...' 
: 'Reset'} - - : } - disabled={!data?.isIdle || mutationStatus.loading} - variant='outlined' - size='large' - loading={ - data?.isExecuting || - (lastAction === 'execute' && mutationStatus.loading) - } - loadingPosition='start' - onClick={actionHandler('execute')} - > - {executeLoad ? 'Executing...' : 'Execute'} - - - - + + + + + + {getDetails()} + + } + variant='outlined' + size='large' + disabled={mutationStatus.loading} + loading={pingLoad} + loadingPosition='start' + onClick={actionHandler('ping')} + > + {pingLoad ? 'Sending ping...' : 'Ping'} + + : } + disabled={data?.isDone || mutationStatus.loading} + variant='outlined' + size='large' + loading={ + data?.isResetting || + (lastAction === 'reset' && mutationStatus.loading) + } + loadingPosition='start' + onClick={actionHandler('reset')} + > + {resetLoad ? 'Resetting...' : 'Reset'} + + : + } + disabled={!data?.isIdle || mutationStatus.loading} + variant='outlined' + size='large' + loading={ + data?.isExecuting || + (lastAction === 'execute' && mutationStatus.loading) + } + loadingPosition='start' + onClick={actionHandler('execute')} + > + {executeLoad ? 'Executing...' : 'Execute'} + + + + + + + + + + + + + Application Name + Count + + + + {data?.connections?.map((row) => ( + + + {row.name || '(no name)'} + + {row.count} + + ))} + +
+
+
+
{data?.nodes.length > 0 && diff --git a/web/src/schema.d.ts b/web/src/schema.d.ts index a8b584d2c8..865ea3e664 100644 --- a/web/src/schema.d.ts +++ b/web/src/schema.d.ts @@ -46,6 +46,12 @@ export interface SWOStatus { details: string errors: string[] nodes: SWONode[] + connections: SWOConnection[] +} + +export interface SWOConnection { + name: string + count: number } export interface SWONode { From 5dafbca67d3848221cc9392f8ad29a341cb0e3e5 Mon Sep 17 00:00:00 2001 From: Nathaniel Caza Date: Thu, 7 Apr 2022 12:08:34 -0500 Subject: [PATCH 088/225] fix switch --- swo/execute.go | 28 +++++++++++++++++++--------- 1 file changed, 19 insertions(+), 9 deletions(-) diff --git a/swo/execute.go b/swo/execute.go index 51337c7619..057c8e9da9 100644 --- a/swo/execute.go +++ b/swo/execute.go @@ -166,14 +166,24 @@ func SyncChanges(ctx context.Context, rt *rowTracker, oldConn, newConn *pgx.Conn // DisableTriggers will disable all triggers in the new DB. func DisableTriggers(ctx context.Context, tables []Table, conn *pgx.Conn) error { + var send pgx.Batch + for _, table := range tables { - _, err := conn.Exec(ctx, fmt.Sprintf("ALTER TABLE %s DISABLE TRIGGER USER", table.QuotedName())) - if err != nil { - return fmt.Errorf("%s: %w", table.Name, err) - } + send.Queue(fmt.Sprintf("ALTER TABLE %s DISABLE TRIGGER USER", table.QuotedName())) } - return nil + return conn.SendBatch(ctx, &send).Close() +} + +// EnableTriggers will re-enable triggers in the new DB. 
+func EnableTriggers(ctx context.Context, tables []Table, conn *pgx.Conn) error { + var send pgx.Batch + + for _, table := range tables { + send.Queue(fmt.Sprintf("ALTER TABLE %s ENABLE TRIGGER USER", table.QuotedName())) + } + + return conn.SendBatch(ctx, &send).Close() } func FinalSync(ctx context.Context, rt *rowTracker, srcConn, dstConn *pgx.Conn) error { @@ -262,10 +272,6 @@ func FinalSync(ctx context.Context, rt *rowTracker, srcConn, dstConn *pgx.Conn) return fmt.Errorf("close seq batch: %w", err) } - for _, t := range rt.tables { - setSeq.Queue("alter table " + t.QuotedName() + " enable trigger user") - } - err = dstTx.SendBatch(ctx, &setSeq).Close() if err != nil { return fmt.Errorf("set sequences: %w", err) @@ -275,6 +281,10 @@ func FinalSync(ctx context.Context, rt *rowTracker, srcConn, dstConn *pgx.Conn) return fmt.Errorf("commit dst: %w", err) } + if err = EnableTriggers(ctx, rt.tables, dstConn); err != nil { + return fmt.Errorf("enable triggers: %w", err) + } + _, err = srcTx.Exec(ctx, "update switchover_state set current_state = 'use_next_db' where current_state = 'in_progress'") if err != nil { return fmt.Errorf("update switchover state: %w", err) From 534244760f62b5a87f658dff4f034fbb55afbefa Mon Sep 17 00:00:00 2001 From: Nathaniel Caza Date: Thu, 7 Apr 2022 12:09:53 -0500 Subject: [PATCH 089/225] fix api change --- smoketest/harness/harness.go | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/smoketest/harness/harness.go b/smoketest/harness/harness.go index 486a8830a9..4eeadd2524 100644 --- a/smoketest/harness/harness.go +++ b/smoketest/harness/harness.go @@ -361,6 +361,7 @@ func (h *Harness) modifyDBOffset(d time.Duration) { h.setDBOffset(h.delayOffset) } + func (h *Harness) setDBOffset(d time.Duration) { h.mx.Lock() defer h.mx.Unlock() @@ -579,7 +580,7 @@ func (h *Harness) dumpDB() { if err != nil { h.t.Fatalf("failed to get abs dump path: %v", err) } - os.MkdirAll(filepath.Dir(file), 0755) + os.MkdirAll(filepath.Dir(file), 
0o755) var t time.Time err = h.db.QueryRow(context.Background(), "select now()").Scan(&t) if err != nil { @@ -598,7 +599,7 @@ func (h *Harness) dumpDB() { } defer fd.Close() - err = pgdump.DumpData(context.Background(), conn.Conn(), fd) + err = pgdump.DumpData(context.Background(), conn.Conn(), fd, nil) if err != nil { h.t.Errorf("failed to dump database '%s': %v", h.dbName, err) } From aeb89ba8ef5aa327e76a3b826ec37f81982215d7 Mon Sep 17 00:00:00 2001 From: Nathaniel Caza Date: Thu, 7 Apr 2022 12:19:10 -0500 Subject: [PATCH 090/225] transition --- .../app/admin/switchover/AdminSwitchover.tsx | 259 ++++++++++-------- 1 file changed, 143 insertions(+), 116 deletions(-) diff --git a/web/src/app/admin/switchover/AdminSwitchover.tsx b/web/src/app/admin/switchover/AdminSwitchover.tsx index 2d246615c4..28117253a6 100644 --- a/web/src/app/admin/switchover/AdminSwitchover.tsx +++ b/web/src/app/admin/switchover/AdminSwitchover.tsx @@ -6,7 +6,7 @@ import CardHeader from '@mui/material/CardHeader' import Grid from '@mui/material/Grid' import Skeleton from '@mui/material/Skeleton' import Typography from '@mui/material/Typography' -import { SvgIconProps, Tooltip } from '@mui/material' +import { Fade, SvgIconProps, Tooltip, Zoom } from '@mui/material' import PingIcon from 'mdi-material-ui/DatabaseMarker' import NoResetIcon from 'mdi-material-ui/DatabaseRefreshOutline' import ResetIcon from 'mdi-material-ui/DatabaseRefresh' @@ -31,6 +31,8 @@ import TableContainer from '@mui/material/TableContainer' import TableHead from '@mui/material/TableHead' import TableRow from '@mui/material/TableRow' import Paper from '@mui/material/Paper' +import { TransitionGroup } from 'react-transition-group' +import Spinner from '../../loading/components/Spinner' const query = gql` query { @@ -91,6 +93,10 @@ export default function AdminSwitchover(): JSX.Element { const [_statusNotices, setStatusNotices] = useState([]) const [commit, mutationStatus] = useMutation(mutation) + if (loading) { + return + } 
+ if (error && error.message == 'not in SWO mode') { return ( @@ -112,14 +118,25 @@ export default function AdminSwitchover(): JSX.Element { if (data?.isDone) { return ( - - - - - DB switchover is complete. - - - + + + + + + + DB switchover is complete. + + + + + ) } @@ -203,114 +220,124 @@ export default function AdminSwitchover(): JSX.Element { } return ( - - {statusNotices.length > 0 && ( - - - - )} - - - - - - {getDetails()} - - } - variant='outlined' - size='large' - disabled={mutationStatus.loading} - loading={pingLoad} - loadingPosition='start' - onClick={actionHandler('ping')} - > - {pingLoad ? 'Sending ping...' : 'Ping'} - - : } - disabled={data?.isDone || mutationStatus.loading} - variant='outlined' - size='large' - loading={ - data?.isResetting || - (lastAction === 'reset' && mutationStatus.loading) - } - loadingPosition='start' - onClick={actionHandler('reset')} - > - {resetLoad ? 'Resetting...' : 'Reset'} - - : - } - disabled={!data?.isIdle || mutationStatus.loading} - variant='outlined' - size='large' - loading={ - data?.isExecuting || - (lastAction === 'execute' && mutationStatus.loading) - } - loadingPosition='start' - onClick={actionHandler('execute')} - > - {executeLoad ? 'Executing...' : 'Execute'} - - - - - - - - - - - - - Application Name - Count - - - - {data?.connections?.map((row) => ( - + + + {statusNotices.length > 0 && ( + + + + )} + + + + + + {getDetails()} + + } + variant='outlined' + size='large' + disabled={mutationStatus.loading} + loading={pingLoad} + loadingPosition='start' + onClick={actionHandler('ping')} > - - {row.name || '(no name)'} - - {row.count} - - ))} - -
-
-
+ {pingLoad ? 'Sending ping...' : 'Ping'} + + : } + disabled={data?.isDone || mutationStatus.loading} + variant='outlined' + size='large' + loading={ + data?.isResetting || + (lastAction === 'reset' && mutationStatus.loading) + } + loadingPosition='start' + onClick={actionHandler('reset')} + > + {resetLoad ? 'Resetting...' : 'Reset'} + + : + } + disabled={!data?.isIdle || mutationStatus.loading} + variant='outlined' + size='large' + loading={ + data?.isExecuting || + (lastAction === 'execute' && mutationStatus.loading) + } + loadingPosition='start' + onClick={actionHandler('execute')} + > + {executeLoad ? 'Executing...' : 'Execute'} + + + +
+
+ + + + + + + + Application Name + Count + + + + {data?.connections?.map((row) => ( + + + {row.name || '(no name)'} + + {row.count} + + ))} + +
+
+
+
+ + + {data?.nodes.length > 0 && + data.nodes + .slice() + .sort((a: SWONodeType, b: SWONodeType) => { + const aName = friendlyName(a.id) + const bName = friendlyName(b.id) + if (aName < bName) return -1 + if (aName > bName) return 1 + return 0 + }) + .map((node: SWONodeType) => ( + + ))} + - - - {data?.nodes.length > 0 && - data.nodes - .slice() - .sort((a: SWONodeType, b: SWONodeType) => { - const aName = friendlyName(a.id) - const bName = friendlyName(b.id) - if (aName < bName) return -1 - if (aName > bName) return 1 - return 0 - }) - .map((node: SWONodeType) => ( - - ))} - - + + ) } From 0bc0e949d39f74afed3ac84442b35f64067de15d Mon Sep 17 00:00:00 2001 From: Nathaniel Caza Date: Thu, 7 Apr 2022 16:26:21 -0500 Subject: [PATCH 091/225] refactor --- swo/changelog.go | 56 ++- swo/drvconnector.go | 2 +- swo/execute.go | 582 +++++++++------------------- swo/preflightlocks.go | 4 +- swo/rowtracker.go | 141 +++++-- swo/scantables.go | 3 + swo/sessionstats.go | 96 ----- swo/swogrp/progressf.go | 12 +- swo/syncchanges.go | 93 +++++ swo/syncer.go | 9 - swo/{initialsync.go => syncfull.go} | 121 +++--- swo/syncloop.go | 78 ++++ swo/syncsequences.go | 39 ++ swo/waitforactivetx.go | 40 ++ 14 files changed, 660 insertions(+), 616 deletions(-) delete mode 100644 swo/sessionstats.go create mode 100644 swo/syncchanges.go delete mode 100644 swo/syncer.go rename swo/{initialsync.go => syncfull.go} (52%) create mode 100644 swo/syncloop.go create mode 100644 swo/syncsequences.go create mode 100644 swo/waitforactivetx.go diff --git a/swo/changelog.go b/swo/changelog.go index 0efd3218d3..8fefbbbc8a 100644 --- a/swo/changelog.go +++ b/swo/changelog.go @@ -5,7 +5,7 @@ import ( _ "embed" "fmt" - "github.com/jackc/pgx/v4" + "github.com/target/goalert/swo/swogrp" ) var ( @@ -16,35 +16,51 @@ var ( changelogTrigger string ) -func EnableChangeLog(ctx context.Context, tables []Table, conn *pgx.Conn) error { - _, err := conn.Exec(ctx, changelogTable) - if err != nil { - return fmt.Errorf("create 
change_log table: %w", err) +func (e *Execute) exec(ctx context.Context, conn pgxQueryer, query string) { + if e.err != nil { + return } - _, err = conn.Exec(ctx, changelogTrigger) + + _, err := conn.Exec(ctx, query) if err != nil { - return fmt.Errorf("create change_log AFTER trigger: %w", err) + e.err = fmt.Errorf("%s: %w", query, err) + return + } +} + +func (e *Execute) readErr() error { + err := e.err + e.err = nil + return err +} + +// EnableChangeLog enables DB change tracking by creating a change_log table that +// records table and row IDs for each INSERT, UPDATE, or DELETE. +func (e *Execute) EnableChangeLog(ctx context.Context) { + if e.err != nil { + return } - // create triggers - for _, table := range tables { + swogrp.Progressf(ctx, "enabling change log") + e.exec(ctx, e.mainDBConn, changelogTable) + e.exec(ctx, e.mainDBConn, changelogTrigger) + + // create triggers for all tables + for _, table := range e.tables { if table.SkipSync() { continue } - - _, err = conn.Exec(ctx, fmt.Sprintf(` + query := fmt.Sprintf(` CREATE TRIGGER %s AFTER INSERT OR UPDATE OR DELETE ON %s FOR EACH ROW EXECUTE PROCEDURE fn_process_change_log() - `, table.QuotedChangeTriggerName(), table.QuotedName())) - if err != nil { - return fmt.Errorf("create trigger %s: %w", table.QuotedChangeTriggerName(), err) - } + `, table.QuotedChangeTriggerName(), table.QuotedName()) + e.exec(ctx, e.mainDBConn, query) } - _, err = conn.Exec(ctx, "update switchover_state set current_state = 'in_progress' where current_state = 'idle'") - if err != nil { - return fmt.Errorf("update switchover_state to in_progress: %w", err) - } + e.exec(ctx, e.mainDBConn, + "update switchover_state set current_state = 'in_progress' where current_state = 'idle'") - return nil + if e.err != nil { + e.err = fmt.Errorf("enable change log: %w", e.err) + } } diff --git a/swo/drvconnector.go b/swo/drvconnector.go index 5ac422e0a5..168330025f 100644 --- a/swo/drvconnector.go +++ b/swo/drvconnector.go @@ -46,7 +46,7 @@ 
func (drv *Connector) Connect(ctx context.Context) (driver.Conn, error) { drv.id++ conn := c.(*stdlib.Conn) - err = sessionLock(ctx, conn) + err = SessionLock(ctx, conn) if errors.Is(err, swogrp.ErrDone) { drv.mx.Lock() drv.isDone = true diff --git a/swo/execute.go b/swo/execute.go index 057c8e9da9..325a8374df 100644 --- a/swo/execute.go +++ b/swo/execute.go @@ -5,342 +5,272 @@ import ( "encoding/json" "errors" "fmt" - "strconv" "time" "github.com/jackc/pgconn" "github.com/jackc/pgx/v4" "github.com/target/goalert/lock" "github.com/target/goalert/swo/swogrp" - "github.com/target/goalert/util/log" "github.com/target/goalert/util/sqlutil" ) -func WaitForRunningTx(ctx context.Context, oldConn *pgx.Conn) error { - var now time.Time - err := oldConn.QueryRow(ctx, "select now()").Scan(&now) - if err != nil { - return fmt.Errorf("get current timestamp: %w", err) - } - - for { - var n int - err = oldConn.QueryRow(ctx, "select count(*) from pg_stat_activity where state <> 'idle' and xact_start <= $1", now).Scan(&n) - if err != nil { - return fmt.Errorf("get running tx count: %w", err) - } - if n == 0 { - break - } +type Execute struct { + err error + tables []Table - swogrp.Progressf(ctx, "waiting for %d transaction(s) to finish", n) - time.Sleep(time.Second) - } + seqNames []string - return nil -} + mainDBConn, nextDBConn *pgx.Conn -func (m *Manager) DoExecute(ctx context.Context) error { - /* - - initial sync - - loop until few changes - - send proposal - - loop until proposal is valid - - execute proposal + grp *swogrp.Group - */ + rowIDs map[string]map[string]struct{} - return m.withConnFromBoth(ctx, func(ctx context.Context, oldConn, newConn *pgx.Conn) error { - swogrp.Progressf(ctx, "scanning tables...") - tables, err := ScanTables(ctx, oldConn) - if err != nil { - return fmt.Errorf("scan tables: %w", err) - } - - swogrp.Progressf(ctx, "enabling change log") - err = EnableChangeLog(ctx, tables, oldConn) - if err != nil { - return fmt.Errorf("enable change log: %w", 
err) - } - - swogrp.Progressf(ctx, "disabling triggers") - err = DisableTriggers(ctx, tables, newConn) - if err != nil { - return fmt.Errorf("disable triggers: %w", err) - } - - swogrp.Progressf(ctx, "waiting for in-flight transactions to finish") - err = WaitForRunningTx(ctx, oldConn) - if err != nil { - return fmt.Errorf("wait for running tx: %w", err) - } + stagedInserts []stagedID + stagedDeletes []stagedID +} - swogrp.Progressf(ctx, "performing initial sync") - err = m.InitialSync(ctx, tables, oldConn, newConn) - if err != nil { - return fmt.Errorf("initial sync: %w", err) - } +func NewExecute(ctx context.Context, mainDBConn, nextDBConn *pgx.Conn, grp *swogrp.Group) (*Execute, error) { + tables, err := ScanTables(ctx, mainDBConn) + if err != nil { + return nil, fmt.Errorf("scan tables: %w", err) + } - swogrp.Progressf(ctx, "recording new DB state") - rt, err := newRowTracker(ctx, tables, newConn) - if err != nil { - return fmt.Errorf("read row IDs: %w", err) + var seqNames []string + var name string + _, err = mainDBConn.QueryFunc(ctx, ` + select sequence_name + from information_schema.sequences + where + sequence_catalog = current_database() and + sequence_schema = 'public' + `, nil, []interface{}{&name}, func(r pgx.QueryFuncRow) error { + if name == "change_log_id_seq" { + // skip, as it does not exist in next db + return nil } + seqNames = append(seqNames, name) + return nil + }) + if err != nil { + return nil, fmt.Errorf("scan sequences: %w", err) + } - err = SyncChanges(ctx, rt, oldConn, newConn) - if err != nil { - return fmt.Errorf("sync changes: %w", err) - } + return &Execute{ + tables: tables, + seqNames: seqNames, + mainDBConn: mainDBConn, + nextDBConn: nextDBConn, + grp: grp, + }, nil +} - swogrp.Progressf(ctx, "pausing") - err = m.grp.Pause(ctx) +func (m *Manager) DoExecute(ctx context.Context) error { + return m.withConnFromBoth(ctx, func(ctx context.Context, oldConn, newConn *pgx.Conn) error { + exec, err := NewExecute(ctx, oldConn, newConn, 
m.grp) if err != nil { - return fmt.Errorf("pause: %w", err) + return err } - t := time.NewTicker(10 * time.Millisecond) - defer t.Stop() - for range t.C { - s := m.grp.Status() - var pausing, waiting int - for _, node := range s.Nodes { - for _, task := range node.Tasks { - if task.Name == "pause" { - pausing++ - } - if task.Name == "resume-after" { - waiting++ - } - } - } - - if pausing == 0 && waiting == len(s.Nodes) { - break - } - if waiting == 0 { - return fmt.Errorf("pause failed") - } - } - - swogrp.Progressf(ctx, "begin final sync") - err = FinalSync(ctx, rt, oldConn, newConn) - if err != nil { - log.Log(ctx, err) - return fmt.Errorf("final sync: %w", err) - } + exec.EnableChangeLog(ctx) + exec.DisableNextDBTriggers(ctx) + exec.WaitForActiveTx(ctx) + exec.SyncFull(ctx) + exec.ReadRowIDs(ctx) + exec.SyncLoop(ctx) + exec.PauseApps(ctx) + exec.FinalSync(ctx) return nil }) } -func SyncChanges(ctx context.Context, rt *rowTracker, oldConn, newConn *pgx.Conn) error { - for ctx.Err() == nil { - // sync in a loop until DB is up-to-date - s := time.Now() - n, pend, err := LoopSync(ctx, rt, oldConn, newConn) - dur := time.Since(s) +// PauseApps puts all nodes into a "paused" state: +// - Engine no longer cycles +// - Idle DB connections are disabled +// - Event listeners (postgres pub/sub) are disabled +func (e *Execute) PauseApps(ctx context.Context) { + if e.err != nil { + return + } - if pend > 0 { - swogrp.Progressf(ctx, "sync: %d rows pending", pend) - } - if err != nil { - log.Log(ctx, err) - rt.Rollback() - if n > 0 { - return fmt.Errorf("sync failure (commit without record): %w", err) + e.Progressf(ctx, "pausing") + err := e.grp.Pause(ctx) + if err != nil { + e.err = fmt.Errorf("pause: %w", err) + return + } + + t := time.NewTicker(10 * time.Millisecond) + defer t.Stop() + for range t.C { + s := e.grp.Status() + var pausing, waiting int + for _, node := range s.Nodes { + for _, task := range node.Tasks { + if task.Name == "pause" { + pausing++ + } + if 
task.Name == "resume-after" { + waiting++ + } } - continue } - rt.Commit() - if n != 0 { - swogrp.Progressf(ctx, "sync: %d rows replicated in %s", n, dur.Truncate(time.Millisecond)) - continue + if pausing == 0 && waiting == len(s.Nodes) { + break + } + if waiting == 0 { + e.err = fmt.Errorf("pause: timed out waiting for nodes to pause") + return } - - return nil } - - return ctx.Err() } // DisableTriggers will disable all triggers in the new DB. -func DisableTriggers(ctx context.Context, tables []Table, conn *pgx.Conn) error { - var send pgx.Batch +func (e *Execute) DisableNextDBTriggers(ctx context.Context) { + if e.err != nil { + return + } + + swogrp.Progressf(ctx, "disabling triggers") - for _, table := range tables { + var send pgx.Batch + for _, table := range e.tables { send.Queue(fmt.Sprintf("ALTER TABLE %s DISABLE TRIGGER USER", table.QuotedName())) } - return conn.SendBatch(ctx, &send).Close() + e.err = e.nextDBConn.SendBatch(ctx, &send).Close() + if e.err != nil { + e.err = fmt.Errorf("disable triggers on next DB: %w", e.err) + } } // EnableTriggers will re-enable triggers in the new DB. 
-func EnableTriggers(ctx context.Context, tables []Table, conn *pgx.Conn) error { +func (e *Execute) enableTriggers(ctx context.Context) error { var send pgx.Batch - for _, table := range tables { + for _, table := range e.tables { send.Queue(fmt.Sprintf("ALTER TABLE %s ENABLE TRIGGER USER", table.QuotedName())) } - return conn.SendBatch(ctx, &send).Close() + e.err = e.nextDBConn.SendBatch(ctx, &send).Close() + if e.err != nil { + return fmt.Errorf("enable triggers on next DB: %w", e.err) + } + return nil } -func FinalSync(ctx context.Context, rt *rowTracker, srcConn, dstConn *pgx.Conn) error { - var seqNames []string - var seqRead pgx.Batch - var name string - _, err := srcConn.QueryFunc(ctx, ` - select sequence_name - from information_schema.sequences - where - sequence_catalog = current_database() and - sequence_schema = 'public' - `, nil, []interface{}{&name}, func(r pgx.QueryFuncRow) error { - if name == "change_log_id_seq" { - // skip, as it does not exist in next db - return nil - } - seqRead.Queue("select last_value, is_called from " + sqlutil.QuoteID(name)) - seqNames = append(seqNames, name) - return nil - }) +// stopTheWorld grabs the exclusive advisory lock and then ensures the current state +// is set to in_progress. 
+func (e *Execute) stopTheWorld(ctx context.Context, srcTx pgx.Tx) error { + e.Progressf(ctx, "stop-the-world") + _, err := srcTx.Exec(ctx, fmt.Sprintf("select pg_advisory_xact_lock(%d)", lock.GlobalSwitchOver)) if err != nil { - return fmt.Errorf("get sequence names: %w", err) + return err } - if _, err = srcConn.Exec(ctx, "set idle_in_transaction_session_timeout = 1000"); err != nil { - return fmt.Errorf("set idle_in_transaction_session_timeout: %w", err) + var stat string + err = srcTx.QueryRow(ctx, "select current_state from switchover_state nowait").Scan(&stat) + if err != nil { + return err } - if _, err = srcConn.Exec(ctx, "set lock_timeout = 3000"); err != nil { - return fmt.Errorf("set idle_in_transaction_session_timeout: %w", err) + switch stat { + case "in_progress": + return nil + case "use_next_db": + return swogrp.ErrDone + case "idle": + return errors.New("not in progress") + default: + if e.err == nil { + return errors.New("unknown state: " + stat) + } + return e.err } +} - // catch up - if err = SyncChanges(ctx, rt, srcConn, dstConn); err != nil { - return fmt.Errorf("sync changes: %w", err) +// FinalSync will attempt to lock and finalize the switchover. 
+func (e *Execute) FinalSync(ctx context.Context) { + if e.err != nil { + return } - srcTx, err := srcConn.BeginTx(ctx, pgx.TxOptions{}) - if err != nil { - return fmt.Errorf("begin src: %w", err) + e.Progressf(ctx, "finalizing") + + // set timeouts before waiting on locks + e.exec(ctx, e.mainDBConn, "set idle_in_transaction_session_timeout = 1000") + e.exec(ctx, e.mainDBConn, "set lock_timeout = 3000") + e.SyncLoop(ctx) + if e.err != nil { + return } - defer srcTx.Rollback(ctx) - dstTx, err := dstConn.BeginTx(ctx, pgx.TxOptions{}) + srcTx, dstTx, err := e.syncTx(ctx, false) if err != nil { - return fmt.Errorf("begin dst: %w", err) + e.err = fmt.Errorf("final sync: %w", err) + return } + defer srcTx.Rollback(ctx) defer dstTx.Rollback(ctx) - swogrp.Progressf(ctx, "stop-the-world") - _, err = srcTx.Exec(ctx, fmt.Sprintf("select pg_advisory_xact_lock(%d)", lock.GlobalSwitchOver)) - if err != nil { - return fmt.Errorf("lock global switchover: %w", err) + if err = e.stopTheWorld(ctx, srcTx); err != nil { + e.err = fmt.Errorf("final sync: stop-the-world: %w", err) + return } - var stat string - err = srcConn.QueryRow(ctx, `select current_state from switchover_state nowait`).Scan(&stat) + go e.Progressf(ctx, "last sync") + _, err = e.syncChanges(ctx, srcTx, dstTx) if err != nil { - return fmt.Errorf("get switchover state: %w", err) - } - if stat == "use_next_db" { - return swogrp.ErrDone - } - if stat == "idle" { - return errors.New("not running") + e.err = fmt.Errorf("sync change log: %w", err) + return } - go swogrp.Progressf(ctx, "last sync") - _, err = syncChangeLog(ctx, rt, srcTx, dstTx) - if err != nil { - return fmt.Errorf("sync change log: %w", err) - } - - res := srcTx.SendBatch(ctx, &seqRead) - var setSeq pgx.Batch - for _, name := range seqNames { - var last int64 - var called bool - err = res.QueryRow().Scan(&last, &called) - if err != nil { - return fmt.Errorf("get sequence %s: %w", name, err) - } - setSeq.Queue("select pg_catalog.setval($1, $2, $3)", name, 
last, called) - } - if err = res.Close(); err != nil { - return fmt.Errorf("close seq batch: %w", err) - } - - err = dstTx.SendBatch(ctx, &setSeq).Close() - if err != nil { - return fmt.Errorf("set sequences: %w", err) + if err = e.syncSequences(ctx, srcTx, dstTx); err != nil { + e.err = fmt.Errorf("sync sequences: %w", err) + return } if err = dstTx.Commit(ctx); err != nil { - return fmt.Errorf("commit dst: %w", err) + e.err = fmt.Errorf("commit dst: %w", err) + return } - if err = EnableTriggers(ctx, rt.tables, dstConn); err != nil { - return fmt.Errorf("enable triggers: %w", err) + if err = e.enableTriggers(ctx); err != nil { + return } _, err = srcTx.Exec(ctx, "update switchover_state set current_state = 'use_next_db' where current_state = 'in_progress'") if err != nil { - return fmt.Errorf("update switchover state: %w", err) + e.err = fmt.Errorf("update switchover state: %w", err) + return } err = srcTx.Commit(ctx) if err != nil { - return fmt.Errorf("commit src: %w", err) + e.err = fmt.Errorf("commit src: %w", err) + return } - swogrp.Progressf(ctx, "done") - return nil + e.Progressf(ctx, "done") } -func LoopSync(ctx context.Context, rt *rowTracker, srcConn, dstConn *pgx.Conn) (ok, pend int, err error) { - srcTx, dstTx, err := syncTx(ctx, srcConn, dstConn) - if err != nil { - return 0, 0, fmt.Errorf("sync tx: %w", err) - } - defer srcTx.Rollback(ctx) - defer dstTx.Rollback(ctx) - - ids, err := syncChangeLog(ctx, rt, srcTx, dstTx) - if err != nil { - return 0, len(ids), fmt.Errorf("sync change log: %w", err) - } - - err = srcTx.Commit(ctx) - if err != nil { - return len(ids), 0, fmt.Errorf("commit src: %w", err) - } - - err = dstTx.Commit(ctx) - if err != nil { - return 0, len(ids), fmt.Errorf("commit dst: %w", err) - } - - _, err = srcConn.Exec(ctx, "DELETE FROM change_log WHERE id = any($1)", sqlutil.IntArray(ids)) - if err != nil { - return len(ids), 0, fmt.Errorf("update change log: %w", err) +func (e *Execute) syncTx(ctx context.Context, readOnly bool) 
(src, dst pgx.Tx, err error) { + var srcOpts pgx.TxOptions + if readOnly { + srcOpts = pgx.TxOptions{ + AccessMode: pgx.ReadOnly, + IsoLevel: pgx.Serializable, + DeferrableMode: pgx.Deferrable, + } } - return len(ids), 0, nil -} - -func syncTx(ctx context.Context, srcConn, dstConn *pgx.Conn) (src, dst pgx.Tx, err error) { - srcTx, err := srcConn.BeginTx(ctx, pgx.TxOptions{ - AccessMode: pgx.ReadOnly, - IsoLevel: pgx.Serializable, - DeferrableMode: pgx.Deferrable, - }) + srcTx, err := e.mainDBConn.BeginTx(ctx, srcOpts) if err != nil { return nil, nil, fmt.Errorf("begin src: %w", err) } - dstTx, err := dstConn.BeginTx(ctx, pgx.TxOptions{}) + dstTx, err := e.nextDBConn.BeginTx(ctx, pgx.TxOptions{}) if err != nil { srcTx.Rollback(ctx) return nil, nil, fmt.Errorf("begin dst: %w", err) @@ -349,115 +279,6 @@ func syncTx(ctx context.Context, srcConn, dstConn *pgx.Conn) (src, dst pgx.Tx, e return srcTx, dstTx, nil } -func syncChangeLog(ctx context.Context, rt *rowTracker, srcTx, dstTx pgxQueryer) ([]int, error) { - type rowID struct { - table string - id string - } - - var r rowID - var changeIDs []int - var changeID int - changes := make(map[rowID]struct{}) - rowIDs := make(map[string][]string) - _, err := srcTx.QueryFunc(ctx, "select id, table_name, row_id from change_log", nil, []interface{}{&changeID, &r.table, &r.id}, func(pgx.QueryFuncRow) error { - if _, ok := changes[r]; ok { - return nil - } - changes[r] = struct{}{} - rowIDs[r.table] = append(rowIDs[r.table], r.id) - changeIDs = append(changeIDs, changeID) - - return nil - }) - if err != nil { - return nil, fmt.Errorf("fetch changes: %w", err) - } - if len(changes) == 0 { - return nil, nil - } - - // defer all constraints - _, err = dstTx.Exec(ctx, "SET CONSTRAINTS ALL DEFERRED") - if err != nil { - return changeIDs, fmt.Errorf("defer constraints: %w", err) - } - - type pendingDelete struct { - query string - idArg interface{} - count int - } - var deletes []pendingDelete - - // go in insert order for fetching 
updates/inserts, note deleted rows - for _, table := range rt.tables { - if table.SkipSync() { - continue - } - - if len(rowIDs[table.Name]) == 0 { - continue - } - - sd, err := rt.fetch(ctx, table, srcTx, rowIDs[table.Name]) - if err != nil { - return changeIDs, fmt.Errorf("fetch changed rows: %w", err) - } - if len(sd.toDelete) > 0 { - deletes = append(deletes, pendingDelete{table.DeleteRowsQuery(), table.IDs(sd.toDelete), len(sd.toDelete)}) - } - - err = rt.apply(ctx, dstTx, table.UpdateRowsQuery(), sd.toUpdate) - if err != nil { - return changeIDs, fmt.Errorf("apply updates: %w", err) - } - - err = rt.apply(ctx, dstTx, table.InsertRowsQuery(), sd.toInsert) - if err != nil { - return changeIDs, fmt.Errorf("apply inserts: %w", err) - } - } - - // handle pendingDeletes in reverse table order - for i := len(deletes) - 1; i >= 0; i-- { - t, err := dstTx.Exec(ctx, deletes[i].query, deletes[i].idArg) - if err != nil { - return changeIDs, fmt.Errorf("delete rows: %w", err) - } - if t.RowsAffected() != int64(deletes[i].count) { - return changeIDs, fmt.Errorf("delete rows: got %d != expected %d", t.RowsAffected(), deletes[i].count) - } - } - - return changeIDs, nil -} - -func (rt *rowTracker) apply(ctx context.Context, dstTx pgxQueryer, q string, rows []syncRow) error { - if len(rows) == 0 { - return nil - } - - var rowsData []json.RawMessage - for _, row := range rows { - rowsData = append(rowsData, row.data) - } - - data, err := json.Marshal(rowsData) - if err != nil { - return fmt.Errorf("marshal rows: %w", err) - } - t, err := dstTx.Exec(ctx, q, data) - if err != nil { - return fmt.Errorf("exec: %w", err) - } - if t.RowsAffected() != int64(len(rows)) { - return fmt.Errorf("mismatch: got %d rows affected; expected %d", t.RowsAffected(), len(rows)) - } - - return nil -} - func (t Table) IDs(ids []string) interface{} { switch t.IDCol.Type { case "integer", "bigint": @@ -481,61 +302,8 @@ type syncRow struct { data json.RawMessage } type pgxQueryer interface { + 
QueryRow(ctx context.Context, sql string, args ...interface{}) pgx.Row Query(context.Context, string, ...interface{}) (pgx.Rows, error) Exec(context.Context, string, ...interface{}) (pgconn.CommandTag, error) QueryFunc(context.Context, string, []interface{}, []interface{}, func(pgx.QueryFuncRow) error) (pgconn.CommandTag, error) } - -func (rt *rowTracker) fetch(ctx context.Context, table Table, srcTx pgxQueryer, ids []string) (*syncData, error) { - rows, err := srcTx.Query(ctx, table.SelectRowsQuery(), table.IDs(ids)) - if errors.Is(err, pgx.ErrNoRows) { - return &syncData{toDelete: ids}, nil - } - defer rows.Close() - if err != nil { - return nil, fmt.Errorf("fetch rows: %w", err) - } - - sd := syncData{t: table} - existsInOld := make(map[string]struct{}) - for rows.Next() { - var id string - var data []byte - err = rows.Scan(&id, &data) - if err != nil { - return nil, fmt.Errorf("scan row: %w", err) - } - existsInOld[id] = struct{}{} - if rt.Exists(table.Name, id) { - sd.toUpdate = append(sd.toUpdate, syncRow{table.Name, id, data}) - } else { - rt.Insert(table.Name, id) - sd.toInsert = append(sd.toInsert, syncRow{table.Name, id, data}) - } - } - - for _, id := range ids { - if _, ok := existsInOld[id]; ok { - continue - } - if !rt.Exists(table.Name, id) { - continue - } - rt.Delete(table.Name, id) - sd.toDelete = append(sd.toDelete, id) - } - - return &sd, nil -} - -func intIDs(ids []string) []int { - var ints []int - for _, id := range ids { - i, err := strconv.Atoi(id) - if err != nil { - panic(err) - } - ints = append(ints, i) - } - return ints -} diff --git a/swo/preflightlocks.go b/swo/preflightlocks.go index 300e61f4dc..82dcd6e792 100644 --- a/swo/preflightlocks.go +++ b/swo/preflightlocks.go @@ -50,8 +50,8 @@ func UnlockConn(ctx context.Context, conn *pgx.Conn) { } } -// sessionLock will get a shared advisory lock for the connection. 
-func sessionLock(ctx context.Context, c *stdlib.Conn) error { +// SessionLock will get a shared advisory lock for the connection. +func SessionLock(ctx context.Context, c *stdlib.Conn) error { // Using literal here so we can avoid a prepared statement round trip. // // This will run for every new connection in SWO mode and for every diff --git a/swo/rowtracker.go b/swo/rowtracker.go index 56771d311e..86812285db 100644 --- a/swo/rowtracker.go +++ b/swo/rowtracker.go @@ -2,7 +2,10 @@ package swo import ( "context" + "encoding/json" + "errors" "fmt" + "strconv" "github.com/jackc/pgx/v4" ) @@ -19,60 +22,142 @@ type stagedID struct { id string } -func newRowTracker(ctx context.Context, tables []Table, newConn *pgx.Conn) (*rowTracker, error) { - rt := &rowTracker{ - tables: tables, - rowIDs: make(map[string]map[string]struct{}), +// ReadRowIDs reads the row IDs for all tables in the next-db to distinguish +// between those that need an INSERT vs UPDATE. +func (e *Execute) ReadRowIDs(ctx context.Context) { + if e.err != nil { + return } + e.Progressf(ctx, "recording next DB row IDs") + e.rowIDs = make(map[string]map[string]struct{}) - for _, table := range tables { + for _, table := range e.tables { if table.SkipSync() { continue } - rt.rowIDs[table.Name] = make(map[string]struct{}) - rows, err := newConn.Query(ctx, fmt.Sprintf("SELECT id::text FROM %s", table.QuotedName())) + e.rowIDs[table.Name] = make(map[string]struct{}) + rows, err := e.nextDBConn.Query(ctx, fmt.Sprintf("SELECT id::text FROM %s", table.QuotedName())) if err != nil { - return nil, err + e.err = fmt.Errorf("read row ids for %s: %w", table.Name, err) + return } for rows.Next() { var id string if err := rows.Scan(&id); err != nil { rows.Close() - return nil, err + e.err = fmt.Errorf("read row ids for %s: scan: %w", table.Name, err) + return } - rt._Insert(table.Name, id) + e._Insert(table.Name, id) } } +} - return rt, nil +func (e *Execute) Insert(table, id string) { + e.stagedInserts = 
append(e.stagedInserts, stagedID{table, id}) } -func (rt *rowTracker) Insert(table, id string) { - rt.stagedInserts = append(rt.stagedInserts, stagedID{table, id}) +func (e *Execute) Delete(table, id string) { + e.stagedDeletes = append(e.stagedDeletes, stagedID{table, id}) +} +func (e *Execute) _Insert(table, id string) { e.rowIDs[table][id] = struct{}{} } +func (e *Execute) _Delete(table, id string) { delete(e.rowIDs[table], id) } +func (e *Execute) Rollback() { + e.stagedDeletes = nil + e.stagedInserts = nil } -func (rt *rowTracker) Delete(table, id string) { - rt.stagedDeletes = append(rt.stagedDeletes, stagedID{table, id}) +func (e *Execute) Commit() { + for _, staged := range e.stagedInserts { + e._Insert(staged.table, staged.id) + } + e.stagedInserts = nil + + for _, staged := range e.stagedDeletes { + e._Delete(staged.table, staged.id) + } + e.stagedDeletes = nil } -func (rt *rowTracker) _Insert(table, id string) { rt.rowIDs[table][id] = struct{}{} } -func (rt *rowTracker) _Delete(table, id string) { delete(rt.rowIDs[table], id) } -func (rt *rowTracker) Rollback() { - rt.stagedDeletes = nil - rt.stagedInserts = nil + +func (e *Execute) Exists(table, id string) bool { _, ok := e.rowIDs[table][id]; return ok } + +func (e *Execute) applyChanges(ctx context.Context, dstTx pgxQueryer, q string, rows []syncRow) error { + if len(rows) == 0 { + return nil + } + + var rowsData []json.RawMessage + for _, row := range rows { + rowsData = append(rowsData, row.data) + } + + data, err := json.Marshal(rowsData) + if err != nil { + return fmt.Errorf("marshal rows: %w", err) + } + t, err := dstTx.Exec(ctx, q, data) + if err != nil { + return fmt.Errorf("exec: %w", err) + } + if t.RowsAffected() != int64(len(rows)) { + return fmt.Errorf("mismatch: got %d rows affected; expected %d", t.RowsAffected(), len(rows)) + } + + return nil } -func (rt *rowTracker) Commit() { - for _, staged := range rt.stagedInserts { - rt._Insert(staged.table, staged.id) +func (e *Execute) 
fetchChanges(ctx context.Context, table Table, srcTx pgxQueryer, ids []string) (*syncData, error) { + rows, err := srcTx.Query(ctx, table.SelectRowsQuery(), table.IDs(ids)) + if errors.Is(err, pgx.ErrNoRows) { + return &syncData{toDelete: ids}, nil + } + defer rows.Close() + if err != nil { + return nil, fmt.Errorf("fetch rows: %w", err) } - rt.stagedInserts = nil - for _, staged := range rt.stagedDeletes { - rt._Delete(staged.table, staged.id) + sd := syncData{t: table} + existsInOld := make(map[string]struct{}) + for rows.Next() { + var id string + var data []byte + err = rows.Scan(&id, &data) + if err != nil { + return nil, fmt.Errorf("scan row: %w", err) + } + existsInOld[id] = struct{}{} + if e.Exists(table.Name, id) { + sd.toUpdate = append(sd.toUpdate, syncRow{table.Name, id, data}) + } else { + e.Insert(table.Name, id) + sd.toInsert = append(sd.toInsert, syncRow{table.Name, id, data}) + } } - rt.stagedDeletes = nil + + for _, id := range ids { + if _, ok := existsInOld[id]; ok { + continue + } + if !e.Exists(table.Name, id) { + continue + } + e.Delete(table.Name, id) + sd.toDelete = append(sd.toDelete, id) + } + + return &sd, nil } -func (rt *rowTracker) Exists(table, id string) bool { _, ok := rt.rowIDs[table][id]; return ok } +func intIDs(ids []string) []int { + var ints []int + for _, id := range ids { + i, err := strconv.Atoi(id) + if err != nil { + panic(err) + } + ints = append(ints, i) + } + return ints +} diff --git a/swo/scantables.go b/swo/scantables.go index 685ea3a6b8..41407f0275 100644 --- a/swo/scantables.go +++ b/swo/scantables.go @@ -7,6 +7,7 @@ import ( "sort" "github.com/jackc/pgx/v4" + "github.com/target/goalert/swo/swogrp" ) type Column struct { @@ -25,6 +26,8 @@ var ( // ScanTables scans the database for tables, their columns, and dependencies. 
func ScanTables(ctx context.Context, conn *pgx.Conn) ([]Table, error) { + swogrp.Progressf(ctx, "scanning tables...") + var cRow struct { TableName string Column diff --git a/swo/sessionstats.go b/swo/sessionstats.go deleted file mode 100644 index c0b4ecceb0..0000000000 --- a/swo/sessionstats.go +++ /dev/null @@ -1,96 +0,0 @@ -package swo - -import "time" - -type StatsManager struct { - sessCh chan sessionRecord - statsCh chan Stats -} - -type Stats struct { - Last1Min TimeframeStats - Last5Min TimeframeStats - Last15Min TimeframeStats -} - -type TimeframeStats struct { - Count int - AvgTime time.Duration - MaxTime time.Duration -} - -type sessionRecord struct { - Dur time.Duration - End time.Time -} - -func NewStatsManager() *StatsManager { - sm := &StatsManager{ - sessCh: make(chan sessionRecord, 100), - statsCh: make(chan Stats), - } - go sm.loop() - - return sm -} - -func (sm *StatsManager) Start() (stop func()) { - start := time.Now() - return func() { - end := time.Now() - sm.sessCh <- sessionRecord{end.Sub(start), end} - } -} - -func (sm *StatsManager) Stats() Stats { return <-sm.statsCh } - -func (sm *StatsManager) loop() { - var sessRecs []sessionRecord - var stats Stats - t := time.NewTicker(time.Second) - defer t.Stop() - - for { - select { - case <-t.C: - sessRecs, stats = updateStats(sessRecs) - case sess := <-sm.sessCh: - sessRecs = append(sessRecs, sess) - case sm.statsCh <- stats: - } - } -} - -func statsForTime(sessRecs []sessionRecord, t time.Time) (s TimeframeStats) { - for _, sess := range sessRecs { - if !sess.End.After(t) { - continue - } - s.Count++ - s.AvgTime += sess.Dur - if sess.Dur > s.MaxTime { - s.MaxTime = sess.Dur - } - } - if s.Count == 0 { - return s - } - - s.AvgTime = s.AvgTime / time.Duration(s.Count) - - return s -} - -func updateStats(sessRecs []sessionRecord) ([]sessionRecord, Stats) { - n := time.Now() - - var s Stats - - s.Last15Min = statsForTime(sessRecs, n.Add(-15*time.Minute)) - sessRecs = 
sessRecs[len(sessRecs)-s.Last15Min.Count:] - - s.Last5Min = statsForTime(sessRecs, n.Add(-5*time.Minute)) - s.Last1Min = statsForTime(sessRecs[len(sessRecs)-s.Last5Min.Count:], n.Add(-time.Minute)) - - return sessRecs, s -} diff --git a/swo/swogrp/progressf.go b/swo/swogrp/progressf.go index 1a6e8f0d86..6f8f126db3 100644 --- a/swo/swogrp/progressf.go +++ b/swo/swogrp/progressf.go @@ -21,11 +21,21 @@ func withTask(ctx context.Context, grp *Group, info TaskInfo) context.Context { } func task(ctx context.Context) *taskCtx { - return ctx.Value(ctxKeyTask).(*taskCtx) + v := ctx.Value(ctxKeyTask) + if v == nil { + return nil + } + + return v.(*taskCtx) } func Progressf(ctx context.Context, format string, args ...interface{}) { t := task(ctx) + if t == nil { + // not a running task + return + } + t.TaskInfo.Status = fmt.Sprintf(format, args...) err := t.sendMessage(ctx, "task-progress", t.TaskInfo, false) if err != nil { diff --git a/swo/syncchanges.go b/swo/syncchanges.go new file mode 100644 index 0000000000..757d52d49e --- /dev/null +++ b/swo/syncchanges.go @@ -0,0 +1,93 @@ +package swo + +import ( + "context" + "fmt" + + "github.com/jackc/pgx/v4" +) + +// syncChanges will apply all changes recorded in the change_log table to the next DB. 
+func (e *Execute) syncChanges(ctx context.Context, srcTx, dstTx pgxQueryer) ([]int, error) { + type rowID struct { + table string + id string + } + + var r rowID + var changeIDs []int + var changeID int + changes := make(map[rowID]struct{}) + rowIDs := make(map[string][]string) + _, err := srcTx.QueryFunc(ctx, "select id, table_name, row_id from change_log", nil, []interface{}{&changeID, &r.table, &r.id}, func(pgx.QueryFuncRow) error { + if _, ok := changes[r]; ok { + return nil + } + changes[r] = struct{}{} + rowIDs[r.table] = append(rowIDs[r.table], r.id) + changeIDs = append(changeIDs, changeID) + + return nil + }) + if err != nil { + return nil, fmt.Errorf("fetch changes: %w", err) + } + if len(changes) == 0 { + return nil, nil + } + + // defer all constraints + _, err = dstTx.Exec(ctx, "SET CONSTRAINTS ALL DEFERRED") + if err != nil { + return changeIDs, fmt.Errorf("defer constraints: %w", err) + } + + type pendingDelete struct { + query string + idArg interface{} + count int + } + var deletes []pendingDelete + + // go in insert order for fetching updates/inserts, note deleted rows + for _, table := range e.tables { + if table.SkipSync() { + continue + } + + if len(rowIDs[table.Name]) == 0 { + continue + } + + sd, err := e.fetchChanges(ctx, table, srcTx, rowIDs[table.Name]) + if err != nil { + return changeIDs, fmt.Errorf("fetch changed rows: %w", err) + } + if len(sd.toDelete) > 0 { + deletes = append(deletes, pendingDelete{table.DeleteRowsQuery(), table.IDs(sd.toDelete), len(sd.toDelete)}) + } + + err = e.applyChanges(ctx, dstTx, table.UpdateRowsQuery(), sd.toUpdate) + if err != nil { + return changeIDs, fmt.Errorf("apply updates: %w", err) + } + + err = e.applyChanges(ctx, dstTx, table.InsertRowsQuery(), sd.toInsert) + if err != nil { + return changeIDs, fmt.Errorf("apply inserts: %w", err) + } + } + + // handle pendingDeletes in reverse table order + for i := len(deletes) - 1; i >= 0; i-- { + t, err := dstTx.Exec(ctx, deletes[i].query, deletes[i].idArg) + 
if err != nil { + return changeIDs, fmt.Errorf("delete rows: %w", err) + } + if t.RowsAffected() != int64(deletes[i].count) { + return changeIDs, fmt.Errorf("delete rows: got %d != expected %d", t.RowsAffected(), deletes[i].count) + } + } + + return changeIDs, nil +} diff --git a/swo/syncer.go b/swo/syncer.go deleted file mode 100644 index 6e2ba4cc9b..0000000000 --- a/swo/syncer.go +++ /dev/null @@ -1,9 +0,0 @@ -package swo - -import "context" - -type Syncer interface { - Reset(context.Context) error - Setup(context.Context) error - Sync(ctx context.Context, progress func(float64)) error -} diff --git a/swo/initialsync.go b/swo/syncfull.go similarity index 52% rename from swo/initialsync.go rename to swo/syncfull.go index 5f700cd8e3..3652e1fefb 100644 --- a/swo/initialsync.go +++ b/swo/syncfull.go @@ -12,91 +12,108 @@ import ( "github.com/target/goalert/swo/swogrp" ) -func (m *Manager) InitialSync(ctx context.Context, tables []Table, oldConn, newConn *pgx.Conn) error { - srcTx, err := oldConn.BeginTx(ctx, pgx.TxOptions{ - AccessMode: pgx.ReadOnly, - IsoLevel: pgx.Serializable, - DeferrableMode: pgx.Deferrable, - }) - if err != nil { - return fmt.Errorf("begin src tx: %w", err) +type lineCount struct { + n int + mx sync.Mutex +} + +func (lc *lineCount) Write(p []byte) (n int, err error) { + lc.mx.Lock() + lc.n += bytes.Count(p, []byte("\n")) + lc.mx.Unlock() + return len(p), nil +} + +func (lc *lineCount) Lines() int { + lc.mx.Lock() + defer lc.mx.Unlock() + return lc.n +} + +func (e *Execute) Progressf(ctx context.Context, format string, args ...interface{}) { + if e.err != nil { + return + } + + swogrp.Progressf(ctx, format, args...) 
+} + +func (e *Execute) do(ctx context.Context, desc string, fn func(context.Context) error) { + if e.err != nil { + return } - defer srcTx.Rollback(ctx) - dstTx, err := newConn.BeginTx(ctx, pgx.TxOptions{}) + e.err = fn(ctx) + if e.err != nil { + e.err = fmt.Errorf("%s: %w", desc, e.err) + } +} + +// SyncFull performs a full initial sync of the database by copying contents of each table directly to the +// destination database. +func (e *Execute) SyncFull(ctx context.Context) { + if e.err != nil { + return + } + e.Progressf(ctx, "performing initial sync") + + srcTx, dstTx, err := e.syncTx(ctx, true) if err != nil { - return fmt.Errorf("begin dst tx: %w", err) + e.err = fmt.Errorf("initial sync: begin: %w", err) + return } + defer srcTx.Rollback(ctx) defer dstTx.Rollback(ctx) // defer all constraints - _, err = dstTx.Exec(ctx, "SET CONSTRAINTS ALL DEFERRED") - if err != nil { - return fmt.Errorf("defer constraints: %w", err) + if _, err = dstTx.Exec(ctx, "SET CONSTRAINTS ALL DEFERRED"); err != nil { + e.err = fmt.Errorf("initial sync: defer constraints: %w", err) + return } - for _, table := range tables { + for _, table := range e.tables { if table.SkipSync() { continue } - err = m.SyncTableInit(ctx, table, srcTx, dstTx) - if err != nil { - return fmt.Errorf("sync table %s: %w", table.Name, err) + if err = e.syncTableFull(ctx, table, srcTx, dstTx); err != nil { + e.err = fmt.Errorf("initial sync: copy %s: %w", table.Name, err) + return } } - swogrp.Progressf(ctx, "commit initial sync") + e.Progressf(ctx, "commit initial sync") // Important to validate src commit, even though it's read-only. // // A failure here indicates the isolation level has been violated // and we will need to try again. 
- err = srcTx.Commit(ctx) - if err != nil { - return fmt.Errorf("commit src tx: %w", err) + if err := srcTx.Commit(ctx); err != nil { + e.err = fmt.Errorf("initial sync: src commit: %w", err) + return } - - err = dstTx.Commit(ctx) - if err != nil { - return fmt.Errorf("commit dst tx: %w", err) + if err := dstTx.Commit(ctx); err != nil { + e.err = fmt.Errorf("initial sync: dst commit: %w", err) + return } // vacuum analyze new DB - swogrp.Progressf(ctx, "vacuum analyze") - _, err = newConn.Exec(ctx, "vacuum analyze") - if err != nil { - return fmt.Errorf("vacuum analyze: %w", err) + e.Progressf(ctx, "vacuum analyze") + if _, err := e.nextDBConn.Exec(ctx, "VACUUM ANALYZE"); err != nil { + e.err = fmt.Errorf("initial sync: vacuum analyze: %w", err) + return } - - return nil -} - -type lineCount struct { - n int - mx sync.Mutex -} - -func (lc *lineCount) Write(p []byte) (n int, err error) { - lc.mx.Lock() - lc.n += bytes.Count(p, []byte("\n")) - lc.mx.Unlock() - return len(p), nil -} - -func (lc *lineCount) Lines() int { - lc.mx.Lock() - defer lc.mx.Unlock() - return lc.n } -func (m *Manager) SyncTableInit(origCtx context.Context, t Table, srcTx, dstTx pgx.Tx) error { +// syncTableFull will copy the contents of the table from the source database to the destination database using +// COPY TO and COPY FROM. 
+func (e *Execute) syncTableFull(origCtx context.Context, t Table, srcTx, dstTx pgx.Tx) error { ctx, cancel := context.WithCancel(origCtx) defer cancel() var rowCount int err := srcTx.QueryRow(ctx, fmt.Sprintf("select count(*) from %s", t.QuotedName())).Scan(&rowCount) if err != nil { - return fmt.Errorf("count rows: %w", err) + return fmt.Errorf("sync table %s: get row count: %w", t.Name, err) } var wg sync.WaitGroup diff --git a/swo/syncloop.go b/swo/syncloop.go new file mode 100644 index 0000000000..356bcfb43e --- /dev/null +++ b/swo/syncloop.go @@ -0,0 +1,78 @@ +package swo + +import ( + "context" + "fmt" + "time" + + "github.com/target/goalert/util/log" + "github.com/target/goalert/util/sqlutil" +) + +// SyncLoop will loop the logical replication sequence until the number of changes reaches zero. +func (e *Execute) SyncLoop(ctx context.Context) { + if e.err != nil { + return + } + + sync := func(ctx context.Context) (ok, pend int, err error) { + srcTx, dstTx, err := e.syncTx(ctx, true) + if err != nil { + return 0, 0, fmt.Errorf("sync tx: %w", err) + } + defer srcTx.Rollback(ctx) + defer dstTx.Rollback(ctx) + + ids, err := e.syncChanges(ctx, srcTx, dstTx) + if err != nil { + return 0, len(ids), fmt.Errorf("sync change log: %w", err) + } + + err = srcTx.Commit(ctx) + if err != nil { + return len(ids), 0, fmt.Errorf("commit src: %w", err) + } + + err = dstTx.Commit(ctx) + if err != nil { + return 0, len(ids), fmt.Errorf("commit dst: %w", err) + } + + _, err = e.mainDBConn.Exec(ctx, "DELETE FROM change_log WHERE id = any($1)", sqlutil.IntArray(ids)) + if err != nil { + return len(ids), 0, fmt.Errorf("update change log: %w", err) + } + + return len(ids), 0, nil + } + + for ctx.Err() == nil { + // sync in a loop until DB is up-to-date + s := time.Now() + n, pend, err := sync(ctx) + dur := time.Since(s) + + if pend > 0 { + e.Progressf(ctx, "sync: %d rows pending", pend) + } + if err != nil { + log.Log(ctx, err) + e.Rollback() + if n > 0 { + e.err = fmt.Errorf("sync 
changes: sync failure (commit without record): %w", err) + return + } + continue + } + e.Commit() + + if n != 0 { + e.Progressf(ctx, "sync: %d rows replicated in %s", n, dur.Truncate(time.Millisecond)) + continue + } + + return + } + + e.err = fmt.Errorf("sync changes: %w", ctx.Err()) +} diff --git a/swo/syncsequences.go b/swo/syncsequences.go new file mode 100644 index 0000000000..32f9d02a3e --- /dev/null +++ b/swo/syncsequences.go @@ -0,0 +1,39 @@ +package swo + +import ( + "context" + "fmt" + + "github.com/jackc/pgx/v4" + "github.com/target/goalert/util/sqlutil" +) + +func (e *Execute) syncSequences(ctx context.Context, src, dst pgx.Tx) error { + go e.Progressf(ctx, "syncing sequences") + var seqRead pgx.Batch + for _, name := range e.seqNames { + seqRead.Queue("select last_value, is_called from " + sqlutil.QuoteID(name)) + } + + res := src.SendBatch(ctx, &seqRead) + var setSeq pgx.Batch + for _, name := range e.seqNames { + var last int64 + var called bool + err := res.QueryRow().Scan(&last, &called) + if err != nil { + return fmt.Errorf("get sequence %s: %w", name, err) + } + setSeq.Queue("select pg_catalog.setval($1, $2, $3)", name, last, called) + } + if err := res.Close(); err != nil { + return fmt.Errorf("close seq batch: %w", err) + } + + err := dst.SendBatch(ctx, &setSeq).Close() + if err != nil { + return fmt.Errorf("set sequences: %w", err) + } + + return nil +} diff --git a/swo/waitforactivetx.go b/swo/waitforactivetx.go new file mode 100644 index 0000000000..2f7a77ab9e --- /dev/null +++ b/swo/waitforactivetx.go @@ -0,0 +1,40 @@ +package swo + +import ( + "context" + "fmt" + "time" + + "github.com/target/goalert/swo/swogrp" +) + +// WaitForActiveTx waits for all currently active transactions to complete in the main DB. 
+func (e *Execute) WaitForActiveTx(ctx context.Context) { + if e.err != nil { + return + } + + swogrp.Progressf(ctx, "waiting for in-flight transactions to finish") + + var now time.Time + err := e.mainDBConn.QueryRow(ctx, "select now()").Scan(&now) + if err != nil { + e.err = fmt.Errorf("wait for active tx: get current time: %w", err) + return + } + + for { + var n int + err = e.mainDBConn.QueryRow(ctx, "select count(*) from pg_stat_activity where state <> 'idle' and xact_start <= $1", now).Scan(&n) + if err != nil { + e.err = fmt.Errorf("wait for active tx: get active tx count: %w", err) + return + } + if n == 0 { + break + } + + swogrp.Progressf(ctx, "waiting for %d transaction(s) to finish", n) + time.Sleep(time.Second) + } +} From 5083ebe57434f214c57d1903bc9516cbeab872e0 Mon Sep 17 00:00:00 2001 From: Nathaniel Caza Date: Thu, 7 Apr 2022 17:14:07 -0500 Subject: [PATCH 092/225] batch queries --- swo/execute.go | 1 + swo/rowtracker.go | 15 +++++++-------- swo/syncchanges.go | 29 +++++++++++++++-------------- 3 files changed, 23 insertions(+), 22 deletions(-) diff --git a/swo/execute.go b/swo/execute.go index 325a8374df..7f24f360bb 100644 --- a/swo/execute.go +++ b/swo/execute.go @@ -302,6 +302,7 @@ type syncRow struct { data json.RawMessage } type pgxQueryer interface { + SendBatch(ctx context.Context, b *pgx.Batch) pgx.BatchResults QueryRow(ctx context.Context, sql string, args ...interface{}) pgx.Row Query(context.Context, string, ...interface{}) (pgx.Rows, error) Exec(context.Context, string, ...interface{}) (pgconn.CommandTag, error) diff --git a/swo/rowtracker.go b/swo/rowtracker.go index 86812285db..b669f93c18 100644 --- a/swo/rowtracker.go +++ b/swo/rowtracker.go @@ -83,7 +83,7 @@ func (e *Execute) Commit() { func (e *Execute) Exists(table, id string) bool { _, ok := e.rowIDs[table][id]; return ok } -func (e *Execute) applyChanges(ctx context.Context, dstTx pgxQueryer, q string, rows []syncRow) error { +func (e *Execute) queueChanges(b *pgx.Batch, q 
string, rows []syncRow) error { if len(rows) == 0 { return nil } @@ -97,18 +97,17 @@ func (e *Execute) applyChanges(ctx context.Context, dstTx pgxQueryer, q string, if err != nil { return fmt.Errorf("marshal rows: %w", err) } - t, err := dstTx.Exec(ctx, q, data) - if err != nil { - return fmt.Errorf("exec: %w", err) - } - if t.RowsAffected() != int64(len(rows)) { - return fmt.Errorf("mismatch: got %d rows affected; expected %d", t.RowsAffected(), len(rows)) - } + + b.Queue(q, data) return nil } func (e *Execute) fetchChanges(ctx context.Context, table Table, srcTx pgxQueryer, ids []string) (*syncData, error) { + if len(ids) == 0 { + return &syncData{}, nil + } + rows, err := srcTx.Query(ctx, table.SelectRowsQuery(), table.IDs(ids)) if errors.Is(err, pgx.ErrNoRows) { return &syncData{toDelete: ids}, nil diff --git a/swo/syncchanges.go b/swo/syncchanges.go index 757d52d49e..b47b57344c 100644 --- a/swo/syncchanges.go +++ b/swo/syncchanges.go @@ -36,11 +36,8 @@ func (e *Execute) syncChanges(ctx context.Context, srcTx, dstTx pgxQueryer) ([]i return nil, nil } - // defer all constraints - _, err = dstTx.Exec(ctx, "SET CONSTRAINTS ALL DEFERRED") - if err != nil { - return changeIDs, fmt.Errorf("defer constraints: %w", err) - } + var b pgx.Batch + b.Queue("SET CONSTRAINTS ALL DEFERRED") type pendingDelete struct { query string @@ -67,12 +64,12 @@ func (e *Execute) syncChanges(ctx context.Context, srcTx, dstTx pgxQueryer) ([]i deletes = append(deletes, pendingDelete{table.DeleteRowsQuery(), table.IDs(sd.toDelete), len(sd.toDelete)}) } - err = e.applyChanges(ctx, dstTx, table.UpdateRowsQuery(), sd.toUpdate) + err = e.queueChanges(&b, table.UpdateRowsQuery(), sd.toUpdate) if err != nil { return changeIDs, fmt.Errorf("apply updates: %w", err) } - err = e.applyChanges(ctx, dstTx, table.InsertRowsQuery(), sd.toInsert) + err = e.queueChanges(&b, table.InsertRowsQuery(), sd.toInsert) if err != nil { return changeIDs, fmt.Errorf("apply inserts: %w", err) } @@ -80,13 +77,17 @@ func 
(e *Execute) syncChanges(ctx context.Context, srcTx, dstTx pgxQueryer) ([]i // handle pendingDeletes in reverse table order for i := len(deletes) - 1; i >= 0; i-- { - t, err := dstTx.Exec(ctx, deletes[i].query, deletes[i].idArg) - if err != nil { - return changeIDs, fmt.Errorf("delete rows: %w", err) - } - if t.RowsAffected() != int64(deletes[i].count) { - return changeIDs, fmt.Errorf("delete rows: got %d != expected %d", t.RowsAffected(), deletes[i].count) - } + b.Queue(deletes[i].query, deletes[i].idArg) + } + + if b.Len() == 1 { + // no changes (just defer constraints) + return nil, nil + } + + err = dstTx.SendBatch(ctx, &b).Close() + if err != nil { + return changeIDs, fmt.Errorf("apply changes: %w", err) } return changeIDs, nil From d2304f7f295553962b92dfd5b1f951a9ab75026d Mon Sep 17 00:00:00 2001 From: Nathaniel Caza Date: Mon, 11 Apr 2022 15:59:01 -0500 Subject: [PATCH 093/225] add reset timeout --- swo/swogrp/group.go | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/swo/swogrp/group.go b/swo/swogrp/group.go index 36eb32d12e..f62b00e39c 100644 --- a/swo/swogrp/group.go +++ b/swo/swogrp/group.go @@ -42,6 +42,7 @@ type Group struct { nodeID uuid.UUID reset bool + resetS time.Time nodes map[uuid.UUID]*Node tasks map[uuid.UUID]TaskInfo leader bool @@ -130,6 +131,10 @@ func (g *Group) Status() Status { failed := make([]TaskInfo, len(g.failed)) copy(failed, g.failed) + if g.State == stateReset && time.Since(g.resetS) > time.Minute { + g.State = stateNeedsReset + } + return Status{ Nodes: nodes, State: g.State, @@ -216,6 +221,7 @@ func (g *Group) resetState() { g.ResumeFunc(g.Logger.BackgroundContext()) g.failed = nil g.reset = true + g.resetS = time.Now() g.leader = false g.State = stateReset for _, t := range g.tasks { From c1d1e2396022dd74476e089a5390de8d53c35c0c Mon Sep 17 00:00:00 2001 From: Nathaniel Caza Date: Mon, 11 Apr 2022 15:59:15 -0500 Subject: [PATCH 094/225] add slowproxy tool --- devtools/slowproxy/main.go | 55 ++++++++++++++++++++++++++ 
devtools/slowproxy/proxy.go | 20 ++++++++++ devtools/slowproxy/ratelimiter.go | 66 +++++++++++++++++++++++++++++++ 3 files changed, 141 insertions(+) create mode 100644 devtools/slowproxy/main.go create mode 100644 devtools/slowproxy/proxy.go create mode 100644 devtools/slowproxy/ratelimiter.go diff --git a/devtools/slowproxy/main.go b/devtools/slowproxy/main.go new file mode 100644 index 0000000000..6be2cd17e6 --- /dev/null +++ b/devtools/slowproxy/main.go @@ -0,0 +1,55 @@ +package main + +import ( + "flag" + "io" + "log" + "net" +) + +func main() { + // TODO: rate for ALL connections + rateOut := flag.Int("o", 0, "Max data rate (in bytes/sec) to DB.") + rateIn := flag.Int("i", 0, "Max data rate (in bytes/sec) from DB.") + latency := flag.Duration("d", 0, "Min latency (one-way).") + jitter := flag.Duration("j", 0, "Jitter in (random +/- to latency).") + l := flag.String("l", "localhost:5435", "Listen address.") + c := flag.String("c", "localhost:5432", "Connect address.") + flag.Parse() + log.SetFlags(log.Lshortfile) + + limitOut := newRateLimiter(*rateOut, *latency, *jitter) + limitIn := newRateLimiter(*rateIn, *latency, *jitter) + + srv, err := net.Listen("tcp", *l) + if err != nil { + log.Fatal(err) + } + + proxy := func(dst, src net.Conn, limiter *rateLimiter) { + // copy through DelayWriter + defer dst.Close() + defer src.Close() + + io.Copy(limiter.NewWriter(dst), src) + } + + for { + conn, err := srv.Accept() + if err != nil { + log.Fatal(err) + } + go func() { + dbConn, err := net.Dial("tcp", *c) + if err != nil { + log.Println("connect error:", err) + conn.Close() + return + } + + log.Println("CONNECT", conn.RemoteAddr().String()) + go proxy(conn, dbConn, limitOut) + go proxy(dbConn, conn, limitIn) + }() + } +} diff --git a/devtools/slowproxy/proxy.go b/devtools/slowproxy/proxy.go new file mode 100644 index 0000000000..c849b5a27c --- /dev/null +++ b/devtools/slowproxy/proxy.go @@ -0,0 +1,20 @@ +package main + +import ( + "io" + "math/rand" + "time" +) + 
+type DelayWriter struct { + io.Writer + latency time.Duration + jitter time.Duration +} + +func (w *DelayWriter) Write(p []byte) (int, error) { + delay := w.latency - (w.jitter / 2) + time.Duration(rand.Float64()*float64(w.jitter)) + time.Sleep(delay) + + return w.Writer.Write(p) +} diff --git a/devtools/slowproxy/ratelimiter.go b/devtools/slowproxy/ratelimiter.go new file mode 100644 index 0000000000..715bd52194 --- /dev/null +++ b/devtools/slowproxy/ratelimiter.go @@ -0,0 +1,66 @@ +package main + +import ( + "io" + "math/rand" + "time" +) + +type rateLimiter struct { + bucket chan int + overflow chan int + latency time.Duration + jitter time.Duration +} + +func newRateLimiter(bps int, latency, jitter time.Duration) *rateLimiter { + ch := make(chan int) + go func() { + bpt := bps / 100 + t := time.NewTicker(10 * time.Millisecond) + for range t.C { + ch <- bpt + } + }() + return &rateLimiter{ + bucket: ch, + overflow: make(chan int, 1000), + latency: latency, + jitter: jitter, + } +} + +func (r *rateLimiter) WaitFor(count int) { + delay := r.latency - (r.jitter / 2) + time.Duration(rand.Float64()*float64(r.jitter)) + s := time.Now() + var n int + for n < count { + select { + case val := <-r.bucket: + n += val + case val := <-r.overflow: + n += val + } + } + if n > count { + r.overflow <- n - count + } + time.Sleep(delay - time.Since(s)) +} + +func (r *rateLimiter) NewWriter(w io.Writer) io.Writer { + return &rateLimitWriter{ + w: w, + l: r, + } +} + +type rateLimitWriter struct { + l *rateLimiter + w io.Writer +} + +func (w *rateLimitWriter) Write(p []byte) (int, error) { + w.l.WaitFor(len(p)) + return w.w.Write(p) +} From 5e49955267c7ba2357fc79f5ae90cfc7aff471c7 Mon Sep 17 00:00:00 2001 From: Nathaniel Caza Date: Mon, 11 Apr 2022 16:15:25 -0500 Subject: [PATCH 095/225] fill bucket once per second --- devtools/slowproxy/ratelimiter.go | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/devtools/slowproxy/ratelimiter.go 
b/devtools/slowproxy/ratelimiter.go index 715bd52194..2fa6bb55ae 100644 --- a/devtools/slowproxy/ratelimiter.go +++ b/devtools/slowproxy/ratelimiter.go @@ -16,10 +16,9 @@ type rateLimiter struct { func newRateLimiter(bps int, latency, jitter time.Duration) *rateLimiter { ch := make(chan int) go func() { - bpt := bps / 100 - t := time.NewTicker(10 * time.Millisecond) + t := time.NewTicker(time.Second) for range t.C { - ch <- bpt + ch <- bps } }() return &rateLimiter{ From 1565fea4b0886a0af541f40eace0292ee970580a Mon Sep 17 00:00:00 2001 From: Nathaniel Caza Date: Mon, 11 Apr 2022 16:35:20 -0500 Subject: [PATCH 096/225] fix jitter --- devtools/slowproxy/ratelimiter.go | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/devtools/slowproxy/ratelimiter.go b/devtools/slowproxy/ratelimiter.go index 2fa6bb55ae..82229e0730 100644 --- a/devtools/slowproxy/ratelimiter.go +++ b/devtools/slowproxy/ratelimiter.go @@ -29,9 +29,7 @@ func newRateLimiter(bps int, latency, jitter time.Duration) *rateLimiter { } } -func (r *rateLimiter) WaitFor(count int) { - delay := r.latency - (r.jitter / 2) + time.Duration(rand.Float64()*float64(r.jitter)) - s := time.Now() +func (r *rateLimiter) WaitFor(count int) time.Duration { var n int for n < count { select { @@ -44,7 +42,7 @@ func (r *rateLimiter) WaitFor(count int) { if n > count { r.overflow <- n - count } - time.Sleep(delay - time.Since(s)) + return (r.latency - (r.jitter / 2) + time.Duration(rand.Float64()*float64(r.jitter))) / 2 } func (r *rateLimiter) NewWriter(w io.Writer) io.Writer { @@ -60,6 +58,8 @@ type rateLimitWriter struct { } func (w *rateLimitWriter) Write(p []byte) (int, error) { - w.l.WaitFor(len(p)) + dur := w.l.WaitFor(len(p)) + time.Sleep(dur) + defer time.Sleep(dur) return w.w.Write(p) } From 651805d1a0587dfd4673b66bb5d3c3b5fecaa324 Mon Sep 17 00:00:00 2001 From: Nathaniel Caza Date: Mon, 11 Apr 2022 16:35:43 -0500 Subject: [PATCH 097/225] batch change fetching --- swo/rowtracker.go | 8 
++------ swo/swogrp/group.go | 5 +++-- swo/syncchanges.go | 32 +++++++++++++++++++++----------- 3 files changed, 26 insertions(+), 19 deletions(-) diff --git a/swo/rowtracker.go b/swo/rowtracker.go index b669f93c18..4be2ce877d 100644 --- a/swo/rowtracker.go +++ b/swo/rowtracker.go @@ -103,12 +103,8 @@ func (e *Execute) queueChanges(b *pgx.Batch, q string, rows []syncRow) error { return nil } -func (e *Execute) fetchChanges(ctx context.Context, table Table, srcTx pgxQueryer, ids []string) (*syncData, error) { - if len(ids) == 0 { - return &syncData{}, nil - } - - rows, err := srcTx.Query(ctx, table.SelectRowsQuery(), table.IDs(ids)) +func (e *Execute) readChanges(ctx context.Context, table Table, res pgx.BatchResults, ids []string) (*syncData, error) { + rows, err := res.Query() if errors.Is(err, pgx.ErrNoRows) { return &syncData{toDelete: ids}, nil } diff --git a/swo/swogrp/group.go b/swo/swogrp/group.go index f62b00e39c..6f43c9de25 100644 --- a/swo/swogrp/group.go +++ b/swo/swogrp/group.go @@ -159,9 +159,10 @@ func (g *Group) loopMainLog() { buf := newMsgBuf() go func() { for msg := range buf.Next() { - err := g.processMessage(g.Logger.BackgroundContext(), msg) + ctx := g.Logger.BackgroundContext() + err := g.processMessage(ctx, msg) if err != nil { - g.Logger.Error(context.Background(), fmt.Errorf("process message: %w", err)) + g.Logger.Error(ctx, fmt.Errorf("process message: %w", err)) } } }() diff --git a/swo/syncchanges.go b/swo/syncchanges.go index b47b57344c..811f1dd46b 100644 --- a/swo/syncchanges.go +++ b/swo/syncchanges.go @@ -36,8 +36,8 @@ func (e *Execute) syncChanges(ctx context.Context, srcTx, dstTx pgxQueryer) ([]i return nil, nil } - var b pgx.Batch - b.Queue("SET CONSTRAINTS ALL DEFERRED") + var applyChanges pgx.Batch + applyChanges.Queue("SET CONSTRAINTS ALL DEFERRED") type pendingDelete struct { query string @@ -46,17 +46,27 @@ func (e *Execute) syncChanges(ctx context.Context, srcTx, dstTx pgxQueryer) ([]i } var deletes []pendingDelete - // go 
in insert order for fetching updates/inserts, note deleted rows + var queryChanges pgx.Batch + var changedTables []Table for _, table := range e.tables { if table.SkipSync() { continue } - - if len(rowIDs[table.Name]) == 0 { + ids := rowIDs[table.Name] + if len(ids) == 0 { continue } - sd, err := e.fetchChanges(ctx, table, srcTx, rowIDs[table.Name]) + queryChanges.Queue(table.SelectRowsQuery(), table.IDs(ids)) + changedTables = append(changedTables, table) + } + + res := srcTx.SendBatch(ctx, &queryChanges) + defer res.Close() + + // go in insert order for fetching updates/inserts, note deleted rows + for _, table := range changedTables { + sd, err := e.readChanges(ctx, table, res, rowIDs[table.Name]) if err != nil { return changeIDs, fmt.Errorf("fetch changed rows: %w", err) } @@ -64,12 +74,12 @@ func (e *Execute) syncChanges(ctx context.Context, srcTx, dstTx pgxQueryer) ([]i deletes = append(deletes, pendingDelete{table.DeleteRowsQuery(), table.IDs(sd.toDelete), len(sd.toDelete)}) } - err = e.queueChanges(&b, table.UpdateRowsQuery(), sd.toUpdate) + err = e.queueChanges(&applyChanges, table.UpdateRowsQuery(), sd.toUpdate) if err != nil { return changeIDs, fmt.Errorf("apply updates: %w", err) } - err = e.queueChanges(&b, table.InsertRowsQuery(), sd.toInsert) + err = e.queueChanges(&applyChanges, table.InsertRowsQuery(), sd.toInsert) if err != nil { return changeIDs, fmt.Errorf("apply inserts: %w", err) } @@ -77,15 +87,15 @@ func (e *Execute) syncChanges(ctx context.Context, srcTx, dstTx pgxQueryer) ([]i // handle pendingDeletes in reverse table order for i := len(deletes) - 1; i >= 0; i-- { - b.Queue(deletes[i].query, deletes[i].idArg) + applyChanges.Queue(deletes[i].query, deletes[i].idArg) } - if b.Len() == 1 { + if applyChanges.Len() == 1 { // no changes (just defer constraints) return nil, nil } - err = dstTx.SendBatch(ctx, &b).Close() + err = dstTx.SendBatch(ctx, &applyChanges).Close() if err != nil { return changeIDs, fmt.Errorf("apply changes: %w", err) } 
From 22688d60a84538428cb3f87485dd861f5af198b8 Mon Sep 17 00:00:00 2001 From: Nathaniel Caza Date: Tue, 12 Apr 2022 16:50:31 -0500 Subject: [PATCH 098/225] increase timeout --- devtools/slowproxy/proxy.go | 20 -------------------- swo/execute.go | 4 ++-- swo/syncloop.go | 2 +- 3 files changed, 3 insertions(+), 23 deletions(-) delete mode 100644 devtools/slowproxy/proxy.go diff --git a/devtools/slowproxy/proxy.go b/devtools/slowproxy/proxy.go deleted file mode 100644 index c849b5a27c..0000000000 --- a/devtools/slowproxy/proxy.go +++ /dev/null @@ -1,20 +0,0 @@ -package main - -import ( - "io" - "math/rand" - "time" -) - -type DelayWriter struct { - io.Writer - latency time.Duration - jitter time.Duration -} - -func (w *DelayWriter) Write(p []byte) (int, error) { - delay := w.latency - (w.jitter / 2) + time.Duration(rand.Float64()*float64(w.jitter)) - time.Sleep(delay) - - return w.Writer.Write(p) -} diff --git a/swo/execute.go b/swo/execute.go index 7f24f360bb..739cb0fbf2 100644 --- a/swo/execute.go +++ b/swo/execute.go @@ -81,7 +81,7 @@ func (m *Manager) DoExecute(ctx context.Context) error { exec.PauseApps(ctx) exec.FinalSync(ctx) - return nil + return exec.readErr() }) } @@ -199,7 +199,7 @@ func (e *Execute) FinalSync(ctx context.Context) { e.Progressf(ctx, "finalizing") // set timeouts before waiting on locks - e.exec(ctx, e.mainDBConn, "set idle_in_transaction_session_timeout = 1000") + e.exec(ctx, e.mainDBConn, "set idle_in_transaction_session_timeout = 3000") e.exec(ctx, e.mainDBConn, "set lock_timeout = 3000") e.SyncLoop(ctx) if e.err != nil { diff --git a/swo/syncloop.go b/swo/syncloop.go index 356bcfb43e..b1f42887af 100644 --- a/swo/syncloop.go +++ b/swo/syncloop.go @@ -66,7 +66,7 @@ func (e *Execute) SyncLoop(ctx context.Context) { } e.Commit() - if n != 0 { + if n > 10 { e.Progressf(ctx, "sync: %d rows replicated in %s", n, dur.Truncate(time.Millisecond)) continue } From 88e93da8a2b7fea02cb04d92dbf004d3446e1ab1 Mon Sep 17 00:00:00 2001 From: Nathaniel Cook 
Date: Thu, 14 Apr 2022 14:04:51 -0700 Subject: [PATCH 099/225] adjust layout --- .../app/admin/switchover/AdminSwitchover.tsx | 186 +++++++++--------- web/src/app/admin/switchover/SWONode.tsx | 2 +- 2 files changed, 92 insertions(+), 96 deletions(-) diff --git a/web/src/app/admin/switchover/AdminSwitchover.tsx b/web/src/app/admin/switchover/AdminSwitchover.tsx index 28117253a6..655f3eebaf 100644 --- a/web/src/app/admin/switchover/AdminSwitchover.tsx +++ b/web/src/app/admin/switchover/AdminSwitchover.tsx @@ -27,10 +27,8 @@ import { Info } from '@mui/icons-material' import Table from '@mui/material/Table' import TableBody from '@mui/material/TableBody' import TableCell from '@mui/material/TableCell' -import TableContainer from '@mui/material/TableContainer' import TableHead from '@mui/material/TableHead' import TableRow from '@mui/material/TableRow' -import Paper from '@mui/material/Paper' import { TransitionGroup } from 'react-transition-group' import Spinner from '../../loading/components/Spinner' @@ -97,7 +95,7 @@ export default function AdminSwitchover(): JSX.Element { return } - if (error && error.message == 'not in SWO mode') { + if (error && error.message === 'not in SWO mode') { return ( @@ -119,7 +117,7 @@ export default function AdminSwitchover(): JSX.Element { if (data?.isDone) { return ( - + {cptlz(data.details)} } - return null + return   // reserves whitespace } return ( - - + + {statusNotices.length > 0 && ( )} - - - - - - {getDetails()} - - } - variant='outlined' - size='large' - disabled={mutationStatus.loading} - loading={pingLoad} - loadingPosition='start' - onClick={actionHandler('ping')} - > - {pingLoad ? 'Sending ping...' : 'Ping'} - - : } - disabled={data?.isDone || mutationStatus.loading} - variant='outlined' - size='large' - loading={ - data?.isResetting || - (lastAction === 'reset' && mutationStatus.loading) - } - loadingPosition='start' - onClick={actionHandler('reset')} - > - {resetLoad ? 'Resetting...' 
: 'Reset'} - - : - } - disabled={!data?.isIdle || mutationStatus.loading} - variant='outlined' - size='large' - loading={ - data?.isExecuting || - (lastAction === 'execute' && mutationStatus.loading) - } - loadingPosition='start' - onClick={actionHandler('execute')} + + + + + {getDetails()} + + } + variant='outlined' + size='large' + disabled={mutationStatus.loading} + loading={pingLoad} + loadingPosition='start' + onClick={actionHandler('ping')} + > + {pingLoad ? 'Sending ping...' : 'Ping'} + + : } + disabled={data?.isDone || mutationStatus.loading} + variant='outlined' + size='large' + loading={ + data?.isResetting || + (lastAction === 'reset' && mutationStatus.loading) + } + loadingPosition='start' + onClick={actionHandler('reset')} + > + {resetLoad ? 'Resetting...' : 'Reset'} + + : + } + disabled={!data?.isIdle || mutationStatus.loading} + variant='outlined' + size='large' + loading={ + data?.isExecuting || + (lastAction === 'execute' && mutationStatus.loading) + } + loadingPosition='start' + onClick={actionHandler('execute')} + > + {executeLoad ? 'Executing...' : 'Execute'} + + + + + + + + + + + + + Application Name + Count + + + + {data?.connections?.map((row) => ( + - {executeLoad ? 'Executing...' : 'Execute'} - - - - - - - - - -
- - - Application Name - Count - - - - {data?.connections?.map((row) => ( - - - {row.name || '(no name)'} - - {row.count} - - ))} - -
- -
-
+ + {row.name || '(no name)'} + + {row.count} + + ))} + + +
- + + {data?.nodes.length > 0 && data.nodes .slice() diff --git a/web/src/app/admin/switchover/SWONode.tsx b/web/src/app/admin/switchover/SWONode.tsx index 785ce76d1f..99d5f3a6b5 100644 --- a/web/src/app/admin/switchover/SWONode.tsx +++ b/web/src/app/admin/switchover/SWONode.tsx @@ -22,7 +22,7 @@ export default function SWONode({ node, name }: SWONodeProps): JSX.Element { const theme = useTheme() return ( - + {name} From 3440e401425fe999a088da1075a47a70db6ed77b Mon Sep 17 00:00:00 2001 From: Nathaniel Cook Date: Thu, 14 Apr 2022 14:12:51 -0700 Subject: [PATCH 100/225] add fallback for node status str --- web/src/app/admin/switchover/SWONode.tsx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/web/src/app/admin/switchover/SWONode.tsx b/web/src/app/admin/switchover/SWONode.tsx index 99d5f3a6b5..e711dc0c12 100644 --- a/web/src/app/admin/switchover/SWONode.tsx +++ b/web/src/app/admin/switchover/SWONode.tsx @@ -32,7 +32,7 @@ export default function SWONode({ node, name }: SWONodeProps): JSX.Element { - Status: {node.status} + Status: {node.status || 'Unknown'} From 6e196f8e96a4886e3322788d4a862528e3ae28ab Mon Sep 17 00:00:00 2001 From: Nathaniel Caza Date: Thu, 2 Jun 2022 10:29:33 -0500 Subject: [PATCH 101/225] add switchover route --- keyring/store.go | 7 +++++-- web/src/app/main/AppRoutes.tsx | 2 ++ web/src/app/main/NavBar.tsx | 1 + 3 files changed, 8 insertions(+), 2 deletions(-) diff --git a/keyring/store.go b/keyring/store.go index 83c1db0b8c..3812540460 100644 --- a/keyring/store.go +++ b/keyring/store.go @@ -112,6 +112,7 @@ func marshalVerificationKeys(keys map[byte]ecdsa.PublicKey) ([]byte, error) { } return json.Marshal(m) } + func parseVerificationKeys(data []byte) (map[byte]ecdsa.PublicKey, error) { var m map[byte][]byte err := json.Unmarshal(data, &m) @@ -228,6 +229,7 @@ func (db *DB) Shutdown(ctx context.Context) error { <-db.shutdown return nil } + func (db *DB) loop() { t := time.NewTicker(12 * time.Hour) var shutdownCtx context.Context 
@@ -275,6 +277,7 @@ func (db *DB) newKey() (*ecdsa.PrivateKey, []byte, error) { } return key, data, nil } + func (db *DB) loadKey(encData []byte) (*ecdsa.PrivateKey, error) { data, _, err := db.cfg.Keys.Decrypt(encData) if err != nil { @@ -405,7 +408,7 @@ func (db *DB) refreshAndRotateKeys(ctx context.Context, forceRotation bool) erro var vKeysData, signKeyData, nextKeyData []byte var t time.Time - var rotateT *time.Time + var rotateT sql.NullTime var count int err = row.Scan(&vKeysData, &signKeyData, &nextKeyData, &t, &rotateT, &count) if errors.Is(err, sql.ErrNoRows) { @@ -420,7 +423,7 @@ func (db *DB) refreshAndRotateKeys(ctx context.Context, forceRotation bool) erro return errors.Wrap(err, "unmarshal verification keys") } - if forceRotation || (rotateT != nil && !t.Before(*rotateT)) { + if forceRotation || (rotateT.Valid && !t.Before(rotateT.Time)) { // perform a key rotation signKeyData = nextKeyData var nextKey *ecdsa.PrivateKey diff --git a/web/src/app/main/AppRoutes.tsx b/web/src/app/main/AppRoutes.tsx index 30b9c0882b..8466c1239d 100644 --- a/web/src/app/main/AppRoutes.tsx +++ b/web/src/app/main/AppRoutes.tsx @@ -5,6 +5,7 @@ import AdminDebugMessagesLayout from '../admin/admin-message-logs/AdminDebugMess import AdminConfig from '../admin/AdminConfig' import AdminLimits from '../admin/AdminLimits' import AdminToolbox from '../admin/AdminToolbox' +import AdminSwitchover from '../admin/switchover/AdminSwitchover' import AlertsList from '../alerts/AlertsList' import AlertDetailPage from '../alerts/pages/AlertDetailPage' import Documentation from '../documentation/Documentation' @@ -110,6 +111,7 @@ export const routes: Record> = { '/admin/limits': AdminLimits, '/admin/toolbox': AdminToolbox, '/admin/message-logs': AdminDebugMessagesLayout, + '/admin/switchover': AdminSwitchover, '/wizard': WizardRouter, '/docs': Documentation, diff --git a/web/src/app/main/NavBar.tsx b/web/src/app/main/NavBar.tsx index c76cc43c16..99176679cb 100644 --- 
a/web/src/app/main/NavBar.tsx +++ b/web/src/app/main/NavBar.tsx @@ -74,6 +74,7 @@ export default function NavBar(): JSX.Element { + From a185479086e5fc318f7179b4fb5c1e37ddbe657f Mon Sep 17 00:00:00 2001 From: Nathaniel Caza Date: Thu, 2 Jun 2022 10:34:34 -0500 Subject: [PATCH 102/225] update disabled text --- .../app/admin/switchover/AdminSwitchover.tsx | 21 ++++++++++++------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/web/src/app/admin/switchover/AdminSwitchover.tsx b/web/src/app/admin/switchover/AdminSwitchover.tsx index 655f3eebaf..49154ab9dc 100644 --- a/web/src/app/admin/switchover/AdminSwitchover.tsx +++ b/web/src/app/admin/switchover/AdminSwitchover.tsx @@ -100,15 +100,20 @@ export default function AdminSwitchover(): JSX.Element { - - Unavailable: Application is not in switchover mode.{' '} - + - - - + Unavailable: Application is not in switchover mode. +
+
+ You must start GoAlert with + GOALERT_DB_URL_NEXT + or --db-url-next to perform a switchover. +
+
) From a49e969ba22a3f40308d5327f2929470b55a22bd Mon Sep 17 00:00:00 2001 From: Nathaniel Caza Date: Thu, 2 Jun 2022 10:42:31 -0500 Subject: [PATCH 103/225] fix spacing --- web/src/app/admin/switchover/AdminSwitchover.tsx | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/web/src/app/admin/switchover/AdminSwitchover.tsx b/web/src/app/admin/switchover/AdminSwitchover.tsx index 49154ab9dc..91a4aaeafb 100644 --- a/web/src/app/admin/switchover/AdminSwitchover.tsx +++ b/web/src/app/admin/switchover/AdminSwitchover.tsx @@ -6,7 +6,7 @@ import CardHeader from '@mui/material/CardHeader' import Grid from '@mui/material/Grid' import Skeleton from '@mui/material/Skeleton' import Typography from '@mui/material/Typography' -import { Fade, SvgIconProps, Tooltip, Zoom } from '@mui/material' +import { Fade, SvgIconProps, Zoom } from '@mui/material' import PingIcon from 'mdi-material-ui/DatabaseMarker' import NoResetIcon from 'mdi-material-ui/DatabaseRefreshOutline' import ResetIcon from 'mdi-material-ui/DatabaseRefresh' @@ -23,7 +23,6 @@ import SWONode from './SWONode' import LoadingButton from '@mui/lab/LoadingButton' import DatabaseOff from 'mdi-material-ui/DatabaseOff' import DatabaseCheck from 'mdi-material-ui/DatabaseCheck' -import { Info } from '@mui/icons-material' import Table from '@mui/material/Table' import TableBody from '@mui/material/TableBody' import TableCell from '@mui/material/TableCell' @@ -318,7 +317,7 @@ export default function AdminSwitchover(): JSX.Element {
- + {data?.nodes.length > 0 && data.nodes .slice() From 9e3340a884beca80febf6c4405f4cbb0cfe4cdd4 Mon Sep 17 00:00:00 2001 From: Nathaniel Caza Date: Fri, 3 Jun 2022 10:57:01 -0500 Subject: [PATCH 104/225] add swo mode --- Makefile | 13 +++++- Procfile.swo | 37 ++++++++++++++++ go.mod | 16 +++++-- go.sum | 122 ++++++++++++++++++++------------------------------- 4 files changed, 110 insertions(+), 78 deletions(-) create mode 100644 Procfile.swo diff --git a/Makefile b/Makefile index 62d15c1fd3..44ddc6a316 100644 --- a/Makefile +++ b/Makefile @@ -91,6 +91,13 @@ cy-mobile-prod-run: web/src/build/static/app.js cypress web/src/schema.d.ts: graphql2/schema.graphql node_modules web/src/genschema.go devtools/gqlgen/* go generate ./web/src +start-swo: bin/psql-lite bin/goalert bin/waitfor bin/runproc + ./bin/waitfor -timeout 1s "$(DB_URL)" || make postgres + ./bin/goalert migrate --db-url=postgres://goalert@localhost/goalert + ./bin/psql-lite -d postgres://goalert@localhost -c "update switchover_state set current_state = 'idle'; drop database if exists goalert2; create database goalert2;" + ./bin/goalert migrate --db-url=postgres://goalert@localhost/goalert2 + GOALERT_VERSION=$(GIT_VERSION) ./bin/runproc -f Procfile.swo + start: bin/goalert node_modules web/src/schema.d.ts $(BIN_DIR)/tools/prometheus go run ./devtools/waitfor -timeout 1s "$(DB_URL)" || make postgres GOALERT_VERSION=$(GIT_VERSION) go run ./devtools/runproc -f Procfile -l Procfile.local @@ -136,7 +143,11 @@ pkg/sysapi/sysapi_grpc.pb.go: pkg/sysapi/sysapi.proto $(BIN_DIR)/tools/protoc-ge pkg/sysapi/sysapi.pb.go: pkg/sysapi/sysapi.proto $(BIN_DIR)/tools/protoc-gen-go $(BIN_DIR)/tools/protoc PATH="$(BIN_DIR)/tools" protoc --go_out=. 
--go_opt=paths=source_relative pkg/sysapi/sysapi.proto -generate: node_modules pkg/sysapi/sysapi.pb.go pkg/sysapi/sysapi_grpc.pb.go +bin/tools/sqlc: go.mod go.sum + CGO_ENABLED=1 go build -o bin/tools/sqlc github.com/kyleconroy/sqlc/cmd/sqlc + +generate: node_modules pkg/sysapi/sysapi.pb.go pkg/sysapi/sysapi_grpc.pb.go bin/tools/sqlc + ./bin/tools/sqlc generate go generate ./... smoketest: diff --git a/Procfile.swo b/Procfile.swo new file mode 100644 index 0000000000..eb21f4cb5e --- /dev/null +++ b/Procfile.swo @@ -0,0 +1,37 @@ +build: while true; do make -qs bin/goalert || make bin/goalert || (echo '\033[0;31mBuild Failure'; sleep 3); sleep 0.1; done + +@watch-file=./bin/goalert +goalert: ./bin/goalert -l=localhost:3030 --ui-dir=web/src/build --db-url=postgres://goalert@localhost --listen-sysapi=localhost:1234 --listen-prometheus=localhost:2112 --db-url-next=postgres://goalert@localhost:5432/goalert2 + +smtp: go run github.com/mailhog/MailHog -ui-bind-addr=localhost:8025 -api-bind-addr=localhost:8025 -smtp-bind-addr=localhost:1025 | grep -v KEEPALIVE + +@watch-file=./web/src/esbuild.config.js +ui: yarn workspace goalert-web run esbuild --watch + +@watch-file=./bin/goalert +ga2: ./bin/goalert -l=localhost:3050 --db-url=postgres://goalert@localhost:5432/goalert?sslmode=disable --db-url-next=postgres://goalert@localhost:5432/goalert2 --api-only + +@watch-file=./bin/goalert +ga3: ./bin/goalert -l=localhost:3051 --db-url=postgres://goalert@localhost:5432/goalert?sslmode=disable --db-url-next=postgres://goalert@localhost:5432/goalert2 --api-only + +@watch-file=./bin/goalert +ga4: ./bin/goalert -l=localhost:3052 --db-url=postgres://goalert@localhost:5432/goalert?sslmode=disable --db-url-next=postgres://goalert@localhost:5432/goalert2 --api-only + +@watch-file=./bin/goalert +ga5: ./bin/goalert -l=localhost:3053 --db-url=postgres://goalert@localhost:5432/goalert?sslmode=disable --db-url-next=postgres://goalert@localhost:5432/goalert2 --api-only + +@watch-file=./bin/goalert 
+ga6: ./bin/goalert -l=localhost:3054 --db-url=postgres://goalert@localhost:5432/goalert?sslmode=disable --db-url-next=postgres://goalert@localhost:5432/goalert2 --api-only + +@watch-file=./bin/goalert +ga7: ./bin/goalert -l=localhost:3055 --db-url=postgres://goalert@localhost:5432/goalert?sslmode=disable --db-url-next=postgres://goalert@localhost:5432/goalert2 + +@watch-file=./bin/goalert +ga8: ./bin/goalert -l=localhost:3056 --db-url=postgres://goalert@localhost:5432/goalert?sslmode=disable --db-url-next=postgres://goalert@localhost:5432/goalert2 + +@watch-file=./bin/goalert +ga9: ./bin/goalert -l=localhost:3057 --db-url=postgres://goalert@localhost:5432/goalert?sslmode=disable --db-url-next=postgres://goalert@localhost:5432/goalert2 + +proxy: go run ./devtools/simpleproxy /=http://localhost:3030,http://localhost:3050,http://localhost:3051,http://localhost:3052,http://localhost:3053,http://localhost:3054,http://localhost:3055,http://localhost:3056,http://localhost:3057 + +slow: go run ./devtools/slowproxy/ -d 25ms -i 1000000 -o 1000000 -j 10ms diff --git a/go.mod b/go.mod index b930b67aab..d7a8f0ae65 100644 --- a/go.mod +++ b/go.mod @@ -50,7 +50,7 @@ require ( github.com/sirupsen/logrus v1.8.1 github.com/slack-go/slack v0.10.2 github.com/spf13/afero v1.7.0 // indirect - github.com/spf13/cobra v1.3.0 + github.com/spf13/cobra v1.4.0 github.com/spf13/viper v1.10.1 github.com/stretchr/testify v1.7.0 github.com/t-k/fluent-logger-golang v1.0.0 // indirect @@ -69,7 +69,7 @@ require ( google.golang.org/genproto v0.0.0-20220211171837-173942840c17 // indirect google.golang.org/grpc v1.44.0 google.golang.org/grpc/cmd/protoc-gen-go-grpc v1.2.0 - google.golang.org/protobuf v1.27.1 + google.golang.org/protobuf v1.28.0 gopkg.in/alexcesaro/quotedprintable.v3 v3.0.0-20150716171945-2caba252f4dc // indirect gopkg.in/gomail.v2 v2.0.0-20160411212932-81ebce5c23df gopkg.in/mgo.v2 v2.0.0-20190816093944-a6b53ec6cb22 // indirect @@ -81,6 +81,7 @@ require ( cloud.google.com/go/compute 
v0.1.0 github.com/creack/pty v1.1.17 github.com/golang-jwt/jwt/v4 v4.3.0 + github.com/kyleconroy/sqlc v1.13.0 gorm.io/driver/postgres v1.2.3 gorm.io/gorm v1.22.5 ) @@ -96,13 +97,14 @@ require ( github.com/Masterminds/sprig v2.22.0+incompatible // indirect github.com/PuerkitoBio/goquery v1.5.0 // indirect github.com/andybalholm/cascadia v1.0.0 // indirect + github.com/antlr/antlr4/runtime/Go/antlr v0.0.0-20220209173558-ad29539cd2e9 // indirect github.com/beorn7/perks v1.0.1 // indirect github.com/census-instrumentation/opencensus-proto v0.3.0 // indirect github.com/cespare/xxhash/v2 v2.1.2 // indirect github.com/cpuguy83/go-md2man/v2 v2.0.1 // indirect github.com/fsnotify/fsnotify v1.5.1 // indirect github.com/golang/protobuf v1.5.2 // indirect - github.com/google/go-cmp v0.5.6 // indirect + github.com/google/go-cmp v0.5.7 // indirect github.com/googleapis/gax-go/v2 v2.1.1 // indirect github.com/googleapis/google-cloud-go-testing v0.0.0-20210719221736-1c9a4c676720 // indirect github.com/gorilla/context v1.1.1 // indirect @@ -132,7 +134,11 @@ require ( github.com/mitchellh/mapstructure v1.4.3 // indirect github.com/mitchellh/reflectwalk v1.0.0 // indirect github.com/olekukonko/tablewriter v0.0.5 // indirect + github.com/pganalyze/pg_query_go/v2 v2.1.0 // indirect github.com/philhofer/fwd v1.1.1 // indirect + github.com/pingcap/errors v0.11.5-0.20210425183316-da1aaba5fb63 // indirect + github.com/pingcap/log v0.0.0-20210906054005-afc726e70354 // indirect + github.com/pingcap/parser v0.0.0-20210914110036-002913dd28ec // indirect github.com/pmezard/go-difflib v1.0.0 // indirect github.com/prometheus/client_model v0.2.0 // indirect github.com/prometheus/common v0.32.1 // indirect @@ -147,6 +153,9 @@ require ( github.com/uber/jaeger-client-go v2.25.0+incompatible // indirect github.com/vanng822/css v0.0.0-20190504095207-a21e860bcd04 // indirect github.com/vanng822/go-premailer v0.0.0-20191214114701-be27abe028fe // indirect + go.uber.org/atomic v1.9.0 // indirect + 
go.uber.org/multierr v1.7.0 // indirect + go.uber.org/zap v1.19.1 // indirect golang.org/x/exp/typeparams v0.0.0-20220314205449-43aec2f8a4e7 // indirect golang.org/x/mod v0.6.0-dev.0.20220106191415-9b9b3d81d5e3 // indirect golang.org/x/sync v0.0.0-20210220032951-036812b2e83c // indirect @@ -155,6 +164,7 @@ require ( google.golang.org/api v0.64.0 // indirect google.golang.org/appengine v1.6.7 // indirect gopkg.in/ini.v1 v1.66.2 // indirect + gopkg.in/natefinch/lumberjack.v2 v2.0.0 // indirect gopkg.in/yaml.v2 v2.4.0 // indirect gopkg.in/yaml.v3 v3.0.0-20210107192922-496545a6307b // indirect ) diff --git a/go.sum b/go.sum index 40ea8f26ec..64fcf6acc0 100644 --- a/go.sum +++ b/go.sum @@ -27,7 +27,6 @@ cloud.google.com/go v0.90.0/go.mod h1:kRX0mNRHe0e2rC6oNakvwQqzyDmg57xJ+SZU1eT2aD cloud.google.com/go v0.93.3/go.mod h1:8utlLll2EF5XMAV15woO4lSbWQlk8rer9aLOfLh7+YI= cloud.google.com/go v0.94.1/go.mod h1:qAlAugsXlC+JWO+Bke5vCtc9ONxjQT3drlTTnAplMW4= cloud.google.com/go v0.97.0/go.mod h1:GF7l59pYBVlXQIBLx3a761cZ41F9bBH3JUlihCt2Udc= -cloud.google.com/go v0.98.0/go.mod h1:ua6Ush4NALrHk5QXDWnjvZHN93OuF0HfuEPq9I1X0cM= cloud.google.com/go v0.99.0/go.mod h1:w0Xx2nLzqWJPuozYQX+hFfCSI8WioryfRDzkoI/Y2ZA= cloud.google.com/go v0.100.1/go.mod h1:fs4QogzfH5n2pBXBP9vRiU+eCny7lD2vmFZy79Iuw1U= cloud.google.com/go v0.100.2 h1:t9Iw5QH5v4XtlEQaCtUY7x6sCABps8sW0acw7e2WQ6Y= @@ -43,7 +42,6 @@ cloud.google.com/go/compute v0.1.0/go.mod h1:GAesmwr110a34z04OlxYkATPBEfVhkymfTB cloud.google.com/go/datastore v1.0.0/go.mod h1:LXYbyblFSglQ5pkeyhO+Qmw7ukd3C+pD7TKLgZqpHYE= cloud.google.com/go/datastore v1.1.0/go.mod h1:umbIZjpQpHh4hmRpGhH4tLFup+FVzqBi1b3c64qFpCk= cloud.google.com/go/firestore v1.1.0/go.mod h1:ulACoGHTpvq5r8rxGJ4ddJZBZqakUQqClKRT5SZwBmk= -cloud.google.com/go/firestore v1.6.1/go.mod h1:asNXNOzBdyVQmEU+ggO8UPodTkEVFW5Qx+rwHnAz+EY= cloud.google.com/go/iam v0.1.1 h1:4CapQyNFjiksks1/x7jsvsygFPhihslYk5GptIrlX68= cloud.google.com/go/iam v0.1.1/go.mod h1:CKqrcnI/suGpybEHxZ7BMehL0oA4LpdyJdUlTl9jVMw= 
cloud.google.com/go/monitoring v1.1.0 h1:ZnyNdf/XRcynMmKzRSNTOdOyYPs6G7do1l2D2hIvIKo= @@ -73,7 +71,6 @@ github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03 github.com/BurntSushi/toml v1.0.0 h1:dtDWrepsVPfW9H/4y7dDgFc2MBUSeJhlaDtK13CxFlU= github.com/BurntSushi/toml v1.0.0/go.mod h1:CxXYINrC8qIiEnFrOxCa7Jy5BFHlXnUU2pbicEuybxQ= github.com/BurntSushi/xgb v0.0.0-20160522181843-27f122750802/go.mod h1:IVnqGOEym/WlBOVXweHU+Q+/VP0lqqI8lqeDx9IjBqo= -github.com/DataDog/datadog-go v3.2.0+incompatible/go.mod h1:LButxg5PwREeZtORoXG3tL4fMGNddJ+vMq1mwgfaqoQ= github.com/Masterminds/goutils v1.1.0 h1:zukEsf/1JZwCMgHiK3GZftabmxiCw4apj3a28RPBiVg= github.com/Masterminds/goutils v1.1.0/go.mod h1:8cTjp+g8YejhMuvIA5y2vz3BpJxksy863GQaJW2MFNU= github.com/Masterminds/semver v1.4.2/go.mod h1:MB6lktGJrhw8PrUyiEoblNEGEQ+RzHPF078ddwwvV3Y= @@ -101,17 +98,19 @@ github.com/andreyvit/diff v0.0.0-20170406064948-c7f18ee00883/go.mod h1:rCTlJbsFo github.com/andybalholm/cascadia v1.0.0 h1:hOCXnnZ5A+3eVDX8pvgl4kofXv2ELss0bKcqRySc45o= github.com/andybalholm/cascadia v1.0.0/go.mod h1:GsXiBklL0woXo1j/WYWtSYYC4ouU9PqHO0sqidkEA4Y= github.com/antihax/optional v1.0.0/go.mod h1:uupD/76wgC+ih3iEmQUL+0Ugr19nfwCT1kdvxnR2qWY= +github.com/antlr/antlr4/runtime/Go/antlr v0.0.0-20220209173558-ad29539cd2e9 h1:zvkJv+9Pxm1nnEMcKnShREt4qtduHKz4iw4AB4ul0Ao= +github.com/antlr/antlr4/runtime/Go/antlr v0.0.0-20220209173558-ad29539cd2e9/go.mod h1:F7bn7fEU90QkQ3tnmaTx3LTKLEDqnwWODIYppRQ5hnY= github.com/aokoli/goutils v1.0.1/go.mod h1:SijmP0QR8LtwsmDs8Yii5Z/S4trXFGFC2oO5g9DP+DQ= github.com/arbovm/levenshtein v0.0.0-20160628152529-48b4e1c0c4d0 h1:jfIu9sQUG6Ig+0+Ap1h4unLjW6YQJpKZVmUzxsD4E/Q= github.com/arbovm/levenshtein v0.0.0-20160628152529-48b4e1c0c4d0/go.mod h1:t2tdKJDJF9BV14lnkjHmOQgcvEKgtqs5a1N3LNdJhGE= github.com/armon/circbuf v0.0.0-20150827004946-bbbad097214e/go.mod h1:3U/XgcO3hCbHZ8TKRvWD2dDTCfh9M9ya+I9JpbB7O8o= github.com/armon/go-metrics v0.0.0-20180917152333-f0300d1749da/go.mod 
h1:Q73ZrmVTwzkszR9V5SSuryQ31EELlFMUz1kKyl939pY= -github.com/armon/go-metrics v0.3.10/go.mod h1:4O98XIr/9W0sxpJ8UaYkvjk10Iff7SnFrb4QAOwNTFc= github.com/armon/go-radix v0.0.0-20180808171621-7fddfc383310/go.mod h1:ufUuZ+zHj4x4TnLV4JWEpy2hxWSpsRywHrMgIH9cCH8= -github.com/armon/go-radix v1.0.0/go.mod h1:ufUuZ+zHj4x4TnLV4JWEpy2hxWSpsRywHrMgIH9cCH8= github.com/aws/aws-sdk-go v1.37.0/go.mod h1:hcU610XS61/+aQV88ixoOzUoG7v3b31pl2zKMmprdro= github.com/aws/aws-sdk-go v1.42.25 h1:BbdvHAi+t9LRiaYUyd53noq9jcaAcfzOhSVbKfr6Avs= github.com/aws/aws-sdk-go v1.42.25/go.mod h1:gyRszuZ/icHmHAVE4gc/r+cfCmhA1AD+vqfWbgI+eHs= +github.com/benbjohnson/clock v1.1.0 h1:Q92kusRqC1XV2MjkWETPvjJVqKetz1OzxZB7mHJLju8= +github.com/benbjohnson/clock v1.1.0/go.mod h1:J11/hYXuz8f4ySSvYwY0FKfm+ezbsZBKZxNJlLklBHA= github.com/beorn7/perks v0.0.0-20180321164747-3a771d992973/go.mod h1:Dwedo/Wpr24TaqPxmxbtue+5NUziq4I4S80YR8gNf3Q= github.com/beorn7/perks v1.0.0/go.mod h1:KWe93zE9D1o94FZ5RNwFwVgaQK1VOXiVxmqh+CedLV8= github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM= @@ -132,8 +131,6 @@ github.com/cespare/xxhash/v2 v2.1.2/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XL github.com/chzyer/logex v1.1.10/go.mod h1:+Ywpsq7O8HXn0nuIou7OrIPyXbp3wmkHB+jjWRnGsAI= github.com/chzyer/readline v0.0.0-20180603132655-2972be24d48e/go.mod h1:nSuG5e5PlCu98SY8svDHJxuZscDgtXS6KTTbou5AhLI= github.com/chzyer/test v0.0.0-20180213035817-a1ea475d72b1/go.mod h1:Q3SI9o4m/ZMnBNeIyt5eFwwo7qiLfzFZmjNmxjkiQlU= -github.com/circonus-labs/circonus-gometrics v2.3.1+incompatible/go.mod h1:nmEj6Dob7S7YxXgwXpfOuvO54S+tGdZdw9fuRZt25Ag= -github.com/circonus-labs/circonusllhist v0.1.3/go.mod h1:kMXHVDlOchFAehlya5ePtbp5jckzBHf4XRpQvBOLI+I= github.com/client9/misspell v0.3.4/go.mod h1:qj6jICC3Q7zFZvVWo7KLAzC3yx5G7kyvSDkc90ppPyw= github.com/cncf/udpa/go v0.0.0-20191209042840-269d4d468f6f/go.mod h1:M8M6+tZqaGXZJjfX53e64911xZQV5JYwmTeXPW+k8Sc= github.com/cncf/udpa/go v0.0.0-20200629203442-efcf912fb354/go.mod 
h1:WmhPx2Nbnhtbo57+VJT5O0JRkEi1Wbu0z5j0R8u5Hbk= @@ -142,9 +139,7 @@ github.com/cncf/udpa/go v0.0.0-20210930031921-04548b0d99d4/go.mod h1:6pvJx4me5XP github.com/cncf/xds/go v0.0.0-20210312221358-fbca930ec8ed/go.mod h1:eXthEFrGJvWHgFFCl3hGmgk+/aYT6PnTQLykKQRLhEs= github.com/cncf/xds/go v0.0.0-20210805033703-aa0b78936158/go.mod h1:eXthEFrGJvWHgFFCl3hGmgk+/aYT6PnTQLykKQRLhEs= github.com/cncf/xds/go v0.0.0-20210922020428-25de7278fc84/go.mod h1:eXthEFrGJvWHgFFCl3hGmgk+/aYT6PnTQLykKQRLhEs= -github.com/cncf/xds/go v0.0.0-20211001041855-01bcc9b48dfe/go.mod h1:eXthEFrGJvWHgFFCl3hGmgk+/aYT6PnTQLykKQRLhEs= github.com/cncf/xds/go v0.0.0-20211011173535-cb28da3451f1/go.mod h1:eXthEFrGJvWHgFFCl3hGmgk+/aYT6PnTQLykKQRLhEs= -github.com/cncf/xds/go v0.0.0-20211130200136-a8f946100490/go.mod h1:eXthEFrGJvWHgFFCl3hGmgk+/aYT6PnTQLykKQRLhEs= github.com/cockroachdb/apd v1.1.0 h1:3LFP3629v+1aKXU5Q37mxmRxX/pIu1nijXydLShEq5I= github.com/cockroachdb/apd v1.1.0/go.mod h1:8Sl8LxpKi29FqWXR16WEFZRNSz3SoPzUzeMeY4+DwBQ= github.com/coreos/go-oidc v2.2.1+incompatible h1:mh48q/BqXqgjVHpy2ZY7WnWAbenxRjsz9N1i1YxjHAk= @@ -160,6 +155,12 @@ github.com/cpuguy83/go-md2man/v2 v2.0.1/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46t github.com/creack/pty v1.1.7/go.mod h1:lj5s0c3V2DBrqTV7llrYr5NG6My20zk30Fl46Y7DoTY= github.com/creack/pty v1.1.17 h1:QeVUsEDNrLBW4tMgZHvxy18sKtr6VI492kBhUfhDJNI= github.com/creack/pty v1.1.17/go.mod h1:MOBLtS5ELjhRRrroQr9kyvTxUAFNvYEK993ew/Vr4O4= +github.com/cznic/golex v0.0.0-20181122101858-9c343928389c/go.mod h1:+bmmJDNmKlhWNG+gwWCkaBoTy39Fs+bzRxVBzoTQbIc= +github.com/cznic/mathutil v0.0.0-20181122101859-297441e03548/go.mod h1:e6NPNENfs9mPDVNRekM7lKScauxd5kXTr1Mfyig6TDM= +github.com/cznic/parser v0.0.0-20160622100904-31edd927e5b1/go.mod h1:2B43mz36vGZNZEwkWi8ayRSSUXLfjL8OkbzwW4NcPMM= +github.com/cznic/sortutil v0.0.0-20181122101858-f5f958428db8/go.mod h1:q2w6Bg5jeox1B+QkJ6Wp/+Vn0G/bo3f1uY7Fn3vivIQ= +github.com/cznic/strutil v0.0.0-20171016134553-529a34b1c186/go.mod 
h1:AHHPPPXTw0h6pVabbcbyGRK1DckRn7r/STdZEeIDzZc= +github.com/cznic/y v0.0.0-20170802143616-045f81c6662a/go.mod h1:1rk5VM7oSnA4vjp+hrLQ3HWHa+Y4yPCa3/CsJrcNnvs= github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= @@ -174,11 +175,8 @@ github.com/envoyproxy/go-control-plane v0.9.9-0.20201210154907-fd9021fe5dad/go.m github.com/envoyproxy/go-control-plane v0.9.9-0.20210217033140-668b12f5399d/go.mod h1:cXg6YxExXjJnVBQHBLXeUAgxn2UodCpnH306RInaBQk= github.com/envoyproxy/go-control-plane v0.9.9-0.20210512163311-63b5d3c536b0/go.mod h1:hliV/p42l8fGbc6Y9bQ70uLwIvmJyVE5k4iMKlh8wCQ= github.com/envoyproxy/go-control-plane v0.9.10-0.20210907150352-cf90f659a021/go.mod h1:AFq3mo9L8Lqqiid3OhADV3RfLJnjiw63cSpi+fDTRC0= -github.com/envoyproxy/go-control-plane v0.10.1/go.mod h1:AY7fTTXNdv/aJ2O5jwpxAPOWUZ7hQAEvzN5Pf27BkQQ= github.com/envoyproxy/protoc-gen-validate v0.1.0/go.mod h1:iSmxcyjqTsJpI2R4NaDN7+kN2VEUnK/pcBlmesArF7c= -github.com/envoyproxy/protoc-gen-validate v0.6.2/go.mod h1:2t7qjJNvHPx8IjnBOzl9E9/baC+qXE/TeeyBRzgJDws= github.com/fatih/color v1.7.0/go.mod h1:Zm6kSWBoL9eyXnKyktHP6abPY2pDugNf5KwzbycvMj4= -github.com/fatih/color v1.9.0/go.mod h1:eQcE1qtQxscV5RaZvpXrrb8Drkc3/DdQ+uUYCNjL+zU= github.com/fatih/color v1.13.0 h1:8LOYc1KYPPmyKMuN8QV2DNRWNbLo6LZ0iLs8+mlH53w= github.com/fatih/color v1.13.0/go.mod h1:kLAiJbzzSOZDVNGyDpeOxJ47H46qBXwg5ILebYFFOfk= github.com/felixge/httpsnoop v1.0.2 h1:+nS9g82KMXccJ/wp0zyRW9ZBHFETmMGtkk+2CTTrW4o= @@ -198,8 +196,10 @@ github.com/go-kit/log v0.1.0/go.mod h1:zbhenjAZHb184qTLMA9ZjW7ThYL0H2mk7Q6pNt4vb github.com/go-logfmt/logfmt v0.3.0/go.mod h1:Qt1PoO58o5twSAckw1HlFXLmHsOX5/0LbT9GBnD5lWE= github.com/go-logfmt/logfmt v0.4.0/go.mod h1:3RMwSq7FuexP4Kalkev3ejPJsZTpXXBr9+V4qmtdjCk= github.com/go-logfmt/logfmt v0.5.0/go.mod 
h1:wCYkCAKZfumFQihp8CzCvQ3paCTfi41vtzG1KdI/P7A= +github.com/go-sql-driver/mysql v1.3.0/go.mod h1:zAC/RDZ24gD3HViQzih4MyKcchzm+sOG5ZlKdlhCg5w= github.com/go-sql-driver/mysql v1.4.1/go.mod h1:zAC/RDZ24gD3HViQzih4MyKcchzm+sOG5ZlKdlhCg5w= github.com/go-sql-driver/mysql v1.5.0/go.mod h1:DCzpHaOWr8IXmIStZouvnhqoel9Qv2LBy8hT2VhHyBg= +github.com/go-sql-driver/mysql v1.6.0 h1:BCTh4TKNUYmOmMUcQ3IipzF5prigylS7XXjEkfCHuOE= github.com/go-stack/stack v1.8.0/go.mod h1:v0f6uXyyMGvRgIKkXu+yp6POWl0qKG85gN/melR3HDY= github.com/go-test/deep v1.0.4 h1:u2CU3YKy9I2pmu9pX0eq50wCgjfGIt539SqR7FbHiho= github.com/go-test/deep v1.0.4/go.mod h1:wGDj63lr65AM2AQyKZd/NYHGb0R+1RLqB8NKt3aSFNA= @@ -262,8 +262,9 @@ github.com/google/go-cmp v0.5.2/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/ github.com/google/go-cmp v0.5.3/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= github.com/google/go-cmp v0.5.4/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= -github.com/google/go-cmp v0.5.6 h1:BKbKCqvP6I+rmFHt06ZmyQtvB8xAkWdhFyr0ZUNZcxQ= github.com/google/go-cmp v0.5.6/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= +github.com/google/go-cmp v0.5.7 h1:81/ik6ipDQS2aGcBfIN5dHDB36BwrStyeAQquSYCV4o= +github.com/google/go-cmp v0.5.7/go.mod h1:n+brtR0CgQNWTVd5ZUFpTBC8YFBDLK/h/bpaJ8/DtOE= github.com/google/go-github v17.0.0+incompatible h1:N0LgJ1j65A7kfXrZnUDaYCs/Sf4rEjNlfyDHW9dolSY= github.com/google/go-github v17.0.0+incompatible/go.mod h1:zLgOLi98H3fifZn+44m+umXrS52loVEgC2AApnigrVQ= github.com/google/go-querystring v1.0.0 h1:Xkwi/a1rcvNg1PPYe5vI8GbeBY/jrVuDX5ASuANWTrk= @@ -319,23 +320,13 @@ github.com/gorilla/websocket v1.4.2 h1:+/TMaTYc4QFitKJxsQ7Yye35DkWvkdLcvGKqM+x0U github.com/gorilla/websocket v1.4.2/go.mod h1:YR8l580nyteQvAITg2hZ9XVh4b55+EU/adAjf1fMHhE= github.com/grpc-ecosystem/grpc-gateway v1.16.0/go.mod h1:BDjrQk3hbvj6Nolgz8mAMFbcEtjT1g+wF4CSlocrBnw= github.com/hashicorp/consul/api v1.1.0/go.mod 
h1:VmuI/Lkw1nC05EYQWNKwWGbkg+FbDBtguAZLlVdkD9Q= -github.com/hashicorp/consul/api v1.11.0/go.mod h1:XjsvQN+RJGWI2TWy1/kqaE16HrR2J/FWgkYjdZQsX9M= github.com/hashicorp/consul/sdk v0.1.1/go.mod h1:VKf9jXwCTEY1QZP2MOLRhb5i/I/ssyNV1vwHyQBF0x8= -github.com/hashicorp/consul/sdk v0.8.0/go.mod h1:GBvyrGALthsZObzUGsfgHZQDXjg4lOjagTIwIR1vPms= github.com/hashicorp/errwrap v1.0.0/go.mod h1:YH+1FKiLXxHSkmPseP+kNlulaMuP3n2brvKWEqk/Jc4= -github.com/hashicorp/go-cleanhttp v0.5.0/go.mod h1:JpRdi6/HCYpAwUzNwuwqhbovhLtngrth3wmdIIUrZ80= github.com/hashicorp/go-cleanhttp v0.5.1/go.mod h1:JpRdi6/HCYpAwUzNwuwqhbovhLtngrth3wmdIIUrZ80= -github.com/hashicorp/go-cleanhttp v0.5.2/go.mod h1:kO/YDlP8L1346E6Sodw+PrpBSV4/SoxCXGY6BqNFT48= -github.com/hashicorp/go-hclog v0.12.0/go.mod h1:whpDNt7SSdeAju8AWKIWsul05p54N/39EeqMAyrmvFQ= -github.com/hashicorp/go-hclog v1.0.0/go.mod h1:whpDNt7SSdeAju8AWKIWsul05p54N/39EeqMAyrmvFQ= github.com/hashicorp/go-immutable-radix v1.0.0/go.mod h1:0y9vanUI8NX6FsYoO3zeMjhV/C5i9g4Q3DwcSNZ4P60= -github.com/hashicorp/go-immutable-radix v1.3.1/go.mod h1:0y9vanUI8NX6FsYoO3zeMjhV/C5i9g4Q3DwcSNZ4P60= github.com/hashicorp/go-msgpack v0.5.3/go.mod h1:ahLV/dePpqEmjfWmKiqvPkv/twdG7iPBM1vqhUKIvfM= github.com/hashicorp/go-multierror v1.0.0/go.mod h1:dHtQlpGsu+cZNNAkkCN/P3hoUDHhCYQXV3UM06sGGrk= -github.com/hashicorp/go-multierror v1.1.0/go.mod h1:spPvp8C1qA32ftKqdAHm4hHTbPw+vmowP0z+KUhOZdA= -github.com/hashicorp/go-retryablehttp v0.5.3/go.mod h1:9B5zBasrRhHXnJnui7y6sL7es7NDiJgTc6Er0maI1Xs= github.com/hashicorp/go-rootcerts v1.0.0/go.mod h1:K6zTfqpRlCUIjkwsN4Z+hiSfzSTQa6eBIzfwKfwNnHU= -github.com/hashicorp/go-rootcerts v1.0.2/go.mod h1:pqUvnprVnM5bf7AOirdbb01K4ccR319Vf4pU3K5EGc8= github.com/hashicorp/go-sockaddr v1.0.0/go.mod h1:7Xibr9yA9JjQq1JpNB2Vw7kxv8xerXegt+ozgdvDeDU= github.com/hashicorp/go-syslog v1.0.0/go.mod h1:qPfqrKkXGihmCqbJM2mZgkZGvKG1dFdvsLplgctolz4= github.com/hashicorp/go-uuid v1.0.0/go.mod h1:6SBZvOh/SIDV7/2o3Jml5SYk/TvGqwFJ/bN7x4byOro= @@ -349,14 +340,8 @@ 
github.com/hashicorp/hcl v1.0.0 h1:0Anlzjpi4vEasTeNFn2mLJgTSwt0+6sfsiTG8qcWGx4= github.com/hashicorp/hcl v1.0.0/go.mod h1:E5yfLk+7swimpb2L/Alb/PJmXilQ/rhwaUYs4T20WEQ= github.com/hashicorp/logutils v1.0.0/go.mod h1:QIAnNjmIWmVIIkWDTG1z5v++HQmx9WQRO+LraFDTW64= github.com/hashicorp/mdns v1.0.0/go.mod h1:tL+uN++7HEJ6SQLQ2/p+z2pH24WQKWjBPkE0mNTz8vQ= -github.com/hashicorp/mdns v1.0.1/go.mod h1:4gW7WsVCke5TE7EPeYliwHlRUyBtfCwuFwuMg2DmyNY= -github.com/hashicorp/mdns v1.0.4/go.mod h1:mtBihi+LeNXGtG8L9dX59gAEa12BDtBQSp4v/YAJqrc= github.com/hashicorp/memberlist v0.1.3/go.mod h1:ajVTdAv/9Im8oMAAj5G31PhhMCZJV2pPBoIllUwCN7I= -github.com/hashicorp/memberlist v0.2.2/go.mod h1:MS2lj3INKhZjWNqd3N0m3J+Jxf3DAOnAH9VT3Sh9MUE= -github.com/hashicorp/memberlist v0.3.0/go.mod h1:MS2lj3INKhZjWNqd3N0m3J+Jxf3DAOnAH9VT3Sh9MUE= github.com/hashicorp/serf v0.8.2/go.mod h1:6hOLApaqBFA1NXqRQAsxw9QxuDEvNxSQRwA/JwenrHc= -github.com/hashicorp/serf v0.9.5/go.mod h1:UWDWwZeL5cuWDJdl0C6wrvrUwEqtQ4ZKBKKENpqIUyk= -github.com/hashicorp/serf v0.9.6/go.mod h1:TXZNMjZQijwlDvp+r0b63xZ45H7JmCmgg4gpTwn9UV4= github.com/hashicorp/yamux v0.0.0-20211028200310-0bc27b27de87 h1:xixZ2bWeofWV68J+x6AzmKuVM/JWCQwkWm6GW/MUR6I= github.com/hashicorp/yamux v0.0.0-20211028200310-0bc27b27de87/go.mod h1:CtWFDAQgb7dxtzFs4tWbplKIe2jSi3+5vKbgIO0SLnQ= github.com/huandu/xstrings v1.2.0/go.mod h1:DvyZB1rfVYsBIigL8HwpZgxHwXozlTgGqn63UyNX5k4= @@ -370,7 +355,6 @@ github.com/ian-kent/goose v0.0.0-20141221090059-c3541ea826ad h1:5UZIY1lPvsBrRQRg github.com/ian-kent/goose v0.0.0-20141221090059-c3541ea826ad/go.mod h1:VHyJj0/IJFmpYvVqWFIN2HgjCatXujj7XaLLyOMC23M= github.com/ian-kent/linkio v0.0.0-20170807205755-97566b872887 h1:LPaZmcRJS13h+igi07S26uKy0qxCa76u1+pArD+JGrY= github.com/ian-kent/linkio v0.0.0-20170807205755-97566b872887/go.mod h1:aE63iKqF9rMrshaEiYZroUYFZLaYoTuA7pBMsg3lJoY= -github.com/iancoleman/strcase v0.2.0/go.mod h1:iwCmte+B7n89clKwxIoIXy/HfoL7AsD47ZCWhYzw7ho= github.com/ianlancetaylor/demangle 
v0.0.0-20181102032728-5e5cf60278f6/go.mod h1:aSSvb/t6k1mPoxDqO4vJh6VOCGPwU4O0C2/Eqndh1Sc= github.com/ianlancetaylor/demangle v0.0.0-20200824232613-28f6c0f3b639/go.mod h1:aSSvb/t6k1mPoxDqO4vJh6VOCGPwU4O0C2/Eqndh1Sc= github.com/imdario/mergo v0.3.6/go.mod h1:2EnlNZ0deacrJVfApfmtdGgDfMuh/nq6Ok1EcJh5FfA= @@ -445,7 +429,6 @@ github.com/joho/godotenv v1.4.0 h1:3l4+N6zfMWnkbPEXKng2o2/MR5mSwTrBih4ZEkkz1lg= github.com/joho/godotenv v1.4.0/go.mod h1:f4LDr5Voq0i2e/R5DDNOoa2zzDfwtkZa6DnEwAbqwq4= github.com/jpillora/backoff v1.0.0/go.mod h1:J/6gKK9jxlEcS3zixgDgUAsiuZ7yrSoa/FX5e0EB2j4= github.com/json-iterator/go v1.1.6/go.mod h1:+SdeFBvtyEkXs7REEP0seUULqWtbJapLOCVDaaPEHmU= -github.com/json-iterator/go v1.1.9/go.mod h1:KdQUCv79m/52Kvf8AW2vK1V8akMuk1QjK/uOdHXbAo4= github.com/json-iterator/go v1.1.10/go.mod h1:KdQUCv79m/52Kvf8AW2vK1V8akMuk1QjK/uOdHXbAo4= github.com/json-iterator/go v1.1.11/go.mod h1:KdQUCv79m/52Kvf8AW2vK1V8akMuk1QjK/uOdHXbAo4= github.com/json-iterator/go v1.1.12/go.mod h1:e30LSqwooZae/UwlEbR2852Gd8hjQvJoHmT4TnhNGBo= @@ -466,21 +449,21 @@ github.com/kortschak/utter v1.0.1/go.mod h1:vSmSjbyrlKjjsL71193LmzBOKgwePk9DH6uF github.com/kr/fs v0.1.0/go.mod h1:FFnZGqtBN9Gxj7eW1uZ42v5BccTP0vu6NEaFoC2HwRg= github.com/kr/logfmt v0.0.0-20140226030751-b84e30acd515/go.mod h1:+0opPa2QZZtGFBFZlji/RkVcI2GknAs/DXo4wKdlNEc= github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo= -github.com/kr/pretty v0.2.0/go.mod h1:ipq/a2n7PKx3OHsz4KJII5eveXtPO4qwEXGdVfWzfnI= github.com/kr/pretty v0.2.1 h1:Fmg33tUaq4/8ym9TJN1x7sLJnHVwhP33CNkpYV/7rwI= github.com/kr/pretty v0.2.1/go.mod h1:ipq/a2n7PKx3OHsz4KJII5eveXtPO4qwEXGdVfWzfnI= github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ= github.com/kr/pty v1.1.8/go.mod h1:O1sed60cT9XZ5uDucP5qwvh+TE3NnUj51EiZO/lmSfw= github.com/kr/text v0.1.0 h1:45sCR5RtlFHMR4UwH9sdQ5TC8v0qDQCHnXt+kaKSTVE= github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI= +github.com/kyleconroy/sqlc 
v1.13.0 h1:fhfOZbfuF7ELo5uMB8XkgZJRTnc1YQYtzhyLgp7PzFM= +github.com/kyleconroy/sqlc v1.13.0/go.mod h1:vO++DmIYZfpVDnR8sgiYkjGB/22E8R7h66fykOIzhI8= github.com/lib/pq v1.0.0/go.mod h1:5WUZQaWbwv1U+lTReE5YruASi9Al49XbQIvNi/34Woo= github.com/lib/pq v1.1.0/go.mod h1:5WUZQaWbwv1U+lTReE5YruASi9Al49XbQIvNi/34Woo= github.com/lib/pq v1.2.0/go.mod h1:5WUZQaWbwv1U+lTReE5YruASi9Al49XbQIvNi/34Woo= github.com/lib/pq v1.10.0/go.mod h1:AlVN5x4E4T544tWzH6hKfbfQvm3HdbOxrmggDNAPY9o= -github.com/lib/pq v1.10.2 h1:AqzbZs4ZoCBp+GtejcpCpcxM3zlSMx29dXbUSeVtJb8= github.com/lib/pq v1.10.2/go.mod h1:AlVN5x4E4T544tWzH6hKfbfQvm3HdbOxrmggDNAPY9o= +github.com/lib/pq v1.10.4 h1:SO9z7FRPzA03QhHKJrH5BXA6HU1rS4V2nIVrrNC1iYk= github.com/logrusorgru/aurora/v3 v3.0.0/go.mod h1:vsR12bk5grlLvLXAYrBsb5Oc/N+LxAlxggSjiwMnCUc= -github.com/lyft/protoc-gen-star v0.5.3/go.mod h1:V0xaHgaf5oCCqmcxYcWiDfTiKsZsRc87/1qhoTACD8w= github.com/magiconair/properties v1.8.5 h1:b6kJs+EmPFMYGkow9GiUyCyOvIwYetYJ3fSaWak/Gls= github.com/magiconair/properties v1.8.5/go.mod h1:y3VJvCyxH9uVvJTWEGAELF3aiYNyPKd5NZ3oSwXrF60= github.com/mailhog/MailHog v1.0.1 h1:NDExFIj+JGzXT3kmG31r7Okrn78Sk/5p9lP/TV8OE4E= @@ -516,8 +499,6 @@ github.com/mattn/go-isatty v0.0.3/go.mod h1:M+lRXTBqGeGNdLjl/ufCoiOlB5xdOkqRJdNx github.com/mattn/go-isatty v0.0.5/go.mod h1:Iq45c/XA43vh69/j3iqttzPXn0bhXyGjM0Hdxcsrc5s= github.com/mattn/go-isatty v0.0.7/go.mod h1:Iq45c/XA43vh69/j3iqttzPXn0bhXyGjM0Hdxcsrc5s= github.com/mattn/go-isatty v0.0.8/go.mod h1:Iq45c/XA43vh69/j3iqttzPXn0bhXyGjM0Hdxcsrc5s= -github.com/mattn/go-isatty v0.0.10/go.mod h1:qgIWMr58cqv1PHHyhnkY9lrL7etaEgOFcMEpPG5Rm84= -github.com/mattn/go-isatty v0.0.11/go.mod h1:PhnuNfih5lzO57/f3n+odYbM4JtupLOxQOAqxQCu2WE= github.com/mattn/go-isatty v0.0.12/go.mod h1:cbi8OIDigv2wuxKPP5vlRcQ1OAZbq2CE4Kysco4FUpU= github.com/mattn/go-isatty v0.0.14 h1:yVuAays6BHfxijgZPzw+3Zlu5yQgKGP2/hcQbHb7S9Y= github.com/mattn/go-isatty v0.0.14/go.mod h1:7GGIvUiUoEMVVmxf/4nioHXj79iQHKdU27kJ6hsGG94= @@ -531,15 +512,11 @@ 
github.com/mattn/go-sqlite3 v1.14.6/go.mod h1:NyWgC/yNuGj7Q9rpYnZvas74GogHl5/Z4A github.com/matttproud/golang_protobuf_extensions v1.0.1 h1:4hp9jkHxhMHkqkrB3Ix0jegS5sx/RkqARlsWZ6pIwiU= github.com/matttproud/golang_protobuf_extensions v1.0.1/go.mod h1:D8He9yQNgCq6Z5Ld7szi9bcBfOoFv/3dc6xSMkL2PC0= github.com/miekg/dns v1.0.14/go.mod h1:W1PPwlIAgtquWBMBEV9nkV9Cazfe8ScdGz/Lj7v3Nrg= -github.com/miekg/dns v1.1.26/go.mod h1:bPDLeHnStXmXAq1m/Ch/hvfNHr14JKNPMBo3VZKjuso= -github.com/miekg/dns v1.1.41/go.mod h1:p6aan82bvRIyn+zDIv9xYNUpwa73JcSh9BKwknJysuI= github.com/mitchellh/cli v1.0.0/go.mod h1:hNIlj7HEI86fIcpObd7a0FcrxTWetlwJDGcceTlRvqc= -github.com/mitchellh/cli v1.1.0/go.mod h1:xcISNoH86gajksDmfB23e/pu+B+GeFRMYmoHXxx3xhI= github.com/mitchellh/cli v1.1.2/go.mod h1:6iaV0fGdElS6dPBx0EApTxHrcWvmJphyh2n8YBLPPZ4= github.com/mitchellh/copystructure v1.0.0 h1:Laisrj+bAB6b/yJwB5Bt3ITZhGJdqmxquMKeZ+mmkFQ= github.com/mitchellh/copystructure v1.0.0/go.mod h1:SNtv71yrdKgLRyLFxmLdkAbkKEFWgYaq1OVrnRcwhnw= github.com/mitchellh/go-homedir v1.0.0/go.mod h1:SfyaCUpYCn1Vlf4IUYiD9fPX4A5wJrkLzIz1N1q0pr0= -github.com/mitchellh/go-homedir v1.1.0/go.mod h1:SfyaCUpYCn1Vlf4IUYiD9fPX4A5wJrkLzIz1N1q0pr0= github.com/mitchellh/go-testing-interface v1.0.0/go.mod h1:kRemZodwjscx+RGhAo8eIhFbs2+BFgRtFPeD/KE+zxI= github.com/mitchellh/gox v0.4.0/go.mod h1:Sd9lOJ0+aimLBi73mGofS1ycjY8lL3uZM3JPS42BGNg= github.com/mitchellh/iochan v1.0.0/go.mod h1:JwYml1nuB7xOzsp52dPpHFffvOCDupsG0QubkSMEySY= @@ -564,12 +541,23 @@ github.com/olekukonko/tablewriter v0.0.1/go.mod h1:vsDQFd/mU46D+Z4whnwzcISnGGzXW github.com/olekukonko/tablewriter v0.0.5 h1:P2Ga83D34wi1o9J6Wh1mRuqd4mF/x/lgBS7N7AbDhec= github.com/olekukonko/tablewriter v0.0.5/go.mod h1:hPp6KlRPjbx+hW8ykQs1w3UBbZlj6HuIJcUGPhkA7kY= github.com/pascaldekloe/goe v0.0.0-20180627143212-57f6aae5913c/go.mod h1:lzWF7FIEvWOWxwDKqyGYQf6ZUaNfKdP144TG7ZOy1lc= -github.com/pascaldekloe/goe v0.1.0/go.mod h1:lzWF7FIEvWOWxwDKqyGYQf6ZUaNfKdP144TG7ZOy1lc= github.com/pelletier/go-toml 
v1.9.3/go.mod h1:u1nR/EPcESfeI/szUZKdtJ0xRNbUoANCkoOuaOx1Y+c= github.com/pelletier/go-toml v1.9.4 h1:tjENF6MfZAg8e4ZmZTeWaWiT2vXtsoO6+iuOjFhECwM= github.com/pelletier/go-toml v1.9.4/go.mod h1:u1nR/EPcESfeI/szUZKdtJ0xRNbUoANCkoOuaOx1Y+c= +github.com/pganalyze/pg_query_go/v2 v2.1.0 h1:donwPZ4G/X+kMs7j5eYtKjdziqyOLVp3pkUrzb9lDl8= +github.com/pganalyze/pg_query_go/v2 v2.1.0/go.mod h1:XAxmVqz1tEGqizcQ3YSdN90vCOHBWjJi8URL1er5+cA= github.com/philhofer/fwd v1.1.1 h1:GdGcTjf5RNAxwS4QLsiMzJYj5KEvPJD3Abr261yRQXQ= github.com/philhofer/fwd v1.1.1/go.mod h1:gk3iGcWd9+svBvR0sR+KPcfE+RNWozjowpeBVG3ZVNU= +github.com/pingcap/check v0.0.0-20190102082844-67f458068fc8 h1:USx2/E1bX46VG32FIw034Au6seQ2fY9NEILmNh/UlQg= +github.com/pingcap/check v0.0.0-20190102082844-67f458068fc8/go.mod h1:B1+S9LNcuMyLH/4HMTViQOJevkGiik3wW2AN9zb2fNQ= +github.com/pingcap/errors v0.11.0/go.mod h1:Oi8TUi2kEtXXLMJk9l1cGmz20kV3TaQ0usTwv5KuLY8= +github.com/pingcap/errors v0.11.5-0.20210425183316-da1aaba5fb63 h1:+FZIDR/D97YOPik4N4lPDaUcLDF/EQPogxtlHB2ZZRM= +github.com/pingcap/errors v0.11.5-0.20210425183316-da1aaba5fb63/go.mod h1:X2r9ueLEUZgtx2cIogM0v4Zj5uvvzhuuiu7Pn8HzMPg= +github.com/pingcap/log v0.0.0-20210625125904-98ed8e2eb1c7/go.mod h1:8AanEdAHATuRurdGxZXBz0At+9avep+ub7U1AGYLIMM= +github.com/pingcap/log v0.0.0-20210906054005-afc726e70354 h1:SvWCbCPh1YeHd9yQLksvJYAgft6wLTY1aNG81tpyscQ= +github.com/pingcap/log v0.0.0-20210906054005-afc726e70354/go.mod h1:DWQW5jICDR7UJh4HtxXSM20Churx4CQL0fwL/SoOSA4= +github.com/pingcap/parser v0.0.0-20210914110036-002913dd28ec h1:tUcualrzARkmDCM4OGT27cEnjDyAN1MW5AoZqmTCITA= +github.com/pingcap/parser v0.0.0-20210914110036-002913dd28ec/go.mod h1:+xcMiiZzdIktT/Nqdfm81dkECJ2EPuoAYywd57py4Pk= github.com/pkg/diff v0.0.0-20210226163009-20ebb0f2a09e/go.mod h1:pJLUxLENpZxwdsKMEsNbx1VGcRFpLqf3715MtcvvzbA= github.com/pkg/errors v0.8.0/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= github.com/pkg/errors v0.8.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= @@ -580,13 
+568,11 @@ github.com/pkg/sftp v1.13.1/go.mod h1:3HaPG6Dq1ILlpPZRO0HVMrsydcdLt6HRDccSgb87qR github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= github.com/posener/complete v1.1.1/go.mod h1:em0nMJCgc9GFtwrmVmEMR/ZL6WyhyjMBndrE9hABlRI= -github.com/posener/complete v1.2.3/go.mod h1:WZIdtGGp+qx0sLrYKtIRAruyNpv6hFCicSgv7Sy7s/s= github.com/poy/onpar v0.0.0-20190519213022-ee068f8ea4d1/go.mod h1:nSbFQvMj97ZyhFRSJYtut+msi4sOY6zJDGCdSc+/rZU= github.com/pquerna/cachecontrol v0.1.0 h1:yJMy84ti9h/+OEWa752kBTKv4XC30OtVVHYv/8cTqKc= github.com/pquerna/cachecontrol v0.1.0/go.mod h1:NrUG3Z7Rdu85UNR3vm7SOsl1nFIeSiQnrHV5K9mBcUI= github.com/prometheus/client_golang v0.9.1/go.mod h1:7SWBe2y4D6OKWSNQJUaRYU/AaXPKyh/dDVn+NZz0KFw= github.com/prometheus/client_golang v1.0.0/go.mod h1:db9x61etRT2tGnBNRi70OPL5FsnadC4Ky3P0J6CfImo= -github.com/prometheus/client_golang v1.4.0/go.mod h1:e9GMxYsXl05ICDXkRhurwBS4Q3OK1iX/F2sw+iXX5zU= github.com/prometheus/client_golang v1.7.1/go.mod h1:PY5Wy2awLA44sXw4AOSfFBetzPP4j5+D6mVACh+pe2M= github.com/prometheus/client_golang v1.11.0/go.mod h1:Z6t4BnS23TR94PD6BsDNk8yVqroYurpAkEiz0P2BEV0= github.com/prometheus/client_golang v1.12.1 h1:ZiaPsmm9uiBeaSMRznKsCDNtPCS0T3JVDGF+06gjBzk= @@ -597,18 +583,17 @@ github.com/prometheus/client_model v0.0.0-20190812154241-14fe0d1b01d4/go.mod h1: github.com/prometheus/client_model v0.2.0 h1:uq5h0d+GuxiXLJLNABMgp2qUWDPiLvgCzz2dUR+/W/M= github.com/prometheus/client_model v0.2.0/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA= github.com/prometheus/common v0.4.1/go.mod h1:TNfzLD0ON7rHzMJeJkieUDPYmFC7Snx/y86RQel1bk4= -github.com/prometheus/common v0.9.1/go.mod h1:yhUN8i9wzaXS3w1O07YhxHEBxD+W35wd8bs7vj7HSQ4= github.com/prometheus/common v0.10.0/go.mod h1:Tlit/dnDKsSWFlCLTWaA1cyBgKHSMdTB80sz/V91rCo= github.com/prometheus/common v0.26.0/go.mod h1:M7rCNAaPfAosfx8veZJCuw84e35h3Cfd9VFqTh1DIvc= 
github.com/prometheus/common v0.32.1 h1:hWIdL3N2HoUx3B8j3YN9mWor0qhY/NlEKZEaXxuIRh4= github.com/prometheus/common v0.32.1/go.mod h1:vu+V0TpY+O6vW9J44gczi3Ap/oXXR10b+M/gUGO4Hls= github.com/prometheus/procfs v0.0.0-20181005140218-185b4288413d/go.mod h1:c3At6R/oaqEKCNdg8wHV1ftS6bRYblBhIjjI8uT2IGk= github.com/prometheus/procfs v0.0.2/go.mod h1:TjEm7ze935MbeOT/UhFTIMYKhuLP4wbCsTZCD3I8kEA= -github.com/prometheus/procfs v0.0.8/go.mod h1:7Qr8sr6344vo1JqZ6HhLceV9o3AJ1Ff+GxbHq6oeK9A= github.com/prometheus/procfs v0.1.3/go.mod h1:lV6e/gmhEcM9IjHGsFOCxxuZ+z1YqCvr4OA4YeYWdaU= github.com/prometheus/procfs v0.6.0/go.mod h1:cz+aTbrPOrUb4q7XlbU9ygM+/jj0fzG6c1xBZuNvfVA= github.com/prometheus/procfs v0.7.3 h1:4jVXhlkAyzOScmCkXBTOLRLTz8EeU+eyjrwB/EPq0VU= github.com/prometheus/procfs v0.7.3/go.mod h1:cz+aTbrPOrUb4q7XlbU9ygM+/jj0fzG6c1xBZuNvfVA= +github.com/remyoudompheng/bigfft v0.0.0-20200410134404-eec4a21b6bb0/go.mod h1:qqbHyh8v60DhA7CoWK5oRCqLrMHRGoxYCSS9EjAz6Eo= github.com/rivo/uniseg v0.1.0/go.mod h1:J6wj4VEh+S6ZtnVlnTBMWIodfgj8LQOQFoIToxlJtxc= github.com/rivo/uniseg v0.2.0 h1:S1pD9weZBuJdFmowNwbpi7BJ8TNftyUImj/0WQi72jY= github.com/rivo/uniseg v0.2.0/go.mod h1:J6wj4VEh+S6ZtnVlnTBMWIodfgj8LQOQFoIToxlJtxc= @@ -624,7 +609,6 @@ github.com/russross/blackfriday/v2 v2.0.1/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQD github.com/russross/blackfriday/v2 v2.1.0 h1:JIOH55/0cWyOuilr9/qlrm0BSXldqnqwMsf35Ld67mk= github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM= github.com/ryanuber/columnize v0.0.0-20160712163229-9b3edd62028f/go.mod h1:sm1tb6uqfes/u+d4ooFouqFdy9/2g9QGwK3SQygK0Ts= -github.com/sagikazarmark/crypt v0.3.0/go.mod h1:uD/D+6UF4SrIR1uGEv7bBNkNqLGqUr43MRiaGWX1Nig= github.com/satori/go.uuid v1.2.0/go.mod h1:dA0hQrYB0VpLJoorglMZABFdXlWrHn1NEOzdhQKdks0= github.com/sean-/seed v0.0.0-20170313163322-e2103e2c3529/go.mod h1:DxrIzT+xaE7yg65j358z/aeFdxmN0P9QXhEzd20vsDc= github.com/sergi/go-diff v1.1.0 h1:we8PVUC3FE2uYfodKH/nBHMSetSfHDR6scGdBi+erh0= @@ -648,7 
+632,6 @@ github.com/smartystreets/goconvey v1.6.4/go.mod h1:syvi0/a8iFYH4r/RixwvyeAJjdLS9 github.com/smartystreets/goconvey v1.7.2 h1:9RBaZCeXEQ3UselpuwUQHltGVXvdwm6cv1hgR6gDIPg= github.com/smartystreets/goconvey v1.7.2/go.mod h1:Vw0tHAZW6lzCRk3xgdin6fKYcG+G3Pg9vgXWeJpQFMM= github.com/spaolacci/murmur3 v0.0.0-20180118202830-f09979ecbc72/go.mod h1:JwIasOWyU6f++ZhiEuf87xNszmSA2myDM2Kzu9HwQUA= -github.com/spf13/afero v1.3.3/go.mod h1:5KUK8ByomD5Ti5Artl0RtHeI5pTF7MIDuXL3yY520V4= github.com/spf13/afero v1.6.0/go.mod h1:Ai8FlHk4v/PARR026UzYexafAt9roJ7LcLMAmO6Z93I= github.com/spf13/afero v1.7.0 h1:xc1yh8vgcNB8yQ+UqY4cpD56Ogo573e+CJ/C4YmMFTg= github.com/spf13/afero v1.7.0/go.mod h1:CtAatgMJh6bJEIs48Ay/FOnkljP3WeGUG0MC1RfAqwo= @@ -656,14 +639,13 @@ github.com/spf13/cast v1.3.1/go.mod h1:Qx5cxh0v+4UWYiBimWS+eyWzqEqokIECu5etghLkU github.com/spf13/cast v1.4.1 h1:s0hze+J0196ZfEMTs80N7UlFt0BDuQ7Q+JDnHiMWKdA= github.com/spf13/cast v1.4.1/go.mod h1:Qx5cxh0v+4UWYiBimWS+eyWzqEqokIECu5etghLkUJE= github.com/spf13/cobra v1.2.1/go.mod h1:ExllRjgxM/piMAM+3tAZvg8fsklGAf3tPfi+i8t68Nk= -github.com/spf13/cobra v1.3.0 h1:R7cSvGu+Vv+qX0gW5R/85dx2kmmJT5z5NM8ifdYjdn0= -github.com/spf13/cobra v1.3.0/go.mod h1:BrRVncBjOJa/eUcVVm9CE+oC6as8k+VYr4NY7WCi9V4= +github.com/spf13/cobra v1.4.0 h1:y+wJpx64xcgO1V+RcnwW0LEHxTKRi2ZDPSBjWnrg88Q= +github.com/spf13/cobra v1.4.0/go.mod h1:Wo4iy3BUC+X2Fybo0PDqwJIv3dNRiZLHQymsfxlB84g= github.com/spf13/jwalterweatherman v1.1.0 h1:ue6voC5bR5F8YxI5S67j9i582FU4Qvo2bmqnqMYADFk= github.com/spf13/jwalterweatherman v1.1.0/go.mod h1:aNWZUN0dPAAO/Ljvb5BEdw96iTZ0EXowPYD95IqWIGo= github.com/spf13/pflag v1.0.5 h1:iy+VFUOCP1a+8yFto/drg2CJ5u0yRoB7fZw3DKv/JXA= github.com/spf13/pflag v1.0.5/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg= github.com/spf13/viper v1.8.1/go.mod h1:o0Pch8wJ9BVSWGQMbra6iw0oQ5oktSIBaujf1rJH9Ns= -github.com/spf13/viper v1.10.0/go.mod h1:SoyBPwAtKDzypXNDFKN5kzH7ppppbGZtls1UpIy5AsM= github.com/spf13/viper v1.10.1 
h1:nuJZuYpG7gTj/XqiUwg8bA0cp1+M2mC3J4g5luUYBKk= github.com/spf13/viper v1.10.1/go.mod h1:IGlFPqhNAPKRxohIzWpI5QEy4kuI7tcl5WvR+8qy1rU= github.com/ssor/bom v0.0.0-20170718123548-6386211fdfcf h1:pvbZ0lM0XWPBqUKqFU8cmavspvIl9nulOYwdy6IFRRo= @@ -689,7 +671,6 @@ github.com/ttacon/builder v0.0.0-20170518171403-c099f663e1c2/go.mod h1:4kyMkleCi github.com/ttacon/chalk v0.0.0-20160626202418-22c06c80ed31/go.mod h1:onvgF043R+lC5RZ8IT9rBXDaEDnpnw/Cl+HFiw+v/7Q= github.com/ttacon/libphonenumber v1.2.1 h1:fzOfY5zUADkCkbIafAed11gL1sW+bJ26p6zWLBMElR4= github.com/ttacon/libphonenumber v1.2.1/go.mod h1:E0TpmdVMq5dyVlQ7oenAkhsLu86OkUl+yR4OAxyEg/M= -github.com/tv42/httpunix v0.0.0-20150427012821-b75d8614f926/go.mod h1:9ESjWnEqriFuLhtthL60Sar/7RFoluCcXsuvEwTV5KM= github.com/uber/jaeger-client-go v2.25.0+incompatible h1:IxcNZ7WRY1Y3G4poYlx24szfsn/3LvK9QHCq9oQw8+U= github.com/uber/jaeger-client-go v2.25.0+incompatible/go.mod h1:WVhlPFC8FDjOFMMWRy2pZqQJSXxYSwNYOkTr/Z6d3Kk= github.com/urfave/cli/v2 v2.3.0 h1:qph92Y649prgesehzOrQjdWyxFOp/QVM+6imKHad91M= @@ -710,11 +691,8 @@ github.com/yuin/goldmark v1.4.0/go.mod h1:mwnBkeHKe2W/ZEtQ+71ViKU8L12m81fl3OWwC1 github.com/zenazn/goji v0.9.0/go.mod h1:7S9M489iMyHBNxwZnk9/EHS098H4/F6TATF2mIxtB1Q= github.com/ziutek/mymysql v1.5.4/go.mod h1:LMSpPZ6DbqWFxNCHW77HeMg9I646SAhApZ/wKdgO/C0= go.etcd.io/etcd/api/v3 v3.5.0/go.mod h1:cbVKeC6lCfl7j/8jBhAK6aIYO9XOjdptoxU/nLQcPvs= -go.etcd.io/etcd/api/v3 v3.5.1/go.mod h1:cbVKeC6lCfl7j/8jBhAK6aIYO9XOjdptoxU/nLQcPvs= go.etcd.io/etcd/client/pkg/v3 v3.5.0/go.mod h1:IJHfcCEKxYu1Os13ZdwCwIUTUVGYTSAM3YSwc9/Ac1g= -go.etcd.io/etcd/client/pkg/v3 v3.5.1/go.mod h1:IJHfcCEKxYu1Os13ZdwCwIUTUVGYTSAM3YSwc9/Ac1g= go.etcd.io/etcd/client/v2 v2.305.0/go.mod h1:h9puh54ZTgAKtEbut2oe9P4L/oqKCVB6xsXlzd7alYQ= -go.etcd.io/etcd/client/v2 v2.305.1/go.mod h1:pMEacxZW7o8pg4CrFE7pquyCJJzZvkvdD2RibOCCCGs= go.opencensus.io v0.21.0/go.mod h1:mSImk1erAIZhrmZN+AvHh14ztQfjbGwt4TtuofqLduU= go.opencensus.io v0.22.0/go.mod 
h1:+kGneAE2xo2IficOXnaByMWTGM9T73dGwxeWcUqIpI8= go.opencensus.io v0.22.2/go.mod h1:yxeiOL68Rb0Xd1ddK5vPZ/oVn4vY4Ynel7k9FzqtOIw= @@ -729,15 +707,26 @@ go.uber.org/atomic v1.4.0/go.mod h1:gD2HeocX3+yG+ygLZcrzQJaqmWj9AIm7n08wl/qW/PE= go.uber.org/atomic v1.5.0/go.mod h1:sABNBOSYdrvTF6hTgEIbc7YasKWGhgEQZyfxyTvoXHQ= go.uber.org/atomic v1.6.0/go.mod h1:sABNBOSYdrvTF6hTgEIbc7YasKWGhgEQZyfxyTvoXHQ= go.uber.org/atomic v1.7.0/go.mod h1:fEN4uk6kAWBTFdckzkM89CLk9XfWZrxpCo0nPH17wJc= +go.uber.org/atomic v1.9.0 h1:ECmE8Bn/WFTYwEW/bpKD3M8VtR/zQVbavAoalC1PYyE= +go.uber.org/atomic v1.9.0/go.mod h1:fEN4uk6kAWBTFdckzkM89CLk9XfWZrxpCo0nPH17wJc= +go.uber.org/goleak v1.1.10/go.mod h1:8a7PlsEVH3e/a/GLqe5IIrQx6GzcnRmZEufDUTk4A7A= +go.uber.org/goleak v1.1.11-0.20210813005559-691160354723 h1:sHOAIxRGBp443oHZIPB+HsUGaksVCXVQENPxwTfQdH4= +go.uber.org/goleak v1.1.11-0.20210813005559-691160354723/go.mod h1:cwTWslyiVhfpKIDGSZEM2HlOvcqm+tG4zioyIeLoqMQ= go.uber.org/multierr v1.1.0/go.mod h1:wR5kodmAFQ0UK8QlbwjlSNy0Z68gJhDJUG5sjR94q/0= go.uber.org/multierr v1.3.0/go.mod h1:VgVr7evmIr6uPjLBxg28wmKNXyqE9akIJ5XnfpiKl+4= go.uber.org/multierr v1.5.0/go.mod h1:FeouvMocqHpRaaGuG9EjoKcStLC43Zu/fmqdUMPcKYU= go.uber.org/multierr v1.6.0/go.mod h1:cdWPpRnG4AhwMwsgIHip0KRBQjJy5kYEpYjJxpXp9iU= +go.uber.org/multierr v1.7.0 h1:zaiO/rmgFjbmCXdSYJWQcdvOCsthmdaHfr3Gm2Kx4Ec= +go.uber.org/multierr v1.7.0/go.mod h1:7EAYxJLBy9rStEaz58O2t4Uvip6FSURkq8/ppBp95ak= go.uber.org/tools v0.0.0-20190618225709-2cfd321de3ee/go.mod h1:vJERXedbb3MVM5f9Ejo0C68/HhF8uaILCdgjnY+goOA= go.uber.org/zap v1.9.1/go.mod h1:vwi/ZaCAaUcBkycHslxD9B2zi4UTXhF60s6SWpuDF0Q= go.uber.org/zap v1.10.0/go.mod h1:vwi/ZaCAaUcBkycHslxD9B2zi4UTXhF60s6SWpuDF0Q= go.uber.org/zap v1.13.0/go.mod h1:zwrFLgMcdUuIBviXEYEH1YKNaOBnKXsx2IPda5bBwHM= go.uber.org/zap v1.17.0/go.mod h1:MXVU+bhUf/A7Xi2HNOnopQOrmycQ5Ih87HtOu4q5SSo= +go.uber.org/zap v1.18.1/go.mod h1:xg/QME4nWcxGxrpdeYfq7UvYrLh66cuVKdrbD1XF/NI= +go.uber.org/zap v1.19.0/go.mod 
h1:xg/QME4nWcxGxrpdeYfq7UvYrLh66cuVKdrbD1XF/NI= +go.uber.org/zap v1.19.1 h1:ue41HOKd1vGURxrmeKIgELGb3jPW9DMUDGtsinblHwI= +go.uber.org/zap v1.19.1/go.mod h1:j3DNczoxDZroyBnOT1L/Q79cfUMGZxlv/9dzN7SM1rI= golang.org/x/crypto v0.0.0-20180904163835-0709b304e793/go.mod h1:6SG95UA2DQfeDnfUPMdvaQW0Q7yPrPDi9nlGo2tz2b4= golang.org/x/crypto v0.0.0-20181029021203-45a5f77698d3/go.mod h1:6SG95UA2DQfeDnfUPMdvaQW0Q7yPrPDi9nlGo2tz2b4= golang.org/x/crypto v0.0.0-20181029175232-7e6ffbd03851/go.mod h1:6SG95UA2DQfeDnfUPMdvaQW0Q7yPrPDi9nlGo2tz2b4= @@ -747,7 +736,6 @@ golang.org/x/crypto v0.0.0-20190411191339-88737f569e3a/go.mod h1:WFFai1msRO1wXaE golang.org/x/crypto v0.0.0-20190510104115-cbcb75029529/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= golang.org/x/crypto v0.0.0-20190605123033-f99c8df09eb5/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= golang.org/x/crypto v0.0.0-20190820162420-60c769a6c586/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= -golang.org/x/crypto v0.0.0-20190923035154-9ee001bba392/go.mod h1:/lpIB1dKB+9EgE3H3cr1v9wB50oz8l4C4h62xy7jSTY= golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= golang.org/x/crypto v0.0.0-20200820211705-5c72a883971a/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= @@ -755,7 +743,6 @@ golang.org/x/crypto v0.0.0-20201203163018-be400aefbc4c/go.mod h1:jdWPYTVW3xRLrWP golang.org/x/crypto v0.0.0-20210421170649-83a5a9bb288b/go.mod h1:T9bdIzuCu7OtxOm1hfPfRQxPLYneinmdGuTeoZ9dtd4= golang.org/x/crypto v0.0.0-20210616213533-5ff15b29337e/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc= golang.org/x/crypto v0.0.0-20210711020723-a769d52b0f97/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc= -golang.org/x/crypto v0.0.0-20210817164053-32db794688a5/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc= golang.org/x/crypto 
v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc= golang.org/x/crypto v0.0.0-20211108221036-ceb1ce70b4fa/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc= golang.org/x/crypto v0.0.0-20220213190939-1e6e3497d506 h1:EuGTJDfeg/PGZJp3gq1K+14eSLFTsrj1eg8KQuiUyKg= @@ -797,7 +784,6 @@ golang.org/x/mod v0.3.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= golang.org/x/mod v0.4.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= golang.org/x/mod v0.4.1/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= golang.org/x/mod v0.4.2/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= -golang.org/x/mod v0.5.0/go.mod h1:5OXOZSfqPIIbmVBIIKWRFfZjPR0E5r58TLhUjH0a2Ro= golang.org/x/mod v0.6.0-dev.0.20220106191415-9b9b3d81d5e3 h1:kQgndtyPBW/JIYERgdxfwMYh3AVStj88WQTlNDi2a+o= golang.org/x/mod v0.6.0-dev.0.20220106191415-9b9b3d81d5e3/go.mod h1:3p9vT2HGsQu2K1YbXdKPJLVgG5VJdoTa1poYQBtP1AY= golang.org/x/net v0.0.0-20180218175443-cbe0f9307d01/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= @@ -818,7 +804,6 @@ golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLL golang.org/x/net v0.0.0-20190628185345-da137c7871d7/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= golang.org/x/net v0.0.0-20190724013045-ca1201d0de80/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= golang.org/x/net v0.0.0-20190813141303-74dc4d7220e7/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= -golang.org/x/net v0.0.0-20190923162816-aa69164e4478/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= golang.org/x/net v0.0.0-20191209160850-c0dbc17a3553/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= golang.org/x/net v0.0.0-20200114155413-6afb5195e5aa/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= golang.org/x/net v0.0.0-20200202094626-16171245cfb2/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= @@ -842,11 +827,9 @@ golang.org/x/net v0.0.0-20210119194325-5f4716e94777/go.mod h1:m0MpNAwzfU5UDzcl9v 
golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg= golang.org/x/net v0.0.0-20210316092652-d523dce5a7f4/go.mod h1:RBQZq4jEuRlivfhVLdyRGr576XBO4/greRjx4P4O3yc= golang.org/x/net v0.0.0-20210405180319-a5a99cb37ef4/go.mod h1:p54w0d4576C0XHj96bSt6lcn1PtDYWL6XObtHCRCNQM= -golang.org/x/net v0.0.0-20210410081132-afb366fc7cd1/go.mod h1:9tjilg8BloeKEkVJvy7fQ90B1CfIiPueXVOjqfkSzI8= golang.org/x/net v0.0.0-20210503060351-7fd8e65b6420/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y= golang.org/x/net v0.0.0-20210525063256-abc453219eb5/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y= golang.org/x/net v0.0.0-20210805182204-aaa1db679c0d/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y= -golang.org/x/net v0.0.0-20210813160813-60bc85c4be6d/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y= golang.org/x/net v0.0.0-20211209124913-491a49abca63/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y= golang.org/x/net v0.0.0-20220127200216-cd36cc0744dd h1:O7DYs+zxREGLKzKoMQrtrEacpb0ZVXA5rIwylE2Xchk= golang.org/x/net v0.0.0-20220127200216-cd36cc0744dd/go.mod h1:CfG3xpIq0wQ8r1q4Su4UZFWDARRcnwPjda9FqA0JpMk= @@ -899,11 +882,8 @@ golang.org/x/sys v0.0.0-20190606165138-5da285871e9c/go.mod h1:h1NjWce9XRLGQEsW7w golang.org/x/sys v0.0.0-20190624142023-c5567b49c5d0/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20190726091711-fc99dfbffb4e/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20190813064441-fde4db37ae7a/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20190922100055-0a153f010e69/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20190924154521-2837fb4f24fe/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20191001151750-bb3f8db39f24/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20191005200804-aed5e4c7ecf9/go.mod 
h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20191008105621-543471e840be/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20191026070338-33540a1f6037/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20191204072324-ce4227a45e2e/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20191228213918-04cbcbbfeed8/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= @@ -911,7 +891,6 @@ golang.org/x/sys v0.0.0-20200106162015-b016eb3dc98e/go.mod h1:h1NjWce9XRLGQEsW7w golang.org/x/sys v0.0.0-20200113162924-86b910548bc1/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200116001909-b77594299b42/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200122134326-e047566fdf82/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20200124204421-9fbb57f87de9/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200202164722-d101bd2416d5/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200212091648-12a6c2dcc1e4/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200223170610-d5e6a3e2c0ae/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= @@ -934,7 +913,6 @@ golang.org/x/sys v0.0.0-20210119212857-b64e53b001e4/go.mod h1:h1NjWce9XRLGQEsW7w golang.org/x/sys v0.0.0-20210124154548-22da62e12c0c/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20210220050731-9a76102bfb43/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20210225134936-a50acf3fe073/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20210303074136-134d130e1a04/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20210305230114-8fe3ee5dd75b/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys 
v0.0.0-20210315160823-c6e025ad8005/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20210320140829-1e4c9ba3b0c4/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= @@ -951,14 +929,12 @@ golang.org/x/sys v0.0.0-20210616094352-59db8d763f22/go.mod h1:oPkhp1MJrh7nUepCBc golang.org/x/sys v0.0.0-20210630005230-0f9fa26af87c/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20210806184541-e5e7981a1069/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20210809222454-d867a43fc93e/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.0.0-20210816183151-1e6c022a8912/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20210823070655-63515b42dcdf/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20210908233432-aa78b53d3365/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20210917161153-d61c044b1678/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20210927094055-39ccf1dd6fa6/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20211007075335-d3039528d8ac/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20211124211545-fe61309f8881/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.0.0-20211205182925-97ca703d548d/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20211210111614-af8b64212486/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20211216021012-1d35b9e2eb4e/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220114195835-da31bd327af9/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= @@ -998,11 +974,11 @@ golang.org/x/tools v0.0.0-20190621195816-6e04913cbbac/go.mod h1:/rFqwRUd4F7ZHNgw golang.org/x/tools v0.0.0-20190628153133-6cdbf07be9d0/go.mod 
h1:/rFqwRUd4F7ZHNgwSSTFct+R/Kf4OFW1sUzUTQQTgfc= golang.org/x/tools v0.0.0-20190816200558-6889da9d5479/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= golang.org/x/tools v0.0.0-20190823170909-c4a336ef6a2f/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= -golang.org/x/tools v0.0.0-20190907020128-2ca718005c18/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= golang.org/x/tools v0.0.0-20190911174233-4f2ddba30aff/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= golang.org/x/tools v0.0.0-20191012152004-8de300cfc20a/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= golang.org/x/tools v0.0.0-20191029041327-9cc4af7d6b2c/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= golang.org/x/tools v0.0.0-20191029190741-b9c20aec41a5/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= +golang.org/x/tools v0.0.0-20191108193012-7d206e10da11/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= golang.org/x/tools v0.0.0-20191112195655-aa38f8e97acc/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= golang.org/x/tools v0.0.0-20191113191852-77e3bb0ad9e7/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= golang.org/x/tools v0.0.0-20191115202509-3a792d9c32b2/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= @@ -1089,7 +1065,6 @@ google.golang.org/api v0.57.0/go.mod h1:dVPlbZyBo2/OjBpmvNdpn2GRm6rPy75jyU7bmhdr google.golang.org/api v0.58.0/go.mod h1:cAbP2FsxoGVNwtgNAmmn3y5G1TWAiVYRmg4yku3lv+E= google.golang.org/api v0.59.0/go.mod h1:sT2boj7M9YJxZzgeZqXogmhfmRWDtPzT31xkieUbuZU= google.golang.org/api v0.61.0/go.mod h1:xQRti5UdCmoCEqFxcz93fTl338AVqDgyaDRuOZ3hg9I= -google.golang.org/api v0.62.0/go.mod h1:dKmwPCydfsad4qCH08MSdgWjfHOyfpd4VtDGgRFdavw= google.golang.org/api v0.63.0/go.mod h1:gs4ij2ffTRXwuzzgJl/56BdwJaA194ijkfn++9tDuPo= google.golang.org/api v0.64.0 h1:l3pi8ncrQgB9+ncFw3A716L8lWujnXniBYbxWqqy6tE= google.golang.org/api v0.64.0/go.mod h1:931CdxA8Rm4t6zqTFGSsgwbAEZ2+GMYurbndwSimebM= @@ -1164,10 +1139,7 @@ google.golang.org/genproto 
v0.0.0-20210924002016-3dee208752a0/go.mod h1:5CzLGKJ6 google.golang.org/genproto v0.0.0-20211008145708-270636b82663/go.mod h1:5CzLGKJ67TSI2B9POpiiyGha0AjJvZIUgRMt1dSmuhc= google.golang.org/genproto v0.0.0-20211016002631-37fc39342514/go.mod h1:5CzLGKJ67TSI2B9POpiiyGha0AjJvZIUgRMt1dSmuhc= google.golang.org/genproto v0.0.0-20211018162055-cf77aa76bad2/go.mod h1:5CzLGKJ67TSI2B9POpiiyGha0AjJvZIUgRMt1dSmuhc= -google.golang.org/genproto v0.0.0-20211028162531-8db9c33dc351/go.mod h1:5CzLGKJ67TSI2B9POpiiyGha0AjJvZIUgRMt1dSmuhc= google.golang.org/genproto v0.0.0-20211118181313-81c1377c94b1/go.mod h1:5CzLGKJ67TSI2B9POpiiyGha0AjJvZIUgRMt1dSmuhc= -google.golang.org/genproto v0.0.0-20211129164237-f09f9a12af12/go.mod h1:5CzLGKJ67TSI2B9POpiiyGha0AjJvZIUgRMt1dSmuhc= -google.golang.org/genproto v0.0.0-20211203200212-54befc351ae9/go.mod h1:5CzLGKJ67TSI2B9POpiiyGha0AjJvZIUgRMt1dSmuhc= google.golang.org/genproto v0.0.0-20211206160659-862468c7d6e0/go.mod h1:5CzLGKJ67TSI2B9POpiiyGha0AjJvZIUgRMt1dSmuhc= google.golang.org/genproto v0.0.0-20211208223120-3a66f561d7aa/go.mod h1:5CzLGKJ67TSI2B9POpiiyGha0AjJvZIUgRMt1dSmuhc= google.golang.org/genproto v0.0.0-20211221195035-429b39de9b1c/go.mod h1:5CzLGKJ67TSI2B9POpiiyGha0AjJvZIUgRMt1dSmuhc= @@ -1201,7 +1173,6 @@ google.golang.org/grpc v1.39.0/go.mod h1:PImNr+rS9TWYb2O4/emRugxiyHZ5JyHW5F+RPnD google.golang.org/grpc v1.39.1/go.mod h1:PImNr+rS9TWYb2O4/emRugxiyHZ5JyHW5F+RPnDzfrE= google.golang.org/grpc v1.40.0/go.mod h1:ogyxbiOoUXAkP+4+xa6PZSE9DZgIHtSpzjDTB9KAK34= google.golang.org/grpc v1.40.1/go.mod h1:ogyxbiOoUXAkP+4+xa6PZSE9DZgIHtSpzjDTB9KAK34= -google.golang.org/grpc v1.42.0/go.mod h1:k+4IHHFw41K8+bbowsex27ge2rCb65oeWqe4jJ590SU= google.golang.org/grpc v1.44.0 h1:weqSxi/TMs1SqFRMHCtBgXRs8k3X39QIDEZ0pRcttUg= google.golang.org/grpc v1.44.0/go.mod h1:k+4IHHFw41K8+bbowsex27ge2rCb65oeWqe4jJ590SU= google.golang.org/grpc/cmd/protoc-gen-go-grpc v1.1.0/go.mod h1:6Kw0yEErY5E/yWrBtf03jp27GLLJujG4z/JK95pnjjw= @@ -1219,8 +1190,9 @@ google.golang.org/protobuf 
v1.24.0/go.mod h1:r/3tXBNzIEhYS9I1OUVjXDlt8tc493IdKGj google.golang.org/protobuf v1.25.0/go.mod h1:9JNX74DMeImyA3h4bdi1ymwjUzf21/xIlbajtzgsN7c= google.golang.org/protobuf v1.26.0-rc.1/go.mod h1:jlhhOSvTdKEhbULTjvd4ARK9grFBp09yW+WbY/TyQbw= google.golang.org/protobuf v1.26.0/go.mod h1:9q0QmTI4eRPtz6boOQmLYwt+qCgq0jsYwAQnmE0givc= -google.golang.org/protobuf v1.27.1 h1:SnqbnDw1V7RiZcXPx5MEeqPv2s79L9i7BJUlG/+RurQ= google.golang.org/protobuf v1.27.1/go.mod h1:9q0QmTI4eRPtz6boOQmLYwt+qCgq0jsYwAQnmE0givc= +google.golang.org/protobuf v1.28.0 h1:w43yiav+6bVFTBQFZX0r7ipe9JQ1QsbMgHwbBziscLw= +google.golang.org/protobuf v1.28.0/go.mod h1:HV8QOd/L58Z+nl8r43ehVNZIU/HEI6OcFqwMG9pJV4I= gopkg.in/alecthomas/kingpin.v2 v2.2.6/go.mod h1:FMv+mEhP44yOT+4EoQTLFTRgOQ1FBLkstjWtayDeSgw= gopkg.in/alexcesaro/quotedprintable.v3 v3.0.0-20150716171945-2caba252f4dc h1:2gGKlE2+asNV9m7xrywl36YYNnBG5ZQ0r/BOOxqPpmk= gopkg.in/alexcesaro/quotedprintable.v3 v3.0.0-20150716171945-2caba252f4dc/go.mod h1:m7x9LTH6d71AHyAX77c9yqWCCa3UKHcVEj9y7hAtKDk= @@ -1238,6 +1210,8 @@ gopkg.in/ini.v1 v1.66.2 h1:XfR1dOYubytKy4Shzc2LHrrGhU0lDCfDGG1yLPmpgsI= gopkg.in/ini.v1 v1.66.2/go.mod h1:pNLf8WUiyNEtQjuu5G5vTm06TEv9tsIgeAvK8hOrP4k= gopkg.in/mgo.v2 v2.0.0-20190816093944-a6b53ec6cb22 h1:VpOs+IwYnYBaFnrNAeB8UUWtL3vEUnzSCL1nVjPhqrw= gopkg.in/mgo.v2 v2.0.0-20190816093944-a6b53ec6cb22/go.mod h1:yeKp02qBN3iKW1OzL3MGk2IdtZzaj7SFntXj72NppTA= +gopkg.in/natefinch/lumberjack.v2 v2.0.0 h1:1Lc07Kr7qY4U2YPouBjpCLxpiyxIVoxqXgkXLknAOE8= +gopkg.in/natefinch/lumberjack.v2 v2.0.0/go.mod h1:l0ndWWf7gzL7RNwBG7wST/UCcT4T24xpD6X8LsfU/+k= gopkg.in/square/go-jose.v2 v2.5.1 h1:7odma5RETjNHWJnR32wx8t+Io4djHE1PqxCFx3iiZ2w= gopkg.in/square/go-jose.v2 v2.5.1/go.mod h1:M9dMgbHiYLoDGQrXy7OpJDJWiKiU//h+vD76mk0e1AI= gopkg.in/yaml.v2 v2.2.1/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= From a319a7153874aaa7b338030e4f02afdd27167af1 Mon Sep 17 00:00:00 2001 From: Nathaniel Caza Date: Fri, 3 Jun 2022 11:05:12 -0500 Subject: [PATCH 105/225] use sqlc 
--- devtools/tools.go | 1 + graphql2/graphqlapp/swo.go | 4 +- pkg/sysapi/sysapi.pb.go | 2 +- sqlc.yaml | 13 + swo/changelog.go | 12 +- swo/{changelog_trigger.sql => changelog.sql} | 12 + swo/changelog_table.sql | 11 - swo/execute.go | 34 +-- swo/pgtables.sql | 87 ++++++ swo/preflightlocks.go | 38 +-- swo/queries.sql | 87 ++++++ swo/scantables.go | 68 ++--- swo/scantables_column_list.sql | 11 - swo/scantables_fkey_refs.sql | 9 - swo/swodb/db.go | 32 +++ swo/swodb/models.go | 69 +++++ swo/swodb/queries.sql.go | 283 +++++++++++++++++++ swo/swogrp/group.go | 54 ++-- swo/swogrp/set.go | 45 +++ swo/swomsg/log.go | 34 +-- swo/syncchanges.go | 29 +- swo/table.go | 6 +- swo/waitforactivetx.go | 8 +- 23 files changed, 734 insertions(+), 215 deletions(-) create mode 100644 sqlc.yaml rename swo/{changelog_trigger.sql => changelog.sql} (71%) delete mode 100644 swo/changelog_table.sql create mode 100644 swo/pgtables.sql create mode 100644 swo/queries.sql delete mode 100644 swo/scantables_column_list.sql delete mode 100644 swo/scantables_fkey_refs.sql create mode 100644 swo/swodb/db.go create mode 100644 swo/swodb/models.go create mode 100644 swo/swodb/queries.sql.go create mode 100644 swo/swogrp/set.go diff --git a/devtools/tools.go b/devtools/tools.go index b5e650ea7e..8fcff9f4d3 100644 --- a/devtools/tools.go +++ b/devtools/tools.go @@ -5,6 +5,7 @@ package devtools import ( _ "github.com/gordonklaus/ineffassign" + _ "github.com/kyleconroy/sqlc/cmd/sqlc" _ "github.com/mailhog/MailHog" _ "golang.org/x/tools/cmd/goimports" _ "golang.org/x/tools/cmd/stringer" diff --git a/graphql2/graphqlapp/swo.go b/graphql2/graphqlapp/swo.go index ce87ac387d..d5efc9d63c 100644 --- a/graphql2/graphqlapp/swo.go +++ b/graphql2/graphqlapp/swo.go @@ -69,8 +69,8 @@ func (a *Query) SwoStatus(ctx context.Context) (*graphql2.SWOStatus, error) { nodes = append(nodes, graphql2.SWONode{ ID: n.ID.String(), - OldValid: n.OldDBValid, - NewValid: n.NewDBValid, + OldValid: n.OldDBValid(), + NewValid: 
n.NewDBValid(), IsLeader: n.IsLeader, CanExec: n.CanExec, Status: strings.Join(tasks, ","), diff --git a/pkg/sysapi/sysapi.pb.go b/pkg/sysapi/sysapi.pb.go index 4d76cc1966..2e9a44a5fe 100644 --- a/pkg/sysapi/sysapi.pb.go +++ b/pkg/sysapi/sysapi.pb.go @@ -1,6 +1,6 @@ // Code generated by protoc-gen-go. DO NOT EDIT. // versions: -// protoc-gen-go v1.27.1 +// protoc-gen-go v1.28.0 // protoc v3.19.4 // source: pkg/sysapi/sysapi.proto diff --git a/sqlc.yaml b/sqlc.yaml new file mode 100644 index 0000000000..37c73c3595 --- /dev/null +++ b/sqlc.yaml @@ -0,0 +1,13 @@ +version: '2' +sql: + - schema: + - swo/pgtables.sql + - swo/changelog.sql + - migrate/migrations/20180816094955-switchover-state.sql + - migrate/migrations/20220405163538-switchover-mk2.sql + queries: [swo/queries.sql] + engine: postgresql + gen: + go: + out: swo/swodb + sql_package: pgx/v4 diff --git a/swo/changelog.go b/swo/changelog.go index 8fefbbbc8a..adba316e99 100644 --- a/swo/changelog.go +++ b/swo/changelog.go @@ -8,13 +8,8 @@ import ( "github.com/target/goalert/swo/swogrp" ) -var ( - //go:embed changelog_table.sql - changelogTable string - - //go:embed changelog_trigger.sql - changelogTrigger string -) +//go:embed changelog.sql +var changelogQuery string func (e *Execute) exec(ctx context.Context, conn pgxQueryer, query string) { if e.err != nil { @@ -42,8 +37,7 @@ func (e *Execute) EnableChangeLog(ctx context.Context) { } swogrp.Progressf(ctx, "enabling change log") - e.exec(ctx, e.mainDBConn, changelogTable) - e.exec(ctx, e.mainDBConn, changelogTrigger) + e.exec(ctx, e.mainDBConn, changelogQuery) // create triggers for all tables for _, table := range e.tables { diff --git a/swo/changelog_trigger.sql b/swo/changelog.sql similarity index 71% rename from swo/changelog_trigger.sql rename to swo/changelog.sql index 707b38cb08..641523e594 100644 --- a/swo/changelog_trigger.sql +++ b/swo/changelog.sql @@ -1,3 +1,15 @@ +CREATE UNLOGGED TABLE change_log ( + id BIGSERIAL PRIMARY KEY, + table_name TEXT NOT 
NULL, + row_id TEXT NOT NULL +); + +ALTER TABLE change_log +SET ( + autovacuum_enabled = FALSE, + toast.autovacuum_enabled = FALSE + ); + CREATE OR REPLACE FUNCTION fn_process_change_log() RETURNS TRIGGER AS $$ DECLARE cur_state enum_switchover_state := 'idle'; diff --git a/swo/changelog_table.sql b/swo/changelog_table.sql deleted file mode 100644 index fd62607e0b..0000000000 --- a/swo/changelog_table.sql +++ /dev/null @@ -1,11 +0,0 @@ -CREATE UNLOGGED TABLE change_log ( - id BIGSERIAL PRIMARY KEY, - table_name TEXT NOT NULL, - row_id TEXT NOT NULL -); - -ALTER TABLE change_log -SET ( - autovacuum_enabled = FALSE, - toast.autovacuum_enabled = FALSE - ) diff --git a/swo/execute.go b/swo/execute.go index 739cb0fbf2..e87ddafc8a 100644 --- a/swo/execute.go +++ b/swo/execute.go @@ -9,7 +9,7 @@ import ( "github.com/jackc/pgconn" "github.com/jackc/pgx/v4" - "github.com/target/goalert/lock" + "github.com/target/goalert/swo/swodb" "github.com/target/goalert/swo/swogrp" "github.com/target/goalert/util/sqlutil" ) @@ -36,22 +36,7 @@ func NewExecute(ctx context.Context, mainDBConn, nextDBConn *pgx.Conn, grp *swog return nil, fmt.Errorf("scan tables: %w", err) } - var seqNames []string - var name string - _, err = mainDBConn.QueryFunc(ctx, ` - select sequence_name - from information_schema.sequences - where - sequence_catalog = current_database() and - sequence_schema = 'public' - `, nil, []interface{}{&name}, func(r pgx.QueryFuncRow) error { - if name == "change_log_id_seq" { - // skip, as it does not exist in next db - return nil - } - seqNames = append(seqNames, name) - return nil - }) + seqNames, err := swodb.New(mainDBConn).SequenceNames(ctx) if err != nil { return nil, fmt.Errorf("scan sequences: %w", err) } @@ -165,26 +150,25 @@ func (e *Execute) enableTriggers(ctx context.Context) error { // is set to in_progress. 
func (e *Execute) stopTheWorld(ctx context.Context, srcTx pgx.Tx) error { e.Progressf(ctx, "stop-the-world") - _, err := srcTx.Exec(ctx, fmt.Sprintf("select pg_advisory_xact_lock(%d)", lock.GlobalSwitchOver)) + err := swodb.New(srcTx).GlobalSwitchoverTxExclusiveConnLock(ctx) if err != nil { return err } - var stat string - err = srcTx.QueryRow(ctx, "select current_state from switchover_state nowait").Scan(&stat) + stat, err := swodb.New(srcTx).CurrentSwitchoverStateNoWait(ctx) if err != nil { return err } switch stat { - case "in_progress": + case swodb.EnumSwitchoverStateInProgress: return nil - case "use_next_db": + case swodb.EnumSwitchoverStateUseNextDb: return swogrp.ErrDone - case "idle": + case swodb.EnumSwitchoverStateIdle: return errors.New("not in progress") default: if e.err == nil { - return errors.New("unknown state: " + stat) + return errors.New("unknown state: " + string(stat)) } return e.err } @@ -280,7 +264,7 @@ func (e *Execute) syncTx(ctx context.Context, readOnly bool) (src, dst pgx.Tx, e } func (t Table) IDs(ids []string) interface{} { - switch t.IDCol.Type { + switch t.IDCol.DataType { case "integer", "bigint": return sqlutil.IntArray(intIDs(ids)) case "uuid": diff --git a/swo/pgtables.sql b/swo/pgtables.sql new file mode 100644 index 0000000000..f26929e971 --- /dev/null +++ b/swo/pgtables.sql @@ -0,0 +1,87 @@ +-- pg_catalog tables used by SWO +CREATE TABLE pg_catalog.pg_namespace ( + oid oid NOT NULL, + nspname NAME NOT NULL, + nspowner oid NOT NULL, + nspacl aclitem [ ] +); + +CREATE TABLE pg_catalog.pg_class ( + oid oid NOT NULL, + relname NAME NOT NULL, + relnamespace oid NOT NULL, + reltype oid NOT NULL, + reloftype oid NOT NULL, + relowner oid NOT NULL, + relam oid NOT NULL, + relfilenode oid NOT NULL, + reltablespace oid NOT NULL, + relpages INTEGER NOT NULL, + reltuples REAL NOT NULL, + relallvisible INTEGER NOT NULL, + reltoastrelid oid NOT NULL, + relhasindex BOOLEAN NOT NULL, + relisshared BOOLEAN NOT NULL, + relpersistence "char" 
NOT NULL, + relkind "char" NOT NULL, + relnatts SMALLINT NOT NULL, + relchecks SMALLINT NOT NULL, + relhasrules BOOLEAN NOT NULL, + relhastriggers BOOLEAN NOT NULL, + relhassubclass BOOLEAN NOT NULL, + relrowsecurity BOOLEAN NOT NULL, + relforcerowsecurity BOOLEAN NOT NULL, + relispopulated BOOLEAN NOT NULL, + relreplident "char" NOT NULL, + relispartition BOOLEAN NOT NULL, + relrewrite oid NOT NULL, + relfrozenxid xid NOT NULL, + relminmxid xid NOT NULL, + relacl aclitem [ ], + reloptions text [ ] COLLATE pg_catalog. "C", + relpartbound pg_node_tree COLLATE pg_catalog. "C" +); + +CREATE TABLE pg_catalog.pg_constraint ( + oid oid NOT NULL, + conname NAME NOT NULL, + connamespace oid NOT NULL, + contype "char" NOT NULL, + condeferrable BOOLEAN NOT NULL, + condeferred BOOLEAN NOT NULL, + convalidated BOOLEAN NOT NULL, + conrelid oid NOT NULL, + contypid oid NOT NULL, + conindid oid NOT NULL, + conparentid oid NOT NULL, + confrelid oid NOT NULL, + confupdtype "char" NOT NULL, + confdeltype "char" NOT NULL, + confmatchtype "char" NOT NULL, + conislocal BOOLEAN NOT NULL, + coninhcount INTEGER NOT NULL, + connoinherit BOOLEAN NOT NULL, + conkey SMALLINT [ ], + confkey SMALLINT [ ], + conpfeqop oid [ ], + conppeqop oid [ ], + conffeqop oid [ ], + conexclop oid [ ], + conbin pg_node_tree COLLATE pg_catalog. 
"C" +); + +-- just for type info +CREATE TABLE pg_stat_activity (state TEXT, XACT_START timestamptz NOT NULL); + +CREATE SCHEMA information_schema; + +CREATE TABLE information_schema.columns ( + table_name TEXT NOT NULL, + column_name TEXT NOT NULL, + data_type TEXT NOT NULL, + ordinal_position INTEGER NOT NULL +); + +CREATE TABLE information_schema.tables (); + +CREATE TABLE information_schema.sequences (sequence_name TEXT NOT NULL); diff --git a/swo/preflightlocks.go b/swo/preflightlocks.go index 82dcd6e792..0e5e2732f2 100644 --- a/swo/preflightlocks.go +++ b/swo/preflightlocks.go @@ -2,13 +2,12 @@ package swo import ( "context" - "database/sql/driver" "errors" "fmt" "github.com/jackc/pgx/v4" "github.com/jackc/pgx/v4/stdlib" - "github.com/target/goalert/lock" + "github.com/target/goalert/swo/swodb" "github.com/target/goalert/swo/swogrp" ) @@ -22,12 +21,7 @@ var ErrNoLock = errors.New("no lock") // This lock should be acquired by an engine instance that is going to perform // the sync & switchover. func SwitchOverExecLock(ctx context.Context, conn *pgx.Conn) error { - var gotLock bool - err := conn.QueryRow(ctx, ` - select pg_try_advisory_lock($1) - from switchover_state - where current_state != 'use_next_db' - `, lock.GlobalSwitchOverExec).Scan(&gotLock) + gotLock, err := swodb.New(conn).GlobalSwitchoverExecLock(ctx) if errors.Is(err, pgx.ErrNoRows) { return swogrp.ErrDone } @@ -44,7 +38,7 @@ func SwitchOverExecLock(ctx context.Context, conn *pgx.Conn) error { // UnlockConn will release all session locks or close the connection. func UnlockConn(ctx context.Context, conn *pgx.Conn) { - _, err := conn.Exec(ctx, `select pg_advisory_unlock_all()`) + err := swodb.New(conn).UnlockAll(ctx) if err != nil { conn.Close(ctx) } @@ -56,37 +50,17 @@ func SessionLock(ctx context.Context, c *stdlib.Conn) error { // // This will run for every new connection in SWO mode and for every // query while idle connections are disabled during critical phase. 
- _, err := c.ExecContext(ctx, fmt.Sprintf("select pg_advisory_lock_shared(%d)", lock.GlobalSwitchOver), nil) + err := swodb.New(c.Conn()).GlobalSwitchoverSharedConnLock(ctx) if err != nil { return fmt.Errorf("get SWO shared session lock: %w", err) } - rows, err := c.QueryContext(ctx, "select current_state from switchover_state", nil) + state, err := swodb.New(c.Conn()).CurrentSwitchoverState(ctx) if err != nil { return fmt.Errorf("get current SWO state: %w", err) } - scan := make([]driver.Value, 1) - err = rows.Next(scan) - if err != nil { - return err - } - - var state string - switch t := scan[0].(type) { - case string: - state = t - case []byte: - state = string(t) - default: - return fmt.Errorf("get current SWO state: expected string for current_state value, got %t", t) - } - err = rows.Close() - if err != nil { - return err - } - - if state == "use_next_db" { + if state == swodb.EnumSwitchoverStateUseNextDb { return swogrp.ErrDone } diff --git a/swo/queries.sql b/swo/queries.sql new file mode 100644 index 0000000000..a3d23d266d --- /dev/null +++ b/swo/queries.sql @@ -0,0 +1,87 @@ +-- name: ForeignKeys :many +SELECT src.relname::text, + dst.relname::text +FROM pg_catalog.pg_constraint con + JOIN pg_catalog.pg_namespace ns ON ns.nspname = 'public' + AND ns.oid = con.connamespace + JOIN pg_catalog.pg_class src ON src.oid = con.conrelid + JOIN pg_catalog.pg_class dst ON dst.oid = con.confrelid +WHERE con.contype = 'f' + AND NOT con.condeferrable; + +-- name: Changes :many +SELECT id, + table_name, + row_id +FROM change_log; + +-- name: DeleteChanges :exec +DELETE FROM change_log +WHERE id = ANY($1); + +-- name: CurrentTime :one +SELECT now()::timestamptz; + +-- name: ActiveTxCount :one +SELECT COUNT(*) +FROM pg_stat_activity +WHERE "state" <> 'idle' + AND "xact_start" <= $1; + +-- name: GlobalSwitchoverSharedConnLock :exec +SELECT pg_advisory_lock_shared(4369); + +-- name: GlobalSwitchoverTxExclusiveConnLock :exec +SELECT pg_advisory_xact_lock(4369); + +-- name: 
GlobalSwitchoverExecLock :one +SELECT pg_try_advisory_lock(4370) +FROM switchover_state +WHERE current_state != 'use_next_db'; + +-- name: TableColumns :many +SELECT col.table_name, + col.column_name, + col.data_type, + col.ordinal_position +FROM information_schema.columns col + JOIN information_schema.tables t ON t.table_catalog = col.table_catalog + AND t.table_schema = col.table_schema + AND t.table_name = col.table_name + AND t.table_type = 'BASE TABLE' +WHERE col.table_catalog = current_database() + AND col.table_schema = 'public'; + +-- name: SetIdleTimeout :exec +SET idle_in_transaction_session_timeout = 3000; + +-- name: CurrentSwitchoverState :one +SELECT current_state +FROM switchover_state; + +-- name: CurrentSwitchoverStateNoWait :one +SELECT current_state +FROM switchover_state NOWAIT; + +-- name: UnlockAll :exec +SELECT pg_advisory_unlock_all(); + +-- name: LogEvents :many +SELECT id, + TIMESTAMP, + DATA +FROM switchover_log +WHERE id > $1 +ORDER BY id ASC +LIMIT 100; + +-- name: LastLogID :one +SELECT COALESCE(MAX(id), 0)::bigint +FROM switchover_log; + +-- name: SequenceNames :many +SELECT sequence_name +FROM information_schema.sequences +WHERE sequence_catalog = current_database() + AND sequence_schema = 'public' + AND sequence_name != 'change_log_id_seq'; diff --git a/swo/scantables.go b/swo/scantables.go index 41407f0275..47ebcbfea3 100644 --- a/swo/scantables.go +++ b/swo/scantables.go @@ -7,67 +7,45 @@ import ( "sort" "github.com/jackc/pgx/v4" + "github.com/target/goalert/swo/swodb" "github.com/target/goalert/swo/swogrp" ) -type Column struct { - Name string - Type string - Ord int -} - -var ( - //go:embed scantables_column_list.sql - columnListQuery string - - //go:embed scantables_fkey_refs.sql - fkeyRefsQuery string -) +type Column swodb.InformationSchemaColumn // ScanTables scans the database for tables, their columns, and dependencies. 
func ScanTables(ctx context.Context, conn *pgx.Conn) ([]Table, error) { swogrp.Progressf(ctx, "scanning tables...") - var cRow struct { - TableName string - Column + columns, err := swodb.New(conn).TableColumns(ctx) + if err != nil { + return nil, fmt.Errorf("scan table columns: %w", err) } tables := make(map[string]*Table) - _, err := conn.QueryFunc(ctx, columnListQuery, nil, - []interface{}{&cRow.TableName, &cRow.Column.Name, &cRow.Column.Type, &cRow.Column.Ord}, - func(pgx.QueryFuncRow) error { - if tables[cRow.TableName] == nil { - tables[cRow.TableName] = &Table{Name: cRow.TableName, deps: make(map[string]struct{})} - } - tables[cRow.TableName].Columns = append(tables[cRow.TableName].Columns, cRow.Column) - if cRow.Column.Name == "id" { - tables[cRow.TableName].IDCol = cRow.Column - } - return nil - }) - if err != nil { - return nil, fmt.Errorf("scanning table columns: %w", err) - } + for _, cRow := range columns { + if tables[cRow.TableName] == nil { + tables[cRow.TableName] = &Table{Name: cRow.TableName, deps: make(map[string]struct{})} + } - var fRow struct { - SrcName string - DstName string + tables[cRow.TableName].Columns = append(tables[cRow.TableName].Columns, Column(cRow)) + if cRow.ColumnName == "id" { + tables[cRow.TableName].IDCol = Column(cRow) + } } - _, err = conn.QueryFunc(ctx, fkeyRefsQuery, nil, []interface{}{&fRow.SrcName, &fRow.DstName}, - func(pgx.QueryFuncRow) error { - tables[fRow.SrcName].deps[fRow.DstName] = struct{}{} - return nil - }) + refs, err := swodb.New(conn).ForeignKeys(ctx) if err != nil { - return nil, err + return nil, fmt.Errorf("scan foreign keys: %w", err) + } + for _, fRow := range refs { + tables[fRow.SrcRelname].deps[fRow.DstRelname] = struct{}{} } var tableList []*Table for _, t := range tables { sort.Slice(t.Columns, func(i, j int) bool { - return t.Columns[i].Ord < t.Columns[j].Ord + return t.Columns[i].OrdinalPosition < t.Columns[j].OrdinalPosition }) tableList = append(tableList, t) } @@ -109,11 +87,3 @@ func 
ScanTables(ctx context.Context, conn *pgx.Conn) ([]Table, error) { return result, nil } - -func (c Column) IsInteger() bool { - switch c.Type { - case "integer", "bigint": - return true - } - return false -} diff --git a/swo/scantables_column_list.sql b/swo/scantables_column_list.sql deleted file mode 100644 index e5cfa8fa21..0000000000 --- a/swo/scantables_column_list.sql +++ /dev/null @@ -1,11 +0,0 @@ -SELECT col.table_name, - col.column_name, - col.data_type, - col.ordinal_position -FROM information_schema.columns col - JOIN information_schema.tables t ON t.table_catalog = col.table_catalog - AND t.table_schema = col.table_schema - AND t.table_name = col.table_name - AND t.table_type = 'BASE TABLE' -WHERE col.table_catalog = current_database() - AND col.table_schema = 'public' diff --git a/swo/scantables_fkey_refs.sql b/swo/scantables_fkey_refs.sql deleted file mode 100644 index 4bb0bef65b..0000000000 --- a/swo/scantables_fkey_refs.sql +++ /dev/null @@ -1,9 +0,0 @@ -SELECT src.relname, - dst.relname -FROM pg_catalog.pg_constraint con - JOIN pg_namespace ns ON ns.nspname = 'public' - AND ns.oid = con.connamespace - JOIN pg_class src ON src.oid = con.conrelid - JOIN pg_class dst ON dst.oid = con.confrelid -WHERE con.contype = 'f' - AND NOT con.condeferrable diff --git a/swo/swodb/db.go b/swo/swodb/db.go new file mode 100644 index 0000000000..74d516e253 --- /dev/null +++ b/swo/swodb/db.go @@ -0,0 +1,32 @@ +// Code generated by sqlc. DO NOT EDIT. 
+// versions: +// sqlc v1.13.0 + +package swodb + +import ( + "context" + + "github.com/jackc/pgconn" + "github.com/jackc/pgx/v4" +) + +type DBTX interface { + Exec(context.Context, string, ...interface{}) (pgconn.CommandTag, error) + Query(context.Context, string, ...interface{}) (pgx.Rows, error) + QueryRow(context.Context, string, ...interface{}) pgx.Row +} + +func New(db DBTX) *Queries { + return &Queries{db: db} +} + +type Queries struct { + db DBTX +} + +func (q *Queries) WithTx(tx pgx.Tx) *Queries { + return &Queries{ + db: tx, + } +} diff --git a/swo/swodb/models.go b/swo/swodb/models.go new file mode 100644 index 0000000000..7fcf18645f --- /dev/null +++ b/swo/swodb/models.go @@ -0,0 +1,69 @@ +// Code generated by sqlc. DO NOT EDIT. +// versions: +// sqlc v1.13.0 + +package swodb + +import ( + "database/sql" + "fmt" + "time" + + "github.com/jackc/pgtype" +) + +type EnumSwitchoverState string + +const ( + EnumSwitchoverStateIdle EnumSwitchoverState = "idle" + EnumSwitchoverStateInProgress EnumSwitchoverState = "in_progress" + EnumSwitchoverStateUseNextDb EnumSwitchoverState = "use_next_db" +) + +func (e *EnumSwitchoverState) Scan(src interface{}) error { + switch s := src.(type) { + case []byte: + *e = EnumSwitchoverState(s) + case string: + *e = EnumSwitchoverState(s) + default: + return fmt.Errorf("unsupported scan type for EnumSwitchoverState: %T", src) + } + return nil +} + +type ChangeLog struct { + ID int64 + TableName string + RowID string +} + +type InformationSchemaColumn struct { + TableName string + ColumnName string + DataType string + OrdinalPosition int32 +} + +type InformationSchemaSequence struct { + SequenceName string +} + +type InformationSchemaTable struct { +} + +type PgStatActivity struct { + State sql.NullString + XactStart time.Time +} + +type SwitchoverLog struct { + ID int64 + Timestamp time.Time + Data pgtype.JSONB +} + +type SwitchoverState struct { + Ok bool + CurrentState EnumSwitchoverState +} diff --git 
a/swo/swodb/queries.sql.go b/swo/swodb/queries.sql.go new file mode 100644 index 0000000000..875d317470 --- /dev/null +++ b/swo/swodb/queries.sql.go @@ -0,0 +1,283 @@ +// Code generated by sqlc. DO NOT EDIT. +// versions: +// sqlc v1.13.0 +// source: queries.sql + +package swodb + +import ( + "context" + "time" +) + +const activeTxCount = `-- name: ActiveTxCount :one +SELECT COUNT(*) +FROM pg_stat_activity +WHERE "state" <> 'idle' + AND "xact_start" <= $1 +` + +func (q *Queries) ActiveTxCount(ctx context.Context, xactStart time.Time) (int64, error) { + row := q.db.QueryRow(ctx, activeTxCount, xactStart) + var count int64 + err := row.Scan(&count) + return count, err +} + +const changes = `-- name: Changes :many +SELECT id, + table_name, + row_id +FROM change_log +` + +func (q *Queries) Changes(ctx context.Context) ([]ChangeLog, error) { + rows, err := q.db.Query(ctx, changes) + if err != nil { + return nil, err + } + defer rows.Close() + var items []ChangeLog + for rows.Next() { + var i ChangeLog + if err := rows.Scan(&i.ID, &i.TableName, &i.RowID); err != nil { + return nil, err + } + items = append(items, i) + } + if err := rows.Err(); err != nil { + return nil, err + } + return items, nil +} + +const currentSwitchoverState = `-- name: CurrentSwitchoverState :one +SELECT current_state +FROM switchover_state +` + +func (q *Queries) CurrentSwitchoverState(ctx context.Context) (EnumSwitchoverState, error) { + row := q.db.QueryRow(ctx, currentSwitchoverState) + var current_state EnumSwitchoverState + err := row.Scan(¤t_state) + return current_state, err +} + +const currentSwitchoverStateNoWait = `-- name: CurrentSwitchoverStateNoWait :one +SELECT current_state +FROM switchover_state NOWAIT +` + +func (q *Queries) CurrentSwitchoverStateNoWait(ctx context.Context) (EnumSwitchoverState, error) { + row := q.db.QueryRow(ctx, currentSwitchoverStateNoWait) + var current_state EnumSwitchoverState + err := row.Scan(¤t_state) + return current_state, err +} + +const currentTime 
= `-- name: CurrentTime :one +SELECT now()::timestamptz +` + +func (q *Queries) CurrentTime(ctx context.Context) (time.Time, error) { + row := q.db.QueryRow(ctx, currentTime) + var column_1 time.Time + err := row.Scan(&column_1) + return column_1, err +} + +const deleteChanges = `-- name: DeleteChanges :exec +DELETE FROM change_log +WHERE id = ANY($1) +` + +func (q *Queries) DeleteChanges(ctx context.Context, id int64) error { + _, err := q.db.Exec(ctx, deleteChanges, id) + return err +} + +const foreignKeys = `-- name: ForeignKeys :many +SELECT src.relname::text, + dst.relname::text +FROM pg_catalog.pg_constraint con + JOIN pg_catalog.pg_namespace ns ON ns.nspname = 'public' + AND ns.oid = con.connamespace + JOIN pg_catalog.pg_class src ON src.oid = con.conrelid + JOIN pg_catalog.pg_class dst ON dst.oid = con.confrelid +WHERE con.contype = 'f' + AND NOT con.condeferrable +` + +type ForeignKeysRow struct { + SrcRelname string + DstRelname string +} + +func (q *Queries) ForeignKeys(ctx context.Context) ([]ForeignKeysRow, error) { + rows, err := q.db.Query(ctx, foreignKeys) + if err != nil { + return nil, err + } + defer rows.Close() + var items []ForeignKeysRow + for rows.Next() { + var i ForeignKeysRow + if err := rows.Scan(&i.SrcRelname, &i.DstRelname); err != nil { + return nil, err + } + items = append(items, i) + } + if err := rows.Err(); err != nil { + return nil, err + } + return items, nil +} + +const globalSwitchoverExecLock = `-- name: GlobalSwitchoverExecLock :one +SELECT pg_try_advisory_lock(4370) +FROM switchover_state +WHERE current_state != 'use_next_db' +` + +func (q *Queries) GlobalSwitchoverExecLock(ctx context.Context) (bool, error) { + row := q.db.QueryRow(ctx, globalSwitchoverExecLock) + var pg_try_advisory_lock bool + err := row.Scan(&pg_try_advisory_lock) + return pg_try_advisory_lock, err +} + +const globalSwitchoverSharedConnLock = `-- name: GlobalSwitchoverSharedConnLock :exec +SELECT pg_advisory_lock_shared(4369) +` + +func (q *Queries) 
GlobalSwitchoverSharedConnLock(ctx context.Context) error { + _, err := q.db.Exec(ctx, globalSwitchoverSharedConnLock) + return err +} + +const globalSwitchoverTxExclusiveConnLock = `-- name: GlobalSwitchoverTxExclusiveConnLock :exec +SELECT pg_advisory_xact_lock(4369) +` + +func (q *Queries) GlobalSwitchoverTxExclusiveConnLock(ctx context.Context) error { + _, err := q.db.Exec(ctx, globalSwitchoverTxExclusiveConnLock) + return err +} + +const lastLogID = `-- name: LastLogID :one +SELECT COALESCE(MAX(id), 0)::bigint +FROM switchover_log +` + +func (q *Queries) LastLogID(ctx context.Context) (int64, error) { + row := q.db.QueryRow(ctx, lastLogID) + var column_1 int64 + err := row.Scan(&column_1) + return column_1, err +} + +const logEvents = `-- name: LogEvents :many +SELECT id, + TIMESTAMP, + DATA +FROM switchover_log +WHERE id > $1 +ORDER BY id ASC +LIMIT 100 +` + +func (q *Queries) LogEvents(ctx context.Context, id int64) ([]SwitchoverLog, error) { + rows, err := q.db.Query(ctx, logEvents, id) + if err != nil { + return nil, err + } + defer rows.Close() + var items []SwitchoverLog + for rows.Next() { + var i SwitchoverLog + if err := rows.Scan(&i.ID, &i.Timestamp, &i.Data); err != nil { + return nil, err + } + items = append(items, i) + } + if err := rows.Err(); err != nil { + return nil, err + } + return items, nil +} + +const sequenceNames = `-- name: SequenceNames :many +SELECT sequence_name +FROM information_schema.sequences +WHERE sequence_catalog = current_database() + AND sequence_schema = 'public' + AND sequence_name != 'change_log_id_seq' +` + +func (q *Queries) SequenceNames(ctx context.Context) ([]string, error) { + rows, err := q.db.Query(ctx, sequenceNames) + if err != nil { + return nil, err + } + defer rows.Close() + var items []string + for rows.Next() { + var sequence_name string + if err := rows.Scan(&sequence_name); err != nil { + return nil, err + } + items = append(items, sequence_name) + } + if err := rows.Err(); err != nil { + return nil, 
err + } + return items, nil +} + +const tableColumns = `-- name: TableColumns :many +SELECT col.table_name, + col.column_name, + col.data_type, + col.ordinal_position +FROM information_schema.columns col + JOIN information_schema.tables t ON t.table_catalog = col.table_catalog + AND t.table_schema = col.table_schema + AND t.table_name = col.table_name + AND t.table_type = 'BASE TABLE' +WHERE col.table_catalog = current_database() + AND col.table_schema = 'public' +` + +func (q *Queries) TableColumns(ctx context.Context) ([]InformationSchemaColumn, error) { + rows, err := q.db.Query(ctx, tableColumns) + if err != nil { + return nil, err + } + defer rows.Close() + var items []InformationSchemaColumn + for rows.Next() { + var i InformationSchemaColumn + if err := rows.Scan( + &i.TableName, + &i.ColumnName, + &i.DataType, + &i.OrdinalPosition, + ); err != nil { + return nil, err + } + items = append(items, i) + } + if err := rows.Err(); err != nil { + return nil, err + } + return items, nil +} + +const unlockAll = `-- name: UnlockAll :exec +SELECT pg_advisory_unlock_all() +` + +func (q *Queries) UnlockAll(ctx context.Context) error { + _, err := q.db.Exec(ctx, unlockAll) + return err +} diff --git a/swo/swogrp/group.go b/swo/swogrp/group.go index 6f43c9de25..bf7f75215e 100644 --- a/swo/swogrp/group.go +++ b/swo/swogrp/group.go @@ -48,7 +48,9 @@ type Group struct { leader bool mx sync.Mutex - nextDBNodes map[uuid.UUID]struct{} + nextDBValid *Set + oldDBValid *Set + nodeIDs *Set ackMsgs chan map[uuid.UUID]*ackWait @@ -73,10 +75,11 @@ type TaskInfo struct { type Node struct { ID uuid.UUID - IsLeader bool - CanExec bool - OldDBValid bool - NewDBValid bool + IsLeader bool + CanExec bool + + OldDBValid func() bool + NewDBValid func() bool Tasks []TaskInfo } @@ -89,7 +92,9 @@ func NewGroup(cfg Config) *Group { tasks: make(map[uuid.UUID]TaskInfo), State: stateNeedsReset, ackMsgs: make(chan map[uuid.UUID]*ackWait, 1), - nextDBNodes: make(map[uuid.UUID]struct{}), + nextDBValid: 
NewSet(), + oldDBValid: NewSet(), + nodeIDs: NewSet(), } g.ackMsgs <- make(map[uuid.UUID]*ackWait) @@ -117,20 +122,26 @@ func cloneTasks(in []TaskInfo) []TaskInfo { } func (g *Group) Status() Status { - g.mx.Lock() - defer g.mx.Unlock() - var nodes []Node + for _, id := range g.nodeIDs.List() { + node := Node{ + ID: id, + } + g.mx.Lock() + if n := g.nodes[id]; n != nil { + node = *n + } + g.mx.Unlock() - for _, n := range g.nodes { - cpy := *n - cpy.Tasks = cloneTasks(n.Tasks) - nodes = append(nodes, cpy) + node.NewDBValid = func() bool { return g.nextDBValid.Has(node.ID) } + node.OldDBValid = func() bool { return g.oldDBValid.Has(node.ID) } + nodes = append(nodes, node) } failed := make([]TaskInfo, len(g.failed)) + g.mx.Lock() + defer g.mx.Unlock() copy(failed, g.failed) - if g.State == stateReset && time.Since(g.resetS) > time.Minute { g.State = stateNeedsReset } @@ -144,14 +155,13 @@ func (g *Group) Status() Status { func (g *Group) loopNextLog() { for msg := range g.NextLog.Events() { + g.nodeIDs.Add(msg.Node) if msg.Type != "hello-next" { // ignore continue } - g.mx.Lock() - g.addNode(msg.Node, false, true, false) - g.mx.Unlock() + g.nextDBValid.Add(msg.Node) } } @@ -239,7 +249,8 @@ func (g *Group) resetState() { // addNode adds a node to the group, returns true if we have become the leader node // after a reset. 
-func (g *Group) addNode(id uuid.UUID, oldDB, newDB, exec bool) bool { +func (g *Group) addNode(id uuid.UUID, exec bool) bool { + g.oldDBValid.Add(id) if g.State != stateReset { g.State = stateNeedsReset } @@ -248,8 +259,6 @@ func (g *Group) addNode(id uuid.UUID, oldDB, newDB, exec bool) bool { n = &Node{ID: id} g.nodes[id] = n } - n.NewDBValid = n.NewDBValid || newDB - n.OldDBValid = n.OldDBValid || oldDB n.CanExec = n.CanExec || exec var isNewLeader bool @@ -348,6 +357,7 @@ func (g *Group) updateTask(msg swomsg.Message, upsert bool) error { var ErrDone = errors.New("already done") func (g *Group) processMessage(ctx context.Context, msg swomsg.Message) error { + g.nodeIDs.Add(msg.Node) g.mx.Lock() defer g.mx.Unlock() @@ -357,7 +367,7 @@ func (g *Group) processMessage(ctx context.Context, msg swomsg.Message) error { switch msg.Type { case "hello-exec": - if g.addNode(msg.Node, true, false, true) { + if g.addNode(msg.Node, true) { // we are the new leader, perform DB reset return g.startTask(ctx, "reset-db", g.ResetFunc) } @@ -368,7 +378,7 @@ func (g *Group) processMessage(ctx context.Context, msg swomsg.Message) error { case "task-progress": return g.updateTask(msg, true) case "hello": - g.addNode(msg.Node, true, false, false) + g.addNode(msg.Node, false) case "ping": case "done": g.State = stateDone diff --git a/swo/swogrp/set.go b/swo/swogrp/set.go new file mode 100644 index 0000000000..0b8214c796 --- /dev/null +++ b/swo/swogrp/set.go @@ -0,0 +1,45 @@ +package swogrp + +import ( + "sync" + + "github.com/google/uuid" +) + +type Set struct { + m map[uuid.UUID]struct{} + mx sync.Mutex +} + +func NewSet() *Set { + return &Set{ + m: make(map[uuid.UUID]struct{}), + } +} + +func (s *Set) Add(id uuid.UUID) { + s.mx.Lock() + defer s.mx.Unlock() + + s.m[id] = struct{}{} +} + +func (s *Set) Has(id uuid.UUID) bool { + s.mx.Lock() + defer s.mx.Unlock() + + _, ok := s.m[id] + return ok +} + +func (s *Set) List() []uuid.UUID { + s.mx.Lock() + defer s.mx.Unlock() + + ids := 
make([]uuid.UUID, 0, len(s.m)) + for id := range s.m { + ids = append(ids, id) + } + + return ids +} diff --git a/swo/swomsg/log.go b/swo/swomsg/log.go index 290e0b514c..f36cd113cc 100644 --- a/swo/swomsg/log.go +++ b/swo/swomsg/log.go @@ -4,12 +4,12 @@ import ( "context" "database/sql" "encoding/json" - "errors" "fmt" "time" "github.com/jackc/pgx/v4" "github.com/jackc/pgx/v4/stdlib" + "github.com/target/goalert/swo/swodb" "github.com/target/goalert/util/log" ) @@ -34,9 +34,14 @@ type logEvent struct { } func NewLog(ctx context.Context, db *sql.DB) (*Log, error) { - var lastID int64 + conn, err := stdlib.AcquireConn(db) + if err != nil { + return nil, err + } + defer stdlib.ReleaseConn(db, conn) + // only ever load new events - err := db.QueryRowContext(ctx, "select coalesce(max(id), 0) from switchover_log").Scan(&lastID) + lastID, err := swodb.New(conn).LastLogID(ctx) if err != nil { return nil, err } @@ -62,7 +67,7 @@ func (l *Log) readLoop(ctx context.Context, lastID int64) { for _, e := range events { lastID = e.ID var w Message - err = json.Unmarshal(e.Data, &w) + err = json.Unmarshal(e.Data.Bytes, &w) if err != nil { log.Log(ctx, fmt.Errorf("error parsing event: %v", err)) continue @@ -89,33 +94,20 @@ func ctxSleep(ctx context.Context, d time.Duration) error { } } -func (l *Log) loadEvents(ctx context.Context, lastID int64) ([]logEvent, error) { +func (l *Log) loadEvents(ctx context.Context, lastID int64) ([]swodb.SwitchoverLog, error) { err := ctxSleep(ctx, PollInterval-time.Since(l.lastLoad)) if err != nil { return nil, err } l.lastLoad = time.Now() - rows, err := l.db.QueryContext(ctx, "select id, timestamp, data from switchover_log where id > $1 order by id asc limit 100", lastID) - if errors.Is(err, sql.ErrNoRows) { - return nil, nil - } + conn, err := stdlib.AcquireConn(l.db) if err != nil { return nil, err } - defer rows.Close() - - var events []logEvent - var r logEvent - for rows.Next() { - err := rows.Scan(&r.ID, &r.Timestamp, &r.Data) - if err != 
nil { - return nil, err - } - events = append(events, r) - } + defer stdlib.ReleaseConn(l.db, conn) - return events, nil + return swodb.New(conn).LogEvents(ctx, lastID) } func (l *Log) Append(ctx context.Context, msg Message) error { diff --git a/swo/syncchanges.go b/swo/syncchanges.go index 811f1dd46b..0ff68170ca 100644 --- a/swo/syncchanges.go +++ b/swo/syncchanges.go @@ -5,32 +5,27 @@ import ( "fmt" "github.com/jackc/pgx/v4" + "github.com/target/goalert/swo/swodb" ) // syncChanges will apply all changes recorded in the change_log table to the next DB. func (e *Execute) syncChanges(ctx context.Context, srcTx, dstTx pgxQueryer) ([]int, error) { + changeRows, err := swodb.New(srcTx).Changes(ctx) + if err != nil { + return nil, fmt.Errorf("fetch changes: %w", err) + } + type rowID struct { - table string id string + table string } - - var r rowID - var changeIDs []int - var changeID int changes := make(map[rowID]struct{}) rowIDs := make(map[string][]string) - _, err := srcTx.QueryFunc(ctx, "select id, table_name, row_id from change_log", nil, []interface{}{&changeID, &r.table, &r.id}, func(pgx.QueryFuncRow) error { - if _, ok := changes[r]; ok { - return nil - } - changes[r] = struct{}{} - rowIDs[r.table] = append(rowIDs[r.table], r.id) - changeIDs = append(changeIDs, changeID) - - return nil - }) - if err != nil { - return nil, fmt.Errorf("fetch changes: %w", err) + var changeIDs []int + for _, row := range changeRows { + changes[rowID{row.RowID, row.TableName}] = struct{}{} + rowIDs[row.TableName] = append(rowIDs[row.TableName], row.RowID) + changeIDs = append(changeIDs, int(row.ID)) } if len(changes) == 0 { return nil, nil diff --git a/swo/table.go b/swo/table.go index 0eb7857852..7efc0a78df 100644 --- a/swo/table.go +++ b/swo/table.go @@ -44,13 +44,13 @@ func (t Table) QuotedLockTriggerName() string { func (t Table) ColumnNames() []string { colNames := make([]string, len(t.Columns)) for i, col := range t.Columns { - colNames[i] = col.Name + colNames[i] = 
col.ColumnName } return colNames } func (t Table) SelectRowsQuery() string { - if t.IDCol.Type == "USER-DEFINED" { + if t.IDCol.DataType == "USER-DEFINED" { return fmt.Sprintf(`select id::text, to_jsonb(row) from %s row where id::text = any($1)`, t.QuotedName()) } return fmt.Sprintf(`select id::text, to_jsonb(row) from %s row where id = any($1)`, t.QuotedName()) @@ -74,7 +74,7 @@ func (t Table) UpdateRowsQuery() string { if i > 0 { set.WriteString(", ") } - set.WriteString(fmt.Sprintf("%s = data.%s", col.Name, col.Name)) + set.WriteString(fmt.Sprintf("%s = data.%s", col.ColumnName, col.ColumnName)) } return fmt.Sprintf(` diff --git a/swo/waitforactivetx.go b/swo/waitforactivetx.go index 2f7a77ab9e..fd7c427035 100644 --- a/swo/waitforactivetx.go +++ b/swo/waitforactivetx.go @@ -5,6 +5,7 @@ import ( "fmt" "time" + "github.com/target/goalert/swo/swodb" "github.com/target/goalert/swo/swogrp" ) @@ -16,16 +17,17 @@ func (e *Execute) WaitForActiveTx(ctx context.Context) { swogrp.Progressf(ctx, "waiting for in-flight transactions to finish") + db := swodb.New(e.mainDBConn) + var now time.Time - err := e.mainDBConn.QueryRow(ctx, "select now()").Scan(&now) + now, err := db.CurrentTime(ctx) if err != nil { e.err = fmt.Errorf("wait for active tx: get current time: %w", err) return } for { - var n int - err = e.mainDBConn.QueryRow(ctx, "select count(*) from pg_stat_activity where state <> 'idle' and xact_start <= $1", now).Scan(&n) + n, err := db.ActiveTxCount(ctx, now) if err != nil { e.err = fmt.Errorf("wait for active tx: get active tx count: %w", err) return From 051759c6e3c46ef83927a2b86432b7c96b8f1e3d Mon Sep 17 00:00:00 2001 From: Nathaniel Caza Date: Tue, 28 Jun 2022 12:56:47 -0500 Subject: [PATCH 106/225] update gen --- swo/swodb/db.go | 2 +- swo/swodb/models.go | 2 +- swo/swodb/queries.sql.go | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/swo/swodb/db.go b/swo/swodb/db.go index 74d516e253..ed75f5e582 100644 --- a/swo/swodb/db.go +++ 
b/swo/swodb/db.go @@ -1,6 +1,6 @@ // Code generated by sqlc. DO NOT EDIT. // versions: -// sqlc v1.13.0 +// sqlc v1.14.0 package swodb diff --git a/swo/swodb/models.go b/swo/swodb/models.go index 7fcf18645f..f0c7658e9e 100644 --- a/swo/swodb/models.go +++ b/swo/swodb/models.go @@ -1,6 +1,6 @@ // Code generated by sqlc. DO NOT EDIT. // versions: -// sqlc v1.13.0 +// sqlc v1.14.0 package swodb diff --git a/swo/swodb/queries.sql.go b/swo/swodb/queries.sql.go index 875d317470..a16997f06b 100644 --- a/swo/swodb/queries.sql.go +++ b/swo/swodb/queries.sql.go @@ -1,6 +1,6 @@ // Code generated by sqlc. DO NOT EDIT. // versions: -// sqlc v1.13.0 +// sqlc v1.14.0 // source: queries.sql package swodb From 793f7271a9e886a0c4124d3ec43c9e5e2b8e9750 Mon Sep 17 00:00:00 2001 From: Nathaniel Caza Date: Tue, 28 Jun 2022 13:03:35 -0500 Subject: [PATCH 107/225] fix startup --- keyring/store.go | 2 +- ...538-switchover-mk2.sql => 20220628125954-switchover-mk2.sql} | 0 2 files changed, 1 insertion(+), 1 deletion(-) rename migrate/migrations/{20220405163538-switchover-mk2.sql => 20220628125954-switchover-mk2.sql} (100%) diff --git a/keyring/store.go b/keyring/store.go index 3812540460..1431da6e3e 100644 --- a/keyring/store.go +++ b/keyring/store.go @@ -344,7 +344,7 @@ func (db *DB) commitNewKeyring(ctx context.Context, tx *sql.Tx) error { if rowCount == 0 { // failed to insert the new data, so scan old & refresh var vKeysData, signKeyData, nextKeyData []byte - var rotateT time.Time + var rotateT sql.NullTime err = db.fetchKeys.QueryRowContext(ctx, db.cfg.Name).Scan(&vKeysData, &signKeyData, &nextKeyData, &t, &rotateT, &rotationCount) if err != nil { return err diff --git a/migrate/migrations/20220405163538-switchover-mk2.sql b/migrate/migrations/20220628125954-switchover-mk2.sql similarity index 100% rename from migrate/migrations/20220405163538-switchover-mk2.sql rename to migrate/migrations/20220628125954-switchover-mk2.sql From 260ad7c30c3e6ab71bd3670be1b348417c0e18f6 Mon Sep 17 
00:00:00 2001 From: Nathaniel Caza Date: Fri, 1 Jul 2022 16:07:54 -0500 Subject: [PATCH 108/225] refactor sync logic to it's own package --- Makefile | 2 +- Makefile.binaries.mk | 2 +- devtools/genmake/template.mk | 2 +- sqlc.yaml | 11 +- swo/changelog.go | 60 ------- swo/drvconnector.go | 29 +-- swo/execute.go | 300 ++++--------------------------- swo/manager.go | 18 +- swo/pauseapps.go | 47 +++++ swo/preflightlocks.go | 68 ------- swo/queries.sql | 87 --------- swo/reset.go | 105 ----------- swo/rowtracker.go | 158 ---------------- swo/swodb/queries.sql.go | 139 ++++---------- swo/{ => swoinfo}/pgtables.sql | 0 swo/swoinfo/queries.sql | 30 ++++ swo/swoinfo/scansequences.go | 21 +++ swo/{ => swoinfo}/scantables.go | 52 ++++-- swo/swoinfo/table.go | 23 +++ swo/swomsg/queries.sql | 12 ++ swo/{ => swosync}/changelog.sql | 0 swo/swosync/initialsync.go | 124 +++++++++++++ swo/swosync/logicalreplicator.go | 41 +++++ swo/swosync/logicalsync.go | 154 ++++++++++++++++ swo/swosync/queries.sql | 18 ++ swo/swosync/reset.go | 63 +++++++ swo/swosync/rowset.go | 14 ++ swo/swosync/safety.go | 40 +++++ swo/swosync/sequencesync.go | 48 +++++ swo/swosync/start.go | 99 ++++++++++ swo/swosync/tablesync.go | 235 ++++++++++++++++++++++++ swo/syncchanges.go | 99 ---------- swo/syncfull.go | 164 ----------------- swo/syncloop.go | 78 -------- swo/syncsequences.go | 39 ---- swo/table.go | 86 --------- swo/waitforactivetx.go | 42 ----- 37 files changed, 1110 insertions(+), 1400 deletions(-) delete mode 100644 swo/changelog.go create mode 100644 swo/pauseapps.go delete mode 100644 swo/preflightlocks.go delete mode 100644 swo/queries.sql delete mode 100644 swo/reset.go delete mode 100644 swo/rowtracker.go rename swo/{ => swoinfo}/pgtables.sql (100%) create mode 100644 swo/swoinfo/queries.sql create mode 100644 swo/swoinfo/scansequences.go rename swo/{ => swoinfo}/scantables.go (52%) create mode 100644 swo/swoinfo/table.go create mode 100644 swo/swomsg/queries.sql rename swo/{ => 
swosync}/changelog.sql (100%) create mode 100644 swo/swosync/initialsync.go create mode 100644 swo/swosync/logicalreplicator.go create mode 100644 swo/swosync/logicalsync.go create mode 100644 swo/swosync/queries.sql create mode 100644 swo/swosync/reset.go create mode 100644 swo/swosync/rowset.go create mode 100644 swo/swosync/safety.go create mode 100644 swo/swosync/sequencesync.go create mode 100644 swo/swosync/start.go create mode 100644 swo/swosync/tablesync.go delete mode 100644 swo/syncchanges.go delete mode 100644 swo/syncfull.go delete mode 100644 swo/syncloop.go delete mode 100644 swo/syncsequences.go delete mode 100644 swo/table.go delete mode 100644 swo/waitforactivetx.go diff --git a/Makefile b/Makefile index 245eee7613..971e18eb1a 100644 --- a/Makefile +++ b/Makefile @@ -96,7 +96,7 @@ start-swo: bin/psql-lite bin/goalert bin/waitfor bin/runproc ./bin/goalert migrate --db-url=postgres://goalert@localhost/goalert ./bin/psql-lite -d postgres://goalert@localhost -c "update switchover_state set current_state = 'idle'; drop database if exists goalert2; create database goalert2;" ./bin/goalert migrate --db-url=postgres://goalert@localhost/goalert2 - GOALERT_VERSION=$(GIT_VERSION) ./bin/runproc -f Procfile.swo + GOALERT_VERSION=$(GIT_VERSION) ./bin/runproc -f Procfile.swo -l Procfile.local start: bin/goalert node_modules web/src/schema.d.ts $(BIN_DIR)/tools/prometheus go run ./devtools/waitfor -timeout 1s "$(DB_URL)" || make postgres diff --git a/Makefile.binaries.mk b/Makefile.binaries.mk index aa3904709d..28b2e7ac1e 100644 --- a/Makefile.binaries.mk +++ b/Makefile.binaries.mk @@ -5,7 +5,7 @@ BIN_DIR=bin GO_DEPS := Makefile.binaries.mk $(shell find . 
-path ./web/src -prune -o -path ./vendor -prune -o -path ./.git -prune -o -type f -name "*.go" -print) go.sum -GO_DEPS += migrate/migrations/ migrate/migrations/*.sql web/index.html graphql2/graphqlapp/slack.manifest.yaml swo/*.sql +GO_DEPS += migrate/migrations/ migrate/migrations/*.sql web/index.html graphql2/graphqlapp/slack.manifest.yaml swo/*/*.sql GO_DEPS += graphql2/mapconfig.go graphql2/maplimit.go graphql2/generated.go graphql2/models_gen.go GO_DEPS += web/explore.html web/live.js diff --git a/devtools/genmake/template.mk b/devtools/genmake/template.mk index 0fb437f078..fff15c3fe8 100644 --- a/devtools/genmake/template.mk +++ b/devtools/genmake/template.mk @@ -3,7 +3,7 @@ BIN_DIR=bin GO_DEPS := Makefile.binaries.mk $(shell find . -path ./web/src -prune -o -path ./vendor -prune -o -path ./.git -prune -o -type f -name "*.go" -print) go.sum -GO_DEPS += migrate/migrations/ migrate/migrations/*.sql web/index.html graphql2/graphqlapp/slack.manifest.yaml swo/*.sql +GO_DEPS += migrate/migrations/ migrate/migrations/*.sql web/index.html graphql2/graphqlapp/slack.manifest.yaml swo/*/*.sql GO_DEPS += graphql2/mapconfig.go graphql2/maplimit.go graphql2/generated.go graphql2/models_gen.go GO_DEPS += web/explore.html web/live.js diff --git a/sqlc.yaml b/sqlc.yaml index 37c73c3595..2860aff936 100644 --- a/sqlc.yaml +++ b/sqlc.yaml @@ -1,11 +1,14 @@ version: '2' sql: - schema: - - swo/pgtables.sql - - swo/changelog.sql + - swo/swoinfo/pgtables.sql + - swo/swosync/changelog.sql - migrate/migrations/20180816094955-switchover-state.sql - - migrate/migrations/20220405163538-switchover-mk2.sql - queries: [swo/queries.sql] + - migrate/migrations/20220628125954-switchover-mk2.sql + queries: + - swo/swosync/queries.sql + - swo/swoinfo/queries.sql + - swo/swomsg/queries.sql engine: postgresql gen: go: diff --git a/swo/changelog.go b/swo/changelog.go deleted file mode 100644 index adba316e99..0000000000 --- a/swo/changelog.go +++ /dev/null @@ -1,60 +0,0 @@ -package swo - -import ( 
- "context" - _ "embed" - "fmt" - - "github.com/target/goalert/swo/swogrp" -) - -//go:embed changelog.sql -var changelogQuery string - -func (e *Execute) exec(ctx context.Context, conn pgxQueryer, query string) { - if e.err != nil { - return - } - - _, err := conn.Exec(ctx, query) - if err != nil { - e.err = fmt.Errorf("%s: %w", query, err) - return - } -} - -func (e *Execute) readErr() error { - err := e.err - e.err = nil - return err -} - -// EnableChangeLog enables DB change tracking by creating a change_log table that -// records table and row IDs for each INSERT, UPDATE, or DELETE. -func (e *Execute) EnableChangeLog(ctx context.Context) { - if e.err != nil { - return - } - - swogrp.Progressf(ctx, "enabling change log") - e.exec(ctx, e.mainDBConn, changelogQuery) - - // create triggers for all tables - for _, table := range e.tables { - if table.SkipSync() { - continue - } - query := fmt.Sprintf(` - CREATE TRIGGER %s AFTER INSERT OR UPDATE OR DELETE ON %s - FOR EACH ROW EXECUTE PROCEDURE fn_process_change_log() - `, table.QuotedChangeTriggerName(), table.QuotedName()) - e.exec(ctx, e.mainDBConn, query) - } - - e.exec(ctx, e.mainDBConn, - "update switchover_state set current_state = 'in_progress' where current_state = 'idle'") - - if e.err != nil { - e.err = fmt.Errorf("enable change log: %w", e.err) - } -} diff --git a/swo/drvconnector.go b/swo/drvconnector.go index 168330025f..7c6a7d6548 100644 --- a/swo/drvconnector.go +++ b/swo/drvconnector.go @@ -3,18 +3,16 @@ package swo import ( "context" "database/sql/driver" - "errors" "sync" + "github.com/jackc/pgx/v4" "github.com/jackc/pgx/v4/stdlib" - "github.com/target/goalert/swo/swogrp" ) type Connector struct { dbcOld, dbcNew driver.Connector isDone bool - id int mx sync.Mutex } @@ -43,20 +41,31 @@ func (drv *Connector) Connect(ctx context.Context) (driver.Conn, error) { return nil, err } - drv.id++ conn := c.(*stdlib.Conn) + var b pgx.Batch + b.Queue("select pg_advisory_lock_shared(4369)") + b.Queue("select 
current_state = 'use_next_db' FROM switchover_state") - err = SessionLock(ctx, conn) - if errors.Is(err, swogrp.ErrDone) { + res := conn.Conn().SendBatch(ctx, &b) + if _, err := res.Exec(); err != nil { + conn.Close() + return nil, err + } + defer res.Close() + + var useNext bool + if err := res.QueryRow().Scan(&useNext); err != nil { + conn.Close() + return nil, err + } + + if useNext { + conn.Close() drv.mx.Lock() drv.isDone = true drv.mx.Unlock() return drv.dbcNew.Connect(ctx) } - if err != nil { - conn.Close() - return nil, err - } return conn, nil } diff --git a/swo/execute.go b/swo/execute.go index e87ddafc8a..3aa1a8ab11 100644 --- a/swo/execute.go +++ b/swo/execute.go @@ -2,293 +2,59 @@ package swo import ( "context" - "encoding/json" - "errors" "fmt" - "time" - "github.com/jackc/pgconn" "github.com/jackc/pgx/v4" - "github.com/target/goalert/swo/swodb" "github.com/target/goalert/swo/swogrp" - "github.com/target/goalert/util/sqlutil" + "github.com/target/goalert/swo/swosync" ) -type Execute struct { - err error - tables []Table - - seqNames []string - - mainDBConn, nextDBConn *pgx.Conn - - grp *swogrp.Group - - rowIDs map[string]map[string]struct{} - - stagedInserts []stagedID - stagedDeletes []stagedID -} - -func NewExecute(ctx context.Context, mainDBConn, nextDBConn *pgx.Conn, grp *swogrp.Group) (*Execute, error) { - tables, err := ScanTables(ctx, mainDBConn) - if err != nil { - return nil, fmt.Errorf("scan tables: %w", err) - } - - seqNames, err := swodb.New(mainDBConn).SequenceNames(ctx) - if err != nil { - return nil, fmt.Errorf("scan sequences: %w", err) - } - - return &Execute{ - tables: tables, - seqNames: seqNames, - mainDBConn: mainDBConn, - nextDBConn: nextDBConn, - grp: grp, - }, nil -} - func (m *Manager) DoExecute(ctx context.Context) error { return m.withConnFromBoth(ctx, func(ctx context.Context, oldConn, newConn *pgx.Conn) error { - exec, err := NewExecute(ctx, oldConn, newConn, m.grp) + rep := swosync.NewLogicalReplicator() + 
rep.SetSourceDB(oldConn) + rep.SetDestinationDB(newConn) + rep.SetProgressFunc(swogrp.Progressf) + + err := rep.Reset(ctx) if err != nil { - return err + return fmt.Errorf("reset: %w", err) } - exec.EnableChangeLog(ctx) - exec.DisableNextDBTriggers(ctx) - exec.WaitForActiveTx(ctx) - exec.SyncFull(ctx) - exec.ReadRowIDs(ctx) - exec.SyncLoop(ctx) - exec.PauseApps(ctx) - exec.FinalSync(ctx) - - return exec.readErr() - }) -} - -// PauseApps puts all nodes into a "paused" state: -// - Engine no longer cycles -// - Idle DB connections are disabled -// - Event listeners (postgres pub/sub) are disabled -func (e *Execute) PauseApps(ctx context.Context) { - if e.err != nil { - return - } + err = rep.Start(ctx) + if err != nil { + return fmt.Errorf("start: %w", err) + } - e.Progressf(ctx, "pausing") - err := e.grp.Pause(ctx) - if err != nil { - e.err = fmt.Errorf("pause: %w", err) - return - } + err = rep.InitialSync(ctx) + if err != nil { + return fmt.Errorf("initial sync: %w", err) + } - t := time.NewTicker(10 * time.Millisecond) - defer t.Stop() - for range t.C { - s := e.grp.Status() - var pausing, waiting int - for _, node := range s.Nodes { - for _, task := range node.Tasks { - if task.Name == "pause" { - pausing++ - } - if task.Name == "resume-after" { - waiting++ - } + for i := 0; i < 10; i++ { + err = rep.LogicalSync(ctx) + if err != nil { + return fmt.Errorf("logical sync: %w", err) } } - if pausing == 0 && waiting == len(s.Nodes) { - break - } - if waiting == 0 { - e.err = fmt.Errorf("pause: timed out waiting for nodes to pause") - return + err = m.PauseApps(ctx) + if err != nil { + return fmt.Errorf("pause apps: %w", err) } - } -} - -// DisableTriggers will disable all triggers in the new DB. 
-func (e *Execute) DisableNextDBTriggers(ctx context.Context) { - if e.err != nil { - return - } - swogrp.Progressf(ctx, "disabling triggers") - - var send pgx.Batch - for _, table := range e.tables { - send.Queue(fmt.Sprintf("ALTER TABLE %s DISABLE TRIGGER USER", table.QuotedName())) - } - - e.err = e.nextDBConn.SendBatch(ctx, &send).Close() - if e.err != nil { - e.err = fmt.Errorf("disable triggers on next DB: %w", e.err) - } -} - -// EnableTriggers will re-enable triggers in the new DB. -func (e *Execute) enableTriggers(ctx context.Context) error { - var send pgx.Batch - - for _, table := range e.tables { - send.Queue(fmt.Sprintf("ALTER TABLE %s ENABLE TRIGGER USER", table.QuotedName())) - } - - e.err = e.nextDBConn.SendBatch(ctx, &send).Close() - if e.err != nil { - return fmt.Errorf("enable triggers on next DB: %w", e.err) - } - return nil -} - -// stopTheWorld grabs the exclusive advisory lock and then ensures the current state -// is set to in_progress. -func (e *Execute) stopTheWorld(ctx context.Context, srcTx pgx.Tx) error { - e.Progressf(ctx, "stop-the-world") - err := swodb.New(srcTx).GlobalSwitchoverTxExclusiveConnLock(ctx) - if err != nil { - return err - } - - stat, err := swodb.New(srcTx).CurrentSwitchoverStateNoWait(ctx) - if err != nil { - return err - } - switch stat { - case swodb.EnumSwitchoverStateInProgress: - return nil - case swodb.EnumSwitchoverStateUseNextDb: - return swogrp.ErrDone - case swodb.EnumSwitchoverStateIdle: - return errors.New("not in progress") - default: - if e.err == nil { - return errors.New("unknown state: " + string(stat)) + for i := 0; i < 10; i++ { + err = rep.LogicalSync(ctx) + if err != nil { + return fmt.Errorf("logical sync (after pause): %w", err) + } } - return e.err - } -} - -// FinalSync will attempt to lock and finalize the switchover. 
-func (e *Execute) FinalSync(ctx context.Context) { - if e.err != nil { - return - } - - e.Progressf(ctx, "finalizing") - - // set timeouts before waiting on locks - e.exec(ctx, e.mainDBConn, "set idle_in_transaction_session_timeout = 3000") - e.exec(ctx, e.mainDBConn, "set lock_timeout = 3000") - e.SyncLoop(ctx) - if e.err != nil { - return - } - - srcTx, dstTx, err := e.syncTx(ctx, false) - if err != nil { - e.err = fmt.Errorf("final sync: %w", err) - return - } - defer srcTx.Rollback(ctx) - defer dstTx.Rollback(ctx) - - if err = e.stopTheWorld(ctx, srcTx); err != nil { - e.err = fmt.Errorf("final sync: stop-the-world: %w", err) - return - } - - go e.Progressf(ctx, "last sync") - _, err = e.syncChanges(ctx, srcTx, dstTx) - if err != nil { - e.err = fmt.Errorf("sync change log: %w", err) - return - } - - if err = e.syncSequences(ctx, srcTx, dstTx); err != nil { - e.err = fmt.Errorf("sync sequences: %w", err) - return - } - - if err = dstTx.Commit(ctx); err != nil { - e.err = fmt.Errorf("commit dst: %w", err) - return - } - - if err = e.enableTriggers(ctx); err != nil { - return - } - - _, err = srcTx.Exec(ctx, "update switchover_state set current_state = 'use_next_db' where current_state = 'in_progress'") - if err != nil { - e.err = fmt.Errorf("update switchover state: %w", err) - return - } - err = srcTx.Commit(ctx) - if err != nil { - e.err = fmt.Errorf("commit src: %w", err) - return - } - - e.Progressf(ctx, "done") -} - -func (e *Execute) syncTx(ctx context.Context, readOnly bool) (src, dst pgx.Tx, err error) { - var srcOpts pgx.TxOptions - if readOnly { - srcOpts = pgx.TxOptions{ - AccessMode: pgx.ReadOnly, - IsoLevel: pgx.Serializable, - DeferrableMode: pgx.Deferrable, + err = rep.FinalSync(ctx) + if err != nil { + return fmt.Errorf("final sync: %w", err) } - } - - srcTx, err := e.mainDBConn.BeginTx(ctx, srcOpts) - if err != nil { - return nil, nil, fmt.Errorf("begin src: %w", err) - } - - dstTx, err := e.nextDBConn.BeginTx(ctx, pgx.TxOptions{}) - if err != 
nil { - srcTx.Rollback(ctx) - return nil, nil, fmt.Errorf("begin dst: %w", err) - } - - return srcTx, dstTx, nil -} -func (t Table) IDs(ids []string) interface{} { - switch t.IDCol.DataType { - case "integer", "bigint": - return sqlutil.IntArray(intIDs(ids)) - case "uuid": - return sqlutil.UUIDArray(ids) - } - return sqlutil.StringArray(ids) -} - -type syncData struct { - t Table - toInsert []syncRow - toUpdate []syncRow - toDelete []string -} - -type syncRow struct { - table string - id string - data json.RawMessage -} -type pgxQueryer interface { - SendBatch(ctx context.Context, b *pgx.Batch) pgx.BatchResults - QueryRow(ctx context.Context, sql string, args ...interface{}) pgx.Row - Query(context.Context, string, ...interface{}) (pgx.Rows, error) - Exec(context.Context, string, ...interface{}) (pgconn.CommandTag, error) - QueryFunc(context.Context, string, []interface{}, []interface{}, func(pgx.QueryFuncRow) error) (pgconn.CommandTag, error) + return nil + }) } diff --git a/swo/manager.go b/swo/manager.go index 9490f4ba3c..40a1f5fccc 100644 --- a/swo/manager.go +++ b/swo/manager.go @@ -10,8 +10,10 @@ import ( "github.com/jackc/pgx/v4" "github.com/jackc/pgx/v4/stdlib" "github.com/target/goalert/app/lifecycle" + "github.com/target/goalert/swo/swodb" "github.com/target/goalert/swo/swogrp" "github.com/target/goalert/swo/swomsg" + "github.com/target/goalert/swo/swosync" "github.com/target/goalert/util/log" ) @@ -78,6 +80,17 @@ func NewManager(cfg Config) (*Manager, error) { return m, nil } +func (m *Manager) DoReset(ctx context.Context) error { + return m.withConnFromOld(ctx, func(ctx context.Context, conn *pgx.Conn) error { + _, err := conn.Exec(ctx, swosync.ConnLockQuery) + if err != nil { + return err + } + + return swodb.New(conn).DisableChangeLogTriggers(ctx) + }) +} + func (m *Manager) DoPause(ctx context.Context) error { if m.pauseResume == nil { return errors.New("not initialized") @@ -128,12 +141,7 @@ func WithLockedConn(ctx context.Context, db *sql.DB, 
runFunc func(context.Contex return conn.Raw(func(driverConn interface{}) error { conn := driverConn.(*stdlib.Conn).Conn() - err := SwitchOverExecLock(ctx, conn) - if err != nil { - return err - } defer conn.Close(context.Background()) - defer UnlockConn(context.Background(), conn) return runFunc(ctx, conn) }) diff --git a/swo/pauseapps.go b/swo/pauseapps.go new file mode 100644 index 0000000000..72a6b4d814 --- /dev/null +++ b/swo/pauseapps.go @@ -0,0 +1,47 @@ +package swo + +import ( + "context" + "fmt" + "time" + + "github.com/target/goalert/swo/swogrp" +) + +// PauseApps puts all nodes into a "paused" state: +// - Engine no longer cycles +// - Idle DB connections are disabled +// - Event listeners (postgres pub/sub) are disabled +func (m *Manager) PauseApps(ctx context.Context) error { + swogrp.Progressf(ctx, "pausing apps") + err := m.grp.Pause(ctx) + if err != nil { + return fmt.Errorf("pause: %w", err) + } + + t := time.NewTicker(10 * time.Millisecond) + defer t.Stop() + for range t.C { + s := m.grp.Status() + var pausing, waiting int + for _, node := range s.Nodes { + for _, task := range node.Tasks { + if task.Name == "pause" { + pausing++ + } + if task.Name == "resume-after" { + waiting++ + } + } + } + + if pausing == 0 && waiting == len(s.Nodes) { + break + } + if waiting == 0 { + return fmt.Errorf("pause: timed out waiting for nodes to pause") + } + } + + return nil +} diff --git a/swo/preflightlocks.go b/swo/preflightlocks.go deleted file mode 100644 index 0e5e2732f2..0000000000 --- a/swo/preflightlocks.go +++ /dev/null @@ -1,68 +0,0 @@ -package swo - -import ( - "context" - "errors" - "fmt" - - "github.com/jackc/pgx/v4" - "github.com/jackc/pgx/v4/stdlib" - "github.com/target/goalert/swo/swodb" - "github.com/target/goalert/swo/swogrp" -) - -var ErrNoLock = errors.New("no lock") - -// SwitchOverExecLock will attempt to grab the GlobalSwitchOverExec lock. -// -// After acquiring the lock, it will ensure the switchover has not yet been -// completed. 
-// -// This lock should be acquired by an engine instance that is going to perform -// the sync & switchover. -func SwitchOverExecLock(ctx context.Context, conn *pgx.Conn) error { - gotLock, err := swodb.New(conn).GlobalSwitchoverExecLock(ctx) - if errors.Is(err, pgx.ErrNoRows) { - return swogrp.ErrDone - } - if err != nil { - return err - } - - if !gotLock { - return ErrNoLock - } - - return nil -} - -// UnlockConn will release all session locks or close the connection. -func UnlockConn(ctx context.Context, conn *pgx.Conn) { - err := swodb.New(conn).UnlockAll(ctx) - if err != nil { - conn.Close(ctx) - } -} - -// SessionLock will get a shared advisory lock for the connection. -func SessionLock(ctx context.Context, c *stdlib.Conn) error { - // Using literal here so we can avoid a prepared statement round trip. - // - // This will run for every new connection in SWO mode and for every - // query while idle connections are disabled during critical phase. - err := swodb.New(c.Conn()).GlobalSwitchoverSharedConnLock(ctx) - if err != nil { - return fmt.Errorf("get SWO shared session lock: %w", err) - } - - state, err := swodb.New(c.Conn()).CurrentSwitchoverState(ctx) - if err != nil { - return fmt.Errorf("get current SWO state: %w", err) - } - - if state == swodb.EnumSwitchoverStateUseNextDb { - return swogrp.ErrDone - } - - return nil -} diff --git a/swo/queries.sql b/swo/queries.sql deleted file mode 100644 index a3d23d266d..0000000000 --- a/swo/queries.sql +++ /dev/null @@ -1,87 +0,0 @@ --- name: ForeignKeys :many -SELECT src.relname::text, - dst.relname::text -FROM pg_catalog.pg_constraint con - JOIN pg_catalog.pg_namespace ns ON ns.nspname = 'public' - AND ns.oid = con.connamespace - JOIN pg_catalog.pg_class src ON src.oid = con.conrelid - JOIN pg_catalog.pg_class dst ON dst.oid = con.confrelid -WHERE con.contype = 'f' - AND NOT con.condeferrable; - --- name: Changes :many -SELECT id, - table_name, - row_id -FROM change_log; - --- name: DeleteChanges :exec -DELETE 
FROM change_log -WHERE id = ANY($1); - --- name: CurrentTime :one -SELECT now()::timestamptz; - --- name: ActiveTxCount :one -SELECT COUNT(*) -FROM pg_stat_activity -WHERE "state" <> 'idle' - AND "xact_start" <= $1; - --- name: GlobalSwitchoverSharedConnLock :exec -SELECT pg_advisory_lock_shared(4369); - --- name: GlobalSwitchoverTxExclusiveConnLock :exec -SELECT pg_advisory_xact_lock(4369); - --- name: GlobalSwitchoverExecLock :one -SELECT pg_try_advisory_lock(4370) -FROM switchover_state -WHERE current_state != 'use_next_db'; - --- name: TableColumns :many -SELECT col.table_name, - col.column_name, - col.data_type, - col.ordinal_position -FROM information_schema.columns col - JOIN information_schema.tables t ON t.table_catalog = col.table_catalog - AND t.table_schema = col.table_schema - AND t.table_name = col.table_name - AND t.table_type = 'BASE TABLE' -WHERE col.table_catalog = current_database() - AND col.table_schema = 'public'; - --- name: SetIdleTimeout :exec -SET idle_in_transaction_session_timeout = 3000; - --- name: CurrentSwitchoverState :one -SELECT current_state -FROM switchover_state; - --- name: CurrentSwitchoverStateNoWait :one -SELECT current_state -FROM switchover_state NOWAIT; - --- name: UnlockAll :exec -SELECT pg_advisory_unlock_all(); - --- name: LogEvents :many -SELECT id, - TIMESTAMP, - DATA -FROM switchover_log -WHERE id > $1 -ORDER BY id ASC -LIMIT 100; - --- name: LastLogID :one -SELECT COALESCE(MAX(id), 0)::bigint -FROM switchover_log; - --- name: SequenceNames :many -SELECT sequence_name -FROM information_schema.sequences -WHERE sequence_catalog = current_database() - AND sequence_schema = 'public' - AND sequence_name != 'change_log_id_seq'; diff --git a/swo/reset.go b/swo/reset.go deleted file mode 100644 index 6840d75e9b..0000000000 --- a/swo/reset.go +++ /dev/null @@ -1,105 +0,0 @@ -package swo - -import ( - "context" - "fmt" - "strings" - - "github.com/jackc/pgx/v4" -) - -func (m *Manager) DoReset(ctx context.Context) error { - 
err := m.withConnFromOld(ctx, ResetOldDB) - if err != nil { - return fmt.Errorf("reset old db: %w", err) - } - - err = m.withConnFromNew(ctx, ResetNewDB) - if err != nil { - return fmt.Errorf("reset new db: %w", err) - } - - return nil -} - -// ResetNewDB will reset the new database to a clean state. -func ResetNewDB(ctx context.Context, conn *pgx.Conn) error { - tables, err := ScanTables(ctx, conn) - if err != nil { - return fmt.Errorf("scan tables: %w", err) - } - - var names []string - // truncate sync tables - for _, table := range tables { - if table.SkipSync() { - continue - } - names = append(names, table.QuotedName()) - } - - _, err = conn.Exec(ctx, fmt.Sprintf("truncate %s", strings.Join(names, ","))) - if err != nil { - return fmt.Errorf("truncate tables: %w", err) - } - - // drop the change_log table - _, err = conn.Exec(ctx, "drop table if exists change_log") - if err != nil { - return fmt.Errorf("drop change_log: %w", err) - } - - return nil -} - -// ResetOldDB will reset the old database to a clean state. -// -// It will remove all change triggers and cleanup switchover data. 
-func ResetOldDB(ctx context.Context, conn *pgx.Conn) error { - _, err := conn.Exec(ctx, "update switchover_state set current_state = 'idle' where current_state = 'in_progress'") - if err != nil { - return fmt.Errorf("set state to idle: %w", err) - } - - tables, err := ScanTables(ctx, conn) - if err != nil { - return fmt.Errorf("scan tables: %w", err) - } - - // drop change triggers - for _, table := range tables { - if table.SkipSync() { - continue - } - - _, err = conn.Exec(ctx, fmt.Sprintf("drop trigger if exists %s on %s", table.QuotedChangeTriggerName(), table.QuotedName())) - if err != nil { - return fmt.Errorf("drop trigger %s: %w", table.QuotedChangeTriggerName(), err) - } - - _, err = conn.Exec(ctx, fmt.Sprintf("drop trigger if exists %s on %s", table.QuotedLockTriggerName(), table.QuotedName())) - if err != nil { - return fmt.Errorf("drop trigger %s: %w", table.QuotedChangeTriggerName(), err) - } - - } - - // TODO: ensure no deps get missed - _, err = conn.Exec(ctx, "DROP FUNCTION IF EXISTS fn_switchover_change_log_lock()") - if err != nil { - return fmt.Errorf("drop fn_switchover_change_log_lock: %w", err) - } - - _, err = conn.Exec(ctx, "DROP FUNCTION IF EXISTS fn_process_change_log()") - if err != nil { - return fmt.Errorf("drop fn_process_change_log: %w", err) - } - - // drop the change_log table - _, err = conn.Exec(ctx, "drop table if exists change_log") - if err != nil { - return fmt.Errorf("drop change_log: %w", err) - } - - return nil -} diff --git a/swo/rowtracker.go b/swo/rowtracker.go deleted file mode 100644 index 4be2ce877d..0000000000 --- a/swo/rowtracker.go +++ /dev/null @@ -1,158 +0,0 @@ -package swo - -import ( - "context" - "encoding/json" - "errors" - "fmt" - "strconv" - - "github.com/jackc/pgx/v4" -) - -type rowTracker struct { - tables []Table - rowIDs map[string]map[string]struct{} - - stagedInserts []stagedID - stagedDeletes []stagedID -} -type stagedID struct { - table string - id string -} - -// ReadRowIDs reads the row IDs for 
all tables in the next-db to distinguish -// between those that need an INSERT vs UPDATE. -func (e *Execute) ReadRowIDs(ctx context.Context) { - if e.err != nil { - return - } - e.Progressf(ctx, "recording next DB row IDs") - e.rowIDs = make(map[string]map[string]struct{}) - - for _, table := range e.tables { - if table.SkipSync() { - continue - } - e.rowIDs[table.Name] = make(map[string]struct{}) - rows, err := e.nextDBConn.Query(ctx, fmt.Sprintf("SELECT id::text FROM %s", table.QuotedName())) - if err != nil { - e.err = fmt.Errorf("read row ids for %s: %w", table.Name, err) - return - } - - for rows.Next() { - var id string - if err := rows.Scan(&id); err != nil { - rows.Close() - e.err = fmt.Errorf("read row ids for %s: scan: %w", table.Name, err) - return - } - - e._Insert(table.Name, id) - } - } -} - -func (e *Execute) Insert(table, id string) { - e.stagedInserts = append(e.stagedInserts, stagedID{table, id}) -} - -func (e *Execute) Delete(table, id string) { - e.stagedDeletes = append(e.stagedDeletes, stagedID{table, id}) -} -func (e *Execute) _Insert(table, id string) { e.rowIDs[table][id] = struct{}{} } -func (e *Execute) _Delete(table, id string) { delete(e.rowIDs[table], id) } -func (e *Execute) Rollback() { - e.stagedDeletes = nil - e.stagedInserts = nil -} - -func (e *Execute) Commit() { - for _, staged := range e.stagedInserts { - e._Insert(staged.table, staged.id) - } - e.stagedInserts = nil - - for _, staged := range e.stagedDeletes { - e._Delete(staged.table, staged.id) - } - e.stagedDeletes = nil -} - -func (e *Execute) Exists(table, id string) bool { _, ok := e.rowIDs[table][id]; return ok } - -func (e *Execute) queueChanges(b *pgx.Batch, q string, rows []syncRow) error { - if len(rows) == 0 { - return nil - } - - var rowsData []json.RawMessage - for _, row := range rows { - rowsData = append(rowsData, row.data) - } - - data, err := json.Marshal(rowsData) - if err != nil { - return fmt.Errorf("marshal rows: %w", err) - } - - b.Queue(q, data) - - 
return nil -} - -func (e *Execute) readChanges(ctx context.Context, table Table, res pgx.BatchResults, ids []string) (*syncData, error) { - rows, err := res.Query() - if errors.Is(err, pgx.ErrNoRows) { - return &syncData{toDelete: ids}, nil - } - defer rows.Close() - if err != nil { - return nil, fmt.Errorf("fetch rows: %w", err) - } - - sd := syncData{t: table} - existsInOld := make(map[string]struct{}) - for rows.Next() { - var id string - var data []byte - err = rows.Scan(&id, &data) - if err != nil { - return nil, fmt.Errorf("scan row: %w", err) - } - existsInOld[id] = struct{}{} - if e.Exists(table.Name, id) { - sd.toUpdate = append(sd.toUpdate, syncRow{table.Name, id, data}) - } else { - e.Insert(table.Name, id) - sd.toInsert = append(sd.toInsert, syncRow{table.Name, id, data}) - } - } - - for _, id := range ids { - if _, ok := existsInOld[id]; ok { - continue - } - if !e.Exists(table.Name, id) { - continue - } - e.Delete(table.Name, id) - sd.toDelete = append(sd.toDelete, id) - } - - return &sd, nil -} - -func intIDs(ids []string) []int { - var ints []int - for _, id := range ids { - i, err := strconv.Atoi(id) - if err != nil { - panic(err) - } - ints = append(ints, i) - } - return ints -} diff --git a/swo/swodb/queries.sql.go b/swo/swodb/queries.sql.go index a16997f06b..023e3979cb 100644 --- a/swo/swodb/queries.sql.go +++ b/swo/swodb/queries.sql.go @@ -24,79 +24,29 @@ func (q *Queries) ActiveTxCount(ctx context.Context, xactStart time.Time) (int64 return count, err } -const changes = `-- name: Changes :many -SELECT id, - table_name, - row_id -FROM change_log +const disableChangeLogTriggers = `-- name: DisableChangeLogTriggers :exec +UPDATE switchover_state +SET current_state = 'idle' +WHERE current_state = 'in_progress' ` -func (q *Queries) Changes(ctx context.Context) ([]ChangeLog, error) { - rows, err := q.db.Query(ctx, changes) - if err != nil { - return nil, err - } - defer rows.Close() - var items []ChangeLog - for rows.Next() { - var i ChangeLog - if 
err := rows.Scan(&i.ID, &i.TableName, &i.RowID); err != nil { - return nil, err - } - items = append(items, i) - } - if err := rows.Err(); err != nil { - return nil, err - } - return items, nil -} - -const currentSwitchoverState = `-- name: CurrentSwitchoverState :one -SELECT current_state -FROM switchover_state -` - -func (q *Queries) CurrentSwitchoverState(ctx context.Context) (EnumSwitchoverState, error) { - row := q.db.QueryRow(ctx, currentSwitchoverState) - var current_state EnumSwitchoverState - err := row.Scan(¤t_state) - return current_state, err -} - -const currentSwitchoverStateNoWait = `-- name: CurrentSwitchoverStateNoWait :one -SELECT current_state -FROM switchover_state NOWAIT -` - -func (q *Queries) CurrentSwitchoverStateNoWait(ctx context.Context) (EnumSwitchoverState, error) { - row := q.db.QueryRow(ctx, currentSwitchoverStateNoWait) - var current_state EnumSwitchoverState - err := row.Scan(¤t_state) - return current_state, err -} - -const currentTime = `-- name: CurrentTime :one -SELECT now()::timestamptz -` - -func (q *Queries) CurrentTime(ctx context.Context) (time.Time, error) { - row := q.db.QueryRow(ctx, currentTime) - var column_1 time.Time - err := row.Scan(&column_1) - return column_1, err +func (q *Queries) DisableChangeLogTriggers(ctx context.Context) error { + _, err := q.db.Exec(ctx, disableChangeLogTriggers) + return err } -const deleteChanges = `-- name: DeleteChanges :exec -DELETE FROM change_log -WHERE id = ANY($1) +const enableChangeLogTriggers = `-- name: EnableChangeLogTriggers :exec +UPDATE switchover_state +SET current_state = 'in_progress' +WHERE current_state = 'idle' ` -func (q *Queries) DeleteChanges(ctx context.Context, id int64) error { - _, err := q.db.Exec(ctx, deleteChanges, id) +func (q *Queries) EnableChangeLogTriggers(ctx context.Context) error { + _, err := q.db.Exec(ctx, enableChangeLogTriggers) return err } -const foreignKeys = `-- name: ForeignKeys :many +const foreignKeyRefs = `-- name: ForeignKeyRefs :many 
SELECT src.relname::text, dst.relname::text FROM pg_catalog.pg_constraint con @@ -108,20 +58,20 @@ WHERE con.contype = 'f' AND NOT con.condeferrable ` -type ForeignKeysRow struct { +type ForeignKeyRefsRow struct { SrcRelname string DstRelname string } -func (q *Queries) ForeignKeys(ctx context.Context) ([]ForeignKeysRow, error) { - rows, err := q.db.Query(ctx, foreignKeys) +func (q *Queries) ForeignKeyRefs(ctx context.Context) ([]ForeignKeyRefsRow, error) { + rows, err := q.db.Query(ctx, foreignKeyRefs) if err != nil { return nil, err } defer rows.Close() - var items []ForeignKeysRow + var items []ForeignKeyRefsRow for rows.Next() { - var i ForeignKeysRow + var i ForeignKeyRefsRow if err := rows.Scan(&i.SrcRelname, &i.DstRelname); err != nil { return nil, err } @@ -133,37 +83,6 @@ func (q *Queries) ForeignKeys(ctx context.Context) ([]ForeignKeysRow, error) { return items, nil } -const globalSwitchoverExecLock = `-- name: GlobalSwitchoverExecLock :one -SELECT pg_try_advisory_lock(4370) -FROM switchover_state -WHERE current_state != 'use_next_db' -` - -func (q *Queries) GlobalSwitchoverExecLock(ctx context.Context) (bool, error) { - row := q.db.QueryRow(ctx, globalSwitchoverExecLock) - var pg_try_advisory_lock bool - err := row.Scan(&pg_try_advisory_lock) - return pg_try_advisory_lock, err -} - -const globalSwitchoverSharedConnLock = `-- name: GlobalSwitchoverSharedConnLock :exec -SELECT pg_advisory_lock_shared(4369) -` - -func (q *Queries) GlobalSwitchoverSharedConnLock(ctx context.Context) error { - _, err := q.db.Exec(ctx, globalSwitchoverSharedConnLock) - return err -} - -const globalSwitchoverTxExclusiveConnLock = `-- name: GlobalSwitchoverTxExclusiveConnLock :exec -SELECT pg_advisory_xact_lock(4369) -` - -func (q *Queries) GlobalSwitchoverTxExclusiveConnLock(ctx context.Context) error { - _, err := q.db.Exec(ctx, globalSwitchoverTxExclusiveConnLock) - return err -} - const lastLogID = `-- name: LastLogID :one SELECT COALESCE(MAX(id), 0)::bigint FROM 
switchover_log @@ -206,6 +125,17 @@ func (q *Queries) LogEvents(ctx context.Context, id int64) ([]SwitchoverLog, err return items, nil } +const now = `-- name: Now :one +SELECT now()::timestamptz +` + +func (q *Queries) Now(ctx context.Context) (time.Time, error) { + row := q.db.QueryRow(ctx, now) + var column_1 time.Time + err := row.Scan(&column_1) + return column_1, err +} + const sequenceNames = `-- name: SequenceNames :many SELECT sequence_name FROM information_schema.sequences @@ -272,12 +202,3 @@ func (q *Queries) TableColumns(ctx context.Context) ([]InformationSchemaColumn, } return items, nil } - -const unlockAll = `-- name: UnlockAll :exec -SELECT pg_advisory_unlock_all() -` - -func (q *Queries) UnlockAll(ctx context.Context) error { - _, err := q.db.Exec(ctx, unlockAll) - return err -} diff --git a/swo/pgtables.sql b/swo/swoinfo/pgtables.sql similarity index 100% rename from swo/pgtables.sql rename to swo/swoinfo/pgtables.sql diff --git a/swo/swoinfo/queries.sql b/swo/swoinfo/queries.sql new file mode 100644 index 0000000000..4e6e750446 --- /dev/null +++ b/swo/swoinfo/queries.sql @@ -0,0 +1,30 @@ +-- name: ForeignKeyRefs :many +SELECT src.relname::text, + dst.relname::text +FROM pg_catalog.pg_constraint con + JOIN pg_catalog.pg_namespace ns ON ns.nspname = 'public' + AND ns.oid = con.connamespace + JOIN pg_catalog.pg_class src ON src.oid = con.conrelid + JOIN pg_catalog.pg_class dst ON dst.oid = con.confrelid +WHERE con.contype = 'f' + AND NOT con.condeferrable; + +-- name: TableColumns :many +SELECT col.table_name, + col.column_name, + col.data_type, + col.ordinal_position +FROM information_schema.columns col + JOIN information_schema.tables t ON t.table_catalog = col.table_catalog + AND t.table_schema = col.table_schema + AND t.table_name = col.table_name + AND t.table_type = 'BASE TABLE' +WHERE col.table_catalog = current_database() + AND col.table_schema = 'public'; + +-- name: SequenceNames :many +SELECT sequence_name +FROM 
information_schema.sequences +WHERE sequence_catalog = current_database() + AND sequence_schema = 'public' + AND sequence_name != 'change_log_id_seq'; diff --git a/swo/swoinfo/scansequences.go b/swo/swoinfo/scansequences.go new file mode 100644 index 0000000000..5e83a1967c --- /dev/null +++ b/swo/swoinfo/scansequences.go @@ -0,0 +1,21 @@ +package swoinfo + +import ( + "context" + "sort" + + "github.com/jackc/pgx/v4" + "github.com/target/goalert/swo/swodb" +) + +// ScanSequences will return the names of all sequences in the database, ordered +// by name. +func ScanSequences(ctx context.Context, conn *pgx.Conn) ([]string, error) { + names, err := swodb.New(conn).SequenceNames(ctx) + if err != nil { + return nil, err + } + + sort.Strings(names) + return names, nil +} diff --git a/swo/scantables.go b/swo/swoinfo/scantables.go similarity index 52% rename from swo/scantables.go rename to swo/swoinfo/scantables.go index 47ebcbfea3..347caac8dd 100644 --- a/swo/scantables.go +++ b/swo/swoinfo/scantables.go @@ -1,4 +1,4 @@ -package swo +package swoinfo import ( "context" @@ -11,9 +11,10 @@ import ( "github.com/target/goalert/swo/swogrp" ) -type Column swodb.InformationSchemaColumn - -// ScanTables scans the database for tables, their columns, and dependencies. +// ScanTables scans the database for tables returning them in insert-safe-order, +// meaning the first table returned will not have any foreign keys to other tables. +// +// Tables with migrate-only data, or those used by switchover code will be omitted. 
func ScanTables(ctx context.Context, conn *pgx.Conn) ([]Table, error) { swogrp.Progressf(ctx, "scanning tables...") @@ -22,54 +23,73 @@ func ScanTables(ctx context.Context, conn *pgx.Conn) ([]Table, error) { return nil, fmt.Errorf("scan table columns: %w", err) } + refs, err := swodb.New(conn).ForeignKeyRefs(ctx) + if err != nil { + return nil, fmt.Errorf("scan foreign keys: %w", err) + } + tables := make(map[string]*Table) for _, cRow := range columns { + switch cRow.TableName { + case "engine_processing_versions", "gorp_migrations": + // skip migrate-only tables + continue + case "switchover_state", "switchover_log", "change_log": + // skip SWO tables + continue + } + if tables[cRow.TableName] == nil { - tables[cRow.TableName] = &Table{Name: cRow.TableName, deps: make(map[string]struct{})} + tables[cRow.TableName] = &Table{name: cRow.TableName, deps: make(map[string]struct{})} } - tables[cRow.TableName].Columns = append(tables[cRow.TableName].Columns, Column(cRow)) + tables[cRow.TableName].cols = append(tables[cRow.TableName].cols, column(cRow)) if cRow.ColumnName == "id" { - tables[cRow.TableName].IDCol = Column(cRow) + tables[cRow.TableName].id = column(cRow) } } - refs, err := swodb.New(conn).ForeignKeys(ctx) - if err != nil { - return nil, fmt.Errorf("scan foreign keys: %w", err) + for _, t := range tables { + if t.id.ColumnName == "" { + return nil, fmt.Errorf("table %s has no id column", t.name) + } } + for _, fRow := range refs { tables[fRow.SrcRelname].deps[fRow.DstRelname] = struct{}{} } var tableList []*Table for _, t := range tables { - sort.Slice(t.Columns, func(i, j int) bool { - return t.Columns[i].OrdinalPosition < t.Columns[j].OrdinalPosition + sort.Slice(t.cols, func(i, j int) bool { + return t.cols[i].OrdinalPosition < t.cols[j].OrdinalPosition }) tableList = append(tableList, t) } // sort tables by name sort.Slice(tableList, func(i, j int) bool { - return tableList[i].Name < tableList[j].Name + return tableList[i].name < tableList[j].name }) - 
remove := func(i int) *Table { + // take the next table, remove it from other dependency lists + pick := func(i int) *Table { t := tableList[i] tableList = append(tableList[:i], tableList[i+1:]...) // delete table name from all deps for _, t2 := range tableList { - delete(t2.deps, t.Name) + delete(t2.deps, t.name) } return t } + + // get the next table to pick (zero dependencies) next := func() *Table { for i, t := range tableList { if len(t.deps) == 0 { - return remove(i) + return pick(i) } } diff --git a/swo/swoinfo/table.go b/swo/swoinfo/table.go new file mode 100644 index 0000000000..0503b9a37e --- /dev/null +++ b/swo/swoinfo/table.go @@ -0,0 +1,23 @@ +package swoinfo + +import "github.com/target/goalert/swo/swodb" + +type Table struct { + name string + deps map[string]struct{} + cols []column + id column +} +type column swodb.InformationSchemaColumn + +func (t Table) Name() string { return t.name } + +func (t Table) IDType() string { return t.id.DataType } + +func (t Table) Columns() []string { + var cols []string + for _, c := range t.cols { + cols = append(cols, c.ColumnName) + } + return cols +} diff --git a/swo/swomsg/queries.sql b/swo/swomsg/queries.sql new file mode 100644 index 0000000000..16369116ca --- /dev/null +++ b/swo/swomsg/queries.sql @@ -0,0 +1,12 @@ +-- name: LogEvents :many +SELECT id, + TIMESTAMP, + DATA +FROM switchover_log +WHERE id > $1 +ORDER BY id ASC +LIMIT 100; + +-- name: LastLogID :one +SELECT COALESCE(MAX(id), 0)::bigint +FROM switchover_log; diff --git a/swo/changelog.sql b/swo/swosync/changelog.sql similarity index 100% rename from swo/changelog.sql rename to swo/swosync/changelog.sql diff --git a/swo/swosync/initialsync.go b/swo/swosync/initialsync.go new file mode 100644 index 0000000000..4edb335101 --- /dev/null +++ b/swo/swosync/initialsync.go @@ -0,0 +1,124 @@ +package swosync + +import ( + "context" + "encoding/json" + "fmt" + + "github.com/jackc/pgx/v4" + "github.com/target/goalert/swo/swoinfo" + 
"github.com/target/goalert/util/sqlutil" +) + +func insertRowsQuery(table swoinfo.Table) string { + return fmt.Sprintf(` + insert into %s + select * from + json_populate_recordset(null::%s, $1) + `, sqlutil.QuoteID(table.Name()), sqlutil.QuoteID(table.Name())) +} + +// InitialSync will insert all rows from the source database into the destination database. +// +// While doing so it will update the rowID maps to track the rows that have been inserted. +func (l *LogicalReplicator) InitialSync(ctx context.Context) error { + srcTx, err := l.srcConn.BeginTx(ctx, pgx.TxOptions{ + IsoLevel: pgx.Serializable, + DeferrableMode: pgx.Deferrable, + AccessMode: pgx.ReadOnly, + }) + if err != nil { + return fmt.Errorf("begin src tx: %w", err) + } + defer srcTx.Rollback(ctx) + + _, err = srcTx.Exec(ctx, txInProgressLock) + if err != nil { + return fmt.Errorf("lock tx: %w", err) + } + + dstTx, err := l.dstConn.BeginTx(ctx, pgx.TxOptions{}) + if err != nil { + return fmt.Errorf("begin dst tx: %w", err) + } + defer dstTx.Rollback(ctx) + + _, err = dstTx.Exec(ctx, "set constraints all deferred") + if err != nil { + return fmt.Errorf("defer constraints: %w", err) + } + + for _, table := range l.tables { + _, err := l.initialSyncTable(ctx, srcTx, dstTx, table) + if err != nil { + return fmt.Errorf("initial sync table %s: %w", table.Name(), err) + } + } + + err = srcTx.Commit(ctx) + if err != nil { + return fmt.Errorf("commit src tx: %w", err) + } + + err = dstTx.Commit(ctx) + if err != nil { + return fmt.Errorf("commit dst tx: %w", err) + } + + _, err = l.dstConn.Exec(ctx, "vacuum analyze") + if err != nil { + return fmt.Errorf("vacuum analyze: %w", err) + } + + return nil +} + +func (l *LogicalReplicator) initialSyncTable(ctx context.Context, srcTx, dstTx pgx.Tx, table swoinfo.Table) (int64, error) { + l.printf(ctx, "sync %s", table.Name()) + var count int64 + err := srcTx.QueryRow(ctx, fmt.Sprintf("select count(*) from %s", sqlutil.QuoteID(table.Name()))).Scan(&count) + if err != nil 
{ + return 0, fmt.Errorf("count: %w", err) + } + + rows, err := srcTx.Query(ctx, fmt.Sprintf("select id::text, to_jsonb(tbl_row) from %s as tbl_row", sqlutil.QuoteID(table.Name()))) + if err != nil { + return 0, fmt.Errorf("select: %w", err) + } + defer rows.Close() + + insertSQL := insertRowsQuery(table) + + var insertRows []json.RawMessage + var inserted int + for rows.Next() { + var id string + var rowData json.RawMessage + if err := rows.Scan(&id, &rowData); err != nil { + return 0, fmt.Errorf("scan: %w", err) + } + insertRows = append(insertRows, rowData) + l.dstRows.Set(RowID{table.Name(), id}) + + if len(insertRows) < 10000 { + continue + } + + l.printf(ctx, "sync %s: %d/%d", table.Name(), inserted, count) + _, err := dstTx.Exec(ctx, insertSQL, insertRows) + if err != nil { + return 0, fmt.Errorf("insert: %w", err) + } + inserted += len(insertRows) + insertRows = insertRows[:0] + } + + if len(insertRows) > 0 { + _, err := dstTx.Exec(ctx, insertSQL, insertRows) + if err != nil { + return 0, fmt.Errorf("insert: %w", err) + } + } + + return count, nil +} diff --git a/swo/swosync/logicalreplicator.go b/swo/swosync/logicalreplicator.go new file mode 100644 index 0000000000..9ef0d16735 --- /dev/null +++ b/swo/swosync/logicalreplicator.go @@ -0,0 +1,41 @@ +package swosync + +import ( + "context" + + "github.com/jackc/pgx/v4" + "github.com/target/goalert/swo/swoinfo" +) + +type LogicalReplicator struct { + srcConn *pgx.Conn + dstConn *pgx.Conn + + tables []swoinfo.Table + seqNames []string + + progFn func(ctx context.Context, format string, args ...interface{}) + + dstRows RowSet +} + +func NewLogicalReplicator() *LogicalReplicator { + return &LogicalReplicator{ + dstRows: make(RowSet), + } +} + +func (l *LogicalReplicator) SetSourceDB(db *pgx.Conn) { l.srcConn = db } +func (l *LogicalReplicator) SetDestinationDB(db *pgx.Conn) { l.dstConn = db } + +func (l *LogicalReplicator) SetProgressFunc(fn func(ctx context.Context, format string, args ...interface{})) { + 
l.progFn = fn +} + +func (l *LogicalReplicator) printf(ctx context.Context, format string, args ...interface{}) { + if l.progFn == nil { + return + } + + l.progFn(ctx, format, args...) +} diff --git a/swo/swosync/logicalsync.go b/swo/swosync/logicalsync.go new file mode 100644 index 0000000000..08e1e88f2f --- /dev/null +++ b/swo/swosync/logicalsync.go @@ -0,0 +1,154 @@ +package swosync + +import ( + "context" + "fmt" + + "github.com/jackc/pgx/v4" + "github.com/target/goalert/util/sqlutil" +) + +/* + 1. Read all changes (table and row ids) + 2. Fetch all rows from each table + 3. Insert missing rows (table-order) + 4. Update existing rows (table-order) + 5. Delete rows that are no longer in the source database (reverse-table-order) + + Round Trips (normal sync): + - 1 to start tx and read all changes + - 1 to fetch all rows from each table (single batch, 1 query per table) & commit + - 1 to delete all change rows from the DB and commit (background) + - 1 for all updates to new DB +*/ + +// bgTx will start a transaction in the background, returning a function that will +// wait for and return the transaction (so multiple can be started simultaneously). +func bgTx(ctx context.Context, conn *pgx.Conn, opts pgx.TxOptions) func() (pgx.Tx, error) { + ch := make(chan struct{}) + var err error + var tx pgx.Tx + go func() { + tx, err = conn.BeginTx(ctx, opts) + close(ch) + }() + + return func() (pgx.Tx, error) { + <-ch + return tx, err + } +} + +func cancelTx(ctx context.Context, fn func() (pgx.Tx, error)) { + tx, err := fn() + if err != nil { + return + } + tx.Rollback(ctx) +} + +// LogicalSync will sync the source database to the destination database as fast as possible. +func (l *LogicalReplicator) LogicalSync(ctx context.Context) error { return l.doSync(ctx, false) } + +// FinalSync will sync the source database to the destination database, using the stop-the-world lock +// and updating switchover_state to use_next_db. 
+func (l *LogicalReplicator) FinalSync(ctx context.Context) error { return l.doSync(ctx, true) } + +func (l *LogicalReplicator) doSync(ctx context.Context, final bool) error { + b := new(pgx.Batch) + if final { + b.Queue(`begin isolation level serializable`) + } else { + b.Queue(`begin isolation level serializable read only deferrable`) + } + b.Queue(txInProgressLock) + if final { + // stop-the-world lock before reads + b.Queue(txStopTheWorld) + } + + seqSync := NewSequenceSync(l.seqNames) + seqSync.AddBatchReads(b) + + tblSync := NewTableSync(l.tables) + tblSync.AddBatchChangeRead(b) + + res := l.srcConn.SendBatch(ctx, b) + _, err := res.Exec() // begin tx + if err != nil { + return fmt.Errorf("read changes: begin tx: %w", err) + } + defer l.srcConn.Exec(ctx, `rollback`) + + // in-progress lock & check + _, err = res.Exec() + if err != nil { + return fmt.Errorf("read changes: set tx timeout: %w", err) + } + + if final { + // stop-the-world lock before reads + _, err = res.Exec() + if err != nil { + return fmt.Errorf("read changes: stop-the-world lock: %w", err) + } + } + + err = seqSync.ScanBatchReads(res) + if err != nil { + return fmt.Errorf("read changes: scan seqs: %w", err) + } + + err = tblSync.ScanBatchChangeRead(res) + if err != nil { + return fmt.Errorf("read changes: scan changes: %w", err) + } + res.Close() + + var readRows pgx.Batch + tblSync.AddBatchRowReads(&readRows) + if readRows.Len() > 0 { + res = l.srcConn.SendBatch(ctx, &readRows) + err = tblSync.ScanBatchRowReads(res) + if err != nil { + return fmt.Errorf("read changes: scan rows: %w", err) + } + res.Close() + } + + var applyChanges pgx.Batch + applyChanges.Queue("begin") + applyChanges.Queue("set constraints all deferred") + seqSync.AddBatchWrites(&applyChanges) + tblSync.AddBatchWrites(&applyChanges, l.dstRows) + if final { + // re-enable triggers in destination DB + for _, t := range l.tables { + applyChanges.Queue(fmt.Sprintf(`alter table %s enable trigger user`, 
sqlutil.QuoteID(t.Name()))) + } + } + applyChanges.Queue("commit") + err = l.dstConn.SendBatch(ctx, &applyChanges).Close() + if err != nil { + l.dstConn.Exec(ctx, `rollback`) + return fmt.Errorf("apply changes: %w", err) + } + + var finish pgx.Batch + if final { + // world is stopped, changes in new DB, triggers enabled, so we can safely update switchover_state + finish.Queue("update switchover_state set current_state = 'use_next_db' where current_state = 'in_progress'") + } + finish.Queue("commit") + err = l.srcConn.SendBatch(ctx, &finish).Close() + if err != nil { + return fmt.Errorf("commit sync read: %w", err) + } + + if final { + // no cleanup/err check for final + return nil + } + _, err = tblSync.ExecDeleteChanges(ctx, l.srcConn) + return err +} diff --git a/swo/swosync/queries.sql b/swo/swosync/queries.sql new file mode 100644 index 0000000000..b62ddbbe52 --- /dev/null +++ b/swo/swosync/queries.sql @@ -0,0 +1,18 @@ +-- name: EnableChangeLogTriggers :exec +UPDATE switchover_state +SET current_state = 'in_progress' +WHERE current_state = 'idle'; + +-- name: DisableChangeLogTriggers :exec +UPDATE switchover_state +SET current_state = 'idle' +WHERE current_state = 'in_progress'; + +-- name: Now :one +SELECT now()::timestamptz; + +-- name: ActiveTxCount :one +SELECT COUNT(*) +FROM pg_stat_activity +WHERE "state" <> 'idle' + AND "xact_start" <= $1; diff --git a/swo/swosync/reset.go b/swo/swosync/reset.go new file mode 100644 index 0000000000..358760546b --- /dev/null +++ b/swo/swosync/reset.go @@ -0,0 +1,63 @@ +package swosync + +import ( + "context" + "fmt" + "strings" + + "github.com/target/goalert/swo/swodb" + "github.com/target/goalert/swo/swoinfo" + "github.com/target/goalert/util/sqlutil" +) + +func (l *LogicalReplicator) Reset(ctx context.Context) error { + l.printf(ctx, "disabling logical replication...") + + _, err := l.srcConn.Exec(ctx, ConnLockQuery) + if err != nil { + return fmt.Errorf("error locking source database: %w", err) + } + + err = 
swodb.New(l.srcConn).DisableChangeLogTriggers(ctx) + if err != nil { + return fmt.Errorf("disable change log triggers: %w", err) + } + + l.tables, err = swoinfo.ScanTables(ctx, l.srcConn) + if err != nil { + return fmt.Errorf("scan tables: %w", err) + } + + var tableNames []string + for _, table := range l.tables { + // delete change trigger in source DB + chgTrigQuery := fmt.Sprintf(`drop trigger if exists %s on %s`, triggerName(table.Name()), sqlutil.QuoteID(table.Name())) + _, err := l.srcConn.Exec(ctx, chgTrigQuery) + if err != nil { + return fmt.Errorf("delete change trigger for %s: %w", table.Name(), err) + } + + tableNames = append(tableNames, sqlutil.QuoteID(table.Name())) + } + + // drop change_log table and func + _, err = l.srcConn.Exec(ctx, `drop function if exists fn_process_change_log()`) + if err != nil { + return fmt.Errorf("drop fn_process_change_log: %w", err) + } + _, err = l.srcConn.Exec(ctx, `drop table if exists change_log`) + if err != nil { + return fmt.Errorf("drop change_log: %w", err) + } + + l.printf(ctx, "clearing dest DB") + _, err = l.dstConn.Exec(ctx, "truncate "+strings.Join(tableNames, ",")) + if err != nil { + return fmt.Errorf("truncate tables: %w", err) + } + + l.tables = nil + l.seqNames = nil + + return nil +} diff --git a/swo/swosync/rowset.go b/swo/swosync/rowset.go new file mode 100644 index 0000000000..813547ab8a --- /dev/null +++ b/swo/swosync/rowset.go @@ -0,0 +1,14 @@ +package swosync + +type ( + RowSet map[RowID]struct{} + RowID struct{ Table, Row string } +) + +func (r RowSet) Set(id RowID) { r[id] = struct{}{} } +func (r RowSet) Delete(id RowID) { delete(r, id) } + +func (r RowSet) Has(id RowID) bool { + _, ok := r[id] + return ok +} diff --git a/swo/swosync/safety.go b/swo/swosync/safety.go new file mode 100644 index 0000000000..952e277486 --- /dev/null +++ b/swo/swosync/safety.go @@ -0,0 +1,40 @@ +package swosync + +// txInProgressLock will cause the transaction to abort if it's unable to get +// the exec lock 
and/or switchover state is not currently in_progress +const txInProgressLock = ` +do $$ +declare +begin + set local idle_in_transaction_session_timeout = 60000; + set local lock_timeout = 60000; + assert (select pg_try_advisory_xact_lock(4370)), 'failed to get exec lock'; + assert (select current_state = 'in_progress' from switchover_state), 'switchover state is not in_progress'; +end $$; +` + +// txStopTheWorld will cause the transaction to abort if it's unable to get +// the stop-the-world advisory lock (4369) and/or switchover state is not currently in_progress +const txStopTheWorld = ` +do $$ +declare +begin + set local idle_in_transaction_session_timeout = 3000; + set local lock_timeout = 3000; + perform pg_advisory_xact_lock(4369); + assert (select current_state = 'in_progress' from switchover_state), 'switchover state is not in_progress'; +end $$; +` + +// ConnLockQuery will result in a failed assertion if it is unable to get the exec lock +// or switchover state is use_next_db +const ConnLockQuery = ` +do $$ +declare +begin + set idle_in_transaction_session_timeout = 60000; + set lock_timeout = 60000; + assert (select pg_try_advisory_lock(4370)), 'failed to get exec lock'; + assert (select current_state != 'use_next_db' from switchover_state), 'switchover state is use_next_db'; +end $$; +` diff --git a/swo/swosync/sequencesync.go b/swo/swosync/sequencesync.go new file mode 100644 index 0000000000..eb43649eff --- /dev/null +++ b/swo/swosync/sequencesync.go @@ -0,0 +1,48 @@ +package swosync + +import ( + "fmt" + + "github.com/jackc/pgx/v4" + "github.com/target/goalert/util/sqlutil" +) + +type SequenceSync struct { + names []string + lastValue []int64 + isCalled []bool +} + +func NewSequenceSync(names []string) *SequenceSync { + return &SequenceSync{names: names} +} + +// AddBatchReads queues up select statements to retrieve the current values of the sequences.
+func (s *SequenceSync) AddBatchReads(b *pgx.Batch) { + for _, seqName := range s.names { + b.Queue("select last_value, is_called from " + sqlutil.QuoteID(seqName)) + } +} + +// ScanBatchReads scans the results of the added batch reads. +func (s *SequenceSync) ScanBatchReads(res pgx.BatchResults) error { + for _, seqName := range s.names { + var last int64 + var isCalled bool + err := res.QueryRow().Scan(&last, &isCalled) + if err != nil { + return fmt.Errorf("read changes: scan seq %s: %w", seqName, err) + } + s.lastValue = append(s.lastValue, last) + s.isCalled = append(s.isCalled, isCalled) + } + + return nil +} + +// AddBatchWrites queues up update statements to set the current values of the sequences. +func (s *SequenceSync) AddBatchWrites(b *pgx.Batch) { + for i, seqName := range s.names { + b.Queue("select pg_catalog.setval($1, $2, $3)", seqName, s.lastValue[i], s.isCalled[i]) + } +} diff --git a/swo/swosync/start.go b/swo/swosync/start.go new file mode 100644 index 0000000000..f50f37e06c --- /dev/null +++ b/swo/swosync/start.go @@ -0,0 +1,99 @@ +package swosync + +import ( + "context" + _ "embed" + "fmt" + "time" + + "github.com/target/goalert/swo/swodb" + "github.com/target/goalert/swo/swoinfo" + "github.com/target/goalert/util/sqlutil" +) + +//go:embed changelog.sql +var changelogQuery string + +func triggerName(table string) string { + return sqlutil.QuoteID(fmt.Sprintf("zz_99_change_log_%s", table)) +} + +// Start instruments and begins tracking changes to the DB.
+func (l *LogicalReplicator) Start(ctx context.Context) error { + l.printf(ctx, "enabling logical replication...") + _, err := l.srcConn.Exec(ctx, changelogQuery) + if err != nil { + return fmt.Errorf("create change_log and fn: %w", err) + } + + l.tables, err = swoinfo.ScanTables(ctx, l.srcConn) + if err != nil { + return fmt.Errorf("scan tables: %w", err) + } + + l.seqNames, err = swoinfo.ScanSequences(ctx, l.srcConn) + if err != nil { + return fmt.Errorf("scan sequences: %w", err) + } + + for _, table := range l.tables { + // create change trigger in source DB + chgTrigQuery := fmt.Sprintf(` + CREATE TRIGGER %s AFTER INSERT OR UPDATE OR DELETE ON %s + FOR EACH ROW EXECUTE PROCEDURE fn_process_change_log() + `, triggerName(table.Name()), sqlutil.QuoteID(table.Name())) + _, err := l.srcConn.Exec(ctx, chgTrigQuery) + if err != nil { + return fmt.Errorf("create change trigger for %s: %w", table.Name(), err) + } + + // disable triggers in destination DB + disableTrigQuery := fmt.Sprintf(`ALTER TABLE %s DISABLE TRIGGER USER`, sqlutil.QuoteID(table.Name())) + _, err = l.dstConn.Exec(ctx, disableTrigQuery) + if err != nil { + return fmt.Errorf("disable trigger for %s: %w", table.Name(), err) + } + } + + err = swodb.New(l.srcConn).EnableChangeLogTriggers(ctx) + if err != nil { + return fmt.Errorf("enable change log triggers: %w", err) + } + + // wait for in-flight transactions to finish + l.printf(ctx, "waiting for in-flight transactions to finish") + + db := swodb.New(l.srcConn) + + now, err := db.Now(ctx) + if err != nil { + return fmt.Errorf("wait for active tx: get current time: %w", err) + } + + for { + n, err := db.ActiveTxCount(ctx, now) + if err != nil { + return fmt.Errorf("wait for active tx: get active tx count: %w", err) + } + if n == 0 { + break + } + + l.printf(ctx, "waiting for %d transaction(s) to finish", n) + err = ctxSleep(ctx, time.Second) + if err != nil { + return fmt.Errorf("wait for active tx: sleep: %w", err) + } + } + + return nil +} + +func 
ctxSleep(ctx context.Context, dur time.Duration) error { + select { + case <-ctx.Done(): + return ctx.Err() + case <-time.After(dur): + } + return nil +} diff --git a/swo/swosync/tablesync.go b/swo/swosync/tablesync.go new file mode 100644 index 0000000000..6cec4bdd45 --- /dev/null +++ b/swo/swosync/tablesync.go @@ -0,0 +1,235 @@ +package swosync + +import ( + "context" + "encoding/json" + "errors" + "fmt" + "strconv" + "strings" + + "github.com/jackc/pgx/v4" + "github.com/target/goalert/swo/swoinfo" + "github.com/target/goalert/util/sqlutil" +) + +type TableSync struct { + tables []swoinfo.Table + + changes []changeEntry + + changedTables []string + changedData map[RowID]json.RawMessage +} + +type changeEntry struct { + id int64 + RowID +} + +type changeData struct{} + +func NewTableSync(tables []swoinfo.Table) *TableSync { + return &TableSync{ + tables: tables, + changedData: make(map[RowID]json.RawMessage), + } +} + +// AddBatchChangeRead adds a query to the batch to read the changes from the source database. +func (c *TableSync) AddBatchChangeRead(b *pgx.Batch) { + b.Queue(`select id, table_name, row_id from change_log`) +} + +// ScanBatchChangeRead scans the results of the change read query. +func (c *TableSync) ScanBatchChangeRead(res pgx.BatchResults) error { + rows, err := res.Query() + if err != nil { + return err + } + defer rows.Close() + + for rows.Next() { + var id int64 + var table string + var rowID string + if err := rows.Scan(&id, &table, &rowID); err != nil { + return err + } + + c.changes = append(c.changes, changeEntry{id: id, RowID: RowID{table, rowID}}) + } + + return rows.Err() +} + +// HasChanges returns true after ScanBatchChangeRead has been called, if there are changes. 
+func (c *TableSync) HasChanges() bool { return len(c.changes) > 0 } + +func intIDs(ids []string) []int { + var ints []int + for _, id := range ids { + i, err := strconv.Atoi(id) + if err != nil { + panic(err) + } + ints = append(ints, i) + } + return ints +} + +// AddBatchRowReads adds a query to the batch to read all changed rows from the source database. +func (c *TableSync) AddBatchRowReads(b *pgx.Batch) { + rowIDsByTable := make(map[string][]string) + for _, chg := range c.changes { + rowIDsByTable[chg.Table] = append(rowIDsByTable[chg.Table], chg.Row) + } + + for _, table := range c.tables { + rowIDs := rowIDsByTable[table.Name()] + if len(rowIDs) == 0 { + continue + } + + c.changedTables = append(c.changedTables, table.Name()) + arg, cast := castIDs(table, rowIDs) + b.Queue(fmt.Sprintf(`select id::text, to_jsonb(row) from %s row where id%s = any($1)`, sqlutil.QuoteID(table.Name()), cast), arg) + } +} + +func castIDs(t swoinfo.Table, rowIDs []string) (interface{}, string) { + var cast string + switch t.IDType() { + case "integer", "bigint": + return sqlutil.IntArray(intIDs(rowIDs)), "" + case "uuid": + return sqlutil.UUIDArray(rowIDs), "" + default: + // anything else/unknown should be cast to text and compared to the string version + // this is slower, but should only happen for small tables where the id column is an enum + cast = "::text" + fallthrough + case "text": + return sqlutil.StringArray(rowIDs), cast + } +} + +func (c *TableSync) table(name string) swoinfo.Table { + for _, table := range c.tables { + if table.Name() != name { + continue + } + + return table + } + panic(fmt.Sprintf("unknown table %s", name)) +} + +// ScanBatchRowReads scans the results of the row read queries. 
+func (c *TableSync) ScanBatchRowReads(res pgx.BatchResults) error { + if len(c.changedTables) == 0 { + return nil + } + + for _, tableName := range c.changedTables { + rows, err := res.Query() + if errors.Is(err, pgx.ErrNoRows) { + continue + } + if err != nil { + return fmt.Errorf("query changed rows from %s: %w", tableName, err) + } + defer rows.Close() + + for rows.Next() { + var id string + var row json.RawMessage + if err := rows.Scan(&id, &row); err != nil { + return fmt.Errorf("scan changed rows from %s: %w", tableName, err) + } + + c.changedData[RowID{tableName, id}] = row + } + } + + return nil +} + +// ExecDeleteChanges executes a query to delete the change_log entries from the source database. +func (c *TableSync) ExecDeleteChanges(ctx context.Context, srcConn *pgx.Conn) (int64, error) { + if len(c.changes) == 0 { + return 0, nil + } + + var ids []int + for _, chg := range c.changes { + ids = append(ids, int(chg.id)) + } + _, err := srcConn.Exec(ctx, `delete from change_log where id = any($1)`, sqlutil.IntArray(ids)) + if err != nil { + return 0, fmt.Errorf("delete %d change log rows: %w", len(ids), err) + } + + return int64(len(ids)), nil +} + +func (c *TableSync) AddBatchWrites(b *pgx.Batch, dstRows RowSet) { + type pending struct { + inserts []json.RawMessage + updates []json.RawMessage + deletes []string + } + pendingByTable := make(map[string]*pending) + for _, chg := range c.changes { + p := pendingByTable[chg.Table] + if p == nil { + p = &pending{} + pendingByTable[chg.Table] = p + } + newRowData := c.changedData[chg.RowID] + if newRowData == nil { + // row was deleted + dstRows.Delete(chg.RowID) + p.deletes = append(p.deletes, chg.Row) + continue + } + + if dstRows.Has(chg.RowID) { + // row was updated + p.updates = append(p.updates, newRowData) + } else { + // row was inserted + dstRows.Set(chg.RowID) + p.inserts = append(p.inserts, newRowData) + } + } + + for tableName, p := range pendingByTable { + if len(p.inserts) > 0 { +
b.Queue(insertRowsQuery(c.table(tableName)), p.inserts) + } + if len(p.updates) > 0 { + b.Queue(updateQuery(c.table(tableName)), p.updates) + } + if len(p.deletes) > 0 { + arg, cast := castIDs(c.table(tableName), p.deletes) + b.Queue(fmt.Sprintf(`delete from %s where id%s = any($1)`, sqlutil.QuoteID(tableName), cast), arg) + } + } +} + +func updateQuery(t swoinfo.Table) string { + var s strings.Builder + fmt.Fprintf(&s, "update %s dst\n", sqlutil.QuoteID(t.Name())) + fmt.Fprintf(&s, "set ") + for i, col := range t.Columns() { + if i > 0 { + fmt.Fprintf(&s, ", ") + } + fmt.Fprintf(&s, "%s = data.%s", sqlutil.QuoteID(col), sqlutil.QuoteID(col)) + } + fmt.Fprintf(&s, "\nfrom json_populate_recordset(null::%s, $1) as data\n", sqlutil.QuoteID(t.Name())) + fmt.Fprintf(&s, "where dst.id = data.id") + + return s.String() +} diff --git a/swo/syncchanges.go b/swo/syncchanges.go deleted file mode 100644 index 0ff68170ca..0000000000 --- a/swo/syncchanges.go +++ /dev/null @@ -1,99 +0,0 @@ -package swo - -import ( - "context" - "fmt" - - "github.com/jackc/pgx/v4" - "github.com/target/goalert/swo/swodb" -) - -// syncChanges will apply all changes recorded in the change_log table to the next DB. 
-func (e *Execute) syncChanges(ctx context.Context, srcTx, dstTx pgxQueryer) ([]int, error) { - changeRows, err := swodb.New(srcTx).Changes(ctx) - if err != nil { - return nil, fmt.Errorf("fetch changes: %w", err) - } - - type rowID struct { - id string - table string - } - changes := make(map[rowID]struct{}) - rowIDs := make(map[string][]string) - var changeIDs []int - for _, row := range changeRows { - changes[rowID{row.RowID, row.TableName}] = struct{}{} - rowIDs[row.TableName] = append(rowIDs[row.TableName], row.RowID) - changeIDs = append(changeIDs, int(row.ID)) - } - if len(changes) == 0 { - return nil, nil - } - - var applyChanges pgx.Batch - applyChanges.Queue("SET CONSTRAINTS ALL DEFERRED") - - type pendingDelete struct { - query string - idArg interface{} - count int - } - var deletes []pendingDelete - - var queryChanges pgx.Batch - var changedTables []Table - for _, table := range e.tables { - if table.SkipSync() { - continue - } - ids := rowIDs[table.Name] - if len(ids) == 0 { - continue - } - - queryChanges.Queue(table.SelectRowsQuery(), table.IDs(ids)) - changedTables = append(changedTables, table) - } - - res := srcTx.SendBatch(ctx, &queryChanges) - defer res.Close() - - // go in insert order for fetching updates/inserts, note deleted rows - for _, table := range changedTables { - sd, err := e.readChanges(ctx, table, res, rowIDs[table.Name]) - if err != nil { - return changeIDs, fmt.Errorf("fetch changed rows: %w", err) - } - if len(sd.toDelete) > 0 { - deletes = append(deletes, pendingDelete{table.DeleteRowsQuery(), table.IDs(sd.toDelete), len(sd.toDelete)}) - } - - err = e.queueChanges(&applyChanges, table.UpdateRowsQuery(), sd.toUpdate) - if err != nil { - return changeIDs, fmt.Errorf("apply updates: %w", err) - } - - err = e.queueChanges(&applyChanges, table.InsertRowsQuery(), sd.toInsert) - if err != nil { - return changeIDs, fmt.Errorf("apply inserts: %w", err) - } - } - - // handle pendingDeletes in reverse table order - for i := len(deletes) 
- 1; i >= 0; i-- { - applyChanges.Queue(deletes[i].query, deletes[i].idArg) - } - - if applyChanges.Len() == 1 { - // no changes (just defer constraints) - return nil, nil - } - - err = dstTx.SendBatch(ctx, &applyChanges).Close() - if err != nil { - return changeIDs, fmt.Errorf("apply changes: %w", err) - } - - return changeIDs, nil -} diff --git a/swo/syncfull.go b/swo/syncfull.go deleted file mode 100644 index 3652e1fefb..0000000000 --- a/swo/syncfull.go +++ /dev/null @@ -1,164 +0,0 @@ -package swo - -import ( - "bytes" - "context" - "fmt" - "io" - "sync" - "time" - - "github.com/jackc/pgx/v4" - "github.com/target/goalert/swo/swogrp" -) - -type lineCount struct { - n int - mx sync.Mutex -} - -func (lc *lineCount) Write(p []byte) (n int, err error) { - lc.mx.Lock() - lc.n += bytes.Count(p, []byte("\n")) - lc.mx.Unlock() - return len(p), nil -} - -func (lc *lineCount) Lines() int { - lc.mx.Lock() - defer lc.mx.Unlock() - return lc.n -} - -func (e *Execute) Progressf(ctx context.Context, format string, args ...interface{}) { - if e.err != nil { - return - } - - swogrp.Progressf(ctx, format, args...) -} - -func (e *Execute) do(ctx context.Context, desc string, fn func(context.Context) error) { - if e.err != nil { - return - } - - e.err = fn(ctx) - if e.err != nil { - e.err = fmt.Errorf("%s: %w", desc, e.err) - } -} - -// SyncFull performs a full initial sync of the database by copying contents of each table directly to the -// destination database. 
-func (e *Execute) SyncFull(ctx context.Context) { - if e.err != nil { - return - } - e.Progressf(ctx, "performing initial sync") - - srcTx, dstTx, err := e.syncTx(ctx, true) - if err != nil { - e.err = fmt.Errorf("initial sync: begin: %w", err) - return - } - defer srcTx.Rollback(ctx) - defer dstTx.Rollback(ctx) - - // defer all constraints - if _, err = dstTx.Exec(ctx, "SET CONSTRAINTS ALL DEFERRED"); err != nil { - e.err = fmt.Errorf("initial sync: defer constraints: %w", err) - return - } - - for _, table := range e.tables { - if table.SkipSync() { - continue - } - - if err = e.syncTableFull(ctx, table, srcTx, dstTx); err != nil { - e.err = fmt.Errorf("initial sync: copy %s: %w", table.Name, err) - return - } - } - - e.Progressf(ctx, "commit initial sync") - // Important to validate src commit, even though it's read-only. - // - // A failure here indicates the isolation level has been violated - // and we will need to try again. - if err := srcTx.Commit(ctx); err != nil { - e.err = fmt.Errorf("initial sync: src commit: %w", err) - return - } - if err := dstTx.Commit(ctx); err != nil { - e.err = fmt.Errorf("initial sync: dst commit: %w", err) - return - } - - // vacuum analyze new DB - e.Progressf(ctx, "vacuum analyze") - if _, err := e.nextDBConn.Exec(ctx, "VACUUM ANALYZE"); err != nil { - e.err = fmt.Errorf("initial sync: vacuum analyze: %w", err) - return - } -} - -// syncTableFull will copy the contents of the table from the source database to the destination database using -// COPY TO and COPY FROM. 
-func (e *Execute) syncTableFull(origCtx context.Context, t Table, srcTx, dstTx pgx.Tx) error { - ctx, cancel := context.WithCancel(origCtx) - defer cancel() - - var rowCount int - err := srcTx.QueryRow(ctx, fmt.Sprintf("select count(*) from %s", t.QuotedName())).Scan(&rowCount) - if err != nil { - return fmt.Errorf("sync table %s: get row count: %w", t.Name, err) - } - - var wg sync.WaitGroup - wg.Add(3) - - pr, pw := io.Pipe() - var lc lineCount - go func() { - defer wg.Done() - prog := time.NewTimer(500 * time.Millisecond) - defer prog.Stop() - for { - swogrp.Progressf(origCtx, "syncing table %s (%d/%d)", t.Name, lc.Lines(), rowCount) - select { - case <-ctx.Done(): - pw.CloseWithError(ctx.Err()) - pr.CloseWithError(ctx.Err()) - return - case <-prog.C: - } - } - }() - - var srcErr, dstErr error - - go func() { - defer wg.Done() - _, srcErr = srcTx.Conn().PgConn().CopyTo(ctx, pw, fmt.Sprintf(`copy %s to stdout`, t.QuotedName())) - pw.Close() - }() - go func() { - defer wg.Done() - defer cancel() - _, dstErr = dstTx.Conn().PgConn().CopyFrom(ctx, io.TeeReader(pr, &lc), fmt.Sprintf(`copy %s from stdin`, t.QuotedName())) - pr.Close() - }() - - wg.Wait() - - if dstErr != nil { - return fmt.Errorf("copy to dst: %w", dstErr) - } - if srcErr != nil { - return fmt.Errorf("copy from src: %w", srcErr) - } - - return nil -} diff --git a/swo/syncloop.go b/swo/syncloop.go deleted file mode 100644 index b1f42887af..0000000000 --- a/swo/syncloop.go +++ /dev/null @@ -1,78 +0,0 @@ -package swo - -import ( - "context" - "fmt" - "time" - - "github.com/target/goalert/util/log" - "github.com/target/goalert/util/sqlutil" -) - -// SyncLoop will loop the logical replication sequence until the number of changes reaches zero. 
-func (e *Execute) SyncLoop(ctx context.Context) { - if e.err != nil { - return - } - - sync := func(ctx context.Context) (ok, pend int, err error) { - srcTx, dstTx, err := e.syncTx(ctx, true) - if err != nil { - return 0, 0, fmt.Errorf("sync tx: %w", err) - } - defer srcTx.Rollback(ctx) - defer dstTx.Rollback(ctx) - - ids, err := e.syncChanges(ctx, srcTx, dstTx) - if err != nil { - return 0, len(ids), fmt.Errorf("sync change log: %w", err) - } - - err = srcTx.Commit(ctx) - if err != nil { - return len(ids), 0, fmt.Errorf("commit src: %w", err) - } - - err = dstTx.Commit(ctx) - if err != nil { - return 0, len(ids), fmt.Errorf("commit dst: %w", err) - } - - _, err = e.mainDBConn.Exec(ctx, "DELETE FROM change_log WHERE id = any($1)", sqlutil.IntArray(ids)) - if err != nil { - return len(ids), 0, fmt.Errorf("update change log: %w", err) - } - - return len(ids), 0, nil - } - - for ctx.Err() == nil { - // sync in a loop until DB is up-to-date - s := time.Now() - n, pend, err := sync(ctx) - dur := time.Since(s) - - if pend > 0 { - e.Progressf(ctx, "sync: %d rows pending", pend) - } - if err != nil { - log.Log(ctx, err) - e.Rollback() - if n > 0 { - e.err = fmt.Errorf("sync changes: sync failure (commit without record): %w", err) - return - } - continue - } - e.Commit() - - if n > 10 { - e.Progressf(ctx, "sync: %d rows replicated in %s", n, dur.Truncate(time.Millisecond)) - continue - } - - return - } - - e.err = fmt.Errorf("sync changes: %w", ctx.Err()) -} diff --git a/swo/syncsequences.go b/swo/syncsequences.go deleted file mode 100644 index 32f9d02a3e..0000000000 --- a/swo/syncsequences.go +++ /dev/null @@ -1,39 +0,0 @@ -package swo - -import ( - "context" - "fmt" - - "github.com/jackc/pgx/v4" - "github.com/target/goalert/util/sqlutil" -) - -func (e *Execute) syncSequences(ctx context.Context, src, dst pgx.Tx) error { - go e.Progressf(ctx, "syncing sequences") - var seqRead pgx.Batch - for _, name := range e.seqNames { - seqRead.Queue("select last_value, is_called from 
" + sqlutil.QuoteID(name)) - } - - res := src.SendBatch(ctx, &seqRead) - var setSeq pgx.Batch - for _, name := range e.seqNames { - var last int64 - var called bool - err := res.QueryRow().Scan(&last, &called) - if err != nil { - return fmt.Errorf("get sequence %s: %w", name, err) - } - setSeq.Queue("select pg_catalog.setval($1, $2, $3)", name, last, called) - } - if err := res.Close(); err != nil { - return fmt.Errorf("close seq batch: %w", err) - } - - err := dst.SendBatch(ctx, &setSeq).Close() - if err != nil { - return fmt.Errorf("set sequences: %w", err) - } - - return nil -} diff --git a/swo/table.go b/swo/table.go deleted file mode 100644 index 7efc0a78df..0000000000 --- a/swo/table.go +++ /dev/null @@ -1,86 +0,0 @@ -package swo - -import ( - "fmt" - "strings" - - "github.com/target/goalert/util/sqlutil" -) - -// Table describes a database table for a switchover operation. -type Table struct { - Name string - Columns []Column - IDCol Column - - deps map[string]struct{} -} - -// SkipSync returns true if the table should not be synced or instrumented with triggers. -// -// This could be because the data comes from migration or is stateful/related -// to the switchover. 
-func (t Table) SkipSync() bool { - switch t.Name { - case "switchover_state", "switchover_log", "engine_processing_versions", "gorp_migrations", "change_log": - return true - } - - return false -} - -func (t Table) QuotedName() string { - return sqlutil.QuoteID(t.Name) -} - -func (t Table) QuotedChangeTriggerName() string { - return sqlutil.QuoteID(fmt.Sprintf("zz_99_change_log_%s", t.Name)) -} - -func (t Table) QuotedLockTriggerName() string { - return sqlutil.QuoteID(fmt.Sprintf("!_change_log_%s", t.Name)) -} - -func (t Table) ColumnNames() []string { - colNames := make([]string, len(t.Columns)) - for i, col := range t.Columns { - colNames[i] = col.ColumnName - } - return colNames -} - -func (t Table) SelectRowsQuery() string { - if t.IDCol.DataType == "USER-DEFINED" { - return fmt.Sprintf(`select id::text, to_jsonb(row) from %s row where id::text = any($1)`, t.QuotedName()) - } - return fmt.Sprintf(`select id::text, to_jsonb(row) from %s row where id = any($1)`, t.QuotedName()) -} - -func (t Table) DeleteRowsQuery() string { - return fmt.Sprintf(`delete from %s where id = any($1)`, t.QuotedName()) -} - -func (t Table) InsertRowsQuery() string { - return fmt.Sprintf(` - insert into %s - select * from - json_populate_recordset(null::%s, $1) - `, t.QuotedName(), t.QuotedName()) -} - -func (t Table) UpdateRowsQuery() string { - var set strings.Builder - for i, col := range t.Columns { - if i > 0 { - set.WriteString(", ") - } - set.WriteString(fmt.Sprintf("%s = data.%s", col.ColumnName, col.ColumnName)) - } - - return fmt.Sprintf(` - update %s dst - set %s - from json_populate_recordset(null::%s, $1) as data - where dst.id = data.id - `, t.QuotedName(), set.String(), t.QuotedName()) -} diff --git a/swo/waitforactivetx.go b/swo/waitforactivetx.go deleted file mode 100644 index fd7c427035..0000000000 --- a/swo/waitforactivetx.go +++ /dev/null @@ -1,42 +0,0 @@ -package swo - -import ( - "context" - "fmt" - "time" - - "github.com/target/goalert/swo/swodb" - 
"github.com/target/goalert/swo/swogrp" -) - -// WaitForActiveTx waits for all currently active transactions to complete in the main DB. -func (e *Execute) WaitForActiveTx(ctx context.Context) { - if e.err != nil { - return - } - - swogrp.Progressf(ctx, "waiting for in-flight transactions to finish") - - db := swodb.New(e.mainDBConn) - - var now time.Time - now, err := db.CurrentTime(ctx) - if err != nil { - e.err = fmt.Errorf("wait for active tx: get current time: %w", err) - return - } - - for { - n, err := db.ActiveTxCount(ctx, now) - if err != nil { - e.err = fmt.Errorf("wait for active tx: get active tx count: %w", err) - return - } - if n == 0 { - break - } - - swogrp.Progressf(ctx, "waiting for %d transaction(s) to finish", n) - time.Sleep(time.Second) - } -} From 826333c9a5add4db84798d0039008c2a2b6c603d Mon Sep 17 00:00:00 2001 From: Nathaniel Caza Date: Fri, 1 Jul 2022 16:35:52 -0500 Subject: [PATCH 109/225] sync tables in order --- swo/swosync/tablesync.go | 33 ++++++++++++++++++++++++--------- 1 file changed, 24 insertions(+), 9 deletions(-) diff --git a/swo/swosync/tablesync.go b/swo/swosync/tablesync.go index 6cec4bdd45..9cdc7505d2 100644 --- a/swo/swosync/tablesync.go +++ b/swo/swosync/tablesync.go @@ -204,21 +204,36 @@ func (c *TableSync) AddBatchWrites(b *pgx.Batch, dstRows RowSet) { } } - for tableName, p := range pendingByTable { - if len(p.inserts) > 0 { - b.Queue(insertRowsQuery(c.table(tableName)), p.inserts) + // insert, then update, then reverse delete + for _, t := range c.tables { + p := pendingByTable[t.Name()] + if p == nil || len(p.inserts) == 0 { + continue } - if len(p.updates) > 0 { - b.Queue(updateQuery(c.table(tableName)), p.updates) + b.Queue(insertRowsQuery(t), p.inserts) + } + + for _, t := range c.tables { + p := pendingByTable[t.Name()] + if p == nil || len(p.updates) == 0 { + continue } - if len(p.deletes) > 0 { - arg, cast := castIDs(c.table(tableName), p.deletes) - b.Queue(fmt.Sprintf(`delete from %s where id%s = any($1)`, 
sqlutil.QuoteID(tableName), cast), arg) + b.Queue(updateRowsQuery(t), p.updates) + } + + for i := range c.tables { + // reverse-order tables + t := c.tables[len(c.tables)-i-1] + p := pendingByTable[t.Name()] + if p == nil || len(p.deletes) == 0 { + continue } + arg, cast := castIDs(t, p.deletes) + b.Queue(fmt.Sprintf(`delete from %s where id%s = any($1)`, sqlutil.QuoteID(t.Name()), cast), arg) } } -func updateQuery(t swoinfo.Table) string { +func updateRowsQuery(t swoinfo.Table) string { var s strings.Builder fmt.Fprintf(&s, "update %s dst\n", sqlutil.QuoteID(t.Name())) fmt.Fprintf(&s, "set ") From f76a072fdf7fc8f588eff66687eaf3c6bf731da2 Mon Sep 17 00:00:00 2001 From: Nathaniel Caza Date: Fri, 1 Jul 2022 16:36:14 -0500 Subject: [PATCH 110/225] use slow proxy to emulate db conditions --- Procfile.swo | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/Procfile.swo b/Procfile.swo index eb21f4cb5e..b228a2f915 100644 --- a/Procfile.swo +++ b/Procfile.swo @@ -1,7 +1,7 @@ build: while true; do make -qs bin/goalert || make bin/goalert || (echo '\033[0;31mBuild Failure'; sleep 3); sleep 0.1; done @watch-file=./bin/goalert -goalert: ./bin/goalert -l=localhost:3030 --ui-dir=web/src/build --db-url=postgres://goalert@localhost --listen-sysapi=localhost:1234 --listen-prometheus=localhost:2112 --db-url-next=postgres://goalert@localhost:5432/goalert2 +goalert: ./bin/goalert -l=localhost:3030 --ui-dir=web/src/build --db-url=postgres://goalert@localhost:5435 --listen-sysapi=localhost:1234 --listen-prometheus=localhost:2112 --db-url-next=postgres://goalert@localhost:5435/goalert2 smtp: go run github.com/mailhog/MailHog -ui-bind-addr=localhost:8025 -api-bind-addr=localhost:8025 -smtp-bind-addr=localhost:1025 | grep -v KEEPALIVE @@ -9,28 +9,28 @@ smtp: go run github.com/mailhog/MailHog -ui-bind-addr=localhost:8025 -api-bind-a ui: yarn workspace goalert-web run esbuild --watch @watch-file=./bin/goalert -ga2: ./bin/goalert -l=localhost:3050 
--db-url=postgres://goalert@localhost:5432/goalert?sslmode=disable --db-url-next=postgres://goalert@localhost:5432/goalert2 --api-only +ga2: ./bin/goalert -l=localhost:3050 --db-url=postgres://goalert@localhost:5435/goalert?sslmode=disable --db-url-next=postgres://goalert@localhost:5435/goalert2 --api-only @watch-file=./bin/goalert -ga3: ./bin/goalert -l=localhost:3051 --db-url=postgres://goalert@localhost:5432/goalert?sslmode=disable --db-url-next=postgres://goalert@localhost:5432/goalert2 --api-only +ga3: ./bin/goalert -l=localhost:3051 --db-url=postgres://goalert@localhost:5435/goalert?sslmode=disable --db-url-next=postgres://goalert@localhost:5435/goalert2 --api-only @watch-file=./bin/goalert -ga4: ./bin/goalert -l=localhost:3052 --db-url=postgres://goalert@localhost:5432/goalert?sslmode=disable --db-url-next=postgres://goalert@localhost:5432/goalert2 --api-only +ga4: ./bin/goalert -l=localhost:3052 --db-url=postgres://goalert@localhost:5435/goalert?sslmode=disable --db-url-next=postgres://goalert@localhost:5435/goalert2 --api-only @watch-file=./bin/goalert -ga5: ./bin/goalert -l=localhost:3053 --db-url=postgres://goalert@localhost:5432/goalert?sslmode=disable --db-url-next=postgres://goalert@localhost:5432/goalert2 --api-only +ga5: ./bin/goalert -l=localhost:3053 --db-url=postgres://goalert@localhost:5435/goalert?sslmode=disable --db-url-next=postgres://goalert@localhost:5435/goalert2 --api-only @watch-file=./bin/goalert -ga6: ./bin/goalert -l=localhost:3054 --db-url=postgres://goalert@localhost:5432/goalert?sslmode=disable --db-url-next=postgres://goalert@localhost:5432/goalert2 --api-only +ga6: ./bin/goalert -l=localhost:3054 --db-url=postgres://goalert@localhost:5435/goalert?sslmode=disable --db-url-next=postgres://goalert@localhost:5435/goalert2 --api-only @watch-file=./bin/goalert -ga7: ./bin/goalert -l=localhost:3055 --db-url=postgres://goalert@localhost:5432/goalert?sslmode=disable --db-url-next=postgres://goalert@localhost:5432/goalert2 +ga7: 
./bin/goalert -l=localhost:3055 --db-url=postgres://goalert@localhost:5435/goalert?sslmode=disable --db-url-next=postgres://goalert@localhost:5435/goalert2 @watch-file=./bin/goalert -ga8: ./bin/goalert -l=localhost:3056 --db-url=postgres://goalert@localhost:5432/goalert?sslmode=disable --db-url-next=postgres://goalert@localhost:5432/goalert2 +ga8: ./bin/goalert -l=localhost:3056 --db-url=postgres://goalert@localhost:5435/goalert?sslmode=disable --db-url-next=postgres://goalert@localhost:5435/goalert2 @watch-file=./bin/goalert -ga9: ./bin/goalert -l=localhost:3057 --db-url=postgres://goalert@localhost:5432/goalert?sslmode=disable --db-url-next=postgres://goalert@localhost:5432/goalert2 +ga9: ./bin/goalert -l=localhost:3057 --db-url=postgres://goalert@localhost:5435/goalert?sslmode=disable --db-url-next=postgres://goalert@localhost:5435/goalert2 proxy: go run ./devtools/simpleproxy /=http://localhost:3030,http://localhost:3050,http://localhost:3051,http://localhost:3052,http://localhost:3053,http://localhost:3054,http://localhost:3055,http://localhost:3056,http://localhost:3057 From 40f1d9482276410f2c7c4585aff378a737a5adff Mon Sep 17 00:00:00 2001 From: Nathaniel Caza Date: Mon, 11 Jul 2022 11:43:24 -0500 Subject: [PATCH 111/225] use prooxy for ui --- Procfile.swo | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Procfile.swo b/Procfile.swo index b228a2f915..c444c10a4b 100644 --- a/Procfile.swo +++ b/Procfile.swo @@ -1,7 +1,7 @@ build: while true; do make -qs bin/goalert || make bin/goalert || (echo '\033[0;31mBuild Failure'; sleep 3); sleep 0.1; done @watch-file=./bin/goalert -goalert: ./bin/goalert -l=localhost:3030 --ui-dir=web/src/build --db-url=postgres://goalert@localhost:5435 --listen-sysapi=localhost:1234 --listen-prometheus=localhost:2112 --db-url-next=postgres://goalert@localhost:5435/goalert2 +goalert: ./bin/goalert -l=localhost:3040 --ui-dir=web/src/build --db-url=postgres://goalert@localhost:5435 --listen-sysapi=localhost:1234 
--listen-prometheus=localhost:2112 --db-url-next=postgres://goalert@localhost:5435/goalert2 smtp: go run github.com/mailhog/MailHog -ui-bind-addr=localhost:8025 -api-bind-addr=localhost:8025 -smtp-bind-addr=localhost:1025 | grep -v KEEPALIVE @@ -32,6 +32,6 @@ ga8: ./bin/goalert -l=localhost:3056 --db-url=postgres://goalert@localhost:5435/ @watch-file=./bin/goalert ga9: ./bin/goalert -l=localhost:3057 --db-url=postgres://goalert@localhost:5435/goalert?sslmode=disable --db-url-next=postgres://goalert@localhost:5435/goalert2 -proxy: go run ./devtools/simpleproxy /=http://localhost:3030,http://localhost:3050,http://localhost:3051,http://localhost:3052,http://localhost:3053,http://localhost:3054,http://localhost:3055,http://localhost:3056,http://localhost:3057 +proxy: go run ./devtools/simpleproxy -addr localhost:3030 /=http://localhost:3040,http://localhost:3050,http://localhost:3051,http://localhost:3052,http://localhost:3053,http://localhost:3054,http://localhost:3055,http://localhost:3056,http://localhost:3057 slow: go run ./devtools/slowproxy/ -d 25ms -i 1000000 -o 1000000 -j 10ms From 8e27595b2bb90d476963b13bcf6944cd787bf6bd Mon Sep 17 00:00:00 2001 From: Nathaniel Caza Date: Mon, 11 Jul 2022 11:44:34 -0500 Subject: [PATCH 112/225] limit conn lifetime in pause mode --- app/pause.go | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/app/pause.go b/app/pause.go index d28863a83e..d63743f4e4 100644 --- a/app/pause.go +++ b/app/pause.go @@ -2,6 +2,7 @@ package app import ( "context" + "time" "github.com/target/goalert/util/log" ) @@ -19,13 +20,14 @@ func (app *App) Resume(ctx context.Context) error { func (app *App) _pause(ctx context.Context) error { app.db.SetMaxIdleConns(0) + app.db.SetConnMaxLifetime(time.Second) app.events.Stop() - return nil } func (app *App) _resume(ctx context.Context) error { app.db.SetMaxIdleConns(app.cfg.DBMaxIdle) + app.db.SetConnMaxLifetime(0) app.events.Start() return nil From ba87e5075cd941019ad22eb968c95e2e56c419fc Mon 
Sep 17 00:00:00 2001 From: Nathaniel Caza Date: Mon, 11 Jul 2022 11:45:07 -0500 Subject: [PATCH 113/225] always exit on error --- util/sqlutil/listener.go | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/util/sqlutil/listener.go b/util/sqlutil/listener.go index c933ba94a9..284fa6af28 100644 --- a/util/sqlutil/listener.go +++ b/util/sqlutil/listener.go @@ -148,8 +148,11 @@ func (l *Listener) handleNotifications(ctx context.Context) error { default: } n, err := c.(*stdlib.Conn).Conn().WaitForNotification(ctx) - if err != nil && ctx.Err() == nil { - return errors.Wrap(err, "wait for notifications") + if err != nil { + if ctx.Err() == nil { + return errors.Wrap(err, "wait for notifications") + } + return nil } if n == nil { From 0018c09485cdf23acd632e702521b7753e7cd17a Mon Sep 17 00:00:00 2001 From: Nathaniel Caza Date: Mon, 11 Jul 2022 11:45:25 -0500 Subject: [PATCH 114/225] check for missing override --- graphql2/graphqlapp/useroverride.go | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/graphql2/graphqlapp/useroverride.go b/graphql2/graphqlapp/useroverride.go index 1e19b7f52a..ee9849676d 100644 --- a/graphql2/graphqlapp/useroverride.go +++ b/graphql2/graphqlapp/useroverride.go @@ -25,6 +25,9 @@ func (m *Mutation) UpdateUserOverride(ctx context.Context, input graphql2.Update if err != nil { return err } + if u == nil { + return validation.NewFieldError("ID", "user override not found") + } if input.Start != nil { u.Start = *input.Start @@ -72,18 +75,21 @@ func (m *Mutation) CreateUserOverride(ctx context.Context, input graphql2.Create } return u, nil } + func (u *UserOverride) AddUser(ctx context.Context, raw *override.UserOverride) (*user.User, error) { if raw.AddUserID == "" { return nil, nil } return (*App)(u).FindOneUser(ctx, raw.AddUserID) } + func (u *UserOverride) RemoveUser(ctx context.Context, raw *override.UserOverride) (*user.User, error) { if raw.RemoveUserID == "" { return nil, nil } return (*App)(u).FindOneUser(ctx, 
raw.RemoveUserID) } + func (u *UserOverride) Target(ctx context.Context, raw *override.UserOverride) (*assignment.RawTarget, error) { tgt := assignment.NewRawTarget(raw.Target) return &tgt, nil From 946440792f10982debb02c3c3a8d782452d5948c Mon Sep 17 00:00:00 2001 From: Nathaniel Caza Date: Mon, 11 Jul 2022 12:57:40 -0500 Subject: [PATCH 115/225] commit changes before enabling triggers --- swo/swosync/logicalsync.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/swo/swosync/logicalsync.go b/swo/swosync/logicalsync.go index 08e1e88f2f..edd79821ea 100644 --- a/swo/swosync/logicalsync.go +++ b/swo/swosync/logicalsync.go @@ -121,13 +121,13 @@ func (l *LogicalReplicator) doSync(ctx context.Context, final bool) error { applyChanges.Queue("set constraints all deferred") seqSync.AddBatchWrites(&applyChanges) tblSync.AddBatchWrites(&applyChanges, l.dstRows) + applyChanges.Queue("commit") if final { // re-enable triggers in destination DB for _, t := range l.tables { applyChanges.Queue(fmt.Sprintf(`alter table %s enable trigger user`, sqlutil.QuoteID(t.Name()))) } } - applyChanges.Queue("commit") err = l.dstConn.SendBatch(ctx, &applyChanges).Close() if err != nil { l.dstConn.Exec(ctx, `rollback`) From c0f1127b3342a3ea8609b59e6f96f1a02600a2df Mon Sep 17 00:00:00 2001 From: Nathaniel Caza Date: Mon, 11 Jul 2022 12:59:14 -0500 Subject: [PATCH 116/225] increase pause time --- swo/swogrp/group.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/swo/swogrp/group.go b/swo/swogrp/group.go index bf7f75215e..238ef89478 100644 --- a/swo/swogrp/group.go +++ b/swo/swogrp/group.go @@ -414,7 +414,7 @@ func (g *Group) processMessage(ctx context.Context, msg swomsg.Message) error { } g.resumeNow = make(chan struct{}) err := g.startTask(ctx, "resume-after", func(ctx context.Context) error { - t := time.NewTimer(15 * time.Second) + t := time.NewTimer(30 * time.Second) defer t.Stop() select { case <-ctx.Done(): From 1ed2321ecba4b282ea4f42a271aa2d3064b57205 
Mon Sep 17 00:00:00 2001 From: Nathaniel Caza Date: Mon, 11 Jul 2022 14:36:13 -0500 Subject: [PATCH 117/225] add db versions to graphql query --- graphql2/generated.go | 138 ++++++++++++++++++++++++++++++++++--- graphql2/graphqlapp/swo.go | 3 + graphql2/models_gen.go | 18 ++--- graphql2/schema.graphql | 3 + swo/manager.go | 34 ++++++++- swo/swodb/queries.sql.go | 11 +++ swo/swoinfo/queries.sql | 3 + 7 files changed, 191 insertions(+), 19 deletions(-) diff --git a/graphql2/generated.go b/graphql2/generated.go index b3af532973..06518c3c0d 100644 --- a/graphql2/generated.go +++ b/graphql2/generated.go @@ -413,14 +413,16 @@ type ComplexityRoot struct { } SWOStatus struct { - Connections func(childComplexity int) int - Details func(childComplexity int) int - Errors func(childComplexity int) int - IsDone func(childComplexity int) int - IsExecuting func(childComplexity int) int - IsIdle func(childComplexity int) int - IsResetting func(childComplexity int) int - Nodes func(childComplexity int) int + Connections func(childComplexity int) int + Details func(childComplexity int) int + Errors func(childComplexity int) int + IsDone func(childComplexity int) int + IsExecuting func(childComplexity int) int + IsIdle func(childComplexity int) int + IsResetting func(childComplexity int) int + MainDBVersion func(childComplexity int) int + NextDBVersion func(childComplexity int) int + Nodes func(childComplexity int) int } Schedule struct { @@ -2759,6 +2761,20 @@ func (e *executableSchema) Complexity(typeName, field string, childComplexity in return e.complexity.SWOStatus.IsResetting(childComplexity), true + case "SWOStatus.mainDBVersion": + if e.complexity.SWOStatus.MainDBVersion == nil { + break + } + + return e.complexity.SWOStatus.MainDBVersion(childComplexity), true + + case "SWOStatus.nextDBVersion": + if e.complexity.SWOStatus.NextDBVersion == nil { + break + } + + return e.complexity.SWOStatus.NextDBVersion(childComplexity), true + case "SWOStatus.nodes": if 
e.complexity.SWOStatus.Nodes == nil { break @@ -15147,6 +15163,10 @@ func (ec *executionContext) fieldContext_Query_swoStatus(ctx context.Context, fi return ec.fieldContext_SWOStatus_nodes(ctx, field) case "connections": return ec.fieldContext_SWOStatus_connections(ctx, field) + case "mainDBVersion": + return ec.fieldContext_SWOStatus_mainDBVersion(ctx, field) + case "nextDBVersion": + return ec.fieldContext_SWOStatus_nextDBVersion(ctx, field) } return nil, fmt.Errorf("no field named %q was found under type SWOStatus", field.Name) }, @@ -16692,6 +16712,94 @@ func (ec *executionContext) fieldContext_SWOStatus_connections(ctx context.Conte return fc, nil } +func (ec *executionContext) _SWOStatus_mainDBVersion(ctx context.Context, field graphql.CollectedField, obj *SWOStatus) (ret graphql.Marshaler) { + fc, err := ec.fieldContext_SWOStatus_mainDBVersion(ctx, field) + if err != nil { + return graphql.Null + } + ctx = graphql.WithFieldContext(ctx, fc) + defer func() { + if r := recover(); r != nil { + ec.Error(ctx, ec.Recover(ctx, r)) + ret = graphql.Null + } + }() + resTmp, err := ec.ResolverMiddleware(ctx, func(rctx context.Context) (interface{}, error) { + ctx = rctx // use context from middleware stack in children + return obj.MainDBVersion, nil + }) + if err != nil { + ec.Error(ctx, err) + return graphql.Null + } + if resTmp == nil { + if !graphql.HasFieldError(ctx, fc) { + ec.Errorf(ctx, "must not be null") + } + return graphql.Null + } + res := resTmp.(string) + fc.Result = res + return ec.marshalNString2string(ctx, field.Selections, res) +} + +func (ec *executionContext) fieldContext_SWOStatus_mainDBVersion(ctx context.Context, field graphql.CollectedField) (fc *graphql.FieldContext, err error) { + fc = &graphql.FieldContext{ + Object: "SWOStatus", + Field: field, + IsMethod: false, + IsResolver: false, + Child: func(ctx context.Context, field graphql.CollectedField) (*graphql.FieldContext, error) { + return nil, errors.New("field of type String does not have 
child fields") + }, + } + return fc, nil +} + +func (ec *executionContext) _SWOStatus_nextDBVersion(ctx context.Context, field graphql.CollectedField, obj *SWOStatus) (ret graphql.Marshaler) { + fc, err := ec.fieldContext_SWOStatus_nextDBVersion(ctx, field) + if err != nil { + return graphql.Null + } + ctx = graphql.WithFieldContext(ctx, fc) + defer func() { + if r := recover(); r != nil { + ec.Error(ctx, ec.Recover(ctx, r)) + ret = graphql.Null + } + }() + resTmp, err := ec.ResolverMiddleware(ctx, func(rctx context.Context) (interface{}, error) { + ctx = rctx // use context from middleware stack in children + return obj.NextDBVersion, nil + }) + if err != nil { + ec.Error(ctx, err) + return graphql.Null + } + if resTmp == nil { + if !graphql.HasFieldError(ctx, fc) { + ec.Errorf(ctx, "must not be null") + } + return graphql.Null + } + res := resTmp.(string) + fc.Result = res + return ec.marshalNString2string(ctx, field.Selections, res) +} + +func (ec *executionContext) fieldContext_SWOStatus_nextDBVersion(ctx context.Context, field graphql.CollectedField) (fc *graphql.FieldContext, err error) { + fc = &graphql.FieldContext{ + Object: "SWOStatus", + Field: field, + IsMethod: false, + IsResolver: false, + Child: func(ctx context.Context, field graphql.CollectedField) (*graphql.FieldContext, error) { + return nil, errors.New("field of type String does not have child fields") + }, + } + return fc, nil +} + func (ec *executionContext) _Schedule_id(ctx context.Context, field graphql.CollectedField, obj *schedule.Schedule) (ret graphql.Marshaler) { fc, err := ec.fieldContext_Schedule_id(ctx, field) if err != nil { @@ -29924,6 +30032,20 @@ func (ec *executionContext) _SWOStatus(ctx context.Context, sel ast.SelectionSet out.Values[i] = ec._SWOStatus_connections(ctx, field, obj) + if out.Values[i] == graphql.Null { + invalids++ + } + case "mainDBVersion": + + out.Values[i] = ec._SWOStatus_mainDBVersion(ctx, field, obj) + + if out.Values[i] == graphql.Null { + invalids++ + } 
+ case "nextDBVersion": + + out.Values[i] = ec._SWOStatus_nextDBVersion(ctx, field, obj) + if out.Values[i] == graphql.Null { invalids++ } diff --git a/graphql2/graphqlapp/swo.go b/graphql2/graphqlapp/swo.go index d5efc9d63c..62c2eca1da 100644 --- a/graphql2/graphqlapp/swo.go +++ b/graphql2/graphqlapp/swo.go @@ -96,5 +96,8 @@ func (a *Query) SwoStatus(ctx context.Context) (*graphql2.SWOStatus, error) { Nodes: nodes, Errors: errs, Connections: conns, + + NextDBVersion: s.NextDBVersion, + MainDBVersion: s.MainDBVersion, }, nil } diff --git a/graphql2/models_gen.go b/graphql2/models_gen.go index 79a0a2a65f..40ce9bc5cb 100644 --- a/graphql2/models_gen.go +++ b/graphql2/models_gen.go @@ -348,14 +348,16 @@ type SWONode struct { } type SWOStatus struct { - IsIdle bool `json:"isIdle"` - IsDone bool `json:"isDone"` - IsResetting bool `json:"isResetting"` - IsExecuting bool `json:"isExecuting"` - Details string `json:"details"` - Errors []string `json:"errors"` - Nodes []SWONode `json:"nodes"` - Connections []SWOConnection `json:"connections"` + IsIdle bool `json:"isIdle"` + IsDone bool `json:"isDone"` + IsResetting bool `json:"isResetting"` + IsExecuting bool `json:"isExecuting"` + Details string `json:"details"` + Errors []string `json:"errors"` + Nodes []SWONode `json:"nodes"` + Connections []SWOConnection `json:"connections"` + MainDBVersion string `json:"mainDBVersion"` + NextDBVersion string `json:"nextDBVersion"` } type ScheduleConnection struct { diff --git a/graphql2/schema.graphql b/graphql2/schema.graphql index f63cf37c40..acffe3bc4c 100644 --- a/graphql2/schema.graphql +++ b/graphql2/schema.graphql @@ -126,6 +126,9 @@ type SWOStatus { nodes: [SWONode!]! connections: [SWOConnection!]! + + mainDBVersion: String! + nextDBVersion: String! 
} type SWOConnection { diff --git a/swo/manager.go b/swo/manager.go index 40a1f5fccc..c95cebbafe 100644 --- a/swo/manager.go +++ b/swo/manager.go @@ -5,6 +5,7 @@ import ( "database/sql" "database/sql/driver" "errors" + "fmt" "github.com/google/uuid" "github.com/jackc/pgx/v4" @@ -28,6 +29,9 @@ type Manager struct { Config grp *swogrp.Group + + MainDBVersion string + NextDBVersion string } type Node struct { @@ -64,6 +68,22 @@ func NewManager(cfg Config) (*Manager, error) { return nil, err } + err = m.withConnFromBoth(ctx, func(ctx context.Context, oldConn, newConn *pgx.Conn) error { + var err error + m.MainDBVersion, err = swodb.New(oldConn).ServerVersion(ctx) + if err != nil { + return err + } + m.NextDBVersion, err = swodb.New(newConn).ServerVersion(ctx) + if err != nil { + return err + } + return nil + }) + if err != nil { + return nil, fmt.Errorf("failed to get server version: %w", err) + } + m.grp = swogrp.NewGroup(swogrp.Config{ CanExec: cfg.CanExec, @@ -148,7 +168,13 @@ func WithLockedConn(ctx context.Context, db *sql.DB, runFunc func(context.Contex } // Status will return the current switchover status. -func (m *Manager) Status() swogrp.Status { return m.grp.Status() } +func (m *Manager) Status() Status { + return Status{ + MainDBVersion: m.MainDBVersion, + NextDBVersion: m.NextDBVersion, + Status: m.grp.Status(), + } +} // SendPing will ping all nodes in the cluster. 
func (m *Manager) SendPing(ctx context.Context) error { return m.grp.Ping(ctx) } @@ -162,6 +188,8 @@ func (m *Manager) SendExecute(ctx context.Context) error { return m.grp.Execute( func (m *Manager) DB() *sql.DB { return m.dbApp } type Status struct { - Details string - Nodes []Node + swogrp.Status + + MainDBVersion string + NextDBVersion string } diff --git a/swo/swodb/queries.sql.go b/swo/swodb/queries.sql.go index 023e3979cb..eb3941580d 100644 --- a/swo/swodb/queries.sql.go +++ b/swo/swodb/queries.sql.go @@ -164,6 +164,17 @@ func (q *Queries) SequenceNames(ctx context.Context) ([]string, error) { return items, nil } +const serverVersion = `-- name: ServerVersion :one +SELECT version() +` + +func (q *Queries) ServerVersion(ctx context.Context) (string, error) { + row := q.db.QueryRow(ctx, serverVersion) + var version string + err := row.Scan(&version) + return version, err +} + const tableColumns = `-- name: TableColumns :many SELECT col.table_name, col.column_name, diff --git a/swo/swoinfo/queries.sql b/swo/swoinfo/queries.sql index 4e6e750446..bb297cf6a2 100644 --- a/swo/swoinfo/queries.sql +++ b/swo/swoinfo/queries.sql @@ -28,3 +28,6 @@ FROM information_schema.sequences WHERE sequence_catalog = current_database() AND sequence_schema = 'public' AND sequence_name != 'change_log_id_seq'; + +-- name: ServerVersion :one +SELECT version(); From 0ef6b745148882811e8f5a6720377369ec295416 Mon Sep 17 00:00:00 2001 From: Nathaniel Caza Date: Mon, 11 Jul 2022 14:41:57 -0500 Subject: [PATCH 118/225] use SWO instead of S/O --- app/cmd.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/app/cmd.go b/app/cmd.go index 11c6e6c598..ef536d5666 100644 --- a/app/cmd.go +++ b/app/cmd.go @@ -92,7 +92,7 @@ var RootCmd = &cobra.Command{ } q := u.Query() if cfg.DBURLNext != "" { - q.Set("application_name", fmt.Sprintf("GoAlert %s (S/O Mode)", version.GitVersion())) + q.Set("application_name", fmt.Sprintf("GoAlert %s (SWO Mode)", version.GitVersion())) } else { 
q.Set("application_name", fmt.Sprintf("GoAlert %s", version.GitVersion())) } @@ -145,7 +145,7 @@ var RootCmd = &cobra.Command{ return errors.Wrap(err, "parse next URL") } q := u.Query() - q.Set("application_name", fmt.Sprintf("GoAlert %s (S/O Mode)", version.GitVersion())) + q.Set("application_name", fmt.Sprintf("GoAlert %s (SWO Mode)", version.GitVersion())) q.Set("enable_seqscan", "off") u.RawQuery = q.Encode() cfg.DBURLNext = u.String() From 21c2b8b43b8043bf70517ae98633e15deb23010e Mon Sep 17 00:00:00 2001 From: Nathaniel Caza Date: Mon, 11 Jul 2022 15:31:37 -0500 Subject: [PATCH 119/225] add db_id to switchover_state --- migrate/migrations/20220711151242-db-id.sql | 6 ++++++ 1 file changed, 6 insertions(+) create mode 100644 migrate/migrations/20220711151242-db-id.sql diff --git a/migrate/migrations/20220711151242-db-id.sql b/migrate/migrations/20220711151242-db-id.sql new file mode 100644 index 0000000000..4620812b7d --- /dev/null +++ b/migrate/migrations/20220711151242-db-id.sql @@ -0,0 +1,6 @@ +-- +migrate Up +ALTER TABLE switchover_state +ADD column db_id UUID NOT NULL DEFAULT gen_random_uuid(); + +-- +migrate Down +ALTER TABLE switchover_state DROP column db_id; From 3ebee78db5d82a98a8ca9f32eb5d1f79c97bbfd1 Mon Sep 17 00:00:00 2001 From: Nathaniel Caza Date: Wed, 13 Jul 2022 10:32:11 -0500 Subject: [PATCH 120/225] push instead of pull for ratelimiter --- devtools/slowproxy/ratelimiter.go | 61 ++++++++++++++++++------------- 1 file changed, 35 insertions(+), 26 deletions(-) diff --git a/devtools/slowproxy/ratelimiter.go b/devtools/slowproxy/ratelimiter.go index fc925d573c..65708eda51 100644 --- a/devtools/slowproxy/ratelimiter.go +++ b/devtools/slowproxy/ratelimiter.go @@ -7,44 +7,53 @@ import ( ) type rateLimiter struct { - bucket chan int - overflow chan int - rate bool - latency time.Duration - jitter time.Duration + bucket chan int + rate bool + latency time.Duration + jitter time.Duration } func newRateLimiter(bps int, latency, jitter time.Duration) 
*rateLimiter { ch := make(chan int) + + bpMs := float64(bps) / 1000 go func() { - t := time.NewTicker(time.Second) - for range t.C { - ch <- bps + t := time.NewTicker(time.Millisecond) + var count float64 + for { + if count >= bpMs { + <-t.C + count -= bpMs + if count < 0 { + count = 0 + } + continue + } + + select { + case <-t.C: + count -= bpMs + if count < 0 { + count = 0 + } + case val := <-ch: + count += float64(val) + } } }() + return &rateLimiter{ - rate: bps > 0, - bucket: ch, - overflow: make(chan int, 1000), - latency: latency, - jitter: jitter, + rate: bps > 0, + bucket: ch, + latency: latency, + jitter: jitter, } } func (r *rateLimiter) WaitFor(count int) time.Duration { - var n int - for r.rate && n < count { - select { - case val := <-r.bucket: - n += val - case val := <-r.overflow: - n += val - } - } - if n > count { - r.overflow <- n - count - } - return (r.latency - (r.jitter / 2) + time.Duration(rand.Float64()*float64(r.jitter))) / 2 + waitUntil := time.Now().Add((r.latency - (r.jitter / 2) + time.Duration(rand.Float64()*float64(r.jitter)))) + r.bucket <- count + return time.Until(waitUntil) / 2 } func (r *rateLimiter) NewWriter(w io.Writer) io.Writer { From bdb89dffc1ffc4c01c50ec505a24838235ce790e Mon Sep 17 00:00:00 2001 From: Nathaniel Caza Date: Wed, 13 Jul 2022 10:33:25 -0500 Subject: [PATCH 121/225] don't display ... as number --- web/src/app/users/UserContactMethodVerificationDialog.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/web/src/app/users/UserContactMethodVerificationDialog.js b/web/src/app/users/UserContactMethodVerificationDialog.js index 9846cb1ff3..c914ac2bb8 100644 --- a/web/src/app/users/UserContactMethodVerificationDialog.js +++ b/web/src/app/users/UserContactMethodVerificationDialog.js @@ -53,7 +53,7 @@ export default function UserContactMethodVerificationDialog(props) { }) const fromNumber = - data?.userContactMethod?.lastVerifyMessageState?.formattedSrcValue ?? '...' 
+ data?.userContactMethod?.lastVerifyMessageState?.formattedSrcValue ?? '' const cm = data?.userContactMethod ?? {} const { loading, error } = status From 04787bdce45fa2b7ad4a0c18fc7b683bdbb322ef Mon Sep 17 00:00:00 2001 From: Nathaniel Caza Date: Wed, 13 Jul 2022 11:15:47 -0500 Subject: [PATCH 122/225] add data and use urql --- .../app/admin/switchover/AdminSwitchover.tsx | 99 ++++++++++--------- web/src/schema.d.ts | 2 + 2 files changed, 53 insertions(+), 48 deletions(-) diff --git a/web/src/app/admin/switchover/AdminSwitchover.tsx b/web/src/app/admin/switchover/AdminSwitchover.tsx index 91a4aaeafb..77ea7597c5 100644 --- a/web/src/app/admin/switchover/AdminSwitchover.tsx +++ b/web/src/app/admin/switchover/AdminSwitchover.tsx @@ -1,4 +1,4 @@ -import React, { useState } from 'react' +import React, { useEffect, useState } from 'react' import ButtonGroup from '@mui/material/ButtonGroup' import Card from '@mui/material/Card' import CardContent from '@mui/material/CardContent' @@ -7,7 +7,6 @@ import Grid from '@mui/material/Grid' import Skeleton from '@mui/material/Skeleton' import Typography from '@mui/material/Typography' import { Fade, SvgIconProps, Zoom } from '@mui/material' -import PingIcon from 'mdi-material-ui/DatabaseMarker' import NoResetIcon from 'mdi-material-ui/DatabaseRefreshOutline' import ResetIcon from 'mdi-material-ui/DatabaseRefresh' import NoExecuteIcon from 'mdi-material-ui/DatabaseExportOutline' @@ -15,7 +14,7 @@ import ExecuteIcon from 'mdi-material-ui/DatabaseExport' import ErrorIcon from 'mdi-material-ui/DatabaseAlert' import IdleIcon from 'mdi-material-ui/DatabaseSettings' import InProgressIcon from 'mdi-material-ui/DatabaseEdit' -import { gql, useMutation, useQuery } from '@apollo/client' +import { gql, useMutation, useQuery } from 'urql' import { DateTime } from 'luxon' import { SWONode as SWONodeType, SWOStatus } from '../../../schema' import Notices, { Notice } from '../../details/Notices' @@ -40,6 +39,8 @@ const query = gql` isExecuting 
details errors + mainDBVersion + nextDBVersion connections { name count @@ -84,13 +85,20 @@ function cptlz(s: string): string { } export default function AdminSwitchover(): JSX.Element { - const { loading, error, data: _data } = useQuery(query, { pollInterval: 250 }) + const [{ fetching, error, data: _data }, refetch] = useQuery({ + query, + }) const data = _data?.swoStatus as SWOStatus const [lastAction, setLastAction] = useState('') const [_statusNotices, setStatusNotices] = useState([]) - const [commit, mutationStatus] = useMutation(mutation) + const [mutationStatus, commit] = useMutation(mutation) - if (loading) { + useEffect(() => { + const t = setInterval(refetch, 250) + return () => clearInterval(t) + }, []) + + if (fetching) { return } @@ -142,40 +150,35 @@ export default function AdminSwitchover(): JSX.Element { ) } - function actionHandler(action: 'ping' | 'reset' | 'execute'): () => void { + function actionHandler(action: 'reset' | 'execute'): () => void { return () => { setLastAction(action) - commit({ - variables: { - action, - }, - onError: (error) => { - setStatusNotices([ - ..._statusNotices, - { - type: 'error', - message: 'Failed to ' + action, - details: cptlz(error.message), - endNote: DateTime.local().toFormat('fff'), - }, - ]) - }, - }) + commit({ action }) } } - - const statusNotices = _statusNotices.concat( - (data?.errors ?? 
[]).map((message: string) => ({ + const statusNotices = [] + if (mutationStatus.error) { + console.log(mutationStatus) + statusNotices.push({ type: 'error', - message, - })), - ) + message: 'Failed to ' + mutationStatus.operation?.variables?.action, + details: cptlz(mutationStatus.error.message), + endNote: DateTime.local().toFormat('fff'), + }) + } + if (data?.errors) { + data?.errors.forEach((message: string) => { + statusNotices.push({ + type: 'error', + message, + }) + }) + } - const pingLoad = lastAction === 'ping' && mutationStatus.loading const resetLoad = - data?.isResetting || (lastAction === 'reset' && mutationStatus.loading) + data?.isResetting || (lastAction === 'reset' && mutationStatus.fetching) const executeLoad = - data?.isExecuting || (lastAction === 'execute' && mutationStatus.loading) + data?.isExecuting || (lastAction === 'execute' && mutationStatus.fetching) function getIcon(): React.ReactNode { const i: SvgIconProps = { color: 'primary', sx: { fontSize: '3.5rem' } } @@ -183,7 +186,7 @@ export default function AdminSwitchover(): JSX.Element { if (error) { return } - if (loading && !data) { + if (fetching && !data) { return ( @@ -242,25 +245,14 @@ export default function AdminSwitchover(): JSX.Element { {getDetails()} - } - variant='outlined' - size='large' - disabled={mutationStatus.loading} - loading={pingLoad} - loadingPosition='start' - onClick={actionHandler('ping')} - > - {pingLoad ? 'Sending ping...' : 'Ping'} - : } - disabled={data?.isDone || mutationStatus.loading} + disabled={data?.isDone || mutationStatus.fetching} variant='outlined' size='large' loading={ data?.isResetting || - (lastAction === 'reset' && mutationStatus.loading) + (lastAction === 'reset' && mutationStatus.fetching) } loadingPosition='start' onClick={actionHandler('reset')} @@ -271,12 +263,12 @@ export default function AdminSwitchover(): JSX.Element { startIcon={ !data?.isIdle ? 
: } - disabled={!data?.isIdle || mutationStatus.loading} + disabled={!data?.isIdle || mutationStatus.fetching} variant='outlined' size='large' loading={ data?.isExecuting || - (lastAction === 'execute' && mutationStatus.loading) + (lastAction === 'execute' && mutationStatus.fetching) } loadingPosition='start' onClick={actionHandler('execute')} @@ -291,6 +283,10 @@ export default function AdminSwitchover(): JSX.Element { + Main DB Version: {data?.mainDBVersion} +
+ Next DB Version: {data?.nextDBVersion} +
@@ -302,6 +298,13 @@ export default function AdminSwitchover(): JSX.Element { {data?.connections?.map((row) => ( Date: Wed, 13 Jul 2022 12:09:40 -0500 Subject: [PATCH 123/225] simplify to single log --- .../20220628125954-switchover-mk2.sql | 9 ++++- migrate/migrations/20220711151242-db-id.sql | 6 --- swo/manager.go | 37 ++++++++----------- swo/swodb/models.go | 2 + swo/swodb/queries.sql.go | 31 ++++++++++------ swo/swogrp/config.go | 5 +-- swo/swoinfo/db.go | 25 +++++++++++++ swo/swoinfo/queries.sql | 6 ++- swo/swosync/safety.go | 14 +++++-- 9 files changed, 85 insertions(+), 50 deletions(-) delete mode 100644 migrate/migrations/20220711151242-db-id.sql create mode 100644 swo/swoinfo/db.go diff --git a/migrate/migrations/20220628125954-switchover-mk2.sql b/migrate/migrations/20220628125954-switchover-mk2.sql index 671bccce47..6e8783166d 100644 --- a/migrate/migrations/20220628125954-switchover-mk2.sql +++ b/migrate/migrations/20220628125954-switchover-mk2.sql @@ -1,9 +1,14 @@ -- +migrate Up CREATE TABLE switchover_log ( id BIGINT PRIMARY KEY, - timestamp timestamp with time zone NOT NULL DEFAULT now(), - data jsonb NOT NULL + TIMESTAMP TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT now(), + DATA jsonb NOT NULL ); +ALTER TABLE switchover_state +ADD column db_id UUID NOT NULL DEFAULT gen_random_uuid(); + -- +migrate Down DROP TABLE switchover_log; + +ALTER TABLE switchover_state DROP column db_id; diff --git a/migrate/migrations/20220711151242-db-id.sql b/migrate/migrations/20220711151242-db-id.sql deleted file mode 100644 index 4620812b7d..0000000000 --- a/migrate/migrations/20220711151242-db-id.sql +++ /dev/null @@ -1,6 +0,0 @@ --- +migrate Up -ALTER TABLE switchover_state -ADD column db_id UUID NOT NULL DEFAULT gen_random_uuid(); - --- +migrate Down -ALTER TABLE switchover_state DROP column db_id; diff --git a/swo/manager.go b/swo/manager.go index c95cebbafe..b253d973f9 100644 --- a/swo/manager.go +++ b/swo/manager.go @@ -13,6 +13,7 @@ import ( 
"github.com/target/goalert/app/lifecycle" "github.com/target/goalert/swo/swodb" "github.com/target/goalert/swo/swogrp" + "github.com/target/goalert/swo/swoinfo" "github.com/target/goalert/swo/swomsg" "github.com/target/goalert/swo/swosync" "github.com/target/goalert/util/log" @@ -30,8 +31,8 @@ type Manager struct { grp *swogrp.Group - MainDBVersion string - NextDBVersion string + MainDBInfo *swoinfo.DB + NextDBInfo *swoinfo.DB } type Node struct { @@ -59,22 +60,18 @@ func NewManager(cfg Config) (*Manager, error) { } ctx := cfg.Logger.BackgroundContext() - mainLog, err := swomsg.NewLog(ctx, m.dbMain) - if err != nil { - return nil, err - } - nextLog, err := swomsg.NewLog(ctx, m.dbNext) + messages, err := swomsg.NewLog(ctx, m.dbMain) if err != nil { return nil, err } err = m.withConnFromBoth(ctx, func(ctx context.Context, oldConn, newConn *pgx.Conn) error { var err error - m.MainDBVersion, err = swodb.New(oldConn).ServerVersion(ctx) + m.MainDBInfo, err = swoinfo.DBInfo(ctx, oldConn) if err != nil { return err } - m.NextDBVersion, err = swodb.New(newConn).ServerVersion(ctx) + m.NextDBInfo, err = swoinfo.DBInfo(ctx, newConn) if err != nil { return err } @@ -87,9 +84,8 @@ func NewManager(cfg Config) (*Manager, error) { m.grp = swogrp.NewGroup(swogrp.Config{ CanExec: cfg.CanExec, - Logger: cfg.Logger, - MainLog: mainLog, - NextLog: nextLog, + Logger: cfg.Logger, + Msgs: messages, ResetFunc: m.DoReset, ExecuteFunc: m.DoExecute, @@ -134,25 +130,25 @@ func (m *Manager) Init(app lifecycle.PauseResumer) { // withConnFromOld allows performing operations with a raw connection to the old database. func (m *Manager) withConnFromOld(ctx context.Context, f func(context.Context, *pgx.Conn) error) error { - return WithLockedConn(ctx, m.dbMain, f) + return WithPGXConn(ctx, m.dbMain, f) } // withConnFromNew allows performing operations with a raw connection to the new database. 
func (m *Manager) withConnFromNew(ctx context.Context, f func(context.Context, *pgx.Conn) error) error { - return WithLockedConn(ctx, m.dbNext, f) + return WithPGXConn(ctx, m.dbNext, f) } // withConnFromBoth allows performing operations with a raw connection to both databases database. func (m *Manager) withConnFromBoth(ctx context.Context, f func(ctx context.Context, oldConn, newConn *pgx.Conn) error) error { // grab lock with old DB first - return WithLockedConn(ctx, m.dbMain, func(ctx context.Context, connMain *pgx.Conn) error { - return WithLockedConn(ctx, m.dbNext, func(ctx context.Context, connNext *pgx.Conn) error { + return WithPGXConn(ctx, m.dbMain, func(ctx context.Context, connMain *pgx.Conn) error { + return WithPGXConn(ctx, m.dbNext, func(ctx context.Context, connNext *pgx.Conn) error { return f(ctx, connMain, connNext) }) }) } -func WithLockedConn(ctx context.Context, db *sql.DB, runFunc func(context.Context, *pgx.Conn) error) error { +func WithPGXConn(ctx context.Context, db *sql.DB, runFunc func(context.Context, *pgx.Conn) error) error { conn, err := db.Conn(ctx) if err != nil { return err @@ -170,15 +166,12 @@ func WithLockedConn(ctx context.Context, db *sql.DB, runFunc func(context.Contex // Status will return the current switchover status. func (m *Manager) Status() Status { return Status{ - MainDBVersion: m.MainDBVersion, - NextDBVersion: m.NextDBVersion, + MainDBVersion: m.MainDBInfo.Version, + NextDBVersion: m.NextDBInfo.Version, Status: m.grp.Status(), } } -// SendPing will ping all nodes in the cluster. -func (m *Manager) SendPing(ctx context.Context) error { return m.grp.Ping(ctx) } - // SendReset will trigger a reset of the switchover. 
func (m *Manager) SendReset(ctx context.Context) error { return m.grp.Reset(ctx) } diff --git a/swo/swodb/models.go b/swo/swodb/models.go index f0c7658e9e..1d2b2804c5 100644 --- a/swo/swodb/models.go +++ b/swo/swodb/models.go @@ -9,6 +9,7 @@ import ( "fmt" "time" + "github.com/google/uuid" "github.com/jackc/pgtype" ) @@ -66,4 +67,5 @@ type SwitchoverLog struct { type SwitchoverState struct { Ok bool CurrentState EnumSwitchoverState + DbID uuid.UUID } diff --git a/swo/swodb/queries.sql.go b/swo/swodb/queries.sql.go index eb3941580d..aa7419ad1e 100644 --- a/swo/swodb/queries.sql.go +++ b/swo/swodb/queries.sql.go @@ -8,6 +8,8 @@ package swodb import ( "context" "time" + + "github.com/google/uuid" ) const activeTxCount = `-- name: ActiveTxCount :one @@ -24,6 +26,24 @@ func (q *Queries) ActiveTxCount(ctx context.Context, xactStart time.Time) (int64 return count, err } +const databaseInfo = `-- name: DatabaseInfo :one +SELECT db_id AS id, + version() +FROM switchover_state +` + +type DatabaseInfoRow struct { + ID uuid.UUID + Version string +} + +func (q *Queries) DatabaseInfo(ctx context.Context) (DatabaseInfoRow, error) { + row := q.db.QueryRow(ctx, databaseInfo) + var i DatabaseInfoRow + err := row.Scan(&i.ID, &i.Version) + return i, err +} + const disableChangeLogTriggers = `-- name: DisableChangeLogTriggers :exec UPDATE switchover_state SET current_state = 'idle' @@ -164,17 +184,6 @@ func (q *Queries) SequenceNames(ctx context.Context) ([]string, error) { return items, nil } -const serverVersion = `-- name: ServerVersion :one -SELECT version() -` - -func (q *Queries) ServerVersion(ctx context.Context) (string, error) { - row := q.db.QueryRow(ctx, serverVersion) - var version string - err := row.Scan(&version) - return version, err -} - const tableColumns = `-- name: TableColumns :many SELECT col.table_name, col.column_name, diff --git a/swo/swogrp/config.go b/swo/swogrp/config.go index afe1d64c31..2d3ee3c3a2 100644 --- a/swo/swogrp/config.go +++ 
b/swo/swogrp/config.go @@ -10,9 +10,8 @@ import ( type Config struct { CanExec bool - Logger *log.Logger - MainLog *swomsg.Log - NextLog *swomsg.Log + Logger *log.Logger + Msgs *swomsg.Log ResetFunc func(context.Context) error ExecuteFunc func(context.Context) error diff --git a/swo/swoinfo/db.go b/swo/swoinfo/db.go new file mode 100644 index 0000000000..e4cfd33bac --- /dev/null +++ b/swo/swoinfo/db.go @@ -0,0 +1,25 @@ +package swoinfo + +import ( + "context" + + "github.com/google/uuid" + "github.com/jackc/pgx/v4" + "github.com/target/goalert/swo/swodb" +) + +type DB struct { + ID uuid.UUID + Version string +} + +func DBInfo(ctx context.Context, conn *pgx.Conn) (*DB, error) { + info, err := swodb.New(conn).DatabaseInfo(ctx) + if err != nil { + return nil, err + } + return &DB{ + ID: info.ID, + Version: info.Version, + }, nil +} diff --git a/swo/swoinfo/queries.sql b/swo/swoinfo/queries.sql index bb297cf6a2..fe89b2ba94 100644 --- a/swo/swoinfo/queries.sql +++ b/swo/swoinfo/queries.sql @@ -29,5 +29,7 @@ WHERE sequence_catalog = current_database() AND sequence_schema = 'public' AND sequence_name != 'change_log_id_seq'; --- name: ServerVersion :one -SELECT version(); +-- name: DatabaseInfo :one +SELECT db_id AS id, + version() +FROM switchover_state; diff --git a/swo/swosync/safety.go b/swo/swosync/safety.go index 952e277486..2542c6b72e 100644 --- a/swo/swosync/safety.go +++ b/swo/swosync/safety.go @@ -1,3 +1,10 @@ +/* + Locks: + - 4919: migration lock, used to ensure only a single instance is performing migrations (or any sync operations) + - 4369: global switchover lock, in SWO mode, all instances must acquire this lock before performing any queries + during the switchover, an exclusive lock is acquired by the executing node (stop-the-world). 
+*/ + package swosync // txInProgressLock will cause the transaction to abort if it's unable to get @@ -8,13 +15,12 @@ declare begin set local idle_in_transaction_session_timeout = 60000; set local lock_timeout = 60000; - assert (select pg_try_advisory_xact_lock(4370)), 'failed to get exec lock'; + assert (select pg_try_advisory_xact_lock(4919)), 'failed to get migration lock'; assert (select current_state = 'in_progress' from switchover_state), 'switchover state is not in_progress'; end $$; ` -// txInProgressLock will cause the transaction to abort if it's unable to get -// the exec lock and/or switchover state is not currently in_progress +// txStopTheWorld will grab the global switchover lock, halting all database activity const txStopTheWorld = ` do $$ declare @@ -34,7 +40,7 @@ declare begin set idle_in_transaction_session_timeout = 60000; set lock_timeout = 60000; - assert (select pg_try_advisory_lock(4370)), 'failed to get exec lock'; + assert (select pg_try_advisory_lock(4919)), 'failed to get migration lock'; assert (select current_state != 'use_next_db' from switchover_state), 'switchover state is use_next_db'; end $$; ` From 35990581c8842d24e5f78c4a13d477f31fa8857a Mon Sep 17 00:00:00 2001 From: Nathaniel Caza Date: Wed, 20 Jul 2022 13:26:38 -0500 Subject: [PATCH 124/225] refactor cluster mgmt --- Makefile | 2 +- app/app.go | 5 - app/startup.go | 12 +- graphql2/generated.go | 344 ++---------- graphql2/graphqlapp/swo.go | 63 +-- graphql2/models_gen.go | 65 ++- graphql2/schema.graphql | 23 +- swo/execute.go | 60 -- swo/executor.go | 112 ++++ swo/manager.go | 90 ++- swo/pauseapps.go | 47 -- swo/status.go | 15 + swo/swogrp/clusterstate.go | 13 + swo/swogrp/config.go | 25 +- swo/swogrp/group.go | 511 ------------------ swo/swogrp/node.go | 12 + swo/swogrp/progressf.go | 44 -- swo/swogrp/starttask.go | 47 ++ swo/swogrp/state.go | 12 - swo/swogrp/status.go | 29 + swo/swogrp/taskman.go | 255 +++++++++ swo/swoinfo/scantables.go | 3 - swo/swosync/safety.go | 21 +- 
.../app/admin/switchover/AdminSwitchover.tsx | 67 ++- web/src/schema.d.ts | 21 +- 25 files changed, 769 insertions(+), 1129 deletions(-) delete mode 100644 swo/execute.go create mode 100644 swo/executor.go delete mode 100644 swo/pauseapps.go create mode 100644 swo/status.go create mode 100644 swo/swogrp/clusterstate.go delete mode 100644 swo/swogrp/group.go create mode 100644 swo/swogrp/node.go delete mode 100644 swo/swogrp/progressf.go create mode 100644 swo/swogrp/starttask.go delete mode 100644 swo/swogrp/state.go create mode 100644 swo/swogrp/status.go create mode 100644 swo/swogrp/taskman.go diff --git a/Makefile b/Makefile index 971e18eb1a..29b762dbb5 100644 --- a/Makefile +++ b/Makefile @@ -94,7 +94,7 @@ web/src/schema.d.ts: graphql2/schema.graphql node_modules web/src/genschema.go start-swo: bin/psql-lite bin/goalert bin/waitfor bin/runproc ./bin/waitfor -timeout 1s "$(DB_URL)" || make postgres ./bin/goalert migrate --db-url=postgres://goalert@localhost/goalert - ./bin/psql-lite -d postgres://goalert@localhost -c "update switchover_state set current_state = 'idle'; drop database if exists goalert2; create database goalert2;" + ./bin/psql-lite -d postgres://goalert@localhost -c "update switchover_state set current_state = 'idle'; truncate table switchover_log; drop database if exists goalert2; create database goalert2;" ./bin/goalert migrate --db-url=postgres://goalert@localhost/goalert2 GOALERT_VERSION=$(GIT_VERSION) ./bin/runproc -f Procfile.swo -l Procfile.local diff --git a/app/app.go b/app/app.go index 869a8e7537..19facb99b0 100644 --- a/app/app.go +++ b/app/app.go @@ -181,11 +181,6 @@ func NewApp(c Config, db *sql.DB) (*App, error) { doneCh: make(chan struct{}), } - if c.SWO != nil { - c.SWO.Init(app) - log.Logf(app.LogBackgroundContext(), "SWO Enabled.") - } - gCfg := &gorm.Config{ PrepareStmt: true, NowFunc: app.Now, diff --git a/app/startup.go b/app/startup.go index 32612214da..d660fc4dd6 100644 --- a/app/startup.go +++ b/app/startup.go @@ -73,8 
+73,18 @@ func (app *App) startup(ctx context.Context) error { return app.startupErr } - return app.mgr.SetPauseResumer(lifecycle.MultiPauseResume( + err := app.mgr.SetPauseResumer(lifecycle.MultiPauseResume( app.Engine, lifecycle.PauseResumerFunc(app._pause, app._resume), )) + if err != nil { + return err + } + + if app.cfg.SWO != nil { + app.cfg.SWO.Init(app) + log.Logf(app.LogBackgroundContext(), "SWO Enabled.") + } + + return nil } diff --git a/graphql2/generated.go b/graphql2/generated.go index 06518c3c0d..f38fb2ba1b 100644 --- a/graphql2/generated.go +++ b/graphql2/generated.go @@ -409,20 +409,16 @@ type ComplexityRoot struct { IsLeader func(childComplexity int) int NewValid func(childComplexity int) int OldValid func(childComplexity int) int - Status func(childComplexity int) int } SWOStatus struct { Connections func(childComplexity int) int - Details func(childComplexity int) int - Errors func(childComplexity int) int - IsDone func(childComplexity int) int - IsExecuting func(childComplexity int) int - IsIdle func(childComplexity int) int - IsResetting func(childComplexity int) int + LastError func(childComplexity int) int + LastStatus func(childComplexity int) int MainDBVersion func(childComplexity int) int NextDBVersion func(childComplexity int) int Nodes func(childComplexity int) int + State func(childComplexity int) int } Schedule struct { @@ -2705,13 +2701,6 @@ func (e *executableSchema) Complexity(typeName, field string, childComplexity in return e.complexity.SWONode.OldValid(childComplexity), true - case "SWONode.status": - if e.complexity.SWONode.Status == nil { - break - } - - return e.complexity.SWONode.Status(childComplexity), true - case "SWOStatus.connections": if e.complexity.SWOStatus.Connections == nil { break @@ -2719,47 +2708,19 @@ func (e *executableSchema) Complexity(typeName, field string, childComplexity in return e.complexity.SWOStatus.Connections(childComplexity), true - case "SWOStatus.details": - if e.complexity.SWOStatus.Details == 
nil { - break - } - - return e.complexity.SWOStatus.Details(childComplexity), true - - case "SWOStatus.errors": - if e.complexity.SWOStatus.Errors == nil { + case "SWOStatus.lastError": + if e.complexity.SWOStatus.LastError == nil { break } - return e.complexity.SWOStatus.Errors(childComplexity), true + return e.complexity.SWOStatus.LastError(childComplexity), true - case "SWOStatus.isDone": - if e.complexity.SWOStatus.IsDone == nil { + case "SWOStatus.lastStatus": + if e.complexity.SWOStatus.LastStatus == nil { break } - return e.complexity.SWOStatus.IsDone(childComplexity), true - - case "SWOStatus.isExecuting": - if e.complexity.SWOStatus.IsExecuting == nil { - break - } - - return e.complexity.SWOStatus.IsExecuting(childComplexity), true - - case "SWOStatus.isIdle": - if e.complexity.SWOStatus.IsIdle == nil { - break - } - - return e.complexity.SWOStatus.IsIdle(childComplexity), true - - case "SWOStatus.isResetting": - if e.complexity.SWOStatus.IsResetting == nil { - break - } - - return e.complexity.SWOStatus.IsResetting(childComplexity), true + return e.complexity.SWOStatus.LastStatus(childComplexity), true case "SWOStatus.mainDBVersion": if e.complexity.SWOStatus.MainDBVersion == nil { @@ -2782,6 +2743,13 @@ func (e *executableSchema) Complexity(typeName, field string, childComplexity in return e.complexity.SWOStatus.Nodes(childComplexity), true + case "SWOStatus.state": + if e.complexity.SWOStatus.State == nil { + break + } + + return e.complexity.SWOStatus.State(childComplexity), true + case "Schedule.assignedTo": if e.complexity.Schedule.AssignedTo == nil { break @@ -15147,18 +15115,12 @@ func (ec *executionContext) fieldContext_Query_swoStatus(ctx context.Context, fi IsResolver: true, Child: func(ctx context.Context, field graphql.CollectedField) (*graphql.FieldContext, error) { switch field.Name { - case "isIdle": - return ec.fieldContext_SWOStatus_isIdle(ctx, field) - case "isDone": - return ec.fieldContext_SWOStatus_isDone(ctx, field) - case 
"isResetting": - return ec.fieldContext_SWOStatus_isResetting(ctx, field) - case "isExecuting": - return ec.fieldContext_SWOStatus_isExecuting(ctx, field) - case "details": - return ec.fieldContext_SWOStatus_details(ctx, field) - case "errors": - return ec.fieldContext_SWOStatus_errors(ctx, field) + case "state": + return ec.fieldContext_SWOStatus_state(ctx, field) + case "lastStatus": + return ec.fieldContext_SWOStatus_lastStatus(ctx, field) + case "lastError": + return ec.fieldContext_SWOStatus_lastError(ctx, field) case "nodes": return ec.fieldContext_SWOStatus_nodes(ctx, field) case "connections": @@ -16296,96 +16258,8 @@ func (ec *executionContext) fieldContext_SWONode_isLeader(ctx context.Context, f return fc, nil } -func (ec *executionContext) _SWONode_status(ctx context.Context, field graphql.CollectedField, obj *SWONode) (ret graphql.Marshaler) { - fc, err := ec.fieldContext_SWONode_status(ctx, field) - if err != nil { - return graphql.Null - } - ctx = graphql.WithFieldContext(ctx, fc) - defer func() { - if r := recover(); r != nil { - ec.Error(ctx, ec.Recover(ctx, r)) - ret = graphql.Null - } - }() - resTmp, err := ec.ResolverMiddleware(ctx, func(rctx context.Context) (interface{}, error) { - ctx = rctx // use context from middleware stack in children - return obj.Status, nil - }) - if err != nil { - ec.Error(ctx, err) - return graphql.Null - } - if resTmp == nil { - if !graphql.HasFieldError(ctx, fc) { - ec.Errorf(ctx, "must not be null") - } - return graphql.Null - } - res := resTmp.(string) - fc.Result = res - return ec.marshalNString2string(ctx, field.Selections, res) -} - -func (ec *executionContext) fieldContext_SWONode_status(ctx context.Context, field graphql.CollectedField) (fc *graphql.FieldContext, err error) { - fc = &graphql.FieldContext{ - Object: "SWONode", - Field: field, - IsMethod: false, - IsResolver: false, - Child: func(ctx context.Context, field graphql.CollectedField) (*graphql.FieldContext, error) { - return nil, errors.New("field 
of type String does not have child fields") - }, - } - return fc, nil -} - -func (ec *executionContext) _SWOStatus_isIdle(ctx context.Context, field graphql.CollectedField, obj *SWOStatus) (ret graphql.Marshaler) { - fc, err := ec.fieldContext_SWOStatus_isIdle(ctx, field) - if err != nil { - return graphql.Null - } - ctx = graphql.WithFieldContext(ctx, fc) - defer func() { - if r := recover(); r != nil { - ec.Error(ctx, ec.Recover(ctx, r)) - ret = graphql.Null - } - }() - resTmp, err := ec.ResolverMiddleware(ctx, func(rctx context.Context) (interface{}, error) { - ctx = rctx // use context from middleware stack in children - return obj.IsIdle, nil - }) - if err != nil { - ec.Error(ctx, err) - return graphql.Null - } - if resTmp == nil { - if !graphql.HasFieldError(ctx, fc) { - ec.Errorf(ctx, "must not be null") - } - return graphql.Null - } - res := resTmp.(bool) - fc.Result = res - return ec.marshalNBoolean2bool(ctx, field.Selections, res) -} - -func (ec *executionContext) fieldContext_SWOStatus_isIdle(ctx context.Context, field graphql.CollectedField) (fc *graphql.FieldContext, err error) { - fc = &graphql.FieldContext{ - Object: "SWOStatus", - Field: field, - IsMethod: false, - IsResolver: false, - Child: func(ctx context.Context, field graphql.CollectedField) (*graphql.FieldContext, error) { - return nil, errors.New("field of type Boolean does not have child fields") - }, - } - return fc, nil -} - -func (ec *executionContext) _SWOStatus_isDone(ctx context.Context, field graphql.CollectedField, obj *SWOStatus) (ret graphql.Marshaler) { - fc, err := ec.fieldContext_SWOStatus_isDone(ctx, field) +func (ec *executionContext) _SWOStatus_state(ctx context.Context, field graphql.CollectedField, obj *SWOStatus) (ret graphql.Marshaler) { + fc, err := ec.fieldContext_SWOStatus_state(ctx, field) if err != nil { return graphql.Null } @@ -16398,95 +16272,7 @@ func (ec *executionContext) _SWOStatus_isDone(ctx context.Context, field graphql }() resTmp, err := 
ec.ResolverMiddleware(ctx, func(rctx context.Context) (interface{}, error) { ctx = rctx // use context from middleware stack in children - return obj.IsDone, nil - }) - if err != nil { - ec.Error(ctx, err) - return graphql.Null - } - if resTmp == nil { - if !graphql.HasFieldError(ctx, fc) { - ec.Errorf(ctx, "must not be null") - } - return graphql.Null - } - res := resTmp.(bool) - fc.Result = res - return ec.marshalNBoolean2bool(ctx, field.Selections, res) -} - -func (ec *executionContext) fieldContext_SWOStatus_isDone(ctx context.Context, field graphql.CollectedField) (fc *graphql.FieldContext, err error) { - fc = &graphql.FieldContext{ - Object: "SWOStatus", - Field: field, - IsMethod: false, - IsResolver: false, - Child: func(ctx context.Context, field graphql.CollectedField) (*graphql.FieldContext, error) { - return nil, errors.New("field of type Boolean does not have child fields") - }, - } - return fc, nil -} - -func (ec *executionContext) _SWOStatus_isResetting(ctx context.Context, field graphql.CollectedField, obj *SWOStatus) (ret graphql.Marshaler) { - fc, err := ec.fieldContext_SWOStatus_isResetting(ctx, field) - if err != nil { - return graphql.Null - } - ctx = graphql.WithFieldContext(ctx, fc) - defer func() { - if r := recover(); r != nil { - ec.Error(ctx, ec.Recover(ctx, r)) - ret = graphql.Null - } - }() - resTmp, err := ec.ResolverMiddleware(ctx, func(rctx context.Context) (interface{}, error) { - ctx = rctx // use context from middleware stack in children - return obj.IsResetting, nil - }) - if err != nil { - ec.Error(ctx, err) - return graphql.Null - } - if resTmp == nil { - if !graphql.HasFieldError(ctx, fc) { - ec.Errorf(ctx, "must not be null") - } - return graphql.Null - } - res := resTmp.(bool) - fc.Result = res - return ec.marshalNBoolean2bool(ctx, field.Selections, res) -} - -func (ec *executionContext) fieldContext_SWOStatus_isResetting(ctx context.Context, field graphql.CollectedField) (fc *graphql.FieldContext, err error) { - fc = 
&graphql.FieldContext{ - Object: "SWOStatus", - Field: field, - IsMethod: false, - IsResolver: false, - Child: func(ctx context.Context, field graphql.CollectedField) (*graphql.FieldContext, error) { - return nil, errors.New("field of type Boolean does not have child fields") - }, - } - return fc, nil -} - -func (ec *executionContext) _SWOStatus_isExecuting(ctx context.Context, field graphql.CollectedField, obj *SWOStatus) (ret graphql.Marshaler) { - fc, err := ec.fieldContext_SWOStatus_isExecuting(ctx, field) - if err != nil { - return graphql.Null - } - ctx = graphql.WithFieldContext(ctx, fc) - defer func() { - if r := recover(); r != nil { - ec.Error(ctx, ec.Recover(ctx, r)) - ret = graphql.Null - } - }() - resTmp, err := ec.ResolverMiddleware(ctx, func(rctx context.Context) (interface{}, error) { - ctx = rctx // use context from middleware stack in children - return obj.IsExecuting, nil + return obj.State, nil }) if err != nil { ec.Error(ctx, err) @@ -16498,26 +16284,26 @@ func (ec *executionContext) _SWOStatus_isExecuting(ctx context.Context, field gr } return graphql.Null } - res := resTmp.(bool) + res := resTmp.(SWOState) fc.Result = res - return ec.marshalNBoolean2bool(ctx, field.Selections, res) + return ec.marshalNSWOState2githubᚗcomᚋtargetᚋgoalertᚋgraphql2ᚐSWOState(ctx, field.Selections, res) } -func (ec *executionContext) fieldContext_SWOStatus_isExecuting(ctx context.Context, field graphql.CollectedField) (fc *graphql.FieldContext, err error) { +func (ec *executionContext) fieldContext_SWOStatus_state(ctx context.Context, field graphql.CollectedField) (fc *graphql.FieldContext, err error) { fc = &graphql.FieldContext{ Object: "SWOStatus", Field: field, IsMethod: false, IsResolver: false, Child: func(ctx context.Context, field graphql.CollectedField) (*graphql.FieldContext, error) { - return nil, errors.New("field of type Boolean does not have child fields") + return nil, errors.New("field of type SWOState does not have child fields") }, } return fc, 
nil } -func (ec *executionContext) _SWOStatus_details(ctx context.Context, field graphql.CollectedField, obj *SWOStatus) (ret graphql.Marshaler) { - fc, err := ec.fieldContext_SWOStatus_details(ctx, field) +func (ec *executionContext) _SWOStatus_lastStatus(ctx context.Context, field graphql.CollectedField, obj *SWOStatus) (ret graphql.Marshaler) { + fc, err := ec.fieldContext_SWOStatus_lastStatus(ctx, field) if err != nil { return graphql.Null } @@ -16530,7 +16316,7 @@ func (ec *executionContext) _SWOStatus_details(ctx context.Context, field graphq }() resTmp, err := ec.ResolverMiddleware(ctx, func(rctx context.Context) (interface{}, error) { ctx = rctx // use context from middleware stack in children - return obj.Details, nil + return obj.LastStatus, nil }) if err != nil { ec.Error(ctx, err) @@ -16547,7 +16333,7 @@ func (ec *executionContext) _SWOStatus_details(ctx context.Context, field graphq return ec.marshalNString2string(ctx, field.Selections, res) } -func (ec *executionContext) fieldContext_SWOStatus_details(ctx context.Context, field graphql.CollectedField) (fc *graphql.FieldContext, err error) { +func (ec *executionContext) fieldContext_SWOStatus_lastStatus(ctx context.Context, field graphql.CollectedField) (fc *graphql.FieldContext, err error) { fc = &graphql.FieldContext{ Object: "SWOStatus", Field: field, @@ -16560,8 +16346,8 @@ func (ec *executionContext) fieldContext_SWOStatus_details(ctx context.Context, return fc, nil } -func (ec *executionContext) _SWOStatus_errors(ctx context.Context, field graphql.CollectedField, obj *SWOStatus) (ret graphql.Marshaler) { - fc, err := ec.fieldContext_SWOStatus_errors(ctx, field) +func (ec *executionContext) _SWOStatus_lastError(ctx context.Context, field graphql.CollectedField, obj *SWOStatus) (ret graphql.Marshaler) { + fc, err := ec.fieldContext_SWOStatus_lastError(ctx, field) if err != nil { return graphql.Null } @@ -16574,7 +16360,7 @@ func (ec *executionContext) _SWOStatus_errors(ctx context.Context, field 
graphql }() resTmp, err := ec.ResolverMiddleware(ctx, func(rctx context.Context) (interface{}, error) { ctx = rctx // use context from middleware stack in children - return obj.Errors, nil + return obj.LastError, nil }) if err != nil { ec.Error(ctx, err) @@ -16586,12 +16372,12 @@ func (ec *executionContext) _SWOStatus_errors(ctx context.Context, field graphql } return graphql.Null } - res := resTmp.([]string) + res := resTmp.(string) fc.Result = res - return ec.marshalNString2ᚕstringᚄ(ctx, field.Selections, res) + return ec.marshalNString2string(ctx, field.Selections, res) } -func (ec *executionContext) fieldContext_SWOStatus_errors(ctx context.Context, field graphql.CollectedField) (fc *graphql.FieldContext, err error) { +func (ec *executionContext) fieldContext_SWOStatus_lastError(ctx context.Context, field graphql.CollectedField) (fc *graphql.FieldContext, err error) { fc = &graphql.FieldContext{ Object: "SWOStatus", Field: field, @@ -16653,8 +16439,6 @@ func (ec *executionContext) fieldContext_SWOStatus_nodes(ctx context.Context, fi return ec.fieldContext_SWONode_canExec(ctx, field) case "isLeader": return ec.fieldContext_SWONode_isLeader(ctx, field) - case "status": - return ec.fieldContext_SWONode_status(ctx, field) } return nil, fmt.Errorf("no field named %q was found under type SWONode", field.Name) }, @@ -29948,13 +29732,6 @@ func (ec *executionContext) _SWONode(ctx context.Context, sel ast.SelectionSet, out.Values[i] = ec._SWONode_isLeader(ctx, field, obj) - if out.Values[i] == graphql.Null { - invalids++ - } - case "status": - - out.Values[i] = ec._SWONode_status(ctx, field, obj) - if out.Values[i] == graphql.Null { invalids++ } @@ -29979,44 +29756,23 @@ func (ec *executionContext) _SWOStatus(ctx context.Context, sel ast.SelectionSet switch field.Name { case "__typename": out.Values[i] = graphql.MarshalString("SWOStatus") - case "isIdle": - - out.Values[i] = ec._SWOStatus_isIdle(ctx, field, obj) - - if out.Values[i] == graphql.Null { - invalids++ - } - 
case "isDone": - - out.Values[i] = ec._SWOStatus_isDone(ctx, field, obj) - - if out.Values[i] == graphql.Null { - invalids++ - } - case "isResetting": - - out.Values[i] = ec._SWOStatus_isResetting(ctx, field, obj) - - if out.Values[i] == graphql.Null { - invalids++ - } - case "isExecuting": + case "state": - out.Values[i] = ec._SWOStatus_isExecuting(ctx, field, obj) + out.Values[i] = ec._SWOStatus_state(ctx, field, obj) if out.Values[i] == graphql.Null { invalids++ } - case "details": + case "lastStatus": - out.Values[i] = ec._SWOStatus_details(ctx, field, obj) + out.Values[i] = ec._SWOStatus_lastStatus(ctx, field, obj) if out.Values[i] == graphql.Null { invalids++ } - case "errors": + case "lastError": - out.Values[i] = ec._SWOStatus_errors(ctx, field, obj) + out.Values[i] = ec._SWOStatus_lastError(ctx, field, obj) if out.Values[i] == graphql.Null { invalids++ @@ -33464,6 +33220,16 @@ func (ec *executionContext) marshalNSWONode2ᚕgithub.comᚋtargetᚋgoalert return ret } +func (ec *executionContext) unmarshalNSWOState2githubᚗcomᚋtargetᚋgoalertᚋgraphql2ᚐSWOState(ctx context.Context, v interface{}) (SWOState, error) { + var res SWOState + err := res.UnmarshalGQL(v) + return res, graphql.ErrorOnPath(ctx, err) +} + +func (ec *executionContext) marshalNSWOState2githubᚗcomᚋtargetᚋgoalertᚋgraphql2ᚐSWOState(ctx context.Context, sel ast.SelectionSet, v SWOState) graphql.Marshaler { + return v +} + func (ec *executionContext) marshalNSWOStatus2githubᚗcomᚋtargetᚋgoalertᚋgraphql2ᚐSWOStatus(ctx context.Context, sel ast.SelectionSet, v SWOStatus) graphql.Marshaler { return ec._SWOStatus(ctx, sel, &v) } diff --git a/graphql2/graphqlapp/swo.go b/graphql2/graphqlapp/swo.go index 62c2eca1da..5e20ed9386 100644 --- a/graphql2/graphqlapp/swo.go +++ b/graphql2/graphqlapp/swo.go @@ -2,10 +2,11 @@ package graphqlapp import ( "context" - "strings" + "fmt" "github.com/target/goalert/graphql2" "github.com/target/goalert/permission" + "github.com/target/goalert/swo/swogrp" 
"github.com/target/goalert/util/sqlutil" "github.com/target/goalert/validation" ) @@ -21,12 +22,10 @@ func (m *Mutation) SwoAction(ctx context.Context, action graphql2.SWOAction) (bo } switch action { - case graphql2.SWOActionPing: - err = m.SWO.SendPing(ctx) case graphql2.SWOActionReset: - err = m.SWO.SendReset(ctx) + err = m.SWO.Reset(ctx) case graphql2.SWOActionExecute: - err = m.SWO.SendExecute(ctx) + err = m.SWO.StartExecute(ctx) default: return false, validation.NewGenericError("invalid SWO action") } @@ -57,44 +56,42 @@ func (a *Query) SwoStatus(ctx context.Context) (*graphql2.SWOStatus, error) { s := a.SWO.Status() var nodes []graphql2.SWONode - var prog string for _, n := range s.Nodes { - var tasks []string - for _, t := range n.Tasks { - tasks = append(tasks, t.Name) - if t.Name == "reset-db" || t.Name == "exec" { - prog = t.Status - } - } - nodes = append(nodes, graphql2.SWONode{ ID: n.ID.String(), - OldValid: n.OldDBValid(), - NewValid: n.NewDBValid(), - IsLeader: n.IsLeader, + OldValid: n.OldID == s.MainDBID, + NewValid: n.NewID == s.NextDBID, CanExec: n.CanExec, - Status: strings.Join(tasks, ","), + IsLeader: n.ID == s.LeaderID, }) } - status := string(s.State) - if prog != "" { - status += ": " + prog - } - - var errs []string - for _, t := range s.Errors { - errs = append(errs, t.Name+": "+t.Error) + var state graphql2.SWOState + switch s.State { + case swogrp.ClusterStateUnknown: + state = graphql2.SWOStateUnknown + case swogrp.ClusterStateResetting: + state = graphql2.SWOStateResetting + case swogrp.ClusterStateIdle: + state = graphql2.SWOStateIdle + case swogrp.ClusterStateSyncing: + state = graphql2.SWOStateSyncing + case swogrp.ClusterStatePausing: + state = graphql2.SWOStatePausing + case swogrp.ClusterStateExecuting: + state = graphql2.SWOStateExecuting + case swogrp.ClusterStateDone: + state = graphql2.SWOStateDone + default: + return nil, fmt.Errorf("unknown state: %d", s.State) } return &graphql2.SWOStatus{ - IsIdle: s.State == "idle", - 
IsDone: s.State == "done", - Details: status, - IsExecuting: strings.HasPrefix(string(s.State), "exec"), - IsResetting: strings.HasPrefix(string(s.State), "reset"), - Nodes: nodes, - Errors: errs, + State: state, + + LastStatus: s.LastStatus, + LastError: s.LastError, + Connections: conns, NextDBVersion: s.NextDBVersion, diff --git a/graphql2/models_gen.go b/graphql2/models_gen.go index 40ce9bc5cb..9caab6bbb3 100644 --- a/graphql2/models_gen.go +++ b/graphql2/models_gen.go @@ -344,16 +344,12 @@ type SWONode struct { NewValid bool `json:"newValid"` CanExec bool `json:"canExec"` IsLeader bool `json:"isLeader"` - Status string `json:"status"` } type SWOStatus struct { - IsIdle bool `json:"isIdle"` - IsDone bool `json:"isDone"` - IsResetting bool `json:"isResetting"` - IsExecuting bool `json:"isExecuting"` - Details string `json:"details"` - Errors []string `json:"errors"` + State SWOState `json:"state"` + LastStatus string `json:"lastStatus"` + LastError string `json:"lastError"` Nodes []SWONode `json:"nodes"` Connections []SWOConnection `json:"connections"` MainDBVersion string `json:"mainDBVersion"` @@ -828,20 +824,18 @@ func (e NotificationStatus) MarshalGQL(w io.Writer) { type SWOAction string const ( - SWOActionPing SWOAction = "ping" SWOActionReset SWOAction = "reset" SWOActionExecute SWOAction = "execute" ) var AllSWOAction = []SWOAction{ - SWOActionPing, SWOActionReset, SWOActionExecute, } func (e SWOAction) IsValid() bool { switch e { - case SWOActionPing, SWOActionReset, SWOActionExecute: + case SWOActionReset, SWOActionExecute: return true } return false @@ -868,6 +862,57 @@ func (e SWOAction) MarshalGQL(w io.Writer) { fmt.Fprint(w, strconv.Quote(e.String())) } +type SWOState string + +const ( + SWOStateUnknown SWOState = "unknown" + SWOStateResetting SWOState = "resetting" + SWOStateIdle SWOState = "idle" + SWOStateSyncing SWOState = "syncing" + SWOStatePausing SWOState = "pausing" + SWOStateExecuting SWOState = "executing" + SWOStateDone SWOState = "done" 
+) + +var AllSWOState = []SWOState{ + SWOStateUnknown, + SWOStateResetting, + SWOStateIdle, + SWOStateSyncing, + SWOStatePausing, + SWOStateExecuting, + SWOStateDone, +} + +func (e SWOState) IsValid() bool { + switch e { + case SWOStateUnknown, SWOStateResetting, SWOStateIdle, SWOStateSyncing, SWOStatePausing, SWOStateExecuting, SWOStateDone: + return true + } + return false +} + +func (e SWOState) String() string { + return string(e) +} + +func (e *SWOState) UnmarshalGQL(v interface{}) error { + str, ok := v.(string) + if !ok { + return fmt.Errorf("enums must be strings") + } + + *e = SWOState(str) + if !e.IsValid() { + return fmt.Errorf("%s is not a valid SWOState", str) + } + return nil +} + +func (e SWOState) MarshalGQL(w io.Writer) { + fmt.Fprint(w, strconv.Quote(e.String())) +} + type UserRole string const ( diff --git a/graphql2/schema.graphql b/graphql2/schema.graphql index acffe3bc4c..1ffc1130a6 100644 --- a/graphql2/schema.graphql +++ b/graphql2/schema.graphql @@ -114,14 +114,9 @@ type Query { } type SWOStatus { - isIdle: Boolean! - isDone: Boolean! - - isResetting: Boolean! - isExecuting: Boolean! - - details: String! - errors: [String!]! + state: SWOState! + lastStatus: String! + lastError: String! nodes: [SWONode!]! @@ -131,6 +126,16 @@ type SWOStatus { nextDBVersion: String! } +enum SWOState { + unknown + resetting + idle + syncing + pausing + executing + done +} + type SWOConnection { name: String! count: Int! @@ -142,7 +147,6 @@ type SWONode { newValid: Boolean! canExec: Boolean! isLeader: Boolean! - status: String! 
} input AlertMetricsOptions { @@ -374,7 +378,6 @@ input SetScheduleShiftInput { } enum SWOAction { - ping reset execute } diff --git a/swo/execute.go b/swo/execute.go deleted file mode 100644 index 3aa1a8ab11..0000000000 --- a/swo/execute.go +++ /dev/null @@ -1,60 +0,0 @@ -package swo - -import ( - "context" - "fmt" - - "github.com/jackc/pgx/v4" - "github.com/target/goalert/swo/swogrp" - "github.com/target/goalert/swo/swosync" -) - -func (m *Manager) DoExecute(ctx context.Context) error { - return m.withConnFromBoth(ctx, func(ctx context.Context, oldConn, newConn *pgx.Conn) error { - rep := swosync.NewLogicalReplicator() - rep.SetSourceDB(oldConn) - rep.SetDestinationDB(newConn) - rep.SetProgressFunc(swogrp.Progressf) - - err := rep.Reset(ctx) - if err != nil { - return fmt.Errorf("reset: %w", err) - } - - err = rep.Start(ctx) - if err != nil { - return fmt.Errorf("start: %w", err) - } - - err = rep.InitialSync(ctx) - if err != nil { - return fmt.Errorf("initial sync: %w", err) - } - - for i := 0; i < 10; i++ { - err = rep.LogicalSync(ctx) - if err != nil { - return fmt.Errorf("logical sync: %w", err) - } - } - - err = m.PauseApps(ctx) - if err != nil { - return fmt.Errorf("pause apps: %w", err) - } - - for i := 0; i < 10; i++ { - err = rep.LogicalSync(ctx) - if err != nil { - return fmt.Errorf("logical sync (after pause): %w", err) - } - } - - err = rep.FinalSync(ctx) - if err != nil { - return fmt.Errorf("final sync: %w", err) - } - - return nil - }) -} diff --git a/swo/executor.go b/swo/executor.go new file mode 100644 index 0000000000..9167d747d2 --- /dev/null +++ b/swo/executor.go @@ -0,0 +1,112 @@ +package swo + +import ( + "context" + "fmt" + "sync" + + "github.com/jackc/pgx/v4" + "github.com/target/goalert/swo/swogrp" + "github.com/target/goalert/swo/swosync" +) + +type Executor struct { + mgr *Manager + mx sync.Mutex + + ctxCh chan context.Context + errCh chan error + cancel func() +} + +var _ swogrp.Executor = (*Executor)(nil) + +func (e *Executor) init() 
{ + e.mx.Lock() + defer e.mx.Unlock() + if e.cancel != nil { + panic("already running") + } + + ctx, cancel := context.WithCancel(e.mgr.Logger.BackgroundContext()) + e.cancel = cancel + e.ctxCh = make(chan context.Context) + e.errCh = make(chan error, 2) + + go func() { + defer e.Cancel() + e.errCh <- e.mgr.withConnFromBoth(ctx, func(ctx context.Context, oldConn, newConn *pgx.Conn) error { + rep := swosync.NewLogicalReplicator() + rep.SetSourceDB(oldConn) + rep.SetDestinationDB(newConn) + rep.SetProgressFunc(e.mgr.taskMgr.Statusf) + + // sync + ctx = <-e.ctxCh + err := rep.Reset(ctx) + if err != nil { + return fmt.Errorf("reset: %w", err) + } + + err = rep.Start(ctx) + if err != nil { + return fmt.Errorf("start: %w", err) + } + + err = rep.InitialSync(ctx) + if err != nil { + return fmt.Errorf("initial sync: %w", err) + } + + for i := 0; i < 10; i++ { + err = rep.LogicalSync(ctx) + if err != nil { + return fmt.Errorf("logical sync: %w", err) + } + } + e.errCh <- nil + + // wait for pause + ctx = <-e.ctxCh + for i := 0; i < 10; i++ { + err := rep.LogicalSync(ctx) + if err != nil { + return fmt.Errorf("logical sync (after pause): %w", err) + } + } + + err = rep.FinalSync(ctx) + if err != nil { + return fmt.Errorf("final sync: %w", err) + } + + return nil + }) + }() +} + +func (e *Executor) Sync(ctx context.Context) error { + e.init() + + e.ctxCh <- ctx + return <-e.errCh +} + +func (e *Executor) Exec(ctx context.Context) error { + e.ctxCh <- ctx + return <-e.errCh +} + +func (e *Executor) Cancel() { + e.mx.Lock() + defer e.mx.Unlock() + + if e.cancel == nil { + return + } + + e.cancel() + e.ctxCh = nil + e.errCh = nil + e.cancel = nil +} diff --git a/swo/manager.go b/swo/manager.go index b253d973f9..1f814c85e2 100644 --- a/swo/manager.go +++ b/swo/manager.go @@ -4,7 +4,6 @@ import ( "context" "database/sql" "database/sql/driver" - "errors" "fmt" "github.com/google/uuid" @@ -29,7 +28,7 @@ type Manager struct { Config - grp *swogrp.Group + taskMgr *swogrp.TaskMan 
MainDBInfo *swoinfo.DB NextDBInfo *swoinfo.DB @@ -78,47 +77,24 @@ func NewManager(cfg Config) (*Manager, error) { return nil }) if err != nil { - return nil, fmt.Errorf("failed to get server version: %w", err) + return nil, fmt.Errorf("et server version: %w", err) } - m.grp = swogrp.NewGroup(swogrp.Config{ + m.taskMgr, err = swogrp.NewTaskMan(ctx, swogrp.Config{ CanExec: cfg.CanExec, - Logger: cfg.Logger, - Msgs: messages, + Logger: cfg.Logger, + Messages: messages, - ResetFunc: m.DoReset, - ExecuteFunc: m.DoExecute, - PauseFunc: m.DoPause, - ResumeFunc: m.DoResume, + Executor: &Executor{mgr: m}, + PauseFunc: func(ctx context.Context) error { return m.pauseResume.Pause(ctx) }, + ResumeFunc: func(ctx context.Context) error { return m.pauseResume.Resume(ctx) }, }) - - return m, nil -} - -func (m *Manager) DoReset(ctx context.Context) error { - return m.withConnFromOld(ctx, func(ctx context.Context, conn *pgx.Conn) error { - _, err := conn.Exec(ctx, swosync.ConnLockQuery) - if err != nil { - return err - } - - return swodb.New(conn).DisableChangeLogTriggers(ctx) - }) -} - -func (m *Manager) DoPause(ctx context.Context) error { - if m.pauseResume == nil { - return errors.New("not initialized") + if err != nil { + return nil, fmt.Errorf("init task manager: %w", err) } - return m.pauseResume.Pause(ctx) -} -func (m *Manager) DoResume(ctx context.Context) error { - if m.pauseResume == nil { - return errors.New("not initialized") - } - return m.pauseResume.Resume(ctx) + return m, nil } func (m *Manager) Init(app lifecycle.PauseResumer) { @@ -126,6 +102,7 @@ func (m *Manager) Init(app lifecycle.PauseResumer) { panic("already set") } m.pauseResume = app + m.taskMgr.Init() } // withConnFromOld allows performing operations with a raw connection to the old database. 
@@ -133,11 +110,6 @@ func (m *Manager) withConnFromOld(ctx context.Context, f func(context.Context, * return WithPGXConn(ctx, m.dbMain, f) } -// withConnFromNew allows performing operations with a raw connection to the new database. -func (m *Manager) withConnFromNew(ctx context.Context, f func(context.Context, *pgx.Conn) error) error { - return WithPGXConn(ctx, m.dbNext, f) -} - // withConnFromBoth allows performing operations with a raw connection to both databases database. func (m *Manager) withConnFromBoth(ctx context.Context, f func(ctx context.Context, oldConn, newConn *pgx.Conn) error) error { // grab lock with old DB first @@ -154,6 +126,7 @@ func WithPGXConn(ctx context.Context, db *sql.DB, runFunc func(context.Context, return err } defer conn.Close() + defer conn.ExecContext(context.Background(), "select pg_advisory_unlock_all()") return conn.Raw(func(driverConn interface{}) error { conn := driverConn.(*stdlib.Conn).Conn() @@ -166,23 +139,40 @@ func WithPGXConn(ctx context.Context, db *sql.DB, runFunc func(context.Context, // Status will return the current switchover status. func (m *Manager) Status() Status { return Status{ + Status: m.taskMgr.Status(), MainDBVersion: m.MainDBInfo.Version, NextDBVersion: m.NextDBInfo.Version, - Status: m.grp.Status(), } } -// SendReset will trigger a reset of the switchover. -func (m *Manager) SendReset(ctx context.Context) error { return m.grp.Reset(ctx) } +// Reset will disable the changelog and reset the cluster state. +func (m *Manager) Reset(ctx context.Context) error { + err := m.taskMgr.Cancel(ctx) + if err != nil { + return fmt.Errorf("cancel task: %w", err) + } -// SendExecute will trigger the switchover to begin. 
-func (m *Manager) SendExecute(ctx context.Context) error { return m.grp.Execute(ctx) } + err = m.withConnFromOld(ctx, func(ctx context.Context, conn *pgx.Conn) error { + _, err := conn.Exec(ctx, swosync.ConnWaitLockQuery) + if err != nil { + return err + } -func (m *Manager) DB() *sql.DB { return m.dbApp } + return swodb.New(conn).DisableChangeLogTriggers(ctx) + }) + if err != nil { + return fmt.Errorf("failed to disable change log triggers: %w", err) + } -type Status struct { - swogrp.Status + err = m.taskMgr.Reset(ctx) + if err != nil { + return fmt.Errorf("reset cluster state: %w", err) + } - MainDBVersion string - NextDBVersion string + return nil } + +// StartExecute will trigger the switchover to begin. +func (m *Manager) StartExecute(ctx context.Context) error { return m.taskMgr.Execute(ctx) } + +func (m *Manager) DB() *sql.DB { return m.dbApp } diff --git a/swo/pauseapps.go b/swo/pauseapps.go deleted file mode 100644 index 72a6b4d814..0000000000 --- a/swo/pauseapps.go +++ /dev/null @@ -1,47 +0,0 @@ -package swo - -import ( - "context" - "fmt" - "time" - - "github.com/target/goalert/swo/swogrp" -) - -// PauseApps puts all nodes into a "paused" state: -// - Engine no longer cycles -// - Idle DB connections are disabled -// - Event listeners (postgres pub/sub) are disabled -func (m *Manager) PauseApps(ctx context.Context) error { - swogrp.Progressf(ctx, "pausing apps") - err := m.grp.Pause(ctx) - if err != nil { - return fmt.Errorf("pause: %w", err) - } - - t := time.NewTicker(10 * time.Millisecond) - defer t.Stop() - for range t.C { - s := m.grp.Status() - var pausing, waiting int - for _, node := range s.Nodes { - for _, task := range node.Tasks { - if task.Name == "pause" { - pausing++ - } - if task.Name == "resume-after" { - waiting++ - } - } - } - - if pausing == 0 && waiting == len(s.Nodes) { - break - } - if waiting == 0 { - return fmt.Errorf("pause: timed out waiting for nodes to pause") - } - } - - return nil -} diff --git a/swo/status.go 
b/swo/status.go new file mode 100644 index 0000000000..301a380aaa --- /dev/null +++ b/swo/status.go @@ -0,0 +1,15 @@ +package swo + +import ( + "github.com/google/uuid" + "github.com/target/goalert/swo/swogrp" +) + +type Status struct { + swogrp.Status + + MainDBID uuid.UUID + NextDBID uuid.UUID + MainDBVersion string + NextDBVersion string +} diff --git a/swo/swogrp/clusterstate.go b/swo/swogrp/clusterstate.go new file mode 100644 index 0000000000..9f2b67db81 --- /dev/null +++ b/swo/swogrp/clusterstate.go @@ -0,0 +1,13 @@ +package swogrp + +type ClusterState int + +const ( + ClusterStateUnknown ClusterState = iota + ClusterStateResetting + ClusterStateIdle + ClusterStateSyncing + ClusterStatePausing + ClusterStateExecuting + ClusterStateDone +) diff --git a/swo/swogrp/config.go b/swo/swogrp/config.go index 2d3ee3c3a2..a25c48b849 100644 --- a/swo/swogrp/config.go +++ b/swo/swogrp/config.go @@ -3,18 +3,29 @@ package swogrp import ( "context" + "github.com/google/uuid" "github.com/target/goalert/swo/swomsg" "github.com/target/goalert/util/log" ) +type TaskFn func(context.Context) error + type Config struct { - CanExec bool + CanExec bool + OldID, NewID uuid.UUID + + Logger *log.Logger + Messages *swomsg.Log + + PauseFunc TaskFn + ResumeFunc TaskFn + + Executor Executor +} - Logger *log.Logger - Msgs *swomsg.Log +type Executor interface { + Sync(context.Context) error + Exec(context.Context) error - ResetFunc func(context.Context) error - ExecuteFunc func(context.Context) error - PauseFunc func(context.Context) error - ResumeFunc func(context.Context) error + Cancel() } diff --git a/swo/swogrp/group.go b/swo/swogrp/group.go deleted file mode 100644 index 238ef89478..0000000000 --- a/swo/swogrp/group.go +++ /dev/null @@ -1,511 +0,0 @@ -package swogrp - -import ( - "context" - "encoding/json" - "errors" - "fmt" - "sync" - "time" - - "github.com/google/uuid" - "github.com/target/goalert/swo/swomsg" - "github.com/target/goalert/util/log" -) - -/* - -Input -> Send ping 
message, wait ack-all -Reset -> Send reset message, (no-wait), elect single node to run reset, track progress, signal done or err -Execute -> Send exec message, wait ack-all, elect single node to run exec, track progress, all sync TX refresh (send & wait), Plan (send & wait for all ack), continue work, signal done or err - -- Send Message -- Send Message and wait for all-ack (from user, from exec) -- Elect node -- Track progress - -reset part of leader, first hello with exec is leader - - -reset -hello, hello-exec, hello-next -start - - -*/ - -type Group struct { - Config - State - - failed []TaskInfo - - nodeID uuid.UUID - reset bool - resetS time.Time - nodes map[uuid.UUID]*Node - tasks map[uuid.UUID]TaskInfo - leader bool - mx sync.Mutex - - nextDBValid *Set - oldDBValid *Set - nodeIDs *Set - - ackMsgs chan map[uuid.UUID]*ackWait - - resumeNow chan struct{} -} - -type ackWait struct { - msgID uuid.UUID - waitAck map[uuid.UUID]struct{} - done chan error -} - -type TaskInfo struct { - ID uuid.UUID - Name string - Error string `json:",omitempty"` - Status string `json:",omitempty"` - - cancel func() -} - -type Node struct { - ID uuid.UUID - - IsLeader bool - CanExec bool - - OldDBValid func() bool - NewDBValid func() bool - - Tasks []TaskInfo -} - -func NewGroup(cfg Config) *Group { - g := &Group{ - Config: cfg, - nodeID: uuid.New(), - nodes: make(map[uuid.UUID]*Node), - tasks: make(map[uuid.UUID]TaskInfo), - State: stateNeedsReset, - ackMsgs: make(chan map[uuid.UUID]*ackWait, 1), - nextDBValid: NewSet(), - oldDBValid: NewSet(), - nodeIDs: NewSet(), - } - g.ackMsgs <- make(map[uuid.UUID]*ackWait) - - err := g.sendMessage(cfg.Logger.BackgroundContext(), "hello", nil, false) - if err != nil { - cfg.Logger.Error(context.Background(), err) - } - - go g.loopNextLog() - go g.loopMainLog() - - return g -} - -type Status struct { - Nodes []Node - State State - Errors []TaskInfo -} - -func cloneTasks(in []TaskInfo) []TaskInfo { - out := make([]TaskInfo, len(in)) - copy(out, 
in) - return out -} - -func (g *Group) Status() Status { - var nodes []Node - for _, id := range g.nodeIDs.List() { - node := Node{ - ID: id, - } - g.mx.Lock() - if n := g.nodes[id]; n != nil { - node = *n - } - g.mx.Unlock() - - node.NewDBValid = func() bool { return g.nextDBValid.Has(node.ID) } - node.OldDBValid = func() bool { return g.oldDBValid.Has(node.ID) } - nodes = append(nodes, node) - } - - failed := make([]TaskInfo, len(g.failed)) - g.mx.Lock() - defer g.mx.Unlock() - copy(failed, g.failed) - if g.State == stateReset && time.Since(g.resetS) > time.Minute { - g.State = stateNeedsReset - } - - return Status{ - Nodes: nodes, - State: g.State, - Errors: failed, - } -} - -func (g *Group) loopNextLog() { - for msg := range g.NextLog.Events() { - g.nodeIDs.Add(msg.Node) - if msg.Type != "hello-next" { - // ignore - continue - } - - g.nextDBValid.Add(msg.Node) - } -} - -func (g *Group) loopMainLog() { - buf := newMsgBuf() - go func() { - for msg := range buf.Next() { - ctx := g.Logger.BackgroundContext() - err := g.processMessage(ctx, msg) - if err != nil { - g.Logger.Error(ctx, fmt.Errorf("process message: %w", err)) - } - } - }() - - for msg := range g.MainLog.Events() { - if msg.Type == "ack" { - g.recordAck(msg) - continue - } - - buf.Append(msg) - } -} - -func (g *Group) startTask(ctx context.Context, name string, fn func(context.Context) error) error { - info := TaskInfo{ID: uuid.New(), Name: name} - err := g.sendMessage(ctx, "task-start", info, false) - if err != nil { - return err - } - - ctx = log.FromContext(ctx).BackgroundContext() - ctx, info.cancel = context.WithCancel(ctx) - g.tasks[info.ID] = info - go func() { - err := fn(withTask(ctx, g, info)) - if errors.Is(err, ErrDone) { - g.sendMessage(g.Logger.BackgroundContext(), "done", nil, false) - err = nil - } - if err != nil { - info.Error = err.Error() - } - - err = g.sendMessage(ctx, "task-end", info, false) - if err != nil { - log.Log(ctx, fmt.Errorf("send task-end: %w", err)) - } - - 
info.cancel() - g.mx.Lock() - delete(g.tasks, info.ID) - g.mx.Unlock() - }() - - return nil -} - -func (g *Group) resetState() { - for id := range g.nodes { - delete(g.nodes, id) - } - if g.resumeNow != nil { - close(g.resumeNow) - g.resumeNow = nil - } - g.ResumeFunc(g.Logger.BackgroundContext()) - g.failed = nil - g.reset = true - g.resetS = time.Now() - g.leader = false - g.State = stateReset - for _, t := range g.tasks { - t.cancel() - } - - msgs := <-g.ackMsgs - for id, aw := range msgs { - aw.done <- fmt.Errorf("reset") - delete(msgs, id) - } - g.ackMsgs <- msgs -} - -// addNode adds a node to the group, returns true if we have become the leader node -// after a reset. -func (g *Group) addNode(id uuid.UUID, exec bool) bool { - g.oldDBValid.Add(id) - if g.State != stateReset { - g.State = stateNeedsReset - } - n := g.nodes[id] - if n == nil { - n = &Node{ID: id} - g.nodes[id] = n - } - n.CanExec = n.CanExec || exec - - var isNewLeader bool - if g.reset && exec { - g.reset = false - g.leader = g.nodeID == id - n.IsLeader = true - isNewLeader = g.leader - } - - return isNewLeader -} - -func (g *Group) ack(ctx context.Context, msgID uuid.UUID) { - err := g.MainLog.Append(ctx, swomsg.Message{ - Type: "ack", - ID: uuid.New(), - Node: g.nodeID, - AckID: msgID, - }) - if err != nil { - log.Log(ctx, fmt.Errorf("send ack: %w", err)) - } -} - -func (g *Group) recordAck(msg swomsg.Message) { - msgs := <-g.ackMsgs - aw := msgs[msg.AckID] - if aw == nil { - g.ackMsgs <- msgs - return - } - - delete(aw.waitAck, msg.Node) - if len(aw.waitAck) == 0 { - aw.done <- nil - delete(msgs, msg.AckID) - } - g.ackMsgs <- msgs -} - -func (g *Group) updateTask(msg swomsg.Message, upsert bool) error { - n := g.nodes[msg.Node] - if n == nil { - return nil - } - var info TaskInfo - err := json.Unmarshal(msg.Data, &info) - if err != nil { - return err - } - filtered := n.Tasks[:0] - for _, t := range n.Tasks { - if t.ID == info.ID { - continue - } - filtered = append(filtered, t) - } - 
n.Tasks = filtered - if upsert { - n.Tasks = append(n.Tasks, info) - } else { - switch info.Name { - case "exec": - if g.resumeNow != nil { - close(g.resumeNow) - g.resumeNow = nil - } - if g.State == stateDone { - break - } - if info.Error == "" { - g.State = stateDone - } else { - g.State = stateError - } - case "reset-db": - if g.State == stateDone { - break - } - if info.Error == "" { - g.State = stateIdle - } else { - g.State = stateError - } - } - } - - if info.Error != "" { - g.failed = append(g.failed, info) - } - - return nil -} - -var ErrDone = errors.New("already done") - -func (g *Group) processMessage(ctx context.Context, msg swomsg.Message) error { - g.nodeIDs.Add(msg.Node) - g.mx.Lock() - defer g.mx.Unlock() - - if msg.Ack { - defer g.ack(ctx, msg.ID) - } - - switch msg.Type { - case "hello-exec": - if g.addNode(msg.Node, true) { - // we are the new leader, perform DB reset - return g.startTask(ctx, "reset-db", g.ResetFunc) - } - case "task-end": - return g.updateTask(msg, false) - case "task-start": - return g.updateTask(msg, true) - case "task-progress": - return g.updateTask(msg, true) - case "hello": - g.addNode(msg.Node, false) - case "ping": - case "done": - g.State = stateDone - case "reset": - if g.State == stateDone { - break - } - g.resetState() - - if err := g.startTask(ctx, "resume", g.ResumeFunc); err != nil { - return err - } - if err := g.sendMessageNext(ctx, "hello-next", nil, false); err != nil { - return err - } - - if g.CanExec { - return g.sendMessage(ctx, "hello-exec", nil, false) - } - - return g.sendMessage(ctx, "hello", nil, false) - case "exec": - if g.State != stateIdle { - break - } - g.State = stateExec - if g.leader { - return g.startTask(ctx, "exec", g.ExecuteFunc) - } - case "pause": - if g.resumeNow != nil { - close(g.resumeNow) - } - g.resumeNow = make(chan struct{}) - err := g.startTask(ctx, "resume-after", func(ctx context.Context) error { - t := time.NewTimer(30 * time.Second) - defer t.Stop() - select { - case 
<-ctx.Done(): - return ctx.Err() - case <-t.C: - case <-g.resumeNow: - } - - return g.ResumeFunc(ctx) - }) - if err != nil { - return err - } - - err = g.startTask(ctx, "pause", g.PauseFunc) - if err != nil { - return err - } - default: - } - - return nil -} - -func (g *Group) sendMessageNext(ctx context.Context, msgType string, v interface{}, wait bool) error { - return g.sendMessageWith(ctx, g.NextLog, msgType, v, wait) -} - -func (g *Group) sendMessage(ctx context.Context, msgType string, v interface{}, wait bool) error { - return g.sendMessageWith(ctx, g.MainLog, msgType, v, wait) -} - -func (g *Group) sendMessageWith(ctx context.Context, log *swomsg.Log, msgType string, v interface{}, wait bool) error { - msg := swomsg.Message{ - Type: msgType, - ID: uuid.New(), - Node: g.nodeID, - Ack: wait, - } - if v != nil { - data, err := json.Marshal(v) - if err != nil { - return err - } - msg.Data = data - } - if err := log.Append(ctx, msg); err != nil { - return err - } - if !wait { - return nil - } - - m := make(map[uuid.UUID]struct{}) - for _, n := range g.nodes { - m[n.ID] = struct{}{} - } - - aw := &ackWait{ - msgID: msg.ID, - done: make(chan error, 1), - waitAck: m, - } - - acks := <-g.ackMsgs - acks[msg.ID] = aw - g.ackMsgs <- acks - - return <-aw.done -} - -func (g *Group) Reset(ctx context.Context) error { - if g.Status().State == stateDone { - return errors.New("cannot reset, already done") - } - defer time.Sleep(time.Second) - return g.sendMessage(ctx, "reset", nil, false) -} - -func (g *Group) Ping(ctx context.Context) error { - return g.sendMessage(ctx, "ping", nil, true) -} - -func (g *Group) Execute(ctx context.Context) error { - if g.Status().State != stateIdle { - return fmt.Errorf("cannot execute, group is not idle") - } - - return g.sendMessage(ctx, "exec", nil, true) -} - -func (g *Group) Pause(ctx context.Context) error { - return g.sendMessage(ctx, "pause", nil, true) -} diff --git a/swo/swogrp/node.go b/swo/swogrp/node.go new file mode 100644 
index 0000000000..4378e3e61e --- /dev/null +++ b/swo/swogrp/node.go @@ -0,0 +1,12 @@ +package swogrp + +import "github.com/google/uuid" + +type Node struct { + ID uuid.UUID + + CanExec bool + + OldID uuid.UUID + NewID uuid.UUID +} diff --git a/swo/swogrp/progressf.go b/swo/swogrp/progressf.go deleted file mode 100644 index 6f8f126db3..0000000000 --- a/swo/swogrp/progressf.go +++ /dev/null @@ -1,44 +0,0 @@ -package swogrp - -import ( - "context" - "fmt" -) - -type ctxKey int - -const ( - ctxKeyTask ctxKey = iota -) - -type taskCtx struct { - *Group - TaskInfo -} - -func withTask(ctx context.Context, grp *Group, info TaskInfo) context.Context { - return context.WithValue(ctx, ctxKeyTask, &taskCtx{Group: grp, TaskInfo: info}) -} - -func task(ctx context.Context) *taskCtx { - v := ctx.Value(ctxKeyTask) - if v == nil { - return nil - } - - return v.(*taskCtx) -} - -func Progressf(ctx context.Context, format string, args ...interface{}) { - t := task(ctx) - if t == nil { - // not a running task - return - } - - t.TaskInfo.Status = fmt.Sprintf(format, args...) 
- err := t.sendMessage(ctx, "task-progress", t.TaskInfo, false) - if err != nil { - t.Logger.Error(ctx, fmt.Errorf("send task-progress: %w", err)) - } -} diff --git a/swo/swogrp/starttask.go b/swo/swogrp/starttask.go new file mode 100644 index 0000000000..5c780ee670 --- /dev/null +++ b/swo/swogrp/starttask.go @@ -0,0 +1,47 @@ +package swogrp + +import ( + "context" +) + +func (t *TaskMan) startTask(fn func(context.Context) error, successMsg string) { + if t.cancelTask != nil { + panic("already running a task") + } + + ctx, cancel := context.WithCancel(t.cfg.Logger.BackgroundContext()) + t.cancelTask = cancel + + ackID := t.lastMsgID + ctx = withMsgID(ctx, ackID) + + go func() { + err := fn(ctx) + if err != nil { + t.mx.Lock() + t.sendAck(ctx, "error", err.Error(), ackID) + t.mx.Unlock() + return + } + + t.mx.Lock() + t.sendAck(ctx, successMsg, nil, ackID) + t.cancelTask() + t.cancelTask = nil + t.mx.Unlock() + }() +} + +func (t *TaskMan) cancel() { + if t.cancelTask != nil { + t.cancelTask() + } + t.cfg.Executor.Cancel() + + t.cancelTask = nil + ctx := t.cfg.Logger.BackgroundContext() + err := t.cfg.ResumeFunc(ctx) + if err != nil { + t.cfg.Logger.Error(ctx, err) + } +} diff --git a/swo/swogrp/state.go b/swo/swogrp/state.go deleted file mode 100644 index ca0e5d72cb..0000000000 --- a/swo/swogrp/state.go +++ /dev/null @@ -1,12 +0,0 @@ -package swogrp - -type State string - -const ( - stateNeedsReset State = "needs-reset" - stateIdle State = "idle" - stateReset State = "reset" - stateError State = "error" - stateExec State = "exec" - stateDone State = "done" -) diff --git a/swo/swogrp/status.go b/swo/swogrp/status.go new file mode 100644 index 0000000000..5e6bc4e8c9 --- /dev/null +++ b/swo/swogrp/status.go @@ -0,0 +1,29 @@ +package swogrp + +import "github.com/google/uuid" + +type Status struct { + State ClusterState + Nodes []Node + LeaderID uuid.UUID + LastStatus string + LastError string +} + +func (t *TaskMan) Status() Status { + t.mx.Lock() + defer t.mx.Unlock() 
+ + nodes := make([]Node, 0, len(t.nodes)) + for _, n := range t.nodes { + nodes = append(nodes, n) + } + + return Status{ + State: t.state, + Nodes: nodes, + LeaderID: t.leaderID, + LastStatus: t.lastStatus, + LastError: t.lastError, + } +} diff --git a/swo/swogrp/taskman.go b/swo/swogrp/taskman.go new file mode 100644 index 0000000000..2a994a1105 --- /dev/null +++ b/swo/swogrp/taskman.go @@ -0,0 +1,255 @@ +package swogrp + +import ( + "context" + "encoding/json" + "fmt" + "sync" + "time" + + "github.com/google/uuid" + "github.com/target/goalert/swo/swomsg" +) + +type TaskMan struct { + local Node + + cfg Config + nodes map[uuid.UUID]Node + paused map[uuid.UUID]struct{} + state ClusterState + + cancelTask func() + lastMsgID uuid.UUID + leaderID uuid.UUID + + lastError string + lastStatus string + pendingStatus string + mx sync.Mutex +} + +func NewTaskMan(ctx context.Context, cfg Config) (*TaskMan, error) { + t := &TaskMan{ + cfg: cfg, + local: Node{ + ID: uuid.New(), + OldID: cfg.OldID, + NewID: cfg.NewID, + + CanExec: cfg.CanExec, + }, + nodes: make(map[uuid.UUID]Node), + paused: make(map[uuid.UUID]struct{}), + } + + t.send(ctx, "hello", t.local) + + return t, nil +} + +func (t *TaskMan) Init() { + go t.statusLoop() + go t.messageLoop() +} + +func (t *TaskMan) allNodesPaused() bool { + for _, n := range t.nodes { + _, ok := t.paused[n.ID] + if !ok { + return false + } + } + + return true +} + +func (t *TaskMan) statusLoop() { + ctx := t.cfg.Logger.BackgroundContext() + // debounce/throttle status messages + var lastStatus string + for range time.NewTicker(time.Second).C { + t.mx.Lock() + status := t.pendingStatus + id := t.lastMsgID + t.mx.Unlock() + if status == lastStatus { + continue + } + + t.sendAck(ctx, "status", status, id) + } +} + +func (t *TaskMan) messageLoop() { + ctx := t.cfg.Logger.BackgroundContext() + for msg := range t.cfg.Messages.Events() { + t.mx.Lock() + switch { + case msg.Type == "reset": + t.state = ClusterStateResetting + t.cancel() + 
t.leaderID = uuid.Nil + t.lastStatus = "" + t.lastError = "" + t.pendingStatus = "" + t.lastMsgID = msg.ID + for id := range t.nodes { + delete(t.nodes, id) + delete(t.paused, id) + } + t.sendAck(ctx, "hello", t.local, t.lastMsgID) + case t.state == ClusterStateResetting && msg.Type == "hello" && msg.AckID == t.lastMsgID: + var n Node + err := json.Unmarshal(msg.Data, &n) + if err != nil { + t.send(ctx, "error", fmt.Sprintf("unmarshal hello: %v", err)) + continue + } + t.nodes[msg.Node] = n + if t.leaderID != uuid.Nil { + // already have leader + break + } + if !n.CanExec { + // can't be leader + break + } + t.leaderID = n.ID + if t.leaderID != t.local.ID { + // not us + break + } + + // leader, start timer + t.startTask(resetDelay, "reset-end") + case t.state == ClusterStateResetting && msg.Type == "reset-end" && msg.AckID == t.lastMsgID: + t.lastMsgID = msg.ID + t.state = ClusterStateIdle + case t.state == ClusterStateIdle && msg.Type == "execute" && msg.AckID == t.lastMsgID: + t.state = ClusterStateSyncing + t.lastMsgID = msg.ID + if t.leaderID != t.local.ID { + break + } + + t.startTask(t.cfg.Executor.Sync, "pause") + case t.state == ClusterStateSyncing && msg.Type == "pause" && msg.AckID == t.lastMsgID: + t.state = ClusterStatePausing + t.lastMsgID = msg.ID + + t.startTask(func(ctx context.Context) error { + ctx, cancel := context.WithTimeout(ctx, 30*time.Second) + defer cancel() + return t.cfg.PauseFunc(ctx) + }, "paused") + case t.state == ClusterStatePausing && msg.Type == "paused" && msg.AckID == t.lastMsgID: + t.paused[msg.Node] = struct{}{} + if !t.allNodesPaused() { + break + } + t.state = ClusterStateExecuting + t.lastMsgID = msg.ID + if t.leaderID != t.local.ID { + break + } + t.startTask(t.cfg.Executor.Exec, "done") + case t.state == ClusterStateExecuting && msg.Type == "done" && msg.AckID == t.lastMsgID: + t.state = ClusterStateDone + case msg.Type == "status": + if msg.AckID != t.lastMsgID { + break + } + t.lastStatus = t.parseString(msg.Data) + 
case msg.Type == "error": + if msg.AckID != t.lastMsgID { + break + } + t.lastError = t.parseString(msg.Data) + fallthrough + case msg.Type == "cancel": + t.cancel() + t.state = ClusterStateUnknown + default: + if t.state != ClusterStateUnknown { + // only report on change + t.sendAck(ctx, "cancel", "unexpected or invalid message", msg.ID) + } + t.cancel() + t.state = ClusterStateUnknown + } + t.mx.Unlock() + } +} + +func (t *TaskMan) parseString(data json.RawMessage) string { + var s string + err := json.Unmarshal(data, &s) + if err != nil { + t.cfg.Logger.Error(context.Background(), fmt.Errorf("unmarshal string: %w", err)) + return "" + } + return s +} + +func resetDelay(ctx context.Context) error { + t := time.NewTimer(3 * time.Second) + defer t.Stop() + select { + case <-ctx.Done(): + return ctx.Err() + case <-t.C: + return nil + } +} + +func (t *TaskMan) send(ctx context.Context, msgType string, v interface{}) { + t.sendAck(ctx, msgType, v, uuid.Nil) +} + +func (t *TaskMan) sendAck(ctx context.Context, msgType string, v interface{}, ackID uuid.UUID) { + data, err := json.Marshal(v) + if err != nil { + panic(fmt.Errorf("marshal %s: %w", msgType, err)) + } + + err = t.cfg.Messages.Append(ctx, swomsg.Message{ + ID: uuid.New(), + Node: t.local.ID, + AckID: ackID, + + Type: msgType, + Data: data, + }) + if err != nil { + t.cfg.Logger.Error(ctx, fmt.Errorf("append %s: %w", msgType, err)) + } +} + +type taskCtx string + +func withMsgID(ctx context.Context, id uuid.UUID) context.Context { + return context.WithValue(ctx, taskCtx("msgID"), id) +} +func msgID(ctx context.Context) uuid.UUID { return ctx.Value(taskCtx("msgID")).(uuid.UUID) } + +func (t *TaskMan) Statusf(ctx context.Context, format string, args ...interface{}) { + t.mx.Lock() + if t.lastMsgID == msgID(ctx) { + t.pendingStatus = fmt.Sprintf(format, args...) 
+ } + t.mx.Unlock() +} + +func (mgr *TaskMan) Cancel(ctx context.Context) error { mgr.send(ctx, "cancel", nil); return nil } +func (mgr *TaskMan) Reset(ctx context.Context) error { mgr.send(ctx, "reset", nil); return nil } +func (mgr *TaskMan) Execute(ctx context.Context) error { + mgr.mx.Lock() + defer mgr.mx.Unlock() + if mgr.state != ClusterStateIdle { + return fmt.Errorf("cannot execute unless idle") + } + + mgr.sendAck(ctx, "execute", nil, mgr.lastMsgID) + return nil +} diff --git a/swo/swoinfo/scantables.go b/swo/swoinfo/scantables.go index 347caac8dd..afd683e07d 100644 --- a/swo/swoinfo/scantables.go +++ b/swo/swoinfo/scantables.go @@ -8,7 +8,6 @@ import ( "github.com/jackc/pgx/v4" "github.com/target/goalert/swo/swodb" - "github.com/target/goalert/swo/swogrp" ) // ScanTables scans the database for tables returning them in insert-safe-order, @@ -16,8 +15,6 @@ import ( // // Tables with migrate-only data, or those used by switchover code will be omitted. func ScanTables(ctx context.Context, conn *pgx.Conn) ([]Table, error) { - swogrp.Progressf(ctx, "scanning tables...") - columns, err := swodb.New(conn).TableColumns(ctx) if err != nil { return nil, fmt.Errorf("scan table columns: %w", err) diff --git a/swo/swosync/safety.go b/swo/swosync/safety.go index 2542c6b72e..911f61255b 100644 --- a/swo/swosync/safety.go +++ b/swo/swosync/safety.go @@ -15,7 +15,8 @@ declare begin set local idle_in_transaction_session_timeout = 60000; set local lock_timeout = 60000; - assert (select pg_try_advisory_xact_lock(4919)), 'failed to get migration lock'; + assert (select pg_try_advisory_xact_lock_shared(4919)), 'failed to get shared migration lock'; + assert (select pg_try_advisory_xact_lock(4370)), 'failed to get exec lock'; assert (select current_state = 'in_progress' from switchover_state), 'switchover state is not in_progress'; end $$; ` @@ -27,6 +28,7 @@ declare begin set local idle_in_transaction_session_timeout = 3000; set local lock_timeout = 3000; + assert (select 
pg_try_advisory_xact_lock_shared(4919)), 'failed to get shared migration lock'; perform pg_advisory_xact_lock(4369); assert (select current_state = 'in_progress' from switchover_state), 'switchover state is not in_progress'; end $$; @@ -40,7 +42,22 @@ declare begin set idle_in_transaction_session_timeout = 60000; set lock_timeout = 60000; - assert (select pg_try_advisory_lock(4919)), 'failed to get migration lock'; + assert (select pg_try_advisory_lock_shared(4919)), 'failed to get shared migration lock'; + assert (select pg_try_advisory_lock(4370)), 'failed to get exec lock'; + assert (select current_state != 'use_next_db' from switchover_state), 'switchover state is use_next_db'; +end $$; +` + +// ConnWaitLockQuery will wait for the exec lock, and will result in a failed assertion if it is unable to get +// the shared migration lock or switchover state is use_next_db +const ConnWaitLockQuery = ` +do $$ +declare +begin + set idle_in_transaction_session_timeout = 60000; + set lock_timeout = 60000; + assert (select pg_try_advisory_lock_shared(4919)), 'failed to get shared migration lock'; + perform pg_advisory_lock(4370); assert (select current_state != 'use_next_db' from switchover_state), 'switchover state is use_next_db'; end $$; ` diff --git a/web/src/app/admin/switchover/AdminSwitchover.tsx b/web/src/app/admin/switchover/AdminSwitchover.tsx index 77ea7597c5..33dd468524 100644 --- a/web/src/app/admin/switchover/AdminSwitchover.tsx +++ b/web/src/app/admin/switchover/AdminSwitchover.tsx @@ -33,12 +33,9 @@ import Spinner from '../../loading/components/Spinner' const query = gql` query { swoStatus { - isDone - isIdle - isResetting - isExecuting - details - errors + state + lastError + lastStatus mainDBVersion nextDBVersion connections { @@ -47,7 +44,6 @@ const query = gql` } nodes { id - status canExec oldValid newValid @@ -126,7 +122,7 @@ export default function AdminSwitchover(): JSX.Element { ) } - if (data?.isDone) { + if (data?.state === 'done') { return ( @@ -158,7 +154,6 @@ export default function 
AdminSwitchover(): JSX.Element { } const statusNotices = [] if (mutationStatus.error) { - console.log(mutationStatus) statusNotices.push({ type: 'error', message: 'Failed to ' + mutationStatus.operation?.variables?.action, @@ -166,19 +161,19 @@ export default function AdminSwitchover(): JSX.Element { endNote: DateTime.local().toFormat('fff'), }) } - if (data?.errors) { - data?.errors.forEach((message: string) => { - statusNotices.push({ - type: 'error', - message, - }) + if (data?.state === 'unknown' && data?.lastError) { + statusNotices.push({ + type: 'error', + message: data.lastError, }) } const resetLoad = - data?.isResetting || (lastAction === 'reset' && mutationStatus.fetching) + data?.state === 'resetting' || + (lastAction === 'reset' && mutationStatus.fetching) const executeLoad = - data?.isExecuting || (lastAction === 'execute' && mutationStatus.fetching) + ['syncing', 'pausing', 'executing'].includes(data?.state) || + (lastAction === 'execute' && mutationStatus.fetching) function getIcon(): React.ReactNode { const i: SvgIconProps = { color: 'primary', sx: { fontSize: '3.5rem' } } @@ -193,10 +188,10 @@ export default function AdminSwitchover(): JSX.Element { ) } - if (!data.isIdle && !data.isDone) { + if (!['unknown', 'idle', 'done'].includes(data.state)) { return } - if (data.isIdle) { + if (data.state === 'idle') { return } } @@ -204,9 +199,9 @@ export default function AdminSwitchover(): JSX.Element { function getSubheader(): React.ReactNode { if (error) return 'Error' if (!data) return 'Loading...' 
- if (data.isDone) return 'Complete' - if (data.isIdle) return 'Ready' - if (!data.isExecuting && !data.isResetting) return 'Needs Reset' + if (data.state === 'done') return 'Complete' + if (data.state === 'idle') return 'Ready' + if (data.state === 'unknown') return 'Needs Reset' return 'Busy' } @@ -218,8 +213,8 @@ export default function AdminSwitchover(): JSX.Element { ) } - if (data?.details) { - return {cptlz(data.details)} + if (data.state !== 'unknown' && data.lastStatus) { + return {cptlz(data.lastStatus)} } return   // reserves whitespace } @@ -246,14 +241,13 @@ export default function AdminSwitchover(): JSX.Element { {getDetails()} : } - disabled={data?.isDone || mutationStatus.fetching} + startIcon={ + data?.state === 'done' ? : + } + disabled={data?.state === 'done' || mutationStatus.fetching} variant='outlined' size='large' - loading={ - data?.isResetting || - (lastAction === 'reset' && mutationStatus.fetching) - } + loading={resetLoad} loadingPosition='start' onClick={actionHandler('reset')} > @@ -261,15 +255,16 @@ export default function AdminSwitchover(): JSX.Element { : + data.state !== 'idle' ? 
( + + ) : ( + + ) } - disabled={!data?.isIdle || mutationStatus.fetching} + disabled={data.state !== 'idle' || mutationStatus.fetching} variant='outlined' size='large' - loading={ - data?.isExecuting || - (lastAction === 'execute' && mutationStatus.fetching) - } + loading={executeLoad} loadingPosition='start' onClick={actionHandler('execute')} > diff --git a/web/src/schema.d.ts b/web/src/schema.d.ts index 0913b85aac..1fc75aa5c0 100644 --- a/web/src/schema.d.ts +++ b/web/src/schema.d.ts @@ -38,18 +38,24 @@ export interface Query { } export interface SWOStatus { - isIdle: boolean - isDone: boolean - isResetting: boolean - isExecuting: boolean - details: string - errors: string[] + state: SWOState + lastStatus: string + lastError: string nodes: SWONode[] connections: SWOConnection[] mainDBVersion: string nextDBVersion: string } +export type SWOState = + | 'unknown' + | 'resetting' + | 'idle' + | 'syncing' + | 'pausing' + | 'executing' + | 'done' + export interface SWOConnection { name: string count: number @@ -61,7 +67,6 @@ export interface SWONode { newValid: boolean canExec: boolean isLeader: boolean - status: string } export interface AlertMetricsOptions { @@ -282,7 +287,7 @@ export interface SetScheduleShiftInput { end: ISOTimestamp } -export type SWOAction = 'ping' | 'reset' | 'execute' +export type SWOAction = 'reset' | 'execute' export interface Mutation { swoAction: boolean From cbe060e05c885d1c85307eb7e286946eca39a035 Mon Sep 17 00:00:00 2001 From: Nathaniel Caza Date: Wed, 20 Jul 2022 13:33:53 -0500 Subject: [PATCH 125/225] fix nodes --- graphql2/graphqlapp/swo.go | 1 + web/src/app/admin/switchover/SWONode.tsx | 12 +----------- 2 files changed, 2 insertions(+), 11 deletions(-) diff --git a/graphql2/graphqlapp/swo.go b/graphql2/graphqlapp/swo.go index 5e20ed9386..2fe947354f 100644 --- a/graphql2/graphqlapp/swo.go +++ b/graphql2/graphqlapp/swo.go @@ -91,6 +91,7 @@ func (a *Query) SwoStatus(ctx context.Context) (*graphql2.SWOStatus, error) { LastStatus: 
s.LastStatus, LastError: s.LastError, + Nodes: nodes, Connections: conns, diff --git a/web/src/app/admin/switchover/SWONode.tsx b/web/src/app/admin/switchover/SWONode.tsx index e711dc0c12..229990837a 100644 --- a/web/src/app/admin/switchover/SWONode.tsx +++ b/web/src/app/admin/switchover/SWONode.tsx @@ -27,17 +27,7 @@ export default function SWONode({ node, name }: SWONodeProps): JSX.Element { {name} - - - - Status: {node.status || 'Unknown'} - - - - } - > + From 62ffc637450aaf315bc902e358a6ee5c16ec6d7d Mon Sep 17 00:00:00 2001 From: Nathaniel Caza Date: Wed, 20 Jul 2022 14:02:16 -0500 Subject: [PATCH 126/225] increase interval --- web/src/app/admin/switchover/AdminSwitchover.tsx | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/web/src/app/admin/switchover/AdminSwitchover.tsx b/web/src/app/admin/switchover/AdminSwitchover.tsx index 33dd468524..83aca4dc4c 100644 --- a/web/src/app/admin/switchover/AdminSwitchover.tsx +++ b/web/src/app/admin/switchover/AdminSwitchover.tsx @@ -86,11 +86,12 @@ export default function AdminSwitchover(): JSX.Element { }) const data = _data?.swoStatus as SWOStatus const [lastAction, setLastAction] = useState('') - const [_statusNotices, setStatusNotices] = useState([]) const [mutationStatus, commit] = useMutation(mutation) useEffect(() => { - const t = setInterval(refetch, 250) + const t = setInterval(() => { + if (!fetching) refetch() + }, 1000) return () => clearInterval(t) }, []) From 8311d00cad767f97a25d46c4a7e7f54d81f15bd1 Mon Sep 17 00:00:00 2001 From: Nathaniel Caza Date: Wed, 20 Jul 2022 14:47:37 -0500 Subject: [PATCH 127/225] wait for log echo --- swo/manager.go | 4 +- swo/swogrp/starttask.go | 4 +- swo/swogrp/status.go | 2 +- swo/swogrp/taskman.go | 117 +++++++++++++----- .../app/admin/switchover/AdminSwitchover.tsx | 2 +- 5 files changed, 95 insertions(+), 34 deletions(-) diff --git a/swo/manager.go b/swo/manager.go index 1f814c85e2..f3524ccdcb 100644 --- a/swo/manager.go +++ b/swo/manager.go @@ -28,7 +28,7 
@@ type Manager struct { Config - taskMgr *swogrp.TaskMan + taskMgr *swogrp.TaskMgr MainDBInfo *swoinfo.DB NextDBInfo *swoinfo.DB @@ -80,7 +80,7 @@ func NewManager(cfg Config) (*Manager, error) { return nil, fmt.Errorf("et server version: %w", err) } - m.taskMgr, err = swogrp.NewTaskMan(ctx, swogrp.Config{ + m.taskMgr, err = swogrp.NewTaskMgr(ctx, swogrp.Config{ CanExec: cfg.CanExec, Logger: cfg.Logger, diff --git a/swo/swogrp/starttask.go b/swo/swogrp/starttask.go index 5c780ee670..78d527a0bf 100644 --- a/swo/swogrp/starttask.go +++ b/swo/swogrp/starttask.go @@ -4,7 +4,7 @@ import ( "context" ) -func (t *TaskMan) startTask(fn func(context.Context) error, successMsg string) { +func (t *TaskMgr) startTask(fn func(context.Context) error, successMsg string) { if t.cancelTask != nil { panic("already running a task") } @@ -32,7 +32,7 @@ func (t *TaskMan) startTask(fn func(context.Context) error, successMsg string) { }() } -func (t *TaskMan) cancel() { +func (t *TaskMgr) cancel() { if t.cancelTask != nil { t.cancelTask() } diff --git a/swo/swogrp/status.go b/swo/swogrp/status.go index 5e6bc4e8c9..766f175ffe 100644 --- a/swo/swogrp/status.go +++ b/swo/swogrp/status.go @@ -10,7 +10,7 @@ type Status struct { LastError string } -func (t *TaskMan) Status() Status { +func (t *TaskMgr) Status() Status { t.mx.Lock() defer t.mx.Unlock() diff --git a/swo/swogrp/taskman.go b/swo/swogrp/taskman.go index 2a994a1105..cbbd1b8421 100644 --- a/swo/swogrp/taskman.go +++ b/swo/swogrp/taskman.go @@ -11,13 +11,14 @@ import ( "github.com/target/goalert/swo/swomsg" ) -type TaskMan struct { +type TaskMgr struct { local Node - cfg Config - nodes map[uuid.UUID]Node - paused map[uuid.UUID]struct{} - state ClusterState + cfg Config + nodes map[uuid.UUID]Node + paused map[uuid.UUID]struct{} + waitMsg map[uuid.UUID]chan struct{} + state ClusterState cancelTask func() lastMsgID uuid.UUID @@ -29,8 +30,8 @@ type TaskMan struct { mx sync.Mutex } -func NewTaskMan(ctx context.Context, cfg Config) 
(*TaskMan, error) { - t := &TaskMan{ +func NewTaskMgr(ctx context.Context, cfg Config) (*TaskMgr, error) { + t := &TaskMgr{ cfg: cfg, local: Node{ ID: uuid.New(), @@ -39,21 +40,22 @@ func NewTaskMan(ctx context.Context, cfg Config) (*TaskMan, error) { CanExec: cfg.CanExec, }, - nodes: make(map[uuid.UUID]Node), - paused: make(map[uuid.UUID]struct{}), + nodes: make(map[uuid.UUID]Node), + paused: make(map[uuid.UUID]struct{}), + waitMsg: make(map[uuid.UUID]chan struct{}), } - t.send(ctx, "hello", t.local) + t.sendAck(ctx, "hello", t.local, uuid.Nil) return t, nil } -func (t *TaskMan) Init() { +func (t *TaskMgr) Init() { go t.statusLoop() go t.messageLoop() } -func (t *TaskMan) allNodesPaused() bool { +func (t *TaskMgr) allNodesPaused() bool { for _, n := range t.nodes { _, ok := t.paused[n.ID] if !ok { @@ -64,7 +66,7 @@ func (t *TaskMan) allNodesPaused() bool { return true } -func (t *TaskMan) statusLoop() { +func (t *TaskMgr) statusLoop() { ctx := t.cfg.Logger.BackgroundContext() // debounce/throttle status messages var lastStatus string @@ -73,18 +75,23 @@ func (t *TaskMan) statusLoop() { status := t.pendingStatus id := t.lastMsgID t.mx.Unlock() - if status == lastStatus { + if status == lastStatus || status == "" { continue } + lastStatus = status t.sendAck(ctx, "status", status, id) } } -func (t *TaskMan) messageLoop() { +func (t *TaskMgr) messageLoop() { ctx := t.cfg.Logger.BackgroundContext() for msg := range t.cfg.Messages.Events() { t.mx.Lock() + if ch, ok := t.waitMsg[msg.ID]; ok { + close(ch) + delete(t.waitMsg, msg.ID) + } switch { case msg.Type == "reset": t.state = ClusterStateResetting @@ -103,7 +110,7 @@ func (t *TaskMan) messageLoop() { var n Node err := json.Unmarshal(msg.Data, &n) if err != nil { - t.send(ctx, "error", fmt.Sprintf("unmarshal hello: %v", err)) + t.sendAck(ctx, "error", fmt.Sprintf("unmarshal hello: %v", err), msg.ID) continue } t.nodes[msg.Node] = n @@ -182,7 +189,7 @@ func (t *TaskMan) messageLoop() { } } -func (t *TaskMan) 
parseString(data json.RawMessage) string { +func (t *TaskMgr) parseString(data json.RawMessage) string { var s string err := json.Unmarshal(data, &s) if err != nil { @@ -203,11 +210,35 @@ func resetDelay(ctx context.Context) error { } } -func (t *TaskMan) send(ctx context.Context, msgType string, v interface{}) { - t.sendAck(ctx, msgType, v, uuid.Nil) +func (t *TaskMgr) sendAckWait(ctx context.Context, msgType string, v interface{}, ackID uuid.UUID) <-chan struct{} { + data, err := json.Marshal(v) + if err != nil { + panic(fmt.Errorf("marshal %s: %w", msgType, err)) + } + + ch := make(chan struct{}) + id := uuid.New() + t.mx.Lock() + t.waitMsg[id] = ch + t.mx.Unlock() + + err = t.cfg.Messages.Append(ctx, swomsg.Message{ + ID: id, + Node: t.local.ID, + AckID: ackID, + + Type: msgType, + Data: data, + }) + if err != nil { + close(ch) + t.cfg.Logger.Error(ctx, fmt.Errorf("append %s: %w", msgType, err)) + } + + return ch } -func (t *TaskMan) sendAck(ctx context.Context, msgType string, v interface{}, ackID uuid.UUID) { +func (t *TaskMgr) sendAck(ctx context.Context, msgType string, v interface{}, ackID uuid.UUID) { data, err := json.Marshal(v) if err != nil { panic(fmt.Errorf("marshal %s: %w", msgType, err)) @@ -233,7 +264,7 @@ func withMsgID(ctx context.Context, id uuid.UUID) context.Context { } func msgID(ctx context.Context) uuid.UUID { return ctx.Value(taskCtx("msgID")).(uuid.UUID) } -func (t *TaskMan) Statusf(ctx context.Context, format string, args ...interface{}) { +func (t *TaskMgr) Statusf(ctx context.Context, format string, args ...interface{}) { t.mx.Lock() if t.lastMsgID == msgID(ctx) { t.pendingStatus = fmt.Sprintf(format, args...) 
@@ -241,15 +272,45 @@ func (t *TaskMan) Statusf(ctx context.Context, format string, args ...interface{ t.mx.Unlock() } -func (mgr *TaskMan) Cancel(ctx context.Context) error { mgr.send(ctx, "cancel", nil); return nil } -func (mgr *TaskMan) Reset(ctx context.Context) error { mgr.send(ctx, "reset", nil); return nil } -func (mgr *TaskMan) Execute(ctx context.Context) error { - mgr.mx.Lock() - defer mgr.mx.Unlock() - if mgr.state != ClusterStateIdle { +func (t *TaskMgr) Cancel(ctx context.Context) error { + ch := t.sendAckWait(ctx, "cancel", nil, uuid.Nil) + + select { + case <-ctx.Done(): + return ctx.Err() + case <-ch: + } + + return nil +} + +func (t *TaskMgr) Reset(ctx context.Context) error { + ch := t.sendAckWait(ctx, "reset", nil, uuid.Nil) + + select { + case <-ctx.Done(): + return ctx.Err() + case <-ch: + } + + return nil +} + +func (t *TaskMgr) Execute(ctx context.Context) error { + t.mx.Lock() + state := t.state + t.mx.Unlock() + if state != ClusterStateIdle { return fmt.Errorf("cannot execute unless idle") } - mgr.sendAck(ctx, "execute", nil, mgr.lastMsgID) + ch := t.sendAckWait(ctx, "execute", nil, t.lastMsgID) + + select { + case <-ctx.Done(): + return ctx.Err() + case <-ch: + } + return nil } diff --git a/web/src/app/admin/switchover/AdminSwitchover.tsx b/web/src/app/admin/switchover/AdminSwitchover.tsx index 83aca4dc4c..80218bb9f3 100644 --- a/web/src/app/admin/switchover/AdminSwitchover.tsx +++ b/web/src/app/admin/switchover/AdminSwitchover.tsx @@ -150,7 +150,7 @@ export default function AdminSwitchover(): JSX.Element { function actionHandler(action: 'reset' | 'execute'): () => void { return () => { setLastAction(action) - commit({ action }) + commit({ action }, { additionalTypenames: ['SWOStatus'] }) } } const statusNotices = [] From 20466f2882489bf97ff3d5c515df9ec78902a2d5 Mon Sep 17 00:00:00 2001 From: Nathaniel Caza Date: Wed, 20 Jul 2022 15:10:54 -0500 Subject: [PATCH 128/225] increas max wait for heavy-load scenarios --- swo/swosync/safety.go | 4 
++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/swo/swosync/safety.go b/swo/swosync/safety.go index 911f61255b..f248faaf15 100644 --- a/swo/swosync/safety.go +++ b/swo/swosync/safety.go @@ -26,8 +26,8 @@ const txStopTheWorld = ` do $$ declare begin - set local idle_in_transaction_session_timeout = 3000; - set local lock_timeout = 3000; + set local idle_in_transaction_session_timeout = 5000; + set local lock_timeout = 5000; assert (select pg_try_advisory_xact_lock_shared(4919)), 'failed to get shared migration lock'; perform pg_advisory_xact_lock(4369); assert (select current_state = 'in_progress' from switchover_state), 'switchover state is not in_progress'; From 5f97d13972a1cb2be23cf9c76f36901bbec4b76a Mon Sep 17 00:00:00 2001 From: Nathaniel Caza Date: Thu, 21 Jul 2022 10:24:48 -0500 Subject: [PATCH 129/225] always attempt to update log --- swo/swosync/logicalsync.go | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/swo/swosync/logicalsync.go b/swo/swosync/logicalsync.go index edd79821ea..2b14f9f4a1 100644 --- a/swo/swosync/logicalsync.go +++ b/swo/swosync/logicalsync.go @@ -5,6 +5,7 @@ import ( "fmt" "github.com/jackc/pgx/v4" + "github.com/target/goalert/util/log" "github.com/target/goalert/util/sqlutil" ) @@ -145,10 +146,14 @@ func (l *LogicalReplicator) doSync(ctx context.Context, final bool) error { return fmt.Errorf("commit sync read: %w", err) } - if final { - // no cleanup/err check for final - return nil - } _, err = tblSync.ExecDeleteChanges(ctx, l.srcConn) - return err + if !final { + return err + } + + if err != nil { + // log but don't return error in final since switchover is complete + log.Log(ctx, err) + } + return nil } From 701542b0d5a9c6239847eb4bc534b2f4731a60b7 Mon Sep 17 00:00:00 2001 From: Nathaniel Caza Date: Thu, 21 Jul 2022 10:57:37 -0500 Subject: [PATCH 130/225] only release the intended advisory lock --- engine/message/db.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff 
--git a/engine/message/db.go b/engine/message/db.go index 910243097d..516d143430 100644 --- a/engine/message/db.go +++ b/engine/message/db.go @@ -598,7 +598,7 @@ func (db *DB) _SendMessages(ctx context.Context, send SendFunc, status StatusFun return errors.Wrap(err, "acquire global sending advisory lock") } defer func() { - cLock.ExecWithoutLock(log.FromContext(execCtx).BackgroundContext(), `select pg_advisory_unlock_all()`) + cLock.ExecWithoutLock(log.FromContext(execCtx).BackgroundContext(), `select pg_advisory_unlock(4912)`) }() tx, err := cLock.BeginTx(execCtx, nil) From 0f6c8f1fe3a7f3d6422bc9d06dce1afcfcaf6e92 Mon Sep 17 00:00:00 2001 From: Nathaniel Caza Date: Thu, 21 Jul 2022 10:58:31 -0500 Subject: [PATCH 131/225] move app name change to swo code --- app/cmd.go | 6 +----- swo/drvconnector.go | 13 +++++++++++++ swo/manager.go | 4 ++-- swo/mgrconnector.go | 44 ++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 60 insertions(+), 7 deletions(-) create mode 100644 swo/mgrconnector.go diff --git a/app/cmd.go b/app/cmd.go index ef536d5666..4c9a20bbe8 100644 --- a/app/cmd.go +++ b/app/cmd.go @@ -91,11 +91,7 @@ var RootCmd = &cobra.Command{ return errors.Wrap(err, "parse old URL") } q := u.Query() - if cfg.DBURLNext != "" { - q.Set("application_name", fmt.Sprintf("GoAlert %s (SWO Mode)", version.GitVersion())) - } else { - q.Set("application_name", fmt.Sprintf("GoAlert %s", version.GitVersion())) - } + q.Set("application_name", fmt.Sprintf("GoAlert %s", version.GitVersion())) q.Set("enable_seqscan", "off") u.RawQuery = q.Encode() cfg.DBURL = u.String() diff --git a/swo/drvconnector.go b/swo/drvconnector.go index 7c6a7d6548..0ad94a0b9a 100644 --- a/swo/drvconnector.go +++ b/swo/drvconnector.go @@ -3,10 +3,12 @@ package swo import ( "context" "database/sql/driver" + "fmt" "sync" "github.com/jackc/pgx/v4" "github.com/jackc/pgx/v4/stdlib" + "github.com/target/goalert/version" ) type Connector struct { @@ -42,7 +44,14 @@ func (drv *Connector) Connect(ctx 
context.Context) (driver.Conn, error) { } conn := c.(*stdlib.Conn) + str, err := conn.Conn().PgConn().EscapeString(fmt.Sprintf("GoAlert %s (SWO Node)", version.GitVersion())) + if err != nil { + conn.Close() + return nil, err + } + var b pgx.Batch + b.Queue(fmt.Sprintf("set application_name = '%s'", str)) b.Queue("select pg_advisory_lock_shared(4369)") b.Queue("select current_state = 'use_next_db' FROM switchover_state") @@ -51,6 +60,10 @@ func (drv *Connector) Connect(ctx context.Context) (driver.Conn, error) { conn.Close() return nil, err } + if _, err := res.Exec(); err != nil { + conn.Close() + return nil, err + } defer res.Close() var useNext bool diff --git a/swo/manager.go b/swo/manager.go index f3524ccdcb..1161d41d06 100644 --- a/swo/manager.go +++ b/swo/manager.go @@ -54,8 +54,8 @@ func NewManager(cfg Config) (*Manager, error) { m := &Manager{ Config: cfg, dbApp: sql.OpenDB(NewConnector(cfg.OldDBC, cfg.NewDBC)), - dbMain: sql.OpenDB(cfg.OldDBC), - dbNext: sql.OpenDB(cfg.NewDBC), + dbMain: sql.OpenDB(newMgrConnector(cfg.OldDBC)), + dbNext: sql.OpenDB(newMgrConnector(cfg.NewDBC)), } ctx := cfg.Logger.BackgroundContext() diff --git a/swo/mgrconnector.go b/swo/mgrconnector.go new file mode 100644 index 0000000000..566394598b --- /dev/null +++ b/swo/mgrconnector.go @@ -0,0 +1,44 @@ +package swo + +import ( + "context" + "database/sql/driver" + "fmt" + + "github.com/jackc/pgx/v4/stdlib" + "github.com/target/goalert/version" +) + +type mgrConnector struct { + dbc driver.Connector +} + +var _ driver.Connector = (*mgrConnector)(nil) + +func newMgrConnector(dbc driver.Connector) *mgrConnector { + return &mgrConnector{dbc: dbc} +} + +func (drv *mgrConnector) Driver() driver.Driver { return nil } + +func (drv *mgrConnector) Connect(ctx context.Context) (driver.Conn, error) { + c, err := drv.dbc.Connect(ctx) + if err != nil { + return nil, err + } + + conn := c.(*stdlib.Conn) + str, err := conn.Conn().PgConn().EscapeString(fmt.Sprintf("GoAlert %s (SWO Manager)", 
version.GitVersion())) + if err != nil { + conn.Close() + return nil, err + } + + _, err = conn.ExecContext(ctx, fmt.Sprintf("set application_name = '%s'", str), nil) + if err != nil { + conn.Close() + return nil, err + } + + return c, nil +} From 0e3cb62bf0259eca25da5122f8743e74b37a849b Mon Sep 17 00:00:00 2001 From: Nathaniel Caza Date: Mon, 25 Jul 2022 11:29:11 -0500 Subject: [PATCH 132/225] fix swo error states --- web/src/app/admin/switchover/AdminSwitchover.tsx | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/web/src/app/admin/switchover/AdminSwitchover.tsx b/web/src/app/admin/switchover/AdminSwitchover.tsx index 80218bb9f3..a2d8cfc929 100644 --- a/web/src/app/admin/switchover/AdminSwitchover.tsx +++ b/web/src/app/admin/switchover/AdminSwitchover.tsx @@ -99,7 +99,7 @@ export default function AdminSwitchover(): JSX.Element { return } - if (error && error.message === 'not in SWO mode') { + if (error && error.message === '[GraphQL] not in SWO mode') { return ( @@ -214,7 +214,7 @@ export default function AdminSwitchover(): JSX.Element { ) } - if (data.state !== 'unknown' && data.lastStatus) { + if (data?.state !== 'unknown' && data.lastStatus) { return {cptlz(data.lastStatus)} } return   // reserves whitespace @@ -256,13 +256,13 @@ export default function AdminSwitchover(): JSX.Element { ) : ( ) } - disabled={data.state !== 'idle' || mutationStatus.fetching} + disabled={data?.state !== 'idle' || mutationStatus.fetching} variant='outlined' size='large' loading={executeLoad} From e6cfdac86c8a03981edd4e3667748613939050c8 Mon Sep 17 00:00:00 2001 From: Nathaniel Caza Date: Mon, 25 Jul 2022 16:18:24 -0500 Subject: [PATCH 133/225] fix stale UI issue on port 3030 in SWO mode --- Procfile.swo | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/Procfile.swo b/Procfile.swo index c444c10a4b..52f02cf366 100644 --- a/Procfile.swo +++ b/Procfile.swo @@ -9,28 +9,28 @@ smtp: go run github.com/mailhog/MailHog 
-ui-bind-addr=localhost:8025 -api-bind-a ui: yarn workspace goalert-web run esbuild --watch @watch-file=./bin/goalert -ga2: ./bin/goalert -l=localhost:3050 --db-url=postgres://goalert@localhost:5435/goalert?sslmode=disable --db-url-next=postgres://goalert@localhost:5435/goalert2 --api-only +ga2: ./bin/goalert -l=localhost:3050 --ui-dir=web/src/build --db-url=postgres://goalert@localhost:5435/goalert?sslmode=disable --db-url-next=postgres://goalert@localhost:5435/goalert2 --api-only @watch-file=./bin/goalert -ga3: ./bin/goalert -l=localhost:3051 --db-url=postgres://goalert@localhost:5435/goalert?sslmode=disable --db-url-next=postgres://goalert@localhost:5435/goalert2 --api-only +ga3: ./bin/goalert -l=localhost:3051 --ui-dir=web/src/build --db-url=postgres://goalert@localhost:5435/goalert?sslmode=disable --db-url-next=postgres://goalert@localhost:5435/goalert2 --api-only @watch-file=./bin/goalert -ga4: ./bin/goalert -l=localhost:3052 --db-url=postgres://goalert@localhost:5435/goalert?sslmode=disable --db-url-next=postgres://goalert@localhost:5435/goalert2 --api-only +ga4: ./bin/goalert -l=localhost:3052 --ui-dir=web/src/build --db-url=postgres://goalert@localhost:5435/goalert?sslmode=disable --db-url-next=postgres://goalert@localhost:5435/goalert2 --api-only @watch-file=./bin/goalert -ga5: ./bin/goalert -l=localhost:3053 --db-url=postgres://goalert@localhost:5435/goalert?sslmode=disable --db-url-next=postgres://goalert@localhost:5435/goalert2 --api-only +ga5: ./bin/goalert -l=localhost:3053 --ui-dir=web/src/build --db-url=postgres://goalert@localhost:5435/goalert?sslmode=disable --db-url-next=postgres://goalert@localhost:5435/goalert2 --api-only @watch-file=./bin/goalert -ga6: ./bin/goalert -l=localhost:3054 --db-url=postgres://goalert@localhost:5435/goalert?sslmode=disable --db-url-next=postgres://goalert@localhost:5435/goalert2 --api-only +ga6: ./bin/goalert -l=localhost:3054 --ui-dir=web/src/build --db-url=postgres://goalert@localhost:5435/goalert?sslmode=disable 
--db-url-next=postgres://goalert@localhost:5435/goalert2 --api-only @watch-file=./bin/goalert -ga7: ./bin/goalert -l=localhost:3055 --db-url=postgres://goalert@localhost:5435/goalert?sslmode=disable --db-url-next=postgres://goalert@localhost:5435/goalert2 +ga7: ./bin/goalert -l=localhost:3055 --ui-dir=web/src/build --db-url=postgres://goalert@localhost:5435/goalert?sslmode=disable --db-url-next=postgres://goalert@localhost:5435/goalert2 @watch-file=./bin/goalert -ga8: ./bin/goalert -l=localhost:3056 --db-url=postgres://goalert@localhost:5435/goalert?sslmode=disable --db-url-next=postgres://goalert@localhost:5435/goalert2 +ga8: ./bin/goalert -l=localhost:3056 --ui-dir=web/src/build --db-url=postgres://goalert@localhost:5435/goalert?sslmode=disable --db-url-next=postgres://goalert@localhost:5435/goalert2 @watch-file=./bin/goalert -ga9: ./bin/goalert -l=localhost:3057 --db-url=postgres://goalert@localhost:5435/goalert?sslmode=disable --db-url-next=postgres://goalert@localhost:5435/goalert2 +ga9: ./bin/goalert -l=localhost:3057 --ui-dir=web/src/build --db-url=postgres://goalert@localhost:5435/goalert?sslmode=disable --db-url-next=postgres://goalert@localhost:5435/goalert2 proxy: go run ./devtools/simpleproxy -addr localhost:3030 /=http://localhost:3040,http://localhost:3050,http://localhost:3051,http://localhost:3052,http://localhost:3053,http://localhost:3054,http://localhost:3055,http://localhost:3056,http://localhost:3057 From 4e687a205cc35b8520d0f75c52c6873b41fcec5b Mon Sep 17 00:00:00 2001 From: Nathaniel Caza Date: Mon, 25 Jul 2022 16:45:57 -0500 Subject: [PATCH 134/225] fix notice types --- .../app/admin/switchover/AdminSwitchover.tsx | 14 ++++++---- web/src/app/details/Notices.tsx | 28 +++++++++++++++++-- web/src/app/lists/FlatList.tsx | 6 ++-- 3 files changed, 38 insertions(+), 10 deletions(-) diff --git a/web/src/app/admin/switchover/AdminSwitchover.tsx b/web/src/app/admin/switchover/AdminSwitchover.tsx index a2d8cfc929..304468bbf6 100644 --- 
a/web/src/app/admin/switchover/AdminSwitchover.tsx +++ b/web/src/app/admin/switchover/AdminSwitchover.tsx @@ -16,7 +16,7 @@ import IdleIcon from 'mdi-material-ui/DatabaseSettings' import InProgressIcon from 'mdi-material-ui/DatabaseEdit' import { gql, useMutation, useQuery } from 'urql' import { DateTime } from 'luxon' -import { SWONode as SWONodeType, SWOStatus } from '../../../schema' +import { SWOAction, SWONode as SWONodeType, SWOStatus } from '../../../schema' import Notices, { Notice } from '../../details/Notices' import SWONode from './SWONode' import LoadingButton from '@mui/lab/LoadingButton' @@ -104,7 +104,7 @@ export default function AdminSwitchover(): JSX.Element { -
+
or --db-url-next to perform a switchover. -
+
) @@ -153,11 +153,15 @@ export default function AdminSwitchover(): JSX.Element { commit({ action }, { additionalTypenames: ['SWOStatus'] }) } } - const statusNotices = [] + const statusNotices: Notice[] = [] if (mutationStatus.error) { + const vars: { action?: SWOAction } = mutationStatus.operation + ?.variables || { + action: '', + } statusNotices.push({ type: 'error', - message: 'Failed to ' + mutationStatus.operation?.variables?.action, + message: 'Failed to ' + vars.action, details: cptlz(mutationStatus.error.message), endNote: DateTime.local().toFormat('fff'), }) diff --git a/web/src/app/details/Notices.tsx b/web/src/app/details/Notices.tsx index d2ecdc62a2..51717c8ef5 100644 --- a/web/src/app/details/Notices.tsx +++ b/web/src/app/details/Notices.tsx @@ -12,6 +12,10 @@ import makeStyles from '@mui/styles/makeStyles' import ExpandIcon from '@mui/icons-material/KeyboardArrowDown' import CollapseIcon from '@mui/icons-material/KeyboardArrowUp' import toTitleCase from '../util/toTitleCase' +import { + NoticeType as SchemaNoticeType, + NotificationStatus, +} from '../../schema' const useStyles = makeStyles({ alertAction: { @@ -31,8 +35,28 @@ const useStyles = makeStyles({ }, }) +export type NoticeType = SchemaNoticeType | AlertColor | NotificationStatus + +export function toSeverity(notice: NoticeType): AlertColor { + switch (notice.toLowerCase()) { + case 'success': + return 'success' + case 'info': + return 'info' + case 'warning': + case 'warn': + return 'warning' + case 'error': + return 'error' + case 'info': + return 'info' + default: + throw new Error('Unknown notice type: ' + notice) + } +} + export interface Notice { - type: AlertColor + type: NoticeType message: string | JSX.Element details?: string | JSX.Element endNote?: string | JSX.Element @@ -90,7 +114,7 @@ export default function Notices({ return ( ({ alert: { @@ -182,7 +182,7 @@ export default function FlatList({ {item.message && {item.message}} @@ -196,7 +196,7 @@ export default function FlatList({ 
{item.message && {item.message}} From dd6d3c04642c751f835c7bd5e861de214c12e6e1 Mon Sep 17 00:00:00 2001 From: Nathaniel Caza Date: Mon, 25 Jul 2022 16:46:43 -0500 Subject: [PATCH 135/225] remove unused import --- web/src/app/admin/switchover/SWONode.tsx | 1 - 1 file changed, 1 deletion(-) diff --git a/web/src/app/admin/switchover/SWONode.tsx b/web/src/app/admin/switchover/SWONode.tsx index 229990837a..9a23eced11 100644 --- a/web/src/app/admin/switchover/SWONode.tsx +++ b/web/src/app/admin/switchover/SWONode.tsx @@ -1,6 +1,5 @@ import React from 'react' import Card from '@mui/material/Card' -import Divider from '@mui/material/Divider' import Grid from '@mui/material/Grid' import List from '@mui/material/List' import ListItem from '@mui/material/ListItem' From ef7b44b1d3cebb8ac2aaf8c17be9fe413671814f Mon Sep 17 00:00:00 2001 From: Nathaniel Cook Date: Mon, 25 Jul 2022 15:58:31 -0700 Subject: [PATCH 136/225] ui updates --- .../app/admin/switchover/AdminSwitchover.tsx | 271 ++++++++++-------- web/src/app/admin/switchover/SWONode.tsx | 2 +- 2 files changed, 157 insertions(+), 116 deletions(-) diff --git a/web/src/app/admin/switchover/AdminSwitchover.tsx b/web/src/app/admin/switchover/AdminSwitchover.tsx index 304468bbf6..6783d2fc88 100644 --- a/web/src/app/admin/switchover/AdminSwitchover.tsx +++ b/web/src/app/admin/switchover/AdminSwitchover.tsx @@ -1,4 +1,6 @@ import React, { useEffect, useState } from 'react' +import { useTheme, SvgIconProps, Zoom } from '@mui/material' +import Alert from '@mui/material/Alert' import ButtonGroup from '@mui/material/ButtonGroup' import Card from '@mui/material/Card' import CardContent from '@mui/material/CardContent' @@ -6,7 +8,6 @@ import CardHeader from '@mui/material/CardHeader' import Grid from '@mui/material/Grid' import Skeleton from '@mui/material/Skeleton' import Typography from '@mui/material/Typography' -import { Fade, SvgIconProps, Zoom } from '@mui/material' import NoResetIcon from 'mdi-material-ui/DatabaseRefreshOutline' 
import ResetIcon from 'mdi-material-ui/DatabaseRefresh' import NoExecuteIcon from 'mdi-material-ui/DatabaseExportOutline' @@ -27,6 +28,10 @@ import TableBody from '@mui/material/TableBody' import TableCell from '@mui/material/TableCell' import TableHead from '@mui/material/TableHead' import TableRow from '@mui/material/TableRow' +import Tooltip from '@mui/material/Tooltip' +import RemoveIcon from '@mui/icons-material/PlaylistRemove' +import AddIcon from '@mui/icons-material/PlaylistAdd' +import DownIcon from '@mui/icons-material/ArrowDownward' import { TransitionGroup } from 'react-transition-group' import Spinner from '../../loading/components/Spinner' @@ -87,6 +92,10 @@ export default function AdminSwitchover(): JSX.Element { const data = _data?.swoStatus as SWOStatus const [lastAction, setLastAction] = useState('') const [mutationStatus, commit] = useMutation(mutation) + const theme = useTheme() + + const curVer = data?.mainDBVersion.split(' on ') + const nextVer = data?.mainDBVersion.split(' on ') useEffect(() => { const t = setInterval(() => { @@ -224,123 +233,155 @@ export default function AdminSwitchover(): JSX.Element { return   // reserves whitespace } + const headerSize = { titleTypographyProps: { sx: { fontSize: '1.25rem' } } } + return ( - - - - {statusNotices.length > 0 && ( - - - - )} - - - - - {getDetails()} - - : - } - disabled={data?.state === 'done' || mutationStatus.fetching} - variant='outlined' - size='large' - loading={resetLoad} - loadingPosition='start' - onClick={actionHandler('reset')} - > - {resetLoad ? 'Resetting...' : 'Reset'} - - - ) : ( - - ) - } - disabled={data?.state !== 'idle' || mutationStatus.fetching} - variant='outlined' - size='large' - loading={executeLoad} - loadingPosition='start' - onClick={actionHandler('execute')} - > - {executeLoad ? 'Executing...' : 'Execute'} - - - - - + + {statusNotices.length > 0 && ( + + + + )} + + + + + {getDetails()} +
+ + : + } + disabled={data?.state === 'done' || mutationStatus.fetching} + variant='outlined' + size='large' + loading={resetLoad} + loadingPosition='start' + onClick={actionHandler('reset')} + > + {resetLoad ? 'Resetting...' : 'Reset'} + + : + } + disabled={data?.state !== 'idle' || mutationStatus.fetching} + variant='outlined' + size='large' + loading={executeLoad} + loadingPosition='start' + onClick={actionHandler('execute')} + > + {executeLoad ? 'Executing...' : 'Execute'} + + + + + - - - - Main DB Version: {data?.mainDBVersion} -
- Next DB Version: {data?.nextDBVersion} -
-
- - - Application Name - Count - - - - {data?.connections?.map((row) => ( - - - {row.name || '(no name)'} - - {row.count} - - ))} - -
-
-
+ + + + + + + Application + Info + Count + + + + {data?.connections?.map((row) => ( + + + {row?.name?.split('(')[0].replace(/[)(]/g, '') ?? + '(no name)'} + + + {row?.name?.split('(')[1]?.replace(/[)(]/g, '') ?? '-'} + + {row.count} + + ))} + +
+
+
- - {data?.nodes.length > 0 && - data.nodes - .slice() - .sort((a: SWONodeType, b: SWONodeType) => { - const aName = friendlyName(a.id) - const bName = friendlyName(b.id) - if (aName < bName) return -1 - if (aName > bName) return 1 - return 0 - }) - .map((node: SWONodeType) => ( - - ))} - + + + +
+ + + } severity='error'> + From {curVer[0]} + + + theme.palette.primary.main, + }} + /> + + } + severity='success' + sx={{ mb: '16px' }} + > + To {nextVer[0]} + + +
+
-
-
+ + {data?.nodes.length > 0 && + data.nodes + .slice() + .sort((a: SWONodeType, b: SWONodeType) => { + const aName = friendlyName(a.id) + const bName = friendlyName(b.id) + if (aName < bName) return -1 + if (aName > bName) return 1 + return 0 + }) + .map((node: SWONodeType) => ( + + ))} +
+
) } diff --git a/web/src/app/admin/switchover/SWONode.tsx b/web/src/app/admin/switchover/SWONode.tsx index 9a23eced11..dabb6dcd74 100644 --- a/web/src/app/admin/switchover/SWONode.tsx +++ b/web/src/app/admin/switchover/SWONode.tsx @@ -21,7 +21,7 @@ export default function SWONode({ node, name }: SWONodeProps): JSX.Element { const theme = useTheme() return ( - + {name} From 305f6fd37531e7a3534b03c3f9d3efa42aef97c3 Mon Sep 17 00:00:00 2001 From: Nathaniel Caza Date: Tue, 26 Jul 2022 10:19:42 -0500 Subject: [PATCH 137/225] use warning icon for old DB --- web/src/app/admin/switchover/AdminSwitchover.tsx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/web/src/app/admin/switchover/AdminSwitchover.tsx b/web/src/app/admin/switchover/AdminSwitchover.tsx index 6783d2fc88..4cc0ffbf2c 100644 --- a/web/src/app/admin/switchover/AdminSwitchover.tsx +++ b/web/src/app/admin/switchover/AdminSwitchover.tsx @@ -344,7 +344,7 @@ export default function AdminSwitchover(): JSX.Element { > - } severity='error'> + } severity='warning'> From {curVer[0]} From dff87942bbb1dbc2dc93e2b45f06eec5239f1652 Mon Sep 17 00:00:00 2001 From: Nathaniel Caza Date: Wed, 27 Jul 2022 14:31:40 -0500 Subject: [PATCH 138/225] remove unused test file --- smoketest/switchover_test.go | 30 ------------------------------ 1 file changed, 30 deletions(-) delete mode 100644 smoketest/switchover_test.go diff --git a/smoketest/switchover_test.go b/smoketest/switchover_test.go deleted file mode 100644 index 4d8c875eb3..0000000000 --- a/smoketest/switchover_test.go +++ /dev/null @@ -1,30 +0,0 @@ -package smoketest - -import ( - "context" - "testing" - - "github.com/jackc/pgx/v4/stdlib" - "github.com/stretchr/testify/assert" - "github.com/stretchr/testify/require" - "github.com/target/goalert/smoketest/harness" - "github.com/target/goalert/swo" -) - -// TestDBSyncTables ensures the latest state of the database is compatible with the dbsync package. 
-func TestDBSyncTables(t *testing.T) { - t.Parallel() - - h := harness.NewHarness(t, "", "") - defer h.Close() - - c, err := h.App().DB().Conn(context.Background()) - require.NoError(t, err) - defer c.Close() - - err = c.Raw(func(c interface{}) error { - _, err := swo.ScanTables(context.Background(), c.(*stdlib.Conn).Conn()) - return err - }) - assert.NoError(t, err) -} From 3f3cc265482dde4579d0a853ec9d29f5752eac52 Mon Sep 17 00:00:00 2001 From: Nathaniel Caza Date: Wed, 27 Jul 2022 14:52:08 -0500 Subject: [PATCH 139/225] ctx use --- swo/executor.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/swo/executor.go b/swo/executor.go index 9167d747d2..86043b80f1 100644 --- a/swo/executor.go +++ b/swo/executor.go @@ -35,14 +35,14 @@ func (e *Executor) init() { go func() { defer e.Cancel() - e.errCh <- e.mgr.withConnFromBoth(ctx, func(ctx context.Context, oldConn, newConn *pgx.Conn) error { + e.errCh <- e.mgr.withConnFromBoth(ctx, func(_ context.Context, oldConn, newConn *pgx.Conn) error { rep := swosync.NewLogicalReplicator() rep.SetSourceDB(oldConn) rep.SetDestinationDB(newConn) rep.SetProgressFunc(e.mgr.taskMgr.Statusf) // sync - ctx = <-e.ctxCh + ctx := <-e.ctxCh err := rep.Reset(ctx) if err != nil { return fmt.Errorf("reset: %w", err) From b221d4e2bffa1364f9549a5b0bcd94448cc5bbf7 Mon Sep 17 00:00:00 2001 From: Nathaniel Caza Date: Wed, 27 Jul 2022 14:54:29 -0500 Subject: [PATCH 140/225] remove unused code --- swo/swogrp/msgbuf.go | 42 -------------------------------------- swo/swomsg/log.go | 8 -------- swo/swosync/logicalsync.go | 25 ----------------------- swo/swosync/tablesync.go | 13 ------------ 4 files changed, 88 deletions(-) delete mode 100644 swo/swogrp/msgbuf.go diff --git a/swo/swogrp/msgbuf.go b/swo/swogrp/msgbuf.go deleted file mode 100644 index 652ed31fd1..0000000000 --- a/swo/swogrp/msgbuf.go +++ /dev/null @@ -1,42 +0,0 @@ -package swogrp - -import "github.com/target/goalert/swo/swomsg" - -type msgBuf struct { - full chan 
[]swomsg.Message - empty chan []swomsg.Message - - next chan swomsg.Message -} - -func (buf *msgBuf) Append(msg swomsg.Message) { - var msgs []swomsg.Message - select { - case msgs = <-buf.empty: - case msgs = <-buf.full: - } - msgs = append(msgs, msg) - buf.full <- msgs -} -func (buf *msgBuf) Next() <-chan swomsg.Message { return buf.next } - -func newMsgBuf() *msgBuf { - buf := &msgBuf{ - full: make(chan []swomsg.Message, 1), - empty: make(chan []swomsg.Message, 1), - next: make(chan swomsg.Message), - } - buf.empty <- nil - go func() { - for msgs := range buf.full { - msg := msgs[0] - if len(msgs) > 1 { - buf.full <- msgs[1:] - } else { - buf.empty <- msgs[1:] - } - buf.next <- msg - } - }() - return buf -} diff --git a/swo/swomsg/log.go b/swo/swomsg/log.go index f36cd113cc..299ec7f327 100644 --- a/swo/swomsg/log.go +++ b/swo/swomsg/log.go @@ -18,8 +18,6 @@ const PollInterval = time.Second / 3 type Log struct { db *sql.DB - readID int64 - lastLoad time.Time eventCh chan Message @@ -27,12 +25,6 @@ type Log struct { var ErrStaleLog = fmt.Errorf("cannot append until log is read") -type logEvent struct { - ID int64 - Timestamp time.Time - Data []byte -} - func NewLog(ctx context.Context, db *sql.DB) (*Log, error) { conn, err := stdlib.AcquireConn(db) if err != nil { diff --git a/swo/swosync/logicalsync.go b/swo/swosync/logicalsync.go index 2b14f9f4a1..5cb285a56f 100644 --- a/swo/swosync/logicalsync.go +++ b/swo/swosync/logicalsync.go @@ -23,31 +23,6 @@ import ( - 1 for all updates to new DB */ -// bgTx will start a transaction in the background, returning a function that will -// wait for and return the transaction (so multiple can be started simultaneously). 
-func bgTx(ctx context.Context, conn *pgx.Conn, opts pgx.TxOptions) func() (pgx.Tx, error) { - ch := make(chan struct{}) - var err error - var tx pgx.Tx - go func() { - tx, err = conn.BeginTx(ctx, opts) - close(ch) - }() - - return func() (pgx.Tx, error) { - <-ch - return tx, err - } -} - -func cancelTx(ctx context.Context, fn func() (pgx.Tx, error)) { - tx, err := fn() - if err != nil { - return - } - tx.Rollback(ctx) -} - // LogicalSync will sync the source database to the destination database as fast as possible. func (l *LogicalReplicator) LogicalSync(ctx context.Context) error { return l.doSync(ctx, false) } diff --git a/swo/swosync/tablesync.go b/swo/swosync/tablesync.go index 9cdc7505d2..0702b0a37e 100644 --- a/swo/swosync/tablesync.go +++ b/swo/swosync/tablesync.go @@ -27,8 +27,6 @@ type changeEntry struct { RowID } -type changeData struct{} - func NewTableSync(tables []swoinfo.Table) *TableSync { return &TableSync{ tables: tables, @@ -114,17 +112,6 @@ func castIDs(t swoinfo.Table, rowIDs []string) (interface{}, string) { } } -func (c *TableSync) table(name string) swoinfo.Table { - for _, table := range c.tables { - if table.Name() != name { - continue - } - - return table - } - panic(fmt.Sprintf("unknown table %s", name)) -} - // ScanBatchRowReads scans the results of the row read queries. 
func (c *TableSync) ScanBatchRowReads(res pgx.BatchResults) error { if len(c.changedTables) == 0 { From 8b9a51c2e2177ea3f87875b409ab8c721e7ca5e4 Mon Sep 17 00:00:00 2001 From: Nathaniel Caza Date: Wed, 27 Jul 2022 14:55:42 -0500 Subject: [PATCH 141/225] remove duplicate case --- web/src/app/details/Notices.tsx | 2 -- 1 file changed, 2 deletions(-) diff --git a/web/src/app/details/Notices.tsx b/web/src/app/details/Notices.tsx index 51717c8ef5..6e1afe9828 100644 --- a/web/src/app/details/Notices.tsx +++ b/web/src/app/details/Notices.tsx @@ -41,8 +41,6 @@ export function toSeverity(notice: NoticeType): AlertColor { switch (notice.toLowerCase()) { case 'success': return 'success' - case 'info': - return 'info' case 'warning': case 'warn': return 'warning' From 5af9f7da21b527fdcd24757b1a5c7faf584dd6a7 Mon Sep 17 00:00:00 2001 From: Nathaniel Caza Date: Wed, 27 Jul 2022 15:10:24 -0500 Subject: [PATCH 142/225] can't be 'done' due to previous check --- web/src/app/admin/switchover/AdminSwitchover.tsx | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/web/src/app/admin/switchover/AdminSwitchover.tsx b/web/src/app/admin/switchover/AdminSwitchover.tsx index 4cc0ffbf2c..ba8596b39a 100644 --- a/web/src/app/admin/switchover/AdminSwitchover.tsx +++ b/web/src/app/admin/switchover/AdminSwitchover.tsx @@ -263,10 +263,8 @@ export default function AdminSwitchover(): JSX.Element { sx={{ width: '100%', pb: '32px' }} > : - } - disabled={data?.state === 'done' || mutationStatus.fetching} + startIcon={} + disabled={mutationStatus.fetching} variant='outlined' size='large' loading={resetLoad} From 6bd4cd0b859b1d84d515fc5a8014c7c579efa967 Mon Sep 17 00:00:00 2001 From: Nathaniel Caza Date: Wed, 27 Jul 2022 15:11:39 -0500 Subject: [PATCH 143/225] remove unused import --- web/src/app/admin/switchover/AdminSwitchover.tsx | 1 - 1 file changed, 1 deletion(-) diff --git a/web/src/app/admin/switchover/AdminSwitchover.tsx b/web/src/app/admin/switchover/AdminSwitchover.tsx index 
ba8596b39a..9dca2fb392 100644 --- a/web/src/app/admin/switchover/AdminSwitchover.tsx +++ b/web/src/app/admin/switchover/AdminSwitchover.tsx @@ -8,7 +8,6 @@ import CardHeader from '@mui/material/CardHeader' import Grid from '@mui/material/Grid' import Skeleton from '@mui/material/Skeleton' import Typography from '@mui/material/Typography' -import NoResetIcon from 'mdi-material-ui/DatabaseRefreshOutline' import ResetIcon from 'mdi-material-ui/DatabaseRefresh' import NoExecuteIcon from 'mdi-material-ui/DatabaseExportOutline' import ExecuteIcon from 'mdi-material-ui/DatabaseExport' From aff97029fd6ce77e98d1059d613249f207c1ae2a Mon Sep 17 00:00:00 2001 From: Nathaniel Caza Date: Wed, 27 Jul 2022 15:19:57 -0500 Subject: [PATCH 144/225] add sqlc regen target --- Makefile | 3 +++ 1 file changed, 3 insertions(+) diff --git a/Makefile b/Makefile index 29b762dbb5..2503b824dd 100644 --- a/Makefile +++ b/Makefile @@ -88,6 +88,9 @@ cy-wide-prod-run: web/src/build/static/app.js cypress cy-mobile-prod-run: web/src/build/static/app.js cypress $(MAKE) $(MFLAGS) cy-mobile-prod CY_ACTION=run CONTAINER_TOOL=$(CONTAINER_TOOL) BUNDLE=1 +swo/swodb/queries.sql.go: bin/tools/sqlc sqlc.yaml swo/*/*.sql migrate/migrations/*.sql + ./bin/tools/sqlc generate + web/src/schema.d.ts: graphql2/schema.graphql node_modules web/src/genschema.go go generate ./web/src From 77f5c8f2463c35826c1a522f29b693581243262d Mon Sep 17 00:00:00 2001 From: Nathaniel Caza Date: Mon, 1 Aug 2022 11:03:55 -0500 Subject: [PATCH 145/225] spelling --- swo/swosync/start.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/swo/swosync/start.go b/swo/swosync/start.go index f50f37e06c..e05d80db96 100644 --- a/swo/swosync/start.go +++ b/swo/swosync/start.go @@ -18,7 +18,7 @@ func triggerName(table string) string { return sqlutil.QuoteID(fmt.Sprintf("zz_99_change_log_%s", table)) } -// Start intruments and begins tracking changes to the DB. +// Start instruments and begins tracking changes to the DB. 
func (l *LogicalReplicator) Start(ctx context.Context) error { l.printf(ctx, "enabling logical replication...") _, err := l.srcConn.Exec(ctx, changelogQuery) From 40ce73c725ce45c1110cecaff23f884fc1de5940 Mon Sep 17 00:00:00 2001 From: Nathaniel Caza Date: Mon, 1 Aug 2022 11:31:37 -0500 Subject: [PATCH 146/225] document logical sync process --- swo/swosync/logicalsync.go | 59 +++++++++++++++++++++++++++++++------- 1 file changed, 48 insertions(+), 11 deletions(-) diff --git a/swo/swosync/logicalsync.go b/swo/swosync/logicalsync.go index 5cb285a56f..88d47aa97d 100644 --- a/swo/swosync/logicalsync.go +++ b/swo/swosync/logicalsync.go @@ -10,17 +10,54 @@ import ( ) /* - 1. Read all changes (table and row ids) - 2. Fetch all rows from each table - 3. Insert missing rows (table-order) - 4. Update existing rows (table-order) - 5. Delete rows that are no longer in the source database (reverse-table-order) - - Round Trips (normal sync): - - 1 to start tx and read all changes - - 1 to fetch all rows from each table (single batch, 1 query per table) & commit - - 1 to delete all change rows from the DB and commit (background) - - 1 for all updates to new DB + # Logical Sync + + ## Theory of operation + + All changes (INSERT, UPDATE, DELETE) are recorded by triggers in the change_log table as + table/row_id pairs, only tracking a set of changed rows (but not their point-in-time data). + The changes are then read in and applied in batches, by reading the CURRENT state of the row + from the source database and writing it to the destination database, at the time of sync. + + This avoids the need to attempt to find a sequential solution to concurrent updates, as well as + intermediate row states, by only syncing the final result. It also avoids the need to record + intermediate updates. + + As an example, if a row is inserted and then updated multiple times, the next sync will result in + a single insert. 
+ + The process depends on having a valid & consistent view of the source database which can be + obtained by a serializable transaction. Since only the final state of data is used, dependency + solving/ordering for concurrent updates is not necessary. + + ## Basic strategy + + 1. Read all changes as table and row ids + 2. Fetch row data for each changed row + 3. Insert rows from old DB that are missing in new DB, in fkey-dependency order + 4. Update rows from old DB that exist in both, in fkey-dependency order + 5. Delete rows missing from old DB that exist in new DB, in reverse-fkey-dependency order + 6. Delete synced entries from change_log table + + ### Further Notes + + It is important to keep the sync loop as tight as is possible, particularly in "final sync" mode. + When performing the final sync, the database will be locked for the full duration so no additional + changes can be made. This is necessary to ensure that the database is in a consistent state with no + leftover changes before switchover state is updated to `use_next_db`. + + A commit to the source DB ensures the Serializable state of the transaction is maintained, and is + done AFTER sending changes to the new DB as the final sync also points to the new one. + + Round Trips: + - 1 to start tx, read all change ids & sequences (also stop-the-world lock in final mode) + - 1 to fetch row data from each table (single batch, 1 query per table) + - 1 to apply all updates to new DB + - 1 to commit src tx (also switches over to new DB in final mode) + - 1 to delete all synced change rows from the DB + + There is an extra round-trip for last delete as a tradeoff to favor shorter stop-the-world time, + since deleting the last set of changes isn't necessary to wait for after the switchover has been made. */ // LogicalSync will sync the source database to the destination database as fast as possible. 
From b570c97ec919608eed7f436ee5278019efc87996 Mon Sep 17 00:00:00 2001 From: Nathaniel Caza Date: Mon, 1 Aug 2022 11:51:12 -0500 Subject: [PATCH 147/225] update swosync docs --- swo/swosync/doc.go | 50 +++++++++++++++++++++++++++++++ swo/swosync/initialsync.go | 2 +- swo/swosync/logicalreplicator.go | 12 ++++++-- swo/swosync/logicalsync.go | 51 -------------------------------- swo/swosync/reset.go | 1 + swo/swosync/rowset.go | 10 +++---- swo/swosync/safety.go | 4 +-- swo/swosync/sequencesync.go | 2 ++ swo/swosync/tablesync.go | 22 +++++++------- 9 files changed, 82 insertions(+), 72 deletions(-) create mode 100644 swo/swosync/doc.go diff --git a/swo/swosync/doc.go b/swo/swosync/doc.go new file mode 100644 index 0000000000..1e537c8a8d --- /dev/null +++ b/swo/swosync/doc.go @@ -0,0 +1,50 @@ +/* +Package swosync handles the logical replication from the source DB to the destination database during switchover. + +Theory of operation + +All changes (INSERT, UPDATE, DELETE) are recorded by triggers in the change_log table as +table/row_id pairs, only tracking a set of changed rows (but not their point-in-time data). +The changes are then read in and applied in batches, by reading the CURRENT state of the row +from the source database and writing it to the destination database, at the time of sync. + +This avoids the need to attempt to find a sequential solution to concurrent updates, as well as +intermediate row states, by only syncing the final result. It also avoids the need to record +intermediate updates. + +As an example, if a row is inserted and then updated multiple times, the next sync will result in +a single insert. + +The process depends on having a valid & consistent view of the source database which can be +obtained by a serializable transaction. Since only the final state of data is used, dependency +solving/ordering for concurrent updates is not necessary. + +Basic strategy: + 1. Read all changes as table and row ids + 2. 
Fetch row data for each changed row + 3. Insert rows from old DB that are missing in new DB, in fkey-dependency order + 4. Update rows from old DB that exist in both, in fkey-dependency order + 5. Delete rows missing from old DB that exist in new DB, in reverse-fkey-dependency order + 6. Delete synced entries from change_log table + +Further Notes + +It is important to keep the sync loop as tight as is possible, particularly in "final sync" mode. +When performing the final sync, the database will be locked for the full duration so no additional +changes can be made. This is necessary to ensure that the database is in a consistent state with no +leftover changes before switchover state is updated to `use_next_db`. + +A commit to the source DB ensures the Serializable state of the transaction is maintained, and is +done AFTER sending changes to the new DB as the final sync also points to the new one. + +Round Trips: + - 1 to start tx, read all change ids & sequences (also stop-the-world lock in final mode) + - 1 to fetch row data from each table (single batch, 1 query per table) + - 1 to apply all updates to new DB + - 1 to commit src tx (also switches over to new DB in final mode) + - 1 to delete all synced change rows from the DB + +There is an extra round-trip for last delete as a tradeoff to favor shorter stop-the-world time, +since deleting the last set of changes isn't necessary to wait for after the switchover has been made. 
+*/ +package swosync diff --git a/swo/swosync/initialsync.go b/swo/swosync/initialsync.go index 4edb335101..495823fb81 100644 --- a/swo/swosync/initialsync.go +++ b/swo/swosync/initialsync.go @@ -98,7 +98,7 @@ func (l *LogicalReplicator) initialSyncTable(ctx context.Context, srcTx, dstTx p return 0, fmt.Errorf("scan: %w", err) } insertRows = append(insertRows, rowData) - l.dstRows.Set(RowID{table.Name(), id}) + l.dstRows.Set(changeID{table.Name(), id}) if len(insertRows) < 10000 { continue diff --git a/swo/swosync/logicalreplicator.go b/swo/swosync/logicalreplicator.go index 9ef0d16735..d4b6fa41ee 100644 --- a/swo/swosync/logicalreplicator.go +++ b/swo/swosync/logicalreplicator.go @@ -7,6 +7,7 @@ import ( "github.com/target/goalert/swo/swoinfo" ) +// LogicalReplicator manages synchronizing the source database to the destination database. type LogicalReplicator struct { srcConn *pgx.Conn dstConn *pgx.Conn @@ -16,18 +17,23 @@ type LogicalReplicator struct { progFn func(ctx context.Context, format string, args ...interface{}) - dstRows RowSet + dstRows rowSet } +// NewLogicalReplicator creates a new LogicalReplicator. func NewLogicalReplicator() *LogicalReplicator { return &LogicalReplicator{ - dstRows: make(RowSet), + dstRows: make(rowSet), } } -func (l *LogicalReplicator) SetSourceDB(db *pgx.Conn) { l.srcConn = db } +// SetSourceDB sets the source database and must be called before Start. +func (l *LogicalReplicator) SetSourceDB(db *pgx.Conn) { l.srcConn = db } + +// SetDestinationDB sets the destination database and must be called before Start. func (l *LogicalReplicator) SetDestinationDB(db *pgx.Conn) { l.dstConn = db } +// SetProgressFunc sets the function to call when progress is made, such as the currently syncing table. 
func (l *LogicalReplicator) SetProgressFunc(fn func(ctx context.Context, format string, args ...interface{})) { l.progFn = fn } diff --git a/swo/swosync/logicalsync.go b/swo/swosync/logicalsync.go index 88d47aa97d..0e04853fa2 100644 --- a/swo/swosync/logicalsync.go +++ b/swo/swosync/logicalsync.go @@ -9,57 +9,6 @@ import ( "github.com/target/goalert/util/sqlutil" ) -/* - # Logical Sync - - ## Theory of operation - - All changes (INSERT, UPDATE, DELETE) are recorded by triggers in the change_log table as - table/row_id pairs, only tracking a set of changed rows (but not their point-in-time data). - The changes are then read in and applied in batches, by reading the CURRENT state of the row - from the source database and writing it to the destination database, at the time of sync. - - This avoids the need to attempt to find a sequential solution to concurrent updates, as well as - intermediate row states, by only syncing the final result. It also avoids the need to record - intermediate updates. - - As an example, if a row is inserted and then updated multiple times, the next sync will result in - a single insert. - - The process depends on having a valid & consistent view of the source database which can be - obtained by by a serializable transaction. Since only the final state of data is used, dependency - solving/ordering for concurrent updates is not necessary. - - ## Basic strategy - - 1. Read all changes as table and row ids - 2. Fetch row data for each changed row - 3. Insert rows from old DB that are missing in new DB, in fkey-dependency order - 4. Update rows from old DB that exist in both, in fkey-dependency order - 5. Delete rows missing from old DB that exist in new DB, in reverse-fkey-dependency order - 6. Delete synced entries from change_log table - - ### Further Notes - - It is important to keep the sync loop as tight as is possible, particularly in "final sync" mode. 
- When performing the final sync, the database will be locked for the full duration so no additional - changes can be made. This is necessary to ensure that the database is in a consistent state with no - leftover changes before switchover state is updated to `use_next_db`. - - A commit to the source DB ensures the Serializable state of the transaction is maintained, and is - done AFTER sending changes to the new DB as the final sync also points to the new one. - - Round Trips: - - 1 to start tx, read all change ids & sequences (also stop-the-world lock in final mode) - - 1 to fetch row data from each table (single batch, 1 query per table) - - 1 to apply all updates to new DB - - 1 to commit src tx (also switches over to new DB in final mode) - - 1 to delete all synced change rows from the DB - - There is an extra round-trip for last delete as a tradoff to favor shorter stop-the-world time, - since deleting the last set of changes isn't necessary to wait for after the switchover has been made. -*/ - // LogicalSync will sync the source database to the destination database as fast as possible. func (l *LogicalReplicator) LogicalSync(ctx context.Context) error { return l.doSync(ctx, false) } diff --git a/swo/swosync/reset.go b/swo/swosync/reset.go index 358760546b..2d5300846a 100644 --- a/swo/swosync/reset.go +++ b/swo/swosync/reset.go @@ -10,6 +10,7 @@ import ( "github.com/target/goalert/util/sqlutil" ) +// Reset disables tracking changes and truncates the tables in the destination database. 
func (l *LogicalReplicator) Reset(ctx context.Context) error { l.printf(ctx, "disabling logical replication...") diff --git a/swo/swosync/rowset.go b/swo/swosync/rowset.go index 813547ab8a..b68ae57d42 100644 --- a/swo/swosync/rowset.go +++ b/swo/swosync/rowset.go @@ -1,14 +1,14 @@ package swosync type ( - RowSet map[RowID]struct{} - RowID struct{ Table, Row string } + rowSet map[changeID]struct{} + changeID struct{ Table, Row string } ) -func (r RowSet) Set(id RowID) { r[id] = struct{}{} } -func (r RowSet) Delete(id RowID) { delete(r, id) } +func (r rowSet) Set(id changeID) { r[id] = struct{}{} } +func (r rowSet) Delete(id changeID) { delete(r, id) } -func (r RowSet) Has(id RowID) bool { +func (r rowSet) Has(id changeID) bool { _, ok := r[id] return ok } diff --git a/swo/swosync/safety.go b/swo/swosync/safety.go index f248faaf15..a23f64cbdc 100644 --- a/swo/swosync/safety.go +++ b/swo/swosync/safety.go @@ -1,3 +1,5 @@ +package swosync + /* Locks: - 4919: migration lock, used to ensure only a single instance is performing migrations (or any sync operations) @@ -5,8 +7,6 @@ during the switchover, an exclusive lock is acquired by the executing node (stop-the-world). */ -package swosync - // txInProgressLock will cause the transaction to abort if it's unable to get // the exec lock and/or switchover state is not currently in_progress const txInProgressLock = ` diff --git a/swo/swosync/sequencesync.go b/swo/swosync/sequencesync.go index eb43649eff..5f29b2bbfb 100644 --- a/swo/swosync/sequencesync.go +++ b/swo/swosync/sequencesync.go @@ -7,12 +7,14 @@ import ( "github.com/target/goalert/util/sqlutil" ) +// SequenceSync is a helper for synchronizing sequences. type SequenceSync struct { names []string lastValue []int64 isCalled []bool } +// NewSequenceSync creates a new SequenceSync for the given sequence names. 
func NewSequenceSync(names []string) *SequenceSync { return &SequenceSync{names: names} } diff --git a/swo/swosync/tablesync.go b/swo/swosync/tablesync.go index 0702b0a37e..59c5cb7aba 100644 --- a/swo/swosync/tablesync.go +++ b/swo/swosync/tablesync.go @@ -13,24 +13,26 @@ import ( "github.com/target/goalert/util/sqlutil" ) +// TableSync is a helper for syncing tables from the source database to the target database. type TableSync struct { tables []swoinfo.Table changes []changeEntry changedTables []string - changedData map[RowID]json.RawMessage + changedData map[changeID]json.RawMessage } type changeEntry struct { id int64 - RowID + changeID } +// NewTableSync creates a new TableSync for the given tables. func NewTableSync(tables []swoinfo.Table) *TableSync { return &TableSync{ tables: tables, - changedData: make(map[RowID]json.RawMessage), + changedData: make(map[changeID]json.RawMessage), } } @@ -55,7 +57,7 @@ func (c *TableSync) ScanBatchChangeRead(res pgx.BatchResults) error { return err } - c.changes = append(c.changes, changeEntry{id: id, RowID: RowID{table, rowID}}) + c.changes = append(c.changes, changeEntry{id: id, changeID: changeID{table, rowID}}) } return rows.Err() @@ -135,7 +137,7 @@ func (c *TableSync) ScanBatchRowReads(res pgx.BatchResults) error { return fmt.Errorf("scan changed rows from %s: %w", tableName, err) } - c.changedData[RowID{tableName, id}] = row + c.changedData[changeID{tableName, id}] = row } } @@ -160,7 +162,7 @@ func (c *TableSync) ExecDeleteChanges(ctx context.Context, srcConn *pgx.Conn) (i return int64(len(ids)), nil } -func (c *TableSync) AddBatchWrites(b *pgx.Batch, dstRows RowSet) { +func (c *TableSync) AddBatchWrites(b *pgx.Batch, dstRows rowSet) { type pending struct { inserts []json.RawMessage updates []json.RawMessage @@ -173,20 +175,20 @@ func (c *TableSync) AddBatchWrites(b *pgx.Batch, dstRows RowSet) { p = &pending{} pendingByTable[chg.Table] = p } - newRowData := c.changedData[chg.RowID] + newRowData := 
c.changedData[chg.changeID] if newRowData == nil { // row was deleted - dstRows.Delete(chg.RowID) + dstRows.Delete(chg.changeID) p.deletes = append(p.deletes, chg.Row) continue } - if dstRows.Has(chg.RowID) { + if dstRows.Has(chg.changeID) { // row was updated p.updates = append(p.updates, newRowData) } else { // row was inserted - dstRows.Set(chg.RowID) + dstRows.Set(chg.changeID) p.inserts = append(p.inserts, newRowData) } } From 152c10887658bdaecdc05a895e03a23cdde07cb4 Mon Sep 17 00:00:00 2001 From: Nathaniel Caza Date: Mon, 1 Aug 2022 11:57:11 -0500 Subject: [PATCH 148/225] just use readme --- swo/swosync/{doc.go => README.md} | 39 ++++++++++++++++--------------- 1 file changed, 20 insertions(+), 19 deletions(-) rename swo/swosync/{doc.go => README.md} (65%) diff --git a/swo/swosync/doc.go b/swo/swosync/README.md similarity index 65% rename from swo/swosync/doc.go rename to swo/swosync/README.md index 1e537c8a8d..102e7b7b44 100644 --- a/swo/swosync/doc.go +++ b/swo/swosync/README.md @@ -1,7 +1,8 @@ -/* -Package swosync handles the logical replication from the source DB to the destination database during switchover. +# Logical Sync -Theory of operation +Package `swosync` handles the logical replication from the source DB to the destination database during switchover. + +## Theory of operation All changes (INSERT, UPDATE, DELETE) are recorded by triggers in the change_log table as table/row_id pairs, only tracking a set of changed rows (but not their point-in-time data). @@ -19,15 +20,16 @@ The process depends on having a valid & consistent view of the source database w obtained by by a serializable transaction. Since only the final state of data is used, dependency solving/ordering for concurrent updates is not necessary. -Basic strategy: - 1. Read all changes as table and row ids - 2. Fetch row data for each changed row - 3. Insert rows from old DB that are missing in new DB, in fkey-dependency order - 4. 
Update rows from old DB that exist in both, in fkey-dependency order - 5. Delete rows missing from old DB that exist in new DB, in reverse-fkey-dependency order - 6. Delete synced entries from change_log table +### Basic strategy + +1. Read all changes as table and row ids +2. Fetch row data for each changed row +3. Insert rows from old DB that are missing in new DB, in fkey-dependency order +4. Update rows from old DB that exist in both, in fkey-dependency order +5. Delete rows missing from old DB that exist in new DB, in reverse-fkey-dependency order +6. Delete synced entries from change_log table -Further Notes +## Further Notes It is important to keep the sync loop as tight as is possible, particularly in "final sync" mode. When performing the final sync, the database will be locked for the full duration so no additional @@ -37,14 +39,13 @@ leftover changes before switchover state is updated to `use_next_db`. A commit to the source DB ensures the Serializable state of the transaction is maintained, and is done AFTER sending changes to the new DB as the final sync also points to the new one. -Round Trips: - - 1 to start tx, read all change ids & sequences (also stop-the-world lock in final mode) - - 1 to fetch row data from each table (single batch, 1 query per table) - - 1 to apply all updates to new DB - - 1 to commit src tx (also switches over to new DB in final mode) - - 1 to delete all synced change rows from the DB +### Round Trips + +- 1 to start tx, read all change ids & sequences (also stop-the-world lock in final mode) +- 1 to fetch row data from each table (single batch, 1 query per table) +- 1 to apply all updates to new DB +- 1 to commit src tx (also switches over to new DB in final mode) +- 1 to delete all synced change rows from the DB There is an extra round-trip for last delete as a tradoff to favor shorter stop-the-world time, since deleting the last set of changes isn't necessary to wait for after the switchover has been made. 
-*/ -package swosync From 5f4d9546132af7f7ca9789aced990de869d515e6 Mon Sep 17 00:00:00 2001 From: Nathaniel Caza Date: Mon, 1 Aug 2022 12:23:30 -0500 Subject: [PATCH 149/225] more docs --- swo/swogrp/README.md | 66 ++++++++++++++++++++++++++++++++++++++++++++ swo/swomsg/log.go | 2 ++ 2 files changed, 68 insertions(+) create mode 100644 swo/swogrp/README.md diff --git a/swo/swogrp/README.md b/swo/swogrp/README.md new file mode 100644 index 0000000000..b0eed982a2 --- /dev/null +++ b/swo/swogrp/README.md @@ -0,0 +1,66 @@ +# SWO Group + +The `swogrp` package handles orchestrating the state and transitions of the SWO process. The state of the cluster can be determined by following the sequence in the message log, which is the source of truth. + +## Cluster State + +```mermaid +sequenceDiagram +actor admin as Admin +participant api as API Node +participant log as Message Log +participant engine as Engine Node + + +note over admin,engine: Cluster State: **Unknown** +admin ->> api : Click(Reset) +activate api +api ->> log : "cancel" +api ->> api : DisableTriggers() +api ->> log : "reset" +api -->> admin: OK +deactivate api + +note over admin,engine: Cluster State: **Resetting** + +engine ->> log: "hello" +activate engine +note over engine: Becomes Leader +api ->> log: "hello" +engine ->> engine: Wait 3s for "hello" messages +engine ->> log: "reset-end" +deactivate engine +note over admin,engine: Cluster State: **Idle** + +admin ->> api: Click(Execute) +activate api +api ->> log: "execute" +api -->> admin: OK +deactivate api + +note over admin,engine: Cluster State: **Syncing** + +log -->> engine: "execute" +activate engine +engine ->> engine: EnableTriggers() +engine ->> engine: InitialSync() +engine ->> engine: LogicalSync() x10 +engine ->> log: "pause" +deactivate engine + +note over admin,engine: Cluster State: **Pausing** +engine ->> engine: Pause() +engine ->> log: "paused" +api ->> api: Pause() +api ->> log: "paused" + +note over admin,engine: Cluster State: 
**Executing** +log -->> engine: 2/2 "paused" +activate engine +engine ->> engine: LogicalSync() x10 +engine ->> engine: FinalSync() +engine ->> log: "done" +deactivate engine + +note over admin,engine: Cluster State: **Done** +``` diff --git a/swo/swomsg/log.go b/swo/swomsg/log.go index 299ec7f327..0efaac75c7 100644 --- a/swo/swomsg/log.go +++ b/swo/swomsg/log.go @@ -102,6 +102,8 @@ func (l *Log) loadEvents(ctx context.Context, lastID int64) ([]swodb.SwitchoverL return swodb.New(conn).LogEvents(ctx, lastID) } +// Append will append a message to the end of the log. Using an exclusive lock on the table, it ensures that each message will increment the log ID +// by exactly 1 with no gaps. All observers will see the messages in the same order. func (l *Log) Append(ctx context.Context, msg Message) error { data, err := json.Marshal(msg) if err != nil { From c605c294f05c84cd4fc599ee4d4f59b23c9e03e9 Mon Sep 17 00:00:00 2001 From: Nathaniel Caza Date: Mon, 8 Aug 2022 10:38:39 -0500 Subject: [PATCH 150/225] replace slowproxy with speedbump --- Makefile | 2 +- Procfile.swo | 2 +- devtools/slowproxy/main.go | 53 ------------- devtools/slowproxy/ratelimiter.go | 76 ------------------- devtools/tools.go | 1 + go.mod | 11 ++- go.sum | 13 +++- ....sql => 20220808103727-switchover-mk2.sql} | 0 8 files changed, 24 insertions(+), 134 deletions(-) delete mode 100644 devtools/slowproxy/main.go delete mode 100644 devtools/slowproxy/ratelimiter.go rename migrate/migrations/{20220628125954-switchover-mk2.sql => 20220808103727-switchover-mk2.sql} (100%) diff --git a/Makefile b/Makefile index 2503b824dd..06c21931f3 100644 --- a/Makefile +++ b/Makefile @@ -207,7 +207,7 @@ postgres: bin/waitfor -e POSTGRES_HOST_AUTH_METHOD=trust \ --name goalert-postgres \ -p 5432:5432 \ - docker.io/library/postgres:13-alpine && ./bin/waitfor "$(DB_URL)" && make regendb) || $(CONTAINER_TOOL) start goalert-postgres + docker.io/library/postgres:13-alpine && ./bin/waitfor "$(DB_URL)" && make regendb) || 
($(CONTAINER_TOOL) start goalert-postgres && ./bin/waitfor "$(DB_URL)") regendb: bin/resetdb bin/goalert config.json.bak ./bin/resetdb -with-rand-data -admin-id=00000000-0000-0000-0000-000000000001 diff --git a/Procfile.swo b/Procfile.swo index 52f02cf366..74b2b2d494 100644 --- a/Procfile.swo +++ b/Procfile.swo @@ -34,4 +34,4 @@ ga9: ./bin/goalert -l=localhost:3057 --ui-dir=web/src/build --db-url=postgres:// proxy: go run ./devtools/simpleproxy -addr localhost:3030 /=http://localhost:3040,http://localhost:3050,http://localhost:3051,http://localhost:3052,http://localhost:3053,http://localhost:3054,http://localhost:3055,http://localhost:3056,http://localhost:3057 -slow: go run ./devtools/slowproxy/ -d 25ms -i 1000000 -o 1000000 -j 10ms +slow: go run github.com/kffl/speedbump --port=5435 --latency=10ms --saw-amplitude=25ms --saw-period=1s localhost:5432 diff --git a/devtools/slowproxy/main.go b/devtools/slowproxy/main.go deleted file mode 100644 index 968a642316..0000000000 --- a/devtools/slowproxy/main.go +++ /dev/null @@ -1,53 +0,0 @@ -package main - -import ( - "flag" - "io" - "log" - "net" -) - -func main() { - rateOut := flag.Int("o", 0, "Max data rate (in bytes/sec) from client to server.") - rateIn := flag.Int("i", 0, "Max data rate (in bytes/sec) from server to client.") - latency := flag.Duration("d", 0, "Min latency (one-way).") - jitter := flag.Duration("j", 0, "Jitter in (random +/- to latency).") - l := flag.String("l", "localhost:5435", "Listen address.") - c := flag.String("c", "localhost:5432", "Server connect address.") - flag.Parse() - log.SetFlags(log.Lshortfile) - - limitOut := newRateLimiter(*rateOut, *latency, *jitter) - limitIn := newRateLimiter(*rateIn, *latency, *jitter) - - srv, err := net.Listen("tcp", *l) - if err != nil { - log.Fatal(err) - } - - proxy := func(dst, src net.Conn, limiter *rateLimiter) { - defer dst.Close() - defer src.Close() - - io.Copy(limiter.NewWriter(dst), src) - } - - for { - conn, err := srv.Accept() - if err != nil 
{ - log.Fatal(err) - } - go func() { - dbConn, err := net.Dial("tcp", *c) - if err != nil { - log.Println("connect error:", err) - conn.Close() - return - } - - log.Println("CONNECT", conn.RemoteAddr().String()) - go proxy(conn, dbConn, limitOut) - go proxy(dbConn, conn, limitIn) - }() - } -} diff --git a/devtools/slowproxy/ratelimiter.go b/devtools/slowproxy/ratelimiter.go deleted file mode 100644 index 65708eda51..0000000000 --- a/devtools/slowproxy/ratelimiter.go +++ /dev/null @@ -1,76 +0,0 @@ -package main - -import ( - "io" - "math/rand" - "time" -) - -type rateLimiter struct { - bucket chan int - rate bool - latency time.Duration - jitter time.Duration -} - -func newRateLimiter(bps int, latency, jitter time.Duration) *rateLimiter { - ch := make(chan int) - - bpMs := float64(bps) / 1000 - go func() { - t := time.NewTicker(time.Millisecond) - var count float64 - for { - if count >= bpMs { - <-t.C - count -= bpMs - if count < 0 { - count = 0 - } - continue - } - - select { - case <-t.C: - count -= bpMs - if count < 0 { - count = 0 - } - case val := <-ch: - count += float64(val) - } - } - }() - - return &rateLimiter{ - rate: bps > 0, - bucket: ch, - latency: latency, - jitter: jitter, - } -} - -func (r *rateLimiter) WaitFor(count int) time.Duration { - waitUntil := time.Now().Add((r.latency - (r.jitter / 2) + time.Duration(rand.Float64()*float64(r.jitter)))) - r.bucket <- count - return time.Until(waitUntil) / 2 -} - -func (r *rateLimiter) NewWriter(w io.Writer) io.Writer { - return &rateLimitWriter{ - w: w, - l: r, - } -} - -type rateLimitWriter struct { - l *rateLimiter - w io.Writer -} - -func (w *rateLimitWriter) Write(p []byte) (int, error) { - dur := w.l.WaitFor(len(p)) - time.Sleep(dur) - defer time.Sleep(dur) - return w.w.Write(p) -} diff --git a/devtools/tools.go b/devtools/tools.go index ca98374270..6cb360d281 100644 --- a/devtools/tools.go +++ b/devtools/tools.go @@ -6,6 +6,7 @@ package devtools import ( _ "github.com/99designs/gqlgen" _ 
"github.com/gordonklaus/ineffassign" + _ "github.com/kffl/speedbump" _ "github.com/kyleconroy/sqlc/cmd/sqlc" _ "github.com/mailhog/MailHog" _ "golang.org/x/tools/cmd/goimports" diff --git a/go.mod b/go.mod index 5eb2079917..0b66c27649 100644 --- a/go.mod +++ b/go.mod @@ -35,7 +35,7 @@ require ( github.com/slack-go/slack v0.10.3 github.com/spf13/cobra v1.4.0 github.com/spf13/viper v1.12.0 - github.com/stretchr/testify v1.7.2 + github.com/stretchr/testify v1.8.0 github.com/ttacon/libphonenumber v1.2.1 github.com/vektah/gqlparser/v2 v2.4.4 golang.org/x/crypto v0.0.0-20220525230936-793ad666bf5e @@ -54,7 +54,10 @@ require ( honnef.co/go/tools v0.3.2 ) -require github.com/kyleconroy/sqlc v1.14.0 +require ( + github.com/kffl/speedbump v0.2.0 + github.com/kyleconroy/sqlc v1.14.0 +) require ( github.com/BurntSushi/toml v1.1.0 // indirect @@ -63,6 +66,8 @@ require ( github.com/Masterminds/sprig v2.22.0+incompatible // indirect github.com/PuerkitoBio/goquery v1.8.0 // indirect github.com/agnivade/levenshtein v1.1.1 // indirect + github.com/alecthomas/template v0.0.0-20190718012654-fb15b899a751 // indirect + github.com/alecthomas/units v0.0.0-20211218093645-b94a6e3cc137 // indirect github.com/andybalholm/cascadia v1.3.1 // indirect github.com/antlr/antlr4/runtime/Go/antlr v0.0.0-20220209173558-ad29539cd2e9 // indirect github.com/beorn7/perks v1.0.1 // indirect @@ -77,6 +82,7 @@ require ( github.com/gorilla/mux v1.8.0 // indirect github.com/gorilla/pat v1.0.1 // indirect github.com/gorilla/websocket v1.5.0 // indirect + github.com/hashicorp/go-hclog v1.2.1 // indirect github.com/hashicorp/golang-lru v0.5.4 // indirect github.com/hashicorp/hcl v1.0.0 // indirect github.com/huandu/xstrings v1.3.2 // indirect @@ -144,6 +150,7 @@ require ( golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4 // indirect golang.org/x/text v0.3.7 // indirect google.golang.org/appengine v1.6.7 // indirect + gopkg.in/alecthomas/kingpin.v2 v2.2.6 // indirect gopkg.in/alexcesaro/quotedprintable.v3 
v3.0.0-20150716171945-2caba252f4dc // indirect gopkg.in/ini.v1 v1.66.6 // indirect gopkg.in/mgo.v2 v2.0.0-20190816093944-a6b53ec6cb22 // indirect diff --git a/go.sum b/go.sum index 3d3dd49182..f3b7d17edb 100644 --- a/go.sum +++ b/go.sum @@ -65,10 +65,13 @@ github.com/agnivade/levenshtein v1.0.1/go.mod h1:CURSv5d9Uaml+FovSIICkLbAUZ9S4Rq github.com/agnivade/levenshtein v1.1.1 h1:QY8M92nrzkmr798gCo3kmMyqXFzdQVpxLlGPRBij0P8= github.com/agnivade/levenshtein v1.1.1/go.mod h1:veldBMzWxcCG2ZvUTKD2kJNRdCk5hVbJomOvKkmgYbo= github.com/alecthomas/template v0.0.0-20160405071501-a0175ee3bccc/go.mod h1:LOuyumcjzFXgccqObfd/Ljyb9UuFJ6TxHnclSeseNhc= +github.com/alecthomas/template v0.0.0-20190718012654-fb15b899a751 h1:JYp7IbQjafoB+tBA3gMyHYHrpOtNuDiK/uB5uXxq5wM= github.com/alecthomas/template v0.0.0-20190718012654-fb15b899a751/go.mod h1:LOuyumcjzFXgccqObfd/Ljyb9UuFJ6TxHnclSeseNhc= github.com/alecthomas/units v0.0.0-20151022065526-2efee857e7cf/go.mod h1:ybxpYRFXyAe+OPACYpWeL0wqObRcbAqCMya13uyzqw0= github.com/alecthomas/units v0.0.0-20190717042225-c3de453c63f4/go.mod h1:ybxpYRFXyAe+OPACYpWeL0wqObRcbAqCMya13uyzqw0= github.com/alecthomas/units v0.0.0-20190924025748-f65c72e2690d/go.mod h1:rBZYJk541a8SKzHPHnH3zbiI+7dagKZ0cgpgrD7Fyho= +github.com/alecthomas/units v0.0.0-20211218093645-b94a6e3cc137 h1:s6gZFSlWYmbqAuRjVTiNNhvNRfY2Wxp9nhfyel4rklc= +github.com/alecthomas/units v0.0.0-20211218093645-b94a6e3cc137/go.mod h1:OMCwj8VM1Kc9e19TLln2VL61YJF0x1XFtfdL4JdbSyE= github.com/andreyvit/diff v0.0.0-20170406064948-c7f18ee00883 h1:bvNMNQO63//z+xNgfBlViaCIJKLlCJ6/fmUseuG0wVQ= github.com/andreyvit/diff v0.0.0-20170406064948-c7f18ee00883/go.mod h1:rCTlJbsFo29Kk6CurOXKm700vrz8f0KW0JNfpkRJY/8= github.com/andybalholm/cascadia v1.0.0/go.mod h1:GsXiBklL0woXo1j/WYWtSYYC4ouU9PqHO0sqidkEA4Y= @@ -286,6 +289,8 @@ github.com/hashicorp/consul/api v1.1.0/go.mod h1:VmuI/Lkw1nC05EYQWNKwWGbkg+FbDBt github.com/hashicorp/consul/sdk v0.1.1/go.mod h1:VKf9jXwCTEY1QZP2MOLRhb5i/I/ssyNV1vwHyQBF0x8= 
github.com/hashicorp/errwrap v1.0.0/go.mod h1:YH+1FKiLXxHSkmPseP+kNlulaMuP3n2brvKWEqk/Jc4= github.com/hashicorp/go-cleanhttp v0.5.1/go.mod h1:JpRdi6/HCYpAwUzNwuwqhbovhLtngrth3wmdIIUrZ80= +github.com/hashicorp/go-hclog v1.2.1 h1:YQsLlGDJgwhXFpucSPyVbCBviQtjlHv3jLTlp8YmtEw= +github.com/hashicorp/go-hclog v1.2.1/go.mod h1:W4Qnvbt70Wk/zYJryRzDRU/4r0kIg0PVHBcfoyhpF5M= github.com/hashicorp/go-immutable-radix v1.0.0/go.mod h1:0y9vanUI8NX6FsYoO3zeMjhV/C5i9g4Q3DwcSNZ4P60= github.com/hashicorp/go-msgpack v0.5.3/go.mod h1:ahLV/dePpqEmjfWmKiqvPkv/twdG7iPBM1vqhUKIvfM= github.com/hashicorp/go-multierror v1.0.0/go.mod h1:dHtQlpGsu+cZNNAkkCN/P3hoUDHhCYQXV3UM06sGGrk= @@ -407,6 +412,8 @@ github.com/julienschmidt/httprouter v1.2.0/go.mod h1:SYymIcj16QtmaHHD7aYtjjsJG7V github.com/julienschmidt/httprouter v1.3.0/go.mod h1:JR6WtHb+2LUe8TCKY3cZOxFyyO8IZAc4RVcycCCAKdM= github.com/karrick/godirwalk v1.16.1/go.mod h1:j4mkqPuvaLI8mp1DroR3P6ad7cyYd4c1qeJ3RV7ULlk= github.com/kevinmbeaulieu/eq-go v1.0.0/go.mod h1:G3S8ajA56gKBZm4UB9AOyoOS37JO3roToPzKNM8dtdM= +github.com/kffl/speedbump v0.2.0 h1:SEmqKa9DIcN2FZaFeT8raG7noAhcWu2vdBSE8DhqqEs= +github.com/kffl/speedbump v0.2.0/go.mod h1:6nNWIwc8zM0l41fIArBiVdvcomulEd8v5RX9YBjJoQ4= github.com/kisielk/errcheck v1.5.0/go.mod h1:pFxgyoBC7bSaBwPgfKdkLd5X25qrDl4LWUI2bnpBCr8= github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck= github.com/konsorten/go-windows-terminal-sequences v1.0.1/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ= @@ -626,6 +633,7 @@ github.com/ssor/bom v0.0.0-20170718123548-6386211fdfcf/go.mod h1:RJID2RhlZKId02n github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= github.com/stretchr/objx v0.1.1/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= github.com/stretchr/objx v0.2.0/go.mod h1:qt09Ya8vawLte6SNmTgCsAVtYtaKzEcn8ATUoHMkEqE= +github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw= github.com/stretchr/testify v1.2.2/go.mod 
h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs= github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI= github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81PSLYec5m4= @@ -633,8 +641,9 @@ github.com/stretchr/testify v1.5.1/go.mod h1:5W2xD1RspED5o8YsWQXVCued0rvSQ+mT+I5 github.com/stretchr/testify v1.6.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= -github.com/stretchr/testify v1.7.2 h1:4jaiDzPyXQvSd7D0EjG45355tLlV3VOECpq10pLC+8s= github.com/stretchr/testify v1.7.2/go.mod h1:R6va5+xMeoiuVRoj+gSkQ7d3FALtqAAGI1FQKckRals= +github.com/stretchr/testify v1.8.0 h1:pSgiaMZlXftHpm5L7V1+rVB+AZJydKsMxsQBIJw4PKk= +github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU= github.com/subosito/gotenv v1.2.0/go.mod h1:N0PQaV/YGNqwC0u51sEeR/aUtSLEXKX9iv69rRypqCw= github.com/subosito/gotenv v1.4.0 h1:yAzM1+SmVcz5R4tXGsNMu1jUl2aOJXoiWUCEwwnGrvs= github.com/subosito/gotenv v1.4.0/go.mod h1:mZd6rFysKEcUhUHXJk0C/08wAgyDBFuwEYL7vWWGaGo= @@ -909,6 +918,7 @@ golang.org/x/sys v0.0.0-20211019181941-9d821ace8654/go.mod h1:oPkhp1MJrh7nUepCBc golang.org/x/sys v0.0.0-20211216021012-1d35b9e2eb4e/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220114195835-da31bd327af9/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220412211240-33da011f77ad/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.0.0-20220503163025-988cb79eb6c6/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a h1:dGzPydgVsqGcTRVwiLJ1jVbufYwmzD3LfVPLKsKg+0k= golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/term 
v0.0.0-20201117132131-f5c789dd3221/go.mod h1:Nr5EML6q2oocZ2LXRh80K7BxOlk5/8JxuGnuhpl+muw= @@ -1115,6 +1125,7 @@ google.golang.org/protobuf v1.26.0/go.mod h1:9q0QmTI4eRPtz6boOQmLYwt+qCgq0jsYwAQ google.golang.org/protobuf v1.27.1/go.mod h1:9q0QmTI4eRPtz6boOQmLYwt+qCgq0jsYwAQnmE0givc= google.golang.org/protobuf v1.28.0 h1:w43yiav+6bVFTBQFZX0r7ipe9JQ1QsbMgHwbBziscLw= google.golang.org/protobuf v1.28.0/go.mod h1:HV8QOd/L58Z+nl8r43ehVNZIU/HEI6OcFqwMG9pJV4I= +gopkg.in/alecthomas/kingpin.v2 v2.2.6 h1:jMFz6MfLP0/4fUyZle81rXUoxOBFi19VUFKVDOQfozc= gopkg.in/alecthomas/kingpin.v2 v2.2.6/go.mod h1:FMv+mEhP44yOT+4EoQTLFTRgOQ1FBLkstjWtayDeSgw= gopkg.in/alexcesaro/quotedprintable.v3 v3.0.0-20150716171945-2caba252f4dc h1:2gGKlE2+asNV9m7xrywl36YYNnBG5ZQ0r/BOOxqPpmk= gopkg.in/alexcesaro/quotedprintable.v3 v3.0.0-20150716171945-2caba252f4dc/go.mod h1:m7x9LTH6d71AHyAX77c9yqWCCa3UKHcVEj9y7hAtKDk= diff --git a/migrate/migrations/20220628125954-switchover-mk2.sql b/migrate/migrations/20220808103727-switchover-mk2.sql similarity index 100% rename from migrate/migrations/20220628125954-switchover-mk2.sql rename to migrate/migrations/20220808103727-switchover-mk2.sql From f25fc7fad0499001c7d97ac868c6c05946adc417 Mon Sep 17 00:00:00 2001 From: Nathaniel Caza Date: Mon, 8 Aug 2022 10:54:03 -0500 Subject: [PATCH 151/225] copy Alert color for bad app config --- web/src/app/admin/switchover/AdminSwitchover.tsx | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/web/src/app/admin/switchover/AdminSwitchover.tsx b/web/src/app/admin/switchover/AdminSwitchover.tsx index 9dca2fb392..f1e99e75f6 100644 --- a/web/src/app/admin/switchover/AdminSwitchover.tsx +++ b/web/src/app/admin/switchover/AdminSwitchover.tsx @@ -33,6 +33,7 @@ import AddIcon from '@mui/icons-material/PlaylistAdd' import DownIcon from '@mui/icons-material/ArrowDownward' import { TransitionGroup } from 'react-transition-group' import Spinner from '../../loading/components/Spinner' +import { darken, lighten } 
from '@mui/system' const query = gql` query { @@ -92,6 +93,8 @@ export default function AdminSwitchover(): JSX.Element { const [lastAction, setLastAction] = useState('') const [mutationStatus, commit] = useMutation(mutation) const theme = useTheme() + const getColor = theme.palette.mode === 'light' ? darken : lighten + const getBackgroundColor = theme.palette.mode === 'light' ? lighten : darken const curVer = data?.mainDBVersion.split(' on ') const nextVer = data?.mainDBVersion.split(' on ') @@ -305,11 +308,16 @@ export default function AdminSwitchover(): JSX.Element { {data?.connections?.map((row) => ( + row.name.includes('GoAlert') && !row.name.includes('SWO') + ? getBackgroundColor(theme.palette.error.light, 0.9) + : 'inherit', + color: (theme) => + row.name.includes('GoAlert') && !row.name.includes('SWO') + ? getColor(theme.palette.error.light, 0.6) + : 'inherit', }} > From 73be9f809904562285c1b26c9ca54780127ba793 Mon Sep 17 00:00:00 2001 From: Nathaniel Caza Date: Mon, 8 Aug 2022 11:52:51 -0500 Subject: [PATCH 152/225] update mk and increase max wait --- Makefile.binaries.mk | 29 +++++------------------------ swo/swosync/safety.go | 2 +- 2 files changed, 6 insertions(+), 25 deletions(-) diff --git a/Makefile.binaries.mk b/Makefile.binaries.mk index 28b2e7ac1e..acfdfecea8 100644 --- a/Makefile.binaries.mk +++ b/Makefile.binaries.mk @@ -383,25 +383,6 @@ $(BIN_DIR)/windows-amd64/simpleproxy.exe: $(GO_DEPS) GOOS=windows GOARCH=amd64 go build -trimpath -o $@ ./devtools/simpleproxy -$(BIN_DIR)/slowproxy: $(GO_DEPS) - go build -o $@ ./devtools/slowproxy - -$(BIN_DIR)/darwin-amd64/slowproxy: $(GO_DEPS) - GOOS=darwin GOARCH=amd64 go build -trimpath -o $@ ./devtools/slowproxy - -$(BIN_DIR)/linux-amd64/slowproxy: $(GO_DEPS) - GOOS=linux GOARCH=amd64 go build -trimpath -o $@ ./devtools/slowproxy - -$(BIN_DIR)/linux-arm/slowproxy: $(GO_DEPS) - GOOS=linux GOARCH=arm GOARM=7 go build -trimpath -o $@ ./devtools/slowproxy - -$(BIN_DIR)/linux-arm64/slowproxy: $(GO_DEPS) - 
GOOS=linux GOARCH=arm64 go build -trimpath -o $@ ./devtools/slowproxy - -$(BIN_DIR)/windows-amd64/slowproxy.exe: $(GO_DEPS) - GOOS=windows GOARCH=amd64 go build -trimpath -o $@ ./devtools/slowproxy - - $(BIN_DIR)/waitfor: $(GO_DEPS) go build -o $@ ./devtools/waitfor @@ -423,27 +404,27 @@ $(BIN_DIR)/windows-amd64/waitfor.exe: $(GO_DEPS) -$(BIN_DIR)/darwin-amd64/_all: $(BIN_DIR)/darwin-amd64/goalert-smoketest $(BIN_DIR)/darwin-amd64/goalert $(BIN_DIR)/darwin-amd64/goalert-slack-email-sync $(BIN_DIR)/darwin-amd64/mockslack $(BIN_DIR)/darwin-amd64/pgdump-lite $(BIN_DIR)/darwin-amd64/pgmocktime $(BIN_DIR)/darwin-amd64/procwrap $(BIN_DIR)/darwin-amd64/psql-lite $(BIN_DIR)/darwin-amd64/resetdb $(BIN_DIR)/darwin-amd64/runproc $(BIN_DIR)/darwin-amd64/sendit $(BIN_DIR)/darwin-amd64/sendit-server $(BIN_DIR)/darwin-amd64/sendit-token $(BIN_DIR)/darwin-amd64/simpleproxy $(BIN_DIR)/darwin-amd64/slowproxy $(BIN_DIR)/darwin-amd64/waitfor +$(BIN_DIR)/darwin-amd64/_all: $(BIN_DIR)/darwin-amd64/goalert-smoketest $(BIN_DIR)/darwin-amd64/goalert $(BIN_DIR)/darwin-amd64/goalert-slack-email-sync $(BIN_DIR)/darwin-amd64/mockslack $(BIN_DIR)/darwin-amd64/pgdump-lite $(BIN_DIR)/darwin-amd64/pgmocktime $(BIN_DIR)/darwin-amd64/procwrap $(BIN_DIR)/darwin-amd64/psql-lite $(BIN_DIR)/darwin-amd64/resetdb $(BIN_DIR)/darwin-amd64/runproc $(BIN_DIR)/darwin-amd64/sendit $(BIN_DIR)/darwin-amd64/sendit-server $(BIN_DIR)/darwin-amd64/sendit-token $(BIN_DIR)/darwin-amd64/simpleproxy $(BIN_DIR)/darwin-amd64/waitfor $(BIN_DIR)/darwin-amd64/goalert-smoketest: $(GO_DEPS) GOOS=darwin GOARCH=amd64 go test ./smoketest -c -o $@ -$(BIN_DIR)/linux-amd64/_all: $(BIN_DIR)/linux-amd64/goalert-smoketest $(BIN_DIR)/linux-amd64/goalert $(BIN_DIR)/linux-amd64/goalert-slack-email-sync $(BIN_DIR)/linux-amd64/mockslack $(BIN_DIR)/linux-amd64/pgdump-lite $(BIN_DIR)/linux-amd64/pgmocktime $(BIN_DIR)/linux-amd64/procwrap $(BIN_DIR)/linux-amd64/psql-lite $(BIN_DIR)/linux-amd64/resetdb $(BIN_DIR)/linux-amd64/runproc 
$(BIN_DIR)/linux-amd64/sendit $(BIN_DIR)/linux-amd64/sendit-server $(BIN_DIR)/linux-amd64/sendit-token $(BIN_DIR)/linux-amd64/simpleproxy $(BIN_DIR)/linux-amd64/slowproxy $(BIN_DIR)/linux-amd64/waitfor +$(BIN_DIR)/linux-amd64/_all: $(BIN_DIR)/linux-amd64/goalert-smoketest $(BIN_DIR)/linux-amd64/goalert $(BIN_DIR)/linux-amd64/goalert-slack-email-sync $(BIN_DIR)/linux-amd64/mockslack $(BIN_DIR)/linux-amd64/pgdump-lite $(BIN_DIR)/linux-amd64/pgmocktime $(BIN_DIR)/linux-amd64/procwrap $(BIN_DIR)/linux-amd64/psql-lite $(BIN_DIR)/linux-amd64/resetdb $(BIN_DIR)/linux-amd64/runproc $(BIN_DIR)/linux-amd64/sendit $(BIN_DIR)/linux-amd64/sendit-server $(BIN_DIR)/linux-amd64/sendit-token $(BIN_DIR)/linux-amd64/simpleproxy $(BIN_DIR)/linux-amd64/waitfor $(BIN_DIR)/linux-amd64/goalert-smoketest: $(GO_DEPS) GOOS=linux GOARCH=amd64 go test ./smoketest -c -o $@ -$(BIN_DIR)/linux-arm/_all: $(BIN_DIR)/linux-arm/goalert-smoketest $(BIN_DIR)/linux-arm/goalert $(BIN_DIR)/linux-arm/goalert-slack-email-sync $(BIN_DIR)/linux-arm/mockslack $(BIN_DIR)/linux-arm/pgdump-lite $(BIN_DIR)/linux-arm/pgmocktime $(BIN_DIR)/linux-arm/procwrap $(BIN_DIR)/linux-arm/psql-lite $(BIN_DIR)/linux-arm/resetdb $(BIN_DIR)/linux-arm/runproc $(BIN_DIR)/linux-arm/sendit $(BIN_DIR)/linux-arm/sendit-server $(BIN_DIR)/linux-arm/sendit-token $(BIN_DIR)/linux-arm/simpleproxy $(BIN_DIR)/linux-arm/slowproxy $(BIN_DIR)/linux-arm/waitfor +$(BIN_DIR)/linux-arm/_all: $(BIN_DIR)/linux-arm/goalert-smoketest $(BIN_DIR)/linux-arm/goalert $(BIN_DIR)/linux-arm/goalert-slack-email-sync $(BIN_DIR)/linux-arm/mockslack $(BIN_DIR)/linux-arm/pgdump-lite $(BIN_DIR)/linux-arm/pgmocktime $(BIN_DIR)/linux-arm/procwrap $(BIN_DIR)/linux-arm/psql-lite $(BIN_DIR)/linux-arm/resetdb $(BIN_DIR)/linux-arm/runproc $(BIN_DIR)/linux-arm/sendit $(BIN_DIR)/linux-arm/sendit-server $(BIN_DIR)/linux-arm/sendit-token $(BIN_DIR)/linux-arm/simpleproxy $(BIN_DIR)/linux-arm/waitfor $(BIN_DIR)/linux-arm/goalert-smoketest: $(GO_DEPS) GOOS=linux GOARCH=arm GOARM=7 
go test ./smoketest -c -o $@ -$(BIN_DIR)/linux-arm64/_all: $(BIN_DIR)/linux-arm64/goalert-smoketest $(BIN_DIR)/linux-arm64/goalert $(BIN_DIR)/linux-arm64/goalert-slack-email-sync $(BIN_DIR)/linux-arm64/mockslack $(BIN_DIR)/linux-arm64/pgdump-lite $(BIN_DIR)/linux-arm64/pgmocktime $(BIN_DIR)/linux-arm64/procwrap $(BIN_DIR)/linux-arm64/psql-lite $(BIN_DIR)/linux-arm64/resetdb $(BIN_DIR)/linux-arm64/runproc $(BIN_DIR)/linux-arm64/sendit $(BIN_DIR)/linux-arm64/sendit-server $(BIN_DIR)/linux-arm64/sendit-token $(BIN_DIR)/linux-arm64/simpleproxy $(BIN_DIR)/linux-arm64/slowproxy $(BIN_DIR)/linux-arm64/waitfor +$(BIN_DIR)/linux-arm64/_all: $(BIN_DIR)/linux-arm64/goalert-smoketest $(BIN_DIR)/linux-arm64/goalert $(BIN_DIR)/linux-arm64/goalert-slack-email-sync $(BIN_DIR)/linux-arm64/mockslack $(BIN_DIR)/linux-arm64/pgdump-lite $(BIN_DIR)/linux-arm64/pgmocktime $(BIN_DIR)/linux-arm64/procwrap $(BIN_DIR)/linux-arm64/psql-lite $(BIN_DIR)/linux-arm64/resetdb $(BIN_DIR)/linux-arm64/runproc $(BIN_DIR)/linux-arm64/sendit $(BIN_DIR)/linux-arm64/sendit-server $(BIN_DIR)/linux-arm64/sendit-token $(BIN_DIR)/linux-arm64/simpleproxy $(BIN_DIR)/linux-arm64/waitfor $(BIN_DIR)/linux-arm64/goalert-smoketest: $(GO_DEPS) GOOS=linux GOARCH=arm64 go test ./smoketest -c -o $@ -$(BIN_DIR)/windows-amd64/_all: $(BIN_DIR)/windows-amd64/goalert-smoketest $(BIN_DIR)/windows-amd64/goalert.exe $(BIN_DIR)/windows-amd64/goalert-slack-email-sync.exe $(BIN_DIR)/windows-amd64/mockslack.exe $(BIN_DIR)/windows-amd64/pgdump-lite.exe $(BIN_DIR)/windows-amd64/pgmocktime.exe $(BIN_DIR)/windows-amd64/procwrap.exe $(BIN_DIR)/windows-amd64/psql-lite.exe $(BIN_DIR)/windows-amd64/resetdb.exe $(BIN_DIR)/windows-amd64/runproc.exe $(BIN_DIR)/windows-amd64/sendit.exe $(BIN_DIR)/windows-amd64/sendit-server.exe $(BIN_DIR)/windows-amd64/sendit-token.exe $(BIN_DIR)/windows-amd64/simpleproxy.exe $(BIN_DIR)/windows-amd64/slowproxy.exe $(BIN_DIR)/windows-amd64/waitfor.exe +$(BIN_DIR)/windows-amd64/_all: 
$(BIN_DIR)/windows-amd64/goalert-smoketest $(BIN_DIR)/windows-amd64/goalert.exe $(BIN_DIR)/windows-amd64/goalert-slack-email-sync.exe $(BIN_DIR)/windows-amd64/mockslack.exe $(BIN_DIR)/windows-amd64/pgdump-lite.exe $(BIN_DIR)/windows-amd64/pgmocktime.exe $(BIN_DIR)/windows-amd64/procwrap.exe $(BIN_DIR)/windows-amd64/psql-lite.exe $(BIN_DIR)/windows-amd64/resetdb.exe $(BIN_DIR)/windows-amd64/runproc.exe $(BIN_DIR)/windows-amd64/sendit.exe $(BIN_DIR)/windows-amd64/sendit-server.exe $(BIN_DIR)/windows-amd64/sendit-token.exe $(BIN_DIR)/windows-amd64/simpleproxy.exe $(BIN_DIR)/windows-amd64/waitfor.exe $(BIN_DIR)/windows-amd64/goalert-smoketest: $(GO_DEPS) GOOS=windows GOARCH=amd64 go test ./smoketest -c -o $@ diff --git a/swo/swosync/safety.go b/swo/swosync/safety.go index a23f64cbdc..bff079cce0 100644 --- a/swo/swosync/safety.go +++ b/swo/swosync/safety.go @@ -27,7 +27,7 @@ do $$ declare begin set local idle_in_transaction_session_timeout = 5000; - set local lock_timeout = 5000; + set local lock_timeout = 10000; assert (select pg_try_advisory_xact_lock_shared(4919)), 'failed to get shared migration lock'; perform pg_advisory_xact_lock(4369); assert (select current_state = 'in_progress' from switchover_state), 'switchover state is not in_progress'; From f61a72a88e4575d0d5a4d502cb7b351fa75a6c17 Mon Sep 17 00:00:00 2001 From: Nathaniel Caza Date: Mon, 8 Aug 2022 13:25:39 -0500 Subject: [PATCH 153/225] don't use unlock all --- swo/manager.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/swo/manager.go b/swo/manager.go index 1161d41d06..267c74ae96 100644 --- a/swo/manager.go +++ b/swo/manager.go @@ -126,11 +126,11 @@ func WithPGXConn(ctx context.Context, db *sql.DB, runFunc func(context.Context, return err } defer conn.Close() - defer conn.ExecContext(context.Background(), "select pg_advisory_unlock_all()") return conn.Raw(func(driverConn interface{}) error { conn := driverConn.(*stdlib.Conn).Conn() defer conn.Close(context.Background()) + defer 
conn.PgConn().Close(context.Background()) return runFunc(ctx, conn) }) From 8cc5caa6aff900b0cdd30c48fb58d3d065a6b602 Mon Sep 17 00:00:00 2001 From: Nathaniel Caza Date: Mon, 8 Aug 2022 15:34:42 -0500 Subject: [PATCH 154/225] separate connection pools --- app/cmd.go | 40 +++++----------------------------- graphql2/graphqlapp/swo.go | 17 ++++++++------- sqlc.yaml | 2 +- swo/drvconnector.go | 10 --------- swo/manager.go | 42 ++++++++++++++++++++++++++++++------ swo/mgrconnector.go | 44 -------------------------------------- swo/swodb/models.go | 5 +++-- swo/swodb/queries.sql.go | 34 +++++++++++++++++++++++++++++ swo/swoinfo/conninfo.go | 40 ++++++++++++++++++++++++++++++++++ swo/swoinfo/pgtables.sql | 6 +++++- swo/swoinfo/queries.sql | 7 ++++++ util/sqldrv/connector.go | 34 +++++++++++++++++++++++++++++ 12 files changed, 173 insertions(+), 108 deletions(-) delete mode 100644 swo/mgrconnector.go create mode 100644 swo/swoinfo/conninfo.go create mode 100644 util/sqldrv/connector.go diff --git a/app/cmd.go b/app/cmd.go index 2e7494737b..404511a0e7 100644 --- a/app/cmd.go +++ b/app/cmd.go @@ -13,7 +13,6 @@ import ( "strings" "time" - "github.com/jackc/pgx/v4/stdlib" toml "github.com/pelletier/go-toml" "github.com/pkg/errors" "github.com/spf13/cobra" @@ -84,18 +83,6 @@ var RootCmd = &cobra.Command{ return err } - wrappedDriver := sqldrv.NewRetryDriver(&stdlib.Driver{}, 10) - - u, err := url.Parse(cfg.DBURL) - if err != nil { - return errors.Wrap(err, "parse old URL") - } - q := u.Query() - q.Set("application_name", fmt.Sprintf("GoAlert %s", version.GitVersion())) - q.Set("enable_seqscan", "off") - u.RawQuery = q.Encode() - cfg.DBURL = u.String() - doMigrations := func(url string) error { if cfg.APIOnly { err = migrate.VerifyAll(log.WithDebug(ctx), url) @@ -129,36 +116,19 @@ var RootCmd = &cobra.Command{ } } - dbc, err := wrappedDriver.OpenConnector(cfg.DBURL) - if err != nil { - return errors.Wrap(err, "connect to postgres") - } - var db *sql.DB if cfg.DBURLNext != "" { - 
u, err := url.Parse(cfg.DBURLNext) - if err != nil { - return errors.Wrap(err, "parse next URL") - } - q := u.Query() - q.Set("application_name", fmt.Sprintf("GoAlert %s (SWO Mode)", version.GitVersion())) - q.Set("enable_seqscan", "off") - u.RawQuery = q.Encode() - cfg.DBURLNext = u.String() - - dbcNext, err := wrappedDriver.OpenConnector(cfg.DBURLNext) - if err != nil { - return errors.Wrap(err, "connect to postres (next)") - } - - mgr, err := swo.NewManager(swo.Config{OldDBC: dbc, NewDBC: dbcNext, CanExec: !cfg.APIOnly}) + mgr, err := swo.NewManager(swo.Config{OldDBURL: cfg.DBURL, NewDBURL: cfg.DBURLNext, CanExec: !cfg.APIOnly}) if err != nil { return errors.Wrap(err, "init switchover handler") } db = mgr.DB() cfg.SWO = mgr } else { - db = sql.OpenDB(dbc) + db, err = sqldrv.NewDB(cfg.DBURL, fmt.Sprintf("GoAlert %s", version.GitVersion())) + if err != nil { + return errors.Wrap(err, "connect to postgres") + } } app, err := NewApp(cfg, db) diff --git a/graphql2/graphqlapp/swo.go b/graphql2/graphqlapp/swo.go index 2fe947354f..51ba414b46 100644 --- a/graphql2/graphqlapp/swo.go +++ b/graphql2/graphqlapp/swo.go @@ -7,7 +7,6 @@ import ( "github.com/target/goalert/graphql2" "github.com/target/goalert/permission" "github.com/target/goalert/swo/swogrp" - "github.com/target/goalert/util/sqlutil" "github.com/target/goalert/validation" ) @@ -43,17 +42,19 @@ func (a *Query) SwoStatus(ctx context.Context) (*graphql2.SWOStatus, error) { return nil, err } - var conns []graphql2.SWOConnection - err = sqlutil.FromContext(ctx). - Table("pg_stat_activity"). - Select("application_name as name, count(*)"). - Where("datname = current_database()"). - Group("name"). 
- Find(&conns).Error + _conns, err := a.SWO.ConnInfo(ctx) if err != nil { return nil, err } + var conns []graphql2.SWOConnection + for _, c := range _conns { + conns = append(conns, graphql2.SWOConnection{ + Name: c.Name, + Count: c.Count, + }) + } + s := a.SWO.Status() var nodes []graphql2.SWONode for _, n := range s.Nodes { diff --git a/sqlc.yaml b/sqlc.yaml index 2860aff936..c7c4b72aa5 100644 --- a/sqlc.yaml +++ b/sqlc.yaml @@ -4,7 +4,7 @@ sql: - swo/swoinfo/pgtables.sql - swo/swosync/changelog.sql - migrate/migrations/20180816094955-switchover-state.sql - - migrate/migrations/20220628125954-switchover-mk2.sql + - migrate/migrations/20220808103727-switchover-mk2.sql queries: - swo/swosync/queries.sql - swo/swoinfo/queries.sql diff --git a/swo/drvconnector.go b/swo/drvconnector.go index 0ad94a0b9a..1e841717c9 100644 --- a/swo/drvconnector.go +++ b/swo/drvconnector.go @@ -44,14 +44,8 @@ func (drv *Connector) Connect(ctx context.Context) (driver.Conn, error) { } conn := c.(*stdlib.Conn) - str, err := conn.Conn().PgConn().EscapeString(fmt.Sprintf("GoAlert %s (SWO Node)", version.GitVersion())) - if err != nil { - conn.Close() - return nil, err - } var b pgx.Batch - b.Queue(fmt.Sprintf("set application_name = '%s'", str)) b.Queue("select pg_advisory_lock_shared(4369)") b.Queue("select current_state = 'use_next_db' FROM switchover_state") @@ -60,10 +54,6 @@ func (drv *Connector) Connect(ctx context.Context) (driver.Conn, error) { conn.Close() return nil, err } - if _, err := res.Exec(); err != nil { - conn.Close() - return nil, err - } defer res.Close() var useNext bool diff --git a/swo/manager.go b/swo/manager.go index 267c74ae96..26a14546b6 100644 --- a/swo/manager.go +++ b/swo/manager.go @@ -3,7 +3,6 @@ package swo import ( "context" "database/sql" - "database/sql/driver" "fmt" "github.com/google/uuid" @@ -16,6 +15,8 @@ import ( "github.com/target/goalert/swo/swomsg" "github.com/target/goalert/swo/swosync" "github.com/target/goalert/util/log" + 
"github.com/target/goalert/util/sqldrv" + "github.com/target/goalert/version" ) type Manager struct { @@ -45,17 +46,35 @@ type Node struct { } type Config struct { - OldDBC, NewDBC driver.Connector - CanExec bool - Logger *log.Logger + OldDBURL, NewDBURL string + CanExec bool + Logger *log.Logger } func NewManager(cfg Config) (*Manager, error) { + mainDB, err := sqldrv.NewDB(cfg.OldDBURL, fmt.Sprintf("GoAlert %s (SWO Manager - Main)", version.GitVersion())) + if err != nil { + return nil, fmt.Errorf("connect to old db: %w", err) + } + nextDB, err := sqldrv.NewDB(cfg.NewDBURL, fmt.Sprintf("GoAlert %s (SWO Manager - Next)", version.GitVersion())) + if err != nil { + return nil, fmt.Errorf("connect to new db: %w", err) + } + + mainAppDBC, err := sqldrv.NewConnector(cfg.OldDBURL, fmt.Sprintf("GoAlert %s (SWO Node - Main)", version.GitVersion())) + if err != nil { + return nil, fmt.Errorf("connect to old db: %w", err) + } + nextAppDBC, err := sqldrv.NewConnector(cfg.NewDBURL, fmt.Sprintf("GoAlert %s (SWO Node - Next)", version.GitVersion())) + if err != nil { + return nil, fmt.Errorf("connect to new db: %w", err) + } + m := &Manager{ Config: cfg, - dbApp: sql.OpenDB(NewConnector(cfg.OldDBC, cfg.NewDBC)), - dbMain: sql.OpenDB(newMgrConnector(cfg.OldDBC)), - dbNext: sql.OpenDB(newMgrConnector(cfg.NewDBC)), + dbApp: sql.OpenDB(NewConnector(mainAppDBC, nextAppDBC)), + dbMain: mainDB, + dbNext: nextDB, } ctx := cfg.Logger.BackgroundContext() @@ -105,6 +124,15 @@ func (m *Manager) Init(app lifecycle.PauseResumer) { m.taskMgr.Init() } +func (m *Manager) ConnInfo(ctx context.Context) (counts []swoinfo.ConnCount, err error) { + err = m.withConnFromBoth(ctx, func(ctx context.Context, oldConn, newConn *pgx.Conn) error { + counts, err = swoinfo.ConnInfo(ctx, oldConn, newConn) + return err + }) + + return +} + // withConnFromOld allows performing operations with a raw connection to the old database. 
func (m *Manager) withConnFromOld(ctx context.Context, f func(context.Context, *pgx.Conn) error) error { return WithPGXConn(ctx, m.dbMain, f) diff --git a/swo/mgrconnector.go b/swo/mgrconnector.go deleted file mode 100644 index 566394598b..0000000000 --- a/swo/mgrconnector.go +++ /dev/null @@ -1,44 +0,0 @@ -package swo - -import ( - "context" - "database/sql/driver" - "fmt" - - "github.com/jackc/pgx/v4/stdlib" - "github.com/target/goalert/version" -) - -type mgrConnector struct { - dbc driver.Connector -} - -var _ driver.Connector = (*mgrConnector)(nil) - -func newMgrConnector(dbc driver.Connector) *mgrConnector { - return &mgrConnector{dbc: dbc} -} - -func (drv *mgrConnector) Driver() driver.Driver { return nil } - -func (drv *mgrConnector) Connect(ctx context.Context) (driver.Conn, error) { - c, err := drv.dbc.Connect(ctx) - if err != nil { - return nil, err - } - - conn := c.(*stdlib.Conn) - str, err := conn.Conn().PgConn().EscapeString(fmt.Sprintf("GoAlert %s (SWO Manager)", version.GitVersion())) - if err != nil { - conn.Close() - return nil, err - } - - _, err = conn.ExecContext(ctx, fmt.Sprintf("set application_name = '%s'", str), nil) - if err != nil { - conn.Close() - return nil, err - } - - return c, nil -} diff --git a/swo/swodb/models.go b/swo/swodb/models.go index 1d2b2804c5..1bba246825 100644 --- a/swo/swodb/models.go +++ b/swo/swodb/models.go @@ -54,8 +54,9 @@ type InformationSchemaTable struct { } type PgStatActivity struct { - State sql.NullString - XactStart time.Time + State sql.NullString + XactStart time.Time + ApplicationName sql.NullString } type SwitchoverLog struct { diff --git a/swo/swodb/queries.sql.go b/swo/swodb/queries.sql.go index aa7419ad1e..8475b913e1 100644 --- a/swo/swodb/queries.sql.go +++ b/swo/swodb/queries.sql.go @@ -7,6 +7,7 @@ package swodb import ( "context" + "database/sql" "time" "github.com/google/uuid" @@ -26,6 +27,39 @@ func (q *Queries) ActiveTxCount(ctx context.Context, xactStart time.Time) (int64 return count, err } 
+const connectionInfo = `-- name: ConnectionInfo :many +SELECT application_name AS NAME, + COUNT(*) +FROM pg_stat_activity +WHERE datname = current_database() +GROUP BY NAME +` + +type ConnectionInfoRow struct { + Name sql.NullString + Count int64 +} + +func (q *Queries) ConnectionInfo(ctx context.Context) ([]ConnectionInfoRow, error) { + rows, err := q.db.Query(ctx, connectionInfo) + if err != nil { + return nil, err + } + defer rows.Close() + var items []ConnectionInfoRow + for rows.Next() { + var i ConnectionInfoRow + if err := rows.Scan(&i.Name, &i.Count); err != nil { + return nil, err + } + items = append(items, i) + } + if err := rows.Err(); err != nil { + return nil, err + } + return items, nil +} + const databaseInfo = `-- name: DatabaseInfo :one SELECT db_id AS id, version() diff --git a/swo/swoinfo/conninfo.go b/swo/swoinfo/conninfo.go new file mode 100644 index 0000000000..9297bbb403 --- /dev/null +++ b/swo/swoinfo/conninfo.go @@ -0,0 +1,40 @@ +package swoinfo + +import ( + "context" + + "github.com/jackc/pgx/v4" + "github.com/target/goalert/swo/swodb" +) + +type ConnCount struct { + Name string + Count int +} + +// ConnInfo provides information about the connections to both old and new databases. 
+func ConnInfo(ctx context.Context, oldConn, newConn *pgx.Conn) ([]ConnCount, error) { + oldConns, err := swodb.New(oldConn).ConnectionInfo(ctx) + if err != nil { + return nil, err + } + newConns, err := swodb.New(newConn).ConnectionInfo(ctx) + if err != nil { + return nil, err + } + + counts := make(map[string]int) + for _, oldConn := range oldConns { + counts[oldConn.Name.String] += int(oldConn.Count) + } + for _, newConn := range newConns { + counts[newConn.Name.String] += int(newConn.Count) + } + + var result []ConnCount + for name, count := range counts { + result = append(result, ConnCount{Name: name, Count: count}) + } + + return result, nil +} diff --git a/swo/swoinfo/pgtables.sql b/swo/swoinfo/pgtables.sql index f26929e971..2ad427caab 100644 --- a/swo/swoinfo/pgtables.sql +++ b/swo/swoinfo/pgtables.sql @@ -71,7 +71,11 @@ CREATE TABLE pg_catalog.pg_constraint ( ); -- just for type info -CREATE TABLE pg_stat_activity (state TEXT, XACT_START timestamptz NOT NULL); +CREATE TABLE pg_stat_activity ( + state TEXT, + XACT_START timestamptz NOT NULL, + application_name TEXT +); CREATE SCHEMA information_schema; diff --git a/swo/swoinfo/queries.sql b/swo/swoinfo/queries.sql index fe89b2ba94..613408993e 100644 --- a/swo/swoinfo/queries.sql +++ b/swo/swoinfo/queries.sql @@ -33,3 +33,10 @@ WHERE sequence_catalog = current_database() SELECT db_id AS id, version() FROM switchover_state; + +-- name: ConnectionInfo :many +SELECT application_name AS NAME, + COUNT(*) +FROM pg_stat_activity +WHERE datname = current_database() +GROUP BY NAME; diff --git a/util/sqldrv/connector.go b/util/sqldrv/connector.go new file mode 100644 index 0000000000..faae16a4fc --- /dev/null +++ b/util/sqldrv/connector.go @@ -0,0 +1,34 @@ +package sqldrv + +import ( + "database/sql" + "database/sql/driver" + "fmt" + "net/url" + + "github.com/jackc/pgx/v4/stdlib" +) + +// NewDB is a convenience function for creating a *sql.DB from a DB URL and application_name. 
+func NewDB(urlStr, appName string) (*sql.DB, error) { + c, err := NewConnector(urlStr, appName) + if err != nil { + return nil, err + } + return sql.OpenDB(c), nil +} + +// NewConnector will create a new driver.Connector with retry enabled and the provided application_name. +func NewConnector(urlStr, appName string) (driver.Connector, error) { + u, err := url.Parse(urlStr) + if err != nil { + return nil, fmt.Errorf("parse db url: %w", err) + } + q := u.Query() + q.Set("application_name", appName) + q.Set("enable_seqscan", "off") + u.RawQuery = q.Encode() + urlStr = u.String() + + return NewRetryDriver(&stdlib.Driver{}, 10).OpenConnector(urlStr) +} From 95acd511518b6e921aeacaa0d01983ac06a91464 Mon Sep 17 00:00:00 2001 From: Nathaniel Caza Date: Mon, 8 Aug 2022 16:40:26 -0500 Subject: [PATCH 155/225] unexport conn method --- swo/manager.go | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/swo/manager.go b/swo/manager.go index 26a14546b6..96845650e1 100644 --- a/swo/manager.go +++ b/swo/manager.go @@ -135,20 +135,20 @@ func (m *Manager) ConnInfo(ctx context.Context) (counts []swoinfo.ConnCount, err // withConnFromOld allows performing operations with a raw connection to the old database. func (m *Manager) withConnFromOld(ctx context.Context, f func(context.Context, *pgx.Conn) error) error { - return WithPGXConn(ctx, m.dbMain, f) + return withPGXConn(ctx, m.dbMain, f) } // withConnFromBoth allows performing operations with a raw connection to both databases database. 
func (m *Manager) withConnFromBoth(ctx context.Context, f func(ctx context.Context, oldConn, newConn *pgx.Conn) error) error { // grab lock with old DB first - return WithPGXConn(ctx, m.dbMain, func(ctx context.Context, connMain *pgx.Conn) error { - return WithPGXConn(ctx, m.dbNext, func(ctx context.Context, connNext *pgx.Conn) error { + return withPGXConn(ctx, m.dbMain, func(ctx context.Context, connMain *pgx.Conn) error { + return withPGXConn(ctx, m.dbNext, func(ctx context.Context, connNext *pgx.Conn) error { return f(ctx, connMain, connNext) }) }) } -func WithPGXConn(ctx context.Context, db *sql.DB, runFunc func(context.Context, *pgx.Conn) error) error { +func withPGXConn(ctx context.Context, db *sql.DB, runFunc func(context.Context, *pgx.Conn) error) error { conn, err := db.Conn(ctx) if err != nil { return err From 6aca708086d4574e7c1d8ac1dfc90f70971147e2 Mon Sep 17 00:00:00 2001 From: Nathaniel Caza Date: Mon, 8 Aug 2022 16:41:06 -0500 Subject: [PATCH 156/225] don't use snapshot isolation in final mode --- swo/swosync/logicalsync.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/swo/swosync/logicalsync.go b/swo/swosync/logicalsync.go index 0e04853fa2..7cfb1c4824 100644 --- a/swo/swosync/logicalsync.go +++ b/swo/swosync/logicalsync.go @@ -19,7 +19,7 @@ func (l *LogicalReplicator) FinalSync(ctx context.Context) error { return l.doSy func (l *LogicalReplicator) doSync(ctx context.Context, final bool) error { b := new(pgx.Batch) if final { - b.Queue(`begin isolation level serializable`) + b.Queue(`begin`) } else { b.Queue(`begin isolation level serializable read only deferrable`) } From 8a7b7d852322ffda48ab0aa1c5374f325807a960 Mon Sep 17 00:00:00 2001 From: Nathaniel Caza Date: Mon, 8 Aug 2022 16:45:05 -0500 Subject: [PATCH 157/225] fix imports --- swo/drvconnector.go | 2 -- 1 file changed, 2 deletions(-) diff --git a/swo/drvconnector.go b/swo/drvconnector.go index 1e841717c9..fb445a0cec 100644 --- a/swo/drvconnector.go +++ 
b/swo/drvconnector.go @@ -3,12 +3,10 @@ package swo import ( "context" "database/sql/driver" - "fmt" "sync" "github.com/jackc/pgx/v4" "github.com/jackc/pgx/v4/stdlib" - "github.com/target/goalert/version" ) type Connector struct { From 09b3ae3b14c4828caa85755f5b77816bc3ff9c4d Mon Sep 17 00:00:00 2001 From: Nathaniel Caza Date: Tue, 9 Aug 2022 20:36:32 -0500 Subject: [PATCH 158/225] use connection-based node list --- graphql2/generated.go | 449 +++++++++++------- graphql2/graphqlapp/swo.go | 99 +++- graphql2/models_gen.go | 30 +- graphql2/schema.graphql | 20 +- swo/manager.go | 33 +- swo/swogrp/config.go | 2 + swo/swogrp/{taskman.go => taskmgr.go} | 2 +- swo/swoinfo/conninfo.go | 19 +- swo/swoinfo/db.go | 4 + .../app/admin/switchover/AdminSwitchover.tsx | 120 ++--- web/src/app/admin/switchover/SWONode.tsx | 38 +- web/src/schema.d.ts | 18 +- 12 files changed, 516 insertions(+), 318 deletions(-) rename swo/swogrp/{taskman.go => taskmgr.go} (99%) diff --git a/graphql2/generated.go b/graphql2/generated.go index e751a99392..d2d89ee9c9 100644 --- a/graphql2/generated.go +++ b/graphql2/generated.go @@ -400,20 +400,22 @@ type ComplexityRoot struct { } SWOConnection struct { - Count func(childComplexity int) int - Name func(childComplexity int) int + Count func(childComplexity int) int + IsNext func(childComplexity int) int + Name func(childComplexity int) int + Type func(childComplexity int) int + Version func(childComplexity int) int } SWONode struct { - CanExec func(childComplexity int) int - ID func(childComplexity int) int - IsLeader func(childComplexity int) int - NewValid func(childComplexity int) int - OldValid func(childComplexity int) int + CanExec func(childComplexity int) int + Connections func(childComplexity int) int + ID func(childComplexity int) int + IsConfigValid func(childComplexity int) int + IsLeader func(childComplexity int) int } SWOStatus struct { - Connections func(childComplexity int) int LastError func(childComplexity int) int LastStatus 
func(childComplexity int) int MainDBVersion func(childComplexity int) int @@ -2669,6 +2671,13 @@ func (e *executableSchema) Complexity(typeName, field string, childComplexity in return e.complexity.SWOConnection.Count(childComplexity), true + case "SWOConnection.isNext": + if e.complexity.SWOConnection.IsNext == nil { + break + } + + return e.complexity.SWOConnection.IsNext(childComplexity), true + case "SWOConnection.name": if e.complexity.SWOConnection.Name == nil { break @@ -2676,47 +2685,54 @@ func (e *executableSchema) Complexity(typeName, field string, childComplexity in return e.complexity.SWOConnection.Name(childComplexity), true - case "SWONode.canExec": - if e.complexity.SWONode.CanExec == nil { + case "SWOConnection.type": + if e.complexity.SWOConnection.Type == nil { break } - return e.complexity.SWONode.CanExec(childComplexity), true + return e.complexity.SWOConnection.Type(childComplexity), true - case "SWONode.id": - if e.complexity.SWONode.ID == nil { + case "SWOConnection.version": + if e.complexity.SWOConnection.Version == nil { break } - return e.complexity.SWONode.ID(childComplexity), true + return e.complexity.SWOConnection.Version(childComplexity), true - case "SWONode.isLeader": - if e.complexity.SWONode.IsLeader == nil { + case "SWONode.canExec": + if e.complexity.SWONode.CanExec == nil { break } - return e.complexity.SWONode.IsLeader(childComplexity), true + return e.complexity.SWONode.CanExec(childComplexity), true + + case "SWONode.connections": + if e.complexity.SWONode.Connections == nil { + break + } + + return e.complexity.SWONode.Connections(childComplexity), true - case "SWONode.newValid": - if e.complexity.SWONode.NewValid == nil { + case "SWONode.id": + if e.complexity.SWONode.ID == nil { break } - return e.complexity.SWONode.NewValid(childComplexity), true + return e.complexity.SWONode.ID(childComplexity), true - case "SWONode.oldValid": - if e.complexity.SWONode.OldValid == nil { + case "SWONode.isConfigValid": + if 
e.complexity.SWONode.IsConfigValid == nil { break } - return e.complexity.SWONode.OldValid(childComplexity), true + return e.complexity.SWONode.IsConfigValid(childComplexity), true - case "SWOStatus.connections": - if e.complexity.SWOStatus.Connections == nil { + case "SWONode.isLeader": + if e.complexity.SWONode.IsLeader == nil { break } - return e.complexity.SWOStatus.Connections(childComplexity), true + return e.complexity.SWONode.IsLeader(childComplexity), true case "SWOStatus.lastError": if e.complexity.SWOStatus.LastError == nil { @@ -15192,8 +15208,6 @@ func (ec *executionContext) fieldContext_Query_swoStatus(ctx context.Context, fi return ec.fieldContext_SWOStatus_lastError(ctx, field) case "nodes": return ec.fieldContext_SWOStatus_nodes(ctx, field) - case "connections": - return ec.fieldContext_SWOStatus_connections(ctx, field) case "mainDBVersion": return ec.fieldContext_SWOStatus_mainDBVersion(ctx, field) case "nextDBVersion": @@ -16063,8 +16077,8 @@ func (ec *executionContext) fieldContext_SWOConnection_name(ctx context.Context, return fc, nil } -func (ec *executionContext) _SWOConnection_count(ctx context.Context, field graphql.CollectedField, obj *SWOConnection) (ret graphql.Marshaler) { - fc, err := ec.fieldContext_SWOConnection_count(ctx, field) +func (ec *executionContext) _SWOConnection_version(ctx context.Context, field graphql.CollectedField, obj *SWOConnection) (ret graphql.Marshaler) { + fc, err := ec.fieldContext_SWOConnection_version(ctx, field) if err != nil { return graphql.Null } @@ -16077,7 +16091,7 @@ func (ec *executionContext) _SWOConnection_count(ctx context.Context, field grap }() resTmp, err := ec.ResolverMiddleware(ctx, func(rctx context.Context) (interface{}, error) { ctx = rctx // use context from middleware stack in children - return obj.Count, nil + return obj.Version, nil }) if err != nil { ec.Error(ctx, err) @@ -16089,26 +16103,26 @@ func (ec *executionContext) _SWOConnection_count(ctx context.Context, field grap } return 
graphql.Null } - res := resTmp.(int) + res := resTmp.(string) fc.Result = res - return ec.marshalNInt2int(ctx, field.Selections, res) + return ec.marshalNString2string(ctx, field.Selections, res) } -func (ec *executionContext) fieldContext_SWOConnection_count(ctx context.Context, field graphql.CollectedField) (fc *graphql.FieldContext, err error) { +func (ec *executionContext) fieldContext_SWOConnection_version(ctx context.Context, field graphql.CollectedField) (fc *graphql.FieldContext, err error) { fc = &graphql.FieldContext{ Object: "SWOConnection", Field: field, IsMethod: false, IsResolver: false, Child: func(ctx context.Context, field graphql.CollectedField) (*graphql.FieldContext, error) { - return nil, errors.New("field of type Int does not have child fields") + return nil, errors.New("field of type String does not have child fields") }, } return fc, nil } -func (ec *executionContext) _SWONode_id(ctx context.Context, field graphql.CollectedField, obj *SWONode) (ret graphql.Marshaler) { - fc, err := ec.fieldContext_SWONode_id(ctx, field) +func (ec *executionContext) _SWOConnection_type(ctx context.Context, field graphql.CollectedField, obj *SWOConnection) (ret graphql.Marshaler) { + fc, err := ec.fieldContext_SWOConnection_type(ctx, field) if err != nil { return graphql.Null } @@ -16121,7 +16135,7 @@ func (ec *executionContext) _SWONode_id(ctx context.Context, field graphql.Colle }() resTmp, err := ec.ResolverMiddleware(ctx, func(rctx context.Context) (interface{}, error) { ctx = rctx // use context from middleware stack in children - return obj.ID, nil + return obj.Type, nil }) if err != nil { ec.Error(ctx, err) @@ -16135,24 +16149,24 @@ func (ec *executionContext) _SWONode_id(ctx context.Context, field graphql.Colle } res := resTmp.(string) fc.Result = res - return ec.marshalNID2string(ctx, field.Selections, res) + return ec.marshalNString2string(ctx, field.Selections, res) } -func (ec *executionContext) fieldContext_SWONode_id(ctx context.Context, field 
graphql.CollectedField) (fc *graphql.FieldContext, err error) { +func (ec *executionContext) fieldContext_SWOConnection_type(ctx context.Context, field graphql.CollectedField) (fc *graphql.FieldContext, err error) { fc = &graphql.FieldContext{ - Object: "SWONode", + Object: "SWOConnection", Field: field, IsMethod: false, IsResolver: false, Child: func(ctx context.Context, field graphql.CollectedField) (*graphql.FieldContext, error) { - return nil, errors.New("field of type ID does not have child fields") + return nil, errors.New("field of type String does not have child fields") }, } return fc, nil } -func (ec *executionContext) _SWONode_oldValid(ctx context.Context, field graphql.CollectedField, obj *SWONode) (ret graphql.Marshaler) { - fc, err := ec.fieldContext_SWONode_oldValid(ctx, field) +func (ec *executionContext) _SWOConnection_isNext(ctx context.Context, field graphql.CollectedField, obj *SWOConnection) (ret graphql.Marshaler) { + fc, err := ec.fieldContext_SWOConnection_isNext(ctx, field) if err != nil { return graphql.Null } @@ -16165,7 +16179,7 @@ func (ec *executionContext) _SWONode_oldValid(ctx context.Context, field graphql }() resTmp, err := ec.ResolverMiddleware(ctx, func(rctx context.Context) (interface{}, error) { ctx = rctx // use context from middleware stack in children - return obj.OldValid, nil + return obj.IsNext, nil }) if err != nil { ec.Error(ctx, err) @@ -16182,9 +16196,9 @@ func (ec *executionContext) _SWONode_oldValid(ctx context.Context, field graphql return ec.marshalNBoolean2bool(ctx, field.Selections, res) } -func (ec *executionContext) fieldContext_SWONode_oldValid(ctx context.Context, field graphql.CollectedField) (fc *graphql.FieldContext, err error) { +func (ec *executionContext) fieldContext_SWOConnection_isNext(ctx context.Context, field graphql.CollectedField) (fc *graphql.FieldContext, err error) { fc = &graphql.FieldContext{ - Object: "SWONode", + Object: "SWOConnection", Field: field, IsMethod: false, IsResolver: false, 
@@ -16195,8 +16209,8 @@ func (ec *executionContext) fieldContext_SWONode_oldValid(ctx context.Context, f return fc, nil } -func (ec *executionContext) _SWONode_newValid(ctx context.Context, field graphql.CollectedField, obj *SWONode) (ret graphql.Marshaler) { - fc, err := ec.fieldContext_SWONode_newValid(ctx, field) +func (ec *executionContext) _SWOConnection_count(ctx context.Context, field graphql.CollectedField, obj *SWOConnection) (ret graphql.Marshaler) { + fc, err := ec.fieldContext_SWOConnection_count(ctx, field) if err != nil { return graphql.Null } @@ -16209,7 +16223,7 @@ func (ec *executionContext) _SWONode_newValid(ctx context.Context, field graphql }() resTmp, err := ec.ResolverMiddleware(ctx, func(rctx context.Context) (interface{}, error) { ctx = rctx // use context from middleware stack in children - return obj.NewValid, nil + return obj.Count, nil }) if err != nil { ec.Error(ctx, err) @@ -16221,19 +16235,63 @@ func (ec *executionContext) _SWONode_newValid(ctx context.Context, field graphql } return graphql.Null } - res := resTmp.(bool) + res := resTmp.(int) fc.Result = res - return ec.marshalNBoolean2bool(ctx, field.Selections, res) + return ec.marshalNInt2int(ctx, field.Selections, res) +} + +func (ec *executionContext) fieldContext_SWOConnection_count(ctx context.Context, field graphql.CollectedField) (fc *graphql.FieldContext, err error) { + fc = &graphql.FieldContext{ + Object: "SWOConnection", + Field: field, + IsMethod: false, + IsResolver: false, + Child: func(ctx context.Context, field graphql.CollectedField) (*graphql.FieldContext, error) { + return nil, errors.New("field of type Int does not have child fields") + }, + } + return fc, nil +} + +func (ec *executionContext) _SWONode_id(ctx context.Context, field graphql.CollectedField, obj *SWONode) (ret graphql.Marshaler) { + fc, err := ec.fieldContext_SWONode_id(ctx, field) + if err != nil { + return graphql.Null + } + ctx = graphql.WithFieldContext(ctx, fc) + defer func() { + if r := 
recover(); r != nil { + ec.Error(ctx, ec.Recover(ctx, r)) + ret = graphql.Null + } + }() + resTmp, err := ec.ResolverMiddleware(ctx, func(rctx context.Context) (interface{}, error) { + ctx = rctx // use context from middleware stack in children + return obj.ID, nil + }) + if err != nil { + ec.Error(ctx, err) + return graphql.Null + } + if resTmp == nil { + if !graphql.HasFieldError(ctx, fc) { + ec.Errorf(ctx, "must not be null") + } + return graphql.Null + } + res := resTmp.(string) + fc.Result = res + return ec.marshalNID2string(ctx, field.Selections, res) } -func (ec *executionContext) fieldContext_SWONode_newValid(ctx context.Context, field graphql.CollectedField) (fc *graphql.FieldContext, err error) { +func (ec *executionContext) fieldContext_SWONode_id(ctx context.Context, field graphql.CollectedField) (fc *graphql.FieldContext, err error) { fc = &graphql.FieldContext{ Object: "SWONode", Field: field, IsMethod: false, IsResolver: false, Child: func(ctx context.Context, field graphql.CollectedField) (*graphql.FieldContext, error) { - return nil, errors.New("field of type Boolean does not have child fields") + return nil, errors.New("field of type ID does not have child fields") }, } return fc, nil @@ -16327,8 +16385,8 @@ func (ec *executionContext) fieldContext_SWONode_isLeader(ctx context.Context, f return fc, nil } -func (ec *executionContext) _SWOStatus_state(ctx context.Context, field graphql.CollectedField, obj *SWOStatus) (ret graphql.Marshaler) { - fc, err := ec.fieldContext_SWOStatus_state(ctx, field) +func (ec *executionContext) _SWONode_isConfigValid(ctx context.Context, field graphql.CollectedField, obj *SWONode) (ret graphql.Marshaler) { + fc, err := ec.fieldContext_SWONode_isConfigValid(ctx, field) if err != nil { return graphql.Null } @@ -16341,7 +16399,7 @@ func (ec *executionContext) _SWOStatus_state(ctx context.Context, field graphql. 
}() resTmp, err := ec.ResolverMiddleware(ctx, func(rctx context.Context) (interface{}, error) { ctx = rctx // use context from middleware stack in children - return obj.State, nil + return obj.IsConfigValid, nil }) if err != nil { ec.Error(ctx, err) @@ -16353,26 +16411,26 @@ func (ec *executionContext) _SWOStatus_state(ctx context.Context, field graphql. } return graphql.Null } - res := resTmp.(SWOState) + res := resTmp.(bool) fc.Result = res - return ec.marshalNSWOState2githubᚗcomᚋtargetᚋgoalertᚋgraphql2ᚐSWOState(ctx, field.Selections, res) + return ec.marshalNBoolean2bool(ctx, field.Selections, res) } -func (ec *executionContext) fieldContext_SWOStatus_state(ctx context.Context, field graphql.CollectedField) (fc *graphql.FieldContext, err error) { +func (ec *executionContext) fieldContext_SWONode_isConfigValid(ctx context.Context, field graphql.CollectedField) (fc *graphql.FieldContext, err error) { fc = &graphql.FieldContext{ - Object: "SWOStatus", + Object: "SWONode", Field: field, IsMethod: false, IsResolver: false, Child: func(ctx context.Context, field graphql.CollectedField) (*graphql.FieldContext, error) { - return nil, errors.New("field of type SWOState does not have child fields") + return nil, errors.New("field of type Boolean does not have child fields") }, } return fc, nil } -func (ec *executionContext) _SWOStatus_lastStatus(ctx context.Context, field graphql.CollectedField, obj *SWOStatus) (ret graphql.Marshaler) { - fc, err := ec.fieldContext_SWOStatus_lastStatus(ctx, field) +func (ec *executionContext) _SWONode_connections(ctx context.Context, field graphql.CollectedField, obj *SWONode) (ret graphql.Marshaler) { + fc, err := ec.fieldContext_SWONode_connections(ctx, field) if err != nil { return graphql.Null } @@ -16385,7 +16443,60 @@ func (ec *executionContext) _SWOStatus_lastStatus(ctx context.Context, field gra }() resTmp, err := ec.ResolverMiddleware(ctx, func(rctx context.Context) (interface{}, error) { ctx = rctx // use context from middleware 
stack in children - return obj.LastStatus, nil + return obj.Connections, nil + }) + if err != nil { + ec.Error(ctx, err) + return graphql.Null + } + if resTmp == nil { + return graphql.Null + } + res := resTmp.([]SWOConnection) + fc.Result = res + return ec.marshalOSWOConnection2ᚕgithub.comᚋtargetᚋgoalertᚋgraphql2ᚐSWOConnectionᚄ(ctx, field.Selections, res) +} + +func (ec *executionContext) fieldContext_SWONode_connections(ctx context.Context, field graphql.CollectedField) (fc *graphql.FieldContext, err error) { + fc = &graphql.FieldContext{ + Object: "SWONode", + Field: field, + IsMethod: false, + IsResolver: false, + Child: func(ctx context.Context, field graphql.CollectedField) (*graphql.FieldContext, error) { + switch field.Name { + case "name": + return ec.fieldContext_SWOConnection_name(ctx, field) + case "version": + return ec.fieldContext_SWOConnection_version(ctx, field) + case "type": + return ec.fieldContext_SWOConnection_type(ctx, field) + case "isNext": + return ec.fieldContext_SWOConnection_isNext(ctx, field) + case "count": + return ec.fieldContext_SWOConnection_count(ctx, field) + } + return nil, fmt.Errorf("no field named %q was found under type SWOConnection", field.Name) + }, + } + return fc, nil +} + +func (ec *executionContext) _SWOStatus_state(ctx context.Context, field graphql.CollectedField, obj *SWOStatus) (ret graphql.Marshaler) { + fc, err := ec.fieldContext_SWOStatus_state(ctx, field) + if err != nil { + return graphql.Null + } + ctx = graphql.WithFieldContext(ctx, fc) + defer func() { + if r := recover(); r != nil { + ec.Error(ctx, ec.Recover(ctx, r)) + ret = graphql.Null + } + }() + resTmp, err := ec.ResolverMiddleware(ctx, func(rctx context.Context) (interface{}, error) { + ctx = rctx // use context from middleware stack in children + return obj.State, nil }) if err != nil { ec.Error(ctx, err) @@ -16397,26 +16508,26 @@ func (ec *executionContext) _SWOStatus_lastStatus(ctx context.Context, field gra } return graphql.Null } - res := 
resTmp.(string) + res := resTmp.(SWOState) fc.Result = res - return ec.marshalNString2string(ctx, field.Selections, res) + return ec.marshalNSWOState2githubᚗcomᚋtargetᚋgoalertᚋgraphql2ᚐSWOState(ctx, field.Selections, res) } -func (ec *executionContext) fieldContext_SWOStatus_lastStatus(ctx context.Context, field graphql.CollectedField) (fc *graphql.FieldContext, err error) { +func (ec *executionContext) fieldContext_SWOStatus_state(ctx context.Context, field graphql.CollectedField) (fc *graphql.FieldContext, err error) { fc = &graphql.FieldContext{ Object: "SWOStatus", Field: field, IsMethod: false, IsResolver: false, Child: func(ctx context.Context, field graphql.CollectedField) (*graphql.FieldContext, error) { - return nil, errors.New("field of type String does not have child fields") + return nil, errors.New("field of type SWOState does not have child fields") }, } return fc, nil } -func (ec *executionContext) _SWOStatus_lastError(ctx context.Context, field graphql.CollectedField, obj *SWOStatus) (ret graphql.Marshaler) { - fc, err := ec.fieldContext_SWOStatus_lastError(ctx, field) +func (ec *executionContext) _SWOStatus_lastStatus(ctx context.Context, field graphql.CollectedField, obj *SWOStatus) (ret graphql.Marshaler) { + fc, err := ec.fieldContext_SWOStatus_lastStatus(ctx, field) if err != nil { return graphql.Null } @@ -16429,7 +16540,7 @@ func (ec *executionContext) _SWOStatus_lastError(ctx context.Context, field grap }() resTmp, err := ec.ResolverMiddleware(ctx, func(rctx context.Context) (interface{}, error) { ctx = rctx // use context from middleware stack in children - return obj.LastError, nil + return obj.LastStatus, nil }) if err != nil { ec.Error(ctx, err) @@ -16446,7 +16557,7 @@ func (ec *executionContext) _SWOStatus_lastError(ctx context.Context, field grap return ec.marshalNString2string(ctx, field.Selections, res) } -func (ec *executionContext) fieldContext_SWOStatus_lastError(ctx context.Context, field graphql.CollectedField) (fc 
*graphql.FieldContext, err error) { +func (ec *executionContext) fieldContext_SWOStatus_lastStatus(ctx context.Context, field graphql.CollectedField) (fc *graphql.FieldContext, err error) { fc = &graphql.FieldContext{ Object: "SWOStatus", Field: field, @@ -16459,8 +16570,8 @@ func (ec *executionContext) fieldContext_SWOStatus_lastError(ctx context.Context return fc, nil } -func (ec *executionContext) _SWOStatus_nodes(ctx context.Context, field graphql.CollectedField, obj *SWOStatus) (ret graphql.Marshaler) { - fc, err := ec.fieldContext_SWOStatus_nodes(ctx, field) +func (ec *executionContext) _SWOStatus_lastError(ctx context.Context, field graphql.CollectedField, obj *SWOStatus) (ret graphql.Marshaler) { + fc, err := ec.fieldContext_SWOStatus_lastError(ctx, field) if err != nil { return graphql.Null } @@ -16473,7 +16584,7 @@ func (ec *executionContext) _SWOStatus_nodes(ctx context.Context, field graphql. }() resTmp, err := ec.ResolverMiddleware(ctx, func(rctx context.Context) (interface{}, error) { ctx = rctx // use context from middleware stack in children - return obj.Nodes, nil + return obj.LastError, nil }) if err != nil { ec.Error(ctx, err) @@ -16485,38 +16596,26 @@ func (ec *executionContext) _SWOStatus_nodes(ctx context.Context, field graphql. 
} return graphql.Null } - res := resTmp.([]SWONode) + res := resTmp.(string) fc.Result = res - return ec.marshalNSWONode2ᚕgithub.comᚋtargetᚋgoalertᚋgraphql2ᚐSWONodeᚄ(ctx, field.Selections, res) + return ec.marshalNString2string(ctx, field.Selections, res) } -func (ec *executionContext) fieldContext_SWOStatus_nodes(ctx context.Context, field graphql.CollectedField) (fc *graphql.FieldContext, err error) { +func (ec *executionContext) fieldContext_SWOStatus_lastError(ctx context.Context, field graphql.CollectedField) (fc *graphql.FieldContext, err error) { fc = &graphql.FieldContext{ Object: "SWOStatus", Field: field, IsMethod: false, IsResolver: false, Child: func(ctx context.Context, field graphql.CollectedField) (*graphql.FieldContext, error) { - switch field.Name { - case "id": - return ec.fieldContext_SWONode_id(ctx, field) - case "oldValid": - return ec.fieldContext_SWONode_oldValid(ctx, field) - case "newValid": - return ec.fieldContext_SWONode_newValid(ctx, field) - case "canExec": - return ec.fieldContext_SWONode_canExec(ctx, field) - case "isLeader": - return ec.fieldContext_SWONode_isLeader(ctx, field) - } - return nil, fmt.Errorf("no field named %q was found under type SWONode", field.Name) + return nil, errors.New("field of type String does not have child fields") }, } return fc, nil } -func (ec *executionContext) _SWOStatus_connections(ctx context.Context, field graphql.CollectedField, obj *SWOStatus) (ret graphql.Marshaler) { - fc, err := ec.fieldContext_SWOStatus_connections(ctx, field) +func (ec *executionContext) _SWOStatus_nodes(ctx context.Context, field graphql.CollectedField, obj *SWOStatus) (ret graphql.Marshaler) { + fc, err := ec.fieldContext_SWOStatus_nodes(ctx, field) if err != nil { return graphql.Null } @@ -16529,7 +16628,7 @@ func (ec *executionContext) _SWOStatus_connections(ctx context.Context, field gr }() resTmp, err := ec.ResolverMiddleware(ctx, func(rctx context.Context) (interface{}, error) { ctx = rctx // use context from 
middleware stack in children - return obj.Connections, nil + return obj.Nodes, nil }) if err != nil { ec.Error(ctx, err) @@ -16541,12 +16640,12 @@ func (ec *executionContext) _SWOStatus_connections(ctx context.Context, field gr } return graphql.Null } - res := resTmp.([]SWOConnection) + res := resTmp.([]SWONode) fc.Result = res - return ec.marshalNSWOConnection2ᚕgithub.comᚋtargetᚋgoalertᚋgraphql2ᚐSWOConnectionᚄ(ctx, field.Selections, res) + return ec.marshalNSWONode2ᚕgithub.comᚋtargetᚋgoalertᚋgraphql2ᚐSWONodeᚄ(ctx, field.Selections, res) } -func (ec *executionContext) fieldContext_SWOStatus_connections(ctx context.Context, field graphql.CollectedField) (fc *graphql.FieldContext, err error) { +func (ec *executionContext) fieldContext_SWOStatus_nodes(ctx context.Context, field graphql.CollectedField) (fc *graphql.FieldContext, err error) { fc = &graphql.FieldContext{ Object: "SWOStatus", Field: field, @@ -16554,12 +16653,18 @@ func (ec *executionContext) fieldContext_SWOStatus_connections(ctx context.Conte IsResolver: false, Child: func(ctx context.Context, field graphql.CollectedField) (*graphql.FieldContext, error) { switch field.Name { - case "name": - return ec.fieldContext_SWOConnection_name(ctx, field) - case "count": - return ec.fieldContext_SWOConnection_count(ctx, field) + case "id": + return ec.fieldContext_SWONode_id(ctx, field) + case "canExec": + return ec.fieldContext_SWONode_canExec(ctx, field) + case "isLeader": + return ec.fieldContext_SWONode_isLeader(ctx, field) + case "isConfigValid": + return ec.fieldContext_SWONode_isConfigValid(ctx, field) + case "connections": + return ec.fieldContext_SWONode_connections(ctx, field) } - return nil, fmt.Errorf("no field named %q was found under type SWOConnection", field.Name) + return nil, fmt.Errorf("no field named %q was found under type SWONode", field.Name) }, } return fc, nil @@ -29796,6 +29901,27 @@ func (ec *executionContext) _SWOConnection(ctx context.Context, sel ast.Selectio out.Values[i] = 
ec._SWOConnection_name(ctx, field, obj) + if out.Values[i] == graphql.Null { + invalids++ + } + case "version": + + out.Values[i] = ec._SWOConnection_version(ctx, field, obj) + + if out.Values[i] == graphql.Null { + invalids++ + } + case "type": + + out.Values[i] = ec._SWOConnection_type(ctx, field, obj) + + if out.Values[i] == graphql.Null { + invalids++ + } + case "isNext": + + out.Values[i] = ec._SWOConnection_isNext(ctx, field, obj) + if out.Values[i] == graphql.Null { invalids++ } @@ -29834,34 +29960,31 @@ func (ec *executionContext) _SWONode(ctx context.Context, sel ast.SelectionSet, if out.Values[i] == graphql.Null { invalids++ } - case "oldValid": + case "canExec": - out.Values[i] = ec._SWONode_oldValid(ctx, field, obj) + out.Values[i] = ec._SWONode_canExec(ctx, field, obj) if out.Values[i] == graphql.Null { invalids++ } - case "newValid": + case "isLeader": - out.Values[i] = ec._SWONode_newValid(ctx, field, obj) + out.Values[i] = ec._SWONode_isLeader(ctx, field, obj) if out.Values[i] == graphql.Null { invalids++ } - case "canExec": + case "isConfigValid": - out.Values[i] = ec._SWONode_canExec(ctx, field, obj) + out.Values[i] = ec._SWONode_isConfigValid(ctx, field, obj) if out.Values[i] == graphql.Null { invalids++ } - case "isLeader": + case "connections": - out.Values[i] = ec._SWONode_isLeader(ctx, field, obj) + out.Values[i] = ec._SWONode_connections(ctx, field, obj) - if out.Values[i] == graphql.Null { - invalids++ - } default: panic("unknown field " + strconv.Quote(field.Name)) } @@ -29908,13 +30031,6 @@ func (ec *executionContext) _SWOStatus(ctx context.Context, sel ast.SelectionSet out.Values[i] = ec._SWOStatus_nodes(ctx, field, obj) - if out.Values[i] == graphql.Null { - invalids++ - } - case "connections": - - out.Values[i] = ec._SWOStatus_connections(ctx, field, obj) - if out.Values[i] == graphql.Null { invalids++ } @@ -33259,50 +33375,6 @@ func (ec *executionContext) marshalNSWOConnection2githubᚗcomᚋtargetᚋgoaler return ec._SWOConnection(ctx, 
sel, &v) } -func (ec *executionContext) marshalNSWOConnection2ᚕgithub.comᚋtargetᚋgoalertᚋgraphql2ᚐSWOConnectionᚄ(ctx context.Context, sel ast.SelectionSet, v []SWOConnection) graphql.Marshaler { - ret := make(graphql.Array, len(v)) - var wg sync.WaitGroup - isLen1 := len(v) == 1 - if !isLen1 { - wg.Add(len(v)) - } - for i := range v { - i := i - fc := &graphql.FieldContext{ - Index: &i, - Result: &v[i], - } - ctx := graphql.WithFieldContext(ctx, fc) - f := func(i int) { - defer func() { - if r := recover(); r != nil { - ec.Error(ctx, ec.Recover(ctx, r)) - ret = nil - } - }() - if !isLen1 { - defer wg.Done() - } - ret[i] = ec.marshalNSWOConnection2githubᚗcomᚋtargetᚋgoalertᚋgraphql2ᚐSWOConnection(ctx, sel, v[i]) - } - if isLen1 { - f(i) - } else { - go f(i) - } - - } - wg.Wait() - - for _, e := range ret { - if e == graphql.Null { - return graphql.Null - } - } - - return ret -} - func (ec *executionContext) marshalNSWONode2githubᚗcomᚋtargetᚋgoalertᚋgraphql2ᚐSWONode(ctx context.Context, sel ast.SelectionSet, v SWONode) graphql.Marshaler { return ec._SWONode(ctx, sel, &v) } @@ -35433,6 +35505,53 @@ func (ec *executionContext) marshalORotationType2ᚖgithub.comᚋtargetᚋgoal return v } +func (ec *executionContext) marshalOSWOConnection2ᚕgithub.comᚋtargetᚋgoalertᚋgraphql2ᚐSWOConnectionᚄ(ctx context.Context, sel ast.SelectionSet, v []SWOConnection) graphql.Marshaler { + if v == nil { + return graphql.Null + } + ret := make(graphql.Array, len(v)) + var wg sync.WaitGroup + isLen1 := len(v) == 1 + if !isLen1 { + wg.Add(len(v)) + } + for i := range v { + i := i + fc := &graphql.FieldContext{ + Index: &i, + Result: &v[i], + } + ctx := graphql.WithFieldContext(ctx, fc) + f := func(i int) { + defer func() { + if r := recover(); r != nil { + ec.Error(ctx, ec.Recover(ctx, r)) + ret = nil + } + }() + if !isLen1 { + defer wg.Done() + } + ret[i] = ec.marshalNSWOConnection2githubᚗcomᚋtargetᚋgoalertᚋgraphql2ᚐSWOConnection(ctx, sel, v[i]) + } + if isLen1 { + f(i) + } else { + go f(i) + } + 
+ } + wg.Wait() + + for _, e := range ret { + if e == graphql.Null { + return graphql.Null + } + } + + return ret +} + func (ec *executionContext) marshalOSchedule2ᚖgithub.comᚋtargetᚋgoalertᚋscheduleᚐSchedule(ctx context.Context, sel ast.SelectionSet, v *schedule.Schedule) graphql.Marshaler { if v == nil { return graphql.Null diff --git a/graphql2/graphqlapp/swo.go b/graphql2/graphqlapp/swo.go index 51ba414b46..3d1ab456d0 100644 --- a/graphql2/graphqlapp/swo.go +++ b/graphql2/graphqlapp/swo.go @@ -2,8 +2,12 @@ package graphqlapp import ( "context" + "encoding/base64" "fmt" + "regexp" + "sort" + "github.com/google/uuid" "github.com/target/goalert/graphql2" "github.com/target/goalert/permission" "github.com/target/goalert/swo/swogrp" @@ -32,6 +36,8 @@ func (m *Mutation) SwoAction(ctx context.Context, action graphql2.SWOAction) (bo return err == nil, err } +var swoRx = regexp.MustCompile(`^GoAlert ([^ ]+)(?: SWO:([A-D]):(.{24}))?$`) + func (a *Query) SwoStatus(ctx context.Context) (*graphql2.SWOStatus, error) { if a.SWO == nil { return nil, validation.NewGenericError("not in SWO mode") @@ -42,29 +48,80 @@ func (a *Query) SwoStatus(ctx context.Context) (*graphql2.SWOStatus, error) { return nil, err } - _conns, err := a.SWO.ConnInfo(ctx) + conns, err := a.SWO.ConnInfo(ctx) if err != nil { return nil, err } - var conns []graphql2.SWOConnection - for _, c := range _conns { - conns = append(conns, graphql2.SWOConnection{ - Name: c.Name, - Count: c.Count, + nodes := make(map[string]*graphql2.SWONode) + for _, conn := range conns { + m := swoRx.FindStringSubmatch(conn.Name) + var connType, version string + idStr := "unknown-" + conn.Name + if len(m) == 4 { + version = m[1] + connType = m[2] + id, err := base64.URLEncoding.DecodeString(m[3]) + if err == nil && len(id) == 16 { + var u uuid.UUID + copy(u[:], id) + idStr = u.String() + } + } + n := nodes[idStr] + if n == nil { + n = &graphql2.SWONode{ID: idStr} + nodes[idStr] = n + } + n.Connections = append(n.Connections, 
graphql2.SWOConnection{ + Name: conn.Name, + IsNext: conn.IsNext, + Version: version, + Type: string(connType), + Count: conn.Count, }) } s := a.SWO.Status() - var nodes []graphql2.SWONode - for _, n := range s.Nodes { - nodes = append(nodes, graphql2.SWONode{ - ID: n.ID.String(), - OldValid: n.OldID == s.MainDBID, - NewValid: n.NewID == s.NextDBID, - CanExec: n.CanExec, - IsLeader: n.ID == s.LeaderID, - }) +validateNodes: + for _, node := range s.Nodes { + n := nodes[node.ID.String()] + if n == nil { + n = &graphql2.SWONode{ID: node.ID.String()} // n is nil here (no connection entry for this node); take the ID from the status node + nodes[node.ID.String()] = n + } + n.IsLeader = node.ID == s.LeaderID + n.CanExec = node.CanExec + + if node.NewID != s.NextDBID { + continue + } + if node.OldID != s.MainDBID { + continue + } + + if len(n.Connections) == 0 { + fmt.Println("no connections") + continue + } + + version := n.Connections[0].Version + for _, conn := range n.Connections { + if conn.Version != version { + fmt.Println("invalid version") + continue validateNodes + } + if !conn.IsNext && (conn.Type != "A" && conn.Type != "B") { + fmt.Println("invalid type old") + continue validateNodes + } + if conn.IsNext && (conn.Type != "C" && conn.Type != "D") { + fmt.Println("invalid type new") + continue validateNodes + } + } + + n.IsConfigValid = true } var state graphql2.SWOState @@ -87,14 +144,20 @@ func (a *Query) SwoStatus(ctx context.Context) (*graphql2.SWOStatus, error) { return nil, fmt.Errorf("unknown state: %d", s.State) } + var nodeList []graphql2.SWONode + for _, n := range nodes { + nodeList = append(nodeList, *n) + } + sort.Slice(nodeList, func(i, j int) bool { + return nodeList[i].ID < nodeList[j].ID + }) + return &graphql2.SWOStatus{ State: state, LastStatus: s.LastStatus, LastError: s.LastError, - Nodes: nodes, - - Connections: conns, + Nodes: nodeList, NextDBVersion: s.NextDBVersion, MainDBVersion: s.MainDBVersion, diff --git a/graphql2/models_gen.go b/graphql2/models_gen.go index dc526ffd56..01b7f67876 100644 --- a/graphql2/models_gen.go +++
b/graphql2/models_gen.go @@ -335,26 +335,28 @@ type RotationSearchOptions struct { } type SWOConnection struct { - Name string `json:"name"` - Count int `json:"count"` + Name string `json:"name"` + Version string `json:"version"` + Type string `json:"type"` + IsNext bool `json:"isNext"` + Count int `json:"count"` } type SWONode struct { - ID string `json:"id"` - OldValid bool `json:"oldValid"` - NewValid bool `json:"newValid"` - CanExec bool `json:"canExec"` - IsLeader bool `json:"isLeader"` + ID string `json:"id"` + CanExec bool `json:"canExec"` + IsLeader bool `json:"isLeader"` + IsConfigValid bool `json:"isConfigValid"` + Connections []SWOConnection `json:"connections"` } type SWOStatus struct { - State SWOState `json:"state"` - LastStatus string `json:"lastStatus"` - LastError string `json:"lastError"` - Nodes []SWONode `json:"nodes"` - Connections []SWOConnection `json:"connections"` - MainDBVersion string `json:"mainDBVersion"` - NextDBVersion string `json:"nextDBVersion"` + State SWOState `json:"state"` + LastStatus string `json:"lastStatus"` + LastError string `json:"lastError"` + Nodes []SWONode `json:"nodes"` + MainDBVersion string `json:"mainDBVersion"` + NextDBVersion string `json:"nextDBVersion"` } type ScheduleConnection struct { diff --git a/graphql2/schema.graphql b/graphql2/schema.graphql index 9ce2be116b..ce4c590310 100644 --- a/graphql2/schema.graphql +++ b/graphql2/schema.graphql @@ -120,8 +120,6 @@ type SWOStatus { nodes: [SWONode!]! - connections: [SWOConnection!]! - mainDBVersion: String! nextDBVersion: String! } @@ -136,17 +134,21 @@ enum SWOState { done } -type SWOConnection { - name: String! - count: Int! -} - type SWONode { id: ID! - oldValid: Boolean! - newValid: Boolean! + canExec: Boolean! isLeader: Boolean! + + isConfigValid: Boolean! + connections: [SWOConnection!] +} +type SWOConnection { + name: String! + version: String! + type: String! + isNext: Boolean! + count: Int! 
} input AlertMetricsOptions { diff --git a/swo/manager.go b/swo/manager.go index 96845650e1..9201836dd2 100644 --- a/swo/manager.go +++ b/swo/manager.go @@ -3,6 +3,7 @@ package swo import ( "context" "database/sql" + "encoding/base64" "fmt" "github.com/google/uuid" @@ -51,21 +52,33 @@ type Config struct { Logger *log.Logger } +// GoAlert v0.28.0-3141-g8a7b7d852-dirty func NewManager(cfg Config) (*Manager, error) { - mainDB, err := sqldrv.NewDB(cfg.OldDBURL, fmt.Sprintf("GoAlert %s (SWO Manager - Main)", version.GitVersion())) + id := uuid.New() + + appStr := func(typ byte) string { + vers := version.GitVersion() + id := base64.URLEncoding.EncodeToString(id[:]) + if len(vers) > 24 { + vers = vers[:24] + } + + return fmt.Sprintf("GoAlert %s SWO:%c:%s", vers, typ, id) + } + + mainDB, err := sqldrv.NewDB(cfg.OldDBURL, appStr('A')) if err != nil { return nil, fmt.Errorf("connect to old db: %w", err) } - nextDB, err := sqldrv.NewDB(cfg.NewDBURL, fmt.Sprintf("GoAlert %s (SWO Manager - Next)", version.GitVersion())) + mainAppDBC, err := sqldrv.NewConnector(cfg.OldDBURL, appStr('B')) if err != nil { - return nil, fmt.Errorf("connect to new db: %w", err) + return nil, fmt.Errorf("connect to old db: %w", err) } - - mainAppDBC, err := sqldrv.NewConnector(cfg.OldDBURL, fmt.Sprintf("GoAlert %s (SWO Node - Main)", version.GitVersion())) + nextDB, err := sqldrv.NewDB(cfg.NewDBURL, appStr('C')) if err != nil { - return nil, fmt.Errorf("connect to old db: %w", err) + return nil, fmt.Errorf("connect to new db: %w", err) } - nextAppDBC, err := sqldrv.NewConnector(cfg.NewDBURL, fmt.Sprintf("GoAlert %s (SWO Node - Next)", version.GitVersion())) + nextAppDBC, err := sqldrv.NewConnector(cfg.NewDBURL, appStr('D')) if err != nil { return nil, fmt.Errorf("connect to new db: %w", err) } @@ -100,11 +113,15 @@ func NewManager(cfg Config) (*Manager, error) { } m.taskMgr, err = swogrp.NewTaskMgr(ctx, swogrp.Config{ + NodeID: id, CanExec: cfg.CanExec, Logger: cfg.Logger, Messages: messages, + 
OldID: m.MainDBInfo.ID, + NewID: m.NextDBInfo.ID, + Executor: &Executor{mgr: m}, PauseFunc: func(ctx context.Context) error { return m.pauseResume.Pause(ctx) }, ResumeFunc: func(ctx context.Context) error { return m.pauseResume.Resume(ctx) }, @@ -168,6 +185,8 @@ func withPGXConn(ctx context.Context, db *sql.DB, runFunc func(context.Context, func (m *Manager) Status() Status { return Status{ Status: m.taskMgr.Status(), + MainDBID: m.MainDBInfo.ID, + NextDBID: m.NextDBInfo.ID, MainDBVersion: m.MainDBInfo.Version, NextDBVersion: m.NextDBInfo.Version, } diff --git a/swo/swogrp/config.go b/swo/swogrp/config.go index a25c48b849..bba6ac4dab 100644 --- a/swo/swogrp/config.go +++ b/swo/swogrp/config.go @@ -11,6 +11,8 @@ import ( type TaskFn func(context.Context) error type Config struct { + NodeID uuid.UUID + CanExec bool OldID, NewID uuid.UUID diff --git a/swo/swogrp/taskman.go b/swo/swogrp/taskmgr.go similarity index 99% rename from swo/swogrp/taskman.go rename to swo/swogrp/taskmgr.go index cbbd1b8421..3b393f321f 100644 --- a/swo/swogrp/taskman.go +++ b/swo/swogrp/taskmgr.go @@ -34,7 +34,7 @@ func NewTaskMgr(ctx context.Context, cfg Config) (*TaskMgr, error) { t := &TaskMgr{ cfg: cfg, local: Node{ - ID: uuid.New(), + ID: cfg.NodeID, OldID: cfg.OldID, NewID: cfg.NewID, diff --git a/swo/swoinfo/conninfo.go b/swo/swoinfo/conninfo.go index 9297bbb403..3219627b15 100644 --- a/swo/swoinfo/conninfo.go +++ b/swo/swoinfo/conninfo.go @@ -8,8 +8,9 @@ import ( ) type ConnCount struct { - Name string - Count int + Name string + IsNext bool + Count int } // ConnInfo provides information about the connections to both old and new databases. 
@@ -23,17 +24,21 @@ func ConnInfo(ctx context.Context, oldConn, newConn *pgx.Conn) ([]ConnCount, err return nil, err } - counts := make(map[string]int) + type connType struct { + Name string + IsNext bool + } + counts := make(map[connType]int) for _, oldConn := range oldConns { - counts[oldConn.Name.String] += int(oldConn.Count) + counts[connType{Name: oldConn.Name.String}] += int(oldConn.Count) } for _, newConn := range newConns { - counts[newConn.Name.String] += int(newConn.Count) + counts[connType{Name: newConn.Name.String, IsNext: true}] += int(newConn.Count) } var result []ConnCount - for name, count := range counts { - result = append(result, ConnCount{Name: name, Count: count}) + for t, count := range counts { + result = append(result, ConnCount{Name: t.Name, IsNext: t.IsNext, Count: count}) } return result, nil diff --git a/swo/swoinfo/db.go b/swo/swoinfo/db.go index e4cfd33bac..634ee0a2bc 100644 --- a/swo/swoinfo/db.go +++ b/swo/swoinfo/db.go @@ -2,6 +2,7 @@ package swoinfo import ( "context" + "fmt" "github.com/google/uuid" "github.com/jackc/pgx/v4" @@ -18,6 +19,9 @@ func DBInfo(ctx context.Context, conn *pgx.Conn) (*DB, error) { if err != nil { return nil, err } + if info.ID == uuid.Nil { + return nil, fmt.Errorf("no database ID") + } return &DB{ ID: info.ID, Version: info.Version, diff --git a/web/src/app/admin/switchover/AdminSwitchover.tsx b/web/src/app/admin/switchover/AdminSwitchover.tsx index f1e99e75f6..0eb1874ce8 100644 --- a/web/src/app/admin/switchover/AdminSwitchover.tsx +++ b/web/src/app/admin/switchover/AdminSwitchover.tsx @@ -43,22 +43,25 @@ const query = gql` lastStatus mainDBVersion nextDBVersion - connections { - name - count - } nodes { id canExec - oldValid - newValid isLeader + isConfigValid + connections { + name + version + type + isNext + count + } } } } ` let n = 1 +let u = 1 const names: { [key: string]: string } = {} // friendlyName will assign a persistant "friendly" name to the node. 
@@ -70,7 +73,8 @@ const names: { [key: string]: string } = {} // on another browser tab. function friendlyName(id: string): string { if (!names[id]) { - names[id] = `Node ${n++}` + if (id.startsWith('unknown')) return (names[id] = 'Unknown ' + u++) + return (names[id] = 'Node ' + n++) } return names[id] } @@ -295,84 +299,38 @@ export default function AdminSwitchover(): JSX.Element { - - - - - Application - Info - Count - - - - {data?.connections?.map((row) => ( - - row.name.includes('GoAlert') && !row.name.includes('SWO') - ? getBackgroundColor(theme.palette.error.light, 0.9) - : 'inherit', - color: (theme) => - row.name.includes('GoAlert') && !row.name.includes('SWO') - ? getColor(theme.palette.error.light, 0.6) - : 'inherit', - }} - > - - {row?.name?.split('(')[0].replace(/[)(]/g, '') ?? - '(no name)'} - - - {row?.name?.split('(')[1]?.replace(/[)(]/g, '') ?? '-'} - - {row.count} - - ))} - -
+
+ + + } severity='warning'> + From {curVer[0]} + + + theme.palette.primary.main, + }} + /> + + } severity='success' sx={{ mb: '16px' }}> + To {nextVer[0]} + + +
- - -
- - - } severity='warning'> - From {curVer[0]} - - - theme.palette.primary.main, - }} - /> - - } - severity='success' - sx={{ mb: '16px' }} - > - To {nextVer[0]} - - -
-
-
- {data?.nodes.length > 0 && data.nodes .slice() diff --git a/web/src/app/admin/switchover/SWONode.tsx b/web/src/app/admin/switchover/SWONode.tsx index dabb6dcd74..4e7cb4f2a2 100644 --- a/web/src/app/admin/switchover/SWONode.tsx +++ b/web/src/app/admin/switchover/SWONode.tsx @@ -20,6 +20,32 @@ interface SWONodeProps { export default function SWONode({ node, name }: SWONodeProps): JSX.Element { const theme = useTheme() + if (node.id.startsWith('unknown-')) { + return ( + + + + {name} + + + + + + + + + {node.connections?.reduce((acc, cur) => acc + cur.count, 0)} + + + + + + ) + } + return ( @@ -38,9 +64,9 @@ export default function SWONode({ node, name }: SWONodeProps): JSX.Element { - + - {node.oldValid ? ( + {node.isConfigValid ? ( ) : ( @@ -48,13 +74,9 @@ export default function SWONode({ node, name }: SWONodeProps): JSX.Element { - + - {node.newValid ? ( - - ) : ( - - )} + {node.connections?.reduce((acc, cur) => acc + cur.count, 0)} diff --git a/web/src/schema.d.ts b/web/src/schema.d.ts index 78e9137339..c7cdf7a35e 100644 --- a/web/src/schema.d.ts +++ b/web/src/schema.d.ts @@ -42,7 +42,6 @@ export interface SWOStatus { lastStatus: string lastError: string nodes: SWONode[] - connections: SWOConnection[] mainDBVersion: string nextDBVersion: string } @@ -56,17 +55,20 @@ export type SWOState = | 'executing' | 'done' -export interface SWOConnection { - name: string - count: number -} - export interface SWONode { id: string - oldValid: boolean - newValid: boolean canExec: boolean isLeader: boolean + isConfigValid: boolean + connections?: null | SWOConnection[] +} + +export interface SWOConnection { + name: string + version: string + type: string + isNext: boolean + count: number } export interface AlertMetricsOptions { From 4597115d874496953779f0d7766003f4eed48a92 Mon Sep 17 00:00:00 2001 From: Nathaniel Caza Date: Tue, 9 Aug 2022 20:36:47 -0500 Subject: [PATCH 159/225] remove unused imports --- web/src/app/admin/switchover/AdminSwitchover.tsx | 5 ----- 1 file changed, 
5 deletions(-) diff --git a/web/src/app/admin/switchover/AdminSwitchover.tsx b/web/src/app/admin/switchover/AdminSwitchover.tsx index 0eb1874ce8..deb6b43c6e 100644 --- a/web/src/app/admin/switchover/AdminSwitchover.tsx +++ b/web/src/app/admin/switchover/AdminSwitchover.tsx @@ -22,11 +22,6 @@ import SWONode from './SWONode' import LoadingButton from '@mui/lab/LoadingButton' import DatabaseOff from 'mdi-material-ui/DatabaseOff' import DatabaseCheck from 'mdi-material-ui/DatabaseCheck' -import Table from '@mui/material/Table' -import TableBody from '@mui/material/TableBody' -import TableCell from '@mui/material/TableCell' -import TableHead from '@mui/material/TableHead' -import TableRow from '@mui/material/TableRow' import Tooltip from '@mui/material/Tooltip' import RemoveIcon from '@mui/icons-material/PlaylistRemove' import AddIcon from '@mui/icons-material/PlaylistAdd' From 8ab80257a7e613028a9759eae77c0ce4c51cd7fe Mon Sep 17 00:00:00 2001 From: Nathaniel Caza Date: Tue, 9 Aug 2022 20:48:51 -0500 Subject: [PATCH 160/225] remove unused code/imports --- web/src/app/admin/switchover/AdminSwitchover.tsx | 3 --- 1 file changed, 3 deletions(-) diff --git a/web/src/app/admin/switchover/AdminSwitchover.tsx b/web/src/app/admin/switchover/AdminSwitchover.tsx index deb6b43c6e..e0ba348e68 100644 --- a/web/src/app/admin/switchover/AdminSwitchover.tsx +++ b/web/src/app/admin/switchover/AdminSwitchover.tsx @@ -28,7 +28,6 @@ import AddIcon from '@mui/icons-material/PlaylistAdd' import DownIcon from '@mui/icons-material/ArrowDownward' import { TransitionGroup } from 'react-transition-group' import Spinner from '../../loading/components/Spinner' -import { darken, lighten } from '@mui/system' const query = gql` query { @@ -92,8 +91,6 @@ export default function AdminSwitchover(): JSX.Element { const [lastAction, setLastAction] = useState('') const [mutationStatus, commit] = useMutation(mutation) const theme = useTheme() - const getColor = theme.palette.mode === 'light' ? 
darken : lighten - const getBackgroundColor = theme.palette.mode === 'light' ? lighten : darken const curVer = data?.mainDBVersion.split(' on ') const nextVer = data?.mainDBVersion.split(' on ') From 1daa0c313d6aa895387fe621df821134695a452e Mon Sep 17 00:00:00 2001 From: Nathaniel Caza Date: Thu, 11 Aug 2022 15:50:45 -0500 Subject: [PATCH 161/225] add error icon for non-swo GoAlert --- web/src/app/admin/switchover/SWONode.tsx | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/web/src/app/admin/switchover/SWONode.tsx b/web/src/app/admin/switchover/SWONode.tsx index 4e7cb4f2a2..ca2176b4b9 100644 --- a/web/src/app/admin/switchover/SWONode.tsx +++ b/web/src/app/admin/switchover/SWONode.tsx @@ -33,6 +33,12 @@ export default function SWONode({ node, name }: SWONodeProps): JSX.Element { primary='Application' secondary={node.id.substring(8) || '(No name given)'} /> + + {node.id.includes('GoAlert') && ( + + + + )} From 5e9cc15969ba3c651eabeb3dceea5e73474df257 Mon Sep 17 00:00:00 2001 From: Nathaniel Caza Date: Thu, 11 Aug 2022 16:00:25 -0500 Subject: [PATCH 162/225] display reasons for invalid config --- graphql2/generated.go | 46 +++++++++---------- graphql2/graphqlapp/swo.go | 12 ++--- graphql2/models_gen.go | 10 ++-- graphql2/schema.graphql | 2 +- .../app/admin/switchover/AdminSwitchover.tsx | 2 +- web/src/app/admin/switchover/SWONode.tsx | 4 +- web/src/schema.d.ts | 2 +- 7 files changed, 39 insertions(+), 39 deletions(-) diff --git a/graphql2/generated.go b/graphql2/generated.go index d2d89ee9c9..855e47f13f 100644 --- a/graphql2/generated.go +++ b/graphql2/generated.go @@ -408,11 +408,11 @@ type ComplexityRoot struct { } SWONode struct { - CanExec func(childComplexity int) int - Connections func(childComplexity int) int - ID func(childComplexity int) int - IsConfigValid func(childComplexity int) int - IsLeader func(childComplexity int) int + CanExec func(childComplexity int) int + ConfigError func(childComplexity int) int + Connections func(childComplexity int) int 
+ ID func(childComplexity int) int + IsLeader func(childComplexity int) int } SWOStatus struct { @@ -2706,6 +2706,13 @@ func (e *executableSchema) Complexity(typeName, field string, childComplexity in return e.complexity.SWONode.CanExec(childComplexity), true + case "SWONode.configError": + if e.complexity.SWONode.ConfigError == nil { + break + } + + return e.complexity.SWONode.ConfigError(childComplexity), true + case "SWONode.connections": if e.complexity.SWONode.Connections == nil { break @@ -2720,13 +2727,6 @@ func (e *executableSchema) Complexity(typeName, field string, childComplexity in return e.complexity.SWONode.ID(childComplexity), true - case "SWONode.isConfigValid": - if e.complexity.SWONode.IsConfigValid == nil { - break - } - - return e.complexity.SWONode.IsConfigValid(childComplexity), true - case "SWONode.isLeader": if e.complexity.SWONode.IsLeader == nil { break @@ -16385,8 +16385,8 @@ func (ec *executionContext) fieldContext_SWONode_isLeader(ctx context.Context, f return fc, nil } -func (ec *executionContext) _SWONode_isConfigValid(ctx context.Context, field graphql.CollectedField, obj *SWONode) (ret graphql.Marshaler) { - fc, err := ec.fieldContext_SWONode_isConfigValid(ctx, field) +func (ec *executionContext) _SWONode_configError(ctx context.Context, field graphql.CollectedField, obj *SWONode) (ret graphql.Marshaler) { + fc, err := ec.fieldContext_SWONode_configError(ctx, field) if err != nil { return graphql.Null } @@ -16399,7 +16399,7 @@ func (ec *executionContext) _SWONode_isConfigValid(ctx context.Context, field gr }() resTmp, err := ec.ResolverMiddleware(ctx, func(rctx context.Context) (interface{}, error) { ctx = rctx // use context from middleware stack in children - return obj.IsConfigValid, nil + return obj.ConfigError, nil }) if err != nil { ec.Error(ctx, err) @@ -16411,19 +16411,19 @@ func (ec *executionContext) _SWONode_isConfigValid(ctx context.Context, field gr } return graphql.Null } - res := resTmp.(bool) + res := resTmp.(string) 
fc.Result = res - return ec.marshalNBoolean2bool(ctx, field.Selections, res) + return ec.marshalNString2string(ctx, field.Selections, res) } -func (ec *executionContext) fieldContext_SWONode_isConfigValid(ctx context.Context, field graphql.CollectedField) (fc *graphql.FieldContext, err error) { +func (ec *executionContext) fieldContext_SWONode_configError(ctx context.Context, field graphql.CollectedField) (fc *graphql.FieldContext, err error) { fc = &graphql.FieldContext{ Object: "SWONode", Field: field, IsMethod: false, IsResolver: false, Child: func(ctx context.Context, field graphql.CollectedField) (*graphql.FieldContext, error) { - return nil, errors.New("field of type Boolean does not have child fields") + return nil, errors.New("field of type String does not have child fields") }, } return fc, nil @@ -16659,8 +16659,8 @@ func (ec *executionContext) fieldContext_SWOStatus_nodes(ctx context.Context, fi return ec.fieldContext_SWONode_canExec(ctx, field) case "isLeader": return ec.fieldContext_SWONode_isLeader(ctx, field) - case "isConfigValid": - return ec.fieldContext_SWONode_isConfigValid(ctx, field) + case "configError": + return ec.fieldContext_SWONode_configError(ctx, field) case "connections": return ec.fieldContext_SWONode_connections(ctx, field) } @@ -29974,9 +29974,9 @@ func (ec *executionContext) _SWONode(ctx context.Context, sel ast.SelectionSet, if out.Values[i] == graphql.Null { invalids++ } - case "isConfigValid": + case "configError": - out.Values[i] = ec._SWONode_isConfigValid(ctx, field, obj) + out.Values[i] = ec._SWONode_configError(ctx, field, obj) if out.Values[i] == graphql.Null { invalids++ diff --git a/graphql2/graphqlapp/swo.go b/graphql2/graphqlapp/swo.go index 3d1ab456d0..c6b10722a5 100644 --- a/graphql2/graphqlapp/swo.go +++ b/graphql2/graphqlapp/swo.go @@ -94,34 +94,34 @@ validateNodes: n.CanExec = node.CanExec if node.NewID != s.NextDBID { + n.ConfigError = "next-db-url is invalid" continue } if node.OldID != s.MainDBID { + 
n.ConfigError = "db-url is invalid" continue } if len(n.Connections) == 0 { - fmt.Println("no connections") + n.ConfigError = "node is not connected to any DB" continue } version := n.Connections[0].Version for _, conn := range n.Connections { if conn.Version != version { - fmt.Println("invalid version") + n.ConfigError = "node is connected with multiple versions of GoAlert" continue validateNodes } if !conn.IsNext && (conn.Type != "A" && conn.Type != "B") { - fmt.Println("invalid type old") + n.ConfigError = fmt.Sprintf("connected to db-url (main) with invalid type %s (expected A or B)", conn.Type) continue validateNodes } if conn.IsNext && (conn.Type != "C" && conn.Type != "D") { - fmt.Println("invalid type new") + n.ConfigError = fmt.Sprintf("connected to next-db-url (next) with invalid type %s (expected C or D)", conn.Type) continue validateNodes } } - - n.IsConfigValid = true } var state graphql2.SWOState diff --git a/graphql2/models_gen.go b/graphql2/models_gen.go index 01b7f67876..e559355fbe 100644 --- a/graphql2/models_gen.go +++ b/graphql2/models_gen.go @@ -343,11 +343,11 @@ type SWOConnection struct { } type SWONode struct { - ID string `json:"id"` - CanExec bool `json:"canExec"` - IsLeader bool `json:"isLeader"` - IsConfigValid bool `json:"isConfigValid"` - Connections []SWOConnection `json:"connections"` + ID string `json:"id"` + CanExec bool `json:"canExec"` + IsLeader bool `json:"isLeader"` + ConfigError string `json:"configError"` + Connections []SWOConnection `json:"connections"` } type SWOStatus struct { diff --git a/graphql2/schema.graphql b/graphql2/schema.graphql index ce4c590310..d4aa59d629 100644 --- a/graphql2/schema.graphql +++ b/graphql2/schema.graphql @@ -140,7 +140,7 @@ type SWONode { canExec: Boolean! isLeader: Boolean! - isConfigValid: Boolean! + configError: String! connections: [SWOConnection!] 
} type SWOConnection { diff --git a/web/src/app/admin/switchover/AdminSwitchover.tsx b/web/src/app/admin/switchover/AdminSwitchover.tsx index e0ba348e68..c64c493b3c 100644 --- a/web/src/app/admin/switchover/AdminSwitchover.tsx +++ b/web/src/app/admin/switchover/AdminSwitchover.tsx @@ -41,7 +41,7 @@ const query = gql` id canExec isLeader - isConfigValid + configError connections { name version diff --git a/web/src/app/admin/switchover/SWONode.tsx b/web/src/app/admin/switchover/SWONode.tsx index ca2176b4b9..65ae6993a1 100644 --- a/web/src/app/admin/switchover/SWONode.tsx +++ b/web/src/app/admin/switchover/SWONode.tsx @@ -71,8 +71,8 @@ export default function SWONode({ node, name }: SWONodeProps): JSX.Element { - - {node.isConfigValid ? ( + + {!node.configError ? ( ) : ( diff --git a/web/src/schema.d.ts b/web/src/schema.d.ts index c7cdf7a35e..2b3545e8a6 100644 --- a/web/src/schema.d.ts +++ b/web/src/schema.d.ts @@ -59,7 +59,7 @@ export interface SWONode { id: string canExec: boolean isLeader: boolean - isConfigValid: boolean + configError: string connections?: null | SWOConnection[] } From c90b3483c0f923d86d7cf8abcd27dd488d46d831 Mon Sep 17 00:00:00 2001 From: Nathaniel Caza Date: Thu, 11 Aug 2022 16:14:29 -0500 Subject: [PATCH 163/225] fix swo refetch --- web/src/app/admin/switchover/AdminSwitchover.tsx | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/web/src/app/admin/switchover/AdminSwitchover.tsx b/web/src/app/admin/switchover/AdminSwitchover.tsx index c64c493b3c..812ca0865f 100644 --- a/web/src/app/admin/switchover/AdminSwitchover.tsx +++ b/web/src/app/admin/switchover/AdminSwitchover.tsx @@ -96,11 +96,13 @@ export default function AdminSwitchover(): JSX.Element { const nextVer = data?.mainDBVersion.split(' on ') useEffect(() => { + if (data?.state === 'done') return + const t = setInterval(() => { if (!fetching) refetch() }, 1000) return () => clearInterval(t) - }, []) + }, [fetching, refetch, data?.state]) if (fetching) { return From 
bbb545ad95a496322e7d7f093332c28149ac96a9 Mon Sep 17 00:00:00 2001 From: Nathaniel Caza Date: Mon, 15 Aug 2022 10:35:36 -0500 Subject: [PATCH 164/225] cleanup readme --- swo/swosync/README.md | 42 +++++++++++++++--------------------------- 1 file changed, 15 insertions(+), 27 deletions(-) diff --git a/swo/swosync/README.md b/swo/swosync/README.md index 102e7b7b44..1ba1ccc841 100644 --- a/swo/swosync/README.md +++ b/swo/swosync/README.md @@ -4,48 +4,36 @@ Package `swosync` handles the logical replication from the source DB to the dest ## Theory of operation -All changes (INSERT, UPDATE, DELETE) are recorded by triggers in the change_log table as -table/row_id pairs, only tracking a set of changed rows (but not their point-in-time data). -The changes are then read in and applied in batches, by reading the CURRENT state of the row -from the source database and writing it to the destination database, at the time of sync. +Triggers record all changes (INSERT, UPDATE, DELETE) in the `change_log` table as `table, row_id` pairs, only tracking a set of changed rows but not their point-in-time data. The changes are then read in and applied in batches by reading the CURRENT state of the row from the source database and writing it to the destination database at the time of sync. -This avoids the need to attempt to find a sequential solution to concurrent updates, as well as -intermediate row states, by only syncing the final result. It also avoids the need to record -intermediate updates. +Replicating point-in-time differences between "snapshots" avoids the need for a sequential solution for concurrent updates and intermediate row states by only syncing the final result. It also becomes more efficient because each row must be replicated at most once, even when multiple updates occur between sync points. -As an example, if a row is inserted and then updated multiple times, the next sync will result in -a single insert. 
- -The process depends on having a valid & consistent view of the source database which can be -obtained by by a serializable transaction. Since only the final state of data is used, dependency -solving/ordering for concurrent updates is not necessary. +The process depends on having a consistent view of the source database, which a serializable transaction can obtain, or during a stop-the-world lock (during the final sync). ### Basic strategy -1. Read all changes as table and row ids +1. Read all changes (table and row ids) 2. Fetch row data for each changed row 3. Insert rows from old DB that are missing in new DB, in fkey-dependency order 4. Update rows from old DB that exist in both, in fkey-dependency order -5. Delete rows missing from old DB that exist in new DB, in reverse-fkey-dependency order -6. Delete synced entries from change_log table +5. Delete rows missing from the old DB that exist in the new DB, in reverse-fkey-dependency order +6. Delete synced entries from the `change_log` table +7. Repeat until both DBs are close in sync +8. Obtain a stop-the-world lock +9. Perform final sync, and update the `use_next_db` pointer +10. Release the stop-the-world lock +11. New DB is used for all future transactions ## Further Notes -It is important to keep the sync loop as tight as is possible, particularly in "final sync" mode. -When performing the final sync, the database will be locked for the full duration so no additional -changes can be made. This is necessary to ensure that the database is in a consistent state with no -leftover changes before switchover state is updated to `use_next_db`. - -A commit to the source DB ensures the Serializable state of the transaction is maintained, and is -done AFTER sending changes to the new DB as the final sync also points to the new one. +It is essential to keep the sync loop as tight as possible, particularly in "final sync" mode. 
The final sync will pause all transactions during its synchronization process; this is necessary to ensure that the database is in a consistent state with no leftover changes before setting the `use_next_db` pointer. ### Round Trips - 1 to start tx, read all change ids & sequences (also stop-the-world lock in final mode) - 1 to fetch row data from each table (single batch, 1 query per table) -- 1 to apply all updates to new DB -- 1 to commit src tx (also switches over to new DB in final mode) +- 1 to apply all updates to the new DB +- 1 to commit src tx (also updates `use_next_db` in final mode) - 1 to delete all synced change rows from the DB -There is an extra round-trip for last delete as a tradoff to favor shorter stop-the-world time, -since deleting the last set of changes isn't necessary to wait for after the switchover has been made. +An extra round-trip for the last delete is a trade-off to favor a shorter stop-the-world time since deleting the previous change records isn't necessary after the switchover. From 735ee308f194c458ba019b197062cf86f65ce237 Mon Sep 17 00:00:00 2001 From: Nathaniel Caza Date: Mon, 15 Aug 2022 14:08:59 -0500 Subject: [PATCH 165/225] cleanup SWO readme --- swo/README.md | 23 +++++------------------ 1 file changed, 5 insertions(+), 18 deletions(-) diff --git a/swo/README.md b/swo/README.md index 0f22344f3f..6a43318891 100644 --- a/swo/README.md +++ b/swo/README.md @@ -1,26 +1,13 @@ # Switchover (SWO) -Switchover (SWO) is a feature that allows a live system to safely switch from one database to another. +Switchover (SWO) is a feature that allows a live system to switch from one database to another safely and with little to no user impact. ## Theory of Operation -During SWO, 2 DB url's are involved. "old" and "new". +Switchover mode is initiated by starting GoAlert with an additional DB URL `--db-url-next`. The database referenced by `--db-url` is referred to as the "old" DB and the `--db-url-next` is the "new" DB. 
-- All app-related DB connections acquire a shared advisory lock `GlobalSwitchOver` to the "old" DB, followed by checking switchover state is not `use_next_db`. These locks are at the session level and persist as long as the connections remain in the pool. -- If it is `use_next_db`, SWO is complete, the connection is closed, and future connections are made to the "new" DB without the lock. -- Once initiated, the first engine instance to acquire the `GlobalSwitchOverExec` lock (separate from `GlobalSwitchOver`) will begin the switch. -- When the switch is started, a `change_log` table is created and populated by triggers added to existing tables for INSERT/UPDATE/DELETE operations. -- An initial sync is performed effectively copying a snapshot of all data from the "old" DB to the "new" DB. -- Subsequent syncs are performed by applying records from the `change_log` table to the "new" DB. -- After each sync, the synced rows are deleted from the `change_log` table, so that it always represents the diff between both DBs. -- This is repeated until the `change_log` table has less than 100 rows at the start of a sync. -- Once the DBs are relatively similar, SWO goes into "critical phase". -- In this phase, idle connections are disabled until a shared deadline (meaning each query requires the shared lock, as connections are not re-used). -- When the final sync begins, an exclusive `GlobalSwitchOver` lock is acquired, and behaves as a stop-the-world lock. -- After the final sync, sequences are also copied from the "old" DB to the "new" DB. -- Finally, the `current_state` column is updated to `use_next_db`, and the `GlobalSwitchOver` lock is released. +All new application DB connections first acquire a shared advisory lock, then check the `use_next_db` pointer. If the pointer is set, all new connections will be made to the "new" DB (without the checking overhead), and the connection to the "old" DB will be terminated. 
-If deadlines are reached, or any error is encountered, the connection for the switchover is dropped, and syncing resumes. If a commit to the new DB succeeds, but fails on the old DB, an error state is entered. - -From an error state, only RESET can be performed, which wipes the "new" DB and recreates `change_log` and all triggers to begin again with another attempt. +The switch is performed by first replicating a complete snapshot of the "old" DB to the "new" DB. After the initial sync, subsequent synchronization is an incremental "diff" of snapshots—more info on how this works is available in the `swosync` package. +When both DBs are reasonably in-sync, a stop-the-world lock (i.e., an exclusive lock that conflicts with the shared advisory locks) is acquired, followed by the final logical sync. During the same transaction, the `use_next_db` pointer is set. After the lock is released, the connector will send all new queries to the "new" DB. From 57ae9e49d614c782519c07405576b3660cdff0ca Mon Sep 17 00:00:00 2001 From: Nathaniel Caza Date: Wed, 17 Aug 2022 16:50:57 -0500 Subject: [PATCH 166/225] fix nil pointer --- graphql2/graphqlapp/swo.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/graphql2/graphqlapp/swo.go b/graphql2/graphqlapp/swo.go index c6b10722a5..e473518036 100644 --- a/graphql2/graphqlapp/swo.go +++ b/graphql2/graphqlapp/swo.go @@ -87,7 +87,7 @@ validateNodes: for _, node := range s.Nodes { n := nodes[node.ID.String()] if n == nil { - n = &graphql2.SWONode{ID: n.ID} + n = &graphql2.SWONode{ID: node.ID.String()} nodes[node.ID.String()] = n } n.IsLeader = node.ID == s.LeaderID From 7b8ed27e59858ff803e5152ad4f0516a05bcaf1b Mon Sep 17 00:00:00 2001 From: Nathaniel Caza Date: Wed, 17 Aug 2022 17:04:25 -0500 Subject: [PATCH 167/225] add failsafe reset timeout --- swo/swogrp/taskmgr.go | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/swo/swogrp/taskmgr.go b/swo/swogrp/taskmgr.go index 
3b393f321f..eabc893c94 100644 --- a/swo/swogrp/taskmgr.go +++ b/swo/swogrp/taskmgr.go @@ -86,7 +86,23 @@ func (t *TaskMgr) statusLoop() { func (t *TaskMgr) messageLoop() { ctx := t.cfg.Logger.BackgroundContext() - for msg := range t.cfg.Messages.Events() { + for { + var msg swomsg.Message + if t.state == ClusterStateResetting { + tm := time.NewTimer(15 * time.Second) + select { + case <-tm.C: + // timeout if no messages for 15 sec + t.state = ClusterStateUnknown + t.cfg.Logger.Error(ctx, fmt.Errorf("timeout waiting for messages during reset")) + tm.Stop() + continue + case msg = <-t.cfg.Messages.Events(): + tm.Stop() + } + } else { + msg = <-t.cfg.Messages.Events() + } t.mx.Lock() if ch, ok := t.waitMsg[msg.ID]; ok { close(ch) From 5e0fff1fde5e8187d8a9c150d357a322cdf6d80d Mon Sep 17 00:00:00 2001 From: Nathaniel Caza Date: Wed, 17 Aug 2022 17:14:04 -0500 Subject: [PATCH 168/225] add uptime to UI --- graphql2/generated.go | 55 +++++++++++++++++++ graphql2/graphqlapp/swo.go | 4 ++ graphql2/models_gen.go | 1 + graphql2/schema.graphql | 2 + swo/swogrp/node.go | 8 ++- swo/swogrp/taskmgr.go | 2 + .../app/admin/switchover/AdminSwitchover.tsx | 3 +- web/src/app/admin/switchover/SWONode.tsx | 4 ++ web/src/schema.d.ts | 1 + 9 files changed, 78 insertions(+), 2 deletions(-) diff --git a/graphql2/generated.go b/graphql2/generated.go index 855e47f13f..eb2e01345c 100644 --- a/graphql2/generated.go +++ b/graphql2/generated.go @@ -413,6 +413,7 @@ type ComplexityRoot struct { Connections func(childComplexity int) int ID func(childComplexity int) int IsLeader func(childComplexity int) int + Uptime func(childComplexity int) int } SWOStatus struct { @@ -2734,6 +2735,13 @@ func (e *executableSchema) Complexity(typeName, field string, childComplexity in return e.complexity.SWONode.IsLeader(childComplexity), true + case "SWONode.uptime": + if e.complexity.SWONode.Uptime == nil { + break + } + + return e.complexity.SWONode.Uptime(childComplexity), true + case "SWOStatus.lastError": if 
e.complexity.SWOStatus.LastError == nil { break @@ -16385,6 +16393,47 @@ func (ec *executionContext) fieldContext_SWONode_isLeader(ctx context.Context, f return fc, nil } +func (ec *executionContext) _SWONode_uptime(ctx context.Context, field graphql.CollectedField, obj *SWONode) (ret graphql.Marshaler) { + fc, err := ec.fieldContext_SWONode_uptime(ctx, field) + if err != nil { + return graphql.Null + } + ctx = graphql.WithFieldContext(ctx, fc) + defer func() { + if r := recover(); r != nil { + ec.Error(ctx, ec.Recover(ctx, r)) + ret = graphql.Null + } + }() + resTmp, err := ec.ResolverMiddleware(ctx, func(rctx context.Context) (interface{}, error) { + ctx = rctx // use context from middleware stack in children + return obj.Uptime, nil + }) + if err != nil { + ec.Error(ctx, err) + return graphql.Null + } + if resTmp == nil { + return graphql.Null + } + res := resTmp.(*string) + fc.Result = res + return ec.marshalOString2ᚖstring(ctx, field.Selections, res) +} + +func (ec *executionContext) fieldContext_SWONode_uptime(ctx context.Context, field graphql.CollectedField) (fc *graphql.FieldContext, err error) { + fc = &graphql.FieldContext{ + Object: "SWONode", + Field: field, + IsMethod: false, + IsResolver: false, + Child: func(ctx context.Context, field graphql.CollectedField) (*graphql.FieldContext, error) { + return nil, errors.New("field of type String does not have child fields") + }, + } + return fc, nil +} + func (ec *executionContext) _SWONode_configError(ctx context.Context, field graphql.CollectedField, obj *SWONode) (ret graphql.Marshaler) { fc, err := ec.fieldContext_SWONode_configError(ctx, field) if err != nil { @@ -16659,6 +16708,8 @@ func (ec *executionContext) fieldContext_SWOStatus_nodes(ctx context.Context, fi return ec.fieldContext_SWONode_canExec(ctx, field) case "isLeader": return ec.fieldContext_SWONode_isLeader(ctx, field) + case "uptime": + return ec.fieldContext_SWONode_uptime(ctx, field) case "configError": return 
ec.fieldContext_SWONode_configError(ctx, field) case "connections": @@ -29974,6 +30025,10 @@ func (ec *executionContext) _SWONode(ctx context.Context, sel ast.SelectionSet, if out.Values[i] == graphql.Null { invalids++ } + case "uptime": + + out.Values[i] = ec._SWONode_uptime(ctx, field, obj) + case "configError": out.Values[i] = ec._SWONode_configError(ctx, field, obj) diff --git a/graphql2/graphqlapp/swo.go b/graphql2/graphqlapp/swo.go index e473518036..386ea7e2ae 100644 --- a/graphql2/graphqlapp/swo.go +++ b/graphql2/graphqlapp/swo.go @@ -6,6 +6,7 @@ import ( "fmt" "regexp" "sort" + "time" "github.com/google/uuid" "github.com/target/goalert/graphql2" @@ -93,6 +94,9 @@ validateNodes: n.IsLeader = node.ID == s.LeaderID n.CanExec = node.CanExec + up := time.Since(node.StartedAt).Truncate(time.Second).String() + n.Uptime = &up + if node.NewID != s.NextDBID { n.ConfigError = "next-db-url is invalid" continue diff --git a/graphql2/models_gen.go b/graphql2/models_gen.go index e559355fbe..6b8b7cc2d8 100644 --- a/graphql2/models_gen.go +++ b/graphql2/models_gen.go @@ -346,6 +346,7 @@ type SWONode struct { ID string `json:"id"` CanExec bool `json:"canExec"` IsLeader bool `json:"isLeader"` + Uptime *string `json:"uptime"` ConfigError string `json:"configError"` Connections []SWOConnection `json:"connections"` } diff --git a/graphql2/schema.graphql b/graphql2/schema.graphql index d4aa59d629..1bb80188c0 100644 --- a/graphql2/schema.graphql +++ b/graphql2/schema.graphql @@ -140,6 +140,8 @@ type SWONode { canExec: Boolean! isLeader: Boolean! + uptime: String + configError: String! connections: [SWOConnection!] 
} diff --git a/swo/swogrp/node.go b/swo/swogrp/node.go index 4378e3e61e..fcbd907bd1 100644 --- a/swo/swogrp/node.go +++ b/swo/swogrp/node.go @@ -1,6 +1,10 @@ package swogrp -import "github.com/google/uuid" +import ( + "time" + + "github.com/google/uuid" +) type Node struct { ID uuid.UUID @@ -9,4 +13,6 @@ type Node struct { OldID uuid.UUID NewID uuid.UUID + + StartedAt time.Time } diff --git a/swo/swogrp/taskmgr.go b/swo/swogrp/taskmgr.go index eabc893c94..7282038a55 100644 --- a/swo/swogrp/taskmgr.go +++ b/swo/swogrp/taskmgr.go @@ -39,6 +39,8 @@ func NewTaskMgr(ctx context.Context, cfg Config) (*TaskMgr, error) { NewID: cfg.NewID, CanExec: cfg.CanExec, + + StartedAt: time.Now(), }, nodes: make(map[uuid.UUID]Node), paused: make(map[uuid.UUID]struct{}), diff --git a/web/src/app/admin/switchover/AdminSwitchover.tsx b/web/src/app/admin/switchover/AdminSwitchover.tsx index 812ca0865f..2b04c7165a 100644 --- a/web/src/app/admin/switchover/AdminSwitchover.tsx +++ b/web/src/app/admin/switchover/AdminSwitchover.tsx @@ -39,6 +39,7 @@ const query = gql` nextDBVersion nodes { id + uptime canExec isLeader configError @@ -104,7 +105,7 @@ export default function AdminSwitchover(): JSX.Element { return () => clearInterval(t) }, [fetching, refetch, data?.state]) - if (fetching) { + if (fetching && !data?.state) { return } diff --git a/web/src/app/admin/switchover/SWONode.tsx b/web/src/app/admin/switchover/SWONode.tsx index 65ae6993a1..43fcdf4e5a 100644 --- a/web/src/app/admin/switchover/SWONode.tsx +++ b/web/src/app/admin/switchover/SWONode.tsx @@ -79,6 +79,10 @@ export default function SWONode({ node, name }: SWONodeProps): JSX.Element { )} + + + {node.uptime} + diff --git a/web/src/schema.d.ts b/web/src/schema.d.ts index 2b3545e8a6..261717360f 100644 --- a/web/src/schema.d.ts +++ b/web/src/schema.d.ts @@ -59,6 +59,7 @@ export interface SWONode { id: string canExec: boolean isLeader: boolean + uptime?: null | string configError: string connections?: null | SWOConnection[] } From 
314431263db7a505e4ddb1b40e364fe83716fc91 Mon Sep 17 00:00:00 2001 From: Nathaniel Caza Date: Thu, 18 Aug 2022 10:20:39 -0500 Subject: [PATCH 169/225] fix region id race --- app/initengine.go | 19 ++++++++----------- 1 file changed, 8 insertions(+), 11 deletions(-) diff --git a/app/initengine.go b/app/initengine.go index 7319cb212e..71ce1a2356 100644 --- a/app/initengine.go +++ b/app/initengine.go @@ -10,21 +10,18 @@ import ( ) func (app *App) initEngine(ctx context.Context) error { - var regionIndex int err := app.db.QueryRowContext(ctx, `SELECT id FROM region_ids WHERE name = $1`, app.cfg.RegionName).Scan(®ionIndex) if errors.Is(err, sql.ErrNoRows) { // doesn't exist, try to create - err = app.db.QueryRowContext(ctx, ` - WITH inserted AS ( - INSERT INTO region_ids (name) VALUES ($1) - ON CONFLICT DO NOTHING - RETURNING id - ) - SELECT id FROM region_ids WHERE name = $1 - UNION - SELECT id FROM inserted - `, app.cfg.RegionName).Scan(®ionIndex) + _, err = app.db.ExecContext(ctx, ` + INSERT INTO region_ids (name) VALUES ($1) + ON CONFLICT DO NOTHING`, app.cfg.RegionName) + if err != nil { + return errors.Wrap(err, "insert region") + } + + err = app.db.QueryRowContext(ctx, `SELECT id FROM region_ids WHERE name = $1`, app.cfg.RegionName).Scan(®ionIndex) } if err != nil { return errors.Wrap(err, "get region index") From 98f5571ebb0ab3ab7ca15c4745716ba51f90c147 Mon Sep 17 00:00:00 2001 From: Nathaniel Caza Date: Thu, 18 Aug 2022 10:20:47 -0500 Subject: [PATCH 170/225] fix shutdown race --- app/shutdown.go | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/app/shutdown.go b/app/shutdown.go index b48dc91cb4..b0bfb105c6 100644 --- a/app/shutdown.go +++ b/app/shutdown.go @@ -3,6 +3,7 @@ package app import ( "context" "os" + "reflect" "time" "github.com/pkg/errors" @@ -34,6 +35,11 @@ func (app *App) _Shutdown(ctx context.Context) error { if sh == nil { return } + t := reflect.TypeOf(sh) + if reflect.ValueOf(sh) == reflect.Zero(t) { + // check for nil pointer + return + } 
err := sh.Shutdown(ctx) if err != nil { errs = append(errs, errors.Wrap(err, msg)) From 9c893a4ded2064c6b7bde34a657cc1bda1f82d7f Mon Sep 17 00:00:00 2001 From: Nathaniel Caza Date: Thu, 18 Aug 2022 10:33:09 -0500 Subject: [PATCH 171/225] resume after done --- swo/swogrp/taskmgr.go | 1 + 1 file changed, 1 insertion(+) diff --git a/swo/swogrp/taskmgr.go b/swo/swogrp/taskmgr.go index 7282038a55..7a8ef01f12 100644 --- a/swo/swogrp/taskmgr.go +++ b/swo/swogrp/taskmgr.go @@ -180,6 +180,7 @@ func (t *TaskMgr) messageLoop() { } t.startTask(t.cfg.Executor.Exec, "done") case t.state == ClusterStateExecuting && msg.Type == "done" && msg.AckID == t.lastMsgID: + t.cancel() t.state = ClusterStateDone case msg.Type == "status": if msg.AckID != t.lastMsgID { From 3f0a195cccd50d4553f4aac208e8a0889fdc9630 Mon Sep 17 00:00:00 2001 From: Nathaniel Caza Date: Thu, 18 Aug 2022 10:33:31 -0500 Subject: [PATCH 172/225] add err state for bad uptime --- web/src/app/admin/switchover/SWONode.tsx | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/web/src/app/admin/switchover/SWONode.tsx b/web/src/app/admin/switchover/SWONode.tsx index 43fcdf4e5a..0162e354d2 100644 --- a/web/src/app/admin/switchover/SWONode.tsx +++ b/web/src/app/admin/switchover/SWONode.tsx @@ -61,7 +61,7 @@ export default function SWONode({ node, name }: SWONodeProps): JSX.Element { - + {node.canExec ? ( ) : ( @@ -81,7 +81,13 @@ export default function SWONode({ node, name }: SWONodeProps): JSX.Element { - {node.uptime} + + {node.uptime ? 
node.uptime : } + From 260953fa2f989dbb268330008bb19456fd9c387f Mon Sep 17 00:00:00 2001 From: Nathaniel Caza Date: Thu, 18 Aug 2022 10:33:43 -0500 Subject: [PATCH 173/225] vacuum analyze after datagen --- devtools/resetdb/main.go | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/devtools/resetdb/main.go b/devtools/resetdb/main.go index f304700748..c04bb42d3a 100644 --- a/devtools/resetdb/main.go +++ b/devtools/resetdb/main.go @@ -278,6 +278,10 @@ func fillDB(ctx context.Context, url string) error { // fix sequences _, err = pool.Exec(ctx, "SELECT pg_catalog.setval('public.alerts_id_seq', (select max(id)+1 from public.alerts), true)") must(err) + + _, err = pool.Exec(ctx, "vacuum analyze") + must(err) + return nil } From 88d05fff1b68dbcdff5aac041176e3728ce4c5a4 Mon Sep 17 00:00:00 2001 From: Nathaniel Caza Date: Thu, 18 Aug 2022 10:34:19 -0500 Subject: [PATCH 174/225] add prometheus to swo --- Procfile.swo | 18 ++++++++++-------- devtools/prometheus/prometheus-swo.yml | 15 +++++++++++++++ 2 files changed, 25 insertions(+), 8 deletions(-) create mode 100644 devtools/prometheus/prometheus-swo.yml diff --git a/Procfile.swo b/Procfile.swo index 74b2b2d494..38bdfc0c3c 100644 --- a/Procfile.swo +++ b/Procfile.swo @@ -5,32 +5,34 @@ goalert: ./bin/goalert -l=localhost:3040 --ui-dir=web/src/build --db-url=postgre smtp: go run github.com/mailhog/MailHog -ui-bind-addr=localhost:8025 -api-bind-addr=localhost:8025 -smtp-bind-addr=localhost:1025 | grep -v KEEPALIVE +prom: bin/tools/prometheus --log.level=warn --config.file=devtools/prometheus/prometheus-swo.yml --storage.tsdb.path=bin/prom-data/ --web.listen-address=localhost:9090 + @watch-file=./web/src/esbuild.config.js ui: yarn workspace goalert-web run esbuild --watch @watch-file=./bin/goalert -ga2: ./bin/goalert -l=localhost:3050 --ui-dir=web/src/build --db-url=postgres://goalert@localhost:5435/goalert?sslmode=disable --db-url-next=postgres://goalert@localhost:5435/goalert2 --api-only +ga2: ./bin/goalert 
-l=localhost:3050 --ui-dir=web/src/build --db-url=postgres://goalert@localhost:5435/goalert?sslmode=disable --listen-prometheus=localhost:2113 --db-url-next=postgres://goalert@localhost:5435/goalert2 --api-only @watch-file=./bin/goalert -ga3: ./bin/goalert -l=localhost:3051 --ui-dir=web/src/build --db-url=postgres://goalert@localhost:5435/goalert?sslmode=disable --db-url-next=postgres://goalert@localhost:5435/goalert2 --api-only +ga3: ./bin/goalert -l=localhost:3051 --ui-dir=web/src/build --db-url=postgres://goalert@localhost:5435/goalert?sslmode=disable --listen-prometheus=localhost:2114 --db-url-next=postgres://goalert@localhost:5435/goalert2 --api-only @watch-file=./bin/goalert -ga4: ./bin/goalert -l=localhost:3052 --ui-dir=web/src/build --db-url=postgres://goalert@localhost:5435/goalert?sslmode=disable --db-url-next=postgres://goalert@localhost:5435/goalert2 --api-only +ga4: ./bin/goalert -l=localhost:3052 --ui-dir=web/src/build --db-url=postgres://goalert@localhost:5435/goalert?sslmode=disable --listen-prometheus=localhost:2115 --db-url-next=postgres://goalert@localhost:5435/goalert2 --api-only @watch-file=./bin/goalert -ga5: ./bin/goalert -l=localhost:3053 --ui-dir=web/src/build --db-url=postgres://goalert@localhost:5435/goalert?sslmode=disable --db-url-next=postgres://goalert@localhost:5435/goalert2 --api-only +ga5: ./bin/goalert -l=localhost:3053 --ui-dir=web/src/build --db-url=postgres://goalert@localhost:5435/goalert?sslmode=disable --listen-prometheus=localhost:2116 --db-url-next=postgres://goalert@localhost:5435/goalert2 --api-only @watch-file=./bin/goalert -ga6: ./bin/goalert -l=localhost:3054 --ui-dir=web/src/build --db-url=postgres://goalert@localhost:5435/goalert?sslmode=disable --db-url-next=postgres://goalert@localhost:5435/goalert2 --api-only +ga6: ./bin/goalert -l=localhost:3054 --ui-dir=web/src/build --db-url=postgres://goalert@localhost:5435/goalert?sslmode=disable --listen-prometheus=localhost:2117 
--db-url-next=postgres://goalert@localhost:5435/goalert2 --api-only @watch-file=./bin/goalert -ga7: ./bin/goalert -l=localhost:3055 --ui-dir=web/src/build --db-url=postgres://goalert@localhost:5435/goalert?sslmode=disable --db-url-next=postgres://goalert@localhost:5435/goalert2 +ga7: ./bin/goalert -l=localhost:3055 --ui-dir=web/src/build --db-url=postgres://goalert@localhost:5435/goalert?sslmode=disable --listen-prometheus=localhost:2118 --db-url-next=postgres://goalert@localhost:5435/goalert2 @watch-file=./bin/goalert -ga8: ./bin/goalert -l=localhost:3056 --ui-dir=web/src/build --db-url=postgres://goalert@localhost:5435/goalert?sslmode=disable --db-url-next=postgres://goalert@localhost:5435/goalert2 +ga8: ./bin/goalert -l=localhost:3056 --ui-dir=web/src/build --db-url=postgres://goalert@localhost:5435/goalert?sslmode=disable --listen-prometheus=localhost:2119 --db-url-next=postgres://goalert@localhost:5435/goalert2 @watch-file=./bin/goalert -ga9: ./bin/goalert -l=localhost:3057 --ui-dir=web/src/build --db-url=postgres://goalert@localhost:5435/goalert?sslmode=disable --db-url-next=postgres://goalert@localhost:5435/goalert2 +ga9: ./bin/goalert -l=localhost:3057 --ui-dir=web/src/build --db-url=postgres://goalert@localhost:5435/goalert?sslmode=disable --listen-prometheus=localhost:2120 --db-url-next=postgres://goalert@localhost:5435/goalert2 proxy: go run ./devtools/simpleproxy -addr localhost:3030 /=http://localhost:3040,http://localhost:3050,http://localhost:3051,http://localhost:3052,http://localhost:3053,http://localhost:3054,http://localhost:3055,http://localhost:3056,http://localhost:3057 diff --git a/devtools/prometheus/prometheus-swo.yml b/devtools/prometheus/prometheus-swo.yml new file mode 100644 index 0000000000..2fdf98f220 --- /dev/null +++ b/devtools/prometheus/prometheus-swo.yml @@ -0,0 +1,15 @@ +global: + scrape_interval: 3s +scrape_configs: + - job_name: goalert + static_configs: + - targets: + - localhost:2112 + - localhost:2113 + - localhost:2114 + - 
localhost:2115 + - localhost:2116 + - localhost:2117 + - localhost:2118 + - localhost:2119 + - localhost:2120 From 3badad662a2303e7bab6c2f7a7c47f57cb53fac6 Mon Sep 17 00:00:00 2001 From: Nathaniel Caza Date: Thu, 18 Aug 2022 12:12:36 -0500 Subject: [PATCH 175/225] switch to upsert --- swo/swosync/initialsync.go | 1 - swo/swosync/logicalreplicator.go | 6 +---- swo/swosync/logicalsync.go | 2 +- swo/swosync/rowset.go | 14 ---------- swo/swosync/tablesync.go | 45 +++++++++++++------------------- 5 files changed, 20 insertions(+), 48 deletions(-) delete mode 100644 swo/swosync/rowset.go diff --git a/swo/swosync/initialsync.go b/swo/swosync/initialsync.go index 495823fb81..a6dfa05618 100644 --- a/swo/swosync/initialsync.go +++ b/swo/swosync/initialsync.go @@ -98,7 +98,6 @@ func (l *LogicalReplicator) initialSyncTable(ctx context.Context, srcTx, dstTx p return 0, fmt.Errorf("scan: %w", err) } insertRows = append(insertRows, rowData) - l.dstRows.Set(changeID{table.Name(), id}) if len(insertRows) < 10000 { continue diff --git a/swo/swosync/logicalreplicator.go b/swo/swosync/logicalreplicator.go index d4b6fa41ee..05f49859a6 100644 --- a/swo/swosync/logicalreplicator.go +++ b/swo/swosync/logicalreplicator.go @@ -16,15 +16,11 @@ type LogicalReplicator struct { seqNames []string progFn func(ctx context.Context, format string, args ...interface{}) - - dstRows rowSet } // NewLogicalReplicator creates a new LogicalReplicator. func NewLogicalReplicator() *LogicalReplicator { - return &LogicalReplicator{ - dstRows: make(rowSet), - } + return &LogicalReplicator{} } // SetSourceDB sets the source database and must be called before Start. 
diff --git a/swo/swosync/logicalsync.go b/swo/swosync/logicalsync.go index 7cfb1c4824..a6ce1d0c42 100644 --- a/swo/swosync/logicalsync.go +++ b/swo/swosync/logicalsync.go @@ -82,7 +82,7 @@ func (l *LogicalReplicator) doSync(ctx context.Context, final bool) error { applyChanges.Queue("begin") applyChanges.Queue("set constraints all deferred") seqSync.AddBatchWrites(&applyChanges) - tblSync.AddBatchWrites(&applyChanges, l.dstRows) + tblSync.AddBatchWrites(&applyChanges) applyChanges.Queue("commit") if final { // re-enable triggers in destination DB diff --git a/swo/swosync/rowset.go b/swo/swosync/rowset.go deleted file mode 100644 index b68ae57d42..0000000000 --- a/swo/swosync/rowset.go +++ /dev/null @@ -1,14 +0,0 @@ -package swosync - -type ( - rowSet map[changeID]struct{} - changeID struct{ Table, Row string } -) - -func (r rowSet) Set(id changeID) { r[id] = struct{}{} } -func (r rowSet) Delete(id changeID) { delete(r, id) } - -func (r rowSet) Has(id changeID) bool { - _, ok := r[id] - return ok -} diff --git a/swo/swosync/tablesync.go b/swo/swosync/tablesync.go index 59c5cb7aba..b62cbed72e 100644 --- a/swo/swosync/tablesync.go +++ b/swo/swosync/tablesync.go @@ -27,6 +27,7 @@ type changeEntry struct { id int64 changeID } +type changeID struct{ Table, Row string } // NewTableSync creates a new TableSync for the given tables. 
func NewTableSync(tables []swoinfo.Table) *TableSync { @@ -162,10 +163,9 @@ func (c *TableSync) ExecDeleteChanges(ctx context.Context, srcConn *pgx.Conn) (i return int64(len(ids)), nil } -func (c *TableSync) AddBatchWrites(b *pgx.Batch, dstRows rowSet) { +func (c *TableSync) AddBatchWrites(b *pgx.Batch) { type pending struct { - inserts []json.RawMessage - updates []json.RawMessage + upserts []json.RawMessage deletes []string } pendingByTable := make(map[string]*pending) @@ -178,36 +178,21 @@ func (c *TableSync) AddBatchWrites(b *pgx.Batch, dstRows rowSet) { newRowData := c.changedData[chg.changeID] if newRowData == nil { // row was deleted - dstRows.Delete(chg.changeID) p.deletes = append(p.deletes, chg.Row) continue } - if dstRows.Has(chg.changeID) { - // row was updated - p.updates = append(p.updates, newRowData) - } else { - // row was inserted - dstRows.Set(chg.changeID) - p.inserts = append(p.inserts, newRowData) - } + p.upserts = append(p.upserts, newRowData) } // insert, then update, then reverse delete for _, t := range c.tables { p := pendingByTable[t.Name()] - if p == nil || len(p.inserts) == 0 { + if p == nil || len(p.upserts) == 0 { continue } - b.Queue(insertRowsQuery(t), p.inserts) - } - for _, t := range c.tables { - p := pendingByTable[t.Name()] - if p == nil || len(p.updates) == 0 { - continue - } - b.Queue(updateRowsQuery(t), p.updates) + b.Queue(upsertRowsQuery(t), p.upserts) } for i := range c.tables { @@ -222,18 +207,24 @@ func (c *TableSync) AddBatchWrites(b *pgx.Batch, dstRows rowSet) { } } -func updateRowsQuery(t swoinfo.Table) string { +func upsertRowsQuery(t swoinfo.Table) string { var s strings.Builder - fmt.Fprintf(&s, "update %s dst\n", sqlutil.QuoteID(t.Name())) - fmt.Fprintf(&s, "set ") + fmt.Fprintf(&s, ` + insert into %s + select * from json_populate_recordset(null::%s, $1) + on conflict (id) do update + set + `, sqlutil.QuoteID(t.Name()), sqlutil.QuoteID(t.Name())) for i, col := range t.Columns() { + if col == "id" { + continue + 
} + if i > 0 { fmt.Fprintf(&s, ", ") } - fmt.Fprintf(&s, "%s = data.%s", sqlutil.QuoteID(col), sqlutil.QuoteID(col)) + fmt.Fprintf(&s, "%s = EXCLUDED.%s", sqlutil.QuoteID(col), sqlutil.QuoteID(col)) } - fmt.Fprintf(&s, "\nfrom json_populate_recordset(null::%s, $1) as data\n", sqlutil.QuoteID(t.Name())) - fmt.Fprintf(&s, "where dst.id = data.id") return s.String() } From 290fb944440ee316acae9a6b20c21f9221545ed1 Mon Sep 17 00:00:00 2001 From: Nathaniel Caza Date: Thu, 18 Aug 2022 12:49:57 -0500 Subject: [PATCH 176/225] limit memory usage during initial sync --- swo/swoinfo/table.go | 25 ++++++++++- swo/swoinfo/table_test.go | 22 ++++++++++ swo/swosync/initialsync.go | 85 ++++++++++++++++++++++++++------------ swo/swosync/tablesync.go | 25 +---------- 4 files changed, 105 insertions(+), 52 deletions(-) create mode 100644 swo/swoinfo/table_test.go diff --git a/swo/swoinfo/table.go b/swo/swoinfo/table.go index 0503b9a37e..7e6a58a854 100644 --- a/swo/swoinfo/table.go +++ b/swo/swoinfo/table.go @@ -1,6 +1,12 @@ package swoinfo -import "github.com/target/goalert/swo/swodb" +import ( + "fmt" + "strings" + + "github.com/target/goalert/swo/swodb" + "github.com/target/goalert/util/sqlutil" +) type Table struct { name string @@ -21,3 +27,20 @@ func (t Table) Columns() []string { } return cols } + +func (t Table) InsertJSONRowsQuery(upsert bool) string { + query := fmt.Sprintf("insert into %s select * from json_populate_recordset(null::%s, $1)", sqlutil.QuoteID(t.Name()), sqlutil.QuoteID(t.Name())) + if !upsert { + return query + } + + sets := make([]string, 0, len(t.cols)) + for _, col := range t.Columns() { + if col == "id" { + continue + } + sets = append(sets, fmt.Sprintf("%s = excluded.%s", sqlutil.QuoteID(col), sqlutil.QuoteID(col))) + } + + return query + " on conflict (id) do update set " + strings.Join(sets, ", ") +} diff --git a/swo/swoinfo/table_test.go b/swo/swoinfo/table_test.go new file mode 100644 index 0000000000..3b2e6f5015 --- /dev/null +++ 
b/swo/swoinfo/table_test.go @@ -0,0 +1,22 @@ +package swoinfo + +import ( + "testing" + + "github.com/stretchr/testify/assert" +) + +func TestTable_InsertJSONRowsQuery(t *testing.T) { + tbl := Table{ + name: "test", + cols: []column{ + {ColumnName: "id"}, + {ColumnName: "foo"}, + {ColumnName: "bar"}, + }, + } + query := tbl.InsertJSONRowsQuery(false) + assert.Equal(t, `insert into "test" select * from json_populate_recordset(null::"test", $1)`, query) + query = tbl.InsertJSONRowsQuery(true) + assert.Equal(t, `insert into "test" select * from json_populate_recordset(null::"test", $1) on conflict (id) do update set "foo" = excluded."foo", "bar" = excluded."bar"`, query) +} diff --git a/swo/swosync/initialsync.go b/swo/swosync/initialsync.go index a6dfa05618..ec524d5195 100644 --- a/swo/swosync/initialsync.go +++ b/swo/swosync/initialsync.go @@ -10,13 +10,7 @@ import ( "github.com/target/goalert/util/sqlutil" ) -func insertRowsQuery(table swoinfo.Table) string { - return fmt.Sprintf(` - insert into %s - select * from - json_populate_recordset(null::%s, $1) - `, sqlutil.QuoteID(table.Name()), sqlutil.QuoteID(table.Name())) -} +const maxBatchSize = 1024 * 1024 // 1MB // InitialSync will insert all rows from the source database into the destination database. 
// @@ -87,37 +81,74 @@ func (l *LogicalReplicator) initialSyncTable(ctx context.Context, srcTx, dstTx p } defer rows.Close() - insertSQL := insertRowsQuery(table) + insertSQL := table.InsertJSONRowsQuery(false) + + doneCh := make(chan error) + rowCh := make(chan json.RawMessage) + go func() { + var inserted int + var dataSize int + var batch []json.RawMessage + sendLoop: + for { + var row json.RawMessage + select { + case row = <-rowCh: + if row == nil { + break sendLoop + } + case <-ctx.Done(): + return + } + batch = append(batch, row) + dataSize += len(row) + if dataSize < maxBatchSize { + continue + } + + l.printf(ctx, "sync %s: %d/%d", table.Name(), inserted, count) + _, err := dstTx.Exec(ctx, insertSQL, batch) + if err != nil { + doneCh <- fmt.Errorf("insert: %w", err) + return + } + + inserted += len(batch) + dataSize = 0 + batch = batch[:0] + } + + if len(batch) > 0 { + l.printf(ctx, "sync %s: %d/%d", table.Name(), inserted, count) + _, err := dstTx.Exec(ctx, insertSQL, batch) + if err != nil { + doneCh <- fmt.Errorf("insert: %w", err) + return + } + } + + doneCh <- nil + }() - var insertRows []json.RawMessage - var inserted int for rows.Next() { var id string var rowData json.RawMessage if err := rows.Scan(&id, &rowData); err != nil { return 0, fmt.Errorf("scan: %w", err) } - insertRows = append(insertRows, rowData) - - if len(insertRows) < 10000 { + select { + case <-ctx.Done(): + return 0, ctx.Err() + case rowCh <- rowData: continue + case err := <-doneCh: + return 0, err } - - l.printf(ctx, "sync %s: %d/%d", table.Name(), inserted, count) - _, err := dstTx.Exec(ctx, insertSQL, insertRows) - if err != nil { - return 0, fmt.Errorf("insert: %w", err) - } - inserted += len(insertRows) - insertRows = insertRows[:0] } - if len(insertRows) > 0 { - _, err := dstTx.Exec(ctx, insertSQL, insertRows) - if err != nil { - return 0, fmt.Errorf("insert: %w", err) - } + close(rowCh) + if err := <-doneCh; err != nil { + return 0, err } - return count, nil } diff --git 
a/swo/swosync/tablesync.go b/swo/swosync/tablesync.go index b62cbed72e..4d96de1466 100644 --- a/swo/swosync/tablesync.go +++ b/swo/swosync/tablesync.go @@ -6,7 +6,6 @@ import ( "errors" "fmt" "strconv" - "strings" "github.com/jackc/pgx/v4" "github.com/target/goalert/swo/swoinfo" @@ -192,7 +191,7 @@ func (c *TableSync) AddBatchWrites(b *pgx.Batch) { continue } - b.Queue(upsertRowsQuery(t), p.upserts) + b.Queue(t.InsertJSONRowsQuery(true), p.upserts) } for i := range c.tables { @@ -206,25 +205,3 @@ func (c *TableSync) AddBatchWrites(b *pgx.Batch) { b.Queue(fmt.Sprintf(`delete from %s where id%s = any($1)`, sqlutil.QuoteID(t.Name()), cast), arg) } } - -func upsertRowsQuery(t swoinfo.Table) string { - var s strings.Builder - fmt.Fprintf(&s, ` - insert into %s - select * from json_populate_recordset(null::%s, $1) - on conflict (id) do update - set - `, sqlutil.QuoteID(t.Name()), sqlutil.QuoteID(t.Name())) - for i, col := range t.Columns() { - if col == "id" { - continue - } - - if i > 0 { - fmt.Fprintf(&s, ", ") - } - fmt.Fprintf(&s, "%s = EXCLUDED.%s", sqlutil.QuoteID(col), sqlutil.QuoteID(col)) - } - - return s.String() -} From 123841cf128f62c48e606602c1f65f0715817967 Mon Sep 17 00:00:00 2001 From: Nathaniel Caza Date: Thu, 18 Aug 2022 13:10:20 -0500 Subject: [PATCH 177/225] allow scaling regendb --- Makefile | 4 +++- devtools/resetdb/datagen.go | 24 ++++++++++++++++++++++-- devtools/resetdb/main.go | 11 +++++++---- 3 files changed, 32 insertions(+), 7 deletions(-) diff --git a/Makefile b/Makefile index 8b61248ee4..c9a495db8d 100644 --- a/Makefile +++ b/Makefile @@ -22,6 +22,7 @@ export GOOS = $(shell go env GOOS) export GOALERT_DB_URL_NEXT = $(DB_URL_NEXT) PROD_CY_PROC = Procfile.cypress.prod +SIZE:=1 PUBLIC_URL := http://localhost:3030$(HTTP_PREFIX) export GOALERT_PUBLIC_URL := $(PUBLIC_URL) @@ -209,11 +210,12 @@ postgres: bin/waitfor -e POSTGRES_USER=goalert \ -e POSTGRES_HOST_AUTH_METHOD=trust \ --name goalert-postgres \ + --shm-size 1g \ -p 5432:5432 \ 
docker.io/library/postgres:13-alpine && ./bin/waitfor "$(DB_URL)" && make regendb) || ($(CONTAINER_TOOL) start goalert-postgres && ./bin/waitfor "$(DB_URL)") regendb: bin/resetdb bin/goalert config.json.bak - ./bin/resetdb -with-rand-data -admin-id=00000000-0000-0000-0000-000000000001 + ./bin/resetdb -with-rand-data -admin-id=00000000-0000-0000-0000-000000000001 -mult $(SIZE) test -f config.json.bak && bin/goalert set-config --allow-empty-data-encryption-key "--db-url=$(DB_URL)" Date: Thu, 18 Aug 2022 13:10:33 -0500 Subject: [PATCH 178/225] add WHERE to upsert --- swo/swoinfo/table.go | 2 +- swo/swoinfo/table_test.go | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/swo/swoinfo/table.go b/swo/swoinfo/table.go index 7e6a58a854..c3db5a907f 100644 --- a/swo/swoinfo/table.go +++ b/swo/swoinfo/table.go @@ -42,5 +42,5 @@ func (t Table) InsertJSONRowsQuery(upsert bool) string { sets = append(sets, fmt.Sprintf("%s = excluded.%s", sqlutil.QuoteID(col), sqlutil.QuoteID(col))) } - return query + " on conflict (id) do update set " + strings.Join(sets, ", ") + return query + " on conflict (id) do update set " + strings.Join(sets, ", ") + " where id = excluded.id" } diff --git a/swo/swoinfo/table_test.go b/swo/swoinfo/table_test.go index 3b2e6f5015..674c2f1b7d 100644 --- a/swo/swoinfo/table_test.go +++ b/swo/swoinfo/table_test.go @@ -18,5 +18,5 @@ func TestTable_InsertJSONRowsQuery(t *testing.T) { query := tbl.InsertJSONRowsQuery(false) assert.Equal(t, `insert into "test" select * from json_populate_recordset(null::"test", $1)`, query) query = tbl.InsertJSONRowsQuery(true) - assert.Equal(t, `insert into "test" select * from json_populate_recordset(null::"test", $1) on conflict (id) do update set "foo" = excluded."foo", "bar" = excluded."bar"`, query) + assert.Equal(t, `insert into "test" select * from json_populate_recordset(null::"test", $1) on conflict (id) do update set "foo" = excluded."foo", "bar" = excluded."bar" where id = excluded.id`, query) } From 
1711376a6883a7e07e89cb40a48232ea236c22f4 Mon Sep 17 00:00:00 2001 From: Nathaniel Caza Date: Thu, 18 Aug 2022 13:53:45 -0500 Subject: [PATCH 179/225] if ambiguous col name --- swo/swoinfo/table.go | 2 +- swo/swoinfo/table_test.go | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/swo/swoinfo/table.go b/swo/swoinfo/table.go index c3db5a907f..0e8c8f1fa1 100644 --- a/swo/swoinfo/table.go +++ b/swo/swoinfo/table.go @@ -42,5 +42,5 @@ func (t Table) InsertJSONRowsQuery(upsert bool) string { sets = append(sets, fmt.Sprintf("%s = excluded.%s", sqlutil.QuoteID(col), sqlutil.QuoteID(col))) } - return query + " on conflict (id) do update set " + strings.Join(sets, ", ") + " where id = excluded.id" + return fmt.Sprintf("%s on conflict (id) do update set %s where %s.id = excluded.id", query, strings.Join(sets, ", "), sqlutil.QuoteID(t.Name())) } diff --git a/swo/swoinfo/table_test.go b/swo/swoinfo/table_test.go index 674c2f1b7d..38398d87d1 100644 --- a/swo/swoinfo/table_test.go +++ b/swo/swoinfo/table_test.go @@ -18,5 +18,5 @@ func TestTable_InsertJSONRowsQuery(t *testing.T) { query := tbl.InsertJSONRowsQuery(false) assert.Equal(t, `insert into "test" select * from json_populate_recordset(null::"test", $1)`, query) query = tbl.InsertJSONRowsQuery(true) - assert.Equal(t, `insert into "test" select * from json_populate_recordset(null::"test", $1) on conflict (id) do update set "foo" = excluded."foo", "bar" = excluded."bar" where id = excluded.id`, query) + assert.Equal(t, `insert into "test" select * from json_populate_recordset(null::"test", $1) on conflict (id) do update set "foo" = excluded."foo", "bar" = excluded."bar" where "test".id = excluded.id`, query) } From 2bfdaa2743fad60b7d8f89f1410a023748421fa4 Mon Sep 17 00:00:00 2001 From: Nathaniel Caza Date: Thu, 18 Aug 2022 13:54:04 -0500 Subject: [PATCH 180/225] solve duplicate row error --- swo/swosync/tablesync.go | 65 +++++++++++++++++++++++----------------- 1 file changed, 37 insertions(+), 28 
deletions(-) diff --git a/swo/swosync/tablesync.go b/swo/swosync/tablesync.go index 4d96de1466..ea29c65b97 100644 --- a/swo/swosync/tablesync.go +++ b/swo/swosync/tablesync.go @@ -5,6 +5,7 @@ import ( "encoding/json" "errors" "fmt" + "sort" "strconv" "github.com/jackc/pgx/v4" @@ -16,10 +17,10 @@ import ( type TableSync struct { tables []swoinfo.Table - changes []changeEntry - changedTables []string + changedRowIDs map[string][]string changedData map[changeID]json.RawMessage + changeLogIDs []int } type changeEntry struct { @@ -31,8 +32,9 @@ type changeID struct{ Table, Row string } // NewTableSync creates a new TableSync for the given tables. func NewTableSync(tables []swoinfo.Table) *TableSync { return &TableSync{ - tables: tables, - changedData: make(map[changeID]json.RawMessage), + tables: tables, + changedData: make(map[changeID]json.RawMessage), + changedRowIDs: make(map[string][]string), } } @@ -56,15 +58,16 @@ func (c *TableSync) ScanBatchChangeRead(res pgx.BatchResults) error { if err := rows.Scan(&id, &table, &rowID); err != nil { return err } - - c.changes = append(c.changes, changeEntry{id: id, changeID: changeID{table, rowID}}) + c.changeLogIDs = append(c.changeLogIDs, int(id)) + c.changedData[changeID{table, rowID}] = nil // mark as changed + c.changedRowIDs[table] = append(c.changedRowIDs[table], rowID) } return rows.Err() } // HasChanges returns true after ScanBatchChangeRead has been called, if there are changes. -func (c *TableSync) HasChanges() bool { return len(c.changes) > 0 } +func (c *TableSync) HasChanges() bool { return len(c.changeLogIDs) > 0 } func intIDs(ids []string) []int { var ints []int @@ -80,13 +83,8 @@ func intIDs(ids []string) []int { // AddBatchRowReads adds a query to the batch to read all changed rows from the source database. 
func (c *TableSync) AddBatchRowReads(b *pgx.Batch) { - rowIDsByTable := make(map[string][]string) - for _, chg := range c.changes { - rowIDsByTable[chg.Table] = append(rowIDsByTable[chg.Table], chg.Row) - } - for _, table := range c.tables { - rowIDs := rowIDsByTable[table.Name()] + rowIDs := unique(c.changedRowIDs[table.Name()]) if len(rowIDs) == 0 { continue } @@ -97,6 +95,21 @@ func (c *TableSync) AddBatchRowReads(b *pgx.Batch) { } } +func unique(ids []string) []string { + sort.Strings(ids) + + uniq := ids[:0] + var last string + for _, id := range ids { + if id == last { + continue + } + uniq = append(uniq, id) + last = id + } + return uniq +} + func castIDs(t swoinfo.Table, rowIDs []string) (interface{}, string) { var cast string switch t.IDType() { @@ -146,20 +159,16 @@ func (c *TableSync) ScanBatchRowReads(res pgx.BatchResults) error { // ExecDeleteChanges executes a query to deleted the change_log entries from the source database. func (c *TableSync) ExecDeleteChanges(ctx context.Context, srcConn *pgx.Conn) (int64, error) { - if len(c.changes) == 0 { + if len(c.changeLogIDs) == 0 { return 0, nil } - var ids []int - for _, chg := range c.changes { - ids = append(ids, int(chg.id)) - } - _, err := srcConn.Exec(ctx, `delete from change_log where id = any($1)`, sqlutil.IntArray(ids)) + _, err := srcConn.Exec(ctx, `delete from change_log where id = any($1)`, sqlutil.IntArray(c.changeLogIDs)) if err != nil { - return 0, fmt.Errorf("delete %d change log rows: %w", len(ids), err) + return 0, fmt.Errorf("delete %d change log rows: %w", len(c.changeLogIDs), err) } - return int64(len(ids)), nil + return int64(len(c.changeLogIDs)), nil } func (c *TableSync) AddBatchWrites(b *pgx.Batch) { @@ -168,20 +177,20 @@ func (c *TableSync) AddBatchWrites(b *pgx.Batch) { deletes []string } pendingByTable := make(map[string]*pending) - for _, chg := range c.changes { - p := pendingByTable[chg.Table] + for id, data := range c.changedData { + p := pendingByTable[id.Table] if p == nil 
{ p = &pending{} - pendingByTable[chg.Table] = p + pendingByTable[id.Table] = p } - newRowData := c.changedData[chg.changeID] - if newRowData == nil { + + if data == nil { // row was deleted - p.deletes = append(p.deletes, chg.Row) + p.deletes = append(p.deletes, id.Row) continue } - p.upserts = append(p.upserts, newRowData) + p.upserts = append(p.upserts, data) } // insert, then update, then reverse delete From 4768c25e00c9d60a4728c2c7fd30bb44264844ac Mon Sep 17 00:00:00 2001 From: Nathaniel Caza Date: Thu, 18 Aug 2022 14:41:08 -0500 Subject: [PATCH 181/225] sort oncall rows --- swo/swosync/tablesync.go | 24 +++++++++++++++++++++++- 1 file changed, 23 insertions(+), 1 deletion(-) diff --git a/swo/swosync/tablesync.go b/swo/swosync/tablesync.go index ea29c65b97..d90c8f49e8 100644 --- a/swo/swosync/tablesync.go +++ b/swo/swosync/tablesync.go @@ -7,6 +7,7 @@ import ( "fmt" "sort" "strconv" + "time" "github.com/jackc/pgx/v4" "github.com/target/goalert/swo/swoinfo" @@ -199,7 +200,11 @@ func (c *TableSync) AddBatchWrites(b *pgx.Batch) { if p == nil || len(p.upserts) == 0 { continue } - + switch t.Name() { + case "ep_step_on_call_users", "schedule_on_call_users": + // due to unique constraint on shifts, we need to sort shift ends before new shifts + sortOnCallData(p.upserts) + } b.Queue(t.InsertJSONRowsQuery(true), p.upserts) } @@ -214,3 +219,20 @@ func (c *TableSync) AddBatchWrites(b *pgx.Batch) { b.Queue(fmt.Sprintf(`delete from %s where id%s = any($1)`, sqlutil.QuoteID(t.Name()), cast), arg) } } + +// sort entries with a non-nil end time before entries with a nil end time +func sortOnCallData(data []json.RawMessage) { + type onCallData struct { + End *time.Time `json:"end_time"` + } + sort.Slice(data, func(i, j int) bool { + var a, b onCallData + if err := json.Unmarshal(data[i], &a); err != nil { + panic(err) + } + if err := json.Unmarshal(data[j], &b); err != nil { + panic(err) + } + return a.End != nil && b.End == nil + }) +} From 
1973561a5c35e44259e33a6453c75a71ad7422c5 Mon Sep 17 00:00:00 2001 From: Nathaniel Caza Date: Wed, 24 Aug 2022 10:13:09 -0500 Subject: [PATCH 182/225] fix mod --- go.mod | 1 - 1 file changed, 1 deletion(-) diff --git a/go.mod b/go.mod index fd67c03581..d5cb046d67 100644 --- a/go.mod +++ b/go.mod @@ -109,7 +109,6 @@ require ( github.com/jhump/protoreflect v1.12.0 // indirect github.com/jinzhu/inflection v1.0.0 // indirect github.com/jinzhu/now v1.1.5 // indirect - github.com/kffl/speedbump v0.2.0 github.com/kyleconroy/sqlc v1.14.0 github.com/magiconair/properties v1.8.6 // indirect github.com/mailhog/MailHog-UI v1.0.1 // indirect From 67dd47f1d69a9ab8502753a0ea9e481254f11a83 Mon Sep 17 00:00:00 2001 From: Nathaniel Caza Date: Tue, 20 Sep 2022 17:05:51 -0500 Subject: [PATCH 183/225] rename init method --- app/startup.go | 2 +- swo/manager.go | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/app/startup.go b/app/startup.go index d660fc4dd6..90dbdb1b87 100644 --- a/app/startup.go +++ b/app/startup.go @@ -82,7 +82,7 @@ func (app *App) startup(ctx context.Context) error { } if app.cfg.SWO != nil { - app.cfg.SWO.Init(app) + app.cfg.SWO.SetPauseResumer(app) log.Logf(app.LogBackgroundContext(), "SWO Enabled.") } diff --git a/swo/manager.go b/swo/manager.go index 9201836dd2..6ce532796e 100644 --- a/swo/manager.go +++ b/swo/manager.go @@ -133,7 +133,7 @@ func NewManager(cfg Config) (*Manager, error) { return m, nil } -func (m *Manager) Init(app lifecycle.PauseResumer) { +func (m *Manager) SetPauseResumer(app lifecycle.PauseResumer) { if m.pauseResume != nil { panic("already set") } From 0c81e4df236e96d30642c8a75602bb016559c859 Mon Sep 17 00:00:00 2001 From: Nathaniel Caza Date: Thu, 22 Sep 2022 09:25:25 -0500 Subject: [PATCH 184/225] use q as receiver --- graphql2/graphqlapp/swo.go | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/graphql2/graphqlapp/swo.go b/graphql2/graphqlapp/swo.go index 386ea7e2ae..40eefcde2b 100644 --- 
a/graphql2/graphqlapp/swo.go +++ b/graphql2/graphqlapp/swo.go @@ -39,8 +39,8 @@ func (m *Mutation) SwoAction(ctx context.Context, action graphql2.SWOAction) (bo var swoRx = regexp.MustCompile(`^GoAlert ([^ ]+)(?: SWO:([A-D]):(.{24}))?$`) -func (a *Query) SwoStatus(ctx context.Context) (*graphql2.SWOStatus, error) { - if a.SWO == nil { +func (q *Query) SwoStatus(ctx context.Context) (*graphql2.SWOStatus, error) { + if q.SWO == nil { return nil, validation.NewGenericError("not in SWO mode") } @@ -49,7 +49,7 @@ func (a *Query) SwoStatus(ctx context.Context) (*graphql2.SWOStatus, error) { return nil, err } - conns, err := a.SWO.ConnInfo(ctx) + conns, err := q.SWO.ConnInfo(ctx) if err != nil { return nil, err } @@ -83,7 +83,7 @@ func (a *Query) SwoStatus(ctx context.Context) (*graphql2.SWOStatus, error) { }) } - s := a.SWO.Status() + s := q.SWO.Status() validateNodes: for _, node := range s.Nodes { n := nodes[node.ID.String()] From adbea677479678b2ca6f4b3d40593257f603b8da Mon Sep 17 00:00:00 2001 From: Nathaniel Caza Date: Thu, 22 Sep 2022 09:27:30 -0500 Subject: [PATCH 185/225] remove unused type --- swo/swosync/tablesync.go | 4 ---- 1 file changed, 4 deletions(-) diff --git a/swo/swosync/tablesync.go b/swo/swosync/tablesync.go index d90c8f49e8..b653c2a67a 100644 --- a/swo/swosync/tablesync.go +++ b/swo/swosync/tablesync.go @@ -24,10 +24,6 @@ type TableSync struct { changeLogIDs []int } -type changeEntry struct { - id int64 - changeID -} type changeID struct{ Table, Row string } // NewTableSync creates a new TableSync for the given tables. 
From c2b64f6086041bd287be442539a13c511d228820 Mon Sep 17 00:00:00 2001 From: Nathaniel Caza Date: Thu, 22 Sep 2022 09:35:39 -0500 Subject: [PATCH 186/225] add description for uptime field --- graphql2/generated.go | 10 ++++++++-- graphql2/graphqlapp/swo.go | 3 +-- graphql2/models_gen.go | 9 +++++---- graphql2/schema.graphql | 5 ++++- web/src/schema.d.ts | 2 +- 5 files changed, 19 insertions(+), 10 deletions(-) diff --git a/graphql2/generated.go b/graphql2/generated.go index 4ad244f219..df08df4b6a 100644 --- a/graphql2/generated.go +++ b/graphql2/generated.go @@ -16740,11 +16740,14 @@ func (ec *executionContext) _SWONode_uptime(ctx context.Context, field graphql.C return graphql.Null } if resTmp == nil { + if !graphql.HasFieldError(ctx, fc) { + ec.Errorf(ctx, "must not be null") + } return graphql.Null } - res := resTmp.(*string) + res := resTmp.(string) fc.Result = res - return ec.marshalOString2ᚖstring(ctx, field.Selections, res) + return ec.marshalNString2string(ctx, field.Selections, res) } func (ec *executionContext) fieldContext_SWONode_uptime(ctx context.Context, field graphql.CollectedField) (fc *graphql.FieldContext, err error) { @@ -30423,6 +30426,9 @@ func (ec *executionContext) _SWONode(ctx context.Context, sel ast.SelectionSet, out.Values[i] = ec._SWONode_uptime(ctx, field, obj) + if out.Values[i] == graphql.Null { + invalids++ + } case "configError": out.Values[i] = ec._SWONode_configError(ctx, field, obj) diff --git a/graphql2/graphqlapp/swo.go b/graphql2/graphqlapp/swo.go index 40eefcde2b..cca557f899 100644 --- a/graphql2/graphqlapp/swo.go +++ b/graphql2/graphqlapp/swo.go @@ -94,8 +94,7 @@ validateNodes: n.IsLeader = node.ID == s.LeaderID n.CanExec = node.CanExec - up := time.Since(node.StartedAt).Truncate(time.Second).String() - n.Uptime = &up + n.Uptime = time.Since(node.StartedAt).Truncate(time.Second).String() if node.NewID != s.NextDBID { n.ConfigError = "next-db-url is invalid" diff --git a/graphql2/models_gen.go b/graphql2/models_gen.go index 
78c32dcda5..2f8809cc18 100644 --- a/graphql2/models_gen.go +++ b/graphql2/models_gen.go @@ -349,10 +349,11 @@ type SWOConnection struct { } type SWONode struct { - ID string `json:"id"` - CanExec bool `json:"canExec"` - IsLeader bool `json:"isLeader"` - Uptime *string `json:"uptime"` + ID string `json:"id"` + CanExec bool `json:"canExec"` + IsLeader bool `json:"isLeader"` + // The uptime of the node in seconds. Empty if the node/connection is *not* a GoAlert instance in SWO mode. + Uptime string `json:"uptime"` ConfigError string `json:"configError"` Connections []SWOConnection `json:"connections"` } diff --git a/graphql2/schema.graphql b/graphql2/schema.graphql index 833ff9677f..63b216924d 100644 --- a/graphql2/schema.graphql +++ b/graphql2/schema.graphql @@ -142,7 +142,10 @@ type SWONode { canExec: Boolean! isLeader: Boolean! - uptime: String + """ + The uptime of the node in seconds. Empty if the node/connection is *not* a GoAlert instance in SWO mode. + """ + uptime: String! configError: String! connections: [SWOConnection!] 
diff --git a/web/src/schema.d.ts b/web/src/schema.d.ts index a967b454d9..e5776c8743 100644 --- a/web/src/schema.d.ts +++ b/web/src/schema.d.ts @@ -60,7 +60,7 @@ export interface SWONode { id: string canExec: boolean isLeader: boolean - uptime?: null | string + uptime: string configError: string connections?: null | SWOConnection[] } From 8ec67e22860fdd5d8277d631f5bc85ad3ac03625 Mon Sep 17 00:00:00 2001 From: Nathaniel Caza Date: Thu, 22 Sep 2022 10:02:04 -0500 Subject: [PATCH 187/225] add conn info and tests --- swo/conninfo.go | 85 ++++++++++++++++++++++++++++++++++++++++++++ swo/conninfo_test.go | 28 +++++++++++++++ 2 files changed, 113 insertions(+) create mode 100644 swo/conninfo.go create mode 100644 swo/conninfo_test.go diff --git a/swo/conninfo.go b/swo/conninfo.go new file mode 100644 index 0000000000..148a62a276 --- /dev/null +++ b/swo/conninfo.go @@ -0,0 +1,85 @@ +package swo + +import ( + "encoding/base64" + "fmt" + "strings" + + "github.com/google/uuid" +) + +// ConnInfo contains information about a connection to the DB for SWO. +type ConnInfo struct { + Version string + Type ConnType + ID uuid.UUID +} + +type ConnType byte + +const ( + ConnTypeMainMgr ConnType = iota + 'A' + ConnTypeMainApp + ConnTypeNextMgr + ConnTypeNextApp +) + +// IsValid returns true if the ConnType is valid. +func (t ConnType) IsValid() bool { + return t >= ConnTypeMainMgr && t <= ConnTypeNextApp +} + +// String returns a string representation of the ConnInfo. +func (c ConnInfo) String() string { + // ensure c.Version is <= 24 characters + if len(c.Version) > 24 { + c.Version = c.Version[:24] + } + + if !c.Type.IsValid() { + panic(fmt.Sprintf("invalid connection type: 0x%0x", c.Type)) + } + + id := base64.RawURLEncoding.EncodeToString(c.ID[:]) + return fmt.Sprintf("GoAlert %s SWO:%c:%s", c.Version, c.Type, id) +} + +// ParseConnInfo parses a connection string into a ConnInfo. 
+func ParseConnInfo(s string) (*ConnInfo, error) { + if !strings.HasPrefix(s, "GoAlert ") { + return nil, fmt.Errorf("missing 'GoAlert' prefix: %q", s) + } + s = strings.TrimPrefix(s, "GoAlert ") + + parts := strings.Split(s, ":") + if len(parts) != 3 { + return nil, fmt.Errorf("incorrect number of segments: %q", s) + } + + if !strings.HasSuffix(parts[0], " SWO") { + return nil, fmt.Errorf("missing 'SWO' suffix: %q", s) + } + parts[0] = strings.TrimSuffix(parts[0], " SWO") + + var info ConnInfo + info.Version = parts[0] + + if len(parts[1]) != 1 { + return nil, fmt.Errorf("invalid connection type: %q", s) + } + info.Type = ConnType(parts[1][0]) + if !info.Type.IsValid() { + return nil, fmt.Errorf("invalid connection type: %q", s) + } + + id, err := base64.RawURLEncoding.DecodeString(parts[2]) + if err != nil { + return nil, fmt.Errorf("invalid ID '%s': %w", parts[2], err) + } + if len(id) != 16 { + return nil, fmt.Errorf("invalid ID '%s': incorrect length", parts[2]) + } + copy(info.ID[:], id) + + return &info, nil +} diff --git a/swo/conninfo_test.go b/swo/conninfo_test.go new file mode 100644 index 0000000000..217b09d5ac --- /dev/null +++ b/swo/conninfo_test.go @@ -0,0 +1,28 @@ +package swo + +import ( + "testing" + + "github.com/google/uuid" + "github.com/stretchr/testify/assert" +) + +func TestConnInfo(t *testing.T) { + info := &ConnInfo{ + Version: "v0.31.0", + Type: ConnTypeMainMgr, + ID: uuid.Nil, + } + + assert.Equal(t, "GoAlert v0.31.0 SWO:A:AAAAAAAAAAAAAAAAAAAAAA", info.String()) + + _, err := ParseConnInfo("GoAlert 1.0.0 SWO:0:AAAAAAAAAAAAAAAAAAAAAA") + assert.ErrorContains(t, err, "invalid connection type") + + info, err = ParseConnInfo("GoAlert 1.0.0 SWO:A:AAAAAAAAAAAAAAAAAAAAAA") + assert.NoError(t, err) + assert.Equal(t, ConnTypeMainMgr, info.Type) + assert.Equal(t, "1.0.0", info.Version) + assert.Equal(t, "GoAlert 1.0.0 SWO:A:AAAAAAAAAAAAAAAAAAAAAA", info.String()) + assert.Equal(t, "00000000-0000-0000-0000-000000000000", info.ID.String()) +} From 
b3bff55e3ab458232dfb297e7534812201ab9be6 Mon Sep 17 00:00:00 2001 From: Nathaniel Caza Date: Thu, 22 Sep 2022 10:06:12 -0500 Subject: [PATCH 188/225] use ConnInfo for connection strings --- graphql2/graphqlapp/swo.go | 22 +++++++--------------- swo/manager.go | 23 ++++++++++------------- 2 files changed, 17 insertions(+), 28 deletions(-) diff --git a/graphql2/graphqlapp/swo.go b/graphql2/graphqlapp/swo.go index cca557f899..b5c46c1894 100644 --- a/graphql2/graphqlapp/swo.go +++ b/graphql2/graphqlapp/swo.go @@ -2,15 +2,13 @@ package graphqlapp import ( "context" - "encoding/base64" "fmt" - "regexp" "sort" "time" - "github.com/google/uuid" "github.com/target/goalert/graphql2" "github.com/target/goalert/permission" + "github.com/target/goalert/swo" "github.com/target/goalert/swo/swogrp" "github.com/target/goalert/validation" ) @@ -37,8 +35,6 @@ func (m *Mutation) SwoAction(ctx context.Context, action graphql2.SWOAction) (bo return err == nil, err } -var swoRx = regexp.MustCompile(`^GoAlert ([^ ]+)(?: SWO:([A-D]):(.{24}))?$`) - func (q *Query) SwoStatus(ctx context.Context) (*graphql2.SWOStatus, error) { if q.SWO == nil { return nil, validation.NewGenericError("not in SWO mode") @@ -56,19 +52,15 @@ func (q *Query) SwoStatus(ctx context.Context) (*graphql2.SWOStatus, error) { nodes := make(map[string]*graphql2.SWONode) for _, conn := range conns { - m := swoRx.FindStringSubmatch(conn.Name) var connType, version string idStr := "unknown-" + conn.Name - if len(m) == 4 { - version = m[1] - connType = m[2] - id, err := base64.URLEncoding.DecodeString(m[3]) - if err == nil && len(id) == 16 { - var u uuid.UUID - copy(u[:], id) - idStr = u.String() - } + info, _ := swo.ParseConnInfo(conn.Name) + if info != nil { + version = info.Version + connType = string(info.Type) + idStr = info.ID.String() } + n := nodes[idStr] if n == nil { n = &graphql2.SWONode{ID: idStr} diff --git a/swo/manager.go b/swo/manager.go index 6ce532796e..82727439ec 100644 --- a/swo/manager.go +++ 
b/swo/manager.go @@ -3,7 +3,6 @@ package swo import ( "context" "database/sql" - "encoding/base64" "fmt" "github.com/google/uuid" @@ -56,29 +55,27 @@ type Config struct { func NewManager(cfg Config) (*Manager, error) { id := uuid.New() - appStr := func(typ byte) string { - vers := version.GitVersion() - id := base64.URLEncoding.EncodeToString(id[:]) - if len(vers) > 24 { - vers = vers[:24] - } - - return fmt.Sprintf("GoAlert %s SWO:%c:%s", vers, typ, id) + appStr := func(typ ConnType) string { + return ConnInfo{ + Version: version.GitVersion(), + ID: id, + Type: typ, + }.String() } - mainDB, err := sqldrv.NewDB(cfg.OldDBURL, appStr('A')) + mainDB, err := sqldrv.NewDB(cfg.OldDBURL, appStr(ConnTypeMainMgr)) if err != nil { return nil, fmt.Errorf("connect to old db: %w", err) } - mainAppDBC, err := sqldrv.NewConnector(cfg.OldDBURL, appStr('B')) + mainAppDBC, err := sqldrv.NewConnector(cfg.OldDBURL, appStr(ConnTypeMainApp)) if err != nil { return nil, fmt.Errorf("connect to old db: %w", err) } - nextDB, err := sqldrv.NewDB(cfg.NewDBURL, appStr('C')) + nextDB, err := sqldrv.NewDB(cfg.NewDBURL, appStr(ConnTypeNextMgr)) if err != nil { return nil, fmt.Errorf("connect to new db: %w", err) } - nextAppDBC, err := sqldrv.NewConnector(cfg.NewDBURL, appStr('D')) + nextAppDBC, err := sqldrv.NewConnector(cfg.NewDBURL, appStr(ConnTypeNextApp)) if err != nil { return nil, fmt.Errorf("connect to new db: %w", err) } From b46cd12f8d8b255b2139ac675b08af3271b39b7f Mon Sep 17 00:00:00 2001 From: Nathaniel Caza Date: Mon, 26 Sep 2022 14:38:51 -0500 Subject: [PATCH 189/225] add IsNext to ConnType --- swo/conninfo.go | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/swo/conninfo.go b/swo/conninfo.go index 148a62a276..6b2ffca7bf 100644 --- a/swo/conninfo.go +++ b/swo/conninfo.go @@ -24,6 +24,11 @@ const ( ConnTypeNextApp ) +// IsNext returns true if the connection is for the next DB. 
+func (t ConnType) IsNext() bool { + return t == ConnTypeNextMgr || t == ConnTypeNextApp +} + // IsValid returns true if the ConnType is valid. func (t ConnType) IsValid() bool { return t >= ConnTypeMainMgr && t <= ConnTypeNextApp From ae873c39887fb439616bb976b57f925e57fcf42c Mon Sep 17 00:00:00 2001 From: Nathaniel Caza Date: Mon, 26 Sep 2022 15:13:25 -0500 Subject: [PATCH 190/225] break out swo gql logic and test --- graphql2/graphqlapp/swo.go | 177 +++++++++++++++++++------------- graphql2/graphqlapp/swo_test.go | 158 ++++++++++++++++++++++++++++ swo/conninfo_test.go | 10 +- 3 files changed, 271 insertions(+), 74 deletions(-) create mode 100644 graphql2/graphqlapp/swo_test.go diff --git a/graphql2/graphqlapp/swo.go b/graphql2/graphqlapp/swo.go index b5c46c1894..ef28a20900 100644 --- a/graphql2/graphqlapp/swo.go +++ b/graphql2/graphqlapp/swo.go @@ -10,6 +10,7 @@ import ( "github.com/target/goalert/permission" "github.com/target/goalert/swo" "github.com/target/goalert/swo/swogrp" + "github.com/target/goalert/swo/swoinfo" "github.com/target/goalert/validation" ) @@ -35,48 +36,100 @@ func (m *Mutation) SwoAction(ctx context.Context, action graphql2.SWOAction) (bo return err == nil, err } -func (q *Query) SwoStatus(ctx context.Context) (*graphql2.SWOStatus, error) { - if q.SWO == nil { - return nil, validation.NewGenericError("not in SWO mode") +func validateSWOGrpNode(s swo.Status, node swogrp.Node) error { + if node.NewID != s.NextDBID { + return fmt.Errorf("next-db-url is invalid") + } + if node.OldID != s.MainDBID { + return fmt.Errorf("db-url is invalid") } - err := permission.LimitCheckAny(ctx, permission.Admin) + return nil +} + +func gqlSWOConnFromConnName(countInfo swoinfo.ConnCount) (nodeID string, conn graphql2.SWOConnection) { + var connType, version string + idStr := "unknown-" + countInfo.Name + info, err := swo.ParseConnInfo(countInfo.Name) if err != nil { - return nil, err + fmt.Println("invalid connection name:", countInfo.Name) + } + if info != nil 
{ + version = info.Version + connType = string(info.Type) + idStr = info.ID.String() + fmt.Println("idStr", idStr) } - conns, err := q.SWO.ConnInfo(ctx) - if err != nil { - return nil, err + return idStr, graphql2.SWOConnection{ + Name: countInfo.Name, + IsNext: countInfo.IsNext, + Version: version, + Type: string(connType), + Count: countInfo.Count, + } +} + +func validateNodeConnections(n graphql2.SWONode) error { + if len(n.Connections) == 0 { + return fmt.Errorf("node is not connected to any DB") } + version := n.Connections[0].Version + for _, conn := range n.Connections { + if conn.Version != version { + return fmt.Errorf("node has multiple versions: %s and %s", version, conn.Version) + } + + if len(conn.Type) != 1 { + return fmt.Errorf("invalid connection type: %s", conn.Type) + } + + if conn.IsNext != swo.ConnType(conn.Type[0]).IsNext() { + return fmt.Errorf("node has invalid connection type: %s", conn.Type) + } + } + + return nil +} + +func gqlStateFromSWOState(st swogrp.ClusterState) (graphql2.SWOState, error) { + switch st { + case swogrp.ClusterStateUnknown: + return graphql2.SWOStateUnknown, nil + case swogrp.ClusterStateResetting: + return graphql2.SWOStateResetting, nil + case swogrp.ClusterStateIdle: + return graphql2.SWOStateIdle, nil + case swogrp.ClusterStateSyncing: + return graphql2.SWOStateSyncing, nil + case swogrp.ClusterStatePausing: + return graphql2.SWOStatePausing, nil + case swogrp.ClusterStateExecuting: + return graphql2.SWOStateExecuting, nil + case swogrp.ClusterStateDone: + return graphql2.SWOStateDone, nil + } + + return "", fmt.Errorf("invalid state: %d", st) +} + +func gqlSWOStatus(s swo.Status, conns []swoinfo.ConnCount) (*graphql2.SWOStatus, error) { + sort.Slice(conns, func(i, j int) bool { + return conns[i].Name < conns[j].Name + }) nodes := make(map[string]*graphql2.SWONode) for _, conn := range conns { - var connType, version string - idStr := "unknown-" + conn.Name - info, _ := swo.ParseConnInfo(conn.Name) - if info != nil 
{ - version = info.Version - connType = string(info.Type) - idStr = info.ID.String() - } + idStr, c := gqlSWOConnFromConnName(conn) n := nodes[idStr] if n == nil { n = &graphql2.SWONode{ID: idStr} nodes[idStr] = n } - n.Connections = append(n.Connections, graphql2.SWOConnection{ - Name: conn.Name, - IsNext: conn.IsNext, - Version: version, - Type: string(connType), - Count: conn.Count, - }) + n.Connections = append(n.Connections, c) } - s := q.SWO.Status() -validateNodes: for _, node := range s.Nodes { n := nodes[node.ID.String()] if n == nil { @@ -85,58 +138,19 @@ validateNodes: } n.IsLeader = node.ID == s.LeaderID n.CanExec = node.CanExec - n.Uptime = time.Since(node.StartedAt).Truncate(time.Second).String() - if node.NewID != s.NextDBID { - n.ConfigError = "next-db-url is invalid" - continue - } - if node.OldID != s.MainDBID { - n.ConfigError = "db-url is invalid" + err := validateSWOGrpNode(s, node) + if err != nil { + n.ConfigError = err.Error() continue } - if len(n.Connections) == 0 { - n.ConfigError = "node is not connected to any DB" + err = validateNodeConnections(*n) + if err != nil { + n.ConfigError = err.Error() continue } - - version := n.Connections[0].Version - for _, conn := range n.Connections { - if conn.Version != version { - n.ConfigError = "node is connected with multiple versions of GoAlert" - continue validateNodes - } - if !conn.IsNext && (conn.Type != "A" && conn.Type != "B") { - n.ConfigError = fmt.Sprintf("connected to db-url (main) with invalid type %s (expected A or B)", conn.Type) - continue validateNodes - } - if conn.IsNext && (conn.Type != "C" && conn.Type != "D") { - n.ConfigError = fmt.Sprintf("connected to next-db-url (next) with invalid type %s (expected C or D)", conn.Type) - continue validateNodes - } - } - } - - var state graphql2.SWOState - switch s.State { - case swogrp.ClusterStateUnknown: - state = graphql2.SWOStateUnknown - case swogrp.ClusterStateResetting: - state = graphql2.SWOStateResetting - case 
swogrp.ClusterStateIdle: - state = graphql2.SWOStateIdle - case swogrp.ClusterStateSyncing: - state = graphql2.SWOStateSyncing - case swogrp.ClusterStatePausing: - state = graphql2.SWOStatePausing - case swogrp.ClusterStateExecuting: - state = graphql2.SWOStateExecuting - case swogrp.ClusterStateDone: - state = graphql2.SWOStateDone - default: - return nil, fmt.Errorf("unknown state: %d", s.State) } var nodeList []graphql2.SWONode @@ -147,6 +161,11 @@ validateNodes: return nodeList[i].ID < nodeList[j].ID }) + state, err := gqlStateFromSWOState(s.State) + if err != nil { + return nil, err + } + return &graphql2.SWOStatus{ State: state, @@ -158,3 +177,21 @@ validateNodes: MainDBVersion: s.MainDBVersion, }, nil } + +func (q *Query) SwoStatus(ctx context.Context) (*graphql2.SWOStatus, error) { + if q.SWO == nil { + return nil, validation.NewGenericError("not in SWO mode") + } + + err := permission.LimitCheckAny(ctx, permission.Admin) + if err != nil { + return nil, err + } + + conns, err := q.SWO.ConnInfo(ctx) + if err != nil { + return nil, err + } + + return gqlSWOStatus(q.SWO.Status(), conns) +} diff --git a/graphql2/graphqlapp/swo_test.go b/graphql2/graphqlapp/swo_test.go new file mode 100644 index 0000000000..41d63f4be3 --- /dev/null +++ b/graphql2/graphqlapp/swo_test.go @@ -0,0 +1,158 @@ +package graphqlapp + +import ( + "encoding/base64" + "testing" + "time" + + "github.com/google/uuid" + "github.com/stretchr/testify/assert" + "github.com/target/goalert/graphql2" + "github.com/target/goalert/swo" + "github.com/target/goalert/swo/swogrp" + "github.com/target/goalert/swo/swoinfo" +) + +func Test_validateSWOGrpNode(t *testing.T) { + var s swo.Status + s.MainDBID = uuid.New() + s.NextDBID = uuid.New() + + // no error for valid node + err := validateSWOGrpNode(s, swogrp.Node{OldID: s.MainDBID, NewID: s.NextDBID}) + assert.NoError(t, err) + + // should return error for invalid OldID + err = validateSWOGrpNode(s, swogrp.Node{OldID: s.NextDBID, NewID: s.NextDBID}) + 
assert.Error(t, err) + + // should return error for invalid NewID + err = validateSWOGrpNode(s, swogrp.Node{OldID: s.MainDBID, NewID: s.MainDBID}) + assert.Error(t, err) + + // should return error for invalid OldID and NewID + err = validateSWOGrpNode(s, swogrp.Node{OldID: s.NextDBID, NewID: s.MainDBID}) + assert.Error(t, err) + + // should return error for empty OldID + err = validateSWOGrpNode(s, swogrp.Node{OldID: uuid.Nil, NewID: s.NextDBID}) + assert.Error(t, err) + + // should return error for empty NewID + err = validateSWOGrpNode(s, swogrp.Node{OldID: s.MainDBID, NewID: uuid.Nil}) + assert.Error(t, err) + + // should return error for empty OldID and NewID + err = validateSWOGrpNode(s, swogrp.Node{OldID: uuid.Nil, NewID: uuid.Nil}) + assert.Error(t, err) +} + +func Test_gqlSWOConnFromConnName(t *testing.T) { + // should return unknown-conn for unknown connection + nodeID, conn := gqlSWOConnFromConnName(swoinfo.ConnCount{Name: "foobar", Count: 1, IsNext: true}) + assert.Equal(t, "unknown-foobar", nodeID) + assert.Equal(t, graphql2.SWOConnection{Name: "foobar", Count: 1, IsNext: true}, conn) + + nodeID, conn = gqlSWOConnFromConnName(swoinfo.ConnCount{Name: "GoAlert v0.31.0 SWO:B:AAAAAAAAAAAAAAAAAAAAAA", Count: 1, IsNext: true}) + assert.Equal(t, "00000000-0000-0000-0000-000000000000", nodeID) // should return nodeID for valid connection + assert.Equal(t, graphql2.SWOConnection{ + Name: "GoAlert v0.31.0 SWO:B:AAAAAAAAAAAAAAAAAAAAAA", + Version: "v0.31.0", + Count: 1, + Type: "B", + IsNext: true, + }, conn) +} + +func Test_validateNodeConnections(t *testing.T) { + err := validateNodeConnections(graphql2.SWONode{}) + assert.Error(t, err) // no connections + + err = validateNodeConnections(graphql2.SWONode{Connections: []graphql2.SWOConnection{{Name: "foobar"}}}) + assert.Error(t, err) // invalid connection + + err = validateNodeConnections(graphql2.SWONode{Connections: []graphql2.SWOConnection{{Type: string(swo.ConnTypeMainApp), IsNext: true}}}) + assert.Error(t, 
err) // invalid connection (wrong DB) + + err = validateNodeConnections(graphql2.SWONode{Connections: []graphql2.SWOConnection{{Type: string(swo.ConnTypeNextApp), IsNext: true}}}) + assert.NoError(t, err) // valid connection + + // test mismatched connection versions + err = validateNodeConnections(graphql2.SWONode{Connections: []graphql2.SWOConnection{ + {Type: string(swo.ConnTypeNextApp), IsNext: true, Version: "v1.0.0"}, + {Type: string(swo.ConnTypeMainApp), IsNext: false, Version: "v1.0.1"}, + }}) + assert.Error(t, err) + + // matching versions + err = validateNodeConnections(graphql2.SWONode{Connections: []graphql2.SWOConnection{ + {Type: string(swo.ConnTypeNextApp), IsNext: true, Version: "v1.0.0"}, + {Type: string(swo.ConnTypeMainApp), IsNext: false, Version: "v1.0.0"}, + }}) + assert.NoError(t, err) +} + +func b64(id uuid.UUID) string { + return base64.RawURLEncoding.EncodeToString(id[:]) +} + +func Test_gqlSWOStatus(t *testing.T) { + node1ID := uuid.MustParse("11111111-1111-1111-1111-111111111111") + node2ID := uuid.MustParse("22222222-2222-2222-2222-222222222222") + mainDBID := uuid.New() + nextDBID := uuid.New() + + s := swo.Status{ + MainDBID: mainDBID, + NextDBID: nextDBID, + Status: swogrp.Status{ + State: swogrp.ClusterStateIdle, + LeaderID: node2ID, + Nodes: []swogrp.Node{ + {ID: node1ID, OldID: mainDBID, NewID: nextDBID, StartedAt: time.Now().Add(-time.Minute)}, + {ID: node2ID, CanExec: true, OldID: mainDBID, NewID: nextDBID, StartedAt: time.Now().Add(-2 * time.Minute)}, + }, + }, + MainDBVersion: "v1.0.0", // not realistic, but good enough for testing + NextDBVersion: "v2.0.0", + } + conns := []swoinfo.ConnCount{ + {Name: "GoAlert v0.31.0 SWO:D:" + b64(node1ID), Count: 1, IsNext: true}, + {Name: "GoAlert v0.31.0 SWO:C:" + b64(node1ID), Count: 1, IsNext: true}, + {Name: "GoAlert v0.31.0 SWO:A:" + b64(node2ID), Count: 1, IsNext: false}, + {Name: "foobar", Count: 1, IsNext: true}, + } + + gql, err := gqlSWOStatus(s, conns) + assert.NoError(t, err) + 
assert.Equal(t, &graphql2.SWOStatus{ + State: graphql2.SWOStateIdle, + MainDBVersion: "v1.0.0", + NextDBVersion: "v2.0.0", + Nodes: []graphql2.SWONode{ + { + ID: node1ID.String(), + Uptime: "1m0s", + Connections: []graphql2.SWOConnection{ + {Name: "GoAlert v0.31.0 SWO:C:" + b64(node1ID), Version: "v0.31.0", Count: 1, Type: "C", IsNext: true}, + {Name: "GoAlert v0.31.0 SWO:D:" + b64(node1ID), Version: "v0.31.0", Count: 1, Type: "D", IsNext: true}, + }, + }, + { + ID: node2ID.String(), + Uptime: "2m0s", + CanExec: true, + IsLeader: true, + Connections: []graphql2.SWOConnection{ + {Name: "GoAlert v0.31.0 SWO:A:" + b64(node2ID), Version: "v0.31.0", Count: 1, Type: "A", IsNext: false}, + }, + }, + { + ID: "unknown-foobar", + Connections: []graphql2.SWOConnection{ + {Name: "foobar", Count: 1, IsNext: true}, + }, + }, + }, + }, gql) +} diff --git a/swo/conninfo_test.go b/swo/conninfo_test.go index 217b09d5ac..efe9b36e06 100644 --- a/swo/conninfo_test.go +++ b/swo/conninfo_test.go @@ -15,14 +15,16 @@ func TestConnInfo(t *testing.T) { } assert.Equal(t, "GoAlert v0.31.0 SWO:A:AAAAAAAAAAAAAAAAAAAAAA", info.String()) + info.ID = uuid.MustParse("11111111-1111-1111-1111-111111111111") + assert.Equal(t, "GoAlert v0.31.0 SWO:A:EREREREREREREREREREREQ", info.String()) - _, err := ParseConnInfo("GoAlert 1.0.0 SWO:0:AAAAAAAAAAAAAAAAAAAAAA") + _, err := ParseConnInfo("GoAlert 1.0.0 SWO:0:EREREREREREREREREREREQ") assert.ErrorContains(t, err, "invalid connection type") - info, err = ParseConnInfo("GoAlert 1.0.0 SWO:A:AAAAAAAAAAAAAAAAAAAAAA") + info, err = ParseConnInfo("GoAlert 1.0.0 SWO:A:EREREREREREREREREREREQ") assert.NoError(t, err) assert.Equal(t, ConnTypeMainMgr, info.Type) assert.Equal(t, "1.0.0", info.Version) - assert.Equal(t, "GoAlert 1.0.0 SWO:A:AAAAAAAAAAAAAAAAAAAAAA", info.String()) - assert.Equal(t, "00000000-0000-0000-0000-000000000000", info.ID.String()) + assert.Equal(t, "GoAlert 1.0.0 SWO:A:EREREREREREREREREREREQ", info.String()) + assert.Equal(t, 
"11111111-1111-1111-1111-111111111111", info.ID.String()) } From 3c3464745ffa4d91bc23156e60685e83dcc4a56f Mon Sep 17 00:00:00 2001 From: Nathaniel Caza Date: Mon, 26 Sep 2022 15:16:08 -0500 Subject: [PATCH 191/225] add test for node without connections --- graphql2/graphqlapp/swo_test.go | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/graphql2/graphqlapp/swo_test.go b/graphql2/graphqlapp/swo_test.go index 41d63f4be3..a151dae2b4 100644 --- a/graphql2/graphqlapp/swo_test.go +++ b/graphql2/graphqlapp/swo_test.go @@ -99,6 +99,7 @@ func b64(id uuid.UUID) string { func Test_gqlSWOStatus(t *testing.T) { node1ID := uuid.MustParse("11111111-1111-1111-1111-111111111111") node2ID := uuid.MustParse("22222222-2222-2222-2222-222222222222") + node3ID := uuid.MustParse("33333333-3333-3333-3333-333333333333") mainDBID := uuid.New() nextDBID := uuid.New() @@ -111,6 +112,7 @@ func Test_gqlSWOStatus(t *testing.T) { Nodes: []swogrp.Node{ {ID: node1ID, OldID: mainDBID, NewID: nextDBID, StartedAt: time.Now().Add(-time.Minute)}, {ID: node2ID, CanExec: true, OldID: mainDBID, NewID: nextDBID, StartedAt: time.Now().Add(-2 * time.Minute)}, + {ID: node3ID, OldID: mainDBID, NewID: nextDBID, StartedAt: time.Now().Add(-3 * time.Minute)}, }, }, MainDBVersion: "v1.0.0", // not realistic, but good enough for testing @@ -147,6 +149,11 @@ func Test_gqlSWOStatus(t *testing.T) { {Name: "GoAlert v0.31.0 SWO:A:" + b64(node2ID), Version: "v0.31.0", Count: 1, Type: "A", IsNext: false}, }, }, + { + ID: node3ID.String(), + Uptime: "3m0s", + ConfigError: "node is not connected to any DB", + }, { ID: "unknown-foobar", Connections: []graphql2.SWOConnection{ From 42e10ad18f87c0a611b9d3da54a52eb98837cd3f Mon Sep 17 00:00:00 2001 From: Nathaniel Caza Date: Mon, 26 Sep 2022 15:24:30 -0500 Subject: [PATCH 192/225] add comments to swo funcs --- graphql2/graphqlapp/swo.go | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/graphql2/graphqlapp/swo.go 
b/graphql2/graphqlapp/swo.go index ef28a20900..ecee7ab2fa 100644 --- a/graphql2/graphqlapp/swo.go +++ b/graphql2/graphqlapp/swo.go @@ -36,6 +36,7 @@ func (m *Mutation) SwoAction(ctx context.Context, action graphql2.SWOAction) (bo return err == nil, err } +// validateSWOGrpNode validates that the node has the correct DB urls. func validateSWOGrpNode(s swo.Status, node swogrp.Node) error { if node.NewID != s.NextDBID { return fmt.Errorf("next-db-url is invalid") @@ -47,18 +48,15 @@ func validateSWOGrpNode(s swo.Status, node swogrp.Node) error { return nil } +// gwlSWOConnFromConnName maps a DB connection count to a GraphQL type. func gqlSWOConnFromConnName(countInfo swoinfo.ConnCount) (nodeID string, conn graphql2.SWOConnection) { var connType, version string idStr := "unknown-" + countInfo.Name - info, err := swo.ParseConnInfo(countInfo.Name) - if err != nil { - fmt.Println("invalid connection name:", countInfo.Name) - } + info, _ := swo.ParseConnInfo(countInfo.Name) if info != nil { version = info.Version connType = string(info.Type) idStr = info.ID.String() - fmt.Println("idStr", idStr) } return idStr, graphql2.SWOConnection{ @@ -70,6 +68,7 @@ func gqlSWOConnFromConnName(countInfo swoinfo.ConnCount) (nodeID string, conn gr } } +// validateNodeConnections ensures that the node has the correct number of connections, identified as the correct & expected type(s). func validateNodeConnections(n graphql2.SWONode) error { if len(n.Connections) == 0 { return fmt.Errorf("node is not connected to any DB") @@ -93,6 +92,7 @@ func validateNodeConnections(n graphql2.SWONode) error { return nil } +// gqlStateFromSWOState maps a SWO state to a GraphQL type. 
func gqlStateFromSWOState(st swogrp.ClusterState) (graphql2.SWOState, error) { switch st { case swogrp.ClusterStateUnknown: @@ -114,11 +114,15 @@ func gqlStateFromSWOState(st swogrp.ClusterState) (graphql2.SWOState, error) { return "", fmt.Errorf("invalid state: %d", st) } +// gqlSWOStatus maps a SWO status and connection list to a GraphQL type. func gqlSWOStatus(s swo.Status, conns []swoinfo.ConnCount) (*graphql2.SWOStatus, error) { + nodes := make(map[string]*graphql2.SWONode) + + // sort connections by name to ensure consistent ordering sort.Slice(conns, func(i, j int) bool { return conns[i].Name < conns[j].Name }) - nodes := make(map[string]*graphql2.SWONode) + // map connections to nodes for _, conn := range conns { idStr, c := gqlSWOConnFromConnName(conn) @@ -130,6 +134,7 @@ func gqlSWOStatus(s swo.Status, conns []swoinfo.ConnCount) (*graphql2.SWOStatus, n.Connections = append(n.Connections, c) } + // update nodes from switchover_log and validate for _, node := range s.Nodes { n := nodes[node.ID.String()] if n == nil { @@ -153,6 +158,7 @@ func gqlSWOStatus(s swo.Status, conns []swoinfo.ConnCount) (*graphql2.SWOStatus, } } + // convert to list, sort by ID (for consistency) var nodeList []graphql2.SWONode for _, n := range nodes { nodeList = append(nodeList, *n) @@ -161,6 +167,7 @@ func gqlSWOStatus(s swo.Status, conns []swoinfo.ConnCount) (*graphql2.SWOStatus, return nodeList[i].ID < nodeList[j].ID }) + // map state to GraphQL type state, err := gqlStateFromSWOState(s.State) if err != nil { return nil, err From 5ddd06d0c7e7c0bf830bb74f99b9a8fbdb385af7 Mon Sep 17 00:00:00 2001 From: Nathaniel Caza Date: Mon, 26 Sep 2022 15:32:47 -0500 Subject: [PATCH 193/225] be more explicit about sync operations --- swo/README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/swo/README.md b/swo/README.md index 6a43318891..769535efab 100644 --- a/swo/README.md +++ b/swo/README.md @@ -8,6 +8,6 @@ Switchover mode is initiated by starting GoAlert with an 
additional DB URL `--db All new application DB connections first acquire a shared advisory lock, then check the `use_next_db` pointer. If the pointer is set, all new connections will be made to the "new" DB (without the checking overhead), and the connection to the "old" DB will be terminated. -The switch is performed by first replicating a complete snapshot of the "old" DB to the "new" DB. After the initial sync, subsequent synchronization is an incremental "diff" of snapshots—more info on how this works is available in the `swosync` package. +The switch is performed by first replicating a complete snapshot of the "old" DB to the "new" DB. After the initial sync, subsequent synchronization is an incremental "diff" of snapshots -- more info on how this works is available in the `swosync` package. -When both DBs are reasonably in-sync, a stop-the-world lock (i.e., an exclusive lock that conflicts with the shared advisory locks) is acquired, followed by the final logical sync. During the same transaction, the `use_next_db` pointer is set. After the lock is released, the connector will send all new queries to the "new" DB. +After repeated logical sync operations (to keep the next-sync time low), a stop-the-world lock (i.e., an exclusive lock that conflicts with the shared advisory locks) is acquired, followed by the final logical sync. During the same transaction, the `use_next_db` pointer is set. After the lock is released, the connector will send all new queries to the "new" DB. From 3318a28294a52574c449d0bdecb25de5d74aa0ad Mon Sep 17 00:00:00 2001 From: Nathaniel Caza Date: Mon, 26 Sep 2022 15:37:13 -0500 Subject: [PATCH 194/225] document ConnInfo and ConnType better --- swo/conninfo.go | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/swo/conninfo.go b/swo/conninfo.go index 6b2ffca7bf..4b6a1a266f 100644 --- a/swo/conninfo.go +++ b/swo/conninfo.go @@ -9,18 +9,32 @@ import ( ) // ConnInfo contains information about a connection to the DB for SWO. 
+// +// This is stored as the `application_name` for a connection in Postgres in +// the format of "GoAlert <version> SWO:<type>:<id>" where id is a base64 +// encoded UUID that should match what ends up in a `switchover_log` hello message. type ConnInfo struct { Version string Type ConnType ID uuid.UUID } +// ConnType indicates a type of SWO connection. type ConnType byte const ( + // ConnTypeMainMgr is the connection pool to the main/old DB used to coordinate the switchover. ConnTypeMainMgr ConnType = iota + 'A' + + // ConnTypeMainApp is the connection pool used by the GoAlert application to the main/old DB. + // + // Connections here are protected with a shared advisory lock. ConnTypeMainApp + + // ConnTypeNextMgr is the connection pool to the next/new DB used for applying changes during the switchover. ConnTypeNextMgr + + // ConnTypeNextApp is the connection pool used by the GoAlert application to the next/new DB, after the switchover is completed. ConnTypeNextApp ) From 3a31938e3d00ac415cd9f06e411e9a5a5e589643 Mon Sep 17 00:00:00 2001 From: Nathaniel Caza Date: Mon, 26 Sep 2022 15:41:54 -0500 Subject: [PATCH 195/225] add comment for the Connector type --- swo/drvconnector.go | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/swo/drvconnector.go b/swo/drvconnector.go index fb445a0cec..54cc41beaa 100644 --- a/swo/drvconnector.go +++ b/swo/drvconnector.go @@ -9,6 +9,11 @@ import ( "github.com/jackc/pgx/v4/stdlib" ) +// Connector is a driver.Connector that will use the old database until the +// switchover_state table indicates that the new database should be used. +// +// Until the switchover is complete, the old database will be protected with a +// shared advisory lock (4369).
type Connector struct { dbcOld, dbcNew driver.Connector From 057be50b6c7c6b818c6d7a7143ccb40b65824fef Mon Sep 17 00:00:00 2001 From: Nathaniel Caza Date: Mon, 26 Sep 2022 15:42:16 -0500 Subject: [PATCH 196/225] filename based on type --- swo/{drvconnector.go => connector.go} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename swo/{drvconnector.go => connector.go} (100%) diff --git a/swo/drvconnector.go b/swo/connector.go similarity index 100% rename from swo/drvconnector.go rename to swo/connector.go From 9dcd45e29181212102bf4d515376499ea10a4269 Mon Sep 17 00:00:00 2001 From: Nathaniel Caza Date: Mon, 26 Sep 2022 15:44:37 -0500 Subject: [PATCH 197/225] add comment for the Executor type --- swo/executor.go | 1 + 1 file changed, 1 insertion(+) diff --git a/swo/executor.go b/swo/executor.go index 86043b80f1..0f2eac16a4 100644 --- a/swo/executor.go +++ b/swo/executor.go @@ -10,6 +10,7 @@ import ( "github.com/target/goalert/swo/swosync" ) +// Executor is responsible for executing the switchover process. 
type Executor struct { mgr *Manager mx sync.Mutex From 2491e88c370f296faf14eebd6de109c6244eb531 Mon Sep 17 00:00:00 2001 From: Nathaniel Caza Date: Tue, 27 Sep 2022 16:23:33 -0500 Subject: [PATCH 198/225] use more clear naming --- swo/executor.go | 6 +++--- swo/swosync/initialsync.go | 4 ++-- swo/swosync/reset.go | 4 ++-- swo/swosync/start.go | 4 ++-- 4 files changed, 9 insertions(+), 9 deletions(-) diff --git a/swo/executor.go b/swo/executor.go index 0f2eac16a4..3d8b672596 100644 --- a/swo/executor.go +++ b/swo/executor.go @@ -44,17 +44,17 @@ func (e *Executor) init() { // sync ctx := <-e.ctxCh - err := rep.Reset(ctx) + err := rep.ResetChangeTracking(ctx) if err != nil { return fmt.Errorf("reset: %w", err) } - err = rep.Start(ctx) + err = rep.StartTrackingChanges(ctx) if err != nil { return fmt.Errorf("start: %w", err) } - err = rep.InitialSync(ctx) + err = rep.FullInitialSync(ctx) if err != nil { return fmt.Errorf("initial sync: %w", err) } diff --git a/swo/swosync/initialsync.go b/swo/swosync/initialsync.go index ec524d5195..adbfa463a2 100644 --- a/swo/swosync/initialsync.go +++ b/swo/swosync/initialsync.go @@ -12,10 +12,10 @@ import ( const maxBatchSize = 1024 * 1024 // 1MB -// InitialSync will insert all rows from the source database into the destination database. +// FullInitialSync will insert all rows from the source database into the destination database. // // While doing so it will update the rowID maps to track the rows that have been inserted. 
-func (l *LogicalReplicator) InitialSync(ctx context.Context) error { +func (l *LogicalReplicator) FullInitialSync(ctx context.Context) error { srcTx, err := l.srcConn.BeginTx(ctx, pgx.TxOptions{ IsoLevel: pgx.Serializable, DeferrableMode: pgx.Deferrable, diff --git a/swo/swosync/reset.go b/swo/swosync/reset.go index 2d5300846a..7c18ca50a7 100644 --- a/swo/swosync/reset.go +++ b/swo/swosync/reset.go @@ -10,8 +10,8 @@ import ( "github.com/target/goalert/util/sqlutil" ) -// Reset disables tracking changes and truncates the tables in the destination database. -func (l *LogicalReplicator) Reset(ctx context.Context) error { +// ResetChangeTracking disables tracking changes and truncates the tables in the destination database. +func (l *LogicalReplicator) ResetChangeTracking(ctx context.Context) error { l.printf(ctx, "disabling logical replication...") _, err := l.srcConn.Exec(ctx, ConnLockQuery) diff --git a/swo/swosync/start.go b/swo/swosync/start.go index e05d80db96..374e76146e 100644 --- a/swo/swosync/start.go +++ b/swo/swosync/start.go @@ -18,8 +18,8 @@ func triggerName(table string) string { return sqlutil.QuoteID(fmt.Sprintf("zz_99_change_log_%s", table)) } -// Start instruments and begins tracking changes to the DB. -func (l *LogicalReplicator) Start(ctx context.Context) error { +// StartTrackingChanges instruments and begins tracking changes to the DB. 
+func (l *LogicalReplicator) StartTrackingChanges(ctx context.Context) error { l.printf(ctx, "enabling logical replication...") _, err := l.srcConn.Exec(ctx, changelogQuery) if err != nil { From 74d07f039848afe24b819927c03c3ba2b36ed9e1 Mon Sep 17 00:00:00 2001 From: Nathaniel Caza Date: Tue, 27 Sep 2022 17:05:14 -0500 Subject: [PATCH 199/225] add missing target deps --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 121c6aea0a..3f8a47d065 100644 --- a/Makefile +++ b/Makefile @@ -104,7 +104,7 @@ swo/swodb/queries.sql.go: bin/tools/sqlc sqlc.yaml swo/*/*.sql migrate/migration web/src/schema.d.ts: graphql2/schema.graphql node_modules web/src/genschema.go go generate ./web/src -start-swo: bin/psql-lite bin/goalert bin/waitfor bin/runproc +start-swo: bin/psql-lite bin/goalert bin/waitfor bin/runproc node_modules web/src/schema.d.ts $(BIN_DIR)/tools/prometheus ./bin/waitfor -timeout 1s "$(DB_URL)" || make postgres ./bin/goalert migrate --db-url=postgres://goalert@localhost/goalert ./bin/psql-lite -d postgres://goalert@localhost -c "update switchover_state set current_state = 'idle'; truncate table switchover_log; drop database if exists goalert2; create database goalert2;" From 9d571376f76afefa95c7babb5eeae74056537ac2 Mon Sep 17 00:00:00 2001 From: Nathaniel Caza Date: Fri, 30 Sep 2022 10:56:53 -0500 Subject: [PATCH 200/225] add missing logger --- app/cmd.go | 2 +- util/log/log.go | 11 +++++++++-- 2 files changed, 10 insertions(+), 3 deletions(-) diff --git a/app/cmd.go b/app/cmd.go index b25ef06702..ab67909cca 100644 --- a/app/cmd.go +++ b/app/cmd.go @@ -118,7 +118,7 @@ var RootCmd = &cobra.Command{ var db *sql.DB if cfg.DBURLNext != "" { - mgr, err := swo.NewManager(swo.Config{OldDBURL: cfg.DBURL, NewDBURL: cfg.DBURLNext, CanExec: !cfg.APIOnly}) + mgr, err := swo.NewManager(swo.Config{OldDBURL: cfg.DBURL, NewDBURL: cfg.DBURLNext, CanExec: !cfg.APIOnly, Logger: cfg.Logger}) if err != nil { return errors.Wrap(err, 
"init switchover handler") } diff --git a/util/log/log.go b/util/log/log.go index e36e1b54ce..07b852a96d 100644 --- a/util/log/log.go +++ b/util/log/log.go @@ -32,11 +32,19 @@ func NewLogger() *Logger { return &Logger{l: l, info: true} } -func (l *Logger) BackgroundContext() context.Context { return WithLogger(context.Background(), l) } + +func (l *Logger) BackgroundContext() context.Context { + if l == nil { + panic("nil logger") + } + + return WithLogger(context.Background(), l) +} func WithLogger(ctx context.Context, l *Logger) context.Context { return context.WithValue(ctx, logContextKeyLogger, l) } + func FromContext(ctx context.Context) *Logger { l, _ := ctx.Value(logContextKeyLogger).(*Logger) if l == nil { @@ -69,7 +77,6 @@ func (l *Logger) ErrorsOnly() { func (l *Logger) EnableDebug() { l.debug = true } func (l *Logger) entry(ctx context.Context) *logrus.Entry { - e := logrus.NewEntry(l.l) if ctx == nil { return e From d480a646b81d6a5d2d1924a5944f2fbde66d0da5 Mon Sep 17 00:00:00 2001 From: Nathaniel Caza Date: Fri, 30 Sep 2022 10:57:31 -0500 Subject: [PATCH 201/225] break apart exec logic from concurrency logic --- swo/executor.go | 154 +++++++++++++++++++++++++----------------------- swo/manager.go | 2 +- swo/withfunc.go | 67 +++++++++++++++++++++ 3 files changed, 147 insertions(+), 76 deletions(-) create mode 100644 swo/withfunc.go diff --git a/swo/executor.go b/swo/executor.go index 3d8b672596..20e1d4b612 100644 --- a/swo/executor.go +++ b/swo/executor.go @@ -13,101 +13,105 @@ import ( // Executor is responsible for executing the switchover process. 
type Executor struct { mgr *Manager + + stateCh chan execState + + wf *WithFunc[*swosync.LogicalReplicator] + rep *swosync.LogicalReplicator mx sync.Mutex +} - ctxCh chan context.Context - errCh chan error - cancel func() +func NewExecutor(mgr *Manager) *Executor { + e := &Executor{ + mgr: mgr, + stateCh: make(chan execState, 1), + } + e.stateCh <- execStateIdle + e.wf = NewWithFunc(func(ctx context.Context, fn func(*swosync.LogicalReplicator)) error { + return mgr.withConnFromBoth(ctx, func(ctx context.Context, oldConn, newConn *pgx.Conn) error { + rep := swosync.NewLogicalReplicator() + rep.SetSourceDB(oldConn) + rep.SetDestinationDB(newConn) + rep.SetProgressFunc(mgr.taskMgr.Statusf) + fn(rep) + return nil + }) + }) + return e } +type execState int + +const ( + execStateIdle execState = iota + execStateSync +) + var _ swogrp.Executor = (*Executor)(nil) -func (e *Executor) init() { +func (e *Executor) Sync(ctx context.Context) error { e.mx.Lock() defer e.mx.Unlock() - if e.cancel != nil { - panic("already running") + + if e.rep != nil { + return fmt.Errorf("already syncing") } - ctx, cancel := context.WithCancel(e.mgr.Logger.BackgroundContext()) - e.cancel = cancel - e.ctxCh = make(chan context.Context) - e.errCh = make(chan error, 2) + rep, err := e.wf.Begin(e.mgr.Logger.BackgroundContext()) + if err != nil { + return err + } - go func() { - defer e.Cancel() - e.errCh <- e.mgr.withConnFromBoth(ctx, func(_ context.Context, oldConn, newConn *pgx.Conn) error { - rep := swosync.NewLogicalReplicator() - rep.SetSourceDB(oldConn) - rep.SetDestinationDB(newConn) - rep.SetProgressFunc(e.mgr.taskMgr.Statusf) - - // sync - ctx := <-e.ctxCh - err := rep.ResetChangeTracking(ctx) - if err != nil { - return fmt.Errorf("reset: %w", err) - } - - err = rep.StartTrackingChanges(ctx) - if err != nil { - return fmt.Errorf("start: %w", err) - } - - err = rep.FullInitialSync(ctx) - if err != nil { - return fmt.Errorf("initial sync: %w", err) - } - - for i := 0; i < 10; i++ { - err = 
rep.LogicalSync(ctx) - if err != nil { - return fmt.Errorf("logical sync: %w", err) - } - } - e.errCh <- nil - - // wait for pause - ctx = <-e.ctxCh - for i := 0; i < 10; i++ { - err := rep.LogicalSync(ctx) - if err != nil { - return fmt.Errorf("logical sync (after pause): %w", err) - } - } - - err = rep.FinalSync(ctx) - if err != nil { - return fmt.Errorf("final sync: %w", err) - } + err = rep.ResetChangeTracking(ctx) + if err != nil { + return fmt.Errorf("reset: %w", err) + } - return nil - }) - }() -} + err = rep.StartTrackingChanges(ctx) + if err != nil { + return fmt.Errorf("start: %w", err) + } -func (e *Executor) Sync(ctx context.Context) error { - e.init() + err = rep.FullInitialSync(ctx) + if err != nil { + return fmt.Errorf("initial sync: %w", err) + } - e.ctxCh <- ctx - return <-e.errCh -} + for i := 0; i < 10; i++ { + err = rep.LogicalSync(ctx) + if err != nil { + return fmt.Errorf("logical sync: %w", err) + } + } -func (e *Executor) Exec(ctx context.Context) error { - e.ctxCh <- ctx - return <-e.errCh + e.rep = rep + return nil } -func (e *Executor) Cancel() { +func (e *Executor) Exec(ctx context.Context) error { e.mx.Lock() defer e.mx.Unlock() - if e.cancel == nil { - return + if e.rep == nil { + return fmt.Errorf("not syncing") + } + + rep := e.rep + e.rep = nil + + for i := 0; i < 10; i++ { + err := rep.LogicalSync(ctx) + if err != nil { + return fmt.Errorf("logical sync (after pause): %w", err) + } + } + + err := rep.FinalSync(ctx) + if err != nil { + return fmt.Errorf("final sync: %w", err) } - e.cancel() - e.ctxCh = nil - e.errCh = nil - e.cancel = nil + return nil } + +func (e *Executor) Cancel() { e.wf.Cancel() } diff --git a/swo/manager.go b/swo/manager.go index 82727439ec..b82f22174f 100644 --- a/swo/manager.go +++ b/swo/manager.go @@ -119,7 +119,7 @@ func NewManager(cfg Config) (*Manager, error) { OldID: m.MainDBInfo.ID, NewID: m.NextDBInfo.ID, - Executor: &Executor{mgr: m}, + Executor: NewExecutor(m), PauseFunc: func(ctx context.Context) 
error { return m.pauseResume.Pause(ctx) }, ResumeFunc: func(ctx context.Context) error { return m.pauseResume.Resume(ctx) }, }) diff --git a/swo/withfunc.go b/swo/withfunc.go new file mode 100644 index 0000000000..3ea314ff33 --- /dev/null +++ b/swo/withfunc.go @@ -0,0 +1,67 @@ +package swo + +import ( + "context" + "fmt" + "sync" +) + +type WithFunc[V any] struct { + withFn func(context.Context, func(V)) error + + mx sync.Mutex + wg sync.WaitGroup + cancel func() +} + +func NewWithFunc[V any](withFn func(context.Context, func(V)) error) *WithFunc[V] { + return &WithFunc[V]{ + withFn: withFn, + } +} + +func (w *WithFunc[V]) Begin(ctx context.Context) (v V, err error) { + w.mx.Lock() + defer w.mx.Unlock() + + if w.cancel != nil { + return v, fmt.Errorf("already in progress") + } + + ctx, w.cancel = context.WithCancel(ctx) + + ch := make(chan V, 1) + errCh := make(chan error, 1) + w.wg.Add(1) + + go func() { + defer w.wg.Done() + errCh <- w.withFn(ctx, func(v V) { + ch <- v + <-ctx.Done() + }) + }() + + select { + case <-ctx.Done(): + return v, ctx.Err() + case err = <-errCh: + return v, err + case v = <-ch: + return v, nil + } +} + +func (w *WithFunc[V]) Cancel() { + defer w.wg.Wait() + + w.mx.Lock() + defer w.mx.Unlock() + + if w.cancel == nil { + return + } + + w.cancel() + w.cancel = nil +} From 87f2b86ede7f3b92f9022c91cbe4f9d083e1f097 Mon Sep 17 00:00:00 2001 From: Nathaniel Caza Date: Thu, 6 Oct 2022 11:05:00 -0500 Subject: [PATCH 202/225] wait during lock --- swo/withfunc.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/swo/withfunc.go b/swo/withfunc.go index 3ea314ff33..5d7499f1fb 100644 --- a/swo/withfunc.go +++ b/swo/withfunc.go @@ -53,8 +53,6 @@ func (w *WithFunc[V]) Begin(ctx context.Context) (v V, err error) { } func (w *WithFunc[V]) Cancel() { - defer w.wg.Wait() - w.mx.Lock() defer w.mx.Unlock() @@ -64,4 +62,6 @@ func (w *WithFunc[V]) Cancel() { w.cancel() w.cancel = nil + + w.wg.Wait() } From 
cdeb9006a088a67bae3fcb270e49ecc9d36a7b6c Mon Sep 17 00:00:00 2001 From: Nathaniel Caza Date: Thu, 6 Oct 2022 11:10:29 -0500 Subject: [PATCH 203/225] add comment for WithFunc --- swo/withfunc.go | 1 + 1 file changed, 1 insertion(+) diff --git a/swo/withfunc.go b/swo/withfunc.go index 5d7499f1fb..3ca08b2a93 100644 --- a/swo/withfunc.go +++ b/swo/withfunc.go @@ -6,6 +6,7 @@ import ( "sync" ) +// WithFunc flattens a with-type func providing it's value with Begin() and ending with Cancel(). type WithFunc[V any] struct { withFn func(context.Context, func(V)) error From b3085a6fa88715d5fae0e19071f5b31e2b8475dc Mon Sep 17 00:00:00 2001 From: KatieMSB Date: Fri, 7 Oct 2022 11:31:06 -0500 Subject: [PATCH 204/225] add panic check for Begin --- swo/withfunc.go | 30 ++++++++++++++++++++++++++++-- 1 file changed, 28 insertions(+), 2 deletions(-) diff --git a/swo/withfunc.go b/swo/withfunc.go index 3ca08b2a93..f9462a3d4f 100644 --- a/swo/withfunc.go +++ b/swo/withfunc.go @@ -15,12 +15,18 @@ type WithFunc[V any] struct { cancel func() } -func NewWithFunc[V any](withFn func(context.Context, func(V)) error) *WithFunc[V] { +// NewWithFunc creates a new WithFunc. +// +// withFn must not return an error after useFn is called. +func NewWithFunc[V any](withFn func(ctx context.Context, useFn func(V)) error) *WithFunc[V] { return &WithFunc[V]{ withFn: withFn, } } +// Begin will return a new instance of V, Cancel should be called when it's no longer needed. +// +// If err is nil, Cancel must be called before calling Begin again. 
func (w *WithFunc[V]) Begin(ctx context.Context) (v V, err error) { w.mx.Lock() defer w.mx.Unlock() @@ -37,22 +43,38 @@ func (w *WithFunc[V]) Begin(ctx context.Context) (v V, err error) { go func() { defer w.wg.Done() - errCh <- w.withFn(ctx, func(v V) { + var called bool + err := w.withFn(ctx, func(v V) { + called = true ch <- v <-ctx.Done() }) + if err == nil { + if !called { + errCh <-fmt.Errorf("useFn never called") + } + return + } + + if called { + panic(fmt.Errorf("error returned after withFn called: %w",err)) + } + errCh <-err }() select { case <-ctx.Done(): + w._cancel() return v, ctx.Err() case err = <-errCh: + w._cancel() return v, err case v = <-ch: return v, nil } } +// Cancel will cancel context passed to withFn and wait for it to finish. func (w *WithFunc[V]) Cancel() { w.mx.Lock() defer w.mx.Unlock() @@ -61,6 +83,10 @@ func (w *WithFunc[V]) Cancel() { return } + w._cancel() +} + +func (w *WithFunc[V]) _cancel() { w.cancel() w.cancel = nil From 1ecb59daaad89362c488f9614a7da94367b35f5c Mon Sep 17 00:00:00 2001 From: KatieMSB Date: Fri, 7 Oct 2022 11:31:36 -0500 Subject: [PATCH 205/225] add test withFunc --- swo/withfunc_test.go | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) create mode 100644 swo/withfunc_test.go diff --git a/swo/withfunc_test.go b/swo/withfunc_test.go new file mode 100644 index 0000000000..e097b2a079 --- /dev/null +++ b/swo/withfunc_test.go @@ -0,0 +1,22 @@ +package swo + +import ( + "context" + "errors" + "testing" + + "github.com/stretchr/testify/assert" +) + +// Test 1 immediately return error +// Test 2 call call-back fn, should block until cancel() +func TestBegin(t *testing.T) { + wf1 := NewWithFunc(func(ctx context.Context, fn func(struct{})) error { + return errors.New("expected error") + }) + + _, err := wf1.Begin(context.Background()) + assert.Error(t, err) + + +} From b4bb8fd99c8fed31b7181a3f746d3213776ef738 Mon Sep 17 00:00:00 2001 From: Nathaniel Caza Date: Wed, 12 Oct 2022 11:02:59 -0500 Subject: [PATCH 
206/225] comments and cleanup --- swo/connector.go | 8 +++++++ swo/executor.go | 4 ++++ swo/manager.go | 22 +++++++++++++++++-- swo/status.go | 1 + swo/swogrp/clusterstate.go | 1 + swo/swogrp/config.go | 7 +++++- swo/swogrp/node.go | 1 + swo/swogrp/set.go | 45 -------------------------------------- swo/swogrp/status.go | 2 ++ swo/swoinfo/conninfo.go | 9 ++++++-- swo/swoinfo/db.go | 3 +++ swo/swoinfo/table.go | 5 +++++ swo/swomsg/log.go | 10 +++++---- swo/swomsg/messages.go | 1 + swo/swosync/start.go | 6 +++++ swo/swosync/tablesync.go | 2 +- 16 files changed, 72 insertions(+), 55 deletions(-) delete mode 100644 swo/swogrp/set.go diff --git a/swo/connector.go b/swo/connector.go index 54cc41beaa..a30ffc6df3 100644 --- a/swo/connector.go +++ b/swo/connector.go @@ -23,6 +23,7 @@ type Connector struct { var _ driver.Connector = (*Connector)(nil) +// NewConnector creates a new Connector pointing to the old and new DBs, respectively. func NewConnector(dbcOld, dbcNew driver.Connector) *Connector { return &Connector{ dbcOld: dbcOld, @@ -30,8 +31,15 @@ func NewConnector(dbcOld, dbcNew driver.Connector) *Connector { } } +// Driver is a stub method for driver.Connector that returns nil. func (drv *Connector) Driver() driver.Driver { return nil } +// Connect returns a new connection to the database. +// +// A shared advisory lock is acquired on the connection to the old DB, and then switchover_state is checked. +// +// When `current_state` is `use_next_db`, the connection is closed and a connection to the new DB is returned +// instead. Future connections then skip the check and are returned directly from the new DB. func (drv *Connector) Connect(ctx context.Context) (driver.Conn, error) { drv.mx.Lock() isDone := drv.isDone diff --git a/swo/executor.go b/swo/executor.go index 20e1d4b612..e38d26f53c 100644 --- a/swo/executor.go +++ b/swo/executor.go @@ -21,6 +21,7 @@ type Executor struct { mx sync.Mutex } +// NewExecutor initializes a new Executor for the given Manager. 
func NewExecutor(mgr *Manager) *Executor { e := &Executor{ mgr: mgr, @@ -49,6 +50,7 @@ const ( var _ swogrp.Executor = (*Executor)(nil) +// Sync begins the switchover process by resetting and starting the logical replication process. func (e *Executor) Sync(ctx context.Context) error { e.mx.Lock() defer e.mx.Unlock() @@ -88,6 +90,7 @@ func (e *Executor) Sync(ctx context.Context) error { return nil } +// Exec executes the switchover process, blocking until it is complete. func (e *Executor) Exec(ctx context.Context) error { e.mx.Lock() defer e.mx.Unlock() @@ -114,4 +117,5 @@ func (e *Executor) Exec(ctx context.Context) error { return nil } +// Cancel cancels the switchover process. func (e *Executor) Cancel() { e.wf.Cancel() } diff --git a/swo/manager.go b/swo/manager.go index b82f22174f..c035eba8d7 100644 --- a/swo/manager.go +++ b/swo/manager.go @@ -19,6 +19,7 @@ import ( "github.com/target/goalert/version" ) +// A Manager is responsible for managing the switchover process. type Manager struct { // sql.DB instance safe for the application to use (instrumented for safe SWO operation) dbApp *sql.DB @@ -35,23 +36,30 @@ type Manager struct { NextDBInfo *swoinfo.DB } +// Node contains information on a GoAlert instance in SWO mode. type Node struct { ID uuid.UUID + // OldValid indicates that the old database config is valid. OldValid bool + + // NewValid indicates that the new database config is valid. NewValid bool - CanExec bool + + // CanExec indicates the node is NOT in API-only mode and is capable of executing tasks. + CanExec bool Status string } +// Config configures the current node for SWO. type Config struct { OldDBURL, NewDBURL string CanExec bool Logger *log.Logger } -// GoAlert v0.28.0-3141-g8a7b7d852-dirty +// NewManager will create a new Manager with the given configuration. 
func NewManager(cfg Config) (*Manager, error) { id := uuid.New() @@ -130,6 +138,12 @@ func NewManager(cfg Config) (*Manager, error) { return m, nil } +// SetPauseResumer allows setting the pause/resume functionality for the manager. +// +// Pause is called during the switchover process to minimize the number of +// long-lived DB connections so that the final sync can be performed quickly. +// +// After a switchover, or if it is aborted, Resume will be called. func (m *Manager) SetPauseResumer(app lifecycle.PauseResumer) { if m.pauseResume != nil { panic("already set") @@ -138,6 +152,7 @@ func (m *Manager) SetPauseResumer(app lifecycle.PauseResumer) { m.taskMgr.Init() } +// ConnInfo returns information about all current DB connections. func (m *Manager) ConnInfo(ctx context.Context) (counts []swoinfo.ConnCount, err error) { err = m.withConnFromBoth(ctx, func(ctx context.Context, oldConn, newConn *pgx.Conn) error { counts, err = swoinfo.ConnInfo(ctx, oldConn, newConn) @@ -219,4 +234,7 @@ func (m *Manager) Reset(ctx context.Context) error { // StartExecute will trigger the switchover to begin. func (m *Manager) StartExecute(ctx context.Context) error { return m.taskMgr.Execute(ctx) } +// DB returns a sql.DB that will always return safe connections to be used during the switchover. +// +// All application code/queries should use this DB. func (m *Manager) DB() *sql.DB { return m.dbApp } diff --git a/swo/status.go b/swo/status.go index 301a380aaa..f5bccd05e9 100644 --- a/swo/status.go +++ b/swo/status.go @@ -5,6 +5,7 @@ import ( "github.com/target/goalert/swo/swogrp" ) +// Status represents the current status of the switchover process. type Status struct { swogrp.Status diff --git a/swo/swogrp/clusterstate.go b/swo/swogrp/clusterstate.go index 9f2b67db81..4569a2d647 100644 --- a/swo/swogrp/clusterstate.go +++ b/swo/swogrp/clusterstate.go @@ -1,5 +1,6 @@ package swogrp +// ClusterState represents the current state of the SWO cluster. 
type ClusterState int const ( diff --git a/swo/swogrp/config.go b/swo/swogrp/config.go index bba6ac4dab..170b708e34 100644 --- a/swo/swogrp/config.go +++ b/swo/swogrp/config.go @@ -10,10 +10,15 @@ import ( type TaskFn func(context.Context) error +// Config is the configuration for a switchover group. type Config struct { + // NodeID is the unique ID of the current node. NodeID uuid.UUID - CanExec bool + // CanExec indicates this member is allowed to execute tasks. + CanExec bool + + // OldID and NewID represents the database IDs of the old and new databases, respectively. OldID, NewID uuid.UUID Logger *log.Logger diff --git a/swo/swogrp/node.go b/swo/swogrp/node.go index fcbd907bd1..e8b8deac5a 100644 --- a/swo/swogrp/node.go +++ b/swo/swogrp/node.go @@ -6,6 +6,7 @@ import ( "github.com/google/uuid" ) +// Node represents a single node in the switchover group. type Node struct { ID uuid.UUID diff --git a/swo/swogrp/set.go b/swo/swogrp/set.go deleted file mode 100644 index 0b8214c796..0000000000 --- a/swo/swogrp/set.go +++ /dev/null @@ -1,45 +0,0 @@ -package swogrp - -import ( - "sync" - - "github.com/google/uuid" -) - -type Set struct { - m map[uuid.UUID]struct{} - mx sync.Mutex -} - -func NewSet() *Set { - return &Set{ - m: make(map[uuid.UUID]struct{}), - } -} - -func (s *Set) Add(id uuid.UUID) { - s.mx.Lock() - defer s.mx.Unlock() - - s.m[id] = struct{}{} -} - -func (s *Set) Has(id uuid.UUID) bool { - s.mx.Lock() - defer s.mx.Unlock() - - _, ok := s.m[id] - return ok -} - -func (s *Set) List() []uuid.UUID { - s.mx.Lock() - defer s.mx.Unlock() - - ids := make([]uuid.UUID, 0, len(s.m)) - for id := range s.m { - ids = append(ids, id) - } - - return ids -} diff --git a/swo/swogrp/status.go b/swo/swogrp/status.go index 766f175ffe..b4dbc0b8de 100644 --- a/swo/swogrp/status.go +++ b/swo/swogrp/status.go @@ -2,6 +2,7 @@ package swogrp import "github.com/google/uuid" +// Status represents the current status of the switchover process. 
type Status struct { State ClusterState Nodes []Node @@ -10,6 +11,7 @@ type Status struct { LastError string } +// Status returns the current status of the switchover process. func (t *TaskMgr) Status() Status { t.mx.Lock() defer t.mx.Unlock() diff --git a/swo/swoinfo/conninfo.go b/swo/swoinfo/conninfo.go index 3219627b15..64c09dea51 100644 --- a/swo/swoinfo/conninfo.go +++ b/swo/swoinfo/conninfo.go @@ -7,10 +7,15 @@ import ( "github.com/target/goalert/swo/swodb" ) +// ConnCount represents the number of connections to a database for the given application name. type ConnCount struct { - Name string + // Name is the application name of the connection. + Name string + + // IsNext indicates that the connection is to the new database. IsNext bool - Count int + + Count int } // ConnInfo provides information about the connections to both old and new databases. diff --git a/swo/swoinfo/db.go b/swo/swoinfo/db.go index 634ee0a2bc..2b202c869d 100644 --- a/swo/swoinfo/db.go +++ b/swo/swoinfo/db.go @@ -9,11 +9,14 @@ import ( "github.com/target/goalert/swo/swodb" ) +// DB contains information about a database. type DB struct { + // ID is the UUID of the database, stored in the switchover_state table. ID uuid.UUID Version string } +// DBInfo provides information about the database associated with the given connection. func DBInfo(ctx context.Context, conn *pgx.Conn) (*DB, error) { info, err := swodb.New(conn).DatabaseInfo(ctx) if err != nil { diff --git a/swo/swoinfo/table.go b/swo/swoinfo/table.go index 0e8c8f1fa1..f68cf34c73 100644 --- a/swo/swoinfo/table.go +++ b/swo/swoinfo/table.go @@ -8,6 +8,7 @@ import ( "github.com/target/goalert/util/sqlutil" ) +// Table represents a table in the database. type Table struct { name string deps map[string]struct{} @@ -16,10 +17,13 @@ type Table struct { } type column swodb.InformationSchemaColumn +// Name returns the name of the table. func (t Table) Name() string { return t.name } +// IDType returns the type of the ID column. 
func (t Table) IDType() string { return t.id.DataType } +// Columns returns the names of the columns in the table. func (t Table) Columns() []string { var cols []string for _, c := range t.cols { @@ -28,6 +32,7 @@ func (t Table) Columns() []string { return cols } +// InsesrtJSONRowsQuery returns a query that can be used to insert or upsert rows from the given JSON data. func (t Table) InsertJSONRowsQuery(upsert bool) string { query := fmt.Sprintf("insert into %s select * from json_populate_recordset(null::%s, $1)", sqlutil.QuoteID(t.Name()), sqlutil.QuoteID(t.Name())) if !upsert { diff --git a/swo/swomsg/log.go b/swo/swomsg/log.go index 0efaac75c7..5b43ca20ab 100644 --- a/swo/swomsg/log.go +++ b/swo/swomsg/log.go @@ -13,8 +13,10 @@ import ( "github.com/target/goalert/util/log" ) -const PollInterval = time.Second / 3 +// pollInterval is how often the log will be polled for new events. +const pollInterval = time.Second / 3 +// Log is a reader for the switchover log. type Log struct { db *sql.DB @@ -23,8 +25,7 @@ type Log struct { eventCh chan Message } -var ErrStaleLog = fmt.Errorf("cannot append until log is read") - +// NewLog will create a new log reader, skipping any existing events. func NewLog(ctx context.Context, db *sql.DB) (*Log, error) { conn, err := stdlib.AcquireConn(db) if err != nil { @@ -46,6 +47,7 @@ func NewLog(ctx context.Context, db *sql.DB) (*Log, error) { return l, nil } +// Events will return a channel that will receive all events in the log. 
func (l *Log) Events() <-chan Message { return l.eventCh } func (l *Log) readLoop(ctx context.Context, lastID int64) { @@ -87,7 +89,7 @@ func ctxSleep(ctx context.Context, d time.Duration) error { } func (l *Log) loadEvents(ctx context.Context, lastID int64) ([]swodb.SwitchoverLog, error) { - err := ctxSleep(ctx, PollInterval-time.Since(l.lastLoad)) + err := ctxSleep(ctx, pollInterval-time.Since(l.lastLoad)) if err != nil { return nil, err } diff --git a/swo/swomsg/messages.go b/swo/swomsg/messages.go index 40c3c768f8..fec39838d5 100644 --- a/swo/swomsg/messages.go +++ b/swo/swomsg/messages.go @@ -7,6 +7,7 @@ import ( "github.com/google/uuid" ) +// Message represents a single event in the switchover log. type Message struct { ID uuid.UUID Node uuid.UUID diff --git a/swo/swosync/start.go b/swo/swosync/start.go index 374e76146e..684e1296ba 100644 --- a/swo/swosync/start.go +++ b/swo/swosync/start.go @@ -19,6 +19,12 @@ func triggerName(table string) string { } // StartTrackingChanges instruments and begins tracking changes to the DB. 
+// +// - Creates the change_log table +// - Gets the list of tables and sequences to track +// - Creates the change trigger for each table +// - Disables triggers in the new DB +// - Waits for any in-flight transactions to finish (since these may not have picked up the change trigger) func (l *LogicalReplicator) StartTrackingChanges(ctx context.Context) error { l.printf(ctx, "enabling logical replication...") _, err := l.srcConn.Exec(ctx, changelogQuery) diff --git a/swo/swosync/tablesync.go b/swo/swosync/tablesync.go index b653c2a67a..6535269e73 100644 --- a/swo/swosync/tablesync.go +++ b/swo/swosync/tablesync.go @@ -216,7 +216,7 @@ func (c *TableSync) AddBatchWrites(b *pgx.Batch) { } } -// sort entries with a non-nil end time before entries with a nil end time +// sortOnCallData sorts entries with a non-nil end time before entries with a nil end time func sortOnCallData(data []json.RawMessage) { type onCallData struct { End *time.Time `json:"end_time"` From 4817f711de3d126839f688d4fd81c5e4bcfc4bb3 Mon Sep 17 00:00:00 2001 From: Nathaniel Caza Date: Mon, 7 Nov 2022 11:04:59 -0600 Subject: [PATCH 207/225] add dev section to swo readme --- swo/README.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/swo/README.md b/swo/README.md index 769535efab..9f0f596229 100644 --- a/swo/README.md +++ b/swo/README.md @@ -2,6 +2,10 @@ Switchover (SWO) is a feature that allows a live system to switch from one database to another safely and with little to no user impact. +## Development + +To start the dev instance in switchover mode, run `make start-swo` + ## Theory of Operation Switchover mode is initiated by starting GoAlert with an additional DB URL `--db-url-next`. The database referenced by `--db-url` is referred to as the "old" DB and the `--db-url-next` is the "new" DB. 
From 5411722f865664daabf1f1794982ed82c5ed2eef Mon Sep 17 00:00:00 2001 From: Nathaniel Caza Date: Mon, 7 Nov 2022 11:05:38 -0600 Subject: [PATCH 208/225] do migrations in existing conditional block --- app/cmd.go | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/app/cmd.go b/app/cmd.go index ab67909cca..40166b67d7 100644 --- a/app/cmd.go +++ b/app/cmd.go @@ -109,15 +109,13 @@ var RootCmd = &cobra.Command{ return err } + var db *sql.DB if cfg.DBURLNext != "" { err = doMigrations(cfg.DBURLNext) if err != nil { return errors.Wrap(err, "nextdb") } - } - var db *sql.DB - if cfg.DBURLNext != "" { mgr, err := swo.NewManager(swo.Config{OldDBURL: cfg.DBURL, NewDBURL: cfg.DBURLNext, CanExec: !cfg.APIOnly, Logger: cfg.Logger}) if err != nil { return errors.Wrap(err, "init switchover handler") From c43f96b2edf7bb3131f44b08c57071f8977edabe Mon Sep 17 00:00:00 2001 From: Nathaniel Caza Date: Mon, 21 Nov 2022 14:49:58 -0600 Subject: [PATCH 209/225] limit max conns during pause --- app/pause.go | 2 ++ 1 file changed, 2 insertions(+) diff --git a/app/pause.go b/app/pause.go index d63743f4e4..1902b88ce5 100644 --- a/app/pause.go +++ b/app/pause.go @@ -21,11 +21,13 @@ func (app *App) Resume(ctx context.Context) error { func (app *App) _pause(ctx context.Context) error { app.db.SetMaxIdleConns(0) app.db.SetConnMaxLifetime(time.Second) + app.db.SetMaxOpenConns(3) app.events.Stop() return nil } func (app *App) _resume(ctx context.Context) error { + app.db.SetMaxOpenConns(app.cfg.DBMaxOpen) app.db.SetMaxIdleConns(app.cfg.DBMaxIdle) app.db.SetConnMaxLifetime(0) app.events.Start() From 83e2de6e6f2dd502b19abdd192d091a1e6f2fa48 Mon Sep 17 00:00:00 2001 From: Nathaniel Caza Date: Mon, 21 Nov 2022 14:50:22 -0600 Subject: [PATCH 210/225] retry on lock timeout --- retry/do.go | 9 ++++----- retry/temporary.go | 4 +++- util/sqlutil/error.go | 7 ++++++- 3 files changed, 13 insertions(+), 7 deletions(-) diff --git a/retry/do.go b/retry/do.go index 90f11ce90d..e1ad8c4271 100644 --- 
a/retry/do.go +++ b/retry/do.go @@ -17,21 +17,20 @@ func fib(n int) int { } return _fib[n] } + func init() { fib(30) } -// DoFunc is a function that can be retried. It is passed the current attempt number (starting with 0) -// and should return true if a retry should be attempted. -type DoFunc func(int) (bool, error) - // An Option takes the attempt number and the last error value (can be nil) and should indicate // if a retry should be made. type Option func(int, error) bool // Do will retry the given DoFunc until it or an option returns false. The last returned // error value (can be nil) of fn will be returned. -func Do(fn DoFunc, opts ...Option) error { +// +// fn will be passed the current attempt number (starting with 0). +func Do(fn func(attempt int) (shouldRetry bool, err error), opts ...Option) error { var n int var err error var retry bool diff --git a/retry/temporary.go b/retry/temporary.go index 8d8bb3b688..932e60302b 100644 --- a/retry/temporary.go +++ b/retry/temporary.go @@ -65,6 +65,8 @@ func IsTemporaryError(err error) bool { // https://www.postgresql.org/docs/10/static/errcodes-appendix.html case strings.HasPrefix(e.Code, "40"), strings.HasPrefix(e.Code, "08"): return true + case e.Code == "55P03": // lock_timeout + return true } } return false @@ -75,7 +77,7 @@ type DoTempFunc func(int) error // DoTemporaryError will retry as long as the error returned from fn is // temporary as defined by IsTemporaryError. -func DoTemporaryError(fn DoTempFunc, opts ...Option) error { +func DoTemporaryError(fn func(attempt int) error, opts ...Option) error { return Do(func(n int) (bool, error) { err := fn(n) return IsTemporaryError(err), err diff --git a/util/sqlutil/error.go b/util/sqlutil/error.go index d1ead5eb4a..3087f82fdb 100644 --- a/util/sqlutil/error.go +++ b/util/sqlutil/error.go @@ -23,7 +23,6 @@ func (e Error) Error() string { return e.err.Error() } // MapError will return a Error from the given err object or nil otherwise. 
func MapError(err error) *Error { - var pgxErr *pgconn.PgError if errors.As(err, &pgxErr) { @@ -40,5 +39,11 @@ func MapError(err error) *Error { Position: int(pgxErr.Position), } } + + var e Error + if errors.As(err, &e) { + return &e + } + return nil } From a87785bf132f11115031d2e87a89430bf04946de Mon Sep 17 00:00:00 2001 From: Nathaniel Caza Date: Mon, 21 Nov 2022 14:51:15 -0600 Subject: [PATCH 211/225] retry deleteAll mutations --- graphql2/graphqlapp/mutation.go | 26 +++++++++++++++++++++----- 1 file changed, 21 insertions(+), 5 deletions(-) diff --git a/graphql2/graphqlapp/mutation.go b/graphql2/graphqlapp/mutation.go index 10e0b1fdc1..22283ef517 100644 --- a/graphql2/graphqlapp/mutation.go +++ b/graphql2/graphqlapp/mutation.go @@ -4,11 +4,13 @@ import ( context "context" "database/sql" "fmt" + "time" "github.com/target/goalert/assignment" "github.com/target/goalert/graphql2" "github.com/target/goalert/notificationchannel" "github.com/target/goalert/permission" + "github.com/target/goalert/retry" "github.com/target/goalert/schedule" "github.com/target/goalert/user" "github.com/target/goalert/validation" @@ -157,9 +159,23 @@ func (a *Mutation) EndAllAuthSessionsByCurrentUser(ctx context.Context) (bool, e } func (a *Mutation) DeleteAll(ctx context.Context, input []assignment.RawTarget) (bool, error) { + // Retry because deleting frequently can cause a deadlock + // under heavy load. 
+ err := retry.DoTemporaryError(func(int) error { + return a.tryDeleteAll(ctx, input) + }, + retry.Log(ctx), + retry.Limit(5), + retry.FibBackoff(time.Second), + ) + + return err == nil, err +} + +func (a *Mutation) tryDeleteAll(ctx context.Context, input []assignment.RawTarget) error { tx, err := a.DB.BeginTx(ctx, nil) if err != nil { - return false, err + return err } defer tx.Rollback() @@ -214,17 +230,17 @@ func (a *Mutation) DeleteAll(ctx context.Context, input []assignment.RawTarget) case assignment.TargetTypeUserSession: err = errors.Wrap(a.AuthHandler.EndUserSessionTx(ctx, tx, ids...), "end user sessions") default: - return false, validation.NewFieldError("type", "unsupported type "+typ.String()) + return validation.NewFieldError("type", "unsupported type "+typ.String()) } if err != nil { - return false, err + return err } } err = tx.Commit() if err != nil { - return false, err + return err } - return true, nil + return nil } From 00dbb77971016bdcc990ff9f2ad4da0dcb82d165 Mon Sep 17 00:00:00 2001 From: Nathaniel Caza Date: Tue, 29 Nov 2022 13:37:31 -0600 Subject: [PATCH 212/225] fmt --- swo/withfunc.go | 8 ++++---- swo/withfunc_test.go | 3 +-- 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/swo/withfunc.go b/swo/withfunc.go index f9462a3d4f..d23e0da261 100644 --- a/swo/withfunc.go +++ b/swo/withfunc.go @@ -16,7 +16,7 @@ type WithFunc[V any] struct { } // NewWithFunc creates a new WithFunc. -// +// // withFn must not return an error after useFn is called. 
func NewWithFunc[V any](withFn func(ctx context.Context, useFn func(V)) error) *WithFunc[V] { return &WithFunc[V]{ @@ -51,15 +51,15 @@ func (w *WithFunc[V]) Begin(ctx context.Context) (v V, err error) { }) if err == nil { if !called { - errCh <-fmt.Errorf("useFn never called") + errCh <- fmt.Errorf("useFn never called") } return } if called { - panic(fmt.Errorf("error returned after withFn called: %w",err)) + panic(fmt.Errorf("error returned after withFn called: %w", err)) } - errCh <-err + errCh <- err }() select { diff --git a/swo/withfunc_test.go b/swo/withfunc_test.go index e097b2a079..86961b855d 100644 --- a/swo/withfunc_test.go +++ b/swo/withfunc_test.go @@ -11,12 +11,11 @@ import ( // Test 1 immediately return error // Test 2 call call-back fn, should block until cancel() func TestBegin(t *testing.T) { - wf1 := NewWithFunc(func(ctx context.Context, fn func(struct{})) error { + wf1 := NewWithFunc(func(ctx context.Context, fn func(struct{})) error { return errors.New("expected error") }) _, err := wf1.Begin(context.Background()) assert.Error(t, err) - } From 6837f2bee5d1ccd2c1baa3661d5f042ddf875519 Mon Sep 17 00:00:00 2001 From: Nathaniel Caza Date: Tue, 29 Nov 2022 13:44:26 -0600 Subject: [PATCH 213/225] fix migration test --- test/smoke/migrations_test.go | 3 +++ 1 file changed, 3 insertions(+) diff --git a/test/smoke/migrations_test.go b/test/smoke/migrations_test.go index 94c42588ba..d26704f6da 100644 --- a/test/smoke/migrations_test.go +++ b/test/smoke/migrations_test.go @@ -81,6 +81,9 @@ var ignoreRules = []ignoreRule{ // System default limits once set are not unset {MigrationName: "set-default-system-limits", TableName: "config_limits", ExtraRows: true}, + + // Every DB must have a unique ID. 
+ {MigrationName: "switchover-mk2", TableName: "switchover_state", ColumnName: "db_id"}, } const migrateInitData = ` From 904425060da8063316cd0c06e53d7c69efdc30cb Mon Sep 17 00:00:00 2001 From: Nathaniel Caza Date: Thu, 1 Dec 2022 16:17:53 -0600 Subject: [PATCH 214/225] fix flicker and error rendering in swo --- .../app/admin/switchover/AdminSwitchover.tsx | 86 ++++++++++--------- 1 file changed, 47 insertions(+), 39 deletions(-) diff --git a/web/src/app/admin/switchover/AdminSwitchover.tsx b/web/src/app/admin/switchover/AdminSwitchover.tsx index 2b04c7165a..9e4c7a1da5 100644 --- a/web/src/app/admin/switchover/AdminSwitchover.tsx +++ b/web/src/app/admin/switchover/AdminSwitchover.tsx @@ -88,7 +88,13 @@ export default function AdminSwitchover(): JSX.Element { const [{ fetching, error, data: _data }, refetch] = useQuery({ query, }) - const data = _data?.swoStatus as SWOStatus + + const [data, setData] = useState(null) + useEffect(() => { + if (!_data?.swoStatus) return + + setData(_data.swoStatus) + }, [_data?.swoStatus]) const [lastAction, setLastAction] = useState('') const [mutationStatus, commit] = useMutation(mutation) const theme = useTheme() @@ -105,11 +111,32 @@ export default function AdminSwitchover(): JSX.Element { return () => clearInterval(t) }, [fetching, refetch, data?.state]) - if (fetching && !data?.state) { - return + // remember if we are done and stay that way + if (data?.state === 'done') { + return ( + + + + + + + DB switchover is complete. + + + + + + ) } - if (error && error.message === '[GraphQL] not in SWO mode') { + if (error && error.message === '[GraphQL] not in SWO mode' && !data) { return ( @@ -133,28 +160,8 @@ export default function AdminSwitchover(): JSX.Element { ) } - if (data?.state === 'done') { - return ( - - - - - - - DB switchover is complete. 
- - - - - - ) + if (!data) { + return } function actionHandler(action: 'reset' | 'execute'): () => void { @@ -176,10 +183,12 @@ export default function AdminSwitchover(): JSX.Element { endNote: DateTime.local().toFormat('fff'), }) } - if (data?.state === 'unknown' && data?.lastError) { + if (error && error.message !== '[GraphQL] not in SWO mode') { statusNotices.push({ type: 'error', - message: data.lastError, + message: 'Failed to fetch status', + details: cptlz(error.message), + endNote: DateTime.local().toFormat('fff'), }) } @@ -190,10 +199,10 @@ export default function AdminSwitchover(): JSX.Element { ['syncing', 'pausing', 'executing'].includes(data?.state) || (lastAction === 'execute' && mutationStatus.fetching) - function getIcon(): React.ReactNode { + function getIcon(data: SWOStatus): React.ReactNode { const i: SvgIconProps = { color: 'primary', sx: { fontSize: '3.5rem' } } - if (error) { + if (data.lastError) { return } if (fetching && !data) { @@ -211,20 +220,19 @@ export default function AdminSwitchover(): JSX.Element { } } - function getSubheader(): React.ReactNode { - if (error) return 'Error' - if (!data) return 'Loading...' + function getSubheader(data: SWOStatus): React.ReactNode { + if (data.lastError) return 'Error' if (data.state === 'done') return 'Complete' if (data.state === 'idle') return 'Ready' if (data.state === 'unknown') return 'Needs Reset' return 'Busy' } - function getDetails(): React.ReactNode { - if (error) { + function getDetails(data: SWOStatus): React.ReactNode { + if (data.lastError) { return ( - {cptlz(error.message)} + {cptlz(data.lastError)} ) } @@ -250,12 +258,12 @@ export default function AdminSwitchover(): JSX.Element { > - {getDetails()} + {getDetails(data)}
Date: Thu, 1 Dec 2022 16:40:50 -0600 Subject: [PATCH 215/225] add confirmation when errs found --- .../switchover/AdminSWOConfirmDialog.tsx | 21 +++++++++++++++++++ .../app/admin/switchover/AdminSwitchover.tsx | 19 ++++++++++++++++- web/src/app/admin/switchover/errCheck.ts | 15 +++++++++++++ 3 files changed, 54 insertions(+), 1 deletion(-) create mode 100644 web/src/app/admin/switchover/AdminSWOConfirmDialog.tsx create mode 100644 web/src/app/admin/switchover/errCheck.ts diff --git a/web/src/app/admin/switchover/AdminSWOConfirmDialog.tsx b/web/src/app/admin/switchover/AdminSWOConfirmDialog.tsx new file mode 100644 index 0000000000..dfa46c66d0 --- /dev/null +++ b/web/src/app/admin/switchover/AdminSWOConfirmDialog.tsx @@ -0,0 +1,21 @@ +import React from 'react' +import FormDialog from '../../dialogs/FormDialog' + +export default function AdminSWOConfirmDialog(props: { + message: string + onConfirm: () => void + onClose: () => void +}): JSX.Element { + return ( + { + props.onConfirm() + props.onClose() + }} + /> + ) +} diff --git a/web/src/app/admin/switchover/AdminSwitchover.tsx b/web/src/app/admin/switchover/AdminSwitchover.tsx index 9e4c7a1da5..b2e1e51ba4 100644 --- a/web/src/app/admin/switchover/AdminSwitchover.tsx +++ b/web/src/app/admin/switchover/AdminSwitchover.tsx @@ -28,6 +28,8 @@ import AddIcon from '@mui/icons-material/PlaylistAdd' import DownIcon from '@mui/icons-material/ArrowDownward' import { TransitionGroup } from 'react-transition-group' import Spinner from '../../loading/components/Spinner' +import AdminSWOConfirmDialog from './AdminSWOConfirmDialog' +import { errCheck } from './errCheck' const query = gql` query { @@ -89,6 +91,8 @@ export default function AdminSwitchover(): JSX.Element { query, }) + const [showConfirm, setShowConfirm] = useState(false) + const [data, setData] = useState(null) useEffect(() => { if (!_data?.swoStatus) return @@ -244,8 +248,17 @@ export default function AdminSwitchover(): JSX.Element { const headerSize = { 
titleTypographyProps: { sx: { fontSize: '1.25rem' } } } + const configErr = errCheck(data).join('\n') + return ( + {showConfirm && ( + setShowConfirm(false)} + onConfirm={actionHandler('execute')} + /> + )} {statusNotices.length > 0 && ( @@ -291,7 +304,11 @@ export default function AdminSwitchover(): JSX.Element { size='large' loading={executeLoad} loadingPosition='start' - onClick={actionHandler('execute')} + onClick={ + configErr + ? () => setShowConfirm(true) + : actionHandler('execute') + } > {executeLoad ? 'Executing...' : 'Execute'} diff --git a/web/src/app/admin/switchover/errCheck.ts b/web/src/app/admin/switchover/errCheck.ts new file mode 100644 index 0000000000..861b006bc8 --- /dev/null +++ b/web/src/app/admin/switchover/errCheck.ts @@ -0,0 +1,15 @@ +import { SWOStatus } from '../../../schema' + +export function errCheck(status: SWOStatus): string[] { + const errs = [] + if (status.state !== 'idle') + errs.push('Cluster is not ready, try running Reset.') + + status.nodes.forEach((node) => { + if (node.configError) errs.push(`Node ${node.id} has config error`) + if (node.id.includes('GoAlert')) + errs.push(`Node ${node.id} is a GoAlert node that is NOT in SWO mode`) + }) + + return errs +} From 98771709f3158d7d63f9beee3776dd0fb1e8c984 Mon Sep 17 00:00:00 2001 From: Nathaniel Caza Date: Thu, 1 Dec 2022 17:25:50 -0600 Subject: [PATCH 216/225] break into separate components --- .../switchover/AdminSWODBVersionCard.tsx | 55 +++++ web/src/app/admin/switchover/AdminSWODone.tsx | 26 ++ .../admin/switchover/AdminSWOStatusCard.tsx | 121 +++++++++ .../admin/switchover/AdminSWOWrongMode.tsx | 23 ++ .../app/admin/switchover/AdminSwitchover.tsx | 233 ++---------------- 5 files changed, 242 insertions(+), 216 deletions(-) create mode 100644 web/src/app/admin/switchover/AdminSWODBVersionCard.tsx create mode 100644 web/src/app/admin/switchover/AdminSWODone.tsx create mode 100644 web/src/app/admin/switchover/AdminSWOStatusCard.tsx create mode 100644 
web/src/app/admin/switchover/AdminSWOWrongMode.tsx diff --git a/web/src/app/admin/switchover/AdminSWODBVersionCard.tsx b/web/src/app/admin/switchover/AdminSWODBVersionCard.tsx new file mode 100644 index 0000000000..17cfb42cc3 --- /dev/null +++ b/web/src/app/admin/switchover/AdminSWODBVersionCard.tsx @@ -0,0 +1,55 @@ +import React from 'react' +import Alert from '@mui/material/Alert' +import Card from '@mui/material/Card' +import CardHeader from '@mui/material/CardHeader' +import Tooltip from '@mui/material/Tooltip' +import RemoveIcon from '@mui/icons-material/PlaylistRemove' +import AddIcon from '@mui/icons-material/PlaylistAdd' +import DownIcon from '@mui/icons-material/ArrowDownward' +import { Theme } from '@mui/system' + +interface DBVersionProps { + mainDBVersion: string + nextDBVersion: string +} + +export function AdminSWODBVersionCard(props: { data: DBVersionProps }) { + const curVer = props.data.mainDBVersion.split(' on ') + const nextVer = props.data.nextDBVersion.split(' on ') + + return ( + +
+ + + } severity='warning'> + From {curVer[0]} + + + theme.palette.primary.main, + }} + /> + + } severity='success' sx={{ mb: '16px' }}> + To {nextVer[0]} + + +
+
+ ) +} diff --git a/web/src/app/admin/switchover/AdminSWODone.tsx b/web/src/app/admin/switchover/AdminSWODone.tsx new file mode 100644 index 0000000000..504afe9406 --- /dev/null +++ b/web/src/app/admin/switchover/AdminSWODone.tsx @@ -0,0 +1,26 @@ +import React from 'react' +import { Zoom } from '@mui/material' +import Grid from '@mui/material/Grid' +import Typography from '@mui/material/Typography' +import DatabaseCheck from 'mdi-material-ui/DatabaseCheck' +import { TransitionGroup } from 'react-transition-group' + +export function AdminSWODone() { + return ( + + + + + + + DB switchover is complete. + + + + + + ) +} diff --git a/web/src/app/admin/switchover/AdminSWOStatusCard.tsx b/web/src/app/admin/switchover/AdminSWOStatusCard.tsx new file mode 100644 index 0000000000..afbf952629 --- /dev/null +++ b/web/src/app/admin/switchover/AdminSWOStatusCard.tsx @@ -0,0 +1,121 @@ +import React, { useEffect, useState } from 'react' +import { useTheme, SvgIconProps } from '@mui/material' +import ButtonGroup from '@mui/material/ButtonGroup' +import Card from '@mui/material/Card' +import CardContent from '@mui/material/CardContent' +import CardHeader from '@mui/material/CardHeader' +import Typography from '@mui/material/Typography' +import ResetIcon from 'mdi-material-ui/DatabaseRefresh' +import NoExecuteIcon from 'mdi-material-ui/DatabaseExportOutline' +import ExecuteIcon from 'mdi-material-ui/DatabaseExport' +import ErrorIcon from 'mdi-material-ui/DatabaseAlert' +import IdleIcon from 'mdi-material-ui/DatabaseSettings' +import InProgressIcon from 'mdi-material-ui/DatabaseEdit' +import { SWOStatus } from '../../../schema' +import LoadingButton from '@mui/lab/LoadingButton' + +function getIcon(data: SWOStatus) { + const i: SvgIconProps = { color: 'primary', sx: { fontSize: '3.5rem' } } + + if (data.lastError) { + return + } + + if (data.state === 'idle') { + return + } + + return +} + +function getSubheader(data: SWOStatus): React.ReactNode { + if (data.lastError) return 'Error' 
+ if (data.state === 'done') return 'Complete' + if (data.state === 'idle') return 'Ready' + if (data.state === 'unknown') return 'Needs Reset' + return 'Busy' +} + +const toTitle = (s: string) => s.charAt(0).toUpperCase() + s.slice(1) + +function getDetails(data: SWOStatus): React.ReactNode { + if (data.lastError) { + return ( + + {toTitle(data.lastError)} + + ) + } + if (data?.state !== 'unknown' && data.lastStatus) { + return {toTitle(data.lastStatus)} + } + return   // reserves whitespace +} + +type AdminSWOStatusCardProps = { + data: SWOStatus + + onResetClick: () => void + onExecClick: () => void +} + +export function AdminSWOStatusCard(props: AdminSWOStatusCardProps) { + const theme = useTheme() + const [state, setState] = useState(props.data.state) + useEffect(() => { + setState(props.data.state) + }, [props.data.state]) + + const isExec = ['syncing', 'pausing', 'executing'].includes(state) + + return ( + + + + {getDetails(props.data)} +
+ + } + // disabled={mutationStatus.fetching} + variant='outlined' + size='large' + loading={state === 'resetting'} + loadingPosition='start' + onClick={() => { + setState('resetting') + props.onResetClick() + }} + > + {state === 'resetting' ? 'Resetting...' : 'Reset'} + + : } + disabled={state !== 'idle'} + variant='outlined' + size='large' + loading={isExec} + loadingPosition='start' + onClick={() => { + setState('syncing') + props.onExecClick() + }} + > + {isExec ? 'Executing...' : 'Execute'} + + + + + ) +} diff --git a/web/src/app/admin/switchover/AdminSWOWrongMode.tsx b/web/src/app/admin/switchover/AdminSWOWrongMode.tsx new file mode 100644 index 0000000000..af22bfd499 --- /dev/null +++ b/web/src/app/admin/switchover/AdminSWOWrongMode.tsx @@ -0,0 +1,23 @@ +import React from 'react' +import Grid from '@mui/material/Grid' +import Typography from '@mui/material/Typography' +import DatabaseOff from 'mdi-material-ui/DatabaseOff' + +export function AdminSWOWrongMode() { + return ( + + + +
+ + Unavailable: Application is not in switchover mode. +
+
+ You must start GoAlert with GOALERT_DB_URL_NEXT or{' '} + --db-url-next to perform a switchover. +
+
+
+
+ ) +} diff --git a/web/src/app/admin/switchover/AdminSwitchover.tsx b/web/src/app/admin/switchover/AdminSwitchover.tsx index b2e1e51ba4..b9aebb9ab7 100644 --- a/web/src/app/admin/switchover/AdminSwitchover.tsx +++ b/web/src/app/admin/switchover/AdminSwitchover.tsx @@ -1,35 +1,17 @@ import React, { useEffect, useState } from 'react' -import { useTheme, SvgIconProps, Zoom } from '@mui/material' -import Alert from '@mui/material/Alert' -import ButtonGroup from '@mui/material/ButtonGroup' -import Card from '@mui/material/Card' -import CardContent from '@mui/material/CardContent' -import CardHeader from '@mui/material/CardHeader' import Grid from '@mui/material/Grid' -import Skeleton from '@mui/material/Skeleton' -import Typography from '@mui/material/Typography' -import ResetIcon from 'mdi-material-ui/DatabaseRefresh' -import NoExecuteIcon from 'mdi-material-ui/DatabaseExportOutline' -import ExecuteIcon from 'mdi-material-ui/DatabaseExport' -import ErrorIcon from 'mdi-material-ui/DatabaseAlert' -import IdleIcon from 'mdi-material-ui/DatabaseSettings' -import InProgressIcon from 'mdi-material-ui/DatabaseEdit' import { gql, useMutation, useQuery } from 'urql' import { DateTime } from 'luxon' import { SWOAction, SWONode as SWONodeType, SWOStatus } from '../../../schema' import Notices, { Notice } from '../../details/Notices' import SWONode from './SWONode' -import LoadingButton from '@mui/lab/LoadingButton' -import DatabaseOff from 'mdi-material-ui/DatabaseOff' -import DatabaseCheck from 'mdi-material-ui/DatabaseCheck' -import Tooltip from '@mui/material/Tooltip' -import RemoveIcon from '@mui/icons-material/PlaylistRemove' -import AddIcon from '@mui/icons-material/PlaylistAdd' -import DownIcon from '@mui/icons-material/ArrowDownward' -import { TransitionGroup } from 'react-transition-group' import Spinner from '../../loading/components/Spinner' import AdminSWOConfirmDialog from './AdminSWOConfirmDialog' import { errCheck } from './errCheck' +import { AdminSWODone } from 
'./AdminSWODone' +import { AdminSWOWrongMode } from './AdminSWOWrongMode' +import { AdminSWODBVersionCard } from './AdminSWODBVersionCard' +import { AdminSWOStatusCard } from './AdminSWOStatusCard' const query = gql` query { @@ -93,84 +75,29 @@ export default function AdminSwitchover(): JSX.Element { const [showConfirm, setShowConfirm] = useState(false) - const [data, setData] = useState(null) - useEffect(() => { - if (!_data?.swoStatus) return - - setData(_data.swoStatus) - }, [_data?.swoStatus]) - const [lastAction, setLastAction] = useState('') const [mutationStatus, commit] = useMutation(mutation) - const theme = useTheme() - - const curVer = data?.mainDBVersion.split(' on ') - const nextVer = data?.mainDBVersion.split(' on ') + const data = _data?.swoStatus as SWOStatus useEffect(() => { if (data?.state === 'done') return + if (mutationStatus.fetching) return const t = setInterval(() => { if (!fetching) refetch() }, 1000) return () => clearInterval(t) - }, [fetching, refetch, data?.state]) + }, [fetching, refetch, data?.state, mutationStatus.fetching]) // remember if we are done and stay that way - if (data?.state === 'done') { - return ( - - - - - - - DB switchover is complete. - - - - - - ) - } + if (data?.state === 'done') return - if (error && error.message === '[GraphQL] not in SWO mode' && !data) { - return ( - - - -
- - Unavailable: Application is not in switchover mode. -
-
- You must start GoAlert with - GOALERT_DB_URL_NEXT - or --db-url-next to perform a switchover. -
-
-
-
- ) - } + if (error && error.message === '[GraphQL] not in SWO mode' && !data) + return - if (!data) { - return - } + if (!data) return function actionHandler(action: 'reset' | 'execute'): () => void { return () => { - setLastAction(action) commit({ action }, { additionalTypenames: ['SWOStatus'] }) } } @@ -196,58 +123,6 @@ export default function AdminSwitchover(): JSX.Element { }) } - const resetLoad = - data?.state === 'resetting' || - (lastAction === 'reset' && mutationStatus.fetching) - const executeLoad = - ['syncing', 'pausing', 'executing'].includes(data?.state) || - (lastAction === 'execute' && mutationStatus.fetching) - - function getIcon(data: SWOStatus): React.ReactNode { - const i: SvgIconProps = { color: 'primary', sx: { fontSize: '3.5rem' } } - - if (data.lastError) { - return - } - if (fetching && !data) { - return ( - - - - ) - } - if (!['unknown', 'idle', 'done'].includes(data.state)) { - return - } - if (data.state === 'idle') { - return - } - } - - function getSubheader(data: SWOStatus): React.ReactNode { - if (data.lastError) return 'Error' - if (data.state === 'done') return 'Complete' - if (data.state === 'idle') return 'Ready' - if (data.state === 'unknown') return 'Needs Reset' - return 'Busy' - } - - function getDetails(data: SWOStatus): React.ReactNode { - if (data.lastError) { - return ( - - {cptlz(data.lastError)} - - ) - } - if (data?.state !== 'unknown' && data.lastStatus) { - return {cptlz(data.lastStatus)} - } - return   // reserves whitespace - } - - const headerSize = { titleTypographyProps: { sx: { fontSize: '1.25rem' } } } - const configErr = errCheck(data).join('\n') return ( @@ -265,89 +140,15 @@ export default function AdminSwitchover(): JSX.Element { )} - - - - {getDetails(data)} -
- - } - disabled={mutationStatus.fetching} - variant='outlined' - size='large' - loading={resetLoad} - loadingPosition='start' - onClick={actionHandler('reset')} - > - {resetLoad ? 'Resetting...' : 'Reset'} - - : - } - disabled={data?.state !== 'idle' || mutationStatus.fetching} - variant='outlined' - size='large' - loading={executeLoad} - loadingPosition='start' - onClick={ - configErr - ? () => setShowConfirm(true) - : actionHandler('execute') - } - > - {executeLoad ? 'Executing...' : 'Execute'} - - - - + - -
- - - } severity='warning'> - From {curVer[0]} - - - theme.palette.primary.main, - }} - /> - - } severity='success' sx={{ mb: '16px' }}> - To {nextVer[0]} - - -
-
+
From e94e20ee667c575e1c37194392bf0330c69b6983 Mon Sep 17 00:00:00 2001 From: Nathaniel Caza Date: Thu, 1 Dec 2022 17:29:18 -0600 Subject: [PATCH 217/225] add comment for use of useEffect --- web/src/app/admin/switchover/AdminSWOStatusCard.tsx | 3 +++ 1 file changed, 3 insertions(+) diff --git a/web/src/app/admin/switchover/AdminSWOStatusCard.tsx b/web/src/app/admin/switchover/AdminSWOStatusCard.tsx index afbf952629..bd778fcd08 100644 --- a/web/src/app/admin/switchover/AdminSWOStatusCard.tsx +++ b/web/src/app/admin/switchover/AdminSWOStatusCard.tsx @@ -61,6 +61,9 @@ type AdminSWOStatusCardProps = { export function AdminSWOStatusCard(props: AdminSWOStatusCardProps) { const theme = useTheme() + + // We track this separately so we can wait for a NEW status without + // our button flickering back to idle. const [state, setState] = useState(props.data.state) useEffect(() => { setState(props.data.state) From e1596c319b8baa97ea04d7cc60ff3f8f5ae4c3a8 Mon Sep 17 00:00:00 2001 From: Nathaniel Caza Date: Mon, 5 Dec 2022 14:43:33 -0600 Subject: [PATCH 218/225] fix exec state on confirm --- .../app/admin/switchover/AdminSWOConfirmDialog.tsx | 4 ++-- .../app/admin/switchover/AdminSWOStatusCard.tsx | 5 ++--- web/src/app/admin/switchover/AdminSwitchover.tsx | 14 +++++++++++--- 3 files changed, 15 insertions(+), 8 deletions(-) diff --git a/web/src/app/admin/switchover/AdminSWOConfirmDialog.tsx b/web/src/app/admin/switchover/AdminSWOConfirmDialog.tsx index dfa46c66d0..246b823c8a 100644 --- a/web/src/app/admin/switchover/AdminSWOConfirmDialog.tsx +++ b/web/src/app/admin/switchover/AdminSWOConfirmDialog.tsx @@ -2,7 +2,7 @@ import React from 'react' import FormDialog from '../../dialogs/FormDialog' export default function AdminSWOConfirmDialog(props: { - message: string + messages: string[] onConfirm: () => void onClose: () => void }): JSX.Element { @@ -10,7 +10,7 @@ export default function AdminSWOConfirmDialog(props: { { props.onConfirm() diff --git 
a/web/src/app/admin/switchover/AdminSWOStatusCard.tsx b/web/src/app/admin/switchover/AdminSWOStatusCard.tsx index bd778fcd08..1c482677f2 100644 --- a/web/src/app/admin/switchover/AdminSWOStatusCard.tsx +++ b/web/src/app/admin/switchover/AdminSWOStatusCard.tsx @@ -56,7 +56,7 @@ type AdminSWOStatusCardProps = { data: SWOStatus onResetClick: () => void - onExecClick: () => void + onExecClick: () => boolean } export function AdminSWOStatusCard(props: AdminSWOStatusCardProps) { @@ -111,8 +111,7 @@ export function AdminSWOStatusCard(props: AdminSWOStatusCardProps) { loading={isExec} loadingPosition='start' onClick={() => { - setState('syncing') - props.onExecClick() + if (props.onExecClick()) setState('syncing') }} > {isExec ? 'Executing...' : 'Execute'} diff --git a/web/src/app/admin/switchover/AdminSwitchover.tsx b/web/src/app/admin/switchover/AdminSwitchover.tsx index b9aebb9ab7..e8809fd245 100644 --- a/web/src/app/admin/switchover/AdminSwitchover.tsx +++ b/web/src/app/admin/switchover/AdminSwitchover.tsx @@ -123,13 +123,13 @@ export default function AdminSwitchover(): JSX.Element { }) } - const configErr = errCheck(data).join('\n') + const configErr = errCheck(data) return ( {showConfirm && ( setShowConfirm(false)} onConfirm={actionHandler('execute')} /> @@ -142,7 +142,15 @@ export default function AdminSwitchover(): JSX.Element { { + if (configErr) { + setShowConfirm(true) + return false + } + + actionHandler('execute')() + return true + }} onResetClick={actionHandler('reset')} /> From 8494177944f46e050e71666cb51f0c672bdf23df Mon Sep 17 00:00:00 2001 From: Nathaniel Caza Date: Mon, 5 Dec 2022 14:47:08 -0600 Subject: [PATCH 219/225] add missing type info --- web/src/app/admin/switchover/AdminSWODBVersionCard.tsx | 4 +++- web/src/app/admin/switchover/AdminSWODone.tsx | 2 +- web/src/app/admin/switchover/AdminSWOStatusCard.tsx | 8 +++++--- web/src/app/admin/switchover/AdminSWOWrongMode.tsx | 2 +- 4 files changed, 10 insertions(+), 6 deletions(-) diff --git 
a/web/src/app/admin/switchover/AdminSWODBVersionCard.tsx b/web/src/app/admin/switchover/AdminSWODBVersionCard.tsx index 17cfb42cc3..002fb994f7 100644 --- a/web/src/app/admin/switchover/AdminSWODBVersionCard.tsx +++ b/web/src/app/admin/switchover/AdminSWODBVersionCard.tsx @@ -13,7 +13,9 @@ interface DBVersionProps { nextDBVersion: string } -export function AdminSWODBVersionCard(props: { data: DBVersionProps }) { +export function AdminSWODBVersionCard(props: { + data: DBVersionProps +}): JSX.Element { const curVer = props.data.mainDBVersion.split(' on ') const nextVer = props.data.nextDBVersion.split(' on ') diff --git a/web/src/app/admin/switchover/AdminSWODone.tsx b/web/src/app/admin/switchover/AdminSWODone.tsx index 504afe9406..0daa4d10ca 100644 --- a/web/src/app/admin/switchover/AdminSWODone.tsx +++ b/web/src/app/admin/switchover/AdminSWODone.tsx @@ -5,7 +5,7 @@ import Typography from '@mui/material/Typography' import DatabaseCheck from 'mdi-material-ui/DatabaseCheck' import { TransitionGroup } from 'react-transition-group' -export function AdminSWODone() { +export function AdminSWODone(): JSX.Element { return ( diff --git a/web/src/app/admin/switchover/AdminSWOStatusCard.tsx b/web/src/app/admin/switchover/AdminSWOStatusCard.tsx index 1c482677f2..af4036e759 100644 --- a/web/src/app/admin/switchover/AdminSWOStatusCard.tsx +++ b/web/src/app/admin/switchover/AdminSWOStatusCard.tsx @@ -14,7 +14,7 @@ import InProgressIcon from 'mdi-material-ui/DatabaseEdit' import { SWOStatus } from '../../../schema' import LoadingButton from '@mui/lab/LoadingButton' -function getIcon(data: SWOStatus) { +function getIcon(data: SWOStatus): JSX.Element { const i: SvgIconProps = { color: 'primary', sx: { fontSize: '3.5rem' } } if (data.lastError) { @@ -36,7 +36,7 @@ function getSubheader(data: SWOStatus): React.ReactNode { return 'Busy' } -const toTitle = (s: string) => s.charAt(0).toUpperCase() + s.slice(1) +const toTitle = (s: string): string => s.charAt(0).toUpperCase() + s.slice(1) 
function getDetails(data: SWOStatus): React.ReactNode { if (data.lastError) { @@ -59,7 +59,9 @@ type AdminSWOStatusCardProps = { onExecClick: () => boolean } -export function AdminSWOStatusCard(props: AdminSWOStatusCardProps) { +export function AdminSWOStatusCard( + props: AdminSWOStatusCardProps, +): JSX.Element { const theme = useTheme() // We track this separately so we can wait for a NEW status without diff --git a/web/src/app/admin/switchover/AdminSWOWrongMode.tsx b/web/src/app/admin/switchover/AdminSWOWrongMode.tsx index af22bfd499..80f76be692 100644 --- a/web/src/app/admin/switchover/AdminSWOWrongMode.tsx +++ b/web/src/app/admin/switchover/AdminSWOWrongMode.tsx @@ -3,7 +3,7 @@ import Grid from '@mui/material/Grid' import Typography from '@mui/material/Typography' import DatabaseOff from 'mdi-material-ui/DatabaseOff' -export function AdminSWOWrongMode() { +export function AdminSWOWrongMode(): JSX.Element { return ( From c0ca18b5f9c8b6e3881ebf166b14bbeda091599c Mon Sep 17 00:00:00 2001 From: Nathaniel Caza Date: Mon, 5 Dec 2022 16:06:50 -0600 Subject: [PATCH 220/225] render confirm errs as
  • --- .../switchover/AdminSWOConfirmDialog.tsx | 9 ++++- .../admin/switchover/AdminSWOStatusCard.tsx | 3 +- .../app/admin/switchover/AdminSwitchover.tsx | 31 ++------------ web/src/app/admin/switchover/errCheck.ts | 15 ------- web/src/app/admin/switchover/util.ts | 40 +++++++++++++++++++ 5 files changed, 53 insertions(+), 45 deletions(-) delete mode 100644 web/src/app/admin/switchover/errCheck.ts create mode 100644 web/src/app/admin/switchover/util.ts diff --git a/web/src/app/admin/switchover/AdminSWOConfirmDialog.tsx b/web/src/app/admin/switchover/AdminSWOConfirmDialog.tsx index 246b823c8a..ff495ca728 100644 --- a/web/src/app/admin/switchover/AdminSWOConfirmDialog.tsx +++ b/web/src/app/admin/switchover/AdminSWOConfirmDialog.tsx @@ -10,12 +10,19 @@ export default function AdminSWOConfirmDialog(props: { { props.onConfirm() props.onClose() }} + form={ +
      + {props.messages.map((m, idx) => { + return
    • {m}
    • + })} +
    + } /> ) } diff --git a/web/src/app/admin/switchover/AdminSWOStatusCard.tsx b/web/src/app/admin/switchover/AdminSWOStatusCard.tsx index af4036e759..c035a1efad 100644 --- a/web/src/app/admin/switchover/AdminSWOStatusCard.tsx +++ b/web/src/app/admin/switchover/AdminSWOStatusCard.tsx @@ -13,6 +13,7 @@ import IdleIcon from 'mdi-material-ui/DatabaseSettings' import InProgressIcon from 'mdi-material-ui/DatabaseEdit' import { SWOStatus } from '../../../schema' import LoadingButton from '@mui/lab/LoadingButton' +import { toTitle } from './util' function getIcon(data: SWOStatus): JSX.Element { const i: SvgIconProps = { color: 'primary', sx: { fontSize: '3.5rem' } } @@ -36,8 +37,6 @@ function getSubheader(data: SWOStatus): React.ReactNode { return 'Busy' } -const toTitle = (s: string): string => s.charAt(0).toUpperCase() + s.slice(1) - function getDetails(data: SWOStatus): React.ReactNode { if (data.lastError) { return ( diff --git a/web/src/app/admin/switchover/AdminSwitchover.tsx b/web/src/app/admin/switchover/AdminSwitchover.tsx index e8809fd245..d14bcd37bf 100644 --- a/web/src/app/admin/switchover/AdminSwitchover.tsx +++ b/web/src/app/admin/switchover/AdminSwitchover.tsx @@ -7,7 +7,7 @@ import Notices, { Notice } from '../../details/Notices' import SWONode from './SWONode' import Spinner from '../../loading/components/Spinner' import AdminSWOConfirmDialog from './AdminSWOConfirmDialog' -import { errCheck } from './errCheck' +import { errCheck, friendlyName, toTitle } from './util' import { AdminSWODone } from './AdminSWODone' import { AdminSWOWrongMode } from './AdminSWOWrongMode' import { AdminSWODBVersionCard } from './AdminSWODBVersionCard' @@ -39,35 +39,12 @@ const query = gql` } ` -let n = 1 -let u = 1 -const names: { [key: string]: string } = {} - -// friendlyName will assign a persistant "friendly" name to the node. -// -// This ensures a specific ID will always refer to the same node. This -// is so that it is clear if a node dissapears or a new one appears. 
-// -// Note: `Node 1` on one browser tab may not be the same node as `Node 1` -// on another browser tab. -function friendlyName(id: string): string { - if (!names[id]) { - if (id.startsWith('unknown')) return (names[id] = 'Unknown ' + u++) - return (names[id] = 'Node ' + n++) - } - return names[id] -} - const mutation = gql` mutation ($action: SWOAction!) { swoAction(action: $action) } ` -function cptlz(s: string): string { - return s.charAt(0).toUpperCase() + s.substring(1) -} - export default function AdminSwitchover(): JSX.Element { const [{ fetching, error, data: _data }, refetch] = useQuery({ query, @@ -110,7 +87,7 @@ export default function AdminSwitchover(): JSX.Element { statusNotices.push({ type: 'error', message: 'Failed to ' + vars.action, - details: cptlz(mutationStatus.error.message), + details: toTitle(mutationStatus.error.message), endNote: DateTime.local().toFormat('fff'), }) } @@ -118,7 +95,7 @@ export default function AdminSwitchover(): JSX.Element { statusNotices.push({ type: 'error', message: 'Failed to fetch status', - details: cptlz(error.message), + details: toTitle(error.message), endNote: DateTime.local().toFormat('fff'), }) } @@ -143,7 +120,7 @@ export default function AdminSwitchover(): JSX.Element { { - if (configErr) { + if (configErr.length) { setShowConfirm(true) return false } diff --git a/web/src/app/admin/switchover/errCheck.ts b/web/src/app/admin/switchover/errCheck.ts deleted file mode 100644 index 861b006bc8..0000000000 --- a/web/src/app/admin/switchover/errCheck.ts +++ /dev/null @@ -1,15 +0,0 @@ -import { SWOStatus } from '../../../schema' - -export function errCheck(status: SWOStatus): string[] { - const errs = [] - if (status.state !== 'idle') - errs.push('Cluster is not ready, try running Reset.') - - status.nodes.forEach((node) => { - if (node.configError) errs.push(`Node ${node.id} has config error`) - if (node.id.includes('GoAlert')) - errs.push(`Node ${node.id} is a GoAlert node that is NOT in SWO mode`) - }) - - 
return errs -} diff --git a/web/src/app/admin/switchover/util.ts b/web/src/app/admin/switchover/util.ts new file mode 100644 index 0000000000..df0786830a --- /dev/null +++ b/web/src/app/admin/switchover/util.ts @@ -0,0 +1,40 @@ +import { SWOStatus } from '../../../schema' + +export function errCheck(status: SWOStatus): string[] { + const errs = [] + if (status.state !== 'idle') + errs.push('Cluster is not ready, try running Reset.') + + status.nodes.forEach((node) => { + if (node.configError) + errs.push(`${friendlyName(node.id)} has incorrect DB URL(s).`) + if (node.id.includes('GoAlert')) + errs.push( + `${friendlyName(node.id)} is a GoAlert node that is NOT in SWO mode`, + ) + }) + + return errs +} + +export const toTitle = (s: string): string => + s.charAt(0).toUpperCase() + s.slice(1) + +let n = 1 +let u = 1 +const names: { [key: string]: string } = {} + +// friendlyName will assign a persistant "friendly" name to the node. +// +// This ensures a specific ID will always refer to the same node. This +// is so that it is clear if a node dissapears or a new one appears. +// +// Note: `Node 1` on one browser tab may not be the same node as `Node 1` +// on another browser tab. 
+export function friendlyName(id: string): string { + if (!names[id]) { + if (id.startsWith('unknown')) return (names[id] = 'Unknown ' + u++) + return (names[id] = 'Node ' + n++) + } + return names[id] +} From fde8671de2b99847631fbd146da812f864786fb0 Mon Sep 17 00:00:00 2001 From: Nathaniel Caza Date: Tue, 6 Dec 2022 09:29:37 -0600 Subject: [PATCH 221/225] fix function order --- web/src/app/admin/switchover/util.ts | 38 ++++++++++++++-------------- 1 file changed, 19 insertions(+), 19 deletions(-) diff --git a/web/src/app/admin/switchover/util.ts b/web/src/app/admin/switchover/util.ts index df0786830a..a53c1f4e99 100644 --- a/web/src/app/admin/switchover/util.ts +++ b/web/src/app/admin/switchover/util.ts @@ -1,5 +1,24 @@ import { SWOStatus } from '../../../schema' +let n = 1 +let u = 1 +const names: { [key: string]: string } = {} + +// friendlyName will assign a persistant "friendly" name to the node. +// +// This ensures a specific ID will always refer to the same node. This +// is so that it is clear if a node dissapears or a new one appears. +// +// Note: `Node 1` on one browser tab may not be the same node as `Node 1` +// on another browser tab. +export function friendlyName(id: string): string { + if (!names[id]) { + if (id.startsWith('unknown')) return (names[id] = 'Unknown ' + u++) + return (names[id] = 'Node ' + n++) + } + return names[id] +} + export function errCheck(status: SWOStatus): string[] { const errs = [] if (status.state !== 'idle') @@ -19,22 +38,3 @@ export function errCheck(status: SWOStatus): string[] { export const toTitle = (s: string): string => s.charAt(0).toUpperCase() + s.slice(1) - -let n = 1 -let u = 1 -const names: { [key: string]: string } = {} - -// friendlyName will assign a persistant "friendly" name to the node. -// -// This ensures a specific ID will always refer to the same node. This -// is so that it is clear if a node dissapears or a new one appears. 
-// -// Note: `Node 1` on one browser tab may not be the same node as `Node 1` -// on another browser tab. -export function friendlyName(id: string): string { - if (!names[id]) { - if (id.startsWith('unknown')) return (names[id] = 'Unknown ' + u++) - return (names[id] = 'Node ' + n++) - } - return names[id] -} From 80e45cd6880aa0603fb757ff387b6c40ac077856 Mon Sep 17 00:00:00 2001 From: Nathaniel Caza Date: Tue, 6 Dec 2022 16:19:59 -0600 Subject: [PATCH 222/225] update sqlc and use gettool --- Makefile | 14 +++--- devtools/gettool/getsqlc.go | 37 ++++++++++++++++ devtools/gettool/run.go | 2 + devtools/tools.go | 3 -- sqlc.version | 1 + swo/swodb/db.go | 2 +- swo/swodb/models.go | 36 ++++++++++------ swo/swodb/queries.sql.go | 33 ++++++++------ swo/swoinfo/pgtables.sql | 85 ------------------------------------- swo/swoinfo/queries.sql | 10 ++--- swo/swoinfo/scantables.go | 16 +++---- swo/swoinfo/table.go | 6 +-- swo/swoinfo/table_test.go | 6 +-- 13 files changed, 110 insertions(+), 141 deletions(-) create mode 100644 devtools/gettool/getsqlc.go create mode 100644 sqlc.version diff --git a/Makefile b/Makefile index 4c0509094e..c2d31dd334 100644 --- a/Makefile +++ b/Makefile @@ -51,6 +51,9 @@ Makefile.binaries.mk: devtools/genmake/* $(BIN_DIR)/tools/protoc: protoc.version go run ./devtools/gettool -t protoc -v $(shell cat protoc.version) -o $@ +$(BIN_DIR)/tools/sqlc: sqlc.version + go run ./devtools/gettool -t sqlc -v $(shell cat sqlc.version) -o $@ + $(BIN_DIR)/tools/prometheus: prometheus.version go run ./devtools/gettool -t prometheus -v $(shell cat prometheus.version) -o $@ @@ -98,8 +101,8 @@ cy-wide-prod-run: web/src/build/static/app.js cypress cy-mobile-prod-run: web/src/build/static/app.js cypress $(MAKE) $(MFLAGS) cy-mobile-prod CY_ACTION=run CONTAINER_TOOL=$(CONTAINER_TOOL) BUNDLE=1 -swo/swodb/queries.sql.go: bin/tools/sqlc sqlc.yaml swo/*/*.sql migrate/migrations/*.sql - ./bin/tools/sqlc generate +swo/swodb/queries.sql.go: $(BIN_DIR)/tools/sqlc sqlc.yaml 
swo/*/*.sql migrate/migrations/*.sql + $(BIN_DIR)/tools/sqlc generate web/src/schema.d.ts: graphql2/schema.graphql node_modules web/src/genschema.go go generate ./web/src @@ -175,11 +178,8 @@ pkg/sysapi/sysapi_grpc.pb.go: pkg/sysapi/sysapi.proto $(BIN_DIR)/tools/protoc-ge pkg/sysapi/sysapi.pb.go: pkg/sysapi/sysapi.proto $(BIN_DIR)/tools/protoc-gen-go $(BIN_DIR)/tools/protoc PATH="$(BIN_DIR)/tools" protoc --go_out=. --go_opt=paths=source_relative pkg/sysapi/sysapi.proto -bin/tools/sqlc: go.mod go.sum - CGO_ENABLED=1 go build -o bin/tools/sqlc github.com/kyleconroy/sqlc/cmd/sqlc - -generate: node_modules pkg/sysapi/sysapi.pb.go pkg/sysapi/sysapi_grpc.pb.go bin/tools/sqlc - ./bin/tools/sqlc generate +generate: node_modules pkg/sysapi/sysapi.pb.go pkg/sysapi/sysapi_grpc.pb.go $(BIN_DIR)/tools/sqlc + $(BIN_DIR)/tools/sqlc generate go generate ./... diff --git a/devtools/gettool/getsqlc.go b/devtools/gettool/getsqlc.go new file mode 100644 index 0000000000..b4cac6b1fc --- /dev/null +++ b/devtools/gettool/getsqlc.go @@ -0,0 +1,37 @@ +package main + +import ( + "archive/zip" + "fmt" + "path/filepath" + "runtime" +) + +func getSqlc(version, output string) error { + url := fmt.Sprintf("https://github.com/kyleconroy/sqlc/releases/download/v%s/sqlc_%s_%s_%s.zip", + version, version, runtime.GOOS, runtime.GOARCH, + ) + fd, n, err := fetchFile(url) + if err != nil { + return fmt.Errorf("fetch: %w", err) + } + defer fd.Close() + + name := "sqlc" + if runtime.GOOS == "windows" { + name += ".exe" + } + + outDir := filepath.Dir(output) + z, err := zip.NewReader(fd, n) + if err != nil { + return fmt.Errorf("unzip: %w", err) + } + + err = extractFromZip(z, name, filepath.Join(outDir, name), true) + if err != nil { + return fmt.Errorf("extract bin: %w", err) + } + + return nil +} diff --git a/devtools/gettool/run.go b/devtools/gettool/run.go index ce8188568f..fa45241095 100644 --- a/devtools/gettool/run.go +++ b/devtools/gettool/run.go @@ -28,6 +28,8 @@ func main() { err = 
getPrometheus(*version, *output) case "protoc": err = getProtoC(*version, *output) + case "sqlc": + err = getSqlc(*version, *output) default: log.Fatalf("unknown tool '%s'", *tool) } diff --git a/devtools/tools.go b/devtools/tools.go index e494b4e124..d7e3d1f9b3 100644 --- a/devtools/tools.go +++ b/devtools/tools.go @@ -8,11 +8,8 @@ import ( _ "github.com/fullstorydev/grpcui/cmd/grpcui" _ "github.com/gordonklaus/ineffassign" _ "github.com/kffl/speedbump" - _ "github.com/kyleconroy/sqlc/cmd/sqlc" _ "github.com/mailhog/MailHog" _ "golang.org/x/tools/cmd/goimports" _ "golang.org/x/tools/cmd/stringer" - _ "google.golang.org/grpc/cmd/protoc-gen-go-grpc" - _ "google.golang.org/protobuf/cmd/protoc-gen-go" _ "honnef.co/go/tools/cmd/staticcheck" ) diff --git a/sqlc.version b/sqlc.version new file mode 100644 index 0000000000..15b989e398 --- /dev/null +++ b/sqlc.version @@ -0,0 +1 @@ +1.16.0 diff --git a/swo/swodb/db.go b/swo/swodb/db.go index ed75f5e582..19f2208525 100644 --- a/swo/swodb/db.go +++ b/swo/swodb/db.go @@ -1,6 +1,6 @@ // Code generated by sqlc. DO NOT EDIT. // versions: -// sqlc v1.14.0 +// sqlc v1.16.0 package swodb diff --git a/swo/swodb/models.go b/swo/swodb/models.go index 1bba246825..25449bdb4c 100644 --- a/swo/swodb/models.go +++ b/swo/swodb/models.go @@ -1,11 +1,12 @@ // Code generated by sqlc. DO NOT EDIT. // versions: -// sqlc v1.14.0 +// sqlc v1.16.0 package swodb import ( "database/sql" + "database/sql/driver" "fmt" "time" @@ -33,24 +34,33 @@ func (e *EnumSwitchoverState) Scan(src interface{}) error { return nil } -type ChangeLog struct { - ID int64 - TableName string - RowID string +type NullEnumSwitchoverState struct { + EnumSwitchoverState EnumSwitchoverState + Valid bool // Valid is true if EnumSwitchoverState is not NULL } -type InformationSchemaColumn struct { - TableName string - ColumnName string - DataType string - OrdinalPosition int32 +// Scan implements the Scanner interface. 
+func (ns *NullEnumSwitchoverState) Scan(value interface{}) error { + if value == nil { + ns.EnumSwitchoverState, ns.Valid = "", false + return nil + } + ns.Valid = true + return ns.EnumSwitchoverState.Scan(value) } -type InformationSchemaSequence struct { - SequenceName string +// Value implements the driver Valuer interface. +func (ns NullEnumSwitchoverState) Value() (driver.Value, error) { + if !ns.Valid { + return nil, nil + } + return ns.EnumSwitchoverState, nil } -type InformationSchemaTable struct { +type ChangeLog struct { + ID int64 + TableName string + RowID string } type PgStatActivity struct { diff --git a/swo/swodb/queries.sql.go b/swo/swodb/queries.sql.go index 8475b913e1..c6e20acd9a 100644 --- a/swo/swodb/queries.sql.go +++ b/swo/swodb/queries.sql.go @@ -1,6 +1,6 @@ // Code generated by sqlc. DO NOT EDIT. // versions: -// sqlc v1.14.0 +// sqlc v1.16.0 // source: queries.sql package swodb @@ -191,7 +191,7 @@ func (q *Queries) Now(ctx context.Context) (time.Time, error) { } const sequenceNames = `-- name: SequenceNames :many -SELECT sequence_name +SELECT sequence_name::text FROM information_schema.sequences WHERE sequence_catalog = current_database() AND sequence_schema = 'public' @@ -219,10 +219,10 @@ func (q *Queries) SequenceNames(ctx context.Context) ([]string, error) { } const tableColumns = `-- name: TableColumns :many -SELECT col.table_name, - col.column_name, - col.data_type, - col.ordinal_position +SELECT col.table_name::text, + col.column_name::text, + col.data_type::text, + col.ordinal_position::INT FROM information_schema.columns col JOIN information_schema.tables t ON t.table_catalog = col.table_catalog AND t.table_schema = col.table_schema @@ -232,20 +232,27 @@ WHERE col.table_catalog = current_database() AND col.table_schema = 'public' ` -func (q *Queries) TableColumns(ctx context.Context) ([]InformationSchemaColumn, error) { +type TableColumnsRow struct { + ColTableName string + ColColumnName string + ColDataType string + 
ColOrdinalPosition int32 +} + +func (q *Queries) TableColumns(ctx context.Context) ([]TableColumnsRow, error) { rows, err := q.db.Query(ctx, tableColumns) if err != nil { return nil, err } defer rows.Close() - var items []InformationSchemaColumn + var items []TableColumnsRow for rows.Next() { - var i InformationSchemaColumn + var i TableColumnsRow if err := rows.Scan( - &i.TableName, - &i.ColumnName, - &i.DataType, - &i.OrdinalPosition, + &i.ColTableName, + &i.ColColumnName, + &i.ColDataType, + &i.ColOrdinalPosition, ); err != nil { return nil, err } diff --git a/swo/swoinfo/pgtables.sql b/swo/swoinfo/pgtables.sql index 2ad427caab..f9a0f8da2a 100644 --- a/swo/swoinfo/pgtables.sql +++ b/swo/swoinfo/pgtables.sql @@ -1,91 +1,6 @@ --- pg_catalog tables used by SWO -CREATE TABLE pg_catalog.pg_namespace ( - oid oid NOT NULL, - nspname NAME NOT NULL, - nspowner oid NOT NULL, - nspacl aclitem [ ] -); - -CREATE TABLE pg_catalog.pg_class ( - oid oid NOT NULL, - relname NAME NOT NULL, - relnamespace oid NOT NULL, - reltype oid NOT NULL, - reloftype oid NOT NULL, - relowner oid NOT NULL, - relam oid NOT NULL, - relfilenode oid NOT NULL, - reltablespace oid NOT NULL, - relpages INTEGER NOT NULL, - reltuples REAL NOT NULL, - relallvisible INTEGER NOT NULL, - reltoastrelid oid NOT NULL, - relhasindex BOOLEAN NOT NULL, - relisshared BOOLEAN NOT NULL, - relpersistence "char" NOT NULL, - relkind "char" NOT NULL, - relnatts SMALLINT NOT NULL, - relchecks SMALLINT NOT NULL, - relhasrules BOOLEAN NOT NULL, - relhastriggers BOOLEAN NOT NULL, - relhassubclass BOOLEAN NOT NULL, - relrowsecurity BOOLEAN NOT NULL, - relforcerowsecurity BOOLEAN NOT NULL, - relispopulated BOOLEAN NOT NULL, - relreplident "char" NOT NULL, - relispartition BOOLEAN NOT NULL, - relrewrite oid NOT NULL, - relfrozenxid xid NOT NULL, - relminmxid xid NOT NULL, - relacl aclitem [ ], - reloptions text [ ] COLLATE pg_catalog. "C", - relpartbound pg_node_tree COLLATE pg_catalog. 
"C" -); - -CREATE TABLE pg_catalog.pg_constraint ( - oid oid NOT NULL, - conname NAME NOT NULL, - connamespace oid NOT NULL, - contype "char" NOT NULL, - condeferrable BOOLEAN NOT NULL, - condeferred BOOLEAN NOT NULL, - convalidated BOOLEAN NOT NULL, - conrelid oid NOT NULL, - contypid oid NOT NULL, - conindid oid NOT NULL, - conparentid oid NOT NULL, - confrelid oid NOT NULL, - confupdtype "char" NOT NULL, - confdeltype "char" NOT NULL, - confmatchtype "char" NOT NULL, - conislocal BOOLEAN NOT NULL, - coninhcount INTEGER NOT NULL, - connoinherit BOOLEAN NOT NULL, - conkey SMALLINT [ ], - confkey SMALLINT [ ], - conpfeqop oid [ ], - conppeqop oid [ ], - conffeqop oid [ ], - conexclop oid [ ], - conbin pg_node_tree COLLATE pg_catalog. "C" -); - -- just for type info CREATE TABLE pg_stat_activity ( state TEXT, XACT_START timestamptz NOT NULL, application_name TEXT ); - -CREATE SCHEMA information_schema; - -CREATE TABLE information_schema.columns ( - table_name TEXT NOT NULL, - column_name TEXT NOT NULL, - data_type TEXT NOT NULL, - ordinal_position INTEGER NOT NULL -); - -CREATE TABLE information_schema.tables (); - -CREATE TABLE information_schema.sequences (sequence_name TEXT NOT NULL); diff --git a/swo/swoinfo/queries.sql b/swo/swoinfo/queries.sql index 613408993e..166ffb733c 100644 --- a/swo/swoinfo/queries.sql +++ b/swo/swoinfo/queries.sql @@ -10,10 +10,10 @@ WHERE con.contype = 'f' AND NOT con.condeferrable; -- name: TableColumns :many -SELECT col.table_name, - col.column_name, - col.data_type, - col.ordinal_position +SELECT col.table_name::text, + col.column_name::text, + col.data_type::text, + col.ordinal_position::INT FROM information_schema.columns col JOIN information_schema.tables t ON t.table_catalog = col.table_catalog AND t.table_schema = col.table_schema @@ -23,7 +23,7 @@ WHERE col.table_catalog = current_database() AND col.table_schema = 'public'; -- name: SequenceNames :many -SELECT sequence_name +SELECT sequence_name::text FROM 
information_schema.sequences WHERE sequence_catalog = current_database() AND sequence_schema = 'public' diff --git a/swo/swoinfo/scantables.go b/swo/swoinfo/scantables.go index afd683e07d..b380ab4a14 100644 --- a/swo/swoinfo/scantables.go +++ b/swo/swoinfo/scantables.go @@ -27,7 +27,7 @@ func ScanTables(ctx context.Context, conn *pgx.Conn) ([]Table, error) { tables := make(map[string]*Table) for _, cRow := range columns { - switch cRow.TableName { + switch cRow.ColTableName { case "engine_processing_versions", "gorp_migrations": // skip migrate-only tables continue @@ -36,18 +36,18 @@ func ScanTables(ctx context.Context, conn *pgx.Conn) ([]Table, error) { continue } - if tables[cRow.TableName] == nil { - tables[cRow.TableName] = &Table{name: cRow.TableName, deps: make(map[string]struct{})} + if tables[cRow.ColTableName] == nil { + tables[cRow.ColTableName] = &Table{name: cRow.ColTableName, deps: make(map[string]struct{})} } - tables[cRow.TableName].cols = append(tables[cRow.TableName].cols, column(cRow)) - if cRow.ColumnName == "id" { - tables[cRow.TableName].id = column(cRow) + tables[cRow.ColTableName].cols = append(tables[cRow.ColTableName].cols, column(cRow)) + if cRow.ColColumnName == "id" { + tables[cRow.ColTableName].id = column(cRow) } } for _, t := range tables { - if t.id.ColumnName == "" { + if t.id.ColColumnName == "" { return nil, fmt.Errorf("table %s has no id column", t.name) } } @@ -59,7 +59,7 @@ func ScanTables(ctx context.Context, conn *pgx.Conn) ([]Table, error) { var tableList []*Table for _, t := range tables { sort.Slice(t.cols, func(i, j int) bool { - return t.cols[i].OrdinalPosition < t.cols[j].OrdinalPosition + return t.cols[i].ColOrdinalPosition < t.cols[j].ColOrdinalPosition }) tableList = append(tableList, t) } diff --git a/swo/swoinfo/table.go b/swo/swoinfo/table.go index f68cf34c73..155461b0b9 100644 --- a/swo/swoinfo/table.go +++ b/swo/swoinfo/table.go @@ -15,19 +15,19 @@ type Table struct { cols []column id column } -type column 
swodb.InformationSchemaColumn +type column swodb.TableColumnsRow // Name returns the name of the table. func (t Table) Name() string { return t.name } // IDType returns the type of the ID column. -func (t Table) IDType() string { return t.id.DataType } +func (t Table) IDType() string { return t.id.ColDataType } // Columns returns the names of the columns in the table. func (t Table) Columns() []string { var cols []string for _, c := range t.cols { - cols = append(cols, c.ColumnName) + cols = append(cols, c.ColColumnName) } return cols } diff --git a/swo/swoinfo/table_test.go b/swo/swoinfo/table_test.go index 38398d87d1..2fc471c12e 100644 --- a/swo/swoinfo/table_test.go +++ b/swo/swoinfo/table_test.go @@ -10,9 +10,9 @@ func TestTable_InsertJSONRowsQuery(t *testing.T) { tbl := Table{ name: "test", cols: []column{ - {ColumnName: "id"}, - {ColumnName: "foo"}, - {ColumnName: "bar"}, + {ColColumnName: "id"}, + {ColColumnName: "foo"}, + {ColColumnName: "bar"}, }, } query := tbl.InsertJSONRowsQuery(false) From 0a1e93ce915482e6e47e18647fa6b1a47e50973d Mon Sep 17 00:00:00 2001 From: Nathaniel Caza Date: Tue, 6 Dec 2022 16:21:07 -0600 Subject: [PATCH 223/225] cleanup deps --- go.mod | 15 --------------- go.sum | 44 -------------------------------------------- 2 files changed, 59 deletions(-) diff --git a/go.mod b/go.mod index 083b795b98..f168032b8d 100644 --- a/go.mod +++ b/go.mod @@ -46,7 +46,6 @@ require ( golang.org/x/tools v0.3.0 google.golang.org/genproto v0.0.0-20221024183307-1bc688fe9f3e // indirect google.golang.org/grpc v1.50.1 - google.golang.org/grpc/cmd/protoc-gen-go-grpc v1.2.0 google.golang.org/protobuf v1.28.1 gopkg.in/gomail.v2 v2.0.0-20160411212932-81ebce5c23df gorm.io/driver/postgres v1.4.5 @@ -59,11 +58,6 @@ require ( github.com/kffl/speedbump v1.0.0 ) -require ( - github.com/benbjohnson/clock v1.1.0 // indirect - github.com/pganalyze/pg_query_go/v2 v2.1.0 // indirect -) - require ( cloud.google.com/go/compute v1.12.1 // indirect 
cloud.google.com/go/compute/metadata v0.2.1 // indirect @@ -76,7 +70,6 @@ require ( github.com/alecthomas/template v0.0.0-20190718012654-fb15b899a751 // indirect github.com/alecthomas/units v0.0.0-20211218093645-b94a6e3cc137 // indirect github.com/andybalholm/cascadia v1.3.1 // indirect - github.com/antlr/antlr4/runtime/Go/antlr v0.0.0-20220209173558-ad29539cd2e9 // indirect github.com/beorn7/perks v1.0.1 // indirect github.com/census-instrumentation/opencensus-proto v0.2.1 // indirect github.com/cespare/xxhash/v2 v2.1.2 // indirect @@ -116,7 +109,6 @@ require ( github.com/jinzhu/inflection v1.0.0 // indirect github.com/jinzhu/now v1.1.5 // indirect github.com/kr/pretty v0.3.1 // indirect - github.com/kyleconroy/sqlc v1.14.0 github.com/lib/pq v1.10.7 // indirect github.com/magiconair/properties v1.8.6 // indirect github.com/mailhog/MailHog-UI v1.0.1 // indirect @@ -134,9 +126,6 @@ require ( github.com/olekukonko/tablewriter v0.0.5 // indirect github.com/pelletier/go-toml/v2 v2.0.5 // indirect github.com/philhofer/fwd v1.1.1 // indirect - github.com/pingcap/errors v0.11.5-0.20210425183316-da1aaba5fb63 // indirect - github.com/pingcap/log v0.0.0-20210906054005-afc726e70354 // indirect - github.com/pingcap/parser v0.0.0-20210914110036-002913dd28ec // indirect github.com/pkg/browser v0.0.0-20180916011732-0a3d74bf9ce4 // indirect github.com/pmezard/go-difflib v1.0.0 // indirect github.com/pquerna/cachecontrol v0.1.0 // indirect @@ -159,9 +148,6 @@ require ( github.com/vanng822/css v1.0.1 // indirect github.com/vanng822/go-premailer v1.20.1 // indirect github.com/xrash/smetrics v0.0.0-20201216005158-039620a65673 // indirect - go.uber.org/atomic v1.9.0 // indirect - go.uber.org/multierr v1.8.0 // indirect - go.uber.org/zap v1.21.0 // indirect golang.org/x/exp/typeparams v0.0.0-20220602145555-4a0574d9293f // indirect golang.org/x/mod v0.7.0 // indirect golang.org/x/text v0.4.0 // indirect @@ -170,7 +156,6 @@ require ( gopkg.in/alexcesaro/quotedprintable.v3 
v3.0.0-20150716171945-2caba252f4dc // indirect gopkg.in/ini.v1 v1.67.0 // indirect gopkg.in/mgo.v2 v2.0.0-20190816093944-a6b53ec6cb22 // indirect - gopkg.in/natefinch/lumberjack.v2 v2.0.0 // indirect gopkg.in/square/go-jose.v2 v2.6.0 // indirect gopkg.in/yaml.v2 v2.4.0 // indirect gopkg.in/yaml.v3 v3.0.1 // indirect diff --git a/go.sum b/go.sum index a1f9ef5a4d..104f98ce9b 100644 --- a/go.sum +++ b/go.sum @@ -85,16 +85,12 @@ github.com/andybalholm/cascadia v1.1.0/go.mod h1:GsXiBklL0woXo1j/WYWtSYYC4ouU9Pq github.com/andybalholm/cascadia v1.3.1 h1:nhxRkql1kdYCc8Snf7D5/D3spOX+dBgjA6u8x004T2c= github.com/andybalholm/cascadia v1.3.1/go.mod h1:R4bJ1UQfqADjvDa4P6HZHLh/3OxWWEqc0Sk8XGwHqvA= github.com/antihax/optional v1.0.0/go.mod h1:uupD/76wgC+ih3iEmQUL+0Ugr19nfwCT1kdvxnR2qWY= -github.com/antlr/antlr4/runtime/Go/antlr v0.0.0-20220209173558-ad29539cd2e9 h1:zvkJv+9Pxm1nnEMcKnShREt4qtduHKz4iw4AB4ul0Ao= -github.com/antlr/antlr4/runtime/Go/antlr v0.0.0-20220209173558-ad29539cd2e9/go.mod h1:F7bn7fEU90QkQ3tnmaTx3LTKLEDqnwWODIYppRQ5hnY= github.com/aokoli/goutils v1.0.1/go.mod h1:SijmP0QR8LtwsmDs8Yii5Z/S4trXFGFC2oO5g9DP+DQ= github.com/arbovm/levenshtein v0.0.0-20160628152529-48b4e1c0c4d0 h1:jfIu9sQUG6Ig+0+Ap1h4unLjW6YQJpKZVmUzxsD4E/Q= github.com/arbovm/levenshtein v0.0.0-20160628152529-48b4e1c0c4d0/go.mod h1:t2tdKJDJF9BV14lnkjHmOQgcvEKgtqs5a1N3LNdJhGE= github.com/armon/circbuf v0.0.0-20150827004946-bbbad097214e/go.mod h1:3U/XgcO3hCbHZ8TKRvWD2dDTCfh9M9ya+I9JpbB7O8o= github.com/armon/go-metrics v0.0.0-20180917152333-f0300d1749da/go.mod h1:Q73ZrmVTwzkszR9V5SSuryQ31EELlFMUz1kKyl939pY= github.com/armon/go-radix v0.0.0-20180808171621-7fddfc383310/go.mod h1:ufUuZ+zHj4x4TnLV4JWEpy2hxWSpsRywHrMgIH9cCH8= -github.com/benbjohnson/clock v1.1.0 h1:Q92kusRqC1XV2MjkWETPvjJVqKetz1OzxZB7mHJLju8= -github.com/benbjohnson/clock v1.1.0/go.mod h1:J11/hYXuz8f4ySSvYwY0FKfm+ezbsZBKZxNJlLklBHA= github.com/beorn7/perks v0.0.0-20180321164747-3a771d992973/go.mod h1:Dwedo/Wpr24TaqPxmxbtue+5NUziq4I4S80YR8gNf3Q= 
github.com/beorn7/perks v1.0.0/go.mod h1:KWe93zE9D1o94FZ5RNwFwVgaQK1VOXiVxmqh+CedLV8= github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM= @@ -140,12 +136,6 @@ github.com/creack/pty v1.1.7/go.mod h1:lj5s0c3V2DBrqTV7llrYr5NG6My20zk30Fl46Y7Do github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E= github.com/creack/pty v1.1.18 h1:n56/Zwd5o6whRC5PMGretI4IdRLlmBXYNjScPaBgsbY= github.com/creack/pty v1.1.18/go.mod h1:MOBLtS5ELjhRRrroQr9kyvTxUAFNvYEK993ew/Vr4O4= -github.com/cznic/golex v0.0.0-20181122101858-9c343928389c/go.mod h1:+bmmJDNmKlhWNG+gwWCkaBoTy39Fs+bzRxVBzoTQbIc= -github.com/cznic/mathutil v0.0.0-20181122101859-297441e03548/go.mod h1:e6NPNENfs9mPDVNRekM7lKScauxd5kXTr1Mfyig6TDM= -github.com/cznic/parser v0.0.0-20160622100904-31edd927e5b1/go.mod h1:2B43mz36vGZNZEwkWi8ayRSSUXLfjL8OkbzwW4NcPMM= -github.com/cznic/sortutil v0.0.0-20181122101858-f5f958428db8/go.mod h1:q2w6Bg5jeox1B+QkJ6Wp/+Vn0G/bo3f1uY7Fn3vivIQ= -github.com/cznic/strutil v0.0.0-20171016134553-529a34b1c186/go.mod h1:AHHPPPXTw0h6pVabbcbyGRK1DckRn7r/STdZEeIDzZc= -github.com/cznic/y v0.0.0-20170802143616-045f81c6662a/go.mod h1:1rk5VM7oSnA4vjp+hrLQ3HWHa+Y4yPCa3/CsJrcNnvs= github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= @@ -191,10 +181,8 @@ github.com/go-logfmt/logfmt v0.3.0/go.mod h1:Qt1PoO58o5twSAckw1HlFXLmHsOX5/0LbT9 github.com/go-logfmt/logfmt v0.4.0/go.mod h1:3RMwSq7FuexP4Kalkev3ejPJsZTpXXBr9+V4qmtdjCk= github.com/go-logfmt/logfmt v0.5.0/go.mod h1:wCYkCAKZfumFQihp8CzCvQ3paCTfi41vtzG1KdI/P7A= github.com/go-logfmt/logfmt v0.5.1/go.mod h1:WYhtIu8zTZfxdn5+rREduYbwxfcBr/Vr6KEVveWlfTs= -github.com/go-sql-driver/mysql v1.3.0/go.mod h1:zAC/RDZ24gD3HViQzih4MyKcchzm+sOG5ZlKdlhCg5w= github.com/go-sql-driver/mysql v1.4.1/go.mod 
h1:zAC/RDZ24gD3HViQzih4MyKcchzm+sOG5ZlKdlhCg5w= github.com/go-sql-driver/mysql v1.5.0/go.mod h1:DCzpHaOWr8IXmIStZouvnhqoel9Qv2LBy8hT2VhHyBg= -github.com/go-sql-driver/mysql v1.6.0 h1:BCTh4TKNUYmOmMUcQ3IipzF5prigylS7XXjEkfCHuOE= github.com/go-stack/stack v1.8.0/go.mod h1:v0f6uXyyMGvRgIKkXu+yp6POWl0qKG85gN/melR3HDY= github.com/go-test/deep v1.0.4 h1:u2CU3YKy9I2pmu9pX0eq50wCgjfGIt539SqR7FbHiho= github.com/go-test/deep v1.0.4/go.mod h1:wGDj63lr65AM2AQyKZd/NYHGb0R+1RLqB8NKt3aSFNA= @@ -454,8 +442,6 @@ github.com/kr/pty v1.1.8/go.mod h1:O1sed60cT9XZ5uDucP5qwvh+TE3NnUj51EiZO/lmSfw= github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI= github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY= github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE= -github.com/kyleconroy/sqlc v1.14.0 h1:ZBmnFUaZNzHJkM+s9vSrW1k2/K8FQMEOi/srofRm300= -github.com/kyleconroy/sqlc v1.14.0/go.mod h1:xFQtnsSjT57ap4nhyfwgHPMRSiirTJQnAv3Q6f914Qg= github.com/lib/pq v1.0.0/go.mod h1:5WUZQaWbwv1U+lTReE5YruASi9Al49XbQIvNi/34Woo= github.com/lib/pq v1.1.0/go.mod h1:5WUZQaWbwv1U+lTReE5YruASi9Al49XbQIvNi/34Woo= github.com/lib/pq v1.2.0/go.mod h1:5WUZQaWbwv1U+lTReE5YruASi9Al49XbQIvNi/34Woo= @@ -551,20 +537,8 @@ github.com/pelletier/go-toml v1.9.5 h1:4yBQzkHv+7BHq2PQUZF3Mx0IYxG7LsP222s7Agd3v github.com/pelletier/go-toml v1.9.5/go.mod h1:u1nR/EPcESfeI/szUZKdtJ0xRNbUoANCkoOuaOx1Y+c= github.com/pelletier/go-toml/v2 v2.0.5 h1:ipoSadvV8oGUjnUbMub59IDPPwfxF694nG/jwbMiyQg= github.com/pelletier/go-toml/v2 v2.0.5/go.mod h1:OMHamSCAODeSsVrwwvcJOaoN0LIUIaFVNZzmWyNfXas= -github.com/pganalyze/pg_query_go/v2 v2.1.0 h1:donwPZ4G/X+kMs7j5eYtKjdziqyOLVp3pkUrzb9lDl8= -github.com/pganalyze/pg_query_go/v2 v2.1.0/go.mod h1:XAxmVqz1tEGqizcQ3YSdN90vCOHBWjJi8URL1er5+cA= github.com/philhofer/fwd v1.1.1 h1:GdGcTjf5RNAxwS4QLsiMzJYj5KEvPJD3Abr261yRQXQ= github.com/philhofer/fwd v1.1.1/go.mod h1:gk3iGcWd9+svBvR0sR+KPcfE+RNWozjowpeBVG3ZVNU= -github.com/pingcap/check 
v0.0.0-20190102082844-67f458068fc8 h1:USx2/E1bX46VG32FIw034Au6seQ2fY9NEILmNh/UlQg= -github.com/pingcap/check v0.0.0-20190102082844-67f458068fc8/go.mod h1:B1+S9LNcuMyLH/4HMTViQOJevkGiik3wW2AN9zb2fNQ= -github.com/pingcap/errors v0.11.0/go.mod h1:Oi8TUi2kEtXXLMJk9l1cGmz20kV3TaQ0usTwv5KuLY8= -github.com/pingcap/errors v0.11.5-0.20210425183316-da1aaba5fb63 h1:+FZIDR/D97YOPik4N4lPDaUcLDF/EQPogxtlHB2ZZRM= -github.com/pingcap/errors v0.11.5-0.20210425183316-da1aaba5fb63/go.mod h1:X2r9ueLEUZgtx2cIogM0v4Zj5uvvzhuuiu7Pn8HzMPg= -github.com/pingcap/log v0.0.0-20210625125904-98ed8e2eb1c7/go.mod h1:8AanEdAHATuRurdGxZXBz0At+9avep+ub7U1AGYLIMM= -github.com/pingcap/log v0.0.0-20210906054005-afc726e70354 h1:SvWCbCPh1YeHd9yQLksvJYAgft6wLTY1aNG81tpyscQ= -github.com/pingcap/log v0.0.0-20210906054005-afc726e70354/go.mod h1:DWQW5jICDR7UJh4HtxXSM20Churx4CQL0fwL/SoOSA4= -github.com/pingcap/parser v0.0.0-20210914110036-002913dd28ec h1:tUcualrzARkmDCM4OGT27cEnjDyAN1MW5AoZqmTCITA= -github.com/pingcap/parser v0.0.0-20210914110036-002913dd28ec/go.mod h1:+xcMiiZzdIktT/Nqdfm81dkECJ2EPuoAYywd57py4Pk= github.com/pkg/browser v0.0.0-20180916011732-0a3d74bf9ce4 h1:49lOXmGaUpV9Fz3gd7TFZY106KVlPVa5jcYD1gaQf98= github.com/pkg/browser v0.0.0-20180916011732-0a3d74bf9ce4/go.mod h1:4OwLy04Bl9Ef3GJJCoec+30X3LQs/0/m4HFRt/2LUSA= github.com/pkg/diff v0.0.0-20210226163009-20ebb0f2a09e/go.mod h1:pJLUxLENpZxwdsKMEsNbx1VGcRFpLqf3715MtcvvzbA= @@ -606,7 +580,6 @@ github.com/prometheus/procfs v0.6.0/go.mod h1:cz+aTbrPOrUb4q7XlbU9ygM+/jj0fzG6c1 github.com/prometheus/procfs v0.7.3/go.mod h1:cz+aTbrPOrUb4q7XlbU9ygM+/jj0fzG6c1xBZuNvfVA= github.com/prometheus/procfs v0.8.0 h1:ODq8ZFEaYeCaZOJlZZdJA2AbQR98dSHSM1KW/You5mo= github.com/prometheus/procfs v0.8.0/go.mod h1:z7EfXMXOkbkqb9IINtpCn86r/to3BnA0uaxHdg830/4= -github.com/remyoudompheng/bigfft v0.0.0-20200410134404-eec4a21b6bb0/go.mod h1:qqbHyh8v60DhA7CoWK5oRCqLrMHRGoxYCSS9EjAz6Eo= github.com/rivo/uniseg v0.2.0 h1:S1pD9weZBuJdFmowNwbpi7BJ8TNftyUImj/0WQi72jY= 
github.com/rivo/uniseg v0.2.0/go.mod h1:J6wj4VEh+S6ZtnVlnTBMWIodfgj8LQOQFoIToxlJtxc= github.com/rogpeppe/fastuuid v1.2.0/go.mod h1:jVj6XXZzXRy/MSR5jhDC/2q6DgLz+nrA6LYCDYWNEvQ= @@ -732,27 +705,15 @@ go.uber.org/atomic v1.4.0/go.mod h1:gD2HeocX3+yG+ygLZcrzQJaqmWj9AIm7n08wl/qW/PE= go.uber.org/atomic v1.5.0/go.mod h1:sABNBOSYdrvTF6hTgEIbc7YasKWGhgEQZyfxyTvoXHQ= go.uber.org/atomic v1.6.0/go.mod h1:sABNBOSYdrvTF6hTgEIbc7YasKWGhgEQZyfxyTvoXHQ= go.uber.org/atomic v1.7.0/go.mod h1:fEN4uk6kAWBTFdckzkM89CLk9XfWZrxpCo0nPH17wJc= -go.uber.org/atomic v1.9.0 h1:ECmE8Bn/WFTYwEW/bpKD3M8VtR/zQVbavAoalC1PYyE= -go.uber.org/atomic v1.9.0/go.mod h1:fEN4uk6kAWBTFdckzkM89CLk9XfWZrxpCo0nPH17wJc= -go.uber.org/goleak v1.1.10/go.mod h1:8a7PlsEVH3e/a/GLqe5IIrQx6GzcnRmZEufDUTk4A7A= -go.uber.org/goleak v1.1.11 h1:wy28qYRKZgnJTxGxvye5/wgWr1EKjmUDGYox5mGlRlI= -go.uber.org/goleak v1.1.11/go.mod h1:cwTWslyiVhfpKIDGSZEM2HlOvcqm+tG4zioyIeLoqMQ= go.uber.org/multierr v1.1.0/go.mod h1:wR5kodmAFQ0UK8QlbwjlSNy0Z68gJhDJUG5sjR94q/0= go.uber.org/multierr v1.3.0/go.mod h1:VgVr7evmIr6uPjLBxg28wmKNXyqE9akIJ5XnfpiKl+4= go.uber.org/multierr v1.5.0/go.mod h1:FeouvMocqHpRaaGuG9EjoKcStLC43Zu/fmqdUMPcKYU= go.uber.org/multierr v1.6.0/go.mod h1:cdWPpRnG4AhwMwsgIHip0KRBQjJy5kYEpYjJxpXp9iU= -go.uber.org/multierr v1.7.0/go.mod h1:7EAYxJLBy9rStEaz58O2t4Uvip6FSURkq8/ppBp95ak= -go.uber.org/multierr v1.8.0 h1:dg6GjLku4EH+249NNmoIciG9N/jURbDG+pFlTkhzIC8= -go.uber.org/multierr v1.8.0/go.mod h1:7EAYxJLBy9rStEaz58O2t4Uvip6FSURkq8/ppBp95ak= go.uber.org/tools v0.0.0-20190618225709-2cfd321de3ee/go.mod h1:vJERXedbb3MVM5f9Ejo0C68/HhF8uaILCdgjnY+goOA= go.uber.org/zap v1.9.1/go.mod h1:vwi/ZaCAaUcBkycHslxD9B2zi4UTXhF60s6SWpuDF0Q= go.uber.org/zap v1.10.0/go.mod h1:vwi/ZaCAaUcBkycHslxD9B2zi4UTXhF60s6SWpuDF0Q= go.uber.org/zap v1.13.0/go.mod h1:zwrFLgMcdUuIBviXEYEH1YKNaOBnKXsx2IPda5bBwHM= go.uber.org/zap v1.17.0/go.mod h1:MXVU+bhUf/A7Xi2HNOnopQOrmycQ5Ih87HtOu4q5SSo= -go.uber.org/zap v1.18.1/go.mod 
h1:xg/QME4nWcxGxrpdeYfq7UvYrLh66cuVKdrbD1XF/NI= -go.uber.org/zap v1.19.0/go.mod h1:xg/QME4nWcxGxrpdeYfq7UvYrLh66cuVKdrbD1XF/NI= -go.uber.org/zap v1.21.0 h1:WefMeulhovoZ2sYXz7st6K0sLj7bBhpiFaud4r4zST8= -go.uber.org/zap v1.21.0/go.mod h1:wjWOCqI0f2ZZrJF/UufIOkiC8ii6tm1iqIsLo76RfJw= golang.org/x/crypto v0.0.0-20180904163835-0709b304e793/go.mod h1:6SG95UA2DQfeDnfUPMdvaQW0Q7yPrPDi9nlGo2tz2b4= golang.org/x/crypto v0.0.0-20181029021203-45a5f77698d3/go.mod h1:6SG95UA2DQfeDnfUPMdvaQW0Q7yPrPDi9nlGo2tz2b4= golang.org/x/crypto v0.0.0-20181029175232-7e6ffbd03851/go.mod h1:6SG95UA2DQfeDnfUPMdvaQW0Q7yPrPDi9nlGo2tz2b4= @@ -1010,7 +971,6 @@ golang.org/x/tools v0.0.0-20190911174233-4f2ddba30aff/go.mod h1:b+2E5dAYhXwXZwtn golang.org/x/tools v0.0.0-20191012152004-8de300cfc20a/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= golang.org/x/tools v0.0.0-20191029041327-9cc4af7d6b2c/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= golang.org/x/tools v0.0.0-20191029190741-b9c20aec41a5/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= -golang.org/x/tools v0.0.0-20191108193012-7d206e10da11/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= golang.org/x/tools v0.0.0-20191112195655-aa38f8e97acc/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= golang.org/x/tools v0.0.0-20191113191852-77e3bb0ad9e7/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= golang.org/x/tools v0.0.0-20191115202509-3a792d9c32b2/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= @@ -1162,8 +1122,6 @@ google.golang.org/grpc v1.44.0/go.mod h1:k+4IHHFw41K8+bbowsex27ge2rCb65oeWqe4jJ5 google.golang.org/grpc v1.45.0-dev.0.20220218222403-011544f72939/go.mod h1:lN7owxKUQEqMfSyQikvvk5tf/6zMPsrK+ONuO11+0rQ= google.golang.org/grpc v1.50.1 h1:DS/BukOZWp8s6p4Dt/tOaJaTQyPyOoCcrjroHuCeLzY= google.golang.org/grpc v1.50.1/go.mod h1:ZgQEeidpAuNRZ8iRrlBKXZQP1ghovWIVhdJRyCDK+GI= -google.golang.org/grpc/cmd/protoc-gen-go-grpc v1.2.0 h1:TLkBREm4nIsEcexnCjgQd5GQWaHcqMzwQV0TX9pq8S0= 
-google.golang.org/grpc/cmd/protoc-gen-go-grpc v1.2.0/go.mod h1:DNq5QpG7LJqD2AamLZ7zvKE0DEpVl2BSEVjFycAAjRY= google.golang.org/protobuf v0.0.0-20200109180630-ec00e32a8dfd/go.mod h1:DFci5gLYBciE7Vtevhsrf46CRTquxDuWsQurQQe4oz8= google.golang.org/protobuf v0.0.0-20200221191635-4d8936d0db64/go.mod h1:kwYJMbMJ01Woi6D6+Kah6886xMZcty6N08ah7+eCXa0= google.golang.org/protobuf v0.0.0-20200228230310-ab0ca4ff8a60/go.mod h1:cfTl7dwQJ+fmap5saPgwCLgHXTUD7jkjRqWcaiX5VyM= @@ -1199,8 +1157,6 @@ gopkg.in/ini.v1 v1.67.0 h1:Dgnx+6+nfE+IfzjUEISNeydPJh9AXNNsWbGP9KzCsOA= gopkg.in/ini.v1 v1.67.0/go.mod h1:pNLf8WUiyNEtQjuu5G5vTm06TEv9tsIgeAvK8hOrP4k= gopkg.in/mgo.v2 v2.0.0-20190816093944-a6b53ec6cb22 h1:VpOs+IwYnYBaFnrNAeB8UUWtL3vEUnzSCL1nVjPhqrw= gopkg.in/mgo.v2 v2.0.0-20190816093944-a6b53ec6cb22/go.mod h1:yeKp02qBN3iKW1OzL3MGk2IdtZzaj7SFntXj72NppTA= -gopkg.in/natefinch/lumberjack.v2 v2.0.0 h1:1Lc07Kr7qY4U2YPouBjpCLxpiyxIVoxqXgkXLknAOE8= -gopkg.in/natefinch/lumberjack.v2 v2.0.0/go.mod h1:l0ndWWf7gzL7RNwBG7wST/UCcT4T24xpD6X8LsfU/+k= gopkg.in/square/go-jose.v2 v2.6.0 h1:NGk74WTnPKBNUhNzQX7PYcTLUjoq7mzKk2OKbvwk2iI= gopkg.in/square/go-jose.v2 v2.6.0/go.mod h1:M9dMgbHiYLoDGQrXy7OpJDJWiKiU//h+vD76mk0e1AI= gopkg.in/yaml.v2 v2.2.1/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= From ee20f058340e0d92b01b593bd4cb5e72f1778cc9 Mon Sep 17 00:00:00 2001 From: Nathaniel Caza Date: Tue, 6 Dec 2022 16:25:34 -0600 Subject: [PATCH 224/225] re-add missing tools --- devtools/tools.go | 2 ++ go.mod | 1 + go.sum | 2 ++ 3 files changed, 5 insertions(+) diff --git a/devtools/tools.go b/devtools/tools.go index d7e3d1f9b3..ead97fec37 100644 --- a/devtools/tools.go +++ b/devtools/tools.go @@ -11,5 +11,7 @@ import ( _ "github.com/mailhog/MailHog" _ "golang.org/x/tools/cmd/goimports" _ "golang.org/x/tools/cmd/stringer" + _ "google.golang.org/grpc/cmd/protoc-gen-go-grpc" + _ "google.golang.org/protobuf/cmd/protoc-gen-go" _ "honnef.co/go/tools/cmd/staticcheck" ) diff --git a/go.mod b/go.mod index 
f168032b8d..1c93e6b709 100644 --- a/go.mod +++ b/go.mod @@ -56,6 +56,7 @@ require ( require ( github.com/fullstorydev/grpcui v1.3.1 github.com/kffl/speedbump v1.0.0 + google.golang.org/grpc/cmd/protoc-gen-go-grpc v1.2.0 ) require ( diff --git a/go.sum b/go.sum index 104f98ce9b..7f602abfe0 100644 --- a/go.sum +++ b/go.sum @@ -1122,6 +1122,8 @@ google.golang.org/grpc v1.44.0/go.mod h1:k+4IHHFw41K8+bbowsex27ge2rCb65oeWqe4jJ5 google.golang.org/grpc v1.45.0-dev.0.20220218222403-011544f72939/go.mod h1:lN7owxKUQEqMfSyQikvvk5tf/6zMPsrK+ONuO11+0rQ= google.golang.org/grpc v1.50.1 h1:DS/BukOZWp8s6p4Dt/tOaJaTQyPyOoCcrjroHuCeLzY= google.golang.org/grpc v1.50.1/go.mod h1:ZgQEeidpAuNRZ8iRrlBKXZQP1ghovWIVhdJRyCDK+GI= +google.golang.org/grpc/cmd/protoc-gen-go-grpc v1.2.0 h1:TLkBREm4nIsEcexnCjgQd5GQWaHcqMzwQV0TX9pq8S0= +google.golang.org/grpc/cmd/protoc-gen-go-grpc v1.2.0/go.mod h1:DNq5QpG7LJqD2AamLZ7zvKE0DEpVl2BSEVjFycAAjRY= google.golang.org/protobuf v0.0.0-20200109180630-ec00e32a8dfd/go.mod h1:DFci5gLYBciE7Vtevhsrf46CRTquxDuWsQurQQe4oz8= google.golang.org/protobuf v0.0.0-20200221191635-4d8936d0db64/go.mod h1:kwYJMbMJ01Woi6D6+Kah6886xMZcty6N08ah7+eCXa0= google.golang.org/protobuf v0.0.0-20200228230310-ab0ca4ff8a60/go.mod h1:cfTl7dwQJ+fmap5saPgwCLgHXTUD7jkjRqWcaiX5VyM= From b7398e55eb03ec8ed9330b1a560ddec41d099c63 Mon Sep 17 00:00:00 2001 From: Nathaniel Caza Date: Tue, 6 Dec 2022 16:29:20 -0600 Subject: [PATCH 225/225] listen on localhost --- Procfile.swo | 2 +- go.mod | 6 +++--- go.sum | 12 ++++++------ 3 files changed, 10 insertions(+), 10 deletions(-) diff --git a/Procfile.swo b/Procfile.swo index 38bdfc0c3c..9e6a3301ac 100644 --- a/Procfile.swo +++ b/Procfile.swo @@ -36,4 +36,4 @@ ga9: ./bin/goalert -l=localhost:3057 --ui-dir=web/src/build --db-url=postgres:// proxy: go run ./devtools/simpleproxy -addr localhost:3030 
/=http://localhost:3040,http://localhost:3050,http://localhost:3051,http://localhost:3052,http://localhost:3053,http://localhost:3054,http://localhost:3055,http://localhost:3056,http://localhost:3057 -slow: go run github.com/kffl/speedbump --port=5435 --latency=10ms --saw-amplitude=25ms --saw-period=1s localhost:5432 +slow: go run github.com/kffl/speedbump --host localhost --port=5435 --latency=10ms --saw-amplitude=25ms --saw-period=1s localhost:5432 diff --git a/go.mod b/go.mod index 1c93e6b709..a9870186e8 100644 --- a/go.mod +++ b/go.mod @@ -41,7 +41,7 @@ require ( golang.org/x/crypto v0.2.0 golang.org/x/net v0.2.0 // indirect golang.org/x/oauth2 v0.2.0 - golang.org/x/sys v0.2.0 + golang.org/x/sys v0.3.0 golang.org/x/term v0.2.0 golang.org/x/tools v0.3.0 google.golang.org/genproto v0.0.0-20221024183307-1bc688fe9f3e // indirect @@ -55,7 +55,7 @@ require ( require ( github.com/fullstorydev/grpcui v1.3.1 - github.com/kffl/speedbump v1.0.0 + github.com/kffl/speedbump v1.1.0 google.golang.org/grpc/cmd/protoc-gen-go-grpc v1.2.0 ) @@ -89,7 +89,7 @@ require ( github.com/gorilla/mux v1.8.0 // indirect github.com/gorilla/pat v1.0.1 // indirect github.com/gorilla/websocket v1.5.0 // indirect - github.com/hashicorp/go-hclog v1.2.1 // indirect + github.com/hashicorp/go-hclog v1.4.0 // indirect github.com/hashicorp/golang-lru v0.5.4 // indirect github.com/hashicorp/hcl v1.0.0 // indirect github.com/huandu/xstrings v1.3.2 // indirect diff --git a/go.sum b/go.sum index 7f602abfe0..99b5d7ee93 100644 --- a/go.sum +++ b/go.sum @@ -297,8 +297,8 @@ github.com/hashicorp/consul/api v1.1.0/go.mod h1:VmuI/Lkw1nC05EYQWNKwWGbkg+FbDBt github.com/hashicorp/consul/sdk v0.1.1/go.mod h1:VKf9jXwCTEY1QZP2MOLRhb5i/I/ssyNV1vwHyQBF0x8= github.com/hashicorp/errwrap v1.0.0/go.mod h1:YH+1FKiLXxHSkmPseP+kNlulaMuP3n2brvKWEqk/Jc4= github.com/hashicorp/go-cleanhttp v0.5.1/go.mod h1:JpRdi6/HCYpAwUzNwuwqhbovhLtngrth3wmdIIUrZ80= -github.com/hashicorp/go-hclog v1.2.1 
h1:YQsLlGDJgwhXFpucSPyVbCBviQtjlHv3jLTlp8YmtEw= -github.com/hashicorp/go-hclog v1.2.1/go.mod h1:W4Qnvbt70Wk/zYJryRzDRU/4r0kIg0PVHBcfoyhpF5M= +github.com/hashicorp/go-hclog v1.4.0 h1:ctuWFGrhFha8BnnzxqeRGidlEcQkDyL5u8J8t5eA11I= +github.com/hashicorp/go-hclog v1.4.0/go.mod h1:W4Qnvbt70Wk/zYJryRzDRU/4r0kIg0PVHBcfoyhpF5M= github.com/hashicorp/go-immutable-radix v1.0.0/go.mod h1:0y9vanUI8NX6FsYoO3zeMjhV/C5i9g4Q3DwcSNZ4P60= github.com/hashicorp/go-msgpack v0.5.3/go.mod h1:ahLV/dePpqEmjfWmKiqvPkv/twdG7iPBM1vqhUKIvfM= github.com/hashicorp/go-multierror v1.0.0/go.mod h1:dHtQlpGsu+cZNNAkkCN/P3hoUDHhCYQXV3UM06sGGrk= @@ -423,8 +423,8 @@ github.com/julienschmidt/httprouter v1.2.0/go.mod h1:SYymIcj16QtmaHHD7aYtjjsJG7V github.com/julienschmidt/httprouter v1.3.0/go.mod h1:JR6WtHb+2LUe8TCKY3cZOxFyyO8IZAc4RVcycCCAKdM= github.com/karrick/godirwalk v1.16.1/go.mod h1:j4mkqPuvaLI8mp1DroR3P6ad7cyYd4c1qeJ3RV7ULlk= github.com/kevinmbeaulieu/eq-go v1.0.0/go.mod h1:G3S8ajA56gKBZm4UB9AOyoOS37JO3roToPzKNM8dtdM= -github.com/kffl/speedbump v1.0.0 h1:r9w+loOmC9j83+Hl3Sx46z/EcZI7vBIW9uE5EaIBXxg= -github.com/kffl/speedbump v1.0.0/go.mod h1:6nNWIwc8zM0l41fIArBiVdvcomulEd8v5RX9YBjJoQ4= +github.com/kffl/speedbump v1.1.0 h1:mTLW9ZzWP/1FQCmkZgHhKbphhqJmzzajKKuGXvjibHE= +github.com/kffl/speedbump v1.1.0/go.mod h1:6nNWIwc8zM0l41fIArBiVdvcomulEd8v5RX9YBjJoQ4= github.com/kisielk/errcheck v1.5.0/go.mod h1:pFxgyoBC7bSaBwPgfKdkLd5X25qrDl4LWUI2bnpBCr8= github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck= github.com/konsorten/go-windows-terminal-sequences v1.0.1/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ= @@ -930,8 +930,8 @@ golang.org/x/sys v0.0.0-20220715151400-c0bba94af5f8/go.mod h1:oPkhp1MJrh7nUepCBc golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220811171246-fbc7d0a398ab/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys 
v0.0.0-20220908164124-27713097b956/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.2.0 h1:ljd4t30dBnAvMZaQCevtY0xLLD0A+bRZXbgLMLU1F/A= -golang.org/x/sys v0.2.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.3.0 h1:w8ZOecv6NaNa/zC8944JTU3vz4u6Lagfk4RPQxv92NQ= +golang.org/x/sys v0.3.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/term v0.0.0-20201117132131-f5c789dd3221/go.mod h1:Nr5EML6q2oocZ2LXRh80K7BxOlk5/8JxuGnuhpl+muw= golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8=