From 778943c6cffb3484eee0463a69584fa955abd0c8 Mon Sep 17 00:00:00 2001 From: r4f43l Date: Fri, 16 Sep 2022 17:27:33 +0200 Subject: [PATCH 01/44] Threshold signer interface --- signer/threshold_signer.go | 43 ++++ signer/threshold_signer_soft.go | 354 ++++++++++++++++++++++++++++++++ 2 files changed, 397 insertions(+) create mode 100644 signer/threshold_signer.go create mode 100644 signer/threshold_signer_soft.go diff --git a/signer/threshold_signer.go b/signer/threshold_signer.go new file mode 100644 index 00000000..45029a52 --- /dev/null +++ b/signer/threshold_signer.go @@ -0,0 +1,43 @@ +package signer + +import ( + tsed25519 "gitlab.com/unit410/threshold-ed25519/pkg" +) + +const ( + SignerTypeSoftSign = "SoftSign" + SignerTypeHSM = "HSM" +) + +// Interface for the local signer whether it's a soft sign or HSM +type ThresholdSigner interface { + Type() string + + DealShares(req CosignerGetEphemeralSecretPartRequest) (HrsMetadata, error) + + GetEphemeralSecretPart(req CosignerGetEphemeralSecretPartRequest, m *LastSignStateStruct, + peers map[int]CosignerPeer) (CosignerEphemeralSecretPart, error) + + SetEphemeralSecretPart(req CosignerSetEphemeralSecretPartRequest, m *LastSignStateStruct, + peers map[int]CosignerPeer) error + + Sign(req CosignerSignRequest, m *LastSignStateStruct) (CosignerSignResponse, error) + + GetID() (int, error) +} + +// PeerMetadata holds the share and the ephermeral secret public key +// Moved from Local cosigner to threshold_ed25519 +type PeerMetadata struct { + Share []byte + EphemeralSecretPublicKey []byte +} + +// HrsMetadata holds the ephemeral nonces from cosigner peers +// for a given height, round, step. 
+type HrsMetadata struct { + // need to be _total_ entries per player + Secret []byte + DealtShares []tsed25519.Scalar + Peers []PeerMetadata +} diff --git a/signer/threshold_signer_soft.go b/signer/threshold_signer_soft.go new file mode 100644 index 00000000..cc18865d --- /dev/null +++ b/signer/threshold_signer_soft.go @@ -0,0 +1,354 @@ +package signer + +import ( + "bytes" + "crypto" + "crypto/rand" + "crypto/rsa" + "crypto/sha256" + "errors" + "fmt" + + tmcryptoed25519 "github.com/tendermint/tendermint/crypto/ed25519" + tmjson "github.com/tendermint/tendermint/libs/json" + "gitlab.com/unit410/edwards25519" + tsed25519 "gitlab.com/unit410/threshold-ed25519/pkg" +) + +// ThresholdSignerSoft implements the interface and signs the message for each local signer. +// ThresholdSignerSoft is the implementation of a soft sign signer at the local level. +type ThresholdSignerSoft struct { + PubKeyBytes []byte + Key CosignerKey + // Total signers + Total uint8 + Threshold uint8 + // Height, Round, Step, Timestamp --> metadata + HrsMeta map[HRSTKey]HrsMetadata +} + +// NewThresholdSignerSoft constructs a ThresholdSigner +// that signs using the local key share file. +func NewThresholdSignerSoft( + key CosignerKey, + threshold, total uint8, +) ThresholdSigner { + softSigner := &ThresholdSignerSoft{ + Key: key, + HrsMeta: make(map[HRSTKey]HrsMetadata), + Total: total, + Threshold: threshold, + } + + // cache the public key bytes for signing operations + switch ed25519Key := softSigner.Key.PubKey.(type) { + case tmcryptoed25519.PubKey: + softSigner.PubKeyBytes = make([]byte, len(ed25519Key)) + copy(softSigner.PubKeyBytes, ed25519Key[:]) + default: + panic("softSigner.Key.PubKey.(type) is not a tmcryptoed25519.PubKey! 
i.e not ed25519 public key") + } + return softSigner +} + +// Implements ThresholdSigner +func (softSigner *ThresholdSignerSoft) Type() string { + return "soft" +} + +// Implements ThresholdSigner +func (softSigner *ThresholdSignerSoft) GetID() (int, error) { + return softSigner.Key.ID, nil +} + +// Implements ThresholdSigner +func (softSigner *ThresholdSignerSoft) Sign( + req CosignerSignRequest, m *LastSignStateStruct) (CosignerSignResponse, error) { + m.LastSignStateMutex.Lock() + defer m.LastSignStateMutex.Unlock() + + res := CosignerSignResponse{} + lss := m.LastSignState + + hrst, err := UnpackHRST(req.SignBytes) + if err != nil { + return res, err + } + + sameHRS, err := lss.CheckHRS(hrst) + if err != nil { + return res, err + } + + // If the HRS is the same the sign bytes may still differ by timestamp + // It is ok to re-sign a different timestamp if that is the only difference in the sign bytes + if sameHRS { + if bytes.Equal(req.SignBytes, lss.SignBytes) { + res.EphemeralPublic = lss.EphemeralPublic + res.Signature = lss.Signature + return res, nil + } else if err := lss.OnlyDifferByTimestamp(req.SignBytes); err != nil { + return res, err + } + + // same HRS, and only differ by timestamp - ok to sign again + } + + meta, ok := softSigner.HrsMeta[hrst] + if !ok { + return res, errors.New("no metadata at HRS") + } + + shareParts := make([]tsed25519.Scalar, 0) + publicKeys := make([]tsed25519.Element, 0) + + // calculate secret and public keys + for _, peer := range meta.Peers { + if len(peer.Share) == 0 { + continue + } + shareParts = append(shareParts, peer.Share) + publicKeys = append(publicKeys, peer.EphemeralSecretPublicKey) + } + + ephemeralShare := tsed25519.AddScalars(shareParts) + ephemeralPublic := tsed25519.AddElements(publicKeys) + + // check bounds for ephemeral share to avoid passing out of bounds valids to SignWithShare + { + if len(ephemeralShare) != 32 { + return res, errors.New("ephemeral share is out of bounds") + } + + var scalarBytes 
[32]byte + copy(scalarBytes[:], ephemeralShare) + if !edwards25519.ScMinimal(&scalarBytes) { + return res, errors.New("ephemeral share is out of bounds") + } + } + + sig := tsed25519.SignWithShare( + req.SignBytes, softSigner.Key.ShareKey, ephemeralShare, softSigner.PubKeyBytes, ephemeralPublic) + + m.LastSignState.EphemeralPublic = ephemeralPublic + err = m.LastSignState.Save(SignStateConsensus{ + Height: hrst.Height, + Round: hrst.Round, + Step: hrst.Step, + Signature: sig, + SignBytes: req.SignBytes, + }, nil, true) // TODO double check true here is correct for async? + + if err != nil { + if _, isSameHRSError := err.(*SameHRSError); !isSameHRSError { + return res, err + } + } + + for existingKey := range softSigner.HrsMeta { + // delete any HRS lower than our signed level + // we will not be providing parts for any lower HRS + if existingKey.Less(hrst) { + delete(softSigner.HrsMeta, existingKey) + } + } + + res.EphemeralPublic = ephemeralPublic + res.Signature = sig + return res, nil +} + +// Implements ThresholdSigner +func (softSigner *ThresholdSignerSoft) DealShares( + req CosignerGetEphemeralSecretPartRequest) (HrsMetadata, error) { + hrsKey := HRSTKey{ + Height: req.Height, + Round: req.Round, + Step: req.Step, + Timestamp: req.Timestamp.UnixNano(), + } + + meta, ok := softSigner.HrsMeta[hrsKey] + + if ok { + return meta, nil + } + + secret := make([]byte, 32) + if _, err := rand.Read(secret); err != nil { + return HrsMetadata{}, err + } + + meta = HrsMetadata{ + Secret: secret, + Peers: make([]PeerMetadata, softSigner.Total), + } + + // split this secret with shamirs + // !! dealt shares need to be saved because dealing produces different shares each time! 
+ meta.DealtShares = tsed25519.DealShares(meta.Secret, softSigner.Threshold, softSigner.Total) + + softSigner.HrsMeta[hrsKey] = meta + + return meta, nil +} + +// Get the ephemeral secret part for an ephemeral share +// The ephemeral secret part is encrypted for the receiver +// Implements ThresholdSigner +func (softSigner *ThresholdSignerSoft) GetEphemeralSecretPart( + req CosignerGetEphemeralSecretPartRequest, m *LastSignStateStruct, peers map[int]CosignerPeer) ( + CosignerEphemeralSecretPart, error) { + + res := CosignerEphemeralSecretPart{} + + // protects the meta map + m.LastSignStateMutex.Lock() + defer m.LastSignStateMutex.Unlock() + + hrst := HRSTKey{ + Height: req.Height, + Round: req.Round, + Step: req.Step, + Timestamp: req.Timestamp.UnixNano(), + } + + meta, ok := softSigner.HrsMeta[hrst] + // generate metadata placeholder + if !ok { + newMeta, err := softSigner.DealShares(CosignerGetEphemeralSecretPartRequest{ + Height: req.Height, + Round: req.Round, + Step: req.Step, + Timestamp: req.Timestamp, + }) + + if err != nil { + return res, err + } + + meta = newMeta + softSigner.HrsMeta[hrst] = meta + } + + ourEphPublicKey := tsed25519.ScalarMultiplyBase(meta.Secret) + + // set our values + meta.Peers[softSigner.Key.ID-1].Share = meta.DealtShares[softSigner.Key.ID-1] + meta.Peers[softSigner.Key.ID-1].EphemeralSecretPublicKey = ourEphPublicKey + + // grab the peer info for the ID being requested + peer, ok := peers[req.ID] + if !ok { + return res, errors.New("unknown peer ID") + } + + sharePart := meta.DealtShares[req.ID-1] + + // use RSA public to encrypt user's share part + encrypted, err := rsa.EncryptOAEP(sha256.New(), rand.Reader, &peer.PublicKey, sharePart, nil) + if err != nil { + return res, err + } + + res.SourceID = softSigner.Key.ID + res.SourceEphemeralSecretPublicKey = ourEphPublicKey + res.EncryptedSharePart = encrypted + + // sign the response payload with our private key + // cosigners can verify the signature to confirm sender validity + { 
+ jsonBytes, err := tmjson.Marshal(res) + + if err != nil { + return res, err + } + + digest := sha256.Sum256(jsonBytes) + signature, err := rsa.SignPSS(rand.Reader, &softSigner.Key.RSAKey, crypto.SHA256, digest[:], nil) + if err != nil { + return res, err + } + + res.SourceSig = signature + } + + res.DestinationID = req.ID + + return res, nil +} + +// Store an ephemeral secret share part provided by another cosigner (signer) +// Implements ThresholdSigner +func (softSigner *ThresholdSignerSoft) SetEphemeralSecretPart( + req CosignerSetEphemeralSecretPartRequest, m *LastSignStateStruct, peers map[int]CosignerPeer) error { + + // Verify the source signature + { + if req.SourceSig == nil { + return errors.New("SourceSig field is required") + } + + digestMsg := CosignerEphemeralSecretPart{} + digestMsg.SourceID = req.SourceID + digestMsg.SourceEphemeralSecretPublicKey = req.SourceEphemeralSecretPublicKey + digestMsg.EncryptedSharePart = req.EncryptedSharePart + + digestBytes, err := tmjson.Marshal(digestMsg) + if err != nil { + return err + } + + digest := sha256.Sum256(digestBytes) + peer, ok := peers[req.SourceID] + + if !ok { + return fmt.Errorf("unknown cosigner: %d", req.SourceID) + } + + peerPub := peer.PublicKey + err = rsa.VerifyPSS(&peerPub, crypto.SHA256, digest[:], req.SourceSig, nil) + if err != nil { + return err + } + } + + // protects the meta map + m.LastSignStateMutex.Lock() + defer m.LastSignStateMutex.Unlock() + + hrst := HRSTKey{ + Height: req.Height, + Round: req.Round, + Step: req.Step, + Timestamp: req.Timestamp.UnixNano(), + } + + meta, ok := softSigner.HrsMeta[hrst] + // generate metadata placeholder + if !ok { + newMeta, err := softSigner.DealShares(CosignerGetEphemeralSecretPartRequest{ + Height: req.Height, + Round: req.Round, + Step: req.Step, + }) + + if err != nil { + return err + } + + meta = newMeta + softSigner.HrsMeta[hrst] = meta + } + + // decrypt share + sharePart, err := rsa.DecryptOAEP(sha256.New(), rand.Reader, 
&softSigner.Key.RSAKey, req.EncryptedSharePart, nil) + if err != nil { + return err + } + + // set slot + meta.Peers[req.SourceID-1].Share = sharePart + meta.Peers[req.SourceID-1].EphemeralSecretPublicKey = req.SourceEphemeralSecretPublicKey + return nil +} From f3c27496779627348d7f08942ceae5f61ad05b9b Mon Sep 17 00:00:00 2001 From: r4f43l Date: Fri, 16 Sep 2022 17:27:48 +0200 Subject: [PATCH 02/44] local cosigner fix --- signer/local_cosigner.go | 21 +++++++++------------ 1 file changed, 9 insertions(+), 12 deletions(-) diff --git a/signer/local_cosigner.go b/signer/local_cosigner.go index d7a4fe89..9456723a 100644 --- a/signer/local_cosigner.go +++ b/signer/local_cosigner.go @@ -17,6 +17,15 @@ import ( tsed25519 "gitlab.com/unit410/threshold-ed25519/pkg" ) +type LastSignStateStruct struct { + // Signing is thread safe - lastSignStateMutex is used for putting locks so only one goroutine can r/w to the function + LastSignStateMutex sync.Mutex + + // lastSignState stores the last sign state for a share we have fully signed + // incremented whenever we are asked to sign a share + LastSignState *SignState +} + // return true if we are less than the other key func (hrst *HRSTKey) Less(other HRSTKey) bool { if hrst.Height < other.Height { @@ -71,18 +80,6 @@ type LocalCosignerConfig struct { Threshold uint8 } -type PeerMetadata struct { - Share []byte - EphemeralSecretPublicKey []byte -} - -type HrsMetadata struct { - // need to be _total_ entries per player - Secret []byte - DealtShares []tsed25519.Scalar - Peers []PeerMetadata -} - // LocalCosigner responds to sign requests using their share key // The cosigner maintains a watermark to avoid double-signing // From 4b62d1416d6d63a31cefe01826362d2851a44e1e Mon Sep 17 00:00:00 2001 From: r4f43l Date: Fri, 16 Sep 2022 17:44:21 +0200 Subject: [PATCH 03/44] fixed some minor renaming package --- signer/local_cosigner.go | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/signer/local_cosigner.go 
b/signer/local_cosigner.go index 9456723a..40f965c9 100644 --- a/signer/local_cosigner.go +++ b/signer/local_cosigner.go @@ -11,8 +11,8 @@ import ( "sync" "time" - tmCryptoEd25519 "github.com/tendermint/tendermint/crypto/ed25519" - tmJson "github.com/tendermint/tendermint/libs/json" + tmcryptoed25519 "github.com/tendermint/tendermint/crypto/ed25519" + tmjson "github.com/tendermint/tendermint/libs/json" "gitlab.com/unit410/edwards25519" tsed25519 "gitlab.com/unit410/threshold-ed25519/pkg" ) @@ -127,7 +127,7 @@ func NewLocalCosigner(cfg LocalCosignerConfig) *LocalCosigner { // cache the public key bytes for signing operations switch ed25519Key := cosigner.key.PubKey.(type) { - case tmCryptoEd25519.PubKey: + case tmcryptoed25519.PubKey: cosigner.pubKeyBytes = make([]byte, len(ed25519Key)) copy(cosigner.pubKeyBytes, ed25519Key[:]) default: @@ -369,7 +369,7 @@ func (cosigner *LocalCosigner) getEphemeralSecretPart( // sign the response payload with our private key // cosigners can verify the signature to confirm sender validity { - jsonBytes, err := tmJson.Marshal(res) + jsonBytes, err := tmjson.Marshal(res) if err != nil { return res, err @@ -403,7 +403,7 @@ func (cosigner *LocalCosigner) setEphemeralSecretPart(req CosignerSetEphemeralSe digestMsg.SourceEphemeralSecretPublicKey = req.SourceEphemeralSecretPublicKey digestMsg.EncryptedSharePart = req.EncryptedSharePart - digestBytes, err := tmJson.Marshal(digestMsg) + digestBytes, err := tmjson.Marshal(digestMsg) if err != nil { return err } From 38e0da76cb483d4983c9d511cfa711c12cae9879 Mon Sep 17 00:00:00 2001 From: r4f43l Date: Fri, 16 Sep 2022 18:10:46 +0200 Subject: [PATCH 04/44] fix switch to casting --- signer/threshold_signer_soft.go | 19 +++++++------------ 1 file changed, 7 insertions(+), 12 deletions(-) diff --git a/signer/threshold_signer_soft.go b/signer/threshold_signer_soft.go index cc18865d..2e093676 100644 --- a/signer/threshold_signer_soft.go +++ b/signer/threshold_signer_soft.go @@ -29,10 +29,7 @@ type 
ThresholdSignerSoft struct { // NewThresholdSignerSoft constructs a ThresholdSigner // that signs using the local key share file. -func NewThresholdSignerSoft( - key CosignerKey, - threshold, total uint8, -) ThresholdSigner { +func NewThresholdSignerSoft(key CosignerKey, threshold, total uint8) ThresholdSigner { softSigner := &ThresholdSignerSoft{ Key: key, HrsMeta: make(map[HRSTKey]HrsMetadata), @@ -40,14 +37,12 @@ func NewThresholdSignerSoft( Threshold: threshold, } - // cache the public key bytes for signing operations - switch ed25519Key := softSigner.Key.PubKey.(type) { - case tmcryptoed25519.PubKey: - softSigner.PubKeyBytes = make([]byte, len(ed25519Key)) - copy(softSigner.PubKeyBytes, ed25519Key[:]) - default: - panic("softSigner.Key.PubKey.(type) is not a tmcryptoed25519.PubKey! i.e not ed25519 public key") - } + // cache the public key bytes for signing operations. + // Ensures casting else it will naturally panic. + ed25519Key := softSigner.Key.PubKey.(tmcryptoed25519.PubKey) + softSigner.PubKeyBytes = make([]byte, len(ed25519Key)) + copy(softSigner.PubKeyBytes, ed25519Key[:]) + return softSigner } From 6b30c2eb2ef631adb5f8dd0f0c0e6e24622baa41 Mon Sep 17 00:00:00 2001 From: r4f43l Date: Fri, 16 Sep 2022 19:07:07 +0200 Subject: [PATCH 05/44] remove scope introducing block --- signer/threshold_signer_soft.go | 42 ++++++++++++++++----------------- 1 file changed, 20 insertions(+), 22 deletions(-) diff --git a/signer/threshold_signer_soft.go b/signer/threshold_signer_soft.go index 2e093676..9bb2c4f9 100644 --- a/signer/threshold_signer_soft.go +++ b/signer/threshold_signer_soft.go @@ -279,33 +279,31 @@ func (softSigner *ThresholdSignerSoft) SetEphemeralSecretPart( req CosignerSetEphemeralSecretPartRequest, m *LastSignStateStruct, peers map[int]CosignerPeer) error { // Verify the source signature - { - if req.SourceSig == nil { - return errors.New("SourceSig field is required") - } + if req.SourceSig == nil { + return errors.New("SourceSig field is required") 
+ } - digestMsg := CosignerEphemeralSecretPart{} - digestMsg.SourceID = req.SourceID - digestMsg.SourceEphemeralSecretPublicKey = req.SourceEphemeralSecretPublicKey - digestMsg.EncryptedSharePart = req.EncryptedSharePart + digestMsg := CosignerEphemeralSecretPart{} + digestMsg.SourceID = req.SourceID + digestMsg.SourceEphemeralSecretPublicKey = req.SourceEphemeralSecretPublicKey + digestMsg.EncryptedSharePart = req.EncryptedSharePart - digestBytes, err := tmjson.Marshal(digestMsg) - if err != nil { - return err - } + digestBytes, err := tmjson.Marshal(digestMsg) + if err != nil { + return err + } - digest := sha256.Sum256(digestBytes) - peer, ok := peers[req.SourceID] + digest := sha256.Sum256(digestBytes) + peer, ok := peers[req.SourceID] - if !ok { - return fmt.Errorf("unknown cosigner: %d", req.SourceID) - } + if !ok { + return fmt.Errorf("unknown cosigner: %d", req.SourceID) + } - peerPub := peer.PublicKey - err = rsa.VerifyPSS(&peerPub, crypto.SHA256, digest[:], req.SourceSig, nil) - if err != nil { - return err - } + peerPub := peer.PublicKey + err = rsa.VerifyPSS(&peerPub, crypto.SHA256, digest[:], req.SourceSig, nil) + if err != nil { + return err } // protects the meta map From aaf9488d05c9830062ff91e24603a55e9738c4e0 Mon Sep 17 00:00:00 2001 From: r4f43l Date: Fri, 16 Sep 2022 22:48:16 +0200 Subject: [PATCH 06/44] fixed: TODO check true here is correct for async --- signer/threshold_signer_soft.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/signer/threshold_signer_soft.go b/signer/threshold_signer_soft.go index 9bb2c4f9..d31b2764 100644 --- a/signer/threshold_signer_soft.go +++ b/signer/threshold_signer_soft.go @@ -132,7 +132,7 @@ func (softSigner *ThresholdSignerSoft) Sign( Step: hrst.Step, Signature: sig, SignBytes: req.SignBytes, - }, nil, true) // TODO double check true here is correct for async? 
+ }, nil, true) if err != nil { if _, isSameHRSError := err.(*SameHRSError); !isSameHRSError { From ec4e74eec1c0db002ea5d4d42f4a9914351f4364 Mon Sep 17 00:00:00 2001 From: r4f43l <91068974+nitronit@users.noreply.github.com> Date: Tue, 20 Sep 2022 09:29:04 +0200 Subject: [PATCH 07/44] Removed scope-introducing blocks --- signer/threshold_signer_soft.go | 42 ++++++++++++++++----------------- 1 file changed, 21 insertions(+), 21 deletions(-) diff --git a/signer/threshold_signer_soft.go b/signer/threshold_signer_soft.go index d31b2764..4090486d 100644 --- a/signer/threshold_signer_soft.go +++ b/signer/threshold_signer_soft.go @@ -71,6 +71,7 @@ func (softSigner *ThresholdSignerSoft) Sign( } sameHRS, err := lss.CheckHRS(hrst) + if err != nil { return res, err } @@ -110,16 +111,15 @@ func (softSigner *ThresholdSignerSoft) Sign( ephemeralPublic := tsed25519.AddElements(publicKeys) // check bounds for ephemeral share to avoid passing out of bounds valids to SignWithShare - { - if len(ephemeralShare) != 32 { - return res, errors.New("ephemeral share is out of bounds") - } - var scalarBytes [32]byte - copy(scalarBytes[:], ephemeralShare) - if !edwards25519.ScMinimal(&scalarBytes) { - return res, errors.New("ephemeral share is out of bounds") - } + if len(ephemeralShare) != 32 { + return res, errors.New("ephemeral share is out of bounds") + } + + var scalarBytes [32]byte + copy(scalarBytes[:], ephemeralShare) + if !edwards25519.ScMinimal(&scalarBytes) { + return res, errors.New("ephemeral share is out of bounds") } sig := tsed25519.SignWithShare( @@ -252,22 +252,22 @@ func (softSigner *ThresholdSignerSoft) GetEphemeralSecretPart( // sign the response payload with our private key // cosigners can verify the signature to confirm sender validity - { - jsonBytes, err := tmjson.Marshal(res) + + jsonBytes, err := tmjson.Marshal(res) - if err != nil { - return res, err - } - - digest := sha256.Sum256(jsonBytes) - signature, err := rsa.SignPSS(rand.Reader, &softSigner.Key.RSAKey, 
crypto.SHA256, digest[:], nil) - if err != nil { - return res, err - } + if err != nil { + return res, err + } - res.SourceSig = signature + digest := sha256.Sum256(jsonBytes) + signature, err := rsa.SignPSS(rand.Reader, &softSigner.Key.RSAKey, crypto.SHA256, digest[:], nil) + + if err != nil { + return res, err } + res.SourceSig = signature + res.DestinationID = req.ID return res, nil From 1d9454beda450affa199cf46e4011d6d344e9ce4 Mon Sep 17 00:00:00 2001 From: r4f43l Date: Tue, 20 Sep 2022 21:22:54 +0200 Subject: [PATCH 08/44] go lint fix --- signer/threshold_signer_soft.go | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/signer/threshold_signer_soft.go b/signer/threshold_signer_soft.go index 4090486d..ca15065f 100644 --- a/signer/threshold_signer_soft.go +++ b/signer/threshold_signer_soft.go @@ -71,7 +71,7 @@ func (softSigner *ThresholdSignerSoft) Sign( } sameHRS, err := lss.CheckHRS(hrst) - + if err != nil { return res, err } @@ -252,7 +252,7 @@ func (softSigner *ThresholdSignerSoft) GetEphemeralSecretPart( // sign the response payload with our private key // cosigners can verify the signature to confirm sender validity - + jsonBytes, err := tmjson.Marshal(res) if err != nil { @@ -261,7 +261,7 @@ func (softSigner *ThresholdSignerSoft) GetEphemeralSecretPart( digest := sha256.Sum256(jsonBytes) signature, err := rsa.SignPSS(rand.Reader, &softSigner.Key.RSAKey, crypto.SHA256, digest[:], nil) - + if err != nil { return res, err } From 9b9e87a23d3a923ebaf7a41fbe482a137f85b8c9 Mon Sep 17 00:00:00 2001 From: Andrew Gouin Date: Tue, 27 Sep 2022 15:57:34 -0600 Subject: [PATCH 09/44] Update raft-boltdb to v2, enable race detection in tests, fix race in test (#105) --- Makefile | 2 +- go.mod | 2 +- go.sum | 7 ++++--- signer/raft_store.go | 2 +- signer/services_test.go | 3 +++ 5 files changed, 10 insertions(+), 6 deletions(-) diff --git a/Makefile b/Makefile index 9130b3bb..77d210d9 100644 --- a/Makefile +++ b/Makefile @@ -18,7 +18,7 @@ build-linux: 
@GOOS=linux GOARCH=amd64 go build --mod readonly $(BUILD_FLAGS) -o ./build/horcrux ./cmd/horcrux test: - @go test -timeout 20m -mod readonly -v ./... + @go test -race -timeout 20m -mod readonly -v ./... test-short: @go test -mod readonly -run TestDownedSigners2of3 -v ./... diff --git a/go.mod b/go.mod index 38c10e1f..23aec89a 100644 --- a/go.mod +++ b/go.mod @@ -12,7 +12,7 @@ require ( github.com/gogo/protobuf v1.3.3 github.com/grpc-ecosystem/go-grpc-middleware v1.3.0 github.com/hashicorp/raft v1.3.3 - github.com/hashicorp/raft-boltdb v0.0.0-20211202195631-7d34b9fb3f42 + github.com/hashicorp/raft-boltdb/v2 v2.2.2 github.com/mitchellh/go-homedir v1.1.0 github.com/ory/dockertest v3.3.5+incompatible github.com/spf13/cobra v1.2.1 diff --git a/go.sum b/go.sum index e0bedb76..d1c0b664 100644 --- a/go.sum +++ b/go.sum @@ -111,7 +111,6 @@ github.com/armon/circbuf v0.0.0-20150827004946-bbbad097214e/go.mod h1:3U/XgcO3hC github.com/armon/consul-api v0.0.0-20180202201655-eb2c6b5be1b6/go.mod h1:grANhF5doyWs3UAsr3K4I6qtAmlQcZDesFNEHPZAzj8= github.com/armon/go-metrics v0.0.0-20180917152333-f0300d1749da/go.mod h1:Q73ZrmVTwzkszR9V5SSuryQ31EELlFMUz1kKyl939pY= github.com/armon/go-metrics v0.0.0-20190430140413-ec5e00d3c878/go.mod h1:3AMJUQhVx52RsWOnlkpikZr01T/yAVN2gn0861vByNg= -github.com/armon/go-metrics v0.3.8/go.mod h1:4O98XIr/9W0sxpJ8UaYkvjk10Iff7SnFrb4QAOwNTFc= github.com/armon/go-metrics v0.3.9 h1:O2sNqxBdvq8Eq5xmzljcYzAORli6RWCvEym4cJf9m18= github.com/armon/go-metrics v0.3.9/go.mod h1:4O98XIr/9W0sxpJ8UaYkvjk10Iff7SnFrb4QAOwNTFc= github.com/armon/go-radix v0.0.0-20180808171621-7fddfc383310/go.mod h1:ufUuZ+zHj4x4TnLV4JWEpy2hxWSpsRywHrMgIH9cCH8= @@ -486,8 +485,10 @@ github.com/hashicorp/raft v1.3.1/go.mod h1:4Ak7FSPnuvmb0GV6vgIAJ4vYT4bek9bb6Q+7H github.com/hashicorp/raft v1.3.3 h1:Xr6DSHC5cIM8kzxu+IgoT/+MeNeUNeWin3ie6nlSrMg= github.com/hashicorp/raft v1.3.3/go.mod h1:4Ak7FSPnuvmb0GV6vgIAJ4vYT4bek9bb6Q+7HVbyzqM= github.com/hashicorp/raft-boltdb 
v0.0.0-20171010151810-6e5ba93211ea/go.mod h1:pNv7Wc3ycL6F5oOWn+tPGo2gWD4a5X+yp/ntwdKLjRk= -github.com/hashicorp/raft-boltdb v0.0.0-20211202195631-7d34b9fb3f42 h1:Ye8SofeDHJzu9xvvaMmpMkqHELWW7rTcXwdUR0CWW48= -github.com/hashicorp/raft-boltdb v0.0.0-20211202195631-7d34b9fb3f42/go.mod h1:wcXL8otVu5cpJVLjcmq7pmfdRCdaP+xnvu7WQcKJAhs= +github.com/hashicorp/raft-boltdb v0.0.0-20210409134258-03c10cc3d4ea h1:RxcPJuutPRM8PUOyiweMmkuNO+RJyfy2jds2gfvgNmU= +github.com/hashicorp/raft-boltdb v0.0.0-20210409134258-03c10cc3d4ea/go.mod h1:qRd6nFJYYS6Iqnc/8HcUmko2/2Gw8qTFEmxDLii6W5I= +github.com/hashicorp/raft-boltdb/v2 v2.2.2 h1:rlkPtOllgIcKLxVT4nutqlTH2NRFn+tO1wwZk/4Dxqw= +github.com/hashicorp/raft-boltdb/v2 v2.2.2/go.mod h1:N8YgaZgNJLpZC+h+by7vDu5rzsRgONThTEeUS3zWbfY= github.com/hashicorp/serf v0.8.2/go.mod h1:6hOLApaqBFA1NXqRQAsxw9QxuDEvNxSQRwA/JwenrHc= github.com/hdevalence/ed25519consensus v0.0.0-20210204194344-59a8610d2b87 h1:uUjLpLt6bVvZ72SQc/B4dXcPBw4Vgd7soowdRl52qEM= github.com/hdevalence/ed25519consensus v0.0.0-20210204194344-59a8610d2b87/go.mod h1:XGsKKeXxeRr95aEOgipvluMPlgjr7dGlk9ZTWOjcUcg= diff --git a/signer/raft_store.go b/signer/raft_store.go index 727cb28c..c3d3d84a 100644 --- a/signer/raft_store.go +++ b/signer/raft_store.go @@ -22,7 +22,7 @@ import ( gRPCTransport "github.com/Jille/raft-grpc-transport" "github.com/Jille/raftadmin" "github.com/hashicorp/raft" - boltdb "github.com/hashicorp/raft-boltdb" + boltdb "github.com/hashicorp/raft-boltdb/v2" proto "github.com/strangelove-ventures/horcrux/signer/proto" "github.com/tendermint/tendermint/libs/log" "github.com/tendermint/tendermint/libs/service" diff --git a/signer/services_test.go b/signer/services_test.go index 0695856e..02115fb4 100644 --- a/signer/services_test.go +++ b/signer/services_test.go @@ -113,9 +113,12 @@ func TestConcurrentStart(t *testing.T) { wg.Add(concurrentAttempts) doneCount := 0 panicCount := 0 + var countMu sync.Mutex recoverFromPanic := func() { _ = recover() + countMu.Lock() + defer 
countMu.Unlock() panicCount++ if panicCount == concurrentAttempts-1 { for doneCount < concurrentAttempts { From b952860cc46971756180147e4d245823530d3f03 Mon Sep 17 00:00:00 2001 From: Andrew Gouin Date: Wed, 28 Sep 2022 10:45:46 -0600 Subject: [PATCH 10/44] Update raft-grpc-transport (#106) * Update raft-grpc-transport to fix close connections issue * Bump raft to v1.3.10 * Reset counter if block is signed to make test less flaky --- go.mod | 6 ++++-- go.sum | 8 ++++++++ test/test_node.go | 15 ++++++++++++--- 3 files changed, 24 insertions(+), 5 deletions(-) diff --git a/go.mod b/go.mod index 23aec89a..2c87f935 100644 --- a/go.mod +++ b/go.mod @@ -5,13 +5,13 @@ go 1.19 require ( github.com/Jille/grpc-multi-resolver v1.1.0 github.com/Jille/raft-grpc-leader-rpc v1.1.0 - github.com/Jille/raft-grpc-transport v1.2.0 + github.com/Jille/raft-grpc-transport v1.2.1-0.20220914172309-2f253856eefc github.com/Jille/raftadmin v1.2.0 github.com/avast/retry-go v3.0.0+incompatible github.com/cosmos/cosmos-sdk v0.44.5 github.com/gogo/protobuf v1.3.3 github.com/grpc-ecosystem/go-grpc-middleware v1.3.0 - github.com/hashicorp/raft v1.3.3 + github.com/hashicorp/raft v1.3.10 github.com/hashicorp/raft-boltdb/v2 v2.2.2 github.com/mitchellh/go-homedir v1.1.0 github.com/ory/dockertest v3.3.5+incompatible @@ -77,9 +77,11 @@ require ( github.com/gsterjov/go-libsecret v0.0.0-20161001094733-a6f4afe4910c // indirect github.com/gtank/merlin v0.1.1 // indirect github.com/gtank/ristretto255 v0.1.2 // indirect + github.com/hashicorp/errwrap v1.0.0 // indirect github.com/hashicorp/go-hclog v0.16.2 // indirect github.com/hashicorp/go-immutable-radix v1.3.1 // indirect github.com/hashicorp/go-msgpack v1.1.5 // indirect + github.com/hashicorp/go-multierror v1.1.1 // indirect github.com/hashicorp/golang-lru v0.5.4 // indirect github.com/hashicorp/hcl v1.0.0 // indirect github.com/hdevalence/ed25519consensus v0.0.0-20210204194344-59a8610d2b87 // indirect diff --git a/go.sum b/go.sum index 
d1c0b664..db3822a8 100644 --- a/go.sum +++ b/go.sum @@ -75,6 +75,8 @@ github.com/Jille/raft-grpc-leader-rpc v1.1.0 h1:u36rmA4tjp+4FSdZ17jg/1sfSCYNQIe5 github.com/Jille/raft-grpc-leader-rpc v1.1.0/go.mod h1:l+pK+uPuqpFDFcPmyUPSng4257UXrST0Vc3Lo4XwVB0= github.com/Jille/raft-grpc-transport v1.2.0 h1:W/YSPz8IsirEyomjKmDog5Xk71o9+l4KhyMEX2TsgSs= github.com/Jille/raft-grpc-transport v1.2.0/go.mod h1:GQGUXJfjlzwA390Ox1AyVYpjCLhtGd6yqY9Sb5hpQfc= +github.com/Jille/raft-grpc-transport v1.2.1-0.20220914172309-2f253856eefc h1:xF58NlLrijxTgZ/sfwUEVFJj/y0v2SxdIPoyHlLEjxI= +github.com/Jille/raft-grpc-transport v1.2.1-0.20220914172309-2f253856eefc/go.mod h1:77bQXfQSgLTAn1Iwi9MJDNE7KwPmdeW42Pd4HUHdl9E= github.com/Jille/raftadmin v1.2.0 h1:hMLFUK7iKpeXP+CoIhNMWj+F53XOLSjMDSia0C60cps= github.com/Jille/raftadmin v1.2.0/go.mod h1:vtVEpToPGTUPVwwunypWDpi69JpdnHMhWRUlc/65U+Y= github.com/Knetic/govaluate v3.0.1-0.20171022003610-9aa49832a739+incompatible/go.mod h1:r7JcOSlj0wfOMncg0iLm8Leh48TZaKVeNIfJntJ2wa0= @@ -447,6 +449,7 @@ github.com/hashicorp/consul/api v1.1.0/go.mod h1:VmuI/Lkw1nC05EYQWNKwWGbkg+FbDBt github.com/hashicorp/consul/api v1.3.0/go.mod h1:MmDNSzIMUjNpY/mQ398R4bk2FnqQLoPndWW5VkKPlCE= github.com/hashicorp/consul/sdk v0.1.1/go.mod h1:VKf9jXwCTEY1QZP2MOLRhb5i/I/ssyNV1vwHyQBF0x8= github.com/hashicorp/consul/sdk v0.3.0/go.mod h1:VKf9jXwCTEY1QZP2MOLRhb5i/I/ssyNV1vwHyQBF0x8= +github.com/hashicorp/errwrap v1.0.0 h1:hLrqtEDnRye3+sgx6z4qVLNuviH3MR5aQ0ykNJa/UYA= github.com/hashicorp/errwrap v1.0.0/go.mod h1:YH+1FKiLXxHSkmPseP+kNlulaMuP3n2brvKWEqk/Jc4= github.com/hashicorp/go-cleanhttp v0.5.0/go.mod h1:JpRdi6/HCYpAwUzNwuwqhbovhLtngrth3wmdIIUrZ80= github.com/hashicorp/go-cleanhttp v0.5.1/go.mod h1:JpRdi6/HCYpAwUzNwuwqhbovhLtngrth3wmdIIUrZ80= @@ -461,6 +464,8 @@ github.com/hashicorp/go-msgpack v0.5.5/go.mod h1:ahLV/dePpqEmjfWmKiqvPkv/twdG7iP github.com/hashicorp/go-msgpack v1.1.5 h1:9byZdVjKTe5mce63pRVNP1L7UAmdHOTEMGehn6KvJWs= github.com/hashicorp/go-msgpack v1.1.5/go.mod 
h1:gWVc3sv/wbDmR3rQsj1CAktEZzoz1YNK9NfGLXJ69/4= github.com/hashicorp/go-multierror v1.0.0/go.mod h1:dHtQlpGsu+cZNNAkkCN/P3hoUDHhCYQXV3UM06sGGrk= +github.com/hashicorp/go-multierror v1.1.1 h1:H5DkEtf6CXdFp0N0Em5UCwQpXMWke8IA0+lD48awMYo= +github.com/hashicorp/go-multierror v1.1.1/go.mod h1:iw975J/qwKPdAO1clOe2L8331t/9/fmwbPZ6JB6eMoM= github.com/hashicorp/go-retryablehttp v0.5.3/go.mod h1:9B5zBasrRhHXnJnui7y6sL7es7NDiJgTc6Er0maI1Xs= github.com/hashicorp/go-rootcerts v1.0.0/go.mod h1:K6zTfqpRlCUIjkwsN4Z+hiSfzSTQa6eBIzfwKfwNnHU= github.com/hashicorp/go-sockaddr v1.0.0/go.mod h1:7Xibr9yA9JjQq1JpNB2Vw7kxv8xerXegt+ozgdvDeDU= @@ -484,6 +489,8 @@ github.com/hashicorp/raft v1.1.2/go.mod h1:vPAJM8Asw6u8LxC3eJCUZmRP/E4QmUGE1R7g7 github.com/hashicorp/raft v1.3.1/go.mod h1:4Ak7FSPnuvmb0GV6vgIAJ4vYT4bek9bb6Q+7HVbyzqM= github.com/hashicorp/raft v1.3.3 h1:Xr6DSHC5cIM8kzxu+IgoT/+MeNeUNeWin3ie6nlSrMg= github.com/hashicorp/raft v1.3.3/go.mod h1:4Ak7FSPnuvmb0GV6vgIAJ4vYT4bek9bb6Q+7HVbyzqM= +github.com/hashicorp/raft v1.3.10 h1:LR5QZX1VQd0DFWZfeCwWawyeKfpS/Tm1yjnJIY5X4Tw= +github.com/hashicorp/raft v1.3.10/go.mod h1:J8naEwc6XaaCfts7+28whSeRvCqTd6e20BlCU3LtEO4= github.com/hashicorp/raft-boltdb v0.0.0-20171010151810-6e5ba93211ea/go.mod h1:pNv7Wc3ycL6F5oOWn+tPGo2gWD4a5X+yp/ntwdKLjRk= github.com/hashicorp/raft-boltdb v0.0.0-20210409134258-03c10cc3d4ea h1:RxcPJuutPRM8PUOyiweMmkuNO+RJyfy2jds2gfvgNmU= github.com/hashicorp/raft-boltdb v0.0.0-20210409134258-03c10cc3d4ea/go.mod h1:qRd6nFJYYS6Iqnc/8HcUmko2/2Gw8qTFEmxDLii6W5I= @@ -903,6 +910,7 @@ go.uber.org/atomic v1.3.2/go.mod h1:gD2HeocX3+yG+ygLZcrzQJaqmWj9AIm7n08wl/qW/PE= go.uber.org/atomic v1.4.0/go.mod h1:gD2HeocX3+yG+ygLZcrzQJaqmWj9AIm7n08wl/qW/PE= go.uber.org/atomic v1.5.0/go.mod h1:sABNBOSYdrvTF6hTgEIbc7YasKWGhgEQZyfxyTvoXHQ= go.uber.org/atomic v1.7.0/go.mod h1:fEN4uk6kAWBTFdckzkM89CLk9XfWZrxpCo0nPH17wJc= +go.uber.org/goleak v1.1.12/go.mod h1:cwTWslyiVhfpKIDGSZEM2HlOvcqm+tG4zioyIeLoqMQ= go.uber.org/multierr v1.1.0/go.mod 
h1:wR5kodmAFQ0UK8QlbwjlSNy0Z68gJhDJUG5sjR94q/0= go.uber.org/multierr v1.3.0/go.mod h1:VgVr7evmIr6uPjLBxg28wmKNXyqE9akIJ5XnfpiKl+4= go.uber.org/multierr v1.6.0/go.mod h1:cdWPpRnG4AhwMwsgIHip0KRBQjJy5kYEpYjJxpXp9iU= diff --git a/test/test_node.go b/test/test_node.go index 04898dd1..a937c624 100644 --- a/test/test_node.go +++ b/test/test_node.go @@ -383,7 +383,7 @@ func (tn *TestNode) GetMostRecentConsecutiveSignedBlocks( var status *ctypes.ResultStatus status, err = tn.Client.Status(context.Background()) if err != nil { - return + return 0, 0, err } latestHeight = status.SyncInfo.LatestBlockHeight @@ -392,16 +392,21 @@ func (tn *TestNode) GetMostRecentConsecutiveSignedBlocks( var block *ctypes.ResultBlock block, err = tn.Client.Block(context.Background(), &i) if err != nil { - return + return 0, 0, err } + found := false for _, voter := range block.Block.LastCommit.Signatures { if reflect.DeepEqual(voter.ValidatorAddress, address) { count++ + found = true break } } + if !found { + return count, latestHeight, nil + } } - return + return count, latestHeight, nil } func (tn *TestNode) getMissingBlocks(address tmBytes.HexBytes) (int64, error) { @@ -471,6 +476,10 @@ func (tn *TestNode) WaitForConsecutiveBlocks(blocks int64, address tmBytes.HexBy if err != nil { continue } + if recentSignedBlocksCount > 0 { + // we signed a block within window, so restart counter + i = -1 + } deltaMissed := min(blocks, checkingBlock-1) - recentSignedBlocksCount deltaBlocks := checkingBlock - startingBlock From 0f0d2cbbcb8e9f31cd42d0ee4cbadf3089c841a6 Mon Sep 17 00:00:00 2001 From: Chill Validation Date: Wed, 31 Aug 2022 21:34:46 +0200 Subject: [PATCH 11/44] Add Prometheus Metrics --- cmd/horcrux/cmd/metrics.go | 15 ++++++ cmd/horcrux/cmd/signer.go | 2 + signer/metrics.go | 94 ++++++++++++++++++++++++++++++++++++++ signer/remote_signer.go | 37 +++++++++++++++ 4 files changed, 148 insertions(+) create mode 100644 cmd/horcrux/cmd/metrics.go create mode 100644 signer/metrics.go diff --git 
a/cmd/horcrux/cmd/metrics.go b/cmd/horcrux/cmd/metrics.go new file mode 100644 index 00000000..d8f2ce46 --- /dev/null +++ b/cmd/horcrux/cmd/metrics.go @@ -0,0 +1,15 @@ +package cmd + +import ( + "fmt" + "net/http" + + "github.com/prometheus/client_golang/prometheus/promhttp" +) + +func StartMetrics() { + http.Handle("/metrics", promhttp.Handler()) + if err := http.ListenAndServe(":2112", nil); err != nil { + fmt.Printf("Prometheus Endpoint failed to start: %s\n", err) + } +} diff --git a/cmd/horcrux/cmd/signer.go b/cmd/horcrux/cmd/signer.go index 1ddc3977..7e40ce2a 100644 --- a/cmd/horcrux/cmd/signer.go +++ b/cmd/horcrux/cmd/signer.go @@ -72,6 +72,8 @@ func StartSignerCmd() *cobra.Command { } logger.Info("Signer", "pubkey", pubkey) + go StartMetrics() + services, err = signer.StartRemoteSigners(services, logger, cfg.ChainID, pv, cfg.Nodes) if err != nil { panic(err) diff --git a/signer/metrics.go b/signer/metrics.go new file mode 100644 index 00000000..889e8f77 --- /dev/null +++ b/signer/metrics.go @@ -0,0 +1,94 @@ +package signer + +import ( + "time" + + "github.com/prometheus/client_golang/prometheus" + "github.com/prometheus/client_golang/prometheus/promauto" +) + +var ( + // Variables to calculate Prometheus Metrics + previousPrecommitHeight = int64(0) + previousPrevoteHeight = int64(0) + previousPrecommitTime = time.Now() + previousPrevoteTime = time.Now() + + // Prometheus Metrics + lastPrecommitHeight = promauto.NewGauge(prometheus.GaugeOpts{ + Name: "signer_last_precommit_height", + Help: "Last Height Precommit Signed", + }) + lastPrevoteHeight = promauto.NewGauge(prometheus.GaugeOpts{ + Name: "signer_last_prevote_height", + Help: "Last Height Prevote Signed", + }) + lastProposalHeight = promauto.NewGauge(prometheus.GaugeOpts{ + Name: "signer_last_proposal_height", + Help: "Last Height Proposal Signed", + }) + lastPrecommitRound = promauto.NewGauge(prometheus.GaugeOpts{ + Name: "signer_last_precommit_round", + Help: "Last Round Precommit Signed", + }) + 
lastPrevoteRound = promauto.NewGauge(prometheus.GaugeOpts{ + Name: "signer_last_prevote_round", + Help: "Last Round Prevote Signed", + }) + lastProposalRound = promauto.NewGauge(prometheus.GaugeOpts{ + Name: "signer_last_proposal_round", + Help: "Last Round Proposal Signed", + }) + + totalPrecommitsSigned = promauto.NewGauge(prometheus.GaugeOpts{ + Name: "signer_total_precommits_signed", + Help: "Total Precommit Signed", + }) + totalPrevotesSigned = promauto.NewGauge(prometheus.GaugeOpts{ + Name: "signer_total_prevotes_signed", + Help: "Total Prevote Signed", + }) + totalProposalsSigned = promauto.NewGauge(prometheus.GaugeOpts{ + Name: "signer_total_proposals_signed", + Help: "Total Proposal Signed", + }) + + secondsSinceLastPrecommit = promauto.NewGauge(prometheus.GaugeOpts{ + Name: "signer_seconds_since_last_precommit", + Help: "Seconds Since Last Precommit", + }) + secondsSinceLastPrevote = promauto.NewGauge(prometheus.GaugeOpts{ + Name: "signer_seconds_since_last_prevote", + Help: "Seconds Since Last Prevote", + }) + + missedPrecommits = promauto.NewGauge(prometheus.GaugeOpts{ + Name: "signer_missed_precommits", + Help: "Consecutive Precommit Missed", + }) + missedPrevotes = promauto.NewGauge(prometheus.GaugeOpts{ + Name: "signer_missed_prevotes", + Help: "Consecutive Prevote Missed", + }) + totalMissedPrecommits = promauto.NewGauge(prometheus.GaugeOpts{ + Name: "signer_total_missed_precommits", + Help: "Total Precommit Missed", + }) + totalMissedPrevotes = promauto.NewGauge(prometheus.GaugeOpts{ + Name: "signer_total_missed_prevotes", + Help: "Total Prevote Missed", + }) + + totalSentryConnectTries = promauto.NewCounter(prometheus.CounterOpts{ + Name: "signer_total_sentry_connect_tries", + Help: "Total Number of times sentry TCP connect has been tried", + }) +) + +func StartMetrics() { + for { + secondsSinceLastPrecommit.Set(time.Since(previousPrecommitTime).Seconds()) + secondsSinceLastPrevote.Set(time.Since(previousPrevoteTime).Seconds()) + <-time.After(250 
* time.Millisecond) + } +} diff --git a/signer/remote_signer.go b/signer/remote_signer.go index 46555abe..559ec429 100644 --- a/signer/remote_signer.go +++ b/signer/remote_signer.go @@ -77,6 +77,7 @@ func (rs *ReconnRemoteSigner) loop() { proto, address := tmNet.ProtocolAndAddress(rs.address) netConn, err := rs.dialer.Dial(proto, address) if err != nil { + totalSentryConnectTries.Inc() rs.Logger.Error("Dialing", "err", err) rs.Logger.Info("Retrying", "sleep (s)", 3, "address", rs.address) time.Sleep(time.Second * 3) @@ -155,6 +156,38 @@ func (rs *ReconnRemoteSigner) handleSignVoteRequest(vote *tmProto.Vote) tmProtoP return tmProtoPrivval.Message{Sum: msgSum} } rs.Logger.Info("Signed vote", "node", rs.address, "height", vote.Height, "round", vote.Round, "type", vote.Type) + + if vote.Type == tmProto.PrecommitType { + stepSize := vote.Height - previousPrecommitHeight + if previousPrecommitHeight != 0 && stepSize > 1 { + missedPrecommits.Add(float64(stepSize)) + totalMissedPrecommits.Add(float64(stepSize)) + } else { + missedPrecommits.Set(0) + } + previousPrecommitHeight = vote.Height + previousPrecommitTime = time.Now() + lastPrecommitHeight.Set(float64(vote.Height)) + lastPrecommitRound.Set(float64(vote.Round)) + totalPrecommitsSigned.Inc() + } + if vote.Type == tmProto.PrevoteType { + stepSize := vote.Height - previousPrevoteHeight + if previousPrevoteHeight != 0 && stepSize > 1 { + missedPrevotes.Add(float64(stepSize)) + totalMissedPrevotes.Add(float64(stepSize)) + } else { + missedPrevotes.Set(0) + } + + previousPrevoteHeight = vote.Height + previousPrevoteTime = time.Now() + + lastPrevoteHeight.Set(float64(vote.Height)) + lastPrevoteRound.Set(float64(vote.Round)) + totalPrevotesSigned.Inc() + } + msgSum.SignedVoteResponse.Vote = *vote return tmProtoPrivval.Message{Sum: msgSum} } @@ -177,6 +210,9 @@ func (rs *ReconnRemoteSigner) handleSignProposalRequest(proposal *tmProto.Propos } rs.Logger.Info("Signed proposal", "node", rs.address, "height", proposal.Height, 
"round", proposal.Round, "type", proposal.Type) + lastProposalHeight.Set(float64(proposal.Height)) + lastProposalRound.Set(float64(proposal.Round)) + totalProposalsSigned.Inc() msgSum.SignedProposalResponse.Proposal = *proposal return tmProtoPrivval.Message{Sum: msgSum} } @@ -219,6 +255,7 @@ func getRemoteSignerError(err error) *tmProtoPrivval.RemoteSignerError { func StartRemoteSigners(services []tmService.Service, logger tmLog.Logger, chainID string, privVal tm.PrivValidator, nodes []NodeConfig) ([]tmService.Service, error) { var err error + go StartMetrics() for _, node := range nodes { dialer := net.Dialer{Timeout: 30 * time.Second} s := NewReconnRemoteSigner(node.Address, logger, chainID, privVal, dialer) From c87ce20266ffbeb27cf96f33dc3c6176859aeeef Mon Sep 17 00:00:00 2001 From: Chill Validation Date: Fri, 2 Sep 2022 11:42:53 +0200 Subject: [PATCH 12/44] Signing metrics between leader and follower nodes --- cmd/horcrux/cmd/config.go | 24 +++++--- cmd/horcrux/cmd/cosigner.go | 2 + cmd/horcrux/cmd/metrics.go | 13 ++++- cmd/horcrux/cmd/state.go | 1 + docs/metrics.md | 103 ++++++++++++++++++++++++++++++++++ signer/local_cosigner.go | 6 +- signer/metrics.go | 97 +++++++++++++++++++++++++++++--- signer/raft_store.go | 1 + signer/remote_signer.go | 6 +- signer/threshold_validator.go | 13 +++++ 10 files changed, 247 insertions(+), 19 deletions(-) create mode 100644 docs/metrics.md diff --git a/cmd/horcrux/cmd/config.go b/cmd/horcrux/cmd/config.go index e45a5ca6..3a664f72 100644 --- a/cmd/horcrux/cmd/config.go +++ b/cmd/horcrux/cmd/config.go @@ -74,7 +74,9 @@ func initCmd() *cobra.Command { if keyFileFlag != "" { keyFile = &keyFileFlag } + prometheusListenAddress, _ := cmdFlags.GetString("metrics") if cs { + // Cosigner Config p, _ := cmdFlags.GetString("peers") threshold, _ := cmdFlags.GetInt("threshold") timeout, _ := cmdFlags.GetString("timeout") @@ -109,19 +111,23 @@ func initCmd() *cobra.Command { Peers: peers, Timeout: timeout, }, - ChainNodes: cn, + 
ChainNodes: cn, + PrometheusListenAddress: prometheusListenAddress, } if err = validateCosignerConfig(cfg); err != nil { return err } } else { + // Single Signer Config if len(cn) == 0 { return fmt.Errorf("must input at least one node") } + prometheusListenAddress, _ := cmdFlags.GetString("metrics") cfg = DiskConfig{ - PrivValKeyFile: keyFile, - ChainID: cid, - ChainNodes: cn, + PrivValKeyFile: keyFile, + ChainID: cid, + ChainNodes: cn, + PrometheusListenAddress: prometheusListenAddress, } if err = validateSingleSignerConfig(cfg); err != nil { return err @@ -162,6 +168,7 @@ func initCmd() *cobra.Command { "(i.e. \"tcp://node-1:2222|2,tcp://node-2:2222|3\")") cmd.Flags().IntP("threshold", "t", 0, "indicate number of signatures required for threshold signature") cmd.Flags().StringP("listen", "l", "", "listen address of the signer") + cmd.Flags().StringP("metrics", "m", "", "listen address for prometheus metrics") cmd.Flags().StringP("keyfile", "k", "", "priv val key file path (full key for single signer, or key share for cosigner)") cmd.Flags().String("timeout", "1500ms", "configure cosigner rpc server timeout value, \n"+ @@ -481,10 +488,11 @@ func setChainIDCmd() *cobra.Command { // Config maps to the on-disk JSON format type DiskConfig struct { - PrivValKeyFile *string `json:"key-file,omitempty" yaml:"key-file,omitempty"` - ChainID string `json:"chain-id" yaml:"chain-id"` - CosignerConfig *CosignerConfig `json:"cosigner,omitempty" yaml:"cosigner,omitempty"` - ChainNodes []ChainNode `json:"chain-nodes,omitempty" yaml:"chain-nodes,omitempty"` + PrivValKeyFile *string `json:"key-file,omitempty" yaml:"key-file,omitempty"` + ChainID string `json:"chain-id" yaml:"chain-id"` + CosignerConfig *CosignerConfig `json:"cosigner,omitempty" yaml:"cosigner,omitempty"` + ChainNodes []ChainNode `json:"chain-nodes,omitempty" yaml:"chain-nodes,omitempty"` + PrometheusListenAddress string `json:"prometheus-listen-address,omitempty" yaml:"prometheus-listen-address,omitempty"` } func (c 
*DiskConfig) Nodes() []signer.NodeConfig { diff --git a/cmd/horcrux/cmd/cosigner.go b/cmd/horcrux/cmd/cosigner.go index 548c813b..803c2dec 100644 --- a/cmd/horcrux/cmd/cosigner.go +++ b/cmd/horcrux/cmd/cosigner.go @@ -239,6 +239,8 @@ func StartCosignerCmd() *cobra.Command { } logger.Info("Signer", "address", pubkey.Address()) + go StartMetrics() + services, err = signer.StartRemoteSigners(services, logger, cfg.ChainID, pv, cfg.Nodes) if err != nil { panic(err) diff --git a/cmd/horcrux/cmd/metrics.go b/cmd/horcrux/cmd/metrics.go index d8f2ce46..de36ca54 100644 --- a/cmd/horcrux/cmd/metrics.go +++ b/cmd/horcrux/cmd/metrics.go @@ -3,13 +3,22 @@ package cmd import ( "fmt" "net/http" + "os" "github.com/prometheus/client_golang/prometheus/promhttp" + tmlog "github.com/tendermint/tendermint/libs/log" ) func StartMetrics() { + logger := tmlog.NewTMLogger(tmlog.NewSyncWriter(os.Stdout)).With("module", "metrics") + + if len(config.Config.PrometheusListenAddress) == 0 { + logger.Error("prometheus-listen-address not defined") + return + } + logger.Info("Prometheus Metrics Listening", "address", config.Config.PrometheusListenAddress) http.Handle("/metrics", promhttp.Handler()) - if err := http.ListenAndServe(":2112", nil); err != nil { - fmt.Printf("Prometheus Endpoint failed to start: %s\n", err) + if err := http.ListenAndServe(config.Config.PrometheusListenAddress, nil); err != nil { + logger.Error(fmt.Sprintf("Prometheus Endpoint failed to start: %s", err)) } } diff --git a/cmd/horcrux/cmd/state.go b/cmd/horcrux/cmd/state.go index 7f144bbd..e2d9f40f 100644 --- a/cmd/horcrux/cmd/state.go +++ b/cmd/horcrux/cmd/state.go @@ -102,6 +102,7 @@ func setStateCmd() *cobra.Command { return err } + fmt.Printf("Setting height %d\n", height) pv.EphemeralPublic, share.EphemeralPublic = nil, nil signState := signer.SignStateConsensus{ Height: height, diff --git a/docs/metrics.md b/docs/metrics.md new file mode 100644 index 00000000..a073c2e1 --- /dev/null +++ b/docs/metrics.md @@ -0,0 
+1,103 @@ +# Prometheus Metrics + +## Enabling Prometheus +Specify the port for incoming prometheus connections during 'config init' by using the -m flag. +``` +horcrux ..options.. -m 0.0.0.0:8001 +``` + +For earlier adopters, add the following key to your config.yaml: + +prometheus-listen-address: 0.0.0.0:6001 + +Resulting in a configuration like the following: + +``` +chain-id: testnet-1 +cosigner: + threshold: 2 + shares: 3 + p2p-listen: tcp://localhost:5001 + peers: + - share-id: 2 + p2p-addr: tcp://localhost:5002 + - share-id: 3 + p2p-addr: tcp://localhost:5003 + rpc-timeout: 1500ms +chain-nodes: +- priv-val-addr: tcp://localhost:2300 +prometheus-listen-address: 0.0.0.0:6001 +``` + +## Watching Single Signers + +Single node signers don't execute any cosigner code, so the basic metrics are: + * signer_seconds_since_last_precommit + * signer_seconds_since_last_prevote + * signer_last_precommit_height + * signer_last_prevote_height + +If the 'seconds_since' metrics exceed the normal block time, it may indicate a sentry failure or a network stall/halt. + +If there are skips in the block heights requested to be signed, the following counters will increase AFTER the sentry is able to report the latest block height. Until then, from the perspective of horcrux, it looks no different than a network stall. + * signer_total_missed_precommits + * signer_total_missed_prevotes + +## Watching Sentry Failure + +Watch 'signer_total_sentry_connect_tries', which reports connection retries to the specified sentry. Any increase is an indicator of network or sentry process failure. + +## Watching For Cosigner Trouble +Metrics may vary between Cosigner processes since there is only one leader. + +Each block, Ephemeral Secrets are shared between Cosigners. Monitoring 'signer_seconds_since_last_local_ephemeral_share_time' and ensuring it does not exceed the block time will allow you to know when a Cosigner was not contacted for a block.
+ +## Metrics that don't always correspond to block time +There is no guarantee that a Cosigner will sign a block if the threshold is reached early. You may watch 'signer_seconds_since_last_local_sign_start_time', but there is no guarantee that 'signer_seconds_since_last_local_sign_finish_time' will be reached, since there are multiple sanity checks that may cause an early exit in some circumstances (though this is rare). + +## Metrics on the raft leader may be different +On the leader you may watch the following metrics, but they will continue to rise on Cosigners that are not the raft leader (since followers will rarely manage the original signing request): + * signer_seconds_since_last_precommit + * signer_seconds_since_last_prevote + +As a result, followers also do not update these metrics: +* signer_last_precommit_height +* signer_last_prevote_height + + +## Checking Signing Performance +We currently only have metrics between the leader and followers (not full p2p metrics). However, these are still useful in determining when a particular peer lags significantly. + +Your cluster should reach the threshold for availability in a short time. Monitor the following: + +``` +signer_sign_block_threshold_lag_seconds{quantile="0.5"} 0.019399953 +signer_sign_block_threshold_lag_seconds{quantile="0.9"} 0.028546635 +signer_sign_block_threshold_lag_seconds{quantile="0.99"} 0.029730841 +``` + +After reaching the threshold, all cosigners should sign quickly: +``` +signer_sign_block_cosigner_lag_seconds{quantile="0.5"} 0.031424561 +signer_sign_block_cosigner_lag_seconds{quantile="0.9"} 0.0407505 +signer_sign_block_cosigner_lag_seconds{quantile="0.99"} 0.045173791 +``` + +If 'signer_sign_block_cosigner_lag_seconds' takes a significant amount of time, you can check the performance of each cosigner as it is seen by the raft leader. High numbers may indicate a high-latency link or a resource constraint. This metric is only available on the Leader and will report 'NaN' on followers.
+``` +signer_cosigner_sign_lag_seconds{peerid="tcp://localhost:5001",quantile="0.5"} 0.010391636 +signer_cosigner_sign_lag_seconds{peerid="tcp://localhost:5001",quantile="0.9"} 0.013242445 +signer_cosigner_sign_lag_seconds{peerid="tcp://localhost:5001",quantile="0.99"} 0.017128885 +signer_cosigner_sign_lag_seconds_sum{peerid="tcp://localhost:5001"} 1.1935657130000004 +signer_cosigner_sign_lag_seconds_count{peerid="tcp://localhost:5001"} 120 +signer_cosigner_sign_lag_seconds{peerid="tcp://localhost:5002",quantile="0.5"} 0.010473575 +signer_cosigner_sign_lag_seconds{peerid="tcp://localhost:5002",quantile="0.9"} 0.013052952 +signer_cosigner_sign_lag_seconds{peerid="tcp://localhost:5002",quantile="0.99"} 0.01732663 +signer_cosigner_sign_lag_seconds_sum{peerid="tcp://localhost:5002"} 1.014658521 +signer_cosigner_sign_lag_seconds_count{peerid="tcp://localhost:5002"} 103 +signer_cosigner_sign_lag_seconds{peerid="tcp://localhost:5003",quantile="0.5"} 0.010760536 +signer_cosigner_sign_lag_seconds{peerid="tcp://localhost:5003",quantile="0.9"} 0.012623563 +signer_cosigner_sign_lag_seconds{peerid="tcp://localhost:5003",quantile="0.99"} 0.016456836 +``` + + diff --git a/signer/local_cosigner.go b/signer/local_cosigner.go index d7a4fe89..f6ca010b 100644 --- a/signer/local_cosigner.go +++ b/signer/local_cosigner.go @@ -156,6 +156,8 @@ func (cosigner *LocalCosigner) GetAddress() string { // Return the signed bytes or an error // Implements Cosigner interface func (cosigner *LocalCosigner) sign(req CosignerSignRequest) (CosignerSignResponse, error) { + previousLocalSignStartTime = time.Now() // This function has multiple exit points. 
Only start time can be guaranteed + cosigner.lastSignStateMutex.Lock() defer cosigner.lastSignStateMutex.Unlock() @@ -247,6 +249,8 @@ func (cosigner *LocalCosigner) sign(req CosignerSignRequest) (CosignerSignRespon res.EphemeralPublic = ephemeralPublic res.Signature = sig + + previousLocalSignFinishTime = time.Now() // Note - Function may return before this line so elapsed time for Finish may be multiple block times return res, nil } @@ -286,6 +290,7 @@ func (cosigner *LocalCosigner) dealShares(req CosignerGetEphemeralSecretPartRequ func (cosigner *LocalCosigner) GetEphemeralSecretParts( hrst HRSTKey) (*CosignerEphemeralSecretPartsResponse, error) { + previousLocalEphemeralShareTime = time.Now() res := &CosignerEphemeralSecretPartsResponse{ EncryptedSecrets: make([]CosignerEphemeralSecretPart, 0, len(cosigner.peers)-1), } @@ -394,7 +399,6 @@ func (cosigner *LocalCosigner) getEphemeralSecretPart( // Store an ephemeral secret share part provided by another cosigner func (cosigner *LocalCosigner) setEphemeralSecretPart(req CosignerSetEphemeralSecretPartRequest) error { - // Verify the source signature { if req.SourceSig == nil { diff --git a/signer/metrics.go b/signer/metrics.go index 889e8f77..4ae76a00 100644 --- a/signer/metrics.go +++ b/signer/metrics.go @@ -9,12 +9,19 @@ import ( var ( // Variables to calculate Prometheus Metrics - previousPrecommitHeight = int64(0) - previousPrevoteHeight = int64(0) - previousPrecommitTime = time.Now() - previousPrevoteTime = time.Now() + previousPrecommitHeight = int64(0) + previousPrevoteHeight = int64(0) + previousPrecommitTime = time.Now() + previousPrevoteTime = time.Now() + previousLocalSignStartTime = time.Now() + previousLocalSignFinishTime = time.Now() + previousLocalEphemeralShareTime = time.Now() // Prometheus Metrics + totalPubKeyRequests = promauto.NewCounter(prometheus.CounterOpts{ + Name: "signer_total_pubkey_requests", + Help: "Total times public key requested (High count may indicate validator restarts)", + }) 
lastPrecommitHeight = promauto.NewGauge(prometheus.GaugeOpts{ Name: "signer_last_precommit_height", Help: "Last Height Precommit Signed", @@ -23,6 +30,7 @@ var ( Name: "signer_last_prevote_height", Help: "Last Height Prevote Signed", }) + lastProposalHeight = promauto.NewGauge(prometheus.GaugeOpts{ Name: "signer_last_proposal_height", Help: "Last Height Proposal Signed", @@ -55,11 +63,24 @@ var ( secondsSinceLastPrecommit = promauto.NewGauge(prometheus.GaugeOpts{ Name: "signer_seconds_since_last_precommit", - Help: "Seconds Since Last Precommit", + Help: "Seconds Since Last Precommit (Useful for Signing Co-Signer Node, Single Signer)", }) secondsSinceLastPrevote = promauto.NewGauge(prometheus.GaugeOpts{ Name: "signer_seconds_since_last_prevote", - Help: "Seconds Since Last Prevote", + Help: "Seconds Since Last Prevote (Useful for Signing Co-Signer Node, Single Signer)", + }) + secondsSinceLastLocalSignStart = promauto.NewGauge(prometheus.GaugeOpts{ + Name: "signer_seconds_since_last_local_sign_start_time", + Help: "Seconds Since Last Local Start Sign (May increase beyond block time, Rarely important) ", + }) + secondsSinceLastLocalSignFinish = promauto.NewGauge(prometheus.GaugeOpts{ + Name: "signer_seconds_since_last_local_sign_finish_time", + Help: "Seconds Since Last Local Finish Sign (May increase to about 2 * Block Time; If high, CoSigner is not signing) ", + }) + + secondsSinceLastLocalEphemeralShareTime = promauto.NewGauge(prometheus.GaugeOpts{ + Name: "signer_seconds_since_last_local_ephemeral_share_time", + Help: "Seconds Since Last Local Ephemeral Share Sign (Should not increase beyond block time; If high, may indicate raft joining issue for CoSigner) ", }) missedPrecommits = promauto.NewGauge(prometheus.GaugeOpts{ @@ -81,14 +102,76 @@ var ( totalSentryConnectTries = promauto.NewCounter(prometheus.CounterOpts{ Name: "signer_total_sentry_connect_tries", - Help: "Total Number of times sentry TCP connect has been tried", + Help: "Total Number of times sentry 
TCP connect has been tried (High count may indicate validator restarts)", + }) + + beyondBlockErrors = promauto.NewCounter(prometheus.CounterOpts{ + Name: "signer_total_beyond_block_errors", + Help: "Total Times Signing Started but duplicate height/round request arrives", + }) + failedSignVote = promauto.NewCounter(prometheus.CounterOpts{ + Name: "signer_total_failed_sign_vote", + Help: "Total Times Signer Failed to sign block - Unstarted and Unexepcted Height", + }) + + flagRaftLeader = promauto.NewGauge(prometheus.GaugeOpts{ + Name: "signer_is_raft_leader", + Help: "Signer is Raft Leader", + }) + totalRaftLeader = promauto.NewCounter(prometheus.CounterOpts{ + Name: "signer_total_raft_leader", + Help: "Total Times Signer is Raft Leader", }) + totalNotRaftLeader = promauto.NewCounter(prometheus.CounterOpts{ + Name: "signer_total_raft_not_leader", + Help: "Total Times Signer is NOT Raft Leader (Proxy signing to Raft Leader)", + }) + + totalInvalidSignature = promauto.NewCounter(prometheus.CounterOpts{ + Name: "signer_error_total_invalid_signatures", + Help: "Total Times Combined Signature is Invalid", + }) + + totalInsufficientCosigners = promauto.NewCounter(prometheus.CounterOpts{ + Name: "signer_error_total_insufficient_cosigners", + Help: "Total Times Cosigners doesn't reach threshold", + }) + + timedSignBlockThresholdLag = promauto.NewSummary(prometheus.SummaryOpts{ + Name: "signer_sign_block_threshold_lag_seconds", + Help: "Seconds taken to get threshold of cosigners available", + Objectives: map[float64]float64{0.5: 0.05, 0.9: 0.01, 0.99: 0.001}, + }) + + timedSignBlockCosignerLag = promauto.NewSummary(prometheus.SummaryOpts{ + Name: "signer_sign_block_cosigner_lag_seconds", + Help: "Seconds taken to get all cosigner signatures", + Objectives: map[float64]float64{0.5: 0.05, 0.9: 0.01, 0.99: 0.001}, + }) + + timedSignBlockLag = promauto.NewSummary(prometheus.SummaryOpts{ + Name: "signer_sign_block_lag_seconds", + Help: "Seconds taken to sign block", + 
Objectives: map[float64]float64{0.5: 0.05, 0.9: 0.01, 0.99: 0.001}, + }) + + timedCosignerSignLag = promauto.NewSummaryVec( + prometheus.SummaryOpts{ + Name: "signer_cosigner_sign_lag_seconds", + Help: "Time taken to get cosigner signature", + Objectives: map[float64]float64{0.5: 0.05, 0.9: 0.01, 0.99: 0.001}, + }, + []string{"peerid"}, + ) ) func StartMetrics() { for { secondsSinceLastPrecommit.Set(time.Since(previousPrecommitTime).Seconds()) secondsSinceLastPrevote.Set(time.Since(previousPrevoteTime).Seconds()) + secondsSinceLastLocalSignStart.Set(time.Since(previousLocalSignStartTime).Seconds()) + secondsSinceLastLocalSignFinish.Set(time.Since(previousLocalSignFinishTime).Seconds()) + secondsSinceLastLocalEphemeralShareTime.Set(time.Since(previousLocalEphemeralShareTime).Seconds()) <-time.After(250 * time.Millisecond) } } diff --git a/signer/raft_store.go b/signer/raft_store.go index c3d3d84a..d71e559d 100644 --- a/signer/raft_store.go +++ b/signer/raft_store.go @@ -90,6 +90,7 @@ func (s *RaftStore) init() error { if err != nil { return fmt.Errorf("failed to parse local address: %s, %v", host, err) } + s.logger.Info("Local Raft Listening", "port", port) sock, err := net.Listen("tcp", fmt.Sprintf(":%s", port)) if err != nil { return err diff --git a/signer/remote_signer.go b/signer/remote_signer.go index 559ec429..dd2295f8 100644 --- a/signer/remote_signer.go +++ b/signer/remote_signer.go @@ -84,7 +84,7 @@ func (rs *ReconnRemoteSigner) loop() { continue } - rs.Logger.Info("Connected", "address", rs.address) + rs.Logger.Info("Connected to Sentry", "address", rs.address) conn, err = tmP2pConn.MakeSecretConnection(netConn, rs.privKey) if err != nil { conn = nil @@ -148,9 +148,11 @@ func (rs *ReconnRemoteSigner) handleSignVoteRequest(vote *tmProto.Vote) tmProtoP switch typedErr := err.(type) { case *BeyondBlockError: rs.Logger.Debug("Rejecting sign vote request", "reason", typedErr.msg) + beyondBlockErrors.Inc() default: rs.Logger.Error("Failed to sign vote", 
"address", rs.address, "error", err, "vote_type", vote.Type, "height", vote.Height, "round", vote.Round, "validator", fmt.Sprintf("%X", vote.ValidatorAddress)) + failedSignVote.Inc() } msgSum.SignedVoteResponse.Error = getRemoteSignerError(err) return tmProtoPrivval.Message{Sum: msgSum} @@ -202,6 +204,7 @@ func (rs *ReconnRemoteSigner) handleSignProposalRequest(proposal *tmProto.Propos switch typedErr := err.(type) { case *BeyondBlockError: rs.Logger.Debug("Rejecting proposal sign request", "reason", typedErr.msg) + beyondBlockErrors.Inc() default: rs.Logger.Error("Failed to sign proposal", "address", rs.address, "error", err, "proposal", proposal) } @@ -218,6 +221,7 @@ func (rs *ReconnRemoteSigner) handleSignProposalRequest(proposal *tmProto.Propos } func (rs *ReconnRemoteSigner) handlePubKeyRequest() tmProtoPrivval.Message { + totalPubKeyRequests.Inc() msgSum := &tmProtoPrivval.Message_PubKeyResponse{PubKeyResponse: &tmProtoPrivval.PubKeyResponse{ PubKey: tmProtoCrypto.PublicKey{}, Error: nil, diff --git a/signer/threshold_validator.go b/signer/threshold_validator.go index 28b26bc5..2b378c40 100644 --- a/signer/threshold_validator.go +++ b/signer/threshold_validator.go @@ -201,6 +201,7 @@ func (pv *ThresholdValidator) waitForPeerSetEphemeralSharesAndSign( ephemeralPublic *[]byte, wg *sync.WaitGroup, ) { + peerStartTime := time.Now() defer wg.Done() peerEphemeralSecretParts := make([]CosignerEphemeralSecretPart, 0, pv.threshold-1) for _, EncryptedSecrets := range *encryptedEphemeralSharesThresholdMap { @@ -233,6 +234,7 @@ func (pv *ThresholdValidator) waitForPeerSetEphemeralSharesAndSign( return } + timedCosignerSignLag.WithLabelValues(peer.GetAddress()).Observe(time.Since(peerStartTime).Seconds()) pv.logger.Debug(fmt.Sprintf("Received signature from %d", peerID)) shareSignaturesMutex.Lock() @@ -290,6 +292,8 @@ func (pv *ThresholdValidator) getExistingBlockSignature(block *Block) ([]byte, t func (pv *ThresholdValidator) SignBlock(chainID string, block *Block) 
([]byte, time.Time, error) { height, round, step, stamp, signBytes := block.Height, block.Round, block.Step, block.Timestamp, block.SignBytes + timeStartSignBlock := time.Now() + // Only the leader can execute this function. Followers can handle the requests, // but they just need to proxy the request to the raft leader if pv.raftStore.raft == nil { @@ -297,6 +301,7 @@ func (pv *ThresholdValidator) SignBlock(chainID string, block *Block) ([]byte, t } if pv.raftStore.raft.State() != raft.Leader { pv.logger.Debug("I am not the raft leader. Proxying request to the leader") + totalNotRaftLeader.Inc() signRes, err := pv.raftStore.LeaderSignBlock(CosignerSignBlockRequest{chainID, block}) if err != nil { if _, ok := err.(*rpcTypes.RPCError); ok { @@ -311,6 +316,7 @@ func (pv *ThresholdValidator) SignBlock(chainID string, block *Block) ([]byte, t return signRes.Signature, stamp, nil } + totalRaftLeader.Inc() pv.logger.Debug("I am the raft leader. Managing the sign process for this block") hrst := HRSTKey{ @@ -394,6 +400,7 @@ func (pv *ThresholdValidator) SignBlock(chainID string, block *Block) ([]byte, t encryptedEphemeralSharesThresholdMap[pv.cosigner] = ourEphemeralSecretParts.EncryptedSecrets thresholdPeersMutex.Unlock() + timedSignBlockThresholdLag.Observe(time.Since(timeStartSignBlock).Seconds()) pv.logger.Debug("Have threshold peers") setEphemeralAndSignWaitGroup := sync.WaitGroup{} @@ -421,6 +428,7 @@ func (pv *ThresholdValidator) SignBlock(chainID string, block *Block) ([]byte, t return nil, stamp, errors.New("timed out waiting for peers to sign") } + timedSignBlockCosignerLag.Observe(time.Since(timeStartSignBlock).Seconds()) pv.logger.Debug("Done waiting for cosigners, assembling signatures") // collect all valid responses into array of ids and signatures for the threshold lib @@ -438,6 +446,7 @@ func (pv *ThresholdValidator) SignBlock(chainID string, block *Block) ([]byte, t } if len(sigIds) < pv.threshold { + totalInsufficientCosigners.Inc() return nil, stamp, 
errors.New("not enough co-signers") } @@ -449,6 +458,7 @@ func (pv *ThresholdValidator) SignBlock(chainID string, block *Block) ([]byte, t // verify the combined signature before saving to watermark if !pv.pubkey.VerifySignature(signBytes, signature) { + totalInvalidSignature.Inc() return nil, stamp, errors.New("combined signature is not valid") } @@ -473,5 +483,8 @@ func (pv *ThresholdValidator) SignBlock(chainID string, block *Block) ([]byte, t pv.logger.Error("Error emitting LSS", err.Error()) } + timeSignBlock := time.Since(timeStartSignBlock).Seconds() + timedSignBlockLag.Observe(timeSignBlock) + return signature, stamp, nil } From 9b995acb5f5da95d51a551d46168131c38a8b44d Mon Sep 17 00:00:00 2001 From: Chill Validation Date: Sun, 4 Sep 2022 08:19:42 +0200 Subject: [PATCH 13/44] Adjust style for linter --- cmd/horcrux/cmd/config.go | 2 +- cmd/horcrux/cmd/metrics.go | 12 +++++++++++- signer/local_cosigner.go | 3 ++- signer/metrics.go | 9 +++------ 4 files changed, 17 insertions(+), 9 deletions(-) diff --git a/cmd/horcrux/cmd/config.go b/cmd/horcrux/cmd/config.go index 3a664f72..dd40eda8 100644 --- a/cmd/horcrux/cmd/config.go +++ b/cmd/horcrux/cmd/config.go @@ -492,7 +492,7 @@ type DiskConfig struct { ChainID string `json:"chain-id" yaml:"chain-id"` CosignerConfig *CosignerConfig `json:"cosigner,omitempty" yaml:"cosigner,omitempty"` ChainNodes []ChainNode `json:"chain-nodes,omitempty" yaml:"chain-nodes,omitempty"` - PrometheusListenAddress string `json:"prometheus-listen-address,omitempty" yaml:"prometheus-listen-address,omitempty"` + PrometheusListenAddress string `json:"prometheus-listen-address,omitempty" yaml:"prometheus-listen-address,omitempty"` //nolint } func (c *DiskConfig) Nodes() []signer.NodeConfig { diff --git a/cmd/horcrux/cmd/metrics.go b/cmd/horcrux/cmd/metrics.go index de36ca54..14007062 100644 --- a/cmd/horcrux/cmd/metrics.go +++ b/cmd/horcrux/cmd/metrics.go @@ -4,6 +4,7 @@ import ( "fmt" "net/http" "os" + "time" 
"github.com/prometheus/client_golang/prometheus/promhttp" tmlog "github.com/tendermint/tendermint/libs/log" @@ -18,7 +19,16 @@ func StartMetrics() { } logger.Info("Prometheus Metrics Listening", "address", config.Config.PrometheusListenAddress) http.Handle("/metrics", promhttp.Handler()) - if err := http.ListenAndServe(config.Config.PrometheusListenAddress, nil); err != nil { + + srv := &http.Server{ + Addr: config.Config.PrometheusListenAddress, + ReadTimeout: 1 * time.Second, + WriteTimeout: 1 * time.Second, + IdleTimeout: 30 * time.Second, + ReadHeaderTimeout: 2 * time.Second, + } + + if err := srv.ListenAndServe(); err != nil { logger.Error(fmt.Sprintf("Prometheus Endpoint failed to start: %s", err)) } } diff --git a/signer/local_cosigner.go b/signer/local_cosigner.go index f6ca010b..26f47151 100644 --- a/signer/local_cosigner.go +++ b/signer/local_cosigner.go @@ -250,7 +250,8 @@ func (cosigner *LocalCosigner) sign(req CosignerSignRequest) (CosignerSignRespon res.EphemeralPublic = ephemeralPublic res.Signature = sig - previousLocalSignFinishTime = time.Now() // Note - Function may return before this line so elapsed time for Finish may be multiple block times + // Note - Function may return before this line so elapsed time for Finish may be multiple block times + previousLocalSignFinishTime = time.Now() return res, nil } diff --git a/signer/metrics.go b/signer/metrics.go index 4ae76a00..091a23f1 100644 --- a/signer/metrics.go +++ b/signer/metrics.go @@ -75,12 +75,13 @@ var ( }) secondsSinceLastLocalSignFinish = promauto.NewGauge(prometheus.GaugeOpts{ Name: "signer_seconds_since_last_local_sign_finish_time", - Help: "Seconds Since Last Local Finish Sign (May increase to about 2 * Block Time; If high, CoSigner is not signing) ", + Help: "Seconds Since Last Local Finish Sign (Should stay below 2 * Block Time)", }) secondsSinceLastLocalEphemeralShareTime = promauto.NewGauge(prometheus.GaugeOpts{ Name: "signer_seconds_since_last_local_ephemeral_share_time", - Help: 
"Seconds Since Last Local Ephemeral Share Sign (Should not increase beyond block time; If high, may indicate raft joining issue for CoSigner) ", + Help: "Seconds Since Last Local Ephemeral Share Sign " + + "(Should not increase beyond block time; If high, may indicate raft joining issue for CoSigner) ", }) missedPrecommits = promauto.NewGauge(prometheus.GaugeOpts{ @@ -114,10 +115,6 @@ var ( Help: "Total Times Signer Failed to sign block - Unstarted and Unexepcted Height", }) - flagRaftLeader = promauto.NewGauge(prometheus.GaugeOpts{ - Name: "signer_is_raft_leader", - Help: "Signer is Raft Leader", - }) totalRaftLeader = promauto.NewCounter(prometheus.CounterOpts{ Name: "signer_total_raft_leader", Help: "Total Times Signer is Raft Leader", From c7d9c9ca7729d43650eeea461e26a6af72ecd56c Mon Sep 17 00:00:00 2001 From: Chill Validation Date: Sun, 4 Sep 2022 08:45:03 +0200 Subject: [PATCH 14/44] Prevent sentry crash loop using shorter dials --- signer/remote_signer.go | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/signer/remote_signer.go b/signer/remote_signer.go index dd2295f8..c50cc178 100644 --- a/signer/remote_signer.go +++ b/signer/remote_signer.go @@ -261,7 +261,10 @@ func StartRemoteSigners(services []tmService.Service, logger tmLog.Logger, chain var err error go StartMetrics() for _, node := range nodes { - dialer := net.Dialer{Timeout: 30 * time.Second} + // Tendermint requires a connection within 3 seconds of start or crashes + // A long timeout such as 30 seconds would cause the sentry to fail in loops + // Use a short timeout and dial often to connect within 3 second window + dialer := net.Dialer{Timeout: 2 * time.Second} s := NewReconnRemoteSigner(node.Address, logger, chainID, privVal, dialer) err = s.Start() From 0e4fc08b790be41540958036f1f7c0e86dac8ee7 Mon Sep 17 00:00:00 2001 From: Chill Validation Date: Sun, 18 Sep 2022 14:27:34 +0200 Subject: [PATCH 15/44] Add raft metrics and count missing ephemeral shares --- 
cmd/horcrux/cmd/metrics.go | 21 +++++++++++++++++++++ signer/metrics.go | 22 +++++++++++++++++++++- signer/raft_events.go | 1 + signer/remote_signer.go | 3 ++- signer/threshold_validator.go | 7 +++++++ 5 files changed, 52 insertions(+), 2 deletions(-) diff --git a/cmd/horcrux/cmd/metrics.go b/cmd/horcrux/cmd/metrics.go index 14007062..3c05fc34 100644 --- a/cmd/horcrux/cmd/metrics.go +++ b/cmd/horcrux/cmd/metrics.go @@ -6,6 +6,9 @@ import ( "os" "time" + "github.com/armon/go-metrics" + gmprometheus "github.com/armon/go-metrics/prometheus" + "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/client_golang/prometheus/promhttp" tmlog "github.com/tendermint/tendermint/libs/log" ) @@ -13,6 +16,24 @@ import ( func StartMetrics() { logger := tmlog.NewTMLogger(tmlog.NewSyncWriter(os.Stdout)).With("module", "metrics") + // Add raft metrics to prometheus + enableRaftMetrics := true + if enableRaftMetrics { + // PrometheusSink config w/ definitions for each metric type + cfg := gmprometheus.DefaultPrometheusOpts + sink, err := gmprometheus.NewPrometheusSinkFrom(cfg) + if err != nil { + logger.Error("Could not configure Raft Metrics") + } + defer prometheus.Unregister(sink) + _, err = metrics.NewGlobal(metrics.DefaultConfig("horcrux"), sink) + if err != nil { + logger.Error("Could not add Raft Metrics") + } + } + + // Configure Prometheus HTTP Server and Handler + if len(config.Config.PrometheusListenAddress) == 0 { logger.Error("prometheus-listen-address not defined") return diff --git a/signer/metrics.go b/signer/metrics.go index 091a23f1..cfaf1cf0 100644 --- a/signer/metrics.go +++ b/signer/metrics.go @@ -101,6 +101,21 @@ var ( Help: "Total Prevote Missed", }) + missedEphemeralShares = promauto.NewGaugeVec( + prometheus.GaugeOpts{ + Name: "signer_missed_ephemeral_shares", + Help: "Consecutive Threshold Signature Parts Missed", + }, + []string{"peerid"}, + ) + totalMissedEphemeralShares = promauto.NewCounterVec( + prometheus.CounterOpts{ + Name: 
"signer_total_missed_ephemeral_shares", + Help: "Total Threshold Signature Parts Missed", + }, + []string{"peerid"}, + ) + totalSentryConnectTries = promauto.NewCounter(prometheus.CounterOpts{ Name: "signer_total_sentry_connect_tries", Help: "Total Number of times sentry TCP connect has been tried (High count may indicate validator restarts)", @@ -123,6 +138,10 @@ var ( Name: "signer_total_raft_not_leader", Help: "Total Times Signer is NOT Raft Leader (Proxy signing to Raft Leader)", }) + totalRaftLeaderElectiontimeout = promauto.NewCounter(prometheus.CounterOpts{ + Name: "signer_total_raft_leader_election_timeout", + Help: "Total Times Raft Leader Failed Election (Lacking Peers)", + }) totalInvalidSignature = promauto.NewCounter(prometheus.CounterOpts{ Name: "signer_error_total_invalid_signatures", @@ -164,11 +183,12 @@ var ( func StartMetrics() { for { + // Update elapsed times on an interval basis secondsSinceLastPrecommit.Set(time.Since(previousPrecommitTime).Seconds()) secondsSinceLastPrevote.Set(time.Since(previousPrevoteTime).Seconds()) secondsSinceLastLocalSignStart.Set(time.Since(previousLocalSignStartTime).Seconds()) secondsSinceLastLocalSignFinish.Set(time.Since(previousLocalSignFinishTime).Seconds()) secondsSinceLastLocalEphemeralShareTime.Set(time.Since(previousLocalEphemeralShareTime).Seconds()) - <-time.After(250 * time.Millisecond) + <-time.After(100 * time.Millisecond) } } diff --git a/signer/raft_events.go b/signer/raft_events.go index a4e9aa75..f438a5fb 100644 --- a/signer/raft_events.go +++ b/signer/raft_events.go @@ -46,6 +46,7 @@ func (s *RaftStore) getLeaderGRPCClient() (proto.CosignerGRPCClient, *grpc.Clien time.Sleep(100 * time.Millisecond) } if leader == "" { + totalRaftLeaderElectiontimeout.Inc() return nil, nil, errors.New("timed out waiting for leader election to complete") } conn, err := grpc.Dial(leader, grpc.WithTransportCredentials(insecure.NewCredentials())) diff --git a/signer/remote_signer.go b/signer/remote_signer.go index 
c50cc178..5ed7bf60 100644 --- a/signer/remote_signer.go +++ b/signer/remote_signer.go @@ -174,6 +174,7 @@ func (rs *ReconnRemoteSigner) handleSignVoteRequest(vote *tmProto.Vote) tmProtoP totalPrecommitsSigned.Inc() } if vote.Type == tmProto.PrevoteType { + // Determine number of heights since the last Prevote stepSize := vote.Height - previousPrevoteHeight if previousPrevoteHeight != 0 && stepSize > 1 { missedPrevotes.Add(float64(stepSize)) @@ -182,7 +183,7 @@ func (rs *ReconnRemoteSigner) handleSignVoteRequest(vote *tmProto.Vote) tmProtoP missedPrevotes.Set(0) } - previousPrevoteHeight = vote.Height + previousPrevoteHeight = vote.Height // remember last PrevoteHeight previousPrevoteTime = time.Now() lastPrevoteHeight.Set(float64(vote.Height)) diff --git a/signer/threshold_validator.go b/signer/threshold_validator.go index 2b378c40..2fc75bfd 100644 --- a/signer/threshold_validator.go +++ b/signer/threshold_validator.go @@ -178,9 +178,16 @@ func (pv *ThresholdValidator) waitForPeerEphemeralShares( ) { ephemeralSecretParts, err := peer.GetEphemeralSecretParts(hrst) if err != nil { + + // Significant missing shares may lead to signature failure + missedEphemeralShares.WithLabelValues(peer.GetAddress()).Add(float64(1)) + totalMissedEphemeralShares.WithLabelValues(peer.GetAddress()).Inc() pv.logger.Error("Error getting secret parts", "peer", peer.GetID(), "err", err) return } + // Significant missing shares may lead to signature failure + missedEphemeralShares.WithLabelValues(peer.GetAddress()).Set(0) + // Check so that getEphemeralWaitGroup.Done is not called more than (threshold - 1) times which causes hardlock thresholdPeersMutex.Lock() if len(*encryptedEphemeralSharesThresholdMap) < pv.threshold-1 { From 7f6b4f8b5f9f65a491e3d7f5814e14a98a8dbea8 Mon Sep 17 00:00:00 2001 From: Chill Validation Date: Sun, 18 Sep 2022 14:45:28 +0200 Subject: [PATCH 16/44] Add missing ephemeral share metrics documentation --- docs/metrics.md | 19 +++++++++++++++++++ 1 file changed, 19 
insertions(+) diff --git a/docs/metrics.md b/docs/metrics.md index a073c2e1..a528668a 100644 --- a/docs/metrics.md +++ b/docs/metrics.md @@ -29,6 +29,18 @@ chain-nodes: prometheus-listen-address: 0.0.0.0:6001 ``` +## Prometheus Cautions + +Prometheus scrapes data every minute by default which is not fast enough to log metrics which change on a fast interval. + +Set the scrape_interval between 1 and 3 seconds in prometheus.yml if you wish to log/monitor these metrics. Note this will take more disk space. + +``` +global: + scrape_interval: 3s +``` + + ## Watching Single Signers Single node signers don't execute any cosigner code, so the basic metrics are: @@ -47,9 +59,16 @@ If there are skips in the block heights requested to be signed the following cou Watch 'signer_total_sentry_connect_tries' which reports retry connects to the specified sentry. Any increase is an indicator of network or sentry process failure +## Watching Cosigner With Grafana + +A sample Grapfana configration is available. See [`horcrux.json`](https://github.com/chillyvee/horcrux-info/blob/master/grafana/horcrux.json) + + ## Watching For Cosigner Trouble Metrics may vary between Cosigner processes since there is only one leader. +Watch 'signer_missed_ephemeral_shares' which will note when the leader is not able to get a signature from the peer. If 'signer_total_missed_ephemeral_shares' increases to a high number, this may indicate a larger issue. + Each block, Ephemeral Secrets are shared between Cosigners. Monitoring 'signer_seconds_since_last_local_ephemeral_share_time' and ensuring it does not exceed the block time will allow you to know when a Cosigner was not contacted for a block. 
## Metrics that don't always correspond to block time From 958bf416bb7bad47ce4e173a07869e19455dbd8d Mon Sep 17 00:00:00 2001 From: Chill Validation Date: Sun, 18 Sep 2022 15:04:49 +0200 Subject: [PATCH 17/44] Add consecutive sentry connect tries --- docs/metrics.md | 4 +++- signer/metrics.go | 4 ++++ signer/remote_signer.go | 2 ++ 3 files changed, 9 insertions(+), 1 deletion(-) diff --git a/docs/metrics.md b/docs/metrics.md index a528668a..c6f12e20 100644 --- a/docs/metrics.md +++ b/docs/metrics.md @@ -57,7 +57,9 @@ If there are skips in the block heights requested to be signed the following cou ## Watching Sentry Failure -Watch 'signer_total_sentry_connect_tries' which reports retry connects to the specified sentry. Any increase is an indicator of network or sentry process failure +Watch 'signer_sentry_connect_tries' for any increase which indicates retry attempts to reach your sentry. + +If 'signer_total_sentry_connect_tries' is significant, it can indicate network or server issues. ## Watching Cosigner With Grafana diff --git a/signer/metrics.go b/signer/metrics.go index cfaf1cf0..7ac9a583 100644 --- a/signer/metrics.go +++ b/signer/metrics.go @@ -116,6 +116,10 @@ var ( []string{"peerid"}, ) + sentryConnectTries = promauto.NewGauge(prometheus.GaugeOpts{ + Name: "signer_sentry_connect_tries", + Help: "Consecutive Number of times sentry TCP connect has been tried (High count may indicate validator restarts)", + }) totalSentryConnectTries = promauto.NewCounter(prometheus.CounterOpts{ Name: "signer_total_sentry_connect_tries", Help: "Total Number of times sentry TCP connect has been tried (High count may indicate validator restarts)", diff --git a/signer/remote_signer.go b/signer/remote_signer.go index 5ed7bf60..b47a91d2 100644 --- a/signer/remote_signer.go +++ b/signer/remote_signer.go @@ -77,12 +77,14 @@ func (rs *ReconnRemoteSigner) loop() { proto, address := tmNet.ProtocolAndAddress(rs.address) netConn, err := rs.dialer.Dial(proto, address) if err != nil { + 
sentryConnectTries.Add(float64(1)) totalSentryConnectTries.Inc() rs.Logger.Error("Dialing", "err", err) rs.Logger.Info("Retrying", "sleep (s)", 3, "address", rs.address) time.Sleep(time.Second * 3) continue } + sentryConnectTries.Set(0) rs.Logger.Info("Connected to Sentry", "address", rs.address) conn, err = tmP2pConn.MakeSecretConnection(netConn, rs.privKey) From 80970ca793afb7462ced1a3d30b7bff4192289bd Mon Sep 17 00:00:00 2001 From: Chill Validation Date: Tue, 27 Sep 2022 13:26:20 +0200 Subject: [PATCH 18/44] Add pprof debug server --- cmd/horcrux/cmd/config.go | 26 ++++++++++----------- cmd/horcrux/cmd/cosigner.go | 2 +- cmd/horcrux/cmd/metrics.go | 46 +++++++++++++++++++++++++++++-------- cmd/horcrux/cmd/signer.go | 2 +- 4 files changed, 52 insertions(+), 24 deletions(-) diff --git a/cmd/horcrux/cmd/config.go b/cmd/horcrux/cmd/config.go index dd40eda8..69f8fe28 100644 --- a/cmd/horcrux/cmd/config.go +++ b/cmd/horcrux/cmd/config.go @@ -74,7 +74,7 @@ func initCmd() *cobra.Command { if keyFileFlag != "" { keyFile = &keyFileFlag } - prometheusListenAddress, _ := cmdFlags.GetString("metrics") + debugListenAddress, _ := cmdFlags.GetString("debuglisten") if cs { // Cosigner Config p, _ := cmdFlags.GetString("peers") @@ -111,8 +111,8 @@ func initCmd() *cobra.Command { Peers: peers, Timeout: timeout, }, - ChainNodes: cn, - PrometheusListenAddress: prometheusListenAddress, + ChainNodes: cn, + DebugListenAddress: debugListenAddress, } if err = validateCosignerConfig(cfg); err != nil { return err @@ -124,10 +124,10 @@ func initCmd() *cobra.Command { } prometheusListenAddress, _ := cmdFlags.GetString("metrics") cfg = DiskConfig{ - PrivValKeyFile: keyFile, - ChainID: cid, - ChainNodes: cn, - PrometheusListenAddress: prometheusListenAddress, + PrivValKeyFile: keyFile, + ChainID: cid, + ChainNodes: cn, + DebugListenAddress: prometheusListenAddress, } if err = validateSingleSignerConfig(cfg); err != nil { return err @@ -168,7 +168,7 @@ func initCmd() *cobra.Command { "(i.e. 
\"tcp://node-1:2222|2,tcp://node-2:2222|3\")") cmd.Flags().IntP("threshold", "t", 0, "indicate number of signatures required for threshold signature") cmd.Flags().StringP("listen", "l", "", "listen address of the signer") - cmd.Flags().StringP("metrics", "m", "", "listen address for prometheus metrics") + cmd.Flags().StringP("debuglisten", "d", "", "listen address for Debug and Prometheus metrics") cmd.Flags().StringP("keyfile", "k", "", "priv val key file path (full key for single signer, or key share for cosigner)") cmd.Flags().String("timeout", "1500ms", "configure cosigner rpc server timeout value, \n"+ @@ -488,11 +488,11 @@ func setChainIDCmd() *cobra.Command { // Config maps to the on-disk JSON format type DiskConfig struct { - PrivValKeyFile *string `json:"key-file,omitempty" yaml:"key-file,omitempty"` - ChainID string `json:"chain-id" yaml:"chain-id"` - CosignerConfig *CosignerConfig `json:"cosigner,omitempty" yaml:"cosigner,omitempty"` - ChainNodes []ChainNode `json:"chain-nodes,omitempty" yaml:"chain-nodes,omitempty"` - PrometheusListenAddress string `json:"prometheus-listen-address,omitempty" yaml:"prometheus-listen-address,omitempty"` //nolint + PrivValKeyFile *string `json:"key-file,omitempty" yaml:"key-file,omitempty"` + ChainID string `json:"chain-id" yaml:"chain-id"` + CosignerConfig *CosignerConfig `json:"cosigner,omitempty" yaml:"cosigner,omitempty"` + ChainNodes []ChainNode `json:"chain-nodes,omitempty" yaml:"chain-nodes,omitempty"` + DebugListenAddress string `json:"debug-listen-address,omitempty" yaml:"debug-listen-address,omitempty"` //nolint } func (c *DiskConfig) Nodes() []signer.NodeConfig { diff --git a/cmd/horcrux/cmd/cosigner.go b/cmd/horcrux/cmd/cosigner.go index 803c2dec..5967e350 100644 --- a/cmd/horcrux/cmd/cosigner.go +++ b/cmd/horcrux/cmd/cosigner.go @@ -239,7 +239,7 @@ func StartCosignerCmd() *cobra.Command { } logger.Info("Signer", "address", pubkey.Address()) - go StartMetrics() + go EnableDebugAndMetrics() services, err = 
signer.StartRemoteSigners(services, logger, cfg.ChainID, pv, cfg.Nodes) if err != nil { diff --git a/cmd/horcrux/cmd/metrics.go b/cmd/horcrux/cmd/metrics.go index 3c05fc34..0332aa73 100644 --- a/cmd/horcrux/cmd/metrics.go +++ b/cmd/horcrux/cmd/metrics.go @@ -3,6 +3,7 @@ package cmd import ( "fmt" "net/http" + "net/http/pprof" "os" "time" @@ -13,7 +14,7 @@ import ( tmlog "github.com/tendermint/tendermint/libs/log" ) -func StartMetrics() { +func AddPrometheusMetrics(mux *http.ServeMux) { logger := tmlog.NewTMLogger(tmlog.NewSyncWriter(os.Stdout)).With("module", "metrics") // Add raft metrics to prometheus @@ -32,24 +33,51 @@ func StartMetrics() { } } - // Configure Prometheus HTTP Server and Handler + mux.Handle("/metrics", promhttp.Handler()) + logger.Info("Prometheus Metrics Listening", "address", config.Config.DebugListenAddress, "path", "/metrics") +} + +// EnableDebugAndMetrics - Initialization errors are not fatal, only logged +func EnableDebugAndMetrics() { + logger := tmlog.NewTMLogger(tmlog.NewSyncWriter(os.Stdout)).With("module", "debugserver") - if len(config.Config.PrometheusListenAddress) == 0 { - logger.Error("prometheus-listen-address not defined") + // Configure Shared Debug HTTP Server for pprof and prometheus + if len(config.Config.DebugListenAddress) == 0 { + logger.Error("debug-listen-address not defined") return } - logger.Info("Prometheus Metrics Listening", "address", config.Config.PrometheusListenAddress) - http.Handle("/metrics", promhttp.Handler()) + logger.Info("Debug Server Listening", "address", config.Config.DebugListenAddress) + + // Set up new mux identical to the default mux configuration in net/http/pprof. 
+ mux := http.NewServeMux() + mux.HandleFunc("/debug/pprof/", pprof.Index) + mux.HandleFunc("/debug/pprof/cmdline", pprof.Cmdline) + mux.HandleFunc("/debug/pprof/profile", pprof.Profile) + mux.HandleFunc("/debug/pprof/symbol", pprof.Symbol) + mux.HandleFunc("/debug/pprof/trace", pprof.Trace) + + // And redirect the browser to the /debug/pprof root, + // so operators don't see a mysterious 404 page. + mux.Handle("/", http.RedirectHandler("/debug/pprof", http.StatusSeeOther)) + // Add prometheus metrics + AddPrometheusMetrics(mux) + + // Configure Debug Server Network Parameters srv := &http.Server{ - Addr: config.Config.PrometheusListenAddress, + Handler: mux, + //ErrorLog: &logger, + + Addr: config.Config.DebugListenAddress, ReadTimeout: 1 * time.Second, - WriteTimeout: 1 * time.Second, + WriteTimeout: 30 * time.Second, IdleTimeout: 30 * time.Second, ReadHeaderTimeout: 2 * time.Second, } + // Start Debug Server. if err := srv.ListenAndServe(); err != nil { - logger.Error(fmt.Sprintf("Prometheus Endpoint failed to start: %s", err)) + logger.Error(fmt.Sprintf("Debug Endpoint failed to start: %s", err)) + return } } diff --git a/cmd/horcrux/cmd/signer.go b/cmd/horcrux/cmd/signer.go index 7e40ce2a..6d1738a3 100644 --- a/cmd/horcrux/cmd/signer.go +++ b/cmd/horcrux/cmd/signer.go @@ -72,7 +72,7 @@ func StartSignerCmd() *cobra.Command { } logger.Info("Signer", "pubkey", pubkey) - go StartMetrics() + go EnableDebugAndMetrics() services, err = signer.StartRemoteSigners(services, logger, cfg.ChainID, pv, cfg.Nodes) if err != nil { From 399e454c196b8b2c9de3033b0a70b5cc05bfaf13 Mon Sep 17 00:00:00 2001 From: Chill Validation Date: Tue, 27 Sep 2022 13:32:27 +0200 Subject: [PATCH 19/44] Update documentation for shared debug-listen-address --- docs/metrics.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/metrics.md b/docs/metrics.md index c6f12e20..fdbd3f4c 100644 --- a/docs/metrics.md +++ b/docs/metrics.md @@ -8,7 +8,7 @@ horcrux ..options.. 
-m 0.0.0.0:8001 For earlier adopters, add the following key to your config.toml -prometheus-listen-address: 0.0.0.0:6001 +debug-listen-address: 0.0.0.0:6001 Resulting in a configuration like the following: @@ -26,7 +26,7 @@ cosigner: rpc-timeout: 1500ms chain-nodes: - priv-val-addr: tcp://localhost:2300 -prometheus-listen-address: 0.0.0.0:6001 +debug-listen-address: 0.0.0.0:6001 ``` ## Prometheus Cautions From 4a3c092e6e02caa0c561f0efb63dd1e46b9ac0cd Mon Sep 17 00:00:00 2001 From: Chill Validation Date: Tue, 27 Sep 2022 18:42:23 +0200 Subject: [PATCH 20/44] Monitor Ephemeral Share Lag --- signer/metrics.go | 8 ++++++++ signer/threshold_validator.go | 2 ++ 2 files changed, 10 insertions(+) diff --git a/signer/metrics.go b/signer/metrics.go index 7ac9a583..017fc680 100644 --- a/signer/metrics.go +++ b/signer/metrics.go @@ -175,6 +175,14 @@ var ( Objectives: map[float64]float64{0.5: 0.05, 0.9: 0.01, 0.99: 0.001}, }) + timedCosignerEphemeralShareLag = promauto.NewSummaryVec( + prometheus.SummaryOpts{ + Name: "signer_cosigner_ephemeral_share_lag_seconds", + Help: "Time taken to get cosigner ephemeral share", + Objectives: map[float64]float64{0.5: 0.05, 0.9: 0.01, 0.99: 0.001}, + }, + []string{"peerid"}, + ) timedCosignerSignLag = promauto.NewSummaryVec( prometheus.SummaryOpts{ Name: "signer_cosigner_sign_lag_seconds", diff --git a/signer/threshold_validator.go b/signer/threshold_validator.go index 2fc75bfd..cc81a36a 100644 --- a/signer/threshold_validator.go +++ b/signer/threshold_validator.go @@ -176,6 +176,7 @@ func (pv *ThresholdValidator) waitForPeerEphemeralShares( encryptedEphemeralSharesThresholdMap *map[Cosigner][]CosignerEphemeralSecretPart, thresholdPeersMutex *sync.Mutex, ) { + peerStartTime := time.Now() ephemeralSecretParts, err := peer.GetEphemeralSecretParts(hrst) if err != nil { @@ -187,6 +188,7 @@ func (pv *ThresholdValidator) waitForPeerEphemeralShares( } // Significant missing shares may lead to signature failure 
missedEphemeralShares.WithLabelValues(peer.GetAddress()).Set(0) + timedCosignerEphemeralShareLag.WithLabelValues(peer.GetAddress()).Observe(time.Since(peerStartTime).Seconds()) // Check so that getEphemeralWaitGroup.Done is not called more than (threshold - 1) times which causes hardlock thresholdPeersMutex.Lock() From b41116b3807d9de064d39913cf9fd25939780558 Mon Sep 17 00:00:00 2001 From: Chill Validation Date: Wed, 28 Sep 2022 12:20:03 +0200 Subject: [PATCH 21/44] Overall code cleanup for PR - flag consistency, variable renaming --- cmd/horcrux/cmd/config.go | 6 +++--- cmd/horcrux/cmd/metrics.go | 28 ++++++++++++---------------- docs/metrics.md | 2 +- 3 files changed, 16 insertions(+), 20 deletions(-) diff --git a/cmd/horcrux/cmd/config.go b/cmd/horcrux/cmd/config.go index 69f8fe28..d9ae28b3 100644 --- a/cmd/horcrux/cmd/config.go +++ b/cmd/horcrux/cmd/config.go @@ -122,12 +122,12 @@ func initCmd() *cobra.Command { if len(cn) == 0 { return fmt.Errorf("must input at least one node") } - prometheusListenAddress, _ := cmdFlags.GetString("metrics") + debugListenAddress, _ := cmdFlags.GetString("debuglisten") cfg = DiskConfig{ PrivValKeyFile: keyFile, ChainID: cid, ChainNodes: cn, - DebugListenAddress: prometheusListenAddress, + DebugListenAddress: debugListenAddress, } if err = validateSingleSignerConfig(cfg); err != nil { return err @@ -168,7 +168,7 @@ func initCmd() *cobra.Command { "(i.e. 
\"tcp://node-1:2222|2,tcp://node-2:2222|3\")") cmd.Flags().IntP("threshold", "t", 0, "indicate number of signatures required for threshold signature") cmd.Flags().StringP("listen", "l", "", "listen address of the signer") - cmd.Flags().StringP("debuglisten", "d", "", "listen address for Debug and Prometheus metrics") + cmd.Flags().StringP("debuglisten", "d", "", "listen address for Debug and Prometheus metrics in format localhost:8543") cmd.Flags().StringP("keyfile", "k", "", "priv val key file path (full key for single signer, or key share for cosigner)") cmd.Flags().String("timeout", "1500ms", "configure cosigner rpc server timeout value, \n"+ diff --git a/cmd/horcrux/cmd/metrics.go b/cmd/horcrux/cmd/metrics.go index 0332aa73..2fc6bc97 100644 --- a/cmd/horcrux/cmd/metrics.go +++ b/cmd/horcrux/cmd/metrics.go @@ -9,7 +9,6 @@ import ( "github.com/armon/go-metrics" gmprometheus "github.com/armon/go-metrics/prometheus" - "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/client_golang/prometheus/promhttp" tmlog "github.com/tendermint/tendermint/libs/log" ) @@ -17,20 +16,17 @@ import ( func AddPrometheusMetrics(mux *http.ServeMux) { logger := tmlog.NewTMLogger(tmlog.NewSyncWriter(os.Stdout)).With("module", "metrics") - // Add raft metrics to prometheus - enableRaftMetrics := true - if enableRaftMetrics { - // PrometheusSink config w/ definitions for each metric type - cfg := gmprometheus.DefaultPrometheusOpts - sink, err := gmprometheus.NewPrometheusSinkFrom(cfg) - if err != nil { - logger.Error("Could not configure Raft Metrics") - } - defer prometheus.Unregister(sink) - _, err = metrics.NewGlobal(metrics.DefaultConfig("horcrux"), sink) - if err != nil { - logger.Error("Could not add Raft Metrics") - } + // Add metrics from raft's implementation of go-metrics + cfg := gmprometheus.DefaultPrometheusOpts + sink, err := gmprometheus.NewPrometheusSinkFrom(cfg) + if err != nil { + logger.Error("Could not configure Raft Metrics") + panic(err) + } + _, 
err = metrics.NewGlobal(metrics.DefaultConfig("horcrux"), sink) + if err != nil { + logger.Error("Could not add Raft Metrics") + panic(err) } mux.Handle("/metrics", promhttp.Handler()) @@ -43,7 +39,7 @@ func EnableDebugAndMetrics() { // Configure Shared Debug HTTP Server for pprof and prometheus if len(config.Config.DebugListenAddress) == 0 { - logger.Error("debug-listen-address not defined") + logger.Info("debug-listen-address not defined; debug server disabled") return } logger.Info("Debug Server Listening", "address", config.Config.DebugListenAddress) diff --git a/docs/metrics.md b/docs/metrics.md index fdbd3f4c..2116710f 100644 --- a/docs/metrics.md +++ b/docs/metrics.md @@ -63,7 +63,7 @@ If 'signer_total_sentry_connect_tries' is significant, it can indicate network o ## Watching Cosigner With Grafana -A sample Grapfana configration is available. See [`horcrux.json`](https://github.com/chillyvee/horcrux-info/blob/master/grafana/horcrux.json) +A sample Grafana configration is available. 
See [`horcrux.json`](https://github.com/chillyvee/horcrux-info/blob/master/grafana/horcrux.json) ## Watching For Cosigner Trouble From 727d1d353eb0015a11f89798cbb55b34b99a34d6 Mon Sep 17 00:00:00 2001 From: Chill Validation Date: Wed, 28 Sep 2022 12:41:53 +0200 Subject: [PATCH 22/44] Add mutex for metrics timer updates --- signer/local_cosigner.go | 8 ++++++++ signer/metrics.go | 11 ++++++++++- signer/remote_signer.go | 9 ++++++++- 3 files changed, 26 insertions(+), 2 deletions(-) diff --git a/signer/local_cosigner.go b/signer/local_cosigner.go index 26f47151..95940051 100644 --- a/signer/local_cosigner.go +++ b/signer/local_cosigner.go @@ -156,7 +156,9 @@ func (cosigner *LocalCosigner) GetAddress() string { // Return the signed bytes or an error // Implements Cosigner interface func (cosigner *LocalCosigner) sign(req CosignerSignRequest) (CosignerSignResponse, error) { + metricsPeriodicUpdateMutex.Lock() previousLocalSignStartTime = time.Now() // This function has multiple exit points. 
Only start time can be guaranteed + metricsPeriodicUpdateMutex.Unlock() cosigner.lastSignStateMutex.Lock() defer cosigner.lastSignStateMutex.Unlock() @@ -251,7 +253,10 @@ func (cosigner *LocalCosigner) sign(req CosignerSignRequest) (CosignerSignRespon res.Signature = sig // Note - Function may return before this line so elapsed time for Finish may be multiple block times + metricsPeriodicUpdateMutex.Lock() previousLocalSignFinishTime = time.Now() + metricsPeriodicUpdateMutex.Unlock() + return res, nil } @@ -291,7 +296,10 @@ func (cosigner *LocalCosigner) dealShares(req CosignerGetEphemeralSecretPartRequ func (cosigner *LocalCosigner) GetEphemeralSecretParts( hrst HRSTKey) (*CosignerEphemeralSecretPartsResponse, error) { + metricsPeriodicUpdateMutex.Lock() previousLocalEphemeralShareTime = time.Now() + metricsPeriodicUpdateMutex.Unlock() + res := &CosignerEphemeralSecretPartsResponse{ EncryptedSecrets: make([]CosignerEphemeralSecretPart, 0, len(cosigner.peers)-1), } diff --git a/signer/metrics.go b/signer/metrics.go index 017fc680..da2b793f 100644 --- a/signer/metrics.go +++ b/signer/metrics.go @@ -1,6 +1,7 @@ package signer import ( + "sync" "time" "github.com/prometheus/client_golang/prometheus" @@ -16,6 +17,7 @@ var ( previousLocalSignStartTime = time.Now() previousLocalSignFinishTime = time.Now() previousLocalEphemeralShareTime = time.Now() + metricsPeriodicUpdateMutex = sync.Mutex{} // Prometheus Metrics totalPubKeyRequests = promauto.NewCounter(prometheus.CounterOpts{ @@ -194,13 +196,20 @@ var ( ) func StartMetrics() { + // Update elapsed times on an interval basis for { - // Update elapsed times on an interval basis + // Use mutex to prevent race. 
Stats aligned with loop iteration + metricsPeriodicUpdateMutex.Lock() secondsSinceLastPrecommit.Set(time.Since(previousPrecommitTime).Seconds()) secondsSinceLastPrevote.Set(time.Since(previousPrevoteTime).Seconds()) secondsSinceLastLocalSignStart.Set(time.Since(previousLocalSignStartTime).Seconds()) secondsSinceLastLocalSignFinish.Set(time.Since(previousLocalSignFinishTime).Seconds()) secondsSinceLastLocalEphemeralShareTime.Set(time.Since(previousLocalEphemeralShareTime).Seconds()) + metricsPeriodicUpdateMutex.Unlock() + + // Prometheus often only polls every 1 to every few seconds + // Frequent updates minimize reporting error. + // Accuracy of 100ms is probably sufficient <-time.After(100 * time.Millisecond) } } diff --git a/signer/remote_signer.go b/signer/remote_signer.go index b47a91d2..dbee82b8 100644 --- a/signer/remote_signer.go +++ b/signer/remote_signer.go @@ -169,8 +169,12 @@ func (rs *ReconnRemoteSigner) handleSignVoteRequest(vote *tmProto.Vote) tmProtoP } else { missedPrecommits.Set(0) } - previousPrecommitHeight = vote.Height + previousPrecommitHeight = vote.Height // remember last PrecommitHeight + + metricsPeriodicUpdateMutex.Lock() previousPrecommitTime = time.Now() + metricsPeriodicUpdateMutex.Unlock() + lastPrecommitHeight.Set(float64(vote.Height)) lastPrecommitRound.Set(float64(vote.Round)) totalPrecommitsSigned.Inc() @@ -186,7 +190,10 @@ func (rs *ReconnRemoteSigner) handleSignVoteRequest(vote *tmProto.Vote) tmProtoP } previousPrevoteHeight = vote.Height // remember last PrevoteHeight + + metricsPeriodicUpdateMutex.Lock() previousPrevoteTime = time.Now() + metricsPeriodicUpdateMutex.Unlock() lastPrevoteHeight.Set(float64(vote.Height)) lastPrevoteRound.Set(float64(vote.Round)) From 85d0e8f86d5ff38a3093c378fa76c6d48aa9b9e5 Mon Sep 17 00:00:00 2001 From: Chill Validation Date: Wed, 28 Sep 2022 16:23:17 +0200 Subject: [PATCH 23/44] Change from debug-listen-address to debug-addr to match relayer --- cmd/horcrux/cmd/config.go | 27 
+++++++++++++-------------- cmd/horcrux/cmd/metrics.go | 10 +++++----- 2 files changed, 18 insertions(+), 19 deletions(-) diff --git a/cmd/horcrux/cmd/config.go b/cmd/horcrux/cmd/config.go index d9ae28b3..8dc2d1dd 100644 --- a/cmd/horcrux/cmd/config.go +++ b/cmd/horcrux/cmd/config.go @@ -74,7 +74,7 @@ func initCmd() *cobra.Command { if keyFileFlag != "" { keyFile = &keyFileFlag } - debugListenAddress, _ := cmdFlags.GetString("debuglisten") + debugAddr, _ := cmdFlags.GetString("debug-addr") if cs { // Cosigner Config p, _ := cmdFlags.GetString("peers") @@ -111,8 +111,8 @@ func initCmd() *cobra.Command { Peers: peers, Timeout: timeout, }, - ChainNodes: cn, - DebugListenAddress: debugListenAddress, + ChainNodes: cn, + DebugAddr: debugAddr, } if err = validateCosignerConfig(cfg); err != nil { return err @@ -122,12 +122,11 @@ func initCmd() *cobra.Command { if len(cn) == 0 { return fmt.Errorf("must input at least one node") } - debugListenAddress, _ := cmdFlags.GetString("debuglisten") cfg = DiskConfig{ - PrivValKeyFile: keyFile, - ChainID: cid, - ChainNodes: cn, - DebugListenAddress: debugListenAddress, + PrivValKeyFile: keyFile, + ChainID: cid, + ChainNodes: cn, + DebugAddr: debugAddr, } if err = validateSingleSignerConfig(cfg); err != nil { return err @@ -168,7 +167,7 @@ func initCmd() *cobra.Command { "(i.e. 
\"tcp://node-1:2222|2,tcp://node-2:2222|3\")") cmd.Flags().IntP("threshold", "t", 0, "indicate number of signatures required for threshold signature") cmd.Flags().StringP("listen", "l", "", "listen address of the signer") - cmd.Flags().StringP("debuglisten", "d", "", "listen address for Debug and Prometheus metrics in format localhost:8543") + cmd.Flags().StringP("debug-addr", "d", "", "listen address for Debug and Prometheus metrics in format localhost:8543") cmd.Flags().StringP("keyfile", "k", "", "priv val key file path (full key for single signer, or key share for cosigner)") cmd.Flags().String("timeout", "1500ms", "configure cosigner rpc server timeout value, \n"+ @@ -488,11 +487,11 @@ func setChainIDCmd() *cobra.Command { // Config maps to the on-disk JSON format type DiskConfig struct { - PrivValKeyFile *string `json:"key-file,omitempty" yaml:"key-file,omitempty"` - ChainID string `json:"chain-id" yaml:"chain-id"` - CosignerConfig *CosignerConfig `json:"cosigner,omitempty" yaml:"cosigner,omitempty"` - ChainNodes []ChainNode `json:"chain-nodes,omitempty" yaml:"chain-nodes,omitempty"` - DebugListenAddress string `json:"debug-listen-address,omitempty" yaml:"debug-listen-address,omitempty"` //nolint + PrivValKeyFile *string `json:"key-file,omitempty" yaml:"key-file,omitempty"` + ChainID string `json:"chain-id" yaml:"chain-id"` + CosignerConfig *CosignerConfig `json:"cosigner,omitempty" yaml:"cosigner,omitempty"` + ChainNodes []ChainNode `json:"chain-nodes,omitempty" yaml:"chain-nodes,omitempty"` + DebugAddr string `json:"debug-addr,omitempty" yaml:"debug-addr,omitempty"` //nolint } func (c *DiskConfig) Nodes() []signer.NodeConfig { diff --git a/cmd/horcrux/cmd/metrics.go b/cmd/horcrux/cmd/metrics.go index 2fc6bc97..ba01af86 100644 --- a/cmd/horcrux/cmd/metrics.go +++ b/cmd/horcrux/cmd/metrics.go @@ -30,7 +30,7 @@ func AddPrometheusMetrics(mux *http.ServeMux) { } mux.Handle("/metrics", promhttp.Handler()) - logger.Info("Prometheus Metrics Listening", "address", 
config.Config.DebugListenAddress, "path", "/metrics") + logger.Info("Prometheus Metrics Listening", "address", config.Config.DebugAddr, "path", "/metrics") } // EnableDebugAndMetrics - Initialization errors are not fatal, only logged @@ -38,11 +38,11 @@ func EnableDebugAndMetrics() { logger := tmlog.NewTMLogger(tmlog.NewSyncWriter(os.Stdout)).With("module", "debugserver") // Configure Shared Debug HTTP Server for pprof and prometheus - if len(config.Config.DebugListenAddress) == 0 { - logger.Info("debug-listen-address not defined; debug server disabled") + if len(config.Config.DebugAddr) == 0 { + logger.Info("debug-addr not defined; debug server disabled") return } - logger.Info("Debug Server Listening", "address", config.Config.DebugListenAddress) + logger.Info("Debug Server Listening", "address", config.Config.DebugAddr) // Set up new mux identical to the default mux configuration in net/http/pprof. mux := http.NewServeMux() @@ -64,7 +64,7 @@ func EnableDebugAndMetrics() { Handler: mux, //ErrorLog: &logger, - Addr: config.Config.DebugListenAddress, + Addr: config.Config.DebugAddr, ReadTimeout: 1 * time.Second, WriteTimeout: 30 * time.Second, IdleTimeout: 30 * time.Second, From a11adc8a0085a8a942ac65bb8b080019a4d73f43 Mon Sep 17 00:00:00 2001 From: Chill Validation Date: Wed, 28 Sep 2022 16:31:05 +0200 Subject: [PATCH 24/44] Adjust output for easier future testing as recommended by mark-rushakoff --- cmd/horcrux/cmd/state.go | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/cmd/horcrux/cmd/state.go b/cmd/horcrux/cmd/state.go index e2d9f40f..b7b4c37c 100644 --- a/cmd/horcrux/cmd/state.go +++ b/cmd/horcrux/cmd/state.go @@ -102,7 +102,8 @@ func setStateCmd() *cobra.Command { return err } - fmt.Printf("Setting height %d\n", height) + fmt.Fprintf(cmd.OutOrStdout(), "Setting height %d\n", height) + pv.EphemeralPublic, share.EphemeralPublic = nil, nil signState := signer.SignStateConsensus{ Height: height, From d862311147a6d1a77f9328e9ae7a22c79edad5af 
Mon Sep 17 00:00:00 2001 From: Chill Validation Date: Wed, 28 Sep 2022 16:53:36 +0200 Subject: [PATCH 25/44] Move mutex protected time variables into a dedicated struct --- signer/local_cosigner.go | 12 ++---- signer/metrics.go | 79 ++++++++++++++++++++++++++++++++-------- signer/remote_signer.go | 8 +--- 3 files changed, 68 insertions(+), 31 deletions(-) diff --git a/signer/local_cosigner.go b/signer/local_cosigner.go index 95940051..a67ebbbf 100644 --- a/signer/local_cosigner.go +++ b/signer/local_cosigner.go @@ -156,9 +156,7 @@ func (cosigner *LocalCosigner) GetAddress() string { // Return the signed bytes or an error // Implements Cosigner interface func (cosigner *LocalCosigner) sign(req CosignerSignRequest) (CosignerSignResponse, error) { - metricsPeriodicUpdateMutex.Lock() - previousLocalSignStartTime = time.Now() // This function has multiple exit points. Only start time can be guaranteed - metricsPeriodicUpdateMutex.Unlock() + metricsTimeKeeper.SetPreviousLocalSignStart(time.Now()) // This function has multiple exit points.
Only start time can be guaranteed cosigner.lastSignStateMutex.Lock() defer cosigner.lastSignStateMutex.Unlock() @@ -253,9 +251,7 @@ func (cosigner *LocalCosigner) sign(req CosignerSignRequest) (CosignerSignRespon res.Signature = sig // Note - Function may return before this line so elapsed time for Finish may be multiple block times - metricsPeriodicUpdateMutex.Lock() - previousLocalSignFinishTime = time.Now() - metricsPeriodicUpdateMutex.Unlock() + metricsTimeKeeper.SetPreviousLocalSignFinish(time.Now()) return res, nil } @@ -296,9 +292,7 @@ func (cosigner *LocalCosigner) dealShares(req CosignerGetEphemeralSecretPartRequ func (cosigner *LocalCosigner) GetEphemeralSecretParts( hrst HRSTKey) (*CosignerEphemeralSecretPartsResponse, error) { - metricsPeriodicUpdateMutex.Lock() - previousLocalEphemeralShareTime = time.Now() - metricsPeriodicUpdateMutex.Unlock() + metricsTimeKeeper.SetPreviousLocalEphemeralShare(time.Now()) res := &CosignerEphemeralSecretPartsResponse{ EncryptedSecrets: make([]CosignerEphemeralSecretPart, 0, len(cosigner.peers)-1), diff --git a/signer/metrics.go b/signer/metrics.go index da2b793f..3e435947 100644 --- a/signer/metrics.go +++ b/signer/metrics.go @@ -8,16 +8,70 @@ import ( "github.com/prometheus/client_golang/prometheus/promauto" ) +type metricsTimer struct { + mu sync.Mutex + previousPrecommit, previousPrevote time.Time + previousLocalSignStart, previousLocalSignFinish time.Time + previousLocalEphemeralShare time.Time +} + +func newMetricsTimer() *metricsTimer { + now := time.Now() + return &metricsTimer{ + mu: sync.Mutex{}, + previousPrecommit: now, previousPrevote: now, + previousLocalSignStart: now, previousLocalSignFinish: now, + previousLocalEphemeralShare: now, + } +} + +func (mt *metricsTimer) SetPreviousPrecommit(t time.Time) { + mt.mu.Lock() + defer mt.mu.Unlock() + mt.previousPrecommit = t +} + +func (mt *metricsTimer) SetPreviousPrevote(t time.Time) { + mt.mu.Lock() + defer mt.mu.Unlock() + mt.previousPrevote = t +} + +func (mt 
*metricsTimer) SetPreviousLocalSignStart(t time.Time) { + mt.mu.Lock() + defer mt.mu.Unlock() + mt.previousLocalSignStart = t +} + +func (mt *metricsTimer) SetPreviousLocalSignFinish(t time.Time) { + mt.mu.Lock() + defer mt.mu.Unlock() + mt.previousLocalSignFinish = t +} + +func (mt *metricsTimer) SetPreviousLocalEphemeralShare(t time.Time) { + mt.mu.Lock() + defer mt.mu.Unlock() + mt.previousLocalEphemeralShare = t +} + +func (mt *metricsTimer) UpdatePrometheusMetrics(t time.Time) { + mt.mu.Lock() + defer mt.mu.Unlock() + + // Update Prometheus Gauges + secondsSinceLastPrecommit.Set(time.Since(mt.previousPrecommit).Seconds()) + secondsSinceLastPrevote.Set(time.Since(mt.previousPrevote).Seconds()) + secondsSinceLastLocalSignStart.Set(time.Since(mt.previousLocalSignStart).Seconds()) + secondsSinceLastLocalSignFinish.Set(time.Since(mt.previousLocalSignFinish).Seconds()) + secondsSinceLastLocalEphemeralShareTime.Set(time.Since(mt.previousLocalEphemeralShare).Seconds()) +} + var ( // Variables to calculate Prometheus Metrics - previousPrecommitHeight = int64(0) - previousPrevoteHeight = int64(0) - previousPrecommitTime = time.Now() - previousPrevoteTime = time.Now() - previousLocalSignStartTime = time.Now() - previousLocalSignFinishTime = time.Now() - previousLocalEphemeralShareTime = time.Now() - metricsPeriodicUpdateMutex = sync.Mutex{} + previousPrecommitHeight = int64(0) + previousPrevoteHeight = int64(0) + metricsTimeKeeper = newMetricsTimer() // Prometheus Metrics totalPubKeyRequests = promauto.NewCounter(prometheus.CounterOpts{ @@ -198,14 +252,7 @@ var ( func StartMetrics() { // Update elapsed times on an interval basis for { - // Use mutex to prevent race. 
Stats aligned with loop iteration - metricsPeriodicUpdateMutex.Lock() - secondsSinceLastPrecommit.Set(time.Since(previousPrecommitTime).Seconds()) - secondsSinceLastPrevote.Set(time.Since(previousPrevoteTime).Seconds()) - secondsSinceLastLocalSignStart.Set(time.Since(previousLocalSignStartTime).Seconds()) - secondsSinceLastLocalSignFinish.Set(time.Since(previousLocalSignFinishTime).Seconds()) - secondsSinceLastLocalEphemeralShareTime.Set(time.Since(previousLocalEphemeralShareTime).Seconds()) - metricsPeriodicUpdateMutex.Unlock() + metricsTimeKeeper.UpdatePrometheusMetrics(time.Now()) // Prometheus often only polls every 1 to every few seconds // Frequent updates minimize reporting error. diff --git a/signer/remote_signer.go b/signer/remote_signer.go index dbee82b8..0c9cece1 100644 --- a/signer/remote_signer.go +++ b/signer/remote_signer.go @@ -171,9 +171,7 @@ func (rs *ReconnRemoteSigner) handleSignVoteRequest(vote *tmProto.Vote) tmProtoP } previousPrecommitHeight = vote.Height // remember last PrecommitHeight - metricsPeriodicUpdateMutex.Lock() - previousPrecommitTime = time.Now() - metricsPeriodicUpdateMutex.Unlock() + metricsTimeKeeper.SetPreviousPrecommit(time.Now()) lastPrecommitHeight.Set(float64(vote.Height)) lastPrecommitRound.Set(float64(vote.Round)) @@ -191,9 +189,7 @@ func (rs *ReconnRemoteSigner) handleSignVoteRequest(vote *tmProto.Vote) tmProtoP previousPrevoteHeight = vote.Height // remember last PrevoteHeight - metricsPeriodicUpdateMutex.Lock() - previousPrevoteTime = time.Now() - metricsPeriodicUpdateMutex.Unlock() + metricsTimeKeeper.SetPreviousPrevote(time.Now()) lastPrevoteHeight.Set(float64(vote.Height)) lastPrevoteRound.Set(float64(vote.Round)) From 2422d411ad1cc06deb46f812febf9509fbbd9c82 Mon Sep 17 00:00:00 2001 From: Chill Validation Date: Wed, 28 Sep 2022 17:43:20 +0200 Subject: [PATCH 26/44] Add context to metrics server --- cmd/horcrux/cmd/cosigner.go | 2 +- cmd/horcrux/cmd/metrics.go | 31 ++++++++++++++++++++++++++----- 
cmd/horcrux/cmd/signer.go | 2 +- 3 files changed, 28 insertions(+), 7 deletions(-) diff --git a/cmd/horcrux/cmd/cosigner.go b/cmd/horcrux/cmd/cosigner.go index 5967e350..07ecf25b 100644 --- a/cmd/horcrux/cmd/cosigner.go +++ b/cmd/horcrux/cmd/cosigner.go @@ -239,7 +239,7 @@ func StartCosignerCmd() *cobra.Command { } logger.Info("Signer", "address", pubkey.Address()) - go EnableDebugAndMetrics() + go EnableDebugAndMetrics(cmd.Context()) services, err = signer.StartRemoteSigners(services, logger, cfg.ChainID, pv, cfg.Nodes) if err != nil { diff --git a/cmd/horcrux/cmd/metrics.go b/cmd/horcrux/cmd/metrics.go index ba01af86..090eb6dd 100644 --- a/cmd/horcrux/cmd/metrics.go +++ b/cmd/horcrux/cmd/metrics.go @@ -1,6 +1,7 @@ package cmd import ( + "context" "fmt" "net/http" "net/http/pprof" @@ -34,7 +35,7 @@ func AddPrometheusMetrics(mux *http.ServeMux) { } // EnableDebugAndMetrics - Initialization errors are not fatal, only logged -func EnableDebugAndMetrics() { +func EnableDebugAndMetrics(ctx context.Context) { logger := tmlog.NewTMLogger(tmlog.NewSyncWriter(os.Stdout)).With("module", "debugserver") // Configure Shared Debug HTTP Server for pprof and prometheus @@ -72,8 +73,28 @@ func EnableDebugAndMetrics() { } // Start Debug Server. 
- if err := srv.ListenAndServe(); err != nil { - logger.Error(fmt.Sprintf("Debug Endpoint failed to start: %s", err)) - return - } + go func() { + if err := srv.ListenAndServe(); err != nil { + if err.Error() == "http: Server closed" { + logger.Info(fmt.Sprintf("Debug Server Shutdown Complete")) + return + } + logger.Error(fmt.Sprintf("Debug Endpoint failed to start: %+v", err)) + panic(err) + } + }() + + // Shutdown Debug Server on ctx request + go func() { + <-ctx.Done() + logger.Info("Gracefully Stopping Debug Server") + if err := srv.Shutdown(context.Background()); err != nil { + logger.Error("Error in Stopping Debug Server", err) + logger.Info("Force Stopping Debug Server") + if err = srv.Close(); err != nil { + logger.Error("Error in Force Stopping Debug Server", err) + } + } + }() + } diff --git a/cmd/horcrux/cmd/signer.go b/cmd/horcrux/cmd/signer.go index 6d1738a3..d8023e47 100644 --- a/cmd/horcrux/cmd/signer.go +++ b/cmd/horcrux/cmd/signer.go @@ -72,7 +72,7 @@ func StartSignerCmd() *cobra.Command { } logger.Info("Signer", "pubkey", pubkey) - go EnableDebugAndMetrics() + go EnableDebugAndMetrics(cmd.Context()) services, err = signer.StartRemoteSigners(services, logger, cfg.ChainID, pv, cfg.Nodes) if err != nil { From 5a2ac537f89379f83fd750665559b1e02a2070ec Mon Sep 17 00:00:00 2001 From: Chill Validation Date: Wed, 28 Sep 2022 18:19:05 +0200 Subject: [PATCH 27/44] Make linter happy --- cmd/horcrux/cmd/config.go | 2 +- cmd/horcrux/cmd/metrics.go | 2 +- signer/local_cosigner.go | 3 ++- 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/cmd/horcrux/cmd/config.go b/cmd/horcrux/cmd/config.go index 8dc2d1dd..7eaec8ce 100644 --- a/cmd/horcrux/cmd/config.go +++ b/cmd/horcrux/cmd/config.go @@ -491,7 +491,7 @@ type DiskConfig struct { ChainID string `json:"chain-id" yaml:"chain-id"` CosignerConfig *CosignerConfig `json:"cosigner,omitempty" yaml:"cosigner,omitempty"` ChainNodes []ChainNode `json:"chain-nodes,omitempty" yaml:"chain-nodes,omitempty"` - 
DebugAddr string `json:"debug-addr,omitempty" yaml:"debug-addr,omitempty"` //nolint + DebugAddr string `json:"debug-addr,omitempty" yaml:"debug-addr,omitempty"` } func (c *DiskConfig) Nodes() []signer.NodeConfig { diff --git a/cmd/horcrux/cmd/metrics.go b/cmd/horcrux/cmd/metrics.go index 090eb6dd..0e359d7a 100644 --- a/cmd/horcrux/cmd/metrics.go +++ b/cmd/horcrux/cmd/metrics.go @@ -76,7 +76,7 @@ func EnableDebugAndMetrics(ctx context.Context) { go func() { if err := srv.ListenAndServe(); err != nil { if err.Error() == "http: Server closed" { - logger.Info(fmt.Sprintf("Debug Server Shutdown Complete")) + logger.Info("Debug Server Shutdown Complete") return } logger.Error(fmt.Sprintf("Debug Endpoint failed to start: %+v", err)) diff --git a/signer/local_cosigner.go b/signer/local_cosigner.go index a67ebbbf..3b623d54 100644 --- a/signer/local_cosigner.go +++ b/signer/local_cosigner.go @@ -156,7 +156,8 @@ func (cosigner *LocalCosigner) GetAddress() string { // Return the signed bytes or an error // Implements Cosigner interface func (cosigner *LocalCosigner) sign(req CosignerSignRequest) (CosignerSignResponse, error) { - metricsTimeKeeper.SetPreviousLocalSignStart(time.Now()) // This function has multiple exit points. Only start time can be guaranteed + // This function has multiple exit points. 
Only start time can be guaranteed + metricsTimeKeeper.SetPreviousLocalSignStart(time.Now()) cosigner.lastSignStateMutex.Lock() defer cosigner.lastSignStateMutex.Unlock() From 256e91a14a29f3c7b009bf87d9d1029aabfc8cb7 Mon Sep 17 00:00:00 2001 From: Chill Validation Date: Thu, 29 Sep 2022 17:59:24 +0200 Subject: [PATCH 28/44] Metrics totals which only increase updated to Counters --- signer/metrics.go | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/signer/metrics.go b/signer/metrics.go index 3e435947..5709a06a 100644 --- a/signer/metrics.go +++ b/signer/metrics.go @@ -104,15 +104,15 @@ var ( Help: "Last Round Proposal Signed", }) - totalPrecommitsSigned = promauto.NewGauge(prometheus.GaugeOpts{ + totalPrecommitsSigned = promauto.NewCounter(prometheus.CounterOpts{ Name: "signer_total_precommits_signed", Help: "Total Precommit Signed", }) - totalPrevotesSigned = promauto.NewGauge(prometheus.GaugeOpts{ + totalPrevotesSigned = promauto.NewCounter(prometheus.CounterOpts{ Name: "signer_total_prevotes_signed", Help: "Total Prevote Signed", }) - totalProposalsSigned = promauto.NewGauge(prometheus.GaugeOpts{ + totalProposalsSigned = promauto.NewCounter(prometheus.CounterOpts{ Name: "signer_total_proposals_signed", Help: "Total Proposal Signed", }) @@ -148,11 +148,11 @@ var ( Name: "signer_missed_prevotes", Help: "Consecutive Prevote Missed", }) - totalMissedPrecommits = promauto.NewGauge(prometheus.GaugeOpts{ + totalMissedPrecommits = promauto.NewCounter(prometheus.CounterOpts{ Name: "signer_total_missed_precommits", Help: "Total Precommit Missed", }) - totalMissedPrevotes = promauto.NewGauge(prometheus.GaugeOpts{ + totalMissedPrevotes = promauto.NewCounter(prometheus.CounterOpts{ Name: "signer_total_missed_prevotes", Help: "Total Prevote Missed", }) From 5f45fa6af8f02997458af25ed41bdbb2bf7f9b5e Mon Sep 17 00:00:00 2001 From: Chill Validation Date: Sun, 2 Oct 2022 15:09:58 +0200 Subject: [PATCH 29/44] Debug server shutdown checks error type --- 
cmd/horcrux/cmd/metrics.go | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/cmd/horcrux/cmd/metrics.go b/cmd/horcrux/cmd/metrics.go index 0e359d7a..1380d4e0 100644 --- a/cmd/horcrux/cmd/metrics.go +++ b/cmd/horcrux/cmd/metrics.go @@ -2,6 +2,7 @@ package cmd import ( "context" + "errors" "fmt" "net/http" "net/http/pprof" @@ -62,9 +63,7 @@ func EnableDebugAndMetrics(ctx context.Context) { // Configure Debug Server Network Parameters srv := &http.Server{ - Handler: mux, - //ErrorLog: &logger, - + Handler: mux, Addr: config.Config.DebugAddr, ReadTimeout: 1 * time.Second, WriteTimeout: 30 * time.Second, @@ -75,7 +74,7 @@ func EnableDebugAndMetrics(ctx context.Context) { // Start Debug Server. go func() { if err := srv.ListenAndServe(); err != nil { - if err.Error() == "http: Server closed" { + if errors.Is(err, http.ErrServerClosed) { logger.Info("Debug Server Shutdown Complete") return } From b384dfe57ecda044bb723117143013538e22413b Mon Sep 17 00:00:00 2001 From: Chill Validation <92176880+chillyvee@users.noreply.github.com> Date: Sun, 16 Oct 2022 04:48:13 +0900 Subject: [PATCH 30/44] Fix ipv6 leader election (#107) Co-authored-by: Andrew Gouin --- cmd/horcrux/cmd/leader_election.go | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/cmd/horcrux/cmd/leader_election.go b/cmd/horcrux/cmd/leader_election.go index bacdd4e9..a483f35f 100644 --- a/cmd/horcrux/cmd/leader_election.go +++ b/cmd/horcrux/cmd/leader_election.go @@ -48,16 +48,24 @@ horcrux elect 2 # elect specific leader`, } var grpcAddresses []string + + // Append local host:port u, err := url.Parse(config.Config.CosignerConfig.P2PListen) if err != nil { fmt.Printf("Error parsing peer URL: %v", err) } else { host, port, err := net.SplitHostPort(u.Host) if err == nil { - grpcAddresses = append(grpcAddresses, fmt.Sprintf("%s:%s", host, port)) + if strings.Contains(host, ":") { + // IPv6 addresses need to be wrapped in brackets + grpcAddresses =
append(grpcAddresses, fmt.Sprintf("[%s]:%s", host, port)) + } else { + grpcAddresses = append(grpcAddresses, fmt.Sprintf("%s:%s", host, port)) + } } } + // Append peer host:port for _, peer := range config.Config.CosignerConfig.Peers { u, err := url.Parse(peer.P2PAddr) if err != nil { @@ -65,7 +73,12 @@ horcrux elect 2 # elect specific leader`, } else { host, port, err := net.SplitHostPort(u.Host) if err == nil { - grpcAddresses = append(grpcAddresses, fmt.Sprintf("%s:%s", host, port)) + if strings.Contains(host, ":") { + // IPv6 addresses need to be wrapped in brackets + grpcAddresses = append(grpcAddresses, fmt.Sprintf("[%s]:%s", host, port)) + } else { + grpcAddresses = append(grpcAddresses, fmt.Sprintf("%s:%s", host, port)) + } } } } From 3d53760df5f85c3e4dadb5fc8005a4efdd22c9e5 Mon Sep 17 00:00:00 2001 From: r4f43l Date: Sun, 16 Oct 2022 00:00:19 +0200 Subject: [PATCH 31/44] not all is fixed but most. --- signer/local_cosigner.go | 4 +-- signer/threshold_signer.go | 6 ++-- signer/threshold_signer_soft.go | 55 ++++++++++++++++++++------------- 3 files changed, 38 insertions(+), 27 deletions(-) diff --git a/signer/local_cosigner.go b/signer/local_cosigner.go index 40f965c9..ccb5ca64 100644 --- a/signer/local_cosigner.go +++ b/signer/local_cosigner.go @@ -17,9 +17,9 @@ import ( tsed25519 "gitlab.com/unit410/threshold-ed25519/pkg" ) -type LastSignStateStruct struct { +type LastSignStateWrapper struct { // Signing is thread safe - lastSignStateMutex is used for putting locks so only one goroutine can r/w to the function - LastSignStateMutex sync.Mutex + lastSignStateMutex sync.Mutex // lastSignState stores the last sign state for a share we have fully signed // incremented whenever we are asked to sign a share diff --git a/signer/threshold_signer.go b/signer/threshold_signer.go index 45029a52..b83ab6b4 100644 --- a/signer/threshold_signer.go +++ b/signer/threshold_signer.go @@ -15,13 +15,13 @@ type ThresholdSigner interface { DealShares(req
CosignerGetEphemeralSecretPartRequest) (HrsMetadata, error) - GetEphemeralSecretPart(req CosignerGetEphemeralSecretPartRequest, m *LastSignStateStruct, + GetEphemeralSecretPart(req CosignerGetEphemeralSecretPartRequest, m *LastSignStateWrapper, peers map[int]CosignerPeer) (CosignerEphemeralSecretPart, error) - SetEphemeralSecretPart(req CosignerSetEphemeralSecretPartRequest, m *LastSignStateStruct, + SetEphemeralSecretPart(req CosignerSetEphemeralSecretPartRequest, m *LastSignStateWrapper, peers map[int]CosignerPeer) error - Sign(req CosignerSignRequest, m *LastSignStateStruct) (CosignerSignResponse, error) + Sign(req CosignerSignRequest, m *LastSignStateWrapper) (CosignerSignResponse, error) GetID() (int, error) } diff --git a/signer/threshold_signer_soft.go b/signer/threshold_signer_soft.go index ca15065f..5e05c0fa 100644 --- a/signer/threshold_signer_soft.go +++ b/signer/threshold_signer_soft.go @@ -41,14 +41,15 @@ func NewThresholdSignerSoft(key CosignerKey, threshold, total uint8) ThresholdSi // Ensures casting else it will naturally panic. 
ed25519Key := softSigner.Key.PubKey.(tmcryptoed25519.PubKey) softSigner.PubKeyBytes = make([]byte, len(ed25519Key)) - copy(softSigner.PubKeyBytes, ed25519Key[:]) + softSigner.PubKeyBytes = ed25519Key[:] + // copy(softSigner.PubKeyBytes, ed25519Key[:]) return softSigner } // Implements ThresholdSigner func (softSigner *ThresholdSignerSoft) Type() string { - return "soft" + return SignerTypeSoftSign } // Implements ThresholdSigner @@ -58,9 +59,9 @@ func (softSigner *ThresholdSignerSoft) GetID() (int, error) { // Implements ThresholdSigner func (softSigner *ThresholdSignerSoft) Sign( - req CosignerSignRequest, m *LastSignStateStruct) (CosignerSignResponse, error) { - m.LastSignStateMutex.Lock() - defer m.LastSignStateMutex.Unlock() + req CosignerSignRequest, m *LastSignStateWrapper) (CosignerSignResponse, error) { + m.lastSignStateMutex.Lock() + defer m.lastSignStateMutex.Unlock() res := CosignerSignResponse{} lss := m.LastSignState @@ -71,7 +72,6 @@ func (softSigner *ThresholdSignerSoft) Sign( } sameHRS, err := lss.CheckHRS(hrst) - if err != nil { return res, err } @@ -135,9 +135,14 @@ func (softSigner *ThresholdSignerSoft) Sign( }, nil, true) if err != nil { - if _, isSameHRSError := err.(*SameHRSError); !isSameHRSError { + var isSameHRSError *SameHRSError + // If error is + if !errors.As(err, &isSameHRSError) { return res, err } + // if _, isSameHRSError := err.(*SameHRSError); !isSameHRSError { + // return res, err + // } } for existingKey := range softSigner.HrsMeta { @@ -164,7 +169,6 @@ func (softSigner *ThresholdSignerSoft) DealShares( } meta, ok := softSigner.HrsMeta[hrsKey] - if ok { return meta, nil } @@ -192,14 +196,14 @@ func (softSigner *ThresholdSignerSoft) DealShares( // The ephemeral secret part is encrypted for the receiver // Implements ThresholdSigner func (softSigner *ThresholdSignerSoft) GetEphemeralSecretPart( - req CosignerGetEphemeralSecretPartRequest, m *LastSignStateStruct, peers map[int]CosignerPeer) ( + req 
CosignerGetEphemeralSecretPartRequest, m *LastSignStateWrapper, peers map[int]CosignerPeer) ( CosignerEphemeralSecretPart, error) { res := CosignerEphemeralSecretPart{} // protects the meta map - m.LastSignStateMutex.Lock() - defer m.LastSignStateMutex.Unlock() + m.lastSignStateMutex.Lock() + defer m.lastSignStateMutex.Unlock() hrst := HRSTKey{ Height: req.Height, @@ -276,17 +280,20 @@ func (softSigner *ThresholdSignerSoft) GetEphemeralSecretPart( // Store an ephemeral secret share part provided by another cosigner (signer) // Implements ThresholdSigner func (softSigner *ThresholdSignerSoft) SetEphemeralSecretPart( - req CosignerSetEphemeralSecretPartRequest, m *LastSignStateStruct, peers map[int]CosignerPeer) error { + req CosignerSetEphemeralSecretPartRequest, m *LastSignStateWrapper, peers map[int]CosignerPeer) error { // Verify the source signature if req.SourceSig == nil { return errors.New("SourceSig field is required") } - digestMsg := CosignerEphemeralSecretPart{} - digestMsg.SourceID = req.SourceID - digestMsg.SourceEphemeralSecretPublicKey = req.SourceEphemeralSecretPublicKey - digestMsg.EncryptedSharePart = req.EncryptedSharePart + digestMsg := CosignerEphemeralSecretPart{ + SourceID: req.SourceID, + // DestinationID: 0,// NOTE: I dont think digestMsg.SourceSig is used anywhere + SourceEphemeralSecretPublicKey: req.SourceEphemeralSecretPublicKey, + EncryptedSharePart: req.EncryptedSharePart, + // SourceSig: []byte{}, \\ NOTE Would be good with comments when this is set. 
+ } digestBytes, err := tmjson.Marshal(digestMsg) if err != nil { @@ -307,8 +314,8 @@ func (softSigner *ThresholdSignerSoft) SetEphemeralSecretPart( } // protects the meta map - m.LastSignStateMutex.Lock() - defer m.LastSignStateMutex.Unlock() + m.lastSignStateMutex.Lock() + defer m.lastSignStateMutex.Unlock() hrst := HRSTKey{ Height: req.Height, @@ -317,8 +324,8 @@ func (softSigner *ThresholdSignerSoft) SetEphemeralSecretPart( Timestamp: req.Timestamp.UnixNano(), } - meta, ok := softSigner.HrsMeta[hrst] - // generate metadata placeholder + meta, ok := softSigner.HrsMeta[hrst] // generate metadata placeholder + if !ok { newMeta, err := softSigner.DealShares(CosignerGetEphemeralSecretPartRequest{ Height: req.Height, @@ -331,7 +338,7 @@ func (softSigner *ThresholdSignerSoft) SetEphemeralSecretPart( } meta = newMeta - softSigner.HrsMeta[hrst] = meta + softSigner.HrsMeta[hrst] = meta // This sets the } // decrypt share @@ -341,7 +348,11 @@ func (softSigner *ThresholdSignerSoft) SetEphemeralSecretPart( } // set slot + //TODO: Fix comment. Its also very very strange coding behaviour. + // Add test that meta.Peers[req.SourceID-1].Share == softSigner.HrsMeta[hrst].Peers[req.SourceID-1].Share == sharePart + // softSigner.HrsMeta[hrst] = meta + // Share & EphemeralSecretPublicKey is a SLICE so its a valid change of shared struct softSigner meta.Peers[req.SourceID-1].Share = sharePart - meta.Peers[req.SourceID-1].EphemeralSecretPublicKey = req.SourceEphemeralSecretPublicKey + meta.Peers[req.SourceID-1].EphemeralSecretPublicKey = req.SourceEphemeralSecretPublicKey // return nil } From 716fd8e2bf816ce9f0dfa0772613e37568913318 Mon Sep 17 00:00:00 2001 From: r4f43l Date: Mon, 17 Oct 2022 17:27:05 +0200 Subject: [PATCH 32/44] Fix the unreadability of setting slots.
--- signer/threshold_signer_soft.go | 25 ++++++++++--------------- 1 file changed, 10 insertions(+), 15 deletions(-) diff --git a/signer/threshold_signer_soft.go b/signer/threshold_signer_soft.go index 5e05c0fa..f4e605e1 100644 --- a/signer/threshold_signer_soft.go +++ b/signer/threshold_signer_soft.go @@ -324,35 +324,30 @@ func (softSigner *ThresholdSignerSoft) SetEphemeralSecretPart( Timestamp: req.Timestamp.UnixNano(), } - meta, ok := softSigner.HrsMeta[hrst] // generate metadata placeholder + // decrypt share + sharePart, err := rsa.DecryptOAEP(sha256.New(), rand.Reader, &softSigner.Key.RSAKey, req.EncryptedSharePart, nil) + if err != nil { + return err + } + meta, ok := softSigner.HrsMeta[hrst] // generate metadata placeholder, softSigner.HrsMeta[hrst] is non-addressable if !ok { newMeta, err := softSigner.DealShares(CosignerGetEphemeralSecretPartRequest{ Height: req.Height, Round: req.Round, Step: req.Step, }) - if err != nil { return err } meta = newMeta - softSigner.HrsMeta[hrst] = meta // This sets the - } - - // decrypt share - sharePart, err := rsa.DecryptOAEP(sha256.New(), rand.Reader, &softSigner.Key.RSAKey, req.EncryptedSharePart, nil) - if err != nil { - return err + softSigner.HrsMeta[hrst] = meta // updates the metadata placeholder } - // set slot - //TODO: Fix comment. Its also very very strange coding behaviour. - // Add test that meta.Peers[req.SourceID-1].Share == softSigner.HrsMeta[hrst].Peers[req.SourceID-1].Share == sharePart - // softSigner.HrsMeta[hrst] = meta - // Share & EphemeralSecretPublicKey is a SLICE so its a valid change of shared struct softSigner + // Share & EphemeralSecretPublicKey is a SLICE so its a valid change of the shared struct softSigne! 
meta.Peers[req.SourceID-1].Share = sharePart - meta.Peers[req.SourceID-1].EphemeralSecretPublicKey = req.SourceEphemeralSecretPublicKey // + meta.Peers[req.SourceID-1].EphemeralSecretPublicKey = req.SourceEphemeralSecretPublicKey + return nil } From 5c06abb626b18948872d2a8deb566c660e231b72 Mon Sep 17 00:00:00 2001 From: Andrew Gouin Date: Tue, 18 Oct 2022 13:27:22 -0600 Subject: [PATCH 33/44] Add logs for participating cosigners (#111) --- signer/grpc_server.go | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/signer/grpc_server.go b/signer/grpc_server.go index e524a847..ba308d9b 100644 --- a/signer/grpc_server.go +++ b/signer/grpc_server.go @@ -44,8 +44,14 @@ func (rpc *GRPCServer) SetEphemeralSecretPartsAndSign( SignBytes: req.GetSignBytes(), }) if err != nil { + rpc.raftStore.logger.Error("Failed to sign with share", "error", err) return nil, err } + rpc.raftStore.logger.Info("Signed with share", + "height", req.Hrst.Height, + "round", req.Hrst.Round, + "step", req.Hrst.Step, + ) return &proto.CosignerGRPCSetEphemeralSecretPartsAndSignResponse{ EphemeralPublic: res.EphemeralPublic, Timestamp: res.Timestamp.UnixNano(), From 69b260e979e0f36410e2aa3211ac77123d43312d Mon Sep 17 00:00:00 2001 From: r4f43l <91068974+nitronit@users.noreply.github.com> Date: Wed, 19 Oct 2022 12:32:21 +0200 Subject: [PATCH 34/44] Update threshold_signer_soft.go --- signer/threshold_signer_soft.go | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/signer/threshold_signer_soft.go b/signer/threshold_signer_soft.go index f4e605e1..8530d2aa 100644 --- a/signer/threshold_signer_soft.go +++ b/signer/threshold_signer_soft.go @@ -289,10 +289,10 @@ func (softSigner *ThresholdSignerSoft) SetEphemeralSecretPart( digestMsg := CosignerEphemeralSecretPart{ SourceID: req.SourceID, - // DestinationID: 0,// NOTE: I dont think digestMsg.SourceSig is used anywhere + // DestinationID: 0, SourceEphemeralSecretPublicKey: req.SourceEphemeralSecretPublicKey, 
EncryptedSharePart: req.EncryptedSharePart, - // SourceSig: []byte{}, \\ NOTE Would be good with comments when this is set. + // SourceSig: []byte{}, } digestBytes, err := tmjson.Marshal(digestMsg) @@ -323,13 +323,6 @@ func (softSigner *ThresholdSignerSoft) SetEphemeralSecretPart( Step: req.Step, Timestamp: req.Timestamp.UnixNano(), } - - // decrypt share - sharePart, err := rsa.DecryptOAEP(sha256.New(), rand.Reader, &softSigner.Key.RSAKey, req.EncryptedSharePart, nil) - if err != nil { - return err - } - meta, ok := softSigner.HrsMeta[hrst] // generate metadata placeholder, softSigner.HrsMeta[hrst] is non-addressable if !ok { newMeta, err := softSigner.DealShares(CosignerGetEphemeralSecretPartRequest{ @@ -344,8 +337,13 @@ func (softSigner *ThresholdSignerSoft) SetEphemeralSecretPart( meta = newMeta softSigner.HrsMeta[hrst] = meta // updates the metadata placeholder } + // decrypt share + sharePart, err := rsa.DecryptOAEP(sha256.New(), rand.Reader, &softSigner.Key.RSAKey, req.EncryptedSharePart, nil) + if err != nil { + return err + } // set slot - // Share & EphemeralSecretPublicKey is a SLICE so its a valid change of the shared struct softSigne! + // Share & EphemeralSecretPublicKey is a SLICE so its a valid change of the shared struct softSigner! 
meta.Peers[req.SourceID-1].Share = sharePart meta.Peers[req.SourceID-1].EphemeralSecretPublicKey = req.SourceEphemeralSecretPublicKey From 9329cecfb83c001cff88ede56694464cb2c9807d Mon Sep 17 00:00:00 2001 From: r4f43l Date: Thu, 20 Oct 2022 21:01:02 +0200 Subject: [PATCH 35/44] lint fix --- signer/threshold_signer_soft.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/signer/threshold_signer_soft.go b/signer/threshold_signer_soft.go index 8530d2aa..4fd275e1 100644 --- a/signer/threshold_signer_soft.go +++ b/signer/threshold_signer_soft.go @@ -337,7 +337,7 @@ func (softSigner *ThresholdSignerSoft) SetEphemeralSecretPart( meta = newMeta softSigner.HrsMeta[hrst] = meta // updates the metadata placeholder } - // decrypt share + // decrypt share sharePart, err := rsa.DecryptOAEP(sha256.New(), rand.Reader, &softSigner.Key.RSAKey, req.EncryptedSharePart, nil) if err != nil { return err From 3e91b6542197b85ec11b9fa7b9703a45809100e8 Mon Sep 17 00:00:00 2001 From: omahs <73983677+omahs@users.noreply.github.com> Date: Thu, 20 Oct 2022 22:02:04 +0200 Subject: [PATCH 36/44] Fix: typos (#113) Fix: typos Co-authored-by: Andrew Gouin --- docs/migrating.md | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/docs/migrating.md b/docs/migrating.md index 631e26d7..c57b0018 100644 --- a/docs/migrating.md +++ b/docs/migrating.md @@ -4,7 +4,7 @@ Before starting, \***\*please make sure to have a clear understanding of node and validator operational requirements\*\***. This guide is medium to high difficulty. Operation of `horcrux` assumes significant prior knowledge of these systems. Debugging problems that may arise will entail a significant amount financial risk (double sign) if you are running on mainnet so a clear understanding of the systems you are working with is important. Please attempt this operation on a testnet before you do so on a mainnet validator. 
-> **CAUTION:** This operation will require you to take your validator down for some time. If you work quickly and follow the guide, this downtime shouldn't be more than 5-10 minutes. But reguardless, be aware of the downtime slashing on your chain and be careful not to exceed that limit. +> **CAUTION:** This operation will require you to take your validator down for some time. If you work quickly and follow the guide, this downtime shouldn't be more than 5-10 minutes. But regardless, be aware of the downtime slashing on your chain and be careful not to exceed that limit. ## Validator System Migration @@ -14,7 +14,7 @@ This document will describe a migration from a "starting system" to a 2-of-3 mul - VM: 4 CPU, 16 GB RAM, 500GB SSD storage running fully synced chain daemon also acting as a validator -### Example Migration Infrastrcuture +### Example Migration Infrastructure - Sentries: 3x VM w/ 4 CPU, 16GB RAM, 500GB SSD storage running fully synced chain daemon - These chain daemons should only expose the `:26656` (p2p) port to the open internet @@ -45,7 +45,7 @@ signer-2: 10.168.1.2 signer-3: 10.168.1.3 ``` -When installing `horcrux` we recommend using the prebuilt binaries from the [releases page](https://github.com/strangelove-ventures/horcrux/releases). Pick the release cooresponding to the `tendermint` dependancy for the `go.mod` of your chain binary. You should be able to get this with `{binary} version --long`. Install like so: +When installing `horcrux` we recommend using the prebuilt binaries from the [releases page](https://github.com/strangelove-ventures/horcrux/releases). Pick the release corresponding to the `tendermint` dependency for the `go.mod` of your chain binary. You should be able to get this with `{binary} version --long`. Install like so: ```bash # On each signer VM @@ -89,11 +89,11 @@ $ horcrux config init {my_chain_id} "tcp://10.168.0.3:1234" -c -p "tcp://10.168. 
> **NOTE:** The `-k` or `--keyfile` flag lets you set the file path for the private key share file if you would like to use a different path than `~/.horcrux/share.json`. -> **NOTE:** The `--timeout` value defaults to `1000ms`. If you are running in disconnected data centers (i.e. accross amazon AZs or gcp zones) increasing the timeout slightly helps to avoid missed blocks especially around proposals. +> **NOTE:** The `--timeout` value defaults to `1000ms`. If you are running in disconnected data centers (i.e. across amazon AZs or gcp zones) increasing the timeout slightly helps to avoid missed blocks especially around proposals. ### 3. Split `priv_validator_key.json` and distribute key material -> **CAUTION:** **The security of any key material is outside the scope of this guide. The suggested proceedure here is not necessarily the one you will use. We aim to make this guide easy to understand, not necessarily the most secure. The tooling here is all written in go and can be compiled and used in an airgapped setup if needed. Please open issues if you have questions around how to fit `horcrux` into your infra.** +> **CAUTION:** **The security of any key material is outside the scope of this guide. The suggested procedure here is not necessarily the one you will use. We aim to make this guide easy to understand, not necessarily the most secure. The tooling here is all written in go and can be compiled and used in an airgapped setup if needed. Please open issues if you have questions about how to fit `horcrux` into your infra.** On some computer that contains your `priv_validator_key.json` create a folder to split the key through the following command. 
This may take a moment o complete: From c24ca49fdb9b2cbfc9a80f28ac6a4e3fde1e09c1 Mon Sep 17 00:00:00 2001 From: Andrew Gouin Date: Thu, 20 Oct 2022 15:45:29 -0600 Subject: [PATCH 37/44] Leader election tests (#110) * Add tests for leader election and cleanup related code * lint fix * Show container logs for failed tests * Add retries for privval timeout * Add cmd to get current leader from CLI * wait for signed blocks after electing leader before checking leader among cluster * Make client package for address utils. Don't swallow errors * Simplify sanitize address --- client/address.go | 30 +++ client/address_test.go | 55 +++++ cmd/horcrux/cmd/config.go | 10 + cmd/horcrux/cmd/leader_election.go | 109 ++++++---- signer/grpc_server.go | 8 + signer/proto/cosigner_grpc_server.pb.go | 218 +++++++++++++++---- signer/proto/cosigner_grpc_server.proto | 9 +- signer/proto/cosigner_grpc_server_grpc.pb.go | 40 +++- test/horcrux_test.go | 139 ++++++++---- test/test_node.go | 148 ++++++++----- test/test_setup.go | 44 ++-- test/test_signer.go | 204 ++++++++--------- test/test_validator.go | 14 +- 13 files changed, 707 insertions(+), 321 deletions(-) create mode 100644 client/address.go create mode 100644 client/address_test.go diff --git a/client/address.go b/client/address.go new file mode 100644 index 00000000..94315824 --- /dev/null +++ b/client/address.go @@ -0,0 +1,30 @@ +package client + +import ( + "fmt" + "net/url" + "strings" +) + +func SanitizeAddress(address string) (string, error) { + u, err := url.Parse(address) + if err != nil { + return "", fmt.Errorf("error parsing peer URL: %w", err) + } + + return u.Host, nil +} + +func MultiAddress(addresses []string) (string, error) { + grpcAddresses := make([]string, len(addresses)) + + for i, addr := range addresses { + peerAddr, err := SanitizeAddress(addr) + if err != nil { + return "", err + } + grpcAddresses[i] = peerAddr + } + + return fmt.Sprintf("multi:///%s", strings.Join(grpcAddresses, ",")), nil +} diff --git 
a/client/address_test.go b/client/address_test.go new file mode 100644 index 00000000..18b2e20f --- /dev/null +++ b/client/address_test.go @@ -0,0 +1,55 @@ +package client_test + +import ( + "testing" + + "github.com/strangelove-ventures/horcrux/client" + "github.com/stretchr/testify/require" +) + +func TestLeaderElectionMultiAddressDomain(t *testing.T) { + addresses := []string{ + "tcp://signer-1:2222", + "tcp://signer-2:2222", + "tcp://signer-3:2222", + } + + multiAddress, err := client.MultiAddress(addresses) + require.NoError(t, err, "failed to assemble fqdn multi address") + + require.Equal(t, "multi:///signer-1:2222,signer-2:2222,signer-3:2222", multiAddress) +} + +func TestLeaderElectionMultiAddressIPv4(t *testing.T) { + addresses := []string{ + "tcp://10.0.0.1:2222", + "tcp://10.0.0.2:2222", + "tcp://10.0.0.3:2222", + } + + multiAddress, err := client.MultiAddress(addresses) + require.NoError(t, err, "failed to assemble ipv4 multi address") + + require.Equal(t, "multi:///10.0.0.1:2222,10.0.0.2:2222,10.0.0.3:2222", multiAddress) +} + +func TestLeaderElectionMultiAddressIPv6(t *testing.T) { + addresses := []string{ + "tcp://[2001:db8:3333:4444:5555:6666:7777:8888]:2222", + "tcp://[::]:2222", + "tcp://[::1234:5678]:2222", + "tcp://[2001:db8::]:2222", + "tcp://[2001:db8::1234:5678]:2222", + } + + multiAddress, err := client.MultiAddress(addresses) + require.NoError(t, err, "failed to assemble ipv6 multi address") + + const expected = "multi:///" + + "[2001:db8:3333:4444:5555:6666:7777:8888]:2222" + + ",[::]:2222,[::1234:5678]:2222" + + ",[2001:db8::]:2222" + + ",[2001:db8::1234:5678]:2222" + + require.Equal(t, expected, multiAddress) +} diff --git a/cmd/horcrux/cmd/config.go b/cmd/horcrux/cmd/config.go index 7eaec8ce..fe72c8b7 100644 --- a/cmd/horcrux/cmd/config.go +++ b/cmd/horcrux/cmd/config.go @@ -12,6 +12,7 @@ import ( "time" "github.com/spf13/cobra" + "github.com/strangelove-ventures/horcrux/client" "github.com/strangelove-ventures/horcrux/signer" 
"gopkg.in/yaml.v2" ) @@ -548,6 +549,15 @@ type CosignerConfig struct { Timeout string `json:"rpc-timeout" yaml:"rpc-timeout"` } +func (cfg *CosignerConfig) LeaderElectMultiAddress() (string, error) { + addresses := make([]string, 1+len(cfg.Peers)) + addresses[0] = cfg.P2PListen + for i, peer := range cfg.Peers { + addresses[i+1] = peer.P2PAddr + } + return client.MultiAddress(addresses) +} + func (c *DiskConfig) CosignerPeers() (out []signer.CosignerConfig) { for _, p := range c.CosignerConfig.Peers { out = append(out, signer.CosignerConfig{ID: p.ShareID, Address: p.P2PAddr}) diff --git a/cmd/horcrux/cmd/leader_election.go b/cmd/horcrux/cmd/leader_election.go index a483f35f..68163f6b 100644 --- a/cmd/horcrux/cmd/leader_election.go +++ b/cmd/horcrux/cmd/leader_election.go @@ -4,22 +4,20 @@ import ( "context" "fmt" "log" - "net" - "net/url" - "strings" "time" _ "github.com/Jille/grpc-multi-resolver" - "github.com/strangelove-ventures/horcrux/signer/proto" - grpc_retry "github.com/grpc-ecosystem/go-grpc-middleware/retry" "github.com/spf13/cobra" + "github.com/strangelove-ventures/horcrux/client" + "github.com/strangelove-ventures/horcrux/signer/proto" "google.golang.org/grpc" "google.golang.org/grpc/credentials/insecure" ) func init() { rootCmd.AddCommand(leaderElectionCmd) + rootCmd.AddCommand(getLeaderCmd) } var leaderElectionCmd = &cobra.Command{ @@ -47,45 +45,12 @@ horcrux elect 2 # elect specific leader`, grpc_retry.WithMax(5), } - var grpcAddresses []string - - // Append local host:port - u, err := url.Parse(config.Config.CosignerConfig.P2PListen) + grpcAddress, err := config.Config.CosignerConfig.LeaderElectMultiAddress() if err != nil { - fmt.Printf("Error parsing peer URL: %v", err) - } else { - host, port, err := net.SplitHostPort(u.Host) - if err == nil { - if strings.Contains(host, ":") { - // IPv6 Addreses need to be wrapped in brackets - grpcAddresses = append(grpcAddresses, fmt.Sprintf("[%s]:%s", host, port)) - } else { - grpcAddresses = 
append(grpcAddresses, fmt.Sprintf("%s:%s", host, port)) - } - } - } - - // Append peer host:port - for _, peer := range config.Config.CosignerConfig.Peers { - u, err := url.Parse(peer.P2PAddr) - if err != nil { - fmt.Printf("Error parsing peer URL: %v", err) - } else { - host, port, err := net.SplitHostPort(u.Host) - if err == nil { - if strings.Contains(host, ":") { - // IPv6 Addreses need to be wrapped in brackets - grpcAddresses = append(grpcAddresses, fmt.Sprintf("[%s]:%s", host, port)) - } else { - grpcAddresses = append(grpcAddresses, fmt.Sprintf("%s:%s", host, port)) - } - } - } + return err } - grpcAddress := fmt.Sprintf("multi:///%s", strings.Join(grpcAddresses, ",")) - - fmt.Println(grpcAddress) + fmt.Printf("Broadcasting to address: %s\n", grpcAddress) conn, err := grpc.Dial(grpcAddress, grpc.WithDefaultServiceConfig(serviceConfig), grpc.WithTransportCredentials(insecure.NewCredentials()), grpc.WithDefaultCallOptions(grpc.WaitForReady(true)), @@ -101,18 +66,76 @@ horcrux elect 2 # elect specific leader`, leaderID = args[0] } - context, cancelFunc := context.WithTimeout(context.Background(), 30*time.Second) + ctx, cancelFunc := context.WithTimeout(context.Background(), 30*time.Second) defer cancelFunc() grpcClient := proto.NewCosignerGRPCClient(conn) _, err = grpcClient.TransferLeadership( - context, + ctx, &proto.CosignerGRPCTransferLeadershipRequest{LeaderID: leaderID}, ) if err != nil { return err } + res, err := grpcClient.GetLeader(ctx, &proto.CosignerGRPCGetLeaderRequest{}) + if err != nil { + return err + } + + fmt.Printf("Leader election successful. 
New leader: %s\n", res.Leader) + + return nil + }, +} + +var getLeaderCmd = &cobra.Command{ + Use: "leader", + Short: "Get current raft leader", + Args: cobra.NoArgs, + Example: `horcrux leader`, + SilenceUsage: true, + RunE: func(cmd *cobra.Command, args []string) (err error) { + if config.Config.CosignerConfig == nil { + return fmt.Errorf("cosigner configuration is not present in config file") + } + + if len(config.Config.CosignerConfig.Peers) == 0 { + return fmt.Errorf("cosigner configuration has no peers") + } + + retryOpts := []grpc_retry.CallOption{ + grpc_retry.WithBackoff(grpc_retry.BackoffExponential(100 * time.Millisecond)), + grpc_retry.WithMax(5), + } + + grpcAddress, err := client.SanitizeAddress(config.Config.CosignerConfig.P2PListen) + if err != nil { + return err + } + + fmt.Printf("Request address: %s\n", grpcAddress) + conn, err := grpc.Dial(grpcAddress, + grpc.WithTransportCredentials(insecure.NewCredentials()), + grpc.WithDefaultCallOptions(grpc.WaitForReady(true)), + grpc.WithUnaryInterceptor(grpc_retry.UnaryClientInterceptor(retryOpts...))) + if err != nil { + log.Fatalf("dialing failed: %v", err) + } + defer conn.Close() + + ctx, cancelFunc := context.WithTimeout(context.Background(), 30*time.Second) + defer cancelFunc() + + grpcClient := proto.NewCosignerGRPCClient(conn) + + res, err := grpcClient.GetLeader(ctx, &proto.CosignerGRPCGetLeaderRequest{}) + if err != nil { + return err + } + + fmt.Printf("Current leader: %s\n", res.Leader) + return nil }, } diff --git a/signer/grpc_server.go b/signer/grpc_server.go index ba308d9b..1ab9b3bb 100644 --- a/signer/grpc_server.go +++ b/signer/grpc_server.go @@ -92,3 +92,11 @@ func (rpc *GRPCServer) TransferLeadership( rpc.raftStore.raft.LeadershipTransfer() return &proto.CosignerGRPCTransferLeadershipResponse{}, nil } + +func (rpc *GRPCServer) GetLeader( + ctx context.Context, + req *proto.CosignerGRPCGetLeaderRequest, +) (*proto.CosignerGRPCGetLeaderResponse, error) { + leader := 
rpc.raftStore.GetLeader() + return &proto.CosignerGRPCGetLeaderResponse{Leader: string(leader)}, nil +} diff --git a/signer/proto/cosigner_grpc_server.pb.go b/signer/proto/cosigner_grpc_server.pb.go index a49aaada..f8836e28 100644 --- a/signer/proto/cosigner_grpc_server.pb.go +++ b/signer/proto/cosigner_grpc_server.pb.go @@ -1,7 +1,7 @@ // Code generated by protoc-gen-go. DO NOT EDIT. // versions: -// protoc-gen-go v1.27.1 -// protoc v3.18.1 +// protoc-gen-go v1.28.1 +// protoc v3.21.6 // source: signer/proto/cosigner_grpc_server.proto package proto @@ -673,6 +673,91 @@ func (x *CosignerGRPCTransferLeadershipResponse) GetLeaderAddress() string { return "" } +type CosignerGRPCGetLeaderRequest struct { + state protoimpl.MessageState + sizeCache protoimpl.SizeCache + unknownFields protoimpl.UnknownFields +} + +func (x *CosignerGRPCGetLeaderRequest) Reset() { + *x = CosignerGRPCGetLeaderRequest{} + if protoimpl.UnsafeEnabled { + mi := &file_signer_proto_cosigner_grpc_server_proto_msgTypes[11] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) + } +} + +func (x *CosignerGRPCGetLeaderRequest) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*CosignerGRPCGetLeaderRequest) ProtoMessage() {} + +func (x *CosignerGRPCGetLeaderRequest) ProtoReflect() protoreflect.Message { + mi := &file_signer_proto_cosigner_grpc_server_proto_msgTypes[11] + if protoimpl.UnsafeEnabled && x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use CosignerGRPCGetLeaderRequest.ProtoReflect.Descriptor instead. 
+func (*CosignerGRPCGetLeaderRequest) Descriptor() ([]byte, []int) { + return file_signer_proto_cosigner_grpc_server_proto_rawDescGZIP(), []int{11} +} + +type CosignerGRPCGetLeaderResponse struct { + state protoimpl.MessageState + sizeCache protoimpl.SizeCache + unknownFields protoimpl.UnknownFields + + Leader string `protobuf:"bytes,1,opt,name=leader,proto3" json:"leader,omitempty"` +} + +func (x *CosignerGRPCGetLeaderResponse) Reset() { + *x = CosignerGRPCGetLeaderResponse{} + if protoimpl.UnsafeEnabled { + mi := &file_signer_proto_cosigner_grpc_server_proto_msgTypes[12] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) + } +} + +func (x *CosignerGRPCGetLeaderResponse) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*CosignerGRPCGetLeaderResponse) ProtoMessage() {} + +func (x *CosignerGRPCGetLeaderResponse) ProtoReflect() protoreflect.Message { + mi := &file_signer_proto_cosigner_grpc_server_proto_msgTypes[12] + if protoimpl.UnsafeEnabled && x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use CosignerGRPCGetLeaderResponse.ProtoReflect.Descriptor instead. 
+func (*CosignerGRPCGetLeaderResponse) Descriptor() ([]byte, []int) { + return file_signer_proto_cosigner_grpc_server_proto_rawDescGZIP(), []int{12} +} + +func (x *CosignerGRPCGetLeaderResponse) GetLeader() string { + if x != nil { + return x.Leader + } + return "" +} + var File_signer_proto_cosigner_grpc_server_proto protoreflect.FileDescriptor var file_signer_proto_cosigner_grpc_server_proto_rawDesc = []byte{ @@ -763,43 +848,54 @@ var file_signer_proto_cosigner_grpc_server_proto_rawDesc = []byte{ 0x6c, 0x65, 0x61, 0x64, 0x65, 0x72, 0x49, 0x44, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x08, 0x6c, 0x65, 0x61, 0x64, 0x65, 0x72, 0x49, 0x44, 0x12, 0x24, 0x0a, 0x0d, 0x6c, 0x65, 0x61, 0x64, 0x65, 0x72, 0x41, 0x64, 0x64, 0x72, 0x65, 0x73, 0x73, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, - 0x0d, 0x6c, 0x65, 0x61, 0x64, 0x65, 0x72, 0x41, 0x64, 0x64, 0x72, 0x65, 0x73, 0x73, 0x32, 0xfc, - 0x03, 0x0a, 0x0c, 0x43, 0x6f, 0x73, 0x69, 0x67, 0x6e, 0x65, 0x72, 0x47, 0x52, 0x50, 0x43, 0x12, - 0x58, 0x0a, 0x09, 0x53, 0x69, 0x67, 0x6e, 0x42, 0x6c, 0x6f, 0x63, 0x6b, 0x12, 0x23, 0x2e, 0x70, - 0x72, 0x6f, 0x74, 0x6f, 0x2e, 0x43, 0x6f, 0x73, 0x69, 0x67, 0x6e, 0x65, 0x72, 0x47, 0x52, 0x50, - 0x43, 0x53, 0x69, 0x67, 0x6e, 0x42, 0x6c, 0x6f, 0x63, 0x6b, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, - 0x74, 0x1a, 0x24, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x2e, 0x43, 0x6f, 0x73, 0x69, 0x67, 0x6e, - 0x65, 0x72, 0x47, 0x52, 0x50, 0x43, 0x53, 0x69, 0x67, 0x6e, 0x42, 0x6c, 0x6f, 0x63, 0x6b, 0x52, - 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x22, 0x00, 0x12, 0x97, 0x01, 0x0a, 0x1e, 0x53, 0x65, - 0x74, 0x45, 0x70, 0x68, 0x65, 0x6d, 0x65, 0x72, 0x61, 0x6c, 0x53, 0x65, 0x63, 0x72, 0x65, 0x74, - 0x50, 0x61, 0x72, 0x74, 0x73, 0x41, 0x6e, 0x64, 0x53, 0x69, 0x67, 0x6e, 0x12, 0x38, 0x2e, 0x70, - 0x72, 0x6f, 0x74, 0x6f, 0x2e, 0x43, 0x6f, 0x73, 0x69, 0x67, 0x6e, 0x65, 0x72, 0x47, 0x52, 0x50, - 0x43, 0x53, 0x65, 0x74, 0x45, 0x70, 0x68, 0x65, 0x6d, 0x65, 0x72, 0x61, 0x6c, 0x53, 0x65, 0x63, - 0x72, 0x65, 0x74, 0x50, 0x61, 
0x72, 0x74, 0x73, 0x41, 0x6e, 0x64, 0x53, 0x69, 0x67, 0x6e, 0x52, - 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x1a, 0x39, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x2e, 0x43, - 0x6f, 0x73, 0x69, 0x67, 0x6e, 0x65, 0x72, 0x47, 0x52, 0x50, 0x43, 0x53, 0x65, 0x74, 0x45, 0x70, - 0x68, 0x65, 0x6d, 0x65, 0x72, 0x61, 0x6c, 0x53, 0x65, 0x63, 0x72, 0x65, 0x74, 0x50, 0x61, 0x72, - 0x74, 0x73, 0x41, 0x6e, 0x64, 0x53, 0x69, 0x67, 0x6e, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, - 0x65, 0x22, 0x00, 0x12, 0x82, 0x01, 0x0a, 0x17, 0x47, 0x65, 0x74, 0x45, 0x70, 0x68, 0x65, 0x6d, - 0x65, 0x72, 0x61, 0x6c, 0x53, 0x65, 0x63, 0x72, 0x65, 0x74, 0x50, 0x61, 0x72, 0x74, 0x73, 0x12, - 0x31, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x2e, 0x43, 0x6f, 0x73, 0x69, 0x67, 0x6e, 0x65, 0x72, - 0x47, 0x52, 0x50, 0x43, 0x47, 0x65, 0x74, 0x45, 0x70, 0x68, 0x65, 0x6d, 0x65, 0x72, 0x61, 0x6c, - 0x53, 0x65, 0x63, 0x72, 0x65, 0x74, 0x50, 0x61, 0x72, 0x74, 0x73, 0x52, 0x65, 0x71, 0x75, 0x65, - 0x73, 0x74, 0x1a, 0x32, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x2e, 0x43, 0x6f, 0x73, 0x69, 0x67, - 0x6e, 0x65, 0x72, 0x47, 0x52, 0x50, 0x43, 0x47, 0x65, 0x74, 0x45, 0x70, 0x68, 0x65, 0x6d, 0x65, - 0x72, 0x61, 0x6c, 0x53, 0x65, 0x63, 0x72, 0x65, 0x74, 0x50, 0x61, 0x72, 0x74, 0x73, 0x52, 0x65, - 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x22, 0x00, 0x12, 0x73, 0x0a, 0x12, 0x54, 0x72, 0x61, 0x6e, - 0x73, 0x66, 0x65, 0x72, 0x4c, 0x65, 0x61, 0x64, 0x65, 0x72, 0x73, 0x68, 0x69, 0x70, 0x12, 0x2c, - 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x2e, 0x43, 0x6f, 0x73, 0x69, 0x67, 0x6e, 0x65, 0x72, 0x47, - 0x52, 0x50, 0x43, 0x54, 0x72, 0x61, 0x6e, 0x73, 0x66, 0x65, 0x72, 0x4c, 0x65, 0x61, 0x64, 0x65, - 0x72, 0x73, 0x68, 0x69, 0x70, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x1a, 0x2d, 0x2e, 0x70, - 0x72, 0x6f, 0x74, 0x6f, 0x2e, 0x43, 0x6f, 0x73, 0x69, 0x67, 0x6e, 0x65, 0x72, 0x47, 0x52, 0x50, - 0x43, 0x54, 0x72, 0x61, 0x6e, 0x73, 0x66, 0x65, 0x72, 0x4c, 0x65, 0x61, 0x64, 0x65, 0x72, 0x73, - 0x68, 0x69, 0x70, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 
0x22, 0x00, 0x42, 0x36, 0x5a, - 0x34, 0x67, 0x69, 0x74, 0x68, 0x75, 0x62, 0x2e, 0x63, 0x6f, 0x6d, 0x2f, 0x73, 0x74, 0x72, 0x61, - 0x6e, 0x67, 0x65, 0x6c, 0x6f, 0x76, 0x65, 0x2d, 0x76, 0x65, 0x6e, 0x74, 0x75, 0x72, 0x65, 0x73, - 0x2f, 0x68, 0x6f, 0x72, 0x63, 0x72, 0x75, 0x78, 0x2f, 0x73, 0x69, 0x67, 0x6e, 0x65, 0x72, 0x2f, - 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x62, 0x06, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x33, + 0x0d, 0x6c, 0x65, 0x61, 0x64, 0x65, 0x72, 0x41, 0x64, 0x64, 0x72, 0x65, 0x73, 0x73, 0x22, 0x1e, + 0x0a, 0x1c, 0x43, 0x6f, 0x73, 0x69, 0x67, 0x6e, 0x65, 0x72, 0x47, 0x52, 0x50, 0x43, 0x47, 0x65, + 0x74, 0x4c, 0x65, 0x61, 0x64, 0x65, 0x72, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x22, 0x37, + 0x0a, 0x1d, 0x43, 0x6f, 0x73, 0x69, 0x67, 0x6e, 0x65, 0x72, 0x47, 0x52, 0x50, 0x43, 0x47, 0x65, + 0x74, 0x4c, 0x65, 0x61, 0x64, 0x65, 0x72, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x12, + 0x16, 0x0a, 0x06, 0x6c, 0x65, 0x61, 0x64, 0x65, 0x72, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, + 0x06, 0x6c, 0x65, 0x61, 0x64, 0x65, 0x72, 0x32, 0xd6, 0x04, 0x0a, 0x0c, 0x43, 0x6f, 0x73, 0x69, + 0x67, 0x6e, 0x65, 0x72, 0x47, 0x52, 0x50, 0x43, 0x12, 0x58, 0x0a, 0x09, 0x53, 0x69, 0x67, 0x6e, + 0x42, 0x6c, 0x6f, 0x63, 0x6b, 0x12, 0x23, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x2e, 0x43, 0x6f, + 0x73, 0x69, 0x67, 0x6e, 0x65, 0x72, 0x47, 0x52, 0x50, 0x43, 0x53, 0x69, 0x67, 0x6e, 0x42, 0x6c, + 0x6f, 0x63, 0x6b, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x1a, 0x24, 0x2e, 0x70, 0x72, 0x6f, + 0x74, 0x6f, 0x2e, 0x43, 0x6f, 0x73, 0x69, 0x67, 0x6e, 0x65, 0x72, 0x47, 0x52, 0x50, 0x43, 0x53, + 0x69, 0x67, 0x6e, 0x42, 0x6c, 0x6f, 0x63, 0x6b, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, + 0x22, 0x00, 0x12, 0x97, 0x01, 0x0a, 0x1e, 0x53, 0x65, 0x74, 0x45, 0x70, 0x68, 0x65, 0x6d, 0x65, + 0x72, 0x61, 0x6c, 0x53, 0x65, 0x63, 0x72, 0x65, 0x74, 0x50, 0x61, 0x72, 0x74, 0x73, 0x41, 0x6e, + 0x64, 0x53, 0x69, 0x67, 0x6e, 0x12, 0x38, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x2e, 0x43, 0x6f, + 0x73, 0x69, 0x67, 0x6e, 
0x65, 0x72, 0x47, 0x52, 0x50, 0x43, 0x53, 0x65, 0x74, 0x45, 0x70, 0x68, + 0x65, 0x6d, 0x65, 0x72, 0x61, 0x6c, 0x53, 0x65, 0x63, 0x72, 0x65, 0x74, 0x50, 0x61, 0x72, 0x74, + 0x73, 0x41, 0x6e, 0x64, 0x53, 0x69, 0x67, 0x6e, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x1a, + 0x39, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x2e, 0x43, 0x6f, 0x73, 0x69, 0x67, 0x6e, 0x65, 0x72, + 0x47, 0x52, 0x50, 0x43, 0x53, 0x65, 0x74, 0x45, 0x70, 0x68, 0x65, 0x6d, 0x65, 0x72, 0x61, 0x6c, + 0x53, 0x65, 0x63, 0x72, 0x65, 0x74, 0x50, 0x61, 0x72, 0x74, 0x73, 0x41, 0x6e, 0x64, 0x53, 0x69, + 0x67, 0x6e, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x22, 0x00, 0x12, 0x82, 0x01, 0x0a, + 0x17, 0x47, 0x65, 0x74, 0x45, 0x70, 0x68, 0x65, 0x6d, 0x65, 0x72, 0x61, 0x6c, 0x53, 0x65, 0x63, + 0x72, 0x65, 0x74, 0x50, 0x61, 0x72, 0x74, 0x73, 0x12, 0x31, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, + 0x2e, 0x43, 0x6f, 0x73, 0x69, 0x67, 0x6e, 0x65, 0x72, 0x47, 0x52, 0x50, 0x43, 0x47, 0x65, 0x74, + 0x45, 0x70, 0x68, 0x65, 0x6d, 0x65, 0x72, 0x61, 0x6c, 0x53, 0x65, 0x63, 0x72, 0x65, 0x74, 0x50, + 0x61, 0x72, 0x74, 0x73, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x1a, 0x32, 0x2e, 0x70, 0x72, + 0x6f, 0x74, 0x6f, 0x2e, 0x43, 0x6f, 0x73, 0x69, 0x67, 0x6e, 0x65, 0x72, 0x47, 0x52, 0x50, 0x43, + 0x47, 0x65, 0x74, 0x45, 0x70, 0x68, 0x65, 0x6d, 0x65, 0x72, 0x61, 0x6c, 0x53, 0x65, 0x63, 0x72, + 0x65, 0x74, 0x50, 0x61, 0x72, 0x74, 0x73, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x22, + 0x00, 0x12, 0x73, 0x0a, 0x12, 0x54, 0x72, 0x61, 0x6e, 0x73, 0x66, 0x65, 0x72, 0x4c, 0x65, 0x61, + 0x64, 0x65, 0x72, 0x73, 0x68, 0x69, 0x70, 0x12, 0x2c, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x2e, + 0x43, 0x6f, 0x73, 0x69, 0x67, 0x6e, 0x65, 0x72, 0x47, 0x52, 0x50, 0x43, 0x54, 0x72, 0x61, 0x6e, + 0x73, 0x66, 0x65, 0x72, 0x4c, 0x65, 0x61, 0x64, 0x65, 0x72, 0x73, 0x68, 0x69, 0x70, 0x52, 0x65, + 0x71, 0x75, 0x65, 0x73, 0x74, 0x1a, 0x2d, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x2e, 0x43, 0x6f, + 0x73, 0x69, 0x67, 0x6e, 0x65, 0x72, 0x47, 0x52, 0x50, 0x43, 
0x54, 0x72, 0x61, 0x6e, 0x73, 0x66, + 0x65, 0x72, 0x4c, 0x65, 0x61, 0x64, 0x65, 0x72, 0x73, 0x68, 0x69, 0x70, 0x52, 0x65, 0x73, 0x70, + 0x6f, 0x6e, 0x73, 0x65, 0x22, 0x00, 0x12, 0x58, 0x0a, 0x09, 0x47, 0x65, 0x74, 0x4c, 0x65, 0x61, + 0x64, 0x65, 0x72, 0x12, 0x23, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x2e, 0x43, 0x6f, 0x73, 0x69, + 0x67, 0x6e, 0x65, 0x72, 0x47, 0x52, 0x50, 0x43, 0x47, 0x65, 0x74, 0x4c, 0x65, 0x61, 0x64, 0x65, + 0x72, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x1a, 0x24, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, + 0x2e, 0x43, 0x6f, 0x73, 0x69, 0x67, 0x6e, 0x65, 0x72, 0x47, 0x52, 0x50, 0x43, 0x47, 0x65, 0x74, + 0x4c, 0x65, 0x61, 0x64, 0x65, 0x72, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x22, 0x00, + 0x42, 0x36, 0x5a, 0x34, 0x67, 0x69, 0x74, 0x68, 0x75, 0x62, 0x2e, 0x63, 0x6f, 0x6d, 0x2f, 0x73, + 0x74, 0x72, 0x61, 0x6e, 0x67, 0x65, 0x6c, 0x6f, 0x76, 0x65, 0x2d, 0x76, 0x65, 0x6e, 0x74, 0x75, + 0x72, 0x65, 0x73, 0x2f, 0x68, 0x6f, 0x72, 0x63, 0x72, 0x75, 0x78, 0x2f, 0x73, 0x69, 0x67, 0x6e, + 0x65, 0x72, 0x2f, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x62, 0x06, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x33, } var ( @@ -814,7 +910,7 @@ func file_signer_proto_cosigner_grpc_server_proto_rawDescGZIP() []byte { return file_signer_proto_cosigner_grpc_server_proto_rawDescData } -var file_signer_proto_cosigner_grpc_server_proto_msgTypes = make([]protoimpl.MessageInfo, 11) +var file_signer_proto_cosigner_grpc_server_proto_msgTypes = make([]protoimpl.MessageInfo, 13) var file_signer_proto_cosigner_grpc_server_proto_goTypes = []interface{}{ (*Block)(nil), // 0: proto.Block (*CosignerGRPCSignBlockRequest)(nil), // 1: proto.CosignerGRPCSignBlockRequest @@ -827,6 +923,8 @@ var file_signer_proto_cosigner_grpc_server_proto_goTypes = []interface{}{ (*CosignerGRPCGetEphemeralSecretPartsResponse)(nil), // 8: proto.CosignerGRPCGetEphemeralSecretPartsResponse (*CosignerGRPCTransferLeadershipRequest)(nil), // 9: proto.CosignerGRPCTransferLeadershipRequest 
(*CosignerGRPCTransferLeadershipResponse)(nil), // 10: proto.CosignerGRPCTransferLeadershipResponse + (*CosignerGRPCGetLeaderRequest)(nil), // 11: proto.CosignerGRPCGetLeaderRequest + (*CosignerGRPCGetLeaderResponse)(nil), // 12: proto.CosignerGRPCGetLeaderResponse } var file_signer_proto_cosigner_grpc_server_proto_depIdxs = []int32{ 0, // 0: proto.CosignerGRPCSignBlockRequest.block:type_name -> proto.Block @@ -838,12 +936,14 @@ var file_signer_proto_cosigner_grpc_server_proto_depIdxs = []int32{ 5, // 6: proto.CosignerGRPC.SetEphemeralSecretPartsAndSign:input_type -> proto.CosignerGRPCSetEphemeralSecretPartsAndSignRequest 7, // 7: proto.CosignerGRPC.GetEphemeralSecretParts:input_type -> proto.CosignerGRPCGetEphemeralSecretPartsRequest 9, // 8: proto.CosignerGRPC.TransferLeadership:input_type -> proto.CosignerGRPCTransferLeadershipRequest - 2, // 9: proto.CosignerGRPC.SignBlock:output_type -> proto.CosignerGRPCSignBlockResponse - 6, // 10: proto.CosignerGRPC.SetEphemeralSecretPartsAndSign:output_type -> proto.CosignerGRPCSetEphemeralSecretPartsAndSignResponse - 8, // 11: proto.CosignerGRPC.GetEphemeralSecretParts:output_type -> proto.CosignerGRPCGetEphemeralSecretPartsResponse - 10, // 12: proto.CosignerGRPC.TransferLeadership:output_type -> proto.CosignerGRPCTransferLeadershipResponse - 9, // [9:13] is the sub-list for method output_type - 5, // [5:9] is the sub-list for method input_type + 11, // 9: proto.CosignerGRPC.GetLeader:input_type -> proto.CosignerGRPCGetLeaderRequest + 2, // 10: proto.CosignerGRPC.SignBlock:output_type -> proto.CosignerGRPCSignBlockResponse + 6, // 11: proto.CosignerGRPC.SetEphemeralSecretPartsAndSign:output_type -> proto.CosignerGRPCSetEphemeralSecretPartsAndSignResponse + 8, // 12: proto.CosignerGRPC.GetEphemeralSecretParts:output_type -> proto.CosignerGRPCGetEphemeralSecretPartsResponse + 10, // 13: proto.CosignerGRPC.TransferLeadership:output_type -> proto.CosignerGRPCTransferLeadershipResponse + 12, // 14: 
proto.CosignerGRPC.GetLeader:output_type -> proto.CosignerGRPCGetLeaderResponse + 10, // [10:15] is the sub-list for method output_type + 5, // [5:10] is the sub-list for method input_type 5, // [5:5] is the sub-list for extension type_name 5, // [5:5] is the sub-list for extension extendee 0, // [0:5] is the sub-list for field type_name @@ -987,6 +1087,30 @@ func file_signer_proto_cosigner_grpc_server_proto_init() { return nil } } + file_signer_proto_cosigner_grpc_server_proto_msgTypes[11].Exporter = func(v interface{}, i int) interface{} { + switch v := v.(*CosignerGRPCGetLeaderRequest); i { + case 0: + return &v.state + case 1: + return &v.sizeCache + case 2: + return &v.unknownFields + default: + return nil + } + } + file_signer_proto_cosigner_grpc_server_proto_msgTypes[12].Exporter = func(v interface{}, i int) interface{} { + switch v := v.(*CosignerGRPCGetLeaderResponse); i { + case 0: + return &v.state + case 1: + return &v.sizeCache + case 2: + return &v.unknownFields + default: + return nil + } + } } type x struct{} out := protoimpl.TypeBuilder{ @@ -994,7 +1118,7 @@ func file_signer_proto_cosigner_grpc_server_proto_init() { GoPackagePath: reflect.TypeOf(x{}).PkgPath(), RawDescriptor: file_signer_proto_cosigner_grpc_server_proto_rawDesc, NumEnums: 0, - NumMessages: 11, + NumMessages: 13, NumExtensions: 0, NumServices: 1, }, diff --git a/signer/proto/cosigner_grpc_server.proto b/signer/proto/cosigner_grpc_server.proto index d7c39df2..c2fe1af3 100644 --- a/signer/proto/cosigner_grpc_server.proto +++ b/signer/proto/cosigner_grpc_server.proto @@ -9,6 +9,7 @@ service CosignerGRPC { rpc SetEphemeralSecretPartsAndSign (CosignerGRPCSetEphemeralSecretPartsAndSignRequest) returns (CosignerGRPCSetEphemeralSecretPartsAndSignResponse) {} rpc GetEphemeralSecretParts (CosignerGRPCGetEphemeralSecretPartsRequest) returns (CosignerGRPCGetEphemeralSecretPartsResponse) {} rpc TransferLeadership (CosignerGRPCTransferLeadershipRequest) returns 
(CosignerGRPCTransferLeadershipResponse) {} + rpc GetLeader (CosignerGRPCGetLeaderRequest) returns (CosignerGRPCGetLeaderResponse) {} } message Block { @@ -70,4 +71,10 @@ message CosignerGRPCTransferLeadershipRequest { message CosignerGRPCTransferLeadershipResponse { string leaderID = 1; string leaderAddress = 2; -} \ No newline at end of file +} + +message CosignerGRPCGetLeaderRequest {} + +message CosignerGRPCGetLeaderResponse { + string leader = 1; +} diff --git a/signer/proto/cosigner_grpc_server_grpc.pb.go b/signer/proto/cosigner_grpc_server_grpc.pb.go index b79d09b0..e8889725 100644 --- a/signer/proto/cosigner_grpc_server_grpc.pb.go +++ b/signer/proto/cosigner_grpc_server_grpc.pb.go @@ -1,7 +1,7 @@ // Code generated by protoc-gen-go-grpc. DO NOT EDIT. // versions: -// - protoc-gen-go-grpc v1.1.0 -// - protoc v3.18.1 +// - protoc-gen-go-grpc v1.2.0 +// - protoc v3.21.6 // source: signer/proto/cosigner_grpc_server.proto package proto @@ -26,6 +26,7 @@ type CosignerGRPCClient interface { SetEphemeralSecretPartsAndSign(ctx context.Context, in *CosignerGRPCSetEphemeralSecretPartsAndSignRequest, opts ...grpc.CallOption) (*CosignerGRPCSetEphemeralSecretPartsAndSignResponse, error) GetEphemeralSecretParts(ctx context.Context, in *CosignerGRPCGetEphemeralSecretPartsRequest, opts ...grpc.CallOption) (*CosignerGRPCGetEphemeralSecretPartsResponse, error) TransferLeadership(ctx context.Context, in *CosignerGRPCTransferLeadershipRequest, opts ...grpc.CallOption) (*CosignerGRPCTransferLeadershipResponse, error) + GetLeader(ctx context.Context, in *CosignerGRPCGetLeaderRequest, opts ...grpc.CallOption) (*CosignerGRPCGetLeaderResponse, error) } type cosignerGRPCClient struct { @@ -72,6 +73,15 @@ func (c *cosignerGRPCClient) TransferLeadership(ctx context.Context, in *Cosigne return out, nil } +func (c *cosignerGRPCClient) GetLeader(ctx context.Context, in *CosignerGRPCGetLeaderRequest, opts ...grpc.CallOption) (*CosignerGRPCGetLeaderResponse, error) { + out := 
new(CosignerGRPCGetLeaderResponse) + err := c.cc.Invoke(ctx, "/proto.CosignerGRPC/GetLeader", in, out, opts...) + if err != nil { + return nil, err + } + return out, nil +} + // CosignerGRPCServer is the server API for CosignerGRPC service. // All implementations must embed UnimplementedCosignerGRPCServer // for forward compatibility @@ -80,6 +90,7 @@ type CosignerGRPCServer interface { SetEphemeralSecretPartsAndSign(context.Context, *CosignerGRPCSetEphemeralSecretPartsAndSignRequest) (*CosignerGRPCSetEphemeralSecretPartsAndSignResponse, error) GetEphemeralSecretParts(context.Context, *CosignerGRPCGetEphemeralSecretPartsRequest) (*CosignerGRPCGetEphemeralSecretPartsResponse, error) TransferLeadership(context.Context, *CosignerGRPCTransferLeadershipRequest) (*CosignerGRPCTransferLeadershipResponse, error) + GetLeader(context.Context, *CosignerGRPCGetLeaderRequest) (*CosignerGRPCGetLeaderResponse, error) mustEmbedUnimplementedCosignerGRPCServer() } @@ -99,6 +110,9 @@ func (UnimplementedCosignerGRPCServer) GetEphemeralSecretParts(context.Context, func (UnimplementedCosignerGRPCServer) TransferLeadership(context.Context, *CosignerGRPCTransferLeadershipRequest) (*CosignerGRPCTransferLeadershipResponse, error) { return nil, status.Errorf(codes.Unimplemented, "method TransferLeadership not implemented") } +func (UnimplementedCosignerGRPCServer) GetLeader(context.Context, *CosignerGRPCGetLeaderRequest) (*CosignerGRPCGetLeaderResponse, error) { + return nil, status.Errorf(codes.Unimplemented, "method GetLeader not implemented") +} func (UnimplementedCosignerGRPCServer) mustEmbedUnimplementedCosignerGRPCServer() {} // UnsafeCosignerGRPCServer may be embedded to opt out of forward compatibility for this service. 
@@ -184,6 +198,24 @@ func _CosignerGRPC_TransferLeadership_Handler(srv interface{}, ctx context.Conte return interceptor(ctx, in, info, handler) } +func _CosignerGRPC_GetLeader_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) { + in := new(CosignerGRPCGetLeaderRequest) + if err := dec(in); err != nil { + return nil, err + } + if interceptor == nil { + return srv.(CosignerGRPCServer).GetLeader(ctx, in) + } + info := &grpc.UnaryServerInfo{ + Server: srv, + FullMethod: "/proto.CosignerGRPC/GetLeader", + } + handler := func(ctx context.Context, req interface{}) (interface{}, error) { + return srv.(CosignerGRPCServer).GetLeader(ctx, req.(*CosignerGRPCGetLeaderRequest)) + } + return interceptor(ctx, in, info, handler) +} + // CosignerGRPC_ServiceDesc is the grpc.ServiceDesc for CosignerGRPC service. // It's only intended for direct use with grpc.RegisterService, // and not to be introspected or modified (even as a copy) @@ -207,6 +239,10 @@ var CosignerGRPC_ServiceDesc = grpc.ServiceDesc{ MethodName: "TransferLeadership", Handler: _CosignerGRPC_TransferLeadership_Handler, }, + { + MethodName: "GetLeader", + Handler: _CosignerGRPC_GetLeader_Handler, + }, }, Streams: []grpc.StreamDesc{}, Metadata: "signer/proto/cosigner_grpc_server.proto", diff --git a/test/horcrux_test.go b/test/horcrux_test.go index b4b66a77..2671408d 100644 --- a/test/horcrux_test.go +++ b/test/horcrux_test.go @@ -27,17 +27,18 @@ func Test4Of7SignerTwoSentries(t *testing.T) { chain := getSimdChain() // setup a horcrux validator for us - ourValidator, err := NewHorcruxValidator(t, pool, home, chainID, 0, totalSentries, totalSigners, threshold, chain) + ourValidator, err := NewHorcruxValidator(t, pool, network, home, + chainID, 0, totalSentries, totalSigners, threshold, chain) require.NoError(t, err) // other vals are single node (non-horcrux) - otherValidatorNodes := GetValidators(1, totalValidators-1, 1, home, 
chainID, chain, pool, t) + otherValidatorNodes := GetValidators(1, totalValidators-1, 1, home, chainID, chain, pool, network, t) // start our validator's horcrux cluster - require.NoError(t, ourValidator.StartHorcruxCluster(ctx, network, sentriesPerSigner)) + require.NoError(t, ourValidator.StartHorcruxCluster(ctx, sentriesPerSigner)) // assemble and combine gentx to get genesis file, configure peering between sentries, then start the chain - require.NoError(t, Genesis(t, ctx, network, chain, otherValidatorNodes, []*TestNode{}, []*TestValidator{ourValidator})) + require.NoError(t, Genesis(t, ctx, chain, otherValidatorNodes, []*TestNode{}, []*TestValidator{ourValidator})) // Wait for all nodes to get to given block height require.NoError(t, GetAllNodes(otherValidatorNodes, ourValidator.Sentries).WaitForHeight(5)) @@ -61,17 +62,18 @@ func Test2Of3SignerTwoSentries(t *testing.T) { chain := getSimdChain() // setup a horcrux validator for us - ourValidator, err := NewHorcruxValidator(t, pool, home, chainID, 0, totalSentries, totalSigners, threshold, chain) + ourValidator, err := NewHorcruxValidator(t, pool, network, home, + chainID, 0, totalSentries, totalSigners, threshold, chain) require.NoError(t, err) // remaining validators are single-node non-horcrux - otherValidatorNodes := GetValidators(1, totalValidators-1, 1, home, chainID, chain, pool, t) + otherValidatorNodes := GetValidators(1, totalValidators-1, 1, home, chainID, chain, pool, network, t) // start our validator's horcrux cluster - require.NoError(t, ourValidator.StartHorcruxCluster(ctx, network, sentriesPerSigner)) + require.NoError(t, ourValidator.StartHorcruxCluster(ctx, sentriesPerSigner)) // assemble and combine gentx to get genesis file, configure peering between sentries, then start the chain - require.NoError(t, Genesis(t, ctx, network, chain, otherValidatorNodes, []*TestNode{}, []*TestValidator{ourValidator})) + require.NoError(t, Genesis(t, ctx, chain, otherValidatorNodes, []*TestNode{}, 
[]*TestValidator{ourValidator})) // Wait for all nodes to get to given block height require.NoError(t, GetAllNodes(otherValidatorNodes, ourValidator.Sentries).WaitForHeight(5)) @@ -95,17 +97,18 @@ func Test2Of3SignerUniqueSentry(t *testing.T) { chain := getSimdChain() // setup a horcrux validator for us - ourValidator, err := NewHorcruxValidator(t, pool, home, chainID, 0, totalSentries, totalSigners, threshold, chain) + ourValidator, err := NewHorcruxValidator(t, pool, network, home, + chainID, 0, totalSentries, totalSigners, threshold, chain) require.NoError(t, err) // remaining validators are single-node non-horcrux - otherValidatorNodes := GetValidators(1, totalValidators-1, 1, home, chainID, chain, pool, t) + otherValidatorNodes := GetValidators(1, totalValidators-1, 1, home, chainID, chain, pool, network, t) // start our validator's horcrux cluster - require.NoError(t, ourValidator.StartHorcruxCluster(ctx, network, sentriesPerSigner)) + require.NoError(t, ourValidator.StartHorcruxCluster(ctx, sentriesPerSigner)) // assemble and combine gentx to get genesis file, configure peering between sentries, then start the chain - require.NoError(t, Genesis(t, ctx, network, chain, otherValidatorNodes, []*TestNode{}, []*TestValidator{ourValidator})) + require.NoError(t, Genesis(t, ctx, chain, otherValidatorNodes, []*TestNode{}, []*TestValidator{ourValidator})) // Wait for all nodes to get to given block height require.NoError(t, GetAllNodes(otherValidatorNodes, ourValidator.Sentries).WaitForHeight(5)) @@ -127,13 +130,13 @@ func TestSingleSignerTwoSentries(t *testing.T) { chain := getSimdChain() // get total sentries nodes for our validator - ourValidatorNodes := GetValidators(0, 1, totalSentries, home, chainID, chain, pool, t) + ourValidatorNodes := GetValidators(0, 1, totalSentries, home, chainID, chain, pool, network, t) // using the first node for account and consensus key to create gentx ourValidatorAccountNode := ourValidatorNodes[0] // other vals are single node 
(non-horcrux) - otherValidatorNodes := GetValidators(1, totalValidators-1, 1, home, chainID, chain, pool, t) + otherValidatorNodes := GetValidators(1, totalValidators-1, 1, home, chainID, chain, pool, network, t) // nodes that will be used for account and consensus key to create gentx validatorAccountNodes := GetAllNodes([]*TestNode{ourValidatorAccountNode}, otherValidatorNodes) @@ -144,10 +147,10 @@ func TestSingleSignerTwoSentries(t *testing.T) { sentries := []*TestNode{ourValidatorNodes[1]} // initialize horcrux signer nodes for our validator - signers := MakeTestSigners(0, totalSigners, home, pool, t) + signers := MakeTestSigners(0, totalSigners, home, pool, network, t) // assemble and combine gentx to get genesis file, configure peering between sentries, then start the chain - require.NoError(t, Genesis(t, ctx, network, chain, validatorAccountNodes, sentries, []*TestValidator{})) + require.NoError(t, Genesis(t, ctx, chain, validatorAccountNodes, sentries, []*TestValidator{})) allNodes := GetAllNodes(validatorAccountNodes, sentries) @@ -155,7 +158,7 @@ func TestSingleSignerTwoSentries(t *testing.T) { require.NoError(t, allNodes.WaitForHeight(5)) // start remote signer - require.NoError(t, StartSingleSignerContainers(signers, ourValidatorAccountNode, ourValidatorNodes, network)) + require.NoError(t, StartSingleSignerContainers(signers, ourValidatorAccountNode, ourValidatorNodes)) // Stop the validator node and sentry node before upgrading to horcrux t.Logf("{%s} -> Stopping Node...", ourValidatorAccountNode.Name()) @@ -178,11 +181,8 @@ func TestSingleSignerTwoSentries(t *testing.T) { t.Logf("{%s} -> Restarting Node...", ourValidatorAccountNode.Name()) t.Logf("{%s} -> Restarting Node...", sentries[0].Name()) - require.NoError(t, ourValidatorAccountNode.CreateNodeContainer(network.ID)) - require.NoError(t, sentries[0].CreateNodeContainer(network.ID)) - - require.NoError(t, ourValidatorAccountNode.StartContainer(ctx)) - require.NoError(t, 
sentries[0].StartContainer(ctx)) + require.NoError(t, ourValidatorAccountNode.Start(ctx, nil)) + require.NoError(t, sentries[0].Start(ctx, nil)) // wait for our validator and all sentries to be reachable hosts := ourValidatorAccountNode.GetHosts() @@ -210,13 +210,13 @@ func TestUpgradeValidatorToHorcrux(t *testing.T) { chain := getSimdChain() // initially all vals are single node (non-horcrux) - validators := GetValidators(0, totalValidators, 1, home, chainID, chain, pool, t) + validators := GetValidators(0, totalValidators, 1, home, chainID, chain, pool, network, t) // for this test we will upgrade the first validator to horcrux ourValidatorNode := validators[0] // assemble and combine gentx to get genesis file, configure peering between sentries, then start the chain - require.NoError(t, Genesis(t, ctx, network, chain, validators, []*TestNode{}, []*TestValidator{})) + require.NoError(t, Genesis(t, ctx, chain, validators, []*TestNode{}, []*TestValidator{})) // Wait for all validators to get to given block height require.NoError(t, validators.WaitForHeight(5)) @@ -226,7 +226,7 @@ func TestUpgradeValidatorToHorcrux(t *testing.T) { require.NoError(t, err) // create horcrux validator with same consensus key - ourValidatorUpgradedToHorcrux, err := NewHorcruxValidatorWithPrivValKey(t, pool, home, + ourValidatorUpgradedToHorcrux, err := NewHorcruxValidatorWithPrivValKey(t, pool, network, home, chainID, 0, 0, totalSigners, threshold, getSimdChain(), ourValidatorPrivValKey) require.NoError(t, err) @@ -247,11 +247,10 @@ func TestUpgradeValidatorToHorcrux(t *testing.T) { ourValidatorNode.GenNewPrivVal() // start our new validator - require.NoError(t, ourValidatorUpgradedToHorcrux.StartHorcruxCluster(ctx, network, sentriesPerSigner)) + require.NoError(t, ourValidatorUpgradedToHorcrux.StartHorcruxCluster(ctx, sentriesPerSigner)) t.Logf("{%s} -> Restarting Node...", ourValidatorNode.Name()) - require.NoError(t, ourValidatorNode.CreateNodeContainer(network.ID)) - 
require.NoError(t, ourValidatorNode.StartContainer(ctx)) + require.NoError(t, ourValidatorNode.Start(ctx, nil)) // wait for validator to be reachable require.NoError(t, ourValidatorNode.GetHosts().WaitForAllToStart(t, 10)) @@ -272,24 +271,22 @@ func TestDownedSigners2of3(t *testing.T) { chain := getSimdChain() // setup a horcrux validator for us - ourValidator, err := NewHorcruxValidator(t, pool, home, chainID, 0, totalSentries, totalSigners, threshold, chain) + ourValidator, err := NewHorcruxValidator(t, pool, network, home, + chainID, 0, totalSentries, totalSigners, threshold, chain) require.NoError(t, err) // remaining validators are single-node non-horcrux - otherValidatorNodes := GetValidators(1, totalValidators-1, 1, home, chainID, chain, pool, t) + otherValidatorNodes := GetValidators(1, totalValidators-1, 1, home, chainID, chain, pool, network, t) // start our validator's horcrux cluster - require.NoError(t, ourValidator.StartHorcruxCluster(ctx, network, sentriesPerSigner)) + require.NoError(t, ourValidator.StartHorcruxCluster(ctx, sentriesPerSigner)) // assemble and combine gentx to get genesis file, configure peering between sentries, then start the chain - require.NoError(t, Genesis(t, ctx, network, chain, otherValidatorNodes, []*TestNode{}, []*TestValidator{ourValidator})) + require.NoError(t, Genesis(t, ctx, chain, otherValidatorNodes, []*TestNode{}, []*TestValidator{ourValidator})) // Wait for all nodes to get to given block height require.NoError(t, GetAllNodes(otherValidatorNodes, ourValidator.Sentries).WaitForHeight(5)) - t.Logf("{%s} -> Checking that slashing has not occurred...", ourValidator.Name()) - require.NoError(t, ourValidator.EnsureNotSlashed()) - // Test taking down each node in the signer cluster for a period of time for _, signer := range ourValidator.Signers { t.Logf("{%s} -> Stopping signer...", signer.Name()) @@ -299,7 +296,7 @@ func TestDownedSigners2of3(t *testing.T) { require.NoError(t, ourValidator.WaitForConsecutiveBlocks(10)) 
t.Logf("{%s} -> Restarting signer...", signer.Name()) - require.NoError(t, signer.CreateCosignerContainer(network.ID)) + require.NoError(t, signer.CreateCosignerContainer()) require.NoError(t, signer.StartContainer()) require.NoError(t, signer.GetHosts().WaitForAllToStart(t, 10)) // Wait to ensure signer is back up require.NoError(t, ourValidator.WaitForConsecutiveBlocks(10)) @@ -308,6 +305,56 @@ func TestDownedSigners2of3(t *testing.T) { require.NoError(t, ourValidator.EnsureNotSlashed()) } +func TestLeaderElection2of3(t *testing.T) { + t.Parallel() + ctx, home, pool, network := SetupTestRun(t) + + const totalValidators = 4 + const totalSigners = 3 + const totalSentries = 2 + const threshold = 2 + const sentriesPerSigner = 3 + chain := getSimdChain() + + // setup a horcrux validator for us + ourValidator, err := NewHorcruxValidator(t, pool, network, home, + chainID, 0, totalSentries, totalSigners, threshold, chain) + require.NoError(t, err) + + // remaining validators are single-node non-horcrux + otherValidatorNodes := GetValidators(1, totalValidators-1, 1, home, chainID, chain, pool, network, t) + + // start our validator's horcrux cluster + require.NoError(t, ourValidator.StartHorcruxCluster(ctx, sentriesPerSigner)) + + // assemble and combine gentx to get genesis file, configure peering between sentries, then start the chain + require.NoError(t, Genesis(t, ctx, chain, otherValidatorNodes, []*TestNode{}, []*TestValidator{ourValidator})) + + // Wait for all nodes to get to given block height + require.NoError(t, GetAllNodes(otherValidatorNodes, ourValidator.Sentries).WaitForHeight(5)) + + // Test electing each node in the signer cluster for a period of time + for _, signer := range ourValidator.Signers { + t.Logf("{%s} -> Electing leader...", signer.Name()) + err := signer.TransferLeadership(ctx, signer.Index) + require.NoError(t, err, "failed to transfer leadership to %d", signer.Index) + + t.Logf("{%s} -> Waiting for signed blocks with signer as leader {%s}", 
ourValidator.Name(), signer.Name()) + require.NoError(t, ourValidator.WaitForConsecutiveBlocks(2)) + + // Make sure all cosigners have the same leader + for _, s := range ourValidator.Signers { + leader, err := s.GetLeader(ctx) + require.NoError(t, err, "failed to get leader from signer: %s", s.Name()) + require.Equal(t, signer.Name()+":"+signerPort, leader) + } + + require.NoError(t, ourValidator.WaitForConsecutiveBlocks(8)) + } + t.Logf("{%s} -> Checking that slashing has not occurred...", ourValidator.Name()) + require.NoError(t, ourValidator.EnsureNotSlashed()) +} + func TestDownedSigners3of5(t *testing.T) { t.Parallel() ctx, home, pool, network := SetupTestRun(t) @@ -320,24 +367,22 @@ func TestDownedSigners3of5(t *testing.T) { chain := getSimdChain() // setup a horcrux validator for us - ourValidator, err := NewHorcruxValidator(t, pool, home, chainID, 0, totalSentries, totalSigners, threshold, chain) + ourValidator, err := NewHorcruxValidator(t, pool, network, home, + chainID, 0, totalSentries, totalSigners, threshold, chain) require.NoError(t, err) // remaining validators are single-node non-horcrux - otherValidatorNodes := GetValidators(1, totalValidators-1, 1, home, chainID, chain, pool, t) + otherValidatorNodes := GetValidators(1, totalValidators-1, 1, home, chainID, chain, pool, network, t) // start our validator's horcrux cluster - require.NoError(t, ourValidator.StartHorcruxCluster(ctx, network, sentriesPerSigner)) + require.NoError(t, ourValidator.StartHorcruxCluster(ctx, sentriesPerSigner)) // assemble and combine gentx to get genesis file, configure peering between sentries, then start the chain - require.NoError(t, Genesis(t, ctx, network, chain, otherValidatorNodes, []*TestNode{}, []*TestValidator{ourValidator})) + require.NoError(t, Genesis(t, ctx, chain, otherValidatorNodes, []*TestNode{}, []*TestValidator{ourValidator})) // Wait for all nodes to get to given block height require.NoError(t, GetAllNodes(otherValidatorNodes, 
ourValidator.Sentries).WaitForHeight(5)) - t.Logf("{%s} -> Checking that slashing has not occurred...", ourValidator.Name()) - require.NoError(t, ourValidator.EnsureNotSlashed()) - // Test taking down 2 nodes at a time in the signer cluster for a period of time for i := 0; i < len(ourValidator.Signers); i++ { signer1 := ourValidator.Signers[i] @@ -362,7 +407,7 @@ func TestDownedSigners3of5(t *testing.T) { require.NoError(t, ourValidator.WaitForConsecutiveBlocks(10)) t.Logf("{%s} -> Restarting signer...", signer1.Name()) - require.NoError(t, signer1.CreateCosignerContainer(network.ID)) + require.NoError(t, signer1.CreateCosignerContainer()) require.NoError(t, signer1.StartContainer()) require.NoError(t, signer1.GetHosts().WaitForAllToStart(t, 10)) // Wait to ensure signer is back up require.NoError(t, ourValidator.WaitForConsecutiveBlocks(10)) @@ -396,20 +441,20 @@ func TestChainPureHorcrux(t *testing.T) { // start horcrux cluster for each validator for i := 0; i < totalValidators; i++ { - validator, err := NewHorcruxValidator(t, pool, home, chainID, i, + validator, err := NewHorcruxValidator(t, pool, network, home, chainID, i, sentriesPerValidator, signersPerValidator, threshold, chain) require.NoError(t, err) validators = append(validators, validator) allNodes = append(allNodes, validator.Sentries...) 
startValidatorsErrGroup.Go(func() error { - return validator.StartHorcruxCluster(ctx, network, sentriesPerSigner) + return validator.StartHorcruxCluster(ctx, sentriesPerSigner) }) } require.NoError(t, startValidatorsErrGroup.Wait()) // assemble and combine gentx to get genesis file, configure peering between sentries, then start the chain - require.NoError(t, Genesis(t, ctx, network, chain, []*TestNode{}, []*TestNode{}, validators)) + require.NoError(t, Genesis(t, ctx, chain, []*TestNode{}, []*TestNode{}, validators)) require.NoError(t, allNodes.WaitForHeight(5)) diff --git a/test/test_node.go b/test/test_node.go index a937c624..6d5cfa54 100644 --- a/test/test_node.go +++ b/test/test_node.go @@ -96,7 +96,8 @@ func getSentinelChain(ctx context.Context, version string) *ChainType { return err } command := []string{"sed", "-i", fmt.Sprintf(`s/"approve_by": ""/"approve_by": "%s"/g`, address), genesisJSON} - return handleNodeJobError(tn.NodeJob(ctx, command)) + _, _, err = tn.Exec(ctx, command) + return err } return getHeighlinerChain("sentinel", version, "sentinelhub", "sent", true, sentinelGenesisJSONModification) @@ -127,6 +128,7 @@ type TestNode struct { GenesisCoins string Validator bool Pool *dockertest.Pool + networkID string Client rpcclient.Client Container *docker.Container tl TestLogger @@ -162,6 +164,7 @@ func MakeTestNodes( chainID string, chainType *ChainType, pool *dockertest.Pool, + networkID string, tl TestLogger, ) (out TestNodes) { err := pool.Client.PullImage(docker.PullImageOptions{ @@ -173,7 +176,7 @@ func MakeTestNodes( } for i := 0; i < count; i++ { tn := &TestNode{Home: home, Index: i, ValidatorIndex: validatorIndex, Chain: chainType, ChainID: chainID, - Pool: pool, tl: tl, ec: simapp.MakeTestEncodingConfig()} + Pool: pool, networkID: networkID, tl: tl, ec: simapp.MakeTestEncodingConfig()} tn.MkDir() out = append(out, tn) } @@ -189,10 +192,11 @@ func GetValidators( chainID string, chain *ChainType, pool *dockertest.Pool, + networkID string, t 
*testing.T, ) (out TestNodes) { for i := startingValidatorIndex; i < startingValidatorIndex+count; i++ { - out = append(out, MakeTestNodes(i, sentriesPerValidator, home, chainID, chain, pool, t)...) + out = append(out, MakeTestNodes(i, sentriesPerValidator, home, chainID, chain, pool, networkID, t)...) } return } @@ -516,9 +520,9 @@ func stdconfigchanges(cfg *tmconfig.Config, peers string, enablePrivVal bool) { cfg.P2P.PersistentPeers = peers } -// NodeJob run a container for a specific job and block until the container exits +// Exec runs a container for a specific job and block until the container exits // NOTE: on job containers generate random name -func (tn *TestNode) NodeJob(ctx context.Context, cmd []string) (string, int, string, string, error) { +func (tn *TestNode) Exec(ctx context.Context, cmd []string) (string, string, error) { container := RandLowerCaseLetterString(10) tn.tl.Logf("{%s}[%s] -> '%s'", tn.Name(), container, strings.Join(cmd, " ")) cont, err := tn.Pool.Client.CreateContainer(docker.CreateContainerOptions{ @@ -543,76 +547,136 @@ func (tn *TestNode) NodeJob(ctx context.Context, cmd []string) (string, int, str Context: nil, }) if err != nil { - return container, 1, "", "", err + return "", "", err } if err := tn.Pool.Client.StartContainer(cont.ID, nil); err != nil { - return container, 1, "", "", err + return "", "", err } exitCode, err := tn.Pool.Client.WaitContainerWithContext(cont.ID, ctx) - stdout := new(bytes.Buffer) - stderr := new(bytes.Buffer) + outputStream := new(bytes.Buffer) + errorStream := new(bytes.Buffer) _ = tn.Pool.Client.Logs(docker.LogsOptions{ Context: ctx, Container: cont.ID, - OutputStream: stdout, - ErrorStream: stderr, + OutputStream: outputStream, + ErrorStream: errorStream, Stdout: true, Stderr: true, Tail: "100", Follow: false, Timestamps: false, }) + stdout := outputStream.String() + stderr := errorStream.String() _ = tn.Pool.Client.RemoveContainer(docker.RemoveContainerOptions{ID: cont.ID}) - return container, 
exitCode, stdout.String(), stderr.String(), err + return stdout, stderr, containerExitError(container, exitCode, stdout, stderr, err) } // InitHomeFolder initializes a home folder for the given node func (tn *TestNode) InitHomeFolder(ctx context.Context) error { - command := []string{tn.Chain.Bin, "init", tn.Name(), + cmd := []string{tn.Chain.Bin, "init", tn.Name(), "--chain-id", tn.ChainID, "--home", tn.NodeHome(), } - return handleNodeJobError(tn.NodeJob(ctx, command)) + _, _, err := tn.Exec(ctx, cmd) + return err } // CreateKey creates a key in the keyring backend test for the given node func (tn *TestNode) CreateKey(ctx context.Context, name string) error { - command := []string{tn.Chain.Bin, "keys", "add", name, + cmd := []string{tn.Chain.Bin, "keys", "add", name, "--keyring-backend", "test", "--output", "json", "--home", tn.NodeHome(), } - return handleNodeJobError(tn.NodeJob(ctx, command)) + _, _, err := tn.Exec(ctx, cmd) + return err } // AddGenesisAccount adds a genesis account for each key func (tn *TestNode) AddGenesisAccount(ctx context.Context, address string) error { - command := []string{tn.Chain.Bin, "add-genesis-account", address, "1000000000000stake", + cmd := []string{tn.Chain.Bin, "add-genesis-account", address, "1000000000000stake", "--home", tn.NodeHome(), } - return handleNodeJobError(tn.NodeJob(ctx, command)) + _, _, err := tn.Exec(ctx, cmd) + return err } // Gentx generates the gentx for a given node func (tn *TestNode) Gentx(ctx context.Context, name, pubKey string) error { - command := []string{tn.Chain.Bin, "gentx", valKey, "100000000000stake", + cmd := []string{tn.Chain.Bin, "gentx", valKey, "100000000000stake", "--pubkey", pubKey, "--keyring-backend", "test", "--home", tn.NodeHome(), "--chain-id", tn.ChainID, } - return handleNodeJobError(tn.NodeJob(ctx, command)) + _, _, err := tn.Exec(ctx, cmd) + return err } // CollectGentxs runs collect gentxs on the node's home folders func (tn *TestNode) CollectGentxs(ctx context.Context) error { 
- command := []string{tn.Chain.Bin, "collect-gentxs", + cmd := []string{tn.Chain.Bin, "collect-gentxs", "--home", tn.NodeHome(), } - return handleNodeJobError(tn.NodeJob(ctx, command)) + _, _, err := tn.Exec(ctx, cmd) + return err } -func (tn *TestNode) CreateNodeContainer(networkID string) error { +func (tn *TestNode) Start(ctx context.Context, preStart func()) error { + // Retry loop for running container. + err := retry.Do(func() error { + // forcefully remove existing container, ignoring error + _ = tn.StopAndRemoveContainer(true) + if err := tn.createContainer(); err != nil { + return err + } + if preStart != nil { + preStart() + } + if err := tn.startContainer(ctx); err != nil { + return err + } + + for i := 0; i < 10; i++ { + container, err := tn.Pool.Client.InspectContainer(tn.Container.ID) + if err != nil { + return err + } + if !container.State.Running { + return fmt.Errorf("container is not running") + } + + ctx, cancel := context.WithTimeout(ctx, 1*time.Second) + _, err = tn.Client.Status(ctx) + cancel() + if err == nil { + return nil + } + time.Sleep(1 * time.Second) + } + + return fmt.Errorf("node is running but not responding with status") + }, retry.DelayType(retry.FixedDelay), retry.Attempts(5)) + if err != nil { + return fmt.Errorf("error starting node container after max retries: %w", err) + } + + // Retry loop for in sync with chain + return retry.Do(func() error { + stat, err := tn.Client.Status(ctx) + if err != nil { + return err + } + if stat != nil && stat.SyncInfo.CatchingUp { + return fmt.Errorf("still catching up: height(%d) catching-up(%t)", + stat.SyncInfo.LatestBlockHeight, stat.SyncInfo.CatchingUp) + } + return nil + }, retry.DelayType(retry.BackOffDelay)) +} + +func (tn *TestNode) createContainer() error { cont, err := tn.Pool.Client.CreateContainer(docker.CreateContainerOptions{ Name: tn.Name(), Config: &docker.Config{ @@ -631,7 +695,7 @@ func (tn *TestNode) CreateNodeContainer(networkID string) error { }, NetworkingConfig: 
&docker.NetworkingConfig{ EndpointsConfig: map[string]*docker.EndpointConfig{ - networkID: {}, + tn.networkID: {}, }, }, Context: nil, @@ -643,12 +707,14 @@ func (tn *TestNode) CreateNodeContainer(networkID string) error { return nil } -func (tn *TestNode) StopContainer() error { - return tn.Pool.Client.StopContainer(tn.Container.ID, 60) -} - +// StopAndRemoveContainer stops and removes a TestSigners docker container. +// If force is true, error for stopping container will be ignored and container +// will be forcefully removed. func (tn *TestNode) StopAndRemoveContainer(force bool) error { - if err := tn.StopContainer(); err != nil && !force { + if tn.Container == nil { + return nil + } + if err := tn.Pool.Client.StopContainer(tn.Container.ID, 60); err != nil && !force { return err } return tn.Pool.Client.RemoveContainer(docker.RemoveContainerOptions{ @@ -657,7 +723,7 @@ func (tn *TestNode) StopAndRemoveContainer(force bool) error { }) } -func (tn *TestNode) StartContainer(ctx context.Context) error { +func (tn *TestNode) startContainer(ctx context.Context) error { if err := tn.Pool.Client.StartContainer(tn.Container.ID, nil); err != nil { return err } @@ -671,25 +737,7 @@ func (tn *TestNode) StartContainer(ctx context.Context) error { port := GetHostPort(c, "26657/tcp") tn.tl.Logf("{%s} RPC => %s", tn.Name(), port) - err = tn.NewClient(fmt.Sprintf("tcp://%s", port)) - if err != nil { - return err - } - - time.Sleep(5 * time.Second) - return retry.Do(func() error { - stat, err := tn.Client.Status(ctx) - if err != nil { - // tn.t.Log(err) - return err - } - // TODO: reenable this check, having trouble with it for some reason - if stat != nil && stat.SyncInfo.CatchingUp { - return fmt.Errorf("still catching up: height(%d) catching-up(%t)", - stat.SyncInfo.LatestBlockHeight, stat.SyncInfo.CatchingUp) - } - return nil - }, retry.DelayType(retry.BackOffDelay)) + return tn.NewClient(fmt.Sprintf("tcp://%s", port)) } func (tn *TestNode) Bech32AddressForKey(keyName string) 
(string, error) { @@ -747,7 +795,7 @@ func (tn *TestNode) InitFullNodeFiles(ctx context.Context) error { return tn.InitHomeFolder(ctx) } -func handleNodeJobError(container string, i int, stdout string, stderr string, err error) error { +func containerExitError(container string, i int, stdout string, stderr string, err error) error { if err != nil { return fmt.Errorf("%v\n%s\n%s", err, stdout, stderr) } diff --git a/test/test_setup.go b/test/test_setup.go index aa2a68e6..96c7a161 100644 --- a/test/test_setup.go +++ b/test/test_setup.go @@ -24,17 +24,17 @@ type TestLogger interface { Logf(string, ...interface{}) } -func SetupTestRun(t *testing.T) (context.Context, string, *dockertest.Pool, *docker.Network) { +func SetupTestRun(t *testing.T) (context.Context, string, *dockertest.Pool, string) { home := t.TempDir() pool, err := dockertest.NewPool("") require.NoError(t, err) // set the test cleanup function - t.Cleanup(Cleanup(pool, t.Name(), home)) + t.Cleanup(Cleanup(pool, t, home)) // run cleanup to cleanup stale resources from any killed tests - Cleanup(pool, t.Name(), home)() + Cleanup(pool, t, home)() network, err := CreateTestNetwork(pool, fmt.Sprintf("horcrux-%s", RandLowerCaseLetterString(8)), t) require.NoError(t, err) @@ -42,14 +42,13 @@ func SetupTestRun(t *testing.T) (context.Context, string, *dockertest.Pool, *doc // build the horcrux image require.NoError(t, BuildTestSignerImage(pool)) - return context.Background(), home, pool, network + return context.Background(), home, pool, network.ID } // assemble gentx, build genesis file, configure peering, and start chain func Genesis( tl TestLogger, ctx context.Context, - net *docker.Network, chain *ChainType, nonHorcruxValidators, fullnodes []*TestNode, @@ -166,16 +165,6 @@ func Genesis( return err } - for _, n := range nodes { - n := n - eg.Go(func() error { - return n.CreateNodeContainer(net.ID) - }) - } - if err := eg.Wait(); err != nil { - return err - } - peers := nodes.PeerString() // start horcrux 
sentries. privval listener enabled @@ -184,8 +173,9 @@ func Genesis( s := sentry tl.Logf("{%s} => starting container...", s.Name()) eg.Go(func() error { - s.SetValidatorConfigAndPeers(peers, true) - return s.StartContainer(ctx) + return s.Start(ctx, func() { + s.SetValidatorConfigAndPeers(peers, true) + }) }) } } @@ -195,8 +185,9 @@ func Genesis( v := v tl.Logf("{%s} => starting container...", v.Name()) eg.Go(func() error { - v.SetValidatorConfigAndPeers(peers, false) - return v.StartContainer(ctx) + return v.Start(ctx, func() { + v.SetValidatorConfigAndPeers(peers, false) + }) }) } @@ -205,8 +196,9 @@ func Genesis( n := n tl.Logf("{%s} => starting container...", n.Name()) eg.Go(func() error { - n.SetValidatorConfigAndPeers(peers, false) - return n.StartContainer(ctx) + return n.Start(ctx, func() { + n.SetValidatorConfigAndPeers(peers, false) + }) }) } @@ -244,16 +236,16 @@ func CreateTestNetwork(pool *dockertest.Pool, name string, t *testing.T) (*docke } // Cleanup will clean up Docker containers, networks, and the other various config files generated in testing -func Cleanup(pool *dockertest.Pool, testName, testDir string) func() { +func Cleanup(pool *dockertest.Pool, t *testing.T, testDir string) func() { return func() { cont, _ := pool.Client.ListContainers(docker.ListContainersOptions{All: true}) ctx := context.Background() for _, c := range cont { for k, v := range c.Labels { - if k == "horcrux-test" && v == testName { + if k == "horcrux-test" && v == t.Name() { _ = pool.Client.StopContainer(c.ID, 10) - _, err := pool.Client.WaitContainerWithContext(c.ID, ctx) - if err != nil { + _, _ = pool.Client.WaitContainerWithContext(c.ID, ctx) + if t.Failed() { stdout := new(bytes.Buffer) stderr := new(bytes.Buffer) _ = pool.Client.Logs(docker.LogsOptions{ @@ -276,7 +268,7 @@ func Cleanup(pool *dockertest.Pool, testName, testDir string) func() { nets, _ := pool.Client.ListNetworks() for _, n := range nets { for k, v := range n.Labels { - if k == "horcrux-test" && v == 
testName { + if k == "horcrux-test" && v == t.Name() { _ = pool.Client.RemoveNetwork(n.ID) } } diff --git a/test/test_signer.go b/test/test_signer.go index e9009f60..bd77b8ed 100644 --- a/test/test_signer.go +++ b/test/test_signer.go @@ -8,18 +8,25 @@ import ( "os" "path" "path/filepath" + "strconv" "strings" + "time" "github.com/ory/dockertest" "github.com/ory/dockertest/docker" "github.com/strangelove-ventures/horcrux/signer" + "github.com/strangelove-ventures/horcrux/signer/proto" tmjson "github.com/tendermint/tendermint/libs/json" "golang.org/x/sync/errgroup" + "google.golang.org/grpc" + "google.golang.org/grpc/credentials/insecure" ) -var ( - signerPort = "2222" - signerImage = "horcrux-test" +const ( + signerPort = "2222" + signerImage = "horcrux-test" + binary = "horcrux" + signerPortDocker = signerPort + "/tcp" ) // TestSigner represents a remote signer instance @@ -28,6 +35,7 @@ type TestSigner struct { Index int ValidatorIndex int Pool *dockertest.Pool + networkID string Container *docker.Container Key signer.CosignerKey tl TestLogger @@ -62,7 +70,6 @@ func StartSingleSignerContainers( testSigners TestSigners, validator *TestNode, sentryNodes TestNodes, - network *docker.Network, ) error { eg := new(errgroup.Group) ctx := context.Background() @@ -96,7 +103,7 @@ func StartSingleSignerContainers( for _, s := range testSigners { s := s eg.Go(func() error { - return s.CreateSingleSignerContainer(network.ID) + return s.CreateSingleSignerContainer() }) } if err := eg.Wait(); err != nil { @@ -123,7 +130,6 @@ func StartCosignerContainers( sentries TestNodes, threshold, total, sentriesPerSigner int, - network *docker.Network, ) error { eg := new(errgroup.Group) ctx := context.Background() @@ -201,7 +207,7 @@ func StartCosignerContainers( for _, s := range signers { s := s eg.Go(func() error { - return s.CreateCosignerContainer(network.ID) + return s.CreateCosignerContainer() }) } err = eg.Wait() @@ -233,13 +239,21 @@ func (ts TestSigners) PeerString(skip int) 
string { } // MakeTestSigners creates the TestSigner objects required for bootstrapping tests -func MakeTestSigners(validatorIndex, count int, home string, pool *dockertest.Pool, tl TestLogger) (out TestSigners) { +func MakeTestSigners( + validatorIndex int, + count int, + home string, + pool *dockertest.Pool, + networkID string, + tl TestLogger, +) (out TestSigners) { for i := 0; i < count; i++ { ts := &TestSigner{ Home: home, Index: i + 1, // +1 is to ensure all Cosigner IDs end up being >0 as required in cosigner.go ValidatorIndex: validatorIndex, Pool: pool, + networkID: networkID, Container: nil, Key: signer.CosignerKey{}, tl: tl, @@ -253,7 +267,7 @@ func (ts *TestSigner) GetHosts() (out Hosts) { host := ContainerPort{ Name: ts.Name(), Container: ts.Container, - Port: docker.Port(fmt.Sprintf("%s/tcp", signerPort)), + Port: docker.Port(signerPortDocker), } out = append(out, host) return @@ -288,15 +302,17 @@ func (ts *TestSigner) Name() string { return fmt.Sprintf("val-%d-sgn-%d-%s", ts.ValidatorIndex, ts.Index, ts.tl.Name()) } -// InitSingleSignerConfig creates and runs a container to init a single signers config files -// blocks until the container exits -func (ts *TestSigner) InitSingleSignerConfig(ctx context.Context, listenNodes TestNodes) error { +// GRPCAddress returns the TCP address of the GRPC server, +// reachable from within the docker network. +func (ts *TestSigner) GRPCAddress() string { + return fmt.Sprintf("tcp://%s:%s", ts.Name(), signerPort) +} + +// ExecHorcruxCmd executes a CLI subcommand for the horcrux binary for the specific cosigner. +// The config home directory will be appended as a flag. 
+func (ts *TestSigner) ExecHorcruxCmd(ctx context.Context, cmd ...string) error { + cmd = ts.horcruxCmd(cmd) container := RandLowerCaseLetterString(10) - cmd := []string{ - "horcrux", "config", "init", - listenNodes[0].ChainID, listenNodes.ListenAddrs(), - fmt.Sprintf("--home=%s", ts.Dir()), - } ts.tl.Logf("{%s}[%s] -> '%s'", ts.Name(), container, strings.Join(cmd, " ")) cont, err := ts.Pool.Client.CreateContainer(docker.CreateContainerOptions{ Name: container, @@ -304,7 +320,7 @@ func (ts *TestSigner) InitSingleSignerConfig(ctx context.Context, listenNodes Te User: getDockerUserString(), Hostname: container, ExposedPorts: map[docker.Port]struct{}{ - docker.Port(fmt.Sprintf("%s/tcp", signerPort)): {}, + docker.Port(signerPortDocker): {}, }, Image: signerImage, Cmd: cmd, @@ -324,7 +340,9 @@ func (ts *TestSigner) InitSingleSignerConfig(ctx context.Context, listenNodes Te }, }, NetworkingConfig: &docker.NetworkingConfig{ - EndpointsConfig: map[string]*docker.EndpointConfig{}, + EndpointsConfig: map[string]*docker.EndpointConfig{ + ts.networkID: {}, + }, }, Context: nil, }) @@ -335,13 +353,13 @@ func (ts *TestSigner) InitSingleSignerConfig(ctx context.Context, listenNodes Te return err } exitCode, err := ts.Pool.Client.WaitContainerWithContext(cont.ID, ctx) - stdout := new(bytes.Buffer) - stderr := new(bytes.Buffer) + outputStream := new(bytes.Buffer) + errorStream := new(bytes.Buffer) _ = ts.Pool.Client.Logs(docker.LogsOptions{ Context: ctx, Container: cont.ID, - OutputStream: stdout, - ErrorStream: stderr, + OutputStream: outputStream, + ErrorStream: errorStream, Stdout: true, Stderr: true, Tail: "100", @@ -349,77 +367,31 @@ func (ts *TestSigner) InitSingleSignerConfig(ctx context.Context, listenNodes Te Timestamps: false, }) _ = ts.Pool.Client.RemoveContainer(docker.RemoveContainerOptions{ID: cont.ID}) - return handleNodeJobError(container, exitCode, stdout.String(), stderr.String(), err) + stdout := outputStream.String() + stderr := errorStream.String() + return 
containerExitError(container, exitCode, stdout, stderr, err) +} + +// InitSingleSignerConfig creates and runs a container to init a single signers config files +// blocks until the container exits +func (ts *TestSigner) InitSingleSignerConfig(ctx context.Context, listenNodes TestNodes) error { + return ts.ExecHorcruxCmd(ctx, + "config", "init", + listenNodes[0].ChainID, listenNodes.ListenAddrs()) } // InitCosignerConfig creates and runs a container to init a signer nodes config files // blocks until the container exits func (ts *TestSigner) InitCosignerConfig( ctx context.Context, listenNodes TestNodes, peers TestSigners, skip, threshold int) error { - container := RandLowerCaseLetterString(10) - cmd := []string{ - "horcrux", "config", "init", + return ts.ExecHorcruxCmd(ctx, + "config", "init", listenNodes[0].ChainID, listenNodes.ListenAddrs(), "--cosigner", fmt.Sprintf("--peers=%s", peers.PeerString(skip)), fmt.Sprintf("--threshold=%d", threshold), - fmt.Sprintf("--home=%s", ts.Dir()), - fmt.Sprintf("--listen=tcp://%s:%s", ts.Name(), signerPort), - } - ts.tl.Logf("{%s}[%s] -> '%s'", ts.Name(), container, strings.Join(cmd, " ")) - cont, err := ts.Pool.Client.CreateContainer(docker.CreateContainerOptions{ - Name: container, - Config: &docker.Config{ - User: getDockerUserString(), - Hostname: container, - ExposedPorts: map[docker.Port]struct{}{ - docker.Port(fmt.Sprintf("%s/tcp", signerPort)): {}, - }, - Image: signerImage, - Cmd: cmd, - Labels: map[string]string{"horcrux-test": ts.tl.Name()}, - }, - HostConfig: &docker.HostConfig{ - PublishAllPorts: true, - AutoRemove: false, - Mounts: []docker.HostMount{ - { - Type: "bind", - Source: ts.Home, - Target: ts.Home, - ReadOnly: false, - BindOptions: nil, - }, - }, - }, - NetworkingConfig: &docker.NetworkingConfig{ - EndpointsConfig: map[string]*docker.EndpointConfig{}, - }, - Context: nil, - }) - if err != nil { - return err - } - if err := ts.Pool.Client.StartContainer(cont.ID, nil); err != nil { - return err - } - - 
exitCode, err := ts.Pool.Client.WaitContainerWithContext(cont.ID, ctx) - stdout := new(bytes.Buffer) - stderr := new(bytes.Buffer) - _ = ts.Pool.Client.Logs(docker.LogsOptions{ - Context: ctx, - Container: cont.ID, - OutputStream: stdout, - ErrorStream: stderr, - Stdout: true, - Stderr: true, - Tail: "100", - Follow: false, - Timestamps: false, - }) - _ = ts.Pool.Client.RemoveContainer(docker.RemoveContainerOptions{ID: cont.ID}) - return handleNodeJobError(container, exitCode, stdout.String(), stderr.String(), err) + fmt.Sprintf("--listen=%s", ts.GRPCAddress()), + ) } // StartContainer starts a TestSigners container and assigns the new running container to replace the old one @@ -437,13 +409,11 @@ func (ts *TestSigner) StartContainer() error { return nil } -// StopContainer stops a TestSigners docker container -func (ts *TestSigner) StopContainer() error { - return ts.Pool.Client.StopContainer(ts.Container.ID, 60) -} - +// StopAndRemoveContainer stops and removes a TestSigners docker container. +// If force is true, error for stopping container will be ignored and container +// will be forcefully removed. 
func (ts *TestSigner) StopAndRemoveContainer(force bool) error { - if err := ts.StopContainer(); err != nil && !force { + if err := ts.Pool.Client.StopContainer(ts.Container.ID, 60); err != nil && !force { return err } return ts.Pool.Client.RemoveContainer(docker.RemoveContainerOptions{ @@ -461,15 +431,15 @@ func (ts *TestSigner) UnpauseContainer() error { } // CreateSingleSignerContainer creates a docker container to run a single signer -func (ts *TestSigner) CreateSingleSignerContainer(networkID string) error { +func (ts *TestSigner) CreateSingleSignerContainer() error { cont, err := ts.Pool.Client.CreateContainer(docker.CreateContainerOptions{ Name: ts.Name(), Config: &docker.Config{ User: getDockerUserString(), - Cmd: []string{"horcrux", "signer", "start", fmt.Sprintf("--home=%s", ts.Dir())}, + Cmd: []string{binary, "signer", "start", fmt.Sprintf("--home=%s", ts.Dir())}, Hostname: ts.Name(), ExposedPorts: map[docker.Port]struct{}{ - docker.Port(fmt.Sprintf("%s/tcp", signerPort)): {}, + docker.Port(signerPortDocker): {}, }, DNS: []string{}, Image: signerImage, @@ -490,7 +460,7 @@ func (ts *TestSigner) CreateSingleSignerContainer(networkID string) error { }, NetworkingConfig: &docker.NetworkingConfig{ EndpointsConfig: map[string]*docker.EndpointConfig{ - networkID: {}, + ts.networkID: {}, }, }, Context: nil, @@ -503,15 +473,15 @@ func (ts *TestSigner) CreateSingleSignerContainer(networkID string) error { } // CreateCosignerContainer creates a docker container to run a mpc validator node -func (ts *TestSigner) CreateCosignerContainer(networkID string) error { +func (ts *TestSigner) CreateCosignerContainer() error { cont, err := ts.Pool.Client.CreateContainer(docker.CreateContainerOptions{ Name: ts.Name(), Config: &docker.Config{ User: getDockerUserString(), - Cmd: []string{"horcrux", "cosigner", "start", fmt.Sprintf("--home=%s", ts.Dir())}, + Cmd: []string{binary, "cosigner", "start", fmt.Sprintf("--home=%s", ts.Dir())}, Hostname: ts.Name(), ExposedPorts: 
map[docker.Port]struct{}{ - docker.Port(fmt.Sprintf("%s/tcp", signerPort)): {}, + docker.Port(signerPortDocker): {}, }, DNS: []string{}, Image: signerImage, @@ -532,7 +502,7 @@ func (ts *TestSigner) CreateCosignerContainer(networkID string) error { }, NetworkingConfig: &docker.NetworkingConfig{ EndpointsConfig: map[string]*docker.EndpointConfig{ - networkID: {}, + ts.networkID: {}, }, }, Context: nil, @@ -543,3 +513,41 @@ func (ts *TestSigner) CreateCosignerContainer(networkID string) error { ts.Container = cont return nil } + +// TransferLeadership elects a new raft leader. +func (ts *TestSigner) TransferLeadership(ctx context.Context, newLeaderID int) error { + return ts.ExecHorcruxCmd(ctx, + "elect", strconv.FormatInt(int64(newLeaderID), 10), + ) +} + +// GetLeader returns the current raft leader. +func (ts *TestSigner) GetLeader(ctx context.Context) (string, error) { + grpcAddress := GetHostPort(ts.Container, signerPortDocker) + conn, err := grpc.Dial(grpcAddress, + grpc.WithTransportCredentials(insecure.NewCredentials()), + grpc.WithDefaultCallOptions(grpc.WaitForReady(true)), + ) + if err != nil { + return "", fmt.Errorf("dialing failed: %w", err) + } + defer conn.Close() + + ctx, cancelFunc := context.WithTimeout(ctx, 10*time.Second) + defer cancelFunc() + + grpcClient := proto.NewCosignerGRPCClient(conn) + + res, err := grpcClient.GetLeader(ctx, &proto.CosignerGRPCGetLeaderRequest{}) + if err != nil { + return "", err + } + return res.GetLeader(), nil +} + +func (ts *TestSigner) horcruxCmd(cmd []string) (out []string) { + out = append(out, binary) + out = append(out, cmd...) 
+ out = append(out, fmt.Sprintf("--home=%s", ts.Dir())) + return out +} diff --git a/test/test_validator.go b/test/test_validator.go index 99e465eb..d3015919 100644 --- a/test/test_validator.go +++ b/test/test_validator.go @@ -7,7 +7,6 @@ import ( "path/filepath" "github.com/ory/dockertest" - "github.com/ory/dockertest/docker" "github.com/strangelove-ventures/horcrux/signer" crypto "github.com/tendermint/tendermint/crypto" ed25519 "github.com/tendermint/tendermint/crypto/ed25519" @@ -28,6 +27,7 @@ type TestValidator struct { func NewHorcruxValidator( tl TestLogger, pool *dockertest.Pool, + networkID string, home string, chainID string, index int, @@ -38,8 +38,8 @@ func NewHorcruxValidator( ) (*TestValidator, error) { testValidator := &TestValidator{ Index: index, - Sentries: MakeTestNodes(index, numSentries, home, chainID, chainType, pool, tl), - Signers: MakeTestSigners(index, numSigners, home, pool, tl), + Sentries: MakeTestNodes(index, numSentries, home, chainID, chainType, pool, networkID, tl), + Signers: MakeTestSigners(index, numSigners, home, pool, networkID, tl), tl: tl, Home: home, Threshold: threshold, @@ -53,6 +53,7 @@ func NewHorcruxValidator( func NewHorcruxValidatorWithPrivValKey( tl TestLogger, pool *dockertest.Pool, + networkID string, home string, chainID string, index int, @@ -64,8 +65,8 @@ func NewHorcruxValidatorWithPrivValKey( ) (*TestValidator, error) { testValidator := &TestValidator{ Index: index, - Sentries: MakeTestNodes(index, numSentries, home, chainID, chainType, pool, tl), - Signers: MakeTestSigners(index, numSigners, home, pool, tl), + Sentries: MakeTestNodes(index, numSentries, home, chainID, chainType, pool, networkID, tl), + Signers: MakeTestSigners(index, numSigners, home, pool, networkID, tl), tl: tl, Home: home, Threshold: threshold, @@ -120,11 +121,10 @@ func (tv *TestValidator) generateShares(filePVKey privval.FilePVKey) error { func (tv *TestValidator) StartHorcruxCluster( ctx context.Context, - network *docker.Network, 
sentriesPerSigner int, ) error { return StartCosignerContainers(tv.Signers, tv.Sentries, - tv.Threshold, len(tv.Signers), sentriesPerSigner, network) + tv.Threshold, len(tv.Signers), sentriesPerSigner) } func (tv *TestValidator) WaitForConsecutiveBlocks(blocks int64) error { From 8b0803d6482d0ff597b1bee1a53ed61cedc96aee Mon Sep 17 00:00:00 2001 From: Andrew Gouin Date: Fri, 21 Oct 2022 10:37:54 -0600 Subject: [PATCH 38/44] Threshold parameter validation (#112) * Add assertions for t > n/2 for both key sharding and cosigner daemon start * Show container logs for failed tests * Update tests * Fix set state test * Prefer t.Setenv in tests. Fix error message --- cmd/horcrux/cmd/config.go | 21 +++++--- cmd/horcrux/cmd/config_test.go | 80 +++++++----------------------- cmd/horcrux/cmd/key2shares.go | 31 +++++++++--- cmd/horcrux/cmd/key2shares_test.go | 71 ++++++++++++++++++++++++++ cmd/horcrux/cmd/state_test.go | 15 ++---- 5 files changed, 130 insertions(+), 88 deletions(-) create mode 100644 cmd/horcrux/cmd/key2shares_test.go diff --git a/cmd/horcrux/cmd/config.go b/cmd/horcrux/cmd/config.go index fe72c8b7..734ea10e 100644 --- a/cmd/horcrux/cmd/config.go +++ b/cmd/horcrux/cmd/config.go @@ -197,9 +197,13 @@ func validateCosignerConfig(cfg DiskConfig) error { if cfg.CosignerConfig == nil { return fmt.Errorf("cosigner config can't be empty") } - if len(cfg.CosignerConfig.Peers)+1 < cfg.CosignerConfig.Threshold { - return fmt.Errorf("number of peers + 1 (%d) must be greater than threshold (%d)", - len(cfg.CosignerConfig.Peers)+1, cfg.CosignerConfig.Threshold) + if cfg.CosignerConfig.Threshold <= cfg.CosignerConfig.Shares/2 { + return fmt.Errorf("threshold (%d) must be greater than number of shares (%d) / 2", + cfg.CosignerConfig.Threshold, cfg.CosignerConfig.Shares) + } + if cfg.CosignerConfig.Shares < cfg.CosignerConfig.Threshold { + return fmt.Errorf("number of shares (%d) must be greater or equal to threshold (%d)", + cfg.CosignerConfig.Shares, 
cfg.CosignerConfig.Threshold) } _, err := time.ParseDuration(cfg.CosignerConfig.Timeout) @@ -335,6 +339,7 @@ func addPeersCmd() *cobra.Command { return errors.New("no new peer nodes in args") } diff = append(config.Config.CosignerConfig.Peers, diff...) + config.Config.CosignerConfig.Shares = len(diff) + 1 if err := validateCosignerPeers(diff, config.Config.CosignerConfig.Shares); err != nil { return err } @@ -378,6 +383,8 @@ func removePeersCmd() *cobra.Command { if len(diff) == 0 { return errors.New("cannot remove all peer nodes from config, please leave at least one") } + + config.Config.CosignerConfig.Shares = len(diff) + 1 // If none of the peer nodes in the args are listed in the config, just continue // without throwing an error, as the peer nodes in the config remain untouched. if err := validateCosignerPeers(diff, config.Config.CosignerConfig.Shares); err != nil { @@ -584,11 +591,11 @@ func validateCosignerPeers(peers []CosignerPeer, shares int) error { } } - // Check that no more than {num-shares}-1 peers are in the peer list, assuming + // Check that exactly {num-shares}-1 peers are in the peer list, assuming // the remaining peer ID is the ID the local node is configured with. - if len(peers) == shares { - return fmt.Errorf("too many peers (%v+local node = %v) for the specified number of key shares (%v)", - len(peers), len(peers)+1, shares) + if len(peers) != shares-1 { + return fmt.Errorf("incorrect number of peers. 
expected (%d shares - local node = %d peers)", + shares, shares-1) } return nil } diff --git a/cmd/horcrux/cmd/config_test.go b/cmd/horcrux/cmd/config_test.go index 199493a7..eea3952a 100644 --- a/cmd/horcrux/cmd/config_test.go +++ b/cmd/horcrux/cmd/config_test.go @@ -16,7 +16,7 @@ const ( ) func TestConfigInitCmd(t *testing.T) { - tmpHome := "/tmp/TestConfigInitCmd" + tmpHome := t.TempDir() tcs := []struct { name string home string @@ -71,9 +71,8 @@ func TestConfigInitCmd(t *testing.T) { t.Run(tc.name, func(t *testing.T) { tmpConfig := filepath.Join(tc.home, ".horcrux") - err := os.Setenv("HOME", tc.home) - require.NoError(t, err) - err = os.MkdirAll(tc.home, 0777) + t.Setenv("HOME", tc.home) + err := os.MkdirAll(tc.home, 0777) require.NoError(t, err) cmd := initCmd() @@ -106,24 +105,10 @@ func TestConfigInitCmd(t *testing.T) { } }) } - - t.Cleanup(func() { - files, err := filepath.Glob(tmpHome + "*") - require.NoError(t, err) - - for _, file := range files { - os.RemoveAll(file) - } - }) } func TestConfigChainIDSetCmd(t *testing.T) { - tmpHome := "/tmp/TestConfigChainIDSetCmd" - - err := os.Setenv("HOME", tmpHome) - require.NoError(t, err) - err = os.MkdirAll(tmpHome, 0777) - require.NoError(t, err) + t.Setenv("HOME", t.TempDir()) cmd := initCmd() cmd.SetOutput(io.Discard) @@ -136,7 +121,7 @@ func TestConfigChainIDSetCmd(t *testing.T) { "-l", "tcp://10.168.1.1:2222", "--timeout", "1500ms", }) - err = cmd.Execute() + err := cmd.Execute() require.NoError(t, err) tcs := []struct { @@ -171,19 +156,10 @@ func TestConfigChainIDSetCmd(t *testing.T) { } }) } - - t.Cleanup(func() { - os.RemoveAll(tmpHome) - }) } func TestConfigNodesAddAndRemove(t *testing.T) { - tmpHome := "/tmp/TestConfigNodesAddAndRemove" - - err := os.Setenv("HOME", tmpHome) - require.NoError(t, err) - err = os.MkdirAll(tmpHome, 0777) - require.NoError(t, err) + t.Setenv("HOME", t.TempDir()) cmd := initCmd() cmd.SetOutput(io.Discard) @@ -196,7 +172,7 @@ func TestConfigNodesAddAndRemove(t *testing.T) { 
"-l", "tcp://10.168.1.1:2222", "--timeout", "1500ms", }) - err = cmd.Execute() + err := cmd.Execute() require.NoError(t, err) tcs := []struct { @@ -317,19 +293,10 @@ func TestConfigNodesAddAndRemove(t *testing.T) { require.Equal(t, tc.expectNodes, config.Config.ChainNodes) }) } - - t.Cleanup(func() { - os.RemoveAll(tmpHome) - }) } func TestConfigPeersAddAndRemove(t *testing.T) { - tmpHome := "/tmp/TestConfigPeersAddAndRemove" - - err := os.Setenv("HOME", tmpHome) - require.NoError(t, err) - err = os.MkdirAll(tmpHome, 0777) - require.NoError(t, err) + t.Setenv("HOME", t.TempDir()) cmd := initCmd() cmd.SetOutput(io.Discard) @@ -338,11 +305,11 @@ func TestConfigPeersAddAndRemove(t *testing.T) { "tcp://10.168.0.1:1234", "-c", "-p", "tcp://10.168.1.2:2222|2,tcp://10.168.1.3:2222|3,tcp://10.168.1.4:2222|4", - "-t", "2", + "-t", "3", "-l", "tcp://10.168.1.1:2222", "--timeout", "1500ms", }) - err = cmd.Execute() + err := cmd.Execute() require.NoError(t, err) tcs := []struct { @@ -439,7 +406,7 @@ func TestConfigPeersAddAndRemove(t *testing.T) { { name: "add peer with ID out of range", cmd: addPeersCmd(), - args: []string{"tcp://10.168.1.5:2222|5"}, + args: []string{"tcp://10.168.1.5:2222|6"}, expectPeers: []CosignerPeer{ {ShareID: 2, P2PAddr: "tcp://10.168.1.2:2222"}, {ShareID: 3, P2PAddr: "tcp://10.168.1.3:2222"}, @@ -464,10 +431,6 @@ func TestConfigPeersAddAndRemove(t *testing.T) { require.Equal(t, tc.expectPeers, config.Config.CosignerConfig.Peers) }) } - - t.Cleanup(func() { - os.RemoveAll(tmpHome) - }) } func TestDiffSetChainNode(t *testing.T) { @@ -603,12 +566,7 @@ func TestDiffSetCosignerPeer(t *testing.T) { } func TestSetShares(t *testing.T) { - tmpHome := "/tmp/TestSetShares" - - err := os.Setenv("HOME", tmpHome) - require.NoError(t, err) - err = os.MkdirAll(tmpHome, 0777) - require.NoError(t, err) + t.Setenv("HOME", t.TempDir()) cmd := initCmd() cmd.SetOutput(io.Discard) @@ -621,7 +579,7 @@ func TestSetShares(t *testing.T) { "-l", "tcp://10.168.1.1:2222", 
"--timeout", "1500ms", }) - err = cmd.Execute() + err := cmd.Execute() require.NoError(t, err) tcs := []struct { @@ -632,20 +590,20 @@ func TestSetShares(t *testing.T) { }{ // Do NOT change the order of the test cases! { name: "valid number of shares", - args: []string{"4"}, - expectShares: 4, + args: []string{"3"}, + expectShares: 3, expectErr: false, }, { name: "too few shares for number of peers", args: []string{"1"}, - expectShares: 4, + expectShares: 3, expectErr: true, }, { name: "invalid number of shares", args: []string{"-1"}, - expectShares: 4, + expectShares: 3, expectErr: true, }, } @@ -666,8 +624,4 @@ func TestSetShares(t *testing.T) { } }) } - - t.Cleanup(func() { - os.RemoveAll(tmpHome) - }) } diff --git a/cmd/horcrux/cmd/key2shares.go b/cmd/horcrux/cmd/key2shares.go index 75adc88b..25226f10 100644 --- a/cmd/horcrux/cmd/key2shares.go +++ b/cmd/horcrux/cmd/key2shares.go @@ -36,10 +36,17 @@ func CreateCosignerSharesCmd() *cobra.Command { Args: validateCreateCosignerShares, Short: "Create cosigner shares", RunE: func(cmd *cobra.Command, args []string) (err error) { - threshold, _ := strconv.ParseInt(args[1], 10, 64) - numShares, _ := strconv.ParseInt(args[2], 10, 64) + threshold, shares := args[1], args[2] + t, err := strconv.ParseInt(threshold, 10, 64) + if err != nil { + return fmt.Errorf("error parsing threshold (%s): %w", threshold, err) + } + n, err := strconv.ParseInt(shares, 10, 64) + if err != nil { + return fmt.Errorf("error parsing shares (%s): %w", shares, err) + } - csKeys, err := signer.CreateCosignerSharesFromFile(args[0], threshold, numShares) + csKeys, err := signer.CreateCosignerSharesFromFile(args[0], t, n) if err != nil { return err } @@ -66,11 +73,21 @@ func validateCreateCosignerShares(cmd *cobra.Command, args []string) error { if !os.FileExists(args[0]) { return fmt.Errorf("priv_validator.json file(%s) doesn't exist", args[0]) } - if _, err := strconv.ParseInt(args[1], 10, 64); err != nil { - return fmt.Errorf("shards must be an 
integer got(%s)", args[1]) + threshold, shares := args[1], args[2] + t, err := strconv.ParseInt(threshold, 10, 64) + if err != nil { + return fmt.Errorf("error parsing threshold (%s): %w", threshold, err) + } + n, err := strconv.ParseInt(shares, 10, 64) + if err != nil { + return fmt.Errorf("error parsing shares (%s): %w", shares, err) + } + if t > n { + return fmt.Errorf("threshold cannot be greater than total shares, got [threshold](%d) > [shares](%d)", t, n) } - if _, err := strconv.ParseInt(args[2], 10, 64); err != nil { - return fmt.Errorf("threshold must be an integer got(%s)", args[2]) + if t <= n/2 { + return fmt.Errorf("threshold must be greater than total shares "+ + "divided by 2, got [threshold](%d) <= [shares](%d) / 2", t, n) } return nil } diff --git a/cmd/horcrux/cmd/key2shares_test.go b/cmd/horcrux/cmd/key2shares_test.go new file mode 100644 index 00000000..4853a163 --- /dev/null +++ b/cmd/horcrux/cmd/key2shares_test.go @@ -0,0 +1,71 @@ +package cmd + +import ( + "io" + "path/filepath" + "testing" + + "github.com/stretchr/testify/require" + "github.com/tendermint/tendermint/crypto/ed25519" + "github.com/tendermint/tendermint/privval" +) + +func TestKey2Shares(t *testing.T) { + tmp := t.TempDir() + + privValidatorKeyFile := filepath.Join(tmp, "priv_validator_key.json") + privValidatorStateFile := filepath.Join(tmp, "priv_validator_state.json") + pv := privval.NewFilePV(ed25519.GenPrivKey(), privValidatorKeyFile, privValidatorStateFile) + pv.Save() + + tcs := []struct { + name string + args []string + expectErr bool + }{ + { + name: "valid threshold and shares", + args: []string{privValidatorKeyFile, "2", "3"}, + expectErr: false, + }, + { + name: "valid threshold and shares 2", + args: []string{privValidatorKeyFile, "3", "5"}, + expectErr: false, + }, + { + name: "threshold exactly half of shares", + args: []string{privValidatorKeyFile, "2", "4"}, + expectErr: true, + }, + { + name: "threshold less than half of shares", + args: 
[]string{privValidatorKeyFile, "1", "3"}, + expectErr: true, + }, + { + name: "threshold exceeds shares", + args: []string{privValidatorKeyFile, "4", "3"}, + expectErr: true, + }, + { + name: "non-numeric threshold and shares", + args: []string{privValidatorKeyFile, "two", "three"}, + expectErr: true, + }, + } + + for _, tc := range tcs { + t.Run(tc.name, func(t *testing.T) { + cmd := CreateCosignerSharesCmd() + cmd.SetOutput(io.Discard) + cmd.SetArgs(tc.args) + err := cmd.Execute() + if tc.expectErr { + require.Error(t, err) + } else { + require.NoError(t, err) + } + }) + } +} diff --git a/cmd/horcrux/cmd/state_test.go b/cmd/horcrux/cmd/state_test.go index ebf7f2cd..053e549e 100644 --- a/cmd/horcrux/cmd/state_test.go +++ b/cmd/horcrux/cmd/state_test.go @@ -2,7 +2,6 @@ package cmd import ( "io" - "os" "path/filepath" "strconv" "testing" @@ -13,14 +12,11 @@ import ( ) func TestStateSetCmd(t *testing.T) { - tmpHome := "/tmp/TestStateSetCmd" + tmpHome := t.TempDir() tmpConfig := filepath.Join(tmpHome, ".horcrux") chainid := "horcrux-1" - err := os.Setenv("HOME", tmpHome) - require.NoError(t, err) - err = os.MkdirAll(tmpHome, 0777) - require.NoError(t, err) + t.Setenv("HOME", tmpHome) cmd := initCmd() cmd.SetOutput(io.Discard) @@ -28,11 +24,12 @@ func TestStateSetCmd(t *testing.T) { chainid, "tcp://10.168.0.1:1234", "-c", + "-t", "2", "-p", "tcp://10.168.1.2:2222|2,tcp://10.168.1.3:2222|3", "-l", "tcp://10.168.1.1:2222", "--timeout", "1500ms", }) - err = cmd.Execute() + err := cmd.Execute() require.NoError(t, err) tcs := []struct { @@ -89,8 +86,4 @@ func TestStateSetCmd(t *testing.T) { } }) } - - t.Cleanup(func() { - os.RemoveAll(tmpHome) - }) } From 25c07708294bf542e890a8573dd565537472c13b Mon Sep 17 00:00:00 2001 From: r4f43l Date: Mon, 31 Oct 2022 18:54:27 +0100 Subject: [PATCH 39/44] Minor corrections and fixes --- signer/threshold_signer_soft.go | 77 +++++++++++++++------------------ 1 file changed, 35 insertions(+), 42 deletions(-) diff --git 
a/signer/threshold_signer_soft.go b/signer/threshold_signer_soft.go index 4fd275e1..eddd5efc 100644 --- a/signer/threshold_signer_soft.go +++ b/signer/threshold_signer_soft.go @@ -18,31 +18,30 @@ import ( // ThresholdSignerSoft implements the interface and signs the message for each local signer. // ThresholdSignerSoft is the implementation of a soft sign signer at the local level. type ThresholdSignerSoft struct { - PubKeyBytes []byte - Key CosignerKey - // Total signers - Total uint8 - Threshold uint8 + pubKeyBytes []byte + key CosignerKey + // total signers + total uint8 + threshold uint8 // Height, Round, Step, Timestamp --> metadata - HrsMeta map[HRSTKey]HrsMetadata + hrsMeta map[HRSTKey]HrsMetadata } // NewThresholdSignerSoft constructs a ThresholdSigner // that signs using the local key share file. func NewThresholdSignerSoft(key CosignerKey, threshold, total uint8) ThresholdSigner { softSigner := &ThresholdSignerSoft{ - Key: key, - HrsMeta: make(map[HRSTKey]HrsMetadata), - Total: total, - Threshold: threshold, + key: key, + hrsMeta: make(map[HRSTKey]HrsMetadata), + total: total, + threshold: threshold, } // cache the public key bytes for signing operations. // Ensures casting else it will naturally panic. 
- ed25519Key := softSigner.Key.PubKey.(tmcryptoed25519.PubKey) - softSigner.PubKeyBytes = make([]byte, len(ed25519Key)) - softSigner.PubKeyBytes = ed25519Key[:] - // copy(softSigner.PubKeyBytes, ed25519Key[:]) + ed25519Key := softSigner.key.PubKey.(tmcryptoed25519.PubKey) + softSigner.pubKeyBytes = make([]byte, len(ed25519Key)) + softSigner.pubKeyBytes = ed25519Key[:] return softSigner } @@ -54,7 +53,7 @@ func (softSigner *ThresholdSignerSoft) Type() string { // Implements ThresholdSigner func (softSigner *ThresholdSignerSoft) GetID() (int, error) { - return softSigner.Key.ID, nil + return softSigner.key.ID, nil } // Implements ThresholdSigner @@ -75,22 +74,20 @@ func (softSigner *ThresholdSignerSoft) Sign( if err != nil { return res, err } - // If the HRS is the same the sign bytes may still differ by timestamp // It is ok to re-sign a different timestamp if that is the only difference in the sign bytes + // same HRS, and only differ by timestamp its ok to sign again if sameHRS { if bytes.Equal(req.SignBytes, lss.SignBytes) { res.EphemeralPublic = lss.EphemeralPublic res.Signature = lss.Signature return res, nil } else if err := lss.OnlyDifferByTimestamp(req.SignBytes); err != nil { - return res, err + return res, err // same HRS, and only differ by timestamp its ok to sign again } - - // same HRS, and only differ by timestamp - ok to sign again } - meta, ok := softSigner.HrsMeta[hrst] + meta, ok := softSigner.hrsMeta[hrst] if !ok { return res, errors.New("no metadata at HRS") } @@ -123,7 +120,7 @@ func (softSigner *ThresholdSignerSoft) Sign( } sig := tsed25519.SignWithShare( - req.SignBytes, softSigner.Key.ShareKey, ephemeralShare, softSigner.PubKeyBytes, ephemeralPublic) + req.SignBytes, softSigner.key.ShareKey, ephemeralShare, softSigner.pubKeyBytes, ephemeralPublic) m.LastSignState.EphemeralPublic = ephemeralPublic err = m.LastSignState.Save(SignStateConsensus{ @@ -133,23 +130,18 @@ func (softSigner *ThresholdSignerSoft) Sign( Signature: sig, SignBytes: 
req.SignBytes, }, nil, true) - if err != nil { var isSameHRSError *SameHRSError - // If error is if !errors.As(err, &isSameHRSError) { return res, err } - // if _, isSameHRSError := err.(*SameHRSError); !isSameHRSError { - // return res, err - // } } - for existingKey := range softSigner.HrsMeta { + for existingKey := range softSigner.hrsMeta { // delete any HRS lower than our signed level // we will not be providing parts for any lower HRS if existingKey.Less(hrst) { - delete(softSigner.HrsMeta, existingKey) + delete(softSigner.hrsMeta, existingKey) } } @@ -168,7 +160,7 @@ func (softSigner *ThresholdSignerSoft) DealShares( Timestamp: req.Timestamp.UnixNano(), } - meta, ok := softSigner.HrsMeta[hrsKey] + meta, ok := softSigner.hrsMeta[hrsKey] if ok { return meta, nil } @@ -180,14 +172,14 @@ func (softSigner *ThresholdSignerSoft) DealShares( meta = HrsMetadata{ Secret: secret, - Peers: make([]PeerMetadata, softSigner.Total), + Peers: make([]PeerMetadata, softSigner.total), } // split this secret with shamirs // !! dealt shares need to be saved because dealing produces different shares each time! 
- meta.DealtShares = tsed25519.DealShares(meta.Secret, softSigner.Threshold, softSigner.Total) + meta.DealtShares = tsed25519.DealShares(meta.Secret, softSigner.threshold, softSigner.total) - softSigner.HrsMeta[hrsKey] = meta + softSigner.hrsMeta[hrsKey] = meta return meta, nil } @@ -212,7 +204,7 @@ func (softSigner *ThresholdSignerSoft) GetEphemeralSecretPart( Timestamp: req.Timestamp.UnixNano(), } - meta, ok := softSigner.HrsMeta[hrst] + meta, ok := softSigner.hrsMeta[hrst] // generate metadata placeholder if !ok { newMeta, err := softSigner.DealShares(CosignerGetEphemeralSecretPartRequest{ @@ -227,14 +219,14 @@ func (softSigner *ThresholdSignerSoft) GetEphemeralSecretPart( } meta = newMeta - softSigner.HrsMeta[hrst] = meta + softSigner.hrsMeta[hrst] = meta } ourEphPublicKey := tsed25519.ScalarMultiplyBase(meta.Secret) // set our values - meta.Peers[softSigner.Key.ID-1].Share = meta.DealtShares[softSigner.Key.ID-1] - meta.Peers[softSigner.Key.ID-1].EphemeralSecretPublicKey = ourEphPublicKey + meta.Peers[softSigner.key.ID-1].Share = meta.DealtShares[softSigner.key.ID-1] + meta.Peers[softSigner.key.ID-1].EphemeralSecretPublicKey = ourEphPublicKey // grab the peer info for the ID being requested peer, ok := peers[req.ID] @@ -250,7 +242,7 @@ func (softSigner *ThresholdSignerSoft) GetEphemeralSecretPart( return res, err } - res.SourceID = softSigner.Key.ID + res.SourceID = softSigner.key.ID res.SourceEphemeralSecretPublicKey = ourEphPublicKey res.EncryptedSharePart = encrypted @@ -264,7 +256,7 @@ func (softSigner *ThresholdSignerSoft) GetEphemeralSecretPart( } digest := sha256.Sum256(jsonBytes) - signature, err := rsa.SignPSS(rand.Reader, &softSigner.Key.RSAKey, crypto.SHA256, digest[:], nil) + signature, err := rsa.SignPSS(rand.Reader, &softSigner.key.RSAKey, crypto.SHA256, digest[:], nil) if err != nil { return res, err @@ -323,7 +315,8 @@ func (softSigner *ThresholdSignerSoft) SetEphemeralSecretPart( Step: req.Step, Timestamp: req.Timestamp.UnixNano(), } - meta, ok 
:= softSigner.HrsMeta[hrst] // generate metadata placeholder, softSigner.HrsMeta[hrst] is non-addressable + + meta, ok := softSigner.hrsMeta[hrst] // generate metadata placeholder, softSigner.HrsMeta[hrst] is non-addressable if !ok { newMeta, err := softSigner.DealShares(CosignerGetEphemeralSecretPartRequest{ Height: req.Height, @@ -333,12 +326,12 @@ func (softSigner *ThresholdSignerSoft) SetEphemeralSecretPart( if err != nil { return err } - meta = newMeta - softSigner.HrsMeta[hrst] = meta // updates the metadata placeholder + softSigner.hrsMeta[hrst] = meta // updates the metadata placeholder } + // decrypt share - sharePart, err := rsa.DecryptOAEP(sha256.New(), rand.Reader, &softSigner.Key.RSAKey, req.EncryptedSharePart, nil) + sharePart, err := rsa.DecryptOAEP(sha256.New(), rand.Reader, &softSigner.key.RSAKey, req.EncryptedSharePart, nil) if err != nil { return err } From 9c204b5ca98b09220b91ef4424439ac7e2b6ebac Mon Sep 17 00:00:00 2001 From: Andrew Gouin Date: Mon, 31 Oct 2022 12:53:30 -0600 Subject: [PATCH 40/44] Update metrics.md (#117) Resolves #116 --- docs/metrics.md | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/docs/metrics.md b/docs/metrics.md index 2116710f..a7090fc3 100644 --- a/docs/metrics.md +++ b/docs/metrics.md @@ -1,14 +1,14 @@ # Prometheus Metrics ## Enabling Prometheus -Specify the port for incoming prometheus connections during 'config init' by using the -m flag. +Specify the port for incoming prometheus connections during 'config init' by using the -d flag. ``` -horcrux ..options.. -m 0.0.0.0:8001 +horcrux ..options.. 
-d 0.0.0.0:6001 ``` -For earlier adopters, add the following key to your config.toml +For earlier adopters, add the following key to your config.yaml -debug-listen-address: 0.0.0.0:6001 +debug-addr: 0.0.0.0:6001 Resulting in a configuration like the following: @@ -26,7 +26,7 @@ cosigner: rpc-timeout: 1500ms chain-nodes: - priv-val-addr: tcp://localhost:2300 -debug-listen-address: 0.0.0.0:6001 +debug-addr: 0.0.0.0:6001 ``` ## Prometheus Cautions From 52e7dccceedd4a71f88891d77a28375af70d9b42 Mon Sep 17 00:00:00 2001 From: r4f43l Date: Mon, 7 Nov 2022 10:48:50 +0100 Subject: [PATCH 41/44] Variable cleaning up. --- signer/remote_cosigner.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/signer/remote_cosigner.go b/signer/remote_cosigner.go index cf55a0eb..e0c9fd0b 100644 --- a/signer/remote_cosigner.go +++ b/signer/remote_cosigner.go @@ -10,7 +10,7 @@ import ( "google.golang.org/grpc/credentials/insecure" ) -// RemoteCosigner uses tendermint rpc to request signing from a remote cosigner +// RemoteCosigner uses CosignerGRPC to request signing from a remote cosigner type RemoteCosigner struct { id int address string From 5c4303cf4e9f8262ad44d514f1022489ec585608 Mon Sep 17 00:00:00 2001 From: Chill Validation <92176880+chillyvee@users.noreply.github.com> Date: Mon, 14 Nov 2022 04:45:12 +0900 Subject: [PATCH 42/44] Add signature and timestamp to log for adjusted timestamp visibility (#118) * Add signature and timestamp to log for adjusted timestamp visibility * Reduce line length * Handle empty Signature --- signer/remote_signer.go | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/signer/remote_signer.go b/signer/remote_signer.go index 0c9cece1..edc99455 100644 --- a/signer/remote_signer.go +++ b/signer/remote_signer.go @@ -159,7 +159,13 @@ func (rs *ReconnRemoteSigner) handleSignVoteRequest(vote *tmProto.Vote) tmProtoP msgSum.SignedVoteResponse.Error = getRemoteSignerError(err) return tmProtoPrivval.Message{Sum: msgSum} } - 
rs.Logger.Info("Signed vote", "node", rs.address, "height", vote.Height, "round", vote.Round, "type", vote.Type) + // Show signatures provided to each node have the same signature and timestamps + sigLen := 6 + if len(vote.Signature) < sigLen { + sigLen = len(vote.Signature) + } + rs.Logger.Info("Signed vote", "height", vote.Height, "round", vote.Round, "type", vote.Type, + "sig", vote.Signature[:sigLen], "ts", vote.Timestamp.Unix(), "node", rs.address) if vote.Type == tmProto.PrecommitType { stepSize := vote.Height - previousPrecommitHeight From 2be62d5d8ef3af82d2d340c4cfd2b56bf75acb9d Mon Sep 17 00:00:00 2001 From: r4f43l <91068974+nitronit@users.noreply.github.com> Date: Sun, 13 Nov 2022 20:45:51 +0100 Subject: [PATCH 43/44] Threshold small pr (#98) * Threshold signer interface * local cosigner fix * fixed some minor renaming package * fix switch to casting * remove scope introducing block * fixed: TODO check true here is correct for async * Removed scope-introducing blocks * go lint fix * not all is fixed but most. * Fix the unreadabillity setting slots. * Update threshold_signer_soft.go * lint fix * Minor corrections and fixes * Variable cleaning up. 
--- signer/local_cosigner.go | 31 ++- signer/remote_cosigner.go | 2 +- signer/threshold_signer.go | 43 ++++ signer/threshold_signer_soft.go | 344 ++++++++++++++++++++++++++++++++ 4 files changed, 402 insertions(+), 18 deletions(-) create mode 100644 signer/threshold_signer.go create mode 100644 signer/threshold_signer_soft.go diff --git a/signer/local_cosigner.go b/signer/local_cosigner.go index 3b623d54..0fbb7c62 100644 --- a/signer/local_cosigner.go +++ b/signer/local_cosigner.go @@ -11,12 +11,21 @@ import ( "sync" "time" - tmCryptoEd25519 "github.com/tendermint/tendermint/crypto/ed25519" - tmJson "github.com/tendermint/tendermint/libs/json" + tmcryptoed25519 "github.com/tendermint/tendermint/crypto/ed25519" + tmjson "github.com/tendermint/tendermint/libs/json" "gitlab.com/unit410/edwards25519" tsed25519 "gitlab.com/unit410/threshold-ed25519/pkg" ) +type LastSignStateWrapper struct { + // Signing is thread safe - lastSignStateMutex is used for putting locks so only one goroutine can r/w to the function + lastSignStateMutex sync.Mutex + + // lastSignState stores the last sign state for a share we have fully signed + // incremented whenever we are asked to sign a share + LastSignState *SignState +} + // return true if we are less than the other key func (hrst *HRSTKey) Less(other HRSTKey) bool { if hrst.Height < other.Height { @@ -71,18 +80,6 @@ type LocalCosignerConfig struct { Threshold uint8 } -type PeerMetadata struct { - Share []byte - EphemeralSecretPublicKey []byte -} - -type HrsMetadata struct { - // need to be _total_ entries per player - Secret []byte - DealtShares []tsed25519.Scalar - Peers []PeerMetadata -} - // LocalCosigner responds to sign requests using their share key // The cosigner maintains a watermark to avoid double-signing // @@ -130,7 +127,7 @@ func NewLocalCosigner(cfg LocalCosignerConfig) *LocalCosigner { // cache the public key bytes for signing operations switch ed25519Key := cosigner.key.PubKey.(type) { - case tmCryptoEd25519.PubKey: + 
case tmcryptoed25519.PubKey: cosigner.pubKeyBytes = make([]byte, len(ed25519Key)) copy(cosigner.pubKeyBytes, ed25519Key[:]) default: @@ -381,7 +378,7 @@ func (cosigner *LocalCosigner) getEphemeralSecretPart( // sign the response payload with our private key // cosigners can verify the signature to confirm sender validity { - jsonBytes, err := tmJson.Marshal(res) + jsonBytes, err := tmjson.Marshal(res) if err != nil { return res, err @@ -414,7 +411,7 @@ func (cosigner *LocalCosigner) setEphemeralSecretPart(req CosignerSetEphemeralSe digestMsg.SourceEphemeralSecretPublicKey = req.SourceEphemeralSecretPublicKey digestMsg.EncryptedSharePart = req.EncryptedSharePart - digestBytes, err := tmJson.Marshal(digestMsg) + digestBytes, err := tmjson.Marshal(digestMsg) if err != nil { return err } diff --git a/signer/remote_cosigner.go b/signer/remote_cosigner.go index cf55a0eb..e0c9fd0b 100644 --- a/signer/remote_cosigner.go +++ b/signer/remote_cosigner.go @@ -10,7 +10,7 @@ import ( "google.golang.org/grpc/credentials/insecure" ) -// RemoteCosigner uses tendermint rpc to request signing from a remote cosigner +// RemoteCosigner uses CosignerGRPC to request signing from a remote cosigner type RemoteCosigner struct { id int address string diff --git a/signer/threshold_signer.go b/signer/threshold_signer.go new file mode 100644 index 00000000..b83ab6b4 --- /dev/null +++ b/signer/threshold_signer.go @@ -0,0 +1,43 @@ +package signer + +import ( + tsed25519 "gitlab.com/unit410/threshold-ed25519/pkg" +) + +const ( + SignerTypeSoftSign = "SoftSign" + SignerTypeHSM = "HSM" +) + +// Interface for the local signer whether it's a soft sign or HSM +type ThresholdSigner interface { + Type() string + + DealShares(req CosignerGetEphemeralSecretPartRequest) (HrsMetadata, error) + + GetEphemeralSecretPart(req CosignerGetEphemeralSecretPartRequest, m *LastSignStateWrapper, + peers map[int]CosignerPeer) (CosignerEphemeralSecretPart, error) + + SetEphemeralSecretPart(req 
CosignerSetEphemeralSecretPartRequest, m *LastSignStateWrapper, + peers map[int]CosignerPeer) error + + Sign(req CosignerSignRequest, m *LastSignStateWrapper) (CosignerSignResponse, error) + + GetID() (int, error) +} + +// PeerMetadata holds the share and the ephemeral secret public key +// Moved from Local cosigner to threshold_ed25519 +type PeerMetadata struct { + Share []byte + EphemeralSecretPublicKey []byte +} + +// HrsMetadata holds the ephemeral nonces from cosigner peers +// for a given height, round, step. +type HrsMetadata struct { + // need to be _total_ entries per player + Secret []byte + DealtShares []tsed25519.Scalar + Peers []PeerMetadata +} diff --git a/signer/threshold_signer_soft.go b/signer/threshold_signer_soft.go new file mode 100644 index 00000000..eddd5efc --- /dev/null +++ b/signer/threshold_signer_soft.go @@ -0,0 +1,344 @@ +package signer + +import ( + "bytes" + "crypto" + "crypto/rand" + "crypto/rsa" + "crypto/sha256" + "errors" + "fmt" + + tmcryptoed25519 "github.com/tendermint/tendermint/crypto/ed25519" + tmjson "github.com/tendermint/tendermint/libs/json" + "gitlab.com/unit410/edwards25519" + tsed25519 "gitlab.com/unit410/threshold-ed25519/pkg" +) + +// ThresholdSignerSoft implements the interface and signs the message for each local signer. +// ThresholdSignerSoft is the implementation of a soft sign signer at the local level. +type ThresholdSignerSoft struct { + pubKeyBytes []byte + key CosignerKey + // total signers + total uint8 + threshold uint8 + // Height, Round, Step, Timestamp --> metadata + hrsMeta map[HRSTKey]HrsMetadata +} + +// NewThresholdSignerSoft constructs a ThresholdSigner +// that signs using the local key share file. +func NewThresholdSignerSoft(key CosignerKey, threshold, total uint8) ThresholdSigner { + softSigner := &ThresholdSignerSoft{ + key: key, + hrsMeta: make(map[HRSTKey]HrsMetadata), + total: total, + threshold: threshold, + } + + // cache the public key bytes for signing operations. 
+ // Ensures casting else it will naturally panic. + ed25519Key := softSigner.key.PubKey.(tmcryptoed25519.PubKey) + softSigner.pubKeyBytes = make([]byte, len(ed25519Key)) + softSigner.pubKeyBytes = ed25519Key[:] + + return softSigner +} + +// Implements ThresholdSigner +func (softSigner *ThresholdSignerSoft) Type() string { + return SignerTypeSoftSign +} + +// Implements ThresholdSigner +func (softSigner *ThresholdSignerSoft) GetID() (int, error) { + return softSigner.key.ID, nil +} + +// Implements ThresholdSigner +func (softSigner *ThresholdSignerSoft) Sign( + req CosignerSignRequest, m *LastSignStateWrapper) (CosignerSignResponse, error) { + m.lastSignStateMutex.Lock() + defer m.lastSignStateMutex.Unlock() + + res := CosignerSignResponse{} + lss := m.LastSignState + + hrst, err := UnpackHRST(req.SignBytes) + if err != nil { + return res, err + } + + sameHRS, err := lss.CheckHRS(hrst) + if err != nil { + return res, err + } + // If the HRS is the same the sign bytes may still differ by timestamp + // It is ok to re-sign a different timestamp if that is the only difference in the sign bytes + // same HRS, and only differ by timestamp its ok to sign again + if sameHRS { + if bytes.Equal(req.SignBytes, lss.SignBytes) { + res.EphemeralPublic = lss.EphemeralPublic + res.Signature = lss.Signature + return res, nil + } else if err := lss.OnlyDifferByTimestamp(req.SignBytes); err != nil { + return res, err // same HRS, and only differ by timestamp its ok to sign again + } + } + + meta, ok := softSigner.hrsMeta[hrst] + if !ok { + return res, errors.New("no metadata at HRS") + } + + shareParts := make([]tsed25519.Scalar, 0) + publicKeys := make([]tsed25519.Element, 0) + + // calculate secret and public keys + for _, peer := range meta.Peers { + if len(peer.Share) == 0 { + continue + } + shareParts = append(shareParts, peer.Share) + publicKeys = append(publicKeys, peer.EphemeralSecretPublicKey) + } + + ephemeralShare := tsed25519.AddScalars(shareParts) + ephemeralPublic := 
tsed25519.AddElements(publicKeys) + + // check bounds for ephemeral share to avoid passing out of bounds valids to SignWithShare + + if len(ephemeralShare) != 32 { + return res, errors.New("ephemeral share is out of bounds") + } + + var scalarBytes [32]byte + copy(scalarBytes[:], ephemeralShare) + if !edwards25519.ScMinimal(&scalarBytes) { + return res, errors.New("ephemeral share is out of bounds") + } + + sig := tsed25519.SignWithShare( + req.SignBytes, softSigner.key.ShareKey, ephemeralShare, softSigner.pubKeyBytes, ephemeralPublic) + + m.LastSignState.EphemeralPublic = ephemeralPublic + err = m.LastSignState.Save(SignStateConsensus{ + Height: hrst.Height, + Round: hrst.Round, + Step: hrst.Step, + Signature: sig, + SignBytes: req.SignBytes, + }, nil, true) + if err != nil { + var isSameHRSError *SameHRSError + if !errors.As(err, &isSameHRSError) { + return res, err + } + } + + for existingKey := range softSigner.hrsMeta { + // delete any HRS lower than our signed level + // we will not be providing parts for any lower HRS + if existingKey.Less(hrst) { + delete(softSigner.hrsMeta, existingKey) + } + } + + res.EphemeralPublic = ephemeralPublic + res.Signature = sig + return res, nil +} + +// Implements ThresholdSigner +func (softSigner *ThresholdSignerSoft) DealShares( + req CosignerGetEphemeralSecretPartRequest) (HrsMetadata, error) { + hrsKey := HRSTKey{ + Height: req.Height, + Round: req.Round, + Step: req.Step, + Timestamp: req.Timestamp.UnixNano(), + } + + meta, ok := softSigner.hrsMeta[hrsKey] + if ok { + return meta, nil + } + + secret := make([]byte, 32) + if _, err := rand.Read(secret); err != nil { + return HrsMetadata{}, err + } + + meta = HrsMetadata{ + Secret: secret, + Peers: make([]PeerMetadata, softSigner.total), + } + + // split this secret with shamirs + // !! dealt shares need to be saved because dealing produces different shares each time! 
+ meta.DealtShares = tsed25519.DealShares(meta.Secret, softSigner.threshold, softSigner.total) + + softSigner.hrsMeta[hrsKey] = meta + + return meta, nil +} + +// Get the ephemeral secret part for an ephemeral share +// The ephemeral secret part is encrypted for the receiver +// Implements ThresholdSigner +func (softSigner *ThresholdSignerSoft) GetEphemeralSecretPart( + req CosignerGetEphemeralSecretPartRequest, m *LastSignStateWrapper, peers map[int]CosignerPeer) ( + CosignerEphemeralSecretPart, error) { + + res := CosignerEphemeralSecretPart{} + + // protects the meta map + m.lastSignStateMutex.Lock() + defer m.lastSignStateMutex.Unlock() + + hrst := HRSTKey{ + Height: req.Height, + Round: req.Round, + Step: req.Step, + Timestamp: req.Timestamp.UnixNano(), + } + + meta, ok := softSigner.hrsMeta[hrst] + // generate metadata placeholder + if !ok { + newMeta, err := softSigner.DealShares(CosignerGetEphemeralSecretPartRequest{ + Height: req.Height, + Round: req.Round, + Step: req.Step, + Timestamp: req.Timestamp, + }) + + if err != nil { + return res, err + } + + meta = newMeta + softSigner.hrsMeta[hrst] = meta + } + + ourEphPublicKey := tsed25519.ScalarMultiplyBase(meta.Secret) + + // set our values + meta.Peers[softSigner.key.ID-1].Share = meta.DealtShares[softSigner.key.ID-1] + meta.Peers[softSigner.key.ID-1].EphemeralSecretPublicKey = ourEphPublicKey + + // grab the peer info for the ID being requested + peer, ok := peers[req.ID] + if !ok { + return res, errors.New("unknown peer ID") + } + + sharePart := meta.DealtShares[req.ID-1] + + // use RSA public to encrypt user's share part + encrypted, err := rsa.EncryptOAEP(sha256.New(), rand.Reader, &peer.PublicKey, sharePart, nil) + if err != nil { + return res, err + } + + res.SourceID = softSigner.key.ID + res.SourceEphemeralSecretPublicKey = ourEphPublicKey + res.EncryptedSharePart = encrypted + + // sign the response payload with our private key + // cosigners can verify the signature to confirm sender validity + + 
jsonBytes, err := tmjson.Marshal(res) + + if err != nil { + return res, err + } + + digest := sha256.Sum256(jsonBytes) + signature, err := rsa.SignPSS(rand.Reader, &softSigner.key.RSAKey, crypto.SHA256, digest[:], nil) + + if err != nil { + return res, err + } + + res.SourceSig = signature + + res.DestinationID = req.ID + + return res, nil +} + +// Store an ephemeral secret share part provided by another cosigner (signer) +// Implements ThresholdSigner +func (softSigner *ThresholdSignerSoft) SetEphemeralSecretPart( + req CosignerSetEphemeralSecretPartRequest, m *LastSignStateWrapper, peers map[int]CosignerPeer) error { + + // Verify the source signature + if req.SourceSig == nil { + return errors.New("SourceSig field is required") + } + + digestMsg := CosignerEphemeralSecretPart{ + SourceID: req.SourceID, + // DestinationID: 0, + SourceEphemeralSecretPublicKey: req.SourceEphemeralSecretPublicKey, + EncryptedSharePart: req.EncryptedSharePart, + // SourceSig: []byte{}, + } + + digestBytes, err := tmjson.Marshal(digestMsg) + if err != nil { + return err + } + + digest := sha256.Sum256(digestBytes) + peer, ok := peers[req.SourceID] + + if !ok { + return fmt.Errorf("unknown cosigner: %d", req.SourceID) + } + + peerPub := peer.PublicKey + err = rsa.VerifyPSS(&peerPub, crypto.SHA256, digest[:], req.SourceSig, nil) + if err != nil { + return err + } + + // protects the meta map + m.lastSignStateMutex.Lock() + defer m.lastSignStateMutex.Unlock() + + hrst := HRSTKey{ + Height: req.Height, + Round: req.Round, + Step: req.Step, + Timestamp: req.Timestamp.UnixNano(), + } + + meta, ok := softSigner.hrsMeta[hrst] // generate metadata placeholder, softSigner.HrsMeta[hrst] is non-addressable + if !ok { + newMeta, err := softSigner.DealShares(CosignerGetEphemeralSecretPartRequest{ + Height: req.Height, + Round: req.Round, + Step: req.Step, + }) + if err != nil { + return err + } + meta = newMeta + softSigner.hrsMeta[hrst] = meta // updates the metadata placeholder + } + + // decrypt 
share + sharePart, err := rsa.DecryptOAEP(sha256.New(), rand.Reader, &softSigner.key.RSAKey, req.EncryptedSharePart, nil) + if err != nil { + return err + } + // set slot + // Share & EphemeralSecretPublicKey is a SLICE so its a valid change of the shared struct softSigner! + meta.Peers[req.SourceID-1].Share = sharePart + meta.Peers[req.SourceID-1].EphemeralSecretPublicKey = req.SourceEphemeralSecretPublicKey + + return nil +} From 35539a0b6366cef4f4e03dca7c892c07b72c7112 Mon Sep 17 00:00:00 2001 From: r4f43l <91068974+nitronit@users.noreply.github.com> Date: Thu, 24 Nov 2022 12:34:06 +0100 Subject: [PATCH 44/44] Create comparison.md --- docs/comparison.md | 7 +++++++ 1 file changed, 7 insertions(+) create mode 100644 docs/comparison.md diff --git a/docs/comparison.md b/docs/comparison.md new file mode 100644 index 00000000..8afe3664 --- /dev/null +++ b/docs/comparison.md @@ -0,0 +1,7 @@ +| | Num. Rounds | Robust | Num. Signers | Parallel Secure | +|--------------------|:--------------------:|:------:|:------------:|:---------------:| +| **Stinson Strobl** | 4 | Yes | t | Yes | +| **Gennaro et al.** | 1 with preprocessing | No | n | No | +| **FROST** | 1 with preprocessing | No | t | Yes | + +| **Stinson Strobl** is the only threshold scheme implemented in Horcrux. However, it is important to note that the key generation in Horcrux is not the same as proposed in the paper. Instead, it is "classic" Shamir secret sharing with a fully trusted dealer.