Skip to content

Commit

Permalink
Secure cluster traffic via mutual TLS (prometheus#2237)
Browse files Browse the repository at this point in the history
* Add TLS option to gossip cluster

Co-authored-by: Sharad Gaur <[email protected]>
Signed-off-by: Dustin Hooten <[email protected]>

* generate new certs that expire in 100 years

Signed-off-by: Dustin Hooten <[email protected]>

* Fix tls_connection attributes

Signed-off-by: Dustin Hooten <[email protected]>

* Improve error message

Signed-off-by: Dustin Hooten <[email protected]>

* Fix tls client config docs

Signed-off-by: Dustin Hooten <[email protected]>

* Add capacity arg to message buffer

Signed-off-by: Dustin Hooten <[email protected]>

* fix formatting

Signed-off-by: Dustin Hooten <[email protected]>

* Update version; add version validation

Signed-off-by: Dustin Hooten <[email protected]>

* use lru cache for connection pool

Signed-off-by: Dustin Hooten <[email protected]>

* lock reading from the connection

Signed-off-by: Dustin Hooten <[email protected]>

* when extracting net.Conn from tlsConn, lock and throw away wrapper

Signed-off-by: Dustin Hooten <[email protected]>

* Add mutex to connection pool to protect cache

Signed-off-by: Dustin Hooten <[email protected]>

* fix linting

Signed-off-by: Dustin Hooten <[email protected]>

Co-authored-by: Sharad Gaur <[email protected]>
  • Loading branch information
hooten and sharadgaur authored Aug 9, 2021
1 parent 61d4ebc commit ff85bec
Show file tree
Hide file tree
Showing 48 changed files with 2,125 additions and 16 deletions.
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,9 @@
!.golangci.yml
!/cli/testdata/*.yml
!/cli/config/testdata/*.yml
!/cluster/testdata/*.yml
!/config/testdata/*.yml
!/examples/ha/tls/*.yml
!/notify/email/testdata/*.yml
!/doc/examples/simple.yml
!/circle.yml
Expand Down
10 changes: 9 additions & 1 deletion cluster/cluster.go
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,6 @@ import (
"github.com/hashicorp/memberlist"
"github.com/oklog/ulid"
"github.com/pkg/errors"

"github.com/prometheus/client_golang/prometheus"
)

Expand Down Expand Up @@ -141,6 +140,7 @@ func Create(
tcpTimeout time.Duration,
probeTimeout time.Duration,
probeInterval time.Duration,
tlsTransportConfig *TLSTransportConfig,
) (*Peer, error) {
bindHost, bindPortStr, err := net.SplitHostPort(bindAddr)
if err != nil {
Expand Down Expand Up @@ -235,6 +235,14 @@ func Create(
p.setInitialFailed(resolvedPeers, bindAddr)
}

if tlsTransportConfig != nil {
level.Info(l).Log("msg", "using TLS for gossip")
cfg.Transport, err = NewTLSTransport(context.Background(), l, reg, cfg.BindAddr, cfg.BindPort, tlsTransportConfig)
if err != nil {
return nil, errors.Wrap(err, "tls transport")
}
}

ml, err := memberlist.Create(cfg)
if err != nil {
return nil, errors.Wrap(err, "create memberlist")
Expand Down
72 changes: 72 additions & 0 deletions cluster/cluster_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ func TestClusterJoinAndReconnect(t *testing.T) {
t.Run("TestReconnect", testReconnect)
t.Run("TestRemoveFailedPeers", testRemoveFailedPeers)
t.Run("TestInitiallyFailingPeers", testInitiallyFailingPeers)
t.Run("TestTLSConnection", testTLSConnection)
}

func testJoinLeave(t *testing.T) {
Expand All @@ -51,6 +52,7 @@ func testJoinLeave(t *testing.T) {
DefaultTcpTimeout,
DefaultProbeTimeout,
DefaultProbeInterval,
nil,
)
require.NoError(t, err)
require.NotNil(t, p)
Expand Down Expand Up @@ -83,6 +85,7 @@ func testJoinLeave(t *testing.T) {
DefaultTcpTimeout,
DefaultProbeTimeout,
DefaultProbeInterval,
nil,
)
require.NoError(t, err)
require.NotNil(t, p2)
Expand Down Expand Up @@ -116,6 +119,7 @@ func testReconnect(t *testing.T) {
DefaultTcpTimeout,
DefaultProbeTimeout,
DefaultProbeInterval,
nil,
)
require.NoError(t, err)
require.NotNil(t, p)
Expand All @@ -139,6 +143,7 @@ func testReconnect(t *testing.T) {
DefaultTcpTimeout,
DefaultProbeTimeout,
DefaultProbeInterval,
nil,
)
require.NoError(t, err)
require.NotNil(t, p2)
Expand Down Expand Up @@ -177,6 +182,7 @@ func testRemoveFailedPeers(t *testing.T) {
DefaultTcpTimeout,
DefaultProbeTimeout,
DefaultProbeInterval,
nil,
)
require.NoError(t, err)
require.NotNil(t, p)
Expand Down Expand Up @@ -226,6 +232,7 @@ func testInitiallyFailingPeers(t *testing.T) {
DefaultTcpTimeout,
DefaultProbeTimeout,
DefaultProbeInterval,
nil,
)
require.NoError(t, err)
require.NotNil(t, p)
Expand Down Expand Up @@ -254,3 +261,68 @@ func testInitiallyFailingPeers(t *testing.T) {
require.Equal(t, expectedLen, len(p.failedPeers))
}
}

// testTLSConnection checks that two peers created with TLS transport
// configurations loaded from ./testdata can form a two-member cluster, and
// that after the second peer leaves, the first peer reports a cluster size of
// one and tracks the departed peer in its failed-peer list.
func testTLSConnection(t *testing.T) {
	logger := log.NewNopLogger()

	// First peer: created with no known peers, so it starts its own cluster.
	tlsTransportConfig1, err := GetTLSTransportConfig("./testdata/tls_config_node1.yml")
	require.NoError(t, err)
	p1, err := Create(
		logger,
		prometheus.NewRegistry(),
		"127.0.0.1:0",
		"",
		[]string{},
		true,
		DefaultPushPullInterval,
		DefaultGossipInterval,
		DefaultTcpTimeout,
		DefaultProbeTimeout,
		DefaultProbeInterval,
		tlsTransportConfig1,
	)
	require.NoError(t, err)
	require.NotNil(t, p1)
	err = p1.Join(
		DefaultReconnectInterval,
		DefaultReconnectTimeout,
	)
	require.NoError(t, err)

	// The peer must report "settling" until Settle has run to completion.
	// Note: require.Equal takes (expected, actual); keeping that order makes
	// failure output label the two values correctly.
	require.False(t, p1.Ready())
	require.Equal(t, "settling", p1.Status())
	go p1.Settle(context.Background(), 0*time.Second)
	p1.WaitReady(context.Background())
	require.Equal(t, "ready", p1.Status())

	// Create the peer who joins the first.
	tlsTransportConfig2, err := GetTLSTransportConfig("./testdata/tls_config_node2.yml")
	require.NoError(t, err)
	p2, err := Create(
		logger,
		prometheus.NewRegistry(),
		"127.0.0.1:0",
		"",
		[]string{p1.Self().Address()},
		true,
		DefaultPushPullInterval,
		DefaultGossipInterval,
		DefaultTcpTimeout,
		DefaultProbeTimeout,
		DefaultProbeInterval,
		tlsTransportConfig2,
	)
	require.NoError(t, err)
	require.NotNil(t, p2)
	err = p2.Join(
		DefaultReconnectInterval,
		DefaultReconnectTimeout,
	)
	require.NoError(t, err)
	go p2.Settle(context.Background(), 0*time.Second)

	// Both peers are visible from p1 once p2 has joined.
	require.Equal(t, 2, p1.ClusterSize())

	// After p2 leaves, p1 should shrink to one member, keep p2's entry in its
	// peer map, and list p2 as its only failed peer.
	p2.Leave(0 * time.Second)
	require.Equal(t, 1, p1.ClusterSize())
	require.Len(t, p1.failedPeers, 1)
	require.Equal(t, p2.Self().Address(), p1.peers[p2.Self().Address()].Node.Address())
	require.Equal(t, p2.Name(), p1.failedPeers[0].Name)
}
Loading

0 comments on commit ff85bec

Please sign in to comment.