Skip to content

Commit

Permalink
etcdserver: add grpc interceptor to log info on incoming requests to
Browse files Browse the repository at this point in the history
etcd server

To improve debuggability of etcd v3. Added a grpc interceptor to log
info on incoming requests to etcd server. The log output includes
remote client info, request content (with value field redacted), request
handling latency, response size, etc. Uses zap logger if available,
otherwise uses capnslog.

Also did some clean up on the chaining of grpc interceptors on server
side.
  • Loading branch information
Jingyi Hu committed Aug 8, 2018
1 parent db102de commit b898479
Show file tree
Hide file tree
Showing 4 changed files with 143 additions and 9 deletions.
12 changes: 10 additions & 2 deletions etcdserver/api/v3rpc/grpc.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ import (
"github.com/coreos/etcd/etcdserver"
pb "github.com/coreos/etcd/etcdserver/etcdserverpb"

"github.com/grpc-ecosystem/go-grpc-middleware"
"github.com/grpc-ecosystem/go-grpc-prometheus"
"google.golang.org/grpc"
"google.golang.org/grpc/credentials"
Expand All @@ -40,8 +41,15 @@ func Server(s *etcdserver.EtcdServer, tls *tls.Config, gopts ...grpc.ServerOptio
if tls != nil {
opts = append(opts, grpc.Creds(credentials.NewTLS(tls)))
}
opts = append(opts, grpc.UnaryInterceptor(newUnaryInterceptor(s)))
opts = append(opts, grpc.StreamInterceptor(newStreamInterceptor(s)))
opts = append(opts, grpc.UnaryInterceptor(grpc_middleware.ChainUnaryServer(
newLogUnaryInterceptor(s),
newUnaryInterceptor(s),
grpc_prometheus.UnaryServerInterceptor,
)))
opts = append(opts, grpc.StreamInterceptor(grpc_middleware.ChainStreamServer(
newStreamInterceptor(s),
grpc_prometheus.StreamServerInterceptor,
)))
opts = append(opts, grpc.MaxRecvMsgSize(int(s.Cfg.MaxRequestBytes+grpcOverheadBytes)))
opts = append(opts, grpc.MaxSendMsgSize(maxSendBytes))
opts = append(opts, grpc.MaxConcurrentStreams(maxStreams))
Expand Down
130 changes: 126 additions & 4 deletions etcdserver/api/v3rpc/interceptor.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,9 +25,11 @@ import (
"github.com/coreos/etcd/pkg/types"
"github.com/coreos/etcd/raft"

prometheus "github.com/grpc-ecosystem/go-grpc-prometheus"
pb "github.com/coreos/etcd/etcdserver/etcdserverpb"
"go.uber.org/zap"
"google.golang.org/grpc"
"google.golang.org/grpc/metadata"
"google.golang.org/grpc/peer"
)

const (
Expand All @@ -40,7 +42,7 @@ type streamsMap struct {
}

func newUnaryInterceptor(s *etcdserver.EtcdServer) grpc.UnaryServerInterceptor {
return func(ctx context.Context, req interface{}, info *grpc.UnaryServerInfo, handler grpc.UnaryHandler) (resp interface{}, err error) {
return func(ctx context.Context, req interface{}, info *grpc.UnaryServerInfo, handler grpc.UnaryHandler) (interface{}, error) {
if !api.IsCapabilityEnabled(api.V3rpcCapability) {
return nil, rpctypes.ErrGRPCNotCapable
}
Expand All @@ -54,7 +56,126 @@ func newUnaryInterceptor(s *etcdserver.EtcdServer) grpc.UnaryServerInterceptor {
}
}

return prometheus.UnaryServerInterceptor(ctx, req, info, handler)
resp, err := handler(ctx, req)
return resp, err
}
}

func newLogUnaryInterceptor(s *etcdserver.EtcdServer) grpc.UnaryServerInterceptor {
return func(ctx context.Context, req interface{}, info *grpc.UnaryServerInfo, handler grpc.UnaryHandler) (interface{}, error) {
startTime := time.Now()
resp, err := handler(ctx, req)
defer logUnaryRequestStats(ctx, s.Logger(), info, startTime, req, resp)
return resp, err
}
}

func logUnaryRequestStats(ctx context.Context, lg *zap.Logger, info *grpc.UnaryServerInfo, startTime time.Time, req interface{}, resp interface{}) {
duration := time.Since(startTime)
remote := "No remote client info."
peerInfo, ok := peer.FromContext(ctx)
if ok {
remote = peerInfo.Addr.String()
}
var responseType string = info.FullMethod
var reqCount, respCount int64 = 0, 0
var reqSize, respSize int = 0, 0
var reqContent string
switch _resp := resp.(type) {
case *pb.RangeResponse:
_req, ok := req.(*pb.RangeRequest)
if ok {
reqCount = 0
reqSize = _req.Size()
reqContent = _req.String()
}
if _resp != nil {
respCount = _resp.GetCount()
respSize = _resp.Size()
}
case *pb.PutResponse:
_req, ok := req.(*pb.PutRequest)
if ok {
reqCount = 1
reqSize = _req.Size()
reqContent = pb.NewLoggablePutRequest(_req).String()
// redact value field from request content, see PR #9821
}
if _resp != nil {
respCount = 0
respSize = _resp.Size()
}
case *pb.DeleteRangeResponse:
_req, ok := req.(*pb.DeleteRangeRequest)
if ok {
reqCount = 0
reqSize = _req.Size()
reqContent = _req.String()
}
if _resp != nil {
respCount = _resp.GetDeleted()
respSize = _resp.Size()
}
case *pb.TxnResponse:
_req, ok := req.(*pb.TxnRequest)
if ok && _resp != nil {
if _resp.GetSucceeded() { // determine the 'actual' count and size of request based on success or failure
reqCount = int64(len(_req.GetSuccess()))
reqSize = 0
for _, r := range _req.GetSuccess() {
reqSize += r.Size()
}
} else {
reqCount = int64(len(_req.GetFailure()))
reqSize = 0
for _, r := range _req.GetFailure() {
reqSize += r.Size()
}
}
reqContent = pb.NewLoggableTxnRequest(_req).String()
// redact value field from request content, see PR #9821
}
if _resp != nil {
respCount = 0
respSize = _resp.Size()
}
default:
reqCount = -1
reqSize = -1
respCount = -1
respSize = -1
}

logGenericRequestStats(lg, startTime, duration, remote, responseType, reqCount, reqSize, respCount, respSize, reqContent)
}

func logGenericRequestStats(lg *zap.Logger, startTime time.Time, duration time.Duration, remote string, responseType string,
reqCount int64, reqSize int, respCount int64, respSize int, reqContent string) {
if lg == nil {
plog.Debugf(
"start time = %v, " +
"time spent = %v, " +
"remote = %s, " +
"response type = %s, " +
"request count = %d, " +
"request size = %d, " +
"response count = %d, " +
"response size = %d, " +
"request content = %s",
startTime, duration, remote, responseType, reqCount, reqSize, respCount, respSize, reqContent,
)
} else {
lg.Debug("request stats",
zap.Time("start time", startTime),
zap.Duration("time spent", duration),
zap.String("remote", remote),
zap.String("response type", responseType),
zap.Int64("request count", reqCount),
zap.Int("request size", reqSize),
zap.Int64("response count", respCount),
zap.Int("response size", respSize),
zap.String("request content", reqContent),
)
}
}

Expand Down Expand Up @@ -90,7 +211,8 @@ func newStreamInterceptor(s *etcdserver.EtcdServer) grpc.StreamServerInterceptor
}
}

return prometheus.StreamServerInterceptor(srv, ss, info, handler)
err := handler(srv, ss)
return err
}
}

Expand Down
6 changes: 3 additions & 3 deletions etcdserver/etcdserverpb/raft_internal_stringer.go
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ func (as *InternalRaftStringer) String() string {
case as.Request.Put != nil:
return fmt.Sprintf("header:<%s> put:<%s>",
as.Request.Header.String(),
newLoggablePutRequest(as.Request.Put).String(),
NewLoggablePutRequest(as.Request.Put).String(),
)
case as.Request.Txn != nil:
return fmt.Sprintf("header:<%s> txn:<%s>",
Expand Down Expand Up @@ -121,7 +121,7 @@ func newLoggableRequestOp(op *RequestOp) *requestOpStringer {
func (as *requestOpStringer) String() string {
switch op := as.Op.Request.(type) {
case *RequestOp_RequestPut:
return fmt.Sprintf("request_put:<%s>", newLoggablePutRequest(op.RequestPut).String())
return fmt.Sprintf("request_put:<%s>", NewLoggablePutRequest(op.RequestPut).String())
case *RequestOp_RequestTxn:
return fmt.Sprintf("request_txn:<%s>", NewLoggableTxnRequest(op.RequestTxn).String())
default:
Expand Down Expand Up @@ -167,7 +167,7 @@ type loggablePutRequest struct {
IgnoreLease bool `protobuf:"varint,6,opt,name=ignore_lease,proto3"`
}

func newLoggablePutRequest(request *PutRequest) *loggablePutRequest {
func NewLoggablePutRequest(request *PutRequest) *loggablePutRequest {
return &loggablePutRequest{
request.Key,
len(request.Value),
Expand Down
4 changes: 4 additions & 0 deletions etcdserver/server.go
Original file line number Diff line number Diff line change
Expand Up @@ -2407,3 +2407,7 @@ func (s *EtcdServer) goAttach(f func()) {
func (s *EtcdServer) Alarms() []*pb.AlarmMember {
return s.alarmStore.Get(pb.AlarmType_NONE)
}

func (s *EtcdServer) Logger() *zap.Logger {
return s.lg
}

0 comments on commit b898479

Please sign in to comment.