From 2cd95c7514a3d02aa2d98591c013885cb44fbdeb Mon Sep 17 00:00:00 2001 From: Doug Fawley Date: Wed, 19 Apr 2023 10:54:34 -0700 Subject: [PATCH 01/60] gcp/observability: remove redundant import (#6215) --- gcp/observability/logging.go | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/gcp/observability/logging.go b/gcp/observability/logging.go index bec80140275b..0ffbd93b3922 100644 --- a/gcp/observability/logging.go +++ b/gcp/observability/logging.go @@ -35,7 +35,6 @@ import ( binlogpb "google.golang.org/grpc/binarylog/grpc_binarylog_v1" "google.golang.org/grpc/codes" "google.golang.org/grpc/internal" - "google.golang.org/grpc/internal/binarylog" iblog "google.golang.org/grpc/internal/binarylog" "google.golang.org/grpc/internal/grpcutil" "google.golang.org/grpc/stats/opencensus" @@ -438,7 +437,7 @@ func registerClientRPCEvents(config *config, exporter loggingExporter) { projectID: config.ProjectID, clientSide: true, } - internal.AddGlobalDialOptions.(func(opt ...grpc.DialOption))(internal.WithBinaryLogger.(func(bl binarylog.Logger) grpc.DialOption)(clientSideLogger)) + internal.AddGlobalDialOptions.(func(opt ...grpc.DialOption))(internal.WithBinaryLogger.(func(bl iblog.Logger) grpc.DialOption)(clientSideLogger)) } func registerServerRPCEvents(config *config, exporter loggingExporter) { @@ -478,7 +477,7 @@ func registerServerRPCEvents(config *config, exporter loggingExporter) { projectID: config.ProjectID, clientSide: false, } - internal.AddGlobalServerOptions.(func(opt ...grpc.ServerOption))(internal.BinaryLogger.(func(bl binarylog.Logger) grpc.ServerOption)(serverSideLogger)) + internal.AddGlobalServerOptions.(func(opt ...grpc.ServerOption))(internal.BinaryLogger.(func(bl iblog.Logger) grpc.ServerOption)(serverSideLogger)) } func startLogging(ctx context.Context, config *config) error { From 8c70261b5c3941bab070b422fa6d630acba509c2 Mon Sep 17 00:00:00 2001 From: Easwar Swaminathan Date: Thu, 20 Apr 2023 18:49:17 -0700 Subject: [PATCH 02/60] grpc: ClientConn cleanup in prep for channel idleness (#6189) --- clientconn.go | 160 ++++++++++++++++++++++++++------------- resolver_conn_wrapper.go | 81 ++++++++++++-------- 2 files changed, 160 insertions(+), 81 deletions(-) diff --git a/clientconn.go b/clientconn.go index 3a76142424db..e67a990b24fb 100644 --- a/clientconn.go +++ b/clientconn.go @@ -173,40 +173,11 @@ func DialContext(ctx context.Context, target string, opts ...DialOption) (conn * } }() - pid := cc.dopts.channelzParentID - cc.channelzID = channelz.RegisterChannel(&channelzChannel{cc}, pid, target) - ted := &channelz.TraceEventDesc{ - Desc: "Channel created", - Severity: channelz.CtInfo, - } - if cc.dopts.channelzParentID != nil { - ted.Parent = &channelz.TraceEventDesc{ - Desc: fmt.Sprintf("Nested Channel(id:%d) created", cc.channelzID.Int()), - Severity: channelz.CtInfo, - } - } - channelz.AddTraceEvent(logger, cc.channelzID, 1, ted) - cc.csMgr.channelzID = cc.channelzID + // Register ClientConn with channelz. 
+	cc.channelzRegistration(target)

-	if cc.dopts.copts.TransportCredentials == nil && cc.dopts.copts.CredsBundle == nil {
-		return nil, errNoTransportSecurity
-	}
-	if cc.dopts.copts.TransportCredentials != nil && cc.dopts.copts.CredsBundle != nil {
-		return nil, errTransportCredsAndBundle
-	}
-	if cc.dopts.copts.CredsBundle != nil && cc.dopts.copts.CredsBundle.TransportCredentials() == nil {
-		return nil, errNoTransportCredsInBundle
-	}
-	transportCreds := cc.dopts.copts.TransportCredentials
-	if transportCreds == nil {
-		transportCreds = cc.dopts.copts.CredsBundle.TransportCredentials()
-	}
-	if transportCreds.Info().SecurityProtocol == "insecure" {
-		for _, cd := range cc.dopts.copts.PerRPCCredentials {
-			if cd.RequireTransportSecurity() {
-				return nil, errTransportCredentialsMissing
-			}
-		}
+	if err := cc.validateTransportCredentials(); err != nil {
+		return nil, err
 	}

 	if cc.dopts.defaultServiceConfigRawJSON != nil {
@@ -249,15 +220,12 @@ func DialContext(ctx context.Context, target string, opts ...DialOption) (conn *
 	}

 	// Determine the resolver to use.
-	resolverBuilder, err := cc.parseTargetAndFindResolver()
-	if err != nil {
+	if err := cc.parseTargetAndFindResolver(); err != nil {
 		return nil, err
 	}
-	cc.authority, err = determineAuthority(cc.parsedTarget.Endpoint(), cc.target, cc.dopts)
-	if err != nil {
+	if err = cc.determineAuthority(); err != nil {
 		return nil, err
 	}
-	channelz.Infof(logger, cc.channelzID, "Channel authority set to %q", cc.authority)

 	if cc.dopts.scChan != nil {
 		// Blocking wait for the initial service config.
@@ -290,7 +258,17 @@ func DialContext(ctx context.Context, target string, opts ...DialOption) (conn *
 	})

 	// Build the resolver.
-	rWrapper, err := newCCResolverWrapper(cc, resolverBuilder)
+	rWrapper, err := newCCResolverWrapper(cc, ccResolverWrapperOpts{
+		target:  cc.parsedTarget,
+		builder: cc.resolverBuilder,
+		bOpts: resolver.BuildOptions{
+			DisableServiceConfig: cc.dopts.disableServiceConfig,
+			DialCreds:            credsClone,
+			CredsBundle:          cc.dopts.copts.CredsBundle,
+			Dialer:               cc.dopts.copts.Dialer,
+		},
+		channelzID: cc.channelzID,
+	})
 	if err != nil {
 		return nil, fmt.Errorf("failed to build resolver: %v", err)
 	}
@@ -328,6 +306,64 @@ func DialContext(ctx context.Context, target string, opts ...DialOption) (conn *
 	return cc, nil
 }

+// validateTransportCredentials performs a series of checks on the configured
+// transport credentials. It returns a non-nil error if any of these conditions
+// are met:
+//   - no transport creds and no creds bundle is configured
+//   - both transport creds and creds bundle are configured
+//   - creds bundle is configured, but it lacks transport credentials
+//   - insecure transport creds configured alongside call creds that require
+//     transport level security
+//
+// If none of the above conditions are met, the configured credentials are
+// deemed valid and a nil error is returned.
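+//
+// For example (an illustrative sketch, not part of this change): dialing with
+// insecure transport credentials alongside a per-RPC credential whose
+// RequireTransportSecurity() returns true trips the last check, and Dial
+// fails with errTransportCredentialsMissing. Here rpcCreds stands in for a
+// hypothetical credentials.PerRPCCredentials implementation:
+//
+//	cc, err := grpc.Dial(target,
+//		grpc.WithTransportCredentials(insecure.NewCredentials()),
+//		grpc.WithPerRPCCredentials(rpcCreds),
+//	)
+//	// err == errTransportCredentialsMissing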
+func (cc *ClientConn) validateTransportCredentials() error {
+	if cc.dopts.copts.TransportCredentials == nil && cc.dopts.copts.CredsBundle == nil {
+		return errNoTransportSecurity
+	}
+	if cc.dopts.copts.TransportCredentials != nil && cc.dopts.copts.CredsBundle != nil {
+		return errTransportCredsAndBundle
+	}
+	if cc.dopts.copts.CredsBundle != nil && cc.dopts.copts.CredsBundle.TransportCredentials() == nil {
+		return errNoTransportCredsInBundle
+	}
+	transportCreds := cc.dopts.copts.TransportCredentials
+	if transportCreds == nil {
+		transportCreds = cc.dopts.copts.CredsBundle.TransportCredentials()
+	}
+	if transportCreds.Info().SecurityProtocol == "insecure" {
+		for _, cd := range cc.dopts.copts.PerRPCCredentials {
+			if cd.RequireTransportSecurity() {
+				return errTransportCredentialsMissing
+			}
+		}
+	}
+	return nil
+}
+
+// channelzRegistration registers the newly created ClientConn with channelz and
+// stores the returned identifier in `cc.channelzID` and `cc.csMgr.channelzID`.
+// A channelz trace event is emitted for ClientConn creation. If the newly
+// created ClientConn is a nested one, i.e., a valid parent ClientConn ID is
+// specified via a dial option, the trace event is also added to the parent.
+//
+// Doesn't grab cc.mu as this method is expected to be called only at Dial time.
+func (cc *ClientConn) channelzRegistration(target string) {
+	cc.channelzID = channelz.RegisterChannel(&channelzChannel{cc}, cc.dopts.channelzParentID, target)
+	ted := &channelz.TraceEventDesc{
+		Desc:     "Channel created",
+		Severity: channelz.CtInfo,
+	}
+	if cc.dopts.channelzParentID != nil {
+		ted.Parent = &channelz.TraceEventDesc{
+			Desc:     fmt.Sprintf("Nested Channel(id:%d) created", cc.channelzID.Int()),
+			Severity: channelz.CtInfo,
+		}
+	}
+	channelz.AddTraceEvent(logger, cc.channelzID, 1, ted)
+	cc.csMgr.channelzID = cc.channelzID
+}
+
 // chainUnaryClientInterceptors chains all unary client interceptors into one.
 func chainUnaryClientInterceptors(cc *ClientConn) {
 	interceptors := cc.dopts.chainUnaryInts
@@ -471,6 +507,7 @@ type ClientConn struct {
 	authority       string               // See determineAuthority().
 	dopts           dialOptions          // Default and user specified dial options.
 	channelzID      *channelz.Identifier // Channelz identifier for the channel.
+	resolverBuilder resolver.Builder     // See parseTargetAndFindResolver().
 	balancerWrapper *ccBalancerWrapper   // Uses gracefulswitch.balancer underneath.

 	// The following provide their own synchronization, and therefore don't
@@ -1552,7 +1589,14 @@ func (cc *ClientConn) connectionError() error {
 	return cc.lastConnectionError
 }

-func (cc *ClientConn) parseTargetAndFindResolver() (resolver.Builder, error) {
+// parseTargetAndFindResolver parses the user's dial target and stores the
+// parsed target in `cc.parsedTarget`.
+//
+// The resolver to use is determined based on the scheme in the parsed target
+// and the same is stored in `cc.resolverBuilder`.
+//
+// Doesn't grab cc.mu as this method is expected to be called only at Dial time.
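+//
+// For example (illustrative only): "dns:///foo.example:443" parses with
+// scheme "dns" and selects the registered dns resolver builder, while a
+// scheme-less target like "foo.example:443" fails the first lookup and is
+// re-parsed with the default scheme (typically "passthrough"), i.e. as
+// "passthrough:///foo.example:443".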
+func (cc *ClientConn) parseTargetAndFindResolver() error {
 	channelz.Infof(logger, cc.channelzID, "original dial target is: %q", cc.target)

 	var rb resolver.Builder
@@ -1564,7 +1608,8 @@ func (cc *ClientConn) parseTargetAndFindResolver() (resolver.Builder, error) {
 		rb = cc.getResolver(parsedTarget.URL.Scheme)
 		if rb != nil {
 			cc.parsedTarget = parsedTarget
-			return rb, nil
+			cc.resolverBuilder = rb
+			return nil
 		}
 	}

@@ -1579,15 +1624,16 @@ func (cc *ClientConn) parseTargetAndFindResolver() (resolver.Builder, error) {
 	parsedTarget, err = parseTarget(canonicalTarget)
 	if err != nil {
 		channelz.Infof(logger, cc.channelzID, "dial target %q parse failed: %v", canonicalTarget, err)
-		return nil, err
+		return err
 	}
 	channelz.Infof(logger, cc.channelzID, "parsed dial target is: %+v", parsedTarget)
 	rb = cc.getResolver(parsedTarget.URL.Scheme)
 	if rb == nil {
-		return nil, fmt.Errorf("could not get resolver for default scheme: %q", parsedTarget.URL.Scheme)
+		return fmt.Errorf("could not get resolver for default scheme: %q", parsedTarget.URL.Scheme)
 	}
 	cc.parsedTarget = parsedTarget
-	return rb, nil
+	cc.resolverBuilder = rb
+	return nil
 }

 // parseTarget uses RFC 3986 semantics to parse the given target into a
@@ -1610,7 +1656,15 @@ func parseTarget(target string) (resolver.Target, error) {
 //   - user specified authority override using `WithAuthority` dial option
 //   - creds' notion of server name for the authentication handshake
 //   - endpoint from dial target of the form "scheme://[authority]/endpoint"
-func determineAuthority(endpoint, target string, dopts dialOptions) (string, error) {
+//
+// Stores the determined authority in `cc.authority`.
+//
+// Returns a non-nil error if the authority returned by the transport
+// credentials does not match the authority configured through the dial option.
+//
+// Doesn't grab cc.mu as this method is expected to be called only at Dial time.
+func (cc *ClientConn) determineAuthority() error {
+	dopts := cc.dopts
 	// Historically, we had two options for users to specify the serverName or
 	// authority for a channel. One was through the transport credentials
 	// (either in its constructor, or through the OverrideServerName() method).
@@ -1627,25 +1681,29 @@ func determineAuthority(endpoint, target string, dopts dialOptions) (string, err
 	}
 	authorityFromDialOption := dopts.authority
 	if (authorityFromCreds != "" && authorityFromDialOption != "") && authorityFromCreds != authorityFromDialOption {
-		return "", fmt.Errorf("ClientConn's authority from transport creds %q and dial option %q don't match", authorityFromCreds, authorityFromDialOption)
+		return fmt.Errorf("ClientConn's authority from transport creds %q and dial option %q don't match", authorityFromCreds, authorityFromDialOption)
 	}

+	endpoint := cc.parsedTarget.Endpoint()
+	target := cc.target
 	switch {
 	case authorityFromDialOption != "":
-		return authorityFromDialOption, nil
+		cc.authority = authorityFromDialOption
 	case authorityFromCreds != "":
-		return authorityFromCreds, nil
+		cc.authority = authorityFromCreds
 	case strings.HasPrefix(target, "unix:") || strings.HasPrefix(target, "unix-abstract:"):
 		// TODO: remove when the unix resolver implements optional interface to
 		// return channel authority.
-		return "localhost", nil
+		cc.authority = "localhost"
 	case strings.HasPrefix(endpoint, ":"):
-		return "localhost" + endpoint, nil
+		cc.authority = "localhost" + endpoint
 	default:
 		// TODO: Define an optional interface on the resolver builder to return
 		// the channel authority given the user's dial target.
For resolvers // which don't implement this interface, we will use the endpoint from // "scheme://authority/endpoint" as the default authority. - return endpoint, nil + cc.authority = endpoint } + channelz.Infof(logger, cc.channelzID, "Channel authority set to %q", cc.authority) + return nil } diff --git a/resolver_conn_wrapper.go b/resolver_conn_wrapper.go index 05a9d4e0bac0..854e90f69ae5 100644 --- a/resolver_conn_wrapper.go +++ b/resolver_conn_wrapper.go @@ -23,7 +23,6 @@ import ( "sync" "google.golang.org/grpc/balancer" - "google.golang.org/grpc/credentials" "google.golang.org/grpc/internal/channelz" "google.golang.org/grpc/internal/grpcsync" "google.golang.org/grpc/internal/pretty" @@ -31,35 +30,46 @@ import ( "google.golang.org/grpc/serviceconfig" ) +// resolverStateUpdater wraps the single method used by ccResolverWrapper to +// report a state update from the actual resolver implementation. +type resolverStateUpdater interface { + updateResolverState(s resolver.State, err error) error +} + // ccResolverWrapper is a wrapper on top of cc for resolvers. // It implements resolver.ClientConn interface. type ccResolverWrapper struct { - cc *ClientConn + // The following fields are initialized when the wrapper is created and are + // read-only afterwards, and therefore can be accessed without a mutex. + cc resolverStateUpdater + done *grpcsync.Event + channelzID *channelz.Identifier + ignoreServiceConfig bool + resolverMu sync.Mutex resolver resolver.Resolver - done *grpcsync.Event - curState resolver.State incomingMu sync.Mutex // Synchronizes all the incoming calls. + curState resolver.State +} + +// ccResolverWrapperOpts wraps the arguments to be passed when creating a new +// ccResolverWrapper. +type ccResolverWrapperOpts struct { + target resolver.Target // User specified dial target to resolve. + builder resolver.Builder // Resolver builder to use. + bOpts resolver.BuildOptions // Resolver build options to use. + channelzID *channelz.Identifier // Channelz identifier for the channel. } // newCCResolverWrapper uses the resolver.Builder to build a Resolver and // returns a ccResolverWrapper object which wraps the newly built resolver. -func newCCResolverWrapper(cc *ClientConn, rb resolver.Builder) (*ccResolverWrapper, error) { +func newCCResolverWrapper(cc resolverStateUpdater, opts ccResolverWrapperOpts) (*ccResolverWrapper, error) { ccr := &ccResolverWrapper{ - cc: cc, - done: grpcsync.NewEvent(), - } - - var credsClone credentials.TransportCredentials - if creds := cc.dopts.copts.TransportCredentials; creds != nil { - credsClone = creds.Clone() - } - rbo := resolver.BuildOptions{ - DisableServiceConfig: cc.dopts.disableServiceConfig, - DialCreds: credsClone, - CredsBundle: cc.dopts.copts.CredsBundle, - Dialer: cc.dopts.copts.Dialer, + cc: cc, + done: grpcsync.NewEvent(), + channelzID: opts.channelzID, + ignoreServiceConfig: opts.bOpts.DisableServiceConfig, } var err error @@ -69,7 +79,7 @@ func newCCResolverWrapper(cc *ClientConn, rb resolver.Builder) (*ccResolverWrapp // accessing ccr.resolver which is being assigned here. ccr.resolverMu.Lock() defer ccr.resolverMu.Unlock() - ccr.resolver, err = rb.Build(cc.parsedTarget, ccr, rbo) + ccr.resolver, err = opts.builder.Build(opts.target, ccr, opts.bOpts) if err != nil { return nil, err } @@ -91,13 +101,15 @@ func (ccr *ccResolverWrapper) close() { ccr.resolverMu.Unlock() } +// UpdateState is called by resolver implementations to report new state to gRPC +// which includes addresses and service config. 
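+// A resolver implementation typically calls it on its resolver.ClientConn,
+// e.g. (an illustrative sketch with a made-up address):
+//
+//	cc.UpdateState(resolver.State{
+//		Addresses: []resolver.Address{{Addr: "10.0.0.1:443"}},
+//	})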
 func (ccr *ccResolverWrapper) UpdateState(s resolver.State) error {
 	ccr.incomingMu.Lock()
 	defer ccr.incomingMu.Unlock()
 	if ccr.done.HasFired() {
 		return nil
 	}
-	ccr.addChannelzTraceEvent(s)
+	ccr.addChannelzTraceEventLocked(s)
 	ccr.curState = s
 	if err := ccr.cc.updateResolverState(ccr.curState, nil); err == balancer.ErrBadResolverState {
 		return balancer.ErrBadResolverState
@@ -105,24 +117,27 @@ func (ccr *ccResolverWrapper) UpdateState(s resolver.State) error {
 	return nil
 }

+// ReportError is called by resolver implementations to report errors
+// encountered during name resolution to gRPC.
 func (ccr *ccResolverWrapper) ReportError(err error) {
 	ccr.incomingMu.Lock()
 	defer ccr.incomingMu.Unlock()
 	if ccr.done.HasFired() {
 		return
 	}
-	channelz.Warningf(logger, ccr.cc.channelzID, "ccResolverWrapper: reporting error to cc: %v", err)
+	channelz.Warningf(logger, ccr.channelzID, "ccResolverWrapper: reporting error to cc: %v", err)
 	ccr.cc.updateResolverState(resolver.State{}, err)
 }

-// NewAddress is called by the resolver implementation to send addresses to gRPC.
+// NewAddress is called by the resolver implementation to send addresses to
+// gRPC.
 func (ccr *ccResolverWrapper) NewAddress(addrs []resolver.Address) {
 	ccr.incomingMu.Lock()
 	defer ccr.incomingMu.Unlock()
 	if ccr.done.HasFired() {
 		return
 	}
-	ccr.addChannelzTraceEvent(resolver.State{Addresses: addrs, ServiceConfig: ccr.curState.ServiceConfig})
+	ccr.addChannelzTraceEventLocked(resolver.State{Addresses: addrs, ServiceConfig: ccr.curState.ServiceConfig})
 	ccr.curState.Addresses = addrs
 	ccr.cc.updateResolverState(ccr.curState, nil)
 }
@@ -135,26 +150,32 @@ func (ccr *ccResolverWrapper) NewServiceConfig(sc string) {
 	if ccr.done.HasFired() {
 		return
 	}
-	channelz.Infof(logger, ccr.cc.channelzID, "ccResolverWrapper: got new service config: %s", sc)
-	if ccr.cc.dopts.disableServiceConfig {
-		channelz.Info(logger, ccr.cc.channelzID, "Service config lookups disabled; ignoring config")
+	channelz.Infof(logger, ccr.channelzID, "ccResolverWrapper: got new service config: %s", sc)
+	if ccr.ignoreServiceConfig {
+		channelz.Info(logger, ccr.channelzID, "Service config lookups disabled; ignoring config")
 		return
 	}
 	scpr := parseServiceConfig(sc)
 	if scpr.Err != nil {
-		channelz.Warningf(logger, ccr.cc.channelzID, "ccResolverWrapper: error parsing service config: %v", scpr.Err)
+		channelz.Warningf(logger, ccr.channelzID, "ccResolverWrapper: error parsing service config: %v", scpr.Err)
 		return
 	}
-	ccr.addChannelzTraceEvent(resolver.State{Addresses: ccr.curState.Addresses, ServiceConfig: scpr})
+	ccr.addChannelzTraceEventLocked(resolver.State{Addresses: ccr.curState.Addresses, ServiceConfig: scpr})
 	ccr.curState.ServiceConfig = scpr
 	ccr.cc.updateResolverState(ccr.curState, nil)
 }

+// ParseServiceConfig is called by resolver implementations to parse a JSON
+// representation of the service config.
 func (ccr *ccResolverWrapper) ParseServiceConfig(scJSON string) *serviceconfig.ParseResult {
 	return parseServiceConfig(scJSON)
 }

-func (ccr *ccResolverWrapper) addChannelzTraceEvent(s resolver.State) {
+// addChannelzTraceEventLocked adds a channelz trace event containing the new
+// state received from resolver implementations.
+//
+// Caller must hold ccr.incomingMu.
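+//
+// The emitted channelz trace line looks like (illustrative, abbreviated):
+//
+//	Resolver state updated: {"Addresses": [...]} (resolver returned new addresses)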
+func (ccr *ccResolverWrapper) addChannelzTraceEventLocked(s resolver.State) {
 	var updates []string
 	var oldSC, newSC *ServiceConfig
 	var oldOK, newOK bool
@@ -172,5 +193,5 @@ func (ccr *ccResolverWrapper) addChannelzTraceEvent(s resolver.State) {
 	} else if len(ccr.curState.Addresses) == 0 && len(s.Addresses) > 0 {
 		updates = append(updates, "resolver returned new addresses")
 	}
-	channelz.Infof(logger, ccr.cc.channelzID, "Resolver state updated: %s (%v)", pretty.ToJSON(s), strings.Join(updates, "; "))
+	channelz.Infof(logger, ccr.channelzID, "Resolver state updated: %s (%v)", pretty.ToJSON(s), strings.Join(updates, "; "))
 }

From 83c460b8de09bac04be4ac982750853fab7809ec Mon Sep 17 00:00:00 2001
From: Gregory Cooke
Date: Fri, 21 Apr 2023 14:48:11 -0400
Subject: [PATCH 03/60] authz: Move audit package (#6218)

* Move audit logger to its own package

* remove audit prefixes since it's the package name now

* Add package comment
---
 authz/{ => audit}/audit_logger.go | 47 ++++++++++++++++---------------
 1 file changed, 24 insertions(+), 23 deletions(-)
 rename authz/{ => audit}/audit_logger.go (74%)

diff --git a/authz/audit_logger.go b/authz/audit/audit_logger.go
similarity index 74%
rename from authz/audit_logger.go
rename to authz/audit/audit_logger.go
index 992d66054fbb..b9b721970387 100644
--- a/authz/audit_logger.go
+++ b/authz/audit/audit_logger.go
@@ -16,7 +16,8 @@
  *
  */

-package authz
+// Package audit contains interfaces for audit logging during authorization.
+package audit

 import (
 	"encoding/json"
@@ -27,38 +28,38 @@ import (
 // to facilitate thread-safe reading/writing operations.
 type loggerBuilderRegistry struct {
 	mu       sync.Mutex
-	builders map[string]AuditLoggerBuilder
+	builders map[string]LoggerBuilder
 }

 var (
 	registry = loggerBuilderRegistry{
-		builders: make(map[string]AuditLoggerBuilder),
+		builders: make(map[string]LoggerBuilder),
 	}
 )

-// RegisterAuditLoggerBuilder registers the builder in a global map
+// RegisterLoggerBuilder registers the builder in a global map
// using b.Name() as the key.
 //
 // This should only be called during initialization time (i.e. in an init()
 // function). If multiple builders are registered with the same name,
 // the one registered last will take effect.
-func RegisterAuditLoggerBuilder(b AuditLoggerBuilder) {
+func RegisterLoggerBuilder(b LoggerBuilder) {
 	registry.mu.Lock()
 	defer registry.mu.Unlock()
 	registry.builders[b.Name()] = b
 }

-// GetAuditLoggerBuilder returns a builder with the given name.
+// GetLoggerBuilder returns a builder with the given name.
 // It returns nil if the builder is not found in the registry.
-func GetAuditLoggerBuilder(name string) AuditLoggerBuilder {
+func GetLoggerBuilder(name string) LoggerBuilder {
 	registry.mu.Lock()
 	defer registry.mu.Unlock()
 	return registry.builders[name]
 }

-// AuditEvent contains information passed to the audit logger as part of an
+// Event contains information passed to the audit logger as part of an
 // audit logging event.
-type AuditEvent struct {
+type Event struct {
 	// FullMethodName is the full method name of the audited RPC, in the format
 	// of "/pkg.Service/Method". For example, "/helloworld.Greeter/SayHello".
 	FullMethodName string
@@ -74,14 +75,14 @@ type AuditEvent struct {
 	Authorized bool
 }

-// AuditLoggerConfig represents an opaque data structure holding an audit
+// LoggerConfig represents an opaque data structure holding an audit
 // logger configuration. Concrete types representing configuration of specific
 // audit loggers must embed this interface to implement it.
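+//
+// For example (an illustrative sketch only), a file-based audit logger might
+// define its config as:
+//
+//	type fileLoggerConfig struct {
+//		audit.LoggerConfig
+//		Path string // parsed from the builder's JSON config
+//	}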
-type AuditLoggerConfig interface { - auditLoggerConfig() +type LoggerConfig interface { + loggerConfig() } -// AuditLogger is the interface to be implemented by audit loggers. +// Logger is the interface to be implemented by audit loggers. // // An audit logger is a logger instance that can be configured via the // authorization policy API or xDS HTTP RBAC filters. When the authorization @@ -91,35 +92,35 @@ type AuditLoggerConfig interface { // TODO(lwge): Change the link to the merged gRFC once it's ready. // Please refer to https://github.com/grpc/proposal/pull/346 for more details // about audit logging. -type AuditLogger interface { +type Logger interface { // Log performs audit logging for the provided audit event. // // This method is invoked in the RPC path and therefore implementations // must not block. - Log(*AuditEvent) + Log(*Event) } -// AuditLoggerBuilder is the interface to be implemented by audit logger +// LoggerBuilder is the interface to be implemented by audit logger // builders that are used at runtime to configure and instantiate audit loggers. // // Users who want to implement their own audit logging logic should -// implement this interface, along with the AuditLogger interface, and register -// it by calling RegisterAuditLoggerBuilder() at init time. +// implement this interface, along with the Logger interface, and register +// it by calling RegisterLoggerBuilder() at init time. // // TODO(lwge): Change the link to the merged gRFC once it's ready. // Please refer to https://github.com/grpc/proposal/pull/346 for more details // about audit logging. -type AuditLoggerBuilder interface { - // ParseAuditLoggerConfig parses the given JSON bytes into a structured +type LoggerBuilder interface { + // ParseLoggerConfig parses the given JSON bytes into a structured // logger config this builder can use to build an audit logger. - ParseAuditLoggerConfig(config json.RawMessage) (AuditLoggerConfig, error) + ParseLoggerConfig(config json.RawMessage) (LoggerConfig, error) // Build builds an audit logger with the given logger config. // This will only be called with valid configs returned from - // ParseAuditLoggerConfig() and any runtime issues such as failing to + // ParseLoggerConfig() and any runtime issues such as failing to // create a file should be handled by the logger implementation instead of // failing the logger instantiation. So implementers need to make sure it // can return a logger without error at this stage. - Build(AuditLoggerConfig) AuditLogger + Build(LoggerConfig) Logger // Name returns the name of logger built by this builder. // This is used to register and pick the builder. 
Name() string From 8628e075df225d916eaeaec1488af4dcc02805ec Mon Sep 17 00:00:00 2001 From: Sergey Matyukevich Date: Tue, 25 Apr 2023 11:17:53 -0600 Subject: [PATCH 04/60] xds/internal/balancer/outlierdetection: Add Channelz Logger to Outlier Detection LB (#6145) --- .../balancer/outlierdetection/balancer.go | 46 ++++++++++++++----- .../outlierdetection/balancer_test.go | 3 +- .../outlierdetection/subconn_wrapper.go | 5 ++ 3 files changed, 41 insertions(+), 13 deletions(-) diff --git a/xds/internal/balancer/outlierdetection/balancer.go b/xds/internal/balancer/outlierdetection/balancer.go index 8d87e8f9884a..749449c2123e 100644 --- a/xds/internal/balancer/outlierdetection/balancer.go +++ b/xds/internal/balancer/outlierdetection/balancer.go @@ -26,6 +26,7 @@ import ( "errors" "fmt" "math" + "strings" "sync" "sync/atomic" "time" @@ -35,6 +36,7 @@ import ( "google.golang.org/grpc/connectivity" "google.golang.org/grpc/internal/balancer/gracefulswitch" "google.golang.org/grpc/internal/buffer" + "google.golang.org/grpc/internal/channelz" "google.golang.org/grpc/internal/envconfig" "google.golang.org/grpc/internal/grpclog" "google.golang.org/grpc/internal/grpcrand" @@ -62,13 +64,14 @@ type bb struct{} func (bb) Build(cc balancer.ClientConn, bOpts balancer.BuildOptions) balancer.Balancer { b := &outlierDetectionBalancer{ - cc: cc, - closed: grpcsync.NewEvent(), - done: grpcsync.NewEvent(), - addrs: make(map[string]*addressInfo), - scWrappers: make(map[balancer.SubConn]*subConnWrapper), - scUpdateCh: buffer.NewUnbounded(), - pickerUpdateCh: buffer.NewUnbounded(), + cc: cc, + closed: grpcsync.NewEvent(), + done: grpcsync.NewEvent(), + addrs: make(map[string]*addressInfo), + scWrappers: make(map[balancer.SubConn]*subConnWrapper), + scUpdateCh: buffer.NewUnbounded(), + pickerUpdateCh: buffer.NewUnbounded(), + channelzParentID: bOpts.ChannelzParentID, } b.logger = prefixLogger(b) b.logger.Infof("Created") @@ -159,10 +162,11 @@ type outlierDetectionBalancer struct { // to suppress redundant picker updates. recentPickerNoop bool - closed *grpcsync.Event - done *grpcsync.Event - cc balancer.ClientConn - logger *grpclog.PrefixLogger + closed *grpcsync.Event + done *grpcsync.Event + cc balancer.ClientConn + logger *grpclog.PrefixLogger + channelzParentID *channelz.Identifier // childMu guards calls into child (to uphold the balancer.Balancer API // guarantee of synchronous calls). @@ -822,7 +826,10 @@ func (b *outlierDetectionBalancer) successRateAlgorithm() { return } successRate := float64(bucket.numSuccesses) / float64(bucket.numSuccesses+bucket.numFailures) - if successRate < (mean - stddev*(float64(ejectionCfg.StdevFactor)/1000)) { + requiredSuccessRate := mean - stddev*(float64(ejectionCfg.StdevFactor)/1000) + if successRate < requiredSuccessRate { + channelz.Infof(logger, b.channelzParentID, "SuccessRate algorithm detected outlier: %s. 
Parameters: successRate=%f, mean=%f, stddev=%f, requiredSuccessRate=%f", + addrInfo.string(), successRate, mean, stddev, requiredSuccessRate) if uint32(grpcrand.Int31n(100)) < ejectionCfg.EnforcementPercentage { b.ejectAddress(addrInfo) } @@ -849,6 +856,8 @@ func (b *outlierDetectionBalancer) failurePercentageAlgorithm() { } failurePercentage := (float64(bucket.numFailures) / float64(bucket.numSuccesses+bucket.numFailures)) * 100 if failurePercentage > float64(b.cfg.FailurePercentageEjection.Threshold) { + channelz.Infof(logger, b.channelzParentID, "FailurePercentage algorithm detected outlier: %s, failurePercentage=%f", + addrInfo.string(), failurePercentage) if uint32(grpcrand.Int31n(100)) < ejectionCfg.EnforcementPercentage { b.ejectAddress(addrInfo) } @@ -863,7 +872,9 @@ func (b *outlierDetectionBalancer) ejectAddress(addrInfo *addressInfo) { addrInfo.ejectionTimeMultiplier++ for _, sbw := range addrInfo.sws { sbw.eject() + channelz.Infof(logger, b.channelzParentID, "Subchannel ejected: %s", sbw.string()) } + } // Caller must hold b.mu. @@ -872,6 +883,7 @@ func (b *outlierDetectionBalancer) unejectAddress(addrInfo *addressInfo) { addrInfo.latestEjectionTimestamp = time.Time{} for _, sbw := range addrInfo.sws { sbw.uneject() + channelz.Infof(logger, b.channelzParentID, "Subchannel unejected: %s", sbw.string()) } } @@ -896,6 +908,16 @@ type addressInfo struct { sws []*subConnWrapper } +func (a *addressInfo) string() string { + var res strings.Builder + res.WriteString("[") + for _, sw := range a.sws { + res.WriteString(sw.string()) + } + res.WriteString("]") + return res.String() +} + func newAddressInfo() *addressInfo { return &addressInfo{ callCounter: newCallCounter(), diff --git a/xds/internal/balancer/outlierdetection/balancer_test.go b/xds/internal/balancer/outlierdetection/balancer_test.go index 8b86ebbb19f2..41447164c013 100644 --- a/xds/internal/balancer/outlierdetection/balancer_test.go +++ b/xds/internal/balancer/outlierdetection/balancer_test.go @@ -34,6 +34,7 @@ import ( "google.golang.org/grpc/balancer" "google.golang.org/grpc/connectivity" "google.golang.org/grpc/internal/balancer/stub" + "google.golang.org/grpc/internal/channelz" "google.golang.org/grpc/internal/grpcsync" "google.golang.org/grpc/internal/grpctest" internalserviceconfig "google.golang.org/grpc/internal/serviceconfig" @@ -304,7 +305,7 @@ func setup(t *testing.T) (*outlierDetectionBalancer, *testutils.TestClientConn, t.Fatalf("balancer.Get(%q) returned nil", Name) } tcc := testutils.NewTestClientConn(t) - odB := builder.Build(tcc, balancer.BuildOptions{}) + odB := builder.Build(tcc, balancer.BuildOptions{ChannelzParentID: channelz.NewIdentifierForTesting(channelz.RefChannel, time.Now().Unix(), nil)}) return odB.(*outlierDetectionBalancer), tcc, odB.Close } diff --git a/xds/internal/balancer/outlierdetection/subconn_wrapper.go b/xds/internal/balancer/outlierdetection/subconn_wrapper.go index 8e25eb788b1d..be631387b2f3 100644 --- a/xds/internal/balancer/outlierdetection/subconn_wrapper.go +++ b/xds/internal/balancer/outlierdetection/subconn_wrapper.go @@ -18,6 +18,7 @@ package outlierdetection import ( + "fmt" "unsafe" "google.golang.org/grpc/balancer" @@ -66,3 +67,7 @@ func (scw *subConnWrapper) uneject() { isEjected: false, }) } + +func (scw *subConnWrapper) string() string { + return fmt.Sprintf("%+v", scw.addresses) +} From eff0942e95d93112921414aee758e619ec86f26f Mon Sep 17 00:00:00 2001 From: Zach Reyes <39203661+zasweq@users.noreply.github.com> Date: Tue, 25 Apr 2023 22:47:15 -0400 Subject: [PATCH 
05/60] xds/internal/xdsclient: Custom LB xDS Client Changes (#6165) --- internal/envconfig/xds.go | 13 +- xds/internal/balancer/ringhash/config.go | 11 +- xds/internal/balancer/ringhash/config_test.go | 12 + xds/internal/balancer/wrrlocality/balancer.go | 68 ++ .../balancer/wrrlocality/balancer_test.go | 121 ++++ .../xdsclient/tests/cds_watchers_test.go | 2 +- .../xdsclient/tests/resource_update_test.go | 2 +- .../xdsclient/xdslbregistry/converter.go | 154 +++++ .../xdslbregistry/test/converter_test.go | 384 +++++++++++ .../xdsresource/test/unmarshal_cds_test.go | 604 ++++++++++++++++++ .../xdsclient/xdsresource/type_cds.go | 7 + .../xdsclient/xdsresource/unmarshal_cds.go | 39 +- .../xdsresource/unmarshal_cds_test.go | 296 ++------- 13 files changed, 1452 insertions(+), 261 deletions(-) create mode 100644 xds/internal/balancer/wrrlocality/balancer.go create mode 100644 xds/internal/balancer/wrrlocality/balancer_test.go create mode 100644 xds/internal/xdsclient/xdslbregistry/converter.go create mode 100644 xds/internal/xdsclient/xdslbregistry/test/converter_test.go create mode 100644 xds/internal/xdsclient/xdsresource/test/unmarshal_cds_test.go diff --git a/internal/envconfig/xds.go b/internal/envconfig/xds.go index 3b17705ba097..1d9152e8eeb2 100644 --- a/internal/envconfig/xds.go +++ b/internal/envconfig/xds.go @@ -61,11 +61,10 @@ var ( // have a brand new API on the server-side and users explicitly need to use // the new API to get security integration on the server. XDSClientSideSecurity = boolFromEnv("GRPC_XDS_EXPERIMENTAL_SECURITY_SUPPORT", true) - // XDSAggregateAndDNS indicates whether processing of aggregated cluster - // and DNS cluster is enabled, which can be enabled by setting the - // environment variable - // "GRPC_XDS_EXPERIMENTAL_ENABLE_AGGREGATE_AND_LOGICAL_DNS_CLUSTER" to - // "true". + // XDSAggregateAndDNS indicates whether processing of aggregated cluster and + // DNS cluster is enabled, which can be disabled by setting the environment + // variable "GRPC_XDS_EXPERIMENTAL_ENABLE_AGGREGATE_AND_LOGICAL_DNS_CLUSTER" + // to "false". XDSAggregateAndDNS = boolFromEnv("GRPC_XDS_EXPERIMENTAL_ENABLE_AGGREGATE_AND_LOGICAL_DNS_CLUSTER", true) // XDSRBAC indicates whether xDS configured RBAC HTTP Filter is enabled, @@ -89,4 +88,8 @@ var ( // C2PResolverTestOnlyTrafficDirectorURI is the TD URI for testing. C2PResolverTestOnlyTrafficDirectorURI = os.Getenv("GRPC_TEST_ONLY_GOOGLE_C2P_RESOLVER_TRAFFIC_DIRECTOR_URI") + // XDSCustomLBPolicy indicates whether Custom LB Policies are enabled, which + // can be enabled by setting the environment variable + // "GRPC_EXPERIMENTAL_XDS_CUSTOM_LB_CONFIG" to "true". 
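+	// Enabling it is just (illustrative shell usage, not part of this change):
+	//
+	//	export GRPC_EXPERIMENTAL_XDS_CUSTOM_LB_CONFIG=true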
+ XDSCustomLBPolicy = boolFromEnv("GRPC_EXPERIMENTAL_XDS_CUSTOM_LB_CONFIG", false) ) diff --git a/xds/internal/balancer/ringhash/config.go b/xds/internal/balancer/ringhash/config.go index 4763120fa649..b4afcf100132 100644 --- a/xds/internal/balancer/ringhash/config.go +++ b/xds/internal/balancer/ringhash/config.go @@ -35,8 +35,9 @@ type LBConfig struct { } const ( - defaultMinSize = 1024 - defaultMaxSize = 4096 + defaultMinSize = 1024 + defaultMaxSize = 4096 + ringHashSizeUpperBound = 8 * 1024 * 1024 // 8M ) func parseConfig(c json.RawMessage) (*LBConfig, error) { @@ -44,6 +45,12 @@ func parseConfig(c json.RawMessage) (*LBConfig, error) { if err := json.Unmarshal(c, &cfg); err != nil { return nil, err } + if cfg.MinRingSize > ringHashSizeUpperBound { + return nil, fmt.Errorf("min_ring_size value of %d is greater than max supported value %d for this field", cfg.MinRingSize, ringHashSizeUpperBound) + } + if cfg.MaxRingSize > ringHashSizeUpperBound { + return nil, fmt.Errorf("max_ring_size value of %d is greater than max supported value %d for this field", cfg.MaxRingSize, ringHashSizeUpperBound) + } if cfg.MinRingSize == 0 { cfg.MinRingSize = defaultMinSize } diff --git a/xds/internal/balancer/ringhash/config_test.go b/xds/internal/balancer/ringhash/config_test.go index d8f9ed30bb68..1077d3e7dafb 100644 --- a/xds/internal/balancer/ringhash/config_test.go +++ b/xds/internal/balancer/ringhash/config_test.go @@ -82,6 +82,18 @@ func (s) TestParseConfig(t *testing.T) { envConfigCap: 8000, want: &LBConfig{MinRingSize: 8000, MaxRingSize: 8000}, }, + { + name: "min greater than upper bound", + js: `{"minRingSize": 8388610, "maxRingSize": 10}`, + want: nil, + wantErr: true, + }, + { + name: "max greater than upper bound", + js: `{"minRingSize": 10, "maxRingSize": 8388610}`, + want: nil, + wantErr: true, + }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { diff --git a/xds/internal/balancer/wrrlocality/balancer.go b/xds/internal/balancer/wrrlocality/balancer.go new file mode 100644 index 000000000000..2ff6fccf89bd --- /dev/null +++ b/xds/internal/balancer/wrrlocality/balancer.go @@ -0,0 +1,68 @@ +/* + * + * Copyright 2023 gRPC authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +// Package wrrlocality provides an implementation of the wrr locality LB policy, +// as defined in [A52 - xDS Custom LB Policies]. +// +// [A52 - xDS Custom LB Policies]: https://github.com/grpc/proposal/blob/master/A52-xds-custom-lb-policies.md +package wrrlocality + +import ( + "encoding/json" + "errors" + "fmt" + + "google.golang.org/grpc/balancer" + internalserviceconfig "google.golang.org/grpc/internal/serviceconfig" + "google.golang.org/grpc/serviceconfig" +) + +// Name is the name of wrr_locality balancer. 
+const Name = "xds_wrr_locality_experimental"
+
+func init() {
+	balancer.Register(bb{})
+}
+
+type bb struct{}
+
+func (bb) Name() string {
+	return Name
+}
+
+func (bb) Build(cc balancer.ClientConn, bOpts balancer.BuildOptions) balancer.Balancer {
+	return nil
+}
+
+// LBConfig is the config for the wrr locality balancer.
+type LBConfig struct {
+	serviceconfig.LoadBalancingConfig
+	// ChildPolicy is the config for the child policy.
+	ChildPolicy *internalserviceconfig.BalancerConfig `json:"childPolicy,omitempty"`
+}
+
+func (bb) ParseConfig(s json.RawMessage) (serviceconfig.LoadBalancingConfig, error) {
+	var lbCfg *LBConfig
+	if err := json.Unmarshal(s, &lbCfg); err != nil {
+		return nil, fmt.Errorf("xds: invalid LBConfig for wrrlocality: %s, error: %v", string(s), err)
+	}
+	if lbCfg == nil || lbCfg.ChildPolicy == nil {
+		return nil, errors.New("xds: invalid LBConfig for wrrlocality: child policy field must be set")
+	}
+	return lbCfg, nil
+}
diff --git a/xds/internal/balancer/wrrlocality/balancer_test.go b/xds/internal/balancer/wrrlocality/balancer_test.go
new file mode 100644
index 000000000000..9283b02f14b2
--- /dev/null
+++ b/xds/internal/balancer/wrrlocality/balancer_test.go
@@ -0,0 +1,121 @@
+/*
+ *
+ * Copyright 2023 gRPC authors.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ * + */ + +package wrrlocality + +import ( + "encoding/json" + "errors" + "strings" + "testing" + + "github.com/google/go-cmp/cmp" + "google.golang.org/grpc/balancer/roundrobin" + "google.golang.org/grpc/internal/balancer/stub" + "google.golang.org/grpc/internal/grpctest" + internalserviceconfig "google.golang.org/grpc/internal/serviceconfig" + "google.golang.org/grpc/serviceconfig" +) + +type s struct { + grpctest.Tester +} + +func Test(t *testing.T) { + grpctest.RunSubTests(t, s{}) +} + +func (s) TestParseConfig(t *testing.T) { + const errParseConfigName = "errParseConfigBalancer" + stub.Register(errParseConfigName, stub.BalancerFuncs{ + ParseConfig: func(json.RawMessage) (serviceconfig.LoadBalancingConfig, error) { + return nil, errors.New("some error") + }, + }) + + parser := bb{} + tests := []struct { + name string + input string + wantCfg serviceconfig.LoadBalancingConfig + wantErr string + }{ + { + name: "happy-case-round robin-child", + input: `{"childPolicy": [{"round_robin": {}}]}`, + wantCfg: &LBConfig{ + ChildPolicy: &internalserviceconfig.BalancerConfig{ + Name: roundrobin.Name, + }, + }, + }, + { + name: "invalid-json", + input: "{{invalidjson{{", + wantErr: "invalid character", + }, + + { + name: "child-policy-field-isn't-set", + input: `{}`, + wantErr: "child policy field must be set", + }, + { + name: "child-policy-type-is-empty", + input: `{"childPolicy": []}`, + wantErr: "invalid loadBalancingConfig: no supported policies found in []", + }, + { + name: "child-policy-empty-config", + input: `{"childPolicy": [{"": {}}]}`, + wantErr: "invalid loadBalancingConfig: no supported policies found in []", + }, + { + name: "child-policy-type-isn't-registered", + input: `{"childPolicy": [{"doesNotExistBalancer": {"cluster": "test_cluster"}}]}`, + wantErr: "invalid loadBalancingConfig: no supported policies found in [doesNotExistBalancer]", + }, + { + name: "child-policy-config-is-invalid", + input: `{"childPolicy": [{"errParseConfigBalancer": {"cluster": "test_cluster"}}]}`, + wantErr: "error parsing loadBalancingConfig for policy \"errParseConfigBalancer\"", + }, + } + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + gotCfg, gotErr := parser.ParseConfig(json.RawMessage(test.input)) + // Substring match makes this very tightly coupled to the + // internalserviceconfig.BalancerConfig error strings. However, it + // is important to distinguish the different types of error messages + // possible as the parser has a few defined buckets of ways it can + // error out. 
+			if (gotErr != nil) != (test.wantErr != "") {
+				t.Fatalf("ParseConfig(%v) = %v, wantErr %v", test.input, gotErr, test.wantErr)
+			}
+			if gotErr != nil && !strings.Contains(gotErr.Error(), test.wantErr) {
+				t.Fatalf("ParseConfig(%v) = %v, wantErr %v", test.input, gotErr, test.wantErr)
+			}
+			if test.wantErr != "" {
+				return
+			}
+			if diff := cmp.Diff(gotCfg, test.wantCfg); diff != "" {
+				t.Fatalf("ParseConfig(%v) got unexpected output, diff (-got +want): %v", test.input, diff)
+			}
+		})
+	}
+}
diff --git a/xds/internal/xdsclient/tests/cds_watchers_test.go b/xds/internal/xdsclient/tests/cds_watchers_test.go
index d6d02724fc91..3583fa929d96 100644
--- a/xds/internal/xdsclient/tests/cds_watchers_test.go
+++ b/xds/internal/xdsclient/tests/cds_watchers_test.go
@@ -70,7 +70,7 @@ func verifyClusterUpdate(ctx context.Context, updateCh *testutils.Channel, wantU
 			return fmt.Errorf("received update with error type %v, want %v", gotType, wantType)
 		}
 	}
-	cmpOpts := []cmp.Option{cmpopts.EquateEmpty(), cmpopts.IgnoreFields(xdsresource.ClusterUpdate{}, "Raw")}
+	cmpOpts := []cmp.Option{cmpopts.EquateEmpty(), cmpopts.IgnoreFields(xdsresource.ClusterUpdate{}, "Raw", "LBPolicyJSON")}
 	if diff := cmp.Diff(wantUpdate.Update, got.Update, cmpOpts...); diff != "" {
 		return fmt.Errorf("received unexpected diff in the cluster resource update: (-want, got):\n%s", diff)
 	}
diff --git a/xds/internal/xdsclient/tests/resource_update_test.go b/xds/internal/xdsclient/tests/resource_update_test.go
index dfb285de3a8d..ff6cf7c756a4 100644
--- a/xds/internal/xdsclient/tests/resource_update_test.go
+++ b/xds/internal/xdsclient/tests/resource_update_test.go
@@ -802,7 +802,7 @@ func (s) TestHandleClusterResponseFromManagementServer(t *testing.T) {
 			}
 			cmpOpts := []cmp.Option{
 				cmpopts.EquateEmpty(),
-				cmpopts.IgnoreFields(xdsresource.ClusterUpdate{}, "Raw"),
+				cmpopts.IgnoreFields(xdsresource.ClusterUpdate{}, "Raw", "LBPolicyJSON"),
 			}
 			if diff := cmp.Diff(test.wantUpdate, gotUpdate, cmpOpts...); diff != "" {
 				t.Fatalf("Unexpected diff in metadata, diff (-want +got):\n%s", diff)
diff --git a/xds/internal/xdsclient/xdslbregistry/converter.go b/xds/internal/xdsclient/xdslbregistry/converter.go
new file mode 100644
index 000000000000..ef13802b0c12
--- /dev/null
+++ b/xds/internal/xdsclient/xdslbregistry/converter.go
@@ -0,0 +1,154 @@
+/*
+ *
+ * Copyright 2023 gRPC authors.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ */
+
+// Package xdslbregistry provides utilities to convert proto load balancing
+// configuration, defined by the xDS API spec, to JSON load balancing
+// configuration.
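+//
+// For example (illustrative), a LoadBalancingPolicy proto whose first
+// supported policy is round_robin converts to the JSON form:
+//
+//	[{"round_robin": {}}]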
+package xdslbregistry

+import (
+	"encoding/json"
+	"fmt"
+	"strings"
+
+	v1udpatypepb "github.com/cncf/xds/go/udpa/type/v1"
+	v3cncftypepb "github.com/cncf/xds/go/xds/type/v3"
+	v3clusterpb "github.com/envoyproxy/go-control-plane/envoy/config/cluster/v3"
+	v3ringhashpb "github.com/envoyproxy/go-control-plane/envoy/extensions/load_balancing_policies/ring_hash/v3"
+	v3wrrlocalitypb "github.com/envoyproxy/go-control-plane/envoy/extensions/load_balancing_policies/wrr_locality/v3"
+	"github.com/golang/protobuf/proto"
+	structpb "github.com/golang/protobuf/ptypes/struct"
+
+	"google.golang.org/grpc/internal/envconfig"
+)
+
+const (
+	defaultRingHashMinSize = 1024
+	defaultRingHashMaxSize = 8 * 1024 * 1024 // 8M
+)
+
+// ConvertToServiceConfig converts a proto Load Balancing Policy configuration
+// into a JSON string. Returns an error if:
+//   - no supported policy is found
+//   - there are more than 16 layers of recursion in the configuration
+//   - a failure occurs when converting the policy
+func ConvertToServiceConfig(lbPolicy *v3clusterpb.LoadBalancingPolicy) (json.RawMessage, error) {
+	return convertToServiceConfig(lbPolicy, 0)
+}
+
+func convertToServiceConfig(lbPolicy *v3clusterpb.LoadBalancingPolicy, depth int) (json.RawMessage, error) {
+	// "Configurations that require more than 16 levels of recursion are
+	// considered invalid and should result in a NACK response." - A51
+	if depth > 15 {
+		return nil, fmt.Errorf("lb policy %v exceeds max depth supported: 16 layers", lbPolicy)
+	}
+
+	// "This function iterates over the list of policy messages in
+	// LoadBalancingPolicy, attempting to convert each one to gRPC form,
+	// stopping at the first supported policy." - A52
+	for _, policy := range lbPolicy.GetPolicies() {
+		// The policy message contains a TypedExtensionConfig
+		// message with the configuration information. TypedExtensionConfig in turn
+		// uses an Any-typed typed_config field to store policy configuration of any
+		// type. This typed_config field is used to determine both the name of a
+		// policy and the configuration for it, depending on its type:
+		switch policy.GetTypedExtensionConfig().GetTypedConfig().GetTypeUrl() {
+		case "type.googleapis.com/envoy.extensions.load_balancing_policies.ring_hash.v3.RingHash":
+			if !envconfig.XDSRingHash {
+				continue
+			}
+			rhProto := &v3ringhashpb.RingHash{}
+			if err := proto.Unmarshal(policy.GetTypedExtensionConfig().GetTypedConfig().GetValue(), rhProto); err != nil {
+				return nil, fmt.Errorf("failed to unmarshal resource: %v", err)
+			}
+			return convertRingHash(rhProto)
+		case "type.googleapis.com/envoy.extensions.load_balancing_policies.round_robin.v3.RoundRobin":
+			return makeBalancerConfigJSON("round_robin", json.RawMessage("{}")), nil
+		case "type.googleapis.com/envoy.extensions.load_balancing_policies.wrr_locality.v3.WrrLocality":
+			wrrlProto := &v3wrrlocalitypb.WrrLocality{}
+			if err := proto.Unmarshal(policy.GetTypedExtensionConfig().GetTypedConfig().GetValue(), wrrlProto); err != nil {
+				return nil, fmt.Errorf("failed to unmarshal resource: %v", err)
+			}
+			return convertWrrLocality(wrrlProto, depth)
+		case "type.googleapis.com/xds.type.v3.TypedStruct":
+			tsProto := &v3cncftypepb.TypedStruct{}
+			if err := proto.Unmarshal(policy.GetTypedExtensionConfig().GetTypedConfig().GetValue(), tsProto); err != nil {
+				return nil, fmt.Errorf("failed to unmarshal resource: %v", err)
+			}
+			return convertCustomPolicy(tsProto.GetTypeUrl(), tsProto.GetValue())
+		case "type.googleapis.com/udpa.type.v1.TypedStruct":
+			tsProto := &v1udpatypepb.TypedStruct{}
+			if err := proto.Unmarshal(policy.GetTypedExtensionConfig().GetTypedConfig().GetValue(), tsProto); err != nil {
+				return nil, fmt.Errorf("failed to unmarshal resource: %v", err)
+			}
+			return convertCustomPolicy(tsProto.GetTypeUrl(), tsProto.GetValue())
+		}
+		// Any entry not in the above list is unsupported and will be skipped.
+		// This includes Least Request as well, since grpc-go does not support
+		// the Least Request Load Balancing Policy.
+	}
+	return nil, fmt.Errorf("no supported policy found in policy list %+v", lbPolicy)
+}
+
+// convertRingHash converts a proto representation of the ring_hash LB policy's
+// configuration to gRPC JSON format.
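+//
+// For example (illustrative), a RingHash proto using XX_HASH with
+// minimum_ring_size 1024 and maximum_ring_size 4096 converts to:
+//
+//	[{"ring_hash_experimental": {"minRingSize": 1024, "maxRingSize": 4096}}]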
+func convertRingHash(cfg *v3ringhashpb.RingHash) (json.RawMessage, error) { + if cfg.GetHashFunction() != v3ringhashpb.RingHash_XX_HASH { + return nil, fmt.Errorf("unsupported ring_hash hash function %v", cfg.GetHashFunction()) + } + + var minSize, maxSize uint64 = defaultRingHashMinSize, defaultRingHashMaxSize + if min := cfg.GetMinimumRingSize(); min != nil { + minSize = min.GetValue() + } + if max := cfg.GetMaximumRingSize(); max != nil { + maxSize = max.GetValue() + } + + lbCfgJSON := []byte(fmt.Sprintf("{\"minRingSize\": %d, \"maxRingSize\": %d}", minSize, maxSize)) + return makeBalancerConfigJSON("ring_hash_experimental", lbCfgJSON), nil +} + +func convertWrrLocality(cfg *v3wrrlocalitypb.WrrLocality, depth int) (json.RawMessage, error) { + epJSON, err := convertToServiceConfig(cfg.GetEndpointPickingPolicy(), depth+1) + if err != nil { + return nil, fmt.Errorf("error converting endpoint picking policy: %v for %+v", err, cfg) + } + lbCfgJSON := []byte(fmt.Sprintf(`{"childPolicy": %s}`, epJSON)) + return makeBalancerConfigJSON("xds_wrr_locality_experimental", lbCfgJSON), nil +} + +func convertCustomPolicy(typeURL string, s *structpb.Struct) (json.RawMessage, error) { + // The gRPC policy name will be the "type name" part of the value of the + // type_url field in the TypedStruct. We get this by using the part after + // the last / character. Can assume a valid type_url from the control plane. + urls := strings.Split(typeURL, "/") + name := urls[len(urls)-1] + + rawJSON, err := json.Marshal(s) + if err != nil { + return nil, fmt.Errorf("error converting custom lb policy %v: %v for %+v", err, typeURL, s) + } + // The Struct contained in the TypedStruct will be returned as-is as the + // configuration JSON object. + return makeBalancerConfigJSON(name, rawJSON), nil +} + +func makeBalancerConfigJSON(name string, value json.RawMessage) []byte { + return []byte(fmt.Sprintf(`[{%q: %s}]`, name, value)) +} diff --git a/xds/internal/xdsclient/xdslbregistry/test/converter_test.go b/xds/internal/xdsclient/xdslbregistry/test/converter_test.go new file mode 100644 index 000000000000..7f31d68f1f8d --- /dev/null +++ b/xds/internal/xdsclient/xdslbregistry/test/converter_test.go @@ -0,0 +1,384 @@ +/* + * + * Copyright 2023 gRPC authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +// Package test contains test cases for the xDS LB Policy Registry. 
+package test + +import ( + "encoding/json" + "strings" + "testing" + + v1udpatypepb "github.com/cncf/xds/go/udpa/type/v1" + v3cncftypepb "github.com/cncf/xds/go/xds/type/v3" + v3clusterpb "github.com/envoyproxy/go-control-plane/envoy/config/cluster/v3" + v3corepb "github.com/envoyproxy/go-control-plane/envoy/config/core/v3" + v3leastrequestpb "github.com/envoyproxy/go-control-plane/envoy/extensions/load_balancing_policies/least_request/v3" + v3ringhashpb "github.com/envoyproxy/go-control-plane/envoy/extensions/load_balancing_policies/ring_hash/v3" + v3roundrobinpb "github.com/envoyproxy/go-control-plane/envoy/extensions/load_balancing_policies/round_robin/v3" + v3wrrlocalitypb "github.com/envoyproxy/go-control-plane/envoy/extensions/load_balancing_policies/wrr_locality/v3" + "github.com/golang/protobuf/proto" + structpb "github.com/golang/protobuf/ptypes/struct" + "github.com/google/go-cmp/cmp" + + _ "google.golang.org/grpc/balancer/roundrobin" + "google.golang.org/grpc/internal/balancer/stub" + "google.golang.org/grpc/internal/envconfig" + "google.golang.org/grpc/internal/grpctest" + "google.golang.org/grpc/internal/pretty" + internalserviceconfig "google.golang.org/grpc/internal/serviceconfig" + "google.golang.org/grpc/internal/testutils" + "google.golang.org/grpc/serviceconfig" + "google.golang.org/grpc/xds/internal/balancer/ringhash" + "google.golang.org/grpc/xds/internal/balancer/wrrlocality" + "google.golang.org/grpc/xds/internal/xdsclient/xdslbregistry" + "google.golang.org/protobuf/types/known/anypb" + "google.golang.org/protobuf/types/known/wrapperspb" +) + +type s struct { + grpctest.Tester +} + +func Test(t *testing.T) { + grpctest.RunSubTests(t, s{}) +} + +type customLBConfig struct { + serviceconfig.LoadBalancingConfig +} + +// We have these tests in a separate test package in order to not take a +// dependency on the internal xDS balancer packages within the xDS Client. 
+func (s) TestConvertToServiceConfigSuccess(t *testing.T) { + const customLBPolicyName = "myorg.MyCustomLeastRequestPolicy" + stub.Register(customLBPolicyName, stub.BalancerFuncs{ + ParseConfig: func(json.RawMessage) (serviceconfig.LoadBalancingConfig, error) { + return customLBConfig{}, nil + }, + }) + + tests := []struct { + name string + policy *v3clusterpb.LoadBalancingPolicy + wantConfig *internalserviceconfig.BalancerConfig + rhDisabled bool + }{ + { + name: "ring_hash", + policy: &v3clusterpb.LoadBalancingPolicy{ + Policies: []*v3clusterpb.LoadBalancingPolicy_Policy{ + { + TypedExtensionConfig: &v3corepb.TypedExtensionConfig{ + TypedConfig: testutils.MarshalAny(&v3ringhashpb.RingHash{ + HashFunction: v3ringhashpb.RingHash_XX_HASH, + MinimumRingSize: wrapperspb.UInt64(10), + MaximumRingSize: wrapperspb.UInt64(100), + }), + }, + }, + }, + }, + wantConfig: &internalserviceconfig.BalancerConfig{ + Name: "ring_hash_experimental", + Config: &ringhash.LBConfig{ + MinRingSize: 10, + MaxRingSize: 100, + }, + }, + }, + { + name: "round_robin", + policy: &v3clusterpb.LoadBalancingPolicy{ + Policies: []*v3clusterpb.LoadBalancingPolicy_Policy{ + { + TypedExtensionConfig: &v3corepb.TypedExtensionConfig{ + TypedConfig: testutils.MarshalAny(&v3roundrobinpb.RoundRobin{}), + }, + }, + }, + }, + wantConfig: &internalserviceconfig.BalancerConfig{ + Name: "round_robin", + }, + }, + { + name: "round_robin_ring_hash_use_first_supported", + policy: &v3clusterpb.LoadBalancingPolicy{ + Policies: []*v3clusterpb.LoadBalancingPolicy_Policy{ + { + TypedExtensionConfig: &v3corepb.TypedExtensionConfig{ + TypedConfig: testutils.MarshalAny(&v3roundrobinpb.RoundRobin{}), + }, + }, + { + TypedExtensionConfig: &v3corepb.TypedExtensionConfig{ + TypedConfig: testutils.MarshalAny(&v3ringhashpb.RingHash{ + HashFunction: v3ringhashpb.RingHash_XX_HASH, + MinimumRingSize: wrapperspb.UInt64(10), + MaximumRingSize: wrapperspb.UInt64(100), + }), + }, + }, + }, + }, + wantConfig: &internalserviceconfig.BalancerConfig{ + Name: "round_robin", + }, + }, + { + name: "ring_hash_disabled_rh_rr_use_first_supported", + policy: &v3clusterpb.LoadBalancingPolicy{ + Policies: []*v3clusterpb.LoadBalancingPolicy_Policy{ + { + TypedExtensionConfig: &v3corepb.TypedExtensionConfig{ + TypedConfig: testutils.MarshalAny(&v3ringhashpb.RingHash{ + HashFunction: v3ringhashpb.RingHash_XX_HASH, + MinimumRingSize: wrapperspb.UInt64(10), + MaximumRingSize: wrapperspb.UInt64(100), + }), + }, + }, + { + TypedExtensionConfig: &v3corepb.TypedExtensionConfig{ + TypedConfig: testutils.MarshalAny(&v3roundrobinpb.RoundRobin{}), + }, + }, + }, + }, + wantConfig: &internalserviceconfig.BalancerConfig{ + Name: "round_robin", + }, + rhDisabled: true, + }, + { + name: "custom_lb_type_v3_struct", + policy: &v3clusterpb.LoadBalancingPolicy{ + Policies: []*v3clusterpb.LoadBalancingPolicy_Policy{ + { + TypedExtensionConfig: &v3corepb.TypedExtensionConfig{ + TypedConfig: testutils.MarshalAny(&v3cncftypepb.TypedStruct{ + TypeUrl: "type.googleapis.com/myorg.MyCustomLeastRequestPolicy", + Value: &structpb.Struct{}, + }), + }, + }, + }, + }, + wantConfig: &internalserviceconfig.BalancerConfig{ + Name: "myorg.MyCustomLeastRequestPolicy", + Config: customLBConfig{}, + }, + }, + { + name: "custom_lb_type_v1_struct", + policy: &v3clusterpb.LoadBalancingPolicy{ + Policies: []*v3clusterpb.LoadBalancingPolicy_Policy{ + { + TypedExtensionConfig: &v3corepb.TypedExtensionConfig{ + TypedConfig: testutils.MarshalAny(&v1udpatypepb.TypedStruct{ + TypeUrl: 
"type.googleapis.com/myorg.MyCustomLeastRequestPolicy", + Value: &structpb.Struct{}, + }), + }, + }, + }, + }, + wantConfig: &internalserviceconfig.BalancerConfig{ + Name: "myorg.MyCustomLeastRequestPolicy", + Config: customLBConfig{}, + }, + }, + { + name: "wrr_locality_child_round_robin", + policy: &v3clusterpb.LoadBalancingPolicy{ + Policies: []*v3clusterpb.LoadBalancingPolicy_Policy{ + { + TypedExtensionConfig: &v3corepb.TypedExtensionConfig{ + TypedConfig: wrrLocalityAny(&v3roundrobinpb.RoundRobin{}), + }, + }, + }, + }, + wantConfig: &internalserviceconfig.BalancerConfig{ + Name: wrrlocality.Name, + Config: &wrrlocality.LBConfig{ + ChildPolicy: &internalserviceconfig.BalancerConfig{ + Name: "round_robin", + }, + }, + }, + }, + { + name: "wrr_locality_child_custom_lb_type_v3_struct", + policy: &v3clusterpb.LoadBalancingPolicy{ + Policies: []*v3clusterpb.LoadBalancingPolicy_Policy{ + { + TypedExtensionConfig: &v3corepb.TypedExtensionConfig{ + TypedConfig: wrrLocalityAny(&v3cncftypepb.TypedStruct{ + TypeUrl: "type.googleapis.com/myorg.MyCustomLeastRequestPolicy", + Value: &structpb.Struct{}, + }), + }, + }, + }, + }, + wantConfig: &internalserviceconfig.BalancerConfig{ + Name: wrrlocality.Name, + Config: &wrrlocality.LBConfig{ + ChildPolicy: &internalserviceconfig.BalancerConfig{ + Name: "myorg.MyCustomLeastRequestPolicy", + Config: customLBConfig{}, + }, + }, + }, + }, + } + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + if test.rhDisabled { + oldRingHashSupport := envconfig.XDSRingHash + envconfig.XDSRingHash = false + defer func() { + envconfig.XDSRingHash = oldRingHashSupport + }() + } + rawJSON, err := xdslbregistry.ConvertToServiceConfig(test.policy) + if err != nil { + t.Fatalf("ConvertToServiceConfig(%s) failed: %v", pretty.ToJSON(test.policy), err) + } + bc := &internalserviceconfig.BalancerConfig{} + // The converter registry is not guaranteed to emit json that is + // valid. It's scope is to simply convert from a proto message to + // internal gRPC JSON format. Thus, the tests cause valid JSON to + // eventually be emitted from ConvertToServiceConfig(), but this + // leaves this test brittle over time in case balancer validations + // change over time and add more failure cases. The simplicity of + // using this type (to get rid of non determinism in JSON strings) + // outweighs this brittleness, and also there are plans on + // decoupling the unmarshalling and validation step both present in + // this function in the future. In the future if balancer + // validations change, any configurations in this test that become + // invalid will need to be fixed. (need to make sure emissions above + // are valid configuration). Also, once this Unmarshal call is + // partitioned into Unmarshal vs. Validation in separate operations, + // the brittleness of this test will go away. + if err := json.Unmarshal(rawJSON, bc); err != nil { + t.Fatalf("failed to unmarshal JSON: %v", err) + } + if diff := cmp.Diff(bc, test.wantConfig); diff != "" { + t.Fatalf("ConvertToServiceConfig() got unexpected output, diff (-got +want): %v", diff) + } + }) + } +} + +// TestConvertToServiceConfigFailure tests failure cases of the xDS LB registry +// of converting proto configuration to JSON configuration. 
+// TestConvertToServiceConfigFailure tests failure cases of the xDS LB
+// registry's conversion from proto configuration to JSON configuration.
+func (s) TestConvertToServiceConfigFailure(t *testing.T) {
+ tests := []struct {
+ name string
+ policy *v3clusterpb.LoadBalancingPolicy
+ wantErr string
+ }{
+ {
+ name: "not xx_hash function",
+ policy: &v3clusterpb.LoadBalancingPolicy{
+ Policies: []*v3clusterpb.LoadBalancingPolicy_Policy{
+ {
+ TypedExtensionConfig: &v3corepb.TypedExtensionConfig{
+ TypedConfig: testutils.MarshalAny(&v3ringhashpb.RingHash{
+ HashFunction: v3ringhashpb.RingHash_MURMUR_HASH_2,
+ MinimumRingSize: wrapperspb.UInt64(10),
+ MaximumRingSize: wrapperspb.UInt64(100),
+ }),
+ },
+ },
+ },
+ },
+ wantErr: "unsupported ring_hash hash function",
+ },
+ {
+ name: "no-supported-policy",
+ policy: &v3clusterpb.LoadBalancingPolicy{
+ Policies: []*v3clusterpb.LoadBalancingPolicy_Policy{
+ {
+ TypedExtensionConfig: &v3corepb.TypedExtensionConfig{
+ // Not supported by gRPC-Go.
+ TypedConfig: testutils.MarshalAny(&v3leastrequestpb.LeastRequest{}),
+ },
+ },
+ },
+ },
+ wantErr: "no supported policy found in policy list",
+ },
+ // TODO: test validity right at the boundary of the recursion limit
+ // (16 layers total).
+ {
+ name: "too much recursion",
+ policy: &v3clusterpb.LoadBalancingPolicy{
+ Policies: []*v3clusterpb.LoadBalancingPolicy_Policy{
+ {
+ TypedExtensionConfig: &v3corepb.TypedExtensionConfig{
+ TypedConfig: wrrLocalityAny(wrrLocality(wrrLocality(wrrLocality(wrrLocality(wrrLocality(wrrLocality(wrrLocality(wrrLocality(wrrLocality(wrrLocality(wrrLocality(wrrLocality(wrrLocality(wrrLocality(wrrLocality(wrrLocality(wrrLocality(wrrLocality(wrrLocality(wrrLocality(wrrLocality(wrrLocality(&v3roundrobinpb.RoundRobin{}))))))))))))))))))))))),
+ },
+ },
+ },
+ },
+ wantErr: "exceeds max depth",
+ },
+ }
+
+ for _, test := range tests {
+ t.Run(test.name, func(t *testing.T) {
+ _, gotErr := xdslbregistry.ConvertToServiceConfig(test.policy)
+ // Match on an error substring to distinguish the different root
+ // causes. This is somewhat brittle over time, but it is important
+ // to verify the root cause of the errors emitted from
+ // ConvertToServiceConfig. Also, this package owns the error
+ // strings, so breakages won't come unexpectedly.
+ if gotErr == nil || !strings.Contains(gotErr.Error(), test.wantErr) {
+ t.Fatalf("ConvertToServiceConfig() = %v, wantErr %v", gotErr, test.wantErr)
+ }
+ })
+ }
+}
+
+// wrrLocality is a helper that takes a proto message and returns a
+// WrrLocality proto with the message marshaled into an Any and set as its
+// child endpoint picking policy.
+func wrrLocality(m proto.Message) *v3wrrlocalitypb.WrrLocality {
+ return &v3wrrlocalitypb.WrrLocality{
+ EndpointPickingPolicy: &v3clusterpb.LoadBalancingPolicy{
+ Policies: []*v3clusterpb.LoadBalancingPolicy_Policy{
+ {
+ TypedExtensionConfig: &v3corepb.TypedExtensionConfig{
+ TypedConfig: testutils.MarshalAny(m),
+ },
+ },
+ },
+ },
+ }
+}
+
+// wrrLocalityAny takes a proto message, wraps it in a WrrLocality proto as
+// its child, and returns that WrrLocality marshaled into an Any.
+func wrrLocalityAny(m proto.Message) *anypb.Any {
+ return testutils.MarshalAny(wrrLocality(m))
+}
diff --git a/xds/internal/xdsclient/xdsresource/test/unmarshal_cds_test.go b/xds/internal/xdsclient/xdsresource/test/unmarshal_cds_test.go
new file mode 100644
index 000000000000..3f4c226d74d3
--- /dev/null
+++ b/xds/internal/xdsclient/xdsresource/test/unmarshal_cds_test.go
@@ -0,0 +1,604 @@
+/*
+ *
+ * Copyright 2023 gRPC authors.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +// Package test contains test cases for unmarshalling of CDS resources. +package test + +import ( + "encoding/json" + "testing" + + "github.com/google/go-cmp/cmp" + "github.com/google/go-cmp/cmp/cmpopts" + _ "google.golang.org/grpc/balancer/roundrobin" // To register round_robin load balancer. + "google.golang.org/grpc/internal/balancer/stub" + "google.golang.org/grpc/internal/envconfig" + "google.golang.org/grpc/internal/grpctest" + internalserviceconfig "google.golang.org/grpc/internal/serviceconfig" + "google.golang.org/grpc/internal/testutils" + "google.golang.org/grpc/serviceconfig" + "google.golang.org/grpc/xds/internal/balancer/ringhash" + "google.golang.org/grpc/xds/internal/balancer/wrrlocality" + "google.golang.org/grpc/xds/internal/xdsclient/xdsresource" + "google.golang.org/protobuf/types/known/wrapperspb" + + v3cncftypepb "github.com/cncf/xds/go/xds/type/v3" + v3clusterpb "github.com/envoyproxy/go-control-plane/envoy/config/cluster/v3" + v3corepb "github.com/envoyproxy/go-control-plane/envoy/config/core/v3" + v3endpointpb "github.com/envoyproxy/go-control-plane/envoy/config/endpoint/v3" + v3aggregateclusterpb "github.com/envoyproxy/go-control-plane/envoy/extensions/clusters/aggregate/v3" + v3ringhashpb "github.com/envoyproxy/go-control-plane/envoy/extensions/load_balancing_policies/ring_hash/v3" + v3roundrobinpb "github.com/envoyproxy/go-control-plane/envoy/extensions/load_balancing_policies/round_robin/v3" + v3wrrlocalitypb "github.com/envoyproxy/go-control-plane/envoy/extensions/load_balancing_policies/wrr_locality/v3" + "github.com/golang/protobuf/proto" + anypb "github.com/golang/protobuf/ptypes/any" + structpb "github.com/golang/protobuf/ptypes/struct" +) + +type s struct { + grpctest.Tester +} + +func Test(t *testing.T) { + grpctest.RunSubTests(t, s{}) +} + +const ( + clusterName = "clusterName" + serviceName = "service" +) + +var emptyUpdate = xdsresource.ClusterUpdate{ClusterName: clusterName, LRSServerConfig: xdsresource.ClusterLRSOff} + +func wrrLocality(m proto.Message) *v3wrrlocalitypb.WrrLocality { + return &v3wrrlocalitypb.WrrLocality{ + EndpointPickingPolicy: &v3clusterpb.LoadBalancingPolicy{ + Policies: []*v3clusterpb.LoadBalancingPolicy_Policy{ + { + TypedExtensionConfig: &v3corepb.TypedExtensionConfig{ + TypedConfig: testutils.MarshalAny(m), + }, + }, + }, + }, + } +} + +func wrrLocalityAny(m proto.Message) *anypb.Any { + return testutils.MarshalAny(wrrLocality(m)) +} + +type customLBConfig struct { + serviceconfig.LoadBalancingConfig +} + +// We have this test in a separate test package in order to not take a +// dependency on the internal xDS balancer packages within the xDS Client. 
+func (s) TestValidateCluster_Success(t *testing.T) { + const customLBPolicyName = "myorg.MyCustomLeastRequestPolicy" + stub.Register(customLBPolicyName, stub.BalancerFuncs{ + ParseConfig: func(json.RawMessage) (serviceconfig.LoadBalancingConfig, error) { + return customLBConfig{}, nil + }, + }) + + origCustomLBSupport := envconfig.XDSCustomLBPolicy + envconfig.XDSCustomLBPolicy = true + defer func() { + envconfig.XDSCustomLBPolicy = origCustomLBSupport + }() + tests := []struct { + name string + cluster *v3clusterpb.Cluster + wantUpdate xdsresource.ClusterUpdate + wantLBConfig *internalserviceconfig.BalancerConfig + customLBDisabled bool + }{ + { + name: "happy-case-logical-dns", + cluster: &v3clusterpb.Cluster{ + Name: clusterName, + ClusterDiscoveryType: &v3clusterpb.Cluster_Type{Type: v3clusterpb.Cluster_LOGICAL_DNS}, + LbPolicy: v3clusterpb.Cluster_ROUND_ROBIN, + LoadAssignment: &v3endpointpb.ClusterLoadAssignment{ + Endpoints: []*v3endpointpb.LocalityLbEndpoints{{ + LbEndpoints: []*v3endpointpb.LbEndpoint{{ + HostIdentifier: &v3endpointpb.LbEndpoint_Endpoint{ + Endpoint: &v3endpointpb.Endpoint{ + Address: &v3corepb.Address{ + Address: &v3corepb.Address_SocketAddress{ + SocketAddress: &v3corepb.SocketAddress{ + Address: "dns_host", + PortSpecifier: &v3corepb.SocketAddress_PortValue{ + PortValue: 8080, + }, + }, + }, + }, + }, + }, + }}, + }}, + }, + }, + wantUpdate: xdsresource.ClusterUpdate{ + ClusterName: clusterName, + ClusterType: xdsresource.ClusterTypeLogicalDNS, + DNSHostName: "dns_host:8080", + }, + wantLBConfig: &internalserviceconfig.BalancerConfig{ + Name: wrrlocality.Name, + Config: &wrrlocality.LBConfig{ + ChildPolicy: &internalserviceconfig.BalancerConfig{ + Name: "round_robin", + }, + }, + }, + }, + { + name: "happy-case-aggregate-v3", + cluster: &v3clusterpb.Cluster{ + Name: clusterName, + ClusterDiscoveryType: &v3clusterpb.Cluster_ClusterType{ + ClusterType: &v3clusterpb.Cluster_CustomClusterType{ + Name: "envoy.clusters.aggregate", + TypedConfig: testutils.MarshalAny(&v3aggregateclusterpb.ClusterConfig{ + Clusters: []string{"a", "b", "c"}, + }), + }, + }, + LbPolicy: v3clusterpb.Cluster_ROUND_ROBIN, + }, + wantUpdate: xdsresource.ClusterUpdate{ + ClusterName: clusterName, LRSServerConfig: xdsresource.ClusterLRSOff, ClusterType: xdsresource.ClusterTypeAggregate, + PrioritizedClusterNames: []string{"a", "b", "c"}, + }, + wantLBConfig: &internalserviceconfig.BalancerConfig{ + Name: wrrlocality.Name, + Config: &wrrlocality.LBConfig{ + ChildPolicy: &internalserviceconfig.BalancerConfig{ + Name: "round_robin", + }, + }, + }, + }, + { + name: "happy-case-no-service-name-no-lrs", + cluster: &v3clusterpb.Cluster{ + Name: clusterName, + ClusterDiscoveryType: &v3clusterpb.Cluster_Type{Type: v3clusterpb.Cluster_EDS}, + EdsClusterConfig: &v3clusterpb.Cluster_EdsClusterConfig{ + EdsConfig: &v3corepb.ConfigSource{ + ConfigSourceSpecifier: &v3corepb.ConfigSource_Ads{ + Ads: &v3corepb.AggregatedConfigSource{}, + }, + }, + }, + LbPolicy: v3clusterpb.Cluster_ROUND_ROBIN, + }, + wantUpdate: emptyUpdate, + wantLBConfig: &internalserviceconfig.BalancerConfig{ + Name: wrrlocality.Name, + Config: &wrrlocality.LBConfig{ + ChildPolicy: &internalserviceconfig.BalancerConfig{ + Name: "round_robin", + }, + }, + }, + }, + { + name: "happy-case-no-lrs", + cluster: &v3clusterpb.Cluster{ + Name: clusterName, + ClusterDiscoveryType: &v3clusterpb.Cluster_Type{Type: v3clusterpb.Cluster_EDS}, + EdsClusterConfig: &v3clusterpb.Cluster_EdsClusterConfig{ + EdsConfig: &v3corepb.ConfigSource{ + 
ConfigSourceSpecifier: &v3corepb.ConfigSource_Ads{ + Ads: &v3corepb.AggregatedConfigSource{}, + }, + }, + ServiceName: serviceName, + }, + LbPolicy: v3clusterpb.Cluster_ROUND_ROBIN, + }, + wantUpdate: xdsresource.ClusterUpdate{ClusterName: clusterName, EDSServiceName: serviceName, LRSServerConfig: xdsresource.ClusterLRSOff}, + wantLBConfig: &internalserviceconfig.BalancerConfig{ + Name: wrrlocality.Name, + Config: &wrrlocality.LBConfig{ + ChildPolicy: &internalserviceconfig.BalancerConfig{ + Name: "round_robin", + }, + }, + }, + }, + { + name: "happiest-case", + cluster: &v3clusterpb.Cluster{ + Name: clusterName, + ClusterDiscoveryType: &v3clusterpb.Cluster_Type{Type: v3clusterpb.Cluster_EDS}, + EdsClusterConfig: &v3clusterpb.Cluster_EdsClusterConfig{ + EdsConfig: &v3corepb.ConfigSource{ + ConfigSourceSpecifier: &v3corepb.ConfigSource_Ads{ + Ads: &v3corepb.AggregatedConfigSource{}, + }, + }, + ServiceName: serviceName, + }, + LbPolicy: v3clusterpb.Cluster_ROUND_ROBIN, + LrsServer: &v3corepb.ConfigSource{ + ConfigSourceSpecifier: &v3corepb.ConfigSource_Self{ + Self: &v3corepb.SelfConfigSource{}, + }, + }, + }, + wantUpdate: xdsresource.ClusterUpdate{ClusterName: clusterName, EDSServiceName: serviceName, LRSServerConfig: xdsresource.ClusterLRSServerSelf}, + wantLBConfig: &internalserviceconfig.BalancerConfig{ + Name: wrrlocality.Name, + Config: &wrrlocality.LBConfig{ + ChildPolicy: &internalserviceconfig.BalancerConfig{ + Name: "round_robin", + }, + }, + }, + }, + { + name: "happiest-case-with-circuitbreakers", + cluster: &v3clusterpb.Cluster{ + Name: clusterName, + ClusterDiscoveryType: &v3clusterpb.Cluster_Type{Type: v3clusterpb.Cluster_EDS}, + EdsClusterConfig: &v3clusterpb.Cluster_EdsClusterConfig{ + EdsConfig: &v3corepb.ConfigSource{ + ConfigSourceSpecifier: &v3corepb.ConfigSource_Ads{ + Ads: &v3corepb.AggregatedConfigSource{}, + }, + }, + ServiceName: serviceName, + }, + LbPolicy: v3clusterpb.Cluster_ROUND_ROBIN, + CircuitBreakers: &v3clusterpb.CircuitBreakers{ + Thresholds: []*v3clusterpb.CircuitBreakers_Thresholds{ + { + Priority: v3corepb.RoutingPriority_DEFAULT, + MaxRequests: wrapperspb.UInt32(512), + }, + { + Priority: v3corepb.RoutingPriority_HIGH, + MaxRequests: nil, + }, + }, + }, + LrsServer: &v3corepb.ConfigSource{ + ConfigSourceSpecifier: &v3corepb.ConfigSource_Self{ + Self: &v3corepb.SelfConfigSource{}, + }, + }, + }, + wantUpdate: xdsresource.ClusterUpdate{ClusterName: clusterName, EDSServiceName: serviceName, LRSServerConfig: xdsresource.ClusterLRSServerSelf, MaxRequests: func() *uint32 { i := uint32(512); return &i }()}, + wantLBConfig: &internalserviceconfig.BalancerConfig{ + Name: wrrlocality.Name, + Config: &wrrlocality.LBConfig{ + ChildPolicy: &internalserviceconfig.BalancerConfig{ + Name: "round_robin", + }, + }, + }, + }, + { + name: "happiest-case-with-ring-hash-lb-policy-with-default-config", + cluster: &v3clusterpb.Cluster{ + Name: clusterName, + ClusterDiscoveryType: &v3clusterpb.Cluster_Type{Type: v3clusterpb.Cluster_EDS}, + EdsClusterConfig: &v3clusterpb.Cluster_EdsClusterConfig{ + EdsConfig: &v3corepb.ConfigSource{ + ConfigSourceSpecifier: &v3corepb.ConfigSource_Ads{ + Ads: &v3corepb.AggregatedConfigSource{}, + }, + }, + ServiceName: serviceName, + }, + LbPolicy: v3clusterpb.Cluster_RING_HASH, + LrsServer: &v3corepb.ConfigSource{ + ConfigSourceSpecifier: &v3corepb.ConfigSource_Self{ + Self: &v3corepb.SelfConfigSource{}, + }, + }, + }, + wantUpdate: xdsresource.ClusterUpdate{ + ClusterName: clusterName, EDSServiceName: serviceName, LRSServerConfig: 
xdsresource.ClusterLRSServerSelf, + }, + wantLBConfig: &internalserviceconfig.BalancerConfig{ + Name: "ring_hash_experimental", + Config: &ringhash.LBConfig{ + MinRingSize: 1024, + MaxRingSize: 4096, + }, + }, + }, + { + name: "happiest-case-with-ring-hash-lb-policy-with-none-default-config", + cluster: &v3clusterpb.Cluster{ + Name: clusterName, + ClusterDiscoveryType: &v3clusterpb.Cluster_Type{Type: v3clusterpb.Cluster_EDS}, + EdsClusterConfig: &v3clusterpb.Cluster_EdsClusterConfig{ + EdsConfig: &v3corepb.ConfigSource{ + ConfigSourceSpecifier: &v3corepb.ConfigSource_Ads{ + Ads: &v3corepb.AggregatedConfigSource{}, + }, + }, + ServiceName: serviceName, + }, + LbPolicy: v3clusterpb.Cluster_RING_HASH, + LbConfig: &v3clusterpb.Cluster_RingHashLbConfig_{ + RingHashLbConfig: &v3clusterpb.Cluster_RingHashLbConfig{ + MinimumRingSize: wrapperspb.UInt64(10), + MaximumRingSize: wrapperspb.UInt64(100), + }, + }, + LrsServer: &v3corepb.ConfigSource{ + ConfigSourceSpecifier: &v3corepb.ConfigSource_Self{ + Self: &v3corepb.SelfConfigSource{}, + }, + }, + }, + wantUpdate: xdsresource.ClusterUpdate{ + ClusterName: clusterName, EDSServiceName: serviceName, LRSServerConfig: xdsresource.ClusterLRSServerSelf, + LBPolicy: &xdsresource.ClusterLBPolicyRingHash{MinimumRingSize: 10, MaximumRingSize: 100}, + }, + wantLBConfig: &internalserviceconfig.BalancerConfig{ + Name: "ring_hash_experimental", + Config: &ringhash.LBConfig{ + MinRingSize: 10, + MaxRingSize: 100, + }, + }, + }, + { + name: "happiest-case-with-ring-hash-lb-policy-configured-through-LoadBalancingPolicy", + cluster: &v3clusterpb.Cluster{ + Name: clusterName, + ClusterDiscoveryType: &v3clusterpb.Cluster_Type{Type: v3clusterpb.Cluster_EDS}, + EdsClusterConfig: &v3clusterpb.Cluster_EdsClusterConfig{ + EdsConfig: &v3corepb.ConfigSource{ + ConfigSourceSpecifier: &v3corepb.ConfigSource_Ads{ + Ads: &v3corepb.AggregatedConfigSource{}, + }, + }, + ServiceName: serviceName, + }, + LoadBalancingPolicy: &v3clusterpb.LoadBalancingPolicy{ + Policies: []*v3clusterpb.LoadBalancingPolicy_Policy{ + { + TypedExtensionConfig: &v3corepb.TypedExtensionConfig{ + TypedConfig: testutils.MarshalAny(&v3ringhashpb.RingHash{ + HashFunction: v3ringhashpb.RingHash_XX_HASH, + MinimumRingSize: wrapperspb.UInt64(10), + MaximumRingSize: wrapperspb.UInt64(100), + }), + }, + }, + }, + }, + }, + wantUpdate: xdsresource.ClusterUpdate{ + ClusterName: clusterName, EDSServiceName: serviceName, + }, + wantLBConfig: &internalserviceconfig.BalancerConfig{ + Name: "ring_hash_experimental", + Config: &ringhash.LBConfig{ + MinRingSize: 10, + MaxRingSize: 100, + }, + }, + }, + { + name: "happiest-case-with-wrrlocality-rr-child-configured-through-LoadBalancingPolicy", + cluster: &v3clusterpb.Cluster{ + Name: clusterName, + ClusterDiscoveryType: &v3clusterpb.Cluster_Type{Type: v3clusterpb.Cluster_EDS}, + EdsClusterConfig: &v3clusterpb.Cluster_EdsClusterConfig{ + EdsConfig: &v3corepb.ConfigSource{ + ConfigSourceSpecifier: &v3corepb.ConfigSource_Ads{ + Ads: &v3corepb.AggregatedConfigSource{}, + }, + }, + ServiceName: serviceName, + }, + LoadBalancingPolicy: &v3clusterpb.LoadBalancingPolicy{ + Policies: []*v3clusterpb.LoadBalancingPolicy_Policy{ + { + TypedExtensionConfig: &v3corepb.TypedExtensionConfig{ + TypedConfig: wrrLocalityAny(&v3roundrobinpb.RoundRobin{}), + }, + }, + }, + }, + }, + wantUpdate: xdsresource.ClusterUpdate{ + ClusterName: clusterName, EDSServiceName: serviceName, + }, + wantLBConfig: &internalserviceconfig.BalancerConfig{ + Name: wrrlocality.Name, + Config: &wrrlocality.LBConfig{ + 
ChildPolicy: &internalserviceconfig.BalancerConfig{ + Name: "round_robin", + }, + }, + }, + }, + { + name: "happiest-case-with-custom-lb-configured-through-LoadBalancingPolicy", + cluster: &v3clusterpb.Cluster{ + Name: clusterName, + ClusterDiscoveryType: &v3clusterpb.Cluster_Type{Type: v3clusterpb.Cluster_EDS}, + EdsClusterConfig: &v3clusterpb.Cluster_EdsClusterConfig{ + EdsConfig: &v3corepb.ConfigSource{ + ConfigSourceSpecifier: &v3corepb.ConfigSource_Ads{ + Ads: &v3corepb.AggregatedConfigSource{}, + }, + }, + ServiceName: serviceName, + }, + LoadBalancingPolicy: &v3clusterpb.LoadBalancingPolicy{ + Policies: []*v3clusterpb.LoadBalancingPolicy_Policy{ + { + TypedExtensionConfig: &v3corepb.TypedExtensionConfig{ + TypedConfig: wrrLocalityAny(&v3cncftypepb.TypedStruct{ + TypeUrl: "type.googleapis.com/myorg.MyCustomLeastRequestPolicy", + Value: &structpb.Struct{}, + }), + }, + }, + }, + }, + }, + wantUpdate: xdsresource.ClusterUpdate{ + ClusterName: clusterName, EDSServiceName: serviceName, + }, + wantLBConfig: &internalserviceconfig.BalancerConfig{ + Name: wrrlocality.Name, + Config: &wrrlocality.LBConfig{ + ChildPolicy: &internalserviceconfig.BalancerConfig{ + Name: "myorg.MyCustomLeastRequestPolicy", + Config: customLBConfig{}, + }, + }, + }, + }, + { + name: "custom-lb-env-var-not-set-ignore-load-balancing-policy-use-lb-policy-and-enum", + cluster: &v3clusterpb.Cluster{ + Name: clusterName, + ClusterDiscoveryType: &v3clusterpb.Cluster_Type{Type: v3clusterpb.Cluster_EDS}, + EdsClusterConfig: &v3clusterpb.Cluster_EdsClusterConfig{ + EdsConfig: &v3corepb.ConfigSource{ + ConfigSourceSpecifier: &v3corepb.ConfigSource_Ads{ + Ads: &v3corepb.AggregatedConfigSource{}, + }, + }, + ServiceName: serviceName, + }, + LbPolicy: v3clusterpb.Cluster_RING_HASH, + LbConfig: &v3clusterpb.Cluster_RingHashLbConfig_{ + RingHashLbConfig: &v3clusterpb.Cluster_RingHashLbConfig{ + MinimumRingSize: wrapperspb.UInt64(20), + MaximumRingSize: wrapperspb.UInt64(200), + }, + }, + LoadBalancingPolicy: &v3clusterpb.LoadBalancingPolicy{ + Policies: []*v3clusterpb.LoadBalancingPolicy_Policy{ + { + TypedExtensionConfig: &v3corepb.TypedExtensionConfig{ + TypedConfig: testutils.MarshalAny(&v3ringhashpb.RingHash{ + HashFunction: v3ringhashpb.RingHash_XX_HASH, + MinimumRingSize: wrapperspb.UInt64(10), + MaximumRingSize: wrapperspb.UInt64(100), + }), + }, + }, + }, + }, + }, + wantUpdate: xdsresource.ClusterUpdate{ + ClusterName: clusterName, EDSServiceName: serviceName, + }, + wantLBConfig: &internalserviceconfig.BalancerConfig{ + Name: "ring_hash_experimental", + Config: &ringhash.LBConfig{ + MinRingSize: 20, + MaxRingSize: 200, + }, + }, + customLBDisabled: true, + }, + { + name: "load-balancing-policy-takes-precedence-over-lb-policy-and-enum", + cluster: &v3clusterpb.Cluster{ + Name: clusterName, + ClusterDiscoveryType: &v3clusterpb.Cluster_Type{Type: v3clusterpb.Cluster_EDS}, + EdsClusterConfig: &v3clusterpb.Cluster_EdsClusterConfig{ + EdsConfig: &v3corepb.ConfigSource{ + ConfigSourceSpecifier: &v3corepb.ConfigSource_Ads{ + Ads: &v3corepb.AggregatedConfigSource{}, + }, + }, + ServiceName: serviceName, + }, + LbPolicy: v3clusterpb.Cluster_RING_HASH, + LbConfig: &v3clusterpb.Cluster_RingHashLbConfig_{ + RingHashLbConfig: &v3clusterpb.Cluster_RingHashLbConfig{ + MinimumRingSize: wrapperspb.UInt64(20), + MaximumRingSize: wrapperspb.UInt64(200), + }, + }, + LoadBalancingPolicy: &v3clusterpb.LoadBalancingPolicy{ + Policies: []*v3clusterpb.LoadBalancingPolicy_Policy{ + { + TypedExtensionConfig: &v3corepb.TypedExtensionConfig{ + 
+ TypedConfig: testutils.MarshalAny(&v3ringhashpb.RingHash{
+ HashFunction: v3ringhashpb.RingHash_XX_HASH,
+ MinimumRingSize: wrapperspb.UInt64(10),
+ MaximumRingSize: wrapperspb.UInt64(100),
+ }),
+ },
+ },
+ },
+ },
+ wantUpdate: xdsresource.ClusterUpdate{
+ ClusterName: clusterName, EDSServiceName: serviceName,
+ },
+ wantLBConfig: &internalserviceconfig.BalancerConfig{
+ Name: "ring_hash_experimental",
+ Config: &ringhash.LBConfig{
+ MinRingSize: 10,
+ MaxRingSize: 100,
+ },
+ },
+ },
+ }
+
+ for _, test := range tests {
+ t.Run(test.name, func(t *testing.T) {
+ if test.customLBDisabled {
+ envconfig.XDSCustomLBPolicy = false
+ defer func() {
+ envconfig.XDSCustomLBPolicy = true
+ }()
+ }
+ update, err := xdsresource.ValidateClusterAndConstructClusterUpdateForTesting(test.cluster)
+ if err != nil {
+ t.Errorf("validateClusterAndConstructClusterUpdate(%+v) failed: %v", test.cluster, err)
+ }
+ // Ignore the raw JSON string in the cluster update. JSON bytes
+ // are nondeterministic (whitespace etc.), so we cannot reliably
+ // compare them in a test. Instead, unmarshal into a
+ // BalancerConfig struct and compare on that. We only need to
+ // test the JSON emission here, as this covers the possible
+ // output space.
+ if diff := cmp.Diff(update, test.wantUpdate, cmpopts.EquateEmpty(), cmpopts.IgnoreFields(xdsresource.ClusterUpdate{}, "LBPolicy", "LBPolicyJSON")); diff != "" {
+ t.Errorf("validateClusterAndConstructClusterUpdate(%+v) got diff: %v (-got, +want)", test.cluster, diff)
+ }
+ bc := &internalserviceconfig.BalancerConfig{}
+ if err := json.Unmarshal(update.LBPolicyJSON, bc); err != nil {
+ t.Fatalf("failed to unmarshal JSON: %v", err)
+ }
+ if diff := cmp.Diff(bc, test.wantLBConfig); diff != "" {
+ t.Fatalf("update.LBConfig got unexpected output, diff (-got +want): %v", diff)
+ }
+ })
+ }
+}
diff --git a/xds/internal/xdsclient/xdsresource/type_cds.go b/xds/internal/xdsclient/xdsresource/type_cds.go
index d459717acd23..cd49852d8fcc 100644
--- a/xds/internal/xdsclient/xdsresource/type_cds.go
+++ b/xds/internal/xdsclient/xdsresource/type_cds.go
@@ -18,6 +18,7 @@
 package xdsresource
 
 import (
+ "encoding/json"
 "time"
 
 "google.golang.org/protobuf/types/known/anypb"
@@ -156,6 +157,12 @@ type ClusterUpdate struct {
 // When we add more support policies, this can be made an interface, and
 // will be set to different types based on the policy type.
 LBPolicy *ClusterLBPolicyRingHash
+ // LBPolicyJSON represents the locality and endpoint picking policy in JSON,
+ // which will be the child policy of xds_cluster_impl. Once this field is
+ // fully supported across the system, it will replace the LBPolicy field
+ // above. For now, we keep both so that the system keeps working while
+ // downstream support for this JSON field is still being added.
+ LBPolicyJSON json.RawMessage
 
 // OutlierDetection is the outlier detection configuration for this cluster.
 // If nil, it means this cluster does not use the outlier detection feature.
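
As a hedged sketch of how the new field is meant to be consumed (the helper
below is hypothetical and not part of this change; it mirrors the validation
added to unmarshal_cds.go in the next file), a consumer of ClusterUpdate can
parse and validate LBPolicyJSON by unmarshaling it into a BalancerConfig,
which resolves the policy name against the gRPC LB policy registry:

	// childPolicyFromUpdate illustrates the intended use of
	// ClusterUpdate.LBPolicyJSON (hypothetical helper).
	func childPolicyFromUpdate(update xdsresource.ClusterUpdate) (*internalserviceconfig.BalancerConfig, error) {
		bc := &internalserviceconfig.BalancerConfig{}
		if err := json.Unmarshal(update.LBPolicyJSON, bc); err != nil {
			return nil, fmt.Errorf("invalid LB policy JSON %s: %v", update.LBPolicyJSON, err)
		}
		return bc, nil
	}
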
diff --git a/xds/internal/xdsclient/xdsresource/unmarshal_cds.go b/xds/internal/xdsclient/xdsresource/unmarshal_cds.go index e0bc1589b562..966844881351 100644 --- a/xds/internal/xdsclient/xdsresource/unmarshal_cds.go +++ b/xds/internal/xdsclient/xdsresource/unmarshal_cds.go @@ -18,6 +18,7 @@ package xdsresource import ( + "encoding/json" "errors" "fmt" "net" @@ -30,11 +31,18 @@ import ( v3tlspb "github.com/envoyproxy/go-control-plane/envoy/extensions/transport_sockets/tls/v3" "github.com/golang/protobuf/proto" "google.golang.org/grpc/internal/envconfig" + "google.golang.org/grpc/internal/pretty" + internalserviceconfig "google.golang.org/grpc/internal/serviceconfig" "google.golang.org/grpc/internal/xds/matcher" + "google.golang.org/grpc/xds/internal/xdsclient/xdslbregistry" "google.golang.org/grpc/xds/internal/xdsclient/xdsresource/version" "google.golang.org/protobuf/types/known/anypb" ) +// ValidateClusterAndConstructClusterUpdateForTesting exports the +// validateClusterAndConstructClusterUpdate function for testing purposes. +var ValidateClusterAndConstructClusterUpdateForTesting = validateClusterAndConstructClusterUpdate + // TransportSocket proto message has a `name` field which is expected to be set // to this value by the management server. const transportSocketName = "envoy.transport_sockets.tls" @@ -70,9 +78,12 @@ const ( func validateClusterAndConstructClusterUpdate(cluster *v3clusterpb.Cluster) (ClusterUpdate, error) { var lbPolicy *ClusterLBPolicyRingHash + var lbCfgJSON json.RawMessage + var err error switch cluster.GetLbPolicy() { case v3clusterpb.Cluster_ROUND_ROBIN: lbPolicy = nil // The default is round_robin, and there's no config to set. + lbCfgJSON = []byte(fmt.Sprintf(`[{%q: {"childPolicy": [{"round_robin": {}}]}}]`, "xds_wrr_locality_experimental")) case v3clusterpb.Cluster_RING_HASH: if !envconfig.XDSRingHash { return ClusterUpdate{}, fmt.Errorf("unexpected lbPolicy %v in response: %+v", cluster.GetLbPolicy(), cluster) @@ -85,25 +96,18 @@ func validateClusterAndConstructClusterUpdate(cluster *v3clusterpb.Cluster) (Clu // defaults to 8M entries, and limited to 8M entries var minSize, maxSize uint64 = defaultRingHashMinSize, defaultRingHashMaxSize if min := rhc.GetMinimumRingSize(); min != nil { - if min.GetValue() > ringHashSizeUpperBound { - return ClusterUpdate{}, fmt.Errorf("unexpected ring_hash mininum ring size %v in response: %+v", min.GetValue(), cluster) - } minSize = min.GetValue() } if max := rhc.GetMaximumRingSize(); max != nil { - if max.GetValue() > ringHashSizeUpperBound { - return ClusterUpdate{}, fmt.Errorf("unexpected ring_hash maxinum ring size %v in response: %+v", max.GetValue(), cluster) - } maxSize = max.GetValue() } - if minSize > maxSize { - return ClusterUpdate{}, fmt.Errorf("ring_hash config min size %v is greater than max %v", minSize, maxSize) - } lbPolicy = &ClusterLBPolicyRingHash{MinimumRingSize: minSize, MaximumRingSize: maxSize} + + rhLBCfgJSON := []byte(fmt.Sprintf("{\"minRingSize\": %d, \"maxRingSize\": %d}", minSize, maxSize)) + lbCfgJSON = []byte(fmt.Sprintf(`[{%q: %s}]`, "ring_hash_experimental", rhLBCfgJSON)) default: return ClusterUpdate{}, fmt.Errorf("unexpected lbPolicy %v in response: %+v", cluster.GetLbPolicy(), cluster) } - // Process security configuration received from the control plane iff the // corresponding environment variable is set. 
var sc *SecurityConfig @@ -124,11 +128,26 @@ func validateClusterAndConstructClusterUpdate(cluster *v3clusterpb.Cluster) (Clu } } + if cluster.GetLoadBalancingPolicy() != nil && envconfig.XDSCustomLBPolicy { + lbCfgJSON, err = xdslbregistry.ConvertToServiceConfig(cluster.GetLoadBalancingPolicy()) + if err != nil { + return ClusterUpdate{}, fmt.Errorf("error converting LoadBalancingPolicy %v in response: %+v: %v", cluster.GetLoadBalancingPolicy(), cluster, err) + } + // "It will be the responsibility of the XdsClient to validate the + // converted configuration. It will do this by having the gRPC LB policy + // registry parse the configuration." - A52 + bc := &internalserviceconfig.BalancerConfig{} + if err := json.Unmarshal(lbCfgJSON, bc); err != nil { + return ClusterUpdate{}, fmt.Errorf("JSON generated from xDS LB policy registry: %s is invalid: %v", pretty.FormatJSON(lbCfgJSON), err) + } + } + ret := ClusterUpdate{ ClusterName: cluster.GetName(), SecurityCfg: sc, MaxRequests: circuitBreakersFromCluster(cluster), LBPolicy: lbPolicy, + LBPolicyJSON: lbCfgJSON, OutlierDetection: od, } diff --git a/xds/internal/xdsclient/xdsresource/unmarshal_cds_test.go b/xds/internal/xdsclient/xdsresource/unmarshal_cds_test.go index 533fd85c3984..3b47ae697a99 100644 --- a/xds/internal/xdsclient/xdsresource/unmarshal_cds_test.go +++ b/xds/internal/xdsclient/xdsresource/unmarshal_cds_test.go @@ -23,7 +23,6 @@ import ( "testing" "time" - v3discoverypb "github.com/envoyproxy/go-control-plane/envoy/service/discovery/v3" "github.com/google/go-cmp/cmp" "github.com/google/go-cmp/cmp/cmpopts" "google.golang.org/grpc/internal/envconfig" @@ -38,7 +37,10 @@ import ( v3corepb "github.com/envoyproxy/go-control-plane/envoy/config/core/v3" v3endpointpb "github.com/envoyproxy/go-control-plane/envoy/config/endpoint/v3" v3aggregateclusterpb "github.com/envoyproxy/go-control-plane/envoy/extensions/clusters/aggregate/v3" + v3leastrequestpb "github.com/envoyproxy/go-control-plane/envoy/extensions/load_balancing_policies/least_request/v3" + v3ringhashpb "github.com/envoyproxy/go-control-plane/envoy/extensions/load_balancing_policies/ring_hash/v3" v3tlspb "github.com/envoyproxy/go-control-plane/envoy/extensions/transport_sockets/tls/v3" + v3discoverypb "github.com/envoyproxy/go-control-plane/envoy/service/discovery/v3" v3matcherpb "github.com/envoyproxy/go-control-plane/envoy/type/matcher/v3" anypb "github.com/golang/protobuf/ptypes/any" ) @@ -51,6 +53,11 @@ const ( var emptyUpdate = ClusterUpdate{ClusterName: clusterName, LRSServerConfig: ClusterLRSOff} func (s) TestValidateCluster_Failure(t *testing.T) { + oldCustomLBSupport := envconfig.XDSCustomLBPolicy + envconfig.XDSCustomLBPolicy = true + defer func() { + envconfig.XDSCustomLBPolicy = oldCustomLBSupport + }() tests := []struct { name string cluster *v3clusterpb.Cluster @@ -155,13 +162,12 @@ func (s) TestValidateCluster_Failure(t *testing.T) { wantErr: true, }, { - name: "ring-hash-min-bound-greater-than-max", + name: "ring-hash-max-bound-greater-than-upper-bound", cluster: &v3clusterpb.Cluster{ LbPolicy: v3clusterpb.Cluster_RING_HASH, LbConfig: &v3clusterpb.Cluster_RingHashLbConfig_{ RingHashLbConfig: &v3clusterpb.Cluster_RingHashLbConfig{ - MinimumRingSize: wrapperspb.UInt64(100), - MaximumRingSize: wrapperspb.UInt64(10), + MaximumRingSize: wrapperspb.UInt64(ringHashSizeUpperBound + 1), }, }, }, @@ -169,12 +175,29 @@ func (s) TestValidateCluster_Failure(t *testing.T) { wantErr: true, }, { - name: 
"ring-hash-min-bound-greater-than-upper-bound", + name: "ring-hash-max-bound-greater-than-upper-bound-load-balancing-policy", cluster: &v3clusterpb.Cluster{ - LbPolicy: v3clusterpb.Cluster_RING_HASH, - LbConfig: &v3clusterpb.Cluster_RingHashLbConfig_{ - RingHashLbConfig: &v3clusterpb.Cluster_RingHashLbConfig{ - MinimumRingSize: wrapperspb.UInt64(ringHashSizeUpperBound + 1), + Name: clusterName, + ClusterDiscoveryType: &v3clusterpb.Cluster_Type{Type: v3clusterpb.Cluster_EDS}, + EdsClusterConfig: &v3clusterpb.Cluster_EdsClusterConfig{ + EdsConfig: &v3corepb.ConfigSource{ + ConfigSourceSpecifier: &v3corepb.ConfigSource_Ads{ + Ads: &v3corepb.AggregatedConfigSource{}, + }, + }, + ServiceName: serviceName, + }, + LoadBalancingPolicy: &v3clusterpb.LoadBalancingPolicy{ + Policies: []*v3clusterpb.LoadBalancingPolicy_Policy{ + { + TypedExtensionConfig: &v3corepb.TypedExtensionConfig{ + TypedConfig: testutils.MarshalAny(&v3ringhashpb.RingHash{ + HashFunction: v3ringhashpb.RingHash_XX_HASH, + MinimumRingSize: wrapperspb.UInt64(10), + MaximumRingSize: wrapperspb.UInt64(ringHashSizeUpperBound + 1), + }), + }, + }, }, }, }, @@ -182,12 +205,25 @@ func (s) TestValidateCluster_Failure(t *testing.T) { wantErr: true, }, { - name: "ring-hash-max-bound-greater-than-upper-bound", + name: "least-request-unsupported-in-converter", cluster: &v3clusterpb.Cluster{ - LbPolicy: v3clusterpb.Cluster_RING_HASH, - LbConfig: &v3clusterpb.Cluster_RingHashLbConfig_{ - RingHashLbConfig: &v3clusterpb.Cluster_RingHashLbConfig{ - MaximumRingSize: wrapperspb.UInt64(ringHashSizeUpperBound + 1), + Name: clusterName, + ClusterDiscoveryType: &v3clusterpb.Cluster_Type{Type: v3clusterpb.Cluster_EDS}, + EdsClusterConfig: &v3clusterpb.Cluster_EdsClusterConfig{ + EdsConfig: &v3corepb.ConfigSource{ + ConfigSourceSpecifier: &v3corepb.ConfigSource_Ads{ + Ads: &v3corepb.AggregatedConfigSource{}, + }, + }, + ServiceName: serviceName, + }, + LoadBalancingPolicy: &v3clusterpb.LoadBalancingPolicy{ + Policies: []*v3clusterpb.LoadBalancingPolicy_Policy{ + { + TypedExtensionConfig: &v3corepb.TypedExtensionConfig{ + TypedConfig: testutils.MarshalAny(&v3leastrequestpb.LeastRequest{}), + }, + }, }, }, }, @@ -243,230 +279,6 @@ func (s) TestValidateCluster_Failure(t *testing.T) { } } -func (s) TestValidateCluster_Success(t *testing.T) { - tests := []struct { - name string - cluster *v3clusterpb.Cluster - wantUpdate ClusterUpdate - }{ - { - name: "happy-case-logical-dns", - cluster: &v3clusterpb.Cluster{ - Name: clusterName, - ClusterDiscoveryType: &v3clusterpb.Cluster_Type{Type: v3clusterpb.Cluster_LOGICAL_DNS}, - LbPolicy: v3clusterpb.Cluster_ROUND_ROBIN, - LoadAssignment: &v3endpointpb.ClusterLoadAssignment{ - Endpoints: []*v3endpointpb.LocalityLbEndpoints{{ - LbEndpoints: []*v3endpointpb.LbEndpoint{{ - HostIdentifier: &v3endpointpb.LbEndpoint_Endpoint{ - Endpoint: &v3endpointpb.Endpoint{ - Address: &v3corepb.Address{ - Address: &v3corepb.Address_SocketAddress{ - SocketAddress: &v3corepb.SocketAddress{ - Address: "dns_host", - PortSpecifier: &v3corepb.SocketAddress_PortValue{ - PortValue: 8080, - }, - }, - }, - }, - }, - }, - }}, - }}, - }, - }, - wantUpdate: ClusterUpdate{ - ClusterName: clusterName, - ClusterType: ClusterTypeLogicalDNS, - DNSHostName: "dns_host:8080", - }, - }, - { - name: "happy-case-aggregate-v3", - cluster: &v3clusterpb.Cluster{ - Name: clusterName, - ClusterDiscoveryType: &v3clusterpb.Cluster_ClusterType{ - ClusterType: &v3clusterpb.Cluster_CustomClusterType{ - Name: "envoy.clusters.aggregate", - TypedConfig: 
testutils.MarshalAny(&v3aggregateclusterpb.ClusterConfig{ - Clusters: []string{"a", "b", "c"}, - }), - }, - }, - LbPolicy: v3clusterpb.Cluster_ROUND_ROBIN, - }, - wantUpdate: ClusterUpdate{ - ClusterName: clusterName, LRSServerConfig: ClusterLRSOff, ClusterType: ClusterTypeAggregate, - PrioritizedClusterNames: []string{"a", "b", "c"}, - }, - }, - { - name: "happy-case-no-service-name-no-lrs", - cluster: &v3clusterpb.Cluster{ - Name: clusterName, - ClusterDiscoveryType: &v3clusterpb.Cluster_Type{Type: v3clusterpb.Cluster_EDS}, - EdsClusterConfig: &v3clusterpb.Cluster_EdsClusterConfig{ - EdsConfig: &v3corepb.ConfigSource{ - ConfigSourceSpecifier: &v3corepb.ConfigSource_Ads{ - Ads: &v3corepb.AggregatedConfigSource{}, - }, - }, - }, - LbPolicy: v3clusterpb.Cluster_ROUND_ROBIN, - }, - wantUpdate: emptyUpdate, - }, - { - name: "happy-case-no-lrs", - cluster: &v3clusterpb.Cluster{ - Name: clusterName, - ClusterDiscoveryType: &v3clusterpb.Cluster_Type{Type: v3clusterpb.Cluster_EDS}, - EdsClusterConfig: &v3clusterpb.Cluster_EdsClusterConfig{ - EdsConfig: &v3corepb.ConfigSource{ - ConfigSourceSpecifier: &v3corepb.ConfigSource_Ads{ - Ads: &v3corepb.AggregatedConfigSource{}, - }, - }, - ServiceName: serviceName, - }, - LbPolicy: v3clusterpb.Cluster_ROUND_ROBIN, - }, - wantUpdate: ClusterUpdate{ClusterName: clusterName, EDSServiceName: serviceName, LRSServerConfig: ClusterLRSOff}, - }, - { - name: "happiest-case", - cluster: &v3clusterpb.Cluster{ - Name: clusterName, - ClusterDiscoveryType: &v3clusterpb.Cluster_Type{Type: v3clusterpb.Cluster_EDS}, - EdsClusterConfig: &v3clusterpb.Cluster_EdsClusterConfig{ - EdsConfig: &v3corepb.ConfigSource{ - ConfigSourceSpecifier: &v3corepb.ConfigSource_Ads{ - Ads: &v3corepb.AggregatedConfigSource{}, - }, - }, - ServiceName: serviceName, - }, - LbPolicy: v3clusterpb.Cluster_ROUND_ROBIN, - LrsServer: &v3corepb.ConfigSource{ - ConfigSourceSpecifier: &v3corepb.ConfigSource_Self{ - Self: &v3corepb.SelfConfigSource{}, - }, - }, - }, - wantUpdate: ClusterUpdate{ClusterName: clusterName, EDSServiceName: serviceName, LRSServerConfig: ClusterLRSServerSelf}, - }, - { - name: "happiest-case-with-circuitbreakers", - cluster: &v3clusterpb.Cluster{ - Name: clusterName, - ClusterDiscoveryType: &v3clusterpb.Cluster_Type{Type: v3clusterpb.Cluster_EDS}, - EdsClusterConfig: &v3clusterpb.Cluster_EdsClusterConfig{ - EdsConfig: &v3corepb.ConfigSource{ - ConfigSourceSpecifier: &v3corepb.ConfigSource_Ads{ - Ads: &v3corepb.AggregatedConfigSource{}, - }, - }, - ServiceName: serviceName, - }, - LbPolicy: v3clusterpb.Cluster_ROUND_ROBIN, - CircuitBreakers: &v3clusterpb.CircuitBreakers{ - Thresholds: []*v3clusterpb.CircuitBreakers_Thresholds{ - { - Priority: v3corepb.RoutingPriority_DEFAULT, - MaxRequests: wrapperspb.UInt32(512), - }, - { - Priority: v3corepb.RoutingPriority_HIGH, - MaxRequests: nil, - }, - }, - }, - LrsServer: &v3corepb.ConfigSource{ - ConfigSourceSpecifier: &v3corepb.ConfigSource_Self{ - Self: &v3corepb.SelfConfigSource{}, - }, - }, - }, - wantUpdate: ClusterUpdate{ClusterName: clusterName, EDSServiceName: serviceName, LRSServerConfig: ClusterLRSServerSelf, MaxRequests: func() *uint32 { i := uint32(512); return &i }()}, - }, - { - name: "happiest-case-with-ring-hash-lb-policy-with-default-config", - cluster: &v3clusterpb.Cluster{ - Name: clusterName, - ClusterDiscoveryType: &v3clusterpb.Cluster_Type{Type: v3clusterpb.Cluster_EDS}, - EdsClusterConfig: &v3clusterpb.Cluster_EdsClusterConfig{ - EdsConfig: &v3corepb.ConfigSource{ - ConfigSourceSpecifier: &v3corepb.ConfigSource_Ads{ 
- Ads: &v3corepb.AggregatedConfigSource{}, - }, - }, - ServiceName: serviceName, - }, - LbPolicy: v3clusterpb.Cluster_RING_HASH, - LrsServer: &v3corepb.ConfigSource{ - ConfigSourceSpecifier: &v3corepb.ConfigSource_Self{ - Self: &v3corepb.SelfConfigSource{}, - }, - }, - }, - wantUpdate: ClusterUpdate{ - ClusterName: clusterName, EDSServiceName: serviceName, LRSServerConfig: ClusterLRSServerSelf, - LBPolicy: &ClusterLBPolicyRingHash{MinimumRingSize: defaultRingHashMinSize, MaximumRingSize: defaultRingHashMaxSize}, - }, - }, - { - name: "happiest-case-with-ring-hash-lb-policy-with-none-default-config", - cluster: &v3clusterpb.Cluster{ - Name: clusterName, - ClusterDiscoveryType: &v3clusterpb.Cluster_Type{Type: v3clusterpb.Cluster_EDS}, - EdsClusterConfig: &v3clusterpb.Cluster_EdsClusterConfig{ - EdsConfig: &v3corepb.ConfigSource{ - ConfigSourceSpecifier: &v3corepb.ConfigSource_Ads{ - Ads: &v3corepb.AggregatedConfigSource{}, - }, - }, - ServiceName: serviceName, - }, - LbPolicy: v3clusterpb.Cluster_RING_HASH, - LbConfig: &v3clusterpb.Cluster_RingHashLbConfig_{ - RingHashLbConfig: &v3clusterpb.Cluster_RingHashLbConfig{ - MinimumRingSize: wrapperspb.UInt64(10), - MaximumRingSize: wrapperspb.UInt64(100), - }, - }, - LrsServer: &v3corepb.ConfigSource{ - ConfigSourceSpecifier: &v3corepb.ConfigSource_Self{ - Self: &v3corepb.SelfConfigSource{}, - }, - }, - }, - wantUpdate: ClusterUpdate{ - ClusterName: clusterName, EDSServiceName: serviceName, LRSServerConfig: ClusterLRSServerSelf, - LBPolicy: &ClusterLBPolicyRingHash{MinimumRingSize: 10, MaximumRingSize: 100}, - }, - }, - } - - oldAggregateAndDNSSupportEnv := envconfig.XDSAggregateAndDNS - envconfig.XDSAggregateAndDNS = true - defer func() { envconfig.XDSAggregateAndDNS = oldAggregateAndDNSSupportEnv }() - oldRingHashSupport := envconfig.XDSRingHash - envconfig.XDSRingHash = true - defer func() { envconfig.XDSRingHash = oldRingHashSupport }() - for _, test := range tests { - t.Run(test.name, func(t *testing.T) { - update, err := validateClusterAndConstructClusterUpdate(test.cluster) - if err != nil { - t.Errorf("validateClusterAndConstructClusterUpdate(%+v) failed: %v", test.cluster, err) - } - if diff := cmp.Diff(update, test.wantUpdate, cmpopts.EquateEmpty()); diff != "" { - t.Errorf("validateClusterAndConstructClusterUpdate(%+v) got diff: %v (-got, +want)", test.cluster, diff) - } - }) - } -} - func (s) TestValidateClusterWithSecurityConfig_EnvVarOff(t *testing.T) { // Turn off the env var protection for client-side security. 
origClientSideSecurityEnvVar := envconfig.XDSClientSideSecurity @@ -510,7 +322,7 @@ func (s) TestValidateClusterWithSecurityConfig_EnvVarOff(t *testing.T) { if err != nil { t.Errorf("validateClusterAndConstructClusterUpdate() failed: %v", err) } - if diff := cmp.Diff(wantUpdate, gotUpdate); diff != "" { + if diff := cmp.Diff(wantUpdate, gotUpdate, cmpopts.IgnoreFields(ClusterUpdate{}, "LBPolicyJSON")); diff != "" { t.Errorf("validateClusterAndConstructClusterUpdate() returned unexpected diff (-want, got):\n%s", diff) } } @@ -1403,7 +1215,7 @@ func (s) TestValidateClusterWithSecurityConfig(t *testing.T) { if (err != nil) != test.wantErr { t.Errorf("validateClusterAndConstructClusterUpdate() returned err %v wantErr %v)", err, test.wantErr) } - if diff := cmp.Diff(test.wantUpdate, update, cmpopts.EquateEmpty(), cmp.AllowUnexported(regexp.Regexp{})); diff != "" { + if diff := cmp.Diff(test.wantUpdate, update, cmpopts.EquateEmpty(), cmp.AllowUnexported(regexp.Regexp{}), cmpopts.IgnoreFields(ClusterUpdate{}, "LBPolicyJSON")); diff != "" { t.Errorf("validateClusterAndConstructClusterUpdate() returned unexpected diff (-want, +got):\n%s", diff) } }) @@ -1545,7 +1357,7 @@ func (s) TestUnmarshalCluster(t *testing.T) { if name != test.wantName { t.Errorf("unmarshalClusterResource(%s), got name: %s, want: %s", pretty.ToJSON(test.resource), name, test.wantName) } - if diff := cmp.Diff(update, test.wantUpdate, cmpOpts); diff != "" { + if diff := cmp.Diff(update, test.wantUpdate, cmpOpts, cmpopts.IgnoreFields(ClusterUpdate{}, "LBPolicyJSON")); diff != "" { t.Errorf("unmarshalClusterResource(%s), got unexpected update, diff (-got +want): %v", pretty.ToJSON(test.resource), diff) } }) @@ -1695,7 +1507,7 @@ func (s) TestValidateClusterWithOutlierDetection(t *testing.T) { if (err != nil) != test.wantErr { t.Errorf("validateClusterAndConstructClusterUpdate() returned err %v wantErr %v)", err, test.wantErr) } - if diff := cmp.Diff(test.wantUpdate, update, cmpopts.EquateEmpty()); diff != "" { + if diff := cmp.Diff(test.wantUpdate, update, cmpopts.EquateEmpty(), cmpopts.IgnoreFields(ClusterUpdate{}, "LBPolicyJSON")); diff != "" { t.Errorf("validateClusterAndConstructClusterUpdate() returned unexpected diff (-want, +got):\n%s", diff) } }) From de11139ae6d0e235ee0cb8789725a46d7bf108e2 Mon Sep 17 00:00:00 2001 From: Easwar Swaminathan Date: Wed, 26 Apr 2023 09:50:03 -0700 Subject: [PATCH 06/60] clusterresolver: improve tests (#6188) --- resolver/manual/manual.go | 11 +- .../clusterresolver/e2e_test/balancer_test.go | 1103 +++++++++++++++-- .../clusterresolver/e2e_test/eds_impl_test.go | 6 +- .../clusterresolver/resource_resolver_test.go | 829 ------------- 4 files changed, 1019 insertions(+), 930 deletions(-) diff --git a/resolver/manual/manual.go b/resolver/manual/manual.go index f6e7b5ae3581..f27978e1281f 100644 --- a/resolver/manual/manual.go +++ b/resolver/manual/manual.go @@ -21,6 +21,8 @@ package manual import ( + "sync" + "google.golang.org/grpc/resolver" ) @@ -50,6 +52,7 @@ type Resolver struct { scheme string // Fields actually belong to the resolver. + mu sync.Mutex // Guards access to CC. CC resolver.ClientConn bootstrapState *resolver.State } @@ -62,8 +65,10 @@ func (r *Resolver) InitialState(s resolver.State) { // Build returns itself for Resolver, because it's both a builder and a resolver. 
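+//
+// Build records the ClientConn under the lock before invoking BuildCallback,
+// so that a BuildCallback which itself calls UpdateState or ReportError
+// observes a non-nil CC.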
func (r *Resolver) Build(target resolver.Target, cc resolver.ClientConn, opts resolver.BuildOptions) (resolver.Resolver, error) { - r.BuildCallback(target, cc, opts) + r.mu.Lock() r.CC = cc + r.mu.Unlock() + r.BuildCallback(target, cc, opts) if r.bootstrapState != nil { r.UpdateState(*r.bootstrapState) } @@ -87,10 +92,14 @@ func (r *Resolver) Close() { // UpdateState calls CC.UpdateState. func (r *Resolver) UpdateState(s resolver.State) { + r.mu.Lock() r.CC.UpdateState(s) + r.mu.Unlock() } // ReportError calls CC.ReportError. func (r *Resolver) ReportError(err error) { + r.mu.Lock() r.CC.ReportError(err) + r.mu.Unlock() } diff --git a/xds/internal/balancer/clusterresolver/e2e_test/balancer_test.go b/xds/internal/balancer/clusterresolver/e2e_test/balancer_test.go index 7eaf29e5e1fa..3d0d08a3c983 100644 --- a/xds/internal/balancer/clusterresolver/e2e_test/balancer_test.go +++ b/xds/internal/balancer/clusterresolver/e2e_test/balancer_test.go @@ -19,10 +19,12 @@ package e2e_test import ( "context" "fmt" + "sort" "strings" "testing" "time" + "github.com/google/go-cmp/cmp" "google.golang.org/grpc" "google.golang.org/grpc/codes" "google.golang.org/grpc/connectivity" @@ -31,16 +33,19 @@ import ( "google.golang.org/grpc/internal/stubserver" "google.golang.org/grpc/internal/testutils" "google.golang.org/grpc/internal/testutils/xds/e2e" + "google.golang.org/grpc/peer" "google.golang.org/grpc/resolver" "google.golang.org/grpc/resolver/manual" "google.golang.org/grpc/serviceconfig" "google.golang.org/grpc/status" "google.golang.org/grpc/xds/internal/xdsclient" "google.golang.org/grpc/xds/internal/xdsclient/xdsresource/version" + "google.golang.org/protobuf/types/known/wrapperspb" v3clusterpb "github.com/envoyproxy/go-control-plane/envoy/config/cluster/v3" v3corepb "github.com/envoyproxy/go-control-plane/envoy/config/core/v3" v3endpointpb "github.com/envoyproxy/go-control-plane/envoy/config/endpoint/v3" + v3aggregateclusterpb "github.com/envoyproxy/go-control-plane/envoy/extensions/clusters/aggregate/v3" v3discoverypb "github.com/envoyproxy/go-control-plane/envoy/service/discovery/v3" testgrpc "google.golang.org/grpc/interop/grpc_testing" testpb "google.golang.org/grpc/interop/grpc_testing" @@ -48,6 +53,124 @@ import ( _ "google.golang.org/grpc/xds/internal/balancer/cdsbalancer" // Register the "cds_experimental" LB policy. ) +// makeAggregateClusterResource returns an aggregate cluster resource with the +// given name and list of child names. +func makeAggregateClusterResource(name string, childNames []string) *v3clusterpb.Cluster { + return &v3clusterpb.Cluster{ + Name: name, + ClusterDiscoveryType: &v3clusterpb.Cluster_ClusterType{ + ClusterType: &v3clusterpb.Cluster_CustomClusterType{ + Name: "envoy.clusters.aggregate", + TypedConfig: testutils.MarshalAny(&v3aggregateclusterpb.ClusterConfig{ + Clusters: childNames, + }), + }, + }, + LbPolicy: v3clusterpb.Cluster_ROUND_ROBIN, + } +} + +// makeLogicalDNSClusterResource returns a LOGICAL_DNS cluster resource with the +// given name and given DNS host and port. 
+func makeLogicalDNSClusterResource(name, dnsHost string, dnsPort uint32) *v3clusterpb.Cluster { + return &v3clusterpb.Cluster{ + Name: name, + ClusterDiscoveryType: &v3clusterpb.Cluster_Type{Type: v3clusterpb.Cluster_LOGICAL_DNS}, + LbPolicy: v3clusterpb.Cluster_ROUND_ROBIN, + LoadAssignment: &v3endpointpb.ClusterLoadAssignment{ + Endpoints: []*v3endpointpb.LocalityLbEndpoints{{ + LbEndpoints: []*v3endpointpb.LbEndpoint{{ + HostIdentifier: &v3endpointpb.LbEndpoint_Endpoint{ + Endpoint: &v3endpointpb.Endpoint{ + Address: &v3corepb.Address{ + Address: &v3corepb.Address_SocketAddress{ + SocketAddress: &v3corepb.SocketAddress{ + Address: dnsHost, + PortSpecifier: &v3corepb.SocketAddress_PortValue{ + PortValue: dnsPort, + }, + }, + }, + }, + }, + }, + }}, + }}, + }, + } +} + +// setupDNS unregisters the DNS resolver and registers a manual resolver for the +// same scheme. This allows the test to mock the DNS resolution by supplying the +// addresses of the test backends. +// +// Returns the following: +// - a channel onto which the DNS target being resolved is written to by the +// mock DNS resolver +// - a channel to notify close of the DNS resolver +// - a channel to notify re-resolution requests to the DNS resolver +// - a manual resolver which is used to mock the actual DNS resolution +// - a cleanup function which re-registers the original DNS resolver +func setupDNS() (chan resolver.Target, chan struct{}, chan resolver.ResolveNowOptions, *manual.Resolver, func()) { + targetCh := make(chan resolver.Target, 1) + closeCh := make(chan struct{}, 1) + resolveNowCh := make(chan resolver.ResolveNowOptions, 1) + + mr := manual.NewBuilderWithScheme("dns") + mr.BuildCallback = func(target resolver.Target, _ resolver.ClientConn, _ resolver.BuildOptions) { targetCh <- target } + mr.CloseCallback = func() { closeCh <- struct{}{} } + mr.ResolveNowCallback = func(opts resolver.ResolveNowOptions) { resolveNowCh <- opts } + + dnsResolverBuilder := resolver.Get("dns") + resolver.UnregisterForTesting("dns") + resolver.Register(mr) + + return targetCh, closeCh, resolveNowCh, mr, func() { resolver.Register(dnsResolverBuilder) } +} + +// setupAndDial performs common setup across all tests +// +// - creates an xDS client with the passed in bootstrap contents +// - creates a manual resolver that configures `cds_experimental` as the +// top-level LB policy. +// - creates a ClientConn to talk to the test backends +// +// Returns a function to close the ClientConn and the xDS client. +func setupAndDial(t *testing.T, bootstrapContents []byte) (*grpc.ClientConn, func()) { + t.Helper() + + // Create an xDS client for use by the cluster_resolver LB policy. + xdsC, xdsClose, err := xdsclient.NewWithBootstrapContentsForTesting(bootstrapContents) + if err != nil { + t.Fatalf("Failed to create xDS client: %v", err) + } + + // Create a manual resolver and push a service config specifying the use of + // the cds LB policy as the top-level LB policy, and a corresponding config + // with a single cluster. + r := manual.NewBuilderWithScheme("whatever") + jsonSC := fmt.Sprintf(`{ + "loadBalancingConfig":[{ + "cds_experimental":{ + "cluster": "%s" + } + }] + }`, clusterName) + scpr := internal.ParseServiceConfig.(func(string) *serviceconfig.ParseResult)(jsonSC) + r.InitialState(xdsclient.SetClient(resolver.State{ServiceConfig: scpr}, xdsC)) + + // Create a ClientConn and make a successful RPC. 
+ cc, err := grpc.Dial(r.Scheme()+":///test.service", grpc.WithTransportCredentials(insecure.NewCredentials()), grpc.WithResolvers(r)) + if err != nil { + xdsClose() + t.Fatalf("Failed to dial local test server: %v", err) + } + return cc, func() { + xdsClose() + cc.Close() + } +} + // TestErrorFromParentLB_ConnectionError tests the case where the parent of the // clusterresolver LB policy sends its a connection error. The parent policy, // CDS LB policy, sends a connection error when the ADS stream to the management @@ -76,7 +199,6 @@ func (s) TestErrorFromParentLB_ConnectionError(t *testing.T) { }) defer cleanup() - // Start a test backend and extract its host and port. server := stubserver.StartTestService(t, nil) defer server.Stop() @@ -93,37 +215,14 @@ func (s) TestErrorFromParentLB_ConnectionError(t *testing.T) { t.Fatal(err) } - // Create an xDS xdsClient for use by the cluster_resolver LB policy. - xdsClient, close, err := xdsclient.NewWithBootstrapContentsForTesting(bootstrapContents) - if err != nil { - t.Fatalf("Failed to create xDS client: %v", err) - } - defer close() - - // Create a manual resolver and push a service config specifying the use of - // the cds LB policy as the top-level LB policy, and a corresponding config - // with a single cluster. - r := manual.NewBuilderWithScheme("whatever") - jsonSC := fmt.Sprintf(`{ - "loadBalancingConfig":[{ - "cds_experimental":{ - "cluster": "%s" - } - }] - }`, clusterName) - scpr := internal.ParseServiceConfig.(func(string) *serviceconfig.ParseResult)(jsonSC) - r.InitialState(xdsclient.SetClient(resolver.State{ServiceConfig: scpr}, xdsClient)) - - // Create a ClientConn and make a successful RPC. - cc, err := grpc.Dial(r.Scheme()+":///test.service", grpc.WithTransportCredentials(insecure.NewCredentials()), grpc.WithResolvers(r)) - if err != nil { - t.Fatalf("failed to dial local test server: %v", err) - } - defer cc.Close() + // Create xDS client, configure cds_experimental LB policy with a manual + // resolver, and dial the test backends. + cc, cleanup := setupAndDial(t, bootstrapContents) + defer cleanup() client := testgrpc.NewTestServiceClient(cc) if _, err := client.EmptyCall(ctx, &testpb.Empty{}); err != nil { - t.Fatalf("rpc EmptyCall() failed: %v", err) + t.Fatalf("EmptyCall() failed: %v", err) } // Close the listener and ensure that the ADS stream breaks. @@ -134,10 +233,10 @@ func (s) TestErrorFromParentLB_ConnectionError(t *testing.T) { default: } - // Ensure that RPCs continue to succeed for the next one second. + // Ensure that RPCs continue to succeed for the next second. for end := time.Now().Add(time.Second); time.Now().Before(end); <-time.After(defaultTestShortTimeout) { if _, err := client.EmptyCall(ctx, &testpb.Empty{}); err != nil { - t.Fatalf("rpc EmptyCall() failed: %v", err) + t.Fatalf("EmptyCall() failed: %v", err) } } } @@ -183,7 +282,6 @@ func (s) TestErrorFromParentLB_ResourceNotFound(t *testing.T) { }) defer cleanup() - // Start a test backend and extract its host and port. server := stubserver.StartTestService(t, nil) defer server.Stop() @@ -200,33 +298,10 @@ func (s) TestErrorFromParentLB_ResourceNotFound(t *testing.T) { t.Fatal(err) } - // Create an xDS xdsClient for use by the cluster_resolver LB policy. 
- xdsClient, close, err := xdsclient.NewWithBootstrapContentsForTesting(bootstrapContents) - if err != nil { - t.Fatalf("Failed to create xDS client: %v", err) - } - defer close() - - // Create a manual resolver and push a service config specifying the use of - // the cds LB policy as the top-level LB policy, and a corresponding config - // with a single cluster. - r := manual.NewBuilderWithScheme("whatever") - jsonSC := fmt.Sprintf(`{ - "loadBalancingConfig":[{ - "cds_experimental":{ - "cluster": "%s" - } - }] - }`, clusterName) - scpr := internal.ParseServiceConfig.(func(string) *serviceconfig.ParseResult)(jsonSC) - r.InitialState(xdsclient.SetClient(resolver.State{ServiceConfig: scpr}, xdsClient)) - - // Create a ClientConn that kick starts the xDS workflow. - cc, err := grpc.Dial(r.Scheme()+":///test.service", grpc.WithTransportCredentials(insecure.NewCredentials()), grpc.WithResolvers(r)) - if err != nil { - t.Fatalf("failed to dial local test server: %v", err) - } - defer cc.Close() + // Create xDS client, configure cds_experimental LB policy with a manual + // resolver, and dial the test backends. + cc, cleanup := setupAndDial(t, bootstrapContents) + defer cleanup() // Wait for the EDS resource to be requested. select { @@ -238,7 +313,7 @@ func (s) TestErrorFromParentLB_ResourceNotFound(t *testing.T) { // Ensure that a successful RPC can be made. client := testgrpc.NewTestServiceClient(cc) if _, err := client.EmptyCall(ctx, &testpb.Empty{}); err != nil { - t.Fatalf("rpc EmptyCall() failed: %v", err) + t.Fatalf("EmptyCall() failed: %v", err) } // Delete the cluster resource from the mangement server. @@ -263,7 +338,7 @@ func (s) TestErrorFromParentLB_ResourceNotFound(t *testing.T) { break } if err != nil { - t.Logf("EmptyCall RPC failed: %v", err) + t.Logf("EmptyCall failed: %v", err) } } if ctx.Err() != nil { @@ -298,7 +373,7 @@ func (s) TestErrorFromParentLB_ResourceNotFound(t *testing.T) { sCtx, sCancel := context.WithTimeout(ctx, defaultTestShortTimeout) defer sCancel() if _, err := client.EmptyCall(sCtx, &testpb.Empty{}); err != nil { - t.Logf("EmptyCall RPC failed: %v", err) + t.Logf("EmptyCall failed: %v", err) continue } break @@ -308,11 +383,11 @@ func (s) TestErrorFromParentLB_ResourceNotFound(t *testing.T) { } } -// TestEDSResourceRemoved tests the case where the EDS resource requested by the -// clusterresolver LB policy is removed from the management server. The test +// TestEDS_ResourceRemoved tests the case where the EDS resource requested by +// the clusterresolver LB policy is removed from the management server. The test // verifies that the EDS watch is not canceled and that RPCs continue to succeed // with the previously received configuration. -func (s) TestEDSResourceRemoved(t *testing.T) { +func (s) TestEDS_ResourceRemoved(t *testing.T) { // Start an xDS management server that uses a couple of channels to // notify the test about the following events: // - an EDS requested with the expected resource name is requested @@ -346,7 +421,6 @@ func (s) TestEDSResourceRemoved(t *testing.T) { }) defer cleanup() - // Start a test backend and extract its host and port. server := stubserver.StartTestService(t, nil) defer server.Stop() @@ -363,37 +437,14 @@ func (s) TestEDSResourceRemoved(t *testing.T) { t.Fatal(err) } - // Create an xDS xdsClient for use by the cluster_resolver LB policy. 
-	xdsClient, close, err := xdsclient.NewWithBootstrapContentsForTesting(bootstrapContents)
-	if err != nil {
-		t.Fatalf("Failed to create xDS client: %v", err)
-	}
-	defer close()
-
-	// Create a manual resolver and push a service config specifying the use of
-	// the cds LB policy as the top-level LB policy, and a corresponding config
-	// with a single cluster.
-	r := manual.NewBuilderWithScheme("whatever")
-	jsonSC := fmt.Sprintf(`{
-			"loadBalancingConfig":[{
-				"cds_experimental":{
-					"cluster": "%s"
-				}
-			}]
-		}`, clusterName)
-	scpr := internal.ParseServiceConfig.(func(string) *serviceconfig.ParseResult)(jsonSC)
-	r.InitialState(xdsclient.SetClient(resolver.State{ServiceConfig: scpr}, xdsClient))
-
-	// Create a ClientConn and make a successful RPC.
-	cc, err := grpc.Dial(r.Scheme()+":///test.service", grpc.WithTransportCredentials(insecure.NewCredentials()), grpc.WithResolvers(r))
-	if err != nil {
-		t.Fatalf("failed to dial local test server: %v", err)
-	}
-	defer cc.Close()
+	// Create xDS client, configure cds_experimental LB policy with a manual
+	// resolver, and dial the test backends.
+	cc, cleanup := setupAndDial(t, bootstrapContents)
+	defer cleanup()
 
 	client := testgrpc.NewTestServiceClient(cc)
 	if _, err := client.EmptyCall(ctx, &testpb.Empty{}); err != nil {
-		t.Fatalf("rpc EmptyCall() failed: %v", err)
+		t.Fatalf("EmptyCall() failed: %v", err)
 	}
 
 	// Delete the endpoints resource from the management server.
@@ -402,10 +453,11 @@ func (s) TestEDS_ResourceRemoved(t *testing.T) {
 		t.Fatal(err)
 	}
 
-	// Ensure that RPCs continue to succeed for the next one second, and that the EDS watch is not canceled.
+	// Ensure that RPCs continue to succeed for the next second, and that the
+	// EDS watch is not canceled.
 	for end := time.Now().Add(time.Second); time.Now().Before(end); <-time.After(defaultTestShortTimeout) {
 		if _, err := client.EmptyCall(ctx, &testpb.Empty{}); err != nil {
-			t.Fatalf("rpc EmptyCall() failed: %v", err)
+			t.Fatalf("EmptyCall() failed: %v", err)
 		}
 		select {
 		case <-edsResourceCanceledCh:
@@ -414,3 +466,862 @@
 		}
 	}
 }
+
+// TestEDS_ClusterResourceDoesNotContainEDSServiceName tests the case where the
+// Cluster resource sent by the management server does not contain an EDS
+// service name. The test verifies that the cluster_resolver LB policy uses the
+// cluster name for the EDS resource.
+func (s) TestEDS_ClusterResourceDoesNotContainEDSServiceName(t *testing.T) {
+	edsResourceCh := make(chan string, 1)
+	managementServer, nodeID, bootstrapContents, _, cleanup := e2e.SetupManagementServer(t, e2e.ManagementServerOptions{
+		OnStreamRequest: func(_ int64, req *v3discoverypb.DiscoveryRequest) error {
+			if req.GetTypeUrl() != version.V3EndpointsURL {
+				return nil
+			}
+			if len(req.GetResourceNames()) > 0 {
+				select {
+				case edsResourceCh <- req.GetResourceNames()[0]:
+				default:
+				}
+			}
+			return nil
+		},
+	})
+	defer cleanup()
+
+	server := stubserver.StartTestService(t, nil)
+	defer server.Stop()
+
+	// Configure cluster and endpoints resources with the same name in the
+	// management server. The cluster resource does not specify an EDS service
+	// name.
+	resources := e2e.UpdateOptions{
+		NodeID:         nodeID,
+		Clusters:       []*v3clusterpb.Cluster{e2e.DefaultCluster(clusterName, "", e2e.SecurityLevelNone)},
+		Endpoints:      []*v3endpointpb.ClusterLoadAssignment{e2e.DefaultEndpoint(clusterName, "localhost", []uint32{testutils.ParsePort(t, server.Address)})},
+		SkipValidation: true,
+	}
+	ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout)
+	defer cancel()
+	if err := managementServer.Update(ctx, resources); err != nil {
+		t.Fatal(err)
+	}
+
+	// Create xDS client, configure cds_experimental LB policy with a manual
+	// resolver, and dial the test backends.
+	cc, cleanup := setupAndDial(t, bootstrapContents)
+	defer cleanup()
+
+	client := testgrpc.NewTestServiceClient(cc)
+	if _, err := client.EmptyCall(ctx, &testpb.Empty{}); err != nil {
+		t.Fatalf("EmptyCall() failed: %v", err)
+	}
+
+	select {
+	case <-ctx.Done():
+		t.Fatal("Timeout when waiting for EDS request to be received on the management server")
+	case name := <-edsResourceCh:
+		if name != clusterName {
+			t.Fatalf("Received EDS request with resource name %q, want %q", name, clusterName)
+		}
+	}
+}
+
+// TestEDS_ClusterResourceUpdates verifies different scenarios with regard to
+// cluster resource updates.
+//
+// - The first cluster resource contains an eds_service_name. The test verifies
+//   that an EDS request is sent for the received eds_service_name. It also
+//   verifies that a subsequent RPC gets routed to a backend belonging to that
+//   service name.
+// - The next cluster resource update contains no eds_service_name. The test
+//   verifies that a subsequent EDS request is sent for the cluster_name and
+//   that the previously received eds_service_name is no longer requested. It
+//   also verifies that a subsequent RPC gets routed to a backend belonging to
+//   the service represented by the cluster_name.
+// - The next cluster resource update changes the circuit breaking
+//   configuration, but does not change the service name. The test verifies
+//   that a subsequent RPC gets routed to the same backend as before.
+func (s) TestEDS_ClusterResourceUpdates(t *testing.T) {
+	ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout)
+	defer cancel()
+
+	// Start an xDS management server that pushes the EDS resource names onto a
+	// channel.
+	edsResourceNameCh := make(chan []string, 1)
+	managementServer, nodeID, bootstrapContents, _, cleanup := e2e.SetupManagementServer(t, e2e.ManagementServerOptions{
+		OnStreamRequest: func(_ int64, req *v3discoverypb.DiscoveryRequest) error {
+			if req.GetTypeUrl() != version.V3EndpointsURL {
+				return nil
+			}
+			if len(req.GetResourceNames()) == 0 {
+				// This is the case for ACKs. Do nothing here.
+				return nil
+			}
+			select {
+			case <-ctx.Done():
+			case edsResourceNameCh <- req.GetResourceNames():
+			}
+			return nil
+		},
+		AllowResourceSubset: true,
+	})
+	defer cleanup()
+
+	// Start two test backends and extract their host and port. The first
+	// backend is used for the EDS resource identified by the eds_service_name,
+	// and the second backend is used for the EDS resource identified by the
+	// cluster_name.
+	servers, cleanup2 := startTestServiceBackends(t, 2)
+	defer cleanup2()
+	addrs, ports := backendAddressesAndPorts(t, servers)
+
+	// Configure cluster and endpoints resources in the management server.
+	resources := e2e.UpdateOptions{
+		NodeID:   nodeID,
+		Clusters: []*v3clusterpb.Cluster{e2e.DefaultCluster(clusterName, edsServiceName, e2e.SecurityLevelNone)},
+		Endpoints: []*v3endpointpb.ClusterLoadAssignment{
+			e2e.DefaultEndpoint(edsServiceName, "localhost", []uint32{uint32(ports[0])}),
+			e2e.DefaultEndpoint(clusterName, "localhost", []uint32{uint32(ports[1])}),
+		},
+		SkipValidation: true,
+	}
+	if err := managementServer.Update(ctx, resources); err != nil {
+		t.Fatal(err)
+	}
+
+	// Create xDS client, configure cds_experimental LB policy with a manual
+	// resolver, and dial the test backends.
+	cc, cleanup := setupAndDial(t, bootstrapContents)
+	defer cleanup()
+
+	client := testgrpc.NewTestServiceClient(cc)
+	peer := &peer.Peer{}
+	if _, err := client.EmptyCall(ctx, &testpb.Empty{}, grpc.Peer(peer)); err != nil {
+		t.Fatalf("EmptyCall() failed: %v", err)
+	}
+	if peer.Addr.String() != addrs[0].Addr {
+		t.Fatalf("EmptyCall() routed to backend %q, want %q", peer.Addr, addrs[0].Addr)
+	}
+
+	// Ensure EDS watch is registered for eds_service_name.
+	select {
+	case <-ctx.Done():
+		t.Fatal("Timeout when waiting for EDS request to be received on the management server")
+	case names := <-edsResourceNameCh:
+		if !cmp.Equal(names, []string{edsServiceName}) {
+			t.Fatalf("Received EDS request with resource names %v, want %v", names, []string{edsServiceName})
+		}
+	}
+
+	// Change the cluster resource to not contain an eds_service_name.
+	resources.Clusters = []*v3clusterpb.Cluster{e2e.DefaultCluster(clusterName, "", e2e.SecurityLevelNone)}
+	if err := managementServer.Update(ctx, resources); err != nil {
+		t.Fatal(err)
+	}
+
+	// Ensure that an EDS watch for eds_service_name is canceled and a new
+	// watch for cluster_name is registered. The actual order in which this
+	// happens is not deterministic, i.e., the watch for the old resource could
+	// be canceled before the new one is registered or vice versa. In either
+	// case, eventually, we want to see a request to the management server for
+	// just the cluster_name.
+	for ; ctx.Err() == nil; <-time.After(defaultTestShortTimeout) {
+		names := <-edsResourceNameCh
+		if cmp.Equal(names, []string{clusterName}) {
+			break
+		}
+	}
+	if ctx.Err() != nil {
+		t.Fatalf("Timeout when waiting for old EDS watch %q to be canceled and new one %q to be registered", edsServiceName, clusterName)
+	}
+
+	// Make an RPC and ensure that it gets routed to the second backend,
+	// corresponding to the cluster_name.
+	for ; ctx.Err() == nil; <-time.After(defaultTestShortTimeout) {
+		if _, err := client.EmptyCall(ctx, &testpb.Empty{}, grpc.Peer(peer)); err != nil {
+			continue
+		}
+		if peer.Addr.String() == addrs[1].Addr {
+			break
+		}
+	}
+	if ctx.Err() != nil {
+		t.Fatalf("Timeout when waiting for EmptyCall() to be routed to correct backend %q", addrs[1].Addr)
+	}
+
+	// Change cluster resource circuit breaking count.
+	resources.Clusters[0].CircuitBreakers = &v3clusterpb.CircuitBreakers{
+		Thresholds: []*v3clusterpb.CircuitBreakers_Thresholds{
+			{
+				Priority:    v3corepb.RoutingPriority_DEFAULT,
+				MaxRequests: wrapperspb.UInt32(512),
+			},
+		},
+	}
+	if err := managementServer.Update(ctx, resources); err != nil {
+		t.Fatal(err)
+	}
+
+	// Ensure that RPCs continue to get routed to the second backend for the
+	// next second.
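
A note on the loop that follows (and its many siblings in this file): it is a compact time-bounded polling idiom. The for statement's post clause receives from a fresh timer channel, so the body runs once per tick until the deadline passes. Distilled, with defaultTestShortTimeout standing in for the suite's tick duration:

	// Sketch of the polling idiom: assert steady-state behavior once per
	// tick until the deadline expires; any violation fails the test.
	for end := time.Now().Add(time.Second); time.Now().Before(end); <-time.After(defaultTestShortTimeout) {
		// e.g. make an RPC and t.Fatalf if it fails or lands on the
		// wrong backend
	}
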
+	for end := time.Now().Add(time.Second); time.Now().Before(end); <-time.After(defaultTestShortTimeout) {
+		if _, err := client.EmptyCall(ctx, &testpb.Empty{}, grpc.Peer(peer)); err != nil {
+			t.Fatalf("EmptyCall() failed: %v", err)
+		}
+		if peer.Addr.String() != addrs[1].Addr {
+			t.Fatalf("EmptyCall() routed to backend %q, want %q", peer.Addr, addrs[1].Addr)
+		}
+	}
+}
+
+// TestAggregateCluster_WithTwoEDSClusters tests the case where the top-level
+// cluster resource is an aggregate cluster. It verifies that RPCs fail when the
+// management server has not responded to all requested EDS resources, and also
+// that RPCs are routed to the highest priority cluster once all requested EDS
+// resources have been sent by the management server.
+func (s) TestAggregateCluster_WithTwoEDSClusters(t *testing.T) {
+	ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout)
+	defer cancel()
+
+	// Start an xDS management server that pushes the EDS resource names onto a
+	// channel when requested.
+	edsResourceNameCh := make(chan []string, 1)
+	managementServer, nodeID, bootstrapContents, _, cleanup := e2e.SetupManagementServer(t, e2e.ManagementServerOptions{
+		OnStreamRequest: func(_ int64, req *v3discoverypb.DiscoveryRequest) error {
+			if req.GetTypeUrl() != version.V3EndpointsURL {
+				return nil
+			}
+			if len(req.GetResourceNames()) == 0 {
+				// This is the case for ACKs. Do nothing here.
+				return nil
+			}
+			select {
+			case edsResourceNameCh <- req.GetResourceNames():
+			case <-ctx.Done():
+			}
+			return nil
+		},
+		AllowResourceSubset: true,
+	})
+	defer cleanup()
+
+	// Start two test backends and extract their host and port. The first
+	// backend belongs to EDS cluster "cluster-1", while the second backend
+	// belongs to EDS cluster "cluster-2".
+	servers, cleanup2 := startTestServiceBackends(t, 2)
+	defer cleanup2()
+	addrs, ports := backendAddressesAndPorts(t, servers)
+
+	// Configure an aggregate cluster, two EDS clusters and only one endpoints
+	// resource (corresponding to the first EDS cluster) in the management
+	// server.
+	const clusterName1 = clusterName + "-cluster-1"
+	const clusterName2 = clusterName + "-cluster-2"
+	resources := e2e.UpdateOptions{
+		NodeID: nodeID,
+		Clusters: []*v3clusterpb.Cluster{
+			makeAggregateClusterResource(clusterName, []string{clusterName1, clusterName2}),
+			e2e.DefaultCluster(clusterName1, "", e2e.SecurityLevelNone),
+			e2e.DefaultCluster(clusterName2, "", e2e.SecurityLevelNone),
+		},
+		Endpoints:      []*v3endpointpb.ClusterLoadAssignment{e2e.DefaultEndpoint(clusterName1, "localhost", []uint32{uint32(ports[0])})},
+		SkipValidation: true,
+	}
+	if err := managementServer.Update(ctx, resources); err != nil {
+		t.Fatal(err)
+	}
+
+	// Create xDS client, configure cds_experimental LB policy with a manual
+	// resolver, and dial the test backends.
+	cc, cleanup := setupAndDial(t, bootstrapContents)
+	defer cleanup()
+
+	// Wait for both EDS resources to be requested.
+	func() {
+		for ; ctx.Err() == nil; <-time.After(defaultTestShortTimeout) {
+			select {
+			case names := <-edsResourceNameCh:
+				// Copy and sort the names to avoid racing with an
+				// OnStreamRequest call.
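
The copy-then-sort in the next few lines is deliberate: sort.Strings mutates its argument in place, and the OnStreamRequest callback above can fire again while this goroutine still holds the slice, so sorting the received value directly could race on the shared backing array. The idiom, distilled:

	names := <-edsResourceNameCh // the producer may still reference this slice
	sorted := make([]string, len(names))
	copy(sorted, names) // mutate only a private copy
	sort.Strings(sorted)
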
+ sortedNames := make([]string, len(names)) + copy(sortedNames, names) + sort.Strings(sortedNames) + if cmp.Equal(sortedNames, []string{clusterName1, clusterName2}) { + return + } + default: + } + } + }() + if ctx.Err() != nil { + t.Fatalf("Timeout when waiting for all EDS resources %v to be requested", []string{clusterName1, clusterName2}) + } + + // Make an RPC with a short deadline. We expect this RPC to not succeed + // because the management server has not responded with all EDS resources + // requested. + client := testgrpc.NewTestServiceClient(cc) + sCtx, sCancel := context.WithTimeout(ctx, defaultTestShortTimeout) + defer sCancel() + if _, err := client.EmptyCall(sCtx, &testpb.Empty{}); status.Code(err) != codes.DeadlineExceeded { + t.Fatalf("EmptyCall() code %s, want %s", status.Code(err), codes.DeadlineExceeded) + } + + // Update the management server with the second EDS resource. + resources.Endpoints = append(resources.Endpoints, e2e.DefaultEndpoint(clusterName2, "localhost", []uint32{uint32(ports[1])})) + if err := managementServer.Update(ctx, resources); err != nil { + t.Fatal(err) + } + + // Make an RPC and ensure that it gets routed to cluster-1, implicitly + // higher priority than cluster-2. + peer := &peer.Peer{} + if _, err := client.EmptyCall(ctx, &testpb.Empty{}, grpc.Peer(peer), grpc.WaitForReady(true)); err != nil { + t.Fatalf("EmptyCall() failed: %v", err) + } + if peer.Addr.String() != addrs[0].Addr { + t.Fatalf("EmptyCall() routed to backend %q, want %q", peer.Addr, addrs[0].Addr) + } +} + +// TestAggregateCluster_WithTwoEDSClusters_PrioritiesChange tests the case where +// the top-level cluster resource is an aggregate cluster. It verifies that RPCs +// are routed to the highest priority EDS cluster. +func (s) TestAggregateCluster_WithTwoEDSClusters_PrioritiesChange(t *testing.T) { + // Start an xDS management server. + managementServer, nodeID, bootstrapContents, _, cleanup := e2e.SetupManagementServer(t, e2e.ManagementServerOptions{AllowResourceSubset: true}) + defer cleanup() + + // Start two test backends and extract their host and port. The first + // backend belongs to EDS cluster "cluster-1", while the second backend + // belongs to EDS cluster "cluster-2". + servers, cleanup2 := startTestServiceBackends(t, 2) + defer cleanup2() + addrs, ports := backendAddressesAndPorts(t, servers) + + // Configure an aggregate cluster, two EDS clusters and the corresponding + // endpoints resources in the management server. + const clusterName1 = clusterName + "cluster-1" + const clusterName2 = clusterName + "cluster-2" + resources := e2e.UpdateOptions{ + NodeID: nodeID, + Clusters: []*v3clusterpb.Cluster{ + makeAggregateClusterResource(clusterName, []string{clusterName1, clusterName2}), + e2e.DefaultCluster(clusterName1, "", e2e.SecurityLevelNone), + e2e.DefaultCluster(clusterName2, "", e2e.SecurityLevelNone), + }, + Endpoints: []*v3endpointpb.ClusterLoadAssignment{ + e2e.DefaultEndpoint(clusterName1, "localhost", []uint32{uint32(ports[0])}), + e2e.DefaultEndpoint(clusterName2, "localhost", []uint32{uint32(ports[1])}), + }, + SkipValidation: true, + } + ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout) + defer cancel() + if err := managementServer.Update(ctx, resources); err != nil { + t.Fatal(err) + } + + // Create xDS client, configure cds_experimental LB policy with a manual + // resolver, and dial the test backends. 
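
Both aggregate-cluster tests above lean on makeAggregateClusterResource, a helper defined elsewhere in this file and not shown in this patch. For orientation: an aggregate cluster is a Cluster resource carrying the Envoy aggregate custom cluster type, whose ClusterConfig lists the child clusters in priority order. A plausible sketch of such a helper follows (the exact body in the repository may differ); the test code resumes just below it.

	// Assumed imports:
	//   v3clusterpb "github.com/envoyproxy/go-control-plane/envoy/config/cluster/v3"
	//   v3aggregateclusterpb "github.com/envoyproxy/go-control-plane/envoy/extensions/clusters/aggregate/v3"
	//   "google.golang.org/protobuf/types/known/anypb"
	func makeAggregateClusterResourceSketch(name string, childNames []string) *v3clusterpb.Cluster {
		// Pack the child cluster names into the aggregate extension's config.
		cfg, err := anypb.New(&v3aggregateclusterpb.ClusterConfig{Clusters: childNames})
		if err != nil {
			panic(err)
		}
		return &v3clusterpb.Cluster{
			Name: name,
			ClusterDiscoveryType: &v3clusterpb.Cluster_ClusterType{
				ClusterType: &v3clusterpb.Cluster_CustomClusterType{
					Name:        "envoy.clusters.aggregate",
					TypedConfig: cfg,
				},
			},
			LbPolicy: v3clusterpb.Cluster_CLUSTER_PROVIDED,
		}
	}
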
+ cc, cleanup := setupAndDial(t, bootstrapContents) + defer cleanup() + + // Make an RPC and ensure that it gets routed to cluster-1, implicitly + // higher priority than cluster-2. + client := testgrpc.NewTestServiceClient(cc) + peer := &peer.Peer{} + if _, err := client.EmptyCall(ctx, &testpb.Empty{}, grpc.Peer(peer), grpc.WaitForReady(true)); err != nil { + t.Fatalf("EmptyCall() failed: %v", err) + } + if peer.Addr.String() != addrs[0].Addr { + t.Fatalf("EmptyCall() routed to backend %q, want %q", peer.Addr, addrs[0].Addr) + } + + // Swap the priorities of the EDS clusters in the aggregate cluster. + resources.Clusters = []*v3clusterpb.Cluster{ + makeAggregateClusterResource(clusterName, []string{clusterName2, clusterName1}), + e2e.DefaultCluster(clusterName1, "", e2e.SecurityLevelNone), + e2e.DefaultCluster(clusterName2, "", e2e.SecurityLevelNone), + } + if err := managementServer.Update(ctx, resources); err != nil { + t.Fatal(err) + } + + // Wait for RPCs to get routed to cluster-2, which is now implicitly higher + // priority than cluster-1, after the priority switch above. + for ; ctx.Err() == nil; <-time.After(defaultTestShortTimeout) { + if _, err := client.EmptyCall(ctx, &testpb.Empty{}, grpc.Peer(peer), grpc.WaitForReady(true)); err != nil { + t.Fatalf("EmptyCall() failed: %v", err) + } + if peer.Addr.String() == addrs[1].Addr { + break + } + } + if ctx.Err() != nil { + t.Fatal("Timeout waiting for RPCs to be routed to cluster-2 after priority switch") + } +} + +// TestAggregateCluster_WithOneDNSCluster tests the case where the top-level +// cluster resource is an aggregate cluster that resolves to a single +// LOGICAL_DNS cluster. The test verifies that RPCs can be made to backends that +// make up the LOGICAL_DNS cluster. +func (s) TestAggregateCluster_WithOneDNSCluster(t *testing.T) { + dnsTargetCh, _, _, dnsR, cleanup1 := setupDNS() + defer cleanup1() + + // Start an xDS management server. + managementServer, nodeID, bootstrapContents, _, cleanup2 := e2e.SetupManagementServer(t, e2e.ManagementServerOptions{AllowResourceSubset: true}) + defer cleanup2() + + // Start two test backends. + servers, cleanup3 := startTestServiceBackends(t, 2) + defer cleanup3() + addrs, _ := backendAddressesAndPorts(t, servers) + + // Configure an aggregate cluster pointing to a single LOGICAL_DNS cluster. + const ( + dnsClusterName = clusterName + "-dns" + dnsHostName = "dns_host" + dnsPort = uint32(8080) + ) + resources := e2e.UpdateOptions{ + NodeID: nodeID, + Clusters: []*v3clusterpb.Cluster{ + makeAggregateClusterResource(clusterName, []string{dnsClusterName}), + makeLogicalDNSClusterResource(dnsClusterName, dnsHostName, dnsPort), + }, + SkipValidation: true, + } + ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout) + defer cancel() + if err := managementServer.Update(ctx, resources); err != nil { + t.Fatal(err) + } + + // Create xDS client, configure cds_experimental LB policy with a manual + // resolver, and dial the test backends. + cc, cleanup := setupAndDial(t, bootstrapContents) + defer cleanup() + + // Ensure that the DNS resolver is started for the expected target. + select { + case <-ctx.Done(): + t.Fatal("Timeout when waiting for DNS resolver to be started") + case target := <-dnsTargetCh: + got, want := target.Endpoint(), fmt.Sprintf("%s:%d", dnsHostName, dnsPort) + if got != want { + t.Fatalf("DNS resolution started for target %q, want %q", got, want) + } + } + + // Update DNS resolver with test backend addresses. 
+ dnsR.UpdateState(resolver.State{Addresses: addrs}) + + // Make an RPC and ensure that it gets routed to the first backend since the + // child policy for a LOGICAL_DNS cluster is pick_first by default. + client := testgrpc.NewTestServiceClient(cc) + peer := &peer.Peer{} + if _, err := client.EmptyCall(ctx, &testpb.Empty{}, grpc.Peer(peer), grpc.WaitForReady(true)); err != nil { + t.Fatalf("EmptyCall() failed: %v", err) + } + if peer.Addr.String() != addrs[0].Addr { + t.Fatalf("EmptyCall() routed to backend %q, want %q", peer.Addr, addrs[0].Addr) + } +} + +// TestAggregateCluster_WithEDSAndDNS tests the case where the top-level cluster +// resource is an aggregate cluster that resolves to an EDS and a LOGICAL_DNS +// cluster. The test verifies that RPCs fail until both clusters are resolved to +// endpoints, and RPCs are routed to the higher priority EDS cluster. +func (s) TestAggregateCluster_WithEDSAndDNS(t *testing.T) { + dnsTargetCh, _, _, dnsR, cleanup1 := setupDNS() + defer cleanup1() + + // Start an xDS management server that pushes the name of the requested EDS + // resource onto a channel. + edsResourceCh := make(chan string, 1) + managementServer, nodeID, bootstrapContents, _, cleanup2 := e2e.SetupManagementServer(t, e2e.ManagementServerOptions{ + OnStreamRequest: func(_ int64, req *v3discoverypb.DiscoveryRequest) error { + if req.GetTypeUrl() != version.V3EndpointsURL { + return nil + } + if len(req.GetResourceNames()) > 0 { + select { + case edsResourceCh <- req.GetResourceNames()[0]: + default: + } + } + return nil + }, + AllowResourceSubset: true, + }) + defer cleanup2() + + // Start two test backends and extract their host and port. The first + // backend is used for the EDS cluster and the second backend is used for + // the LOGICAL_DNS cluster. + servers, cleanup3 := startTestServiceBackends(t, 2) + defer cleanup3() + addrs, ports := backendAddressesAndPorts(t, servers) + + // Configure an aggregate cluster pointing to an EDS and DNS cluster. Also + // configure an endpoints resource for the EDS cluster. + const ( + edsClusterName = clusterName + "-eds" + dnsClusterName = clusterName + "-dns" + dnsHostName = "dns_host" + dnsPort = uint32(8080) + ) + resources := e2e.UpdateOptions{ + NodeID: nodeID, + Clusters: []*v3clusterpb.Cluster{ + makeAggregateClusterResource(clusterName, []string{edsClusterName, dnsClusterName}), + e2e.DefaultCluster(edsClusterName, "", e2e.SecurityLevelNone), + makeLogicalDNSClusterResource(dnsClusterName, dnsHostName, dnsPort), + }, + Endpoints: []*v3endpointpb.ClusterLoadAssignment{e2e.DefaultEndpoint(edsClusterName, "localhost", []uint32{uint32(ports[0])})}, + SkipValidation: true, + } + ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout) + defer cancel() + if err := managementServer.Update(ctx, resources); err != nil { + t.Fatal(err) + } + + // Create xDS client, configure cds_experimental LB policy with a manual + // resolver, and dial the test backends. + cc, cleanup := setupAndDial(t, bootstrapContents) + defer cleanup() + + // Ensure that an EDS request is sent for the expected resource name. + select { + case <-ctx.Done(): + t.Fatal("Timeout when waiting for EDS request to be received on the management server") + case name := <-edsResourceCh: + if name != edsClusterName { + t.Fatalf("Received EDS request with resource name %q, want %q", name, edsClusterName) + } + } + + // Ensure that the DNS resolver is started for the expected target. 
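
These tests signal the main test goroutine through buffered channels, and producers (the management server's OnStreamRequest callback above, the fake DNS resolver) always send non-blockingly so a callback can never deadlock the server. The two halves of the pattern, distilled:

	// Producer: a capacity-1 channel plus select/default keeps the first
	// observation and drops later ones without ever blocking.
	ch := make(chan string, 1)
	select {
	case ch <- "event":
	default:
	}

	// Consumer: bound the wait with the test context.
	select {
	case got := <-ch:
		_ = got // assert on the received value
	case <-ctx.Done():
		// t.Fatal("timeout waiting for event")
	}
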
+ select { + case <-ctx.Done(): + t.Fatal("Timeout when waiting for DNS resolver to be started") + case target := <-dnsTargetCh: + got, want := target.Endpoint(), fmt.Sprintf("%s:%d", dnsHostName, dnsPort) + if got != want { + t.Fatalf("DNS resolution started for target %q, want %q", got, want) + } + } + + // Make an RPC with a short deadline. We expect this RPC to not succeed + // because the DNS resolver has not responded with endpoint addresses. + client := testgrpc.NewTestServiceClient(cc) + sCtx, sCancel := context.WithTimeout(ctx, defaultTestShortTimeout) + defer sCancel() + if _, err := client.EmptyCall(sCtx, &testpb.Empty{}); status.Code(err) != codes.DeadlineExceeded { + t.Fatalf("EmptyCall() code %s, want %s", status.Code(err), codes.DeadlineExceeded) + } + + // Update DNS resolver with test backend addresses. + dnsR.UpdateState(resolver.State{Addresses: addrs[1:]}) + + // Make an RPC and ensure that it gets routed to the first backend since the + // EDS cluster is of higher priority than the LOGICAL_DNS cluster. + peer := &peer.Peer{} + if _, err := client.EmptyCall(ctx, &testpb.Empty{}, grpc.Peer(peer), grpc.WaitForReady(true)); err != nil { + t.Fatalf("EmptyCall() failed: %v", err) + } + if peer.Addr.String() != addrs[0].Addr { + t.Fatalf("EmptyCall() routed to backend %q, want %q", peer.Addr, addrs[0].Addr) + } +} + +// TestAggregateCluster_SwitchEDSAndDNS tests the case where the top-level +// cluster resource is an aggregate cluster. It initially resolves to a single +// EDS cluster. The test verifies that RPCs are routed to backends in the EDS +// cluster. Subsequently, the aggregate cluster resolves to a single DNS +// cluster. The test verifies that RPCs are successful, this time to backends in +// the DNS cluster. +func (s) TestAggregateCluster_SwitchEDSAndDNS(t *testing.T) { + dnsTargetCh, _, _, dnsR, cleanup1 := setupDNS() + defer cleanup1() + + // Start an xDS management server. + managementServer, nodeID, bootstrapContents, _, cleanup2 := e2e.SetupManagementServer(t, e2e.ManagementServerOptions{AllowResourceSubset: true}) + defer cleanup2() + + // Start two test backends and extract their host and port. The first + // backend is used for the EDS cluster and the second backend is used for + // the LOGICAL_DNS cluster. + servers, cleanup3 := startTestServiceBackends(t, 2) + defer cleanup3() + addrs, ports := backendAddressesAndPorts(t, servers) + + // Configure an aggregate cluster pointing to a single EDS cluster. Also, + // configure the underlying EDS cluster (and the corresponding endpoints + // resource) and DNS cluster (will be used later in the test). + const ( + dnsClusterName = clusterName + "-dns" + dnsHostName = "dns_host" + dnsPort = uint32(8080) + ) + resources := e2e.UpdateOptions{ + NodeID: nodeID, + Clusters: []*v3clusterpb.Cluster{ + makeAggregateClusterResource(clusterName, []string{edsServiceName}), + e2e.DefaultCluster(edsServiceName, "", e2e.SecurityLevelNone), + makeLogicalDNSClusterResource(dnsClusterName, dnsHostName, dnsPort), + }, + Endpoints: []*v3endpointpb.ClusterLoadAssignment{e2e.DefaultEndpoint(edsServiceName, "localhost", []uint32{uint32(ports[0])})}, + SkipValidation: true, + } + ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout) + defer cancel() + if err := managementServer.Update(ctx, resources); err != nil { + t.Fatal(err) + } + + // Create xDS client, configure cds_experimental LB policy with a manual + // resolver, and dial the test backends. 
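
The setupAndDial helper, defined at the top of this file, bundles the boilerplate that the hunks above delete from every test. Reconstructed from that removed code (the helper's exact body may differ slightly), it does essentially this:

	// Build an xDS client from the bootstrap contents, then install a
	// service config that makes cds_experimental the top-level LB policy.
	xdsClient, xdsClose, err := xdsclient.NewWithBootstrapContentsForTesting(bootstrapContents)
	if err != nil {
		t.Fatalf("Failed to create xDS client: %v", err)
	}
	r := manual.NewBuilderWithScheme("whatever")
	jsonSC := fmt.Sprintf(`{
		"loadBalancingConfig":[{
			"cds_experimental":{
				"cluster": "%s"
			}
		}]
	}`, clusterName)
	scpr := internal.ParseServiceConfig.(func(string) *serviceconfig.ParseResult)(jsonSC)
	r.InitialState(xdsclient.SetClient(resolver.State{ServiceConfig: scpr}, xdsClient))
	cc, err := grpc.Dial(r.Scheme()+":///test.service", grpc.WithTransportCredentials(insecure.NewCredentials()), grpc.WithResolvers(r))
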
+	cc, cleanup := setupAndDial(t, bootstrapContents)
+	defer cleanup()
+
+	// Ensure that the RPC is routed to the appropriate backend.
+	client := testgrpc.NewTestServiceClient(cc)
+	peer := &peer.Peer{}
+	if _, err := client.EmptyCall(ctx, &testpb.Empty{}, grpc.Peer(peer), grpc.WaitForReady(true)); err != nil {
+		t.Fatalf("EmptyCall() failed: %v", err)
+	}
+	if peer.Addr.String() != addrs[0].Addr {
+		t.Fatalf("EmptyCall() routed to backend %q, want %q", peer.Addr, addrs[0].Addr)
+	}
+
+	// Update the aggregate cluster to point to a single DNS cluster.
+	resources.Clusters = []*v3clusterpb.Cluster{
+		makeAggregateClusterResource(clusterName, []string{dnsClusterName}),
+		e2e.DefaultCluster(edsServiceName, "", e2e.SecurityLevelNone),
+		makeLogicalDNSClusterResource(dnsClusterName, dnsHostName, dnsPort),
+	}
+	if err := managementServer.Update(ctx, resources); err != nil {
+		t.Fatal(err)
+	}
+
+	// Ensure that the DNS resolver is started for the expected target.
+	select {
+	case <-ctx.Done():
+		t.Fatal("Timeout when waiting for DNS resolver to be started")
+	case target := <-dnsTargetCh:
+		got, want := target.Endpoint(), fmt.Sprintf("%s:%d", dnsHostName, dnsPort)
+		if got != want {
+			t.Fatalf("DNS resolution started for target %q, want %q", got, want)
+		}
+	}
+
+	// Update DNS resolver with test backend addresses.
+	dnsR.UpdateState(resolver.State{Addresses: addrs[1:]})
+
+	// Ensure that RPCs start getting routed to the backend corresponding to
+	// the LOGICAL_DNS cluster.
+	for ; ctx.Err() == nil; <-time.After(defaultTestShortTimeout) {
+		client.EmptyCall(ctx, &testpb.Empty{}, grpc.Peer(peer))
+		if peer.Addr.String() == addrs[1].Addr {
+			break
+		}
+	}
+	if ctx.Err() != nil {
+		t.Fatalf("Timeout when waiting for RPCs to be routed to backend %q in the DNS cluster", addrs[1].Addr)
+	}
+}
+
+// TestAggregateCluster_BadEDS_GoodToBadDNS tests the case where the top-level
+// cluster is an aggregate cluster that resolves to an EDS and LOGICAL_DNS
+// cluster. The test first asserts that no RPCs can be made after receiving an
+// EDS response with zero endpoints because no update has been received from the
+// DNS resolver yet. Once the DNS resolver pushes an update, the test verifies
+// that we switch to the DNS cluster and can make a successful RPC. At this
+// point, when the DNS cluster returns an error, the test verifies that RPCs
+// are still successful. This is the expected behavior because pick_first (the
+// leaf policy) ignores resolver errors when it is not in TransientFailure.
+func (s) TestAggregateCluster_BadEDS_GoodToBadDNS(t *testing.T) {
+	dnsTargetCh, _, _, dnsR, cleanup1 := setupDNS()
+	defer cleanup1()
+
+	// Start an xDS management server.
+	managementServer, nodeID, bootstrapContents, _, cleanup2 := e2e.SetupManagementServer(t, e2e.ManagementServerOptions{AllowResourceSubset: true})
+	defer cleanup2()
+
+	// Start two test backends.
+	servers, cleanup3 := startTestServiceBackends(t, 2)
+	defer cleanup3()
+	addrs, _ := backendAddressesAndPorts(t, servers)
+
+	// Configure an aggregate cluster pointing to an EDS and LOGICAL_DNS
+	// cluster. Also configure an empty endpoints resource for the EDS cluster
+	// that contains no endpoints.
+ const ( + edsClusterName = clusterName + "-eds" + dnsClusterName = clusterName + "-dns" + dnsHostName = "dns_host" + dnsPort = uint32(8080) + ) + emptyEndpointResource := e2e.DefaultEndpoint(edsServiceName, "localhost", nil) + resources := e2e.UpdateOptions{ + NodeID: nodeID, + Clusters: []*v3clusterpb.Cluster{ + makeAggregateClusterResource(clusterName, []string{edsClusterName, dnsClusterName}), + e2e.DefaultCluster(edsClusterName, edsServiceName, e2e.SecurityLevelNone), + makeLogicalDNSClusterResource(dnsClusterName, dnsHostName, dnsPort), + }, + Endpoints: []*v3endpointpb.ClusterLoadAssignment{emptyEndpointResource}, + SkipValidation: true, + } + ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout) + defer cancel() + if err := managementServer.Update(ctx, resources); err != nil { + t.Fatal(err) + } + + // Create xDS client, configure cds_experimental LB policy with a manual + // resolver, and dial the test backends. + cc, cleanup := setupAndDial(t, bootstrapContents) + defer cleanup() + + // Make an RPC with a short deadline. We expect this RPC to not succeed + // because the EDS resource came back with no endpoints, and we are yet to + // push an update through the DNS resolver. + client := testgrpc.NewTestServiceClient(cc) + sCtx, sCancel := context.WithTimeout(ctx, defaultTestShortTimeout) + defer sCancel() + if _, err := client.EmptyCall(sCtx, &testpb.Empty{}); status.Code(err) != codes.DeadlineExceeded { + t.Fatalf("EmptyCall() code %s, want %s", status.Code(err), codes.DeadlineExceeded) + } + + // Ensure that the DNS resolver is started for the expected target. + select { + case <-ctx.Done(): + t.Fatal("Timeout when waiting for DNS resolver to be started") + case target := <-dnsTargetCh: + got, want := target.Endpoint(), fmt.Sprintf("%s:%d", dnsHostName, dnsPort) + if got != want { + t.Fatalf("DNS resolution started for target %q, want %q", got, want) + } + } + + // Update DNS resolver with test backend addresses. + dnsR.UpdateState(resolver.State{Addresses: addrs}) + + // Ensure that RPCs start getting routed to the first backend since the + // child policy for a LOGICAL_DNS cluster is pick_first by default. + for ; ctx.Err() == nil; <-time.After(defaultTestShortTimeout) { + peer := &peer.Peer{} + if _, err := client.EmptyCall(ctx, &testpb.Empty{}, grpc.Peer(peer)); err != nil { + t.Logf("EmptyCall() failed: %v", err) + continue + } + if peer.Addr.String() == addrs[0].Addr { + break + } + } + if ctx.Err() != nil { + t.Fatalf("Timeout when waiting for RPCs to be routed to backend %q in the DNS cluster", addrs[0].Addr) + } + + // Push an error from the DNS resolver as well. + dnsErr := fmt.Errorf("DNS error") + dnsR.ReportError(dnsErr) + + // Ensure that RPCs continue to succeed for the next second. + for end := time.Now().Add(time.Second); time.Now().Before(end); <-time.After(defaultTestShortTimeout) { + peer := &peer.Peer{} + if _, err := client.EmptyCall(ctx, &testpb.Empty{}, grpc.Peer(peer)); err != nil { + t.Fatalf("EmptyCall() failed: %v", err) + } + if peer.Addr.String() != addrs[0].Addr { + t.Fatalf("EmptyCall() routed to backend %q, want %q", peer.Addr, addrs[0].Addr) + } + } +} + +// TestAggregateCluster_BadEDS_BadDNS tests the case where the top-level cluster +// is an aggregate cluster that resolves to an EDS and LOGICAL_DNS cluster. When +// the EDS request returns a resource that contains no endpoints, the test +// verifies that we switch to the DNS cluster. 
When the DNS cluster returns an +// error, the test verifies that RPCs fail with the error returned by the DNS +// resolver, and thus, ensures that pick_first (the leaf policy) does not ignore +// resolver errors. +func (s) TestAggregateCluster_BadEDS_BadDNS(t *testing.T) { + dnsTargetCh, _, _, dnsR, cleanup1 := setupDNS() + defer cleanup1() + + // Start an xDS management server. + managementServer, nodeID, bootstrapContents, _, cleanup2 := e2e.SetupManagementServer(t, e2e.ManagementServerOptions{AllowResourceSubset: true}) + defer cleanup2() + + // Configure an aggregate cluster pointing to an EDS and LOGICAL_DNS + // cluster. Also configure an empty endpoints resource for the EDS cluster + // that contains no endpoints. + const ( + edsClusterName = clusterName + "-eds" + dnsClusterName = clusterName + "-dns" + dnsHostName = "dns_host" + dnsPort = uint32(8080) + ) + emptyEndpointResource := e2e.DefaultEndpoint(edsServiceName, "localhost", nil) + resources := e2e.UpdateOptions{ + NodeID: nodeID, + Clusters: []*v3clusterpb.Cluster{ + makeAggregateClusterResource(clusterName, []string{edsClusterName, dnsClusterName}), + e2e.DefaultCluster(edsClusterName, edsServiceName, e2e.SecurityLevelNone), + makeLogicalDNSClusterResource(dnsClusterName, dnsHostName, dnsPort), + }, + Endpoints: []*v3endpointpb.ClusterLoadAssignment{emptyEndpointResource}, + SkipValidation: true, + } + ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout) + defer cancel() + if err := managementServer.Update(ctx, resources); err != nil { + t.Fatal(err) + } + + // Create xDS client, configure cds_experimental LB policy with a manual + // resolver, and dial the test backends. + cc, cleanup := setupAndDial(t, bootstrapContents) + defer cleanup() + + // Make an RPC with a short deadline. We expect this RPC to not succeed + // because the EDS resource came back with no endpoints, and we are yet to + // push an update through the DNS resolver. + client := testgrpc.NewTestServiceClient(cc) + sCtx, sCancel := context.WithTimeout(ctx, defaultTestShortTimeout) + defer sCancel() + if _, err := client.EmptyCall(sCtx, &testpb.Empty{}); status.Code(err) != codes.DeadlineExceeded { + t.Fatalf("EmptyCall() code %s, want %s", status.Code(err), codes.DeadlineExceeded) + } + + // Ensure that the DNS resolver is started for the expected target. + select { + case <-ctx.Done(): + t.Fatal("Timeout when waiting for DNS resolver to be started") + case target := <-dnsTargetCh: + got, want := target.Endpoint(), fmt.Sprintf("%s:%d", dnsHostName, dnsPort) + if got != want { + t.Fatalf("DNS resolution started for target %q, want %q", got, want) + } + } + + // Push an error from the DNS resolver as well. + dnsErr := fmt.Errorf("DNS error") + dnsR.ReportError(dnsErr) + + // Ensure that the error returned from the DNS resolver is reported to the + // caller of the RPC. 
+	_, err := client.EmptyCall(ctx, &testpb.Empty{})
+	if code := status.Code(err); code != codes.Unavailable {
+		t.Fatalf("EmptyCall() failed with code %s, want %s", code, codes.Unavailable)
+	}
+	if err == nil || !strings.Contains(err.Error(), dnsErr.Error()) {
+		t.Fatalf("EmptyCall() failed with error %v, want %v", err, dnsErr)
+	}
+}
diff --git a/xds/internal/balancer/clusterresolver/e2e_test/eds_impl_test.go b/xds/internal/balancer/clusterresolver/e2e_test/eds_impl_test.go
index 7aa951bfec50..053b56f0dc86 100644
--- a/xds/internal/balancer/clusterresolver/e2e_test/eds_impl_test.go
+++ b/xds/internal/balancer/clusterresolver/e2e_test/eds_impl_test.go
@@ -83,11 +83,8 @@ func backendAddressesAndPorts(t *testing.T, servers []*stubserver.StubServer) ([
 }
 
 func startTestServiceBackends(t *testing.T, numBackends int) ([]*stubserver.StubServer, func()) {
-	servers := make([]*stubserver.StubServer, numBackends)
+	var servers []*stubserver.StubServer
 	for i := 0; i < numBackends; i++ {
-		servers[i] = &stubserver.StubServer{
-			EmptyCallF: func(context.Context, *testpb.Empty) (*testpb.Empty, error) { return &testpb.Empty{}, nil },
-		}
-		servers[i].StartServer()
+		servers = append(servers, stubserver.StartTestService(t, nil))
 	}
 
diff --git a/xds/internal/balancer/clusterresolver/resource_resolver_test.go b/xds/internal/balancer/clusterresolver/resource_resolver_test.go
index 2252373e56e6..0ae151ee5215 100644
--- a/xds/internal/balancer/clusterresolver/resource_resolver_test.go
+++ b/xds/internal/balancer/clusterresolver/resource_resolver_test.go
@@ -19,16 +19,9 @@
 package clusterresolver
 
 import (
-	"context"
-	"fmt"
-	"testing"
-
-	"github.com/google/go-cmp/cmp"
-
 	"google.golang.org/grpc/internal/testutils"
 	"google.golang.org/grpc/resolver"
 	"google.golang.org/grpc/resolver/manual"
 	xdstestutils "google.golang.org/grpc/xds/internal/testutils"
-	"google.golang.org/grpc/xds/internal/testutils/fakeclient"
 	"google.golang.org/grpc/xds/internal/xdsclient/xdsresource"
 )
 
@@ -49,86 +42,6 @@ func init() {
 	testEDSUpdates = append(testEDSUpdates, parseEDSRespProtoForTesting(clab2.Build()))
 }
 
-// Test the simple case with one EDS resource to watch.
-func (s) TestResourceResolverOneEDSResource(t *testing.T) {
-	for _, test := range []struct {
-		name                 string
-		clusterName, edsName string
-		wantName             string
-		edsUpdate            xdsresource.EndpointsUpdate
-		want                 []priorityConfig
-	}{
-		{name: "watch EDS",
-			clusterName: testClusterName,
-			edsName:     testEDSService,
-			wantName:    testEDSService,
-			edsUpdate:   testEDSUpdates[0],
-			want: []priorityConfig{{
-				mechanism: DiscoveryMechanism{
-					Type:           DiscoveryMechanismTypeEDS,
-					Cluster:        testClusterName,
-					EDSServiceName: testEDSService,
-				},
-				edsResp:      testEDSUpdates[0],
-				childNameGen: newNameGenerator(0),
-			}},
-		},
-		{
-			name: "watch EDS no EDS name", // Will watch for cluster name.
- clusterName: testClusterName, - wantName: testClusterName, - edsUpdate: testEDSUpdates[1], - want: []priorityConfig{{ - mechanism: DiscoveryMechanism{ - Type: DiscoveryMechanismTypeEDS, - Cluster: testClusterName, - }, - edsResp: testEDSUpdates[1], - childNameGen: newNameGenerator(0), - }}, - }, - } { - t.Run(test.name, func(t *testing.T) { - fakeClient := fakeclient.NewClient() - rr := newResourceResolver(&clusterResolverBalancer{xdsClient: fakeClient}) - rr.updateMechanisms([]DiscoveryMechanism{{ - Type: DiscoveryMechanismTypeEDS, - Cluster: test.clusterName, - EDSServiceName: test.edsName, - }}) - ctx, ctxCancel := context.WithTimeout(context.Background(), defaultTestTimeout) - defer ctxCancel() - gotEDSName, err := fakeClient.WaitForWatchEDS(ctx) - if err != nil { - t.Fatalf("xdsClient.WatchCDS failed with error: %v", err) - } - if gotEDSName != test.wantName { - t.Fatalf("xdsClient.WatchEDS called for cluster: %v, want: %v", gotEDSName, test.wantName) - } - - // Invoke callback, should get an update. - fakeClient.InvokeWatchEDSCallback("", test.edsUpdate, nil) - select { - case u := <-rr.updateChannel: - if diff := cmp.Diff(u.priorities, test.want, cmp.AllowUnexported(priorityConfig{}, nameGenerator{})); diff != "" { - t.Fatalf("got unexpected resource update, diff (-got, +want): %v", diff) - } - case <-ctx.Done(): - t.Fatal("Timed out waiting for update from update channel.") - } - // Close the resource resolver. Should stop EDS watch. - rr.stop() - edsNameCanceled, err := fakeClient.WaitForCancelEDSWatch(ctx) - if err != nil { - t.Fatalf("xdsClient.CancelCDS failed with error: %v", err) - } - if edsNameCanceled != test.wantName { - t.Fatalf("xdsClient.CancelEDS called for %v, want: %v", edsNameCanceled, testEDSService) - } - }) - } -} - func setupDNS() (chan resolver.Target, chan struct{}, chan resolver.ResolveNowOptions, *manual.Resolver, func()) { dnsTargetCh := make(chan resolver.Target, 1) dnsCloseCh := make(chan struct{}, 1) @@ -144,745 +57,3 @@ func setupDNS() (chan resolver.Target, chan struct{}, chan resolver.ResolveNowOp } return dnsTargetCh, dnsCloseCh, resolveNowCh, mr, func() { newDNS = oldNewDNS } } - -// Test the simple case of one DNS resolver. 
-func (s) TestResourceResolverOneDNSResource(t *testing.T) { - for _, test := range []struct { - name string - target string - wantTarget resolver.Target - addrs []resolver.Address - want []priorityConfig - }{ - { - name: "watch DNS", - target: testDNSTarget, - wantTarget: resolver.Target{Scheme: "dns", URL: *testutils.MustParseURL("dns:///" + testDNSTarget)}, - addrs: []resolver.Address{{Addr: "1.1.1.1"}, {Addr: "2.2.2.2"}}, - want: []priorityConfig{{ - mechanism: DiscoveryMechanism{ - Type: DiscoveryMechanismTypeLogicalDNS, - DNSHostname: testDNSTarget, - }, - addresses: []string{"1.1.1.1", "2.2.2.2"}, - childNameGen: newNameGenerator(0), - }}, - }, - } { - t.Run(test.name, func(t *testing.T) { - dnsTargetCh, dnsCloseCh, _, dnsR, cleanup := setupDNS() - defer cleanup() - fakeClient := fakeclient.NewClient() - rr := newResourceResolver(&clusterResolverBalancer{xdsClient: fakeClient}) - rr.updateMechanisms([]DiscoveryMechanism{{ - Type: DiscoveryMechanismTypeLogicalDNS, - DNSHostname: test.target, - }}) - ctx, ctxCancel := context.WithTimeout(context.Background(), defaultTestTimeout) - defer ctxCancel() - select { - case target := <-dnsTargetCh: - if diff := cmp.Diff(target, test.wantTarget); diff != "" { - t.Fatalf("got unexpected DNS target to watch, diff (-got, +want): %v", diff) - } - case <-ctx.Done(): - t.Fatal("Timed out waiting for building DNS resolver") - } - - // Invoke callback, should get an update. - dnsR.UpdateState(resolver.State{Addresses: test.addrs}) - select { - case u := <-rr.updateChannel: - if diff := cmp.Diff(u.priorities, test.want, cmp.AllowUnexported(priorityConfig{}, nameGenerator{})); diff != "" { - t.Fatalf("got unexpected resource update, diff (-got, +want): %v", diff) - } - case <-ctx.Done(): - t.Fatal("Timed out waiting for update from update channel.") - } - // Close the resource resolver. Should close the underlying resolver. - rr.stop() - select { - case <-dnsCloseCh: - case <-ctx.Done(): - t.Fatal("Timed out waiting for closing DNS resolver") - } - }) - } -} - -// Test that changing EDS name would cause a cancel and a new watch. -// -// Also, changes that don't actually change EDS names (e.g. changing cluster -// name but not service name, or change circuit breaking count) doesn't do -// anything. -// -// - update DiscoveryMechanism -// - same EDS name to watch, but different MaxCurrentCount: no new watch -// - different cluster name, but same EDS name: no new watch -func (s) TestResourceResolverChangeEDSName(t *testing.T) { - fakeClient := fakeclient.NewClient() - rr := newResourceResolver(&clusterResolverBalancer{xdsClient: fakeClient}) - rr.updateMechanisms([]DiscoveryMechanism{{ - Type: DiscoveryMechanismTypeEDS, - Cluster: testClusterName, - EDSServiceName: testEDSService, - }}) - ctx, ctxCancel := context.WithTimeout(context.Background(), defaultTestTimeout) - defer ctxCancel() - gotEDSName1, err := fakeClient.WaitForWatchEDS(ctx) - if err != nil { - t.Fatalf("xdsClient.WatchCDS failed with error: %v", err) - } - if gotEDSName1 != testEDSService { - t.Fatalf("xdsClient.WatchEDS called for cluster: %v, want: %v", gotEDSName1, testEDSService) - } - - // Invoke callback, should get an update. 
- fakeClient.InvokeWatchEDSCallback(gotEDSName1, testEDSUpdates[0], nil) - select { - case u := <-rr.updateChannel: - if diff := cmp.Diff(u.priorities, []priorityConfig{{ - mechanism: DiscoveryMechanism{ - Type: DiscoveryMechanismTypeEDS, - Cluster: testClusterName, - EDSServiceName: testEDSService, - }, - edsResp: testEDSUpdates[0], - childNameGen: newNameGenerator(0), - }}, cmp.AllowUnexported(priorityConfig{}, nameGenerator{})); diff != "" { - t.Fatalf("got unexpected resource update, diff (-got, +want): %v", diff) - } - case <-ctx.Done(): - t.Fatal("Timed out waiting for update from update channel.") - } - - // Change name to watch. - rr.updateMechanisms([]DiscoveryMechanism{{ - Type: DiscoveryMechanismTypeEDS, - Cluster: testClusterName, - }}) - edsNameCanceled1, err := fakeClient.WaitForCancelEDSWatch(ctx) - if err != nil { - t.Fatalf("xdsClient.CancelCDS failed with error: %v", err) - } - if edsNameCanceled1 != gotEDSName1 { - t.Fatalf("xdsClient.CancelEDS called for %v, want: %v", edsNameCanceled1, testEDSService) - } - gotEDSName2, err := fakeClient.WaitForWatchEDS(ctx) - if err != nil { - t.Fatalf("xdsClient.WatchCDS failed with error: %v", err) - } - if gotEDSName2 != testClusterName { - t.Fatalf("xdsClient.WatchEDS called for cluster: %v, want: %v", gotEDSName2, testClusterName) - } - // Shouldn't get any update, because the new resource hasn't received any - // update. - shortCtx, shortCancel := context.WithTimeout(context.Background(), defaultTestShortTimeout) - defer shortCancel() - select { - case u := <-rr.updateChannel: - t.Fatalf("get unexpected update %+v", u) - case <-shortCtx.Done(): - } - - // Invoke callback, should get an update. - fakeClient.InvokeWatchEDSCallback(gotEDSName2, testEDSUpdates[1], nil) - select { - case u := <-rr.updateChannel: - if diff := cmp.Diff(u.priorities, []priorityConfig{{ - mechanism: DiscoveryMechanism{ - Type: DiscoveryMechanismTypeEDS, - Cluster: testClusterName, - }, - edsResp: testEDSUpdates[1], - childNameGen: newNameGenerator(1), - }}, cmp.AllowUnexported(priorityConfig{}, nameGenerator{})); diff != "" { - t.Fatalf("got unexpected resource update, diff (-got, +want): %v", diff) - } - case <-ctx.Done(): - t.Fatal("Timed out waiting for update from update channel.") - } - - // Change circuit breaking count, should get an update with new circuit - // breaking count, but shouldn't trigger new watch. - rr.updateMechanisms([]DiscoveryMechanism{{ - Type: DiscoveryMechanismTypeEDS, - Cluster: testClusterName, - MaxConcurrentRequests: newUint32(123), - }}) - shortCtx, shortCancel = context.WithTimeout(context.Background(), defaultTestShortTimeout) - defer shortCancel() - if n, err := fakeClient.WaitForWatchEDS(shortCtx); err == nil { - t.Fatalf("unexpected watch started for EDS: %v", n) - } - select { - case u := <-rr.updateChannel: - if diff := cmp.Diff(u.priorities, []priorityConfig{{ - mechanism: DiscoveryMechanism{ - Type: DiscoveryMechanismTypeEDS, - Cluster: testClusterName, - MaxConcurrentRequests: newUint32(123), - }, - edsResp: testEDSUpdates[1], - childNameGen: newNameGenerator(1), - }}, cmp.AllowUnexported(priorityConfig{}, nameGenerator{})); diff != "" { - t.Fatalf("got unexpected resource update, diff (-got, +want): %v", diff) - } - case <-ctx.Done(): - t.Fatal("Timed out waiting for update from update channel.") - } - - // Close the resource resolver. Should stop EDS watch. 
- rr.stop() - edsNameCanceled, err := fakeClient.WaitForCancelEDSWatch(ctx) - if err != nil { - t.Fatalf("xdsClient.CancelCDS failed with error: %v", err) - } - if edsNameCanceled != gotEDSName2 { - t.Fatalf("xdsClient.CancelEDS called for %v, want: %v", edsNameCanceled, gotEDSName2) - } -} - -// Test the case that same resources with the same priority should not add new -// EDS watch, and also should not trigger an update. -func (s) TestResourceResolverNoChangeNoUpdate(t *testing.T) { - fakeClient := fakeclient.NewClient() - rr := newResourceResolver(&clusterResolverBalancer{xdsClient: fakeClient}) - rr.updateMechanisms([]DiscoveryMechanism{ - { - Type: DiscoveryMechanismTypeEDS, - Cluster: testClusterNames[0], - }, - { - Type: DiscoveryMechanismTypeEDS, - Cluster: testClusterNames[1], - MaxConcurrentRequests: newUint32(100), - }, - }) - ctx, ctxCancel := context.WithTimeout(context.Background(), defaultTestTimeout) - defer ctxCancel() - gotEDSName1, err := fakeClient.WaitForWatchEDS(ctx) - if err != nil { - t.Fatalf("xdsClient.WatchCDS failed with error: %v", err) - } - if gotEDSName1 != testClusterNames[0] { - t.Fatalf("xdsClient.WatchEDS called for cluster: %v, want: %v", gotEDSName1, testClusterNames[0]) - } - gotEDSName2, err := fakeClient.WaitForWatchEDS(ctx) - if err != nil { - t.Fatalf("xdsClient.WatchCDS failed with error: %v", err) - } - if gotEDSName2 != testClusterNames[1] { - t.Fatalf("xdsClient.WatchEDS called for cluster: %v, want: %v", gotEDSName2, testClusterNames[1]) - } - - // Invoke callback, should get an update. - fakeClient.InvokeWatchEDSCallback(gotEDSName1, testEDSUpdates[0], nil) - // Shouldn't send update, because only one resource received an update. - shortCtx, shortCancel := context.WithTimeout(context.Background(), defaultTestShortTimeout) - defer shortCancel() - select { - case u := <-rr.updateChannel: - t.Fatalf("get unexpected update %+v", u) - case <-shortCtx.Done(): - } - fakeClient.InvokeWatchEDSCallback(gotEDSName2, testEDSUpdates[1], nil) - select { - case u := <-rr.updateChannel: - if diff := cmp.Diff(u.priorities, []priorityConfig{ - { - mechanism: DiscoveryMechanism{ - Type: DiscoveryMechanismTypeEDS, - Cluster: testClusterNames[0], - }, - edsResp: testEDSUpdates[0], - childNameGen: newNameGenerator(0), - }, - { - mechanism: DiscoveryMechanism{ - Type: DiscoveryMechanismTypeEDS, - Cluster: testClusterNames[1], - MaxConcurrentRequests: newUint32(100), - }, - edsResp: testEDSUpdates[1], - childNameGen: newNameGenerator(1), - }, - }, cmp.AllowUnexported(priorityConfig{}, nameGenerator{})); diff != "" { - t.Fatalf("got unexpected resource update, diff (-got, +want): %v", diff) - } - case <-ctx.Done(): - t.Fatal("Timed out waiting for update from update channel.") - } - - // Send the same resources with the same priorities, shouldn't any change. 
- rr.updateMechanisms([]DiscoveryMechanism{ - { - Type: DiscoveryMechanismTypeEDS, - Cluster: testClusterNames[0], - }, - { - Type: DiscoveryMechanismTypeEDS, - Cluster: testClusterNames[1], - MaxConcurrentRequests: newUint32(100), - }, - }) - shortCtx, shortCancel = context.WithTimeout(context.Background(), defaultTestShortTimeout) - defer shortCancel() - if n, err := fakeClient.WaitForWatchEDS(shortCtx); err == nil { - t.Fatalf("unexpected watch started for EDS: %v", n) - } - shortCtx, shortCancel = context.WithTimeout(context.Background(), defaultTestShortTimeout) - defer shortCancel() - select { - case u := <-rr.updateChannel: - t.Fatalf("unexpected update: %+v", u) - case <-shortCtx.Done(): - } - - // Close the resource resolver. Should stop EDS watch. - rr.stop() - edsNameCanceled1, err := fakeClient.WaitForCancelEDSWatch(ctx) - if err != nil { - t.Fatalf("xdsClient.CancelCDS failed with error: %v", err) - } - if edsNameCanceled1 != gotEDSName1 && edsNameCanceled1 != gotEDSName2 { - t.Fatalf("xdsClient.CancelEDS called for %v, want: %v or %v", edsNameCanceled1, gotEDSName1, gotEDSName2) - } - edsNameCanceled2, err := fakeClient.WaitForCancelEDSWatch(ctx) - if err != nil { - t.Fatalf("xdsClient.CancelCDS failed with error: %v", err) - } - if edsNameCanceled2 != gotEDSName2 && edsNameCanceled2 != gotEDSName1 { - t.Fatalf("xdsClient.CancelEDS called for %v, want: %v or %v", edsNameCanceled2, gotEDSName1, gotEDSName2) - } -} - -// Test the case that same resources are watched, but with different priority. -// Should not add new EDS watch, but should trigger an update with the new -// priorities. -func (s) TestResourceResolverChangePriority(t *testing.T) { - fakeClient := fakeclient.NewClient() - rr := newResourceResolver(&clusterResolverBalancer{xdsClient: fakeClient}) - rr.updateMechanisms([]DiscoveryMechanism{ - { - Type: DiscoveryMechanismTypeEDS, - Cluster: testClusterNames[0], - }, - { - Type: DiscoveryMechanismTypeEDS, - Cluster: testClusterNames[1], - }, - }) - ctx, ctxCancel := context.WithTimeout(context.Background(), defaultTestTimeout) - defer ctxCancel() - gotEDSName1, err := fakeClient.WaitForWatchEDS(ctx) - if err != nil { - t.Fatalf("xdsClient.WatchCDS failed with error: %v", err) - } - if gotEDSName1 != testClusterNames[0] { - t.Fatalf("xdsClient.WatchEDS called for cluster: %v, want: %v", gotEDSName1, testClusterNames[0]) - } - gotEDSName2, err := fakeClient.WaitForWatchEDS(ctx) - if err != nil { - t.Fatalf("xdsClient.WatchCDS failed with error: %v", err) - } - if gotEDSName2 != testClusterNames[1] { - t.Fatalf("xdsClient.WatchEDS called for cluster: %v, want: %v", gotEDSName2, testClusterNames[1]) - } - - // Invoke callback, should get an update. - fakeClient.InvokeWatchEDSCallback(gotEDSName1, testEDSUpdates[0], nil) - // Shouldn't send update, because only one resource received an update. 
- shortCtx, shortCancel := context.WithTimeout(context.Background(), defaultTestShortTimeout) - defer shortCancel() - select { - case u := <-rr.updateChannel: - t.Fatalf("get unexpected update %+v", u) - case <-shortCtx.Done(): - } - fakeClient.InvokeWatchEDSCallback(gotEDSName2, testEDSUpdates[1], nil) - select { - case u := <-rr.updateChannel: - if diff := cmp.Diff(u.priorities, []priorityConfig{ - { - mechanism: DiscoveryMechanism{ - Type: DiscoveryMechanismTypeEDS, - Cluster: testClusterNames[0], - }, - edsResp: testEDSUpdates[0], - childNameGen: newNameGenerator(0), - }, - { - mechanism: DiscoveryMechanism{ - Type: DiscoveryMechanismTypeEDS, - Cluster: testClusterNames[1], - }, - edsResp: testEDSUpdates[1], - childNameGen: newNameGenerator(1), - }, - }, cmp.AllowUnexported(priorityConfig{}, nameGenerator{})); diff != "" { - t.Fatalf("got unexpected resource update, diff (-got, +want): %v", diff) - } - case <-ctx.Done(): - t.Fatal("Timed out waiting for update from update channel.") - } - - // Send the same resources with different priorities, shouldn't trigger - // watch, but should trigger an update with the new priorities. - rr.updateMechanisms([]DiscoveryMechanism{ - { - Type: DiscoveryMechanismTypeEDS, - Cluster: testClusterNames[1], - }, - { - Type: DiscoveryMechanismTypeEDS, - Cluster: testClusterNames[0], - }, - }) - shortCtx, shortCancel = context.WithTimeout(context.Background(), defaultTestShortTimeout) - defer shortCancel() - if n, err := fakeClient.WaitForWatchEDS(shortCtx); err == nil { - t.Fatalf("unexpected watch started for EDS: %v", n) - } - select { - case u := <-rr.updateChannel: - if diff := cmp.Diff(u.priorities, []priorityConfig{ - { - mechanism: DiscoveryMechanism{ - Type: DiscoveryMechanismTypeEDS, - Cluster: testClusterNames[1], - }, - edsResp: testEDSUpdates[1], - childNameGen: newNameGenerator(1), - }, - { - mechanism: DiscoveryMechanism{ - Type: DiscoveryMechanismTypeEDS, - Cluster: testClusterNames[0], - }, - edsResp: testEDSUpdates[0], - childNameGen: newNameGenerator(0), - }, - }, cmp.AllowUnexported(priorityConfig{}, nameGenerator{})); diff != "" { - t.Fatalf("got unexpected resource update, diff (-got, +want): %v", diff) - } - case <-ctx.Done(): - t.Fatal("Timed out waiting for update from update channel.") - } - - // Close the resource resolver. Should stop EDS watch. - rr.stop() - edsNameCanceled1, err := fakeClient.WaitForCancelEDSWatch(ctx) - if err != nil { - t.Fatalf("xdsClient.CancelCDS failed with error: %v", err) - } - if edsNameCanceled1 != gotEDSName1 && edsNameCanceled1 != gotEDSName2 { - t.Fatalf("xdsClient.CancelEDS called for %v, want: %v or %v", edsNameCanceled1, gotEDSName1, gotEDSName2) - } - edsNameCanceled2, err := fakeClient.WaitForCancelEDSWatch(ctx) - if err != nil { - t.Fatalf("xdsClient.CancelCDS failed with error: %v", err) - } - if edsNameCanceled2 != gotEDSName2 && edsNameCanceled2 != gotEDSName1 { - t.Fatalf("xdsClient.CancelEDS called for %v, want: %v or %v", edsNameCanceled2, gotEDSName1, gotEDSName2) - } -} - -// Test the case that covers resource for both EDS and DNS. 
-func (s) TestResourceResolverEDSAndDNS(t *testing.T) { - dnsTargetCh, dnsCloseCh, _, dnsR, cleanup := setupDNS() - defer cleanup() - fakeClient := fakeclient.NewClient() - rr := newResourceResolver(&clusterResolverBalancer{xdsClient: fakeClient}) - rr.updateMechanisms([]DiscoveryMechanism{ - { - Type: DiscoveryMechanismTypeEDS, - Cluster: testClusterName, - }, - { - Type: DiscoveryMechanismTypeLogicalDNS, - DNSHostname: testDNSTarget, - }, - }) - ctx, ctxCancel := context.WithTimeout(context.Background(), defaultTestTimeout) - defer ctxCancel() - gotEDSName1, err := fakeClient.WaitForWatchEDS(ctx) - if err != nil { - t.Fatalf("xdsClient.WatchCDS failed with error: %v", err) - } - if gotEDSName1 != testClusterName { - t.Fatalf("xdsClient.WatchEDS called for cluster: %v, want: %v", gotEDSName1, testClusterName) - } - select { - case target := <-dnsTargetCh: - if diff := cmp.Diff(target, resolver.Target{Scheme: "dns", URL: *testutils.MustParseURL("dns:///" + testDNSTarget)}); diff != "" { - t.Fatalf("got unexpected DNS target to watch, diff (-got, +want): %v", diff) - } - case <-ctx.Done(): - t.Fatal("Timed out waiting for building DNS resolver") - } - - fakeClient.InvokeWatchEDSCallback(gotEDSName1, testEDSUpdates[0], nil) - // Shouldn't send update, because only one resource received an update. - shortCtx, shortCancel := context.WithTimeout(context.Background(), defaultTestShortTimeout) - defer shortCancel() - select { - case u := <-rr.updateChannel: - t.Fatalf("get unexpected update %+v", u) - case <-shortCtx.Done(): - } - // Invoke DNS, should get an update. - dnsR.UpdateState(resolver.State{Addresses: []resolver.Address{{Addr: "1.1.1.1"}, {Addr: "2.2.2.2"}}}) - select { - case u := <-rr.updateChannel: - if diff := cmp.Diff(u.priorities, []priorityConfig{ - { - mechanism: DiscoveryMechanism{ - Type: DiscoveryMechanismTypeEDS, - Cluster: testClusterName, - }, - edsResp: testEDSUpdates[0], - childNameGen: newNameGenerator(0), - }, - { - mechanism: DiscoveryMechanism{ - Type: DiscoveryMechanismTypeLogicalDNS, - DNSHostname: testDNSTarget, - }, - addresses: []string{"1.1.1.1", "2.2.2.2"}, - childNameGen: newNameGenerator(1), - }, - }, cmp.AllowUnexported(priorityConfig{}, nameGenerator{})); diff != "" { - t.Fatalf("got unexpected resource update, diff (-got, +want): %v", diff) - } - case <-ctx.Done(): - t.Fatal("Timed out waiting for update from update channel.") - } - - // Close the resource resolver. Should stop EDS watch. - rr.stop() - edsNameCanceled1, err := fakeClient.WaitForCancelEDSWatch(ctx) - if err != nil { - t.Fatalf("xdsClient.CancelCDS failed with error: %v", err) - } - if edsNameCanceled1 != gotEDSName1 { - t.Fatalf("xdsClient.CancelEDS called for %v, want: %v", edsNameCanceled1, gotEDSName1) - } - select { - case <-dnsCloseCh: - case <-ctx.Done(): - t.Fatal("Timed out waiting for closing DNS resolver") - } -} - -// Test the case that covers resource changing between EDS and DNS. 
-func (s) TestResourceResolverChangeFromEDSToDNS(t *testing.T) { - dnsTargetCh, dnsCloseCh, _, dnsR, cleanup := setupDNS() - defer cleanup() - fakeClient := fakeclient.NewClient() - rr := newResourceResolver(&clusterResolverBalancer{xdsClient: fakeClient}) - rr.updateMechanisms([]DiscoveryMechanism{{ - Type: DiscoveryMechanismTypeEDS, - Cluster: testClusterName, - }}) - ctx, ctxCancel := context.WithTimeout(context.Background(), defaultTestTimeout) - defer ctxCancel() - gotEDSName1, err := fakeClient.WaitForWatchEDS(ctx) - if err != nil { - t.Fatalf("xdsClient.WatchCDS failed with error: %v", err) - } - if gotEDSName1 != testClusterName { - t.Fatalf("xdsClient.WatchEDS called for cluster: %v, want: %v", gotEDSName1, testClusterName) - } - - // Invoke callback, should get an update. - fakeClient.InvokeWatchEDSCallback(gotEDSName1, testEDSUpdates[0], nil) - select { - case u := <-rr.updateChannel: - if diff := cmp.Diff(u.priorities, []priorityConfig{{ - mechanism: DiscoveryMechanism{ - Type: DiscoveryMechanismTypeEDS, - Cluster: testClusterName, - }, - edsResp: testEDSUpdates[0], - childNameGen: newNameGenerator(0), - }}, cmp.AllowUnexported(priorityConfig{}, nameGenerator{})); diff != "" { - t.Fatalf("got unexpected resource update, diff (-got, +want): %v", diff) - } - case <-ctx.Done(): - t.Fatal("Timed out waiting for update from update channel.") - } - - // Update to watch DNS instead. Should cancel EDS, and start DNS. - rr.updateMechanisms([]DiscoveryMechanism{{ - Type: DiscoveryMechanismTypeLogicalDNS, - DNSHostname: testDNSTarget, - }}) - select { - case target := <-dnsTargetCh: - if diff := cmp.Diff(target, resolver.Target{Scheme: "dns", URL: *testutils.MustParseURL("dns:///" + testDNSTarget)}); diff != "" { - t.Fatalf("got unexpected DNS target to watch, diff (-got, +want): %v", diff) - } - case <-ctx.Done(): - t.Fatal("Timed out waiting for building DNS resolver") - } - edsNameCanceled1, err := fakeClient.WaitForCancelEDSWatch(ctx) - if err != nil { - t.Fatalf("xdsClient.CancelCDS failed with error: %v", err) - } - if edsNameCanceled1 != gotEDSName1 { - t.Fatalf("xdsClient.CancelEDS called for %v, want: %v", edsNameCanceled1, gotEDSName1) - } - - dnsR.UpdateState(resolver.State{Addresses: []resolver.Address{{Addr: "1.1.1.1"}, {Addr: "2.2.2.2"}}}) - select { - case u := <-rr.updateChannel: - if diff := cmp.Diff(u.priorities, []priorityConfig{{ - mechanism: DiscoveryMechanism{ - Type: DiscoveryMechanismTypeLogicalDNS, - DNSHostname: testDNSTarget, - }, - addresses: []string{"1.1.1.1", "2.2.2.2"}, - childNameGen: newNameGenerator(1), - }}, cmp.AllowUnexported(priorityConfig{}, nameGenerator{})); diff != "" { - t.Fatalf("got unexpected resource update, diff (-got, +want): %v", diff) - } - case <-ctx.Done(): - t.Fatal("Timed out waiting for update from update channel.") - } - - // Close the resource resolver. Should stop DNS. - rr.stop() - select { - case <-dnsCloseCh: - case <-ctx.Done(): - t.Fatal("Timed out waiting for closing DNS resolver") - } -} - -// Test the case that covers errors for both EDS and DNS. 
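Editorial note: several tests in this file repeat one idiom: first prove that no update leaks out within a short window, then block for the real update under the overall test deadline. Distilled into a hedged, generic sketch, where updateCh stands in for rr.updateChannel and the two timeout constants mirror defaultTestShortTimeout and defaultTestTimeout:

// Step 1: assert nothing is emitted within the short window.
shortCtx, shortCancel := context.WithTimeout(context.Background(), defaultTestShortTimeout)
defer shortCancel()
select {
case u := <-updateCh:
	t.Fatalf("got unexpected update %+v before all resources resolved", u)
case <-shortCtx.Done():
	// Expected: the short window elapsed with nothing emitted.
}

// Step 2: wait for the real update under the test deadline.
ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout)
defer cancel()
select {
case u := <-updateCh:
	// Assert on u here, e.g. with cmp.Diff against the expected priorities.
	_ = u
case <-ctx.Done():
	t.Fatal("Timed out waiting for update from update channel.")
}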
-func (s) TestResourceResolverError(t *testing.T) { - dnsTargetCh, dnsCloseCh, _, dnsR, cleanup := setupDNS() - defer cleanup() - fakeClient := fakeclient.NewClient() - rr := newResourceResolver(&clusterResolverBalancer{xdsClient: fakeClient}) - rr.updateMechanisms([]DiscoveryMechanism{ - { - Type: DiscoveryMechanismTypeEDS, - Cluster: testClusterName, - }, - { - Type: DiscoveryMechanismTypeLogicalDNS, - DNSHostname: testDNSTarget, - }, - }) - ctx, ctxCancel := context.WithTimeout(context.Background(), defaultTestTimeout) - defer ctxCancel() - gotEDSName1, err := fakeClient.WaitForWatchEDS(ctx) - if err != nil { - t.Fatalf("xdsClient.WatchCDS failed with error: %v", err) - } - if gotEDSName1 != testClusterName { - t.Fatalf("xdsClient.WatchEDS called for cluster: %v, want: %v", gotEDSName1, testClusterName) - } - select { - case target := <-dnsTargetCh: - if diff := cmp.Diff(target, resolver.Target{Scheme: "dns", URL: *testutils.MustParseURL("dns:///" + testDNSTarget)}); diff != "" { - t.Fatalf("got unexpected DNS target to watch, diff (-got, +want): %v", diff) - } - case <-ctx.Done(): - t.Fatal("Timed out waiting for building DNS resolver") - } - - // Invoke callback with an error, should get an update. - edsErr := fmt.Errorf("EDS error") - fakeClient.InvokeWatchEDSCallback(gotEDSName1, xdsresource.EndpointsUpdate{}, edsErr) - select { - case u := <-rr.updateChannel: - if u.err != edsErr { - t.Fatalf("got unexpected error from update, want %v, got %v", edsErr, u.err) - } - case <-ctx.Done(): - t.Fatal("Timed out waiting for update from update channel.") - } - - // Invoke DNS with an error, should get an update. - dnsErr := fmt.Errorf("DNS error") - dnsR.ReportError(dnsErr) - select { - case u := <-rr.updateChannel: - if u.err != dnsErr { - t.Fatalf("got unexpected error from update, want %v, got %v", dnsErr, u.err) - } - case <-ctx.Done(): - t.Fatal("Timed out waiting for update from update channel.") - } - - // Close the resource resolver. Should stop EDS watch. - rr.stop() - edsNameCanceled1, err := fakeClient.WaitForCancelEDSWatch(ctx) - if err != nil { - t.Fatalf("xdsClient.CancelCDS failed with error: %v", err) - } - if edsNameCanceled1 != gotEDSName1 { - t.Fatalf("xdsClient.CancelEDS called for %v, want: %v", edsNameCanceled1, gotEDSName1) - } - select { - case <-dnsCloseCh: - case <-ctx.Done(): - t.Fatal("Timed out waiting for closing DNS resolver") - } -} - -// Test re-resolve of the DNS resolver. -func (s) TestResourceResolverDNSResolveNow(t *testing.T) { - dnsTargetCh, dnsCloseCh, resolveNowCh, dnsR, cleanup := setupDNS() - defer cleanup() - fakeClient := fakeclient.NewClient() - rr := newResourceResolver(&clusterResolverBalancer{xdsClient: fakeClient}) - rr.updateMechanisms([]DiscoveryMechanism{{ - Type: DiscoveryMechanismTypeLogicalDNS, - DNSHostname: testDNSTarget, - }}) - ctx, ctxCancel := context.WithTimeout(context.Background(), defaultTestTimeout) - defer ctxCancel() - select { - case target := <-dnsTargetCh: - if diff := cmp.Diff(target, resolver.Target{Scheme: "dns", URL: *testutils.MustParseURL("dns:///" + testDNSTarget)}); diff != "" { - t.Fatalf("got unexpected DNS target to watch, diff (-got, +want): %v", diff) - } - case <-ctx.Done(): - t.Fatal("Timed out waiting for building DNS resolver") - } - - // Invoke callback, should get an update. 
- dnsR.UpdateState(resolver.State{Addresses: []resolver.Address{{Addr: "1.1.1.1"}, {Addr: "2.2.2.2"}}}) - select { - case u := <-rr.updateChannel: - if diff := cmp.Diff(u.priorities, []priorityConfig{{ - mechanism: DiscoveryMechanism{ - Type: DiscoveryMechanismTypeLogicalDNS, - DNSHostname: testDNSTarget, - }, - addresses: []string{"1.1.1.1", "2.2.2.2"}, - childNameGen: newNameGenerator(0), - }}, cmp.AllowUnexported(priorityConfig{}, nameGenerator{})); diff != "" { - t.Fatalf("got unexpected resource update, diff (-got, +want): %v", diff) - } - case <-ctx.Done(): - t.Fatal("Timed out waiting for update from update channel.") - } - rr.resolveNow() - select { - case <-resolveNowCh: - case <-ctx.Done(): - t.Fatal("Timed out waiting for re-resolve") - } - // Close the resource resolver. Should close the underlying resolver. - rr.stop() - select { - case <-dnsCloseCh: - case <-ctx.Done(): - t.Fatal("Timed out waiting for closing DNS resolver") - } -} From 497436cef13290bc6ea8d596ddc9b3a472d0aad3 Mon Sep 17 00:00:00 2001 From: Zach Reyes <39203661+zasweq@users.noreply.github.com> Date: Wed, 26 Apr 2023 12:56:27 -0400 Subject: [PATCH 07/60] xds/internal/balancer/outlierdetection: Change string to String (#6222) --- xds/internal/balancer/outlierdetection/balancer.go | 14 ++++++-------- .../balancer/outlierdetection/subconn_wrapper.go | 2 +- 2 files changed, 7 insertions(+), 9 deletions(-) diff --git a/xds/internal/balancer/outlierdetection/balancer.go b/xds/internal/balancer/outlierdetection/balancer.go index 749449c2123e..97f5503f38d1 100644 --- a/xds/internal/balancer/outlierdetection/balancer.go +++ b/xds/internal/balancer/outlierdetection/balancer.go @@ -828,8 +828,7 @@ func (b *outlierDetectionBalancer) successRateAlgorithm() { successRate := float64(bucket.numSuccesses) / float64(bucket.numSuccesses+bucket.numFailures) requiredSuccessRate := mean - stddev*(float64(ejectionCfg.StdevFactor)/1000) if successRate < requiredSuccessRate { - channelz.Infof(logger, b.channelzParentID, "SuccessRate algorithm detected outlier: %s. Parameters: successRate=%f, mean=%f, stddev=%f, requiredSuccessRate=%f", - addrInfo.string(), successRate, mean, stddev, requiredSuccessRate) + channelz.Infof(logger, b.channelzParentID, "SuccessRate algorithm detected outlier: %s. 
Parameters: successRate=%f, mean=%f, stddev=%f, requiredSuccessRate=%f", addrInfo, successRate, mean, stddev, requiredSuccessRate) if uint32(grpcrand.Int31n(100)) < ejectionCfg.EnforcementPercentage { b.ejectAddress(addrInfo) } @@ -856,8 +855,7 @@ func (b *outlierDetectionBalancer) failurePercentageAlgorithm() { } failurePercentage := (float64(bucket.numFailures) / float64(bucket.numSuccesses+bucket.numFailures)) * 100 if failurePercentage > float64(b.cfg.FailurePercentageEjection.Threshold) { - channelz.Infof(logger, b.channelzParentID, "FailurePercentage algorithm detected outlier: %s, failurePercentage=%f", - addrInfo.string(), failurePercentage) + channelz.Infof(logger, b.channelzParentID, "FailurePercentage algorithm detected outlier: %s, failurePercentage=%f", addrInfo, failurePercentage) if uint32(grpcrand.Int31n(100)) < ejectionCfg.EnforcementPercentage { b.ejectAddress(addrInfo) } @@ -872,7 +870,7 @@ func (b *outlierDetectionBalancer) ejectAddress(addrInfo *addressInfo) { addrInfo.ejectionTimeMultiplier++ for _, sbw := range addrInfo.sws { sbw.eject() - channelz.Infof(logger, b.channelzParentID, "Subchannel ejected: %s", sbw.string()) + channelz.Infof(logger, b.channelzParentID, "Subchannel ejected: %s", sbw) } } @@ -883,7 +881,7 @@ func (b *outlierDetectionBalancer) unejectAddress(addrInfo *addressInfo) { addrInfo.latestEjectionTimestamp = time.Time{} for _, sbw := range addrInfo.sws { sbw.uneject() - channelz.Infof(logger, b.channelzParentID, "Subchannel unejected: %s", sbw.string()) + channelz.Infof(logger, b.channelzParentID, "Subchannel unejected: %s", sbw) } } @@ -908,11 +906,11 @@ type addressInfo struct { sws []*subConnWrapper } -func (a *addressInfo) string() string { +func (a *addressInfo) String() string { var res strings.Builder res.WriteString("[") for _, sw := range a.sws { - res.WriteString(sw.string()) + res.WriteString(sw.String()) } res.WriteString("]") return res.String() diff --git a/xds/internal/balancer/outlierdetection/subconn_wrapper.go b/xds/internal/balancer/outlierdetection/subconn_wrapper.go index be631387b2f3..71a996f29ae0 100644 --- a/xds/internal/balancer/outlierdetection/subconn_wrapper.go +++ b/xds/internal/balancer/outlierdetection/subconn_wrapper.go @@ -68,6 +68,6 @@ func (scw *subConnWrapper) uneject() { }) } -func (scw *subConnWrapper) string() string { +func (scw *subConnWrapper) String() string { return fmt.Sprintf("%+v", scw.addresses) } From e853dbf004c343da4b8c6204524765ba6fbeef38 Mon Sep 17 00:00:00 2001 From: Gregory Cooke Date: Wed, 26 Apr 2023 15:05:18 -0400 Subject: [PATCH 08/60] authz: add conversion of json to RBAC Audit Logging config (#6192) Add conversion of json to RBAC Audit Logging config --- authz/rbac_translator.go | 94 ++++- authz/rbac_translator_test.go | 644 +++++++++++++++++++++++++++++++++- examples/go.mod | 2 +- examples/go.sum | 4 +- gcp/observability/go.sum | 2 +- go.mod | 2 +- go.sum | 4 +- interop/observability/go.sum | 2 +- stats/opencensus/go.sum | 2 +- 9 files changed, 740 insertions(+), 16 deletions(-) diff --git a/authz/rbac_translator.go b/authz/rbac_translator.go index b01fc2fcdb1d..6e083cfba8ea 100644 --- a/authz/rbac_translator.go +++ b/authz/rbac_translator.go @@ -28,9 +28,12 @@ import ( "fmt" "strings" + v3corepb "github.com/envoyproxy/go-control-plane/envoy/config/core/v3" v3rbacpb "github.com/envoyproxy/go-control-plane/envoy/config/rbac/v3" v3routepb "github.com/envoyproxy/go-control-plane/envoy/config/route/v3" v3matcherpb "github.com/envoyproxy/go-control-plane/envoy/type/matcher/v3" + 
"google.golang.org/protobuf/types/known/anypb" + "google.golang.org/protobuf/types/known/structpb" ) type header struct { @@ -53,11 +56,23 @@ type rule struct { Request request } +type auditLogger struct { + Name string `json:"name"` + Config *structpb.Struct `json:"config"` + IsOptional bool `json:"is_optional"` +} + +type auditLoggingOptions struct { + AuditCondition string `json:"audit_condition"` + AuditLoggers []auditLogger `json:"audit_loggers"` +} + // Represents the SDK authorization policy provided by user. type authorizationPolicy struct { - Name string - DenyRules []rule `json:"deny_rules"` - AllowRules []rule `json:"allow_rules"` + Name string + DenyRules []rule `json:"deny_rules"` + AllowRules []rule `json:"allow_rules"` + AuditLoggingOptions auditLoggingOptions `json:"audit_logging_options"` } func principalOr(principals []*v3rbacpb.Principal) *v3rbacpb.Principal { @@ -266,6 +281,68 @@ func parseRules(rules []rule, prefixName string) (map[string]*v3rbacpb.Policy, e return policies, nil } +// Parse auditLoggingOptions to the associated RBAC protos. The single +// auditLoggingOptions results in two different parsed protos, one for the allow +// policy and one for the deny policy +func (options *auditLoggingOptions) toProtos() (allow *v3rbacpb.RBAC_AuditLoggingOptions, deny *v3rbacpb.RBAC_AuditLoggingOptions, err error) { + allow = &v3rbacpb.RBAC_AuditLoggingOptions{} + deny = &v3rbacpb.RBAC_AuditLoggingOptions{} + + if options.AuditCondition != "" { + rbacCondition, ok := v3rbacpb.RBAC_AuditLoggingOptions_AuditCondition_value[options.AuditCondition] + if !ok { + return nil, nil, fmt.Errorf("failed to parse AuditCondition %v. Allowed values {NONE, ON_DENY, ON_ALLOW, ON_DENY_AND_ALLOW}", options.AuditCondition) + } + allow.AuditCondition = v3rbacpb.RBAC_AuditLoggingOptions_AuditCondition(rbacCondition) + deny.AuditCondition = toDenyCondition(v3rbacpb.RBAC_AuditLoggingOptions_AuditCondition(rbacCondition)) + } + + for i := range options.AuditLoggers { + config := &options.AuditLoggers[i] + if config.Config == nil { + return nil, nil, fmt.Errorf("AuditLogger Config field cannot be nil") + } + customConfig, err := anypb.New(config.Config) + if err != nil { + return nil, nil, fmt.Errorf("error parsing custom audit logger config: %v", err) + } + logger := &v3corepb.TypedExtensionConfig{Name: config.Name, TypedConfig: customConfig} + rbacConfig := v3rbacpb.RBAC_AuditLoggingOptions_AuditLoggerConfig{ + IsOptional: config.IsOptional, + AuditLogger: logger, + } + allow.LoggerConfigs = append(allow.LoggerConfigs, &rbacConfig) + deny.LoggerConfigs = append(deny.LoggerConfigs, &rbacConfig) + } + + return allow, deny, nil +} + +// Maps the AuditCondition coming from AuditLoggingOptions to the proper +// condition for the deny policy RBAC proto +func toDenyCondition(condition v3rbacpb.RBAC_AuditLoggingOptions_AuditCondition) v3rbacpb.RBAC_AuditLoggingOptions_AuditCondition { + // Mapping the overall policy AuditCondition to what it must be for the Deny and Allow RBAC + // See gRPC A59 for details - https://github.com/grpc/proposal/pull/346/files + // |Authorization Policy |DENY RBAC |ALLOW RBAC | + // |----------------------|-------------------|---------------------| + // |NONE |NONE |NONE | + // |ON_DENY |ON_DENY |ON_DENY | + // |ON_ALLOW |NONE |ON_ALLOW | + // |ON_DENY_AND_ALLOW |ON_DENY |ON_DENY_AND_ALLOW | + switch condition { + case v3rbacpb.RBAC_AuditLoggingOptions_NONE: + return v3rbacpb.RBAC_AuditLoggingOptions_NONE + case v3rbacpb.RBAC_AuditLoggingOptions_ON_DENY: + return 
v3rbacpb.RBAC_AuditLoggingOptions_ON_DENY + case v3rbacpb.RBAC_AuditLoggingOptions_ON_ALLOW: + return v3rbacpb.RBAC_AuditLoggingOptions_NONE + case v3rbacpb.RBAC_AuditLoggingOptions_ON_DENY_AND_ALLOW: + return v3rbacpb.RBAC_AuditLoggingOptions_ON_DENY + default: + return v3rbacpb.RBAC_AuditLoggingOptions_NONE + } +} + // translatePolicy translates SDK authorization policy in JSON format to two // Envoy RBAC polices (deny followed by allow policy) or only one Envoy RBAC // allow policy. If the input policy cannot be parsed or is invalid, an error @@ -283,6 +360,10 @@ func translatePolicy(policyStr string) ([]*v3rbacpb.RBAC, error) { if len(policy.AllowRules) == 0 { return nil, fmt.Errorf(`"allow_rules" is not present`) } + allowLogger, denyLogger, err := policy.AuditLoggingOptions.toProtos() + if err != nil { + return nil, err + } rbacs := make([]*v3rbacpb.RBAC, 0, 2) if len(policy.DenyRules) > 0 { denyPolicies, err := parseRules(policy.DenyRules, policy.Name) @@ -290,8 +371,9 @@ func translatePolicy(policyStr string) ([]*v3rbacpb.RBAC, error) { return nil, fmt.Errorf(`"deny_rules" %v`, err) } denyRBAC := &v3rbacpb.RBAC{ - Action: v3rbacpb.RBAC_DENY, - Policies: denyPolicies, + Action: v3rbacpb.RBAC_DENY, + Policies: denyPolicies, + AuditLoggingOptions: denyLogger, } rbacs = append(rbacs, denyRBAC) } @@ -299,6 +381,6 @@ func translatePolicy(policyStr string) ([]*v3rbacpb.RBAC, error) { if err != nil { return nil, fmt.Errorf(`"allow_rules" %v`, err) } - allowRBAC := &v3rbacpb.RBAC{Action: v3rbacpb.RBAC_ALLOW, Policies: allowPolicies} + allowRBAC := &v3rbacpb.RBAC{Action: v3rbacpb.RBAC_ALLOW, Policies: allowPolicies, AuditLoggingOptions: allowLogger} return append(rbacs, allowRBAC), nil } diff --git a/authz/rbac_translator_test.go b/authz/rbac_translator_test.go index e8e2f76b5ed8..b1c125a5ce00 100644 --- a/authz/rbac_translator_test.go +++ b/authz/rbac_translator_test.go @@ -24,7 +24,10 @@ import ( "github.com/google/go-cmp/cmp" "google.golang.org/protobuf/testing/protocmp" + "google.golang.org/protobuf/types/known/anypb" + "google.golang.org/protobuf/types/known/structpb" + v3corepb "github.com/envoyproxy/go-control-plane/envoy/config/core/v3" v3rbacpb "github.com/envoyproxy/go-control-plane/envoy/config/rbac/v3" v3routepb "github.com/envoyproxy/go-control-plane/envoy/config/route/v3" v3matcherpb "github.com/envoyproxy/go-control-plane/envoy/type/matcher/v3" @@ -42,7 +45,7 @@ func TestTranslatePolicy(t *testing.T) { "deny_rules": [ { "name": "deny_policy_1", - "source": { + "source": { "principals":[ "spiffe://foo.abc", "spiffe://bar*", @@ -117,6 +120,7 @@ func TestTranslatePolicy(t *testing.T) { }, }, }, + AuditLoggingOptions: &v3rbacpb.RBAC_AuditLoggingOptions{}, }, { Action: v3rbacpb.RBAC_ALLOW, @@ -202,6 +206,7 @@ func TestTranslatePolicy(t *testing.T) { }, }, }, + AuditLoggingOptions: &v3rbacpb.RBAC_AuditLoggingOptions{}, }, }, }, @@ -242,6 +247,553 @@ func TestTranslatePolicy(t *testing.T) { }, }, }, + AuditLoggingOptions: &v3rbacpb.RBAC_AuditLoggingOptions{}, + }, + }, + }, + "audit_logging_ALLOW empty config": { + authzPolicy: `{ + "name": "authz", + "allow_rules": [ + { + "name": "allow_authenticated", + "source": { + "principals":["*", ""] + } + }], + "deny_rules": [ + { + "name": "deny_policy_1", + "source": { + "principals":[ + "spiffe://foo.abc" + ] + } + }], + "audit_logging_options": { + "audit_condition": "ON_ALLOW", + "audit_loggers": [ + { + "name": "stdout_logger", + "config": {}, + "is_optional": false + } + ] + } + }`, + wantPolicies: []*v3rbacpb.RBAC{ + 
{ + Action: v3rbacpb.RBAC_DENY, + Policies: map[string]*v3rbacpb.Policy{ + "authz_deny_policy_1": { + Principals: []*v3rbacpb.Principal{ + {Identifier: &v3rbacpb.Principal_OrIds{OrIds: &v3rbacpb.Principal_Set{ + Ids: []*v3rbacpb.Principal{ + {Identifier: &v3rbacpb.Principal_Authenticated_{ + Authenticated: &v3rbacpb.Principal_Authenticated{PrincipalName: &v3matcherpb.StringMatcher{ + MatchPattern: &v3matcherpb.StringMatcher_Exact{Exact: "spiffe://foo.abc"}, + }}, + }}, + }, + }}}, + }, + Permissions: []*v3rbacpb.Permission{ + {Rule: &v3rbacpb.Permission_Any{Any: true}}, + }, + }, + }, + AuditLoggingOptions: &v3rbacpb.RBAC_AuditLoggingOptions{ + AuditCondition: v3rbacpb.RBAC_AuditLoggingOptions_NONE, + LoggerConfigs: []*v3rbacpb.RBAC_AuditLoggingOptions_AuditLoggerConfig{ + {AuditLogger: &v3corepb.TypedExtensionConfig{Name: "stdout_logger", TypedConfig: anyPbHelper(t, map[string]interface{}{})}, + IsOptional: false, + }, + }, + }, + }, + { + Action: v3rbacpb.RBAC_ALLOW, + Policies: map[string]*v3rbacpb.Policy{ + "authz_allow_authenticated": { + Principals: []*v3rbacpb.Principal{ + {Identifier: &v3rbacpb.Principal_OrIds{OrIds: &v3rbacpb.Principal_Set{ + Ids: []*v3rbacpb.Principal{ + {Identifier: &v3rbacpb.Principal_Authenticated_{ + Authenticated: &v3rbacpb.Principal_Authenticated{PrincipalName: &v3matcherpb.StringMatcher{ + MatchPattern: &v3matcherpb.StringMatcher_SafeRegex{SafeRegex: &v3matcherpb.RegexMatcher{Regex: ".+"}}, + }}, + }}, + {Identifier: &v3rbacpb.Principal_Authenticated_{ + Authenticated: &v3rbacpb.Principal_Authenticated{PrincipalName: &v3matcherpb.StringMatcher{ + MatchPattern: &v3matcherpb.StringMatcher_Exact{Exact: ""}, + }}, + }}, + }, + }}}, + }, + Permissions: []*v3rbacpb.Permission{ + {Rule: &v3rbacpb.Permission_Any{Any: true}}, + }, + }, + }, + AuditLoggingOptions: &v3rbacpb.RBAC_AuditLoggingOptions{ + AuditCondition: v3rbacpb.RBAC_AuditLoggingOptions_ON_ALLOW, + LoggerConfigs: []*v3rbacpb.RBAC_AuditLoggingOptions_AuditLoggerConfig{ + {AuditLogger: &v3corepb.TypedExtensionConfig{Name: "stdout_logger", TypedConfig: anyPbHelper(t, map[string]interface{}{})}, + IsOptional: false, + }, + }, + }, + }, + }, + }, + "audit_logging_DENY_AND_ALLOW": { + authzPolicy: `{ + "name": "authz", + "allow_rules": [ + { + "name": "allow_authenticated", + "source": { + "principals":["*", ""] + } + }], + "deny_rules": [ + { + "name": "deny_policy_1", + "source": { + "principals":[ + "spiffe://foo.abc" + ] + } + }], + "audit_logging_options": { + "audit_condition": "ON_DENY_AND_ALLOW", + "audit_loggers": [ + { + "name": "stdout_logger", + "config": {}, + "is_optional": false + } + ] + } + }`, + wantPolicies: []*v3rbacpb.RBAC{ + { + Action: v3rbacpb.RBAC_DENY, + Policies: map[string]*v3rbacpb.Policy{ + "authz_deny_policy_1": { + Principals: []*v3rbacpb.Principal{ + {Identifier: &v3rbacpb.Principal_OrIds{OrIds: &v3rbacpb.Principal_Set{ + Ids: []*v3rbacpb.Principal{ + {Identifier: &v3rbacpb.Principal_Authenticated_{ + Authenticated: &v3rbacpb.Principal_Authenticated{PrincipalName: &v3matcherpb.StringMatcher{ + MatchPattern: &v3matcherpb.StringMatcher_Exact{Exact: "spiffe://foo.abc"}, + }}, + }}, + }, + }}}, + }, + Permissions: []*v3rbacpb.Permission{ + {Rule: &v3rbacpb.Permission_Any{Any: true}}, + }, + }, + }, + AuditLoggingOptions: &v3rbacpb.RBAC_AuditLoggingOptions{ + AuditCondition: v3rbacpb.RBAC_AuditLoggingOptions_ON_DENY, + LoggerConfigs: []*v3rbacpb.RBAC_AuditLoggingOptions_AuditLoggerConfig{ + {AuditLogger: &v3corepb.TypedExtensionConfig{Name: "stdout_logger", TypedConfig: 
anyPbHelper(t, map[string]interface{}{})}, + IsOptional: false, + }, + }, + }, + }, + { + Action: v3rbacpb.RBAC_ALLOW, + Policies: map[string]*v3rbacpb.Policy{ + "authz_allow_authenticated": { + Principals: []*v3rbacpb.Principal{ + {Identifier: &v3rbacpb.Principal_OrIds{OrIds: &v3rbacpb.Principal_Set{ + Ids: []*v3rbacpb.Principal{ + {Identifier: &v3rbacpb.Principal_Authenticated_{ + Authenticated: &v3rbacpb.Principal_Authenticated{PrincipalName: &v3matcherpb.StringMatcher{ + MatchPattern: &v3matcherpb.StringMatcher_SafeRegex{SafeRegex: &v3matcherpb.RegexMatcher{Regex: ".+"}}, + }}, + }}, + {Identifier: &v3rbacpb.Principal_Authenticated_{ + Authenticated: &v3rbacpb.Principal_Authenticated{PrincipalName: &v3matcherpb.StringMatcher{ + MatchPattern: &v3matcherpb.StringMatcher_Exact{Exact: ""}, + }}, + }}, + }, + }}}, + }, + Permissions: []*v3rbacpb.Permission{ + {Rule: &v3rbacpb.Permission_Any{Any: true}}, + }, + }, + }, + AuditLoggingOptions: &v3rbacpb.RBAC_AuditLoggingOptions{ + AuditCondition: v3rbacpb.RBAC_AuditLoggingOptions_ON_DENY_AND_ALLOW, + LoggerConfigs: []*v3rbacpb.RBAC_AuditLoggingOptions_AuditLoggerConfig{ + {AuditLogger: &v3corepb.TypedExtensionConfig{Name: "stdout_logger", TypedConfig: anyPbHelper(t, map[string]interface{}{})}, + IsOptional: false, + }, + }, + }, + }, + }, + }, + "audit_logging_NONE": { + authzPolicy: `{ + "name": "authz", + "allow_rules": [ + { + "name": "allow_authenticated", + "source": { + "principals":["*", ""] + } + }], + "deny_rules": [ + { + "name": "deny_policy_1", + "source": { + "principals":[ + "spiffe://foo.abc" + ] + } + }], + "audit_logging_options": { + "audit_condition": "NONE", + "audit_loggers": [ + { + "name": "stdout_logger", + "config": {}, + "is_optional": false + } + ] + } + }`, + wantPolicies: []*v3rbacpb.RBAC{ + { + Action: v3rbacpb.RBAC_DENY, + Policies: map[string]*v3rbacpb.Policy{ + "authz_deny_policy_1": { + Principals: []*v3rbacpb.Principal{ + {Identifier: &v3rbacpb.Principal_OrIds{OrIds: &v3rbacpb.Principal_Set{ + Ids: []*v3rbacpb.Principal{ + {Identifier: &v3rbacpb.Principal_Authenticated_{ + Authenticated: &v3rbacpb.Principal_Authenticated{PrincipalName: &v3matcherpb.StringMatcher{ + MatchPattern: &v3matcherpb.StringMatcher_Exact{Exact: "spiffe://foo.abc"}, + }}, + }}, + }, + }}}, + }, + Permissions: []*v3rbacpb.Permission{ + {Rule: &v3rbacpb.Permission_Any{Any: true}}, + }, + }, + }, + AuditLoggingOptions: &v3rbacpb.RBAC_AuditLoggingOptions{ + AuditCondition: v3rbacpb.RBAC_AuditLoggingOptions_NONE, + LoggerConfigs: []*v3rbacpb.RBAC_AuditLoggingOptions_AuditLoggerConfig{ + {AuditLogger: &v3corepb.TypedExtensionConfig{Name: "stdout_logger", TypedConfig: anyPbHelper(t, map[string]interface{}{})}, + IsOptional: false, + }, + }, + }, + }, + { + Action: v3rbacpb.RBAC_ALLOW, + Policies: map[string]*v3rbacpb.Policy{ + "authz_allow_authenticated": { + Principals: []*v3rbacpb.Principal{ + {Identifier: &v3rbacpb.Principal_OrIds{OrIds: &v3rbacpb.Principal_Set{ + Ids: []*v3rbacpb.Principal{ + {Identifier: &v3rbacpb.Principal_Authenticated_{ + Authenticated: &v3rbacpb.Principal_Authenticated{PrincipalName: &v3matcherpb.StringMatcher{ + MatchPattern: &v3matcherpb.StringMatcher_SafeRegex{SafeRegex: &v3matcherpb.RegexMatcher{Regex: ".+"}}, + }}, + }}, + {Identifier: &v3rbacpb.Principal_Authenticated_{ + Authenticated: &v3rbacpb.Principal_Authenticated{PrincipalName: &v3matcherpb.StringMatcher{ + MatchPattern: &v3matcherpb.StringMatcher_Exact{Exact: ""}, + }}, + }}, + }, + }}}, + }, + Permissions: []*v3rbacpb.Permission{ + {Rule: 
&v3rbacpb.Permission_Any{Any: true}}, + }, + }, + }, + AuditLoggingOptions: &v3rbacpb.RBAC_AuditLoggingOptions{ + AuditCondition: v3rbacpb.RBAC_AuditLoggingOptions_NONE, + LoggerConfigs: []*v3rbacpb.RBAC_AuditLoggingOptions_AuditLoggerConfig{ + {AuditLogger: &v3corepb.TypedExtensionConfig{Name: "stdout_logger", TypedConfig: anyPbHelper(t, map[string]interface{}{})}, + IsOptional: false, + }, + }, + }, + }, + }, + }, + "audit_logging_custom_config simple": { + authzPolicy: `{ + "name": "authz", + "allow_rules": [ + { + "name": "allow_authenticated", + "source": { + "principals":["*", ""] + } + }], + "deny_rules": [ + { + "name": "deny_policy_1", + "source": { + "principals":[ + "spiffe://foo.abc" + ] + } + }], + "audit_logging_options": { + "audit_condition": "NONE", + "audit_loggers": [ + { + "name": "stdout_logger", + "config": {"abc":123, "xyz":"123"}, + "is_optional": false + } + ] + } + }`, + wantPolicies: []*v3rbacpb.RBAC{ + { + Action: v3rbacpb.RBAC_DENY, + Policies: map[string]*v3rbacpb.Policy{ + "authz_deny_policy_1": { + Principals: []*v3rbacpb.Principal{ + {Identifier: &v3rbacpb.Principal_OrIds{OrIds: &v3rbacpb.Principal_Set{ + Ids: []*v3rbacpb.Principal{ + {Identifier: &v3rbacpb.Principal_Authenticated_{ + Authenticated: &v3rbacpb.Principal_Authenticated{PrincipalName: &v3matcherpb.StringMatcher{ + MatchPattern: &v3matcherpb.StringMatcher_Exact{Exact: "spiffe://foo.abc"}, + }}, + }}, + }, + }}}, + }, + Permissions: []*v3rbacpb.Permission{ + {Rule: &v3rbacpb.Permission_Any{Any: true}}, + }, + }, + }, + AuditLoggingOptions: &v3rbacpb.RBAC_AuditLoggingOptions{ + AuditCondition: v3rbacpb.RBAC_AuditLoggingOptions_NONE, + LoggerConfigs: []*v3rbacpb.RBAC_AuditLoggingOptions_AuditLoggerConfig{ + {AuditLogger: &v3corepb.TypedExtensionConfig{Name: "stdout_logger", TypedConfig: anyPbHelper(t, map[string]interface{}{"abc": 123, "xyz": "123"})}, + IsOptional: false, + }, + }, + }, + }, + { + Action: v3rbacpb.RBAC_ALLOW, + Policies: map[string]*v3rbacpb.Policy{ + "authz_allow_authenticated": { + Principals: []*v3rbacpb.Principal{ + {Identifier: &v3rbacpb.Principal_OrIds{OrIds: &v3rbacpb.Principal_Set{ + Ids: []*v3rbacpb.Principal{ + {Identifier: &v3rbacpb.Principal_Authenticated_{ + Authenticated: &v3rbacpb.Principal_Authenticated{PrincipalName: &v3matcherpb.StringMatcher{ + MatchPattern: &v3matcherpb.StringMatcher_SafeRegex{SafeRegex: &v3matcherpb.RegexMatcher{Regex: ".+"}}, + }}, + }}, + {Identifier: &v3rbacpb.Principal_Authenticated_{ + Authenticated: &v3rbacpb.Principal_Authenticated{PrincipalName: &v3matcherpb.StringMatcher{ + MatchPattern: &v3matcherpb.StringMatcher_Exact{Exact: ""}, + }}, + }}, + }, + }}}, + }, + Permissions: []*v3rbacpb.Permission{ + {Rule: &v3rbacpb.Permission_Any{Any: true}}, + }, + }, + }, + AuditLoggingOptions: &v3rbacpb.RBAC_AuditLoggingOptions{ + AuditCondition: v3rbacpb.RBAC_AuditLoggingOptions_NONE, + LoggerConfigs: []*v3rbacpb.RBAC_AuditLoggingOptions_AuditLoggerConfig{ + {AuditLogger: &v3corepb.TypedExtensionConfig{Name: "stdout_logger", TypedConfig: anyPbHelper(t, map[string]interface{}{"abc": 123, "xyz": "123"})}, + IsOptional: false, + }, + }, + }, + }, + }, + }, + "audit_logging_custom_config nested": { + authzPolicy: `{ + "name": "authz", + "allow_rules": [ + { + "name": "allow_authenticated", + "source": { + "principals":["*", ""] + } + }], + "audit_logging_options": { + "audit_condition": "NONE", + "audit_loggers": [ + { + "name": "stdout_logger", + "config": {"abc":123, "xyz":{"abc":123}}, + "is_optional": false + } + ] + } + }`, + wantPolicies: 
[]*v3rbacpb.RBAC{ + { + Action: v3rbacpb.RBAC_ALLOW, + Policies: map[string]*v3rbacpb.Policy{ + "authz_allow_authenticated": { + Principals: []*v3rbacpb.Principal{ + {Identifier: &v3rbacpb.Principal_OrIds{OrIds: &v3rbacpb.Principal_Set{ + Ids: []*v3rbacpb.Principal{ + {Identifier: &v3rbacpb.Principal_Authenticated_{ + Authenticated: &v3rbacpb.Principal_Authenticated{PrincipalName: &v3matcherpb.StringMatcher{ + MatchPattern: &v3matcherpb.StringMatcher_SafeRegex{SafeRegex: &v3matcherpb.RegexMatcher{Regex: ".+"}}, + }}, + }}, + {Identifier: &v3rbacpb.Principal_Authenticated_{ + Authenticated: &v3rbacpb.Principal_Authenticated{PrincipalName: &v3matcherpb.StringMatcher{ + MatchPattern: &v3matcherpb.StringMatcher_Exact{Exact: ""}, + }}, + }}, + }, + }}}, + }, + Permissions: []*v3rbacpb.Permission{ + {Rule: &v3rbacpb.Permission_Any{Any: true}}, + }, + }, + }, + AuditLoggingOptions: &v3rbacpb.RBAC_AuditLoggingOptions{ + AuditCondition: v3rbacpb.RBAC_AuditLoggingOptions_NONE, + LoggerConfigs: []*v3rbacpb.RBAC_AuditLoggingOptions_AuditLoggerConfig{ + {AuditLogger: &v3corepb.TypedExtensionConfig{Name: "stdout_logger", TypedConfig: anyPbHelper(t, map[string]interface{}{"abc": 123, "xyz": map[string]interface{}{"abc": 123}})}, + IsOptional: false, + }, + }, + }, + }, + }, + }, + "missing audit logger config": { + authzPolicy: `{ + "name": "authz", + "allow_rules": [ + { + "name": "allow_authenticated", + "source": { + "principals":["*", ""] + } + }], + "audit_logging_options": { + "audit_condition": "NONE" + } + }`, + wantPolicies: []*v3rbacpb.RBAC{ + { + Action: v3rbacpb.RBAC_ALLOW, + Policies: map[string]*v3rbacpb.Policy{ + "authz_allow_authenticated": { + Principals: []*v3rbacpb.Principal{ + {Identifier: &v3rbacpb.Principal_OrIds{OrIds: &v3rbacpb.Principal_Set{ + Ids: []*v3rbacpb.Principal{ + {Identifier: &v3rbacpb.Principal_Authenticated_{ + Authenticated: &v3rbacpb.Principal_Authenticated{PrincipalName: &v3matcherpb.StringMatcher{ + MatchPattern: &v3matcherpb.StringMatcher_SafeRegex{SafeRegex: &v3matcherpb.RegexMatcher{Regex: ".+"}}, + }}, + }}, + {Identifier: &v3rbacpb.Principal_Authenticated_{ + Authenticated: &v3rbacpb.Principal_Authenticated{PrincipalName: &v3matcherpb.StringMatcher{ + MatchPattern: &v3matcherpb.StringMatcher_Exact{Exact: ""}, + }}, + }}, + }, + }}}, + }, + Permissions: []*v3rbacpb.Permission{ + {Rule: &v3rbacpb.Permission_Any{Any: true}}, + }, + }, + }, + AuditLoggingOptions: &v3rbacpb.RBAC_AuditLoggingOptions{ + AuditCondition: v3rbacpb.RBAC_AuditLoggingOptions_NONE, + LoggerConfigs: []*v3rbacpb.RBAC_AuditLoggingOptions_AuditLoggerConfig{}, + }, + }, + }, + }, + "missing audit condition": { + authzPolicy: `{ + "name": "authz", + "allow_rules": [ + { + "name": "allow_authenticated", + "source": { + "principals":["*", ""] + } + }], + "audit_logging_options": { + "audit_loggers": [ + { + "name": "stdout_logger", + "config": {}, + "is_optional": false + } + ] + } + }`, + wantPolicies: []*v3rbacpb.RBAC{ + { + Action: v3rbacpb.RBAC_ALLOW, + Policies: map[string]*v3rbacpb.Policy{ + "authz_allow_authenticated": { + Principals: []*v3rbacpb.Principal{ + {Identifier: &v3rbacpb.Principal_OrIds{OrIds: &v3rbacpb.Principal_Set{ + Ids: []*v3rbacpb.Principal{ + {Identifier: &v3rbacpb.Principal_Authenticated_{ + Authenticated: &v3rbacpb.Principal_Authenticated{PrincipalName: &v3matcherpb.StringMatcher{ + MatchPattern: &v3matcherpb.StringMatcher_SafeRegex{SafeRegex: &v3matcherpb.RegexMatcher{Regex: ".+"}}, + }}, + }}, + {Identifier: &v3rbacpb.Principal_Authenticated_{ + Authenticated: 
&v3rbacpb.Principal_Authenticated{PrincipalName: &v3matcherpb.StringMatcher{ + MatchPattern: &v3matcherpb.StringMatcher_Exact{Exact: ""}, + }}, + }}, + }, + }}}, + }, + Permissions: []*v3rbacpb.Permission{ + {Rule: &v3rbacpb.Permission_Any{Any: true}}, + }, + }, + }, + AuditLoggingOptions: &v3rbacpb.RBAC_AuditLoggingOptions{ + AuditCondition: v3rbacpb.RBAC_AuditLoggingOptions_NONE, + LoggerConfigs: []*v3rbacpb.RBAC_AuditLoggingOptions_AuditLoggerConfig{ + {AuditLogger: &v3corepb.TypedExtensionConfig{Name: "stdout_logger", TypedConfig: anyPbHelper(t, map[string]interface{}{})}, + IsOptional: false, + }, + }, + }, }, }, }, @@ -298,6 +850,83 @@ func TestTranslatePolicy(t *testing.T) { }`, wantErr: `"allow_rules" 0: "headers" 0: unsupported "key" :method`, }, + "bad audit condition": { + authzPolicy: `{ + "name": "authz", + "allow_rules": [ + { + "name": "allow_authenticated", + "source": { + "principals":["*", ""] + } + }], + "audit_logging_options": { + "audit_condition": "ABC", + "audit_loggers": [ + { + "name": "stdout_logger", + "config": {}, + "is_optional": false + } + ] + } + }`, + wantErr: `failed to parse AuditCondition ABC`, + }, + "bad audit logger config": { + authzPolicy: `{ + "name": "authz", + "allow_rules": [ + { + "name": "allow_authenticated", + "source": { + "principals":["*", ""] + } + }], + "audit_logging_options": { + "audit_condition": "NONE", + "audit_loggers": [ + { + "name": "stdout_logger", + "config": "abc", + "is_optional": false + } + ] + } + }`, + wantErr: `failed to unmarshal policy`, + }, + "missing custom config audit logger": { + authzPolicy: `{ + "name": "authz", + "allow_rules": [ + { + "name": "allow_authenticated", + "source": { + "principals":["*", ""] + } + }], + "deny_rules": [ + { + "name": "deny_policy_1", + "source": { + "principals":[ + "spiffe://foo.abc" + ] + } + }], + "audit_logging_options": { + "audit_condition": "ON_DENY", + "audit_loggers": [ + { + "name": "stdout_logger", + "is_optional": false + } + ] + } + }`, + wantErr: "AuditLogger Config field cannot be nil", + }, } for name, test := range tests { t.Run(name, func(t *testing.T) { @@ -311,3 +940,16 @@ func TestTranslatePolicy(t *testing.T) { }) } } + +func anyPbHelper(t *testing.T, in map[string]interface{}) *anypb.Any { + t.Helper() + pb, err := structpb.NewStruct(in) + if err != nil { + t.Fatal(err) + } + ret, err := anypb.New(pb) + if err != nil { + t.Fatal(err) + } + return ret +} diff --git a/examples/go.mod b/examples/go.mod index 7f79ab2b7e87..7e49c3bce4c9 100644 --- a/examples/go.mod +++ b/examples/go.mod @@ -17,7 +17,7 @@ require ( github.com/census-instrumentation/opencensus-proto v0.4.1 // indirect github.com/cespare/xxhash/v2 v2.2.0 // indirect github.com/cncf/udpa/go v0.0.0-20220112060539-c52dc94e7fbe // indirect - github.com/envoyproxy/go-control-plane v0.11.0 // indirect + github.com/envoyproxy/go-control-plane v0.11.1-0.20230406144219-ba92d50b6596 // indirect github.com/envoyproxy/protoc-gen-validate v0.10.1 // indirect golang.org/x/net v0.9.0 // indirect golang.org/x/sys v0.7.0 // indirect diff --git a/examples/go.sum b/examples/go.sum index edbb05e00946..8006bf69fef6 100644 --- a/examples/go.sum +++ b/examples/go.sum @@ -636,8 +636,8 @@ github.com/docopt/docopt-go v0.0.0-20180111231733-ee0de3bc6815/go.mod h1:WwZ+bS3 github.com/dustin/go-humanize v1.0.0/go.mod h1:HtrtbFcZ19U5GC7JDqmcUSB87Iq5E25KnS6fMYU6eOk= github.com/envoyproxy/go-control-plane v0.10.2-0.20220325020618-49ff273808a1/go.mod h1:KJwIaB5Mv44NWtYuAOFCVOjcI94vtpEz2JU/D2v6IjE= 
github.com/envoyproxy/go-control-plane v0.10.3/go.mod h1:fJJn/j26vwOu972OllsvAgJJM//w9BV6Fxbg2LuVd34= -github.com/envoyproxy/go-control-plane v0.11.0 h1:jtLewhRR2vMRNnq2ZZUoCjUlgut+Y0+sDDWPOfwOi1o= -github.com/envoyproxy/go-control-plane v0.11.0/go.mod h1:VnHyVMpzcLvCFt9yUz1UnCwHLhwx1WguiVDV7pTG/tI= +github.com/envoyproxy/go-control-plane v0.11.1-0.20230406144219-ba92d50b6596 h1:MDgbDqe1rWfGBa+yCcthuqDSHvXFyenZI1U7f1IbWI8= +github.com/envoyproxy/go-control-plane v0.11.1-0.20230406144219-ba92d50b6596/go.mod h1:84cjSkVxFD9Pi/gvI5AOq5NPhGsmS8oPsJLtCON6eK8= github.com/envoyproxy/protoc-gen-validate v0.1.0/go.mod h1:iSmxcyjqTsJpI2R4NaDN7+kN2VEUnK/pcBlmesArF7c= github.com/envoyproxy/protoc-gen-validate v0.6.7/go.mod h1:dyJXwwfPK2VSqiB9Klm1J6romD608Ba7Hij42vrOBCo= github.com/envoyproxy/protoc-gen-validate v0.9.1/go.mod h1:OKNgG7TCp5pF4d6XftA0++PMirau2/yoOwVac3AbF2w= diff --git a/gcp/observability/go.sum b/gcp/observability/go.sum index a629a01ac913..bb5535fab90b 100644 --- a/gcp/observability/go.sum +++ b/gcp/observability/go.sum @@ -647,7 +647,7 @@ github.com/docopt/docopt-go v0.0.0-20180111231733-ee0de3bc6815/go.mod h1:WwZ+bS3 github.com/dustin/go-humanize v1.0.0/go.mod h1:HtrtbFcZ19U5GC7JDqmcUSB87Iq5E25KnS6fMYU6eOk= github.com/envoyproxy/go-control-plane v0.10.2-0.20220325020618-49ff273808a1/go.mod h1:KJwIaB5Mv44NWtYuAOFCVOjcI94vtpEz2JU/D2v6IjE= github.com/envoyproxy/go-control-plane v0.10.3/go.mod h1:fJJn/j26vwOu972OllsvAgJJM//w9BV6Fxbg2LuVd34= -github.com/envoyproxy/go-control-plane v0.11.0/go.mod h1:VnHyVMpzcLvCFt9yUz1UnCwHLhwx1WguiVDV7pTG/tI= +github.com/envoyproxy/go-control-plane v0.11.1-0.20230406144219-ba92d50b6596/go.mod h1:84cjSkVxFD9Pi/gvI5AOq5NPhGsmS8oPsJLtCON6eK8= github.com/envoyproxy/protoc-gen-validate v0.1.0/go.mod h1:iSmxcyjqTsJpI2R4NaDN7+kN2VEUnK/pcBlmesArF7c= github.com/envoyproxy/protoc-gen-validate v0.6.7/go.mod h1:dyJXwwfPK2VSqiB9Klm1J6romD608Ba7Hij42vrOBCo= github.com/envoyproxy/protoc-gen-validate v0.9.1/go.mod h1:OKNgG7TCp5pF4d6XftA0++PMirau2/yoOwVac3AbF2w= diff --git a/go.mod b/go.mod index 5707e6e7ef71..75ea83d9309c 100644 --- a/go.mod +++ b/go.mod @@ -6,7 +6,7 @@ require ( github.com/cespare/xxhash/v2 v2.2.0 github.com/cncf/udpa/go v0.0.0-20220112060539-c52dc94e7fbe github.com/cncf/xds/go v0.0.0-20230310173818-32f1caf87195 - github.com/envoyproxy/go-control-plane v0.11.0 + github.com/envoyproxy/go-control-plane v0.11.1-0.20230406144219-ba92d50b6596 github.com/golang/glog v1.1.0 github.com/golang/protobuf v1.5.3 github.com/google/go-cmp v0.5.9 diff --git a/go.sum b/go.sum index 925f1485b1dd..bd4e7e729e2d 100644 --- a/go.sum +++ b/go.sum @@ -17,8 +17,8 @@ github.com/cncf/xds/go v0.0.0-20230310173818-32f1caf87195 h1:58f1tJ1ra+zFINPlwLW github.com/cncf/xds/go v0.0.0-20230310173818-32f1caf87195/go.mod h1:eXthEFrGJvWHgFFCl3hGmgk+/aYT6PnTQLykKQRLhEs= github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= github.com/envoyproxy/go-control-plane v0.9.0/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4= -github.com/envoyproxy/go-control-plane v0.11.0 h1:jtLewhRR2vMRNnq2ZZUoCjUlgut+Y0+sDDWPOfwOi1o= -github.com/envoyproxy/go-control-plane v0.11.0/go.mod h1:VnHyVMpzcLvCFt9yUz1UnCwHLhwx1WguiVDV7pTG/tI= +github.com/envoyproxy/go-control-plane v0.11.1-0.20230406144219-ba92d50b6596 h1:MDgbDqe1rWfGBa+yCcthuqDSHvXFyenZI1U7f1IbWI8= +github.com/envoyproxy/go-control-plane v0.11.1-0.20230406144219-ba92d50b6596/go.mod 
h1:84cjSkVxFD9Pi/gvI5AOq5NPhGsmS8oPsJLtCON6eK8= github.com/envoyproxy/protoc-gen-validate v0.1.0/go.mod h1:iSmxcyjqTsJpI2R4NaDN7+kN2VEUnK/pcBlmesArF7c= github.com/envoyproxy/protoc-gen-validate v0.10.1 h1:c0g45+xCJhdgFGw7a5QAfdS4byAbud7miNWJ1WwEVf8= github.com/envoyproxy/protoc-gen-validate v0.10.1/go.mod h1:DRjgyB0I43LtJapqN6NiRwroiAU2PaFuvk/vjgh61ss= diff --git a/interop/observability/go.sum b/interop/observability/go.sum index 6c75fdb93aea..4a52d183476d 100644 --- a/interop/observability/go.sum +++ b/interop/observability/go.sum @@ -647,7 +647,7 @@ github.com/docopt/docopt-go v0.0.0-20180111231733-ee0de3bc6815/go.mod h1:WwZ+bS3 github.com/dustin/go-humanize v1.0.0/go.mod h1:HtrtbFcZ19U5GC7JDqmcUSB87Iq5E25KnS6fMYU6eOk= github.com/envoyproxy/go-control-plane v0.10.2-0.20220325020618-49ff273808a1/go.mod h1:KJwIaB5Mv44NWtYuAOFCVOjcI94vtpEz2JU/D2v6IjE= github.com/envoyproxy/go-control-plane v0.10.3/go.mod h1:fJJn/j26vwOu972OllsvAgJJM//w9BV6Fxbg2LuVd34= -github.com/envoyproxy/go-control-plane v0.11.0/go.mod h1:VnHyVMpzcLvCFt9yUz1UnCwHLhwx1WguiVDV7pTG/tI= +github.com/envoyproxy/go-control-plane v0.11.1-0.20230406144219-ba92d50b6596/go.mod h1:84cjSkVxFD9Pi/gvI5AOq5NPhGsmS8oPsJLtCON6eK8= github.com/envoyproxy/protoc-gen-validate v0.1.0/go.mod h1:iSmxcyjqTsJpI2R4NaDN7+kN2VEUnK/pcBlmesArF7c= github.com/envoyproxy/protoc-gen-validate v0.6.7/go.mod h1:dyJXwwfPK2VSqiB9Klm1J6romD608Ba7Hij42vrOBCo= github.com/envoyproxy/protoc-gen-validate v0.9.1/go.mod h1:OKNgG7TCp5pF4d6XftA0++PMirau2/yoOwVac3AbF2w= diff --git a/stats/opencensus/go.sum b/stats/opencensus/go.sum index 2dc115e4a27d..43f540fb5667 100644 --- a/stats/opencensus/go.sum +++ b/stats/opencensus/go.sum @@ -630,7 +630,7 @@ github.com/docopt/docopt-go v0.0.0-20180111231733-ee0de3bc6815/go.mod h1:WwZ+bS3 github.com/dustin/go-humanize v1.0.0/go.mod h1:HtrtbFcZ19U5GC7JDqmcUSB87Iq5E25KnS6fMYU6eOk= github.com/envoyproxy/go-control-plane v0.10.2-0.20220325020618-49ff273808a1/go.mod h1:KJwIaB5Mv44NWtYuAOFCVOjcI94vtpEz2JU/D2v6IjE= github.com/envoyproxy/go-control-plane v0.10.3/go.mod h1:fJJn/j26vwOu972OllsvAgJJM//w9BV6Fxbg2LuVd34= -github.com/envoyproxy/go-control-plane v0.11.0/go.mod h1:VnHyVMpzcLvCFt9yUz1UnCwHLhwx1WguiVDV7pTG/tI= +github.com/envoyproxy/go-control-plane v0.11.1-0.20230406144219-ba92d50b6596/go.mod h1:84cjSkVxFD9Pi/gvI5AOq5NPhGsmS8oPsJLtCON6eK8= github.com/envoyproxy/protoc-gen-validate v0.1.0/go.mod h1:iSmxcyjqTsJpI2R4NaDN7+kN2VEUnK/pcBlmesArF7c= github.com/envoyproxy/protoc-gen-validate v0.6.7/go.mod h1:dyJXwwfPK2VSqiB9Klm1J6romD608Ba7Hij42vrOBCo= github.com/envoyproxy/protoc-gen-validate v0.9.1/go.mod h1:OKNgG7TCp5pF4d6XftA0++PMirau2/yoOwVac3AbF2w= From da1a5eb25d2a3be62c5419c7004f9c0d669ba8bf Mon Sep 17 00:00:00 2001 From: Arvind Bright Date: Wed, 26 Apr 2023 16:58:00 -0700 Subject: [PATCH 09/60] tests: nix TestClientDoesntDeadlockWhileWritingErroneousLargeMessages (#6227) --- test/end2end_test.go | 43 ------------------------------------------- 1 file changed, 43 deletions(-) diff --git a/test/end2end_test.go b/test/end2end_test.go index 42dbc1f73e65..824d7c56c041 100644 --- a/test/end2end_test.go +++ b/test/end2end_test.go @@ -5281,49 +5281,6 @@ func (s) TestStatusInvalidUTF8Details(t *testing.T) { } } -func (s) TestClientDoesntDeadlockWhileWritingErrornousLargeMessages(t *testing.T) { - for _, e := range listTestEnv() { - if e.httpHandler { - continue - } - testClientDoesntDeadlockWhileWritingErrornousLargeMessages(t, e) - } -} - -func 
testClientDoesntDeadlockWhileWritingErrornousLargeMessages(t *testing.T, e env) { - te := newTest(t, e) - te.userAgent = testAppUA - smallSize := 1024 - te.maxServerReceiveMsgSize = &smallSize - te.startServer(&testServer{security: e.security}) - defer te.tearDown() - tc := testgrpc.NewTestServiceClient(te.clientConn()) - payload, err := newPayload(testpb.PayloadType_COMPRESSABLE, 1048576) - if err != nil { - t.Fatal(err) - } - req := &testpb.SimpleRequest{ - ResponseType: testpb.PayloadType_COMPRESSABLE, - Payload: payload, - } - var wg sync.WaitGroup - for i := 0; i < 10; i++ { - wg.Add(1) - go func() { - defer wg.Done() - for j := 0; j < 100; j++ { - ctx, cancel := context.WithDeadline(context.Background(), time.Now().Add(time.Second*10)) - defer cancel() - if _, err := tc.UnaryCall(ctx, req); status.Code(err) != codes.ResourceExhausted { - t.Errorf("TestService/UnaryCall(_,_) = _. %v, want code: %s", err, codes.ResourceExhausted) - return - } - } - }() - } - wg.Wait() -} - func (s) TestRPCTimeout(t *testing.T) { for _, e := range listTestEnv() { testRPCTimeout(t, e) From df82147145ba8af646bdd9ba2fdd06db0b2d798c Mon Sep 17 00:00:00 2001 From: Zach Reyes <39203661+zasweq@users.noreply.github.com> Date: Fri, 28 Apr 2023 17:05:41 -0400 Subject: [PATCH 10/60] internal: Document gcp/observability 1.0 dependencies in /internal (#6229) --- internal/binarylog/binarylog.go | 3 +++ internal/binarylog/method_logger.go | 9 +++++++++ internal/envconfig/observability.go | 6 ++++++ internal/internal.go | 24 ++++++++++++++++++++++++ 4 files changed, 42 insertions(+) diff --git a/internal/binarylog/binarylog.go b/internal/binarylog/binarylog.go index af03a40d990b..755fdebc1b15 100644 --- a/internal/binarylog/binarylog.go +++ b/internal/binarylog/binarylog.go @@ -32,6 +32,9 @@ var grpclogLogger = grpclog.Component("binarylog") // Logger specifies MethodLoggers for method names with a Log call that // takes a context. +// +// This is used in the 1.0 release of gcp/observability, and thus must not be +// deleted or changed. type Logger interface { GetMethodLogger(methodName string) MethodLogger } diff --git a/internal/binarylog/method_logger.go b/internal/binarylog/method_logger.go index 56fcf008d3de..6c3f632215fd 100644 --- a/internal/binarylog/method_logger.go +++ b/internal/binarylog/method_logger.go @@ -49,6 +49,9 @@ func (g *callIDGenerator) reset() { var idGen callIDGenerator // MethodLogger is the sub-logger for each method. +// +// This is used in the 1.0 release of gcp/observability, and thus must not be +// deleted or changed. type MethodLogger interface { Log(context.Context, LogEntryConfig) } @@ -65,6 +68,9 @@ type TruncatingMethodLogger struct { } // NewTruncatingMethodLogger returns a new truncating method logger. +// +// This is used in the 1.0 release of gcp/observability, and thus must not be +// deleted or changed. func NewTruncatingMethodLogger(h, m uint64) *TruncatingMethodLogger { return &TruncatingMethodLogger{ headerMaxLen: h, @@ -145,6 +151,9 @@ func (ml *TruncatingMethodLogger) truncateMessage(msgPb *binlogpb.Message) (trun } // LogEntryConfig represents the configuration for binary log entry. +// +// This is used in the 1.0 release of gcp/observability, and thus must not be +// deleted or changed. 
type LogEntryConfig interface { toProto() *binlogpb.GrpcLogEntry } diff --git a/internal/envconfig/observability.go b/internal/envconfig/observability.go index 821dd0a7c198..dd314cfb18f4 100644 --- a/internal/envconfig/observability.go +++ b/internal/envconfig/observability.go @@ -28,9 +28,15 @@ const ( var ( // ObservabilityConfig is the json configuration for the gcp/observability // package specified directly in the envObservabilityConfig env var. + // + // This is used in the 1.0 release of gcp/observability, and thus must not be + // deleted or changed. ObservabilityConfig = os.Getenv(envObservabilityConfig) // ObservabilityConfigFile is the json configuration for the // gcp/observability specified in a file with the location specified in // envObservabilityConfigFile env var. + // + // This is used in the 1.0 release of gcp/observability, and thus must not be + // deleted or changed. ObservabilityConfigFile = os.Getenv(envObservabilityConfigFile) ) diff --git a/internal/internal.go b/internal/internal.go index 836b6a3b3e78..42ff39c84446 100644 --- a/internal/internal.go +++ b/internal/internal.go @@ -60,6 +60,9 @@ var ( GetServerCredentials interface{} // func (*grpc.Server) credentials.TransportCredentials // CanonicalString returns the canonical string of the code defined here: // https://github.com/grpc/grpc/blob/master/doc/statuscodes.md. + // + // This is used in the 1.0 release of gcp/observability, and thus must not be + // deleted or changed. CanonicalString interface{} // func (codes.Code) string // DrainServerTransports initiates a graceful close of existing connections // on a gRPC server accepted on the provided listener address. An @@ -69,20 +72,35 @@ var ( // AddGlobalServerOptions adds an array of ServerOption that will be // effective globally for newly created servers. The priority will be: 1. // user-provided; 2. this method; 3. default values. + // + // This is used in the 1.0 release of gcp/observability, and thus must not be + // deleted or changed. AddGlobalServerOptions interface{} // func(opt ...ServerOption) // ClearGlobalServerOptions clears the array of extra ServerOption. This // method is useful in testing and benchmarking. + // + // This is used in the 1.0 release of gcp/observability, and thus must not be + // deleted or changed. ClearGlobalServerOptions func() // AddGlobalDialOptions adds an array of DialOption that will be effective // globally for newly created client channels. The priority will be: 1. // user-provided; 2. this method; 3. default values. + // + // This is used in the 1.0 release of gcp/observability, and thus must not be + // deleted or changed. AddGlobalDialOptions interface{} // func(opt ...DialOption) // DisableGlobalDialOptions returns a DialOption that prevents the // ClientConn from applying the global DialOptions (set via // AddGlobalDialOptions). + // + // This is used in the 1.0 release of gcp/observability, and thus must not be + // deleted or changed. DisableGlobalDialOptions interface{} // func() grpc.DialOption // ClearGlobalDialOptions clears the array of extra DialOption. This // method is useful in testing and benchmarking. + // + // This is used in the 1.0 release of gcp/observability, and thus must not be + // deleted or changed. ClearGlobalDialOptions func() // JoinDialOptions combines the dial options passed as arguments into a // single dial option. @@ -93,9 +111,15 @@ var ( // WithBinaryLogger returns a DialOption that specifies the binary logger // for a ClientConn. 
+ // + // This is used in the 1.0 release of gcp/observability, and thus must not be + // deleted or changed. WithBinaryLogger interface{} // func(binarylog.Logger) grpc.DialOption // BinaryLogger returns a ServerOption that can set the binary logger for a // server. + // + // This is used in the 1.0 release of gcp/observability, and thus must not be + // deleted or changed. BinaryLogger interface{} // func(binarylog.Logger) grpc.ServerOption // NewXDSResolverWithConfigForTesting creates a new xds resolver builder using From cf89a0b9310ad47c06e119238748edeb74c15f07 Mon Sep 17 00:00:00 2001 From: Gregory Cooke Date: Mon, 1 May 2023 14:37:26 -0400 Subject: [PATCH 11/60] authz: Swap to using the correct TypedConfig in audit logger parsing (#6235) Swap audit logger parsing to using the correct TypedConfig representation --- authz/rbac_translator.go | 12 +++++++++++- authz/rbac_translator_test.go | 31 ++++++++++++++++++------------- 2 files changed, 29 insertions(+), 14 deletions(-) diff --git a/authz/rbac_translator.go b/authz/rbac_translator.go index 6e083cfba8ea..c4fa996e5995 100644 --- a/authz/rbac_translator.go +++ b/authz/rbac_translator.go @@ -28,6 +28,7 @@ import ( "fmt" "strings" + v1typepb "github.com/cncf/xds/go/udpa/type/v1" v3corepb "github.com/envoyproxy/go-control-plane/envoy/config/core/v3" v3rbacpb "github.com/envoyproxy/go-control-plane/envoy/config/rbac/v3" v3routepb "github.com/envoyproxy/go-control-plane/envoy/config/route/v3" @@ -36,6 +37,10 @@ import ( "google.golang.org/protobuf/types/known/structpb" ) +// This is used when converting a custom config from raw JSON to a TypedStruct +// The TypeURL of the TypeStruct will be "grpc.authz.audit_logging/" +const typedURLPrefix = "grpc.authz.audit_logging/" + type header struct { Key string Values []string @@ -302,10 +307,15 @@ func (options *auditLoggingOptions) toProtos() (allow *v3rbacpb.RBAC_AuditLoggin if config.Config == nil { return nil, nil, fmt.Errorf("AuditLogger Config field cannot be nil") } - customConfig, err := anypb.New(config.Config) + typedStruct := &v1typepb.TypedStruct{ + TypeUrl: typedURLPrefix + config.Name, + Value: config.Config, + } + customConfig, err := anypb.New(typedStruct) if err != nil { return nil, nil, fmt.Errorf("error parsing custom audit logger config: %v", err) } + logger := &v3corepb.TypedExtensionConfig{Name: config.Name, TypedConfig: customConfig} rbacConfig := v3rbacpb.RBAC_AuditLoggingOptions_AuditLoggerConfig{ IsOptional: config.IsOptional, diff --git a/authz/rbac_translator_test.go b/authz/rbac_translator_test.go index b1c125a5ce00..fff492cff7ab 100644 --- a/authz/rbac_translator_test.go +++ b/authz/rbac_translator_test.go @@ -22,6 +22,7 @@ import ( "strings" "testing" + v1typepb "github.com/cncf/xds/go/udpa/type/v1" "github.com/google/go-cmp/cmp" "google.golang.org/protobuf/testing/protocmp" "google.golang.org/protobuf/types/known/anypb" @@ -305,7 +306,7 @@ func TestTranslatePolicy(t *testing.T) { AuditLoggingOptions: &v3rbacpb.RBAC_AuditLoggingOptions{ AuditCondition: v3rbacpb.RBAC_AuditLoggingOptions_NONE, LoggerConfigs: []*v3rbacpb.RBAC_AuditLoggingOptions_AuditLoggerConfig{ - {AuditLogger: &v3corepb.TypedExtensionConfig{Name: "stdout_logger", TypedConfig: anyPbHelper(t, map[string]interface{}{})}, + {AuditLogger: &v3corepb.TypedExtensionConfig{Name: "stdout_logger", TypedConfig: anyPbHelper(t, map[string]interface{}{}, "stdout_logger")}, IsOptional: false, }, }, @@ -339,7 +340,7 @@ func TestTranslatePolicy(t *testing.T) { AuditLoggingOptions: 
&v3rbacpb.RBAC_AuditLoggingOptions{ AuditCondition: v3rbacpb.RBAC_AuditLoggingOptions_ON_ALLOW, LoggerConfigs: []*v3rbacpb.RBAC_AuditLoggingOptions_AuditLoggerConfig{ - {AuditLogger: &v3corepb.TypedExtensionConfig{Name: "stdout_logger", TypedConfig: anyPbHelper(t, map[string]interface{}{})}, + {AuditLogger: &v3corepb.TypedExtensionConfig{Name: "stdout_logger", TypedConfig: anyPbHelper(t, map[string]interface{}{}, "stdout_logger")}, IsOptional: false, }, }, @@ -401,7 +402,7 @@ func TestTranslatePolicy(t *testing.T) { AuditLoggingOptions: &v3rbacpb.RBAC_AuditLoggingOptions{ AuditCondition: v3rbacpb.RBAC_AuditLoggingOptions_ON_DENY, LoggerConfigs: []*v3rbacpb.RBAC_AuditLoggingOptions_AuditLoggerConfig{ - {AuditLogger: &v3corepb.TypedExtensionConfig{Name: "stdout_logger", TypedConfig: anyPbHelper(t, map[string]interface{}{})}, + {AuditLogger: &v3corepb.TypedExtensionConfig{Name: "stdout_logger", TypedConfig: anyPbHelper(t, map[string]interface{}{}, "stdout_logger")}, IsOptional: false, }, }, @@ -435,7 +436,7 @@ func TestTranslatePolicy(t *testing.T) { AuditLoggingOptions: &v3rbacpb.RBAC_AuditLoggingOptions{ AuditCondition: v3rbacpb.RBAC_AuditLoggingOptions_ON_DENY_AND_ALLOW, LoggerConfigs: []*v3rbacpb.RBAC_AuditLoggingOptions_AuditLoggerConfig{ - {AuditLogger: &v3corepb.TypedExtensionConfig{Name: "stdout_logger", TypedConfig: anyPbHelper(t, map[string]interface{}{})}, + {AuditLogger: &v3corepb.TypedExtensionConfig{Name: "stdout_logger", TypedConfig: anyPbHelper(t, map[string]interface{}{}, "stdout_logger")}, IsOptional: false, }, }, @@ -497,7 +498,7 @@ func TestTranslatePolicy(t *testing.T) { AuditLoggingOptions: &v3rbacpb.RBAC_AuditLoggingOptions{ AuditCondition: v3rbacpb.RBAC_AuditLoggingOptions_NONE, LoggerConfigs: []*v3rbacpb.RBAC_AuditLoggingOptions_AuditLoggerConfig{ - {AuditLogger: &v3corepb.TypedExtensionConfig{Name: "stdout_logger", TypedConfig: anyPbHelper(t, map[string]interface{}{})}, + {AuditLogger: &v3corepb.TypedExtensionConfig{Name: "stdout_logger", TypedConfig: anyPbHelper(t, map[string]interface{}{}, "stdout_logger")}, IsOptional: false, }, }, @@ -531,7 +532,7 @@ func TestTranslatePolicy(t *testing.T) { AuditLoggingOptions: &v3rbacpb.RBAC_AuditLoggingOptions{ AuditCondition: v3rbacpb.RBAC_AuditLoggingOptions_NONE, LoggerConfigs: []*v3rbacpb.RBAC_AuditLoggingOptions_AuditLoggerConfig{ - {AuditLogger: &v3corepb.TypedExtensionConfig{Name: "stdout_logger", TypedConfig: anyPbHelper(t, map[string]interface{}{})}, + {AuditLogger: &v3corepb.TypedExtensionConfig{Name: "stdout_logger", TypedConfig: anyPbHelper(t, map[string]interface{}{}, "stdout_logger")}, IsOptional: false, }, }, @@ -593,7 +594,7 @@ func TestTranslatePolicy(t *testing.T) { AuditLoggingOptions: &v3rbacpb.RBAC_AuditLoggingOptions{ AuditCondition: v3rbacpb.RBAC_AuditLoggingOptions_NONE, LoggerConfigs: []*v3rbacpb.RBAC_AuditLoggingOptions_AuditLoggerConfig{ - {AuditLogger: &v3corepb.TypedExtensionConfig{Name: "stdout_logger", TypedConfig: anyPbHelper(t, map[string]interface{}{"abc": 123, "xyz": "123"})}, + {AuditLogger: &v3corepb.TypedExtensionConfig{Name: "stdout_logger", TypedConfig: anyPbHelper(t, map[string]interface{}{"abc": 123, "xyz": "123"}, "stdout_logger")}, IsOptional: false, }, }, @@ -627,7 +628,7 @@ func TestTranslatePolicy(t *testing.T) { AuditLoggingOptions: &v3rbacpb.RBAC_AuditLoggingOptions{ AuditCondition: v3rbacpb.RBAC_AuditLoggingOptions_NONE, LoggerConfigs: []*v3rbacpb.RBAC_AuditLoggingOptions_AuditLoggerConfig{ - {AuditLogger: &v3corepb.TypedExtensionConfig{Name: "stdout_logger", TypedConfig: 
anyPbHelper(t, map[string]interface{}{"abc": 123, "xyz": "123"})}, + {AuditLogger: &v3corepb.TypedExtensionConfig{Name: "stdout_logger", TypedConfig: anyPbHelper(t, map[string]interface{}{"abc": 123, "xyz": "123"}, "stdout_logger")}, IsOptional: false, }, }, @@ -685,7 +686,7 @@ func TestTranslatePolicy(t *testing.T) { AuditLoggingOptions: &v3rbacpb.RBAC_AuditLoggingOptions{ AuditCondition: v3rbacpb.RBAC_AuditLoggingOptions_NONE, LoggerConfigs: []*v3rbacpb.RBAC_AuditLoggingOptions_AuditLoggerConfig{ - {AuditLogger: &v3corepb.TypedExtensionConfig{Name: "stdout_logger", TypedConfig: anyPbHelper(t, map[string]interface{}{"abc": 123, "xyz": map[string]interface{}{"abc": 123}})}, + {AuditLogger: &v3corepb.TypedExtensionConfig{Name: "stdout_logger", TypedConfig: anyPbHelper(t, map[string]interface{}{"abc": 123, "xyz": map[string]interface{}{"abc": 123}}, "stdout_logger")}, IsOptional: false, }, }, @@ -789,7 +790,7 @@ func TestTranslatePolicy(t *testing.T) { AuditLoggingOptions: &v3rbacpb.RBAC_AuditLoggingOptions{ AuditCondition: v3rbacpb.RBAC_AuditLoggingOptions_NONE, LoggerConfigs: []*v3rbacpb.RBAC_AuditLoggingOptions_AuditLoggerConfig{ - {AuditLogger: &v3corepb.TypedExtensionConfig{Name: "stdout_logger", TypedConfig: anyPbHelper(t, map[string]interface{}{})}, + {AuditLogger: &v3corepb.TypedExtensionConfig{Name: "stdout_logger", TypedConfig: anyPbHelper(t, map[string]interface{}{}, "stdout_logger")}, IsOptional: false, }, }, @@ -941,15 +942,19 @@ func TestTranslatePolicy(t *testing.T) { } } -func anyPbHelper(t *testing.T, in map[string]interface{}) *anypb.Any { +func anyPbHelper(t *testing.T, in map[string]interface{}, name string) *anypb.Any { t.Helper() pb, err := structpb.NewStruct(in) + typedStruct := &v1typepb.TypedStruct{ + TypeUrl: typedURLPrefix + name, + Value: pb, + } if err != nil { t.Fatal(err) } - ret, err := anypb.New(pb) + customConfig, err := anypb.New(typedStruct) if err != nil { t.Fatal(err) } - return ret + return customConfig } From 019acf2e94863061694848dcc819a911399efac5 Mon Sep 17 00:00:00 2001 From: Doug Fawley Date: Mon, 1 May 2023 14:11:23 -0700 Subject: [PATCH 12/60] stubserver: add option for allowing more services to be registered (#6240) --- internal/stubserver/stubserver.go | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/internal/stubserver/stubserver.go b/internal/stubserver/stubserver.go index 94ef56482e18..3c89ff6823bd 100644 --- a/internal/stubserver/stubserver.go +++ b/internal/stubserver/stubserver.go @@ -93,6 +93,19 @@ func (ss *StubServer) Start(sopts []grpc.ServerOption, dopts ...grpc.DialOption) return nil } +type registerServiceServerOption struct { + grpc.EmptyServerOption + f func(*grpc.Server) +} + +// RegisterServiceServerOption returns a ServerOption that will run f() in +// Start or StartServer with the grpc.Server created before serving. This +// allows other services to be registered on the test server (e.g. ORCA, +// health, or reflection). +func RegisterServiceServerOption(f func(*grpc.Server)) grpc.ServerOption { + return ®isterServiceServerOption{f: f} +} + // StartServer only starts the server. It does not create a client to it. func (ss *StubServer) StartServer(sopts ...grpc.ServerOption) error { if ss.Network == "" { @@ -113,6 +126,13 @@ func (ss *StubServer) StartServer(sopts ...grpc.ServerOption) error { ss.cleanups = append(ss.cleanups, func() { lis.Close() }) s := grpc.NewServer(sopts...) 
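The loop added in this hunk, just below, scans the supplied server options for this wrapper type and runs each callback against the newly created grpc.Server. As a usage illustration, a minimal sketch of a test registering the standard health service on a StubServer; the helper name startWithHealth is hypothetical, and the stubserver package is internal to the grpc module, so this only compiles inside it:

    package teststub

    import (
        "google.golang.org/grpc"
        "google.golang.org/grpc/health"
        healthgrpc "google.golang.org/grpc/health/grpc_health_v1"
        "google.golang.org/grpc/internal/stubserver"
    )

    // startWithHealth starts a stub server with the standard health service
    // registered alongside the default test service.
    func startWithHealth(ss *stubserver.StubServer) error {
        return ss.StartServer(stubserver.RegisterServiceServerOption(func(s *grpc.Server) {
            healthgrpc.RegisterHealthServer(s, health.NewServer())
        }))
    }
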
+ for _, so := range sopts { + switch x := so.(type) { + case *registerServiceServerOption: + x.f(s) + } + } + testgrpc.RegisterTestServiceServer(s, ss) go s.Serve(lis) ss.cleanups = append(ss.cleanups, s.Stop) From b15382715d129e480e435af6d78048b91aecc95a Mon Sep 17 00:00:00 2001 From: Easwar Swaminathan Date: Mon, 1 May 2023 14:14:32 -0700 Subject: [PATCH 13/60] xds: make glaze happy for test packages (#6237) --- .../xdsclient/xdslbregistry/{test => tests}/converter_test.go | 4 ++-- .../xdsresource/{test => tests}/unmarshal_cds_test.go | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) rename xds/internal/xdsclient/xdslbregistry/{test => tests}/converter_test.go (99%) rename xds/internal/xdsclient/xdsresource/{test => tests}/unmarshal_cds_test.go (99%) diff --git a/xds/internal/xdsclient/xdslbregistry/test/converter_test.go b/xds/internal/xdsclient/xdslbregistry/tests/converter_test.go similarity index 99% rename from xds/internal/xdsclient/xdslbregistry/test/converter_test.go rename to xds/internal/xdsclient/xdslbregistry/tests/converter_test.go index 7f31d68f1f8d..2ffbad845f8b 100644 --- a/xds/internal/xdsclient/xdslbregistry/test/converter_test.go +++ b/xds/internal/xdsclient/xdslbregistry/tests/converter_test.go @@ -16,8 +16,8 @@ * */ -// Package test contains test cases for the xDS LB Policy Registry. -package test +// Package tests_test contains test cases for the xDS LB Policy Registry. +package tests_test import ( "encoding/json" diff --git a/xds/internal/xdsclient/xdsresource/test/unmarshal_cds_test.go b/xds/internal/xdsclient/xdsresource/tests/unmarshal_cds_test.go similarity index 99% rename from xds/internal/xdsclient/xdsresource/test/unmarshal_cds_test.go rename to xds/internal/xdsclient/xdsresource/tests/unmarshal_cds_test.go index 3f4c226d74d3..dc4e8591f827 100644 --- a/xds/internal/xdsclient/xdsresource/test/unmarshal_cds_test.go +++ b/xds/internal/xdsclient/xdsresource/tests/unmarshal_cds_test.go @@ -16,8 +16,8 @@ * */ -// Package test contains test cases for unmarshalling of CDS resources. -package test +// Package tests_test contains test cases for unmarshalling of CDS resources. +package tests_test import ( "encoding/json" From 21a339ce4a7d5ff33da438ffc399aeb2ce14da4f Mon Sep 17 00:00:00 2001 From: Easwar Swaminathan Date: Mon, 1 May 2023 16:50:35 -0700 Subject: [PATCH 14/60] grpc: handle RemoveSubConn inline in balancerWrapper (#6228) --- balancer_conn_wrappers.go | 26 +------------------------- 1 file changed, 1 insertion(+), 25 deletions(-) diff --git a/balancer_conn_wrappers.go b/balancer_conn_wrappers.go index eeaf5beb72a6..978ed69fdf75 100644 --- a/balancer_conn_wrappers.go +++ b/balancer_conn_wrappers.go @@ -102,10 +102,6 @@ type switchToUpdate struct { name string } -type subConnUpdate struct { - acbw *acBalancerWrapper -} - // watcher is a long-running goroutine which reads updates from a channel and // invokes corresponding methods on the underlying balancer. It ensures that // these methods are invoked in a synchronous fashion. 
It also ensures that @@ -132,8 +128,6 @@ func (ccb *ccBalancerWrapper) watcher() { ccb.handleResolverError(update.err) case *switchToUpdate: ccb.handleSwitchTo(update.name) - case *subConnUpdate: - ccb.handleRemoveSubConn(update.acbw) default: logger.Errorf("ccBalancerWrapper.watcher: unknown update %+v, type %T", update, update) } @@ -289,14 +283,6 @@ func (ccb *ccBalancerWrapper) handleSwitchTo(name string) { ccb.curBalancerName = builder.Name() } -// handleRemoveSucConn handles a request from the underlying balancer to remove -// a subConn. -// -// See comments in RemoveSubConn() for more details. -func (ccb *ccBalancerWrapper) handleRemoveSubConn(acbw *acBalancerWrapper) { - ccb.cc.removeAddrConn(acbw.getAddrConn(), errConnDrain) -} - func (ccb *ccBalancerWrapper) close() { ccb.closed.Fire() <-ccb.done.Done() @@ -326,21 +312,11 @@ func (ccb *ccBalancerWrapper) NewSubConn(addrs []resolver.Address, opts balancer } func (ccb *ccBalancerWrapper) RemoveSubConn(sc balancer.SubConn) { - // Before we switched the ccBalancerWrapper to use gracefulswitch.Balancer, it - // was required to handle the RemoveSubConn() method asynchronously by pushing - // the update onto the update channel. This was done to avoid a deadlock as - // switchBalancer() was holding cc.mu when calling Close() on the old - // balancer, which would in turn call RemoveSubConn(). - // - // With the use of gracefulswitch.Balancer in ccBalancerWrapper, handling this - // asynchronously is probably not required anymore since the switchTo() method - // handles the balancer switch by pushing the update onto the channel. - // TODO(easwars): Handle this inline. acbw, ok := sc.(*acBalancerWrapper) if !ok { return } - ccb.updateCh.Put(&subConnUpdate{acbw: acbw}) + ccb.cc.removeAddrConn(acbw.getAddrConn(), errConnDrain) } func (ccb *ccBalancerWrapper) UpdateAddresses(sc balancer.SubConn, addrs []resolver.Address) { From 713bd04130a0d4b796d28d0ee987071f182dd06d Mon Sep 17 00:00:00 2001 From: Doug Fawley Date: Mon, 1 May 2023 17:03:11 -0700 Subject: [PATCH 15/60] orca: minor cleanups (#6239) --- orca/call_metric_recorder_test.go | 17 ++++++-------- orca/internal/internal.go | 39 ++++++++++++++++++++++++++++++- orca/orca.go | 35 +++++++-------------------- orca/orca_test.go | 34 +++++++++++++++------------ orca/service.go | 2 +- 5 files changed, 73 insertions(+), 54 deletions(-) diff --git a/orca/call_metric_recorder_test.go b/orca/call_metric_recorder_test.go index 25d4af371d08..43d0e45291e2 100644 --- a/orca/call_metric_recorder_test.go +++ b/orca/call_metric_recorder_test.go @@ -34,6 +34,7 @@ import ( "google.golang.org/grpc/internal/stubserver" "google.golang.org/grpc/metadata" "google.golang.org/grpc/orca" + "google.golang.org/grpc/orca/internal" v3orcapb "github.com/cncf/xds/go/xds/data/orca/v3" testgrpc "google.golang.org/grpc/interop/grpc_testing" @@ -58,7 +59,6 @@ func (s) TestE2ECallMetricsUnary(t *testing.T) { desc string injectMetrics bool wantProto *v3orcapb.OrcaLoadReport - wantErr error }{ { desc: "with custom backend metrics", @@ -73,7 +73,6 @@ func (s) TestE2ECallMetricsUnary(t *testing.T) { { desc: "with no custom backend metrics", injectMetrics: false, - wantErr: orca.ErrLoadReportMissing, }, } @@ -146,9 +145,9 @@ func (s) TestE2ECallMetricsUnary(t *testing.T) { t.Fatalf("EmptyCall failed: %v", err) } - gotProto, err := orca.ToLoadReport(trailer) - if test.wantErr != nil && !errors.Is(err, test.wantErr) { - t.Fatalf("When retrieving load report, got error: %v, want: %v", err, orca.ErrLoadReportMissing) + gotProto, 
err := internal.ToLoadReport(trailer) + if err != nil { + t.Fatalf("When retrieving load report, got error: %v, want: <nil>", err) } if test.wantProto != nil && !cmp.Equal(gotProto, test.wantProto, cmp.Comparer(proto.Equal)) { t.Fatalf("Received load report in trailer: %s, want: %s", pretty.ToJSON(gotProto), pretty.ToJSON(test.wantProto)) @@ -165,7 +164,6 @@ func (s) TestE2ECallMetricsStreaming(t *testing.T) { desc string injectMetrics bool wantProto *v3orcapb.OrcaLoadReport - wantErr error }{ { desc: "with custom backend metrics", @@ -180,7 +178,6 @@ func (s) TestE2ECallMetricsStreaming(t *testing.T) { { desc: "with no custom backend metrics", injectMetrics: false, - wantErr: orca.ErrLoadReportMissing, }, } @@ -288,9 +285,9 @@ func (s) TestE2ECallMetricsStreaming(t *testing.T) { } } - gotProto, err := orca.ToLoadReport(stream.Trailer()) - if test.wantErr != nil && !errors.Is(err, test.wantErr) { - t.Fatalf("When retrieving load report, got error: %v, want: %v", err, orca.ErrLoadReportMissing) + gotProto, err := internal.ToLoadReport(stream.Trailer()) + if err != nil { + t.Fatalf("When retrieving load report, got error: %v, want: <nil>", err) } if test.wantProto != nil && !cmp.Equal(gotProto, test.wantProto, cmp.Comparer(proto.Equal)) { t.Fatalf("Received load report in trailer: %s, want: %s", pretty.ToJSON(gotProto), pretty.ToJSON(test.wantProto)) diff --git a/orca/internal/internal.go b/orca/internal/internal.go index 865d94d86945..35b899d9e877 100644 --- a/orca/internal/internal.go +++ b/orca/internal/internal.go @@ -20,7 +20,16 @@ // avoid polluting the godoc of the top-level orca package. package internal -import ibackoff "google.golang.org/grpc/internal/backoff" +import ( + "errors" + "fmt" + + ibackoff "google.golang.org/grpc/internal/backoff" + "google.golang.org/grpc/metadata" + "google.golang.org/protobuf/proto" + + v3orcapb "github.com/cncf/xds/go/xds/data/orca/v3" +) // AllowAnyMinReportingInterval prevents clamping of the MinReportingInterval // configured via ServiceOptions, to a minimum of 30s. @@ -32,3 +41,31 @@ var AllowAnyMinReportingInterval interface{} // func(*ServiceOptions) // // For testing purposes only. var DefaultBackoffFunc = ibackoff.DefaultExponential.Backoff + +// TrailerMetadataKey is the key in which the per-call backend metrics are +// transmitted. +const TrailerMetadataKey = "endpoint-load-metrics-bin" + +// ToLoadReport unmarshals a binary encoded [ORCA LoadReport] protobuf message +// from md and returns the corresponding struct. The load report is expected to +// be stored as the value for key "endpoint-load-metrics-bin". +// +// If no load report was found in the provided metadata, if multiple load +// reports are found, or if the load report found cannot be parsed, an error is +// returned.
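As a client-side illustration, a minimal sketch of consuming this helper; logBackendCPU is a hypothetical caller, and since the package is internal to the grpc module only code inside it can import it:

    package loadreport

    import (
        "fmt"

        "google.golang.org/grpc/metadata"
        "google.golang.org/grpc/orca/internal"
    )

    // logBackendCPU decodes a per-call ORCA report from RPC trailer metadata.
    func logBackendCPU(trailer metadata.MD) {
        report, err := internal.ToLoadReport(trailer)
        if err != nil {
            fmt.Println("malformed load report:", err)
            return
        }
        if report == nil {
            // The server sent no per-call metrics; with this change that is
            // no longer treated as an error.
            return
        }
        fmt.Printf("backend CPU utilization: %v\n", report.GetCpuUtilization())
    }
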
+// +// [ORCA LoadReport]: (https://github.com/cncf/xds/blob/main/xds/data/orca/v3/orca_load_report.proto#L15) +func ToLoadReport(md metadata.MD) (*v3orcapb.OrcaLoadReport, error) { + vs := md.Get(TrailerMetadataKey) + if len(vs) == 0 { + return nil, nil + } + if len(vs) != 1 { + return nil, errors.New("multiple orca load reports found in provided metadata") + } + ret := new(v3orcapb.OrcaLoadReport) + if err := proto.Unmarshal([]byte(vs[0]), ret); err != nil { + return nil, fmt.Errorf("failed to unmarshal load report found in metadata: %v", err) + } + return ret, nil +} diff --git a/orca/orca.go b/orca/orca.go index bacc4a89ab0b..2c958b6902e9 100644 --- a/orca/orca.go +++ b/orca/orca.go @@ -29,21 +29,19 @@ package orca import ( "context" "errors" - "fmt" "google.golang.org/grpc" "google.golang.org/grpc/grpclog" - "google.golang.org/grpc/internal" + igrpc "google.golang.org/grpc/internal" "google.golang.org/grpc/internal/balancerload" "google.golang.org/grpc/metadata" + "google.golang.org/grpc/orca/internal" "google.golang.org/protobuf/proto" - - v3orcapb "github.com/cncf/xds/go/xds/data/orca/v3" ) var ( logger = grpclog.Component("orca-backend-metrics") - joinServerOptions = internal.JoinServerOptions.(func(...grpc.ServerOption) grpc.ServerOption) + joinServerOptions = igrpc.JoinServerOptions.(func(...grpc.ServerOption) grpc.ServerOption) ) const trailerMetadataKey = "endpoint-load-metrics-bin" @@ -144,26 +142,6 @@ func (w *wrappedStream) Context() context.Context { // ErrLoadReportMissing indicates no ORCA load report was found in trailers. var ErrLoadReportMissing = errors.New("orca load report missing in provided metadata") -// ToLoadReport unmarshals a binary encoded [ORCA LoadReport] protobuf message -// from md and returns the corresponding struct. The load report is expected to -// be stored as the value for key "endpoint-load-metrics-bin". -// -// If no load report was found in the provided metadata, ErrLoadReportMissing is -// returned. -// -// [ORCA LoadReport]: (https://github.com/cncf/xds/blob/main/xds/data/orca/v3/orca_load_report.proto#L15) -func ToLoadReport(md metadata.MD) (*v3orcapb.OrcaLoadReport, error) { - vs := md.Get(trailerMetadataKey) - if len(vs) == 0 { - return nil, ErrLoadReportMissing - } - ret := new(v3orcapb.OrcaLoadReport) - if err := proto.Unmarshal([]byte(vs[0]), ret); err != nil { - return nil, fmt.Errorf("failed to unmarshal load report found in metadata: %v", err) - } - return ret, nil -} - // loadParser implements the Parser interface defined in `internal/balancerload` // package. This interface is used by the client stream to parse load reports // sent by the server in trailer metadata. 
The parsed loads are then sent to @@ -174,9 +152,12 @@ func ToLoadReport(md metadata.MD) (*v3orcapb.OrcaLoadReport, error) { type loadParser struct{} func (loadParser) Parse(md metadata.MD) interface{} { - lr, err := ToLoadReport(md) + lr, err := internal.ToLoadReport(md) if err != nil { - logger.Errorf("Parse(%v) failed: %v", err) + logger.Infof("Parse failed: %v", err) + } + if lr == nil && logger.V(2) { + logger.Infof("Missing ORCA load report data") } return lr } diff --git a/orca/orca_test.go b/orca/orca_test.go index fd356cfba437..096b54907148 100644 --- a/orca/orca_test.go +++ b/orca/orca_test.go @@ -25,12 +25,18 @@ import ( "github.com/google/go-cmp/cmp" "google.golang.org/grpc/internal/pretty" "google.golang.org/grpc/metadata" - "google.golang.org/grpc/orca" + "google.golang.org/grpc/orca/internal" v3orcapb "github.com/cncf/xds/go/xds/data/orca/v3" ) func TestToLoadReport(t *testing.T) { + goodReport := &v3orcapb.OrcaLoadReport{ + CpuUtilization: 1.0, + MemUtilization: 50.0, + RequestCost: map[string]float64{"queryCost": 25.0}, + Utilization: map[string]float64{"queueSize": 75.0}, + } tests := []struct { name string md metadata.MD @@ -40,7 +46,7 @@ func TestToLoadReport(t *testing.T) { { name: "no load report in metadata", md: metadata.MD{}, - wantErr: true, + wantErr: false, }, { name: "badly marshaled load report", @@ -49,29 +55,27 @@ func TestToLoadReport(t *testing.T) { }(), wantErr: true, }, + { + name: "multiple load reports", + md: func() metadata.MD { + b, _ := proto.Marshal(goodReport) + return metadata.Pairs("endpoint-load-metrics-bin", string(b), "endpoint-load-metrics-bin", string(b)) + }(), + wantErr: true, + }, { name: "good load report", md: func() metadata.MD { - b, _ := proto.Marshal(&v3orcapb.OrcaLoadReport{ - CpuUtilization: 1.0, - MemUtilization: 50.0, - RequestCost: map[string]float64{"queryCost": 25.0}, - Utilization: map[string]float64{"queueSize": 75.0}, - }) + b, _ := proto.Marshal(goodReport) return metadata.Pairs("endpoint-load-metrics-bin", string(b)) }(), - want: &v3orcapb.OrcaLoadReport{ - CpuUtilization: 1.0, - MemUtilization: 50.0, - RequestCost: map[string]float64{"queryCost": 25.0}, - Utilization: map[string]float64{"queueSize": 75.0}, - }, + want: goodReport, }, } for _, test := range tests { t.Run(test.name, func(t *testing.T) { - got, err := orca.ToLoadReport(test.md) + got, err := internal.ToLoadReport(test.md) if (err != nil) != test.wantErr { t.Fatalf("orca.ToLoadReport(%v) = %v, wantErr: %v", test.md, err, test.wantErr) } diff --git a/orca/service.go b/orca/service.go index 9400ae0c7e64..ae011fd9a9d2 100644 --- a/orca/service.go +++ b/orca/service.go @@ -120,7 +120,7 @@ func (s *Service) determineReportingInterval(req *v3orcaservicepb.OrcaLoadReport } dur := req.GetReportInterval().AsDuration() if dur < s.minReportingInterval { - logger.Warningf("Received reporting interval %q is less than configured minimum: %v. Using default: %s", dur, s.minReportingInterval) + logger.Warningf("Received reporting interval %q is less than configured minimum: %v. 
Using minimum", dur, s.minReportingInterval) return s.minReportingInterval } return dur From b153b006cee37f7d99748ddb0bdc20ddd90bb425 Mon Sep 17 00:00:00 2001 From: Easwar Swaminathan Date: Mon, 1 May 2023 17:30:53 -0700 Subject: [PATCH 16/60] multiple: standardize import renaming for typed structs (#6238) --- authz/rbac_translator.go | 4 ++-- authz/rbac_translator_test.go | 4 ++-- xds/internal/xdsclient/xdslbregistry/converter.go | 8 ++++---- .../xdsclient/xdslbregistry/tests/converter_test.go | 10 +++++----- .../xdsresource/tests/unmarshal_cds_test.go | 4 ++-- xds/internal/xdsclient/xdsresource/unmarshal_lds.go | 12 ++++++------ .../xdsclient/xdsresource/unmarshal_lds_test.go | 8 ++++---- 7 files changed, 25 insertions(+), 25 deletions(-) diff --git a/authz/rbac_translator.go b/authz/rbac_translator.go index c4fa996e5995..ce5c15cb976d 100644 --- a/authz/rbac_translator.go +++ b/authz/rbac_translator.go @@ -28,7 +28,7 @@ import ( "fmt" "strings" - v1typepb "github.com/cncf/xds/go/udpa/type/v1" + v1xdsudpatypepb "github.com/cncf/xds/go/udpa/type/v1" v3corepb "github.com/envoyproxy/go-control-plane/envoy/config/core/v3" v3rbacpb "github.com/envoyproxy/go-control-plane/envoy/config/rbac/v3" v3routepb "github.com/envoyproxy/go-control-plane/envoy/config/route/v3" @@ -307,7 +307,7 @@ func (options *auditLoggingOptions) toProtos() (allow *v3rbacpb.RBAC_AuditLoggin if config.Config == nil { return nil, nil, fmt.Errorf("AuditLogger Config field cannot be nil") } - typedStruct := &v1typepb.TypedStruct{ + typedStruct := &v1xdsudpatypepb.TypedStruct{ TypeUrl: typedURLPrefix + config.Name, Value: config.Config, } diff --git a/authz/rbac_translator_test.go b/authz/rbac_translator_test.go index fff492cff7ab..fed0ef5c9d33 100644 --- a/authz/rbac_translator_test.go +++ b/authz/rbac_translator_test.go @@ -22,7 +22,7 @@ import ( "strings" "testing" - v1typepb "github.com/cncf/xds/go/udpa/type/v1" + v1xdsudpatypepb "github.com/cncf/xds/go/udpa/type/v1" "github.com/google/go-cmp/cmp" "google.golang.org/protobuf/testing/protocmp" "google.golang.org/protobuf/types/known/anypb" @@ -945,7 +945,7 @@ func TestTranslatePolicy(t *testing.T) { func anyPbHelper(t *testing.T, in map[string]interface{}, name string) *anypb.Any { t.Helper() pb, err := structpb.NewStruct(in) - typedStruct := &v1typepb.TypedStruct{ + typedStruct := &v1xdsudpatypepb.TypedStruct{ TypeUrl: typedURLPrefix + name, Value: pb, } diff --git a/xds/internal/xdsclient/xdslbregistry/converter.go b/xds/internal/xdsclient/xdslbregistry/converter.go index ef13802b0c12..158ad8b199d6 100644 --- a/xds/internal/xdsclient/xdslbregistry/converter.go +++ b/xds/internal/xdsclient/xdslbregistry/converter.go @@ -26,8 +26,8 @@ import ( "fmt" "strings" - v1udpatypepb "github.com/cncf/xds/go/udpa/type/v1" - v3cncftypepb "github.com/cncf/xds/go/xds/type/v3" + v1xdsudpatypepb "github.com/cncf/xds/go/udpa/type/v1" + v3xdsxdstypepb "github.com/cncf/xds/go/xds/type/v3" v3clusterpb "github.com/envoyproxy/go-control-plane/envoy/config/cluster/v3" v3ringhashpb "github.com/envoyproxy/go-control-plane/envoy/extensions/load_balancing_policies/ring_hash/v3" v3wrrlocalitypb "github.com/envoyproxy/go-control-plane/envoy/extensions/load_balancing_policies/wrr_locality/v3" @@ -86,13 +86,13 @@ func convertToServiceConfig(lbPolicy *v3clusterpb.LoadBalancingPolicy, depth int } return convertWrrLocality(wrrlProto, depth) case "type.googleapis.com/xds.type.v3.TypedStruct": - tsProto := &v3cncftypepb.TypedStruct{} + tsProto := 
&v3xdsxdstypepb.TypedStruct{} if err := proto.Unmarshal(policy.GetTypedExtensionConfig().GetTypedConfig().GetValue(), tsProto); err != nil { return nil, fmt.Errorf("failed to unmarshal resource: %v", err) } return convertCustomPolicy(tsProto.GetTypeUrl(), tsProto.GetValue()) case "type.googleapis.com/udpa.type.v1.TypedStruct": - tsProto := &v1udpatypepb.TypedStruct{} + tsProto := &v1xdsudpatypepb.TypedStruct{} if err := proto.Unmarshal(policy.GetTypedExtensionConfig().GetTypedConfig().GetValue(), tsProto); err != nil { return nil, fmt.Errorf("failed to unmarshal resource: %v", err) } diff --git a/xds/internal/xdsclient/xdslbregistry/tests/converter_test.go b/xds/internal/xdsclient/xdslbregistry/tests/converter_test.go index 2ffbad845f8b..2607905dc903 100644 --- a/xds/internal/xdsclient/xdslbregistry/tests/converter_test.go +++ b/xds/internal/xdsclient/xdslbregistry/tests/converter_test.go @@ -24,8 +24,8 @@ import ( "strings" "testing" - v1udpatypepb "github.com/cncf/xds/go/udpa/type/v1" - v3cncftypepb "github.com/cncf/xds/go/xds/type/v3" + v1xdsudpatypepb "github.com/cncf/xds/go/udpa/type/v1" + v3xdsxdstypepb "github.com/cncf/xds/go/xds/type/v3" v3clusterpb "github.com/envoyproxy/go-control-plane/envoy/config/cluster/v3" v3corepb "github.com/envoyproxy/go-control-plane/envoy/config/core/v3" v3leastrequestpb "github.com/envoyproxy/go-control-plane/envoy/extensions/load_balancing_policies/least_request/v3" @@ -172,7 +172,7 @@ func (s) TestConvertToServiceConfigSuccess(t *testing.T) { Policies: []*v3clusterpb.LoadBalancingPolicy_Policy{ { TypedExtensionConfig: &v3corepb.TypedExtensionConfig{ - TypedConfig: testutils.MarshalAny(&v3cncftypepb.TypedStruct{ + TypedConfig: testutils.MarshalAny(&v3xdsxdstypepb.TypedStruct{ TypeUrl: "type.googleapis.com/myorg.MyCustomLeastRequestPolicy", Value: &structpb.Struct{}, }), @@ -191,7 +191,7 @@ func (s) TestConvertToServiceConfigSuccess(t *testing.T) { Policies: []*v3clusterpb.LoadBalancingPolicy_Policy{ { TypedExtensionConfig: &v3corepb.TypedExtensionConfig{ - TypedConfig: testutils.MarshalAny(&v1udpatypepb.TypedStruct{ + TypedConfig: testutils.MarshalAny(&v1xdsudpatypepb.TypedStruct{ TypeUrl: "type.googleapis.com/myorg.MyCustomLeastRequestPolicy", Value: &structpb.Struct{}, }), @@ -230,7 +230,7 @@ func (s) TestConvertToServiceConfigSuccess(t *testing.T) { Policies: []*v3clusterpb.LoadBalancingPolicy_Policy{ { TypedExtensionConfig: &v3corepb.TypedExtensionConfig{ - TypedConfig: wrrLocalityAny(&v3cncftypepb.TypedStruct{ + TypedConfig: wrrLocalityAny(&v3xdsxdstypepb.TypedStruct{ TypeUrl: "type.googleapis.com/myorg.MyCustomLeastRequestPolicy", Value: &structpb.Struct{}, }), diff --git a/xds/internal/xdsclient/xdsresource/tests/unmarshal_cds_test.go b/xds/internal/xdsclient/xdsresource/tests/unmarshal_cds_test.go index dc4e8591f827..7d20b1ff61e4 100644 --- a/xds/internal/xdsclient/xdsresource/tests/unmarshal_cds_test.go +++ b/xds/internal/xdsclient/xdsresource/tests/unmarshal_cds_test.go @@ -37,7 +37,7 @@ import ( "google.golang.org/grpc/xds/internal/xdsclient/xdsresource" "google.golang.org/protobuf/types/known/wrapperspb" - v3cncftypepb "github.com/cncf/xds/go/xds/type/v3" + v3xdsxdstypepb "github.com/cncf/xds/go/xds/type/v3" v3clusterpb "github.com/envoyproxy/go-control-plane/envoy/config/cluster/v3" v3corepb "github.com/envoyproxy/go-control-plane/envoy/config/core/v3" v3endpointpb "github.com/envoyproxy/go-control-plane/envoy/config/endpoint/v3" @@ -457,7 +457,7 @@ func (s) TestValidateCluster_Success(t *testing.T) { 
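The fixtures below build custom policy configs with the renamed imports. As a standalone illustration, a sketch of constructing one such config; the policy name and the choiceCount field are made up for the example, and the canonical github.com module paths are used:

    package lbconfig

    import (
        v3xdsxdstypepb "github.com/cncf/xds/go/xds/type/v3"
        v3corepb "github.com/envoyproxy/go-control-plane/envoy/config/core/v3"
        "google.golang.org/protobuf/types/known/anypb"
        "google.golang.org/protobuf/types/known/structpb"
    )

    // customLBConfig wraps an arbitrary JSON-like config in the new xDS
    // TypedStruct and then in a TypedExtensionConfig, as the tests below do.
    func customLBConfig() (*v3corepb.TypedExtensionConfig, error) {
        cfg, err := structpb.NewStruct(map[string]interface{}{"choiceCount": 2})
        if err != nil {
            return nil, err
        }
        ts := &v3xdsxdstypepb.TypedStruct{
            // The inner TypeUrl names the actual policy implementation.
            TypeUrl: "type.googleapis.com/myorg.MyCustomLeastRequestPolicy",
            Value:   cfg,
        }
        // anypb.New adds the outer TypedStruct type URL around it.
        anyCfg, err := anypb.New(ts)
        if err != nil {
            return nil, err
        }
        return &v3corepb.TypedExtensionConfig{
            Name:        "myorg.MyCustomLeastRequestPolicy",
            TypedConfig: anyCfg,
        }, nil
    }
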
Policies: []*v3clusterpb.LoadBalancingPolicy_Policy{ { TypedExtensionConfig: &v3corepb.TypedExtensionConfig{ - TypedConfig: wrrLocalityAny(&v3cncftypepb.TypedStruct{ + TypedConfig: wrrLocalityAny(&v3xdsxdstypepb.TypedStruct{ TypeUrl: "type.googleapis.com/myorg.MyCustomLeastRequestPolicy", Value: &structpb.Struct{}, }), diff --git a/xds/internal/xdsclient/xdsresource/unmarshal_lds.go b/xds/internal/xdsclient/xdsresource/unmarshal_lds.go index 1cc8a0179582..8f18b02e28a6 100644 --- a/xds/internal/xdsclient/xdsresource/unmarshal_lds.go +++ b/xds/internal/xdsclient/xdsresource/unmarshal_lds.go @@ -22,8 +22,8 @@ import ( "fmt" "strconv" - v1udpatypepb "github.com/cncf/udpa/go/udpa/type/v1" - v3cncftypepb "github.com/cncf/xds/go/xds/type/v3" + v1udpaudpatypepb "github.com/cncf/udpa/go/udpa/type/v1" + v3xdsxdstypepb "github.com/cncf/xds/go/xds/type/v3" v3listenerpb "github.com/envoyproxy/go-control-plane/envoy/config/listener/v3" v3routepb "github.com/envoyproxy/go-control-plane/envoy/config/route/v3" v3httppb "github.com/envoyproxy/go-control-plane/envoy/extensions/filters/network/http_connection_manager/v3" @@ -121,16 +121,16 @@ func processClientSideListener(lis *v3listenerpb.Listener) (*ListenerUpdate, err func unwrapHTTPFilterConfig(config *anypb.Any) (proto.Message, string, error) { switch { - case ptypes.Is(config, &v3cncftypepb.TypedStruct{}): + case ptypes.Is(config, &v3xdsxdstypepb.TypedStruct{}): // The real type name is inside the new TypedStruct message. - s := new(v3cncftypepb.TypedStruct) + s := new(v3xdsxdstypepb.TypedStruct) if err := ptypes.UnmarshalAny(config, s); err != nil { return nil, "", fmt.Errorf("error unmarshalling TypedStruct filter config: %v", err) } return s, s.GetTypeUrl(), nil - case ptypes.Is(config, &v1udpatypepb.TypedStruct{}): + case ptypes.Is(config, &v1udpaudpatypepb.TypedStruct{}): // The real type name is inside the old TypedStruct message. - s := new(v1udpatypepb.TypedStruct) + s := new(v1udpaudpatypepb.TypedStruct) if err := ptypes.UnmarshalAny(config, s); err != nil { return nil, "", fmt.Errorf("error unmarshalling TypedStruct filter config: %v", err) } diff --git a/xds/internal/xdsclient/xdsresource/unmarshal_lds_test.go b/xds/internal/xdsclient/xdsresource/unmarshal_lds_test.go index d2ce5ac34424..2dfeb5965b72 100644 --- a/xds/internal/xdsclient/xdsresource/unmarshal_lds_test.go +++ b/xds/internal/xdsclient/xdsresource/unmarshal_lds_test.go @@ -34,8 +34,8 @@ import ( "google.golang.org/grpc/xds/internal/xdsclient/xdsresource/version" "google.golang.org/protobuf/types/known/durationpb" - v1udpatypepb "github.com/cncf/udpa/go/udpa/type/v1" - v3cncftypepb "github.com/cncf/xds/go/xds/type/v3" + v1udpaudpatypepb "github.com/cncf/udpa/go/udpa/type/v1" + v3xdsxdstypepb "github.com/cncf/xds/go/xds/type/v3" v3corepb "github.com/envoyproxy/go-control-plane/envoy/config/core/v3" v3listenerpb "github.com/envoyproxy/go-control-plane/envoy/config/listener/v3" rpb "github.com/envoyproxy/go-control-plane/envoy/config/rbac/v3" @@ -1830,7 +1830,7 @@ var clientOnlyCustomFilterConfig = &anypb.Any{ } // This custom filter uses the old TypedStruct message from the cncf/udpa repo. 
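Both generations carry the real type URL inside the wrapper, so consumers must check for either one. A simplified analogue of unwrapHTTPFilterConfig, sketched with the non-deprecated anypb helpers instead of ptypes, using the canonical github.com module paths:

    package filterutil

    import (
        v1udpatypepb "github.com/cncf/udpa/go/udpa/type/v1"
        v3xdsxdstypepb "github.com/cncf/xds/go/xds/type/v3"
        "google.golang.org/protobuf/types/known/anypb"
    )

    // realTypeURL returns the inner type URL for either TypedStruct flavor,
    // or the Any's own URL for ordinary messages.
    func realTypeURL(cfg *anypb.Any) (string, error) {
        if s := new(v3xdsxdstypepb.TypedStruct); cfg.MessageIs(s) {
            if err := cfg.UnmarshalTo(s); err != nil {
                return "", err
            }
            return s.GetTypeUrl(), nil
        }
        if s := new(v1udpatypepb.TypedStruct); cfg.MessageIs(s) {
            if err := cfg.UnmarshalTo(s); err != nil {
                return "", err
            }
            return s.GetTypeUrl(), nil
        }
        return cfg.GetTypeUrl(), nil
    }
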
-var customFilterOldTypedStructConfig = &v1udpatypepb.TypedStruct{ +var customFilterOldTypedStructConfig = &v1udpaudpatypepb.TypedStruct{ TypeUrl: "custom.filter", Value: &spb.Struct{ Fields: map[string]*spb.Value{ @@ -1841,7 +1841,7 @@ var customFilterOldTypedStructConfig = &v1udpatypepb.TypedStruct{ var wrappedCustomFilterOldTypedStructConfig *anypb.Any // This custom filter uses the new TypedStruct message from the cncf/xds repo. -var customFilterNewTypedStructConfig = &v3cncftypepb.TypedStruct{ +var customFilterNewTypedStructConfig = &v3xdsxdstypepb.TypedStruct{ TypeUrl: "custom.filter", Value: &spb.Struct{ Fields: map[string]*spb.Value{ From ed3ceba60557551492e669049870572d3a4b05be Mon Sep 17 00:00:00 2001 From: Doug Fawley Date: Tue, 2 May 2023 10:09:23 -0700 Subject: [PATCH 17/60] balancer: make producer RPCs block until the SubConn is READY (#6236) --- balancer_conn_wrappers.go | 13 ++-- clientconn.go | 30 +++++++- orca/producer.go | 15 ++-- test/balancer_test.go | 146 ++++++++++++++++++++++++++++++++++++++ 4 files changed, 187 insertions(+), 17 deletions(-) diff --git a/balancer_conn_wrappers.go b/balancer_conn_wrappers.go index 978ed69fdf75..d0383f04748c 100644 --- a/balancer_conn_wrappers.go +++ b/balancer_conn_wrappers.go @@ -25,14 +25,12 @@ import ( "sync" "google.golang.org/grpc/balancer" - "google.golang.org/grpc/codes" "google.golang.org/grpc/connectivity" "google.golang.org/grpc/internal/balancer/gracefulswitch" "google.golang.org/grpc/internal/buffer" "google.golang.org/grpc/internal/channelz" "google.golang.org/grpc/internal/grpcsync" "google.golang.org/grpc/resolver" - "google.golang.org/grpc/status" ) // ccBalancerWrapper sits between the ClientConn and the Balancer. @@ -405,14 +403,13 @@ func (acbw *acBalancerWrapper) getAddrConn() *addrConn { return acbw.ac } -var errSubConnNotReady = status.Error(codes.Unavailable, "SubConn not currently connected") - // NewStream begins a streaming RPC on the addrConn. If the addrConn is not -// ready, returns errSubConnNotReady. +// ready, blocks until it is or ctx expires. Returns an error when the context +// expires or the addrConn is shut down. func (acbw *acBalancerWrapper) NewStream(ctx context.Context, desc *StreamDesc, method string, opts ...CallOption) (ClientStream, error) { - transport := acbw.ac.getReadyTransport() - if transport == nil { - return nil, errSubConnNotReady + transport, err := acbw.ac.getTransport(ctx) + if err != nil { + return nil, err } return newNonRetryClientStream(ctx, desc, method, transport, acbw.ac, opts...) } diff --git a/clientconn.go b/clientconn.go index e67a990b24fb..50d08a49a205 100644 --- a/clientconn.go +++ b/clientconn.go @@ -742,6 +742,7 @@ func (cc *ClientConn) newAddrConn(addrs []resolver.Address, opts balancer.NewSub dopts: cc.dopts, czData: new(channelzData), resetBackoff: make(chan struct{}), + stateChan: make(chan struct{}), } ac.ctx, ac.cancel = context.WithCancel(cc.ctx) // Track ac in cc. This needs to be done before any getTransport(...) is called. @@ -1122,7 +1123,8 @@ type addrConn struct { addrs []resolver.Address // All addresses that the resolver resolved to. // Use updateConnectivityState for updating addrConn's connectivity state. - state connectivity.State + state connectivity.State + stateChan chan struct{} // closed and recreated on every state change. backoffIdx int // Needs to be stateful for resetConnectBackoff. 
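The stateChan field added above is an instance of a common Go broadcast idiom: a channel that is closed, waking every waiter at once, and immediately recreated on each state change. A self-contained sketch of the pattern with simplified types, not grpc-go code:

    package main

    import (
        "fmt"
        "sync"
        "time"
    )

    type cell struct {
        mu    sync.Mutex
        state int
        ch    chan struct{} // closed and recreated on every state change
    }

    func newCell() *cell { return &cell{ch: make(chan struct{})} }

    func (c *cell) set(s int) {
        c.mu.Lock()
        defer c.mu.Unlock()
        if c.state == s {
            return
        }
        c.state = s
        close(c.ch) // wake everyone currently waiting
        c.ch = make(chan struct{})
    }

    // waitFor blocks until the state equals want, re-checking after every
    // broadcast, as the getTransport helper added later in this patch does.
    func (c *cell) waitFor(want int) {
        for {
            c.mu.Lock()
            s, ch := c.state, c.ch
            c.mu.Unlock()
            if s == want {
                return
            }
            <-ch
        }
    }

    func main() {
        c := newCell()
        go func() { time.Sleep(10 * time.Millisecond); c.set(1) }()
        c.waitFor(1)
        fmt.Println("reached state 1")
    }
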
resetBackoff chan struct{} @@ -1136,6 +1138,9 @@ func (ac *addrConn) updateConnectivityState(s connectivity.State, lastErr error) if ac.state == s { return } + // When changing states, reset the state change channel. + close(ac.stateChan) + ac.stateChan = make(chan struct{}) ac.state = s if lastErr == nil { channelz.Infof(logger, ac.channelzID, "Subchannel Connectivity change to %v", s) @@ -1438,6 +1443,29 @@ func (ac *addrConn) getReadyTransport() transport.ClientTransport { return nil } +// getTransport waits until the addrconn is ready and returns the transport. +// If the context expires first, returns an appropriate status. If the +// addrConn is stopped first, returns an Unavailable status error. +func (ac *addrConn) getTransport(ctx context.Context) (transport.ClientTransport, error) { + for ctx.Err() == nil { + ac.mu.Lock() + t, state, sc := ac.transport, ac.state, ac.stateChan + ac.mu.Unlock() + if state == connectivity.Ready { + return t, nil + } + if state == connectivity.Shutdown { + return nil, status.Errorf(codes.Unavailable, "SubConn shutting down") + } + + select { + case <-ctx.Done(): + case <-sc: + } + } + return nil, status.FromContextError(ctx.Err()).Err() +} + // tearDown starts to tear down the addrConn. // // Note that tearDown doesn't remove ac from ac.cc.conns, so the addrConn struct diff --git a/orca/producer.go b/orca/producer.go index 559033116667..956d5ddfb52d 100644 --- a/orca/producer.go +++ b/orca/producer.go @@ -79,8 +79,8 @@ func RegisterOOBListener(sc balancer.SubConn, l OOBListener, opts OOBListenerOpt p := pr.(*producer) p.registerListener(l, opts.ReportInterval) - // TODO: When we can register for SubConn state updates, don't call run() - // until READY and automatically call stop() on SHUTDOWN. + // TODO: When we can register for SubConn state updates, automatically call + // stop() on SHUTDOWN. // If stop is called multiple times, prevent it from having any effect on // subsequent calls. @@ -175,12 +175,11 @@ func (p *producer) run(ctx context.Context) { logger.Error("Server doesn't support ORCA OOB load reporting protocol; not listening for load reports.") return case status.Code(err) == codes.Unavailable: - // The SubConn is not currently ready; backoff silently. - // - // TODO: don't attempt the stream until the state is READY to - // minimize the chances of this case and to avoid using the - // exponential backoff mechanism, as we should know it's safe to - // retry when the state is READY again. + // TODO: this code should ideally log an error, too, but for now we + // receive this code when shutting down the ClientConn. Once we + // can determine the state or ensure the producer is stopped before + // the stream ends, we can log an error when it's not a natural + // shutdown. default: // Log all other errors. 
logger.Error("Received unexpected stream error:", err) diff --git a/test/balancer_test.go b/test/balancer_test.go index c9a769c9f5a4..950d31d13ed5 100644 --- a/test/balancer_test.go +++ b/test/balancer_test.go @@ -38,6 +38,7 @@ import ( "google.golang.org/grpc/credentials/insecure" "google.golang.org/grpc/internal/balancer/stub" "google.golang.org/grpc/internal/balancerload" + "google.golang.org/grpc/internal/grpcsync" "google.golang.org/grpc/internal/grpcutil" imetadata "google.golang.org/grpc/internal/metadata" "google.golang.org/grpc/internal/stubserver" @@ -1004,3 +1005,148 @@ func (s) TestMetadataInPickResult(t *testing.T) { t.Fatalf("Mismatch in custom metadata received at test backend, got: %v, want %v", gotMDVal, wantMDVal) } } + +// producerTestBalancerBuilder and producerTestBalancer start a producer which +// makes an RPC before the subconn is READY, then connects the subconn, and +// pushes the resulting error (expected to be nil) to rpcErrChan. +type producerTestBalancerBuilder struct { + rpcErrChan chan error + ctxChan chan context.Context +} + +func (bb *producerTestBalancerBuilder) Build(cc balancer.ClientConn, opts balancer.BuildOptions) balancer.Balancer { + return &producerTestBalancer{cc: cc, rpcErrChan: bb.rpcErrChan, ctxChan: bb.ctxChan} +} + +const producerTestBalancerName = "producer_test_balancer" + +func (bb *producerTestBalancerBuilder) Name() string { return producerTestBalancerName } + +type producerTestBalancer struct { + cc balancer.ClientConn + rpcErrChan chan error + ctxChan chan context.Context +} + +func (b *producerTestBalancer) UpdateClientConnState(ccs balancer.ClientConnState) error { + // Create the subconn, but don't connect it. + sc, err := b.cc.NewSubConn(ccs.ResolverState.Addresses, balancer.NewSubConnOptions{}) + if err != nil { + return fmt.Errorf("error creating subconn: %v", err) + } + + // Create the producer. This will call the producer builder's Build + // method, which will try to start an RPC in a goroutine. + p := &testProducerBuilder{start: grpcsync.NewEvent(), rpcErrChan: b.rpcErrChan, ctxChan: b.ctxChan} + sc.GetOrBuildProducer(p) + + // Wait here until the producer is about to perform the RPC, which should + // block until connected. + <-p.start.Done() + + // Ensure the error chan doesn't get anything on it before we connect the + // subconn. + select { + case err := <-b.rpcErrChan: + go func() { b.rpcErrChan <- fmt.Errorf("Got unexpected data on rpcErrChan: %v", err) }() + default: + } + + // Now we can connect, which will unblock the RPC above. + sc.Connect() + + // The stub server requires a READY picker to be reported, to unblock its + // Start method. We won't make RPCs in our test, so a nil picker is okay. + b.cc.UpdateState(balancer.State{ConnectivityState: connectivity.Ready, Picker: nil}) + return nil +} + +func (b *producerTestBalancer) ResolverError(err error) { + panic(fmt.Sprintf("Unexpected resolver error: %v", err)) +} + +func (b *producerTestBalancer) UpdateSubConnState(balancer.SubConn, balancer.SubConnState) {} +func (b *producerTestBalancer) Close() {} + +type testProducerBuilder struct { + start *grpcsync.Event + rpcErrChan chan error + ctxChan chan context.Context +} + +func (b *testProducerBuilder) Build(cci interface{}) (balancer.Producer, func()) { + c := testgrpc.NewTestServiceClient(cci.(grpc.ClientConnInterface)) + // Perform the RPC in a goroutine instead of during build because the + // subchannel's mutex is held here. 
+ go func() { + ctx := <-b.ctxChan + b.start.Fire() + _, err := c.EmptyCall(ctx, &testpb.Empty{}) + b.rpcErrChan <- err + }() + return nil, func() {} +} + +// TestBalancerProducerBlockUntilReady tests that we get no RPC errors from +// producers when subchannels aren't ready. +func (s) TestBalancerProducerBlockUntilReady(t *testing.T) { + // rpcErrChan is given to the LB policy to report the status of the + // producer's one RPC. + ctxChan := make(chan context.Context, 1) + ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout) + defer cancel() + ctxChan <- ctx + rpcErrChan := make(chan error) + balancer.Register(&producerTestBalancerBuilder{rpcErrChan: rpcErrChan, ctxChan: ctxChan}) + + ss := &stubserver.StubServer{ + EmptyCallF: func(ctx context.Context, in *testpb.Empty) (*testpb.Empty, error) { + return &testpb.Empty{}, nil + }, + } + + // Start the server & client with the test producer LB policy. + svcCfg := fmt.Sprintf(`{"loadBalancingConfig": [{"%s":{}}]}`, producerTestBalancerName) + if err := ss.Start(nil, grpc.WithDefaultServiceConfig(svcCfg)); err != nil { + t.Fatalf("Error starting testing server: %v", err) + } + defer ss.Stop() + + // Receive the error from the producer's RPC, which should be nil. + if err := <-rpcErrChan; err != nil { + t.Fatalf("Received unexpected error from producer RPC: %v", err) + } +} + +// TestBalancerProducerHonorsContext tests that producers that perform RPC get +// context errors correctly. +func (s) TestBalancerProducerHonorsContext(t *testing.T) { + // rpcErrChan is given to the LB policy to report the status of the + // producer's one RPC. + ctxChan := make(chan context.Context, 1) + ctx, cancel := context.WithCancel(context.Background()) + ctxChan <- ctx + + rpcErrChan := make(chan error) + balancer.Register(&producerTestBalancerBuilder{rpcErrChan: rpcErrChan, ctxChan: ctxChan}) + + ss := &stubserver.StubServer{ + EmptyCallF: func(ctx context.Context, in *testpb.Empty) (*testpb.Empty, error) { + return &testpb.Empty{}, nil + }, + } + + // Start the server & client with the test producer LB policy. + svcCfg := fmt.Sprintf(`{"loadBalancingConfig": [{"%s":{}}]}`, producerTestBalancerName) + if err := ss.Start(nil, grpc.WithDefaultServiceConfig(svcCfg)); err != nil { + t.Fatalf("Error starting testing server: %v", err) + } + defer ss.Stop() + + cancel() + + // Receive the error from the producer's RPC, which should be canceled. 
+ if err := <-rpcErrChan; status.Code(err) != codes.Canceled { + t.Fatalf("RPC error: %v; want status.Code(err)=%v", err, codes.Canceled) + } +} From 40d01479bb3abdfee035dd1843894b30b0528da3 Mon Sep 17 00:00:00 2001 From: Doug Fawley Date: Tue, 2 May 2023 14:07:59 -0700 Subject: [PATCH 18/60] googledirectpatph: enable ignore_resource_deletion in bootstrap (#6243) --- xds/googledirectpath/googlec2p.go | 2 +- xds/googledirectpath/googlec2p_test.go | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/xds/googledirectpath/googlec2p.go b/xds/googledirectpath/googlec2p.go index fa7b770d878b..20891c7a4cb8 100644 --- a/xds/googledirectpath/googlec2p.go +++ b/xds/googledirectpath/googlec2p.go @@ -120,7 +120,7 @@ func (c2pResolverBuilder) Build(t resolver.Target, cc resolver.ClientConn, opts { "server_uri": "%s", "channel_creds": [{"type": "google_default"}], - "server_features": ["xds_v3"] + "server_features": ["xds_v3", "ignore_resource_deletion"] }`, balancerName))) if err != nil { return nil, fmt.Errorf("failed to build bootstrap configuration: %v", err) diff --git a/xds/googledirectpath/googlec2p_test.go b/xds/googledirectpath/googlec2p_test.go index 961f6546bf41..44e1a68e2387 100644 --- a/xds/googledirectpath/googlec2p_test.go +++ b/xds/googledirectpath/googlec2p_test.go @@ -216,7 +216,7 @@ func TestBuildXDS(t *testing.T) { wantServerConfig, err := bootstrap.ServerConfigFromJSON([]byte(fmt.Sprintf(`{ "server_uri": "%s", "channel_creds": [{"type": "google_default"}], - "server_features": ["xds_v3"] + "server_features": ["xds_v3", "ignore_resource_deletion"] }`, tdURL))) if err != nil { t.Fatalf("Failed to build server bootstrap config: %v", err) From add90153d43b1143dd8d74fc9675bf1fe3f1e607 Mon Sep 17 00:00:00 2001 From: Doug Fawley Date: Tue, 2 May 2023 15:04:33 -0700 Subject: [PATCH 19/60] orca: allow a ServerMetricsProvider to be passed to the ORCA service and ServerOption (#6223) --- examples/features/orca/server/main.go | 22 +- orca/call_metric_recorder.go | 130 --------- orca/call_metrics.go | 196 +++++++++++++ ..._recorder_test.go => call_metrics_test.go} | 30 +- orca/orca.go | 113 +------- orca/producer_test.go | 18 +- orca/server_metrics.go | 270 ++++++++++++++++++ orca/service.go | 85 ++---- orca/service_test.go | 26 +- 9 files changed, 546 insertions(+), 344 deletions(-) delete mode 100644 orca/call_metric_recorder.go create mode 100644 orca/call_metrics.go rename orca/{call_metric_recorder_test.go => call_metrics_test.go} (91%) create mode 100644 orca/server_metrics.go diff --git a/examples/features/orca/server/main.go b/examples/features/orca/server/main.go index 5d4bdb163a17..e52d5d06eebf 100644 --- a/examples/features/orca/server/main.go +++ b/examples/features/orca/server/main.go @@ -44,9 +44,9 @@ type server struct { func (s *server) UnaryEcho(ctx context.Context, in *pb.EchoRequest) (*pb.EchoResponse, error) { // Report a sample cost for this query. - cmr := orca.CallMetricRecorderFromContext(ctx) + cmr := orca.CallMetricsRecorderFromContext(ctx) if cmr == nil { - return nil, status.Errorf(codes.Internal, "unable to retrieve call metric recorder (missing ORCA ServerOption?)") + return nil, status.Errorf(codes.Internal, "unable to retrieve call metrics recorder (missing ORCA ServerOption?)") } cmr.SetRequestCost("db_queries", 10) @@ -63,27 +63,31 @@ func main() { fmt.Printf("Server listening at %v\n", lis.Addr()) // Create the gRPC server with the orca.CallMetricsServerOption() option, - // which will enable per-call metric recording. 
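The hunk continues below with the old one-argument call replaced by the new form. Condensed into a single sketch, the new wiring looks like this; it is one possible arrangement using only exported orca APIs (the patch itself passes nil to CallMetricsServerOption so that per-call trailers carry only per-call metrics, with out-of-band reporting enabled separately):

    package orcaserver

    import (
        "log"

        "google.golang.org/grpc"
        "google.golang.org/grpc/orca"
    )

    // newServer wires per-call and out-of-band metrics through one recorder.
    func newServer() *grpc.Server {
        smr := orca.NewServerMetricsRecorder()
        s := grpc.NewServer(orca.CallMetricsServerOption(smr))
        if err := orca.Register(s, orca.ServiceOptions{ServerMetricsProvider: smr}); err != nil {
            log.Fatalf("Failed to register ORCA service: %v", err)
        }
        // Utilization set here is visible to both reporting paths.
        smr.SetCPUUtilization(0.5)
        return s
    }
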
- s := grpc.NewServer(orca.CallMetricsServerOption()) + // which will enable per-call metric recording. No ServerMetricsProvider + // is given here because the out-of-band reporting is enabled separately. + s := grpc.NewServer(orca.CallMetricsServerOption(nil)) pb.RegisterEchoServer(s, &server{}) // Register the orca service for out-of-band metric reporting, and set the // minimum reporting interval to 3 seconds. Note that, by default, the // minimum interval must be at least 30 seconds, but 3 seconds is set via // an internal-only option for illustration purposes only. - opts := orca.ServiceOptions{MinReportingInterval: 3 * time.Second} + smr := orca.NewServerMetricsRecorder() + opts := orca.ServiceOptions{ + MinReportingInterval: 3 * time.Second, + ServerMetricsProvider: smr, + } internal.ORCAAllowAnyMinReportingInterval.(func(so *orca.ServiceOptions))(&opts) - orcaSvc, err := orca.Register(s, opts) - if err != nil { + if err := orca.Register(s, opts); err != nil { log.Fatalf("Failed to register ORCA service: %v", err) } // Simulate CPU utilization reporting. go func() { for { - orcaSvc.SetCPUUtilization(.5) + smr.SetCPUUtilization(.5) time.Sleep(2 * time.Second) - orcaSvc.SetCPUUtilization(.9) + smr.SetCPUUtilization(.9) time.Sleep(2 * time.Second) } }() diff --git a/orca/call_metric_recorder.go b/orca/call_metric_recorder.go deleted file mode 100644 index 62f2a1a6c220..000000000000 --- a/orca/call_metric_recorder.go +++ /dev/null @@ -1,130 +0,0 @@ -/* - * - * Copyright 2022 gRPC authors. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * - */ - -package orca - -import ( - "context" - "sync" - "sync/atomic" - - v3orcapb "github.com/cncf/xds/go/xds/data/orca/v3" -) - -// CallMetricRecorder provides functionality to record per-RPC custom backend -// metrics. See CallMetricsServerOption() for more details. -// -// Safe for concurrent use. -type CallMetricRecorder struct { - cpu atomic.Value // float64 - memory atomic.Value // float64 - - mu sync.RWMutex - requestCost map[string]float64 - utilization map[string]float64 -} - -func newCallMetricRecorder() *CallMetricRecorder { - return &CallMetricRecorder{ - requestCost: make(map[string]float64), - utilization: make(map[string]float64), - } -} - -// SetCPUUtilization records a measurement for the CPU utilization metric. -func (c *CallMetricRecorder) SetCPUUtilization(val float64) { - c.cpu.Store(val) -} - -// SetMemoryUtilization records a measurement for the memory utilization metric. -func (c *CallMetricRecorder) SetMemoryUtilization(val float64) { - c.memory.Store(val) -} - -// SetRequestCost records a measurement for a request cost metric, -// uniquely identifiable by name. -func (c *CallMetricRecorder) SetRequestCost(name string, val float64) { - c.mu.Lock() - c.requestCost[name] = val - c.mu.Unlock() -} - -// SetUtilization records a measurement for a utilization metric uniquely -// identifiable by name. 
-func (c *CallMetricRecorder) SetUtilization(name string, val float64) { - c.mu.Lock() - c.utilization[name] = val - c.mu.Unlock() -} - -// toLoadReportProto dumps the recorded measurements as an OrcaLoadReport proto. -func (c *CallMetricRecorder) toLoadReportProto() *v3orcapb.OrcaLoadReport { - c.mu.RLock() - defer c.mu.RUnlock() - - cost := make(map[string]float64, len(c.requestCost)) - for k, v := range c.requestCost { - cost[k] = v - } - util := make(map[string]float64, len(c.utilization)) - for k, v := range c.utilization { - util[k] = v - } - cpu, _ := c.cpu.Load().(float64) - mem, _ := c.memory.Load().(float64) - return &v3orcapb.OrcaLoadReport{ - CpuUtilization: cpu, - MemUtilization: mem, - RequestCost: cost, - Utilization: util, - } -} - -type callMetricRecorderCtxKey struct{} - -// CallMetricRecorderFromContext returns the RPC specific custom metrics -// recorder [CallMetricRecorder] embedded in the provided RPC context. -// -// Returns nil if no custom metrics recorder is found in the provided context, -// which will be the case when custom metrics reporting is not enabled. -func CallMetricRecorderFromContext(ctx context.Context) *CallMetricRecorder { - rw, ok := ctx.Value(callMetricRecorderCtxKey{}).(*recorderWrapper) - if !ok { - return nil - } - return rw.recorder() -} - -func newContextWithRecorderWrapper(ctx context.Context, r *recorderWrapper) context.Context { - return context.WithValue(ctx, callMetricRecorderCtxKey{}, r) -} - -// recorderWrapper is a wrapper around a CallMetricRecorder to ensures that -// concurrent calls to CallMetricRecorderFromContext() results in only one -// allocation of the underlying metric recorder. -type recorderWrapper struct { - once sync.Once - r *CallMetricRecorder -} - -func (rw *recorderWrapper) recorder() *CallMetricRecorder { - rw.once.Do(func() { - rw.r = newCallMetricRecorder() - }) - return rw.r -} diff --git a/orca/call_metrics.go b/orca/call_metrics.go new file mode 100644 index 000000000000..558c7bce6a8e --- /dev/null +++ b/orca/call_metrics.go @@ -0,0 +1,196 @@ +/* + * + * Copyright 2022 gRPC authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +package orca + +import ( + "context" + "sync" + + "google.golang.org/grpc" + grpcinternal "google.golang.org/grpc/internal" + "google.golang.org/grpc/metadata" + "google.golang.org/grpc/orca/internal" + "google.golang.org/protobuf/proto" +) + +// CallMetricsRecorder allows a service method handler to record per-RPC +// metrics. It contains all utilization-based metrics from +// ServerMetricsRecorder as well as additional request cost metrics. +type CallMetricsRecorder interface { + ServerMetricsRecorder + + // SetRequestCost sets the relevant server metric. + SetRequestCost(name string, val float64) + // DeleteRequestCost deletes the relevant server metric to prevent it + // from being sent. + DeleteRequestCost(name string) + + // SetNamedMetric sets the relevant server metric. 
+ SetNamedMetric(name string, val float64) + // DeleteNamedMetric deletes the relevant server metric to prevent it + // from being sent. + DeleteNamedMetric(name string) +} + +type callMetricsRecorderCtxKey struct{} + +// CallMetricsRecorderFromContext returns the RPC-specific custom metrics +// recorder embedded in the provided RPC context. +// +// Returns nil if no custom metrics recorder is found in the provided context, +// which will be the case when custom metrics reporting is not enabled. +func CallMetricsRecorderFromContext(ctx context.Context) CallMetricsRecorder { + rw, ok := ctx.Value(callMetricsRecorderCtxKey{}).(*recorderWrapper) + if !ok { + return nil + } + return rw.recorder() +} + +// recorderWrapper is a wrapper around a CallMetricsRecorder to ensure that +// concurrent calls to CallMetricsRecorderFromContext() results in only one +// allocation of the underlying metrics recorder, while also allowing for lazy +// initialization of the recorder itself. +type recorderWrapper struct { + once sync.Once + r CallMetricsRecorder + smp ServerMetricsProvider +} + +func (rw *recorderWrapper) recorder() CallMetricsRecorder { + rw.once.Do(func() { + rw.r = newServerMetricsRecorder() + }) + return rw.r +} + +// setTrailerMetadata adds a trailer metadata entry with key being set to +// `internal.TrailerMetadataKey` and value being set to the binary-encoded +// orca.OrcaLoadReport protobuf message. +// +// This function is called from the unary and streaming interceptors defined +// above. Any errors encountered here are not propagated to the caller because +// they are ignored there. Hence we simply log any errors encountered here at +// warning level, and return nothing. +func (rw *recorderWrapper) setTrailerMetadata(ctx context.Context) { + var sm *ServerMetrics + if rw.smp != nil { + sm = rw.smp.ServerMetrics() + sm.merge(rw.r.ServerMetrics()) + } else { + sm = rw.r.ServerMetrics() + } + + b, err := proto.Marshal(sm.toLoadReportProto()) + if err != nil { + logger.Warningf("Failed to marshal load report: %v", err) + return + } + if err := grpc.SetTrailer(ctx, metadata.Pairs(internal.TrailerMetadataKey, string(b))); err != nil { + logger.Warningf("Failed to set trailer metadata: %v", err) + } +} + +var joinServerOptions = grpcinternal.JoinServerOptions.(func(...grpc.ServerOption) grpc.ServerOption) + +// CallMetricsServerOption returns a server option which enables the reporting +// of per-RPC custom backend metrics for unary and streaming RPCs. +// +// Server applications interested in injecting custom backend metrics should +// pass the server option returned from this function as the first argument to +// grpc.NewServer(). +// +// Subsequently, server RPC handlers can retrieve a reference to the RPC +// specific custom metrics recorder [CallMetricsRecorder] to be used, via a call +// to CallMetricsRecorderFromContext(), and inject custom metrics at any time +// during the RPC lifecycle. +// +// The injected custom metrics will be sent as part of trailer metadata, as a +// binary-encoded [ORCA LoadReport] protobuf message, with the metadata key +// being set be "endpoint-load-metrics-bin". +// +// If a non-nil ServerMetricsProvider is provided, the gRPC server will +// transmit the metrics it provides, overwritten by any per-RPC metrics given +// to the CallMetricsRecorder. A ServerMetricsProvider is typically obtained +// by calling NewServerMetricsRecorder. 
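To make the handler side concrete, a minimal sketch of a unary handler recording per-RPC metrics; the echo types are hypothetical stand-ins for generated code:

    package handler

    import (
        "context"

        "google.golang.org/grpc/codes"
        "google.golang.org/grpc/orca"
        "google.golang.org/grpc/status"
    )

    type echoRequest struct{ Msg string }
    type echoResponse struct{ Msg string }

    func unaryEcho(ctx context.Context, in *echoRequest) (*echoResponse, error) {
        rec := orca.CallMetricsRecorderFromContext(ctx)
        if rec == nil {
            // The server was not built with orca.CallMetricsServerOption.
            return nil, status.Error(codes.Internal, "per-call metrics recorder missing")
        }
        rec.SetRequestCost("db_queries", 10)  // request cost, per-call only
        rec.SetNamedUtilization("queue", 0.2) // overrides any provider value for this RPC
        return &echoResponse{Msg: in.Msg}, nil
    }
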
+// +// [ORCA LoadReport]: https://github.com/cncf/xds/blob/main/xds/data/orca/v3/orca_load_report.proto#L15 +func CallMetricsServerOption(smp ServerMetricsProvider) grpc.ServerOption { + return joinServerOptions(grpc.ChainUnaryInterceptor(unaryInt(smp)), grpc.ChainStreamInterceptor(streamInt(smp))) +} + +func unaryInt(smp ServerMetricsProvider) func(ctx context.Context, req interface{}, _ *grpc.UnaryServerInfo, handler grpc.UnaryHandler) (interface{}, error) { + return func(ctx context.Context, req interface{}, _ *grpc.UnaryServerInfo, handler grpc.UnaryHandler) (interface{}, error) { + // We don't allocate the metric recorder here. It will be allocated the + // first time the user calls CallMetricsRecorderFromContext(). + rw := &recorderWrapper{smp: smp} + ctxWithRecorder := newContextWithRecorderWrapper(ctx, rw) + + resp, err := handler(ctxWithRecorder, req) + + // It is safe to access the underlying metric recorder inside the wrapper at + // this point, as the user's RPC handler is done executing, and therefore + // there will be no more calls to CallMetricsRecorderFromContext(), which is + // where the metric recorder is lazy allocated. + if rw.r != nil { + rw.setTrailerMetadata(ctx) + } + return resp, err + } +} + +func streamInt(smp ServerMetricsProvider) func(srv interface{}, ss grpc.ServerStream, info *grpc.StreamServerInfo, handler grpc.StreamHandler) error { + return func(srv interface{}, ss grpc.ServerStream, info *grpc.StreamServerInfo, handler grpc.StreamHandler) error { + // We don't allocate the metric recorder here. It will be allocated the + // first time the user calls CallMetricsRecorderFromContext(). + rw := &recorderWrapper{smp: smp} + ws := &wrappedStream{ + ServerStream: ss, + ctx: newContextWithRecorderWrapper(ss.Context(), rw), + } + + err := handler(srv, ws) + + // It is safe to access the underlying metric recorder inside the wrapper at + // this point, as the user's RPC handler is done executing, and therefore + // there will be no more calls to CallMetricsRecorderFromContext(), which is + // where the metric recorder is lazy allocated. + if rw.r != nil { + rw.setTrailerMetadata(ss.Context()) + } + return err + } +} + +func newContextWithRecorderWrapper(ctx context.Context, r *recorderWrapper) context.Context { + return context.WithValue(ctx, callMetricsRecorderCtxKey{}, r) +} + +// wrappedStream wraps the grpc.ServerStream received by the streaming +// interceptor. Overrides only the Context() method to return a context which +// contains a reference to the CallMetricsRecorder corresponding to this +// stream. +type wrappedStream struct { + grpc.ServerStream + ctx context.Context +} + +func (w *wrappedStream) Context() context.Context { + return w.ctx +} diff --git a/orca/call_metric_recorder_test.go b/orca/call_metrics_test.go similarity index 91% rename from orca/call_metric_recorder_test.go rename to orca/call_metrics_test.go index 43d0e45291e2..4374b593b9f1 100644 --- a/orca/call_metric_recorder_test.go +++ b/orca/call_metrics_test.go @@ -78,23 +78,24 @@ func (s) TestE2ECallMetricsUnary(t *testing.T) { for _, test := range tests { t.Run(test.desc, func(t *testing.T) { - // A server option to enables reporting of per-call backend metrics. - callMetricsServerOption := orca.CallMetricsServerOption() + // A server option to enable reporting of per-call backend metrics. 
+ smr := orca.NewServerMetricsRecorder() + callMetricsServerOption := orca.CallMetricsServerOption(smr) + smr.SetCPUUtilization(1.0) // An interceptor to injects custom backend metrics, added only when // the injectMetrics field in the test is set. injectingInterceptor := func(ctx context.Context, req interface{}, info *grpc.UnaryServerInfo, handler grpc.UnaryHandler) (resp interface{}, err error) { - recorder := orca.CallMetricRecorderFromContext(ctx) + recorder := orca.CallMetricsRecorderFromContext(ctx) if recorder == nil { err := errors.New("Failed to retrieve per-RPC custom metrics recorder from the RPC context") t.Error(err) return nil, err } - recorder.SetCPUUtilization(1.0) recorder.SetMemoryUtilization(50.0) // This value will be overwritten by a write to the same metric // from the server handler. - recorder.SetUtilization("queueSize", 1.0) + recorder.SetNamedUtilization("queueSize", 1.0) return handler(ctx, req) } @@ -106,14 +107,14 @@ func (s) TestE2ECallMetricsUnary(t *testing.T) { if !test.injectMetrics { return &testpb.Empty{}, nil } - recorder := orca.CallMetricRecorderFromContext(ctx) + recorder := orca.CallMetricsRecorderFromContext(ctx) if recorder == nil { err := errors.New("Failed to retrieve per-RPC custom metrics recorder from the RPC context") t.Error(err) return nil, err } recorder.SetRequestCost("queryCost", 25.0) - recorder.SetUtilization("queueSize", 75.0) + recorder.SetNamedUtilization("queueSize", 75.0) return &testpb.Empty{}, nil }, } @@ -183,23 +184,24 @@ func (s) TestE2ECallMetricsStreaming(t *testing.T) { for _, test := range tests { t.Run(test.desc, func(t *testing.T) { - // A server option to enables reporting of per-call backend metrics. - callMetricsServerOption := orca.CallMetricsServerOption() + // A server option to enable reporting of per-call backend metrics. + smr := orca.NewServerMetricsRecorder() + callMetricsServerOption := orca.CallMetricsServerOption(smr) + smr.SetCPUUtilization(1.0) // An interceptor which injects custom backend metrics, added only // when the injectMetrics field in the test is set. injectingInterceptor := func(srv interface{}, ss grpc.ServerStream, info *grpc.StreamServerInfo, handler grpc.StreamHandler) error { - recorder := orca.CallMetricRecorderFromContext(ss.Context()) + recorder := orca.CallMetricsRecorderFromContext(ss.Context()) if recorder == nil { err := errors.New("Failed to retrieve per-RPC custom metrics recorder from the RPC context") t.Error(err) return err } - recorder.SetCPUUtilization(1.0) recorder.SetMemoryUtilization(50.0) // This value will be overwritten by a write to the same metric // from the server handler. 
- recorder.SetUtilization("queueSize", 1.0) + recorder.SetNamedUtilization("queueSize", 1.0) return handler(srv, ss) } @@ -209,14 +211,14 @@ func (s) TestE2ECallMetricsStreaming(t *testing.T) { srv := stubserver.StubServer{ FullDuplexCallF: func(stream testgrpc.TestService_FullDuplexCallServer) error { if test.injectMetrics { - recorder := orca.CallMetricRecorderFromContext(stream.Context()) + recorder := orca.CallMetricsRecorderFromContext(stream.Context()) if recorder == nil { err := errors.New("Failed to retrieve per-RPC custom metrics recorder from the RPC context") t.Error(err) return err } recorder.SetRequestCost("queryCost", 25.0) - recorder.SetUtilization("queueSize", 75.0) + recorder.SetNamedUtilization("queueSize", 75.0) } // Streaming implementation replies with a dummy response until the diff --git a/orca/orca.go b/orca/orca.go index 2c958b6902e9..771db36af1c9 100644 --- a/orca/orca.go +++ b/orca/orca.go @@ -27,128 +27,21 @@ package orca import ( - "context" - "errors" - - "google.golang.org/grpc" "google.golang.org/grpc/grpclog" - igrpc "google.golang.org/grpc/internal" "google.golang.org/grpc/internal/balancerload" "google.golang.org/grpc/metadata" "google.golang.org/grpc/orca/internal" - "google.golang.org/protobuf/proto" -) - -var ( - logger = grpclog.Component("orca-backend-metrics") - joinServerOptions = igrpc.JoinServerOptions.(func(...grpc.ServerOption) grpc.ServerOption) ) -const trailerMetadataKey = "endpoint-load-metrics-bin" - -// CallMetricsServerOption returns a server option which enables the reporting -// of per-RPC custom backend metrics for unary and streaming RPCs. -// -// Server applications interested in injecting custom backend metrics should -// pass the server option returned from this function as the first argument to -// grpc.NewServer(). -// -// Subsequently, server RPC handlers can retrieve a reference to the RPC -// specific custom metrics recorder [CallMetricRecorder] to be used, via a call -// to CallMetricRecorderFromContext(), and inject custom metrics at any time -// during the RPC lifecycle. -// -// The injected custom metrics will be sent as part of trailer metadata, as a -// binary-encoded [ORCA LoadReport] protobuf message, with the metadata key -// being set be "endpoint-load-metrics-bin". -// -// [ORCA LoadReport]: https://github.com/cncf/xds/blob/main/xds/data/orca/v3/orca_load_report.proto#L15 -func CallMetricsServerOption() grpc.ServerOption { - return joinServerOptions(grpc.ChainUnaryInterceptor(unaryInt), grpc.ChainStreamInterceptor(streamInt)) -} - -func unaryInt(ctx context.Context, req interface{}, _ *grpc.UnaryServerInfo, handler grpc.UnaryHandler) (interface{}, error) { - // We don't allocate the metric recorder here. It will be allocated the - // first time the user calls CallMetricRecorderFromContext(). - rw := &recorderWrapper{} - ctxWithRecorder := newContextWithRecorderWrapper(ctx, rw) - - resp, err := handler(ctxWithRecorder, req) - - // It is safe to access the underlying metric recorder inside the wrapper at - // this point, as the user's RPC handler is done executing, and therefore - // there will be no more calls to CallMetricRecorderFromContext(), which is - // where the metric recorder is lazy allocated. - if rw.r == nil { - return resp, err - } - setTrailerMetadata(ctx, rw.r) - return resp, err -} - -func streamInt(srv interface{}, ss grpc.ServerStream, info *grpc.StreamServerInfo, handler grpc.StreamHandler) error { - // We don't allocate the metric recorder here. 
It will be allocated the - // first time the user calls CallMetricRecorderFromContext(). - rw := &recorderWrapper{} - ws := &wrappedStream{ - ServerStream: ss, - ctx: newContextWithRecorderWrapper(ss.Context(), rw), - } - - err := handler(srv, ws) - - // It is safe to access the underlying metric recorder inside the wrapper at - // this point, as the user's RPC handler is done executing, and therefore - // there will be no more calls to CallMetricRecorderFromContext(), which is - // where the metric recorder is lazy allocated. - if rw.r == nil { - return err - } - setTrailerMetadata(ss.Context(), rw.r) - return err -} - -// setTrailerMetadata adds a trailer metadata entry with key being set to -// `trailerMetadataKey` and value being set to the binary-encoded -// orca.OrcaLoadReport protobuf message. -// -// This function is called from the unary and streaming interceptors defined -// above. Any errors encountered here are not propagated to the caller because -// they are ignored there. Hence we simply log any errors encountered here at -// warning level, and return nothing. -func setTrailerMetadata(ctx context.Context, r *CallMetricRecorder) { - b, err := proto.Marshal(r.toLoadReportProto()) - if err != nil { - logger.Warningf("failed to marshal load report: %v", err) - return - } - if err := grpc.SetTrailer(ctx, metadata.Pairs(trailerMetadataKey, string(b))); err != nil { - logger.Warningf("failed to set trailer metadata: %v", err) - } -} - -// wrappedStream wraps the grpc.ServerStream received by the streaming -// interceptor. Overrides only the Context() method to return a context which -// contains a reference to the CallMetricRecorder corresponding to this stream. -type wrappedStream struct { - grpc.ServerStream - ctx context.Context -} - -func (w *wrappedStream) Context() context.Context { - return w.ctx -} - -// ErrLoadReportMissing indicates no ORCA load report was found in trailers. -var ErrLoadReportMissing = errors.New("orca load report missing in provided metadata") +var logger = grpclog.Component("orca-backend-metrics") // loadParser implements the Parser interface defined in `internal/balancerload` // package. This interface is used by the client stream to parse load reports // sent by the server in trailer metadata. The parsed loads are then sent to // balancers via balancer.DoneInfo. // -// The grpc package cannot directly call orca.ToLoadReport() as that would cause -// an import cycle. Hence this roundabout method is used. +// The grpc package cannot directly call toLoadReport() as that would cause an +// import cycle. Hence this roundabout method is used. type loadParser struct{} func (loadParser) Parse(md metadata.MD) interface{} { diff --git a/orca/producer_test.go b/orca/producer_test.go index f15317995dec..be41424063fa 100644 --- a/orca/producer_test.go +++ b/orca/producer_test.go @@ -128,11 +128,11 @@ func (s) TestProducer(t *testing.T) { // Register the OpenRCAService with a very short metrics reporting interval. 
const shortReportingInterval = 50 * time.Millisecond - opts := orca.ServiceOptions{MinReportingInterval: shortReportingInterval} + smr := orca.NewServerMetricsRecorder() + opts := orca.ServiceOptions{MinReportingInterval: shortReportingInterval, ServerMetricsProvider: smr} internal.AllowAnyMinReportingInterval.(func(*orca.ServiceOptions))(&opts) s := grpc.NewServer() - orcaSrv, err := orca.Register(s, opts) - if err != nil { + if err := orca.Register(s, opts); err != nil { t.Fatalf("orca.Register failed: %v", err) } go s.Serve(lis) @@ -157,9 +157,9 @@ func (s) TestProducer(t *testing.T) { defer oobLis.Stop() // Set a few metrics and wait for them on the client side. - orcaSrv.SetCPUUtilization(10) - orcaSrv.SetMemoryUtilization(100) - orcaSrv.SetUtilization("bob", 555) + smr.SetCPUUtilization(10) + smr.SetMemoryUtilization(100) + smr.SetNamedUtilization("bob", 555) loadReportWant := &v3orcapb.OrcaLoadReport{ CpuUtilization: 10, MemUtilization: 100, @@ -181,9 +181,9 @@ testReport: } // Change and add metrics and wait for them on the client side. - orcaSrv.SetCPUUtilization(50) - orcaSrv.SetMemoryUtilization(200) - orcaSrv.SetUtilization("mary", 321) + smr.SetCPUUtilization(50) + smr.SetMemoryUtilization(200) + smr.SetNamedUtilization("mary", 321) loadReportWant = &v3orcapb.OrcaLoadReport{ CpuUtilization: 50, MemUtilization: 200, diff --git a/orca/server_metrics.go b/orca/server_metrics.go new file mode 100644 index 000000000000..6b63d3d252bf --- /dev/null +++ b/orca/server_metrics.go @@ -0,0 +1,270 @@ +/* + * + * Copyright 2023 gRPC authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +package orca + +import ( + "sync" + + v3orcapb "github.com/cncf/xds/go/xds/data/orca/v3" +) + +// ServerMetrics is the data returned from a server to a client to describe the +// current state of the server and/or the cost of a request when used per-call. +type ServerMetrics struct { + CPUUtilization float64 // CPU utilization: [0, 1.0]; unset=-1 + MemUtilization float64 // Memory utilization: [0, 1.0]; unset=-1 + QPS float64 // queries per second: [0, inf); unset=-1 + EPS float64 // errors per second: [0, inf); unset=-1 + + // The following maps must never be nil. + + Utilization map[string]float64 // Custom fields: [0, 1.0] + RequestCost map[string]float64 // Custom fields: [0, inf); not sent OOB + NamedMetrics map[string]float64 // Custom fields: [0, inf); not sent OOB +} + +// toLoadReportProto dumps sm as an OrcaLoadReport proto. +func (sm *ServerMetrics) toLoadReportProto() *v3orcapb.OrcaLoadReport { + ret := &v3orcapb.OrcaLoadReport{ + Utilization: sm.Utilization, + RequestCost: sm.RequestCost, + NamedMetrics: sm.NamedMetrics, + } + if sm.CPUUtilization != -1 { + ret.CpuUtilization = sm.CPUUtilization + } + if sm.MemUtilization != -1 { + ret.MemUtilization = sm.MemUtilization + } + if sm.QPS != -1 { + ret.RpsFractional = sm.QPS + } + if sm.EPS != -1 { + ret.Eps = sm.EPS + } + return ret +} + +// merge merges o into sm, overwriting any values present in both. 
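+//
+// For example (illustrative values): merging {CPUUtilization: 0.5, QPS: -1}
+// into {CPUUtilization: 0.2, QPS: 100} yields {CPUUtilization: 0.5, QPS: 100},
+// since -1 marks a scalar field as unset and therefore does not overwrite.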
+func (sm *ServerMetrics) merge(o *ServerMetrics) { + if o.CPUUtilization != -1 { + sm.CPUUtilization = o.CPUUtilization + } + if o.MemUtilization != -1 { + sm.MemUtilization = o.MemUtilization + } + if o.QPS != -1 { + sm.QPS = o.QPS + } + if o.EPS != -1 { + sm.EPS = o.EPS + } + mergeMap(sm.Utilization, o.Utilization) + mergeMap(sm.RequestCost, o.RequestCost) + mergeMap(sm.NamedMetrics, o.NamedMetrics) +} + +func mergeMap(a, b map[string]float64) { + for k, v := range b { + a[k] = v + } +} + +// ServerMetricsRecorder allows for recording and providing out of band server +// metrics. +type ServerMetricsRecorder interface { + ServerMetricsProvider + + // SetCPUUtilization sets the relevant server metric. + SetCPUUtilization(float64) + // DeleteCPUUtilization deletes the relevant server metric to prevent it + // from being sent. + DeleteCPUUtilization() + + // SetMemoryUtilization sets the relevant server metric. + SetMemoryUtilization(float64) + // DeleteMemoryUtilization deletes the relevant server metric to prevent it + // from being sent. + DeleteMemoryUtilization() + + // SetQPS sets the relevant server metric. + SetQPS(float64) + // DeleteQPS deletes the relevant server metric to prevent it from being + // sent. + DeleteQPS() + + // SetEPS sets the relevant server metric. + SetEPS(float64) + // DeleteEPS deletes the relevant server metric to prevent it from being + // sent. + DeleteEPS() + + // SetNamedUtilization sets the relevant server metric. + SetNamedUtilization(name string, val float64) + // DeleteNamedUtilization deletes the relevant server metric to prevent it + // from being sent. + DeleteNamedUtilization(name string) +} + +type serverMetricsRecorder struct { + mu sync.Mutex // protects state + state *ServerMetrics // the current metrics +} + +// NewServerMetricsRecorder returns an in-memory store for ServerMetrics and +// allows for safe setting and retrieving of ServerMetrics. Also implements +// ServerMetricsProvider for use with NewService. +func NewServerMetricsRecorder() ServerMetricsRecorder { + return newServerMetricsRecorder() +} + +func newServerMetricsRecorder() *serverMetricsRecorder { + return &serverMetricsRecorder{ + state: &ServerMetrics{ + CPUUtilization: -1, + MemUtilization: -1, + QPS: -1, + EPS: -1, + Utilization: make(map[string]float64), + RequestCost: make(map[string]float64), + NamedMetrics: make(map[string]float64), + }, + } +} + +// ServerMetrics returns a copy of the current ServerMetrics. +func (s *serverMetricsRecorder) ServerMetrics() *ServerMetrics { + s.mu.Lock() + defer s.mu.Unlock() + return &ServerMetrics{ + CPUUtilization: s.state.CPUUtilization, + MemUtilization: s.state.MemUtilization, + QPS: s.state.QPS, + EPS: s.state.EPS, + Utilization: copyMap(s.state.Utilization), + RequestCost: copyMap(s.state.RequestCost), + NamedMetrics: copyMap(s.state.NamedMetrics), + } +} + +func copyMap(m map[string]float64) map[string]float64 { + ret := make(map[string]float64, len(m)) + for k, v := range m { + ret[k] = v + } + return ret +} + +// SetCPUUtilization records a measurement for the CPU utilization metric. +func (s *serverMetricsRecorder) SetCPUUtilization(val float64) { + s.mu.Lock() + defer s.mu.Unlock() + s.state.CPUUtilization = val +} + +// DeleteCPUUtilization deletes the relevant server metric to prevent it from +// being sent. +func (s *serverMetricsRecorder) DeleteCPUUtilization() { + s.SetCPUUtilization(-1) +} + +// SetMemoryUtilization records a measurement for the memory utilization metric. 
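+// The value is expected to be in the range [0, 1.0]; DeleteMemoryUtilization
+// resets it to the unset sentinel (-1), which excludes it from load reports.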
+func (s *serverMetricsRecorder) SetMemoryUtilization(val float64) {
+	s.mu.Lock()
+	defer s.mu.Unlock()
+	s.state.MemUtilization = val
+}
+
+// DeleteMemoryUtilization deletes the relevant server metric to prevent it
+// from being sent.
+func (s *serverMetricsRecorder) DeleteMemoryUtilization() {
+	s.SetMemoryUtilization(-1)
+}
+
+// SetQPS records a measurement for the QPS metric.
+func (s *serverMetricsRecorder) SetQPS(val float64) {
+	s.mu.Lock()
+	defer s.mu.Unlock()
+	s.state.QPS = val
+}
+
+// DeleteQPS deletes the relevant server metric to prevent it from being sent.
+func (s *serverMetricsRecorder) DeleteQPS() {
+	s.SetQPS(-1)
+}
+
+// SetEPS records a measurement for the EPS metric.
+func (s *serverMetricsRecorder) SetEPS(val float64) {
+	s.mu.Lock()
+	defer s.mu.Unlock()
+	s.state.EPS = val
+}
+
+// DeleteEPS deletes the relevant server metric to prevent it from being sent.
+func (s *serverMetricsRecorder) DeleteEPS() {
+	s.SetEPS(-1)
+}
+
+// SetNamedUtilization records a measurement for a utilization metric uniquely
+// identifiable by name.
+func (s *serverMetricsRecorder) SetNamedUtilization(name string, val float64) {
+	s.mu.Lock()
+	defer s.mu.Unlock()
+	s.state.Utilization[name] = val
+}
+
+// DeleteNamedUtilization deletes any previously recorded measurement for a
+// utilization metric uniquely identifiable by name.
+func (s *serverMetricsRecorder) DeleteNamedUtilization(name string) {
+	s.mu.Lock()
+	defer s.mu.Unlock()
+	delete(s.state.Utilization, name)
+}
+
+// SetRequestCost records a measurement for a request cost metric uniquely
+// identifiable by name.
+func (s *serverMetricsRecorder) SetRequestCost(name string, val float64) {
+	s.mu.Lock()
+	defer s.mu.Unlock()
+	s.state.RequestCost[name] = val
+}
+
+// DeleteRequestCost deletes any previously recorded measurement for a
+// request cost metric uniquely identifiable by name.
+func (s *serverMetricsRecorder) DeleteRequestCost(name string) {
+	s.mu.Lock()
+	defer s.mu.Unlock()
+	delete(s.state.RequestCost, name)
+}
+
+// SetNamedMetric records a measurement for a named metric uniquely
+// identifiable by name.
+func (s *serverMetricsRecorder) SetNamedMetric(name string, val float64) {
+	s.mu.Lock()
+	defer s.mu.Unlock()
+	s.state.NamedMetrics[name] = val
+}
+
+// DeleteNamedMetric deletes any previously recorded measurement for a
+// named metric uniquely identifiable by name.
+func (s *serverMetricsRecorder) DeleteNamedMetric(name string) {
+	s.mu.Lock()
+	defer s.mu.Unlock()
+	delete(s.state.NamedMetrics, name)
+}
diff --git a/orca/service.go b/orca/service.go
index ae011fd9a9d2..7461a6b05a1a 100644
--- a/orca/service.go
+++ b/orca/service.go
@@ -19,7 +19,7 @@
 package orca
 
 import (
-	"sync"
+	"fmt"
 	"time"
 
 	"google.golang.org/grpc"
@@ -28,7 +28,6 @@ import (
 	ointernal "google.golang.org/grpc/orca/internal"
 	"google.golang.org/grpc/status"
 
-	v3orcapb "github.com/cncf/xds/go/xds/data/orca/v3"
 	v3orcaservicegrpc "github.com/cncf/xds/go/xds/service/orca/v3"
 	v3orcaservicepb "github.com/cncf/xds/go/xds/service/orca/v3"
 )
@@ -60,15 +59,16 @@ type Service struct {
 	// Minimum reporting interval, as configured by the user, or the default.
 	minReportingInterval time.Duration
 
-	// mu guards the custom metrics injected by the server application.
-	mu          sync.RWMutex
-	cpu         float64
-	memory      float64
-	utilization map[string]float64
+	smProvider ServerMetricsProvider
 }
 
 // ServiceOptions contains options to configure the ORCA service implementation.
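+//
+// A minimal configuration sketch (illustrative; grpcServer is assumed to be
+// an existing *grpc.Server in the surrounding application):
+//
+//	smr := orca.NewServerMetricsRecorder()
+//	err := orca.Register(grpcServer, orca.ServiceOptions{ServerMetricsProvider: smr})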
type ServiceOptions struct { + // ServerMetricsProvider is the provider to be used by the service for + // reporting OOB server metrics to clients. Typically obtained via + // NewServerMetricsRecorder. This field is required. + ServerMetricsProvider ServerMetricsProvider + // MinReportingInterval sets the lower bound for how often out-of-band // metrics are reported on the streaming RPC initiated by the client. If // unspecified, negative or less than the default value of 30s, the default @@ -81,11 +81,22 @@ type ServiceOptions struct { allowAnyMinReportingInterval bool } +// A ServerMetricsProvider provides ServerMetrics upon request. +type ServerMetricsProvider interface { + // ServerMetrics returns the current set of server metrics. It should + // return a read-only, immutable copy of the data that is active at the + // time of the call. + ServerMetrics() *ServerMetrics +} + // NewService creates a new ORCA service implementation configured using the // provided options. func NewService(opts ServiceOptions) (*Service, error) { // The default minimum supported reporting interval value can be overridden // for testing purposes through the orca internal package. + if opts.ServerMetricsProvider == nil { + return nil, fmt.Errorf("ServerMetricsProvider not specified") + } if !opts.allowAnyMinReportingInterval { if opts.MinReportingInterval < 0 || opts.MinReportingInterval < minReportingInterval { opts.MinReportingInterval = minReportingInterval @@ -93,20 +104,22 @@ func NewService(opts ServiceOptions) (*Service, error) { } service := &Service{ minReportingInterval: opts.MinReportingInterval, - utilization: make(map[string]float64), + smProvider: opts.ServerMetricsProvider, } return service, nil } // Register creates a new ORCA service implementation configured using the -// provided options and registers the same on the provided service registrar. -func Register(s *grpc.Server, opts ServiceOptions) (*Service, error) { +// provided options and registers the same on the provided grpc Server. +func Register(s *grpc.Server, opts ServiceOptions) error { + // TODO(https://github.com/cncf/xds/issues/41): replace *grpc.Server with + // grpc.ServiceRegistrar when possible. service, err := NewService(opts) if err != nil { - return nil, err + return err } v3orcaservicegrpc.RegisterOpenRcaServiceServer(s, service) - return service, nil + return nil } // determineReportingInterval determines the reporting interval for out-of-band @@ -127,7 +140,7 @@ func (s *Service) determineReportingInterval(req *v3orcaservicepb.OrcaLoadReport } func (s *Service) sendMetricsResponse(stream v3orcaservicegrpc.OpenRcaService_StreamCoreMetricsServer) error { - return stream.Send(s.toLoadReportProto()) + return stream.Send(s.smProvider.ServerMetrics().toLoadReportProto()) } // StreamCoreMetrics streams custom backend metrics injected by the server @@ -148,49 +161,3 @@ func (s *Service) StreamCoreMetrics(req *v3orcaservicepb.OrcaLoadReportRequest, } } } - -// SetCPUUtilization records a measurement for the CPU utilization metric. -func (s *Service) SetCPUUtilization(val float64) { - s.mu.Lock() - s.cpu = val - s.mu.Unlock() -} - -// SetMemoryUtilization records a measurement for the memory utilization metric. -func (s *Service) SetMemoryUtilization(val float64) { - s.mu.Lock() - s.memory = val - s.mu.Unlock() -} - -// SetUtilization records a measurement for a utilization metric uniquely -// identifiable by name. 
-func (s *Service) SetUtilization(name string, val float64) { - s.mu.Lock() - s.utilization[name] = val - s.mu.Unlock() -} - -// DeleteUtilization deletes any previously recorded measurement for a -// utilization metric uniquely identifiable by name. -func (s *Service) DeleteUtilization(name string) { - s.mu.Lock() - delete(s.utilization, name) - s.mu.Unlock() -} - -// toLoadReportProto dumps the recorded measurements as an OrcaLoadReport proto. -func (s *Service) toLoadReportProto() *v3orcapb.OrcaLoadReport { - s.mu.RLock() - defer s.mu.RUnlock() - - util := make(map[string]float64, len(s.utilization)) - for k, v := range s.utilization { - util[k] = v - } - return &v3orcapb.OrcaLoadReport{ - CpuUtilization: s.cpu, - MemUtilization: s.memory, - Utilization: util, - } -} diff --git a/orca/service_test.go b/orca/service_test.go index 715d53241c71..e5cf59fccb4e 100644 --- a/orca/service_test.go +++ b/orca/service_test.go @@ -52,7 +52,7 @@ type testServiceImpl struct { requests int64 testgrpc.TestServiceServer - orcaSrv *orca.Service + smr orca.ServerMetricsRecorder } func (t *testServiceImpl) UnaryCall(context.Context, *testpb.SimpleRequest) (*testpb.SimpleResponse, error) { @@ -60,26 +60,26 @@ func (t *testServiceImpl) UnaryCall(context.Context, *testpb.SimpleRequest) (*te t.requests++ t.mu.Unlock() - t.orcaSrv.SetUtilization(requestsMetricKey, float64(t.requests)) - t.orcaSrv.SetCPUUtilization(50.0) - t.orcaSrv.SetMemoryUtilization(99.0) + t.smr.SetNamedUtilization(requestsMetricKey, float64(t.requests)) + t.smr.SetCPUUtilization(50.0) + t.smr.SetMemoryUtilization(99.0) return &testpb.SimpleResponse{}, nil } func (t *testServiceImpl) EmptyCall(context.Context, *testpb.Empty) (*testpb.Empty, error) { - t.orcaSrv.DeleteUtilization(requestsMetricKey) - t.orcaSrv.SetCPUUtilization(0) - t.orcaSrv.SetMemoryUtilization(0) + t.smr.DeleteNamedUtilization(requestsMetricKey) + t.smr.SetCPUUtilization(0) + t.smr.SetMemoryUtilization(0) return &testpb.Empty{}, nil } -// Test_E2E_CustomBackendMetrics_OutOfBand tests the injection of out-of-band +// TestE2E_CustomBackendMetrics_OutOfBand tests the injection of out-of-band // custom backend metrics from the server application, and verifies that // expected load reports are received at the client. // // TODO: Change this test to use the client API, when ready, to read the // out-of-band metrics pushed by the server. -func (s) Test_E2E_CustomBackendMetrics_OutOfBand(t *testing.T) { +func (s) TestE2E_CustomBackendMetrics_OutOfBand(t *testing.T) { lis, err := testutils.LocalTCPListener() if err != nil { t.Fatal(err) @@ -87,18 +87,18 @@ func (s) Test_E2E_CustomBackendMetrics_OutOfBand(t *testing.T) { // Override the min reporting interval in the internal package. const shortReportingInterval = 100 * time.Millisecond - opts := orca.ServiceOptions{MinReportingInterval: shortReportingInterval} + smr := orca.NewServerMetricsRecorder() + opts := orca.ServiceOptions{MinReportingInterval: shortReportingInterval, ServerMetricsProvider: smr} internal.AllowAnyMinReportingInterval.(func(*orca.ServiceOptions))(&opts) // Register the OpenRCAService with a very short metrics reporting interval. s := grpc.NewServer() - orcaSrv, err := orca.Register(s, opts) - if err != nil { + if err := orca.Register(s, opts); err != nil { t.Fatalf("orca.EnableOutOfBandMetricsReportingForTesting() failed: %v", err) } // Register the test service implementation on the same grpc server, and start serving. 
- testgrpc.RegisterTestServiceServer(s, &testServiceImpl{orcaSrv: orcaSrv}) + testgrpc.RegisterTestServiceServer(s, &testServiceImpl{smr: smr}) go s.Serve(lis) defer s.Stop() t.Logf("Started gRPC server at %s...", lis.Addr().String()) From 56b33d5cd0bdee3c53f1bbd3e00b3613abe59323 Mon Sep 17 00:00:00 2001 From: Tobo Atchou Date: Wed, 3 May 2023 18:58:06 +0200 Subject: [PATCH 20/60] server/transport: send appropriate debug_data in GOAWAY frames (#6220) --- internal/transport/handler_server.go | 2 +- internal/transport/handler_server_test.go | 9 +++++++++ internal/transport/http2_server.go | 10 +++++----- internal/transport/keepalive_test.go | 13 +++++++++++-- internal/transport/transport.go | 2 +- server.go | 6 +++--- 6 files changed, 30 insertions(+), 12 deletions(-) diff --git a/internal/transport/handler_server.go b/internal/transport/handler_server.go index fbee581b8660..98f80e3fa00a 100644 --- a/internal/transport/handler_server.go +++ b/internal/transport/handler_server.go @@ -453,7 +453,7 @@ func (ht *serverHandlerTransport) IncrMsgSent() {} func (ht *serverHandlerTransport) IncrMsgRecv() {} -func (ht *serverHandlerTransport) Drain() { +func (ht *serverHandlerTransport) Drain(debugData string) { panic("Drain() is not implemented") } diff --git a/internal/transport/handler_server_test.go b/internal/transport/handler_server_test.go index a6eb20285787..99ca211b323c 100644 --- a/internal/transport/handler_server_test.go +++ b/internal/transport/handler_server_test.go @@ -502,6 +502,15 @@ func (s) TestHandlerTransport_HandleStreams_ErrDetails(t *testing.T) { checkHeaderAndTrailer(t, hst.rw, wantHeader, wantTrailer) } +// TestHandlerTransport_Drain verifies that Drain() is not implemented +// by `serverHandlerTransport`. +func (s) TestHandlerTransport_Drain(t *testing.T) { + defer func() { recover() }() + st := newHandleStreamTest(t) + st.ht.Drain("whatever") + t.Errorf("serverHandlerTransport.Drain() should have panicked") +} + // checkHeaderAndTrailer checks that the resulting header and trailer matches the expectation. func checkHeaderAndTrailer(t *testing.T, rw testHandlerResponseWriter, wantHeader, wantTrailer http.Header) { // For trailer-only responses, the trailer values might be reported as part of the Header. They will however diff --git a/internal/transport/http2_server.go b/internal/transport/http2_server.go index 4b406b8cb011..79e86ba08836 100644 --- a/internal/transport/http2_server.go +++ b/internal/transport/http2_server.go @@ -1166,12 +1166,12 @@ func (t *http2Server) keepalive() { if val <= 0 { // The connection has been idle for a duration of keepalive.MaxConnectionIdle or more. // Gracefully close the connection. - t.Drain() + t.Drain("max_idle") return } idleTimer.Reset(val) case <-ageTimer.C: - t.Drain() + t.Drain("max_age") ageTimer.Reset(t.kp.MaxConnectionAgeGrace) select { case <-ageTimer.C: @@ -1318,14 +1318,14 @@ func (t *http2Server) RemoteAddr() net.Addr { return t.remoteAddr } -func (t *http2Server) Drain() { +func (t *http2Server) Drain(debugData string) { t.mu.Lock() defer t.mu.Unlock() if t.drainEvent != nil { return } t.drainEvent = grpcsync.NewEvent() - t.controlBuf.put(&goAway{code: http2.ErrCodeNo, debugData: []byte{}, headsUp: true}) + t.controlBuf.put(&goAway{code: http2.ErrCodeNo, debugData: []byte(debugData), headsUp: true}) } var goAwayPing = &ping{data: [8]byte{1, 6, 1, 8, 0, 3, 3, 9}} @@ -1367,7 +1367,7 @@ func (t *http2Server) outgoingGoAwayHandler(g *goAway) (bool, error) { // originated before the GoAway reaches the client. 
// After getting the ack or timer expiration send out another GoAway this // time with an ID of the max stream server intends to process. - if err := t.framer.fr.WriteGoAway(math.MaxUint32, http2.ErrCodeNo, []byte{}); err != nil { + if err := t.framer.fr.WriteGoAway(math.MaxUint32, http2.ErrCodeNo, g.debugData); err != nil { return false, err } if err := t.framer.fr.WritePing(false, goAwayPing.data); err != nil { diff --git a/internal/transport/keepalive_test.go b/internal/transport/keepalive_test.go index a020fecdc65d..b9e6d74cb739 100644 --- a/internal/transport/keepalive_test.go +++ b/internal/transport/keepalive_test.go @@ -27,6 +27,7 @@ import ( "fmt" "io" "net" + "strings" "testing" "time" @@ -69,9 +70,13 @@ func (s) TestMaxConnectionIdle(t *testing.T) { case <-ctx.Done(): t.Fatalf("context expired before receiving GoAway from the server.") case <-client.GoAway(): - if reason, _ := client.GetGoAwayReason(); reason != GoAwayNoReason { + reason, debugMsg := client.GetGoAwayReason() + if reason != GoAwayNoReason { t.Fatalf("GoAwayReason is %v, want %v", reason, GoAwayNoReason) } + if !strings.Contains(debugMsg, "max_idle") { + t.Fatalf("GoAwayDebugMessage is %v, want %v", debugMsg, "max_idle") + } } } @@ -135,9 +140,13 @@ func (s) TestMaxConnectionAge(t *testing.T) { // for more than MaxConnectionIdle time. select { case <-client.GoAway(): - if reason, _ := client.GetGoAwayReason(); reason != GoAwayNoReason { + reason, debugMsg := client.GetGoAwayReason() + if reason != GoAwayNoReason { t.Fatalf("GoAwayReason is %v, want %v", reason, GoAwayNoReason) } + if !strings.Contains(debugMsg, "max_age") { + t.Fatalf("GoAwayDebugMessage is %v, want %v", debugMsg, "max_age") + } case <-ctx.Done(): t.Fatalf("timed out before getting a GoAway from the server.") } diff --git a/internal/transport/transport.go b/internal/transport/transport.go index 1b7d7fabc512..aa1c896595d9 100644 --- a/internal/transport/transport.go +++ b/internal/transport/transport.go @@ -726,7 +726,7 @@ type ServerTransport interface { RemoteAddr() net.Addr // Drain notifies the client this ServerTransport stops accepting new RPCs. - Drain() + Drain(debugData string) // IncrMsgSent increments the number of message sent through this transport. IncrMsgSent() diff --git a/server.go b/server.go index 76d152a69c8f..81969e7c15a9 100644 --- a/server.go +++ b/server.go @@ -895,7 +895,7 @@ func (s *Server) drainServerTransports(addr string) { s.mu.Lock() conns := s.conns[addr] for st := range conns { - st.Drain() + st.Drain("") } s.mu.Unlock() } @@ -1046,7 +1046,7 @@ func (s *Server) addConn(addr string, st transport.ServerTransport) bool { if s.drain { // Transport added after we drained our existing conns: drain it // immediately. 
- st.Drain() + st.Drain("") } if s.conns[addr] == nil { @@ -1856,7 +1856,7 @@ func (s *Server) GracefulStop() { if !s.drain { for _, conns := range s.conns { for st := range conns { - st.Drain() + st.Drain("graceful_stop") } } s.drain = true From 47b3c5545c4d9bef0d42eb1ced7afb313dd7aa92 Mon Sep 17 00:00:00 2001 From: Doug Fawley Date: Wed, 3 May 2023 13:47:37 -0700 Subject: [PATCH 21/60] orca: fix race at producer startup (#6245) --- orca/producer.go | 77 +++++++++++++++++++++++++++---------------- orca/producer_test.go | 12 +++---- orca/service_test.go | 11 ++++--- 3 files changed, 59 insertions(+), 41 deletions(-) diff --git a/orca/producer.go b/orca/producer.go index 956d5ddfb52d..227baeb01ddf 100644 --- a/orca/producer.go +++ b/orca/producer.go @@ -38,19 +38,13 @@ type producerBuilder struct{} // Build constructs and returns a producer and its cleanup function func (*producerBuilder) Build(cci interface{}) (balancer.Producer, func()) { - ctx, cancel := context.WithCancel(context.Background()) p := &producer{ client: v3orcaservicegrpc.NewOpenRcaServiceClient(cci.(grpc.ClientConnInterface)), - closed: grpcsync.NewEvent(), intervals: make(map[time.Duration]int), listeners: make(map[OOBListener]struct{}), backoff: internal.DefaultBackoffFunc, } - go p.run(ctx) - return p, func() { - cancel() - <-p.closed.Done() // Block until stream stopped. - } + return p, func() {} } var producerBuilderSingleton = &producerBuilder{} @@ -77,6 +71,7 @@ type OOBListenerOptions struct { func RegisterOOBListener(sc balancer.SubConn, l OOBListener, opts OOBListenerOptions) (stop func()) { pr, close := sc.GetOrBuildProducer(producerBuilderSingleton) p := pr.(*producer) + p.registerListener(l, opts.ReportInterval) // TODO: When we can register for SubConn state updates, automatically call @@ -93,16 +88,18 @@ func RegisterOOBListener(sc balancer.SubConn, l OOBListener, opts OOBListenerOpt type producer struct { client v3orcaservicegrpc.OpenRcaServiceClient - closed *grpcsync.Event // fired when closure completes // backoff is called between stream attempts to determine how long to delay // to avoid overloading a server experiencing problems. The attempt count // is incremented when stream errors occur and is reset when the stream // reports a result. 
backoff func(int) time.Duration - mu sync.Mutex - intervals map[time.Duration]int // map from interval time to count of listeners requesting that time - listeners map[OOBListener]struct{} // set of registered listeners + mu sync.Mutex + intervals map[time.Duration]int // map from interval time to count of listeners requesting that time + listeners map[OOBListener]struct{} // set of registered listeners + minInterval time.Duration + stop func() // stops the current run goroutine + stopped chan struct{} // closed when the run goroutine exits } // registerListener adds the listener and its requested report interval to the @@ -110,8 +107,13 @@ type producer struct { func (p *producer) registerListener(l OOBListener, interval time.Duration) { p.mu.Lock() defer p.mu.Unlock() + p.listeners[l] = struct{}{} p.intervals[interval]++ + if len(p.listeners) == 1 || interval < p.minInterval { + p.minInterval = interval + p.updateRunLocked() + } } // registerListener removes the listener and its requested report interval to @@ -119,31 +121,53 @@ func (p *producer) registerListener(l OOBListener, interval time.Duration) { func (p *producer) unregisterListener(l OOBListener, interval time.Duration) { p.mu.Lock() defer p.mu.Unlock() + delete(p.listeners, l) p.intervals[interval]-- if p.intervals[interval] == 0 { delete(p.intervals, interval) + + if p.minInterval == interval { + p.recomputeMinInterval() + p.updateRunLocked() + } } } -// minInterval returns the smallest key in p.intervals. -func (p *producer) minInterval() time.Duration { - p.mu.Lock() - defer p.mu.Unlock() - var min time.Duration +// recomputeMinInterval sets p.minInterval to the minimum key's value in +// p.intervals. +func (p *producer) recomputeMinInterval() { first := true - for t := range p.intervals { - if t < min || first { - min = t + for interval := range p.intervals { + if first || interval < p.minInterval { + p.minInterval = interval first = false } } - return min +} + +// updateRunLocked is called whenever the run goroutine needs to be started / +// stopped / restarted due to: 1. the initial listener being registered, 2. the +// final listener being unregistered, or 3. the minimum registered interval +// changing. +func (p *producer) updateRunLocked() { + if p.stop != nil { + p.stop() + <-p.stopped + p.stop = nil + } + if len(p.listeners) > 0 { + var ctx context.Context + ctx, p.stop = context.WithCancel(context.Background()) + p.stopped = make(chan struct{}) + go p.run(ctx, p.minInterval) + } } // run manages the ORCA OOB stream on the subchannel. -func (p *producer) run(ctx context.Context) { - defer p.closed.Fire() +func (p *producer) run(ctx context.Context, interval time.Duration) { + defer close(p.stopped) + backoffAttempt := 0 backoffTimer := time.NewTimer(0) for ctx.Err() == nil { @@ -153,7 +177,7 @@ func (p *producer) run(ctx context.Context) { return } - resetBackoff, err := p.runStream(ctx) + resetBackoff, err := p.runStream(ctx, interval) if resetBackoff { backoffTimer.Reset(0) @@ -190,8 +214,7 @@ func (p *producer) run(ctx context.Context) { // runStream runs a single stream on the subchannel and returns the resulting // error, if any, and whether or not the run loop should reset the backoff // timer to zero or advance it. 
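+//
+// The report interval is fixed for the lifetime of a given run goroutine;
+// when the minimum registered interval changes, updateRunLocked restarts the
+// goroutine, and with it the stream, using the new interval.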
-func (p *producer) runStream(ctx context.Context) (resetBackoff bool, err error) { - interval := p.minInterval() +func (p *producer) runStream(ctx context.Context, interval time.Duration) (resetBackoff bool, err error) { streamCtx, cancel := context.WithCancel(ctx) defer cancel() stream, err := p.client.StreamCoreMetrics(streamCtx, &v3orcaservicepb.OrcaLoadReportRequest{ @@ -212,9 +235,5 @@ func (p *producer) runStream(ctx context.Context) (resetBackoff bool, err error) l.OnLoadReport(report) } p.mu.Unlock() - if interval != p.minInterval() { - // restart stream to use new interval - return true, nil - } } } diff --git a/orca/producer_test.go b/orca/producer_test.go index be41424063fa..ce376e7405e2 100644 --- a/orca/producer_test.go +++ b/orca/producer_test.go @@ -519,12 +519,11 @@ func (s) TestProducerMultipleListeners(t *testing.T) { checkReports(2, 1, 0) // Register listener 3 with a more frequent interval; stream is recreated - // with this interval after the next report is received. The first report - // will go to all three listeners. + // with this interval. The next report will go to all three listeners. oobLis3.cleanup = orca.RegisterOOBListener(li.sc, oobLis3, lisOpts3) + awaitRequest(reportInterval3) fake.respCh <- loadReportWant checkReports(3, 2, 1) - awaitRequest(reportInterval3) // Another report without a change in listeners should go to all three listeners. fake.respCh <- loadReportWant @@ -536,13 +535,12 @@ func (s) TestProducerMultipleListeners(t *testing.T) { fake.respCh <- loadReportWant checkReports(5, 3, 3) - // Stop listener 3. This makes the interval longer, with stream recreation - // delayed until the next report is received. Reports should only go to - // listener 1 now. + // Stop listener 3. This makes the interval longer. Reports should only + // go to listener 1 now. oobLis3.Stop() + awaitRequest(reportInterval1) fake.respCh <- loadReportWant checkReports(6, 3, 3) - awaitRequest(reportInterval1) // Another report without a change in listeners should go to the first listener. fake.respCh <- loadReportWant checkReports(7, 3, 3) diff --git a/orca/service_test.go b/orca/service_test.go index e5cf59fccb4e..73ad28430264 100644 --- a/orca/service_test.go +++ b/orca/service_test.go @@ -86,7 +86,7 @@ func (s) TestE2E_CustomBackendMetrics_OutOfBand(t *testing.T) { } // Override the min reporting interval in the internal package. - const shortReportingInterval = 100 * time.Millisecond + const shortReportingInterval = 10 * time.Millisecond smr := orca.NewServerMetricsRecorder() opts := orca.ServiceOptions{MinReportingInterval: shortReportingInterval, ServerMetricsProvider: smr} internal.AllowAnyMinReportingInterval.(func(*orca.ServiceOptions))(&opts) @@ -110,20 +110,21 @@ func (s) TestE2E_CustomBackendMetrics_OutOfBand(t *testing.T) { } defer cc.Close() - // Spawn a goroutine which sends 100 unary RPCs to the test server. This + // Spawn a goroutine which sends 20 unary RPCs to the test server. This // will trigger the injection of custom backend metrics from the // testServiceImpl. 
+ const numRequests = 20 ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout) defer cancel() testStub := testgrpc.NewTestServiceClient(cc) errCh := make(chan error, 1) go func() { - for i := 0; i < 100; i++ { + for i := 0; i < numRequests; i++ { if _, err := testStub.UnaryCall(ctx, &testpb.SimpleRequest{}); err != nil { errCh <- fmt.Errorf("UnaryCall failed: %v", err) return } - time.Sleep(10 * time.Millisecond) + time.Sleep(time.Millisecond) } errCh <- nil }() @@ -151,7 +152,7 @@ func (s) TestE2E_CustomBackendMetrics_OutOfBand(t *testing.T) { wantProto := &v3orcapb.OrcaLoadReport{ CpuUtilization: 50.0, MemUtilization: 99.0, - Utilization: map[string]float64{requestsMetricKey: 100.0}, + Utilization: map[string]float64{requestsMetricKey: numRequests}, } gotProto, err := stream.Recv() if err != nil { From ccad7b7570fd233e4558b034034672c0de89c87c Mon Sep 17 00:00:00 2001 From: Easwar Swaminathan Date: Thu, 4 May 2023 16:05:13 -0700 Subject: [PATCH 22/60] grpc: use CallbackSerializer in resolver_wrapper (#6234) --- internal/grpcsync/callback_serializer.go | 12 +- internal/grpcsync/callback_serializer_test.go | 13 +- resolver_conn_wrapper.go | 134 +++++++++--------- test/service_config_deprecated_test.go | 33 +++-- 4 files changed, 100 insertions(+), 92 deletions(-) diff --git a/internal/grpcsync/callback_serializer.go b/internal/grpcsync/callback_serializer.go index 6df798c00eb1..d91f92463542 100644 --- a/internal/grpcsync/callback_serializer.go +++ b/internal/grpcsync/callback_serializer.go @@ -31,6 +31,12 @@ import ( // // This type is safe for concurrent access. type CallbackSerializer struct { + // Done is closed once the serializer is shut down completely, i.e a + // scheduled callback, if any, that was running when the context passed to + // NewCallbackSerializer is cancelled, has completed and the serializer has + // deallocated all its resources. + Done chan struct{} + callbacks *buffer.Unbounded } @@ -39,7 +45,10 @@ type CallbackSerializer struct { // provided context to shutdown the CallbackSerializer. It is guaranteed that no // callbacks will be executed once this context is canceled. func NewCallbackSerializer(ctx context.Context) *CallbackSerializer { - t := &CallbackSerializer{callbacks: buffer.NewUnbounded()} + t := &CallbackSerializer{ + Done: make(chan struct{}), + callbacks: buffer.NewUnbounded(), + } go t.run(ctx) return t } @@ -53,6 +62,7 @@ func (t *CallbackSerializer) Schedule(f func(ctx context.Context)) { } func (t *CallbackSerializer) run(ctx context.Context) { + defer close(t.Done) for ctx.Err() == nil { select { case <-ctx.Done(): diff --git a/internal/grpcsync/callback_serializer_test.go b/internal/grpcsync/callback_serializer_test.go index 6cb1ee52d84a..8c465af66aea 100644 --- a/internal/grpcsync/callback_serializer_test.go +++ b/internal/grpcsync/callback_serializer_test.go @@ -144,19 +144,13 @@ func (s) TestCallbackSerializer_Schedule_Close(t *testing.T) { cs := NewCallbackSerializer(ctx) // Schedule a callback which blocks until the context passed to it is - // canceled. It also closes a couple of channels to signal that it started - // and finished respectively. + // canceled. It also closes a channel to signal that it has started. firstCallbackStartedCh := make(chan struct{}) - firstCallbackFinishCh := make(chan struct{}) cs.Schedule(func(ctx context.Context) { close(firstCallbackStartedCh) <-ctx.Done() - close(firstCallbackFinishCh) }) - // Wait for the first callback to start before scheduling the others. 
- <-firstCallbackStartedCh - // Schedule a bunch of callbacks. These should not be exeuted since the first // one started earlier is blocked. const numCallbacks = 10 @@ -174,11 +168,14 @@ func (s) TestCallbackSerializer_Schedule_Close(t *testing.T) { t.Fatal(err) } + // Wait for the first callback to start before closing the scheduler. + <-firstCallbackStartedCh + // Cancel the context which will unblock the first callback. None of the // other callbacks (which have not started executing at this point) should // be executed after this. cancel() - <-firstCallbackFinishCh + <-cs.Done // Ensure that the newer callbacks are not executed. select { diff --git a/resolver_conn_wrapper.go b/resolver_conn_wrapper.go index 854e90f69ae5..ce12b52ecdc0 100644 --- a/resolver_conn_wrapper.go +++ b/resolver_conn_wrapper.go @@ -19,8 +19,8 @@ package grpc import ( + "context" "strings" - "sync" "google.golang.org/grpc/balancer" "google.golang.org/grpc/internal/channelz" @@ -42,15 +42,17 @@ type ccResolverWrapper struct { // The following fields are initialized when the wrapper is created and are // read-only afterwards, and therefore can be accessed without a mutex. cc resolverStateUpdater - done *grpcsync.Event channelzID *channelz.Identifier ignoreServiceConfig bool - resolverMu sync.Mutex - resolver resolver.Resolver - - incomingMu sync.Mutex // Synchronizes all the incoming calls. - curState resolver.State + // Outgoing (gRPC --> resolver) and incoming (resolver --> gRPC) calls are + // guaranteed to execute in a mutually exclusive manner as they are + // scheduled on the CallbackSerializer. Fields accessed *only* in serializer + // callbacks, can therefore be accessed without a mutex. + serializer *grpcsync.CallbackSerializer + serializerCancel context.CancelFunc + resolver resolver.Resolver + curState resolver.State } // ccResolverWrapperOpts wraps the arguments to be passed when creating a new @@ -65,104 +67,100 @@ type ccResolverWrapperOpts struct { // newCCResolverWrapper uses the resolver.Builder to build a Resolver and // returns a ccResolverWrapper object which wraps the newly built resolver. func newCCResolverWrapper(cc resolverStateUpdater, opts ccResolverWrapperOpts) (*ccResolverWrapper, error) { + ctx, cancel := context.WithCancel(context.Background()) ccr := &ccResolverWrapper{ cc: cc, - done: grpcsync.NewEvent(), channelzID: opts.channelzID, ignoreServiceConfig: opts.bOpts.DisableServiceConfig, + serializer: grpcsync.NewCallbackSerializer(ctx), + serializerCancel: cancel, } - var err error - // We need to hold the lock here while we assign to the ccr.resolver field - // to guard against a data race caused by the following code path, - // rb.Build-->ccr.ReportError-->ccr.poll-->ccr.resolveNow, would end up - // accessing ccr.resolver which is being assigned here. - ccr.resolverMu.Lock() - defer ccr.resolverMu.Unlock() - ccr.resolver, err = opts.builder.Build(opts.target, ccr, opts.bOpts) + r, err := opts.builder.Build(opts.target, ccr, opts.bOpts) if err != nil { + cancel() return nil, err } + ccr.resolver = r return ccr, nil } func (ccr *ccResolverWrapper) resolveNow(o resolver.ResolveNowOptions) { - ccr.resolverMu.Lock() - if !ccr.done.HasFired() { + ccr.serializer.Schedule(func(_ context.Context) { ccr.resolver.ResolveNow(o) - } - ccr.resolverMu.Unlock() + }) } func (ccr *ccResolverWrapper) close() { - ccr.resolverMu.Lock() + // Close the serializer to ensure that no more calls from the resolver are + // handled, before closing the resolver. 
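+	// Canceling the serializer's context lets a callback that is already
+	// running complete, after which the serializer's Done channel is closed.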
+ ccr.serializerCancel() + <-ccr.serializer.Done ccr.resolver.Close() - ccr.done.Fire() - ccr.resolverMu.Unlock() } // UpdateState is called by resolver implementations to report new state to gRPC // which includes addresses and service config. func (ccr *ccResolverWrapper) UpdateState(s resolver.State) error { - ccr.incomingMu.Lock() - defer ccr.incomingMu.Unlock() - if ccr.done.HasFired() { + errCh := make(chan error, 1) + ccr.serializer.Schedule(func(_ context.Context) { + ccr.addChannelzTraceEvent(s) + ccr.curState = s + if err := ccr.cc.updateResolverState(ccr.curState, nil); err == balancer.ErrBadResolverState { + errCh <- balancer.ErrBadResolverState + return + } + errCh <- nil + }) + + // If the resolver wrapper is closed when waiting for this state update to + // be handled, the callback serializer will be closed as well, and we can + // rely on its Done channel to ensure that we don't block here forever. + select { + case err := <-errCh: + return err + case <-ccr.serializer.Done: return nil } - ccr.addChannelzTraceEventLocked(s) - ccr.curState = s - if err := ccr.cc.updateResolverState(ccr.curState, nil); err == balancer.ErrBadResolverState { - return balancer.ErrBadResolverState - } - return nil } // ReportError is called by resolver implementations to report errors // encountered during name resolution to gRPC. func (ccr *ccResolverWrapper) ReportError(err error) { - ccr.incomingMu.Lock() - defer ccr.incomingMu.Unlock() - if ccr.done.HasFired() { - return - } - channelz.Warningf(logger, ccr.channelzID, "ccResolverWrapper: reporting error to cc: %v", err) - ccr.cc.updateResolverState(resolver.State{}, err) + ccr.serializer.Schedule(func(_ context.Context) { + channelz.Warningf(logger, ccr.channelzID, "ccResolverWrapper: reporting error to cc: %v", err) + ccr.cc.updateResolverState(resolver.State{}, err) + }) } // NewAddress is called by the resolver implementation to send addresses to // gRPC. func (ccr *ccResolverWrapper) NewAddress(addrs []resolver.Address) { - ccr.incomingMu.Lock() - defer ccr.incomingMu.Unlock() - if ccr.done.HasFired() { - return - } - ccr.addChannelzTraceEventLocked(resolver.State{Addresses: addrs, ServiceConfig: ccr.curState.ServiceConfig}) - ccr.curState.Addresses = addrs - ccr.cc.updateResolverState(ccr.curState, nil) + ccr.serializer.Schedule(func(_ context.Context) { + ccr.addChannelzTraceEvent(resolver.State{Addresses: addrs, ServiceConfig: ccr.curState.ServiceConfig}) + ccr.curState.Addresses = addrs + ccr.cc.updateResolverState(ccr.curState, nil) + }) } // NewServiceConfig is called by the resolver implementation to send service // configs to gRPC. 
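+//
+// Like the other inbound calls from the resolver, the update is handled on
+// the serializer, so it cannot race with a concurrent close().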
func (ccr *ccResolverWrapper) NewServiceConfig(sc string) { - ccr.incomingMu.Lock() - defer ccr.incomingMu.Unlock() - if ccr.done.HasFired() { - return - } - channelz.Infof(logger, ccr.channelzID, "ccResolverWrapper: got new service config: %s", sc) - if ccr.ignoreServiceConfig { - channelz.Info(logger, ccr.channelzID, "Service config lookups disabled; ignoring config") - return - } - scpr := parseServiceConfig(sc) - if scpr.Err != nil { - channelz.Warningf(logger, ccr.channelzID, "ccResolverWrapper: error parsing service config: %v", scpr.Err) - return - } - ccr.addChannelzTraceEventLocked(resolver.State{Addresses: ccr.curState.Addresses, ServiceConfig: scpr}) - ccr.curState.ServiceConfig = scpr - ccr.cc.updateResolverState(ccr.curState, nil) + ccr.serializer.Schedule(func(_ context.Context) { + channelz.Infof(logger, ccr.channelzID, "ccResolverWrapper: got new service config: %s", sc) + if ccr.ignoreServiceConfig { + channelz.Info(logger, ccr.channelzID, "Service config lookups disabled; ignoring config") + return + } + scpr := parseServiceConfig(sc) + if scpr.Err != nil { + channelz.Warningf(logger, ccr.channelzID, "ccResolverWrapper: error parsing service config: %v", scpr.Err) + return + } + ccr.addChannelzTraceEvent(resolver.State{Addresses: ccr.curState.Addresses, ServiceConfig: scpr}) + ccr.curState.ServiceConfig = scpr + ccr.cc.updateResolverState(ccr.curState, nil) + }) } // ParseServiceConfig is called by resolver implementations to parse a JSON @@ -171,11 +169,9 @@ func (ccr *ccResolverWrapper) ParseServiceConfig(scJSON string) *serviceconfig.P return parseServiceConfig(scJSON) } -// addChannelzTraceEventLocked adds a channelz trace event containing the new +// addChannelzTraceEvent adds a channelz trace event containing the new // state received from resolver implementations. -// -// Caller must hold cc.incomingMu. -func (ccr *ccResolverWrapper) addChannelzTraceEventLocked(s resolver.State) { +func (ccr *ccResolverWrapper) addChannelzTraceEvent(s resolver.State) { var updates []string var oldSC, newSC *ServiceConfig var oldOK, newOK bool diff --git a/test/service_config_deprecated_test.go b/test/service_config_deprecated_test.go index 035f11526f79..ecf43a5760fe 100644 --- a/test/service_config_deprecated_test.go +++ b/test/service_config_deprecated_test.go @@ -146,15 +146,18 @@ func testServiceConfigWaitForReadyTD(t *testing.T, e env) { ch <- sc // Wait for the new service config to take effect. - mc = cc.GetMethodConfig("/grpc.testing.TestService/EmptyCall") - for { - if !*mc.WaitForReady { - time.Sleep(100 * time.Millisecond) - mc = cc.GetMethodConfig("/grpc.testing.TestService/EmptyCall") - continue + ctx, cancel = context.WithTimeout(context.Background(), defaultTestTimeout) + defer cancel() + for ; ctx.Err() == nil; <-time.After(defaultTestShortTimeout) { + mc = cc.GetMethodConfig("/grpc.testing.TestService/FullDuplexCall") + if *mc.WaitForReady { + break } - break } + if ctx.Err() != nil { + t.Fatalf("Timeout when waiting for service config to take effect") + } + // The following RPCs are expected to become non-fail-fast ones with 1ms deadline. if _, err := tc.EmptyCall(ctx, &testpb.Empty{}); status.Code(err) != codes.DeadlineExceeded { t.Fatalf("TestService/EmptyCall(_, _) = _, %v, want _, %s", err, codes.DeadlineExceeded) @@ -212,14 +215,16 @@ func testServiceConfigTimeoutTD(t *testing.T, e env) { ch <- sc // Wait for the new service config to take effect. 
- mc = cc.GetMethodConfig("/grpc.testing.TestService/FullDuplexCall") - for { - if *mc.Timeout != time.Nanosecond { - time.Sleep(100 * time.Millisecond) - mc = cc.GetMethodConfig("/grpc.testing.TestService/FullDuplexCall") - continue + ctx, cancel = context.WithTimeout(context.Background(), defaultTestTimeout) + defer cancel() + for ; ctx.Err() == nil; <-time.After(defaultTestShortTimeout) { + mc = cc.GetMethodConfig("/grpc.testing.TestService/FullDuplexCall") + if *mc.Timeout == time.Nanosecond { + break } - break + } + if ctx.Err() != nil { + t.Fatalf("Timeout when waiting for service config to take effect") } ctx, cancel = context.WithTimeout(context.Background(), time.Hour) From 1f3fe1c8bc2cdc97521468580df01904376a6ea7 Mon Sep 17 00:00:00 2001 From: Mikhail Mazurskiy <126021+ash2k@users.noreply.github.com> Date: Sat, 6 May 2023 01:38:20 +1000 Subject: [PATCH 23/60] Update ClientStream.SendMsg doc (#6247) --- stream.go | 3 +++ 1 file changed, 3 insertions(+) diff --git a/stream.go b/stream.go index d1226a4120f8..f79e31c147ee 100644 --- a/stream.go +++ b/stream.go @@ -123,6 +123,9 @@ type ClientStream interface { // calling RecvMsg on the same stream at the same time, but it is not safe // to call SendMsg on the same stream in different goroutines. It is also // not safe to call CloseSend concurrently with SendMsg. + // + // It is not safe to modify the message after calling SendMsg. Tracing + // libraries and stats handlers may use the message lazily. SendMsg(m interface{}) error // RecvMsg blocks until it receives a message into m or the stream is // done. It returns io.EOF when the stream completes successfully. On From 417cf846073b216abc00f5d15c291a3eba5fd00d Mon Sep 17 00:00:00 2001 From: Doug Fawley Date: Fri, 5 May 2023 11:08:42 -0700 Subject: [PATCH 24/60] test: deflake TestBalancerProducerHonorsContext (#6257) --- test/balancer_test.go | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/test/balancer_test.go b/test/balancer_test.go index 950d31d13ed5..8b88dc513b29 100644 --- a/test/balancer_test.go +++ b/test/balancer_test.go @@ -1012,10 +1012,11 @@ func (s) TestMetadataInPickResult(t *testing.T) { type producerTestBalancerBuilder struct { rpcErrChan chan error ctxChan chan context.Context + connect bool } func (bb *producerTestBalancerBuilder) Build(cc balancer.ClientConn, opts balancer.BuildOptions) balancer.Balancer { - return &producerTestBalancer{cc: cc, rpcErrChan: bb.rpcErrChan, ctxChan: bb.ctxChan} + return &producerTestBalancer{cc: cc, rpcErrChan: bb.rpcErrChan, ctxChan: bb.ctxChan, connect: bb.connect} } const producerTestBalancerName = "producer_test_balancer" @@ -1026,6 +1027,7 @@ type producerTestBalancer struct { cc balancer.ClientConn rpcErrChan chan error ctxChan chan context.Context + connect bool } func (b *producerTestBalancer) UpdateClientConnState(ccs balancer.ClientConnState) error { @@ -1052,8 +1054,10 @@ func (b *producerTestBalancer) UpdateClientConnState(ccs balancer.ClientConnStat default: } - // Now we can connect, which will unblock the RPC above. - sc.Connect() + if b.connect { + // Now we can connect, which will unblock the RPC above. + sc.Connect() + } // The stub server requires a READY picker to be reported, to unblock its // Start method. We won't make RPCs in our test, so a nil picker is okay. 
@@ -1096,8 +1100,9 @@ func (s) TestBalancerProducerBlockUntilReady(t *testing.T) { ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout) defer cancel() ctxChan <- ctx + rpcErrChan := make(chan error) - balancer.Register(&producerTestBalancerBuilder{rpcErrChan: rpcErrChan, ctxChan: ctxChan}) + balancer.Register(&producerTestBalancerBuilder{rpcErrChan: rpcErrChan, ctxChan: ctxChan, connect: true}) ss := &stubserver.StubServer{ EmptyCallF: func(ctx context.Context, in *testpb.Empty) (*testpb.Empty, error) { @@ -1128,7 +1133,7 @@ func (s) TestBalancerProducerHonorsContext(t *testing.T) { ctxChan <- ctx rpcErrChan := make(chan error) - balancer.Register(&producerTestBalancerBuilder{rpcErrChan: rpcErrChan, ctxChan: ctxChan}) + balancer.Register(&producerTestBalancerBuilder{rpcErrChan: rpcErrChan, ctxChan: ctxChan, connect: false}) ss := &stubserver.StubServer{ EmptyCallF: func(ctx context.Context, in *testpb.Empty) (*testpb.Empty, error) { From f193ec01834d14f6ce00e47c9f2965ffc4de1bbf Mon Sep 17 00:00:00 2001 From: Doug Fawley Date: Fri, 5 May 2023 14:25:11 -0700 Subject: [PATCH 25/60] orca: fix race when calling listeners coincides with updating the run goroutine (#6258) --- orca/producer.go | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/orca/producer.go b/orca/producer.go index 227baeb01ddf..3b7ed8b67d8a 100644 --- a/orca/producer.go +++ b/orca/producer.go @@ -44,7 +44,9 @@ func (*producerBuilder) Build(cci interface{}) (balancer.Producer, func()) { listeners: make(map[OOBListener]struct{}), backoff: internal.DefaultBackoffFunc, } - return p, func() {} + return p, func() { + <-p.stopped + } } var producerBuilderSingleton = &producerBuilder{} @@ -153,20 +155,19 @@ func (p *producer) recomputeMinInterval() { func (p *producer) updateRunLocked() { if p.stop != nil { p.stop() - <-p.stopped p.stop = nil } if len(p.listeners) > 0 { var ctx context.Context ctx, p.stop = context.WithCancel(context.Background()) p.stopped = make(chan struct{}) - go p.run(ctx, p.minInterval) + go p.run(ctx, p.stopped, p.minInterval) } } // run manages the ORCA OOB stream on the subchannel. -func (p *producer) run(ctx context.Context, interval time.Duration) { - defer close(p.stopped) +func (p *producer) run(ctx context.Context, done chan struct{}, interval time.Duration) { + defer close(done) backoffAttempt := 0 backoffTimer := time.NewTimer(0) From c44f77e12db9c09d27504f972e3275d6e6c544ea Mon Sep 17 00:00:00 2001 From: Easwar Swaminathan Date: Fri, 5 May 2023 16:07:27 -0700 Subject: [PATCH 26/60] grpc: use CallbackSerializer in balancer wrapper (#6254) --- balancer_conn_wrappers.go | 258 +++++++++++--------------------------- 1 file changed, 74 insertions(+), 184 deletions(-) diff --git a/balancer_conn_wrappers.go b/balancer_conn_wrappers.go index d0383f04748c..1865a3f09c2b 100644 --- a/balancer_conn_wrappers.go +++ b/balancer_conn_wrappers.go @@ -27,7 +27,6 @@ import ( "google.golang.org/grpc/balancer" "google.golang.org/grpc/connectivity" "google.golang.org/grpc/internal/balancer/gracefulswitch" - "google.golang.org/grpc/internal/buffer" "google.golang.org/grpc/internal/channelz" "google.golang.org/grpc/internal/grpcsync" "google.golang.org/grpc/resolver" @@ -49,147 +48,60 @@ import ( type ccBalancerWrapper struct { cc *ClientConn - // Since these fields are accessed only from handleXxx() methods which are - // synchronized by the watcher goroutine, we do not need a mutex to protect - // these fields. 
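The orca/producer.go fix above resolves the race by handing each run goroutine its own done channel, instead of letting it close whatever p.stopped happens to point to at exit time. A generic sketch of the pattern, with hypothetical names:

package main

import "context"

type runner struct {
	stop    context.CancelFunc // cancels the active run goroutine, if any
	stopped chan struct{}      // closed by the goroutine it was handed to
}

// restart cancels the previous goroutine and starts a new one. Because the
// done channel is passed as an argument, replacing r.stopped for the next
// run can never race with the old goroutine's deferred close.
func (r *runner) restart() {
	if r.stop != nil {
		r.stop()
	}
	ctx, cancel := context.WithCancel(context.Background())
	r.stop = cancel
	r.stopped = make(chan struct{})
	go r.run(ctx, r.stopped)
}

func (r *runner) run(ctx context.Context, done chan struct{}) {
	defer close(done)
	<-ctx.Done() // placeholder for the real stream-management loop
}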
- balancer *gracefulswitch.Balancer - curBalancerName string - - updateCh *buffer.Unbounded // Updates written on this channel are processed by watcher(). - resultCh *buffer.Unbounded // Results of calls to UpdateClientConnState() are pushed here. - closed *grpcsync.Event // Indicates if close has been called. - done *grpcsync.Event // Indicates if close has completed its work. + // Outgoing (gRPC --> balancer) calls are guaranteed to execute in a + // mutually exclusive manner as they are scheduled on the + // CallbackSerializer. Fields accessed *only* in serializer callbacks, can + // therefore be accessed without a mutex. + serializer *grpcsync.CallbackSerializer + serializerCancel context.CancelFunc + balancer *gracefulswitch.Balancer + curBalancerName string } // newCCBalancerWrapper creates a new balancer wrapper. The underlying balancer // is not created until the switchTo() method is invoked. func newCCBalancerWrapper(cc *ClientConn, bopts balancer.BuildOptions) *ccBalancerWrapper { + ctx, cancel := context.WithCancel(context.Background()) ccb := &ccBalancerWrapper{ - cc: cc, - updateCh: buffer.NewUnbounded(), - resultCh: buffer.NewUnbounded(), - closed: grpcsync.NewEvent(), - done: grpcsync.NewEvent(), + cc: cc, + serializer: grpcsync.NewCallbackSerializer(ctx), + serializerCancel: cancel, } - go ccb.watcher() ccb.balancer = gracefulswitch.NewBalancer(ccb, bopts) return ccb } -// The following xxxUpdate structs wrap the arguments received as part of the -// corresponding update. The watcher goroutine uses the 'type' of the update to -// invoke the appropriate handler routine to handle the update. - -type ccStateUpdate struct { - ccs *balancer.ClientConnState -} - -type scStateUpdate struct { - sc balancer.SubConn - state connectivity.State - err error -} - -type exitIdleUpdate struct{} - -type resolverErrorUpdate struct { - err error -} - -type switchToUpdate struct { - name string -} - -// watcher is a long-running goroutine which reads updates from a channel and -// invokes corresponding methods on the underlying balancer. It ensures that -// these methods are invoked in a synchronous fashion. It also ensures that -// these methods are invoked in the order in which the updates were received. -func (ccb *ccBalancerWrapper) watcher() { - for { - select { - case u, ok := <-ccb.updateCh.Get(): - if !ok { - break - } - ccb.updateCh.Load() - if ccb.closed.HasFired() { - break - } - switch update := u.(type) { - case *ccStateUpdate: - ccb.handleClientConnStateChange(update.ccs) - case *scStateUpdate: - ccb.handleSubConnStateChange(update) - case *exitIdleUpdate: - ccb.handleExitIdle() - case *resolverErrorUpdate: - ccb.handleResolverError(update.err) - case *switchToUpdate: - ccb.handleSwitchTo(update.name) - default: - logger.Errorf("ccBalancerWrapper.watcher: unknown update %+v, type %T", update, update) - } - case <-ccb.closed.Done(): - } - - if ccb.closed.HasFired() { - ccb.handleClose() - return - } - } -} - // updateClientConnState is invoked by grpc to push a ClientConnState update to // the underlying balancer. -// -// Unlike other methods invoked by grpc to push updates to the underlying -// balancer, this method cannot simply push the update onto the update channel -// and return. It needs to return the error returned by the underlying balancer -// back to grpc which propagates that to the resolver. 
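The serializer-based replacement that follows preserves this contract: the scheduled closure writes the balancer's result into a buffered channel, and a select against the serializer's Done channel prevents blocking forever if the wrapper is closed first. Roughly, with the wrapper details stripped away (a simplified sketch, not the actual grpc-go helper):

// scheduleAndWait schedules work on a serializer via schedule and returns
// its error to the caller. done is assumed to be closed when the serializer
// shuts down, so a closed wrapper cannot strand the caller.
func scheduleAndWait(schedule func(func()), done <-chan struct{}, work func() error) error {
	errCh := make(chan error, 1)
	schedule(func() { errCh <- work() })
	select {
	case err := <-errCh:
		return err
	case <-done:
		// Closed while waiting; nothing useful to report.
		return nil
	}
}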
func (ccb *ccBalancerWrapper) updateClientConnState(ccs *balancer.ClientConnState) error { - ccb.updateCh.Put(&ccStateUpdate{ccs: ccs}) + errCh := make(chan error, 1) + ccb.serializer.Schedule(func(_ context.Context) { + // If the addresses specified in the update contain addresses of type + // "grpclb" and the selected LB policy is not "grpclb", these addresses + // will be filtered out and ccs will be modified with the updated + // address list. + if ccb.curBalancerName != grpclbName { + var addrs []resolver.Address + for _, addr := range ccs.ResolverState.Addresses { + if addr.Type == resolver.GRPCLB { + continue + } + addrs = append(addrs, addr) + } + ccs.ResolverState.Addresses = addrs + } + errCh <- ccb.balancer.UpdateClientConnState(*ccs) + }) - var res interface{} - var ok bool + // If the balancer wrapper is closed when waiting for this state update to + // be handled, the callback serializer will be closed as well, and we can + // rely on its Done channel to ensure that we don't block here forever. select { - case res, ok = <-ccb.resultCh.Get(): - if !ok { - // The result channel is closed only when the balancer wrapper is closed. - return nil - } - ccb.resultCh.Load() - case <-ccb.closed.Done(): - // Return early if the balancer wrapper is closed while we are waiting for - // the underlying balancer to process a ClientConnState update. - return nil - } - // If the returned error is nil, attempting to type assert to error leads to - // panic. So, this needs to handled separately. - if res == nil { + case err := <-errCh: + return err + case <-ccb.serializer.Done: return nil } - return res.(error) -} - -// handleClientConnStateChange handles a ClientConnState update from the update -// channel and invokes the appropriate method on the underlying balancer. -// -// If the addresses specified in the update contain addresses of type "grpclb" -// and the selected LB policy is not "grpclb", these addresses will be filtered -// out and ccs will be modified with the updated address list. -func (ccb *ccBalancerWrapper) handleClientConnStateChange(ccs *balancer.ClientConnState) { - if ccb.curBalancerName != grpclbName { - // Filter any grpclb addresses since we don't have the grpclb balancer. - var addrs []resolver.Address - for _, addr := range ccs.ResolverState.Addresses { - if addr.Type == resolver.GRPCLB { - continue - } - addrs = append(addrs, addr) - } - ccs.ResolverState.Addresses = addrs - } - ccb.resultCh.Put(ccb.balancer.UpdateClientConnState(*ccs)) } // updateSubConnState is invoked by grpc to push a subConn state update to the @@ -202,39 +114,27 @@ func (ccb *ccBalancerWrapper) updateSubConnState(sc balancer.SubConn, s connecti // tearDown() on the old ac, ac.acbw (acWrapper) will be set to nil, and // this function will be called with (nil, Shutdown). We don't need to call // balancer method in this case. + // + // TODO: Suppress the above mentioned state change to Shutdown, so we don't + // have to handle it here. if sc == nil { return } - ccb.updateCh.Put(&scStateUpdate{ - sc: sc, - state: s, - err: err, + ccb.serializer.Schedule(func(_ context.Context) { + ccb.balancer.UpdateSubConnState(sc, balancer.SubConnState{ConnectivityState: s, ConnectionError: err}) }) } -// handleSubConnStateChange handles a SubConnState update from the update -// channel and invokes the appropriate method on the underlying balancer. 
-func (ccb *ccBalancerWrapper) handleSubConnStateChange(update *scStateUpdate) {
-	ccb.balancer.UpdateSubConnState(update.sc, balancer.SubConnState{ConnectivityState: update.state, ConnectionError: update.err})
-}
-
 func (ccb *ccBalancerWrapper) exitIdle() {
-	ccb.updateCh.Put(&exitIdleUpdate{})
-}
-
-func (ccb *ccBalancerWrapper) handleExitIdle() {
-	if ccb.cc.GetState() != connectivity.Idle {
-		return
-	}
-	ccb.balancer.ExitIdle()
+	ccb.serializer.Schedule(func(_ context.Context) {
+		ccb.balancer.ExitIdle()
+	})
 }
 
 func (ccb *ccBalancerWrapper) resolverError(err error) {
-	ccb.updateCh.Put(&resolverErrorUpdate{err: err})
-}
-
-func (ccb *ccBalancerWrapper) handleResolverError(err error) {
-	ccb.balancer.ResolverError(err)
+	ccb.serializer.Schedule(func(_ context.Context) {
+		ccb.balancer.ResolverError(err)
+	})
 }
 
 // switchTo is invoked by grpc to instruct the balancer wrapper to switch to the
@@ -248,49 +148,39 @@ func (ccb *ccBalancerWrapper) handleResolverError(err error) {
 // the ccBalancerWrapper keeps track of the current LB policy name, and skips
 // the graceful balancer switching process if the name does not change.
 func (ccb *ccBalancerWrapper) switchTo(name string) {
-	ccb.updateCh.Put(&switchToUpdate{name: name})
-}
-
-// handleSwitchTo handles a balancer switch update from the update channel. It
-// calls the SwitchTo() method on the gracefulswitch.Balancer with a
-// balancer.Builder corresponding to name. If no balancer.Builder is registered
-// for the given name, it uses the default LB policy which is "pick_first".
-func (ccb *ccBalancerWrapper) handleSwitchTo(name string) {
-	// TODO: Other languages use case-sensitive balancer registries. We should
-	// switch as well. See: https://github.com/grpc/grpc-go/issues/5288.
-	if strings.EqualFold(ccb.curBalancerName, name) {
-		return
-	}
+	ccb.serializer.Schedule(func(_ context.Context) {
+		// TODO: Other languages use case-sensitive balancer registries. We should
+		// switch as well. See: https://github.com/grpc/grpc-go/issues/5288.
+		if strings.EqualFold(ccb.curBalancerName, name) {
+			return
+		}
 
-	// TODO: Ensure that name is a registered LB policy when we get here.
-	// We currently only validate the `loadBalancingConfig` field. We need to do
-	// the same for the `loadBalancingPolicy` field and reject the service config
-	// if the specified policy is not registered.
-	builder := balancer.Get(name)
-	if builder == nil {
-		channelz.Warningf(logger, ccb.cc.channelzID, "Channel switches to new LB policy %q, since the specified LB policy %q was not registered", PickFirstBalancerName, name)
-		builder = newPickfirstBuilder()
-	} else {
-		channelz.Infof(logger, ccb.cc.channelzID, "Channel switches to new LB policy %q", name)
-	}
+		// Use the default LB policy, pick_first, if no LB policy with name is
+		// found in the registry.
+ builder := balancer.Get(name) + if builder == nil { + channelz.Warningf(logger, ccb.cc.channelzID, "Channel switches to new LB policy %q, since the specified LB policy %q was not registered", PickFirstBalancerName, name) + builder = newPickfirstBuilder() + } else { + channelz.Infof(logger, ccb.cc.channelzID, "Channel switches to new LB policy %q", name) + } - if err := ccb.balancer.SwitchTo(builder); err != nil { - channelz.Errorf(logger, ccb.cc.channelzID, "Channel failed to build new LB policy %q: %v", name, err) - return - } - ccb.curBalancerName = builder.Name() + if err := ccb.balancer.SwitchTo(builder); err != nil { + channelz.Errorf(logger, ccb.cc.channelzID, "Channel failed to build new LB policy %q: %v", name, err) + return + } + ccb.curBalancerName = builder.Name() + }) } func (ccb *ccBalancerWrapper) close() { - ccb.closed.Fire() - <-ccb.done.Done() -} - -func (ccb *ccBalancerWrapper) handleClose() { + // Close the serializer to ensure that no more calls from gRPC are sent to + // the balancer. We don't have to worry about suppressing calls from a + // closed balancer because these are handled by the ClientConn (balancer + // wrapper is only ever closed when the ClientConn is closed). + ccb.serializerCancel() + <-ccb.serializer.Done ccb.balancer.Close() - ccb.updateCh.Close() - ccb.resultCh.Close() - ccb.done.Fire() } func (ccb *ccBalancerWrapper) NewSubConn(addrs []resolver.Address, opts balancer.NewSubConnOptions) (balancer.SubConn, error) { From 5c4bee51c2ff3e713735d0c99547fc76bb739c2a Mon Sep 17 00:00:00 2001 From: Doug Fawley Date: Mon, 8 May 2023 10:01:08 -0700 Subject: [PATCH 27/60] balancer/weightedroundrobin: add load balancing policy (A58) (#6241) --- balancer/weightedroundrobin/balancer.go | 532 +++++++++++++ balancer/weightedroundrobin/balancer_test.go | 713 ++++++++++++++++++ balancer/weightedroundrobin/config.go | 60 ++ .../weightedroundrobin/internal/internal.go | 44 ++ balancer/weightedroundrobin/logging.go | 34 + balancer/weightedroundrobin/scheduler.go | 138 ++++ .../weightedroundrobin/weightedroundrobin.go | 23 +- internal/grpcrand/grpcrand.go | 7 + orca/producer.go | 13 +- xds/internal/balancer/clusterimpl/picker.go | 2 +- 10 files changed, 1545 insertions(+), 21 deletions(-) create mode 100644 balancer/weightedroundrobin/balancer.go create mode 100644 balancer/weightedroundrobin/balancer_test.go create mode 100644 balancer/weightedroundrobin/config.go create mode 100644 balancer/weightedroundrobin/internal/internal.go create mode 100644 balancer/weightedroundrobin/logging.go create mode 100644 balancer/weightedroundrobin/scheduler.go diff --git a/balancer/weightedroundrobin/balancer.go b/balancer/weightedroundrobin/balancer.go new file mode 100644 index 000000000000..e0d255222d52 --- /dev/null +++ b/balancer/weightedroundrobin/balancer.go @@ -0,0 +1,532 @@ +/* + * + * Copyright 2023 gRPC authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * + */ + +package weightedroundrobin + +import ( + "context" + "encoding/json" + "errors" + "fmt" + "sync" + "sync/atomic" + "time" + "unsafe" + + "google.golang.org/grpc/balancer" + "google.golang.org/grpc/balancer/base" + "google.golang.org/grpc/balancer/weightedroundrobin/internal" + "google.golang.org/grpc/connectivity" + "google.golang.org/grpc/internal/grpclog" + "google.golang.org/grpc/internal/grpcrand" + "google.golang.org/grpc/orca" + "google.golang.org/grpc/resolver" + "google.golang.org/grpc/serviceconfig" + + v3orcapb "github.com/cncf/xds/go/xds/data/orca/v3" +) + +// Name is the name of the weighted round robin balancer. +const Name = "weighted_round_robin_experimental" + +func init() { + balancer.Register(bb{}) +} + +type bb struct{} + +func (bb) Build(cc balancer.ClientConn, bOpts balancer.BuildOptions) balancer.Balancer { + b := &wrrBalancer{ + cc: cc, + subConns: resolver.NewAddressMap(), + csEvltr: &balancer.ConnectivityStateEvaluator{}, + scMap: make(map[balancer.SubConn]*weightedSubConn), + connectivityState: connectivity.Connecting, + } + b.logger = prefixLogger(b) + b.logger.Infof("Created") + return b +} + +func (bb) ParseConfig(js json.RawMessage) (serviceconfig.LoadBalancingConfig, error) { + lbCfg := &lbConfig{ + // Default values as documented in A58. + OOBReportingPeriod: 10 * time.Second, + BlackoutPeriod: 10 * time.Second, + WeightExpirationPeriod: 3 * time.Minute, + WeightUpdatePeriod: time.Second, + ErrorUtilizationPenalty: 1, + } + if err := json.Unmarshal(js, lbCfg); err != nil { + return nil, fmt.Errorf("wrr: unable to unmarshal LB policy config: %s, error: %v", string(js), err) + } + + if lbCfg.ErrorUtilizationPenalty < 0 { + return nil, fmt.Errorf("wrr: errorUtilizationPenalty must be non-negative") + } + + // For easier comparisons later, ensure the OOB reporting period is unset + // (0s) when OOB reports are disabled. + if !lbCfg.EnableOOBLoadReport { + lbCfg.OOBReportingPeriod = 0 + } + + // Impose lower bound of 100ms on weightUpdatePeriod. + if !internal.AllowAnyWeightUpdatePeriod && lbCfg.WeightUpdatePeriod < 100*time.Millisecond { + lbCfg.WeightUpdatePeriod = 100 * time.Millisecond + } + + return lbCfg, nil +} + +func (bb) Name() string { + return Name +} + +// wrrBalancer implements the weighted round robin LB policy. +type wrrBalancer struct { + cc balancer.ClientConn + logger *grpclog.PrefixLogger + + // The following fields are only accessed on calls into the LB policy, and + // do not need a mutex. 
+ cfg *lbConfig // active config + subConns *resolver.AddressMap // active weightedSubConns mapped by address + scMap map[balancer.SubConn]*weightedSubConn + connectivityState connectivity.State // aggregate state + csEvltr *balancer.ConnectivityStateEvaluator + resolverErr error // the last error reported by the resolver; cleared on successful resolution + connErr error // the last connection error; cleared upon leaving TransientFailure + stopPicker func() +} + +func (b *wrrBalancer) UpdateClientConnState(ccs balancer.ClientConnState) error { + b.logger.Infof("UpdateCCS: %v", ccs) + b.resolverErr = nil + cfg, ok := ccs.BalancerConfig.(*lbConfig) + if !ok { + return fmt.Errorf("wrr: received nil or illegal BalancerConfig (type %T): %v", ccs.BalancerConfig, ccs.BalancerConfig) + } + + b.cfg = cfg + b.updateAddresses(ccs.ResolverState.Addresses) + + if len(ccs.ResolverState.Addresses) == 0 { + b.ResolverError(errors.New("resolver produced zero addresses")) // will call regeneratePicker + return balancer.ErrBadResolverState + } + + b.regeneratePicker() + + return nil +} + +func (b *wrrBalancer) updateAddresses(addrs []resolver.Address) { + addrsSet := resolver.NewAddressMap() + + // Loop through new address list and create subconns for any new addresses. + for _, addr := range addrs { + if _, ok := addrsSet.Get(addr); ok { + // Redundant address; skip. + continue + } + addrsSet.Set(addr, nil) + + var wsc *weightedSubConn + wsci, ok := b.subConns.Get(addr) + if ok { + wsc = wsci.(*weightedSubConn) + } else { + // addr is a new address (not existing in b.subConns). + sc, err := b.cc.NewSubConn([]resolver.Address{addr}, balancer.NewSubConnOptions{}) + if err != nil { + b.logger.Warningf("Failed to create new SubConn for address %v: %v", addr, err) + continue + } + wsc = &weightedSubConn{ + SubConn: sc, + logger: b.logger, + connectivityState: connectivity.Idle, + // Initially, we set load reports to off, because they are not + // running upon initial weightedSubConn creation. + cfg: &lbConfig{EnableOOBLoadReport: false}, + } + b.subConns.Set(addr, wsc) + b.scMap[sc] = wsc + b.csEvltr.RecordTransition(connectivity.Shutdown, connectivity.Idle) + sc.Connect() + } + // Update config for existing weightedSubConn or send update for first + // time to new one. Ensures an OOB listener is running if needed + // (and stops the existing one if applicable). + wsc.updateConfig(b.cfg) + } + + // Loop through existing subconns and remove ones that are not in addrs. + for _, addr := range b.subConns.Keys() { + if _, ok := addrsSet.Get(addr); ok { + // Existing address also in new address list; skip. + continue + } + // addr was removed by resolver. Remove. + wsci, _ := b.subConns.Get(addr) + wsc := wsci.(*weightedSubConn) + b.cc.RemoveSubConn(wsc.SubConn) + b.subConns.Delete(addr) + } +} + +func (b *wrrBalancer) ResolverError(err error) { + b.resolverErr = err + if b.subConns.Len() == 0 { + b.connectivityState = connectivity.TransientFailure + } + if b.connectivityState != connectivity.TransientFailure { + // No need to update the picker since no error is being returned. 
+ return + } + b.regeneratePicker() +} + +func (b *wrrBalancer) UpdateSubConnState(sc balancer.SubConn, state balancer.SubConnState) { + wsc := b.scMap[sc] + if wsc == nil { + b.logger.Errorf("UpdateSubConnState called with an unknown SubConn: %p, %v", sc, state) + return + } + if b.logger.V(2) { + logger.Infof("UpdateSubConnState(%+v, %+v)", sc, state) + } + + cs := state.ConnectivityState + + if cs == connectivity.TransientFailure { + // Save error to be reported via picker. + b.connErr = state.ConnectionError + } + + if cs == connectivity.Shutdown { + delete(b.scMap, sc) + // The subconn was removed from b.subConns when the address was removed + // in updateAddresses. + } + + oldCS := wsc.updateConnectivityState(cs) + b.connectivityState = b.csEvltr.RecordTransition(oldCS, cs) + + // Regenerate picker when one of the following happens: + // - this sc entered or left ready + // - the aggregated state of balancer is TransientFailure + // (may need to update error message) + if (cs == connectivity.Ready) != (oldCS == connectivity.Ready) || + b.connectivityState == connectivity.TransientFailure { + b.regeneratePicker() + } +} + +// Close stops the balancer. It cancels any ongoing scheduler updates and +// stops any ORCA listeners. +func (b *wrrBalancer) Close() { + if b.stopPicker != nil { + b.stopPicker() + b.stopPicker = nil + } + for _, wsc := range b.scMap { + // Ensure any lingering OOB watchers are stopped. + wsc.updateConnectivityState(connectivity.Shutdown) + } +} + +// ExitIdle is ignored; we always connect to all backends. +func (b *wrrBalancer) ExitIdle() {} + +func (b *wrrBalancer) readySubConns() []*weightedSubConn { + var ret []*weightedSubConn + for _, v := range b.subConns.Values() { + wsc := v.(*weightedSubConn) + if wsc.connectivityState == connectivity.Ready { + ret = append(ret, wsc) + } + } + return ret +} + +// mergeErrors builds an error from the last connection error and the last +// resolver error. Must only be called if b.connectivityState is +// TransientFailure. +func (b *wrrBalancer) mergeErrors() error { + // connErr must always be non-nil unless there are no SubConns, in which + // case resolverErr must be non-nil. + if b.connErr == nil { + return fmt.Errorf("last resolver error: %v", b.resolverErr) + } + if b.resolverErr == nil { + return fmt.Errorf("last connection error: %v", b.connErr) + } + return fmt.Errorf("last connection error: %v; last resolver error: %v", b.connErr, b.resolverErr) +} + +func (b *wrrBalancer) regeneratePicker() { + if b.stopPicker != nil { + b.stopPicker() + b.stopPicker = nil + } + + switch b.connectivityState { + case connectivity.TransientFailure: + b.cc.UpdateState(balancer.State{ + ConnectivityState: connectivity.TransientFailure, + Picker: base.NewErrPicker(b.mergeErrors()), + }) + return + case connectivity.Connecting, connectivity.Idle: + // Idle could happen very briefly if all subconns are Idle and we've + // asked them to connect but they haven't reported Connecting yet. + // Report the same as Connecting since this is temporary. 
+ b.cc.UpdateState(balancer.State{ + ConnectivityState: connectivity.Connecting, + Picker: base.NewErrPicker(balancer.ErrNoSubConnAvailable), + }) + return + case connectivity.Ready: + b.connErr = nil + } + + p := &picker{ + v: grpcrand.Uint32(), // start the scheduler at a random point + cfg: b.cfg, + subConns: b.readySubConns(), + } + var ctx context.Context + ctx, b.stopPicker = context.WithCancel(context.Background()) + p.start(ctx) + b.cc.UpdateState(balancer.State{ + ConnectivityState: b.connectivityState, + Picker: p, + }) +} + +// picker is the WRR policy's picker. It uses live-updating backend weights to +// update the scheduler periodically and ensure picks are routed proportional +// to those weights. +type picker struct { + scheduler unsafe.Pointer // *scheduler; accessed atomically + v uint32 // incrementing value used by the scheduler; accessed atomically + cfg *lbConfig // active config when picker created + subConns []*weightedSubConn // all READY subconns +} + +// scWeights returns a slice containing the weights from p.subConns in the same +// order as p.subConns. +func (p *picker) scWeights() []float64 { + ws := make([]float64, len(p.subConns)) + now := internal.TimeNow() + for i, wsc := range p.subConns { + ws[i] = wsc.weight(now, p.cfg.WeightExpirationPeriod, p.cfg.BlackoutPeriod) + } + return ws +} + +func (p *picker) inc() uint32 { + return atomic.AddUint32(&p.v, 1) +} + +func (p *picker) regenerateScheduler() { + s := newScheduler(p.scWeights(), p.inc) + atomic.StorePointer(&p.scheduler, unsafe.Pointer(&s)) +} + +func (p *picker) start(ctx context.Context) { + p.regenerateScheduler() + if len(p.subConns) == 1 { + // No need to regenerate weights with only one backend. + return + } + go func() { + ticker := time.NewTicker(p.cfg.WeightUpdatePeriod) + for { + select { + case <-ctx.Done(): + return + case <-ticker.C: + p.regenerateScheduler() + } + } + }() +} + +func (p *picker) Pick(info balancer.PickInfo) (balancer.PickResult, error) { + // Read the scheduler atomically. All scheduler operations are threadsafe, + // and if the scheduler is replaced during this usage, we want to use the + // scheduler that was live when the pick started. + sched := *(*scheduler)(atomic.LoadPointer(&p.scheduler)) + + pickedSC := p.subConns[sched.nextIndex()] + pr := balancer.PickResult{SubConn: pickedSC.SubConn} + if !p.cfg.EnableOOBLoadReport { + pr.Done = func(info balancer.DoneInfo) { + if load, ok := info.ServerLoad.(*v3orcapb.OrcaLoadReport); ok && load != nil { + pickedSC.OnLoadReport(load) + } + } + } + return pr, nil +} + +// weightedSubConn is the wrapper of a subconn that holds the subconn and its +// weight (and other parameters relevant to computing the effective weight). +// When needed, it also tracks connectivity state, listens for metrics updates +// by implementing the orca.OOBListener interface and manages that listener. +type weightedSubConn struct { + balancer.SubConn + logger *grpclog.PrefixLogger + + // The following fields are only accessed on calls into the LB policy, and + // do not need a mutex. + connectivityState connectivity.State + stopORCAListener func() + + // The following fields are accessed asynchronously and are protected by + // mu. Note that mu may not be held when calling into the stopORCAListener + // or when registering a new listener, as those calls require the ORCA + // producer mu which is held when calling the listener, and the listener + // holds mu. 
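In other words: take mu only to read or write the guarded fields, never across stopping or registering a listener, since those calls can synchronously invoke a callback that itself takes mu. A condensed sketch of the discipline used by updateConfig further down, with hypothetical types:

package main

import "sync"

type holder struct {
	mu   sync.Mutex
	cfg  int    // guarded by mu; read by asynchronous callbacks
	stop func() // accessed only from LB policy calls; needs no mutex
}

// setConfig swaps the config under mu, then releases mu before touching the
// listener: register may call back into code that acquires mu, so holding
// it across these calls risks a lock-order inversion.
func (h *holder) setConfig(cfg int, register func() (stop func())) {
	h.mu.Lock()
	h.cfg = cfg
	h.mu.Unlock()

	if h.stop != nil {
		h.stop()
	}
	h.stop = register()
}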
+ mu sync.Mutex + weightVal float64 + nonEmptySince time.Time + lastUpdated time.Time + cfg *lbConfig +} + +func (w *weightedSubConn) OnLoadReport(load *v3orcapb.OrcaLoadReport) { + if w.logger.V(2) { + w.logger.Infof("Received load report for subchannel %v: %v", w.SubConn, load) + } + // Update weights of this subchannel according to the reported load + if load.CpuUtilization == 0 || load.RpsFractional == 0 { + if w.logger.V(2) { + w.logger.Infof("Ignoring empty load report for subchannel %v", w.SubConn) + } + return + } + + w.mu.Lock() + defer w.mu.Unlock() + + errorRate := load.Eps / load.RpsFractional + w.weightVal = load.RpsFractional / (load.CpuUtilization + errorRate*w.cfg.ErrorUtilizationPenalty) + if w.logger.V(2) { + w.logger.Infof("New weight for subchannel %v: %v", w.SubConn, w.weightVal) + } + + w.lastUpdated = internal.TimeNow() + if w.nonEmptySince == (time.Time{}) { + w.nonEmptySince = w.lastUpdated + } +} + +// updateConfig updates the parameters of the WRR policy and +// stops/starts/restarts the ORCA OOB listener. +func (w *weightedSubConn) updateConfig(cfg *lbConfig) { + w.mu.Lock() + oldCfg := w.cfg + w.cfg = cfg + w.mu.Unlock() + + newPeriod := cfg.OOBReportingPeriod + if cfg.EnableOOBLoadReport == oldCfg.EnableOOBLoadReport && + newPeriod == oldCfg.OOBReportingPeriod { + // Load reporting wasn't enabled before or after, or load reporting was + // enabled before and after, and had the same period. (Note that with + // load reporting disabled, OOBReportingPeriod is always 0.) + return + } + // (Optionally stop and) start the listener to use the new config's + // settings for OOB reporting. + + if w.stopORCAListener != nil { + w.stopORCAListener() + } + if !cfg.EnableOOBLoadReport { + w.stopORCAListener = nil + return + } + if w.logger.V(2) { + w.logger.Infof("Registering ORCA listener for %v with interval %v", w.SubConn, newPeriod) + } + opts := orca.OOBListenerOptions{ReportInterval: newPeriod} + w.stopORCAListener = orca.RegisterOOBListener(w.SubConn, w, opts) +} + +func (w *weightedSubConn) updateConnectivityState(cs connectivity.State) connectivity.State { + switch cs { + case connectivity.Idle: + // Always reconnect when idle. + w.SubConn.Connect() + case connectivity.Ready: + // If we transition back to READY state, reset nonEmptySince so that we + // apply the blackout period after we start receiving load data. Note + // that we cannot guarantee that we will never receive lingering + // callbacks for backend metric reports from the previous connection + // after the new connection has been established, but they should be + // masked by new backend metric reports from the new connection by the + // time the blackout period ends. + w.mu.Lock() + w.nonEmptySince = time.Time{} + w.mu.Unlock() + case connectivity.Shutdown: + if w.stopORCAListener != nil { + w.stopORCAListener() + } + } + + oldCS := w.connectivityState + + if oldCS == connectivity.TransientFailure && + (cs == connectivity.Connecting || cs == connectivity.Idle) { + // Once a subconn enters TRANSIENT_FAILURE, ignore subsequent IDLE or + // CONNECTING transitions to prevent the aggregated state from being + // always CONNECTING when many backends exist but are all down. + return oldCS + } + + w.connectivityState = cs + + return oldCS +} + +// weight returns the current effective weight of the subconn, taking into +// account the parameters. Returns 0 for blacked out or expired data, which +// will cause the backend weight to be treated as the mean of the weights of +// the other backends. 
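To make the utilization-based weighting concrete, applying the expression from OnLoadReport above (weight = qps / (cpu + (eps/qps) * errorUtilizationPenalty)) to the load reports the tests below use:

weight(srv1) = 10 / (1.0 + (0/10) * 0) = 10     // qps=10, cpu=1.0, eps=0
weight(srv2) = 10 / (0.1 + (0/10) * 0) = 100    // qps=10, cpu=0.1, eps=0

so srv2 is expected to receive about ten times the traffic of srv1, which is exactly the 10:1 split the two-address tests assert.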
+func (w *weightedSubConn) weight(now time.Time, weightExpirationPeriod, blackoutPeriod time.Duration) float64 { + w.mu.Lock() + defer w.mu.Unlock() + // If the most recent update was longer ago than the expiration period, + // reset nonEmptySince so that we apply the blackout period again if we + // start getting data again in the future, and return 0. + if now.Sub(w.lastUpdated) >= weightExpirationPeriod { + w.nonEmptySince = time.Time{} + return 0 + } + // If we don't have at least blackoutPeriod worth of data, return 0. + if blackoutPeriod != 0 && (w.nonEmptySince == (time.Time{}) || now.Sub(w.nonEmptySince) < blackoutPeriod) { + return 0 + } + return w.weightVal +} diff --git a/balancer/weightedroundrobin/balancer_test.go b/balancer/weightedroundrobin/balancer_test.go new file mode 100644 index 000000000000..5dd62ebf872a --- /dev/null +++ b/balancer/weightedroundrobin/balancer_test.go @@ -0,0 +1,713 @@ +/* + * + * Copyright 2023 gRPC authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +package weightedroundrobin_test + +import ( + "context" + "encoding/json" + "fmt" + "sync" + "testing" + "time" + + "google.golang.org/grpc" + "google.golang.org/grpc/internal" + "google.golang.org/grpc/internal/grpctest" + "google.golang.org/grpc/internal/stubserver" + "google.golang.org/grpc/internal/testutils/roundrobin" + "google.golang.org/grpc/orca" + "google.golang.org/grpc/peer" + "google.golang.org/grpc/resolver" + + wrr "google.golang.org/grpc/balancer/weightedroundrobin" + iwrr "google.golang.org/grpc/balancer/weightedroundrobin/internal" + + testgrpc "google.golang.org/grpc/interop/grpc_testing" + testpb "google.golang.org/grpc/interop/grpc_testing" +) + +type s struct { + grpctest.Tester +} + +func Test(t *testing.T) { + grpctest.RunSubTests(t, s{}) +} + +const defaultTestTimeout = 10 * time.Second +const weightUpdatePeriod = 50 * time.Millisecond +const oobReportingInterval = 10 * time.Millisecond + +func init() { + iwrr.AllowAnyWeightUpdatePeriod = true +} + +func boolp(b bool) *bool { return &b } +func float64p(f float64) *float64 { return &f } +func durationp(d time.Duration) *time.Duration { return &d } + +var ( + perCallConfig = iwrr.LBConfig{ + EnableOOBLoadReport: boolp(false), + OOBReportingPeriod: durationp(5 * time.Millisecond), + BlackoutPeriod: durationp(0), + WeightExpirationPeriod: durationp(time.Minute), + WeightUpdatePeriod: durationp(weightUpdatePeriod), + ErrorUtilizationPenalty: float64p(0), + } + oobConfig = iwrr.LBConfig{ + EnableOOBLoadReport: boolp(true), + OOBReportingPeriod: durationp(5 * time.Millisecond), + BlackoutPeriod: durationp(0), + WeightExpirationPeriod: durationp(time.Minute), + WeightUpdatePeriod: durationp(weightUpdatePeriod), + ErrorUtilizationPenalty: float64p(0), + } +) + +type testServer struct { + *stubserver.StubServer + + oobMetrics orca.ServerMetricsRecorder // Attached to the OOB stream. + callMetrics orca.CallMetricsRecorder // Attached to per-call metrics. 
+}
+
+type reportType int
+
+const (
+	reportNone reportType = iota
+	reportOOB
+	reportCall
+	reportBoth
+)
+
+func startServer(t *testing.T, r reportType) *testServer {
+	t.Helper()
+
+	smr := orca.NewServerMetricsRecorder()
+	cmr := orca.NewServerMetricsRecorder().(orca.CallMetricsRecorder)
+
+	ss := &stubserver.StubServer{
+		EmptyCallF: func(ctx context.Context, in *testpb.Empty) (*testpb.Empty, error) {
+			if r := orca.CallMetricsRecorderFromContext(ctx); r != nil {
+				// Copy metrics from what the test set in cmr into r.
+				sm := cmr.(orca.ServerMetricsProvider).ServerMetrics()
+				r.SetCPUUtilization(sm.CPUUtilization)
+				r.SetQPS(sm.QPS)
+				r.SetEPS(sm.EPS)
+			}
+			return &testpb.Empty{}, nil
+		},
+	}
+
+	var sopts []grpc.ServerOption
+	if r == reportCall || r == reportBoth {
+		sopts = append(sopts, orca.CallMetricsServerOption(nil))
+	}
+
+	if r == reportOOB || r == reportBoth {
+		oso := orca.ServiceOptions{
+			ServerMetricsProvider: smr,
+			MinReportingInterval:  10 * time.Millisecond,
+		}
+		internal.ORCAAllowAnyMinReportingInterval.(func(so *orca.ServiceOptions))(&oso)
+		sopts = append(sopts, stubserver.RegisterServiceServerOption(func(s *grpc.Server) {
+			if err := orca.Register(s, oso); err != nil {
+				t.Fatalf("Failed to register orca service: %v", err)
+			}
+		}))
+	}
+
+	if err := ss.StartServer(sopts...); err != nil {
+		t.Fatalf("Error starting server: %v", err)
+	}
+	t.Cleanup(ss.Stop)
+
+	return &testServer{
+		StubServer:  ss,
+		oobMetrics:  smr,
+		callMetrics: cmr,
+	}
+}
+
+func svcConfig(t *testing.T, wrrCfg iwrr.LBConfig) string {
+	t.Helper()
+	m, err := json.Marshal(wrrCfg)
+	if err != nil {
+		t.Fatalf("Error marshaling JSON %v: %v", wrrCfg, err)
+	}
+	sc := fmt.Sprintf(`{"loadBalancingConfig": [ {%q:%v} ] }`, wrr.Name, string(m))
+	t.Logf("Marshaled service config: %v", sc)
+	return sc
+}
+
+// Tests basic functionality with one address. With only one address, load
+// reporting doesn't affect routing at all.
+func (s) TestBalancer_OneAddress(t *testing.T) {
+	testCases := []struct {
+		rt  reportType
+		cfg iwrr.LBConfig
+	}{
+		{rt: reportNone, cfg: perCallConfig},
+		{rt: reportCall, cfg: perCallConfig},
+		{rt: reportOOB, cfg: oobConfig},
+	}
+
+	for _, tc := range testCases {
+		t.Run(fmt.Sprintf("reportType:%v", tc.rt), func(t *testing.T) {
+			ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout)
+			defer cancel()
+
+			srv := startServer(t, tc.rt)
+
+			sc := svcConfig(t, tc.cfg)
+			if err := srv.StartClient(grpc.WithDefaultServiceConfig(sc)); err != nil {
+				t.Fatalf("Error starting client: %v", err)
+			}
+
+			// Perform many RPCs to ensure the LB policy works with 1 address.
+			for i := 0; i < 100; i++ {
+				srv.callMetrics.SetQPS(float64(i))
+				srv.oobMetrics.SetQPS(float64(i))
+				if _, err := srv.Client.EmptyCall(ctx, &testpb.Empty{}); err != nil {
+					t.Fatalf("Error from EmptyCall: %v", err)
+				}
+				time.Sleep(time.Millisecond) // Delay; test will run 100ms and should perform ~2 weight updates
+			}
+		})
+	}
+}
+
+// Tests two addresses with ORCA reporting disabled (should fall back to pure
+// RR).
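With reporting disabled no backend ever gets a non-zero weight, and the scheduler constructor (see scheduler.go below) falls back to plain round robin. A sketch of that decision rule, restated from the newScheduler doc comment:

// fallsBackToRR mirrors the documented rule: round robin is used when there
// is a single subconn or when at least len(weights)-1 entries are zero.
func fallsBackToRR(weights []float64) bool {
	if len(weights) <= 1 {
		return true
	}
	zeros := 0
	for _, w := range weights {
		if w == 0 {
			zeros++
		}
	}
	return zeros >= len(weights)-1
}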
+func (s) TestBalancer_TwoAddresses_ReportingDisabled(t *testing.T) { + ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout) + defer cancel() + + srv1 := startServer(t, reportNone) + srv2 := startServer(t, reportNone) + + sc := svcConfig(t, perCallConfig) + if err := srv1.StartClient(grpc.WithDefaultServiceConfig(sc)); err != nil { + t.Fatalf("Error starting client: %v", err) + } + addrs := []resolver.Address{{Addr: srv1.Address}, {Addr: srv2.Address}} + srv1.R.UpdateState(resolver.State{Addresses: addrs}) + + // Perform many RPCs to ensure the LB policy works with 2 addresses. + for i := 0; i < 20; i++ { + roundrobin.CheckRoundRobinRPCs(ctx, srv1.Client, addrs) + } +} + +// Tests two addresses with per-call ORCA reporting enabled. Checks the +// backends are called in the appropriate ratios. +func (s) TestBalancer_TwoAddresses_ReportingEnabledPerCall(t *testing.T) { + ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout) + defer cancel() + + srv1 := startServer(t, reportCall) + srv2 := startServer(t, reportCall) + + // srv1 starts loaded and srv2 starts without load; ensure RPCs are routed + // disproportionately to srv2 (10:1). + srv1.callMetrics.SetQPS(10.0) + srv1.callMetrics.SetCPUUtilization(1.0) + + srv2.callMetrics.SetQPS(10.0) + srv2.callMetrics.SetCPUUtilization(.1) + + sc := svcConfig(t, perCallConfig) + if err := srv1.StartClient(grpc.WithDefaultServiceConfig(sc)); err != nil { + t.Fatalf("Error starting client: %v", err) + } + addrs := []resolver.Address{{Addr: srv1.Address}, {Addr: srv2.Address}} + srv1.R.UpdateState(resolver.State{Addresses: addrs}) + + // Call each backend once to ensure the weights have been received. + ensureReached(ctx, t, srv1.Client, 2) + + // Wait for the weight update period to allow the new weights to be processed. + time.Sleep(weightUpdatePeriod) + checkWeights(ctx, t, srvWeight{srv1, 1}, srvWeight{srv2, 10}) +} + +// Tests two addresses with OOB ORCA reporting enabled. Checks the backends +// are called in the appropriate ratios. +func (s) TestBalancer_TwoAddresses_ReportingEnabledOOB(t *testing.T) { + ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout) + defer cancel() + + srv1 := startServer(t, reportOOB) + srv2 := startServer(t, reportOOB) + + // srv1 starts loaded and srv2 starts without load; ensure RPCs are routed + // disproportionately to srv2 (10:1). + srv1.oobMetrics.SetQPS(10.0) + srv1.oobMetrics.SetCPUUtilization(1.0) + + srv2.oobMetrics.SetQPS(10.0) + srv2.oobMetrics.SetCPUUtilization(.1) + + sc := svcConfig(t, oobConfig) + if err := srv1.StartClient(grpc.WithDefaultServiceConfig(sc)); err != nil { + t.Fatalf("Error starting client: %v", err) + } + addrs := []resolver.Address{{Addr: srv1.Address}, {Addr: srv2.Address}} + srv1.R.UpdateState(resolver.State{Addresses: addrs}) + + // Call each backend once to ensure the weights have been received. + ensureReached(ctx, t, srv1.Client, 2) + + // Wait for the weight update period to allow the new weights to be processed. + time.Sleep(weightUpdatePeriod) + checkWeights(ctx, t, srvWeight{srv1, 1}, srvWeight{srv2, 10}) +} + +// Tests two addresses with OOB ORCA reporting enabled, where the reports +// change over time. Checks the backends are called in the appropriate ratios +// before and after modifying the reports. 
+func (s) TestBalancer_TwoAddresses_UpdateLoads(t *testing.T) { + ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout) + defer cancel() + + srv1 := startServer(t, reportOOB) + srv2 := startServer(t, reportOOB) + + // srv1 starts loaded and srv2 starts without load; ensure RPCs are routed + // disproportionately to srv2 (10:1). + srv1.oobMetrics.SetQPS(10.0) + srv1.oobMetrics.SetCPUUtilization(1.0) + + srv2.oobMetrics.SetQPS(10.0) + srv2.oobMetrics.SetCPUUtilization(.1) + + sc := svcConfig(t, oobConfig) + if err := srv1.StartClient(grpc.WithDefaultServiceConfig(sc)); err != nil { + t.Fatalf("Error starting client: %v", err) + } + addrs := []resolver.Address{{Addr: srv1.Address}, {Addr: srv2.Address}} + srv1.R.UpdateState(resolver.State{Addresses: addrs}) + + // Call each backend once to ensure the weights have been received. + ensureReached(ctx, t, srv1.Client, 2) + + // Wait for the weight update period to allow the new weights to be processed. + time.Sleep(weightUpdatePeriod) + checkWeights(ctx, t, srvWeight{srv1, 1}, srvWeight{srv2, 10}) + + // Update the loads so srv2 is loaded and srv1 is not; ensure RPCs are + // routed disproportionately to srv1. + srv1.oobMetrics.SetQPS(10.0) + srv1.oobMetrics.SetCPUUtilization(.1) + + srv2.oobMetrics.SetQPS(10.0) + srv2.oobMetrics.SetCPUUtilization(1.0) + + // Wait for the weight update period to allow the new weights to be processed. + time.Sleep(weightUpdatePeriod + oobReportingInterval) + checkWeights(ctx, t, srvWeight{srv1, 10}, srvWeight{srv2, 1}) +} + +// Tests two addresses with OOB ORCA reporting enabled, then with switching to +// per-call reporting. Checks the backends are called in the appropriate +// ratios before and after the change. +func (s) TestBalancer_TwoAddresses_OOBThenPerCall(t *testing.T) { + ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout) + defer cancel() + + srv1 := startServer(t, reportBoth) + srv2 := startServer(t, reportBoth) + + // srv1 starts loaded and srv2 starts without load; ensure RPCs are routed + // disproportionately to srv2 (10:1). + srv1.oobMetrics.SetQPS(10.0) + srv1.oobMetrics.SetCPUUtilization(1.0) + + srv2.oobMetrics.SetQPS(10.0) + srv2.oobMetrics.SetCPUUtilization(.1) + + // For per-call metrics (not used initially), srv2 reports that it is + // loaded and srv1 reports low load. After confirming OOB works, switch to + // per-call and confirm the new routing weights are applied. + srv1.callMetrics.SetQPS(10.0) + srv1.callMetrics.SetCPUUtilization(.1) + + srv2.callMetrics.SetQPS(10.0) + srv2.callMetrics.SetCPUUtilization(1.0) + + sc := svcConfig(t, oobConfig) + if err := srv1.StartClient(grpc.WithDefaultServiceConfig(sc)); err != nil { + t.Fatalf("Error starting client: %v", err) + } + addrs := []resolver.Address{{Addr: srv1.Address}, {Addr: srv2.Address}} + srv1.R.UpdateState(resolver.State{Addresses: addrs}) + + // Call each backend once to ensure the weights have been received. + ensureReached(ctx, t, srv1.Client, 2) + + // Wait for the weight update period to allow the new weights to be processed. + time.Sleep(weightUpdatePeriod) + checkWeights(ctx, t, srvWeight{srv1, 1}, srvWeight{srv2, 10}) + + // Update to per-call weights. 
+ c := svcConfig(t, perCallConfig) + parsedCfg := srv1.R.CC.ParseServiceConfig(c) + if parsedCfg.Err != nil { + panic(fmt.Sprintf("Error parsing config %q: %v", c, parsedCfg.Err)) + } + srv1.R.UpdateState(resolver.State{Addresses: addrs, ServiceConfig: parsedCfg}) + + // Wait for the weight update period to allow the new weights to be processed. + time.Sleep(weightUpdatePeriod) + checkWeights(ctx, t, srvWeight{srv1, 10}, srvWeight{srv2, 1}) +} + +// Tests two addresses with OOB ORCA reporting enabled and a non-zero error +// penalty applied. +func (s) TestBalancer_TwoAddresses_ErrorPenalty(t *testing.T) { + ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout) + defer cancel() + + srv1 := startServer(t, reportOOB) + srv2 := startServer(t, reportOOB) + + // srv1 starts loaded and srv2 starts without load; ensure RPCs are routed + // disproportionately to srv2 (10:1). EPS values are set (but ignored + // initially due to ErrorUtilizationPenalty=0). Later EUP will be updated + // to 0.9 which will cause the weights to be equal and RPCs to be routed + // 50/50. + srv1.oobMetrics.SetQPS(10.0) + srv1.oobMetrics.SetCPUUtilization(1.0) + srv1.oobMetrics.SetEPS(0) + // srv1 weight before: 10.0 / 1.0 = 10.0 + // srv1 weight after: 10.0 / 1.0 = 10.0 + + srv2.oobMetrics.SetQPS(10.0) + srv2.oobMetrics.SetCPUUtilization(.1) + srv2.oobMetrics.SetEPS(10.0) + // srv2 weight before: 10.0 / 0.1 = 100.0 + // srv2 weight after: 10.0 / 1.0 = 10.0 + + sc := svcConfig(t, oobConfig) + if err := srv1.StartClient(grpc.WithDefaultServiceConfig(sc)); err != nil { + t.Fatalf("Error starting client: %v", err) + } + addrs := []resolver.Address{{Addr: srv1.Address}, {Addr: srv2.Address}} + srv1.R.UpdateState(resolver.State{Addresses: addrs}) + + // Call each backend once to ensure the weights have been received. + ensureReached(ctx, t, srv1.Client, 2) + + // Wait for the weight update period to allow the new weights to be processed. + time.Sleep(weightUpdatePeriod) + checkWeights(ctx, t, srvWeight{srv1, 1}, srvWeight{srv2, 10}) + + // Update to include an error penalty in the weights. + newCfg := oobConfig + newCfg.ErrorUtilizationPenalty = float64p(0.9) + c := svcConfig(t, newCfg) + parsedCfg := srv1.R.CC.ParseServiceConfig(c) + if parsedCfg.Err != nil { + panic(fmt.Sprintf("Error parsing config %q: %v", c, parsedCfg.Err)) + } + srv1.R.UpdateState(resolver.State{Addresses: addrs, ServiceConfig: parsedCfg}) + + // Wait for the weight update period to allow the new weights to be processed. + time.Sleep(weightUpdatePeriod + oobReportingInterval) + checkWeights(ctx, t, srvWeight{srv1, 1}, srvWeight{srv2, 1}) +} + +// Tests that the blackout period causes backends to use 0 as their weight +// (meaning to use the average weight) until the blackout period elapses. 
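Reading that together with weight() above, the expected timeline for a backend that starts reporting at t=0 with blackoutPeriod=1s looks like:

t = 0         first non-empty report arrives; nonEmptySince is set
0 <= t < 1s   weight() returns 0, so the backend is treated as average
t >= 1s       weight() returns the load-derived weight

which is why the test below sees a roughly 50/50 split during the blackout window and the 10:1 split only after advancing the fake clock past it.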
+func (s) TestBalancer_TwoAddresses_BlackoutPeriod(t *testing.T) { + ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout) + defer cancel() + + var mu sync.Mutex + start := time.Now() + now := start + setNow := func(t time.Time) { + mu.Lock() + defer mu.Unlock() + now = t + } + iwrr.TimeNow = func() time.Time { + mu.Lock() + defer mu.Unlock() + return now + } + t.Cleanup(func() { iwrr.TimeNow = time.Now }) + + testCases := []struct { + blackoutPeriodCfg *time.Duration + blackoutPeriod time.Duration + }{{ + blackoutPeriodCfg: durationp(time.Second), + blackoutPeriod: time.Second, + }, { + blackoutPeriodCfg: nil, + blackoutPeriod: 10 * time.Second, // the default + }} + for _, tc := range testCases { + setNow(start) + srv1 := startServer(t, reportOOB) + srv2 := startServer(t, reportOOB) + + // srv1 starts loaded and srv2 starts without load; ensure RPCs are routed + // disproportionately to srv2 (10:1). + srv1.oobMetrics.SetQPS(10.0) + srv1.oobMetrics.SetCPUUtilization(1.0) + + srv2.oobMetrics.SetQPS(10.0) + srv2.oobMetrics.SetCPUUtilization(.1) + + cfg := oobConfig + cfg.BlackoutPeriod = tc.blackoutPeriodCfg + sc := svcConfig(t, cfg) + if err := srv1.StartClient(grpc.WithDefaultServiceConfig(sc)); err != nil { + t.Fatalf("Error starting client: %v", err) + } + addrs := []resolver.Address{{Addr: srv1.Address}, {Addr: srv2.Address}} + srv1.R.UpdateState(resolver.State{Addresses: addrs}) + + // Call each backend once to ensure the weights have been received. + ensureReached(ctx, t, srv1.Client, 2) + + // Wait for the weight update period to allow the new weights to be processed. + time.Sleep(weightUpdatePeriod) + // During the blackout period (1s) we should route roughly 50/50. + checkWeights(ctx, t, srvWeight{srv1, 1}, srvWeight{srv2, 1}) + + // Advance time to right before the blackout period ends and the weights + // should still be zero. + setNow(start.Add(tc.blackoutPeriod - time.Nanosecond)) + // Wait for the weight update period to allow the new weights to be processed. + time.Sleep(weightUpdatePeriod) + checkWeights(ctx, t, srvWeight{srv1, 1}, srvWeight{srv2, 1}) + + // Advance time to right after the blackout period ends and the weights + // should now activate. + setNow(start.Add(tc.blackoutPeriod)) + // Wait for the weight update period to allow the new weights to be processed. + time.Sleep(weightUpdatePeriod) + checkWeights(ctx, t, srvWeight{srv1, 1}, srvWeight{srv2, 10}) + } +} + +// Tests that the weight expiration period causes backends to use 0 as their +// weight (meaning to use the average weight) once the expiration period +// elapses. +func (s) TestBalancer_TwoAddresses_WeightExpiration(t *testing.T) { + ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout) + defer cancel() + + var mu sync.Mutex + start := time.Now() + now := start + setNow := func(t time.Time) { + mu.Lock() + defer mu.Unlock() + now = t + } + iwrr.TimeNow = func() time.Time { + mu.Lock() + defer mu.Unlock() + return now + } + t.Cleanup(func() { iwrr.TimeNow = time.Now }) + + srv1 := startServer(t, reportBoth) + srv2 := startServer(t, reportBoth) + + // srv1 starts loaded and srv2 starts without load; ensure RPCs are routed + // disproportionately to srv2 (10:1). Because the OOB reporting interval + // is 1 minute but the weights expire in 1 second, routing will go to 50/50 + // after the weights expire. 
+ srv1.oobMetrics.SetQPS(10.0) + srv1.oobMetrics.SetCPUUtilization(1.0) + + srv2.oobMetrics.SetQPS(10.0) + srv2.oobMetrics.SetCPUUtilization(.1) + + cfg := oobConfig + cfg.OOBReportingPeriod = durationp(time.Minute) + sc := svcConfig(t, cfg) + if err := srv1.StartClient(grpc.WithDefaultServiceConfig(sc)); err != nil { + t.Fatalf("Error starting client: %v", err) + } + addrs := []resolver.Address{{Addr: srv1.Address}, {Addr: srv2.Address}} + srv1.R.UpdateState(resolver.State{Addresses: addrs}) + + // Call each backend once to ensure the weights have been received. + ensureReached(ctx, t, srv1.Client, 2) + + // Wait for the weight update period to allow the new weights to be processed. + time.Sleep(weightUpdatePeriod) + checkWeights(ctx, t, srvWeight{srv1, 1}, srvWeight{srv2, 10}) + + // Advance what time.Now returns to the weight expiration time minus 1s to + // ensure all weights are still honored. + setNow(start.Add(*cfg.WeightExpirationPeriod - time.Second)) + + // Wait for the weight update period to allow the new weights to be processed. + time.Sleep(weightUpdatePeriod) + checkWeights(ctx, t, srvWeight{srv1, 1}, srvWeight{srv2, 10}) + + // Advance what time.Now returns to the weight expiration time plus 1s to + // ensure all weights expired and addresses are routed evenly. + setNow(start.Add(*cfg.WeightExpirationPeriod + time.Second)) + + // Wait for the weight expiration period so the weights have expired. + time.Sleep(weightUpdatePeriod) + checkWeights(ctx, t, srvWeight{srv1, 1}, srvWeight{srv2, 1}) +} + +// Tests logic surrounding subchannel management. +func (s) TestBalancer_AddressesChanging(t *testing.T) { + ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout) + defer cancel() + + srv1 := startServer(t, reportBoth) + srv2 := startServer(t, reportBoth) + srv3 := startServer(t, reportBoth) + srv4 := startServer(t, reportBoth) + + // srv1: weight 10 + srv1.oobMetrics.SetQPS(10.0) + srv1.oobMetrics.SetCPUUtilization(1.0) + // srv2: weight 100 + srv2.oobMetrics.SetQPS(10.0) + srv2.oobMetrics.SetCPUUtilization(.1) + // srv3: weight 20 + srv3.oobMetrics.SetQPS(20.0) + srv3.oobMetrics.SetCPUUtilization(1.0) + // srv4: weight 200 + srv4.oobMetrics.SetQPS(20.0) + srv4.oobMetrics.SetCPUUtilization(.1) + + sc := svcConfig(t, oobConfig) + if err := srv1.StartClient(grpc.WithDefaultServiceConfig(sc)); err != nil { + t.Fatalf("Error starting client: %v", err) + } + srv2.Client = srv1.Client + addrs := []resolver.Address{{Addr: srv1.Address}, {Addr: srv2.Address}, {Addr: srv3.Address}} + srv1.R.UpdateState(resolver.State{Addresses: addrs}) + + // Call each backend once to ensure the weights have been received. + ensureReached(ctx, t, srv1.Client, 3) + time.Sleep(weightUpdatePeriod) + checkWeights(ctx, t, srvWeight{srv1, 1}, srvWeight{srv2, 10}, srvWeight{srv3, 2}) + + // Add backend 4 + addrs = append(addrs, resolver.Address{Addr: srv4.Address}) + srv1.R.UpdateState(resolver.State{Addresses: addrs}) + time.Sleep(weightUpdatePeriod) + checkWeights(ctx, t, srvWeight{srv1, 1}, srvWeight{srv2, 10}, srvWeight{srv3, 2}, srvWeight{srv4, 20}) + + // Shutdown backend 3. RPCs will no longer be routed to it. + srv3.Stop() + time.Sleep(weightUpdatePeriod) + checkWeights(ctx, t, srvWeight{srv1, 1}, srvWeight{srv2, 10}, srvWeight{srv4, 20}) + + // Remove addresses 2 and 3. RPCs will no longer be routed to 2 either. 
+	addrs = []resolver.Address{{Addr: srv1.Address}, {Addr: srv4.Address}}
+	srv1.R.UpdateState(resolver.State{Addresses: addrs})
+	time.Sleep(weightUpdatePeriod)
+	checkWeights(ctx, t, srvWeight{srv1, 1}, srvWeight{srv4, 20})
+
+	// Re-add 2 and remove the rest.
+	addrs = []resolver.Address{{Addr: srv2.Address}}
+	srv1.R.UpdateState(resolver.State{Addresses: addrs})
+	time.Sleep(weightUpdatePeriod)
+	checkWeights(ctx, t, srvWeight{srv2, 10})
+
+	// Re-add 4.
+	addrs = append(addrs, resolver.Address{Addr: srv4.Address})
+	srv1.R.UpdateState(resolver.State{Addresses: addrs})
+	time.Sleep(weightUpdatePeriod)
+	checkWeights(ctx, t, srvWeight{srv2, 10}, srvWeight{srv4, 20})
+}
+
+func ensureReached(ctx context.Context, t *testing.T, c testgrpc.TestServiceClient, n int) {
+	t.Helper()
+	reached := make(map[string]struct{})
+	for len(reached) != n {
+		var peer peer.Peer
+		if _, err := c.EmptyCall(ctx, &testpb.Empty{}, grpc.Peer(&peer)); err != nil {
+			t.Fatalf("Error from EmptyCall: %v", err)
+		}
+		reached[peer.Addr.String()] = struct{}{}
+	}
+}
+
+type srvWeight struct {
+	srv *testServer
+	w   int
+}
+
+const rrIterations = 100
+
+// checkWeights does rrIterations RPCs and expects the different backends to be
+// routed in a ratio as determined by the srvWeights passed in. Allows for
+// some variance (+/- 2 RPCs per backend).
+func checkWeights(ctx context.Context, t *testing.T, sws ...srvWeight) {
+	t.Helper()
+
+	c := sws[0].srv.Client
+
+	// Replace the weights with approximate counts of RPCs wanted given the
+	// iterations performed.
+	weightSum := 0
+	for _, sw := range sws {
+		weightSum += sw.w
+	}
+	for i := range sws {
+		sws[i].w = rrIterations * sws[i].w / weightSum
+	}
+
+	for attempts := 0; attempts < 10; attempts++ {
+		serverCounts := make(map[string]int)
+		for i := 0; i < rrIterations; i++ {
+			var peer peer.Peer
+			if _, err := c.EmptyCall(ctx, &testpb.Empty{}, grpc.Peer(&peer)); err != nil {
+				t.Fatalf("Error from EmptyCall: %v; timed out waiting for weighted RR behavior?", err)
+			}
+			serverCounts[peer.Addr.String()]++
+		}
+		if len(serverCounts) != len(sws) {
+			continue
+		}
+		success := true
+		for _, sw := range sws {
+			c := serverCounts[sw.srv.Address]
+			if c < sw.w-2 || c > sw.w+2 {
+				success = false
+				break
+			}
+		}
+		if success {
+			t.Logf("Passed iteration %v; counts: %v", attempts, serverCounts)
+			return
+		}
+		t.Logf("Failed iteration %v; counts: %v; want %+v", attempts, serverCounts, sws)
+		time.Sleep(5 * time.Millisecond)
+	}
+	t.Fatalf("Failed to route RPCs with proper ratio")
+}
diff --git a/balancer/weightedroundrobin/config.go b/balancer/weightedroundrobin/config.go
new file mode 100644
index 000000000000..caad18faa11d
--- /dev/null
+++ b/balancer/weightedroundrobin/config.go
@@ -0,0 +1,60 @@
+/*
+ *
+ * Copyright 2023 gRPC authors.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ * + */ + +package weightedroundrobin + +import ( + "time" + + "google.golang.org/grpc/serviceconfig" +) + +type lbConfig struct { + serviceconfig.LoadBalancingConfig `json:"-"` + + // Whether to enable out-of-band utilization reporting collection from the + // endpoints. By default, per-request utilization reporting is used. + EnableOOBLoadReport bool `json:"enableOobLoadReport,omitempty"` + + // Load reporting interval to request from the server. Note that the + // server may not provide reports as frequently as the client requests. + // Used only when enable_oob_load_report is true. Default is 10 seconds. + OOBReportingPeriod time.Duration `json:"oobReportingPeriod,omitempty"` + + // A given endpoint must report load metrics continuously for at least this + // long before the endpoint weight will be used. This avoids churn when + // the set of endpoint addresses changes. Takes effect both immediately + // after we establish a connection to an endpoint and after + // weight_expiration_period has caused us to stop using the most recent + // load metrics. Default is 10 seconds. + BlackoutPeriod time.Duration `json:"blackoutPeriod,omitempty"` + + // If a given endpoint has not reported load metrics in this long, + // then we stop using the reported weight. This ensures that we do + // not continue to use very stale weights. Once we stop using a stale + // value, if we later start seeing fresh reports again, the + // blackout_period applies. Defaults to 3 minutes. + WeightExpirationPeriod time.Duration `json:"weightExpirationPeriod,omitempty"` + + // How often endpoint weights are recalculated. Default is 1 second. + WeightUpdatePeriod time.Duration `json:"weightUpdatePeriod,omitempty"` + + // The multiplier used to adjust endpoint weights with the error rate + // calculated as eps/qps. Default is 1.0. + ErrorUtilizationPenalty float64 `json:"errorUtilizationPenalty,omitempty"` +} diff --git a/balancer/weightedroundrobin/internal/internal.go b/balancer/weightedroundrobin/internal/internal.go new file mode 100644 index 000000000000..d39830261b21 --- /dev/null +++ b/balancer/weightedroundrobin/internal/internal.go @@ -0,0 +1,44 @@ +/* + * + * Copyright 2023 gRPC authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +// Package internal allows for easier testing of the weightedroundrobin +// package. +package internal + +import ( + "time" +) + +// AllowAnyWeightUpdatePeriod permits any setting of WeightUpdatePeriod for +// testing. Normally a minimum of 100ms is applied. +var AllowAnyWeightUpdatePeriod bool + +// LBConfig allows tests to produce a JSON form of the config from the struct +// instead of using a string. 
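For orientation, these knobs are set through the gRPC service config. A hypothetical sketch of the JSON shape, with field names taken from the json tags above; the policy name and the "10s"-style duration encoding follow gRFC A58 and are assumptions here, not a statement of this parser's exact contract:

// Hypothetical service config selecting this policy (assumed shape).
const sampleServiceConfig = `{
  "loadBalancingConfig": [{
    "weighted_round_robin": {
      "enableOobLoadReport": true,
      "oobReportingPeriod": "10s",
      "blackoutPeriod": "10s",
      "weightExpirationPeriod": "180s",
      "weightUpdatePeriod": "1s",
      "errorUtilizationPenalty": 1.0
    }
  }]
}`

The test-only mirror of this struct, with pointer fields so tests can marshal exactly the keys they set, continues below.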
+type LBConfig struct { + EnableOOBLoadReport *bool `json:"enableOobLoadReport,omitempty"` + OOBReportingPeriod *time.Duration `json:"oobReportingPeriod,omitempty"` + BlackoutPeriod *time.Duration `json:"blackoutPeriod,omitempty"` + WeightExpirationPeriod *time.Duration `json:"weightExpirationPeriod,omitempty"` + WeightUpdatePeriod *time.Duration `json:"weightUpdatePeriod,omitempty"` + ErrorUtilizationPenalty *float64 `json:"errorUtilizationPenalty,omitempty"` +} + +// TimeNow can be overridden by tests to return a different value for the +// current time. +var TimeNow = time.Now diff --git a/balancer/weightedroundrobin/logging.go b/balancer/weightedroundrobin/logging.go new file mode 100644 index 000000000000..43184ca9ab91 --- /dev/null +++ b/balancer/weightedroundrobin/logging.go @@ -0,0 +1,34 @@ +/* + * + * Copyright 2023 gRPC authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +package weightedroundrobin + +import ( + "fmt" + + "google.golang.org/grpc/grpclog" + internalgrpclog "google.golang.org/grpc/internal/grpclog" +) + +const prefix = "[%p] " + +var logger = grpclog.Component("weighted-round-robin") + +func prefixLogger(p *wrrBalancer) *internalgrpclog.PrefixLogger { + return internalgrpclog.NewPrefixLogger(logger, fmt.Sprintf(prefix, p)) +} diff --git a/balancer/weightedroundrobin/scheduler.go b/balancer/weightedroundrobin/scheduler.go new file mode 100644 index 000000000000..e19428112e1e --- /dev/null +++ b/balancer/weightedroundrobin/scheduler.go @@ -0,0 +1,138 @@ +/* + * + * Copyright 2023 gRPC authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +package weightedroundrobin + +import ( + "math" +) + +type scheduler interface { + nextIndex() int +} + +// newScheduler uses scWeights to create a new scheduler for selecting subconns +// in a picker. It will return a round robin implementation if at least +// len(scWeights)-1 are zero or there is only a single subconn, otherwise it +// will return an Earliest Deadline First (EDF) scheduler implementation that +// selects the subchannels according to their weights. 
+func newScheduler(scWeights []float64, inc func() uint32) scheduler {
+	n := len(scWeights)
+	if n == 0 {
+		return nil
+	}
+	if n == 1 {
+		return &rrScheduler{numSCs: 1, inc: inc}
+	}
+	sum := float64(0)
+	numZero := 0
+	max := float64(0)
+	for _, w := range scWeights {
+		sum += w
+		if w > max {
+			max = w
+		}
+		if w == 0 {
+			numZero++
+		}
+	}
+	if numZero >= n-1 {
+		return &rrScheduler{numSCs: uint32(n), inc: inc}
+	}
+	unscaledMean := sum / float64(n-numZero)
+	scalingFactor := maxWeight / max
+	mean := uint16(math.Round(scalingFactor * unscaledMean))
+
+	weights := make([]uint16, n)
+	allEqual := true
+	for i, w := range scWeights {
+		if w == 0 {
+			// Backends with weight = 0 use the mean.
+			weights[i] = mean
+		} else {
+			scaledWeight := uint16(math.Round(scalingFactor * w))
+			weights[i] = scaledWeight
+			if scaledWeight != mean {
+				allEqual = false
+			}
+		}
+	}
+
+	if allEqual {
+		return &rrScheduler{numSCs: uint32(n), inc: inc}
+	}
+
+	logger.Infof("using edf scheduler with weights: %v", weights)
+	return &edfScheduler{weights: weights, inc: inc}
+}
+
+const maxWeight = math.MaxUint16
+
+// edfScheduler implements EDF using the same algorithm as grpc-c++ here:
+//
+// https://github.com/grpc/grpc/blob/master/src/core/ext/filters/client_channel/lb_policy/weighted_round_robin/static_stride_scheduler.cc
+type edfScheduler struct {
+	inc     func() uint32
+	weights []uint16
+}
+
+// Returns the index in s.weights for the picker to choose.
+func (s *edfScheduler) nextIndex() int {
+	const offset = maxWeight / 2
+
+	for {
+		idx := uint64(s.inc())
+
+		// The sequence number (idx) is split in two: the lower %n gives the
+		// index of the backend, and the rest gives the number of times we've
+		// iterated through all backends. `generation` is used to
+		// deterministically decide whether we pick or skip the backend on this
+		// iteration, in proportion to the backend's weight.
+
+		backendIndex := idx % uint64(len(s.weights))
+		generation := idx / uint64(len(s.weights))
+		weight := uint64(s.weights[backendIndex])
+
+		// We pick a backend `weight` times per `maxWeight` generations. The
+		// multiply and modulus ~evenly spread out the picks for a given
+		// backend between different generations. The offset by `backendIndex`
+		// helps to reduce the chance of multiple consecutive non-picks: if we
+		// have two consecutive backends with an equal, say, 80% weight of the
+		// max, with no offset we would see 1/5 generations that skipped both.
+		// TODO(b/190488683): add test for offset efficacy.
+		mod := uint64(weight*generation+backendIndex*offset) % maxWeight
+
+		if mod < maxWeight-weight {
+			continue
+		}
+		return int(backendIndex)
+	}
+}
+
+// A simple RR scheduler to use for fallback when fewer than two backends have
+// non-zero weights, or all backends have the same weight, or when only one
+// subconn exists.
+type rrScheduler struct {
+	inc    func() uint32
+	numSCs uint32
+}
+
+func (s *rrScheduler) nextIndex() int {
+	idx := s.inc()
+	return int(idx % s.numSCs)
+}
diff --git a/balancer/weightedroundrobin/weightedroundrobin.go b/balancer/weightedroundrobin/weightedroundrobin.go
index 6fc4d1910e67..bb029f07c36a 100644
--- a/balancer/weightedroundrobin/weightedroundrobin.go
+++ b/balancer/weightedroundrobin/weightedroundrobin.go
@@ -16,16 +16,21 @@
  *
  */
 
-// Package weightedroundrobin defines a weighted roundrobin balancer.
+// Package weightedroundrobin provides an implementation of the weighted round
+// robin LB policy, as defined in [gRFC A58].
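To make the pick-or-skip rule above concrete, here is a standalone replay of the same arithmetic for two backends whose scaled weights differ by 2x (illustrative values; the real scheduler draws idx from a shared atomic counter):

package main

import "fmt"

func main() {
	const maxWeight = 65535 // math.MaxUint16, as above
	const offset = maxWeight / 2
	weights := []uint16{65535, 32768} // backend 0 carries ~2x backend 1's weight
	counts := make([]int, len(weights))
	var seq uint64 // stands in for the shared atomic counter
	for picks := 0; picks < 3000; {
		idx := seq
		seq++
		backend := idx % uint64(len(weights))
		generation := idx / uint64(len(weights))
		weight := uint64(weights[backend])
		mod := (weight*generation + backend*offset) % maxWeight
		if mod < maxWeight-weight {
			continue // skip this backend for this generation
		}
		counts[backend]++
		picks++
	}
	fmt.Println(counts) // roughly 2:1, e.g. [2000 1000]
}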
+// +// # Experimental +// +// Notice: This package is EXPERIMENTAL and may be changed or removed in a +// later release. +// +// [gRFC A58]: https://github.com/grpc/proposal/blob/master/A58-client-side-weighted-round-robin-lb-policy.md package weightedroundrobin import ( "google.golang.org/grpc/resolver" ) -// Name is the name of weighted_round_robin balancer. -const Name = "weighted_round_robin" - // attributeKey is the type used as the key to store AddrInfo in the // BalancerAttributes field of resolver.Address. type attributeKey struct{} @@ -44,11 +49,6 @@ func (a AddrInfo) Equal(o interface{}) bool { // SetAddrInfo returns a copy of addr in which the BalancerAttributes field is // updated with addrInfo. -// -// # Experimental -// -// Notice: This API is EXPERIMENTAL and may be changed or removed in a -// later release. func SetAddrInfo(addr resolver.Address, addrInfo AddrInfo) resolver.Address { addr.BalancerAttributes = addr.BalancerAttributes.WithValue(attributeKey{}, addrInfo) return addr @@ -56,11 +56,6 @@ func SetAddrInfo(addr resolver.Address, addrInfo AddrInfo) resolver.Address { // GetAddrInfo returns the AddrInfo stored in the BalancerAttributes field of // addr. -// -// # Experimental -// -// Notice: This API is EXPERIMENTAL and may be changed or removed in a -// later release. func GetAddrInfo(addr resolver.Address) AddrInfo { v := addr.BalancerAttributes.Value(attributeKey{}) ai, _ := v.(AddrInfo) diff --git a/internal/grpcrand/grpcrand.go b/internal/grpcrand/grpcrand.go index 517ea70642a1..0b092cfbe15d 100644 --- a/internal/grpcrand/grpcrand.go +++ b/internal/grpcrand/grpcrand.go @@ -72,3 +72,10 @@ func Uint64() uint64 { defer mu.Unlock() return r.Uint64() } + +// Uint32 implements rand.Uint32 on the grpcrand global source. +func Uint32() uint32 { + mu.Lock() + defer mu.Unlock() + return r.Uint32() +} diff --git a/orca/producer.go b/orca/producer.go index 3b7ed8b67d8a..ce108aad65ca 100644 --- a/orca/producer.go +++ b/orca/producer.go @@ -199,12 +199,13 @@ func (p *producer) run(ctx context.Context, done chan struct{}, interval time.Du // Unimplemented; do not retry. logger.Error("Server doesn't support ORCA OOB load reporting protocol; not listening for load reports.") return - case status.Code(err) == codes.Unavailable: - // TODO: this code should ideally log an error, too, but for now we - // receive this code when shutting down the ClientConn. Once we - // can determine the state or ensure the producer is stopped before - // the stream ends, we can log an error when it's not a natural - // shutdown. + case status.Code(err) == codes.Unavailable, status.Code(err) == codes.Canceled: + // TODO: these codes should ideally log an error, too, but for now + // we receive them when shutting down the ClientConn (Unavailable + // if the stream hasn't started yet, and Canceled if it happens + // mid-stream). Once we can determine the state or ensure the + // producer is stopped before the stream ends, we can log an error + // when it's not a natural shutdown. default: // Log all other errors. 
 			logger.Error("Received unexpected stream error:", err)
diff --git a/xds/internal/balancer/clusterimpl/picker.go b/xds/internal/balancer/clusterimpl/picker.go
index 360fc44c9e4d..3f354424f28e 100644
--- a/xds/internal/balancer/clusterimpl/picker.go
+++ b/xds/internal/balancer/clusterimpl/picker.go
@@ -160,7 +160,7 @@ func (d *picker) Pick(info balancer.PickInfo) (balancer.PickResult, error) {
 			d.loadStore.CallFinished(lIDStr, info.Err)
 
 			load, ok := info.ServerLoad.(*v3orcapb.OrcaLoadReport)
-			if !ok {
+			if !ok || load == nil {
 				return
 			}
 			d.loadStore.CallServerLoad(lIDStr, serverLoadCPUName, load.CpuUtilization)

From 5e587344eef8aaa06ebf76ee1997013b3a8fbed0 Mon Sep 17 00:00:00 2001
From: Zach Reyes <39203661+zasweq@users.noreply.github.com>
Date: Mon, 8 May 2023 21:29:36 -0400
Subject: [PATCH 28/60] xds: Add support for Custom LB Policies (#6224)

---
 attributes/attributes.go                      |  29 ++
 .../weightedroundrobin/weightedroundrobin.go  |   6 +
 .../weightedaggregator/aggregator.go          |   8 +
 balancer/weightedtarget/weightedtarget.go     |  12 +
 .../weightedtarget/weightedtarget_test.go     |  21 +-
 internal/testutils/xds/e2e/clientresources.go |  63 ++-
 resolver/resolver.go                          |  16 +-
 test/xds/xds_client_custom_lb_test.go         | 231 ++++++++++
 .../balancer/cdsbalancer/cdsbalancer.go       |  24 +-
 .../cdsbalancer/cdsbalancer_security_test.go  |  32 +-
 .../balancer/cdsbalancer/cdsbalancer_test.go  | 115 +++--
 .../balancer/cdsbalancer/cluster_handler.go   |  12 +-
 .../cdsbalancer/cluster_handler_test.go       |  14 -
 .../clusterimpl/tests/balancer_test.go        |  11 +-
 .../clusterresolver/clusterresolver_test.go   |  14 -
 .../balancer/clusterresolver/config_test.go   |   4 -
 .../balancer/clusterresolver/configbuilder.go | 147 +------
 .../clusterresolver/configbuilder_test.go     | 399 +-----------------
 .../clusterresolver/e2e_test/eds_impl_test.go |  17 -
 .../balancer/clusterresolver/priority_test.go |  18 +-
 xds/internal/balancer/wrrlocality/balancer.go | 145 ++++++-
 .../balancer/wrrlocality/balancer_test.go     | 131 ++++++
 xds/internal/balancer/wrrlocality/logging.go  |  34 ++
 .../xdsclient/tests/cds_watchers_test.go      |   2 +-
 .../xdsclient/tests/eds_watchers_test.go      | 100 +++--
 .../tests/federation_watchers_test.go         |   6 +-
 .../xdsclient/tests/resource_update_test.go   |   2 +-
 .../xdsresource/tests/unmarshal_cds_test.go   |   5 +-
 .../xdsclient/xdsresource/type_cds.go         |  25 +-
 .../xdsclient/xdsresource/unmarshal_cds.go    |  18 +-
 .../xdsresource/unmarshal_cds_test.go         |   8 +-
 .../xdsclient/xdsresource/unmarshal_eds.go    |  11 +
 32 files changed, 955 insertions(+), 725 deletions(-)
 create mode 100644 test/xds/xds_client_custom_lb_test.go
 create mode 100644 xds/internal/balancer/wrrlocality/logging.go

diff --git a/attributes/attributes.go b/attributes/attributes.go
index 02f5dc531891..3efca4591493 100644
--- a/attributes/attributes.go
+++ b/attributes/attributes.go
@@ -25,6 +25,11 @@
 // later release.
 package attributes
 
+import (
+	"fmt"
+	"strings"
+)
+
 // Attributes is an immutable struct for storing and retrieving generic
 // key/value pairs. Keys must be hashable, and users should define their own
 // types for keys. Values should not be modified after they are added to an
@@ -99,3 +104,27 @@ func (a *Attributes) Equal(o *Attributes) bool {
 	}
 	return true
 }
+
+// String prints the attribute map. If any keys or values throughout the map
+// implement fmt.Stringer, it calls that method and appends the result.
+func (a *Attributes) String() string {
+	var sb strings.Builder
+	sb.WriteString("{")
+	first := true
+	for k, v := range a.m {
+		var key, val string
+		if str, ok := k.(interface{ String() string }); ok {
+			key = str.String()
+		}
+		if str, ok := v.(interface{ String() string }); ok {
+			val = str.String()
+		}
+		if !first {
+			sb.WriteString(", ")
+		}
+		sb.WriteString(fmt.Sprintf("%q: %q", key, val))
+		first = false
+	}
+	sb.WriteString("}")
+	return sb.String()
+}
diff --git a/balancer/weightedroundrobin/weightedroundrobin.go b/balancer/weightedroundrobin/weightedroundrobin.go
index bb029f07c36a..7567462e023d 100644
--- a/balancer/weightedroundrobin/weightedroundrobin.go
+++ b/balancer/weightedroundrobin/weightedroundrobin.go
@@ -28,6 +28,8 @@
 package weightedroundrobin
 
 import (
+	"fmt"
+
 	"google.golang.org/grpc/resolver"
 )
 
@@ -61,3 +63,7 @@ func GetAddrInfo(addr resolver.Address) AddrInfo {
 	ai, _ := v.(AddrInfo)
 	return ai
 }
+
+func (a AddrInfo) String() string {
+	return fmt.Sprintf("Weight: %d", a.Weight)
+}
diff --git a/balancer/weightedtarget/weightedaggregator/aggregator.go b/balancer/weightedtarget/weightedaggregator/aggregator.go
index 37fc41c16885..27279257ed13 100644
--- a/balancer/weightedtarget/weightedaggregator/aggregator.go
+++ b/balancer/weightedtarget/weightedaggregator/aggregator.go
@@ -178,6 +178,14 @@ func (wbsa *Aggregator) ResumeStateUpdates() {
 	}
 }
 
+// NeedUpdateStateOnResume sets the UpdateStateOnResume bool to true, letting a
+// picker update be sent once ResumeStateUpdates is called.
+func (wbsa *Aggregator) NeedUpdateStateOnResume() {
+	wbsa.mu.Lock()
+	defer wbsa.mu.Unlock()
+	wbsa.needUpdateStateOnResume = true
+}
+
 // UpdateState is called to report a balancer state change from sub-balancer.
 // It's usually called by the balancer group.
 //
diff --git a/balancer/weightedtarget/weightedtarget.go b/balancer/weightedtarget/weightedtarget.go
index 83bb7d701f19..3d5acdab6afe 100644
--- a/balancer/weightedtarget/weightedtarget.go
+++ b/balancer/weightedtarget/weightedtarget.go
@@ -143,6 +143,18 @@ func (b *weightedTargetBalancer) UpdateClientConnState(s balancer.ClientConnStat
 
 	b.targets = newConfig.Targets
 
+	// If the targets length is zero, it means we have removed all child
+	// policies from the balancer group and aggregator.
+	// At the start of this UpdateClientConnState() operation, a call to
+	// b.stateAggregator.ResumeStateUpdates() is deferred. Thus, setting the
+	// needUpdateStateOnResume bool to true here will ensure a new picker is
+	// built as part of that deferred function. Since there are now no child
+	// policies, the aggregated connectivity state reported from the Aggregator
+	// will be TRANSIENT_FAILURE.
+	if len(b.targets) == 0 {
+		b.stateAggregator.NeedUpdateStateOnResume()
+	}
+
 	return nil
 }
 
diff --git a/balancer/weightedtarget/weightedtarget_test.go b/balancer/weightedtarget/weightedtarget_test.go
index a20cb0dc1ce4..5658f302a49b 100644
--- a/balancer/weightedtarget/weightedtarget_test.go
+++ b/balancer/weightedtarget/weightedtarget_test.go
@@ -166,7 +166,8 @@ func init() {
 // TestWeightedTarget covers the cases that a sub-balancer is added and a
 // sub-balancer is removed. It verifies that the addresses and balancer configs
 // are forwarded to the right sub-balancer. This test is intended to test the
-// glue code in weighted_target.
+// glue code in weighted_target. It also tests an empty target config update,
+// which should trigger a transient failure state update.
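As a quick illustration of what these String methods buy when logging, a runnable sketch using the public weightedroundrobin helpers (the address value is made up):

package main

import (
	"fmt"

	"google.golang.org/grpc/balancer/weightedroundrobin"
	"google.golang.org/grpc/resolver"
)

func main() {
	addr := resolver.Address{Addr: "10.0.0.1:8080"}
	addr = weightedroundrobin.SetAddrInfo(addr, weightedroundrobin.AddrInfo{Weight: 3})
	// With AddrInfo.String and Attributes.String above (plus the
	// Address.String change later in this patch), the weight is rendered
	// instead of an opaque attributes pointer.
	fmt.Println(addr)
}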
func (s) TestWeightedTarget(t *testing.T) { cc := testutils.NewTestClientConn(t) wtb := wtbBuilder.Build(cc, balancer.BuildOptions{}) @@ -306,6 +307,24 @@ func (s) TestWeightedTarget(t *testing.T) { t.Fatalf("picker.Pick, got %v, want SubConn=%v", gotSCSt, sc3) } } + // Update the Weighted Target Balancer with an empty address list and no + // targets. This should cause a Transient Failure State update to the Client + // Conn. + emptyConfig, err := wtbParser.ParseConfig([]byte(`{}`)) + if err != nil { + t.Fatalf("Failed to parse balancer config: %v", err) + } + if err := wtb.UpdateClientConnState(balancer.ClientConnState{ + ResolverState: resolver.State{}, + BalancerConfig: emptyConfig, + }); err != nil { + t.Fatalf("Failed to update ClientConn state: %v", err) + } + + state := <-cc.NewStateCh + if state != connectivity.TransientFailure { + t.Fatalf("Empty target update should have triggered a TF state update, got: %v", state) + } } // TestWeightedTarget_OneSubBalancer_AddRemoveBackend tests the case where we diff --git a/internal/testutils/xds/e2e/clientresources.go b/internal/testutils/xds/e2e/clientresources.go index b38d27b24963..ff2a5d43398a 100644 --- a/internal/testutils/xds/e2e/clientresources.go +++ b/internal/testutils/xds/e2e/clientresources.go @@ -524,6 +524,14 @@ func ClusterResourceWithOptions(opts ClusterOptions) *v3clusterpb.Cluster { return cluster } +// LocalityOptions contains options to configure a Locality. +type LocalityOptions struct { + // Ports is a set of ports on "localhost" belonging to this locality. + Ports []uint32 + // Weight is the weight of the locality, used for load balancing. + Weight uint32 +} + // EndpointOptions contains options to configure an Endpoint (or // ClusterLoadAssignment) resource. type EndpointOptions struct { @@ -533,9 +541,8 @@ type EndpointOptions struct { // Host is the hostname of the endpoints. In our e2e tests, hostname must // always be "localhost". Host string - // Ports is a set of ports on "localhost" where the endpoints corresponding - // to this resource reside. - Ports []uint32 + // Localities is a set of localities belonging to this resource. + Localities []LocalityOptions // DropPercents is a map from drop category to a drop percentage. If unset, // no drops are configured. DropPercents map[string]int @@ -546,34 +553,50 @@ func DefaultEndpoint(clusterName string, host string, ports []uint32) *v3endpoin return EndpointResourceWithOptions(EndpointOptions{ ClusterName: clusterName, Host: host, - Ports: ports, + Localities: []LocalityOptions{ + { + Ports: ports, + Weight: 1, + }, + }, }) } // EndpointResourceWithOptions returns an xds Endpoint resource configured with // the provided options. 
func EndpointResourceWithOptions(opts EndpointOptions) *v3endpointpb.ClusterLoadAssignment { - var lbEndpoints []*v3endpointpb.LbEndpoint - for _, port := range opts.Ports { - lbEndpoints = append(lbEndpoints, &v3endpointpb.LbEndpoint{ - HostIdentifier: &v3endpointpb.LbEndpoint_Endpoint{Endpoint: &v3endpointpb.Endpoint{ - Address: &v3corepb.Address{Address: &v3corepb.Address_SocketAddress{ - SocketAddress: &v3corepb.SocketAddress{ - Protocol: v3corepb.SocketAddress_TCP, - Address: opts.Host, - PortSpecifier: &v3corepb.SocketAddress_PortValue{PortValue: port}}, + var endpoints []*v3endpointpb.LocalityLbEndpoints + for i, locality := range opts.Localities { + var lbEndpoints []*v3endpointpb.LbEndpoint + for _, port := range locality.Ports { + lbEndpoints = append(lbEndpoints, &v3endpointpb.LbEndpoint{ + HostIdentifier: &v3endpointpb.LbEndpoint_Endpoint{Endpoint: &v3endpointpb.Endpoint{ + Address: &v3corepb.Address{Address: &v3corepb.Address_SocketAddress{ + SocketAddress: &v3corepb.SocketAddress{ + Protocol: v3corepb.SocketAddress_TCP, + Address: opts.Host, + PortSpecifier: &v3corepb.SocketAddress_PortValue{PortValue: port}}, + }}, }}, - }}, + LoadBalancingWeight: &wrapperspb.UInt32Value{Value: 1}, + }) + } + + endpoints = append(endpoints, &v3endpointpb.LocalityLbEndpoints{ + Locality: &v3corepb.Locality{ + Region: fmt.Sprintf("region-%d", i+1), + Zone: fmt.Sprintf("zone-%d", i+1), + SubZone: fmt.Sprintf("subzone-%d", i+1), + }, + LbEndpoints: lbEndpoints, + LoadBalancingWeight: &wrapperspb.UInt32Value{Value: locality.Weight}, + Priority: 0, }) } + cla := &v3endpointpb.ClusterLoadAssignment{ ClusterName: opts.ClusterName, - Endpoints: []*v3endpointpb.LocalityLbEndpoints{{ - Locality: &v3corepb.Locality{SubZone: "subzone"}, - LbEndpoints: lbEndpoints, - LoadBalancingWeight: &wrapperspb.UInt32Value{Value: 1}, - Priority: 0, - }}, + Endpoints: endpoints, } var drops []*v3endpointpb.ClusterLoadAssignment_Policy_DropOverload diff --git a/resolver/resolver.go b/resolver/resolver.go index 6215e5ef2b02..353c10b69a5b 100644 --- a/resolver/resolver.go +++ b/resolver/resolver.go @@ -22,13 +22,13 @@ package resolver import ( "context" + "fmt" "net" "net/url" "strings" "google.golang.org/grpc/attributes" "google.golang.org/grpc/credentials" - "google.golang.org/grpc/internal/pretty" "google.golang.org/grpc/serviceconfig" ) @@ -124,7 +124,7 @@ type Address struct { Attributes *attributes.Attributes // BalancerAttributes contains arbitrary data about this address intended - // for consumption by the LB policy. These attribes do not affect SubConn + // for consumption by the LB policy. These attributes do not affect SubConn // creation, connection establishment, handshaking, etc. BalancerAttributes *attributes.Attributes @@ -151,7 +151,17 @@ func (a Address) Equal(o Address) bool { // String returns JSON formatted string representation of the address. 
 func (a Address) String() string {
-	return pretty.ToJSON(a)
+	var sb strings.Builder
+	sb.WriteString(fmt.Sprintf("{Addr: %q, ", a.Addr))
+	sb.WriteString(fmt.Sprintf("ServerName: %q, ", a.ServerName))
+	if a.Attributes != nil {
+		sb.WriteString(fmt.Sprintf("Attributes: %v, ", a.Attributes.String()))
+	}
+	if a.BalancerAttributes != nil {
+		sb.WriteString(fmt.Sprintf("BalancerAttributes: %v", a.BalancerAttributes.String()))
+	}
+	sb.WriteString("}")
+	return sb.String()
 }
 
 // BuildOptions includes additional information for the builder to create
diff --git a/test/xds/xds_client_custom_lb_test.go b/test/xds/xds_client_custom_lb_test.go
new file mode 100644
index 000000000000..91ec874c64a7
--- /dev/null
+++ b/test/xds/xds_client_custom_lb_test.go
@@ -0,0 +1,231 @@
+/*
+ *
+ * Copyright 2023 gRPC authors.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ */
+
+package xds_test
+
+import (
+	"context"
+	"fmt"
+	"testing"
+
+	v3xdsxdstypepb "github.com/cncf/xds/go/xds/type/v3"
+	v3clusterpb "github.com/envoyproxy/go-control-plane/envoy/config/cluster/v3"
+	v3corepb "github.com/envoyproxy/go-control-plane/envoy/config/core/v3"
+	v3endpointpb "github.com/envoyproxy/go-control-plane/envoy/config/endpoint/v3"
+	v3listenerpb "github.com/envoyproxy/go-control-plane/envoy/config/listener/v3"
+	v3routepb "github.com/envoyproxy/go-control-plane/envoy/config/route/v3"
+	v3roundrobinpb "github.com/envoyproxy/go-control-plane/envoy/extensions/load_balancing_policies/round_robin/v3"
+	v3wrrlocalitypb "github.com/envoyproxy/go-control-plane/envoy/extensions/load_balancing_policies/wrr_locality/v3"
+	"github.com/golang/protobuf/proto"
+	structpb "github.com/golang/protobuf/ptypes/struct"
+	testgrpc "google.golang.org/grpc/interop/grpc_testing"
+
+	"google.golang.org/grpc"
+	"google.golang.org/grpc/credentials/insecure"
+	"google.golang.org/grpc/internal/envconfig"
+	"google.golang.org/grpc/internal/stubserver"
+	"google.golang.org/grpc/internal/testutils"
+	"google.golang.org/grpc/internal/testutils/roundrobin"
+	"google.golang.org/grpc/internal/testutils/xds/e2e"
+	"google.golang.org/grpc/resolver"
+)
+
+// wrrLocality is a helper that takes a proto message and returns a WrrLocality
+// proto with the provided message marshaled into a proto.Any as its endpoint
+// picking policy child.
+func wrrLocality(m proto.Message) *v3wrrlocalitypb.WrrLocality {
+	return &v3wrrlocalitypb.WrrLocality{
+		EndpointPickingPolicy: &v3clusterpb.LoadBalancingPolicy{
+			Policies: []*v3clusterpb.LoadBalancingPolicy_Policy{
+				{
+					TypedExtensionConfig: &v3corepb.TypedExtensionConfig{
+						TypedConfig: testutils.MarshalAny(m),
+					},
+				},
+			},
+		},
+	}
+}
+
+// clusterWithLBConfiguration returns a cluster resource with the provided
+// proto message marshaled into an Any and specified through the
+// load_balancing_policy field.
+func clusterWithLBConfiguration(clusterName, edsServiceName string, secLevel e2e.SecurityLevel, m proto.Message) *v3clusterpb.Cluster {
+	cluster := e2e.DefaultCluster(clusterName, edsServiceName, secLevel)
+	cluster.LoadBalancingPolicy = &v3clusterpb.LoadBalancingPolicy{
+		Policies: []*v3clusterpb.LoadBalancingPolicy_Policy{
+			{
+				TypedExtensionConfig: &v3corepb.TypedExtensionConfig{
+					TypedConfig: testutils.MarshalAny(m),
+				},
+			},
+		},
+	}
+	return cluster
+}
+
+// TestWrrLocality tests RPC distribution across a scenario with 5 backends,
+// with 2 backends in a locality with weight 1, and 3 backends in a second
+// locality with weight 2. Through xDS, the test configures a
+// wrr_locality_balancer with either a round robin or custom (specifying pick
+// first) child load balancing policy, and asserts the correct distribution
+// based on the locality weights and the endpoint picking policy specified.
+func (s) TestWrrLocality(t *testing.T) {
+	oldCustomLBSupport := envconfig.XDSCustomLBPolicy
+	envconfig.XDSCustomLBPolicy = true
+	defer func() {
+		envconfig.XDSCustomLBPolicy = oldCustomLBSupport
+	}()
+
+	backend1 := stubserver.StartTestService(t, nil)
+	port1 := testutils.ParsePort(t, backend1.Address)
+	defer backend1.Stop()
+	backend2 := stubserver.StartTestService(t, nil)
+	port2 := testutils.ParsePort(t, backend2.Address)
+	defer backend2.Stop()
+	backend3 := stubserver.StartTestService(t, nil)
+	port3 := testutils.ParsePort(t, backend3.Address)
+	defer backend3.Stop()
+	backend4 := stubserver.StartTestService(t, nil)
+	port4 := testutils.ParsePort(t, backend4.Address)
+	defer backend4.Stop()
+	backend5 := stubserver.StartTestService(t, nil)
+	port5 := testutils.ParsePort(t, backend5.Address)
+	defer backend5.Stop()
+	const serviceName = "my-service-client-side-xds"
+
+	tests := []struct {
+		name string
+		// Configuration will be specified through load_balancing_policy field.
+		wrrLocalityConfiguration *v3wrrlocalitypb.WrrLocality
+		addressDistributionWant  []resolver.Address
+	}{
+		{
+			name:                     "rr_child",
+			wrrLocalityConfiguration: wrrLocality(&v3roundrobinpb.RoundRobin{}),
+			// Each address's expected probability is the locality weight of
+			// its locality / total locality weights, multiplied by 1 / number
+			// of endpoints in its locality (due to round robin across
+			// endpoints in a locality). Thus, address 1 and address 2 have
+			// 1/3 * 1/2 probability, and addresses 3 4 5 have 2/3 * 1/3
+			// probability of being routed to.
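+			// Concretely: the locality shares are 1/3 and 2/3; dividing by
+			// the 2 and 3 endpoints per locality gives per-address
+			// probabilities of 6/36 (backends 1 and 2) and 8/36 (backends 3,
+			// 4 and 5), matching the 36-entry distribution below.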
+ addressDistributionWant: []resolver.Address{ + {Addr: backend1.Address}, + {Addr: backend1.Address}, + {Addr: backend1.Address}, + {Addr: backend1.Address}, + {Addr: backend1.Address}, + {Addr: backend1.Address}, + {Addr: backend2.Address}, + {Addr: backend2.Address}, + {Addr: backend2.Address}, + {Addr: backend2.Address}, + {Addr: backend2.Address}, + {Addr: backend2.Address}, + {Addr: backend3.Address}, + {Addr: backend3.Address}, + {Addr: backend3.Address}, + {Addr: backend3.Address}, + {Addr: backend3.Address}, + {Addr: backend3.Address}, + {Addr: backend3.Address}, + {Addr: backend3.Address}, + {Addr: backend4.Address}, + {Addr: backend4.Address}, + {Addr: backend4.Address}, + {Addr: backend4.Address}, + {Addr: backend4.Address}, + {Addr: backend4.Address}, + {Addr: backend4.Address}, + {Addr: backend4.Address}, + {Addr: backend5.Address}, + {Addr: backend5.Address}, + {Addr: backend5.Address}, + {Addr: backend5.Address}, + {Addr: backend5.Address}, + {Addr: backend5.Address}, + {Addr: backend5.Address}, + {Addr: backend5.Address}, + }, + }, + // This configures custom lb as the child of wrr_locality, which points + // to our pick_first implementation. Thus, the expected distribution of + // addresses is locality weight of locality / total locality weights as + // the probability of picking the first backend within the locality + // (e.g. Address 1 for locality 1, and Address 3 for locality 2). + { + name: "custom_lb_child_pick_first", + wrrLocalityConfiguration: wrrLocality(&v3xdsxdstypepb.TypedStruct{ + TypeUrl: "type.googleapis.com/pick_first", + Value: &structpb.Struct{}, + }), + addressDistributionWant: []resolver.Address{ + {Addr: backend1.Address}, + {Addr: backend3.Address}, + {Addr: backend3.Address}, + }, + }, + } + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + managementServer, nodeID, _, r, cleanup := e2e.SetupManagementServer(t, e2e.ManagementServerOptions{}) + defer cleanup() + + routeConfigName := "route-" + serviceName + clusterName := "cluster-" + serviceName + endpointsName := "endpoints-" + serviceName + resources := e2e.UpdateOptions{ + NodeID: nodeID, + Listeners: []*v3listenerpb.Listener{e2e.DefaultClientListener(serviceName, routeConfigName)}, + Routes: []*v3routepb.RouteConfiguration{e2e.DefaultRouteConfig(routeConfigName, serviceName, clusterName)}, + Clusters: []*v3clusterpb.Cluster{clusterWithLBConfiguration(clusterName, endpointsName, e2e.SecurityLevelNone, test.wrrLocalityConfiguration)}, + Endpoints: []*v3endpointpb.ClusterLoadAssignment{e2e.EndpointResourceWithOptions(e2e.EndpointOptions{ + ClusterName: endpointsName, + Host: "localhost", + Localities: []e2e.LocalityOptions{ + { + Ports: []uint32{port1, port2}, + Weight: 1, + }, + { + Ports: []uint32{port3, port4, port5}, + Weight: 2, + }, + }, + })}, + } + + ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout) + defer cancel() + if err := managementServer.Update(ctx, resources); err != nil { + t.Fatal(err) + } + + cc, err := grpc.Dial(fmt.Sprintf("xds:///%s", serviceName), grpc.WithTransportCredentials(insecure.NewCredentials()), grpc.WithResolvers(r)) + if err != nil { + t.Fatalf("Failed to dial local test server: %v", err) + } + defer cc.Close() + + client := testgrpc.NewTestServiceClient(cc) + if err := roundrobin.CheckWeightedRoundRobinRPCs(ctx, client, test.addressDistributionWant); err != nil { + t.Fatalf("Error in expected round robin: %v", err) + } + }) + } +} diff --git a/xds/internal/balancer/cdsbalancer/cdsbalancer.go 
b/xds/internal/balancer/cdsbalancer/cdsbalancer.go
index 1e3fb4d1286c..91d4a6aa8661 100644
--- a/xds/internal/balancer/cdsbalancer/cdsbalancer.go
+++ b/xds/internal/balancer/cdsbalancer/cdsbalancer.go
@@ -38,7 +38,6 @@ import (
 	"google.golang.org/grpc/serviceconfig"
 	"google.golang.org/grpc/xds/internal/balancer/clusterresolver"
 	"google.golang.org/grpc/xds/internal/balancer/outlierdetection"
-	"google.golang.org/grpc/xds/internal/balancer/ringhash"
 	"google.golang.org/grpc/xds/internal/xdsclient"
 	"google.golang.org/grpc/xds/internal/xdsclient/xdsresource"
 )
@@ -394,23 +393,22 @@ func (b *cdsBalancer) handleWatchUpdate(update clusterHandlerUpdate) {
 			dms[i].OutlierDetection = outlierDetectionToConfig(cu.OutlierDetection)
 		}
 	}
+
 	lbCfg := &clusterresolver.LBConfig{
 		DiscoveryMechanisms: dms,
 	}
-	// lbPolicy is set only when the policy is ringhash. The default (when it's
-	// not set) is roundrobin. And similarly, we only need to set XDSLBPolicy
-	// for ringhash (it also defaults to roundrobin).
-	if lbp := update.lbPolicy; lbp != nil {
-		lbCfg.XDSLBPolicy = &internalserviceconfig.BalancerConfig{
-			Name: ringhash.Name,
-			Config: &ringhash.LBConfig{
-				MinRingSize: lbp.MinimumRingSize,
-				MaxRingSize: lbp.MaximumRingSize,
-			},
-		}
+	bc := &internalserviceconfig.BalancerConfig{}
+	if err := json.Unmarshal(update.lbPolicy, bc); err != nil {
+		// This should never occur, as valid configuration is emitted from the
+		// xDS Client. Validity is already checked in the xDS Client; however,
+		// this double validation is present because unmarshalling and
+		// validating are coupled into one json.Unmarshal operation. We will
+		// switch this in the future to two separate operations.
+		b.logger.Errorf("Emitted lbPolicy %s from xDS Client is invalid: %v", update.lbPolicy, err)
+		return
 	}
-
+	lbCfg.XDSLBPolicy = bc
 	ccState := balancer.ClientConnState{
 		ResolverState:  xdsclient.SetClient(resolver.State{}, b.xdsClient),
 		BalancerConfig: lbCfg,
diff --git a/xds/internal/balancer/cdsbalancer/cdsbalancer_security_test.go b/xds/internal/balancer/cdsbalancer/cdsbalancer_security_test.go
index 8d7face5e0a3..eb687aa70f76 100644
--- a/xds/internal/balancer/cdsbalancer/cdsbalancer_security_test.go
+++ b/xds/internal/balancer/cdsbalancer/cdsbalancer_security_test.go
@@ -63,6 +63,7 @@ var (
 			IdentityInstanceName:   "default2",
 			SubjectAltNameMatchers: testSANMatchers,
 		},
+		LBPolicy: wrrLocalityLBConfigJSON,
 	}
 	cdsUpdateWithMissingSecurityCfg = xdsresource.ClusterUpdate{
 		ClusterName: serviceName,
@@ -248,8 +249,11 @@ func (s) TestSecurityConfigWithoutXDSCreds(t *testing.T) {
 	// create a new EDS balancer. The fake EDS balancer created above will be
 	// returned to the CDS balancer, because we have overridden the
 	// newChildBalancer function as part of test setup.
-	cdsUpdate := xdsresource.ClusterUpdate{ClusterName: serviceName}
-	wantCCS := edsCCS(serviceName, nil, false, nil, noopODLBCfg)
+	cdsUpdate := xdsresource.ClusterUpdate{
+		ClusterName: serviceName,
+		LBPolicy:    wrrLocalityLBConfigJSON,
+	}
+	wantCCS := edsCCS(serviceName, nil, false, wrrLocalityLBConfig, noopODLBCfg)
 	ctx, ctxCancel := context.WithTimeout(context.Background(), defaultTestTimeout)
 	defer ctxCancel()
 	if err := invokeWatchCbAndWait(ctx, xdsC, cdsWatchInfo{cdsUpdate, nil}, wantCCS, edsB); err != nil {
@@ -304,8 +308,11 @@ func (s) TestNoSecurityConfigWithXDSCreds(t *testing.T) {
 	// returned to the CDS balancer, because we have overridden the
 	// newChildBalancer function as part of test setup. No security config is
 	// passed to the CDS balancer as part of this update.
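For context on the json.Unmarshal call above: update.lbPolicy carries the raw JSON form of a loadBalancingConfig list emitted by the xDS client. A sketch of the round trip, mirroring the test helpers below; it compiles only inside the grpc module, since it touches internal packages:

package main

import (
	"encoding/json"
	"fmt"

	_ "google.golang.org/grpc" // registers round_robin and friends
	internalserviceconfig "google.golang.org/grpc/internal/serviceconfig"
	"google.golang.org/grpc/xds/internal/balancer/wrrlocality"
)

func main() {
	// Marshal a wrr_locality config the way the tests below build
	// wrrLocalityLBConfigJSON...
	cfg := &internalserviceconfig.BalancerConfig{
		Name: wrrlocality.Name,
		Config: &wrrlocality.LBConfig{
			ChildPolicy: &internalserviceconfig.BalancerConfig{Name: "round_robin"},
		},
	}
	raw, _ := json.Marshal(cfg)
	fmt.Println(string(raw))

	// ...and unmarshal it the way handleWatchUpdate does. BalancerConfig
	// resolves the policy name against the balancer registry, so an
	// unknown or invalid config surfaces as a non-nil error here.
	bc := &internalserviceconfig.BalancerConfig{}
	if err := json.Unmarshal(raw, bc); err != nil {
		panic(err)
	}
}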
- cdsUpdate := xdsresource.ClusterUpdate{ClusterName: serviceName} - wantCCS := edsCCS(serviceName, nil, false, nil, noopODLBCfg) + cdsUpdate := xdsresource.ClusterUpdate{ + ClusterName: serviceName, + LBPolicy: wrrLocalityLBConfigJSON, + } + wantCCS := edsCCS(serviceName, nil, false, wrrLocalityLBConfig, noopODLBCfg) ctx, ctxCancel := context.WithTimeout(context.Background(), defaultTestTimeout) defer ctxCancel() if err := invokeWatchCbAndWait(ctx, xdsC, cdsWatchInfo{cdsUpdate, nil}, wantCCS, edsB); err != nil { @@ -461,7 +468,7 @@ func (s) TestSecurityConfigUpdate_BadToGood(t *testing.T) { // create a new EDS balancer. The fake EDS balancer created above will be // returned to the CDS balancer, because we have overridden the // newChildBalancer function as part of test setup. - wantCCS := edsCCS(serviceName, nil, false, nil, noopODLBCfg) + wantCCS := edsCCS(serviceName, nil, false, wrrLocalityLBConfig, noopODLBCfg) if err := invokeWatchCbAndWait(ctx, xdsC, cdsWatchInfo{cdsUpdateWithGoodSecurityCfg, nil}, wantCCS, edsB); err != nil { t.Fatal(err) } @@ -495,7 +502,7 @@ func (s) TestGoodSecurityConfig(t *testing.T) { // create a new EDS balancer. The fake EDS balancer created above will be // returned to the CDS balancer, because we have overridden the // newChildBalancer function as part of test setup. - wantCCS := edsCCS(serviceName, nil, false, nil, noopODLBCfg) + wantCCS := edsCCS(serviceName, nil, false, wrrLocalityLBConfig, noopODLBCfg) ctx, ctxCancel := context.WithTimeout(context.Background(), defaultTestTimeout) defer ctxCancel() if err := invokeWatchCbAndWait(ctx, xdsC, cdsWatchInfo{cdsUpdateWithGoodSecurityCfg, nil}, wantCCS, edsB); err != nil { @@ -548,7 +555,7 @@ func (s) TestSecurityConfigUpdate_GoodToFallback(t *testing.T) { // create a new EDS balancer. The fake EDS balancer created above will be // returned to the CDS balancer, because we have overridden the // newChildBalancer function as part of test setup. - wantCCS := edsCCS(serviceName, nil, false, nil, noopODLBCfg) + wantCCS := edsCCS(serviceName, nil, false, wrrLocalityLBConfig, noopODLBCfg) ctx, ctxCancel := context.WithTimeout(context.Background(), defaultTestTimeout) defer ctxCancel() if err := invokeWatchCbAndWait(ctx, xdsC, cdsWatchInfo{cdsUpdateWithGoodSecurityCfg, nil}, wantCCS, edsB); err != nil { @@ -564,7 +571,10 @@ func (s) TestSecurityConfigUpdate_GoodToFallback(t *testing.T) { // an update which contains bad security config. So, we expect the CDS // balancer to forward this error to the EDS balancer and eventually the // channel needs to be put in a bad state. - cdsUpdate := xdsresource.ClusterUpdate{ClusterName: serviceName} + cdsUpdate := xdsresource.ClusterUpdate{ + ClusterName: serviceName, + LBPolicy: wrrLocalityLBConfigJSON, + } if err := invokeWatchCbAndWait(ctx, xdsC, cdsWatchInfo{cdsUpdate, nil}, wantCCS, edsB); err != nil { t.Fatal(err) } @@ -598,7 +608,7 @@ func (s) TestSecurityConfigUpdate_GoodToBad(t *testing.T) { // create a new EDS balancer. The fake EDS balancer created above will be // returned to the CDS balancer, because we have overridden the // newChildBalancer function as part of test setup. 
- wantCCS := edsCCS(serviceName, nil, false, nil, noopODLBCfg) + wantCCS := edsCCS(serviceName, nil, false, wrrLocalityLBConfig, noopODLBCfg) ctx, ctxCancel := context.WithTimeout(context.Background(), defaultTestTimeout) defer ctxCancel() if err := invokeWatchCbAndWait(ctx, xdsC, cdsWatchInfo{cdsUpdateWithGoodSecurityCfg, nil}, wantCCS, edsB); err != nil { @@ -675,8 +685,9 @@ func (s) TestSecurityConfigUpdate_GoodToGood(t *testing.T) { RootInstanceName: "default1", SubjectAltNameMatchers: testSANMatchers, }, + LBPolicy: wrrLocalityLBConfigJSON, } - wantCCS := edsCCS(serviceName, nil, false, nil, noopODLBCfg) + wantCCS := edsCCS(serviceName, nil, false, wrrLocalityLBConfig, noopODLBCfg) ctx, ctxCancel := context.WithTimeout(context.Background(), defaultTestTimeout) defer ctxCancel() if err := invokeWatchCbAndWait(ctx, xdsC, cdsWatchInfo{cdsUpdate, nil}, wantCCS, edsB); err != nil { @@ -700,6 +711,7 @@ func (s) TestSecurityConfigUpdate_GoodToGood(t *testing.T) { RootInstanceName: "default2", SubjectAltNameMatchers: testSANMatchers, }, + LBPolicy: wrrLocalityLBConfigJSON, } if err := invokeWatchCbAndWait(ctx, xdsC, cdsWatchInfo{cdsUpdate, nil}, wantCCS, edsB); err != nil { t.Fatal(err) diff --git a/xds/internal/balancer/cdsbalancer/cdsbalancer_test.go b/xds/internal/balancer/cdsbalancer/cdsbalancer_test.go index 27b2f15b4652..d69465a96274 100644 --- a/xds/internal/balancer/cdsbalancer/cdsbalancer_test.go +++ b/xds/internal/balancer/cdsbalancer/cdsbalancer_test.go @@ -37,6 +37,7 @@ import ( "google.golang.org/grpc/xds/internal/balancer/clusterresolver" "google.golang.org/grpc/xds/internal/balancer/outlierdetection" "google.golang.org/grpc/xds/internal/balancer/ringhash" + "google.golang.org/grpc/xds/internal/balancer/wrrlocality" "google.golang.org/grpc/xds/internal/testutils/fakeclient" "google.golang.org/grpc/xds/internal/xdsclient" "google.golang.org/grpc/xds/internal/xdsclient/bootstrap" @@ -60,6 +61,20 @@ var ( noopODLBCfg = outlierdetection.LBConfig{ Interval: 1<<63 - 1, } + wrrLocalityLBConfig = &internalserviceconfig.BalancerConfig{ + Name: wrrlocality.Name, + Config: &wrrlocality.LBConfig{ + ChildPolicy: &internalserviceconfig.BalancerConfig{ + Name: "round_robin", + }, + }, + } + wrrLocalityLBConfigJSON, _ = json.Marshal(wrrLocalityLBConfig) + ringHashLBConfig = &internalserviceconfig.BalancerConfig{ + Name: ringhash.Name, + Config: &ringhash.LBConfig{MinRingSize: 10, MaxRingSize: 100}, + } + ringHashLBConfigJSON, _ = json.Marshal(ringHashLBConfig) ) type s struct { @@ -381,20 +396,27 @@ func (s) TestHandleClusterUpdate(t *testing.T) { wantCCS balancer.ClientConnState }{ { - name: "happy-case-with-lrs", - cdsUpdate: xdsresource.ClusterUpdate{ClusterName: serviceName, LRSServerConfig: xdsresource.ClusterLRSServerSelf}, - wantCCS: edsCCS(serviceName, nil, true, nil, noopODLBCfg), + name: "happy-case-with-lrs", + cdsUpdate: xdsresource.ClusterUpdate{ + ClusterName: serviceName, + LRSServerConfig: xdsresource.ClusterLRSServerSelf, + LBPolicy: wrrLocalityLBConfigJSON, + }, + wantCCS: edsCCS(serviceName, nil, true, wrrLocalityLBConfig, noopODLBCfg), }, { - name: "happy-case-without-lrs", - cdsUpdate: xdsresource.ClusterUpdate{ClusterName: serviceName}, - wantCCS: edsCCS(serviceName, nil, false, nil, noopODLBCfg), + name: "happy-case-without-lrs", + cdsUpdate: xdsresource.ClusterUpdate{ + ClusterName: serviceName, + LBPolicy: wrrLocalityLBConfigJSON, + }, + wantCCS: edsCCS(serviceName, nil, false, wrrLocalityLBConfig, noopODLBCfg), }, { name: "happy-case-with-ring-hash-lb-policy", 
cdsUpdate: xdsresource.ClusterUpdate{ ClusterName: serviceName, - LBPolicy: &xdsresource.ClusterLBPolicyRingHash{MinimumRingSize: 10, MaximumRingSize: 100}, + LBPolicy: ringHashLBConfigJSON, }, wantCCS: edsCCS(serviceName, nil, false, &internalserviceconfig.BalancerConfig{ Name: ringhash.Name, @@ -403,21 +425,25 @@ func (s) TestHandleClusterUpdate(t *testing.T) { }, { name: "happy-case-outlier-detection", - cdsUpdate: xdsresource.ClusterUpdate{ClusterName: serviceName, OutlierDetection: &xdsresource.OutlierDetection{ - Interval: 10 * time.Second, - BaseEjectionTime: 30 * time.Second, - MaxEjectionTime: 300 * time.Second, - MaxEjectionPercent: 10, - SuccessRateStdevFactor: 1900, - EnforcingSuccessRate: 100, - SuccessRateMinimumHosts: 5, - SuccessRateRequestVolume: 100, - FailurePercentageThreshold: 85, - EnforcingFailurePercentage: 5, - FailurePercentageMinimumHosts: 5, - FailurePercentageRequestVolume: 50, - }}, - wantCCS: edsCCS(serviceName, nil, false, nil, outlierdetection.LBConfig{ + cdsUpdate: xdsresource.ClusterUpdate{ + ClusterName: serviceName, + OutlierDetection: &xdsresource.OutlierDetection{ + Interval: 10 * time.Second, + BaseEjectionTime: 30 * time.Second, + MaxEjectionTime: 300 * time.Second, + MaxEjectionPercent: 10, + SuccessRateStdevFactor: 1900, + EnforcingSuccessRate: 100, + SuccessRateMinimumHosts: 5, + SuccessRateRequestVolume: 100, + FailurePercentageThreshold: 85, + EnforcingFailurePercentage: 5, + FailurePercentageMinimumHosts: 5, + FailurePercentageRequestVolume: 50, + }, + LBPolicy: wrrLocalityLBConfigJSON, + }, + wantCCS: edsCCS(serviceName, nil, false, wrrLocalityLBConfig, outlierdetection.LBConfig{ Interval: 10 * time.Second, BaseEjectionTime: 30 * time.Second, MaxEjectionTime: 300 * time.Second, @@ -501,8 +527,11 @@ func (s) TestHandleClusterUpdateError(t *testing.T) { // create a new EDS balancer. The fake EDS balancer created above will be // returned to the CDS balancer, because we have overridden the // newChildBalancer function as part of test setup. - cdsUpdate := xdsresource.ClusterUpdate{ClusterName: serviceName} - wantCCS := edsCCS(serviceName, nil, false, nil, noopODLBCfg) + cdsUpdate := xdsresource.ClusterUpdate{ + ClusterName: serviceName, + LBPolicy: wrrLocalityLBConfigJSON, + } + wantCCS := edsCCS(serviceName, nil, false, wrrLocalityLBConfig, noopODLBCfg) if err := invokeWatchCbAndWait(ctx, xdsC, cdsWatchInfo{cdsUpdate, nil}, wantCCS, edsB); err != nil { t.Fatal(err) } @@ -586,8 +615,11 @@ func (s) TestResolverError(t *testing.T) { // create a new EDS balancer. The fake EDS balancer created above will be // returned to the CDS balancer, because we have overridden the // newChildBalancer function as part of test setup. - cdsUpdate := xdsresource.ClusterUpdate{ClusterName: serviceName} - wantCCS := edsCCS(serviceName, nil, false, nil, noopODLBCfg) + cdsUpdate := xdsresource.ClusterUpdate{ + ClusterName: serviceName, + LBPolicy: wrrLocalityLBConfigJSON, + } + wantCCS := edsCCS(serviceName, nil, false, wrrLocalityLBConfig, noopODLBCfg) if err := invokeWatchCbAndWait(ctx, xdsC, cdsWatchInfo{cdsUpdate, nil}, wantCCS, edsB); err != nil { t.Fatal(err) } @@ -635,8 +667,11 @@ func (s) TestUpdateSubConnState(t *testing.T) { // create a new EDS balancer. The fake EDS balancer created above will be // returned to the CDS balancer, because we have overridden the // newChildBalancer function as part of test setup. 
- cdsUpdate := xdsresource.ClusterUpdate{ClusterName: serviceName} - wantCCS := edsCCS(serviceName, nil, false, nil, noopODLBCfg) + cdsUpdate := xdsresource.ClusterUpdate{ + ClusterName: serviceName, + LBPolicy: wrrLocalityLBConfigJSON, + } + wantCCS := edsCCS(serviceName, nil, false, wrrLocalityLBConfig, noopODLBCfg) ctx, ctxCancel := context.WithTimeout(context.Background(), defaultTestTimeout) defer ctxCancel() if err := invokeWatchCbAndWait(ctx, xdsC, cdsWatchInfo{cdsUpdate, nil}, wantCCS, edsB); err != nil { @@ -665,13 +700,16 @@ func (s) TestCircuitBreaking(t *testing.T) { cancel() cdsB.Close() }() - // Here we invoke the watch callback registered on the fake xdsClient. This // will trigger the watch handler on the CDS balancer, which will update // the service's counter with the new max requests. var maxRequests uint32 = 1 - cdsUpdate := xdsresource.ClusterUpdate{ClusterName: clusterName, MaxRequests: &maxRequests} - wantCCS := edsCCS(clusterName, &maxRequests, false, nil, noopODLBCfg) + cdsUpdate := xdsresource.ClusterUpdate{ + ClusterName: clusterName, + MaxRequests: &maxRequests, + LBPolicy: wrrLocalityLBConfigJSON, + } + wantCCS := edsCCS(clusterName, &maxRequests, false, wrrLocalityLBConfig, noopODLBCfg) ctx, ctxCancel := context.WithTimeout(context.Background(), defaultTestTimeout) defer ctxCancel() if err := invokeWatchCbAndWait(ctx, xdsC, cdsWatchInfo{cdsUpdate, nil}, wantCCS, edsB); err != nil { @@ -699,14 +737,16 @@ func (s) TestClose(t *testing.T) { // provided xdsClient. xdsC, cdsB, edsB, _, cancel := setupWithWatch(t) defer cancel() - // Here we invoke the watch callback registered on the fake xdsClient. This // will trigger the watch handler on the CDS balancer, which will attempt to // create a new EDS balancer. The fake EDS balancer created above will be // returned to the CDS balancer, because we have overridden the // newChildBalancer function as part of test setup. - cdsUpdate := xdsresource.ClusterUpdate{ClusterName: serviceName} - wantCCS := edsCCS(serviceName, nil, false, nil, noopODLBCfg) + cdsUpdate := xdsresource.ClusterUpdate{ + ClusterName: serviceName, + LBPolicy: wrrLocalityLBConfigJSON, + } + wantCCS := edsCCS(serviceName, nil, false, wrrLocalityLBConfig, noopODLBCfg) ctx, ctxCancel := context.WithTimeout(context.Background(), defaultTestTimeout) defer ctxCancel() if err := invokeWatchCbAndWait(ctx, xdsC, cdsWatchInfo{cdsUpdate, nil}, wantCCS, edsB); err != nil { @@ -776,8 +816,11 @@ func (s) TestExitIdle(t *testing.T) { // create a new EDS balancer. The fake EDS balancer created above will be // returned to the CDS balancer, because we have overridden the // newChildBalancer function as part of test setup. 
-	cdsUpdate := xdsresource.ClusterUpdate{ClusterName: serviceName}
-	wantCCS := edsCCS(serviceName, nil, false, nil, noopODLBCfg)
+	cdsUpdate := xdsresource.ClusterUpdate{
+		ClusterName: serviceName,
+		LBPolicy:    wrrLocalityLBConfigJSON,
+	}
+	wantCCS := edsCCS(serviceName, nil, false, wrrLocalityLBConfig, noopODLBCfg)
 	ctx, ctxCancel := context.WithTimeout(context.Background(), defaultTestTimeout)
 	defer ctxCancel()
 	if err := invokeWatchCbAndWait(ctx, xdsC, cdsWatchInfo{cdsUpdate, nil}, wantCCS, edsB); err != nil {
diff --git a/xds/internal/balancer/cdsbalancer/cluster_handler.go b/xds/internal/balancer/cdsbalancer/cluster_handler.go
index 234511a45dcf..aa2d9674a790 100644
--- a/xds/internal/balancer/cdsbalancer/cluster_handler.go
+++ b/xds/internal/balancer/cdsbalancer/cluster_handler.go
@@ -17,6 +17,7 @@
 package cdsbalancer
 
 import (
+	"encoding/json"
 	"errors"
 	"sync"
 
@@ -38,13 +39,9 @@ var (
 type clusterHandlerUpdate struct {
 	// securityCfg is the Security Config from the top (root) cluster.
 	securityCfg *xdsresource.SecurityConfig
-	// lbPolicy is the lb policy from the top (root) cluster.
-	//
-	// Currently, we only support roundrobin or ringhash, and since roundrobin
-	// does need configs, this is only set to the ringhash config, if the policy
-	// is ringhash. In the future, if we support more policies, we can make this
-	// an interface, and set it to config of the other policies.
-	lbPolicy *xdsresource.ClusterLBPolicyRingHash
+
+	// lbPolicy is the child of the cluster_impl policy, for all priorities.
+	lbPolicy json.RawMessage
 
 	// updates is a list of ClusterUpdates from all the leaf clusters.
 	updates []xdsresource.ClusterUpdate
@@ -123,6 +120,7 @@ func (ch *clusterHandler) constructClusterUpdate() {
 	case <-ch.updateChannel:
 	default:
 	}
+
 	ch.updateChannel <- clusterHandlerUpdate{
 		securityCfg: ch.createdClusters[ch.rootClusterName].clusterUpdate.SecurityCfg,
 		lbPolicy:    ch.createdClusters[ch.rootClusterName].clusterUpdate.LBPolicy,
diff --git a/xds/internal/balancer/cdsbalancer/cluster_handler_test.go b/xds/internal/balancer/cdsbalancer/cluster_handler_test.go
index caf10955014f..ee989ec3ef73 100644
--- a/xds/internal/balancer/cdsbalancer/cluster_handler_test.go
+++ b/xds/internal/balancer/cdsbalancer/cluster_handler_test.go
@@ -52,7 +52,6 @@ func (s) TestSuccessCaseLeafNode(t *testing.T) {
 		name          string
 		clusterName   string
 		clusterUpdate xdsresource.ClusterUpdate
-		lbPolicy      *xdsresource.ClusterLBPolicyRingHash
 	}{
 		{
 			name:        "test-update-root-cluster-EDS-success",
 			clusterName: edsService,
 			clusterUpdate: xdsresource.ClusterUpdate{
 				ClusterType: xdsresource.ClusterTypeEDS,
 				ClusterName: edsService,
 			},
 		},
-		{
-			name:        "test-update-root-cluster-EDS-with-ring-hash",
-			clusterName: logicalDNSService,
-			clusterUpdate: xdsresource.ClusterUpdate{
-				ClusterType: xdsresource.ClusterTypeLogicalDNS,
-				ClusterName: logicalDNSService,
-				LBPolicy:    &xdsresource.ClusterLBPolicyRingHash{MinimumRingSize: 10, MaximumRingSize: 100},
-			},
-			lbPolicy: &xdsresource.ClusterLBPolicyRingHash{MinimumRingSize: 10, MaximumRingSize: 100},
-		},
 		{
 			name:        "test-update-root-cluster-Logical-DNS-success",
 			clusterName: logicalDNSService,
@@ -111,9 +100,6 @@ func (s) TestSuccessCaseLeafNode(t *testing.T) {
 			if diff := cmp.Diff(chu.updates, []xdsresource.ClusterUpdate{test.clusterUpdate}); diff != "" {
 				t.Fatalf("got unexpected cluster update, diff (-got, +want): %v", diff)
 			}
-			if diff := cmp.Diff(chu.lbPolicy, test.lbPolicy); diff != "" {
-				t.Fatalf("got unexpected lb policy in cluster update, diff (-got, +want): %v", diff)
-			}
 		case <-ctx.Done():
 			t.Fatal("Timed out waiting
for update from update channel.") } diff --git a/xds/internal/balancer/clusterimpl/tests/balancer_test.go b/xds/internal/balancer/clusterimpl/tests/balancer_test.go index cf0e7b0ce842..d335ecd7e844 100644 --- a/xds/internal/balancer/clusterimpl/tests/balancer_test.go +++ b/xds/internal/balancer/clusterimpl/tests/balancer_test.go @@ -112,9 +112,14 @@ func (s) TestConfigUpdateWithSameLoadReportingServerConfig(t *testing.T) { // drops all RPCs, but with no change in the load reporting server config. resources.Endpoints = []*v3endpointpb.ClusterLoadAssignment{ e2e.EndpointResourceWithOptions(e2e.EndpointOptions{ - ClusterName: "endpoints-" + serviceName, - Host: "localhost", - Ports: []uint32{testutils.ParsePort(t, server.Address)}, + ClusterName: "endpoints-" + serviceName, + Host: "localhost", + Localities: []e2e.LocalityOptions{ + { + Ports: []uint32{testutils.ParsePort(t, server.Address)}, + Weight: 1, + }, + }, DropPercents: map[string]int{"test-drop-everything": 100}, }), } diff --git a/xds/internal/balancer/clusterresolver/clusterresolver_test.go b/xds/internal/balancer/clusterresolver/clusterresolver_test.go index f327c8cf5fc5..65cb7a9bf981 100644 --- a/xds/internal/balancer/clusterresolver/clusterresolver_test.go +++ b/xds/internal/balancer/clusterresolver/clusterresolver_test.go @@ -27,8 +27,6 @@ import ( "github.com/google/go-cmp/cmp" "github.com/google/go-cmp/cmp/cmpopts" "google.golang.org/grpc/balancer" - "google.golang.org/grpc/balancer/roundrobin" - "google.golang.org/grpc/balancer/weightedtarget" "google.golang.org/grpc/connectivity" "google.golang.org/grpc/internal/grpctest" internalserviceconfig "google.golang.org/grpc/internal/serviceconfig" @@ -378,7 +376,6 @@ func (s) TestOutlierDetection(t *testing.T) { t.Fatal(err) } - localityID := xdsinternal.LocalityID{Zone: "zone"} // The priority configuration generated should have Outlier Detection as a // direct child due to Outlier Detection being turned on. 
pCfgWant := &priority.LBConfig{ @@ -393,17 +390,6 @@ func (s) TestOutlierDetection(t *testing.T) { Config: &clusterimpl.LBConfig{ Cluster: testClusterName, EDSServiceName: "test-eds-service-name", - ChildPolicy: &internalserviceconfig.BalancerConfig{ - Name: weightedtarget.Name, - Config: &weightedtarget.LBConfig{ - Targets: map[string]weightedtarget.Target{ - assertString(localityID.ToString): { - Weight: 100, - ChildPolicy: &internalserviceconfig.BalancerConfig{Name: roundrobin.Name}, - }, - }, - }, - }, }, }, }, diff --git a/xds/internal/balancer/clusterresolver/config_test.go b/xds/internal/balancer/clusterresolver/config_test.go index 2455b88d8079..fd17f3ede6d1 100644 --- a/xds/internal/balancer/clusterresolver/config_test.go +++ b/xds/internal/balancer/clusterresolver/config_test.go @@ -286,10 +286,6 @@ func TestParseConfig(t *testing.T) { } } -func newString(s string) *string { - return &s -} - func newUint32(i uint32) *uint32 { return &i } diff --git a/xds/internal/balancer/clusterresolver/configbuilder.go b/xds/internal/balancer/clusterresolver/configbuilder.go index b76a40355cc8..06b0aec2f311 100644 --- a/xds/internal/balancer/clusterresolver/configbuilder.go +++ b/xds/internal/balancer/clusterresolver/configbuilder.go @@ -23,9 +23,7 @@ import ( "fmt" "sort" - "google.golang.org/grpc/balancer/roundrobin" "google.golang.org/grpc/balancer/weightedroundrobin" - "google.golang.org/grpc/balancer/weightedtarget" "google.golang.org/grpc/internal/envconfig" "google.golang.org/grpc/internal/hierarchy" internalserviceconfig "google.golang.org/grpc/internal/serviceconfig" @@ -34,7 +32,7 @@ import ( "google.golang.org/grpc/xds/internal/balancer/clusterimpl" "google.golang.org/grpc/xds/internal/balancer/outlierdetection" "google.golang.org/grpc/xds/internal/balancer/priority" - "google.golang.org/grpc/xds/internal/balancer/ringhash" + "google.golang.org/grpc/xds/internal/balancer/wrrlocality" "google.golang.org/grpc/xds/internal/xdsclient/xdsresource" ) @@ -63,33 +61,6 @@ type priorityConfig struct { // // The built tree of balancers (see test for the output struct). // -// If xds lb policy is ROUND_ROBIN, the children will be weighted_target for -// locality picking, and round_robin for endpoint picking. -// -// ┌────────┐ -// │priority│ -// └┬──────┬┘ -// │ │ -// ┌───────────▼┐ ┌▼───────────┐ -// │cluster_impl│ │cluster_impl│ -// └─┬──────────┘ └──────────┬─┘ -// │ │ -// ┌──────────────▼─┐ ┌─▼──────────────┐ -// │locality_picking│ │locality_picking│ -// └┬──────────────┬┘ └┬──────────────┬┘ -// │ │ │ │ -// ┌─▼─┐ ┌─▼─┐ ┌─▼─┐ ┌─▼─┐ -// │LRS│ │LRS│ │LRS│ │LRS│ -// └─┬─┘ └─┬─┘ └─┬─┘ └─┬─┘ -// │ │ │ │ -// ┌──────────▼─────┐ ┌─────▼──────────┐ ┌──────────▼─────┐ ┌─────▼──────────┐ -// │endpoint_picking│ │endpoint_picking│ │endpoint_picking│ │endpoint_picking│ -// └────────────────┘ └────────────────┘ └────────────────┘ └────────────────┘ -// -// If xds lb policy is RING_HASH, the children will be just a ring_hash policy. -// The endpoints from all localities will be flattened to one addresses list, -// and the ring_hash policy will pick endpoints from it. -// // ┌────────┐ // │priority│ // └┬──────┬┘ @@ -99,13 +70,8 @@ type priorityConfig struct { // └──────┬─────┘ └─────┬──────┘ // │ │ // ┌──────▼─────┐ ┌─────▼──────┐ -// │ ring_hash │ │ ring_hash │ +// │xDSLBPolicy │ │xDSLBPolicy │ (Locality and Endpoint picking layer) // └────────────┘ └────────────┘ -// -// If endpointPickingPolicy is nil, roundrobin will be used. 
-// -// Custom locality picking policy isn't support, and weighted_target is always -// used. func buildPriorityConfigJSON(priorities []priorityConfig, xdsLBPolicy *internalserviceconfig.BalancerConfig) ([]byte, []resolver.Address, error) { pc, addrs, err := buildPriorityConfig(priorities, xdsLBPolicy) if err != nil { @@ -284,55 +250,11 @@ func dedupSortedIntSlice(a []int) []int { return a[:i+1] } -// rrBalancerConfig is a const roundrobin config, used as child of -// weighted-roundrobin. To avoid allocating memory everytime. -var rrBalancerConfig = &internalserviceconfig.BalancerConfig{Name: roundrobin.Name} - // priorityLocalitiesToClusterImpl takes a list of localities (with the same // priority), and generates a cluster impl policy config, and a list of -// addresses. +// addresses with their path hierarchy set to [priority-name, locality-name], so +// priority and the xDS LB Policy know which child policy each address is for. func priorityLocalitiesToClusterImpl(localities []xdsresource.Locality, priorityName string, mechanism DiscoveryMechanism, drops []clusterimpl.DropConfig, xdsLBPolicy *internalserviceconfig.BalancerConfig) (*clusterimpl.LBConfig, []resolver.Address, error) { - clusterImplCfg := &clusterimpl.LBConfig{ - Cluster: mechanism.Cluster, - EDSServiceName: mechanism.EDSServiceName, - LoadReportingServer: mechanism.LoadReportingServer, - MaxConcurrentRequests: mechanism.MaxConcurrentRequests, - DropCategories: drops, - // ChildPolicy is not set. Will be set based on xdsLBPolicy - } - - if xdsLBPolicy == nil || xdsLBPolicy.Name == roundrobin.Name { - // If lb policy is ROUND_ROBIN: - // - locality-picking policy is weighted_target - // - endpoint-picking policy is round_robin - logger.Infof("xds lb policy is %q, building config with weighted_target + round_robin", roundrobin.Name) - // Child of weighted_target is hardcoded to round_robin. - wtConfig, addrs := localitiesToWeightedTarget(localities, priorityName, rrBalancerConfig) - clusterImplCfg.ChildPolicy = &internalserviceconfig.BalancerConfig{Name: weightedtarget.Name, Config: wtConfig} - return clusterImplCfg, addrs, nil - } - - if xdsLBPolicy.Name == ringhash.Name { - // If lb policy is RIHG_HASH, will build one ring_hash policy as child. - // The endpoints from all localities will be flattened to one addresses - // list, and the ring_hash policy will pick endpoints from it. - logger.Infof("xds lb policy is %q, building config with ring_hash", ringhash.Name) - addrs := localitiesToRingHash(localities, priorityName) - // Set child to ring_hash, note that the ring_hash config is from - // xdsLBPolicy. - clusterImplCfg.ChildPolicy = &internalserviceconfig.BalancerConfig{Name: ringhash.Name, Config: xdsLBPolicy.Config} - return clusterImplCfg, addrs, nil - } - - return nil, nil, fmt.Errorf("unsupported xds LB policy %q, not one of {%q,%q}", xdsLBPolicy.Name, roundrobin.Name, ringhash.Name) -} - -// localitiesToRingHash takes a list of localities (with the same priority), and -// generates a list of addresses. -// -// The addresses have path hierarchy set to [priority-name], so priority knows -// which child policy they are for. 
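The updated doc comment above states the key invariant: every address now carries a two-element [priority-name, locality-name] path. A minimal sketch of how such a path is attached and later consumed, assuming the internal/hierarchy helpers this file already imports (hierarchy.Group splits on the first path element and strips it before handing addresses down, which is how priority routes addresses to the right child):

package main

import (
	"fmt"

	"google.golang.org/grpc/internal/hierarchy"
	"google.golang.org/grpc/resolver"
)

func main() {
	// The priority policy consumes the first element; the xDS LB policy
	// (e.g. wrr_locality) consumes the second. Values are illustrative.
	addr := resolver.Address{Addr: "10.0.0.1:8080"}
	addr = hierarchy.Set(addr, []string{"priority-0-0", `{"zone":"zone-a"}`})

	byPriority := hierarchy.Group([]resolver.Address{addr})
	for _, a := range byPriority["priority-0-0"] {
		fmt.Println(a.Addr, hierarchy.Get(a)) // 10.0.0.1:8080 [{"zone":"zone-a"}]
	}
}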
-func localitiesToRingHash(localities []xdsresource.Locality, priorityName string) []resolver.Address { var addrs []resolver.Address for _, locality := range localities { var lw uint32 = 1 @@ -350,54 +272,29 @@ func localitiesToRingHash(localities []xdsresource.Locality, priorityName string if endpoint.HealthStatus != xdsresource.EndpointHealthStatusHealthy && endpoint.HealthStatus != xdsresource.EndpointHealthStatusUnknown { continue } - + addr := resolver.Address{Addr: endpoint.Address} + addr = hierarchy.Set(addr, []string{priorityName, localityStr}) + addr = internal.SetLocalityID(addr, locality.ID) + // "To provide the xds_wrr_locality load balancer information about + // locality weights received from EDS, the cluster resolver will + // populate a new locality weight attribute for each address. The + // attribute will have the weight (as an integer) of the locality + // the address is part of." - A52 + addr = wrrlocality.SetAddrInfo(addr, wrrlocality.AddrInfo{LocalityWeight: lw}) var ew uint32 = 1 if endpoint.Weight != 0 { ew = endpoint.Weight } - - // The weight of each endpoint is locality_weight * endpoint_weight. - ai := weightedroundrobin.AddrInfo{Weight: lw * ew} - addr := weightedroundrobin.SetAddrInfo(resolver.Address{Addr: endpoint.Address}, ai) - addr = hierarchy.Set(addr, []string{priorityName, localityStr}) - addr = internal.SetLocalityID(addr, locality.ID) - addrs = append(addrs, addr) - } - } - return addrs -} - -// localitiesToWeightedTarget takes a list of localities (with the same -// priority), and generates a weighted target config, and list of addresses. -// -// The addresses have path hierarchy set to [priority-name, locality-name], so -// priority and weighted target know which child policy they are for. -func localitiesToWeightedTarget(localities []xdsresource.Locality, priorityName string, childPolicy *internalserviceconfig.BalancerConfig) (*weightedtarget.LBConfig, []resolver.Address) { - weightedTargets := make(map[string]weightedtarget.Target) - var addrs []resolver.Address - for _, locality := range localities { - localityStr, err := locality.ID.ToString() - if err != nil { - localityStr = fmt.Sprintf("%+v", locality.ID) - } - weightedTargets[localityStr] = weightedtarget.Target{Weight: locality.Weight, ChildPolicy: childPolicy} - for _, endpoint := range locality.Endpoints { - // Filter out all "unhealthy" endpoints (unknown and healthy are - // both considered to be healthy: - // https://www.envoyproxy.io/docs/envoy/latest/api-v2/api/v2/core/health_check.proto#envoy-api-enum-core-healthstatus).
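The A52 quote above pins down the split: the raw locality weight travels in the wrr_locality attribute, while the per-address weight handed to weighted_round_robin is locality_weight * endpoint_weight. A tiny worked example with the weights used in the updated tests below (locality weight 20, endpoint weights 90 and 10):

package main

import "fmt"

func main() {
	lw := uint32(20)                      // locality weight
	for _, ew := range []uint32{90, 10} { // endpoint weights
		// Effective per-address weight: locality_weight * endpoint_weight.
		fmt.Println(lw * ew) // 1800, then 200
	}
}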
- if endpoint.HealthStatus != xdsresource.EndpointHealthStatusHealthy && endpoint.HealthStatus != xdsresource.EndpointHealthStatusUnknown { - continue - } - - addr := resolver.Address{Addr: endpoint.Address} - if childPolicy.Name == weightedroundrobin.Name && endpoint.Weight != 0 { - ai := weightedroundrobin.AddrInfo{Weight: endpoint.Weight} - addr = weightedroundrobin.SetAddrInfo(addr, ai) - } - addr = hierarchy.Set(addr, []string{priorityName, localityStr}) - addr = internal.SetLocalityID(addr, locality.ID) + addr = weightedroundrobin.SetAddrInfo(addr, weightedroundrobin.AddrInfo{Weight: lw * ew}) addrs = append(addrs, addr) } } - return &weightedtarget.LBConfig{Targets: weightedTargets}, addrs + return &clusterimpl.LBConfig{ + Cluster: mechanism.Cluster, + EDSServiceName: mechanism.EDSServiceName, + LoadReportingServer: mechanism.LoadReportingServer, + MaxConcurrentRequests: mechanism.MaxConcurrentRequests, + DropCategories: drops, + ChildPolicy: xdsLBPolicy, + }, addrs, nil } diff --git a/xds/internal/balancer/clusterresolver/configbuilder_test.go b/xds/internal/balancer/clusterresolver/configbuilder_test.go index 5fbb0b95e339..6c94cae9ed47 100644 --- a/xds/internal/balancer/clusterresolver/configbuilder_test.go +++ b/xds/internal/balancer/clusterresolver/configbuilder_test.go @@ -30,7 +30,6 @@ import ( "google.golang.org/grpc/balancer" "google.golang.org/grpc/balancer/roundrobin" "google.golang.org/grpc/balancer/weightedroundrobin" - "google.golang.org/grpc/balancer/weightedtarget" "google.golang.org/grpc/internal/hierarchy" internalserviceconfig "google.golang.org/grpc/internal/serviceconfig" "google.golang.org/grpc/resolver" @@ -39,6 +38,7 @@ import ( "google.golang.org/grpc/xds/internal/balancer/outlierdetection" "google.golang.org/grpc/xds/internal/balancer/priority" "google.golang.org/grpc/xds/internal/balancer/ringhash" + "google.golang.org/grpc/xds/internal/balancer/wrrlocality" "google.golang.org/grpc/xds/internal/xdsclient/xdsresource" ) @@ -68,7 +68,8 @@ var ( return out[i].Addr < out[j].Addr }) return out - })} + }), + } noopODCfg = outlierdetection.LBConfig{ Interval: 1<<63 - 1, @@ -230,21 +231,6 @@ func TestBuildPriorityConfig(t *testing.T) { Cluster: testClusterName, EDSServiceName: testEDSServiceName, DropCategories: []clusterimpl.DropConfig{}, - ChildPolicy: &internalserviceconfig.BalancerConfig{ - Name: weightedtarget.Name, - Config: &weightedtarget.LBConfig{ - Targets: map[string]weightedtarget.Target{ - assertString(testLocalityIDs[0].ToString): { - Weight: 20, - ChildPolicy: &internalserviceconfig.BalancerConfig{Name: roundrobin.Name}, - }, - assertString(testLocalityIDs[1].ToString): { - Weight: 80, - ChildPolicy: &internalserviceconfig.BalancerConfig{Name: roundrobin.Name}, - }, - }, - }, - }, }, }, }, @@ -262,21 +248,6 @@ func TestBuildPriorityConfig(t *testing.T) { Cluster: testClusterName, EDSServiceName: testEDSServiceName, DropCategories: []clusterimpl.DropConfig{}, - ChildPolicy: &internalserviceconfig.BalancerConfig{ - Name: weightedtarget.Name, - Config: &weightedtarget.LBConfig{ - Targets: map[string]weightedtarget.Target{ - assertString(testLocalityIDs[2].ToString): { - Weight: 20, - ChildPolicy: &internalserviceconfig.BalancerConfig{Name: roundrobin.Name}, - }, - assertString(testLocalityIDs[3].ToString): { - Weight: 80, - ChildPolicy: &internalserviceconfig.BalancerConfig{Name: roundrobin.Name}, - }, - }, - }, - }, }, }, }, @@ -393,21 +364,6 @@ func TestBuildClusterImplConfigForEDS(t *testing.T) { RequestsPerMillion: testDropOverMillion, }, }, - 
ChildPolicy: &internalserviceconfig.BalancerConfig{ - Name: weightedtarget.Name, - Config: &weightedtarget.LBConfig{ - Targets: map[string]weightedtarget.Target{ - assertString(testLocalityIDs[0].ToString): { - Weight: 20, - ChildPolicy: &internalserviceconfig.BalancerConfig{Name: roundrobin.Name}, - }, - assertString(testLocalityIDs[1].ToString): { - Weight: 80, - ChildPolicy: &internalserviceconfig.BalancerConfig{Name: roundrobin.Name}, - }, - }, - }, - }, }, "priority-2-1": { Cluster: testClusterName, @@ -420,32 +376,17 @@ func TestBuildClusterImplConfigForEDS(t *testing.T) { RequestsPerMillion: testDropOverMillion, }, }, - ChildPolicy: &internalserviceconfig.BalancerConfig{ - Name: weightedtarget.Name, - Config: &weightedtarget.LBConfig{ - Targets: map[string]weightedtarget.Target{ - assertString(testLocalityIDs[2].ToString): { - Weight: 20, - ChildPolicy: &internalserviceconfig.BalancerConfig{Name: roundrobin.Name}, - }, - assertString(testLocalityIDs[3].ToString): { - Weight: 80, - ChildPolicy: &internalserviceconfig.BalancerConfig{Name: roundrobin.Name}, - }, - }, - }, - }, }, } wantAddrs := []resolver.Address{ - testAddrWithAttrs(testAddressStrs[0][0], nil, "priority-2-0", &testLocalityIDs[0]), - testAddrWithAttrs(testAddressStrs[0][1], nil, "priority-2-0", &testLocalityIDs[0]), - testAddrWithAttrs(testAddressStrs[1][0], nil, "priority-2-0", &testLocalityIDs[1]), - testAddrWithAttrs(testAddressStrs[1][1], nil, "priority-2-0", &testLocalityIDs[1]), - testAddrWithAttrs(testAddressStrs[2][0], nil, "priority-2-1", &testLocalityIDs[2]), - testAddrWithAttrs(testAddressStrs[2][1], nil, "priority-2-1", &testLocalityIDs[2]), - testAddrWithAttrs(testAddressStrs[3][0], nil, "priority-2-1", &testLocalityIDs[3]), - testAddrWithAttrs(testAddressStrs[3][1], nil, "priority-2-1", &testLocalityIDs[3]), + testAddrWithAttrs(testAddressStrs[0][0], 20, 1, "priority-2-0", &testLocalityIDs[0]), + testAddrWithAttrs(testAddressStrs[0][1], 20, 1, "priority-2-0", &testLocalityIDs[0]), + testAddrWithAttrs(testAddressStrs[1][0], 80, 1, "priority-2-0", &testLocalityIDs[1]), + testAddrWithAttrs(testAddressStrs[1][1], 80, 1, "priority-2-0", &testLocalityIDs[1]), + testAddrWithAttrs(testAddressStrs[2][0], 20, 1, "priority-2-1", &testLocalityIDs[2]), + testAddrWithAttrs(testAddressStrs[2][1], 20, 1, "priority-2-1", &testLocalityIDs[2]), + testAddrWithAttrs(testAddressStrs[3][0], 80, 1, "priority-2-1", &testLocalityIDs[3]), + testAddrWithAttrs(testAddressStrs[3][1], 80, 1, "priority-2-1", &testLocalityIDs[3]), } if diff := cmp.Diff(gotNames, wantNames); diff != "" { @@ -594,31 +535,13 @@ func TestPriorityLocalitiesToClusterImpl(t *testing.T) { wantConfig: &clusterimpl.LBConfig{ Cluster: testClusterName, EDSServiceName: testEDSService, - ChildPolicy: &internalserviceconfig.BalancerConfig{ - Name: weightedtarget.Name, - Config: &weightedtarget.LBConfig{ - Targets: map[string]weightedtarget.Target{ - assertString(internal.LocalityID{Zone: "test-zone-1"}.ToString): { - Weight: 20, - ChildPolicy: &internalserviceconfig.BalancerConfig{ - Name: roundrobin.Name, - }, - }, - assertString(internal.LocalityID{Zone: "test-zone-2"}.ToString): { - Weight: 80, - ChildPolicy: &internalserviceconfig.BalancerConfig{ - Name: roundrobin.Name, - }, - }, - }, - }, - }, + ChildPolicy: &internalserviceconfig.BalancerConfig{Name: roundrobin.Name}, }, wantAddrs: []resolver.Address{ - testAddrWithAttrs("addr-1-1", nil, "test-priority", &internal.LocalityID{Zone: "test-zone-1"}), - testAddrWithAttrs("addr-1-2", nil, "test-priority", 
&internal.LocalityID{Zone: "test-zone-1"}), - testAddrWithAttrs("addr-2-1", nil, "test-priority", &internal.LocalityID{Zone: "test-zone-2"}), - testAddrWithAttrs("addr-2-2", nil, "test-priority", &internal.LocalityID{Zone: "test-zone-2"}), + testAddrWithAttrs("addr-1-1", 20, 90, "test-priority", &internal.LocalityID{Zone: "test-zone-1"}), + testAddrWithAttrs("addr-1-2", 20, 10, "test-priority", &internal.LocalityID{Zone: "test-zone-1"}), + testAddrWithAttrs("addr-2-1", 80, 90, "test-priority", &internal.LocalityID{Zone: "test-zone-2"}), + testAddrWithAttrs("addr-2-2", 80, 10, "test-priority", &internal.LocalityID{Zone: "test-zone-2"}), }, }, { @@ -651,26 +574,12 @@ func TestPriorityLocalitiesToClusterImpl(t *testing.T) { }, }, wantAddrs: []resolver.Address{ - testAddrWithAttrs("addr-1-1", newUint32(1800), "test-priority", &internal.LocalityID{Zone: "test-zone-1"}), - testAddrWithAttrs("addr-1-2", newUint32(200), "test-priority", &internal.LocalityID{Zone: "test-zone-1"}), - testAddrWithAttrs("addr-2-1", newUint32(7200), "test-priority", &internal.LocalityID{Zone: "test-zone-2"}), - testAddrWithAttrs("addr-2-2", newUint32(800), "test-priority", &internal.LocalityID{Zone: "test-zone-2"}), + testAddrWithAttrs("addr-1-1", 20, 90, "test-priority", &internal.LocalityID{Zone: "test-zone-1"}), + testAddrWithAttrs("addr-1-2", 20, 10, "test-priority", &internal.LocalityID{Zone: "test-zone-1"}), + testAddrWithAttrs("addr-2-1", 80, 90, "test-priority", &internal.LocalityID{Zone: "test-zone-2"}), + testAddrWithAttrs("addr-2-2", 80, 10, "test-priority", &internal.LocalityID{Zone: "test-zone-2"}), }, }, - { - name: "unsupported child", - localities: []xdsresource.Locality{{ - Endpoints: []xdsresource.Endpoint{ - {Address: "addr-1-1", HealthStatus: xdsresource.EndpointHealthStatusHealthy, Weight: 90}, - {Address: "addr-1-2", HealthStatus: xdsresource.EndpointHealthStatusHealthy, Weight: 10}, - }, - ID: internal.LocalityID{Zone: "test-zone-1"}, - Weight: 20, - }}, - priorityName: "test-priority", - childPolicy: &internalserviceconfig.BalancerConfig{Name: "some-child"}, - wantErr: true, - }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { @@ -688,267 +597,6 @@ func TestPriorityLocalitiesToClusterImpl(t *testing.T) { } } -func TestLocalitiesToWeightedTarget(t *testing.T) { - tests := []struct { - name string - localities []xdsresource.Locality - priorityName string - childPolicy *internalserviceconfig.BalancerConfig - lrsServer *string - wantConfig *weightedtarget.LBConfig - wantAddrs []resolver.Address - }{ - { - name: "roundrobin as child, with LRS", - localities: []xdsresource.Locality{ - { - Endpoints: []xdsresource.Endpoint{ - {Address: "addr-1-1", HealthStatus: xdsresource.EndpointHealthStatusHealthy}, - {Address: "addr-1-2", HealthStatus: xdsresource.EndpointHealthStatusHealthy}, - }, - ID: internal.LocalityID{Zone: "test-zone-1"}, - Weight: 20, - }, - { - Endpoints: []xdsresource.Endpoint{ - {Address: "addr-2-1", HealthStatus: xdsresource.EndpointHealthStatusHealthy}, - {Address: "addr-2-2", HealthStatus: xdsresource.EndpointHealthStatusHealthy}, - }, - ID: internal.LocalityID{Zone: "test-zone-2"}, - Weight: 80, - }, - }, - priorityName: "test-priority", - childPolicy: &internalserviceconfig.BalancerConfig{Name: roundrobin.Name}, - lrsServer: newString("test-lrs-server"), - wantConfig: &weightedtarget.LBConfig{ - Targets: map[string]weightedtarget.Target{ - assertString(internal.LocalityID{Zone: "test-zone-1"}.ToString): { - Weight: 20, - ChildPolicy: 
&internalserviceconfig.BalancerConfig{Name: roundrobin.Name}, - }, - assertString(internal.LocalityID{Zone: "test-zone-2"}.ToString): { - Weight: 80, - ChildPolicy: &internalserviceconfig.BalancerConfig{Name: roundrobin.Name}, - }, - }, - }, - wantAddrs: []resolver.Address{ - testAddrWithAttrs("addr-1-1", nil, "test-priority", &internal.LocalityID{Zone: "test-zone-1"}), - testAddrWithAttrs("addr-1-2", nil, "test-priority", &internal.LocalityID{Zone: "test-zone-1"}), - testAddrWithAttrs("addr-2-1", nil, "test-priority", &internal.LocalityID{Zone: "test-zone-2"}), - testAddrWithAttrs("addr-2-2", nil, "test-priority", &internal.LocalityID{Zone: "test-zone-2"}), - }, - }, - { - name: "roundrobin as child, no LRS", - localities: []xdsresource.Locality{ - { - Endpoints: []xdsresource.Endpoint{ - {Address: "addr-1-1", HealthStatus: xdsresource.EndpointHealthStatusHealthy}, - {Address: "addr-1-2", HealthStatus: xdsresource.EndpointHealthStatusHealthy}, - }, - ID: internal.LocalityID{Zone: "test-zone-1"}, - Weight: 20, - }, - { - Endpoints: []xdsresource.Endpoint{ - {Address: "addr-2-1", HealthStatus: xdsresource.EndpointHealthStatusHealthy}, - {Address: "addr-2-2", HealthStatus: xdsresource.EndpointHealthStatusHealthy}, - }, - ID: internal.LocalityID{Zone: "test-zone-2"}, - Weight: 80, - }, - }, - priorityName: "test-priority", - childPolicy: &internalserviceconfig.BalancerConfig{Name: roundrobin.Name}, - // lrsServer is nil, so LRS policy will not be used. - wantConfig: &weightedtarget.LBConfig{ - Targets: map[string]weightedtarget.Target{ - assertString(internal.LocalityID{Zone: "test-zone-1"}.ToString): { - Weight: 20, - ChildPolicy: &internalserviceconfig.BalancerConfig{ - Name: roundrobin.Name, - }, - }, - assertString(internal.LocalityID{Zone: "test-zone-2"}.ToString): { - Weight: 80, - ChildPolicy: &internalserviceconfig.BalancerConfig{ - Name: roundrobin.Name, - }, - }, - }, - }, - wantAddrs: []resolver.Address{ - testAddrWithAttrs("addr-1-1", nil, "test-priority", &internal.LocalityID{Zone: "test-zone-1"}), - testAddrWithAttrs("addr-1-2", nil, "test-priority", &internal.LocalityID{Zone: "test-zone-1"}), - testAddrWithAttrs("addr-2-1", nil, "test-priority", &internal.LocalityID{Zone: "test-zone-2"}), - testAddrWithAttrs("addr-2-2", nil, "test-priority", &internal.LocalityID{Zone: "test-zone-2"}), - }, - }, - { - name: "weighted round robin as child, no LRS", - localities: []xdsresource.Locality{ - { - Endpoints: []xdsresource.Endpoint{ - {Address: "addr-1-1", HealthStatus: xdsresource.EndpointHealthStatusHealthy, Weight: 90}, - {Address: "addr-1-2", HealthStatus: xdsresource.EndpointHealthStatusHealthy, Weight: 10}, - }, - ID: internal.LocalityID{Zone: "test-zone-1"}, - Weight: 20, - }, - { - Endpoints: []xdsresource.Endpoint{ - {Address: "addr-2-1", HealthStatus: xdsresource.EndpointHealthStatusHealthy, Weight: 90}, - {Address: "addr-2-2", HealthStatus: xdsresource.EndpointHealthStatusHealthy, Weight: 10}, - }, - ID: internal.LocalityID{Zone: "test-zone-2"}, - Weight: 80, - }, - }, - priorityName: "test-priority", - childPolicy: &internalserviceconfig.BalancerConfig{Name: weightedroundrobin.Name}, - // lrsServer is nil, so LRS policy will not be used. 
- wantConfig: &weightedtarget.LBConfig{ - Targets: map[string]weightedtarget.Target{ - assertString(internal.LocalityID{Zone: "test-zone-1"}.ToString): { - Weight: 20, - ChildPolicy: &internalserviceconfig.BalancerConfig{ - Name: weightedroundrobin.Name, - }, - }, - assertString(internal.LocalityID{Zone: "test-zone-2"}.ToString): { - Weight: 80, - ChildPolicy: &internalserviceconfig.BalancerConfig{ - Name: weightedroundrobin.Name, - }, - }, - }, - }, - wantAddrs: []resolver.Address{ - testAddrWithAttrs("addr-1-1", newUint32(90), "test-priority", &internal.LocalityID{Zone: "test-zone-1"}), - testAddrWithAttrs("addr-1-2", newUint32(10), "test-priority", &internal.LocalityID{Zone: "test-zone-1"}), - testAddrWithAttrs("addr-2-1", newUint32(90), "test-priority", &internal.LocalityID{Zone: "test-zone-2"}), - testAddrWithAttrs("addr-2-2", newUint32(10), "test-priority", &internal.LocalityID{Zone: "test-zone-2"}), - }, - }, - } - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - got, got1 := localitiesToWeightedTarget(tt.localities, tt.priorityName, tt.childPolicy) - if diff := cmp.Diff(got, tt.wantConfig); diff != "" { - t.Errorf("localitiesToWeightedTarget() diff (-got +want) %v", diff) - } - if diff := cmp.Diff(got1, tt.wantAddrs, cmp.AllowUnexported(attributes.Attributes{})); diff != "" { - t.Errorf("localitiesToWeightedTarget() diff (-got +want) %v", diff) - } - }) - } -} - -func TestLocalitiesToRingHash(t *testing.T) { - tests := []struct { - name string - localities []xdsresource.Locality - priorityName string - wantAddrs []resolver.Address - }{ - { - // Check that address weights are locality_weight * endpoint_weight. - name: "with locality and endpoint weight", - localities: []xdsresource.Locality{ - { - Endpoints: []xdsresource.Endpoint{ - {Address: "addr-1-1", HealthStatus: xdsresource.EndpointHealthStatusHealthy, Weight: 90}, - {Address: "addr-1-2", HealthStatus: xdsresource.EndpointHealthStatusHealthy, Weight: 10}, - }, - ID: internal.LocalityID{Zone: "test-zone-1"}, - Weight: 20, - }, - { - Endpoints: []xdsresource.Endpoint{ - {Address: "addr-2-1", HealthStatus: xdsresource.EndpointHealthStatusHealthy, Weight: 90}, - {Address: "addr-2-2", HealthStatus: xdsresource.EndpointHealthStatusHealthy, Weight: 10}, - }, - ID: internal.LocalityID{Zone: "test-zone-2"}, - Weight: 80, - }, - }, - priorityName: "test-priority", - wantAddrs: []resolver.Address{ - testAddrWithAttrs("addr-1-1", newUint32(1800), "test-priority", &internal.LocalityID{Zone: "test-zone-1"}), - testAddrWithAttrs("addr-1-2", newUint32(200), "test-priority", &internal.LocalityID{Zone: "test-zone-1"}), - testAddrWithAttrs("addr-2-1", newUint32(7200), "test-priority", &internal.LocalityID{Zone: "test-zone-2"}), - testAddrWithAttrs("addr-2-2", newUint32(800), "test-priority", &internal.LocalityID{Zone: "test-zone-2"}), - }, - }, - { - // Check that endpoint_weight is 0, weight is the locality weight. 
- name: "locality weight only", - localities: []xdsresource.Locality{ - { - Endpoints: []xdsresource.Endpoint{ - {Address: "addr-1-1", HealthStatus: xdsresource.EndpointHealthStatusHealthy}, - {Address: "addr-1-2", HealthStatus: xdsresource.EndpointHealthStatusHealthy}, - }, - ID: internal.LocalityID{Zone: "test-zone-1"}, - Weight: 20, - }, - { - Endpoints: []xdsresource.Endpoint{ - {Address: "addr-2-1", HealthStatus: xdsresource.EndpointHealthStatusHealthy}, - {Address: "addr-2-2", HealthStatus: xdsresource.EndpointHealthStatusHealthy}, - }, - ID: internal.LocalityID{Zone: "test-zone-2"}, - Weight: 80, - }, - }, - priorityName: "test-priority", - wantAddrs: []resolver.Address{ - testAddrWithAttrs("addr-1-1", newUint32(20), "test-priority", &internal.LocalityID{Zone: "test-zone-1"}), - testAddrWithAttrs("addr-1-2", newUint32(20), "test-priority", &internal.LocalityID{Zone: "test-zone-1"}), - testAddrWithAttrs("addr-2-1", newUint32(80), "test-priority", &internal.LocalityID{Zone: "test-zone-2"}), - testAddrWithAttrs("addr-2-2", newUint32(80), "test-priority", &internal.LocalityID{Zone: "test-zone-2"}), - }, - }, - { - // Check that locality_weight is 0, weight is the endpoint weight. - name: "endpoint weight only", - localities: []xdsresource.Locality{ - { - Endpoints: []xdsresource.Endpoint{ - {Address: "addr-1-1", HealthStatus: xdsresource.EndpointHealthStatusHealthy, Weight: 90}, - {Address: "addr-1-2", HealthStatus: xdsresource.EndpointHealthStatusHealthy, Weight: 10}, - }, - ID: internal.LocalityID{Zone: "test-zone-1"}, - }, - { - Endpoints: []xdsresource.Endpoint{ - {Address: "addr-2-1", HealthStatus: xdsresource.EndpointHealthStatusHealthy, Weight: 90}, - {Address: "addr-2-2", HealthStatus: xdsresource.EndpointHealthStatusHealthy, Weight: 10}, - }, - ID: internal.LocalityID{Zone: "test-zone-2"}, - }, - }, - priorityName: "test-priority", - wantAddrs: []resolver.Address{ - testAddrWithAttrs("addr-1-1", newUint32(90), "test-priority", &internal.LocalityID{Zone: "test-zone-1"}), - testAddrWithAttrs("addr-1-2", newUint32(10), "test-priority", &internal.LocalityID{Zone: "test-zone-1"}), - testAddrWithAttrs("addr-2-1", newUint32(90), "test-priority", &internal.LocalityID{Zone: "test-zone-2"}), - testAddrWithAttrs("addr-2-2", newUint32(10), "test-priority", &internal.LocalityID{Zone: "test-zone-2"}), - }, - }, - } - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - got := localitiesToRingHash(tt.localities, tt.priorityName) - if diff := cmp.Diff(got, tt.wantAddrs, cmp.AllowUnexported(attributes.Attributes{})); diff != "" { - t.Errorf("localitiesToWeightedTarget() diff (-got +want) %v", diff) - } - }) - } -} - func assertString(f func() (string, error)) string { s, err := f() if err != nil { @@ -957,17 +605,16 @@ func assertString(f func() (string, error)) string { return s } -func testAddrWithAttrs(addrStr string, weight *uint32, priority string, lID *internal.LocalityID) resolver.Address { +func testAddrWithAttrs(addrStr string, localityWeight, endpointWeight uint32, priority string, lID *internal.LocalityID) resolver.Address { addr := resolver.Address{Addr: addrStr} - if weight != nil { - addr = weightedroundrobin.SetAddrInfo(addr, weightedroundrobin.AddrInfo{Weight: *weight}) - } path := []string{priority} if lID != nil { path = append(path, assertString(lID.ToString)) addr = internal.SetLocalityID(addr, *lID) } addr = hierarchy.Set(addr, path) + addr = wrrlocality.SetAddrInfo(addr, wrrlocality.AddrInfo{LocalityWeight: localityWeight}) + addr = 
weightedroundrobin.SetAddrInfo(addr, weightedroundrobin.AddrInfo{Weight: localityWeight * endpointWeight}) return addr } diff --git a/xds/internal/balancer/clusterresolver/e2e_test/eds_impl_test.go b/xds/internal/balancer/clusterresolver/e2e_test/eds_impl_test.go index 053b56f0dc86..c7c2ab9945f0 100644 --- a/xds/internal/balancer/clusterresolver/e2e_test/eds_impl_test.go +++ b/xds/internal/balancer/clusterresolver/e2e_test/eds_impl_test.go @@ -364,23 +364,6 @@ func (s) TestEDS_MultipleLocalities(t *testing.T) { if err := rrutil.CheckWeightedRoundRobinRPCs(ctx, testClient, wantAddrs); err != nil { t.Fatal(err) } - - // Change the weight of locality2 and ensure weighted roundrobin. Since - // locality2 has twice the weight of locality3, it will be picked twice as - // frequently as locality3 for RPCs. And since locality2 has a single - // backend and locality3 has two backends, the backend in locality2 will - // receive four times the traffic of each of locality3's backends. - resources = clientEndpointsResource(nodeID, edsServiceName, []localityInfo{ - {name: localityName2, weight: 2, ports: ports[1:2]}, - {name: localityName3, weight: 1, ports: ports[2:4]}, - }) - if err := managementServer.Update(ctx, resources); err != nil { - t.Fatal(err) - } - wantAddrs = []resolver.Address{addrs[1], addrs[1], addrs[1], addrs[1], addrs[2], addrs[3]} - if err := rrutil.CheckWeightedRoundRobinRPCs(ctx, testClient, wantAddrs); err != nil { - t.Fatal(err) - } } // TestEDS_EndpointsHealth tests the cluster_resolver LB policy using an EDS diff --git a/xds/internal/balancer/clusterresolver/priority_test.go b/xds/internal/balancer/clusterresolver/priority_test.go index fdcef37f2d8e..68325a31c17e 100644 --- a/xds/internal/balancer/clusterresolver/priority_test.go +++ b/xds/internal/balancer/clusterresolver/priority_test.go @@ -26,6 +26,7 @@ import ( corepb "github.com/envoyproxy/go-control-plane/envoy/api/v2/core" "github.com/google/go-cmp/cmp" + "google.golang.org/grpc/balancer" "google.golang.org/grpc/balancer/weightedtarget" "google.golang.org/grpc/connectivity" @@ -35,15 +36,24 @@ import ( "google.golang.org/grpc/resolver" "google.golang.org/grpc/xds/internal/balancer/clusterimpl" "google.golang.org/grpc/xds/internal/balancer/priority" + "google.golang.org/grpc/xds/internal/balancer/wrrlocality" xdstestutils "google.golang.org/grpc/xds/internal/testutils" "google.golang.org/grpc/xds/internal/testutils/fakeclient" "google.golang.org/grpc/xds/internal/xdsclient" ) var ( - testClusterNames = []string{"test-cluster-1", "test-cluster-2"} - testSubZones = []string{"I", "II", "III", "IV"} - testEndpointAddrs []string + testClusterNames = []string{"test-cluster-1", "test-cluster-2"} + testSubZones = []string{"I", "II", "III", "IV"} + testEndpointAddrs []string + wrrLocalityLBConfig = &internalserviceconfig.BalancerConfig{ + Name: wrrlocality.Name, + Config: &wrrlocality.LBConfig{ + ChildPolicy: &internalserviceconfig.BalancerConfig{ + Name: "round_robin", + }, + }, + } ) const testBackendAddrsCount = 12 @@ -75,6 +85,7 @@ func setupTestEDS(t *testing.T, initChild *internalserviceconfig.BalancerConfig) Cluster: testClusterName, Type: DiscoveryMechanismTypeEDS, }}, + XDSLBPolicy: wrrLocalityLBConfig, }, }); err != nil { edsb.Close() @@ -844,6 +855,7 @@ func (s) TestFallbackToDNS(t *testing.T) { DNSHostname: testDNSTarget, }, }, + XDSLBPolicy: wrrLocalityLBConfig, }, }); err != nil { t.Fatal(err) diff --git a/xds/internal/balancer/wrrlocality/balancer.go b/xds/internal/balancer/wrrlocality/balancer.go index 
2ff6fccf89bd..ac63e84e62fb 100644 --- a/xds/internal/balancer/wrrlocality/balancer.go +++ b/xds/internal/balancer/wrrlocality/balancer.go @@ -28,8 +28,12 @@ import ( "fmt" "google.golang.org/grpc/balancer" + "google.golang.org/grpc/balancer/weightedtarget" + "google.golang.org/grpc/internal/grpclog" internalserviceconfig "google.golang.org/grpc/internal/serviceconfig" + "google.golang.org/grpc/resolver" "google.golang.org/grpc/serviceconfig" + "google.golang.org/grpc/xds/internal" ) // Name is the name of wrr_locality balancer. @@ -45,10 +49,6 @@ func (bb) Name() string { return Name } -func (bb) Build(cc balancer.ClientConn, bOpts balancer.BuildOptions) balancer.Balancer { - return nil -} - // LBConfig is the config for the wrr locality balancer. type LBConfig struct { serviceconfig.LoadBalancingConfig @@ -56,13 +56,146 @@ type LBConfig struct { ChildPolicy *internalserviceconfig.BalancerConfig `json:"childPolicy,omitempty"` } +// To plumb in a different child in tests. +var weightedTargetName = weightedtarget.Name + +func (bb) Build(cc balancer.ClientConn, bOpts balancer.BuildOptions) balancer.Balancer { + builder := balancer.Get(weightedTargetName) + if builder == nil { + // Shouldn't happen, registered through imported weighted target, + // defensive programming. + return nil + } + + // Doesn't need to intercept any balancer.ClientConn operations; pass + // through by just giving cc to child balancer. + wtb := builder.Build(cc, bOpts) + if wtb == nil { + // shouldn't happen, defensive programming. + return nil + } + wtbCfgParser, ok := builder.(balancer.ConfigParser) + if !ok { + // Shouldn't happen, imported weighted target builder has this method. + return nil + } + wrrL := &wrrLocalityBalancer{ + child: wtb, + childParser: wtbCfgParser, + } + + wrrL.logger = prefixLogger(wrrL) + wrrL.logger.Infof("Created") + return wrrL +} + func (bb) ParseConfig(s json.RawMessage) (serviceconfig.LoadBalancingConfig, error) { var lbCfg *LBConfig if err := json.Unmarshal(s, &lbCfg); err != nil { - return nil, fmt.Errorf("xds: invalid LBConfig for wrrlocality: %s, error: %v", string(s), err) + return nil, fmt.Errorf("xds_wrr_locality: invalid LBConfig: %s, error: %v", string(s), err) } if lbCfg == nil || lbCfg.ChildPolicy == nil { - return nil, errors.New("xds: invalidw LBConfig for wrrlocality: child policy field must be set") + return nil, errors.New("xds_wrr_locality: invalid LBConfig: child policy field must be set") } return lbCfg, nil } + +type attributeKey struct{} + +// Equal allows the values to be compared by Attributes.Equal. +func (a AddrInfo) Equal(o interface{}) bool { + oa, ok := o.(AddrInfo) + return ok && oa.LocalityWeight == a.LocalityWeight +} + +// AddrInfo is the locality weight of the locality an address is a part of. +type AddrInfo struct { + LocalityWeight uint32 +} + +// SetAddrInfo returns a copy of addr in which the BalancerAttributes field is +// updated with AddrInfo. +func SetAddrInfo(addr resolver.Address, addrInfo AddrInfo) resolver.Address { + addr.BalancerAttributes = addr.BalancerAttributes.WithValue(attributeKey{}, addrInfo) + return addr +} + +func (a AddrInfo) String() string { + return fmt.Sprintf("Locality Weight: %d", a.LocalityWeight) +} + +// getAddrInfo returns the AddrInfo stored in the BalancerAttributes field of +// addr. Returns false if no AddrInfo found. 
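SetAddrInfo and getAddrInfo form the attribute round trip: the cluster resolver stamps each address with its locality weight, and wrrLocalityBalancer (below) reads it back when synthesizing the weighted_target config. A small usage sketch (the address is made up; getAddrInfo is unexported, so outside this package the value is only visible through AddrInfo's String method):

package main

import (
	"fmt"

	"google.golang.org/grpc/resolver"
	"google.golang.org/grpc/xds/internal/balancer/wrrlocality"
)

func main() {
	ai := wrrlocality.AddrInfo{LocalityWeight: 20}
	// Stored in BalancerAttributes, alongside the hierarchy path and the
	// locality ID that the cluster resolver also attaches.
	addr := wrrlocality.SetAddrInfo(resolver.Address{Addr: "10.0.0.1:443"}, ai)
	fmt.Println(ai)   // Locality Weight: 20
	_ = addr          // handed to the child balancers via resolver state
}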
+func getAddrInfo(addr resolver.Address) (AddrInfo, bool) { + v := addr.BalancerAttributes.Value(attributeKey{}) + ai, ok := v.(AddrInfo) + return ai, ok +} + +// wrrLocalityBalancer wraps a weighted target balancer, and builds +// configuration for the weighted target once it receives configuration +// specifying the weighted target child balancer and locality weight +// information. +type wrrLocalityBalancer struct { + // child will be a weighted target balancer, and is built at + // wrrLocalityBalancer build time. Other than preparing configuration, all + // other balancer operations are simply passed through. + child balancer.Balancer + + childParser balancer.ConfigParser + + logger *grpclog.PrefixLogger +} + +func (b *wrrLocalityBalancer) UpdateClientConnState(s balancer.ClientConnState) error { + lbCfg, ok := s.BalancerConfig.(*LBConfig) + if !ok { + b.logger.Errorf("Received config with unexpected type %T: %v", s.BalancerConfig, s.BalancerConfig) + return balancer.ErrBadResolverState + } + + weightedTargets := make(map[string]weightedtarget.Target) + for _, addr := range s.ResolverState.Addresses { + // Reading the LocalityID here could potentially return a zero value. + // This shouldn't happen though (the attribute is set when localities + // are built in the first place), so don't error out; just build a + // weighted target with undefined behavior. + locality, err := internal.GetLocalityID(addr).ToString() + if err != nil { + // Should never happen. + logger.Errorf("Failed to marshal LocalityID: %v, skipping this locality in weighted target", err) + } + ai, ok := getAddrInfo(addr) + if !ok { + return fmt.Errorf("xds_wrr_locality: missing locality weight information in address %q", addr) + } + weightedTargets[locality] = weightedtarget.Target{Weight: ai.LocalityWeight, ChildPolicy: lbCfg.ChildPolicy} + } + wtCfg := &weightedtarget.LBConfig{Targets: weightedTargets} + wtCfgJSON, err := json.Marshal(wtCfg) + if err != nil { + // Shouldn't happen.
+ return fmt.Errorf("xds_wrr_locality: error marshalling prepared config: %v", wtCfg) + } + var sc serviceconfig.LoadBalancingConfig + if sc, err = b.childParser.ParseConfig(wtCfgJSON); err != nil { + return fmt.Errorf("xds_wrr_locality: config generated %v is invalid: %v", wtCfgJSON, err) + } + + return b.child.UpdateClientConnState(balancer.ClientConnState{ + ResolverState: s.ResolverState, + BalancerConfig: sc, + }) +} + +func (b *wrrLocalityBalancer) ResolverError(err error) { + b.child.ResolverError(err) +} + +func (b *wrrLocalityBalancer) UpdateSubConnState(sc balancer.SubConn, scState balancer.SubConnState) { + b.child.UpdateSubConnState(sc, scState) +} + +func (b *wrrLocalityBalancer) Close() { + b.child.Close() +} diff --git a/xds/internal/balancer/wrrlocality/balancer_test.go b/xds/internal/balancer/wrrlocality/balancer_test.go index 9283b02f14b2..f0da7413bdb8 100644 --- a/xds/internal/balancer/wrrlocality/balancer_test.go +++ b/xds/internal/balancer/wrrlocality/balancer_test.go @@ -19,17 +19,28 @@ package wrrlocality import ( + "context" "encoding/json" "errors" "strings" "testing" + "time" "github.com/google/go-cmp/cmp" + "google.golang.org/grpc/balancer" "google.golang.org/grpc/balancer/roundrobin" + "google.golang.org/grpc/balancer/weightedtarget" "google.golang.org/grpc/internal/balancer/stub" "google.golang.org/grpc/internal/grpctest" internalserviceconfig "google.golang.org/grpc/internal/serviceconfig" + "google.golang.org/grpc/internal/testutils" + "google.golang.org/grpc/resolver" "google.golang.org/grpc/serviceconfig" + "google.golang.org/grpc/xds/internal" +) + +const ( + defaultTestTimeout = 5 * time.Second ) type s struct { @@ -119,3 +130,123 @@ func (s) TestParseConfig(t *testing.T) { }) } } + +// TestUpdateClientConnState tests the UpdateClientConnState method of the +// wrr_locality_experimental balancer. This UpdateClientConn operation should +// take the localities and their weights in the addresses passed in, alongside +// the endpoint picking policy defined in the Balancer Config and construct a +// weighted target configuration corresponding to these inputs. +func (s) TestUpdateClientConnState(t *testing.T) { + // Configure the stub balancer defined below as the child policy of + // wrrLocalityBalancer. + cfgCh := testutils.NewChannel() + oldWeightedTargetName := weightedTargetName + defer func() { + weightedTargetName = oldWeightedTargetName + }() + weightedTargetName = "fake_weighted_target" + stub.Register("fake_weighted_target", stub.BalancerFuncs{ + ParseConfig: func(c json.RawMessage) (serviceconfig.LoadBalancingConfig, error) { + var cfg weightedtarget.LBConfig + if err := json.Unmarshal(c, &cfg); err != nil { + return nil, err + } + return &cfg, nil + }, + UpdateClientConnState: func(bd *stub.BalancerData, ccs balancer.ClientConnState) error { + wtCfg, ok := ccs.BalancerConfig.(*weightedtarget.LBConfig) + if !ok { + return errors.New("child received config that was not a weighted target config") + } + defer cfgCh.Send(wtCfg) + return nil + }, + }) + + builder := balancer.Get(Name) + if builder == nil { + t.Fatalf("balancer.Get(%q) returned nil", Name) + } + tcc := testutils.NewTestClientConn(t) + bal := builder.Build(tcc, balancer.BuildOptions{}) + defer bal.Close() + wrrL := bal.(*wrrLocalityBalancer) + + // Create the addresses with two localities with certain locality weights. + // This represents what addresses the wrr_locality balancer will receive in + // UpdateClientConnState. 
+ addr1 := resolver.Address{ + Addr: "locality-1", + } + addr1 = internal.SetLocalityID(addr1, internal.LocalityID{ + Region: "region-1", + Zone: "zone-1", + SubZone: "subzone-1", + }) + addr1 = SetAddrInfo(addr1, AddrInfo{LocalityWeight: 2}) + + addr2 := resolver.Address{ + Addr: "locality-2", + } + addr2 = internal.SetLocalityID(addr2, internal.LocalityID{ + Region: "region-2", + Zone: "zone-2", + SubZone: "subzone-2", + }) + addr2 = SetAddrInfo(addr2, AddrInfo{LocalityWeight: 1}) + addrs := []resolver.Address{addr1, addr2} + + err := wrrL.UpdateClientConnState(balancer.ClientConnState{ + BalancerConfig: &LBConfig{ + ChildPolicy: &internalserviceconfig.BalancerConfig{ + Name: "round_robin", + }, + }, + ResolverState: resolver.State{ + Addresses: addrs, + }, + }) + if err != nil { + t.Fatalf("Unexpected error from UpdateClientConnState: %v", err) + } + + // Note that these inline strings declared as the key in Targets built from + // Locality ID are not exactly what is shown in the example in the gRFC. + // However, this is an implementation detail that does not affect + // correctness (confirmed with Java team). The important thing is to get + // those three pieces of information region, zone, and subzone down to the + // child layer. + wantWtCfg := &weightedtarget.LBConfig{ + Targets: map[string]weightedtarget.Target{ + "{\"region\":\"region-1\",\"zone\":\"zone-1\",\"subZone\":\"subzone-1\"}": { + Weight: 2, + ChildPolicy: &internalserviceconfig.BalancerConfig{ + Name: "round_robin", + }, + }, + "{\"region\":\"region-2\",\"zone\":\"zone-2\",\"subZone\":\"subzone-2\"}": { + Weight: 1, + ChildPolicy: &internalserviceconfig.BalancerConfig{ + Name: "round_robin", + }, + }, + }, + } + + ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout) + defer cancel() + cfg, err := cfgCh.Receive(ctx) + if err != nil { + t.Fatalf("No signal received from UpdateClientConnState() on the child: %v", err) + } + + gotWtCfg, ok := cfg.(*weightedtarget.LBConfig) + if !ok { + // Shouldn't happen - only sends a config on this channel. + t.Fatalf("Unexpected config type: %T", gotWtCfg) + } + + if diff := cmp.Diff(gotWtCfg, wantWtCfg); diff != "" { + t.Fatalf("Child received unexpected config, diff (-got, +want): %v", diff) + } +} diff --git a/xds/internal/balancer/wrrlocality/logging.go b/xds/internal/balancer/wrrlocality/logging.go new file mode 100644 index 000000000000..42ccea0a92b2 --- /dev/null +++ b/xds/internal/balancer/wrrlocality/logging.go @@ -0,0 +1,34 @@ +/* + * + * Copyright 2023 gRPC authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * + */ + +package wrrlocality + +import ( + "fmt" + + "google.golang.org/grpc/grpclog" + internalgrpclog "google.golang.org/grpc/internal/grpclog" +) + +const prefix = "[wrrlocality-lb %p] " + +var logger = grpclog.Component("xds") + +func prefixLogger(p *wrrLocalityBalancer) *internalgrpclog.PrefixLogger { + return internalgrpclog.NewPrefixLogger(logger, fmt.Sprintf(prefix, p)) +} diff --git a/xds/internal/xdsclient/tests/cds_watchers_test.go b/xds/internal/xdsclient/tests/cds_watchers_test.go index 3583fa929d96..9670caaca0a6 100644 --- a/xds/internal/xdsclient/tests/cds_watchers_test.go +++ b/xds/internal/xdsclient/tests/cds_watchers_test.go @@ -70,7 +70,7 @@ func verifyClusterUpdate(ctx context.Context, updateCh *testutils.Channel, wantU return fmt.Errorf("received update with error type %v, want %v", gotType, wantType) } } - cmpOpts := []cmp.Option{cmpopts.EquateEmpty(), cmpopts.IgnoreFields(xdsresource.ClusterUpdate{}, "Raw", "LBPolicyJSON")} + cmpOpts := []cmp.Option{cmpopts.EquateEmpty(), cmpopts.IgnoreFields(xdsresource.ClusterUpdate{}, "Raw", "LBPolicy")} if diff := cmp.Diff(wantUpdate.Update, got.Update, cmpOpts...); diff != "" { return fmt.Errorf("received unepected diff in the cluster resource update: (-want, got):\n%s", diff) } diff --git a/xds/internal/xdsclient/tests/eds_watchers_test.go b/xds/internal/xdsclient/tests/eds_watchers_test.go index 9b220fc59f2c..4cc365e70ead 100644 --- a/xds/internal/xdsclient/tests/eds_watchers_test.go +++ b/xds/internal/xdsclient/tests/eds_watchers_test.go @@ -134,9 +134,13 @@ func (s) TestEDSWatch(t *testing.T) { Localities: []xdsresource.Locality{ { Endpoints: []xdsresource.Endpoint{{Address: fmt.Sprintf("%s:%d", edsHost1, edsPort1), Weight: 1}}, - ID: internal.LocalityID{SubZone: "subzone"}, - Priority: 0, - Weight: 1, + ID: internal.LocalityID{ + Region: "region-1", + Zone: "zone-1", + SubZone: "subzone-1", + }, + Priority: 0, + Weight: 1, }, }, }, @@ -153,9 +157,13 @@ func (s) TestEDSWatch(t *testing.T) { Localities: []xdsresource.Locality{ { Endpoints: []xdsresource.Endpoint{{Address: fmt.Sprintf("%s:%d", edsHost1, edsPort1), Weight: 1}}, - ID: internal.LocalityID{SubZone: "subzone"}, - Priority: 0, - Weight: 1, + ID: internal.LocalityID{ + Region: "region-1", + Zone: "zone-1", + SubZone: "subzone-1", + }, + Priority: 0, + Weight: 1, }, }, }, @@ -265,9 +273,13 @@ func (s) TestEDSWatch_TwoWatchesForSameResourceName(t *testing.T) { Localities: []xdsresource.Locality{ { Endpoints: []xdsresource.Endpoint{{Address: fmt.Sprintf("%s:%d", edsHost1, edsPort1), Weight: 1}}, - ID: internal.LocalityID{SubZone: "subzone"}, - Priority: 0, - Weight: 1, + ID: internal.LocalityID{ + Region: "region-1", + Zone: "zone-1", + SubZone: "subzone-1", + }, + Priority: 0, + Weight: 1, }, }, }, @@ -277,9 +289,13 @@ func (s) TestEDSWatch_TwoWatchesForSameResourceName(t *testing.T) { Localities: []xdsresource.Locality{ { Endpoints: []xdsresource.Endpoint{{Address: fmt.Sprintf("%s:%d", edsHost2, edsPort2), Weight: 1}}, - ID: internal.LocalityID{SubZone: "subzone"}, - Priority: 0, - Weight: 1, + ID: internal.LocalityID{ + Region: "region-1", + Zone: "zone-1", + SubZone: "subzone-1", + }, + Priority: 0, + Weight: 1, }, }, }, @@ -295,9 +311,13 @@ func (s) TestEDSWatch_TwoWatchesForSameResourceName(t *testing.T) { Localities: []xdsresource.Locality{ { Endpoints: []xdsresource.Endpoint{{Address: fmt.Sprintf("%s:%d", edsHost1, edsPort1), Weight: 1}}, - ID: internal.LocalityID{SubZone: "subzone"}, - Priority: 0, - Weight: 1, + ID: internal.LocalityID{ + Region: 
"region-1", + Zone: "zone-1", + SubZone: "subzone-1", + }, + Priority: 0, + Weight: 1, }, }, }, @@ -307,9 +327,13 @@ func (s) TestEDSWatch_TwoWatchesForSameResourceName(t *testing.T) { Localities: []xdsresource.Locality{ { Endpoints: []xdsresource.Endpoint{{Address: fmt.Sprintf("%s:%d", edsHost2, edsPort2), Weight: 1}}, - ID: internal.LocalityID{SubZone: "subzone"}, - Priority: 0, - Weight: 1, + ID: internal.LocalityID{ + Region: "region-1", + Zone: "zone-1", + SubZone: "subzone-1", + }, + Priority: 0, + Weight: 1, }, }, }, @@ -460,9 +484,13 @@ func (s) TestEDSWatch_ThreeWatchesForDifferentResourceNames(t *testing.T) { Localities: []xdsresource.Locality{ { Endpoints: []xdsresource.Endpoint{{Address: fmt.Sprintf("%s:%d", edsHost1, edsPort1), Weight: 1}}, - ID: internal.LocalityID{SubZone: "subzone"}, - Priority: 0, - Weight: 1, + ID: internal.LocalityID{ + Region: "region-1", + Zone: "zone-1", + SubZone: "subzone-1", + }, + Priority: 0, + Weight: 1, }, }, }, @@ -541,9 +569,13 @@ func (s) TestEDSWatch_ResourceCaching(t *testing.T) { Localities: []xdsresource.Locality{ { Endpoints: []xdsresource.Endpoint{{Address: fmt.Sprintf("%s:%d", edsHost1, edsPort1), Weight: 1}}, - ID: internal.LocalityID{SubZone: "subzone"}, - Priority: 0, - Weight: 1, + ID: internal.LocalityID{ + Region: "region-1", + Zone: "zone-1", + SubZone: "subzone-1", + }, + Priority: 0, + Weight: 1, }, }, }, @@ -669,9 +701,13 @@ func (s) TestEDSWatch_ValidResponseCancelsExpiryTimerBehavior(t *testing.T) { Localities: []xdsresource.Locality{ { Endpoints: []xdsresource.Endpoint{{Address: fmt.Sprintf("%s:%d", edsHost1, edsPort1), Weight: 1}}, - ID: internal.LocalityID{SubZone: "subzone"}, - Priority: 0, - Weight: 1, + ID: internal.LocalityID{ + Region: "region-1", + Zone: "zone-1", + SubZone: "subzone-1", + }, + Priority: 0, + Weight: 1, }, }, }, @@ -801,9 +837,13 @@ func (s) TestEDSWatch_PartialValid(t *testing.T) { Localities: []xdsresource.Locality{ { Endpoints: []xdsresource.Endpoint{{Address: fmt.Sprintf("%s:%d", edsHost1, edsPort1), Weight: 1}}, - ID: internal.LocalityID{SubZone: "subzone"}, - Priority: 0, - Weight: 1, + ID: internal.LocalityID{ + Region: "region-1", + Zone: "zone-1", + SubZone: "subzone-1", + }, + Priority: 0, + Weight: 1, }, }, }, diff --git a/xds/internal/xdsclient/tests/federation_watchers_test.go b/xds/internal/xdsclient/tests/federation_watchers_test.go index 974e6221aab9..4298ce6c0885 100644 --- a/xds/internal/xdsclient/tests/federation_watchers_test.go +++ b/xds/internal/xdsclient/tests/federation_watchers_test.go @@ -305,7 +305,11 @@ func (s) TestFederation_EndpointsResourceContextParamOrder(t *testing.T) { { Endpoints: []xdsresource.Endpoint{{Address: "localhost:666", Weight: 1}}, Weight: 1, - ID: internal.LocalityID{SubZone: "subzone"}, + ID: internal.LocalityID{ + Region: "region-1", + Zone: "zone-1", + SubZone: "subzone-1", + }, }, }, }, diff --git a/xds/internal/xdsclient/tests/resource_update_test.go b/xds/internal/xdsclient/tests/resource_update_test.go index ff6cf7c756a4..7dd368aa5e24 100644 --- a/xds/internal/xdsclient/tests/resource_update_test.go +++ b/xds/internal/xdsclient/tests/resource_update_test.go @@ -802,7 +802,7 @@ func (s) TestHandleClusterResponseFromManagementServer(t *testing.T) { } cmpOpts := []cmp.Option{ cmpopts.EquateEmpty(), - cmpopts.IgnoreFields(xdsresource.ClusterUpdate{}, "Raw", "LBPolicyJSON"), + cmpopts.IgnoreFields(xdsresource.ClusterUpdate{}, "Raw", "LBPolicy"), } if diff := cmp.Diff(test.wantUpdate, gotUpdate, cmpOpts...); diff != "" { t.Fatalf("Unexpected diff 
in metadata, diff (-want +got):\n%s", diff) diff --git a/xds/internal/xdsclient/xdsresource/tests/unmarshal_cds_test.go b/xds/internal/xdsclient/xdsresource/tests/unmarshal_cds_test.go index 7d20b1ff61e4..96ad204ad4b3 100644 --- a/xds/internal/xdsclient/xdsresource/tests/unmarshal_cds_test.go +++ b/xds/internal/xdsclient/xdsresource/tests/unmarshal_cds_test.go @@ -357,7 +357,6 @@ func (s) TestValidateCluster_Success(t *testing.T) { }, wantUpdate: xdsresource.ClusterUpdate{ ClusterName: clusterName, EDSServiceName: serviceName, LRSServerConfig: xdsresource.ClusterLRSServerSelf, - LBPolicy: &xdsresource.ClusterLBPolicyRingHash{MinimumRingSize: 10, MaximumRingSize: 100}, }, wantLBConfig: &internalserviceconfig.BalancerConfig{ Name: "ring_hash_experimental", @@ -589,11 +588,11 @@ func (s) TestValidateCluster_Success(t *testing.T) { // compare JSON bytes in a test. Thus, marshal into a Balancer // Config struct and compare on that. Only need to test this JSON // emission here, as this covers the possible output space. - if diff := cmp.Diff(update, test.wantUpdate, cmpopts.EquateEmpty(), cmpopts.IgnoreFields(xdsresource.ClusterUpdate{}, "LBPolicy", "LBPolicyJSON")); diff != "" { + if diff := cmp.Diff(update, test.wantUpdate, cmpopts.EquateEmpty(), cmpopts.IgnoreFields(xdsresource.ClusterUpdate{}, "LBPolicy")); diff != "" { t.Errorf("validateClusterAndConstructClusterUpdate(%+v) got diff: %v (-got, +want)", test.cluster, diff) } bc := &internalserviceconfig.BalancerConfig{} - if err := json.Unmarshal(update.LBPolicyJSON, bc); err != nil { + if err := json.Unmarshal(update.LBPolicy, bc); err != nil { t.Fatalf("failed to unmarshal JSON: %v", err) } if diff := cmp.Diff(bc, test.wantLBConfig); diff != "" { diff --git a/xds/internal/xdsclient/xdsresource/type_cds.go b/xds/internal/xdsclient/xdsresource/type_cds.go index cd49852d8fcc..8ea9608dc9b7 100644 --- a/xds/internal/xdsclient/xdsresource/type_cds.go +++ b/xds/internal/xdsclient/xdsresource/type_cds.go @@ -52,13 +52,6 @@ const ( ClusterLRSServerSelf ) -// ClusterLBPolicyRingHash represents ring_hash lb policy, and also contains its -// config. -type ClusterLBPolicyRingHash struct { - MinimumRingSize uint64 - MaximumRingSize uint64 -} - // OutlierDetection is the outlier detection configuration for a cluster. type OutlierDetection struct { // Interval is the time interval between ejection analysis sweeps. This can @@ -148,21 +141,9 @@ type ClusterUpdate struct { // a prioritized list of cluster names. PrioritizedClusterNames []string - // LBPolicy is the lb policy for this cluster. - // - // This only support round_robin and ring_hash. - // - if it's nil, the lb policy is round_robin - // - if it's not nil, the lb policy is ring_hash, the this field has the config. - // - // When we add more support policies, this can be made an interface, and - // will be set to different types based on the policy type. - LBPolicy *ClusterLBPolicyRingHash - // LBPolicyJSON represents the locality and endpoint picking policy in JSON, - // which will be the child policy of xds_cluster_impl. Once full support for - // this field across the system, the LBPolicy field will switch to this - // field. Right now we keep both to keep the system working even though - // downstream has not added support for this JSON field. - LBPolicyJSON json.RawMessage + // LBPolicy represents the locality and endpoint picking policy in JSON, + // which will be the child policy of xds_cluster_impl. 
+ LBPolicy json.RawMessage // OutlierDetection is the outlier detection configuration for this cluster. // If nil, it means this cluster does not use the outlier detection feature. diff --git a/xds/internal/xdsclient/xdsresource/unmarshal_cds.go b/xds/internal/xdsclient/xdsresource/unmarshal_cds.go index 966844881351..c117ce6e7b52 100644 --- a/xds/internal/xdsclient/xdsresource/unmarshal_cds.go +++ b/xds/internal/xdsclient/xdsresource/unmarshal_cds.go @@ -77,13 +77,11 @@ const ( ) func validateClusterAndConstructClusterUpdate(cluster *v3clusterpb.Cluster) (ClusterUpdate, error) { - var lbPolicy *ClusterLBPolicyRingHash - var lbCfgJSON json.RawMessage + var lbPolicy json.RawMessage var err error switch cluster.GetLbPolicy() { case v3clusterpb.Cluster_ROUND_ROBIN: - lbPolicy = nil // The default is round_robin, and there's no config to set. - lbCfgJSON = []byte(fmt.Sprintf(`[{%q: {"childPolicy": [{"round_robin": {}}]}}]`, "xds_wrr_locality_experimental")) + lbPolicy = []byte(`[{"xds_wrr_locality_experimental": {"childPolicy": [{"round_robin": {}}]}}]`) case v3clusterpb.Cluster_RING_HASH: if !envconfig.XDSRingHash { return ClusterUpdate{}, fmt.Errorf("unexpected lbPolicy %v in response: %+v", cluster.GetLbPolicy(), cluster) @@ -101,10 +99,9 @@ func validateClusterAndConstructClusterUpdate(cluster *v3clusterpb.Cluster) (Clu if max := rhc.GetMaximumRingSize(); max != nil { maxSize = max.GetValue() } - lbPolicy = &ClusterLBPolicyRingHash{MinimumRingSize: minSize, MaximumRingSize: maxSize} - rhLBCfgJSON := []byte(fmt.Sprintf("{\"minRingSize\": %d, \"maxRingSize\": %d}", minSize, maxSize)) - lbCfgJSON = []byte(fmt.Sprintf(`[{%q: %s}]`, "ring_hash_experimental", rhLBCfgJSON)) + rhLBCfg := []byte(fmt.Sprintf("{\"minRingSize\": %d, \"maxRingSize\": %d}", minSize, maxSize)) + lbPolicy = []byte(fmt.Sprintf(`[{"ring_hash_experimental": %s}]`, rhLBCfg)) default: return ClusterUpdate{}, fmt.Errorf("unexpected lbPolicy %v in response: %+v", cluster.GetLbPolicy(), cluster) } @@ -129,7 +126,7 @@ func validateClusterAndConstructClusterUpdate(cluster *v3clusterpb.Cluster) (Clu } if cluster.GetLoadBalancingPolicy() != nil && envconfig.XDSCustomLBPolicy { - lbCfgJSON, err = xdslbregistry.ConvertToServiceConfig(cluster.GetLoadBalancingPolicy()) + lbPolicy, err = xdslbregistry.ConvertToServiceConfig(cluster.GetLoadBalancingPolicy()) if err != nil { return ClusterUpdate{}, fmt.Errorf("error converting LoadBalancingPolicy %v in response: %+v: %v", cluster.GetLoadBalancingPolicy(), cluster, err) } @@ -137,8 +134,8 @@ func validateClusterAndConstructClusterUpdate(cluster *v3clusterpb.Cluster) (Clu // converted configuration. It will do this by having the gRPC LB policy // registry parse the configuration." 
- A52 bc := &internalserviceconfig.BalancerConfig{} - if err := json.Unmarshal(lbCfgJSON, bc); err != nil { - return ClusterUpdate{}, fmt.Errorf("JSON generated from xDS LB policy registry: %s is invalid: %v", pretty.FormatJSON(lbCfgJSON), err) + if err := json.Unmarshal(lbPolicy, bc); err != nil { + return ClusterUpdate{}, fmt.Errorf("JSON generated from xDS LB policy registry: %s is invalid: %v", pretty.FormatJSON(lbPolicy), err) } } @@ -147,7 +144,6 @@ func validateClusterAndConstructClusterUpdate(cluster *v3clusterpb.Cluster) (Clu SecurityCfg: sc, MaxRequests: circuitBreakersFromCluster(cluster), LBPolicy: lbPolicy, - LBPolicyJSON: lbCfgJSON, OutlierDetection: od, } diff --git a/xds/internal/xdsclient/xdsresource/unmarshal_cds_test.go b/xds/internal/xdsclient/xdsresource/unmarshal_cds_test.go index 3b47ae697a99..0c69d27ad42d 100644 --- a/xds/internal/xdsclient/xdsresource/unmarshal_cds_test.go +++ b/xds/internal/xdsclient/xdsresource/unmarshal_cds_test.go @@ -322,7 +322,7 @@ func (s) TestValidateClusterWithSecurityConfig_EnvVarOff(t *testing.T) { if err != nil { t.Errorf("validateClusterAndConstructClusterUpdate() failed: %v", err) } - if diff := cmp.Diff(wantUpdate, gotUpdate, cmpopts.IgnoreFields(ClusterUpdate{}, "LBPolicyJSON")); diff != "" { + if diff := cmp.Diff(wantUpdate, gotUpdate, cmpopts.IgnoreFields(ClusterUpdate{}, "LBPolicy")); diff != "" { t.Errorf("validateClusterAndConstructClusterUpdate() returned unexpected diff (-want, got):\n%s", diff) } } @@ -1215,7 +1215,7 @@ func (s) TestValidateClusterWithSecurityConfig(t *testing.T) { if (err != nil) != test.wantErr { t.Errorf("validateClusterAndConstructClusterUpdate() returned err %v wantErr %v)", err, test.wantErr) } - if diff := cmp.Diff(test.wantUpdate, update, cmpopts.EquateEmpty(), cmp.AllowUnexported(regexp.Regexp{}), cmpopts.IgnoreFields(ClusterUpdate{}, "LBPolicyJSON")); diff != "" { + if diff := cmp.Diff(test.wantUpdate, update, cmpopts.EquateEmpty(), cmp.AllowUnexported(regexp.Regexp{}), cmpopts.IgnoreFields(ClusterUpdate{}, "LBPolicy")); diff != "" { t.Errorf("validateClusterAndConstructClusterUpdate() returned unexpected diff (-want, +got):\n%s", diff) } }) @@ -1357,7 +1357,7 @@ func (s) TestUnmarshalCluster(t *testing.T) { if name != test.wantName { t.Errorf("unmarshalClusterResource(%s), got name: %s, want: %s", pretty.ToJSON(test.resource), name, test.wantName) } - if diff := cmp.Diff(update, test.wantUpdate, cmpOpts, cmpopts.IgnoreFields(ClusterUpdate{}, "LBPolicyJSON")); diff != "" { + if diff := cmp.Diff(update, test.wantUpdate, cmpOpts, cmpopts.IgnoreFields(ClusterUpdate{}, "LBPolicy")); diff != "" { t.Errorf("unmarshalClusterResource(%s), got unexpected update, diff (-got +want): %v", pretty.ToJSON(test.resource), diff) } }) @@ -1507,7 +1507,7 @@ func (s) TestValidateClusterWithOutlierDetection(t *testing.T) { if (err != nil) != test.wantErr { t.Errorf("validateClusterAndConstructClusterUpdate() returned err %v wantErr %v)", err, test.wantErr) } - if diff := cmp.Diff(test.wantUpdate, update, cmpopts.EquateEmpty(), cmpopts.IgnoreFields(ClusterUpdate{}, "LBPolicyJSON")); diff != "" { + if diff := cmp.Diff(test.wantUpdate, update, cmpopts.EquateEmpty(), cmpopts.IgnoreFields(ClusterUpdate{}, "LBPolicy")); diff != "" { t.Errorf("validateClusterAndConstructClusterUpdate() returned unexpected diff (-want, +got):\n%s", diff) } }) diff --git a/xds/internal/xdsclient/xdsresource/unmarshal_eds.go b/xds/internal/xdsclient/xdsresource/unmarshal_eds.go index a3202f8c8100..95333aaf61d5 100644 --- 
a/xds/internal/xdsclient/xdsresource/unmarshal_eds.go +++ b/xds/internal/xdsclient/xdsresource/unmarshal_eds.go @@ -141,6 +141,17 @@ func parseEDSRespProto(m *v3endpointpb.ClusterLoadAssignment) (EndpointsUpdate, SubZone: l.SubZone, } lidStr, _ := lid.ToString() + + // "Since an xDS configuration can place a given locality under multiple + // priorities, it is possible to see locality weight attributes with + // different values for the same locality." - A52 + // + // This is handled in the client by emitting the locality weight + // specified for the priority it is specified in. If the same locality + // has a different weight in two priorities, each priority will specify + // a locality with the locality weight specified for that priority, and + // thus the subsequent tree of balancers linked to that priority will + // use that locality weight as well. if localitiesWithPriority[lidStr] { return EndpointsUpdate{}, fmt.Errorf("duplicate locality %s with the same priority %v", lidStr, priority) } From b3fbd87a9e45b1e3bef39d9b12b3609a89f4e4c9 Mon Sep 17 00:00:00 2001 From: Doug Fawley Date: Wed, 10 May 2023 13:26:37 -0700 Subject: [PATCH 29/60] interop: add ORCA test cases and functionality (#6266) --- interop/client/client.go | 14 ++- interop/observability/go.mod | 2 + interop/observability/go.sum | 2 + interop/orcalb.go | 170 +++++++++++++++++++++++++++++++++++ interop/server/server.go | 18 +++- interop/test_utils.go | 156 +++++++++++++++++++++++++++++++- 6 files changed, 357 insertions(+), 5 deletions(-) create mode 100644 interop/orcalb.go diff --git a/interop/client/client.go b/interop/client/client.go index 980ed9942589..a4228190e12c 100644 --- a/interop/client/client.go +++ b/interop/client/client.go @@ -17,6 +17,10 @@ */ // Binary client is an interop client. +// +// See interop test case descriptions [here]. 
+// +// [here]: https://github.com/grpc/grpc/blob/master/doc/interop-test-descriptions.md package main import ( @@ -94,7 +98,9 @@ var ( custom_metadata: server will echo custom metadata; unimplemented_method: client attempts to call unimplemented method; unimplemented_service: client attempts to call unimplemented service; - pick_first_unary: all requests are sent to one server despite multiple servers are resolved.`) + pick_first_unary: all requests are sent to one server despite multiple servers are resolved; + orca_per_rpc: the client verifies ORCA per-RPC metrics are provided; + orca_oob: the client verifies ORCA out-of-band metrics are provided.`) logger = grpclog.Component("interop") ) @@ -308,6 +314,12 @@ func main() { case "channel_soak": interop.DoSoakTest(tc, serverAddr, opts, true /* resetChannel */, *soakIterations, *soakMaxFailures, time.Duration(*soakPerIterationMaxAcceptableLatencyMs)*time.Millisecond, time.Duration(*soakMinTimeMsBetweenRPCs)*time.Millisecond, time.Now().Add(time.Duration(*soakOverallTimeoutSeconds)*time.Second)) logger.Infoln("ChannelSoak done") + case "orca_per_rpc": + interop.DoORCAPerRPCTest(tc) + logger.Infoln("ORCAPerRPC done") + case "orca_oob": + interop.DoORCAOOBTest(tc) + logger.Infoln("ORCAOOB done") default: logger.Fatal("Unsupported test case: ", *testCase) } diff --git a/interop/observability/go.mod b/interop/observability/go.mod index 1d743a02318b..784ea504d1a0 100644 --- a/interop/observability/go.mod +++ b/interop/observability/go.mod @@ -18,6 +18,8 @@ require ( contrib.go.opencensus.io/exporter/stackdriver v0.13.12 // indirect github.com/aws/aws-sdk-go v1.44.162 // indirect github.com/census-instrumentation/opencensus-proto v0.4.1 // indirect + github.com/cncf/xds/go v0.0.0-20230310173818-32f1caf87195 // indirect + github.com/envoyproxy/protoc-gen-validate v0.10.1 // indirect github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da // indirect github.com/golang/protobuf v1.5.3 // indirect github.com/google/go-cmp v0.5.9 // indirect diff --git a/interop/observability/go.sum b/interop/observability/go.sum index 4a52d183476d..167fb14bc0ce 100644 --- a/interop/observability/go.sum +++ b/interop/observability/go.sum @@ -638,6 +638,7 @@ github.com/cncf/xds/go v0.0.0-20211001041855-01bcc9b48dfe/go.mod h1:eXthEFrGJvWH github.com/cncf/xds/go v0.0.0-20211011173535-cb28da3451f1/go.mod h1:eXthEFrGJvWHgFFCl3hGmgk+/aYT6PnTQLykKQRLhEs= github.com/cncf/xds/go v0.0.0-20220314180256-7f1daf1720fc/go.mod h1:eXthEFrGJvWHgFFCl3hGmgk+/aYT6PnTQLykKQRLhEs= github.com/cncf/xds/go v0.0.0-20230105202645-06c439db220b/go.mod h1:eXthEFrGJvWHgFFCl3hGmgk+/aYT6PnTQLykKQRLhEs= +github.com/cncf/xds/go v0.0.0-20230310173818-32f1caf87195 h1:58f1tJ1ra+zFINPlwLWvQsR9CzAKt2e+EWV2yX9oXQ4= github.com/cncf/xds/go v0.0.0-20230310173818-32f1caf87195/go.mod h1:eXthEFrGJvWHgFFCl3hGmgk+/aYT6PnTQLykKQRLhEs= github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E= github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= @@ -651,6 +652,7 @@ github.com/envoyproxy/go-control-plane v0.11.1-0.20230406144219-ba92d50b6596/go. 
github.com/envoyproxy/protoc-gen-validate v0.1.0/go.mod h1:iSmxcyjqTsJpI2R4NaDN7+kN2VEUnK/pcBlmesArF7c= github.com/envoyproxy/protoc-gen-validate v0.6.7/go.mod h1:dyJXwwfPK2VSqiB9Klm1J6romD608Ba7Hij42vrOBCo= github.com/envoyproxy/protoc-gen-validate v0.9.1/go.mod h1:OKNgG7TCp5pF4d6XftA0++PMirau2/yoOwVac3AbF2w= +github.com/envoyproxy/protoc-gen-validate v0.10.1 h1:c0g45+xCJhdgFGw7a5QAfdS4byAbud7miNWJ1WwEVf8= github.com/envoyproxy/protoc-gen-validate v0.10.1/go.mod h1:DRjgyB0I43LtJapqN6NiRwroiAU2PaFuvk/vjgh61ss= github.com/fogleman/gg v1.2.1-0.20190220221249-0403632d5b90/go.mod h1:R/bRT+9gY/C5z7JzPU0zXsXHKM4/ayA+zqcVNZzPa1k= github.com/fogleman/gg v1.3.0/go.mod h1:R/bRT+9gY/C5z7JzPU0zXsXHKM4/ayA+zqcVNZzPa1k= diff --git a/interop/orcalb.go b/interop/orcalb.go new file mode 100644 index 000000000000..28ea7524d7b7 --- /dev/null +++ b/interop/orcalb.go @@ -0,0 +1,170 @@ +/* + * + * Copyright 2023 gRPC authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +package interop + +import ( + "context" + "fmt" + "sync" + "time" + + v3orcapb "github.com/cncf/xds/go/xds/data/orca/v3" + "google.golang.org/grpc/balancer" + "google.golang.org/grpc/balancer/base" + "google.golang.org/grpc/connectivity" + "google.golang.org/grpc/orca" +) + +func init() { + balancer.Register(orcabb{}) +} + +type orcabb struct{} + +func (orcabb) Build(cc balancer.ClientConn, opts balancer.BuildOptions) balancer.Balancer { + return &orcab{cc: cc} +} + +func (orcabb) Name() string { + return "test_backend_metrics_load_balancer" +} + +type orcab struct { + cc balancer.ClientConn + sc balancer.SubConn + cancelWatch func() + + reportMu sync.Mutex + report *v3orcapb.OrcaLoadReport +} + +func (o *orcab) UpdateClientConnState(s balancer.ClientConnState) error { + if o.sc != nil { + o.sc.UpdateAddresses(s.ResolverState.Addresses) + return nil + } + + if len(s.ResolverState.Addresses) == 0 { + o.ResolverError(fmt.Errorf("produced no addresses")) + return fmt.Errorf("resolver produced no addresses") + } + var err error + o.sc, err = o.cc.NewSubConn(s.ResolverState.Addresses, balancer.NewSubConnOptions{}) + if err != nil { + o.cc.UpdateState(balancer.State{ConnectivityState: connectivity.TransientFailure, Picker: base.NewErrPicker(fmt.Errorf("error creating subconn: %v", err))}) + return nil + } + o.cancelWatch = orca.RegisterOOBListener(o.sc, o, orca.OOBListenerOptions{ReportInterval: time.Second}) + o.sc.Connect() + o.cc.UpdateState(balancer.State{ConnectivityState: connectivity.Connecting, Picker: base.NewErrPicker(balancer.ErrNoSubConnAvailable)}) + return nil +} + +func (o *orcab) ResolverError(err error) { + if o.sc == nil { + o.cc.UpdateState(balancer.State{ConnectivityState: connectivity.TransientFailure, Picker: base.NewErrPicker(fmt.Errorf("resolver error: %v", err))}) + } +} + +func (o *orcab) UpdateSubConnState(sc balancer.SubConn, scState balancer.SubConnState) { + if o.sc != sc { + logger.Errorf("received subconn update for unknown subconn: %v vs %v", o.sc, sc) + return + } + 
switch scState.ConnectivityState { + case connectivity.Ready: + o.cc.UpdateState(balancer.State{ConnectivityState: connectivity.Ready, Picker: &scPicker{sc: sc, o: o}}) + case connectivity.TransientFailure: + o.cc.UpdateState(balancer.State{ConnectivityState: connectivity.TransientFailure, Picker: base.NewErrPicker(fmt.Errorf("all subchannels in transient failure: %v", scState.ConnectionError))}) + case connectivity.Connecting: + // Ignore; picker already set to "connecting". + case connectivity.Idle: + sc.Connect() + o.cc.UpdateState(balancer.State{ConnectivityState: connectivity.Connecting, Picker: base.NewErrPicker(balancer.ErrNoSubConnAvailable)}) + case connectivity.Shutdown: + // Ignore; we are closing but handle that in Close instead. + } +} + +func (o *orcab) Close() { + o.cancelWatch() +} + +func (o *orcab) OnLoadReport(r *v3orcapb.OrcaLoadReport) { + o.reportMu.Lock() + defer o.reportMu.Unlock() + logger.Infof("received OOB load report: %v", r) + o.report = r +} + +type scPicker struct { + sc balancer.SubConn + o *orcab +} + +func (p *scPicker) Pick(info balancer.PickInfo) (balancer.PickResult, error) { + doneCB := func(di balancer.DoneInfo) { + if lr, _ := di.ServerLoad.(*v3orcapb.OrcaLoadReport); lr != nil && + (lr.CpuUtilization != 0 || lr.MemUtilization != 0 || len(lr.Utilization) > 0 || len(lr.RequestCost) > 0) { + // Since all RPCs will respond with a load report due to the + // presence of the DialOption, we need to inspect every field and + // use the out-of-band report instead if all are unset/zero. + setContextCMR(info.Ctx, lr) + } else { + p.o.reportMu.Lock() + defer p.o.reportMu.Unlock() + if lr := p.o.report; lr != nil { + setContextCMR(info.Ctx, lr) + } + } + } + return balancer.PickResult{SubConn: p.sc, Done: doneCB}, nil +} + +func setContextCMR(ctx context.Context, lr *v3orcapb.OrcaLoadReport) { + if r := orcaResultFromContext(ctx); r != nil { + *r = lr + } +} + +type orcaKey string + +var orcaCtxKey = orcaKey("orcaResult") + +// contextWithORCAResult sets a key in ctx with a pointer to an ORCA load +// report that is to be filled in by the "test_backend_metrics_load_balancer" +// LB policy's Picker's Done callback. +// +// If a per-call load report is provided from the server for the call, result +// will be filled with that, otherwise the most recent OOB load report is used. +// If no OOB report has been received, result is not modified. +func contextWithORCAResult(ctx context.Context, result **v3orcapb.OrcaLoadReport) context.Context { + return context.WithValue(ctx, orcaCtxKey, result) +} + +// orcaResultFromContext returns the ORCA load report stored in the context. +// The LB policy uses this to communicate the load report back to the interop +// client application. +func orcaResultFromContext(ctx context.Context) **v3orcapb.OrcaLoadReport { + v := ctx.Value(orcaCtxKey) + if v == nil { + return nil + } + return v.(**v3orcapb.OrcaLoadReport) +} diff --git a/interop/server/server.go b/interop/server/server.go index 0778dbf961f0..67fbc3119963 100644 --- a/interop/server/server.go +++ b/interop/server/server.go @@ -17,18 +17,25 @@ */ // Binary server is an interop server. +// +// See interop test case descriptions [here]. 
+// +// [here]: https://github.com/grpc/grpc/blob/master/doc/interop-test-descriptions.md package main import ( "flag" "net" "strconv" + "time" "google.golang.org/grpc" "google.golang.org/grpc/credentials" "google.golang.org/grpc/credentials/alts" "google.golang.org/grpc/grpclog" + "google.golang.org/grpc/internal" "google.golang.org/grpc/interop" + "google.golang.org/grpc/orca" "google.golang.org/grpc/testdata" testgrpc "google.golang.org/grpc/interop/grpc_testing" @@ -56,7 +63,7 @@ func main() { logger.Fatalf("failed to listen: %v", err) } logger.Infof("interop server listening on %v", lis.Addr()) - var opts []grpc.ServerOption + opts := []grpc.ServerOption{orca.CallMetricsServerOption(nil)} if *useTLS { if *certFile == "" { *certFile = testdata.Path("server1.pem") @@ -78,6 +85,13 @@ func main() { opts = append(opts, grpc.Creds(altsTC)) } server := grpc.NewServer(opts...) - testgrpc.RegisterTestServiceServer(server, interop.NewTestServer()) + metricsRecorder := orca.NewServerMetricsRecorder() + sopts := orca.ServiceOptions{ + MinReportingInterval: time.Second, + ServerMetricsProvider: metricsRecorder, + } + internal.ORCAAllowAnyMinReportingInterval.(func(*orca.ServiceOptions))(&sopts) + orca.Register(server, sopts) + testgrpc.RegisterTestServiceServer(server, interop.NewTestServer(interop.NewTestServerOptions{MetricsRecorder: metricsRecorder})) server.Serve(lis) } diff --git a/interop/test_utils.go b/interop/test_utils.go index 6f6cde7d846c..0057c071217a 100644 --- a/interop/test_utils.go +++ b/interop/test_utils.go @@ -17,6 +17,10 @@ */ // Package interop contains functions used by interop client/server. +// +// See interop test case descriptions [here]. +// +// [here]: https://github.com/grpc/grpc/blob/master/doc/interop-test-descriptions.md package interop import ( @@ -36,9 +40,11 @@ import ( "google.golang.org/grpc/codes" "google.golang.org/grpc/grpclog" "google.golang.org/grpc/metadata" + "google.golang.org/grpc/orca" "google.golang.org/grpc/peer" "google.golang.org/grpc/status" + v3orcapb "github.com/cncf/xds/go/xds/data/orca/v3" testgrpc "google.golang.org/grpc/interop/grpc_testing" testpb "google.golang.org/grpc/interop/grpc_testing" ) @@ -772,10 +778,23 @@ func DoSoakTest(tc testgrpc.TestServiceClient, serverAddr string, dopts []grpc.D type testServer struct { testgrpc.UnimplementedTestServiceServer + + metricsRecorder orca.ServerMetricsRecorder +} + +// NewTestServerOptions contains options that control the behavior of the test +// server returned by NewTestServer. +type NewTestServerOptions struct { + MetricsRecorder orca.ServerMetricsRecorder } -// NewTestServer creates a test server for test service. -func NewTestServer() testgrpc.TestServiceServer { +// NewTestServer creates a test server for test service. opts carries optional +// settings and does not need to be provided. If multiple opts are provided, +// only the first one is used. +func NewTestServer(opts ...NewTestServerOptions) testgrpc.TestServiceServer { + if len(opts) > 0 { + return &testServer{metricsRecorder: opts[0].MetricsRecorder} + } return &testServer{} } @@ -818,11 +837,34 @@ func (s *testServer) UnaryCall(ctx context.Context, in *testpb.SimpleRequest) (* if err != nil { return nil, err } + if r, orcaData := orca.CallMetricsRecorderFromContext(ctx), in.GetOrcaPerQueryReport(); r != nil && orcaData != nil { + // Transfer the request's per-Call ORCA data to the call metrics + // recorder in the context, if present. 
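+		// (orca.CallMetricsRecorderFromContext returns a non-nil recorder
+		// only when the server was created with orca.CallMetricsServerOption,
+		// as the interop server binary above now is.)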
+		setORCAMetrics(r, orcaData)
+	}
+	if r, orcaData := s.metricsRecorder, in.GetOrcaOobReport(); r != nil && orcaData != nil {
+		// Transfer the request's OOB ORCA data to the server metrics recorder
+		// in the server, if present.
+		setORCAMetrics(r, orcaData)
+	}
 	return &testpb.SimpleResponse{
 		Payload: pl,
 	}, nil
 }
 
+func setORCAMetrics(r orca.ServerMetricsRecorder, orcaData *testpb.TestOrcaReport) {
+	r.SetCPUUtilization(orcaData.CpuUtilization)
+	r.SetMemoryUtilization(orcaData.MemoryUtilization)
+	if rq, ok := r.(orca.CallMetricsRecorder); ok {
+		for k, v := range orcaData.RequestCost {
+			rq.SetRequestCost(k, v)
+		}
+	}
+	for k, v := range orcaData.Utilization {
+		r.SetNamedUtilization(k, v)
+	}
+}
+
 func (s *testServer) StreamingOutputCall(args *testpb.StreamingOutputCallRequest, stream testgrpc.TestService_StreamingOutputCallServer) error {
 	cs := args.GetResponseParameters()
 	for _, c := range cs {
@@ -883,6 +925,13 @@ func (s *testServer) FullDuplexCall(stream testgrpc.TestService_FullDuplexCallSe
 		if st != nil && st.Code != 0 {
 			return status.Error(codes.Code(st.Code), st.Message)
 		}
+
+		if r, orcaData := s.metricsRecorder, in.GetOrcaOobReport(); r != nil && orcaData != nil {
+			// Transfer the request's OOB ORCA data to the server metrics recorder
+			// in the server, if present.
+			setORCAMetrics(r, orcaData)
+		}
+
 		cs := in.GetResponseParameters()
 		for _, c := range cs {
 			if us := c.GetIntervalUs(); us > 0 {
@@ -933,3 +982,106 @@ func (s *testServer) HalfDuplexCall(stream testgrpc.TestService_HalfDuplexCallSe
 	}
 	return nil
 }
+
+// DoORCAPerRPCTest performs a unary RPC that enables ORCA per-call reporting
+// and verifies the load report sent back to the LB policy's Done callback.
+func DoORCAPerRPCTest(tc testgrpc.TestServiceClient) {
+	ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
+	defer cancel()
+	orcaRes := &v3orcapb.OrcaLoadReport{}
+	_, err := tc.UnaryCall(contextWithORCAResult(ctx, &orcaRes), &testpb.SimpleRequest{
+		OrcaPerQueryReport: &testpb.TestOrcaReport{
+			CpuUtilization:    0.8210,
+			MemoryUtilization: 0.5847,
+			RequestCost:       map[string]float64{"cost": 3456.32},
+			Utilization:       map[string]float64{"util": 0.30499},
+		},
+	})
+	if err != nil {
+		logger.Fatalf("/TestService/UnaryCall RPC failed: %v", err)
+	}
+	want := &v3orcapb.OrcaLoadReport{
+		CpuUtilization: 0.8210,
+		MemUtilization: 0.5847,
+		RequestCost:    map[string]float64{"cost": 3456.32},
+		Utilization:    map[string]float64{"util": 0.30499},
+	}
+	if !proto.Equal(orcaRes, want) {
+		logger.Fatalf("/TestService/UnaryCall RPC received ORCA load report %+v; want %+v", orcaRes, want)
+	}
+}
+
+// DoORCAOOBTest performs a streaming RPC that enables ORCA OOB reporting and
+// verifies the load report sent to the LB policy's OOB listener.
+func DoORCAOOBTest(tc testgrpc.TestServiceClient) {
+	ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
+	defer cancel()
+	stream, err := tc.FullDuplexCall(ctx)
+	if err != nil {
+		logger.Fatalf("/TestService/FullDuplexCall received error starting stream: %v", err)
+	}
+	err = stream.Send(&testpb.StreamingOutputCallRequest{
+		OrcaOobReport: &testpb.TestOrcaReport{
+			CpuUtilization:    0.8210,
+			MemoryUtilization: 0.5847,
+			Utilization:       map[string]float64{"util": 0.30499},
+		},
+		ResponseParameters: []*testpb.ResponseParameters{{Size: 1}},
+	})
+	if err != nil {
+		logger.Fatalf("/TestService/FullDuplexCall received error sending: %v", err)
+	}
+	_, err = stream.Recv()
+	if err != nil {
+		logger.Fatalf("/TestService/FullDuplexCall received error receiving: %v", err)
+	}
+
+	ctx2, cancel := context.WithTimeout(ctx, 5*time.Second)
+	defer cancel()
+	want := &v3orcapb.OrcaLoadReport{
+		CpuUtilization: 0.8210,
+		MemUtilization: 0.5847,
+		Utilization:    map[string]float64{"util": 0.30499},
+	}
+	checkORCAMetrics(ctx2, tc, want)
+
+	err = stream.Send(&testpb.StreamingOutputCallRequest{
+		OrcaOobReport: &testpb.TestOrcaReport{
+			CpuUtilization:    0.29309,
+			MemoryUtilization: 0.2,
+			Utilization:       map[string]float64{"util": 0.2039},
+		},
+		ResponseParameters: []*testpb.ResponseParameters{{Size: 1}},
+	})
+	if err != nil {
+		logger.Fatalf("/TestService/FullDuplexCall received error sending: %v", err)
+	}
+	_, err = stream.Recv()
+	if err != nil {
+		logger.Fatalf("/TestService/FullDuplexCall received error receiving: %v", err)
+	}
+
+	ctx3, cancel := context.WithTimeout(ctx, 5*time.Second)
+	defer cancel()
+	want = &v3orcapb.OrcaLoadReport{
+		CpuUtilization: 0.29309,
+		MemUtilization: 0.2,
+		Utilization:    map[string]float64{"util": 0.2039},
+	}
+	checkORCAMetrics(ctx3, tc, want)
+}
+
+func checkORCAMetrics(ctx context.Context, tc testgrpc.TestServiceClient, want *v3orcapb.OrcaLoadReport) {
+	for ctx.Err() == nil {
+		orcaRes := &v3orcapb.OrcaLoadReport{}
+		if _, err := tc.UnaryCall(contextWithORCAResult(ctx, &orcaRes), &testpb.SimpleRequest{}); err != nil {
+			logger.Fatalf("/TestService/UnaryCall RPC failed: %v", err)
+		}
+		if proto.Equal(orcaRes, want) {
+			return
+		}
+		logger.Infof("/TestService/UnaryCall RPC received ORCA load report %+v; want %+v", orcaRes, want)
+		time.Sleep(time.Second)
+	}
+	logger.Fatalf("timed out waiting for expected ORCA load report")
+}
From afcbdc9ace7b4af94d014620727ea331cc3047fe Mon Sep 17 00:00:00 2001
From: Zach Reyes <39203661+zasweq@users.noreply.github.com>
Date: Wed, 10 May 2023 19:30:34 -0400
Subject: [PATCH 30/60] xds/internal/xdsclient/xdslbregistry: Continue in
 converter if type not found (#6268)

---
 .../xdsclient/xdslbregistry/converter.go      | 33 ++++++++++++++++---
 .../xdslbregistry/tests/converter_test.go     | 19 +++++++++++
 2 files changed, 47 insertions(+), 5 deletions(-)

diff --git a/xds/internal/xdsclient/xdslbregistry/converter.go b/xds/internal/xdsclient/xdslbregistry/converter.go
index 158ad8b199d6..6a5546d90159 100644
--- a/xds/internal/xdsclient/xdslbregistry/converter.go
+++ b/xds/internal/xdsclient/xdslbregistry/converter.go
@@ -34,6 +34,7 @@ import (
 	"github.com/golang/protobuf/proto"
 	structpb "github.com/golang/protobuf/ptypes/struct"
 
+	"google.golang.org/grpc/balancer"
 	"google.golang.org/grpc/internal/envconfig"
 )
 
@@ -90,13 +91,24 @@ func convertToServiceConfig(lbPolicy *v3clusterpb.LoadBalancingPolicy, depth int
 		if err := proto.Unmarshal(policy.GetTypedExtensionConfig().GetTypedConfig().GetValue(), tsProto); err != nil {
 			return nil, fmt.Errorf("failed to unmarshal resource: %v", err)
 		}
-		return convertCustomPolicy(tsProto.GetTypeUrl(), tsProto.GetValue())
+		json, cont, err := convertCustomPolicy(tsProto.GetTypeUrl(), tsProto.GetValue())
+		if cont {
+			continue
+		}
+		return json, err
 	case "type.googleapis.com/udpa.type.v1.TypedStruct":
 		tsProto := &v1xdsudpatypepb.TypedStruct{}
 		if err := proto.Unmarshal(policy.GetTypedExtensionConfig().GetTypedConfig().GetValue(), tsProto); err != nil {
 			return nil, fmt.Errorf("failed to unmarshal resource: %v", err)
 		}
-		return convertCustomPolicy(tsProto.GetTypeUrl(), tsProto.GetValue())
+		json, cont, err := convertCustomPolicy(tsProto.GetTypeUrl(), tsProto.GetValue())
+		if cont {
+			continue
+		}
+		return json, err
 	}
 	// Any entry not in the above list is unsupported and will be skipped.
 	// This includes Least Request as well, since grpc-go does not support
@@ -133,20 +145,31 @@ func convertWrrLocality(cfg *v3wrrlocalitypb.WrrLocality, depth int) (json.RawMe
 	return makeBalancerConfigJSON("xds_wrr_locality_experimental", lbCfgJSON), nil
 }
 
-func convertCustomPolicy(typeURL string, s *structpb.Struct) (json.RawMessage, error) {
+// convertCustomPolicy attempts to prepare JSON configuration for a custom lb
+// proto, which specifies the gRPC balancer type and configuration. It returns
+// the converted JSON, a bool indicating whether the caller should continue to
+// the next policy (true when the gRPC balancer registry does not contain that
+// balancer type), and an error encountered during conversion, which the
+// caller should treat as fatal.
+func convertCustomPolicy(typeURL string, s *structpb.Struct) (json.RawMessage, bool, error) {
 	// The gRPC policy name will be the "type name" part of the value of the
 	// type_url field in the TypedStruct. We get this by using the part after
 	// the last / character. Can assume a valid type_url from the control plane.
 	urls := strings.Split(typeURL, "/")
 	name := urls[len(urls)-1]
 
+	if balancer.Get(name) == nil {
+		return nil, true, nil
+	}
+
 	rawJSON, err := json.Marshal(s)
 	if err != nil {
-		return nil, fmt.Errorf("error converting custom lb policy %v: %v for %+v", err, typeURL, s)
+		return nil, false, fmt.Errorf("error converting custom lb policy %v for %+v: %v", typeURL, s, err)
 	}
+
 	// The Struct contained in the TypedStruct will be returned as-is as the
 	// configuration JSON object.
-	return makeBalancerConfigJSON(name, rawJSON), nil
+	return makeBalancerConfigJSON(name, rawJSON), false, nil
 }
 
 func makeBalancerConfigJSON(name string, value json.RawMessage) []byte {
diff --git a/xds/internal/xdsclient/xdslbregistry/tests/converter_test.go b/xds/internal/xdsclient/xdslbregistry/tests/converter_test.go
index 2607905dc903..c6d947d6bfde 100644
--- a/xds/internal/xdsclient/xdslbregistry/tests/converter_test.go
+++ b/xds/internal/xdsclient/xdslbregistry/tests/converter_test.go
@@ -170,6 +170,16 @@ func (s) TestConvertToServiceConfigSuccess(t *testing.T) {
 			name: "custom_lb_type_v3_struct",
 			policy: &v3clusterpb.LoadBalancingPolicy{
 				Policies: []*v3clusterpb.LoadBalancingPolicy_Policy{
+					{
+						TypedExtensionConfig: &v3corepb.TypedExtensionConfig{
+							// The type is not registered in the gRPC policy
+							// registry, so conversion should fall back to the
+							// next policy in the list.
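+							// (convertCustomPolicy reports cont=true for this
+							// entry because balancer.Get on the type name
+							// "myorg.ThisTypeDoesNotExist" finds nothing, so
+							// conversion moves on to the next policy.)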
+ TypedConfig: testutils.MarshalAny(&v3xdsxdstypepb.TypedStruct{ + TypeUrl: "type.googleapis.com/myorg.ThisTypeDoesNotExist", + Value: &structpb.Struct{}, + }), + }, + }, { TypedExtensionConfig: &v3corepb.TypedExtensionConfig{ TypedConfig: testutils.MarshalAny(&v3xdsxdstypepb.TypedStruct{ @@ -318,6 +328,15 @@ func (s) TestConvertToServiceConfigFailure(t *testing.T) { name: "no-supported-policy", policy: &v3clusterpb.LoadBalancingPolicy{ Policies: []*v3clusterpb.LoadBalancingPolicy_Policy{ + { + TypedExtensionConfig: &v3corepb.TypedExtensionConfig{ + // The type not registered in gRPC Policy registry. + TypedConfig: testutils.MarshalAny(&v3xdsxdstypepb.TypedStruct{ + TypeUrl: "type.googleapis.com/myorg.ThisTypeDoesNotExist", + Value: &structpb.Struct{}, + }), + }, + }, { TypedExtensionConfig: &v3corepb.TypedExtensionConfig{ // Not supported by gRPC-Go. From 7d6134424ab0fbeff84a0e6324bc82fdde2e29f3 Mon Sep 17 00:00:00 2001 From: Doug Fawley Date: Thu, 11 May 2023 09:24:03 -0700 Subject: [PATCH 31/60] examples: fix authz example to receive streaming error properly (#6270) --- examples/features/authz/client/main.go | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/examples/features/authz/client/main.go b/examples/features/authz/client/main.go index 85d085d24580..2654314e5e11 100644 --- a/examples/features/authz/client/main.go +++ b/examples/features/authz/client/main.go @@ -55,7 +55,15 @@ func callBidiStreamingEcho(ctx context.Context, client ecpb.EchoClient, opts ... return status.Errorf(status.Code(err), "BidirectionalStreamingEcho RPC failed: %v", err) } for i := 0; i < 5; i++ { - if err := c.Send(&ecpb.EchoRequest{Message: fmt.Sprintf("Request %d", i+1)}); err != nil { + err := c.Send(&ecpb.EchoRequest{Message: fmt.Sprintf("Request %d", i+1)}) + if err == io.EOF { + // Bidi streaming RPC errors happen and make Send return io.EOF, + // not the RPC error itself. Call Recv to determine the error. + break + } + if err != nil { + // Some local errors are reported this way, e.g. errors serializing + // the request message. return status.Errorf(status.Code(err), "sending StreamingEcho message: %v", err) } } From 1536887cc692aa0f1cdca8e911bad4e483dedabb Mon Sep 17 00:00:00 2001 From: Zach Reyes <39203661+zasweq@users.noreply.github.com> Date: Thu, 11 May 2023 12:29:32 -0400 Subject: [PATCH 32/60] interop/xds: Add Custom LB needed for interop test (#6262) --- balancer/balancer.go | 2 +- balancer/rls/picker.go | 6 +- interop/xds/client/client.go | 1 + interop/xds/custom_lb.go | 140 ++++++++++++++++++++++++++++++++++ interop/xds/custom_lb_test.go | 135 ++++++++++++++++++++++++++++++++ stream.go | 4 +- test/balancer_test.go | 6 +- 7 files changed, 285 insertions(+), 9 deletions(-) create mode 100644 interop/xds/custom_lb.go create mode 100644 interop/xds/custom_lb_test.go diff --git a/balancer/balancer.go b/balancer/balancer.go index 09d61dd1b55b..8f00523c0e24 100644 --- a/balancer/balancer.go +++ b/balancer/balancer.go @@ -286,7 +286,7 @@ type PickResult struct { // // LB policies with child policies are responsible for propagating metadata // injected by their children to the ClientConn, as part of Pick(). - Metatada metadata.MD + Metadata metadata.MD } // TransientFailureError returns e. 
It exists for backward compatibility and diff --git a/balancer/rls/picker.go b/balancer/rls/picker.go index 3305f4529fd9..c2d972739689 100644 --- a/balancer/rls/picker.go +++ b/balancer/rls/picker.go @@ -166,10 +166,10 @@ func (p *rlsPicker) delegateToChildPoliciesLocked(dcEntry *cacheEntry, info bala if err != nil { return res, err } - if res.Metatada == nil { - res.Metatada = metadata.Pairs(rlsDataHeaderName, dcEntry.headerData) + if res.Metadata == nil { + res.Metadata = metadata.Pairs(rlsDataHeaderName, dcEntry.headerData) } else { - res.Metatada.Append(rlsDataHeaderName, dcEntry.headerData) + res.Metadata.Append(rlsDataHeaderName, dcEntry.headerData) } return res, nil } diff --git a/interop/xds/client/client.go b/interop/xds/client/client.go index f5e8469e72cb..ff03428e1105 100644 --- a/interop/xds/client/client.go +++ b/interop/xds/client/client.go @@ -43,6 +43,7 @@ import ( testgrpc "google.golang.org/grpc/interop/grpc_testing" testpb "google.golang.org/grpc/interop/grpc_testing" + _ "google.golang.org/grpc/interop/xds" // to register Custom LB. ) func init() { diff --git a/interop/xds/custom_lb.go b/interop/xds/custom_lb.go new file mode 100644 index 000000000000..a08d82554008 --- /dev/null +++ b/interop/xds/custom_lb.go @@ -0,0 +1,140 @@ +/* + * + * Copyright 2023 gRPC authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +// Package xds contains various xds interop helpers for usage in interop tests. +package xds + +import ( + "encoding/json" + "fmt" + "sync" + + "google.golang.org/grpc/balancer" + "google.golang.org/grpc/balancer/roundrobin" + "google.golang.org/grpc/internal/pretty" + "google.golang.org/grpc/metadata" + "google.golang.org/grpc/serviceconfig" +) + +func init() { + balancer.Register(rpcBehaviorBB{}) +} + +const name = "test.RpcBehaviorLoadBalancer" + +type rpcBehaviorBB struct{} + +func (rpcBehaviorBB) Build(cc balancer.ClientConn, bOpts balancer.BuildOptions) balancer.Balancer { + b := &rpcBehaviorLB{ + ClientConn: cc, + } + // round_robin child to complete balancer tree with a usable leaf policy and + // have RPCs actually work. + builder := balancer.Get(roundrobin.Name) + if builder == nil { + // Shouldn't happen, defensive programming. Registered from import of + // roundrobin package. + return nil + } + rr := builder.Build(b, bOpts) + if rr == nil { + // Shouldn't happen, defensive programming. 
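+		// (builder.Build here is the round_robin builder, which is based on
+		// the base balancer and always returns a non-nil balancer today; this
+		// guard only protects against future changes.)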
+ return nil + } + b.Balancer = rr + return b +} + +func (rpcBehaviorBB) ParseConfig(s json.RawMessage) (serviceconfig.LoadBalancingConfig, error) { + lbCfg := &lbConfig{} + if err := json.Unmarshal(s, lbCfg); err != nil { + return nil, fmt.Errorf("rpc-behavior-lb: unable to marshal lbConfig: %s, error: %v", string(s), err) + } + return lbCfg, nil + +} + +func (rpcBehaviorBB) Name() string { + return name +} + +type lbConfig struct { + serviceconfig.LoadBalancingConfig `json:"-"` + RPCBehavior string `json:"rpcBehavior,omitempty"` +} + +// rpcBehaviorLB is a load balancer that wraps a round robin balancer and +// appends the rpc-behavior metadata field to any metadata in pick results based +// on what is specified in configuration. +type rpcBehaviorLB struct { + // embed a ClientConn to wrap only UpdateState() operation + balancer.ClientConn + // embed a Balancer to wrap only UpdateClientConnState() operation + balancer.Balancer + + mu sync.Mutex + cfg *lbConfig +} + +func (b *rpcBehaviorLB) UpdateClientConnState(s balancer.ClientConnState) error { + lbCfg, ok := s.BalancerConfig.(*lbConfig) + if !ok { + return fmt.Errorf("test.RpcBehaviorLoadBalancer:received config with unexpected type %T: %s", s.BalancerConfig, pretty.ToJSON(s.BalancerConfig)) + } + b.mu.Lock() + b.cfg = lbCfg + b.mu.Unlock() + return b.Balancer.UpdateClientConnState(balancer.ClientConnState{ + ResolverState: s.ResolverState, + }) +} + +func (b *rpcBehaviorLB) UpdateState(state balancer.State) { + b.mu.Lock() + rpcBehavior := b.cfg.RPCBehavior + b.mu.Unlock() + + b.ClientConn.UpdateState(balancer.State{ + ConnectivityState: state.ConnectivityState, + Picker: newRPCBehaviorPicker(state.Picker, rpcBehavior), + }) +} + +// rpcBehaviorPicker wraps a picker and adds the rpc-behavior metadata field +// into the child pick result's metadata. +type rpcBehaviorPicker struct { + childPicker balancer.Picker + rpcBehavior string +} + +// Pick appends the rpc-behavior metadata entry to the pick result of the child. +func (p *rpcBehaviorPicker) Pick(info balancer.PickInfo) (balancer.PickResult, error) { + pr, err := p.childPicker.Pick(info) + if err != nil { + return balancer.PickResult{}, err + } + pr.Metadata = metadata.Join(pr.Metadata, metadata.Pairs("rpc-behavior", p.rpcBehavior)) + return pr, nil +} + +func newRPCBehaviorPicker(childPicker balancer.Picker, rpcBehavior string) *rpcBehaviorPicker { + return &rpcBehaviorPicker{ + childPicker: childPicker, + rpcBehavior: rpcBehavior, + } +} diff --git a/interop/xds/custom_lb_test.go b/interop/xds/custom_lb_test.go new file mode 100644 index 000000000000..fc3a7f71c5c9 --- /dev/null +++ b/interop/xds/custom_lb_test.go @@ -0,0 +1,135 @@ +/* + * + * Copyright 2023 gRPC authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ *
+ */
+
+package xds
+
+import (
+	"context"
+	"errors"
+	"fmt"
+	"testing"
+	"time"
+
+	"google.golang.org/grpc"
+	"google.golang.org/grpc/credentials/insecure"
+	"google.golang.org/grpc/internal"
+	"google.golang.org/grpc/internal/grpctest"
+	"google.golang.org/grpc/internal/stubserver"
+	"google.golang.org/grpc/internal/testutils"
+	testgrpc "google.golang.org/grpc/interop/grpc_testing"
+	testpb "google.golang.org/grpc/interop/grpc_testing"
+	"google.golang.org/grpc/metadata"
+	"google.golang.org/grpc/resolver"
+	"google.golang.org/grpc/resolver/manual"
+	"google.golang.org/grpc/serviceconfig"
+)
+
+var defaultTestTimeout = 5 * time.Second
+
+type s struct {
+	grpctest.Tester
+}
+
+func Test(t *testing.T) {
+	grpctest.RunSubTests(t, s{})
+}
+
+// TestCustomLB tests the custom LB for the interop client. It configures the
+// custom LB as the top-level load balancing policy of the channel, then
+// asserts that it can successfully make an RPC, and that the rpc-behavior
+// value the custom LB is configured with makes its way to the server in
+// metadata.
+func (s) TestCustomLB(t *testing.T) {
+	errCh := testutils.NewChannel()
+	// Set up a backend which verifies the expected rpc-behavior metadata is
+	// present in the request.
+	backend := &stubserver.StubServer{
+		UnaryCallF: func(ctx context.Context, in *testpb.SimpleRequest) (*testpb.SimpleResponse, error) {
+			md, ok := metadata.FromIncomingContext(ctx)
+			if !ok {
+				errCh.Send(errors.New("failed to receive metadata"))
+				return &testpb.SimpleResponse{}, nil
+			}
+			rpcBMD := md.Get("rpc-behavior")
+			if len(rpcBMD) != 1 {
+				errCh.Send(fmt.Errorf("received %d values for metadata key \"rpc-behavior\", want 1", len(rpcBMD)))
+				return &testpb.SimpleResponse{}, nil
+			}
+			wantVal := "error-code-0"
+			if rpcBMD[0] != wantVal {
+				errCh.Send(fmt.Errorf("metadata val for key \"rpc-behavior\": got val %v, want val %v", rpcBMD[0], wantVal))
+				return &testpb.SimpleResponse{}, nil
+			}
+			// Success.
+			errCh.Send(nil)
+			return &testpb.SimpleResponse{}, nil
+		},
+	}
+	if err := backend.StartServer(); err != nil {
+		t.Fatalf("Failed to start backend: %v", err)
+	}
+	t.Logf("Started good TestService backend at: %q", backend.Address)
+	defer backend.Stop()
+
+	lbCfgJSON := `{
+		"loadBalancingConfig": [
+			{
+				"test.RpcBehaviorLoadBalancer": {
+					"rpcBehavior": "error-code-0"
+				}
+			}
+		]
+	}`
+
+	sc := internal.ParseServiceConfig.(func(string) *serviceconfig.ParseResult)(lbCfgJSON)
+	mr := manual.NewBuilderWithScheme("customlb-e2e")
+	defer mr.Close()
+	mr.InitialState(resolver.State{
+		Addresses: []resolver.Address{
+			{Addr: backend.Address},
+		},
+		ServiceConfig: sc,
+	})
+
+	cc, err := grpc.Dial(mr.Scheme()+":///", grpc.WithResolvers(mr), grpc.WithTransportCredentials(insecure.NewCredentials()))
+	if err != nil {
+		t.Fatalf("grpc.Dial() failed: %v", err)
+	}
+	defer cc.Close()
+	ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout)
+	defer cancel()
+	testServiceClient := testgrpc.NewTestServiceClient(cc)
+
+	// Make a unary RPC. This RPC should be successful due to the round_robin
+	// leaf balancer. Also, the custom load balancer should inject the
+	// "rpc-behavior" string it is configured with into the metadata sent to
+	// the server.
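+	// (No real name resolution or xDS is involved here: the manual resolver
+	// above supplies both the backend address and the service config that
+	// selects test.RpcBehaviorLoadBalancer.)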
+	if _, err := testServiceClient.UnaryCall(ctx, &testpb.SimpleRequest{}); err != nil {
+		t.Fatalf("UnaryCall() failed: %v", err)
+	}
+
+	val, err := errCh.Receive(ctx)
+	if err != nil {
+		t.Fatalf("error receiving from errCh: %v", err)
+	}
+
+	// Should receive nil on the error channel, which implies that the backend
+	// verified it received the correct "rpc-behavior" metadata.
+	if err, ok := val.(error); ok {
+		t.Fatalf("error in backend verifications on metadata received: %v", err)
+	}
+}
diff --git a/stream.go b/stream.go
index f79e31c147ee..06ec22cd0a9d 100644
--- a/stream.go
+++ b/stream.go
@@ -472,7 +472,7 @@ func (a *csAttempt) newStream() error {
 	// It is safe to overwrite the csAttempt's context here, since all state
 	// maintained in it are local to the attempt. When the attempt has to be
 	// retried, a new instance of csAttempt will be created.
-	if a.pickResult.Metatada != nil {
+	if a.pickResult.Metadata != nil {
 		// We currently do not have a function in the metadata package which
 		// merges given metadata with existing metadata in a context. Existing
 		// function `AppendToOutgoingContext()` takes a variadic argument of key
 		// value pairs.
 		//
 		// TODO: Make it possible to retrieve key value pairs from metadata.MD
 		// in a form passable to AppendToOutgoingContext(), or create a version
 		// of AppendToOutgoingContext() that accepts a metadata.MD.
 		md, _ := metadata.FromOutgoingContext(a.ctx)
-		md = metadata.Join(md, a.pickResult.Metatada)
+		md = metadata.Join(md, a.pickResult.Metadata)
 		a.ctx = metadata.NewOutgoingContext(a.ctx, md)
 	}
 
diff --git a/test/balancer_test.go b/test/balancer_test.go
index 8b88dc513b29..4026c75b46e3 100644
--- a/test/balancer_test.go
+++ b/test/balancer_test.go
@@ -922,10 +922,10 @@ func (wp *wrappedPicker) Pick(info balancer.PickInfo) (balancer.PickResult, erro
 		return balancer.PickResult{}, err
 	}
 
-	if res.Metatada == nil {
-		res.Metatada = metadata.Pairs(metadataHeaderInjectedByBalancer, metadataValueInjectedByBalancer)
+	if res.Metadata == nil {
+		res.Metadata = metadata.Pairs(metadataHeaderInjectedByBalancer, metadataValueInjectedByBalancer)
 	} else {
-		res.Metatada.Append(metadataHeaderInjectedByBalancer, metadataValueInjectedByBalancer)
+		res.Metadata.Append(metadataHeaderInjectedByBalancer, metadataValueInjectedByBalancer)
 	}
 	return res, nil
 }
From 523dcddf9aaba17e8b131cab225a2cfdacc79bdb Mon Sep 17 00:00:00 2001
From: Doug Fawley
Date: Thu, 11 May 2023 09:37:17 -0700
Subject: [PATCH 33/60] weightedroundrobin: fix test race accessing timeNow
 (#6269)

---
 balancer/weightedroundrobin/balancer_test.go | 29 ++++++++++++++++----
 1 file changed, 23 insertions(+), 6 deletions(-)

diff --git a/balancer/weightedroundrobin/balancer_test.go b/balancer/weightedroundrobin/balancer_test.go
index 5dd62ebf872a..f0cf4dab2f4e 100644
--- a/balancer/weightedroundrobin/balancer_test.go
+++ b/balancer/weightedroundrobin/balancer_test.go
@@ -23,6 +23,7 @@ import (
 	"encoding/json"
 	"fmt"
 	"sync"
+	"sync/atomic"
 	"testing"
 	"time"
 
@@ -448,12 +449,13 @@ func (s) TestBalancer_TwoAddresses_BlackoutPeriod(t *testing.T) {
 		defer mu.Unlock()
 		now = t
 	}
-	iwrr.TimeNow = func() time.Time {
+
+	setTimeNow(func() time.Time {
 		mu.Lock()
 		defer mu.Unlock()
 		return now
-	}
-	t.Cleanup(func() { iwrr.TimeNow = time.Now })
+	})
+	t.Cleanup(func() { setTimeNow(time.Now) })
 
 	testCases := []struct {
 		blackoutPeriodCfg *time.Duration
@@ -526,12 +528,12 @@ func (s) TestBalancer_TwoAddresses_WeightExpiration(t *testing.T) {
 		defer mu.Unlock()
 		now = t
 	}
-	iwrr.TimeNow = func() time.Time {
+	setTimeNow(func() time.Time {
 		mu.Lock()
 		defer 
mu.Unlock() return now - } - t.Cleanup(func() { iwrr.TimeNow = time.Now }) + }) + t.Cleanup(func() { setTimeNow(time.Now) }) srv1 := startServer(t, reportBoth) srv2 := startServer(t, reportBoth) @@ -711,3 +713,18 @@ func checkWeights(ctx context.Context, t *testing.T, sws ...srvWeight) { } t.Fatalf("Failed to route RPCs with proper ratio") } + +func init() { + setTimeNow(time.Now) + iwrr.TimeNow = timeNow +} + +var timeNowFunc atomic.Value // func() time.Time + +func timeNow() time.Time { + return timeNowFunc.Load().(func() time.Time)() +} + +func setTimeNow(f func() time.Time) { + timeNowFunc.Store(f) +} From 1db474c85cb3e56b9114f6d91ea6040625a6fea9 Mon Sep 17 00:00:00 2001 From: Doug Fawley Date: Thu, 11 May 2023 11:56:53 -0700 Subject: [PATCH 34/60] weightedroundrobin: fix duration format in lb config (#6271) --- balancer/weightedroundrobin/balancer.go | 19 +-- balancer/weightedroundrobin/balancer_test.go | 33 ++--- balancer/weightedroundrobin/config.go | 11 +- .../weightedroundrobin/internal/internal.go | 14 +- internal/serviceconfig/duration.go | 130 ++++++++++++++++++ internal/serviceconfig/duration_test.go | 87 ++++++++++++ 6 files changed, 256 insertions(+), 38 deletions(-) create mode 100644 internal/serviceconfig/duration.go create mode 100644 internal/serviceconfig/duration_test.go diff --git a/balancer/weightedroundrobin/balancer.go b/balancer/weightedroundrobin/balancer.go index e0d255222d52..e957b91b1966 100644 --- a/balancer/weightedroundrobin/balancer.go +++ b/balancer/weightedroundrobin/balancer.go @@ -34,6 +34,7 @@ import ( "google.golang.org/grpc/connectivity" "google.golang.org/grpc/internal/grpclog" "google.golang.org/grpc/internal/grpcrand" + iserviceconfig "google.golang.org/grpc/internal/serviceconfig" "google.golang.org/grpc/orca" "google.golang.org/grpc/resolver" "google.golang.org/grpc/serviceconfig" @@ -66,10 +67,10 @@ func (bb) Build(cc balancer.ClientConn, bOpts balancer.BuildOptions) balancer.Ba func (bb) ParseConfig(js json.RawMessage) (serviceconfig.LoadBalancingConfig, error) { lbCfg := &lbConfig{ // Default values as documented in A58. - OOBReportingPeriod: 10 * time.Second, - BlackoutPeriod: 10 * time.Second, - WeightExpirationPeriod: 3 * time.Minute, - WeightUpdatePeriod: time.Second, + OOBReportingPeriod: iserviceconfig.Duration(10 * time.Second), + BlackoutPeriod: iserviceconfig.Duration(10 * time.Second), + WeightExpirationPeriod: iserviceconfig.Duration(3 * time.Minute), + WeightUpdatePeriod: iserviceconfig.Duration(time.Second), ErrorUtilizationPenalty: 1, } if err := json.Unmarshal(js, lbCfg); err != nil { @@ -87,8 +88,8 @@ func (bb) ParseConfig(js json.RawMessage) (serviceconfig.LoadBalancingConfig, er } // Impose lower bound of 100ms on weightUpdatePeriod. 
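 	// (Tests bypass this floor by setting AllowAnyWeightUpdatePeriod, as the
 	// weightedroundrobin test package does in its init; production configs
 	// cannot.)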
- if !internal.AllowAnyWeightUpdatePeriod && lbCfg.WeightUpdatePeriod < 100*time.Millisecond { - lbCfg.WeightUpdatePeriod = 100 * time.Millisecond + if !internal.AllowAnyWeightUpdatePeriod && lbCfg.WeightUpdatePeriod < iserviceconfig.Duration(100*time.Millisecond) { + lbCfg.WeightUpdatePeriod = iserviceconfig.Duration(100 * time.Millisecond) } return lbCfg, nil @@ -337,7 +338,7 @@ func (p *picker) scWeights() []float64 { ws := make([]float64, len(p.subConns)) now := internal.TimeNow() for i, wsc := range p.subConns { - ws[i] = wsc.weight(now, p.cfg.WeightExpirationPeriod, p.cfg.BlackoutPeriod) + ws[i] = wsc.weight(now, time.Duration(p.cfg.WeightExpirationPeriod), time.Duration(p.cfg.BlackoutPeriod)) } return ws } @@ -358,7 +359,7 @@ func (p *picker) start(ctx context.Context) { return } go func() { - ticker := time.NewTicker(p.cfg.WeightUpdatePeriod) + ticker := time.NewTicker(time.Duration(p.cfg.WeightUpdatePeriod)) for { select { case <-ctx.Done(): @@ -469,7 +470,7 @@ func (w *weightedSubConn) updateConfig(cfg *lbConfig) { if w.logger.V(2) { w.logger.Infof("Registering ORCA listener for %v with interval %v", w.SubConn, newPeriod) } - opts := orca.OOBListenerOptions{ReportInterval: newPeriod} + opts := orca.OOBListenerOptions{ReportInterval: time.Duration(newPeriod)} w.stopORCAListener = orca.RegisterOOBListener(w.SubConn, w, opts) } diff --git a/balancer/weightedroundrobin/balancer_test.go b/balancer/weightedroundrobin/balancer_test.go index f0cf4dab2f4e..a0a84a7f057b 100644 --- a/balancer/weightedroundrobin/balancer_test.go +++ b/balancer/weightedroundrobin/balancer_test.go @@ -53,31 +53,32 @@ func Test(t *testing.T) { const defaultTestTimeout = 10 * time.Second const weightUpdatePeriod = 50 * time.Millisecond +const weightExpirationPeriod = time.Minute const oobReportingInterval = 10 * time.Millisecond func init() { iwrr.AllowAnyWeightUpdatePeriod = true } -func boolp(b bool) *bool { return &b } -func float64p(f float64) *float64 { return &f } -func durationp(d time.Duration) *time.Duration { return &d } +func boolp(b bool) *bool { return &b } +func float64p(f float64) *float64 { return &f } +func stringp(s string) *string { return &s } var ( perCallConfig = iwrr.LBConfig{ EnableOOBLoadReport: boolp(false), - OOBReportingPeriod: durationp(5 * time.Millisecond), - BlackoutPeriod: durationp(0), - WeightExpirationPeriod: durationp(time.Minute), - WeightUpdatePeriod: durationp(weightUpdatePeriod), + OOBReportingPeriod: stringp("0.005s"), + BlackoutPeriod: stringp("0s"), + WeightExpirationPeriod: stringp("60s"), + WeightUpdatePeriod: stringp(".050s"), ErrorUtilizationPenalty: float64p(0), } oobConfig = iwrr.LBConfig{ EnableOOBLoadReport: boolp(true), - OOBReportingPeriod: durationp(5 * time.Millisecond), - BlackoutPeriod: durationp(0), - WeightExpirationPeriod: durationp(time.Minute), - WeightUpdatePeriod: durationp(weightUpdatePeriod), + OOBReportingPeriod: stringp("0.005s"), + BlackoutPeriod: stringp("0s"), + WeightExpirationPeriod: stringp("60s"), + WeightUpdatePeriod: stringp(".050s"), ErrorUtilizationPenalty: float64p(0), } ) @@ -458,10 +459,10 @@ func (s) TestBalancer_TwoAddresses_BlackoutPeriod(t *testing.T) { t.Cleanup(func() { setTimeNow(time.Now) }) testCases := []struct { - blackoutPeriodCfg *time.Duration + blackoutPeriodCfg *string blackoutPeriod time.Duration }{{ - blackoutPeriodCfg: durationp(time.Second), + blackoutPeriodCfg: stringp("1s"), blackoutPeriod: time.Second, }, { blackoutPeriodCfg: nil, @@ -549,7 +550,7 @@ func (s) TestBalancer_TwoAddresses_WeightExpiration(t 
*testing.T) { srv2.oobMetrics.SetCPUUtilization(.1) cfg := oobConfig - cfg.OOBReportingPeriod = durationp(time.Minute) + cfg.OOBReportingPeriod = stringp("60s") sc := svcConfig(t, cfg) if err := srv1.StartClient(grpc.WithDefaultServiceConfig(sc)); err != nil { t.Fatalf("Error starting client: %v", err) @@ -566,7 +567,7 @@ func (s) TestBalancer_TwoAddresses_WeightExpiration(t *testing.T) { // Advance what time.Now returns to the weight expiration time minus 1s to // ensure all weights are still honored. - setNow(start.Add(*cfg.WeightExpirationPeriod - time.Second)) + setNow(start.Add(weightExpirationPeriod - time.Second)) // Wait for the weight update period to allow the new weights to be processed. time.Sleep(weightUpdatePeriod) @@ -574,7 +575,7 @@ func (s) TestBalancer_TwoAddresses_WeightExpiration(t *testing.T) { // Advance what time.Now returns to the weight expiration time plus 1s to // ensure all weights expired and addresses are routed evenly. - setNow(start.Add(*cfg.WeightExpirationPeriod + time.Second)) + setNow(start.Add(weightExpirationPeriod + time.Second)) // Wait for the weight expiration period so the weights have expired. time.Sleep(weightUpdatePeriod) diff --git a/balancer/weightedroundrobin/config.go b/balancer/weightedroundrobin/config.go index caad18faa11d..38f89d32fb43 100644 --- a/balancer/weightedroundrobin/config.go +++ b/balancer/weightedroundrobin/config.go @@ -19,8 +19,7 @@ package weightedroundrobin import ( - "time" - + iserviceconfig "google.golang.org/grpc/internal/serviceconfig" "google.golang.org/grpc/serviceconfig" ) @@ -34,7 +33,7 @@ type lbConfig struct { // Load reporting interval to request from the server. Note that the // server may not provide reports as frequently as the client requests. // Used only when enable_oob_load_report is true. Default is 10 seconds. - OOBReportingPeriod time.Duration `json:"oobReportingPeriod,omitempty"` + OOBReportingPeriod iserviceconfig.Duration `json:"oobReportingPeriod,omitempty"` // A given endpoint must report load metrics continuously for at least this // long before the endpoint weight will be used. This avoids churn when @@ -42,17 +41,17 @@ type lbConfig struct { // after we establish a connection to an endpoint and after // weight_expiration_period has caused us to stop using the most recent // load metrics. Default is 10 seconds. - BlackoutPeriod time.Duration `json:"blackoutPeriod,omitempty"` + BlackoutPeriod iserviceconfig.Duration `json:"blackoutPeriod,omitempty"` // If a given endpoint has not reported load metrics in this long, // then we stop using the reported weight. This ensures that we do // not continue to use very stale weights. Once we stop using a stale // value, if we later start seeing fresh reports again, the // blackout_period applies. Defaults to 3 minutes. - WeightExpirationPeriod time.Duration `json:"weightExpirationPeriod,omitempty"` + WeightExpirationPeriod iserviceconfig.Duration `json:"weightExpirationPeriod,omitempty"` // How often endpoint weights are recalculated. Default is 1 second. - WeightUpdatePeriod time.Duration `json:"weightUpdatePeriod,omitempty"` + WeightUpdatePeriod iserviceconfig.Duration `json:"weightUpdatePeriod,omitempty"` // The multiplier used to adjust endpoint weights with the error rate // calculated as eps/qps. Default is 1.0. 
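Taken together, these four fields now use the protobuf JSON string form for
durations rather than Go's int64 nanoseconds. A sketch of a service config
exercising them, assuming the policy's registered name weighted_round_robin
and using the documented defaults as values (illustrative only):

  {
    "loadBalancingConfig": [{
      "weighted_round_robin": {
        "enableOobLoadReport": true,
        "oobReportingPeriod": "10s",
        "blackoutPeriod": "10s",
        "weightExpirationPeriod": "180s",
        "weightUpdatePeriod": "1s",
        "errorUtilizationPenalty": 1.0
      }
    }]
  }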
diff --git a/balancer/weightedroundrobin/internal/internal.go b/balancer/weightedroundrobin/internal/internal.go
index d39830261b21..7b64fbf4e574 100644
--- a/balancer/weightedroundrobin/internal/internal.go
+++ b/balancer/weightedroundrobin/internal/internal.go
@@ -31,14 +31,14 @@ var AllowAnyWeightUpdatePeriod bool
 // LBConfig allows tests to produce a JSON form of the config from the struct
 // instead of using a string.
 type LBConfig struct {
-	EnableOOBLoadReport     *bool          `json:"enableOobLoadReport,omitempty"`
-	OOBReportingPeriod      *time.Duration `json:"oobReportingPeriod,omitempty"`
-	BlackoutPeriod          *time.Duration `json:"blackoutPeriod,omitempty"`
-	WeightExpirationPeriod  *time.Duration `json:"weightExpirationPeriod,omitempty"`
-	WeightUpdatePeriod      *time.Duration `json:"weightUpdatePeriod,omitempty"`
-	ErrorUtilizationPenalty *float64       `json:"errorUtilizationPenalty,omitempty"`
+	EnableOOBLoadReport     *bool    `json:"enableOobLoadReport,omitempty"`
+	OOBReportingPeriod      *string  `json:"oobReportingPeriod,omitempty"`
+	BlackoutPeriod          *string  `json:"blackoutPeriod,omitempty"`
+	WeightExpirationPeriod  *string  `json:"weightExpirationPeriod,omitempty"`
+	WeightUpdatePeriod      *string  `json:"weightUpdatePeriod,omitempty"`
+	ErrorUtilizationPenalty *float64 `json:"errorUtilizationPenalty,omitempty"`
 }
 
 // TimeNow can be overridden by tests to return a different value for the
 // current time.
 var TimeNow = time.Now
diff --git a/internal/serviceconfig/duration.go b/internal/serviceconfig/duration.go
new file mode 100644
index 000000000000..11d82afcc7ec
--- /dev/null
+++ b/internal/serviceconfig/duration.go
@@ -0,0 +1,130 @@
+/*
+ *
+ * Copyright 2023 gRPC authors.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ */
+
+package serviceconfig
+
+import (
+	"encoding/json"
+	"fmt"
+	"math"
+	"strconv"
+	"strings"
+	"time"
+)
+
+// Duration defines JSON marshal and unmarshal methods to conform to the
+// protobuf JSON spec defined [here].
+//
+// [here]: https://protobuf.dev/reference/protobuf/google.protobuf/#duration
+type Duration time.Duration
+
+func (d Duration) String() string {
+	return fmt.Sprint(time.Duration(d))
+}
+
+// MarshalJSON converts from d to a JSON string output.
+func (d Duration) MarshalJSON() ([]byte, error) {
+	ns := time.Duration(d).Nanoseconds()
+	sec := ns / int64(time.Second)
+	ns = ns % int64(time.Second)
+
+	var sign string
+	if sec < 0 || ns < 0 {
+		sign, sec, ns = "-", -1*sec, -1*ns
+	}
+
+	// Generated output always contains 0, 3, 6, or 9 fractional digits,
+	// depending on required precision.
+	str := fmt.Sprintf("%s%d.%09d", sign, sec, ns)
+	str = strings.TrimSuffix(str, "000")
+	str = strings.TrimSuffix(str, "000")
+	str = strings.TrimSuffix(str, ".000")
+	return []byte(fmt.Sprintf("\"%ss\"", str)), nil
+}
+
+// UnmarshalJSON unmarshals b as a duration JSON string into d.
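+// For example, "1s", "1.5s", and "-0.200s" are accepted, while "5m" (wrong
+// unit) and "5.3.2s" (multiple decimal points) are rejected; values that
+// overflow time.Duration saturate at math.MaxInt64 or math.MinInt64.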
+func (d *Duration) UnmarshalJSON(b []byte) error { + var s string + if err := json.Unmarshal(b, &s); err != nil { + return err + } + if !strings.HasSuffix(s, "s") { + return fmt.Errorf("malformed duration %q: missing seconds unit", s) + } + neg := false + if s[0] == '-' { + neg = true + s = s[1:] + } + ss := strings.SplitN(s[:len(s)-1], ".", 3) + if len(ss) > 2 { + return fmt.Errorf("malformed duration %q: too many decimals", s) + } + // hasDigits is set if either the whole or fractional part of the number is + // present, since both are optional but one is required. + hasDigits := false + var sec, ns int64 + if len(ss[0]) > 0 { + var err error + if sec, err = strconv.ParseInt(ss[0], 10, 64); err != nil { + return fmt.Errorf("malformed duration %q: %v", s, err) + } + // Maximum seconds value per the durationpb spec. + const maxProtoSeconds = 315_576_000_000 + if sec > maxProtoSeconds { + return fmt.Errorf("out of range: %q", s) + } + hasDigits = true + } + if len(ss) == 2 && len(ss[1]) > 0 { + if len(ss[1]) > 9 { + return fmt.Errorf("malformed duration %q: too many digits after decimal", s) + } + var err error + if ns, err = strconv.ParseInt(ss[1], 10, 64); err != nil { + return fmt.Errorf("malformed duration %q: %v", s, err) + } + for i := 9; i > len(ss[1]); i-- { + ns *= 10 + } + hasDigits = true + } + if !hasDigits { + return fmt.Errorf("malformed duration %q: contains no numbers", s) + } + + if neg { + sec *= -1 + ns *= -1 + } + + // Maximum/minimum seconds/nanoseconds representable by Go's time.Duration. + const maxSeconds = math.MaxInt64 / int64(time.Second) + const maxNanosAtMaxSeconds = math.MaxInt64 % int64(time.Second) + const minSeconds = math.MinInt64 / int64(time.Second) + const minNanosAtMinSeconds = math.MinInt64 % int64(time.Second) + + if sec > maxSeconds || (sec == maxSeconds && ns >= maxNanosAtMaxSeconds) { + *d = Duration(math.MaxInt64) + } else if sec < minSeconds || (sec == minSeconds && ns <= minNanosAtMinSeconds) { + *d = Duration(math.MinInt64) + } else { + *d = Duration(sec*int64(time.Second) + ns) + } + return nil +} diff --git a/internal/serviceconfig/duration_test.go b/internal/serviceconfig/duration_test.go new file mode 100644 index 000000000000..5696541aa870 --- /dev/null +++ b/internal/serviceconfig/duration_test.go @@ -0,0 +1,87 @@ +/* + * + * Copyright 2023 gRPC authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +package serviceconfig + +import ( + "fmt" + "math" + "strings" + "testing" + "time" + + "google.golang.org/grpc/internal/grpcrand" +) + +// Tests both marshalling and unmarshalling of Durations. +func TestDuration_MarshalUnmarshal(t *testing.T) { + testCases := []struct { + json string + td time.Duration + unmarshalErr error + noMarshal bool + }{ + // Basic values. 
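+		// (noMarshal marks inputs that unmarshal successfully but are not in
+		// the canonical form MarshalJSON produces, so the marshal leg of the
+		// round trip is skipped for them.)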
+ {json: `"1s"`, td: time.Second}, + {json: `"-100.700s"`, td: -100*time.Second - 700*time.Millisecond}, + {json: `".050s"`, td: 50 * time.Millisecond, noMarshal: true}, + {json: `"-.001s"`, td: -1 * time.Millisecond, noMarshal: true}, + {json: `"-0.200s"`, td: -200 * time.Millisecond}, + // Positive near / out of bounds. + {json: `"9223372036s"`, td: 9223372036 * time.Second}, + {json: `"9223372037s"`, td: math.MaxInt64, noMarshal: true}, + {json: `"9223372036.854775807s"`, td: math.MaxInt64}, + {json: `"9223372036.854775808s"`, td: math.MaxInt64, noMarshal: true}, + {json: `"315576000000s"`, td: math.MaxInt64, noMarshal: true}, + {json: `"315576000001s"`, unmarshalErr: fmt.Errorf("out of range")}, + // Negative near / out of bounds. + {json: `"-9223372036s"`, td: -9223372036 * time.Second}, + {json: `"-9223372037s"`, td: math.MinInt64, noMarshal: true}, + {json: `"-9223372036.854775808s"`, td: math.MinInt64}, + {json: `"-9223372036.854775809s"`, td: math.MinInt64, noMarshal: true}, + {json: `"-315576000000s"`, td: math.MinInt64, noMarshal: true}, + {json: `"-315576000001s"`, unmarshalErr: fmt.Errorf("out of range")}, + // Parse errors. + {json: `123s`, unmarshalErr: fmt.Errorf("invalid character")}, + {json: `"5m"`, unmarshalErr: fmt.Errorf("malformed duration")}, + {json: `"5.3.2s"`, unmarshalErr: fmt.Errorf("malformed duration")}, + {json: `"x.3s"`, unmarshalErr: fmt.Errorf("malformed duration")}, + {json: `"3.xs"`, unmarshalErr: fmt.Errorf("malformed duration")}, + {json: `"3.1234567890s"`, unmarshalErr: fmt.Errorf("malformed duration")}, + {json: `".s"`, unmarshalErr: fmt.Errorf("malformed duration")}, + {json: `"s"`, unmarshalErr: fmt.Errorf("malformed duration")}, + } + for _, tc := range testCases { + // Seed `got` with a random value to ensure we properly reset it in all + // non-error cases. 
+ got := Duration(grpcrand.Uint64()) + err := got.UnmarshalJSON([]byte(tc.json)) + if (err == nil && time.Duration(got) != tc.td) || + (err != nil) != (tc.unmarshalErr != nil) || !strings.Contains(fmt.Sprint(err), fmt.Sprint(tc.unmarshalErr)) { + t.Errorf("UnmarshalJSON of %v = %v, %v; want %v, %v", tc.json, time.Duration(got), err, tc.td, tc.unmarshalErr) + } + + if tc.unmarshalErr == nil && !tc.noMarshal { + d := Duration(tc.td) + got, err := d.MarshalJSON() + if string(got) != tc.json || err != nil { + t.Errorf("MarshalJSON of %v = %v, %v; want %v, nil", d, string(got), err, tc.json) + } + } + } +} From fd376a5cbdc9d05257936f683396093812b7ce22 Mon Sep 17 00:00:00 2001 From: Doug Fawley Date: Fri, 12 May 2023 11:01:06 -0700 Subject: [PATCH 35/60] test: fix flaky TimeoutOnDeadServer test; some cleanups (#6276) --- test/channelz_test.go | 28 ++--------- test/clientconn_state_transition_test.go | 18 +++++++ test/creds_test.go | 36 ++++--------- test/end2end_test.go | 64 ++++-------------------- test/goaway_test.go | 13 +---- test/healthcheck_test.go | 33 ++++-------- test/pickfirst_test.go | 6 +-- test/roundrobin_test.go | 19 ++----- 8 files changed, 55 insertions(+), 162 deletions(-) diff --git a/test/channelz_test.go b/test/channelz_test.go index 0a6ff579773f..d43c155a15df 100644 --- a/test/channelz_test.go +++ b/test/channelz_test.go @@ -1531,21 +1531,11 @@ func (s) TestCZSubChannelTraceCreationDeletion(t *testing.T) { t.Fatal(err) } - // Wait for ready ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second) defer cancel() - for src := te.cc.GetState(); src != connectivity.Ready; src = te.cc.GetState() { - if !te.cc.WaitForStateChange(ctx, src) { - t.Fatalf("timed out waiting for state change. got %v; want %v", src, connectivity.Ready) - } - } + awaitState(ctx, t, te.cc, connectivity.Ready) r.UpdateState(resolver.State{Addresses: []resolver.Address{{Addr: "fake address"}}}) - // Wait for not-ready. - for src := te.cc.GetState(); src == connectivity.Ready; src = te.cc.GetState() { - if !te.cc.WaitForStateChange(ctx, src) { - t.Fatalf("timed out waiting for state change. got %v; want !%v", src, connectivity.Ready) - } - } + awaitNotState(ctx, t, te.cc, connectivity.Ready) if err := verifyResultWithDelay(func() (bool, error) { tcs, _ := channelz.GetTopChannels(0, 0) @@ -2016,21 +2006,11 @@ func (s) TestCZTraceOverwriteSubChannelDeletion(t *testing.T) { t.Fatal(err) } - // Wait for ready ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second) defer cancel() - for src := te.cc.GetState(); src != connectivity.Ready; src = te.cc.GetState() { - if !te.cc.WaitForStateChange(ctx, src) { - t.Fatalf("timed out waiting for state change. got %v; want %v", src, connectivity.Ready) - } - } + awaitState(ctx, t, te.cc, connectivity.Ready) r.UpdateState(resolver.State{Addresses: []resolver.Address{{Addr: "fake address"}}}) - // Wait for not-ready. - for src := te.cc.GetState(); src == connectivity.Ready; src = te.cc.GetState() { - if !te.cc.WaitForStateChange(ctx, src) { - t.Fatalf("timed out waiting for state change. got %v; want !%v", src, connectivity.Ready) - } - } + awaitNotState(ctx, t, te.cc, connectivity.Ready) // verify that the subchannel no longer exist due to trace referencing it got overwritten. 
if err := verifyResultWithDelay(func() (bool, error) { diff --git a/test/clientconn_state_transition_test.go b/test/clientconn_state_transition_test.go index 1f15c6905ad6..57f932d1eb5e 100644 --- a/test/clientconn_state_transition_test.go +++ b/test/clientconn_state_transition_test.go @@ -519,3 +519,21 @@ func stayConnected(ctx context.Context, cc *grpc.ClientConn) { } } } + +func awaitState(ctx context.Context, t *testing.T, cc *grpc.ClientConn, stateWant connectivity.State) { + t.Helper() + for state := cc.GetState(); state != stateWant; state = cc.GetState() { + if !cc.WaitForStateChange(ctx, state) { + t.Fatalf("timed out waiting for state change. got %v; want %v", state, stateWant) + } + } +} + +func awaitNotState(ctx context.Context, t *testing.T, cc *grpc.ClientConn, stateDoNotWant connectivity.State) { + t.Helper() + for state := cc.GetState(); state == stateDoNotWant; state = cc.GetState() { + if !cc.WaitForStateChange(ctx, state) { + t.Fatalf("timed out waiting for state change. got %v; want NOT %v", state, stateDoNotWant) + } + } +} diff --git a/test/creds_test.go b/test/creds_test.go index 70af9945cc8e..06c716a3ee92 100644 --- a/test/creds_test.go +++ b/test/creds_test.go @@ -200,7 +200,7 @@ func (s) TestGRPCMethodAccessibleToCredsViaContextRequestInfo(t *testing.T) { cc := te.clientConn(grpc.WithPerRPCCredentials(&methodTestCreds{})) tc := testgrpc.NewTestServiceClient(cc) - ctx, cancel := context.WithTimeout(context.Background(), 3*time.Second) + ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout) defer cancel() if _, err := tc.EmptyCall(ctx, &testpb.Empty{}); status.Convert(err).Message() != wantMethod { t.Fatalf("ss.client.EmptyCall(_, _) = _, %v; want _, _.Message()=%q", err, wantMethod) @@ -233,7 +233,7 @@ func (s) TestFailFastRPCErrorOnBadCertificates(t *testing.T) { defer te.tearDown() opts := []grpc.DialOption{grpc.WithTransportCredentials(clientAlwaysFailCred{})} - ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second) + ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout) defer cancel() cc, err := grpc.DialContext(ctx, te.srvAddr, opts...) if err != nil { @@ -261,17 +261,15 @@ func (s) TestWaitForReadyRPCErrorOnBadCertificates(t *testing.T) { defer te.tearDown() opts := []grpc.DialOption{grpc.WithTransportCredentials(clientAlwaysFailCred{})} - dctx, dcancel := context.WithTimeout(context.Background(), 10*time.Second) - defer dcancel() - cc, err := grpc.DialContext(dctx, te.srvAddr, opts...) + ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout) + defer cancel() + cc, err := grpc.DialContext(ctx, te.srvAddr, opts...) 
if err != nil { t.Fatalf("Dial(_) = %v, want %v", err, nil) } defer cc.Close() tc := testgrpc.NewTestServiceClient(cc) - ctx, cancel := context.WithTimeout(context.Background(), 1*time.Second) - defer cancel() if _, err = tc.EmptyCall(ctx, &testpb.Empty{}, grpc.WaitForReady(true)); strings.Contains(err.Error(), clientAlwaysFailCredErrorMsg) { return } @@ -444,17 +442,9 @@ func (s) TestCredsHandshakeAuthority(t *testing.T) { defer cc.Close() r.UpdateState(resolver.State{Addresses: []resolver.Address{{Addr: lis.Addr().String()}}}) - ctx, cancel := context.WithTimeout(context.Background(), 100*time.Millisecond) + ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout) defer cancel() - for { - s := cc.GetState() - if s == connectivity.Ready { - break - } - if !cc.WaitForStateChange(ctx, s) { - t.Fatalf("ClientConn is not ready after 100 ms") - } - } + awaitState(ctx, t, cc, connectivity.Ready) if cred.got != testAuthority { t.Fatalf("client creds got authority: %q, want: %q", cred.got, testAuthority) @@ -484,17 +474,9 @@ func (s) TestCredsHandshakeServerNameAuthority(t *testing.T) { defer cc.Close() r.UpdateState(resolver.State{Addresses: []resolver.Address{{Addr: lis.Addr().String(), ServerName: testServerName}}}) - ctx, cancel := context.WithTimeout(context.Background(), 100*time.Millisecond) + ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout) defer cancel() - for { - s := cc.GetState() - if s == connectivity.Ready { - break - } - if !cc.WaitForStateChange(ctx, s) { - t.Fatalf("ClientConn is not ready after 100 ms") - } - } + awaitState(ctx, t, cc, connectivity.Ready) if cred.got != testServerName { t.Fatalf("client creds got authority: %q, want: %q", cred.got, testAuthority) diff --git a/test/end2end_test.go b/test/end2end_test.go index 824d7c56c041..865285b35a2d 100644 --- a/test/end2end_test.go +++ b/test/end2end_test.go @@ -965,38 +965,25 @@ func (s) TestTimeoutOnDeadServer(t *testing.T) { func testTimeoutOnDeadServer(t *testing.T, e env) { te := newTest(t, e) te.userAgent = testAppUA - te.declareLogNoise( - "transport: http2Client.notifyError got notified that the client transport was broken EOF", - "grpc: addrConn.transportMonitor exits due to: grpc: the connection is closing", - "grpc: addrConn.resetTransport failed to create client transport: connection error", - ) te.startServer(&testServer{security: e.security}) defer te.tearDown() cc := te.clientConn() tc := testgrpc.NewTestServiceClient(cc) ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout) + defer cancel() if _, err := tc.EmptyCall(ctx, &testpb.Empty{}, grpc.WaitForReady(true)); err != nil { t.Fatalf("TestService/EmptyCall(_, _) = _, %v, want _, ", err) } + // Wait for the client to report READY, stop the server, then wait for the + // client to notice the connection is gone. + awaitState(ctx, t, cc, connectivity.Ready) te.srv.Stop() - cancel() - - // Wait for the client to notice the connection is gone. 
- ctx, cancel = context.WithTimeout(context.Background(), 500*time.Millisecond) - state := cc.GetState() - for ; state == connectivity.Ready && cc.WaitForStateChange(ctx, state); state = cc.GetState() { - } - cancel() - if state == connectivity.Ready { - t.Fatalf("Timed out waiting for non-ready state") - } - ctx, cancel = context.WithTimeout(context.Background(), time.Millisecond) + awaitNotState(ctx, t, cc, connectivity.Ready) + ctx, cancel = context.WithTimeout(ctx, 5*time.Millisecond) _, err := tc.EmptyCall(ctx, &testpb.Empty{}, grpc.WaitForReady(true)) cancel() - if e.balancer != "" && status.Code(err) != codes.DeadlineExceeded { - // If e.balancer == nil, the ac will stop reconnecting because the dialer returns non-temp error, - // the error will be an internal error. + if status.Code(err) != codes.DeadlineExceeded { t.Fatalf("TestService/EmptyCall(%v, _) = _, %v, want _, error code: %s", ctx, err, codes.DeadlineExceeded) } awaitNewConnLogOutput() @@ -1070,11 +1057,6 @@ func (s) TestFailFast(t *testing.T) { func testFailFast(t *testing.T, e env) { te := newTest(t, e) te.userAgent = testAppUA - te.declareLogNoise( - "transport: http2Client.notifyError got notified that the client transport was broken EOF", - "grpc: addrConn.transportMonitor exits due to: grpc: the connection is closing", - "grpc: addrConn.resetTransport failed to create client transport: connection error", - ) te.startServer(&testServer{security: e.security}) defer te.tearDown() @@ -1114,9 +1096,6 @@ func testServiceConfigSetup(t *testing.T, e env) *test { te := newTest(t, e) te.userAgent = testAppUA te.declareLogNoise( - "transport: http2Client.notifyError got notified that the client transport was broken EOF", - "grpc: addrConn.transportMonitor exits due to: grpc: the connection is closing", - "grpc: addrConn.resetTransport failed to create client transport: connection error", "Failed to dial : context canceled; please retry.", ) return te @@ -1746,9 +1725,6 @@ func testPreloaderClientSend(t *testing.T, e env) { te := newTest(t, e) te.userAgent = testAppUA te.declareLogNoise( - "transport: http2Client.notifyError got notified that the client transport was broken EOF", - "grpc: addrConn.transportMonitor exits due to: grpc: the connection is closing", - "grpc: addrConn.resetTransport failed to create client transport: connection error", "Failed to dial : context canceled; please retry.", ) te.startServer(&testServer{security: e.security}) @@ -1875,9 +1851,6 @@ func testMaxMsgSizeClientDefault(t *testing.T, e env) { te := newTest(t, e) te.userAgent = testAppUA te.declareLogNoise( - "transport: http2Client.notifyError got notified that the client transport was broken EOF", - "grpc: addrConn.transportMonitor exits due to: grpc: the connection is closing", - "grpc: addrConn.resetTransport failed to create client transport: connection error", "Failed to dial : context canceled; please retry.", ) te.startServer(&testServer{security: e.security}) @@ -1942,9 +1915,6 @@ func testMaxMsgSizeClientAPI(t *testing.T, e env) { te.maxClientReceiveMsgSize = newInt(1024) te.maxClientSendMsgSize = newInt(1024) te.declareLogNoise( - "transport: http2Client.notifyError got notified that the client transport was broken EOF", - "grpc: addrConn.transportMonitor exits due to: grpc: the connection is closing", - "grpc: addrConn.resetTransport failed to create client transport: connection error", "Failed to dial : context canceled; please retry.", ) te.startServer(&testServer{security: e.security}) @@ -2030,9 +2000,6 @@ func 
testMaxMsgSizeServerAPI(t *testing.T, e env) { te.maxServerReceiveMsgSize = newInt(1024) te.maxServerSendMsgSize = newInt(1024) te.declareLogNoise( - "transport: http2Client.notifyError got notified that the client transport was broken EOF", - "grpc: addrConn.transportMonitor exits due to: grpc: the connection is closing", - "grpc: addrConn.resetTransport failed to create client transport: connection error", "Failed to dial : context canceled; please retry.", ) te.startServer(&testServer{security: e.security}) @@ -2141,11 +2108,6 @@ func testTap(t *testing.T, e env) { te.userAgent = testAppUA ttap := &myTap{} te.tapHandle = ttap.handle - te.declareLogNoise( - "transport: http2Client.notifyError got notified that the client transport was broken EOF", - "grpc: addrConn.transportMonitor exits due to: grpc: the connection is closing", - "grpc: addrConn.resetTransport failed to create client transport: connection error", - ) te.startServer(&testServer{security: e.security}) defer te.tearDown() @@ -4878,17 +4840,9 @@ func testWaitForReadyConnection(t *testing.T, e env) { cc := te.clientConn() // Non-blocking dial. tc := testgrpc.NewTestServiceClient(cc) - ctx, cancel := context.WithTimeout(context.Background(), time.Second) - defer cancel() - state := cc.GetState() - // Wait for connection to be Ready. - for ; state != connectivity.Ready && cc.WaitForStateChange(ctx, state); state = cc.GetState() { - } - if state != connectivity.Ready { - t.Fatalf("Want connection state to be Ready, got %v", state) - } - ctx, cancel = context.WithTimeout(context.Background(), time.Second) + ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout) defer cancel() + awaitState(ctx, t, cc, connectivity.Ready) // Make a fail-fast RPC. if _, err := tc.EmptyCall(ctx, &testpb.Empty{}); err != nil { t.Fatalf("TestService/EmptyCall(_,_) = _, %v, want _, nil", err) diff --git a/test/goaway_test.go b/test/goaway_test.go index 48ef197e74cc..c44bb831b70b 100644 --- a/test/goaway_test.go +++ b/test/goaway_test.go @@ -594,12 +594,7 @@ func (s) TestGoAwayThenClose(t *testing.T) { client := testgrpc.NewTestServiceClient(cc) t.Log("Waiting for the ClientConn to enter READY state.") - state := cc.GetState() - for ; state != connectivity.Ready && cc.WaitForStateChange(ctx, state); state = cc.GetState() { - } - if ctx.Err() != nil { - t.Fatalf("timed out waiting for READY channel state; last state = %v", state) - } + awaitState(ctx, t, cc, connectivity.Ready) // We make a streaming RPC and do an one-message-round-trip to make sure // it's created on connection 1. 
@@ -622,11 +617,7 @@ func (s) TestGoAwayThenClose(t *testing.T) { go s1.GracefulStop() t.Log("Waiting for the ClientConn to enter IDLE state.") - for ; state != connectivity.Idle && cc.WaitForStateChange(ctx, state); state = cc.GetState() { - } - if ctx.Err() != nil { - t.Fatalf("timed out waiting for IDLE channel state; last state = %v", state) - } + awaitState(ctx, t, cc, connectivity.Idle) t.Log("Performing another RPC to create a connection to server 2.") if _, err := client.UnaryCall(ctx, &testpb.SimpleRequest{}); err != nil { diff --git a/test/healthcheck_test.go b/test/healthcheck_test.go index 1fb4cf46e2be..a6865b803026 100644 --- a/test/healthcheck_test.go +++ b/test/healthcheck_test.go @@ -212,44 +212,33 @@ func (s) TestHealthCheckWatchStateChange(t *testing.T) { ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout) defer cancel() - if ok := cc.WaitForStateChange(ctx, connectivity.Idle); !ok { - t.Fatal("ClientConn is still in IDLE state when the context times out.") - } - if ok := cc.WaitForStateChange(ctx, connectivity.Connecting); !ok { - t.Fatal("ClientConn is still in CONNECTING state when the context times out.") - } + awaitNotState(ctx, t, cc, connectivity.Idle) + awaitNotState(ctx, t, cc, connectivity.Connecting) + awaitState(ctx, t, cc, connectivity.TransientFailure) if s := cc.GetState(); s != connectivity.TransientFailure { t.Fatalf("ClientConn is in %v state, want TRANSIENT FAILURE", s) } ts.SetServingStatus("foo", healthpb.HealthCheckResponse_SERVING) - if ok := cc.WaitForStateChange(ctx, connectivity.TransientFailure); !ok { - t.Fatal("ClientConn is still in TRANSIENT FAILURE state when the context times out.") - } + awaitNotState(ctx, t, cc, connectivity.TransientFailure) if s := cc.GetState(); s != connectivity.Ready { t.Fatalf("ClientConn is in %v state, want READY", s) } ts.SetServingStatus("foo", healthpb.HealthCheckResponse_SERVICE_UNKNOWN) - if ok := cc.WaitForStateChange(ctx, connectivity.Ready); !ok { - t.Fatal("ClientConn is still in READY state when the context times out.") - } + awaitNotState(ctx, t, cc, connectivity.Ready) if s := cc.GetState(); s != connectivity.TransientFailure { t.Fatalf("ClientConn is in %v state, want TRANSIENT FAILURE", s) } ts.SetServingStatus("foo", healthpb.HealthCheckResponse_SERVING) - if ok := cc.WaitForStateChange(ctx, connectivity.TransientFailure); !ok { - t.Fatal("ClientConn is still in TRANSIENT FAILURE state when the context times out.") - } + awaitNotState(ctx, t, cc, connectivity.TransientFailure) if s := cc.GetState(); s != connectivity.Ready { t.Fatalf("ClientConn is in %v state, want READY", s) } ts.SetServingStatus("foo", healthpb.HealthCheckResponse_UNKNOWN) - if ok := cc.WaitForStateChange(ctx, connectivity.Ready); !ok { - t.Fatal("ClientConn is still in READY state when the context times out.") - } + awaitNotState(ctx, t, cc, connectivity.Ready) if s := cc.GetState(); s != connectivity.TransientFailure { t.Fatalf("ClientConn is in %v state, want TRANSIENT FAILURE", s) } @@ -278,12 +267,8 @@ func (s) TestHealthCheckHealthServerNotRegistered(t *testing.T) { ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout) defer cancel() - if ok := cc.WaitForStateChange(ctx, connectivity.Idle); !ok { - t.Fatal("ClientConn is still in IDLE state when the context times out.") - } - if ok := cc.WaitForStateChange(ctx, connectivity.Connecting); !ok { - t.Fatal("ClientConn is still in CONNECTING state when the context times out.") - } + awaitNotState(ctx, t, cc, connectivity.Idle) + 
awaitNotState(ctx, t, cc, connectivity.Connecting) if s := cc.GetState(); s != connectivity.Ready { t.Fatalf("ClientConn is in %v state, want READY", s) } diff --git a/test/pickfirst_test.go b/test/pickfirst_test.go index 15b6dcd84616..800d2f4178c2 100644 --- a/test/pickfirst_test.go +++ b/test/pickfirst_test.go @@ -250,11 +250,7 @@ func (s) TestPickFirst_NewAddressWhileBlocking(t *testing.T) { // Send a resolver update with no addresses. This should push the channel into // TransientFailure. r.UpdateState(resolver.State{}) - for state := cc.GetState(); state != connectivity.TransientFailure; state = cc.GetState() { - if !cc.WaitForStateChange(ctx, state) { - t.Fatalf("timeout waiting for state change. got %v; want %v", state, connectivity.TransientFailure) - } - } + awaitState(ctx, t, cc, connectivity.TransientFailure) doneCh := make(chan struct{}) client := testgrpc.NewTestServiceClient(cc) diff --git a/test/roundrobin_test.go b/test/roundrobin_test.go index 8069e32358fb..92fed10ffed0 100644 --- a/test/roundrobin_test.go +++ b/test/roundrobin_test.go @@ -119,11 +119,7 @@ func (s) TestRoundRobin_AddressesRemoved(t *testing.T) { // Send a resolver update with no addresses. This should push the channel into // TransientFailure. r.UpdateState(resolver.State{Addresses: []resolver.Address{}}) - for state := cc.GetState(); state != connectivity.TransientFailure; state = cc.GetState() { - if !cc.WaitForStateChange(ctx, state) { - t.Fatalf("timeout waiting for state change. got %v; want %v", state, connectivity.TransientFailure) - } - } + awaitState(ctx, t, cc, connectivity.TransientFailure) const msgWant = "produced zero addresses" client := testgrpc.NewTestServiceClient(cc) @@ -145,11 +141,7 @@ func (s) TestRoundRobin_NewAddressWhileBlocking(t *testing.T) { // Send a resolver update with no addresses. This should push the channel into // TransientFailure. r.UpdateState(resolver.State{Addresses: []resolver.Address{}}) - for state := cc.GetState(); state != connectivity.TransientFailure; state = cc.GetState() { - if !cc.WaitForStateChange(ctx, state) { - t.Fatalf("timeout waiting for state change. got %v; want %v", state, connectivity.TransientFailure) - } - } + awaitState(ctx, t, cc, connectivity.TransientFailure) client := testgrpc.NewTestServiceClient(cc) doneCh := make(chan struct{}) @@ -229,12 +221,7 @@ func (s) TestRoundRobin_AllServersDown(t *testing.T) { b.Stop() } - // Wait for TransientFailure. - for state := cc.GetState(); state != connectivity.TransientFailure; state = cc.GetState() { - if !cc.WaitForStateChange(ctx, state) { - t.Fatalf("timeout waiting for state change. got %v; want %v", state, connectivity.TransientFailure) - } - } + awaitState(ctx, t, cc, connectivity.TransientFailure) // Failfast RPCs should fail with Unavailable. 
client := testgrpc.NewTestServiceClient(cc) From 68381e7bd2c31b3bd5ca5f31b25e338192f16049 Mon Sep 17 00:00:00 2001 From: Zach Reyes <39203661+zasweq@users.noreply.github.com> Date: Fri, 12 May 2023 15:28:07 -0400 Subject: [PATCH 36/60] xds: WRR in xDS (#6272) --- test/xds/xds_client_custom_lb_test.go | 124 +++++---- .../xdsclient/xdslbregistry/converter.go | 177 ------------ .../xdsclient/xdsresource/unmarshal_cds.go | 3 +- xds/internal/xdslbregistry/converter.go | 260 ++++++++++++++++++ .../tests => xdslbregistry}/converter_test.go | 56 ++-- 5 files changed, 367 insertions(+), 253 deletions(-) delete mode 100644 xds/internal/xdsclient/xdslbregistry/converter.go create mode 100644 xds/internal/xdslbregistry/converter.go rename xds/internal/{xdsclient/xdslbregistry/tests => xdslbregistry}/converter_test.go (87%) diff --git a/test/xds/xds_client_custom_lb_test.go b/test/xds/xds_client_custom_lb_test.go index 91ec874c64a7..749eb7f9aa64 100644 --- a/test/xds/xds_client_custom_lb_test.go +++ b/test/xds/xds_client_custom_lb_test.go @@ -22,6 +22,17 @@ import ( "context" "fmt" "testing" + "time" + + "google.golang.org/grpc" + _ "google.golang.org/grpc/balancer/weightedroundrobin" // To register weighted_round_robin_experimental. + "google.golang.org/grpc/credentials/insecure" + "google.golang.org/grpc/internal/envconfig" + "google.golang.org/grpc/internal/stubserver" + "google.golang.org/grpc/internal/testutils" + "google.golang.org/grpc/internal/testutils/roundrobin" + "google.golang.org/grpc/internal/testutils/xds/e2e" + "google.golang.org/grpc/resolver" v3xdsxdstypepb "github.com/cncf/xds/go/xds/type/v3" v3clusterpb "github.com/envoyproxy/go-control-plane/envoy/config/cluster/v3" @@ -29,20 +40,14 @@ import ( v3endpointpb "github.com/envoyproxy/go-control-plane/envoy/config/endpoint/v3" v3listenerpb "github.com/envoyproxy/go-control-plane/envoy/config/listener/v3" v3routepb "github.com/envoyproxy/go-control-plane/envoy/config/route/v3" + v3clientsideweightedroundrobinpb "github.com/envoyproxy/go-control-plane/envoy/extensions/load_balancing_policies/client_side_weighted_round_robin/v3" v3roundrobinpb "github.com/envoyproxy/go-control-plane/envoy/extensions/load_balancing_policies/round_robin/v3" v3wrrlocalitypb "github.com/envoyproxy/go-control-plane/envoy/extensions/load_balancing_policies/wrr_locality/v3" "github.com/golang/protobuf/proto" structpb "github.com/golang/protobuf/ptypes/struct" testgrpc "google.golang.org/grpc/interop/grpc_testing" - - "google.golang.org/grpc" - "google.golang.org/grpc/credentials/insecure" - "google.golang.org/grpc/internal/envconfig" - "google.golang.org/grpc/internal/stubserver" - "google.golang.org/grpc/internal/testutils" - "google.golang.org/grpc/internal/testutils/roundrobin" - "google.golang.org/grpc/internal/testutils/xds/e2e" - "google.golang.org/grpc/resolver" + "google.golang.org/protobuf/types/known/durationpb" + "google.golang.org/protobuf/types/known/wrapperspb" ) // wrrLocality is a helper that takes a proto message and returns a @@ -113,7 +118,10 @@ func (s) TestWrrLocality(t *testing.T) { name string // Configuration will be specified through load_balancing_policy field. wrrLocalityConfiguration *v3wrrlocalitypb.WrrLocality - addressDistributionWant []resolver.Address + addressDistributionWant []struct { + addr string + count int + } }{ { name: "rr_child", @@ -124,43 +132,15 @@ func (s) TestWrrLocality(t *testing.T) { // in a locality). 
Thus, address 1 and address 2 have 1/3 * 1/2 // probability, and addresses 3 4 5 have 2/3 * 1/3 probability of // being routed to. - addressDistributionWant: []resolver.Address{ - {Addr: backend1.Address}, - {Addr: backend1.Address}, - {Addr: backend1.Address}, - {Addr: backend1.Address}, - {Addr: backend1.Address}, - {Addr: backend1.Address}, - {Addr: backend2.Address}, - {Addr: backend2.Address}, - {Addr: backend2.Address}, - {Addr: backend2.Address}, - {Addr: backend2.Address}, - {Addr: backend2.Address}, - {Addr: backend3.Address}, - {Addr: backend3.Address}, - {Addr: backend3.Address}, - {Addr: backend3.Address}, - {Addr: backend3.Address}, - {Addr: backend3.Address}, - {Addr: backend3.Address}, - {Addr: backend3.Address}, - {Addr: backend4.Address}, - {Addr: backend4.Address}, - {Addr: backend4.Address}, - {Addr: backend4.Address}, - {Addr: backend4.Address}, - {Addr: backend4.Address}, - {Addr: backend4.Address}, - {Addr: backend4.Address}, - {Addr: backend5.Address}, - {Addr: backend5.Address}, - {Addr: backend5.Address}, - {Addr: backend5.Address}, - {Addr: backend5.Address}, - {Addr: backend5.Address}, - {Addr: backend5.Address}, - {Addr: backend5.Address}, + addressDistributionWant: []struct { + addr string + count int + }{ + {addr: backend1.Address, count: 6}, + {addr: backend2.Address, count: 6}, + {addr: backend3.Address, count: 8}, + {addr: backend4.Address, count: 8}, + {addr: backend5.Address, count: 8}, }, }, // This configures custom lb as the child of wrr_locality, which points @@ -174,10 +154,44 @@ func (s) TestWrrLocality(t *testing.T) { TypeUrl: "type.googleapis.com/pick_first", Value: &structpb.Struct{}, }), - addressDistributionWant: []resolver.Address{ - {Addr: backend1.Address}, - {Addr: backend3.Address}, - {Addr: backend3.Address}, + addressDistributionWant: []struct { + addr string + count int + }{ + {addr: backend1.Address, count: 1}, + {addr: backend3.Address, count: 2}, + }, + }, + // Sanity check for weighted round robin. Don't need to test super + // specific behaviors, as that is covered in unit tests. Set up weighted + // round robin as the endpoint picking policy with per RPC load reports + // enabled. Due the server not sending trailers with load reports, the + // weighted round robin policy should essentially function as round + // robin, and thus should have the same distribution as round robin + // above. + { + name: "custom_lb_child_wrr/", + wrrLocalityConfiguration: wrrLocality(&v3clientsideweightedroundrobinpb.ClientSideWeightedRoundRobin{ + EnableOobLoadReport: &wrapperspb.BoolValue{ + Value: false, + }, + // BlackoutPeriod long enough to cause load report weights to + // trigger in the scope of test case, but no load reports + // configured anyway. 
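+				// With the weights effectively unused, the split is driven by
+				// the locality weights alone: out of 36 RPCs, 36 * 1/3 * 1/2 = 6
+				// each for backends 1-2 and 36 * 2/3 * 1/3 = 8 each for
+				// backends 3-5, exactly the counts asserted below.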
+ BlackoutPeriod: durationpb.New(10 * time.Second), + WeightExpirationPeriod: durationpb.New(10 * time.Second), + WeightUpdatePeriod: durationpb.New(time.Second), + ErrorUtilizationPenalty: &wrapperspb.FloatValue{Value: 1}, + }), + addressDistributionWant: []struct { + addr string + count int + }{ + {addr: backend1.Address, count: 6}, + {addr: backend2.Address, count: 6}, + {addr: backend3.Address, count: 8}, + {addr: backend4.Address, count: 8}, + {addr: backend5.Address, count: 8}, }, }, } @@ -223,7 +237,13 @@ func (s) TestWrrLocality(t *testing.T) { defer cc.Close() client := testgrpc.NewTestServiceClient(cc) - if err := roundrobin.CheckWeightedRoundRobinRPCs(ctx, client, test.addressDistributionWant); err != nil { + var addrDistWant []resolver.Address + for _, addrAndCount := range test.addressDistributionWant { + for i := 0; i < addrAndCount.count; i++ { + addrDistWant = append(addrDistWant, resolver.Address{Addr: addrAndCount.addr}) + } + } + if err := roundrobin.CheckWeightedRoundRobinRPCs(ctx, client, addrDistWant); err != nil { t.Fatalf("Error in expected round robin: %v", err) } }) diff --git a/xds/internal/xdsclient/xdslbregistry/converter.go b/xds/internal/xdsclient/xdslbregistry/converter.go deleted file mode 100644 index 6a5546d90159..000000000000 --- a/xds/internal/xdsclient/xdslbregistry/converter.go +++ /dev/null @@ -1,177 +0,0 @@ -/* - * - * Copyright 2023 gRPC authors. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * - */ - -// Package xdslbregistry provides utilities to convert proto load balancing -// configuration, defined by the xDS API spec, to JSON load balancing -// configuration. -package xdslbregistry - -import ( - "encoding/json" - "fmt" - "strings" - - v1xdsudpatypepb "github.com/cncf/xds/go/udpa/type/v1" - v3xdsxdstypepb "github.com/cncf/xds/go/xds/type/v3" - v3clusterpb "github.com/envoyproxy/go-control-plane/envoy/config/cluster/v3" - v3ringhashpb "github.com/envoyproxy/go-control-plane/envoy/extensions/load_balancing_policies/ring_hash/v3" - v3wrrlocalitypb "github.com/envoyproxy/go-control-plane/envoy/extensions/load_balancing_policies/wrr_locality/v3" - "github.com/golang/protobuf/proto" - structpb "github.com/golang/protobuf/ptypes/struct" - - "google.golang.org/grpc/balancer" - "google.golang.org/grpc/internal/envconfig" -) - -const ( - defaultRingHashMinSize = 1024 - defaultRingHashMaxSize = 8 * 1024 * 1024 // 8M -) - -// ConvertToServiceConfig converts a proto Load Balancing Policy configuration -// into a json string. 
Returns an error if: -// - no supported policy found -// - there is more than 16 layers of recursion in the configuration -// - a failure occurs when converting the policy -func ConvertToServiceConfig(lbPolicy *v3clusterpb.LoadBalancingPolicy) (json.RawMessage, error) { - return convertToServiceConfig(lbPolicy, 0) -} - -func convertToServiceConfig(lbPolicy *v3clusterpb.LoadBalancingPolicy, depth int) (json.RawMessage, error) { - // "Configurations that require more than 16 levels of recursion are - // considered invalid and should result in a NACK response." - A51 - if depth > 15 { - return nil, fmt.Errorf("lb policy %v exceeds max depth supported: 16 layers", lbPolicy) - } - - // "This function iterate over the list of policy messages in - // LoadBalancingPolicy, attempting to convert each one to gRPC form, - // stopping at the first supported policy." - A52 - for _, policy := range lbPolicy.GetPolicies() { - // The policy message contains a TypedExtensionConfig - // message with the configuration information. TypedExtensionConfig in turn - // uses an Any typed typed_config field to store policy configuration of any - // type. This typed_config field is used to determine both the name of a - // policy and the configuration for it, depending on its type: - switch policy.GetTypedExtensionConfig().GetTypedConfig().GetTypeUrl() { - case "type.googleapis.com/envoy.extensions.load_balancing_policies.ring_hash.v3.RingHash": - if !envconfig.XDSRingHash { - continue - } - rhProto := &v3ringhashpb.RingHash{} - if err := proto.Unmarshal(policy.GetTypedExtensionConfig().GetTypedConfig().GetValue(), rhProto); err != nil { - return nil, fmt.Errorf("failed to unmarshal resource: %v", err) - } - return convertRingHash(rhProto) - case "type.googleapis.com/envoy.extensions.load_balancing_policies.round_robin.v3.RoundRobin": - return makeBalancerConfigJSON("round_robin", json.RawMessage("{}")), nil - case "type.googleapis.com/envoy.extensions.load_balancing_policies.wrr_locality.v3.WrrLocality": - wrrlProto := &v3wrrlocalitypb.WrrLocality{} - if err := proto.Unmarshal(policy.GetTypedExtensionConfig().GetTypedConfig().GetValue(), wrrlProto); err != nil { - return nil, fmt.Errorf("failed to unmarshal resource: %v", err) - } - return convertWrrLocality(wrrlProto, depth) - case "type.googleapis.com/xds.type.v3.TypedStruct": - tsProto := &v3xdsxdstypepb.TypedStruct{} - if err := proto.Unmarshal(policy.GetTypedExtensionConfig().GetTypedConfig().GetValue(), tsProto); err != nil { - return nil, fmt.Errorf("failed to unmarshal resource: %v", err) - } - json, cont, err := convertCustomPolicy(tsProto.GetTypeUrl(), tsProto.GetValue()) - if cont { - continue - } - return json, err - case "type.googleapis.com/udpa.type.v1.TypedStruct": - tsProto := &v1xdsudpatypepb.TypedStruct{} - if err := proto.Unmarshal(policy.GetTypedExtensionConfig().GetTypedConfig().GetValue(), tsProto); err != nil { - return nil, fmt.Errorf("failed to unmarshal resource: %v", err) - } - if err := proto.Unmarshal(policy.GetTypedExtensionConfig().GetTypedConfig().GetValue(), tsProto); err != nil { - return nil, fmt.Errorf("failed to unmarshal resource: %v", err) - } - json, cont, err := convertCustomPolicy(tsProto.GetTypeUrl(), tsProto.GetValue()) - if cont { - continue - } - return json, err - } - // Any entry not in the above list is unsupported and will be skipped. - // This includes Least Request as well, since grpc-go does not support - // the Least Request Load Balancing Policy. 
- } - return nil, fmt.Errorf("no supported policy found in policy list +%v", lbPolicy) -} - -// convertRingHash converts a proto representation of the ring_hash LB policy's -// configuration to gRPC JSON format. -func convertRingHash(cfg *v3ringhashpb.RingHash) (json.RawMessage, error) { - if cfg.GetHashFunction() != v3ringhashpb.RingHash_XX_HASH { - return nil, fmt.Errorf("unsupported ring_hash hash function %v", cfg.GetHashFunction()) - } - - var minSize, maxSize uint64 = defaultRingHashMinSize, defaultRingHashMaxSize - if min := cfg.GetMinimumRingSize(); min != nil { - minSize = min.GetValue() - } - if max := cfg.GetMaximumRingSize(); max != nil { - maxSize = max.GetValue() - } - - lbCfgJSON := []byte(fmt.Sprintf("{\"minRingSize\": %d, \"maxRingSize\": %d}", minSize, maxSize)) - return makeBalancerConfigJSON("ring_hash_experimental", lbCfgJSON), nil -} - -func convertWrrLocality(cfg *v3wrrlocalitypb.WrrLocality, depth int) (json.RawMessage, error) { - epJSON, err := convertToServiceConfig(cfg.GetEndpointPickingPolicy(), depth+1) - if err != nil { - return nil, fmt.Errorf("error converting endpoint picking policy: %v for %+v", err, cfg) - } - lbCfgJSON := []byte(fmt.Sprintf(`{"childPolicy": %s}`, epJSON)) - return makeBalancerConfigJSON("xds_wrr_locality_experimental", lbCfgJSON), nil -} - -// convertCustomPolicy attempts to prepare json configuration for a custom lb -// proto, which specifies the gRPC balancer type and configuration. Returns the -// converted json, a bool representing whether the caller should continue to the -// next policy, which is true if the gRPC Balancer registry does not contain -// that balancer type, and an error which should cause caller to error if error -// converting. -func convertCustomPolicy(typeURL string, s *structpb.Struct) (json.RawMessage, bool, error) { - // The gRPC policy name will be the "type name" part of the value of the - // type_url field in the TypedStruct. We get this by using the part after - // the last / character. Can assume a valid type_url from the control plane. - urls := strings.Split(typeURL, "/") - name := urls[len(urls)-1] - - if balancer.Get(name) == nil { - return nil, true, nil - } - - rawJSON, err := json.Marshal(s) - if err != nil { - return nil, false, fmt.Errorf("error converting custom lb policy %v: %v for %+v", err, typeURL, s) - } - - // The Struct contained in the TypedStruct will be returned as-is as the - // configuration JSON object. 
- return makeBalancerConfigJSON(name, rawJSON), false, nil -} - -func makeBalancerConfigJSON(name string, value json.RawMessage) []byte { - return []byte(fmt.Sprintf(`[{%q: %s}]`, name, value)) -} diff --git a/xds/internal/xdsclient/xdsresource/unmarshal_cds.go b/xds/internal/xdsclient/xdsresource/unmarshal_cds.go index c117ce6e7b52..8ec1dfda3fe3 100644 --- a/xds/internal/xdsclient/xdsresource/unmarshal_cds.go +++ b/xds/internal/xdsclient/xdsresource/unmarshal_cds.go @@ -30,12 +30,13 @@ import ( v3aggregateclusterpb "github.com/envoyproxy/go-control-plane/envoy/extensions/clusters/aggregate/v3" v3tlspb "github.com/envoyproxy/go-control-plane/envoy/extensions/transport_sockets/tls/v3" "github.com/golang/protobuf/proto" + "google.golang.org/grpc/internal/envconfig" "google.golang.org/grpc/internal/pretty" internalserviceconfig "google.golang.org/grpc/internal/serviceconfig" "google.golang.org/grpc/internal/xds/matcher" - "google.golang.org/grpc/xds/internal/xdsclient/xdslbregistry" "google.golang.org/grpc/xds/internal/xdsclient/xdsresource/version" + "google.golang.org/grpc/xds/internal/xdslbregistry" "google.golang.org/protobuf/types/known/anypb" ) diff --git a/xds/internal/xdslbregistry/converter.go b/xds/internal/xdslbregistry/converter.go new file mode 100644 index 000000000000..c154518731bb --- /dev/null +++ b/xds/internal/xdslbregistry/converter.go @@ -0,0 +1,260 @@ +/* + * + * Copyright 2023 gRPC authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +// Package xdslbregistry provides utilities to convert proto load balancing +// configuration, defined by the xDS API spec, to JSON load balancing +// configuration. These converters are registered by proto type in a registry, +// which gets pulled from based off proto type passed in. +package xdslbregistry + +import ( + "encoding/json" + "fmt" + "strings" + + "github.com/golang/protobuf/proto" + "google.golang.org/grpc/balancer" + "google.golang.org/grpc/balancer/weightedroundrobin" + "google.golang.org/grpc/internal/envconfig" + internalserviceconfig "google.golang.org/grpc/internal/serviceconfig" + "google.golang.org/grpc/xds/internal/balancer/ringhash" + "google.golang.org/grpc/xds/internal/balancer/wrrlocality" + + v1xdsudpatypepb "github.com/cncf/xds/go/udpa/type/v1" + v3xdsxdstypepb "github.com/cncf/xds/go/xds/type/v3" + v3clusterpb "github.com/envoyproxy/go-control-plane/envoy/config/cluster/v3" + v3clientsideweightedroundrobinpb "github.com/envoyproxy/go-control-plane/envoy/extensions/load_balancing_policies/client_side_weighted_round_robin/v3" + v3ringhashpb "github.com/envoyproxy/go-control-plane/envoy/extensions/load_balancing_policies/ring_hash/v3" + v3wrrlocalitypb "github.com/envoyproxy/go-control-plane/envoy/extensions/load_balancing_policies/wrr_locality/v3" + structpb "github.com/golang/protobuf/ptypes/struct" +) + +var ( + // m is a map from proto type to converter. 
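+	// Converters are looked up by the type_url of the policy's
+	// TypedExtensionConfig, so a policy whose type_url has no entry here is
+	// simply skipped during conversion.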
+	m = make(map[string]converter)
+)
+
+func init() {
+	// Construct map here to avoid an initialization cycle.
+	m = map[string]converter{
+		"type.googleapis.com/envoy.extensions.load_balancing_policies.ring_hash.v3.RingHash":                                            convertRingHashProtoToServiceConfig,
+		"type.googleapis.com/envoy.extensions.load_balancing_policies.round_robin.v3.RoundRobin":                                        convertRoundRobinProtoToServiceConfig,
+		"type.googleapis.com/envoy.extensions.load_balancing_policies.wrr_locality.v3.WrrLocality":                                      convertWRRLocalityProtoToServiceConfig,
+		"type.googleapis.com/envoy.extensions.load_balancing_policies.client_side_weighted_round_robin.v3.ClientSideWeightedRoundRobin": convertWeightedRoundRobinProtoToServiceConfig,
+		"type.googleapis.com/xds.type.v3.TypedStruct":                                                                                   convertV3TypedStructToServiceConfig,
+		"type.googleapis.com/udpa.type.v1.TypedStruct":                                                                                  convertV1TypedStructToServiceConfig,
+	}
+}
+
+// converter converts raw proto bytes into the internal Go JSON representation
+// of the proto passed. It returns the json message and an error. If both
+// return values are nil, the caller should continue to the next policy.
+type converter func([]byte, int) (json.RawMessage, error)
+
+const (
+	defaultRingHashMinSize = 1024
+	defaultRingHashMaxSize = 8 * 1024 * 1024 // 8M
+)
+
+// ConvertToServiceConfig converts a proto Load Balancing Policy configuration
+// into a json string. Returns an error if:
+//   - no supported policy found
+//   - there is more than 16 layers of recursion in the configuration
+//   - a failure occurs when converting the policy
+func ConvertToServiceConfig(lbPolicy *v3clusterpb.LoadBalancingPolicy) (json.RawMessage, error) {
+	return convertToServiceConfig(lbPolicy, 0)
+}
+
+func convertToServiceConfig(lbPolicy *v3clusterpb.LoadBalancingPolicy, depth int) (json.RawMessage, error) {
+	// "Configurations that require more than 16 levels of recursion are
+	// considered invalid and should result in a NACK response." - A51
+	if depth > 15 {
+		return nil, fmt.Errorf("lb policy %v exceeds max depth supported: 16 layers", lbPolicy)
+	}
+
+	// "This function iterate over the list of policy messages in
+	// LoadBalancingPolicy, attempting to convert each one to gRPC form,
+	// stopping at the first supported policy." - A52
+	for _, policy := range lbPolicy.GetPolicies() {
+		converter := m[policy.GetTypedExtensionConfig().GetTypedConfig().GetTypeUrl()]
+		// "Any entry not in the above list is unsupported and will be skipped."
+		// - A52
+		// This includes Least Request as well, since grpc-go does not support
+		// the Least Request Load Balancing Policy.
+		if converter == nil {
+			continue
+		}
+		json, err := converter(policy.GetTypedExtensionConfig().GetTypedConfig().GetValue(), depth)
+		if json == nil && err == nil {
+			continue
+		}
+		return json, err
+	}
+	return nil, fmt.Errorf("no supported policy found in policy list %+v", lbPolicy)
+}
+
+func convertRingHashProtoToServiceConfig(rawProto []byte, depth int) (json.RawMessage, error) {
+	if !envconfig.XDSRingHash {
+		return nil, nil
+	}
+	rhProto := &v3ringhashpb.RingHash{}
+	if err := proto.Unmarshal(rawProto, rhProto); err != nil {
+		return nil, fmt.Errorf("failed to unmarshal resource: %v", err)
+	}
+	if rhProto.GetHashFunction() != v3ringhashpb.RingHash_XX_HASH {
+		return nil, fmt.Errorf("unsupported ring_hash hash function %v", rhProto.GetHashFunction())
+	}
+
+	var minSize, maxSize uint64 = defaultRingHashMinSize, defaultRingHashMaxSize
+	if min := rhProto.GetMinimumRingSize(); min != nil {
+		minSize = min.GetValue()
+	}
+	if max := rhProto.GetMaximumRingSize(); max != nil {
+		maxSize = max.GetValue()
+	}
+
+	rhCfg := &ringhash.LBConfig{
+		MinRingSize: minSize,
+		MaxRingSize: maxSize,
+	}
+
+	rhCfgJSON, err := json.Marshal(rhCfg)
+	if err != nil {
+		return nil, fmt.Errorf("error marshaling JSON for type %T: %v", rhCfg, err)
+	}
+	return makeBalancerConfigJSON(ringhash.Name, rhCfgJSON), nil
+}
+
+func convertRoundRobinProtoToServiceConfig([]byte, int) (json.RawMessage, error) {
+	return makeBalancerConfigJSON("round_robin", json.RawMessage("{}")), nil
+}
+
+type wrrLocalityLBConfig struct {
+	ChildPolicy json.RawMessage `json:"childPolicy,omitempty"`
+}
+
+func convertWRRLocalityProtoToServiceConfig(rawProto []byte, depth int) (json.RawMessage, error) {
+	wrrlProto := &v3wrrlocalitypb.WrrLocality{}
+	if err := proto.Unmarshal(rawProto, wrrlProto); err != nil {
+		return nil, fmt.Errorf("failed to unmarshal resource: %v", err)
+	}
+	epJSON, err := convertToServiceConfig(wrrlProto.GetEndpointPickingPolicy(), depth+1)
+	if err != nil {
+		return nil, fmt.Errorf("error converting endpoint picking policy: %v for %+v", err, wrrlProto)
+	}
+	wrrLCfg := wrrLocalityLBConfig{
+		ChildPolicy: epJSON,
+	}
+
+	lbCfgJSON, err := json.Marshal(wrrLCfg)
+	if err != nil {
+		return nil, fmt.Errorf("error marshaling JSON for type %T: %v", wrrLCfg, err)
+	}
+	return makeBalancerConfigJSON(wrrlocality.Name, lbCfgJSON), nil
+}
+
+func convertWeightedRoundRobinProtoToServiceConfig(rawProto []byte, depth int) (json.RawMessage, error) {
+	cswrrProto := &v3clientsideweightedroundrobinpb.ClientSideWeightedRoundRobin{}
+	if err := proto.Unmarshal(rawProto, cswrrProto); err != nil {
+		return nil, fmt.Errorf("failed to unmarshal resource: %v", err)
+	}
+	wrrLBCfg := &wrrLBConfig{}
+	// Only set fields if specified in proto. If not set, ParseConfig of the WRR
+	// will populate the config with defaults.
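+	// (Unset fields stay at their zero values and are dropped from the JSON
+	// by the omitempty tags below, so ParseConfig sees them as absent.)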
+	if enableOOBLoadReportCfg := cswrrProto.GetEnableOobLoadReport(); enableOOBLoadReportCfg != nil {
+		wrrLBCfg.EnableOOBLoadReport = enableOOBLoadReportCfg.GetValue()
+	}
+	if oobReportingPeriodCfg := cswrrProto.GetOobReportingPeriod(); oobReportingPeriodCfg != nil {
+		wrrLBCfg.OOBReportingPeriod = internalserviceconfig.Duration(oobReportingPeriodCfg.AsDuration())
+	}
+	if blackoutPeriodCfg := cswrrProto.GetBlackoutPeriod(); blackoutPeriodCfg != nil {
+		wrrLBCfg.BlackoutPeriod = internalserviceconfig.Duration(blackoutPeriodCfg.AsDuration())
+	}
+	if weightExpirationPeriodCfg := cswrrProto.GetWeightExpirationPeriod(); weightExpirationPeriodCfg != nil {
+		wrrLBCfg.WeightExpirationPeriod = internalserviceconfig.Duration(weightExpirationPeriodCfg.AsDuration())
+	}
+	if weightUpdatePeriodCfg := cswrrProto.GetWeightUpdatePeriod(); weightUpdatePeriodCfg != nil {
+		wrrLBCfg.WeightUpdatePeriod = internalserviceconfig.Duration(weightUpdatePeriodCfg.AsDuration())
+	}
+	if errorUtilizationPenaltyCfg := cswrrProto.GetErrorUtilizationPenalty(); errorUtilizationPenaltyCfg != nil {
+		wrrLBCfg.ErrorUtilizationPenalty = float64(errorUtilizationPenaltyCfg.GetValue())
+	}
+
+	lbCfgJSON, err := json.Marshal(wrrLBCfg)
+	if err != nil {
+		return nil, fmt.Errorf("error marshaling JSON for type %T: %v", wrrLBCfg, err)
+	}
+	return makeBalancerConfigJSON(weightedroundrobin.Name, lbCfgJSON), nil
+}
+
+func convertV1TypedStructToServiceConfig(rawProto []byte, depth int) (json.RawMessage, error) {
+	tsProto := &v1xdsudpatypepb.TypedStruct{}
+	if err := proto.Unmarshal(rawProto, tsProto); err != nil {
+		return nil, fmt.Errorf("failed to unmarshal resource: %v", err)
+	}
+	return convertCustomPolicy(tsProto.GetTypeUrl(), tsProto.GetValue())
+}
+
+func convertV3TypedStructToServiceConfig(rawProto []byte, depth int) (json.RawMessage, error) {
+	tsProto := &v3xdsxdstypepb.TypedStruct{}
+	if err := proto.Unmarshal(rawProto, tsProto); err != nil {
+		return nil, fmt.Errorf("failed to unmarshal resource: %v", err)
+	}
+	return convertCustomPolicy(tsProto.GetTypeUrl(), tsProto.GetValue())
+}
+
+// convertCustomPolicy attempts to prepare json configuration for a custom lb
+// proto, which specifies the gRPC balancer type and configuration. It returns
+// the converted json and an error; a non-nil error should cause the caller to
+// fail. If both the json and the error are nil, the gRPC Balancer registry
+// does not contain that balancer type, and the caller should continue to the
+// next policy.
+func convertCustomPolicy(typeURL string, s *structpb.Struct) (json.RawMessage, error) {
+	// The gRPC policy name will be the "type name" part of the value of the
+	// type_url field in the TypedStruct. We get this by using the part after
+	// the last / character. Can assume a valid type_url from the control plane.
+	pos := strings.LastIndex(typeURL, "/")
+	name := typeURL[pos+1:]
+
+	if balancer.Get(name) == nil {
+		return nil, nil
+	}
+
+	rawJSON, err := json.Marshal(s)
+	if err != nil {
+		return nil, fmt.Errorf("error converting custom lb policy %v for %+v: %v", typeURL, s, err)
+	}
+
+	// The Struct contained in the TypedStruct will be returned as-is as the
+	// configuration JSON object.
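+	// For example, a TypedStruct whose type_url is
+	// "type.googleapis.com/myorg.MyCustomLeastRequestPolicy" selects the
+	// balancer registered under "myorg.MyCustomLeastRequestPolicy".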
+ return makeBalancerConfigJSON(name, rawJSON), nil +} + +type wrrLBConfig struct { + EnableOOBLoadReport bool `json:"enableOobLoadReport,omitempty"` + OOBReportingPeriod internalserviceconfig.Duration `json:"oobReportingPeriod,omitempty"` + BlackoutPeriod internalserviceconfig.Duration `json:"blackoutPeriod,omitempty"` + WeightExpirationPeriod internalserviceconfig.Duration `json:"weightExpirationPeriod,omitempty"` + WeightUpdatePeriod internalserviceconfig.Duration `json:"weightUpdatePeriod,omitempty"` + ErrorUtilizationPenalty float64 `json:"errorUtilizationPenalty,omitempty"` +} + +func makeBalancerConfigJSON(name string, value json.RawMessage) []byte { + return []byte(fmt.Sprintf(`[{%q: %s}]`, name, value)) +} diff --git a/xds/internal/xdsclient/xdslbregistry/tests/converter_test.go b/xds/internal/xdslbregistry/converter_test.go similarity index 87% rename from xds/internal/xdsclient/xdslbregistry/tests/converter_test.go rename to xds/internal/xdslbregistry/converter_test.go index c6d947d6bfde..9d418d9fe45a 100644 --- a/xds/internal/xdsclient/xdslbregistry/tests/converter_test.go +++ b/xds/internal/xdslbregistry/converter_test.go @@ -16,8 +16,8 @@ * */ -// Package tests_test contains test cases for the xDS LB Policy Registry. -package tests_test +// Package xdslbregistry_test contains test cases for the xDS LB Policy Registry. +package xdslbregistry_test import ( "encoding/json" @@ -46,7 +46,7 @@ import ( "google.golang.org/grpc/serviceconfig" "google.golang.org/grpc/xds/internal/balancer/ringhash" "google.golang.org/grpc/xds/internal/balancer/wrrlocality" - "google.golang.org/grpc/xds/internal/xdsclient/xdslbregistry" + "google.golang.org/grpc/xds/internal/xdslbregistry" "google.golang.org/protobuf/types/known/anypb" "google.golang.org/protobuf/types/known/wrapperspb" ) @@ -63,8 +63,15 @@ type customLBConfig struct { serviceconfig.LoadBalancingConfig } -// We have these tests in a separate test package in order to not take a -// dependency on the internal xDS balancer packages within the xDS Client. 
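+// wrrLocalityBalancerConfig is a test helper that wraps the given child
+// policy in the expected xds_wrr_locality_experimental BalancerConfig, which
+// keeps the deeply nested want configs in the table below readable.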
+func wrrLocalityBalancerConfig(childPolicy *internalserviceconfig.BalancerConfig) *internalserviceconfig.BalancerConfig { + return &internalserviceconfig.BalancerConfig{ + Name: wrrlocality.Name, + Config: &wrrlocality.LBConfig{ + ChildPolicy: childPolicy, + }, + } +} + func (s) TestConvertToServiceConfigSuccess(t *testing.T) { const customLBPolicyName = "myorg.MyCustomLeastRequestPolicy" stub.Register(customLBPolicyName, stub.BalancerFuncs{ @@ -225,14 +232,9 @@ func (s) TestConvertToServiceConfigSuccess(t *testing.T) { }, }, }, - wantConfig: &internalserviceconfig.BalancerConfig{ - Name: wrrlocality.Name, - Config: &wrrlocality.LBConfig{ - ChildPolicy: &internalserviceconfig.BalancerConfig{ - Name: "round_robin", - }, - }, - }, + wantConfig: wrrLocalityBalancerConfig(&internalserviceconfig.BalancerConfig{ + Name: "round_robin", + }), }, { name: "wrr_locality_child_custom_lb_type_v3_struct", @@ -248,15 +250,25 @@ func (s) TestConvertToServiceConfigSuccess(t *testing.T) { }, }, }, - wantConfig: &internalserviceconfig.BalancerConfig{ - Name: wrrlocality.Name, - Config: &wrrlocality.LBConfig{ - ChildPolicy: &internalserviceconfig.BalancerConfig{ - Name: "myorg.MyCustomLeastRequestPolicy", - Config: customLBConfig{}, + wantConfig: wrrLocalityBalancerConfig(&internalserviceconfig.BalancerConfig{ + Name: "myorg.MyCustomLeastRequestPolicy", + Config: customLBConfig{}, + }), + }, + { + name: "on-the-boundary-of-recursive-limit", + policy: &v3clusterpb.LoadBalancingPolicy{ + Policies: []*v3clusterpb.LoadBalancingPolicy_Policy{ + { + TypedExtensionConfig: &v3corepb.TypedExtensionConfig{ + TypedConfig: wrrLocalityAny(wrrLocality(wrrLocality(wrrLocality(wrrLocality(wrrLocality(wrrLocality(wrrLocality(wrrLocality(wrrLocality(wrrLocality(wrrLocality(wrrLocality(wrrLocality(wrrLocality(&v3roundrobinpb.RoundRobin{}))))))))))))))), + }, }, }, }, + wantConfig: wrrLocalityBalancerConfig(wrrLocalityBalancerConfig(wrrLocalityBalancerConfig(wrrLocalityBalancerConfig(wrrLocalityBalancerConfig(wrrLocalityBalancerConfig(wrrLocalityBalancerConfig(wrrLocalityBalancerConfig(wrrLocalityBalancerConfig(wrrLocalityBalancerConfig(wrrLocalityBalancerConfig(wrrLocalityBalancerConfig(wrrLocalityBalancerConfig(wrrLocalityBalancerConfig(wrrLocalityBalancerConfig(&internalserviceconfig.BalancerConfig{ + Name: "round_robin", + }))))))))))))))), }, } @@ -347,15 +359,13 @@ func (s) TestConvertToServiceConfigFailure(t *testing.T) { }, wantErr: "no supported policy found in policy list", }, - // TODO: test validity right on the boundary of recursion 16 layers - // total. 
{ - name: "too much recursion", + name: "exceeds-boundary-of-recursive-limit-by-1", policy: &v3clusterpb.LoadBalancingPolicy{ Policies: []*v3clusterpb.LoadBalancingPolicy_Policy{ { TypedExtensionConfig: &v3corepb.TypedExtensionConfig{ - TypedConfig: wrrLocalityAny(wrrLocality(wrrLocality(wrrLocality(wrrLocality(wrrLocality(wrrLocality(wrrLocality(wrrLocality(wrrLocality(wrrLocality(wrrLocality(wrrLocality(wrrLocality(wrrLocality(wrrLocality(wrrLocality(wrrLocality(wrrLocality(wrrLocality(wrrLocality(wrrLocality(wrrLocality(&v3roundrobinpb.RoundRobin{}))))))))))))))))))))))), + TypedConfig: wrrLocalityAny(wrrLocality(wrrLocality(wrrLocality(wrrLocality(wrrLocality(wrrLocality(wrrLocality(wrrLocality(wrrLocality(wrrLocality(wrrLocality(wrrLocality(wrrLocality(wrrLocality(wrrLocality(&v3roundrobinpb.RoundRobin{})))))))))))))))), }, }, }, From 5dcfb37c0b43586965ed1ffb86bc63d706bc2c4e Mon Sep 17 00:00:00 2001 From: Doug Fawley Date: Fri, 12 May 2023 14:09:59 -0700 Subject: [PATCH 37/60] interop: hold lock on server for OOB metrics updates; share 30s timeout (#6277) --- interop/interop_test.sh | 23 ++++++++++++++++++++++- interop/test_utils.go | 21 ++++++++++----------- 2 files changed, 32 insertions(+), 12 deletions(-) diff --git a/interop/interop_test.sh b/interop/interop_test.sh index 99e12c3c3829..65e12a16b6cd 100755 --- a/interop/interop_test.sh +++ b/interop/interop_test.sh @@ -45,6 +45,20 @@ pass () { echo "$(tput setaf 2) $(date): $1 $(tput sgr 0)" } +withTimeout () { + timer=$1 + shift + + # Run command in the background. + cmd=$(printf '%q ' "$@") + eval "$cmd" & + wpid=$! + # Kill after 20 seconds. + sleep $timer && kill $wpid & + # Wait for the background thread. + wait $wpid +} + # Don't run some tests that need a special environment: # "google_default_credentials" # "compute_engine_channel_credentials" @@ -70,6 +84,8 @@ CASES=( "custom_metadata" "unimplemented_method" "unimplemented_service" + "orca_per_rpc" + "orca_oob" ) # Build server @@ -96,7 +112,12 @@ for case in ${CASES[@]}; do echo "$(tput setaf 4) $(date): testing: ${case} $(tput sgr 0)" CLIENT_LOG="$(mktemp)" - if ! GRPC_GO_LOG_SEVERITY_LEVEL=info timeout 20 go run ./interop/client --use_tls --server_host_override=foo.test.google.fr --use_test_ca --test_case="${case}" &> $CLIENT_LOG; then + if ! GRPC_GO_LOG_SEVERITY_LEVEL=info withTimeout 20 go run ./interop/client \ + --use_tls \ + --server_host_override=foo.test.google.fr \ + --use_test_ca --test_case="${case}" \ + --service_config_json='{ "loadBalancingConfig": [{ "test_backend_metrics_load_balancer": {} }]}' \ + &> $CLIENT_LOG; then fail "FAIL: test case ${case} got server log: $(cat $SERVER_LOG) diff --git a/interop/test_utils.go b/interop/test_utils.go index 0057c071217a..29916876eeb4 100644 --- a/interop/test_utils.go +++ b/interop/test_utils.go @@ -30,6 +30,7 @@ import ( "io" "os" "strings" + "sync" "time" "github.com/golang/protobuf/proto" @@ -779,6 +780,7 @@ func DoSoakTest(tc testgrpc.TestServiceClient, serverAddr string, dopts []grpc.D type testServer struct { testgrpc.UnimplementedTestServiceServer + orcaMu sync.Mutex metricsRecorder orca.ServerMetricsRecorder } @@ -842,11 +844,6 @@ func (s *testServer) UnaryCall(ctx context.Context, in *testpb.SimpleRequest) (* // recorder in the context, if present. setORCAMetrics(r, orcaData) } - if r, orcaData := s.metricsRecorder, in.GetOrcaOobReport(); r != nil && orcaData != nil { - // Transfer the request's OOB ORCA data to the server metrics recorder - // in the server, if present. 
- setORCAMetrics(r, orcaData) - } return &testpb.SimpleResponse{ Payload: pl, }, nil @@ -912,6 +909,7 @@ func (s *testServer) FullDuplexCall(stream testgrpc.TestService_FullDuplexCallSe stream.SetTrailer(trailer) } } + hasORCALock := false for { in, err := stream.Recv() if err == io.EOF { @@ -929,6 +927,11 @@ func (s *testServer) FullDuplexCall(stream testgrpc.TestService_FullDuplexCallSe if r, orcaData := s.metricsRecorder, in.GetOrcaOobReport(); r != nil && orcaData != nil { // Transfer the request's OOB ORCA data to the server metrics recorder // in the server, if present. + if !hasORCALock { + s.orcaMu.Lock() + defer s.orcaMu.Unlock() + hasORCALock = true + } setORCAMetrics(r, orcaData) } @@ -1036,14 +1039,12 @@ func DoORCAOOBTest(tc testgrpc.TestServiceClient) { logger.Fatalf("/TestService/FullDuplexCall received error receiving: %v", err) } - ctx2, cancel := context.WithTimeout(ctx, 5*time.Second) - defer cancel() want := &v3orcapb.OrcaLoadReport{ CpuUtilization: 0.8210, MemUtilization: 0.5847, Utilization: map[string]float64{"util": 0.30499}, } - checkORCAMetrics(ctx2, tc, want) + checkORCAMetrics(ctx, tc, want) err = stream.Send(&testpb.StreamingOutputCallRequest{ OrcaOobReport: &testpb.TestOrcaReport{ @@ -1061,14 +1062,12 @@ func DoORCAOOBTest(tc testgrpc.TestServiceClient) { logger.Fatalf("/TestService/FullDuplexCall received error receiving: %v", err) } - ctx3, cancel := context.WithTimeout(ctx, 5*time.Second) - defer cancel() want = &v3orcapb.OrcaLoadReport{ CpuUtilization: 0.29309, MemUtilization: 0.2, Utilization: map[string]float64{"util": 0.2039}, } - checkORCAMetrics(ctx3, tc, want) + checkORCAMetrics(ctx, tc, want) } func checkORCAMetrics(ctx context.Context, tc testgrpc.TestServiceClient, want *v3orcapb.OrcaLoadReport) { From 0bdae480582d74b0b7851e7ebc6dbe411bf7e5c3 Mon Sep 17 00:00:00 2001 From: Doug Fawley Date: Mon, 15 May 2023 14:40:35 -0700 Subject: [PATCH 38/60] interop: fix interop_test.sh shutdown (#6279) --- interop/interop_test.sh | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/interop/interop_test.sh b/interop/interop_test.sh index 65e12a16b6cd..7fc290a12c6b 100755 --- a/interop/interop_test.sh +++ b/interop/interop_test.sh @@ -53,10 +53,16 @@ withTimeout () { cmd=$(printf '%q ' "$@") eval "$cmd" & wpid=$! - # Kill after 20 seconds. + # Kill after $timer seconds. sleep $timer && kill $wpid & + kpid=$! # Wait for the background thread. wait $wpid + res=$? + # Kill the killer pid in case it's still running. 
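+ # (Both commands below are deliberately best-effort: "|| true" lets them
+ # fail harmlessly when the killer has already fired and exited.)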
+ kill $kpid || true + wait $kpid || true + return $res } # Don't run some tests that need a special environment: From 1230f0e43c314798e5ce007526737160b447504f Mon Sep 17 00:00:00 2001 From: Zach Reyes <39203661+zasweq@users.noreply.github.com> Date: Mon, 15 May 2023 18:19:18 -0400 Subject: [PATCH 39/60] xds/internal/xdsclient: Split registry up and two separate packages (#6278) --- .../xdslbregistry/converter}/converter.go | 71 +++------------- .../xdsclient/xdslbregistry/xdslbregistry.go | 85 +++++++++++++++++++ .../xdslbregistry/xdslbregistry_test.go} | 27 +++--- .../xdsresource/tests/unmarshal_cds_test.go | 1 + .../xdsclient/xdsresource/unmarshal_cds.go | 4 +- xds/xds.go | 15 ++-- 6 files changed, 120 insertions(+), 83 deletions(-) rename xds/internal/{xdslbregistry => xdsclient/xdslbregistry/converter}/converter.go (70%) create mode 100644 xds/internal/xdsclient/xdslbregistry/xdslbregistry.go rename xds/internal/{xdslbregistry/converter_test.go => xdsclient/xdslbregistry/xdslbregistry_test.go} (98%) diff --git a/xds/internal/xdslbregistry/converter.go b/xds/internal/xdsclient/xdslbregistry/converter/converter.go similarity index 70% rename from xds/internal/xdslbregistry/converter.go rename to xds/internal/xdsclient/xdslbregistry/converter/converter.go index c154518731bb..27dc6533087b 100644 --- a/xds/internal/xdslbregistry/converter.go +++ b/xds/internal/xdsclient/xdslbregistry/converter/converter.go @@ -16,11 +16,11 @@ * */ -// Package xdslbregistry provides utilities to convert proto load balancing +// Package converter provides converters to convert proto load balancing // configuration, defined by the xDS API spec, to JSON load balancing // configuration. These converters are registered by proto type in a registry, // which gets pulled from based off proto type passed in. -package xdslbregistry +package converter import ( "encoding/json" @@ -34,81 +34,30 @@ import ( internalserviceconfig "google.golang.org/grpc/internal/serviceconfig" "google.golang.org/grpc/xds/internal/balancer/ringhash" "google.golang.org/grpc/xds/internal/balancer/wrrlocality" + "google.golang.org/grpc/xds/internal/xdsclient/xdslbregistry" v1xdsudpatypepb "github.com/cncf/xds/go/udpa/type/v1" v3xdsxdstypepb "github.com/cncf/xds/go/xds/type/v3" - v3clusterpb "github.com/envoyproxy/go-control-plane/envoy/config/cluster/v3" v3clientsideweightedroundrobinpb "github.com/envoyproxy/go-control-plane/envoy/extensions/load_balancing_policies/client_side_weighted_round_robin/v3" v3ringhashpb "github.com/envoyproxy/go-control-plane/envoy/extensions/load_balancing_policies/ring_hash/v3" v3wrrlocalitypb "github.com/envoyproxy/go-control-plane/envoy/extensions/load_balancing_policies/wrr_locality/v3" structpb "github.com/golang/protobuf/ptypes/struct" ) -var ( - // m is a map from proto type to converter. - m = make(map[string]converter) -) - func init() { - // Construct map here to avoid an initialization cycle. 
- m = map[string]converter{ - "type.googleapis.com/envoy.extensions.load_balancing_policies.ring_hash.v3.RingHash": convertRingHashProtoToServiceConfig, - "type.googleapis.com/envoy.extensions.load_balancing_policies.round_robin.v3.RoundRobin": convertRoundRobinProtoToServiceConfig, - "type.googleapis.com/envoy.extensions.load_balancing_policies.wrr_locality.v3.WrrLocality": convertWRRLocalityProtoToServiceConfig, - "type.googleapis.com/envoy.extensions.load_balancing_policies.client_side_weighted_round_robin.v3.ClientSideWeightedRoundRobin": convertWeightedRoundRobinProtoToServiceConfig, - "type.googleapis.com/xds.type.v3.TypedStruct": convertV3TypedStructToServiceConfig, - "type.googleapis.com/udpa.type.v1.TypedStruct": convertV1TypedStructToServiceConfig, - } + xdslbregistry.Register("type.googleapis.com/envoy.extensions.load_balancing_policies.ring_hash.v3.RingHash", convertRingHashProtoToServiceConfig) + xdslbregistry.Register("type.googleapis.com/envoy.extensions.load_balancing_policies.round_robin.v3.RoundRobin", convertRoundRobinProtoToServiceConfig) + xdslbregistry.Register("type.googleapis.com/envoy.extensions.load_balancing_policies.wrr_locality.v3.WrrLocality", convertWRRLocalityProtoToServiceConfig) + xdslbregistry.Register("type.googleapis.com/envoy.extensions.load_balancing_policies.client_side_weighted_round_robin.v3.ClientSideWeightedRoundRobin", convertWeightedRoundRobinProtoToServiceConfig) + xdslbregistry.Register("type.googleapis.com/xds.type.v3.TypedStruct", convertV3TypedStructToServiceConfig) + xdslbregistry.Register("type.googleapis.com/udpa.type.v1.TypedStruct", convertV1TypedStructToServiceConfig) } -// converter converts raw proto bytes into the internal Go JSON representation -// of the proto passed. Returns the json message, and an error. If both -// returned are nil, it represents continuing to the next proto. -type converter func([]byte, int) (json.RawMessage, error) - const ( defaultRingHashMinSize = 1024 defaultRingHashMaxSize = 8 * 1024 * 1024 // 8M ) -// ConvertToServiceConfig converts a proto Load Balancing Policy configuration -// into a json string. Returns an error if: -// - no supported policy found -// - there is more than 16 layers of recursion in the configuration -// - a failure occurs when converting the policy -func ConvertToServiceConfig(lbPolicy *v3clusterpb.LoadBalancingPolicy) (json.RawMessage, error) { - return convertToServiceConfig(lbPolicy, 0) -} - -func convertToServiceConfig(lbPolicy *v3clusterpb.LoadBalancingPolicy, depth int) (json.RawMessage, error) { - // "Configurations that require more than 16 levels of recursion are - // considered invalid and should result in a NACK response." - A51 - if depth > 15 { - return nil, fmt.Errorf("lb policy %v exceeds max depth supported: 16 layers", lbPolicy) - } - - // "This function iterate over the list of policy messages in - // LoadBalancingPolicy, attempting to convert each one to gRPC form, - // stopping at the first supported policy." - A52 - for _, policy := range lbPolicy.GetPolicies() { - policy.GetTypedExtensionConfig().GetTypedConfig().GetTypeUrl() - converter := m[policy.GetTypedExtensionConfig().GetTypedConfig().GetTypeUrl()] - // "Any entry not in the above list is unsupported and will be skipped." - // - A52 - // This includes Least Request as well, since grpc-go does not support - // the Least Request Load Balancing Policy. 
- if converter == nil {
- continue
- }
- json, err := converter(policy.GetTypedExtensionConfig().GetTypedConfig().GetValue(), depth)
- if json == nil && err == nil {
- continue
- }
- return json, err
- }
- return nil, fmt.Errorf("no supported policy found in policy list +%v", lbPolicy)
-}
-
 func convertRingHashProtoToServiceConfig(rawProto []byte, depth int) (json.RawMessage, error) {
 if !envconfig.XDSRingHash {
 return nil, nil
@@ -154,7 +103,7 @@ func convertWRRLocalityProtoToServiceConfig(rawProto []byte, depth int) (json.Ra
 if err := proto.Unmarshal(rawProto, wrrlProto); err != nil {
 return nil, fmt.Errorf("failed to unmarshal resource: %v", err)
 }
- epJSON, err := convertToServiceConfig(wrrlProto.GetEndpointPickingPolicy(), depth+1)
+ epJSON, err := xdslbregistry.ConvertToServiceConfig(wrrlProto.GetEndpointPickingPolicy(), depth+1)
 if err != nil {
 return nil, fmt.Errorf("error converting endpoint picking policy: %v for %+v", err, wrrlProto)
 }
diff --git a/xds/internal/xdsclient/xdslbregistry/xdslbregistry.go b/xds/internal/xdsclient/xdslbregistry/xdslbregistry.go
new file mode 100644
index 000000000000..0f3d1df4db20
--- /dev/null
+++ b/xds/internal/xdsclient/xdslbregistry/xdslbregistry.go
@@ -0,0 +1,85 @@
+/*
+ *
+ * Copyright 2023 gRPC authors.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ */
+
+// Package xdslbregistry provides a registry of converters that convert proto
+// load balancing configuration, defined by the xDS API spec, to JSON load
+// balancing configuration.
+package xdslbregistry
+
+import (
+ "encoding/json"
+ "fmt"
+
+ v3clusterpb "github.com/envoyproxy/go-control-plane/envoy/config/cluster/v3"
+)
+
+var (
+ // m is a map from proto type to Converter.
+ m = make(map[string]Converter)
+)
+
+// Register registers the converter to the map keyed on a proto type. Must be
+// called at init time. Not thread safe.
+func Register(protoType string, c Converter) {
+ m[protoType] = c
+}
+
+// SetRegistry sets the xDS LB registry. Must be called at init time. Not thread
+// safe.
+func SetRegistry(registry map[string]Converter) {
+ m = registry
+}
+
+// Converter converts raw proto bytes into the internal Go JSON representation
+// of the proto passed. Returns the json message and an error. If both are
+// nil, the caller should continue to the next proto in the list.
+type Converter func([]byte, int) (json.RawMessage, error)
+
+// ConvertToServiceConfig converts a proto Load Balancing Policy configuration
+// into a json string. Returns an error if:
+// - no supported policy found
+// - there are more than 16 layers of recursion in the configuration
+// - a failure occurs when converting the policy
+func ConvertToServiceConfig(lbPolicy *v3clusterpb.LoadBalancingPolicy, depth int) (json.RawMessage, error) {
+ // "Configurations that require more than 16 levels of recursion are
+ // considered invalid and should result in a NACK response."
- A51
+ if depth > 15 {
+ return nil, fmt.Errorf("lb policy %v exceeds max depth supported: 16 layers", lbPolicy)
+ }
+
+ // "This function iterate over the list of policy messages in
+ // LoadBalancingPolicy, attempting to convert each one to gRPC form,
+ // stopping at the first supported policy." - A52
+ for _, policy := range lbPolicy.GetPolicies() {
+ policy.GetTypedExtensionConfig().GetTypedConfig().GetTypeUrl()
+ converter := m[policy.GetTypedExtensionConfig().GetTypedConfig().GetTypeUrl()]
+ // "Any entry not in the above list is unsupported and will be skipped."
+ // - A52
+ // This includes Least Request as well, since grpc-go does not support
+ // the Least Request Load Balancing Policy.
+ if converter == nil {
+ continue
+ }
+ json, err := converter(policy.GetTypedExtensionConfig().GetTypedConfig().GetValue(), depth)
+ if json == nil && err == nil {
+ continue
+ }
+ return json, err
+ }
+ return nil, fmt.Errorf("no supported policy found in policy list %+v", lbPolicy)
+}
diff --git a/xds/internal/xdslbregistry/converter_test.go b/xds/internal/xdsclient/xdslbregistry/xdslbregistry_test.go
similarity index 98%
rename from xds/internal/xdslbregistry/converter_test.go
rename to xds/internal/xdsclient/xdslbregistry/xdslbregistry_test.go
index 9d418d9fe45a..b3f19c2e5953 100644
--- a/xds/internal/xdslbregistry/converter_test.go
+++ b/xds/internal/xdsclient/xdslbregistry/xdslbregistry_test.go
@@ -24,18 +24,8 @@ import (
 "strings"
 "testing"

- v1xdsudpatypepb "github.com/cncf/xds/go/udpa/type/v1"
- v3xdsxdstypepb "github.com/cncf/xds/go/xds/type/v3"
- v3clusterpb "github.com/envoyproxy/go-control-plane/envoy/config/cluster/v3"
- v3corepb "github.com/envoyproxy/go-control-plane/envoy/config/core/v3"
- v3leastrequestpb "github.com/envoyproxy/go-control-plane/envoy/extensions/load_balancing_policies/least_request/v3"
- v3ringhashpb "github.com/envoyproxy/go-control-plane/envoy/extensions/load_balancing_policies/ring_hash/v3"
- v3roundrobinpb "github.com/envoyproxy/go-control-plane/envoy/extensions/load_balancing_policies/round_robin/v3"
- v3wrrlocalitypb "github.com/envoyproxy/go-control-plane/envoy/extensions/load_balancing_policies/wrr_locality/v3"
 "github.com/golang/protobuf/proto"
- structpb "github.com/golang/protobuf/ptypes/struct"
 "github.com/google/go-cmp/cmp"
-
 "google.golang.org/grpc/internal/balancer/stub"
 "google.golang.org/grpc/internal/envconfig"
@@ -44,11 +34,22 @@ import (
 internalserviceconfig "google.golang.org/grpc/internal/serviceconfig"
 "google.golang.org/grpc/internal/testutils"
 "google.golang.org/grpc/serviceconfig"
+ _ "google.golang.org/grpc/xds" // Register the xDS LB Registry Converters.
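+ // (This blank import transitively pulls in the converter package, whose
+ // init function populates the registry via xdslbregistry.Register.)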
"google.golang.org/grpc/xds/internal/balancer/ringhash" "google.golang.org/grpc/xds/internal/balancer/wrrlocality" - "google.golang.org/grpc/xds/internal/xdslbregistry" + "google.golang.org/grpc/xds/internal/xdsclient/xdslbregistry" "google.golang.org/protobuf/types/known/anypb" "google.golang.org/protobuf/types/known/wrapperspb" + + v1xdsudpatypepb "github.com/cncf/xds/go/udpa/type/v1" + v3xdsxdstypepb "github.com/cncf/xds/go/xds/type/v3" + v3clusterpb "github.com/envoyproxy/go-control-plane/envoy/config/cluster/v3" + v3corepb "github.com/envoyproxy/go-control-plane/envoy/config/core/v3" + v3leastrequestpb "github.com/envoyproxy/go-control-plane/envoy/extensions/load_balancing_policies/least_request/v3" + v3ringhashpb "github.com/envoyproxy/go-control-plane/envoy/extensions/load_balancing_policies/ring_hash/v3" + v3roundrobinpb "github.com/envoyproxy/go-control-plane/envoy/extensions/load_balancing_policies/round_robin/v3" + v3wrrlocalitypb "github.com/envoyproxy/go-control-plane/envoy/extensions/load_balancing_policies/wrr_locality/v3" + structpb "github.com/golang/protobuf/ptypes/struct" ) type s struct { @@ -281,7 +282,7 @@ func (s) TestConvertToServiceConfigSuccess(t *testing.T) { envconfig.XDSRingHash = oldRingHashSupport }() } - rawJSON, err := xdslbregistry.ConvertToServiceConfig(test.policy) + rawJSON, err := xdslbregistry.ConvertToServiceConfig(test.policy, 0) if err != nil { t.Fatalf("ConvertToServiceConfig(%s) failed: %v", pretty.ToJSON(test.policy), err) } @@ -376,7 +377,7 @@ func (s) TestConvertToServiceConfigFailure(t *testing.T) { for _, test := range tests { t.Run(test.name, func(t *testing.T) { - _, gotErr := xdslbregistry.ConvertToServiceConfig(test.policy) + _, gotErr := xdslbregistry.ConvertToServiceConfig(test.policy, 0) // Test the error substring to test the different root causes of // errors. This is more brittle over time, but it's important to // test the root cause of the errors emitted from the diff --git a/xds/internal/xdsclient/xdsresource/tests/unmarshal_cds_test.go b/xds/internal/xdsclient/xdsresource/tests/unmarshal_cds_test.go index 96ad204ad4b3..afa418815a0b 100644 --- a/xds/internal/xdsclient/xdsresource/tests/unmarshal_cds_test.go +++ b/xds/internal/xdsclient/xdsresource/tests/unmarshal_cds_test.go @@ -32,6 +32,7 @@ import ( internalserviceconfig "google.golang.org/grpc/internal/serviceconfig" "google.golang.org/grpc/internal/testutils" "google.golang.org/grpc/serviceconfig" + _ "google.golang.org/grpc/xds" // Register the xDS LB Registry Converters. 
"google.golang.org/grpc/xds/internal/balancer/ringhash" "google.golang.org/grpc/xds/internal/balancer/wrrlocality" "google.golang.org/grpc/xds/internal/xdsclient/xdsresource" diff --git a/xds/internal/xdsclient/xdsresource/unmarshal_cds.go b/xds/internal/xdsclient/xdsresource/unmarshal_cds.go index 8ec1dfda3fe3..d07ad2ea1aee 100644 --- a/xds/internal/xdsclient/xdsresource/unmarshal_cds.go +++ b/xds/internal/xdsclient/xdsresource/unmarshal_cds.go @@ -35,8 +35,8 @@ import ( "google.golang.org/grpc/internal/pretty" internalserviceconfig "google.golang.org/grpc/internal/serviceconfig" "google.golang.org/grpc/internal/xds/matcher" + "google.golang.org/grpc/xds/internal/xdsclient/xdslbregistry" "google.golang.org/grpc/xds/internal/xdsclient/xdsresource/version" - "google.golang.org/grpc/xds/internal/xdslbregistry" "google.golang.org/protobuf/types/known/anypb" ) @@ -127,7 +127,7 @@ func validateClusterAndConstructClusterUpdate(cluster *v3clusterpb.Cluster) (Clu } if cluster.GetLoadBalancingPolicy() != nil && envconfig.XDSCustomLBPolicy { - lbPolicy, err = xdslbregistry.ConvertToServiceConfig(cluster.GetLoadBalancingPolicy()) + lbPolicy, err = xdslbregistry.ConvertToServiceConfig(cluster.GetLoadBalancingPolicy(), 0) if err != nil { return ClusterUpdate{}, fmt.Errorf("error converting LoadBalancingPolicy %v in response: %+v: %v", cluster.GetLoadBalancingPolicy(), cluster, err) } diff --git a/xds/xds.go b/xds/xds.go index 8e6def6014a7..bd6ed9c90f13 100644 --- a/xds/xds.go +++ b/xds/xds.go @@ -36,13 +36,14 @@ import ( "google.golang.org/grpc/resolver" "google.golang.org/grpc/xds/csds" - _ "google.golang.org/grpc/credentials/tls/certprovider/pemfile" // Register the file watcher certificate provider plugin. - _ "google.golang.org/grpc/xds/internal/balancer" // Register the balancers. - _ "google.golang.org/grpc/xds/internal/clusterspecifier/rls" // Register the RLS cluster specifier plugin. Note that this does not register the RLS LB policy. - _ "google.golang.org/grpc/xds/internal/httpfilter/fault" // Register the fault injection filter. - _ "google.golang.org/grpc/xds/internal/httpfilter/rbac" // Register the RBAC filter. - _ "google.golang.org/grpc/xds/internal/httpfilter/router" // Register the router filter. - _ "google.golang.org/grpc/xds/internal/resolver" // Register the xds_resolver + _ "google.golang.org/grpc/credentials/tls/certprovider/pemfile" // Register the file watcher certificate provider plugin. + _ "google.golang.org/grpc/xds/internal/balancer" // Register the balancers. + _ "google.golang.org/grpc/xds/internal/clusterspecifier/rls" // Register the RLS cluster specifier plugin. Note that this does not register the RLS LB policy. + _ "google.golang.org/grpc/xds/internal/httpfilter/fault" // Register the fault injection filter. + _ "google.golang.org/grpc/xds/internal/httpfilter/rbac" // Register the RBAC filter. + _ "google.golang.org/grpc/xds/internal/httpfilter/router" // Register the router filter. + _ "google.golang.org/grpc/xds/internal/resolver" // Register the xds_resolver. + _ "google.golang.org/grpc/xds/internal/xdsclient/xdslbregistry/converter" // Register the xDS LB Registry Converters. 
v3statusgrpc "github.com/envoyproxy/go-control-plane/envoy/service/status/v3" ) From 4eb88d7d67c84177572ca435ed58aef878ae2d50 Mon Sep 17 00:00:00 2001 From: Doug Fawley Date: Mon, 15 May 2023 15:48:02 -0700 Subject: [PATCH 40/60] cleanup: use new Duration type in base ServiceConfig (#6284) --- service_config.go | 75 +++++------------------------------------- service_config_test.go | 51 ---------------------------- 2 files changed, 8 insertions(+), 118 deletions(-) diff --git a/service_config.go b/service_config.go index f22acace4253..0df11fc09882 100644 --- a/service_config.go +++ b/service_config.go @@ -23,8 +23,6 @@ import ( "errors" "fmt" "reflect" - "strconv" - "strings" "time" "google.golang.org/grpc/codes" @@ -106,8 +104,8 @@ type healthCheckConfig struct { type jsonRetryPolicy struct { MaxAttempts int - InitialBackoff string - MaxBackoff string + InitialBackoff internalserviceconfig.Duration + MaxBackoff internalserviceconfig.Duration BackoffMultiplier float64 RetryableStatusCodes []codes.Code } @@ -129,50 +127,6 @@ type retryThrottlingPolicy struct { TokenRatio float64 } -func parseDuration(s *string) (*time.Duration, error) { - if s == nil { - return nil, nil - } - if !strings.HasSuffix(*s, "s") { - return nil, fmt.Errorf("malformed duration %q", *s) - } - ss := strings.SplitN((*s)[:len(*s)-1], ".", 3) - if len(ss) > 2 { - return nil, fmt.Errorf("malformed duration %q", *s) - } - // hasDigits is set if either the whole or fractional part of the number is - // present, since both are optional but one is required. - hasDigits := false - var d time.Duration - if len(ss[0]) > 0 { - i, err := strconv.ParseInt(ss[0], 10, 32) - if err != nil { - return nil, fmt.Errorf("malformed duration %q: %v", *s, err) - } - d = time.Duration(i) * time.Second - hasDigits = true - } - if len(ss) == 2 && len(ss[1]) > 0 { - if len(ss[1]) > 9 { - return nil, fmt.Errorf("malformed duration %q", *s) - } - f, err := strconv.ParseInt(ss[1], 10, 64) - if err != nil { - return nil, fmt.Errorf("malformed duration %q: %v", *s, err) - } - for i := 9; i > len(ss[1]); i-- { - f *= 10 - } - d += time.Duration(f) - hasDigits = true - } - if !hasDigits { - return nil, fmt.Errorf("malformed duration %q", *s) - } - - return &d, nil -} - type jsonName struct { Service string Method string @@ -201,7 +155,7 @@ func (j jsonName) generatePath() (string, error) { type jsonMC struct { Name *[]jsonName WaitForReady *bool - Timeout *string + Timeout *internalserviceconfig.Duration MaxRequestMessageBytes *int64 MaxResponseMessageBytes *int64 RetryPolicy *jsonRetryPolicy @@ -252,15 +206,10 @@ func parseServiceConfig(js string) *serviceconfig.ParseResult { if m.Name == nil { continue } - d, err := parseDuration(m.Timeout) - if err != nil { - logger.Warningf("grpc: unmarshaling service config %s: %v", js, err) - return &serviceconfig.ParseResult{Err: err} - } mc := MethodConfig{ WaitForReady: m.WaitForReady, - Timeout: d, + Timeout: (*time.Duration)(m.Timeout), } if mc.RetryPolicy, err = convertRetryPolicy(m.RetryPolicy); err != nil { logger.Warningf("grpc: unmarshaling service config %s: %v", js, err) @@ -312,18 +261,10 @@ func convertRetryPolicy(jrp *jsonRetryPolicy) (p *internalserviceconfig.RetryPol if jrp == nil { return nil, nil } - ib, err := parseDuration(&jrp.InitialBackoff) - if err != nil { - return nil, err - } - mb, err := parseDuration(&jrp.MaxBackoff) - if err != nil { - return nil, err - } if jrp.MaxAttempts <= 1 || - *ib <= 0 || - *mb <= 0 || + jrp.InitialBackoff <= 0 || + jrp.MaxBackoff <= 0 || 
jrp.BackoffMultiplier <= 0 || len(jrp.RetryableStatusCodes) == 0 { logger.Warningf("grpc: ignoring retry policy %v due to illegal configuration", jrp) @@ -332,8 +273,8 @@ func convertRetryPolicy(jrp *jsonRetryPolicy) (p *internalserviceconfig.RetryPol rp := &internalserviceconfig.RetryPolicy{ MaxAttempts: jrp.MaxAttempts, - InitialBackoff: *ib, - MaxBackoff: *mb, + InitialBackoff: time.Duration(jrp.InitialBackoff), + MaxBackoff: time.Duration(jrp.MaxBackoff), BackoffMultiplier: jrp.BackoffMultiplier, RetryableStatusCodes: make(map[codes.Code]bool), } diff --git a/service_config_test.go b/service_config_test.go index b3c6988e8d97..90ed40a68021 100644 --- a/service_config_test.go +++ b/service_config_test.go @@ -20,8 +20,6 @@ package grpc import ( "encoding/json" - "fmt" - "math" "reflect" "testing" "time" @@ -449,55 +447,6 @@ func (s) TestParseMethodConfigDuplicatedName(t *testing.T) { }) } -func (s) TestParseDuration(t *testing.T) { - testCases := []struct { - s *string - want *time.Duration - err bool - }{ - {s: nil, want: nil}, - {s: newString("1s"), want: newDuration(time.Second)}, - {s: newString("-1s"), want: newDuration(-time.Second)}, - {s: newString("1.1s"), want: newDuration(1100 * time.Millisecond)}, - {s: newString("1.s"), want: newDuration(time.Second)}, - {s: newString("1.0s"), want: newDuration(time.Second)}, - {s: newString(".002s"), want: newDuration(2 * time.Millisecond)}, - {s: newString(".002000s"), want: newDuration(2 * time.Millisecond)}, - {s: newString("0.003s"), want: newDuration(3 * time.Millisecond)}, - {s: newString("0.000004s"), want: newDuration(4 * time.Microsecond)}, - {s: newString("5000.000000009s"), want: newDuration(5000*time.Second + 9*time.Nanosecond)}, - {s: newString("4999.999999999s"), want: newDuration(5000*time.Second - time.Nanosecond)}, - {s: newString("1"), err: true}, - {s: newString("s"), err: true}, - {s: newString(".s"), err: true}, - {s: newString("1 s"), err: true}, - {s: newString(" 1s"), err: true}, - {s: newString("1ms"), err: true}, - {s: newString("1.1.1s"), err: true}, - {s: newString("Xs"), err: true}, - {s: newString("as"), err: true}, - {s: newString(".0000000001s"), err: true}, - {s: newString(fmt.Sprint(math.MaxInt32) + "s"), want: newDuration(math.MaxInt32 * time.Second)}, - {s: newString(fmt.Sprint(int64(math.MaxInt32)+1) + "s"), err: true}, - } - for _, tc := range testCases { - got, err := parseDuration(tc.s) - if tc.err != (err != nil) || - (got == nil) != (tc.want == nil) || - (got != nil && *got != *tc.want) { - wantErr := "" - if tc.err { - wantErr = "" - } - s := "" - if tc.s != nil { - s = `&"` + *tc.s + `"` - } - t.Errorf("parseDuration(%v) = %v, %v; want %v, %v", s, got, err, tc.want, wantErr) - } - } -} - func newBool(b bool) *bool { return &b } From 24fd25216321976ff8428880d837e3d8fcaac8d2 Mon Sep 17 00:00:00 2001 From: Doug Fawley Date: Mon, 15 May 2023 15:49:07 -0700 Subject: [PATCH 41/60] proto: update generated code to match grpc-proto changes (#6283) --- interop/grpc_testing/messages.pb.go | 579 ++++++++++++++-------------- 1 file changed, 282 insertions(+), 297 deletions(-) diff --git a/interop/grpc_testing/messages.pb.go b/interop/grpc_testing/messages.pb.go index 21d7854de481..ccc27a936697 100644 --- a/interop/grpc_testing/messages.pb.go +++ b/interop/grpc_testing/messages.pb.go @@ -384,8 +384,6 @@ type SimpleRequest struct { FillGrpclbRouteType bool `protobuf:"varint,10,opt,name=fill_grpclb_route_type,json=fillGrpclbRouteType,proto3" json:"fill_grpclb_route_type,omitempty"` // If set the server should 
record this metrics report data for the current RPC. OrcaPerQueryReport *TestOrcaReport `protobuf:"bytes,11,opt,name=orca_per_query_report,json=orcaPerQueryReport,proto3" json:"orca_per_query_report,omitempty"` - // If set the server should update this metrics report data at the OOB server. - OrcaOobReport *TestOrcaReport `protobuf:"bytes,12,opt,name=orca_oob_report,json=orcaOobReport,proto3" json:"orca_oob_report,omitempty"` } func (x *SimpleRequest) Reset() { @@ -497,13 +495,6 @@ func (x *SimpleRequest) GetOrcaPerQueryReport() *TestOrcaReport { return nil } -func (x *SimpleRequest) GetOrcaOobReport() *TestOrcaReport { - if x != nil { - return x.OrcaOobReport - } - return nil -} - // Unary response, as configured by the request. type SimpleResponse struct { state protoimpl.MessageState @@ -1642,7 +1633,7 @@ var file_grpc_testing_messages_proto_rawDesc = []byte{ 0x45, 0x63, 0x68, 0x6f, 0x53, 0x74, 0x61, 0x74, 0x75, 0x73, 0x12, 0x12, 0x0a, 0x04, 0x63, 0x6f, 0x64, 0x65, 0x18, 0x01, 0x20, 0x01, 0x28, 0x05, 0x52, 0x04, 0x63, 0x6f, 0x64, 0x65, 0x12, 0x18, 0x0a, 0x07, 0x6d, 0x65, 0x73, 0x73, 0x61, 0x67, 0x65, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, - 0x07, 0x6d, 0x65, 0x73, 0x73, 0x61, 0x67, 0x65, 0x22, 0xb9, 0x05, 0x0a, 0x0d, 0x53, 0x69, 0x6d, + 0x07, 0x6d, 0x65, 0x73, 0x73, 0x61, 0x67, 0x65, 0x22, 0xf3, 0x04, 0x0a, 0x0d, 0x53, 0x69, 0x6d, 0x70, 0x6c, 0x65, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x12, 0x3e, 0x0a, 0x0d, 0x72, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x5f, 0x74, 0x79, 0x70, 0x65, 0x18, 0x01, 0x20, 0x01, 0x28, 0x0e, 0x32, 0x19, 0x2e, 0x67, 0x72, 0x70, 0x63, 0x2e, 0x74, 0x65, 0x73, 0x74, 0x69, 0x6e, 0x67, @@ -1681,268 +1672,263 @@ var file_grpc_testing_messages_proto_rawDesc = []byte{ 0x72, 0x65, 0x70, 0x6f, 0x72, 0x74, 0x18, 0x0b, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x1c, 0x2e, 0x67, 0x72, 0x70, 0x63, 0x2e, 0x74, 0x65, 0x73, 0x74, 0x69, 0x6e, 0x67, 0x2e, 0x54, 0x65, 0x73, 0x74, 0x4f, 0x72, 0x63, 0x61, 0x52, 0x65, 0x70, 0x6f, 0x72, 0x74, 0x52, 0x12, 0x6f, 0x72, 0x63, 0x61, - 0x50, 0x65, 0x72, 0x51, 0x75, 0x65, 0x72, 0x79, 0x52, 0x65, 0x70, 0x6f, 0x72, 0x74, 0x12, 0x44, - 0x0a, 0x0f, 0x6f, 0x72, 0x63, 0x61, 0x5f, 0x6f, 0x6f, 0x62, 0x5f, 0x72, 0x65, 0x70, 0x6f, 0x72, - 0x74, 0x18, 0x0c, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x1c, 0x2e, 0x67, 0x72, 0x70, 0x63, 0x2e, 0x74, - 0x65, 0x73, 0x74, 0x69, 0x6e, 0x67, 0x2e, 0x54, 0x65, 0x73, 0x74, 0x4f, 0x72, 0x63, 0x61, 0x52, - 0x65, 0x70, 0x6f, 0x72, 0x74, 0x52, 0x0d, 0x6f, 0x72, 0x63, 0x61, 0x4f, 0x6f, 0x62, 0x52, 0x65, - 0x70, 0x6f, 0x72, 0x74, 0x22, 0x82, 0x02, 0x0a, 0x0e, 0x53, 0x69, 0x6d, 0x70, 0x6c, 0x65, 0x52, - 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x12, 0x2f, 0x0a, 0x07, 0x70, 0x61, 0x79, 0x6c, 0x6f, - 0x61, 0x64, 0x18, 0x01, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x15, 0x2e, 0x67, 0x72, 0x70, 0x63, 0x2e, - 0x74, 0x65, 0x73, 0x74, 0x69, 0x6e, 0x67, 0x2e, 0x50, 0x61, 0x79, 0x6c, 0x6f, 0x61, 0x64, 0x52, - 0x07, 0x70, 0x61, 0x79, 0x6c, 0x6f, 0x61, 0x64, 0x12, 0x1a, 0x0a, 0x08, 0x75, 0x73, 0x65, 0x72, - 0x6e, 0x61, 0x6d, 0x65, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x08, 0x75, 0x73, 0x65, 0x72, - 0x6e, 0x61, 0x6d, 0x65, 0x12, 0x1f, 0x0a, 0x0b, 0x6f, 0x61, 0x75, 0x74, 0x68, 0x5f, 0x73, 0x63, - 0x6f, 0x70, 0x65, 0x18, 0x03, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0a, 0x6f, 0x61, 0x75, 0x74, 0x68, - 0x53, 0x63, 0x6f, 0x70, 0x65, 0x12, 0x1b, 0x0a, 0x09, 0x73, 0x65, 0x72, 0x76, 0x65, 0x72, 0x5f, - 0x69, 0x64, 0x18, 0x04, 0x20, 0x01, 0x28, 0x09, 0x52, 0x08, 0x73, 0x65, 0x72, 0x76, 0x65, 0x72, - 0x49, 0x64, 0x12, 0x49, 0x0a, 0x11, 0x67, 0x72, 0x70, 0x63, 0x6c, 
0x62, 0x5f, 0x72, 0x6f, 0x75, - 0x74, 0x65, 0x5f, 0x74, 0x79, 0x70, 0x65, 0x18, 0x05, 0x20, 0x01, 0x28, 0x0e, 0x32, 0x1d, 0x2e, - 0x67, 0x72, 0x70, 0x63, 0x2e, 0x74, 0x65, 0x73, 0x74, 0x69, 0x6e, 0x67, 0x2e, 0x47, 0x72, 0x70, - 0x63, 0x6c, 0x62, 0x52, 0x6f, 0x75, 0x74, 0x65, 0x54, 0x79, 0x70, 0x65, 0x52, 0x0f, 0x67, 0x72, - 0x70, 0x63, 0x6c, 0x62, 0x52, 0x6f, 0x75, 0x74, 0x65, 0x54, 0x79, 0x70, 0x65, 0x12, 0x1a, 0x0a, - 0x08, 0x68, 0x6f, 0x73, 0x74, 0x6e, 0x61, 0x6d, 0x65, 0x18, 0x06, 0x20, 0x01, 0x28, 0x09, 0x52, - 0x08, 0x68, 0x6f, 0x73, 0x74, 0x6e, 0x61, 0x6d, 0x65, 0x22, 0x92, 0x01, 0x0a, 0x19, 0x53, 0x74, - 0x72, 0x65, 0x61, 0x6d, 0x69, 0x6e, 0x67, 0x49, 0x6e, 0x70, 0x75, 0x74, 0x43, 0x61, 0x6c, 0x6c, - 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x12, 0x2f, 0x0a, 0x07, 0x70, 0x61, 0x79, 0x6c, 0x6f, - 0x61, 0x64, 0x18, 0x01, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x15, 0x2e, 0x67, 0x72, 0x70, 0x63, 0x2e, - 0x74, 0x65, 0x73, 0x74, 0x69, 0x6e, 0x67, 0x2e, 0x50, 0x61, 0x79, 0x6c, 0x6f, 0x61, 0x64, 0x52, - 0x07, 0x70, 0x61, 0x79, 0x6c, 0x6f, 0x61, 0x64, 0x12, 0x44, 0x0a, 0x11, 0x65, 0x78, 0x70, 0x65, - 0x63, 0x74, 0x5f, 0x63, 0x6f, 0x6d, 0x70, 0x72, 0x65, 0x73, 0x73, 0x65, 0x64, 0x18, 0x02, 0x20, - 0x01, 0x28, 0x0b, 0x32, 0x17, 0x2e, 0x67, 0x72, 0x70, 0x63, 0x2e, 0x74, 0x65, 0x73, 0x74, 0x69, - 0x6e, 0x67, 0x2e, 0x42, 0x6f, 0x6f, 0x6c, 0x56, 0x61, 0x6c, 0x75, 0x65, 0x52, 0x10, 0x65, 0x78, - 0x70, 0x65, 0x63, 0x74, 0x43, 0x6f, 0x6d, 0x70, 0x72, 0x65, 0x73, 0x73, 0x65, 0x64, 0x22, 0x54, - 0x0a, 0x1a, 0x53, 0x74, 0x72, 0x65, 0x61, 0x6d, 0x69, 0x6e, 0x67, 0x49, 0x6e, 0x70, 0x75, 0x74, - 0x43, 0x61, 0x6c, 0x6c, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x12, 0x36, 0x0a, 0x17, - 0x61, 0x67, 0x67, 0x72, 0x65, 0x67, 0x61, 0x74, 0x65, 0x64, 0x5f, 0x70, 0x61, 0x79, 0x6c, 0x6f, - 0x61, 0x64, 0x5f, 0x73, 0x69, 0x7a, 0x65, 0x18, 0x01, 0x20, 0x01, 0x28, 0x05, 0x52, 0x15, 0x61, - 0x67, 0x67, 0x72, 0x65, 0x67, 0x61, 0x74, 0x65, 0x64, 0x50, 0x61, 0x79, 0x6c, 0x6f, 0x61, 0x64, - 0x53, 0x69, 0x7a, 0x65, 0x22, 0x82, 0x01, 0x0a, 0x12, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, - 0x65, 0x50, 0x61, 0x72, 0x61, 0x6d, 0x65, 0x74, 0x65, 0x72, 0x73, 0x12, 0x12, 0x0a, 0x04, 0x73, - 0x69, 0x7a, 0x65, 0x18, 0x01, 0x20, 0x01, 0x28, 0x05, 0x52, 0x04, 0x73, 0x69, 0x7a, 0x65, 0x12, - 0x1f, 0x0a, 0x0b, 0x69, 0x6e, 0x74, 0x65, 0x72, 0x76, 0x61, 0x6c, 0x5f, 0x75, 0x73, 0x18, 0x02, - 0x20, 0x01, 0x28, 0x05, 0x52, 0x0a, 0x69, 0x6e, 0x74, 0x65, 0x72, 0x76, 0x61, 0x6c, 0x55, 0x73, - 0x12, 0x37, 0x0a, 0x0a, 0x63, 0x6f, 0x6d, 0x70, 0x72, 0x65, 0x73, 0x73, 0x65, 0x64, 0x18, 0x03, - 0x20, 0x01, 0x28, 0x0b, 0x32, 0x17, 0x2e, 0x67, 0x72, 0x70, 0x63, 0x2e, 0x74, 0x65, 0x73, 0x74, - 0x69, 0x6e, 0x67, 0x2e, 0x42, 0x6f, 0x6f, 0x6c, 0x56, 0x61, 0x6c, 0x75, 0x65, 0x52, 0x0a, 0x63, - 0x6f, 0x6d, 0x70, 0x72, 0x65, 0x73, 0x73, 0x65, 0x64, 0x22, 0xe9, 0x02, 0x0a, 0x1a, 0x53, 0x74, - 0x72, 0x65, 0x61, 0x6d, 0x69, 0x6e, 0x67, 0x4f, 0x75, 0x74, 0x70, 0x75, 0x74, 0x43, 0x61, 0x6c, - 0x6c, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x12, 0x3e, 0x0a, 0x0d, 0x72, 0x65, 0x73, 0x70, - 0x6f, 0x6e, 0x73, 0x65, 0x5f, 0x74, 0x79, 0x70, 0x65, 0x18, 0x01, 0x20, 0x01, 0x28, 0x0e, 0x32, - 0x19, 0x2e, 0x67, 0x72, 0x70, 0x63, 0x2e, 0x74, 0x65, 0x73, 0x74, 0x69, 0x6e, 0x67, 0x2e, 0x50, - 0x61, 0x79, 0x6c, 0x6f, 0x61, 0x64, 0x54, 0x79, 0x70, 0x65, 0x52, 0x0c, 0x72, 0x65, 0x73, 0x70, - 0x6f, 0x6e, 0x73, 0x65, 0x54, 0x79, 0x70, 0x65, 0x12, 0x51, 0x0a, 0x13, 0x72, 0x65, 0x73, 0x70, - 0x6f, 0x6e, 0x73, 0x65, 0x5f, 0x70, 0x61, 0x72, 0x61, 0x6d, 0x65, 0x74, 0x65, 0x72, 0x73, 
0x18, - 0x02, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x20, 0x2e, 0x67, 0x72, 0x70, 0x63, 0x2e, 0x74, 0x65, 0x73, - 0x74, 0x69, 0x6e, 0x67, 0x2e, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x50, 0x61, 0x72, - 0x61, 0x6d, 0x65, 0x74, 0x65, 0x72, 0x73, 0x52, 0x12, 0x72, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, - 0x65, 0x50, 0x61, 0x72, 0x61, 0x6d, 0x65, 0x74, 0x65, 0x72, 0x73, 0x12, 0x2f, 0x0a, 0x07, 0x70, - 0x61, 0x79, 0x6c, 0x6f, 0x61, 0x64, 0x18, 0x03, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x15, 0x2e, 0x67, - 0x72, 0x70, 0x63, 0x2e, 0x74, 0x65, 0x73, 0x74, 0x69, 0x6e, 0x67, 0x2e, 0x50, 0x61, 0x79, 0x6c, - 0x6f, 0x61, 0x64, 0x52, 0x07, 0x70, 0x61, 0x79, 0x6c, 0x6f, 0x61, 0x64, 0x12, 0x41, 0x0a, 0x0f, - 0x72, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x5f, 0x73, 0x74, 0x61, 0x74, 0x75, 0x73, 0x18, - 0x07, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x18, 0x2e, 0x67, 0x72, 0x70, 0x63, 0x2e, 0x74, 0x65, 0x73, - 0x74, 0x69, 0x6e, 0x67, 0x2e, 0x45, 0x63, 0x68, 0x6f, 0x53, 0x74, 0x61, 0x74, 0x75, 0x73, 0x52, - 0x0e, 0x72, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x53, 0x74, 0x61, 0x74, 0x75, 0x73, 0x12, - 0x44, 0x0a, 0x0f, 0x6f, 0x72, 0x63, 0x61, 0x5f, 0x6f, 0x6f, 0x62, 0x5f, 0x72, 0x65, 0x70, 0x6f, - 0x72, 0x74, 0x18, 0x08, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x1c, 0x2e, 0x67, 0x72, 0x70, 0x63, 0x2e, - 0x74, 0x65, 0x73, 0x74, 0x69, 0x6e, 0x67, 0x2e, 0x54, 0x65, 0x73, 0x74, 0x4f, 0x72, 0x63, 0x61, - 0x52, 0x65, 0x70, 0x6f, 0x72, 0x74, 0x52, 0x0d, 0x6f, 0x72, 0x63, 0x61, 0x4f, 0x6f, 0x62, 0x52, - 0x65, 0x70, 0x6f, 0x72, 0x74, 0x22, 0x4e, 0x0a, 0x1b, 0x53, 0x74, 0x72, 0x65, 0x61, 0x6d, 0x69, - 0x6e, 0x67, 0x4f, 0x75, 0x74, 0x70, 0x75, 0x74, 0x43, 0x61, 0x6c, 0x6c, 0x52, 0x65, 0x73, 0x70, - 0x6f, 0x6e, 0x73, 0x65, 0x12, 0x2f, 0x0a, 0x07, 0x70, 0x61, 0x79, 0x6c, 0x6f, 0x61, 0x64, 0x18, - 0x01, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x15, 0x2e, 0x67, 0x72, 0x70, 0x63, 0x2e, 0x74, 0x65, 0x73, - 0x74, 0x69, 0x6e, 0x67, 0x2e, 0x50, 0x61, 0x79, 0x6c, 0x6f, 0x61, 0x64, 0x52, 0x07, 0x70, 0x61, - 0x79, 0x6c, 0x6f, 0x61, 0x64, 0x22, 0x4a, 0x0a, 0x0f, 0x52, 0x65, 0x63, 0x6f, 0x6e, 0x6e, 0x65, - 0x63, 0x74, 0x50, 0x61, 0x72, 0x61, 0x6d, 0x73, 0x12, 0x37, 0x0a, 0x18, 0x6d, 0x61, 0x78, 0x5f, - 0x72, 0x65, 0x63, 0x6f, 0x6e, 0x6e, 0x65, 0x63, 0x74, 0x5f, 0x62, 0x61, 0x63, 0x6b, 0x6f, 0x66, - 0x66, 0x5f, 0x6d, 0x73, 0x18, 0x01, 0x20, 0x01, 0x28, 0x05, 0x52, 0x15, 0x6d, 0x61, 0x78, 0x52, - 0x65, 0x63, 0x6f, 0x6e, 0x6e, 0x65, 0x63, 0x74, 0x42, 0x61, 0x63, 0x6b, 0x6f, 0x66, 0x66, 0x4d, - 0x73, 0x22, 0x46, 0x0a, 0x0d, 0x52, 0x65, 0x63, 0x6f, 0x6e, 0x6e, 0x65, 0x63, 0x74, 0x49, 0x6e, - 0x66, 0x6f, 0x12, 0x16, 0x0a, 0x06, 0x70, 0x61, 0x73, 0x73, 0x65, 0x64, 0x18, 0x01, 0x20, 0x01, - 0x28, 0x08, 0x52, 0x06, 0x70, 0x61, 0x73, 0x73, 0x65, 0x64, 0x12, 0x1d, 0x0a, 0x0a, 0x62, 0x61, - 0x63, 0x6b, 0x6f, 0x66, 0x66, 0x5f, 0x6d, 0x73, 0x18, 0x02, 0x20, 0x03, 0x28, 0x05, 0x52, 0x09, - 0x62, 0x61, 0x63, 0x6b, 0x6f, 0x66, 0x66, 0x4d, 0x73, 0x22, 0x56, 0x0a, 0x18, 0x4c, 0x6f, 0x61, + 0x50, 0x65, 0x72, 0x51, 0x75, 0x65, 0x72, 0x79, 0x52, 0x65, 0x70, 0x6f, 0x72, 0x74, 0x22, 0x82, + 0x02, 0x0a, 0x0e, 0x53, 0x69, 0x6d, 0x70, 0x6c, 0x65, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, + 0x65, 0x12, 0x2f, 0x0a, 0x07, 0x70, 0x61, 0x79, 0x6c, 0x6f, 0x61, 0x64, 0x18, 0x01, 0x20, 0x01, + 0x28, 0x0b, 0x32, 0x15, 0x2e, 0x67, 0x72, 0x70, 0x63, 0x2e, 0x74, 0x65, 0x73, 0x74, 0x69, 0x6e, + 0x67, 0x2e, 0x50, 0x61, 0x79, 0x6c, 0x6f, 0x61, 0x64, 0x52, 0x07, 0x70, 0x61, 0x79, 0x6c, 0x6f, + 0x61, 0x64, 0x12, 0x1a, 0x0a, 0x08, 0x75, 0x73, 0x65, 0x72, 0x6e, 0x61, 0x6d, 0x65, 0x18, 0x02, + 0x20, 0x01, 0x28, 
0x09, 0x52, 0x08, 0x75, 0x73, 0x65, 0x72, 0x6e, 0x61, 0x6d, 0x65, 0x12, 0x1f, + 0x0a, 0x0b, 0x6f, 0x61, 0x75, 0x74, 0x68, 0x5f, 0x73, 0x63, 0x6f, 0x70, 0x65, 0x18, 0x03, 0x20, + 0x01, 0x28, 0x09, 0x52, 0x0a, 0x6f, 0x61, 0x75, 0x74, 0x68, 0x53, 0x63, 0x6f, 0x70, 0x65, 0x12, + 0x1b, 0x0a, 0x09, 0x73, 0x65, 0x72, 0x76, 0x65, 0x72, 0x5f, 0x69, 0x64, 0x18, 0x04, 0x20, 0x01, + 0x28, 0x09, 0x52, 0x08, 0x73, 0x65, 0x72, 0x76, 0x65, 0x72, 0x49, 0x64, 0x12, 0x49, 0x0a, 0x11, + 0x67, 0x72, 0x70, 0x63, 0x6c, 0x62, 0x5f, 0x72, 0x6f, 0x75, 0x74, 0x65, 0x5f, 0x74, 0x79, 0x70, + 0x65, 0x18, 0x05, 0x20, 0x01, 0x28, 0x0e, 0x32, 0x1d, 0x2e, 0x67, 0x72, 0x70, 0x63, 0x2e, 0x74, + 0x65, 0x73, 0x74, 0x69, 0x6e, 0x67, 0x2e, 0x47, 0x72, 0x70, 0x63, 0x6c, 0x62, 0x52, 0x6f, 0x75, + 0x74, 0x65, 0x54, 0x79, 0x70, 0x65, 0x52, 0x0f, 0x67, 0x72, 0x70, 0x63, 0x6c, 0x62, 0x52, 0x6f, + 0x75, 0x74, 0x65, 0x54, 0x79, 0x70, 0x65, 0x12, 0x1a, 0x0a, 0x08, 0x68, 0x6f, 0x73, 0x74, 0x6e, + 0x61, 0x6d, 0x65, 0x18, 0x06, 0x20, 0x01, 0x28, 0x09, 0x52, 0x08, 0x68, 0x6f, 0x73, 0x74, 0x6e, + 0x61, 0x6d, 0x65, 0x22, 0x92, 0x01, 0x0a, 0x19, 0x53, 0x74, 0x72, 0x65, 0x61, 0x6d, 0x69, 0x6e, + 0x67, 0x49, 0x6e, 0x70, 0x75, 0x74, 0x43, 0x61, 0x6c, 0x6c, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, + 0x74, 0x12, 0x2f, 0x0a, 0x07, 0x70, 0x61, 0x79, 0x6c, 0x6f, 0x61, 0x64, 0x18, 0x01, 0x20, 0x01, + 0x28, 0x0b, 0x32, 0x15, 0x2e, 0x67, 0x72, 0x70, 0x63, 0x2e, 0x74, 0x65, 0x73, 0x74, 0x69, 0x6e, + 0x67, 0x2e, 0x50, 0x61, 0x79, 0x6c, 0x6f, 0x61, 0x64, 0x52, 0x07, 0x70, 0x61, 0x79, 0x6c, 0x6f, + 0x61, 0x64, 0x12, 0x44, 0x0a, 0x11, 0x65, 0x78, 0x70, 0x65, 0x63, 0x74, 0x5f, 0x63, 0x6f, 0x6d, + 0x70, 0x72, 0x65, 0x73, 0x73, 0x65, 0x64, 0x18, 0x02, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x17, 0x2e, + 0x67, 0x72, 0x70, 0x63, 0x2e, 0x74, 0x65, 0x73, 0x74, 0x69, 0x6e, 0x67, 0x2e, 0x42, 0x6f, 0x6f, + 0x6c, 0x56, 0x61, 0x6c, 0x75, 0x65, 0x52, 0x10, 0x65, 0x78, 0x70, 0x65, 0x63, 0x74, 0x43, 0x6f, + 0x6d, 0x70, 0x72, 0x65, 0x73, 0x73, 0x65, 0x64, 0x22, 0x54, 0x0a, 0x1a, 0x53, 0x74, 0x72, 0x65, + 0x61, 0x6d, 0x69, 0x6e, 0x67, 0x49, 0x6e, 0x70, 0x75, 0x74, 0x43, 0x61, 0x6c, 0x6c, 0x52, 0x65, + 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x12, 0x36, 0x0a, 0x17, 0x61, 0x67, 0x67, 0x72, 0x65, 0x67, + 0x61, 0x74, 0x65, 0x64, 0x5f, 0x70, 0x61, 0x79, 0x6c, 0x6f, 0x61, 0x64, 0x5f, 0x73, 0x69, 0x7a, + 0x65, 0x18, 0x01, 0x20, 0x01, 0x28, 0x05, 0x52, 0x15, 0x61, 0x67, 0x67, 0x72, 0x65, 0x67, 0x61, + 0x74, 0x65, 0x64, 0x50, 0x61, 0x79, 0x6c, 0x6f, 0x61, 0x64, 0x53, 0x69, 0x7a, 0x65, 0x22, 0x82, + 0x01, 0x0a, 0x12, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x50, 0x61, 0x72, 0x61, 0x6d, + 0x65, 0x74, 0x65, 0x72, 0x73, 0x12, 0x12, 0x0a, 0x04, 0x73, 0x69, 0x7a, 0x65, 0x18, 0x01, 0x20, + 0x01, 0x28, 0x05, 0x52, 0x04, 0x73, 0x69, 0x7a, 0x65, 0x12, 0x1f, 0x0a, 0x0b, 0x69, 0x6e, 0x74, + 0x65, 0x72, 0x76, 0x61, 0x6c, 0x5f, 0x75, 0x73, 0x18, 0x02, 0x20, 0x01, 0x28, 0x05, 0x52, 0x0a, + 0x69, 0x6e, 0x74, 0x65, 0x72, 0x76, 0x61, 0x6c, 0x55, 0x73, 0x12, 0x37, 0x0a, 0x0a, 0x63, 0x6f, + 0x6d, 0x70, 0x72, 0x65, 0x73, 0x73, 0x65, 0x64, 0x18, 0x03, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x17, + 0x2e, 0x67, 0x72, 0x70, 0x63, 0x2e, 0x74, 0x65, 0x73, 0x74, 0x69, 0x6e, 0x67, 0x2e, 0x42, 0x6f, + 0x6f, 0x6c, 0x56, 0x61, 0x6c, 0x75, 0x65, 0x52, 0x0a, 0x63, 0x6f, 0x6d, 0x70, 0x72, 0x65, 0x73, + 0x73, 0x65, 0x64, 0x22, 0xe9, 0x02, 0x0a, 0x1a, 0x53, 0x74, 0x72, 0x65, 0x61, 0x6d, 0x69, 0x6e, + 0x67, 0x4f, 0x75, 0x74, 0x70, 0x75, 0x74, 0x43, 0x61, 0x6c, 0x6c, 0x52, 0x65, 0x71, 0x75, 0x65, + 0x73, 0x74, 0x12, 0x3e, 0x0a, 0x0d, 0x72, 
0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x5f, 0x74, + 0x79, 0x70, 0x65, 0x18, 0x01, 0x20, 0x01, 0x28, 0x0e, 0x32, 0x19, 0x2e, 0x67, 0x72, 0x70, 0x63, + 0x2e, 0x74, 0x65, 0x73, 0x74, 0x69, 0x6e, 0x67, 0x2e, 0x50, 0x61, 0x79, 0x6c, 0x6f, 0x61, 0x64, + 0x54, 0x79, 0x70, 0x65, 0x52, 0x0c, 0x72, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x54, 0x79, + 0x70, 0x65, 0x12, 0x51, 0x0a, 0x13, 0x72, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x5f, 0x70, + 0x61, 0x72, 0x61, 0x6d, 0x65, 0x74, 0x65, 0x72, 0x73, 0x18, 0x02, 0x20, 0x03, 0x28, 0x0b, 0x32, + 0x20, 0x2e, 0x67, 0x72, 0x70, 0x63, 0x2e, 0x74, 0x65, 0x73, 0x74, 0x69, 0x6e, 0x67, 0x2e, 0x52, + 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x50, 0x61, 0x72, 0x61, 0x6d, 0x65, 0x74, 0x65, 0x72, + 0x73, 0x52, 0x12, 0x72, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x50, 0x61, 0x72, 0x61, 0x6d, + 0x65, 0x74, 0x65, 0x72, 0x73, 0x12, 0x2f, 0x0a, 0x07, 0x70, 0x61, 0x79, 0x6c, 0x6f, 0x61, 0x64, + 0x18, 0x03, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x15, 0x2e, 0x67, 0x72, 0x70, 0x63, 0x2e, 0x74, 0x65, + 0x73, 0x74, 0x69, 0x6e, 0x67, 0x2e, 0x50, 0x61, 0x79, 0x6c, 0x6f, 0x61, 0x64, 0x52, 0x07, 0x70, + 0x61, 0x79, 0x6c, 0x6f, 0x61, 0x64, 0x12, 0x41, 0x0a, 0x0f, 0x72, 0x65, 0x73, 0x70, 0x6f, 0x6e, + 0x73, 0x65, 0x5f, 0x73, 0x74, 0x61, 0x74, 0x75, 0x73, 0x18, 0x07, 0x20, 0x01, 0x28, 0x0b, 0x32, + 0x18, 0x2e, 0x67, 0x72, 0x70, 0x63, 0x2e, 0x74, 0x65, 0x73, 0x74, 0x69, 0x6e, 0x67, 0x2e, 0x45, + 0x63, 0x68, 0x6f, 0x53, 0x74, 0x61, 0x74, 0x75, 0x73, 0x52, 0x0e, 0x72, 0x65, 0x73, 0x70, 0x6f, + 0x6e, 0x73, 0x65, 0x53, 0x74, 0x61, 0x74, 0x75, 0x73, 0x12, 0x44, 0x0a, 0x0f, 0x6f, 0x72, 0x63, + 0x61, 0x5f, 0x6f, 0x6f, 0x62, 0x5f, 0x72, 0x65, 0x70, 0x6f, 0x72, 0x74, 0x18, 0x08, 0x20, 0x01, + 0x28, 0x0b, 0x32, 0x1c, 0x2e, 0x67, 0x72, 0x70, 0x63, 0x2e, 0x74, 0x65, 0x73, 0x74, 0x69, 0x6e, + 0x67, 0x2e, 0x54, 0x65, 0x73, 0x74, 0x4f, 0x72, 0x63, 0x61, 0x52, 0x65, 0x70, 0x6f, 0x72, 0x74, + 0x52, 0x0d, 0x6f, 0x72, 0x63, 0x61, 0x4f, 0x6f, 0x62, 0x52, 0x65, 0x70, 0x6f, 0x72, 0x74, 0x22, + 0x4e, 0x0a, 0x1b, 0x53, 0x74, 0x72, 0x65, 0x61, 0x6d, 0x69, 0x6e, 0x67, 0x4f, 0x75, 0x74, 0x70, + 0x75, 0x74, 0x43, 0x61, 0x6c, 0x6c, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x12, 0x2f, + 0x0a, 0x07, 0x70, 0x61, 0x79, 0x6c, 0x6f, 0x61, 0x64, 0x18, 0x01, 0x20, 0x01, 0x28, 0x0b, 0x32, + 0x15, 0x2e, 0x67, 0x72, 0x70, 0x63, 0x2e, 0x74, 0x65, 0x73, 0x74, 0x69, 0x6e, 0x67, 0x2e, 0x50, + 0x61, 0x79, 0x6c, 0x6f, 0x61, 0x64, 0x52, 0x07, 0x70, 0x61, 0x79, 0x6c, 0x6f, 0x61, 0x64, 0x22, + 0x4a, 0x0a, 0x0f, 0x52, 0x65, 0x63, 0x6f, 0x6e, 0x6e, 0x65, 0x63, 0x74, 0x50, 0x61, 0x72, 0x61, + 0x6d, 0x73, 0x12, 0x37, 0x0a, 0x18, 0x6d, 0x61, 0x78, 0x5f, 0x72, 0x65, 0x63, 0x6f, 0x6e, 0x6e, + 0x65, 0x63, 0x74, 0x5f, 0x62, 0x61, 0x63, 0x6b, 0x6f, 0x66, 0x66, 0x5f, 0x6d, 0x73, 0x18, 0x01, + 0x20, 0x01, 0x28, 0x05, 0x52, 0x15, 0x6d, 0x61, 0x78, 0x52, 0x65, 0x63, 0x6f, 0x6e, 0x6e, 0x65, + 0x63, 0x74, 0x42, 0x61, 0x63, 0x6b, 0x6f, 0x66, 0x66, 0x4d, 0x73, 0x22, 0x46, 0x0a, 0x0d, 0x52, + 0x65, 0x63, 0x6f, 0x6e, 0x6e, 0x65, 0x63, 0x74, 0x49, 0x6e, 0x66, 0x6f, 0x12, 0x16, 0x0a, 0x06, + 0x70, 0x61, 0x73, 0x73, 0x65, 0x64, 0x18, 0x01, 0x20, 0x01, 0x28, 0x08, 0x52, 0x06, 0x70, 0x61, + 0x73, 0x73, 0x65, 0x64, 0x12, 0x1d, 0x0a, 0x0a, 0x62, 0x61, 0x63, 0x6b, 0x6f, 0x66, 0x66, 0x5f, + 0x6d, 0x73, 0x18, 0x02, 0x20, 0x03, 0x28, 0x05, 0x52, 0x09, 0x62, 0x61, 0x63, 0x6b, 0x6f, 0x66, + 0x66, 0x4d, 0x73, 0x22, 0x56, 0x0a, 0x18, 0x4c, 0x6f, 0x61, 0x64, 0x42, 0x61, 0x6c, 0x61, 0x6e, + 0x63, 0x65, 0x72, 0x53, 0x74, 0x61, 0x74, 0x73, 0x52, 0x65, 0x71, 
0x75, 0x65, 0x73, 0x74, 0x12, + 0x19, 0x0a, 0x08, 0x6e, 0x75, 0x6d, 0x5f, 0x72, 0x70, 0x63, 0x73, 0x18, 0x01, 0x20, 0x01, 0x28, + 0x05, 0x52, 0x07, 0x6e, 0x75, 0x6d, 0x52, 0x70, 0x63, 0x73, 0x12, 0x1f, 0x0a, 0x0b, 0x74, 0x69, + 0x6d, 0x65, 0x6f, 0x75, 0x74, 0x5f, 0x73, 0x65, 0x63, 0x18, 0x02, 0x20, 0x01, 0x28, 0x05, 0x52, + 0x0a, 0x74, 0x69, 0x6d, 0x65, 0x6f, 0x75, 0x74, 0x53, 0x65, 0x63, 0x22, 0xe2, 0x04, 0x0a, 0x19, + 0x4c, 0x6f, 0x61, 0x64, 0x42, 0x61, 0x6c, 0x61, 0x6e, 0x63, 0x65, 0x72, 0x53, 0x74, 0x61, 0x74, + 0x73, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x12, 0x59, 0x0a, 0x0c, 0x72, 0x70, 0x63, + 0x73, 0x5f, 0x62, 0x79, 0x5f, 0x70, 0x65, 0x65, 0x72, 0x18, 0x01, 0x20, 0x03, 0x28, 0x0b, 0x32, + 0x37, 0x2e, 0x67, 0x72, 0x70, 0x63, 0x2e, 0x74, 0x65, 0x73, 0x74, 0x69, 0x6e, 0x67, 0x2e, 0x4c, + 0x6f, 0x61, 0x64, 0x42, 0x61, 0x6c, 0x61, 0x6e, 0x63, 0x65, 0x72, 0x53, 0x74, 0x61, 0x74, 0x73, + 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x2e, 0x52, 0x70, 0x63, 0x73, 0x42, 0x79, 0x50, + 0x65, 0x65, 0x72, 0x45, 0x6e, 0x74, 0x72, 0x79, 0x52, 0x0a, 0x72, 0x70, 0x63, 0x73, 0x42, 0x79, + 0x50, 0x65, 0x65, 0x72, 0x12, 0x21, 0x0a, 0x0c, 0x6e, 0x75, 0x6d, 0x5f, 0x66, 0x61, 0x69, 0x6c, + 0x75, 0x72, 0x65, 0x73, 0x18, 0x02, 0x20, 0x01, 0x28, 0x05, 0x52, 0x0b, 0x6e, 0x75, 0x6d, 0x46, + 0x61, 0x69, 0x6c, 0x75, 0x72, 0x65, 0x73, 0x12, 0x5f, 0x0a, 0x0e, 0x72, 0x70, 0x63, 0x73, 0x5f, + 0x62, 0x79, 0x5f, 0x6d, 0x65, 0x74, 0x68, 0x6f, 0x64, 0x18, 0x03, 0x20, 0x03, 0x28, 0x0b, 0x32, + 0x39, 0x2e, 0x67, 0x72, 0x70, 0x63, 0x2e, 0x74, 0x65, 0x73, 0x74, 0x69, 0x6e, 0x67, 0x2e, 0x4c, + 0x6f, 0x61, 0x64, 0x42, 0x61, 0x6c, 0x61, 0x6e, 0x63, 0x65, 0x72, 0x53, 0x74, 0x61, 0x74, 0x73, + 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x2e, 0x52, 0x70, 0x63, 0x73, 0x42, 0x79, 0x4d, + 0x65, 0x74, 0x68, 0x6f, 0x64, 0x45, 0x6e, 0x74, 0x72, 0x79, 0x52, 0x0c, 0x72, 0x70, 0x63, 0x73, + 0x42, 0x79, 0x4d, 0x65, 0x74, 0x68, 0x6f, 0x64, 0x1a, 0xb1, 0x01, 0x0a, 0x0a, 0x52, 0x70, 0x63, + 0x73, 0x42, 0x79, 0x50, 0x65, 0x65, 0x72, 0x12, 0x64, 0x0a, 0x0c, 0x72, 0x70, 0x63, 0x73, 0x5f, + 0x62, 0x79, 0x5f, 0x70, 0x65, 0x65, 0x72, 0x18, 0x01, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x42, 0x2e, + 0x67, 0x72, 0x70, 0x63, 0x2e, 0x74, 0x65, 0x73, 0x74, 0x69, 0x6e, 0x67, 0x2e, 0x4c, 0x6f, 0x61, 0x64, 0x42, 0x61, 0x6c, 0x61, 0x6e, 0x63, 0x65, 0x72, 0x53, 0x74, 0x61, 0x74, 0x73, 0x52, 0x65, - 0x71, 0x75, 0x65, 0x73, 0x74, 0x12, 0x19, 0x0a, 0x08, 0x6e, 0x75, 0x6d, 0x5f, 0x72, 0x70, 0x63, - 0x73, 0x18, 0x01, 0x20, 0x01, 0x28, 0x05, 0x52, 0x07, 0x6e, 0x75, 0x6d, 0x52, 0x70, 0x63, 0x73, - 0x12, 0x1f, 0x0a, 0x0b, 0x74, 0x69, 0x6d, 0x65, 0x6f, 0x75, 0x74, 0x5f, 0x73, 0x65, 0x63, 0x18, - 0x02, 0x20, 0x01, 0x28, 0x05, 0x52, 0x0a, 0x74, 0x69, 0x6d, 0x65, 0x6f, 0x75, 0x74, 0x53, 0x65, - 0x63, 0x22, 0xe2, 0x04, 0x0a, 0x19, 0x4c, 0x6f, 0x61, 0x64, 0x42, 0x61, 0x6c, 0x61, 0x6e, 0x63, - 0x65, 0x72, 0x53, 0x74, 0x61, 0x74, 0x73, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x12, - 0x59, 0x0a, 0x0c, 0x72, 0x70, 0x63, 0x73, 0x5f, 0x62, 0x79, 0x5f, 0x70, 0x65, 0x65, 0x72, 0x18, - 0x01, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x37, 0x2e, 0x67, 0x72, 0x70, 0x63, 0x2e, 0x74, 0x65, 0x73, - 0x74, 0x69, 0x6e, 0x67, 0x2e, 0x4c, 0x6f, 0x61, 0x64, 0x42, 0x61, 0x6c, 0x61, 0x6e, 0x63, 0x65, - 0x72, 0x53, 0x74, 0x61, 0x74, 0x73, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x2e, 0x52, - 0x70, 0x63, 0x73, 0x42, 0x79, 0x50, 0x65, 0x65, 0x72, 0x45, 0x6e, 0x74, 0x72, 0x79, 0x52, 0x0a, - 0x72, 0x70, 0x63, 0x73, 0x42, 0x79, 0x50, 0x65, 0x65, 0x72, 0x12, 0x21, 0x0a, 0x0c, 0x6e, 
0x75, - 0x6d, 0x5f, 0x66, 0x61, 0x69, 0x6c, 0x75, 0x72, 0x65, 0x73, 0x18, 0x02, 0x20, 0x01, 0x28, 0x05, - 0x52, 0x0b, 0x6e, 0x75, 0x6d, 0x46, 0x61, 0x69, 0x6c, 0x75, 0x72, 0x65, 0x73, 0x12, 0x5f, 0x0a, - 0x0e, 0x72, 0x70, 0x63, 0x73, 0x5f, 0x62, 0x79, 0x5f, 0x6d, 0x65, 0x74, 0x68, 0x6f, 0x64, 0x18, - 0x03, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x39, 0x2e, 0x67, 0x72, 0x70, 0x63, 0x2e, 0x74, 0x65, 0x73, - 0x74, 0x69, 0x6e, 0x67, 0x2e, 0x4c, 0x6f, 0x61, 0x64, 0x42, 0x61, 0x6c, 0x61, 0x6e, 0x63, 0x65, - 0x72, 0x53, 0x74, 0x61, 0x74, 0x73, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x2e, 0x52, + 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x2e, 0x52, 0x70, 0x63, 0x73, 0x42, 0x79, 0x50, 0x65, 0x65, + 0x72, 0x2e, 0x52, 0x70, 0x63, 0x73, 0x42, 0x79, 0x50, 0x65, 0x65, 0x72, 0x45, 0x6e, 0x74, 0x72, + 0x79, 0x52, 0x0a, 0x72, 0x70, 0x63, 0x73, 0x42, 0x79, 0x50, 0x65, 0x65, 0x72, 0x1a, 0x3d, 0x0a, + 0x0f, 0x52, 0x70, 0x63, 0x73, 0x42, 0x79, 0x50, 0x65, 0x65, 0x72, 0x45, 0x6e, 0x74, 0x72, 0x79, + 0x12, 0x10, 0x0a, 0x03, 0x6b, 0x65, 0x79, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, 0x6b, + 0x65, 0x79, 0x12, 0x14, 0x0a, 0x05, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x18, 0x02, 0x20, 0x01, 0x28, + 0x05, 0x52, 0x05, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x3a, 0x02, 0x38, 0x01, 0x1a, 0x3d, 0x0a, 0x0f, + 0x52, 0x70, 0x63, 0x73, 0x42, 0x79, 0x50, 0x65, 0x65, 0x72, 0x45, 0x6e, 0x74, 0x72, 0x79, 0x12, + 0x10, 0x0a, 0x03, 0x6b, 0x65, 0x79, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, 0x6b, 0x65, + 0x79, 0x12, 0x14, 0x0a, 0x05, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x18, 0x02, 0x20, 0x01, 0x28, 0x05, + 0x52, 0x05, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x3a, 0x02, 0x38, 0x01, 0x1a, 0x73, 0x0a, 0x11, 0x52, 0x70, 0x63, 0x73, 0x42, 0x79, 0x4d, 0x65, 0x74, 0x68, 0x6f, 0x64, 0x45, 0x6e, 0x74, 0x72, 0x79, - 0x52, 0x0c, 0x72, 0x70, 0x63, 0x73, 0x42, 0x79, 0x4d, 0x65, 0x74, 0x68, 0x6f, 0x64, 0x1a, 0xb1, - 0x01, 0x0a, 0x0a, 0x52, 0x70, 0x63, 0x73, 0x42, 0x79, 0x50, 0x65, 0x65, 0x72, 0x12, 0x64, 0x0a, - 0x0c, 0x72, 0x70, 0x63, 0x73, 0x5f, 0x62, 0x79, 0x5f, 0x70, 0x65, 0x65, 0x72, 0x18, 0x01, 0x20, - 0x03, 0x28, 0x0b, 0x32, 0x42, 0x2e, 0x67, 0x72, 0x70, 0x63, 0x2e, 0x74, 0x65, 0x73, 0x74, 0x69, - 0x6e, 0x67, 0x2e, 0x4c, 0x6f, 0x61, 0x64, 0x42, 0x61, 0x6c, 0x61, 0x6e, 0x63, 0x65, 0x72, 0x53, - 0x74, 0x61, 0x74, 0x73, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x2e, 0x52, 0x70, 0x63, - 0x73, 0x42, 0x79, 0x50, 0x65, 0x65, 0x72, 0x2e, 0x52, 0x70, 0x63, 0x73, 0x42, 0x79, 0x50, 0x65, - 0x65, 0x72, 0x45, 0x6e, 0x74, 0x72, 0x79, 0x52, 0x0a, 0x72, 0x70, 0x63, 0x73, 0x42, 0x79, 0x50, - 0x65, 0x65, 0x72, 0x1a, 0x3d, 0x0a, 0x0f, 0x52, 0x70, 0x63, 0x73, 0x42, 0x79, 0x50, 0x65, 0x65, - 0x72, 0x45, 0x6e, 0x74, 0x72, 0x79, 0x12, 0x10, 0x0a, 0x03, 0x6b, 0x65, 0x79, 0x18, 0x01, 0x20, - 0x01, 0x28, 0x09, 0x52, 0x03, 0x6b, 0x65, 0x79, 0x12, 0x14, 0x0a, 0x05, 0x76, 0x61, 0x6c, 0x75, - 0x65, 0x18, 0x02, 0x20, 0x01, 0x28, 0x05, 0x52, 0x05, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x3a, 0x02, - 0x38, 0x01, 0x1a, 0x3d, 0x0a, 0x0f, 0x52, 0x70, 0x63, 0x73, 0x42, 0x79, 0x50, 0x65, 0x65, 0x72, - 0x45, 0x6e, 0x74, 0x72, 0x79, 0x12, 0x10, 0x0a, 0x03, 0x6b, 0x65, 0x79, 0x18, 0x01, 0x20, 0x01, - 0x28, 0x09, 0x52, 0x03, 0x6b, 0x65, 0x79, 0x12, 0x14, 0x0a, 0x05, 0x76, 0x61, 0x6c, 0x75, 0x65, - 0x18, 0x02, 0x20, 0x01, 0x28, 0x05, 0x52, 0x05, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x3a, 0x02, 0x38, - 0x01, 0x1a, 0x73, 0x0a, 0x11, 0x52, 0x70, 0x63, 0x73, 0x42, 0x79, 0x4d, 0x65, 0x74, 0x68, 0x6f, - 0x64, 0x45, 0x6e, 0x74, 0x72, 0x79, 0x12, 0x10, 0x0a, 0x03, 0x6b, 0x65, 0x79, 0x18, 0x01, 0x20, - 0x01, 0x28, 0x09, 
0x52, 0x03, 0x6b, 0x65, 0x79, 0x12, 0x48, 0x0a, 0x05, 0x76, 0x61, 0x6c, 0x75, - 0x65, 0x18, 0x02, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x32, 0x2e, 0x67, 0x72, 0x70, 0x63, 0x2e, 0x74, - 0x65, 0x73, 0x74, 0x69, 0x6e, 0x67, 0x2e, 0x4c, 0x6f, 0x61, 0x64, 0x42, 0x61, 0x6c, 0x61, 0x6e, - 0x63, 0x65, 0x72, 0x53, 0x74, 0x61, 0x74, 0x73, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, - 0x2e, 0x52, 0x70, 0x63, 0x73, 0x42, 0x79, 0x50, 0x65, 0x65, 0x72, 0x52, 0x05, 0x76, 0x61, 0x6c, - 0x75, 0x65, 0x3a, 0x02, 0x38, 0x01, 0x22, 0x25, 0x0a, 0x23, 0x4c, 0x6f, 0x61, 0x64, 0x42, 0x61, - 0x6c, 0x61, 0x6e, 0x63, 0x65, 0x72, 0x41, 0x63, 0x63, 0x75, 0x6d, 0x75, 0x6c, 0x61, 0x74, 0x65, - 0x64, 0x53, 0x74, 0x61, 0x74, 0x73, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x22, 0x86, 0x09, - 0x0a, 0x24, 0x4c, 0x6f, 0x61, 0x64, 0x42, 0x61, 0x6c, 0x61, 0x6e, 0x63, 0x65, 0x72, 0x41, 0x63, - 0x63, 0x75, 0x6d, 0x75, 0x6c, 0x61, 0x74, 0x65, 0x64, 0x53, 0x74, 0x61, 0x74, 0x73, 0x52, 0x65, - 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x12, 0x8e, 0x01, 0x0a, 0x1a, 0x6e, 0x75, 0x6d, 0x5f, 0x72, - 0x70, 0x63, 0x73, 0x5f, 0x73, 0x74, 0x61, 0x72, 0x74, 0x65, 0x64, 0x5f, 0x62, 0x79, 0x5f, 0x6d, - 0x65, 0x74, 0x68, 0x6f, 0x64, 0x18, 0x01, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x4e, 0x2e, 0x67, 0x72, - 0x70, 0x63, 0x2e, 0x74, 0x65, 0x73, 0x74, 0x69, 0x6e, 0x67, 0x2e, 0x4c, 0x6f, 0x61, 0x64, 0x42, - 0x61, 0x6c, 0x61, 0x6e, 0x63, 0x65, 0x72, 0x41, 0x63, 0x63, 0x75, 0x6d, 0x75, 0x6c, 0x61, 0x74, - 0x65, 0x64, 0x53, 0x74, 0x61, 0x74, 0x73, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x2e, - 0x4e, 0x75, 0x6d, 0x52, 0x70, 0x63, 0x73, 0x53, 0x74, 0x61, 0x72, 0x74, 0x65, 0x64, 0x42, 0x79, - 0x4d, 0x65, 0x74, 0x68, 0x6f, 0x64, 0x45, 0x6e, 0x74, 0x72, 0x79, 0x42, 0x02, 0x18, 0x01, 0x52, - 0x16, 0x6e, 0x75, 0x6d, 0x52, 0x70, 0x63, 0x73, 0x53, 0x74, 0x61, 0x72, 0x74, 0x65, 0x64, 0x42, - 0x79, 0x4d, 0x65, 0x74, 0x68, 0x6f, 0x64, 0x12, 0x94, 0x01, 0x0a, 0x1c, 0x6e, 0x75, 0x6d, 0x5f, - 0x72, 0x70, 0x63, 0x73, 0x5f, 0x73, 0x75, 0x63, 0x63, 0x65, 0x65, 0x64, 0x65, 0x64, 0x5f, 0x62, - 0x79, 0x5f, 0x6d, 0x65, 0x74, 0x68, 0x6f, 0x64, 0x18, 0x02, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x50, - 0x2e, 0x67, 0x72, 0x70, 0x63, 0x2e, 0x74, 0x65, 0x73, 0x74, 0x69, 0x6e, 0x67, 0x2e, 0x4c, 0x6f, - 0x61, 0x64, 0x42, 0x61, 0x6c, 0x61, 0x6e, 0x63, 0x65, 0x72, 0x41, 0x63, 0x63, 0x75, 0x6d, 0x75, - 0x6c, 0x61, 0x74, 0x65, 0x64, 0x53, 0x74, 0x61, 0x74, 0x73, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, - 0x73, 0x65, 0x2e, 0x4e, 0x75, 0x6d, 0x52, 0x70, 0x63, 0x73, 0x53, 0x75, 0x63, 0x63, 0x65, 0x65, - 0x64, 0x65, 0x64, 0x42, 0x79, 0x4d, 0x65, 0x74, 0x68, 0x6f, 0x64, 0x45, 0x6e, 0x74, 0x72, 0x79, - 0x42, 0x02, 0x18, 0x01, 0x52, 0x18, 0x6e, 0x75, 0x6d, 0x52, 0x70, 0x63, 0x73, 0x53, 0x75, 0x63, - 0x63, 0x65, 0x65, 0x64, 0x65, 0x64, 0x42, 0x79, 0x4d, 0x65, 0x74, 0x68, 0x6f, 0x64, 0x12, 0x8b, - 0x01, 0x0a, 0x19, 0x6e, 0x75, 0x6d, 0x5f, 0x72, 0x70, 0x63, 0x73, 0x5f, 0x66, 0x61, 0x69, 0x6c, - 0x65, 0x64, 0x5f, 0x62, 0x79, 0x5f, 0x6d, 0x65, 0x74, 0x68, 0x6f, 0x64, 0x18, 0x03, 0x20, 0x03, - 0x28, 0x0b, 0x32, 0x4d, 0x2e, 0x67, 0x72, 0x70, 0x63, 0x2e, 0x74, 0x65, 0x73, 0x74, 0x69, 0x6e, - 0x67, 0x2e, 0x4c, 0x6f, 0x61, 0x64, 0x42, 0x61, 0x6c, 0x61, 0x6e, 0x63, 0x65, 0x72, 0x41, 0x63, - 0x63, 0x75, 0x6d, 0x75, 0x6c, 0x61, 0x74, 0x65, 0x64, 0x53, 0x74, 0x61, 0x74, 0x73, 0x52, 0x65, - 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x2e, 0x4e, 0x75, 0x6d, 0x52, 0x70, 0x63, 0x73, 0x46, 0x61, - 0x69, 0x6c, 0x65, 0x64, 0x42, 0x79, 0x4d, 0x65, 0x74, 0x68, 0x6f, 0x64, 0x45, 0x6e, 0x74, 0x72, - 0x79, 0x42, 0x02, 0x18, 0x01, 0x52, 0x15, 
0x6e, 0x75, 0x6d, 0x52, 0x70, 0x63, 0x73, 0x46, 0x61, - 0x69, 0x6c, 0x65, 0x64, 0x42, 0x79, 0x4d, 0x65, 0x74, 0x68, 0x6f, 0x64, 0x12, 0x70, 0x0a, 0x10, - 0x73, 0x74, 0x61, 0x74, 0x73, 0x5f, 0x70, 0x65, 0x72, 0x5f, 0x6d, 0x65, 0x74, 0x68, 0x6f, 0x64, - 0x18, 0x04, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x46, 0x2e, 0x67, 0x72, 0x70, 0x63, 0x2e, 0x74, 0x65, - 0x73, 0x74, 0x69, 0x6e, 0x67, 0x2e, 0x4c, 0x6f, 0x61, 0x64, 0x42, 0x61, 0x6c, 0x61, 0x6e, 0x63, - 0x65, 0x72, 0x41, 0x63, 0x63, 0x75, 0x6d, 0x75, 0x6c, 0x61, 0x74, 0x65, 0x64, 0x53, 0x74, 0x61, - 0x74, 0x73, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x2e, 0x53, 0x74, 0x61, 0x74, 0x73, - 0x50, 0x65, 0x72, 0x4d, 0x65, 0x74, 0x68, 0x6f, 0x64, 0x45, 0x6e, 0x74, 0x72, 0x79, 0x52, 0x0e, - 0x73, 0x74, 0x61, 0x74, 0x73, 0x50, 0x65, 0x72, 0x4d, 0x65, 0x74, 0x68, 0x6f, 0x64, 0x1a, 0x49, - 0x0a, 0x1b, 0x4e, 0x75, 0x6d, 0x52, 0x70, 0x63, 0x73, 0x53, 0x74, 0x61, 0x72, 0x74, 0x65, 0x64, - 0x42, 0x79, 0x4d, 0x65, 0x74, 0x68, 0x6f, 0x64, 0x45, 0x6e, 0x74, 0x72, 0x79, 0x12, 0x10, 0x0a, - 0x03, 0x6b, 0x65, 0x79, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, 0x6b, 0x65, 0x79, 0x12, - 0x14, 0x0a, 0x05, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x18, 0x02, 0x20, 0x01, 0x28, 0x05, 0x52, 0x05, - 0x76, 0x61, 0x6c, 0x75, 0x65, 0x3a, 0x02, 0x38, 0x01, 0x1a, 0x4b, 0x0a, 0x1d, 0x4e, 0x75, 0x6d, + 0x12, 0x10, 0x0a, 0x03, 0x6b, 0x65, 0x79, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, 0x6b, + 0x65, 0x79, 0x12, 0x48, 0x0a, 0x05, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x18, 0x02, 0x20, 0x01, 0x28, + 0x0b, 0x32, 0x32, 0x2e, 0x67, 0x72, 0x70, 0x63, 0x2e, 0x74, 0x65, 0x73, 0x74, 0x69, 0x6e, 0x67, + 0x2e, 0x4c, 0x6f, 0x61, 0x64, 0x42, 0x61, 0x6c, 0x61, 0x6e, 0x63, 0x65, 0x72, 0x53, 0x74, 0x61, + 0x74, 0x73, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x2e, 0x52, 0x70, 0x63, 0x73, 0x42, + 0x79, 0x50, 0x65, 0x65, 0x72, 0x52, 0x05, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x3a, 0x02, 0x38, 0x01, + 0x22, 0x25, 0x0a, 0x23, 0x4c, 0x6f, 0x61, 0x64, 0x42, 0x61, 0x6c, 0x61, 0x6e, 0x63, 0x65, 0x72, + 0x41, 0x63, 0x63, 0x75, 0x6d, 0x75, 0x6c, 0x61, 0x74, 0x65, 0x64, 0x53, 0x74, 0x61, 0x74, 0x73, + 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x22, 0x86, 0x09, 0x0a, 0x24, 0x4c, 0x6f, 0x61, 0x64, + 0x42, 0x61, 0x6c, 0x61, 0x6e, 0x63, 0x65, 0x72, 0x41, 0x63, 0x63, 0x75, 0x6d, 0x75, 0x6c, 0x61, + 0x74, 0x65, 0x64, 0x53, 0x74, 0x61, 0x74, 0x73, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, + 0x12, 0x8e, 0x01, 0x0a, 0x1a, 0x6e, 0x75, 0x6d, 0x5f, 0x72, 0x70, 0x63, 0x73, 0x5f, 0x73, 0x74, + 0x61, 0x72, 0x74, 0x65, 0x64, 0x5f, 0x62, 0x79, 0x5f, 0x6d, 0x65, 0x74, 0x68, 0x6f, 0x64, 0x18, + 0x01, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x4e, 0x2e, 0x67, 0x72, 0x70, 0x63, 0x2e, 0x74, 0x65, 0x73, + 0x74, 0x69, 0x6e, 0x67, 0x2e, 0x4c, 0x6f, 0x61, 0x64, 0x42, 0x61, 0x6c, 0x61, 0x6e, 0x63, 0x65, + 0x72, 0x41, 0x63, 0x63, 0x75, 0x6d, 0x75, 0x6c, 0x61, 0x74, 0x65, 0x64, 0x53, 0x74, 0x61, 0x74, + 0x73, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x2e, 0x4e, 0x75, 0x6d, 0x52, 0x70, 0x63, + 0x73, 0x53, 0x74, 0x61, 0x72, 0x74, 0x65, 0x64, 0x42, 0x79, 0x4d, 0x65, 0x74, 0x68, 0x6f, 0x64, + 0x45, 0x6e, 0x74, 0x72, 0x79, 0x42, 0x02, 0x18, 0x01, 0x52, 0x16, 0x6e, 0x75, 0x6d, 0x52, 0x70, + 0x63, 0x73, 0x53, 0x74, 0x61, 0x72, 0x74, 0x65, 0x64, 0x42, 0x79, 0x4d, 0x65, 0x74, 0x68, 0x6f, + 0x64, 0x12, 0x94, 0x01, 0x0a, 0x1c, 0x6e, 0x75, 0x6d, 0x5f, 0x72, 0x70, 0x63, 0x73, 0x5f, 0x73, + 0x75, 0x63, 0x63, 0x65, 0x65, 0x64, 0x65, 0x64, 0x5f, 0x62, 0x79, 0x5f, 0x6d, 0x65, 0x74, 0x68, + 0x6f, 0x64, 0x18, 0x02, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x50, 0x2e, 
0x67, 0x72, 0x70, 0x63, 0x2e, + 0x74, 0x65, 0x73, 0x74, 0x69, 0x6e, 0x67, 0x2e, 0x4c, 0x6f, 0x61, 0x64, 0x42, 0x61, 0x6c, 0x61, + 0x6e, 0x63, 0x65, 0x72, 0x41, 0x63, 0x63, 0x75, 0x6d, 0x75, 0x6c, 0x61, 0x74, 0x65, 0x64, 0x53, + 0x74, 0x61, 0x74, 0x73, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x2e, 0x4e, 0x75, 0x6d, 0x52, 0x70, 0x63, 0x73, 0x53, 0x75, 0x63, 0x63, 0x65, 0x65, 0x64, 0x65, 0x64, 0x42, 0x79, 0x4d, - 0x65, 0x74, 0x68, 0x6f, 0x64, 0x45, 0x6e, 0x74, 0x72, 0x79, 0x12, 0x10, 0x0a, 0x03, 0x6b, 0x65, - 0x79, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, 0x6b, 0x65, 0x79, 0x12, 0x14, 0x0a, 0x05, - 0x76, 0x61, 0x6c, 0x75, 0x65, 0x18, 0x02, 0x20, 0x01, 0x28, 0x05, 0x52, 0x05, 0x76, 0x61, 0x6c, - 0x75, 0x65, 0x3a, 0x02, 0x38, 0x01, 0x1a, 0x48, 0x0a, 0x1a, 0x4e, 0x75, 0x6d, 0x52, 0x70, 0x63, - 0x73, 0x46, 0x61, 0x69, 0x6c, 0x65, 0x64, 0x42, 0x79, 0x4d, 0x65, 0x74, 0x68, 0x6f, 0x64, 0x45, + 0x65, 0x74, 0x68, 0x6f, 0x64, 0x45, 0x6e, 0x74, 0x72, 0x79, 0x42, 0x02, 0x18, 0x01, 0x52, 0x18, + 0x6e, 0x75, 0x6d, 0x52, 0x70, 0x63, 0x73, 0x53, 0x75, 0x63, 0x63, 0x65, 0x65, 0x64, 0x65, 0x64, + 0x42, 0x79, 0x4d, 0x65, 0x74, 0x68, 0x6f, 0x64, 0x12, 0x8b, 0x01, 0x0a, 0x19, 0x6e, 0x75, 0x6d, + 0x5f, 0x72, 0x70, 0x63, 0x73, 0x5f, 0x66, 0x61, 0x69, 0x6c, 0x65, 0x64, 0x5f, 0x62, 0x79, 0x5f, + 0x6d, 0x65, 0x74, 0x68, 0x6f, 0x64, 0x18, 0x03, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x4d, 0x2e, 0x67, + 0x72, 0x70, 0x63, 0x2e, 0x74, 0x65, 0x73, 0x74, 0x69, 0x6e, 0x67, 0x2e, 0x4c, 0x6f, 0x61, 0x64, + 0x42, 0x61, 0x6c, 0x61, 0x6e, 0x63, 0x65, 0x72, 0x41, 0x63, 0x63, 0x75, 0x6d, 0x75, 0x6c, 0x61, + 0x74, 0x65, 0x64, 0x53, 0x74, 0x61, 0x74, 0x73, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, + 0x2e, 0x4e, 0x75, 0x6d, 0x52, 0x70, 0x63, 0x73, 0x46, 0x61, 0x69, 0x6c, 0x65, 0x64, 0x42, 0x79, + 0x4d, 0x65, 0x74, 0x68, 0x6f, 0x64, 0x45, 0x6e, 0x74, 0x72, 0x79, 0x42, 0x02, 0x18, 0x01, 0x52, + 0x15, 0x6e, 0x75, 0x6d, 0x52, 0x70, 0x63, 0x73, 0x46, 0x61, 0x69, 0x6c, 0x65, 0x64, 0x42, 0x79, + 0x4d, 0x65, 0x74, 0x68, 0x6f, 0x64, 0x12, 0x70, 0x0a, 0x10, 0x73, 0x74, 0x61, 0x74, 0x73, 0x5f, + 0x70, 0x65, 0x72, 0x5f, 0x6d, 0x65, 0x74, 0x68, 0x6f, 0x64, 0x18, 0x04, 0x20, 0x03, 0x28, 0x0b, + 0x32, 0x46, 0x2e, 0x67, 0x72, 0x70, 0x63, 0x2e, 0x74, 0x65, 0x73, 0x74, 0x69, 0x6e, 0x67, 0x2e, + 0x4c, 0x6f, 0x61, 0x64, 0x42, 0x61, 0x6c, 0x61, 0x6e, 0x63, 0x65, 0x72, 0x41, 0x63, 0x63, 0x75, + 0x6d, 0x75, 0x6c, 0x61, 0x74, 0x65, 0x64, 0x53, 0x74, 0x61, 0x74, 0x73, 0x52, 0x65, 0x73, 0x70, + 0x6f, 0x6e, 0x73, 0x65, 0x2e, 0x53, 0x74, 0x61, 0x74, 0x73, 0x50, 0x65, 0x72, 0x4d, 0x65, 0x74, + 0x68, 0x6f, 0x64, 0x45, 0x6e, 0x74, 0x72, 0x79, 0x52, 0x0e, 0x73, 0x74, 0x61, 0x74, 0x73, 0x50, + 0x65, 0x72, 0x4d, 0x65, 0x74, 0x68, 0x6f, 0x64, 0x1a, 0x49, 0x0a, 0x1b, 0x4e, 0x75, 0x6d, 0x52, + 0x70, 0x63, 0x73, 0x53, 0x74, 0x61, 0x72, 0x74, 0x65, 0x64, 0x42, 0x79, 0x4d, 0x65, 0x74, 0x68, + 0x6f, 0x64, 0x45, 0x6e, 0x74, 0x72, 0x79, 0x12, 0x10, 0x0a, 0x03, 0x6b, 0x65, 0x79, 0x18, 0x01, + 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, 0x6b, 0x65, 0x79, 0x12, 0x14, 0x0a, 0x05, 0x76, 0x61, 0x6c, + 0x75, 0x65, 0x18, 0x02, 0x20, 0x01, 0x28, 0x05, 0x52, 0x05, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x3a, + 0x02, 0x38, 0x01, 0x1a, 0x4b, 0x0a, 0x1d, 0x4e, 0x75, 0x6d, 0x52, 0x70, 0x63, 0x73, 0x53, 0x75, + 0x63, 0x63, 0x65, 0x65, 0x64, 0x65, 0x64, 0x42, 0x79, 0x4d, 0x65, 0x74, 0x68, 0x6f, 0x64, 0x45, 0x6e, 0x74, 0x72, 0x79, 0x12, 0x10, 0x0a, 0x03, 0x6b, 0x65, 0x79, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, 0x6b, 0x65, 0x79, 0x12, 0x14, 0x0a, 0x05, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x18, 
0x02, 0x20, 0x01, 0x28, 0x05, 0x52, 0x05, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x3a, 0x02, 0x38, 0x01, - 0x1a, 0xcf, 0x01, 0x0a, 0x0b, 0x4d, 0x65, 0x74, 0x68, 0x6f, 0x64, 0x53, 0x74, 0x61, 0x74, 0x73, - 0x12, 0x21, 0x0a, 0x0c, 0x72, 0x70, 0x63, 0x73, 0x5f, 0x73, 0x74, 0x61, 0x72, 0x74, 0x65, 0x64, - 0x18, 0x01, 0x20, 0x01, 0x28, 0x05, 0x52, 0x0b, 0x72, 0x70, 0x63, 0x73, 0x53, 0x74, 0x61, 0x72, - 0x74, 0x65, 0x64, 0x12, 0x62, 0x0a, 0x06, 0x72, 0x65, 0x73, 0x75, 0x6c, 0x74, 0x18, 0x02, 0x20, - 0x03, 0x28, 0x0b, 0x32, 0x4a, 0x2e, 0x67, 0x72, 0x70, 0x63, 0x2e, 0x74, 0x65, 0x73, 0x74, 0x69, - 0x6e, 0x67, 0x2e, 0x4c, 0x6f, 0x61, 0x64, 0x42, 0x61, 0x6c, 0x61, 0x6e, 0x63, 0x65, 0x72, 0x41, - 0x63, 0x63, 0x75, 0x6d, 0x75, 0x6c, 0x61, 0x74, 0x65, 0x64, 0x53, 0x74, 0x61, 0x74, 0x73, 0x52, - 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x2e, 0x4d, 0x65, 0x74, 0x68, 0x6f, 0x64, 0x53, 0x74, - 0x61, 0x74, 0x73, 0x2e, 0x52, 0x65, 0x73, 0x75, 0x6c, 0x74, 0x45, 0x6e, 0x74, 0x72, 0x79, 0x52, - 0x06, 0x72, 0x65, 0x73, 0x75, 0x6c, 0x74, 0x1a, 0x39, 0x0a, 0x0b, 0x52, 0x65, 0x73, 0x75, 0x6c, - 0x74, 0x45, 0x6e, 0x74, 0x72, 0x79, 0x12, 0x10, 0x0a, 0x03, 0x6b, 0x65, 0x79, 0x18, 0x01, 0x20, - 0x01, 0x28, 0x05, 0x52, 0x03, 0x6b, 0x65, 0x79, 0x12, 0x14, 0x0a, 0x05, 0x76, 0x61, 0x6c, 0x75, - 0x65, 0x18, 0x02, 0x20, 0x01, 0x28, 0x05, 0x52, 0x05, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x3a, 0x02, - 0x38, 0x01, 0x1a, 0x81, 0x01, 0x0a, 0x13, 0x53, 0x74, 0x61, 0x74, 0x73, 0x50, 0x65, 0x72, 0x4d, - 0x65, 0x74, 0x68, 0x6f, 0x64, 0x45, 0x6e, 0x74, 0x72, 0x79, 0x12, 0x10, 0x0a, 0x03, 0x6b, 0x65, - 0x79, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, 0x6b, 0x65, 0x79, 0x12, 0x54, 0x0a, 0x05, - 0x76, 0x61, 0x6c, 0x75, 0x65, 0x18, 0x02, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x3e, 0x2e, 0x67, 0x72, - 0x70, 0x63, 0x2e, 0x74, 0x65, 0x73, 0x74, 0x69, 0x6e, 0x67, 0x2e, 0x4c, 0x6f, 0x61, 0x64, 0x42, - 0x61, 0x6c, 0x61, 0x6e, 0x63, 0x65, 0x72, 0x41, 0x63, 0x63, 0x75, 0x6d, 0x75, 0x6c, 0x61, 0x74, - 0x65, 0x64, 0x53, 0x74, 0x61, 0x74, 0x73, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x2e, - 0x4d, 0x65, 0x74, 0x68, 0x6f, 0x64, 0x53, 0x74, 0x61, 0x74, 0x73, 0x52, 0x05, 0x76, 0x61, 0x6c, - 0x75, 0x65, 0x3a, 0x02, 0x38, 0x01, 0x22, 0xe9, 0x02, 0x0a, 0x16, 0x43, 0x6c, 0x69, 0x65, 0x6e, - 0x74, 0x43, 0x6f, 0x6e, 0x66, 0x69, 0x67, 0x75, 0x72, 0x65, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, - 0x74, 0x12, 0x42, 0x0a, 0x05, 0x74, 0x79, 0x70, 0x65, 0x73, 0x18, 0x01, 0x20, 0x03, 0x28, 0x0e, - 0x32, 0x2c, 0x2e, 0x67, 0x72, 0x70, 0x63, 0x2e, 0x74, 0x65, 0x73, 0x74, 0x69, 0x6e, 0x67, 0x2e, + 0x1a, 0x48, 0x0a, 0x1a, 0x4e, 0x75, 0x6d, 0x52, 0x70, 0x63, 0x73, 0x46, 0x61, 0x69, 0x6c, 0x65, + 0x64, 0x42, 0x79, 0x4d, 0x65, 0x74, 0x68, 0x6f, 0x64, 0x45, 0x6e, 0x74, 0x72, 0x79, 0x12, 0x10, + 0x0a, 0x03, 0x6b, 0x65, 0x79, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, 0x6b, 0x65, 0x79, + 0x12, 0x14, 0x0a, 0x05, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x18, 0x02, 0x20, 0x01, 0x28, 0x05, 0x52, + 0x05, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x3a, 0x02, 0x38, 0x01, 0x1a, 0xcf, 0x01, 0x0a, 0x0b, 0x4d, + 0x65, 0x74, 0x68, 0x6f, 0x64, 0x53, 0x74, 0x61, 0x74, 0x73, 0x12, 0x21, 0x0a, 0x0c, 0x72, 0x70, + 0x63, 0x73, 0x5f, 0x73, 0x74, 0x61, 0x72, 0x74, 0x65, 0x64, 0x18, 0x01, 0x20, 0x01, 0x28, 0x05, + 0x52, 0x0b, 0x72, 0x70, 0x63, 0x73, 0x53, 0x74, 0x61, 0x72, 0x74, 0x65, 0x64, 0x12, 0x62, 0x0a, + 0x06, 0x72, 0x65, 0x73, 0x75, 0x6c, 0x74, 0x18, 0x02, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x4a, 0x2e, + 0x67, 0x72, 0x70, 0x63, 0x2e, 0x74, 0x65, 0x73, 0x74, 0x69, 0x6e, 0x67, 0x2e, 0x4c, 0x6f, 0x61, + 0x64, 0x42, 0x61, 0x6c, 
0x61, 0x6e, 0x63, 0x65, 0x72, 0x41, 0x63, 0x63, 0x75, 0x6d, 0x75, 0x6c, + 0x61, 0x74, 0x65, 0x64, 0x53, 0x74, 0x61, 0x74, 0x73, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, + 0x65, 0x2e, 0x4d, 0x65, 0x74, 0x68, 0x6f, 0x64, 0x53, 0x74, 0x61, 0x74, 0x73, 0x2e, 0x52, 0x65, + 0x73, 0x75, 0x6c, 0x74, 0x45, 0x6e, 0x74, 0x72, 0x79, 0x52, 0x06, 0x72, 0x65, 0x73, 0x75, 0x6c, + 0x74, 0x1a, 0x39, 0x0a, 0x0b, 0x52, 0x65, 0x73, 0x75, 0x6c, 0x74, 0x45, 0x6e, 0x74, 0x72, 0x79, + 0x12, 0x10, 0x0a, 0x03, 0x6b, 0x65, 0x79, 0x18, 0x01, 0x20, 0x01, 0x28, 0x05, 0x52, 0x03, 0x6b, + 0x65, 0x79, 0x12, 0x14, 0x0a, 0x05, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x18, 0x02, 0x20, 0x01, 0x28, + 0x05, 0x52, 0x05, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x3a, 0x02, 0x38, 0x01, 0x1a, 0x81, 0x01, 0x0a, + 0x13, 0x53, 0x74, 0x61, 0x74, 0x73, 0x50, 0x65, 0x72, 0x4d, 0x65, 0x74, 0x68, 0x6f, 0x64, 0x45, + 0x6e, 0x74, 0x72, 0x79, 0x12, 0x10, 0x0a, 0x03, 0x6b, 0x65, 0x79, 0x18, 0x01, 0x20, 0x01, 0x28, + 0x09, 0x52, 0x03, 0x6b, 0x65, 0x79, 0x12, 0x54, 0x0a, 0x05, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x18, + 0x02, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x3e, 0x2e, 0x67, 0x72, 0x70, 0x63, 0x2e, 0x74, 0x65, 0x73, + 0x74, 0x69, 0x6e, 0x67, 0x2e, 0x4c, 0x6f, 0x61, 0x64, 0x42, 0x61, 0x6c, 0x61, 0x6e, 0x63, 0x65, + 0x72, 0x41, 0x63, 0x63, 0x75, 0x6d, 0x75, 0x6c, 0x61, 0x74, 0x65, 0x64, 0x53, 0x74, 0x61, 0x74, + 0x73, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x2e, 0x4d, 0x65, 0x74, 0x68, 0x6f, 0x64, + 0x53, 0x74, 0x61, 0x74, 0x73, 0x52, 0x05, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x3a, 0x02, 0x38, 0x01, + 0x22, 0xe9, 0x02, 0x0a, 0x16, 0x43, 0x6c, 0x69, 0x65, 0x6e, 0x74, 0x43, 0x6f, 0x6e, 0x66, 0x69, + 0x67, 0x75, 0x72, 0x65, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x12, 0x42, 0x0a, 0x05, 0x74, + 0x79, 0x70, 0x65, 0x73, 0x18, 0x01, 0x20, 0x03, 0x28, 0x0e, 0x32, 0x2c, 0x2e, 0x67, 0x72, 0x70, + 0x63, 0x2e, 0x74, 0x65, 0x73, 0x74, 0x69, 0x6e, 0x67, 0x2e, 0x43, 0x6c, 0x69, 0x65, 0x6e, 0x74, + 0x43, 0x6f, 0x6e, 0x66, 0x69, 0x67, 0x75, 0x72, 0x65, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, + 0x2e, 0x52, 0x70, 0x63, 0x54, 0x79, 0x70, 0x65, 0x52, 0x05, 0x74, 0x79, 0x70, 0x65, 0x73, 0x12, + 0x49, 0x0a, 0x08, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x18, 0x02, 0x20, 0x03, 0x28, + 0x0b, 0x32, 0x2d, 0x2e, 0x67, 0x72, 0x70, 0x63, 0x2e, 0x74, 0x65, 0x73, 0x74, 0x69, 0x6e, 0x67, + 0x2e, 0x43, 0x6c, 0x69, 0x65, 0x6e, 0x74, 0x43, 0x6f, 0x6e, 0x66, 0x69, 0x67, 0x75, 0x72, 0x65, + 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x2e, 0x4d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, + 0x52, 0x08, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x12, 0x1f, 0x0a, 0x0b, 0x74, 0x69, + 0x6d, 0x65, 0x6f, 0x75, 0x74, 0x5f, 0x73, 0x65, 0x63, 0x18, 0x03, 0x20, 0x01, 0x28, 0x05, 0x52, + 0x0a, 0x74, 0x69, 0x6d, 0x65, 0x6f, 0x75, 0x74, 0x53, 0x65, 0x63, 0x1a, 0x74, 0x0a, 0x08, 0x4d, + 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x12, 0x40, 0x0a, 0x04, 0x74, 0x79, 0x70, 0x65, 0x18, + 0x01, 0x20, 0x01, 0x28, 0x0e, 0x32, 0x2c, 0x2e, 0x67, 0x72, 0x70, 0x63, 0x2e, 0x74, 0x65, 0x73, + 0x74, 0x69, 0x6e, 0x67, 0x2e, 0x43, 0x6c, 0x69, 0x65, 0x6e, 0x74, 0x43, 0x6f, 0x6e, 0x66, 0x69, + 0x67, 0x75, 0x72, 0x65, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x2e, 0x52, 0x70, 0x63, 0x54, + 0x79, 0x70, 0x65, 0x52, 0x04, 0x74, 0x79, 0x70, 0x65, 0x12, 0x10, 0x0a, 0x03, 0x6b, 0x65, 0x79, + 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, 0x6b, 0x65, 0x79, 0x12, 0x14, 0x0a, 0x05, 0x76, + 0x61, 0x6c, 0x75, 0x65, 0x18, 0x03, 0x20, 0x01, 0x28, 0x09, 0x52, 0x05, 0x76, 0x61, 0x6c, 0x75, + 0x65, 0x22, 0x29, 0x0a, 0x07, 0x52, 0x70, 0x63, 
0x54, 0x79, 0x70, 0x65, 0x12, 0x0e, 0x0a, 0x0a, + 0x45, 0x4d, 0x50, 0x54, 0x59, 0x5f, 0x43, 0x41, 0x4c, 0x4c, 0x10, 0x00, 0x12, 0x0e, 0x0a, 0x0a, + 0x55, 0x4e, 0x41, 0x52, 0x59, 0x5f, 0x43, 0x41, 0x4c, 0x4c, 0x10, 0x01, 0x22, 0x19, 0x0a, 0x17, 0x43, 0x6c, 0x69, 0x65, 0x6e, 0x74, 0x43, 0x6f, 0x6e, 0x66, 0x69, 0x67, 0x75, 0x72, 0x65, 0x52, - 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x2e, 0x52, 0x70, 0x63, 0x54, 0x79, 0x70, 0x65, 0x52, 0x05, - 0x74, 0x79, 0x70, 0x65, 0x73, 0x12, 0x49, 0x0a, 0x08, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, - 0x61, 0x18, 0x02, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x2d, 0x2e, 0x67, 0x72, 0x70, 0x63, 0x2e, 0x74, - 0x65, 0x73, 0x74, 0x69, 0x6e, 0x67, 0x2e, 0x43, 0x6c, 0x69, 0x65, 0x6e, 0x74, 0x43, 0x6f, 0x6e, - 0x66, 0x69, 0x67, 0x75, 0x72, 0x65, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x2e, 0x4d, 0x65, - 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x52, 0x08, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, - 0x12, 0x1f, 0x0a, 0x0b, 0x74, 0x69, 0x6d, 0x65, 0x6f, 0x75, 0x74, 0x5f, 0x73, 0x65, 0x63, 0x18, - 0x03, 0x20, 0x01, 0x28, 0x05, 0x52, 0x0a, 0x74, 0x69, 0x6d, 0x65, 0x6f, 0x75, 0x74, 0x53, 0x65, - 0x63, 0x1a, 0x74, 0x0a, 0x08, 0x4d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x12, 0x40, 0x0a, - 0x04, 0x74, 0x79, 0x70, 0x65, 0x18, 0x01, 0x20, 0x01, 0x28, 0x0e, 0x32, 0x2c, 0x2e, 0x67, 0x72, - 0x70, 0x63, 0x2e, 0x74, 0x65, 0x73, 0x74, 0x69, 0x6e, 0x67, 0x2e, 0x43, 0x6c, 0x69, 0x65, 0x6e, - 0x74, 0x43, 0x6f, 0x6e, 0x66, 0x69, 0x67, 0x75, 0x72, 0x65, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, - 0x74, 0x2e, 0x52, 0x70, 0x63, 0x54, 0x79, 0x70, 0x65, 0x52, 0x04, 0x74, 0x79, 0x70, 0x65, 0x12, - 0x10, 0x0a, 0x03, 0x6b, 0x65, 0x79, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, 0x6b, 0x65, - 0x79, 0x12, 0x14, 0x0a, 0x05, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x18, 0x03, 0x20, 0x01, 0x28, 0x09, - 0x52, 0x05, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x22, 0x29, 0x0a, 0x07, 0x52, 0x70, 0x63, 0x54, 0x79, - 0x70, 0x65, 0x12, 0x0e, 0x0a, 0x0a, 0x45, 0x4d, 0x50, 0x54, 0x59, 0x5f, 0x43, 0x41, 0x4c, 0x4c, - 0x10, 0x00, 0x12, 0x0e, 0x0a, 0x0a, 0x55, 0x4e, 0x41, 0x52, 0x59, 0x5f, 0x43, 0x41, 0x4c, 0x4c, - 0x10, 0x01, 0x22, 0x19, 0x0a, 0x17, 0x43, 0x6c, 0x69, 0x65, 0x6e, 0x74, 0x43, 0x6f, 0x6e, 0x66, - 0x69, 0x67, 0x75, 0x72, 0x65, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x22, 0x8b, 0x03, - 0x0a, 0x0e, 0x54, 0x65, 0x73, 0x74, 0x4f, 0x72, 0x63, 0x61, 0x52, 0x65, 0x70, 0x6f, 0x72, 0x74, - 0x12, 0x27, 0x0a, 0x0f, 0x63, 0x70, 0x75, 0x5f, 0x75, 0x74, 0x69, 0x6c, 0x69, 0x7a, 0x61, 0x74, - 0x69, 0x6f, 0x6e, 0x18, 0x01, 0x20, 0x01, 0x28, 0x01, 0x52, 0x0e, 0x63, 0x70, 0x75, 0x55, 0x74, - 0x69, 0x6c, 0x69, 0x7a, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x12, 0x2d, 0x0a, 0x12, 0x6d, 0x65, 0x6d, - 0x6f, 0x72, 0x79, 0x5f, 0x75, 0x74, 0x69, 0x6c, 0x69, 0x7a, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x18, - 0x02, 0x20, 0x01, 0x28, 0x01, 0x52, 0x11, 0x6d, 0x65, 0x6d, 0x6f, 0x72, 0x79, 0x55, 0x74, 0x69, - 0x6c, 0x69, 0x7a, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x12, 0x50, 0x0a, 0x0c, 0x72, 0x65, 0x71, 0x75, - 0x65, 0x73, 0x74, 0x5f, 0x63, 0x6f, 0x73, 0x74, 0x18, 0x03, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x2d, - 0x2e, 0x67, 0x72, 0x70, 0x63, 0x2e, 0x74, 0x65, 0x73, 0x74, 0x69, 0x6e, 0x67, 0x2e, 0x54, 0x65, - 0x73, 0x74, 0x4f, 0x72, 0x63, 0x61, 0x52, 0x65, 0x70, 0x6f, 0x72, 0x74, 0x2e, 0x52, 0x65, 0x71, - 0x75, 0x65, 0x73, 0x74, 0x43, 0x6f, 0x73, 0x74, 0x45, 0x6e, 0x74, 0x72, 0x79, 0x52, 0x0b, 0x72, - 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x43, 0x6f, 0x73, 0x74, 0x12, 0x4f, 0x0a, 0x0b, 0x75, 0x74, - 0x69, 0x6c, 0x69, 0x7a, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x18, 0x04, 0x20, 
0x03, 0x28, 0x0b, 0x32, - 0x2d, 0x2e, 0x67, 0x72, 0x70, 0x63, 0x2e, 0x74, 0x65, 0x73, 0x74, 0x69, 0x6e, 0x67, 0x2e, 0x54, - 0x65, 0x73, 0x74, 0x4f, 0x72, 0x63, 0x61, 0x52, 0x65, 0x70, 0x6f, 0x72, 0x74, 0x2e, 0x55, 0x74, - 0x69, 0x6c, 0x69, 0x7a, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x45, 0x6e, 0x74, 0x72, 0x79, 0x52, 0x0b, - 0x75, 0x74, 0x69, 0x6c, 0x69, 0x7a, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x1a, 0x3e, 0x0a, 0x10, 0x52, - 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x43, 0x6f, 0x73, 0x74, 0x45, 0x6e, 0x74, 0x72, 0x79, 0x12, - 0x10, 0x0a, 0x03, 0x6b, 0x65, 0x79, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, 0x6b, 0x65, - 0x79, 0x12, 0x14, 0x0a, 0x05, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x18, 0x02, 0x20, 0x01, 0x28, 0x01, - 0x52, 0x05, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x3a, 0x02, 0x38, 0x01, 0x1a, 0x3e, 0x0a, 0x10, 0x55, - 0x74, 0x69, 0x6c, 0x69, 0x7a, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x45, 0x6e, 0x74, 0x72, 0x79, 0x12, - 0x10, 0x0a, 0x03, 0x6b, 0x65, 0x79, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, 0x6b, 0x65, - 0x79, 0x12, 0x14, 0x0a, 0x05, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x18, 0x02, 0x20, 0x01, 0x28, 0x01, - 0x52, 0x05, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x3a, 0x02, 0x38, 0x01, 0x2a, 0x1f, 0x0a, 0x0b, 0x50, - 0x61, 0x79, 0x6c, 0x6f, 0x61, 0x64, 0x54, 0x79, 0x70, 0x65, 0x12, 0x10, 0x0a, 0x0c, 0x43, 0x4f, - 0x4d, 0x50, 0x52, 0x45, 0x53, 0x53, 0x41, 0x42, 0x4c, 0x45, 0x10, 0x00, 0x2a, 0x6f, 0x0a, 0x0f, - 0x47, 0x72, 0x70, 0x63, 0x6c, 0x62, 0x52, 0x6f, 0x75, 0x74, 0x65, 0x54, 0x79, 0x70, 0x65, 0x12, - 0x1d, 0x0a, 0x19, 0x47, 0x52, 0x50, 0x43, 0x4c, 0x42, 0x5f, 0x52, 0x4f, 0x55, 0x54, 0x45, 0x5f, - 0x54, 0x59, 0x50, 0x45, 0x5f, 0x55, 0x4e, 0x4b, 0x4e, 0x4f, 0x57, 0x4e, 0x10, 0x00, 0x12, 0x1e, - 0x0a, 0x1a, 0x47, 0x52, 0x50, 0x43, 0x4c, 0x42, 0x5f, 0x52, 0x4f, 0x55, 0x54, 0x45, 0x5f, 0x54, - 0x59, 0x50, 0x45, 0x5f, 0x46, 0x41, 0x4c, 0x4c, 0x42, 0x41, 0x43, 0x4b, 0x10, 0x01, 0x12, 0x1d, - 0x0a, 0x19, 0x47, 0x52, 0x50, 0x43, 0x4c, 0x42, 0x5f, 0x52, 0x4f, 0x55, 0x54, 0x45, 0x5f, 0x54, - 0x59, 0x50, 0x45, 0x5f, 0x42, 0x41, 0x43, 0x4b, 0x45, 0x4e, 0x44, 0x10, 0x02, 0x42, 0x1d, 0x0a, - 0x1b, 0x69, 0x6f, 0x2e, 0x67, 0x72, 0x70, 0x63, 0x2e, 0x74, 0x65, 0x73, 0x74, 0x69, 0x6e, 0x67, - 0x2e, 0x69, 0x6e, 0x74, 0x65, 0x67, 0x72, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x62, 0x06, 0x70, 0x72, - 0x6f, 0x74, 0x6f, 0x33, + 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x22, 0x8b, 0x03, 0x0a, 0x0e, 0x54, 0x65, 0x73, 0x74, + 0x4f, 0x72, 0x63, 0x61, 0x52, 0x65, 0x70, 0x6f, 0x72, 0x74, 0x12, 0x27, 0x0a, 0x0f, 0x63, 0x70, + 0x75, 0x5f, 0x75, 0x74, 0x69, 0x6c, 0x69, 0x7a, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x18, 0x01, 0x20, + 0x01, 0x28, 0x01, 0x52, 0x0e, 0x63, 0x70, 0x75, 0x55, 0x74, 0x69, 0x6c, 0x69, 0x7a, 0x61, 0x74, + 0x69, 0x6f, 0x6e, 0x12, 0x2d, 0x0a, 0x12, 0x6d, 0x65, 0x6d, 0x6f, 0x72, 0x79, 0x5f, 0x75, 0x74, + 0x69, 0x6c, 0x69, 0x7a, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x18, 0x02, 0x20, 0x01, 0x28, 0x01, 0x52, + 0x11, 0x6d, 0x65, 0x6d, 0x6f, 0x72, 0x79, 0x55, 0x74, 0x69, 0x6c, 0x69, 0x7a, 0x61, 0x74, 0x69, + 0x6f, 0x6e, 0x12, 0x50, 0x0a, 0x0c, 0x72, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x5f, 0x63, 0x6f, + 0x73, 0x74, 0x18, 0x03, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x2d, 0x2e, 0x67, 0x72, 0x70, 0x63, 0x2e, + 0x74, 0x65, 0x73, 0x74, 0x69, 0x6e, 0x67, 0x2e, 0x54, 0x65, 0x73, 0x74, 0x4f, 0x72, 0x63, 0x61, + 0x52, 0x65, 0x70, 0x6f, 0x72, 0x74, 0x2e, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x43, 0x6f, + 0x73, 0x74, 0x45, 0x6e, 0x74, 0x72, 0x79, 0x52, 0x0b, 0x72, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, + 0x43, 0x6f, 0x73, 0x74, 0x12, 0x4f, 0x0a, 0x0b, 0x75, 0x74, 0x69, 0x6c, 
0x69, 0x7a, 0x61, 0x74, + 0x69, 0x6f, 0x6e, 0x18, 0x04, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x2d, 0x2e, 0x67, 0x72, 0x70, 0x63, + 0x2e, 0x74, 0x65, 0x73, 0x74, 0x69, 0x6e, 0x67, 0x2e, 0x54, 0x65, 0x73, 0x74, 0x4f, 0x72, 0x63, + 0x61, 0x52, 0x65, 0x70, 0x6f, 0x72, 0x74, 0x2e, 0x55, 0x74, 0x69, 0x6c, 0x69, 0x7a, 0x61, 0x74, + 0x69, 0x6f, 0x6e, 0x45, 0x6e, 0x74, 0x72, 0x79, 0x52, 0x0b, 0x75, 0x74, 0x69, 0x6c, 0x69, 0x7a, + 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x1a, 0x3e, 0x0a, 0x10, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, + 0x43, 0x6f, 0x73, 0x74, 0x45, 0x6e, 0x74, 0x72, 0x79, 0x12, 0x10, 0x0a, 0x03, 0x6b, 0x65, 0x79, + 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, 0x6b, 0x65, 0x79, 0x12, 0x14, 0x0a, 0x05, 0x76, + 0x61, 0x6c, 0x75, 0x65, 0x18, 0x02, 0x20, 0x01, 0x28, 0x01, 0x52, 0x05, 0x76, 0x61, 0x6c, 0x75, + 0x65, 0x3a, 0x02, 0x38, 0x01, 0x1a, 0x3e, 0x0a, 0x10, 0x55, 0x74, 0x69, 0x6c, 0x69, 0x7a, 0x61, + 0x74, 0x69, 0x6f, 0x6e, 0x45, 0x6e, 0x74, 0x72, 0x79, 0x12, 0x10, 0x0a, 0x03, 0x6b, 0x65, 0x79, + 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, 0x6b, 0x65, 0x79, 0x12, 0x14, 0x0a, 0x05, 0x76, + 0x61, 0x6c, 0x75, 0x65, 0x18, 0x02, 0x20, 0x01, 0x28, 0x01, 0x52, 0x05, 0x76, 0x61, 0x6c, 0x75, + 0x65, 0x3a, 0x02, 0x38, 0x01, 0x2a, 0x1f, 0x0a, 0x0b, 0x50, 0x61, 0x79, 0x6c, 0x6f, 0x61, 0x64, + 0x54, 0x79, 0x70, 0x65, 0x12, 0x10, 0x0a, 0x0c, 0x43, 0x4f, 0x4d, 0x50, 0x52, 0x45, 0x53, 0x53, + 0x41, 0x42, 0x4c, 0x45, 0x10, 0x00, 0x2a, 0x6f, 0x0a, 0x0f, 0x47, 0x72, 0x70, 0x63, 0x6c, 0x62, + 0x52, 0x6f, 0x75, 0x74, 0x65, 0x54, 0x79, 0x70, 0x65, 0x12, 0x1d, 0x0a, 0x19, 0x47, 0x52, 0x50, + 0x43, 0x4c, 0x42, 0x5f, 0x52, 0x4f, 0x55, 0x54, 0x45, 0x5f, 0x54, 0x59, 0x50, 0x45, 0x5f, 0x55, + 0x4e, 0x4b, 0x4e, 0x4f, 0x57, 0x4e, 0x10, 0x00, 0x12, 0x1e, 0x0a, 0x1a, 0x47, 0x52, 0x50, 0x43, + 0x4c, 0x42, 0x5f, 0x52, 0x4f, 0x55, 0x54, 0x45, 0x5f, 0x54, 0x59, 0x50, 0x45, 0x5f, 0x46, 0x41, + 0x4c, 0x4c, 0x42, 0x41, 0x43, 0x4b, 0x10, 0x01, 0x12, 0x1d, 0x0a, 0x19, 0x47, 0x52, 0x50, 0x43, + 0x4c, 0x42, 0x5f, 0x52, 0x4f, 0x55, 0x54, 0x45, 0x5f, 0x54, 0x59, 0x50, 0x45, 0x5f, 0x42, 0x41, + 0x43, 0x4b, 0x45, 0x4e, 0x44, 0x10, 0x02, 0x42, 0x1d, 0x0a, 0x1b, 0x69, 0x6f, 0x2e, 0x67, 0x72, + 0x70, 0x63, 0x2e, 0x74, 0x65, 0x73, 0x74, 0x69, 0x6e, 0x67, 0x2e, 0x69, 0x6e, 0x74, 0x65, 0x67, + 0x72, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x62, 0x06, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x33, } var ( @@ -2004,38 +1990,37 @@ var file_grpc_testing_messages_proto_depIdxs = []int32{ 5, // 4: grpc.testing.SimpleRequest.response_status:type_name -> grpc.testing.EchoStatus 3, // 5: grpc.testing.SimpleRequest.expect_compressed:type_name -> grpc.testing.BoolValue 21, // 6: grpc.testing.SimpleRequest.orca_per_query_report:type_name -> grpc.testing.TestOrcaReport - 21, // 7: grpc.testing.SimpleRequest.orca_oob_report:type_name -> grpc.testing.TestOrcaReport - 4, // 8: grpc.testing.SimpleResponse.payload:type_name -> grpc.testing.Payload - 1, // 9: grpc.testing.SimpleResponse.grpclb_route_type:type_name -> grpc.testing.GrpclbRouteType - 4, // 10: grpc.testing.StreamingInputCallRequest.payload:type_name -> grpc.testing.Payload - 3, // 11: grpc.testing.StreamingInputCallRequest.expect_compressed:type_name -> grpc.testing.BoolValue - 3, // 12: grpc.testing.ResponseParameters.compressed:type_name -> grpc.testing.BoolValue - 0, // 13: grpc.testing.StreamingOutputCallRequest.response_type:type_name -> grpc.testing.PayloadType - 10, // 14: grpc.testing.StreamingOutputCallRequest.response_parameters:type_name -> grpc.testing.ResponseParameters - 4, // 15: 
grpc.testing.StreamingOutputCallRequest.payload:type_name -> grpc.testing.Payload - 5, // 16: grpc.testing.StreamingOutputCallRequest.response_status:type_name -> grpc.testing.EchoStatus - 21, // 17: grpc.testing.StreamingOutputCallRequest.orca_oob_report:type_name -> grpc.testing.TestOrcaReport - 4, // 18: grpc.testing.StreamingOutputCallResponse.payload:type_name -> grpc.testing.Payload - 23, // 19: grpc.testing.LoadBalancerStatsResponse.rpcs_by_peer:type_name -> grpc.testing.LoadBalancerStatsResponse.RpcsByPeerEntry - 24, // 20: grpc.testing.LoadBalancerStatsResponse.rpcs_by_method:type_name -> grpc.testing.LoadBalancerStatsResponse.RpcsByMethodEntry - 26, // 21: grpc.testing.LoadBalancerAccumulatedStatsResponse.num_rpcs_started_by_method:type_name -> grpc.testing.LoadBalancerAccumulatedStatsResponse.NumRpcsStartedByMethodEntry - 27, // 22: grpc.testing.LoadBalancerAccumulatedStatsResponse.num_rpcs_succeeded_by_method:type_name -> grpc.testing.LoadBalancerAccumulatedStatsResponse.NumRpcsSucceededByMethodEntry - 28, // 23: grpc.testing.LoadBalancerAccumulatedStatsResponse.num_rpcs_failed_by_method:type_name -> grpc.testing.LoadBalancerAccumulatedStatsResponse.NumRpcsFailedByMethodEntry - 30, // 24: grpc.testing.LoadBalancerAccumulatedStatsResponse.stats_per_method:type_name -> grpc.testing.LoadBalancerAccumulatedStatsResponse.StatsPerMethodEntry - 2, // 25: grpc.testing.ClientConfigureRequest.types:type_name -> grpc.testing.ClientConfigureRequest.RpcType - 32, // 26: grpc.testing.ClientConfigureRequest.metadata:type_name -> grpc.testing.ClientConfigureRequest.Metadata - 33, // 27: grpc.testing.TestOrcaReport.request_cost:type_name -> grpc.testing.TestOrcaReport.RequestCostEntry - 34, // 28: grpc.testing.TestOrcaReport.utilization:type_name -> grpc.testing.TestOrcaReport.UtilizationEntry - 25, // 29: grpc.testing.LoadBalancerStatsResponse.RpcsByPeer.rpcs_by_peer:type_name -> grpc.testing.LoadBalancerStatsResponse.RpcsByPeer.RpcsByPeerEntry - 22, // 30: grpc.testing.LoadBalancerStatsResponse.RpcsByMethodEntry.value:type_name -> grpc.testing.LoadBalancerStatsResponse.RpcsByPeer - 31, // 31: grpc.testing.LoadBalancerAccumulatedStatsResponse.MethodStats.result:type_name -> grpc.testing.LoadBalancerAccumulatedStatsResponse.MethodStats.ResultEntry - 29, // 32: grpc.testing.LoadBalancerAccumulatedStatsResponse.StatsPerMethodEntry.value:type_name -> grpc.testing.LoadBalancerAccumulatedStatsResponse.MethodStats - 2, // 33: grpc.testing.ClientConfigureRequest.Metadata.type:type_name -> grpc.testing.ClientConfigureRequest.RpcType - 34, // [34:34] is the sub-list for method output_type - 34, // [34:34] is the sub-list for method input_type - 34, // [34:34] is the sub-list for extension type_name - 34, // [34:34] is the sub-list for extension extendee - 0, // [0:34] is the sub-list for field type_name + 4, // 7: grpc.testing.SimpleResponse.payload:type_name -> grpc.testing.Payload + 1, // 8: grpc.testing.SimpleResponse.grpclb_route_type:type_name -> grpc.testing.GrpclbRouteType + 4, // 9: grpc.testing.StreamingInputCallRequest.payload:type_name -> grpc.testing.Payload + 3, // 10: grpc.testing.StreamingInputCallRequest.expect_compressed:type_name -> grpc.testing.BoolValue + 3, // 11: grpc.testing.ResponseParameters.compressed:type_name -> grpc.testing.BoolValue + 0, // 12: grpc.testing.StreamingOutputCallRequest.response_type:type_name -> grpc.testing.PayloadType + 10, // 13: grpc.testing.StreamingOutputCallRequest.response_parameters:type_name -> grpc.testing.ResponseParameters + 4, // 14: 
grpc.testing.StreamingOutputCallRequest.payload:type_name -> grpc.testing.Payload + 5, // 15: grpc.testing.StreamingOutputCallRequest.response_status:type_name -> grpc.testing.EchoStatus + 21, // 16: grpc.testing.StreamingOutputCallRequest.orca_oob_report:type_name -> grpc.testing.TestOrcaReport + 4, // 17: grpc.testing.StreamingOutputCallResponse.payload:type_name -> grpc.testing.Payload + 23, // 18: grpc.testing.LoadBalancerStatsResponse.rpcs_by_peer:type_name -> grpc.testing.LoadBalancerStatsResponse.RpcsByPeerEntry + 24, // 19: grpc.testing.LoadBalancerStatsResponse.rpcs_by_method:type_name -> grpc.testing.LoadBalancerStatsResponse.RpcsByMethodEntry + 26, // 20: grpc.testing.LoadBalancerAccumulatedStatsResponse.num_rpcs_started_by_method:type_name -> grpc.testing.LoadBalancerAccumulatedStatsResponse.NumRpcsStartedByMethodEntry + 27, // 21: grpc.testing.LoadBalancerAccumulatedStatsResponse.num_rpcs_succeeded_by_method:type_name -> grpc.testing.LoadBalancerAccumulatedStatsResponse.NumRpcsSucceededByMethodEntry + 28, // 22: grpc.testing.LoadBalancerAccumulatedStatsResponse.num_rpcs_failed_by_method:type_name -> grpc.testing.LoadBalancerAccumulatedStatsResponse.NumRpcsFailedByMethodEntry + 30, // 23: grpc.testing.LoadBalancerAccumulatedStatsResponse.stats_per_method:type_name -> grpc.testing.LoadBalancerAccumulatedStatsResponse.StatsPerMethodEntry + 2, // 24: grpc.testing.ClientConfigureRequest.types:type_name -> grpc.testing.ClientConfigureRequest.RpcType + 32, // 25: grpc.testing.ClientConfigureRequest.metadata:type_name -> grpc.testing.ClientConfigureRequest.Metadata + 33, // 26: grpc.testing.TestOrcaReport.request_cost:type_name -> grpc.testing.TestOrcaReport.RequestCostEntry + 34, // 27: grpc.testing.TestOrcaReport.utilization:type_name -> grpc.testing.TestOrcaReport.UtilizationEntry + 25, // 28: grpc.testing.LoadBalancerStatsResponse.RpcsByPeer.rpcs_by_peer:type_name -> grpc.testing.LoadBalancerStatsResponse.RpcsByPeer.RpcsByPeerEntry + 22, // 29: grpc.testing.LoadBalancerStatsResponse.RpcsByMethodEntry.value:type_name -> grpc.testing.LoadBalancerStatsResponse.RpcsByPeer + 31, // 30: grpc.testing.LoadBalancerAccumulatedStatsResponse.MethodStats.result:type_name -> grpc.testing.LoadBalancerAccumulatedStatsResponse.MethodStats.ResultEntry + 29, // 31: grpc.testing.LoadBalancerAccumulatedStatsResponse.StatsPerMethodEntry.value:type_name -> grpc.testing.LoadBalancerAccumulatedStatsResponse.MethodStats + 2, // 32: grpc.testing.ClientConfigureRequest.Metadata.type:type_name -> grpc.testing.ClientConfigureRequest.RpcType + 33, // [33:33] is the sub-list for method output_type + 33, // [33:33] is the sub-list for method input_type + 33, // [33:33] is the sub-list for extension type_name + 33, // [33:33] is the sub-list for extension extendee + 0, // [0:33] is the sub-list for field type_name } func init() { file_grpc_testing_messages_proto_init() } From 8eba9c2de14ab211f21633104165fb082d272bfe Mon Sep 17 00:00:00 2001 From: Doug Fawley Date: Mon, 15 May 2023 15:49:19 -0700 Subject: [PATCH 42/60] github: upgrade to v3 of checkout & setup-go (#6280) --- .github/workflows/testing.yml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/testing.yml b/.github/workflows/testing.yml index f7f0fbec6e45..afb830852597 100644 --- a/.github/workflows/testing.yml +++ b/.github/workflows/testing.yml @@ -24,11 +24,11 @@ jobs: steps: # Setup the environment. 
- name: Setup Go - uses: actions/setup-go@v2 + uses: actions/setup-go@v3 with: go-version: '1.20' - name: Checkout repo - uses: actions/checkout@v2 + uses: actions/checkout@v3 # Run the vet checks. - name: vet @@ -89,12 +89,12 @@ jobs: run: echo "${{ matrix.grpcenv }}" >> $GITHUB_ENV - name: Setup Go - uses: actions/setup-go@v2 + uses: actions/setup-go@v3 with: go-version: ${{ matrix.goversion }} - name: Checkout repo - uses: actions/checkout@v2 + uses: actions/checkout@v3 # Only run vet for 'vet' runs. - name: Run vet.sh From 756119c7de49e91b6f3b9d693b9850e1598938eb Mon Sep 17 00:00:00 2001 From: Zach Reyes <39203661+zasweq@users.noreply.github.com> Date: Tue, 16 May 2023 15:46:31 -0400 Subject: [PATCH 43/60] xds/outlierdetection: forward metadata from child picker (#6287) --- xds/internal/balancer/outlierdetection/balancer.go | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/xds/internal/balancer/outlierdetection/balancer.go b/xds/internal/balancer/outlierdetection/balancer.go index 97f5503f38d1..1b35f518b48b 100644 --- a/xds/internal/balancer/outlierdetection/balancer.go +++ b/xds/internal/balancer/outlierdetection/balancer.go @@ -412,13 +412,15 @@ func (wp *wrappedPicker) Pick(info balancer.PickInfo) (balancer.PickResult, erro // programming. logger.Errorf("Picked SubConn from child picker is not a SubConnWrapper") return balancer.PickResult{ - SubConn: pr.SubConn, - Done: done, + SubConn: pr.SubConn, + Done: done, + Metadata: pr.Metadata, }, nil } return balancer.PickResult{ - SubConn: scw.SubConn, - Done: done, + SubConn: scw.SubConn, + Done: done, + Metadata: pr.Metadata, }, nil } From 92e65c890c9abdc571d88d7dd885ff6c4ae9dd7b Mon Sep 17 00:00:00 2001 From: Sergii Tkachenko Date: Tue, 16 May 2023 18:20:55 -0400 Subject: [PATCH 44/60] test/kokoro: Add custom_lb_test to the xds_k8s_lb job (#6290) --- test/kokoro/xds_k8s_lb.sh | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/test/kokoro/xds_k8s_lb.sh b/test/kokoro/xds_k8s_lb.sh index f1f01794a056..d50f0f5484fb 100755 --- a/test/kokoro/xds_k8s_lb.sh +++ b/test/kokoro/xds_k8s_lb.sh @@ -158,7 +158,16 @@ main() { # Run tests cd "${TEST_DRIVER_FULL_DIR}" local failed_tests=0 - test_suites=("api_listener_test" "change_backend_service_test" "failover_test" "remove_neg_test" "round_robin_test" "affinity_test" "outlier_detection_test") + test_suites=( + "affinity_test" + "api_listener_test" + "change_backend_service_test" + "custom_lb_test" + "failover_test" + "outlier_detection_test" + "remove_neg_test" + "round_robin_test" + ) for test in "${test_suites[@]}"; do run_test $test || (( ++failed_tests )) done From 52fef6da12c6bebdb10abdf7466bc0249bf18123 Mon Sep 17 00:00:00 2001 From: erm-g <110920239+erm-g@users.noreply.github.com> Date: Wed, 17 May 2023 14:03:37 +0000 Subject: [PATCH 45/60] authz: Stdout logger (#6230) * Draft of StdoutLogger * Fitting StdoutLogger to lb patterns * conversion from proto to json for laudit loggers * Tests for multiple loggers and empty Options * Added LoggerConfig impl * Switched to grpcLogger and added a unit test comparing log with os.StdOut * Minor fix in exception handling wording * Added timestamp for logging statement * Changed format to json and added custom marshalling * Migration to log.go and additional test for a full event * Migration of stdout logger to a separate package * migration to grpcLogger, unit test fix * Delete xds parsing functionality. Will be done in a separate PR * Delete xds parsing functionality. 
Will be done in a separate PR

* Address PR comments (embedding interface, table test, pointer optimizations)
* vet.sh fixes
* Address PR comments
* Commit for go tidy changes
* vet.sh fix for buf usage
* Address PR comments
* Address PR comments
* Address PR comments (easwars)
* Address PR comments (luwei)
* Migrate printing to standard out from log package level func to a Logger struct func. Add timestamp testing logic. Add registry presence test.
* Changed event Timestamp format back to RFC3339
* Address PR comments
* Address PR comments
* Address PR comments
* Address PR comments
---
 authz/audit/stdout/stdout_logger.go      | 107 +++++++++++++++++
 authz/audit/stdout/stdout_logger_test.go | 140 +++++++++++++++++++++++
 2 files changed, 247 insertions(+)
 create mode 100644 authz/audit/stdout/stdout_logger.go
 create mode 100644 authz/audit/stdout/stdout_logger_test.go

diff --git a/authz/audit/stdout/stdout_logger.go b/authz/audit/stdout/stdout_logger.go
new file mode 100644
index 000000000000..ee095527ccec
--- /dev/null
+++ b/authz/audit/stdout/stdout_logger.go
@@ -0,0 +1,107 @@
+/*
+ *
+ * Copyright 2023 gRPC authors.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ */
+
+// Package stdout defines a stdout audit logger.
+package stdout
+
+import (
+	"encoding/json"
+	"log"
+	"os"
+	"time"
+
+	"google.golang.org/grpc/authz/audit"
+	"google.golang.org/grpc/grpclog"
+)
+
+var grpcLogger = grpclog.Component("authz-audit")
+
+func init() {
+	audit.RegisterLoggerBuilder(&loggerBuilder{
+		goLogger: log.New(os.Stdout, "", 0),
+	})
+}
+
+type event struct {
+	FullMethodName string `json:"rpc_method"`
+	Principal      string `json:"principal"`
+	PolicyName     string `json:"policy_name"`
+	MatchedRule    string `json:"matched_rule"`
+	Authorized     bool   `json:"authorized"`
+	Timestamp      string `json:"timestamp"` // Time when the audit event is logged via Log method
+}
+
+// logger implements the audit.Logger interface by logging to standard output.
+type logger struct {
+	goLogger *log.Logger
+}
+
+// Log marshals the audit.Event to JSON and prints it to standard output.
+func (l *logger) Log(event *audit.Event) {
+	jsonContainer := map[string]interface{}{
+		"grpc_audit_log": convertEvent(event),
+	}
+	jsonBytes, err := json.Marshal(jsonContainer)
+	if err != nil {
+		grpcLogger.Errorf("failed to marshal AuditEvent data to JSON: %v", err)
+		return
+	}
+	l.goLogger.Println(string(jsonBytes))
+}
+
+// loggerConfig represents the configuration for the stdout logger.
+// It is currently empty and implements the audit.LoggerConfig interface by embedding it.
+type loggerConfig struct {
+	audit.LoggerConfig
+}
+
+type loggerBuilder struct {
+	goLogger *log.Logger
+}
+
+func (loggerBuilder) Name() string {
+	return "stdout_logger"
+}
+
+// Build returns a new instance of the stdout logger.
+// Passed-in configuration is ignored as the stdout logger does not
+// expect any configuration to be provided.
+func (lb *loggerBuilder) Build(audit.LoggerConfig) audit.Logger { + return &logger{ + goLogger: lb.goLogger, + } +} + +// ParseLoggerConfig is a no-op since the stdout logger does not accept any configuration. +func (*loggerBuilder) ParseLoggerConfig(config json.RawMessage) (audit.LoggerConfig, error) { + if len(config) != 0 && string(config) != "{}" { + grpcLogger.Warningf("Stdout logger doesn't support custom configs. Ignoring:\n%s", string(config)) + } + return &loggerConfig{}, nil +} + +func convertEvent(auditEvent *audit.Event) *event { + return &event{ + FullMethodName: auditEvent.FullMethodName, + Principal: auditEvent.Principal, + PolicyName: auditEvent.PolicyName, + MatchedRule: auditEvent.MatchedRule, + Authorized: auditEvent.Authorized, + Timestamp: time.Now().Format(time.RFC3339Nano), + } +} diff --git a/authz/audit/stdout/stdout_logger_test.go b/authz/audit/stdout/stdout_logger_test.go new file mode 100644 index 000000000000..a389b942e2c7 --- /dev/null +++ b/authz/audit/stdout/stdout_logger_test.go @@ -0,0 +1,140 @@ +/* + * + * Copyright 2023 gRPC authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +package stdout + +import ( + "bytes" + "encoding/json" + "log" + "os" + "testing" + "time" + + "github.com/google/go-cmp/cmp" + "google.golang.org/grpc/authz/audit" + "google.golang.org/grpc/internal/grpctest" +) + +type s struct { + grpctest.Tester +} + +func Test(t *testing.T) { + grpctest.RunSubTests(t, s{}) +} + +func (s) TestStdoutLogger_Log(t *testing.T) { + tests := map[string]struct { + event *audit.Event + wantMessage string + wantErr string + }{ + "few fields": { + event: &audit.Event{PolicyName: "test policy", Principal: "test principal"}, + wantMessage: `{"fullMethodName":"","principal":"test principal","policyName":"test policy","matchedRule":"","authorized":false`, + }, + "all fields": { + event: &audit.Event{ + FullMethodName: "/helloworld.Greeter/SayHello", + Principal: "spiffe://example.org/ns/default/sa/default/backend", + PolicyName: "example-policy", + MatchedRule: "dev-access", + Authorized: true, + }, + wantMessage: `{"fullMethodName":"/helloworld.Greeter/SayHello",` + + `"principal":"spiffe://example.org/ns/default/sa/default/backend","policyName":"example-policy",` + + `"matchedRule":"dev-access","authorized":true`, + }, + } + + for name, test := range tests { + t.Run(name, func(t *testing.T) { + before := time.Now().Unix() + var buf bytes.Buffer + builder := &loggerBuilder{goLogger: log.New(&buf, "", 0)} + auditLogger := builder.Build(nil) + + auditLogger.Log(test.event) + + var container map[string]interface{} + if err := json.Unmarshal(buf.Bytes(), &container); err != nil { + t.Fatalf("Failed to unmarshal audit log event: %v", err) + } + innerEvent := extractEvent(container["grpc_audit_log"].(map[string]interface{})) + if innerEvent.Timestamp == "" { + t.Fatalf("Resulted event has no timestamp: %v", innerEvent) + } + after := time.Now().Unix() + innerEventUnixTime, err := time.Parse(time.RFC3339Nano, innerEvent.Timestamp) + if 
err != nil { + t.Fatalf("Failed to convert event timestamp into Unix time format: %v", err) + } + if before > innerEventUnixTime.Unix() || after < innerEventUnixTime.Unix() { + t.Errorf("The audit event timestamp is outside of the test interval: test start %v, event timestamp %v, test end %v", before, innerEventUnixTime.Unix(), after) + } + if diff := cmp.Diff(trimEvent(innerEvent), test.event); diff != "" { + t.Fatalf("Unexpected message\ndiff (-got +want):\n%s", diff) + } + }) + } +} + +func (s) TestStdoutLoggerBuilder_NilConfig(t *testing.T) { + builder := &loggerBuilder{ + goLogger: log.New(os.Stdout, "", log.LstdFlags), + } + config, err := builder.ParseLoggerConfig(nil) + if err != nil { + t.Fatalf("Failed to parse stdout logger configuration: %v", err) + } + if l := builder.Build(config); l == nil { + t.Fatal("Failed to build stdout audit logger") + } +} + +func (s) TestStdoutLoggerBuilder_Registration(t *testing.T) { + if audit.GetLoggerBuilder("stdout_logger") == nil { + t.Fatal("stdout logger is not registered") + } +} + +// extractEvent extracts an stdout.event from a map +// unmarshalled from a logged json message. +func extractEvent(container map[string]interface{}) event { + return event{ + FullMethodName: container["rpc_method"].(string), + Principal: container["principal"].(string), + PolicyName: container["policy_name"].(string), + MatchedRule: container["matched_rule"].(string), + Authorized: container["authorized"].(bool), + Timestamp: container["timestamp"].(string), + } +} + +// trimEvent converts a logged stdout.event into an audit.Event +// by removing Timestamp field. It is used for comparing events during testing. +func trimEvent(testEvent event) *audit.Event { + return &audit.Event{ + FullMethodName: testEvent.FullMethodName, + Principal: testEvent.Principal, + PolicyName: testEvent.PolicyName, + MatchedRule: testEvent.MatchedRule, + Authorized: testEvent.Authorized, + } +} From 390c392f8422e46121b85fba4d0c1c9faf37317d Mon Sep 17 00:00:00 2001 From: Gregory Cooke Date: Wed, 17 May 2023 10:21:06 -0400 Subject: [PATCH 46/60] authz: Rbac engine audit logging (#6225) add the functionality to actually do audit logging in rbac_engine.go and associated tests for that functionality. --- authz/grpc_authz_server_interceptors.go | 4 +- authz/rbac_translator.go | 43 +- authz/rbac_translator_test.go | 128 ++- internal/xds/rbac/converter.go | 98 ++ internal/xds/rbac/converter_test.go | 114 +++ internal/xds/rbac/rbac_engine.go | 97 +- internal/xds/rbac/rbac_engine_test.go | 1104 ++++++++++++++++++++--- xds/internal/httpfilter/rbac/rbac.go | 5 +- 8 files changed, 1407 insertions(+), 186 deletions(-) create mode 100644 internal/xds/rbac/converter.go create mode 100644 internal/xds/rbac/converter_test.go diff --git a/authz/grpc_authz_server_interceptors.go b/authz/grpc_authz_server_interceptors.go index ab93af13f37e..3e5f598a97d1 100644 --- a/authz/grpc_authz_server_interceptors.go +++ b/authz/grpc_authz_server_interceptors.go @@ -44,11 +44,11 @@ type StaticInterceptor struct { // NewStatic returns a new StaticInterceptor from a static authorization policy // JSON string. 
func NewStatic(authzPolicy string) (*StaticInterceptor, error) { - rbacs, err := translatePolicy(authzPolicy) + rbacs, policyName, err := translatePolicy(authzPolicy) if err != nil { return nil, err } - chainEngine, err := rbac.NewChainEngine(rbacs) + chainEngine, err := rbac.NewChainEngine(rbacs, policyName) if err != nil { return nil, err } diff --git a/authz/rbac_translator.go b/authz/rbac_translator.go index ce5c15cb976d..d88797d49907 100644 --- a/authz/rbac_translator.go +++ b/authz/rbac_translator.go @@ -39,7 +39,7 @@ import ( // This is used when converting a custom config from raw JSON to a TypedStruct // The TypeURL of the TypeStruct will be "grpc.authz.audit_logging/" -const typedURLPrefix = "grpc.authz.audit_logging/" +const typeURLPrefix = "grpc.authz.audit_logging/" type header struct { Key string @@ -62,14 +62,14 @@ type rule struct { } type auditLogger struct { - Name string `json:"name"` - Config *structpb.Struct `json:"config"` - IsOptional bool `json:"is_optional"` + Name string `json:"name"` + Config structpb.Struct `json:"config"` + IsOptional bool `json:"is_optional"` } type auditLoggingOptions struct { - AuditCondition string `json:"audit_condition"` - AuditLoggers []auditLogger `json:"audit_loggers"` + AuditCondition string `json:"audit_condition"` + AuditLoggers []*auditLogger `json:"audit_loggers"` } // Represents the SDK authorization policy provided by user. @@ -302,14 +302,13 @@ func (options *auditLoggingOptions) toProtos() (allow *v3rbacpb.RBAC_AuditLoggin deny.AuditCondition = toDenyCondition(v3rbacpb.RBAC_AuditLoggingOptions_AuditCondition(rbacCondition)) } - for i := range options.AuditLoggers { - config := &options.AuditLoggers[i] - if config.Config == nil { - return nil, nil, fmt.Errorf("AuditLogger Config field cannot be nil") + for i, config := range options.AuditLoggers { + if config.Name == "" { + return nil, nil, fmt.Errorf("missing required field: name in audit_logging_options.audit_loggers[%v]", i) } typedStruct := &v1xdsudpatypepb.TypedStruct{ - TypeUrl: typedURLPrefix + config.Name, - Value: config.Config, + TypeUrl: typeURLPrefix + config.Name, + Value: &config.Config, } customConfig, err := anypb.New(typedStruct) if err != nil { @@ -355,30 +354,30 @@ func toDenyCondition(condition v3rbacpb.RBAC_AuditLoggingOptions_AuditCondition) // translatePolicy translates SDK authorization policy in JSON format to two // Envoy RBAC polices (deny followed by allow policy) or only one Envoy RBAC -// allow policy. If the input policy cannot be parsed or is invalid, an error -// will be returned. -func translatePolicy(policyStr string) ([]*v3rbacpb.RBAC, error) { +// allow policy. Also returns the overall policy name. If the input policy +// cannot be parsed or is invalid, an error will be returned. 
+func translatePolicy(policyStr string) ([]*v3rbacpb.RBAC, string, error) { policy := &authorizationPolicy{} d := json.NewDecoder(bytes.NewReader([]byte(policyStr))) d.DisallowUnknownFields() if err := d.Decode(policy); err != nil { - return nil, fmt.Errorf("failed to unmarshal policy: %v", err) + return nil, "", fmt.Errorf("failed to unmarshal policy: %v", err) } if policy.Name == "" { - return nil, fmt.Errorf(`"name" is not present`) + return nil, "", fmt.Errorf(`"name" is not present`) } if len(policy.AllowRules) == 0 { - return nil, fmt.Errorf(`"allow_rules" is not present`) + return nil, "", fmt.Errorf(`"allow_rules" is not present`) } allowLogger, denyLogger, err := policy.AuditLoggingOptions.toProtos() if err != nil { - return nil, err + return nil, "", err } rbacs := make([]*v3rbacpb.RBAC, 0, 2) if len(policy.DenyRules) > 0 { denyPolicies, err := parseRules(policy.DenyRules, policy.Name) if err != nil { - return nil, fmt.Errorf(`"deny_rules" %v`, err) + return nil, "", fmt.Errorf(`"deny_rules" %v`, err) } denyRBAC := &v3rbacpb.RBAC{ Action: v3rbacpb.RBAC_DENY, @@ -389,8 +388,8 @@ func translatePolicy(policyStr string) ([]*v3rbacpb.RBAC, error) { } allowPolicies, err := parseRules(policy.AllowRules, policy.Name) if err != nil { - return nil, fmt.Errorf(`"allow_rules" %v`, err) + return nil, "", fmt.Errorf(`"allow_rules" %v`, err) } allowRBAC := &v3rbacpb.RBAC{Action: v3rbacpb.RBAC_ALLOW, Policies: allowPolicies, AuditLoggingOptions: allowLogger} - return append(rbacs, allowRBAC), nil + return append(rbacs, allowRBAC), policy.Name, nil } diff --git a/authz/rbac_translator_test.go b/authz/rbac_translator_test.go index fed0ef5c9d33..23b6fb669e9c 100644 --- a/authz/rbac_translator_test.go +++ b/authz/rbac_translator_test.go @@ -36,9 +36,10 @@ import ( func TestTranslatePolicy(t *testing.T) { tests := map[string]struct { - authzPolicy string - wantErr string - wantPolicies []*v3rbacpb.RBAC + authzPolicy string + wantErr string + wantPolicies []*v3rbacpb.RBAC + wantPolicyName string }{ "valid policy": { authzPolicy: `{ @@ -210,6 +211,7 @@ func TestTranslatePolicy(t *testing.T) { AuditLoggingOptions: &v3rbacpb.RBAC_AuditLoggingOptions{}, }, }, + wantPolicyName: "authz", }, "allow authenticated": { authzPolicy: `{ @@ -798,6 +800,101 @@ func TestTranslatePolicy(t *testing.T) { }, }, }, + "missing custom config audit logger": { + authzPolicy: `{ + "name": "authz", + "allow_rules": [ + { + "name": "allow_authenticated", + "source": { + "principals":["*", ""] + } + }], + "deny_rules": [ + { + "name": "deny_policy_1", + "source": { + "principals":[ + "spiffe://foo.abc" + ] + } + }], + "audit_logging_options": { + "audit_condition": "ON_DENY", + "audit_loggers": [ + { + "name": "stdout_logger", + "is_optional": false + } + ] + } + }`, + wantPolicies: []*v3rbacpb.RBAC{ + { + Action: v3rbacpb.RBAC_DENY, + Policies: map[string]*v3rbacpb.Policy{ + "authz_deny_policy_1": { + Principals: []*v3rbacpb.Principal{ + {Identifier: &v3rbacpb.Principal_OrIds{OrIds: &v3rbacpb.Principal_Set{ + Ids: []*v3rbacpb.Principal{ + {Identifier: &v3rbacpb.Principal_Authenticated_{ + Authenticated: &v3rbacpb.Principal_Authenticated{PrincipalName: &v3matcherpb.StringMatcher{ + MatchPattern: &v3matcherpb.StringMatcher_Exact{Exact: "spiffe://foo.abc"}, + }}, + }}, + }, + }}}, + }, + Permissions: []*v3rbacpb.Permission{ + {Rule: &v3rbacpb.Permission_Any{Any: true}}, + }, + }, + }, + AuditLoggingOptions: &v3rbacpb.RBAC_AuditLoggingOptions{ + AuditCondition: v3rbacpb.RBAC_AuditLoggingOptions_ON_DENY, + LoggerConfigs: 
[]*v3rbacpb.RBAC_AuditLoggingOptions_AuditLoggerConfig{ + {AuditLogger: &v3corepb.TypedExtensionConfig{Name: "stdout_logger", TypedConfig: anyPbHelper(t, map[string]interface{}{}, "stdout_logger")}, + IsOptional: false, + }, + }, + }, + }, + { + Action: v3rbacpb.RBAC_ALLOW, + Policies: map[string]*v3rbacpb.Policy{ + "authz_allow_authenticated": { + Principals: []*v3rbacpb.Principal{ + {Identifier: &v3rbacpb.Principal_OrIds{OrIds: &v3rbacpb.Principal_Set{ + Ids: []*v3rbacpb.Principal{ + {Identifier: &v3rbacpb.Principal_Authenticated_{ + Authenticated: &v3rbacpb.Principal_Authenticated{PrincipalName: &v3matcherpb.StringMatcher{ + MatchPattern: &v3matcherpb.StringMatcher_SafeRegex{SafeRegex: &v3matcherpb.RegexMatcher{Regex: ".+"}}, + }}, + }}, + {Identifier: &v3rbacpb.Principal_Authenticated_{ + Authenticated: &v3rbacpb.Principal_Authenticated{PrincipalName: &v3matcherpb.StringMatcher{ + MatchPattern: &v3matcherpb.StringMatcher_Exact{Exact: ""}, + }}, + }}, + }, + }}}, + }, + Permissions: []*v3rbacpb.Permission{ + {Rule: &v3rbacpb.Permission_Any{Any: true}}, + }, + }, + }, + AuditLoggingOptions: &v3rbacpb.RBAC_AuditLoggingOptions{ + AuditCondition: v3rbacpb.RBAC_AuditLoggingOptions_ON_DENY, + LoggerConfigs: []*v3rbacpb.RBAC_AuditLoggingOptions_AuditLoggerConfig{ + {AuditLogger: &v3corepb.TypedExtensionConfig{Name: "stdout_logger", TypedConfig: anyPbHelper(t, map[string]interface{}{}, "stdout_logger")}, + IsOptional: false, + }, + }, + }, + }, + }, + }, "unknown field": { authzPolicy: `{"random": 123}`, wantErr: "failed to unmarshal policy", @@ -897,7 +994,7 @@ func TestTranslatePolicy(t *testing.T) { }`, wantErr: `failed to unmarshal policy`, }, - "missing custom config audit logger": { + "missing audit logger name": { authzPolicy: `{ "name": "authz", "allow_rules": [ @@ -907,37 +1004,32 @@ func TestTranslatePolicy(t *testing.T) { "principals":["*", ""] } }], - "deny_rules": [ - { - "name": "deny_policy_1", - "source": { - "principals":[ - "spiffe://foo.abc" - ] - } - }], "audit_logging_options": { - "audit_condition": "ON_DENY", + "audit_condition": "NONE", "audit_loggers": [ { - "name": "stdout_logger", + "name": "", + "config": {}, "is_optional": false } ] } }`, - wantErr: "AuditLogger Config field cannot be nil", + wantErr: `missing required field: name`, }, } for name, test := range tests { t.Run(name, func(t *testing.T) { - gotPolicies, gotErr := translatePolicy(test.authzPolicy) + gotPolicies, gotPolicyName, gotErr := translatePolicy(test.authzPolicy) if gotErr != nil && !strings.HasPrefix(gotErr.Error(), test.wantErr) { t.Fatalf("unexpected error\nwant:%v\ngot:%v", test.wantErr, gotErr) } if diff := cmp.Diff(gotPolicies, test.wantPolicies, protocmp.Transform()); diff != "" { t.Fatalf("unexpected policy\ndiff (-want +got):\n%s", diff) } + if test.wantPolicyName != "" && gotPolicyName != test.wantPolicyName { + t.Fatalf("unexpected policy name\nwant:%v\ngot:%v", test.wantPolicyName, gotPolicyName) + } }) } } @@ -946,7 +1038,7 @@ func anyPbHelper(t *testing.T, in map[string]interface{}, name string) *anypb.An t.Helper() pb, err := structpb.NewStruct(in) typedStruct := &v1xdsudpatypepb.TypedStruct{ - TypeUrl: typedURLPrefix + name, + TypeUrl: typeURLPrefix + name, Value: pb, } if err != nil { diff --git a/internal/xds/rbac/converter.go b/internal/xds/rbac/converter.go new file mode 100644 index 000000000000..db22fd5a9e08 --- /dev/null +++ b/internal/xds/rbac/converter.go @@ -0,0 +1,98 @@ +/* + * Copyright 2023 gRPC authors. 
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package rbac
+
+import (
+	"encoding/json"
+	"fmt"
+	"strings"
+
+	v1xdsudpatypepb "github.com/cncf/xds/go/udpa/type/v1"
+	v3xdsxdstypepb "github.com/cncf/xds/go/xds/type/v3"
+	v3rbacpb "github.com/envoyproxy/go-control-plane/envoy/config/rbac/v3"
+	"google.golang.org/grpc/authz/audit"
+	"google.golang.org/protobuf/types/known/anypb"
+	"google.golang.org/protobuf/types/known/structpb"
+)
+
+const udpaTypedStructType = "type.googleapis.com/udpa.type.v1.TypedStruct"
+const xdsTypedStructType = "type.googleapis.com/xds.type.v3.TypedStruct"
+
+func buildLogger(loggerConfig *v3rbacpb.RBAC_AuditLoggingOptions_AuditLoggerConfig) (audit.Logger, error) {
+	if loggerConfig.GetAuditLogger().GetTypedConfig() == nil {
+		return nil, fmt.Errorf("missing required field: TypedConfig")
+	}
+	customConfig, loggerName, err := getCustomConfig(loggerConfig.AuditLogger.TypedConfig)
+	if err != nil {
+		return nil, err
+	}
+	if loggerName == "" {
+		return nil, fmt.Errorf("field TypedConfig.TypeURL cannot be an empty string")
+	}
+	factory := audit.GetLoggerBuilder(loggerName)
+	if factory == nil {
+		if loggerConfig.IsOptional {
+			return nil, nil
+		}
+		return nil, fmt.Errorf("no builder registered for %v", loggerName)
+	}
+	auditLoggerConfig, err := factory.ParseLoggerConfig(customConfig)
+	if err != nil {
+		return nil, fmt.Errorf("custom config could not be parsed by registered factory. error: %v", err)
+	}
+	auditLogger := factory.Build(auditLoggerConfig)
+	return auditLogger, nil
+}
+
+func getCustomConfig(config *anypb.Any) (json.RawMessage, string, error) {
+	switch config.GetTypeUrl() {
+	case udpaTypedStructType:
+		typedStruct := &v1xdsudpatypepb.TypedStruct{}
+		if err := config.UnmarshalTo(typedStruct); err != nil {
+			return nil, "", fmt.Errorf("failed to unmarshal resource: %v", err)
+		}
+		return convertCustomConfig(typedStruct.TypeUrl, typedStruct.Value)
+	case xdsTypedStructType:
+		typedStruct := &v3xdsxdstypepb.TypedStruct{}
+		if err := config.UnmarshalTo(typedStruct); err != nil {
+			return nil, "", fmt.Errorf("failed to unmarshal resource: %v", err)
+		}
+		return convertCustomConfig(typedStruct.TypeUrl, typedStruct.Value)
+	}
+	return nil, "", fmt.Errorf("custom config not implemented for type [%v]", config.GetTypeUrl())
+}
+
+func convertCustomConfig(typeURL string, s *structpb.Struct) (json.RawMessage, string, error) {
+	// The gRPC policy name will be the "type name" part of the value of the
+	// type_url field in the TypedStruct. We get this by using the part after
+	// the last / character. Can assume a valid type_url from the control plane.
+ urls := strings.Split(typeURL, "/") + if len(urls) == 0 { + return nil, "", fmt.Errorf("error converting custom audit logger %v for %v: typeURL must have a url-like format with the typeName being the value after the last /", typeURL, s) + } + name := urls[len(urls)-1] + + rawJSON := []byte("{}") + var err error + if s != nil { + rawJSON, err = json.Marshal(s) + if err != nil { + return nil, "", fmt.Errorf("error converting custom audit logger %v for %v: %v", typeURL, s, err) + } + } + return rawJSON, name, nil +} diff --git a/internal/xds/rbac/converter_test.go b/internal/xds/rbac/converter_test.go new file mode 100644 index 000000000000..253b9db2d50d --- /dev/null +++ b/internal/xds/rbac/converter_test.go @@ -0,0 +1,114 @@ +/* + * Copyright 2023 gRPC authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package rbac + +import ( + "strings" + "testing" + + v3corepb "github.com/envoyproxy/go-control-plane/envoy/config/core/v3" + v3rbacpb "github.com/envoyproxy/go-control-plane/envoy/config/rbac/v3" + "google.golang.org/grpc/authz/audit" + "google.golang.org/protobuf/types/known/anypb" +) + +func (s) TestBuildLoggerErrors(t *testing.T) { + tests := []struct { + name string + loggerConfig *v3rbacpb.RBAC_AuditLoggingOptions_AuditLoggerConfig + expectedLogger audit.Logger + expectedError string + }{ + { + name: "nil typed config", + loggerConfig: &v3rbacpb.RBAC_AuditLoggingOptions_AuditLoggerConfig{ + AuditLogger: &v3corepb.TypedExtensionConfig{ + TypedConfig: nil, + }, + }, + expectedError: "missing required field: TypedConfig", + }, + { + name: "Unsupported Type", + loggerConfig: &v3rbacpb.RBAC_AuditLoggingOptions_AuditLoggerConfig{ + AuditLogger: &v3corepb.TypedExtensionConfig{ + Name: "TestAuditLoggerBuffer", + TypedConfig: &anypb.Any{}, + }, + }, + expectedError: "custom config not implemented for type ", + }, + { + name: "Empty name", + loggerConfig: &v3rbacpb.RBAC_AuditLoggingOptions_AuditLoggerConfig{ + AuditLogger: &v3corepb.TypedExtensionConfig{ + Name: "TestAuditLoggerBuffer", + TypedConfig: createUDPATypedStruct(t, map[string]interface{}{}, ""), + }, + }, + expectedError: "field TypedConfig.TypeURL cannot be an empty string", + }, + { + name: "No registered logger", + loggerConfig: &v3rbacpb.RBAC_AuditLoggingOptions_AuditLoggerConfig{ + AuditLogger: &v3corepb.TypedExtensionConfig{ + Name: "UnregisteredLogger", + TypedConfig: createUDPATypedStruct(t, map[string]interface{}{}, "UnregisteredLogger"), + }, + IsOptional: false, + }, + expectedError: "no builder registered for UnregisteredLogger", + }, + { + name: "fail to parse custom config", + loggerConfig: &v3rbacpb.RBAC_AuditLoggingOptions_AuditLoggerConfig{ + AuditLogger: &v3corepb.TypedExtensionConfig{ + Name: "TestAuditLoggerCustomConfig", + TypedConfig: createUDPATypedStruct(t, map[string]interface{}{"abc": "BADVALUE", "xyz": "123"}, "fail to parse custom config_TestAuditLoggerCustomConfig")}, + IsOptional: false, + }, + expectedError: "custom config could not be parsed", + }, + { + name: "no registered 
logger but optional passes", + loggerConfig: &v3rbacpb.RBAC_AuditLoggingOptions_AuditLoggerConfig{ + AuditLogger: &v3corepb.TypedExtensionConfig{ + Name: "UnregisteredLogger", + TypedConfig: createUDPATypedStruct(t, map[string]interface{}{}, "no registered logger but optional passes_UnregisteredLogger"), + }, + IsOptional: true, + }, + expectedLogger: nil, + }, + } + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + b := TestAuditLoggerCustomConfigBuilder{testName: test.name} + audit.RegisterLoggerBuilder(&b) + logger, err := buildLogger(test.loggerConfig) + if err != nil && !strings.HasPrefix(err.Error(), test.expectedError) { + t.Fatalf("expected error: %v. got error: %v", test.expectedError, err) + } else { + if logger != test.expectedLogger { + t.Fatalf("expected logger: %v. got logger: %v", test.expectedLogger, logger) + } + } + + }) + } + +} diff --git a/internal/xds/rbac/rbac_engine.go b/internal/xds/rbac/rbac_engine.go index a212579c63e2..63237affe23f 100644 --- a/internal/xds/rbac/rbac_engine.go +++ b/internal/xds/rbac/rbac_engine.go @@ -30,6 +30,7 @@ import ( v3rbacpb "github.com/envoyproxy/go-control-plane/envoy/config/rbac/v3" "google.golang.org/grpc" + "google.golang.org/grpc/authz/audit" "google.golang.org/grpc/codes" "google.golang.org/grpc/credentials" "google.golang.org/grpc/grpclog" @@ -51,10 +52,10 @@ type ChainEngine struct { // NewChainEngine returns a chain of RBAC engines, used to make authorization // decisions on incoming RPCs. Returns a non-nil error for invalid policies. -func NewChainEngine(policies []*v3rbacpb.RBAC) (*ChainEngine, error) { +func NewChainEngine(policies []*v3rbacpb.RBAC, policyName string) (*ChainEngine, error) { engines := make([]*engine, 0, len(policies)) for _, policy := range policies { - engine, err := newEngine(policy) + engine, err := newEngine(policy, policyName) if err != nil { return nil, err } @@ -94,13 +95,16 @@ func (cre *ChainEngine) IsAuthorized(ctx context.Context) error { switch { case engine.action == v3rbacpb.RBAC_ALLOW && !ok: cre.logRequestDetails(rpcData) + engine.doAuditLogging(rpcData, matchingPolicyName, false) return status.Errorf(codes.PermissionDenied, "incoming RPC did not match an allow policy") case engine.action == v3rbacpb.RBAC_DENY && ok: cre.logRequestDetails(rpcData) + engine.doAuditLogging(rpcData, matchingPolicyName, false) return status.Errorf(codes.PermissionDenied, "incoming RPC matched a deny policy %q", matchingPolicyName) } // Every policy in the engine list must be queried. Thus, iterate to the // next policy. + engine.doAuditLogging(rpcData, matchingPolicyName, true) } // If the incoming RPC gets through all of the engines successfully (i.e. // doesn't not match an allow or match a deny engine), the RPC is authorized @@ -110,14 +114,18 @@ func (cre *ChainEngine) IsAuthorized(ctx context.Context) error { // engine is used for matching incoming RPCs to policies. type engine struct { - policies map[string]*policyMatcher + // TODO(gtcooke94) - differentiate between `policyName`, `policies`, and `rules` + policyName string + policies map[string]*policyMatcher // action must be ALLOW or DENY. - action v3rbacpb.RBAC_Action + action v3rbacpb.RBAC_Action + auditLoggers []audit.Logger + auditCondition v3rbacpb.RBAC_AuditLoggingOptions_AuditCondition } -// newEngine creates an RBAC Engine based on the contents of policy. Returns a +// newEngine creates an RBAC Engine based on the contents of a policy. Returns a // non-nil error if the policy is invalid. 
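+// The policyName argument names the overall RBAC policy; it is used only to
+// populate the PolicyName field of the audit events this engine emits.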
-func newEngine(config *v3rbacpb.RBAC) (*engine, error) {
+func newEngine(config *v3rbacpb.RBAC, policyName string) (*engine, error) {
 	a := config.GetAction()
 	if a != v3rbacpb.RBAC_ALLOW && a != v3rbacpb.RBAC_DENY {
 		return nil, fmt.Errorf("unsupported action %s", config.Action)
@@ -131,18 +139,47 @@ func newEngine(config *v3rbacpb.RBAC) (*engine, error) {
 		}
 		policies[name] = matcher
 	}
+
+	auditLoggers, auditCondition, err := parseAuditOptions(config.GetAuditLoggingOptions())
+	if err != nil {
+		return nil, err
+	}
 	return &engine{
-		policies: policies,
-		action:   a,
+		policyName:     policyName,
+		policies:       policies,
+		action:         a,
+		auditLoggers:   auditLoggers,
+		auditCondition: auditCondition,
 	}, nil
 }
 
+func parseAuditOptions(opts *v3rbacpb.RBAC_AuditLoggingOptions) ([]audit.Logger, v3rbacpb.RBAC_AuditLoggingOptions_AuditCondition, error) {
+	if opts == nil {
+		return nil, v3rbacpb.RBAC_AuditLoggingOptions_NONE, nil
+	}
+	var auditLoggers []audit.Logger
+	for _, logger := range opts.LoggerConfigs {
+		auditLogger, err := buildLogger(logger)
+		if err != nil {
+			return nil, v3rbacpb.RBAC_AuditLoggingOptions_NONE, err
+		}
+		if auditLogger == nil {
+			// This occurs when the audit logger is not registered but also
+			// marked optional.
+			continue
+		}
+		auditLoggers = append(auditLoggers, auditLogger)
+	}
+	return auditLoggers, opts.GetAuditCondition(), nil
+
+}
+
 // findMatchingPolicy determines if an incoming RPC matches a policy. On a
 // successful match, it returns the name of the matching policy and a true bool
 // to specify that there was a matching policy found. It returns false in
 // the case of not finding a matching policy.
-func (r *engine) findMatchingPolicy(rpcData *rpcData) (string, bool) {
-	for policy, matcher := range r.policies {
+func (e *engine) findMatchingPolicy(rpcData *rpcData) (string, bool) {
+	for policy, matcher := range e.policies {
 		if matcher.match(rpcData) {
 			return policy, true
 		}
@@ -238,3 +275,43 @@ type rpcData struct {
 	// handshake.
 	certs []*x509.Certificate
 }
+
+func (e *engine) doAuditLogging(rpcData *rpcData, rule string, authorized bool) {
+	// In the RBAC world, we need to have a SPIFFE ID as the principal for this
+	// to be meaningful.
+	principal := ""
+	if rpcData.peerInfo != nil && rpcData.peerInfo.AuthInfo != nil && rpcData.peerInfo.AuthInfo.AuthType() == "tls" {
+		// If AuthType = tls, then we can cast AuthInfo to TLSInfo.
+		tlsInfo := rpcData.peerInfo.AuthInfo.(credentials.TLSInfo)
+		if tlsInfo.SPIFFEID != nil {
+			principal = tlsInfo.SPIFFEID.String()
+		}
+	}
+
+	// TODO(gtcooke94) check if we need to log before creating the event
+	event := &audit.Event{
+		FullMethodName: rpcData.fullMethod,
+		Principal:      principal,
+		PolicyName:     e.policyName,
+		MatchedRule:    rule,
+		Authorized:     authorized,
+	}
+	for _, logger := range e.auditLoggers {
+		switch e.auditCondition {
+		case v3rbacpb.RBAC_AuditLoggingOptions_ON_DENY:
+			if !authorized {
+				logger.Log(event)
+			}
+		case v3rbacpb.RBAC_AuditLoggingOptions_ON_ALLOW:
+			if authorized {
+				logger.Log(event)
+			}
+		case v3rbacpb.RBAC_AuditLoggingOptions_ON_DENY_AND_ALLOW:
+			logger.Log(event)
+		}
+	}
+}
+
+// This is used when converting a custom config from raw JSON to a TypedStruct.
+// The TypeURL of the TypedStruct will be "grpc.authz.audit_logging/".
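+// For example, the custom config for a hypothetical logger registered as
+// "stdout_logger" would be wrapped in a TypedStruct whose TypeURL is
+// "grpc.authz.audit_logging/stdout_logger"; the logger name is then recovered
+// by taking the segment after the last "/".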
+const typeURLPrefix = "grpc.authz.audit_logging/" diff --git a/internal/xds/rbac/rbac_engine_test.go b/internal/xds/rbac/rbac_engine_test.go index 19bc4e8ca891..32c357f4953f 100644 --- a/internal/xds/rbac/rbac_engine_test.go +++ b/internal/xds/rbac/rbac_engine_test.go @@ -21,10 +21,15 @@ import ( "crypto/tls" "crypto/x509" "crypto/x509/pkix" + "encoding/json" + "fmt" "net" "net/url" + "reflect" "testing" + v1xdsudpatypepb "github.com/cncf/xds/go/udpa/type/v1" + v3xdsxdstypepb "github.com/cncf/xds/go/xds/type/v3" v3corepb "github.com/envoyproxy/go-control-plane/envoy/config/core/v3" v3rbacpb "github.com/envoyproxy/go-control-plane/envoy/config/rbac/v3" v3routepb "github.com/envoyproxy/go-control-plane/envoy/config/route/v3" @@ -32,12 +37,15 @@ import ( v3typepb "github.com/envoyproxy/go-control-plane/envoy/type/v3" wrapperspb "github.com/golang/protobuf/ptypes/wrappers" "google.golang.org/grpc" + "google.golang.org/grpc/authz/audit" "google.golang.org/grpc/codes" "google.golang.org/grpc/credentials" "google.golang.org/grpc/internal/grpctest" "google.golang.org/grpc/metadata" "google.golang.org/grpc/peer" "google.golang.org/grpc/status" + "google.golang.org/protobuf/types/known/anypb" + "google.golang.org/protobuf/types/known/structpb" ) type s struct { @@ -62,9 +70,10 @@ func (a *addr) String() string { return a.ipAddress } // raise errors. func (s) TestNewChainEngine(t *testing.T) { tests := []struct { - name string - policies []*v3rbacpb.RBAC - wantErr bool + name string + policies []*v3rbacpb.RBAC + wantErr bool + policyName string }{ { name: "SuccessCaseAnyMatchSingular", @@ -424,16 +433,256 @@ func (s) TestNewChainEngine(t *testing.T) { }, }, }, + { + name: "SimpleAuditLogger", + policies: []*v3rbacpb.RBAC{ + { + Action: v3rbacpb.RBAC_ALLOW, + Policies: map[string]*v3rbacpb.Policy{ + "anyone": { + Permissions: []*v3rbacpb.Permission{ + {Rule: &v3rbacpb.Permission_Any{Any: true}}, + }, + Principals: []*v3rbacpb.Principal{ + {Identifier: &v3rbacpb.Principal_Any{Any: true}}, + }, + }, + }, + AuditLoggingOptions: &v3rbacpb.RBAC_AuditLoggingOptions{ + AuditCondition: v3rbacpb.RBAC_AuditLoggingOptions_ON_ALLOW, + LoggerConfigs: []*v3rbacpb.RBAC_AuditLoggingOptions_AuditLoggerConfig{ + {AuditLogger: &v3corepb.TypedExtensionConfig{ + Name: "TestAuditLoggerBuffer", + TypedConfig: createUDPATypedStruct(t, map[string]interface{}{}, "SimpleAuditLogger_TestAuditLoggerBuffer")}, + IsOptional: false, + }, + }, + }, + }, + }, + }, + { + name: "AuditLoggerCustomConfig", + policies: []*v3rbacpb.RBAC{ + { + Action: v3rbacpb.RBAC_ALLOW, + Policies: map[string]*v3rbacpb.Policy{ + "anyone": { + Permissions: []*v3rbacpb.Permission{ + {Rule: &v3rbacpb.Permission_Any{Any: true}}, + }, + Principals: []*v3rbacpb.Principal{ + {Identifier: &v3rbacpb.Principal_Any{Any: true}}, + }, + }, + }, + AuditLoggingOptions: &v3rbacpb.RBAC_AuditLoggingOptions{ + AuditCondition: v3rbacpb.RBAC_AuditLoggingOptions_ON_ALLOW, + LoggerConfigs: []*v3rbacpb.RBAC_AuditLoggingOptions_AuditLoggerConfig{ + {AuditLogger: &v3corepb.TypedExtensionConfig{ + Name: "TestAuditLoggerCustomConfig", + TypedConfig: createUDPATypedStruct(t, map[string]interface{}{"abc": 123, "xyz": "123"}, "AuditLoggerCustomConfig_TestAuditLoggerCustomConfig")}, + IsOptional: false, + }, + }, + }, + }, + }, + policyName: "test_policy", + }, + { + name: "AuditLoggerCustomConfigXdsTypedStruct", + policies: []*v3rbacpb.RBAC{ + { + Action: v3rbacpb.RBAC_ALLOW, + Policies: map[string]*v3rbacpb.Policy{ + "anyone": { + Permissions: 
[]*v3rbacpb.Permission{ + {Rule: &v3rbacpb.Permission_Any{Any: true}}, + }, + Principals: []*v3rbacpb.Principal{ + {Identifier: &v3rbacpb.Principal_Any{Any: true}}, + }, + }, + }, + AuditLoggingOptions: &v3rbacpb.RBAC_AuditLoggingOptions{ + AuditCondition: v3rbacpb.RBAC_AuditLoggingOptions_ON_ALLOW, + LoggerConfigs: []*v3rbacpb.RBAC_AuditLoggingOptions_AuditLoggerConfig{ + {AuditLogger: &v3corepb.TypedExtensionConfig{ + Name: "TestAuditLoggerCustomConfig", + TypedConfig: createXDSTypedStruct(t, map[string]interface{}{"abc": 123, "xyz": "123"}, "AuditLoggerCustomConfigXdsTypedStruct_TestAuditLoggerCustomConfig")}, + IsOptional: false, + }, + }, + }, + }, + }, + policyName: "test_policy", + }, + { + name: "Missing Optional AuditLogger doesn't fail", + policies: []*v3rbacpb.RBAC{ + { + Action: v3rbacpb.RBAC_ALLOW, + Policies: map[string]*v3rbacpb.Policy{ + "anyone": { + Permissions: []*v3rbacpb.Permission{ + {Rule: &v3rbacpb.Permission_Any{Any: true}}, + }, + Principals: []*v3rbacpb.Principal{ + {Identifier: &v3rbacpb.Principal_Any{Any: true}}, + }, + }, + }, + AuditLoggingOptions: &v3rbacpb.RBAC_AuditLoggingOptions{ + AuditCondition: v3rbacpb.RBAC_AuditLoggingOptions_ON_ALLOW, + LoggerConfigs: []*v3rbacpb.RBAC_AuditLoggingOptions_AuditLoggerConfig{ + {AuditLogger: &v3corepb.TypedExtensionConfig{ + Name: "UnsupportedLogger", + TypedConfig: createUDPATypedStruct(t, map[string]interface{}{}, "Missing Optional AuditLogger doesn't fail_UnsupportedLogger")}, + IsOptional: true, + }, + }, + }, + }, + }, + }, + { + name: "Missing Non-Optional AuditLogger fails", + policies: []*v3rbacpb.RBAC{ + { + Action: v3rbacpb.RBAC_ALLOW, + Policies: map[string]*v3rbacpb.Policy{ + "anyone": { + Permissions: []*v3rbacpb.Permission{ + {Rule: &v3rbacpb.Permission_Any{Any: true}}, + }, + Principals: []*v3rbacpb.Principal{ + {Identifier: &v3rbacpb.Principal_Any{Any: true}}, + }, + }, + }, + AuditLoggingOptions: &v3rbacpb.RBAC_AuditLoggingOptions{ + AuditCondition: v3rbacpb.RBAC_AuditLoggingOptions_ON_ALLOW, + LoggerConfigs: []*v3rbacpb.RBAC_AuditLoggingOptions_AuditLoggerConfig{ + {AuditLogger: &v3corepb.TypedExtensionConfig{ + Name: "UnsupportedLogger", + TypedConfig: createUDPATypedStruct(t, map[string]interface{}{}, "Missing Non-Optional AuditLogger fails_UnsupportedLogger")}, + IsOptional: false, + }, + }, + }, + }, + }, + wantErr: true, + }, + { + name: "Cannot_parse_missing_CustomConfig", + policies: []*v3rbacpb.RBAC{ + { + Action: v3rbacpb.RBAC_ALLOW, + Policies: map[string]*v3rbacpb.Policy{ + "anyone": { + Permissions: []*v3rbacpb.Permission{ + {Rule: &v3rbacpb.Permission_Any{Any: true}}, + }, + Principals: []*v3rbacpb.Principal{ + {Identifier: &v3rbacpb.Principal_Any{Any: true}}, + }, + }, + }, + AuditLoggingOptions: &v3rbacpb.RBAC_AuditLoggingOptions{ + AuditCondition: v3rbacpb.RBAC_AuditLoggingOptions_ON_ALLOW, + LoggerConfigs: []*v3rbacpb.RBAC_AuditLoggingOptions_AuditLoggerConfig{ + {AuditLogger: &v3corepb.TypedExtensionConfig{ + Name: "TestAuditLoggerCustomConfig", + }, + IsOptional: false, + }, + }, + }, + }, + }, + wantErr: true, + }, + { + name: "Cannot_parse_bad_CustomConfig", + policies: []*v3rbacpb.RBAC{ + { + Action: v3rbacpb.RBAC_ALLOW, + Policies: map[string]*v3rbacpb.Policy{ + "anyone": { + Permissions: []*v3rbacpb.Permission{ + {Rule: &v3rbacpb.Permission_Any{Any: true}}, + }, + Principals: []*v3rbacpb.Principal{ + {Identifier: &v3rbacpb.Principal_Any{Any: true}}, + }, + }, + }, + AuditLoggingOptions: &v3rbacpb.RBAC_AuditLoggingOptions{ + AuditCondition: 
v3rbacpb.RBAC_AuditLoggingOptions_ON_ALLOW, + LoggerConfigs: []*v3rbacpb.RBAC_AuditLoggingOptions_AuditLoggerConfig{ + {AuditLogger: &v3corepb.TypedExtensionConfig{ + Name: "TestAuditLoggerCustomConfig", + TypedConfig: createUDPATypedStruct(t, map[string]interface{}{"abc": "BADVALUE", "xyz": "123"}, "Cannot_parse_bad_CustomConfig_TestAuditLoggerCustomConfig")}, + IsOptional: false, + }, + }, + }, + }, + }, + wantErr: true, + }, + { + name: "Cannot_parse_missing_typedConfig_name", + policies: []*v3rbacpb.RBAC{ + { + Action: v3rbacpb.RBAC_ALLOW, + Policies: map[string]*v3rbacpb.Policy{ + "anyone": { + Permissions: []*v3rbacpb.Permission{ + {Rule: &v3rbacpb.Permission_Any{Any: true}}, + }, + Principals: []*v3rbacpb.Principal{ + {Identifier: &v3rbacpb.Principal_Any{Any: true}}, + }, + }, + }, + AuditLoggingOptions: &v3rbacpb.RBAC_AuditLoggingOptions{ + AuditCondition: v3rbacpb.RBAC_AuditLoggingOptions_ON_ALLOW, + LoggerConfigs: []*v3rbacpb.RBAC_AuditLoggingOptions_AuditLoggerConfig{ + {AuditLogger: &v3corepb.TypedExtensionConfig{ + Name: "TestAuditLoggerCustomConfig", + TypedConfig: createUDPATypedStruct(t, map[string]interface{}{"abc": 123, "xyz": "123"}, "")}, + IsOptional: false, + }, + }, + }, + }, + }, + wantErr: true, + }, } for _, test := range tests { t.Run(test.name, func(t *testing.T) { - if _, err := NewChainEngine(test.policies); (err != nil) != test.wantErr { + b := TestAuditLoggerBufferBuilder{testName: test.name} + audit.RegisterLoggerBuilder(&b) + b2 := TestAuditLoggerCustomConfigBuilder{testName: test.name} + audit.RegisterLoggerBuilder(&b2) + if _, err := NewChainEngine(test.policies, test.policyName); (err != nil) != test.wantErr { t.Fatalf("NewChainEngine(%+v) returned err: %v, wantErr: %v", test.policies, err, test.wantErr) } }) } } +type rbacQuery struct { + rpcData *rpcData + wantStatusCode codes.Code + wantAuditEvents []*audit.Event +} + // TestChainEngine tests the chain of RBAC Engines by configuring the chain of // engines in a certain way in different scenarios. After configuring the chain // of engines in a certain way, this test pings the chain of engines with @@ -446,10 +695,8 @@ func (s) TestChainEngine(t *testing.T) { tests := []struct { name string rbacConfigs []*v3rbacpb.RBAC - rbacQueries []struct { - rpcData *rpcData - wantStatusCode codes.Code - } + rbacQueries []rbacQuery + policyName string }{ // SuccessCaseAnyMatch tests a single RBAC Engine instantiated with // a config with a policy with any rules for both permissions and @@ -471,10 +718,7 @@ func (s) TestChainEngine(t *testing.T) { }, }, }, - rbacQueries: []struct { - rpcData *rpcData - wantStatusCode codes.Code - }{ + rbacQueries: []rbacQuery{ { rpcData: &rpcData{ fullMethod: "some method", @@ -505,10 +749,7 @@ func (s) TestChainEngine(t *testing.T) { }, }, }, - rbacQueries: []struct { - rpcData *rpcData - wantStatusCode codes.Code - }{ + rbacQueries: []rbacQuery{ // This RPC should match with the local host fan policy. Thus, // this RPC should be allowed to proceed. { @@ -571,10 +812,7 @@ func (s) TestChainEngine(t *testing.T) { }, }, }, - rbacQueries: []struct { - rpcData *rpcData - wantStatusCode codes.Code - }{ + rbacQueries: []rbacQuery{ // This incoming RPC Call should match with the service admin // policy. { @@ -659,10 +897,7 @@ func (s) TestChainEngine(t *testing.T) { }, }, }, - rbacQueries: []struct { - rpcData *rpcData - wantStatusCode codes.Code - }{ + rbacQueries: []rbacQuery{ // This incoming RPC Call should match with the not-secret-content policy. 
{ rpcData: &rpcData{ @@ -701,10 +936,7 @@ func (s) TestChainEngine(t *testing.T) { }, }, }, - rbacQueries: []struct { - rpcData *rpcData - wantStatusCode codes.Code - }{ + rbacQueries: []rbacQuery{ // This incoming RPC Call should match with the certain-direct-remote-ip policy. { rpcData: &rpcData{ @@ -745,10 +977,7 @@ func (s) TestChainEngine(t *testing.T) { }, }, }, - rbacQueries: []struct { - rpcData *rpcData - wantStatusCode codes.Code - }{ + rbacQueries: []rbacQuery{ // This incoming RPC Call should match with the certain-remote-ip policy. { rpcData: &rpcData{ @@ -785,10 +1014,7 @@ func (s) TestChainEngine(t *testing.T) { }, }, }, - rbacQueries: []struct { - rpcData *rpcData - wantStatusCode codes.Code - }{ + rbacQueries: []rbacQuery{ // This incoming RPC Call shouldn't match with the // certain-destination-ip policy, as the test listens on local // host. @@ -836,10 +1062,7 @@ func (s) TestChainEngine(t *testing.T) { Action: v3rbacpb.RBAC_DENY, }, }, - rbacQueries: []struct { - rpcData *rpcData - wantStatusCode codes.Code - }{ + rbacQueries: []rbacQuery{ // This RPC should match with the allow policy, and shouldn't // match with the deny and thus should be allowed to proceed. { @@ -903,10 +1126,7 @@ func (s) TestChainEngine(t *testing.T) { }, }, }, - rbacQueries: []struct { - rpcData *rpcData - wantStatusCode codes.Code - }{ + rbacQueries: []rbacQuery{ // This incoming RPC Call should match with the service admin // policy. No authentication info is provided, so the // authenticated matcher should match to the string matcher on @@ -956,10 +1176,7 @@ func (s) TestChainEngine(t *testing.T) { }, }, }, - rbacQueries: []struct { - rpcData *rpcData - wantStatusCode codes.Code - }{ + rbacQueries: []rbacQuery{ { rpcData: &rpcData{ fullMethod: "some method", @@ -992,10 +1209,7 @@ func (s) TestChainEngine(t *testing.T) { }, }, }, - rbacQueries: []struct { - rpcData *rpcData - wantStatusCode codes.Code - }{ + rbacQueries: []rbacQuery{ { rpcData: &rpcData{ fullMethod: "some method", @@ -1007,85 +1221,709 @@ func (s) TestChainEngine(t *testing.T) { }, }, }, - } - for _, test := range tests { - t.Run(test.name, func(t *testing.T) { - // Instantiate the chainedRBACEngine with different configurations that are - // interesting to test and to query. - cre, err := NewChainEngine(test.rbacConfigs) - if err != nil { - t.Fatalf("Error constructing RBAC Engine: %v", err) - } - // Query the created chain of RBAC Engines with different args to see - // if the chain of RBAC Engines configured as such works as intended. - for _, data := range test.rbacQueries { - func() { - // Construct the context with three data points that have enough - // information to represent incoming RPC's. This will be how a - // user uses this API. A user will have to put MD, PeerInfo, and - // the connection the RPC is sent on in the context. - ctx := metadata.NewIncomingContext(context.Background(), data.rpcData.md) - - // Make a TCP connection with a certain destination port. The - // address/port of this connection will be used to populate the - // destination ip/port in RPCData struct. This represents what - // the user of ChainEngine will have to place into - // context, as this is only way to get destination ip and port. 
- lis, err := net.Listen("tcp", "localhost:0") - if err != nil { - t.Fatalf("Error listening: %v", err) - } - defer lis.Close() - connCh := make(chan net.Conn, 1) - go func() { - conn, err := lis.Accept() - if err != nil { - t.Errorf("Error accepting connection: %v", err) - return - } - connCh <- conn - }() - _, err = net.Dial("tcp", lis.Addr().String()) - if err != nil { - t.Fatalf("Error dialing: %v", err) - } - conn := <-connCh - defer conn.Close() - getConnection = func(context.Context) net.Conn { - return conn - } - ctx = peer.NewContext(ctx, data.rpcData.peerInfo) - stream := &ServerTransportStreamWithMethod{ - method: data.rpcData.fullMethod, - } - - ctx = grpc.NewContextWithServerTransportStream(ctx, stream) - err = cre.IsAuthorized(ctx) - if gotCode := status.Code(err); gotCode != data.wantStatusCode { - t.Fatalf("IsAuthorized(%+v, %+v) returned (%+v), want(%+v)", ctx, data.rpcData.fullMethod, gotCode, data.wantStatusCode) - } - }() - } - }) - } -} - -type ServerTransportStreamWithMethod struct { - method string -} - -func (sts *ServerTransportStreamWithMethod) Method() string { - return sts.method -} - -func (sts *ServerTransportStreamWithMethod) SetHeader(md metadata.MD) error { - return nil -} - -func (sts *ServerTransportStreamWithMethod) SendHeader(md metadata.MD) error { - return nil -} - -func (sts *ServerTransportStreamWithMethod) SetTrailer(md metadata.MD) error { - return nil + // AllowAndDenyPolicy tests a policy with an allow (on path) and + // deny (on port) policy chained together. This represents how a user + // configured interceptor would use this, and also is a potential + // configuration for a dynamic xds interceptor. Further, it tests that + // the audit logger works properly in each scenario. + { + name: "AuditLoggingAllowAndDenyPolicy_ON_ALLOW", + policyName: "test_policy", + rbacConfigs: []*v3rbacpb.RBAC{ + { + Policies: map[string]*v3rbacpb.Policy{ + "localhost-fan": { + Permissions: []*v3rbacpb.Permission{ + {Rule: &v3rbacpb.Permission_UrlPath{UrlPath: &v3matcherpb.PathMatcher{Rule: &v3matcherpb.PathMatcher_Path{Path: &v3matcherpb.StringMatcher{MatchPattern: &v3matcherpb.StringMatcher_Exact{Exact: "localhost-fan-page"}}}}}}, + }, + Principals: []*v3rbacpb.Principal{ + {Identifier: &v3rbacpb.Principal_Any{Any: true}}, + }, + }, + }, + Action: v3rbacpb.RBAC_DENY, + AuditLoggingOptions: &v3rbacpb.RBAC_AuditLoggingOptions{ + AuditCondition: v3rbacpb.RBAC_AuditLoggingOptions_NONE, + LoggerConfigs: []*v3rbacpb.RBAC_AuditLoggingOptions_AuditLoggerConfig{ + {AuditLogger: &v3corepb.TypedExtensionConfig{ + Name: "TestAuditLoggerBuffer", + TypedConfig: createUDPATypedStruct(t, map[string]interface{}{}, "AuditLoggingAllowAndDenyPolicy_ON_ALLOW_TestAuditLoggerBuffer")}, + IsOptional: false, + }, + }, + }, + }, + { + Policies: map[string]*v3rbacpb.Policy{ + "certain-source-ip": { + Permissions: []*v3rbacpb.Permission{ + {Rule: &v3rbacpb.Permission_Any{Any: true}}, + }, + Principals: []*v3rbacpb.Principal{ + {Identifier: &v3rbacpb.Principal_DirectRemoteIp{DirectRemoteIp: &v3corepb.CidrRange{AddressPrefix: "0.0.0.0", PrefixLen: &wrapperspb.UInt32Value{Value: uint32(10)}}}}, + }, + }, + }, + Action: v3rbacpb.RBAC_ALLOW, + AuditLoggingOptions: &v3rbacpb.RBAC_AuditLoggingOptions{ + AuditCondition: v3rbacpb.RBAC_AuditLoggingOptions_ON_ALLOW, + LoggerConfigs: []*v3rbacpb.RBAC_AuditLoggingOptions_AuditLoggerConfig{ + {AuditLogger: &v3corepb.TypedExtensionConfig{ + Name: "TestAuditLoggerBuffer", + TypedConfig: createUDPATypedStruct(t, map[string]interface{}{}, 
"AuditLoggingAllowAndDenyPolicy_ON_ALLOW_TestAuditLoggerBuffer")}, + IsOptional: false, + }, + }, + }, + }, + }, + rbacQueries: []rbacQuery{ + // This RPC should match with the allow policy, and shouldn't + // match with the deny and thus should be allowed to proceed. + { + rpcData: &rpcData{ + fullMethod: "", + peerInfo: &peer.Peer{ + Addr: &addr{ipAddress: "0.0.0.0"}, + AuthInfo: credentials.TLSInfo{ + State: tls.ConnectionState{ + PeerCertificates: []*x509.Certificate{ + { + URIs: []*url.URL{ + { + Scheme: "spiffe", + Host: "cluster.local", + Path: "/ns/default/sa/admin", + }, + }, + }, + }, + }, + SPIFFEID: &url.URL{ + Scheme: "spiffe", + Host: "cluster.local", + Path: "/ns/default/sa/admin", + }, + }, + }, + }, + wantStatusCode: codes.OK, + wantAuditEvents: []*audit.Event{ + { + FullMethodName: "", + Principal: "spiffe://cluster.local/ns/default/sa/admin", + PolicyName: "test_policy", + MatchedRule: "certain-source-ip", + Authorized: true, + }, + }, + }, + // This RPC should match with both the allow policy and deny policy + // and thus shouldn't be allowed to proceed as matched with deny. + { + rpcData: &rpcData{ + fullMethod: "localhost-fan-page", + peerInfo: &peer.Peer{ + Addr: &addr{ipAddress: "0.0.0.0"}, + }, + }, + wantStatusCode: codes.PermissionDenied, + }, + // This RPC shouldn't match with either policy, and thus + // shouldn't be allowed to proceed as didn't match with allow. + { + rpcData: &rpcData{ + peerInfo: &peer.Peer{ + Addr: &addr{ipAddress: "10.0.0.0"}, + }, + }, + wantStatusCode: codes.PermissionDenied, + }, + // This RPC shouldn't match with allow, match with deny, and + // thus shouldn't be allowed to proceed. + { + rpcData: &rpcData{ + fullMethod: "localhost-fan-page", + peerInfo: &peer.Peer{ + Addr: &addr{ipAddress: "10.0.0.0"}, + }, + }, + wantStatusCode: codes.PermissionDenied, + }, + }, + }, + { + name: "AuditLoggingAllowAndDenyPolicy_ON_DENY", + policyName: "test_policy", + rbacConfigs: []*v3rbacpb.RBAC{ + { + Policies: map[string]*v3rbacpb.Policy{ + "localhost-fan": { + Permissions: []*v3rbacpb.Permission{ + {Rule: &v3rbacpb.Permission_UrlPath{UrlPath: &v3matcherpb.PathMatcher{Rule: &v3matcherpb.PathMatcher_Path{Path: &v3matcherpb.StringMatcher{MatchPattern: &v3matcherpb.StringMatcher_Exact{Exact: "localhost-fan-page"}}}}}}, + }, + Principals: []*v3rbacpb.Principal{ + {Identifier: &v3rbacpb.Principal_Any{Any: true}}, + }, + }, + }, + Action: v3rbacpb.RBAC_DENY, + AuditLoggingOptions: &v3rbacpb.RBAC_AuditLoggingOptions{ + AuditCondition: v3rbacpb.RBAC_AuditLoggingOptions_ON_DENY, + LoggerConfigs: []*v3rbacpb.RBAC_AuditLoggingOptions_AuditLoggerConfig{ + {AuditLogger: &v3corepb.TypedExtensionConfig{ + Name: "TestAuditLoggerBuffer", + TypedConfig: createUDPATypedStruct(t, map[string]interface{}{}, "AuditLoggingAllowAndDenyPolicy_ON_DENY_TestAuditLoggerBuffer")}, + IsOptional: false, + }, + }, + }, + }, + { + Policies: map[string]*v3rbacpb.Policy{ + "certain-source-ip": { + Permissions: []*v3rbacpb.Permission{ + {Rule: &v3rbacpb.Permission_Any{Any: true}}, + }, + Principals: []*v3rbacpb.Principal{ + {Identifier: &v3rbacpb.Principal_DirectRemoteIp{DirectRemoteIp: &v3corepb.CidrRange{AddressPrefix: "0.0.0.0", PrefixLen: &wrapperspb.UInt32Value{Value: uint32(10)}}}}, + }, + }, + }, + Action: v3rbacpb.RBAC_ALLOW, + AuditLoggingOptions: &v3rbacpb.RBAC_AuditLoggingOptions{ + AuditCondition: v3rbacpb.RBAC_AuditLoggingOptions_ON_DENY, + LoggerConfigs: []*v3rbacpb.RBAC_AuditLoggingOptions_AuditLoggerConfig{ + {AuditLogger: &v3corepb.TypedExtensionConfig{ + Name: 
"TestAuditLoggerBuffer", + TypedConfig: createUDPATypedStruct(t, map[string]interface{}{}, "AuditLoggingAllowAndDenyPolicy_ON_DENY_TestAuditLoggerBuffer")}, + IsOptional: false, + }, + }, + }, + }, + }, + rbacQueries: []rbacQuery{ + // This RPC should match with the allow policy, and shouldn't + // match with the deny and thus should be allowed to proceed. + // Audit logging matches with nothing. + { + rpcData: &rpcData{ + fullMethod: "", + peerInfo: &peer.Peer{ + Addr: &addr{ipAddress: "0.0.0.0"}, + }, + }, + wantStatusCode: codes.OK, + }, + // This RPC should match with both the allow policy and deny policy + // and thus shouldn't be allowed to proceed as matched with deny. + // Audit logging matches with deny and short circuits. + { + rpcData: &rpcData{ + fullMethod: "localhost-fan-page", + peerInfo: &peer.Peer{ + Addr: &addr{ipAddress: "0.0.0.0"}, + AuthInfo: credentials.TLSInfo{ + State: tls.ConnectionState{ + PeerCertificates: []*x509.Certificate{ + { + URIs: []*url.URL{ + { + Host: "cluster.local", + Path: "/ns/default/sa/admin", + }, + }, + }, + }, + }, + }, + }, + }, + wantStatusCode: codes.PermissionDenied, + wantAuditEvents: []*audit.Event{ + { + FullMethodName: "localhost-fan-page", + PolicyName: "test_policy", + MatchedRule: "localhost-fan", + Authorized: false, + }, + }, + }, + // This RPC shouldn't match with either policy, and thus + // shouldn't be allowed to proceed as didn't match with allow. + // Audit logging matches with the allow policy. + { + rpcData: &rpcData{ + peerInfo: &peer.Peer{ + Addr: &addr{ipAddress: "10.0.0.0"}, + }, + }, + wantStatusCode: codes.PermissionDenied, + wantAuditEvents: []*audit.Event{ + { + FullMethodName: "", + PolicyName: "test_policy", + MatchedRule: "", + Authorized: false, + }, + }, + }, + // This RPC shouldn't match with allow, match with deny, and + // thus shouldn't be allowed to proceed. + // Audit logging will have the deny logged. 
+ { + rpcData: &rpcData{ + fullMethod: "localhost-fan-page", + peerInfo: &peer.Peer{ + Addr: &addr{ipAddress: "10.0.0.0"}, + }, + }, + wantStatusCode: codes.PermissionDenied, + wantAuditEvents: []*audit.Event{ + { + FullMethodName: "localhost-fan-page", + PolicyName: "test_policy", + MatchedRule: "localhost-fan", + Authorized: false, + }, + }, + }, + }, + }, + { + name: "AuditLoggingAllowAndDenyPolicy_NONE", + policyName: "test_policy", + rbacConfigs: []*v3rbacpb.RBAC{ + { + Policies: map[string]*v3rbacpb.Policy{ + "localhost-fan": { + Permissions: []*v3rbacpb.Permission{ + {Rule: &v3rbacpb.Permission_UrlPath{UrlPath: &v3matcherpb.PathMatcher{Rule: &v3matcherpb.PathMatcher_Path{Path: &v3matcherpb.StringMatcher{MatchPattern: &v3matcherpb.StringMatcher_Exact{Exact: "localhost-fan-page"}}}}}}, + }, + Principals: []*v3rbacpb.Principal{ + {Identifier: &v3rbacpb.Principal_Any{Any: true}}, + }, + }, + }, + Action: v3rbacpb.RBAC_DENY, + AuditLoggingOptions: &v3rbacpb.RBAC_AuditLoggingOptions{ + AuditCondition: v3rbacpb.RBAC_AuditLoggingOptions_NONE, + LoggerConfigs: []*v3rbacpb.RBAC_AuditLoggingOptions_AuditLoggerConfig{ + {AuditLogger: &v3corepb.TypedExtensionConfig{ + Name: "TestAuditLoggerBuffer", + TypedConfig: createUDPATypedStruct(t, map[string]interface{}{}, "AuditLoggingAllowAndDenyPolicy_NONE_TestAuditLoggerBuffer")}, + IsOptional: false, + }, + }, + }, + }, + { + Policies: map[string]*v3rbacpb.Policy{ + "certain-source-ip": { + Permissions: []*v3rbacpb.Permission{ + {Rule: &v3rbacpb.Permission_Any{Any: true}}, + }, + Principals: []*v3rbacpb.Principal{ + {Identifier: &v3rbacpb.Principal_DirectRemoteIp{DirectRemoteIp: &v3corepb.CidrRange{AddressPrefix: "0.0.0.0", PrefixLen: &wrapperspb.UInt32Value{Value: uint32(10)}}}}, + }, + }, + }, + Action: v3rbacpb.RBAC_ALLOW, + AuditLoggingOptions: &v3rbacpb.RBAC_AuditLoggingOptions{ + AuditCondition: v3rbacpb.RBAC_AuditLoggingOptions_NONE, + LoggerConfigs: []*v3rbacpb.RBAC_AuditLoggingOptions_AuditLoggerConfig{ + {AuditLogger: &v3corepb.TypedExtensionConfig{ + Name: "TestAuditLoggerBuffer", + TypedConfig: createUDPATypedStruct(t, map[string]interface{}{}, "AuditLoggingAllowAndDenyPolicy_NONE_TestAuditLoggerBuffer")}, + IsOptional: false, + }, + }, + }, + }, + }, + rbacQueries: []rbacQuery{ + // This RPC should match with the allow policy, and shouldn't + // match with the deny and thus should be allowed to proceed. + // Audit logging is NONE. + { + rpcData: &rpcData{ + fullMethod: "", + peerInfo: &peer.Peer{ + Addr: &addr{ipAddress: "0.0.0.0"}, + }, + }, + wantStatusCode: codes.OK, + }, + // This RPC should match with both the allow policy and deny policy + // and thus shouldn't be allowed to proceed as matched with deny. + // Audit logging is NONE. + { + rpcData: &rpcData{ + fullMethod: "localhost-fan-page", + peerInfo: &peer.Peer{ + Addr: &addr{ipAddress: "0.0.0.0"}, + }, + }, + wantStatusCode: codes.PermissionDenied, + }, + // This RPC shouldn't match with either policy, and thus + // shouldn't be allowed to proceed as didn't match with allow. + // Audit logging is NONE. + { + rpcData: &rpcData{ + peerInfo: &peer.Peer{ + Addr: &addr{ipAddress: "10.0.0.0"}, + }, + }, + wantStatusCode: codes.PermissionDenied, + }, + // This RPC shouldn't match with allow, match with deny, and + // thus shouldn't be allowed to proceed. + // Audit logging is NONE. 
+ { + rpcData: &rpcData{ + fullMethod: "localhost-fan-page", + peerInfo: &peer.Peer{ + Addr: &addr{ipAddress: "10.0.0.0"}, + }, + }, + wantStatusCode: codes.PermissionDenied, + }, + }, + }, + { + name: "AuditLoggingAllowAndDenyPolicy_ON_DENY_AND_ALLOW", + policyName: "test_policy", + rbacConfigs: []*v3rbacpb.RBAC{ + { + Policies: map[string]*v3rbacpb.Policy{ + "localhost-fan": { + Permissions: []*v3rbacpb.Permission{ + {Rule: &v3rbacpb.Permission_UrlPath{UrlPath: &v3matcherpb.PathMatcher{Rule: &v3matcherpb.PathMatcher_Path{Path: &v3matcherpb.StringMatcher{MatchPattern: &v3matcherpb.StringMatcher_Exact{Exact: "localhost-fan-page"}}}}}}, + }, + Principals: []*v3rbacpb.Principal{ + {Identifier: &v3rbacpb.Principal_Any{Any: true}}, + }, + }, + }, + Action: v3rbacpb.RBAC_DENY, + AuditLoggingOptions: &v3rbacpb.RBAC_AuditLoggingOptions{ + AuditCondition: v3rbacpb.RBAC_AuditLoggingOptions_ON_DENY, + LoggerConfigs: []*v3rbacpb.RBAC_AuditLoggingOptions_AuditLoggerConfig{ + {AuditLogger: &v3corepb.TypedExtensionConfig{ + Name: "TestAuditLoggerBuffer", + TypedConfig: createUDPATypedStruct(t, map[string]interface{}{}, "AuditLoggingAllowAndDenyPolicy_ON_DENY_AND_ALLOW_TestAuditLoggerBuffer")}, + IsOptional: false, + }, + }, + }, + }, + { + Policies: map[string]*v3rbacpb.Policy{ + "certain-source-ip": { + Permissions: []*v3rbacpb.Permission{ + {Rule: &v3rbacpb.Permission_Any{Any: true}}, + }, + Principals: []*v3rbacpb.Principal{ + {Identifier: &v3rbacpb.Principal_DirectRemoteIp{DirectRemoteIp: &v3corepb.CidrRange{AddressPrefix: "0.0.0.0", PrefixLen: &wrapperspb.UInt32Value{Value: uint32(10)}}}}, + }, + }, + }, + Action: v3rbacpb.RBAC_ALLOW, + AuditLoggingOptions: &v3rbacpb.RBAC_AuditLoggingOptions{ + AuditCondition: v3rbacpb.RBAC_AuditLoggingOptions_ON_DENY_AND_ALLOW, + LoggerConfigs: []*v3rbacpb.RBAC_AuditLoggingOptions_AuditLoggerConfig{ + {AuditLogger: &v3corepb.TypedExtensionConfig{ + Name: "TestAuditLoggerBuffer", + TypedConfig: createUDPATypedStruct(t, map[string]interface{}{}, "AuditLoggingAllowAndDenyPolicy_ON_DENY_AND_ALLOW_TestAuditLoggerBuffer")}, + IsOptional: false, + }, + }, + }, + }, + }, + rbacQueries: []rbacQuery{ + // This RPC should match with the allow policy, and shouldn't + // match with the deny and thus should be allowed to proceed. + // Audit logging matches with nothing. + { + rpcData: &rpcData{ + fullMethod: "", + peerInfo: &peer.Peer{ + Addr: &addr{ipAddress: "0.0.0.0"}, + }, + }, + wantStatusCode: codes.OK, + wantAuditEvents: []*audit.Event{ + { + FullMethodName: "", + PolicyName: "test_policy", + MatchedRule: "certain-source-ip", + Authorized: true, + }, + }, + }, + // This RPC should match with both the allow policy and deny policy + // and thus shouldn't be allowed to proceed as matched with deny. + // Audit logging matches with deny and short circuits. + { + rpcData: &rpcData{ + fullMethod: "localhost-fan-page", + peerInfo: &peer.Peer{ + Addr: &addr{ipAddress: "0.0.0.0"}, + }, + }, + wantStatusCode: codes.PermissionDenied, + wantAuditEvents: []*audit.Event{ + { + FullMethodName: "localhost-fan-page", + PolicyName: "test_policy", + MatchedRule: "localhost-fan", + Authorized: false, + }, + }, + }, + // This RPC shouldn't match with either policy, and thus + // shouldn't be allowed to proceed as didn't match with allow. + // Audit logging matches with the allow policy. 
+			{
+				rpcData: &rpcData{
+					peerInfo: &peer.Peer{
+						Addr: &addr{ipAddress: "10.0.0.0"},
+					},
+				},
+				wantStatusCode: codes.PermissionDenied,
+				wantAuditEvents: []*audit.Event{
+					{
+						FullMethodName: "",
+						PolicyName:     "test_policy",
+						MatchedRule:    "",
+						Authorized:     false,
+					},
+				},
+			},
+			// This RPC shouldn't match with allow, match with deny, and
+			// thus shouldn't be allowed to proceed.
+			// Audit logging will have the deny logged.
+			{
+				rpcData: &rpcData{
+					fullMethod: "localhost-fan-page",
+					peerInfo: &peer.Peer{
+						Addr: &addr{ipAddress: "10.0.0.0"},
+					},
+				},
+				wantStatusCode: codes.PermissionDenied,
+				wantAuditEvents: []*audit.Event{
+					{
+						FullMethodName: "localhost-fan-page",
+						PolicyName:     "test_policy",
+						MatchedRule:    "localhost-fan",
+						Authorized:     false,
+					},
+				},
+			},
+		},
+	},
+	}
+	for _, test := range tests {
+		t.Run(test.name, func(t *testing.T) {
+			b := TestAuditLoggerBufferBuilder{testName: test.name}
+			audit.RegisterLoggerBuilder(&b)
+			b2 := TestAuditLoggerCustomConfigBuilder{testName: test.name}
+			audit.RegisterLoggerBuilder(&b2)
+
+			// Instantiate the chainedRBACEngine with different configurations that are
+			// interesting to test and to query.
+			cre, err := NewChainEngine(test.rbacConfigs, test.policyName)
+			if err != nil {
+				t.Fatalf("Error constructing RBAC Engine: %v", err)
+			}
+			// Query the created chain of RBAC Engines with different args to see
+			// if the chain of RBAC Engines configured as such works as intended.
+			for _, data := range test.rbacQueries {
+				func() {
+					// Construct the context with three data points that have enough
+					// information to represent incoming RPCs. This will be how a
+					// user uses this API. A user will have to put MD, PeerInfo, and
+					// the connection the RPC is sent on in the context.
+					ctx := metadata.NewIncomingContext(context.Background(), data.rpcData.md)
+
+					// Make a TCP connection with a certain destination port. The
+					// address/port of this connection will be used to populate the
+					// destination ip/port in RPCData struct. This represents what
+					// the user of ChainEngine will have to place into context,
+					// as this is the only way to get destination ip and port.
+					lis, err := net.Listen("tcp", "localhost:0")
+					if err != nil {
+						t.Fatalf("Error listening: %v", err)
+					}
+					defer lis.Close()
+					connCh := make(chan net.Conn, 1)
+					go func() {
+						conn, err := lis.Accept()
+						if err != nil {
+							t.Errorf("Error accepting connection: %v", err)
+							return
+						}
+						connCh <- conn
+					}()
+					_, err = net.Dial("tcp", lis.Addr().String())
+					if err != nil {
+						t.Fatalf("Error dialing: %v", err)
+					}
+					conn := <-connCh
+					defer conn.Close()
+					getConnection = func(context.Context) net.Conn {
+						return conn
+					}
+					ctx = peer.NewContext(ctx, data.rpcData.peerInfo)
+					stream := &ServerTransportStreamWithMethod{
+						method: data.rpcData.fullMethod,
+					}
+
+					ctx = grpc.NewContextWithServerTransportStream(ctx, stream)
+					err = cre.IsAuthorized(ctx)
+					if gotCode := status.Code(err); gotCode != data.wantStatusCode {
+						t.Fatalf("IsAuthorized(%+v, %+v) returned (%+v), want(%+v)", ctx, data.rpcData.fullMethod, gotCode, data.wantStatusCode)
+					}
+					if !reflect.DeepEqual(b.auditEvents, data.wantAuditEvents) {
+						t.Fatalf("Unexpected audit event for query:%v", data)
+					}
+
+					// This builder's auditEvents can be shared for several queries; make sure it's empty.
+					b.auditEvents = nil
+				}()
+			}
+		})
+	}
+}
+
+type ServerTransportStreamWithMethod struct {
+	method string
+}
+
+func (sts *ServerTransportStreamWithMethod) Method() string {
+	return sts.method
+}
+
+func (sts *ServerTransportStreamWithMethod) SetHeader(md metadata.MD) error {
+	return nil
+}
+
+func (sts *ServerTransportStreamWithMethod) SendHeader(md metadata.MD) error {
+	return nil
+}
+
+func (sts *ServerTransportStreamWithMethod) SetTrailer(md metadata.MD) error {
+	return nil
+}
+
+// An audit logger that will log to the auditEvents slice.
+type TestAuditLoggerBuffer struct {
+	auditEvents *[]*audit.Event
+}
+
+func (logger *TestAuditLoggerBuffer) Log(e *audit.Event) {
+	*(logger.auditEvents) = append(*(logger.auditEvents), e)
+}
+
+// Builds TestAuditLoggerBuffer.
+type TestAuditLoggerBufferBuilder struct {
+	auditEvents []*audit.Event
+	testName    string
+}
+
+// The required config for TestAuditLoggerBuffer.
+type TestAuditLoggerBufferConfig struct {
+	audit.LoggerConfig
+}
+
+func (b *TestAuditLoggerBufferBuilder) ParseLoggerConfig(configJSON json.RawMessage) (config audit.LoggerConfig, err error) {
+	return TestAuditLoggerBufferConfig{}, nil
+}
+
+func (b *TestAuditLoggerBufferBuilder) Build(config audit.LoggerConfig) audit.Logger {
+	return &TestAuditLoggerBuffer{auditEvents: &b.auditEvents}
+}
+
+func (b *TestAuditLoggerBufferBuilder) Name() string {
+	return b.testName + "_TestAuditLoggerBuffer"
+}
+
+// An audit logger to test using a custom config.
+type TestAuditLoggerCustomConfig struct{}
+
+func (logger *TestAuditLoggerCustomConfig) Log(*audit.Event) {}
+
+// TestAuditLoggerCustomConfigBuilder builds a TestAuditLoggerCustomConfig
+// logger that uses a custom config.
+type TestAuditLoggerCustomConfigBuilder struct {
+	testName string
+}
+
+// The custom config for the TestAuditLoggerCustomConfig logger.
+type TestAuditLoggerCustomConfigConfig struct {
+	audit.LoggerConfig
+	Abc int
+	Xyz string
+}
+
+// Parses TestAuditLoggerCustomConfigConfig. Hard-coded to match with its test
+// case above.
+func (b *TestAuditLoggerCustomConfigBuilder) ParseLoggerConfig(configJSON json.RawMessage) (audit.LoggerConfig, error) {
+	c := TestAuditLoggerCustomConfigConfig{}
+	err := json.Unmarshal(configJSON, &c)
+	if err != nil {
+		return nil, fmt.Errorf("could not parse custom config: %v", err)
+	}
+	return c, nil
+}
+
+func (b *TestAuditLoggerCustomConfigBuilder) Build(config audit.LoggerConfig) audit.Logger {
+	return &TestAuditLoggerCustomConfig{}
+}
+
+func (b *TestAuditLoggerCustomConfigBuilder) Name() string {
+	return b.testName + "_TestAuditLoggerCustomConfig"
+}
+
+// Builds custom configs for audit logger RBAC protos.
+func createUDPATypedStruct(t *testing.T, in map[string]interface{}, name string) *anypb.Any {
+	t.Helper()
+	pb, err := structpb.NewStruct(in)
+	if err != nil {
+		t.Fatalf("createUDPATypedStruct failed during structpb.NewStruct: %v", err)
+	}
+	typedURL := ""
+	if name != "" {
+		typedURL = typeURLPrefix + name
+	}
+	typedStruct := &v1xdsudpatypepb.TypedStruct{
+		TypeUrl: typedURL,
+		Value:   pb,
+	}
+	customConfig, err := anypb.New(typedStruct)
+	if err != nil {
+		t.Fatalf("createUDPATypedStruct failed during anypb.New: %v", err)
+	}
+	return customConfig
+}
+
+// Builds custom configs for audit logger RBAC protos.
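+// Unlike createUDPATypedStruct above, this variant wraps the config in the
+// xDS v3 TypedStruct message rather than the UDPA v1 TypedStruct.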
+func createXDSTypedStruct(t *testing.T, in map[string]interface{}, name string) *anypb.Any {
+	t.Helper()
+	pb, err := structpb.NewStruct(in)
+	if err != nil {
+		t.Fatalf("createXDSTypedStruct failed during structpb.NewStruct: %v", err)
+	}
+	typedStruct := &v3xdsxdstypepb.TypedStruct{
+		TypeUrl: typeURLPrefix + name,
+		Value:   pb,
+	}
+	customConfig, err := anypb.New(typedStruct)
+	if err != nil {
+		t.Fatalf("createXDSTypedStruct failed during anypb.New: %v", err)
+	}
+	return customConfig
 }
diff --git a/xds/internal/httpfilter/rbac/rbac.go b/xds/internal/httpfilter/rbac/rbac.go
index 209283c3bf59..277fcfc5927a 100644
--- a/xds/internal/httpfilter/rbac/rbac.go
+++ b/xds/internal/httpfilter/rbac/rbac.go
@@ -126,7 +126,10 @@ func parseConfig(rbacCfg *rpb.RBAC) (httpfilter.FilterConfig, error) {
 		return config{}, nil
 	}
 
-	ce, err := rbac.NewChainEngine([]*v3rbacpb.RBAC{rbacCfg.GetRules()})
+	// TODO(gregorycooke) - change the call chain so that the filter name can
+	// be passed in here instead of an empty string. It will come from here:
+	// https://github.com/grpc/grpc-go/blob/eff0942e95d93112921414aee758e619ec86f26f/xds/internal/xdsclient/xdsresource/unmarshal_lds.go#L199
+	ce, err := rbac.NewChainEngine([]*v3rbacpb.RBAC{rbacCfg.GetRules()}, "")
 	if err != nil {
 		// "At this time, if the RBAC.action is Action.LOG then the policy will be
 		// completely ignored, as if RBAC was not configurated." - A41

From 417d4b6895679bd9378cb37c2afecf6a292eb267 Mon Sep 17 00:00:00 2001
From: Doug Fawley
Date: Wed, 17 May 2023 14:57:56 -0700
Subject: [PATCH 47/60] examples: add error_handling example; move errors to
 error_details (#6293)

---
 examples/examples_test.sh                     |  9 ++-
 .../{errors => error_details}/README.md       |  0
 .../{errors => error_details}/client/main.go  |  0
 .../{errors => error_details}/server/main.go  |  0
 examples/features/error_handling/README.md    | 22 ++++++
 .../features/error_handling/client/main.go    | 70 +++++++++++++++++++
 .../features/error_handling/server/main.go    | 65 +++++++++++++++++
 7 files changed, 163 insertions(+), 3 deletions(-)
 rename examples/features/{errors => error_details}/README.md (100%)
 rename examples/features/{errors => error_details}/client/main.go (100%)
 rename examples/features/{errors => error_details}/server/main.go (100%)
 create mode 100644 examples/features/error_handling/README.md
 create mode 100644 examples/features/error_handling/client/main.go
 create mode 100644 examples/features/error_handling/server/main.go

diff --git a/examples/examples_test.sh b/examples/examples_test.sh
index 9ae49d37c5e9..bead4d0dcbe1 100755
--- a/examples/examples_test.sh
+++ b/examples/examples_test.sh
@@ -57,7 +57,8 @@ EXAMPLES=(
     "features/compression"
     "features/deadline"
     "features/encryption/TLS"
-    "features/errors"
+    "features/error_details"
+    "features/error_handling"
     "features/interceptor"
     "features/load_balancing"
     "features/metadata"
@@ -109,7 +110,8 @@ declare -A EXPECTED_SERVER_OUTPUT=(
     ["features/compression"]="UnaryEcho called with message \"compress\""
     ["features/deadline"]=""
     ["features/encryption/TLS"]=""
-    ["features/errors"]=""
+    ["features/error_details"]=""
+    ["features/error_handling"]=""
     ["features/interceptor"]="unary echoing message \"hello world\""
     ["features/load_balancing"]="serving on :50051"
     ["features/metadata"]="message:\"this is examples/metadata\", sending echo"
@@ -130,7 +132,8 @@ declare -A EXPECTED_CLIENT_OUTPUT=(
     ["features/compression"]="UnaryEcho call returned \"compress\", "
     ["features/deadline"]="wanted = DeadlineExceeded, got = DeadlineExceeded"
["features/encryption/TLS"]="UnaryEcho: hello world" - ["features/errors"]="Greeting: Hello world" + ["features/error_details"]="Greeting: Hello world" + ["features/error_handling"]="Received error" ["features/interceptor"]="UnaryEcho: hello world" ["features/load_balancing"]="calling helloworld.Greeter/SayHello with pick_first" ["features/metadata"]="this is examples/metadata" diff --git a/examples/features/errors/README.md b/examples/features/error_details/README.md similarity index 100% rename from examples/features/errors/README.md rename to examples/features/error_details/README.md diff --git a/examples/features/errors/client/main.go b/examples/features/error_details/client/main.go similarity index 100% rename from examples/features/errors/client/main.go rename to examples/features/error_details/client/main.go diff --git a/examples/features/errors/server/main.go b/examples/features/error_details/server/main.go similarity index 100% rename from examples/features/errors/server/main.go rename to examples/features/error_details/server/main.go diff --git a/examples/features/error_handling/README.md b/examples/features/error_handling/README.md new file mode 100644 index 000000000000..c6c4ba2c2e2d --- /dev/null +++ b/examples/features/error_handling/README.md @@ -0,0 +1,22 @@ +# Description + +This example demonstrates basic RPC error handling in gRPC. + +# Run the sample code + +Run the server, which returns an error if the RPC request's `Name` field is +empty. + +```sh +$ go run ./server/main.go +``` + +Then run the client in another terminal, which does two requests: one with an +empty Name field and one with it populated with the current username provided by +os/user. + +```sh +$ go run ./client/main.go +``` + +It should print the status codes it received from the server. diff --git a/examples/features/error_handling/client/main.go b/examples/features/error_handling/client/main.go new file mode 100644 index 000000000000..bd4ec0a1d33f --- /dev/null +++ b/examples/features/error_handling/client/main.go @@ -0,0 +1,70 @@ +/* + * + * Copyright 2023 gRPC authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +// Binary client is an example client. +package main + +import ( + "context" + "flag" + "log" + "os/user" + "time" + + "google.golang.org/grpc" + "google.golang.org/grpc/codes" + "google.golang.org/grpc/credentials/insecure" + pb "google.golang.org/grpc/examples/helloworld/helloworld" + "google.golang.org/grpc/status" +) + +var addr = flag.String("addr", "localhost:50052", "the address to connect to") + +func main() { + flag.Parse() + + name := "unknown" + if u, err := user.Current(); err == nil && u.Username != "" { + name = u.Username + } + + // Set up a connection to the server. 
+ conn, err := grpc.Dial(*addr, grpc.WithTransportCredentials(insecure.NewCredentials())) + if err != nil { + log.Fatalf("Failed to connect: %v", err) + } + defer conn.Close() + c := pb.NewGreeterClient(conn) + + ctx, cancel := context.WithTimeout(context.Background(), time.Second) + defer cancel() + + for _, reqName := range []string{"", name} { + log.Printf("Calling SayHello with Name:%q", reqName) + r, err := c.SayHello(ctx, &pb.HelloRequest{Name: reqName}) + if err != nil { + if status.Code(err) != codes.InvalidArgument { + log.Printf("Received unexpected error: %v", err) + continue + } + log.Printf("Received error: %v", err) + continue + } + log.Printf("Received response: %s", r.Message) + } +} diff --git a/examples/features/error_handling/server/main.go b/examples/features/error_handling/server/main.go new file mode 100644 index 000000000000..4471c560add9 --- /dev/null +++ b/examples/features/error_handling/server/main.go @@ -0,0 +1,65 @@ +/* + * + * Copyright 2023 gRPC authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +// Binary server is an example server. +package main + +import ( + "context" + "flag" + "fmt" + "log" + "net" + + "google.golang.org/grpc" + "google.golang.org/grpc/codes" + "google.golang.org/grpc/status" + + pb "google.golang.org/grpc/examples/helloworld/helloworld" +) + +var port = flag.Int("port", 50052, "port number") + +// server is used to implement helloworld.GreeterServer. +type server struct { + pb.UnimplementedGreeterServer +} + +// SayHello implements helloworld.GreeterServer. 
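+// It returns an InvalidArgument status error when the request's Name field is
+// empty; the example client checks for this case with status.Code.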
+func (s *server) SayHello(ctx context.Context, in *pb.HelloRequest) (*pb.HelloReply, error) { + if in.Name == "" { + return nil, status.Errorf(codes.InvalidArgument, "request missing required field: Name") + } + return &pb.HelloReply{Message: "Hello " + in.Name}, nil +} + +func main() { + flag.Parse() + + address := fmt.Sprintf(":%v", *port) + lis, err := net.Listen("tcp", address) + if err != nil { + log.Fatalf("failed to listen: %v", err) + } + + s := grpc.NewServer() + pb.RegisterGreeterServer(s, &server{}) + if err := s.Serve(lis); err != nil { + log.Fatalf("failed to serve: %v", err) + } +} From 098b2d00c5bcbc5c696dcab9a2a7f7b442f8142e Mon Sep 17 00:00:00 2001 From: Zach Reyes <39203661+zasweq@users.noreply.github.com> Date: Thu, 18 May 2023 14:28:53 -0400 Subject: [PATCH 48/60] xds/internal/balancer/outlierdetection: Switch Outlier Detection to use new duration field (#6286) --- .../balancer/cdsbalancer/cdsbalancer.go | 6 +- .../balancer/cdsbalancer/cdsbalancer_test.go | 24 ++--- .../balancer/outlierdetection/balancer.go | 18 ++-- .../outlierdetection/balancer_test.go | 93 +++++++++---------- .../balancer/outlierdetection/config.go | 12 +-- .../e2e_test/outlierdetection_test.go | 24 ++--- 6 files changed, 85 insertions(+), 92 deletions(-) diff --git a/xds/internal/balancer/cdsbalancer/cdsbalancer.go b/xds/internal/balancer/cdsbalancer/cdsbalancer.go index 91d4a6aa8661..c9a1611c169b 100644 --- a/xds/internal/balancer/cdsbalancer/cdsbalancer.go +++ b/xds/internal/balancer/cdsbalancer/cdsbalancer.go @@ -308,9 +308,9 @@ func outlierDetectionToConfig(od *xdsresource.OutlierDetection) outlierdetection } return outlierdetection.LBConfig{ - Interval: od.Interval, - BaseEjectionTime: od.BaseEjectionTime, - MaxEjectionTime: od.MaxEjectionTime, + Interval: internalserviceconfig.Duration(od.Interval), + BaseEjectionTime: internalserviceconfig.Duration(od.BaseEjectionTime), + MaxEjectionTime: internalserviceconfig.Duration(od.MaxEjectionTime), MaxEjectionPercent: od.MaxEjectionPercent, SuccessRateEjection: sre, FailurePercentageEjection: fpe, diff --git a/xds/internal/balancer/cdsbalancer/cdsbalancer_test.go b/xds/internal/balancer/cdsbalancer/cdsbalancer_test.go index d69465a96274..35923bc8624a 100644 --- a/xds/internal/balancer/cdsbalancer/cdsbalancer_test.go +++ b/xds/internal/balancer/cdsbalancer/cdsbalancer_test.go @@ -444,9 +444,9 @@ func (s) TestHandleClusterUpdate(t *testing.T) { LBPolicy: wrrLocalityLBConfigJSON, }, wantCCS: edsCCS(serviceName, nil, false, wrrLocalityLBConfig, outlierdetection.LBConfig{ - Interval: 10 * time.Second, - BaseEjectionTime: 30 * time.Second, - MaxEjectionTime: 300 * time.Second, + Interval: internalserviceconfig.Duration(10 * time.Second), + BaseEjectionTime: internalserviceconfig.Duration(30 * time.Second), + MaxEjectionTime: internalserviceconfig.Duration(300 * time.Second), MaxEjectionPercent: 10, SuccessRateEjection: &outlierdetection.SuccessRateEjection{ StdevFactor: 1900, @@ -918,9 +918,9 @@ func (s) TestOutlierDetectionToConfig(t *testing.T) { FailurePercentageRequestVolume: 50, }, odLBCfgWant: outlierdetection.LBConfig{ - Interval: 10 * time.Second, - BaseEjectionTime: 30 * time.Second, - MaxEjectionTime: 300 * time.Second, + Interval: internalserviceconfig.Duration(10 * time.Second), + BaseEjectionTime: internalserviceconfig.Duration(30 * time.Second), + MaxEjectionTime: internalserviceconfig.Duration(300 * time.Second), MaxEjectionPercent: 10, SuccessRateEjection: nil, FailurePercentageEjection: &outlierdetection.FailurePercentageEjection{ @@ 
-951,9 +951,9 @@ func (s) TestOutlierDetectionToConfig(t *testing.T) { FailurePercentageRequestVolume: 50, }, odLBCfgWant: outlierdetection.LBConfig{ - Interval: 10 * time.Second, - BaseEjectionTime: 30 * time.Second, - MaxEjectionTime: 300 * time.Second, + Interval: internalserviceconfig.Duration(10 * time.Second), + BaseEjectionTime: internalserviceconfig.Duration(30 * time.Second), + MaxEjectionTime: internalserviceconfig.Duration(300 * time.Second), MaxEjectionPercent: 10, SuccessRateEjection: &outlierdetection.SuccessRateEjection{ StdevFactor: 1900, @@ -981,9 +981,9 @@ func (s) TestOutlierDetectionToConfig(t *testing.T) { FailurePercentageRequestVolume: 50, }, odLBCfgWant: outlierdetection.LBConfig{ - Interval: 10 * time.Second, - BaseEjectionTime: 30 * time.Second, - MaxEjectionTime: 300 * time.Second, + Interval: internalserviceconfig.Duration(10 * time.Second), + BaseEjectionTime: internalserviceconfig.Duration(30 * time.Second), + MaxEjectionTime: internalserviceconfig.Duration(300 * time.Second), MaxEjectionPercent: 10, SuccessRateEjection: &outlierdetection.SuccessRateEjection{ StdevFactor: 1900, diff --git a/xds/internal/balancer/outlierdetection/balancer.go b/xds/internal/balancer/outlierdetection/balancer.go index 1b35f518b48b..548514f6d05d 100644 --- a/xds/internal/balancer/outlierdetection/balancer.go +++ b/xds/internal/balancer/outlierdetection/balancer.go @@ -225,9 +225,9 @@ func (b *outlierDetectionBalancer) onIntervalConfig() { for _, addrInfo := range b.addrs { addrInfo.callCounter.clear() } - interval = b.cfg.Interval + interval = time.Duration(b.cfg.Interval) } else { - interval = b.cfg.Interval - now().Sub(b.timerStartTime) + interval = time.Duration(b.cfg.Interval) - now().Sub(b.timerStartTime) if interval < 0 { interval = 0 } @@ -589,14 +589,14 @@ func (b *outlierDetectionBalancer) Target() string { return b.cc.Target() } -func max(x, y int64) int64 { +func max(x, y time.Duration) time.Duration { if x < y { return y } return x } -func min(x, y int64) int64 { +func min(x, y time.Duration) time.Duration { if x < y { return x } @@ -754,10 +754,10 @@ func (b *outlierDetectionBalancer) intervalTimerAlgorithm() { // to uneject the address below. 
continue } - et := b.cfg.BaseEjectionTime.Nanoseconds() * addrInfo.ejectionTimeMultiplier - met := max(b.cfg.BaseEjectionTime.Nanoseconds(), b.cfg.MaxEjectionTime.Nanoseconds()) - curTimeAfterEt := now().After(addrInfo.latestEjectionTimestamp.Add(time.Duration(min(et, met)))) - if curTimeAfterEt { + et := time.Duration(b.cfg.BaseEjectionTime) * time.Duration(addrInfo.ejectionTimeMultiplier) + met := max(time.Duration(b.cfg.BaseEjectionTime), time.Duration(b.cfg.MaxEjectionTime)) + uet := addrInfo.latestEjectionTimestamp.Add(min(et, met)) + if now().After(uet) { b.unejectAddress(addrInfo) } } @@ -767,7 +767,7 @@ func (b *outlierDetectionBalancer) intervalTimerAlgorithm() { if b.intervalTimer != nil { b.intervalTimer.Stop() } - b.intervalTimer = afterFunc(b.cfg.Interval, b.intervalTimerAlgorithm) + b.intervalTimer = afterFunc(time.Duration(b.cfg.Interval), b.intervalTimerAlgorithm) } // addrsWithAtLeastRequestVolume returns a slice of address information of all diff --git a/xds/internal/balancer/outlierdetection/balancer_test.go b/xds/internal/balancer/outlierdetection/balancer_test.go index 41447164c013..4f542d61e572 100644 --- a/xds/internal/balancer/outlierdetection/balancer_test.go +++ b/xds/internal/balancer/outlierdetection/balancer_test.go @@ -37,7 +37,7 @@ import ( "google.golang.org/grpc/internal/channelz" "google.golang.org/grpc/internal/grpcsync" "google.golang.org/grpc/internal/grpctest" - internalserviceconfig "google.golang.org/grpc/internal/serviceconfig" + iserviceconfig "google.golang.org/grpc/internal/serviceconfig" "google.golang.org/grpc/internal/testutils" "google.golang.org/grpc/resolver" "google.golang.org/grpc/serviceconfig" @@ -78,7 +78,6 @@ func (s) TestParseConfig(t *testing.T) { { name: "noop-lb-config", input: `{ - "interval": 9223372036854775807, "childPolicy": [ { "xds_cluster_impl_experimental": { @@ -88,8 +87,7 @@ func (s) TestParseConfig(t *testing.T) { ] }`, wantCfg: &LBConfig{ - Interval: math.MaxInt64, - ChildPolicy: &internalserviceconfig.BalancerConfig{ + ChildPolicy: &iserviceconfig.BalancerConfig{ Name: "xds_cluster_impl_experimental", Config: &clusterimpl.LBConfig{ Cluster: "test_cluster", @@ -100,9 +98,9 @@ func (s) TestParseConfig(t *testing.T) { { name: "good-lb-config", input: `{ - "interval": 10000000000, - "baseEjectionTime": 30000000000, - "maxEjectionTime": 300000000000, + "interval": "10s", + "baseEjectionTime": "30s", + "maxEjectionTime": "300s", "maxEjectionPercent": 10, "successRateEjection": { "stdevFactor": 1900, @@ -125,9 +123,9 @@ func (s) TestParseConfig(t *testing.T) { ] }`, wantCfg: &LBConfig{ - Interval: 10 * time.Second, - BaseEjectionTime: 30 * time.Second, - MaxEjectionTime: 300 * time.Second, + Interval: iserviceconfig.Duration(10 * time.Second), + BaseEjectionTime: iserviceconfig.Duration(30 * time.Second), + MaxEjectionTime: iserviceconfig.Duration(300 * time.Second), MaxEjectionPercent: 10, SuccessRateEjection: &SuccessRateEjection{ StdevFactor: 1900, @@ -141,7 +139,7 @@ func (s) TestParseConfig(t *testing.T) { MinimumHosts: 5, RequestVolume: 50, }, - ChildPolicy: &internalserviceconfig.BalancerConfig{ + ChildPolicy: &iserviceconfig.BalancerConfig{ Name: "xds_cluster_impl_experimental", Config: &clusterimpl.LBConfig{ Cluster: "test_cluster", @@ -151,18 +149,18 @@ func (s) TestParseConfig(t *testing.T) { }, { name: "interval-is-negative", - input: `{"interval": -10}`, - wantErr: "OutlierDetectionLoadBalancingConfig.interval = -10ns; must be >= 0", + input: `{"interval": "-10s"}`, + wantErr: 
"OutlierDetectionLoadBalancingConfig.interval = -10s; must be >= 0", }, { name: "base-ejection-time-is-negative", - input: `{"baseEjectionTime": -10}`, - wantErr: "OutlierDetectionLoadBalancingConfig.base_ejection_time = -10ns; must be >= 0", + input: `{"baseEjectionTime": "-10s"}`, + wantErr: "OutlierDetectionLoadBalancingConfig.base_ejection_time = -10s; must be >= 0", }, { name: "max-ejection-time-is-negative", - input: `{"maxEjectionTime": -10}`, - wantErr: "OutlierDetectionLoadBalancingConfig.max_ejection_time = -10ns; must be >= 0", + input: `{"maxEjectionTime": "-10s"}`, + wantErr: "OutlierDetectionLoadBalancingConfig.max_ejection_time = -10s; must be >= 0", }, { name: "max-ejection-percent-is-greater-than-100", @@ -199,9 +197,9 @@ func (s) TestParseConfig(t *testing.T) { { name: "child-policy-not-present", input: `{ - "interval": 10000000000, - "baseEjectionTime": 30000000000, - "maxEjectionTime": 300000000000, + "interval": "10s", + "baseEjectionTime": "30s", + "maxEjectionTime": "300s", "maxEjectionPercent": 10, "successRateEjection": { "stdevFactor": 1900, @@ -221,7 +219,6 @@ func (s) TestParseConfig(t *testing.T) { { name: "child-policy-present-but-parse-error", input: `{ - "interval": 9223372036854775807, "childPolicy": [ { "errParseConfigBalancer": { @@ -235,7 +232,6 @@ func (s) TestParseConfig(t *testing.T) { { name: "no-supported-child-policy", input: `{ - "interval": 9223372036854775807, "childPolicy": [ { "doesNotExistBalancer": { @@ -258,7 +254,7 @@ func (s) TestParseConfig(t *testing.T) { ] }`, wantCfg: &LBConfig{ - ChildPolicy: &internalserviceconfig.BalancerConfig{ + ChildPolicy: &iserviceconfig.BalancerConfig{ Name: "xds_cluster_impl_experimental", Config: &clusterimpl.LBConfig{ Cluster: "test_cluster", @@ -362,8 +358,7 @@ func (s) TestChildBasicOperations(t *testing.T) { // it's first update. 
od.UpdateClientConnState(balancer.ClientConnState{ BalancerConfig: &LBConfig{ - Interval: math.MaxInt64, - ChildPolicy: &internalserviceconfig.BalancerConfig{ + ChildPolicy: &iserviceconfig.BalancerConfig{ Name: t.Name() + "child1", Config: bc, }, @@ -386,7 +381,7 @@ func (s) TestChildBasicOperations(t *testing.T) { od.UpdateClientConnState(balancer.ClientConnState{ BalancerConfig: &LBConfig{ Interval: math.MaxInt64, - ChildPolicy: &internalserviceconfig.BalancerConfig{ + ChildPolicy: &iserviceconfig.BalancerConfig{ Name: t.Name() + "child2", Config: emptyChildConfig{}, }, @@ -475,9 +470,9 @@ func (s) TestUpdateAddresses(t *testing.T) { }, }, BalancerConfig: &LBConfig{ - Interval: 10 * time.Second, - BaseEjectionTime: 30 * time.Second, - MaxEjectionTime: 300 * time.Second, + Interval: iserviceconfig.Duration(10 * time.Second), + BaseEjectionTime: iserviceconfig.Duration(30 * time.Second), + MaxEjectionTime: iserviceconfig.Duration(300 * time.Second), MaxEjectionPercent: 10, FailurePercentageEjection: &FailurePercentageEjection{ Threshold: 50, @@ -485,7 +480,7 @@ func (s) TestUpdateAddresses(t *testing.T) { MinimumHosts: 2, RequestVolume: 3, }, - ChildPolicy: &internalserviceconfig.BalancerConfig{ + ChildPolicy: &iserviceconfig.BalancerConfig{ Name: t.Name(), Config: emptyChildConfig{}, }, @@ -651,14 +646,14 @@ func (s) TestDurationOfInterval(t *testing.T) { od.UpdateClientConnState(balancer.ClientConnState{ BalancerConfig: &LBConfig{ - Interval: 8 * time.Second, + Interval: iserviceconfig.Duration(8 * time.Second), SuccessRateEjection: &SuccessRateEjection{ StdevFactor: 1900, EnforcementPercentage: 100, MinimumHosts: 5, RequestVolume: 100, }, - ChildPolicy: &internalserviceconfig.BalancerConfig{ + ChildPolicy: &iserviceconfig.BalancerConfig{ Name: t.Name(), Config: emptyChildConfig{}, }, @@ -691,14 +686,14 @@ func (s) TestDurationOfInterval(t *testing.T) { // interval timer of ~4 seconds. od.UpdateClientConnState(balancer.ClientConnState{ BalancerConfig: &LBConfig{ - Interval: 9 * time.Second, + Interval: iserviceconfig.Duration(9 * time.Second), SuccessRateEjection: &SuccessRateEjection{ StdevFactor: 1900, EnforcementPercentage: 100, MinimumHosts: 5, RequestVolume: 100, }, - ChildPolicy: &internalserviceconfig.BalancerConfig{ + ChildPolicy: &iserviceconfig.BalancerConfig{ Name: t.Name(), Config: emptyChildConfig{}, }, @@ -718,8 +713,8 @@ func (s) TestDurationOfInterval(t *testing.T) { // interval timer at all due to it being a no-op. od.UpdateClientConnState(balancer.ClientConnState{ BalancerConfig: &LBConfig{ - Interval: 10 * time.Second, - ChildPolicy: &internalserviceconfig.BalancerConfig{ + Interval: iserviceconfig.Duration(10 * time.Second), + ChildPolicy: &iserviceconfig.BalancerConfig{ Name: t.Name(), Config: emptyChildConfig{}, }, @@ -793,8 +788,8 @@ func (s) TestEjectUnejectSuccessRate(t *testing.T) { }, BalancerConfig: &LBConfig{ Interval: math.MaxInt64, // so the interval will never run unless called manually in test. 
- BaseEjectionTime: 30 * time.Second, - MaxEjectionTime: 300 * time.Second, + BaseEjectionTime: iserviceconfig.Duration(30 * time.Second), + MaxEjectionTime: iserviceconfig.Duration(300 * time.Second), MaxEjectionPercent: 10, FailurePercentageEjection: &FailurePercentageEjection{ Threshold: 50, @@ -802,7 +797,7 @@ func (s) TestEjectUnejectSuccessRate(t *testing.T) { MinimumHosts: 3, RequestVolume: 3, }, - ChildPolicy: &internalserviceconfig.BalancerConfig{ + ChildPolicy: &iserviceconfig.BalancerConfig{ Name: t.Name(), Config: emptyChildConfig{}, }, @@ -997,8 +992,8 @@ func (s) TestEjectFailureRate(t *testing.T) { }, BalancerConfig: &LBConfig{ Interval: math.MaxInt64, // so the interval will never run unless called manually in test. - BaseEjectionTime: 30 * time.Second, - MaxEjectionTime: 300 * time.Second, + BaseEjectionTime: iserviceconfig.Duration(30 * time.Second), + MaxEjectionTime: iserviceconfig.Duration(300 * time.Second), MaxEjectionPercent: 10, SuccessRateEjection: &SuccessRateEjection{ StdevFactor: 500, @@ -1006,7 +1001,7 @@ func (s) TestEjectFailureRate(t *testing.T) { MinimumHosts: 3, RequestVolume: 3, }, - ChildPolicy: &internalserviceconfig.BalancerConfig{ + ChildPolicy: &iserviceconfig.BalancerConfig{ Name: t.Name(), Config: emptyChildConfig{}, }, @@ -1103,10 +1098,10 @@ func (s) TestEjectFailureRate(t *testing.T) { }, BalancerConfig: &LBConfig{ Interval: math.MaxInt64, - BaseEjectionTime: 30 * time.Second, - MaxEjectionTime: 300 * time.Second, + BaseEjectionTime: iserviceconfig.Duration(30 * time.Second), + MaxEjectionTime: iserviceconfig.Duration(300 * time.Second), MaxEjectionPercent: 10, - ChildPolicy: &internalserviceconfig.BalancerConfig{ + ChildPolicy: &iserviceconfig.BalancerConfig{ Name: t.Name(), Config: emptyChildConfig{}, }, @@ -1173,8 +1168,8 @@ func (s) TestConcurrentOperations(t *testing.T) { }, BalancerConfig: &LBConfig{ Interval: math.MaxInt64, // so the interval will never run unless called manually in test. - BaseEjectionTime: 30 * time.Second, - MaxEjectionTime: 300 * time.Second, + BaseEjectionTime: iserviceconfig.Duration(30 * time.Second), + MaxEjectionTime: iserviceconfig.Duration(300 * time.Second), MaxEjectionPercent: 10, SuccessRateEjection: &SuccessRateEjection{ // Have both Success Rate and Failure Percentage to step through all the interval timer code StdevFactor: 500, @@ -1188,7 +1183,7 @@ func (s) TestConcurrentOperations(t *testing.T) { MinimumHosts: 3, RequestVolume: 3, }, - ChildPolicy: &internalserviceconfig.BalancerConfig{ + ChildPolicy: &iserviceconfig.BalancerConfig{ Name: t.Name(), Config: emptyChildConfig{}, }, @@ -1311,7 +1306,7 @@ func (s) TestConcurrentOperations(t *testing.T) { }, BalancerConfig: &LBConfig{ Interval: math.MaxInt64, - ChildPolicy: &internalserviceconfig.BalancerConfig{ + ChildPolicy: &iserviceconfig.BalancerConfig{ Name: t.Name(), Config: emptyChildConfig{}, }, diff --git a/xds/internal/balancer/outlierdetection/config.go b/xds/internal/balancer/outlierdetection/config.go index c931674ae409..9c4383cf6ece 100644 --- a/xds/internal/balancer/outlierdetection/config.go +++ b/xds/internal/balancer/outlierdetection/config.go @@ -18,9 +18,7 @@ package outlierdetection import ( - "time" - - internalserviceconfig "google.golang.org/grpc/internal/serviceconfig" + iserviceconfig "google.golang.org/grpc/internal/serviceconfig" "google.golang.org/grpc/serviceconfig" ) @@ -128,15 +126,15 @@ type LBConfig struct { // Interval is the time interval between ejection analysis sweeps. 
This can // result in both new ejections as well as addresses being returned to // service. Defaults to 10s. - Interval time.Duration `json:"interval,omitempty"` + Interval iserviceconfig.Duration `json:"interval,omitempty"` // BaseEjectionTime is the base time that a host is ejected for. The real // time is equal to the base time multiplied by the number of times the host // has been ejected and is capped by MaxEjectionTime. Defaults to 30s. - BaseEjectionTime time.Duration `json:"baseEjectionTime,omitempty"` + BaseEjectionTime iserviceconfig.Duration `json:"baseEjectionTime,omitempty"` // MaxEjectionTime is the maximum time that an address is ejected for. If // not specified, the default value (300s) or the BaseEjectionTime value is // applied, whichever is larger. - MaxEjectionTime time.Duration `json:"maxEjectionTime,omitempty"` + MaxEjectionTime iserviceconfig.Duration `json:"maxEjectionTime,omitempty"` // MaxEjectionPercent is the maximum % of an upstream cluster that can be // ejected due to outlier detection. Defaults to 10% but will eject at least // one host regardless of the value. @@ -148,7 +146,7 @@ type LBConfig struct { // algorithm. If set, failure rate ejections will be performed. FailurePercentageEjection *FailurePercentageEjection `json:"failurePercentageEjection,omitempty"` // ChildPolicy is the config for the child policy. - ChildPolicy *internalserviceconfig.BalancerConfig `json:"childPolicy,omitempty"` + ChildPolicy *iserviceconfig.BalancerConfig `json:"childPolicy,omitempty"` } // EqualIgnoringChildPolicy returns whether the LBConfig is same with the diff --git a/xds/internal/balancer/outlierdetection/e2e_test/outlierdetection_test.go b/xds/internal/balancer/outlierdetection/e2e_test/outlierdetection_test.go index c687dc576663..e08ddc98ea79 100644 --- a/xds/internal/balancer/outlierdetection/e2e_test/outlierdetection_test.go +++ b/xds/internal/balancer/outlierdetection/e2e_test/outlierdetection_test.go @@ -159,9 +159,9 @@ func (s) TestOutlierDetectionAlgorithmsE2E(t *testing.T) { "loadBalancingConfig": [ { "outlier_detection_experimental": { - "interval": 50000000, - "baseEjectionTime": 100000000, - "maxEjectionTime": 300000000000, + "interval": "0.050s", + "baseEjectionTime": "0.100s", + "maxEjectionTime": "300s", "maxEjectionPercent": 33, "successRateEjection": { "stdevFactor": 50, @@ -182,9 +182,9 @@ func (s) TestOutlierDetectionAlgorithmsE2E(t *testing.T) { "loadBalancingConfig": [ { "outlier_detection_experimental": { - "interval": 50000000, - "baseEjectionTime": 100000000, - "maxEjectionTime": 300000000000, + "interval": "0.050s", + "baseEjectionTime": "0.100s", + "maxEjectionTime": "300s", "maxEjectionPercent": 33, "failurePercentageEjection": { "threshold": 50, @@ -277,9 +277,9 @@ func (s) TestNoopConfiguration(t *testing.T) { "loadBalancingConfig": [ { "outlier_detection_experimental": { - "interval": 50000000, - "baseEjectionTime": 100000000, - "maxEjectionTime": 300000000000, + "interval": "0.050s", + "baseEjectionTime": "0.100s", + "maxEjectionTime": "300s", "maxEjectionPercent": 33, "childPolicy": [{"round_robin": {}}] } @@ -325,9 +325,9 @@ func (s) TestNoopConfiguration(t *testing.T) { "loadBalancingConfig": [ { "outlier_detection_experimental": { - "interval": 50000000, - "baseEjectionTime": 100000000, - "maxEjectionTime": 300000000000, + "interval": "0.050s", + "baseEjectionTime": "0.100s", + "maxEjectionTime": "300s", "maxEjectionPercent": 33, "failurePercentageEjection": { "threshold": 50, From 9b7a947cdcb2cf5664eec045362b5222f7ac7dcc Mon Sep 17 
00:00:00 2001
From: Easwar Swaminathan <easwars@google.com>
Date: Mon, 22 May 2023 12:42:45 -0700
Subject: [PATCH 49/60] grpc: support channel idleness (#6263)

---
 balancer_conn_wrappers.go                     | 243 ++++++++--
 call.go                                       |   5 +
 clientconn.go                                 | 328 ++++++++++----
 clientconn_test.go                            |   2 +-
 dialoptions.go                                |  22 +
 idle.go                                       | 287 ++++++++++++
 idle_test.go                                  | 360 +++++++++++++++
 internal/grpcsync/callback_serializer.go      |  56 ++-
 internal/grpcsync/callback_serializer_test.go |  50 ++-
 picker_wrapper.go                             |  26 +-
 resolver_conn_wrapper.go                      |  98 ++--
 stream.go                                     |   5 +
 test/clientconn_state_transition_test.go      |   7 +
 test/idleness_test.go                         | 423 ++++++++++++++++++
 14 files changed, 1735 insertions(+), 177 deletions(-)
 create mode 100644 idle.go
 create mode 100644 idle_test.go
 create mode 100644 test/idleness_test.go

diff --git a/balancer_conn_wrappers.go b/balancer_conn_wrappers.go
index 1865a3f09c2b..4f9944697dde 100644
--- a/balancer_conn_wrappers.go
+++ b/balancer_conn_wrappers.go
@@ -32,6 +32,15 @@ import (
 	"google.golang.org/grpc/resolver"
 )
 
+type ccbMode int
+
+const (
+	ccbModeActive = iota
+	ccbModeIdle
+	ccbModeClosed
+	ccbModeExitingIdle
+)
+
 // ccBalancerWrapper sits between the ClientConn and the Balancer.
 //
 // ccBalancerWrapper implements methods corresponding to the ones on the
@@ -46,16 +55,25 @@ import (
 // It uses the gracefulswitch.Balancer internally to ensure that balancer
 // switches happen in a graceful manner.
 type ccBalancerWrapper struct {
-	cc *ClientConn
+	// The following fields are initialized when the wrapper is created and are
+	// read-only afterwards, and therefore can be accessed without a mutex.
+	cc   *ClientConn
+	opts balancer.BuildOptions
 
 	// Outgoing (gRPC --> balancer) calls are guaranteed to execute in a
-	// mutually exclusive manner as they are scheduled on the
-	// CallbackSerializer. Fields accessed *only* in serializer callbacks, can
-	// therefore be accessed without a mutex.
-	serializer       *grpcsync.CallbackSerializer
-	serializerCancel context.CancelFunc
-	balancer         *gracefulswitch.Balancer
-	curBalancerName  string
+	// mutually exclusive manner as they are scheduled in the serializer. Fields
+	// accessed *only* in these serializer callbacks can therefore be accessed
+	// without a mutex.
+	balancer        *gracefulswitch.Balancer
+	curBalancerName string
+
+	// mu guards access to the below fields. Access to the serializer and its
+	// cancel function needs to be mutex protected because they are overwritten
+	// when the wrapper exits idle mode.
+	mu               sync.Mutex
+	serializer       *grpcsync.CallbackSerializer // To serialize all outgoing calls.
+	serializerCancel context.CancelFunc           // To close the serializer at close/enterIdle time.
+	mode             ccbMode                      // Tracks the current mode of the wrapper.
 }
 
 // newCCBalancerWrapper creates a new balancer wrapper. The underlying balancer
@@ -64,6 +82,7 @@ func newCCBalancerWrapper(cc *ClientConn, bopts balancer.BuildOptions) *ccBalanc
 	ctx, cancel := context.WithCancel(context.Background())
 	ccb := &ccBalancerWrapper{
 		cc:               cc,
+		opts:             bopts,
 		serializer:       grpcsync.NewCallbackSerializer(ctx),
 		serializerCancel: cancel,
 	}
@@ -74,8 +93,12 @@ func newCCBalancerWrapper(cc *ClientConn, bopts balancer.BuildOptions) *ccBalanc
 // updateClientConnState is invoked by grpc to push a ClientConnState update to
 // the underlying balancer.
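
Throughout this file, the recurring shape is: take ccb.mu, try to schedule a closure on the serializer, drop the lock, and only then block on any result. A rough, self-contained sketch of that shape, with miniSerializer and wrapper as hypothetical stand-ins (the real CallbackSerializer lives in internal/grpcsync):

package main

import (
	"errors"
	"fmt"
	"sync"
)

// miniSerializer stands in for internal/grpcsync.CallbackSerializer: Schedule
// reports false once the serializer has been closed.
type miniSerializer struct {
	mu     sync.Mutex
	closed bool
	funcs  chan func()
}

func newMiniSerializer() *miniSerializer {
	s := &miniSerializer{funcs: make(chan func(), 16)}
	go func() {
		for f := range s.funcs {
			f() // callbacks run one at a time, without any caller-side lock
		}
	}()
	return s
}

func (s *miniSerializer) Schedule(f func()) bool {
	s.mu.Lock()
	defer s.mu.Unlock()
	if s.closed {
		return false
	}
	s.funcs <- f
	return true
}

type wrapper struct {
	mu         sync.Mutex // guards the serializer pointer, not the callbacks
	serializer *miniSerializer
}

func (w *wrapper) update(v int) error {
	w.mu.Lock()
	errCh := make(chan error, 1)
	ok := w.serializer.Schedule(func() { fmt.Println("handled", v); errCh <- nil })
	w.mu.Unlock()
	if !ok {
		return errors.New("closed or idle")
	}
	return <-errCh // safe: a successfully scheduled callback always runs
}

func main() {
	w := &wrapper{serializer: newMiniSerializer()}
	fmt.Println("err:", w.update(1))
}

Keeping the lock only around Schedule() means callbacks never run while ccb.mu is held, which avoids lock-inversion with the ClientConn.
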
func (ccb *ccBalancerWrapper) updateClientConnState(ccs *balancer.ClientConnState) error { + ccb.mu.Lock() errCh := make(chan error, 1) - ccb.serializer.Schedule(func(_ context.Context) { + // Here and everywhere else where Schedule() is called, it is done with the + // lock held. But the lock guards only the scheduling part. The actual + // callback is called asynchronously without the lock being held. + ok := ccb.serializer.Schedule(func(_ context.Context) { // If the addresses specified in the update contain addresses of type // "grpclb" and the selected LB policy is not "grpclb", these addresses // will be filtered out and ccs will be modified with the updated @@ -92,16 +115,19 @@ func (ccb *ccBalancerWrapper) updateClientConnState(ccs *balancer.ClientConnStat } errCh <- ccb.balancer.UpdateClientConnState(*ccs) }) - - // If the balancer wrapper is closed when waiting for this state update to - // be handled, the callback serializer will be closed as well, and we can - // rely on its Done channel to ensure that we don't block here forever. - select { - case err := <-errCh: - return err - case <-ccb.serializer.Done: - return nil + if !ok { + // If we are unable to schedule a function with the serializer, it + // indicates that it has been closed. A serializer is only closed when + // the wrapper is closed or is in idle. + ccb.mu.Unlock() + return fmt.Errorf("grpc: cannot send state update to a closed or idle balancer") } + ccb.mu.Unlock() + + // We get here only if the above call to Schedule succeeds, in which case it + // is guaranteed that the scheduled function will run. Therefore it is safe + // to block on this channel. + return <-errCh } // updateSubConnState is invoked by grpc to push a subConn state update to the @@ -120,21 +146,19 @@ func (ccb *ccBalancerWrapper) updateSubConnState(sc balancer.SubConn, s connecti if sc == nil { return } + ccb.mu.Lock() ccb.serializer.Schedule(func(_ context.Context) { ccb.balancer.UpdateSubConnState(sc, balancer.SubConnState{ConnectivityState: s, ConnectionError: err}) }) -} - -func (ccb *ccBalancerWrapper) exitIdle() { - ccb.serializer.Schedule(func(_ context.Context) { - ccb.balancer.ExitIdle() - }) + ccb.mu.Unlock() } func (ccb *ccBalancerWrapper) resolverError(err error) { + ccb.mu.Lock() ccb.serializer.Schedule(func(_ context.Context) { ccb.balancer.ResolverError(err) }) + ccb.mu.Unlock() } // switchTo is invoked by grpc to instruct the balancer wrapper to switch to the @@ -148,42 +172,149 @@ func (ccb *ccBalancerWrapper) resolverError(err error) { // the ccBalancerWrapper keeps track of the current LB policy name, and skips // the graceful balancer switching process if the name does not change. func (ccb *ccBalancerWrapper) switchTo(name string) { + ccb.mu.Lock() ccb.serializer.Schedule(func(_ context.Context) { // TODO: Other languages use case-sensitive balancer registries. We should // switch as well. See: https://github.com/grpc/grpc-go/issues/5288. if strings.EqualFold(ccb.curBalancerName, name) { return } + ccb.buildLoadBalancingPolicy(name) + }) + ccb.mu.Unlock() +} - // Use the default LB policy, pick_first, if no LB policy with name is - // found in the registry. 
- builder := balancer.Get(name) - if builder == nil { - channelz.Warningf(logger, ccb.cc.channelzID, "Channel switches to new LB policy %q, since the specified LB policy %q was not registered", PickFirstBalancerName, name) - builder = newPickfirstBuilder() - } else { - channelz.Infof(logger, ccb.cc.channelzID, "Channel switches to new LB policy %q", name) - } +// buildLoadBalancingPolicy performs the following: +// - retrieve a balancer builder for the given name. Use the default LB +// policy, pick_first, if no LB policy with name is found in the registry. +// - instruct the gracefulswitch balancer to switch to the above builder. This +// will actually build the new balancer. +// - update the `curBalancerName` field +// +// Must be called from a serializer callback. +func (ccb *ccBalancerWrapper) buildLoadBalancingPolicy(name string) { + builder := balancer.Get(name) + if builder == nil { + channelz.Warningf(logger, ccb.cc.channelzID, "Channel switches to new LB policy %q, since the specified LB policy %q was not registered", PickFirstBalancerName, name) + builder = newPickfirstBuilder() + } else { + channelz.Infof(logger, ccb.cc.channelzID, "Channel switches to new LB policy %q", name) + } + + if err := ccb.balancer.SwitchTo(builder); err != nil { + channelz.Errorf(logger, ccb.cc.channelzID, "Channel failed to build new LB policy %q: %v", name, err) + return + } + ccb.curBalancerName = builder.Name() +} + +func (ccb *ccBalancerWrapper) close() { + channelz.Info(logger, ccb.cc.channelzID, "ccBalancerWrapper: closing") + ccb.closeBalancer(ccbModeClosed) +} + +// enterIdleMode is invoked by grpc when the channel enters idle mode upon +// expiry of idle_timeout. This call blocks until the balancer is closed. +func (ccb *ccBalancerWrapper) enterIdleMode() { + channelz.Info(logger, ccb.cc.channelzID, "ccBalancerWrapper: entering idle mode") + ccb.closeBalancer(ccbModeIdle) +} + +// closeBalancer is invoked when the channel is being closed or when it enters +// idle mode upon expiry of idle_timeout. +func (ccb *ccBalancerWrapper) closeBalancer(m ccbMode) { + ccb.mu.Lock() + if ccb.mode == ccbModeClosed || ccb.mode == ccbModeIdle { + ccb.mu.Unlock() + return + } + + ccb.mode = m + done := ccb.serializer.Done + b := ccb.balancer + ok := ccb.serializer.Schedule(func(_ context.Context) { + // Close the serializer to ensure that no more calls from gRPC are sent + // to the balancer. + ccb.serializerCancel() + // Empty the current balancer name because we don't have a balancer + // anymore and also so that we act on the next call to switchTo by + // creating a new balancer specified by the new resolver. + ccb.curBalancerName = "" + }) + if !ok { + ccb.mu.Unlock() + return + } + ccb.mu.Unlock() + + // Give enqueued callbacks a chance to finish. + <-done + // Spawn a goroutine to close the balancer (since it may block trying to + // cleanup all allocated resources) and return early. + go b.Close() +} + +// exitIdleMode is invoked by grpc when the channel exits idle mode either +// because of an RPC or because of an invocation of the Connect() API. This +// recreates the balancer that was closed previously when entering idle mode. +// +// If the channel is not in idle mode, we know for a fact that we are here as a +// result of the user calling the Connect() method on the ClientConn. In this +// case, we can simply forward the call to the underlying balancer, instructing +// it to reconnect to the backends. 
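
Before the function body below, a compressed, hypothetical sketch of the three-way behavior its doc comment describes; the real method also recreates the callback serializer and reports through channelz:

package main

import "fmt"

type mode int

const (
	modeActive mode = iota
	modeIdle
	modeClosed
)

type balancerWrapper struct{ mode mode }

func (w *balancerWrapper) exitIdleMode() {
	switch w.mode {
	case modeClosed:
		return // a closed wrapper never comes back
	case modeIdle:
		// Rebuild the machinery torn down when the channel went idle.
		w.mode = modeActive
		fmt.Println("rebuilt balancer; now active")
	case modeActive:
		// The channel was never idle: the user called Connect(), so just
		// forward the request to the live balancer.
		fmt.Println("forwarding ExitIdle to live balancer")
	}
}

func main() {
	w := &balancerWrapper{mode: modeIdle}
	w.exitIdleMode() // rebuild path
	w.exitIdleMode() // forward path
}
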
+func (ccb *ccBalancerWrapper) exitIdleMode() {
+	ccb.mu.Lock()
+	if ccb.mode == ccbModeClosed {
+		// Request to exit idle is a no-op when wrapper is already closed.
+		ccb.mu.Unlock()
+		return
+	}
 
-	if err := ccb.balancer.SwitchTo(builder); err != nil {
-		channelz.Errorf(logger, ccb.cc.channelzID, "Channel failed to build new LB policy %q: %v", name, err)
+	if ccb.mode == ccbModeIdle {
+		// Recreate the serializer which was closed when we entered idle.
+		ctx, cancel := context.WithCancel(context.Background())
+		ccb.serializer = grpcsync.NewCallbackSerializer(ctx)
+		ccb.serializerCancel = cancel
+	}
+
+	// The ClientConn guarantees mutual exclusion between close() and
+	// exitIdleMode(), and since we just created a new serializer, we can be
+	// sure that the below function will be scheduled.
+	done := make(chan struct{})
+	ccb.serializer.Schedule(func(_ context.Context) {
+		defer close(done)
+
+		ccb.mu.Lock()
+		defer ccb.mu.Unlock()
+
+		if ccb.mode != ccbModeIdle {
+			ccb.balancer.ExitIdle()
 			return
 		}
-		ccb.curBalancerName = builder.Name()
+
+		// Gracefulswitch balancer does not support a switchTo operation after
+		// being closed. Hence we need to create a new one here.
+		ccb.balancer = gracefulswitch.NewBalancer(ccb, ccb.opts)
+		ccb.mode = ccbModeActive
+		channelz.Info(logger, ccb.cc.channelzID, "ccBalancerWrapper: exiting idle mode")
+	})
+	ccb.mu.Unlock()
+
+	<-done
 }
 
-func (ccb *ccBalancerWrapper) close() {
-	// Close the serializer to ensure that no more calls from gRPC are sent to
-	// the balancer. We don't have to worry about suppressing calls from a
-	// closed balancer because these are handled by the ClientConn (balancer
-	// wrapper is only ever closed when the ClientConn is closed).
-	ccb.serializerCancel()
-	<-ccb.serializer.Done
-	ccb.balancer.Close()
+func (ccb *ccBalancerWrapper) isIdleOrClosed() bool {
+	ccb.mu.Lock()
+	defer ccb.mu.Unlock()
+	return ccb.mode == ccbModeIdle || ccb.mode == ccbModeClosed
 }
 
 func (ccb *ccBalancerWrapper) NewSubConn(addrs []resolver.Address, opts balancer.NewSubConnOptions) (balancer.SubConn, error) {
+	if ccb.isIdleOrClosed() {
+		return nil, fmt.Errorf("grpc: cannot create SubConn when balancer is closed or idle")
+	}
+
 	if len(addrs) <= 0 {
 		return nil, fmt.Errorf("grpc: cannot create SubConn with empty address list")
 	}
@@ -200,6 +331,18 @@ func (ccb *ccBalancerWrapper) NewSubConn(addrs []resolver.Address, opts balancer
 }
 
 func (ccb *ccBalancerWrapper) RemoveSubConn(sc balancer.SubConn) {
+	if ccb.isIdleOrClosed() {
+		// It is safe to ignore this call when the balancer is closed or in idle
+		// because the ClientConn takes care of closing the connections.
+		//
+		// Not returning early from here when the balancer is closed or in idle
+		// leads to a deadlock though, because of the following sequence of
+		// calls when holding cc.mu:
+		// cc.exitIdleMode --> ccb.enterIdleMode --> gsw.Close -->
+		// ccb.RemoveAddrConn --> cc.removeAddrConn
+		return
+	}
+
 	acbw, ok := sc.(*acBalancerWrapper)
 	if !ok {
 		return
@@ -208,6 +351,10 @@ func (ccb *ccBalancerWrapper) RemoveSubConn(sc balancer.SubConn) {
 }
 
 func (ccb *ccBalancerWrapper) UpdateAddresses(sc balancer.SubConn, addrs []resolver.Address) {
+	if ccb.isIdleOrClosed() {
+		return
+	}
+
 	acbw, ok := sc.(*acBalancerWrapper)
 	if !ok {
 		return
@@ -216,6 +363,10 @@ func (ccb *ccBalancerWrapper) UpdateAddresses(sc balancer.SubConn, addrs []resol
 }
 
 func (ccb *ccBalancerWrapper) UpdateState(s balancer.State) {
+	if ccb.isIdleOrClosed() {
+		return
+	}
+
 	// Update picker before updating state. Even though the ordering here does
 	// not matter, it can lead to multiple calls of Pick in the common start-up
 	// case where we wait for ready and then perform an RPC. If the picker is
@@ -226,6 +377,10 @@ func (ccb *ccBalancerWrapper) UpdateState(s balancer.State) {
 }
 
 func (ccb *ccBalancerWrapper) ResolveNow(o resolver.ResolveNowOptions) {
+	if ccb.isIdleOrClosed() {
+		return
+	}
+
 	ccb.cc.resolveNow(o)
 }
 
diff --git a/call.go b/call.go
index 9e20e4d385f9..e6a1dc5d75ed 100644
--- a/call.go
+++ b/call.go
@@ -27,6 +27,11 @@ import (
 //
 // All errors returned by Invoke are compatible with the status package.
 func (cc *ClientConn) Invoke(ctx context.Context, method string, args, reply interface{}, opts ...CallOption) error {
+	if err := cc.idlenessMgr.onCallBegin(); err != nil {
+		return err
+	}
+	defer cc.idlenessMgr.onCallEnd()
+
 	// allow interceptor to see all applicable call options, which means those
 	// configured as defaults from dial option as well as per-call options
 	opts = combine(cc.dopts.callOptions, opts)
diff --git a/clientconn.go b/clientconn.go
index 50d08a49a205..1def61e5a23d 100644
--- a/clientconn.go
+++ b/clientconn.go
@@ -69,6 +69,9 @@ var (
 	errConnDrain = errors.New("grpc: the connection is drained")
 	// errConnClosing indicates that the connection is closing.
 	errConnClosing = errors.New("grpc: the connection is closing")
+	// errConnIdling indicates that the connection is being closed as the
+	// channel is moving to idle mode due to inactivity.
+	errConnIdling = errors.New("grpc: the connection is closing due to channel idleness")
 	// invalidDefaultServiceConfigErrPrefix is used to prefix the json parsing error for the default
 	// service config.
 	invalidDefaultServiceConfigErrPrefix = "grpc: the provided default service config is invalid"
@@ -134,17 +137,29 @@ func (dcs *defaultConfigSelector) SelectConfig(rpcInfo iresolver.RPCInfo) (*ires
 // e.g. to use dns resolver, a "dns:///" prefix should be applied to the target.
 func DialContext(ctx context.Context, target string, opts ...DialOption) (conn *ClientConn, err error) {
 	cc := &ClientConn{
-		target:            target,
-		csMgr:             &connectivityStateManager{},
-		conns:             make(map[*addrConn]struct{}),
-		dopts:             defaultDialOptions(),
-		blockingpicker:    newPickerWrapper(),
-		czData:            new(channelzData),
-		firstResolveEvent: grpcsync.NewEvent(),
-	}
+		target: target,
+		csMgr:  &connectivityStateManager{},
+		conns:  make(map[*addrConn]struct{}),
+		dopts:  defaultDialOptions(),
+		czData: new(channelzData),
+	}
+
+	// We start the channel off in idle mode, but kick it out of idle at the end
+	// of this method, instead of waiting for the first RPC. Other gRPC
+	// implementations do wait for the first RPC to kick the channel out of
+	// idle. But doing so would be a major behavior change for our users who are
+	// used to seeing the channel active after Dial.
+	//
+	// Taking this approach of kicking it out of idle at the end of this method
+	// allows us to share the code between channel creation and exiting idle
+	// mode. This will also make it easy for us to switch to starting the
+	// channel off in idle, if at all we ever get to do that.
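
The Invoke hook shown above is essentially the whole client-side integration point for idleness: every RPC brackets itself with onCallBegin/onCallEnd so the manager can count in-flight calls. A trimmed, hypothetical sketch of that bracket (the real onCallBegin can fail when the channel cannot leave idle mode):

package main

import "fmt"

// callTracker is a hypothetical stand-in for the idleness manager.
type callTracker struct{ active int }

func (t *callTracker) onCallBegin() error { t.active++; return nil }
func (t *callTracker) onCallEnd()         { t.active-- }

func invoke(t *callTracker, method string) error {
	if err := t.onCallBegin(); err != nil {
		return err // e.g. the channel could not exit idle mode
	}
	defer t.onCallEnd()
	fmt.Println("invoking", method, "in-flight:", t.active)
	return nil
}

func main() {
	t := &callTracker{}
	_ = invoke(t, "/helloworld.Greeter/SayHello")
}

Judging by the diffstat, stream.go appears to get a similar treatment for streaming RPCs, with the end-of-call hook deferred until the stream finishes.
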
+ cc.idlenessState = ccIdlenessStateIdle + cc.retryThrottler.Store((*retryThrottler)(nil)) cc.safeConfigSelector.UpdateConfigSelector(&defaultConfigSelector{nil}) cc.ctx, cc.cancel = context.WithCancel(context.Background()) + cc.exitIdleCond = sync.NewCond(&cc.mu) disableGlobalOpts := false for _, opt := range opts { @@ -243,67 +258,175 @@ func DialContext(ctx context.Context, target string, opts ...DialOption) (conn * go cc.scWatcher() } - var credsClone credentials.TransportCredentials - if creds := cc.dopts.copts.TransportCredentials; creds != nil { - credsClone = creds.Clone() + // This creates the name resolver, load balancer, blocking picker etc. + if err := cc.exitIdleMode(); err != nil { + return nil, err } - cc.balancerWrapper = newCCBalancerWrapper(cc, balancer.BuildOptions{ - DialCreds: credsClone, - CredsBundle: cc.dopts.copts.CredsBundle, - Dialer: cc.dopts.copts.Dialer, - Authority: cc.authority, - CustomUserAgent: cc.dopts.copts.UserAgent, - ChannelzParentID: cc.channelzID, - Target: cc.parsedTarget, - }) - // Build the resolver. - rWrapper, err := newCCResolverWrapper(cc, ccResolverWrapperOpts{ - target: cc.parsedTarget, - builder: cc.resolverBuilder, - bOpts: resolver.BuildOptions{ - DisableServiceConfig: cc.dopts.disableServiceConfig, - DialCreds: credsClone, - CredsBundle: cc.dopts.copts.CredsBundle, - Dialer: cc.dopts.copts.Dialer, - }, - channelzID: cc.channelzID, - }) - if err != nil { - return nil, fmt.Errorf("failed to build resolver: %v", err) + // Configure idleness support with configured idle timeout or default idle + // timeout duration. Idleness can be explicitly disabled by the user, by + // setting the dial option to 0. + cc.idlenessMgr = newIdlenessManager(cc, cc.dopts.idleTimeout) + + // Return early for non-blocking dials. + if !cc.dopts.block { + return cc, nil } - cc.mu.Lock() - cc.resolverWrapper = rWrapper - cc.mu.Unlock() // A blocking dial blocks until the clientConn is ready. - if cc.dopts.block { - for { + for { + s := cc.GetState() + if s == connectivity.Idle { cc.Connect() - s := cc.GetState() - if s == connectivity.Ready { - break - } else if cc.dopts.copts.FailOnNonTempDialError && s == connectivity.TransientFailure { - if err = cc.connectionError(); err != nil { - terr, ok := err.(interface { - Temporary() bool - }) - if ok && !terr.Temporary() { - return nil, err - } - } - } - if !cc.WaitForStateChange(ctx, s) { - // ctx got timeout or canceled. - if err = cc.connectionError(); err != nil && cc.dopts.returnLastError { + } + if s == connectivity.Ready { + return cc, nil + } else if cc.dopts.copts.FailOnNonTempDialError && s == connectivity.TransientFailure { + if err = cc.connectionError(); err != nil { + terr, ok := err.(interface { + Temporary() bool + }) + if ok && !terr.Temporary() { return nil, err } - return nil, ctx.Err() } } + if !cc.WaitForStateChange(ctx, s) { + // ctx got timeout or canceled. + if err = cc.connectionError(); err != nil && cc.dopts.returnLastError { + return nil, err + } + return nil, ctx.Err() + } } +} - return cc, nil +// addTraceEvent is a helper method to add a trace event on the channel. If the +// channel is a nested one, the same event is also added on the parent channel. 
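
A hedged, self-contained sketch of the reworked blocking-dial loop above: because the channel now starts in IDLE, the loop has to call Connect() whenever it observes IDLE before it can ever reach READY. The types below are toy stand-ins for connectivity.State and the real, blocking WaitForStateChange:

package main

import "fmt"

type state int

const (
	idle state = iota
	connecting
	ready
)

type fakeConn struct{ s state }

func (c *fakeConn) getState() state     { return c.s }
func (c *fakeConn) connect()            { c.s = connecting }
func (c *fakeConn) waitForStateChange() { c.s++ } // stand-in for the real blocking wait

func blockUntilReady(c *fakeConn) {
	for {
		s := c.getState()
		if s == idle {
			c.connect() // new: must explicitly kick the channel out of idle
		}
		if s == ready {
			return
		}
		c.waitForStateChange()
	}
}

func main() {
	c := &fakeConn{s: idle}
	blockUntilReady(c)
	fmt.Println("ready")
}
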
+func (cc *ClientConn) addTraceEvent(msg string) { + ted := &channelz.TraceEventDesc{ + Desc: fmt.Sprintf("Channel %s", msg), + Severity: channelz.CtInfo, + } + if cc.dopts.channelzParentID != nil { + ted.Parent = &channelz.TraceEventDesc{ + Desc: fmt.Sprintf("Nested channel(id:%d) %s", cc.channelzID.Int(), msg), + Severity: channelz.CtInfo, + } + } + channelz.AddTraceEvent(logger, cc.channelzID, 0, ted) +} + +// exitIdleMode moves the channel out of idle mode by recreating the name +// resolver and load balancer. +func (cc *ClientConn) exitIdleMode() error { + cc.mu.Lock() + if cc.conns == nil { + cc.mu.Unlock() + return errConnClosing + } + if cc.idlenessState != ccIdlenessStateIdle { + logger.Error("ClientConn asked to exit idle mode when not in idle mode") + return nil + } + + defer func() { + // When Close() and exitIdleMode() race against each other, one of the + // following two can happen: + // - Close() wins the race and runs first. exitIdleMode() runs after, and + // sees that the ClientConn is already closed and hence returns early. + // - exitIdleMode() wins the race and runs first and recreates the balancer + // and releases the lock before recreating the resolver. If Close() runs + // in this window, it will wait for exitIdleMode to complete. + // + // We achieve this synchronization using the below condition variable. + cc.mu.Lock() + cc.idlenessState = ccIdlenessStateActive + cc.exitIdleCond.Signal() + cc.mu.Unlock() + }() + + cc.idlenessState = ccIdlenessStateExitingIdle + exitedIdle := false + if cc.blockingpicker == nil { + cc.blockingpicker = newPickerWrapper() + } else { + cc.blockingpicker.exitIdleMode() + exitedIdle = true + } + + var credsClone credentials.TransportCredentials + if creds := cc.dopts.copts.TransportCredentials; creds != nil { + credsClone = creds.Clone() + } + if cc.balancerWrapper == nil { + cc.balancerWrapper = newCCBalancerWrapper(cc, balancer.BuildOptions{ + DialCreds: credsClone, + CredsBundle: cc.dopts.copts.CredsBundle, + Dialer: cc.dopts.copts.Dialer, + Authority: cc.authority, + CustomUserAgent: cc.dopts.copts.UserAgent, + ChannelzParentID: cc.channelzID, + Target: cc.parsedTarget, + }) + } else { + cc.balancerWrapper.exitIdleMode() + } + cc.firstResolveEvent = grpcsync.NewEvent() + cc.mu.Unlock() + + // This needs to be called without cc.mu because this builds a new resolver + // which might update state or report error inline which needs to be handled + // by cc.updateResolverState() which also grabs cc.mu. + if err := cc.initResolverWrapper(credsClone); err != nil { + return err + } + + if exitedIdle { + cc.addTraceEvent("exiting idle mode") + } + return nil +} + +// enterIdleMode puts the channel in idle mode, and as part of it shuts down the +// name resolver, load balancer and any subchannels. +func (cc *ClientConn) enterIdleMode() error { + cc.mu.Lock() + if cc.conns == nil { + cc.mu.Unlock() + return ErrClientConnClosing + } + if cc.idlenessState != ccIdlenessStateActive { + logger.Error("ClientConn asked to enter idle mode when not active") + return nil + } + + // cc.conns == nil is a proxy for the ClientConn being closed. So, instead + // of setting it to nil here, we recreate the map. This also means that we + // don't have to do this when exiting idle mode. + conns := cc.conns + cc.conns = make(map[*addrConn]struct{}) + + // TODO: Currently, we close the resolver wrapper upon entering idle mode + // and create a new one upon exiting idle mode. 
This means that the + // `cc.resolverWrapper` field would be overwritten everytime we exit idle + // mode. While this means that we need to hold `cc.mu` when accessing + // `cc.resolverWrapper`, it makes the code simpler in the wrapper. We should + // try to do the same for the balancer and picker wrappers too. + cc.resolverWrapper.close() + cc.blockingpicker.enterIdleMode() + cc.balancerWrapper.enterIdleMode() + cc.csMgr.updateState(connectivity.Idle) + cc.idlenessState = ccIdlenessStateIdle + cc.mu.Unlock() + + go func() { + cc.addTraceEvent("entering idle mode") + for ac := range conns { + ac.tearDown(errConnIdling) + } + }() + return nil } // validateTransportCredentials performs a series of checks on the configured @@ -350,17 +473,7 @@ func (cc *ClientConn) validateTransportCredentials() error { // Doesn't grab cc.mu as this method is expected to be called only at Dial time. func (cc *ClientConn) channelzRegistration(target string) { cc.channelzID = channelz.RegisterChannel(&channelzChannel{cc}, cc.dopts.channelzParentID, target) - ted := &channelz.TraceEventDesc{ - Desc: "Channel created", - Severity: channelz.CtInfo, - } - if cc.dopts.channelzParentID != nil { - ted.Parent = &channelz.TraceEventDesc{ - Desc: fmt.Sprintf("Nested Channel(id:%d) created", cc.channelzID.Int()), - Severity: channelz.CtInfo, - } - } - channelz.AddTraceEvent(logger, cc.channelzID, 1, ted) + cc.addTraceEvent("created") cc.csMgr.channelzID = cc.channelzID } @@ -509,6 +622,7 @@ type ClientConn struct { channelzID *channelz.Identifier // Channelz identifier for the channel. resolverBuilder resolver.Builder // See parseTargetAndFindResolver(). balancerWrapper *ccBalancerWrapper // Uses gracefulswitch.balancer underneath. + idlenessMgr idlenessManager // The following provide their own synchronization, and therefore don't // require cc.mu to be held to access them. @@ -529,11 +643,31 @@ type ClientConn struct { sc *ServiceConfig // Latest service config received from the resolver. conns map[*addrConn]struct{} // Set to nil on close. mkp keepalive.ClientParameters // May be updated upon receipt of a GoAway. + idlenessState ccIdlenessState // Tracks idleness state of the channel. + exitIdleCond *sync.Cond // Signalled when channel exits idle. lceMu sync.Mutex // protects lastConnectionError lastConnectionError error } +// ccIdlenessState tracks the idleness state of the channel. +// +// Channels start off in `active` and move to `idle` after a period of +// inactivity. When moving back to `active` upon an incoming RPC, they +// transition through `exiting_idle`. This state is useful for synchronization +// with Close(). +// +// This state tracking is mostly for self-protection. The idlenessManager is +// expected to keep track of the state as well, and is expected not to call into +// the ClientConn unnecessarily. +type ccIdlenessState int8 + +const ( + ccIdlenessStateActive ccIdlenessState = iota + ccIdlenessStateIdle + ccIdlenessStateExitingIdle +) + // WaitForStateChange waits until the connectivity.State of ClientConn changes from sourceState or // ctx expires. A true value is returned in former case and false in latter. // @@ -573,7 +707,7 @@ func (cc *ClientConn) GetState() connectivity.State { // Notice: This API is EXPERIMENTAL and may be changed or removed in a later // release. 
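
A minimal sketch, with hypothetical names, of the Close()/exitIdleMode() handshake that the exitIdleCond field above supports; the real code guards many more fields, but the shape is the same:

package main

import (
	"fmt"
	"sync"
)

type idlenessState int

const (
	stateActive idlenessState = iota
	stateIdle
	stateExitingIdle
)

type channel struct {
	mu   sync.Mutex
	cond *sync.Cond
	st   idlenessState
}

func (c *channel) exitIdleMode() {
	c.mu.Lock()
	c.st = stateExitingIdle
	c.mu.Unlock()

	// ... recreate resolver/balancer here, without holding mu ...

	c.mu.Lock()
	c.st = stateActive
	c.cond.Signal() // wake up a Close() that raced with us
	c.mu.Unlock()
}

func (c *channel) close() {
	c.mu.Lock()
	for c.st == stateExitingIdle {
		c.cond.Wait() // never tear down while exit-idle is mid-flight
	}
	c.mu.Unlock()
	fmt.Println("safe to tear down")
}

func main() {
	c := &channel{st: stateIdle}
	c.cond = sync.NewCond(&c.mu)
	go c.exitIdleMode()
	c.close()
}
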
func (cc *ClientConn) Connect() { - cc.balancerWrapper.exitIdle() + cc.balancerWrapper.exitIdleMode() } func (cc *ClientConn) scWatcher() { @@ -1061,39 +1195,40 @@ func (cc *ClientConn) Close() error { cc.mu.Unlock() return ErrClientConnClosing } + + for cc.idlenessState == ccIdlenessStateExitingIdle { + cc.exitIdleCond.Wait() + } + conns := cc.conns cc.conns = nil cc.csMgr.updateState(connectivity.Shutdown) + pWrapper := cc.blockingpicker rWrapper := cc.resolverWrapper - cc.resolverWrapper = nil bWrapper := cc.balancerWrapper + idlenessMgr := cc.idlenessMgr cc.mu.Unlock() // The order of closing matters here since the balancer wrapper assumes the // picker is closed before it is closed. - cc.blockingpicker.close() + if pWrapper != nil { + pWrapper.close() + } if bWrapper != nil { bWrapper.close() } if rWrapper != nil { rWrapper.close() } + if idlenessMgr != nil { + idlenessMgr.close() + } for ac := range conns { ac.tearDown(ErrClientConnClosing) } - ted := &channelz.TraceEventDesc{ - Desc: "Channel deleted", - Severity: channelz.CtInfo, - } - if cc.dopts.channelzParentID != nil { - ted.Parent = &channelz.TraceEventDesc{ - Desc: fmt.Sprintf("Nested channel(id:%d) deleted", cc.channelzID.Int()), - Severity: channelz.CtInfo, - } - } - channelz.AddTraceEvent(logger, cc.channelzID, 0, ted) + cc.addTraceEvent("deleted") // TraceEvent needs to be called before RemoveEntry, as TraceEvent may add // trace reference to the entity being deleted, and thus prevent it from being // deleted right away. @@ -1735,3 +1870,32 @@ func (cc *ClientConn) determineAuthority() error { channelz.Infof(logger, cc.channelzID, "Channel authority set to %q", cc.authority) return nil } + +// initResolverWrapper creates a ccResolverWrapper, which builds the name +// resolver. This method grabs the lock to assign the newly built resolver +// wrapper to the cc.resolverWrapper field. +func (cc *ClientConn) initResolverWrapper(creds credentials.TransportCredentials) error { + rw, err := newCCResolverWrapper(cc, ccResolverWrapperOpts{ + target: cc.parsedTarget, + builder: cc.resolverBuilder, + bOpts: resolver.BuildOptions{ + DisableServiceConfig: cc.dopts.disableServiceConfig, + DialCreds: creds, + CredsBundle: cc.dopts.copts.CredsBundle, + Dialer: cc.dopts.copts.Dialer, + }, + channelzID: cc.channelzID, + }) + if err != nil { + return fmt.Errorf("failed to build resolver: %v", err) + } + // Resolver implementations may report state update or error inline when + // built (or right after), and this is handled in cc.updateResolverState. + // Also, an error from the resolver might lead to a re-resolution request + // from the balancer, which is handled in resolveNow() where + // `cc.resolverWrapper` is accessed. Hence, we need to hold the lock here. + cc.mu.Lock() + cc.resolverWrapper = rw + cc.mu.Unlock() + return nil +} diff --git a/clientconn_test.go b/clientconn_test.go index 9004f3177fdd..3cd04a743444 100644 --- a/clientconn_test.go +++ b/clientconn_test.go @@ -370,7 +370,7 @@ func (s) TestBackoffWhenNoServerPrefaceReceived(t *testing.T) { }() bc := backoff.Config{ BaseDelay: 200 * time.Millisecond, - Multiplier: 1.1, + Multiplier: 2.0, Jitter: 0, MaxDelay: 120 * time.Second, } diff --git a/dialoptions.go b/dialoptions.go index cdc8263bda65..51c8997d5d18 100644 --- a/dialoptions.go +++ b/dialoptions.go @@ -77,6 +77,7 @@ type dialOptions struct { defaultServiceConfig *ServiceConfig // defaultServiceConfig is parsed from defaultServiceConfigRawJSON. 
defaultServiceConfigRawJSON *string resolvers []resolver.Builder + idleTimeout time.Duration } // DialOption configures how we set up the connection. @@ -627,6 +628,7 @@ func defaultDialOptions() dialOptions { ReadBufferSize: defaultReadBufSize, UseProxy: true, }, + idleTimeout: 30 * time.Minute, } } @@ -655,3 +657,23 @@ func WithResolvers(rs ...resolver.Builder) DialOption { o.resolvers = append(o.resolvers, rs...) }) } + +// WithIdleTimeout returns a DialOption that configures an idle timeout for the +// channel. If the channel is idle for the configured timeout, i.e there are no +// ongoing RPCs and no new RPCs are initiated, the channel will enter idle mode +// and as a result the name resolver and load balancer will be shut down. The +// channel will exit idle mode when the Connect() method is called or when an +// RPC is initiated. +// +// A default timeout of 30 min will be used if this dial option is not set at +// dial time and idleness can be disabled by passing a timeout of zero. +// +// # Experimental +// +// Notice: This API is EXPERIMENTAL and may be changed or removed in a +// later release. +func WithIdleTimeout(d time.Duration) DialOption { + return newFuncDialOption(func(o *dialOptions) { + o.idleTimeout = d + }) +} diff --git a/idle.go b/idle.go new file mode 100644 index 000000000000..dc3dc72f6b09 --- /dev/null +++ b/idle.go @@ -0,0 +1,287 @@ +/* + * + * Copyright 2023 gRPC authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +package grpc + +import ( + "fmt" + "math" + "sync" + "sync/atomic" + "time" +) + +// For overriding in unit tests. +var timeAfterFunc = func(d time.Duration, f func()) *time.Timer { + return time.AfterFunc(d, f) +} + +// idlenessEnforcer is the functionality provided by grpc.ClientConn to enter +// and exit from idle mode. +type idlenessEnforcer interface { + exitIdleMode() error + enterIdleMode() error +} + +// idlenessManager defines the functionality required to track RPC activity on a +// channel. +type idlenessManager interface { + onCallBegin() error + onCallEnd() + close() +} + +type noopIdlenessManager struct{} + +func (noopIdlenessManager) onCallBegin() error { return nil } +func (noopIdlenessManager) onCallEnd() {} +func (noopIdlenessManager) close() {} + +// idlenessManagerImpl implements the idlenessManager interface. It uses atomic +// operations to synchronize access to shared state and a mutex to guarantee +// mutual exclusion in a critical section. +type idlenessManagerImpl struct { + // State accessed atomically. + lastCallEndTime int64 // Unix timestamp in nanos; time when the most recent RPC completed. + activeCallsCount int32 // Count of active RPCs; -math.MaxInt32 means channel is idle or is trying to get there. + activeSinceLastTimerCheck int32 // Boolean; True if there was an RPC since the last timer callback. + closed int32 // Boolean; True when the manager is closed. + + // Can be accessed without atomics or mutex since these are set at creation + // time and read-only after that. 
+ enforcer idlenessEnforcer // Functionality provided by grpc.ClientConn. + timeout int64 // Idle timeout duration nanos stored as an int64. + + // idleMu is used to guarantee mutual exclusion in two scenarios: + // - Opposing intentions: + // - a: Idle timeout has fired and handleIdleTimeout() is trying to put + // the channel in idle mode because the channel has been inactive. + // - b: At the same time an RPC is made on the channel, and onCallBegin() + // is trying to prevent the channel from going idle. + // - Competing intentions: + // - The channel is in idle mode and there are multiple RPCs starting at + // the same time, all trying to move the channel out of idle. Only one + // of them should succeed in doing so, while the other RPCs should + // piggyback on the first one and be successfully handled. + idleMu sync.RWMutex + actuallyIdle bool + timer *time.Timer +} + +// newIdlenessManager creates a new idleness manager implementation for the +// given idle timeout. +func newIdlenessManager(enforcer idlenessEnforcer, idleTimeout time.Duration) idlenessManager { + if idleTimeout == 0 { + return noopIdlenessManager{} + } + + i := &idlenessManagerImpl{ + enforcer: enforcer, + timeout: int64(idleTimeout), + } + i.timer = timeAfterFunc(idleTimeout, i.handleIdleTimeout) + return i +} + +// resetIdleTimer resets the idle timer to the given duration. This method +// should only be called from the timer callback. +func (i *idlenessManagerImpl) resetIdleTimer(d time.Duration) { + i.idleMu.Lock() + defer i.idleMu.Unlock() + + if i.timer == nil { + // Only close sets timer to nil. We are done. + return + } + + // It is safe to ignore the return value from Reset() because this method is + // only ever called from the timer callback, which means the timer has + // already fired. + i.timer.Reset(d) +} + +// handleIdleTimeout is the timer callback that is invoked upon expiry of the +// configured idle timeout. The channel is considered inactive if there are no +// ongoing calls and no RPC activity since the last time the timer fired. +func (i *idlenessManagerImpl) handleIdleTimeout() { + if i.isClosed() { + return + } + + if atomic.LoadInt32(&i.activeCallsCount) > 0 { + i.resetIdleTimer(time.Duration(i.timeout)) + return + } + + // There has been activity on the channel since we last got here. Reset the + // timer and return. + if atomic.LoadInt32(&i.activeSinceLastTimerCheck) == 1 { + // Set the timer to fire after a duration of idle timeout, calculated + // from the time the most recent RPC completed. + atomic.StoreInt32(&i.activeSinceLastTimerCheck, 0) + i.resetIdleTimer(time.Duration(atomic.LoadInt64(&i.lastCallEndTime) + i.timeout - time.Now().UnixNano())) + return + } + + // This CAS operation is extremely likely to succeed given that there has + // been no activity since the last time we were here. Setting the + // activeCallsCount to -math.MaxInt32 indicates to onCallBegin() that the + // channel is either in idle mode or is trying to get there. + if !atomic.CompareAndSwapInt32(&i.activeCallsCount, 0, -math.MaxInt32) { + // This CAS operation can fail if an RPC started after we checked for + // activity at the top of this method, or one was ongoing from before + // the last time we were here. In both case, reset the timer and return. + i.resetIdleTimer(time.Duration(i.timeout)) + return + } + + // Now that we've set the active calls count to -math.MaxInt32, it's time to + // actually move to idle mode. + if i.tryEnterIdleMode() { + // Successfully entered idle mode. 
No timer needed until we exit idle.
+		return
+	}
+
+	// Failed to enter idle mode due to a concurrent RPC that kept the channel
+	// active, or because of an error from the channel. Undo the attempt to
+	// enter idle, and reset the timer to try again later.
+	atomic.AddInt32(&i.activeCallsCount, math.MaxInt32)
+	i.resetIdleTimer(time.Duration(i.timeout))
+}
+
+// tryEnterIdleMode instructs the channel to enter idle mode. But before
+// that, it performs a last minute check to ensure that no new RPC has come in,
+// making the channel active.
+//
+// Return value indicates whether or not the channel moved to idle mode.
+//
+// Holds idleMu which ensures mutual exclusion with exitIdleMode.
+func (i *idlenessManagerImpl) tryEnterIdleMode() bool {
+	i.idleMu.Lock()
+	defer i.idleMu.Unlock()
+
+	if atomic.LoadInt32(&i.activeCallsCount) != -math.MaxInt32 {
+		// We raced and lost to a new RPC. Very rare, but stop entering idle.
+		return false
+	}
+	if atomic.LoadInt32(&i.activeSinceLastTimerCheck) == 1 {
+		// A very short RPC could have come in (and also finished) after we
+		// checked for calls count and activity in handleIdleTimeout(), but
+		// before the CAS operation. So, we need to check for activity again.
+		return false
+	}
+
+	// No new RPCs have come in since we last set the active calls count value
+	// -math.MaxInt32 in the timer callback. And since we have the lock, it is
+	// safe to enter idle mode now.
+	if err := i.enforcer.enterIdleMode(); err != nil {
+		logger.Errorf("Failed to enter idle mode: %v", err)
+		return false
+	}
+
+	// Successfully entered idle mode.
+	i.actuallyIdle = true
+	return true
+}
+
+// onCallBegin is invoked at the start of every RPC.
+func (i *idlenessManagerImpl) onCallBegin() error {
+	if i.isClosed() {
+		return nil
+	}
+
+	if atomic.AddInt32(&i.activeCallsCount, 1) > 0 {
+		// Channel is not idle now. Set the activity bit and allow the call.
+		atomic.StoreInt32(&i.activeSinceLastTimerCheck, 1)
+		return nil
+	}
+
+	// Channel is either in idle mode or is in the process of moving to idle
+	// mode. Attempt to exit idle mode to allow this RPC.
+	if err := i.exitIdleMode(); err != nil {
+		// Undo the increment to calls count, and return an error causing the
+		// RPC to fail.
+		atomic.AddInt32(&i.activeCallsCount, -1)
+		return err
+	}
+
+	atomic.StoreInt32(&i.activeSinceLastTimerCheck, 1)
+	return nil
+}
+
+// exitIdleMode instructs the channel to exit idle mode.
+//
+// Holds idleMu which ensures mutual exclusion with tryEnterIdleMode.
+func (i *idlenessManagerImpl) exitIdleMode() error {
+	i.idleMu.Lock()
+	defer i.idleMu.Unlock()
+
+	if !i.actuallyIdle {
+		// This can happen in two scenarios:
+		// - handleIdleTimeout() set the calls count to -math.MaxInt32 and called
+		//   tryEnterIdleMode(). But before the latter could grab the lock, an RPC
+		//   came in and onCallBegin() noticed that the calls count is negative.
+		// - Channel is in idle mode, and multiple new RPCs come in at the same
+		//   time, all of them notice a negative calls count in onCallBegin and get
+		//   here. The first one to get the lock would get the channel to exit idle.
+		//
+		// Either way, nothing to do here.
+		return nil
+	}
+
+	if err := i.enforcer.exitIdleMode(); err != nil {
+		return fmt.Errorf("channel failed to exit idle mode: %v", err)
+	}
+
+	// Undo the idle entry process. This also respects any new RPC attempts.
+	atomic.AddInt32(&i.activeCallsCount, math.MaxInt32)
+	i.actuallyIdle = false
+
+	// Start a new timer to fire after the configured idle timeout.
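
In one runnable, hypothetical nutshell, the counter protocol used throughout this file: parking activeCallsCount at -math.MaxInt32 is what lets onCallBegin detect "idle, or going idle" with a single atomic add:

package main

import (
	"fmt"
	"math"
	"sync/atomic"
)

func main() {
	var activeCalls int32

	// Timer callback: claim idleness only if no calls are in flight.
	claimed := atomic.CompareAndSwapInt32(&activeCalls, 0, -math.MaxInt32)
	fmt.Println("claimed idle:", claimed)

	// An RPC begins: a non-positive result means the channel is idle (or on
	// its way there) and must be brought back before the call can proceed.
	if atomic.AddInt32(&activeCalls, 1) <= 0 {
		// ... exit idle mode, then undo the sentinel; adding math.MaxInt32
		// leaves the count at 1, reflecting this in-flight call ...
		atomic.AddInt32(&activeCalls, math.MaxInt32)
	}
	fmt.Println("active calls:", atomic.LoadInt32(&activeCalls)) // 1
}
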
+ i.timer = timeAfterFunc(time.Duration(i.timeout), i.handleIdleTimeout) + return nil +} + +// onCallEnd is invoked at the end of every RPC. +func (i *idlenessManagerImpl) onCallEnd() { + if i.isClosed() { + return + } + + // Record the time at which the most recent call finished. + atomic.StoreInt64(&i.lastCallEndTime, time.Now().UnixNano()) + + // Decrement the active calls count. This count can temporarily go negative + // when the timer callback is in the process of moving the channel to idle + // mode, but one or more RPCs come in and complete before the timer callback + // can get done with the process of moving to idle mode. + atomic.AddInt32(&i.activeCallsCount, -1) +} + +func (i *idlenessManagerImpl) isClosed() bool { + return atomic.LoadInt32(&i.closed) == 1 +} + +func (i *idlenessManagerImpl) close() { + atomic.StoreInt32(&i.closed, 1) + + i.idleMu.Lock() + i.timer.Stop() + i.timer = nil + i.idleMu.Unlock() +} diff --git a/idle_test.go b/idle_test.go new file mode 100644 index 000000000000..a20b4e09947b --- /dev/null +++ b/idle_test.go @@ -0,0 +1,360 @@ +/* + * + * Copyright 2023 gRPC authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +package grpc + +import ( + "context" + "fmt" + "sync" + "sync/atomic" + "testing" + "time" +) + +const ( + defaultTestIdleTimeout = 500 * time.Millisecond // A short idle_timeout for tests. + defaultTestShortTimeout = 10 * time.Millisecond // A small deadline to wait for events expected to not happen. +) + +type testIdlenessEnforcer struct { + exitIdleCh chan struct{} + enterIdleCh chan struct{} +} + +func (ti *testIdlenessEnforcer) exitIdleMode() error { + ti.exitIdleCh <- struct{}{} + return nil + +} + +func (ti *testIdlenessEnforcer) enterIdleMode() error { + ti.enterIdleCh <- struct{}{} + return nil + +} + +func newTestIdlenessEnforcer() *testIdlenessEnforcer { + return &testIdlenessEnforcer{ + exitIdleCh: make(chan struct{}, 1), + enterIdleCh: make(chan struct{}, 1), + } +} + +// overrideNewTimer overrides the new timer creation function by ensuring that a +// message is pushed on the returned channel everytime the timer fires. +func overrideNewTimer(t *testing.T) <-chan struct{} { + t.Helper() + + ch := make(chan struct{}, 1) + origTimeAfterFunc := timeAfterFunc + timeAfterFunc = func(d time.Duration, callback func()) *time.Timer { + return time.AfterFunc(d, func() { + select { + case ch <- struct{}{}: + default: + } + callback() + }) + } + t.Cleanup(func() { timeAfterFunc = origTimeAfterFunc }) + return ch +} + +// TestIdlenessManager_Disabled tests the case where the idleness manager is +// disabled by passing an idle_timeout of 0. 
Verifies the following things:
+// - timer callback does not fire
+// - an RPC does not trigger a call to exitIdleMode on the ClientConn
+// - more calls to RPC termination (as compared to RPC initiation) do not
+// result in an error log
+func (s) TestIdlenessManager_Disabled(t *testing.T) {
+ callbackCh := overrideNewTimer(t)
+
+ // Create an idleness manager that is disabled because of idleTimeout being
+ // set to `0`.
+ enforcer := newTestIdlenessEnforcer()
+ mgr := newIdlenessManager(enforcer, time.Duration(0))
+
+ // Ensure that the timer callback does not fire within a short deadline.
+ select {
+ case <-callbackCh:
+ t.Fatal("Idle timer callback fired when manager is disabled")
+ case <-time.After(defaultTestShortTimeout):
+ }
+
+ // The first invocation of onCallBegin() would lead to a call to
+ // exitIdleMode() on the enforcer, unless the idleness manager is disabled.
+ mgr.onCallBegin()
+ select {
+ case <-enforcer.exitIdleCh:
+ t.Fatalf("exitIdleMode() called on enforcer when manager is disabled")
+ case <-time.After(defaultTestShortTimeout):
+ }
+
+ // If the number of calls to onCallEnd() exceeds the number of calls to
+ // onCallBegin(), the idleness manager is expected to throw an error log
+ // (which will cause our TestLogger to fail the test). But since the manager
+ // is disabled, this should not happen.
+ mgr.onCallEnd()
+ mgr.onCallEnd()
+
+ // The idleness manager is explicitly not closed here. But since the manager
+ // is disabled, it will not start the run goroutine, and hence we expect the
+ // leakchecker to not find any leaked goroutines.
+}
+
+// TestIdlenessManager_Enabled_TimerFires tests the case where the idle manager
+// is enabled. Ensures that when there are no RPCs, the timer callback is
+// invoked and the enterIdleMode() method is invoked on the enforcer.
+func (s) TestIdlenessManager_Enabled_TimerFires(t *testing.T) {
+ callbackCh := overrideNewTimer(t)
+
+ enforcer := newTestIdlenessEnforcer()
+ mgr := newIdlenessManager(enforcer, time.Duration(defaultTestIdleTimeout))
+ defer mgr.close()
+
+ // Ensure that the timer callback fires within an appropriate amount of time.
+ select {
+ case <-callbackCh:
+ case <-time.After(2 * defaultTestIdleTimeout):
+ t.Fatal("Timeout waiting for idle timer callback to fire")
+ }
+
+ // Ensure that the channel moves to idle mode eventually.
+ select {
+ case <-enforcer.enterIdleCh:
+ case <-time.After(defaultTestTimeout):
+ t.Fatal("Timeout waiting for channel to move to idle")
+ }
+}
+
+// TestIdlenessManager_Enabled_OngoingCall tests the case where the idle manager
+// is enabled. Ensures that when there is an ongoing RPC, the channel does not
+// enter idle mode.
+func (s) TestIdlenessManager_Enabled_OngoingCall(t *testing.T) {
+ callbackCh := overrideNewTimer(t)
+
+ enforcer := newTestIdlenessEnforcer()
+ mgr := newIdlenessManager(enforcer, time.Duration(defaultTestIdleTimeout))
+ defer mgr.close()
+
+ // Fire up a goroutine that simulates an ongoing RPC that is terminated
+ // after the timer callback fires for the first time.
+ timerFired := make(chan struct{})
+ go func() {
+ mgr.onCallBegin()
+ <-timerFired
+ mgr.onCallEnd()
+ }()
+
+ // Ensure that the timer callback fires and unblock the above goroutine.
+ select {
+ case <-callbackCh:
+ close(timerFired)
+ case <-time.After(2 * defaultTestIdleTimeout):
+ t.Fatal("Timeout waiting for idle timer callback to fire")
+ }
+
+ // The invocation of the timer callback should not put the channel in idle
+ // mode since we had an ongoing RPC.
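+ // (The callback saw a positive active calls count and only reset the
+ // timer; it should not have asked the enforcer to enter idle mode.)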
+ select {
+ case <-enforcer.enterIdleCh:
+ t.Fatalf("enterIdleMode() called on enforcer when active RPC exists")
+ case <-time.After(defaultTestShortTimeout):
+ }
+
+ // Since we terminated the ongoing RPC and we have no other active RPCs, the
+ // channel must move to idle eventually.
+ select {
+ case <-enforcer.enterIdleCh:
+ case <-time.After(defaultTestTimeout):
+ t.Fatal("Timeout waiting for channel to move to idle")
+ }
+}
+
+// TestIdlenessManager_Enabled_ActiveSinceLastCheck tests the case where the
+// idle manager is enabled. Ensures that when there are active RPCs in the last
+// period (even though there is no active call when the timer fires), the
+// channel does not enter idle mode.
+func (s) TestIdlenessManager_Enabled_ActiveSinceLastCheck(t *testing.T) {
+ callbackCh := overrideNewTimer(t)
+
+ enforcer := newTestIdlenessEnforcer()
+ mgr := newIdlenessManager(enforcer, time.Duration(defaultTestIdleTimeout))
+ defer mgr.close()
+
+ // Fire up a goroutine that simulates unary RPCs until the timer callback
+ // fires.
+ timerFired := make(chan struct{})
+ go func() {
+ for ; ; <-time.After(defaultTestShortTimeout) {
+ mgr.onCallBegin()
+ mgr.onCallEnd()
+
+ select {
+ case <-timerFired:
+ return
+ default:
+ }
+ }
+ }()
+
+ // Ensure that the timer callback fires, and that we don't enter idle as
+ // part of this invocation of the timer callback, since we had some RPCs in
+ // this period.
+ select {
+ case <-callbackCh:
+ close(timerFired)
+ case <-time.After(2 * defaultTestIdleTimeout):
+ t.Fatal("Timeout waiting for idle timer callback to fire")
+ }
+ select {
+ case <-enforcer.enterIdleCh:
+ t.Fatalf("enterIdleMode() called on enforcer when one RPC completed in the last period")
+ case <-time.After(defaultTestShortTimeout):
+ }
+
+ // Since the unary RPC terminated and we have no other active RPCs, the
+ // channel must move to idle eventually.
+ select {
+ case <-enforcer.enterIdleCh:
+ case <-time.After(defaultTestTimeout):
+ t.Fatal("Timeout waiting for channel to move to idle")
+ }
+}
+
+// TestIdlenessManager_Enabled_ExitIdleOnRPC tests the case where the idle
+// manager is enabled. Ensures that the channel moves out of idle when an RPC is
+// initiated.
+func (s) TestIdlenessManager_Enabled_ExitIdleOnRPC(t *testing.T) {
+ overrideNewTimer(t)
+
+ enforcer := newTestIdlenessEnforcer()
+ mgr := newIdlenessManager(enforcer, time.Duration(defaultTestIdleTimeout))
+ defer mgr.close()
+
+ // Ensure that the channel moves to idle since there are no RPCs.
+ select {
+ case <-enforcer.enterIdleCh:
+ case <-time.After(2 * defaultTestIdleTimeout):
+ t.Fatal("Timeout waiting for channel to move to idle mode")
+ }
+
+ for i := 0; i < 100; i++ {
+ // A call to onCallBegin and onCallEnd simulates an RPC.
+ go func() {
+ if err := mgr.onCallBegin(); err != nil {
+ t.Errorf("onCallBegin() failed: %v", err)
+ }
+ mgr.onCallEnd()
+ }()
+ }
+
+ // Ensure that the channel moves out of idle as a result of the above RPCs.
+ select {
+ case <-enforcer.exitIdleCh:
+ case <-time.After(2 * defaultTestIdleTimeout):
+ t.Fatal("Timeout waiting for channel to move out of idle mode")
+ }
+
+ // Ensure that only one call to exit idle mode is made to the CC.
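+ // Of the 100 RPCs simulated above, only the first one to observe a
+ // negative calls count performs the idle exit; a second event on
+ // exitIdleCh here would indicate a bug in the idleness manager.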
+ sCtx, sCancel := context.WithTimeout(context.Background(), defaultTestShortTimeout)
+ defer sCancel()
+ select {
+ case <-enforcer.exitIdleCh:
+ t.Fatal("More than one call to exit idle mode on the ClientConn; only one expected")
+ case <-sCtx.Done():
+ }
+}
+
+type racyIdlenessState int32
+
+const (
+ stateInitial racyIdlenessState = iota
+ stateEnteredIdle
+ stateExitedIdle
+ stateActiveRPCs
+)
+
+// racyIdlenessEnforcer is a test idleness enforcer used specifically to test the
+// race between idle timeout and incoming RPCs.
+type racyIdlenessEnforcer struct {
+ state *racyIdlenessState // Accessed atomically.
+}
+
+// exitIdleMode sets the internal state to stateExitedIdle. We should only ever
+// exit idle when we are currently in idle.
+func (ri *racyIdlenessEnforcer) exitIdleMode() error {
+ if !atomic.CompareAndSwapInt32((*int32)(ri.state), int32(stateEnteredIdle), int32(stateExitedIdle)) {
+ return fmt.Errorf("idleness enforcer asked to exit idle when it did not enter idle earlier")
+ }
+ return nil
+}
+
+// enterIdleMode attempts to set the internal state to stateEnteredIdle. We
+// should only ever enter idle before RPCs start.
+func (ri *racyIdlenessEnforcer) enterIdleMode() error {
+ if !atomic.CompareAndSwapInt32((*int32)(ri.state), int32(stateInitial), int32(stateEnteredIdle)) {
+ return fmt.Errorf("idleness enforcer asked to enter idle after rpcs started")
+ }
+ return nil
+}
+
+// TestIdlenessManager_IdleTimeoutRacesWithOnCallBegin tests the case where
+// firing of the idle timeout races with an incoming RPC. The test verifies that
+// if the timer callback wins the race and puts the channel in idle, the RPCs can
+// kick it out of idle. And if the RPCs win the race and keep the channel
+// active, then the timer callback should not attempt to put the channel in idle
+// mode.
+func (s) TestIdlenessManager_IdleTimeoutRacesWithOnCallBegin(t *testing.T) {
+ // Run multiple iterations to simulate different possibilities.
+ for i := 0; i < 10; i++ {
+ t.Run(fmt.Sprintf("iteration=%d", i), func(t *testing.T) {
+ var idlenessState racyIdlenessState
+ enforcer := &racyIdlenessEnforcer{state: &idlenessState}
+
+ // Configure a large idle timeout so that we can control the
+ // race between the timer callback and RPCs.
+ mgr := newIdlenessManager(enforcer, time.Duration(10*time.Minute))
+ defer mgr.close()
+
+ var wg sync.WaitGroup
+ wg.Add(1)
+ go func() {
+ defer wg.Done()
+ m := mgr.(interface{ handleIdleTimeout() })
+ <-time.After(defaultTestIdleTimeout)
+ m.handleIdleTimeout()
+ }()
+ for j := 0; j < 100; j++ {
+ wg.Add(1)
+ go func() {
+ defer wg.Done()
+ // Wait for the configured idle timeout and simulate an RPC to
+ // race with the idle timeout timer callback.
+ <-time.After(defaultTestIdleTimeout)
+ if err := mgr.onCallBegin(); err != nil {
+ t.Errorf("onCallBegin() failed: %v", err)
+ }
+ atomic.StoreInt32((*int32)(&idlenessState), int32(stateActiveRPCs))
+ mgr.onCallEnd()
+ }()
+ }
+ wg.Wait()
+ })
+ }
+}
diff --git a/internal/grpcsync/callback_serializer.go b/internal/grpcsync/callback_serializer.go
index d91f92463542..37b8d4117e77 100644
--- a/internal/grpcsync/callback_serializer.go
+++ b/internal/grpcsync/callback_serializer.go
@@ -20,6 +20,7 @@ package grpcsync
 
 import (
 "context"
+ "sync"
 
 "google.golang.org/grpc/internal/buffer"
 )
@@ -31,19 +32,21 @@ import (
 //
 // This type is safe for concurrent access.
type CallbackSerializer struct {
- // Done is closed once the serializer is shut down completely, i.e a
- // scheduled callback, if any, that was running when the context passed to
- // NewCallbackSerializer is cancelled, has completed and the serializer has
- // deallocated all its resources.
+ // Done is closed once the serializer is shut down completely, i.e. all
+ // scheduled callbacks are executed and the serializer has deallocated all
+ // its resources.
 Done chan struct{}
 
 callbacks *buffer.Unbounded
+ closedMu sync.Mutex
+ closed bool
}

// NewCallbackSerializer returns a new CallbackSerializer instance. The provided
// context will be passed to the scheduled callbacks. Users should cancel the
// provided context to shutdown the CallbackSerializer. It is guaranteed that no
-// callbacks will be executed once this context is canceled.
+// callbacks will be added once this context is canceled, and any pending un-run
+// callbacks will be executed before the serializer is shut down.
func NewCallbackSerializer(ctx context.Context) *CallbackSerializer {
 t := &CallbackSerializer{
 Done: make(chan struct{}),
@@ -57,17 +60,30 @@ func NewCallbackSerializer(ctx context.Context) *CallbackSerializer {
//
// Callbacks are expected to honor the context when performing any blocking
// operations, and should return early when the context is canceled.
-func (t *CallbackSerializer) Schedule(f func(ctx context.Context)) {
+//
+// Return value indicates if the callback was successfully added to the list of
+// callbacks to be executed by the serializer. It is not possible to add
+// callbacks once the context passed to NewCallbackSerializer is cancelled.
+func (t *CallbackSerializer) Schedule(f func(ctx context.Context)) bool {
+ t.closedMu.Lock()
+ defer t.closedMu.Unlock()
+
+ if t.closed {
+ return false
+ }
 t.callbacks.Put(f)
+ return true
}

func (t *CallbackSerializer) run(ctx context.Context) {
+ var backlog []func(context.Context)
+
 defer close(t.Done)
 for ctx.Err() == nil {
 select {
 case <-ctx.Done():
- t.callbacks.Close()
- return
+ // Do nothing here. Next iteration of the for loop will not happen,
+ // since ctx.Err() would be non-nil.
 case callback, ok := <-t.callbacks.Get():
 if !ok {
 return
@@ -76,4 +92,28 @@ func (t *CallbackSerializer) run(ctx context.Context) {
 callback.(func(ctx context.Context))(ctx)
 }
 }
+
+ // Fetch pending callbacks if any, and execute them before returning from
+ // this method and closing t.Done.
+ t.closedMu.Lock()
+ t.closed = true
+ backlog = t.fetchPendingCallbacks()
+ t.callbacks.Close()
+ t.closedMu.Unlock()
+ for _, b := range backlog {
+ b(ctx)
+ }
+}
+
+func (t *CallbackSerializer) fetchPendingCallbacks() []func(context.Context) {
+ var backlog []func(context.Context)
+ for {
+ select {
+ case b := <-t.callbacks.Get():
+ backlog = append(backlog, b.(func(context.Context)))
+ t.callbacks.Load()
+ default:
+ return backlog
+ }
+ }
}
diff --git a/internal/grpcsync/callback_serializer_test.go b/internal/grpcsync/callback_serializer_test.go
index 8c465af66aea..cdbd446f8101 100644
--- a/internal/grpcsync/callback_serializer_test.go
+++ b/internal/grpcsync/callback_serializer_test.go
@@ -20,7 +20,6 @@ package grpcsync
 
 import (
 "context"
- "fmt"
 "sync"
 "testing"
 "time"
@@ -141,7 +140,10 @@ func (s) TestCallbackSerializer_Schedule_Concurrent(t *testing.T) {
// are not executed once Close() returns.
func (s) TestCallbackSerializer_Schedule_Close(t *testing.T) {
 ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout)
- cs := NewCallbackSerializer(ctx)
+ defer cancel()
+
+ serializerCtx, serializerCancel := context.WithTimeout(context.Background(), defaultTestTimeout)
+ cs := NewCallbackSerializer(serializerCtx)

 // Schedule a callback which blocks until the context passed to it is
 // canceled. It also closes a channel to signal that it has started.
@@ -151,36 +153,54 @@ func (s) TestCallbackSerializer_Schedule_Close(t *testing.T) {
 <-ctx.Done()
 })

- // Schedule a bunch of callbacks. These should not be exeuted since the first
- // one started earlier is blocked.
+ // Schedule a bunch of callbacks. These should be executed since they are
+ // scheduled before the serializer is closed.
 const numCallbacks = 10
- errCh := make(chan error, numCallbacks)
+ callbackCh := make(chan int, numCallbacks)
 for i := 0; i < numCallbacks; i++ {
- cs.Schedule(func(_ context.Context) {
- errCh <- fmt.Errorf("callback %d executed when not expected to", i)
- })
+ num := i
+ if !cs.Schedule(func(context.Context) { callbackCh <- num }) {
+ t.Fatal("Schedule failed to accept a callback when the serializer is yet to be closed")
+ }
 }

 // Ensure that none of the newer callbacks are executed at this point.
 select {
 case <-time.After(defaultTestShortTimeout):
- case err := <-errCh:
- t.Fatal(err)
+ case <-callbackCh:
+ t.Fatal("Newer callback executed when older one is still executing")
 }

 // Wait for the first callback to start before closing the scheduler.
 <-firstCallbackStartedCh

- // Cancel the context which will unblock the first callback. None of the
+ // Cancel the context which will unblock the first callback. All of the
 // other callbacks (which have not started executing at this point) should
 // be executed after this.
- cancel()
+ serializerCancel()
+
+ // Ensure that the newer callbacks are executed.
+ for i := 0; i < numCallbacks; i++ {
+ select {
+ case <-ctx.Done():
+ t.Fatal("Timeout when waiting for callback scheduled before close to be executed")
+ case num := <-callbackCh:
+ if num != i {
+ t.Fatalf("Executing callback %d, want %d", num, i)
+ }
+ }
+ }
 <-cs.Done

- // Ensure that the newer callbacks are not executed.
+ done := make(chan struct{})
+ if cs.Schedule(func(context.Context) { close(done) }) {
+ t.Fatal("Scheduled a callback after closing the serializer")
+ }
+
+ // Ensure that this last callback is not executed at this point.
 select {
 case <-time.After(defaultTestShortTimeout):
- case err := <-errCh:
- t.Fatal(err)
+ case <-done:
+ t.Fatal("Newer callback executed when scheduled after closing serializer")
 }
}
diff --git a/picker_wrapper.go b/picker_wrapper.go
index c525dc070fc6..8e24d864986d 100644
--- a/picker_wrapper.go
+++ b/picker_wrapper.go
@@ -36,6 +36,7 @@ import (
 type pickerWrapper struct {
 mu sync.Mutex
 done bool
+ idle bool
 blockingCh chan struct{}
 picker balancer.Picker
}
@@ -47,7 +48,11 @@ func newPickerWrapper() *pickerWrapper {
// updatePicker is called by UpdateBalancerState. It unblocks all blocked pick.
func (pw *pickerWrapper) updatePicker(p balancer.Picker) {
 pw.mu.Lock()
- if pw.done {
+ if pw.done || pw.idle {
+ // There is a small window where a picker update from the LB policy can
+ // race with the channel going to idle mode. If the picker is idle here,
+ // it is because the channel asked it to do so, and therefore it is safe
+ // to ignore the update from the LB policy.
pw.mu.Unlock()
 return
 }
@@ -187,6 +192,25 @@ func (pw *pickerWrapper) close() {
 close(pw.blockingCh)
}

+func (pw *pickerWrapper) enterIdleMode() {
+ pw.mu.Lock()
+ defer pw.mu.Unlock()
+ if pw.done {
+ return
+ }
+ pw.idle = true
+}
+
+func (pw *pickerWrapper) exitIdleMode() {
+ pw.mu.Lock()
+ defer pw.mu.Unlock()
+ if pw.done {
+ return
+ }
+ pw.blockingCh = make(chan struct{})
+ pw.idle = false
+}
+
// dropError is a wrapper error that indicates the LB policy wishes to drop the
// RPC and not retry it.
type dropError struct {
diff --git a/resolver_conn_wrapper.go b/resolver_conn_wrapper.go
index ce12b52ecdc0..b408b3688f2e 100644
--- a/resolver_conn_wrapper.go
+++ b/resolver_conn_wrapper.go
@@ -21,6 +21,7 @@ package grpc
 
 import (
 "context"
 "strings"
+ "sync"
 
 "google.golang.org/grpc/balancer"
 "google.golang.org/grpc/internal/channelz"
@@ -44,15 +45,20 @@ type ccResolverWrapper struct {
 cc resolverStateUpdater
 channelzID *channelz.Identifier
 ignoreServiceConfig bool
-
- // Outgoing (gRPC --> resolver) and incoming (resolver --> gRPC) calls are
- // guaranteed to execute in a mutually exclusive manner as they are
- // scheduled on the CallbackSerializer. Fields accessed *only* in serializer
- // callbacks, can therefore be accessed without a mutex.
- serializer *grpcsync.CallbackSerializer
- serializerCancel context.CancelFunc
- resolver resolver.Resolver
- curState resolver.State
+ opts ccResolverWrapperOpts
+ serializer *grpcsync.CallbackSerializer // To serialize all incoming calls.
+ serializerCancel context.CancelFunc // To close the serializer, accessed only from close().
+
+ // All incoming (resolver --> gRPC) calls are guaranteed to execute in a
+ // mutually exclusive manner as they are scheduled on the serializer.
+ // Fields accessed *only* in these serializer callbacks can therefore be
+ // accessed without a mutex.
+ curState resolver.State
+
+ // mu guards access to the below fields.
+ mu sync.Mutex
+ closed bool
+ resolver resolver.Resolver // Accessed only from outgoing calls.
}

// ccResolverWrapperOpts wraps the arguments to be passed when creating a new
@@ -72,38 +78,81 @@ func newCCResolverWrapper(cc resolverStateUpdater, opts ccResolverWrapperOpts) (
 cc: cc,
 channelzID: opts.channelzID,
 ignoreServiceConfig: opts.bOpts.DisableServiceConfig,
+ opts: opts,
 serializer: grpcsync.NewCallbackSerializer(ctx),
 serializerCancel: cancel,
 }

+ // Cannot hold the lock at build time because the resolver can send an
+ // update or error inline and these incoming calls grab the lock to schedule
+ // a callback in the serializer.
 r, err := opts.builder.Build(opts.target, ccr, opts.bOpts)
 if err != nil {
 cancel()
 return nil, err
 }
+
+ // Any error reported by the resolver at build time that leads to a
+ // re-resolution request from the balancer is dropped by grpc until we
+ // return from this function. So, we don't have to handle pending resolveNow
+ // requests here.
+ ccr.mu.Lock()
 ccr.resolver = r
+ ccr.mu.Unlock()
+
 return ccr, nil
}

func (ccr *ccResolverWrapper) resolveNow(o resolver.ResolveNowOptions) {
- ccr.serializer.Schedule(func(_ context.Context) {
- ccr.resolver.ResolveNow(o)
- })
+ ccr.mu.Lock()
+ defer ccr.mu.Unlock()
+
+ // ccr.resolver field is set only after the call to Build() returns. But in
+ // the process of building, the resolver may send an error update which,
+ // when propagated to the balancer, may result in a re-resolution request.
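+ // In that case, ccr.resolver is still nil, and the request is simply
+ // dropped by the nil check below.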
+ if ccr.closed || ccr.resolver == nil {
+ return
+ }
+ ccr.resolver.ResolveNow(o)
}

func (ccr *ccResolverWrapper) close() {
+ ccr.mu.Lock()
+ if ccr.closed {
+ ccr.mu.Unlock()
+ return
+ }
+
+ channelz.Info(logger, ccr.channelzID, "Closing the name resolver")
+
 // Close the serializer to ensure that no more calls from the resolver are
- // handled, before closing the resolver.
+ // handled, before actually closing the resolver.
 ccr.serializerCancel()
+ ccr.closed = true
+ r := ccr.resolver
+ ccr.mu.Unlock()
+
+ // Give enqueued callbacks a chance to finish.
 <-ccr.serializer.Done
- ccr.resolver.Close()
+
+ // Spawn a goroutine to close the resolver (since it may block trying to
+ // clean up all allocated resources) and return early.
+ go r.Close()
+}
+
+// serializerScheduleLocked is a convenience method to schedule a function to be
+// run on the serializer while holding ccr.mu.
+func (ccr *ccResolverWrapper) serializerScheduleLocked(f func(context.Context)) {
+ ccr.mu.Lock()
+ ccr.serializer.Schedule(f)
+ ccr.mu.Unlock()
}

// UpdateState is called by resolver implementations to report new state to gRPC
// which includes addresses and service config.
func (ccr *ccResolverWrapper) UpdateState(s resolver.State) error {
 errCh := make(chan error, 1)
- ccr.serializer.Schedule(func(_ context.Context) {
+ ok := ccr.serializer.Schedule(func(context.Context) {
 ccr.addChannelzTraceEvent(s)
 ccr.curState = s
 if err := ccr.cc.updateResolverState(ccr.curState, nil); err == balancer.ErrBadResolverState {
@@ -112,22 +161,19 @@ func (ccr *ccResolverWrapper) UpdateState(s resolver.State) error {
 }
 errCh <- nil
 })
-
- // If the resolver wrapper is closed when waiting for this state update to
- // be handled, the callback serializer will be closed as well, and we can
- // rely on its Done channel to ensure that we don't block here forever.
- select {
- case err := <-errCh:
- return err
- case <-ccr.serializer.Done:
+ if !ok {
+ // The only time when Schedule() fails to add the callback to the
+ // serializer is when the serializer is closed, and this happens only
+ // when the resolver wrapper is closed.
 return nil
 }
+ return <-errCh
}

// ReportError is called by resolver implementations to report errors
// encountered during name resolution to gRPC.
func (ccr *ccResolverWrapper) ReportError(err error) {
- ccr.serializer.Schedule(func(_ context.Context) {
+ ccr.serializerScheduleLocked(func(_ context.Context) {
 channelz.Warningf(logger, ccr.channelzID, "ccResolverWrapper: reporting error to cc: %v", err)
 ccr.cc.updateResolverState(resolver.State{}, err)
 })
@@ -136,7 +182,7 @@ func (ccr *ccResolverWrapper) ReportError(err error) {
// NewAddress is called by the resolver implementation to send addresses to
// gRPC.
func (ccr *ccResolverWrapper) NewAddress(addrs []resolver.Address) {
- ccr.serializer.Schedule(func(_ context.Context) {
+ ccr.serializerScheduleLocked(func(_ context.Context) {
 ccr.addChannelzTraceEvent(resolver.State{Addresses: addrs, ServiceConfig: ccr.curState.ServiceConfig})
 ccr.curState.Addresses = addrs
 ccr.cc.updateResolverState(ccr.curState, nil)
@@ -146,7 +192,7 @@
// NewServiceConfig is called by the resolver implementation to send service
// configs to gRPC.
func (ccr *ccResolverWrapper) NewServiceConfig(sc string) { - ccr.serializer.Schedule(func(_ context.Context) { + ccr.serializerScheduleLocked(func(_ context.Context) { channelz.Infof(logger, ccr.channelzID, "ccResolverWrapper: got new service config: %s", sc) if ccr.ignoreServiceConfig { channelz.Info(logger, ccr.channelzID, "Service config lookups disabled; ignoring config") diff --git a/stream.go b/stream.go index 06ec22cd0a9d..75ab86268ba1 100644 --- a/stream.go +++ b/stream.go @@ -155,6 +155,11 @@ type ClientStream interface { // If none of the above happen, a goroutine and a context will be leaked, and grpc // will not call the optionally-configured stats handler with a stats.End message. func (cc *ClientConn) NewStream(ctx context.Context, desc *StreamDesc, method string, opts ...CallOption) (ClientStream, error) { + if err := cc.idlenessMgr.onCallBegin(); err != nil { + return nil, err + } + defer cc.idlenessMgr.onCallEnd() + // allow interceptor to see all applicable call options, which means those // configured as defaults from dial option as well as per-call options opts = combine(cc.dopts.callOptions, opts) diff --git a/test/clientconn_state_transition_test.go b/test/clientconn_state_transition_test.go index 57f932d1eb5e..a14ff4588a0f 100644 --- a/test/clientconn_state_transition_test.go +++ b/test/clientconn_state_transition_test.go @@ -537,3 +537,10 @@ func awaitNotState(ctx context.Context, t *testing.T, cc *grpc.ClientConn, state } } } + +func awaitNoStateChange(ctx context.Context, t *testing.T, cc *grpc.ClientConn, currState connectivity.State) { + t.Helper() + if cc.WaitForStateChange(ctx, currState) { + t.Fatalf("State changed from %q to %q when no state change was expected", currState, cc.GetState()) + } +} diff --git a/test/idleness_test.go b/test/idleness_test.go new file mode 100644 index 000000000000..88366ed3ae12 --- /dev/null +++ b/test/idleness_test.go @@ -0,0 +1,423 @@ +/* + * + * Copyright 2023 gRPC authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +package test + +import ( + "context" + "errors" + "fmt" + "strings" + "testing" + "time" + + "google.golang.org/grpc" + "google.golang.org/grpc/codes" + "google.golang.org/grpc/connectivity" + "google.golang.org/grpc/credentials/insecure" + "google.golang.org/grpc/internal/channelz" + "google.golang.org/grpc/internal/stubserver" + "google.golang.org/grpc/resolver" + "google.golang.org/grpc/resolver/manual" + "google.golang.org/grpc/status" + + testgrpc "google.golang.org/grpc/interop/grpc_testing" + testpb "google.golang.org/grpc/interop/grpc_testing" +) + +const defaultTestShortIdleTimeout = 500 * time.Millisecond + +// channelzTraceEventFound looks up the top-channels in channelz (expects a +// single one), and checks if there is a trace event on the channel matching the +// provided description string. 
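+// It keeps checking until the event is found, an inconsistency is seen, or the
+// given context expires.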
+func channelzTraceEventFound(ctx context.Context, wantDesc string) error {
+ for ctx.Err() == nil {
+ tcs, _ := channelz.GetTopChannels(0, 0)
+ if l := len(tcs); l != 1 {
+ return fmt.Errorf("when looking for channelz trace event with description %q, found %d top-level channels, want 1", wantDesc, l)
+ }
+ if tcs[0].Trace == nil {
+ return fmt.Errorf("when looking for channelz trace event with description %q, no trace events found for top-level channel", wantDesc)
+ }
+
+ for _, e := range tcs[0].Trace.Events {
+ if strings.Contains(e.Desc, wantDesc) {
+ return nil
+ }
+ }
+ }
+ return fmt.Errorf("when looking for channelz trace event with description %q, %w", wantDesc, ctx.Err())
+}
+
+// channelzTraceEventNotFound looks up the top-channels in channelz (expects a
+// single one), and verifies that there is no trace event on the channel
+// matching the provided description string.
+func channelzTraceEventNotFound(ctx context.Context, wantDesc string) error {
+ sCtx, sCancel := context.WithTimeout(ctx, defaultTestShortTimeout)
+ defer sCancel()
+
+ err := channelzTraceEventFound(sCtx, wantDesc)
+ if err == nil {
+ return fmt.Errorf("found channelz trace event with description %q, when expected not to", wantDesc)
+ }
+ if !errors.Is(err, context.DeadlineExceeded) {
+ return err
+ }
+ return nil
+}
+
+// Tests the case where channel idleness is disabled by passing an idle_timeout
+// of 0. Verifies that a READY channel with no RPCs does not move to IDLE.
+func (s) TestChannelIdleness_Disabled_NoActivity(t *testing.T) {
+ // Setup channelz for testing.
+ czCleanup := channelz.NewChannelzStorageForTesting()
+ t.Cleanup(func() { czCleanupWrapper(czCleanup, t) })
+
+ // Create a ClientConn with idle_timeout set to 0.
+ r := manual.NewBuilderWithScheme("whatever")
+ dopts := []grpc.DialOption{
+ grpc.WithTransportCredentials(insecure.NewCredentials()),
+ grpc.WithResolvers(r),
+ grpc.WithIdleTimeout(0), // Disable idleness.
+ grpc.WithDefaultServiceConfig(`{"loadBalancingConfig": [{"round_robin":{}}]}`),
+ }
+ cc, err := grpc.Dial(r.Scheme()+":///test.server", dopts...)
+ if err != nil {
+ t.Fatalf("grpc.Dial() failed: %v", err)
+ }
+ t.Cleanup(func() { cc.Close() })
+
+ // Start a test backend and push an address update via the resolver.
+ backend := stubserver.StartTestService(t, nil)
+ t.Cleanup(backend.Stop)
+ r.UpdateState(resolver.State{Addresses: []resolver.Address{{Addr: backend.Address}}})
+
+ // Verify that the ClientConn moves to READY.
+ ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout)
+ defer cancel()
+ awaitState(ctx, t, cc, connectivity.Ready)
+
+ // Verify that the ClientConn stays in READY.
+ sCtx, sCancel := context.WithTimeout(ctx, 3*defaultTestShortIdleTimeout)
+ defer sCancel()
+ awaitNoStateChange(sCtx, t, cc, connectivity.Ready)
+
+ // Verify that there are no idleness related channelz events.
+ if err := channelzTraceEventNotFound(ctx, "entering idle mode"); err != nil {
+ t.Fatal(err)
+ }
+ if err := channelzTraceEventNotFound(ctx, "exiting idle mode"); err != nil {
+ t.Fatal(err)
+ }
+}
+
+// Tests the case where channel idleness is enabled by passing a small value for
+// idle_timeout. Verifies that a READY channel with no RPCs moves to IDLE.
+func (s) TestChannelIdleness_Enabled_NoActivity(t *testing.T) {
+ // Setup channelz for testing.
+ czCleanup := channelz.NewChannelzStorageForTesting()
+ t.Cleanup(func() { czCleanupWrapper(czCleanup, t) })
+
+ // Create a ClientConn with a short idle_timeout.
+ r := manual.NewBuilderWithScheme("whatever")
+ dopts := []grpc.DialOption{
+ grpc.WithTransportCredentials(insecure.NewCredentials()),
+ grpc.WithResolvers(r),
+ grpc.WithIdleTimeout(defaultTestShortIdleTimeout),
+ grpc.WithDefaultServiceConfig(`{"loadBalancingConfig": [{"round_robin":{}}]}`),
+ }
+ cc, err := grpc.Dial(r.Scheme()+":///test.server", dopts...)
+ if err != nil {
+ t.Fatalf("grpc.Dial() failed: %v", err)
+ }
+ t.Cleanup(func() { cc.Close() })
+
+ // Start a test backend and push an address update via the resolver.
+ backend := stubserver.StartTestService(t, nil)
+ t.Cleanup(backend.Stop)
+ r.UpdateState(resolver.State{Addresses: []resolver.Address{{Addr: backend.Address}}})
+
+ // Verify that the ClientConn moves to READY.
+ ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout)
+ defer cancel()
+ awaitState(ctx, t, cc, connectivity.Ready)
+
+ // Verify that the ClientConn moves to IDLE as there is no activity.
+ awaitState(ctx, t, cc, connectivity.Idle)
+
+ // Verify idleness related channelz events.
+ if err := channelzTraceEventFound(ctx, "entering idle mode"); err != nil {
+ t.Fatal(err)
+ }
+}
+
+// Tests the case where channel idleness is enabled by passing a small value for
+// idle_timeout. Verifies that a READY channel with an ongoing RPC stays READY.
+func (s) TestChannelIdleness_Enabled_OngoingCall(t *testing.T) {
+ // Setup channelz for testing.
+ czCleanup := channelz.NewChannelzStorageForTesting()
+ t.Cleanup(func() { czCleanupWrapper(czCleanup, t) })
+
+ // Create a ClientConn with a short idle_timeout.
+ r := manual.NewBuilderWithScheme("whatever")
+ dopts := []grpc.DialOption{
+ grpc.WithTransportCredentials(insecure.NewCredentials()),
+ grpc.WithResolvers(r),
+ grpc.WithIdleTimeout(defaultTestShortIdleTimeout),
+ grpc.WithDefaultServiceConfig(`{"loadBalancingConfig": [{"round_robin":{}}]}`),
+ }
+ cc, err := grpc.Dial(r.Scheme()+":///test.server", dopts...)
+ if err != nil {
+ t.Fatalf("grpc.Dial() failed: %v", err)
+ }
+ t.Cleanup(func() { cc.Close() })
+
+ // Start a test backend which keeps a unary RPC call active by blocking on a
+ // channel that is closed by the test later on. Also push an address update
+ // via the resolver.
+ blockCh := make(chan struct{})
+ backend := &stubserver.StubServer{
+ EmptyCallF: func(ctx context.Context, in *testpb.Empty) (*testpb.Empty, error) {
+ <-blockCh
+ return &testpb.Empty{}, nil
+ },
+ }
+ if err := backend.StartServer(); err != nil {
+ t.Fatalf("Failed to start backend: %v", err)
+ }
+ t.Cleanup(backend.Stop)
+ r.UpdateState(resolver.State{Addresses: []resolver.Address{{Addr: backend.Address}}})
+
+ // Verify that the ClientConn moves to READY.
+ ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout)
+ defer cancel()
+ awaitState(ctx, t, cc, connectivity.Ready)
+
+ // Spawn a goroutine which checks expected state transitions and idleness
+ // channelz trace events. It eventually closes `blockCh`, thereby unblocking
+ // the server RPC handler and the unary call below.
+ errCh := make(chan error, 1)
+ go func() {
+ // Verify that the ClientConn stays in READY.
+ sCtx, sCancel := context.WithTimeout(ctx, 3*defaultTestShortIdleTimeout)
+ defer sCancel()
+ awaitNoStateChange(sCtx, t, cc, connectivity.Ready)
+
+ // Verify that there are no idleness related channelz events.
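+ // (The ongoing RPC keeps the active calls count positive, so the idle
+ // timer only resets itself when it fires.)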
+ if err := channelzTraceEventNotFound(ctx, "entering idle mode"); err != nil {
+ errCh <- err
+ return
+ }
+ if err := channelzTraceEventNotFound(ctx, "exiting idle mode"); err != nil {
+ errCh <- err
+ return
+ }
+
+ // Unblock the unary RPC on the server.
+ close(blockCh)
+ errCh <- nil
+ }()
+
+ // Make a unary RPC that blocks on the server, thereby ensuring that the
+ // count of active RPCs on the client is non-zero.
+ client := testgrpc.NewTestServiceClient(cc)
+ if _, err := client.EmptyCall(ctx, &testpb.Empty{}); err != nil {
+ t.Errorf("EmptyCall RPC failed: %v", err)
+ }
+
+ select {
+ case err := <-errCh:
+ if err != nil {
+ t.Fatal(err)
+ }
+ case <-ctx.Done():
+ t.Fatalf("Timeout when trying to verify that an active RPC keeps channel from moving to IDLE")
+ }
+}
+
+// Tests the case where channel idleness is enabled by passing a small value for
+// idle_timeout. Verifies that activity on a READY channel (frequent and short
+// RPCs) keeps it from moving to IDLE.
+func (s) TestChannelIdleness_Enabled_ActiveSinceLastCheck(t *testing.T) {
+ // Setup channelz for testing.
+ czCleanup := channelz.NewChannelzStorageForTesting()
+ t.Cleanup(func() { czCleanupWrapper(czCleanup, t) })
+
+ // Create a ClientConn with a short idle_timeout.
+ r := manual.NewBuilderWithScheme("whatever")
+ dopts := []grpc.DialOption{
+ grpc.WithTransportCredentials(insecure.NewCredentials()),
+ grpc.WithResolvers(r),
+ grpc.WithIdleTimeout(defaultTestShortIdleTimeout),
+ grpc.WithDefaultServiceConfig(`{"loadBalancingConfig": [{"round_robin":{}}]}`),
+ }
+ cc, err := grpc.Dial(r.Scheme()+":///test.server", dopts...)
+ if err != nil {
+ t.Fatalf("grpc.Dial() failed: %v", err)
+ }
+ t.Cleanup(func() { cc.Close() })
+
+ // Start a test backend and push an address update via the resolver.
+ backend := stubserver.StartTestService(t, nil)
+ t.Cleanup(backend.Stop)
+ r.UpdateState(resolver.State{Addresses: []resolver.Address{{Addr: backend.Address}}})
+
+ // Verify that the ClientConn moves to READY.
+ ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout)
+ defer cancel()
+ awaitState(ctx, t, cc, connectivity.Ready)
+
+ // For a duration of three times the configured idle timeout, make RPCs
+ // every now and then and ensure that the channel does not move out of
+ // READY.
+ sCtx, sCancel := context.WithTimeout(ctx, 3*defaultTestShortIdleTimeout)
+ defer sCancel()
+ go func() {
+ for ; sCtx.Err() == nil; <-time.After(defaultTestShortIdleTimeout / 4) {
+ client := testgrpc.NewTestServiceClient(cc)
+ if _, err := client.EmptyCall(sCtx, &testpb.Empty{}); err != nil {
+ // While iterating through this for loop, at some point in time,
+ // the context deadline will expire. It is safe to ignore that
+ // error code.
+ if status.Code(err) != codes.DeadlineExceeded {
+ t.Errorf("EmptyCall RPC failed: %v", err)
+ return
+ }
+ }
+ }
+ }()
+
+ // Verify that the ClientConn stays in READY.
+ awaitNoStateChange(sCtx, t, cc, connectivity.Ready)
+
+ // Verify that there are no idleness related channelz events.
+ if err := channelzTraceEventNotFound(ctx, "entering idle mode"); err != nil {
+ t.Fatal(err)
+ }
+ if err := channelzTraceEventNotFound(ctx, "exiting idle mode"); err != nil {
+ t.Fatal(err)
+ }
+}
+
+// Tests the case where channel idleness is enabled by passing a small value for
+// idle_timeout. Verifies that a READY channel with no RPCs moves to IDLE. Also
+// verifies that a subsequent RPC on the IDLE channel kicks it out of IDLE.
+func (s) TestChannelIdleness_Enabled_ExitIdleOnRPC(t *testing.T) {
+ // Setup channelz for testing.
+ czCleanup := channelz.NewChannelzStorageForTesting()
+ t.Cleanup(func() { czCleanupWrapper(czCleanup, t) })
+
+ // Start a test backend and set the bootstrap state of the resolver to
+ // include this address. This will ensure that when the resolver is
+ // restarted when exiting idle, it will push the same address to grpc again.
+ r := manual.NewBuilderWithScheme("whatever")
+ backend := stubserver.StartTestService(t, nil)
+ t.Cleanup(backend.Stop)
+ r.InitialState(resolver.State{Addresses: []resolver.Address{{Addr: backend.Address}}})
+
+ // Create a ClientConn with a short idle_timeout.
+ dopts := []grpc.DialOption{
+ grpc.WithTransportCredentials(insecure.NewCredentials()),
+ grpc.WithResolvers(r),
+ grpc.WithIdleTimeout(defaultTestShortIdleTimeout),
+ grpc.WithDefaultServiceConfig(`{"loadBalancingConfig": [{"round_robin":{}}]}`),
+ }
+ cc, err := grpc.Dial(r.Scheme()+":///test.server", dopts...)
+ if err != nil {
+ t.Fatalf("grpc.Dial() failed: %v", err)
+ }
+ t.Cleanup(func() { cc.Close() })
+
+ // Verify that the ClientConn moves to READY.
+ ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout)
+ defer cancel()
+ awaitState(ctx, t, cc, connectivity.Ready)
+
+ // Verify that the ClientConn moves to IDLE as there is no activity.
+ awaitState(ctx, t, cc, connectivity.Idle)
+
+ // Verify idleness related channelz events.
+ if err := channelzTraceEventFound(ctx, "entering idle mode"); err != nil {
+ t.Fatal(err)
+ }
+
+ // Make an RPC and ensure that it succeeds and moves the channel back to
+ // READY.
+ client := testgrpc.NewTestServiceClient(cc)
+ if _, err := client.EmptyCall(ctx, &testpb.Empty{}); err != nil {
+ t.Fatalf("EmptyCall RPC failed: %v", err)
+ }
+ awaitState(ctx, t, cc, connectivity.Ready)
+ if err := channelzTraceEventFound(ctx, "exiting idle mode"); err != nil {
+ t.Fatal(err)
+ }
+}
+
+// Tests the case where channel idleness is enabled by passing a small value for
+// idle_timeout. Simulates a race between the idle timer firing and RPCs being
+// initiated, after a period of inactivity on the channel.
+//
+// After a period of inactivity (for the configured idle timeout duration), when
+// RPCs are started, there are two possibilities:
+// - the idle timer wins the race and puts the channel in idle. The RPCs then
+// kick it out of idle.
+// - the RPCs win the race, and therefore the channel never moves to idle.
+//
+// In either of these cases, all RPCs must succeed.
+func (s) TestChannelIdleness_Enabled_IdleTimeoutRacesWithRPCs(t *testing.T) {
+ // Setup channelz for testing.
+ czCleanup := channelz.NewChannelzStorageForTesting()
+ t.Cleanup(func() { czCleanupWrapper(czCleanup, t) })
+
+ // Start a test backend and set the bootstrap state of the resolver to
+ // include this address. This will ensure that when the resolver is
+ // restarted when exiting idle, it will push the same address to grpc again.
+ r := manual.NewBuilderWithScheme("whatever")
+ backend := stubserver.StartTestService(t, nil)
+ t.Cleanup(backend.Stop)
+ r.InitialState(resolver.State{Addresses: []resolver.Address{{Addr: backend.Address}}})
+
+ // Create a ClientConn with a short idle_timeout.
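+ // Note that this test deliberately uses defaultTestShortTimeout, and not
+ // defaultTestShortIdleTimeout, as the idle timeout, so that the timer
+ // fires at about the same cadence as the RPCs made below.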
+ dopts := []grpc.DialOption{
+ grpc.WithTransportCredentials(insecure.NewCredentials()),
+ grpc.WithResolvers(r),
+ grpc.WithIdleTimeout(defaultTestShortTimeout),
+ grpc.WithDefaultServiceConfig(`{"loadBalancingConfig": [{"round_robin":{}}]}`),
+ }
+ cc, err := grpc.Dial(r.Scheme()+":///test.server", dopts...)
+ if err != nil {
+ t.Fatalf("grpc.Dial() failed: %v", err)
+ }
+ t.Cleanup(func() { cc.Close() })
+
+ // Verify that the ClientConn moves to READY.
+ ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout)
+ defer cancel()
+ awaitState(ctx, t, cc, connectivity.Ready)
+
+ // Make an RPC every defaultTestShortTimeout duration so as to race with the
+ // idle timeout. Whether the idle timeout wins the race or the RPC wins the
+ // race, RPCs must succeed.
+ client := testgrpc.NewTestServiceClient(cc)
+ for i := 0; i < 20; i++ {
+ <-time.After(defaultTestShortTimeout)
+ if _, err := client.EmptyCall(ctx, &testpb.Empty{}); err != nil {
+ t.Errorf("EmptyCall RPC failed: %v", err)
+ }
+ }
+}

From 511a96359f5525920014c2d1df31aac07f5bbeaf Mon Sep 17 00:00:00 2001
From: apolcyn
Date: Mon, 22 May 2023 15:32:29 -0700
Subject: [PATCH 50/60] interop: let the interop client send additional metadata, controlled by a flag (#6295)

---
 interop/client/client.go | 43 ++++++++++++++++++++++++++++++++++++++++
 1 file changed, 43 insertions(+)

diff --git a/interop/client/client.go b/interop/client/client.go
index a4228190e12c..8238e0a106d7 100644
--- a/interop/client/client.go
+++ b/interop/client/client.go
@@ -24,12 +24,14 @@
 package main

 import (
+ "context"
 "crypto/tls"
 "crypto/x509"
 "flag"
 "net"
 "os"
 "strconv"
+ "strings"
 "time"

 "golang.org/x/oauth2"
@@ -41,6 +43,7 @@ import (
 "google.golang.org/grpc/credentials/oauth"
 "google.golang.org/grpc/grpclog"
 "google.golang.org/grpc/interop"
+ "google.golang.org/grpc/metadata"
 "google.golang.org/grpc/resolver"
 "google.golang.org/grpc/testdata"

@@ -75,6 +78,7 @@ var (
 soakOverallTimeoutSeconds = flag.Int("soak_overall_timeout_seconds", 10, "The overall number of seconds after which a soak test should stop and fail, if the desired number of iterations have not yet completed.")
 soakMinTimeMsBetweenRPCs = flag.Int("soak_min_time_ms_between_rpcs", 0, "The minimum time in milliseconds between consecutive RPCs in a soak test (rpc_soak or channel_soak), useful for limiting QPS")
 tlsServerName = flag.String("server_host_override", "", "The server name used to verify the hostname returned by TLS handshake if it is not empty. Otherwise, --server_host is used.")
+ additionalMetadata = flag.String("additional_metadata", "", "Additional metadata to send in each request, as a semicolon-separated list of key:value pairs.")
 testCase = flag.String("test_case", "large_unary",
 `Configure different test cases. Valid options are:
 empty_unary : empty (zero bytes) request and response;
@@ -115,6 +119,34 @@ const (
 credsComputeEngineCreds
 )

+// Parses the --additional_metadata flag and returns metadata to send on each RPC,
+// formatted as per https://pkg.go.dev/google.golang.org/grpc/metadata#Pairs.
+// Allow any character but semicolons in values. If the flag is empty, return a nil slice.
+func parseAdditionalMetadataFlag() []string {
+ if len(*additionalMetadata) == 0 {
+ return nil
+ }
+ r := *additionalMetadata
+ addMd := make([]string, 0)
+ for len(r) > 0 {
+ i := strings.Index(r, ":")
+ if i < 0 {
+ logger.Fatalf("Error parsing --additional_metadata flag: missing colon separator")
+ }
+ addMd = append(addMd, r[:i]) // append key
+ r = r[i+1:]
+ i = strings.Index(r, ";")
+ // append value
+ if i < 0 {
+ addMd = append(addMd, r)
+ break
+ }
+ addMd = append(addMd, r[:i])
+ r = r[i+1:]
+ }
+ return addMd
+}
+
func main() {
 flag.Parse()
 logger.Infof("Client running with test case %q", *testCase)
@@ -214,6 +246,17 @@ func main() {
 if len(*serviceConfigJSON) > 0 {
 opts = append(opts, grpc.WithDisableServiceConfig(), grpc.WithDefaultServiceConfig(*serviceConfigJSON))
 }
+ if addMd := parseAdditionalMetadataFlag(); addMd != nil {
+ unaryAddMd := func(ctx context.Context, method string, req, reply interface{}, cc *grpc.ClientConn, invoker grpc.UnaryInvoker, opts ...grpc.CallOption) error {
+ ctx = metadata.AppendToOutgoingContext(ctx, addMd...)
+ return invoker(ctx, method, req, reply, cc, opts...)
+ }
+ streamingAddMd := func(ctx context.Context, desc *grpc.StreamDesc, cc *grpc.ClientConn, method string, streamer grpc.Streamer, opts ...grpc.CallOption) (grpc.ClientStream, error) {
+ ctx = metadata.AppendToOutgoingContext(ctx, addMd...)
+ return streamer(ctx, desc, cc, method, opts...)
+ }
+ opts = append(opts, grpc.WithUnaryInterceptor(unaryAddMd), grpc.WithStreamInterceptor(streamingAddMd))
+ }
 conn, err := grpc.Dial(serverAddr, opts...)
 if err != nil {
 logger.Fatalf("Fail to dial: %v", err)

From 2a266e78a0307e22d3adfdb5c20f9c4bd12a00ac Mon Sep 17 00:00:00 2001
From: Arvind Bright
Date: Mon, 22 May 2023 15:39:17 -0700
Subject: [PATCH 51/60] authz: use pointer to structpb.Struct instead of value (#6307)

---
 authz/rbac_translator.go | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/authz/rbac_translator.go b/authz/rbac_translator.go
index d88797d49907..730ec9dc426a 100644
--- a/authz/rbac_translator.go
+++ b/authz/rbac_translator.go
@@ -62,9 +62,9 @@ type rule struct {
 }

 type auditLogger struct {
- Name string `json:"name"`
- Config structpb.Struct `json:"config"`
- IsOptional bool `json:"is_optional"`
+ Name string `json:"name"`
+ Config *structpb.Struct `json:"config"`
+ IsOptional bool `json:"is_optional"`
 }

 type auditLoggingOptions struct {
@@ -306,9 +306,12 @@ func (options *auditLoggingOptions) toProtos() (allow *v3rbacpb.RBAC_AuditLoggin
 if config.Name == "" {
 return nil, nil, fmt.Errorf("missing required field: name in audit_logging_options.audit_loggers[%v]", i)
 }
+ if config.Config == nil {
+ config.Config = &structpb.Struct{}
+ }
 typedStruct := &v1xdsudpatypepb.TypedStruct{
 TypeUrl: typeURLPrefix + config.Name,
- Value: &config.Config,
+ Value: config.Config,
 }
 customConfig, err := anypb.New(typedStruct)
 if err != nil {

From e9799e79dbdb75fd9dfb2e864fa8b0a1b36990f8 Mon Sep 17 00:00:00 2001
From: Doug Fawley
Date: Tue, 23 May 2023 09:48:08 -0700
Subject: [PATCH 52/60] client: support a 1:1 mapping with acbws and addrConns (#6302)

---
 balancer_conn_wrappers.go | 72 +++------------------
 clientconn.go | 131 +++++++++++++++++++-------------------
 picker_wrapper.go | 12 ++--
 stream.go | 17 +++--
 4 files changed, 91 insertions(+), 141 deletions(-)

diff --git a/balancer_conn_wrappers.go b/balancer_conn_wrappers.go
index 4f9944697dde..4a4dea189d0e 100644
--- a/balancer_conn_wrappers.go
+++ b/balancer_conn_wrappers.go
@@ -133,19 +133,6 @@ func
(ccb *ccBalancerWrapper) updateClientConnState(ccs *balancer.ClientConnStat // updateSubConnState is invoked by grpc to push a subConn state update to the // underlying balancer. func (ccb *ccBalancerWrapper) updateSubConnState(sc balancer.SubConn, s connectivity.State, err error) { - // When updating addresses for a SubConn, if the address in use is not in - // the new addresses, the old ac will be tearDown() and a new ac will be - // created. tearDown() generates a state change with Shutdown state, we - // don't want the balancer to receive this state change. So before - // tearDown() on the old ac, ac.acbw (acWrapper) will be set to nil, and - // this function will be called with (nil, Shutdown). We don't need to call - // balancer method in this case. - // - // TODO: Suppress the above mentioned state change to Shutdown, so we don't - // have to handle it here. - if sc == nil { - return - } ccb.mu.Lock() ccb.serializer.Schedule(func(_ context.Context) { ccb.balancer.UpdateSubConnState(sc, balancer.SubConnState{ConnectivityState: s, ConnectionError: err}) @@ -315,7 +302,7 @@ func (ccb *ccBalancerWrapper) NewSubConn(addrs []resolver.Address, opts balancer return nil, fmt.Errorf("grpc: cannot create SubConn when balancer is closed or idle") } - if len(addrs) <= 0 { + if len(addrs) == 0 { return nil, fmt.Errorf("grpc: cannot create SubConn with empty address list") } ac, err := ccb.cc.newAddrConn(addrs, opts) @@ -324,9 +311,7 @@ func (ccb *ccBalancerWrapper) NewSubConn(addrs []resolver.Address, opts balancer return nil, err } acbw := &acBalancerWrapper{ac: ac, producers: make(map[balancer.ProducerBuilder]*refCountedProducer)} - acbw.ac.mu.Lock() ac.acbw = acbw - acbw.ac.mu.Unlock() return acbw, nil } @@ -347,7 +332,7 @@ func (ccb *ccBalancerWrapper) RemoveSubConn(sc balancer.SubConn) { if !ok { return } - ccb.cc.removeAddrConn(acbw.getAddrConn(), errConnDrain) + ccb.cc.removeAddrConn(acbw.ac, errConnDrain) } func (ccb *ccBalancerWrapper) UpdateAddresses(sc balancer.SubConn, addrs []resolver.Address) { @@ -391,63 +376,24 @@ func (ccb *ccBalancerWrapper) Target() string { // acBalancerWrapper is a wrapper on top of ac for balancers. // It implements balancer.SubConn interface. type acBalancerWrapper struct { + ac *addrConn // read-only + mu sync.Mutex - ac *addrConn producers map[balancer.ProducerBuilder]*refCountedProducer } -func (acbw *acBalancerWrapper) UpdateAddresses(addrs []resolver.Address) { - acbw.mu.Lock() - defer acbw.mu.Unlock() - if len(addrs) <= 0 { - acbw.ac.cc.removeAddrConn(acbw.ac, errConnDrain) - return - } - if !acbw.ac.tryUpdateAddrs(addrs) { - cc := acbw.ac.cc - opts := acbw.ac.scopts - acbw.ac.mu.Lock() - // Set old ac.acbw to nil so the Shutdown state update will be ignored - // by balancer. - // - // TODO(bar) the state transition could be wrong when tearDown() old ac - // and creating new ac, fix the transition. 
- acbw.ac.acbw = nil - acbw.ac.mu.Unlock() - acState := acbw.ac.getState() - acbw.ac.cc.removeAddrConn(acbw.ac, errConnDrain) - - if acState == connectivity.Shutdown { - return - } +func (acbw *acBalancerWrapper) String() string { + return fmt.Sprintf("SubConn(id:%d)", acbw.ac.channelzID.Int()) +} - newAC, err := cc.newAddrConn(addrs, opts) - if err != nil { - channelz.Warningf(logger, acbw.ac.channelzID, "acBalancerWrapper: UpdateAddresses: failed to newAddrConn: %v", err) - return - } - acbw.ac = newAC - newAC.mu.Lock() - newAC.acbw = acbw - newAC.mu.Unlock() - if acState != connectivity.Idle { - go newAC.connect() - } - } +func (acbw *acBalancerWrapper) UpdateAddresses(addrs []resolver.Address) { + acbw.ac.updateAddrs(addrs) } func (acbw *acBalancerWrapper) Connect() { - acbw.mu.Lock() - defer acbw.mu.Unlock() go acbw.ac.connect() } -func (acbw *acBalancerWrapper) getAddrConn() *addrConn { - acbw.mu.Lock() - defer acbw.mu.Unlock() - return acbw.ac -} - // NewStream begins a streaming RPC on the addrConn. If the addrConn is not // ready, blocks until it is or ctx expires. Returns an error when the context // expires or the addrConn is shut down. diff --git a/clientconn.go b/clientconn.go index 1def61e5a23d..5e45f01f91cf 100644 --- a/clientconn.go +++ b/clientconn.go @@ -24,7 +24,6 @@ import ( "fmt" "math" "net/url" - "reflect" "strings" "sync" "sync/atomic" @@ -970,9 +969,6 @@ func (ac *addrConn) connect() error { ac.mu.Unlock() return nil } - // Update connectivity state within the lock to prevent subsequent or - // concurrent calls from resetting the transport more than once. - ac.updateConnectivityState(connectivity.Connecting, nil) ac.mu.Unlock() ac.resetTransport() @@ -991,58 +987,60 @@ func equalAddresses(a, b []resolver.Address) bool { return true } -// tryUpdateAddrs tries to update ac.addrs with the new addresses list. -// -// If ac is TransientFailure, it updates ac.addrs and returns true. The updated -// addresses will be picked up by retry in the next iteration after backoff. -// -// If ac is Shutdown or Idle, it updates ac.addrs and returns true. -// -// If the addresses is the same as the old list, it does nothing and returns -// true. -// -// If ac is Connecting, it returns false. The caller should tear down the ac and -// create a new one. Note that the backoff will be reset when this happens. -// -// If ac is Ready, it checks whether current connected address of ac is in the -// new addrs list. -// - If true, it updates ac.addrs and returns true. The ac will keep using -// the existing connection. -// - If false, it does nothing and returns false. -func (ac *addrConn) tryUpdateAddrs(addrs []resolver.Address) bool { +// updateAddrs updates ac.addrs with the new addresses list and handles active +// connections or connection attempts. 
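+// Depending on the connectivity state, it either only records the new
+// addresses, or also tears down the current connection attempt and kicks off
+// a new one.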
+func (ac *addrConn) updateAddrs(addrs []resolver.Address) { ac.mu.Lock() - defer ac.mu.Unlock() - channelz.Infof(logger, ac.channelzID, "addrConn: tryUpdateAddrs curAddr: %v, addrs: %v", ac.curAddr, addrs) - if ac.state == connectivity.Shutdown || - ac.state == connectivity.TransientFailure || - ac.state == connectivity.Idle { - ac.addrs = addrs - return true - } + channelz.Infof(logger, ac.channelzID, "addrConn: updateAddrs curAddr: %v, addrs: %v", ac.curAddr, addrs) if equalAddresses(ac.addrs, addrs) { - return true + ac.mu.Unlock() + return } - if ac.state == connectivity.Connecting { - return false + ac.addrs = addrs + + if ac.state == connectivity.Shutdown || + ac.state == connectivity.TransientFailure || + ac.state == connectivity.Idle { + // We were not connecting, so do nothing but update the addresses. + ac.mu.Unlock() + return } - // ac.state is Ready, try to find the connected address. - var curAddrFound bool - for _, a := range addrs { - a.ServerName = ac.cc.getServerName(a) - if reflect.DeepEqual(ac.curAddr, a) { - curAddrFound = true - break + if ac.state == connectivity.Ready { + // Try to find the connected address. + for _, a := range addrs { + a.ServerName = ac.cc.getServerName(a) + if a.Equal(ac.curAddr) { + // We are connected to a valid address, so do nothing but + // update the addresses. + ac.mu.Unlock() + return + } } } - channelz.Infof(logger, ac.channelzID, "addrConn: tryUpdateAddrs curAddrFound: %v", curAddrFound) - if curAddrFound { - ac.addrs = addrs + + // We are either connected to the wrong address or currently connecting. + // Stop the current iteration and restart. + + ac.cancel() + ac.ctx, ac.cancel = context.WithCancel(ac.cc.ctx) + + // We have to defer here because GracefulClose => Close => onClose, which + // requires locking ac.mu. + defer ac.transport.GracefulClose() + ac.transport = nil + + if len(addrs) == 0 { + ac.updateConnectivityState(connectivity.Idle, nil) } - return curAddrFound + ac.mu.Unlock() + + // Since we were connecting/connected, we should start a new connection + // attempt. + go ac.resetTransport() } // getServerName determines the serverName to be used in the connection @@ -1301,7 +1299,8 @@ func (ac *addrConn) adjustParams(r transport.GoAwayReason) { func (ac *addrConn) resetTransport() { ac.mu.Lock() - if ac.state == connectivity.Shutdown { + acCtx := ac.ctx + if acCtx.Err() != nil { ac.mu.Unlock() return } @@ -1329,15 +1328,14 @@ func (ac *addrConn) resetTransport() { ac.updateConnectivityState(connectivity.Connecting, nil) ac.mu.Unlock() - if err := ac.tryAllAddrs(addrs, connectDeadline); err != nil { + if err := ac.tryAllAddrs(acCtx, addrs, connectDeadline); err != nil { ac.cc.resolveNow(resolver.ResolveNowOptions{}) // After exhausting all addresses, the addrConn enters // TRANSIENT_FAILURE. - ac.mu.Lock() - if ac.state == connectivity.Shutdown { - ac.mu.Unlock() + if acCtx.Err() != nil { return } + ac.mu.Lock() ac.updateConnectivityState(connectivity.TransientFailure, err) // Backoff. @@ -1352,13 +1350,13 @@ func (ac *addrConn) resetTransport() { ac.mu.Unlock() case <-b: timer.Stop() - case <-ac.ctx.Done(): + case <-acCtx.Done(): timer.Stop() return } ac.mu.Lock() - if ac.state != connectivity.Shutdown { + if acCtx.Err() == nil { ac.updateConnectivityState(connectivity.Idle, err) } ac.mu.Unlock() @@ -1373,14 +1371,13 @@ func (ac *addrConn) resetTransport() { // tryAllAddrs tries to creates a connection to the addresses, and stop when at // the first successful one. 
It returns an error if no address was successfully // connected, or updates ac appropriately with the new transport. -func (ac *addrConn) tryAllAddrs(addrs []resolver.Address, connectDeadline time.Time) error { +func (ac *addrConn) tryAllAddrs(ctx context.Context, addrs []resolver.Address, connectDeadline time.Time) error { var firstConnErr error for _, addr := range addrs { - ac.mu.Lock() - if ac.state == connectivity.Shutdown { - ac.mu.Unlock() + if ctx.Err() != nil { return errConnClosing } + ac.mu.Lock() ac.cc.mu.RLock() ac.dopts.copts.KeepaliveParams = ac.cc.mkp @@ -1394,7 +1391,7 @@ func (ac *addrConn) tryAllAddrs(addrs []resolver.Address, connectDeadline time.T channelz.Infof(logger, ac.channelzID, "Subchannel picks a new address %q to connect", addr.Addr) - err := ac.createTransport(addr, copts, connectDeadline) + err := ac.createTransport(ctx, addr, copts, connectDeadline) if err == nil { return nil } @@ -1411,19 +1408,20 @@ func (ac *addrConn) tryAllAddrs(addrs []resolver.Address, connectDeadline time.T // createTransport creates a connection to addr. It returns an error if the // address was not successfully connected, or updates ac appropriately with the // new transport. -func (ac *addrConn) createTransport(addr resolver.Address, copts transport.ConnectOptions, connectDeadline time.Time) error { +func (ac *addrConn) createTransport(ctx context.Context, addr resolver.Address, copts transport.ConnectOptions, connectDeadline time.Time) error { addr.ServerName = ac.cc.getServerName(addr) - hctx, hcancel := context.WithCancel(ac.ctx) + hctx, hcancel := context.WithCancel(ctx) onClose := func(r transport.GoAwayReason) { ac.mu.Lock() defer ac.mu.Unlock() // adjust params based on GoAwayReason ac.adjustParams(r) - if ac.state == connectivity.Shutdown { - // Already shut down. tearDown() already cleared the transport and - // canceled hctx via ac.ctx, and we expected this connection to be - // closed, so do nothing here. + if ctx.Err() != nil { + // Already shut down or connection attempt canceled. tearDown() or + // updateAddrs() already cleared the transport and canceled hctx + // via ac.ctx, and we expected this connection to be closed, so do + // nothing here. return } hcancel() @@ -1442,7 +1440,7 @@ func (ac *addrConn) createTransport(addr resolver.Address, copts transport.Conne ac.updateConnectivityState(connectivity.Idle, nil) } - connectCtx, cancel := context.WithDeadline(ac.ctx, connectDeadline) + connectCtx, cancel := context.WithDeadline(ctx, connectDeadline) defer cancel() copts.ChannelzParentID = ac.channelzID @@ -1459,7 +1457,7 @@ func (ac *addrConn) createTransport(addr resolver.Address, copts transport.Conne ac.mu.Lock() defer ac.mu.Unlock() - if ac.state == connectivity.Shutdown { + if ctx.Err() != nil { // This can happen if the subConn was removed while in `Connecting` // state. tearDown() would have set the state to `Shutdown`, but // would not have closed the transport since ac.transport would not @@ -1471,6 +1469,9 @@ func (ac *addrConn) createTransport(addr resolver.Address, copts transport.Conne // The error we pass to Close() is immaterial since there are no open // streams at this point, so no trailers with error details will be sent // out. We just need to pass a non-nil error. + // + // This can also happen when updateAddrs is called during a connection + // attempt. 
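+ // In that case too, the ctx captured at the start of the attempt has been
+ // canceled, so the freshly created transport is closed before it is used.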
go newTr.Close(transport.ErrConnClosing) return nil } diff --git a/picker_wrapper.go b/picker_wrapper.go index 8e24d864986d..02f975951242 100644 --- a/picker_wrapper.go +++ b/picker_wrapper.go @@ -68,10 +68,8 @@ func (pw *pickerWrapper) updatePicker(p balancer.Picker) { // - wraps the done function in the passed in result to increment the calls // failed or calls succeeded channelz counter before invoking the actual // done function. -func doneChannelzWrapper(acw *acBalancerWrapper, result *balancer.PickResult) { - acw.mu.Lock() - ac := acw.ac - acw.mu.Unlock() +func doneChannelzWrapper(acbw *acBalancerWrapper, result *balancer.PickResult) { + ac := acbw.ac ac.incrCallsStarted() done := result.Done result.Done = func(b balancer.DoneInfo) { @@ -157,14 +155,14 @@ func (pw *pickerWrapper) pick(ctx context.Context, failfast bool, info balancer. return nil, balancer.PickResult{}, status.Error(codes.Unavailable, err.Error()) } - acw, ok := pickResult.SubConn.(*acBalancerWrapper) + acbw, ok := pickResult.SubConn.(*acBalancerWrapper) if !ok { logger.Errorf("subconn returned from pick is type %T, not *acBalancerWrapper", pickResult.SubConn) continue } - if t := acw.getAddrConn().getReadyTransport(); t != nil { + if t := acbw.ac.getReadyTransport(); t != nil { if channelz.IsOn() { - doneChannelzWrapper(acw, &pickResult) + doneChannelzWrapper(acbw, &pickResult) return t, pickResult, nil } return t, pickResult, nil diff --git a/stream.go b/stream.go index 75ab86268ba1..10092685b228 100644 --- a/stream.go +++ b/stream.go @@ -1273,14 +1273,19 @@ func newNonRetryClientStream(ctx context.Context, desc *StreamDesc, method strin as.p = &parser{r: s} ac.incrCallsStarted() if desc != unaryStreamDesc { - // Listen on cc and stream contexts to cleanup when the user closes the - // ClientConn or cancels the stream context. In all other cases, an error - // should already be injected into the recv buffer by the transport, which - // the client will eventually receive, and then we will cancel the stream's - // context in clientStream.finish. + // Listen on stream context to cleanup when the stream context is + // canceled. Also listen for the addrConn's context in case the + // addrConn is closed or reconnects to a different address. In all + // other cases, an error should already be injected into the recv + // buffer by the transport, which the client will eventually receive, + // and then we will cancel the stream's context in + // addrConnStream.finish. 
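		// (The goroutine below now snapshots ac.ctx under ac.mu before
		// selecting on it: updateAddrs() can replace ac.ctx and ac.cancel at
		// any time, so the old unsynchronized `<-ac.ctx.Done()` would race
		// with that swap.)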
go func() { + ac.mu.Lock() + acCtx := ac.ctx + ac.mu.Unlock() select { - case <-ac.ctx.Done(): + case <-acCtx.Done(): as.finish(status.Error(codes.Canceled, "grpc: the SubConn is closing")) case <-ctx.Done(): as.finish(toRPCErr(ctx.Err())) From 2ae10b2883064d4d776675b4c03439ba14bf513f Mon Sep 17 00:00:00 2001 From: Anirudh Ramachandra Date: Tue, 23 May 2023 12:50:47 -0700 Subject: [PATCH 53/60] xdsclient: remove interface check related to ResourceData (#6308) --- xds/internal/xdsclient/xdsresource/cluster_resource_type.go | 3 +-- xds/internal/xdsclient/xdsresource/endpoints_resource_type.go | 3 +-- xds/internal/xdsclient/xdsresource/listener_resource_type.go | 3 +-- .../xdsclient/xdsresource/route_config_resource_type.go | 3 +-- 4 files changed, 4 insertions(+), 8 deletions(-) diff --git a/xds/internal/xdsclient/xdsresource/cluster_resource_type.go b/xds/internal/xdsclient/xdsresource/cluster_resource_type.go index c51d38d3f06d..183801c1c68c 100644 --- a/xds/internal/xdsclient/xdsresource/cluster_resource_type.go +++ b/xds/internal/xdsclient/xdsresource/cluster_resource_type.go @@ -32,8 +32,7 @@ const ( var ( // Compile time interface checks. - _ Type = clusterResourceType{} - _ ResourceData = &ClusterResourceData{} + _ Type = clusterResourceType{} // Singleton instantiation of the resource type implementation. clusterType = clusterResourceType{ diff --git a/xds/internal/xdsclient/xdsresource/endpoints_resource_type.go b/xds/internal/xdsclient/xdsresource/endpoints_resource_type.go index 5a2dbbd20319..775a8aa19423 100644 --- a/xds/internal/xdsclient/xdsresource/endpoints_resource_type.go +++ b/xds/internal/xdsclient/xdsresource/endpoints_resource_type.go @@ -32,8 +32,7 @@ const ( var ( // Compile time interface checks. - _ Type = endpointsResourceType{} - _ ResourceData = &EndpointsResourceData{} + _ Type = endpointsResourceType{} // Singleton instantiation of the resource type implementation. endpointsType = endpointsResourceType{ diff --git a/xds/internal/xdsclient/xdsresource/listener_resource_type.go b/xds/internal/xdsclient/xdsresource/listener_resource_type.go index 33ebd3efbf10..0aff941389ec 100644 --- a/xds/internal/xdsclient/xdsresource/listener_resource_type.go +++ b/xds/internal/xdsclient/xdsresource/listener_resource_type.go @@ -35,8 +35,7 @@ const ( var ( // Compile time interface checks. - _ Type = listenerResourceType{} - _ ResourceData = &ListenerResourceData{} + _ Type = listenerResourceType{} // Singleton instantiation of the resource type implementation. listenerType = listenerResourceType{ diff --git a/xds/internal/xdsclient/xdsresource/route_config_resource_type.go b/xds/internal/xdsclient/xdsresource/route_config_resource_type.go index d06af4ae1aff..8ce5cb28596e 100644 --- a/xds/internal/xdsclient/xdsresource/route_config_resource_type.go +++ b/xds/internal/xdsclient/xdsresource/route_config_resource_type.go @@ -32,8 +32,7 @@ const ( var ( // Compile time interface checks. - _ Type = routeConfigResourceType{} - _ ResourceData = &RouteConfigResourceData{} + _ Type = routeConfigResourceType{} // Singleton instantiation of the resource type implementation. 
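	// The retained `_ Type = ...` assertions in these four files are Go's
	// zero-cost compile-time interface check: assigning a concrete value to
	// a blank identifier of the interface type fails the build if the type
	// stops satisfying the interface. A tiny illustration, with hypothetical
	// names:
	//
	//	type stringer interface{ String() string }
	//	type id int
	//	func (id) String() string { return "id" }
	//	var _ stringer = id(0) // compile error if id loses String()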
routeConfigType = routeConfigResourceType{ From a6e1acfc4420debbf347b0730e69d25b0a9c69ac Mon Sep 17 00:00:00 2001 From: Easwar Swaminathan Date: Tue, 23 May 2023 13:39:38 -0700 Subject: [PATCH 54/60] grpc: support sticky TF in pick_first LB policy (#6306) --- pickfirst.go | 12 +++++- test/pickfirst_test.go | 86 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 97 insertions(+), 1 deletion(-) diff --git a/pickfirst.go b/pickfirst.go index fc91b4d266de..89e54196e1e3 100644 --- a/pickfirst.go +++ b/pickfirst.go @@ -119,7 +119,6 @@ func (b *pickfirstBalancer) UpdateSubConnState(subConn balancer.SubConn, state b } return } - b.state = state.ConnectivityState if state.ConnectivityState == connectivity.Shutdown { b.subConn = nil return @@ -132,11 +131,21 @@ func (b *pickfirstBalancer) UpdateSubConnState(subConn balancer.SubConn, state b Picker: &picker{result: balancer.PickResult{SubConn: subConn}}, }) case connectivity.Connecting: + if b.state == connectivity.TransientFailure { + // We stay in TransientFailure until we are Ready. See A62. + return + } b.cc.UpdateState(balancer.State{ ConnectivityState: state.ConnectivityState, Picker: &picker{err: balancer.ErrNoSubConnAvailable}, }) case connectivity.Idle: + if b.state == connectivity.TransientFailure { + // We stay in TransientFailure until we are Ready. Also kick the + // subConn out of Idle into Connecting. See A62. + b.subConn.Connect() + return + } b.cc.UpdateState(balancer.State{ ConnectivityState: state.ConnectivityState, Picker: &idlePicker{subConn: subConn}, @@ -147,6 +156,7 @@ func (b *pickfirstBalancer) UpdateSubConnState(subConn balancer.SubConn, state b Picker: &picker{err: state.ConnectionError}, }) } + b.state = state.ConnectivityState } func (b *pickfirstBalancer) Close() { diff --git a/test/pickfirst_test.go b/test/pickfirst_test.go index 800d2f4178c2..75cb2a659ed5 100644 --- a/test/pickfirst_test.go +++ b/test/pickfirst_test.go @@ -20,15 +20,18 @@ package test import ( "context" + "sync" "testing" "time" "google.golang.org/grpc" + "google.golang.org/grpc/backoff" "google.golang.org/grpc/codes" "google.golang.org/grpc/connectivity" "google.golang.org/grpc/credentials/insecure" "google.golang.org/grpc/internal/channelz" "google.golang.org/grpc/internal/stubserver" + "google.golang.org/grpc/internal/testutils" "google.golang.org/grpc/internal/testutils/pickfirst" "google.golang.org/grpc/resolver" "google.golang.org/grpc/resolver/manual" @@ -293,3 +296,86 @@ func (s) TestPickFirst_NewAddressWhileBlocking(t *testing.T) { case <-doneCh: } } + +func (s) TestPickFirst_StickyTransientFailure(t *testing.T) { + // Spin up a local server which closes the connection as soon as it receives + // one. It also sends a signal on a channel whenver it received a connection. + lis, err := testutils.LocalTCPListener() + if err != nil { + t.Fatalf("Failed to create listener: %v", err) + } + t.Cleanup(func() { lis.Close() }) + + ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout) + defer cancel() + connCh := make(chan struct{}, 1) + go func() { + for { + conn, err := lis.Accept() + if err != nil { + return + } + select { + case connCh <- struct{}{}: + conn.Close() + case <-ctx.Done(): + return + } + } + }() + + // Dial the above server with a ConnectParams that does a constant backoff + // of defaultTestShortTimeout duration. 
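	// (A constant, near-zero backoff keeps the subchannel cycling through
	// CONNECTING -> TRANSIENT_FAILURE as fast as possible; per gRFC A62 the
	// pick_first policy must keep reporting TRANSIENT_FAILURE through those
	// cycles until it actually reaches READY, which is exactly what this
	// test asserts.)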
+ dopts := []grpc.DialOption{ + grpc.WithTransportCredentials(insecure.NewCredentials()), + grpc.WithDefaultServiceConfig(pickFirstServiceConfig), + grpc.WithConnectParams(grpc.ConnectParams{ + Backoff: backoff.Config{ + BaseDelay: defaultTestShortTimeout, + Multiplier: float64(0), + Jitter: float64(0), + MaxDelay: defaultTestShortTimeout, + }, + }), + } + cc, err := grpc.Dial(lis.Addr().String(), dopts...) + if err != nil { + t.Fatalf("Failed to dial server at %q: %v", lis.Addr(), err) + } + t.Cleanup(func() { cc.Close() }) + + var wg sync.WaitGroup + wg.Add(2) + // Spin up a goroutine that waits for the channel to move to + // TransientFailure. After that it checks that the channel stays in + // TransientFailure, until Shutdown. + go func() { + defer wg.Done() + for state := cc.GetState(); state != connectivity.TransientFailure; state = cc.GetState() { + if !cc.WaitForStateChange(ctx, state) { + t.Errorf("Timeout when waiting for state to change to TransientFailure. Current state is %s", state) + return + } + } + + // TODO(easwars): this waits for 10s. Need shorter deadline here. Basically once the second goroutine exits, we should exit from here too. + if cc.WaitForStateChange(ctx, connectivity.TransientFailure) { + if state := cc.GetState(); state != connectivity.Shutdown { + t.Errorf("Unexpected state change from TransientFailure to %s", cc.GetState()) + } + } + }() + // Spin up a goroutine which ensures that the pick_first LB policy is + // constantly trying to reconnect. + go func() { + defer wg.Done() + for i := 0; i < 10; i++ { + select { + case <-connCh: + case <-time.After(2 * defaultTestShortTimeout): + t.Error("Timeout when waiting for pick_first to reconnect") + } + } + }() + wg.Wait() +} From 59134c303c31ba2ce65da8962c050918098f2a25 Mon Sep 17 00:00:00 2001 From: Doug Fawley Date: Wed, 24 May 2023 10:37:54 -0700 Subject: [PATCH 55/60] client: add support for pickfirst address shuffling from gRFC A62 (#6311) --- balancer_conn_wrappers.go | 6 +++- internal/grpcrand/grpcrand.go | 7 +++++ pickfirst.go | 39 ++++++++++++++++++++++++-- test/pickfirst_test.go | 52 +++++++++++++++++++++++++++++++++++ test/resolver_update_test.go | 1 + 5 files changed, 101 insertions(+), 4 deletions(-) diff --git a/balancer_conn_wrappers.go b/balancer_conn_wrappers.go index 4a4dea189d0e..04b9ad411691 100644 --- a/balancer_conn_wrappers.go +++ b/balancer_conn_wrappers.go @@ -127,7 +127,11 @@ func (ccb *ccBalancerWrapper) updateClientConnState(ccs *balancer.ClientConnStat // We get here only if the above call to Schedule succeeds, in which case it // is guaranteed that the scheduled function will run. Therefore it is safe // to block on this channel. - return <-errCh + err := <-errCh + if logger.V(2) && err != nil { + logger.Infof("error from balancer.UpdateClientConnState: %v", err) + } + return err } // updateSubConnState is invoked by grpc to push a subConn state update to the diff --git a/internal/grpcrand/grpcrand.go b/internal/grpcrand/grpcrand.go index 0b092cfbe15d..d08e3e907666 100644 --- a/internal/grpcrand/grpcrand.go +++ b/internal/grpcrand/grpcrand.go @@ -79,3 +79,10 @@ func Uint32() uint32 { defer mu.Unlock() return r.Uint32() } + +// Shuffle implements rand.Shuffle on the grpcrand global source. 
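// Declared as a variable rather than a function so tests can substitute a
// deterministic shuffler; TestPickFirst_ShuffleAddressList below swaps it for
// one that simply reverses two entries.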
+var Shuffle = func(n int, f func(int, int)) { + mu.Lock() + defer mu.Unlock() + r.Shuffle(n, f) +} diff --git a/pickfirst.go b/pickfirst.go index 89e54196e1e3..611bef7995cd 100644 --- a/pickfirst.go +++ b/pickfirst.go @@ -19,11 +19,14 @@ package grpc import ( + "encoding/json" "errors" "fmt" "google.golang.org/grpc/balancer" "google.golang.org/grpc/connectivity" + "google.golang.org/grpc/internal/grpcrand" + "google.golang.org/grpc/serviceconfig" ) // PickFirstBalancerName is the name of the pick_first balancer. @@ -43,10 +46,28 @@ func (*pickfirstBuilder) Name() string { return PickFirstBalancerName } +type pfConfig struct { + serviceconfig.LoadBalancingConfig `json:"-"` + + // If set to true, instructs the LB policy to shuffle the order of the list + // of addresses received from the name resolver before attempting to + // connect to them. + ShuffleAddressList bool `json:"shuffleAddressList"` +} + +func (*pickfirstBuilder) ParseConfig(js json.RawMessage) (serviceconfig.LoadBalancingConfig, error) { + cfg := &pfConfig{} + if err := json.Unmarshal(js, cfg); err != nil { + return nil, fmt.Errorf("pickfirst: unable to unmarshal LB policy config: %s, error: %v", string(js), err) + } + return cfg, nil +} + type pickfirstBalancer struct { state connectivity.State cc balancer.ClientConn subConn balancer.SubConn + cfg *pfConfig } func (b *pickfirstBalancer) ResolverError(err error) { @@ -69,7 +90,8 @@ func (b *pickfirstBalancer) ResolverError(err error) { } func (b *pickfirstBalancer) UpdateClientConnState(state balancer.ClientConnState) error { - if len(state.ResolverState.Addresses) == 0 { + addrs := state.ResolverState.Addresses + if len(addrs) == 0 { // The resolver reported an empty address list. Treat it like an error by // calling b.ResolverError. 
if b.subConn != nil { @@ -82,12 +104,23 @@ func (b *pickfirstBalancer) UpdateClientConnState(state balancer.ClientConnState return balancer.ErrBadResolverState } + if state.BalancerConfig != nil { + cfg, ok := state.BalancerConfig.(*pfConfig) + if !ok { + return fmt.Errorf("pickfirstBalancer: received nil or illegal BalancerConfig (type %T): %v", state.BalancerConfig, state.BalancerConfig) + } + b.cfg = cfg + } + + if b.cfg != nil && b.cfg.ShuffleAddressList { + grpcrand.Shuffle(len(addrs), func(i, j int) { addrs[i], addrs[j] = addrs[j], addrs[i] }) + } if b.subConn != nil { - b.cc.UpdateAddresses(b.subConn, state.ResolverState.Addresses) + b.cc.UpdateAddresses(b.subConn, addrs) return nil } - subConn, err := b.cc.NewSubConn(state.ResolverState.Addresses, balancer.NewSubConnOptions{}) + subConn, err := b.cc.NewSubConn(addrs, balancer.NewSubConnOptions{}) if err != nil { if logger.V(2) { logger.Errorf("pickfirstBalancer: failed to NewSubConn: %v", err) diff --git a/test/pickfirst_test.go b/test/pickfirst_test.go index 75cb2a659ed5..62310d4d330e 100644 --- a/test/pickfirst_test.go +++ b/test/pickfirst_test.go @@ -30,6 +30,7 @@ import ( "google.golang.org/grpc/connectivity" "google.golang.org/grpc/credentials/insecure" "google.golang.org/grpc/internal/channelz" + "google.golang.org/grpc/internal/grpcrand" "google.golang.org/grpc/internal/stubserver" "google.golang.org/grpc/internal/testutils" "google.golang.org/grpc/internal/testutils/pickfirst" @@ -379,3 +380,54 @@ func (s) TestPickFirst_StickyTransientFailure(t *testing.T) { }() wg.Wait() } + +func (s) TestPickFirst_ShuffleAddressList(t *testing.T) { + const serviceConfig = `{"loadBalancingConfig": [{"pick_first":{ "shuffleAddressList": true }}]}` + + // Install a shuffler that always reverses two entries. + origShuf := grpcrand.Shuffle + defer func() { grpcrand.Shuffle = origShuf }() + grpcrand.Shuffle = func(n int, f func(int, int)) { + if n != 2 { + t.Errorf("Shuffle called with n=%v; want 2", n) + } + f(0, 1) // reverse the two addresses + } + + // Set up our backends. + cc, r, backends := setupPickFirst(t, 2) + addrs := stubBackendsToResolverAddrs(backends) + + ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout) + defer cancel() + + // Push an update with both addresses and shuffling disabled. We should + // connect to backend 0. + r.UpdateState(resolver.State{Addresses: []resolver.Address{addrs[0], addrs[1]}}) + if err := pickfirst.CheckRPCsToBackend(ctx, cc, addrs[0]); err != nil { + t.Fatal(err) + } + + // Send a config with shuffling enabled. This will reverse the addresses, + // but the channel should still be connected to backend 0. + shufState := resolver.State{ + ServiceConfig: parseServiceConfig(t, r, serviceConfig), + Addresses: []resolver.Address{addrs[0], addrs[1]}, + } + r.UpdateState(shufState) + if err := pickfirst.CheckRPCsToBackend(ctx, cc, addrs[0]); err != nil { + t.Fatal(err) + } + + // Send a resolver update with no addresses. This should push the channel + // into TransientFailure. + r.UpdateState(resolver.State{}) + awaitState(ctx, t, cc, connectivity.TransientFailure) + + // Send the same config as last time with shuffling enabled. Since we are + // not connected to backend 0, we should connect to backend 1. 
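	// (Outside of tests, applications opt into this behavior purely through
	// service config -- the same JSON used above,
	// `{"loadBalancingConfig": [{"pick_first":{ "shuffleAddressList": true }}]}`,
	// supplied via grpc.WithDefaultServiceConfig or returned by the
	// resolver.)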
+ r.UpdateState(shufState) + if err := pickfirst.CheckRPCsToBackend(ctx, cc, addrs[1]); err != nil { + t.Fatal(err) + } +} diff --git a/test/resolver_update_test.go b/test/resolver_update_test.go index ddf67267bac7..416f7175c53a 100644 --- a/test/resolver_update_test.go +++ b/test/resolver_update_test.go @@ -174,6 +174,7 @@ func (s) TestResolverUpdate_InvalidServiceConfigAfterGoodUpdate(t *testing.T) { } bal := bd.Data.(balancer.Balancer) ccUpdateCh.Send(ccs) + ccs.BalancerConfig = nil return bal.UpdateClientConnState(ccs) }, UpdateSubConnState: func(bd *stub.BalancerData, sc balancer.SubConn, state balancer.SubConnState) { From f19266cca454a52234ceffce443b6c68d395a68b Mon Sep 17 00:00:00 2001 From: Gregory Cooke Date: Thu, 25 May 2023 13:24:45 -0400 Subject: [PATCH 56/60] xds: support built-in Stdout audit logger type (#6298) This PR adds the functionality to parse and build the known StdoutLogger that we include as an implemented AuditLogger. --- authz/audit/stdout/stdout_logger.go | 7 ++- examples/go.mod | 2 +- examples/go.sum | 4 +- gcp/observability/go.sum | 2 +- go.mod | 2 +- go.sum | 4 +- internal/xds/rbac/converter.go | 35 ++++++++------- internal/xds/rbac/converter_test.go | 66 ++++++++++++++++++++++++++--- interop/observability/go.sum | 2 +- stats/opencensus/go.sum | 2 +- 10 files changed, 94 insertions(+), 32 deletions(-) diff --git a/authz/audit/stdout/stdout_logger.go b/authz/audit/stdout/stdout_logger.go index ee095527ccec..c4ba21fa4682 100644 --- a/authz/audit/stdout/stdout_logger.go +++ b/authz/audit/stdout/stdout_logger.go @@ -31,6 +31,9 @@ import ( var grpcLogger = grpclog.Component("authz-audit") +// Name is the string to identify this logger type in the registry +const Name = "stdout_logger" + func init() { audit.RegisterLoggerBuilder(&loggerBuilder{ goLogger: log.New(os.Stdout, "", 0), @@ -46,7 +49,7 @@ type event struct { Timestamp string `json:"timestamp"` // Time when the audit event is logged via Log method } -// logger implements the audit.Logger interface by logging to standard output. +// logger implements the audit.logger interface by logging to standard output. type logger struct { goLogger *log.Logger } @@ -75,7 +78,7 @@ type loggerBuilder struct { } func (loggerBuilder) Name() string { - return "stdout_logger" + return Name } // Build returns a new instance of the stdout logger. 
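The exported Name constant gives other packages a stable handle on the builder
registered by this package's init function. A minimal sketch of the lookup
path, using the registry calls exercised by the converter tests later in this
patch:

    import (
        "google.golang.org/grpc/authz/audit"
        "google.golang.org/grpc/authz/audit/stdout"
    )

    builder := audit.GetLoggerBuilder(stdout.Name) // "stdout_logger"
    logger := builder.Build(nil)                   // stdout-backed audit.Logger

The go.mod/go.sum updates below pull in the go-control-plane revision that
provides the StdoutAuditLog proto the converter consumes.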
diff --git a/examples/go.mod b/examples/go.mod index 7e49c3bce4c9..0c75c14493b0 100644 --- a/examples/go.mod +++ b/examples/go.mod @@ -17,7 +17,7 @@ require ( github.com/census-instrumentation/opencensus-proto v0.4.1 // indirect github.com/cespare/xxhash/v2 v2.2.0 // indirect github.com/cncf/udpa/go v0.0.0-20220112060539-c52dc94e7fbe // indirect - github.com/envoyproxy/go-control-plane v0.11.1-0.20230406144219-ba92d50b6596 // indirect + github.com/envoyproxy/go-control-plane v0.11.1-0.20230517004634-d1c5e72e4c41 // indirect github.com/envoyproxy/protoc-gen-validate v0.10.1 // indirect golang.org/x/net v0.9.0 // indirect golang.org/x/sys v0.7.0 // indirect diff --git a/examples/go.sum b/examples/go.sum index 8006bf69fef6..7bc1a3576a60 100644 --- a/examples/go.sum +++ b/examples/go.sum @@ -636,8 +636,8 @@ github.com/docopt/docopt-go v0.0.0-20180111231733-ee0de3bc6815/go.mod h1:WwZ+bS3 github.com/dustin/go-humanize v1.0.0/go.mod h1:HtrtbFcZ19U5GC7JDqmcUSB87Iq5E25KnS6fMYU6eOk= github.com/envoyproxy/go-control-plane v0.10.2-0.20220325020618-49ff273808a1/go.mod h1:KJwIaB5Mv44NWtYuAOFCVOjcI94vtpEz2JU/D2v6IjE= github.com/envoyproxy/go-control-plane v0.10.3/go.mod h1:fJJn/j26vwOu972OllsvAgJJM//w9BV6Fxbg2LuVd34= -github.com/envoyproxy/go-control-plane v0.11.1-0.20230406144219-ba92d50b6596 h1:MDgbDqe1rWfGBa+yCcthuqDSHvXFyenZI1U7f1IbWI8= -github.com/envoyproxy/go-control-plane v0.11.1-0.20230406144219-ba92d50b6596/go.mod h1:84cjSkVxFD9Pi/gvI5AOq5NPhGsmS8oPsJLtCON6eK8= +github.com/envoyproxy/go-control-plane v0.11.1-0.20230517004634-d1c5e72e4c41 h1:TNyxMch3whemmD2xddvlcYav9UR0hUvFeWnMUMSdhHA= +github.com/envoyproxy/go-control-plane v0.11.1-0.20230517004634-d1c5e72e4c41/go.mod h1:84cjSkVxFD9Pi/gvI5AOq5NPhGsmS8oPsJLtCON6eK8= github.com/envoyproxy/protoc-gen-validate v0.1.0/go.mod h1:iSmxcyjqTsJpI2R4NaDN7+kN2VEUnK/pcBlmesArF7c= github.com/envoyproxy/protoc-gen-validate v0.6.7/go.mod h1:dyJXwwfPK2VSqiB9Klm1J6romD608Ba7Hij42vrOBCo= github.com/envoyproxy/protoc-gen-validate v0.9.1/go.mod h1:OKNgG7TCp5pF4d6XftA0++PMirau2/yoOwVac3AbF2w= diff --git a/gcp/observability/go.sum b/gcp/observability/go.sum index bb5535fab90b..2b1c8b61771f 100644 --- a/gcp/observability/go.sum +++ b/gcp/observability/go.sum @@ -647,7 +647,7 @@ github.com/docopt/docopt-go v0.0.0-20180111231733-ee0de3bc6815/go.mod h1:WwZ+bS3 github.com/dustin/go-humanize v1.0.0/go.mod h1:HtrtbFcZ19U5GC7JDqmcUSB87Iq5E25KnS6fMYU6eOk= github.com/envoyproxy/go-control-plane v0.10.2-0.20220325020618-49ff273808a1/go.mod h1:KJwIaB5Mv44NWtYuAOFCVOjcI94vtpEz2JU/D2v6IjE= github.com/envoyproxy/go-control-plane v0.10.3/go.mod h1:fJJn/j26vwOu972OllsvAgJJM//w9BV6Fxbg2LuVd34= -github.com/envoyproxy/go-control-plane v0.11.1-0.20230406144219-ba92d50b6596/go.mod h1:84cjSkVxFD9Pi/gvI5AOq5NPhGsmS8oPsJLtCON6eK8= +github.com/envoyproxy/go-control-plane v0.11.1-0.20230517004634-d1c5e72e4c41/go.mod h1:84cjSkVxFD9Pi/gvI5AOq5NPhGsmS8oPsJLtCON6eK8= github.com/envoyproxy/protoc-gen-validate v0.1.0/go.mod h1:iSmxcyjqTsJpI2R4NaDN7+kN2VEUnK/pcBlmesArF7c= github.com/envoyproxy/protoc-gen-validate v0.6.7/go.mod h1:dyJXwwfPK2VSqiB9Klm1J6romD608Ba7Hij42vrOBCo= github.com/envoyproxy/protoc-gen-validate v0.9.1/go.mod h1:OKNgG7TCp5pF4d6XftA0++PMirau2/yoOwVac3AbF2w= diff --git a/go.mod b/go.mod index 75ea83d9309c..ecff5ff74e01 100644 --- a/go.mod +++ b/go.mod @@ -6,7 +6,7 @@ require ( github.com/cespare/xxhash/v2 v2.2.0 github.com/cncf/udpa/go v0.0.0-20220112060539-c52dc94e7fbe github.com/cncf/xds/go 
v0.0.0-20230310173818-32f1caf87195 - github.com/envoyproxy/go-control-plane v0.11.1-0.20230406144219-ba92d50b6596 + github.com/envoyproxy/go-control-plane v0.11.1-0.20230517004634-d1c5e72e4c41 github.com/golang/glog v1.1.0 github.com/golang/protobuf v1.5.3 github.com/google/go-cmp v0.5.9 diff --git a/go.sum b/go.sum index bd4e7e729e2d..80188b3fda93 100644 --- a/go.sum +++ b/go.sum @@ -17,8 +17,8 @@ github.com/cncf/xds/go v0.0.0-20230310173818-32f1caf87195 h1:58f1tJ1ra+zFINPlwLW github.com/cncf/xds/go v0.0.0-20230310173818-32f1caf87195/go.mod h1:eXthEFrGJvWHgFFCl3hGmgk+/aYT6PnTQLykKQRLhEs= github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= github.com/envoyproxy/go-control-plane v0.9.0/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4= -github.com/envoyproxy/go-control-plane v0.11.1-0.20230406144219-ba92d50b6596 h1:MDgbDqe1rWfGBa+yCcthuqDSHvXFyenZI1U7f1IbWI8= -github.com/envoyproxy/go-control-plane v0.11.1-0.20230406144219-ba92d50b6596/go.mod h1:84cjSkVxFD9Pi/gvI5AOq5NPhGsmS8oPsJLtCON6eK8= +github.com/envoyproxy/go-control-plane v0.11.1-0.20230517004634-d1c5e72e4c41 h1:TNyxMch3whemmD2xddvlcYav9UR0hUvFeWnMUMSdhHA= +github.com/envoyproxy/go-control-plane v0.11.1-0.20230517004634-d1c5e72e4c41/go.mod h1:84cjSkVxFD9Pi/gvI5AOq5NPhGsmS8oPsJLtCON6eK8= github.com/envoyproxy/protoc-gen-validate v0.1.0/go.mod h1:iSmxcyjqTsJpI2R4NaDN7+kN2VEUnK/pcBlmesArF7c= github.com/envoyproxy/protoc-gen-validate v0.10.1 h1:c0g45+xCJhdgFGw7a5QAfdS4byAbud7miNWJ1WwEVf8= github.com/envoyproxy/protoc-gen-validate v0.10.1/go.mod h1:DRjgyB0I43LtJapqN6NiRwroiAU2PaFuvk/vjgh61ss= diff --git a/internal/xds/rbac/converter.go b/internal/xds/rbac/converter.go index db22fd5a9e08..713e39cf31cb 100644 --- a/internal/xds/rbac/converter.go +++ b/internal/xds/rbac/converter.go @@ -24,14 +24,14 @@ import ( v1xdsudpatypepb "github.com/cncf/xds/go/udpa/type/v1" v3xdsxdstypepb "github.com/cncf/xds/go/xds/type/v3" v3rbacpb "github.com/envoyproxy/go-control-plane/envoy/config/rbac/v3" + v3auditloggersstreampb "github.com/envoyproxy/go-control-plane/envoy/extensions/rbac/audit_loggers/stream/v3" "google.golang.org/grpc/authz/audit" + "google.golang.org/grpc/authz/audit/stdout" + "google.golang.org/protobuf/encoding/protojson" "google.golang.org/protobuf/types/known/anypb" "google.golang.org/protobuf/types/known/structpb" ) -const udpaTypedStuctType = "type.googleapis.com/udpa.type.v1.TypedStruct" -const xdsTypedStuctType = "type.googleapis.com/xds.type.v3.TypedStruct" - func buildLogger(loggerConfig *v3rbacpb.RBAC_AuditLoggingOptions_AuditLoggerConfig) (audit.Logger, error) { if loggerConfig.GetAuditLogger().GetTypedConfig() == nil { return nil, fmt.Errorf("missing required field: TypedConfig") @@ -59,23 +59,26 @@ func buildLogger(loggerConfig *v3rbacpb.RBAC_AuditLoggingOptions_AuditLoggerConf } func getCustomConfig(config *anypb.Any) (json.RawMessage, string, error) { - switch config.GetTypeUrl() { - case udpaTypedStuctType: - typedStruct := &v1xdsudpatypepb.TypedStruct{} - if err := config.UnmarshalTo(typedStruct); err != nil { - return nil, "", fmt.Errorf("failed to unmarshal resource: %v", err) - } - return convertCustomConfig(typedStruct.TypeUrl, typedStruct.Value) - case xdsTypedStuctType: - typedStruct := &v3xdsxdstypepb.TypedStruct{} - if err := config.UnmarshalTo(typedStruct); err != nil { - return nil, "", fmt.Errorf("failed to unmarshal resource: %v", err) - } - return convertCustomConfig(typedStruct.TypeUrl, typedStruct.Value) + any, err 
:= config.UnmarshalNew() + if err != nil { + return nil, "", err + } + switch m := any.(type) { + case *v1xdsudpatypepb.TypedStruct: + return convertCustomConfig(m.TypeUrl, m.Value) + case *v3xdsxdstypepb.TypedStruct: + return convertCustomConfig(m.TypeUrl, m.Value) + case *v3auditloggersstreampb.StdoutAuditLog: + return convertStdoutConfig(m) } return nil, "", fmt.Errorf("custom config not implemented for type [%v]", config.GetTypeUrl()) } +func convertStdoutConfig(config *v3auditloggersstreampb.StdoutAuditLog) (json.RawMessage, string, error) { + json, err := protojson.Marshal(config) + return json, stdout.Name, err +} + func convertCustomConfig(typeURL string, s *structpb.Struct) (json.RawMessage, string, error) { // The gRPC policy name will be the "type name" part of the value of the // type_url field in the TypedStruct. We get this by using the part after diff --git a/internal/xds/rbac/converter_test.go b/internal/xds/rbac/converter_test.go index 253b9db2d50d..9b8004f7bd5c 100644 --- a/internal/xds/rbac/converter_test.go +++ b/internal/xds/rbac/converter_test.go @@ -17,12 +17,16 @@ package rbac import ( + "reflect" "strings" "testing" v3corepb "github.com/envoyproxy/go-control-plane/envoy/config/core/v3" v3rbacpb "github.com/envoyproxy/go-control-plane/envoy/config/rbac/v3" + v3auditloggersstreampb "github.com/envoyproxy/go-control-plane/envoy/extensions/rbac/audit_loggers/stream/v3" "google.golang.org/grpc/authz/audit" + "google.golang.org/grpc/authz/audit/stdout" + "google.golang.org/grpc/internal/testutils" "google.golang.org/protobuf/types/known/anypb" ) @@ -47,7 +51,7 @@ func (s) TestBuildLoggerErrors(t *testing.T) { loggerConfig: &v3rbacpb.RBAC_AuditLoggingOptions_AuditLoggerConfig{ AuditLogger: &v3corepb.TypedExtensionConfig{ Name: "TestAuditLoggerBuffer", - TypedConfig: &anypb.Any{}, + TypedConfig: testutils.MarshalAny(&v3rbacpb.RBAC_AuditLoggingOptions{}), }, }, expectedError: "custom config not implemented for type ", @@ -102,13 +106,65 @@ func (s) TestBuildLoggerErrors(t *testing.T) { logger, err := buildLogger(test.loggerConfig) if err != nil && !strings.HasPrefix(err.Error(), test.expectedError) { t.Fatalf("expected error: %v. got error: %v", test.expectedError, err) - } else { - if logger != test.expectedLogger { - t.Fatalf("expected logger: %v. got logger: %v", test.expectedLogger, logger) - } + } + if logger != test.expectedLogger { + t.Fatalf("expected logger: %v. 
got logger: %v", test.expectedLogger, logger) } }) } +} +func (s) TestBuildLoggerKnownTypes(t *testing.T) { + tests := []struct { + name string + loggerConfig *v3rbacpb.RBAC_AuditLoggingOptions_AuditLoggerConfig + expectedType reflect.Type + }{ + { + name: "stdout logger", + loggerConfig: &v3rbacpb.RBAC_AuditLoggingOptions_AuditLoggerConfig{ + AuditLogger: &v3corepb.TypedExtensionConfig{ + Name: stdout.Name, + TypedConfig: createStdoutPb(t), + }, + IsOptional: false, + }, + expectedType: reflect.TypeOf(audit.GetLoggerBuilder(stdout.Name).Build(nil)), + }, + { + name: "stdout logger with generic TypedConfig", + loggerConfig: &v3rbacpb.RBAC_AuditLoggingOptions_AuditLoggerConfig{ + AuditLogger: &v3corepb.TypedExtensionConfig{ + Name: stdout.Name, + TypedConfig: createXDSTypedStruct(t, map[string]interface{}{}, stdout.Name), + }, + IsOptional: false, + }, + expectedType: reflect.TypeOf(audit.GetLoggerBuilder(stdout.Name).Build(nil)), + }, + } + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + logger, err := buildLogger(test.loggerConfig) + if err != nil { + t.Fatalf("expected success. got error: %v", err) + } + loggerType := reflect.TypeOf(logger) + if test.expectedType != loggerType { + t.Fatalf("logger not of expected type. want: %v got: %v", test.expectedType, loggerType) + } + }) + } +} + +// Builds stdout config for audit logger proto. +func createStdoutPb(t *testing.T) *anypb.Any { + t.Helper() + pb := &v3auditloggersstreampb.StdoutAuditLog{} + customConfig, err := anypb.New(pb) + if err != nil { + t.Fatalf("createStdoutPb failed during anypb.New: %v", err) + } + return customConfig } diff --git a/interop/observability/go.sum b/interop/observability/go.sum index 167fb14bc0ce..8c008e984f8c 100644 --- a/interop/observability/go.sum +++ b/interop/observability/go.sum @@ -648,7 +648,7 @@ github.com/docopt/docopt-go v0.0.0-20180111231733-ee0de3bc6815/go.mod h1:WwZ+bS3 github.com/dustin/go-humanize v1.0.0/go.mod h1:HtrtbFcZ19U5GC7JDqmcUSB87Iq5E25KnS6fMYU6eOk= github.com/envoyproxy/go-control-plane v0.10.2-0.20220325020618-49ff273808a1/go.mod h1:KJwIaB5Mv44NWtYuAOFCVOjcI94vtpEz2JU/D2v6IjE= github.com/envoyproxy/go-control-plane v0.10.3/go.mod h1:fJJn/j26vwOu972OllsvAgJJM//w9BV6Fxbg2LuVd34= -github.com/envoyproxy/go-control-plane v0.11.1-0.20230406144219-ba92d50b6596/go.mod h1:84cjSkVxFD9Pi/gvI5AOq5NPhGsmS8oPsJLtCON6eK8= +github.com/envoyproxy/go-control-plane v0.11.1-0.20230517004634-d1c5e72e4c41/go.mod h1:84cjSkVxFD9Pi/gvI5AOq5NPhGsmS8oPsJLtCON6eK8= github.com/envoyproxy/protoc-gen-validate v0.1.0/go.mod h1:iSmxcyjqTsJpI2R4NaDN7+kN2VEUnK/pcBlmesArF7c= github.com/envoyproxy/protoc-gen-validate v0.6.7/go.mod h1:dyJXwwfPK2VSqiB9Klm1J6romD608Ba7Hij42vrOBCo= github.com/envoyproxy/protoc-gen-validate v0.9.1/go.mod h1:OKNgG7TCp5pF4d6XftA0++PMirau2/yoOwVac3AbF2w= diff --git a/stats/opencensus/go.sum b/stats/opencensus/go.sum index 43f540fb5667..7bdc3927073a 100644 --- a/stats/opencensus/go.sum +++ b/stats/opencensus/go.sum @@ -630,7 +630,7 @@ github.com/docopt/docopt-go v0.0.0-20180111231733-ee0de3bc6815/go.mod h1:WwZ+bS3 github.com/dustin/go-humanize v1.0.0/go.mod h1:HtrtbFcZ19U5GC7JDqmcUSB87Iq5E25KnS6fMYU6eOk= github.com/envoyproxy/go-control-plane v0.10.2-0.20220325020618-49ff273808a1/go.mod h1:KJwIaB5Mv44NWtYuAOFCVOjcI94vtpEz2JU/D2v6IjE= github.com/envoyproxy/go-control-plane v0.10.3/go.mod h1:fJJn/j26vwOu972OllsvAgJJM//w9BV6Fxbg2LuVd34= -github.com/envoyproxy/go-control-plane v0.11.1-0.20230406144219-ba92d50b6596/go.mod 
h1:84cjSkVxFD9Pi/gvI5AOq5NPhGsmS8oPsJLtCON6eK8= +github.com/envoyproxy/go-control-plane v0.11.1-0.20230517004634-d1c5e72e4c41/go.mod h1:84cjSkVxFD9Pi/gvI5AOq5NPhGsmS8oPsJLtCON6eK8= github.com/envoyproxy/protoc-gen-validate v0.1.0/go.mod h1:iSmxcyjqTsJpI2R4NaDN7+kN2VEUnK/pcBlmesArF7c= github.com/envoyproxy/protoc-gen-validate v0.6.7/go.mod h1:dyJXwwfPK2VSqiB9Klm1J6romD608Ba7Hij42vrOBCo= github.com/envoyproxy/protoc-gen-validate v0.9.1/go.mod h1:OKNgG7TCp5pF4d6XftA0++PMirau2/yoOwVac3AbF2w= From 157db1907efe7bfce70a1cbd4901d4c400d35ec7 Mon Sep 17 00:00:00 2001 From: Zach Reyes <39203661+zasweq@users.noreply.github.com> Date: Thu, 25 May 2023 17:13:37 -0400 Subject: [PATCH 57/60] stats/opencensus: Fix flaky test span (#6296) --- stats/opencensus/e2e_test.go | 137 +++++++++++++++++++++-------------- stats/opencensus/trace.go | 5 +- 2 files changed, 87 insertions(+), 55 deletions(-) diff --git a/stats/opencensus/e2e_test.go b/stats/opencensus/e2e_test.go index 0f1975ba9d4b..d70d9f87024d 100644 --- a/stats/opencensus/e2e_test.go +++ b/stats/opencensus/e2e_test.go @@ -1338,6 +1338,25 @@ func (fe *fakeExporter) ExportSpan(sd *trace.SpanData) { fe.seenSpans = append(fe.seenSpans, gotSI) } +// waitForServerSpan waits until a server span appears somewhere in the span +// list in an exporter. Returns an error if no server span found within the +// passed context's timeout. +func waitForServerSpan(ctx context.Context, fe *fakeExporter) error { + ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout) + defer cancel() + for ; ctx.Err() == nil; <-time.After(time.Millisecond) { + fe.mu.Lock() + for _, seenSpan := range fe.seenSpans { + if seenSpan.spanKind == trace.SpanKindServer { + fe.mu.Unlock() + return nil + } + } + fe.mu.Unlock() + } + return fmt.Errorf("timeout when waiting for server span to be present in exporter") +} + // TestSpan tests emitted spans from gRPC. It configures a system with a gRPC // Client and gRPC server with the OpenCensus Dial and Server Option configured, // and makes a Unary RPC and a Streaming RPC. This should cause spans with @@ -1375,18 +1394,30 @@ func (s) TestSpan(t *testing.T) { // Make a Unary RPC. This should cause a span with message events // corresponding to the request message and response message to be emitted - // both from the client and the server. Note that RPCs trigger exports of - // corresponding span data synchronously, thus the Span Data is guaranteed - // to have been read by exporter and is ready to make assertions on. + // both from the client and the server. if _, err := ss.Client.UnaryCall(ctx, &testpb.SimpleRequest{Payload: &testpb.Payload{}}); err != nil { t.Fatalf("Unexpected error from UnaryCall: %v", err) } - - // The spans received are server first, then client. This is due to the RPC - // finishing on the server first. The ordering of message events for a Unary - // Call is as follows: (client send, server recv), (server send (server span - // end), client recv (client span end)). 
wantSI := []spanInformation{ + { + sc: trace.SpanContext{ + TraceOptions: 1, + }, + name: "Attempt.grpc.testing.TestService.UnaryCall", + messageEvents: []trace.MessageEvent{ + { + EventType: trace.MessageEventTypeSent, + MessageID: 1, // First msg send so 1 (see comment above) + UncompressedByteSize: 2, + CompressedByteSize: 2, + }, + { + EventType: trace.MessageEventTypeRecv, + MessageID: 1, // First msg recv so 1 (see comment above) + }, + }, + hasRemoteParent: false, + }, { // Sampling rate of 100 percent, so this should populate every span // with the information that this span is being sampled. Here and @@ -1424,25 +1455,6 @@ func (s) TestSpan(t *testing.T) { // instrumentation code, so I'm iffy on it but fine. hasRemoteParent: true, }, - { - sc: trace.SpanContext{ - TraceOptions: 1, - }, - name: "Attempt.grpc.testing.TestService.UnaryCall", - messageEvents: []trace.MessageEvent{ - { - EventType: trace.MessageEventTypeSent, - MessageID: 1, // First msg send so 1 (see comment above) - UncompressedByteSize: 2, - CompressedByteSize: 2, - }, - { - EventType: trace.MessageEventTypeRecv, - MessageID: 1, // First msg recv so 1 (see comment above) - }, - }, - hasRemoteParent: false, - }, { sc: trace.SpanContext{ TraceOptions: 1, @@ -1453,19 +1465,32 @@ func (s) TestSpan(t *testing.T) { childSpanCount: 1, }, } + if err := waitForServerSpan(ctx, fe); err != nil { + t.Fatal(err) + } + var spanInfoSort = func(i, j int) bool { + // This will order into attempt span (which has an unset span kind to + // not prepend Sent. to span names in backends), then call span, then + // server span. + return fe.seenSpans[i].spanKind < fe.seenSpans[j].spanKind + } + fe.mu.Lock() + // Sort the underlying seen Spans for cmp.Diff assertions and ID + // relationship assertions. + sort.Slice(fe.seenSpans, spanInfoSort) if diff := cmp.Diff(fe.seenSpans, wantSI); diff != "" { + fe.mu.Unlock() t.Fatalf("got unexpected spans, diff (-got, +want): %v", diff) } - fe.mu.Lock() if err := validateTraceAndSpanIDs(fe.seenSpans); err != nil { fe.mu.Unlock() t.Fatalf("Error in runtime data assertions: %v", err) } - if !cmp.Equal(fe.seenSpans[0].parentSpanID, fe.seenSpans[1].sc.SpanID) { - t.Fatalf("server span should point to the client attempt span as its parent. parentSpanID: %v, clientAttemptSpanID: %v", fe.seenSpans[0].parentSpanID, fe.seenSpans[1].sc.SpanID) + if !cmp.Equal(fe.seenSpans[1].parentSpanID, fe.seenSpans[0].sc.SpanID) { + t.Fatalf("server span should point to the client attempt span as its parent. parentSpanID: %v, clientAttemptSpanID: %v", fe.seenSpans[1].parentSpanID, fe.seenSpans[0].sc.SpanID) } - if !cmp.Equal(fe.seenSpans[1].parentSpanID, fe.seenSpans[2].sc.SpanID) { - t.Fatalf("client attempt span should point to the client call span as its parent. parentSpanID: %v, clientCallSpanID: %v", fe.seenSpans[1].parentSpanID, fe.seenSpans[2].sc.SpanID) + if !cmp.Equal(fe.seenSpans[0].parentSpanID, fe.seenSpans[2].sc.SpanID) { + t.Fatalf("client attempt span should point to the client call span as its parent. 
parentSpanID: %v, clientCallSpanID: %v", fe.seenSpans[0].parentSpanID, fe.seenSpans[2].sc.SpanID) } fe.seenSpans = nil @@ -1490,6 +1515,23 @@ func (s) TestSpan(t *testing.T) { } wantSI = []spanInformation{ + { + sc: trace.SpanContext{ + TraceOptions: 1, + }, + name: "Attempt.grpc.testing.TestService.FullDuplexCall", + messageEvents: []trace.MessageEvent{ + { + EventType: trace.MessageEventTypeSent, + MessageID: 1, // First msg send so 1 + }, + { + EventType: trace.MessageEventTypeSent, + MessageID: 2, // Second msg send so 2 + }, + }, + hasRemoteParent: false, + }, { sc: trace.SpanContext{ TraceOptions: 1, @@ -1522,36 +1564,25 @@ func (s) TestSpan(t *testing.T) { hasRemoteParent: false, childSpanCount: 1, }, - { - sc: trace.SpanContext{ - TraceOptions: 1, - }, - name: "Attempt.grpc.testing.TestService.FullDuplexCall", - messageEvents: []trace.MessageEvent{ - { - EventType: trace.MessageEventTypeSent, - MessageID: 1, // First msg send so 1 - }, - { - EventType: trace.MessageEventTypeSent, - MessageID: 2, // Second msg send so 2 - }, - }, - hasRemoteParent: false, - }, + } + if err := waitForServerSpan(ctx, fe); err != nil { + t.Fatal(err) } fe.mu.Lock() defer fe.mu.Unlock() + // Sort the underlying seen Spans for cmp.Diff assertions and ID + // relationship assertions. + sort.Slice(fe.seenSpans, spanInfoSort) if diff := cmp.Diff(fe.seenSpans, wantSI); diff != "" { t.Fatalf("got unexpected spans, diff (-got, +want): %v", diff) } if err := validateTraceAndSpanIDs(fe.seenSpans); err != nil { t.Fatalf("Error in runtime data assertions: %v", err) } - if !cmp.Equal(fe.seenSpans[0].parentSpanID, fe.seenSpans[2].sc.SpanID) { - t.Fatalf("server span should point to the client attempt span as its parent. parentSpanID: %v, clientAttemptSpanID: %v", fe.seenSpans[0].parentSpanID, fe.seenSpans[2].sc.SpanID) + if !cmp.Equal(fe.seenSpans[1].parentSpanID, fe.seenSpans[0].sc.SpanID) { + t.Fatalf("server span should point to the client attempt span as its parent. parentSpanID: %v, clientAttemptSpanID: %v", fe.seenSpans[1].parentSpanID, fe.seenSpans[0].sc.SpanID) } - if !cmp.Equal(fe.seenSpans[2].parentSpanID, fe.seenSpans[1].sc.SpanID) { - t.Fatalf("client attempt span should point to the client call span as its parent. parentSpanID: %v, clientCallSpanID: %v", fe.seenSpans[2].parentSpanID, fe.seenSpans[1].sc.SpanID) + if !cmp.Equal(fe.seenSpans[0].parentSpanID, fe.seenSpans[2].sc.SpanID) { + t.Fatalf("client attempt span should point to the client call span as its parent. parentSpanID: %v, clientCallSpanID: %v", fe.seenSpans[0].parentSpanID, fe.seenSpans[2].sc.SpanID) } } diff --git a/stats/opencensus/trace.go b/stats/opencensus/trace.go index a7cafb30f4d0..f41cb838adc9 100644 --- a/stats/opencensus/trace.go +++ b/stats/opencensus/trace.go @@ -40,8 +40,9 @@ type traceInfo struct { func (csh *clientStatsHandler) traceTagRPC(ctx context.Context, rti *stats.RPCTagInfo) (context.Context, *traceInfo) { // TODO: get consensus on whether this method name of "s.m" is correct. mn := "Attempt." + strings.Replace(removeLeadingSlash(rti.FullMethodName), "/", ".", -1) - // Returned context is ignored because will populate context with data - // that wraps the span instead. + // Returned context is ignored because will populate context with data that + // wraps the span instead. Don't set span kind client on this attempt span + // to prevent backend from prepending span name with "Sent.". 
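	// (In OpenCensus terms, the alternative would be passing
	// trace.WithSpanKind(trace.SpanKindClient) here. Leaving the kind unset
	// both avoids the "Sent." prefix some backends add to client spans and
	// lets attempt spans sort ahead of the call and server spans in
	// e2e_test.go's spanKind-ordered assertions.)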
_, span := trace.StartSpan(ctx, mn, trace.WithSampler(csh.to.TS)) tcBin := propagation.Binary(span.SpanContext()) From 4d3f221d1d16276c02dafec47828d16d1337f9ac Mon Sep 17 00:00:00 2001 From: Zach Reyes <39203661+zasweq@users.noreply.github.com> Date: Thu, 25 May 2023 18:05:14 -0400 Subject: [PATCH 58/60] xds/internal/xdsclient: Add support for String Matcher Header Matcher in RDS (#6313) --- internal/xds/matcher/matcher_header.go | 31 +++++++ internal/xds/matcher/matcher_header_test.go | 80 +++++++++++++++++++ xds/internal/xdsclient/xdsresource/matcher.go | 2 + .../xdsclient/xdsresource/type_rds.go | 1 + .../xdsclient/xdsresource/unmarshal_rds.go | 12 ++- .../xdsresource/unmarshal_rds_test.go | 47 +++++++++++ 6 files changed, 171 insertions(+), 2 deletions(-) diff --git a/internal/xds/matcher/matcher_header.go b/internal/xds/matcher/matcher_header.go index fd4833d3fff8..01433f4122a2 100644 --- a/internal/xds/matcher/matcher_header.go +++ b/internal/xds/matcher/matcher_header.go @@ -241,3 +241,34 @@ func (hcm *HeaderContainsMatcher) Match(md metadata.MD) bool { func (hcm *HeaderContainsMatcher) String() string { return fmt.Sprintf("headerContains:%v%v", hcm.key, hcm.contains) } + +// HeaderStringMatcher matches on whether the header value matches against the +// StringMatcher specified. +type HeaderStringMatcher struct { + key string + stringMatcher StringMatcher + invert bool +} + +// NewHeaderStringMatcher returns a new HeaderStringMatcher. +func NewHeaderStringMatcher(key string, sm StringMatcher, invert bool) *HeaderStringMatcher { + return &HeaderStringMatcher{ + key: key, + stringMatcher: sm, + invert: invert, + } +} + +// Match returns whether the passed in HTTP Headers match according to the +// specified StringMatcher. +func (hsm *HeaderStringMatcher) Match(md metadata.MD) bool { + v, ok := mdValuesFromOutgoingCtx(md, hsm.key) + if !ok { + return false + } + return hsm.stringMatcher.Match(v) != hsm.invert +} + +func (hsm *HeaderStringMatcher) String() string { + return fmt.Sprintf("headerString:%v:%v", hsm.key, hsm.stringMatcher) +} diff --git a/internal/xds/matcher/matcher_header_test.go b/internal/xds/matcher/matcher_header_test.go index f567f3198242..9a20cf12b0f9 100644 --- a/internal/xds/matcher/matcher_header_test.go +++ b/internal/xds/matcher/matcher_header_test.go @@ -467,3 +467,83 @@ func TestHeaderSuffixMatcherMatch(t *testing.T) { }) } } + +func TestHeaderStringMatch(t *testing.T) { + tests := []struct { + name string + key string + sm StringMatcher + invert bool + md metadata.MD + want bool + }{ + { + name: "should-match", + key: "th", + sm: StringMatcher{ + exactMatch: newStringP("tv"), + }, + invert: false, + md: metadata.Pairs("th", "tv"), + want: true, + }, + { + name: "not match", + key: "th", + sm: StringMatcher{ + containsMatch: newStringP("tv"), + }, + invert: false, + md: metadata.Pairs("th", "not-match"), + want: false, + }, + { + name: "invert string match", + key: "th", + sm: StringMatcher{ + containsMatch: newStringP("tv"), + }, + invert: true, + md: metadata.Pairs("th", "not-match"), + want: true, + }, + { + name: "header missing", + key: "th", + sm: StringMatcher{ + containsMatch: newStringP("tv"), + }, + invert: false, + md: metadata.Pairs("not-specified-key", "not-match"), + want: false, + }, + { + name: "header missing invert true", + key: "th", + sm: StringMatcher{ + containsMatch: newStringP("tv"), + }, + invert: true, + md: metadata.Pairs("not-specified-key", "not-match"), + want: false, + }, + { + name: "header empty string invert", + key: "th", + 
sm: StringMatcher{ + containsMatch: newStringP("tv"), + }, + invert: true, + md: metadata.Pairs("th", ""), + want: true, + }, + } + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + hsm := NewHeaderStringMatcher(test.key, test.sm, test.invert) + if got := hsm.Match(test.md); got != test.want { + t.Errorf("match() = %v, want %v", got, test.want) + } + }) + } +} diff --git a/xds/internal/xdsclient/xdsresource/matcher.go b/xds/internal/xdsclient/xdsresource/matcher.go index 6a056235f3bd..77aa85b68e58 100644 --- a/xds/internal/xdsclient/xdsresource/matcher.go +++ b/xds/internal/xdsclient/xdsresource/matcher.go @@ -59,6 +59,8 @@ func RouteToMatcher(r *Route) (*CompositeMatcher, error) { matcherT = matcher.NewHeaderRangeMatcher(h.Name, h.RangeMatch.Start, h.RangeMatch.End, invert) case h.PresentMatch != nil: matcherT = matcher.NewHeaderPresentMatcher(h.Name, *h.PresentMatch, invert) + case h.StringMatch != nil: + matcherT = matcher.NewHeaderStringMatcher(h.Name, *h.StringMatch, invert) default: return nil, fmt.Errorf("illegal route: missing header_match_specifier") } diff --git a/xds/internal/xdsclient/xdsresource/type_rds.go b/xds/internal/xdsclient/xdsresource/type_rds.go index 0504346c399f..ad59209163e7 100644 --- a/xds/internal/xdsclient/xdsresource/type_rds.go +++ b/xds/internal/xdsclient/xdsresource/type_rds.go @@ -171,6 +171,7 @@ type HeaderMatcher struct { SuffixMatch *string RangeMatch *Int64Range PresentMatch *bool + StringMatch *matcher.StringMatcher } // Int64Range is a range for header range match. diff --git a/xds/internal/xdsclient/xdsresource/unmarshal_rds.go b/xds/internal/xdsclient/xdsresource/unmarshal_rds.go index a082d38c5aa5..c51a0c24b508 100644 --- a/xds/internal/xdsclient/xdsresource/unmarshal_rds.go +++ b/xds/internal/xdsclient/xdsresource/unmarshal_rds.go @@ -24,13 +24,15 @@ import ( "strings" "time" - v3routepb "github.com/envoyproxy/go-control-plane/envoy/config/route/v3" - v3typepb "github.com/envoyproxy/go-control-plane/envoy/type/v3" "github.com/golang/protobuf/proto" "google.golang.org/grpc/codes" "google.golang.org/grpc/internal/envconfig" + "google.golang.org/grpc/internal/xds/matcher" "google.golang.org/grpc/xds/internal/clusterspecifier" "google.golang.org/protobuf/types/known/anypb" + + v3routepb "github.com/envoyproxy/go-control-plane/envoy/config/route/v3" + v3typepb "github.com/envoyproxy/go-control-plane/envoy/type/v3" ) func unmarshalRouteConfigResource(r *anypb.Any) (string, RouteConfigUpdate, error) { @@ -273,6 +275,12 @@ func routesProtoToSlice(routes []*v3routepb.Route, csps map[string]clusterspecif header.PrefixMatch = &ht.PrefixMatch case *v3routepb.HeaderMatcher_SuffixMatch: header.SuffixMatch = &ht.SuffixMatch + case *v3routepb.HeaderMatcher_StringMatch: + sm, err := matcher.StringMatcherFromProto(ht.StringMatch) + if err != nil { + return nil, nil, fmt.Errorf("route %+v has an invalid string matcher: %v", err, ht.StringMatch) + } + header.StringMatch = &sm default: return nil, nil, fmt.Errorf("route %+v has an unrecognized header matcher: %+v", r, ht) } diff --git a/xds/internal/xdsclient/xdsresource/unmarshal_rds_test.go b/xds/internal/xdsclient/xdsresource/unmarshal_rds_test.go index 5dd4e042d72d..5e0d1e4523b6 100644 --- a/xds/internal/xdsclient/xdsresource/unmarshal_rds_test.go +++ b/xds/internal/xdsclient/xdsresource/unmarshal_rds_test.go @@ -33,6 +33,7 @@ import ( "google.golang.org/grpc/internal/envconfig" "google.golang.org/grpc/internal/pretty" "google.golang.org/grpc/internal/testutils" + 
"google.golang.org/grpc/internal/xds/matcher" "google.golang.org/grpc/xds/internal/clusterspecifier" "google.golang.org/grpc/xds/internal/httpfilter" "google.golang.org/grpc/xds/internal/xdsclient/xdsresource/version" @@ -923,6 +924,7 @@ func (s) TestUnmarshalRouteConfig(t *testing.T) { } func (s) TestRoutesProtoToSlice(t *testing.T) { + sm, _ := matcher.StringMatcherFromProto(&v3matcherpb.StringMatcher{MatchPattern: &v3matcherpb.StringMatcher_Exact{Exact: "tv"}}) var ( goodRouteWithFilterConfigs = func(cfgs map[string]*anypb.Any) []*v3routepb.Route { // Sets per-filter config in cluster "B" and in the route. @@ -1085,6 +1087,51 @@ func (s) TestRoutesProtoToSlice(t *testing.T) { }}, wantErr: false, }, + { + name: "good with string matcher", + routes: []*v3routepb.Route{ + { + Match: &v3routepb.RouteMatch{ + PathSpecifier: &v3routepb.RouteMatch_SafeRegex{SafeRegex: &v3matcherpb.RegexMatcher{Regex: "/a/"}}, + Headers: []*v3routepb.HeaderMatcher{ + { + Name: "th", + HeaderMatchSpecifier: &v3routepb.HeaderMatcher_StringMatch{StringMatch: &v3matcherpb.StringMatcher{MatchPattern: &v3matcherpb.StringMatcher_Exact{Exact: "tv"}}}, + }, + }, + RuntimeFraction: &v3corepb.RuntimeFractionalPercent{ + DefaultValue: &v3typepb.FractionalPercent{ + Numerator: 1, + Denominator: v3typepb.FractionalPercent_HUNDRED, + }, + }, + }, + Action: &v3routepb.Route_Route{ + Route: &v3routepb.RouteAction{ + ClusterSpecifier: &v3routepb.RouteAction_WeightedClusters{ + WeightedClusters: &v3routepb.WeightedCluster{ + Clusters: []*v3routepb.WeightedCluster_ClusterWeight{ + {Name: "B", Weight: &wrapperspb.UInt32Value{Value: 60}}, + {Name: "A", Weight: &wrapperspb.UInt32Value{Value: 40}}, + }, + }}}}, + }, + }, + wantRoutes: []*Route{{ + Regex: func() *regexp.Regexp { return regexp.MustCompile("/a/") }(), + Headers: []*HeaderMatcher{ + { + Name: "th", + InvertMatch: newBoolP(false), + StringMatch: &sm, + }, + }, + Fraction: newUInt32P(10000), + WeightedClusters: map[string]WeightedCluster{"A": {Weight: 40}, "B": {Weight: 60}}, + ActionType: RouteActionRoute, + }}, + wantErr: false, + }, { name: "query is ignored", routes: []*v3routepb.Route{ From e325737cace3fa71bd1f51e5cbf1546f43e58a2f Mon Sep 17 00:00:00 2001 From: Matthew Stevenson <52979934+matthewstevenson88@users.noreply.github.com> Date: Thu, 25 May 2023 15:05:50 -0700 Subject: [PATCH 59/60] alts: Fix flaky ALTS TestFullHandshake test. (#6300) * Fix flaky ALTS FullHandshake test. * Fix one other flake possibility. * fix typo in comment * Wait for full handshake frames to arrive from peer. * Remove runtime.GOMAXPROCS from the test. * Only set vmOnGCP once. --- credentials/alts/alts_test.go | 12 +---- .../alts/internal/testutil/testutil.go | 52 ++++++++++++++----- 2 files changed, 39 insertions(+), 25 deletions(-) diff --git a/credentials/alts/alts_test.go b/credentials/alts/alts_test.go index aef9642f844d..9a95d462806b 100644 --- a/credentials/alts/alts_test.go +++ b/credentials/alts/alts_test.go @@ -24,7 +24,6 @@ package alts import ( "context" "reflect" - "runtime" "sync" "testing" "time" @@ -309,21 +308,12 @@ func (s) TestCheckRPCVersions(t *testing.T) { // server, where both client and server offload to a local, fake handshaker // service. func (s) TestFullHandshake(t *testing.T) { - // If GOMAXPROCS is set to less than 2, do not run this test. This test - // requires at least 2 goroutines to succeed (one goroutine where a - // server listens, another goroutine where a client runs). 
- if runtime.GOMAXPROCS(0) < 2 { - return - } - // The vmOnGCP global variable MUST be reset to true after the client // or server credentials have been created, but before the ALTS // handshake begins. If vmOnGCP is not reset and this test is run // anywhere except for a GCP VM, then the ALTS handshake will // immediately fail. - once.Do(func() { - vmOnGCP = true - }) + once.Do(func() {}) vmOnGCP = true // Start the fake handshaker service and the server. diff --git a/credentials/alts/internal/testutil/testutil.go b/credentials/alts/internal/testutil/testutil.go index 24a61202a3da..cdc88c8f9da0 100644 --- a/credentials/alts/internal/testutil/testutil.go +++ b/credentials/alts/internal/testutil/testutil.go @@ -136,6 +136,7 @@ type FakeHandshaker struct { // DoHandshake performs a fake ALTS handshake. func (h *FakeHandshaker) DoHandshake(stream altsgrpc.HandshakerService_DoHandshakeServer) error { var isAssistingClient bool + var handshakeFramesReceivedSoFar []byte for { req, err := stream.Recv() if err != nil { @@ -153,15 +154,38 @@ func (h *FakeHandshaker) DoHandshake(stream altsgrpc.HandshakerService_DoHandsha return fmt.Errorf("processStartClient failure: %v", err) } case *altspb.HandshakerReq_ServerStart: + // If we have received the full ClientInit, send the ServerInit and + // ServerFinished. Otherwise, wait for more bytes to arrive from the client. isAssistingClient = false - resp, err = h.processServerStart(req.ServerStart) + handshakeFramesReceivedSoFar = append(handshakeFramesReceivedSoFar, req.ServerStart.InBytes...) + sendHandshakeFrame := bytes.Equal(handshakeFramesReceivedSoFar, []byte("ClientInit")) + resp, err = h.processServerStart(req.ServerStart, sendHandshakeFrame) if err != nil { - return fmt.Errorf("processServerClient failure: %v", err) + return fmt.Errorf("processServerStart failure: %v", err) } case *altspb.HandshakerReq_Next: - resp, err = h.processNext(req.Next, isAssistingClient) + // If we have received all handshake frames, send the handshake result. + // Otherwise, wait for more bytes to arrive from the peer. + oldHandshakesBytes := len(handshakeFramesReceivedSoFar) + handshakeFramesReceivedSoFar = append(handshakeFramesReceivedSoFar, req.Next.InBytes...) 
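			// (Accumulating in-bytes across requests is the heart of this
			// de-flake: the peer's "ClientInitClientFinished" or
			// "ServerInitServerFinished" frames may arrive split across
			// several HandshakerReq messages, so completion is judged
			// against the buffered whole rather than any single request.)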
+ isHandshakeComplete := false + if isAssistingClient { + isHandshakeComplete = bytes.HasPrefix(handshakeFramesReceivedSoFar, []byte("ServerInitServerFinished")) + } else { + isHandshakeComplete = bytes.HasPrefix(handshakeFramesReceivedSoFar, []byte("ClientInitClientFinished")) + } + if !isHandshakeComplete { + resp = &altspb.HandshakerResp{ + BytesConsumed: uint32(len(handshakeFramesReceivedSoFar) - oldHandshakesBytes), + Status: &altspb.HandshakerStatus{ + Code: uint32(codes.OK), + }, + } + break + } + resp, err = h.getHandshakeResult(isAssistingClient) if err != nil { - return fmt.Errorf("processNext failure: %v", err) + return fmt.Errorf("getHandshakeResult failure: %v", err) } default: return fmt.Errorf("handshake request has unexpected type: %v", req) @@ -192,7 +216,7 @@ func (h *FakeHandshaker) processStartClient(req *altspb.StartClientHandshakeReq) }, nil } -func (h *FakeHandshaker) processServerStart(req *altspb.StartServerHandshakeReq) (*altspb.HandshakerResp, error) { +func (h *FakeHandshaker) processServerStart(req *altspb.StartServerHandshakeReq, sendHandshakeFrame bool) (*altspb.HandshakerResp, error) { if len(req.ApplicationProtocols) != 1 || req.ApplicationProtocols[0] != "grpc" { return nil, fmt.Errorf("unexpected application protocols: %v", req.ApplicationProtocols) } @@ -203,8 +227,14 @@ func (h *FakeHandshaker) processServerStart(req *altspb.StartServerHandshakeReq) if len(parameters.RecordProtocols) != 1 || parameters.RecordProtocols[0] != "ALTSRP_GCM_AES128_REKEY" { return nil, fmt.Errorf("unexpected record protocols: %v", parameters.RecordProtocols) } - if string(req.InBytes) != "ClientInit" { - return nil, fmt.Errorf("unexpected in bytes: %v", req.InBytes) + if sendHandshakeFrame { + return &altspb.HandshakerResp{ + OutFrames: []byte("ServerInitServerFinished"), + BytesConsumed: uint32(len(req.InBytes)), + Status: &altspb.HandshakerStatus{ + Code: uint32(codes.OK), + }, + }, nil } return &altspb.HandshakerResp{ OutFrames: []byte("ServerInitServerFinished"), @@ -215,11 +245,8 @@ func (h *FakeHandshaker) processServerStart(req *altspb.StartServerHandshakeReq) }, nil } -func (h *FakeHandshaker) processNext(req *altspb.NextHandshakeMessageReq, isAssistingClient bool) (*altspb.HandshakerResp, error) { +func (h *FakeHandshaker) getHandshakeResult(isAssistingClient bool) (*altspb.HandshakerResp, error) { if isAssistingClient { - if !bytes.Equal(req.InBytes, []byte("ServerInitServerFinished")) { - return nil, fmt.Errorf("unexpected in bytes: got: %v, want: %v", req.InBytes, []byte("ServerInitServerFinished")) - } return &altspb.HandshakerResp{ OutFrames: []byte("ClientFinished"), BytesConsumed: 24, @@ -248,9 +275,6 @@ func (h *FakeHandshaker) processNext(req *altspb.NextHandshakeMessageReq, isAssi }, }, nil } - if !bytes.Equal(req.InBytes, []byte("ClientFinished")) { - return nil, fmt.Errorf("unexpected in bytes: got: %v, want: %v", req.InBytes, []byte("ClientFinished")) - } return &altspb.HandshakerResp{ BytesConsumed: 14, Result: &altspb.HandshakerResult{ From 9b9b364f6983ec56273447640563b677d3f7e152 Mon Sep 17 00:00:00 2001 From: Zach Reyes <39203661+zasweq@users.noreply.github.com> Date: Thu, 25 May 2023 19:54:17 -0400 Subject: [PATCH 60/60] internal/envconfig: Set Custom LB Env Var to true by default (#6317) --- internal/envconfig/xds.go | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/internal/envconfig/xds.go b/internal/envconfig/xds.go index 1d9152e8eeb2..8b3418785450 100644 --- a/internal/envconfig/xds.go +++ b/internal/envconfig/xds.go @@ 
-89,7 +89,7 @@ var ( // C2PResolverTestOnlyTrafficDirectorURI is the TD URI for testing. C2PResolverTestOnlyTrafficDirectorURI = os.Getenv("GRPC_TEST_ONLY_GOOGLE_C2P_RESOLVER_TRAFFIC_DIRECTOR_URI") // XDSCustomLBPolicy indicates whether Custom LB Policies are enabled, which - // can be enabled by setting the environment variable - // "GRPC_EXPERIMENTAL_XDS_CUSTOM_LB_CONFIG" to "true". - XDSCustomLBPolicy = boolFromEnv("GRPC_EXPERIMENTAL_XDS_CUSTOM_LB_CONFIG", false) + // can be disabled by setting the environment variable + // "GRPC_EXPERIMENTAL_XDS_CUSTOM_LB_CONFIG" to "false". + XDSCustomLBPolicy = boolFromEnv("GRPC_EXPERIMENTAL_XDS_CUSTOM_LB_CONFIG", true) )
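With a true default, the feature can now only be turned off explicitly. A
sketch of the boolFromEnv contract this relies on (illustrative; the real
helper lives elsewhere in internal/envconfig and may differ in detail):

    // Mirrors the documented behavior above; not the actual implementation.
    func boolFromEnv(name string, def bool) bool {
        if def {
            // Default-on: anything except an explicit "false" keeps it on.
            return !strings.EqualFold(os.Getenv(name), "false")
        }
        // Default-off: only an explicit "true" turns it on.
        return strings.EqualFold(os.Getenv(name), "true")
    }

After this patch, disabling custom LB policies therefore requires setting
GRPC_EXPERIMENTAL_XDS_CUSTOM_LB_CONFIG=false in the environment.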