From 188de0e728498efecc075e0dfd32936528672d8b Mon Sep 17 00:00:00 2001 From: Timo Reimann Date: Sat, 14 May 2022 18:10:25 +0200 Subject: [PATCH] Make volumes page size customizable This is in support for accounts having a large number of volumes, allowing periodic resyncs through the csi-attacher to become more efficient and less heavy in terms of number of API calls needed. We also stop showing all volumes listed since that also has the potential to clutter the logs on accounts with many volumes. --- README.md | 39 +++++++++++++++++---------- cmd/do-csi-plugin/main.go | 28 ++++++++++++------- driver/controller.go | 16 +++++++---- driver/driver.go | 57 +++++++++++++++++++++++++-------------- 4 files changed, 92 insertions(+), 48 deletions(-) diff --git a/README.md b/README.md index 58c39d5d8..71efe1d39 100644 --- a/README.md +++ b/README.md @@ -76,7 +76,11 @@ Snapshots can be created and restored through `VolumeSnapshot` objects. **Note:** -Since version 2, the CSI plugin support v1beta1 Volume Snapshots only. Support for the v1alpha1 has been dropped. +Version 1 of the CSI driver supports v1alpha1 Volume Snapshots only. + +Version 2 and 3 of the CSI driver supports v1beta1 Volume Snapshots only. + +Version 4 and later of the CSI driver supports v1 Volume Snapshots, which is backwards compatible to v1beta1. However, version 3 renders snapshots unusable that had previously been marked as invalid. See the [csi-snapshotter](https://github.com/kubernetes-csi/external-snapshotter) documentation on the validating webhook and v1beta1 to v1 upgrade notes. --- @@ -110,17 +114,9 @@ The [DigitalOcean Kubernetes](https://www.digitalocean.com/products/kubernetes/) --- -#### Snapshot support - -Version 1 of the CSI driver supports v1alpha1 Volume Snapshots only. - -Version 2 and 3 of the CSI driver supports v1beta1 Volume Snapshots only. - -Version 4 and later of the CSI driver supports v1 Volume Snapshots, which is backwards compatible to v1beta1. However, version 3 renders snapshots unusable that had previously been marked as invalid. See the [csi-snapshotter](https://github.com/kubernetes-csi/external-snapshotter) documentation on the validating webhook and v1beta1 to v1 upgrade notes. - -**Driver modes:** +### Driver modes -By default, the driver supports both the [controller and node mode.](https://kubernetes-csi.github.io/docs/deploying.html) +By default, the driver supports both the [controller and node mode](https://kubernetes-csi.github.io/docs/deploying.html). It can manage DigitalOcean Volumes via the cloud API and mount them on the required node. The actually used mode is determined by how the driver is deployed and configured. The suggested release manifests provide separate deployments for controller and node modes, respectively. @@ -141,9 +137,7 @@ Skip secret creation (section 1. in following deployment instructions) when usin | Controller only mode not in DigitalOcean |:white_check_mark:|:white_check_mark:| | Node only mode in DigitalOcean | :x: | :x: | ---- - -**Requirements:** +### Requirements * `--allow-privileged` flag must be set to true for the API server * `--allow-privileged` flag must be set to true for the kubelet in Kubernetes 1.14 and below (flag does not exist in later releases) @@ -290,6 +284,23 @@ flag `--driver-name` to force the new driver to use the old name. Failing to do so will cause any existing PVs to be unusable since the new driver will not manage them and the old driver is no longer running. +## Configuration + +### Default volumes paging size + +Some CSI driver operations require paging through the volumes returned from the DO Volumes API. By default, the page size is not defined and causes the DO API to choose a value as specified in the [API reference](https://docs.digitalocean.com/reference/api/api-reference/#section/Introduction/Links-and-Pagination). In the vast majority of cases, this should work fine. However, for accounts with a very large number of volumes, the API server-chosen default page size may be too small to return all volumes within the configured (sidecar-provided) timeout. + +For that reason, the default page size can be customized by passing the `--default-volumes-page-size` flag a positive number. + +--- +**Notes:** + +1. The user is responsible for selecting a value below the maximum limit mandated by the DO API. Please see the API reference link above to see the current limit. +2. The configured sidecar timeout values may need to be aligned with the chosen page size. In particular, csi-attacher invokes `ListVolumes` to periodically synchronize the API and cluster-local volume states; as such, its timeout must be large enough to account for the expected number of volumes in the given account and region. +3. The default page size does not become effective if an explicit page size (more precisely, _max entries_ in CSI spec speak) is passed to a given gRPC method. + +--- + ## Development Requirements: diff --git a/cmd/do-csi-plugin/main.go b/cmd/do-csi-plugin/main.go index 7ab805fa4..e42175ddc 100644 --- a/cmd/do-csi-plugin/main.go +++ b/cmd/do-csi-plugin/main.go @@ -30,14 +30,15 @@ import ( func main() { var ( - endpoint = flag.String("endpoint", "unix:///var/lib/kubelet/plugins/"+driver.DefaultDriverName+"/csi.sock", "CSI endpoint.") - token = flag.String("token", "", "DigitalOcean access token.") - url = flag.String("url", "https://api.digitalocean.com/", "DigitalOcean API URL.") - region = flag.String("region", "", "DigitalOcean region slug. Specify only when running in controller mode outside of a DigitalOcean droplet.") - doTag = flag.String("do-tag", "", "Tag DigitalOcean volumes on Create/Attach.") - driverName = flag.String("driver-name", driver.DefaultDriverName, "Name for the driver.") - debugAddr = flag.String("debug-addr", "", "Address to serve the HTTP debug server on.") - version = flag.Bool("version", false, "Print the version and exit.") + endpoint = flag.String("endpoint", "unix:///var/lib/kubelet/plugins/"+driver.DefaultDriverName+"/csi.sock", "CSI endpoint.") + token = flag.String("token", "", "DigitalOcean access token.") + url = flag.String("url", "https://api.digitalocean.com/", "DigitalOcean API URL.") + region = flag.String("region", "", "DigitalOcean region slug. Specify only when running in controller mode outside of a DigitalOcean droplet.") + doTag = flag.String("do-tag", "", "Tag DigitalOcean volumes on Create/Attach.") + driverName = flag.String("driver-name", driver.DefaultDriverName, "Name for the driver.") + debugAddr = flag.String("debug-addr", "", "Address to serve the HTTP debug server on.") + defaultVolumesPageSize = flag.Uint("default-volumes-page-size", 0, "The default page size used when paging through volumes results (default: do not specify and let the DO API choose)") + version = flag.Bool("version", false, "Print the version and exit.") ) flag.Parse() @@ -50,7 +51,16 @@ func main() { log.Fatalln("region flag must not be set when driver is running in node mode (i.e., token flag is unset)") } - drv, err := driver.NewDriver(*endpoint, *token, *url, *region, *doTag, *driverName, *debugAddr) + drv, err := driver.NewDriver(driver.NewDriverParams{ + Endpoint: *endpoint, + Token: *token, + URL: *url, + Region: *region, + DOTag: *doTag, + DriverName: *driverName, + DebugAddr: *debugAddr, + DefaultVolumesPageSize: *defaultVolumesPageSize, + }) if err != nil { log.Fatalln(err) } diff --git a/driver/controller.go b/driver/controller.go index f9c2644ec..0aac55761 100644 --- a/driver/controller.go +++ b/driver/controller.go @@ -516,10 +516,16 @@ func (d *Driver) ValidateVolumeCapabilities(ctx context.Context, req *csi.Valida // ListVolumes returns a list of all requested volumes func (d *Driver) ListVolumes(ctx context.Context, req *csi.ListVolumesRequest) (*csi.ListVolumesResponse, error) { + maxEntries := req.MaxEntries + if maxEntries == 0 && d.defaultVolumesPageSize > 0 { + maxEntries = int32(d.defaultVolumesPageSize) + } + log := d.log.WithFields(logrus.Fields{ - "max_entries": req.MaxEntries, - "req_starting_token": req.StartingToken, - "method": "list_volumes", + "max_entries": req.MaxEntries, + "effective_max_entries": maxEntries, + "req_starting_token": req.StartingToken, + "method": "list_volumes", }) log.Info("list volumes called") @@ -532,7 +538,7 @@ func (d *Driver) ListVolumes(ctx context.Context, req *csi.ListVolumesRequest) ( startingToken = int32(parsedToken) } - untypedVolumes, nextToken, err := listResources(ctx, log, startingToken, req.MaxEntries, func(ctx context.Context, listOpts *godo.ListOptions) ([]interface{}, *godo.Response, error) { + untypedVolumes, nextToken, err := listResources(ctx, log, startingToken, maxEntries, func(ctx context.Context, listOpts *godo.ListOptions) ([]interface{}, *godo.Response, error) { volListOpts := &godo.ListVolumeParams{ ListOptions: listOpts, Region: d.region, @@ -583,7 +589,7 @@ func (d *Driver) ListVolumes(ctx context.Context, req *csi.ListVolumesRequest) ( resp.NextToken = strconv.FormatInt(int64(nextToken), 10) } - log.WithField("response", resp).Info("volumes listed") + log.WithField("num_volume_entries", len(resp.Entries)).Info("volumes listed") return resp, nil } diff --git a/driver/driver.go b/driver/driver.go index 9bba8f213..904683fc9 100644 --- a/driver/driver.go +++ b/driver/driver.go @@ -30,7 +30,7 @@ import ( "time" "github.com/container-storage-interface/spec/lib/go/csi" - metadata "github.com/digitalocean/go-metadata" + "github.com/digitalocean/go-metadata" "github.com/digitalocean/godo" "github.com/sirupsen/logrus" "golang.org/x/oauth2" @@ -62,12 +62,13 @@ type Driver struct { // `ControllerPublishVolume` to `NodeStageVolume or `NodePublishVolume` publishInfoVolumeName string - endpoint string - debugAddr string - hostID func() string - region string - doTag string - isController bool + endpoint string + debugAddr string + hostID func() string + region string + doTag string + isController bool + defaultVolumesPageSize uint srv *grpc.Server httpSrv *http.Server @@ -89,21 +90,35 @@ type Driver struct { ready bool } +// NewDriverParams defines the parameters that can be passed to NewDriver. +type NewDriverParams struct { + Endpoint string + Token string + URL string + Region string + DOTag string + DriverName string + DebugAddr string + DefaultVolumesPageSize uint +} + // NewDriver returns a CSI plugin that contains the necessary gRPC // interfaces to interact with Kubernetes over unix domain sockets for // managing DigitalOcean Block Storage -func NewDriver(ep, token, url, region, doTag, driverName, debugAddr string) (*Driver, error) { +func NewDriver(p NewDriverParams) (*Driver, error) { + driverName := p.DriverName if driverName == "" { driverName = DefaultDriverName } tokenSource := oauth2.StaticTokenSource(&oauth2.Token{ - AccessToken: token, + AccessToken: p.Token, }) oauthClient := oauth2.NewClient(context.Background(), tokenSource) mdClient := metadata.NewClient() - if region == "" { + var region string + if p.Region == "" { var err error region, err = mdClient.Region() if err != nil { @@ -116,8 +131,8 @@ func NewDriver(ep, token, url, region, doTag, driverName, debugAddr string) (*Dr } hostID := strconv.Itoa(hostIDInt) - opts := []godo.ClientOpt{} - opts = append(opts, godo.SetBaseURL(url)) + var opts []godo.ClientOpt + opts = append(opts, godo.SetBaseURL(p.URL)) if version == "" { version = "dev" @@ -141,15 +156,17 @@ func NewDriver(ep, token, url, region, doTag, driverName, debugAddr string) (*Dr name: driverName, publishInfoVolumeName: driverName + "/volume-name", - doTag: doTag, - endpoint: ep, - debugAddr: debugAddr, - hostID: func() string { return hostID }, - region: region, - mounter: newMounter(log), - log: log, + doTag: p.DOTag, + endpoint: p.Endpoint, + debugAddr: p.DebugAddr, + defaultVolumesPageSize: p.DefaultVolumesPageSize, + + hostID: func() string { return hostID }, + region: region, + mounter: newMounter(log), + log: log, // we're assuming only the controller has a non-empty token. - isController: token != "", + isController: p.Token != "", storage: doClient.Storage, storageActions: doClient.StorageActions,