Skip to content

Commit

Permalink
- add --configs-only and --rbac-only options to upload and `dow…
Browse files Browse the repository at this point in the history
…nload` command, fix #1042

Signed-off-by: Slach <[email protected]>
  • Loading branch information
Slach committed Jan 22, 2025
1 parent ecf5620 commit cb2a7b2
Show file tree
Hide file tree
Showing 11 changed files with 149 additions and 75 deletions.
3 changes: 2 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -13,4 +13,5 @@ _instances/
_coverage_/
__pycache__/
*.py[cod]
vendor/
vendor/
.aider*
2 changes: 2 additions & 0 deletions ChangeLog.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@ IMPROVEMENTS

- remove `S3_PART_SIZE` and `AZBLOB_BUFFER_SIZE` parameter from configuration and significant decrease memory usage
during upload and download, fix [854](https://github.com/Altinity/clickhouse-backup/issues/854)
- add `--configs-only` and `--rbac-only` options to `upload` and `download` commands,
fix [1042](https://github.com/Altinity/clickhouse-backup/issues/1042)

BUG FIXES

Expand Down
8 changes: 6 additions & 2 deletions Manual.md
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,8 @@ If you need different partitions for different tables, then use --partitions=db.
Values depends on field types in your table, use single quotes for String and Date/DateTime related types
Look at the system.parts partition and partition_id fields for details https://clickhouse.com/docs/en/operations/system-tables/parts/
--schema, -s Upload schemas only
--rbac-only, --rbac Upload RBAC related objects only, will skip upload data, will backup schema only if --schema added
--configs-only, --configs Upload 'clickhouse-server' configuration files only, will skip upload data, will backup schema only if --schema added
--resume, --resumable Save intermediate upload state and resume upload if backup exists on remote storage, ignored with 'remote_storage: custom' or 'use_embedded_backup_restore: true'
--delete, --delete-source, --delete-local explicitly delete local backup during upload
Expand Down Expand Up @@ -138,8 +140,10 @@ If PARTITION BY clause returns tuple with multiple fields, then use --partitions
If you need different partitions for different tables, then use --partitions=db.table1:part1,part2 --partitions=db.table?:*
Values depends on field types in your table, use single quotes for String and Date/DateTime related types
Look at the system.parts partition and partition_id fields for details https://clickhouse.com/docs/en/operations/system-tables/parts/
--schema, -s Download schema only
--resume, --resumable Save intermediate download state and resume download if backup exists on local storage, ignored with 'remote_storage: custom' or 'use_embedded_backup_restore: true'
--schema, --schema-only, -s Download schema only
--rbac-only, --rbac Download RBAC related objects only, will skip download data, will backup schema only if --schema added
--configs-only, --configs Download 'clickhouse-server' configuration files only, will skip download data, will backup schema only if --schema added
--resume, --resumable Save intermediate download state and resume download if backup exists on local storage, ignored with 'remote_storage: custom' or 'use_embedded_backup_restore: true'
```
### CLI command - restore
Expand Down
14 changes: 12 additions & 2 deletions ReadMe.md
Original file line number Diff line number Diff line change
Expand Up @@ -464,6 +464,9 @@ Upload backup to remote storage: `curl -s localhost:7171/backup/upload/<BACKUP_N
- Optional string query argument `table` works the same as the `--table value` CLI argument.
- Optional string query argument `partitions` works the same as the `--partitions value` CLI argument.
- Optional boolean query argument `schema` works the same as the `--schema` CLI argument (upload schema only).
- Optional boolean query argument `rbac-only` works the same as the `--rbac-only` CLI argument (upload RBAC only).
- Optional boolean query argument `configs-only` works the same as the `--configs-only` CLI argument (upload configs
only).
- Optional boolean query argument `resumable` works the same as the `--resumable` CLI argument (save intermediate upload state and resume upload if data already exists on remote storage).
- Optional string query argument `callback` allows passing a callback URL, which will be called via POST with `application/json` payload `{"status":"error|success","error":"not empty when error happens", "operation_id" : "<random_uuid>"}`.

Expand All @@ -485,6 +488,9 @@ Download backup from remote storage: `curl -s localhost:7171/backup/download/<BA
- Optional string query argument `table` works the same as the `--table value` CLI argument.
- Optional string query argument `partitions` works the same as the `--partitions value` CLI argument.
- Optional boolean query argument `schema` works the same as the `--schema` CLI argument (download schema only).
- Optional boolean query argument `rbac-only` works the same as the `--rbac-only` CLI argument (download RBAC only).
- Optional boolean query argument `configs-only` works the same as the `--configs-only` CLI argument (download configs
only).
- Optional boolean query argument `resumable` works the same as the `--resumable` CLI argument (save intermediate download state and resume download if it already exists on local storage).
- Optional string query argument `callback` allows passing a callback URL, which will be called via POST with `application/json` payload `{"status":"error|success","error":"not empty when error happens", "operation_id" : "<random_uuid>"}`.

Expand Down Expand Up @@ -658,6 +664,8 @@ If you need different partitions for different tables, then use --partitions=db.
Values depends on field types in your table, use single quotes for String and Date/DateTime related types
Look at the system.parts partition and partition_id fields for details https://clickhouse.com/docs/en/operations/system-tables/parts/
--schema, -s Upload schemas only
--rbac-only, --rbac Upload RBAC related objects only, will skip upload data, will backup schema only if --schema added
--configs-only, --configs Upload 'clickhouse-server' configuration files only, will skip upload data, will backup schema only if --schema added
--resume, --resumable Save intermediate upload state and resume upload if backup exists on remote storage, ignored with 'remote_storage: custom' or 'use_embedded_backup_restore: true'
--delete, --delete-source, --delete-local explicitly delete local backup during upload
Expand Down Expand Up @@ -694,8 +702,10 @@ If PARTITION BY clause returns tuple with multiple fields, then use --partitions
If you need different partitions for different tables, then use --partitions=db.table1:part1,part2 --partitions=db.table?:*
Values depends on field types in your table, use single quotes for String and Date/DateTime related types
Look at the system.parts partition and partition_id fields for details https://clickhouse.com/docs/en/operations/system-tables/parts/
--schema, -s Download schema only
--resume, --resumable Save intermediate download state and resume download if backup exists on local storage, ignored with 'remote_storage: custom' or 'use_embedded_backup_restore: true'
--schema, --schema-only, -s Download schema only
--rbac-only, --rbac Download RBAC related objects only, will skip download data, will backup schema only if --schema added
--configs-only, --configs Download 'clickhouse-server' configuration files only, will skip download data, will backup schema only if --schema added
--resume, --resumable Save intermediate download state and resume download if backup exists on local storage, ignored with 'remote_storage: custom' or 'use_embedded_backup_restore: true'
```
### CLI command - restore
Expand Down
26 changes: 23 additions & 3 deletions cmd/clickhouse-backup/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -260,7 +260,7 @@ func main() {
UsageText: "clickhouse-backup upload [-t, --tables=<db>.<table>] [--partitions=<partition_names>] [-s, --schema] [--diff-from=<local_backup_name>] [--diff-from-remote=<remote_backup_name>] [--resumable] <backup_name>",
Action: func(c *cli.Context) error {
b := backup.NewBackuper(config.GetConfigFromCli(c))
return b.Upload(c.Args().First(), c.Bool("delete-source"), c.String("diff-from"), c.String("diff-from-remote"), c.String("t"), c.StringSlice("partitions"), c.Bool("s"), c.Bool("resume"), version, c.Int("command-id"))
return b.Upload(c.Args().First(), c.Bool("delete-source"), c.String("diff-from"), c.String("diff-from-remote"), c.String("t"), c.StringSlice("partitions"), c.Bool("schema"), c.Bool("rbac-only"), c.Bool("configs-only"), c.Bool("resume"), version, c.Int("command-id"))
},
Flags: append(cliapp.Flags,
cli.StringFlag{
Expand Down Expand Up @@ -294,6 +294,16 @@ func main() {
Hidden: false,
Usage: "Upload schemas only",
},
cli.BoolFlag{
Name: "rbac-only, rbac",
Hidden: false,
Usage: "Upload RBAC related objects only, will skip upload data, will backup schema only if --schema added",
},
cli.BoolFlag{
Name: "configs-only, configs",
Hidden: false,
Usage: "Upload 'clickhouse-server' configuration files only, will skip upload data, will backup schema only if --schema added",
},
cli.BoolFlag{
Name: "resume, resumable",
Hidden: false,
Expand Down Expand Up @@ -323,7 +333,7 @@ func main() {
UsageText: "clickhouse-backup download [-t, --tables=<db>.<table>] [--partitions=<partition_names>] [-s, --schema] [--resumable] <backup_name>",
Action: func(c *cli.Context) error {
b := backup.NewBackuper(config.GetConfigFromCli(c))
return b.Download(c.Args().First(), c.String("t"), c.StringSlice("partitions"), c.Bool("s"), c.Bool("resume"), version, c.Int("command-id"))
return b.Download(c.Args().First(), c.String("t"), c.StringSlice("partitions"), c.Bool("schema"), c.Bool("rbac-only"), c.Bool("configs-only"), c.Bool("resume"), version, c.Int("command-id"))
},
Flags: append(cliapp.Flags,
cli.StringFlag{
Expand All @@ -343,10 +353,20 @@ func main() {
"Look at the system.parts partition and partition_id fields for details https://clickhouse.com/docs/en/operations/system-tables/parts/",
},
cli.BoolFlag{
Name: "schema, s",
Name: "schema, schema-only, s",
Hidden: false,
Usage: "Download schema only",
},
cli.BoolFlag{
Name: "rbac-only, rbac",
Hidden: false,
Usage: "Download RBAC related objects only, will skip download data, will backup schema only if --schema added",
},
cli.BoolFlag{
Name: "configs-only, configs",
Hidden: false,
Usage: "Download 'clickhouse-server' configuration files only, will skip download data, will backup schema only if --schema added",
},
cli.BoolFlag{
Name: "resume, resumable",
Hidden: false,
Expand Down
13 changes: 6 additions & 7 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -12,12 +12,12 @@ require (
github.com/ClickHouse/clickhouse-go/v2 v2.30.1
github.com/antchfx/xmlquery v1.4.3
github.com/aws/aws-sdk-go-v2 v1.33.0
github.com/aws/aws-sdk-go-v2/config v1.29.1
github.com/aws/aws-sdk-go-v2/credentials v1.17.54
github.com/aws/aws-sdk-go-v2/feature/s3/manager v1.17.52
github.com/aws/aws-sdk-go-v2/service/s3 v1.73.2
github.com/aws/aws-sdk-go-v2/service/sts v1.33.9
github.com/aws/smithy-go v1.22.1
github.com/aws/aws-sdk-go-v2/config v1.28.10
github.com/aws/aws-sdk-go-v2/credentials v1.17.51
github.com/aws/aws-sdk-go-v2/feature/s3/manager v1.17.48
github.com/aws/aws-sdk-go-v2/service/s3 v1.72.2
github.com/aws/aws-sdk-go-v2/service/sts v1.33.6
github.com/aws/smithy-go v1.22.2
github.com/djherbis/buffer v1.2.0
github.com/djherbis/nio/v3 v3.0.1
github.com/eapache/go-resiliency v1.7.0
Expand Down Expand Up @@ -109,7 +109,6 @@ require (
github.com/hashicorp/errwrap v1.1.0 // indirect
github.com/hashicorp/go-multierror v1.1.1 // indirect
github.com/hashicorp/golang-lru/v2 v2.0.7 // indirect
github.com/jmespath/go-jmespath v0.4.0 // indirect
github.com/klauspost/pgzip v1.2.6 // indirect
github.com/kr/fs v0.1.0 // indirect
github.com/mattn/go-colorable v0.1.14 // indirect
Expand Down
2 changes: 1 addition & 1 deletion pkg/backup/create_remote.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ func (b *Backuper) CreateToRemote(backupName string, deleteSource bool, diffFrom
if err := b.CreateBackup(backupName, diffFromRemote, tablePattern, partitions, schemaOnly, backupRBAC, rbacOnly, backupConfigs, configsOnly, skipCheckPartsColumns, resume, version, commandId); err != nil {
return err
}
if err := b.Upload(backupName, deleteSource, diffFrom, diffFromRemote, tablePattern, partitions, schemaOnly, resume, version, commandId); err != nil {
if err := b.Upload(backupName, deleteSource, diffFrom, diffFromRemote, tablePattern, partitions, schemaOnly, rbacOnly, configsOnly, resume, version, commandId); err != nil {
return err
}

Expand Down
67 changes: 37 additions & 30 deletions pkg/backup/download.go
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ var (
ErrBackupIsAlreadyExists = errors.New("backup is already exists")
)

func (b *Backuper) Download(backupName string, tablePattern string, partitions []string, schemaOnly, resume bool, backupVersion string, commandId int) error {
func (b *Backuper) Download(backupName string, tablePattern string, partitions []string, schemaOnly, rbacOnly, configsOnly, resume bool, backupVersion string, commandId int) error {
ctx, cancel, err := status.Current.GetContextWithCancel(commandId)
if err != nil {
return err
Expand Down Expand Up @@ -123,7 +123,7 @@ func (b *Backuper) Download(backupName string, tablePattern string, partitions [
tablesForDownload := parseTablePatternForDownload(remoteBackup.Tables, tablePattern)

if !schemaOnly && !b.cfg.General.DownloadByPart && remoteBackup.RequiredBackup != "" {
err := b.Download(remoteBackup.RequiredBackup, tablePattern, partitions, schemaOnly, b.resume, backupVersion, commandId)
err := b.Download(remoteBackup.RequiredBackup, tablePattern, partitions, schemaOnly, rbacOnly, configsOnly, b.resume, backupVersion, commandId)
if err != nil && !errors.Is(err, ErrBackupIsAlreadyExists) {
return err
}
Expand Down Expand Up @@ -152,24 +152,27 @@ func (b *Backuper) Download(backupName string, tablePattern string, partitions [

log.Debug().Str("backup", backupName).Msgf("prepare table METADATA concurrent semaphore with concurrency=%d len(tablesForDownload)=%d", b.cfg.General.DownloadConcurrency, len(tablesForDownload))
tableMetadataAfterDownload := make([]*metadata.TableMetadata, len(tablesForDownload))
metadataGroup, metadataCtx := errgroup.WithContext(ctx)
metadataGroup.SetLimit(int(b.cfg.General.DownloadConcurrency))
for i, t := range tablesForDownload {
metadataLogger := log.With().Str("table_metadata", fmt.Sprintf("%s.%s", t.Database, t.Table)).Logger()
idx := i
tableTitle := t
metadataGroup.Go(func() error {
downloadedMetadata, size, err := b.downloadTableMetadata(metadataCtx, backupName, disks, tableTitle, schemaOnly, partitions, b.resume, metadataLogger)
if err != nil {
return err
}
tableMetadataAfterDownload[idx] = downloadedMetadata
atomic.AddUint64(&metadataSize, size)
return nil
})
}
if err := metadataGroup.Wait(); err != nil {
return fmt.Errorf("one of Download Metadata go-routine return error: %v", err)
doDownloadData := !schemaOnly && !rbacOnly && !configsOnly
if doDownloadData || schemaOnly {
metadataGroup, metadataCtx := errgroup.WithContext(ctx)
metadataGroup.SetLimit(int(b.cfg.General.DownloadConcurrency))
for i, t := range tablesForDownload {
metadataLogger := log.With().Str("table_metadata", fmt.Sprintf("%s.%s", t.Database, t.Table)).Logger()
idx := i
tableTitle := t
metadataGroup.Go(func() error {
downloadedMetadata, size, downloadMetadataErr := b.downloadTableMetadata(metadataCtx, backupName, disks, tableTitle, schemaOnly, partitions, b.resume, metadataLogger)
if downloadMetadataErr != nil {
return downloadMetadataErr
}
tableMetadataAfterDownload[idx] = downloadedMetadata
atomic.AddUint64(&metadataSize, size)
return nil
})
}
if err := metadataGroup.Wait(); err != nil {
return fmt.Errorf("one of Download Metadata go-routine return error: %v", err)
}
}
// download, missed .inner. tables, https://github.com/Altinity/clickhouse-backup/issues/765
var missedInnerTableErr error
Expand All @@ -178,7 +181,7 @@ func (b *Backuper) Download(backupName string, tablePattern string, partitions [
return fmt.Errorf("b.downloadMissedInnerTablesMetadata error: %v", missedInnerTableErr)
}

if !schemaOnly {
if doDownloadData {
if reBalanceErr := b.reBalanceTablesMetadataIfDiskNotExists(tableMetadataAfterDownload, disks, remoteBackup); reBalanceErr != nil {
return reBalanceErr
}
Expand All @@ -194,8 +197,8 @@ func (b *Backuper) Download(backupName string, tablePattern string, partitions [
idx := i
dataGroup.Go(func() error {
start := time.Now()
if err := b.downloadTableData(dataCtx, remoteBackup.BackupMetadata, *tableMetadataAfterDownload[idx]); err != nil {
return err
if downloadDataErr := b.downloadTableData(dataCtx, remoteBackup.BackupMetadata, *tableMetadataAfterDownload[idx]); downloadDataErr != nil {
return downloadDataErr
}
log.Info().Fields(map[string]interface{}{
"backup_name": backupName,
Expand All @@ -214,20 +217,24 @@ func (b *Backuper) Download(backupName string, tablePattern string, partitions [
}
}
var rbacSize, configSize uint64
rbacSize, err = b.downloadRBACData(ctx, remoteBackup)
if err != nil {
return fmt.Errorf("download RBAC error: %v", err)
if rbacOnly || rbacOnly == configsOnly {
rbacSize, err = b.downloadRBACData(ctx, remoteBackup)
if err != nil {
return fmt.Errorf("download RBAC error: %v", err)
}
}

configSize, err = b.downloadConfigData(ctx, remoteBackup)
if err != nil {
return fmt.Errorf("download CONFIGS error: %v", err)
if configsOnly || rbacOnly == configsOnly {
configSize, err = b.downloadConfigData(ctx, remoteBackup)
if err != nil {
return fmt.Errorf("download CONFIGS error: %v", err)
}
}

backupMetadata := remoteBackup.BackupMetadata
backupMetadata.Tables = tablesForDownload

if b.isEmbedded && b.cfg.ClickHouse.EmbeddedBackupDisk != "" && backupMetadata.Tables != nil && len(backupMetadata.Tables) > 0 {
if doDownloadData && b.isEmbedded && b.cfg.ClickHouse.EmbeddedBackupDisk != "" && backupMetadata.Tables != nil && len(backupMetadata.Tables) > 0 {
localClickHouseBackupFile := path.Join(b.EmbeddedBackupDataPath, backupName, ".backup")
remoteClickHouseBackupFile := path.Join(backupName, ".backup")
localEmbeddedMetadataSize := int64(0)
Expand Down
2 changes: 1 addition & 1 deletion pkg/backup/restore_remote.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ package backup
import "errors"

func (b *Backuper) RestoreFromRemote(backupName, tablePattern string, databaseMapping, tableMapping, partitions []string, schemaOnly, dataOnly, dropExists, ignoreDependencies, restoreRBAC, rbacOnly, restoreConfigs, configsOnly, resume bool, version string, commandId int) error {
if err := b.Download(backupName, tablePattern, partitions, schemaOnly, resume, version, commandId); err != nil {
if err := b.Download(backupName, tablePattern, partitions, schemaOnly, rbacOnly, configsOnly, resume, version, commandId); err != nil {
// https://github.com/Altinity/clickhouse-backup/issues/625
if !errors.Is(err, ErrBackupIsAlreadyExists) {
return err
Expand Down
Loading

0 comments on commit cb2a7b2

Please sign in to comment.