Skip to content
This repository has been archived by the owner on Oct 11, 2024. It is now read-only.

Commit

Permalink
Tag kopia snapshots for later lookups (#1408)
Browse files Browse the repository at this point in the history
## Description

Add tags to each kopia snapshot that include all service/category pairs in the snapshot and all resource owners in the snapshot. This allows future snapshots to look up existing snapshots by those tags so they can be fed into the snapshot function. Feeding previous snapshots into the snapshot function enables kopia to detect previously uploaded files and skip uploading the data again.

## Type of change

<!--- Please check the type of change your PR introduces: --->
- [x] 🌻 Feature
- [ ] 🐛 Bugfix
- [ ] 🗺️ Documentation
- [ ] 🤖 Test
- [ ] 💻 CI/Deployment
- [ ] 🐹 Trivial/Minor

## Issue(s)

* #1404 

## Test Plan

<!-- How will this be tested prior to merging.-->
- [ ] 💪 Manual
- [x] ⚡ Unit test
- [ ] 💚 E2E
  • Loading branch information
ashmrtn authored Nov 1, 2022
1 parent e6191f0 commit a64abcb
Show file tree
Hide file tree
Showing 2 changed files with 109 additions and 15 deletions.
63 changes: 51 additions & 12 deletions src/internal/kopia/wrapper.go
Original file line number Diff line number Diff line change
Expand Up @@ -389,26 +389,36 @@ func newTreeMap() *treeMap {
}
}

// inflateDirTree returns an fs.Directory tree rooted at the oldest common
// ancestor of the streams and uses virtualfs.StaticDirectory for internal nodes
// in the hierarchy. Leaf nodes are virtualfs.StreamingDirectory with the given
// DataCollections.
// inflateDirTree returns a set of tags representing all the resource owners and
// service/categories in the snapshot and an fs.Directory tree rooted at the
// oldest common ancestor of the streams. All nodes are
// virtualfs.StreamingDirectory with the given DataCollections if there is one
// for that node. Tags can be used in future backups to fetch old snapshots for
// caching reasons.
func inflateDirTree(
ctx context.Context,
collections []data.Collection,
progress *corsoProgress,
) (fs.Directory, error) {
) (fs.Directory, *ownersCats, error) {
roots := make(map[string]*treeMap)
ownerCats := &ownersCats{
resourceOwners: make(map[string]struct{}),
serviceCats: make(map[string]struct{}),
}

for _, s := range collections {
if s.FullPath() == nil {
return nil, errors.New("no identifier for collection")
return nil, nil, errors.New("no identifier for collection")
}

serviceCat := serviceCatTag(s.FullPath())
ownerCats.serviceCats[serviceCat] = struct{}{}
ownerCats.resourceOwners[s.FullPath().ResourceOwner()] = struct{}{}

itemPath := s.FullPath().Elements()

if len(itemPath) == 0 {
return nil, errors.New("no identifier for collection")
return nil, nil, errors.New("no identifier for collection")
}

dir, ok := roots[itemPath[0]]
Expand Down Expand Up @@ -455,21 +465,21 @@ func inflateDirTree(
}

if len(roots) > 1 {
return nil, errors.New("multiple root directories")
return nil, nil, errors.New("multiple root directories")
}

var res fs.Directory

for dirName, dir := range roots {
tmp, err := buildKopiaDirs(dirName, dir, progress)
if err != nil {
return nil, err
return nil, nil, err
}

res = tmp
}

return res, nil
return res, ownerCats, nil
}

func (w Wrapper) BackupCollections(
Expand Down Expand Up @@ -497,22 +507,49 @@ func (w Wrapper) BackupCollections(
model.ServiceTag: service.String(),
}

dirTree, err := inflateDirTree(ctx, collections, progress)
dirTree, oc, err := inflateDirTree(ctx, collections, progress)
if err != nil {
return nil, nil, errors.Wrap(err, "building kopia directories")
}

s, err := w.makeSnapshotWithRoot(ctx, dirTree, progress)
s, err := w.makeSnapshotWithRoot(ctx, dirTree, oc, progress)
if err != nil {
return nil, nil, err
}

return s, progress.deets, nil
}

// ownersCats holds two string sets gathered for a snapshot: the resource
// owners it covers and the service/category identifiers it covers. Both are
// modelled as map[string]struct{} so that membership is the only information
// stored.
type ownersCats struct {
	// resourceOwners and serviceCats are sets keyed by the owner identifier
	// and the service/category tag string respectively.
	resourceOwners, serviceCats map[string]struct{}
}

// serviceCatTag builds the tag string for p by concatenating the string form
// of its service with the string form of its category, in that order and with
// no separator between them.
func serviceCatTag(p path.Path) string {
	service := p.Service().String()
	category := p.Category().String()

	return service + category
}

// tagsFromStrings flattens the two sets held by oc (service/category tags and
// resource owners) into a single map[string]string. Every key from either set
// becomes a key in the result; a key present in both sets appears once.
//
// Values are always the empty string because there can be multiple instances
// of resource owners and categories in a single snapshot, so only the presence
// of the key carries information.
//
// A nil oc yields an empty, non-nil map instead of panicking.
func tagsFromStrings(oc *ownersCats) map[string]string {
	if oc == nil {
		return map[string]string{}
	}

	// Upper bound on the number of keys; overlap between the sets only makes
	// the final map smaller.
	res := make(map[string]string, len(oc.serviceCats)+len(oc.resourceOwners))

	for k := range oc.serviceCats {
		res[k] = ""
	}

	for k := range oc.resourceOwners {
		res[k] = ""
	}

	return res
}

func (w Wrapper) makeSnapshotWithRoot(
ctx context.Context,
root fs.Directory,
oc *ownersCats,
progress *corsoProgress,
) (*BackupStats, error) {
var man *snapshot.Manifest
Expand Down Expand Up @@ -563,6 +600,8 @@ func (w Wrapper) makeSnapshotWithRoot(
return err
}

man.Tags = tagsFromStrings(oc)

if _, err := snapshot.SaveSnapshot(innerCtx, rw, man); err != nil {
err = errors.Wrap(err, "saving snapshot")
logger.Ctx(innerCtx).Errorw("kopia backup", err)
Expand Down
61 changes: 58 additions & 3 deletions src/internal/kopia/wrapper_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,9 @@ import (

"github.com/google/uuid"
"github.com/kopia/kopia/fs"
"github.com/kopia/kopia/repo"
"github.com/kopia/kopia/repo/manifest"
"github.com/kopia/kopia/snapshot"
"github.com/kopia/kopia/snapshot/snapshotfs"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
Expand Down Expand Up @@ -122,6 +124,20 @@ func getDirEntriesForEntry(
return entries
}

// checkSnapshotTags loads the snapshot manifest identified by snapshotID from
// rep and asserts that its Tags equal expectedTags. The test is stopped
// immediately if the manifest cannot be loaded.
//
// It is marked as a test helper so that failures are attributed to the calling
// test's line rather than to a line inside this function.
//
//revive:disable:context-as-argument
func checkSnapshotTags(
	t *testing.T,
	ctx context.Context,
	rep repo.Repository,
	expectedTags map[string]string,
	snapshotID string,
) {
	//revive:enable:context-as-argument
	t.Helper()

	man, err := snapshot.LoadSnapshot(ctx, rep, manifest.ID(snapshotID))
	require.NoError(t, err)
	assert.Equal(t, expectedTags, man.Tags)
}

// ---------------
// unit tests
// ---------------
Expand Down Expand Up @@ -517,6 +533,14 @@ func (suite *KopiaUnitSuite) TestBuildDirectoryTree() {
user1Encoded: 5,
user2Encoded: 42,
}
expectedServiceCats := map[string]struct{}{
serviceCatTag(suite.testPath): {},
serviceCatTag(p2): {},
}
expectedResourceOwners := map[string]struct{}{
suite.testPath.ResourceOwner(): {},
p2.ResourceOwner(): {},
}

progress := &corsoProgress{pending: map[string]*itemDetails{}}

Expand All @@ -542,8 +566,12 @@ func (suite *KopiaUnitSuite) TestBuildDirectoryTree() {
// - emails
// - Inbox
// - 42 separate files
dirTree, err := inflateDirTree(ctx, collections, progress)
dirTree, oc, err := inflateDirTree(ctx, collections, progress)
require.NoError(t, err)

assert.Equal(t, expectedServiceCats, oc.serviceCats)
assert.Equal(t, expectedResourceOwners, oc.resourceOwners)

assert.Equal(t, encodeAsPath(testTenant), dirTree.Name())

entries, err := fs.GetAllEntries(ctx, dirTree)
Expand Down Expand Up @@ -584,6 +612,15 @@ func (suite *KopiaUnitSuite) TestBuildDirectoryTree_MixedDirectory() {
p2, err := suite.testPath.Append(subdir, false)
require.NoError(suite.T(), err)

expectedServiceCats := map[string]struct{}{
serviceCatTag(suite.testPath): {},
serviceCatTag(p2): {},
}
expectedResourceOwners := map[string]struct{}{
suite.testPath.ResourceOwner(): {},
p2.ResourceOwner(): {},
}

// Test multiple orders of items because right now order can matter. Both
// orders result in a directory structure like:
// - a-tenant
Expand Down Expand Up @@ -630,8 +667,12 @@ func (suite *KopiaUnitSuite) TestBuildDirectoryTree_MixedDirectory() {
suite.T().Run(test.name, func(t *testing.T) {
progress := &corsoProgress{pending: map[string]*itemDetails{}}

dirTree, err := inflateDirTree(ctx, test.layout, progress)
dirTree, oc, err := inflateDirTree(ctx, test.layout, progress)
require.NoError(t, err)

assert.Equal(t, expectedServiceCats, oc.serviceCats)
assert.Equal(t, expectedResourceOwners, oc.resourceOwners)

assert.Equal(t, encodeAsPath(testTenant), dirTree.Name())

entries, err := fs.GetAllEntries(ctx, dirTree)
Expand Down Expand Up @@ -727,7 +768,7 @@ func (suite *KopiaUnitSuite) TestBuildDirectoryTree_Fails() {
defer flush()

suite.T().Run(test.name, func(t *testing.T) {
_, err := inflateDirTree(ctx, test.layout, nil)
_, _, err := inflateDirTree(ctx, test.layout, nil)
assert.Error(t, err)
})
}
Expand Down Expand Up @@ -810,6 +851,12 @@ func (suite *KopiaIntegrationSuite) TestBackupCollections() {
42,
),
}
expectedTags := map[string]string{
serviceCatTag(suite.testPath1): "",
suite.testPath1.ResourceOwner(): "",
serviceCatTag(suite.testPath2): "",
suite.testPath2.ResourceOwner(): "",
}

stats, deets, err := suite.w.BackupCollections(suite.ctx, collections, path.ExchangeService)
assert.NoError(t, err)
Expand All @@ -821,6 +868,14 @@ func (suite *KopiaIntegrationSuite) TestBackupCollections() {
assert.Equal(t, path.ExchangeService.String(), deets.Tags[model.ServiceTag])
// 47 file and 6 folder entries.
assert.Len(t, deets.Entries, 47+6)

checkSnapshotTags(
t,
suite.ctx,
suite.w.c,
expectedTags,
stats.SnapshotID,
)
}

func (suite *KopiaIntegrationSuite) TestRestoreAfterCompressionChange() {
Expand Down

0 comments on commit a64abcb

Please sign in to comment.