Skip to content

Commit

Permalink
Migrate location-related structs to the file package (#1751)
Browse files Browse the repository at this point in the history
* migrate location structs to file package

Signed-off-by: Alex Goodman <[email protected]>

* replace source.Location refs with file package call

Signed-off-by: Alex Goodman <[email protected]>

* fix linting

Signed-off-by: Alex Goodman <[email protected]>

* remove hardlink test for file based catalogers

Signed-off-by: Alex Goodman <[email protected]>

* remove hardlink test for all-regular-files testing

Signed-off-by: Alex Goodman <[email protected]>

* migrate file resolver implementations to separate package

Signed-off-by: Alex Goodman <[email protected]>

* fix linting

Signed-off-by: Alex Goodman <[email protected]>

* [wip] migrate resolvers to internal

Signed-off-by: Alex Goodman <[email protected]>

* migrate resolvers to syft/internal

Signed-off-by: Alex Goodman <[email protected]>

---------

Signed-off-by: Alex Goodman <[email protected]>
Signed-off-by: <>
  • Loading branch information
wagoodman authored May 24, 2023
1 parent 4bf17a9 commit 07e7690
Show file tree
Hide file tree
Showing 313 changed files with 2,317 additions and 2,192 deletions.
6 changes: 3 additions & 3 deletions DEVELOPING.md
Original file line number Diff line number Diff line change
Expand Up @@ -167,12 +167,12 @@ always feel free to file an issue or reach out to us [on slack](https://anchore.

#### Searching for files

All catalogers are provided an instance of the [`source.FileResolver`](https://github.com/anchore/syft/blob/v0.70.0/syft/source/file_resolver.go#L8) to interface with the image and search for files. The implementations for these
All catalogers are provided an instance of the [`file.Resolver`](https://github.com/anchore/syft/blob/v0.70.0/syft/source/file_resolver.go#L8) to interface with the image and search for files. The implementations for these
abstractions leverage [`stereoscope`](https://github.com/anchore/stereoscope) in order to perform searching. Here is a
rough outline of how that works:

1. a stereoscope `file.Index` is searched based on the input given (a path, glob, or MIME type). The index is relatively fast to search, but requires results to be filtered down to the files that exist in the specific layer(s) of interest. This is done automatically by the `filetree.Searcher` abstraction. This abstraction will fallback to searching directly against the raw `filetree.FileTree` if the index does not contain the file(s) of interest. Note: the `filetree.Searcher` is used by the `source.FileResolver` abstraction.
2. Once the set of files are returned from the `filetree.Searcher` the results are filtered down further to return the most unique file results. For example, you may have requested for files by a glob that returns multiple results. These results are filtered down to deduplicate by real files, so if a result contains two references to the same file, say one accessed via symlink and one accessed via the real path, then the real path reference is returned and the symlink reference is filtered out. If both were accessed by symlink then the first (by lexical order) is returned. This is done automatically by the `source.FileResolver` abstraction.
1. a stereoscope `file.Index` is searched based on the input given (a path, glob, or MIME type). The index is relatively fast to search, but requires results to be filtered down to the files that exist in the specific layer(s) of interest. This is done automatically by the `filetree.Searcher` abstraction. This abstraction will fallback to searching directly against the raw `filetree.FileTree` if the index does not contain the file(s) of interest. Note: the `filetree.Searcher` is used by the `file.Resolver` abstraction.
2. Once the set of files are returned from the `filetree.Searcher` the results are filtered down further to return the most unique file results. For example, you may have requested for files by a glob that returns multiple results. These results are filtered down to deduplicate by real files, so if a result contains two references to the same file, say one accessed via symlink and one accessed via the real path, then the real path reference is returned and the symlink reference is filtered out. If both were accessed by symlink then the first (by lexical order) is returned. This is done automatically by the `file.Resolver` abstraction.
3. By the time results reach the `pkg.Cataloger` you are guaranteed to have a set of unique files that exist in the layer(s) of interest (relative to what the resolver supports).

## Testing
Expand Down
17 changes: 9 additions & 8 deletions cmd/syft/cli/eventloop/tasks.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,10 @@ import (
"github.com/anchore/syft/syft"
"github.com/anchore/syft/syft/artifact"
"github.com/anchore/syft/syft/file"
"github.com/anchore/syft/syft/file/cataloger/filecontent"
"github.com/anchore/syft/syft/file/cataloger/filedigest"
"github.com/anchore/syft/syft/file/cataloger/filemetadata"
"github.com/anchore/syft/syft/file/cataloger/secrets"
"github.com/anchore/syft/syft/sbom"
"github.com/anchore/syft/syft/source"
)
Expand Down Expand Up @@ -61,7 +65,7 @@ func generateCatalogFileMetadataTask(app *config.Application) (Task, error) {
return nil, nil
}

metadataCataloger := file.NewMetadataCataloger()
metadataCataloger := filemetadata.NewCataloger()

task := func(results *sbom.Artifacts, src *source.Source) ([]artifact.Relationship, error) {
resolver, err := src.FileResolver(app.FileMetadata.Cataloger.ScopeOpt)
Expand Down Expand Up @@ -104,10 +108,7 @@ func generateCatalogFileDigestsTask(app *config.Application) (Task, error) {
hashes = append(hashes, hashObj)
}

digestsCataloger, err := file.NewDigestsCataloger(hashes)
if err != nil {
return nil, err
}
digestsCataloger := filedigest.NewCataloger(hashes)

task := func(results *sbom.Artifacts, src *source.Source) ([]artifact.Relationship, error) {
resolver, err := src.FileResolver(app.FileMetadata.Cataloger.ScopeOpt)
Expand All @@ -131,12 +132,12 @@ func generateCatalogSecretsTask(app *config.Application) (Task, error) {
return nil, nil
}

patterns, err := file.GenerateSearchPatterns(file.DefaultSecretsPatterns, app.Secrets.AdditionalPatterns, app.Secrets.ExcludePatternNames)
patterns, err := secrets.GenerateSearchPatterns(secrets.DefaultSecretsPatterns, app.Secrets.AdditionalPatterns, app.Secrets.ExcludePatternNames)
if err != nil {
return nil, err
}

secretsCataloger, err := file.NewSecretsCataloger(patterns, app.Secrets.RevealValues, app.Secrets.SkipFilesAboveSize)
secretsCataloger, err := secrets.NewCataloger(patterns, app.Secrets.RevealValues, app.Secrets.SkipFilesAboveSize) //nolint:staticcheck
if err != nil {
return nil, err
}
Expand All @@ -163,7 +164,7 @@ func generateCatalogContentsTask(app *config.Application) (Task, error) {
return nil, nil
}

contentsCataloger, err := file.NewContentsCataloger(app.FileContents.Globs, app.FileContents.SkipFilesAboveSize)
contentsCataloger, err := filecontent.NewCataloger(app.FileContents.Globs, app.FileContents.SkipFilesAboveSize) //nolint:staticcheck
if err != nil {
return nil, err
}
Expand Down
4 changes: 2 additions & 2 deletions internal/licenses/parser.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,9 @@ import (

"github.com/google/licensecheck"

"github.com/anchore/syft/syft/file"
"github.com/anchore/syft/syft/license"
"github.com/anchore/syft/syft/pkg"
"github.com/anchore/syft/syft/source"
)

const (
Expand All @@ -16,7 +16,7 @@ const (
)

// Parse scans the contents of a license file to attempt to determine the type of license it is
func Parse(reader io.Reader, l source.Location) (licenses []pkg.License, err error) {
func Parse(reader io.Reader, l file.Location) (licenses []pkg.License, err error) {
licenses = make([]pkg.License, 0)
contents, err := io.ReadAll(reader)
if err != nil {
Expand Down
6 changes: 3 additions & 3 deletions syft/event/parsers/parsers.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ import (

"github.com/anchore/syft/syft/event"
"github.com/anchore/syft/syft/event/monitor"
"github.com/anchore/syft/syft/file"
"github.com/anchore/syft/syft/file/cataloger/secrets"
"github.com/anchore/syft/syft/pkg/cataloger"
)

Expand Down Expand Up @@ -54,12 +54,12 @@ func ParsePackageCatalogerStarted(e partybus.Event) (*cataloger.Monitor, error)
return &monitor, nil
}

func ParseSecretsCatalogingStarted(e partybus.Event) (*file.SecretsMonitor, error) {
func ParseSecretsCatalogingStarted(e partybus.Event) (*secrets.Monitor, error) {
if err := checkEventType(e.Type, event.SecretsCatalogerStarted); err != nil {
return nil, err
}

monitor, ok := e.Value.(file.SecretsMonitor)
monitor, ok := e.Value.(secrets.Monitor)
if !ok {
return nil, newPayloadErr(e.Type, "Value", e.Value)
}
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
package file
package filecontent

import (
"bytes"
Expand All @@ -8,24 +8,26 @@ import (

"github.com/anchore/syft/internal"
"github.com/anchore/syft/internal/log"
"github.com/anchore/syft/syft/source"
"github.com/anchore/syft/syft/file"
)

type ContentsCataloger struct {
// Deprecated: will be removed in syft v1.0.0
type Cataloger struct {
globs []string
skipFilesAboveSizeInBytes int64
}

func NewContentsCataloger(globs []string, skipFilesAboveSize int64) (*ContentsCataloger, error) {
return &ContentsCataloger{
// Deprecated: will be removed in syft v1.0.0
func NewCataloger(globs []string, skipFilesAboveSize int64) (*Cataloger, error) {
return &Cataloger{
globs: globs,
skipFilesAboveSizeInBytes: skipFilesAboveSize,
}, nil
}

func (i *ContentsCataloger) Catalog(resolver source.FileResolver) (map[source.Coordinates]string, error) {
results := make(map[source.Coordinates]string)
var locations []source.Location
func (i *Cataloger) Catalog(resolver file.Resolver) (map[file.Coordinates]string, error) {
results := make(map[file.Coordinates]string)
var locations []file.Location

locations, err := resolver.FilesByGlob(i.globs...)
if err != nil {
Expand Down Expand Up @@ -56,7 +58,7 @@ func (i *ContentsCataloger) Catalog(resolver source.FileResolver) (map[source.Co
return results, nil
}

func (i *ContentsCataloger) catalogLocation(resolver source.FileResolver, location source.Location) (string, error) {
func (i *Cataloger) catalogLocation(resolver file.Resolver, location file.Location) (string, error) {
contentReader, err := resolver.FileContentsByLocation(location)
if err != nil {
return "", err
Expand Down
80 changes: 80 additions & 0 deletions syft/file/cataloger/filecontent/cataloger_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
package filecontent

import (
"testing"

"github.com/stretchr/testify/assert"

"github.com/anchore/syft/syft/file"
)

// TestContentsCataloger verifies that the contents cataloger selects files by
// glob, honors the max-size filter, and returns base64-encoded file contents
// keyed by file coordinates.
func TestContentsCataloger(t *testing.T) {
	coords := func(path string) file.Coordinates {
		return file.NewLocation(path).Coordinates
	}

	allFiles := []string{"test-fixtures/last/path.txt", "test-fixtures/another-path.txt", "test-fixtures/a-path.txt"}

	cases := []struct {
		name     string
		globs    []string
		maxSize  int64
		files    []string
		expected map[file.Coordinates]string
	}{
		{
			name:  "multi-pattern",
			globs: []string{"test-fixtures/last/*.txt", "test-fixtures/*.txt"},
			files: allFiles,
			expected: map[file.Coordinates]string{
				coords("test-fixtures/last/path.txt"):    "dGVzdC1maXh0dXJlcy9sYXN0L3BhdGgudHh0IGZpbGUgY29udGVudHMh",
				coords("test-fixtures/another-path.txt"): "dGVzdC1maXh0dXJlcy9hbm90aGVyLXBhdGgudHh0IGZpbGUgY29udGVudHMh",
				coords("test-fixtures/a-path.txt"):       "dGVzdC1maXh0dXJlcy9hLXBhdGgudHh0IGZpbGUgY29udGVudHMh",
			},
		},
		{
			name:     "no-patterns",
			globs:    []string{},
			files:    []string{"test-fixtures/last/path.txt", "test-fixtures/another-path.txt", "test-fixtures/a-path.txt"},
			expected: map[file.Coordinates]string{},
		},
		{
			name:  "all-txt",
			globs: []string{"**/*.txt"},
			files: allFiles,
			expected: map[file.Coordinates]string{
				coords("test-fixtures/last/path.txt"):    "dGVzdC1maXh0dXJlcy9sYXN0L3BhdGgudHh0IGZpbGUgY29udGVudHMh",
				coords("test-fixtures/another-path.txt"): "dGVzdC1maXh0dXJlcy9hbm90aGVyLXBhdGgudHh0IGZpbGUgY29udGVudHMh",
				coords("test-fixtures/a-path.txt"):       "dGVzdC1maXh0dXJlcy9hLXBhdGgudHh0IGZpbGUgY29udGVudHMh",
			},
		},
		{
			name:  "subpath",
			globs: []string{"test-fixtures/*.txt"},
			files: allFiles,
			expected: map[file.Coordinates]string{
				coords("test-fixtures/another-path.txt"): "dGVzdC1maXh0dXJlcy9hbm90aGVyLXBhdGgudHh0IGZpbGUgY29udGVudHMh",
				coords("test-fixtures/a-path.txt"):       "dGVzdC1maXh0dXJlcy9hLXBhdGgudHh0IGZpbGUgY29udGVudHMh",
			},
		},
		{
			name:    "size-filter",
			maxSize: 42,
			globs:   []string{"**/*.txt"},
			files:   allFiles,
			expected: map[file.Coordinates]string{
				coords("test-fixtures/last/path.txt"): "dGVzdC1maXh0dXJlcy9sYXN0L3BhdGgudHh0IGZpbGUgY29udGVudHMh",
				coords("test-fixtures/a-path.txt"):    "dGVzdC1maXh0dXJlcy9hLXBhdGgudHh0IGZpbGUgY29udGVudHMh",
			},
		},
	}

	for _, tt := range cases {
		tt := tt
		t.Run(tt.name, func(t *testing.T) {
			c, err := NewCataloger(tt.globs, tt.maxSize)
			assert.NoError(t, err)

			resolver := file.NewMockResolverForPaths(tt.files...)
			actual, err := c.Catalog(resolver)
			assert.NoError(t, err)
			assert.Equal(t, tt.expected, actual, "mismatched contents")
		})
	}
}
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
109 changes: 109 additions & 0 deletions syft/file/cataloger/filedigest/cataloger.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,109 @@
package filedigest

import (
"crypto"
"errors"

"github.com/wagoodman/go-partybus"
"github.com/wagoodman/go-progress"

stereoscopeFile "github.com/anchore/stereoscope/pkg/file"
"github.com/anchore/syft/internal"
"github.com/anchore/syft/internal/bus"
"github.com/anchore/syft/internal/log"
"github.com/anchore/syft/syft/event"
"github.com/anchore/syft/syft/file"
internal2 "github.com/anchore/syft/syft/file/cataloger/internal"
)

// ErrUndigestableFile is a sentinel error indicating that digests cannot be
// computed for a file (e.g. it is not a regular file). Callers should compare
// with errors.Is.
var ErrUndigestableFile = errors.New("undigestable file")

// Cataloger computes file digests, producing one digest per configured hash
// algorithm for each regular file it processes.
type Cataloger struct {
	hashes []crypto.Hash // the set of hash algorithms applied to each file
}

// NewCataloger returns a Cataloger that computes the given set of hashes for
// each file it catalogs.
func NewCataloger(hashes []crypto.Hash) *Cataloger {
	c := Cataloger{hashes: hashes}
	return &c
}

// Catalog computes digests for files reachable through the given resolver. When
// no coordinates are provided, all regular files visible to the resolver are
// cataloged; otherwise only the files at the given coordinates are considered.
// Non-regular files and files unreadable due to permissions are skipped. The
// returned map is keyed by file coordinates.
func (i *Cataloger) Catalog(resolver file.Resolver, coordinates ...file.Coordinates) (map[file.Coordinates][]file.Digest, error) {
	results := make(map[file.Coordinates][]file.Digest)
	var locations []file.Location

	if len(coordinates) == 0 {
		// no explicit selection: digest every regular file the resolver can see
		locations = internal2.AllRegularFiles(resolver)
	} else {
		locations = make([]file.Location, 0, len(coordinates))
		for _, c := range coordinates {
			locations = append(locations, file.NewLocationFromCoordinates(c))
		}
	}

	stage, prog := digestsCatalogingProgress(int64(len(locations)))
	// fix: ensure the progress monitor is always marked completed, even when we
	// return early on error (previously an error path left the progress hanging)
	defer prog.SetCompleted()

	for _, location := range locations {
		stage.Current = location.RealPath
		result, err := i.catalogLocation(resolver, location)

		if errors.Is(err, ErrUndigestableFile) {
			// not a regular file; nothing to digest
			continue
		}

		if internal.IsErrPathPermission(err) {
			log.Debugf("file digests cataloger skipping %q: %+v", location.RealPath, err)
			continue
		}

		if err != nil {
			return nil, err
		}
		prog.Increment()
		results[location.Coordinates] = result
	}
	log.Debugf("file digests cataloger processed %d files", prog.Current())
	return results, nil
}

// catalogLocation computes the configured digests for a single file location.
// ErrUndigestableFile is returned for anything that is not a regular file.
func (i *Cataloger) catalogLocation(resolver file.Resolver, location file.Location) ([]file.Digest, error) {
	metadata, err := resolver.FileMetadataByLocation(location)
	if err != nil {
		return nil, err
	}

	// only regular files can be digested (links are intentionally not resolved)
	if metadata.Type != stereoscopeFile.TypeRegular {
		return nil, ErrUndigestableFile
	}

	reader, err := resolver.FileContentsByLocation(location)
	if err != nil {
		return nil, err
	}
	defer internal.CloseAndLogError(reader, location.VirtualPath)

	digests, err := file.NewDigestsFromFile(reader, i.hashes)
	if err != nil {
		return nil, internal.ErrPath{Context: "digests-cataloger", Path: location.RealPath, Err: err}
	}
	return digests, nil
}

// digestsCatalogingProgress publishes an event announcing that digest cataloging
// has started over the given number of locations, returning the stage and
// progress handles used to report per-file status.
func digestsCatalogingProgress(locations int64) (*progress.Stage, *progress.Manual) {
	stage := &progress.Stage{}
	prog := progress.NewManual(locations)

	// the event value exposes both the current stage name and overall progress
	value := struct {
		progress.Stager
		progress.Progressable
	}{
		Stager:       progress.Stager(stage),
		Progressable: prog,
	}

	bus.Publish(partybus.Event{
		Type:  event.FileDigestsCatalogerStarted,
		Value: value,
	})

	return stage, prog
}
Loading

0 comments on commit 07e7690

Please sign in to comment.