diff --git a/Makefile b/Makefile index f6f2c7c..bc4a881 100644 --- a/Makefile +++ b/Makefile @@ -2,7 +2,7 @@ .DEFAULT_GOAL=test build: test - go build -o bin/edmgen/edmgen cmd/edmgen/*.go + go build -v -o bin/edmgen/edmgen cmd/edmgen/*.go clean: rm -f bin/edmgen @@ -10,7 +10,10 @@ clean: test: go mod tidy staticcheck ./... - go test ./... + go test -v ./... integration-test: - go test ./... -tags=integration + go test -v ./... -tags=integration + +release: test integration-test build + go test all -v \ No newline at end of file diff --git a/README.md b/README.md index 24fe6bd..23f3e76 100644 --- a/README.md +++ b/README.md @@ -10,7 +10,7 @@ While a very useful technology, most DLP solutions lack the tools to sample the ## Install ```bash -go install github.com/crashdump/edmgen +go install github.com/crashdump/edmgen/cmd/edmgen@latest ``` ## Use @@ -61,6 +61,7 @@ Can be applied to the phases: `ExamineFiles` and `SampleContent` * `LineLength`: Only select lines based on their length. Min and Max can be specified. * `LongestLine`: Only select the longest line in the file. +* `IgnoreLine`: Ignore any line containing a specified string. * `Uniq`: Deduplicate content. Especially useful during the final `SampleContent` phase. Note: All filters are implemented as their own self-contained function, which are easily extensible. Implementing your own filter should not require any changes to the core code. diff --git a/cmd/edmgen/cmd.go b/cmd/edmgen/cmd.go index 8b7aef1..c917e23 100644 --- a/cmd/edmgen/cmd.go +++ b/cmd/edmgen/cmd.go @@ -1,11 +1,13 @@ package main import ( + "errors" "fmt" "os" "time" "github.com/urfave/cli/v2" + "golang.org/x/exp/slices" "github.com/crashdump/edmgen/pkg/edm" "github.com/crashdump/edmgen/pkg/filters/content" @@ -14,8 +16,17 @@ import ( var logger *logging +const STDOUT = "stdout" + +func init() { + cli.HelpFlag = &cli.BoolFlag{Name: "help"} + cli.VersionFlag = &cli.BoolFlag{Name: "version", Aliases: []string{"v"}} +} + func main() { logger = newLogger() + var flagOutput string + var flagFormat string logger.print("┌────────┐") logger.print("│ EDMGEN │") @@ -24,83 +35,127 @@ func main() { app := &cli.App{ Name: "edmgen", - Usage: "Walk files in a directory and extract samples for the purporse of Exact Data Match DLPs", + Usage: "Walk files in a directory and extract (samples) lines", Compiled: time.Now(), Authors: []*cli.Author{{ Name: "Adrien Pujol", Email: "ap@cdfr.net", }}, Flags: []cli.Flag{ - &cli.StringSliceFlag{Name: "extensions", Aliases: []string{"E"}}, + &cli.StringFlag{ + Name: "output", + Aliases: []string{"o"}, + Destination: &flagOutput, + Action: func(ctx *cli.Context, v string) error { + if v == "" { + return errors.New("please specify a filename") + } + return nil + }, + }, + &cli.StringFlag{ + Name: "format", + Aliases: []string{"f"}, + DefaultText: STDOUT, + Destination: &flagFormat, + Action: func(ctx *cli.Context, v string) error { + formats := []string{STDOUT, "csv", "txt"} + if slices.Contains[[]string](formats, flagFormat) { + return fmt.Errorf("output %s not currently supported", flagFormat) + } + return nil + }, + }, }, Before: func(cCtx *cli.Context) error { if cCtx.Args().Len() < 1 { - logger.printFatal("You need to specify a path to a folder") + logger.print("You need to specify a path to a folder") + os.Exit(1) } return nil }, - Action: run, - } + Action: func(cCtx *cli.Context) error { + edmc, err := edm.New(edm.Opts{}) + if err != nil { + logger.printFatal(err.Error()) + } - if err := app.Run(os.Args); err != nil { - logger.printFatal(err.Error()) - } -} + path := cCtx.Args().First() -func run(cCtx *cli.Context) error { - edmc, err := edm.New(edm.Opts{}) - if err != nil { - logger.printFatal(err.Error()) - } + /* + * Search for all the relevant files + */ + logger.printHeader("Searching for relevant files...") + err = edmc.SelectFiles(path, + file.IgnoreFilename(ignoreFilenames), + file.IgnoreDirname(ignoreDirnames), + file.RequireExtensions(requireExtentions), + ) + if err != nil { + logger.printFatal(err.Error()) + } + logger.printfResult("Found %d files.", len(edmc.Paths)) + logger.print("") - path := cCtx.Args().First() - - /* - * Search for all the relevant files - */ - logger.printHeader("Searching for relevant files...") - err = edmc.SelectFiles(path, - file.IgnoreFilename(ignoreFilenames), - file.IgnoreDirname(ignoreDirnames), - file.RequireExtensions(requireExtentions), - ) - if err != nil { - logger.printFatal(err.Error()) + /* + * Walk through each of the files and sample lines + */ + logger.printHeader("Examining files...") + err = edmc.ExamineFiles( + content.IgnoreLine("serialVersionUID"), + content.LineLength(60, 120, true), + content.LongestLine, + ) + if err != nil { + logger.printFatal(err.Error()) + } + logger.printfResult("Selected %d lines.", len(edmc.Content)) + logger.print("") + + /* + * Sample the lines previously selected down to the result + */ + logger.printHeader("Sampling content...") + lines := edmc.SampleContent( + content.Uniq, + ) + logger.printfResult("Sampled down to %d lines", len(lines)) + logger.print("") + + /* + * Finally, output the result + */ + switch flagFormat { + case "txt": + writeFileTxt(flagOutput, lines) + + case "csv": + writeFileCsv(flagOutput, lines) + + default: + writeStdout(lines) + } + + logger.print("Complete!") + return nil + }, } - logger.printfResult("Found %d files.", len(edmc.Paths)) - logger.print("") - /* - * Walk through each of the files and sample lines - */ - logger.printHeader("Examining files...") - err = edmc.ExamineFiles( - content.LineLength(40, 100), - content.LongestLine, - ) - if err != nil { + if err := app.Run(os.Args); err != nil { logger.printFatal(err.Error()) } - logger.printfResult("Selected %d lines.", len(edmc.Content)) - logger.print("") - - /* - * Sample the lines previously selected down to the result - */ - logger.printHeader("Sampling content...") - lines := edmc.SampleContent( - content.Uniq, - ) - logger.printfResult("Sampled down to %d lines", len(edmc.Content)) - logger.print("") +} - /* - * Finally, output the result - */ +func writeStdout(lines []string) { for _, line := range lines { fmt.Println(line) } +} + +func writeFileTxt(filename string, lines []string) { + +} + +func writeFileCsv(filename string, lines []string) { - logger.print("Complete!") - return nil } diff --git a/cmd/edmgen/ignore.go b/cmd/edmgen/ignore.go index 836890f..8bdbf0f 100644 --- a/cmd/edmgen/ignore.go +++ b/cmd/edmgen/ignore.go @@ -5,6 +5,7 @@ var ignoreFilenames = []string{ } var ignoreDirnames = []string{ "node_modules", + "documentation", } var requireExtentions = []string{ diff --git a/cmd/edmgen/logging.go b/cmd/edmgen/logging.go index e502468..fcb04db 100644 --- a/cmd/edmgen/logging.go +++ b/cmd/edmgen/logging.go @@ -37,6 +37,5 @@ func (l logging) printWarn(str string) { } func (l logging) printFatal(str string) { - l.logger.Print(fmt.Sprintf("[!!] %s", str)) panic(str) } diff --git a/go.mod b/go.mod index 6e6aa52..09298fb 100644 --- a/go.mod +++ b/go.mod @@ -5,6 +5,7 @@ go 1.20 require ( github.com/stretchr/testify v1.8.4 github.com/urfave/cli/v2 v2.25.7 + golang.org/x/exp v0.0.0-20230725093048-515e97ebf090 ) require ( diff --git a/go.sum b/go.sum index 77ba6ed..01aba65 100644 --- a/go.sum +++ b/go.sum @@ -12,6 +12,8 @@ github.com/urfave/cli/v2 v2.25.7 h1:VAzn5oq403l5pHjc4OhD54+XGO9cdKVL/7lDjF+iKUs= github.com/urfave/cli/v2 v2.25.7/go.mod h1:8qnjx1vcq5s2/wpsqoZFndg2CE5tNFyrTvS6SinrnYQ= github.com/xrash/smetrics v0.0.0-20201216005158-039620a65673 h1:bAn7/zixMGCfxrRTfdpNzjtPYqr8smhKouy9mxVdGPU= github.com/xrash/smetrics v0.0.0-20201216005158-039620a65673/go.mod h1:N3UwUGtsrSj3ccvlPHLoLsHnpR27oXr4ZE984MbSER8= +golang.org/x/exp v0.0.0-20230725093048-515e97ebf090 h1:Di6/M8l0O2lCLc6VVRWhgCiApHV8MnQurBnFSHsQtNY= +golang.org/x/exp v0.0.0-20230725093048-515e97ebf090/go.mod h1:FXUEEKJgO7OQYeo8N01OfiKP8RXMtf6e8aTskBGqWdc= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= diff --git a/pkg/filters/content/ignore.go b/pkg/filters/content/ignore.go new file mode 100644 index 0000000..b2e2363 --- /dev/null +++ b/pkg/filters/content/ignore.go @@ -0,0 +1,15 @@ +package content + +import "strings" + +func IgnoreLine(str string) func(content []string) (result []string) { + return func(content []string) (result []string) { + for _, line := range content { + if strings.Contains(line, str) { + continue + } + result = append(result, line) + } + return result + } +} diff --git a/pkg/filters/content/ignore_test.go b/pkg/filters/content/ignore_test.go new file mode 100644 index 0000000..255fad9 --- /dev/null +++ b/pkg/filters/content/ignore_test.go @@ -0,0 +1,64 @@ +package content_test + +import ( + "testing" + + "github.com/stretchr/testify/assert" + + "github.com/crashdump/edmgen/pkg/filters/content" +) + +func Test_IgnoreLine(t *testing.T) { + type args struct { + string string + } + tests := []struct { + name string + args args + content []string + want []string + }{ + { + name: "empty-string", + content: []string{ + "Donec a dui et dui fringilla consectetur id nec massa.", + " Nulla aliquet porttitor venenatis.", + "Nam tristique maximus ante hendrerit aliquet.", + "Integer dignissim posuere lobortis.", + " Aenean ultrices erat ut augue ultrices", + "Suspendisse lacinia ante nunc, pulvinar blandit nisl ornare ut.", + }, + args: args{ + string: "", + }, + want: []string(nil), + }, + { + name: "ignore-maximus", + content: []string{ + "Donec a dui et dui fringilla consectetur id nec massa.", + "Nulla aliquet porttitor venenatis.", + "Nam tristique maximus ante hendrerit aliquet.", + "Integer dignissim posuere lobortis.", + " Aenean ultrices erat ut augue ultrices", + "Suspendisse lacinia ante nunc, pulvinar blandit nisl ornare ut.", + }, + args: args{ + string: "maximus", + }, + want: []string{ + "Donec a dui et dui fringilla consectetur id nec massa.", + "Nulla aliquet porttitor venenatis.", + "Integer dignissim posuere lobortis.", + " Aenean ultrices erat ut augue ultrices", + "Suspendisse lacinia ante nunc, pulvinar blandit nisl ornare ut.", + }, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got := content.IgnoreLine(tt.args.string)(tt.content) + assert.EqualValues(t, tt.want, got) + }) + } +} diff --git a/pkg/filters/content/length.go b/pkg/filters/content/length.go index 2797dfa..8116ff1 100644 --- a/pkg/filters/content/length.go +++ b/pkg/filters/content/length.go @@ -1,9 +1,15 @@ package content -func LineLength(min int, max int) func(content []string) (result []string) { +import "strings" + +func LineLength(min int, max int, ignoreWhitespaces bool) func(content []string) (result []string) { return func(content []string) (result []string) { for _, line := range content { - if len(line) >= min && len(line) <= max { + length := len(line) + if ignoreWhitespaces { + length = len(strings.ReplaceAll(line, " ", "")) + } + if length >= min && length <= max { result = append(result, line) } } diff --git a/pkg/filters/content/length_test.go b/pkg/filters/content/length_test.go index 90e9181..198c7f4 100644 --- a/pkg/filters/content/length_test.go +++ b/pkg/filters/content/length_test.go @@ -9,31 +9,61 @@ import ( ) func Test_LineLength(t *testing.T) { + type args struct { + min int + max int + ignoreWs bool + } tests := []struct { name string + args args content []string want []string }{ { - name: "", + name: "min-35-max-40-includes-ws", content: []string{ "Donec a dui et dui fringilla consectetur id nec massa.", // 54 chars - "Nulla aliquet porttitor venenatis.", // 34 chars + " Nulla aliquet porttitor venenatis.", // 38 chars "Nam tristique maximus ante hendrerit aliquet.", // 45 chars "Integer dignissim posuere lobortis.", // 35 chars - "Aenean ultrices erat ut augue ultrices", // 38 chars + " Aenean ultrices erat ut augue ultrices", // 38 chars "Suspendisse lacinia ante nunc, pulvinar blandit nisl ornare ut.", // 63 chars }, + args: args{ + min: 35, + max: 40, + ignoreWs: false, + }, want: []string{ + " Nulla aliquet porttitor venenatis.", "Integer dignissim posuere lobortis.", - "Aenean ultrices erat ut augue ultrices", + }, + }, + { + name: "min-35-max-40-ignore-ws", + content: []string{ + "Donec a dui et dui fringilla consectetur id nec massa.", // 54 chars + "Nulla aliquet porttitor venenatis.", // 34 chars + "Nam tristique maximus ante hendrerit aliquet.", // 45 chars + "Integer dignissim posuere lobortis.", // 35 chars + " Aenean ultrices erat ut augue ultrices", // 48 chars + "Suspendisse lacinia ante nunc, pulvinar blandit nisl ornare ut.", // 63 chars + }, + args: args{ + min: 35, + max: 40, + ignoreWs: true, + }, + want: []string{ + "Nam tristique maximus ante hendrerit aliquet.", }, }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - got := content.LineLength(35, 40)(tt.content) - assert.Equal(t, tt.want, got) + got := content.LineLength(tt.args.min, tt.args.max, tt.args.ignoreWs)(tt.content) + assert.EqualValues(t, tt.want, got) }) } }