feat: Regex filter URLs
Add `-f, --filter` option to filter URLs using a regex, currently
applied to the URL path.
enenumxela committed Jun 20, 2021
1 parent 70fdf45 commit bbf0771
Showing 11 changed files with 132 additions and 92 deletions.
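Taken together, the change threads a regex from the CLI down to URL normalization: URLs whose path matches the pattern are dropped (see the NormalizeURL hunk below). Assuming the binary built from cmd/sigurlfind3r, a hypothetical invocation would be:

    sigurlfind3r -d example.com -f '\.(png|jpg|css)$' -s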
13 changes: 9 additions & 4 deletions cmd/sigurlfind3r/main.go
@@ -37,6 +37,8 @@ func init() {
flag.StringVar(&o.Domain, "domain", "", "")
flag.StringVar(&o.SourcesToExclude, "es", "", "")
flag.StringVar(&o.SourcesToExclude, "exclude-sources", "", "")
flag.StringVar(&o.FilterRegex, "f", "", "")
flag.StringVar(&o.FilterRegex, "filter", "", "")
flag.BoolVar(&o.IncludeSubdomains, "is", false, "")
flag.BoolVar(&o.IncludeSubdomains, "include-subs", false, "")
flag.BoolVar(&o.ListSources, "ls", false, "")
@@ -57,11 +59,12 @@ func init() {
h += "\nOPTIONS:\n"
h += " -d, --domain domain to fetch urls for\n"
h += " -es, --exclude-sources comma(,) separated list of sources to exclude\n"
h += " -f, --filter URL filtering regex\n"
h += " -is, --include-subs include subdomains' urls\n"
h += " -ls, --list-sources list all the available sources\n"
h += " -nc, --no-color no color mode\n"
h += " -s, --silent silent mode: output urls only\n"
h += " -us, --use-sources comma(,) separated list of sources to use\n\n"
h += " -us, --use-sources comma(,) separated list of sources to use\n"

fmt.Println(h)
}
@@ -117,14 +120,16 @@ func main() {
}

runner := sigurlfind3r.New(&sigurlfind3r.Options{
SourcesToUse: options.SourcesToUse,
SourcesToExclude: options.SourcesToExclude,
FilterRegex: options.FilterRegex,
SourcesToUse: options.SourcesToUse,
SourcesToExclude: options.SourcesToExclude,
IncludeSubdomains: options.IncludeSubdomains,
Keys: &session.Keys{
GitHub: options.YAML.Keys.GitHub,
},
})

URLs, err := runner.Run(context.Background(), options.Domain, options.IncludeSubdomains)
URLs, err := runner.Run(context.Background(), options.Domain)
if err != nil {
log.Fatalln(err)
}
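For context, a minimal sketch of calling the updated library API. The Options fields and the Run signature are taken from the hunk above; that Run yields a channel of scraping.URL values with a Value field is an assumption based on the passive.Agent.Run code later in this diff:

    package main

    import (
    	"context"
    	"fmt"
    	"log"

    	"github.com/signedsecurity/sigurlfind3r/pkg/sigurlfind3r"
    	"github.com/signedsecurity/sigurlfind3r/pkg/sigurlfind3r/session"
    )

    func main() {
    	runner := sigurlfind3r.New(&sigurlfind3r.Options{
    		FilterRegex:       `\.(png|jpg|css)$`, // illustrative pattern: matching paths are dropped
    		IncludeSubdomains: true,
    		Keys:              &session.Keys{},
    	})

    	URLs, err := runner.Run(context.Background(), "example.com")
    	if err != nil {
    		log.Fatalln(err)
    	}

    	for URL := range URLs {
    		fmt.Println(URL.Value)
    	}
    }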
4 changes: 4 additions & 0 deletions internal/configuration/configuration.go
@@ -20,6 +20,7 @@ type YAMLConfiguration struct {

type CLIOptions struct {
Domain string
FilterRegex string
IncludeSubdomains bool
ListSources bool
NoColor bool
@@ -30,6 +31,7 @@ type CLIOptions struct {

type Options struct {
Domain string
FilterRegex string
IncludeSubdomains bool
ListSources bool
NoColor bool
@@ -39,6 +41,7 @@ type Options struct {
YAML YAMLConfiguration
}

// ParseCLIOptions parses the command line flags and reads the config file
func ParseCLIOptions(options *CLIOptions) (parsedOptions *Options, err error) {
version := "1.0.0"

@@ -51,6 +54,7 @@ func ParseCLIOptions(options *CLIOptions) (parsedOptions *Options, err error) {

parsedOptions = &Options{
Domain: options.Domain,
FilterRegex: options.FilterRegex,
IncludeSubdomains: options.IncludeSubdomains,
ListSources: options.ListSources,
NoColor: options.NoColor,
@@ -1,30 +1,35 @@
package passive

import (
"sync"

"github.com/signedsecurity/sigurlfind3r/pkg/sigurlfind3r/scraping"
"github.com/signedsecurity/sigurlfind3r/pkg/sigurlfind3r/scraping/sources/commoncrawl"
"github.com/signedsecurity/sigurlfind3r/pkg/sigurlfind3r/scraping/sources/github"
"github.com/signedsecurity/sigurlfind3r/pkg/sigurlfind3r/scraping/sources/otx"
"github.com/signedsecurity/sigurlfind3r/pkg/sigurlfind3r/scraping/sources/urlscan"
"github.com/signedsecurity/sigurlfind3r/pkg/sigurlfind3r/scraping/sources/wayback"
"github.com/signedsecurity/sigurlfind3r/pkg/sigurlfind3r/session"
)

// Agent is a struct for running passive url collection for a given host.
// It wraps the scraping package and provides a layer to build upon.
type Agent struct {
sources map[string]scraping.Source
}

// New creates a new agent for passive subdomain discovery
// New creates a new agent for passive url collection
func New(sourcesToUse, sourcesToExclude []string) (agent *Agent) {
// Create the agent, insert the sources and remove the excluded sources
agent = &Agent{sources: make(map[string]scraping.Source)}

agent = &Agent{
sources: make(map[string]scraping.Source),
}

agent.addSources(sourcesToUse)
agent.removeSources(sourcesToExclude)

return
}

// addSources adds the given list of sources to the source map
func (agent *Agent) addSources(sourcesToUse []string) {
for _, source := range sourcesToUse {
switch source {
case "commoncrawl":
@@ -39,44 +44,11 @@ func New(sourcesToUse, sourcesToExclude []string) (agent *Agent) {
agent.sources[source] = &wayback.Source{}
}
}
}

// removeSources deletes the given sources from the source map
func (agent *Agent) removeSources(sourcesToExclude []string) {
for _, source := range sourcesToExclude {
delete(agent.sources, source)
}

return
}

// EnumerateSubdomains enumerates all the subdomains for a given domain
func (agent *Agent) Enumerate(domain string, subs bool, keys *session.Keys) (URLs chan scraping.URL) {
URLs = make(chan scraping.URL)

go func() {
defer close(URLs)

ses, err := session.New(domain, subs, 10, keys)
if err != nil {
return
}

wg := &sync.WaitGroup{}

// Run each source in parallel on the target domain
for name, source := range agent.sources {
wg.Add(1)

go func(name string, source scraping.Source) {
for res := range source.Run(domain, ses, subs) {
URLs <- res
}

wg.Done()
}(name, source)
}

wg.Wait()

}()

return
}
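The include/exclude mechanics above reduce to inserting into and deleting from a map, where deleting an absent key is a safe no-op; a standalone sketch of the same pattern (source names are illustrative):

    package main

    import "fmt"

    func main() {
    	sources := map[string]bool{}

    	// addSources equivalent: register every requested source.
    	for _, s := range []string{"commoncrawl", "github", "otx", "urlscan", "wayback"} {
    		sources[s] = true
    	}

    	// removeSources equivalent: drop the exclusions.
    	for _, s := range []string{"github", "notasource"} {
    		delete(sources, s) // no-op for keys that were never added
    	}

    	fmt.Println(len(sources), "sources remain") // 4 sources remain
    }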
43 changes: 43 additions & 0 deletions pkg/sigurlfind3r/passive/agent_run.go
@@ -0,0 +1,43 @@
package passive

import (
"regexp"
"sync"

"github.com/signedsecurity/sigurlfind3r/pkg/sigurlfind3r/scraping"
"github.com/signedsecurity/sigurlfind3r/pkg/sigurlfind3r/session"
)

// Run collects all the known urls for a given domain
func (agent *Agent) Run(domain string, filterRegex *regexp.Regexp, includeSubdomains bool, keys *session.Keys) (URLs chan scraping.URL) {
URLs = make(chan scraping.URL)

go func() {
defer close(URLs)

ses, err := session.New(domain, filterRegex, includeSubdomains, 10, keys)
if err != nil {
return
}

wg := &sync.WaitGroup{}

// Run each source in parallel on the target domain
for name, source := range agent.sources {
wg.Add(1)

go func(name string, source scraping.Source) {
for res := range source.Run(domain, ses, includeSubdomains) {
URLs <- res
}

wg.Done()
}(name, source)
}

wg.Wait()

}()

return
}
3 changes: 3 additions & 0 deletions pkg/sigurlfind3r/passive/doc.go
@@ -0,0 +1,3 @@
// Package passive provides the capability for passive url
// collection on targets.
package passive
4 changes: 3 additions & 1 deletion pkg/sigurlfind3r/scraping/sources/commoncrawl/commoncrawl.go
@@ -108,7 +108,9 @@ func (source *Source) Run(domain string, ses *session.Session, includeSubs bool)
return
}

URLs <- scraping.URL{Source: source.Name(), Value: result.URL}
if URL, ok := scraping.NormalizeURL(result.URL, ses.Scope); ok {
URLs <- scraping.URL{Source: source.Name(), Value: URL}
}
}
}
}()
16 changes: 2 additions & 14 deletions pkg/sigurlfind3r/scraping/sources/urlscan/urlscan.go
@@ -5,7 +5,6 @@ import (
"fmt"
"io/ioutil"

"github.com/enenumxela/urlx/pkg/urlx"
"github.com/signedsecurity/sigurlfind3r/pkg/sigurlfind3r/scraping"
"github.com/signedsecurity/sigurlfind3r/pkg/sigurlfind3r/session"
)
@@ -43,19 +42,8 @@ func (source *Source) Run(domain string, ses *session.Session, includeSubs bool)
}

for _, i := range results.Results {
parsedURL, err := urlx.Parse(i.Page.URL)
if err != nil {
continue
}

if parsedURL.ETLDPlus1 == domain {
if includeSubs {
URLs <- scraping.URL{Source: source.Name(), Value: i.Page.URL}
} else {
if parsedURL.SubDomain == "" || parsedURL.SubDomain == "www" {
URLs <- scraping.URL{Source: source.Name(), Value: i.Page.URL}
}
}
if URL, ok := scraping.NormalizeURL(i.Page.URL, ses.Scope); ok {
URLs <- scraping.URL{Source: source.Name(), Value: URL}
}
}
}()
4 changes: 3 additions & 1 deletion pkg/sigurlfind3r/scraping/sources/wayback/wayback.go
@@ -48,7 +48,9 @@ func (source *Source) Run(domain string, ses *session.Session, includeSubs bool)
URL = strings.TrimPrefix(URL, "25")
URL = strings.TrimPrefix(URL, "2f")

URLs <- scraping.URL{Source: source.Name(), Value: URL}
if URL, ok := scraping.NormalizeURL(URL, ses.Scope); ok {
URLs <- scraping.URL{Source: source.Name(), Value: URL}
}
}
}
}()
16 changes: 13 additions & 3 deletions pkg/sigurlfind3r/scraping/utils.go
@@ -3,9 +3,8 @@ package scraping
import (
"strings"

"github.com/signedsecurity/sigurlfind3r/pkg/sigurlfind3r/session"

"github.com/enenumxela/urlx/pkg/urlx"
"github.com/signedsecurity/sigurlfind3r/pkg/sigurlfind3r/session"
)

func NormalizeURL(URL string, scope session.Scope) (string, bool) {
@@ -14,16 +13,27 @@
URL = strings.TrimRight(URL, "/")
URL = strings.Trim(URL, " ")

// if scope.FilterRegex.MatchString(URL) {
// return URL, false
// }

parsedURL, err := urlx.Parse(URL)
if err != nil {
return URL, false
}

// fmt.Println(parsedURL.Path)
// fmt.Println(scope.FilterRegex.MatchString(parsedURL.Path))

if scope.FilterRegex.MatchString(parsedURL.Path) {
return URL, false
}

if parsedURL.ETLDPlus1 == "" || parsedURL.ETLDPlus1 != scope.Domain {
return URL, false
}

if !scope.IncludeSubs {
if !scope.IncludeSubdomains {
if parsedURL.Host != scope.Domain && parsedURL.Host != "www."+scope.Domain {
return URL, false
}
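To make the path-only matching in NormalizeURL concrete, a small self-contained sketch; it swaps the urlx parser for the standard net/url purely for illustration:

    package main

    import (
    	"fmt"
    	"net/url"
    	"regexp"
    )

    func main() {
    	// Hypothetical filter: drop static assets by extension.
    	filter := regexp.MustCompile(`\.(png|jpg|css)$`)

    	for _, raw := range []string{
    		"https://example.com/app/login",
    		"https://example.com/static/logo.png",
    	} {
    		u, err := url.Parse(raw)
    		if err != nil {
    			continue
    		}
    		// As in NormalizeURL, only the path is tested, so a match
    		// elsewhere (e.g. in the query string) would not drop the URL.
    		fmt.Printf("%s dropped=%v\n", raw, filter.MatchString(u.Path))
    	}
    }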
19 changes: 11 additions & 8 deletions pkg/sigurlfind3r/session/session.go
@@ -7,6 +7,7 @@ import (
"io/ioutil"
"net/http"
"net/url"
"regexp"
"time"
)

@@ -15,17 +16,18 @@ type Keys struct {
}

type Scope struct {
Domain string
IncludeSubs bool
Domain string
FilterRegex *regexp.Regexp
IncludeSubdomains bool
}

type Session struct {
Scope Scope
Client *http.Client
Keys *Keys
Scope Scope
}

func New(domain string, includeSubs bool, timeout int, keys *Keys) (*Session, error) {
func New(domain string, filterRegex *regexp.Regexp, includeSubdomains bool, timeout int, keys *Keys) (*Session, error) {
client := &http.Client{
Transport: &http.Transport{
MaxIdleConns: 100,
@@ -38,12 +40,13 @@
}

return &Session{
Scope: Scope{
Domain: domain,
IncludeSubs: includeSubs,
},
Client: client,
Keys: keys,
Scope: Scope{
Domain: domain,
FilterRegex: filterRegex,
IncludeSubdomains: includeSubdomains,
},
}, nil
}

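One detail the diff leaves implicit: the CLI carries FilterRegex as a plain string while session.Scope stores a *regexp.Regexp, so the pattern must be compiled somewhere in between (the compilation site is not shown here). A sketch of that step; note that an empty pattern compiles successfully and matches every path, so the unset flag presumably needs a guard:

    package main

    import (
    	"log"
    	"regexp"
    )

    // compileFilter is a hypothetical helper, not part of this diff.
    func compileFilter(pattern string) (*regexp.Regexp, error) {
    	if pattern == "" {
    		// regexp.Compile("") matches everything, which would drop
    		// every URL; treat the unset flag as "no filter" instead.
    		return nil, nil
    	}
    	return regexp.Compile(pattern)
    }

    func main() {
    	filter, err := compileFilter(`^/wp-content/`)
    	if err != nil {
    		log.Fatalln("invalid --filter regex:", err)
    	}
    	log.Println("matches:", filter.MatchString("/wp-content/uploads/a.js"))
    }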