Skip to content

Commit

Permalink
Merge pull request #221 from donners/archiveis-fix-panic
Browse files Browse the repository at this point in the history
archiveis - fix panic from closing a closed channel
  • Loading branch information
Ice3man543 authored Feb 13, 2020
2 parents 771164a + d7b50e0 commit 7ecaa96
Showing 1 changed file with 33 additions and 44 deletions.
77 changes: 33 additions & 44 deletions pkg/subscraping/sources/archiveis/archiveis.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,54 +13,40 @@ import (
// ArchiveIs carries the state used by enumerate while scraping archive.is.
type ArchiveIs struct {
// Results is the channel enumerate sends its Error and Subdomain results on.
Results chan subscraping.Result
// Session provides the HTTP getter (NormalGetWithContext) and the Extractor
// that enumerate runs over each fetched page.
Session *subscraping.Session

// closed records that Results has already been closed; enumerate checks it
// before its guarded close so that close is not repeated.
closed bool
}

// reNext matches the anchor with id "next" and captures its href value in
// submatch 1. A raw string literal is used so the quotes need no escaping.
var reNext = regexp.MustCompile(`<a id="next" style=".*" href="(.*)">&rarr;</a>`)

// enumerate requests baseURL through a.Session, sends every string matched by
// a.Session.Extractor on a.Results as a Subdomain result, and calls itself with
// the URL captured by reNext when the page contains a "next" link. Request and
// read failures are sent on a.Results as Error results.
//
// NOTE(review): this span looks like a rendered diff in which two versions of
// the body overlap. The first (down to the "Guard channel closing" section)
// closes a.Results itself on its exit paths; the second only returns and never
// closes the channel. The braces do not balance as shown — confirm which lines
// are live before editing.
func (a *ArchiveIs) enumerate(ctx context.Context, baseURL string) {
for {
select {
// Context finished: this version closes Results here before returning.
case <-ctx.Done():
close(a.Results)
return
default:
resp, err := a.Session.NormalGetWithContext(ctx, baseURL)
if err != nil {
// Report the request failure, then close Results and stop.
a.Results <- subscraping.Result{Source: "archiveis", Type: subscraping.Error, Error: err}
close(a.Results)
return
}

// Get the response body
body, err := ioutil.ReadAll(resp.Body)
if err != nil {
a.Results <- subscraping.Result{Source: "archiveis", Type: subscraping.Error, Error: err}
// Release the body before closing Results and returning.
resp.Body.Close()
close(a.Results)
return
}
resp.Body.Close()

src := string(body)

// Emit one Subdomain result per extractor match in the page.
for _, subdomain := range a.Session.Extractor.FindAllString(src, -1) {
a.Results <- subscraping.Result{Source: "archiveis", Type: subscraping.Subdomain, Value: subdomain}
}

// Follow the "next" link, if any, by recursing with the captured href.
match1 := reNext.FindStringSubmatch(src)
if len(match1) > 0 {
a.enumerate(ctx, match1[1])
}

// Guard channel closing during recursion
// NOTE(review): the early-return paths above call close(a.Results) without
// consulting or setting a.closed.
if !a.closed {
close(a.Results)
a.closed = true
}
return
}
// Non-blocking cancellation check: return if ctx is already done, otherwise
// fall through to the request below.
select {
case <-ctx.Done():
return
default:
}

resp, err := a.Session.NormalGetWithContext(ctx, baseURL)
if err != nil {
// Report the request failure and stop; Results is not closed here.
a.Results <- subscraping.Result{Source: "archiveis", Type: subscraping.Error, Error: err}
return
}

// Get the response body
body, err := ioutil.ReadAll(resp.Body)
// Close the body right after reading, before inspecting err, so it is
// released on both the success and the failure path.
resp.Body.Close()
if err != nil {
a.Results <- subscraping.Result{Source: "archiveis", Type: subscraping.Error, Error: err}
return
}

src := string(body)

// Emit one Subdomain result per extractor match in the page.
for _, subdomain := range a.Session.Extractor.FindAllString(src, -1) {
a.Results <- subscraping.Result{Source: "archiveis", Type: subscraping.Subdomain, Value: subdomain}
}

// Follow the "next" link, if any, by recursing with the captured href.
match1 := reNext.FindStringSubmatch(src)
if len(match1) > 0 {
a.enumerate(ctx, match1[1])
}
}

Expand All @@ -76,7 +62,10 @@ func (s *Source) Run(ctx context.Context, domain string, session *subscraping.Se
Results: results,
}

go aInstance.enumerate(ctx, "http://archive.is/*."+domain)
go func() {
aInstance.enumerate(ctx, "http://archive.is/*."+domain)
close(aInstance.Results)
}()

return aInstance.Results
}
Expand Down

0 comments on commit 7ecaa96

Please sign in to comment.