Skip to content

Commit

Permalink
archiveis - fix panic from closing a closed channel
Browse files Browse the repository at this point in the history
If one of the recursive calls encountered an error, it closed the channel without setting the closed flag, so a later call could close the already-closed channel and panic. This change moves the channel close outside enumerate() so there is exactly one place that closes it.

Also removed the loop in enumerate(), which wasn't necessary.
  • Loading branch information
donners committed Feb 5, 2020
1 parent ad3c7e8 commit d7b50e0
Showing 1 changed file with 33 additions and 44 deletions.
77 changes: 33 additions & 44 deletions pkg/subscraping/sources/archiveis/archiveis.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,54 +13,40 @@ import (
// ArchiveIs holds the per-run state of the archive.is subdomain source:
// the channel that discovered results are emitted on, and the HTTP session
// used for requests. The Results channel is closed by the goroutine started
// in Run once enumeration finishes — enumerate itself never closes it.
type ArchiveIs struct {
	Results chan subscraping.Result
	Session *subscraping.Session
}

// reNext matches the pagination anchor on an archive.is results page and
// captures the href of the next page in its first submatch.
var reNext = regexp.MustCompile(`<a id="next" style=".*" href="(.*)">&rarr;</a>`)

func (a *ArchiveIs) enumerate(ctx context.Context, baseURL string) {
for {
select {
case <-ctx.Done():
close(a.Results)
return
default:
resp, err := a.Session.NormalGetWithContext(ctx, baseURL)
if err != nil {
a.Results <- subscraping.Result{Source: "archiveis", Type: subscraping.Error, Error: err}
close(a.Results)
return
}

// Get the response body
body, err := ioutil.ReadAll(resp.Body)
if err != nil {
a.Results <- subscraping.Result{Source: "archiveis", Type: subscraping.Error, Error: err}
resp.Body.Close()
close(a.Results)
return
}
resp.Body.Close()

src := string(body)

for _, subdomain := range a.Session.Extractor.FindAllString(src, -1) {
a.Results <- subscraping.Result{Source: "archiveis", Type: subscraping.Subdomain, Value: subdomain}
}

match1 := reNext.FindStringSubmatch(src)
if len(match1) > 0 {
a.enumerate(ctx, match1[1])
}

// Guard channel closing during recursion
if !a.closed {
close(a.Results)
a.closed = true
}
return
}
select {
case <-ctx.Done():
return
default:
}

resp, err := a.Session.NormalGetWithContext(ctx, baseURL)
if err != nil {
a.Results <- subscraping.Result{Source: "archiveis", Type: subscraping.Error, Error: err}
return
}

// Get the response body
body, err := ioutil.ReadAll(resp.Body)
resp.Body.Close()
if err != nil {
a.Results <- subscraping.Result{Source: "archiveis", Type: subscraping.Error, Error: err}
return
}

src := string(body)

for _, subdomain := range a.Session.Extractor.FindAllString(src, -1) {
a.Results <- subscraping.Result{Source: "archiveis", Type: subscraping.Subdomain, Value: subdomain}
}

match1 := reNext.FindStringSubmatch(src)
if len(match1) > 0 {
a.enumerate(ctx, match1[1])
}
}

Expand All @@ -76,7 +62,10 @@ func (s *Source) Run(ctx context.Context, domain string, session *subscraping.Se
Results: results,
}

go aInstance.enumerate(ctx, "http://archive.is/*."+domain)
go func() {
aInstance.enumerate(ctx, "http://archive.is/*."+domain)
close(aInstance.Results)
}()

return aInstance.Results
}
Expand Down

0 comments on commit d7b50e0

Please sign in to comment.