Skip to content

Commit

Permalink
Merge pull request #297 from jakopako:jakopako/issue119
Browse files Browse the repository at this point in the history
Can `node_index` and `child_index` be replaced with css selector syntax?
  • Loading branch information
jakopako authored May 12, 2024
2 parents ac6512e + 1e87445 commit 0e245c7
Showing 1 changed file with 3 additions and 4 deletions.
7 changes: 3 additions & 4 deletions scraper/scraper.go
Original file line number | Diff line number | Diff line change
Expand Up @@ -92,7 +92,6 @@ type RegexConfig struct {
type ElementLocation struct {
Selector string `yaml:"selector,omitempty"`
JsonSelector string `yaml:"json_selector,omitempty"`
- NodeIndex int `yaml:"node_index,omitempty"`
ChildIndex int `yaml:"child_index,omitempty"`
RegexExtract RegexConfig `yaml:"regex_extract,omitempty"`
Attr string `yaml:"attr,omitempty"`
Expand Down Expand Up @@ -858,7 +857,7 @@ func getTextString(e *ElementLocation, s *goquery.Selection) (string, error) {
} else {
fieldSelection = s.Find(e.Selector)
}
- if len(fieldSelection.Nodes) > e.NodeIndex {
+ if len(fieldSelection.Nodes) > 0 {
if e.Attr == "" {
if e.EntireSubtree {
// copied from https://github.com/PuerkitoBio/goquery/blob/v1.8.0/property.go#L62
Expand All @@ -882,7 +881,7 @@ func getTextString(e *ElementLocation, s *goquery.Selection) (string, error) {
buf.Reset()
}
} else {
- f(fieldSelection.Get(e.NodeIndex))
+ f(fieldSelection.Get(0))
fieldStrings = append(fieldStrings, buf.String())
}
} else {
Expand All @@ -896,7 +895,7 @@ func getTextString(e *ElementLocation, s *goquery.Selection) (string, error) {
}
}
} else {
- fieldNode := fieldSelection.Get(e.NodeIndex).FirstChild
+ fieldNode := fieldSelection.Get(0).FirstChild
if fieldNode != nil {
fieldNodes = append(fieldNodes, fieldNode)
}
Expand Down

0 comments on commit 0e245c7

Please sign in to comment.