-
Notifications
You must be signed in to change notification settings - Fork 4
/
Copy pathcrawler_options.go
85 lines (73 loc) · 1.82 KB
/
crawler_options.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
package crawl
import (
"net/http"
"time"
)
// Option - Functional option for a crawl.
// An Option is a function that receives the *crawl being configured and
// mutates it in place; the With* constructors in this file each return one.
type Option func(c *crawl)
// options - Tunable crawl settings written by the With* option constructors.
type options struct {
	// concurrency is written by WithConcurrency and queueCapacity by
	// WithQueueCapacity.
	concurrency, queueCapacity int
	// headers holds the default headers; written by WithDefaultHeaders and
	// WithUserAgent (which sets the "User-Agent" key).
	headers map[string]string
	// defaultTimeout is written by WithDefaultTimeout.
	defaultTimeout time.Duration
}
// WithTransport - Sets crawl HTTP transport.
// The returned Option stores the given *http.Transport on the crawl as-is
// (no copy is made).
func WithTransport(transport *http.Transport) Option {
	setTransport := func(cr *crawl) { cr.transport = transport }
	return setTransport
}
// WithQueue - Sets crawl queue.
// The returned Option assigns the given Queue to the crawl.
// Default: creates queue using NewQueue() with capacity of WithQueueCapacity().
func WithQueue(queue Queue) Option {
	return Option(func(cr *crawl) {
		cr.queue = queue
	})
}
// WithDefaultHeaders - Sets crawl default headers.
// The headers are copied when the option is applied, so the crawl never
// aliases the caller's map: a later WithUserAgent (which writes the
// "User-Agent" key) does not modify the map passed in here, and crawls
// configured from the same map do not share state.
// A nil map is stored as nil.
// Default: empty.
func WithDefaultHeaders(headers map[string]string) Option {
	return func(c *crawl) {
		if headers == nil {
			// Preserve nil so "no headers configured" stays distinguishable.
			c.opts.headers = nil
			return
		}
		cloned := make(map[string]string, len(headers))
		for name, value := range headers {
			cloned[name] = value
		}
		c.opts.headers = cloned
	}
}
// WithUserAgent - Sets crawl default user-agent.
// The returned Option writes ua under the "User-Agent" key of the crawl's
// default headers, creating the header map first when it is nil.
func WithUserAgent(ua string) Option {
	return func(cr *crawl) {
		hdrs := cr.opts.headers
		if hdrs == nil {
			// Writing to a nil map panics, so allocate and store one first.
			hdrs = make(map[string]string)
			cr.opts.headers = hdrs
		}
		hdrs["User-Agent"] = ua
	}
}
// WithConcurrency - Sets crawl concurrency.
// The value n is stored unchanged; no validation is performed here.
// Default: 1000.
func WithConcurrency(n int) Option {
	return Option(func(cr *crawl) { cr.opts.concurrency = n })
}
// WithQueueCapacity - Sets queue capacity.
// The value n is stored unchanged in the crawl options.
// It is the capacity used when a queue needs to be created (the capacity of
// the channel backing the in-memory queue).
// It also sets capacity of errors buffered channel.
// Default: 10000.
func WithQueueCapacity(n int) Option {
	var apply Option = func(cr *crawl) {
		cr.opts.queueCapacity = n
	}
	return apply
}
// WithSpiders - Registers spiders on a crawler.
// When the returned Option is applied, every spider function is called once,
// in the order given, with the crawl passed as its Crawler argument.
func WithSpiders(spiders ...func(Crawler)) Option {
	return func(cr *crawl) {
		for i := range spiders {
			spiders[i](cr)
		}
	}
}
// WithDefaultTimeout - Sets default request timeout duration.
// The duration d is stored unchanged in the crawl options.
func WithDefaultTimeout(d time.Duration) Option {
	applyTimeout := func(cr *crawl) { cr.opts.defaultTimeout = d }
	return applyTimeout
}