forked from PaulMcInnis/JobFunnel
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathsettings.yaml
66 lines (56 loc) · 2.33 KB
/
settings.yaml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
# This is an example of a feature-complete JobFunnel configuration YAML.
# Try this out by simply running: "funnel load -s demo/settings.yaml"
# Output locations for the master CSV, the block / duplicate lists, the cache
# data and the log file.
# NOTE: any directories missing from these filepaths are created for you.
master_csv_file: "demo_job_search_results/demo_search.csv"
cache_folder: "demo_job_search_results/cache"
block_list_file: "demo_job_search_results/demo_block_list.json"
duplicates_list_file: "demo_job_search_results/demo_duplicates_list.json"
log_file: "demo_job_search_results/log.log"
# Job search configuration
# NOTE: every option below must be indented under `search:`; at column 0 they
# would be parsed as unrelated top-level keys and `search` would be null.
search:
  # Locale settings, one of USA_ENGLISH, CANADA_ENGLISH, CANADA_FRENCH,
  # UK_ENGLISH, FRANCE_FRENCH:
  # This tells JobFunnel where the website we are scraping is located, and
  # what language the contents are in.
  locale: CANADA_ENGLISH

  # Job providers which we will search, any of INDEED, MONSTER, GLASSDOOR:
  # NOTE: we choose domain via locale (i.e. CANADA_ENGLISH -> www.indeed.ca)
  # FIXME: we need to add back GLASSDOOR when that's working again.
  providers:
    - INDEED
    - MONSTER

  # Region that we are searching for jobs within:
  # NOTE: keep "ON" quoted, a bare ON is read as a boolean by YAML 1.1 parsers.
  province_or_state: "ON"  # NOTE: this is generally 2 characters long.
  city: "Waterloo"  # NOTE: this is the full city / town name.
  radius: 25  # km (NOTE: if we were in locale: USA_ENGLISH it's in miles)

  # These are the terms you would be typing into the website's search field:
  keywords:
    - Python

  # Don't return any listings older than this (in days):
  max_listing_days: 35

  # Blocked company names that will never appear in any results:
  company_block_list:
    - "Infox Consulting"

  # The desired level of work-remoteness (i.e. IN_PERSON, FULLY_REMOTE, ANY,
  # TEMPORARILY_REMOTE, PARTIALLY_REMOTE)
  remoteness: ANY
# Logging level, one of: CRITICAL, ERROR, WARNING, INFO, DEBUG, NOTSET
# NOTE(review): names are listed in upper case to match the value used here;
# confirm whether lower-case spellings are accepted as well.
log_level: INFO
# Delaying algorithm configuration
# NOTE: every option below must be indented under `delay:`; at column 0 they
# would be parsed as unrelated top-level keys and `delay` would be null.
delay:
  # Function used for the delaying algorithm: CONSTANT, LINEAR, SIGMOID
  algorithm: LINEAR
  # Maximum delay / upper bound for converging random delay
  # (presumably in seconds, TODO confirm)
  max_duration: 5.0
  # Minimum delay / lower bound for random delay
  min_duration: 1.0
  # Random delay
  random: false
  # Converging random delay, only used if 'random' is set to true
  converging: false
# Proxy settings (uncomment the lines below to enable):
# proxy:
#   protocol: https  # NOTE: you can also set to 'http'
#   ip: "1.1.1.1"
#   port: 200