-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathautomate.py
52 lines (41 loc) · 1.68 KB
/
automate.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
from datetime import datetime, timedelta
import json
import shlex

import airflow
from airflow import DAG
from airflow.operators.bash_operator import BashOperator
import pandas as pd
# Load the search and mail settings once, at module import time.
# The keys read further down in this file are 'keywords', 'location',
# 'gmail_username', 'gmail_password' and 'recipient'.
# The encoding is pinned so that non-ASCII search terms are decoded the same
# way on every machine instead of following the platform's default encoding.
# NOTE(review): "config.json" is a relative path, so it is resolved against the
# working directory of whichever process imports this module -- confirm that
# Airflow is always started from the directory that holds the file.
with open("config.json", encoding="utf-8") as json_data_file:
    config = json.load(json_data_file)
# Arguments handed to the DAG as task defaults; only the owner is set.
default_args = dict(owner='airflow')

# Single DAG that holds the pipelines of all configured searches.
# The cron expression fires once a day at 18:00.
# NOTE(review): start_date is a fixed date in the past and `catchup` is left at
# its default -- confirm that back-filling missed intervals is intended.
dag = DAG(
    dag_id='job_postings',
    default_args=default_args,
    schedule_interval='0 18 * * *',
    start_date=datetime(2020, 6, 29),
)
def _sh(value):
    """Return *value* rendered as one safely quoted POSIX-shell word.

    The values below come from config.json and are substituted into shell
    command lines. Quoting them here keeps characters such as ``"``, ``$``,
    backticks or backslashes from being interpreted by bash.
    """
    return shlex.quote(str(value))


# Last task of the previously built search pipeline (None for the first one).
previous = None

# Build one vpn -> crawl -> enrich -> mail pipeline per configured search.
# NOTE: zip() stops at the shorter list, so a keyword without a matching
# location (or the reverse) is skipped without any error.
for i, (keywords, location) in enumerate(zip(config['keywords'], config['location'])):
    # Pre-quoted values; the templates insert them WITHOUT surrounding quotes.
    search_params = {
        'keywords': _sh(keywords),
        'location': _sh(location),
    }
    # NOTE(review): the Gmail password is still interpolated into the command
    # line of the mail task -- consider an environment variable or a secrets
    # backend instead of a command-line argument.
    mail_params = {
        **search_params,
        'username': _sh(config['gmail_username']),
        'password': _sh(config['gmail_password']),
        'recipient': _sh(config['recipient']),
    }

    vpn = BashOperator(
        task_id=f'vpn_{i}',
        bash_command='nordvpn connect',
        dag=dag,
    )
    scrape = BashOperator(
        task_id=f'crawl_{i}',
        bash_command=(
            ' cd ~/Documents/job_mail/scraping/ && scrapy crawl linkedin'
            ' -a keywords={{params.keywords}}'
            ' -a location={{params.location}}'
            ' -a filter_time="1" '
        ),
        dag=dag,
        params=search_params,
    )
    enrich = BashOperator(
        task_id=f'enrich_{i}',
        bash_command='cd ~/Documents/job_mail/ && python main.py',
        dag=dag,
    )
    mail = BashOperator(
        task_id=f'mail_{i}',
        bash_command=(
            'cd ~/Documents/job_mail/flask_mail && python mail_app.py'
            ' --keywords {{params.keywords}}'
            ' --location {{params.location}}'
            ' --username {{params.username}}'
            ' --password {{params.password}}'
            ' --recipient {{params.recipient}} '
        ),
        dag=dag,
        params=mail_params,
    )

    # Chain the searches: each pipeline starts only after the previous
    # search's mail task. Compare against None explicitly instead of relying
    # on the truthiness of an operator object.
    if previous is not None:
        previous >> vpn
    vpn >> scrape >> enrich >> mail
    previous = mail