-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathkeywords.py
112 lines (80 loc) · 2.86 KB
/
keywords.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
#!/usr/bin/env python
# coding: utf-8
# In[1]:
import requests
import json
import pymongo
from bs4 import BeautifulSoup
from sqlalchemy import *
from sqlalchemy.orm import sessionmaker
from sqlalchemy.ext.declarative import declarative_base
# In[2]:
# Must live at module (global) scope: this declarative base is shared by the
# NaverKeyword model below and by metadata.create_all() in NaverKeywords.
base = declarative_base()
class NaverKeyword(base):
    """ORM model for a single crawled Naver realtime-search keyword.

    Maps to the ``naver`` table: an auto primary key, the keyword's rank,
    the keyword text (max 50 chars), and a timestamp column ``rdate``
    (populated by the database, not by this class).
    """

    __tablename__ = "naver"

    # column definitions — rank/keyword are filled by __init__, id/rdate by MySQL
    id = Column(Integer, primary_key=True)
    rank = Column(Integer, nullable=False)
    keyword = Column(String(50), nullable=False)
    rdate = Column(TIMESTAMP, nullable=False)

    def __init__(self, rank, keyword):
        self.rank = rank
        self.keyword = keyword

    def __repr__(self):
        # same output format as before, expressed as an f-string
        return f"<NaverKeyword {self.rank}, {self.keyword}>"
# In[3]:
class NaverKeywords:
    """Crawl Naver realtime-search keywords and persist them.

    Pipeline: scrape (rank, keyword) pairs from the Naver front page,
    save them to MySQL (via the NaverKeyword ORM model) and to MongoDB,
    then post a completion message to Slack.
    """

    def __init__(self, ip, base):
        """Create DB clients for the host at ``ip``; ``base`` is the shared
        declarative base whose metadata holds the NaverKeyword table."""
        # Both clients connect lazily — no network traffic happens here.
        self.mysql_client = create_engine(
            "mysql://root:dss@{}/world?charset=utf8".format(ip))
        self.mongo_client = pymongo.MongoClient("mongodb://{}:27017".format(ip))
        self.datas = None
        self.base = base

    def crawling(self):
        """Scrape (rank, keyword) string pairs into ``self.datas``."""
        # timeout so a dead network fails fast instead of hanging forever
        response = requests.get("https://www.naver.com/", timeout=10)
        dom = BeautifulSoup(response.content, "html.parser")
        items = dom.select(".ah_roll_area > .ah_l > .ah_item")
        datas = []
        for item in items:  # renamed: original shadowed `keyword` inside the loop
            rank = item.select_one(".ah_r").text
            keyword = item.select_one(".ah_k").text
            datas.append((rank, keyword))
        self.datas = datas

    def mysql_save(self):
        """Persist ``self.datas`` to MySQL through the ORM session."""
        # create the table if it does not exist yet
        self.base.metadata.create_all(self.mysql_client)
        # build ORM rows from the crawled pairs
        keywords = [NaverKeyword(rank, keyword) for rank, keyword in self.datas]
        maker = sessionmaker(bind=self.mysql_client)
        session = maker()
        try:
            session.add_all(keywords)
            session.commit()
        finally:
            # close even when commit raises, so the connection is not leaked
            session.close()

    def mongo_save(self):
        """Persist ``self.datas`` to the ``crawling.naver_keywords`` collection."""
        keywords = [{"rank": rank, "keyword": keyword}
                    for rank, keyword in self.datas]
        # Collection.insert() was deprecated and removed in pymongo 4.x;
        # insert_many is the supported bulk-insert API.
        self.mongo_client.crawling.naver_keywords.insert_many(keywords)

    def send_slack(self, msg, channel="#dss", username="provision_bot"):
        """Post ``msg`` to Slack via an incoming webhook."""
        # SECURITY NOTE(review): this webhook URL embeds a secret token and is
        # committed to source control — it should be moved to an environment
        # variable / secrets store and the exposed token revoked.
        webhook_URL = "https://hooks.slack.com/services/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"
        payload = {
            "channel": channel,
            "username": username,
            "icon_emoji": ":provision:",
            "text": msg,
        }
        requests.post(webhook_URL, data=json.dumps(payload), timeout=10)

    def run(self):
        """Execute the full pipeline: crawl, save to both stores, notify."""
        self.crawling()
        self.mysql_save()
        self.mongo_save()
        self.send_slack("naver crawling done!")
        print("execute done!")
# In[4]:
if __name__ == "__main__":
    # Guarded entry point: crawling and DB writes fire only on direct
    # execution, not when this module is imported by other code.
    nk = NaverKeywords("13.209.163.181", base)
    nk.run()