-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathget_bigdata.py
50 lines (38 loc) · 1.99 KB
/
get_bigdata.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
import requests
import math
import pandas as pd
from alive_progress import alive_bar
import argparse
def get_locgoRegnVisitrDDList(startYmd, endYmd, page):
    """Fetch one page of the DataLabService ``locgoRegnVisitrDDList`` endpoint.

    Sends a GET request that asks for up to 1000 rows per page in JSON form.

    Args:
        startYmd: Value sent as the ``startYmd`` query parameter
            (presumably a YYYYMMDD date string -- confirm against the API docs).
        endYmd: Value sent as the ``endYmd`` query parameter
            (presumably the same format as ``startYmd``).
        page: Page number, sent as ``pageNo``.

    Returns:
        The response body decoded from JSON.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.Timeout: If the server does not respond within the timeout.
        ValueError: If the response body is not valid JSON.
    """
    url = 'http://api.visitkorea.or.kr/openapi/service/rest/DataLabService/locgoRegnVisitrDDList'
    # NOTE(review): the service key is committed in source; consider loading
    # it from configuration or the environment instead.
    params = {
        'serviceKey': '2scFfN6uVjhuRcXrj1DgGjslYn0wYJc7kvCOHHwOI/0kr1sjf2OqLLjq5SvCt3jKyr4JPHr/K3QGuXhvv6HYlQ==',
        'pageNo': page,
        'numOfRows': '1000',
        'MobileOS': 'ETC',
        'MobileApp': 'WorkAt_',
        'startYmd': startYmd,
        'endYmd': endYmd,
        '_type': 'json',
    }
    # Without a timeout, requests waits indefinitely on a stalled connection,
    # which would freeze a long multi-page download.
    response = requests.get(url, params=params, timeout=60)
    # Surface HTTP-level failures here rather than as an opaque JSON decode
    # error on an error page.
    response.raise_for_status()
    return response.json()
def jsonToDataframe(data):
    """Build a pandas DataFrame from *data* via ``DataFrame.from_records``.

    Args:
        data: Any input accepted by ``pd.DataFrame.from_records``, e.g. a
            list of dicts with one dict per row.

    Returns:
        The resulting ``pd.DataFrame``.
    """
    return pd.DataFrame.from_records(data)
def save_dataframe_to_csv_with_header(path, dataframe):
    """Write *dataframe* to *path* as CSV, including the column header row.

    The index is not written. ``to_csv`` is used with its default write
    mode, so an existing file at *path* is replaced.

    Args:
        path: Destination handed to ``DataFrame.to_csv``.
        dataframe: The ``pd.DataFrame`` to serialize.
    """
    dataframe.to_csv(path, header=True, index=False)
def append_dataframe_to_csv(path, dataframe):
    """Append the rows of *dataframe* to the CSV file at *path*.

    Neither a header row nor the index is written, so the rows are added
    below whatever the file already contains.

    Args:
        path: Destination handed to ``DataFrame.to_csv`` in append mode.
        dataframe: The ``pd.DataFrame`` whose rows are appended.
    """
    csv_options = {'mode': 'a', 'header': False, 'index': False}
    dataframe.to_csv(path, **csv_options)
def main(startYmd, endYmd):
    """Download every result page for the range into ``<startYmd>_<endYmd>.csv``.

    Page 1 is fetched first and written with a header row. Its ``totalCount``
    field determines how many 1000-row pages exist; pages 2..N are then
    fetched and appended without a header while a progress bar advances once
    per page. Each page is sorted by ``baseYmd`` on its own before being
    written, so ordering is per page rather than across the whole file.

    Args:
        startYmd: Start of the range; also the first part of the file name.
        endYmd: End of the range; also the second part of the file name.
    """
    def sorted_page(payload):
        # Rows live under response -> body -> items -> item in the payload.
        rows = payload['response']['body']['items']['item']
        return jsonToDataframe(rows).sort_values('baseYmd')

    csv_path = '_'.join((startYmd, endYmd)) + '.csv'

    first_payload = get_locgoRegnVisitrDDList(startYmd, endYmd, 1)
    save_dataframe_to_csv_with_header(csv_path, sorted_page(first_payload))

    # 1000 matches the page size requested from the API.
    total_pages = math.ceil(first_payload['response']['body']['totalCount'] / 1000)

    # The first page is already on disk, so the bar counts the remaining ones.
    with alive_bar(total_pages - 1, force_tty=True) as advance:
        for page_number in range(2, total_pages + 1):
            payload = get_locgoRegnVisitrDDList(startYmd, endYmd, page_number)
            append_dataframe_to_csv(csv_path, sorted_page(payload))
            advance()
if __name__ == "__main__":
    # Command-line entry point: both date options are mandatory strings and
    # are forwarded unchanged to main().
    cli = argparse.ArgumentParser(description='fetch tour api 3.0')
    for option in ("startYmd", "endYmd"):
        # Flag name, destination attribute and help text all share the
        # option's name.
        cli.add_argument("-" + option, dest=option, required=True,
                         help=option,
                         type=str)
    parsed = cli.parse_args()
    main(parsed.startYmd, parsed.endYmd)