-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathdata_prepMA.py
118 lines (84 loc) · 3.17 KB
/
data_prepMA.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
import numpy as np
import pandas as pd
import requests
import json
## Get list of segments
# Explicit timeout: without one, requests waits indefinitely on a stalled
# connection.
segments = requests.get("https://telraam-api.net/v0/segments/active", timeout=60)
print(segments.status_code)
# Stop here with the HTTP error instead of failing later on a missing
# 'features' key in an error payload.
segments.raise_for_status()
# np.unique returns the distinct segment ids as a sorted array.
segment_ids = np.unique(
    [feature['properties']['id'] for feature in segments.json()['features']]
)
## Working on intake
def gen_time(year, month, day, hour):
    """Build a timestamp string of the form '<year>-MM-DD HH:00'.

    Args:
        year: Year, rendered as-is.
        month: Integer month, zero-padded to two digits.
        day: Integer day of month, zero-padded to two digits.
        hour: Integer hour of day, zero-padded to two digits.

    Returns:
        The formatted timestamp string; the minutes are always '00'.
    """
    # int() lets NumPy integer scalars (e.g. values derived from np.arange)
    # go through the '02d' format spec the same way built-in ints do.
    return '{}-{:02d}-{:02d} {:02d}:00'.format(
        year, int(month), int(day), int(hour)
    )
def setPeak(hour):
    """Label an hour of the day as "Peak" or "Slow".

    Hours up to and including 10, and hours from 16 onward, are "Peak";
    hours strictly between 10 and 16 are "Slow".
    """
    is_peak = hour <= 10 or hour >= 16
    return "Peak" if is_peak else "Slow"
## Building traffic dataset
# Request and aggregation settings do not change between iterations, so they
# are built once here instead of inside the loop.
headers = {'Content-Type': 'application/json'}
agg_dict = {'segment_id': 'first',
            'pedestrian': 'sum',
            'bike': 'sum',
            'car': 'sum',
            'lorry': 'sum'}
# Per-(segment, month) frames are collected here and concatenated once at the
# end; concatenating onto a growing frame inside the loop copies all earlier
# rows on every iteration.
monthly_frames = []
for segment in segment_ids:
    print(str(segment))
    # `month` counts months from January 2019 (1 = Jan 2019), so 11..14 gives
    # windows starting Nov 2019, Dec 2019, Jan 2020 and Feb 2020.
    for month in np.arange(11, 15):
        year_begin = 2019 + (month - 1) // 12
        month_begin = (month - 1) % 12 + 1
        # The window ends on the 1st of the following month, which may fall
        # in the next year.
        year_end = 2019 + month // 12
        month_end = month % 12 + 1

        url = "https://telraam-api.net/v0/reports/" + str(segment)
        param = {"time_start": gen_time(year_begin, month_begin, 1, 10),
                 "time_end": gen_time(year_end, month_end, 1, 10),
                 "level": "segments",
                 "format": "per-hour"}
        try:
            r = requests.request("POST", url, headers=headers,
                                 data=json.dumps(param), timeout=60)
        except requests.exceptions.Timeout:
            # Treat a stalled request like any other failed window: skip it.
            print("timeout for segment " + str(segment))
            continue
        if r.status_code != 200:
            continue

        dat = pd.DataFrame(r.json()['report'])
        if dat.shape[0] == 0:
            continue

        dat['date'] = pd.to_datetime(dat['date'])
        # Classify each hourly row before the timestamp is reduced to a date.
        dat['period'] = dat['date'].dt.hour.map(setPeak)
        dat['date'] = dat['date'].dt.date
        dat = dat[['segment_id', 'date', 'pedestrian', 'bike', 'car', 'lorry', 'period']]
        # One row per (date, period) with the counts summed.
        dat = dat.groupby(['date', 'period']).agg(agg_dict).reset_index()
        monthly_frames.append(dat)

# pd.concat raises on an empty list, so fall back to an empty frame when no
# window returned data.
full_dat = pd.concat(monthly_frames, axis=0) if monthly_frames else pd.DataFrame()
## Export as cache
## full_dat.to_csv('trafficDAT.csv')
## Appending GPS info (Taking the mean x and y per segment)
segments_list = segments.json()['features']
gps_rows = []
for feature in segments_list:
    # Select the first coordinate list BEFORE converting to an array:
    # converting the whole nested list first fails in recent NumPy when the
    # geometry holds several parts of different lengths.
    points = np.asarray(feature['geometry']['coordinates'][0])
    # Column-wise mean over the points: position 0 is x, position 1 is y.
    centroid = points.mean(axis=0)
    gps_rows.append({'seg': feature['properties']['id'],
                     'x': centroid[0],
                     'y': centroid[1]})
# Build the frame once; explicit columns keep the schema even with no rows.
gps_dat = pd.DataFrame(gps_rows, columns=['seg', 'x', 'y'])
# Keep a single (first-seen) coordinate pair per segment id.
gps_dat = gps_dat.groupby('seg').agg({'x': 'first', 'y': 'first'}).reset_index()
gps_dat.columns = ['segment_id', 'lng', 'lat']
# Align key dtypes on both sides so the merge matches ids reliably.
gps_dat['segment_id'] = gps_dat['segment_id'].astype('int')
full_dat['segment_id'] = full_dat['segment_id'].astype(int)
# Default (inner) merge: only traffic rows with a matching segment are kept.
traffic_dat = full_dat.merge(gps_dat, on='segment_id')
traffic_dat.to_csv('trafficDAT.csv', index=False)