-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathstream_hashDist.py
62 lines (60 loc) · 2.06 KB
/
stream_hashDist.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
import requests
from bs4 import BeautifulSoup
import pandas as pd
import datetime as dt
import os
# This script iterates through the data points stored as daily csv files
# in this repo (./data/hashrate/YYYY/MM/YYYY-MM-DD.csv) and combines them
# into a single data stream file with one row per date and one column per
# pool.


def read_day_row(day_path, date_str):
    """Return one daily file as a single-row frame.

    Only the 'pool' and 'rate' columns of the csv at *day_path* are used.
    The result has one column per pool holding that pool's rate, and a
    single row whose index (named 'date') is *date_str*.
    """
    day_df = pd.read_csv(day_path)
    # keep only the needed columns, then turn pools into columns
    row = day_df[['pool', 'rate']].set_index('pool').T
    row['date'] = date_str
    return row.set_index('date')


def build_stream(first_day, last_day, data_root='./data/hashrate/'):
    """Combine every daily file between two dates (inclusive) into one frame.

    Days without a file are reported on stdout and skipped.  Pools missing
    from a given day are filled with 0 and columns are sorted, regardless
    of how many daily files were found.

    Returns the combined DataFrame indexed by date, or None when no daily
    file exists in the range.
    """
    rows = []
    one_day = dt.timedelta(days=1)
    day = first_day
    while day <= last_day:
        date_str = str(day)
        day_path = os.path.join(data_root, day.strftime("%Y/%m"), date_str + '.csv')
        if os.path.isfile(day_path):
            rows.append(read_day_row(day_path, date_str))
        else:
            print('No data found for '+date_str)
        day += one_day

    if not rows:
        return None

    # a single concat replaces re-reading and re-writing the stream file
    # once per day
    stream = pd.concat(rows, axis=0, ignore_index=False)
    # fill NAs with 0 since each sample won't have all column values
    stream = stream.fillna(0)
    # fix column ordering
    return stream.reindex(sorted(stream.columns), axis=1)


# set boundaries to iterate
dateStart = dt.date(2022, 9, 15)
dateEnd = dt.date.today()
# file path
sPath = 'data/stream/distHashrate.csv'

# the stream is rebuilt from scratch, so remove any existing file first
if os.path.isfile(sPath):
    os.remove(sPath)

fDataNew = build_stream(dateStart, dateEnd)
if fDataNew is None:
    # previously this case crashed with a NameError on the final print
    print('No data files found, ' + sPath + ' was not written')
else:
    # make sure the target directory exists before writing
    os.makedirs(os.path.dirname(sPath), exist_ok=True)
    fDataNew.to_csv(sPath, mode='w', header=True)
    print(fDataNew)