-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathndash_func.py
156 lines (122 loc) · 4.06 KB
/
ndash_func.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
# Functions to retrieve, extract, and transform US Census Bureau
# Zip Code Tabulation Data
# import packages
import urllib2
import requests
import urllister
import zipfile
import sys
import os, os.path
###############
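# Checks whether the dataset directory exists on the Census TIGER site
# for the given year. If the request fails, the year is decremented and
# the request retried until a directory is found; the URL of the zip
# file listed in that directory is returned.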
def findFile(year, dataset, min_year=1990):
    """Locate the zip file for ``dataset`` in the newest available TIGER year.

    Starting at ``year``, requests ``<TIGER base><year>/<dataset>`` and, on
    failure, steps back one year at a time until a request succeeds.  The
    returned directory listing is parsed for links containing ``.zip``.

    Parameters:
        year: first (most recent) year to try, as an int.
        dataset: dataset directory name appended after the year.
        min_year: oldest year that will be tried.  The default is only a
            conservative floor to stop the search from running forever; it
            is not a statement about which years the server publishes.

    Returns:
        The URL of the zip file.  When several zip links are listed, the
        first one containing ``_36_`` (New York State) is chosen.  When no
        zip link is listed, the directory URL followed by ``/`` is returned
        (unchanged legacy behaviour).

    Raises:
        urllib2.URLError: (including HTTPError) if the request still fails
            once ``min_year`` has been reached.
        IndexError: if several zip files are listed and none contains
            ``_36_``.
    """
    data_path = "https://www2.census.gov/geo/tiger/TIGER"
    con = None
    while con is None:
        dir_url = data_path + str(year) + "/" + dataset
        print(dir_url)
        try:
            con = urllib2.urlopen(dir_url)
        except urllib2.URLError as e:
            # HTTPError is a subclass of URLError, so one handler covers both.
            print(e)
            if year <= min_year:
                # Give up instead of decrementing the year indefinitely.
                raise
            print("Decrementing year and trying again.")
            year = year - 1
    print("Connection a success!")

    parser = urllister.URLLister()
    try:
        parser.feed(con.read())
    finally:
        # Release the connection even if reading or parsing fails.
        con.close()
    parser.close()

    zip_links = [s for s in parser.urls if ".zip" in s]
    # If there is more than one zip file, look for NYS (FIPS code 36).
    if len(zip_links) > 1:
        ny_links = [s for s in zip_links if '_36_' in s]
        if not ny_links:
            raise IndexError("No '_36_' zip file listed at " + dir_url)
        zip_name = ny_links[0]
    else:
        # Zero or one link: joining yields '' or the single link.
        zip_name = ''.join(zip_links)
    return dir_url + "/" + zip_name
###############
# Checks whether the file named by the last path segment of the URL
# already exists in the current working directory
def fileExists(file_url):
    """Return True if the file named by ``file_url`` exists locally.

    Only the text after the last ``/`` of ``file_url`` is used as the file
    name, and it is looked up relative to the current working directory.
    A URL ending in ``/`` yields an empty name and therefore False.
    """
    filename = file_url.split('/')[-1]
    return os.path.isfile(filename)
###############
# Takes a url and downloads the file to the local repository
def downloadFile(file_url):
    """Download ``file_url`` into the current directory unless already there.

    URLs ending in ``.zip`` are fetched with ``downloadZip``; any other URL
    is fetched with ``downloadUrl``.  Up to five attempts are made.

    Success is judged by the file name the download helper reports having
    written, not only by the last segment of the URL, because
    ``downloadUrl`` names the file from the response header and that name
    need not match the URL.

    Returns:
        The local file name on success, or None after printing a message
        when every attempt failed.
    """
    local_name = file_url.split('/')[-1]
    if fileExists(file_url):
        return local_name

    max_attempts = 5
    for _ in range(max_attempts):
        if file_url.endswith('.zip'):
            local_name = downloadZip(file_url)
        else:
            local_name = downloadUrl(file_url)
        if os.path.isfile(local_name):
            return local_name

    print('There is a problem downloading. Exiting.')
    return None
###############
# Used when the URL is a direct source of the file
def downloadZip(file_url):
    """Download a URL that points directly at a file and save it locally.

    The file is written to the current working directory under the name
    given by the last path segment of ``file_url``.

    Returns:
        The local file name.

    Raises:
        urllib2.URLError: (including HTTPError) if the URL cannot be opened.
    """
    filename = file_url.split('/')[-1]
    # Open the connection first so a failed request leaves no empty file.
    con = urllib2.urlopen(file_url)
    try:
        with open(filename, "wb") as save_file:
            # Copy in chunks rather than holding the whole archive in memory.
            while True:
                chunk = con.read(8192)
                if not chunk:
                    break
                save_file.write(chunk)
    finally:
        con.close()
    return filename
###############
# Used when the URL triggers a download of a zip file
def downloadUrl(file_url):
    """Download a URL whose response names a zip file in its headers.

    The file name is taken from the ``Content-Disposition`` response header
    and the body is written under that name in the current working
    directory, unless a file with that name already exists.

    Returns:
        The local file name (whether it was just written or already there).

    Raises:
        KeyError: if the response has no ``Content-Disposition`` header.
        IndexError: if the header does not name a ``.zip`` file.
    """
    req = requests.get(file_url)
    # Direct indexing keeps the KeyError callers may rely on.
    disposition = req.headers['Content-Disposition']

    # A quoted filename is its own element after splitting on '"'.
    candidates = [part for part in disposition.split('"') if ".zip" in part]
    if not candidates:
        raise IndexError(
            'No .zip file name in Content-Disposition: ' + disposition)
    filename = candidates[0]
    if 'filename=' in filename:
        # Unquoted form (filename=foo.zip; ...): keep only the value.
        filename = filename.split('filename=')[-1].split(';')[0].strip()
    # The name comes from the server; drop any directory components so the
    # file cannot be written outside the current directory.
    filename = os.path.basename(filename.replace('\\', '/'))
    if not filename:
        raise IndexError(
            'No usable file name in Content-Disposition: ' + disposition)

    if os.path.isfile(filename):
        print(filename + ' already exists!')
        return filename

    print('Downloading ' + filename)
    with open(filename, 'wb') as f:
        f.write(req.content)
    print(filename + ' successfully downloaded!')
    return filename
###############
# Extract the zip file
def extractFile(file_url):
    """Extract the locally downloaded zip file named by ``file_url``.

    The archive is looked up in the current working directory under the
    last path segment of ``file_url`` and is extracted into the current
    working directory.  A ``shapefiles`` sub-directory is created if it is
    missing.

    Returns:
        The archive name up to the first ``.zip``.

    Raises:
        SystemExit: with status 1 if the archive is not found.
    """
    current_dir = os.getcwd()
    filename = file_url.split('/')[-1]
    output_dir = os.path.join(current_dir, 'shapefiles')
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)
    # check to see that the file exists, or exit
    if not os.path.isfile(filename):
        print('Cannot find ' + filename)
        # Non-zero status so calling scripts can detect the failure.
        sys.exit(1)
    print('Beginning extraction')
    # NOTE(review): extraction targets the current directory, not
    # output_dir; confirm which location downstream code expects.
    with zipfile.ZipFile(filename) as archive:
        archive.extractall()
    print(filename + ' extraction complete!')
    return filename.split('.zip')[0]