-
Notifications
You must be signed in to change notification settings - Fork 14
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
add python scripts for generation of HEROP_ID in CSVs and SHPs #68
- Loading branch information
Showing
3 changed files
with
180 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,89 @@ | ||
import csv | ||
from pathlib import Path | ||
|
||
## This is a one-off script used to add a new HEROP_ID to all of the CSVs. | ||
## The HEROP_ID is similar to the GEOIDs that can come from data.census.gov, | ||
## as described here: https://www.census.gov/programs-surveys/geography/guidance/geo-identifiers.html | ||
## but is simplified to three parts: | ||
## 3-digit summary level code, the letters "US", and the relevant GEOID. | ||
## For example, the summary level code for State is 040, and the GEOID for states | ||
## are FP codes, so 040US48 would be the HEROP_ID for Texas. | ||
|
||
## Note that in the future, the source and destination paths in the script may | ||
## no longer exist, so it functions more as a record of transformation than as a | ||
## reusable snippet. | ||
|
||
# Census summary level code and zero-padded GEOID width for each geography,
# keyed by the first letter of the CSV file name.
sl_lookup = {
    "S": {
        "code": "040",
        "id_length": 2,
    },
    "C": {
        "code": "050",
        "id_length": 5,
    },
    "T": {
        "code": "140",
        "id_length": 11,
    },
    "Z": {
        "code": "860",
        "id_length": 5,
    },
}

# For each table, the column that holds the base GEOID of a row.
f_lookup = {
    "C_1980.csv": "GEOID",
    "C_1990.csv": "GEOID",
    "C_2000.csv": "GEOID",
    "C_2010.csv": "GEOID",
    "C_Latest.csv": "GEOID",
    "S_1980.csv": "STATEFP",
    "S_1990.csv": "STATEFP",
    "S_2000.csv": "STATEFP",
    "S_2010.csv": "GEOID",
    "S_Latest.csv": "GEOID",
    "T_1980.csv": "GEOID",
    "T_1990.csv": "GEOID",
    "T_2000.csv": "GEOID",
    "T_2010.csv": "GEOID",
    "T_Latest.csv": "GEOID",
    "Z_1980.csv": "ZCTA",
    "Z_1990.csv": "ZCTA",
    "Z_2000.csv": "ZCTA",
    "Z_2010.csv": "GEOID",
    "Z_Latest.csv": "GEOID",
}

# NOTE(review): drop_fields is not referenced anywhere in this script; it is
# kept only so the module-level name stays available.
drop_fields = ["STATEFP", "G_STATEFP", "STUSPS", "TRACTCE", "ZIP", "COUNTYFP", "ZCTA", "GEOID"]

# Suffix of the temporary file each table is written to before it replaces
# the original.
new_suffix = "bq"

# Name of the column that is added to every table.
new_geoid_field = "HEROP_ID"

csv_dir = Path(__file__).resolve().parent.parent.parent / "data_final" / "v2.0" / "tables"


def build_herop_id(raw_id, summary_level):
    """Return the HEROP_ID for *raw_id*.

    *summary_level* is one entry of ``sl_lookup``. The id is converted to a
    string, left-padded with zeros to ``id_length`` characters and prefixed
    with the summary level code and the letters "US".
    """
    padded = str(raw_id).zfill(summary_level["id_length"])
    return f"{summary_level['code']}US{padded}"


def add_herop_id(path, id_field, summary_level, out_path=None):
    """Rewrite the CSV at *path* in place with a leading HEROP_ID column.

    *id_field* names the column holding the base GEOID and *summary_level*
    is one entry of ``sl_lookup``. Rows are streamed to *out_path* (default:
    ``<stem>_<new_suffix>.csv`` next to *path*), which then replaces *path*.

    Raises ``KeyError`` if a row has no *id_field* column.
    """
    path = Path(path)
    if out_path is None:
        # with_name only touches the file name; a plain str.replace would
        # also rewrite any ".csv" that appears in a directory name.
        out_path = path.with_name(f"{path.stem}_{new_suffix}{path.suffix}")

    # newline="" is what the csv module expects for both reading and
    # writing; without it Windows output gets "\r\r\n" line endings.
    with open(path, "r", newline="") as src, open(out_path, "w", newline="") as dst:
        reader = csv.DictReader(src)
        # fieldnames is None for an empty file. Filtering out an existing
        # HEROP_ID column keeps a second run from duplicating the header.
        source_fields = reader.fieldnames or []
        fieldnames = [new_geoid_field] + [f for f in source_fields if f != new_geoid_field]

        writer = csv.DictWriter(dst, fieldnames=fieldnames)
        writer.writeheader()
        for row in reader:
            row[new_geoid_field] = build_herop_id(row[id_field], summary_level)
            writer.writerow(row)

    Path(out_path).replace(path)


def main():
    """Add the HEROP_ID column to every table found in ``csv_dir``."""
    # Skip temporary files that an interrupted earlier run may have left.
    paths = [p for p in csv_dir.glob("*.csv") if not str(p).endswith(f"{new_suffix}.csv")]

    for path in paths:
        out_path = path.with_name(f"{path.stem}_{new_suffix}.csv")
        print(path.name)
        print(out_path)
        # The first letter of the file name selects the geography.
        geo = path.name[0]
        add_herop_id(path, f_lookup[path.name], sl_lookup[geo], out_path)


if __name__ == "__main__":
    main()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,90 @@ | ||
import fiona | ||
from fiona import Feature, Properties | ||
from pathlib import Path | ||
|
||
## This is a one-off script used to add a new HEROP_ID to all of the shapefiles, | ||
## while also moving them to a new final location. | ||
## The HEROP_ID is similar to the GEOIDs that can come from data.census.gov, | ||
## as described here: https://www.census.gov/programs-surveys/geography/guidance/geo-identifiers.html | ||
## but is simplified to three parts: | ||
## 3-digit summary level code, the letters "US", and the relevant GEOID. | ||
## For example, the summary level code for State is 040, and the GEOID for states | ||
## are FP codes, so 040US48 would be the HEROP_ID for Texas. | ||
|
||
## Note that in the future, the source and destination paths in the script may | ||
## no longer exist, so it functions more as a record of transformation than as a | ||
## reusable snippet. | ||
|
||
# Census summary level code and zero-padded GEOID width for each geography.
sl_lookup = {
    "S": {
        "code": "040",
        "id_length": 2,
    },
    "C": {
        "code": "050",
        "id_length": 5,
    },
    "T": {
        "code": "140",
        "id_length": 11,
    },
    "Z": {
        "code": "860",
        "id_length": 5,
    },
}

# relative paths from the data_final/geometryFiles directory, field names with GEOID base
f_lookup = {
    "state/states2018.shp": "STATEFP",
    "county/counties2018.shp": "GEOID",
    "tract/tracts2018.shp": "GEOID",
    "zcta/zctas2018.shp": "GEOID10",
    "tl_2010_state/states2010.shp": "STATEFP",
    "tl_2010_county/counties2010.shp": "GEOID",
    "tl_2010_tract/tracts2010.shp": "GEOID",
    "tl_2010_zcta/tl_2010_us_zcta510-wgs84-generalized100ft.shp": "GEOID10",
}

# NOTE(review): new_suffix is not referenced anywhere in this script; it is
# kept only so the module-level name stays available.
new_suffix = "bq"

in_dir = Path(__file__).resolve().parent.parent.parent / "data_final" / "geometryFiles"
out_dir = Path(__file__).resolve().parent.parent.parent / "data_final" / "v2.0" / "spatial_data"

# Substring of the relative path -> key in sl_lookup. Checked in this order.
_PATH_MARKERS = (
    ("state", "S"),
    ("counties", "C"),
    ("tract", "T"),
    ("zcta", "Z"),
)


def summary_level_for(path):
    """Return the ``sl_lookup`` entry that matches the relative *path*.

    Raises ``ValueError`` when none of the known markers occurs in *path*.
    """
    for marker, key in _PATH_MARKERS:
        if marker in path:
            return sl_lookup[key]
    raise ValueError(f"unexpected input shapefile: {path}")


def build_herop_id(raw_id, summary_level):
    """Return the HEROP_ID for *raw_id*.

    The id is converted to a string, left-padded with zeros to the summary
    level's ``id_length`` and prefixed with its code and the letters "US".
    """
    padded = str(raw_id).zfill(summary_level["id_length"])
    return f"{summary_level['code']}US{padded}"


def add_herop_id(shp_path, out_path, id_field, summary_level):
    """Copy the shapefile at *shp_path* to *out_path*, adding a HEROP_ID field.

    *id_field* names the property holding the base GEOID of each feature.
    """
    with fiona.open(shp_path) as src:
        # Copy the schema instead of editing the source collection's own
        # dict in place.
        dst_schema = dict(src.schema)
        dst_schema["properties"] = dict(src.schema["properties"])
        dst_schema["properties"]["HEROP_ID"] = "str"

        with fiona.open(
            out_path,
            mode="w",
            crs=src.crs,
            driver="ESRI Shapefile",
            schema=dst_schema,
        ) as dst:
            for feat in src:
                herop_id = build_herop_id(feat.properties[id_field], summary_level)
                # Merge into one dict first: passing HEROP_ID as a second
                # keyword would raise TypeError if the source already has
                # a property with that name.
                props = Properties.from_dict(**{**feat.properties, "HEROP_ID": herop_id})
                dst.write(Feature(geometry=feat.geometry, properties=props))


def main():
    """Write a HEROP_ID-augmented copy of every shapefile in ``f_lookup``."""
    # parents=True: data_final/v2.0 may not exist yet on a fresh checkout.
    out_dir.mkdir(parents=True, exist_ok=True)

    for path, id_field in f_lookup.items():
        shp_path = Path(in_dir, path)
        print("\n" + shp_path.name)

        sl = summary_level_for(path)
        out_path = out_dir / Path(path).name
        add_herop_id(shp_path, out_path, id_field, sl)


if __name__ == "__main__":
    main()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
fiona |