Skip to content

Commit

Permalink
add python scripts for generation of HEROP_ID in CSVs and SHPs #68
Browse files Browse the repository at this point in the history
  • Loading branch information
mradamcox committed Sep 1, 2023
1 parent 9a6b56e commit 1979591
Show file tree
Hide file tree
Showing 3 changed files with 180 additions and 0 deletions.
89 changes: 89 additions & 0 deletions code/SpatialIds/add_csv_herop_ids.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
import csv
from pathlib import Path

## This is a one-off script used to add a new HEROP_ID to all of the CSVs.
## The HEROP_ID is similar to the GEOIDs that can come from data.census.gov,
## as described here: https://www.census.gov/programs-surveys/geography/guidance/geo-identifiers.html
## but is simplified to three parts:
## 3-digit summary level code, the letters "US", and the relevant GEOID.
## For example, the summary level code for State is 040, and the GEOID for states
## are FP codes, so 040US48 would be the HEROP_ID for Texas.

## Note that in the future, the source and destination paths in the script may
## no longer exist, so it functions more as a record of transformation than as a
## reusable snippet.

# Summary-level settings, keyed by the first letter of each CSV file name.
# "code" is the 3-digit summary level prefix of the HEROP_ID; "id_length" is
# the width the source id is zero-padded to before being appended.
sl_lookup = {
    prefix: {"code": code, "id_length": id_length}
    for prefix, code, id_length in (
        ("S", "040", 2),
        ("C", "050", 5),
        ("T", "140", 11),
        ("Z", "860", 5),
    )
}

# Column holding the base geographic id in each input CSV, keyed by file name.
# The 2010 and Latest tables always use GEOID; the older tables use the
# per-geography column listed below.
f_lookup = {
    f"{geo}_{vintage}.csv": "GEOID" if vintage in ("2010", "Latest") else older_field
    for geo, older_field in (
        ("C", "GEOID"),
        ("S", "STATEFP"),
        ("T", "GEOID"),
        ("Z", "ZCTA"),
    )
    for vintage in ("1980", "1990", "2000", "2010", "Latest")
}

# Legacy id columns present across the input tables. Not referenced by the
# visible part of this script; kept as a record.
drop_fields = [
    "STATEFP",
    "G_STATEFP",
    "STUSPS",
    "TRACTCE",
    "ZIP",
    "COUNTYFP",
    "ZCTA",
    "GEOID",
]

# Suffix of the temporary output file written next to each input CSV.
new_suffix = "bq"

# The tables directory is resolved relative to this script, three levels up.
csv_dir = Path(__file__).resolve().parent.parent.parent.joinpath(
    "data_final", "v2.0", "tables"
)

# Every CSV in the tables directory, except leftover temporary outputs.
paths = [
    csv_path
    for csv_path in csv_dir.glob("*.csv")
    if not str(csv_path).endswith(f"{new_suffix}.csv")
]

# Name of the id column that is prepended to every table.
new_geoid_field = "HEROP_ID"

# Rewrite each CSV with a leading HEROP_ID column, then replace the original
# file with the rewritten copy.
for path in paths:
    print(path.name)

    # Temporary sibling file, e.g. C_1980.csv -> C_1980_bq.csv. Only the file
    # name is changed, so a ".csv" elsewhere in the path is left alone.
    out_path = path.with_name(f"{path.stem}_{new_suffix}.csv")
    print(out_path)

    # The first letter of the file name selects the summary level; the full
    # file name selects the column that holds the base id. Both are constant
    # for the whole file, so look them up once. An unknown file raises KeyError.
    summary_level = sl_lookup[path.name[0]]
    id_field = f_lookup[path.name]

    # The csv module requires files to be opened with newline="" so that it
    # controls line endings itself (otherwise rows end in \r\r\n on Windows).
    with open(path, "r", newline="") as src, open(out_path, "w", newline="") as dst:
        reader = csv.DictReader(src)

        # HEROP_ID goes first. Any existing HEROP_ID column is dropped from the
        # remainder so re-running the script does not duplicate the column.
        fieldnames = [new_geoid_field] + [
            name for name in reader.fieldnames if name != new_geoid_field
        ]

        writer = csv.DictWriter(dst, fieldnames=fieldnames)
        writer.writeheader()

        # Stream row by row instead of holding the whole table in memory.
        for row in reader:
            padded_id = str(row[id_field]).zfill(summary_level["id_length"])
            row[new_geoid_field] = f"{summary_level['code']}US{padded_id}"
            writer.writerow(row)

    # Both files are closed here, so the original can be overwritten safely.
    out_path.replace(path)
90 changes: 90 additions & 0 deletions code/SpatialIds/add_shp_herop_ids.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
import fiona
from fiona import Feature, Properties
from pathlib import Path

## This is a one-off script used to add a new HEROP_ID to all of the shapefiles,
## while also moving them to a new final location.
## The HEROP_ID is similar to the GEOIDs that can come from data.census.gov,
## as described here: https://www.census.gov/programs-surveys/geography/guidance/geo-identifiers.html
## but is simplified to three parts:
## 3-digit summary level code, the letters "US", and the relevant GEOID.
## For example, the summary level code for State is 040, and the GEOID for states
## are FP codes, so 040US48 would be the HEROP_ID for Texas.

## Note that in the future, the source and destination paths in the script may
## no longer exist, so it functions more as a record of transformation than as a
## reusable snippet.

# Summary-level settings. "code" is the 3-digit summary level prefix of the
# HEROP_ID; "id_length" is the width the source id is zero-padded to before
# being appended. The key is chosen from the shapefile path further below.
sl_lookup = {
    key: {"code": code, "id_length": id_length}
    for key, code, id_length in (
        ("S", "040", 2),
        ("C", "050", 5),
        ("T", "140", 11),
        ("Z", "860", 5),
    )
}

# Shapefiles to process, given as paths relative to the
# data_final/geometryFiles directory, each mapped to the attribute field that
# holds its base GEOID.
f_lookup = {
    f"{folder}/{filename}": id_field
    for folder, filename, id_field in (
        ("state", "states2018.shp", "STATEFP"),
        ("county", "counties2018.shp", "GEOID"),
        ("tract", "tracts2018.shp", "GEOID"),
        ("zcta", "zctas2018.shp", "GEOID10"),
        ("tl_2010_state", "states2010.shp", "STATEFP"),
        ("tl_2010_county", "counties2010.shp", "GEOID"),
        ("tl_2010_tract", "tracts2010.shp", "GEOID"),
        ("tl_2010_zcta", "tl_2010_us_zcta510-wgs84-generalized100ft.shp", "GEOID10"),
    )
}

# Not referenced by the visible part of this script; kept so the module-level
# name stays available.
new_suffix = "bq"

# Source shapefiles are read from data_final/geometryFiles, resolved relative
# to this script (three levels up). Results go to data_final/v2.0/spatial_data.
in_dir = Path(__file__).resolve().parent.parent.parent / 'data_final' / 'geometryFiles'
out_dir = in_dir.parent / 'v2.0' / 'spatial_data'

# parents=True so the script also works when data_final/v2.0 does not exist
# yet; without it mkdir raises FileNotFoundError on the missing parent.
out_dir.mkdir(parents=True, exist_ok=True)

# Copy every listed shapefile into out_dir, adding a HEROP_ID string field
# built from the summary level code, the letters "US", and the zero-padded id.
for path, id_field in f_lookup.items():

    shp_path = Path(in_dir, path)
    print("\n" + shp_path.name)

    # Pick the summary level from a substring of the relative path. The checks
    # run in this order and the first match wins.
    for marker, sl_key in (
        ("state", "S"),
        ("counties", "C"),
        ("tract", "T"),
        ("zcta", "Z"),
    ):
        if marker in path:
            sl = sl_lookup[sl_key]
            break
    else:
        raise Exception("unexpected input shapefile")

    # Output keeps the source file name but is written flat into out_dir.
    out_path = out_dir / Path(path).name
    with fiona.open(shp_path) as src:
        # Copy the schema (and its properties mapping) before adding the new
        # field, so the source collection's own schema is not mutated.
        dst_schema = dict(src.schema)
        dst_schema["properties"] = dict(src.schema["properties"])
        dst_schema["properties"]["HEROP_ID"] = "str"

        with fiona.open(
            out_path,
            mode="w",
            crs=src.crs,
            driver="ESRI Shapefile",
            schema=dst_schema,
        ) as dst:
            for feat in src:
                # Zero-pad the source id to the fixed width for this level.
                geo_id = str(feat.properties[id_field]).zfill(sl["id_length"])
                herop_id = f"{sl['code']}US{geo_id}"
                # Carry over all existing attributes and add the new id.
                props = Properties.from_dict(
                    **feat.properties,
                    HEROP_ID=herop_id,
                )
                dst.write(Feature(geometry=feat.geometry, properties=props))
1 change: 1 addition & 0 deletions code/SpatialIds/requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
fiona

0 comments on commit 1979591

Please sign in to comment.