Skip to content

Commit

Permalink
linted and added a comment
Browse files Browse the repository at this point in the history
  • Loading branch information
Tommi-Tsuruga committed Jun 26, 2024
1 parent 55561f6 commit 5d69c9e
Show file tree
Hide file tree
Showing 2 changed files with 29 additions and 9 deletions.
2 changes: 1 addition & 1 deletion reggie/configs/data/pennsylvania.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -332,4 +332,4 @@ county_names:

# Columns added to the end of the file
blacklist_columns:
- registration_method
- registration_method # Added June 17, 2024
36 changes: 28 additions & 8 deletions reggie/ingestion/preprocessor/pennsylvania_preprocessor.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,10 +93,16 @@ def mapping(li, zone_dict=zone_dict):
li = [x for x in li if x != "nan"]
return li
else:
li = [zone_dict[x] for x in li if x != "nan" and x in zone_dict]
li = [
zone_dict[x]
for x in li
if x != "nan" and x in zone_dict
]
return li

return pd.Series(map(mapping, df_sub[columns].values.astype(str).tolist()))
return pd.Series(
map(mapping, df_sub[columns].values.astype(str).tolist())
)

sorted_codes = []
sorted_code_dict = defaultdict(defaultdict)
Expand All @@ -105,8 +111,12 @@ def mapping(li, zone_dict=zone_dict):
logging.info("Processing {} {}/{}".format(c, idx, len(counties)))
c = format_column_name(c)
try:
voter_file = next(f for f in voter_files if c in f["name"].lower())
election_map = next(f for f in election_maps if c in f["name"].lower())
voter_file = next(
f for f in voter_files if c in f["name"].lower()
)
election_map = next(
f for f in election_maps if c in f["name"].lower()
)
zones = next(f for f in zone_codes if c in f["name"].lower())
types = next(f for f in zone_types if c in f["name"].lower())
except StopIteration:
Expand Down Expand Up @@ -180,7 +190,9 @@ def mapping(li, zone_dict=zone_dict):

# Gather the pairs of election columns to iterate over both at the same time to collect the information
# contained in both of the columns per election
vote_column_list = list(zip(df.columns[70:150:2], df.columns[71:150:2]))
vote_column_list = list(
zip(df.columns[70:150:2], df.columns[71:150:2])
)

# get the value from the election map key for the election name,
# then combine it with the value in the party and vote type cells for the full election information
Expand All @@ -189,7 +201,11 @@ def mapping(li, zone_dict=zone_dict):
# The columns are all named election_#_vote_type but the cells contain the relevant information
vote_hist_df = pd.DataFrame(
{
i: election_map[i.split("_")[1]] + " " + df[i] + " " + df[j]
i: election_map[i.split("_")[1]]
+ " "
+ df[i]
+ " "
+ df[j]
for i, j in vote_column_list
if i.split("_")[1] in election_map
}
Expand All @@ -214,7 +230,9 @@ def mapping(li, zone_dict=zone_dict):
sorted_code_dict[current_key] = new_dict_entry
# converts the dataframe to a series that contains the list of elections participated in, indexed on position
vote_hist_df = list_map(vote_hist_df, vote_hist_df.columns)
districts = list_map(df[district_columns], district_columns, zone_dict)
districts = list_map(
df[district_columns], district_columns, zone_dict
)

df["all_history"] = vote_hist_df
df["districts"] = districts
Expand Down Expand Up @@ -247,7 +265,9 @@ def mapping(li, zone_dict=zone_dict):

logging.info("coercing")
main_df = config.coerce_dates(main_df)
main_df = self.config.coerce_strings(main_df, exclude=["county", "gender"])
main_df = self.config.coerce_strings(
main_df, exclude=["county", "gender"]
)
main_df = config.coerce_numeric(
main_df,
extra_cols=[
Expand Down

0 comments on commit 5d69c9e

Please sign in to comment.