Skip to content

Commit

Permalink
just making them no diff
Browse files Browse the repository at this point in the history
  • Loading branch information
Tommi-Tsuruga committed Jun 25, 2024
1 parent 9dc40e8 commit 3b55d41
Show file tree
Hide file tree
Showing 2 changed files with 10 additions and 30 deletions.
2 changes: 1 addition & 1 deletion reggie/configs/data/pennsylvania.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -331,5 +331,5 @@ county_names:
- York

# Columns added to the end of the file
additional_columns:
no_diff_columns:
- registration_method
38 changes: 9 additions & 29 deletions reggie/ingestion/preprocessor/pennsylvania_preprocessor.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,7 @@ def execute(self):

# PA Added a new column for registration_method on June 18, 2024
if date_from_str(self.raw_s3_file) > "2024-06-17":
dfcols.extend(config["additional_columns"])
dfcols.extend(config["no_diff_columns"])

# create a mapping that returns a series based on the values across rows (voters) of cells (election info).
# consolidates the non nan values into one string that can be appended as a column later for the all_history and
Expand All @@ -93,16 +93,10 @@ def mapping(li, zone_dict=zone_dict):
li = [x for x in li if x != "nan"]
return li
else:
li = [
zone_dict[x]
for x in li
if x != "nan" and x in zone_dict
]
li = [zone_dict[x] for x in li if x != "nan" and x in zone_dict]
return li

return pd.Series(
map(mapping, df_sub[columns].values.astype(str).tolist())
)
return pd.Series(map(mapping, df_sub[columns].values.astype(str).tolist()))

sorted_codes = []
sorted_code_dict = defaultdict(defaultdict)
Expand All @@ -111,12 +105,8 @@ def mapping(li, zone_dict=zone_dict):
logging.info("Processing {} {}/{}".format(c, idx, len(counties)))
c = format_column_name(c)
try:
voter_file = next(
f for f in voter_files if c in f["name"].lower()
)
election_map = next(
f for f in election_maps if c in f["name"].lower()
)
voter_file = next(f for f in voter_files if c in f["name"].lower())
election_map = next(f for f in election_maps if c in f["name"].lower())
zones = next(f for f in zone_codes if c in f["name"].lower())
types = next(f for f in zone_types if c in f["name"].lower())
except StopIteration:
Expand Down Expand Up @@ -190,9 +180,7 @@ def mapping(li, zone_dict=zone_dict):

# Gather the pairs of election columns to iterate over both at the same time to collect the information
# contained in both of the columns per election
vote_column_list = list(
zip(df.columns[70:150:2], df.columns[71:150:2])
)
vote_column_list = list(zip(df.columns[70:150:2], df.columns[71:150:2]))

# get the value from the election map key for the election name,
# then combine it with the value in the party and vote type cells for the full election information
Expand All @@ -201,11 +189,7 @@ def mapping(li, zone_dict=zone_dict):
# The columns are all named election_#_vote_type but the cells contain the relevant information
vote_hist_df = pd.DataFrame(
{
i: election_map[i.split("_")[1]]
+ " "
+ df[i]
+ " "
+ df[j]
i: election_map[i.split("_")[1]] + " " + df[i] + " " + df[j]
for i, j in vote_column_list
if i.split("_")[1] in election_map
}
Expand All @@ -230,9 +214,7 @@ def mapping(li, zone_dict=zone_dict):
sorted_code_dict[current_key] = new_dict_entry
# converts the dataframe to a series that contains the list of elections participated in, indexed on position
vote_hist_df = list_map(vote_hist_df, vote_hist_df.columns)
districts = list_map(
df[district_columns], district_columns, zone_dict
)
districts = list_map(df[district_columns], district_columns, zone_dict)

df["all_history"] = vote_hist_df
df["districts"] = districts
Expand Down Expand Up @@ -265,9 +247,7 @@ def mapping(li, zone_dict=zone_dict):

logging.info("coercing")
main_df = config.coerce_dates(main_df)
main_df = self.config.coerce_strings(
main_df, exclude=["county", "gender"]
)
main_df = self.config.coerce_strings(main_df, exclude=["county", "gender"])
main_df = config.coerce_numeric(
main_df,
extra_cols=[
Expand Down

0 comments on commit 3b55d41

Please sign in to comment.