Skip to content

Commit

Permalink
[feat] convert small solar project sizes to Mw, collect permit proces…
Browse files Browse the repository at this point in the history
…s, standardize renewable technology labels, map abbreviated energy tech from NYISO to readable string
  • Loading branch information
deenasun committed Oct 26, 2024
1 parent 4c79b35 commit 3811662
Show file tree
Hide file tree
Showing 8 changed files with 7,173 additions and 11,805 deletions.
3 changes: 2 additions & 1 deletion api/webscraper/database.py
Original file line number Diff line number Diff line change
Expand Up @@ -105,8 +105,9 @@ def nyiso_to_database():
project['renewable_energy_technology'] = renewable_energy_map[project.get('renewable_energy_technology')] # maps NYISO acronym to readable renewable energy tech
existing_project = supabase.table("Projects_duplicate").select("*").eq("interconnection_queue_number", project['interconnection_queue_number']).execute()
if len(existing_project.data) > 0:
# This helper function creates a dict of only fields that the existing project is missing
# but the NYISO data has
update_object = create_update_object(existing_project.data[0], project)
print(update_object)
try:
response= supabase.table("Projects_duplicate").update(update_object).eq("interconnection_queue_number", project['interconnection_queue_number']).execute()
print('UPDATE', response, '\n')
Expand Down
39 changes: 36 additions & 3 deletions api/webscraper/nyiso_scraper.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,21 +2,52 @@
import pandas as pd
from io import BytesIO
import json
from utils.scraper_utils import clean_df_data

# Readable labels for the renewable energy technologies of interest.
# NOTE(review): not referenced in the visible part of this file; confirm whether it is still used elsewhere.
renewable_energy_set = {'Hydroelectric', 'Land Based Wind', 'Offshore Wind', 'Solar', 'Geothermal', 'Energy Storage', 'Pumped Storage'}

# Maps the abbreviations found in the NYISO 'Type/ Fuel' column to readable technology labels.
# query_nyiso() skips every row whose 'Type/ Fuel' value is not a key of this dict, so the keys
# also determine which rows of the spreadsheet are kept.
# NOTE(review): 'Land Based Wind' and 'Geothermal' appear in renewable_energy_set but have no
# abbreviation here; confirm whether that omission is intentional.
renewable_energy_map = {
'H': 'Hydroelectric',
'S': 'Solar',
'ES': 'Energy Storage',
'PS': 'Pumped Storage',
'OSW': 'Offshore Wind',
}

def query_nyiso():
nyiso = requests.get('https://www.nyiso.com/documents/20142/1407078/NYISO-Interconnection-Queue.xlsx')
nyiso_data = nyiso.content
nyiso_df = pd.read_excel(BytesIO(nyiso_data))

# TODO: if needed later, here is the code to read the data by sheets

# all_sheets = pd.read_excel(BytesIO(nyiso_data), sheet_name=None)

# sheet_names = list(all_sheets.keys())
# interconnection_queue_key = sheet_names[0]
# cluster_projects_key = sheet_names[1]

# interconnection_queue_df = all_sheets[interconnection_queue_key] # Interconnection Queue
# cluster_projects_df = all_sheets[cluster_projects_key] # Cluster Projects

# interconnection_queue_df = clean_df_data(interconnection_queue_df)
# cluster_projects_df = clean_df_data(cluster_projects_df)

# interconnection_queue_dict = interconnection_queue_df.to_dict(orient='records')
# cluster_projects_dict = cluster_projects_df.to_dict(orient='records')

nyiso_df.dropna(subset=['Project Name'], inplace=True) # drops rows of xlsx that don't correspond to project data
nyiso_df.fillna(None, inplace=True) # replaces NaN values with None
nyiso_df = nyiso_df.where(pd.notna(nyiso_df), None) # replaces NaN values with None
nyiso_df.replace(to_replace=['', 'N/A', 'n/a', 'NAN', 'n/a'], value=None, inplace=True)
nyiso_list = nyiso_df.to_dict(orient='records')

filtered_list = []
for item in nyiso_list:
if item.get('Type/ Fuel', None) not in renewable_energy_map.keys():
continue
project_dict = {'project_name': item.get('Project Name', None),
'project_status': 'Proposed', # missing
'renewable_energy_technology': item.get('Type/ Fuel', None),
'project_status': 'Proposed', # TODO: update this based on which sheet it's from
'renewable_energy_technology': renewable_energy_map[item.get('Type/ Fuel')], # map abbreviations into readable string
'size': item.get('SP (MW)', None),
'developer': item.get('Developer Name', None),
'proposed_cod': item.get('Proposed COD', None), # note: non-serializable into JSON --> can't directly write to file
Expand All @@ -35,6 +66,8 @@ def query_nyiso():

return filtered_list

# NOTE(review): this call appears to sit at module level, in which case it runs (and prints the
# result of query_nyiso()) every time the module is executed or imported. It looks like a
# debugging aid; confirm whether it should stay or move behind an `if __name__ == "__main__":` guard.
print(query_nyiso())

def write_nyiso_to_json():
data = query_nyiso()
print(data)
Expand Down
Loading

0 comments on commit 3811662

Please sign in to comment.