Skip to content

Commit

Permalink
Add transit fare scraper
Browse files Browse the repository at this point in the history
  • Loading branch information
jpfleischer committed Dec 23, 2024
1 parent 251890f commit 9aa2bd6
Show file tree
Hide file tree
Showing 18 changed files with 36,411 additions and 12,679 deletions.
4 changes: 4 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -9,3 +9,7 @@ test_js*
shapefile
*.png
*.pdf
data/
*chloropleth_map.html
*.gif
denver/
15,211 changes: 15,211 additions & 0 deletions deprecated/headway.ipynb

Large diffs are not rendered by default.

4,296 changes: 1,850 additions & 2,446 deletions scripts/ntd.ipynb

Large diffs are not rendered by default.

885 changes: 885 additions & 0 deletions scripts/otp/aws_links.json

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion scripts/otp/scrape.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -392,7 +392,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.12.6"
"version": "3.12.7"
}
},
"nbformat": 4,
Expand Down
885 changes: 885 additions & 0 deletions scripts/otp/transitfeeds_links.json

Large diffs are not rendered by default.

274 changes: 274 additions & 0 deletions src/emcommon/metrics/transit/README.md

Large diffs are not rendered by default.

259 changes: 259 additions & 0 deletions src/emcommon/metrics/transit/deprecated_transit.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,259 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# example= {\"adult\": 1.00,\n",
"# \"senior\": 0.50,\n",
"# \"student\": 0.50,\n",
"# \"medicare_medicaid\": 0.50,\n",
"# \"veterans_active_military\": 0.50,\n",
"# \"pass\": {\"duration_hours\": 24, \"cost\": 3.00},\n",
"# \"school_free_fare\": True, # Northern Illinois University (NIU) students ride free with valid NIU OneCard\n",
"# \"live_fare_link\": \"https://www.cityofdekalb.com/430/Bus-Routes-and-Schedules\"}"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import json\n",
"import os\n",
"from openai import OpenAI\n",
"from pprint import pprint\n",
"\n",
"# Initialize the OpenAI client.\n",
"# The key is read from the OPENAI_API_KEY environment variable so a real secret\n",
"# never has to be pasted into (and committed with) this notebook. The original\n",
"# placeholder stays as the fallback, so behavior is unchanged when it is unset.\n",
"api_key = os.environ.get('OPENAI_API_KEY', 'whoknowsthekey?')\n",
"client = OpenAI(api_key=api_key)\n",
"\n",
"# File path for saving the fare data\n",
"output_file = 'fare_data.json'\n",
"\n",
"# Function to load existing fare data if the JSON file exists\n",
"def load_existing_data(file_path):\n",
"    \"\"\"Return the parsed JSON content of ``file_path``, or ``{}`` if it does not exist.\"\"\"\n",
"    if not os.path.exists(file_path):\n",
"        return {}\n",
"    with open(file_path, 'r') as handle:\n",
"        return json.load(handle)\n",
"\n",
"# Function to call OpenAI API and fetch fare data for each agency\n",
"def fetch_fare_data(dba):\n",
"    \"\"\"Ask the OpenAI chat API for the bus fare structure of one agency.\n",
"\n",
"    Args:\n",
"        dba: Agency name; it is interpolated into the prompt text.\n",
"\n",
"    Returns:\n",
"        The Python literal parsed from the model's reply (the prompt asks for\n",
"        a dictionary).\n",
"\n",
"    Raises:\n",
"        ValueError, SyntaxError: If the reply is not a plain Python literal.\n",
"    \"\"\"\n",
"    import ast\n",
"\n",
"    # dba = \"City of Cedar Rapids, dba: Cedar Rapids Transit\"\n",
"    prompt = f\"\"\"\n",
" Please format your response as a python dictionary.\n",
"\n",
" Give me the {dba}'s current live link to its Fares about its bus service. \n",
" Do not use the DoT's data, use the {dba}'s own website. \n",
" For example if the adult fare is $0.50 then make \"adult\": 0.50. If the senior fare is free then do \"senior\": 0.00, if it is 0.25 then do \"senior\": 0.25.\n",
" If there is a special extended duration pass, do \"pass\": {{\"duration_hours\": 24, \"cost\": 3.00}} for example.\n",
" if there isnt, do \"pass\": None\n",
" If there is \"medicare_medicaid\", include it. If there is \"veterans_active_military\", include it. \n",
" If there is a special collaboration with a university that gives free fare, say \"school_free_fare\": True, else make it False.\n",
" If there is a \"student\" cost, include it. Also student pass, \"student_pass\": {{\"duration\": \"semester\", \"cost\": 60.00}} for example.\n",
" If there is a link to the live fares, include it as \"live_fare_link\": \"mylinkhere\"\n",
" If there is a rate depending on age that notes specific years, do \"age_discount\": {{\"12-18\": 0.50, \"65+\": 0.50}}, \n",
" or if it is a child boarding with an adult, do \"age_discount\": {{\"with_adult\": 0.00}}, or \"age_discount\": {{\"may_qualify\": True}} if special terms.\n",
"\n",
" Lastly, give me any notes i may want to know, for example, fares suspended due to COVID, with \"notes\": \"your cool notes here\"\n",
" DO NOT INCLUDE FARES RELATING TO PARATRANSIT. BUS ONLY.\n",
" \"\"\"\n",
"\n",
"    chat_completion = client.chat.completions.create(\n",
"        messages=[{\"role\": \"user\", \"content\": prompt}],\n",
"        model=\"chatgpt-4o-latest\",\n",
"    )\n",
"    reply = chat_completion.choices[0].message.content.strip()\n",
"\n",
"    # A reply wrapped in a Markdown code fence (```python ... ```) is not a\n",
"    # valid literal; remove the backticks and the optional language tag line.\n",
"    if reply.startswith('```'):\n",
"        reply = reply.strip('`')\n",
"        tag, _, rest = reply.partition('\\n')\n",
"        if not tag.strip() or tag.strip().isalpha():\n",
"            reply = rest\n",
"\n",
"    # SECURITY: the reply is untrusted text. ast.literal_eval only accepts\n",
"    # literals, so it cannot execute code the way the previous eval() could.\n",
"    response = ast.literal_eval(reply.strip())\n",
"    pprint(response)\n",
"    return response\n",
"\n",
"# Function to save fare data to a JSON file\n",
"def save_fare_data(file_path, fare_data):\n",
"    \"\"\"Serialize ``fare_data`` to ``file_path`` as JSON with a 4-space indent.\"\"\"\n",
"    with open(file_path, 'w') as sink:\n",
"        json.dump(fare_data, sink, indent=4)\n",
"\n",
"# Function to append fare data for a given number of rows\n",
"def append_fare_data(df, num_rows):\n",
"    \"\"\"Fetch fares for up to ``num_rows`` agencies that have no saved entry yet.\n",
"\n",
"    Agencies whose ``df['Agency']`` value is already a key of the JSON file at\n",
"    ``output_file`` are skipped, so repeated calls continue where the previous\n",
"    call stopped.\n",
"\n",
"    Args:\n",
"        df: DataFrame with an 'Agency' column.\n",
"        num_rows: Maximum number of new agencies to process in this call.\n",
"    \"\"\"\n",
"    # Load existing fare data\n",
"    fare_data = load_existing_data(output_file)\n",
"\n",
"    # Find where we left off\n",
"    processed_agencies = set(fare_data)\n",
"    new_rows = df[~df['Agency'].isin(processed_agencies)].head(num_rows)\n",
"\n",
"    try:\n",
"        for _, row in new_rows.iterrows():\n",
"            dba = row['Agency']\n",
"            fare_data[dba] = fetch_fare_data(dba)\n",
"    finally:\n",
"        # Persist in every case: if a request or a parse fails midway, the\n",
"        # agencies fetched so far are still written, so the next call resumes\n",
"        # after them instead of repeating the same API requests.\n",
"        save_fare_data(output_file, fare_data)\n",
"\n",
"    print(f\"Processed {len(new_rows)} new rows.\")\n",
"\n",
"# Example usage: proceed with the next 4 rows\n",
"# append_fare_data(sorted_df, 4)\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import json\n",
"import os\n",
"import tkinter as tk\n",
"from tkinter import messagebox\n",
"import pyperclip\n",
"\n",
"# File path for saving the fare data\n",
"output_file = 'fare_data.json'\n",
"\n",
"# Function to load existing fare data if the JSON file exists\n",
"def load_existing_data(file_path):\n",
"    \"\"\"Return the parsed JSON content of ``file_path``, or ``{}`` if it does not exist.\"\"\"\n",
"    if not os.path.exists(file_path):\n",
"        return {}\n",
"    with open(file_path, 'r') as handle:\n",
"        return json.load(handle)\n",
"\n",
"# Function to save fare data to a JSON file\n",
"def save_fare_data(file_path, fare_data):\n",
"    \"\"\"Serialize ``fare_data`` to ``file_path`` as JSON with a 4-space indent.\"\"\"\n",
"    with open(file_path, 'w') as sink:\n",
"        json.dump(fare_data, sink, indent=4)\n",
"\n",
"# Function to handle the Copy button click\n",
"def copy_prompt(dba):\n",
"    \"\"\"Build the fare-lookup prompt for ``dba`` and copy it to the clipboard.\"\"\"\n",
"    prompt_text = f\"\"\"\n",
" Please format your response as a python dictionary.\n",
"\n",
" Give me the {dba}'s current live link to its Fares about its bus service. \n",
" Do not use the DoT's data, use the {dba}'s own website. \n",
" For example if the adult fare is $0.50 then make \"adult\": 0.50. If the senior fare is free then do \"senior\": 0.00, if it is 0.25 then do \"senior\": 0.25.\n",
" However, if the pass is a range, such as it costs money depending on how much zones you pass through,\n",
" provide a range of costs, if you are able to.\n",
" If there is a special extended duration pass, do \"pass\": {{\"duration_hours\": 24, \"cost\": 3.00}} for example.\n",
" if there isnt, do \"pass\": None\n",
" If there is \"medicare_medicaid\", include it. If there is \"veterans_active_military\", include it. \n",
" If there is a special collaboration with a university that gives free fare, say \"school_free_fare\": True, else make it False.\n",
" If there is a \"student\" cost, include it. Also student pass, \"student_pass\": {{\"duration\": \"semester\", \"cost\": 60.00}} for example.\n",
" If there is a link to the live fares, include it as \"live_fare_link\": \"mylinkhere\"\n",
" If there is a rate depending on age that notes specific years, do \"age_discount\": {{\"12-18\": 0.50, \"65+\": 0.50}}, \n",
" or if it is a child boarding with an adult, do \"age_discount\": {{\"with_adult\": 0.00}}, or \"age_discount\": {{\"may_qualify\": True}} if special terms.\n",
"\n",
" Lastly, give me any notes i may want to know, for example, fares suspended due to COVID, with \"notes\": \"your cool notes here\"\n",
" and give the copy-pasted data in \"data\": \"your data here\" so i can confirm.\n",
" You are supposed to rip it straight from the webpage for the data field. Copy and paste what you found.\n",
" DO NOT FORGET THE DATA FIELD.\n",
" DO NOT INCLUDE FARES RELATING TO PARATRANSIT. BUS ONLY.\n",
" \"\"\"\n",
"    pyperclip.copy(prompt_text)\n",
"\n",
"# Function to handle the Save button click\n",
"def save_data(dba, text_box, counter_label, current_index, total, window):\n",
"    \"\"\"Parse the pasted text and store it as the fare entry for ``dba``.\n",
"\n",
"    On success the entry is written to ``output_file``, the counter label is\n",
"    refreshed and ``window`` is destroyed. On any failure an error dialog is\n",
"    shown and the window is left open.\n",
"\n",
"    Args:\n",
"        dba: Agency name used as the key in the saved JSON.\n",
"        text_box: Widget whose full text is read as a Python literal.\n",
"        counter_label: Label whose text shows the progress counter.\n",
"        current_index: Position of this agency in the run (shown as index + 1).\n",
"        total: Number of agencies in the run.\n",
"        window: Window to destroy once the entry has been saved.\n",
"    \"\"\"\n",
"    import ast\n",
"\n",
"    data = text_box.get(\"1.0\", tk.END).strip()\n",
"\n",
"    try:\n",
"        # SECURITY: pasted text is untrusted. ast.literal_eval accepts only\n",
"        # literals (dict, list, str, numbers, True/False/None), so it cannot\n",
"        # run code the way the previous eval() could.\n",
"        fare_info = ast.literal_eval(data)\n",
"        fare_data = load_existing_data(output_file)\n",
"        fare_data[dba] = fare_info\n",
"        save_fare_data(output_file, fare_data)\n",
"\n",
"        # Update the counter label\n",
"        counter_label.config(text=f\"{current_index + 1} / {total} to go\")\n",
"\n",
"        # Close the current window to proceed to the next agency\n",
"        window.destroy()\n",
"\n",
"    except Exception as e:\n",
"        # Broad on purpose: this is the GUI boundary, and every failure (parse,\n",
"        # file I/O, widget) is reported to the user in a dialog.\n",
"        messagebox.showerror(\"Error\", f\"Failed to save data: {e}\")\n",
"\n",
"# Function to create the GUI for each DBA\n",
"def show_dba_window(dba, current_index, total):\n",
"    \"\"\"Build the data-entry window for ``dba`` and run its Tk main loop.\"\"\"\n",
"    window = tk.Tk()\n",
"    window.title(f\"Fare Data for {dba}\")\n",
"\n",
"    # Agency name, then the button that copies the prompt to the clipboard.\n",
"    tk.Label(window, text=f\"Current DBA: {dba}\").pack(pady=10)\n",
"    tk.Button(window, text=\"Copy Prompt\", command=lambda: copy_prompt(dba)).pack(pady=5)\n",
"\n",
"    # Area where the returned dictionary is pasted.\n",
"    text_box = tk.Text(window, height=15, width=70)\n",
"    text_box.pack(pady=10)\n",
"\n",
"    # Progress counter for the current run.\n",
"    counter_label = tk.Label(window, text=f\"{current_index + 1} / {total} to go\")\n",
"    counter_label.pack(pady=5)\n",
"\n",
"    def on_save(event=None):\n",
"        # Shared by the Save button (called with no argument) and the\n",
"        # <Return> key binding (called with the event).\n",
"        save_data(dba, text_box, counter_label, current_index, total, window)\n",
"\n",
"    tk.Button(window, text=\"Save Data\", command=on_save).pack(pady=5)\n",
"    window.bind('<Return>', on_save)\n",
"\n",
"    window.mainloop()\n",
"\n",
"# Function to iterate over the DataFrame and show a window for each DBA\n",
"def process_dbas(df):\n",
"    \"\"\"Show one data-entry window per agency in ``df`` that has no saved fare entry.\"\"\"\n",
"    saved = load_existing_data(output_file)\n",
"\n",
"    # Keep only rows whose 'Agency' value is not yet a key of the saved data.\n",
"    pending = df[~df['Agency'].isin(saved.keys())]\n",
"    pending_count = len(pending)\n",
"\n",
"    for position, (_, record) in enumerate(pending.iterrows()):\n",
"        show_dba_window(record['Agency'], position, pending_count)\n",
"\n",
"\n",
"# Example usage: start processing all DBAs\n",
"# process_dbas(df)\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"\n",
"# Assuming df is already defined\n",
"# Save the DataFrame to a pickle file\n",
"df.to_pickle('ntd.pkl')\n",
"\n"
]
}
],
"metadata": {
"language_info": {
"name": "python"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
28 changes: 28 additions & 0 deletions src/emcommon/metrics/transit/fare_data.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
{
"MTA New York City Transit": {
"adult": 2.9,
"senior": 1.45,
"student": 0.0,
"age_discount": {
"65+": 1.45,
"with_adult": 0.0
},
"pass": {
"duration_hours": null,
"cost": null
},
"7_day_pass": 34.0,
"30_day_pass": 132.0,
"express_bus_pass": {
"duration_days": 7,
"cost": 64.0
},
"medicare_medicaid": null,
"veterans_active_military": null,
"school_free_fare": true,
"student_pass": null,
"live_fare_link": "https://new.mta.info/fares/subways-and-buses",
"notes": "Q70 LaGuardia Link service is fare-free. Fare capping applies for OMNY."
},
"New Jersey Transit Corporation": "No"
}
Loading

0 comments on commit 9aa2bd6

Please sign in to comment.