Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix for transport data url not working #362

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
123 changes: 123 additions & 0 deletions data/temp_hard_coded/transport_data.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,123 @@
country,number cars,average fuel efficiency
AL,563106,0.4
AR,21633587,0.758
AU,18326236,0.753
AT,7421647,0.634
AZ,1330551,0.755
BD,2879708,0.858
BY,4192291,0.795
BE,7330718,0.714
BJ,469761,0.324
BO,1711005,0.593
BA,978229,0.863
BW,653274,0.679
BR,93867016,0.552
BG,4031748,0.805
CI,905537,0.689
KH,3751715,0.364
CM,758145,0.462
CA,23923806,0.682
CL,4960945,0.689
CN,294694457,0.914
CO,13477996,0.588
CR,1991398,0.314
HR,1996056,0.634
CU,633369,0.957
CY,650805,0.696
CZ,7325789,0.83
DK,3131673,0.671
DO,3854038,0.737
EC,1925368,0.583
EG,8412673,0.775
SV,1008080,0.519
ER,72405,0.696
EE,865040,0.873
ET,708416,0.531
FI,5217850,0.761
FR,42363000,0.576
GE,1126470,0.578
DE,56622000,0.786
GH,2066943,0.446
GR,9489299,0.752
GT,3250194,0.6
HN,1694504,0.605
HU,4022798,0.728
IS,289501,0.598
IN,210023289,0.885
ID,128398594,0.692
IR,30377065,0.751
IQ,5775777,0.794
IE,2573961,0.679
IL,3239305,0.739
IT,52581575,0.67
JM,541316,0.757
JP,81602046,0.825
JO,1502420,0.708
KZ,4383120,0.939
KE,2979910,0.471
KW,2001940,0.847
KG,993000,0.722
LV,803628,0.571
LB,1866407,0.756
LY,3553497,0.607
LT,1391568,0.538
LU,466472,0.332
MY,27613120,0.703
MT,358947,0.769
MU,507676,0.745
MX,40205671,0.649
MN,841537,0.889
ME,211219,0.766
MA,3791469,0.712
MZ,698864,0.432
MM,6381136,0.634
NA,371281,0.446
NP,2339169,0.56
NL,10757655,0.801
NZ,3656300,0.551
NE,436420,0.408
NG,11733425,0.646
MK,442962,0.785
NO,3969612,0.602
OM,1370913,0.792
PK,18352500,0.711
PA,1288573,0.636
PY,1871947,0.069
PE,5604789,0.597
PH,9251565,0.722
PL,27409106,0.843
PT,6590094,0.633
QA,1330487,0.818
KR,25680967,0.837
MD,894253,0.753
RO,7014661,0.776
RU,54014259,0.838
SA,6895799,0.741
SN,468051,0.611
RS,2282401,0.845
SG,933534,0.848
SK,2606412,0.793
SI,1468439,0.585
SS,69647,0.349
ES,32986384,0.647
LK,6795469,0.523
SD,1252740,0.408
SR,228388,0.663
SE,6102914,0.467
CH,5980512,0.552
SY,2396544,0.76
TR,21090424,0.802
TJ,439972,0.665
TH,37338139,0.753
TG,64118,0.227
TT,831803,0.862
TN,2015601,0.747
UA,14433709,0.886
AE,3391125,0.79
GB,38388214,0.715
TZ,2163623,0.424
US,281312446,0.666
UY,2342026,0.447
VE,7999760,0.677
VN,50666855,0.777
ZW,1198584,0.777
68 changes: 42 additions & 26 deletions scripts/prepare_transport_data_input.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
# -*- coding: utf-8 -*-
import logging
import os
import shutil
from pathlib import Path

import country_converter as coco
Expand All @@ -21,7 +22,16 @@ def download_number_of_vehicles():
"""
fn = "https://apps.who.int/gho/athena/data/GHO/RS_194?filter=COUNTRY:*&ead=&x-sideaxis=COUNTRY;YEAR;DATASOURCE&x-topaxis=GHO&profile=crosstable&format=csv"
storage_options = {"User-Agent": "Mozilla/5.0"}
Nbr_vehicles_csv = pd.read_csv(fn, storage_options=storage_options, encoding="utf8")

# Read the CSV file directly from the provided URL
try:
Nbr_vehicles_csv = pd.read_csv(
fn, storage_options=storage_options, encoding="utf8"
)
print("File read successfully.")
except Exception as e:
print("Failed to read the file:", e)
return pd.DataFrame()

Nbr_vehicles_csv = Nbr_vehicles_csv.rename(
columns={
Expand Down Expand Up @@ -110,28 +120,34 @@ def download_CO2_emissions():
# Download and prepare CO2_emissions_csv:
CO2_emissions_csv = download_CO2_emissions().copy()

# Join the DataFrames by the 'country' column
merged_df = pd.merge(vehicles_csv, CO2_emissions_csv, on="country")
merged_df = merged_df[["country", "number cars", "average fuel efficiency"]]

# drop rows with NaN values in 'average fuel efficiency'
merged_df = merged_df.dropna(subset=["average fuel efficiency"])

# Convert the 'average fuel efficiency' to float
merged_df["average fuel efficiency"] = merged_df["average fuel efficiency"].astype(
float
)

# Round the 'average fuel efficiency' to three decimal places
merged_df.loc[:, "average fuel efficiency"] = merged_df[
"average fuel efficiency"
].round(3)

# Save
merged_df.to_csv(
snakemake.output.transport_data_input,
sep=",",
encoding="utf-8",
header="true",
index=False,
)
if vehicles_csv.empty or CO2_emissions_csv.empty:
# If either of the URLs is not working, fall back to the hard-coded data
src = os.getcwd() + "/data/temp_hard_coded/transport_data.csv"
dest = snakemake.output.transport_data_input
shutil.copy(src, dest)
else:
# Join the DataFrames by the 'country' column
merged_df = pd.merge(vehicles_csv, CO2_emissions_csv, on="country")
merged_df = merged_df[["country", "number cars", "average fuel efficiency"]]

# Drop rows with NaN values in 'average fuel efficiency'
merged_df = merged_df.dropna(subset=["average fuel efficiency"])

# Convert the 'average fuel efficiency' to float
merged_df["average fuel efficiency"] = merged_df[
"average fuel efficiency"
].astype(float)

# Round the 'average fuel efficiency' to three decimal places
merged_df.loc[:, "average fuel efficiency"] = merged_df[
"average fuel efficiency"
].round(3)

# Save the merged DataFrame to a CSV file
merged_df.to_csv(
snakemake.output.transport_data_input,
sep=",",
encoding="utf-8",
header="true",
index=False,
)
Loading