From e07abf35ac9cd9d3c5456c73b2840b4f17092ddb Mon Sep 17 00:00:00 2001 From: Timo Clemens <43761260+clemone210@users.noreply.github.com> Date: Thu, 10 Aug 2023 11:13:14 +0200 Subject: [PATCH] first commit Contains working code to extract only the references and their specific cost which have been invoiced. The output will be .xlsx file. --- main.py | 44 ++++++++++++++++++++++++++++++++++++++++++++ requirements.txt | 2 ++ 2 files changed, 46 insertions(+) create mode 100644 main.py create mode 100644 requirements.txt diff --git a/main.py b/main.py new file mode 100644 index 0000000..3b6187c --- /dev/null +++ b/main.py @@ -0,0 +1,44 @@ +import os +import pandas as pd + +# List all files in the 'invoices' directory with a .csv extension +invoice_files = [f for f in os.listdir('invoices') if f.endswith('.csv')] + +# Check if there are any invoice files +if not invoice_files: + print("No invoice files found.") + exit() + +all_dataframes = [] + +for file in invoice_files: + path = os.path.join('invoices', file) + df = pd.read_csv(path, delimiter=';') + + # Filter rows where 'Leistungsartenbezeichnung' is not empty + df = df[df['Leistungsartenbezeichnung'].notna()] + + all_dataframes.append(df) + +# Concatenate all the dataframes +final_df = pd.concat(all_dataframes, ignore_index=True) + +# Create the detailed overview DataFrame +output_df = final_df[['Erstauftragsnummer', 'Leistungsartenbezeichnung', 'Leistungsarten-Nettobetrag']] + +# Ensure the 'Leistungsarten-Nettobetrag' column is numeric + +# Convert using comma as a decimal separator (European format), for instance +final_df['Leistungsarten-Nettobetrag'] = pd.to_numeric(final_df['Leistungsarten-Nettobetrag'].str.replace(',', '.'), errors='coerce') + +# Check if there are any NaN values after conversion +if final_df['Leistungsarten-Nettobetrag'].isna().any(): + print("Warning: Some values in 'Leistungsarten-Nettobetrag' could not be converted to numeric format. They have been set to NaN.") + +# Sum the cost for each "Erstauftragsnummer" +summary_df = final_df.groupby('Erstauftragsnummer').agg({'Leistungsarten-Nettobetrag': 'sum'}).reset_index() + +# Save both DataFrames to separate sheets in a single Excel file +with pd.ExcelWriter('Invoice-overview.xlsx') as writer: + output_df.to_excel(writer, sheet_name='Detailed Overview', index=False) + summary_df.to_excel(writer, sheet_name='Cost Summary', index=False) diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..2f5b8d3 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,2 @@ +pandas +openpyxl \ No newline at end of file