diff --git a/etl/README.md b/etl/README.md
index 0a5ea2505..06c8d2b19 100644
--- a/etl/README.md
+++ b/etl/README.md
@@ -8,6 +8,7 @@ The scripts in this directory are used to extract, transform and load (ETL) the
 - :penguin: [Making data available to Ubuntu](#penguin-making-data-available-to-ubuntu)
 - :new_moon: [Creating a Colouring London database from scratch](#new_moon-creating-a-colouring-london-database-from-scratch)
 - :full_moon: [Updating the Colouring London database with new OS data](#full_moon-updating-the-colouring-london-database-with-new-os-data)
+- ⚡ [Adding EPC data](#-adding-epc-data)
 
 # :arrow_down: Downloading Ordnance Survey data
 
@@ -175,4 +176,32 @@ Mark buildings with geometries not present in the update as demolished.
 
 **TODO:** Update this after PR [#794](https://github.com/colouring-cities/colouring-london/pull/794)
 
-Run the Colouring London [deployment scripts](https://github.com/colouring-cities/colouring-london-config#deployment).
\ No newline at end of file
+Run the Colouring London [deployment scripts](https://github.com/colouring-cities/colouring-london-config#deployment).
+
+# ⚡ Adding EPC data
+
+Download the EPC data.
+
+```
+git clone https://github.com/iagw/colouring-cities
+```
+
+Copy `gla-epc-subset.zstd.parquet` into `colouring-london/etl`.
+
+```
+cp /path/to/gla-epc-subset.zstd.parquet /path/to/colouring-london/etl
+```
+
+Convert the parquet file to CSV. Make sure you have an up-to-date Python 3 environment and pip installation, and run `pip install -r requirements.txt` first if you haven't already.
+
+```
+python clean_epc_data.py
+```
+
+This should have created a CSV file called `gla-epc-subset.csv` in the `etl` directory.
+
+Create a new table for the EPC data and load the CSV data into it. If you haven't already, make the script executable first with `chmod +x *.sh`.
+
+```
+./load_epc.sh
+```
\ No newline at end of file
diff --git a/etl/__init__.py b/etl/__init__.py
index a9f46b583..1cceea6af 100644
--- a/etl/__init__.py
+++ b/etl/__init__.py
@@ -1 +1,2 @@
-from .filter_mastermap import filter_mastermap
\ No newline at end of file
+from .filter_mastermap import filter_mastermap
+from .epc_cleaning_functions import floor_level_to_int, construction_to_int
\ No newline at end of file
diff --git a/etl/clean_epc_data.py b/etl/clean_epc_data.py
new file mode 100644
index 000000000..921b206a3
--- /dev/null
+++ b/etl/clean_epc_data.py
@@ -0,0 +1,36 @@
+# # Instructions
+#
+# 1. Download the GLA EPC data from GitHub in parquet format:
+#     github.com/iagw/colouring-cities/blob/master/gla-epc-subset.zstd.parquet
+# 2. Place the file in `colouring-london/etl`
+# 3. Run this script to convert it to CSV for easy loading into Postgres
+
+import pandas as pd
+from epc_cleaning_functions import floor_level_to_int, construction_to_int
+
+gla = pd.read_parquet('gla-epc-subset.zstd.parquet')
+
+# Blank out invalid values (e.g. in CURRENT_ENERGY_RATING). The dict form
+# is deliberate: pandas < 1.4 ignores an explicit `value=None` and
+# forward-fills from the previous row instead of inserting a null.
+gla = gla.replace({'INVALID!': None})
+
+# Clean the FLOOR_LEVEL column
+gla['FLOOR_LEVEL'] = gla['FLOOR_LEVEL'].apply(floor_level_to_int)
+
+# Clean the CONSTRUCTION_AGE_BAND column
+gla['CONSTRUCTION_AGE_BAND'] = gla['CONSTRUCTION_AGE_BAND'].apply(construction_to_int)  # noqa: E501
+
+# Remove NaNs and non finite values. Infinities are turned into NaN
+# explicitly rather than via the deprecated `mode.use_inf_as_null` option,
+# which is the replacement pandas recommends.
+gla = gla.replace([float('inf'), float('-inf')], float('nan'))
+gla = gla.dropna()
+
+# Ensure int not float
+for column in ('CONSTRUCTION_AGE_BAND', 'FLOOR_LEVEL', 'UPRN'):
+    gla[column] = gla[column].astype(int)
+
+# Export to csv (the DataFrame index is written as the first column, which
+# load_epc.sh loads into the `index` column of the epc table)
+gla.to_csv('gla-epc-subset.csv')
diff --git a/etl/epc_cleaning_functions.py b/etl/epc_cleaning_functions.py
new file mode 100644
index 000000000..ec5f8dc59
--- /dev/null
+++ b/etl/epc_cleaning_functions.py
@@ -0,0 +1,80 @@
+"""Helpers that convert free-text EPC fields into integers."""
+import math
+import numbers
+import re
+
+# A whole number with an optional ordinal suffix, e.g. '3', '-1' or '21st'.
+_FLOOR_NUMBER = re.compile(r'(-?\d+)\s*(?:st|nd|rd|th)?')
+
+# Relative floor descriptions and the level assumed for each of them.
+_NAMED_FLOORS = {'mid floor': 1, 'top floor': 2}
+
+
+def _number_to_int(value):
+    """Return a number as an int; None if NaN, infinite or fractional."""
+    if isinstance(value, numbers.Integral):
+        return int(value)
+    as_float = float(value)
+    if math.isfinite(as_float) and as_float.is_integer():
+        return int(as_float)
+    return None
+
+
+def floor_level_to_int(lvl):
+    """Convert differently formatted floor level strings to ints.
+
+    As you can see below, there are some assumptions made such as
+    the 'top floor' being 2. This has been done so we can get an int value
+    for the floor for each building automatically populated by EPC data.
+    Incorrect assumptions can be updated later via the Colouring London
+    interface.
+
+    Matching is case-insensitive and ignores '+' and 'or above'. Numbers
+    (including numpy scalars) are returned as ints. None is returned
+    for missing values (None, NaN) and for anything unrecognised.
+    """
+    if lvl is None:
+        return None
+    if isinstance(lvl, numbers.Real):
+        return _number_to_int(lvl)
+    # Anything else is treated as text.
+    text = str(lvl).lower().replace('or above', '').replace('+', '').strip()
+    number = _FLOOR_NUMBER.fullmatch(text)
+    if number:
+        return int(number.group(1))
+    if 'ground' in text:
+        return 0
+    if 'basement' in text:
+        return -1
+    # 'mid floor' / 'top floor', or None for anything unrecognised.
+    return _NAMED_FLOORS.get(text)
+
+
+def construction_to_int(year):
+    """Convert an EPC construction age band to a single year.
+
+    'before 1900' gives 1900, '2007 onwards' gives 2007 and a range such
+    as '1991-1996' gives its midpoint, rounded with the built-in round()
+    (so an exact .5 midpoint goes to the nearest even year). Text in front
+    of these forms, e.g. 'England and Wales: ', is ignored.
+
+    None is returned for missing values (None, NaN), for the 'NO DATA!'
+    and 'INVALID!' markers and for any other text that cannot be parsed,
+    so one unexpected value does not abort the conversion of a column.
+    """
+    if year is None:
+        return None
+    if isinstance(year, numbers.Real):
+        return _number_to_int(year)
+    text = str(year).strip()
+    try:
+        if 'before' in text:
+            return int(text.split('before')[-1])
+        if 'onwards' in text:
+            return int(text.split('onwards')[0].split()[-1])
+        if '-' in text:
+            start, end = text.split()[-1].split('-')
+            return round((int(start) + int(end)) / 2)
+        return int(text)
+    except (ValueError, IndexError):
+        return None
diff --git a/etl/load_epc.sh b/etl/load_epc.sh
new file mode 100644
index 000000000..1f8b97776
--- /dev/null
+++ b/etl/load_epc.sh
@@ -0,0 +1,30 @@
+#!/usr/bin/env bash
+
+# Create the epc table and load gla-epc-subset.csv into it.
+# Run this from the directory that contains gla-epc-subset.csv, because
+# \copy below reads the file relative to the current directory.
+
+# Stop at the first failing command, so a failed CREATE TABLE is not
+# followed by an attempt to load data.
+set -e
+
+psql -c "DROP TABLE IF EXISTS epc;"
+
+# Create EPC data table
+## construction_age_band should match date_year in buildings table
+## uprn and toid can also be linked to building table
+psql -c "
+CREATE TABLE epc (
+    index integer,
+    current_energy_rating char(1),
+    lodgement_date timestamp,
+    floor_level integer,
+    construction_age_band smallint,
+    uprn bigint,
+    epc_data_from_file varchar,
+    toid varchar
+);
+"
+
+# Read in the EPC data (HEADER skips the CSV's column-name row)
+psql -c "\copy epc FROM 'gla-epc-subset.csv' DELIMITER ',' CSV HEADER;"
\ No newline at end of file
diff --git a/etl/requirements.txt b/etl/requirements.txt
index 551883598..233e13ad8 100644
--- a/etl/requirements.txt
+++ b/etl/requirements.txt
@@ -5,3 +5,7 @@ psycopg2==2.7.5
 shapely==1.7
 retrying==1.3.3
 requests==2.23.0
+pyarrow
+fastparquet
+cython
+pandas
\ No newline at end of file
diff --git a/tests/test_epc.py b/tests/test_epc.py
new file mode 100644
index 000000000..cd810fed8
--- /dev/null
+++ b/tests/test_epc.py
@@ -0,0 +1,29 @@
+import pytest
+from etl import floor_level_to_int, construction_to_int
+
+
+@pytest.mark.parametrize('lvl, expected', [
+    ('01', 1), ('02', 2), ('1st', 1), ('2nd', 2), ('3rd', 3), ('4th', 4),
+    ('1', 1), ('2', 2), ('0', 0), ('00', 0), ('20+', 20),
+    ('21st or above', 21),
+    ('Ground', 0), ('ground floor', 0), ('Basement', -1),
+    ('mid floor', 1), ('top floor', 2),
+    ('NODATA!', None), (None, None), (5, 5),
+])
+def test_floor_level_to_int(lvl, expected):
+    """Test that differently formatted floors can be correctly converted."""
+    assert floor_level_to_int(lvl) == expected
+
+
+@pytest.mark.parametrize('date, expected', [
+    ('England and Wales: before 1900', 1900),
+    ('England and Wales: 1991-1996', 1994),
+    ('England and Wales: 2007 onwards', 2007),
+    ('1950', 1950),
+    ('NO DATA!', None),
+    ('INVALID!', None),
+    (None, None),
+])
+def test_construction_to_int(date, expected):
+    """Test that differently formatted construction ages can be converted."""
+    assert construction_to_int(date) == expected
\ No newline at end of file