From 112f4b594e5fc78f5a01f9043e477f2a95af1268 Mon Sep 17 00:00:00 2001
From: john <john.roberts@flowminder.org>
Date: Tue, 7 Dec 2021 14:06:51 +0000
Subject: [PATCH 01/20] Adding labelling disaggregation to spatial_aggregate +
 initial test

---
 .../features/location/spatial_aggregate.py    | 22 +++++++++++++++----
 flowmachine/tests/test_spatial_aggregate.py   | 20 +++++++++++++++++
 2 files changed, 38 insertions(+), 4 deletions(-)

diff --git a/flowmachine/flowmachine/features/location/spatial_aggregate.py b/flowmachine/flowmachine/features/location/spatial_aggregate.py
index 0383beca5b..49cefa5f13 100644
--- a/flowmachine/flowmachine/features/location/spatial_aggregate.py
+++ b/flowmachine/flowmachine/features/location/spatial_aggregate.py
@@ -18,31 +18,45 @@ class SpatialAggregate(GeoDataMixin, Query):
     Parameters
     ----------
     locations : subscriber location query
+    subscriber_label: A query with a  column to further disaggregate by
+    label_column: Name of the column containing the label: defaults to 'value'
     """
 
-    def __init__(self, *, locations):
+    def __init__(self, *, locations, subscriber_labels=None, label_column="value"):
 
         self.locations = locations
         # self.spatial_unit is used in self._geo_augmented_query
         self.spatial_unit = locations.spatial_unit
+        self.subscriber_labels = subscriber_labels
+        self.label_column = label_column
         super().__init__()
 
     @property
     def column_names(self) -> List[str]:
+        # Do we want to return the labels column as well?
         return self.spatial_unit.location_id_columns + ["value"]
 
     def _make_query(self):
 
         aggregate_cols = ",".join(self.spatial_unit.location_id_columns)
 
+        if self.subscriber_labels:
+            label_clause = f"JOIN ({self.subscriber_labels.get_query()}) AS labels USING (subscriber)"
+            label_select_clause = f", labels.{self.label_column} AS label"
+            label_agg_clause = f", label"
+        else:
+            label_clause = ""
+            label_select_clause = ""
+            label_agg_clause = ""
+
         sql = f"""
         SELECT
-            {aggregate_cols},
+            {aggregate_cols}{label_select_clause},
             count(*) AS value
         FROM
-            ({self.locations.get_query()}) AS to_agg
+            ({self.locations.get_query()})  AS to_agg {label_clause}
         GROUP BY
-            {aggregate_cols}
+            {aggregate_cols}{label_agg_clause}
         """
 
         return sql
diff --git a/flowmachine/tests/test_spatial_aggregate.py b/flowmachine/tests/test_spatial_aggregate.py
index 657d0237f7..8468336821 100644
--- a/flowmachine/tests/test_spatial_aggregate.py
+++ b/flowmachine/tests/test_spatial_aggregate.py
@@ -8,6 +8,7 @@
     daily_location,
     SubscriberHandsetCharacteristic,
 )
+from flowmachine.features.location.spatial_aggregate import SpatialAggregate
 from flowmachine.features.location.joined_spatial_aggregate import (
     JoinedSpatialAggregate,
 )
@@ -62,3 +63,22 @@ def test_can_be_aggregated_admin3_distribution(get_dataframe):
     df = get_dataframe(agg)
     assert ["pcod", "metric", "key", "value"] == list(df.columns)
     assert all(df[df.metric == "value"].groupby("pcod").sum() == 1.0)
+
+
+def test_labelled_spatial_aggregate(get_dataframe):
+    """
+    Tests disaggregation by a label query
+    """
+    locations = locate_subscribers(
+        "2016-01-01",
+        "2016-01-02",
+        spatial_unit=make_spatial_unit("admin", level=3),
+        method="most-common",
+    )
+    metric = SubscriberHandsetCharacteristic(
+        "2016-01-01", "2016-01-02", characteristic="hnd_type"
+    )
+    labelled = SpatialAggregate(locations=locations, subscriber_labels=metric)
+    df = get_dataframe(labelled)
+    assert len(df) == 50
+    assert len(df.pcod.unique()) == 25

From ffdca9347d16b27e47de4fd8fac22eda103ac438 Mon Sep 17 00:00:00 2001
From: john <john.roberts@flowminder.org>
Date: Tue, 7 Dec 2021 15:12:11 +0000
Subject: [PATCH 02/20] Spatial_aggregate returns labels if used

---
 .../flowmachine/features/location/spatial_aggregate.py       | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/flowmachine/flowmachine/features/location/spatial_aggregate.py b/flowmachine/flowmachine/features/location/spatial_aggregate.py
index 49cefa5f13..61dba60df2 100644
--- a/flowmachine/flowmachine/features/location/spatial_aggregate.py
+++ b/flowmachine/flowmachine/features/location/spatial_aggregate.py
@@ -34,7 +34,10 @@ def __init__(self, *, locations, subscriber_labels=None, label_column="value"):
     @property
     def column_names(self) -> List[str]:
         # Do we want to return the labels column as well?
-        return self.spatial_unit.location_id_columns + ["value"]
+        if self.subscriber_labels:
+            return self.spatial_unit.location_id_columns + ["label", "value"]
+        else:
+            return self.spatial_unit.location_id_columns + ["value"]
 
     def _make_query(self):
 

From 7c6b56c82a53fb33d88330c2f6ac299faf5ba23a Mon Sep 17 00:00:00 2001
From: john <john.roberts@flowminder.org>
Date: Tue, 7 Dec 2021 15:13:10 +0000
Subject: [PATCH 03/20] Spatial_aggregate returns labels if used

---
 flowmachine/tests/test_spatial_aggregate.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/flowmachine/tests/test_spatial_aggregate.py b/flowmachine/tests/test_spatial_aggregate.py
index 8468336821..d7a62a77b3 100644
--- a/flowmachine/tests/test_spatial_aggregate.py
+++ b/flowmachine/tests/test_spatial_aggregate.py
@@ -82,3 +82,4 @@ def test_labelled_spatial_aggregate(get_dataframe):
     df = get_dataframe(labelled)
     assert len(df) == 50
     assert len(df.pcod.unique()) == 25
+    assert len(df.label.unique()) == 2

From 96fa35f562c053526b716fcc81bf433315dcda69 Mon Sep 17 00:00:00 2001
From: john <john.roberts@flowminder.org>
Date: Wed, 8 Dec 2021 10:32:08 +0000
Subject: [PATCH 04/20] Breaking labelled spatial aggregate into its own class
 + permitting multiple labels

---
 .../location/labelled_spatial_aggregate.py    | 56 +++++++++++++++++++
 .../features/location/spatial_aggregate.py    | 27 ++-------
 flowmachine/tests/test_spatial_aggregate.py   |  9 ++-
 3 files changed, 68 insertions(+), 24 deletions(-)
 create mode 100644 flowmachine/flowmachine/features/location/labelled_spatial_aggregate.py

diff --git a/flowmachine/flowmachine/features/location/labelled_spatial_aggregate.py b/flowmachine/flowmachine/features/location/labelled_spatial_aggregate.py
new file mode 100644
index 0000000000..cf94c7a483
--- /dev/null
+++ b/flowmachine/flowmachine/features/location/labelled_spatial_aggregate.py
@@ -0,0 +1,56 @@
+from typing import List
+
+from flowmachine.core import Query
+from flowmachine.core.mixins import GeoDataMixin
+from flowmachine.features.location.spatial_aggregate import SpatialAggregate
+
+
+class LabelledSpatialAggregate(GeoDataMixin, Query):
+    """
+    Class represeneting a disaggregation of a SpatialAggregate by some label
+    """
+
+    def __init__(
+        self,
+        locations: Query,
+        subscriber_labels: Query,
+        label_columns: List[str] = ("value",),
+    ):
+        self.locations = locations
+        self.subscriber_labels = subscriber_labels
+        self.label_columns = list(label_columns)
+        super().__init__()
+
+    @property
+    def column_names(self) -> List[str]:
+        return (
+            self.locations.spatial_unit.location_id_columns
+            + [f"label_{label}" for label in self.label_columns]
+            + ["value"]
+        )
+
+    def _make_query(self):
+
+        aggregate_cols = ",".join(
+            f"agg.{agg_col}"
+            for agg_col in self.locations.spatial_unit.location_id_columns
+        )
+        label_select = ",".join(
+            f"labels.{label_col} AS label_{label_col}"
+            for label_col in self.label_columns
+        )
+        label_group = ",".join(
+            f"labels.{label_col}" for label_col in self.label_columns
+        )
+
+        sql = f"""
+            SELECT
+                {aggregate_cols}, {label_select}, count(*) AS value
+            FROM 
+                ({self.locations.get_query()}) AS agg
+            JOIN
+                ({self.subscriber_labels.get_query()}) AS labels USING (subscriber)
+            GROUP BY
+                {aggregate_cols}, {label_group}
+            """
+        return sql
diff --git a/flowmachine/flowmachine/features/location/spatial_aggregate.py b/flowmachine/flowmachine/features/location/spatial_aggregate.py
index 61dba60df2..0383beca5b 100644
--- a/flowmachine/flowmachine/features/location/spatial_aggregate.py
+++ b/flowmachine/flowmachine/features/location/spatial_aggregate.py
@@ -18,48 +18,31 @@ class SpatialAggregate(GeoDataMixin, Query):
     Parameters
     ----------
     locations : subscriber location query
-    subscriber_label: A query with a  column to further disaggregate by
-    label_column: Name of the column containing the label: defaults to 'value'
     """
 
-    def __init__(self, *, locations, subscriber_labels=None, label_column="value"):
+    def __init__(self, *, locations):
 
         self.locations = locations
         # self.spatial_unit is used in self._geo_augmented_query
         self.spatial_unit = locations.spatial_unit
-        self.subscriber_labels = subscriber_labels
-        self.label_column = label_column
         super().__init__()
 
     @property
     def column_names(self) -> List[str]:
-        # Do we want to return the labels column as well?
-        if self.subscriber_labels:
-            return self.spatial_unit.location_id_columns + ["label", "value"]
-        else:
-            return self.spatial_unit.location_id_columns + ["value"]
+        return self.spatial_unit.location_id_columns + ["value"]
 
     def _make_query(self):
 
         aggregate_cols = ",".join(self.spatial_unit.location_id_columns)
 
-        if self.subscriber_labels:
-            label_clause = f"JOIN ({self.subscriber_labels.get_query()}) AS labels USING (subscriber)"
-            label_select_clause = f", labels.{self.label_column} AS label"
-            label_agg_clause = f", label"
-        else:
-            label_clause = ""
-            label_select_clause = ""
-            label_agg_clause = ""
-
         sql = f"""
         SELECT
-            {aggregate_cols}{label_select_clause},
+            {aggregate_cols},
             count(*) AS value
         FROM
-            ({self.locations.get_query()})  AS to_agg {label_clause}
+            ({self.locations.get_query()}) AS to_agg
         GROUP BY
-            {aggregate_cols}{label_agg_clause}
+            {aggregate_cols}
         """
 
         return sql
diff --git a/flowmachine/tests/test_spatial_aggregate.py b/flowmachine/tests/test_spatial_aggregate.py
index d7a62a77b3..3d7923b67d 100644
--- a/flowmachine/tests/test_spatial_aggregate.py
+++ b/flowmachine/tests/test_spatial_aggregate.py
@@ -15,6 +15,10 @@
 from flowmachine.features.subscriber.daily_location import locate_subscribers
 from flowmachine.utils import list_of_dates
 
+from flowmachine.features.location.labelled_spatial_aggregate import (
+    LabelledSpatialAggregate,
+)
+
 
 def test_can_be_aggregated_admin3(get_dataframe):
     """
@@ -78,8 +82,9 @@ def test_labelled_spatial_aggregate(get_dataframe):
     metric = SubscriberHandsetCharacteristic(
         "2016-01-01", "2016-01-02", characteristic="hnd_type"
     )
-    labelled = SpatialAggregate(locations=locations, subscriber_labels=metric)
+    labelled = LabelledSpatialAggregate(locations=locations, subscriber_labels=metric)
+    foo = labelled.get_query()
     df = get_dataframe(labelled)
     assert len(df) == 50
     assert len(df.pcod.unique()) == 25
-    assert len(df.label.unique()) == 2
+    assert len(df.label_value.unique()) == 2

From 0d95848bbdfa9ce86649869ef610baab1aa009c0 Mon Sep 17 00:00:00 2001
From: john <john.roberts@flowminder.org>
Date: Wed, 8 Dec 2021 12:17:26 +0000
Subject: [PATCH 05/20] Implementing specialised redaction + test

---
 .../location/labelled_spatial_aggregate.py    | 23 ++++++--
 .../redacted_labelled_spatial_aggregate.py    | 55 +++++++++++++++++++
 .../tests/test_labelled_spatial_aggregate.py  | 25 +++++++++
 ...test_redacted_labelled_spatial_location.py | 43 +++++++++++++++
 flowmachine/tests/test_spatial_aggregate.py   | 26 ---------
 5 files changed, 140 insertions(+), 32 deletions(-)
 create mode 100644 flowmachine/flowmachine/features/location/redacted_labelled_spatial_aggregate.py
 create mode 100644 flowmachine/tests/test_labelled_spatial_aggregate.py
 create mode 100644 flowmachine/tests/test_redacted_labelled_spatial_location.py

diff --git a/flowmachine/flowmachine/features/location/labelled_spatial_aggregate.py b/flowmachine/flowmachine/features/location/labelled_spatial_aggregate.py
index cf94c7a483..d9f7e63772 100644
--- a/flowmachine/flowmachine/features/location/labelled_spatial_aggregate.py
+++ b/flowmachine/flowmachine/features/location/labelled_spatial_aggregate.py
@@ -7,7 +7,8 @@
 
 class LabelledSpatialAggregate(GeoDataMixin, Query):
     """
-    Class represeneting a disaggregation of a SpatialAggregate by some label
+    Class representing a disaggregation of a SpatialAggregate by some label or set of labels
+    Returns a query with
     """
 
     def __init__(
@@ -19,25 +20,35 @@ def __init__(
         self.locations = locations
         self.subscriber_labels = subscriber_labels
         self.label_columns = list(label_columns)
+        self.spatial_unit = locations.spatial_unit
+        self.label_columns = label_columns
+        self.out_label_columns = [
+            f"label_{label_col}" for label_col in self.label_columns
+        ]
         super().__init__()
 
     @property
     def column_names(self) -> List[str]:
         return (
-            self.locations.spatial_unit.location_id_columns
+            list(self.spatial_unit.location_id_columns)
             + [f"label_{label}" for label in self.label_columns]
             + ["value"]
         )
 
+    @property
+    def out_label_columns_as_string_list(self):
+        return ",".join(self.out_label_columns)
+
     def _make_query(self):
 
         aggregate_cols = ",".join(
-            f"agg.{agg_col}"
-            for agg_col in self.locations.spatial_unit.location_id_columns
+            f"agg.{agg_col}" for agg_col in self.spatial_unit.location_id_columns
         )
         label_select = ",".join(
-            f"labels.{label_col} AS label_{label_col}"
-            for label_col in self.label_columns
+            f"labels.{label_col} AS {out_label_col}"
+            for label_col, out_label_col in zip(
+                self.label_columns, self.out_label_columns
+            )
         )
         label_group = ",".join(
             f"labels.{label_col}" for label_col in self.label_columns
diff --git a/flowmachine/flowmachine/features/location/redacted_labelled_spatial_aggregate.py b/flowmachine/flowmachine/features/location/redacted_labelled_spatial_aggregate.py
new file mode 100644
index 0000000000..c3a6ca73d3
--- /dev/null
+++ b/flowmachine/flowmachine/features/location/redacted_labelled_spatial_aggregate.py
@@ -0,0 +1,55 @@
+from typing import List
+
+from flowmachine.features.location.labelled_spatial_aggregate import (
+    LabelledSpatialAggregate,
+)
+from flowmachine.core import Query
+from flowmachine.core.mixins import GeoDataMixin
+
+
+class RedactedLabelledSpatialAggregate(GeoDataMixin, Query):
+    """
+    Query that drops and reaggregates data if any 'label' column falls below redcation_threshold
+    Parameters
+    ----------
+    labelled_spatial_aggregate
+    redaction_threshold
+    """
+
+    def __init__(
+        self,
+        *,
+        labelled_spatial_aggregate: LabelledSpatialAggregate,
+        redaction_threshold=15,
+    ):
+
+        self.labelled_spatial_aggregate = labelled_spatial_aggregate
+        self.redaction_threshold = redaction_threshold
+        super().__init__()
+
+    @property
+    def column_names(self) -> List[str]:
+        return self.labelled_spatial_aggregate.column_names
+
+    def _make_query(self):
+
+        aggs = ",".join(
+            self.labelled_spatial_aggregate.spatial_unit.location_id_columns
+        )
+        labels = self.labelled_spatial_aggregate.out_label_columns_as_string_list
+        all_cols = self.labelled_spatial_aggregate.column_names_as_string_list
+
+        sql = f"""
+        WITH lsa_query AS(
+            SELECT *
+            FROM({self.labelled_spatial_aggregate.get_query()}) AS t
+        ), aggs_to_keep AS(
+            SELECT {aggs}
+            FROM lsa_query
+            GROUP BY {aggs}
+            HAVING min(value) > {self.redaction_threshold}
+        )
+        SELECT {all_cols}
+        FROM lsa_query INNER JOIN aggs_to_keep USING ({aggs})
+        """
+        return sql
diff --git a/flowmachine/tests/test_labelled_spatial_aggregate.py b/flowmachine/tests/test_labelled_spatial_aggregate.py
new file mode 100644
index 0000000000..7c11f472c4
--- /dev/null
+++ b/flowmachine/tests/test_labelled_spatial_aggregate.py
@@ -0,0 +1,25 @@
+from core import make_spatial_unit
+from features import SubscriberHandsetCharacteristic
+from features.location.labelled_spatial_aggregate import LabelledSpatialAggregate
+from features.subscriber.daily_location import locate_subscribers
+
+
+def test_labelled_spatial_aggregate(get_dataframe):
+    """
+    Tests disaggregation by a label query
+    """
+    locations = locate_subscribers(
+        "2016-01-01",
+        "2016-01-02",
+        spatial_unit=make_spatial_unit("admin", level=3),
+        method="most-common",
+    )
+    metric = SubscriberHandsetCharacteristic(
+        "2016-01-01", "2016-01-02", characteristic="hnd_type"
+    )
+    labelled = LabelledSpatialAggregate(locations=locations, subscriber_labels=metric)
+    foo = labelled.get_query()
+    df = get_dataframe(labelled)
+    assert len(df) == 50
+    assert len(df.pcod.unique()) == 25
+    assert len(df.label_value.unique()) == 2
diff --git a/flowmachine/tests/test_redacted_labelled_spatial_location.py b/flowmachine/tests/test_redacted_labelled_spatial_location.py
new file mode 100644
index 0000000000..87fe6f67c1
--- /dev/null
+++ b/flowmachine/tests/test_redacted_labelled_spatial_location.py
@@ -0,0 +1,43 @@
+import pytest
+
+import pandas as pd
+from pandas.testing import assert_frame_equal
+
+from flowmachine.core import make_spatial_unit
+from flowmachine.features import SubscriberHandsetCharacteristic
+from flowmachine.features.location.labelled_spatial_aggregate import (
+    LabelledSpatialAggregate,
+)
+from flowmachine.features.location.redacted_labelled_spatial_aggregate import (
+    RedactedLabelledSpatialAggregate,
+)
+from flowmachine.features.subscriber.daily_location import locate_subscribers
+
+
+def test_no_redaction(get_dataframe):
+
+    locations = locate_subscribers(
+        "2016-01-01",
+        "2016-01-02",
+        spatial_unit=make_spatial_unit("admin", level=3),
+        method="most-common",
+    )
+    metric = SubscriberHandsetCharacteristic(
+        "2016-01-01", "2016-01-02", characteristic="hnd_type"
+    )
+    labelled = LabelledSpatialAggregate(locations=locations, subscriber_labels=metric)
+
+    redacted = RedactedLabelledSpatialAggregate(labelled_spatial_aggregate=labelled)
+    redacted_df = get_dataframe(redacted)
+
+    target = pd.DataFrame(
+        [
+            ["524 3 08 44", "Feature", 36],
+            ["524 3 08 44", "Smart", 28],
+            ["524 4 12 62", "Feature", 44],
+            ["524 4 12 62", "Smart", 19],
+        ],
+        columns=["pcod", "label_value", "value"],
+    )
+
+    assert_frame_equal(redacted_df, target)
diff --git a/flowmachine/tests/test_spatial_aggregate.py b/flowmachine/tests/test_spatial_aggregate.py
index 3d7923b67d..657d0237f7 100644
--- a/flowmachine/tests/test_spatial_aggregate.py
+++ b/flowmachine/tests/test_spatial_aggregate.py
@@ -8,17 +8,12 @@
     daily_location,
     SubscriberHandsetCharacteristic,
 )
-from flowmachine.features.location.spatial_aggregate import SpatialAggregate
 from flowmachine.features.location.joined_spatial_aggregate import (
     JoinedSpatialAggregate,
 )
 from flowmachine.features.subscriber.daily_location import locate_subscribers
 from flowmachine.utils import list_of_dates
 
-from flowmachine.features.location.labelled_spatial_aggregate import (
-    LabelledSpatialAggregate,
-)
-
 
 def test_can_be_aggregated_admin3(get_dataframe):
     """
@@ -67,24 +62,3 @@ def test_can_be_aggregated_admin3_distribution(get_dataframe):
     df = get_dataframe(agg)
     assert ["pcod", "metric", "key", "value"] == list(df.columns)
     assert all(df[df.metric == "value"].groupby("pcod").sum() == 1.0)
-
-
-def test_labelled_spatial_aggregate(get_dataframe):
-    """
-    Tests disaggregation by a label query
-    """
-    locations = locate_subscribers(
-        "2016-01-01",
-        "2016-01-02",
-        spatial_unit=make_spatial_unit("admin", level=3),
-        method="most-common",
-    )
-    metric = SubscriberHandsetCharacteristic(
-        "2016-01-01", "2016-01-02", characteristic="hnd_type"
-    )
-    labelled = LabelledSpatialAggregate(locations=locations, subscriber_labels=metric)
-    foo = labelled.get_query()
-    df = get_dataframe(labelled)
-    assert len(df) == 50
-    assert len(df.pcod.unique()) == 25
-    assert len(df.label_value.unique()) == 2

From 66be2f0ddc1127d98557748ad0cec406dd7d5dbc Mon Sep 17 00:00:00 2001
From: john <john.roberts@flowminder.org>
Date: Wed, 8 Dec 2021 13:33:28 +0000
Subject: [PATCH 06/20] Fixing imports

---
 flowmachine/flowmachine/core/spatial_unit.py          |  6 +++---
 .../features/location/labelled_spatial_aggregate.py   |  1 -
 .../location/redacted_labelled_spatial_aggregate.py   |  2 +-
 flowmachine/tests/test_labelled_spatial_aggregate.py  | 11 ++++++-----
 4 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/flowmachine/flowmachine/core/spatial_unit.py b/flowmachine/flowmachine/core/spatial_unit.py
index 3ab85fc1d8..c4a2e11b24 100644
--- a/flowmachine/flowmachine/core/spatial_unit.py
+++ b/flowmachine/flowmachine/core/spatial_unit.py
@@ -12,9 +12,9 @@
 
 from flowmachine.utils import get_name_and_alias
 from flowmachine.core.errors import InvalidSpatialUnitError
-from . import Query, Table
-from .context import get_db
-from .grid import Grid
+from flowmachine.core import Query, Table
+from flowmachine.core.context import get_db
+from flowmachine.core.grid import Grid
 
 # TODO: Currently most spatial units require a FlowDB connection at init time.
 # It would be useful to remove this requirement wherever possible, and instead
diff --git a/flowmachine/flowmachine/features/location/labelled_spatial_aggregate.py b/flowmachine/flowmachine/features/location/labelled_spatial_aggregate.py
index d9f7e63772..84a61e6877 100644
--- a/flowmachine/flowmachine/features/location/labelled_spatial_aggregate.py
+++ b/flowmachine/flowmachine/features/location/labelled_spatial_aggregate.py
@@ -8,7 +8,6 @@
 class LabelledSpatialAggregate(GeoDataMixin, Query):
     """
     Class representing a disaggregation of a SpatialAggregate by some label or set of labels
-    Returns a query with
     """
 
     def __init__(
diff --git a/flowmachine/flowmachine/features/location/redacted_labelled_spatial_aggregate.py b/flowmachine/flowmachine/features/location/redacted_labelled_spatial_aggregate.py
index c3a6ca73d3..23e08b0092 100644
--- a/flowmachine/flowmachine/features/location/redacted_labelled_spatial_aggregate.py
+++ b/flowmachine/flowmachine/features/location/redacted_labelled_spatial_aggregate.py
@@ -20,7 +20,7 @@ def __init__(
         self,
         *,
         labelled_spatial_aggregate: LabelledSpatialAggregate,
-        redaction_threshold=15,
+        redaction_threshold: int = 15,
     ):
 
         self.labelled_spatial_aggregate = labelled_spatial_aggregate
diff --git a/flowmachine/tests/test_labelled_spatial_aggregate.py b/flowmachine/tests/test_labelled_spatial_aggregate.py
index 7c11f472c4..bbcf977d03 100644
--- a/flowmachine/tests/test_labelled_spatial_aggregate.py
+++ b/flowmachine/tests/test_labelled_spatial_aggregate.py
@@ -1,7 +1,9 @@
-from core import make_spatial_unit
-from features import SubscriberHandsetCharacteristic
-from features.location.labelled_spatial_aggregate import LabelledSpatialAggregate
-from features.subscriber.daily_location import locate_subscribers
+from flowmachine.core import make_spatial_unit
+from flowmachine.features import SubscriberHandsetCharacteristic
+from flowmachine.features.location.labelled_spatial_aggregate import (
+    LabelledSpatialAggregate,
+)
+from flowmachine.features.subscriber.daily_location import locate_subscribers
 
 
 def test_labelled_spatial_aggregate(get_dataframe):
@@ -18,7 +20,6 @@ def test_labelled_spatial_aggregate(get_dataframe):
         "2016-01-01", "2016-01-02", characteristic="hnd_type"
     )
     labelled = LabelledSpatialAggregate(locations=locations, subscriber_labels=metric)
-    foo = labelled.get_query()
     df = get_dataframe(labelled)
     assert len(df) == 50
     assert len(df.pcod.unique()) == 25

From f289b92c55669958dceeb81141688cfc6c816343 Mon Sep 17 00:00:00 2001
From: john <john.roberts@flowminder.org>
Date: Wed, 8 Dec 2021 13:51:47 +0000
Subject: [PATCH 07/20] Validation + test

---
 .../location/labelled_spatial_aggregate.py     |  7 +++++++
 .../tests/test_labelled_spatial_aggregate.py   | 18 ++++++++++++++++++
 2 files changed, 25 insertions(+)

diff --git a/flowmachine/flowmachine/features/location/labelled_spatial_aggregate.py b/flowmachine/flowmachine/features/location/labelled_spatial_aggregate.py
index 84a61e6877..ab388eec73 100644
--- a/flowmachine/flowmachine/features/location/labelled_spatial_aggregate.py
+++ b/flowmachine/flowmachine/features/location/labelled_spatial_aggregate.py
@@ -16,6 +16,13 @@ def __init__(
         subscriber_labels: Query,
         label_columns: List[str] = ("value",),
     ):
+        if any(
+            label_column not in subscriber_labels.column_names
+            for label_column in label_columns
+        ):
+            raise ValueError(
+                f"One of {label_columns} not a column of {subscriber_labels}"
+            )
         self.locations = locations
         self.subscriber_labels = subscriber_labels
         self.label_columns = list(label_columns)
diff --git a/flowmachine/tests/test_labelled_spatial_aggregate.py b/flowmachine/tests/test_labelled_spatial_aggregate.py
index bbcf977d03..94c874d483 100644
--- a/flowmachine/tests/test_labelled_spatial_aggregate.py
+++ b/flowmachine/tests/test_labelled_spatial_aggregate.py
@@ -1,3 +1,5 @@
+import pytest
+
 from flowmachine.core import make_spatial_unit
 from flowmachine.features import SubscriberHandsetCharacteristic
 from flowmachine.features.location.labelled_spatial_aggregate import (
@@ -24,3 +26,19 @@ def test_labelled_spatial_aggregate(get_dataframe):
     assert len(df) == 50
     assert len(df.pcod.unique()) == 25
     assert len(df.label_value.unique()) == 2
+
+
+def test_validation():
+    with pytest.raises(ValueError, match="not a column of"):
+        locations = locate_subscribers(
+            "2016-01-01",
+            "2016-01-02",
+            spatial_unit=make_spatial_unit("admin", level=3),
+            method="most-common",
+        )
+        metric = SubscriberHandsetCharacteristic(
+            "2016-01-01", "2016-01-02", characteristic="hnd_type"
+        )
+        labelled = LabelledSpatialAggregate(
+            locations=locations, subscriber_labels=metric, label_columns=["foo", "bar"]
+        )

From cddb3ee76093d2458799bacd8d8e6ba74a5ce6d4 Mon Sep 17 00:00:00 2001
From: john <john.roberts@flowminder.org>
Date: Wed, 8 Dec 2021 14:50:01 +0000
Subject: [PATCH 08/20] Docs + tests + some renaming

---
 .../location/labelled_spatial_aggregate.py    | 40 ++++++++++++++++
 .../tests/test_labelled_spatial_aggregate.py  | 47 ++++++++++++++++++-
 ...st_redacted_labelled_spatial_aggregate.py} |  2 +-
 3 files changed, 87 insertions(+), 2 deletions(-)
 rename flowmachine/tests/{test_redacted_labelled_spatial_location.py => test_redacted_labelled_spatial_aggregate.py} (97%)

diff --git a/flowmachine/flowmachine/features/location/labelled_spatial_aggregate.py b/flowmachine/flowmachine/features/location/labelled_spatial_aggregate.py
index ab388eec73..090e0b3df2 100644
--- a/flowmachine/flowmachine/features/location/labelled_spatial_aggregate.py
+++ b/flowmachine/flowmachine/features/location/labelled_spatial_aggregate.py
@@ -8,6 +8,35 @@
 class LabelledSpatialAggregate(GeoDataMixin, Query):
     """
     Class representing a disaggregation of a SpatialAggregate by some label or set of labels
+
+    Parameters
+    ----------
+    locations: Query
+        Any query with a subscriber and location columns
+    subscriber_labels: Query
+        Any query with a subscriber column
+    label_columns: List[str]
+        A list of columns in subscriber_labels to aggregate on
+
+    Example:
+
+    >>> locations = locate_subscribers(
+    ...     "2016-01-01",
+    ...     "2016-01-02",
+    ...     spatial_unit=make_spatial_unit("admin", level=3),
+    ...     method="most-common",
+    ... )
+    >>> metric = SubscriberHandsetCharacteristic(
+    ...     "2016-01-01", "2016-01-02", characteristic="hnd_type"
+    ... )
+    >>> labelled = LabelledSpatialAggregate(locations=locations, subscriber_labels=metric)
+    >>> labelled.get_dataframe()
+               pcod label_value  value
+     0  524 3 08 44     Feature     36
+     1  524 3 08 44       Smart     28
+     2  524 4 12 62     Feature     44
+     3  524 4 12 62       Smart     19'
+
     """
 
     def __init__(
@@ -16,6 +45,7 @@ def __init__(
         subscriber_labels: Query,
         label_columns: List[str] = ("value",),
     ):
+
         if any(
             label_column not in subscriber_labels.column_names
             for label_column in label_columns
@@ -23,6 +53,13 @@ def __init__(
             raise ValueError(
                 f"One of {label_columns} not a column of {subscriber_labels}"
             )
+        if "subscriber" not in locations.column_names:
+            raise ValueError(f"Locations query must have a subscriber column")
+        if "spatial_unit" not in dir(locations):
+            raise ValueError(f"Locations must have a spatial_unit attribute")
+        if "subscriber" in label_columns:
+            raise ValueError(f"'subscriber' cannot be a label")
+
         self.locations = locations
         self.subscriber_labels = subscriber_labels
         self.label_columns = list(label_columns)
@@ -43,6 +80,9 @@ def column_names(self) -> List[str]:
 
     @property
     def out_label_columns_as_string_list(self):
+        """
+        Returns the label column heading as a single string
+        """
         return ",".join(self.out_label_columns)
 
     def _make_query(self):
diff --git a/flowmachine/tests/test_labelled_spatial_aggregate.py b/flowmachine/tests/test_labelled_spatial_aggregate.py
index 94c874d483..60dc66d7b5 100644
--- a/flowmachine/tests/test_labelled_spatial_aggregate.py
+++ b/flowmachine/tests/test_labelled_spatial_aggregate.py
@@ -1,5 +1,7 @@
 import pytest
 
+from flowmachine.core.dummy_query import DummyQuery
+from flowmachine.features import TotalLocationEvents
 from flowmachine.core import make_spatial_unit
 from flowmachine.features import SubscriberHandsetCharacteristic
 from flowmachine.features.location.labelled_spatial_aggregate import (
@@ -28,7 +30,7 @@ def test_labelled_spatial_aggregate(get_dataframe):
     assert len(df.label_value.unique()) == 2
 
 
-def test_validation():
+def test_label_validation():
     with pytest.raises(ValueError, match="not a column of"):
         locations = locate_subscribers(
             "2016-01-01",
@@ -42,3 +44,46 @@ def test_validation():
         labelled = LabelledSpatialAggregate(
             locations=locations, subscriber_labels=metric, label_columns=["foo", "bar"]
         )
+
+
+def test_loc_sub_validation():
+    with pytest.raises(ValueError, match="Locations query must have a subscriber"):
+        _ = LabelledSpatialAggregate(
+            locations=TotalLocationEvents("2016-01-01", "2016-01-02"),
+            subscriber_labels=SubscriberHandsetCharacteristic(
+                "2016-01-01", "2016-01-02", characteristic="hnd_type"
+            ),
+        )
+
+
+def test_col_sub_validation():
+    with pytest.raises(ValueError, match="cannot be a label"):
+        _ = LabelledSpatialAggregate(
+            locations=locate_subscribers(
+                "2016-01-01",
+                "2016-01-02",
+                spatial_unit=make_spatial_unit("admin", level=3),
+                method="most-common",
+            ),
+            subscriber_labels=SubscriberHandsetCharacteristic(
+                "2016-01-01", "2016-01-02", characteristic="hnd_type"
+            ),
+            label_columns=["subscriber"],
+        )
+
+
+class SubHavingQuery(DummyQuery):
+    @property
+    def column_names(self):
+        return ["subscriber"]
+
+
+def test_spatial_unit_validation():
+    dq = SubHavingQuery("foo")
+    with pytest.raises(ValueError, match="spatial_unit"):
+        _ = LabelledSpatialAggregate(
+            locations=dq,
+            subscriber_labels=SubscriberHandsetCharacteristic(
+                "2016-01-01", "2016-01-02", characteristic="hnd_type"
+            ),
+        )
diff --git a/flowmachine/tests/test_redacted_labelled_spatial_location.py b/flowmachine/tests/test_redacted_labelled_spatial_aggregate.py
similarity index 97%
rename from flowmachine/tests/test_redacted_labelled_spatial_location.py
rename to flowmachine/tests/test_redacted_labelled_spatial_aggregate.py
index 87fe6f67c1..b941f8c960 100644
--- a/flowmachine/tests/test_redacted_labelled_spatial_location.py
+++ b/flowmachine/tests/test_redacted_labelled_spatial_aggregate.py
@@ -14,7 +14,7 @@
 from flowmachine.features.subscriber.daily_location import locate_subscribers
 
 
-def test_no_redaction(get_dataframe):
+def test_redaction(get_dataframe):
 
     locations = locate_subscribers(
         "2016-01-01",

From 1e718bbff3d20a76d4dd38ec9aca6f366cbe4189 Mon Sep 17 00:00:00 2001
From: john <john.roberts@flowminder.org>
Date: Wed, 8 Dec 2021 15:02:26 +0000
Subject: [PATCH 09/20] Headers + more docs

---
 .../features/location/labelled_spatial_aggregate.py |  3 +++
 .../location/redacted_labelled_spatial_aggregate.py | 13 ++++++++++---
 .../test_redacted_labelled_spatial_aggregate.py     |  4 ++++
 3 files changed, 17 insertions(+), 3 deletions(-)

diff --git a/flowmachine/flowmachine/features/location/labelled_spatial_aggregate.py b/flowmachine/flowmachine/features/location/labelled_spatial_aggregate.py
index 090e0b3df2..693c39f8db 100644
--- a/flowmachine/flowmachine/features/location/labelled_spatial_aggregate.py
+++ b/flowmachine/flowmachine/features/location/labelled_spatial_aggregate.py
@@ -1,3 +1,6 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
 from typing import List
 
 from flowmachine.core import Query
diff --git a/flowmachine/flowmachine/features/location/redacted_labelled_spatial_aggregate.py b/flowmachine/flowmachine/features/location/redacted_labelled_spatial_aggregate.py
index 23e08b0092..e159133545 100644
--- a/flowmachine/flowmachine/features/location/redacted_labelled_spatial_aggregate.py
+++ b/flowmachine/flowmachine/features/location/redacted_labelled_spatial_aggregate.py
@@ -1,3 +1,7 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
 from typing import List
 
 from flowmachine.features.location.labelled_spatial_aggregate import (
@@ -9,11 +13,14 @@
 
 class RedactedLabelledSpatialAggregate(GeoDataMixin, Query):
     """
-    Query that drops and reaggregates data if any 'label' column falls below redcation_threshold
+    Query that drops any locations that, when disaggregated by label, reveal a number of subscribers
+    less than redaction_threshold
     Parameters
     ----------
-    labelled_spatial_aggregate
-    redaction_threshold
+    labelled_spatial_aggregate: LabelledSpatialAggregate
+        The LabelledSpatialAggregate query to redact
+    redaction_threshold: int default 15
+        If any labels within a location reveal less than this number of subscribers, that location is dropeed
     """
 
     def __init__(
diff --git a/flowmachine/tests/test_redacted_labelled_spatial_aggregate.py b/flowmachine/tests/test_redacted_labelled_spatial_aggregate.py
index b941f8c960..fb10b40d06 100644
--- a/flowmachine/tests/test_redacted_labelled_spatial_aggregate.py
+++ b/flowmachine/tests/test_redacted_labelled_spatial_aggregate.py
@@ -1,3 +1,7 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
 import pytest
 
 import pandas as pd

From dc9007a30a5759c44c30011c910483bca28edf2c Mon Sep 17 00:00:00 2001
From: john <john.roberts@flowminder.org>
Date: Wed, 8 Dec 2021 15:05:19 +0000
Subject: [PATCH 10/20] Changelog

---
 CHANGELOG.md | 1 +
 1 file changed, 1 insertion(+)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 27dc355a5d..8f17bc9e83 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -19,6 +19,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/).
 - Added `PerSubscriberAggregate` query. [#4559](https://github.com/Flowminder/FlowKit/issues/4559)
 - Added FlowETL QA checks 'count_imeis', 'count_imsis', 'count_locatable_location_ids', 'count_null_imeis', 'count_null_imsis', 'count_null_location_ids', 'max_msisdns_per_imei', 'max_msisdns_per_imsi', 'count_added_rows_outgoing', 'count_null_counterparts', 'count_null_durations', 'count_onnet_msisdns_incoming', 'count_onnet_msisdns_outgoing', 'count_onnet_msisdns', 'max_duration' and 'median_duration'. [#4552](https://github.com/Flowminder/FlowKit/issues/4552)
 - Added `FilteredReferenceLocation` query, which returns only rows where a subscriber visited a reference location the required number of times. [#4584](https://github.com/Flowminder/FlowKit/issues/4584) 
+- Added `LabelledSpatialAggregate` query and redaction, which sub-aggregates by subscriber labels. [#4668](https://github.com/Flowminder/FlowKit/issues/4668)
 
 ### Changed
 - Harmonised FlowAPI parameter names for start and end dates. They are now all `start_date` and `end_date`

From 06e809d145fa455d0be74dae3942dcc065d25905 Mon Sep 17 00:00:00 2001
From: Thingus <john.roberts@flowminder.org>
Date: Wed, 8 Dec 2021 15:32:37 +0000
Subject: [PATCH 11/20] Update
 flowmachine/flowmachine/features/location/labelled_spatial_aggregate.py

Co-authored-by: James Harrison <james.harrison@flowminder.org>
---
 .../features/location/labelled_spatial_aggregate.py            | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/flowmachine/flowmachine/features/location/labelled_spatial_aggregate.py b/flowmachine/flowmachine/features/location/labelled_spatial_aggregate.py
index 693c39f8db..8e8080f9ba 100644
--- a/flowmachine/flowmachine/features/location/labelled_spatial_aggregate.py
+++ b/flowmachine/flowmachine/features/location/labelled_spatial_aggregate.py
@@ -21,7 +21,8 @@ class LabelledSpatialAggregate(GeoDataMixin, Query):
     label_columns: List[str]
         A list of columns in subscriber_labels to aggregate on
 
-    Example:
+    Examples
+    --------
 
     >>> locations = locate_subscribers(
     ...     "2016-01-01",

From dad684f1b48ea5d91dd71bb94f53f834d0f8102f Mon Sep 17 00:00:00 2001
From: Thingus <john.roberts@flowminder.org>
Date: Wed, 8 Dec 2021 16:24:28 +0000
Subject: [PATCH 12/20] Update
 flowmachine/flowmachine/features/location/labelled_spatial_aggregate.py

Co-authored-by: James Harrison <james.harrison@flowminder.org>
---
 .../flowmachine/features/location/labelled_spatial_aggregate.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/flowmachine/flowmachine/features/location/labelled_spatial_aggregate.py b/flowmachine/flowmachine/features/location/labelled_spatial_aggregate.py
index 8e8080f9ba..6e36c6ff9b 100644
--- a/flowmachine/flowmachine/features/location/labelled_spatial_aggregate.py
+++ b/flowmachine/flowmachine/features/location/labelled_spatial_aggregate.py
@@ -78,7 +78,7 @@ def __init__(
     def column_names(self) -> List[str]:
         return (
             list(self.spatial_unit.location_id_columns)
-            + [f"label_{label}" for label in self.label_columns]
+            + self.out_label_columns
             + ["value"]
         )
 

From fc159d4c82cdd1f889403e1ac842adc90d658b92 Mon Sep 17 00:00:00 2001
From: Thingus <john.roberts@flowminder.org>
Date: Wed, 8 Dec 2021 16:24:52 +0000
Subject: [PATCH 13/20] Update
 flowmachine/flowmachine/features/location/labelled_spatial_aggregate.py

Co-authored-by: James Harrison <james.harrison@flowminder.org>
---
 .../flowmachine/features/location/labelled_spatial_aggregate.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/flowmachine/flowmachine/features/location/labelled_spatial_aggregate.py b/flowmachine/flowmachine/features/location/labelled_spatial_aggregate.py
index 6e36c6ff9b..34b50d6e76 100644
--- a/flowmachine/flowmachine/features/location/labelled_spatial_aggregate.py
+++ b/flowmachine/flowmachine/features/location/labelled_spatial_aggregate.py
@@ -59,7 +59,7 @@ def __init__(
             )
         if "subscriber" not in locations.column_names:
             raise ValueError(f"Locations query must have a subscriber column")
-        if "spatial_unit" not in dir(locations):
+        if not hasattr(locations, "spatial_unit"):
             raise ValueError(f"Locations must have a spatial_unit attribute")
         if "subscriber" in label_columns:
             raise ValueError(f"'subscriber' cannot be a label")

From 33a59fca7070e0e459e95be3b993141ec08d5722 Mon Sep 17 00:00:00 2001
From: john <john.roberts@flowminder.org>
Date: Wed, 8 Dec 2021 16:25:50 +0000
Subject: [PATCH 14/20] Changing to left join

---
 .../flowmachine/features/location/labelled_spatial_aggregate.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/flowmachine/flowmachine/features/location/labelled_spatial_aggregate.py b/flowmachine/flowmachine/features/location/labelled_spatial_aggregate.py
index 34b50d6e76..9eaffe757f 100644
--- a/flowmachine/flowmachine/features/location/labelled_spatial_aggregate.py
+++ b/flowmachine/flowmachine/features/location/labelled_spatial_aggregate.py
@@ -109,7 +109,7 @@ def _make_query(self):
                 {aggregate_cols}, {label_select}, count(*) AS value
             FROM 
                 ({self.locations.get_query()}) AS agg
-            JOIN
+            LEFT JOIN
                 ({self.subscriber_labels.get_query()}) AS labels USING (subscriber)
             GROUP BY
                 {aggregate_cols}, {label_group}

From 8bc88352c1b28742bbea2e646551b9ec53685015 Mon Sep 17 00:00:00 2001
From: john <john.roberts@flowminder.org>
Date: Wed, 8 Dec 2021 16:29:44 +0000
Subject: [PATCH 15/20] More changes from review

---
 .../features/location/labelled_spatial_aggregate.py    | 10 +++-------
 1 file changed, 3 insertions(+), 7 deletions(-)

diff --git a/flowmachine/flowmachine/features/location/labelled_spatial_aggregate.py b/flowmachine/flowmachine/features/location/labelled_spatial_aggregate.py
index 9eaffe757f..2c3fa7c858 100644
--- a/flowmachine/flowmachine/features/location/labelled_spatial_aggregate.py
+++ b/flowmachine/flowmachine/features/location/labelled_spatial_aggregate.py
@@ -50,13 +50,9 @@ def __init__(
         label_columns: List[str] = ("value",),
     ):
 
-        if any(
-            label_column not in subscriber_labels.column_names
-            for label_column in label_columns
-        ):
-            raise ValueError(
-                f"One of {label_columns} not a column of {subscriber_labels}"
-            )
+        for label_column in label_columns:
+            if label_column not in subscriber_labels.column_names:
+                raise ValueError(f"{label_column} not a column of {subscriber_labels}")
         if "subscriber" not in locations.column_names:
             raise ValueError(f"Locations query must have a subscriber column")
         if not hasattr(locations, "spatial_unit"):

From c758cf8ccc6a29689518868a4bf43e655229d705 Mon Sep 17 00:00:00 2001
From: john <john.roberts@flowminder.org>
Date: Wed, 8 Dec 2021 16:37:15 +0000
Subject: [PATCH 16/20] Adding spatial_unit to
 redacted_labelled_spatial_aggregate.py

---
 .../features/location/redacted_labelled_spatial_aggregate.py  | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/flowmachine/flowmachine/features/location/redacted_labelled_spatial_aggregate.py b/flowmachine/flowmachine/features/location/redacted_labelled_spatial_aggregate.py
index e159133545..b0ddd058ff 100644
--- a/flowmachine/flowmachine/features/location/redacted_labelled_spatial_aggregate.py
+++ b/flowmachine/flowmachine/features/location/redacted_labelled_spatial_aggregate.py
@@ -20,7 +20,7 @@ class RedactedLabelledSpatialAggregate(GeoDataMixin, Query):
     labelled_spatial_aggregate: LabelledSpatialAggregate
         The LabelledSpatialAggregate query to redact
     redaction_threshold: int default 15
-        If any labels within a location reveal less than this number of subscribers, that location is dropeed
+        If any labels within a location reveal less than this number of subscribers, that location is dropped
     """
 
     def __init__(
@@ -32,6 +32,7 @@ def __init__(
 
         self.labelled_spatial_aggregate = labelled_spatial_aggregate
         self.redaction_threshold = redaction_threshold
+        self.spatial_unit = labelled_spatial_aggregate.spatial_unit
         super().__init__()
 
     @property
@@ -43,7 +44,6 @@ def _make_query(self):
         aggs = ",".join(
             self.labelled_spatial_aggregate.spatial_unit.location_id_columns
         )
-        labels = self.labelled_spatial_aggregate.out_label_columns_as_string_list
         all_cols = self.labelled_spatial_aggregate.column_names_as_string_list
 
         sql = f"""

From 2d48f1403da4052c9f61074cd126cb23cac3f5b5 Mon Sep 17 00:00:00 2001
From: john <john.roberts@flowminder.org>
Date: Wed, 8 Dec 2021 17:18:38 +0000
Subject: [PATCH 17/20] Extra tests

---
 .../tests/test_labelled_spatial_aggregate.py  | 37 ++++++++++++++++++-
 1 file changed, 36 insertions(+), 1 deletion(-)

diff --git a/flowmachine/tests/test_labelled_spatial_aggregate.py b/flowmachine/tests/test_labelled_spatial_aggregate.py
index 60dc66d7b5..7e9dfb6240 100644
--- a/flowmachine/tests/test_labelled_spatial_aggregate.py
+++ b/flowmachine/tests/test_labelled_spatial_aggregate.py
@@ -10,7 +10,7 @@
 from flowmachine.features.subscriber.daily_location import locate_subscribers
 
 
-def test_labelled_spatial_aggregate(get_dataframe):
+def test_one_label(get_dataframe):
     """
     Tests disaggregation by a label query
     """
@@ -28,6 +28,41 @@ def test_labelled_spatial_aggregate(get_dataframe):
     assert len(df) == 50
     assert len(df.pcod.unique()) == 25
     assert len(df.label_value.unique()) == 2
+    # Total number of values should equal the initial number of subscribers
+    assert sum(map(int, df.value.tolist())) == len(get_dataframe(metric))
+
+
+def test_multiple_labels(get_dataframe):
+    """
+    Tests disaggregation by multiple labels
+    """
+    locations = locate_subscribers(
+        "2016-01-01",
+        "2016-01-02",
+        spatial_unit=make_spatial_unit("admin", level=3),
+        method="most-common",
+    )
+    metric = SubscriberHandsetCharacteristic(
+        "2016-01-01", "2016-01-02", characteristic="hnd_type"
+    ).join(
+        SubscriberHandsetCharacteristic(
+            "2016-01-01", "2016-01-02", characteristic="model"
+        ),
+        on_left="subscriber",
+        left_append="_hnd_type",
+        right_append="_model",
+    )
+    labelled = LabelledSpatialAggregate(
+        locations=locations,
+        subscriber_labels=metric,
+        label_columns=["value_hnd_type", "value_model"],
+    )
+    df = get_dataframe(labelled)
+    foo = labelled.get_query()
+    assert all(
+        df.columns == ["pcod", "label_value_hnd_type", "label_value_model", "value"]
+    )
+    assert len(df) > 300
 
 
 def test_label_validation():

From 7cad6446c6c8848babf4254401484d969d2d39de Mon Sep 17 00:00:00 2001
From: john <john.roberts@flowminder.org>
Date: Wed, 8 Dec 2021 20:32:36 +0000
Subject: [PATCH 18/20] More extra tests

---
 flowmachine/tests/test_labelled_spatial_aggregate.py | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/flowmachine/tests/test_labelled_spatial_aggregate.py b/flowmachine/tests/test_labelled_spatial_aggregate.py
index 7e9dfb6240..b7d47c61d0 100644
--- a/flowmachine/tests/test_labelled_spatial_aggregate.py
+++ b/flowmachine/tests/test_labelled_spatial_aggregate.py
@@ -1,5 +1,8 @@
 import pytest
 
+from pandas.testing import assert_series_equal
+
+from flowmachine.features.location.spatial_aggregate import SpatialAggregate
 from flowmachine.core.dummy_query import DummyQuery
 from flowmachine.features import TotalLocationEvents
 from flowmachine.core import make_spatial_unit
@@ -29,7 +32,11 @@ def test_one_label(get_dataframe):
     assert len(df.pcod.unique()) == 25
     assert len(df.label_value.unique()) == 2
     # Total number of values should equal the initial number of subscribers
-    assert sum(map(int, df.value.tolist())) == len(get_dataframe(metric))
+    assert df.value.sum() == len(get_dataframe(locations))
+    assert (
+        df.groupby("pcod").value.sum().tolist()
+        == get_dataframe(SpatialAggregate(locations=locations)).value.tolist()
+    )
 
 
 def test_multiple_labels(get_dataframe):
@@ -58,7 +65,6 @@ def test_multiple_labels(get_dataframe):
         label_columns=["value_hnd_type", "value_model"],
     )
     df = get_dataframe(labelled)
-    foo = labelled.get_query()
     assert all(
         df.columns == ["pcod", "label_value_hnd_type", "label_value_model", "value"]
     )

From 0958ea1595d8fc3e318eca3bdfb3c97bbbb960ef Mon Sep 17 00:00:00 2001
From: john <john.roberts@flowminder.org>
Date: Wed, 8 Dec 2021 23:21:54 +0000
Subject: [PATCH 19/20] ADR

---
 ...action-strategy-for-labelled-aggregates.md | 25 +++++++++++++++++++
 1 file changed, 25 insertions(+)
 create mode 100644 docs/source/developer/adr/0011-redaction-strategy-for-labelled-aggregates.md

diff --git a/docs/source/developer/adr/0011-redaction-strategy-for-labelled-aggregates.md b/docs/source/developer/adr/0011-redaction-strategy-for-labelled-aggregates.md
new file mode 100644
index 0000000000..4807f27b40
--- /dev/null
+++ b/docs/source/developer/adr/0011-redaction-strategy-for-labelled-aggregates.md
@@ -0,0 +1,25 @@
+# Redaction strategy for labelled aggregation
+
+Date: 08-12-2021
+
+# Status
+
+Proposed
+
+## Context
+
+At present, any rows in a spatial query are dropped if they return an aggregate of 15 subscribers or less. With new
+labelling disaggregation queries being added to Flowkit, there is an increased risk of deanonymization attacks
+using the increased resolution of information - we need to consider further redaction strategies to mitigate this. 
+
+## Decision
+
+For any labelled spatial aggregate queries, we drop any aggregation zone that contains any disaggregation less than 15
+(for consistency with the rest of the dissagregation strategy).
+
+## Consequences
+
+This disaggregation strategy could reveal that a given zone contains less than 15 of _something_, by comparing a
+disaggregated query to it's non-disaggregated counterpart.
+This could lead to zones being dropped prematurely in disaggregations if a label choice leads to a small number of
+subscribers appearing in a corner-case disaggregation

From 6cff7cccaa1bd1e2c9be14b9450372a2740d61de Mon Sep 17 00:00:00 2001
From: john <john.roberts@flowminder.org>
Date: Wed, 8 Dec 2021 23:23:38 +0000
Subject: [PATCH 20/20] More test

---
 flowmachine/tests/test_labelled_spatial_aggregate.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/flowmachine/tests/test_labelled_spatial_aggregate.py b/flowmachine/tests/test_labelled_spatial_aggregate.py
index b7d47c61d0..1a5a062b31 100644
--- a/flowmachine/tests/test_labelled_spatial_aggregate.py
+++ b/flowmachine/tests/test_labelled_spatial_aggregate.py
@@ -68,6 +68,7 @@ def test_multiple_labels(get_dataframe):
     assert all(
         df.columns == ["pcod", "label_value_hnd_type", "label_value_model", "value"]
     )
+    assert all(df.columns == labelled.column_names)
     assert len(df) > 300