diff --git a/README.md b/README.md index 99bf561..69ff5ea 100644 --- a/README.md +++ b/README.md @@ -43,6 +43,7 @@ This repository currently contains SQL code and descriptions for the following a - [Aggregate 6: Directional connections between pairs of regions - count of unique subscribers moving between each pair of locations, each day](aggregate_6.md) - [Aggregate 7: Total number of calls per region per day](aggregate_7.md) - [Aggregate 8: Home location counts per region](aggregate_8.md) +- [Count of subscribers seen in only one region, per region per day](count_subscribers_single_region.md) - [Trips between consecutive locations per day](od_matrix_directed_consecutive_pairs.md) - [Static resident counts per region per day](count_subscribers_home_region_per_day.md) diff --git a/count_subscribers_single_region.md b/count_subscribers_single_region.md new file mode 100644 index 0000000..ecf1e1a --- /dev/null +++ b/count_subscribers_single_region.md @@ -0,0 +1,17 @@ +# Count of subscribers seen in only one region, per region per day + +## What is this? + +This is the number of unique subscribers, per region per day, who are seen in only _one_ region on that day. Region-day combinations with fewer than 15 such subscribers are left out of the output. + +## How to produce the aggregate + +You can find the SQL code for producing this aggregate in [count_subscribers_single_region.sql](count_subscribers_single_region.sql). + +The query [`count_subscribers_single_region_per_day`](count_subscribers_single_region.sql#L5-L25) is a standalone query which can be run by itself to produce the aggregate. + +The first time you run this, you will need to include a timespan of data that includes the period before any mobility restrictions were enforced in your country, or before the first cases of COVID-19 were reported in your country. This is so that you can establish what ‘normal’ baseline behaviour looks like, and then see how this behaviour changed. We recommend that you include at least two weeks of ‘normal’ baseline data (i.e. 
the two weeks immediately before the announcement of restrictions or the outbreak), and preferably four weeks.
+
+## Usage and interpretation
+
+This can be useful in combination with other aggregates. For example, counts of static subscribers per region can be scaled using subscriber counts to get proportions of subscribers who are adhering to mobility restrictions per region.
diff --git a/count_subscribers_single_region.sql b/count_subscribers_single_region.sql
new file mode 100644
index 0000000..a8c34b1
--- /dev/null
+++ b/count_subscribers_single_region.sql
@@ -0,0 +1,25 @@
+-- This Source Code Form is subject to the terms of the Mozilla Public
+-- License, v. 2.0. If a copy of the MPL was not distributed with this
+-- file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+-- Per region and day: number of msisdns that appear in exactly one region on that day.
+-- Region/day groups with fewer than 15 such msisdns are left out of the table.
+CREATE TABLE count_subscribers_single_region_per_day AS
+WITH located AS (
+    -- One row per distinct (msisdn, call_date, region), from 2020-02-01 up to today.
+    SELECT DISTINCT
+           calls.msisdn,
+           calls.call_date,
+           cells.region
+    FROM calls
+    INNER JOIN cells ON cells.cell_id = calls.location_id
+    WHERE calls.call_date >= '2020-02-01' AND calls.call_date <= CURRENT_DATE
+),
+unmoving AS (
+    -- msisdn/day pairs that occur with exactly one region in located.
+    SELECT msisdn, call_date FROM located GROUP BY msisdn, call_date HAVING count(*) = 1
+)
+SELECT located.region, call_date AS count_date, count(*) AS subscriber_count
+FROM unmoving
+INNER JOIN located USING (msisdn, call_date)
+GROUP BY located.region, call_date HAVING count(*) >= 15;
\ No newline at end of file