-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathsample_postcodes.py
31 lines (24 loc) · 1017 Bytes
/
sample_postcodes.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
import os
import zipfile
import requests
import pandas as pd
# Source archive and the local files this script reads and writes. Relative
# paths are resolved against the current working directory.
POSTCODES_URL = "https://www.doogal.co.uk/files/postcodes.zip"
ZIP_PATH = "postcodes.zip"
CSV_PATH = "postcodes.csv"
SAMPLE_PATH = "postcodes_sample.csv"

# Accepted spellings of the "in use" flag column, in order of preference.
# NOTE(review): 'In.Use.' is the name R's read.csv() would derive from an
# 'In Use?' header; pandas keeps headers verbatim, so the raw file presumably
# uses 'In Use?' -- confirm against the downloaded CSV.
IN_USE_COLUMNS = ("In.Use.", "In Use?")
OUTPUT_COLUMNS = ["Postcode", "Constituency", "Region"]

SAMPLE_SIZE = 500
RANDOM_SEED = 1019
DOWNLOAD_TIMEOUT = 60  # seconds, applied to connect and to each socket read


def download_postcodes(url=POSTCODES_URL, zip_path=ZIP_PATH, extract_dir="."):
    """Download the ZIP archive at *url* and extract it into *extract_dir*.

    The archive is streamed to *zip_path* in chunks rather than held in
    memory, and is removed afterwards whether or not extraction succeeded.

    Raises:
        requests.HTTPError: if the server answers with an error status.
        requests.RequestException: on connection problems or timeouts.
        zipfile.BadZipFile: if the downloaded file is not a valid archive.
    """
    try:
        with requests.get(url, stream=True, timeout=DOWNLOAD_TIMEOUT) as response:
            # Fail here instead of saving an HTML error page as "the ZIP".
            response.raise_for_status()
            with open(zip_path, "wb") as f:
                for chunk in response.iter_content(chunk_size=1 << 20):
                    f.write(chunk)
        with zipfile.ZipFile(zip_path, "r") as zip_ref:
            zip_ref.extractall(extract_dir)
    finally:
        # Never leave a partial or already-extracted archive behind.
        if os.path.exists(zip_path):
            os.remove(zip_path)


def load_in_use_postcodes(csv_path=CSV_PATH):
    """Return the in-use rows of *csv_path* restricted to OUTPUT_COLUMNS.

    Only the columns that are actually needed are parsed, which keeps memory
    use down on the full postcode file.

    Raises:
        KeyError: if none of IN_USE_COLUMNS, or any of OUTPUT_COLUMNS, is
            present in the CSV header.
    """
    wanted = set(IN_USE_COLUMNS) | set(OUTPUT_COLUMNS)
    # A callable `usecols` tolerates whichever in-use spelling is absent.
    frame = pd.read_csv(csv_path, usecols=lambda name: name in wanted)

    in_use_column = next((c for c in IN_USE_COLUMNS if c in frame.columns), None)
    if in_use_column is None:
        raise KeyError(
            f"none of the columns {IN_USE_COLUMNS} found in {csv_path!r}"
        )

    return frame.loc[frame[in_use_column] == "Yes", OUTPUT_COLUMNS]


def main(csv_path=CSV_PATH, sample_path=SAMPLE_PATH, n=SAMPLE_SIZE, seed=RANDOM_SEED):
    """Write a reproducible random sample of in-use postcodes to *sample_path*.

    If *csv_path* does not exist yet, the postcode archive is downloaded and
    extracted next to it first. The sample has *n* rows drawn without
    replacement using *seed*, so repeated runs produce the same file.

    Raises:
        ValueError: if fewer than *n* in-use postcodes are available.
    """
    if not os.path.exists(csv_path):
        download_postcodes(extract_dir=os.path.dirname(csv_path) or ".")

    postcodes = load_in_use_postcodes(csv_path)
    postcode_sample = postcodes.sample(n=n, random_state=seed)
    postcode_sample.to_csv(sample_path, index=False)


if __name__ == "__main__":
    main()