From 4ba7f59e4aeffb0792dda1d8f088d19bf3b3a81c Mon Sep 17 00:00:00 2001
From: Daniel Busch
Date: Thu, 14 Nov 2024 15:23:56 +0100
Subject: [PATCH] changelog

---
 changelog/1.feature.md          |  1 +
 scripts/download_all_domains.py | 57 -----------------------------------
 scripts/remove_downloads.py     | 42 ----------------------------
 3 files changed, 1 insertion(+), 99 deletions(-)
 create mode 100644 changelog/1.feature.md
 delete mode 100644 scripts/remove_downloads.py

diff --git a/changelog/1.feature.md b/changelog/1.feature.md
new file mode 100644
index 00000000..fe472ece
--- /dev/null
+++ b/changelog/1.feature.md
@@ -0,0 +1 @@
+Script to download data sets from the FAOSTAT website.
diff --git a/scripts/download_all_domains.py b/scripts/download_all_domains.py
index fed63b8a..99ecc7a5 100755
--- a/scripts/download_all_domains.py
+++ b/scripts/download_all_domains.py
@@ -1,65 +1,8 @@
 """Downloads all domain data sets from FAOSTAT website."""
-
 from faostat_data_primap.download import (
     download_all_domains,
 )
 
-# def download_all_domains(
-#     domains: list[tuple[str]] = domains,
-#     downloaded_data_path: str = downloaded_data_path,
-# ) -> list[str]:
-#     """
-#     Download and unpack all climate-related domains from the FAO stat website.
-#
-#     Extract the date when the data set was last updated and create a directory
-#     with the same name. Download the zip files for each domain if
-#     it does not already exist. Unpack the zip file and save in
-#     the same directory.
-#
-#     Parameters
-#     ----------
-#     sources
-#         Name of data set, url to domain overview,
-#         and download url
-#
-#     Returns
-#     -------
-#     List of input files that have been fetched or found locally.
-#
-#     """
-#     downloaded_files = []
-#     for ds_name, urls in domains.items():
-#         url = urls["url_domain"]
-#         url_download = urls["url_download"]
-#         url_methodology = urls["url_methodology"]
-#
-#         soup = get_html_content(url)
-#
-#         last_updated = get_last_updated_date(soup, url)
-#
-#         if not downloaded_data_path.exists():
-#             downloaded_data_path.mkdir()
-#
-#         ds_path = downloaded_data_path / ds_name
-#         if not ds_path.exists():
-#             ds_path.mkdir()
-#
-#         local_data_dir = ds_path / last_updated
-#         if not local_data_dir.exists():
-#             local_data_dir.mkdir()
-#
-#         download_methodology(save_path=local_data_dir, url_download=url_methodology)
-#
-#         local_filename = local_data_dir / f"{ds_name}.zip"
-#
-#         download_file(url_download=url_download, save_path=local_filename)
-#
-#         downloaded_files.append(str(local_filename))
-#
-#         unzip_file(local_filename)
-#
-#     return downloaded_files
-
 
 if __name__ == "__main__":
     download_all_domains()
diff --git a/scripts/remove_downloads.py b/scripts/remove_downloads.py
deleted file mode 100644
index beb6059e..00000000
--- a/scripts/remove_downloads.py
+++ /dev/null
@@ -1,42 +0,0 @@
-"""Remove all downloads.
-
-This script deletes all downloaded and unzipped files. It is
-useful for testing purposes. Needs to be updated with the directory
-structure or maybe can be deleted altogether later.
-"""
-
-import os
-
-# import click
-from faostat_data_primap.helper.definitions import downloaded_data_path
-
-
-def run():
-    """
-    Delete all downloaded files for all domains and all releases
-    """
-    domains = [
-        d
-        for d in os.listdir(downloaded_data_path)
-        if os.path.isdir(downloaded_data_path / d)
-    ]
-
-    for domain in domains:
-        path_to_releases = downloaded_data_path / domain
-        releases = [
-            d
-            for d in os.listdir(path_to_releases)
-            if os.path.isdir(path_to_releases / d)
-        ]
-
-        for release in releases:
-            path_to_files = downloaded_data_path / domain / release
-            files_to_delete = os.listdir(path_to_files)
-
-            for file in files_to_delete:
-                path_to_file = path_to_files / file
-                os.remove(path_to_file)
-
-
-if __name__ == "__main__":
-    run()