diff --git a/libsys_airflow/dags/data_exports/remove-archived.py b/libsys_airflow/dags/data_exports/remove-archived.py index dae37a39..db39d152 100644 --- a/libsys_airflow/dags/data_exports/remove-archived.py +++ b/libsys_airflow/dags/data_exports/remove-archived.py @@ -37,7 +37,7 @@ def gather_files_task(**kwargs) -> list[pathlib.Path]: airflow = kwargs.get("airflow", "/opt/airflow") _directory = pathlib.Path(airflow) / "data-export-files/*/transmitted/" - return find_files(downloads_directory=_directory) + return find_files(downloads_directory=_directory, prior_days=90) start = EmptyOperator(task_id='start_removing_archived') diff --git a/libsys_airflow/plugins/shared/purge.py b/libsys_airflow/plugins/shared/purge.py index 8416ad02..4ff1ebef 100644 --- a/libsys_airflow/plugins/shared/purge.py +++ b/libsys_airflow/plugins/shared/purge.py @@ -16,7 +16,7 @@ logger = logging.getLogger(__name__) -PRIOR_DAYS = 90 +PRIOR_DAYS = 180 @task(multiple_outputs=True) @@ -70,13 +70,15 @@ def _extract_uuids(directory: str): return output -def find_directories(archive_directory: pathlib.Path) -> list[str]: +def find_directories( + archive_directory: pathlib.Path, prior_days: int = PRIOR_DAYS +) -> list[str]: """ Iterates through archives to determine what vendor management directories to delete based on age """ target_dirs = [] - prior_datestamp = (datetime.utcnow() - timedelta(days=PRIOR_DAYS)).strftime( + prior_datestamp = (datetime.utcnow() - timedelta(days=prior_days)).strftime( "%Y%m%d" ) for directory in sorted(archive_directory.iterdir()): @@ -87,12 +89,12 @@ def find_directories(archive_directory: pathlib.Path) -> list[str]: return target_dirs -def find_files(downloads_directory: pathlib.Path): +def find_files(downloads_directory: pathlib.Path, prior_days: int = PRIOR_DAYS): """ Iterates through downloads directory determing what files to delete based on the file's age """ - prior_timestamp = (datetime.utcnow() - timedelta(days=PRIOR_DAYS)).timestamp() + prior_timestamp = (datetime.utcnow() - timedelta(days=prior_days)).timestamp() files = [] for file_path in downloads_directory.glob("**/*"): if file_path.is_file() and file_path.stat().st_mtime <= prior_timestamp: