Skip to content

Commit

Permalink
Merge pull request #1472 from sul-dlss/t1471-increase-days
Browse files Browse the repository at this point in the history
Increases PRIOR_DAYS to 180 for Purge Functions
  • Loading branch information
jermnelson authored Dec 9, 2024
2 parents e1ad303 + 5d06958 commit 453c781
Show file tree
Hide file tree
Showing 2 changed files with 8 additions and 6 deletions.
2 changes: 1 addition & 1 deletion libsys_airflow/dags/data_exports/remove-archived.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ def gather_files_task(**kwargs) -> list[pathlib.Path]:
airflow = kwargs.get("airflow", "/opt/airflow")
_directory = pathlib.Path(airflow) / "data-export-files/*/transmitted/"

return find_files(downloads_directory=_directory)
return find_files(downloads_directory=_directory, prior_days=90)

start = EmptyOperator(task_id='start_removing_archived')

Expand Down
12 changes: 7 additions & 5 deletions libsys_airflow/plugins/shared/purge.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
logger = logging.getLogger(__name__)


PRIOR_DAYS = 90
PRIOR_DAYS = 180


@task(multiple_outputs=True)
Expand Down Expand Up @@ -70,13 +70,15 @@ def _extract_uuids(directory: str):
return output


def find_directories(archive_directory: pathlib.Path) -> list[str]:
def find_directories(
archive_directory: pathlib.Path, prior_days: int = PRIOR_DAYS
) -> list[str]:
"""
Iterates through archives to determine what vendor management
directories to delete based on age
"""
target_dirs = []
prior_datestamp = (datetime.utcnow() - timedelta(days=PRIOR_DAYS)).strftime(
prior_datestamp = (datetime.utcnow() - timedelta(days=prior_days)).strftime(
"%Y%m%d"
)
for directory in sorted(archive_directory.iterdir()):
Expand All @@ -87,12 +89,12 @@ def find_directories(archive_directory: pathlib.Path) -> list[str]:
return target_dirs


def find_files(downloads_directory: pathlib.Path):
def find_files(downloads_directory: pathlib.Path, prior_days: int = PRIOR_DAYS):
"""
Iterates through the downloads directory, determining what files to
delete based on the file's age
"""
prior_timestamp = (datetime.utcnow() - timedelta(days=PRIOR_DAYS)).timestamp()
prior_timestamp = (datetime.utcnow() - timedelta(days=prior_days)).timestamp()
files = []
for file_path in downloads_directory.glob("**/*"):
if file_path.is_file() and file_path.stat().st_mtime <= prior_timestamp:
Expand Down

0 comments on commit 453c781

Please sign in to comment.