Commit

Add utils.py
Begin to pull out the parts of the code that need to be more generic.
In this commit we start testing other AIP compression types.
ross-spencer committed Jun 26, 2019
1 parent 81e551c commit a0d4eee
Showing 2 changed files with 13 additions and 1 deletion.
reports/duplicates/duplicates.py (9 additions, 1 deletion)
@@ -74,6 +74,9 @@
 from parsemets import read_premis_data
 from serialize_to_csv import CSVOut
 
+import utils
+
+
 logging_dir = os.path.dirname(os.path.abspath(__file__))
 
 
@@ -208,18 +211,23 @@ def main():
     # Get all AIPS that the storage service knows about.
     aips = am.aips()
     for aip in aips:
-        package_name = os.path.basename(aip.get("current_path")).replace(".7z", "")
+        package_name = os.path.basename(aip.get("current_path"))
+        for ext in utils.EXTS:
+            # TODO: make this more accurate...
+            package_name = package_name.replace(ext, "")
         package_uuid = aip.get("uuid")
         for algorithm in checksum_algorithms:
             # Store our manifest somewhere.
             relative_path = "{}/manifest-{}.txt".format(package_name, algorithm)
             save_path = "{}-manifest-{}.txt".format(package_name, algorithm)
             save_as_loc = os.path.join(temp_dir, save_path)
+
             try:
                 retrieve_file(am, package_uuid, save_as_loc, relative_path)
             except ExtractError:
                 logger.info("No result for algorithm: %s", algorithm)
                 continue
+
             # Our dictionary keys are checksums and all filename entries with
             # the same checksum are appended to create an array. If the array
             # at the end is greater than one, we have duplicate files.
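
As an illustration of the change above: the new loop strips each known compression extension from the AIP's basename with str.replace(). A minimal, self-contained sketch of that behaviour (the example path is hypothetical, not taken from the commit):

import os

EXTS = [".7z", ".tar.gz", ".tar.bz2"]  # mirrors reports/duplicates/utils.py

current_path = "/store/aips/demo-aip-1234.tar.bz2"  # hypothetical AIP path
package_name = os.path.basename(current_path)
for ext in EXTS:
    # Drop any known compression extension from the package name.
    package_name = package_name.replace(ext, "")
print(package_name)  # demo-aip-1234
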
reports/duplicates/utils.py (4 additions, 0 deletions)
@@ -0,0 +1,4 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+EXTS = [".7z", ".tar.gz", ".tar.bz2"]
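
The TODO in duplicates.py flags that replace-based stripping needs to be more accurate: str.replace() removes a matching extension wherever it occurs in the name, not only at the end. One possible direction, sketched here on the assumption that only a trailing extension should be removed (strip_package_extension is a hypothetical helper, not part of this commit):

import os

EXTS = [".7z", ".tar.gz", ".tar.bz2"]  # as defined in utils.py


def strip_package_extension(current_path):
    """Return the basename with a known compression suffix removed.

    Only a trailing extension is stripped, so a name such as
    "my.7z.collection.tar.gz" keeps its internal ".7z".
    """
    package_name = os.path.basename(current_path)
    for ext in EXTS:
        if package_name.endswith(ext):
            return package_name[: -len(ext)]
    return package_name


print(strip_package_extension("/store/aips/my.7z.collection.tar.gz"))
# Expected: my.7z.collection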
