diff --git a/.gitignore b/.gitignore
index b710dde7..f0c2eeda 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,5 +1,7 @@
 build/
 dist/
+tests/test_follow_symlinks/
+tests/test_follow_symlinks_non_archived/
 zstash.egg-info/
 *.pyc
 *~
diff --git a/tests/follow_symlinks.sh b/tests/follow_symlinks.sh
new file mode 100755
index 00000000..1d85199e
--- /dev/null
+++ b/tests/follow_symlinks.sh
@@ -0,0 +1,127 @@
+#!/bin/bash
+
+# Rebuild the test directories, sample files and symlinks from scratch for one test case.
+# Arguments: $1 use_hpss, $2 follow_symlinks, $3 case_name, $4 archive_name.
+setup()
+{
+    echo "##########################################################################################################"
+    use_hpss=$1
+    follow_symlinks=$2
+    case_name="${3}"
+    archive_name=$4
+    if [[ "${use_hpss}" == "true" ]]; then
+        hsi rm -R ${archive_name}
+    fi
+    echo "use_hpss=${use_hpss}"
+    echo "follow_symlinks=${follow_symlinks}"
+    echo "case_name=${case_name}"
+    local_archive_name=test_follow_symlinks
+    non_archived_dir=${local_archive_name}_non_archived
+    test_dir=/global/homes/f/forsyth/zstash/tests
+    cd ${test_dir}
+    rm -rf ${local_archive_name}
+    rm -rf ${non_archived_dir}
+    mkdir ${local_archive_name}
+    # At the same level as local_archive_name
+    mkdir ${non_archived_dir}
+    cd ${local_archive_name}
+
+    mkdir zstash_demo
+    mkdir zstash_demo/empty_dir
+    mkdir zstash_demo/dir
+    mkdir non_archived
+    echo -n '' > zstash_demo/file_empty.txt
+    echo 'file0 stuff' > zstash_demo/dir/file0.txt
+    echo 'file1 stuff' > non_archived/file1.txt
+    echo 'file2 stuff' > ../${non_archived_dir}/file2.txt
+    # NOTE: `ln -s` appears to require absolute paths for the source files
+    ln -s ${test_dir}/${local_archive_name}/non_archived/file1.txt zstash_demo/file3.txt
+    ln -s ${test_dir}/${non_archived_dir}/file2.txt zstash_demo/file4.txt
+    cat zstash_demo/file3.txt
+    cat zstash_demo/file4.txt
+}
+
+# Archive zstash_demo with `zstash create`; $1 is the --hpss value, $2 == "true" adds --follow-symlinks.
+zstash_create()
+{
+    archive_name=$1
+    follow_symlinks=$2
+    echo "Starting zstash create"
+    if [[ "${follow_symlinks}" == "true" ]]; then
+        zstash create --hpss=${archive_name} zstash_demo --follow-symlinks
+    else
+        zstash create --hpss=${archive_name} zstash_demo
+    fi
+}
+
+# Extract into zstash_extraction/ and print the extracted links and listings; $1 is the --hpss value.
+zstash_extract()
+{
+    archive_name=$1
+    mkdir zstash_extraction
+    cd zstash_extraction
+    if [[ "${archive_name}" == "none" ]]; then
+        echo "Copying zstash"
+        cp -r ../zstash_demo/zstash/ zstash
+    fi
+    echo "Starting zstash extract"
+    zstash extract --hpss=${archive_name}
+    cat file3.txt
+    cat file4.txt
+    echo "> ls"
+    ls
+    echo "> ls -l"
+    ls -l
+    echo "> zstash ls"
+    zstash ls --hpss=${archive_name}
+    echo "> zstash ls -l"
+    zstash ls -l --hpss=${archive_name}
+    cd ..
+}
+
+# Run three scenarios: link targets kept, deleted before create, deleted after create.
+# Arguments: $1 use_hpss, $2 follow_symlinks.
+test_cases()
+{
+    use_hpss=$1
+    follow_symlinks=$2
+    if [[ "${use_hpss}" == "true" ]]; then
+        archive_name=/home/f/forsyth/zstash_test_follow_symlinks
+    else
+        archive_name=none
+    fi
+
+    case_name="Don't delete original file"
+    setup ${use_hpss} ${follow_symlinks} "${case_name}" ${archive_name}
+    zstash_create ${archive_name} ${follow_symlinks}
+    zstash_extract ${archive_name}
+
+    case_name="Delete before create"
+    setup ${use_hpss} ${follow_symlinks} "${case_name}" ${archive_name}
+    rm non_archived/file1.txt
+    # non_archived_dir is set by setup(), which also leaves us inside ${local_archive_name}
+    rm ../${non_archived_dir}/file2.txt
+    zstash_create ${archive_name} ${follow_symlinks}
+    zstash_extract ${archive_name}
+
+    case_name="Delete after create"
+    setup ${use_hpss} ${follow_symlinks} "${case_name}" ${archive_name}
+    zstash_create ${archive_name} ${follow_symlinks}
+    rm non_archived/file1.txt
+    rm ../${non_archived_dir}/file2.txt
+    zstash_extract ${archive_name}
+
+}
+
+conda_env=zstash_dev_n247
+# Set up Conda
+source /global/homes/f/forsyth/miniconda3/etc/profile.d/conda.sh
+conda activate ${conda_env}
+# Install branch
+cd /global/homes/f/forsyth/zstash
+pip install .
+# Begin tests
+test_cases true true # HPSS, follow symlinks
+test_cases false true # No HPSS, follow symlinks
+test_cases true false # HPSS, don't follow symlinks
+test_cases false false # No HPSS, don't follow symlinks
diff --git a/zstash/create.py b/zstash/create.py
index a80c1c16..78778eb3 100644
--- a/zstash/create.py
+++ b/zstash/create.py
@@ -153,6 +153,11 @@ def setup_create() -> Tuple[str, argparse.Namespace]:
         action="store_true",
         help="For testing/debugging only. Will not create the tars table or compute the hashes of the tars.",
     )
+    optional.add_argument(
+        "--follow-symlinks",
+        action="store_true",
+        help="Hard copy symlinks. This is useful for preventing broken links. Note that a broken link will result in a failed create.",
+    )
     # Now that we're inside a subcommand, ignore the first two argvs
     # (zstash create)
     args: argparse.Namespace = parser.parse_args(sys.argv[2:])
@@ -234,16 +239,34 @@ def create_database(cache: str, args: argparse.Namespace) -> List[str]:
 
     files: List[str] = get_files_to_archive(cache, args.exclude)
 
-    # Add files to archive
-    failures: List[str] = add_files(
-        cur,
-        con,
-        -1,
-        files,
-        cache,
-        args.keep,
-        skip_tars_md5=args.no_tars_md5,
-    )
+    failures: List[str]
+    if args.follow_symlinks:
+        try:
+            # Add files to archive
+            failures = add_files(
+                cur,
+                con,
+                -1,
+                files,
+                cache,
+                args.keep,
+                args.follow_symlinks,
+                skip_tars_md5=args.no_tars_md5,
+            )
+        except FileNotFoundError as err:
+            raise Exception("Archive creation failed due to broken symlink.") from err
+    else:
+        # Add files to archive
+        failures = add_files(
+            cur,
+            con,
+            -1,
+            files,
+            cache,
+            args.keep,
+            args.follow_symlinks,
+            skip_tars_md5=args.no_tars_md5,
+        )
 
     # Close database
     con.commit()
diff --git a/zstash/extract.py b/zstash/extract.py
index 452dc391..42a06dfb 100644
--- a/zstash/extract.py
+++ b/zstash/extract.py
@@ -241,6 +241,9 @@ def extract_database(
             else:
                 logger.info("No matches for {}".format(args_file))
 
+    if matches_ == []:
+        raise FileNotFoundError("There was nothing to extract.")
+
     matches: List[FilesRow] = list(map(lambda match: FilesRow(match), matches_))
 
     # Sort by the filename, tape (so the tar archive),
diff --git a/zstash/hpss_utils.py b/zstash/hpss_utils.py
index 44ba1008..8b9a5bbf 100644
--- a/zstash/hpss_utils.py
+++ b/zstash/hpss_utils.py
@@ -1,7 +1,9 @@
 from __future__ import absolute_import, print_function
 
 import hashlib
+import os
 import os.path
+import shutil
 import sqlite3
 import tarfile
 import traceback
@@ -60,6 +62,7 @@ def add_files(
     files: List[str],
     cache: str,
     keep: bool,
+    follow_symlinks: bool,
     skip_tars_md5: bool = False,
 ) -> List[str]:
 
@@ -105,7 +108,7 @@ def add_files(
             size: int
             mtime: datetime
             md5: Optional[str]
-            offset, size, mtime, md5 = add_file(tar, current_file)
+            offset, size, mtime, md5 = add_file(tar, current_file, follow_symlinks)
             t: TupleFilesRowNoId = (
                 current_file,
                 size,
@@ -171,11 +174,21 @@ def add_files(
 # Add file to tar archive while computing its hash
 # Return file offset (in tar archive), size and md5 hash
 def add_file(
-    tar: tarfile.TarFile, file_name: str
+    tar: tarfile.TarFile, file_name: str, follow_symlinks: bool
 ) -> Tuple[int, int, datetime, Optional[str]]:
 
     # FIXME: error: "TarFile" has no attribute "offset"
     offset: int = tar.offset  # type: ignore
+    if follow_symlinks and os.path.islink(file_name):
+        linked_file_name = os.path.realpath(file_name)
+        # Check the target before touching the link, so that a broken link
+        # is reported without deleting it from the directory being archived.
+        if not os.path.exists(linked_file_name):
+            raise FileNotFoundError(
+                "Broken symlink: {} -> {}".format(file_name, linked_file_name)
+            )
+        os.remove(file_name)  # Remove symbolic link and create a hard copy
+        shutil.copy(linked_file_name, file_name)
     tarinfo: tarfile.TarInfo = tar.gettarinfo(file_name)
     # Change the size of any hardlinks from 0 to the size of the actual file
     if tarinfo.islnk():
diff --git a/zstash/ls.py b/zstash/ls.py
index b71e39cd..8b6ad6e4 100644
--- a/zstash/ls.py
+++ b/zstash/ls.py
@@ -100,8 +100,11 @@ def ls_database(args: argparse.Namespace, cache: str) -> List[FilesRow]:
                 hpss = config.hpss
             else:
                 raise TypeError("Invalid config.hpss={}".format(config.hpss))
-            # Retrieve from HPSS
-            hpss_get(hpss, get_db_filename(cache), cache)
+            try:
+                # Retrieve from HPSS
+                hpss_get(hpss, get_db_filename(cache), cache)
+            except RuntimeError as err:
+                raise FileNotFoundError("There was nothing to ls.") from err
         else:
             error_str: str = (
                 "--hpss argument is required when local copy of database is unavailable"
@@ -139,6 +142,9 @@ def ls_database(args: argparse.Namespace, cache: str) -> List[FilesRow]:
             )
             matches_ = matches_ + cur.fetchall()
 
+    if matches_ == []:
+        raise FileNotFoundError("There was nothing to ls.")
+
     # Remove duplicates
     matches_ = list(set(matches_))
     matches: List[FilesRow] = list(map(FilesRow, matches_))
diff --git a/zstash/update.py b/zstash/update.py
index a85cdac0..29910696 100644
--- a/zstash/update.py
+++ b/zstash/update.py
@@ -98,6 +98,11 @@ def setup_update() -> Tuple[argparse.Namespace, str]:
     optional.add_argument(
         "-v", "--verbose", action="store_true", help="increase output verbosity"
     )
+    optional.add_argument(
+        "--follow-symlinks",
+        action="store_true",
+        help="Hard copy symlinks. This is useful for preventing broken links. Note that a broken link will result in a failed update.",
+    )
     args: argparse.Namespace = parser.parse_args(sys.argv[2:])
     if args.hpss and args.hpss.lower() == "none":
         args.hpss = "none"
@@ -112,7 +117,10 @@ def setup_update() -> Tuple[argparse.Namespace, str]:
     return args, cache
 
 
-def update_database(args: argparse.Namespace, cache: str) -> Optional[List[str]]:
+# C901 'update_database' is too complex (20)
+def update_database(  # noqa: C901
+    args: argparse.Namespace, cache: str
+) -> Optional[List[str]]:
     # Open database
     logger.debug("Opening index database")
     if not os.path.exists(get_db_filename(cache)):
@@ -226,8 +234,20 @@ def update_database(args: argparse.Namespace, cache: str) -> Optional[List[str]]
         tfile_string: str = tfile[0]
         itar = max(itar, int(tfile_string[0:6], 16))
 
-    # Add files
-    failures: List[str] = add_files(cur, con, itar, newfiles, cache, keep)
+    failures: List[str]
+    if args.follow_symlinks:
+        try:
+            # Add files
+            failures = add_files(
+                cur, con, itar, newfiles, cache, keep, args.follow_symlinks
+            )
+        except FileNotFoundError as err:
+            raise Exception("Archive update failed due to broken symlink.") from err
+    else:
+        # Add files
+        failures = add_files(
+            cur, con, itar, newfiles, cache, keep, args.follow_symlinks
+        )
 
     # Close database
     con.commit()
diff --git a/zstash/utils.py b/zstash/utils.py
index a04a040d..877ad23c 100644
--- a/zstash/utils.py
+++ b/zstash/utils.py
@@ -46,9 +46,9 @@ def run_command(command: str, error_str: str):
     if status != 0:
         error_str = "Error={}, Command was `{}`".format(error_str, command)
         if "hsi" in command:
-            error_str = "{}. This command includes `hsi`. Be sure that you have logged into `hsi`.".format(
-                error_str
-            )
+            error_str = f"{error_str}. This command includes `hsi`. Be sure that you have logged into `hsi`"
+        if "cd" in command:
+            error_str = f"{error_str}. This command includes `cd`. Check that this directory exists and contains the needed files"
         logger.error(error_str)
         logger.debug("stdout:\n{!r}".format(stdout))
         logger.debug("stderr:\n{!r}".format(stderr))