Skip to content

Commit

Permalink
Include symlinks
Browse files Browse the repository at this point in the history
  • Loading branch information
forsyth2 committed Apr 28, 2023
1 parent 054cf5f commit 400be37
Show file tree
Hide file tree
Showing 8 changed files with 202 additions and 21 deletions.
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
build/
dist/
tests/test_follow_symlinks/
tests/test_follow_symlinks_non_archived/
zstash.egg-info/
*.pyc
*~
Expand Down
120 changes: 120 additions & 0 deletions tests/follow_symlinks.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,120 @@
#!/bin/bash

# Build a fresh fixture tree for one test case and cd into it.
#
# Arguments:
#   $1 use_hpss        - "true" to first remove the previous HPSS archive
#   $2 follow_symlinks - "true"/"false"; only echoed here
#   $3 case_name       - human-readable label; only echoed here
#   $4 archive_name    - HPSS archive path (or "none")
#
# Sets the globals use_hpss, follow_symlinks, case_name, archive_name,
# local_archive_name, non_archived_dir and test_dir. On return the working
# directory is ${test_dir}/${local_archive_name}.
# Exits the script if either directory change fails.
setup()
{
    echo "##########################################################################################################"
    use_hpss=$1
    follow_symlinks=$2
    case_name="${3}"
    archive_name=$4
    if [[ "${use_hpss}" == "true" ]]; then
        # Clear any archive left on HPSS by a previous case.
        hsi rm -R "${archive_name}"
    fi
    echo "use_hpss=${use_hpss}"
    echo "follow_symlinks=${follow_symlinks}"
    echo "case_name=${case_name}"
    local_archive_name=test_follow_symlinks
    non_archived_dir=${local_archive_name}_non_archived
    test_dir=/global/homes/f/forsyth/zstash/tests
    # The `rm -rf` calls below use relative paths, so they must never run
    # anywhere other than test_dir.
    cd "${test_dir}" || { echo "Cannot cd to ${test_dir}" >&2; exit 1; }
    rm -rf "${local_archive_name}"
    rm -rf "${non_archived_dir}"
    mkdir "${local_archive_name}"
    # At the same level as local_archive_name
    mkdir "${non_archived_dir}"
    cd "${local_archive_name}" || { echo "Cannot cd to ${local_archive_name}" >&2; exit 1; }

    mkdir zstash_demo
    mkdir zstash_demo/empty_dir
    mkdir zstash_demo/dir
    mkdir non_archived
    echo -n '' > zstash_demo/file_empty.txt
    echo 'file0 stuff' > zstash_demo/dir/file0.txt
    echo 'file1 stuff' > non_archived/file1.txt
    echo 'file2 stuff' > "../${non_archived_dir}/file2.txt"
    # NOTE: a relative `ln -s` target is resolved relative to the directory
    # that contains the link (zstash_demo/), not the current directory, so
    # absolute targets are used here.
    # file3.txt -> a file inside the fixture tree but outside zstash_demo.
    ln -s "${test_dir}/${local_archive_name}/non_archived/file1.txt" zstash_demo/file3.txt
    # file4.txt -> a file outside the fixture tree entirely.
    ln -s "${test_dir}/${non_archived_dir}/file2.txt" zstash_demo/file4.txt
    # Show that both links resolve before any test case deletes the targets.
    cat zstash_demo/file3.txt
    cat zstash_demo/file4.txt
}

# Archive the zstash_demo directory with `zstash create`.
#
# Arguments:
#   $1 archive_name    - value passed to --hpss (an HPSS path or "none")
#   $2 follow_symlinks - "true" to add --follow-symlinks to the command
#
# Must be called from the directory that contains zstash_demo.
# The function's status is that of the zstash command.
zstash_create()
{
    archive_name=$1
    follow_symlinks=$2
    echo "Starting zstash create"
    # Plain create unless symlink following was explicitly requested.
    if [[ "${follow_symlinks}" != "true" ]]; then
        zstash create --hpss=${archive_name} zstash_demo
        return
    fi
    zstash create --hpss=${archive_name} zstash_demo --follow-symlinks
}

# Extract the archive into a new zstash_extraction directory and print what
# was restored.
#
# Arguments:
#   $1 archive_name - value passed to --hpss (an HPSS path or "none")
#
# Must be called from the directory that contains zstash_demo. Returns to
# that directory afterwards. Exits the script if zstash_extraction cannot be
# entered, so nothing is copied or extracted into the fixture root.
zstash_extract()
{
    archive_name=$1
    mkdir zstash_extraction
    cd zstash_extraction || { echo "Cannot cd to zstash_extraction" >&2; exit 1; }
    if [[ "${archive_name}" == "none" ]]; then
        # Without HPSS, bring the local zstash cache directory along by hand.
        echo "Copying zstash"
        cp -r ../zstash_demo/zstash/ zstash
    fi
    echo "Starting zstash extract"
    zstash extract --hpss="${archive_name}"
    # Show what the two originally-symlinked files look like after extraction.
    cat file3.txt
    cat file4.txt
    echo "> ls"
    ls
    echo "> ls -l"
    ls -l
    echo "> zstash ls"
    zstash ls --hpss="${archive_name}"
    echo "> zstash ls -l"
    zstash ls -l --hpss="${archive_name}"
    cd ..
}

# Run the three symlink scenarios for one HPSS / follow-symlinks combination.
#
# Arguments:
#   $1 use_hpss        - "true" to archive to HPSS, anything else for --hpss=none
#   $2 follow_symlinks - "true" to pass --follow-symlinks to `zstash create`
#
# Each scenario calls setup, which rebuilds the fixture, leaves the shell in
# the fixture directory and sets ${non_archived_dir} (the sibling directory
# holding file2.txt).
test_cases()
{
    use_hpss=$1
    follow_symlinks=$2
    if [[ "${use_hpss}" == "true" ]]; then
        archive_name=/home/f/forsyth/zstash_test_follow_symlinks
    else
        archive_name=none
    fi

    # Link targets stay in place for the whole create/extract cycle.
    case_name="Don't delete original file"
    setup "${use_hpss}" "${follow_symlinks}" "${case_name}" "${archive_name}"
    zstash_create "${archive_name}" "${follow_symlinks}"
    zstash_extract "${archive_name}"

    # Both links are already broken when the archive is created.
    case_name="Delete before create"
    setup "${use_hpss}" "${follow_symlinks}" "${case_name}" "${archive_name}"
    rm non_archived/file1.txt
    # Use the directory setup actually created, so file4.txt's target is
    # really removed.
    rm "../${non_archived_dir}/file2.txt"
    zstash_create "${archive_name}" "${follow_symlinks}"
    zstash_extract "${archive_name}"

    # Links are valid at create time and broken before extraction.
    case_name="Delete after create"
    setup "${use_hpss}" "${follow_symlinks}" "${case_name}" "${archive_name}"
    zstash_create "${archive_name}" "${follow_symlinks}"
    rm non_archived/file1.txt
    rm "../${non_archived_dir}/file2.txt"
    zstash_extract "${archive_name}"

}

# Driver: activate the development environment, install the checked-out
# branch, then run every HPSS / follow-symlinks combination.
conda_env=zstash_dev_n247
# Set up Conda
source /global/homes/f/forsyth/miniconda3/etc/profile.d/conda.sh
conda activate "${conda_env}"
# Install branch
# Stop if the checkout is missing; otherwise `pip install .` would install
# whatever project happens to be in the current directory.
cd /global/homes/f/forsyth/zstash || { echo "Cannot cd to zstash checkout" >&2; exit 1; }
pip install .
# Begin tests
test_cases true true # HPSS, follow symlinks
test_cases false true # No HPSS, follow symlinks
test_cases true false # HPSS, don't follow symlinks
test_cases false false # No HPSS, don't follow symlinks
43 changes: 33 additions & 10 deletions zstash/create.py
Original file line number Diff line number Diff line change
Expand Up @@ -153,6 +153,11 @@ def setup_create() -> Tuple[str, argparse.Namespace]:
action="store_true",
help="For testing/debugging only. Will not create the tars table or compute the hashes of the tars.",
)
optional.add_argument(
"--follow-symlinks",
action="store_true",
help="Hard copy symlinks. This is useful for preventing broken links. Note that a broken link will result in a failed create.",
)
# Now that we're inside a subcommand, ignore the first two argvs
# (zstash create)
args: argparse.Namespace = parser.parse_args(sys.argv[2:])
Expand Down Expand Up @@ -234,16 +239,34 @@ def create_database(cache: str, args: argparse.Namespace) -> List[str]:

files: List[str] = get_files_to_archive(cache, args.exclude)

# Add files to archive
failures: List[str] = add_files(
cur,
con,
-1,
files,
cache,
args.keep,
skip_tars_md5=args.no_tars_md5,
)
failures: List[str]
if args.follow_symlinks:
try:
# Add files to archive
failures = add_files(
cur,
con,
-1,
files,
cache,
args.keep,
args.follow_symlinks,
skip_tars_md5=args.no_tars_md5,
)
except FileNotFoundError:
raise Exception("Archive creation failed due to broken symlink.")
else:
# Add files to archive
failures = add_files(
cur,
con,
-1,
files,
cache,
args.keep,
args.follow_symlinks,
skip_tars_md5=args.no_tars_md5,
)

# Close database
con.commit()
Expand Down
3 changes: 3 additions & 0 deletions zstash/extract.py
Original file line number Diff line number Diff line change
Expand Up @@ -241,6 +241,9 @@ def extract_database(
else:
logger.info("No matches for {}".format(args_file))

if matches_ == []:
raise FileNotFoundError("There was nothing to extract.")

matches: List[FilesRow] = list(map(lambda match: FilesRow(match), matches_))

# Sort by the filename, tape (so the tar archive),
Expand Down
13 changes: 10 additions & 3 deletions zstash/hpss_utils.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
from __future__ import absolute_import, print_function

import hashlib
import os
import os.path
import shutil
import sqlite3
import tarfile
import traceback
Expand Down Expand Up @@ -60,6 +62,7 @@ def add_files(
files: List[str],
cache: str,
keep: bool,
follow_symlinks: bool,
skip_tars_md5: bool = False,
) -> List[str]:

Expand Down Expand Up @@ -105,7 +108,7 @@ def add_files(
size: int
mtime: datetime
md5: Optional[str]
offset, size, mtime, md5 = add_file(tar, current_file)
offset, size, mtime, md5 = add_file(tar, current_file, follow_symlinks)
t: TupleFilesRowNoId = (
current_file,
size,
Expand Down Expand Up @@ -171,11 +174,15 @@ def add_files(
# Add file to tar archive while computing its hash
# Return file offset (in tar archive), size and md5 hash
def add_file(
tar: tarfile.TarFile, file_name: str
tar: tarfile.TarFile, file_name: str, follow_symlinks: bool
) -> Tuple[int, int, datetime, Optional[str]]:

# FIXME: error: "TarFile" has no attribute "offset"
offset: int = tar.offset # type: ignore
if follow_symlinks and os.path.islink(file_name):
linked_file_name = os.path.realpath(file_name)
os.remove(file_name) # Remove symbolic link and create a hard copy
shutil.copy(linked_file_name, file_name)
tarinfo: tarfile.TarInfo = tar.gettarinfo(file_name)
# Change the size of any hardlinks from 0 to the size of the actual file
if tarinfo.islnk():
Expand All @@ -185,7 +192,7 @@ def add_file(

md5: Optional[str] = None
# Only add files or hardlinks.
# (So don't add directories or softlinks.)
# (So don't add directories).
if tarinfo.isfile() or tarinfo.islnk():
f: _io.TextIOWrapper = open(file_name, "rb")
hash_md5: _hashlib.HASH = hashlib.md5()
Expand Down
10 changes: 8 additions & 2 deletions zstash/ls.py
Original file line number Diff line number Diff line change
Expand Up @@ -100,8 +100,11 @@ def ls_database(args: argparse.Namespace, cache: str) -> List[FilesRow]:
hpss = config.hpss
else:
raise TypeError("Invalid config.hpss={}".format(config.hpss))
# Retrieve from HPSS
hpss_get(hpss, get_db_filename(cache), cache)
try:
# Retrieve from HPSS
hpss_get(hpss, get_db_filename(cache), cache)
except RuntimeError:
raise FileNotFoundError("There was nothing to ls.")
else:
error_str: str = (
"--hpss argument is required when local copy of database is unavailable"
Expand Down Expand Up @@ -139,6 +142,9 @@ def ls_database(args: argparse.Namespace, cache: str) -> List[FilesRow]:
)
matches_ = matches_ + cur.fetchall()

if matches_ == []:
raise FileNotFoundError("There was nothing to ls.")

# Remove duplicates
matches_ = list(set(matches_))
matches: List[FilesRow] = list(map(FilesRow, matches_))
Expand Down
26 changes: 23 additions & 3 deletions zstash/update.py
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,11 @@ def setup_update() -> Tuple[argparse.Namespace, str]:
optional.add_argument(
"-v", "--verbose", action="store_true", help="increase output verbosity"
)
optional.add_argument(
"--follow-symlinks",
action="store_true",
help="Hard copy symlinks. This is useful for preventing broken links. Note that a broken link will result in a failed update.",
)
args: argparse.Namespace = parser.parse_args(sys.argv[2:])
if args.hpss and args.hpss.lower() == "none":
args.hpss = "none"
Expand All @@ -112,7 +117,10 @@ def setup_update() -> Tuple[argparse.Namespace, str]:
return args, cache


def update_database(args: argparse.Namespace, cache: str) -> Optional[List[str]]:
# C901 'update_database' is too complex (20)
def update_database( # noqa: C901
args: argparse.Namespace, cache: str
) -> Optional[List[str]]:
# Open database
logger.debug("Opening index database")
if not os.path.exists(get_db_filename(cache)):
Expand Down Expand Up @@ -226,8 +234,20 @@ def update_database(args: argparse.Namespace, cache: str) -> Optional[List[str]]
tfile_string: str = tfile[0]
itar = max(itar, int(tfile_string[0:6], 16))

# Add files
failures: List[str] = add_files(cur, con, itar, newfiles, cache, keep)
failures: List[str]
if args.follow_symlinks:
try:
# Add files
failures = add_files(
cur, con, itar, newfiles, cache, keep, args.follow_symlinks
)
except FileNotFoundError:
raise Exception("Archive update failed due to broken symlink.")
else:
# Add files
failures = add_files(
cur, con, itar, newfiles, cache, keep, args.follow_symlinks
)

# Close database
con.commit()
Expand Down
6 changes: 3 additions & 3 deletions zstash/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,9 +46,9 @@ def run_command(command: str, error_str: str):
if status != 0:
error_str = "Error={}, Command was `{}`".format(error_str, command)
if "hsi" in command:
error_str = "{}. This command includes `hsi`. Be sure that you have logged into `hsi`.".format(
error_str
)
error_str = f"{error_str}. This command includes `hsi`. Be sure that you have logged into `hsi`"
if "cd" in command:
error_str = f"{error_str}. This command includes `cd`. Check that this directory exists and contains the needed files"
logger.error(error_str)
logger.debug("stdout:\n{!r}".format(stdout))
logger.debug("stderr:\n{!r}".format(stderr))
Expand Down

0 comments on commit 400be37

Please sign in to comment.