Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Include symlinks #261

Merged
merged 1 commit into from
Apr 28, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
build/
dist/
tests/test_follow_symlinks/
tests/test_follow_symlinks_non_archived/
zstash.egg-info/
*.pyc
*~
Expand Down
120 changes: 120 additions & 0 deletions tests/follow_symlinks.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,120 @@
#!/bin/bash

# Build a fresh fixture tree for one test case and leave the shell inside it.
#
# Arguments:
#   $1 use_hpss        - "true" to first remove any previous archive from HPSS
#   $2 follow_symlinks - only echoed, to label the log output
#   $3 case_name       - only echoed, to label the log output
#   $4 archive_name    - HPSS path removed when use_hpss is "true"
#
# Globals set (intentionally not `local`): use_hpss, follow_symlinks,
# case_name, archive_name, local_archive_name, non_archived_dir, test_dir.
#
# Resulting layout under ${test_dir}:
#   ${local_archive_name}/zstash_demo/            <- directory that gets archived
#       empty_dir/
#       dir/file0.txt
#       file_empty.txt
#       file3.txt -> ../non_archived/file1.txt    (target inside the test dir)
#       file4.txt -> ${non_archived_dir}/file2.txt (target outside the test dir)
#   ${local_archive_name}/non_archived/file1.txt
#   ${non_archived_dir}/file2.txt
#
# On return the working directory is ${test_dir}/${local_archive_name}.
# Exits the script if either `cd` fails, because the following `rm -rf`/`mkdir`
# calls use relative paths and would otherwise act on the wrong directory.
setup()
{
    echo "##########################################################################################################"
    use_hpss=$1
    follow_symlinks=$2
    case_name="${3}"
    archive_name=$4
    if [[ "${use_hpss}" == "true" ]]; then
        # Start from a clean slate on HPSS; failure (nothing to remove) is fine.
        hsi rm -R "${archive_name}"
    fi
    echo "use_hpss=${use_hpss}"
    echo "follow_symlinks=${follow_symlinks}"
    echo "case_name=${case_name}"
    local_archive_name=test_follow_symlinks
    non_archived_dir=${local_archive_name}_non_archived
    test_dir=/global/homes/f/forsyth/zstash/tests

    # Guard the cd: the rm -rf below is relative to the current directory.
    cd "${test_dir}" || { echo "setup: cannot cd to ${test_dir}" >&2; exit 1; }
    rm -rf "${local_archive_name}"
    rm -rf "${non_archived_dir}"
    mkdir "${local_archive_name}"
    # At the same level as local_archive_name
    mkdir "${non_archived_dir}"
    cd "${local_archive_name}" || { echo "setup: cannot cd to ${local_archive_name}" >&2; exit 1; }

    mkdir zstash_demo
    mkdir zstash_demo/empty_dir
    mkdir zstash_demo/dir
    mkdir non_archived
    echo -n '' > zstash_demo/file_empty.txt
    echo 'file0 stuff' > zstash_demo/dir/file0.txt
    echo 'file1 stuff' > non_archived/file1.txt
    echo 'file2 stuff' > "../${non_archived_dir}/file2.txt"
    # NOTE: a relative `ln -s` target is resolved relative to the directory
    # holding the link, not the current directory, so absolute targets are
    # used here to keep the links valid.
    ln -s "${test_dir}/${local_archive_name}/non_archived/file1.txt" zstash_demo/file3.txt
    ln -s "${test_dir}/${non_archived_dir}/file2.txt" zstash_demo/file4.txt
    # Sanity check: both links resolve before any test case deletes a target.
    cat zstash_demo/file3.txt
    cat zstash_demo/file4.txt
}

# Archive the zstash_demo directory with `zstash create`.
#
# Arguments:
#   $1 archive_name    - value passed to --hpss ("none" for a local-only archive)
#   $2 follow_symlinks - "true" appends --follow-symlinks to the command
#
# Returns the exit status of `zstash create`.
zstash_create()
{
    archive_name=$1
    follow_symlinks=$2
    echo "Starting zstash create"

    # Collect the optional flag once instead of duplicating the whole command.
    local optional_flags=()
    if [[ "${follow_symlinks}" == "true" ]]; then
        optional_flags+=(--follow-symlinks)
    fi

    zstash create --hpss=${archive_name} zstash_demo "${optional_flags[@]}"
}

# Extract the archive into a new ./zstash_extraction directory and print what
# came out, so the log shows how the symlinked files were stored.
#
# Arguments:
#   $1 archive_name - value passed to --hpss; "none" means the local zstash
#                     cache is copied in from ../zstash_demo first
#
# The working directory is restored to the parent before returning.
zstash_extract()
{
    archive_name=$1
    mkdir zstash_extraction
    cd zstash_extraction

    # Without HPSS there is nothing to download, so seed the cache by hand.
    [[ "${archive_name}" != "none" ]] || {
        echo "Copying zstash"
        cp -r ../zstash_demo/zstash/ zstash
    }

    echo "Starting zstash extract"
    zstash extract --hpss=${archive_name}

    # Show the contents of the two files that were symlinks in the source tree.
    local extracted_file
    for extracted_file in file3.txt file4.txt; do
        cat "${extracted_file}"
    done

    # Plain and long listings, first from the filesystem, then from the archive
    # index. The flag is expanded unquoted on purpose so "" vanishes entirely.
    local listing_flag
    for listing_flag in "" " -l"; do
        echo "> ls${listing_flag}"
        ls ${listing_flag}
    done
    for listing_flag in "" " -l"; do
        echo "> zstash ls${listing_flag}"
        zstash ls ${listing_flag} --hpss=${archive_name}
    done

    cd ..
}

# Run the three symlink scenarios for one (use_hpss, follow_symlinks) pairing.
#
# Arguments:
#   $1 use_hpss        - "true" to archive to HPSS, anything else for --hpss=none
#   $2 follow_symlinks - "true" to pass --follow-symlinks to `zstash create`
#
# Scenarios (each starts from a fresh fixture built by `setup`):
#   1. Link targets are left in place for both create and extract.
#   2. Link targets are deleted BEFORE create, so the links are already broken
#      when the archive is built.
#   3. Link targets are deleted AFTER create, so only extraction sees them gone.
#
# Relies on `setup` having set the global ${non_archived_dir} and having left
# the shell inside the test directory.
test_cases()
{
    use_hpss=$1
    follow_symlinks=$2
    if [[ "${use_hpss}" == "true" ]]; then
        archive_name=/home/f/forsyth/zstash_test_follow_symlinks
    else
        archive_name=none
    fi

    case_name="Don't delete original file"
    setup ${use_hpss} ${follow_symlinks} "${case_name}" ${archive_name}
    zstash_create ${archive_name} ${follow_symlinks}
    zstash_extract ${archive_name}

    case_name="Delete before create"
    setup ${use_hpss} ${follow_symlinks} "${case_name}" ${archive_name}
    rm non_archived/file1.txt
    # Remove the out-of-tree target from the directory `setup` actually
    # created; a hard-coded name here would silently miss the file and leave
    # file4.txt's link intact.
    rm "../${non_archived_dir}/file2.txt"
    zstash_create ${archive_name} ${follow_symlinks}
    zstash_extract ${archive_name}

    case_name="Delete after create"
    setup ${use_hpss} ${follow_symlinks} "${case_name}" ${archive_name}
    zstash_create ${archive_name} ${follow_symlinks}
    rm non_archived/file1.txt
    rm "../${non_archived_dir}/file2.txt"
    zstash_extract ${archive_name}

}

# Script driver: activate the development environment, install the checked-out
# branch, then run every combination of HPSS usage and symlink following.
conda_env=zstash_dev_n247

# Set up Conda
source /global/homes/f/forsyth/miniconda3/etc/profile.d/conda.sh
conda activate ${conda_env}

# Install branch
cd /global/homes/f/forsyth/zstash
pip install .

# Begin tests
# Order matches the original listing:
#   HPSS + follow, no HPSS + follow, HPSS + don't follow, no HPSS + don't follow
for follow_mode in true false; do
    for hpss_mode in true false; do
        test_cases ${hpss_mode} ${follow_mode}
    done
done
43 changes: 33 additions & 10 deletions zstash/create.py
Original file line number Diff line number Diff line change
Expand Up @@ -153,6 +153,11 @@ def setup_create() -> Tuple[str, argparse.Namespace]:
action="store_true",
help="For testing/debugging only. Will not create the tars table or compute the hashes of the tars.",
)
optional.add_argument(
"--follow-symlinks",
action="store_true",
help="Hard copy symlinks. This is useful for preventing broken links. Note that a broken link will result in a failed create.",
)
# Now that we're inside a subcommand, ignore the first two argvs
# (zstash create)
args: argparse.Namespace = parser.parse_args(sys.argv[2:])
Expand Down Expand Up @@ -234,16 +239,34 @@ def create_database(cache: str, args: argparse.Namespace) -> List[str]:

files: List[str] = get_files_to_archive(cache, args.exclude)

# Add files to archive
failures: List[str] = add_files(
cur,
con,
-1,
files,
cache,
args.keep,
skip_tars_md5=args.no_tars_md5,
)
failures: List[str]
if args.follow_symlinks:
try:
# Add files to archive
failures = add_files(
cur,
con,
-1,
files,
cache,
args.keep,
args.follow_symlinks,
skip_tars_md5=args.no_tars_md5,
)
except FileNotFoundError:
raise Exception("Archive creation failed due to broken symlink.")
else:
# Add files to archive
failures = add_files(
cur,
con,
-1,
files,
cache,
args.keep,
args.follow_symlinks,
skip_tars_md5=args.no_tars_md5,
)

# Close database
con.commit()
Expand Down
3 changes: 3 additions & 0 deletions zstash/extract.py
Original file line number Diff line number Diff line change
Expand Up @@ -241,6 +241,9 @@ def extract_database(
else:
logger.info("No matches for {}".format(args_file))

if matches_ == []:
raise FileNotFoundError("There was nothing to extract.")

matches: List[FilesRow] = list(map(lambda match: FilesRow(match), matches_))

# Sort by the filename, tape (so the tar archive),
Expand Down
11 changes: 9 additions & 2 deletions zstash/hpss_utils.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
from __future__ import absolute_import, print_function

import hashlib
import os
import os.path
import shutil
import sqlite3
import tarfile
import traceback
Expand Down Expand Up @@ -60,6 +62,7 @@ def add_files(
files: List[str],
cache: str,
keep: bool,
follow_symlinks: bool,
skip_tars_md5: bool = False,
) -> List[str]:

Expand Down Expand Up @@ -105,7 +108,7 @@ def add_files(
size: int
mtime: datetime
md5: Optional[str]
offset, size, mtime, md5 = add_file(tar, current_file)
offset, size, mtime, md5 = add_file(tar, current_file, follow_symlinks)
t: TupleFilesRowNoId = (
current_file,
size,
Expand Down Expand Up @@ -171,11 +174,15 @@ def add_files(
# Add file to tar archive while computing its hash
# Return file offset (in tar archive), size and md5 hash
def add_file(
tar: tarfile.TarFile, file_name: str
tar: tarfile.TarFile, file_name: str, follow_symlinks: bool
) -> Tuple[int, int, datetime, Optional[str]]:

# FIXME: error: "TarFile" has no attribute "offset"
offset: int = tar.offset # type: ignore
if follow_symlinks and os.path.islink(file_name):
linked_file_name = os.path.realpath(file_name)
os.remove(file_name) # Remove symbolic link and create a hard copy
shutil.copy(linked_file_name, file_name)
tarinfo: tarfile.TarInfo = tar.gettarinfo(file_name)
# Change the size of any hardlinks from 0 to the size of the actual file
if tarinfo.islnk():
Expand Down
10 changes: 8 additions & 2 deletions zstash/ls.py
Original file line number Diff line number Diff line change
Expand Up @@ -100,8 +100,11 @@ def ls_database(args: argparse.Namespace, cache: str) -> List[FilesRow]:
hpss = config.hpss
else:
raise TypeError("Invalid config.hpss={}".format(config.hpss))
# Retrieve from HPSS
hpss_get(hpss, get_db_filename(cache), cache)
try:
# Retrieve from HPSS
hpss_get(hpss, get_db_filename(cache), cache)
except RuntimeError:
raise FileNotFoundError("There was nothing to ls.")
else:
error_str: str = (
"--hpss argument is required when local copy of database is unavailable"
Expand Down Expand Up @@ -139,6 +142,9 @@ def ls_database(args: argparse.Namespace, cache: str) -> List[FilesRow]:
)
matches_ = matches_ + cur.fetchall()

if matches_ == []:
raise FileNotFoundError("There was nothing to ls.")

# Remove duplicates
matches_ = list(set(matches_))
matches: List[FilesRow] = list(map(FilesRow, matches_))
Expand Down
26 changes: 23 additions & 3 deletions zstash/update.py
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,11 @@ def setup_update() -> Tuple[argparse.Namespace, str]:
optional.add_argument(
"-v", "--verbose", action="store_true", help="increase output verbosity"
)
optional.add_argument(
"--follow-symlinks",
action="store_true",
help="Hard copy symlinks. This is useful for preventing broken links. Note that a broken link will result in a failed update.",
)
args: argparse.Namespace = parser.parse_args(sys.argv[2:])
if args.hpss and args.hpss.lower() == "none":
args.hpss = "none"
Expand All @@ -112,7 +117,10 @@ def setup_update() -> Tuple[argparse.Namespace, str]:
return args, cache


def update_database(args: argparse.Namespace, cache: str) -> Optional[List[str]]:
# C901 'update_database' is too complex (20)
def update_database( # noqa: C901
args: argparse.Namespace, cache: str
) -> Optional[List[str]]:
# Open database
logger.debug("Opening index database")
if not os.path.exists(get_db_filename(cache)):
Expand Down Expand Up @@ -226,8 +234,20 @@ def update_database(args: argparse.Namespace, cache: str) -> Optional[List[str]]
tfile_string: str = tfile[0]
itar = max(itar, int(tfile_string[0:6], 16))

# Add files
failures: List[str] = add_files(cur, con, itar, newfiles, cache, keep)
failures: List[str]
if args.follow_symlinks:
try:
# Add files
failures = add_files(
cur, con, itar, newfiles, cache, keep, args.follow_symlinks
)
except FileNotFoundError:
raise Exception("Archive update failed due to broken symlink.")
else:
# Add files
failures = add_files(
cur, con, itar, newfiles, cache, keep, args.follow_symlinks
)

# Close database
con.commit()
Expand Down
6 changes: 3 additions & 3 deletions zstash/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,9 +46,9 @@ def run_command(command: str, error_str: str):
if status != 0:
error_str = "Error={}, Command was `{}`".format(error_str, command)
if "hsi" in command:
error_str = "{}. This command includes `hsi`. Be sure that you have logged into `hsi`.".format(
error_str
)
error_str = f"{error_str}. This command includes `hsi`. Be sure that you have logged into `hsi`"
if "cd" in command:
error_str = f"{error_str}. This command includes `cd`. Check that this directory exists and contains the needed files"
logger.error(error_str)
logger.debug("stdout:\n{!r}".format(stdout))
logger.debug("stderr:\n{!r}".format(stderr))
Expand Down