Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Upgrade script #452

Merged
merged 22 commits into from
Apr 14, 2020
Merged
Show file tree
Hide file tree
Changes from 21 commits
Commits
Show all changes
22 commits
Select commit Hold shift + click to select a range
5d356c0
Remove header property setters
achilleas-k Mar 25, 2020
7aaac7e
Cleaner file and header creation logic
achilleas-k Mar 25, 2020
c3b02b2
Set file ID on creation
achilleas-k Mar 25, 2020
7ea2a6d
Set file format version 1.2.0
achilleas-k Mar 25, 2020
d85eae4
Check for ID in header when opening existing files
achilleas-k Mar 25, 2020
6bc5596
Revert "Delete unused debugging function in validator test"
achilleas-k Mar 25, 2020
5d62d49
Update tests for new file header and methods
achilleas-k Mar 25, 2020
572789d
[validator] Check for file ID when appropriate
achilleas-k Mar 25, 2020
6e89d9a
File tests for missing ID and validation
achilleas-k Mar 25, 2020
b574853
README for the scripts directory
achilleas-k Mar 24, 2020
8581457
Remove scripts/ from release archives
achilleas-k Mar 24, 2020
c79de80
New script: nixio-upgrade
achilleas-k Mar 24, 2020
00cd38e
[property] Simplify version check for old style props
achilleas-k Mar 24, 2020
56c23a8
[property] Simplify old style value reading
achilleas-k Mar 24, 2020
83b7ff2
File metadata H5Group attribute should be private
achilleas-k Mar 24, 2020
30e9a01
Upgrade script: Add file ID and update file version
achilleas-k Mar 25, 2020
05133f3
Upgrade script: Update properties to new format
achilleas-k Mar 25, 2020
ecc5121
Use argparse in validate script
achilleas-k Mar 26, 2020
8523ed1
Upgrade script: Small docstring fix
achilleas-k Mar 27, 2020
a686f67
Upgrade script: Recheck property before fixing
achilleas-k Mar 27, 2020
5bba39f
Upgrade script: Conditionally add and run tasks
achilleas-k Mar 27, 2020
781c25a
Upgrade script: Copy over Property definition and unit
achilleas-k Mar 30, 2020
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion MANIFEST.in
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
include LICENSE README.rst
recursive-include docs *
recursive-include scripts *
include nixio/info.json
3 changes: 0 additions & 3 deletions nixio/cmd/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +0,0 @@
from .validate import main as validatemain

__all__ = ["validatemain"]
219 changes: 219 additions & 0 deletions nixio/cmd/upgrade.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,219 @@
import argparse
import nixio as nix
import h5py


def get_file_version(fname):
    """
    Read the format version stored in the header of the file.

    The file is opened read-only with h5py and the root-level "version"
    attribute is returned as a tuple.
    """
    with h5py.File(fname, mode="r") as h5root:
        version_attr = h5root.attrs["version"]
        return tuple(version_attr)


def has_valid_file_id(fname):
    """
    Return True if the file header carries an "id" attribute that
    nix.util.is_uuid accepts, False otherwise (including when the attribute
    is missing or empty).
    """
    with h5py.File(fname, mode="r") as h5root:
        candidate = h5root.attrs.get("id")
    return bool(candidate and nix.util.is_uuid(candidate))


def add_file_id(fname):
    """
    Returns a closure that binds the filename if a file ID is required. When
    the return value is called, it adds a UUID to the file header.

    Returns None when the file already has a valid ID, so callers can skip
    scheduling the task.
    """
    if has_valid_file_id(fname):
        return None

    def add_id():
        "Add a UUID to the file header"
        # NOTE: the docstring above is shown to the user in the task list
        # (via __doc__), so it is kept as a short one-liner.
        with h5py.File(fname, mode="a") as hfile:
            # The file may have changed between collecting and running the
            # tasks, so check again. Inspect the handle that is already open
            # instead of opening the same file a second time in another mode.
            fileid = hfile.attrs.get("id")
            if fileid and nix.util.is_uuid(fileid):
                return
            print("Adding file id")
            hfile.attrs["id"] = nix.util.create_id()
    return add_id


def update_property_values(fname):
    """
    Returns a closure that binds the filename if at least one Property update
    is required. When the return value is called, it rewrites all the metadata
    Property objects to the new format.

    Old-style Properties are found by walking the "metadata" group and
    collecting every dataset with a structured (compound) dtype. Returns None
    when no such dataset exists.

    For each old-style Property the closure:
      - recreates the dataset from the "value" field only,
      - carries over the "definition" and "unit" attributes if present,
      - stores "uncertainty" either as an attribute (single non-zero value)
        or as a sibling "<name>.uncertainty" dataset (multiple values),
      - stores any non-empty "reference", "filename", "encoder" and
        "checksum" fields as sibling "<name>.<field>" datasets.
    """
    # Old per-value string fields that become separate datasets when set.
    extra_fields = ("reference", "filename", "encoder", "checksum")
    props = list()

    with h5py.File(fname, mode="r") as hfile:
        sections = hfile["metadata"]

        def find_props(name, group):
            if isinstance(group, h5py.Dataset) and len(group.dtype):
                # structured/compound dtypes have non-zero length
                props.append(group.name)

        sections.visititems(find_props)

    if not props:
        return None

    def update_props():
        for propname in props:
            with h5py.File(fname, mode="a") as hfile:
                prop = hfile[propname]
                if not (isinstance(prop, h5py.Dataset) and len(prop.dtype)):
                    # File was possibly changed since the tasks were
                    # collected. File may have been submitted twice or
                    # multiple instances of the script could be running.
                    # skip this prop
                    continue

                print(f"Fixing {propname}")
                # Pull everything out of the old dataset before deleting it.
                uncertainty = prop["uncertainty"]
                extras = {field: prop[field] for field in extra_fields}
                values = prop["value"]
                definition = prop.attrs.get("definition")
                unit = prop.attrs.get("unit")

                # replace base prop
                del hfile[propname]
                newprop = create_property(hfile, propname,
                                          dtype=values.dtype, data=values)

                # Carry over the descriptive attributes; without this they
                # are lost together with the deleted dataset.
                if definition is not None:
                    newprop.attrs["definition"] = definition
                if unit is not None:
                    newprop.attrs["unit"] = unit

                # Create properties for any extra attrs that are set
                if len(set(uncertainty)) > 1:
                    # multiple values, make new prop
                    create_property(hfile, propname + ".uncertainty",
                                    dtype=float, data=uncertainty)
                elif any(uncertainty):
                    # single, unique, non-zero value; add to main prop attr
                    newprop.attrs["uncertainty"] = uncertainty[0]

                for field in extra_fields:
                    if any(extras[field]):
                        create_property(hfile, propname + "." + field,
                                        dtype=nix.util.vlen_str_dtype,
                                        data=extras[field])

    # The docstring is shown to the user in the task list.
    update_props.__doc__ = "Update {} properties".format(len(props))
    return update_props


def create_property(hfile, name, dtype, data, definition=None, unit=None):
    """
    Create a new-format Property dataset and set its standard attributes.

    hfile:       open, writable h5py file (or group) to create the dataset in
    name:        full path of the dataset; the last "/"-separated component
                 is stored as the "name" attribute
    dtype, data: passed through to create_dataset
    definition:  optional value for the "definition" attribute
    unit:        optional value for the "unit" attribute

    definition and unit are only written when given. Old Properties carry
    them as attributes, so callers replacing an existing Property must pass
    them (or copy them afterwards) or they are lost in the upgrade.

    Returns the newly created dataset.
    """
    prop = hfile.create_dataset(name, dtype=dtype, data=data)
    prop.attrs["name"] = name.rsplit("/", 1)[-1]
    if definition is not None:
        prop.attrs["definition"] = definition
    if unit is not None:
        prop.attrs["unit"] = unit
    prop.attrs["entity_id"] = nix.util.create_id()
    # Read the clock once so that created_at and updated_at always match.
    timestamp = nix.util.time_to_str(nix.util.now_int())
    prop.attrs["created_at"] = timestamp
    prop.attrs["updated_at"] = timestamp
    return prop


def update_format_version(fname):
    """
    Build the task that stamps the file with the library's format version.

    The returned closure opens the bound file in append mode and overwrites
    the "version" header attribute with nix.file.HDF_FF_VERSION. Its __doc__
    is set to a description that includes the dotted version string.
    """
    def update_ver():
        with h5py.File(fname, mode="a") as h5root:
            h5root.attrs["version"] = nix.file.HDF_FF_VERSION

    target = ".".join(map(str, nix.file.HDF_FF_VERSION))
    update_ver.__doc__ = f"Update the file format version to {target}"
    return update_ver


def collect_tasks(fname):
    """
    Work out which upgrade tasks a file needs and print them.

    Returns None (after printing an "Up to date" line) when the file version
    is not older than the library's format version. Otherwise returns a list
    of callables: the optional file ID and Property tasks, followed by the
    format version update, which always comes last.
    """
    current = get_file_version(fname)
    target = nix.file.HDF_FF_VERSION
    current_str = ".".join(map(str, current))
    target_str = ".".join(map(str, target))

    if current >= target:
        print(f"{fname}: Up to date ({current_str})")
        return None

    # An old version number alone is not enough: each task factory inspects
    # the file and returns None when there is nothing for it to do.
    candidates = (add_file_id(fname), update_property_values(fname))
    pending = [task for task in candidates if task]

    # the format version is bumped only after everything else
    pending.append(update_format_version(fname))

    # show the user what will be done
    print(f"{fname}: {current_str} -> {target_str}")
    print(" - " + "\n - ".join(task.__doc__ for task in pending) + "\n")

    return pending


def main():
    """
    Command line entry point: collect the upgrade tasks for every file given,
    ask for confirmation (unless --force is used) and run the tasks.
    """
    parser = argparse.ArgumentParser(
        description="Upgrade NIX files to newest version"
    )
    parser.add_argument("-f", "--force", action="store_true",
                        help="overwrite existing files without prompting")
    parser.add_argument("file", type=str, nargs="+",
                        help="path to file to upgrade (at least one)")
    args = parser.parse_args()

    # Map each file that needs work to its list of tasks.
    tasks = dict()
    for fname in args.file:
        tasklist = collect_tasks(fname)
        if tasklist:
            tasks[fname] = tasklist

    if not tasks:
        return

    if args.force:
        conf = "yes"
    else:
        print("""
PLEASE READ CAREFULLY

If you choose to continue, the changes listed above will be applied to the
respective files. This will make the files unreadable by older NIX library
versions. Although this procedure is generally fast and safe, interrupting it
may leave files in a corrupted state.

MAKE SURE YOUR FILES AND DATA ARE BACKED UP BEFORE CONTINUING.
""")
        conf = None
        # keep asking until we get a recognisable answer
        while conf not in ("y", "n", "yes", "no"):
            conf = input("Continue with changes? [yes/no] ").lower()

    if conf not in ("y", "yes"):
        return

    for fname, tasklist in tasks.items():
        print(f"Processing {fname} ", end="", flush=True)
        for task in tasklist:
            task()
        print("done")


# Run the upgrade command line interface when executed as a script.
if __name__ == "__main__":
    main()
26 changes: 10 additions & 16 deletions nixio/cmd/validate.py
Original file line number Diff line number Diff line change
@@ -1,16 +1,8 @@
import sys
import os
import argparse
import nixio as nix


def usage():
    """Print the command line usage summary for nix-validate."""
    help_lines = (
        "Usage",
        " nix-validate <nixfile>...",
        "",
        "Args",
        " <nixfile>... One or more NIX files",
    )
    print("\n".join(help_lines))


def format_obj(obj):
    """
    Return a short description of obj made of its type name, its name
    attribute and its id attribute.
    """
    template = "{} '{}' (ID: {})"
    kind = type(obj).__name__
    return template.format(kind, obj.name, obj.id)

Expand Down Expand Up @@ -46,13 +38,15 @@ def validate(filename):


def main():
args = sys.argv
if len(args) < 2:
usage()
sys.exit(1)

nixfnames = args[1:]
for nixfn in nixfnames:
parser = argparse.ArgumentParser(
description="Validate NIX files"
)
parser.add_argument("file", type=str, nargs="+",
help="path to file to validate (at least one)")
args = parser.parse_args()
filenames = args.file

for nixfn in filenames:
if os.path.exists(nixfn):
validate(nixfn)
else:
Expand Down
Loading