Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fetch_data.py (also PBP geometry fix) #303

Merged
merged 11 commits into from
Jul 23, 2024
147 changes: 147 additions & 0 deletions ImageD11/fetch_data.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,147 @@
# coding: utf-8

import os

import six
from six.moves import urllib # python 2/3 compatible

import ImageD11.sinograms.dataset as id11dset

if six.PY2:  # python 2/3 compatibility
    # Python 2 has no FileNotFoundError builtin; alias it to IOError so the
    # post-download existence check in download_url() raises a consistent
    # exception type on both interpreters.
    FileNotFoundError = IOError

"""
A list of functions used to import test datasets from Zenodo for CI/Jupyter notebooks.
Inspired by orix/data/__init__.py from https://github.com/pyxem/orix
"""

# what Zenodo folder can these be found in?
# Maps dataset name -> base URL of the Zenodo record; filenames below are
# appended to this URL to form each download link.
dataset_base_urls = {
    'Si_cube_S3DXRD_nt_moves_dty': "https://sandbox.zenodo.org/records/90518/files/",
}

# What are the names of the files in the Zenodo folder?
# Maps dataset name -> {file role: filename}. Roles that appear in
# DataSet.ATTRNAMES (e.g. 'sparsefile', 'parfile') are set as attributes on
# the DataSet object by _get_dataset().
dataset_filenames = {
    'Si_cube_S3DXRD_nt_moves_dty': {
        'sparsefile': 'Si_cube_S3DXRD_nt_moves_dty_sparse.h5',
        'parfile': 'Si_refined.par',
        'e2dxfile': 'e2dx_E-08-0144_20240205.edf',
        'e2dyfile': 'e2dy_E-08-0144_20240205.edf'
    }
}

# Also what's the sample and what's the dataset name?
# Maps dataset name -> sample/dataset identifiers used to build the
# raw/processed folder structure for the DataSet object.
dataset_metadata = {
    'Si_cube_S3DXRD_nt_moves_dty': {
        'sample': 'Si_cube',
        'dataset': 'S3DXRD_nt_moves_dty',
    }
}


def download_url(url, dest_file_path):
    """Retrieve the file at ``url`` and store it at ``dest_file_path``.

    Raises FileNotFoundError if nothing exists at the destination after
    the transfer (i.e. the download silently failed).
    """
    print('Downloading ' + url + ' to ' + dest_file_path)
    urllib.request.urlretrieve(url, dest_file_path)
    arrived = os.path.exists(dest_file_path)
    if not arrived:
        raise FileNotFoundError('download failed!')


def _get_dataset(test_dataset_name, dest_folder, allow_download):
    """Get a dataset from disk or Zenodo given the dataset name and a destination folder.

    Resolution order: an already-processed dataset H5 on disk, then a
    downloaded sparse file, then (if ``allow_download``) a fresh download
    from Zenodo.

    Parameters
    ----------
    test_dataset_name : str
        Key into the module-level dataset tables.
    dest_folder : str
        Existing folder that holds (or will hold) 'raw' and 'processed'.
    allow_download : bool
        Whether missing files may be fetched from Zenodo.

    Returns
    -------
    ImageD11.sinograms.dataset.DataSet or None
        A populated dataset object, or None when the files are absent and
        downloading is not allowed.

    Raises
    ------
    ValueError
        If the dataset name is not present in all three dataset tables.
    FileNotFoundError
        If ``dest_folder`` does not exist, or a download fails.
    """
    # one membership check per table (was three copy-pasted if-statements)
    for table in (dataset_metadata, dataset_filenames, dataset_base_urls):
        if test_dataset_name not in table:
            raise ValueError('Invalid dataset name supplied!')

    absolute_path = os.path.abspath(dest_folder)
    if not os.path.exists(absolute_path):
        raise FileNotFoundError('dest_folder path does not exist!')
    dest_folder = absolute_path
    # Destination folder exists, so we continue

    # Try to find the dataset locally first
    sample = dataset_metadata[test_dataset_name]['sample']
    dsname = dataset_metadata[test_dataset_name]['dataset']

    raw_data_root_dir = os.path.join(dest_folder, 'raw')
    processed_data_root_dir = os.path.join(dest_folder, 'processed')

    # at this point, we have enough information to make a dataset object
    # that way, we can use the paths generated by the dataset object
    # and we don't need to redeclare the path structure for our processed data
    ds = id11dset.DataSet(dataroot=raw_data_root_dir,
                          analysisroot=processed_data_root_dir,
                          sample=sample,
                          dset=dsname)

    if os.path.exists(ds.dsfile):
        # a processed dataset H5 already exists: reload from file so we get
        # the extra attributes the init method does not populate
        ds = id11dset.load(ds.dsfile)
        print('Already found a dataset downloaded! Returning that')
        return ds

    if os.path.exists(ds.sparsefile):
        print('Found a sparse file downloaded! Making a dataset to return...')
        ds.import_from_sparse(ds.sparsefile)

        # do we also have spatials and a parfile alongside the sparse file?
        for filetype, filename in dataset_filenames[test_dataset_name].items():
            # only roles that map onto DataSet attributes are attached
            if filetype in id11dset.DataSet.ATTRNAMES and filetype != "sparsefile":
                # spatial or par: should be in processed_data_root_dir
                filepath = os.path.join(processed_data_root_dir, filename)
                if os.path.exists(filepath):
                    setattr(ds, filetype, filepath)

        ds.save()
        return ds

    if not allow_download:
        print("Couldn't find the files on the disk and allow_download is False!")
        return None

    print('Downloading files!')
    # make the raw and processed folders; tolerate leftovers from a partial
    # earlier attempt (a bare os.mkdir/os.makedirs would raise OSError if
    # the folder already existed)
    if not os.path.exists(raw_data_root_dir):
        os.mkdir(raw_data_root_dir)
    if not os.path.exists(ds.analysispath):
        os.makedirs(ds.analysispath)

    for filetype, filename in dataset_filenames[test_dataset_name].items():
        file_url = dataset_base_urls[test_dataset_name] + filename
        # is it a file that could be used as an attribute?
        if filetype in id11dset.DataSet.ATTRNAMES:
            if hasattr(ds, filetype):
                # the dataset has a path for this filetype already
                filepath = getattr(ds, filetype)
                download_url(file_url, filepath)
            else:
                # probably a spatial or a parfile:
                # put it in processed_data_root_dir and record the location
                filepath = os.path.join(processed_data_root_dir, filename)
                download_url(file_url, filepath)
                setattr(ds, filetype, filepath)

    ds.import_from_sparse(ds.sparsefile)
    ds.save()
    return ds


def si_cube_s3dxrd_dataset(dest_folder, allow_download=False):
    """Fetch the 'Si_cube_S3DXRD_nt_moves_dty' test dataset into ``dest_folder``.

    Convenience wrapper around _get_dataset(); see that function for the
    lookup/download behaviour and the returned DataSet object.
    """
    dataset_key = 'Si_cube_S3DXRD_nt_moves_dty'
    return _get_dataset(dataset_key,
                        dest_folder=dest_folder,
                        allow_download=allow_download)
14 changes: 7 additions & 7 deletions ImageD11/grain.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,4 @@
## Automatically adapted for numpy.oldnumeric Sep 06, 2007 by alter_code1.py


from __future__ import print_function

import h5py
# ImageD11_v0.4 Software for beamline ID11
# Copyright (C) 2005 Jon Wright
#
Expand All @@ -21,10 +16,15 @@
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA

import numpy as np, math
import ImageD11.indexing, ImageD11.unitcell, ImageD11.finite_strain
from __future__ import print_function

import h5py
import numpy as np
import xfab.tools

import ImageD11.finite_strain
import ImageD11.unitcell


# helpers : put these into xfab.tools at some point?
def e6_to_symm(e):
Expand Down
5 changes: 3 additions & 2 deletions ImageD11/indexing.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,8 @@


import numpy as np
from . import cImageD11, unitcell, grain
from . import cImageD11, unitcell
from ImageD11.grain import grain
from xfab.tools import ubi_to_u, u_to_rod, ubi_to_rod

import math, time, sys
Expand Down Expand Up @@ -1388,7 +1389,7 @@ def do_index(
finally:
cImageD11.cimaged11_omp_set_num_threads(threadb4)

grains = [grain.grain(ubi) for ubi in indexer.ubis]
grains = [grain(ubi) for ubi in indexer.ubis]
print("Found {} grains".format(len(grains)))

return grains, indexer
29 changes: 18 additions & 11 deletions ImageD11/nbGui/nb_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -283,18 +283,18 @@ def plot_index_results(ind, colfile, title):

# set a mask of all non-assigned g-vectors

m = ind.ga == -1
# m = colfile.grain_id == -1
# m = ind.ga == -1
m = colfile.grain_id == -1

# plot the assigned g-vectors omega vs dty (sinograms)

axs_flat[1].scatter(colfile.omega[~m],
colfile.dty[~m],
c=ind.ga[~m],
c=colfile.grain_id[~m],
s=2,
cmap='tab20')

axs_flat[1].set(title='Sinograms of {} grains'.format(ind.ga.max() + 1),
axs_flat[1].set(title='Sinograms of {} grains'.format(colfile.grain_id.max() + 1),
xlabel='Omega/deg',
ylabel='dty/um')

Expand Down Expand Up @@ -358,13 +358,20 @@ def plot_grain_sinograms(grains, cf, n_grains_to_plot=None):
nrows = (len(grains[::grains_step]) + grid_size - 1) // grid_size

fig, axs = plt.subplots(grid_size, nrows, figsize=(10, 10), layout="constrained", sharex=True, sharey=True)
for i, ax in enumerate(axs.ravel()):
if i < len(grains[::grains_step]):
# get corresponding grain for this axis
g = grains[::grains_step][i]
m = cf.grain_id == g.gid
ax.scatter(cf.omega[m], cf.dty[m], c=cf.sum_intensity[m], s=2)
ax.set_title(g.gid)
if grid_size == 1 and nrows == 1:
# only 1 grain
g = grains[0]
m = cf.grain_id == g.gid
axs.scatter(cf.omega[m], cf.dty[m], c=cf.sum_intensity[m], s=2)
axs.set_title(g.gid)
else:
for i, ax in enumerate(axs.ravel()):
if i < len(grains[::grains_step]):
# get corresponding grain for this axis
g = grains[::grains_step][i]
m = cf.grain_id == g.gid
ax.scatter(cf.omega[m], cf.dty[m], c=cf.sum_intensity[m], s=2)
ax.set_title(g.gid)

fig.supxlabel("Omega")
fig.supylabel("Y translation (um)")
Expand Down
54 changes: 31 additions & 23 deletions ImageD11/sinograms/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,15 +77,16 @@ class DataSet:
NDNAMES = ("omega", "dty", "nnz", "frames_per_file", "nlm", "frames_per_scan")

def __init__(
self,
dataroot=".",
analysisroot=".",
sample="sample",
dset="dataset",
detector="eiger",
omegamotor="rot_center",
dtymotor="dty",
filename=None,
self,
dataroot=".",
analysisroot=".",
sample="sample",
dset="dataset",
detector="eiger",
omegamotor="rot_center",
dtymotor="dty",
filename=None,
analysispath=None
):
"""The things we need to know to process data"""

Expand Down Expand Up @@ -129,12 +130,17 @@ def __init__(
if filename is not None:
self.load(filename)
# paths for processed data

self.analysispath = analysispath

self.update_paths()

def update_paths(self):
def update_paths(self, force=False):
# paths for processed data
# root of analysis for this dataset for this sample:
self.analysispath = os.path.join(self.analysisroot, self.sample, self.dsname)
self.analysispath_default = os.path.join(self.analysisroot, self.sample, self.dsname)
if self.analysispath is None:
self.analysispath = self.analysispath_default

self.dsfile_default = os.path.join(
self.analysispath, self.dsname + "_dataset.h5"
Expand All @@ -156,7 +162,7 @@ def update_paths(self):
("pbpfile", "_pbp.txt"),
]:
# If the user has got a different name (via loading or edit), we keep that
if getattr(self, name, None) is None:
if (getattr(self, name, None) is None) or force:
# Otherwise, these are the defaults.
setattr(self, name, os.path.join(self.analysispath, self.dsname + extn))

Expand Down Expand Up @@ -259,9 +265,10 @@ def import_scans(self, scans=None, hname=None):
scan
for scan in list(hin["/"])
if (
scan.endswith(".1")
and ("measurement" in hin[scan])
and (self.detector in hin[scan]["measurement"])
scan.endswith(".1")
and ("measurement" in hin[scan])
and (self.detector in hin[scan]["measurement"])
and (self.omegamotor in hin[scan]["measurement"])
)
]
goodscans = []
Expand Down Expand Up @@ -298,7 +305,7 @@ def import_imagefiles(self):
bad = []
for i, scan in enumerate(self.scans):
if ("measurement" not in hin[scan]) or (
self.detector not in hin[scan]["measurement"]
self.detector not in hin[scan]["measurement"]
):
print("Bad scan", scan)
bad.append(scan)
Expand All @@ -316,7 +323,8 @@ def import_imagefiles(self):
assert self.limapath == vsrc.dset_name
self.frames_per_file = np.array(self.frames_per_file, int)
self.sparsefiles = [
name.replace("/", "_").replace(".h5", "_sparse.h5")
os.path.join('sparsefiles',
name.replace("/", "_").replace(".h5", "_sparse.h5"))
for name in self.imagefiles
]
logging.info("imported %d lima filenames" % (np.sum(self.frames_per_file)))
Expand All @@ -329,11 +337,11 @@ def import_motors_from_master(self):
"""
# self.guess_motornames()
self.omega = [
None,
] * len(self.scans)
None,
] * len(self.scans)
self.dty = [
None,
] * len(self.scans)
None,
] * len(self.scans)
with h5py.File(self.masterfile, "r") as hin:
bad = []
for i, scan in enumerate(self.scans):
Expand Down Expand Up @@ -774,7 +782,7 @@ def load(self, h5name=None, h5group="/"):
if name in grp:
stringlist = list(grp[name][()])
if hasattr(stringlist[0], "decode") or isinstance(
stringlist[0], np.ndarray
stringlist[0], np.ndarray
):
data = [s.decode() for s in stringlist]
else:
Expand All @@ -791,7 +799,7 @@ def load(self, h5name=None, h5group="/"):


def load(h5name, h5group='/'):
ds_obj = DataSet(filename = h5name)
ds_obj = DataSet(filename=h5name)
return ds_obj


Expand Down
5 changes: 4 additions & 1 deletion ImageD11/sinograms/geometry.py
Original file line number Diff line number Diff line change
Expand Up @@ -239,7 +239,10 @@ def dtyimask_from_sincos(si, sj, sinomega, cosomega, dtyi, y0, ystep):
# --> si = x / ystep
# --> sj = -y / ystep
# --> dtyi = np.round(dty_step).astype(int)
dtyi_calc = np.round(dtycalc_sincos(sinomega, cosomega, si, -sj, y0)) # ystep units

x, y = step_to_sample(si, sj, ystep) # flips sj sign (y = -sj * ystep)
dty = dtycalc_sincos(sinomega, cosomega, x, y, y0) # computes dty
dtyi_calc = dty_to_dtyi(dty, ystep) # flips sj sign again (sj = -y / ystep)
return dtyi == dtyi_calc


Expand Down
5 changes: 5 additions & 0 deletions ImageD11/sinograms/lima_segmenter.py
Original file line number Diff line number Diff line change
Expand Up @@ -399,6 +399,11 @@ def setup_slurm_array(dsname, dsgroup="/", pythonpath=None):
sdir = os.path.join(dso.analysispath, "slurm")
if not os.path.exists(sdir):
os.makedirs(sdir)

sparsefilesdir = os.path.split(dstlima[0])[0]
if not os.path.exists(sparsefilesdir):
os.makedirs(sparsefilesdir)

options = SegmenterOptions()
options.load(dsname, dsgroup + "/lima_segmenter")

Expand Down
1 change: 1 addition & 0 deletions test/run_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@
"eps_sig.test_eps",
"test_finite_strain",
"test_stress"
"test_fetch_data"
]

if "all" in sys.argv:
Expand Down
Loading
Loading