Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fetch_data.py (also PBP geometry fix) #303

Merged
merged 11 commits into from
Jul 23, 2024
147 changes: 147 additions & 0 deletions ImageD11/fetch_data.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,147 @@
# coding: utf-8

import os

import six
from six.moves import urllib # python 2/3 compatible

import ImageD11.sinograms.dataset as id11dset

if six.PY2:  # python 2/3 compatibility
    # Python 2 has no FileNotFoundError builtin; alias it to IOError so the
    # post-download existence check in download_url() raises a consistent
    # exception type on both interpreters.
    FileNotFoundError = IOError

"""
A list of functions used to import test datasets from Zenodo for CI/Jupyter notebooks.
Inspired by orix/data/__init__.py from https://github.com/pyxem/orix
"""

# what Zenodo folder can these be found in?
# Maps dataset name -> base URL of the Zenodo record; filenames below are
# appended to this URL to form each download link.
dataset_base_urls = {
    'Si_cube_S3DXRD_nt_moves_dty': "https://sandbox.zenodo.org/records/90518/files/",
}

# What are the names of the files in the Zenodo folder?
# Maps dataset name -> {file role: filename}. Roles that appear in
# DataSet.ATTRNAMES (e.g. 'sparsefile', 'parfile') are set as attributes on
# the DataSet object by _get_dataset().
dataset_filenames = {
    'Si_cube_S3DXRD_nt_moves_dty': {
        'sparsefile': 'Si_cube_S3DXRD_nt_moves_dty_sparse.h5',
        'parfile': 'Si_refined.par',
        'e2dxfile': 'e2dx_E-08-0144_20240205.edf',
        'e2dyfile': 'e2dy_E-08-0144_20240205.edf'
    }
}

# Also what's the sample and what's the dataset name?
# Maps dataset name -> sample/dataset identifiers used to build the
# raw/processed folder structure for the DataSet object.
dataset_metadata = {
    'Si_cube_S3DXRD_nt_moves_dty': {
        'sample': 'Si_cube',
        'dataset': 'S3DXRD_nt_moves_dty',
    }
}


def download_url(url, dest_file_path):
    """Retrieve the file at ``url`` and store it at ``dest_file_path``.

    Raises FileNotFoundError if nothing exists at the destination after
    the transfer (i.e. the download silently failed).
    """
    print('Downloading ' + url + ' to ' + dest_file_path)
    urllib.request.urlretrieve(url, dest_file_path)
    arrived = os.path.exists(dest_file_path)
    if not arrived:
        raise FileNotFoundError('download failed!')


def _get_dataset(test_dataset_name, dest_folder, allow_download):
    """Get a dataset from disk or Zenodo given the dataset name and a destination folder.

    Resolution order: an already-processed dataset H5 on disk, then a
    downloaded sparse file, then (if ``allow_download``) a fresh download
    from Zenodo.

    Parameters
    ----------
    test_dataset_name : str
        Key into the module-level dataset tables.
    dest_folder : str
        Existing folder that holds (or will hold) 'raw' and 'processed'.
    allow_download : bool
        Whether missing files may be fetched from Zenodo.

    Returns
    -------
    ImageD11.sinograms.dataset.DataSet or None
        A populated dataset object, or None when the files are absent and
        downloading is not allowed.

    Raises
    ------
    ValueError
        If the dataset name is not present in all three dataset tables.
    FileNotFoundError
        If ``dest_folder`` does not exist, or a download fails.
    """
    # one membership check per table (was three copy-pasted if-statements)
    for table in (dataset_metadata, dataset_filenames, dataset_base_urls):
        if test_dataset_name not in table:
            raise ValueError('Invalid dataset name supplied!')

    absolute_path = os.path.abspath(dest_folder)
    if not os.path.exists(absolute_path):
        raise FileNotFoundError('dest_folder path does not exist!')
    dest_folder = absolute_path
    # Destination folder exists, so we continue

    # Try to find the dataset locally first
    sample = dataset_metadata[test_dataset_name]['sample']
    dsname = dataset_metadata[test_dataset_name]['dataset']

    raw_data_root_dir = os.path.join(dest_folder, 'raw')
    processed_data_root_dir = os.path.join(dest_folder, 'processed')

    # at this point, we have enough information to make a dataset object
    # that way, we can use the paths generated by the dataset object
    # and we don't need to redeclare the path structure for our processed data
    ds = id11dset.DataSet(dataroot=raw_data_root_dir,
                          analysisroot=processed_data_root_dir,
                          sample=sample,
                          dset=dsname)

    if os.path.exists(ds.dsfile):
        # a processed dataset H5 already exists: reload from file so we get
        # the extra attributes the init method does not populate
        ds = id11dset.load(ds.dsfile)
        print('Already found a dataset downloaded! Returning that')
        return ds

    if os.path.exists(ds.sparsefile):
        print('Found a sparse file downloaded! Making a dataset to return...')
        ds.import_from_sparse(ds.sparsefile)

        # do we also have spatials and a parfile alongside the sparse file?
        for filetype, filename in dataset_filenames[test_dataset_name].items():
            # only roles that map onto DataSet attributes are attached
            if filetype in id11dset.DataSet.ATTRNAMES and filetype != "sparsefile":
                # spatial or par: should be in processed_data_root_dir
                filepath = os.path.join(processed_data_root_dir, filename)
                if os.path.exists(filepath):
                    setattr(ds, filetype, filepath)

        ds.save()
        return ds

    if not allow_download:
        print("Couldn't find the files on the disk and allow_download is False!")
        return None

    print('Downloading files!')
    # make the raw and processed folders; tolerate leftovers from a partial
    # earlier attempt (a bare os.mkdir/os.makedirs would raise OSError if
    # the folder already existed)
    if not os.path.exists(raw_data_root_dir):
        os.mkdir(raw_data_root_dir)
    if not os.path.exists(ds.analysispath):
        os.makedirs(ds.analysispath)

    for filetype, filename in dataset_filenames[test_dataset_name].items():
        file_url = dataset_base_urls[test_dataset_name] + filename
        # is it a file that could be used as an attribute?
        if filetype in id11dset.DataSet.ATTRNAMES:
            if hasattr(ds, filetype):
                # the dataset has a path for this filetype already
                filepath = getattr(ds, filetype)
                download_url(file_url, filepath)
            else:
                # probably a spatial or a parfile:
                # put it in processed_data_root_dir and record the location
                filepath = os.path.join(processed_data_root_dir, filename)
                download_url(file_url, filepath)
                setattr(ds, filetype, filepath)

    ds.import_from_sparse(ds.sparsefile)
    ds.save()
    return ds


def si_cube_s3dxrd_dataset(dest_folder, allow_download=False):
    """Fetch the 'Si_cube_S3DXRD_nt_moves_dty' test dataset into ``dest_folder``.

    Convenience wrapper around _get_dataset(); see that function for the
    lookup/download behaviour and the returned DataSet object.
    """
    dataset_key = 'Si_cube_S3DXRD_nt_moves_dty'
    return _get_dataset(dataset_key,
                        dest_folder=dest_folder,
                        allow_download=allow_download)
14 changes: 7 additions & 7 deletions ImageD11/grain.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,4 @@
## Automatically adapted for numpy.oldnumeric Sep 06, 2007 by alter_code1.py


from __future__ import print_function

import h5py
# ImageD11_v0.4 Software for beamline ID11
# Copyright (C) 2005 Jon Wright
#
Expand All @@ -21,10 +16,15 @@
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA

import numpy as np, math
import ImageD11.indexing, ImageD11.unitcell, ImageD11.finite_strain
from __future__ import print_function

import h5py
import numpy as np
import xfab.tools

import ImageD11.finite_strain
import ImageD11.unitcell


# helpers : put these into xfab.tools at some point?
def e6_to_symm(e):
Expand Down
5 changes: 3 additions & 2 deletions ImageD11/indexing.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,8 @@


import numpy as np
from . import cImageD11, unitcell, grain
from . import cImageD11, unitcell
from ImageD11.grain import grain
from xfab.tools import ubi_to_u, u_to_rod, ubi_to_rod

import math, time, sys
Expand Down Expand Up @@ -1388,7 +1389,7 @@ def do_index(
finally:
cImageD11.cimaged11_omp_set_num_threads(threadb4)

grains = [grain.grain(ubi) for ubi in indexer.ubis]
grains = [grain(ubi) for ubi in indexer.ubis]
print("Found {} grains".format(len(grains)))

return grains, indexer
29 changes: 18 additions & 11 deletions ImageD11/nbGui/nb_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -283,18 +283,18 @@ def plot_index_results(ind, colfile, title):

# set a mask of all non-assigned g-vectors

m = ind.ga == -1
# m = colfile.grain_id == -1
# m = ind.ga == -1
m = colfile.grain_id == -1

# plot the assigned g-vectors omega vs dty (sinograms)

axs_flat[1].scatter(colfile.omega[~m],
colfile.dty[~m],
c=ind.ga[~m],
c=colfile.grain_id[~m],
s=2,
cmap='tab20')

axs_flat[1].set(title='Sinograms of {} grains'.format(ind.ga.max() + 1),
axs_flat[1].set(title='Sinograms of {} grains'.format(colfile.grain_id.max() + 1),
xlabel='Omega/deg',
ylabel='dty/um')

Expand Down Expand Up @@ -358,13 +358,20 @@ def plot_grain_sinograms(grains, cf, n_grains_to_plot=None):
nrows = (len(grains[::grains_step]) + grid_size - 1) // grid_size

fig, axs = plt.subplots(grid_size, nrows, figsize=(10, 10), layout="constrained", sharex=True, sharey=True)
for i, ax in enumerate(axs.ravel()):
if i < len(grains[::grains_step]):
# get corresponding grain for this axis
g = grains[::grains_step][i]
m = cf.grain_id == g.gid
ax.scatter(cf.omega[m], cf.dty[m], c=cf.sum_intensity[m], s=2)
ax.set_title(g.gid)
if grid_size == 1 and nrows == 1:
# only 1 grain
g = grains[0]
m = cf.grain_id == g.gid
axs.scatter(cf.omega[m], cf.dty[m], c=cf.sum_intensity[m], s=2)
axs.set_title(g.gid)
else:
for i, ax in enumerate(axs.ravel()):
if i < len(grains[::grains_step]):
# get corresponding grain for this axis
g = grains[::grains_step][i]
m = cf.grain_id == g.gid
ax.scatter(cf.omega[m], cf.dty[m], c=cf.sum_intensity[m], s=2)
ax.set_title(g.gid)

fig.supxlabel("Omega")
fig.supylabel("Y translation (um)")
Expand Down
54 changes: 31 additions & 23 deletions ImageD11/sinograms/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,15 +77,16 @@ class DataSet:
NDNAMES = ("omega", "dty", "nnz", "frames_per_file", "nlm", "frames_per_scan")

def __init__(
self,
dataroot=".",
analysisroot=".",
sample="sample",
dset="dataset",
detector="eiger",
omegamotor="rot_center",
dtymotor="dty",
filename=None,
self,
dataroot=".",
analysisroot=".",
sample="sample",
dset="dataset",
detector="eiger",
omegamotor="rot_center",
dtymotor="dty",
filename=None,
analysispath=None
):
"""The things we need to know to process data"""

Expand Down Expand Up @@ -129,12 +130,17 @@ def __init__(
if filename is not None:
self.load(filename)
# paths for processed data

self.analysispath = analysispath

self.update_paths()

def update_paths(self):
def update_paths(self, force=False):
# paths for processed data
# root of analysis for this dataset for this sample:
self.analysispath = os.path.join(self.analysisroot, self.sample, self.dsname)
self.analysispath_default = os.path.join(self.analysisroot, self.sample, self.dsname)
if self.analysispath is None:
self.analysispath = self.analysispath_default

self.dsfile_default = os.path.join(
self.analysispath, self.dsname + "_dataset.h5"
Expand All @@ -156,7 +162,7 @@ def update_paths(self):
("pbpfile", "_pbp.txt"),
]:
# If the user has got a different name (via loading or edit), we keep that
if getattr(self, name, None) is None:
if (getattr(self, name, None) is None) or force:
# Otherwise, these are the defaults.
setattr(self, name, os.path.join(self.analysispath, self.dsname + extn))

Expand Down Expand Up @@ -259,9 +265,10 @@ def import_scans(self, scans=None, hname=None):
scan
for scan in list(hin["/"])
if (
scan.endswith(".1")
and ("measurement" in hin[scan])
and (self.detector in hin[scan]["measurement"])
scan.endswith(".1")
and ("measurement" in hin[scan])
and (self.detector in hin[scan]["measurement"])
and (self.omegamotor in hin[scan]["measurement"])
)
]
goodscans = []
Expand Down Expand Up @@ -298,7 +305,7 @@ def import_imagefiles(self):
bad = []
for i, scan in enumerate(self.scans):
if ("measurement" not in hin[scan]) or (
self.detector not in hin[scan]["measurement"]
self.detector not in hin[scan]["measurement"]
):
print("Bad scan", scan)
bad.append(scan)
Expand All @@ -316,7 +323,8 @@ def import_imagefiles(self):
assert self.limapath == vsrc.dset_name
self.frames_per_file = np.array(self.frames_per_file, int)
self.sparsefiles = [
name.replace("/", "_").replace(".h5", "_sparse.h5")
os.path.join('sparsefiles',
name.replace("/", "_").replace(".h5", "_sparse.h5"))
for name in self.imagefiles
]
logging.info("imported %d lima filenames" % (np.sum(self.frames_per_file)))
Expand All @@ -329,11 +337,11 @@ def import_motors_from_master(self):
"""
# self.guess_motornames()
self.omega = [
None,
] * len(self.scans)
None,
] * len(self.scans)
self.dty = [
None,
] * len(self.scans)
None,
] * len(self.scans)
with h5py.File(self.masterfile, "r") as hin:
bad = []
for i, scan in enumerate(self.scans):
Expand Down Expand Up @@ -774,7 +782,7 @@ def load(self, h5name=None, h5group="/"):
if name in grp:
stringlist = list(grp[name][()])
if hasattr(stringlist[0], "decode") or isinstance(
stringlist[0], np.ndarray
stringlist[0], np.ndarray
):
data = [s.decode() for s in stringlist]
else:
Expand All @@ -791,7 +799,7 @@ def load(self, h5name=None, h5group="/"):


def load(h5name, h5group='/'):
ds_obj = DataSet(filename = h5name)
ds_obj = DataSet(filename=h5name)
return ds_obj


Expand Down
5 changes: 4 additions & 1 deletion ImageD11/sinograms/geometry.py
Original file line number Diff line number Diff line change
Expand Up @@ -239,7 +239,10 @@ def dtyimask_from_sincos(si, sj, sinomega, cosomega, dtyi, y0, ystep):
# --> si = x / ystep
# --> sj = -y / ystep
# --> dtyi = np.round(dty_step).astype(int)
dtyi_calc = np.round(dtycalc_sincos(sinomega, cosomega, si, -sj, y0)) # ystep units

x, y = step_to_sample(si, sj, ystep) # flips sj sign (y = -sj * ystep)
dty = dtycalc_sincos(sinomega, cosomega, x, y, y0) # computes dty
dtyi_calc = dty_to_dtyi(dty, ystep) # flips sj sign again (sj = -y / ystep)
return dtyi == dtyi_calc


Expand Down
5 changes: 5 additions & 0 deletions ImageD11/sinograms/lima_segmenter.py
Original file line number Diff line number Diff line change
Expand Up @@ -399,6 +399,11 @@ def setup_slurm_array(dsname, dsgroup="/", pythonpath=None):
sdir = os.path.join(dso.analysispath, "slurm")
if not os.path.exists(sdir):
os.makedirs(sdir)

sparsefilesdir = os.path.split(dstlima[0])[0]
if not os.path.exists(sparsefilesdir):
os.makedirs(sparsefilesdir)

options = SegmenterOptions()
options.load(dsname, dsgroup + "/lima_segmenter")

Expand Down
1 change: 1 addition & 0 deletions test/run_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@
"eps_sig.test_eps",
"test_finite_strain",
"test_stress"
"test_fetch_data"
]

if "all" in sys.argv:
Expand Down
Loading
Loading