Skip to content

Commit

Permalink
Merge pull request #174 from jonwright/master
Browse files Browse the repository at this point in the history
Should work for bigger s3DXRD maps
  • Loading branch information
jonwright authored Sep 12, 2023
2 parents dfaad1d + 94b8327 commit 1957cc0
Show file tree
Hide file tree
Showing 9 changed files with 969 additions and 121 deletions.
168 changes: 115 additions & 53 deletions ImageD11/sinograms/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ class DataSet:
)
STRINGLISTS = ( "scans", "imagefiles", "sparsefiles" )
# sinograms
NDNAMES = ( "omega", "dty", "nnz", "frames_per_file", "nlm" )
NDNAMES = ( "omega", "dty", "nnz", "frames_per_file", "nlm", "frames_per_scan" )

def __init__(self,
dataroot = ".",
Expand All @@ -48,27 +48,28 @@ def __init__(self,
""" The things we need to know to process data """

self.detector = 'eiger' # frelon3
self.limapath = None
self.limapath = None # where is the data in the Lima files

self.omegamotor = 'rot_center' # diffrz
self.dtymotor = 'dty' # diffty
self.dtymotor = 'dty' # diffty

self.dataroot = dataroot
self.analysisroot = analysisroot
self.sample = sample
self.dset = dset
self.dataroot = dataroot # folder to find {sample}/{sample}_{dset}
self.analysisroot = analysisroot # where to write or find sparse data
self.sample = sample # from bliss path
self.dset = dset # from bliss path

self.dsname = "_".join( (self.sample, self.dset ) )

self.datapath = os.path.join( self.dataroot, self.sample, self.dsname )
self.analysispath = os.path.join( self.analysisroot, self.sample, self.dsname )
self.masterfile = os.path.join( self.datapath, self.dsname + '.h5' )

# These are in order !
self.scans = None # read from master or load from analysis
# These are in order ! The order of the lists is important - all things should match.
self.scans = None # read from master or load from analysis
self.frames_per_scan = None # how many frames (and motor positions) in each scan row.
self.imagefiles = None # List of strings. w.r.t self.datapath
self.frames_per_file = None # how many frames in this file
self.sparsefiles = None # matches to self.imagefiles
self.frames_per_file = None # how many frames in this file (Lima files)
self.sparsefiles = None # maps sparse files to self.imagefiles

self.shape = (0, 0)
self.omega = None
Expand Down Expand Up @@ -125,28 +126,44 @@ def report(self):
print("# Segmented %d missing %d"%(self.check_sparse()))


def import_all(self):
def import_all(self, scans=None, shape=None):
# collect the data
self.import_scans()
self.import_scans( scans=scans )
# lima frames
self.import_imagefiles()
# motor positions
self.import_motors_from_master()
self.guess_shape()
self.guessbins()
# pixels per frame
try:
self.import_nnz()
except:
logging.info("nnz not available. Segmentation done?")


def import_scans(self, hname = None):
def import_scans(self, scans=None, hname = None):
""" Reads in the scans from the bliss master file """
if hname is None:
hname = self.masterfile
frames_per_scan = []
with h5py.File( hname, 'r' ) as hin:
scans = list( hin['/'] )
self.scans = [scan for scan in scans if scan.endswith('.1')]
self.shape = (len(self.scans), self.shape[1])
if scans is None:
scans = [scan for scan in list(hin['/']) if
(scan.endswith('.1') and
('measurement' in hin[scan]) and
(self.detector in hin[scan]['measurement'])) ]
goodscans = []
for scan in scans:
frames = hin[scan]['measurement'][self.detector]
if len(frames.shape)==3: # need 1D series of frames
goodscans.append(scan)
frames_per_scan.append( frames.shape[0] )
else:
print('Bad scan', scan)
self.scans = goodscans
self.frames_per_scan = frames_per_scan

logging.info( 'imported %d scans from %s'%(len(self.scans),hname))
return self.scans

Expand All @@ -164,63 +181,99 @@ def import_imagefiles(self):
bad.append(scan)
continue
frames = hin[scan]['measurement'][self.detector]
imageshape = frames.shape[1:]
if npts is None:
npts = len(frames)
else:
if len(frames) != npts:
print('warning!, scan is not regular %d %s :: %s'%(npts, self.masterfile, scan))
print('removing scan',scan)
bad.append(scan)
continue
self.imageshape = frames.shape[1:]
for vsrc in frames.virtual_sources():
self.imagefiles.append( vsrc.file_name )
self.frames_per_file.append( vsrc.src_space.shape[0] ) # not sure about this
# check limapath
if self.limapath is None:
self.limapath = vsrc.dset_name
assert self.limapath == vsrc.dset_name
self.scans = [scan for scan in self.scans if scan not in bad]
self.frames_per_file = np.array( self.frames_per_file, int )
self.sparsefiles = [ name.replace( '/', '_' ).replace( '.h5', '_sparse.h5' ) for name in
self.imagefiles ]
self.shape = (self.shape[0], npts)
logging.info( 'imported %d lima filenames'%( np.sum(self.frames_per_file) ) )
logging.info( 'sinogram shape = ( %d , %d ) imageshape = ( %d , %d)'%(
self.shape[0], self.shape[1], imageshape[0], imageshape[1] ) )


def import_motors_from_master(self): # could also get these from sparse files if saved
""" read the motors from the lima file
you need to import the imagefiles first
these will be the motor positions to accompany the images
"""
self.omega = np.zeros( self.shape, float ) # list of counters for monitor?
self.dty = np.zeros( self.shape, float )
self.omega = [None,] * len(self.scans)
self.dty = [None,] * len(self.scans)
with h5py.File( self.masterfile, 'r' ) as hin:
bad = []
for i, scan in enumerate( self.scans ):
# Should always be there, if not, filter scans before you get to here
om = hin[scan][ 'measurement' ][ self.omegamotor ][()]
# print(i, scan, om.shape, self.shape[1])
if len(om) == self.shape[1]:
if len(om) == self.frames_per_scan[i]:
self.omega[i] = om
else:
self.omega[i][0] = om[0]
else: # hope the first point was good ? Probably corrupted MUSST data.
self.omega[i] = [om[0],]
bad.append(i)
self.dty[i] = hin[scan][ 'instrument/positioners' ][ self.dtymotor ][()]
# this can be an array or a scalar
dty = hin[scan][ 'instrument/positioners' ][ self.dtymotor ]
if len(dty.shape) == 0:
self.dty[i] = np.full( self.frames_per_scan[i], dty[()] )
elif dty.shape[0] == self.frames_per_scan[i]:
self.dty[i] = dty[:]
else:
# corrupted MUSST?
self.dty[i] = np.full( self.frames_per_scan[i], dty[0] )
for b in bad:
if b+2 not in bad:
print("replace bad scan omega",b,b+2)
self.omega[b] = self.omega[b+2]
dom = [ (abs( self.omega[i][0] - self.omega[b] ), i) for i in range(len(self.scans))
if i not in bad ]
if len(dom)>0:
j = np.argmin( dom[0][1] )
self.omega[b] = self.omega[j] # best match
print("replace bad scan omega",b, self.scans[b],"with",j, self.scans[j])
logging.info( 'imported omega/dty' )
self.guessbins()


def guess_shape(self):
"""Guess the shape if it was not given """
npts = np.sum( self.frames_per_scan )
if len(self.scans) == 1: # probably fscan2d or f2scan
with h5py.File(self.masterfile,'r') as hin:
s = hin[self.scans[0]]
title = s['title'].asstr()[()]
print('Scan title',title)
if title.split()[0] == 'fscan2d':
s0 = s['instrument/fscan_parameters/slow_npoints'][()]
s1 = s['instrument/fscan_parameters/fast_npoints'][()]
elif title.split()[0] == 'f2scan':
# good luck ? Assuming rotation was the inner loop here:
step = s['instrument/fscan_parameters/step_size'][()]
s1 = int( np.round( 360 / step ) )
s0 = npts // s1
else:
s0 = 1
s1 = npts
if len(self.scans) > 1:
s0 = len(self.scans)
s1 = npts // s0
self.shape = s0, s1
if np.prod( self.shape ) != npts:
print("Warning: irregular scan - might be bugs in here")
print(npts, len(self.scans))
self.omega = np.array( self.omega )
self.dty = np.array(self.dty )
logging.info( 'sinogram shape = ( %d , %d ) imageshape = ( %d , %d)'%(
self.shape[0], self.shape[1], self.imageshape[0], self.imageshape[1] ) )


def guessbins(self):
ny, nomega = self.shape
self.omin = self.omega.min()
self.omax = self.omega.max()
if (self.omax - self.omin)>360:
# multi-turn scan...
self.omin = 0.
self.omax = 360.
self.omega_for_bins = self.omega % 360
else:
self.omega_for_bins = self.omega
# values 0, 1, 2
# shape = 3
# step = 1
Expand All @@ -237,7 +290,7 @@ def guessbins(self):
self.ybinedges = np.linspace( self.ymin-self.ystep/2, self.ymax + self.ystep/2, ny + 1 )


def sinohist(self, weights=None, method='fast', omega=None, dty=None):
def sinohist(self, weights=None, omega=None, dty=None, method='fast'):
""" Bin some data onto the sinogram histogram """
bins = len(self.obincens), len(self.ybincens)
rng = ( (self.obinedges[0], self.obinedges[-1]),
Expand All @@ -247,7 +300,7 @@ def sinohist(self, weights=None, method='fast', omega=None, dty=None):
else:
wt = weights
if omega is None:
omega = self.omega
omega = self.omega_for_bins
if dty is None:
dty = self.dty
if method == 'numpy':
Expand Down Expand Up @@ -346,6 +399,7 @@ def save( self, h5name, h5group = '/' ):
for name in self.NDNAMES:
data = getattr(self, name, None)
if data is not None:
data = np.asarray( data )
try:
chunks = guess_chunks(name, data.shape)
ds = grp.require_dataset( name,
Expand Down Expand Up @@ -421,6 +475,10 @@ def import_from_sparse( hname,
else:
raise Exception('Cannot read %d dty %s %s'%(i, str(o), str(o.shape) ))
# assert ds.nlm.shape == ds.shape
try:
ds.guess_scans()
except:
print("warning, guess scans failed")
try:
ds.guessbins()
except:
Expand All @@ -435,7 +493,18 @@ def import_from_sparse( hname,
# dset = "DT600nm_Z1" )
# s.import_all()



def check( dataroot, analysisroot, sample, dset, destination, scans=None ):

h5o = DataSet( dataroot = dataroot, analysisroot = analysisroot, sample = sample, dset = dset )
h5o.import_all(scans=scans)
h5o.save( destination )

print("Checking: Read back from hdf5")
t = load( destination )
t.report()
return t.compare(h5o)

if __name__=="__main__":
import sys
logging.basicConfig(stream=sys.stdout, level=logging.DEBUG)
Expand All @@ -444,13 +513,6 @@ def import_from_sparse( hname,
analysisroot = sys.argv[2]
sample = sys.argv[3]
dset = sys.argv[4]
destination = sys.argv[5]

h5o = DataSet( dataroot = dataroot, analysisroot = analysisroot, sample = sample, dset = dset )
h5o.import_all()
h5o.save( sys.argv[5] )

print("Read back from hdf5")
t = load( sys.argv[5] )
t.report()
t.compare(h5o)

check( dataroot, analysisroot, sample, dset, destination )
22 changes: 12 additions & 10 deletions ImageD11/sinograms/lima_segmenter.py
Original file line number Diff line number Diff line change
Expand Up @@ -327,9 +327,7 @@ def main( options ):



def setup_slurm_array( dsname, dsgroup='/',
cores_per_job=8,
files_per_core=8 ):
def setup_slurm_array( dsname, dsgroup='/'):
""" Send the tasks to slurm """
dso = dataset.load( dsname, dsgroup )
nfiles = len(dso.sparsefiles)
Expand All @@ -344,7 +342,9 @@ def setup_slurm_array( dsname, dsgroup='/',
sdir = os.path.join( dso.analysispath, 'slurm' )
if not os.path.exists( sdir ):
os.makedirs( sdir )
options = SegmenterOptions( dsname, 'lima_segmenter' )
options = SegmenterOptions( )
options.load( dsname, dsgroup + '/lima_segmenter' )

files_per_job = options.files_per_core * options.cores_per_job
jobs_needed = math.ceil( nfiles / files_per_job )
sbat = os.path.join( sdir, "lima_segmenter_slurm.sh" )
Expand All @@ -369,15 +369,17 @@ def setup_slurm_array( dsname, dsgroup='/',
return sbat


def setup( dsname ):
def setup( dsname, **kwds ):
dso = dataset.load( dsname )
options = SegmenterOptions()

options = SegmenterOptions( **kwds )
if 'eiger' in dso.limapath:
options.cut = 1
options.maskfile = "/data/id11/nanoscope/Eiger/mask_20210428.edf"
if 'cut' not in kwds:
options.cut = 1
if 'maskfile' not in kwds:
options.maskfile = "/data/id11/nanoscope/Eiger/mask_20210428.edf"
elif 'frelon3' in dso.limapath:
options.cut = 25, # keep values abuve cut in first look at image
if 'cut' not in kwds:
options.cut = 25, # keep values abuve cut in first look at image
else:
print("I don't know what to do")
options.save( dsname, 'lima_segmenter' )
Expand Down
Loading

0 comments on commit 1957cc0

Please sign in to comment.