
Merge pull request #380 from LSSTDESC/doc-file-types
Documentation Update
joezuntz authored Jan 23, 2025
2 parents 4cc7d93 + eff9096 commit c6768d8
Showing 61 changed files with 1,810 additions and 224 deletions.
62 changes: 62 additions & 0 deletions bin/gen-filetype-docs.py
@@ -0,0 +1,62 @@
import h5py
import tabulate
import sys

class DescriptionGenerator:
    def __init__(self):
        self.rows = []

    def __call__(self, name, obj):
        if not isinstance(obj, h5py.Dataset):
            return
        bits = name.split('/')
        groups = bits[:-1]
        name = bits[-1]
        kind = f"{obj.ndim}D {obj.dtype}"
        self.rows.append([groups, name, kind])

    def to_table(self):
        groups = [row[0] for row in self.rows]
        ngroup_max = max(len(group) for group in groups)
        headers = ["Group"] + [""] * (ngroup_max - 1) + ["Name", "Kind", "Meaning"]
        rows = []
        for row in self.rows:
            groups = row[0] + [""] * (ngroup_max - len(row[0]))
            name = row[1]
            kind = row[2]
            meaning = ""
            rows.append(groups + [name, kind, meaning])
        return tabulate.tabulate(rows, headers=headers, tablefmt='rst')


def describe_file(filename, outfile):
    with h5py.File(filename) as f:
        gen = DescriptionGenerator()
        f.visititems(gen)
        outfile.write(gen.to_table())

files = {
    "PhotometryCatalog": ("data/example/inputs/photometry_catalog.hdf5", "photometry"),
    "MetacalShearCatalog": ("data/example/inputs/shear_catalog.hdf5", "metacal"),
    "LensfitShearCatalog": ("data/example/inputs/lensfit_shear_catalog.hdf5", "lensfit"),
    "MetadetectShearCatalog": ("data/example/inputs/metadetect_shear_catalog.hdf5", "metadetect"),
    "StarCatalog": ("data/example/inputs/star_catalog.hdf5", "stars"),
    "TomographyCatalog": ("data/example/outputs_metadetect/shear_tomography_catalog.hdf5", "tomography"),
    "BinnedCatalog": ("data/example/outputs_metadetect/binned_lens_catalog.hdf5", "binned"),
    "RandomsCatalog": ("data/example/outputs_metadetect/random_cats.hdf5", "randoms"),
    "MapsFile": ("data/example/outputs_metadetect/lens_maps.hdf5", "maps"),
    "MetaData": ("data/example/outputs_metadetect/tracer_metadata.hdf5", "metadata"),
}

if __name__ == "__main__":
    if sys.argv[1] == "all":
        for name, (filename, outfile) in files.items():
            outpath = "docs/src/file_details/" + outfile + ".rst"
            with open(outpath, "w") as f:
                f.write(f"## {outfile.capitalize()}\n\n")
                describe_file(filename, f)
                f.write("\n\n\n")
    else:
        describe_file(sys.argv[1], sys.stdout)
        sys.stdout.write("\n")
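
For context, this is what the visitor pattern in DescriptionGenerator does in practice. A minimal sketch, assuming the class above is in scope and using a throwaway HDF5 file with made-up dataset names:

# Illustrative only: build a tiny HDF5 file and run the visitor over it.
# The file name and dataset names here are hypothetical, not TXPipe data.
import h5py
import numpy as np

with h5py.File("tiny_example.hdf5", "w") as f:
    f.create_dataset("photometry/ra", data=np.zeros(10))
    f.create_dataset("photometry/dec", data=np.zeros(10))

gen = DescriptionGenerator()
with h5py.File("tiny_example.hdf5", "r") as f:
    # visititems calls gen(name, obj) for every group and dataset below root;
    # the generator records only datasets, splitting group path from name
    f.visititems(gen)

# Prints an RST table with Group / Name / Kind / Meaning columns,
# e.g. a row like: photometry | ra | 1D float64 |
print(gen.to_table())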
75 changes: 75 additions & 0 deletions bin/get_all_tags.py
@@ -0,0 +1,75 @@
import ceci

pipeline_files = [
    "examples/2.2i/pipeline.yml",
    "examples/buzzard/pipeline.yml",
    "examples/clmm/pipeline.yml",
    "examples/cosmodc2/pipeline.yml",
    "examples/desy1/pipeline.yml",
    "examples/desy3/pipeline.yml",
    "examples/dp0.2/pipeline.yml",
    "examples/kids-1000/pipeline.yml",
    "examples/lensfit/pipeline.yml",
    "examples/lognormal/pipeline.yml",
    "examples/metacal/pipeline.yml",
    "examples/metadetect/pipeline.yml",
    "examples/metadetect_source_only/pipeline.yml",
    "examples/mock_shear/pipeline.yml",
    "examples/redmagic/pipeline.yml",
    "examples/skysim/pipeline.yml",
]

def get_tags(pipeline_file):
    tags = set()

    pipe_config = ceci.Pipeline.build_config(pipeline_file, dry_run=True)

    with ceci.prepare_for_pipeline(pipe_config) as pipeline:
        p = ceci.Pipeline.create(pipe_config)

        # First pass, get the classes for all the stages
        stage_classes = []
        for stage_name in p.stage_names:
            sec = p.stage_execution_config[stage_name]
            stage_classes.append(sec.build_stage_class())

            stage_aliases = sec.aliases
            stage_class = sec.stage_class
            for tag, ftype in stage_class.outputs:
                aliased_tag = stage_aliases.get(tag, tag)
                if ftype.suffix is not None:
                    tags.add((ftype.suffix, aliased_tag))
                # if ftype.suffix is None:
                #     print("NONE TAG TYPE:", stage_name, pipeline_file, stage_class)
            for tag, ftype in stage_class.inputs:
                aliased_tag = stage_aliases.get(tag, tag)
                if ftype.suffix is not None:
                    tags.add((ftype.suffix, aliased_tag))
                # if ftype.suffix is None:
                #     print("NONE TAG TYPE:", stage_name, pipeline_file, stage_class)

    return tags


def main():
    tags = set()
    for pipeline_file in pipeline_files:
        try:
            tags.update(get_tags(pipeline_file))
        except ceci.errors.StageNotFound:
            print(f"Error: {pipeline_file} old")
            continue
        except ValueError:
            print(f"Error: {pipeline_file} broken")
            continue
    # print(tags)
    for tag in sorted(tags):
        print(tag)


if __name__ == "__main__":
    main()
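
To see what the tag-collection loop produces, here is a self-contained sketch with stand-in classes rather than real ceci stages; all of the names are illustrative:

# Illustrative stand-ins for a ceci stage and file type; not the real API.
class FakeFileType:
    suffix = "hdf5"

class FakeStage:
    inputs = [("shear_catalog", FakeFileType)]
    outputs = [("shear_tomography_catalog", FakeFileType)]

aliases = {"shear_tomography_catalog": "source_tomography_catalog"}

tags = set()
for tag, ftype in FakeStage.outputs + FakeStage.inputs:
    aliased_tag = aliases.get(tag, tag)  # rename if the pipeline aliases it
    if ftype.suffix is not None:
        tags.add((ftype.suffix, aliased_tag))

print(sorted(tags))
# [('hdf5', 'shear_catalog'), ('hdf5', 'source_tomography_catalog')]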
File renamed without changes
File renamed without changes
File renamed without changes
62 changes: 55 additions & 7 deletions docs/make-stages.py
@@ -17,11 +17,17 @@
        - TXExposureInfo
        - TXIngestStars
        - TXMetacalGCRInput
        - TXIngestDataPreview02
        - TXSimpleMock
        - TXMockTruthPZ
        - TXLogNormalGlass
Photo-z:
    blurb: |
        These stages deal with photo-z PDF training and estimation
    stages:
        - TXPhotozStack
        - TXTruePhotozStack
        - PZRailTrainSource
        - PZRailTrainLens
        - PZRailTrainLensFromSource
@@ -38,6 +44,7 @@
        bins.
    stages:
        - TXSourceSelector
        - TXSourceSelectorSimple
        - TXSourceSelectorMetacal
        - TXSourceSelectorMetadetect
        - TXSourceSelectorLensfit
@@ -46,6 +53,7 @@
        - TXTruthLensSelector
        - TXMeanLensSelector
        - TXModeLensSelector
        - TXRandomForestLensSelector
Calibration and Splitting:
@@ -57,7 +65,18 @@
        - TXStarCatalogSplitter
        - TXLensCatalogSplitter
        - TXExternalLensCatalogSplitter
        - TXTruthLensCatalogSplitter
        - TXTruthLensCatalogSplitterWeighted
Weights:
    blurb: |
        These stages deal with weighting the lens sample
    stages:
        - TXLSSWeights
        - TXLSSWeightsLinBinned
        - TXLSSWeightsLinPix
        - TXLSSWeightsUnit
Maps:
    blurb: |
@@ -76,6 +95,8 @@
        - TXConvergenceMaps
        - TXMapCorrelations
        - TXSimpleMask
        - TXSimpleMaskSource
        - TXSimpleMaskFrac
        - TXAuxiliarySourceMaps
        - TXAuxiliaryLensMaps
        - TXUniformDepthMap
@@ -88,18 +109,25 @@
        - TXPhotozLensStack
        - TXSourceTrueNumberDensity
        - TXLensTrueNumberDensity
        - PZRailSummarize
Two-Point:
    blurb: |
        These stages deal with measuring or predicting two-point statistics.
    stages:
        - TXJackknifeCenters
        - TXJackknifeCentersSource
        - TXTwoPointFourier
        - TXTwoPoint
        - TXRandomCat
        - TXSubsampleRandoms
        - TXTwoPointTheoryReal
        - TXTwoPointTheoryFourier
        - TXTwoPointPixel
        - TXTwoPointPixelExtCross
        - TXTwoPointRLens
Covariance:
@@ -129,38 +157,58 @@
        - TXTwoPointPlotsFourier
        - TXConvergenceMapPlots
        - TXMapPlots
        - PZRealizationsPlot
        - TXTwoPointPlotsTheory
        - TXPhotozPlot
Diagnostics:
    blurb: |
        These stages compute and/or plot diagnostics of catalogs or other data
    stages:
        - TXGammaTFieldCenters
        - TXGammaTStars
        - TXGammaTRandoms
        - TXApertureMass
        - TXDiagnosticQuantiles
        - TXSourceDiagnosticPlots
        - TXLensDiagnosticPlots
        - TXPSFDiagnostics
        - TXPSFMomentCorr
        - TXRoweStatistics
        - TXTauStatistics
        - TXGalaxyStarShear
        - TXGalaxyStarDensity
        - TXBrighterFatterPlot
        - TXGammaTFieldCenters
        - TXGammaTStars
        - TXGammaTRandoms
        - TXApertureMass
        - TXFocalPlanePlot
Source Injection:
    blurb: |
        These stages ingest and use synthetic source injection information
    stages:
        - TXIngestSSIGCR
        - TXMatchSSI
        - TXIngestSSIMatched
        - TXIngestSSIMatchedDESBalrog
        - TXSSIMagnification
Extensions:
    blurb: |
        These stages are written for TXPipe extension projects.
    stages:
        - TXSelfCalibrationIA
        - CLClusterBinningRedshiftRichness
        - CLClusterShearCatalogs
        - CLClusterEnsembleProfiles
New and Miscellaneous:
    blurb: |
        These stages either don't fit into a category above or have not yet been
        assigned to one.
    stages:
        - TXTracerMetadata
        - TXParqetToHDF
""")

@@ -196,7 +244,7 @@ def get_name(cls):
if class_name == "BaseStageDoNotRunDirectly":
    continue
if class_name not in stages:
    print("Warning - update the section list for ", c)
    print("Warning - update the section list for ", class_name)

qual_name = get_name(cls)
section = stages.get(class_name, "New and Miscellaneous")
@@ -234,7 +282,7 @@ def get_name(cls):
   :caption: Contents:
""")

f.write(" stages/base\n")
for s, sd in sections.items():
    f.write(" stages/" + s + "\n")
f.write("\n")
2 changes: 1 addition & 1 deletion docs/src/adding.rst
@@ -68,7 +68,7 @@ The name of the class and the attribute ``name`` should be the same, and be desc

You need to decide on the inputs and outputs for the file, and give them tags and types.

* For inputs, search the page on :ref:`current TXPipe files<TXPipe File Tags and Types>`.
* For inputs, search the page on :ref:`current TXPipe files<TXPipe File Tags>`.

* For each output, you can choose a tag, which will determine the name of the output file, and choose a file type from the classes described on the data\_types page in the stages listing (a sketch of such a declaration follows below).

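As a rough guide to what such a stage declaration looks like, here is a hedged sketch assuming the ceci ``PipelineStage`` API; the class, tags, and import paths are illustrative rather than prescriptive:

# A hypothetical stage skeleton; tag names and import paths are examples only.
from ceci import PipelineStage
from txpipe.data_types import HDFFile  # assumed location of TXPipe file types

class TXMyNewStage(PipelineStage):
    name = "TXMyNewStage"  # should match the class name
    # Each input tag should match a file some earlier stage produces,
    # or an overall pipeline input.
    inputs = [("photometry_catalog", HDFFile)]
    # Each output tag determines the output file's name; the type sets its suffix.
    outputs = [("my_new_output", HDFFile)]

    def run(self):
        pass  # read inputs with self.open_input, write with self.open_output
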
4 changes: 2 additions & 2 deletions docs/src/conf.py
@@ -79,7 +79,7 @@
#
# This is also used if you do content translation via gettext catalogs.
# Usually you set "language" from the command line for these cases.
language = None
language = "en"

# List of patterns, relative to source directory, that match files and
# directories to ignore when looking for source files.
@@ -137,7 +137,7 @@ def patched_parse(self):
# Add any paths that contain custom static files (such as style sheets) here,
# relative to this directory. They are copied after the builtin static files,
# so a file named "default.css" will overwrite the builtin "default.css".
html_static_path = ['_static']
html_static_path = []

# Custom sidebar templates, must be a dictionary that maps document names
# to template names.
14 changes: 7 additions & 7 deletions docs/src/example.rst
@@ -4,14 +4,14 @@ Running an example pipeline
Running
-------

If you haven't already, you can download data for a test pipeline like this:
Download data for a test pipeline like this:

.. code-block:: bash

   curl -O https://portal.nersc.gov/cfs/lsst/txpipe/data/example.tar.gz
   tar -zxvf example.tar.gz
and run that test pipeline like this:
and run a test pipeline like this:

.. code-block:: bash
@@ -30,25 +30,25 @@ A flow chart showing the steps in the pipeline and the files it generates is sho
   :width: 600
   :alt: A flow chart of the example pipeline.

You can make your charts like this using (requires pygraphviz)::
You can make charts like this using:

.. code-block:: bash

   python bin/flow_chart.py examples/metacal/pipeline.yml metacal.png
Results
-------

Once the pipeline is complete, the results will be stored in ``data/example/outputs``. Some are PNG images you can look at directly. Others are HDF5 files - see :ref:`Reading HDF5 Files`.
Once the pipeline is complete, the results will be stored in ``data/example/outputs_metadetect``. Some are PNG images you can look at directly. Others are HDF5 files - see :ref:`Reading HDF5 Files`.

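A quick hedged example of opening one of these outputs with ``h5py`` (the path is taken from the example data above; the group layout depends on the file type, so explore with ``.keys()`` first):

import h5py

# Hypothetical inspection of one pipeline output file.
with h5py.File("data/example/outputs_metadetect/shear_tomography_catalog.hdf5", "r") as f:
    print(list(f.keys()))  # top-level groups in the file
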

Under the hood
----------------

When you do this, the following things are happening under the hood:

#. The ``ceci`` program reads the pipeline yml file and finds a list of python modules to read. In those modules, any subclass of the ceci ``PipelineStage`` class is a stage of the analysis that can be executed, and specifies the inputs and outputs from that stage.

#. ``ceci`` reads the list of stages from the pipeline yml file, and finds all the corresponding classes. It connects stages together so that the outputs of earlier ones can be the inputs of later ones. The yml file also lists initial inputs for the overall pipeline.
#. The ``ceci`` program reads the pipeline yml file and finds the ``PipelineStage`` classes listed in it. It connects stages together to pass data from one to the next.

#. ``ceci`` runs the stages one by one, printing out the command line it uses. The outputs and logs of the tasks are put in locations defined in the pipeline yml.

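As a conceptual sketch of the discovery step (not ceci's actual implementation), subclass registration along these lines lets a runner look stages up by the names in the yml file:

# Conceptual sketch only: collect subclasses in a registry by class name,
# similar in spirit to how ceci can find PipelineStage subclasses once the
# python modules named in the pipeline yml have been imported.
class StageBase:
    registry = {}

    def __init_subclass__(cls, **kwargs):
        super().__init_subclass__(**kwargs)
        StageBase.registry[cls.__name__] = cls

class TXExampleStage(StageBase):
    pass

print(StageBase.registry["TXExampleStage"])  # <class '__main__.TXExampleStage'>
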