
Merge pull request #380 from LSSTDESC/doc-file-types
Documentation Update
joezuntz authored Jan 23, 2025
2 parents 4cc7d93 + eff9096 commit c6768d8
Showing 61 changed files with 1,810 additions and 224 deletions.
62 changes: 62 additions & 0 deletions bin/gen-filetype-docs.py
@@ -0,0 +1,62 @@
import h5py
import tabulate
import sys

class DescriptionGenerator:
    def __init__(self):
        self.rows = []

    def __call__(self, name, obj):
        if not isinstance(obj, h5py.Dataset):
            return
        bits = name.split('/')
        groups = bits[:-1]
        name = bits[-1]
        kind = f"{obj.ndim}D {obj.dtype}"
        self.rows.append([groups, name, kind])

    def to_table(self):
        groups = [row[0] for row in self.rows]
        ngroup_max = max(len(group) for group in groups)
        headers = ["Group"] + [""] * (ngroup_max - 1) + ["Name", "Kind", "Meaning"]
        rows = []
        for row in self.rows:
            groups = row[0] + [""] * (ngroup_max - len(row[0]))
            name = row[1]
            kind = row[2]
            meaning = ""
            rows.append(groups + [name, kind, meaning])
        return tabulate.tabulate(rows, headers=headers, tablefmt='rst')


def describe_file(filename, outfile):
    with h5py.File(filename) as f:
        gen = DescriptionGenerator()
        f.visititems(gen)
        outfile.write(gen.to_table())

files = {
    "PhotometryCatalog": ("data/example/inputs/photometry_catalog.hdf5", "photometry"),
    "MetacalShearCatalog": ("data/example/inputs/shear_catalog.hdf5", "metacal"),
    "LensfitShearCatalog": ("data/example/inputs/lensfit_shear_catalog.hdf5", "lensfit"),
    "MetadetectShearCatalog": ("data/example/inputs/metadetect_shear_catalog.hdf5", "metadetect"),
    "StarCatalog": ("data/example/inputs/star_catalog.hdf5", "stars"),
    "TomographyCatalog": ("data/example/outputs_metadetect/shear_tomography_catalog.hdf5", "tomography"),
    "BinnedCatalog": ("data/example/outputs_metadetect/binned_lens_catalog.hdf5", "binned"),
    "RandomsCatalog": ("data/example/outputs_metadetect/random_cats.hdf5", "randoms"),
    "MapsFile": ("data/example/outputs_metadetect/lens_maps.hdf5", "maps"),
    "MetaData": ("data/example/outputs_metadetect/tracer_metadata.hdf5", "metadata"),
}

if __name__ == "__main__":
    if sys.argv[1] == "all":
        for name, (filename, outfile) in files.items():
            outpath = "docs/src/file_details/" + outfile + ".rst"
            with open(outpath, "w") as f:
                f.write(f"## {outfile.capitalize()}\n\n")
                describe_file(filename, f)
                f.write("\n\n\n")
    else:
        describe_file(sys.argv[1], sys.stdout)
        sys.stdout.write("\n")
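
For context, this is what the visitor pattern in DescriptionGenerator does in practice. A minimal sketch, assuming the class above is in scope and using a throwaway HDF5 file with made-up dataset names:

# Illustrative only: build a tiny HDF5 file and run the visitor over it.
# The file name and dataset names here are hypothetical, not TXPipe data.
import h5py
import numpy as np

with h5py.File("tiny_example.hdf5", "w") as f:
    f.create_dataset("photometry/ra", data=np.zeros(10))
    f.create_dataset("photometry/dec", data=np.zeros(10))

gen = DescriptionGenerator()
with h5py.File("tiny_example.hdf5", "r") as f:
    # visititems calls gen(name, obj) for every group and dataset below root;
    # the generator records only datasets, splitting group path from name
    f.visititems(gen)

# Prints an RST table with Group / Name / Kind / Meaning columns,
# e.g. a row like: photometry | ra | 1D float64 |
print(gen.to_table())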
75 changes: 75 additions & 0 deletions bin/get_all_tags.py
@@ -0,0 +1,75 @@
import ceci

pipeline_files = [
    "examples/2.2i/pipeline.yml",
    "examples/buzzard/pipeline.yml",
    "examples/clmm/pipeline.yml",
    "examples/cosmodc2/pipeline.yml",
    "examples/desy1/pipeline.yml",
    "examples/desy3/pipeline.yml",
    "examples/dp0.2/pipeline.yml",
    "examples/kids-1000/pipeline.yml",
    "examples/lensfit/pipeline.yml",
    "examples/lognormal/pipeline.yml",
    "examples/metacal/pipeline.yml",
    "examples/metadetect/pipeline.yml",
    "examples/metadetect_source_only/pipeline.yml",
    "examples/mock_shear/pipeline.yml",
    "examples/redmagic/pipeline.yml",
    "examples/skysim/pipeline.yml",
]

def get_tags(pipeline_file):
    tags = set()

    pipe_config = ceci.Pipeline.build_config(pipeline_file, dry_run=True)

    with ceci.prepare_for_pipeline(pipe_config) as pipeline:
        p = ceci.Pipeline.create(pipe_config)

        # First pass, get the classes for all the stages
        stage_classes = []
        for stage_name in p.stage_names:
            sec = p.stage_execution_config[stage_name]
            stage_classes.append(sec.build_stage_class())

            stage_aliases = sec.aliases
            stage_class = sec.stage_class
            for tag, ftype in stage_class.outputs:
                aliased_tag = stage_aliases.get(tag, tag)
                if ftype.suffix is not None:
                    tags.add((ftype.suffix, aliased_tag))
                # if ftype.suffix is None:
                #     print("NONE TAG TYPE:", stage_name, pipeline_file, stage_class)
            for tag, ftype in stage_class.inputs:
                aliased_tag = stage_aliases.get(tag, tag)
                if ftype.suffix is not None:
                    tags.add((ftype.suffix, aliased_tag))
                # if ftype.suffix is None:
                #     print("NONE TAG TYPE:", stage_name, pipeline_file, stage_class)

    return tags


def main():
    tags = set()
    for pipeline_file in pipeline_files:
        try:
            tags.update(get_tags(pipeline_file))
        except ceci.errors.StageNotFound:
            print(f"Error: {pipeline_file} old")
            continue
        except ValueError:
            print(f"Error: {pipeline_file} broken")
            continue
    # print(tags)
    for tag in sorted(tags):
        print(tag)


if __name__ == "__main__":
    main()
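
To see what the tag-collection loop produces, here is a self-contained sketch with stand-in classes rather than real ceci stages; all of the names are illustrative:

# Illustrative stand-ins for a ceci stage and file type; not the real API.
class FakeFileType:
    suffix = "hdf5"

class FakeStage:
    inputs = [("shear_catalog", FakeFileType)]
    outputs = [("shear_tomography_catalog", FakeFileType)]

aliases = {"shear_tomography_catalog": "source_tomography_catalog"}

tags = set()
for tag, ftype in FakeStage.outputs + FakeStage.inputs:
    aliased_tag = aliases.get(tag, tag)  # rename if the pipeline aliases it
    if ftype.suffix is not None:
        tags.add((ftype.suffix, aliased_tag))

print(sorted(tags))
# [('hdf5', 'shear_catalog'), ('hdf5', 'source_tomography_catalog')]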
File renamed without changes
File renamed without changes
File renamed without changes
62 changes: 55 additions & 7 deletions docs/make-stages.py
@@ -17,11 +17,17 @@
        - TXExposureInfo
        - TXIngestStars
        - TXMetacalGCRInput
        - TXIngestDataPreview02
        - TXSimpleMock
        - TXMockTruthPZ
        - TXLogNormalGlass
Photo-z:
    blurb: |
        These stages deal with photo-z PDF training and estimation
    stages:
        - TXPhotozStack
        - TXTruePhotozStack
        - PZRailTrainSource
        - PZRailTrainLens
        - PZRailTrainLensFromSource
@@ -38,6 +44,7 @@
        bins.
    stages:
        - TXSourceSelector
        - TXSourceSelectorSimple
        - TXSourceSelectorMetacal
        - TXSourceSelectorMetadetect
        - TXSourceSelectorLensfit
@@ -46,6 +53,7 @@
        - TXTruthLensSelector
        - TXMeanLensSelector
        - TXModeLensSelector
        - TXRandomForestLensSelector
Calibration and Splitting:
@@ -57,7 +65,18 @@
        - TXStarCatalogSplitter
        - TXLensCatalogSplitter
        - TXExternalLensCatalogSplitter
        - TXTruthLensCatalogSplitter
        - TXTruthLensCatalogSplitterWeighted
Weights:
    blurb: |
        These stages deal with weighting the lens sample
    stages:
        - TXLSSWeights
        - TXLSSWeightsLinBinned
        - TXLSSWeightsLinPix
        - TXLSSWeightsUnit
Maps:
    blurb: |
@@ -76,6 +95,8 @@
        - TXConvergenceMaps
        - TXMapCorrelations
        - TXSimpleMask
        - TXSimpleMaskSource
        - TXSimpleMaskFrac
        - TXAuxiliarySourceMaps
        - TXAuxiliaryLensMaps
        - TXUniformDepthMap
@@ -88,18 +109,25 @@
        - TXPhotozLensStack
        - TXSourceTrueNumberDensity
        - TXLensTrueNumberDensity
        - PZRailSummarize
Two-Point:
    blurb: |
        These stages deal with measuring or predicting two-point statistics.
    stages:
        - TXJackknifeCenters
        - TXJackknifeCentersSource
        - TXTwoPointFourier
        - TXTwoPoint
        - TXRandomCat
        - TXSubsampleRandoms
        - TXTwoPointTheoryReal
        - TXTwoPointTheoryFourier
        - TXTwoPointPixel
        - TXTwoPointPixelExtCross
        - TXTwoPointRLens
Covariance:
@@ -129,38 +157,58 @@
        - TXTwoPointPlotsFourier
        - TXConvergenceMapPlots
        - TXMapPlots
        - PZRealizationsPlot
        - TXTwoPointPlotsTheory
        - TXPhotozPlot
Diagnostics:
    blurb: |
        These stages compute and/or plot diagnostics of catalogs or other data
    stages:
        - TXGammaTFieldCenters
        - TXGammaTStars
        - TXGammaTRandoms
        - TXApertureMass
        - TXDiagnosticQuantiles
        - TXSourceDiagnosticPlots
        - TXLensDiagnosticPlots
        - TXPSFDiagnostics
        - TXPSFMomentCorr
        - TXRoweStatistics
        - TXTauStatistics
        - TXGalaxyStarShear
        - TXGalaxyStarDensity
        - TXBrighterFatterPlot
        - TXGammaTFieldCenters
        - TXGammaTStars
        - TXGammaTRandoms
        - TXApertureMass
        - TXFocalPlanePlot
Source Injection:
    blurb: |
        These stages ingest and use synthetic source injection information
    stages:
        - TXIngestSSIGCR
        - TXMatchSSI
        - TXIngestSSIMatched
        - TXIngestSSIMatchedDESBalrog
        - TXSSIMagnification
Extensions:
    blurb: |
        These stages are written for TXPipe extension projects.
    stages:
        - TXSelfCalibrationIA
        - CLClusterBinningRedshiftRichness
        - CLClusterShearCatalogs
        - CLClusterEnsembleProfiles
New and Miscellaneous:
    blurb: |
        These stages either don't fit into a category above or have not yet been
        assigned to one.
    stages:
        - TXTracerMetadata
        - TXParqetToHDF
""")

@@ -196,7 +244,7 @@ def get_name(cls):
if class_name == "BaseStageDoNotRunDirectly":
    continue
if class_name not in stages:
    print("Warning - update the section list for ", c)
    print("Warning - update the section list for ", class_name)

qual_name = get_name(cls)
section = stages.get(class_name, "New and Miscellaneous")
@@ -234,7 +282,7 @@ def get_name(cls):
   :caption: Contents:
""")

f.write(" stages/base\n")
for s, sd in sections.items():
    f.write(" stages/" + s + "\n")
f.write("\n")
2 changes: 1 addition & 1 deletion docs/src/adding.rst
@@ -68,7 +68,7 @@ The name of the class and the attribute ``name`` should be the same, and be desc

You need to decide on the inputs and outputs for the file, and give them tags and types.

* For inputs, search the page on :ref:`current TXPipe files<TXPipe File Tags and Types>`.
* For inputs, search the page on :ref:`current TXPipe files<TXPipe File Tags>`.

* For each output, you can choose a tag, which will determine the name of the output file, and choose a file type from the classes described on the data\_types page in the stages listing (a sketch of such a declaration follows below).

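As a rough guide to what such a stage declaration looks like, here is a hedged sketch assuming the ceci ``PipelineStage`` API; the class, tags, and import paths are illustrative rather than prescriptive:

# A hypothetical stage skeleton; tag names and import paths are examples only.
from ceci import PipelineStage
from txpipe.data_types import HDFFile  # assumed location of TXPipe file types

class TXMyNewStage(PipelineStage):
    name = "TXMyNewStage"  # should match the class name
    # Each input tag should match a file some earlier stage produces,
    # or an overall pipeline input.
    inputs = [("photometry_catalog", HDFFile)]
    # Each output tag determines the output file's name; the type sets its suffix.
    outputs = [("my_new_output", HDFFile)]

    def run(self):
        pass  # read inputs with self.open_input, write with self.open_output
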
4 changes: 2 additions & 2 deletions docs/src/conf.py
@@ -79,7 +79,7 @@
#
# This is also used if you do content translation via gettext catalogs.
# Usually you set "language" from the command line for these cases.
language = None
language = "en"

# List of patterns, relative to source directory, that match files and
# directories to ignore when looking for source files.
@@ -137,7 +137,7 @@ def patched_parse(self):
# Add any paths that contain custom static files (such as style sheets) here,
# relative to this directory. They are copied after the builtin static files,
# so a file named "default.css" will overwrite the builtin "default.css".
html_static_path = ['_static']
html_static_path = []

# Custom sidebar templates, must be a dictionary that maps document names
# to template names.
14 changes: 7 additions & 7 deletions docs/src/example.rst
@@ -4,14 +4,14 @@ Running an example pipeline
Running
-------

If you haven't already, you can download data for a test pipeline like this:
Download data for a test pipeline like this:

.. code-block:: bash

   curl -O https://portal.nersc.gov/cfs/lsst/txpipe/data/example.tar.gz
   tar -zxvf example.tar.gz
and run that test pipeline like this:
and run a test pipeline like this:

.. code-block:: bash
@@ -30,25 +30,25 @@ A flow chart showing the steps in the pipeline and the files it generates is sho
   :width: 600
   :alt: A flow chart of the example pipeline.

You can make your charts like this using (requires pygraphviz)::
You can make charts like this using:

.. code-block:: bash

   python bin/flow_chart.py examples/metacal/pipeline.yml metacal.png
Results
-------

Once the pipeline is complete, the results will be stored in ``data/example/outputs``. Some are PNG images you can look at directly. Others are HDF5 files - see :ref:`Reading HDF5 Files`.
Once the pipeline is complete, the results will be stored in ``data/example/outputs_metadetect``. Some are PNG images you can look at directly. Others are HDF5 files - see :ref:`Reading HDF5 Files`.

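A quick hedged example of opening one of these outputs with ``h5py`` (the path is taken from the example data above; the group layout depends on the file type, so explore with ``.keys()`` first):

import h5py

# Hypothetical inspection of one pipeline output file.
with h5py.File("data/example/outputs_metadetect/shear_tomography_catalog.hdf5", "r") as f:
    print(list(f.keys()))  # top-level groups in the file
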

Under the hood
----------------

When you do this, the following things are happening under the hood:

#. The ``ceci`` program reads the pipeline yml file and finds a list of python modules to read. In those modules, any subclass of the ceci ``PipelineStage`` class is a stage of the analysis that can be executed, and specifies the inputs and outputs from that stage.

#. ``ceci`` reads the list of stages from the pipeline yml file, and finds all the corresponding classes. It connects stages together so that the outputs of earlier ones can be the inputs of later ones. The yml file also lists initial inputs for the overall pipeline.
#. The ``ceci`` program reads the pipeline yml file and finds the ``PipelineStage`` classes listed in it. It connects stages together to pass data from one to the next.

#. ``ceci`` runs the stages one by one, printing out the command line it uses. The outputs and logs of the tasks are put in locations defined in the pipeline yml.

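As a conceptual sketch of the discovery step (not ceci's actual implementation), subclass registration along these lines lets a runner look stages up by the names in the yml file:

# Conceptual sketch only: collect subclasses in a registry by class name,
# similar in spirit to how ceci can find PipelineStage subclasses once the
# python modules named in the pipeline yml have been imported.
class StageBase:
    registry = {}

    def __init_subclass__(cls, **kwargs):
        super().__init_subclass__(**kwargs)
        StageBase.registry[cls.__name__] = cls

class TXExampleStage(StageBase):
    pass

print(StageBase.registry["TXExampleStage"])  # <class '__main__.TXExampleStage'>
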