From afdfbc3c317883f18ea38718c4002dc2d9c8f019 Mon Sep 17 00:00:00 2001 From: Joe Zuntz Date: Tue, 28 Jan 2025 14:18:25 +0000 Subject: [PATCH] update docstrings --- txpipe/base_stage.py | 65 ++++++++++++++++++++++++++++++++++----- txpipe/ingest/ssi.py | 2 +- txpipe/lssweights.py | 10 +++--- txpipe/psf_diagnostics.py | 4 +-- 4 files changed, 66 insertions(+), 15 deletions(-) diff --git a/txpipe/base_stage.py b/txpipe/base_stage.py index 60c2e445..faad7bc9 100755 --- a/txpipe/base_stage.py +++ b/txpipe/base_stage.py @@ -19,6 +19,9 @@ class PipelineStage(PipelineStageBase): config_options = {} def run(self): + """ + The main function that does the work and must be implemented when building a TXPipe class. + """ print("Please do not execute this stage again.") def time_stamp(self, tag): @@ -70,6 +73,35 @@ def memory_report(self, tag=None): sys.stdout.flush() def combined_iterators(self, rows, *inputs, parallel=True): + """ + Iterate through multiple files at the same time. + + If you have several HDF files with the same + columns of the same length then you can use this method to + iterate through them all at once, and combine the data from + all of them into a single dictionary. + + Parameters + ---------- + rows: int + The number of rows to read in each chunk + + *inputs: list + A list of (tag, group, cols) triples for each file to read. + In each case tag is the input file name tag, group is the + group within the HDF5 file to read, and cols is a list of + columns to read from that group. Specify multiple triplets + to read from multiple files + + parallel: bool + Whether to split up data among processes (parallel=True) or give + all processes all data (parallel=False). Default = True.
+ + Returns + ------- + it: iterator + Iterator yielding (int, int, dict) tuples of (start, end, data) + """ if not len(inputs) % 3 == 0: raise ValueError( "Arguments to combined_iterators should be in threes: " @@ -120,16 +152,35 @@ def open_output(self, tag, wrapper=False, **kwargs): """ Find and open an output file with the given tag, in write mode. - For general files this will simply return a standard - python file object. + If final_name is True then they will be opened using their final + target output name. Otherwise we will prepend `inprogress_` to their + file name. This means we know that if the final file exists then it + is completed. + + If wrapper is True this will return an instance of the class + of the file as specified in the cls.outputs. Otherwise it will + return an open file object (standard python one or something more + specialized). + + Parameters + ---------- + tag: str + Tag as listed in self.outputs + + wrapper: bool + Whether to return an underlying file object (False) or a data type instance (True) - For specialized file types like FITS or HDF5 it will return - a more specific object - see the types.py file for more info. + final_name: bool + Default=False. Whether to save to the final target output name (True) or to an `inprogress_` prefixed name (False) - This is an extended version of the parent class method which - also saves configuration information. Putting this here right - now for testing. + **kwargs: + Extra args are passed on to the file's class constructor, + most notably "parallel" for parallel HDF writing.
+ Returns + ------- + obj: file or object + The opened file or object """ # This is added to cope with the new "aliases" system # in ceci - it lets us run the same code with different diff --git a/txpipe/ingest/ssi.py b/txpipe/ingest/ssi.py index 51840d7b..5561e35d 100644 --- a/txpipe/ingest/ssi.py +++ b/txpipe/ingest/ssi.py @@ -34,7 +34,7 @@ def create_photometry(self, input_name, output_name, column_names, dummy_columns column names, creates datasets for photometric magnitudes and their errors, and iterates over chunks of data to save them in the output file. - Parameters: + Parameters ---------- input_name : str name of (FITS) input diff --git a/txpipe/lssweights.py b/txpipe/lssweights.py index a7511f7f..ba761967 100644 --- a/txpipe/lssweights.py +++ b/txpipe/lssweights.py @@ -18,9 +18,9 @@ class TXLSSWeights(TXMapCorrelations): """ - BaseClass to compute LSS systematic weights + Base class for LSS systematic weights - Not to be run directly + Not to be run directly. This is an abstract base class, which other subclasses inherit from to use the same basic structure, which is: @@ -499,7 +499,7 @@ def summarize( class TXLSSWeightsLinBinned(TXLSSWeights): """ - Class compute LSS systematic weights using simultanious linear regression on the binned + Compute LSS systematic weights using simultaneous linear regression on the binned 1D correlations Model: Linear @@ -987,7 +987,7 @@ def neg_log_like(params): class TXLSSWeightsLinPix(TXLSSWeightsLinBinned): """ - Class compute LSS systematic weights using simultanious linear regression at the + Compute LSS systematic weights using simultaneous linear regression at the pixel level using scikitlearn simple linear regression Model: Linear @@ -1161,7 +1161,7 @@ def hsplist2array(hsp_list): class TXLSSWeightsUnit(TXLSSWeights): """ - Class assigns weight=1 to all lens objects + Assign weight=1 to all lens objects """ diff --git a/txpipe/psf_diagnostics.py b/txpipe/psf_diagnostics.py index 73c687ad..ec7c9528 100644 ---
a/txpipe/psf_diagnostics.py +++ b/txpipe/psf_diagnostics.py @@ -332,7 +332,7 @@ def save_stats(self, moments_stats): class TXTauStatistics(PipelineStage): """ - Compute and plot PSF Tau statistics where the definition of Tau stats are eq.20-22 + Compute and plot PSF Tau statistics where the definitions of the Tau stats are given in eqs. 20-22 of Gatti et al 2023. """ name = "TXTauStatistics" @@ -752,7 +752,7 @@ class TXRoweStatistics(PipelineStage): People sometimes think that these statistics are called the Rho statistics, because we usually use that letter for them. Not so. They are named after - the wonderfully incorrigible rogue Barney Rowe, now sadly lost to high finance, + the wonderful Barney Rowe, now sadly lost to high finance, who presented the first two of them in MNRAS 404, 350 (2010).