From afdfbc3c317883f18ea38718c4002dc2d9c8f019 Mon Sep 17 00:00:00 2001 From: Joe Zuntz Date: Tue, 28 Jan 2025 14:18:25 +0000 Subject: [PATCH] update docstrings --- txpipe/base_stage.py | 65 ++++++++++++++++++++++++++++++++++----- txpipe/ingest/ssi.py | 2 +- txpipe/lssweights.py | 10 +++--- txpipe/psf_diagnostics.py | 4 +-- 4 files changed, 66 insertions(+), 15 deletions(-) diff --git a/txpipe/base_stage.py b/txpipe/base_stage.py index 60c2e445..faad7bc9 100755 --- a/txpipe/base_stage.py +++ b/txpipe/base_stage.py @@ -19,6 +19,9 @@ class PipelineStage(PipelineStageBase): config_options = {} def run(self): + """ + The main function that does the work and must be implemented when building a TXPipe class. + """ print("Please do not execute this stage again.") def time_stamp(self, tag): @@ -70,6 +73,35 @@ def memory_report(self, tag=None): sys.stdout.flush() def combined_iterators(self, rows, *inputs, parallel=True): + """ + Iterate through multiple files at the same time. + + If you have several HDF files with the same + columns of the same length then you can use this method to + iterate through them all at once, and combine the data from + all of them into a single dictionary. + + Parameters + ---------- + rows: int + The number of rows to read in each chunk + + *inputs: list + A list of (tag, group, cols) triples for each file to read. + In each case tag is the input file name tag, group is the + group within the HDF5 file to read, and cols is a list of + columns to read from that group. Specify multiple triplets + to read from multiple files + + parallel: bool + Whether to split up data among processes (parallel=True) or give + all processes all data (parallel=False). Default = True.
+ + Returns + ------- + it: iterator + Iterator yielding (int, int, dict) tuples of (start, end, data) + """ if not len(inputs) % 3 == 0: raise ValueError( "Arguments to combined_iterators should be in threes: " @@ -120,16 +152,35 @@ def open_output(self, tag, wrapper=False, **kwargs): """ Find and open an output file with the given tag, in write mode. - For general files this will simply return a standard - python file object. + If final_name is True then they will be opened using their final + target output name. Otherwise we will prepend `inprogress_` to their + file name. This means we know that if the final file exists then it + is completed. + + If wrapper is True this will return an instance of the class + of the file as specified in the cls.outputs. Otherwise it will + return an open file object (standard python one or something more + specialized). + + Parameters + ---------- + tag: str + Tag as listed in self.outputs + + wrapper: bool + Whether to return an underlying file object (False) or a data type instance (True) - For specialized file types like FITS or HDF5 it will return - a more specific object - see the types.py file for more info. + final_name: bool + Default=False. Whether to save to the final target output name (True) or to an `inprogress_` prefixed name (False) - This is an extended version of the parent class method which - also saves configuration information. Putting this here right - now for testing. + **kwargs: + Extra args are passed on to the file's class constructor, + most notably "parallel" for parallel HDF writing.
+ Returns + ------- + obj: file or object + The opened file or object """ # This is added to cope with the new "aliases" system # in ceci - it lets us run the same code with different diff --git a/txpipe/ingest/ssi.py b/txpipe/ingest/ssi.py index 51840d7b..5561e35d 100644 --- a/txpipe/ingest/ssi.py +++ b/txpipe/ingest/ssi.py @@ -34,7 +34,7 @@ def create_photometry(self, input_name, output_name, column_names, dummy_columns column names, creates datasets for photometric magnitudes and their errors, and iterates over chunks of data to save them in the output file. - Parameters: + Parameters ---------- input_name : str name of (FITS) input diff --git a/txpipe/lssweights.py b/txpipe/lssweights.py index a7511f7f..ba761967 100644 --- a/txpipe/lssweights.py +++ b/txpipe/lssweights.py @@ -18,9 +18,9 @@ class TXLSSWeights(TXMapCorrelations): """ - BaseClass to compute LSS systematic weights + Base class for LSS systematic weights - Not to be run directly + Not to be run directly. This is an abstract base class, which other subclasses inherit from to use the same basic structure, which is: @@ -499,7 +499,7 @@ def summarize( class TXLSSWeightsLinBinned(TXLSSWeights): """ - Class compute LSS systematic weights using simultanious linear regression on the binned + Compute LSS systematic weights using simultaneous linear regression on the binned 1D correlations Model: Linear @@ -987,7 +987,7 @@ def neg_log_like(params): class TXLSSWeightsLinPix(TXLSSWeightsLinBinned): """ - Class compute LSS systematic weights using simultanious linear regression at the + Compute LSS systematic weights using simultaneous linear regression at the pixel level using scikitlearn simple linear regression Model: Linear @@ -1161,7 +1161,7 @@ def hsplist2array(hsp_list): class TXLSSWeightsUnit(TXLSSWeights): """ - Class assigns weight=1 to all lens objects + Assign weight=1 to all lens objects """ diff --git a/txpipe/psf_diagnostics.py b/txpipe/psf_diagnostics.py index 73c687ad..ec7c9528 100644 ---
a/txpipe/psf_diagnostics.py +++ b/txpipe/psf_diagnostics.py @@ -332,7 +332,7 @@ def save_stats(self, moments_stats): class TXTauStatistics(PipelineStage): """ - Compute and plot PSF Tau statistics where the definition of Tau stats are eq.20-22 + Compute and plot PSF Tau statistics where the definitions of the Tau stats are given in eqs. 20-22 of Gatti et al 2023. """ name = "TXTauStatistics" @@ -752,7 +752,7 @@ class TXRoweStatistics(PipelineStage): People sometimes think that these statistics are called the Rho statistics, because we usually use that letter for them. Not so. They are named after - the wonderfully incorrigible rogue Barney Rowe, now sadly lost to high finance, + the wonderful Barney Rowe, now sadly lost to high finance, who presented the first two of them in MNRAS 404, 350 (2010).