diff --git a/examples/cosmodc2/Cluster_pipelines/1deg2-nersc.sub b/examples/cosmodc2/Cluster_pipelines/1deg2-nersc.sub
index 729e5b172..87c159f04 100644
--- a/examples/cosmodc2/Cluster_pipelines/1deg2-nersc.sub
+++ b/examples/cosmodc2/Cluster_pipelines/1deg2-nersc.sub
@@ -6,5 +6,6 @@
 #SBATCH --nodes=1
 #SBATCH --ntasks-per-node=32
 
-source $CFS/lsst/groups/WL/users/zuntz/setup-txpipe
-tx ceci examples/cosmodc2/Cluster_pipelines/pipeline-1deg2-CL-nersc.yml
+module load python
+conda activate ./conda
+ceci examples/cosmodc2/Cluster_pipelines/pipeline-1deg2-CL-nersc.yml
diff --git a/examples/cosmodc2/Cluster_pipelines/20deg2-in2p3.sub b/examples/cosmodc2/Cluster_pipelines/20deg2-in2p3.sub
index 3f4917d5d..f97ac5185 100644
--- a/examples/cosmodc2/Cluster_pipelines/20deg2-in2p3.sub
+++ b/examples/cosmodc2/Cluster_pipelines/20deg2-in2p3.sub
@@ -5,5 +5,6 @@
 #SBATCH --cpus-per-task=1
 #SBATCH --mem=128000
 
-source /pbs/throng/lsst/users/jzuntz/txpipe-environments/setup-txpipe
+source ./conda/bin/activate
+export HDF5_DO_MPI_FILE_SYNC=0
 ceci examples/cosmodc2/Cluster_pipelines/pipeline-20deg2-CL-in2p3.yml
diff --git a/examples/cosmodc2/Cluster_pipelines/20deg2-nersc.sub b/examples/cosmodc2/Cluster_pipelines/20deg2-nersc.sub
index 821539970..9f67d3899 100644
--- a/examples/cosmodc2/Cluster_pipelines/20deg2-nersc.sub
+++ b/examples/cosmodc2/Cluster_pipelines/20deg2-nersc.sub
@@ -6,5 +6,7 @@
 #SBATCH --nodes=1
 #SBATCH --ntasks-per-node=32
 
-source $CFS/lsst/groups/WL/users/zuntz/setup-txpipe
-tx ceci examples/cosmodc2/Cluster_pipelines/pipeline-20deg2-CL-nersc.yml
+
+module load python
+conda activate ./conda
+ceci examples/cosmodc2/Cluster_pipelines/pipeline-20deg2-CL-nersc.yml
diff --git a/examples/cosmodc2/Cluster_pipelines/config-20deg2-CL.yml b/examples/cosmodc2/Cluster_pipelines/config-20deg2-CL.yml
index 558e5054e..80f624a2b 100644
--- a/examples/cosmodc2/Cluster_pipelines/config-20deg2-CL.yml
+++ b/examples/cosmodc2/Cluster_pipelines/config-20deg2-CL.yml
@@ -10,7 +10,7 @@ TXSourceSelectorMetadetect:
     true_z: false
     shear_prefix: ''
 
-Inform_BPZ_lite:
+BPZliteInformer:
     zmin: 0.0
     zmax: 3.0
     nzbins: 301
@@ -32,7 +32,7 @@ Inform_BPZ_lite:
 
 
 
-BPZ_lite:
+BPZliteEstimator:
     zmin: 0.0
     zmax: 3.0
     dz: 0.01
@@ -64,7 +64,9 @@ CLClusterShearCatalogs:
     chunk_rows: 100_000 # rows to read at once from source cat
     max_radius: 5 # Mpc
     delta_z: 0.2 # redshift buffer
-    redshift_criterion: mean # might also need PDF
+    redshift_cut_criterion: pdf
+    redshift_weight_criterion: pdf
+    redshift_cut_criterion_pdf_fraction: 0.9
     subtract_mean_shear: true
 
 
diff --git a/examples/cosmodc2/Cluster_pipelines/pipeline-20deg2-CL-in2p3.yml b/examples/cosmodc2/Cluster_pipelines/pipeline-20deg2-CL-in2p3.yml
index 357c05dd0..825a1a024 100644
--- a/examples/cosmodc2/Cluster_pipelines/pipeline-20deg2-CL-in2p3.yml
+++ b/examples/cosmodc2/Cluster_pipelines/pipeline-20deg2-CL-in2p3.yml
@@ -23,12 +23,12 @@ python_paths: []
 stages:
     - name: TXSourceSelectorMetadetect
       nprocess: 30
-    - name: Inform_BPZ_lite
+    - name: BPZliteInformer
       nprocess: 1
      aliases:
          input: spectroscopic_catalog
          model: photoz_model
-    - name: BPZ_lite
+    - name: BPZliteEstimator
      nprocess: 30
      aliases:
          model: photoz_model
@@ -50,12 +50,12 @@ config: ./examples/cosmodc2/Cluster_pipelines/config-20deg2-CL.yml
 
 inputs:
     # See README for paths to download these files
-    shear_catalog: ./data/cosmodc2/20deg2/shear_catalog.hdf5
-    #photometry_catalog: ./data/cosmodc2/20deg2/photometry_catalog.hdf5
+    shear_catalog: /sps/lsst/users/mricci/TXPipe_data/data_link/cosmodc2/20deg2/shear_catalog.hdf5
+    #photometry_catalog: /sps/lsst/users/mricci/TXPipe_data/data_link/cosmodc2/20deg2/photometry_catalog.hdf5
     fiducial_cosmology: ./data/fiducial_cosmology.yml
-    calibration_table: ./data/cosmodc2/20deg2/sample_cosmodc2_w10year_errors.dat
-    spectroscopic_catalog: ./data/cosmodc2/20deg2/spectroscopic_catalog.hdf5
-    cluster_catalog: ./data/cosmodc2/20deg2/cluster_catalog.hdf5
+    calibration_table: /sps/lsst/users/mricci/TXPipe_data/data_link/cosmodc2/20deg2/sample_cosmodc2_w10year_errors.dat
+    spectroscopic_catalog: /sps/lsst/users/mricci/TXPipe_data/data_link/cosmodc2/20deg2/spectroscopic_catalog.hdf5
+    cluster_catalog: /sps/lsst/users/mricci/TXPipe_data/data_link/cosmodc2/20deg2/cluster_catalog.hdf5
     #shear_tomography_catalog: ./data/example/outputs_metadetect/shear_tomography_catalog.hdf5
     #source_photoz_pdfs: ./data/example/inputs/photoz_pdfs.hdf5
 resume: true
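Note: the config and pipeline files above rename the RAIL photo-z stages from `Inform_BPZ_lite` / `BPZ_lite` to `BPZliteInformer` / `BPZliteEstimator`. As an illustration only (not part of this change, and the file path is just an example), a short script like the following can flag any other pipeline file that still uses the old names:

```python
# Illustrative helper (not part of TXPipe): flag pipeline files that still
# use the pre-rename BPZ stage names.
import yaml

OLD_TO_NEW = {"Inform_BPZ_lite": "BPZliteInformer", "BPZ_lite": "BPZliteEstimator"}

def check_stage_names(pipeline_file):
    with open(pipeline_file) as f:
        stages = yaml.safe_load(f).get("stages", [])
    stale = [s["name"] for s in stages if s.get("name") in OLD_TO_NEW]
    for name in stale:
        print(f"{pipeline_file}: {name} should now be {OLD_TO_NEW[name]}")
    return not stale

check_stage_names("examples/cosmodc2/Cluster_pipelines/pipeline-20deg2-CL-in2p3.yml")
```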
diff --git a/examples/cosmodc2/Cluster_pipelines/pipeline-20deg2-CL-nersc.yml b/examples/cosmodc2/Cluster_pipelines/pipeline-20deg2-CL-nersc.yml
index 29ddf674a..885c96647 100644
--- a/examples/cosmodc2/Cluster_pipelines/pipeline-20deg2-CL-nersc.yml
+++ b/examples/cosmodc2/Cluster_pipelines/pipeline-20deg2-CL-nersc.yml
@@ -6,8 +6,7 @@
 
 #for NERSC
 site:
-    name: cori-batch
-    image: ghcr.io/lsstdesc/txpipe-dev
+    name: nersc-interactive
 
 
 #all the following steps should not depend on where you run
@@ -23,12 +22,12 @@ python_paths: []
 stages:
     - name: TXSourceSelectorMetadetect
       nprocess: 30
-    - name: Inform_BPZ_lite
+    - name: BPZliteInformer
      nprocess: 1
      aliases:
          input: spectroscopic_catalog
          model: photoz_model
-    - name: BPZ_lite
+    - name: BPZliteEstimator
      nprocess: 30
      aliases:
          model: photoz_model
@@ -37,7 +36,7 @@ stages:
     - name: CLClusterBinningRedshiftRichness
      nprocess: 1
     - name: CLClusterShearCatalogs
-      nprocess: 1 #>1 does not work with mpi
+      nprocess: 32
     - name: CLClusterEnsembleProfiles
      nprocess: 1
 #    - name: CLClusterDataVector
diff --git a/examples/cosmodc2/pipeline-20deg2-laptop.yml b/examples/cosmodc2/pipeline-20deg2-laptop.yml
new file mode 100644
index 000000000..1a16d6550
--- /dev/null
+++ b/examples/cosmodc2/pipeline-20deg2-laptop.yml
@@ -0,0 +1,116 @@
+launcher:
+    name: mini
+    interval: 3.0
+
+site:
+    name: local
+
+
+modules: txpipe
+
+python_paths:
+    - submodules/WLMassMap/python/desc/
+    - submodules/FlexZPipe
+
+stages:
+    - name: TXRandomCat
+      nprocess: 12
+    - name: TXSourceSelectorMetadetect
+      nprocess: 12
+    - name: TXLSSWeightsUnit
+    - name: TXRandomForestLensSelector
+      nprocess: 12
+    - name: TXLensTrueNumberDensity
+      classname: TXTruePhotozStack
+      aliases:
+          tomography_catalog: lens_tomography_catalog
+          catalog: photometry_catalog
+          weights_catalog: none
+          photoz_stack: lens_photoz_stack
+    - name: TXSourceTrueNumberDensity
+      classname: TXTruePhotozStack
+      aliases:
+          tomography_catalog: shear_tomography_catalog
+          catalog: shear_catalog
+          weights_catalog: shear_catalog
+          photoz_stack: shear_photoz_stack
+    - name: TXPhotozPlotSource
+      classname: TXPhotozPlot
+      aliases:
+          photoz_stack: shear_photoz_stack
+          nz_plot: source_nz
+    - name: TXPhotozPlotLens
+      classname: TXPhotozPlot
+      aliases:
+          photoz_stack: lens_photoz_stack
+          nz_plot: lens_nz
+    - name: TXShearCalibration
+      nprocess: 12
+    - name: TXTruthLensCatalogSplitter
+      nprocess: 12
+    - name: TXTwoPointFourier
+      nprocess: 2
+      nodes: 1
+      threads_per_process: 6
+    - name: TXTwoPointTheoryFourier
+    - name: TXTwoPointPlotsFourier
+    - name: TXJackknifeCenters
+    - name: TXSourceMaps
+      nprocess: 8
+    - name: TXLensMaps
+      nprocess: 8
+    - name: TXAuxiliarySourceMaps
+      nprocess: 8
+    - name: TXAuxiliaryLensMaps
+      nprocess: 8
+    - name: TXDensityMaps
+    - name: TXSourceNoiseMaps
+      nprocess: 4
+      nodes: 1
+      threads_per_process: 1
+    - name: TXLensNoiseMaps
+      nprocess: 4
+      nodes: 1
+      threads_per_process: 1
+    - name: TXSimpleMask
+    - name: TXMapPlots
+    - name: TXTracerMetadata
+    - name: TXNullBlinding
+    - name: TXTwoPoint
+      threads_per_process: 12
+      nprocess: 1
+      nodes: 1
+    - name: TXTwoPointPlotsTheory
+    - name: TXDiagnosticQuantiles
+      nodes: 1
+      nprocess: 12
+    - name: TXLensDiagnosticPlots
+      nprocess: 12
+      nodes: 1
+    - name: TXSourceDiagnosticPlots
+      nprocess: 12
+      nodes: 1
+    - name: TXFourierGaussianCovariance
+      threads_per_process: 12
+    - name: TXTwoPointTheoryReal
+    - name: TXRealGaussianCovariance
+      threads_per_process: 12
+    - name: TXConvergenceMaps
+      threads_per_process: 12
+    - name: TXConvergenceMapPlots
+
+output_dir: data/cosmodc2/outputs-20deg2
+config: examples/cosmodc2/config-20deg2.yml
+
+inputs:
+    # See README for paths to download these files
+    shear_catalog: ./data/cosmodc2/20deg2/shear_catalog.hdf5
+    photometry_catalog: ./data/cosmodc2/20deg2/photometry_catalog.hdf5
+    fiducial_cosmology: data/fiducial_cosmology.yml
+    calibration_table: ./data/cosmodc2/20deg2/sample_cosmodc2_w10year_errors.dat
+    none: none
+
+resume: true
+log_dir: data/cosmodc2/logs
+pipeline_log: data/cosmodc2/log.txt
+
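The new laptop pipeline above is launched the same way as the batch versions, i.e. `ceci examples/cosmodc2/pipeline-20deg2-laptop.yml`, once the catalogs listed under `inputs:` have been downloaded (see the README). A purely illustrative pre-flight check along these lines, which is not part of the change, can catch a missing download before a long run:

```python
# Illustrative pre-flight check (not part of TXPipe): verify that the files
# listed under "inputs" in the laptop pipeline exist before calling ceci.
import os
import yaml

with open("examples/cosmodc2/pipeline-20deg2-laptop.yml") as f:
    pipeline = yaml.safe_load(f)

missing = [
    f"{tag}: {path}"
    for tag, path in pipeline["inputs"].items()
    if path != "none" and not os.path.exists(path)
]
if missing:
    raise FileNotFoundError("Missing pipeline inputs:\n" + "\n".join(missing))
```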
diff --git a/examples/cosmodc2/pipeline-20deg2.yml b/examples/cosmodc2/pipeline-20deg2-nersc.yml
similarity index 100%
rename from examples/cosmodc2/pipeline-20deg2.yml
rename to examples/cosmodc2/pipeline-20deg2-nersc.yml
diff --git a/txpipe/extensions/clmm/sources_select_compute.py b/txpipe/extensions/clmm/sources_select_compute.py
index d3e672f51..07dbf0a60 100644
--- a/txpipe/extensions/clmm/sources_select_compute.py
+++ b/txpipe/extensions/clmm/sources_select_compute.py
@@ -7,8 +7,12 @@ from collections import defaultdict
 import yaml
 import ceci
+import warnings
 
 
 class CLClusterShearCatalogs(PipelineStage):
+    """
+    Select background source galaxies near each cluster and compute their lensing weights and shear components.
+    """
     name = "CLClusterShearCatalogs"
     inputs = [
         ("cluster_catalog", HDFFile),
@@ -26,8 +30,11 @@ class CLClusterShearCatalogs(PipelineStage):
         "chunk_rows": 100_000, # rows to read at once from source cat
         "max_radius": 10.0, # Mpc
         "delta_z": 0.1,
-        "redshift_criterion": "mean", # might also need PDF
+        "redshift_cut_criterion": "zmean", # pdf / mean / true / median
+        "redshift_weight_criterion": "zmean", # pdf or point
+        "redshift_cut_criterion_pdf_fraction": 0.9, # used when cutting on the pdf
         "subtract_mean_shear": True,
+        "coordinate_system": "celestial",
     }
 
     def run(self):
@@ -74,6 +81,10 @@ def run(self):
 
             nearby_gals, nearby_gal_dists = self.find_galaxies_near_each_cluster(data, clusters, tree, max_theta_max)
 
+
+            redshift_cut_criterion = self.config["redshift_cut_criterion"]
+            redshift_cut_criterion_pdf_fraction = self.config["redshift_cut_criterion_pdf_fraction"]
+
             # Now we have all the galaxies near each cluster
             # We need to cut down to a specific radius for this
             # cluster
@@ -99,8 +110,21 @@ def run(self):
                 cluster_dec = clusters[cluster_index]["dec"]
 
                 #
                 # Cut down to clusters that are in front of this galaxy
-                zgal = data["redshift"][gal_index]
-                z_good = zgal > cluster_z + delta_z
+                if redshift_cut_criterion == "pdf":
+                    # If we cut based on the PDF then we need the probability
+                    # that the galaxy z is behind the cluster to be greater
+                    # than a cut criterion.
+                    pdf_z = data["pdf_z"]
+                    pdf = data["pdf_pz"][gal_index]
+                    pdf_frac = pdf[:, pdf_z > cluster_z + delta_z].sum(axis=1) / pdf.sum(axis=1)
+                    z_good = pdf_frac > redshift_cut_criterion_pdf_fraction
+                elif redshift_cut_criterion == "zmode":
+                    # otherwise if we are not using the PDF we do a simple cut
+                    zgal = data["redshift"][gal_index]
+                    z_good = zgal > cluster_z + delta_z
+                else:
+                    raise NotImplementedError("redshift_cut_criterion must be 'pdf' or 'zmode'")
+
                 gal_index = gal_index[z_good]
                 distance = distance[z_good]
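The new `pdf` cut above keeps a galaxy only if a large enough fraction of its p(z) lies behind the cluster (beyond `z_cluster + delta_z`), with the threshold set by `redshift_cut_criterion_pdf_fraction`. A standalone sketch of that selection, using toy Gaussian p(z) arrays instead of the pipeline's QP file, looks like this:

```python
# Toy illustration of the pdf-based background cut (all values are made up).
import numpy as np

pdf_z = np.linspace(0.0, 3.0, 301)     # shared redshift grid
z_cluster, delta_z = 0.3, 0.2
cut_fraction = 0.9                     # redshift_cut_criterion_pdf_fraction

# Three fake galaxies with Gaussian p(z) centred at different redshifts
centres = np.array([0.25, 0.6, 1.2])
pdf = np.exp(-0.5 * ((pdf_z[None, :] - centres[:, None]) / 0.1) ** 2)

# Fraction of each p(z) lying behind the cluster, as in the stage above
pdf_frac = pdf[:, pdf_z > z_cluster + delta_z].sum(axis=1) / pdf.sum(axis=1)
z_good = pdf_frac > cut_fraction
print(pdf_frac.round(3), z_good)   # only the clearly background galaxy survives
```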
@@ -165,7 +189,7 @@ def run(self):
                 indices = np.zeros(0, dtype=int)
                 weights = np.zeros(0)
                 tangential_comps = np.zeros(0)
-                cross_comp = np.zeros(0)
+                cross_comps = np.zeros(0)
                 distances = np.zeros(0)
             else:
                 # Each process flattens the list of all the galaxies for this cluster
@@ -319,35 +343,35 @@ def compute_sources_quantities(self, clmm_cosmo, data, index, z_cluster, ra_clus
 
         # Depending on whether we are using the PDF or not, choose
         # some keywords to give to compute_galaxy_weights
-        if self.config["redshift_criterion"] == "pdf":
+        if self.config["redshift_weight_criterion"] == "pdf":
             # We need the z and PDF(z) arrays in this case
             pdf_z = data["pdf_z"]
             pdf_pz = data["pdf_pz"][index]
-            redshift_keywords = {
-                "pzpdf":pdf_pz,
-                "pzbins":pdf_z,
-                "use_pdz":True
-            }
-        else:
+
+            # suppress user warnings of the form "Some source redshifts are lower than the cluster redshift"
+            with warnings.catch_warnings():
+                warnings.filterwarnings("ignore")
+                sigma_c = clmm.theory.compute_critical_surface_density_eff(
+                    cosmo=clmm_cosmo,
+                    z_cluster=z_cluster,
+                    pzbins=pdf_z,
+                    pzpdf=pdf_pz,
+                )
+        elif self.config["redshift_weight_criterion"] == "zmode":
             # point-estimated redshift
             z_source = data["redshift"][index]
-            redshift_keywords = {
-                "z_source":z_source,
-                "use_pdz":False
-            }
+            sigma_c = clmm_cosmo.eval_sigma_crit(z_cluster, z_source)
+        else:
+            raise NotImplementedError("redshift_weight_criterion must be 'pdf' or 'zmode'")
+
         weight = clmm.dataops.compute_galaxy_weights(
-            z_cluster,
-            clmm_cosmo,
+            sigma_c=sigma_c,
             is_deltasigma=True,
-            use_shape_noise=True,
-            use_shape_error=False,
-            validate_input=True,
-            shape_component1=data["g1"][index],
-            shape_component2=data["g2"][index],
-            **redshift_keywords
+            use_shape_noise=False,
         )
+
+        coordinate_system = self.config["coordinate_system"]
         _, tangential_comp, cross_comp = clmm.compute_tangential_and_cross_components(
             ra_cluster,
             dec_cluster,
@@ -355,17 +379,12 @@ def compute_sources_quantities(self, clmm_cosmo, data, index, z_cluster, ra_clus
             data['dec'][index],
             data["g1"][index],
             data["g2"][index],
+            coordinate_system=coordinate_system,
             geometry="curve",
             is_deltasigma=True,
-            cosmo=clmm_cosmo,
-            z_lens=z_cluster,
-            z_source=z_source,
-            sigma_c=None,
-            use_pdz=False,
-            pzbins=None,
-            pzpdf=None,
+            sigma_c=sigma_c,
             validate_input=True,
-            )
+        )
 
         return weight, tangential_comp, cross_comp
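For reference, the two Sigma_crit branches above can be exercised outside the pipeline roughly as follows. This is only a sketch: it assumes a CLMM version that provides the same calls used in this diff (`compute_critical_surface_density_eff`, `eval_sigma_crit`, and `compute_galaxy_weights` taking `sigma_c`), and the cosmology parameters and p(z) arrays are placeholders.

```python
# Sketch of the point-estimate vs pdf weighting branches (illustrative values only).
import numpy as np
import clmm

cosmo = clmm.Cosmology(H0=71.0, Omega_dm0=0.22, Omega_b0=0.045)  # placeholder cosmology
z_cluster = 0.3
z_source = np.array([0.8, 1.1, 1.5])

# "zmode" branch: Sigma_crit from the point redshift estimate
sigma_c_point = cosmo.eval_sigma_crit(z_cluster, z_source)

# "pdf" branch: effective Sigma_crit integrated over each galaxy's p(z)
pdf_z = np.linspace(0.0, 3.0, 301)
pdf_pz = np.exp(-0.5 * ((pdf_z[None, :] - z_source[:, None]) / 0.05) ** 2)
sigma_c_eff = clmm.theory.compute_critical_surface_density_eff(
    cosmo=cosmo, z_cluster=z_cluster, pzbins=pdf_z, pzpdf=pdf_pz
)

# In either case the per-galaxy DeltaSigma weights then follow from sigma_c alone
weights = clmm.dataops.compute_galaxy_weights(
    sigma_c=sigma_c_eff, is_deltasigma=True, use_shape_noise=False
)
```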
@@ -419,13 +438,30 @@ def iterate_source_catalog(self):
         # where and what to read rom the PZ catalog. This is in a QP
         # format where the mode and mean are stored in a file called
         # "ancil". The columns are called zmode and zmean.
-        # TODO: Support "pdf" option here and read from /data/yvals
-        redshift_criterion = self.config["redshift_criterion"]
+        redshift_cut_criterion = self.config["redshift_cut_criterion"]
+        redshift_weight_criterion = self.config["redshift_weight_criterion"]
+        want_pdf = (redshift_cut_criterion == "pdf") or (redshift_weight_criterion == "pdf")
+
+        point_pz_group = "ancil"
+        # TODO: We may extend this to use other options for the point redshift
+        # in the cuts / weighting
+        point_pz_cols = ["zmode"]
+
+        if redshift_cut_criterion == "zmean" or redshift_weight_criterion == "zmean":
+            point_pz_cols.append("zmean")
+
+        rename["zmode"] = "redshift"
+
+        # if the PDF is available, load it
+        # use keyword to decide what to cut on
+        # use keyword to decide what to weight on
 
-        if redshift_criterion == "pdf":
+        if want_pdf:
             # This is not actually a single column but an array
-            pz_group = "data"
-            pz_cols = ["yvals"]
+            pdf_file = "source_photoz_pdfs"
+            pdf_group = "data"
+            pdf_cols = ["yvals"]
+            pdf_keywords = [pdf_file, pdf_group, pdf_cols]
 
             # we will also need the z axis values in this case
             with self.open_input("source_photoz_pdfs") as f:
@@ -433,10 +469,7 @@ def iterate_source_catalog(self):
                 # but that's the kind of thing they might change.
                 pdf_z = np.squeeze(f["/meta/xvals"][0])
         else:
-            pz_group = "ancil"
-            pz_col = "z" + self.config["redshift_criterion"]
-            pz_cols = [pz_col]
-            rename[pz_col] = "redshift"
+            pdf_keywords = []
 
 
         # where and what to read from the tomography catalog.
@@ -454,11 +487,12 @@ def iterate_source_catalog(self):
             shear_group,
             shear_cols,
             "source_photoz_pdfs",
-            pz_group,
-            pz_cols,
+            point_pz_group,
+            point_pz_cols,
             "shear_tomography_catalog",
             tomo_group,
             tomo_cols,
+            *pdf_keywords,
             parallel=True
         ):
             # For each chunk of data we also want to store the original
@@ -486,7 +520,7 @@ def iterate_source_catalog(self):
             )
 
             # If we are in PDF mode then we need this extra info
-            if redshift_criterion == "pdf":
+            if want_pdf:
                 data["pdf_z"] = pdf_z
                 # also rename this for clarity
                 data["pdf_pz"] = data.pop("yvals")
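For orientation, the QP-format photo-z file read above stores the point estimates in an `ancil` group (`zmode`, and `zmean` when requested), the per-galaxy PDFs under `data/yvals`, and the shared redshift grid in `meta/xvals`. A minimal sketch of reading it directly with h5py (the filename is hypothetical; inside the pipeline this comes from the `source_photoz_pdfs` input instead):

```python
# Sketch of reading the QP-style photo-z file outside the pipeline.
import h5py
import numpy as np

with h5py.File("source_photoz_pdfs.hdf5", "r") as f:   # hypothetical local path
    zmode = f["ancil/zmode"][:]              # point estimate used for "zmode" cuts/weights
    zmean = f["ancil/zmean"][:]              # loaded in addition when "zmean" is requested
    pdf_z = np.squeeze(f["meta/xvals"][0])   # shared redshift grid, as in the stage above
    pdf_pz = f["data/yvals"][:]              # per-galaxy p(z) values on that grid
```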