sanderslab · yoda-vid · Feb 23, 2023 · Jan 10, 2023 · Feb 3, 2023 · Feb 3, 2023
diff --git a/clrstats/R/clrstats.R b/clrstats/R/clrstats.R
@@ -112,18 +112,35 @@ fitModel <- function(model, vals, genos, sides, ids=NULL) {
       # linear regression
       # TODO: see whether need to factorize genos
       if (length(unique(genos)) < 2) {
-        lm.formula <- as.formula(vals ~ sides)
+        form <- "vals ~ sides"
       } else if (length(unique(sides)) < 2) {
-        lm.formula <- as.formula(vals ~ genos)
+        form <- "vals ~ genos"
       } else {
-        lm.formula <- as.formula(vals ~ genos * sides)
+        form <- "vals ~ genos * sides"
       }
+
+      intercept <- config.env$Intercept
+      if (!intercept) {
+        # remove the implied intercept term
+        form <- paste0(form, " + 0")
+      }
+
+      # perform linear regression
+      lm.formula <- as.formula(form)
       fit <- lm(lm.formula)
       result.summary <- summary.lm(fit)
-      result <- result.summary$coefficients
+      result <- as.data.frame(result.summary$coefficients)
+      result$r.squared <- result.summary$r.squared
 
-      # remove first ("non-intercept") row
-      result <- result[-(1:1), ]
+      # store intercept coefficient
+      intercept.coef <- 0
+      if (intercept) {
+        # extract intercept coefficient and remove its row so result only
+        # has main effect and interactions
+        intercept.coef <- fit$coefficients[1]
+        result <- result[-(1:1), ]
+      }
+      result$intercept <- intercept.coef
 
     } else if (model == kModel[3]) {
       # generalized estimating equations
@@ -198,6 +215,15 @@ fitModel <- function(model, vals, genos, sides, ids=NULL) {
   }
   coef.tab$P <- result[rows, 4]
   coef.tab$N <- length(vals)
+
+  # transfer any additional stats to output table
+  extra.cols <- c("intercept", "r.squared")
+  for (col in extra.cols) {
+    if (col %in% colnames(result)) {
+      coef.tab[[col]] <- result[rows, col]
+    }
+  }
+
   print(coef.tab)
   return(coef.tab)
 }
@@ -652,10 +678,6 @@ filterStats <- function(stats, corr=NULL) {
   stats.filt <- stats[non.na, ]
   if (length(stats.filt$Stats) < 1) return(NULL)
 
-  filtered <- NULL
-  interactions <- NULL
-  offset <- 0 # number of columns ahead of coefficients
-
   # get names and mean and CI columns
   cols.names <- names(stats.filt)
   cols.means.cis <- c(
@@ -665,21 +687,26 @@ filterStats <- function(stats, corr=NULL) {
     cols.names[grepl(".sd", cols.names)], 
     cols.names[grepl(".ci", cols.names)])
 
-  # build data frame for pertinent coefficients from each type of main 
-  # effect or interaction
+  # set up common columns
+  cols <- list("Region", "Volume", "Nuclei")
+  cols.orig <- cols
+  offset <- length(cols) # number of columns ahead of coefficients
+
+  # get stats coefficients and main effects or interactions
   stats.coef <- stats.filt$Stats[1][[1]]
   interactions <- gsub(":", ".", rownames(stats.coef))
-  cols <- list("Region", "Volume", "Nuclei")
-  cols.orig <- cols # points to original vector if it is mutated
-  offset <- length(cols)
-  cols.suffixes <- c(
-    ".n", ".effect", ".ci.low", ".ci.hi", ".effect.raw", ".ci.low.raw",
-    ".ci.hi.raw", ".p", ".pcorr", ".logp")
+
+  # make a set of stat coefficient columns for each main effect/interaction
+  cols.suffixes <- paste0(".", tolower(colnames(stats.coef)))
+  cols.suffixes <- gsub("value", "effect", cols.suffixes)
+  cols.suffixes <- c(cols.suffixes, ".pcorr", ".logp")
   for (interact in interactions) {
     for (suf in cols.suffixes) {
       cols <- append(cols, paste0(interact, suf))
     }
   }
+
+  # build output data frame
   filtered <- data.frame(matrix(nrow=nrow(stats.filt), ncol=length(cols)))
   names(filtered) <- cols
   for (col in cols.orig) {
@@ -688,7 +715,7 @@ filterStats <- function(stats, corr=NULL) {
   }
   num.stat.cols <- length(names(stats.coef))
 
-  for (i in 1:nrow(stats.filt)) {
+  for (i in seq_len(nrow(stats.filt))) {
     if (is.na(stats.filt$Stats[i])) next
     # get coefficients, stored in one-element list
     stats.coef <- stats.filt$Stats[i][[1]]
@@ -891,6 +918,8 @@ setupConfig <- function(name=NULL) {
     # region-ID map from MagellanMapper, which should contain all regions
     # including hierarchical/ontological ones
     config.env$Labels.Path <- "../region_ids.csv"
+    # include linear regression intercept term
+    config.env$Intercept <- TRUE
 
   } else if (endsWith(name, ".R")) {
     # load a profile file

diff --git a/docs/cli.md b/docs/cli.md
@@ -34,7 +34,7 @@ Argument | Sub-argument | Function | Ver Added | Last updated
 `--labels` | `[path_ref="""] [level=0] [ID=0] [orig_colors=1] [symmetric_colors=1] [binary=<backround>,<foreground>] [translate_labels=<path>"] [translate_children=0]` | Atlas label settings; see `config.AtlasLabels`. <ul><li>`path_ref`: Path to the labels reference file. Should have at least these columns: `Region` or `id` for region IDs, and `RegionName` or `name` for corresponding names. Atlases generated in >=v1.5.0 with this flag will copy this file into the atlas directory so that this argument is not needed when loading the atlas and images registered to it.</li> <li>`level`: Ontology level. Structures in sub-levels will be grouped at this level for volume stats. | v1.0.0 | v1.0.0
 `--transform` | `[rotate=0] [flip_vert=0] [flip_horiz=0] [flip=axis] [rescale=0] [interpolation=1]` | Image transformations; see `config.Transforms`. <ul><li>`interpolation`: Interpolation order `n` for the main (intensity) image, where `n` is passed to the `order` argument in Scikit-image's [`transform.resize`](https://scikit-image.org/docs/dev/api/skimage.transform.html#skimage.transform.resize) function</li> <li>*Since [v1.6.0](#changes-in-magellanmapper-v16):* `flip`: Flip the selected axis, where `axis` = 0 for the z-axis, 1 for the y-axis, and 2 for the x-axis</li></ul> | v1.0.0 | v1.6.0
 `--reg_suffixes` | `[atlas=atlasVolume.mhd,...] [annotation=annotation.mhd,...] [borders=<path>]` | Suffixes of registered images to load; see `config.RegSuffixes` for suffixes used throughout the package. Atlases and regenerated and registered with output paths based on the path given by `--img` or `--prefix`. For example:<ul><li>"Base" path (from `--img` or `--prefix`): `/home/me/my_image`</li><li>Intensity/histology/"atlas" image: `/home/me/my_image_atlasVolume.nii.gz`</li><li>Annotation/labels image: `/home/me/my_image_annotation.nii.gz`</li><li>Other registered images, eg atlas edges: `/home/me/my_image_atlasEdge.nii.gz`</li></ul>To load intensity and annotations image: `./run.py --img /home/me/my_image --reg_suffixes atlasVolume.nii.gz annotation.nii.gz`. <ul><li>*Since [v1.5.0](#changes-in-magellanmapper-v15):* Suffixes can be also given as an absolute path, such as a labels image not registered to the main image.</li> <li>*Since [v1.6.0](#changes-in-magellanmapper-v16):* Multiple labels suffixes can be given, separated by commas.</li></ul> | v1.0.0 | [v1.5.0](#changes-in-magellanmapper-v15)
-`--plot_labels` | `[title=<title>] ...` | Plot labels; see `config.PlotLabels` for available parameters. | v1.0.0 | v1.0.0
+`--plot_labels` | `[title=<title>] [err_col_abs=<col>] [background=<color>] [vspan_col=<col>] [vspan_format=<str>] [rotation=<deg>] ...` | Plot labels; see `config.PlotLabels` for available parameters.<ul><li>*Since [v1.6.0](#changes-in-magellanmapper-v16):* Added several new sub-arguments.</li></ul> | v1.0.0 | [v1.6.0](#changes-in-magellanmapper-v16)
 `--set_meta` | `[resolutions=<x,y,z>] [magnification=<n>] [zoom=<n>] [shape=<c,x,y,z,...>] [dtype=<data-type>]` | Metadata to set when importing an image; see `config.MetaKeys`. | v1.0.0 | [v1.3.0](#changes-in-magellanmapper-v13)
 `--plane` | `<xy\|xz\|yz>` | Transpose to the given planar orientation | v1.0.0 | v1.0.0
 `--show` | `<0\|1>` | Show images after generating them in atlas pipelines. `0` to not show, `1` to show. | v1.0.0 | [v1.3.0](#changes-in-magellanmapper-v13)
@@ -53,10 +53,14 @@ Argument | Sub-argument | Function | Ver Added | Last updated
 
 Old | New | Version | Purpose of Change |
 --- | --- | --- | ---
-None | `--rgb` | v1.6.0 | Open images in RGB(a) mode.
-`--ref_suffixes annotation=` | `--transform [flip=axis] ...` | v1.6.0 | Flip the specified axis.
-`--transform ...` | `--transform [interpolation=n] ...` | v1.6.0 | Interpolation order can be specified when exporting the main image.
-`--transform ...` | `--transform [flip=axis] ...` | v1.6.0 | Flip the specified axis.
+None | `--rgb` | v1.6a1 | Open images in RGB(a) mode.
+`--plot_labels ...` | `--plot_labels err_col_abs=<col> ...` | v1.6a1 | Plot error bars with a column of absolute rather than relative values, now that Clrstats gives absolute values for effect sizes.
+` ` |  `--plot_labels background=<color>` | v1.6a1 | Change plot background color with a Matplotlib color string.
+` ` |  `--plot_labels vspan_col=<col> vspan_format=<str>` | v1.6a1 | Column denoting vertical span groups and string format for them, respectively.
+` ` |  `--plot_labels rotation=<deg>` | v1.6a2 | Change rotation in degrees.
+`--ref_suffixes annotation=` | `--transform [flip=axis] ...` | v1.6a1 | Flip the specified axis.
+`--transform ...` | `--transform [interpolation=n] ...` | v1.6a1 | Interpolation order can be specified when exporting the main image.
+`--transform ...` | `--transform [flip=axis] ...` | v1.6a1 | Flip the specified axis.
 
 ## Changes in MagellanMapper v1.5
 

diff --git a/docs/release/release_v1.6.md b/docs/release/release_v1.6.md
@@ -110,15 +110,16 @@
 
 #### I/O
 
-- Images can be viewed as RGB(A) using the `RGB` button or the `--rgb` CLI argument (#142)
-- EXPERIMENTAL: Some TIF files can be loaded directly, without importing the file first (#90, #213, #242)
+- Images can be viewed and exported as RGB(A) using the `RGB` button or the `--rgb` CLI argument (#142, #445)
+- EXPERIMENTAL: Some TIF files can be loaded directly, without importing the file first (#90, #213, #242, #445)
 - The `--proc export_planes` task can export a subset of image planes specified by `--slice`, or an ROI specified by `--offset` and `--size`
 - Image metadata is stored in the `Image5d` image object (#115)
 - Better 2D image support
   - Extended zero-crossing detection to 2D cases (#142)
   - Unit factor conversions adapts to image dimensions (eg 2D vs 3D) (#132)
   - Fixed ROI padding during blob verification and match-based colocalization for 2D images (#380)
 - Multiple multiplane image files can be selected directly instead of relying on file auto-detection (#201)
+- `openpyxl` package is now optional during region export (#445)
 - Fixed re-importing an image after loading it (#117)
 - Fixed to store the image path when loading a registered image as the main image, which fixes saving the experiment name used when saving blobs (#139)
 
@@ -132,7 +133,9 @@
   - `--plot labels err_col_abs=<col>`: plot error bars with a column of absolute rather than relative values, now that Clrstats gives absolute values for effect sizes
   - `--plot_labels background=<color>`: change plot background color with a Matplotlib color string
   - `--plot_labels vspan_col=<col> vspan_format=<str>`: column denoting vertical span groups and string format for them, respectively (#135, 137)
+  - `--plot_labels rotation=<deg>`: change rotation in degrees (#445)
 - The figure save wrapper (`plot_support.save_fig`) is more flexible (#215)
+- 2D plots can be set not to save (#445)
 - Discrete colormaps can use [Matplotlib named colors](https://matplotlib.org/stable/gallery/color/named_colors.html) and use them for symmetric colors (#226)
 - Fixed errors when generating labels difference heat maps, and conditions can be set through `--plot_labels condition=cond1,cond2,...` (#132)
 - Fixed alignment of headers and columns in data frames printed to console (#109)
@@ -148,6 +151,7 @@
 - The labels reference path has been moved to an environment variable, which can be configured through `--labels <path>` (#147)
 - The Shapiro-Wilks test has been implemented in `meansModel` for consistent table output (#164)
 - A basic `NAMESPACE` file is provided to fix installation and exporting functions (#303)
+- Linear regression intercept term can be toggled using the `Intercept` environment field, and r<sup>2</sup> and intercept are exported (#445)
 - Fixed t-test, which also provides Cohen's d as a standardized effect size through the `effectsize` package (#135)
 - Fixed jitter plot box plots to avoid covering labels (#147)
 - Fixed model fitting (#240, #304)

diff --git a/magmap/cv/verifier.py b/magmap/cv/verifier.py
@@ -44,26 +44,32 @@ def _match_blobs(blobs, blobs_master, close, close_master, dists):
     return matches
 
 
-def find_closest_blobs_cdist(blobs, blobs_master, thresh=None, scaling=None):
-    """Find the closest blobs within a given tolerance using the 
+def find_closest_blobs_cdist(
+        blobs: np.ndarray, blobs_master: np.ndarray,
+        thresh: Optional[float] = None, scaling:
+        Optional[Sequence[float]] = None
+) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
+    """Find the closest blobs within a given tolerance using the
     Hungarian algorithm to find blob matches.
 
     Args:
-        blobs: Blobs as a 2D array of [n, [z, row, column, ...]].
+        blobs: Blobs as a 2D array of ``[n, [z, row, column, ...]]``.
         blobs_master: Array in same format as ``blobs``.
-        thresh: Threshold distance beyond which blob pairings are excluded; 
+        thresh: Threshold distance beyond which blob pairings are excluded;
             defaults to None to include all matches.
-        scaling: Sequence of scaling factors by which to multiply the 
-            blob coordinates before computing distances, used to 
-            scale coordinates from an anisotropic to isotropic 
-            ROI before computing distances, which assumes isotropy. 
+        scaling: Sequence of scaling factors by which to multiply the
+            blob coordinates before computing distances, used to
+            scale coordinates from an anisotropic to isotropic
+            ROI before computing distances, which assumes isotropy.
             Defaults to None.
 
     Returns:
-        Tuple of ``rowis`` and ``colis``, arrays of row and corresponding 
-        column indices of the closest matches; and ``dists_closest``, an 
-        array of corresponding distances for these matches. Only matches 
-        within the given tolerance will be included.
+        Tuple of:
+        - ``rowis`` and ``colis``, arrays of row and corresponding
+          column indices of the closest matches
+        - ``dists_closest``, an array of corresponding distances for these
+          matches. Only matches within the given tolerance will be included.
+
     """
     blobs_scaled = blobs
     blobs_master_scaled = blobs_master
@@ -73,7 +79,7 @@ def find_closest_blobs_cdist(blobs, blobs_master, thresh=None, scaling=None):
         blobs_scaled = np.multiply(blobs[:, :len_scaling], scaling)
         blobs_master_scaled = np.multiply(blobs_master[:, :len_scaling], scaling)
 
-    # find Euclidean distances between each pair of points and determine 
+    # find Euclidean distances between each pair of points and determine
     # the optimal assignments using the Hungarian algorithm
     dists = distance.cdist(blobs_scaled, blobs_master_scaled)
     rowis, colis = optimize.linear_sum_assignment(dists)
@@ -82,14 +88,28 @@ def find_closest_blobs_cdist(blobs, blobs_master, thresh=None, scaling=None):
     if thresh is not None:
         # filter out matches beyond the given threshold distance
         dists_in = dists_closest < thresh
+
         if config.verbose:
-            for blob, blob_sc, blob_base, blob_base_sc, dist, dist_in in zip(
-                    blobs[rowis], blobs_scaled[rowis], blobs_master[colis],
-                    blobs_master_scaled[colis], dists_closest,
-                    dists_in):
-                print("blob: {} (scaled {}), base: {} ({}), dist: {}, in? {}"
-                      .format(blob[:3], blob_sc[:3], blob_base[:3],
-                              blob_base_sc[:3], dist, dist_in))
+            # show matches using original blob coordinates
+            for i, (blob, blob_base, dist_in) in enumerate(zip(
+                    blobs[rowis], blobs_master[colis], dists_in)):
+                _logger.debug(
+                    "%s: Detected blob: %s, truth blob: %s, in? %s",
+                    i, blob[:3], blob_base[:3], dist_in)
+            _logger.debug("")
+
+            # show corresponding scaled coordinates and distances
+            for i, (blob_sc, blob_base_sc, dist, dist_in) in enumerate(zip(
+                    blobs_scaled[rowis], blobs_master_scaled[colis],
+                    dists_closest, dists_in)):
+                _logger.debug(
+                    "%s: Scaled dist: %s", i, dist)
+                _logger.debug(
+                    "  %s (detected)", blob_sc[:3])
+                _logger.debug(
+                    "  %s (truth)", blob_base_sc[:3])
+            _logger.debug("")
+
         rowis = rowis[dists_in]
         colis = colis[dists_in]
         dists_closest = dists_closest[dists_in]

diff --git a/magmap/io/export_regions.py b/magmap/io/export_regions.py
@@ -96,8 +96,12 @@ def color_cells(s):
     if rgbs is not None:
         df = df.style.apply(color_cells, subset="RGB")
     path_xlsx = "{}.xlsx".format(os.path.splitext(path)[0])
-    df.to_excel(path_xlsx)
-    print("exported regions to styled spreadsheet: \"{}\"".format(path_xlsx))
+    try:
+        df.to_excel(path_xlsx)
+        _logger.info("Exported regions to styled spreadsheet: %s", path_xlsx)
+    except ModuleNotFoundError:
+        raise ModuleNotFoundError(
+            config.format_import_err("openpyxl", task="formatting Excel files"))
     return df