query = pd.read_csv(
    os.environ["EXAMPLE_DATA_PATH"] + "eggnog/hydra.tsv",
    sep="\t",
    engine="python",
)
query = query[
    ["Unnamed: 0", "eggNOG_OGs"]
].copy()  # I am only keeping the columns I need
query.columns = ["gene_id", "eggNOG_OGs"]  # rename so that it is easier to work with
query["gene_id"] = "hy_" + query["gene_id"].astype(str)
dotplot_util
+This module contains a collection of utility functions for the pairwise dotplots, as well as some more general functions that are used in the dotplot generation process. I wrote documentation for the functions here, but I didn’t write tests for them. I’m not sure this page is very valuable to end users.
++ +
highlight_cluster
++++highlight_cluster (clusters:numpy.ndarray, cluster:Optional[str]=None, + bg:Union[str,tuple]='black', + hl:Union[str,tuple]='red')
Highlight a cluster in a list of clusters by setting the color of the cluster to hl and the color of the rest to bg.
+ | Type | +Default | +Details | +
---|---|---|---|
clusters | +ndarray | ++ | The array of cluster names. | +
cluster | +typing.Optional[str] | +None | +The cluster to highlight (default: None). | +
bg | +typing.Union[str, tuple] | +black | +The background color. Accepts all matplotlib-compatible color formats (default: “black”). | +
hl | +typing.Union[str, tuple] | +red | +The highlight color. Accepts all matplotlib-compatible color formats (default: “red”). | +
Returns | +ndarray | ++ | The array of colors, with the same length as clusters . |
+
+ +
unique_genes
++++unique_genes (connections:numpy.ndarray)
Extract the unique gene names from an array of connections.
++ | Type | +Details | +
---|---|---|
connections | +ndarray | +The array of connections. Columns should be (query_genes, target_genes, connection_strength). The last column is optional. | +
Returns | +ndarray | +The array of unique genes. | +
+ +
map_to_colormap
++++map_to_colormap (x:numpy.ndarray, + cmap:Union[str,matplotlib.colors.Colormap]='magma_r', + vmin:float=0, vmax:Optional[float]=None)
Map an array of values to a color palette.
++ | Type | +Default | +Details | +
---|---|---|---|
x | +ndarray | ++ | The array to map. | +
cmap | +typing.Union[str, matplotlib.colors.Colormap] | +magma_r | +The color map to use. Should be a matplotlib colormap object or a string with the name of a matplotlib colormap (default: “magma_r”). | +
vmin | +float | +0 | +The value to obtain the minimum color in the colormap. Should be <= np.min(x) to avoid truncation (default: 0). | +
vmax | +typing.Optional[float] | +None | +The value to obtain the maximum color in the colormap. Should be >= np.max(x), and will use np.max(x) if set to None (default: None). |
+
Returns | +ndarray | ++ | Array of RGBA values with a shape of x.shape + (4, ) . |
+
+ +
map_array_to_color
++++map_array_to_color (x:numpy.ndarray, palette:matplotlib.colors.Colormap, + xmax:Optional[float]=None)
Map an array of values to a color palette.
++ | Type | +Default | +Details | +
---|---|---|---|
x | +ndarray | ++ | The array to map. | +
palette | +Colormap | ++ | The color map to use. Should be a matplotlib colormap object. | +
xmax | +typing.Optional[float] | +None | +The maximum value to use for normalization. Should be >= np.max(x), and will use np.max(x) if set to None (default: None). |
+
Returns | +ndarray | ++ | Array of RGBA values with a shape of x.shape + (4, ) . |
+
+ +
add_homology_context
++++add_homology_context (connections:numpy.ndarray, + orthology:pandas.core.frame.DataFrame)
Add homology context to the given connections based on the orthology information.
++ | Type | +Details | +
---|---|---|
connections | +ndarray | +The connections between genes. The columns should be (query_gene, target_gene). | +
orthology | +DataFrame | +The orthology information as a DataFrame. | +
Returns | +ndarray | +The connections array with homology context added. The columns will be (query_gene, target_gene, connection_strength), and the values in connection_strength will depend on the content of the orthology DataFrame. |
+
+ +
plot_dotplot
++++plot_dotplot (query_avg_expr:numpy.ndarray, + target_avg_expr:numpy.ndarray, + query_perc_expr:numpy.ndarray, + target_perc_expr:numpy.ndarray, query_genes:List[str], + target_genes:List[str], connections:numpy.ndarray, + query_cluster_colors:Dict[str,str], + target_cluster_colors:Dict[str,str], + query_gene_colors:Dict[str,str], + target_gene_colors:Dict[str,str], query_species:str, + target_species:str, x_offset:float=1, y_offset:float=0, + grid_offset:int=30, query_clustering:str='leiden', + target_clustering:str='leiden', + output:str='./paired_dotplot.png', + title:Optional[str]=None, title_font_size:int=16, + center:bool=True, + cmap:matplotlib.colors.Colormap='magma_r')
Plot the paired dotplot based on the given data.
++ | Type | +Default | +Details | +
---|---|---|---|
query_avg_expr | +ndarray | ++ | Array representing the average expression values of query genes. | +
target_avg_expr | +ndarray | ++ | Array representing the average expression values of target genes. | +
query_perc_expr | +ndarray | ++ | Array representing the percentage expression values of query genes. | +
target_perc_expr | +ndarray | ++ | Array representing the percentage expression values of target genes. | +
query_genes | +typing.List[str] | ++ | List of query gene names. | +
target_genes | +typing.List[str] | ++ | List of target gene names. | +
connections | +ndarray | ++ | An array where each row contains two genes and (optionally) the strength of their connection. |
+
query_cluster_colors | +typing.Dict[str, str] | ++ | Dictionary mapping query cluster names to their colors. | +
target_cluster_colors | +typing.Dict[str, str] | ++ | Dictionary mapping target cluster names to their colors. | +
query_gene_colors | +typing.Dict[str, str] | ++ | Dictionary mapping query gene names to their colors. | +
target_gene_colors | +typing.Dict[str, str] | ++ | Dictionary mapping target gene names to their colors. | +
query_species | +str | ++ | Species name of the query genes. | +
target_species | +str | ++ | Species name of the target genes. | +
x_offset | +float | +1 | +Offset for the x-axis (default: 1). | +
y_offset | +float | +0 | +Offset for the y-axis (default: 0). | +
grid_offset | +int | +30 | +Offset for the grid spacing (default: 30). | +
query_clustering | +str | +leiden | +Clustering method for the query genes (default: “leiden”). | +
target_clustering | +str | +leiden | +Clustering method for the target genes (default: “leiden”). | +
output | +str | +./paired_dotplot.png | +Output file path for the plot (default: “./paired_dotplot.png”). | +
title | +typing.Optional[str] | +None | +Title of the plot (default: None). | +
title_font_size | +int | +16 | +Font size of the plot title (default: 16). | +
center | +bool | +True | +Whether to center the dotplots when the number of genes exceeds the maximum (default: True). | +
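cmap | +Colormap | +magma_r | +The Colormap instance or registered colormap name used to map scalar data to colors (default: “magma_r”). |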
Returns | +None | ++ | + |
+ +
add_connections
++++add_connections (fig:matplotlib.figure.Figure, connections:numpy.ndarray, + query_gene_names:List[str], + query_gene_colors:Dict[str,str], label_offset:float)
Add connections between genes to the given paired dotplot figure.
++ | Type | +Details | +
---|---|---|
fig | +Figure | +The paired dotplot figure to which the connections will be added. | +
connections | +ndarray | +An array where each row contains two genes and (optionally) the strength of their connection. |
+
query_gene_names | +typing.List[str] | +The list of query gene names. | +
query_gene_colors | +typing.Dict[str, str] | +The dictionary mapping query gene names to their colors. | +
label_offset | +float | +The offset for label positioning. | +
Returns | +None | ++ |
+ +
make_dotplot
++++make_dotplot (ax:matplotlib.axes._axes.Axes, avg:numpy.ndarray, + perc:numpy.ndarray, gene_names:List[str], species:str, + clustering:str, clust_color:List[str], + gene_color:List[str], side:str='left', + cmap:matplotlib.colors.Colormap='magma_r')
Make a dotplot on the given Axes object based on the average and percentage expression values.
++ | Type | +Default | +Details | +
---|---|---|---|
ax | +Axes | ++ | The Axes object on which to create the dotplot. | +
avg | +ndarray | ++ | The average expression values. | +
perc | +ndarray | ++ | The percentage expression values. | +
gene_names | +typing.List[str] | ++ | The list of gene names. | +
species | +str | ++ | The species name. | +
clustering | +str | ++ | The clustering information. | +
clust_color | +typing.List[str] | ++ | The list of colors for clusters. | +
gene_color | +typing.List[str] | ++ | The list of colors for genes. | +
side | +str | +left | +The side to place the y-axis labels, either “left” or “right” (default: “left”). | +
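cmap | +Colormap | +magma_r | +The Colormap instance or registered colormap name used to map scalar data to colors (default: “magma_r”). |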
Returns | +None | ++ | + |
+ +
plot_colorbar_legend
++++plot_colorbar_legend (cbar_legend:matplotlib.axes._axes.Axes, + query_avg_expr:numpy.ndarray, + target_avg_expr:numpy.ndarray, + cmap:matplotlib.colors.Colormap='magma_r')
Plot the colorbar legend based on the average expression values of query and target genes.
++ | Type | +Default | +Details | +
---|---|---|---|
cbar_legend | +Axes | ++ | The Axes object representing the colorbar legend. | +
query_avg_expr | +ndarray | ++ | Array representing the average expression values of query genes. | +
target_avg_expr | +ndarray | ++ | Array representing the average expression values of target genes. | +
cmap | +Colormap | +magma_r | +The Colormap instance or registered colormap name used to map scalar data to colors (default: “magma_r”). |
+
Returns | +None | ++ | + |
+ +
plot_dot_legend
++++plot_dot_legend (dot_legend, size_exponent=1.5, dot_size=200)
Create the dotplot legend, explaining dot size.
++ | Type | +Default | +Details | +
---|---|---|---|
dot_legend | +matplotlib axis | ++ | The subplot of the grid that contains the dotplot legend. | +
size_exponent | +float | +1.5 | +The exponent applied to the fraction of cells in a group to obtain the dot size (default: 1.5). |
+
dot_size | +int | +200 | +The size of the largest dot (default: 200). | +
+ +
get_dot_color
++++get_dot_color (query:anndata._core.anndata.AnnData, + target:anndata._core.anndata.AnnData, + query_clustering:str, target_clustering:str, + query_genes:Optional[numpy.ndarray]=None, + target_genes:Optional[numpy.ndarray]=None, + query_gene_names:Optional[numpy.ndarray]=None, + target_gene_names:Optional[numpy.ndarray]=None, + layer:Optional[str]=None)
Calculate average expression in each cluster and translate that to dot color for the dotplot. Note that this function does not know how the matrix was processed beforehand: if the data have been log-transformed, it will calculate the average of the log values, not the log of the average of the back-transformed (exponentiated) values.
++ | Type | +Default | +Details | +
---|---|---|---|
query | +AnnData | ++ | The query dataset. | +
target | +AnnData | ++ | The target dataset. | +
query_clustering | +str | ++ | The .obs column name to use for the query dataset. | +
target_clustering | +str | ++ | The .obs column name to use for the target dataset. | +
query_genes | +typing.Optional[numpy.ndarray] | +None | +Array of query genes to subset the data, if any. If None, use all genes (default: None). | +
target_genes | +typing.Optional[numpy.ndarray] | +None | +Array of target genes to subset the data, if any. If None, use all genes (default: None). | +
query_gene_names | +typing.Optional[numpy.ndarray] | +None | +Array of query gene names (default: None). | +
target_gene_names | +typing.Optional[numpy.ndarray] | +None | +Array of target gene names (default: None). | +
layer | +typing.Optional[str] | +None | +The layer to use for the average expression calculation. If not specified, it will use the .X slot of the AnnData objects. It is vital to set this correctly to avoid calculating average expression on log1p-transformed data (default: None). |
+
Returns | +typing.Tuple[pandas.core.frame.DataFrame, pandas.core.frame.DataFrame] | ++ | A tuple containing the dot color values for query and target datasets, respectively. | +
+ +
get_dot_size
++++get_dot_size (query:pandas.core.frame.DataFrame, + target:pandas.core.frame.DataFrame, query_clustering:str, + target_clustering:str, + query_genes:Optional[numpy.ndarray]=None, + target_genes:Optional[numpy.ndarray]=None, + query_gene_names:Optional[numpy.ndarray]=None, + target_gene_names:Optional[numpy.ndarray]=None)
Calculate the percentage of cells in each cluster that express each gene, and translate that to dot size for the dotplot.
++ | Type | +Default | +Details | +
---|---|---|---|
query | +DataFrame | ++ | The query dataset. | +
target | +DataFrame | ++ | The target dataset. | +
query_clustering | +str | ++ | The .obs column name to use for the query dataset. | +
target_clustering | +str | ++ | The .obs column name to use for the target dataset. | +
query_genes | +typing.Optional[numpy.ndarray] | +None | +Array of query genes to subset the data, if any. If None, use all genes (default: None). | +
target_genes | +typing.Optional[numpy.ndarray] | +None | +Array of target genes to subset the data, if any. If None, use all genes (default: None). | +
query_gene_names | +typing.Optional[numpy.ndarray] | +None | +Array of query gene names (default: None). | +
target_gene_names | +typing.Optional[numpy.ndarray] | +None | +Array of target gene names (default: None). | +
Returns | +typing.Tuple[pandas.core.frame.DataFrame, pandas.core.frame.DataFrame] | ++ | A tuple containing the dot size values for query and target datasets, respectively. | +
+ +
feature_colors
++++feature_colors (components:numpy.ndarray, query_G:int, seed:int=42)
Assign colors to the components based on the given array of components.
++ | Type | +Default | +Details | +
---|---|---|---|
components | +ndarray | ++ | The array of components. | +
query_G | +int | ++ | The number of components for the query genes. | +
seed | +int | +42 | +The seed value for the random number generator (default: 42). | +
Returns | +typing.Tuple[numpy.ndarray, numpy.ndarray] | ++ | A tuple containing the colored components for query genes and target genes, respectively. | +
+ +
gene_order
++++gene_order (full_adjacency:numpy.ndarray, components:numpy.ndarray, + query_G:int)
Calculate the order of genes based on the given full adjacency matrix and components. Highly connected genes are placed first; genes without any connections are randomly ordered at the bottom of the plot.
++ | Type | +Details | +
---|---|---|
full_adjacency | +ndarray | +The full adjacency matrix represented as a 2D numpy array. | +
components | +ndarray | +An array representing the components. | +
query_G | +int | +The number of query genes. | +
Returns | +typing.Tuple[numpy.ndarray, numpy.ndarray] | +A tuple containing the query gene order and the target gene order as numpy arrays. | +
+ +
calculate_adjacency_matrix
++++calculate_adjacency_matrix (connections:numpy.ndarray, + query_genes:List[Any], + target_genes:List[Any])
Calculate the adjacency matrix based on the given connections, query genes, and target genes.
++ | Type | +Details | +
---|---|---|
connections | +ndarray | +The 2D array representing the connections between genes. Each row contains two gene identifiers indicating a connection, and optionally the strength of that connection. |
+
query_genes | +typing.List[typing.Any] | +A list of genes that act as queries. | +
target_genes | +typing.List[typing.Any] | +A list of genes that act as targets. | +
Returns | +ndarray | +The adjacency matrix represented as a 2D numpy array. It has dimensions (query_G + target_G) x (query_G + target_G), where query_G and target_G are the lengths of query_genes and target_genes, respectively. |
+
+ +
label_pos
++++label_pos (display_coords:Dict[str,Tuple[float,float,float,float]], + key:str, side:str='left')
Get the edge coordinates of a label. Keep either the left or the right end of the word.
++ | Type | +Default | +Details | +
---|---|---|---|
display_coords | +typing.Dict[str, typing.Tuple[float, float, float, float]] | ++ | A dictionary that holds the window extents of tick labels. | +
key | +str | ++ | The label to retrieve; a gene name. | +
side | +str | +left | +One of “left” or “right”; depending on orientation will return the leftmost or rightmost position of the label (default: “left”). |
+
Returns | +typing.Tuple[float, float] | ++ | A tuple containing the x and y coordinates of the label. | +
+ +
prepare_dotplot
++++prepare_dotplot (avg_expr:pandas.core.frame.DataFrame, + perc_expr:pandas.core.frame.DataFrame, + cmap:Union[str,matplotlib.colors.Colormap]='magma_r', + vmin:float=0, vmax:Optional[float]=None, + size_exponent:float=1.5, dot_size:float=200)
Pivots average expression and percent expressed tables to make them dotplot-friendly.
++ | Type | +Default | +Details | +
---|---|---|---|
avg_expr | +DataFrame | ++ | Data frame that holds average expression for all genes and all clusters. | +
perc_expr | +DataFrame | ++ | Data frame that tracks the percentage of cells expressing each gene in every cluster. | +
cmap | +typing.Union[str, matplotlib.colors.Colormap] | +magma_r | +The Colormap instance or registered colormap name used to map scalar data to colors (default: “magma_r”). |
+
vmin | +float | +0 | +Minimum average expression value to show (default: 0). | +
vmax | +typing.Optional[float] | +None | +Maximum average expression value to show (default: maximum average expr. value). | +
size_exponent | +float | +1.5 | +Dot size is computed as fraction ** size_exponent * dot_size (default: 1.5). | +
dot_size | +float | +200 | +The size of the largest dot (default: 200). | +
Returns | +typing.Tuple[pandas.core.frame.DataFrame, pandas.core.frame.DataFrame, numpy.ndarray] | ++ | A tuple containing the melted average expression data frame, the melted percentage expression data frame, and the array of RGBA-coded color values for the average expression in a cluster/gene combination, according to the input color map. |
+