future proofing scFates
LouisFaure committed Oct 23, 2024
1 parent df08db4 commit ace49c4
Showing 16 changed files with 55 additions and 51 deletions.
4 changes: 2 additions & 2 deletions requirements.txt
@@ -7,7 +7,7 @@ scanpy>=1.7.0
leidenalg>=0.8.1
numpy<2.0
joblib>=0.17.0
-pandas<2.0
+pandas<=2.2.3
statsmodels>=0.11.1
matplotlib<3.9
plotly>=4.8.1
@@ -18,7 +18,7 @@ typing_extensions
scikit-learn
scikit-misc
simpleppt>=1.1.3
-elpigraph-python==0.3.1
+elpigraph-python==0.3.2
networkx<3.0
igraph>=0.9.6
adjustText>=0.7.3
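
pandas 2.x is now allowed while elpigraph-python moves to 0.3.2; these two pins are the core of the compatibility work below. To spot-check an environment against the updated pins, something like this works (a sketch: the pin list is abridged and the `packaging` library is assumed to be installed):

```python
# Sketch: check a few of the updated pins at runtime (abridged, not the full requirements.txt)
from importlib.metadata import version
from packaging.specifiers import SpecifierSet

pins = {
    "pandas": "<=2.2.3",
    "elpigraph-python": "==0.3.2",
    "numpy": "<2.0",
    "matplotlib": "<3.9",
}

for pkg, spec in pins.items():
    ok = version(pkg) in SpecifierSet(spec)  # SpecifierSet accepts version strings
    print(f"{pkg} {version(pkg)} satisfies '{spec}': {ok}")
```
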
Binary file modified scFates/datasets/test.h5ad
Binary file not shown.
4 changes: 2 additions & 2 deletions scFates/plot/features.py
@@ -182,7 +182,7 @@ def trends(
fitted = pd.DataFrame(
adata[:, features].layers["fitted"], index=adata.obs_names, columns=features
).T.copy(deep=True)
-g = adata.obs.groupby("seg")
+g = adata.obs.groupby("seg", observed=False)

dct = graph["milestones"]
keys = np.array(list(dct.keys()))
@@ -476,7 +476,7 @@ def add_frames(axis, vert):
gs_emb = gridspec.GridSpecFromSubplotSpec(1, 1, subplot_spec=gsubs[0])
axemb = fig.add_subplot(gs_emb[0])
axs = axs + [axemb]
-adata_temp.obs["mean_trajectory"] = 0
+adata_temp.obs["mean_trajectory"] = 0.0
adata_temp.obs.loc[
fitted_sorted.columns, "mean_trajectory"
] = fitted_sorted.mean(axis=0).values
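
The two edits above are typical of the pandas 2.x fixes in this commit: grouping on a categorical now passes `observed=` explicitly (recent pandas warns that the default will flip to `True`), and columns that later receive floats are seeded with `0.0`. A toy sketch of the `observed` behavior, with invented data:

```python
# Sketch: why groupby on a categorical now passes observed= explicitly
import pandas as pd

df = pd.DataFrame({
    "seg": pd.Categorical(["a", "a", "b"], categories=["a", "b", "c"]),
    "t": [0.1, 0.2, 0.3],
})

# observed=False keeps the unobserved category "c" in the result and, more
# importantly, silences the pandas 2.x FutureWarning about the default change.
print(df.groupby("seg", observed=False)["t"].mean())  # index: a, b, c
print(df.groupby("seg", observed=True)["t"].mean())   # index: a, b
```
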
4 changes: 2 additions & 2 deletions scFates/plot/matrix.py
@@ -238,7 +238,7 @@ def matrix(

if return_data:
data = pd.DataFrame(
-0, index=features, columns=adata_sub.obs.split.cat.categories
+0.0, index=features, columns=adata_sub.obs.split.cat.categories
)
allidx = data.stack().index
for group in data.columns:
@@ -338,7 +338,7 @@ def matrix(
borderpad=2,
)

-mappable = cm.ScalarMappable(cmap=cm.get_cmap(cmap))
+mappable = cm.ScalarMappable(cmap=plt.get_cmap(cmap))
cbar = fig.colorbar(mappable, cax=position, orientation="horizontal", aspect=50)
cbar.set_ticks([0, 1])
cbar.set_ticklabels(
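
`cm.get_cmap` was deprecated in matplotlib 3.7 and removed in 3.9, which also explains the `matplotlib<3.9` pin in requirements.txt; `plt.get_cmap` is the portable lookup used here. A minimal sketch of the equivalent forms:

```python
# Sketch: colormap lookup without the removed cm.get_cmap
import matplotlib as mpl
import matplotlib.pyplot as plt

cmap = plt.get_cmap("viridis")    # portable replacement, as used in this commit
cmap = mpl.colormaps["viridis"]   # registry form available on newer matplotlib
```
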
6 changes: 3 additions & 3 deletions scFates/plot/slide_cors.py
@@ -198,7 +198,7 @@ def slide_cors(
emb[np.argsort(freq), 0],
emb[np.argsort(freq), 1],
s=10,
-c=freq[np.argsort(freq)],
+c=freq.iloc[np.argsort(freq)],
cmap=gr,
rasterized=True,
)
@@ -327,11 +327,11 @@ def slide_cors(
pairwise_distances(np.array([0, 0]).reshape(1, -1), fB).argsort()
)[0][:top_focus]
texts_A = [
-ax_scat.text(fA.loc[t][0], fA.loc[t][1], t, **kwargs_text)
+ax_scat.text(fA.loc[t].iloc[0], fA.loc[t].iloc[1], t, **kwargs_text)
for t in fA.index[topA]
]
texts_B = [
-ax_scat.text(fB.loc[t][0], fB.loc[t][1], t, **kwargs_text)
+ax_scat.text(fB.loc[t].iloc[0], fB.loc[t].iloc[1], t, **kwargs_text)
for t in fB.index[topB]
]
texts = texts_A + texts_B
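
`np.argsort` returns integer positions, and recent pandas deprecates passing positions to plain `[]`-indexing, which treats keys as labels; `.iloc` makes the positional intent explicit. A toy illustration:

```python
# Sketch: positional indexing must be explicit in recent pandas
import numpy as np
import pandas as pd

freq = pd.Series([0.3, 0.1, 0.2], index=["g1", "g2", "g3"])
order = np.argsort(freq)   # integer positions: [1, 2, 0]

print(freq.iloc[order])    # explicit positional access: g2, g3, g1
# freq[order] relies on a deprecated positional fallback and warns/breaks here
```
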
2 changes: 1 addition & 1 deletion scFates/plot/trajectory.py
@@ -748,7 +748,7 @@ def _get_color_values(
# when plotting, the color of the dots is determined for each plot
# the data is either categorical or continuous and the data could be in
# 'obs' or in 'var'
-if not is_categorical_dtype(values):
+if not isinstance(values.dtype, pd.CategoricalDtype):
return values, False
else: # is_categorical_dtype(values)
color_key = f"{value_to_plot}_colors"
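
pandas deprecated `is_categorical_dtype` in the 2.x line; the forward-compatible spelling is an `isinstance` check against the dtype (note: against `.dtype`, not the Series itself, which would always be `False`). Sketch:

```python
# Sketch: dtype check replacing the deprecated is_categorical_dtype
import pandas as pd

s = pd.Series(["a", "b", "a"], dtype="category")
print(isinstance(s.dtype, pd.CategoricalDtype))                  # True
print(isinstance(pd.Series([1, 2]).dtype, pd.CategoricalDtype))  # False
```
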
6 changes: 2 additions & 4 deletions scFates/plot/utils.py
@@ -129,13 +129,11 @@ def make_projection_available(projection):


def is_categorical(data, c=None):
-from pandas.api.types import is_categorical_dtype as cat
-
if c is None:
-return cat(data)  # if data is categorical/array
+return isinstance(data.dtype, pd.CategoricalDtype)  # if data is categorical/array
if not is_view(data): # if data is anndata view
strings_to_categoricals(data)
-return isinstance(c, str) and c in data.obs.keys() and cat(data.obs[c])
+return isinstance(c, str) and c in data.obs.keys() and isinstance(data.obs[c].dtype, pd.CategoricalDtype)


def is_view(adata):
15 changes: 11 additions & 4 deletions scFates/tests/test_w_plots.py
@@ -2,6 +2,7 @@
import scanpy as sc
import numpy as np
import pandas as pd
+import matplotlib.pyplot as plt


def test_pipeline():
Expand Down Expand Up @@ -80,6 +81,8 @@ def test_pipeline():
scf.pl.trajectory(adata_2, color_seg="milestones", arrows=True)
scf.pl.trajectory(adata_2, color_seg="seg", arrows=True)

+plt.close()

df = scf.tl.getpath(adata, root_milestone="43", milestones=["18"])

adata.obsm["X_umap3d"] = np.concatenate(
@@ -89,6 +92,8 @@
scf.pl.trajectory_3d(adata)
scf.pl.trajectory_3d(adata, color="seg")

+plt.close()

adata_s1 = scf.tl.subset_tree(
adata, root_milestone="88", milestones=["18"], mode="substract", copy=True
)
@@ -100,7 +105,7 @@

adata_lim = scf.tl.subset_tree(adata, t_max=adata.obs.t.max() / 4 * 3, copy=True)

-scf.tl.test_association(adata, n_jobs=2)
+scf.tl.test_association(adata, n_jobs=1)
scf.tl.test_association(adata, A_cut=0.3, reapply_filters=True)
scf.tl.test_association(adata_2, layer="scaled", A_cut=0.3)
nsigni = adata.var.signi.sum()
@@ -148,6 +153,7 @@ def test_pipeline():
plot_emb=False,
ordering="quantile",
)
+plt.close()

scf.pl.trends(
adata,
@@ -158,6 +164,7 @@
ordering="pearson",
show=False,
)
+plt.close()

scf.pl.single_trend(adata, feature=adata.var_names[0], color_exp="k")

@@ -173,15 +180,15 @@
)

scf.tl.test_fork(
-adata, layer="scaled", root_milestone="43", milestones=["24", "18"], n_jobs=2
+adata, layer="scaled", root_milestone="43", milestones=["24", "18"], n_jobs=1
)
-scf.tl.test_fork(adata, root_milestone="43", milestones=["24", "18"], n_jobs=2)
+scf.tl.test_fork(adata, root_milestone="43", milestones=["24", "18"], n_jobs=1)
signi_fdr_nonscaled = adata.uns["43->24<>18"]["fork"].signi_fdr.sum()
adata.uns["43->24<>18"]["fork"]["fdr"] = 0.02
scf.pl.test_fork(adata, root_milestone="43", milestones=["24", "18"])

scf.tl.test_fork(
-adata, root_milestone="43", milestones=["24", "18"], n_jobs=2, rescale=True
+adata, root_milestone="43", milestones=["24", "18"], n_jobs=1, rescale=True
)
signi_fdr_rescaled = adata.uns["43->24<>18"]["fork"].signi_fdr.sum()

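
The `plt.close()` calls sprinkled through the test pipeline keep matplotlib from accumulating open figures; it emits a RuntimeWarning once more than 20 exist. Pattern sketch:

```python
# Sketch: close figures as you go to avoid matplotlib's open-figure warning
import matplotlib.pyplot as plt

for _ in range(25):
    fig, ax = plt.subplots()
    ax.plot([0, 1], [0, 1])
    plt.close(fig)  # without this, a warning fires after 20 open figures
```
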
20 changes: 10 additions & 10 deletions scFates/tools/bifurcation_tools.py
@@ -42,7 +42,7 @@ def test_fork(
copy: bool = False,
):

-"""\
+"""
Test for branch differential gene expression and differential upregulation from progenitor to terminal state.
First, differential gene expression between two branches is performed. The following model is used:
@@ -154,7 +154,7 @@ def get_branches(m):
return ddf

brcells = pd.concat([get_branches(m) for m in milestones])
-brcells = brcells.loc[brcells.index.drop_duplicates(False)]
+brcells = brcells.loc[brcells.index.drop_duplicates(keep=False)]

matw = None
if matw is None:
@@ -324,7 +324,7 @@ def gt_fun(data):

tmin = np.min([sdf.loc[sdf.i == i, "t"].max() for i in sdf.i.unique()])
sdf = sdf.loc[sdf.t <= tmin]
-Amps = sdf.groupby("i").apply(lambda x: np.mean(x.exp))
+Amps = sdf.groupby("i")["exp"].mean()
res = Amps - Amps.max()
res["de_p"] = rmgcv.summary_gam(m)[3][1]

@@ -351,7 +351,7 @@ def branch_specific(
copy: bool = False,
):

-"""\
+"""
Assign genes differentially expressed between two post-bifurcation branches.
Parameters
@@ -439,7 +439,7 @@ def activation(
layer=None,
):

-"""\
+"""
Identify the pseudotime of activation of branch-specific features.
This aims at classifying the genes according to their activation timing
@@ -522,7 +522,7 @@ def activation_map(m):
df = adata.obs.loc[:, ["t", "seg"]]
else:
df = adata.uns["pseudotime_list"][str(m)]
-acti = pd.Series(0, index=stats.index)
+acti = pd.Series(0.0, index=stats.index)

for leave in leaves:
subtree = getpath(img, root, graph["tips"], leave, graph, df).sort_values(
@@ -617,9 +617,9 @@ def get_activation(data):
subtree = data[0]
subtree["exp"] = data[1]
# load parameters
-deriv_cut = subtree["deriv_cut"][0]
-nwin = subtree["nwin"][0]
-steps = subtree["steps"][0]
+deriv_cut = subtree["deriv_cut"].iloc[0]
+nwin = subtree["nwin"].iloc[0]
+steps = subtree["steps"].iloc[0]

wf = warnings.filters.copy()
warnings.filterwarnings("ignore")
@@ -676,7 +676,7 @@ def activation_lm(
layer=None,
):

-"""\
+"""
A more robust version of `tl.activation`.
This is considered to be a more robust version of :func:`scFates.tl.activation`.
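
Selecting the value column before aggregating, as in the `gt_fun` change above, avoids the pandas 2.x warning about `DataFrameGroupBy.apply` operating on the grouping columns. Toy sketch:

```python
# Sketch: aggregate a single column instead of applying over the whole frame
import pandas as pd

sdf = pd.DataFrame({"i": [0, 0, 1, 1], "exp": [1.0, 3.0, 2.0, 4.0]})

# old: sdf.groupby("i").apply(lambda x: np.mean(x.exp))  -> FutureWarning in pandas 2.x
amps = sdf.groupby("i")["exp"].mean()
print(amps)  # i=0 -> 2.0, i=1 -> 3.0
```
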
4 changes: 2 additions & 2 deletions scFates/tools/dendrogram.py
@@ -96,7 +96,7 @@ def dendrogram(adata: anndata.AnnData, crowdedness: float = 1, n_jobs: int = 1):
adata.obsm["X_dendro"] = dend.loc[adata.obs_names].values

# generate segments
-newseg = newseg[newseg.argsort()]
+newseg = newseg.iloc[newseg.argsort()]

segments = []
for i, n in enumerate(newseg.index):
@@ -264,7 +264,7 @@ def _group_longform(self, vals, grouper, order):
"""Group a long-form variable by another with correct order."""

# Group the val data
-grouped_vals = vals.groupby(grouper)
+grouped_vals = vals.groupby(grouper, observed=False)
out_data = []
for g in order:
try:
2 changes: 1 addition & 1 deletion scFates/tools/fit.py
@@ -198,7 +198,7 @@ def fit(
def gt_fun(data):
sdf = data[0]
sdf["exp"] = data[1]
-gamma = sdf["gamma"][0]
+gamma = sdf["gamma"].iloc[0]

global rmgcv
global rstats
5 changes: 2 additions & 3 deletions scFates/tools/graph_operations.py
@@ -1,6 +1,7 @@
from typing import Optional, Union, Iterable
from typing_extensions import Literal
from anndata import AnnData
+import anndata as ad
import numpy as np
import pandas as pd
from pandas import DataFrame
@@ -433,9 +434,7 @@ def attach_tree(

newcells = np.concatenate([adata.obs_names, adata_branch.obs_names])

-adata = adata.concatenate(
-    adata_branch, batch_key=None, index_unique=None, uns_merge="first", join="outer"
-)
+adata = ad.concat([adata, adata_branch], uns_merge="first", join="outer")

logg.info(" tree refitting")
use_rep = graph["use_rep"]
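
`AnnData.concatenate` is deprecated in favor of the functional `anndata.concat`, which accepts `uns_merge` directly, as used in `attach_tree` above. A sketch with synthetic objects:

```python
# Sketch: anndata.concat replacing the deprecated AnnData.concatenate
import anndata as ad
import numpy as np

a = ad.AnnData(np.ones((2, 3)), uns={"graph": {"root": 0}})
b = ad.AnnData(np.zeros((2, 3)))

merged = ad.concat([a, b], uns_merge="first", join="outer")
print(merged.shape)         # (4, 3)
print(merged.uns["graph"])  # kept from the first object via uns_merge="first"
```
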
2 changes: 1 addition & 1 deletion scFates/tools/linearity_deviation.py
@@ -93,7 +93,7 @@ def lindev_map(m):

t_perc = [cells.t.quantile(p / 100) for p in percentiles]

-df[name + "_lindev_sel"] = np.nan
+df[name + "_lindev_sel"] = "none"
df.loc[
adata.obs_names.isin(cells.index), name + "_lindev_sel"
] = "putative bridge"
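
Seeding the selection column with the string "none" rather than `np.nan` makes it object-typed from the start, so the later string assignments do not hit pandas 2.x's incompatible-dtype warning on a float column. Sketch:

```python
# Sketch: a string default avoids assigning str values into a float (NaN) column
import pandas as pd

df = pd.DataFrame(index=["c1", "c2", "c3"])
df["sel"] = "none"  # object dtype from the start
df.loc[["c1", "c2"], "sel"] = "putative bridge"
print(df["sel"].tolist())  # ['putative bridge', 'putative bridge', 'none']
```
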
12 changes: 6 additions & 6 deletions scFates/tools/pseudotime.py
@@ -290,7 +290,7 @@ def map_on_edges(v):

if vcells.shape[0] > 0:
nv = np.array(g.neighborhood(v, order=1))[1:]
-nvd = g.shortest_paths(v, nv, weights=g.es["weight"])
+nvd = g.distances(v, nv, weights=g.es["weight"])

ndf = pd.DataFrame(
{
@@ -314,7 +314,7 @@ def map_on_edges(v):
for i in range(ndf.shape[0])
]

-ndf["seg"] = 0
+ndf["seg"] = "0"
isinfork = (graph["pp_info"].loc[ndf.v0, "PP"].isin(graph["forks"])).values
ndf.loc[isinfork, "seg"] = (
graph["pp_info"].loc[ndf.loc[isinfork, "v1"], "seg"].values
@@ -332,7 +332,7 @@ def map_on_edges(v):
df.sort_values("cell", inplace=True)
# df.index = graph["cells_fitted"]

-df["edge"] = df.apply(lambda x: str(int(x[1])) + "|" + str(int(x[2])), axis=1)
+df["edge"] = df.apply(lambda x: str(int(x.iloc[1])) + "|" + str(int(x.iloc[2])), axis=1)

df.drop(["cell", "v0", "v1", "d"], axis=1, inplace=True)

@@ -417,11 +417,11 @@ def unroll_circle(adata: AnnData, copy: bool = False):

adata = adata.copy() if copy else adata
pp_seg = adata.uns["graph"]["pp_seg"]
-adata.obs.t[adata.obs.seg == "0"] = (
+adata.obs.loc[adata.obs.seg == "0", "t"] = (
-adata.obs.t[adata.obs.seg == "0"] + adata.obs.t[adata.obs.seg == "0"].max()
)

-adata.obs.t[adata.obs.seg == "0"] = (
+adata.obs.loc[adata.obs.seg == "0", "t"] = (
adata.obs.t[adata.obs.seg == "0"] + adata.obs.t[adata.obs.seg == "0"].max()
)

@@ -452,7 +452,7 @@
"n": 0,
"from": a,
"to": b,
-"d": g.shortest_paths(a, b, weights="weight")[0][0],
+"d": g.distances(a, b, weights="weight")[0][0],
},
index=[0],
)
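
python-igraph 0.10 renamed `Graph.shortest_paths` to `Graph.distances`; the old name still works but warns. Minimal sketch:

```python
# Sketch: Graph.distances is the new name for Graph.shortest_paths
import igraph

g = igraph.Graph.Ring(5)
g.es["weight"] = [1.0, 2.0, 1.0, 1.0, 1.0]

# old: g.shortest_paths(0, 3, weights=g.es["weight"])
print(g.distances(0, 3, weights=g.es["weight"]))  # [[2.0]] with these weights
```
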
16 changes: 8 additions & 8 deletions scFates/tools/root.py
@@ -124,9 +124,9 @@ def root(

sub_g = g.vs.select(group=0).subgraph()
a, b = np.argwhere(np.array(sub_g.degree()) == 1).ravel()
-dst_0 = sub_g.shortest_paths(a, b, weights="weight")[0][0]
+dst_0 = sub_g.distances(a, b, weights="weight")[0][0]
a, b = sub_g.vs["label"][a], sub_g.vs["label"][b]
-dst_1 = g.shortest_paths(root, furthest, weights="weight")[0][0]
+dst_1 = g.distances(root, furthest, weights="weight")[0][0]
pp_seg = pd.concat(
[
pd.Series([0, a, b, dst_0], index=["n", "from", "to", "d"]),
@@ -290,7 +290,7 @@ def roots(adata: AnnData, roots, meeting, copy: bool = False):
nodes = np.argwhere(
np.apply_along_axis(arr=(csr > 0).todense(), axis=0, func1d=np.sum) != 2
).flatten()
-pp_seg = pd.DataFrame(columns=["n", "from", "to", "d"])
+pp_seg = []
for node1, node2 in itertools.combinations(nodes, 2):
paths12 = g.get_shortest_paths(node1, node2)
paths12 = np.array([val for sublist in paths12 for val in sublist])
@@ -300,19 +300,19 @@ def roots(adata: AnnData, roots, meeting, copy: bool = False):
path_root = root_dist_matrix[[node1, node2]]
fro = fromto[np.argmin(path_root)]
to = fromto[np.argmax(path_root)]
-pp_info.loc[paths12, "seg"] = pp_seg.shape[0] + 1
-pp_seg = pp_seg.append(
+pp_info.loc[paths12, "seg"] = len(pp_seg) + 1
+pp_seg.append(
pd.DataFrame(
{
-"n": pp_seg.shape[0] + 1,
+"n": len(pp_seg) + 1,
"from": fro,
"to": to,
"d": shortest_path(csr, directed=False, indices=fro)[to],
},
-index=[pp_seg.shape[0] + 1],
+index=[len(pp_seg) + 1],
)
)

+pp_seg = pd.concat(pp_seg, axis=0)
pp_seg["n"] = pp_seg["n"].astype(int).astype(str)

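
`DataFrame.append` was removed in pandas 2.0; `roots` now collects per-segment frames in a plain list and concatenates once at the end, as sketched here with made-up values:

```python
# Sketch: list-accumulate + pd.concat replacing the removed DataFrame.append
import pandas as pd

rows = []
for n, (fro, to, d) in enumerate([(0, 3, 1.5), (0, 5, 2.0)], start=1):
    rows.append(pd.DataFrame({"n": n, "from": fro, "to": to, "d": d}, index=[n]))

pp_seg = pd.concat(rows, axis=0)
pp_seg["n"] = pp_seg["n"].astype(int).astype(str)
print(pp_seg)
```
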