Fix tutorial and functions

WWXkenmo · Jan 20, 2025 · 9df4b31 · 9df4b31
1 parent c1d3612
commit 9df4b31
Show file tree

Hide file tree

Showing 11 changed files with 151 additions and 132 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -16,5 +16,5 @@ License: MIT + file LICENSE
 Encoding: UTF-8
 RoxygenNote: 7.3.2
 NeedsCompilation: no
-Packaged: 2024-12-30 16:04:11 UTC; weixu.wang
+Packaged: 2025-01-20 18:15:25 UTC; weixu.wang
 Depends: R (>= 3.5.0)
diff --git a/R/FateCausal.R b/R/FateCausal.R
@@ -68,7 +68,7 @@ FateCausal <- function(dyn.out,L=30,alpha = 0,lambda=100,cutoff=0.25,weight=0.2,
   #velo_m <- velo_m[apply(velo_m,1,function(x){sum(is.nan(x))})==0,]
   fate_prob <- as.matrix(dyn.out$fate_prob)
   if(fate_method == "cellrank") fate_prob_velo <- as.matrix(dyn.out$fate_prob_velo)
-  pseudotime <- dyn.out$pseudotime
+  #pseudotime <- dyn.out$pseudotime
   metaCells <- which(rownames(fate_prob) %in% colnames(dyn.out$skeleton$metaExp))
   metaExp <- dyn.out$skeleton$metaExp
 

diff --git a/R/RunNetID.R b/R/RunNetID.R
@@ -279,7 +279,7 @@
                 LineageClass_velo[[colnames(fate_prob_velo)]] <- rownames(fate_prob_velo)
             }
             else {
-                LineageClass_velo <- LineageClassifer(fate_prob_velo, 
+                LineageClass_velo <- LineageClassifier(fate_prob_velo, 
                   maxState = maxState)
             }
         }

diff --git a/README.md b/README.md
@@ -1,4 +1,4 @@
-# NetID
+# NetID_package
 A scalable method to infer fate specific gene regulatory network from single cell gene expression data
 
 <img src="https://github.com/WWXkenmo/NetID_package/blob/figures/figures/Concept_fig1.png" alt="NetID" width="600" />
@@ -46,20 +46,3 @@ pip install numpy==1.23.5
 #### install cytotrace and scent to determine the root cell
 ```
 devtools::install_github("aet21/SCENT")
-```
-
-### Citation
-```
-@article{PUSH:72058,
-  author = {Wang, W. and Wang, Y. and Lyu, R. and Gr{\"u}n, D.},
-  title = {{Scalable identification of lineage-specific gene regulatory networks from metacells with NetID.}},
-  journal = {Genome Biol.},
-  location = {Campus, 4 Crinan St, London N1 9xw, England},
-  publisher = {Bmc},
-  volume = {25},
-  number = {1},
-  year = {2024},
-  issn = {1474-760X},
-  eissn = {1465-6906},
-}
-```
diff --git a/doc/NetID.Rmd b/doc/NetID.Rmd
@@ -98,7 +98,7 @@ names(dyn.out)
 
 # Perform pruning on Shared Nearest-neighbor (SNN) graph
 
-NetID also accepts a Seurat object as input and employs the SNN graph for pruning and aggregation. To begin, we can construct the Seurat object.
+NetID also accepts a Seurat object as input and utilizes the SNN graph for pruning and aggregation. Compared to the raw KNN graph, the SNN graph can provide a clearer neighbor graph. To begin, we first construct the Seurat object. This dataset includes both spliced and unspliced readouts; for building the GRN, we use the spliced readout. Users can also specify the gene read count layer they wish to use for constructing the GRN.
 
 ```{r Build Seurat object, eval=TRUE, echo=TRUE, warning=FALSE,message=FALSE}
 library(SummarizedExperiment)
@@ -206,9 +206,6 @@ barcode <- rownames(fate_prob)
 fate_prob <- as.matrix(fate_prob[, -ncol(fate_prob)])
 rownames(fate_prob) <- barcode
 colnames(fate_prob) <- ID
-
-pseudotime <- reticulate::py_to_r(ad$obs)$pseudotime
-names(pseudotime) <- rownames(reticulate::py_to_r(ad$obs))
 ```
 
 `fate_prob` is a cell fate probability matrix with cells as rows, columns as cell fates; values are the probabilities of each cell being assigned to each fate. This cell fate probability matrix can be inferred via `cellrank`, `palantir`, or any other method, e.g., `FateID`.
@@ -217,19 +214,26 @@ names(pseudotime) <- rownames(reticulate::py_to_r(ad$obs))
 head(fate_prob)
 ```
 
-`pseudotime` is an inferred measure of progression through a biological process, such as differentiation or cell cycle. This vector contains a numerical pseudotime value for every cell.
-```{r show pseudotime, eval=TRUE, echo=TRUE, warning=FALSE,message=FALSE}
-head(pseudotime)
-```
-
-Then, cells are assigned to specific cell fates based on `fate_prob` using the `LineageClassifier` function, and plugged it into the object `dyn.out`, along with `fate_prob` and `pseudotime` .
+Then, cells are assigned to specific cell fates based on `fate_prob` using the `LineageClassifier` function, and the result is plugged into the object `dyn.out`, along with `fate_prob`. The whole procedure is shown as follows:
 
 ```{r Plugin cell fate, eval=FALSE, echo=TRUE, warning=FALSE,message=FALSE}
+## Run NetID
+dyn.out <- RunNetID(sce,
+                    regulators = TF[,1], 
+                    targets = TF[,1],
+                    netID_params =
+                    list(normalize=FALSE,
+                         sketch.method = "geosketch"), 
+                    dynamicInfer = FALSE,
+                    velo=FALSE)
+
+## Inject dynamic information
 dyn.out$LineageClass <- LineageClassifier(fate_prob, maxState = 10, cut_off = 0)
-dyn.out$pseudotime <- pseudotime
 dyn.out$fate_prob <- fate_prob # cell fate probability matrix
 ```
 
+These steps can be used to skip the `FateDynamic` step, and the output can be used directly by the next step (`FateCausal`).
+
 NetID classifies the cells based on the fate probability matrix using a Gaussian Mixture Model. Next, we compute the lineage fate probability fold change to assign each cluster to a specific lineage. To visualize the cell fate probability in a PCA 2D space, the following function can be utilized:
 
 ```{r plot cell fate probability, eval=TRUE, echo=TRUE, warning=FALSE,message=FALSE}
@@ -239,6 +243,9 @@ dyn.out$basis <- reducedDim(sce, "PCA")[,c(1,2)]
 ## For SCseq, e.g.
 ## dyn.out$basis <- sce@umap
 
+## For Seurat, e.g.
+## dyn.out$basis <- Se@reductions$umap@cell.embeddings
+
 library(cowplot)
 p1 = plotFateProb(dyn.out,basis=dyn.out$basis,basis_name = "PCA",
                   lineage = colnames(dyn.out$fate_prob)[1])

diff --git a/inst/doc/NetID.R b/inst/doc/NetID.R
@@ -123,18 +123,22 @@ fate_prob <- as.matrix(fate_prob[, -ncol(fate_prob)])
 rownames(fate_prob) <- barcode
 colnames(fate_prob) <- ID
 
-pseudotime <- reticulate::py_to_r(ad$obs)$pseudotime
-names(pseudotime) <- rownames(reticulate::py_to_r(ad$obs))
-
 ## ----show cell fate, eval=TRUE, echo=TRUE, warning=FALSE,message=FALSE--------
 head(fate_prob)
 
-## ----show pseudotime, eval=TRUE, echo=TRUE, warning=FALSE,message=FALSE-------
-head(pseudotime)
-
 ## ----Plugin cell fate, eval=FALSE, echo=TRUE, warning=FALSE,message=FALSE-----
+#  ## Run NetID
+#  dyn.out <- RunNetID(sce,
+#                      regulators = TF[,1],
+#                      targets = TF[,1],
+#                      netID_params =
+#                      list(normalize=FALSE,
+#                           sketch.method = "geosketch"),
+#                      dynamicInfer = FALSE,
+#                      velo=FALSE)
+#  
+#  ## Inject dynamic information
 #  dyn.out$LineageClass <- LineageClassifier(fate_prob, maxState = 10, cut_off = 0)
-#  dyn.out$pseudotime <- pseudotime
 #  dyn.out$fate_prob <- fate_prob # cell fate probability matrix
 
 ## ----plot cell fate probability, eval=TRUE, echo=TRUE, warning=FALSE,message=FALSE----
@@ -144,6 +148,9 @@ dyn.out$basis <- reducedDim(sce, "PCA")[,c(1,2)]
 ## For SCseq, e.g.
 ## dyn.out$basis <- sce@umap
 
+## For Seurat, e.g.
+## dyn.out$basis <- Se@reductions$umap@cell.embeddings
+
 library(cowplot)
 p1 = plotFateProb(dyn.out,basis=dyn.out$basis,basis_name = "PCA",
                   lineage = colnames(dyn.out$fate_prob)[1])

diff --git a/inst/doc/NetID.Rmd b/inst/doc/NetID.Rmd
@@ -98,7 +98,7 @@ names(dyn.out)
 
 # Perform pruning on Shared Nearest-neighbor (SNN) graph
 
-NetID also accepts a Seurat object as input and employs the SNN graph for pruning and aggregation. To begin, we can construct the Seurat object.
+NetID also accepts a Seurat object as input and utilizes the SNN graph for pruning and aggregation. Compared to the raw KNN graph, the SNN graph can provide a clearer neighbor graph. To begin, we first construct the Seurat object. This dataset includes both spliced and unspliced readouts; for building the GRN, we use the spliced readout. Users can also specify the gene read count layer they wish to use for constructing the GRN.
 
 ```{r Build Seurat object, eval=TRUE, echo=TRUE, warning=FALSE,message=FALSE}
 library(SummarizedExperiment)
@@ -206,9 +206,6 @@ barcode <- rownames(fate_prob)
 fate_prob <- as.matrix(fate_prob[, -ncol(fate_prob)])
 rownames(fate_prob) <- barcode
 colnames(fate_prob) <- ID
-
-pseudotime <- reticulate::py_to_r(ad$obs)$pseudotime
-names(pseudotime) <- rownames(reticulate::py_to_r(ad$obs))
 ```
 
 `fate_prob` is a cell fate probability matrix with cells as rows, columns as cell fates; values are the probabilities of each cell being assigned to each fate. This cell fate probability matrix can be inferred via `cellrank`, `palantir`, or any other method, e.g., `FateID`.
@@ -217,19 +214,26 @@ names(pseudotime) <- rownames(reticulate::py_to_r(ad$obs))
 head(fate_prob)
 ```
 
-`pseudotime` is an inferred measure of progression through a biological process, such as differentiation or cell cycle. This vector contains a numerical pseudotime value for every cell.
-```{r show pseudotime, eval=TRUE, echo=TRUE, warning=FALSE,message=FALSE}
-head(pseudotime)
-```
-
-Then, cells are assigned to specific cell fates based on `fate_prob` using the `LineageClassifier` function, and plugged it into the object `dyn.out`, along with `fate_prob` and `pseudotime` .
+Then, cells are assigned to specific cell fates based on `fate_prob` using the `LineageClassifier` function, and the result is plugged into the object `dyn.out`, along with `fate_prob`. The whole procedure is shown as follows:
 
 ```{r Plugin cell fate, eval=FALSE, echo=TRUE, warning=FALSE,message=FALSE}
+## Run NetID
+dyn.out <- RunNetID(sce,
+                    regulators = TF[,1], 
+                    targets = TF[,1],
+                    netID_params =
+                    list(normalize=FALSE,
+                         sketch.method = "geosketch"), 
+                    dynamicInfer = FALSE,
+                    velo=FALSE)
+
+## Inject dynamic information
 dyn.out$LineageClass <- LineageClassifier(fate_prob, maxState = 10, cut_off = 0)
-dyn.out$pseudotime <- pseudotime
 dyn.out$fate_prob <- fate_prob # cell fate probability matrix
 ```
 
+These steps can be used to skip the `FateDynamic` step, and the output can be used directly by the next step (`FateCausal`).
+
 NetID classifies the cells based on the fate probability matrix using a Gaussian Mixture Model. Next, we compute the lineage fate probability fold change to assign each cluster to a specific lineage. To visualize the cell fate probability in a PCA 2D space, the following function can be utilized:
 
 ```{r plot cell fate probability, eval=TRUE, echo=TRUE, warning=FALSE,message=FALSE}
@@ -239,6 +243,9 @@ dyn.out$basis <- reducedDim(sce, "PCA")[,c(1,2)]
 ## For SCseq, e.g.
 ## dyn.out$basis <- sce@umap
 
+## For Seurat, e.g.
+## dyn.out$basis <- Se@reductions$umap@cell.embeddings
+
 library(cowplot)
 p1 = plotFateProb(dyn.out,basis=dyn.out$basis,basis_name = "PCA",
                   lineage = colnames(dyn.out$fate_prob)[1])

diff --git a/inst/doc/NetID.html b/inst/doc/NetID.html
diff --git a/inst/python/FateDynamic_py.py b/inst/python/FateDynamic_py.py
@@ -109,8 +109,8 @@ def terminal_index(ad,label,terminal_state, n_pc = 20):
         cell_index += list(compress(indices,dist == np.min(dist)))
 
     states = pd.Series(
-        cell_index,
-        index=terminal_state,
+        terminal_state,
+        index=cell_index,
     )
 
     return states

diff --git a/vignettes/NetID.Rmd b/vignettes/NetID.Rmd
@@ -98,7 +98,7 @@ names(dyn.out)
 
 # Perform pruning on Shared Nearest-neighbor (SNN) graph
 
-NetID also accepts a Seurat object as input and employs the SNN graph for pruning and aggregation. To begin, we can construct the Seurat object.
+NetID also accepts a Seurat object as input and utilizes the SNN graph for pruning and aggregation. Compared to the raw KNN graph, the SNN graph can provide a clearer neighbor graph. To begin, we first construct the Seurat object. This dataset includes both spliced and unspliced readouts; for building the GRN, we use the spliced readout. Users can also specify the gene read count layer they wish to use for constructing the GRN.
 
 ```{r Build Seurat object, eval=TRUE, echo=TRUE, warning=FALSE,message=FALSE}
 library(SummarizedExperiment)
@@ -206,9 +206,6 @@ barcode <- rownames(fate_prob)
 fate_prob <- as.matrix(fate_prob[, -ncol(fate_prob)])
 rownames(fate_prob) <- barcode
 colnames(fate_prob) <- ID
-
-pseudotime <- reticulate::py_to_r(ad$obs)$pseudotime
-names(pseudotime) <- rownames(reticulate::py_to_r(ad$obs))
 ```
 
 `fate_prob` is a cell fate probability matrix with cells as rows, columns as cell fates; values are the probabilities of each cell being assigned to each fate. This cell fate probability matrix can be inferred via `cellrank`, `palantir`, or any other method, e.g., `FateID`.
@@ -217,19 +214,26 @@ names(pseudotime) <- rownames(reticulate::py_to_r(ad$obs))
 head(fate_prob)
 ```
 
-`pseudotime` is an inferred measure of progression through a biological process, such as differentiation or cell cycle. This vector contains a numerical pseudotime value for every cell.
-```{r show pseudotime, eval=TRUE, echo=TRUE, warning=FALSE,message=FALSE}
-head(pseudotime)
-```
-
-Then, cells are assigned to specific cell fates based on `fate_prob` using the `LineageClassifier` function, and plugged it into the object `dyn.out`, along with `fate_prob` and `pseudotime` .
+Then, cells are assigned to specific cell fates based on `fate_prob` using the `LineageClassifier` function, and the result is plugged into the object `dyn.out`, along with `fate_prob`. The whole procedure is shown as follows:
 
 ```{r Plugin cell fate, eval=FALSE, echo=TRUE, warning=FALSE,message=FALSE}
+## Run NetID
+dyn.out <- RunNetID(sce,
+                    regulators = TF[,1], 
+                    targets = TF[,1],
+                    netID_params =
+                    list(normalize=FALSE,
+                         sketch.method = "geosketch"), 
+                    dynamicInfer = FALSE,
+                    velo=FALSE)
+
+## Inject dynamic information
 dyn.out$LineageClass <- LineageClassifier(fate_prob, maxState = 10, cut_off = 0)
-dyn.out$pseudotime <- pseudotime
 dyn.out$fate_prob <- fate_prob # cell fate probability matrix
 ```
 
+These steps can be used to skip the `FateDynamic` step, and the output can be used directly by the next step (`FateCausal`).
+
 NetID classifies the cells based on the fate probability matrix using a Gaussian Mixture Model. Next, we compute the lineage fate probability fold change to assign each cluster to a specific lineage. To visualize the cell fate probability in a PCA 2D space, the following function can be utilized:
 
 ```{r plot cell fate probability, eval=TRUE, echo=TRUE, warning=FALSE,message=FALSE}
@@ -239,6 +243,9 @@ dyn.out$basis <- reducedDim(sce, "PCA")[,c(1,2)]
 ## For SCseq, e.g.
 ## dyn.out$basis <- sce@umap
 
+## For Seurat, e.g.
+## dyn.out$basis <- Se@reductions$umap@cell.embeddings
+
 library(cowplot)
 p1 = plotFateProb(dyn.out,basis=dyn.out$basis,basis_name = "PCA",
                   lineage = colnames(dyn.out$fate_prob)[1])

diff --git a/vignettes/figures/palantir_res.png b/vignettes/figures/palantir_res.png