Skip to content

Commit

Permalink
public v1.1.0
Browse files Browse the repository at this point in the history
  • Loading branch information
lmw123 committed Jul 18, 2022
1 parent f9d4da1 commit 0e24cb9
Show file tree
Hide file tree
Showing 6 changed files with 161 additions and 32 deletions.
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
Package: FastIntegration
Type: Package
Title: Fast Integration of Single-cell Data
Version: 0.1.0
Version: 1.1.0
Author: c(
person(given = "Mengwei", family = "Li", email = "[email protected]", role = "aut"),
person(given = "Xiaomeng", family = "Zhang", email = "[email protected]", role = "aut"),
Expand Down
116 changes: 116 additions & 0 deletions R/GetDiscoData.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,116 @@
#' Download the sample metadata table from the DISCO database
#'
#' Requests `https://www.immunesinglecell.org/api/sample/all` and parses the
#' JSON response with `jsonlite::fromJSON()`.
#'
#' @param verbose Print a progress message before the download starts?
#' @return The parsed response. Callers in this file treat it as a data frame
#'   with one row per sample (assumption based on how the result is used;
#'   the API schema itself is not visible here).
#' @noRd
GetDiscoSample = function(
  verbose = TRUE
) {
  # requireNamespace() only checks availability: unlike require() it neither
  # attaches the package to the search path nor warns when it is missing.
  if (!requireNamespace("jsonlite", quietly = TRUE)) {
    stop("Please install jsonlite")
  }

  if (verbose) {
    message("Starting to download metadata from DISCO database")
  }

  tryCatch(
    jsonlite::fromJSON("https://www.immunesinglecell.org/api/sample/all"),
    error = function(cond) {
      # Keep the underlying reason so the failure can be diagnosed
      # (DNS failure, HTTP status, malformed JSON, ...).
      stop(
        "Network error. Please try again (", conditionMessage(cond), ")",
        call. = FALSE
      )
    }
  )
}

#' Find DISCO samples by metadata
#'
#' Downloads the DISCO sample metadata with `GetDiscoSample()` and keeps the
#' rows that match every filter that was supplied. A filter that is left
#' empty is not applied. Only samples whose `processStatus` is `"QC pass"`
#' are returned.
#'
#' @param tissue Values to keep in the `tissue` column.
#' @param disease Values to keep in the `disease` column.
#' @param platform Values to keep in the `platform` column.
#' @param project.id Values to keep in the `projectId` column.
#' @param sample.id Values to keep in the `sampleId` column.
#' @param sample.type Values to keep in the `sampleType` column.
#' @return The filtered metadata (at least one row). An error is raised when
#'   no sample matches.
#' @import jsonlite
#' @export
FindSampleByMetadata = function(
  tissue = c(),
  disease = c(),
  platform = c(),
  project.id = c(),
  sample.id = c(),
  sample.type = c()
) {
  meta.all = GetDiscoSample()

  # Metadata column -> values requested by the caller. `sample.id` used to be
  # accepted but silently ignored; it now filters on `sampleId`.
  filters = list(
    tissue = tissue,
    disease = disease,
    platform = platform,
    projectId = project.id,
    sampleId = sample.id,
    sampleType = sample.type
  )

  for (column in names(filters)) {
    wanted = filters[[column]]
    if (length(wanted) > 0) {
      meta.all = meta.all[which(meta.all[[column]] %in% wanted), ]
    }
  }

  # which() drops rows whose status is NA instead of producing NA rows
  meta.all = meta.all[which(meta.all$processStatus == "QC pass"), ]

  if (nrow(meta.all) == 0) {
    stop("Sorry, no sample is found. Please try to use other filters.")
  }
  meta.all
}

#' Download expression data for DISCO samples
#'
#' For every row of `metadata`, reads the sample's RDS object from the DISCO
#' download service, optionally keeps only the cells that match the gene
#' filters, and saves the result as `<dir>/<sampleId>.rds`. Samples with no
#' cell left after filtering are skipped with a message.
#'
#' @param metadata Data frame with `projectId` and `sampleId` columns, e.g.
#'   the result of `FindSampleByMetadata()`.
#' @param expressed.gene Genes whose value in the RNA assay `data` slot must
#'   be greater than 0 in a cell for the cell to be kept.
#' @param unexpressed.gene Genes whose value in the RNA assay `data` slot must
#'   be equal to 0 in a cell for the cell to be kept.
#' @param dir Directory in which the `.rds` files are written. It is created
#'   (recursively) when it does not exist.
#' @return `NULL`, invisibly. Called for its side effect of writing files.
#' @export
DownloadDiscoData = function(
  metadata,
  expressed.gene = c(),
  unexpressed.gene = c(),
  dir = "./disco_data"
) {
  if (!all(c("projectId", "sampleId") %in% names(metadata))) {
    stop("metadata must contain the columns projectId and sampleId", call. = FALSE)
  }

  # An already existing directory is fine; only a failure to create is not.
  dir.create(dir, showWarnings = FALSE, recursive = TRUE)
  if (!dir.exists(dir)) {
    stop("Cannot create output directory: ", dir, call. = FALSE)
  }

  # Read one remote RDS file and always release the connection afterwards.
  read.remote.rds = function(address) {
    con = url(address)
    on.exit(close(con), add = TRUE)
    readRDS(con)
  }

  filter.genes = c(expressed.gene, unexpressed.gene)
  n.sample = nrow(metadata)

  # seq_len() makes empty metadata a no-op (1:0 would iterate over 1 and 0).
  for (i in seq_len(n.sample)) {
    sample.id = metadata$sampleId[i]
    message(paste0("Downloading sample ", i, " of ", n.sample, " (", sample.id, ")"))

    # Only the download itself is reported as a network error; problems in
    # the later steps surface with their own message.
    rna = tryCatch(
      read.remote.rds(paste0(
        "http://dc.vishuo.com:8887/api/vishuo/download/getExp?project=",
        metadata$projectId[i], "&sample=", sample.id
      )),
      error = function(cond) {
        stop(
          "Network error. Please try again (sample ", sample.id, ": ",
          conditionMessage(cond), ")",
          call. = FALSE
        )
      }
    )

    if (length(filter.genes) > 0) {
      expr = rna@assays$RNA@data

      missing.gene = setdiff(filter.genes, rownames(expr))
      if (length(missing.gene) > 0) {
        stop(
          "Gene(s) not found in sample ", sample.id, ": ",
          paste(missing.gene, collapse = ", "),
          call. = FALSE
        )
      }

      # A cell is kept when it satisfies every filter. This equals the
      # successive subsetting done gene by gene, without ever touching a
      # NULL object once the selection becomes empty.
      keep = rep(TRUE, ncol(expr))
      for (gene in expressed.gene) {
        keep = keep & (expr[gene, ] > 0)
      }
      for (gene in unexpressed.gene) {
        keep = keep & (expr[gene, ] == 0)
      }
      cells = colnames(expr)[which(keep)]

      if (length(cells) == 0) {
        message(paste0(
          "Skipping sample ", i, " (", sample.id,
          "). No cells are found after filtering."
        ))
        next
      }
      rna = subset(rna, cells = cells)
    }

    # The counts slot is filled with expm1() of the data slot before saving.
    rna@assays$RNA@counts = expm1(rna@assays$RNA@data)
    saveRDS(rna, file.path(dir, paste0(sample.id, ".rds")), compress = FALSE)
  }

  message("Job finished")
  invisible(NULL)
}





75 changes: 44 additions & 31 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,43 +1,45 @@
# FastIntegration v1.0.0
# FastIntegration v1.1.0

FastIntegration is a fast and high-capacity version of Seurat Integration. FastIntegration can integrate thousands of scRNA-seq datasets and output batch-corrected values for downstream analysis.

**Recent update: New functions which allow users to filter and download data in DISCO (<https://www.immunesinglecell.org/repository>), comprising 5200+ single-cell samples!**

## Requirement

FastIntegration requires the following packages:

* [R](https://www.r-project.org/) (>= 4.0.0)
* [Seurat](https://cran.r-project.org/web/packages/Seurat/index.html) (>= 4.0.0)
* [SeuratObject](https://cran.r-project.org/web/packages/SeuratObject/index.html) (>= 4.0.0)
* [data.table](https://cran.r-project.org/web/packages/data.table/vignettes/datatable-intro.html)
* [Matrix](https://cran.r-project.org/web/packages/Matrix/index.html)
* [tictoc](https://cran.r-project.org/web/packages/tictoc/index.html)
* [dplyr](https://cran.r-project.org/web/packages/dplyr/index.html)
* [pbmcapply](https://cran.r-project.org/web/packages/pbmcapply/index.html)
- [R](https://www.r-project.org/) (\>= 4.0.0)
- [Seurat](https://cran.r-project.org/web/packages/Seurat/index.html) (\>= 4.0.0)
- [SeuratObject](https://cran.r-project.org/web/packages/SeuratObject/index.html) (\>= 4.0.0)
- [data.table](https://cran.r-project.org/web/packages/data.table/vignettes/datatable-intro.html)
- [Matrix](https://cran.r-project.org/web/packages/Matrix/index.html)
- [tictoc](https://cran.r-project.org/web/packages/tictoc/index.html)
- [dplyr](https://cran.r-project.org/web/packages/dplyr/index.html)
- [pbmcapply](https://cran.r-project.org/web/packages/pbmcapply/index.html)

We highly recommend building R with OpenBLAS, which accelerates integration by 2-3x.

Here is the common way to do it:

sudo yum install -y openblas openblas-threads openblas-openmp # for centos
sudo yum install -y openblas openblas-threads openblas-openmp \# for centos

sudo apt-get install libopenblas-dev \# for debian

sudo apt-get install libopenblas-dev # for debian
./configure --enable-R-shlib --enable-byte-compiled-packages --enable-BLAS-shlib --enable-memory-profiling

./configure --enable-R-shlib --enable-byte-compiled-packages \
--with-blas="-lopenblas"

--enable-BLAS-shlib --enable-memory-profiling \

--with-blas="-lopenblas"

## Installation

```R
``` r
devtools::install_github("git@github.com:JinmiaoChenLab/FastIntegrate.git")
```

## Usage

### Preprocess
```R

``` r
library(Seurat)
library(pbmcapply)
rna.list = readRDS("rna_list.rds") # read list of Seurat object, each element in list is a sample
Expand All @@ -50,27 +52,25 @@ for (i in 1:length(rna.list)) {
rna.list[[i]] = FindVariableFeatures(rna.list[[i]])
rna.list[[i]] = RenameCells(rna.list[[i]], new.names = paste0(Cells(rna.list[[i]]), "--", i))
}

```



### Onestop function

For large sample size (> 200 samples), we recommend to use step by step integration.
```R
For large sample sizes (\> 200 samples), we recommend using step-by-step integration.

``` r
library(FastIntegration)
# rna.list is the list of seurat object
data = OneStopIntegration(
rna.list = rna.list,
tmp.dir = "./test/",
max.cores = 30
)

```

### Step by step integration
```R

``` r
library(Seurat)
library(pbmcapply)
library(FastIntegration)
Expand All @@ -94,14 +94,13 @@ pbmclapply(
rna.integrated = FastIntegration(tmp.dir = "./", npcs = 1:30, slot = "data",
features.to.integrate = genes[idx[[i]]])
saveRDS(rna.integrated, paste0("FastIntegrationTmp/inte/inte_", i, ".rds"), compress = F)
}, mc.cores = 20
}, mc.cores = 20
)

```


### After integration
```R

``` r
##### create Seurat obj with the variable features of integration (For very big dataset) #####
features = readRDS("FastIntegrationTmp/others/features.rds")
rna.data = pbmclapply(
Expand Down Expand Up @@ -137,13 +136,27 @@ rna.data = RunPCA(rna.data, features = features)
rna.data = FindNeighbors(rna.data, dims = 1:50)
rna.data = FindClusters(rna.data, resolution = 0.5, algorithm = 2)
rna.data = RunUMAP(rna.data, dims = 1:50)

```

### Download data from DISCO

``` r
##### Filter samples and get metadata #####
# You can filter samples by their different headers: tissue, disease, platform, project.id, sample.id, sample.type
# For each header, you can select multiple items as follows:
meta = FindSampleByMetadata(tissue = c("blood", "kidney"))

##### Download sample #####
# You can further filter cells by specifying expressed or unexpressed genes.
# dir is the location where the files are saved
DownloadDiscoData(meta, expressed.gene = c("CD3E"), unexpressed.gene = c("CD8A"), dir = "./disco") # mostly CD4 T cells (CD3E+CD8A-)

```

## Usage Scenario

We have applied FastIntegration to the [DISCO](http://www.immunesinglecell.org/) database to integrate thousands of samples.

## License
All other code in this repository is licensed under a [GPL-3](https://www.r-project.org/Licenses/GPL-3) license.

All other code in this repository is licensed under a [GPL-3](https://www.r-project.org/Licenses/GPL-3) license.
Binary file modified src/FastIntegration.so
Binary file not shown.
Binary file modified src/RcppExports.o
Binary file not shown.
Binary file modified src/inetgration.o
Binary file not shown.

0 comments on commit 0e24cb9

Please sign in to comment.