Skip to content

Commit

Permalink
onestop
Browse files Browse the repository at this point in the history
  • Loading branch information
lmw123 committed Mar 23, 2022
1 parent 3962c35 commit c1d789f
Show file tree
Hide file tree
Showing 4 changed files with 37 additions and 61 deletions.
10 changes: 5 additions & 5 deletions R/FastFindAnchors.R
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ FastFindAnchors = function(

message("Finding all pairwise anchors")

if (nSample > 100) {
if (nSample >= 50) {
rna.list = pbmcapply::pbmclapply(
1:nSample, function(i) {
rna = readRDS(paste0(tmp.dir, "/FastIntegrationTmp/raw/", i,".rds"))
Expand Down Expand Up @@ -93,7 +93,7 @@ FastFindAnchors = function(
data.table::setindex(all.anchors, dataset1)

message("Merging data")
if (nSample <= 100) {
if (nSample < 50) {
anchor.group <- all.anchors %>% group_by(dataset1, dataset2) %>% summarise(n = n())
similarity.matrix = matrix(data = 0, ncol = nSample, nrow = nSample)
similarity.matrix[lower.tri(similarity.matrix, diag=FALSE) | upper.tri(similarity.matrix, diag=FALSE)] = anchor.group$n
Expand Down Expand Up @@ -121,11 +121,11 @@ FastFindAnchors = function(
rna.bind = cbind(rna.bind, rna@assays$RNA@data[features,])
}
} else {
n = ceiling(nSample/100)
n = ceiling(nSample/50)
rna.list = pbmcapply::pbmclapply(
1:n, function(i) {
start = (i-1)*100 + 1
end = min(nSample, i*100)
start = (i-1)*50 + 1
end = min(nSample, i*50)
rna.bind = readRDS(paste0(tmp.dir, "/FastIntegrationTmp/raw/", start,".rds"))
rna.bind = rna.bind@assays$RNA@data[features,]
for (j in (start+1):end) {
Expand Down
58 changes: 17 additions & 41 deletions R/FastIntegration.R
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ FastIntegration = function(
npcs = 1:30,
nn.k = 100,
slot = c("data", "counts"),
prevent.over.correct = F
cut.low = 0.1
) {
data.table::setDTthreads(threads = 1L)

Expand Down Expand Up @@ -58,7 +58,7 @@ FastIntegration = function(
nn.k = nn.k,
sample.tree = sample.tree,
objects.ncell = obj.lengths,
prevent.over.correct = prevent.over.correct
cut.low = cut.low
)
}

Expand All @@ -73,7 +73,7 @@ FastPairwiseIntegrateReference = function(
nn.k,
sample.tree,
objects.ncell,
prevent.over.correct = F
cut.low
) {

cellnames.list = list()
Expand Down Expand Up @@ -118,7 +118,7 @@ FastPairwiseIntegrateReference = function(
input.pca = input.pca,
id.table = id.table,
nn.k = nn.k,
prevent.over.correct = prevent.over.correct
cut.low = cut.low
)

object.list[[as.character(ii)]] = integrated.matrix
Expand All @@ -138,7 +138,7 @@ FastRunIntegration = function(
input.pca,
id.table,
nn.k,
prevent.over.correct = F
cut.low = 0.1
) {

if (dim(filtered.anchors)[1] > 100000) {
Expand Down Expand Up @@ -178,31 +178,17 @@ FastRunIntegration = function(
integrated = pbmcapply::pbmclapply(
1:length(ss),
function(i) {
if (prevent.over.correct) {
corrected = integration.matrix %*% weight.matrix[,ss[[i]]]
corrected = RmOutlierMatrix(corrected)
integrated = query[,ss[[i]]] - corrected

}else {
integrated = query[,ss[[i]]] - integration.matrix %*% weight.matrix[,ss[[i]]]
}
integrated = query[,ss[[i]]] - integration.matrix %*% weight.matrix[,ss[[i]]]
},
mc.cores = min(ceiling(dim(query)[2]/50000), 40)
)
integrated = do.call("cbind", integrated)
}else {
if (prevent.over.correct) {
corrected = integration.matrix %*% weight.matrix
corrected = RmOutlierMatrix(corrected)
integrated = query - corrected

}else {
integrated = query - integration.matrix %*% weight.matrix
}
integrated = query - integration.matrix %*% weight.matrix
}
max.cut = max(reference@x)
message(max.cut)
integrated[which(integrated < 0.1)] = 0
max.cut = max(c(reference@x, query@x))

integrated[which(integrated < cut.low)] = 0
integrated[which(integrated > max.cut)] = max.cut

integrated = Matrix::drop0(integrated)
Expand Down Expand Up @@ -238,7 +224,12 @@ FastFindWeights = function(
)

distances = Seurat::Distances(object = knn_2_2)
a = as.numeric(distances[,ncol(x = distances)]/distances[,2])
b = distances/distances[,2]
b = which(b > qnorm(0.99, mean(a), sd(a)))

distances = 1 - (distances / distances[, ncol(x = distances)])
distances[b] = 0
cell.index = Seurat::Indices(object = knn_2_2)
FindWeightsC = getFromNamespace("FindWeightsC", "Seurat")
weights = FindWeightsC(
Expand All @@ -253,23 +244,8 @@ FastFindWeights = function(
display_progress = TRUE
)

print("aa")

return(weights)
}

RmOutlierMatrix = function(m){
m = as.matrix(m)
for (i in 1:nrow(m)) {
Q <- quantile(m[i,], probs=c(.25, .75), na.rm = FALSE)
iqr <- IQR(m[i,])
up <- Q[2]+1.5*iqr
low<- Q[1]-1.5*iqr
if(up < max(m[i,])) {
m[i,which(m[i,] > up)] = up
}
if (low > min(m[i,])) {
m[i,which(m[i,] < low)] = low
}
}
m = as(m, "sparseMatrix")
return(m)
}
22 changes: 13 additions & 9 deletions R/OneStop.R
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,12 @@ OneStopIntegration = function(
tmp.dir = "./",
feature.to.integration = NULL,
feature.to.return = NULL,
max.cores = 30
max.cores = 30,
nfeatures = 2000
) {
nCores = max.cores
message("Building integration files")
BuildIntegrationFile(rna.list, tmp.dir = tmp.dir, nCores = nCores)
BuildIntegrationFile(rna.list = rna.list, tmp.dir = tmp.dir, nCores = nCores, nfeatures = nfeatures)

message("Finding anchors")
FastFindAnchors(tmp.dir = tmp.dir, nCores = nCores)
Expand All @@ -22,8 +23,7 @@ OneStopIntegration = function(

pbmcapply::pbmclapply(
1:20, function(i) {
rna.integrated = FastIntegration(tmp.dir = tmp.dir,
npcs = 1:30, slot = "data",
rna.integrated = FastIntegration(tmp.dir = tmp.dir, npcs = 1:30, slot = "data",
features.to.integrate = feature.to.integration[idx[[i]]])
saveRDS(rna.integrated, paste0(tmp.dir, "/FastIntegrationTmp/inte/inte_", i, ".rds"), compress = F)
}, mc.cores = 20
Expand All @@ -35,12 +35,16 @@ OneStopIntegration = function(
rna.data = pbmcapply::pbmclapply(
1:20, function(i) {
rna = readRDS(paste0(tmp.dir, "/FastIntegrationTmp/inte/inte_", i, ".rds"))
rna = rna[intersect(feature.to.return, rownames(rna)),]
rna[which(rna < 0.2)] = 0
return(rna)
if (length(intersect(rownames(rna), feature.to.return)) > 0) {
rna = rna[intersect(feature.to.return, rownames(rna)), , drop = F]
return(rna)
} else {
return(NULL)
}
}, mc.cores = 20
)

rna.data = do.call(rbind, rna.data)
na.id = unlist(lapply(rna.data, is.null))
rna.data = do.call(rbind, rna.data[which(na.id == F)])
rna.data = CreateSeuratObject(rna.data)
return(rna.data)
}
8 changes: 2 additions & 6 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -28,10 +28,8 @@ library(FastIntegration)
# rna.list is the list of seurat object
data = OneStopIntegration(
rna.list = rna.list,
tmp.dir = "../test/",
max.cores = 30,
feature.to.integration = NULL,
feature.to.return = NULL
tmp.dir = "./test/",
max.cores = 30
)

```
Expand All @@ -49,8 +47,6 @@ FastFindAnchors(tmp.dir = "./", nCores = 50)
genes = readRDS("FastIntegrationTmp/raw/1.rds")
genes = rownames(genes)
idx = split(1:length(genes), cut(1:length(genes), 20, labels = FALSE))
pca = readRDS("raw_pca.rds")

pbmclapply(
1:20, function(i) {
rna.integrated = FastIntegration(tmp.dir = "./", npcs = 1:30, slot = "data",
Expand Down

0 comments on commit c1d789f

Please sign in to comment.