diff --git a/R/FastFindAnchors.R b/R/FastFindAnchors.R index d88a8b4..f1ac42c 100644 --- a/R/FastFindAnchors.R +++ b/R/FastFindAnchors.R @@ -21,7 +21,7 @@ FastFindAnchors = function( message("Finding all pairwise anchors") - if (nSample > 100) { + if (nSample >= 50) { rna.list = pbmcapply::pbmclapply( 1:nSample, function(i) { rna = readRDS(paste0(tmp.dir, "/FastIntegrationTmp/raw/", i,".rds")) @@ -93,7 +93,7 @@ FastFindAnchors = function( data.table::setindex(all.anchors, dataset1) message("Merging data") - if (nSample <= 100) { + if (nSample < 50) { anchor.group <- all.anchors %>% group_by(dataset1, dataset2) %>% summarise(n = n()) similarity.matrix = matrix(data = 0, ncol = nSample, nrow = nSample) similarity.matrix[lower.tri(similarity.matrix, diag=FALSE) | upper.tri(similarity.matrix, diag=FALSE)] = anchor.group$n @@ -121,11 +121,11 @@ FastFindAnchors = function( rna.bind = cbind(rna.bind, rna@assays$RNA@data[features,]) } } else { - n = ceiling(nSample/100) + n = ceiling(nSample/50) rna.list = pbmcapply::pbmclapply( 1:n, function(i) { - start = (i-1)*100 + 1 - end = min(nSample, i*100) + start = (i-1)*50 + 1 + end = min(nSample, i*50) rna.bind = readRDS(paste0(tmp.dir, "/FastIntegrationTmp/raw/", start,".rds")) rna.bind = rna.bind@assays$RNA@data[features,] for (j in (start+1):end) { diff --git a/R/FastIntegration.R b/R/FastIntegration.R index 3f45b84..afb15bd 100644 --- a/R/FastIntegration.R +++ b/R/FastIntegration.R @@ -14,7 +14,7 @@ FastIntegration = function( npcs = 1:30, nn.k = 100, slot = c("data", "counts"), - prevent.over.correct = F + cut.low = 0.1 ) { data.table::setDTthreads(threads = 1L) @@ -58,7 +58,7 @@ FastIntegration = function( nn.k = nn.k, sample.tree = sample.tree, objects.ncell = obj.lengths, - prevent.over.correct = prevent.over.correct + cut.low = cut.low ) } @@ -73,7 +73,7 @@ FastPairwiseIntegrateReference = function( nn.k, sample.tree, objects.ncell, - prevent.over.correct = F + cut.low ) { cellnames.list = list() @@ -118,7 +118,7 @@ FastPairwiseIntegrateReference = function( input.pca = input.pca, id.table = id.table, nn.k = nn.k, - prevent.over.correct = prevent.over.correct + cut.low = cut.low ) object.list[[as.character(ii)]] = integrated.matrix @@ -138,7 +138,7 @@ FastRunIntegration = function( input.pca, id.table, nn.k, - prevent.over.correct = F + cut.low = 0.1 ) { if (dim(filtered.anchors)[1] > 100000) { @@ -178,31 +178,17 @@ FastRunIntegration = function( integrated = pbmcapply::pbmclapply( 1:length(ss), function(i) { - if (prevent.over.correct) { - corrected = integration.matrix %*% weight.matrix[,ss[[i]]] - corrected = RmOutlierMatrix(corrected) - integrated = query[,ss[[i]]] - corrected - - }else { - integrated = query[,ss[[i]]] - integration.matrix %*% weight.matrix[,ss[[i]]] - } + integrated = query[,ss[[i]]] - integration.matrix %*% weight.matrix[,ss[[i]]] }, mc.cores = min(ceiling(dim(query)[2]/50000), 40) ) integrated = do.call("cbind", integrated) }else { - if (prevent.over.correct) { - corrected = integration.matrix %*% weight.matrix - corrected = RmOutlierMatrix(corrected) - integrated = query - corrected - - }else { - integrated = query - integration.matrix %*% weight.matrix - } + integrated = query - integration.matrix %*% weight.matrix } - max.cut = max(reference@x) - message(max.cut) - integrated[which(integrated < 0.1)] = 0 + max.cut = max(c(reference@x, query@x)) + + integrated[which(integrated < cut.low)] = 0 integrated[which(integrated > max.cut)] = max.cut integrated = Matrix::drop0(integrated) @@ -238,7 +224,12 @@ FastFindWeights = function( ) distances = Seurat::Distances(object = knn_2_2) + a = as.numeric(distances[,ncol(x = distances)]/distances[,2]) + b = distances/distances[,2] + b = which(b > qnorm(0.99, mean(a), sd(a))) + distances = 1 - (distances / distances[, ncol(x = distances)]) + distances[b] = 0 cell.index = Seurat::Indices(object = knn_2_2) FindWeightsC = getFromNamespace("FindWeightsC", "Seurat") weights = FindWeightsC( @@ -253,23 +244,8 @@ FastFindWeights = function( display_progress = TRUE ) + print("aa") + return(weights) } -RmOutlierMatrix = function(m){ - m = as.matrix(m) - for (i in 1:nrow(m)) { - Q <- quantile(m[i,], probs=c(.25, .75), na.rm = FALSE) - iqr <- IQR(m[i,]) - up <- Q[2]+1.5*iqr - low<- Q[1]-1.5*iqr - if(up < max(m[i,])) { - m[i,which(m[i,] > up)] = up - } - if (low > min(m[i,])) { - m[i,which(m[i,] < low)] = low - } - } - m = as(m, "sparseMatrix") - return(m) -} diff --git a/R/OneStop.R b/R/OneStop.R index 4b45fbf..b8bc3c7 100644 --- a/R/OneStop.R +++ b/R/OneStop.R @@ -4,11 +4,12 @@ OneStopIntegration = function( tmp.dir = "./", feature.to.integration = NULL, feature.to.return = NULL, - max.cores = 30 + max.cores = 30, + nfeatures = 2000 ) { nCores = max.cores message("Building integration files") - BuildIntegrationFile(rna.list, tmp.dir = tmp.dir, nCores = nCores) + BuildIntegrationFile(rna.list = rna.list, tmp.dir = tmp.dir, nCores = nCores, nfeatures = nfeatures) message("Finding anchors") FastFindAnchors(tmp.dir = tmp.dir, nCores = nCores) @@ -22,8 +23,7 @@ OneStopIntegration = function( pbmcapply::pbmclapply( 1:20, function(i) { - rna.integrated = FastIntegration(tmp.dir = tmp.dir, - npcs = 1:30, slot = "data", + rna.integrated = FastIntegration(tmp.dir = tmp.dir, npcs = 1:30, slot = "data", features.to.integrate = feature.to.integration[idx[[i]]]) saveRDS(rna.integrated, paste0(tmp.dir, "/FastIntegrationTmp/inte/inte_", i, ".rds"), compress = F) }, mc.cores = 20 @@ -35,12 +35,16 @@ OneStopIntegration = function( rna.data = pbmcapply::pbmclapply( 1:20, function(i) { rna = readRDS(paste0(tmp.dir, "/FastIntegrationTmp/inte/inte_", i, ".rds")) - rna = rna[intersect(feature.to.return, rownames(rna)),] - rna[which(rna < 0.2)] = 0 - return(rna) + if (length(intersect(rownames(rna), feature.to.return)) > 0) { + rna = rna[intersect(feature.to.return, rownames(rna)), , drop = F] + return(rna) + } else { + return(NULL) + } }, mc.cores = 20 ) - - rna.data = do.call(rbind, rna.data) + na.id = unlist(lapply(rna.data, is.null)) + rna.data = do.call(rbind, rna.data[which(na.id == F)]) rna.data = CreateSeuratObject(rna.data) + return(rna.data) } diff --git a/README.md b/README.md index a18eb73..6edf37b 100644 --- a/README.md +++ b/README.md @@ -28,10 +28,8 @@ library(FastIntegration) # rna.list is the list of seurat object data = OneStopIntegration( rna.list = rna.list, - tmp.dir = "../test/", - max.cores = 30, - feature.to.integration = NULL, - feature.to.return = NULL + tmp.dir = "./test/", + max.cores = 30 ) ``` @@ -49,8 +47,6 @@ FastFindAnchors(tmp.dir = "./", nCores = 50) genes = readRDS("FastIntegrationTmp/raw/1.rds") genes = rownames(genes) idx = split(1:length(genes), cut(1:length(genes), 20, labels = FALSE)) -pca = readRDS("raw_pca.rds") - pbmclapply( 1:20, function(i) { rna.integrated = FastIntegration(tmp.dir = "./", npcs = 1:30, slot = "data",