Skip to content

Commit

Permalink
zUMIs2.5.4 featureCounts
Browse files Browse the repository at this point in the history
  • Loading branch information
cziegenhain committed Aug 9, 2019
1 parent 9b49033 commit aa3f2c9
Show file tree
Hide file tree
Showing 6 changed files with 186 additions and 19 deletions.
2 changes: 2 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,8 @@ We provide a script to convert zUMIs output into loom file automatically based o
To convert zUMIs output to loom, simply run `Rscript rds2loom.R myRun.yaml`.

## Changelog
09 Aug 2019: zUMIs2.5.4: zUMIs is now independent of the version of the Rsubread package. To improve code stability and reduce version-dependency, zUMIs now uses inbuilt precompiled Rsubread::featureCounts code.

08 Aug 2019: zUMIs2.5.3: optimizations in recently added features. Prevent Errors with missing options in YAML file ([see 130](https://github.com/sdparekh/zUMIs/issues/130#issuecomment-518840951)).

02 Aug 2019: zUMIs2.5.2: Speed up hamming distance calculations. Fix [missing barcode detection plot](https://github.com/sdparekh/zUMIs/issues/128#issuecomment-517355261) when using automatic detection + whitelist.
Expand Down
Binary file added misc/fcountsLib
Binary file not shown.
159 changes: 159 additions & 0 deletions misc/featureCounts.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,159 @@
featureCounts <- function(files,annot.inbuilt="mm10",annot.ext=NULL,isGTFAnnotationFile=FALSE,GTF.featureType="exon",GTF.attrType="gene_id",GTF.attrType.extra=NULL,chrAliases=NULL,useMetaFeatures=TRUE,allowMultiOverlap=FALSE,minOverlap=1,fracOverlap=0,fracOverlapFeature=0,largestOverlap=FALSE,nonOverlap=NULL,nonOverlapFeature=NULL,readShiftType="upstream",readShiftSize=0,readExtension5=0,readExtension3=0,read2pos=NULL,countMultiMappingReads=TRUE,fraction=FALSE,isLongRead=FALSE,minMQS=0,splitOnly=FALSE,nonSplitOnly=FALSE,primaryOnly=FALSE,ignoreDup=FALSE,strandSpecific=0,juncCounts=FALSE,genome=NULL,isPairedEnd=FALSE,requireBothEndsMapped=FALSE,checkFragLength=FALSE,minFragLength=50,maxFragLength=600,countChimericFragments=TRUE,autosort=TRUE,nthreads=1,byReadGroup=FALSE,reportReads=NULL,reportReadsPath=NULL,maxMOp=10,tmpDir=".",verbose=FALSE, fcounts_clib = NULL)
{
flag <- FALSE
files <- normalizePath(files, mustWork=T)
if(!is.null(annot.ext) && is.character(annot.ext)) annot.ext <- normalizePath(annot.ext, mustWork=T)
if(!is.null(chrAliases))chrAliases <- normalizePath(chrAliases, mustWork=T)
if(!is.null(genome)) genome <- normalizePath(genome, mustWork=T)
if(!is.null(reportReadsPath)){
reportReadsPath <- normalizePath(reportReadsPath, mustWork=T)
}else reportReadsPath <- ' '
strandSpecific<-as.character(strandSpecific)
strandSpecific<-paste(strandSpecific, collapse=".")
strandSpecific<-gsub(",", ".", strandSpecific)

if(readShiftSize < 0){
stop("The value of the readShiftSize parameter should not be negative.")
}

if(!(readShiftType %in% c("upstream","downstream","left","right"))){
stop("The value of the readShiftType parameter should be one of 'upstream', 'downstream', 'left' and 'right'.")
}

if(readExtension5 < 0){
stop("The value of the readExtension5 parameter should not be negative.")
}
if(readExtension3 < 0){
stop("The value of the readExtension3 parameter should not be negative.")
}

annot.screen.output <- 'R data.frame'
if(is.null(annot.ext)){
switch(tolower(as.character(annot.inbuilt)),
mm9={
ann <- system.file("annot","mm9_RefSeq_exon.txt",package="Rsubread")
annot.screen.output <- 'inbuilt (mm9)'
},
mm10={
ann <- system.file("annot","mm10_RefSeq_exon.txt",package="Rsubread")
annot.screen.output <- 'inbuilt (mm10)'
},
hg19={
ann <- system.file("annot","hg19_RefSeq_exon.txt",package="Rsubread")
annot.screen.output <- 'inbuilt (hg19)'
},
hg38={
ann <- system.file("annot","hg38_RefSeq_exon.txt",package="Rsubread")
annot.screen.output <- 'inbuilt (hg38)'
},
{
stop("In-built annotation for ", annot.inbuilt, " is not available.\n")
}
) # end switch
}
else{
if(is.character(annot.ext)){
ann <- annot.ext
annot.screen.output <- paste0(basename(ann), " (", ifelse(isGTFAnnotationFile, "GTF", "SAF"), ")");
}
else{
annot_df <- as.data.frame(annot.ext,stringsAsFactors=FALSE)
if(sum(c("geneid","chr","start","end", "strand") %in% tolower(colnames(annot_df))) != 5)
stop("One or more required columns are missing in the provided annotation data. Please refer to help page for annotation format.\n")
colnames(annot_df) <- tolower(colnames(annot_df))
annot_df <- data.frame(geneid=annot_df$geneid,chr=annot_df$chr,start=annot_df$start,end=annot_df$end,strand=annot_df$strand,stringsAsFactors=FALSE)
annot_df$chr <- as.character(annot_df$chr)
fout_annot <- file.path(".",paste(".Rsubread_UserProvidedAnnotation_pid",Sys.getpid(),sep=""))
oldScipen <- options(scipen=999)
write.table(x=annot_df,file=fout_annot,sep="\t",row.names=FALSE,quote=FALSE)
options(oldScipen)
ann <- fout_annot
flag <- TRUE
}
}

fout <- file.path(".",paste(".Rsubread_featureCounts_pid",Sys.getpid(),sep=""))

files_C <- paste(files,collapse=";")

if(nchar(files_C) == 0) stop("No read files provided!")

genome_C <- genome
if(is.null(genome))
genome_C <- " "

chrAliases_C <- chrAliases
if(is.null(chrAliases))
chrAliases_C <- " "

read2pos_C <- read2pos
if(is.null(read2pos)) read2pos_C <- 0

split_C <- 0
if(splitOnly) split_C <- 1
if(nonSplitOnly) split_C <- 2

PE_orientation <- "fr"

if(is.null(reportReads)) {
reportReads_C <- 0
} else if(reportReads == "CORE") {
reportReads_C <- 10
} else if(reportReads == "SAM") {
reportReads_C <- 50
} else if(reportReads == "BAM") {
reportReads_C <- 500
} else
stop("Invalid value was provided for reportReads parameter.")

do_detection_calls <- FALSE
max_missing_bases_in_read <- -1
max_missing_bases_in_feature <- -1
GTF.attrType.extra_str <- " "
if(!is.null(nonOverlap)) max_missing_bases_in_read <- nonOverlap
if(!is.null(nonOverlapFeature)) max_missing_bases_in_feature <- nonOverlapFeature
if(!is.null(GTF.attrType.extra))GTF.attrType.extra_str <- paste(GTF.attrType.extra, collapse="\t")

cmd <- paste("readSummary",ann,files_C,fout,as.numeric(isPairedEnd),minFragLength,maxFragLength,0,as.numeric(allowMultiOverlap),as.numeric(useMetaFeatures),nthreads,as.numeric(isGTFAnnotationFile),strandSpecific,reportReads_C,as.numeric(requireBothEndsMapped),as.numeric(!countChimericFragments),as.numeric(checkFragLength),GTF.featureType,GTF.attrType,minMQS,as.numeric(countMultiMappingReads),chrAliases_C," ",as.numeric(FALSE),14,readExtension5,readExtension3,minOverlap,split_C,read2pos_C," ",as.numeric(ignoreDup),as.numeric(!autosort),as.numeric(fraction),as.numeric(largestOverlap),PE_orientation,as.numeric(juncCounts),genome_C,maxMOp,0,as.numeric(fracOverlap),as.character(tmpDir),"0",as.numeric(byReadGroup),as.numeric(isLongRead),as.numeric(verbose),as.numeric(fracOverlapFeature), as.numeric(do_detection_calls), as.numeric(max_missing_bases_in_read), as.numeric(max_missing_bases_in_feature), as.numeric(primaryOnly), reportReadsPath, GTF.attrType.extra_str, annot.screen.output, readShiftType,readShiftSize ,sep=",")
n <- length(unlist(strsplit(cmd,",")))
dyn.load(fcounts_clib)
C_args <- .C("R_readSummary_wrapper",as.integer(n),as.character(cmd))
dyn.unload(fcounts_clib)

if(file.exists(fout)){
x <- read.delim(fout,stringsAsFactors=FALSE)
colnames(x)[1:6] <- c("GeneID","Chr","Start","End","Strand","Length")

x_summary <- read.delim(paste(fout,".summary",sep=""), stringsAsFactors=FALSE)

if(juncCounts)
x_jcounts <- read.delim(paste(fout,".jcounts",sep=""), stringsAsFactors=FALSE)

file.remove(fout)
file.remove(paste(fout,".summary",sep=""))

if(juncCounts)
file.remove(paste(fout,".jcounts",sep=""))

if(flag)
file.remove(fout_annot)

add_attr_numb <- 0
if(!is.null(GTF.attrType.extra)) add_attr_numb <- length(GTF.attrType.extra)
if(ncol(x) <= (6 + add_attr_numb)){
stop("No count data were generated.")
}

y <- as.matrix(x[,-c(1:(6 + add_attr_numb))])
colnames(y) <- colnames(x)[-c(1:(6 + add_attr_numb))]
rownames(y) <- x$GeneID

if(juncCounts)
z <- list(counts=y,counts_junction=x_jcounts,annotation=x[,1:(6 + add_attr_numb)],targets=colnames(y),stat=x_summary)
else
z <- list(counts=y,annotation=x[,1:(6 + add_attr_numb)],targets=colnames(y),stat=x_summary)
z
}else{
stop("No counts were generated.")
}
}
32 changes: 17 additions & 15 deletions runfeatureCountFUN.R
Original file line number Diff line number Diff line change
Expand Up @@ -4,18 +4,18 @@ suppressWarnings(suppressMessages(require(GenomicFeatures)))
suppressWarnings(suppressMessages(require(GenomicAlignments)))
suppressWarnings(suppressMessages(require(AnnotationDbi)))

checkRsubreadVersion<- function(){
if(length(grep("Rsubread",installed.packages()))==0){
print("I did not find Rsubread so I am installing it...")
BiocInstaller::biocLite("Rsubread",dependencies = TRUE, ask = FALSE)
}else{
if(all(as.numeric_version(packageVersion("Rsubread"))<'1.26.1')){
print("I need newer Rsubread so I am updating it...")
BiocInstaller::biocUpdatePackages("Rsubread", ask=FALSE)
}
}
suppressMessages(require("Rsubread"))
}
# checkRsubreadVersion<- function(){
# if(length(grep("Rsubread",installed.packages()))==0){
# print("I did not find Rsubread so I am installing it...")
# BiocInstaller::biocLite("Rsubread",dependencies = TRUE, ask = FALSE)
# }else{
# if(all(as.numeric_version(packageVersion("Rsubread"))<'1.26.1')){
# print("I need newer Rsubread so I am updating it...")
# BiocInstaller::biocUpdatePackages("Rsubread", ask=FALSE)
# }
# }
# suppressMessages(require("Rsubread"))
# }

.makeSAF<-function(gtf){
print("Loading reference annotation from:")
Expand Down Expand Up @@ -70,9 +70,10 @@ checkRsubreadVersion<- function(){
rm(se,gr.gene,intron,exon,intron.exon.red,intron.exon.dis,intron.only,ol.ex,ol.in,intron.saf,exon.saf)
return(saf)
}
.runFeatureCount<-function(abamfile,RG,saf,strand,type,primaryOnly,cpu,mem){
.runFeatureCount<-function(abamfile,RG,saf,strand,type,primaryOnly,cpu,mem,fcounts_clib){
print(paste0("Assigning reads to features (",type,")"))
fc.stat<-Rsubread::featureCounts(files=abamfile,
# fc.stat<-Rsubread::featureCounts(files=abamfile,
fc.stat <- featureCounts(files=abamfile,
annot.ext=saf,
isGTFAnnotationFile=F,
primaryOnly=primaryOnly,
Expand All @@ -81,7 +82,8 @@ checkRsubreadVersion<- function(){
reportReads="BAM",
strandSpecific=strand,
isPairedEnd=T,
countChimericFragments=F)$stat
countChimericFragments=F,
fcounts_clib = fcounts_clib)$stat
fn<-paste0(abamfile,".featureCounts.bam")
nfn<-paste0(abamfile,".",type,".featureCounts.bam")

Expand Down
10 changes: 7 additions & 3 deletions zUMIs-dge2.R
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ opt <-read_yaml(myYaml)
setwd(opt$out_dir)
#try(unixtools::set.tempdir(opt$out_dir))
source(paste0(opt$zUMIs_directory,"/runfeatureCountFUN.R"))
source(paste0(opt$zUMIs_directory,"/misc/featureCounts.R"))
source(paste0(opt$zUMIs_directory,"/UMIstuffFUN.R"))
source(paste0(opt$zUMIs_directory,"/barcodeIDFUN.R"))
options(datatable.fread.input.cmd.message=FALSE)
Expand All @@ -19,7 +20,8 @@ print(Sys.time())
samtoolsexc <- opt$samtools_exec
data.table::setDTthreads(threads=opt$num_threads)
#Check the version of Rsubread
checkRsubreadVersion()
#checkRsubreadVersion()
fcounts_clib <- paste0(opt$zUMIs_directory,"/misc/fcountsLib")

opt <- fixMissingOptions(opt)
#######################################################################
Expand Down Expand Up @@ -47,7 +49,8 @@ fnex<-.runFeatureCount(abamfile,
type="ex",
primaryOnly = opt$counting_opts$primaryHit,
cpu = opt$num_threads,
mem = opt$mem_limit)
mem = opt$mem_limit,
fcounts_clib = fcounts_clib)
ffiles<-fnex

if(opt$counting_opts$introns){
Expand All @@ -57,7 +60,8 @@ if(opt$counting_opts$introns){
type="in",
primaryOnly = opt$counting_opts$primaryHit,
cpu = opt$num_threads,
mem = opt$mem_limit)
mem = opt$mem_limit,
fcounts_clib = fcounts_clib)
ffiles<-c(ffiles,fnin)
}

Expand Down
2 changes: 1 addition & 1 deletion zUMIs-master.sh
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
# Pipeline to run UMI-seq analysis from fastq to read count tables.
# Authors: Swati Parekh, Christoph Ziegenhain, Beate Vieth & Ines Hellmann
# Contact: [email protected] or [email protected]
vers=2.5.3
vers=2.5.4
currentv=`curl -s https://raw.githubusercontent.com/sdparekh/zUMIs/master/zUMIs-master.sh | grep '^vers=' | cut -f2 -d "="`
if [ "$currentv" != "$vers" ]; then echo -e "------------- \n\n Good news! A newer version of zUMIs is available at https://github.com/sdparekh/zUMIs \n\n-------------"; fi

Expand Down

0 comments on commit aa3f2c9

Please sign in to comment.