initial commit: dpeak ver. 1.0.1

keleslab · Dec 11, 2013 · 97b9693 · 97b9693
1 parent 81d49b2
commit 97b9693
Show file tree

Hide file tree

Showing 64 changed files with 9,493 additions and 0 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -0,0 +1,15 @@
+Package: dpeak
+Type: Package
+Title: dPeak (Deconvolution of Peaks in ChIP-seq Analysis)
+Version: 1.0.1
+Depends: R (>= 2.11.1), methods, graphics, Rcpp
+Imports: MASS, IRanges
+Suggests: parallel
+LinkingTo: Rcpp
+SystemRequirements: Perl
+Date: 2013-07-31
+Author: Dongjun Chung
+Maintainer: Dongjun Chung <[email protected]>
+Description: This package provides functions for fitting dPeak, a statistical framework to deconvolve ChIP-seq peaks.
+License: GPL (>= 2)
+LazyLoad: yes
diff --git a/NAMESPACE b/NAMESPACE
@@ -0,0 +1,10 @@
+useDynLib(dpeak,.registration=TRUE)
+exportPattern("^[[:alpha:]]+")
+
+import( methods, graphics, Rcpp, MASS, IRanges )
+
+export( dpeakRead, dpeakFit )
+
+export( show, plot, print, printEmpty )
+
+export( export )
diff --git a/R/AllClasses.R b/R/AllClasses.R
@@ -0,0 +1,61 @@
+
+# Dpeak data class
+
+setClass( Class="DpeakData",
+    representation=representation(
+        fragSet="list",
+
+        PET="logical",
+        fragLenTable="table",
+        Fratio="numeric",
+        aveFragLen="numeric",
+
+        stackedFragment="list",
+
+        peakChr="character",
+        peakStart="numeric",
+        peakEnd="numeric",
+
+        emptyList="character"
+    )
+)
+
+# Dpeak fit class
+
+setClass( Class="DpeakFit",
+    representation=representation(
+        fits="list",
+
+        optFit="list",
+        optMu="list",
+        optPi="list",
+        optPi0="list",
+        optGamma="list",
+        optDelta="list",
+        optSigma="list",
+        bicVec="list",
+        aicVec="list",
+
+        fragSet="list",
+        PET="logical",
+        fragLenTable="table",
+        Fratio="numeric",
+        aveFragLen="numeric",
+
+        stackedFragment="list",
+
+        peakChr="character",
+        peakStart="numeric",
+        peakEnd="numeric",
+
+        estDelta="logical",
+        lbDelta="numeric",
+        lbSigma="numeric",
+        psize="numeric",
+        maxComp="numeric", 
+        pConst="numeric", 
+        iterInit="numeric", 
+        iterMain="numeric", 
+        epsilon="numeric"
+    )
+)
diff --git a/R/AllGenerics.R b/R/AllGenerics.R
@@ -0,0 +1,19 @@
+
+# generic methods for "DpeakData" class
+
+setGeneric( "printEmpty",
+    function( object, ... )
+    standardGeneric("printEmpty")
+)
+
+setGeneric( "dpeakFit",
+    function( object, ... )
+    standardGeneric("dpeakFit")
+)
+
+# generic methods for "DpeakFit" class
+
+setGeneric( "export",
+    function( object, ... )
+    standardGeneric("export")
+)
diff --git a/R/base_bg_SET.R b/R/base_bg_SET.R
@@ -0,0 +1,17 @@
+
+# calculate background by excluding combined binding sites
+
+.bg_SET <- function( L, mu, R, n_group ) {
+  denom <- R + L - 1 - L
+
+  if ( n_group > 1 ) {
+    for ( g in 2:n_group ) {
+      denom <- denom - as.numeric( mu[g] - mu[(g-1)] <= L ) * abs( mu[g] - mu[(g-1)] ) -
+        as.numeric( mu[g] - mu[(g-1)] > L ) * L
+    }
+  }
+
+  denom <- pmax( denom, 1 ) # avoid non-positive values
+
+  return( denom )
+}
diff --git a/R/base_combineModelPET.R b/R/base_combineModelPET.R
@@ -0,0 +1,33 @@
+# combine model with same dimension
+
+.combineModelPET <- function( fit_list, mu_list, pi_list, pi0_list,
+    gamma_list, BIC_vec, AIC_vec, maxComp ) { 
+
+    fit_list_new <- mu_list_new <- pi_list_new <- pi0_list_new <- 
+        gamma_list_new <- vector( "list", maxComp )        
+    BIC_vec_new <- AIC_vec_new <- rep( NA, maxComp )    
+    dimVec <- unlist(lapply( mu_list, length ))
+
+    for ( i in 1:maxComp ) {
+        model_i <- which( dimVec==i )
+
+        if ( length(model_i) > 0 ) {
+            # if more than one model have the same dim, choose one with smaller BIC
+
+            min_i <- model_i[ which.min( BIC_vec[model_i] ) ]
+
+            BIC_vec_new[i] <- BIC_vec[min_i]
+            AIC_vec_new[i] <- AIC_vec[min_i]
+            fit_list_new[[i]] <- fit_list[[min_i]]
+            mu_list_new[[i]] <- mu_list[[min_i]]
+            pi_list_new[[i]] <- pi_list[[min_i]]
+            pi0_list_new[[i]] <- pi0_list[[min_i]]
+            gamma_list_new[[i]] <- gamma_list[[min_i]]
+        }
+    }    
+
+    return( list( fit_list=fit_list_new, mu_list=mu_list_new,
+        pi_list=pi_list_new, pi0_list=pi0_list_new, 
+        gamma_list=gamma_list_new,
+        BIC_vec=BIC_vec_new, AIC_vec=AIC_vec_new ) )
+}
diff --git a/R/base_combineModelSET.R b/R/base_combineModelSET.R
@@ -0,0 +1,34 @@
+# combine model with same dimension
+
+.combineModelSET <- function( fit_list, mu_list, pi_list, pi0_list,
+    delta_list, sigma_list, BIC_vec, AIC_vec, maxComp ) { 
+
+    fit_list_new <- mu_list_new <- pi_list_new <- pi0_list_new <-
+        delta_list_new <- sigma_list_new <- vector( "list", maxComp )        
+    BIC_vec_new <- AIC_vec_new <- rep( NA, maxComp )    
+    dimVec <- unlist(lapply( mu_list, length ))
+
+    for ( i in 1:maxComp ) {
+        model_i <- which( dimVec==i )
+
+        if ( length(model_i) > 0 ) {
+            # if more than one model have the same dim, choose one with smaller BIC
+
+            min_i <- model_i[ which.min( BIC_vec[model_i] ) ]
+
+            BIC_vec_new[i] <- BIC_vec[min_i]
+            AIC_vec_new[i] <- AIC_vec[min_i]
+            fit_list_new[[i]] <- fit_list[[min_i]]
+            mu_list_new[[i]] <- mu_list[[min_i]]
+            pi_list_new[[i]] <- pi_list[[min_i]]
+            pi0_list_new[[i]] <- pi0_list[[min_i]]
+            delta_list_new[[i]] <- delta_list[[min_i]]
+            sigma_list_new[[i]] <- sigma_list[[min_i]]
+        }
+    }    
+
+    return( list( fit_list=fit_list_new, mu_list=mu_list_new, 
+        pi_list=pi_list_new, pi0_list=pi0_list_new, 
+        delta_list=delta_list_new, sigma_list=sigma_list_new,
+        BIC_vec=BIC_vec_new, AIC_vec=AIC_vec_new ) )
+}
diff --git a/R/base_constructExtRead.R b/R/base_constructExtRead.R
@@ -0,0 +1,79 @@
+
+# read alignment files and construct bin-level files
+
+.constructExtRead <- function( infile=NULL, outfile=NULL, summaryfile=NULL,
+    fileFormat="eland_result", PET=FALSE, fragLen=200, perl = "perl" )
+{   
+    # preprocessing perl script embedded in "dpeak/inst/Perl/"
+
+    if ( PET ) {
+        script <- "extract_coord_PET.pl"
+        allFormat <- "eland_result"
+        allFormatName <- "Eland result"
+    } else {
+        script <- "extract_coord_SET.pl"
+        allFormat <- c( "eland_result", "eland_extended", "eland_export", "bowtie", "sam", "bed" )
+        allFormatName <- c( "Eland result", "Eland extended", "Eland export", "Bowtie default", "SAM", "BED" )
+    }
+
+    # Check whether perl exists
+
+    CMD <- paste( perl, "-v" )
+    res <- system( CMD, intern = TRUE, ignore.stderr = TRUE )
+
+    if ( length(res) == 0 ) {
+        # cannot proceed if perl does not exist
+
+        stop( "Perl is not found on your system! Either check $PATH if installed or please install Perl." )
+    } else {
+        # process read files into bin-level files if perl exists
+
+        # check whether minimal options are missing
+
+        if ( length(infile) != 1 || is.null(infile) )
+        {
+            stop( "Please specify the aligned read file!" )
+        }   
+
+        if ( length(fileFormat) != 1 || is.null(fileFormat) )
+        {
+            stop( "Please specify aligned read file format! Read '?constructExtRead' for supported file formats" )
+        }   
+
+        # check file format specification
+
+        if ( length(which(!is.na(match( fileFormat, allFormat )))) == 0 )
+        {
+            stop( "Unsupported aligned read file format! Read '?constructExtRead' for supported file formats" )
+        } 
+
+        fileFormatName <- allFormatName[ match( fileFormat, allFormat ) ]
+
+
+        # get path to the perl code (unified script for all file formats)
+
+        Fn.Path <- system.file( file.path("Perl",script), package="dpeak" )
+
+
+        # process read file to bin-level files using perl codes
+
+        if ( PET ) {
+            CMD <- paste( perl, 
+                " ", Fn.Path,         
+                " ", infile, 
+                " ", outfile, 
+                " ", summaryfile, 
+                " ", fileFormat, sep="" )
+        } else {
+            CMD <- paste( perl, 
+                " ", Fn.Path,         
+                " ", infile, 
+                " ", outfile, 
+                " ", summaryfile, 
+                " ", fileFormat, 
+                " ", fragLen, sep="" )
+        }
+
+        res <- system( CMD, intern = TRUE )
+    }
+}
diff --git a/R/base_cpp_call.R b/R/base_cpp_call.R
@@ -0,0 +1,52 @@
+
+# Calculate score function to update mu (Rcpp)
+
+.ff_score <- function( grid, S, E, L, Zg, R, gamma ) {
+    stopifnot( length(grid) >= 1L, length(S) >= 1L, length(E) >= 1L,
+        length(L) >= 1L, length(Zg) >= 1L, R > 0, gamma > 0,
+        length(S) == length(E), length(E) == length(L), length(L) == length(Zg) )
+    out <- .Call( "cpp_score", grid, S, E, L, Zg, R, gamma, PACKAGE="dpeak" )
+    return(out)
+}
+
+# Stack fragments (Rcpp)
+
+.ff_stack <- function( S, E, minx, maxx ) {
+    stopifnot( length(S) >= 1L, length(E) >= 1L, length(S) == length(E) )
+    out <- .Call( "cpp_stack", S, E, minx, maxx, PACKAGE="dpeak" )
+    return(out)
+}
+
+# random sampling
+
+.ff_samp <- function(X) {
+    stopifnot( is.matrix(X), nrow(X) >= 1L, ncol(X) >= 1L )
+    out <- .Call( "cpp_samp", X, PACKAGE="dpeak" )
+    return(out)
+}
+
+# normalize
+
+.ff_normalize <- function(X) {
+    stopifnot( is.matrix(X), nrow(X) >= 1L, ncol(X) >= 1L )
+    out <- .Call( "cpp_normalize", X, PACKAGE="dpeak" )
+    return(out)
+}
+
+# match with table of fragment length
+
+.ff_dlength <- function( L, Lname, Lfreq ) {
+    stopifnot( length(L) >= 1L, length(Lname) >= 1L, length(Lfreq) >= 1L,
+        length(Lname) == length(Lfreq) )
+    out <- .Call( "cpp_dlength", L, Lname, Lfreq, PACKAGE="dpeak" )
+    return(out)
+}
+
+#  Is Z0 larger than each Z? 
+
+.ff_ismaxZ0 <- function( Z0, Z ) {
+    stopifnot( is.matrix(Z), nrow(Z) >= 1L, ncol(Z) >= 1L, length(Z0) >= 1L, 
+        nrow(Z) == length(Z0) )
+    out <- .Call( "cpp_ismaxZ0", Z0, Z, PACKAGE="dpeak" )
+    return(out)
+}