Skip to content

Commit

Permalink
initial commit: dpeak ver. 1.0.1
Browse files Browse the repository at this point in the history
  • Loading branch information
dongjunchung committed Dec 11, 2013
1 parent 81d49b2 commit 97b9693
Show file tree
Hide file tree
Showing 64 changed files with 9,493 additions and 0 deletions.
15 changes: 15 additions & 0 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
Package: dpeak
Type: Package
Title: dPeak (Deconvolution of Peaks in ChIP-seq Analysis)
Version: 1.0.1
Depends: R (>= 2.11.1), methods, graphics, Rcpp
Imports: MASS, IRanges
Suggests: parallel
LinkingTo: Rcpp
SystemRequirements: Perl
Date: 2013-07-31
Author: Dongjun Chung
Maintainer: Dongjun Chung <[email protected]>
Description: This package provides functions for fitting dPeak, a statistical framework to deconvolve ChIP-seq peaks.
License: GPL (>= 2)
LazyLoad: yes
10 changes: 10 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
useDynLib(dpeak,.registration=TRUE)
exportPattern("^[[:alpha:]]+")

import( methods, graphics, Rcpp, MASS, IRanges )

export( dpeakRead, dpeakFit )

export( show, plot, print, printEmpty )

export( export )
61 changes: 61 additions & 0 deletions R/AllClasses.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@

# Dpeak data class

setClass( Class="DpeakData",
representation=representation(
fragSet="list",

PET="logical",
fragLenTable="table",
Fratio="numeric",
aveFragLen="numeric",

stackedFragment="list",

peakChr="character",
peakStart="numeric",
peakEnd="numeric",

emptyList="character"
)
)

# Dpeak fit class

setClass( Class="DpeakFit",
representation=representation(
fits="list",

optFit="list",
optMu="list",
optPi="list",
optPi0="list",
optGamma="list",
optDelta="list",
optSigma="list",
bicVec="list",
aicVec="list",

fragSet="list",
PET="logical",
fragLenTable="table",
Fratio="numeric",
aveFragLen="numeric",

stackedFragment="list",

peakChr="character",
peakStart="numeric",
peakEnd="numeric",

estDelta="logical",
lbDelta="numeric",
lbSigma="numeric",
psize="numeric",
maxComp="numeric",
pConst="numeric",
iterInit="numeric",
iterMain="numeric",
epsilon="numeric"
)
)
19 changes: 19 additions & 0 deletions R/AllGenerics.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@

# generic methods for "DpeakData" class

setGeneric( "printEmpty",
function( object, ... )
standardGeneric("printEmpty")
)

setGeneric( "dpeakFit",
function( object, ... )
standardGeneric("dpeakFit")
)

# generic methods for "DpeakFit" class

setGeneric( "export",
function( object, ... )
standardGeneric("export")
)
17 changes: 17 additions & 0 deletions R/base_bg_SET.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@

# calculate background by excluding combined binding sites

.bg_SET <- function( L, mu, R, n_group ) {
denom <- R + L - 1 - L

if ( n_group > 1 ) {
for ( g in 2:n_group ) {
denom <- denom - as.numeric( mu[g] - mu[(g-1)] <= L ) * abs( mu[g] - mu[(g-1)] ) -
as.numeric( mu[g] - mu[(g-1)] > L ) * L
}
}

denom <- pmax( denom, 1 ) # avoid non-positive values

return( denom )
}
33 changes: 33 additions & 0 deletions R/base_combineModelPET.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
# combine model with same dimension

.combineModelPET <- function( fit_list, mu_list, pi_list, pi0_list,
gamma_list, BIC_vec, AIC_vec, maxComp ) {

fit_list_new <- mu_list_new <- pi_list_new <- pi0_list_new <-
gamma_list_new <- vector( "list", maxComp )
BIC_vec_new <- AIC_vec_new <- rep( NA, maxComp )
dimVec <- unlist(lapply( mu_list, length ))

for ( i in 1:maxComp ) {
model_i <- which( dimVec==i )

if ( length(model_i) > 0 ) {
# if more than one model have the same dim, choose one with smaller BIC

min_i <- model_i[ which.min( BIC_vec[model_i] ) ]

BIC_vec_new[i] <- BIC_vec[min_i]
AIC_vec_new[i] <- AIC_vec[min_i]
fit_list_new[[i]] <- fit_list[[min_i]]
mu_list_new[[i]] <- mu_list[[min_i]]
pi_list_new[[i]] <- pi_list[[min_i]]
pi0_list_new[[i]] <- pi0_list[[min_i]]
gamma_list_new[[i]] <- gamma_list[[min_i]]
}
}

return( list( fit_list=fit_list_new, mu_list=mu_list_new,
pi_list=pi_list_new, pi0_list=pi0_list_new,
gamma_list=gamma_list_new,
BIC_vec=BIC_vec_new, AIC_vec=AIC_vec_new ) )
}
34 changes: 34 additions & 0 deletions R/base_combineModelSET.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
# combine model with same dimension

.combineModelSET <- function( fit_list, mu_list, pi_list, pi0_list,
delta_list, sigma_list, BIC_vec, AIC_vec, maxComp ) {

fit_list_new <- mu_list_new <- pi_list_new <- pi0_list_new <-
delta_list_new <- sigma_list_new <- vector( "list", maxComp )
BIC_vec_new <- AIC_vec_new <- rep( NA, maxComp )
dimVec <- unlist(lapply( mu_list, length ))

for ( i in 1:maxComp ) {
model_i <- which( dimVec==i )

if ( length(model_i) > 0 ) {
# if more than one model have the same dim, choose one with smaller BIC

min_i <- model_i[ which.min( BIC_vec[model_i] ) ]

BIC_vec_new[i] <- BIC_vec[min_i]
AIC_vec_new[i] <- AIC_vec[min_i]
fit_list_new[[i]] <- fit_list[[min_i]]
mu_list_new[[i]] <- mu_list[[min_i]]
pi_list_new[[i]] <- pi_list[[min_i]]
pi0_list_new[[i]] <- pi0_list[[min_i]]
delta_list_new[[i]] <- delta_list[[min_i]]
sigma_list_new[[i]] <- sigma_list[[min_i]]
}
}

return( list( fit_list=fit_list_new, mu_list=mu_list_new,
pi_list=pi_list_new, pi0_list=pi0_list_new,
delta_list=delta_list_new, sigma_list=sigma_list_new,
BIC_vec=BIC_vec_new, AIC_vec=AIC_vec_new ) )
}
79 changes: 79 additions & 0 deletions R/base_constructExtRead.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@

# read alignment files and construct bin-level files

.constructExtRead <- function( infile=NULL, outfile=NULL, summaryfile=NULL,
fileFormat="eland_result", PET=FALSE, fragLen=200, perl = "perl" )
{
# preprocessing perl script embedded in "dpeak/inst/Perl/"

if ( PET ) {
script <- "extract_coord_PET.pl"
allFormat <- "eland_result"
allFormatName <- "Eland result"
} else {
script <- "extract_coord_SET.pl"
allFormat <- c( "eland_result", "eland_extended", "eland_export", "bowtie", "sam", "bed" )
allFormatName <- c( "Eland result", "Eland extended", "Eland export", "Bowtie default", "SAM", "BED" )
}

# Check whether perl exists

CMD <- paste( perl, "-v" )
res <- system( CMD, intern = TRUE, ignore.stderr = TRUE )

if ( length(res) == 0 ) {
# cannot proceed if perl does not exist

stop( "Perl is not found on your system! Either check $PATH if installed or please install Perl." )
} else {
# process read files into bin-level files if perl exists

# check whether minimal options are missing

if ( length(infile) != 1 || is.null(infile) )
{
stop( "Please specify the aligned read file!" )
}

if ( length(fileFormat) != 1 || is.null(fileFormat) )
{
stop( "Please specify aligned read file format! Read '?constructExtRead' for supported file formats" )
}

# check file format specification

if ( length(which(!is.na(match( fileFormat, allFormat )))) == 0 )
{
stop( "Unsupported aligned read file format! Read '?constructExtRead' for supported file formats" )
}

fileFormatName <- allFormatName[ match( fileFormat, allFormat ) ]


# get path to the perl code (unified script for all file formats)

Fn.Path <- system.file( file.path("Perl",script), package="dpeak" )


# process read file to bin-level files using perl codes

if ( PET ) {
CMD <- paste( perl,
" ", Fn.Path,
" ", infile,
" ", outfile,
" ", summaryfile,
" ", fileFormat, sep="" )
} else {
CMD <- paste( perl,
" ", Fn.Path,
" ", infile,
" ", outfile,
" ", summaryfile,
" ", fileFormat,
" ", fragLen, sep="" )
}

res <- system( CMD, intern = TRUE )
}
}
52 changes: 52 additions & 0 deletions R/base_cpp_call.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@

# Calculate score function to update mu (Rcpp)

.ff_score <- function( grid, S, E, L, Zg, R, gamma ) {
stopifnot( length(grid) >= 1L, length(S) >= 1L, length(E) >= 1L,
length(L) >= 1L, length(Zg) >= 1L, R > 0, gamma > 0,
length(S) == length(E), length(E) == length(L), length(L) == length(Zg) )
out <- .Call( "cpp_score", grid, S, E, L, Zg, R, gamma, PACKAGE="dpeak" )
return(out)
}

# Stack fragments (Rcpp)

.ff_stack <- function( S, E, minx, maxx ) {
stopifnot( length(S) >= 1L, length(E) >= 1L, length(S) == length(E) )
out <- .Call( "cpp_stack", S, E, minx, maxx, PACKAGE="dpeak" )
return(out)
}

# random sampling

.ff_samp <- function(X) {
stopifnot( is.matrix(X), nrow(X) >= 1L, ncol(X) >= 1L )
out <- .Call( "cpp_samp", X, PACKAGE="dpeak" )
return(out)
}

# normalize

.ff_normalize <- function(X) {
stopifnot( is.matrix(X), nrow(X) >= 1L, ncol(X) >= 1L )
out <- .Call( "cpp_normalize", X, PACKAGE="dpeak" )
return(out)
}

# match with table of fragment length

.ff_dlength <- function( L, Lname, Lfreq ) {
stopifnot( length(L) >= 1L, length(Lname) >= 1L, length(Lfreq) >= 1L,
length(Lname) == length(Lfreq) )
out <- .Call( "cpp_dlength", L, Lname, Lfreq, PACKAGE="dpeak" )
return(out)
}

# Is Z0 larger than each Z?

.ff_ismaxZ0 <- function( Z0, Z ) {
stopifnot( is.matrix(Z), nrow(Z) >= 1L, ncol(Z) >= 1L, length(Z0) >= 1L,
nrow(Z) == length(Z0) )
out <- .Call( "cpp_ismaxZ0", Z0, Z, PACKAGE="dpeak" )
return(out)
}
Loading

0 comments on commit 97b9693

Please sign in to comment.