Skip to content

Commit

Permalink
Remove and add
Browse files Browse the repository at this point in the history
  • Loading branch information
Xiaoyu committed Nov 19, 2024
1 parent 07fc08d commit f431a29
Show file tree
Hide file tree
Showing 15 changed files with 202 additions and 129 deletions.
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -9,4 +9,4 @@ License: `use_mit_license()`, `use_gpl3_license()` or friends to pick a
license
Encoding: UTF-8
Roxygen: list(markdown = TRUE)
RoxygenNote: 7.2.3
RoxygenNote: 7.3.1
Binary file added FLAMINGOrLite_0.0.0.9000.tar.gz
Binary file not shown.
21 changes: 21 additions & 0 deletions LICENSE
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
MIT License

Copyright (c) 2024 XinruiYu

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
# Generated by roxygen2: do not edit by hand

export(flamingo_backbone)
export(flamingo_basic)
export(flamingo_main)
export(write.vtk)
Expand Down
1 change: 0 additions & 1 deletion R/assemble_structure.R
Original file line number Diff line number Diff line change
Expand Up @@ -138,4 +138,3 @@ assemble_structure <- function(flamingo_backbone_prediction_obj,flamingo_final_r
res = data.frame(frag_id = unlist(id_list),x=all_points[,1],y=all_points[,2],z=all_points[,3])
return(res)
}

9 changes: 7 additions & 2 deletions R/data_utils.R
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@

construct_obj_from_hic <- function(hic_file,
resolution,
chr_name,
Expand All @@ -22,7 +21,6 @@ construct_obj_from_hic <- function(hic_file,

}


construct_obj_from_mcool <- function(mcool_file,
resolution,
chr_name,
Expand All @@ -33,16 +31,20 @@ construct_obj_from_mcool <- function(mcool_file,
all_dir = rhdf5::h5ls(mcool_file)
parent_dir = all_dir[1,2]
available_resolutions = Brick_list_mcool_resolutions(mcool_file)

if(!resolution %in% available_resolutions){
stop(
paste('Not an available resolution! Resolution must be one of: ',paste(available_resolutions,collapse = ', '))
)
}

target_dir = paste(c("",parent_dir,resolution),collapse='/')
mcool_dat = rhdf5::h5read(mcool_file,target_dir)
available_normalization = setdiff(names(mcool_dat$bins),c('chrom','start','end','weight'))

Default_normalization = 'weight'
skip_normalization = 0

if(!normalization %in% available_normalization){
if(Default_normalization %in% names(mcool_dat$bins)){
print('Will proceed using data in weight as normalized data')
Expand All @@ -52,16 +54,19 @@ construct_obj_from_mcool <- function(mcool_file,
skip_normalization = 1
}
}

csr_rawcount = data.frame(bin_1 = mcool_dat$pixels$bin1_id,
bin_2 = mcool_dat$pixels$bin2_id,
value = mcool_dat$pixels$count)
chr_id <- which(mcool_dat$bins$chrom == chr_name)
offset <- mcool_dat$indexes$chrom_offset[which(mcool_dat$chroms$name==chr_name)]
n <- length(chr_id)

csr_rawcount <- subset(csr_rawcount,csr_rawcount[,1] %in% chr_id & csr_rawcount[,2] %in% chr_id)
csr_rawcount[,1] <- csr_rawcount[,1]-offset
csr_rawcount[,2] <- csr_rawcount[,2]-offset
csr_rawcount <- as.matrix(csr_rawcount)

print(length(csr_rawcount[,1]))
if(!skip_normalization){
normalization_file = mcool_dat$bins[[normalization]]
Expand Down
2 changes: 1 addition & 1 deletion R/flamingo_backbone.R
Original file line number Diff line number Diff line change
Expand Up @@ -35,4 +35,4 @@ flamingo_backbone <- function(temp_folder,

return(flamingo_backbone_prediction)

}
}
46 changes: 23 additions & 23 deletions R/flamingo_basic.R
Original file line number Diff line number Diff line change
Expand Up @@ -25,34 +25,34 @@ flamingo_basic <- function(input_if,
inf_dist = 4)
{
require(Matrix)

#### generate pairwise distance
input_if = as.matrix(input_if)
input_if[which(is.na(input_if))] = 0

pd = if2pd(input_if,alpha,inf_dist)
n = nrow(pd)

#### invalid idx
rm_id = which(apply(input_if,1,max)==0)


#### identify the gram matrix of the input data
M = pd2gram(pd)


#### define measurement set omega
omega = get_measurement_set(input_if)
n_omega = dim(omega)[1]


#### sub diagonal set
diag_term <- which(omega[,2]-omega[,1]==1)
omega_diag = omega[diag_term,]
omega <<- omega[unique(c(diag_term,sample(1:n_omega,sample_rate*n_omega))),]
n_omega <- dim(omega)[1]


if(length(diag_term)==1){
n_omega_diag=1
omega_diag = matrix(omega_diag,ncol=2)
Expand All @@ -62,31 +62,31 @@ flamingo_basic <- function(input_if,
}else{
n_omega_diag <- dim(omega_diag)[1]
}


#### pre-calculate related data
# prepare for A*
precal_sample = get_element_adjoint_linear(omega)
func_list_sample = precal_sample$func_list
all_element_sample = precal_sample$all_element

# prepare for B*
precal_subdiag = get_element_adjoint_linear(omega_diag)
func_list_subdiag = precal_subdiag$func_list
all_element_subdiag = precal_subdiag$all_element


#### pre-calculate the b and d
b = linear_proj(omega,M)

d = linear_proj(omega_diag,M)

# control the sub-diagonal
for(i in 1:length(d)){
d[i] = min(d[i],max_dist)
}


#### run flamingo
P <- flamingo_worker(omega,
omega_diag,
Expand All @@ -95,15 +95,15 @@ flamingo_basic <- function(input_if,
all_element_sample,
all_element_subdiag,
b,d,n,lambda,r,error_threshold,max_iter)


#### keep the valid samples
if(length(rm_id)>0){
frag_id <- (1:n)[-rm_id]
}else{
frag_id <- 1:n
}

return(new('flamingo_prediction',id = frag_id, coordinates = P,input_n = n))

}
27 changes: 12 additions & 15 deletions R/flamingo_main.R
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
#' @param r Weights for distance between consecutive points. Default = 1.
#' @param max_dist Maximum allowed distance between two consecutive points. Default = 0.01
#' @param alpha Conversion factor between interaction frequency and pairwise distance. Default = -0.25.
#' @param inf_dist Maximun allowed distance betwee any two points. Default = 2.
#' @param inf_dist Maximum allowed distance between any two points. Default = 4.
#' @param error_threshold Error thresholds for reconstruction. Default = 1e-3.
#' @param max_iter Maximum iterations. Default = 500.
#' @keywords flamingo_main
Expand All @@ -30,7 +30,7 @@ flamingo_main <- function(hic_data,
r = 1,
max_dist = 0.01,
alpha = -0.25,
inf_dist = 2,
inf_dist = 4,
error_threshold = 1e-3,
max_iter=500)
{
Expand All @@ -43,16 +43,11 @@ flamingo_main <- function(hic_data,

library(parallel)
library(Matrix)
# source('flamingo_domain.R')
# source('flamingo_backbone.R')
# source('data_utils.R')
# source('init_object.R')
# source('model_utils.R')
# source('flamingo_basic.R')
# source('assemble_structure.R')

#### create temp folder

temp_folder = paste0("./temp", b)
print(temp_folder)
dir.create(temp_folder)
dir.create(paste0(temp_folder,'/domain_data'))
# dir.create(paste0(temp_folder,'/genomic_loc'))
Expand Down Expand Up @@ -83,7 +78,8 @@ flamingo_main <- function(hic_data,
}else{
stop("file format must be .hic or .mcool")
}
print(paste('Finished time: ',round(as.numeric(difftime(Sys.time(),a,units='mins')),digits=2), ' mins'))
print(paste('Finshed time: ',round(as.numeric(difftime(Sys.time(),a,units='mins')),digits=2), ' mins'))


#### Divide domain dataset
print('Dividing domains...')
Expand All @@ -93,34 +89,35 @@ flamingo_main <- function(hic_data,
domain_res = domain_res,
frag_res = frag_res,
temp_folder = temp_folder)
print(paste('Finished time: ',round(as.numeric(difftime(Sys.time(),a,units='mins')),digits=2), ' mins'))
print(paste('Finshed time: ',round(as.numeric(difftime(Sys.time(),a,units='mins')),digits=2), ' mins'))


#### Reconstruct backbone
print('Reconstructing backbones...')
a = Sys.time()
flamingo_backbone_prediction = flamingo_backbone(temp_folder,sample_rate,lambda,r,max_dist,error_threshold,max_iter,alpha,inf_dist)
print(paste('Finished time: ',round(as.numeric(difftime(Sys.time(),a,units='mins')),digits=2), ' mins'))
print(paste('Finshed time: ',round(as.numeric(difftime(Sys.time(),a,units='mins')),digits=2), ' mins'))


#### Reconstruct domain in parallel
print('Reconstructing intra-domain structures...')
a = Sys.time()
flamingo_intra_domain_prediction = flamingo_domain(temp_folder,sample_rate,lambda,r,max_dist,error_threshold,max_iter,alpha,inf_dist,nThread)
print(paste('Finished time: ',round(as.numeric(difftime(Sys.time(),a,units='mins')),digits=2), ' mins'))
print(paste('Finshed time: ',round(as.numeric(difftime(Sys.time(),a,units='mins')),digits=2), ' mins'))


print('Assembling structures...')
a = Sys.time()
res = assemble_structure(flamingo_backbone_prediction, flamingo_high_res_obj, flamingo_intra_domain_prediction,max_iter)
print(paste('Finished time: ',round(as.numeric(difftime(Sys.time(),a,units='mins')),digits=2), ' mins'))
print(paste('Finshed time: ',round(as.numeric(difftime(Sys.time(),a,units='mins')),digits=2), ' mins'))

#### Reformat results

res$chr = chr_name
res$start = (res$frag_id-1) * frag_res
res$end = res$frag_id * frag_res
res = res[,c('chr','start','end','x','y','z')]
print(paste('Reconstruction successful! Finished time: ',round(as.numeric(difftime(Sys.time(),b,units='mins')),digits=2), ' mins'))
print(paste('Reconstruction sucessfully! Finshed time: ',round(as.numeric(difftime(Sys.time(),b,units='mins')),digits=2), ' mins'))

return(res)
}
2 changes: 1 addition & 1 deletion R/init_object.R
Original file line number Diff line number Diff line change
Expand Up @@ -4,4 +4,4 @@
#' @export
# library(Matrix)
setClass("flamingo", slots=list(IF="sparseMatrix", n_frag='numeric',chr_name='character'))
setClass("flamingo_prediction", slots=list(id="numeric", coordinates='matrix',input_n='numeric'))
setClass("flamingo_prediction", slots=list(id="numeric", coordinates='matrix',input_n='numeric'))
Loading

0 comments on commit f431a29

Please sign in to comment.