-
Notifications
You must be signed in to change notification settings - Fork 2
Analysis of mouse_skin dataset from SHARE seq technology by MOFA and scMVAE PoE model
cmzuo11 edited this page Apr 21, 2021
·
4 revisions
The preprocessing step is the same as mouse_skin dataset from SHARE-seq technology by DCCA model
1. Install MOFA+ based on instructions
library('MOFA2')
library('Seurat')
library('aricode')
source(./DCCA/Processing_data.R)
# convert peak count data into gene-activity data
gtf_file = paste0("/sibcb1/chenluonanlab6/zuochunman/Reference/","hg38_annotation.gtf")
activity.matrix = CreateGeneActivityMatrix(peak.matrix = peaks, annotation.file = gtf_file,
seq.levels = paste0("chr",c(1:19, "X", "Y")),
upstream = 2000, verbose = TRUE)
doubleExp = list("RNA" = scRNA_data[match(HVGs, row.names(scRNA_data)),], "ATAC" = activity.matrix)
mofa = create_mofa(doubleExp)
model_opts = get_default_model_options(mofa)
model_opts$num_factors = 20
train_opts = get_default_training_options(mofa)
mofa = prepare_mofa( object = mofa,
model_options = model_opts,
training_options = train_opts )
mofa = run_mofa(mofa)
# get latent code for further visualization
latent_data = get_factors(mofa, factors = "all", as.data.frame = FALSE)
# predict cell clusters
clusters = cluster_samples(mofa, k = 23 )
# calculate ARI and NMI
ARI(Seurat_obj[["Cell_type"]], clusters$cluster)
NMI(Seurat_obj[["Cell_type"]], clusters$cluster)
# impute two-omics data
mofa = impute(mofa,views = "all", groups = "all", factors = "all", add_intercept = TRUE)
RNA_imputation = mofa@imputed_data$RNA$group1$mean
ATAC_imputation = mofa@imputed_data$ATAC$group1$mean
# save model to reproduce results
save(mofa, file = "Mouse_skin_mofa2.RData" )
1. Install scMVAE based on instructions
# creat neural network for scMVAE-PoE
model = scMVAE_POE ( encoder_1 = [Nfeature, 1000, 128], hidden_1 = 128,
Z_DIMS = 20, decoder_share = [20, 128],
share_hidden = 128, decoder_1 = [20, 128],
hidden_2 = 128, encoder_l = [ Nfeature, 128 ],
hidden3 = 128, encoder_2 = [Nfeature1, 2000, 1000, 128],
hidden_4 = 128, encoder_l1 = [Nfeature1, 128],
hidden3_1 = 128, decoder_2 = [20, 128, 1000, 2000],
hidden_5 = 2000, drop_rate = 0.1,
log_variational = True, Type = "ZINB",
device = device, n_centroids = 4,
penality = "GMM", model = 0 )
# model training
train( args, adata, infer_data, model, train_index, test_index, lib_mean, lib_var,
lib_mean1, lib_var1, adata.obs['Group'], 0.0001, 1, "ZINB", "ZINB", device,
scale_factor = 5 )
# save model
save_checkpoint(model, "PoE_cell.pth.tar")
# load model to reproduce the result
load_checkpoint( './PoE_cell.pth.tar', model, device)
# get the training results
latent_z, recon_x1, norm_x1, recon_x_2, norm_x2 = model.Denoise_batch(total_loader) # latent_z for the joint-learning features
# latent_z for clustering and visualization