Skip to content

Analysis of mouse_skin dataset from SHARE seq technology by MOFA and scMVAE PoE model

cmzuo11 edited this page Apr 21, 2021 · 4 revisions

Preprocessing steps (R script)

The preprocessing step is the same as mouse_skin dataset from SHARE-seq technology by DCCA model

Run MOFA+ model (R script)

1. Install MOFA+ based on instructions

2. Run MOFA+ model with the following code:

Load libraries

library('MOFA2')
library('Seurat')
library('aricode')
source(./DCCA/Processing_data.R)

Preprocessing

# convert peak count data into gene-activity data 
gtf_file               = paste0("/sibcb1/chenluonanlab6/zuochunman/Reference/","hg38_annotation.gtf")
activity.matrix        = CreateGeneActivityMatrix(peak.matrix = peaks, annotation.file = gtf_file, 
                                                  seq.levels = paste0("chr",c(1:19, "X", "Y")), 
                                                  upstream = 2000, verbose = TRUE)

doubleExp              = list("RNA"  = scRNA_data[match(HVGs, row.names(scRNA_data)),], "ATAC" = activity.matrix)

Parameters setting

mofa                   = create_mofa(doubleExp)
model_opts             = get_default_model_options(mofa)
model_opts$num_factors = 20
train_opts             = get_default_training_options(mofa)
mofa                   = prepare_mofa( object           = mofa,
                                       model_options    = model_opts,
                                       training_options = train_opts )

Run mofa

mofa                   = run_mofa(mofa) 

# get latent code for further visualization
latent_data            = get_factors(mofa, factors = "all", as.data.frame = FALSE) 

# predict cell clusters
clusters               = cluster_samples(mofa, k = 23 ) 

# calculate ARI and NMI
ARI(Seurat_obj[["Cell_type"]], clusters$cluster) 
NMI(Seurat_obj[["Cell_type"]], clusters$cluster)

# impute two-omics data
mofa                   = impute(mofa,views = "all", groups = "all", factors = "all", add_intercept = TRUE)
RNA_imputation         = mofa@imputed_data$RNA$group1$mean
ATAC_imputation        = mofa@imputed_data$ATAC$group1$mean

# save model to reproduce results
save(mofa, file = "Mouse_skin_mofa2.RData" ) 

Run scMVAE-PoE model (Python script)

1. Install scMVAE based on instructions

2. Run scMVAE-PoE model with the following code:

# creat neural network for scMVAE-PoE
model  = scMVAE_POE ( encoder_1       = [Nfeature, 1000, 128], hidden_1  = 128, 
		      Z_DIMS          = 20,               decoder_share  = [20, 128],
		      share_hidden    = 128,              decoder_1      = [20, 128], 
		      hidden_2        = 128,             encoder_l       = [ Nfeature, 128 ],
		      hidden3         = 128,             encoder_2       = [Nfeature1, 2000, 1000, 128], 
		      hidden_4        = 128,             encoder_l1      = [Nfeature1, 128], 
		      hidden3_1       = 128,             decoder_2       = [20, 128, 1000, 2000],
		      hidden_5        = 2000,             drop_rate      = 0.1, 
		      log_variational = True,            Type            = "ZINB", 
		      device          = device,          n_centroids     = 4, 
		      penality        = "GMM",           model           = 0  )

# model training
train( args, adata, infer_data, model, train_index, test_index, lib_mean, lib_var, 
       lib_mean1, lib_var1, adata.obs['Group'], 0.0001, 1, "ZINB", "ZINB", device, 
       scale_factor = 5 )

# save model
save_checkpoint(model, "PoE_cell.pth.tar")

# load model to reproduce the result
load_checkpoint( './PoE_cell.pth.tar', model, device)

# get the training results
latent_z, recon_x1, norm_x1, recon_x_2, norm_x2 = model.Denoise_batch(total_loader) # latent_z for the joint-learning features

# latent_z for clustering and visualization