generated from mrvollger/SmkTemplate
-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathparse_to_10x_conversion.R
69 lines (53 loc) · 2.2 KB
/
parse_to_10x_conversion.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
# code adapted from
# https://support.parsebiosciences.com/hc/en-us/articles/360053078092-Seurat-Tutorial-65k-PBMCs
# https://rdrr.io/github/MarioniLab/DropletUtils/src/R/write10xCounts.R
# libraries
library(Seurat)
library(dplyr)
library(Matrix)
# config
# Input: directory holding the PARSE output files read by this script
# (count_matrix.mtx, cell_metadata.csv, all_genes.csv).
data_path <- "/path/to/PARSE/data/DGE_filtered"
# Output: directory that receives the Seurat object and the 10x-style files.
result_path <- "/path/to/result/directory"
# Create the output directory only when it is missing, so re-running the
# script does not emit an "already exists" warning; a genuine creation
# failure still surfaces as a warning from dir.create().
if (!dir.exists(result_path)) {
  dir.create(result_path, recursive = TRUE)
}
#### LOAD DATA
# Cell metadata from PARSE; the first CSV column is used as the row names.
cell_meta <- read.csv(
  file.path(data_path, "cell_metadata.csv"),
  row.names = 1
)
# PARSE count data. The matrix is transposed on load (mtx.transpose = TRUE),
# presumably because PARSE stores cells as rows -- confirm against your data.
# Cell names come from column 1 of cell_metadata.csv and feature names from
# column 2 of all_genes.csv; one leading line is skipped in each CSV.
data <- ReadMtx(
  mtx = file.path(data_path, "count_matrix.mtx"),
  mtx.transpose = TRUE,
  # cell (barcode) table
  cells = file.path(data_path, "cell_metadata.csv"),
  cell.column = 1,
  cell.sep = ",",
  skip.cell = 1,
  # feature (gene) table
  features = file.path(data_path, "all_genes.csv"),
  feature.column = 2,
  feature.sep = ",",
  skip.feature = 1,
  unique.features = TRUE,
  strip.suffix = FALSE
)
#### TRANSFORM DATA
# Report how many features have an empty name, then give any such feature a
# placeholder name. The replacement must target `data` (the count matrix
# loaded above in this script), not an undefined object.
table(rownames(data) == "")
rownames(data)[rownames(data) == ""] <- "unknown"
# transform metadata into desired format (e.g., split or add columns)
# <ADD YOUR CODE HERE>
# Create a pre-filtered Seurat object to reduce size:
#   min.cells    = 100 keeps features detected in at least 100 cells
#   min.features = 100 keeps cells with at least 100 detected features
# (`min.features` is the current argument name; the legacy `min.genes` is not
# a CreateSeuratObject parameter, so that filter was not being applied.)
data_object <- CreateSeuratObject(
  data,
  project = "project_name",
  min.cells = 100,
  min.features = 100,
  names.field = 0,
  meta.data = cell_meta
)
# check the created Seurat object
print(data_object)
#### SAVE DATA
# save Seurat object as an RDS file (read it back with readRDS())
saveRDS(data_object, file = file.path(result_path, "seurat_object.rds"))
# save per-cell metadata held in the Seurat object
write.csv(data_object@meta.data, file.path(result_path, "metadata.csv"))
# save RNA counts in Matrix Market format
# Use the accessor rather than reaching into object slots, whose layout
# differs between Seurat versions.
# NOTE(review): newer SeuratObject releases prefer `layer = "counts"` over
# `slot = "counts"` and may emit a deprecation warning here.
counts_rna <- GetAssayData(data_object, assay = "RNA", slot = "counts")
writeMM(counts_rna, file = file.path(result_path, "matrix.mtx"))
# save barcodes (i.e., cells), one per line
writeLines(colnames(counts_rna), file.path(result_path, "barcodes.tsv"))
# save features (i.e., genes) as three tab-separated columns without header:
# the gene name twice (id and name columns), then the constant feature type.
feature_table <- data.frame(
  id = rownames(counts_rna),
  name = rownames(counts_rna),
  stringsAsFactors = FALSE
)
feature_table$type <- rep("Gene Expression", length.out = nrow(feature_table))
write.table(
  feature_table,
  file = file.path(result_path, "features.tsv"),
  row.names = FALSE,
  col.names = FALSE,
  quote = FALSE,
  sep = "\t"
)