-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathreduceGO.R
85 lines (63 loc) · 2.88 KB
/
reduceGO.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
### Lada's function
#source("https://bioconductor.org/biocLite.R")
#biocLite("GO.db")
library(GO.db)
obj.exist <- function(object){
exists(as.character(substitute(object)))
}
reduce_GO = function(godf,ngene_lim = 500){
### godf = dataframe with at least these column names "Geneset.ID" and "N.genes" - no particular order necessary, other columns ok so long as at least have the required ones
### ngene_lim is to pre-filter gene sets so that only smaller, more informative gene sets are used
### VERY IMPORTANT that the input godf is already ordered by some value, like FDR, rank, etc
### I recommend doing reduce_GO twice if results from your gene set enrichment test is 2-sided (e.g. enriched/depleted, up/down)
godf = subset(godf, N.genes <=ngene_lim)
if (!obj.exist(goall.off) & !obj.exist(goall.par) & !obj.exist(goall.anc)){
# find related GO terms, look at offspring and at parents
library(GO.db)
gobp.off <- as.list(GOBPOFFSPRING)
# Remove GO IDs that do not have any offspring
gobp.off <- gobp.off[!is.na(gobp.off)]
gobp.par <- as.list(GOBPPARENTS)
# Remove GO IDs that do not have any parents
gobp.par <- gobp.par[!is.na(gobp.par)]
gobp.anc <- as.list(GOBPANCESTOR)
# Remove GO IDs that do not have any ancestors
gobp.anc <- gobp.anc[!is.na(gobp.anc)]
gocc.off <- as.list(GOCCOFFSPRING)
# Remove GO IDs that do not have any offspring
gocc.off <- gocc.off[!is.na(gocc.off)]
gocc.par <- as.list(GOCCPARENTS)
# Remove GO IDs that do not have any parents
gocc.par <- gocc.par[!is.na(gocc.par)]
gocc.anc <- as.list(GOCCANCESTOR)
# Remove GO IDs that do not have any ancestors
gocc.anc <- gocc.anc[!is.na(gocc.anc)]
gomf.off <- as.list(GOMFOFFSPRING)
# Remove GO IDs that do not have any offspring
gomf.off <- gomf.off[!is.na(gomf.off)]
gomf.par <- as.list(GOMFPARENTS)
# Remove GO IDs that do not have any parents
gomf.par <- gomf.par[!is.na(gomf.par)]
gomf.anc <- as.list(GOMFANCESTOR)
# Remove GO IDs that do not have any ancestors
gomf.anc <- gomf.anc[!is.na(gomf.anc)]
goall.par = c(gobp.par,gocc.par,gomf.par)
goall.off = c(gobp.off,gocc.off,gomf.off)
goall.anc = c(gobp.anc,gocc.anc,gomf.anc)
}
if( ! "Description" %in% names(godf)){
godf$Description = Term(godf$Geneset.ID)
}
golist = godf$Geneset.ID
godf = data.frame(orig.row.num = 1:nrow(godf),godf)
ranked.list = godf[1,]
for (i in 2:length(golist)){
gterm = golist[i]
related = c(unlist(goall.off[which(names(goall.off)==gterm)]),unlist(goall.par[which(names(goall.par)==gterm)]),unlist(goall.anc[which(names(goall.anc)==gterm)]))
uniq = ifelse(sum(golist[1:(i-1)] %in% related)>0,0,1)
if(uniq==1){
ranked.list = rbind(ranked.list,godf[i,])
}
}
return(unique(ranked.list[complete.cases(ranked.list),]))
}