-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathglmnet_map.R
91 lines (71 loc) · 3.44 KB
/
glmnet_map.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
################################################################################
# Perform association mapping in an interval, select SNPs above a certain LOD
# and feed them into GLMnet to find groups of SNPs that may explain the QTL.
#
# Daniel Gatti
# Aug. 4, 2017
################################################################################
library(qtl2)
library(glmnet)
source("assoc_mapping.R")
# Arguments:
# pheno: Numeric matrix containing the phenotype to map. Samples in rows and
# 1 column. Sample IDs must be in rownames.
# genoprobs: qtl2-style genoprobs object for all chromosomes.
# covar: Numeric matrix (as from model.matrix) containing the additive
# covariates to use in the model.
# map: List of markers positions for each chromosome.
# assoc: A scan1 object from assoc_mapping.
# snpinfo: A snpinfo object from assoc_mapping.
# lod.thr: Number indicating a LOD threshold to use when selecting SNPs.
# We will only perform glmnet on SNPs with a LOD >= lod.thr.
# alpha: mixing proportion for LASSO vs. ridge regression Default = 0.5.
# See https://cran.r-project.org/web/packages/glmnet/glmnet.pdf.
# Returns: a list with 2 elements:
# [[1]]: numeric matrix containing the shrunken coefficients;
# [[2]]: snpinfo object that corresponds to the coeffients and founder SDPs.
# The goal is to be able to use plot_snpasso() with the returned objects.
# NOTE: we are using alpha = 0.5, which is between LASSO and Ridge.
assoc_glmnet = function(pheno, genoprobs, covar, map, assoc, snpinfo, lod.thr = 1, alpha = 0.5) {
# Expand the SNPs and LOD scores.
snpinfo.map = qtl2scan:::snpinfo_to_map(snpinfo)
expand.lod = qtl2plot:::expand_snp_results(assoc, snpinfo.map, snpinfo)
snpinfo = data.frame(snpinfo, lod = expand.lod$lod, stringsAsFactors = FALSE)
colnames(snpinfo)[ncol(snpinfo)] = "lod"
# Select the SNPs with LOD scores above lod.thr.
print(paste(nrow(snpinfo), "SNPs input."))
snpinfo = snpinfo[snpinfo$lod >= lod.thr,]
print(paste(nrow(snpinfo), "SNPs with LOD > ", lod.thr, "."))
# Get the SNP probs.
new.snpinfo = index_snps(map = map, snpinfo = snpinfo)
snpprobs = genoprob_to_snpprob(genoprobs, new.snpinfo)
# Create an x matrix with the covariates and SNP probs.
x = cbind(add_covar, snpprobs[[1]][,"A",])
# Run glmnet and cross valdiation to select an optimal lambda.
mod = glmnet(x = x, y = pheno[,1], alpha = alpha)
mod.cv = cv.glmnet(x = x, y = pheno[,1], alpha = alpha)
# Get the shrunken coefficients.
coefs = as.matrix(coef(mod, s = mod.cv$lambda.min))
# Strip off the additive coefficients.
coefs = coefs[-(1:(ncol(covar)+1)),, drop = FALSE]
return(list(coefs, new.snpinfo))
} # assoc_glmnet()
# Function to get
get_nonzero_snps = function(glm_output) {
coefs = glm_output[[1]]
snpinfo = glm_output[[2]]
# Expand the SNPs and LOD scores.
snpinfo.map = qtl2scan:::snpinfo_to_map(snpinfo)
expand.snps = qtl2plot:::expand_snp_results(coefs, snpinfo.map, snpinfo)
snpinfo = data.frame(snpinfo, expand.snps$lod, stringsAsFactors = FALSE)
colnames(snpinfo)[ncol(snpinfo)] = "coef"
# Select the SNPs with LOD scores above lod.thr.
print(paste(nrow(snpinfo), "SNPs input."))
snpinfo = snpinfo[(snpinfo$coef) > 0,]
print(paste(nrow(snpinfo), "SNPs with coefficients != 0."))
sdps = invert_sdp(snpinfo$sdp, 8)
colnames(sdps) = LETTERS[1:8]
snpinfo = cbind(snpinfo, sdps)
return(snpinfo)
} # get_nonzero_snps()