-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathrandomForest.R
68 lines (45 loc) · 2.22 KB
/
randomForest.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
# This is an attempt to use a random forest to answer my research question.
# I am still not sure that this really does what I am asking, because it predicts the dependent variable
# but doesn't actually look at the specific relationship between the two variables I am interested in.
# To do that, do we need to run an ANOVA between the two variables?
library(randomForest)
library(car)
# The data file is located relative to the current working directory.
local <- getwd()

# Restore the cleaned data written by the CleanDataProject script.
# NOTE(review): the rest of this script expects this file to provide the
# `noNeonateData` data frame -- confirm against CleanDataProject.
load(
  file = paste0(
    local,
    "/Bigdata/Dropbox (Technion Dropbox)/Rina_Benel/Home/MachineLearningMedicine/results/cleanData.RData"
  )
)

# Counts of each InterpLos value across the full data set.
table(noNeonateData$InterpLos)
##########################
# TRAIN / TEST PARTITION
##########################
# Fix the RNG state so the same rows are drawn on every run.
set.seed(1234)

# Draw the row indices of the training set: 70% of the rows, sampled
# without replacement (the default for sample()).
train <- sample(
  x = nrow(noNeonateData),
  size = 0.7 * nrow(noNeonateData)
)

# The sampled rows form the training frame; every row that was not
# sampled goes into the test frame.
train.df <- noNeonateData[train, ]
test.df <- noNeonateData[-train, ]

# Counts of each InterpLos value within each partition.
table(train.df$InterpLos)
table(test.df$InterpLos)
###############################
# RANDOM FOREST FOR InterpLos
###############################
# Fit a random forest on the training rows that predicts InterpLos from the
# predictors listed in the formula. importance = TRUE asks randomForest to
# keep the variable-importance measures used below.
# NOTE(review): randomForest grows a classification forest only when the
# response is a factor (otherwise it fits a regression forest) -- confirm
# that InterpLos is stored as a factor.
fit.forest <- randomForest(
  InterpLos ~ gender + binaryLang + first_admit_age + simpleEthnic +
    marital_status + insurance + sofa + sapsii,
  data = train.df,
  importance = TRUE
)

# Variable importance; type = 2 selects the node-impurity based measure.
importance(fit.forest, type = 2)

# Predict the held-out rows and cross-tabulate observed against predicted
# values. The dimensions are labelled so the printed table is unambiguous.
forest.pred <- predict(fit.forest, newdata = test.df)
forest.preformance <- table(
  test.df$InterpLos, forest.pred,
  dnn = c("Actual", "Predicted")
)

# Print the confusion table so the evaluation appears in the script output.
print(forest.preformance)
################
# ONE-WAY ANOVA
################
# One-way ANOVA of logLOS by binaryLang; logLOS is used as the response
# because it is numeric.
res.anova <- aov(formula = logLOS ~ binaryLang, data = noNeonateData)
summary(res.anova)

# Author's note: the ANOVA result was significant, so Tukey's HSD is used
# to look at which groups differ.
TukeyHSD(res.anova)

# Check the ANOVA assumptions, starting with homogeneity of variance:
# first the residuals-vs-fitted diagnostic plot, then Levene's test.
plot(res.anova, which = 1)
# Author's note: Levene's test came out significant, which means the
# homogeneity-of-variance assumption is violated.
leveneTest(logLOS ~ binaryLang, data = noNeonateData)

# Welch one-way test, which does not require equal variances across
# groups (var.equal = FALSE is the default, spelled out for clarity).
oneway.test(
  formula = logLOS ~ binaryLang,
  data = noNeonateData,
  var.equal = FALSE
)

# Kruskal-Wallis rank-sum test on the same comparison.
kruskal.test(logLOS ~ binaryLang, data = noNeonateData)