## Here, we examine and practice using the kNN machine learning algorithm.
## Load the data and set the working directory (optional).
## Assumes you have already processed clean_census.csv to generate the
## data_2012 and data_2016 data frames.
data12_knn <- data_2012[,4:17]
data16_knn <- data_2016[,4:17]
## We must ALWAYS normalize data before using the kNN algorithm. Why?
## Because kNN classifies by distance, attributes on larger numeric scales
## would otherwise dominate the distance calculation.
## Here, we write a function to normalize any vector of variables, x,
## to the [0, 1] range (min-max normalization).
normalize <- function(x) {
  return((x - min(x)) / (max(x) - min(x)))
}
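## A quick sanity check on a toy vector (illustrative only, not part of the
## original analysis): values are rescaled to the [0, 1] range.
normalize(c(1, 5, 10))  # 0.0000000 0.4444444 1.0000000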
## Now, we divide the data into testing and training data (just the attributes).
## Note: the split points must match the label vectors below (rows 1:2334 for
## training, 2335:3112 for testing), otherwise knn() will error out.
prc_train <- as.data.frame(lapply(data12_knn[1:2334, 1:13], normalize))
prc_test <- as.data.frame(lapply(data12_knn[2335:3112, 1:13], normalize))
## We make separate vectors of the class labels for training and testing that
## correspond to the matrices above:
prc_train_labels <- data12_knn[1:2334, 14]
prc_test_labels <- data12_knn[2335:3112, 14]
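## A quick consistency check (added here for safety; knn() requires the
## training data and class labels to have the same length):
stopifnot(nrow(prc_train) == length(prc_train_labels),
          nrow(prc_test) == length(prc_test_labels))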
## "class" is the package that allows us to perform kNN analysis
library(class)
## Here we perform kNN analysis with k = 15.
prc_test_pred <- knn(train = prc_train, test = prc_test, cl = prc_train_labels, k = 15)
## Evaluate with a confusion matrix.
library(gmodels)
CrossTable(x = prc_test_labels, y = prc_test_pred, prop.chisq = FALSE)
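## The accuracy figure below can also be computed directly from the
## predictions (a quick check added here, not in the original script):
mean(as.character(prc_test_pred) == as.character(prc_test_labels))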
## 2012 kNN accuracy: 0.8997429306
#### 2016 Election
prc_train <- as.data.frame(lapply(data16_knn[1:2334, 1:13], normalize))
prc_test <- as.data.frame(lapply(data16_knn[2335:3112, 1:13], normalize))
prc_train_labels <- data16_knn[1:2334, 14]
prc_test_labels <- data16_knn[2335:3112, 14]
## "class" is the package that allows us to perform kNN analysis
library(class)
## Here we perform kNN analysis with k = 15.
prc_test_pred <- knn(train = prc_train, test = prc_test, cl = prc_train_labels, k = 15)
## Evaluate with a confusion matrix.
library(gmodels)
CrossTable(x = prc_test_labels, y = prc_test_pred, prop.chisq = FALSE)
## 2016 kNN accuracy: 93.7%
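## Optional: a minimal sketch for exploring the choice of k. The values below
## are arbitrary (k = 15 above was the original choice); this loop simply
## re-runs knn() on the same 2016 split and compares accuracies.
for (k in c(5, 11, 15, 21, 25)) {
  pred_k <- knn(train = prc_train, test = prc_test, cl = prc_train_labels, k = k)
  acc_k <- mean(as.character(pred_k) == as.character(prc_test_labels))
  cat("k =", k, "accuracy =", acc_k, "\n")
}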