-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathrun_svm.R
63 lines (55 loc) · 2.45 KB
/
run_svm.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
#' Cross-validate an SVM poetry classifier over pre-split feature folds
#'
#' Each element of `features.split` is held out once as the test fold while
#' the remaining elements are row-bound into the training set. An SVM is fit
#' on the training set, the held-out fold is predicted, and the resulting
#' confusion matrix is converted with `convert_cm_to_df()`. The per-fold
#' results are then combined with `aggregate_cm_dynamically()`, printed to a
#' text file and returned.
#'
#' Relies on names defined outside this function: `rbindlist`, `svm`,
#' `confusionMatrix` (presumably data.table, e1071 and caret -- confirm
#' against the file's library() calls), the project helpers
#' `is_known_author`, `convert_cm_to_df`, `aggregate_cm_dynamically`, and the
#' global `outputpath`.
#'
#' @param features.split A list of at least two tables that `rbindlist()` can
#'   bind. Each must contain an `is_poetry` column; an `author` or
#'   `varia_author` column, if present, is re-encoded via `is_known_author()`.
#' @param filestem Prefix prepended to the output file name
#'   `confusionMatrix_combined.txt` inside `outputpath`.
#' @return The value of `aggregate_cm_dynamically()` applied to the list of
#'   per-fold confusion-matrix data frames.
run_svm <- function(features.split, filestem = "") {
  n_folds <- length(features.split)
  # With fewer than two folds there is no training data left once one fold
  # is held out, so fail early with a clear message.
  if (n_folds < 2L) {
    stop("run_svm() needs at least two folds in `features.split`, got ",
         n_folds, call. = FALSE)
  }

  # One slot per fold; filled in the loop below.
  matrices <- vector("list", n_folds)

  for (set_no in seq_len(n_folds)) {
    print(paste0("Starting run_svm round ", set_no, " at ", date()))

    # Training set: every fold except `set_no`, bound once.
    features <- rbindlist(features.split[-set_no], use.names = TRUE)
    is_poetry <- features$is_poetry

    # Re-encode the author column using only authors that appear on poetry
    # rows of the training folds.
    # NOTE(review): the label is compared to "POETRY" here, but
    # confusionMatrix() below is called with positive = "TRUE" -- confirm
    # which encoding `is_poetry` actually uses.
    if ("author" %in% names(features)) {
      poetry_authors <- features$author[which(is_poetry == "POETRY")]
      features$author <- is_known_author(features$author,
                                         poetry_authors = poetry_authors,
                                         ignore_NA = TRUE)
    } else if ("varia_author" %in% names(features)) {
      poetry_authors <- features$varia_author[which(is_poetry == "POETRY")]
      features$varia_author <- is_known_author(features$varia_author,
                                               poetry_authors = poetry_authors,
                                               ignore_NA = TRUE)
    }

    # `x` holds predictors only; the response in the formula is resolved from
    # the local variable `is_poetry`, so that name must not be changed.
    x <- subset(features, select = -is_poetry)
    svm_model <- svm(is_poetry ~ ., data = x)

    # Test set: the held-out fold.
    features2 <- rbindlist(features.split[set_no])
    is_poetry2 <- features2$is_poetry

    # Apply the same author encoding to the test fold, reusing the
    # `poetry_authors` derived from the training folds. The column check is
    # made against the training table, which is the one that decided whether
    # `poetry_authors` exists.
    if ("author" %in% names(features)) {
      features2$author <- is_known_author(features2$author,
                                          poetry_authors = poetry_authors,
                                          ignore_NA = TRUE)
    } else if ("varia_author" %in% names(features)) {
      features2$varia_author <- is_known_author(features2$varia_author,
                                                poetry_authors = poetry_authors,
                                                ignore_NA = TRUE)
    }

    x2 <- subset(features2, select = -is_poetry)
    pred2 <- predict(svm_model, x2)
    cm <- confusionMatrix(data = pred2, reference = is_poetry2,
                          positive = "TRUE")
    matrices[[set_no]] <- convert_cm_to_df(cm)

    # Release the per-fold tables and model before the next round.
    gc()
  }

  aggregated_results <- aggregate_cm_dynamically(matrices)

  # Print the combined result to a file with a very wide console so rows are
  # not wrapped. Both the width option and the sink are restored via
  # on.exit(), so a failure while printing cannot leave output diverted.
  out_file <- paste0(outputpath, "/", filestem,
                     "confusionMatrix_combined.txt")
  old_opts <- options(width = 1000)
  on.exit(options(old_opts), add = TRUE)
  sink(file = out_file, append = FALSE)
  on.exit(sink(), add = TRUE)
  print(aggregated_results)

  aggregated_results
}