quadrama · nilsreiter · Nov 2, 2017 · Oct 19, 2017 · Oct 20, 2017 · Oct 20, 2017
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
@@ -1,6 +1,6 @@
 ## Git branching scheme
 
-We follow the scheme described (here)[http://nvie.com/posts/a-successful-git-branching-model/], which can be summarised like this:
+We follow the scheme described [here](http://nvie.com/posts/a-successful-git-branching-model/), which can be summarised like this:
 
 - `master`: only stable, released code
 - `develop`: latest updates, new features integrated here

diff --git a/R/frequencytable.R b/R/frequencytable.R
@@ -1,8 +1,9 @@
-#' This function generates a matrix of word frequencies by figure or drama.
+#' This function generates a matrix of word frequencies by drama, act or scene and/or by figure.
 #' @param t The text table, potentially covering multiple texts
 #' @param acceptedPOS A list of accepted pos tags
 #' @param names Whether to use figure names or ids
 #' @param byFigure Wether the count is by figure or by text
+#' @param by Whether the count is by drama (default), act or scene
 #' @param column The column name we should use (should be either Token.surface or Token.lemma)
 #' @param sep The separation character that goes between drama name and figure (if applicable)
 #' @param normalize Whether to normalize values or not
@@ -20,23 +21,31 @@
 #' stylo(gui=F, frequencies = stylo_table)
 #' }
 #' @export
-frequencytable <- function(t, acceptedPOS = postags$de$words, names=FALSE, column="Token.surface", byFigure=FALSE, sep="|", normalize=TRUE, sortResult=FALSE) {
+frequencytable <- function(t, acceptedPOS = postags$de$words, names=FALSE, column="Token.surface", byFigure=FALSE, sep="|", normalize=FALSE, sortResult=FALSE, by="drama") {
   ft <- t
   if (length(acceptedPOS) > 0)
     ft <- t[t$Token.pos %in% acceptedPOS,]
 
+  switch(by,
+         drama = { 
+           if (byFigure == FALSE) { xt <- stats::xtabs(~drama + ft[,get(column)], data=ft) }
+           else if (names == TRUE) { xt <- stats::xtabs(~ paste(drama,Speaker.figure_surface,sep=sep) + ~ft[,get(column)], data=ft) }
+           else { xt <- stats::xtabs(~ paste(drama,Speaker.figure_id,sep=sep) + ~ft[,get(column)], data=ft) }
+         },
+         act = {
+           if (byFigure == FALSE) { xt <- stats::xtabs(~ paste(drama,Number.Act,sep=sep) + ~ft[,get(column)], data=ft) }
+           else if (names == TRUE) { xt <- stats::xtabs(~ paste(drama,Number.Act,Speaker.figure_surface,sep=sep) + ~ft[,get(column)], data=ft) }
+           else { xt <- stats::xtabs(~ paste(drama,Number.Act,Speaker.figure_id,sep=sep) + ~ft[,get(column)], data=ft) }
+         },
+         scene = {
+           if (byFigure == FALSE) { xt <- stats::xtabs(~ paste(drama,Number.Act,Number.Scene,sep=sep) + ~ft[,get(column)], data=ft) }
+           else if (names == TRUE) { xt <- stats::xtabs(~ paste(drama,Number.Act,Number.Scene,Speaker.figure_surface,sep=sep) + ~ft[,get(column)], data=ft) }
+           else { xt <- stats::xtabs(~ paste(drama,Number.Act,Number.Scene,Speaker.figure_id,sep=sep) + ~ft[,get(column)], data=ft) }
+         },
+         stop("Please enter valid string-value for argument 'by' (default = 'drama', 'act' or 'scene').")
+  )
 
-  if (byFigure == FALSE) {
-    xt <- stats::xtabs(~drama + ft[,get(column)], data=ft)
-    r <- as.matrix(stats::ftable(xt, row.vars = c(), col.vars = c()))
-  } else if (names == TRUE) {
-    xt <- stats::xtabs(~ paste(drama,Speaker.figure_surface,sep=sep) + ~ft[,get(column)], data=ft)
-    r <- as.matrix(stats::ftable(xt, row.vars = c(), col.vars = c()))
-  } else {
-    xt <- stats::xtabs(~paste(drama,Speaker.figure_id,sep=sep)+ft[,get(column)], data=ft)
-    r <- as.matrix(stats::ftable(xt, row.vars = c(), col.vars = c()))
-  }
-
+  r <- as.matrix(stats::ftable(xt, row.vars = c(), col.vars = c()))
 
   if (normalize == TRUE) {
     r <- t(apply(r,1,function(x) { x / sum(x)}))
@@ -55,18 +64,35 @@ frequencytable <- function(t, acceptedPOS = postags$de$words, names=FALSE, colum
 #' @param acceptedPOS A list of accepted pos tags
 #' @param names Whether to use figure names or ids
 #' @param byFigure Wether the count is by figure or by text
+#' @param by Whether the count is by drama (default), act or scene
 #' @param cols The column names we should use (should be either Token.surface or Token.lemma)
 #' @keywords internal
-frequencytable2 <- function(t, acceptedPOS = postags$de$words, names=FALSE, cols=c("Token.surface", "Token.surface"), byFigure=FALSE) {
+frequencytable2 <- function(t, acceptedPOS = postags$de$words, names=FALSE, cols=c("Token.surface", "Token.surface"), byFigure=FALSE, by="drama") {
   ft <- t
-  if (length(acceptedPOS) > 0)
+  if (length(acceptedPOS) > 0) {
     ft <- t[t$Token.pos %in% acceptedPOS,]
-  if (byFigure == FALSE)
-    index <- paste(ft$drama)
-  else if (names == TRUE)
-    index <- paste(ft$drama, ft$Speaker.figure_surface)
-  else
-    index <- paste(ft$drama, ft$Speaker.figure_id)
+  }
+
+  switch(by,
+         drama = { 
+           if (byFigure == FALSE) { index <- paste(ft$drama) }
+           else if (names == TRUE) { index <- paste(ft$drama, ft$Speaker.figure_surface) }
+           else { index <- paste(ft$drama, ft$Speaker.figure_id) }
+         },
+         act = {
+           if (byFigure == FALSE) { index <- paste(ft$drama, ft$Number.Act) }
+           else if (names == TRUE) { index <- paste(ft$drama, ft$Number.Act, ft$Speaker.figure_surface) }
+           else { index <- paste(ft$drama, ft$Number.Act, ft$Speaker.figure_id) }
+         },
+         scene = {
+           if (byFigure == FALSE) { index <- paste(ft$drama, ft$Number.Act, ft$Number.Scene) }
+           else if (names == TRUE) { index <- paste(ft$drama, ft$Number.Act, ft$Number.Scene, ft$Speaker.figure_surface) }
+           else { index <- paste(ft$drama, ft$Number.Act, ft$Number.Scene, ft$Speaker.figure_id) }
+         },
+         stop("Please enter valid string-value for argument 'by' (default = 'drama', 'act' or 'scene').")
+  )
+
   r <- do.call(rbind, tapply(paste(ft[[cols[1]]], ft[[cols[2]]][-1]), index, function(x){prop.table(table(x))}))
   r[,order(colSums(r),decreasing=TRUE)]
-}
+
+}
diff --git a/R/text.R b/R/text.R
@@ -91,7 +91,7 @@ tfidf1 <- function(word) {
 #' @export
 #' @examples
 #' data(rksp.0)
-#' rksp.0.ftable <- frequencytable(rksp.0$mtext,byFigure=TRUE)
+#' rksp.0.ftable <- frequencytable(rksp.0$mtext,byFigure=TRUE,normalize=TRUE)
 #' rksp.0.tfidf <- tfidf(rksp.0.ftable)
 #' @examples
 #' mat <- matrix(c(0.10,0.2, 0,

diff --git a/man/frequencytable.Rd b/man/frequencytable.Rd
diff --git a/man/frequencytable2.Rd b/man/frequencytable2.Rd
diff --git a/man/tfidf.Rd b/man/tfidf.Rd