apache · thirdwing · Dec 21, 2017 · Dec 11, 2017 · Dec 11, 2017 · Dec 11, 2017
@@ -67,8 +67,8 @@ mx.model.train.buckets <- function(symbol, ctx, train.data, eval.data,
         train.execs <- lapply(seq_len(ndevice), function(i) {
           s <- slices[[i]]
           mx.symbol.bind(symbol = symbol[[names(train.data$bucketID)]], 
-                                 arg.arrays = c(s, train.execs[[i]]$arg.arrays[arg.params.names])[arg.update.idx],
-                                 aux.arrays = train.execs[[i]]$aux.arrays, ctx = ctx[[i]], grad.req = grad.req)
+                         arg.arrays = c(s, train.execs[[i]]$arg.arrays[arg.params.names])[arg.update.idx],
+                         aux.arrays = train.execs[[i]]$aux.arrays, ctx = ctx[[i]], grad.req = grad.req)
         })
       } else {
         for (i in seq_len(ndevice)) {

@@ -22,7 +22,7 @@ BucketIter <- setRefClass("BucketIter", fields = c("buckets", "bucket.names", "b
                               buckets_nb <- length(bucket.names)
                               buckets_id <- seq_len(buckets_nb)
                               buckets.size <- sapply(.self$buckets, function(x) {
-                                dim(x$data)[length(dim(x$data)) - 1]
+                                tail(dim(x$data), 1)
                               })
                               .self$batch.per.bucket <- ceiling(buckets.size/.self$batch.size)
                               .self$last.batch.pad <- .self$batch.size - buckets.size %% .self$batch.size
@@ -36,49 +36,49 @@ BucketIter <- setRefClass("BucketIter", fields = c("buckets", "bucket.names", "b
 
                               if (.self$shuffle) {
                                 set.seed(.self$seed)
-                                bucket_plan_names <- sample(rep.int(names(.self$batch.per.bucket), times = .self$batch.per.bucket))
+                                bucket_plan_names <- sample(rep(names(.self$batch.per.bucket), times = .self$batch.per.bucket))
                                 .self$bucket.plan <- ave(bucket_plan_names == bucket_plan_names, bucket_plan_names, 
                                                          FUN = cumsum)
                                 names(.self$bucket.plan) <- bucket_plan_names
-                                ### Return first BucketID at reset for initialization of the model
+                                # Return first BucketID at reset for initialization of the model
                                 .self$bucketID <- .self$bucket.plan[1]
 
                                 .self$buckets <- lapply(.self$buckets, function(x) {
-                                  shuffle_id <- sample.int(dim(x$data)[length(dim(x$data)) - 1])
+                                  shuffle_id <- sample(tail(dim(x$data), 1))
                                   if (length(dim(x$label)) == 0) {
-                                    list(data = x$data[shuffle_id, ], label = x$label[shuffle_id])
+                                    list(data = x$data[, shuffle_id], label = x$label[shuffle_id])
                                   } else {
-                                    list(data = x$data[shuffle_id, ], label = x$label[shuffle_id, ])
+                                    list(data = x$data[, shuffle_id], label = x$label[, shuffle_id])
                                   }
                                 })
                               } else {
-                                bucket_plan_names <- rep.int(names(.self$batch.per.bucket), times = .self$batch.per.bucket)
+                                bucket_plan_names <- rep(names(.self$batch.per.bucket), times = .self$batch.per.bucket)
                                 .self$bucket.plan <- ave(bucket_plan_names == bucket_plan_names, bucket_plan_names, 
                                                          FUN = cumsum)
                                 names(.self$bucket.plan) <- bucket_plan_names
                               }
                             }, iter.next = function() {
                               .self$batch <- .self$batch + 1
                               .self$bucketID <- .self$bucket.plan[batch]
-                              return(.self$batch < .self$batch.per.epoch)
+                              return(.self$batch <= .self$batch.per.epoch)
                             }, value = function() {
                               # bucketID is a named integer: the integer indicates the batch id for the given
-                              # bucket (used to fetch appropriate samples within the bucket) the name is the a
+                              # bucket (used to fetch appropriate samples within the bucket) the name is a
                               # character containing the sequence length of the bucket (used to unroll the rnn
                               # to appropriate sequence length)
                               idx <- (.self$bucketID - 1) * (.self$batch.size) + seq_len(batch.size)
 
-                              ### reuse first idx for padding
+                              # Reuse first idx for padding
                               if (bucketID == .self$batch.per.bucket[names(.self$bucketID)] & !.self$last.batch.pad[names(.self$bucketID)] == 0) {
                                 idx <- c(idx[seq_len(.self$batch.size - .self$last.batch.pad[names(.self$bucketID)])], seq_len(.self$last.batch.pad[names(.self$bucketID)]))
                               }
 
-                              data <- .self$buckets[[names(.self$bucketID)]]$data[idx, , drop = FALSE]
-                              seq.mask <- as.integer(names(bucketID)) - apply(data==.self$data.mask.element, 1, sum)
+                              data <- .self$buckets[[names(.self$bucketID)]]$data[, idx, drop = F]
+                              seq.mask <- as.integer(names(bucketID)) - apply(data==.self$data.mask.element, 2, sum)
                               if (length(dim(.self$buckets[[names(.self$bucketID)]]$label)) == 0) {
                                 label <- .self$buckets[[names(.self$bucketID)]]$label[idx]
                               } else {
-                                label <- .self$buckets[[names(.self$bucketID)]]$label[idx, , drop = FALSE]
+                                label <- .self$buckets[[names(.self$bucketID)]]$label[, idx, drop = F]
                               }
                               return(list(data = mx.nd.array(data), seq.mask = mx.nd.array(seq.mask), 
                                           label = mx.nd.array(label)))
@@ -103,4 +103,4 @@ mx.io.bucket.iter <- function(buckets, batch.size, data.mask.element = 0, shuffl
                               seed = 123) {
   return(BucketIter$new(buckets = buckets, batch.size = batch.size, data.mask.element = data.mask.element, 
                         shuffle = shuffle, seed = seed))
-}
+}
@@ -43,11 +43,7 @@ mx.opt.sgd <- function(learning.rate,
     grad <- grad * rescale.grad
     if (!is.null(clip_gradient)){
       if(clip_gradient >= 0){
-          grad_ctx <- ctx(grad)
-          grad <- as.array(grad)
-          grad <- pmax(grad, -1 * clip_gradient)
-          grad <- pmin(grad, clip_gradient)
-          grad <- mx.nd.array(grad, grad_ctx)
+        grad <- mx.nd.clip(grad, -clip_gradient, clip_gradient)
       } else {
         stop("Error: clip_gradient should be positive number.")
       }
@@ -125,11 +121,7 @@ mx.opt.rmsprop <- function(learning.rate=0.002,
     grad <- grad * rescale.grad
     if (!is.null(clip_gradient)){
       if(clip_gradient >= 0){
-          grad_ctx <- ctx(grad)
-          grad <- as.array(grad)
-          grad <- pmax(grad, -1 * clip_gradient)
-          grad <- pmin(grad, clip_gradient)
-          grad <- mx.nd.array(grad, grad_ctx)
+        grad <- mx.nd.clip(grad, -clip_gradient, clip_gradient)
       } else {
         stop("Error: clip_gradient should be positive number.")
       }
@@ -225,11 +217,7 @@ mx.opt.adam <- function(learning.rate=0.001,
     grad <- grad * rescale.grad
     if (!is.null(clip_gradient)){
       if(clip_gradient >= 0){
-          grad_ctx <- ctx(grad)
-          grad <- as.array(grad)
-          grad <- pmax(grad, -1 * clip_gradient)
-          grad <- pmin(grad, clip_gradient)
-          grad <- mx.nd.array(grad, grad_ctx)
+        grad <- mx.nd.clip(grad, -clip_gradient, clip_gradient)
       } else {
         stop("Error: clip_gradient should be positive number.")
       }
@@ -309,11 +297,7 @@ mx.opt.adagrad <- function(learning.rate=0.05,
     grad <- grad * rescale.grad
     if (!is.null(clip_gradient)){
       if(clip_gradient >= 0){
-          grad_ctx <- ctx(grad)
-          grad <- as.array(grad)
-          grad <- pmax(grad, -1 * clip_gradient)
-          grad <- pmin(grad, clip_gradient)
-          grad <- mx.nd.array(grad, grad_ctx)
+        grad <- mx.nd.clip(grad, -clip_gradient, clip_gradient)
       } else {
         stop("Error: clip_gradient should be positive number.")
       }
@@ -363,11 +347,7 @@ mx.opt.adadelta <- function(rho=0.90,
     grad <- grad * rescale.grad
     if (!is.null(clip_gradient)){
       if(clip_gradient >= 0){
-          grad_ctx <- ctx(grad)
-          grad <- as.array(grad)
-          grad <- pmax(grad, -1 * clip_gradient)
-          grad <- pmin(grad, clip_gradient)
-          grad <- mx.nd.array(grad, grad_ctx)
+        grad <- mx.nd.clip(grad, -clip_gradient, clip_gradient)
       } else {
         stop("Error: clip_gradient should be positive number.")
       }