Skip to content
This repository has been archived by the owner on Nov 17, 2023. It is now read-only.

R RNN API fixes and Optimizer clip gradient on NDArray #9022

Merged
merged 5 commits into from
Dec 21, 2017
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions R-package/R/model.rnn.R
Original file line number Diff line number Diff line change
Expand Up @@ -67,8 +67,8 @@ mx.model.train.buckets <- function(symbol, ctx, train.data, eval.data,
train.execs <- lapply(seq_len(ndevice), function(i) {
s <- slices[[i]]
mx.symbol.bind(symbol = symbol[[names(train.data$bucketID)]],
arg.arrays = c(s, train.execs[[i]]$arg.arrays[arg.params.names])[arg.update.idx],
aux.arrays = train.execs[[i]]$aux.arrays, ctx = ctx[[i]], grad.req = grad.req)
arg.arrays = c(s, train.execs[[i]]$arg.arrays[arg.params.names])[arg.update.idx],
aux.arrays = train.execs[[i]]$aux.arrays, ctx = ctx[[i]], grad.req = grad.req)
})
} else {
for (i in seq_len(ndevice)) {
Expand Down
28 changes: 14 additions & 14 deletions R-package/R/mx.io.bucket.iter.R
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ BucketIter <- setRefClass("BucketIter", fields = c("buckets", "bucket.names", "b
buckets_nb <- length(bucket.names)
buckets_id <- seq_len(buckets_nb)
buckets.size <- sapply(.self$buckets, function(x) {
dim(x$data)[length(dim(x$data)) - 1]
tail(dim(x$data), 1)
})
.self$batch.per.bucket <- ceiling(buckets.size/.self$batch.size)
.self$last.batch.pad <- .self$batch.size - buckets.size %% .self$batch.size
Expand All @@ -36,49 +36,49 @@ BucketIter <- setRefClass("BucketIter", fields = c("buckets", "bucket.names", "b

if (.self$shuffle) {
set.seed(.self$seed)
bucket_plan_names <- sample(rep.int(names(.self$batch.per.bucket), times = .self$batch.per.bucket))
bucket_plan_names <- sample(rep(names(.self$batch.per.bucket), times = .self$batch.per.bucket))
.self$bucket.plan <- ave(bucket_plan_names == bucket_plan_names, bucket_plan_names,
FUN = cumsum)
names(.self$bucket.plan) <- bucket_plan_names
### Return first BucketID at reset for initialization of the model
# Return first BucketID at reset for initialization of the model
.self$bucketID <- .self$bucket.plan[1]

.self$buckets <- lapply(.self$buckets, function(x) {
shuffle_id <- sample.int(dim(x$data)[length(dim(x$data)) - 1])
shuffle_id <- sample(tail(dim(x$data), 1))
if (length(dim(x$label)) == 0) {
list(data = x$data[shuffle_id, ], label = x$label[shuffle_id])
list(data = x$data[, shuffle_id], label = x$label[shuffle_id])
} else {
list(data = x$data[shuffle_id, ], label = x$label[shuffle_id, ])
list(data = x$data[, shuffle_id], label = x$label[, shuffle_id])
}
})
} else {
bucket_plan_names <- rep.int(names(.self$batch.per.bucket), times = .self$batch.per.bucket)
bucket_plan_names <- rep(names(.self$batch.per.bucket), times = .self$batch.per.bucket)
.self$bucket.plan <- ave(bucket_plan_names == bucket_plan_names, bucket_plan_names,
FUN = cumsum)
names(.self$bucket.plan) <- bucket_plan_names
}
}, iter.next = function() {
.self$batch <- .self$batch + 1
.self$bucketID <- .self$bucket.plan[batch]
return(.self$batch < .self$batch.per.epoch)
return(.self$batch <= .self$batch.per.epoch)
}, value = function() {
# bucketID is a named integer: the integer indicates the batch id for the given
# bucket (used to fetch appropriate samples within the bucket) the name is the a
# bucket (used to fetch appropriate samples within the bucket) the name is a
# character containing the sequence length of the bucket (used to unroll the rnn
# to appropriate sequence length)
idx <- (.self$bucketID - 1) * (.self$batch.size) + seq_len(batch.size)

### reuse first idx for padding
# Reuse first idx for padding
if (bucketID == .self$batch.per.bucket[names(.self$bucketID)] & !.self$last.batch.pad[names(.self$bucketID)] == 0) {
idx <- c(idx[seq_len(.self$batch.size - .self$last.batch.pad[names(.self$bucketID)])], seq_len(.self$last.batch.pad[names(.self$bucketID)]))
}

data <- .self$buckets[[names(.self$bucketID)]]$data[idx, , drop = FALSE]
seq.mask <- as.integer(names(bucketID)) - apply(data==.self$data.mask.element, 1, sum)
data <- .self$buckets[[names(.self$bucketID)]]$data[, idx, drop = F]
seq.mask <- as.integer(names(bucketID)) - apply(data==.self$data.mask.element, 2, sum)
if (length(dim(.self$buckets[[names(.self$bucketID)]]$label)) == 0) {
label <- .self$buckets[[names(.self$bucketID)]]$label[idx]
} else {
label <- .self$buckets[[names(.self$bucketID)]]$label[idx, , drop = FALSE]
label <- .self$buckets[[names(.self$bucketID)]]$label[, idx, drop = F]
}
return(list(data = mx.nd.array(data), seq.mask = mx.nd.array(seq.mask),
label = mx.nd.array(label)))
Expand All @@ -103,4 +103,4 @@ mx.io.bucket.iter <- function(buckets, batch.size, data.mask.element = 0, shuffl
seed = 123) {
return(BucketIter$new(buckets = buckets, batch.size = batch.size, data.mask.element = data.mask.element,
shuffle = shuffle, seed = seed))
}
}
30 changes: 5 additions & 25 deletions R-package/R/optimizer.R
Original file line number Diff line number Diff line change
Expand Up @@ -43,11 +43,7 @@ mx.opt.sgd <- function(learning.rate,
grad <- grad * rescale.grad
if (!is.null(clip_gradient)){
if(clip_gradient >= 0){
grad_ctx <- ctx(grad)
grad <- as.array(grad)
grad <- pmax(grad, -1 * clip_gradient)
grad <- pmin(grad, clip_gradient)
grad <- mx.nd.array(grad, grad_ctx)
grad <- mx.nd.clip(grad, -clip_gradient, clip_gradient)
} else {
stop("Error: clip_gradient should be positive number.")
}
Expand Down Expand Up @@ -125,11 +121,7 @@ mx.opt.rmsprop <- function(learning.rate=0.002,
grad <- grad * rescale.grad
if (!is.null(clip_gradient)){
if(clip_gradient >= 0){
grad_ctx <- ctx(grad)
grad <- as.array(grad)
grad <- pmax(grad, -1 * clip_gradient)
grad <- pmin(grad, clip_gradient)
grad <- mx.nd.array(grad, grad_ctx)
grad <- mx.nd.clip(grad, -clip_gradient, clip_gradient)
} else {
stop("Error: clip_gradient should be positive number.")
}
Expand Down Expand Up @@ -225,11 +217,7 @@ mx.opt.adam <- function(learning.rate=0.001,
grad <- grad * rescale.grad
if (!is.null(clip_gradient)){
if(clip_gradient >= 0){
grad_ctx <- ctx(grad)
grad <- as.array(grad)
grad <- pmax(grad, -1 * clip_gradient)
grad <- pmin(grad, clip_gradient)
grad <- mx.nd.array(grad, grad_ctx)
grad <- mx.nd.clip(grad, -clip_gradient, clip_gradient)
} else {
stop("Error: clip_gradient should be positive number.")
}
Expand Down Expand Up @@ -309,11 +297,7 @@ mx.opt.adagrad <- function(learning.rate=0.05,
grad <- grad * rescale.grad
if (!is.null(clip_gradient)){
if(clip_gradient >= 0){
grad_ctx <- ctx(grad)
grad <- as.array(grad)
grad <- pmax(grad, -1 * clip_gradient)
grad <- pmin(grad, clip_gradient)
grad <- mx.nd.array(grad, grad_ctx)
grad <- mx.nd.clip(grad, -clip_gradient, clip_gradient)
} else {
stop("Error: clip_gradient should be positive number.")
}
Expand Down Expand Up @@ -363,11 +347,7 @@ mx.opt.adadelta <- function(rho=0.90,
grad <- grad * rescale.grad
if (!is.null(clip_gradient)){
if(clip_gradient >= 0){
grad_ctx <- ctx(grad)
grad <- as.array(grad)
grad <- pmax(grad, -1 * clip_gradient)
grad <- pmin(grad, clip_gradient)
grad <- mx.nd.array(grad, grad_ctx)
grad <- mx.nd.clip(grad, -clip_gradient, clip_gradient)
} else {
stop("Error: clip_gradient should be positive number.")
}
Expand Down
Loading