Skip to content
This repository has been archived by the owner on Nov 17, 2023. It is now read-only.

Commit

Permalink
Merge branch 'master' of https://github.com/apache/incubator-mxnet in…
Browse files Browse the repository at this point in the history
…to develop/add-higher-order/sinh-cosh
  • Loading branch information
kshitij12345 committed Jul 7, 2019
2 parents 093e854 + a6ed12f commit a9ce8f3
Show file tree
Hide file tree
Showing 39 changed files with 2,145 additions and 537 deletions.
1 change: 1 addition & 0 deletions CONTRIBUTORS.md
Original file line number Diff line number Diff line change
Expand Up @@ -244,6 +244,7 @@ List of Contributors
* [Shoubhik Bhattacharya](https://github.com/shoubhik)
* [Rohit Srivastava](https://github.com/access2rohit)
* [Caner Turkmen](https://github.com/canerturkmen)
* [Disi A](https://github.com/adis300)

Label Bot
---------
Expand Down
4 changes: 2 additions & 2 deletions amalgamation/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -114,8 +114,8 @@ jni_libmxnet_predict.so: jni_libmxnet_predict.o
ifneq ($(ANDROID), 1)
android:
else
CFLAGS+= -mhard-float -D_NDK_MATH_NO_SOFTFP=1 -O3
LDFLAGS+= -Wl,--no-warn-mismatch -lm_hard
CFLAGS+= -O3
LDFLAGS+= -Wl,--no-warn-mismatch -lm_hard
android: jni_libmxnet_predict.so
endif

Expand Down
81 changes: 6 additions & 75 deletions benchmark/opperf/utils/op_registry_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
from operator import itemgetter
from mxnet import runtime
from mxnet.base import _LIB, check_call, py_str, OpHandle, c_str, mx_uint
import mxnet as mx

from benchmark.opperf.rules.default_params import DEFAULTS_INPUTS, MX_OP_MODULE

Expand Down Expand Up @@ -75,89 +76,19 @@ def _select_ops(operator_names, filters=("_contrib", "_"), merge_op_forward_back
return mx_operators


def _get_all_registered_ops():
    """Return the name of every operator registered with the MXNet backend.

    Returns
    -------
    list of str
        Names of all registered MXNet operators.
    """
    op_names = ctypes.POINTER(ctypes.c_char_p)()
    op_count = ctypes.c_uint()

    # MXListAllOpNames fills op_count and points op_names at a C string array.
    check_call(_LIB.MXListAllOpNames(ctypes.byref(op_count),
                                     ctypes.byref(op_names)))

    return [py_str(op_names[idx]) for idx in range(op_count.value)]


def _get_op_handles(op_name):
    """Look up the backend handle for the operator named `op_name`.

    Parameters
    ----------
    op_name: str
        Name of operator to get handle for.

    Returns
    -------
    OpHandle
        Handle identifying the operator inside the MXNet backend.
    """
    handle = OpHandle()
    check_call(_LIB.NNGetOpHandle(c_str(op_name), ctypes.byref(handle)))
    return handle


def _get_op_arguments(op_handle):
    """Fetch argument metadata for the operator behind `op_handle`.

    Parameters
    ----------
    op_handle: OpHandle
        Handle for the operator

    Returns
    -------
    (narg, arg_names, arg_types)
        Number of arguments, their names, and their type strings.
    """
    # Output slots for MXSymbolGetAtomicSymbolInfo; only the argument
    # count/names/types are used below, the rest are required by the C API.
    out_name = ctypes.c_char_p()
    out_desc = ctypes.c_char_p()
    arg_count = mx_uint()
    raw_names = ctypes.POINTER(ctypes.c_char_p)()
    raw_types = ctypes.POINTER(ctypes.c_char_p)()
    raw_descs = ctypes.POINTER(ctypes.c_char_p)()
    out_key_var = ctypes.c_char_p()
    out_ret_type = ctypes.c_char_p()

    check_call(_LIB.MXSymbolGetAtomicSymbolInfo(
        op_handle, ctypes.byref(out_name), ctypes.byref(out_desc),
        ctypes.byref(arg_count),
        ctypes.byref(raw_names),
        ctypes.byref(raw_types),
        ctypes.byref(raw_descs),
        ctypes.byref(out_key_var),
        ctypes.byref(out_ret_type)))

    narg = int(arg_count.value)
    names = [py_str(raw_names[idx]) for idx in range(narg)]
    types = [py_str(raw_types[idx]) for idx in range(narg)]

    return narg, names, types


def _set_op_arguments(mx_operators):
    """Fetch and set operator arguments - nargs, arg_names, arg_types.

    Parameters
    ----------
    mx_operators: dict
        Mapping of operator name -> operator info dict; each entry gets a
        "params" sub-dict added in place.
    """
    for op_name in mx_operators:
        # Bug fix: the previous version first built "params" via the legacy
        # ctypes path and then immediately overwrote it with the result of
        # mx.operator.get_operator_arguments - the first assignment (and the
        # _get_op_handles/_get_op_arguments calls feeding it) was dead code.
        operator_arguments = mx.operator.get_operator_arguments(op_name)
        mx_operators[op_name]["params"] = {"narg": operator_arguments.narg,
                                           "arg_names": operator_arguments.names,
                                           "arg_types": operator_arguments.types}


def _get_all_mxnet_operators():
# Step 1 - Get all registered op names and filter it
operator_names = _get_all_registered_ops()
operator_names = mx.operator.get_all_registered_operators()
mx_operators = _select_ops(operator_names)

# Step 2 - Get all parameters for the operators
Expand Down
22 changes: 18 additions & 4 deletions contrib/clojure-package/examples/cnn-text-classification/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -29,8 +29,7 @@ You also must download the glove word embeddings. The suggested one to use is th
## Usage

You can run through the repl with
`(train-convnet {:embedding-size 50 :batch-size 100 :test-size 100 :num-epoch 10 :max-examples 1000 :pretrained-embedding :glove})`

`(train-convnet {:devs [(context/default-context)] :embedding-size 50 :batch-size 100 :test-size 100 :num-epoch 10 :max-examples 1000 :pretrained-embedding :glove})`
or
`JVM_OPTS="-Xmx1g" lein run` (cpu)

Expand All @@ -49,6 +48,21 @@ and then run
- `lein uberjar`
- `java -Xms1024m -Xmx2048m -jar target/cnn-text-classification-0.1.0-SNAPSHOT-standalone.jar`

## Usage with fastText

Using fastText instead of glove is fairly straightforward, as the pretrained embedding format is very similar.

Download the 'Simple English' pretrained wiki word vectors (text) from the fastText
[site](https://fasttext.cc/docs/en/pretrained-vectors.html) and place them in the
`data/fasttext` directory. Alternatively just run `./get_fasttext_data.sh`.

Then you can run training on a subset of examples through the repl using:
```
(train-convnet {:devs [(context/default-context)] :embedding-size 300 :batch-size 100 :test-size 100 :num-epoch 10 :max-examples 1000 :pretrained-embedding :fasttext})
```

Expect a validation accuracy of `~0.67` with the above parameters.

## Usage with word2vec

You can also use word2vec embeddings in order to train the text classification model.
Expand All @@ -58,15 +72,15 @@ you'll need to unzip them and place them in the `contrib/clojure-package/data` d

Then you can run training on a subset of examples through the repl using:
```
(train-convnet {:embedding-size 300 :batch-size 100 :test-size 100 :num-epoch 10 :max-examples 1000 :pretrained-embedding :word2vec})
(train-convnet {:devs [(context/default-context)] :embedding-size 300 :batch-size 100 :test-size 100 :num-epoch 10 :max-examples 1000 :pretrained-embedding :word2vec})
```
Note that loading word2vec embeddings consumes memory and takes some time.

You can also train them using `JVM_OPTS="-Xmx8g" lein run` once you've modified
the parameters to `train-convnet` (see above) in `src/cnn_text_classification/classifier.clj`.
In order to run training with word2vec on the complete data set, you will need to run:
```
(train-convnet {:embedding-size 300 :batch-size 100 :test-size 1000 :num-epoch 10 :pretrained-embedding :word2vec})
(train-convnet {:devs [(context/default-context)] :embedding-size 300 :batch-size 100 :test-size 1000 :num-epoch 10 :pretrained-embedding :word2vec})
```
You should be able to achieve an accuracy of `~0.78` using the parameters above.

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
#!/bin/bash

# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.

# Abort on first error (-e) and echo every command (-v, -x) so a failed
# download is visible and does not go unnoticed.
set -evx

# Download the 'Simple English' pretrained fastText wiki word vectors
# (text format) into data/fasttext, where the classifier expects them.
mkdir -p data/fasttext
cd data/fasttext
wget https://dl.fbaipublicfiles.com/fasttext/vectors-wiki/wiki.simple.vec
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,8 @@
[embedding-size]
(format "data/glove/glove.6B.%dd.txt" embedding-size))

(def fasttext-file-path "data/fasttext/wiki.simple.vec")

(defn r-string
"Reads a string from the given DataInputStream `dis` until a space or newline is reached."
[dis]
Expand Down Expand Up @@ -62,7 +64,7 @@
vect (mapv (fn [_] (read-float dis)) (range embedding-size))]
(cons [word vect] (lazy-seq (load-w2v-vectors dis embedding-size (dec num-vectors)))))))

(defn load-word2vec-model
(defn load-word2vec-model!
"Loads the word2vec model stored in a binary format from the given `path`.
By default only the first 100 embeddings are loaded."
([path embedding-size opts]
Expand All @@ -75,8 +77,8 @@
_ (println "Processing with " {:dim dim :word-size word-size} " loading max vectors " max-vectors)
_ (if (not= embedding-size dim)
(throw (ex-info "Mismatch in embedding size"
{:input-embedding-size embedding-size
:word2vec-embedding-size dim})))
{:input-embedding-size embedding-size
:word2vec-embedding-size dim})))
vectors (load-w2v-vectors dis dim max-vectors)
word2vec (if vocab
(->> vectors
Expand All @@ -88,17 +90,30 @@
(println "Finished")
{:num-embed dim :word2vec word2vec})))
([path embedding-size]
(load-word2vec-model path embedding-size {:max-vectors 100})))
(load-word2vec-model! path embedding-size {:max-vectors 100})))

(defn read-text-embedding-pairs [rdr]
(for [^String line (line-seq rdr)
(defn read-text-embedding-pairs [pairs]
(for [^String line pairs
:let [fields (.split line " ")]]
[(aget fields 0)
(mapv #(Float/parseFloat ^String %) (rest fields))]))

(defn load-glove [glove-file-path]
(defn load-glove! [glove-file-path]
(println "Loading the glove pre-trained word embeddings from " glove-file-path)
(into {} (read-text-embedding-pairs (io/reader glove-file-path))))
(->> (io/reader glove-file-path)
line-seq
read-text-embedding-pairs
(into {})))

;; fastText .vec files begin with a single metadata line ("<count> <dim>");
;; dropping the first element of the line seq strips it before parsing.
(def remove-fasttext-metadata rest)

(defn load-fasttext!
  "Loads fastText word embeddings from `fasttext-file-path`, skipping the
  leading metadata line, and returns a map of word -> embedding vector."
  [fasttext-file-path]
  (println "Loading the fastText pre-trained word embeddings from " fasttext-file-path)
  (into {}
        (read-text-embedding-pairs
         (remove-fasttext-metadata
          (line-seq (io/reader fasttext-file-path))))))

(defn clean-str [s]
(-> s
Expand Down Expand Up @@ -188,9 +203,11 @@
sentences-padded (pad-sentences sentences)
vocab (build-vocab sentences-padded)
vocab-embeddings (case pretrained-embedding
:glove (->> (load-glove (glove-file-path embedding-size))
:glove (->> (load-glove! (glove-file-path embedding-size))
(build-vocab-embeddings vocab embedding-size))
:word2vec (->> (load-word2vec-model w2v-file-path embedding-size {:vocab vocab})
:fasttext (->> (load-fasttext! fasttext-file-path)
(build-vocab-embeddings vocab embedding-size))
:word2vec (->> (load-word2vec-model! w2v-file-path embedding-size {:vocab vocab})
(:word2vec)
(build-vocab-embeddings vocab embedding-size))
vocab)
Expand Down
14 changes: 14 additions & 0 deletions cpp-package/example/inference/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,7 @@ imagenet_inference --symbol_file <model symbol file in json format>
--num_inference_batches <number of batches used for inference>
--data_layer_type <default: "float32", choices: ["float32", "int8", "uint8"]>
--gpu <whether to run inference on GPU, default: false>
--enableTRT <whether to run inference with TensorRT, default: false>"
--benchmark <whether to use dummy data to run inference, default: false>
```

Expand Down Expand Up @@ -134,6 +135,19 @@ imagenet_inference.cpp:372: Running the forward pass on model to evaluate the pe
imagenet_inference.cpp:387: benchmark completed!
imagenet_inference.cpp:388: batch size: 1 num batch: 500 throughput: xxxx imgs/s latency:xxxx ms
```
For running this example with TensorRT, you can quickly try the following example to run a benchmark test for testing Inception BN:
```
./imagenet_inference --symbol_file "./model/Inception-BN-symbol.json" --params_file "./model/Inception-BN-0126.params" --batch_size 16 --num_inference_batches 500 --benchmark --enableTRT
```
Sample output will look like this (the example is running on an AWS P3.2xlarge machine):
```
imagenet_inference.cpp:302: Loading the model from ./model/Inception-BN-symbol.json
build_subgraph.cc:686: start to execute partition graph.
imagenet_inference.cpp:317: Loading the model parameters from ./model/Inception-BN-0126.params
imagenet_inference.cpp:424: Running the forward pass on model to evaluate the performance..
imagenet_inference.cpp:439: benchmark completed!
imagenet_inference.cpp:440: batch size: 16 num batch: 500 throughput: 6284.78 imgs/s latency:0.159115 ms
```

## [sentiment_analysis_rnn.cpp](<https://github.com/apache/incubator-mxnet/blob/master/cpp-package/example/inference/sentiment_analysis_rnn.cpp>)
This example demonstrates how you can load a pre-trained RNN model and use it to predict the sentiment expressed in the given movie review with the MXNet C++ API. The example is capable of processing variable-length inputs. It performs the following tasks
Expand Down
Loading

0 comments on commit a9ce8f3

Please sign in to comment.