
No Learning for NiN on cifar10 #15

Open
mahdaneh opened this issue Nov 13, 2017 · 2 comments

Comments


mahdaneh commented Nov 13, 2017

Issue summary

When I define my network according to the Network in Network (NiN) architecture without SSL regularization (similar to lenet_train_test.prototxt, which also uses no SSL regularization), the network does not learn, even though I have experimented with different learning rates and weight decays. The loss stays at a constant value throughout training: `Train net output #0: loss = 2.30259 (* 1 = 2.30259 loss)`.
Could you please help me with this?
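One observation worth noting (my addition, not part of the original report): 2.30259 is ln(10), the cross-entropy of a uniform prediction over 10 classes, so a loss stuck at exactly this value usually means the network's output never moves away from uniform (for example, because some weights never receive a useful gradient):

```python
import math

# Cross-entropy of a uniform prediction over k classes is ln(k).
# For the 10 classes of MNIST or CIFAR-10:
uniform_loss = math.log(10)
print(round(uniform_loss, 5))  # 2.30259, the value the loss is stuck at
```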

Steps to reproduce

I am using a Docker container; here is the Dockerfile from which the image was built:
```dockerfile
FROM nvidia/cuda:8.0-cudnn6-devel-ubuntu16.04
LABEL maintainer [email protected]

RUN apt-get update && apt-get install -y --no-install-recommends \
        build-essential \
        cmake \
        git \
        wget \
        libatlas-base-dev \
        libboost-all-dev \
        libgflags-dev \
        libgoogle-glog-dev \
        libhdf5-serial-dev \
        libleveldb-dev \
        liblmdb-dev \
        libopencv-dev \
        libprotobuf-dev \
        libsnappy-dev \
        protobuf-compiler \
        python-dev \
        python-numpy \
        python-pip \
        python-setuptools \
        python-scipy && \
    rm -rf /var/lib/apt/lists/*

ENV CAFFE_ROOT=/opt/caffe
WORKDIR $CAFFE_ROOT

ENV CLONE_TAG=1.0

RUN git clone -b scnn --depth 1 https://github.com/wenwei202/caffe.git . && \
    pip install --upgrade pip && \
    cd python && for req in $(cat requirements.txt) pydot; do pip install $req; done && cd .. && \
    git clone https://github.com/NVIDIA/nccl.git && cd nccl && make -j install && cd .. && rm -rf nccl
# mkdir build && cd build && \
# cmake .. && \

RUN cp Makefile.config.example Makefile.config && \
    echo 'INCLUDE_DIRS := $(PYTHON_INCLUDE) /usr/local/include /usr/include/hdf5/serial/' >> ./Makefile.config && \
    echo 'LIBRARY_DIRS := $(PYTHON_LIB) /usr/local/lib /usr/lib /usr/lib/x86_64-linux-gnu/hdf5/serial/' >> ./Makefile.config && \
    make -j"$(nproc)" && \
    make pycaffe -j"$(nproc)"

RUN pip install lmdb

ENV PYCAFFE_ROOT $CAFFE_ROOT/python
ENV PYTHONPATH $PYCAFFE_ROOT:$PYTHONPATH
ENV PATH $CAFFE_ROOT/build/tools:$PYCAFFE_ROOT:$PATH
RUN echo "$CAFFE_ROOT/build/lib" >> /etc/ld.so.conf.d/caffe.conf && ldconfig

WORKDIR /workspace
```

Your system configuration

Operating system: Ubuntu 16.04 LTS
Compiler:
CUDA version (if applicable): V8.0.61
CUDNN version (if applicable):
BLAS: I cannot find the version; `grep OPENBLAS_VERSION /usr/local/include/openblas_config.h` returns nothing
Python or MATLAB version (for pycaffe and matcaffe respectively): Python 2.7

@wenwei202
Owner

It seems to be an issue related to your net and solver. Copying them here might help.


mahdaneh commented Nov 14, 2017

Thanks for your reply. Here is mnist_NiN.prototxt:

```protobuf
name: "MNIST_NiN"
layer {
  name: "mnist"
  type: "Data"
  top: "data"
  top: "label"
  include {
    phase: TRAIN
  }
  transform_param {
    scale: 0.00390625
  }
  data_param {
    source: "examples/mnist/mnist_train_lmdb"
    batch_size: 64
    backend: LMDB
  }
}
layer {
  name: "mnist"
  type: "Data"
  top: "data"
  top: "label"
  include {
    phase: TEST
  }
  transform_param {
    scale: 0.00390625
  }
  data_param {
    source: "examples/mnist/mnist_test_lmdb"
    batch_size: 100
    backend: LMDB
  }
}
layer {
  name: "conv1"
  type: "Convolution"
  bottom: "data"
  top: "conv1"
  param {
    lr_mult: 1
    decay_mult: 1
  }
  param {
    lr_mult: 2
    decay_mult: 0
  }
  convolution_param {
    num_output: 192
    pad: 2
    kernel_size: 5
    weight_filler {
      type: "gaussian"
      std: 0.05
    }
    bias_filler {
      type: "constant"
    }
  }
}
layer {
  name: "relu1"
  type: "ReLU"
  bottom: "conv1"
  top: "conv1"
}
layer {
  name: "cccp1"
  type: "Convolution"
  bottom: "conv1"
  top: "cccp1"
  param {
    lr_mult: 1
    decay_mult: 1
  }
  param {
    lr_mult: 2
    decay_mult: 0
  }
  convolution_param {
    num_output: 160
    group: 1
    kernel_size: 1
    weight_filler {
      type: "gaussian"
      std: 0.05
    }
    bias_filler {
      type: "constant"
      value: 0
    }
  }
}
layer {
  name: "relu_cccp1"
  type: "ReLU"
  bottom: "cccp1"
  top: "cccp1"
}
layer {
  name: "cccp2"
  type: "Convolution"
  bottom: "cccp1"
  top: "cccp2"
  param {
    lr_mult: 1
    decay_mult: 1
  }
  param {
    lr_mult: 2
    decay_mult: 0
  }
  convolution_param {
    num_output: 96
    kernel_size: 1
    group: 1
    weight_filler {
      type: "gaussian"
      std: 0.05
    }
    bias_filler {
      type: "constant"
      value: 0
    }
  }
}
layer {
  name: "relu_cccp2"
  type: "ReLU"
  bottom: "cccp2"
  top: "cccp2"
}
layer {
  name: "pool1"
  type: "Pooling"
  bottom: "cccp2"
  top: "pool1"
  pooling_param {
    pool: MAX
    kernel_size: 3
    stride: 2
    engine: CAFFE
  }
}
layer {
  name: "drop3"
  type: "Dropout"
  bottom: "pool1"
  top: "pool1"
  dropout_param {
    dropout_ratio: 0.5
  }
}
layer {
  name: "conv2"
  type: "Convolution"
  bottom: "pool1"
  top: "conv2"
  param {
    lr_mult: 1
    decay_mult: 1
  }
  param {
    lr_mult: 2
    decay_mult: 0
  }
  convolution_param {
    num_output: 192
    pad: 2
    kernel_size: 5
  }
}
layer {
  name: "relu2_1"
  type: "ReLU"
  bottom: "conv2"
  top: "conv2"
}
layer {
  name: "cccp3"
  type: "Convolution"
  bottom: "conv2"
  top: "cccp3"
  param {
    lr_mult: 1
    decay_mult: 1
  }
  param {
    lr_mult: 2
    decay_mult: 0
  }
  convolution_param {
    num_output: 192
    kernel_size: 1
    group: 1
    weight_filler {
      type: "gaussian"
      std: 0.05
    }
    bias_filler {
      type: "constant"
      value: 0
    }
  }
}
layer {
  name: "relu_cccp3"
  type: "ReLU"
  bottom: "cccp3"
  top: "cccp3"
}
layer {
  name: "cccp4"
  type: "Convolution"
  bottom: "cccp3"
  top: "cccp4"
  param {
    lr_mult: 1
    decay_mult: 1
  }
  param {
    lr_mult: 2
    decay_mult: 0
  }
  convolution_param {
    num_output: 192
    kernel_size: 1
    group: 1
    weight_filler {
      type: "gaussian"
      std: 0.05
    }
    bias_filler {
      type: "constant"
      value: 0
    }
  }
}
layer {
  name: "relu_cccp4"
  type: "ReLU"
  bottom: "cccp4"
  top: "cccp4"
}
layer {
  name: "pool2"
  type: "Pooling"
  bottom: "cccp4"
  top: "pool2"
  pooling_param {
    pool: AVE
    kernel_size: 3
    stride: 2
    engine: CAFFE
  }
}
layer {
  name: "drop6"
  type: "Dropout"
  bottom: "pool2"
  top: "pool2"
  dropout_param {
    dropout_ratio: 0.5
  }
}
layer {
  name: "conv3"
  type: "Convolution"
  bottom: "pool2"
  top: "conv3"
  param {
    lr_mult: 1
    decay_mult: 1
  }
  param {
    lr_mult: 2
    decay_mult: 0
  }
  convolution_param {
    num_output: 192
    pad: 1
    kernel_size: 3
    weight_filler {
      type: "gaussian"
      std: 0.05
    }
    bias_filler {
      type: "constant"
      value: 0
    }
  }
}
layer {
  name: "relu3"
  type: "ReLU"
  bottom: "conv3"
  top: "conv3"
}
layer {
  name: "cccp5"
  type: "Convolution"
  bottom: "conv3"
  top: "cccp5"
  param {
    lr_mult: 1
    decay_mult: 1
  }
  param {
    lr_mult: 2
    decay_mult: 0
  }
  convolution_param {
    num_output: 192
    kernel_size: 1
    group: 1
    weight_filler {
      type: "gaussian"
      std: 0.05
    }
    bias_filler {
      type: "constant"
      value: 0
    }
  }
}
layer {
  name: "relu_cccp5"
  type: "ReLU"
  bottom: "cccp5"
  top: "cccp5"
}
layer {
  name: "cccp6"
  type: "Convolution"
  bottom: "cccp5"
  top: "cccp6"
  param {
    lr_mult: 0.1
    decay_mult: 1
  }
  param {
    lr_mult: 0.1
    decay_mult: 0
  }
  convolution_param {
    num_output: 10
    kernel_size: 1
    group: 1
    weight_filler {
      type: "gaussian"
      std: 0.05
    }
    bias_filler {
      type: "constant"
      value: 0
    }
  }
}
layer {
  name: "relu_cccp6"
  type: "ReLU"
  bottom: "cccp6"
  top: "cccp6"
}
layer {
  name: "pool3"
  type: "Pooling"
  bottom: "cccp6"
  top: "pool3"
  pooling_param {
    pool: AVE
    kernel_size: 7
    stride: 1
    engine: CAFFE
  }
}
layer {
  name: "accuracy"
  type: "Accuracy"
  bottom: "pool3"
  bottom: "label"
  top: "accuracy"
  include {
    phase: TEST
  }
}
layer {
  name: "loss"
  type: "SoftmaxWithLoss"
  bottom: "pool3"
  bottom: "label"
  top: "loss"
}
```

The solver:

```protobuf
net: "examples/mnist/mnist_NiN.prototxt"
test_iter: 100
test_interval: 500
base_lr: 0.1
momentum: 0.9
weight_decay: 0.0005
lr_policy: "inv"
gamma: 0.0001
power: 0.75
display: 100
max_iter: 10000
snapshot: 5000
snapshot_prefix: "examples/mnist/NiN"
solver_mode: GPU
```
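As a quick sanity check on the architecture (my sketch, not part of the original thread), tracing the spatial sizes through the layers with Caffe's output-size arithmetic (floor rounding for convolution, ceil rounding for pooling; the 1x1 cccp layers leave the size unchanged) confirms that the 7x7 global average pool at the end matches a 28x28 MNIST input:

```python
import math

def conv_out(size, kernel, stride=1, pad=0):
    # Caffe convolution: floor((in + 2*pad - kernel) / stride) + 1
    return (size + 2 * pad - kernel) // stride + 1

def pool_out(size, kernel, stride=1, pad=0):
    # Caffe pooling: ceil((in + 2*pad - kernel) / stride) + 1
    return math.ceil((size + 2 * pad - kernel) / stride) + 1

s = 28                          # MNIST input is 28x28
s = conv_out(s, 5, pad=2)       # conv1 -> 28 (cccp1/cccp2 are 1x1, size unchanged)
s = pool_out(s, 3, stride=2)    # pool1 -> 14
s = conv_out(s, 5, pad=2)       # conv2 -> 14 (cccp3/cccp4 keep the size)
s = pool_out(s, 3, stride=2)    # pool2 -> 7
s = conv_out(s, 3, pad=1)       # conv3 -> 7 (cccp5/cccp6 keep the size)
s = pool_out(s, 7, stride=1)    # pool3 -> 1 (global average over 7x7)
print(s)  # 1
```

So the blob shapes themselves are consistent, which points at initialization or learning-rate settings rather than a size mismatch.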
