diff --git a/docs/en_US/NAS/TextNAS.md b/docs/en_US/NAS/TextNAS.md new file mode 100644 index 0000000000..7c455534ec --- /dev/null +++ b/docs/en_US/NAS/TextNAS.md @@ -0,0 +1,80 @@ +# TextNAS + +## Introduction + +This is the implementation of the TextNAS algorithm proposed in the paper [TextNAS: A Neural Architecture Search Space tailored for Text Representation](https://arxiv.org/pdf/1912.10729.pdf). TextNAS is a neural architecture search algorithm tailored for text representation. More specifically, TextNAS is based on a novel search space consisting of operators widely adopted to solve various NLP tasks, and TextNAS also supports multi-path ensemble within a single network to balance the width and depth of the architecture. + +The search space of TextNAS contains: + + * 1-D convolutional operator with filter size 1, 3, 5, 7 + * recurrent operator (bi-directional GRU) + * self-attention operator + * pooling operator (max/average) + +Following the ENAS algorithm, TextNAS also utilizes parameter sharing to accelerate the search speed and adopts a reinforcement-learning controller for the architecture sampling and generation. Please refer to the paper for more details of TextNAS. 
+ +## Preparation + +Prepare the word vectors and SST dataset, and organize them in the data directory as shown below: + +``` +textnas +├── data +│ ├── sst +│ │ └── trees +│ │ ├── dev.txt +│ │ ├── test.txt +│ │ └── train.txt +│ └── glove.840B.300d.txt +├── dataloader.py +├── model.py +├── ops.py +├── README.md +├── search.py +└── utils.py +``` + +The following links might be helpful for finding and downloading the corresponding datasets: + +* [GloVe: Global Vectors for Word Representation](https://nlp.stanford.edu/projects/glove/) + * [glove.840B.300d.txt](http://nlp.stanford.edu/data/glove.840B.300d.zip) +* [Recursive Deep Models for Semantic Compositionality Over a Sentiment Treebank](https://nlp.stanford.edu/sentiment/) + * [trainDevTestTrees_PTB.zip](https://nlp.stanford.edu/sentiment/trainDevTestTrees_PTB.zip) + +## Examples + +### Search Space + +[Example code](https://github.com/microsoft/nni/tree/master/examples/nas/textnas) + +```bash +# In case NNI code is not cloned. If the code is cloned already, ignore this line and enter code folder. +git clone https://github.com/Microsoft/nni.git + +# search the best architecture +cd examples/nas/textnas + +# view more options for search +python3 search.py -h +``` + +After each search epoch, 10 sampled architectures will be tested directly. Their performances are expected to be 40% - 42% after 10 epochs. + +By default, 20 sampled architectures will be exported into the `checkpoints` directory for the next step. + +### Retrain + +```bash +# In case NNI code is not cloned. If the code is cloned already, ignore this line and enter code folder. +git clone https://github.com/Microsoft/nni.git + +# enter the example folder +cd examples/nas/textnas + +# default to retrain on sst-2 +sh run_retrain.sh +``` + +## Reference + +TextNAS directly uses EnasTrainer. Please refer to [ENAS](./ENAS.md) for the trainer APIs. 
diff --git a/docs/en_US/nas.rst b/docs/en_US/nas.rst index 0a56caa742..f5a06c5c9a 100644 --- a/docs/en_US/nas.rst +++ b/docs/en_US/nas.rst @@ -26,5 +26,6 @@ For details, please refer to the following tutorials: SPOS CDARTS ProxylessNAS + TextNAS Customize a NAS Algorithm API Reference diff --git a/examples/nas/textnas/README.md b/examples/nas/textnas/README.md index fb261ad04d..f8ebe24afd 100644 --- a/examples/nas/textnas/README.md +++ b/examples/nas/textnas/README.md @@ -42,4 +42,8 @@ By default, 20 sampled architectures will be exported into `checkpoints` directo ## Retrain -Not ready. +``` +sh run_retrain.sh +``` + +By default, the script will retrain the architecture provided by the author on the SST-2 dataset. diff --git a/examples/nas/textnas/arc/final_arc.json b/examples/nas/textnas/arc/final_arc.json new file mode 100644 index 0000000000..c1e12c2d4b --- /dev/null +++ b/examples/nas/textnas/arc/final_arc.json @@ -0,0 +1,212 @@ +{ + "LayerChoice1": [ + false, false, false, false, false, true, false, false + ], + "InputChoice2": [ + true + ], + "LayerChoice3": [ + false, false, false, false, false, false, false, true + ], + "InputChoice4": [ + false + ], + "InputChoice5": [ + true, false + ], + "LayerChoice6": [ + false, false, false, true, false, false, false, false + ], + "InputChoice7": [ + false, false + ], + "InputChoice8": [ + false, false, true + ], + "LayerChoice9": [ + false, false, false, false, false, false, true, false + ], + "InputChoice10": [ + false, true, true + ], + "InputChoice11": [ + false, false, true, false + ], + "LayerChoice12": [ + false, true, false, false, false, false, false, false + ], + "InputChoice13": [ + false, true, false, false + ], + "InputChoice14": [ + false, false, false, false, true + ], + "LayerChoice15": [ + false, true, false, false, false, false, false, false + ], + "InputChoice16": [ + false, false, true, false, true + ], + "InputChoice17": [ + false, false, false, false, true + ], + "LayerChoice18": [ + true, 
false, false, false, false, false, false, false + ], + "InputChoice19": [ + false, false, true, true, true, true + ], + "InputChoice20": [ + true, false, false, false, false + ], + "LayerChoice21": [ + false, false, false, false, false, false, true, false + ], + "InputChoice22": [ + false, true, true, false, false, false, false + ], + "InputChoice23": [ + false, true, false, false, false + ], + "LayerChoice24": [ + false, false, false, false, false, true, false, false + ], + "InputChoice25": [ + false, true, false, true, true, false, true, true + ], + "InputChoice26": [ + false, false, true, false, false + ], + "LayerChoice27": [ + false, false, false, false, false, true, false, false + ], + "InputChoice28": [ + false, false, false, false, false, true, false, true, true + ], + "InputChoice29": [ + true, false, false, false, false + ], + "LayerChoice30": [ + false, false, false, false, false, false, false, true + ], + "InputChoice31": [ + true, true, false, false, true, false, false, true, true, false + ], + "InputChoice32": [ + true, false, false, false, false + ], + "LayerChoice33": [ + false, false, false, false, true, false, false, false + ], + "InputChoice34": [ + true, false, false, true, true, true, true, false, false, false, false + ], + "InputChoice35": [ + false, false, false, true, false + ], + "LayerChoice36": [ + false, true, false, false, false, false, false, false + ], + "InputChoice37": [ + true, true, false, true, false, true, false, false, true, false, false, false + ], + "InputChoice38": [ + false, false, false, true, false + ], + "LayerChoice39": [ + false, false, true, false, false, false, false, false + ], + "InputChoice40": [ + true, true, false, false, false, false, true, false, false, true, true, false, true + ], + "InputChoice41": [ + false, false, false, true, false + ], + "LayerChoice42": [ + true, false, false, false, false, false, false, false + ], + "InputChoice43": [ + false, false, true, false, false, false, true, true, true, false, 
true, true, false, false + ], + "InputChoice44": [ + false, false, false, false, true + ], + "LayerChoice45": [ + false, false, false, true, false, false, false, false + ], + "InputChoice46": [ + true, false, false, false, false, false, true, false, false, false, true, true, false, false, true + ], + "InputChoice47": [ + false, false, false, true, false + ], + "LayerChoice48": [ + false, false, true, false, false, false, false, false + ], + "InputChoice49": [ + false, false, false, false, false, false, false, false, false, true, true, false, true, false, true, false + ], + "InputChoice50": [ + false, false, false, false, true + ], + "LayerChoice51": [ + false, false, false, false, true, false, false, false + ], + "InputChoice52": [ + false, true, true, true, true, false, false, true, false, true, false, false, false, false, true, false, false + ], + "InputChoice53": [ + false, false, true, false, false + ], + "LayerChoice54": [ + false, false, false, true, false, false, false, false + ], + "InputChoice55": [ + false, false, false, false, false, true, false, false, false, false, false, false, false, true, true, true, false, true + ], + "InputChoice56": [ + false, false, true, false, false + ], + "LayerChoice57": [ + false, false, false, true, false, false, false, false + ], + "InputChoice58": [ + false, false, false, true, false, false, false, false, false, false, true, false, false, false, true, false, false, false, false + ], + "InputChoice59": [ + false, true, false, false, false + ], + "LayerChoice60": [ + false, false, false, false, false, true, false, false + ], + "InputChoice61": [ + true, true, false, false, false, false, false, false, false, false, true, true, false, false, true, true, true, true, false, false + ], + "InputChoice62": [ + true, false, false, false, false + ], + "LayerChoice63": [ + false, false, false, false, false, false, false, true + ], + "InputChoice64": [ + false, true, true, true, false, false, false, true, false, true, true, true, 
true, false, true, false, false, false, false, false, false + ], + "InputChoice65": [ + false, false, false, false, true + ], + "LayerChoice66": [ + false, false, false, false, false, false, false, true + ], + "InputChoice67": [ + false, false, true, true, true, true, false, true, false, true, true, false, false, false, false, true, false, false, false, false, false, true + ], + "InputChoice68": [ + false, false, false, true, false + ], + "LayerChoice69": [ + false, false, false, true, false, false, false, false + ], + "InputChoice70": [ + true, false, false, true, false, false, false, true, false, false, false, false, true, false, false, false, true, false, false, false, false, false, false + ] +} diff --git a/examples/nas/textnas/run_retrain.sh b/examples/nas/textnas/run_retrain.sh index 5c8ea66ae9..1f02121e31 100755 --- a/examples/nas/textnas/run_retrain.sh +++ b/examples/nas/textnas/run_retrain.sh @@ -4,7 +4,7 @@ export PYTHONPATH="$(pwd)" export CUDA_VISIBLE_DEVICES=0 -python -u retrain.py \ +python3 -u retrain.py \ --train_ratio=1.0 \ --valid_ratio=1.0 \ --min_count=1 \ @@ -36,6 +36,6 @@ python -u retrain.py \ --child_lr_T_0=10 \ --child_lr_T_mul=2 \ --multi_path=True \ - --child_fixed_arc="./checkpoints/architecture_00.json" \ + --child_fixed_arc="./arc/final_arc.json" \ --fixed_seed=True \ "$@"