diff --git a/README.md b/README.md
index 5a2d41a701..bdb0a4022d 100644
--- a/README.md
+++ b/README.md
@@ -45,11 +45,9 @@ Please leave us [your feedback](https://forms.gle/i64fowQmiVhMMC7f9) on how we c

**Skills**

-[Goal(Task)-oriented Bot](http://docs.deeppavlov.ai/en/master/features/skills/go_bot.html) | [Seq2seq Goal-Oriented bot](http://docs.deeppavlov.ai/en/master/features/skills/seq2seq_go_bot.html)
+[Goal(Task)-oriented Bot](http://docs.deeppavlov.ai/en/master/features/skills/go_bot.html) | [Open Domain Questions Answering](http://docs.deeppavlov.ai/en/master/features/skills/odqa.html)

-[Open Domain Questions Answering](http://docs.deeppavlov.ai/en/master/features/skills/odqa.html) | [eCommerce Bot](http://docs.deeppavlov.ai/en/master/features/skills/ecommerce.html)
-
-[Frequently Asked Questions Answering](http://docs.deeppavlov.ai/en/master/features/skills/faq.html) | [Pattern Matching](http://docs.deeppavlov.ai/en/master/features/skills/pattern_matching.html)
+[Frequently Asked Questions Answering](http://docs.deeppavlov.ai/en/master/features/skills/faq.html)

**Embeddings**

@@ -61,7 +59,7 @@ Please leave us [your feedback](https://forms.gle/i64fowQmiVhMMC7f9) on how we c

**Auto ML**

-[Tuning Models with Evolutionary Algorithm](http://docs.deeppavlov.ai/en/master/features/hypersearch.html)
+[Tuning Models](http://docs.deeppavlov.ai/en/master/features/hypersearch.html)

**Integrations**

@@ -237,6 +235,10 @@ and others in the Integrations section for more info.

## Breaking Changes

+**Breaking changes in version 0.15.0**
+- [bert_as_summarizer](https://github.com/deepmipt/DeepPavlov/pull/1391), [seq2seq_go_bot](https://github.com/deepmipt/DeepPavlov/pull/1434), and all `deeppavlov.deprecated` components were removed
+- hyperparameter optimization by neural evolution was [removed](https://github.com/deepmipt/DeepPavlov/pull/1436)
+
**Breaking changes in version 0.7.0**
- in dialog logger config file [dialog_logger_config.json](deeppavlov/utils/settings/dialog_logger_config.json) the `agent_name` parameter was renamed to `logger_name` and the default value was changed
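The bulk of this changeset deletes the per-config `metadata.requirements` lists, so a config's pip dependencies are no longer declared in its JSON. A quick way to verify the new layout is to load one of the bundled configs and confirm the key is gone; the sketch below assumes the attribute-style `configs` registry that `deeppavlov` exposes and uses `insults_kaggle_bert` purely as an example.

```python
import json
from pathlib import Path

from deeppavlov import configs  # attribute-style registry of bundled config paths

# After this changeset, the metadata block should keep "variables" and
# "download" but no longer carry a "requirements" list.
config = json.loads(Path(configs.classifiers.insults_kaggle_bert).read_text())
assert "requirements" not in config["metadata"]
print(sorted(config["metadata"]))
```

The documented `python -m deeppavlov install <config>` entry point remains the way to pull in model dependencies; with the metadata lists gone it has to resolve them from the pipeline's components instead, which is worth verifying against the 0.15.0 release notes.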
diff --git a/deeppavlov/_meta.py b/deeppavlov/_meta.py
index ceb71ab332..50272a6d6e 100644
--- a/deeppavlov/_meta.py
+++ b/deeppavlov/_meta.py
@@ -1,4 +1,4 @@
-__version__ = '0.14.1'
+__version__ = '0.15.0'
__author__ = 'Neural Networks and Deep Learning lab, MIPT'
__description__ = 'An open source library for building end-to-end dialog systems and training chatbots.'
__keywords__ = ['NLP', 'NER', 'SQUAD', 'Intents', 'Chatbot']
diff --git a/deeppavlov/configs/classifiers/boolqa_rubert.json b/deeppavlov/configs/classifiers/boolqa_rubert.json
index 2c9f89483a..34045bfdb1 100644
--- a/deeppavlov/configs/classifiers/boolqa_rubert.json
+++ b/deeppavlov/configs/classifiers/boolqa_rubert.json
@@ -58,10 +58,6 @@
"DOWNLOADS_PATH": "{ROOT_PATH}/downloads",
"MODELS_PATH": "{ROOT_PATH}/models"
},
- "requirements": [
- "{DEEPPAVLOV_PATH}/requirements/tf.txt",
- "{DEEPPAVLOV_PATH}/requirements/bert_dp.txt"
- ],
"download": [
{
"url": "http://files.deeppavlov.ai/deeppavlov_data/bert/rubert_cased_L-12_H-768_A-12_v1.tar.gz",
diff --git a/deeppavlov/configs/classifiers/entity_ranking_bert_eng_no_mention.json b/deeppavlov/configs/classifiers/entity_ranking_bert_eng_no_mention.json
index 1fa4b1aefd..0c317cfad1 100644
--- a/deeppavlov/configs/classifiers/entity_ranking_bert_eng_no_mention.json
+++ b/deeppavlov/configs/classifiers/entity_ranking_bert_eng_no_mention.json
@@ -58,10 +58,6 @@
"MODELS_PATH": "{ROOT_PATH}/models",
"MODEL_PATH": "{MODELS_PATH}/entity_ranking_bert_eng_no_mention"
},
- "requirements": [
- "{DEEPPAVLOV_PATH}/requirements/tf.txt",
- "{DEEPPAVLOV_PATH}/requirements/bert_dp.txt"
- ],
"download": [
{
"url": "http://files.deeppavlov.ai/deeppavlov_data/bert/cased_L-12_H-768_A-12.zip",
diff --git a/deeppavlov/configs/classifiers/entity_ranking_bert_rus_no_mention.json b/deeppavlov/configs/classifiers/entity_ranking_bert_rus_no_mention.json
index 8b8ea1ac8b..6dc5b247a3 100644
--- a/deeppavlov/configs/classifiers/entity_ranking_bert_rus_no_mention.json
+++ b/deeppavlov/configs/classifiers/entity_ranking_bert_rus_no_mention.json
@@ -58,10 +58,6 @@
"MODELS_PATH": "{ROOT_PATH}/models",
"MODEL_PATH": "{MODELS_PATH}/entity_ranking_bert_rus_no_mention"
},
- "requirements": [
- "{DEEPPAVLOV_PATH}/requirements/tf.txt",
- "{DEEPPAVLOV_PATH}/requirements/bert_dp.txt"
- ],
"download": [
{
"url": "http://files.deeppavlov.ai/deeppavlov_data/bert/multi_cased_L-12_H-768_A-12.zip",
diff --git a/deeppavlov/configs/classifiers/glue/glue_cola_cased_bert_torch.json b/deeppavlov/configs/classifiers/glue/glue_cola_cased_bert_torch.json
index 4e75d5071d..e326765368 100644
--- a/deeppavlov/configs/classifiers/glue/glue_cola_cased_bert_torch.json
+++ b/deeppavlov/configs/classifiers/glue/glue_cola_cased_bert_torch.json
@@ -125,10 +125,6 @@
"MODELS_PATH": "{ROOT_PATH}/models",
"MODEL_PATH": "{MODELS_PATH}/classifiers/glue_cola_torch_cased_bert",
"BASE_MODEL": "bert-base-cased"
- },
- "requirements": [
- "{DEEPPAVLOV_PATH}/requirements/pytorch.txt",
- "{DEEPPAVLOV_PATH}/requirements/transformers.txt"
- ]
+ }
}
}
diff --git a/deeppavlov/configs/classifiers/glue/glue_mnli_cased_bert_torch.json b/deeppavlov/configs/classifiers/glue/glue_mnli_cased_bert_torch.json
index b1262f863e..ff6dd9c6a4 100644
--- a/deeppavlov/configs/classifiers/glue/glue_mnli_cased_bert_torch.json
+++ b/deeppavlov/configs/classifiers/glue/glue_mnli_cased_bert_torch.json
@@ -91,10 +91,6 @@
"MODELS_PATH": "{ROOT_PATH}/models",
"MODEL_PATH": "{MODELS_PATH}/classifiers/glue_mnli_torch_cased_bert",
"BASE_MODEL": "bert-base-cased"
- },
- "requirements": [
- "{DEEPPAVLOV_PATH}/requirements/pytorch.txt",
- "{DEEPPAVLOV_PATH}/requirements/transformers.txt"
- ]
+ }
}
}
diff --git a/deeppavlov/configs/classifiers/glue/glue_mnli_mm_cased_bert_torch.json b/deeppavlov/configs/classifiers/glue/glue_mnli_mm_cased_bert_torch.json
index 52e5747266..60ac560732 100644
--- a/deeppavlov/configs/classifiers/glue/glue_mnli_mm_cased_bert_torch.json
+++ b/deeppavlov/configs/classifiers/glue/glue_mnli_mm_cased_bert_torch.json
@@ -91,10 +91,6 @@
"MODELS_PATH": "{ROOT_PATH}/models",
"MODEL_PATH": "{MODELS_PATH}/classifiers/glue_mnli_mm_torch_cased_bert",
"BASE_MODEL": "bert-base-cased"
- },
- "requirements": [
- "{DEEPPAVLOV_PATH}/requirements/pytorch.txt",
- "{DEEPPAVLOV_PATH}/requirements/transformers.txt"
- ]
+ }
}
}
diff --git a/deeppavlov/configs/classifiers/glue/glue_mrpc_cased_bert_torch.json b/deeppavlov/configs/classifiers/glue/glue_mrpc_cased_bert_torch.json
index 33230e3d39..990ca20dbc 100644
--- a/deeppavlov/configs/classifiers/glue/glue_mrpc_cased_bert_torch.json
+++ b/deeppavlov/configs/classifiers/glue/glue_mrpc_cased_bert_torch.json
@@ -81,10 +81,6 @@
"MODELS_PATH": "{ROOT_PATH}/models",
"MODEL_PATH": "{MODELS_PATH}/classifiers/glue_mrpc_torch_cased_bert",
"BASE_MODEL": "bert-base-cased"
- },
- "requirements": [
- "{DEEPPAVLOV_PATH}/requirements/pytorch.txt",
- "{DEEPPAVLOV_PATH}/requirements/transformers.txt"
- ]
+ }
}
}
diff --git a/deeppavlov/configs/classifiers/glue/glue_qnli_cased_bert_torch.json b/deeppavlov/configs/classifiers/glue/glue_qnli_cased_bert_torch.json
index 8ada236189..f4dcc1cd5c 100644
--- a/deeppavlov/configs/classifiers/glue/glue_qnli_cased_bert_torch.json
+++ b/deeppavlov/configs/classifiers/glue/glue_qnli_cased_bert_torch.json
@@ -91,10 +91,6 @@
"MODELS_PATH": "{ROOT_PATH}/models",
"MODEL_PATH": "{MODELS_PATH}/classifiers/glue_qnli_torch_cased_bert",
"BASE_MODEL": "bert-base-cased"
- },
- "requirements": [
- "{DEEPPAVLOV_PATH}/requirements/pytorch.txt",
- "{DEEPPAVLOV_PATH}/requirements/transformers.txt"
- ]
+ }
}
}
diff --git a/deeppavlov/configs/classifiers/glue/glue_qqp_cased_bert_torch.json b/deeppavlov/configs/classifiers/glue/glue_qqp_cased_bert_torch.json
index d7aaa98aea..4a2117c9fc 100644
--- a/deeppavlov/configs/classifiers/glue/glue_qqp_cased_bert_torch.json
+++ b/deeppavlov/configs/classifiers/glue/glue_qqp_cased_bert_torch.json
@@ -81,10 +81,6 @@
"MODELS_PATH": "{ROOT_PATH}/models",
"MODEL_PATH": "{MODELS_PATH}/classifiers/glue_qqp_torch_cased_bert",
"BASE_MODEL" : "bert-base-cased"
- },
- "requirements": [
- "{DEEPPAVLOV_PATH}/requirements/pytorch.txt",
- "{DEEPPAVLOV_PATH}/requirements/transformers.txt"
- ]
+ }
}
}
diff --git a/deeppavlov/configs/classifiers/glue/glue_rte_cased_bert_torch.json b/deeppavlov/configs/classifiers/glue/glue_rte_cased_bert_torch.json
index c0da7b614a..41e7d6d576 100644
--- a/deeppavlov/configs/classifiers/glue/glue_rte_cased_bert_torch.json
+++ b/deeppavlov/configs/classifiers/glue/glue_rte_cased_bert_torch.json
@@ -91,10 +91,6 @@
"MODELS_PATH": "{ROOT_PATH}/models",
"MODEL_PATH": "{MODELS_PATH}/classifiers/glue_rte_torch_cased_bert",
"BASE_MODEL": "bert-base-cased"
- },
- "requirements": [
- "{DEEPPAVLOV_PATH}/requirements/pytorch.txt",
- "{DEEPPAVLOV_PATH}/requirements/transformers.txt"
- ]
+ }
}
}
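The GLUE configs keep their `metadata.variables` block, in which later entries reference earlier ones through `{NAME}` placeholders (and are themselves referenced by `save_path`/`load_path` values elsewhere in the config). Below is a minimal sketch of that interpolation convention using values visible above; the real DeepPavlov parser also injects built-ins such as `DEEPPAVLOV_PATH`, so this is an illustration rather than the library's resolver.

```python
# Toy resolver for the "{NAME}" placeholder convention in metadata.variables;
# entries are assumed to reference only previously defined variables.
variables = {
    "ROOT_PATH": "~/.deeppavlov",
    "DOWNLOADS_PATH": "{ROOT_PATH}/downloads",
    "MODELS_PATH": "{ROOT_PATH}/models",
    "MODEL_PATH": "{MODELS_PATH}/classifiers/glue_rte_torch_cased_bert",
    "BASE_MODEL": "bert-base-cased",
}

resolved = {}
for name, template in variables.items():
    resolved[name] = template.format(**resolved)

print(resolved["MODEL_PATH"])  # ~/.deeppavlov/models/classifiers/glue_rte_torch_cased_bert
```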
"{DEEPPAVLOV_PATH}/requirements/transformers.txt" - ] + } } } diff --git a/deeppavlov/configs/classifiers/glue/glue_stsb_cased_bert_torch.json b/deeppavlov/configs/classifiers/glue/glue_stsb_cased_bert_torch.json index 2480277a1a..701c70b30d 100644 --- a/deeppavlov/configs/classifiers/glue/glue_stsb_cased_bert_torch.json +++ b/deeppavlov/configs/classifiers/glue/glue_stsb_cased_bert_torch.json @@ -68,10 +68,6 @@ "MODELS_PATH": "{ROOT_PATH}/models", "MODEL_PATH": "{MODELS_PATH}/classifiers/glue_stsb_torch_cased_bert", "BASE_MODEL": "bert-base-cased" - }, - "requirements": [ - "{DEEPPAVLOV_PATH}/requirements/pytorch.txt", - "{DEEPPAVLOV_PATH}/requirements/transformers.txt" - ] + } } } diff --git a/deeppavlov/configs/classifiers/insults_kaggle.json b/deeppavlov/configs/classifiers/insults_kaggle.json index 34e1b800ea..8627589eca 100644 --- a/deeppavlov/configs/classifiers/insults_kaggle.json +++ b/deeppavlov/configs/classifiers/insults_kaggle.json @@ -137,10 +137,6 @@ "MODELS_PATH": "{ROOT_PATH}/models", "MODEL_PATH": "{MODELS_PATH}/classifiers/insults_kaggle_v2" }, - "requirements": [ - "{DEEPPAVLOV_PATH}/requirements/tf.txt", - "{DEEPPAVLOV_PATH}/requirements/fasttext.txt" - ], "download": [ { "url": "http://files.deeppavlov.ai/datasets/insults_data.tar.gz", diff --git a/deeppavlov/configs/classifiers/insults_kaggle_bert.json b/deeppavlov/configs/classifiers/insults_kaggle_bert.json index 2206c698d2..d64f2363b4 100644 --- a/deeppavlov/configs/classifiers/insults_kaggle_bert.json +++ b/deeppavlov/configs/classifiers/insults_kaggle_bert.json @@ -119,10 +119,6 @@ "MODELS_PATH": "{ROOT_PATH}/models", "MODEL_PATH": "{MODELS_PATH}/classifiers/insults_kaggle_v3" }, - "requirements": [ - "{DEEPPAVLOV_PATH}/requirements/tf.txt", - "{DEEPPAVLOV_PATH}/requirements/bert_dp.txt" - ], "download": [ { "url": "http://files.deeppavlov.ai/datasets/insults_data.tar.gz", diff --git a/deeppavlov/configs/classifiers/insults_kaggle_bert_torch.json b/deeppavlov/configs/classifiers/insults_kaggle_bert_torch.json index 2a8cd8d67e..a9ff62015e 100644 --- a/deeppavlov/configs/classifiers/insults_kaggle_bert_torch.json +++ b/deeppavlov/configs/classifiers/insults_kaggle_bert_torch.json @@ -135,10 +135,6 @@ "MODELS_PATH": "{ROOT_PATH}/models", "MODEL_PATH": "{MODELS_PATH}/classifiers/insults_kaggle_torch_bert" }, - "requirements": [ - "{DEEPPAVLOV_PATH}/requirements/pytorch.txt", - "{DEEPPAVLOV_PATH}/requirements/transformers.txt" - ], "download": [ { "url": "http://files.deeppavlov.ai/datasets/insults_data.tar.gz", diff --git a/deeppavlov/configs/classifiers/insults_kaggle_conv_bert.json b/deeppavlov/configs/classifiers/insults_kaggle_conv_bert.json index c819a6e034..01f13affca 100644 --- a/deeppavlov/configs/classifiers/insults_kaggle_conv_bert.json +++ b/deeppavlov/configs/classifiers/insults_kaggle_conv_bert.json @@ -135,10 +135,6 @@ "MODELS_PATH": "{ROOT_PATH}/models", "MODEL_PATH": "{MODELS_PATH}/classifiers/insults_kaggle_v4" }, - "requirements": [ - "{DEEPPAVLOV_PATH}/requirements/tf.txt", - "{DEEPPAVLOV_PATH}/requirements/bert_dp.txt" - ], "download": [ { "url": "http://files.deeppavlov.ai/datasets/insults_data.tar.gz", diff --git a/deeppavlov/configs/classifiers/intents_dstc2.json b/deeppavlov/configs/classifiers/intents_dstc2.json index 48d0776fad..9290978dcf 100644 --- a/deeppavlov/configs/classifiers/intents_dstc2.json +++ b/deeppavlov/configs/classifiers/intents_dstc2.json @@ -142,10 +142,6 @@ "MODELS_PATH": "{ROOT_PATH}/models", "MODEL_PATH": "{MODELS_PATH}/classifiers/intents_dstc2_v10" }, - 
"requirements": [ - "{DEEPPAVLOV_PATH}/requirements/tf.txt", - "{DEEPPAVLOV_PATH}/requirements/fasttext.txt" - ], "download": [ { "url": "http://files.deeppavlov.ai/deeppavlov_data/embeddings/dstc2_fastText_model.bin", diff --git a/deeppavlov/configs/classifiers/intents_dstc2_bert.json b/deeppavlov/configs/classifiers/intents_dstc2_bert.json index cb8599da08..4992fac32c 100644 --- a/deeppavlov/configs/classifiers/intents_dstc2_bert.json +++ b/deeppavlov/configs/classifiers/intents_dstc2_bert.json @@ -102,10 +102,6 @@ "MODELS_PATH": "{ROOT_PATH}/models", "MODEL_PATH": "{MODELS_PATH}/classifiers/intents_dstc2_bert_v0" }, - "requirements": [ - "{DEEPPAVLOV_PATH}/requirements/tf.txt", - "{DEEPPAVLOV_PATH}/requirements/bert_dp.txt" - ], "download": [ { "url": "http://files.deeppavlov.ai/deeppavlov_data/bert/cased_L-12_H-768_A-12.zip", diff --git a/deeppavlov/configs/classifiers/intents_dstc2_big.json b/deeppavlov/configs/classifiers/intents_dstc2_big.json index 8359c57937..a6f9d47d5e 100644 --- a/deeppavlov/configs/classifiers/intents_dstc2_big.json +++ b/deeppavlov/configs/classifiers/intents_dstc2_big.json @@ -141,10 +141,6 @@ "MODELS_PATH": "{ROOT_PATH}/models", "MODEL_PATH": "{MODELS_PATH}/classifiers/intents_dstc2_v11" }, - "requirements": [ - "{DEEPPAVLOV_PATH}/requirements/tf.txt", - "{DEEPPAVLOV_PATH}/requirements/fasttext.txt" - ], "download": [ { "url": "http://files.deeppavlov.ai/deeppavlov_data/embeddings/wiki.en.bin", diff --git a/deeppavlov/configs/classifiers/intents_sample_csv.json b/deeppavlov/configs/classifiers/intents_sample_csv.json index e2530c95c2..4b01a2d301 100644 --- a/deeppavlov/configs/classifiers/intents_sample_csv.json +++ b/deeppavlov/configs/classifiers/intents_sample_csv.json @@ -142,10 +142,6 @@ "MODELS_PATH": "{ROOT_PATH}/models", "MODEL_PATH": "{MODELS_PATH}/classifiers/intents_snips_v9" }, - "requirements": [ - "{DEEPPAVLOV_PATH}/requirements/tf.txt", - "{DEEPPAVLOV_PATH}/requirements/fasttext.txt" - ], "download": [ { "url": "http://files.deeppavlov.ai/datasets/snips_intents/train.csv", diff --git a/deeppavlov/configs/classifiers/intents_sample_json.json b/deeppavlov/configs/classifiers/intents_sample_json.json index 2b5e0318e5..b87d3274be 100644 --- a/deeppavlov/configs/classifiers/intents_sample_json.json +++ b/deeppavlov/configs/classifiers/intents_sample_json.json @@ -137,10 +137,6 @@ "MODELS_PATH": "{ROOT_PATH}/models", "MODEL_PATH": "{MODELS_PATH}/classifiers/intents_snips_v9" }, - "requirements": [ - "{DEEPPAVLOV_PATH}/requirements/tf.txt", - "{DEEPPAVLOV_PATH}/requirements/fasttext.txt" - ], "download": [ { "url": "http://files.deeppavlov.ai/datasets/snips_intents/train.json", diff --git a/deeppavlov/configs/classifiers/intents_snips.json b/deeppavlov/configs/classifiers/intents_snips.json index 914c0f41d9..5f0aa89cd0 100644 --- a/deeppavlov/configs/classifiers/intents_snips.json +++ b/deeppavlov/configs/classifiers/intents_snips.json @@ -127,10 +127,6 @@ "MODELS_PATH": "{ROOT_PATH}/models", "MODEL_PATH": "{MODELS_PATH}/classifiers/intents_snips_v9" }, - "requirements": [ - "{DEEPPAVLOV_PATH}/requirements/tf.txt", - "{DEEPPAVLOV_PATH}/requirements/fasttext.txt" - ], "download": [ { "url": "http://files.deeppavlov.ai/deeppavlov_data/embeddings/dstc2_fastText_model.bin", diff --git a/deeppavlov/configs/classifiers/intents_snips_big.json b/deeppavlov/configs/classifiers/intents_snips_big.json index 6b638d1ec8..15b5adc648 100644 --- a/deeppavlov/configs/classifiers/intents_snips_big.json +++ b/deeppavlov/configs/classifiers/intents_snips_big.json @@ 
diff --git a/deeppavlov/configs/classifiers/intents_snips_big.json b/deeppavlov/configs/classifiers/intents_snips_big.json
index 6b638d1ec8..15b5adc648 100644
--- a/deeppavlov/configs/classifiers/intents_snips_big.json
+++ b/deeppavlov/configs/classifiers/intents_snips_big.json
@@ -127,10 +127,6 @@
"MODELS_PATH": "{ROOT_PATH}/models",
"MODEL_PATH": "{MODELS_PATH}/classifiers/intents_snips_v10"
},
- "requirements": [
- "{DEEPPAVLOV_PATH}/requirements/tf.txt",
- "{DEEPPAVLOV_PATH}/requirements/fasttext.txt"
- ],
"download": [
{
"url": "http://files.deeppavlov.ai/deeppavlov_data/embeddings/wiki.en.bin",
diff --git a/deeppavlov/configs/classifiers/intents_snips_sklearn.json b/deeppavlov/configs/classifiers/intents_snips_sklearn.json
index a407d23fb6..7847aa3e15 100644
--- a/deeppavlov/configs/classifiers/intents_snips_sklearn.json
+++ b/deeppavlov/configs/classifiers/intents_snips_sklearn.json
@@ -154,10 +154,6 @@
"MODELS_PATH": "{ROOT_PATH}/models",
"MODEL_PATH": "{MODELS_PATH}/classifiers/intents_snips_sklearn_v11"
},
- "requirements": [
- "{DEEPPAVLOV_PATH}/requirements/tf.txt",
- "{DEEPPAVLOV_PATH}/requirements/fasttext.txt"
- ],
"download": [
{
"url": "http://files.deeppavlov.ai/deeppavlov_data/classifiers/intents_snips_sklearn_v11.tar.gz",
diff --git a/deeppavlov/configs/classifiers/intents_snips_tfidf_weighted.json b/deeppavlov/configs/classifiers/intents_snips_tfidf_weighted.json
index 580ca692d8..b7f4e70712 100644
--- a/deeppavlov/configs/classifiers/intents_snips_tfidf_weighted.json
+++ b/deeppavlov/configs/classifiers/intents_snips_tfidf_weighted.json
@@ -168,10 +168,6 @@
"MODELS_PATH": "{ROOT_PATH}/models",
"MODEL_PATH": "{MODELS_PATH}/classifiers/intents_snips_sklearn_v12"
},
- "requirements": [
- "{DEEPPAVLOV_PATH}/requirements/tf.txt",
- "{DEEPPAVLOV_PATH}/requirements/fasttext.txt"
- ],
"download": [
{
"url": "http://files.deeppavlov.ai/deeppavlov_data/classifiers/intents_snips_sklearn_v12.tar.gz",
diff --git a/deeppavlov/configs/classifiers/paraphraser_bert.json b/deeppavlov/configs/classifiers/paraphraser_bert.json
index 061b96da9e..81da5ccb9d 100644
--- a/deeppavlov/configs/classifiers/paraphraser_bert.json
+++ b/deeppavlov/configs/classifiers/paraphraser_bert.json
@@ -82,10 +82,6 @@
"MODELS_PATH": "{ROOT_PATH}/models",
"MODEL_PATH": "{MODELS_PATH}/paraphraser_bert_v0"
},
- "requirements": [
- "{DEEPPAVLOV_PATH}/requirements/tf.txt",
- "{DEEPPAVLOV_PATH}/requirements/bert_dp.txt"
- ],
"download": [
{
"url": "http://files.deeppavlov.ai/datasets/paraphraser.zip",
diff --git a/deeppavlov/configs/classifiers/paraphraser_rubert.json b/deeppavlov/configs/classifiers/paraphraser_rubert.json
index 19a22e1141..0d8f8adff8 100644
--- a/deeppavlov/configs/classifiers/paraphraser_rubert.json
+++ b/deeppavlov/configs/classifiers/paraphraser_rubert.json
@@ -59,10 +59,6 @@
"DOWNLOADS_PATH": "{ROOT_PATH}/downloads",
"MODELS_PATH": "{ROOT_PATH}/models"
},
- "requirements": [
- "{DEEPPAVLOV_PATH}/requirements/tf.txt",
- "{DEEPPAVLOV_PATH}/requirements/bert_dp.txt"
- ],
"download": [
{
"url": "http://files.deeppavlov.ai/datasets/paraphraser.zip",
diff --git a/deeppavlov/configs/classifiers/query_pr.json b/deeppavlov/configs/classifiers/query_pr.json
index 6e2a03b509..f3fcee2e22 100644
--- a/deeppavlov/configs/classifiers/query_pr.json
+++ b/deeppavlov/configs/classifiers/query_pr.json
@@ -95,10 +95,6 @@
"BERT_PATH": "{DOWNLOADS_PATH}/bert_models_kbqa/cased_L-12_H-768_A-12",
"MODEL_PATH": "{MODELS_PATH}/classifiers/query_prediction"
},
- "requirements": [
- "{DEEPPAVLOV_PATH}/requirements/tf.txt",
- "{DEEPPAVLOV_PATH}/requirements/bert_dp.txt"
- ],
"labels": {
"telegram_utils": "IntentModel",
"server_utils": "KerasIntentModel"
diff --git a/deeppavlov/configs/classifiers/rel_ranking_bert.json b/deeppavlov/configs/classifiers/rel_ranking_bert.json
index 991ae4508b..0ac3b504ce 100644
--- a/deeppavlov/configs/classifiers/rel_ranking_bert.json
+++ b/deeppavlov/configs/classifiers/rel_ranking_bert.json
@@ -59,10 +59,6 @@
"BERT_PATH": "{DOWNLOADS_PATH}/bert_models_kbqa/cased_L-12_H-768_A-12",
"MODEL_PATH": "{MODELS_PATH}/rel_ranking_bert"
},
- "requirements": [
- "{DEEPPAVLOV_PATH}/requirements/tf.txt",
- "{DEEPPAVLOV_PATH}/requirements/bert_dp.txt"
- ],
"download": [
{
"url": "http://files.deeppavlov.ai/kbqa/datasets/rel_ranking_bert.tar.gz",
diff --git a/deeppavlov/configs/classifiers/rel_ranking_bert_rus.json b/deeppavlov/configs/classifiers/rel_ranking_bert_rus.json
index 3f7e521266..f3bcfd7ccb 100644
--- a/deeppavlov/configs/classifiers/rel_ranking_bert_rus.json
+++ b/deeppavlov/configs/classifiers/rel_ranking_bert_rus.json
@@ -58,10 +58,6 @@
"MODELS_PATH": "{ROOT_PATH}/models",
"MODEL_PATH": "{MODELS_PATH}/rel_ranking_bert_rus"
},
- "requirements": [
- "{DEEPPAVLOV_PATH}/requirements/tf.txt",
- "{DEEPPAVLOV_PATH}/requirements/bert_dp.txt"
- ],
"download": [
{
"url": "http://files.deeppavlov.ai/deeppavlov_data/bert/multi_cased_L-12_H-768_A-12.zip",
diff --git a/deeppavlov/configs/classifiers/relation_prediction_rus.json b/deeppavlov/configs/classifiers/relation_prediction_rus.json
index 10bba27a5e..24f16cc159 100644
--- a/deeppavlov/configs/classifiers/relation_prediction_rus.json
+++ b/deeppavlov/configs/classifiers/relation_prediction_rus.json
@@ -110,10 +110,6 @@
"MODELS_PATH": "{ROOT_PATH}/models",
"MODEL_PATH": "{MODELS_PATH}/kbqa_mix_lowercase/relation_prediction"
},
- "requirements": [
- "{DEEPPAVLOV_PATH}/requirements/tf.txt",
- "{DEEPPAVLOV_PATH}/requirements/fasttext.txt"
- ],
"labels": {
"telegram_utils": "IntentModel",
"server_utils": "KerasIntentModel"
diff --git a/deeppavlov/configs/classifiers/rusentiment_bert.json b/deeppavlov/configs/classifiers/rusentiment_bert.json
index 25f54c8fa2..9e29925500 100644
--- a/deeppavlov/configs/classifiers/rusentiment_bert.json
+++ b/deeppavlov/configs/classifiers/rusentiment_bert.json
@@ -132,10 +132,6 @@
"MODELS_PATH": "{ROOT_PATH}/models",
"MODEL_PATH": "{MODELS_PATH}/classifiers/rusentiment_bert_v0/"
},
- "requirements": [
- "{DEEPPAVLOV_PATH}/requirements/tf.txt",
- "{DEEPPAVLOV_PATH}/requirements/bert_dp.txt"
- ],
"download": [
{
"url": "http://files.deeppavlov.ai/deeppavlov_data/bert/multi_cased_L-12_H-768_A-12.zip",
diff --git a/deeppavlov/configs/classifiers/rusentiment_bigru_superconv.json b/deeppavlov/configs/classifiers/rusentiment_bigru_superconv.json
index eef5f71889..ceff4b647a 100644
--- a/deeppavlov/configs/classifiers/rusentiment_bigru_superconv.json
+++ b/deeppavlov/configs/classifiers/rusentiment_bigru_superconv.json
@@ -151,10 +151,6 @@
"MODELS_PATH": "{ROOT_PATH}/models",
"MODEL_PATH": "{MODELS_PATH}/classifiers/rusentiment_v14"
},
- "requirements": [
- "{DEEPPAVLOV_PATH}/requirements/tf.txt",
- "{DEEPPAVLOV_PATH}/requirements/fasttext.txt"
- ],
"download": [
{
"url": "http://files.deeppavlov.ai/embeddings/ft_native_300_ru_twitter_nltk_word_tokenize.bin",
"url": "http://files.deeppavlov.ai/embeddings/ft_native_300_ru_wiki_lenta_nltk_wordpunct_tokenize/ft_native_300_ru_wiki_lenta_nltk_wordpunct_tokenize.bin", diff --git a/deeppavlov/configs/classifiers/rusentiment_convers_bert.json b/deeppavlov/configs/classifiers/rusentiment_convers_bert.json index f02f5ec5ee..74430e3a0a 100644 --- a/deeppavlov/configs/classifiers/rusentiment_convers_bert.json +++ b/deeppavlov/configs/classifiers/rusentiment_convers_bert.json @@ -132,10 +132,6 @@ "MODELS_PATH": "{ROOT_PATH}/models", "MODEL_PATH": "{MODELS_PATH}/classifiers/rusentiment_convers_bert_v0/" }, - "requirements": [ - "{DEEPPAVLOV_PATH}/requirements/tf.txt", - "{DEEPPAVLOV_PATH}/requirements/bert_dp.txt" - ], "download": [ { "url": "http://files.deeppavlov.ai/deeppavlov_data/bert/ru_conversational_cased_L-12_H-768_A-12.tar.gz", diff --git a/deeppavlov/configs/classifiers/rusentiment_elmo_twitter_cnn.json b/deeppavlov/configs/classifiers/rusentiment_elmo_twitter_cnn.json index db4cd5455c..1418b30dc2 100644 --- a/deeppavlov/configs/classifiers/rusentiment_elmo_twitter_cnn.json +++ b/deeppavlov/configs/classifiers/rusentiment_elmo_twitter_cnn.json @@ -160,10 +160,6 @@ "MODELS_PATH": "{ROOT_PATH}/models", "MODEL_PATH": "{MODELS_PATH}/classifiers/rusentiment_v10" }, - "requirements": [ - "{DEEPPAVLOV_PATH}/requirements/tf.txt", - "{DEEPPAVLOV_PATH}/requirements/tf-hub.txt" - ], "download": [ { "url": "http://files.deeppavlov.ai/deeppavlov_data/classifiers/rusentiment_v10.tar.gz", diff --git a/deeppavlov/configs/classifiers/sentiment_imdb_bert.json b/deeppavlov/configs/classifiers/sentiment_imdb_bert.json index 75e4963da3..8e62aefe8c 100644 --- a/deeppavlov/configs/classifiers/sentiment_imdb_bert.json +++ b/deeppavlov/configs/classifiers/sentiment_imdb_bert.json @@ -128,10 +128,6 @@ "MODELS_PATH": "{ROOT_PATH}/models", "MODEL_PATH": "{MODELS_PATH}/classifiers/sentiment_imdb_bert_v0/" }, - "requirements": [ - "{DEEPPAVLOV_PATH}/requirements/tf.txt", - "{DEEPPAVLOV_PATH}/requirements/bert_dp.txt" - ], "labels": { "telegram_utils": "IntentModel", "server_utils": "KerasIntentModel" diff --git a/deeppavlov/configs/classifiers/sentiment_imdb_conv_bert.json b/deeppavlov/configs/classifiers/sentiment_imdb_conv_bert.json index 10b9bcda67..4e1a1287b5 100644 --- a/deeppavlov/configs/classifiers/sentiment_imdb_conv_bert.json +++ b/deeppavlov/configs/classifiers/sentiment_imdb_conv_bert.json @@ -128,10 +128,6 @@ "MODELS_PATH": "{ROOT_PATH}/models", "MODEL_PATH": "{MODELS_PATH}/classifiers/sentiment_imdb_conv_bert_v0/" }, - "requirements": [ - "{DEEPPAVLOV_PATH}/requirements/tf.txt", - "{DEEPPAVLOV_PATH}/requirements/bert_dp.txt" - ], "labels": { "telegram_utils": "IntentModel", "server_utils": "KerasIntentModel" diff --git a/deeppavlov/configs/classifiers/sentiment_sst_conv_bert.json b/deeppavlov/configs/classifiers/sentiment_sst_conv_bert.json index 6a33aabbee..f88b0ae9b7 100644 --- a/deeppavlov/configs/classifiers/sentiment_sst_conv_bert.json +++ b/deeppavlov/configs/classifiers/sentiment_sst_conv_bert.json @@ -121,10 +121,6 @@ "MODELS_PATH": "{ROOT_PATH}/models", "MODEL_PATH": "{MODELS_PATH}/classifiers/sentiment_sst_bert_v2" }, - "requirements": [ - "{DEEPPAVLOV_PATH}/requirements/tf.txt", - "{DEEPPAVLOV_PATH}/requirements/bert_dp.txt" - ], "download": [ { "url": "http://files.deeppavlov.ai/datasets/stanfordSentimentTreebank.zip", diff --git a/deeppavlov/configs/classifiers/sentiment_sst_multi_bert.json b/deeppavlov/configs/classifiers/sentiment_sst_multi_bert.json index 4a2ec2e088..95a46ad544 100644 --- 
diff --git a/deeppavlov/configs/classifiers/sentiment_sst_multi_bert.json b/deeppavlov/configs/classifiers/sentiment_sst_multi_bert.json
index 4a2ec2e088..95a46ad544 100644
--- a/deeppavlov/configs/classifiers/sentiment_sst_multi_bert.json
+++ b/deeppavlov/configs/classifiers/sentiment_sst_multi_bert.json
@@ -121,10 +121,6 @@
"MODELS_PATH": "{ROOT_PATH}/models",
"MODEL_PATH": "{MODELS_PATH}/classifiers/sentiment_sst_bert_v1"
},
- "requirements": [
- "{DEEPPAVLOV_PATH}/requirements/tf.txt",
- "{DEEPPAVLOV_PATH}/requirements/bert_dp.txt"
- ],
"download": [
{
"url": "http://files.deeppavlov.ai/datasets/stanfordSentimentTreebank.zip",
diff --git a/deeppavlov/configs/classifiers/sentiment_twitter.json b/deeppavlov/configs/classifiers/sentiment_twitter.json
index aa2af5b46e..0d02ec5927 100644
--- a/deeppavlov/configs/classifiers/sentiment_twitter.json
+++ b/deeppavlov/configs/classifiers/sentiment_twitter.json
@@ -128,10 +128,6 @@
"MODELS_PATH": "{ROOT_PATH}/models",
"MODEL_PATH": "{MODELS_PATH}/classifiers/sentiment_twitter_v6"
},
- "requirements": [
- "{DEEPPAVLOV_PATH}/requirements/tf.txt",
- "{DEEPPAVLOV_PATH}/requirements/fasttext.txt"
- ],
"download": [
{
"url": "http://files.deeppavlov.ai/datasets/sentiment_twitter_data.tar.gz",
diff --git a/deeppavlov/configs/classifiers/sentiment_twitter_bert_emb.json b/deeppavlov/configs/classifiers/sentiment_twitter_bert_emb.json
index 9552132c68..6a4fb9756a 100644
--- a/deeppavlov/configs/classifiers/sentiment_twitter_bert_emb.json
+++ b/deeppavlov/configs/classifiers/sentiment_twitter_bert_emb.json
@@ -130,11 +130,6 @@
"MODEL_PATH": "{MODELS_PATH}/classifiers/sentiment_twitter_bert_emb",
"BERT_PATH": "{DOWNLOADS_PATH}/bert_models/rubert_cased_L-12_H-768_A-12_pt"
},
- "requirements": [
- "{DEEPPAVLOV_PATH}/requirements/tf.txt",
- "{DEEPPAVLOV_PATH}/requirements/transformers.txt",
- "{DEEPPAVLOV_PATH}/requirements/pytorch.txt"
- ],
"download": [
{
"url": "http://files.deeppavlov.ai/datasets/sentiment_twitter_data.tar.gz",
diff --git a/deeppavlov/configs/classifiers/sentiment_twitter_preproc.json b/deeppavlov/configs/classifiers/sentiment_twitter_preproc.json
index b62ddc7cad..2d6a250958 100644
--- a/deeppavlov/configs/classifiers/sentiment_twitter_preproc.json
+++ b/deeppavlov/configs/classifiers/sentiment_twitter_preproc.json
@@ -141,10 +141,6 @@
"MODELS_PATH": "{ROOT_PATH}/models",
"MODEL_PATH": "{MODELS_PATH}/classifiers/sentiment_twitter_v7"
},
- "requirements": [
- "{DEEPPAVLOV_PATH}/requirements/tf.txt",
- "{DEEPPAVLOV_PATH}/requirements/fasttext.txt"
- ],
"download": [
{
"url": "http://files.deeppavlov.ai/datasets/sentiment_twitter_data.tar.gz",
diff --git a/deeppavlov/configs/classifiers/sentiment_yelp_conv_bert.json b/deeppavlov/configs/classifiers/sentiment_yelp_conv_bert.json
index f0b9b8a045..f1b1a40561 100644
--- a/deeppavlov/configs/classifiers/sentiment_yelp_conv_bert.json
+++ b/deeppavlov/configs/classifiers/sentiment_yelp_conv_bert.json
@@ -135,10 +135,6 @@
"MODELS_PATH": "{ROOT_PATH}/models",
"MODEL_PATH": "{MODELS_PATH}/classifiers/sentiment_yelp_bert_v2"
},
- "requirements": [
- "{DEEPPAVLOV_PATH}/requirements/tf.txt",
- "{DEEPPAVLOV_PATH}/requirements/bert_dp.txt"
- ],
"download": [
{
"url": "http://files.deeppavlov.ai/datasets/yelp_review_full_csv.tar.gz",
[ - "{DEEPPAVLOV_PATH}/requirements/tf.txt", - "{DEEPPAVLOV_PATH}/requirements/bert_dp.txt" - ], "download": [ { "url": "http://files.deeppavlov.ai/datasets/yelp_review_full_csv.tar.gz", diff --git a/deeppavlov/configs/classifiers/sst_torch_swcnn.json b/deeppavlov/configs/classifiers/sst_torch_swcnn.json index 8318dff980..9709d333fd 100644 --- a/deeppavlov/configs/classifiers/sst_torch_swcnn.json +++ b/deeppavlov/configs/classifiers/sst_torch_swcnn.json @@ -134,11 +134,6 @@ "MODELS_PATH": "{ROOT_PATH}/models", "MODEL_PATH": "{MODELS_PATH}/classifiers/sst_torch_v0" }, - "requirements": [ - "{DEEPPAVLOV_PATH}/requirements/pytorch.txt", - "{DEEPPAVLOV_PATH}/requirements/transformers.txt", - "{DEEPPAVLOV_PATH}/requirements/fasttext.txt" - ], "download": [ { "url": "http://files.deeppavlov.ai/deeppavlov_data/embeddings/wiki.en.bin", diff --git a/deeppavlov/configs/classifiers/topic_ag_news.json b/deeppavlov/configs/classifiers/topic_ag_news.json index 93152e3491..0e56578b55 100644 --- a/deeppavlov/configs/classifiers/topic_ag_news.json +++ b/deeppavlov/configs/classifiers/topic_ag_news.json @@ -136,10 +136,6 @@ "MODELS_PATH": "{ROOT_PATH}/models", "MODEL_PATH": "{MODELS_PATH}/classifiers/topic_ag_news_v3" }, - "requirements": [ - "{DEEPPAVLOV_PATH}/requirements/tf.txt", - "{DEEPPAVLOV_PATH}/requirements/fasttext.txt" - ], "download": [ { "url": "http://files.deeppavlov.ai/datasets/ag_news_data.tar.gz", diff --git a/deeppavlov/configs/classifiers/yahoo_convers_vs_info.json b/deeppavlov/configs/classifiers/yahoo_convers_vs_info.json index 8542aa9f72..ec1cd427b0 100644 --- a/deeppavlov/configs/classifiers/yahoo_convers_vs_info.json +++ b/deeppavlov/configs/classifiers/yahoo_convers_vs_info.json @@ -153,10 +153,6 @@ "MODELS_PATH": "{ROOT_PATH}/models", "MODEL_PATH": "{MODELS_PATH}/classifiers/yahoo_convers_vs_info_v2" }, - "requirements": [ - "{DEEPPAVLOV_PATH}/requirements/tf.txt", - "{DEEPPAVLOV_PATH}/requirements/tf-hub.txt" - ], "download": [ { "url": "http://files.deeppavlov.ai/embeddings/yahooo-sber-questions_epoches_n_15.tar.gz", diff --git a/deeppavlov/configs/classifiers/yahoo_convers_vs_info_bert.json b/deeppavlov/configs/classifiers/yahoo_convers_vs_info_bert.json index 04dd679783..7b0e79994d 100644 --- a/deeppavlov/configs/classifiers/yahoo_convers_vs_info_bert.json +++ b/deeppavlov/configs/classifiers/yahoo_convers_vs_info_bert.json @@ -146,10 +146,6 @@ "MODELS_PATH": "{ROOT_PATH}/models", "MODEL_PATH": "{MODELS_PATH}/classifiers/yahoo_convers_vs_info_v3" }, - "requirements": [ - "{DEEPPAVLOV_PATH}/requirements/tf.txt", - "{DEEPPAVLOV_PATH}/requirements/bert_dp.txt" - ], "download": [ { "url": "http://files.deeppavlov.ai/deeppavlov_data/bert/conversational_cased_L-12_H-768_A-12.tar.gz", diff --git a/deeppavlov/configs/doc_retrieval/en_ranker_pop_enwiki20180211.json b/deeppavlov/configs/doc_retrieval/en_ranker_pop_enwiki20180211.json index 8d072fbf39..20402495c1 100644 --- a/deeppavlov/configs/doc_retrieval/en_ranker_pop_enwiki20180211.json +++ b/deeppavlov/configs/doc_retrieval/en_ranker_pop_enwiki20180211.json @@ -74,10 +74,6 @@ "DOWNLOADS_PATH": "{ROOT_PATH}/downloads", "MODELS_PATH": "{ROOT_PATH}/models" }, - "requirements": [ - "{DEEPPAVLOV_PATH}/requirements/spacy.txt", - "{DEEPPAVLOV_PATH}/requirements/en_core_web_sm.txt" - ], "download": [ { "url": "http://files.deeppavlov.ai/datasets/wikipedia/enwiki.tar.gz", diff --git a/deeppavlov/configs/doc_retrieval/en_ranker_tfidf_enwiki20161221.json b/deeppavlov/configs/doc_retrieval/en_ranker_tfidf_enwiki20161221.json index 
diff --git a/deeppavlov/configs/doc_retrieval/en_ranker_tfidf_enwiki20161221.json b/deeppavlov/configs/doc_retrieval/en_ranker_tfidf_enwiki20161221.json
index 7635c89ded..4fe8f63780 100644
--- a/deeppavlov/configs/doc_retrieval/en_ranker_tfidf_enwiki20161221.json
+++ b/deeppavlov/configs/doc_retrieval/en_ranker_tfidf_enwiki20161221.json
@@ -66,10 +66,6 @@
"DOWNLOADS_PATH": "{ROOT_PATH}/downloads",
"MODELS_PATH": "{ROOT_PATH}/models"
},
- "requirements": [
- "{DEEPPAVLOV_PATH}/requirements/spacy.txt",
- "{DEEPPAVLOV_PATH}/requirements/en_core_web_sm.txt"
- ],
"download": [
{
"url": "http://files.deeppavlov.ai/datasets/wikipedia/enwiki20161221.tar.gz",
diff --git a/deeppavlov/configs/doc_retrieval/en_ranker_tfidf_wiki.json b/deeppavlov/configs/doc_retrieval/en_ranker_tfidf_wiki.json
index ce3c2f7dac..b29b1fc7fb 100644
--- a/deeppavlov/configs/doc_retrieval/en_ranker_tfidf_wiki.json
+++ b/deeppavlov/configs/doc_retrieval/en_ranker_tfidf_wiki.json
@@ -68,10 +68,6 @@
"DOWNLOADS_PATH": "{ROOT_PATH}/downloads",
"MODELS_PATH": "{ROOT_PATH}/models"
},
- "requirements": [
- "{DEEPPAVLOV_PATH}/requirements/spacy.txt",
- "{DEEPPAVLOV_PATH}/requirements/en_core_web_sm.txt"
- ],
"download": [
{
"url": "http://files.deeppavlov.ai/datasets/wikipedia/enwiki.tar.gz",
diff --git a/deeppavlov/configs/doc_retrieval/ru_ranker_tfidf_wiki.json b/deeppavlov/configs/doc_retrieval/ru_ranker_tfidf_wiki.json
index 629152ad6c..1ba5da819f 100644
--- a/deeppavlov/configs/doc_retrieval/ru_ranker_tfidf_wiki.json
+++ b/deeppavlov/configs/doc_retrieval/ru_ranker_tfidf_wiki.json
@@ -66,7 +66,6 @@
"DOWNLOADS_PATH": "{ROOT_PATH}/downloads",
"MODELS_PATH": "{ROOT_PATH}/models"
},
- "requirements": [],
"download": [
{
"url": "http://files.deeppavlov.ai/datasets/wikipedia/ruwiki.tar.gz",
"http://files.deeppavlov.ai/ecommerce_skill/ecommerce_Movies_model.pkl", - "subdir": "{MODELS_PATH}/ecommerce_skill" - }, - { - "url": "http://files.deeppavlov.ai/ecommerce_skill/ecommerce_Electronics_model.pkl", - "subdir": "{MODELS_PATH}/ecommerce_skill" - }, - { - "url": "http://files.deeppavlov.ai/ecommerce_skill/ecommerce_Home_model.pkl", - "subdir": "{MODELS_PATH}/ecommerce_skill" - }, - { - "url": "http://files.deeppavlov.ai/ecommerce_skill/ecommerce_Automotive_model.pkl", - "subdir": "{MODELS_PATH}/ecommerce_skill" - } - ] - } -} diff --git a/deeppavlov/configs/ecommerce_skill/tfidf_retrieve.json b/deeppavlov/configs/ecommerce_skill/tfidf_retrieve.json deleted file mode 100644 index 72b9632abe..0000000000 --- a/deeppavlov/configs/ecommerce_skill/tfidf_retrieve.json +++ /dev/null @@ -1,116 +0,0 @@ -{ - "dataset_reader": { - "class_name": "amazon_ecommerce_reader", - "data_path": "{DOWNLOADS_PATH}/amazon_ecommerce", - "catalog": [ - "Phones", - "Home", - "Automotive", - "Electronics", - "Movies", - "Books" - ] - }, - "dataset_iterator": { - "class_name": "data_learning_iterator" - }, - "chainer": { - "in": [ - "query", - "history", - "state" - ], - "out": [ - "response", - "confidence", - "state" - ], - "pipe": [ - { - "class_name": "stream_spacy_tokenizer", - "in": "query", - "id": "my_tokenizer", - "lemmas": true, - "alphas_only": false, - "out": "q_token_lemmas" - }, - { - "ref": "my_tokenizer", - "in": "q_token_lemmas", - "out": "q_lem" - }, - { - "in": [ - "q_lem" - ], - "out": [ - "q_vect" - ], - "fit_on": [ - "q_lem" - ], - "id": "tfidf_vec", - "class_name": "sklearn_component", - "save_path": "{MODELS_PATH}/ecommerce_skill/tfidf_vectorizer_ecommerce.pkl", - "load_path": "{MODELS_PATH}/ecommerce_skill/tfidf_vectorizer_ecommerce.pkl", - "model_class": "sklearn.feature_extraction.text:TfidfVectorizer", - "infer_method": "transform", - "analyzer": "word", - "use_idf": true - }, - { - "class_name": "ecommerce_skill_tfidf", - "in": [ - "q_vect", - "history", - "state" - ], - "fit_on": [ - "y", - "q_vect" - ], - "min_similarity": 0.5, - "min_entropy": 0.5, - "entropy_fields": [ - "Size", - "Brand", - "Author", - "Color", - "Genre" - ], - "save_path": "{MODELS_PATH}/ecommerce_skill/ecommerce_tfidf_model.pkl", - "load_path": "{MODELS_PATH}/ecommerce_skill/ecommerce_tfidf_model.pkl", - "out": [ - "response", - "confidence", - "state" - ] - } - ] - }, - "train": { - "evaluation_targets": [], - "class_name": "fit_trainer" - }, - "metadata": { - "variables": { - "ROOT_PATH": "~/.deeppavlov", - "DOWNLOADS_PATH": "{ROOT_PATH}/downloads", - "MODELS_PATH": "{ROOT_PATH}/models" - }, - "requirements": [ - "{DEEPPAVLOV_PATH}/requirements/spacy.txt", - "{DEEPPAVLOV_PATH}/requirements/en_core_web_sm.txt" - ], - "download": [ - { - "url": "http://files.deeppavlov.ai/ecommerce_skill/ecommerce_tfidf_model.pkl", - "subdir": "{MODELS_PATH}/ecommerce_skill" - }, - { - "url": "http://files.deeppavlov.ai/ecommerce_skill/tfidf_vectorizer_ecommerce.pkl", - "subdir": "{MODELS_PATH}/ecommerce_skill" - } - ] - } -} diff --git a/deeppavlov/configs/elmo/elmo_1b_benchmark.json b/deeppavlov/configs/elmo/elmo_1b_benchmark.json index 972b01d187..806272b771 100644 --- a/deeppavlov/configs/elmo/elmo_1b_benchmark.json +++ b/deeppavlov/configs/elmo/elmo_1b_benchmark.json @@ -67,10 +67,6 @@ "DOWNLOADS_PATH": "{ROOT_PATH}/downloads", "MODELS_PATH": "{ROOT_PATH}/models" }, - "requirements": [ - "{DEEPPAVLOV_PATH}/requirements/tf.txt", - "{DEEPPAVLOV_PATH}/requirements/tf-hub.txt" - ], "download": [ { "url": 
"http://files.deeppavlov.ai/deeppavlov_data/1-billion-word-language-modeling-benchmark-r13output.tar.gz", diff --git a/deeppavlov/configs/elmo/elmo_1b_benchmark_test.json b/deeppavlov/configs/elmo/elmo_1b_benchmark_test.json index d43ae1f7be..15af5b02ae 100644 --- a/deeppavlov/configs/elmo/elmo_1b_benchmark_test.json +++ b/deeppavlov/configs/elmo/elmo_1b_benchmark_test.json @@ -69,10 +69,6 @@ "DOWNLOADS_PATH": "{ROOT_PATH}/downloads", "MODELS_PATH": "{ROOT_PATH}/models" }, - "requirements": [ - "{DEEPPAVLOV_PATH}/requirements/tf.txt", - "{DEEPPAVLOV_PATH}/requirements/tf-hub.txt" - ], "download": [ { "url": "http://files.deeppavlov.ai/deeppavlov_data/elmo-1b-benchmark_test.tar.gz", diff --git a/deeppavlov/configs/elmo/elmo_lm_ready4fine_tuning_ru_news.json b/deeppavlov/configs/elmo/elmo_lm_ready4fine_tuning_ru_news.json index d0006595dc..ecaa2afb39 100644 --- a/deeppavlov/configs/elmo/elmo_lm_ready4fine_tuning_ru_news.json +++ b/deeppavlov/configs/elmo/elmo_lm_ready4fine_tuning_ru_news.json @@ -69,10 +69,6 @@ "DOWNLOADS_PATH": "{ROOT_PATH}/downloads", "MODELS_PATH": "{ROOT_PATH}/models" }, - "requirements": [ - "{DEEPPAVLOV_PATH}/requirements/tf.txt", - "{DEEPPAVLOV_PATH}/requirements/tf-hub.txt" - ], "download": [ { "url": "http://files.deeppavlov.ai/deeppavlov_data/elmo-lm-ready4fine-example-data.tar.gz", diff --git a/deeppavlov/configs/elmo/elmo_lm_ready4fine_tuning_ru_news_simple.json b/deeppavlov/configs/elmo/elmo_lm_ready4fine_tuning_ru_news_simple.json index a70c937824..f7a95f1238 100644 --- a/deeppavlov/configs/elmo/elmo_lm_ready4fine_tuning_ru_news_simple.json +++ b/deeppavlov/configs/elmo/elmo_lm_ready4fine_tuning_ru_news_simple.json @@ -69,10 +69,6 @@ "DOWNLOADS_PATH": "{ROOT_PATH}/downloads", "MODELS_PATH": "{ROOT_PATH}/models" }, - "requirements": [ - "{DEEPPAVLOV_PATH}/requirements/tf.txt", - "{DEEPPAVLOV_PATH}/requirements/tf-hub.txt" - ], "download": [ { "url": "http://files.deeppavlov.ai/deeppavlov_data/elmo-lm-ready4fine-example-data.tar.gz", diff --git a/deeppavlov/configs/elmo/elmo_lm_ready4fine_tuning_ru_twitter.json b/deeppavlov/configs/elmo/elmo_lm_ready4fine_tuning_ru_twitter.json index 106b83a1fd..9a4a2f9007 100644 --- a/deeppavlov/configs/elmo/elmo_lm_ready4fine_tuning_ru_twitter.json +++ b/deeppavlov/configs/elmo/elmo_lm_ready4fine_tuning_ru_twitter.json @@ -69,10 +69,6 @@ "DOWNLOADS_PATH": "{ROOT_PATH}/downloads", "MODELS_PATH": "{ROOT_PATH}/models" }, - "requirements": [ - "{DEEPPAVLOV_PATH}/requirements/tf.txt", - "{DEEPPAVLOV_PATH}/requirements/tf-hub.txt" - ], "download": [ { "url": "http://files.deeppavlov.ai/deeppavlov_data/elmo-lm-ready4fine-example-data.tar.gz", diff --git a/deeppavlov/configs/elmo/elmo_lm_ready4fine_tuning_ru_twitter_simple.json b/deeppavlov/configs/elmo/elmo_lm_ready4fine_tuning_ru_twitter_simple.json index c12253b4ed..6ffd491f07 100644 --- a/deeppavlov/configs/elmo/elmo_lm_ready4fine_tuning_ru_twitter_simple.json +++ b/deeppavlov/configs/elmo/elmo_lm_ready4fine_tuning_ru_twitter_simple.json @@ -69,10 +69,6 @@ "DOWNLOADS_PATH": "{ROOT_PATH}/downloads", "MODELS_PATH": "{ROOT_PATH}/models" }, - "requirements": [ - "{DEEPPAVLOV_PATH}/requirements/tf.txt", - "{DEEPPAVLOV_PATH}/requirements/tf-hub.txt" - ], "download": [ { "url": "http://files.deeppavlov.ai/deeppavlov_data/elmo-lm-ready4fine-example-data.tar.gz", diff --git a/deeppavlov/configs/elmo/elmo_lm_ready4fine_tuning_ru_wiki.json b/deeppavlov/configs/elmo/elmo_lm_ready4fine_tuning_ru_wiki.json index d12e849f1a..c44e850215 100644 --- 
diff --git a/deeppavlov/configs/elmo/elmo_lm_ready4fine_tuning_ru_wiki.json b/deeppavlov/configs/elmo/elmo_lm_ready4fine_tuning_ru_wiki.json
index d12e849f1a..c44e850215 100644
--- a/deeppavlov/configs/elmo/elmo_lm_ready4fine_tuning_ru_wiki.json
+++ b/deeppavlov/configs/elmo/elmo_lm_ready4fine_tuning_ru_wiki.json
@@ -69,10 +69,6 @@
"DOWNLOADS_PATH": "{ROOT_PATH}/downloads",
"MODELS_PATH": "{ROOT_PATH}/models"
},
- "requirements": [
- "{DEEPPAVLOV_PATH}/requirements/tf.txt",
- "{DEEPPAVLOV_PATH}/requirements/tf-hub.txt"
- ],
"download": [
{
"url": "http://files.deeppavlov.ai/deeppavlov_data/elmo-lm-ready4fine-example-data.tar.gz",
diff --git a/deeppavlov/configs/elmo/elmo_lm_ready4fine_tuning_ru_wiki_simple.json b/deeppavlov/configs/elmo/elmo_lm_ready4fine_tuning_ru_wiki_simple.json
index cbf5d7e41b..c4188744e4 100644
--- a/deeppavlov/configs/elmo/elmo_lm_ready4fine_tuning_ru_wiki_simple.json
+++ b/deeppavlov/configs/elmo/elmo_lm_ready4fine_tuning_ru_wiki_simple.json
@@ -69,10 +69,6 @@
"DOWNLOADS_PATH": "{ROOT_PATH}/downloads",
"MODELS_PATH": "{ROOT_PATH}/models"
},
- "requirements": [
- "{DEEPPAVLOV_PATH}/requirements/tf.txt",
- "{DEEPPAVLOV_PATH}/requirements/tf-hub.txt"
- ],
"download": [
{
"url": "http://files.deeppavlov.ai/deeppavlov_data/elmo-lm-ready4fine-example-data.tar.gz",
diff --git a/deeppavlov/configs/elmo/elmo_paraphraser_fine_tuning.json b/deeppavlov/configs/elmo/elmo_paraphraser_fine_tuning.json
index 78296864e7..fce6382ffd 100644
--- a/deeppavlov/configs/elmo/elmo_paraphraser_fine_tuning.json
+++ b/deeppavlov/configs/elmo/elmo_paraphraser_fine_tuning.json
@@ -70,10 +70,6 @@
"DOWNLOADS_PATH": "{ROOT_PATH}/downloads",
"MODELS_PATH": "{ROOT_PATH}/models"
},
- "requirements": [
- "{DEEPPAVLOV_PATH}/requirements/tf.txt",
- "{DEEPPAVLOV_PATH}/requirements/tf-hub.txt"
- ],
"download": [
{
"url": "http://files.deeppavlov.ai/deeppavlov_data/paraphraser_train_and_pretrain_texts.tar.gz",
diff --git a/deeppavlov/configs/embedder/bert_embedder.json b/deeppavlov/configs/embedder/bert_embedder.json
index 99282cf7f3..c2cbf59a18 100644
--- a/deeppavlov/configs/embedder/bert_embedder.json
+++ b/deeppavlov/configs/embedder/bert_embedder.json
@@ -28,10 +28,6 @@
"DOWNLOADS_PATH": "{ROOT_PATH}/downloads",
"BERT_PATH": "{DOWNLOADS_PATH}/bert_models/multi_cased_L-12_H-768_A-12_pt"
},
- "requirements": [
- "{DEEPPAVLOV_PATH}/requirements/transformers.txt",
- "{DEEPPAVLOV_PATH}/requirements/pytorch.txt"
- ],
"labels": {},
"download": [
{
diff --git a/deeppavlov/configs/embedder/bert_sentence_embedder.json b/deeppavlov/configs/embedder/bert_sentence_embedder.json
index 3ea12b3cfc..348616ae27 100644
--- a/deeppavlov/configs/embedder/bert_sentence_embedder.json
+++ b/deeppavlov/configs/embedder/bert_sentence_embedder.json
@@ -28,10 +28,6 @@
"DOWNLOADS_PATH": "{ROOT_PATH}/downloads",
"BERT_PATH": "{DOWNLOADS_PATH}/bert_models/sentence_multi_cased_L-12_H-768_A-12_pt"
},
- "requirements": [
- "{DEEPPAVLOV_PATH}/requirements/transformers.txt",
- "{DEEPPAVLOV_PATH}/requirements/pytorch.txt"
- ],
"labels": {},
"download": [
{
diff --git a/deeppavlov/configs/embedder/elmo_en_1billion.json b/deeppavlov/configs/embedder/elmo_en_1billion.json
index 615e073c7a..c79d4908af 100644
--- a/deeppavlov/configs/embedder/elmo_en_1billion.json
+++ b/deeppavlov/configs/embedder/elmo_en_1billion.json
@@ -31,10 +31,6 @@
"ROOT_PATH": "~/.deeppavlov",
"DOWNLOADS_PATH": "{ROOT_PATH}/downloads",
"MODELS_PATH": "{ROOT_PATH}/models"
- },
- "requirements": [
- "{DEEPPAVLOV_PATH}/requirements/tf.txt",
- "{DEEPPAVLOV_PATH}/requirements/tf-hub.txt"
- ]
+ }
}
}
diff --git a/deeppavlov/configs/embedder/elmo_ru_news.json b/deeppavlov/configs/embedder/elmo_ru_news.json
index c3bdb312a8..86d78bfe52 100644
--- a/deeppavlov/configs/embedder/elmo_ru_news.json
+++ b/deeppavlov/configs/embedder/elmo_ru_news.json
@@ -32,10 +32,6 @@
"DOWNLOADS_PATH": "{ROOT_PATH}/downloads",
"MODELS_PATH": "{ROOT_PATH}/models"
},
- "requirements": [
- "{DEEPPAVLOV_PATH}/requirements/tf.txt",
- "{DEEPPAVLOV_PATH}/requirements/tf-hub.txt"
- ],
"download": [
{
"url": "http://files.deeppavlov.ai/deeppavlov_data/elmo_ru-news_wmt11-16_1.5M_steps.tar.gz",
diff --git a/deeppavlov/configs/embedder/elmo_ru_twitter.json b/deeppavlov/configs/embedder/elmo_ru_twitter.json
index 2476e934e5..df4c6013d4 100644
--- a/deeppavlov/configs/embedder/elmo_ru_twitter.json
+++ b/deeppavlov/configs/embedder/elmo_ru_twitter.json
@@ -32,10 +32,6 @@
"DOWNLOADS_PATH": "{ROOT_PATH}/downloads",
"MODELS_PATH": "{ROOT_PATH}/models"
},
- "requirements": [
- "{DEEPPAVLOV_PATH}/requirements/tf.txt",
- "{DEEPPAVLOV_PATH}/requirements/tf-hub.txt"
- ],
"download": [
{
"url": "http://files.deeppavlov.ai/deeppavlov_data/elmo_ru-twitter_2013-01_2018-04_600k_steps.tar.gz",
diff --git a/deeppavlov/configs/embedder/elmo_ru_wiki.json b/deeppavlov/configs/embedder/elmo_ru_wiki.json
index 5a70bf630d..f234430e6f 100644
--- a/deeppavlov/configs/embedder/elmo_ru_wiki.json
+++ b/deeppavlov/configs/embedder/elmo_ru_wiki.json
@@ -32,10 +32,6 @@
"DOWNLOADS_PATH": "{ROOT_PATH}/downloads",
"MODELS_PATH": "{ROOT_PATH}/models"
},
- "requirements": [
- "{DEEPPAVLOV_PATH}/requirements/tf.txt",
- "{DEEPPAVLOV_PATH}/requirements/tf-hub.txt"
- ],
"download": [
{
"url": "http://files.deeppavlov.ai/deeppavlov_data/elmo_ru-wiki_600k_steps.tar.gz",
- "scale": "log" - }, - "loss": "categorical_crossentropy", - "coef_reg_cnn": { - "evolve_range": [ - 1e-6, - 1e-3 - ] - }, - "coef_reg_den": { - "evolve_range": [ - 1e-6, - 1e-3 - ] - }, - "dropout_rate": { - "evolve_range": [ - 0.1, - 0.9 - ] - }, - "dense_size": { - "evolve_range": [ - 50, - 100 - ], - "discrete": true - }, - "model_name": "cnn_model", - "check_bool": { - "evolve_bool": true - } - }, - { - "in": "y_pred_probas", - "out": "y_pred_ids", - "class_name": "proba2labels", - "max_proba": true - }, - { - "in": "y_pred_ids", - "out": "y_pred_labels", - "ref": "classes_vocab" - } - ], - "out": [ - "y_pred_labels" - ] - }, - "train": { - "epochs": { - "evolve_range": [ - 5, - 10 - ], - "discrete": true - }, - "batch_size": { - "evolve_range": [ - 50, - 500 - ], - "discrete": true - }, - "metrics": [ - "accuracy", - "f1_macro", - { - "name": "roc_auc", - "inputs": [ - "y_onehot", - "y_pred_probas" - ] - } - ], - "validation_patience": 5, - "val_every_n_epochs": 1, - "log_every_n_epochs": 1, - "evaluation_targets": [ - "train", - "valid" - ], - "class_name": "nn_trainer" - }, - "metadata": { - "variables": { - "ROOT_PATH": "~/.deeppavlov", - "DOWNLOADS_PATH": "{ROOT_PATH}/downloads", - "MODELS_PATH": "{ROOT_PATH}/models", - "MODEL_PATH": "{MODELS_PATH}/classifiers/intents_snips_evolution" - }, - "download": [ - { - "url": "http://files.deeppavlov.ai/deeppavlov_data/embeddings/wiki.en.bin", - "subdir": "{DOWNLOADS_PATH}/embeddings" - } - ] - } -} diff --git a/deeppavlov/configs/evolution/evolve_rusentiment_cnn.json b/deeppavlov/configs/evolution/evolve_rusentiment_cnn.json deleted file mode 100644 index 595c0b9785..0000000000 --- a/deeppavlov/configs/evolution/evolve_rusentiment_cnn.json +++ /dev/null @@ -1,213 +0,0 @@ -{ - "dataset_reader": { - "class_name": "basic_classification_reader", - "x": "text", - "y": "label", - "data_path": "{DOWNLOADS_PATH}/rusentiment/", - "train": "rusentiment_random_posts.csv", - "test": "rusentiment_test.csv" - }, - "dataset_iterator": { - "class_name": "basic_classification_iterator", - "seed": 42, - "field_to_split": "train", - "split_fields": [ - "train", - "valid" - ], - "split_proportions": [ - 0.9, - 0.1 - ] - }, - "chainer": { - "in": [ - "x" - ], - "in_y": [ - "y" - ], - "pipe": [ - { - "id": "classes_vocab", - "class_name": "simple_vocab", - "fit_on": [ - "y" - ], - "save_path": "{MODEL_PATH}/classes.dict", - "load_path": "{MODEL_PATH}/classes.dict", - "in": "y", - "out": "y_ids" - }, - { - "in": [ - "x" - ], - "out": [ - "x_prep" - ], - "class_name": "dirty_comments_preprocessor" - }, - { - "in": "x_prep", - "out": "x_tok", - "id": "my_tokenizer", - "class_name": "nltk_tokenizer", - "tokenizer": "wordpunct_tokenize" - }, - { - "in": "x_tok", - "out": "x_emb", - "id": "my_embedder", - "class_name": "fasttext", - "load_path": "{DOWNLOADS_PATH}/embeddings/ft_native_300_ru_wiki_lenta_nltk_wordpunct_tokenize.bin", - "dim": 300, - "pad_zero": true - }, - { - "in": "y_ids", - "out": "y_onehot", - "class_name": "one_hotter", - "depth": "#classes_vocab.len", - "single_vector": true - }, - { - "in": [ - "x_emb" - ], - "in_y": [ - "y_onehot" - ], - "out": [ - "y_pred_probas" - ], - "main": true, - "class_name": "keras_classification_model", - "save_path": "{MODEL_PATH}/model", - "load_path": "{MODEL_PATH}/model", - "embedding_size": "#my_embedder.dim", - "n_classes": "#classes_vocab.len", - "kernel_sizes_cnn": [ - 1, - 2, - 3 - ], - "filters_cnn": { - "evolve_range": [ - 50, - 100 - ], - "discrete": true - }, - "optimizer": "Adam", - "learning_rate": { 
- "evolve_range": [ - 1e-4, - 1e-1 - ] - }, - "learning_rate_decay": { - "evolve_range": [ - 1e-6, - 1e-2 - ] - }, - "loss": "categorical_crossentropy", - "last_layer_activation": "softmax", - "coef_reg_cnn": { - "evolve_range": [ - 1e-6, - 1e-2 - ] - }, - "coef_reg_den": { - "evolve_range": [ - 1e-6, - 1e-2 - ] - }, - "dropout_rate": { - "evolve_range": [ - 0, - 1 - ] - }, - "dense_size": { - "evolve_range": [ - 50, - 100 - ], - "discrete": true - }, - "model_name": "cnn_model" - }, - { - "in": "y_pred_probas", - "out": "y_pred_ids", - "class_name": "proba2labels", - "max_proba": true - }, - { - "in": "y_pred_ids", - "out": "y_pred_labels", - "ref": "classes_vocab" - } - ], - "out": [ - "y_pred_labels" - ] - }, - "train": { - "epochs": 100, - "batch_size": { - "evolve_range": [ - 50, - 200 - ], - "discrete": true - }, - "metrics": [ - "accuracy", - "f1_macro", - { - "name": "roc_auc", - "inputs": [ - "y_onehot", - "y_pred_probas" - ] - } - ], - "validation_patience": 5, - "val_every_n_epochs": 1, - "log_every_n_epochs": 1, - "show_examples": false, - "evaluation_targets": [ - "train", - "valid", - "test" - ], - "class_name": "nn_trainer" - }, - "metadata": { - "variables": { - "ROOT_PATH": "~/.deeppavlov", - "DOWNLOADS_PATH": "{ROOT_PATH}/downloads", - "MODELS_PATH": "{ROOT_PATH}/models", - "MODEL_PATH": "{MODELS_PATH}/classifiers/rusentiment_evolution" - }, - "requirements": [ - "{DEEPPAVLOV_PATH}/requirements/tf.txt", - "{DEEPPAVLOV_PATH}/requirements/fasttext.txt" - ], - "download": [ - { - "url": "https://github.com/text-machine-lab/rusentiment/raw/master/Dataset/rusentiment_random_posts.csv", - "subdir": "{DOWNLOADS_PATH}/rusentiment" - }, - { - "url": "https://github.com/text-machine-lab/rusentiment/raw/master/Dataset/rusentiment_test.csv", - "subdir": "{DOWNLOADS_PATH}/rusentiment" - } - ] - } -} diff --git a/deeppavlov/configs/faq/fasttext_avg_autofaq.json b/deeppavlov/configs/faq/fasttext_avg_autofaq.json index 3a72491379..59fcb0ac47 100644 --- a/deeppavlov/configs/faq/fasttext_avg_autofaq.json +++ b/deeppavlov/configs/faq/fasttext_avg_autofaq.json @@ -55,9 +55,6 @@ "DOWNLOADS_PATH": "{ROOT_PATH}/downloads", "MODELS_PATH": "{ROOT_PATH}/models" }, - "requirements": [ - "{DEEPPAVLOV_PATH}/requirements/fasttext.txt" - ], "download": [ { "url": "http://files.deeppavlov.ai/faq/school/fasttext_cos_classifier.pkl", diff --git a/deeppavlov/configs/faq/fasttext_tfidf_autofaq.json b/deeppavlov/configs/faq/fasttext_tfidf_autofaq.json index 350716790c..0eb3e112fc 100644 --- a/deeppavlov/configs/faq/fasttext_tfidf_autofaq.json +++ b/deeppavlov/configs/faq/fasttext_tfidf_autofaq.json @@ -90,9 +90,6 @@ "DOWNLOADS_PATH": "{ROOT_PATH}/downloads", "MODELS_PATH": "{ROOT_PATH}/models" }, - "requirements": [ - "{DEEPPAVLOV_PATH}/requirements/fasttext.txt" - ], "download": [ { "url": "http://files.deeppavlov.ai/faq/school/fasttext_cos_classifier.pkl", diff --git a/deeppavlov/configs/faq/tfidf_logreg_en_faq.json b/deeppavlov/configs/faq/tfidf_logreg_en_faq.json index 04ab1f34d4..6146bbb295 100644 --- a/deeppavlov/configs/faq/tfidf_logreg_en_faq.json +++ b/deeppavlov/configs/faq/tfidf_logreg_en_faq.json @@ -97,10 +97,6 @@ "DOWNLOADS_PATH": "{ROOT_PATH}/downloads", "MODELS_PATH": "{ROOT_PATH}/models" }, - "requirements": [ - "{DEEPPAVLOV_PATH}/requirements/spacy.txt", - "{DEEPPAVLOV_PATH}/requirements/en_core_web_sm.txt" - ], "download": [ { "url": "http://files.deeppavlov.ai/faq/mipt/en_mipt_faq_v4.tar.gz", diff --git a/deeppavlov/configs/go_bot/gobot_dstc2.json 
b/deeppavlov/configs/go_bot/gobot_dstc2.json index 3380424d2b..2611af6f05 100644 --- a/deeppavlov/configs/go_bot/gobot_dstc2.json +++ b/deeppavlov/configs/go_bot/gobot_dstc2.json @@ -12,7 +12,7 @@ "out": ["y_predicted"], "pipe": [ { - "class_name": "deeppavlov.models.go_bot.wrapper:DialogComponentWrapper", + "class_name": "dialog_component_wrapper", "component": { "class_name": "split_tokenizer" }, "in": ["x"], "out": ["x_tokens"] @@ -107,12 +107,6 @@ "MODELS_PATH": "{ROOT_PATH}/models", "MODEL_PATH": "{MODELS_PATH}/gobot_dstc2" }, - "requirements": [ - "{DEEPPAVLOV_PATH}/requirements/tf.txt", - "{DEEPPAVLOV_PATH}/requirements/gensim.txt", - "{DEEPPAVLOV_PATH}/requirements/spacy.txt", - "{DEEPPAVLOV_PATH}/requirements/en_core_web_sm.txt" - ], "download": [ { "url": "http://files.deeppavlov.ai/deeppavlov_data/gobot_dstc2_v9.tar.gz", diff --git a/deeppavlov/configs/go_bot/gobot_dstc2_best.json b/deeppavlov/configs/go_bot/gobot_dstc2_best.json index 4bfeb17a65..b13c680f9e 100644 --- a/deeppavlov/configs/go_bot/gobot_dstc2_best.json +++ b/deeppavlov/configs/go_bot/gobot_dstc2_best.json @@ -12,7 +12,7 @@ "out": ["y_predicted"], "pipe": [ { - "class_name": "deeppavlov.models.go_bot.wrapper:DialogComponentWrapper", + "class_name": "dialog_component_wrapper", "component": { "class_name": "split_tokenizer" }, "in": ["x"], "out": ["x_tokens"] @@ -115,12 +115,6 @@ "CONFIGS_PATH": "{DEEPPAVLOV_PATH}/configs", "DSTC2_DATA_PATH": "{DOWNLOADS_PATH}/dstc2_v3" }, - "requirements": [ - "{DEEPPAVLOV_PATH}/requirements/tf.txt", - "{DEEPPAVLOV_PATH}/requirements/fasttext.txt", - "{DEEPPAVLOV_PATH}/requirements/spacy.txt", - "{DEEPPAVLOV_PATH}/requirements/en_core_web_sm.txt" - ], "download": [ { "url": "http://files.deeppavlov.ai/deeppavlov_data/gobot_dstc2_best_v4.tar.gz", diff --git a/deeppavlov/configs/go_bot/gobot_dstc2_best_json_nlg.json b/deeppavlov/configs/go_bot/gobot_dstc2_best_json_nlg.json index 0a11250b1b..19202edb74 100644 --- a/deeppavlov/configs/go_bot/gobot_dstc2_best_json_nlg.json +++ b/deeppavlov/configs/go_bot/gobot_dstc2_best_json_nlg.json @@ -12,7 +12,7 @@ "out": ["y_predicted"], "pipe": [ { - "class_name": "deeppavlov.models.go_bot.wrapper:DialogComponentWrapper", + "class_name": "dialog_component_wrapper", "component": { "class_name": "split_tokenizer" }, "in": ["x"], "out": ["x_tokens"] @@ -115,12 +115,6 @@ "CONFIGS_PATH": "{DEEPPAVLOV_PATH}/configs", "DSTC2_DATA_PATH": "{DOWNLOADS_PATH}/dstc2_v3" }, - "requirements": [ - "{DEEPPAVLOV_PATH}/requirements/tf.txt", - "{DEEPPAVLOV_PATH}/requirements/fasttext.txt", - "{DEEPPAVLOV_PATH}/requirements/spacy.txt", - "{DEEPPAVLOV_PATH}/requirements/en_core_web_sm.txt" - ], "download": [ { "url": "http://files.deeppavlov.ai/deeppavlov_data/gobot_dstc2_best_v4.tar.gz", diff --git a/deeppavlov/configs/go_bot/gobot_dstc2_minimal.json b/deeppavlov/configs/go_bot/gobot_dstc2_minimal.json index 4e96e0127b..032b8e05ac 100644 --- a/deeppavlov/configs/go_bot/gobot_dstc2_minimal.json +++ b/deeppavlov/configs/go_bot/gobot_dstc2_minimal.json @@ -12,7 +12,7 @@ "out": ["y_predicted"], "pipe": [ { - "class_name": "deeppavlov.models.go_bot.wrapper:DialogComponentWrapper", + "class_name": "dialog_component_wrapper", "component": { "class_name": "split_tokenizer" }, "in": ["x"], "out": ["x_tokens"] @@ -97,12 +97,6 @@ "MODELS_PATH": "{ROOT_PATH}/models", "MODEL_PATH": "{MODELS_PATH}/gobot_dstc2_minimal" }, - "requirements": [ - "{DEEPPAVLOV_PATH}/requirements/tf.txt", - "{DEEPPAVLOV_PATH}/requirements/gensim.txt", - "{DEEPPAVLOV_PATH}/requirements/spacy.txt", - 
"{DEEPPAVLOV_PATH}/requirements/en_core_web_sm.txt" - ], "download": [ { "url": "http://files.deeppavlov.ai/deeppavlov_data/gobot_dstc2_v9.tar.gz", diff --git a/deeppavlov/configs/go_bot/gobot_md_yaml_minimal.json b/deeppavlov/configs/go_bot/gobot_md_yaml_minimal.json index d6fcc8edcd..ae6a8158ae 100644 --- a/deeppavlov/configs/go_bot/gobot_md_yaml_minimal.json +++ b/deeppavlov/configs/go_bot/gobot_md_yaml_minimal.json @@ -12,7 +12,7 @@ "out": ["y_predicted"], "pipe": [ { - "class_name": "deeppavlov.models.go_bot.wrapper:DialogComponentWrapper", + "class_name": "dialog_component_wrapper", "component": { "class_name": "split_tokenizer" }, "in": ["x"], "out": ["x_tokens"] @@ -98,12 +98,6 @@ "MODELS_PATH": "{ROOT_PATH}/models", "MODEL_PATH": "{MODELS_PATH}/gobot_md_yaml_minimal" }, - "requirements": [ - "{DEEPPAVLOV_PATH}/requirements/tf.txt", - "{DEEPPAVLOV_PATH}/requirements/gensim.txt", - "{DEEPPAVLOV_PATH}/requirements/spacy.txt", - "{DEEPPAVLOV_PATH}/requirements/en_core_web_sm.txt" - ], "download": [ { "url": "http://files.deeppavlov.ai/embeddings/glove.6B.100d.txt", diff --git a/deeppavlov/configs/go_bot/gobot_simple_dstc2.json b/deeppavlov/configs/go_bot/gobot_simple_dstc2.json index 5842e95d37..52093d0ecb 100644 --- a/deeppavlov/configs/go_bot/gobot_simple_dstc2.json +++ b/deeppavlov/configs/go_bot/gobot_simple_dstc2.json @@ -12,7 +12,7 @@ "out": ["y_predicted"], "pipe": [ { - "class_name": "deeppavlov.models.go_bot.wrapper:DialogComponentWrapper", + "class_name": "dialog_component_wrapper", "component": { "class_name": "split_tokenizer" }, "in": ["x"], "out": ["x_tokens"] @@ -107,12 +107,6 @@ "MODELS_PATH": "{ROOT_PATH}/models", "MODEL_PATH": "{MODELS_PATH}/gobot_dstc2" }, - "requirements": [ - "{DEEPPAVLOV_PATH}/requirements/tf.txt", - "{DEEPPAVLOV_PATH}/requirements/gensim.txt", - "{DEEPPAVLOV_PATH}/requirements/spacy.txt", - "{DEEPPAVLOV_PATH}/requirements/en_core_web_sm.txt" - ], "download": [ { "url": "http://files.deeppavlov.ai/deeppavlov_data/gobot_dstc2_v9.tar.gz", diff --git a/deeppavlov/configs/intent_catcher/intent_catcher.json b/deeppavlov/configs/intent_catcher/intent_catcher.json index 684a4b823e..0c527b2774 100644 --- a/deeppavlov/configs/intent_catcher/intent_catcher.json +++ b/deeppavlov/configs/intent_catcher/intent_catcher.json @@ -87,11 +87,6 @@ "MODELS_PATH": "{ROOT_PATH}/models", "MODEL_PATH": "{MODELS_PATH}/classifiers/intent_catcher" }, - "requirements": [ - "{DEEPPAVLOV_PATH}/requirements/xeger.txt", - "{DEEPPAVLOV_PATH}/requirements/tf.txt", - "{DEEPPAVLOV_PATH}/requirements/tf-hub.txt" - ], "download": [ { "url": "http://files.deeppavlov.ai/deeppavlov_data/intent_catcher/intent_catcher.tar.gz", diff --git a/deeppavlov/configs/kbqa/entity_linking_eng.json b/deeppavlov/configs/kbqa/entity_linking_eng.json index 51bdd19b8b..3f337dee3d 100644 --- a/deeppavlov/configs/kbqa/entity_linking_eng.json +++ b/deeppavlov/configs/kbqa/entity_linking_eng.json @@ -67,13 +67,6 @@ "CONFIGS_PATH": "{DEEPPAVLOV_PATH}/configs", "NER_PATH": "{MODELS_PATH}/ner_ontonotes_bert" }, - "requirements": [ - "{DEEPPAVLOV_PATH}/requirements/tf.txt", - "{DEEPPAVLOV_PATH}/requirements/bert_dp.txt", - "{DEEPPAVLOV_PATH}/requirements/faiss.txt", - "{DEEPPAVLOV_PATH}/requirements/hdt.txt", - "{DEEPPAVLOV_PATH}/requirements/pyinflect.txt" - ], "download": [ { "url": "http://files.deeppavlov.ai/kbqa/wikidata/entity_linking_eng.tar.gz", diff --git a/deeppavlov/configs/kbqa/entity_linking_rus.json b/deeppavlov/configs/kbqa/entity_linking_rus.json index 049e346475..8593501860 100644 --- 
a/deeppavlov/configs/kbqa/entity_linking_rus.json +++ b/deeppavlov/configs/kbqa/entity_linking_rus.json @@ -67,13 +67,6 @@ "CONFIGS_PATH": "{DEEPPAVLOV_PATH}/configs", "NER_PATH": "{MODELS_PATH}/ner_rus_bert" }, - "requirements": [ - "{DEEPPAVLOV_PATH}/requirements/tf.txt", - "{DEEPPAVLOV_PATH}/requirements/bert_dp.txt", - "{DEEPPAVLOV_PATH}/requirements/faiss.txt", - "{DEEPPAVLOV_PATH}/requirements/hdt.txt", - "{DEEPPAVLOV_PATH}/requirements/pyinflect.txt" - ], "download": [ { "url": "http://files.deeppavlov.ai/kbqa/wikidata/entity_linking_rus.tar.gz", diff --git a/deeppavlov/configs/kbqa/kbqa_cq.json b/deeppavlov/configs/kbqa/kbqa_cq.json index c69310cf2a..d87c20aa19 100644 --- a/deeppavlov/configs/kbqa/kbqa_cq.json +++ b/deeppavlov/configs/kbqa/kbqa_cq.json @@ -134,19 +134,6 @@ "NER_PATH": "{MODELS_PATH}/ner_lcquad_ent_and_type", "CONFIGS_PATH": "{DEEPPAVLOV_PATH}/configs" }, - "requirements": [ - "{DEEPPAVLOV_PATH}/requirements/tf.txt", - "{DEEPPAVLOV_PATH}/requirements/bert_dp.txt", - "{DEEPPAVLOV_PATH}/requirements/fasttext.txt", - "{DEEPPAVLOV_PATH}/requirements/rapidfuzz.txt", - "{DEEPPAVLOV_PATH}/requirements/hdt.txt", - "{DEEPPAVLOV_PATH}/requirements/faiss.txt", - "{DEEPPAVLOV_PATH}/requirements/spelling.txt", - "{DEEPPAVLOV_PATH}/requirements/wikihow.txt", - "{DEEPPAVLOV_PATH}/requirements/spacy.txt", - "{DEEPPAVLOV_PATH}/requirements/en_core_web_sm.txt", - "{DEEPPAVLOV_PATH}/requirements/pyinflect.txt" - ], "labels": { "telegram_utils": "NERCoNLL2003Model", "server_utils": "NER" diff --git a/deeppavlov/configs/kbqa/kbqa_cq_bert_ranker.json b/deeppavlov/configs/kbqa/kbqa_cq_bert_ranker.json index 5effb700bd..bc365b78a0 100644 --- a/deeppavlov/configs/kbqa/kbqa_cq_bert_ranker.json +++ b/deeppavlov/configs/kbqa/kbqa_cq_bert_ranker.json @@ -121,19 +121,6 @@ "NER_PATH": "{MODELS_PATH}/ner_lcquad_ent_and_type", "CONFIGS_PATH": "{DEEPPAVLOV_PATH}/configs" }, - "requirements": [ - "{DEEPPAVLOV_PATH}/requirements/tf.txt", - "{DEEPPAVLOV_PATH}/requirements/bert_dp.txt", - "{DEEPPAVLOV_PATH}/requirements/fasttext.txt", - "{DEEPPAVLOV_PATH}/requirements/rapidfuzz.txt", - "{DEEPPAVLOV_PATH}/requirements/hdt.txt", - "{DEEPPAVLOV_PATH}/requirements/faiss.txt", - "{DEEPPAVLOV_PATH}/requirements/spelling.txt", - "{DEEPPAVLOV_PATH}/requirements/wikihow.txt", - "{DEEPPAVLOV_PATH}/requirements/spacy.txt", - "{DEEPPAVLOV_PATH}/requirements/en_core_web_sm.txt", - "{DEEPPAVLOV_PATH}/requirements/pyinflect.txt" - ], "labels": { "telegram_utils": "NERCoNLL2003Model", "server_utils": "NER" diff --git a/deeppavlov/configs/kbqa/kbqa_cq_mt_bert.json b/deeppavlov/configs/kbqa/kbqa_cq_mt_bert.json index 6e3d7de342..e3d5bff600 100644 --- a/deeppavlov/configs/kbqa/kbqa_cq_mt_bert.json +++ b/deeppavlov/configs/kbqa/kbqa_cq_mt_bert.json @@ -219,19 +219,6 @@ "CONFIGS_PATH": "{DEEPPAVLOV_PATH}/configs", "MT_BERT_PATH": "{MODELS_PATH}/mt_bert_kbqa" }, - "requirements": [ - "{DEEPPAVLOV_PATH}/requirements/tf.txt", - "{DEEPPAVLOV_PATH}/requirements/bert_dp.txt", - "{DEEPPAVLOV_PATH}/requirements/fasttext.txt", - "{DEEPPAVLOV_PATH}/requirements/rapidfuzz.txt", - "{DEEPPAVLOV_PATH}/requirements/hdt.txt", - "{DEEPPAVLOV_PATH}/requirements/faiss.txt", - "{DEEPPAVLOV_PATH}/requirements/spelling.txt", - "{DEEPPAVLOV_PATH}/requirements/wikihow.txt", - "{DEEPPAVLOV_PATH}/requirements/spacy.txt", - "{DEEPPAVLOV_PATH}/requirements/en_core_web_sm.txt", - "{DEEPPAVLOV_PATH}/requirements/pyinflect.txt" - ], "labels": { "telegram_utils": "KBQA_MT_BERT_MODEL", "server_utils": "KBQA" diff --git 
a/deeppavlov/configs/kbqa/kbqa_cq_online.json b/deeppavlov/configs/kbqa/kbqa_cq_online.json index 90752598d1..4918186abd 100644 --- a/deeppavlov/configs/kbqa/kbqa_cq_online.json +++ b/deeppavlov/configs/kbqa/kbqa_cq_online.json @@ -128,19 +128,6 @@ "NER_PATH": "{MODELS_PATH}/ner_lcquad_ent_and_type", "CONFIGS_PATH": "{DEEPPAVLOV_PATH}/configs" }, - "requirements": [ - "{DEEPPAVLOV_PATH}/requirements/tf.txt", - "{DEEPPAVLOV_PATH}/requirements/bert_dp.txt", - "{DEEPPAVLOV_PATH}/requirements/fasttext.txt", - "{DEEPPAVLOV_PATH}/requirements/rapidfuzz.txt", - "{DEEPPAVLOV_PATH}/requirements/hdt.txt", - "{DEEPPAVLOV_PATH}/requirements/faiss.txt", - "{DEEPPAVLOV_PATH}/requirements/spelling.txt", - "{DEEPPAVLOV_PATH}/requirements/wikihow.txt", - "{DEEPPAVLOV_PATH}/requirements/spacy.txt", - "{DEEPPAVLOV_PATH}/requirements/en_core_web_sm.txt", - "{DEEPPAVLOV_PATH}/requirements/pyinflect.txt" - ], "labels": { "telegram_utils": "NERCoNLL2003Model", "server_utils": "NER" diff --git a/deeppavlov/configs/kbqa/kbqa_cq_online_mt_bert.json b/deeppavlov/configs/kbqa/kbqa_cq_online_mt_bert.json index e239d671d6..d1b0095d2c 100644 --- a/deeppavlov/configs/kbqa/kbqa_cq_online_mt_bert.json +++ b/deeppavlov/configs/kbqa/kbqa_cq_online_mt_bert.json @@ -224,19 +224,6 @@ "CONFIGS_PATH": "{DEEPPAVLOV_PATH}/configs", "MT_BERT_PATH": "{MODELS_PATH}/mt_bert_kbqa" }, - "requirements": [ - "{DEEPPAVLOV_PATH}/requirements/tf.txt", - "{DEEPPAVLOV_PATH}/requirements/bert_dp.txt", - "{DEEPPAVLOV_PATH}/requirements/fasttext.txt", - "{DEEPPAVLOV_PATH}/requirements/rapidfuzz.txt", - "{DEEPPAVLOV_PATH}/requirements/hdt.txt", - "{DEEPPAVLOV_PATH}/requirements/faiss.txt", - "{DEEPPAVLOV_PATH}/requirements/spelling.txt", - "{DEEPPAVLOV_PATH}/requirements/wikihow.txt", - "{DEEPPAVLOV_PATH}/requirements/spacy.txt", - "{DEEPPAVLOV_PATH}/requirements/en_core_web_sm.txt", - "{DEEPPAVLOV_PATH}/requirements/pyinflect.txt" - ], "labels": { "telegram_utils": "NERCoNLL2003Model", "server_utils": "NER" diff --git a/deeppavlov/configs/kbqa/kbqa_cq_rus.json b/deeppavlov/configs/kbqa/kbqa_cq_rus.json index fdae81219a..2c62a33206 100644 --- a/deeppavlov/configs/kbqa/kbqa_cq_rus.json +++ b/deeppavlov/configs/kbqa/kbqa_cq_rus.json @@ -159,20 +159,6 @@ "NER_PATH": "{MODELS_PATH}/ner_ent_and_type_rus", "CONFIGS_PATH": "{DEEPPAVLOV_PATH}/configs" }, - "requirements": [ - "{DEEPPAVLOV_PATH}/requirements/tf.txt", - "{DEEPPAVLOV_PATH}/requirements/bert_dp.txt", - "{DEEPPAVLOV_PATH}/requirements/fasttext.txt", - "{DEEPPAVLOV_PATH}/requirements/rapidfuzz.txt", - "{DEEPPAVLOV_PATH}/requirements/udpipe.txt", - "{DEEPPAVLOV_PATH}/requirements/hdt.txt", - "{DEEPPAVLOV_PATH}/requirements/faiss.txt", - "{DEEPPAVLOV_PATH}/requirements/spelling.txt", - "{DEEPPAVLOV_PATH}/requirements/wikihow.txt", - "{DEEPPAVLOV_PATH}/requirements/spacy.txt", - "{DEEPPAVLOV_PATH}/requirements/en_core_web_sm.txt", - "{DEEPPAVLOV_PATH}/requirements/pyinflect.txt" - ], "labels": { "telegram_utils": "NERCoNLL2003Model", "server_utils": "NER" diff --git a/deeppavlov/configs/kbqa/kbqa_cq_sep.json b/deeppavlov/configs/kbqa/kbqa_cq_sep.json index 678dc6a33a..418522958e 100644 --- a/deeppavlov/configs/kbqa/kbqa_cq_sep.json +++ b/deeppavlov/configs/kbqa/kbqa_cq_sep.json @@ -129,19 +129,6 @@ "NER_PATH": "{MODELS_PATH}/ner_lcquad_ent_and_type", "CONFIGS_PATH": "{DEEPPAVLOV_PATH}/configs" }, - "requirements": [ - "{DEEPPAVLOV_PATH}/requirements/tf.txt", - "{DEEPPAVLOV_PATH}/requirements/bert_dp.txt", - "{DEEPPAVLOV_PATH}/requirements/fasttext.txt", - 
"{DEEPPAVLOV_PATH}/requirements/rapidfuzz.txt", - "{DEEPPAVLOV_PATH}/requirements/hdt.txt", - "{DEEPPAVLOV_PATH}/requirements/faiss.txt", - "{DEEPPAVLOV_PATH}/requirements/spelling.txt", - "{DEEPPAVLOV_PATH}/requirements/wikihow.txt", - "{DEEPPAVLOV_PATH}/requirements/spacy.txt", - "{DEEPPAVLOV_PATH}/requirements/en_core_web_sm.txt", - "{DEEPPAVLOV_PATH}/requirements/pyinflect.txt" - ], "labels": { "telegram_utils": "NERCoNLL2003Model", "server_utils": "NER" diff --git a/deeppavlov/configs/kbqa/kbqa_entity_linking.json b/deeppavlov/configs/kbqa/kbqa_entity_linking.json index f129b10918..821c67d9e4 100644 --- a/deeppavlov/configs/kbqa/kbqa_entity_linking.json +++ b/deeppavlov/configs/kbqa/kbqa_entity_linking.json @@ -37,14 +37,6 @@ "MODELS_PATH": "{ROOT_PATH}/models", "CONFIGS_PATH": "{DEEPPAVLOV_PATH}/configs" }, - "requirements": [ - "{DEEPPAVLOV_PATH}/requirements/tf.txt", - "{DEEPPAVLOV_PATH}/requirements/bert_dp.txt", - "{DEEPPAVLOV_PATH}/requirements/rapidfuzz.txt", - "{DEEPPAVLOV_PATH}/requirements/hdt.txt", - "{DEEPPAVLOV_PATH}/requirements/spelling.txt", - "{DEEPPAVLOV_PATH}/requirements/pyinflect.txt" - ], "download": [ { "url": "http://files.deeppavlov.ai/kbqa/wikidata/kbqa_entity_linking_eng.tar.gz", diff --git a/deeppavlov/configs/kbqa/kbqa_mt_bert_train.json b/deeppavlov/configs/kbqa/kbqa_mt_bert_train.json index fae050d603..7aef5caf22 100644 --- a/deeppavlov/configs/kbqa/kbqa_mt_bert_train.json +++ b/deeppavlov/configs/kbqa/kbqa_mt_bert_train.json @@ -221,13 +221,6 @@ "MT_BERT_PATH": "{MODELS_PATH}/mt_bert_kbqa", "CONFIGS_PATH": "{DEEPPAVLOV_PATH}/configs" }, - "requirements": [ - "{DEEPPAVLOV_PATH}/requirements/tf.txt", - "{DEEPPAVLOV_PATH}/requirements/bert_dp.txt", - "{DEEPPAVLOV_PATH}/requirements/fasttext.txt", - "{DEEPPAVLOV_PATH}/requirements/rapidfuzz.txt", - "{DEEPPAVLOV_PATH}/requirements/hdt.txt" - ], "download": [ { "url": "http://files.deeppavlov.ai/kbqa/datasets/query_prediction.tar.gz", diff --git a/deeppavlov/configs/kbqa/wiki_parser.json b/deeppavlov/configs/kbqa/wiki_parser.json index b0745d3a86..db94007eb3 100644 --- a/deeppavlov/configs/kbqa/wiki_parser.json +++ b/deeppavlov/configs/kbqa/wiki_parser.json @@ -20,7 +20,6 @@ "MODELS_PATH": "{ROOT_PATH}/models", "CONFIGS_PATH": "{DEEPPAVLOV_PATH}/configs" }, - "requirements": ["{DEEPPAVLOV_PATH}/requirements/hdt.txt"], "download": [ { "url": "http://files.deeppavlov.ai/kbqa/wikidata/wikidata_compr.pickle", diff --git a/deeppavlov/configs/morpho_tagger/BERT/morpho_ru_syntagrus_bert.json b/deeppavlov/configs/morpho_tagger/BERT/morpho_ru_syntagrus_bert.json index 7a977724cd..e7589c0e9e 100644 --- a/deeppavlov/configs/morpho_tagger/BERT/morpho_ru_syntagrus_bert.json +++ b/deeppavlov/configs/morpho_tagger/BERT/morpho_ru_syntagrus_bert.json @@ -148,11 +148,6 @@ "BERT_PATH": "{DOWNLOADS_PATH}/bert_models/rubert_cased_L-12_H-768_A-12_v1", "WORK_PATH": "{MODELS_PATH}/morpho_ru_syntagrus" }, - "requirements": [ - "{DEEPPAVLOV_PATH}/requirements/tf.txt", - "{DEEPPAVLOV_PATH}/requirements/bert_dp.txt", - "{DEEPPAVLOV_PATH}/requirements/morpho_tagger.txt" - ], "download": [ { "url": "http://files.deeppavlov.ai/deeppavlov_data/morpho_tagger/BERT/morpho_ru_syntagrus_bert.tar.gz", diff --git a/deeppavlov/configs/morpho_tagger/UD2.0/morpho_ar.json b/deeppavlov/configs/morpho_tagger/UD2.0/morpho_ar.json index 80038c4f5c..1abd931d5d 100644 --- a/deeppavlov/configs/morpho_tagger/UD2.0/morpho_ar.json +++ b/deeppavlov/configs/morpho_tagger/UD2.0/morpho_ar.json @@ -159,9 +159,6 @@ "MODELS_PATH": "{ROOT_PATH}/models", "RESULTS_PATH": 
"{ROOT_PATH}/results" }, - "requirements": [ - "{DEEPPAVLOV_PATH}/requirements/tf.txt" - ], "download": [ { "url": "http://files.deeppavlov.ai/deeppavlov_data/morpho_tagger/UD2.0/ar.tar.gz", diff --git a/deeppavlov/configs/morpho_tagger/UD2.0/morpho_cs.json b/deeppavlov/configs/morpho_tagger/UD2.0/morpho_cs.json index f83bda3adf..047a08cf24 100644 --- a/deeppavlov/configs/morpho_tagger/UD2.0/morpho_cs.json +++ b/deeppavlov/configs/morpho_tagger/UD2.0/morpho_cs.json @@ -159,9 +159,6 @@ "MODELS_PATH": "{ROOT_PATH}/models", "RESULTS_PATH": "{ROOT_PATH}/results" }, - "requirements": [ - "{DEEPPAVLOV_PATH}/requirements/tf.txt" - ], "download": [ { "url": "http://files.deeppavlov.ai/deeppavlov_data/morpho_tagger/UD2.0/cs.tar.gz", diff --git a/deeppavlov/configs/morpho_tagger/UD2.0/morpho_de.json b/deeppavlov/configs/morpho_tagger/UD2.0/morpho_de.json index 4bdeba1c3f..c0c7aa19f1 100644 --- a/deeppavlov/configs/morpho_tagger/UD2.0/morpho_de.json +++ b/deeppavlov/configs/morpho_tagger/UD2.0/morpho_de.json @@ -159,9 +159,6 @@ "MODELS_PATH": "{ROOT_PATH}/models", "RESULTS_PATH": "{ROOT_PATH}/results" }, - "requirements": [ - "{DEEPPAVLOV_PATH}/requirements/tf.txt" - ], "download": [ { "url": "http://files.deeppavlov.ai/deeppavlov_data/morpho_tagger/UD2.0/de.tar.gz", diff --git a/deeppavlov/configs/morpho_tagger/UD2.0/morpho_en.json b/deeppavlov/configs/morpho_tagger/UD2.0/morpho_en.json index 9f47acd7e2..dd771f8216 100644 --- a/deeppavlov/configs/morpho_tagger/UD2.0/morpho_en.json +++ b/deeppavlov/configs/morpho_tagger/UD2.0/morpho_en.json @@ -159,9 +159,6 @@ "MODELS_PATH": "{ROOT_PATH}/models", "RESULTS_PATH": "{ROOT_PATH}/results" }, - "requirements": [ - "{DEEPPAVLOV_PATH}/requirements/tf.txt" - ], "download": [ { "url": "http://files.deeppavlov.ai/deeppavlov_data/morpho_tagger/UD2.0/en.tar.gz", diff --git a/deeppavlov/configs/morpho_tagger/UD2.0/morpho_es_ancora.json b/deeppavlov/configs/morpho_tagger/UD2.0/morpho_es_ancora.json index 35cf73673a..ce6c39f736 100644 --- a/deeppavlov/configs/morpho_tagger/UD2.0/morpho_es_ancora.json +++ b/deeppavlov/configs/morpho_tagger/UD2.0/morpho_es_ancora.json @@ -159,9 +159,6 @@ "MODELS_PATH": "{ROOT_PATH}/models", "RESULTS_PATH": "{ROOT_PATH}/results" }, - "requirements": [ - "{DEEPPAVLOV_PATH}/requirements/tf.txt" - ], "download": [ { "url": "http://files.deeppavlov.ai/deeppavlov_data/morpho_tagger/UD2.0/es_ancora.tar.gz", diff --git a/deeppavlov/configs/morpho_tagger/UD2.0/morpho_fr.json b/deeppavlov/configs/morpho_tagger/UD2.0/morpho_fr.json index 7899039f3f..7c944e807a 100644 --- a/deeppavlov/configs/morpho_tagger/UD2.0/morpho_fr.json +++ b/deeppavlov/configs/morpho_tagger/UD2.0/morpho_fr.json @@ -159,9 +159,6 @@ "MODELS_PATH": "{ROOT_PATH}/models", "RESULTS_PATH": "{ROOT_PATH}/results" }, - "requirements": [ - "{DEEPPAVLOV_PATH}/requirements/tf.txt" - ], "download": [ { "url": "http://files.deeppavlov.ai/deeppavlov_data/morpho_tagger/UD2.0/fr.tar.gz", diff --git a/deeppavlov/configs/morpho_tagger/UD2.0/morpho_hi.json b/deeppavlov/configs/morpho_tagger/UD2.0/morpho_hi.json index a80c32fea9..ff10e2e4ba 100644 --- a/deeppavlov/configs/morpho_tagger/UD2.0/morpho_hi.json +++ b/deeppavlov/configs/morpho_tagger/UD2.0/morpho_hi.json @@ -159,9 +159,6 @@ "MODELS_PATH": "{ROOT_PATH}/models", "RESULTS_PATH": "{ROOT_PATH}/results" }, - "requirements": [ - "{DEEPPAVLOV_PATH}/requirements/tf.txt" - ], "download": [ { "url": "http://files.deeppavlov.ai/deeppavlov_data/morpho_tagger/UD2.0/hi.tar.gz", diff --git a/deeppavlov/configs/morpho_tagger/UD2.0/morpho_hu.json 
b/deeppavlov/configs/morpho_tagger/UD2.0/morpho_hu.json index 8e7cb96692..6e399a3a43 100644 --- a/deeppavlov/configs/morpho_tagger/UD2.0/morpho_hu.json +++ b/deeppavlov/configs/morpho_tagger/UD2.0/morpho_hu.json @@ -159,9 +159,6 @@ "MODELS_PATH": "{ROOT_PATH}/models", "RESULTS_PATH": "{ROOT_PATH}/results" }, - "requirements": [ - "{DEEPPAVLOV_PATH}/requirements/tf.txt" - ], "download": [ { "url": "http://files.deeppavlov.ai/deeppavlov_data/morpho_tagger/UD2.0/hu.tar.gz", diff --git a/deeppavlov/configs/morpho_tagger/UD2.0/morpho_it.json b/deeppavlov/configs/morpho_tagger/UD2.0/morpho_it.json index acacc011f4..a84510a2e2 100644 --- a/deeppavlov/configs/morpho_tagger/UD2.0/morpho_it.json +++ b/deeppavlov/configs/morpho_tagger/UD2.0/morpho_it.json @@ -159,9 +159,6 @@ "MODELS_PATH": "{ROOT_PATH}/models", "RESULTS_PATH": "{ROOT_PATH}/results" }, - "requirements": [ - "{DEEPPAVLOV_PATH}/requirements/tf.txt" - ], "download": [ { "url": "http://files.deeppavlov.ai/deeppavlov_data/morpho_tagger/UD2.0/it.tar.gz", diff --git a/deeppavlov/configs/morpho_tagger/UD2.0/morpho_ru_syntagrus.json b/deeppavlov/configs/morpho_tagger/UD2.0/morpho_ru_syntagrus.json index fb040d1190..b5ec00c85e 100644 --- a/deeppavlov/configs/morpho_tagger/UD2.0/morpho_ru_syntagrus.json +++ b/deeppavlov/configs/morpho_tagger/UD2.0/morpho_ru_syntagrus.json @@ -159,9 +159,6 @@ "MODELS_PATH": "{ROOT_PATH}/models", "RESULTS_PATH": "{ROOT_PATH}/results" }, - "requirements": [ - "{DEEPPAVLOV_PATH}/requirements/tf.txt" - ], "download": [ { "url": "http://files.deeppavlov.ai/deeppavlov_data/morpho_tagger/UD2.0/ru_syntagrus.tar.gz", diff --git a/deeppavlov/configs/morpho_tagger/UD2.0/morpho_ru_syntagrus_pymorphy.json b/deeppavlov/configs/morpho_tagger/UD2.0/morpho_ru_syntagrus_pymorphy.json index d0206cdbe7..ef67338faa 100644 --- a/deeppavlov/configs/morpho_tagger/UD2.0/morpho_ru_syntagrus_pymorphy.json +++ b/deeppavlov/configs/morpho_tagger/UD2.0/morpho_ru_syntagrus_pymorphy.json @@ -179,10 +179,6 @@ "MODELS_PATH": "{ROOT_PATH}/models", "RESULTS_PATH": "{ROOT_PATH}/results" }, - "requirements": [ - "{DEEPPAVLOV_PATH}/requirements/tf.txt", - "{DEEPPAVLOV_PATH}/requirements/morpho_tagger.txt" - ], "download": [ { "url": "http://files.deeppavlov.ai/deeppavlov_data/morpho_tagger/UD2.0/ru_syntagrus.tar.gz", diff --git a/deeppavlov/configs/morpho_tagger/UD2.0/morpho_ru_syntagrus_pymorphy_lemmatize.json b/deeppavlov/configs/morpho_tagger/UD2.0/morpho_ru_syntagrus_pymorphy_lemmatize.json index 3b55661a5f..da40a4f2a5 100644 --- a/deeppavlov/configs/morpho_tagger/UD2.0/morpho_ru_syntagrus_pymorphy_lemmatize.json +++ b/deeppavlov/configs/morpho_tagger/UD2.0/morpho_ru_syntagrus_pymorphy_lemmatize.json @@ -187,10 +187,6 @@ "MODELS_PATH": "{ROOT_PATH}/models", "RESULTS_PATH": "{ROOT_PATH}/results" }, - "requirements": [ - "{DEEPPAVLOV_PATH}/requirements/tf.txt", - "{DEEPPAVLOV_PATH}/requirements/morpho_tagger.txt" - ], "download": [ { "url": "http://files.deeppavlov.ai/deeppavlov_data/morpho_tagger/UD2.0/ru_syntagrus.tar.gz", diff --git a/deeppavlov/configs/morpho_tagger/UD2.0/morpho_tr.json b/deeppavlov/configs/morpho_tagger/UD2.0/morpho_tr.json index 3163670a6c..e7887f1560 100644 --- a/deeppavlov/configs/morpho_tagger/UD2.0/morpho_tr.json +++ b/deeppavlov/configs/morpho_tagger/UD2.0/morpho_tr.json @@ -160,9 +160,6 @@ "MODELS_PATH": "{ROOT_PATH}/models", "RESULTS_PATH": "{ROOT_PATH}/results" }, - "requirements": [ - "{DEEPPAVLOV_PATH}/requirements/tf.txt" - ], "download": [ { "url": 
"http://files.deeppavlov.ai/deeppavlov_data/morpho_tagger/UD2.0/tr.tar.gz", diff --git a/deeppavlov/configs/nemo/asr.json b/deeppavlov/configs/nemo/asr.json index 19ca6622e3..410e0ac560 100644 --- a/deeppavlov/configs/nemo/asr.json +++ b/deeppavlov/configs/nemo/asr.json @@ -16,10 +16,6 @@ "variables": { "NEMO_PATH": "~/.deeppavlov/models/nemo" }, - "requirements": [ - "{DEEPPAVLOV_PATH}/requirements/nemo-pytorch.txt", - "{DEEPPAVLOV_PATH}/requirements/nemo-asr.txt" - ], "download": [ { "url": "http://files.deeppavlov.ai/deeppavlov_data/nemo/quartznet15x5.tar.gz", diff --git a/deeppavlov/configs/nemo/asr_tts.json b/deeppavlov/configs/nemo/asr_tts.json index f71dcec727..8ecc10c304 100644 --- a/deeppavlov/configs/nemo/asr_tts.json +++ b/deeppavlov/configs/nemo/asr_tts.json @@ -34,11 +34,6 @@ "NEMO_PATH": "~/.deeppavlov/models/nemo", "TTS_PATH": "{NEMO_PATH}/tacotron2_waveglow" }, - "requirements": [ - "{DEEPPAVLOV_PATH}/requirements/nemo-pytorch.txt", - "{DEEPPAVLOV_PATH}/requirements/nemo-asr.txt", - "{DEEPPAVLOV_PATH}/requirements/nemo-tts.txt" - ], "download": [ { "url": "http://files.deeppavlov.ai/deeppavlov_data/nemo/quartznet15x5.tar.gz", diff --git a/deeppavlov/configs/nemo/tts.json b/deeppavlov/configs/nemo/tts.json index ccbc4d60bd..6cbac9a043 100644 --- a/deeppavlov/configs/nemo/tts.json +++ b/deeppavlov/configs/nemo/tts.json @@ -17,11 +17,6 @@ "NEMO_PATH": "~/.deeppavlov/models/nemo", "TTS_PATH": "{NEMO_PATH}/tacotron2_waveglow" }, - "requirements": [ - "{DEEPPAVLOV_PATH}/requirements/nemo-pytorch.txt", - "{DEEPPAVLOV_PATH}/requirements/nemo-asr.txt", - "{DEEPPAVLOV_PATH}/requirements/nemo-tts.txt" - ], "download": [ { "url": "http://files.deeppavlov.ai/deeppavlov_data/nemo/tacotron2_waveglow.tar.gz", diff --git a/deeppavlov/configs/ner/conll2003_m1.json b/deeppavlov/configs/ner/conll2003_m1.json index c9fe2944cc..c792ca336c 100644 --- a/deeppavlov/configs/ner/conll2003_m1.json +++ b/deeppavlov/configs/ner/conll2003_m1.json @@ -134,10 +134,6 @@ "DOWNLOADS_PATH": "{ROOT_PATH}/downloads", "MODELS_PATH": "{ROOT_PATH}/models/conll2003_m1" }, - "requirements": [ - "{DEEPPAVLOV_PATH}/requirements/gensim.txt", - "{DEEPPAVLOV_PATH}/requirements/tf.txt" - ], "download": [ { "url": "http://files.deeppavlov.ai/deeppavlov_data/ner_conll2003_m1.tar.gz", diff --git a/deeppavlov/configs/ner/ner_bert_ent_and_type_rus.json b/deeppavlov/configs/ner/ner_bert_ent_and_type_rus.json index 37a238e661..f7c3f6fcc7 100644 --- a/deeppavlov/configs/ner/ner_bert_ent_and_type_rus.json +++ b/deeppavlov/configs/ner/ner_bert_ent_and_type_rus.json @@ -96,10 +96,6 @@ "BERT_PATH": "{DOWNLOADS_PATH}/bert_models/multi_cased_L-12_H-768_A-12", "NER_PATH": "{MODELS_PATH}/ner_ent_and_type_rus" }, - "requirements": [ - "{DEEPPAVLOV_PATH}/requirements/tf.txt", - "{DEEPPAVLOV_PATH}/requirements/bert_dp.txt" - ], "labels": { "telegram_utils": "NERCoNLL2003Model", "server_utils": "NER" diff --git a/deeppavlov/configs/ner/ner_collection3_m1.json b/deeppavlov/configs/ner/ner_collection3_m1.json index ee0215214c..0662c521cc 100644 --- a/deeppavlov/configs/ner/ner_collection3_m1.json +++ b/deeppavlov/configs/ner/ner_collection3_m1.json @@ -120,10 +120,6 @@ "DOWNLOADS_PATH": "{ROOT_PATH}/downloads", "MODELS_PATH": "{ROOT_PATH}/models/collection3" }, - "requirements": [ - "{DEEPPAVLOV_PATH}/requirements/gensim.txt", - "{DEEPPAVLOV_PATH}/requirements/tf.txt" - ], "download": [ { "url": "http://files.deeppavlov.ai/deeppavlov_data/ner_collection3_m1.tar.gz", diff --git a/deeppavlov/configs/ner/ner_conll2003.json 
b/deeppavlov/configs/ner/ner_conll2003.json index caaa220be7..25510db208 100644 --- a/deeppavlov/configs/ner/ner_conll2003.json +++ b/deeppavlov/configs/ner/ner_conll2003.json @@ -163,10 +163,6 @@ "MODELS_PATH": "{ROOT_PATH}/models", "NER_PATH": "{MODELS_PATH}/ner_conll2003" }, - "requirements": [ - "{DEEPPAVLOV_PATH}/requirements/gensim.txt", - "{DEEPPAVLOV_PATH}/requirements/tf.txt" - ], "download": [ { "url": "http://files.deeppavlov.ai/deeppavlov_data/ner_conll2003_v5.tar.gz", diff --git a/deeppavlov/configs/ner/ner_conll2003_bert.json b/deeppavlov/configs/ner/ner_conll2003_bert.json index c98af28a85..2314b8b875 100644 --- a/deeppavlov/configs/ner/ner_conll2003_bert.json +++ b/deeppavlov/configs/ner/ner_conll2003_bert.json @@ -98,10 +98,6 @@ "BERT_PATH": "{DOWNLOADS_PATH}/bert_models/cased_L-12_H-768_A-12", "NER_PATH": "{MODELS_PATH}/ner_conll2003_bert" }, - "requirements": [ - "{DEEPPAVLOV_PATH}/requirements/tf.txt", - "{DEEPPAVLOV_PATH}/requirements/bert_dp.txt" - ], "download": [ { "url": "http://files.deeppavlov.ai/deeppavlov_data/ner_conll2003_bert_v1.tar.gz", diff --git a/deeppavlov/configs/ner/ner_conll2003_pos.json b/deeppavlov/configs/ner/ner_conll2003_pos.json index 63b27a1810..3ddd6ab55d 100644 --- a/deeppavlov/configs/ner/ner_conll2003_pos.json +++ b/deeppavlov/configs/ner/ner_conll2003_pos.json @@ -179,10 +179,6 @@ "DOWNLOADS_PATH": "{ROOT_PATH}/downloads", "MODELS_PATH": "{ROOT_PATH}/models" }, - "requirements": [ - "{DEEPPAVLOV_PATH}/requirements/gensim.txt", - "{DEEPPAVLOV_PATH}/requirements/tf.txt" - ], "download": [ { "url": "http://files.deeppavlov.ai/embeddings/glove.6B.100d.txt", diff --git a/deeppavlov/configs/ner/ner_conll2003_torch_bert.json b/deeppavlov/configs/ner/ner_conll2003_torch_bert.json index fdc289ffc1..c7df510000 100644 --- a/deeppavlov/configs/ner/ner_conll2003_torch_bert.json +++ b/deeppavlov/configs/ner/ner_conll2003_torch_bert.json @@ -17,8 +17,8 @@ ], "pipe": [ { - "class_name": "torch_bert_ner_preprocessor", - "vocab_file": "bert-base-cased", + "class_name": "torch_transformers_ner_preprocessor", + "vocab_file": "{TRANSFORMER}", "do_lower_case": false, "max_seq_length": 512, "max_subword_length": 15, @@ -41,8 +41,8 @@ "O" ], "pad_with_zeros": true, - "save_path": "{NER_PATH}/tag.dict", - "load_path": "{NER_PATH}/tag.dict", + "save_path": "{MODEL_PATH}/tag.dict", + "load_path": "{MODEL_PATH}/tag.dict", "fit_on": [ "y" ], @@ -54,9 +54,9 @@ ] }, { - "class_name": "torch_bert_sequence_tagger", + "class_name": "torch_transformers_sequence_tagger", "n_tags": "#tag_vocab.len", - "pretrained_bert": "bert-base-cased", + "pretrained_bert": "{TRANSFORMER}", "attention_probs_keep_prob": 0.5, "return_probas": false, "encoder_layer_ids": [ @@ -77,8 +77,8 @@ "learning_rate_drop_patience": 30, "learning_rate_drop_div": 1.5, "load_before_drop": true, - "save_path": "{NER_PATH}/model", - "load_path": "{NER_PATH}/model", + "save_path": "{MODEL_PATH}/model", + "load_path": "{MODEL_PATH}/model", "in": [ "x_subword_tok_ids", "attention_mask", @@ -142,15 +142,12 @@ "ROOT_PATH": "~/.deeppavlov", "DOWNLOADS_PATH": "{ROOT_PATH}/downloads", "MODELS_PATH": "{ROOT_PATH}/models", - "NER_PATH": "{MODELS_PATH}/ner_conll2003_torch_bert" + "TRANSFORMER": "bert-base-uncased", + "MODEL_PATH": "{MODELS_PATH}/ner_conll2003_torch_bert" }, - "requirements": [ - "{DEEPPAVLOV_PATH}/requirements/pytorch.txt", - "{DEEPPAVLOV_PATH}/requirements/transformers.txt" - ], "download": [ { - "url": "http://files.deeppavlov.ai/deeppavlov_data/ner_conll2003_torch_bert_v0.tar.gz", + "url": 
"http://files.deeppavlov.ai/v1/ner/ner_conll2003_torch_bert.tar.gz", "subdir": "{MODELS_PATH}" } ] diff --git a/deeppavlov/configs/ner/ner_dstc2.json b/deeppavlov/configs/ner/ner_dstc2.json index fbe958f664..4f35d4b530 100644 --- a/deeppavlov/configs/ner/ner_dstc2.json +++ b/deeppavlov/configs/ner/ner_dstc2.json @@ -112,9 +112,6 @@ "MODELS_PATH": "{ROOT_PATH}/models", "MODEL_PATH": "{MODELS_PATH}/slotfill_dstc2" }, - "requirements": [ - "{DEEPPAVLOV_PATH}/requirements/tf.txt" - ], "download": [ { "url": "http://files.deeppavlov.ai/deeppavlov_data/dstc_slot_vals.tar.gz", diff --git a/deeppavlov/configs/ner/ner_few_shot_ru.json b/deeppavlov/configs/ner/ner_few_shot_ru.json index 393f440b03..ad60b46567 100644 --- a/deeppavlov/configs/ner/ner_few_shot_ru.json +++ b/deeppavlov/configs/ner/ner_few_shot_ru.json @@ -94,11 +94,6 @@ "DOWNLOADS_PATH": "{ROOT_PATH}/downloads", "MODELS_PATH": "{ROOT_PATH}/models" }, - "requirements": [ - "{DEEPPAVLOV_PATH}/requirements/fasttext.txt", - "{DEEPPAVLOV_PATH}/requirements/tf.txt", - "{DEEPPAVLOV_PATH}/requirements/tf-hub.txt" - ], "download": [ { "url": "http://files.deeppavlov.ai/deeppavlov_data/elmo_ru-news_wmt11-16_1.5M_steps.tar.gz", diff --git a/deeppavlov/configs/ner/ner_few_shot_ru_simulate.json b/deeppavlov/configs/ner/ner_few_shot_ru_simulate.json index 5c880357d5..cb58707224 100644 --- a/deeppavlov/configs/ner/ner_few_shot_ru_simulate.json +++ b/deeppavlov/configs/ner/ner_few_shot_ru_simulate.json @@ -130,11 +130,6 @@ "DOWNLOADS_PATH": "{ROOT_PATH}/downloads", "MODELS_PATH": "{ROOT_PATH}/models" }, - "requirements": [ - "{DEEPPAVLOV_PATH}/requirements/fasttext.txt", - "{DEEPPAVLOV_PATH}/requirements/tf.txt", - "{DEEPPAVLOV_PATH}/requirements/tf-hub.txt" - ], "download": [ { "url": "http://files.deeppavlov.ai/deeppavlov_data/elmo_ru-news_wmt11-16_1.5M_steps.tar.gz", diff --git a/deeppavlov/configs/ner/ner_kb_rus.json b/deeppavlov/configs/ner/ner_kb_rus.json index f54c9063a5..1bef6b87b1 100644 --- a/deeppavlov/configs/ner/ner_kb_rus.json +++ b/deeppavlov/configs/ner/ner_kb_rus.json @@ -145,10 +145,6 @@ "MODELS_PATH": "{ROOT_PATH}/models", "MODEL_PATH": "{MODELS_PATH}/kbqa_mix_lowercase" }, - "requirements": [ - "{DEEPPAVLOV_PATH}/requirements/fasttext.txt", - "{DEEPPAVLOV_PATH}/requirements/tf.txt" - ], "download": [ { "url": "http://files.deeppavlov.ai/deeppavlov_data/ner_kb_rus.tar.gz", diff --git a/deeppavlov/configs/ner/ner_lcquad_bert_ent_and_type.json b/deeppavlov/configs/ner/ner_lcquad_bert_ent_and_type.json index cc206fea6c..0010f48b4f 100644 --- a/deeppavlov/configs/ner/ner_lcquad_bert_ent_and_type.json +++ b/deeppavlov/configs/ner/ner_lcquad_bert_ent_and_type.json @@ -96,10 +96,6 @@ "BERT_PATH": "{DOWNLOADS_PATH}/bert_models_kbqa/cased_L-12_H-768_A-12", "NER_PATH": "{MODELS_PATH}/ner_lcquad_ent_and_type" }, - "requirements": [ - "{DEEPPAVLOV_PATH}/requirements/tf.txt", - "{DEEPPAVLOV_PATH}/requirements/bert_dp.txt" - ], "labels": { "telegram_utils": "NERCoNLL2003Model", "server_utils": "NER" diff --git a/deeppavlov/configs/ner/ner_lcquad_bert_probas.json b/deeppavlov/configs/ner/ner_lcquad_bert_probas.json index 00a2a606e8..60c4febd57 100644 --- a/deeppavlov/configs/ner/ner_lcquad_bert_probas.json +++ b/deeppavlov/configs/ner/ner_lcquad_bert_probas.json @@ -96,10 +96,6 @@ "BERT_PATH": "{DOWNLOADS_PATH}/bert_models/cased_L-12_H-768_A-12", "NER_PATH": "{MODELS_PATH}/ner_lcquad" }, - "requirements": [ - "{DEEPPAVLOV_PATH}/requirements/tf.txt", - "{DEEPPAVLOV_PATH}/requirements/bert_dp.txt" - ], "labels": { "telegram_utils": 
"NERCoNLL2003Model", "server_utils": "NER" diff --git a/deeppavlov/configs/ner/ner_ontonotes.json b/deeppavlov/configs/ner/ner_ontonotes.json index df65ff66fd..ca0827eacf 100644 --- a/deeppavlov/configs/ner/ner_ontonotes.json +++ b/deeppavlov/configs/ner/ner_ontonotes.json @@ -151,10 +151,6 @@ "MODELS_PATH": "{ROOT_PATH}/models", "MODEL_PATH": "{MODELS_PATH}/ner_ontonotes" }, - "requirements": [ - "{DEEPPAVLOV_PATH}/requirements/gensim.txt", - "{DEEPPAVLOV_PATH}/requirements/tf.txt" - ], "download": [ { "url": "http://files.deeppavlov.ai/deeppavlov_data/ner_ontonotes_v3_cpu_compatible.tar.gz", diff --git a/deeppavlov/configs/ner/ner_ontonotes_bert.json b/deeppavlov/configs/ner/ner_ontonotes_bert.json index a0a9595f16..7b67e977f0 100644 --- a/deeppavlov/configs/ner/ner_ontonotes_bert.json +++ b/deeppavlov/configs/ner/ner_ontonotes_bert.json @@ -98,10 +98,6 @@ "BERT_PATH": "{DOWNLOADS_PATH}/bert_models/cased_L-12_H-768_A-12", "NER_PATH": "{MODELS_PATH}/ner_ontonotes_bert" }, - "requirements": [ - "{DEEPPAVLOV_PATH}/requirements/tf.txt", - "{DEEPPAVLOV_PATH}/requirements/bert_dp.txt" - ], "download": [ { diff --git a/deeppavlov/configs/ner/ner_ontonotes_bert_emb.json b/deeppavlov/configs/ner/ner_ontonotes_bert_emb.json index 2319436e6c..513af21f5f 100644 --- a/deeppavlov/configs/ner/ner_ontonotes_bert_emb.json +++ b/deeppavlov/configs/ner/ner_ontonotes_bert_emb.json @@ -112,10 +112,6 @@ "MODEL_PATH": "{MODELS_PATH}/ner_ontonotes_bert_emb", "BERT_PATH": "{DOWNLOADS_PATH}/bert_models/multi_cased_L-12_H-768_A-12_pt" }, - "requirements": [ - "{DEEPPAVLOV_PATH}/requirements/gensim.txt", - "{DEEPPAVLOV_PATH}/requirements/tf.txt" - ], "download": [ { "url": "http://files.deeppavlov.ai/deeppavlov_data/bert/multi_cased_L-12_H-768_A-12_pt.tar.gz", diff --git a/deeppavlov/configs/ner/ner_ontonotes_bert_mult.json b/deeppavlov/configs/ner/ner_ontonotes_bert_mult.json index a596a7599d..da6138d1a2 100644 --- a/deeppavlov/configs/ner/ner_ontonotes_bert_mult.json +++ b/deeppavlov/configs/ner/ner_ontonotes_bert_mult.json @@ -98,10 +98,6 @@ "BERT_PATH": "{DOWNLOADS_PATH}/bert_models/multi_cased_L-12_H-768_A-12", "NER_PATH": "{MODELS_PATH}/ner_ontonotes_bert_mult" }, - "requirements": [ - "{DEEPPAVLOV_PATH}/requirements/tf.txt", - "{DEEPPAVLOV_PATH}/requirements/bert_dp.txt" - ], "download": [ { diff --git a/deeppavlov/configs/ner/ner_ontonotes_bert_probas.json b/deeppavlov/configs/ner/ner_ontonotes_bert_probas.json index f58d66c31e..9b1912fdbb 100644 --- a/deeppavlov/configs/ner/ner_ontonotes_bert_probas.json +++ b/deeppavlov/configs/ner/ner_ontonotes_bert_probas.json @@ -93,10 +93,6 @@ "BERT_PATH": "{DOWNLOADS_PATH}/bert_models/cased_L-12_H-768_A-12", "NER_PATH": "{MODELS_PATH}/ner_ontonotes_bert" }, - "requirements": [ - "{DEEPPAVLOV_PATH}/requirements/tf.txt", - "{DEEPPAVLOV_PATH}/requirements/bert_dp.txt" - ], "download": [ { "url": "http://files.deeppavlov.ai/deeppavlov_data/ner_ontonotes_bert_v1.tar.gz", diff --git a/deeppavlov/configs/ner/ner_ontonotes_m1.json b/deeppavlov/configs/ner/ner_ontonotes_m1.json index 61262379bb..6e4e85d66e 100644 --- a/deeppavlov/configs/ner/ner_ontonotes_m1.json +++ b/deeppavlov/configs/ner/ner_ontonotes_m1.json @@ -117,10 +117,6 @@ "DOWNLOADS_PATH": "{ROOT_PATH}/downloads", "MODEL_PATH": "{ROOT_PATH}/models/ontonotes" }, - "requirements": [ - "{DEEPPAVLOV_PATH}/requirements/gensim.txt", - "{DEEPPAVLOV_PATH}/requirements/tf.txt" - ], "download": [ { "url": "http://files.deeppavlov.ai/deeppavlov_data/ner_ontonotes_m1.tar.gz", diff --git a/deeppavlov/configs/ner/ner_rus.json 
b/deeppavlov/configs/ner/ner_rus.json index bf7b3771e0..b6546f706a 100644 --- a/deeppavlov/configs/ner/ner_rus.json +++ b/deeppavlov/configs/ner/ner_rus.json @@ -163,10 +163,6 @@ "MODELS_PATH": "{ROOT_PATH}/models", "NER_PATH": "{MODELS_PATH}/ner_rus" }, - "requirements": [ - "{DEEPPAVLOV_PATH}/requirements/fasttext.txt", - "{DEEPPAVLOV_PATH}/requirements/tf.txt" - ], "download": [ { "url": "http://files.deeppavlov.ai/deeppavlov_data/ner_rus_v3_cpu_compatible.tar.gz", diff --git a/deeppavlov/configs/ner/ner_rus_bert.json b/deeppavlov/configs/ner/ner_rus_bert.json index 620d42d7aa..9a00116886 100644 --- a/deeppavlov/configs/ner/ner_rus_bert.json +++ b/deeppavlov/configs/ner/ner_rus_bert.json @@ -98,10 +98,6 @@ "BERT_PATH": "{DOWNLOADS_PATH}/bert_models/rubert_cased_L-12_H-768_A-12_v1", "NER_PATH": "{MODELS_PATH}/ner_rus_bert" }, - "requirements": [ - "{DEEPPAVLOV_PATH}/requirements/tf.txt", - "{DEEPPAVLOV_PATH}/requirements/bert_dp.txt" - ], "download": [ { "url": "http://files.deeppavlov.ai/deeppavlov_data/ner_rus_bert_v1.tar.gz", diff --git a/deeppavlov/configs/ner/ner_rus_bert_probas.json b/deeppavlov/configs/ner/ner_rus_bert_probas.json index 1972d1c0db..8e0189dee1 100644 --- a/deeppavlov/configs/ner/ner_rus_bert_probas.json +++ b/deeppavlov/configs/ner/ner_rus_bert_probas.json @@ -93,10 +93,6 @@ "BERT_PATH": "{DOWNLOADS_PATH}/bert_models/rubert_cased_L-12_H-768_A-12_v1", "NER_PATH": "{MODELS_PATH}/ner_rus_bert" }, - "requirements": [ - "{DEEPPAVLOV_PATH}/requirements/tf.txt", - "{DEEPPAVLOV_PATH}/requirements/bert_dp.txt" - ], "download": [ { "url": "http://files.deeppavlov.ai/deeppavlov_data/ner_rus_bert_v1.tar.gz", diff --git a/deeppavlov/configs/ner/ner_rus_bert_torch.json b/deeppavlov/configs/ner/ner_rus_bert_torch.json new file mode 100644 index 0000000000..0c8e5c568b --- /dev/null +++ b/deeppavlov/configs/ner/ner_rus_bert_torch.json @@ -0,0 +1,155 @@ +{ + "dataset_reader": { + "class_name": "conll2003_reader", + "data_path": "{DOWNLOADS_PATH}/total_rus/", + "dataset_name": "collection_rus", + "provide_pos": false + }, + "dataset_iterator": { + "class_name": "data_learning_iterator" + }, + "chainer": { + "in": [ + "x" + ], + "in_y": [ + "y" + ], + "pipe": [ + { + "class_name": "torch_transformers_ner_preprocessor", + "vocab_file": "{TRANSFORMER}", + "do_lower_case": false, + "max_seq_length": 512, + "max_subword_length": 15, + "token_masking_prob": 0.0, + "in": [ + "x" + ], + "out": [ + "x_tokens", + "x_subword_tokens", + "x_subword_tok_ids", + "startofword_markers", + "attention_mask" + ] + }, + { + "id": "tag_vocab", + "class_name": "simple_vocab", + "unk_token": [ + "O" + ], + "pad_with_zeros": true, + "save_path": "{MODEL_PATH}/tag.dict", + "load_path": "{MODEL_PATH}/tag.dict", + "fit_on": [ + "y" + ], + "in": [ + "y" + ], + "out": [ + "y_ind" + ] + }, + { + "class_name": "torch_transformers_sequence_tagger", + "n_tags": "#tag_vocab.len", + "pretrained_bert": "{TRANSFORMER}", + "attention_probs_keep_prob": 0.5, + "return_probas": false, + "encoder_layer_ids": [ + -1 + ], + "optimizer": "AdamW", + "optimizer_parameters": { + "lr": 2e-5, + "weight_decay": 1e-6, + "betas": [ + 0.9, + 0.999 + ], + "eps": 1e-6 + }, + "clip_norm": 1.0, + "min_learning_rate": 1e-7, + "learning_rate_drop_patience": 30, + "learning_rate_drop_div": 1.5, + "load_before_drop": true, + "save_path": "{MODEL_PATH}/model", + "load_path": "{MODEL_PATH}/model", + "in": [ + "x_subword_tok_ids", + "attention_mask", + "startofword_markers" + ], + "in_y": [ + "y_ind" + ], + "out": [ + "y_pred_ind" + ] + }, + { + 
"ref": "tag_vocab", + "in": [ + "y_pred_ind" + ], + "out": [ + "y_pred" + ] + } + ], + "out": [ + "x_tokens", + "y_pred" + ] + }, + "train": { + "epochs": 30, + "batch_size": 10, + "metrics": [ + { + "name": "ner_f1", + "inputs": [ + "y", + "y_pred" + ] + }, + { + "name": "ner_token_f1", + "inputs": [ + "y", + "y_pred" + ] + } + ], + "validation_patience": 100, + "val_every_n_batches": 20, + "log_every_n_batches": 20, + "show_examples": false, + "pytest_max_batches": 2, + "pytest_batch_size": 8, + "evaluation_targets": [ + "valid", + "test" + ], + "class_name": "torch_trainer" + }, + "metadata": { + "variables": { + "ROOT_PATH": "~/.deeppavlov", + "DOWNLOADS_PATH": "{ROOT_PATH}/downloads", + "MODELS_PATH": "{ROOT_PATH}/models", + "TRANSFORMER": "DeepPavlov/rubert-base-cased", + "MODEL_PATH": "{MODELS_PATH}/ner_rus_bert_torch" + }, + "download": [ + { + "url": "http://files.deeppavlov.ai/v1/ner/ner_rus_bert_torch.tar.gz", + "subdir": "{MODELS_PATH}" + } + ] + } +} diff --git a/deeppavlov/configs/ner/slotfill_dstc2.json b/deeppavlov/configs/ner/slotfill_dstc2.json index bd30951611..e1df2f26fd 100644 --- a/deeppavlov/configs/ner/slotfill_dstc2.json +++ b/deeppavlov/configs/ner/slotfill_dstc2.json @@ -50,10 +50,6 @@ "MODELS_PATH": "{ROOT_PATH}/models", "MODEL_PATH": "{MODELS_PATH}/slotfill_dstc2" }, - "requirements": [ - "{DEEPPAVLOV_PATH}/requirements/tf.txt", - "{DEEPPAVLOV_PATH}/requirements/rapidfuzz.txt" - ], "download": [ { "url": "http://files.deeppavlov.ai/deeppavlov_data/dstc_slot_vals.tar.gz", diff --git a/deeppavlov/configs/ner/slotfill_dstc2_raw.json b/deeppavlov/configs/ner/slotfill_dstc2_raw.json index 1f52e81f00..9138d99c01 100644 --- a/deeppavlov/configs/ner/slotfill_dstc2_raw.json +++ b/deeppavlov/configs/ner/slotfill_dstc2_raw.json @@ -44,9 +44,6 @@ "DATA_PATH": "{ROOT_PATH}/downloads/dstc2", "SLOT_VALS_PATH": "{DATA_PATH}/dstc_slot_vals.json" }, - "requirements": [ - "{DEEPPAVLOV_PATH}/requirements/tf.txt" - ], "download": [ { "url": "http://files.deeppavlov.ai/deeppavlov_data/dstc_slot_vals.tar.gz", diff --git a/deeppavlov/configs/ner/slotfill_simple_dstc2_raw.json b/deeppavlov/configs/ner/slotfill_simple_dstc2_raw.json index 1b3389b81d..d6f9750e34 100644 --- a/deeppavlov/configs/ner/slotfill_simple_dstc2_raw.json +++ b/deeppavlov/configs/ner/slotfill_simple_dstc2_raw.json @@ -44,9 +44,6 @@ "DATA_PATH": "{ROOT_PATH}/downloads/simple-dstc2", "SLOT_VALS_PATH": "{DATA_PATH}/dstc_slot_vals.json" }, - "requirements": [ - "{DEEPPAVLOV_PATH}/requirements/tf.txt" - ], "download": [ { "url": "http://files.deeppavlov.ai/deeppavlov_data/dstc_slot_vals.tar.gz", diff --git a/deeppavlov/configs/ner/slotfill_simple_rasa_raw.json b/deeppavlov/configs/ner/slotfill_simple_rasa_raw.json index cf483887d5..1365ebe7f4 100644 --- a/deeppavlov/configs/ner/slotfill_simple_rasa_raw.json +++ b/deeppavlov/configs/ner/slotfill_simple_rasa_raw.json @@ -33,9 +33,6 @@ "DOWNLOADS_PATH": "{ROOT_PATH}/downloads", "DATA_PATH": "{DOWNLOADS_PATH}/rasa_configs_reader" }, - "requirements": [ - "{DEEPPAVLOV_PATH}/requirements/tf.txt" - ], "download": [ { "url": "http://files.deeppavlov.ai/deeppavlov_data/dp_minimal_rasa_demo.tar.gz", diff --git a/deeppavlov/configs/ner/vlsp2016_full.json b/deeppavlov/configs/ner/vlsp2016_full.json index d5b2182f35..ec8d10ffbe 100644 --- a/deeppavlov/configs/ner/vlsp2016_full.json +++ b/deeppavlov/configs/ner/vlsp2016_full.json @@ -156,10 +156,6 @@ "DOWNLOADS_PATH": "{ROOT_PATH}/downloads", "MODELS_PATH": "{ROOT_PATH}/models/vlsp2016_full" }, - "requirements": [ - 
"{DEEPPAVLOV_PATH}/requirements/gensim.txt", - "{DEEPPAVLOV_PATH}/requirements/tf.txt" - ], "download": [ { "url": "http://files.deeppavlov.ai/deeppavlov_data/ner_vlsp2016_full.tar.gz", diff --git a/deeppavlov/configs/odqa/en_odqa_infer_enwiki20161221.json b/deeppavlov/configs/odqa/en_odqa_infer_enwiki20161221.json index 2247f01731..7b011f13d4 100644 --- a/deeppavlov/configs/odqa/en_odqa_infer_enwiki20161221.json +++ b/deeppavlov/configs/odqa/en_odqa_infer_enwiki20161221.json @@ -63,11 +63,6 @@ "MODELS_PATH": "{ROOT_PATH}/models", "CONFIGS_PATH": "{DEEPPAVLOV_PATH}/configs" }, - "requirements": [ - "{DEEPPAVLOV_PATH}/requirements/tf.txt", - "{DEEPPAVLOV_PATH}/requirements/spacy.txt", - "{DEEPPAVLOV_PATH}/requirements/en_core_web_sm.txt" - ], "download": [ ] } diff --git a/deeppavlov/configs/odqa/en_odqa_infer_wiki.json b/deeppavlov/configs/odqa/en_odqa_infer_wiki.json index 6ecb7b407f..dcaee5cf19 100644 --- a/deeppavlov/configs/odqa/en_odqa_infer_wiki.json +++ b/deeppavlov/configs/odqa/en_odqa_infer_wiki.json @@ -45,11 +45,6 @@ "MODELS_PATH": "{ROOT_PATH}/models", "CONFIGS_PATH": "{DEEPPAVLOV_PATH}/configs" }, - "requirements": [ - "{DEEPPAVLOV_PATH}/requirements/tf.txt", - "{DEEPPAVLOV_PATH}/requirements/spacy.txt", - "{DEEPPAVLOV_PATH}/requirements/en_core_web_sm.txt" - ], "download": [ ] } diff --git a/deeppavlov/configs/odqa/en_odqa_pop_infer_enwiki20180211.json b/deeppavlov/configs/odqa/en_odqa_pop_infer_enwiki20180211.json index 9d034898fc..82e5730644 100644 --- a/deeppavlov/configs/odqa/en_odqa_pop_infer_enwiki20180211.json +++ b/deeppavlov/configs/odqa/en_odqa_pop_infer_enwiki20180211.json @@ -75,11 +75,6 @@ "MODELS_PATH": "{ROOT_PATH}/models", "CONFIGS_PATH": "{DEEPPAVLOV_PATH}/configs" }, - "requirements": [ - "{DEEPPAVLOV_PATH}/requirements/tf.txt", - "{DEEPPAVLOV_PATH}/requirements/spacy.txt", - "{DEEPPAVLOV_PATH}/requirements/en_core_web_sm.txt" - ], "download": [ ] } diff --git a/deeppavlov/configs/odqa/ru_odqa_infer_wiki.json b/deeppavlov/configs/odqa/ru_odqa_infer_wiki.json index 5ec716cbea..c84e02125c 100644 --- a/deeppavlov/configs/odqa/ru_odqa_infer_wiki.json +++ b/deeppavlov/configs/odqa/ru_odqa_infer_wiki.json @@ -63,9 +63,6 @@ "MODELS_PATH": "{ROOT_PATH}/models", "CONFIGS_PATH": "{DEEPPAVLOV_PATH}/configs" }, - "requirements": [ - "{DEEPPAVLOV_PATH}/requirements/tf.txt" - ], "download": [ ] } diff --git a/deeppavlov/configs/odqa/ru_odqa_infer_wiki_retr_noans.json b/deeppavlov/configs/odqa/ru_odqa_infer_wiki_retr_noans.json index 1f553addf3..b9c12d682c 100644 --- a/deeppavlov/configs/odqa/ru_odqa_infer_wiki_retr_noans.json +++ b/deeppavlov/configs/odqa/ru_odqa_infer_wiki_retr_noans.json @@ -63,9 +63,6 @@ "MODELS_PATH": "{ROOT_PATH}/models", "CONFIGS_PATH": "{DEEPPAVLOV_PATH}/configs" }, - "requirements": [ - "{DEEPPAVLOV_PATH}/requirements/tf.txt" - ], "download": [ ] } diff --git a/deeppavlov/configs/odqa/ru_odqa_infer_wiki_rubert.json b/deeppavlov/configs/odqa/ru_odqa_infer_wiki_rubert.json index 2174dd4de2..934ff0f9a6 100644 --- a/deeppavlov/configs/odqa/ru_odqa_infer_wiki_rubert.json +++ b/deeppavlov/configs/odqa/ru_odqa_infer_wiki_rubert.json @@ -64,10 +64,6 @@ "MODELS_PATH": "{ROOT_PATH}/models", "CONFIGS_PATH": "{DEEPPAVLOV_PATH}/configs" }, - "requirements": [ - "{DEEPPAVLOV_PATH}/requirements/bert_dp.txt", - "{DEEPPAVLOV_PATH}/requirements/tf.txt" - ], "download": [ ] } diff --git a/deeppavlov/configs/odqa/ru_odqa_infer_wiki_rubert_noans.json b/deeppavlov/configs/odqa/ru_odqa_infer_wiki_rubert_noans.json index be6a883d58..756e28cb97 100644 --- 
a/deeppavlov/configs/odqa/ru_odqa_infer_wiki_rubert_noans.json +++ b/deeppavlov/configs/odqa/ru_odqa_infer_wiki_rubert_noans.json @@ -64,10 +64,6 @@ "MODELS_PATH": "{ROOT_PATH}/models", "CONFIGS_PATH": "{DEEPPAVLOV_PATH}/configs" }, - "requirements": [ - "{DEEPPAVLOV_PATH}/requirements/tf.txt", - "{DEEPPAVLOV_PATH}/requirements/bert_dp.txt" - ], "download": [ ] } diff --git a/deeppavlov/configs/ranking/paraphrase_ident_elmo_interact.json b/deeppavlov/configs/ranking/paraphrase_ident_elmo_interact.json index 2bfb4727cf..458064f0a7 100644 --- a/deeppavlov/configs/ranking/paraphrase_ident_elmo_interact.json +++ b/deeppavlov/configs/ranking/paraphrase_ident_elmo_interact.json @@ -102,10 +102,6 @@ "DOWNLOADS_PATH": "{ROOT_PATH}/downloads", "MODELS_PATH": "{ROOT_PATH}/models" }, - "requirements": [ - "{DEEPPAVLOV_PATH}/requirements/tf.txt", - "{DEEPPAVLOV_PATH}/requirements/tf-hub.txt" - ], "download": [ { "url": "http://files.deeppavlov.ai/deeppavlov_data/paraphraser_elmo_ft_pre_1_model.tar.gz", diff --git a/deeppavlov/configs/ranking/paraphrase_ident_paraphraser.json b/deeppavlov/configs/ranking/paraphrase_ident_paraphraser.json index 13d20ccdb1..d9cb4c5ec6 100644 --- a/deeppavlov/configs/ranking/paraphrase_ident_paraphraser.json +++ b/deeppavlov/configs/ranking/paraphrase_ident_paraphraser.json @@ -90,10 +90,6 @@ "DOWNLOADS_PATH": "{ROOT_PATH}/downloads", "MODELS_PATH": "{ROOT_PATH}/models" }, - "requirements": [ - "{DEEPPAVLOV_PATH}/requirements/tf.txt", - "{DEEPPAVLOV_PATH}/requirements/fasttext.txt" - ], "download": [ { "url": "http://files.deeppavlov.ai/datasets/paraphraser.zip", diff --git a/deeppavlov/configs/ranking/paraphrase_ident_paraphraser_elmo.json b/deeppavlov/configs/ranking/paraphrase_ident_paraphraser_elmo.json index 1592ab463b..6916a169d9 100644 --- a/deeppavlov/configs/ranking/paraphrase_ident_paraphraser_elmo.json +++ b/deeppavlov/configs/ranking/paraphrase_ident_paraphraser_elmo.json @@ -95,10 +95,6 @@ "DOWNLOADS_PATH": "{ROOT_PATH}/downloads", "MODELS_PATH": "{ROOT_PATH}/models" }, - "requirements": [ - "{DEEPPAVLOV_PATH}/requirements/tf.txt", - "{DEEPPAVLOV_PATH}/requirements/tf-hub.txt" - ], "download": [ { "url": "http://files.deeppavlov.ai/datasets/paraphraser.zip", diff --git a/deeppavlov/configs/ranking/paraphrase_ident_paraphraser_interact.json b/deeppavlov/configs/ranking/paraphrase_ident_paraphraser_interact.json index 7bae82f9b3..4c6e3fa28b 100644 --- a/deeppavlov/configs/ranking/paraphrase_ident_paraphraser_interact.json +++ b/deeppavlov/configs/ranking/paraphrase_ident_paraphraser_interact.json @@ -99,10 +99,6 @@ "DOWNLOADS_PATH": "{ROOT_PATH}/downloads", "MODELS_PATH": "{ROOT_PATH}/models" }, - "requirements": [ - "{DEEPPAVLOV_PATH}/requirements/tf.txt", - "{DEEPPAVLOV_PATH}/requirements/fasttext.txt" - ], "download": [ { "url": "http://files.deeppavlov.ai/deeppavlov_data/paraphrase_ident_paraphraser.tar.gz", diff --git a/deeppavlov/configs/ranking/paraphrase_ident_paraphraser_pretrain.json b/deeppavlov/configs/ranking/paraphrase_ident_paraphraser_pretrain.json index 146a9844cb..cc084628ae 100644 --- a/deeppavlov/configs/ranking/paraphrase_ident_paraphraser_pretrain.json +++ b/deeppavlov/configs/ranking/paraphrase_ident_paraphraser_pretrain.json @@ -90,10 +90,6 @@ "DOWNLOADS_PATH": "{ROOT_PATH}/downloads", "MODELS_PATH": "{ROOT_PATH}/models" }, - "requirements": [ - "{DEEPPAVLOV_PATH}/requirements/tf.txt", - "{DEEPPAVLOV_PATH}/requirements/fasttext.txt" - ], "download": [ { "url": "http://files.deeppavlov.ai/datasets/paraphraser_pretrain_train.zip", diff 
--git a/deeppavlov/configs/ranking/paraphrase_ident_paraphraser_tune.json b/deeppavlov/configs/ranking/paraphrase_ident_paraphraser_tune.json index 6b535a2452..398c343936 100644 --- a/deeppavlov/configs/ranking/paraphrase_ident_paraphraser_tune.json +++ b/deeppavlov/configs/ranking/paraphrase_ident_paraphraser_tune.json @@ -90,10 +90,6 @@ "DOWNLOADS_PATH": "{ROOT_PATH}/downloads", "MODELS_PATH": "{ROOT_PATH}/models" }, - "requirements": [ - "{DEEPPAVLOV_PATH}/requirements/tf.txt", - "{DEEPPAVLOV_PATH}/requirements/fasttext.txt" - ], "download": [ { "url": "http://files.deeppavlov.ai/datasets/paraphraser.zip", diff --git a/deeppavlov/configs/ranking/paraphrase_ident_qqp.json b/deeppavlov/configs/ranking/paraphrase_ident_qqp.json index ce282512fa..5a6f5bb4f5 100644 --- a/deeppavlov/configs/ranking/paraphrase_ident_qqp.json +++ b/deeppavlov/configs/ranking/paraphrase_ident_qqp.json @@ -93,10 +93,6 @@ "DOWNLOADS_PATH": "{ROOT_PATH}/downloads", "MODELS_PATH": "{ROOT_PATH}/models" }, - "requirements": [ - "{DEEPPAVLOV_PATH}/requirements/tf.txt", - "{DEEPPAVLOV_PATH}/requirements/fasttext.txt" - ], "download": [ { "url": "http://files.deeppavlov.ai/datasets/quora_question_pairs.zip", diff --git a/deeppavlov/configs/ranking/paraphrase_ident_qqp_bilstm.json b/deeppavlov/configs/ranking/paraphrase_ident_qqp_bilstm.json index 14a8a10dbe..53c1f01f97 100644 --- a/deeppavlov/configs/ranking/paraphrase_ident_qqp_bilstm.json +++ b/deeppavlov/configs/ranking/paraphrase_ident_qqp_bilstm.json @@ -93,10 +93,6 @@ "DOWNLOADS_PATH": "{ROOT_PATH}/downloads", "MODELS_PATH": "{ROOT_PATH}/models" }, - "requirements": [ - "{DEEPPAVLOV_PATH}/requirements/tf.txt", - "{DEEPPAVLOV_PATH}/requirements/fasttext.txt" - ], "download": [ { "url": "http://files.deeppavlov.ai/datasets/quora_question_pairs.zip", diff --git a/deeppavlov/configs/ranking/paraphrase_ident_qqp_bilstm_interact.json b/deeppavlov/configs/ranking/paraphrase_ident_qqp_bilstm_interact.json index f9671281a6..de19ee9901 100644 --- a/deeppavlov/configs/ranking/paraphrase_ident_qqp_bilstm_interact.json +++ b/deeppavlov/configs/ranking/paraphrase_ident_qqp_bilstm_interact.json @@ -101,10 +101,6 @@ "DOWNLOADS_PATH": "{ROOT_PATH}/downloads", "MODELS_PATH": "{ROOT_PATH}/models" }, - "requirements": [ - "{DEEPPAVLOV_PATH}/requirements/tf.txt", - "{DEEPPAVLOV_PATH}/requirements/fasttext.txt" - ], "download": [ { "url": "http://files.deeppavlov.ai/deeppavlov_data/paraphrase_ident_qqp_bilstm.tar.gz", diff --git a/deeppavlov/configs/ranking/paraphrase_ident_qqp_interact.json b/deeppavlov/configs/ranking/paraphrase_ident_qqp_interact.json index 7712935491..626e3bb548 100644 --- a/deeppavlov/configs/ranking/paraphrase_ident_qqp_interact.json +++ b/deeppavlov/configs/ranking/paraphrase_ident_qqp_interact.json @@ -101,10 +101,6 @@ "DOWNLOADS_PATH": "{ROOT_PATH}/downloads", "MODELS_PATH": "{ROOT_PATH}/models" }, - "requirements": [ - "{DEEPPAVLOV_PATH}/requirements/tf.txt", - "{DEEPPAVLOV_PATH}/requirements/fasttext.txt" - ], "download": [ { "url": "http://files.deeppavlov.ai/deeppavlov_data/paraphrase_ident_qqp_27112020.tar.gz", diff --git a/deeppavlov/configs/ranking/paraphrase_ident_tune_interact.json b/deeppavlov/configs/ranking/paraphrase_ident_tune_interact.json index 853c7f0562..63b287f74e 100644 --- a/deeppavlov/configs/ranking/paraphrase_ident_tune_interact.json +++ b/deeppavlov/configs/ranking/paraphrase_ident_tune_interact.json @@ -99,10 +99,6 @@ "DOWNLOADS_PATH": "{ROOT_PATH}/downloads", "MODELS_PATH": "{ROOT_PATH}/models" }, - "requirements": [ - 
"{DEEPPAVLOV_PATH}/requirements/tf.txt", - "{DEEPPAVLOV_PATH}/requirements/fasttext.txt" - ], "download": [ { "url": "http://files.deeppavlov.ai/deeppavlov_data/paraphrase_ident_paraphraser_tuned.tar.gz", diff --git a/deeppavlov/configs/ranking/ranking_default.json b/deeppavlov/configs/ranking/ranking_default.json index 6f674ecaac..8d3ac4f15f 100644 --- a/deeppavlov/configs/ranking/ranking_default.json +++ b/deeppavlov/configs/ranking/ranking_default.json @@ -92,10 +92,6 @@ "DOWNLOADS_PATH": "{ROOT_PATH}/downloads", "MODELS_PATH": "{ROOT_PATH}/models" }, - "requirements": [ - "{DEEPPAVLOV_PATH}/requirements/tf.txt", - "{DEEPPAVLOV_PATH}/requirements/fasttext.txt" - ], "download": [ { "url": "http://files.deeppavlov.ai/datasets/default_ranking_data.tar.gz", diff --git a/deeppavlov/configs/ranking/ranking_default_triplet.json b/deeppavlov/configs/ranking/ranking_default_triplet.json index 7416d58970..fdfeb74621 100644 --- a/deeppavlov/configs/ranking/ranking_default_triplet.json +++ b/deeppavlov/configs/ranking/ranking_default_triplet.json @@ -94,10 +94,6 @@ "DOWNLOADS_PATH": "{ROOT_PATH}/downloads", "MODELS_PATH": "{ROOT_PATH}/models" }, - "requirements": [ - "{DEEPPAVLOV_PATH}/requirements/tf.txt", - "{DEEPPAVLOV_PATH}/requirements/fasttext.txt" - ], "download": [ { "url": "http://files.deeppavlov.ai/datasets/default_ranking_data_triplet.tar.gz", diff --git a/deeppavlov/configs/ranking/ranking_insurance.json b/deeppavlov/configs/ranking/ranking_insurance.json index 56d5158983..bdb6572365 100644 --- a/deeppavlov/configs/ranking/ranking_insurance.json +++ b/deeppavlov/configs/ranking/ranking_insurance.json @@ -97,10 +97,6 @@ "DOWNLOADS_PATH": "{ROOT_PATH}/downloads", "MODELS_PATH": "{ROOT_PATH}/models" }, - "requirements": [ - "{DEEPPAVLOV_PATH}/requirements/tf.txt", - "{DEEPPAVLOV_PATH}/requirements/fasttext.txt" - ], "download": [ { "url": "http://files.deeppavlov.ai/datasets/insuranceQA-master.zip", diff --git a/deeppavlov/configs/ranking/ranking_insurance_interact.json b/deeppavlov/configs/ranking/ranking_insurance_interact.json index 4c1e2f9671..53a9fc8839 100644 --- a/deeppavlov/configs/ranking/ranking_insurance_interact.json +++ b/deeppavlov/configs/ranking/ranking_insurance_interact.json @@ -106,10 +106,6 @@ "DOWNLOADS_PATH": "{ROOT_PATH}/downloads", "MODELS_PATH": "{ROOT_PATH}/models" }, - "requirements": [ - "{DEEPPAVLOV_PATH}/requirements/tf.txt", - "{DEEPPAVLOV_PATH}/requirements/fasttext.txt" - ], "download": [ { "url": "http://files.deeppavlov.ai/deeppavlov_data/insurance_ranking.tar.gz", diff --git a/deeppavlov/configs/ranking/ranking_ubuntu_v1_mt_word2vec_dam.json b/deeppavlov/configs/ranking/ranking_ubuntu_v1_mt_word2vec_dam.json index 33600486eb..fab0f536f1 100644 --- a/deeppavlov/configs/ranking/ranking_ubuntu_v1_mt_word2vec_dam.json +++ b/deeppavlov/configs/ranking/ranking_ubuntu_v1_mt_word2vec_dam.json @@ -113,10 +113,6 @@ "MODELS_PATH": "{ROOT_PATH}/models", "NUM_CONTEXT_TURNS": 10 }, - "requirements": [ - "{DEEPPAVLOV_PATH}/requirements/tf.txt", - "{DEEPPAVLOV_PATH}/requirements/gensim.txt" - ], "download": [ { "url": "http://files.deeppavlov.ai/deeppavlov_data/ubuntu_v1_mt_word2vec_dam.tar.gz", diff --git a/deeppavlov/configs/ranking/ranking_ubuntu_v1_mt_word2vec_dam_transformer.json b/deeppavlov/configs/ranking/ranking_ubuntu_v1_mt_word2vec_dam_transformer.json index f4f381a2d1..429eb5d32d 100644 --- a/deeppavlov/configs/ranking/ranking_ubuntu_v1_mt_word2vec_dam_transformer.json +++ b/deeppavlov/configs/ranking/ranking_ubuntu_v1_mt_word2vec_dam_transformer.json @@ 
-116,11 +116,6 @@ "MODELS_PATH": "{ROOT_PATH}/models", "NUM_CONTEXT_TURNS": 10 }, - "requirements": [ - "{DEEPPAVLOV_PATH}/requirements/tf.txt", - "{DEEPPAVLOV_PATH}/requirements/tf-hub.txt", - "{DEEPPAVLOV_PATH}/requirements/gensim.txt" - ], "download": [ { "url": "http://files.deeppavlov.ai/deeppavlov_data/ubuntu_v1_mt_word2vec_dam_transformer.tar.gz", diff --git a/deeppavlov/configs/ranking/ranking_ubuntu_v1_mt_word2vec_smn.json b/deeppavlov/configs/ranking/ranking_ubuntu_v1_mt_word2vec_smn.json index 2864e86337..d2f44feafb 100644 --- a/deeppavlov/configs/ranking/ranking_ubuntu_v1_mt_word2vec_smn.json +++ b/deeppavlov/configs/ranking/ranking_ubuntu_v1_mt_word2vec_smn.json @@ -110,10 +110,6 @@ "MODELS_PATH": "{ROOT_PATH}/models", "NUM_CONTEXT_TURNS": 10 }, - "requirements": [ - "{DEEPPAVLOV_PATH}/requirements/tf.txt", - "{DEEPPAVLOV_PATH}/requirements/gensim.txt" - ], "download": [ { "url": "http://files.deeppavlov.ai/deeppavlov_data/ubuntu_v1_mt_word2vec_smn.tar.gz", diff --git a/deeppavlov/configs/ranking/ranking_ubuntu_v2.json b/deeppavlov/configs/ranking/ranking_ubuntu_v2.json index 6b738ff847..c71543371e 100644 --- a/deeppavlov/configs/ranking/ranking_ubuntu_v2.json +++ b/deeppavlov/configs/ranking/ranking_ubuntu_v2.json @@ -90,10 +90,6 @@ "DOWNLOADS_PATH": "{ROOT_PATH}/downloads", "MODELS_PATH": "{ROOT_PATH}/models" }, - "requirements": [ - "{DEEPPAVLOV_PATH}/requirements/tf.txt", - "{DEEPPAVLOV_PATH}/requirements/fasttext.txt" - ], "download": [ { "url": "http://files.deeppavlov.ai/datasets/ubuntu_v2_data.tar.gz", diff --git a/deeppavlov/configs/ranking/ranking_ubuntu_v2_bert_sep.json b/deeppavlov/configs/ranking/ranking_ubuntu_v2_bert_sep.json index 55fae0e257..200ac499bd 100644 --- a/deeppavlov/configs/ranking/ranking_ubuntu_v2_bert_sep.json +++ b/deeppavlov/configs/ranking/ranking_ubuntu_v2_bert_sep.json @@ -54,10 +54,6 @@ "MODELS_PATH": "{ROOT_PATH}/models", "MODEL_PATH": "{MODELS_PATH}/ubuntu_v2_uncased_bert_sep_model" }, - "requirements": [ - "{DEEPPAVLOV_PATH}/requirements/tf.txt", - "{DEEPPAVLOV_PATH}/requirements/bert_dp.txt" - ], "download": [ { "url": "http://files.deeppavlov.ai/datasets/ubuntu_v2_data.tar.gz", diff --git a/deeppavlov/configs/ranking/ranking_ubuntu_v2_bert_sep_interact.json b/deeppavlov/configs/ranking/ranking_ubuntu_v2_bert_sep_interact.json index 669ee578ed..8884dcfc24 100644 --- a/deeppavlov/configs/ranking/ranking_ubuntu_v2_bert_sep_interact.json +++ b/deeppavlov/configs/ranking/ranking_ubuntu_v2_bert_sep_interact.json @@ -72,10 +72,6 @@ "MODELS_PATH": "{ROOT_PATH}/models", "MODEL_PATH": "{MODELS_PATH}/ubuntu_v2_uncased_bert_sep_predictor_model" }, - "requirements": [ - "{DEEPPAVLOV_PATH}/requirements/tf.txt", - "{DEEPPAVLOV_PATH}/requirements/bert_dp.txt" - ], "download": [ { "url": "http://files.deeppavlov.ai/datasets/ubuntu_v2_data.tar.gz", diff --git a/deeppavlov/configs/ranking/ranking_ubuntu_v2_bert_uncased.json b/deeppavlov/configs/ranking/ranking_ubuntu_v2_bert_uncased.json index 10232ee36c..266fe02630 100644 --- a/deeppavlov/configs/ranking/ranking_ubuntu_v2_bert_uncased.json +++ b/deeppavlov/configs/ranking/ranking_ubuntu_v2_bert_uncased.json @@ -54,10 +54,6 @@ "MODELS_PATH": "{ROOT_PATH}/models", "MODEL_PATH": "{MODELS_PATH}/ubuntu_v2_uncased_bert_model" }, - "requirements": [ - "{DEEPPAVLOV_PATH}/requirements/tf.txt", - "{DEEPPAVLOV_PATH}/requirements/bert_dp.txt" - ], "download": [ { "url": "http://files.deeppavlov.ai/datasets/ubuntu_v2_data.tar.gz", diff --git a/deeppavlov/configs/ranking/ranking_ubuntu_v2_interact.json 
b/deeppavlov/configs/ranking/ranking_ubuntu_v2_interact.json index d57dafb433..1d7be11e17 100644 --- a/deeppavlov/configs/ranking/ranking_ubuntu_v2_interact.json +++ b/deeppavlov/configs/ranking/ranking_ubuntu_v2_interact.json @@ -98,10 +98,6 @@ "DOWNLOADS_PATH": "{ROOT_PATH}/downloads", "MODELS_PATH": "{ROOT_PATH}/models" }, - "requirements": [ - "{DEEPPAVLOV_PATH}/requirements/tf.txt", - "{DEEPPAVLOV_PATH}/requirements/fasttext.txt" - ], "download": [ { "url": "http://files.deeppavlov.ai/deeppavlov_data/ubuntu_v2_ranking.tar.gz", diff --git a/deeppavlov/configs/ranking/ranking_ubuntu_v2_mt.json b/deeppavlov/configs/ranking/ranking_ubuntu_v2_mt.json index add8bdc538..499bd3d3dc 100644 --- a/deeppavlov/configs/ranking/ranking_ubuntu_v2_mt.json +++ b/deeppavlov/configs/ranking/ranking_ubuntu_v2_mt.json @@ -93,10 +93,6 @@ "MODELS_PATH": "{ROOT_PATH}/models", "NUM_CONTEXT_TURNS": 10 }, - "requirements": [ - "{DEEPPAVLOV_PATH}/requirements/tf.txt", - "{DEEPPAVLOV_PATH}/requirements/fasttext.txt" - ], "download": [ { "url": "http://files.deeppavlov.ai/datasets/ubuntu_v2_data.tar.gz", diff --git a/deeppavlov/configs/ranking/ranking_ubuntu_v2_mt_interact.json b/deeppavlov/configs/ranking/ranking_ubuntu_v2_mt_interact.json index 835a30cb57..3ece2399dd 100644 --- a/deeppavlov/configs/ranking/ranking_ubuntu_v2_mt_interact.json +++ b/deeppavlov/configs/ranking/ranking_ubuntu_v2_mt_interact.json @@ -103,10 +103,6 @@ "NUM_CONTEXT_TURNS": 10 }, - "requirements": [ - "{DEEPPAVLOV_PATH}/requirements/tf.txt", - "{DEEPPAVLOV_PATH}/requirements/fasttext.txt" - ], "download": [ { "url": "http://files.deeppavlov.ai/deeppavlov_data/ubuntu_v2_mt_ranking.tar.gz", diff --git a/deeppavlov/configs/ranking/ranking_ubuntu_v2_mt_word2vec_dam.json b/deeppavlov/configs/ranking/ranking_ubuntu_v2_mt_word2vec_dam.json index 8af5b566d9..24390093f4 100644 --- a/deeppavlov/configs/ranking/ranking_ubuntu_v2_mt_word2vec_dam.json +++ b/deeppavlov/configs/ranking/ranking_ubuntu_v2_mt_word2vec_dam.json @@ -113,10 +113,6 @@ "MODELS_PATH": "{ROOT_PATH}/models", "NUM_CONTEXT_TURNS": 10 }, - "requirements": [ - "{DEEPPAVLOV_PATH}/requirements/tf.txt", - "{DEEPPAVLOV_PATH}/requirements/gensim.txt" - ], "download": [ { "url": "http://files.deeppavlov.ai/deeppavlov_data/ubuntu_v2_mt_word2vec_dam.tar.gz", diff --git a/deeppavlov/configs/ranking/ranking_ubuntu_v2_mt_word2vec_dam_transformer.json b/deeppavlov/configs/ranking/ranking_ubuntu_v2_mt_word2vec_dam_transformer.json index aaaca46106..fb7d8aa31f 100644 --- a/deeppavlov/configs/ranking/ranking_ubuntu_v2_mt_word2vec_dam_transformer.json +++ b/deeppavlov/configs/ranking/ranking_ubuntu_v2_mt_word2vec_dam_transformer.json @@ -116,11 +116,6 @@ "MODELS_PATH": "{ROOT_PATH}/models", "NUM_CONTEXT_TURNS": 10 }, - "requirements": [ - "{DEEPPAVLOV_PATH}/requirements/tf.txt", - "{DEEPPAVLOV_PATH}/requirements/tf-hub.txt", - "{DEEPPAVLOV_PATH}/requirements/gensim.txt" - ], "download": [ { "url": "http://files.deeppavlov.ai/deeppavlov_data/ubuntu_v2_mt_word2vec_dam_transformer.tar.gz", diff --git a/deeppavlov/configs/ranking/ranking_ubuntu_v2_mt_word2vec_smn.json b/deeppavlov/configs/ranking/ranking_ubuntu_v2_mt_word2vec_smn.json index 4e6eece3a5..e6ef4cdd5e 100644 --- a/deeppavlov/configs/ranking/ranking_ubuntu_v2_mt_word2vec_smn.json +++ b/deeppavlov/configs/ranking/ranking_ubuntu_v2_mt_word2vec_smn.json @@ -109,10 +109,6 @@ "MODELS_PATH": "{ROOT_PATH}/models", "NUM_CONTEXT_TURNS": 10 }, - "requirements": [ - "{DEEPPAVLOV_PATH}/requirements/tf.txt", - "{DEEPPAVLOV_PATH}/requirements/gensim.txt" - 
], "download": [ { "url": "http://files.deeppavlov.ai/deeppavlov_data/ubuntu_v2_mt_word2vec_smn.tar.gz", diff --git a/deeppavlov/configs/ranking/ranking_ubuntu_v2_torch_bert_uncased.json b/deeppavlov/configs/ranking/ranking_ubuntu_v2_torch_bert_uncased.json index d6254d891d..67d76ed6ef 100644 --- a/deeppavlov/configs/ranking/ranking_ubuntu_v2_torch_bert_uncased.json +++ b/deeppavlov/configs/ranking/ranking_ubuntu_v2_torch_bert_uncased.json @@ -83,10 +83,6 @@ "MODELS_PATH": "{ROOT_PATH}/models", "MODEL_PATH": "{MODELS_PATH}/ubuntu_v2_uncased_torch_bert_model" }, - "requirements": [ - "{DEEPPAVLOV_PATH}/requirements/pytorch.txt", - "{DEEPPAVLOV_PATH}/requirements/transformers.txt" - ], "download": [ { "url": "http://files.deeppavlov.ai/datasets/ubuntu_v2_data.tar.gz", diff --git a/deeppavlov/configs/ranking/rel_ranking.json b/deeppavlov/configs/ranking/rel_ranking.json index 6525e299a2..22e7008b14 100644 --- a/deeppavlov/configs/ranking/rel_ranking.json +++ b/deeppavlov/configs/ranking/rel_ranking.json @@ -70,10 +70,6 @@ "MODELS_PATH": "{ROOT_PATH}/models", "MODEL_PATH": "{MODELS_PATH}/rel_ranking" }, - "requirements": [ - "{DEEPPAVLOV_PATH}/requirements/tf.txt", - "{DEEPPAVLOV_PATH}/requirements/bert_dp.txt" - ], "download": [ { "url": "http://files.deeppavlov.ai/kbqa/datasets/rel_ranking.tar.gz", diff --git a/deeppavlov/configs/sentence_segmentation/sentseg_dailydialog.json b/deeppavlov/configs/sentence_segmentation/sentseg_dailydialog.json index 60e9ffe573..3e3737ee67 100644 --- a/deeppavlov/configs/sentence_segmentation/sentseg_dailydialog.json +++ b/deeppavlov/configs/sentence_segmentation/sentseg_dailydialog.json @@ -116,10 +116,6 @@ "DOWNLOADS_PATH": "{ROOT_PATH}/downloads", "MODELS_PATH": "{ROOT_PATH}/models/sentseg_dailydialog" }, - "requirements": [ - "{DEEPPAVLOV_PATH}/requirements/gensim.txt", - "{DEEPPAVLOV_PATH}/requirements/tf.txt" - ], "download": [ { "url": "http://files.deeppavlov.ai/deeppavlov_data/sentseg_dailydialog.tar.gz", diff --git a/deeppavlov/configs/seq2seq_go_bot/bot_kvret.json b/deeppavlov/configs/seq2seq_go_bot/bot_kvret.json deleted file mode 100644 index 1498263f90..0000000000 --- a/deeppavlov/configs/seq2seq_go_bot/bot_kvret.json +++ /dev/null @@ -1,143 +0,0 @@ -{ - "dataset_reader": { - "class_name": "kvret_reader", - "data_path": "{DOWNLOADS_PATH}/kvret" - }, - "dataset_iterator": { - "class_name": "kvret_dialog_iterator", - "shuffle": false - }, - "chainer": { - "in": ["x_text", "dialog_id"], - "in_y": ["y_text", "y_domain"], - "out": ["prediction_text"], - "pipe": [ - { - "id": "stream_spacy", - "class_name": "stream_spacy_tokenizer", - "lowercase": true, - "alphas_only": false, - "in": ["x_text"], - "out": ["x_tokens"] - }, - { - "id": "dialog_history", - "class_name": "dialog_state", - "in": ["dialog_id"], - "out": ["history_tokens"] - }, - { - "id": "kb", - "class_name": "knowledge_base", - "tokenizer": "#stream_spacy", - "in": ["dialog_id"], - "out": ["kb_entries"], - "save_path": "{MODELS_PATH}/seq2seq_go_bot/kvret_kb.json", - "load_path": "{MODELS_PATH}/seq2seq_go_bot/kvret_kb.json" - }, - { - "id": "src_token_vocab", - "class_name": "simple_vocab", - "unk_token": "", - "special_tokens": ["", ""], - "save_path": "{MODELS_PATH}/vocabs/kvret_src_tokens.dict", - "load_path": "{MODELS_PATH}/vocabs/kvret_src_tokens.dict" - }, - { - "id": "tgt_token_vocab", - "class_name": "simple_vocab", - "unk_token": "", - "special_tokens": ["", "", ""], - "save_path": "{MODELS_PATH}/vocabs/kvret_tgt_tokens.dict", - "load_path": 
"{MODELS_PATH}/vocabs/kvret_tgt_tokens.dict" - }, - { - "id": "token_embedder", - "class_name": "fasttext", - "load_path": "{DOWNLOADS_PATH}/embeddings/wiki.en.bin" - }, - { - "in": ["x_tokens", "history_tokens", "kb_entries"], - "out": ["prediction_norm_tokens"], - "main": true, - "class_name": "seq2seq_go_bot", - "load_path": "{MODELS_PATH}/seq2seq_go_bot/model", - "save_path": "{MODELS_PATH}/seq2seq_go_bot/model", - "start_of_sequence_token": "", - "end_of_sequence_token": "", - "embedder": "#token_embedder", - "network_parameters": { - "learning_rate": 0.0002, - "dropout_rate": 0.2, - "state_dropout_rate": 0.07, - "beam_width": 1, - "target_start_of_sequence_index": "#tgt_token_vocab.__getitem__('')", - "target_end_of_sequence_index": "#tgt_token_vocab.__getitem__('')", - "source_vocab_size": "#src_token_vocab.__len__()", - "target_vocab_size": "#tgt_token_vocab.__len__()", - "hidden_size": 256, - "kb_attention_hidden_sizes": [64, 32] - }, - "debug": false, - "source_vocab": "#src_token_vocab", - "target_vocab": "#tgt_token_vocab", - "knowledge_base_keys": "#kb.primary_keys" - }, - { - "class_name": "knowledge_base_entity_normalizer", - "denormalize": true, - "in": ["prediction_norm_tokens", "kb_entries"], - "out": ["prediction_tokens"] - }, - { - "ref": "stream_spacy", - "in": ["prediction_tokens"], - "out": ["prediction_text"] - }, - { - "ref": "dialog_history", - "in": ["dialog_id", "x_tokens"], - "out": ["x_history_tokens"] - }, - { - "ref": "dialog_history", - "in": ["dialog_id", "prediction_tokens"], - "out": ["x_pred_history_tokens"] - } - ] - }, - "train": { - "class_name": "fit_trainer", - "evaluation_targets": [ - "valid", - "test" - ] - }, - "metadata": { - "variables": { - "ROOT_PATH": "~/.deeppavlov", - "DOWNLOADS_PATH": "{ROOT_PATH}/downloads", - "MODELS_PATH": "{ROOT_PATH}/models" - }, - "requirements": [ - "{DEEPPAVLOV_PATH}/requirements/tf.txt", - "{DEEPPAVLOV_PATH}/requirements/spacy.txt", - "{DEEPPAVLOV_PATH}/requirements/en_core_web_sm.txt", - "{DEEPPAVLOV_PATH}/requirements/fasttext.txt" - ], - "download": [ - { - "url": "http://files.deeppavlov.ai/deeppavlov_data/vocabs.tar.gz", - "subdir": "{MODELS_PATH}" - }, - { - "url": "http://files.deeppavlov.ai/deeppavlov_data/seq2seq_go_bot_v2.tar.gz", - "subdir": "{MODELS_PATH}" - }, - { - "url": "http://files.deeppavlov.ai/deeppavlov_data/embeddings/wiki.en.bin", - "subdir": "{DOWNLOADS_PATH}/embeddings" - } - ] - } -} diff --git a/deeppavlov/configs/seq2seq_go_bot/bot_kvret_train.json b/deeppavlov/configs/seq2seq_go_bot/bot_kvret_train.json deleted file mode 100644 index 391cca444d..0000000000 --- a/deeppavlov/configs/seq2seq_go_bot/bot_kvret_train.json +++ /dev/null @@ -1,176 +0,0 @@ -{ - "dataset_reader": { - "class_name": "kvret_reader", - "data_path": "{DOWNLOADS_PATH}/kvret" - }, - "dataset_iterator": { - "class_name": "kvret_dialog_iterator", - "shuffle": false - }, - "chainer": { - "in": ["x_text", "dialog_id", "history", "kb_columns", "kb_items"], - "in_y": ["y_text", "y_domain"], - "out": ["prediction_text"], - "pipe": [ - { - "id": "stream_spacy", - "class_name": "stream_spacy_tokenizer", - "lowercase": true, - "alphas_only": false, - "in": ["x_text"], - "out": ["x_tokens"] - }, - { - "ref": "stream_spacy", - "in": ["y_text"], - "out": ["y_tokens"] - }, - { - "ref": "stream_spacy", - "in": ["history"], - "out": ["history_tokens"] - }, - { - "id": "kb", - "class_name": "knowledge_base", - "fit_on": ["dialog_id", "kb_columns", "kb_items"], - "tokenizer": "#stream_spacy", - "in": ["dialog_id", "kb_columns", 
"kb_items"], - "out": ["kb_entries"], - "save_path": "{MODELS_PATH}/seq2seq_go_bot/kvret_kb.json", - "load_path": "{MODELS_PATH}/seq2seq_go_bot/kvret_kb.json" - }, - { - "class_name": "knowledge_base_entity_normalizer", - "in": ["y_tokens", "kb_entries"], - "out": ["y_norm_tokens"] - }, - { - "class_name": "knowledge_base_entity_normalizer", - "remove": true, - "in": ["y_tokens", "kb_entries"], - "out": ["y_without_entities_tokens"] - }, - { - "id": "src_token_vocab", - "fit_on": ["x_tokens", "y_tokens"], - "class_name": "simple_vocab", - "min_freq": 2, - "unk_token": "", - "special_tokens": ["", ""], - "save_path": "{MODELS_PATH}/vocabs/kvret_src_tokens.dict", - "load_path": "{MODELS_PATH}/vocabs/kvret_src_tokens.dict" - }, - { - "id": "tgt_token_vocab", - "fit_on": ["y_without_entities_tokens"], - "class_name": "simple_vocab", - "unk_token": "", - "special_tokens": ["", "", ""], - "save_path": "{MODELS_PATH}/vocabs/kvret_tgt_tokens.dict", - "load_path": "{MODELS_PATH}/vocabs/kvret_tgt_tokens.dict" - }, - { - "id": "token_embedder", - "class_name": "fasttext", - "load_path": "{DOWNLOADS_PATH}/embeddings/wiki.en.bin" - }, - { - "in": ["x_tokens", "history_tokens", "kb_entries"], - "in_y": ["y_norm_tokens"], - "out": ["prediction_norm_tokens"], - "main": true, - "class_name": "seq2seq_go_bot", - "load_path": "{MODELS_PATH}/seq2seq_go_bot/model", - "save_path": "{MODELS_PATH}/seq2seq_go_bot/model", - "start_of_sequence_token": "", - "end_of_sequence_token": "", - "embedder": "#token_embedder", - "network_parameters": { - "learning_rate": 0.0002, - "dropout_rate": 0.2, - "state_dropout_rate": 0.07, - "beam_width": 1, - "target_start_of_sequence_index": "#tgt_token_vocab.__getitem__('')", - "target_end_of_sequence_index": "#tgt_token_vocab.__getitem__('')", - "source_vocab_size": "#src_token_vocab.__len__()", - "target_vocab_size": "#tgt_token_vocab.__len__()", - "hidden_size": 256, - "kb_attention_hidden_sizes": [64, 32] - }, - "debug": false, - "source_vocab": "#src_token_vocab", - "target_vocab": "#tgt_token_vocab", - "knowledge_base_keys": "#kb.primary_keys" - }, - { - "class_name": "knowledge_base_entity_normalizer", - "denormalize": true, - "in": ["prediction_norm_tokens", "kb_entries"], - "out": ["prediction_tokens"] - }, - { - "ref": "stream_spacy", - "in": ["prediction_tokens"], - "out": ["prediction_text"] - } - ] - }, - "train": { - "epochs": 200, - "batch_size": 16, - - "metrics": [ - { - "name": "google_bleu", - "inputs": ["y_text", "prediction_text"] - }, - { - "name": "bleu", - "inputs": ["y_text", "prediction_text"] - }, - { - "name": "accuracy", - "inputs": ["y_text", "prediction_text"] - } - ], - "validation_patience": 30, - "val_every_n_epochs": 1, - - "log_every_n_batches": -1, - "log_every_n_epochs": 1, - "show_examples": false, - "class_name": "nn_trainer", - "evaluation_targets": [ - "valid", - "test" - ] - }, - "metadata": { - "variables": { - "ROOT_PATH": "~/.deeppavlov", - "DOWNLOADS_PATH": "{ROOT_PATH}/downloads", - "MODELS_PATH": "{ROOT_PATH}/models" - }, - "requirements": [ - "{DEEPPAVLOV_PATH}/requirements/tf.txt", - "{DEEPPAVLOV_PATH}/requirements/spacy.txt", - "{DEEPPAVLOV_PATH}/requirements/en_core_web_sm.txt", - "{DEEPPAVLOV_PATH}/requirements/fasttext.txt" - ], - "download": [ - { - "url": "http://files.deeppavlov.ai/deeppavlov_data/vocabs.tar.gz", - "subdir": "{MODELS_PATH}" - }, - { - "url": "http://files.deeppavlov.ai/deeppavlov_data/seq2seq_go_bot_v2.tar.gz", - "subdir": "{MODELS_PATH}" - }, - { - "url": 
"http://files.deeppavlov.ai/deeppavlov_data/embeddings/wiki.en.bin", - "subdir": "{DOWNLOADS_PATH}/embeddings" - } - ] - } -} diff --git a/deeppavlov/configs/skills/aiml_skill.json b/deeppavlov/configs/skills/aiml_skill.json index a45b80cb1f..5a454fa4da 100644 --- a/deeppavlov/configs/skills/aiml_skill.json +++ b/deeppavlov/configs/skills/aiml_skill.json @@ -34,9 +34,6 @@ "DOWNLOADS_PATH": "{ROOT_PATH}/downloads", "MODELS_PATH": "{ROOT_PATH}/models" }, - "requirements": [ - "{DEEPPAVLOV_PATH}/requirements/aiml_skill.txt" - ], "download": [ { "url": "http://files.deeppavlov.ai/aiml_skill/aiml_scripts.tar.gz", diff --git a/deeppavlov/configs/skills/rasa_skill.json b/deeppavlov/configs/skills/rasa_skill.json index fae1521b0a..22936c660d 100644 --- a/deeppavlov/configs/skills/rasa_skill.json +++ b/deeppavlov/configs/skills/rasa_skill.json @@ -29,10 +29,6 @@ "MODELS_PATH": "{ROOT_PATH}/models", "PROJECT_ROOT": "{DOWNLOADS_PATH}/rasa_tutorial_project" }, - "requirements": [ - "{DEEPPAVLOV_PATH}/requirements/rasa_skill.txt", - "{DEEPPAVLOV_PATH}/requirements/tf.txt" - ], "download": [ { "url": "http://files.deeppavlov.ai/rasa_skill/rasa_tutorial_project.tar.gz", diff --git a/deeppavlov/configs/spelling_correction/brillmoore_kartaslov_ru.json b/deeppavlov/configs/spelling_correction/brillmoore_kartaslov_ru.json index 75cc4fb8d4..d24b70d8e4 100644 --- a/deeppavlov/configs/spelling_correction/brillmoore_kartaslov_ru.json +++ b/deeppavlov/configs/spelling_correction/brillmoore_kartaslov_ru.json @@ -43,7 +43,7 @@ "load_path": "{MODELS_PATH}/error_model/error_model_ru.tsv" }, { - "class_name": "deeppavlov.models.spelling_correction.electors.kenlm_elector:KenlmElector", + "class_name": "kenlm_elector", "in": ["tokens_candidates"], "out": ["y_predicted_tokens"], "load_path": "{DOWNLOADS_PATH}/language_models/ru_wiyalen_no_punkt.arpa.binary" @@ -68,10 +68,6 @@ "DOWNLOADS_PATH": "{ROOT_PATH}/downloads", "MODELS_PATH": "{ROOT_PATH}/models" }, - "requirements": [ - "{DEEPPAVLOV_PATH}/requirements/spelling.txt", - "{DEEPPAVLOV_PATH}/requirements/kenlm.txt" - ], "download": [ { "url": "http://files.deeppavlov.ai/deeppavlov_data/error_model.tar.gz", diff --git a/deeppavlov/configs/spelling_correction/brillmoore_kartaslov_ru_custom_vocab.json b/deeppavlov/configs/spelling_correction/brillmoore_kartaslov_ru_custom_vocab.json index e5fe011ddb..d35561b812 100644 --- a/deeppavlov/configs/spelling_correction/brillmoore_kartaslov_ru_custom_vocab.json +++ b/deeppavlov/configs/spelling_correction/brillmoore_kartaslov_ru_custom_vocab.json @@ -45,7 +45,7 @@ "load_path": "{MODELS_PATH}/error_model/error_model_ru.tsv" }, { - "class_name": "deeppavlov.models.spelling_correction.electors.kenlm_elector:KenlmElector", + "class_name": "kenlm_electorr", "in": ["tokens_candidates"], "out": ["y_predicted_tokens"], "load_path": "{DOWNLOADS_PATH}/language_models/ru_wiyalen_no_punkt.arpa.binary" @@ -70,10 +70,6 @@ "DOWNLOADS_PATH": "{ROOT_PATH}/downloads", "MODELS_PATH": "{ROOT_PATH}/models" }, - "requirements": [ - "{DEEPPAVLOV_PATH}/requirements/spelling.txt", - "{DEEPPAVLOV_PATH}/requirements/kenlm.txt" - ], "download": [ { "url": "http://files.deeppavlov.ai/deeppavlov_data/error_model.tar.gz", diff --git a/deeppavlov/configs/spelling_correction/brillmoore_kartaslov_ru_nolm.json b/deeppavlov/configs/spelling_correction/brillmoore_kartaslov_ru_nolm.json index 9c93e92e30..6aa7de9c85 100644 --- a/deeppavlov/configs/spelling_correction/brillmoore_kartaslov_ru_nolm.json +++ 
b/deeppavlov/configs/spelling_correction/brillmoore_kartaslov_ru_nolm.json @@ -43,7 +43,7 @@ "load_path": "{MODELS_PATH}/error_model/error_model_ru.tsv" }, { - "class_name": "deeppavlov.models.spelling_correction.electors.top1_elector:TopOneElector", + "class_name": "top1_elector", "in": ["tokens_candidates"], "out": ["y_predicted_tokens"] }, @@ -67,9 +67,6 @@ "DOWNLOADS_PATH": "{ROOT_PATH}/downloads", "MODELS_PATH": "{ROOT_PATH}/models" }, - "requirements": [ - "{DEEPPAVLOV_PATH}/requirements/spelling.txt" - ], "download": [ { "url": "http://files.deeppavlov.ai/deeppavlov_data/error_model.tar.gz", diff --git a/deeppavlov/configs/spelling_correction/brillmoore_wikitypos_en.json b/deeppavlov/configs/spelling_correction/brillmoore_wikitypos_en.json index e75cc4215a..60ed162888 100644 --- a/deeppavlov/configs/spelling_correction/brillmoore_wikitypos_en.json +++ b/deeppavlov/configs/spelling_correction/brillmoore_wikitypos_en.json @@ -1,10 +1,10 @@ { "dataset_reader": { - "class_name": "deeppavlov.dataset_readers.typos_reader:TyposWikipedia", + "class_name": "typos_wikipedia_reader", "data_path": "{DOWNLOADS_PATH}" }, "dataset_iterator": { - "class_name": "deeppavlov.dataset_iterators.typos_iterator:TyposDatasetIterator", + "class_name": "typos_iterator", "test_ratio": 0.05 }, "chainer":{ @@ -36,13 +36,13 @@ "window": 1, "candidates_count": 4, "dictionary": { - "class_name": "deeppavlov.vocabs.typos:Wiki100KDictionary", + "class_name": "wikitionary_100K_vocab", "data_dir": "{DOWNLOADS_PATH}/vocabs" }, "save_path": "{MODELS_PATH}/error_model/error_model.tsv" }, { - "class_name": "deeppavlov.models.spelling_correction.electors.kenlm_elector:KenlmElector", + "class_name": "kenlm_elector", "in": ["tokens_candidates"], "out": ["y_predicted_tokens"], "load_path": "{DOWNLOADS_PATH}/language_models/en_wiki_no_punkt.arpa.binary" @@ -65,10 +65,6 @@ "DOWNLOADS_PATH": "{ROOT_PATH}/downloads", "MODELS_PATH": "{ROOT_PATH}/models" }, - "requirements": [ - "{DEEPPAVLOV_PATH}/requirements/spelling.txt", - "{DEEPPAVLOV_PATH}/requirements/kenlm.txt" - ], "download": [ { "url": "http://files.deeppavlov.ai/deeppavlov_data/error_model.tar.gz", diff --git a/deeppavlov/configs/spelling_correction/levenshtein_corrector_ru.json b/deeppavlov/configs/spelling_correction/levenshtein_corrector_ru.json index 7355930b33..8052847209 100644 --- a/deeppavlov/configs/spelling_correction/levenshtein_corrector_ru.json +++ b/deeppavlov/configs/spelling_correction/levenshtein_corrector_ru.json @@ -23,11 +23,11 @@ { "in": ["x_tokens"], "out": ["tokens_candidates"], - "class_name": "deeppavlov.models.spelling_correction.levenshtein.searcher_component:LevenshteinSearcherComponent", + "class_name": "spelling_levenshtein", "words": "#vocab.keys()" }, { - "class_name": "deeppavlov.models.spelling_correction.electors.kenlm_elector:KenlmElector", + "class_name": "kenlm_elector", "in": ["tokens_candidates"], "out": ["y_predicted_tokens"], "load_path": "{DOWNLOADS_PATH}/language_models/ru_wiyalen_no_punkt.arpa.binary" @@ -46,10 +46,6 @@ "DOWNLOADS_PATH": "{ROOT_PATH}/downloads", "MODELS_PATH": "{ROOT_PATH}/models" }, - "requirements": [ - "{DEEPPAVLOV_PATH}/requirements/spelling.txt", - "{DEEPPAVLOV_PATH}/requirements/kenlm.txt" - ], "download": [ { "url": "http://files.deeppavlov.ai/deeppavlov_data/vocabs/russian_words_vocab.dict.gz", diff --git a/deeppavlov/configs/squad/multi_squad_noans.json b/deeppavlov/configs/squad/multi_squad_noans.json index 90c9aff905..c423b47036 100644 --- a/deeppavlov/configs/squad/multi_squad_noans.json +++ 
b/deeppavlov/configs/squad/multi_squad_noans.json @@ -130,9 +130,6 @@ "DOWNLOADS_PATH": "{ROOT_PATH}/downloads", "MODELS_PATH": "{ROOT_PATH}/models" }, - "requirements": [ - "{DEEPPAVLOV_PATH}/requirements/tf.txt" - ], "download": [ { "url": "http://files.deeppavlov.ai/deeppavlov_data/multi_squad_model_noans_1.1.tar.gz", diff --git a/deeppavlov/configs/squad/multi_squad_noans_infer.json b/deeppavlov/configs/squad/multi_squad_noans_infer.json index 00fbfd1f72..99338627e3 100644 --- a/deeppavlov/configs/squad/multi_squad_noans_infer.json +++ b/deeppavlov/configs/squad/multi_squad_noans_infer.json @@ -130,9 +130,6 @@ "DOWNLOADS_PATH": "{ROOT_PATH}/downloads", "MODELS_PATH": "{ROOT_PATH}/models" }, - "requirements": [ - "{DEEPPAVLOV_PATH}/requirements/tf.txt" - ], "download": [ { "url": "http://files.deeppavlov.ai/deeppavlov_data/multi_squad_model_noans_1.1.tar.gz", diff --git a/deeppavlov/configs/squad/multi_squad_retr_noans.json b/deeppavlov/configs/squad/multi_squad_retr_noans.json index e46842ece1..c0fd6cfe7a 100644 --- a/deeppavlov/configs/squad/multi_squad_retr_noans.json +++ b/deeppavlov/configs/squad/multi_squad_retr_noans.json @@ -141,9 +141,6 @@ "DOWNLOADS_PATH": "{ROOT_PATH}/downloads", "MODELS_PATH": "{ROOT_PATH}/models" }, - "requirements": [ - "{DEEPPAVLOV_PATH}/requirements/tf.txt" - ], "download": [ { "url": "http://files.deeppavlov.ai/deeppavlov_data/multi_squad_model_noans_1.1.tar.gz", diff --git a/deeppavlov/configs/squad/multi_squad_ru_retr_noans.json b/deeppavlov/configs/squad/multi_squad_ru_retr_noans.json index 67e5602afd..3b28beb9e6 100644 --- a/deeppavlov/configs/squad/multi_squad_ru_retr_noans.json +++ b/deeppavlov/configs/squad/multi_squad_ru_retr_noans.json @@ -141,9 +141,6 @@ "DOWNLOADS_PATH": "{ROOT_PATH}/downloads", "MODELS_PATH": "{ROOT_PATH}/models" }, - "requirements": [ - "{DEEPPAVLOV_PATH}/requirements/tf.txt" - ], "download": [ { "url": "http://files.deeppavlov.ai/deeppavlov_data/multi_squad_model_ru_1.0.tar.gz", diff --git a/deeppavlov/configs/squad/multi_squad_ru_retr_noans_rubert.json b/deeppavlov/configs/squad/multi_squad_ru_retr_noans_rubert.json index 9009c78f6e..cb78714e49 100644 --- a/deeppavlov/configs/squad/multi_squad_ru_retr_noans_rubert.json +++ b/deeppavlov/configs/squad/multi_squad_ru_retr_noans_rubert.json @@ -92,9 +92,6 @@ "DOWNLOADS_PATH": "{ROOT_PATH}/downloads", "MODELS_PATH": "{ROOT_PATH}/models" }, - "requirements": [ - "{DEEPPAVLOV_PATH}/requirements/tf.txt" - ], "download": [ { "url": "http://files.deeppavlov.ai/deeppavlov_data/bert/rubert_cased_L-12_H-768_A-12_v1.tar.gz", diff --git a/deeppavlov/configs/squad/multi_squad_ru_retr_noans_rubert_infer.json b/deeppavlov/configs/squad/multi_squad_ru_retr_noans_rubert_infer.json index 9d25378746..d17891ae5a 100644 --- a/deeppavlov/configs/squad/multi_squad_ru_retr_noans_rubert_infer.json +++ b/deeppavlov/configs/squad/multi_squad_ru_retr_noans_rubert_infer.json @@ -55,10 +55,6 @@ "MODELS_PATH": "{ROOT_PATH}/models", "CONFIGS_PATH": "{DEEPPAVLOV_PATH}/configs" }, - "requirements": [ - "{DEEPPAVLOV_PATH}/requirements/tf.txt", - "{DEEPPAVLOV_PATH}/requirements/bert_dp.txt" - ], "download": [ { "url": "http://files.deeppavlov.ai/deeppavlov_data/bert/rubert_cased_L-12_H-768_A-12_v1.tar.gz", diff --git a/deeppavlov/configs/squad/squad.json b/deeppavlov/configs/squad/squad.json index acf66ffc4e..451b0bb2a9 100644 --- a/deeppavlov/configs/squad/squad.json +++ b/deeppavlov/configs/squad/squad.json @@ -120,9 +120,6 @@ "DOWNLOADS_PATH": "{ROOT_PATH}/downloads", "MODELS_PATH": "{ROOT_PATH}/models" }, 
- "requirements": [ - "{DEEPPAVLOV_PATH}/requirements/tf.txt" - ], "download": [ { "url": "http://files.deeppavlov.ai/deeppavlov_data/squad_model_1.4_cpu_compatible.tar.gz", diff --git a/deeppavlov/configs/squad/squad_bert.json b/deeppavlov/configs/squad/squad_bert.json index 4cd960d990..18435be6f1 100644 --- a/deeppavlov/configs/squad/squad_bert.json +++ b/deeppavlov/configs/squad/squad_bert.json @@ -90,10 +90,6 @@ "DOWNLOADS_PATH": "{ROOT_PATH}/downloads", "MODELS_PATH": "{ROOT_PATH}/models" }, - "requirements": [ - "{DEEPPAVLOV_PATH}/requirements/tf.txt", - "{DEEPPAVLOV_PATH}/requirements/bert_dp.txt" - ], "download": [ { "url": "http://files.deeppavlov.ai/deeppavlov_data/bert/cased_L-12_H-768_A-12.zip", diff --git a/deeppavlov/configs/squad/squad_bert_infer.json b/deeppavlov/configs/squad/squad_bert_infer.json index 5f4a1920f3..dcc5747d31 100644 --- a/deeppavlov/configs/squad/squad_bert_infer.json +++ b/deeppavlov/configs/squad/squad_bert_infer.json @@ -61,10 +61,6 @@ "MODELS_PATH": "{ROOT_PATH}/models", "CONFIGS_PATH": "{DEEPPAVLOV_PATH}/configs" }, - "requirements": [ - "{DEEPPAVLOV_PATH}/requirements/tf.txt", - "{DEEPPAVLOV_PATH}/requirements/bert_dp.txt" - ], "download": [{ "url": "http://files.deeppavlov.ai/deeppavlov_data/bert/cased_L-12_H-768_A-12.zip", "subdir": "{DOWNLOADS_PATH}/bert_models" diff --git a/deeppavlov/configs/squad/squad_bert_multilingual_freezed_emb.json b/deeppavlov/configs/squad/squad_bert_multilingual_freezed_emb.json index 5a73bdd4e1..ed3a89c02a 100644 --- a/deeppavlov/configs/squad/squad_bert_multilingual_freezed_emb.json +++ b/deeppavlov/configs/squad/squad_bert_multilingual_freezed_emb.json @@ -51,10 +51,6 @@ "DOWNLOADS_PATH": "{ROOT_PATH}/downloads", "MODELS_PATH": "{ROOT_PATH}/models" }, - "requirements": [ - "{DEEPPAVLOV_PATH}/requirements/tf.txt", - "{DEEPPAVLOV_PATH}/requirements/bert_dp.txt" - ], "download": [ { "url": "http://files.deeppavlov.ai/deeppavlov_data/bert/multi_cased_L-12_H-768_A-12.zip", diff --git a/deeppavlov/configs/squad/squad_bert_uncased.json b/deeppavlov/configs/squad/squad_bert_uncased.json index ff09ca809a..5542458965 100644 --- a/deeppavlov/configs/squad/squad_bert_uncased.json +++ b/deeppavlov/configs/squad/squad_bert_uncased.json @@ -92,10 +92,6 @@ "DOWNLOADS_PATH": "{ROOT_PATH}/downloads", "MODELS_PATH": "{ROOT_PATH}/models" }, - "requirements": [ - "{DEEPPAVLOV_PATH}/requirements/tf.txt", - "{DEEPPAVLOV_PATH}/requirements/bert_dp.txt" - ], "download": [ { "url": "http://files.deeppavlov.ai/deeppavlov_data/bert/uncased_L-12_H-768_A-12.zip", diff --git a/deeppavlov/configs/squad/squad_ru.json b/deeppavlov/configs/squad/squad_ru.json index 28c1aac323..2d66da3143 100644 --- a/deeppavlov/configs/squad/squad_ru.json +++ b/deeppavlov/configs/squad/squad_ru.json @@ -121,9 +121,6 @@ "DOWNLOADS_PATH": "{ROOT_PATH}/downloads", "MODELS_PATH": "{ROOT_PATH}/models" }, - "requirements": [ - "{DEEPPAVLOV_PATH}/requirements/tf.txt" - ], "download": [ { "url": "http://files.deeppavlov.ai/deeppavlov_data/squad_model_ru_1.4_cpu_compatible.tar.gz", diff --git a/deeppavlov/configs/squad/squad_ru_bert.json b/deeppavlov/configs/squad/squad_ru_bert.json index cdb55121a4..7b105b47ef 100644 --- a/deeppavlov/configs/squad/squad_ru_bert.json +++ b/deeppavlov/configs/squad/squad_ru_bert.json @@ -93,10 +93,6 @@ "DOWNLOADS_PATH": "{ROOT_PATH}/downloads", "MODELS_PATH": "{ROOT_PATH}/models" }, - "requirements": [ - "{DEEPPAVLOV_PATH}/requirements/tf.txt", - "{DEEPPAVLOV_PATH}/requirements/bert_dp.txt" - ], "download": [ { "url": 
"http://files.deeppavlov.ai/deeppavlov_data/bert/multi_cased_L-12_H-768_A-12.zip", diff --git a/deeppavlov/configs/squad/squad_ru_bert_infer.json b/deeppavlov/configs/squad/squad_ru_bert_infer.json index a6b69cff58..83cc2cdd68 100644 --- a/deeppavlov/configs/squad/squad_ru_bert_infer.json +++ b/deeppavlov/configs/squad/squad_ru_bert_infer.json @@ -63,10 +63,6 @@ "MODELS_PATH": "{ROOT_PATH}/models", "CONFIGS_PATH": "{DEEPPAVLOV_PATH}/configs" }, - "requirements": [ - "{DEEPPAVLOV_PATH}/requirements/tf.txt", - "{DEEPPAVLOV_PATH}/requirements/bert_dp.txt" - ], "download": [ { "url": "http://files.deeppavlov.ai/deeppavlov_data/bert/multi_cased_L-12_H-768_A-12.zip", diff --git a/deeppavlov/configs/squad/squad_ru_rubert.json b/deeppavlov/configs/squad/squad_ru_rubert.json index 3dd1de2d93..e8070409da 100644 --- a/deeppavlov/configs/squad/squad_ru_rubert.json +++ b/deeppavlov/configs/squad/squad_ru_rubert.json @@ -93,10 +93,6 @@ "DOWNLOADS_PATH": "{ROOT_PATH}/downloads", "MODELS_PATH": "{ROOT_PATH}/models" }, - "requirements": [ - "{DEEPPAVLOV_PATH}/requirements/tf.txt", - "{DEEPPAVLOV_PATH}/requirements/bert_dp.txt" - ], "download": [ { "url": "http://files.deeppavlov.ai/deeppavlov_data/bert/rubert_cased_L-12_H-768_A-12_v1.tar.gz", diff --git a/deeppavlov/configs/squad/squad_ru_rubert_infer.json b/deeppavlov/configs/squad/squad_ru_rubert_infer.json index 2999038b64..5ea0c6e3e4 100644 --- a/deeppavlov/configs/squad/squad_ru_rubert_infer.json +++ b/deeppavlov/configs/squad/squad_ru_rubert_infer.json @@ -63,10 +63,6 @@ "MODELS_PATH": "{ROOT_PATH}/models", "CONFIGS_PATH": "{DEEPPAVLOV_PATH}/configs" }, - "requirements": [ - "{DEEPPAVLOV_PATH}/requirements/tf.txt", - "{DEEPPAVLOV_PATH}/requirements/bert_dp.txt" - ], "download": [ { "url": "http://files.deeppavlov.ai/deeppavlov_data/bert/rubert_cased_L-12_H-768_A-12_v1.tar.gz", diff --git a/deeppavlov/configs/squad/squad_torch_bert.json b/deeppavlov/configs/squad/squad_torch_bert.json index a2ad3a56fc..64d75f00ee 100644 --- a/deeppavlov/configs/squad/squad_torch_bert.json +++ b/deeppavlov/configs/squad/squad_torch_bert.json @@ -162,10 +162,6 @@ "DOWNLOADS_PATH": "{ROOT_PATH}/downloads", "MODELS_PATH": "{ROOT_PATH}/models/squad_torch_bert" }, - "requirements": [ - "{DEEPPAVLOV_PATH}/requirements/pytorch.txt", - "{DEEPPAVLOV_PATH}/requirements/transformers.txt" - ], "download": [ { "url": "http://files.deeppavlov.ai/deeppavlov_data/squad_torch_bert_v0.tar.gz", diff --git a/deeppavlov/configs/squad/squad_torch_bert_infer.json b/deeppavlov/configs/squad/squad_torch_bert_infer.json index 034eb86cb4..6a3b054844 100644 --- a/deeppavlov/configs/squad/squad_torch_bert_infer.json +++ b/deeppavlov/configs/squad/squad_torch_bert_infer.json @@ -61,10 +61,6 @@ "MODELS_PATH": "{ROOT_PATH}/models/squad_torch_bert", "CONFIGS_PATH": "{DEEPPAVLOV_PATH}/configs" }, - "requirements": [ - "{DEEPPAVLOV_PATH}/requirements/pytorch.txt", - "{DEEPPAVLOV_PATH}/requirements/transformers.txt" - ], "download": [ { "url": "http://files.deeppavlov.ai/deeppavlov_data/squad_torch_bert_v0.tar.gz", diff --git a/deeppavlov/configs/squad/squad_zh_bert_mult.json b/deeppavlov/configs/squad/squad_zh_bert_mult.json index b50bd89d08..50cac7569f 100644 --- a/deeppavlov/configs/squad/squad_zh_bert_mult.json +++ b/deeppavlov/configs/squad/squad_zh_bert_mult.json @@ -103,11 +103,6 @@ "DOWNLOADS_PATH": "{ROOT_PATH}/downloads", "MODELS_PATH": "{ROOT_PATH}/models" }, - "requirements": [ - "{DEEPPAVLOV_PATH}/requirements/tf.txt", - "{DEEPPAVLOV_PATH}/requirements/bert_dp.txt", - 
"{DEEPPAVLOV_PATH}/requirements/jieba.txt" - ], "download": [ { "url": "http://files.deeppavlov.ai/deeppavlov_data/bert/multi_cased_L-12_H-768_A-12.zip", diff --git a/deeppavlov/configs/squad/squad_zh_bert_zh.json b/deeppavlov/configs/squad/squad_zh_bert_zh.json index 344a5f216c..5864236cf9 100644 --- a/deeppavlov/configs/squad/squad_zh_bert_zh.json +++ b/deeppavlov/configs/squad/squad_zh_bert_zh.json @@ -103,11 +103,6 @@ "DOWNLOADS_PATH": "{ROOT_PATH}/downloads", "MODELS_PATH": "{ROOT_PATH}/models" }, - "requirements": [ - "{DEEPPAVLOV_PATH}/requirements/tf.txt", - "{DEEPPAVLOV_PATH}/requirements/bert_dp.txt", - "{DEEPPAVLOV_PATH}/requirements/jieba.txt" - ], "download": [ { "url": "http://files.deeppavlov.ai/deeppavlov_data/bert/chinese_L-12_H-768_A-12.zip", diff --git a/deeppavlov/configs/summarization/bert_as_summarizer.json b/deeppavlov/configs/summarization/bert_as_summarizer.json deleted file mode 100644 index 1cad9c12ee..0000000000 --- a/deeppavlov/configs/summarization/bert_as_summarizer.json +++ /dev/null @@ -1,41 +0,0 @@ -{ - "chainer": { - "in": ["texts"], - "pipe": [ - { - "class_name": "bert_as_summarizer", - "bert_config_file": "{DOWNLOADS_PATH}/bert_models/rubert_cased_L-12_H-768_A-12_v2/bert_config.json", - "pretrained_bert": "{DOWNLOADS_PATH}/bert_models/rubert_cased_L-12_H-768_A-12_v2/bert_model.ckpt", - "vocab_file": "{DOWNLOADS_PATH}/bert_models/rubert_cased_L-12_H-768_A-12_v2/vocab.txt", - "max_summary_length": 100, - "max_summary_length_in_tokens": true, - "lang": "ru", - "do_lower_case": false, - "max_seq_length": 512, - "in": ["texts"], - "out": ["summarized_text"] - } - ], - "out": ["summarized_text"] - }, - "metadata": { - "variables": { - "ROOT_PATH": "~/.deeppavlov", - "DOWNLOADS_PATH": "{ROOT_PATH}/downloads", - "MODELS_PATH": "{ROOT_PATH}/models", - "CONFIGS_PATH": "{DEEPPAVLOV_PATH}/configs" - }, - "requirements": [ - "{DEEPPAVLOV_PATH}/requirements/tf.txt", - "{DEEPPAVLOV_PATH}/requirements/bert_dp.txt" - ], - "download": [ - { - "url": "http://files.deeppavlov.ai/deeppavlov_data/bert/rubert_cased_L-12_H-768_A-12_v2.tar.gz", - "subdir": "{DOWNLOADS_PATH}/bert_models" - } - ] - } - } - - \ No newline at end of file diff --git a/deeppavlov/configs/summarization/bert_as_summarizer_with_init.json b/deeppavlov/configs/summarization/bert_as_summarizer_with_init.json deleted file mode 100644 index 7f85029c64..0000000000 --- a/deeppavlov/configs/summarization/bert_as_summarizer_with_init.json +++ /dev/null @@ -1,41 +0,0 @@ -{ - "chainer": { - "in": ["texts", "init_sentences"], - "pipe": [ - { - "class_name": "bert_as_summarizer", - "bert_config_file": "{DOWNLOADS_PATH}/bert_models/rubert_cased_L-12_H-768_A-12_v2/bert_config.json", - "pretrained_bert": "{DOWNLOADS_PATH}/bert_models/rubert_cased_L-12_H-768_A-12_v2/bert_model.ckpt", - "vocab_file": "{DOWNLOADS_PATH}/bert_models/rubert_cased_L-12_H-768_A-12_v2/vocab.txt", - "max_summary_length": 100, - "max_summary_length_in_tokens": true, - "lang": "ru", - "do_lower_case": false, - "max_seq_length": 512, - "in": ["texts", "init_sentences"], - "out": ["summarized_text"] - } - ], - "out": ["summarized_text"] - }, - "metadata": { - "variables": { - "ROOT_PATH": "~/.deeppavlov", - "DOWNLOADS_PATH": "{ROOT_PATH}/downloads", - "MODELS_PATH": "{ROOT_PATH}/models", - "CONFIGS_PATH": "{DEEPPAVLOV_PATH}/configs" - }, - "requirements": [ - "{DEEPPAVLOV_PATH}/requirements/tf.txt", - "{DEEPPAVLOV_PATH}/requirements/bert_dp.txt" - ], - "download": [ - { - "url": 
"http://files.deeppavlov.ai/deeppavlov_data/bert/rubert_cased_L-12_H-768_A-12_v2.tar.gz", - "subdir": "{DOWNLOADS_PATH}/bert_models" - } - ] - } - } - - \ No newline at end of file diff --git a/deeppavlov/configs/summarization/torch_bert_as_en_summarizer.json b/deeppavlov/configs/summarization/torch_bert_as_en_summarizer.json deleted file mode 100644 index 05b1db9498..0000000000 --- a/deeppavlov/configs/summarization/torch_bert_as_en_summarizer.json +++ /dev/null @@ -1,42 +0,0 @@ -{ - "chainer": { - "in": [ - "texts" - ], - "pipe": [ - { - "class_name": "torch_bert_as_summarizer", - "pretrained_bert": "bert-base-uncased", - "vocab_file": "bert-base-uncased", - "max_summary_length": 100, - "max_summary_length_in_tokens": true, - "lang": "en", - "do_lower_case": false, - "max_seq_length": 512, - "in": [ - "texts" - ], - "out": [ - "summarized_text" - ] - } - ], - "out": [ - "summarized_text" - ] - }, - "metadata": { - "variables": { - "ROOT_PATH": "~/.deeppavlov", - "DOWNLOADS_PATH": "{ROOT_PATH}/downloads", - "MODELS_PATH": "{ROOT_PATH}/models", - "CONFIGS_PATH": "{DEEPPAVLOV_PATH}/configs" - }, - "requirements": [ - "{DEEPPAVLOV_PATH}/requirements/pytorch.txt", - "{DEEPPAVLOV_PATH}/requirements/transformers.txt" - ] - } -} - - \ No newline at end of file diff --git a/deeppavlov/configs/syntax/ru_syntagrus_joint_parsing.json b/deeppavlov/configs/syntax/ru_syntagrus_joint_parsing.json index ca0c29a1b5..739a09433c 100644 --- a/deeppavlov/configs/syntax/ru_syntagrus_joint_parsing.json +++ b/deeppavlov/configs/syntax/ru_syntagrus_joint_parsing.json @@ -28,12 +28,6 @@ "DOWNLOADS_PATH": "{ROOT_PATH}/downloads", "CONFIGS_PATH": "{DEEPPAVLOV_PATH}/configs", "MODELS_PATH": "{ROOT_PATH}/models" - }, - "requirements": [ - "{DEEPPAVLOV_PATH}/requirements/tf.txt", - "{DEEPPAVLOV_PATH}/requirements/bert_dp.txt", - "{DEEPPAVLOV_PATH}/requirements/morpho_tagger.txt", - "{DEEPPAVLOV_PATH}/requirements/syntax_parser.txt" - ] + } } -} \ No newline at end of file +} diff --git a/deeppavlov/configs/syntax/syntax_ru_syntagrus_bert.json b/deeppavlov/configs/syntax/syntax_ru_syntagrus_bert.json index 4e7c018f98..86244bee5f 100644 --- a/deeppavlov/configs/syntax/syntax_ru_syntagrus_bert.json +++ b/deeppavlov/configs/syntax/syntax_ru_syntagrus_bert.json @@ -165,11 +165,6 @@ "BERT_PATH": "{DOWNLOADS_PATH}/bert_models/rubert_cased_L-12_H-768_A-12_v1", "WORK_PATH": "{MODELS_PATH}/syntax_ru_syntagrus" }, - "requirements": [ - "{DEEPPAVLOV_PATH}/requirements/tf.txt", - "{DEEPPAVLOV_PATH}/requirements/bert_dp.txt", - "{DEEPPAVLOV_PATH}/requirements/syntax_parser.txt" - ], "download": [ { "url": "http://files.deeppavlov.ai/deeppavlov_data/syntax_parser/syntax_ru_syntagrus_bert.tar.gz", diff --git a/deeppavlov/configs/tutorials/mt_bert/mt_bert_inference_tutorial.json b/deeppavlov/configs/tutorials/mt_bert/mt_bert_inference_tutorial.json index 4f4069a5b0..9cb64a6fd3 100644 --- a/deeppavlov/configs/tutorials/mt_bert/mt_bert_inference_tutorial.json +++ b/deeppavlov/configs/tutorials/mt_bert/mt_bert_inference_tutorial.json @@ -134,13 +134,6 @@ "INSULTS_PATH": "{MT_BERT_PATH}/insults", "SENTIMENT_PATH": "{MT_BERT_PATH}/sentiment", "NER_PATH": "{MT_BERT_PATH}/ner" - }, - "requirements": [ - "{DEEPPAVLOV_PATH}/requirements/tf.txt", - "{DEEPPAVLOV_PATH}/requirements/bert_dp.txt", - "{DEEPPAVLOV_PATH}/requirements/fasttext.txt", - "{DEEPPAVLOV_PATH}/requirements/rapidfuzz.txt", - "{DEEPPAVLOV_PATH}/requirements/hdt.txt" - ] + } } } diff --git a/deeppavlov/configs/tutorials/mt_bert/mt_bert_train_tutorial.json 
b/deeppavlov/configs/tutorials/mt_bert/mt_bert_train_tutorial.json index d469593475..b6a30ad6e7 100644 --- a/deeppavlov/configs/tutorials/mt_bert/mt_bert_train_tutorial.json +++ b/deeppavlov/configs/tutorials/mt_bert/mt_bert_train_tutorial.json @@ -293,13 +293,6 @@ "SENTIMENT_PATH": "{MT_BERT_PATH}/sentiment", "NER_PATH": "{MT_BERT_PATH}/ner" }, - "requirements": [ - "{DEEPPAVLOV_PATH}/requirements/tf.txt", - "{DEEPPAVLOV_PATH}/requirements/bert_dp.txt", - "{DEEPPAVLOV_PATH}/requirements/fasttext.txt", - "{DEEPPAVLOV_PATH}/requirements/rapidfuzz.txt", - "{DEEPPAVLOV_PATH}/requirements/hdt.txt" - ], "download": [ { "url": "http://files.deeppavlov.ai/datasets/insults_data.tar.gz", diff --git a/deeppavlov/core/commands/utils.py b/deeppavlov/core/commands/utils.py index 995bbf5988..1543591835 100644 --- a/deeppavlov/core/commands/utils.py +++ b/deeppavlov/core/commands/utils.py @@ -12,10 +12,13 @@ # See the License for the specific language governing permissions and # limitations under the License. import os +from copy import deepcopy from pathlib import Path from typing import Union, Dict, TypeVar from deeppavlov.core.common.file import read_json, find_config +from deeppavlov.core.common.registry import inverted_registry +from deeppavlov.core.data.utils import get_all_elems_from_json # noinspection PyShadowingBuiltins _T = TypeVar('_T', str, float, bool, list, dict) @@ -59,14 +62,44 @@ def _get_variables_from_config(config: Union[str, Path, dict]): return variables, variables_exact +def _update_requirements(config: dict) -> dict: + """ + Generates requirements for a DeepPavlov model and adds them as the ``metadata.requirements`` field of the returned dict. + + Searches for ``class_name`` keys in the passed config at all nesting levels. For each component found, + the function looks up its dependencies in the requirements registry. The dependencies found are added to the returned + copy of the config as ``metadata.requirements``. If the config already has ``metadata.requirements``, the found + requirements are merged with the existing ones. + + Args: + config: DeepPavlov model config + Returns: + a copy of the config with the ``metadata.requirements`` field updated according to the config components. 
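+ + Example (an illustrative sketch; it assumes ``requirements_registry.json`` maps ``kenlm_elector`` to ``kenlm.txt``, as in this revision): + + >>> config = {'chainer': {'pipe': [{'class_name': 'kenlm_elector'}]}} + >>> _update_requirements(config)['metadata']['requirements'] + ['{DEEPPAVLOV_PATH}/requirements/kenlm.txt']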
+ """ + components = get_all_elems_from_json(config, 'class_name') + components = {inverted_registry.get(component, component) for component in components} + requirements_registry_path = Path(__file__).parents[1] / 'common' / 'requirements_registry.json' + requirements_registry = read_json(requirements_registry_path) + requirements = [] + for component in components: + requirements.extend(requirements_registry.get(component, [])) + requirements.extend(config.get('metadata', {}).get('requirements', [])) + response = deepcopy(config) + response['metadata'] = response.get('metadata', {}) + response['metadata']['requirements'] = list(set(requirements)) + return response + + def parse_config(config: Union[str, Path, dict]) -> dict: """Apply variables' values to all its properties""" if isinstance(config, (str, Path)): config = read_json(find_config(config)) - variables, variables_exact = _get_variables_from_config(config) + updated_config = _update_requirements(config) + + variables, variables_exact = _get_variables_from_config(updated_config) - return _parse_config_property(config, variables, variables_exact) + return _parse_config_property(updated_config, variables, variables_exact) def expand_path(path: Union[str, Path]) -> Path: diff --git a/deeppavlov/core/common/registry.json b/deeppavlov/core/common/registry.json index d4241687d9..30f493b085 100644 --- a/deeppavlov/core/common/registry.json +++ b/deeppavlov/core/common/registry.json @@ -1,13 +1,11 @@ { "UD_pymorphy_lemmatizer": "deeppavlov.models.morpho_tagger.lemmatizer:UDPymorphyLemmatizer", "aiml_skill": "deeppavlov.skills.aiml_skill.aiml_skill:AIMLSkill", - "amazon_ecommerce_reader": "deeppavlov.dataset_readers.amazon_ecommerce_reader:AmazonEcommerceReader", "api_requester": "deeppavlov.models.api_requester.api_requester:ApiRequester", "api_router": "deeppavlov.models.api_requester.api_router:ApiRouter", "base64_decode_bytesIO": "deeppavlov.models.nemo.common:ascii_to_bytes_io", "basic_classification_iterator": "deeppavlov.dataset_iterators.basic_classification_iterator:BasicClassificationDatasetIterator", "basic_classification_reader": "deeppavlov.dataset_readers.basic_classification_reader:BasicClassificationDatasetReader", - "bert_as_summarizer": "deeppavlov.models.bert.bert_as_summarizer:BertAsSummarizer", "bert_classifier": "deeppavlov.models.bert.bert_classifier:BertClassifierModel", "bert_ner_preprocessor": "deeppavlov.models.preprocessors.bert_preprocessor:BertNerPreprocessor", "bert_preprocessor": "deeppavlov.models.preprocessors.bert_preprocessor:BertPreprocessor", @@ -37,10 +35,10 @@ "data_fitting_iterator": "deeppavlov.core.data.data_fitting_iterator:DataFittingIterator", "data_learning_iterator": "deeppavlov.core.data.data_learning_iterator:DataLearningIterator", "dependency_output_prettifier": "deeppavlov.models.morpho_tagger.common:DependencyOutputPrettifier", + "dialog_component_wrapper": "deeppavlov.models.go_bot.wrapper:DialogComponentWrapper", "dialog_db_result_iterator": "deeppavlov.dataset_iterators.dialog_iterator:DialogDBResultDatasetIterator", "dialog_indexing_iterator": "deeppavlov.dataset_iterators.dialog_iterator:DialogDatasetIndexingIterator", "dialog_iterator": "deeppavlov.dataset_iterators.dialog_iterator:DialogDatasetIterator", - "dialog_state": "deeppavlov.models.seq2seq_go_bot.dialog_state:DialogState", "dictionary_vectorizer": "deeppavlov.models.vectorizers.word_vectorizer:DictionaryVectorizer", "dirty_comments_preprocessor": 
"deeppavlov.models.preprocessors.dirty_comments_preprocessor:DirtyCommentsPreprocessor", "document_chunker": "deeppavlov.models.preprocessors.odqa_preprocessors:DocumentChunker", @@ -48,9 +46,6 @@ "dstc2_ner_iterator": "deeppavlov.dataset_iterators.dstc2_ner_iterator:Dstc2NerDatasetIterator", "dstc2_reader": "deeppavlov.dataset_readers.dstc2_reader:DSTC2DatasetReader", "dstc_slotfilling": "deeppavlov.models.slotfill.slotfill:DstcSlotFillingNetwork", - "ecommerce_preprocess": "deeppavlov.models.preprocessors.ecommerce_preprocess:EcommercePreprocess", - "ecommerce_skill_bleu": "deeppavlov.deprecated.skills.ecommerce_skill.bleu_retrieve:EcommerceSkillBleu", - "ecommerce_skill_tfidf": "deeppavlov.deprecated.skills.ecommerce_skill.tfidf_retrieve:EcommerceSkillTfidf", "elmo_embedder": "deeppavlov.models.embedders.elmo_embedder:ELMoEmbedder", "elmo_file_paths_iterator": "deeppavlov.dataset_iterators.elmo_file_paths_iterator:ELMoFilePathsIterator", "elmo_model": "deeppavlov.models.elmo.elmo:ELMo", @@ -80,8 +75,6 @@ "kbqa_reader": "deeppavlov.dataset_readers.kbqa_reader:KBQAReader", "kenlm_elector": "deeppavlov.models.spelling_correction.electors.kenlm_elector:KenlmElector", "keras_classification_model": "deeppavlov.models.classifiers.keras_classification_model:KerasClassificationModel", - "knowledge_base": "deeppavlov.models.seq2seq_go_bot.kb:KnowledgeBase", - "knowledge_base_entity_normalizer": "deeppavlov.models.seq2seq_go_bot.kb:KnowledgeBaseEntityNormalizer", "kvret_dialog_iterator": "deeppavlov.dataset_iterators.kvret_dialog_iterator:KvretDialogDatasetIterator", "kvret_reader": "deeppavlov.dataset_readers.kvret_reader:KvretDatasetReader", "lazy_tokenizer": "deeppavlov.models.tokenizers.lazy_tokenizer:LazyTokenizer", @@ -118,7 +111,6 @@ "odqa_reader": "deeppavlov.dataset_readers.odqa_reader:ODQADataReader", "one_hotter": "deeppavlov.models.preprocessors.one_hotter:OneHotter", "ontonotes_reader": "deeppavlov.dataset_readers.ontonotes_reader:OntonotesReader", - "params_evolution": "deeppavlov.models.evolution.evolution_param_generator:ParamsEvolution", "params_search": "deeppavlov.core.common.params_search:ParamsSearch", "paraphraser_pretrain_reader": "deeppavlov.dataset_readers.paraphraser_pretrain_reader:ParaphraserPretrainReader", "paraphraser_reader": "deeppavlov.dataset_readers.paraphraser_reader:ParaphraserReader", @@ -144,8 +136,6 @@ "russian_words_vocab": "deeppavlov.vocabs.typos:RussianWordsVocab", "sanitizer": "deeppavlov.models.preprocessors.sanitizer:Sanitizer", "sentseg_restore_sent": "deeppavlov.models.preprocessors.sentseg_preprocessor:SentSegRestoreSent", - "seq2seq_go_bot": "deeppavlov.models.seq2seq_go_bot.bot:Seq2SeqGoalOrientedBot", - "seq2seq_go_bot_nn": "deeppavlov.models.seq2seq_go_bot.network:Seq2SeqGoalOrientedBotNetwork", "siamese_iterator": "deeppavlov.dataset_iterators.siamese_iterator:SiameseIterator", "siamese_predictor": "deeppavlov.models.ranking.siamese_predictor:SiamesePredictor", "siamese_preprocessor": "deeppavlov.models.preprocessors.siamese_preprocessor:SiamesePreprocessor", @@ -188,11 +178,10 @@ "tfidf_ranker": "deeppavlov.models.doc_retrieval.tfidf_ranker:TfidfRanker", "tfidf_weighted": "deeppavlov.models.embedders.tfidf_weighted_embedder:TfidfWeightedEmbedder", "top1_elector": "deeppavlov.models.spelling_correction.electors.top1_elector:TopOneElector", - "torch_bert_as_summarizer": "deeppavlov.models.torch_bert.torch_bert_as_summarizer:TorchBertAsSummarizer", - "torch_bert_ner_preprocessor": 
"deeppavlov.models.preprocessors.torch_transformers_preprocessor:TorchBertNerPreprocessor", + "torch_transformers_ner_preprocessor": "deeppavlov.models.preprocessors.torch_transformers_preprocessor:TorchTransformersNerPreprocessor", "torch_bert_ranker": "deeppavlov.models.torch_bert.torch_bert_ranker:TorchBertRankerModel", "torch_bert_ranker_preprocessor": "deeppavlov.models.preprocessors.torch_transformers_preprocessor:TorchBertRankerPreprocessor", - "torch_bert_sequence_tagger": "deeppavlov.models.torch_bert.torch_bert_sequence_tagger:TorchBertSequenceTagger", + "torch_transformers_sequence_tagger": "deeppavlov.models.torch_bert.torch_transformers_sequence_tagger:TorchTransformersSequenceTagger", "torch_squad_bert_infer": "deeppavlov.models.torch_bert.torch_bert_squad:TorchBertSQuADInferModel", "torch_squad_bert_model": "deeppavlov.models.torch_bert.torch_bert_squad:TorchBertSQuADModel", "torch_text_classification_model": "deeppavlov.models.classifiers.torch_classification_model:TorchTextClassificationModel", @@ -219,4 +208,4 @@ "wikitionary_100K_vocab": "deeppavlov.vocabs.typos:Wiki100KDictionary", "intent_catcher_reader": "deeppavlov.dataset_readers.intent_catcher_reader:IntentCatcherReader", "intent_catcher": "deeppavlov.models.intent_catcher.intent_catcher:IntentCatcher" -} \ No newline at end of file +} diff --git a/deeppavlov/core/common/registry.py b/deeppavlov/core/common/registry.py index 932c4da714..55edc10bea 100644 --- a/deeppavlov/core/common/registry.py +++ b/deeppavlov/core/common/registry.py @@ -28,6 +28,8 @@ else: _REGISTRY = {} +inverted_registry = {val: key for key, val in _REGISTRY.items()} + def cls_from_str(name: str) -> type: """Returns a class object with the name given as a string.""" diff --git a/deeppavlov/core/common/requirements_registry.json b/deeppavlov/core/common/requirements_registry.json new file mode 100644 index 0000000000..9afb597ab0 --- /dev/null +++ b/deeppavlov/core/common/requirements_registry.json @@ -0,0 +1,350 @@ +{ + "UD_pymorphy_lemmatizer": [ + "{DEEPPAVLOV_PATH}/requirements/morpho_tagger.txt", + "{DEEPPAVLOV_PATH}/requirements/tf.txt" + ], + "aiml_skill": [ + "{DEEPPAVLOV_PATH}/requirements/aiml_skill.txt" + ], + "bert_classifier": [ + "{DEEPPAVLOV_PATH}/requirements/tf.txt", + "{DEEPPAVLOV_PATH}/requirements/bert_dp.txt" + ], + "bert_ner_preprocessor": [ + "{DEEPPAVLOV_PATH}/requirements/bert_dp.txt", + "{DEEPPAVLOV_PATH}/requirements/tf.txt" + ], + "bert_preprocessor": [ + "{DEEPPAVLOV_PATH}/requirements/bert_dp.txt", + "{DEEPPAVLOV_PATH}/requirements/tf.txt" + ], + "bert_ranker": [ + "{DEEPPAVLOV_PATH}/requirements/tf.txt", + "{DEEPPAVLOV_PATH}/requirements/bert_dp.txt" + ], + "bert_ranker_preprocessor": [ + "{DEEPPAVLOV_PATH}/requirements/bert_dp.txt", + "{DEEPPAVLOV_PATH}/requirements/tf.txt" + ], + "bert_sep_ranker": [ + "{DEEPPAVLOV_PATH}/requirements/tf.txt", + "{DEEPPAVLOV_PATH}/requirements/bert_dp.txt" + ], + "bert_sep_ranker_predictor": [ + "{DEEPPAVLOV_PATH}/requirements/tf.txt", + "{DEEPPAVLOV_PATH}/requirements/bert_dp.txt" + ], + "bert_sep_ranker_predictor_preprocessor": [ + "{DEEPPAVLOV_PATH}/requirements/bert_dp.txt", + "{DEEPPAVLOV_PATH}/requirements/tf.txt" + ], + "bert_sep_ranker_preprocessor": [ + "{DEEPPAVLOV_PATH}/requirements/bert_dp.txt", + "{DEEPPAVLOV_PATH}/requirements/tf.txt" + ], + "bert_sequence_network": [ + "{DEEPPAVLOV_PATH}/requirements/tf.txt", + "{DEEPPAVLOV_PATH}/requirements/bert_dp.txt" + ], + "bert_sequence_tagger": [ + "{DEEPPAVLOV_PATH}/requirements/tf.txt", + 
"{DEEPPAVLOV_PATH}/requirements/bert_dp.txt" + ], + "bert_syntax_parser": [ + "{DEEPPAVLOV_PATH}/requirements/tf.txt", + "{DEEPPAVLOV_PATH}/requirements/bert_dp.txt" + ], + "chu_liu_edmonds_transformer": [ + "{DEEPPAVLOV_PATH}/requirements/syntax_parser.txt" + ], + "dam_nn": [ + "{DEEPPAVLOV_PATH}/requirements/tf.txt" + ], + "dam_nn_use_transformer": [ + "{DEEPPAVLOV_PATH}/requirements/tf.txt", + "{DEEPPAVLOV_PATH}/requirements/tf-hub.txt" + ], + "dependency_output_prettifier": [ + "{DEEPPAVLOV_PATH}/requirements/tf.txt" + ], + "dictionary_vectorizer": [ + "{DEEPPAVLOV_PATH}/requirements/morpho_tagger.txt", + "{DEEPPAVLOV_PATH}/requirements/tf.txt" + ], + "dstc_slotfilling": [ + "{DEEPPAVLOV_PATH}/requirements/rapidfuzz.txt" + ], + "elmo_embedder": [ + "{DEEPPAVLOV_PATH}/requirements/tf.txt", + "{DEEPPAVLOV_PATH}/requirements/tf-hub.txt" + ], + "elmo_model": [ + "{DEEPPAVLOV_PATH}/requirements/tf.txt", + "{DEEPPAVLOV_PATH}/requirements/tf-hub.txt" + ], + "fasttext": [ + "{DEEPPAVLOV_PATH}/requirements/fasttext.txt" + ], + "glove": [ + "{DEEPPAVLOV_PATH}/requirements/gensim.txt" + ], + "go_bot": [ + "{DEEPPAVLOV_PATH}/requirements/tf.txt" + ], + "hybrid_ner_model": [ + "{DEEPPAVLOV_PATH}/requirements/tf.txt", + "{DEEPPAVLOV_PATH}/requirements/tf-hub.txt", + "{DEEPPAVLOV_PATH}/requirements/gensim.txt" + ], + "input_splitter": [ + "{DEEPPAVLOV_PATH}/requirements/tf.txt", + "{DEEPPAVLOV_PATH}/requirements/bert_dp.txt" + ], + "jieba_tokenizer": [ + "{DEEPPAVLOV_PATH}/requirements/jieba.txt" + ], + "joint_tagger_parser": [ + "{DEEPPAVLOV_PATH}/requirements/tf.txt" + ], + "kenlm_elector": [ + "{DEEPPAVLOV_PATH}/requirements/kenlm.txt" + ], + "keras_classification_model": [ + "{DEEPPAVLOV_PATH}/requirements/tf.txt" + ], + "lemmatized_output_prettifier": [ + "{DEEPPAVLOV_PATH}/requirements/tf.txt" + ], + "morpho_tagger": [ + "{DEEPPAVLOV_PATH}/requirements/tf.txt" + ], + "mpm_nn": [ + "{DEEPPAVLOV_PATH}/requirements/tf.txt" + ], + "mt_bert": [ + "{DEEPPAVLOV_PATH}/requirements/tf.txt", + "{DEEPPAVLOV_PATH}/requirements/bert_dp.txt" + ], + "mt_bert_classification_task": [ + "{DEEPPAVLOV_PATH}/requirements/tf.txt", + "{DEEPPAVLOV_PATH}/requirements/bert_dp.txt" + ], + "mt_bert_reuser": [ + "{DEEPPAVLOV_PATH}/requirements/tf.txt", + "{DEEPPAVLOV_PATH}/requirements/bert_dp.txt" + ], + "mt_bert_seq_tagging_task": [ + "{DEEPPAVLOV_PATH}/requirements/tf.txt", + "{DEEPPAVLOV_PATH}/requirements/bert_dp.txt" + ], + "ner": [ + "{DEEPPAVLOV_PATH}/requirements/tf.txt" + ], + "pymorphy_vectorizer": [ + "{DEEPPAVLOV_PATH}/requirements/morpho_tagger.txt", + "{DEEPPAVLOV_PATH}/requirements/tf.txt" + ], + "rasa_skill": [ + "{DEEPPAVLOV_PATH}/requirements/rasa_skill.txt", + "{DEEPPAVLOV_PATH}/requirements/tf.txt" + ], + "rel_ranker": [ + "{DEEPPAVLOV_PATH}/requirements/tf.txt" + ], + "rel_ranking_infer": [ + "{DEEPPAVLOV_PATH}/requirements/tf.txt" + ], + "siamese_predictor": [ + "{DEEPPAVLOV_PATH}/requirements/tf.txt" + ], + "smn_nn": [ + "{DEEPPAVLOV_PATH}/requirements/tf.txt" + ], + "squad_bert_infer": [ + "{DEEPPAVLOV_PATH}/requirements/tf.txt", + "{DEEPPAVLOV_PATH}/requirements/bert_dp.txt" + ], + "squad_bert_model": [ + "{DEEPPAVLOV_PATH}/requirements/tf.txt", + "{DEEPPAVLOV_PATH}/requirements/bert_dp.txt" + ], + "squad_model": [ + "{DEEPPAVLOV_PATH}/requirements/tf.txt" + ], + "stream_spacy_tokenizer": [ + "{DEEPPAVLOV_PATH}/requirements/spacy.txt", + "{DEEPPAVLOV_PATH}/requirements/en_core_web_sm.txt" + ], + "tag_output_prettifier": [ + "{DEEPPAVLOV_PATH}/requirements/tf.txt" + ], + "two_sentences_emb": [ + 
"{DEEPPAVLOV_PATH}/requirements/tf.txt" + ], + "bilstm_gru_nn": [ + "{DEEPPAVLOV_PATH}/requirements/tf.txt" + ], + "wiki_parser": [ + "{DEEPPAVLOV_PATH}/requirements/hdt.txt" + ], + "bilstm_nn": [ + "{DEEPPAVLOV_PATH}/requirements/tf.txt" + ], + "typos_wikipedia_reader": [ + "{DEEPPAVLOV_PATH}/requirements/lxml.txt" + ], + "static_dictionary": [ + "{DEEPPAVLOV_PATH}/requirements/lxml.txt" + ], + "base64_decode_bytesIO": [ + "{DEEPPAVLOV_PATH}/requirements/nemo.txt" + ], + "wikitionary_100K_vocab": [ + "{DEEPPAVLOV_PATH}/requirements/lxml.txt" + ], + "huggingface_dataset_iterator": [ + "{DEEPPAVLOV_PATH}/requirements/datasets.txt" + ], + "bytesIO_encode_base64": [ + "{DEEPPAVLOV_PATH}/requirements/nemo.txt" + ], + "typos_custom_reader": [ + "{DEEPPAVLOV_PATH}/requirements/lxml.txt" + ], + "torch_text_classification_model": [ + "{DEEPPAVLOV_PATH}/requirements/pytorch16.txt" + ], + "huggingface_dataset_reader": [ + "{DEEPPAVLOV_PATH}/requirements/datasets.txt" + ], + "tree_to_sparql": [ + "{DEEPPAVLOV_PATH}/requirements/udpipe.txt" + ], + "torch_squad_bert_model": [ + "{DEEPPAVLOV_PATH}/requirements/pytorch16.txt", + "{DEEPPAVLOV_PATH}/requirements/transformers.txt" + ], + "torch_transformers_preprocessor": [ + "{DEEPPAVLOV_PATH}/requirements/pytorch16.txt", + "{DEEPPAVLOV_PATH}/requirements/transformers.txt" + ], + "torch_bert_ranker": [ + "{DEEPPAVLOV_PATH}/requirements/pytorch16.txt", + "{DEEPPAVLOV_PATH}/requirements/transformers.txt" + ], + "torch_transformers_classifier": [ + "{DEEPPAVLOV_PATH}/requirements/pytorch16.txt", + "{DEEPPAVLOV_PATH}/requirements/transformers.txt" + ], + "torch_transformers_sequence_tagger": [ + "{DEEPPAVLOV_PATH}/requirements/pytorch16.txt", + "{DEEPPAVLOV_PATH}/requirements/transformers.txt" + ], + "ru_adj_to_noun": [ + "{DEEPPAVLOV_PATH}/requirements/udpipe.txt" + ], + "transformers_bert_embedder": [ + "{DEEPPAVLOV_PATH}/requirements/pytorch16.txt", + "{DEEPPAVLOV_PATH}/requirements/transformers.txt" + ], + "torch_transformers_ner_preprocessor": [ + "{DEEPPAVLOV_PATH}/requirements/pytorch16.txt", + "{DEEPPAVLOV_PATH}/requirements/transformers.txt" + ], + "torch_bert_ranker_preprocessor": [ + "{DEEPPAVLOV_PATH}/requirements/pytorch16.txt", + "{DEEPPAVLOV_PATH}/requirements/transformers.txt" + ], + "transformers_bert_preprocessor": [ + "{DEEPPAVLOV_PATH}/requirements/transformers.txt" + ], + "spelling_levenshtein": [ + "{DEEPPAVLOV_PATH}/requirements/sortedcontainers.txt" + ], + "typos_kartaslov_reader": [ + "{DEEPPAVLOV_PATH}/requirements/lxml.txt" + ], + "torch_squad_bert_infer": [ + "{DEEPPAVLOV_PATH}/requirements/pytorch16.txt", + "{DEEPPAVLOV_PATH}/requirements/transformers.txt" + ], + "nemo_asr": [ + "{DEEPPAVLOV_PATH}/requirements/pytorch14.txt", + "{DEEPPAVLOV_PATH}/requirements/nemo.txt", + "{DEEPPAVLOV_PATH}/requirements/nemo-asr.txt" + ], + "nemo_tts": [ + "{DEEPPAVLOV_PATH}/requirements/pytorch14.txt", + "{DEEPPAVLOV_PATH}/requirements/nemo.txt", + "{DEEPPAVLOV_PATH}/requirements/nemo-asr.txt", + "{DEEPPAVLOV_PATH}/requirements/transformers.txt", + "{DEEPPAVLOV_PATH}/requirements/nemo-tts.txt" + ], + "spelling_error_model": [ + "{DEEPPAVLOV_PATH}/requirements/lxml.txt" + ], + "udpipe_parser": [ + "{DEEPPAVLOV_PATH}/requirements/udpipe.txt" + ], + "torchtext_classification_data_reader": [ + "{DEEPPAVLOV_PATH}/requirements/torchtext.txt" + ], + "russian_words_vocab": [ + "{DEEPPAVLOV_PATH}/requirements/lxml.txt" + ], + "query_generator": [ + "{DEEPPAVLOV_PATH}/requirements/hdt.txt", + "{DEEPPAVLOV_PATH}/requirements/tf.txt", + 
"{DEEPPAVLOV_PATH}/requirements/bert_dp.txt", + "{DEEPPAVLOV_PATH}/requirements/spacy.txt", + "{DEEPPAVLOV_PATH}/requirements/en_core_web_sm.txt", + "{DEEPPAVLOV_PATH}/requirements/whapi.txt", + "{DEEPPAVLOV_PATH}/requirements/faiss.txt" + ], + "kbqa_entity_linker": [ + "{DEEPPAVLOV_PATH}/requirements/rapidfuzz.txt", + "{DEEPPAVLOV_PATH}/requirements/hdt.txt", + "{DEEPPAVLOV_PATH}/requirements/sortedcontainers.txt", + "{DEEPPAVLOV_PATH}/requirements/tf.txt", + "{DEEPPAVLOV_PATH}/requirements/bert_dp.txt", + "{DEEPPAVLOV_PATH}/requirements/spacy.txt", + "{DEEPPAVLOV_PATH}/requirements/en_core_web_sm.txt" + ], + "rel_ranking_bert_infer": [ + "{DEEPPAVLOV_PATH}/requirements/tf.txt", + "{DEEPPAVLOV_PATH}/requirements/hdt.txt", + "{DEEPPAVLOV_PATH}/requirements/bert_dp.txt", + "{DEEPPAVLOV_PATH}/requirements/spacy.txt", + "{DEEPPAVLOV_PATH}/requirements/en_core_web_sm.txt" + ], + "query_generator_online": [ + "{DEEPPAVLOV_PATH}/requirements/tf.txt", + "{DEEPPAVLOV_PATH}/requirements/hdt.txt", + "{DEEPPAVLOV_PATH}/requirements/bert_dp.txt", + "{DEEPPAVLOV_PATH}/requirements/spacy.txt", + "{DEEPPAVLOV_PATH}/requirements/en_core_web_sm.txt", + "{DEEPPAVLOV_PATH}/requirements/whapi.txt", + "{DEEPPAVLOV_PATH}/requirements/faiss.txt" + ], + "ner_chunker": [ + "{DEEPPAVLOV_PATH}/requirements/faiss.txt", + "{DEEPPAVLOV_PATH}/requirements/tf.txt", + "{DEEPPAVLOV_PATH}/requirements/hdt.txt", + "{DEEPPAVLOV_PATH}/requirements/bert_dp.txt", + "{DEEPPAVLOV_PATH}/requirements/spacy.txt", + "{DEEPPAVLOV_PATH}/requirements/en_core_web_sm.txt" + ], + "intent_catcher": [ + "{DEEPPAVLOV_PATH}/requirements/tf.txt", + "{DEEPPAVLOV_PATH}/requirements/tf-hub.txt", + "{DEEPPAVLOV_PATH}/requirements/xeger.txt" + ], + "entity_linker": [ + "{DEEPPAVLOV_PATH}/requirements/faiss.txt", + "{DEEPPAVLOV_PATH}/requirements/tf.txt", + "{DEEPPAVLOV_PATH}/requirements/hdt.txt", + "{DEEPPAVLOV_PATH}/requirements/bert_dp.txt", + "{DEEPPAVLOV_PATH}/requirements/spacy.txt", + "{DEEPPAVLOV_PATH}/requirements/en_core_web_sm.txt" + ] +} diff --git a/deeppavlov/core/data/utils.py b/deeppavlov/core/data/utils.py index 6cb4912283..6d4eb88661 100644 --- a/deeppavlov/core/data/utils.py +++ b/deeppavlov/core/data/utils.py @@ -37,7 +37,7 @@ tqdm.monitor_interval = 0 -def _get_download_token() -> str: +def get_download_token() -> str: """Return a download token from ~/.deeppavlov/token file. If token file does not exists, creates the file and writes to it a random URL-safe text string @@ -78,7 +78,7 @@ def s3_download(url: str, destination: str) -> None: file_object.download_file(destination, Callback=pbar.update) -def simple_download(url: str, destination: Union[Path, str]) -> None: +def simple_download(url: str, destination: Union[Path, str], headers: Optional[dict] = None) -> None: """Download a file from URL to target location. Displays a progress bar to the terminal during the download process. @@ -86,6 +86,7 @@ def simple_download(url: str, destination: Union[Path, str]) -> None: Args: url: The source URL. destination: Path to the file destination (including file name). + headers: Headers for file server. 
""" destination = Path(destination) @@ -99,7 +100,6 @@ def simple_download(url: str, destination: Union[Path, str]) -> None: chunk_size = 32 * 1024 temporary = destination.with_suffix(destination.suffix + '.part') - headers = {'dp-token': _get_download_token()} r = requests.get(url, stream=True, headers=headers) if r.status_code != 200: raise RuntimeError(f'Got status code {r.status_code} when trying to download {url}') @@ -137,13 +137,15 @@ def simple_download(url: str, destination: Union[Path, str]) -> None: temporary.rename(destination) -def download(dest_file_path: [List[Union[str, Path]]], source_url: str, force_download: bool = True) -> None: +def download(dest_file_path: [List[Union[str, Path]]], source_url: str, force_download: bool = True, + headers: Optional[dict] = None) -> None: """Download a file from URL to one or several target locations. Args: dest_file_path: Path or list of paths to the file destination (including file name). source_url: The source URL. force_download: Download file if it already exists, or not. + headers: Headers for file server. """ @@ -173,7 +175,7 @@ def download(dest_file_path: [List[Union[str, Path]]], source_url: str, force_do if not cached_exists: first_dest_path.parent.mkdir(parents=True, exist_ok=True) - simple_download(source_url, first_dest_path) + simple_download(source_url, first_dest_path, headers) else: log.info(f'Found cached {source_url} in {first_dest_path}') @@ -223,7 +225,8 @@ def ungzip(file_path: Union[Path, str], extract_path: Optional[Union[Path, str]] def download_decompress(url: str, download_path: Union[Path, str], - extract_paths: Optional[Union[List[Union[Path, str]], Path, str]] = None) -> None: + extract_paths: Optional[Union[List[Union[Path, str]], Path, str]] = None, + headers: Optional[dict] = None) -> None: """Download and extract .tar.gz or .gz file to one or several target locations. The archive is deleted if extraction was successful. @@ -232,6 +235,7 @@ def download_decompress(url: str, url: URL for file downloading. download_path: Path to the directory where downloaded file will be stored until the end of extraction. extract_paths: Path or list of paths where contents of archive will be extracted. + headers: Headers for file server. """ file_name = Path(urlparse(url).path).name @@ -253,7 +257,7 @@ def download_decompress(url: str, extracted_path = cache_dir / (url_hash + '_extracted') extracted = extracted_path.exists() if not extracted and not arch_file_path.exists(): - simple_download(url, arch_file_path) + simple_download(url, arch_file_path, headers) else: if extracted: log.info(f'Found cached and extracted {url} in {extracted_path}') @@ -261,7 +265,7 @@ def download_decompress(url: str, log.info(f'Found cached {url} in {arch_file_path}') else: arch_file_path = download_path / file_name - simple_download(url, arch_file_path) + simple_download(url, arch_file_path, headers) extracted_path = extract_paths.pop() if not extracted: diff --git a/deeppavlov/dataset_readers/amazon_ecommerce_reader.py b/deeppavlov/dataset_readers/amazon_ecommerce_reader.py deleted file mode 100644 index b8a0dc0acc..0000000000 --- a/deeppavlov/dataset_readers/amazon_ecommerce_reader.py +++ /dev/null @@ -1,100 +0,0 @@ -# Copyright 2018 Neural Networks and Deep Learning lab, MIPT -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# http://www.apache.org/licenses/LICENSE-2.0 -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import re -from logging import getLogger -from pathlib import Path -from typing import List, Any, Dict, Tuple - -from deeppavlov.core.commands.utils import expand_path -from deeppavlov.core.common.registry import register -from deeppavlov.core.data.dataset_reader import DatasetReader -from deeppavlov.core.data.utils import download_decompress, mark_done, is_done - -logger = getLogger(__name__) - - -@register('amazon_ecommerce_reader') -class AmazonEcommerceReader(DatasetReader): - """Class to download and load ecommerce data catalog""" - - def read(self, data_path: str, catalog: list, **kwargs) -> Dict[str, List[Tuple[Any, Any]]]: - """Load data from specific catalog - - Parameters: - data_path: where the dataset is located - catalog: names of the specific subcategories - - Returns: - dataset: loaded dataset - """ - - logger.info(f"Ecommerce loader is loaded with catalog {catalog}") - - if not isinstance(catalog, list): - catalog = [catalog] - - ec_data_global: List[Any] = [] - data_path = Path(expand_path(data_path)) - - if not is_done(data_path): - self._download_data(data_path) - - if data_path.is_dir(): - for fname in data_path.rglob("*.txt"): - if any(cat in fname.name for cat in catalog): - logger.info(f"File {fname.name} is loaded") - ec_data_global += self._load_amazon_ecommerce_file(fname) - - dataset = { - 'train': [((item['Title'], [], {}), item) for item in ec_data_global], - 'valid': [], - 'test': [] - } - - logger.info(f"In total {len(ec_data_global)} items are loaded") - return dataset - - def _download_data(self, data_path: str) -> None: - """Download dataset""" - url = "https://github.com/SamTube405/Amazon-E-commerce-Data-set/archive/master.zip" - download_decompress(url, data_path) - mark_done(data_path) - - def _load_amazon_ecommerce_file(self, fname: str) -> List[Dict[Any, Any]]: - """Parse dataset - - Parameters: - fname: catalog file - - Returns: - ec_data: parsed catalog data - """ - - ec_data = [] - item: Dict = {} - new_item_re = re.compile("ITEM *\d+") - - with open(fname, 'r', encoding='utf-8', errors='ignore') as file: - for line in file: - if new_item_re.match(line): - if len(item.keys()) > 0: - if 'Title' in item and 'Feature' in item: - ec_data.append(item) - item = {'Item': int(line[5:]), 'Category': fname.name.split("_")[1]} - else: - row = line.strip().split("=") - if len(row) == 2: - if row[0] in item: - item[row[0]] += "." 
+ row[1] - else: - item[row[0]] = row[1] - return ec_data diff --git a/deeppavlov/deprecated/__init__.py b/deeppavlov/deprecated/__init__.py deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/deeppavlov/deprecated/agent/__init__.py b/deeppavlov/deprecated/agent/__init__.py deleted file mode 100644 index ae1ae69d49..0000000000 --- a/deeppavlov/deprecated/agent/__init__.py +++ /dev/null @@ -1,4 +0,0 @@ -from .agent import Agent, SkillWrapper -from .filter import Filter -from .processor import Processor -from .rich_content import RichControl, RichMessage diff --git a/deeppavlov/deprecated/agent/agent.py b/deeppavlov/deprecated/agent/agent.py deleted file mode 100644 index 2154f9d4a5..0000000000 --- a/deeppavlov/deprecated/agent/agent.py +++ /dev/null @@ -1,154 +0,0 @@ -# Copyright 2017 Neural Networks and Deep Learning lab, MIPT -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from abc import ABCMeta, abstractmethod -from collections import defaultdict -from typing import List, Dict, Tuple, Optional - -from deeppavlov.core.models.component import Component -from deeppavlov.utils.connector.dialog_logger import DialogLogger - - -class Agent(Component, metaclass=ABCMeta): - """Abstract class for agents. - - Agent is an entity which receives inputs from the outer word, processes - them and returns response to each input. Usually agent implements real-life - task, business or user case. Agent encapsulates skills instances, management - of skills inference and skills inference results processing. Also agent - provides management both for history and state for each utterance and uses - only incoming utterances IDs to distinguish them. - - Args: - skills: List of initiated agent skills instances. - - Attributes: - skills: List of initiated Skill or Component instances. - Components API should should implement API of Skill abstract class. - history: Histories for each each dialog with agent indexed - by dialog ID. Each history is represented by list of incoming - and outcoming replicas of the dialog casted to str and updated automatically. - states: States for each skill with agent indexed by dialog ID. Each - state updated automatically after each wrapped skill inference. - So we highly recommend use this attribute only for reading and - not to use it for your custom skills management. - wrapped_skills: Skills wrapped to SkillWrapper objects. SkillWrapper - object gives to Skill __call__ signature of Agent __call__ and - handles automatic state management for skill. All skills are - wrapped to SkillsWrapper automatically during agent initialisation. - We highly recommend to use wrapped skills for skills inference. - dialog_logger: DeepPavlov dialog logging facility. 
- """ - - def __init__(self, skills: List[Component]) -> None: - self.skills = skills - self.history: Dict = defaultdict(list) - self.states: Dict = defaultdict(lambda: [None] * len(self.skills)) - self.wrapped_skills: List[SkillWrapper] = \ - [SkillWrapper(skill, skill_id, self) for skill_id, skill in enumerate(self.skills)] - self.dialog_logger: DialogLogger = DialogLogger() - - def __call__(self, utterances_batch: list, utterances_ids: Optional[list] = None) -> list: - """Wraps _call method and updates utterances history. - - Args: - utterances_batch: Batch of incoming utterances. - utterances_ids: Batch of dialog IDs corresponding to incoming utterances. - - Returns: - responses: A batch of responses corresponding to the - utterance batch received by agent. - """ - responses_batch = self._call(utterances_batch, utterances_ids) - - batch_size = len(utterances_batch) - ids = utterances_ids or list(range(batch_size)) - - for utt_batch_idx, utt_id in enumerate(ids): - self.history[utt_id].append(str(utterances_batch[utt_batch_idx])) - self.dialog_logger.log_in(utterances_batch[utt_batch_idx], utt_id) - - self.history[utt_id].append(str(responses_batch[utt_batch_idx])) - self.dialog_logger.log_out(responses_batch[utt_batch_idx], utt_id) - - return responses_batch - - @abstractmethod - def _call(self, utterances_batch: list, utterances_ids: Optional[list] = None) -> list: - """Processes batch of utterances and returns corresponding responses batch. - - Each call of Agent processes incoming utterances and returns response - for each utterance Batch of dialog IDs can be provided, in other case - utterances indexes in incoming batch are used as dialog IDs. - - Args: - utterances_batch: Batch of incoming utterances. - utterances_ids: Batch of dialog IDs corresponding to incoming utterances. - - Returns: - responses: A batch of responses corresponding to the - utterance batch received by agent. - """ - pass - - -class SkillWrapper: - """Skill instances wrapper for internal use in Agent. - - SkillWrapper gives to skill interface of Agent and handles automatic state - management for skill. - - Args: - skill: Wrapped skill. - skill_id: Skill index in Agent.skills list. - agent: Agent instance. - - Attributes: - skill: Wrapped skill. - skill_id: Skill index in Agent.skills list. - agent: Agent instance. - """ - - def __init__(self, skill: Component, skill_id: int, agent: Agent) -> None: - self.skill = skill - self.skill_id = skill_id - self.agent = agent - - def __call__(self, utterances_batch: list, utterances_ids: Optional[list] = None) -> Tuple[list, list]: - """Wraps __call__ method of Skill instance. - - Provides skill __call__ with signature of Agent __call__ and handles - automatic state management for skill. - - Args: - utterances_batch: Batch of incoming utterances. - utterances_ids: Batch of dialog IDs corresponding to incoming utterances. - - Returns: - response: A batch of arbitrary typed skill inference results. - confidence: A batch of float typed confidence levels for each of - skill inference result. - states: Optional. A batch of arbitrary typed states for each - response. 
- """ - history_batch = [self.agent.history[utt_id] for utt_id in utterances_ids] - states_batch = [self.agent.states[utt_id][self.skill_id] for utt_id in utterances_ids] - - predicted, confidence, *states = self.skill(utterances_batch, history_batch, states_batch) - - states = states[0] if states else [None] * len(predicted) - for utt_id, state in zip(utterances_ids, states): - self.agent.states[utt_id][self.skill_id] = state - - return predicted, confidence diff --git a/deeppavlov/deprecated/agent/filter.py b/deeppavlov/deprecated/agent/filter.py deleted file mode 100644 index 65d60f46bd..0000000000 --- a/deeppavlov/deprecated/agent/filter.py +++ /dev/null @@ -1,50 +0,0 @@ -# Copyright 2017 Neural Networks and Deep Learning lab, MIPT -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from abc import ABCMeta, abstractmethod - -from deeppavlov.core.models.component import Component - - -class Filter(Component, metaclass=ABCMeta): - """Abstract class for filters. Filter is a DeepPavlov component, - which is used in Agent to select utterances from incoming batch - to be processed for each Agent skill. - """ - - @abstractmethod - def __call__(self, utterances_batch: list, history_batch: list) -> list: - """Returns skills-utterances application matrix. - - Returns skills-utterances application matrix which contains - information about Agent skills to be applied to each utterance - from incoming batch. - - Args: - utterances_batch: A batch of utterances of any type. - history_batch: A batch of list typed histories - for each utterance. - - Returns: - response: Skills-utterances application matrix, - for example: - [[True, False, True, True], - [False, True, True, True]] - Where each inner dict corresponds to one of the Agent - skills and each value in the inner dict contains information - about whether the skill will be applied to the utterance - with the same position in the utterances_batch. - - """ - pass diff --git a/deeppavlov/deprecated/agent/processor.py b/deeppavlov/deprecated/agent/processor.py deleted file mode 100644 index b2e99f9bcc..0000000000 --- a/deeppavlov/deprecated/agent/processor.py +++ /dev/null @@ -1,46 +0,0 @@ -# Copyright 2017 Neural Networks and Deep Learning lab, MIPT -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from abc import ABCMeta, abstractmethod - -from deeppavlov.core.models.component import Component - - -class Processor(Component, metaclass=ABCMeta): - """Abstract class for processors. 
Processor is a DeepPavlov component, - which is used in Agent to process skills responses and give one final - response for each utterance. - """ - - # TODO: change *responses to [[], [], ...] argument - @abstractmethod - def __call__(self, utterances_batch: list, history_batch: list, *responses: list) -> list: - """Returns final response for each incoming utterance. - - Processes Agent skills and generates one final response for each - utterance in incoming batch. - - Args: - utterances_batch: A batch of utterances of any type - history_batch: A batch of list typed histories - for each utterance - responses: Each response positional argument corresponds to - response of one of Agent skills and is represented by - batch (list) of (response, confidence) tuple structures. - - Returns: - responses: A batch of responses corresponding to the - utterance batch received by agent. - """ - pass diff --git a/deeppavlov/deprecated/agent/rich_content.py b/deeppavlov/deprecated/agent/rich_content.py deleted file mode 100644 index 1b4070b951..0000000000 --- a/deeppavlov/deprecated/agent/rich_content.py +++ /dev/null @@ -1,159 +0,0 @@ -# Copyright 2017 Neural Networks and Deep Learning lab, MIPT -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from abc import ABCMeta, abstractmethod -from typing import Union - - -class RichItem(metaclass=ABCMeta): - """Base class for rich content elements. - - Every rich content element - is presumed to return its state (including state of nested controls) - at least in json format (mandatory) as well as in the formats compatible - with other channels. - """ - - @abstractmethod - def json(self) -> Union[list, dict]: - """Returns json compatible state of the control instance including - its nested controls. - - Returns: - control: Json representation of control state. - """ - pass - - def ms_bot_framework(self): - """Returns MS Bot Framework compatible state of the control instance - including its nested controls. - - Returns: - control: MS Bot Framework representation of control state. - """ - return None - - def telegram(self): - """Returns Telegram compatible state of the control instance - including its nested controls. - - Returns: - control: Telegram representation of control state. - """ - return None - - def alexa(self): - """Returns Amazon Alexa compatible state of the control instance - including its nested controls. - - Returns: - control: Amazon Alexa representation of control state. - """ - return None - - -class RichControl(RichItem, metaclass=ABCMeta): - """Base class for rich controls. - - Rich control can be a button, buttons box, plain text, image, etc. - All rich control classes should be derived from RichControl. - - Args: - control_type: Name of the rich control type. - - Attributes: - control_type: Name of the rich control type. - content: Arbitrary used control content holder. - control_json: Control json representation template, which - contains control type and content fields. 
- """ - - def __init__(self, control_type: str) -> None: - self.control_type: str = control_type - self.content = None - self.control_json: dict = {'type': control_type, 'content': None} - - def __str__(self) -> str: - return '' - - -class RichMessage(RichItem): - """Container for rich controls. - - All rich content elements returned by agent as a result of single - inference should be embedded into RichMessage instance in the order - these elements should be displayed. - - Attributes: - controls: Container for RichControl instances. - """ - - def __init__(self) -> None: - self.controls: list = [] - - def __str__(self) -> str: - result = '\n'.join(filter(bool, map(str, self.controls))) - return result - - def add_control(self, control: RichControl): - """Adds RichControl instance to RichMessage. - - Args: - control: RichControl instance. - """ - self.controls.append(control) - - def json(self) -> list: - """Returns list of json compatible states of the RichMessage instance - nested controls. - - Returns: - json_controls: Json representation of RichMessage instance - nested controls. - """ - json_controls = [control.json() for control in self.controls] - return json_controls - - def ms_bot_framework(self) -> list: - """Returns list of MS Bot Framework compatible states of the - RichMessage instance nested controls. - - Returns: - ms_bf_controls: MS Bot Framework representation of RichMessage instance - nested controls. - """ - ms_bf_controls = [control.ms_bot_framework() for control in self.controls] - return ms_bf_controls - - def telegram(self) -> list: - """Returns list of Telegram compatible states of the RichMessage - instance nested controls. - - Returns: - telegram_controls: Telegram representation of RichMessage instance nested - controls. - """ - telegram_controls = [control.telegram() for control in self.controls] - return telegram_controls - - def alexa(self) -> list: - """Returns list of Amazon Alexa compatible states of the RichMessage - instance nested controls. - - Returns: - alexa_controls: Amazon Alexa representation of RichMessage instance nested - controls. - """ - alexa_controls = [control.alexa() for control in self.controls] - return alexa_controls diff --git a/deeppavlov/deprecated/agents/__init__.py b/deeppavlov/deprecated/agents/__init__.py deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/deeppavlov/deprecated/agents/default_agent/__init__.py b/deeppavlov/deprecated/agents/default_agent/__init__.py deleted file mode 100644 index f5b18b62e8..0000000000 --- a/deeppavlov/deprecated/agents/default_agent/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from .default_agent import DefaultAgent diff --git a/deeppavlov/deprecated/agents/default_agent/default_agent.py b/deeppavlov/deprecated/agents/default_agent/default_agent.py deleted file mode 100644 index fbcfff1f93..0000000000 --- a/deeppavlov/deprecated/agents/default_agent/default_agent.py +++ /dev/null @@ -1,95 +0,0 @@ -# Copyright 2017 Neural Networks and Deep Learning lab, MIPT -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. - -from typing import List, Optional - -from deeppavlov.deprecated.agent import Agent, Filter, Processor -from deeppavlov.deprecated.agents.filters import TransparentFilter -from deeppavlov.deprecated.agents.processors import HighestConfidenceSelector -from deeppavlov.core.models.component import Component - - -class DefaultAgent(Agent): - """ - DeepPavlov default implementation of Agent abstraction. - - Default Agent is an implementation of agent template, with following - pipeline for each utterance batch received by agent: - 1) Utterance batch is processed through agent Filter which selects utterances to be processed with each agent skill; - 2) Utterances are processed through skills selected for them; - 3) Utterances and skill responses are processed through agent Processor which generates agent's response for the outer world. - Defining DefaultAgent means: - a) To define set of skills it uses; - b) To implement skills Filter; - c) To implement Processor. - You can refer to :class:`deeppavlov.deprecated.skill.Skill`, :class:`deeppavlov.deprecated.agent.Filter`, - :class:`deeppavlov.deprecated.agent.Processor` base classes to get more info. - - Args: - skills: List of initiated agent skills or components instances. - skills_processor: Initiated agent processor. - skills_filter: Initiated agent filter. - - Attributes: - skills: List of initiated agent skills instances. - skills_processor: Initiated agent processor. - skills_filter: Initiated agent filter. - """ - - def __init__(self, skills: List[Component], skills_processor: Optional[Processor] = None, - skills_filter: Optional[Filter] = None, *args, **kwargs) -> None: - super(DefaultAgent, self).__init__(skills=skills) - self.skills_filter = skills_filter or TransparentFilter(len(skills)) - self.skills_processor = skills_processor or HighestConfidenceSelector() - - def _call(self, utterances_batch: list, utterances_ids: Optional[list] = None) -> list: - """ - Processes batch of utterances and returns corresponding responses batch. - - Each call of Agent passes incoming utterances batch through skills filter, - agent skills, skills processor. Batch of dialog IDs can be provided, in - other case utterances indexes in incoming batch are used as dialog IDs. - - Args: - utterances_batch: Batch of incoming utterances. - utterances_ids: Batch of dialog IDs corresponding to incoming utterances. - - Returns: - responses: A batch of responses corresponding to the - utterance batch received by agent. 
- """ - batch_size = len(utterances_batch) - ids = utterances_ids or list(range(batch_size)) - batch_history = [self.history[utt_id] for utt_id in ids] - responses = [] - - filtered = self.skills_filter(utterances_batch, batch_history) - - for skill_i, (filtered_utterances, skill) in enumerate(zip(filtered, self.wrapped_skills)): - skill_i_utt_indexes = [utt_index for utt_index, utt_filter in enumerate(filtered_utterances) if utt_filter] - - if skill_i_utt_indexes: - skill_i_utt_batch = [utterances_batch[i] for i in skill_i_utt_indexes] - skill_i_utt_ids = [ids[i] for i in skill_i_utt_indexes] - res = [(None, 0.)] * batch_size - predicted, confidence = skill(skill_i_utt_batch, skill_i_utt_ids) - - for i, predicted, confidence in zip(skill_i_utt_indexes, predicted, confidence): - res[i] = (predicted, confidence) - - responses.append(res) - - responses = self.skills_processor(utterances_batch, batch_history, *responses) - - return responses diff --git a/deeppavlov/deprecated/agents/ecommerce_agent/__init__.py b/deeppavlov/deprecated/agents/ecommerce_agent/__init__.py deleted file mode 100644 index d83851d715..0000000000 --- a/deeppavlov/deprecated/agents/ecommerce_agent/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from .ecommerce_agent import EcommerceAgent diff --git a/deeppavlov/deprecated/agents/ecommerce_agent/ecommerce_agent.py b/deeppavlov/deprecated/agents/ecommerce_agent/ecommerce_agent.py deleted file mode 100644 index d349d64979..0000000000 --- a/deeppavlov/deprecated/agents/ecommerce_agent/ecommerce_agent.py +++ /dev/null @@ -1,198 +0,0 @@ -# Copyright 2017 Neural Networks and Deep Learning lab, MIPT -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import argparse -from collections import defaultdict -from logging import getLogger -from typing import List, Dict, Any - -from deeppavlov.core.commands.infer import build_model -from deeppavlov.deep import find_config -from deeppavlov.deprecated.agent import Agent, RichMessage -from deeppavlov.deprecated.agents.rich_content import PlainText, ButtonsFrame, Button -from deeppavlov.deprecated.skill import Skill -from deeppavlov.utils.ms_bot_framework import start_ms_bf_server - -parser = argparse.ArgumentParser() -parser.add_argument("-i", "--ms-id", help="microsoft bot framework app id", type=str) -parser.add_argument("-s", "--ms-secret", help="microsoft bot framework app secret", type=str) - -log = getLogger(__name__) - - -class EcommerceAgent(Agent): - """DeepPavlov Ecommerce agent. - - Args: - skill: List of initiated agent skills instances. - - Attributes: - skill: List of initiated agent skills instances. - history: Histories for each each dialog with agent indexed - by dialog ID. Each history is represented by list of incoming - and outcoming replicas of the dialog. - states: States for each each dialog with agent indexed by dialog ID. 
- """ - - def __init__(self, skills: List[Skill], *args, **kwargs) -> None: - super(EcommerceAgent, self).__init__(skills=skills) - self.states: dict = defaultdict(lambda: [{"start": 0, "stop": 5} for _ in self.skills]) - - def _call(self, utterances_batch: List[str], utterances_ids: List[int] = None) -> List[RichMessage]: - """Processes batch of utterances and returns corresponding responses batch. - - Args: - utterances_batch: Batch of incoming utterances. - utterances_ids: Batch of dialog IDs corresponding to incoming utterances. - - Returns: - responses: A batch of responses corresponding to the - utterance batch received by agent. - """ - - rich_message = RichMessage() - for utt_id, utt in enumerate(utterances_batch): - - if utterances_ids: - id_ = utterances_ids[utt_id] - - log.debug(f'Utterance: {utt}') - - if utt == "/start": - welcome = "I am a new e-commerce bot. I will help you to find products that you are looking for. Please type your request in plain text." - rich_message.add_control(PlainText(welcome)) - continue - - if utt[0] == "@": - command, *parts = utt.split(":") - log.debug(f'Actions: {parts}') - - if command == "@details": - batch_index = int(parts[0]) # batch index in history list - item_index = int(parts[1]) # index in batch - rich_message.add_control(PlainText(show_details( - self.history[id_][batch_index][item_index]))) - continue - - if command == "@entropy": - state = self.history[id_][int(parts[0])] - state[parts[1]] = parts[2] - state["start"] = 0 - state["stop"] = 5 - utt = state['query'] - self.states[id_] = state - - if command == "@next": - state = self.history[id_][int(parts[0])] - state['start'] = state['stop'] - state['stop'] = state['stop'] + 5 - utt = state['query'] - self.states[id_] = state - else: - if id_ not in self.states: - self.states[id_] = {} - - self.states[id_]["start"] = 0 - self.states[id_]["stop"] = 5 - - responses_batch, confidences_batch, state_batch = self.skills[0]( - [utt], self.history[id_], [self.states[id_]]) - - # update `self.states` with retrieved results - self.states[id_] = state_batch[0] - self.states[id_]["query"] = utt - - items_batch, entropy_batch = responses_batch - - for batch_idx, items in enumerate(items_batch): - - self.history[id_].append(items) - self.history[id_].append(self.states[id_]) - - for idx, item in enumerate(items): - rich_message.add_control(_draw_item(item, idx, self.history[id_])) - - if len(items) == self.states[id_]['stop'] - self.states[id_]['start']: - buttons_frame = _draw_tail(entropy_batch[batch_idx], self.history[id_]) - rich_message.add_control(buttons_frame) - - return [rich_message] - - -def _draw_tail(entropy, history): - buttons_frame = ButtonsFrame(text="") - buttons_frame.add_button(Button('More', "@next:" + str(len(history) - 1))) - caption = "Press More " - - if entropy: - caption += "specify a " + entropy[0][1] - for ent_value in entropy[0][2][:4]: - button_a = Button(ent_value[0], f'@entropy:{len(history) - 1}:{entropy[0][1]}:{ent_value[0]}') - buttons_frame.add_button(button_a) - - buttons_frame.text = caption - return buttons_frame - - -def _draw_item(item, idx, history): - title = item['Title'] - if 'ListPrice' in item: - title += " - **$" + item['ListPrice'].split('$')[1] + "**" - - buttons_frame = ButtonsFrame(text=title) - buttons_frame.add_button(Button('Show details', "@details:" + str(len(history) - 2) + ":" + str(idx))) - return buttons_frame - - -def show_details(item_data: Dict[Any, Any]) -> str: - """Format catalog item output - - Parameters: - item_data: item's 
attributes values - - Returns: - [rich_message]: list of formatted rich message - """ - - txt = "" - - for key, value in item_data.items(): - txt += "**" + str(key) + "**" + ': ' + str(value) + " \n" - - return txt - - -def make_agent() -> EcommerceAgent: - """Make an agent - - Returns: - agent: created Ecommerce agent - """ - - config_path = find_config('tfidf_retrieve') - skill = build_model(config_path) - agent = EcommerceAgent(skills=[skill]) - return agent - - -def main(): - """Parse parameters and run ms bot framework""" - - args = parser.parse_args() - start_ms_bf_server(app_id=args.ms_id, - app_secret=args.ms_secret) - - -if __name__ == '__main__': - main() diff --git a/deeppavlov/deprecated/agents/filters/__init__.py b/deeppavlov/deprecated/agents/filters/__init__.py deleted file mode 100644 index 6535882349..0000000000 --- a/deeppavlov/deprecated/agents/filters/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from .transparent_filter import TransparentFilter diff --git a/deeppavlov/deprecated/agents/filters/transparent_filter.py b/deeppavlov/deprecated/agents/filters/transparent_filter.py deleted file mode 100644 index 1cfc8d2ae5..0000000000 --- a/deeppavlov/deprecated/agents/filters/transparent_filter.py +++ /dev/null @@ -1,45 +0,0 @@ -# Copyright 2017 Neural Networks and Deep Learning lab, MIPT -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from deeppavlov.deprecated.agent import Filter - - -class TransparentFilter(Filter): - """Filter that applies each agent skill to all of batch utterances. - - Args: - skills_count: Number of agent skills. - - Attributes: - size: Number of agent skills. - """ - - def __init__(self, skills_count: int, *args, **kwargs) -> None: - self.size: int = skills_count - - def __call__(self, utterances_batch: list, history_batch: list) -> list: - """Returns skills-utterances application matrix. - - Generates skills-utterances application matrix with all True - elements. - - Args: - utterances_batch: A batch of utterances of any type. - history_batch: Not used. - - Returns: - response: Skills-utterances application matrix with all True - elements. - """ - return [[True] * len(utterances_batch)] * self.size diff --git a/deeppavlov/deprecated/agents/hello_bot_agent/__init__.py b/deeppavlov/deprecated/agents/hello_bot_agent/__init__.py deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/deeppavlov/deprecated/agents/hello_bot_agent/hello_bot_agent.py b/deeppavlov/deprecated/agents/hello_bot_agent/hello_bot_agent.py deleted file mode 100644 index 3d8adc8217..0000000000 --- a/deeppavlov/deprecated/agents/hello_bot_agent/hello_bot_agent.py +++ /dev/null @@ -1,40 +0,0 @@ -# Copyright 2017 Neural Networks and Deep Learning lab, MIPT -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from deeppavlov.deprecated.agents.default_agent import DefaultAgent -from deeppavlov.deprecated.agents.processors import HighestConfidenceSelector -from deeppavlov.deprecated.skills.pattern_matching_skill import PatternMatchingSkill - - -def make_hello_bot_agent() -> DefaultAgent: - """Builds agent based on PatternMatchingSkill and HighestConfidenceSelector. - - This is agent building tutorial. You can use this .py file to check how hello-bot agent works. - - Returns: - agent: Agent capable of handling several simple greetings. - """ - skill_hello = PatternMatchingSkill(['Hello world'], patterns=['hi', 'hello', 'good day']) - skill_bye = PatternMatchingSkill(['Goodbye world', 'See you around'], patterns=['bye', 'chao', 'see you']) - skill_fallback = PatternMatchingSkill(['I don\'t understand, sorry', 'I can say "Hello world"']) - - agent = DefaultAgent([skill_hello, skill_bye, skill_fallback], skills_processor=HighestConfidenceSelector()) - - return agent - - -if __name__ == '__main__': - hello_bot_agent = make_hello_bot_agent() - response = hello_bot_agent(['Hello', 'Bye', 'Or not']) - print(response) diff --git a/deeppavlov/deprecated/agents/processors/__init__.py b/deeppavlov/deprecated/agents/processors/__init__.py deleted file mode 100644 index 5e7b8aeaca..0000000000 --- a/deeppavlov/deprecated/agents/processors/__init__.py +++ /dev/null @@ -1,3 +0,0 @@ -from .default_rich_content_processor import DefaultRichContentWrapper -from .highest_confidence_selector import HighestConfidenceSelector -from .random_selector import RandomSelector diff --git a/deeppavlov/deprecated/agents/processors/default_rich_content_processor.py b/deeppavlov/deprecated/agents/processors/default_rich_content_processor.py deleted file mode 100644 index 1baeb4a22f..0000000000 --- a/deeppavlov/deprecated/agents/processors/default_rich_content_processor.py +++ /dev/null @@ -1,47 +0,0 @@ -# Copyright 2017 Neural Networks and Deep Learning lab, MIPT -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from deeppavlov.deprecated.agent import Processor, RichMessage -from deeppavlov.deprecated.agents.rich_content import PlainText - - -class DefaultRichContentWrapper(Processor): - """Returns RichControl wrapped responses with highest confidence.""" - - def __init__(self, *args, **kwargs) -> None: - pass - - def __call__(self, utterances: list, batch_history: list, *responses: list) -> list: - """Selects for each utterance response with highest confidence and wraps them to RichControl objects. - - Args: - utterances_batch: Not used. - history_batch: Not used. 
- responses: Each response positional argument corresponds to - response of one of Agent skills and is represented by - batch (list) of (response, confidence) tuple structures. - - Returns: - result: A batch of responses corresponding to the utterance - batch received by agent. - """ - responses, confidences = zip(*[zip(*r) for r in responses]) - indexes = [c.index(max(c)) for c in zip(*confidences)] - result = [] - for i, *responses in zip(indexes, *responses): - rich_message = RichMessage() - plain_text = PlainText(str(responses[i])) - rich_message.add_control(plain_text) - result.append(rich_message) - return result diff --git a/deeppavlov/deprecated/agents/processors/highest_confidence_selector.py b/deeppavlov/deprecated/agents/processors/highest_confidence_selector.py deleted file mode 100644 index 21a41b85a7..0000000000 --- a/deeppavlov/deprecated/agents/processors/highest_confidence_selector.py +++ /dev/null @@ -1,41 +0,0 @@ -# Copyright 2017 Neural Networks and Deep Learning lab, MIPT -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from deeppavlov.deprecated.agent import Processor - - -class HighestConfidenceSelector(Processor): - """Returns for each utterance response with highest confidence.""" - - def __init__(self, *args, **kwargs) -> None: - pass - - def __call__(self, utterances: list, batch_history: list, *responses: list) -> list: - """Selects for each utterance response with highest confidence. - - Args: - utterances_batch: Not used. - history_batch: Not used. - responses: Each response positional argument corresponds to - response of one of Agent skills and is represented by - batch (list) of (response, confidence) tuple structures. - - Returns: - responses: A batch of responses corresponding to the - utterance batch received by agent. - """ - responses, confidences = zip(*[zip(*r) for r in responses]) - indexes = [c.index(max(c)) for c in zip(*confidences)] - result = [responses[i] for i, *responses in zip(indexes, *responses)] - return result diff --git a/deeppavlov/deprecated/agents/processors/random_selector.py b/deeppavlov/deprecated/agents/processors/random_selector.py deleted file mode 100644 index 58b5c64177..0000000000 --- a/deeppavlov/deprecated/agents/processors/random_selector.py +++ /dev/null @@ -1,41 +0,0 @@ -# Copyright 2017 Neural Networks and Deep Learning lab, MIPT -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
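
The transposition one-liner in `HighestConfidenceSelector.__call__` above is dense, so here is a standalone trace of what it computes, with two skills and a batch of two utterances (the values are invented for illustration):

```python
# Each skill returns a batch of (response, confidence) pairs.
skill_a = [('hi', 0.9), ('bye', 0.2)]
skill_b = [('hello', 0.4), ('ciao', 0.8)]
responses = (skill_a, skill_b)

# Transpose (skill, utterance, pair) into per-skill response/confidence tuples.
resps, confs = zip(*[zip(*r) for r in responses])
# resps == (('hi', 'bye'), ('hello', 'ciao'))
# confs == ((0.9, 0.2), (0.4, 0.8))

# For every utterance, find the index of the most confident skill...
indexes = [c.index(max(c)) for c in zip(*confs)]   # [0, 1]

# ...and take that skill's response for that utterance.
result = [r[i] for i, *r in zip(indexes, *resps)]  # ['hi', 'ciao']
print(result)
```

The same index-selection idiom appears in `DefaultRichContentWrapper` above, which merely wraps the winning response in a `PlainText` control instead of returning it raw.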
- -import random - -from deeppavlov.deprecated.agent import Processor - - -class RandomSelector(Processor): - """Returns response of a random skill for each utterance.""" - - def __init__(self, *args, **kwargs) -> None: - pass - - def __call__(self, utterances: list, batch_history: list, *responses: list) -> list: - """Selects result of a random skill for each utterance. - - Args: - utterances_batch: Not used. - history_batch: Not used. - responses: Each response positional argument corresponds to - response of one of Agent skills and is represented by - batch (list) of (response, confidence) tuple structures. - - Returns: - result: A batch of responses corresponding to the utterance - batch received by agent. - """ - result = [random.choice([t for t, sc in r if t]) for r in zip(*responses)] - return result diff --git a/deeppavlov/deprecated/agents/rich_content/__init__.py b/deeppavlov/deprecated/agents/rich_content/__init__.py deleted file mode 100644 index 743e6566d1..0000000000 --- a/deeppavlov/deprecated/agents/rich_content/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from .default_rich_content import Button, ButtonsFrame, PlainText diff --git a/deeppavlov/deprecated/agents/rich_content/default_rich_content.py b/deeppavlov/deprecated/agents/rich_content/default_rich_content.py deleted file mode 100644 index fa31c1c12d..0000000000 --- a/deeppavlov/deprecated/agents/rich_content/default_rich_content.py +++ /dev/null @@ -1,203 +0,0 @@ -# Copyright 2017 Neural Networks and Deep Learning lab, MIPT -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from typing import Optional - -from deeppavlov.deprecated.agent import RichControl - - -class PlainText(RichControl): - """Plain text message as a rich control. - - Args: - text: Text of the message. - - Attributes: - content: Text of the message. - """ - - def __init__(self, text: str) -> None: - super(PlainText, self).__init__('plain_text') - self.content: str = text - - def __str__(self) -> str: - return self.content - - def json(self) -> dict: - """Returns json compatible state of the PlainText instance. - - Returns: - control_json: Json representation of PlainText state. - """ - self.control_json['content'] = self.content - return self.control_json - - def ms_bot_framework(self) -> dict: - """Returns MS Bot Framework compatible state of the PlainText instance. - - Creating MS Bot Framework activity blank with "text" field populated. - - Returns: - out_activity: MS Bot Framework representation of PlainText state. - """ - out_activity = {} - out_activity['type'] = 'message' - out_activity['text'] = self.content - return out_activity - - def alexa(self) -> dict: - """Returns Amazon Alexa compatible state of the PlainText instance. - - Creating Amazon Alexa response blank with populated "outputSpeech" and - "card sections. - - Returns: - response: Amazon Alexa representation of PlainText state. 
- """ - response = { - 'response': { - 'shouldEndSession': False, - 'outputSpeech': { - 'type': 'PlainText', - 'text': self.content}, - 'card': { - 'type': 'Simple', - 'content': self.content - } - } - } - - return response - - -class Button(RichControl): - """Button with plain text callback. - - Args: - name: Displayed name of the button. - callback: Plain text returned as callback when button pressed. - - Attributes: - name: Displayed name of the button. - callback: Plain text returned as callback when button pressed. - """ - - def __init__(self, name: str, callback: str) -> None: - super(Button, self).__init__('button') - self.name: str = name - self.callback: str = callback - - def json(self) -> dict: - """Returns json compatible state of the Button instance. - - Returns: - control_json: Json representation of Button state. - """ - content = {} - content['name'] = self.name - content['callback'] = self.callback - self.control_json['content'] = content - return self.control_json - - def ms_bot_framework(self) -> dict: - """Returns MS Bot Framework compatible state of the Button instance. - - Creates MS Bot Framework CardAction (button) with postBack value return. - - Returns: - control_json: MS Bot Framework representation of Button state. - """ - card_action = {} - card_action['type'] = 'postBack' - card_action['title'] = self.name - card_action['value'] = self.callback = self.callback - return card_action - - -class ButtonsFrame(RichControl): - """ButtonsFrame is a container for several Buttons objects. - - ButtonsFrame embeds several Buttons and allows to post them - in one channel message. - - Args: - text: Text displayed with embedded buttons. - - Attributes: - text: Text displayed with embedded buttons. - content: Container with Button objects. - """ - - def __init__(self, text: Optional[str] = None) -> None: - super(ButtonsFrame, self).__init__('buttons_frame') - self.text: [str, None] = text - self.content: list = [] - - def add_button(self, button: Button): - """Adds Button instance to RichMessage. - - Args: - button: Button instance. - """ - self.content.append(button) - - def json(self) -> dict: - """Returns json compatible state of the ButtonsFrame instance. - - Returns json compatible state of the ButtonsFrame instance including - all nested buttons. - - Returns: - control_json: Json representation of ButtonsFrame state. - """ - content = {} - - if self.text: - content['text'] = self.text - - content['controls'] = [control.json() for control in self.content] - - self.control_json['content'] = content - - return self.control_json - - def ms_bot_framework(self) -> dict: - """Returns MS Bot Framework compatible state of the ButtonsFrame instance. - - Creating MS Bot Framework activity blank with RichCard in "attachments". RichCard - is populated with CardActions corresponding buttons embedded in ButtonsFrame. - - Returns: - control_json: MS Bot Framework representation of ButtonsFrame state. 
- """ - rich_card = {} - - buttons = [button.ms_bot_framework() for button in self.content] - rich_card['buttons'] = buttons - - if self.text: - rich_card['title'] = self.text - - attachments = [ - { - "contentType": "application/vnd.microsoft.card.thumbnail", - "content": rich_card - } - ] - - out_activity = {} - out_activity['type'] = 'message' - out_activity['attachments'] = attachments - - return out_activity diff --git a/deeppavlov/deprecated/skill/__init__.py b/deeppavlov/deprecated/skill/__init__.py deleted file mode 100644 index 1949352372..0000000000 --- a/deeppavlov/deprecated/skill/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from .skill import Skill diff --git a/deeppavlov/deprecated/skill/skill.py b/deeppavlov/deprecated/skill/skill.py deleted file mode 100644 index 15b46f4dbb..0000000000 --- a/deeppavlov/deprecated/skill/skill.py +++ /dev/null @@ -1,49 +0,0 @@ -# Copyright 2017 Neural Networks and Deep Learning lab, MIPT -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from abc import ABCMeta, abstractmethod -from typing import Tuple, Optional, Union - -from deeppavlov.core.models.component import Component - - -class Skill(Component, metaclass=ABCMeta): - """Abstract class for skills. - - Skill is a DeepPavlov component, which provides handling dialog state, - dialog history and rich content. - """ - - @abstractmethod - def __call__(self, utterances_batch: list, history_batch: list, - states_batch: Optional[list] = None) -> Union[Tuple[list, list], Tuple[list, list, Optional[list]]]: - """Returns skill inference result. - - Returns batches of skill inference results, estimated confidence - levels and up to date states corresponding to incoming utterance - batch. - - Args: - utterances_batch: A batch of utterances of any type. - history_batch: A batch of list typed histories for each utterance. - states_batch: Optional. A batch of arbitrary typed states for - each utterance. - - Returns: - response: A batch of arbitrary typed skill inference results. - confidence: A batch of float typed confidence levels for each of - skill inference result. - states: Optional. A batch of arbitrary typed states for each - response. 
- """ diff --git a/deeppavlov/deprecated/skills/__init__.py b/deeppavlov/deprecated/skills/__init__.py deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/deeppavlov/deprecated/skills/default_skill/__init__.py b/deeppavlov/deprecated/skills/default_skill/__init__.py deleted file mode 100644 index 579fd145fc..0000000000 --- a/deeppavlov/deprecated/skills/default_skill/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from .default_skill import DefaultStatelessSkill diff --git a/deeppavlov/deprecated/skills/default_skill/default_skill.py b/deeppavlov/deprecated/skills/default_skill/default_skill.py deleted file mode 100644 index 730518d1e5..0000000000 --- a/deeppavlov/deprecated/skills/default_skill/default_skill.py +++ /dev/null @@ -1,94 +0,0 @@ -# Copyright 2017 Neural Networks and Deep Learning lab, MIPT -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from typing import Tuple, Optional, List - -from deeppavlov.core.common.chainer import Chainer -from deeppavlov.deprecated.skill import Skill - -proposals = { - 'en': 'expecting_arg: {}', - 'ru': 'Пожалуйста, введите параметр {}' -} - - -class DefaultStatelessSkill(Skill): - """Default stateless skill class. - - The class is intended to be used for as a default skill wrapping DeepPavlov models. - - Attributes: - model: DeepPavlov model to be wrapped into default skill instance. - """ - - def __init__(self, model: Chainer, lang: str = 'en', *args, **kwargs) -> None: - self.model = model - self.proposal: str = proposals[lang] - - def __call__(self, utterances_batch: list, history_batch: list, - states_batch: Optional[list] = None) -> Tuple[list, list, list]: - """Returns skill inference result. - - Returns batches of skill inference results, estimated confidence - levels and up to date states corresponding to incoming utterance - batch. Also handles interaction with multiargument models using - skill states. - - Args: - utterances_batch: A batch of utterances of any type. - history_batch: Not used. A batch of list typed histories for each - utterance. - states_batch: A batch of states for each utterance. - - Returns: - response: A batch of arbitrary typed skill inference results. - confidence: A batch of float typed confidence levels for each of - skill inference result. - states: Optional. A batch of states for each response. 
- """ - batch_len = len(utterances_batch) - confidence_batch = [1.0] * batch_len - - response_batch: List[Optional[str]] = [None] * batch_len - infer_indexes = [] - - if not states_batch: - states_batch: List[Optional[dict]] = [None] * batch_len - - for utt_i, utterance in enumerate(utterances_batch): - if not states_batch[utt_i]: - states_batch[utt_i] = {'expected_args': list(self.model.in_x), 'received_values': []} - - if utterance: - states_batch[utt_i]['expected_args'].pop(0) - states_batch[utt_i]['received_values'].append(utterance) - - if states_batch[utt_i]['expected_args']: - response = self.proposal.format(states_batch[utt_i]['expected_args'][0]) - response_batch[utt_i] = response - else: - infer_indexes.append(utt_i) - - if infer_indexes: - infer_utterances = zip(*[tuple(states_batch[i]['received_values']) for i in infer_indexes]) - infer_results = self.model(*infer_utterances) - - if len(self.model.out_params) > 1: - infer_results = ['; '.join([str(out_y) for out_y in result]) for result in zip(*infer_results)] - - for infer_i, infer_result in zip(infer_indexes, infer_results): - response_batch[infer_i] = infer_result - states_batch[infer_i] = None - - return response_batch, confidence_batch, states_batch diff --git a/deeppavlov/deprecated/skills/ecommerce_skill/__init__.py b/deeppavlov/deprecated/skills/ecommerce_skill/__init__.py deleted file mode 100644 index 3dd9ad83f1..0000000000 --- a/deeppavlov/deprecated/skills/ecommerce_skill/__init__.py +++ /dev/null @@ -1,3 +0,0 @@ -from .bleu_retrieve import EcommerceSkillBleu -from .tfidf_retrieve import EcommerceSkillTfidf - diff --git a/deeppavlov/deprecated/skills/ecommerce_skill/bleu_retrieve.py b/deeppavlov/deprecated/skills/ecommerce_skill/bleu_retrieve.py deleted file mode 100644 index 4303746b47..0000000000 --- a/deeppavlov/deprecated/skills/ecommerce_skill/bleu_retrieve.py +++ /dev/null @@ -1,262 +0,0 @@ -# Copyright 2018 Neural Networks and Deep Learning lab, MIPT -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import copy -import json -from collections import Counter -from logging import getLogger -from operator import itemgetter -from pathlib import Path -from typing import List, Tuple, Dict, Any - -import numpy as np -from scipy.stats import entropy - -from deeppavlov.core.commands.utils import expand_path -from deeppavlov.core.common.file import save_pickle, load_pickle -from deeppavlov.core.common.registry import register -from deeppavlov.core.models.estimator import Component -from deeppavlov.deprecated.skill import Skill -from deeppavlov.metrics.bleu import bleu_advanced - -log = getLogger(__name__) - - -@register("ecommerce_skill_bleu") -class EcommerceSkillBleu(Skill): - """Class to retrieve product items from `load_path` catalogs - in sorted order according to the similarity measure - Retrieve the specification attributes with corresponding values - in sorted order according to entropy. 
- - Parameters: - preprocess: text preprocessing component - save_path: path to save a model - load_path: path to load a model - entropy_fields: the specification attributes of the catalog items - min_similarity: similarity threshold for ranking - min_entropy: min entropy threshold for specifying - """ - - def __init__(self, - preprocess: Component, - save_path: str, - load_path: str, - entropy_fields: list, - min_similarity: float = 0.5, - min_entropy: float = 0.5, - **kwargs) -> None: - - self.preprocess = preprocess - self.save_path = expand_path(save_path) - - if isinstance(load_path, list): - self.load_path: List = [expand_path(path) for path in load_path] - else: - self.load_path: List = [expand_path(load_path)] - - self.min_similarity = min_similarity - self.min_entropy = min_entropy - self.entropy_fields = entropy_fields - self.ec_data: List = [] - if kwargs.get('mode') != 'train': - self.load() - - def fit(self, data: List[Dict[Any, Any]]) -> None: - """Preprocess items `title` and `description` from the `data` - - Parameters: - data: list of catalog items - - Returns: - None - """ - - log.info(f"Items to nlp: {len(data)}") - self.ec_data = [dict(item, **{ - 'title_nlped': self.preprocess.spacy2dict(self.preprocess.analyze(item['Title'])), - 'feat_nlped': self.preprocess.spacy2dict(self.preprocess.analyze(item['Title'] + '. ' + item['Feature'])) - }) for item in data] - log.info('Data are nlped') - - def save(self, **kwargs) -> None: - """Save classifier parameters""" - log.info(f"Saving model to {self.save_path}") - save_pickle(self.ec_data, self.save_path) - - def load(self, **kwargs) -> None: - """Load classifier parameters""" - log.info(f"Loading model from {self.load_path}") - for path in self.load_path: - if Path.is_file(path): - self.ec_data += load_pickle(path) - else: - raise FileNotFoundError - - log.info(f"Loaded items {len(self.ec_data)}") - - def __call__(self, queries: List[str], history: List[Any], states: List[Dict[Any, Any]]) -> \ - Tuple[Tuple[List[Any], List[Any]], List[float], List[Any]]: - """Retrieve catalog items according to the BLEU measure - - Parameters: - queries: list of queries - history: list of previous queries - states: list of dialog state - - Returns: - response: items: list of retrieved items - entropies: list of entropy attributes with corresponding values - - confidence: list of similarity scores - state: dialog state - """ - - response: List = [] - confidence: List = [] - results_args: List = [] - entropies: List = [] - back_states: List = [] - results_args_sim: List = [] - - log.debug(f"queries: {queries} states: {states}") - - for item_idx, query in enumerate(queries): - - state = states[item_idx] - - if isinstance(state, str): - try: - state = json.loads(state) - except: - state = self.preprocess.parse_input(state) - - if not state: - state = {} - - start = state['start'] if 'start' in state else 0 - stop = state['stop'] if 'stop' in state else 5 - - state['start'] = start - state['stop'] = stop - - query = self.preprocess.analyze(query) - - query, money_range = self.preprocess.extract_money(query) - log.debug(f"money detected: {query} {money_range}") - - if len(money_range) == 2: - state['Price'] = money_range - - score_title = [bleu_advanced(self.preprocess.lemmas(item['title_nlped']), - self.preprocess.lemmas(self.preprocess.filter_nlp_title(query)), - weights=(1,), penalty=False) for item in self.ec_data] - - score_feat = [bleu_advanced(self.preprocess.lemmas(item['feat_nlped']), - 
self.preprocess.lemmas(self.preprocess.filter_nlp(query)), - weights=(0.3, 0.7), penalty=False) for idx, item in enumerate(self.ec_data)] - - scores = np.mean([score_feat, score_title], axis=0).tolist() - - scores_title = [(score, -len(self.ec_data[idx]['Title'])) for idx, score in enumerate(scores)] - - raw_scores_ar = np.array(scores_title, dtype=[('x', 'float_'), ('y', 'int_')]) - - results_args = np.argsort(raw_scores_ar, order=('x', 'y'))[::-1].tolist() - - results_args_sim = [idx for idx in results_args if scores[idx] >= self.min_similarity] - - log.debug( - f"Items before similarity filtering {len(results_args)} and after {len(results_args_sim)} with th={self.min_similarity} " + - f"the best one has score {scores[results_args[0]]} with title {self.ec_data[results_args[0]]['Title']}") - - results_args_sim = self._filter_state(state, results_args_sim) - - results_args_sim_fil = [idx for idx in results_args_sim[start:stop]] - - local_response = self._clean_items(results_args_sim_fil) - - response.append(local_response) - - confidence.append([(score_title[idx], score_feat[idx]) - for idx in results_args_sim[start:stop]]) - - entropies.append(self._entropy_subquery(results_args_sim)) - log.debug(f"Total number of relevant answers {len(results_args_sim)}") - back_states.append(state) - - return (response, entropies), confidence, back_states - - def _clean_items(self, results: List[int]) -> List[Any]: - local_response: List = [] - for idx in results: - temp = copy.copy(self.ec_data[idx]) - del temp['title_nlped'] - del temp['feat_nlped'] - local_response.append(temp) - return local_response - - def _filter_state(self, state: Dict[Any, Any], results_args_sim: List[int]) -> List[Any]: - for key, value in state.items(): - log.debug(f"Filtering for {key}:{value}") - - if key == 'Price': - price = value - log.debug(f"Items before price filtering {len(results_args_sim)} with price {price}") - results_args_sim = [idx for idx in results_args_sim - if price[0] <= self.preprocess.price(self.ec_data[idx]) <= price[1] and - self.preprocess.price(self.ec_data[idx]) != 0] - log.debug(f"Items after price filtering {len(results_args_sim)}") - - elif key in ['query', 'start', 'stop', 'history']: - continue - - else: - results_args_sim = [idx for idx in results_args_sim - if key in self.ec_data[idx] - if self.ec_data[idx][key].lower() == value.lower()] - - return results_args_sim - - def _entropy_subquery(self, results_args: List[int]) -> List[Tuple[float, str, List[Tuple[str, int]]]]: - """Calculate entropy of selected attributes for items from the catalog. 
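The ranking above packs each item's BLEU score and negated title length into a NumPy structured array, so a single descending `argsort` orders by score and breaks ties in favour of shorter titles. The trick in isolation, with dummy scores (`'f8'`/`'i8'` stand in for the `'float_'`/`'int_'` aliases used above):

```python
import numpy as np

scores = [0.8, 0.8, 0.5]
title_lengths = [42, 17, 30]

# Store (score, -title_length): ascending sort then reversal prefers
# higher scores first and, among equal scores, shorter titles.
keyed = np.array(list(zip(scores, [-length for length in title_lengths])),
                 dtype=[('x', 'f8'), ('y', 'i8')])
order = np.argsort(keyed, order=('x', 'y'))[::-1]
print(order.tolist())  # [1, 0, 2]: item 1 ties on score but has the shorter title
```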
- - Parameters: - results_args: items id to consider - - Returns: - entropies: entropy score with attribute name and corresponding values - """ - - ent_fields: Dict = {} - - for idx in results_args: - for field in self.entropy_fields: - if field in self.ec_data[idx]: - if field not in ent_fields: - ent_fields[field] = [] - - ent_fields[field].append(self.ec_data[idx][field].lower()) - - entropies = [] - for key, value in ent_fields.items(): - count = Counter(value) - entropies.append( - (entropy(list(count.values()), base=2), key, count.most_common())) - - entropies = sorted(entropies, key=itemgetter(0), reverse=True) - entropies = [ - ent_item for ent_item in entropies if ent_item[0] >= self.min_entropy] - - return entropies diff --git a/deeppavlov/deprecated/skills/ecommerce_skill/tfidf_retrieve.py b/deeppavlov/deprecated/skills/ecommerce_skill/tfidf_retrieve.py deleted file mode 100644 index 5ba516c441..0000000000 --- a/deeppavlov/deprecated/skills/ecommerce_skill/tfidf_retrieve.py +++ /dev/null @@ -1,281 +0,0 @@ -# Copyright 2018 Neural Networks and Deep Learning lab, MIPT -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from collections import Counter -from logging import getLogger -from operator import itemgetter -from typing import List, Tuple, Dict, Union, Any - -import numpy as np -from scipy.sparse import csr_matrix, vstack -from scipy.sparse.linalg import norm as sparse_norm -from scipy.stats import entropy - -from deeppavlov.core.commands.utils import expand_path -from deeppavlov.core.common.file import save_pickle, load_pickle -from deeppavlov.core.common.registry import register -from deeppavlov.core.models.estimator import Component - -log = getLogger(__name__) - - -@register("ecommerce_skill_tfidf") -class EcommerceSkillTfidf(Component): - """Class to retrieve product items from `load_path` catalogs - in sorted order according to the similarity measure - Retrieve the specification attributes with corresponding values - in sorted order according to entropy. 
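`_entropy_subquery` above reduces to: rank catalog attributes by the Shannon entropy of their value distribution over the remaining candidates, so the most discriminative attribute is offered to the user first. A self-contained sketch with toy data:

```python
from collections import Counter

from scipy.stats import entropy

items = [
    {'Brand': 'acme', 'Color': 'red'},
    {'Brand': 'acme', 'Color': 'blue'},
    {'Brand': 'acme', 'Color': 'green'},
]

ranked = []
for field in ('Brand', 'Color'):
    counts = Counter(item[field] for item in items if field in item)
    ranked.append((entropy(list(counts.values()), base=2), field, counts.most_common()))

ranked.sort(key=lambda t: t[0], reverse=True)
# 'Brand' is constant (entropy 0); 'Color' is uniform over three values
# (entropy log2(3) ~ 1.585), so asking about color narrows the search fastest.
print(ranked[0][1])  # Color
```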
- - Parameters: - save_path: path to save a model - load_path: path to load a model - entropy_fields: the specification attributes of the catalog items - min_similarity: similarity threshold for ranking - min_entropy: min entropy threshold for specifying - """ - - def __init__(self, - save_path: str, - load_path: str, - entropy_fields: list, - min_similarity: float = 0.5, - min_entropy: float = 0.5, - **kwargs) -> None: - - self.save_path = expand_path(save_path) - self.load_path = expand_path(load_path) - self.min_similarity = min_similarity - self.min_entropy = min_entropy - self.entropy_fields = entropy_fields - self.ec_data: List = [] - self.x_train_features = None - if kwargs.get('mode') != 'train': - self.load() - - def fit(self, data, query) -> None: - """Preprocess items `title` and `description` from the `data` - - Parameters: - data: list of catalog items - - Returns: - None - """ - - self.x_train_features = vstack(list(query)) - self.ec_data = data - - def save(self) -> None: - """Save classifier parameters""" - log.info("Saving to {}".format(self.save_path)) - path = expand_path(self.save_path) - save_pickle((self.ec_data, self.x_train_features), path) - - def load(self) -> None: - """Load classifier parameters""" - log.info("Loading from {}".format(self.load_path)) - self.ec_data, self.x_train_features = load_pickle( - expand_path(self.load_path)) - - def __call__(self, q_vects: List[csr_matrix], histories: List[Any], states: List[Dict[Any, Any]]) -> Tuple[ - Tuple[List[Dict[Any, Any]], List[Any]], List[float], Dict[Any, Any]]: - """Retrieve catalog items according to the TFIDF measure - - Parameters: - queries: list of queries - history: list of previous queries - states: list of dialog state - - Returns: - response: items: list of retrieved items - entropies: list of entropy attributes with corresponding values - - confidence: list of similarity scores - state: dialog state - """ - - log.info(f"Total catalog {len(self.ec_data)}") - - if not isinstance(q_vects, list): - q_vects = [q_vects] - - if not isinstance(states, list): - states = [states] - - if not isinstance(histories, list): - histories = [histories] - - items: List = [] - confidences: List = [] - back_states: List = [] - entropies: List = [] - - for idx, q_vect in enumerate(q_vects): - - log.info(f"Search query {q_vect}") - - if len(states) >= idx + 1: - state = states[idx] - else: - state = {'start': 0, 'stop': 5} - - if not isinstance(state, dict): - state = {'start': 0, 'stop': 5} - - if 'start' not in state: - state['start'] = 0 - if 'stop' not in state: - state['stop'] = 5 - - if 'history' not in state: - state['history'] = [] - - log.info(f"Current state {state}") - - if state['history']: - his_vect = self._list_to_csr(state['history'][-1]) - if not np.array_equal(his_vect.todense(), q_vect.todense()): - q_comp = q_vect.maximum(his_vect) - complex_bool = self._take_complex_query(q_comp, q_vect) - log.info(f"Complex query:{complex_bool}") - - if complex_bool is True: - q_vect = q_comp - state['start'] = 0 - state['stop'] = 5 - else: - # current short query wins that means that the state should be zeroed - state['history'] = [] - else: - log.info("the save query came") - else: - log.info("history is empty") - - state['history'].append(self._csr_to_list(q_vect)) - log.info(f"Final query {q_vect}") - - scores = self._similarity(q_vect) - answer_ids = np.argsort(scores)[::-1] - answer_ids = [idx for idx in answer_ids if scores[idx] >= self.min_similarity] - - answer_ids = self._state_based_filter(answer_ids, 
state) - - items.append([self.ec_data[idx] - for idx in answer_ids[state['start']:state['stop']]]) - confidences.append( - [scores[idx] for idx in answer_ids[state['start']:state['stop']]]) - back_states.append(state) - - entropies.append(self._entropy_subquery(answer_ids)) - return (items, entropies), confidences, back_states - - def _csr_to_list(self, csr: csr_matrix) -> List[Any]: - return [csr.data.tolist(), csr.indices.tolist()] - - def _list_to_csr(self, _list: List) -> csr_matrix: - row_ind = [0] * len(_list[0]) - col_ind = _list[1] - return csr_matrix((_list[0], (row_ind, col_ind))) - - def _take_complex_query(self, q_prev: csr_matrix, q_cur: csr_matrix) -> bool: - """Decides whether to use the long compound query or the current short query - - Parameters: - q_prev: previous query - q_cur: current query - - Returns: - Bool: whether to use the compound query - """ - - prev_sim = self._similarity(q_prev) - cur_sim = self._similarity(q_cur) - - log.debug(f"prev_sim.max(): {prev_sim.max()}") - log.debug(f"cur_sim.max(): {cur_sim.max()}") - - if prev_sim.max() > cur_sim.max(): - return True - - return False - - def _similarity(self, q_vect: Union[csr_matrix, List]) -> List[float]: - """Calculates cosine similarity between the user's query and product items. - - Parameters: - q_cur: user's query - - Returns: - cos_similarities: lits of similarity scores - """ - - norm = sparse_norm(q_vect) * sparse_norm(self.x_train_features, axis=1) - cos_similarities = np.array(q_vect.dot(self.x_train_features.T).todense()) / norm - - cos_similarities = cos_similarities[0] - cos_similarities = np.nan_to_num(cos_similarities) - return cos_similarities - - def _state_based_filter(self, ids: List[int], state: Dict[Any, Any]): - """Filters the candidates based on the key-values from the state - - Parameters: - ids: list of candidates - state: dialog state - - Returns: - ids: filtered list of candidates - """ - - for key, value in state.items(): - log.debug(f"Filtering for {key}:{value}") - - if key in ['query', 'start', 'stop', 'history']: - continue - - else: - ids = [idx for idx in ids - if key in self.ec_data[idx] - if self.ec_data[idx][key].lower() == value.lower()] - return ids - - def _entropy_subquery(self, results_args: List[int]) -> List[Tuple[float, str, List[Tuple[str, int]]]]: - """Calculate entropy of selected attributes for items from the catalog. 
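`_similarity` above is plain cosine similarity between one sparse query vector and the stacked TF-IDF matrix, with `nan_to_num` guarding against zero norms. Reduced to a toy example:

```python
import numpy as np
from scipy.sparse import csr_matrix, vstack
from scipy.sparse.linalg import norm as sparse_norm

items = vstack([csr_matrix([[1.0, 0.0, 1.0]]),
                csr_matrix([[0.0, 1.0, 0.0]])])
query = csr_matrix([[1.0, 0.0, 0.0]])

# cosine = dot product scaled by both vector norms, computed row-wise
norms = sparse_norm(query) * sparse_norm(items, axis=1)
cos = np.asarray(query.dot(items.T).todense())[0] / norms
print(np.nan_to_num(cos))  # [0.70710678 0.        ]
```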
- - Parameters: - results_args: items id to consider - - Returns: - entropies: entropy score with attribute name and corresponding values - """ - - ent_fields: Dict = {} - - for idx in results_args: - for field in self.entropy_fields: - if field in self.ec_data[idx]: - if field not in ent_fields: - ent_fields[field] = [] - - ent_fields[field].append(self.ec_data[idx][field].lower()) - - entropies = [] - for key, value in ent_fields.items(): - count = Counter(value) - entropies.append((entropy(list(count.values()), base=2), key, count.most_common())) - - entropies = sorted(entropies, key=itemgetter(0), reverse=True) - entropies = [ent_item for ent_item in entropies if ent_item[0] >= self.min_entropy] - - return entropies diff --git a/deeppavlov/deprecated/skills/pattern_matching_skill/__init__.py b/deeppavlov/deprecated/skills/pattern_matching_skill/__init__.py deleted file mode 100644 index b9d29d5ead..0000000000 --- a/deeppavlov/deprecated/skills/pattern_matching_skill/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from .pattern_matching_skill import PatternMatchingSkill diff --git a/deeppavlov/deprecated/skills/pattern_matching_skill/pattern_matching_skill.py b/deeppavlov/deprecated/skills/pattern_matching_skill/pattern_matching_skill.py deleted file mode 100644 index 5fb8357d4f..0000000000 --- a/deeppavlov/deprecated/skills/pattern_matching_skill/pattern_matching_skill.py +++ /dev/null @@ -1,100 +0,0 @@ -# Copyright 2017 Neural Networks and Deep Learning lab, MIPT -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import random -import re -from typing import List, Tuple, Optional - -from deeppavlov.deprecated.skill import Skill - - -class PatternMatchingSkill(Skill): - """Skill, matches utterances to patterns, returns predefined answers. - - Allows to create skills as pre-defined responses for a user's input - containing specific keywords or regular expressions. Every skill returns - response and confidence. - - Args: - responses: List of str responses from which response will be randomly - selected. - patterns: List of str patterns for utterance matching. Patterns may - be all plain texts or all regexps. - regex: Turns on regular expressions matching mode. - ignore_case: Turns on utterances case ignoring. - default_confidence: The default confidence. - - Attributes: - responses: List of str responses from which response will be randomly - selected. - patterns: List of str patterns for utterance matching. Patterns may - be all plain texts or all regexps. - regex: Turns on regular expressions matching mode. - ignore_case: Turns on utterances case ignoring. - default_confidence: The default confidence. 
- """ - - def __init__(self, responses: List[str], patterns: Optional[List[str]] = None, - regex: bool = False, ignore_case: bool = True, default_confidence: float = 1) -> None: - if isinstance(responses, str): - responses = [responses] - self.responses = responses - if isinstance(patterns, str): - patterns = [patterns] - self.regex = regex - self.ignore_case = ignore_case - self.default_confidence = default_confidence - if regex: - if patterns: - flags = re.IGNORECASE if ignore_case else 0 - patterns = [re.compile(pattern, flags) for pattern in patterns] - else: - if patterns and ignore_case: - patterns = [pattern.lower() for pattern in patterns] - self.patterns = patterns - - def __call__(self, utterances_batch: list, history_batch: list, - states_batch: Optional[list] = None) -> Tuple[list, list]: - """Returns skill inference result. - - Returns batches of skill inference results, estimated confidence - levels and up to date states corresponding to incoming utterance - batch. - - Args: - utterances_batch: A batch of utterances of any type. - history_batch: A batch of list typed histories for each utterance. - states_batch: Optional. A batch of arbitrary typed states for - each utterance. - - Returns: - response: A batch of arbitrary typed skill inference results. - confidence: A batch of float typed confidence levels for each of - skill inference result. - """ - response = [random.choice(self.responses) for _ in utterances_batch] - if self.patterns is None: - confidence = [self.default_confidence] * len(utterances_batch) - else: - if self.ignore_case: - utterances_batch = [utterance.lower() for utterance in utterances_batch] - if self.regex: - confidence = [ - self.default_confidence * float(any([pattern.search(utterance) for pattern in self.patterns])) - for utterance in utterances_batch] - else: - confidence = [self.default_confidence * float(any([pattern in utterance for pattern in self.patterns])) - for utterance in utterances_batch] - - return response, confidence diff --git a/deeppavlov/deprecated/skills/similarity_matching_skill/__init__.py b/deeppavlov/deprecated/skills/similarity_matching_skill/__init__.py deleted file mode 100644 index ad5067d5a7..0000000000 --- a/deeppavlov/deprecated/skills/similarity_matching_skill/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from .similarity_matching_skill import SimilarityMatchingSkill diff --git a/deeppavlov/deprecated/skills/similarity_matching_skill/similarity_matching_skill.py b/deeppavlov/deprecated/skills/similarity_matching_skill/similarity_matching_skill.py deleted file mode 100644 index eb46832f93..0000000000 --- a/deeppavlov/deprecated/skills/similarity_matching_skill/similarity_matching_skill.py +++ /dev/null @@ -1,106 +0,0 @@ -# Copyright 2017 Neural Networks and Deep Learning lab, MIPT -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -from logging import getLogger -from typing import Tuple, Optional, List - -from deeppavlov import build_model, train_model -from deeppavlov.configs import configs -from deeppavlov.core.commands.utils import expand_path -from deeppavlov.core.common.file import read_json -from deeppavlov.core.data.utils import update_dict_recursive -from deeppavlov.deprecated.skill import Skill - -log = getLogger(__name__) - - -class SimilarityMatchingSkill(Skill): - """The skill matches utterances to predefined phrases and returns corresponding answers. - - The skill is based on the FAQ-alike .csv table that contains questions and corresponding responses. - The skill returns responses and confidences. - - Args: - data_path: URL or local path to '.csv' file that contains two columns with Utterances and Responses. - User's utterance will be compared to the Utterances column and response will be selected - from the Responses column. - config_type: The selected configuration file ('tfidf_autofaq' by default). - x_col_name: The question column name in the '.csv' file ('Question' by default). - y_col_name: The response column name in the '.csv' file ('Answer' by default). - save_load_path: Path, where the model will be saved or loaded from ('./similarity_matching' by default). - edit_dict: Dictionary of edits to the selected configuration (overwrites other parameters). - train: Should model be trained or not (True by default). - - Attributes: - model: Classifies user's utterance - """ - - def __init__(self, data_path: Optional[str] = None, config_type: Optional[str] = 'tfidf_autofaq', - x_col_name: Optional[str] = 'Question', y_col_name: Optional[str] = 'Answer', - save_load_path: Optional[str] = './similarity_matching', - edit_dict: Optional[dict] = None, train: Optional[bool] = True): - - if config_type not in configs.faq: - raise ValueError("There is no config named '{0}'. Possible options are: {1}" - .format(config_type, ", ".join(configs.faq.keys()))) - model_config = read_json(configs.faq[config_type]) - - if x_col_name is not None: - model_config['dataset_reader']['x_col_name'] = x_col_name - if y_col_name is not None: - model_config['dataset_reader']['y_col_name'] = y_col_name - - model_config['metadata']['variables']['MODELS_PATH'] = save_load_path - - if data_path is not None: - if expand_path(data_path).exists(): - if 'data_url' in model_config['dataset_reader']: - del model_config['dataset_reader']['data_url'] - model_config['dataset_reader']['data_path'] = data_path - else: - if 'data_path' in model_config['dataset_reader']: - del model_config['dataset_reader']['data_path'] - model_config['dataset_reader']['data_url'] = data_path - - if edit_dict is not None: - update_dict_recursive(model_config, edit_dict) - - if train: - self.model = train_model(model_config, download=True) - log.info('Your model was saved at: \'' + save_load_path + '\'') - else: - self.model = build_model(model_config, download=False) - - def __call__(self, utterances_batch: List[str], history_batch: List[List[str]], - states_batch: Optional[list] = None) -> Tuple[List[str], List[float]]: - """It returns the skill inference result. - - Output is batches of the skill inference results and estimated confidences. - - Args: - utterances_batch: A batch of utterances. - history_batch: A batch of list typed histories for each utterance. - states_batch: Optional. A batch of arbitrary typed states for - each utterance. - - Returns: - Batches of the skill inference results and estimated confidences. 
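`SimilarityMatchingSkill` was a thin wrapper around the FAQ configs, which remain available. Its constructor logic can be reproduced directly, as sketched below for the `tfidf_autofaq` config it defaulted to; the CSV URL and save path are placeholders:

```python
from deeppavlov import train_model
from deeppavlov.configs import configs
from deeppavlov.core.common.file import read_json

model_config = read_json(configs.faq['tfidf_autofaq'])
model_config['dataset_reader']['x_col_name'] = 'Question'
model_config['dataset_reader']['y_col_name'] = 'Answer'
model_config['dataset_reader']['data_url'] = 'http://example.com/faq.csv'  # placeholder
model_config['metadata']['variables']['MODELS_PATH'] = './similarity_matching'

faq = train_model(model_config, download=True)
responses, confidences = faq(['How can I train a model?'])
```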
- """ - responses, confidences = self.model(utterances_batch) - - # in case if model returns not the highest probability, but the whole distribution - if isinstance(confidences[0], list): - confidences = [max(c) for c in confidences] - - return responses, confidences diff --git a/deeppavlov/download.py b/deeppavlov/download.py index 655f9e6192..9b2c94b092 100644 --- a/deeppavlov/download.py +++ b/deeppavlov/download.py @@ -12,6 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. +import secrets import shutil import sys from argparse import ArgumentParser, Namespace @@ -20,14 +21,13 @@ from pathlib import Path from typing import Union, Optional, Dict, Iterable, Set, Tuple, List from urllib.parse import urlparse - import requests from filelock import FileLock import deeppavlov from deeppavlov.core.commands.utils import expand_path, parse_config from deeppavlov.core.data.utils import download, download_decompress, get_all_elems_from_json, file_md5, \ - set_query_parameter, path_set_md5 + set_query_parameter, path_set_md5, get_download_token log = getLogger(__name__) @@ -77,7 +77,7 @@ def get_configs_downloads(config: Optional[Union[str, Path, dict]] = None) -> Di return all_downloads -def check_md5(url: str, dest_paths: List[Path]) -> bool: +def check_md5(url: str, dest_paths: List[Path], headers: Optional[dict] = None) -> bool: url_md5 = path_set_md5(url) try: @@ -89,7 +89,7 @@ def check_md5(url: str, dest_paths: List[Path]) -> bool: obj = s3.Object(bucket, key) data = obj.get()['Body'].read().decode('utf8') else: - r = requests.get(url_md5) + r = requests.get(url_md5, headers=headers) if r.status_code != 200: return False data = r.text @@ -126,7 +126,7 @@ def check_md5(url: str, dest_paths: List[Path]) -> bool: return True -def download_resource(url: str, dest_paths: Iterable[Union[Path, str]]) -> None: +def download_resource(url: str, dest_paths: Iterable[Union[Path, str]], headers: Optional[dict] = None) -> None: dest_paths = [Path(dest) for dest in dest_paths] download_path = dest_paths[0].parent download_path.mkdir(parents=True, exist_ok=True) @@ -134,13 +134,13 @@ def download_resource(url: str, dest_paths: Iterable[Union[Path, str]]) -> None: lockfile = download_path / f'.{file_name}.lock' with FileLock(lockfile).acquire(poll_intervall=10): - if check_md5(url, dest_paths): + if check_md5(url, dest_paths, headers): log.info(f'Skipped {url} download because of matching hashes') elif any(ext in url for ext in ('.tar.gz', '.gz', '.zip')): - download_decompress(url, download_path, dest_paths) + download_decompress(url, download_path, dest_paths, headers=headers) else: dest_files = [dest_path / file_name for dest_path in dest_paths] - download(dest_files, url) + download(dest_files, url, headers=headers) def download_resources(args: Namespace) -> None: @@ -159,11 +159,19 @@ def download_resources(args: Namespace) -> None: def deep_download(config: Union[str, Path, dict]) -> None: downloads = get_configs_downloads(config) - - for url, dest_paths in downloads.items(): + last_id = len(downloads) - 1 + session_id = secrets.token_urlsafe(32) + + for file_id, (url, dest_paths) in enumerate(downloads.items()): + headers = { + 'dp-token': get_download_token(), + 'dp-session': session_id, + 'dp-file-id': str(last_id - file_id), + 'dp-version': deeppavlov.__version__ + } if not url.startswith('s3://') and not isinstance(config, dict): url = set_query_parameter(url, 'config', Path(config).stem) - download_resource(url, dest_paths) + 
download_resource(url, dest_paths, headers) def main(args: Optional[List[str]] = None) -> None: diff --git a/deeppavlov/evolve.py b/deeppavlov/evolve.py deleted file mode 100644 index 206f908c97..0000000000 --- a/deeppavlov/evolve.py +++ /dev/null @@ -1,276 +0,0 @@ -# Copyright 2017 Neural Networks and Deep Learning lab, MIPT -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import argparse -import json -import os -import sys -from collections import defaultdict -from logging import getLogger -from subprocess import Popen - -import pandas as pd - -from deeppavlov.core.commands.utils import expand_path, parse_config, parse_value_with_config -from deeppavlov.core.common.errors import ConfigError -from deeppavlov.core.common.file import read_json, save_json, find_config -from deeppavlov.models.evolution.evolution_param_generator import ParamsEvolution - -log = getLogger(__name__) - -parser = argparse.ArgumentParser() - -parser.add_argument("config_path", help="path to a pipeline json config", type=str) -parser.add_argument('--key_main_model', help='key inserted in dictionary of main model in pipe', default="main") -parser.add_argument('--p_cross', help='probability of crossover', type=float, default=0.2) -parser.add_argument('--pow_cross', help='crossover power', type=float, default=0.1) -parser.add_argument('--p_mut', help='probability of mutation', type=float, default=1.) -parser.add_argument('--pow_mut', help='mutation power', type=float, default=0.1) - -parser.add_argument('--p_size', help='population size', type=int, default=10) -parser.add_argument('--gpus', help='visible GPUs divided by comma <<,>>', default="-1") -parser.add_argument('--train_partition', - help='partition of splitted train file', default=1) -parser.add_argument('--start_from_population', - help='population number to start from. 
0 means from scratch', default=0) -parser.add_argument('--path_to_population', - help='path to population to start from', default="") -parser.add_argument('--elitism_with_weights', - help='whether to save elite models with weights or without', action='store_true') -parser.add_argument('--iterations', help='Number of iterations', type=int, default=-1) - - -def main(): - args = parser.parse_args() - - pipeline_config_path = find_config(args.config_path) - key_main_model = args.key_main_model - population_size = args.p_size - gpus = [int(gpu) for gpu in args.gpus.split(",")] - train_partition = int(args.train_partition) - start_from_population = int(args.start_from_population) - path_to_population = args.path_to_population - elitism_with_weights = args.elitism_with_weights - iterations = int(args.iterations) - - p_crossover = args.p_cross - pow_crossover = args.pow_cross - p_mutation = args.p_mut - pow_mutation = args.pow_mut - - if os.environ.get("CUDA_VISIBLE_DEVICES") is None: - pass - else: - cvd = [int(gpu) for gpu in os.environ.get("CUDA_VISIBLE_DEVICES").split(",")] - if gpus == [-1]: - gpus = cvd - else: - try: - gpus = [cvd[gpu] for gpu in gpus] - except IndexError: - raise ConfigError("Can not use gpus `{}` with CUDA_VISIBLE_DEVICES='{}'".format( - ",".join(map(str, gpus)), ",".join(map(str, cvd)) - )) - - basic_params = read_json(pipeline_config_path) - log.info("Given basic params: {}\n".format(json.dumps(basic_params, indent=2))) - - # Initialize evolution - evolution = ParamsEvolution(population_size=population_size, - p_crossover=p_crossover, crossover_power=pow_crossover, - p_mutation=p_mutation, mutation_power=pow_mutation, - key_main_model=key_main_model, - seed=42, - train_partition=train_partition, - elitism_with_weights=elitism_with_weights, - **basic_params) - - considered_metrics = evolution.get_value_from_config(evolution.basic_config, - list(evolution.find_model_path( - evolution.basic_config, "metrics"))[0] + ["metrics"]) - considered_metrics = [metric['name'] if isinstance(metric, dict) else metric for metric in considered_metrics] - - log.info(considered_metrics) - evolve_metric = considered_metrics[0] - - # Create table variable for gathering results - abs_path_to_main_models = expand_path(parse_value_with_config(evolution.models_path, - evolution.basic_config)) - abs_path_to_main_models.mkdir(parents=True, exist_ok=True) - - result_file = abs_path_to_main_models / "result_table.tsv" - print(result_file) - - result_table_columns = [] - result_table_dict = {} - for el in considered_metrics: - result_table_dict[el + "_valid"] = [] - result_table_dict[el + "_test"] = [] - result_table_columns.extend([el + "_valid", el + "_test"]) - - result_table_dict["params"] = [] - result_table_columns.append("params") - - if start_from_population == 0: - # if starting evolution from scratch - iters = 0 - result_table = pd.DataFrame(result_table_dict) - # write down result table file - result_table.loc[:, result_table_columns].to_csv(result_file, index=False, sep='\t') - - log.info("Iteration #{} starts".format(iters)) - # randomly generate the first population - population = evolution.first_generation() - else: - # if starting evolution from already existing population - iters = start_from_population - log.info("Iteration #{} starts".format(iters)) - - population = [] - for i in range(population_size): - config = read_json(expand_path(path_to_population) / f"model_{i}" / "config.json") - - evolution.insert_value_or_dict_into_config( - config, evolution.path_to_models_save_path, - 
str(evolution.main_model_path / f"population_{start_from_population}" / f"model_{i}")) - - population.append(config) - - run_population(population, evolution, gpus) - population_scores = results_to_table(population, evolution, considered_metrics, - result_file, result_table_columns)[evolve_metric] - log.info("Population scores: {}".format(population_scores)) - log.info("Iteration #{} was done".format(iters)) - iters += 1 - - while True: - if iterations != -1 and start_from_population + iterations == iters: - log.info("End of evolution on iteration #{}".format(iters)) - break - log.info("Iteration #{} starts".format(iters)) - population = evolution.next_generation(population, population_scores, iters) - run_population(population, evolution, gpus) - population_scores = results_to_table(population, evolution, considered_metrics, - result_file, result_table_columns)[evolve_metric] - log.info("Population scores: {}".format(population_scores)) - log.info("Iteration #{} was done".format(iters)) - iters += 1 - - -def run_population(population, evolution, gpus): - """ - Change save and load paths for obtained population, save config.json with model config, - run population via current python executor (with which evolve.py already run) - and on given devices (-1 means CPU, other integeres - visible for evolve.py GPUs) - Args: - population: list of dictionaries - configs of current population - evolution: ParamsEvolution - gpus: list of given devices (list of integers) - - Returns: - None - """ - population_size = len(population) - for k in range(population_size // len(gpus) + 1): - procs = [] - for j in range(len(gpus)): - i = k * len(gpus) + j - if i < population_size: - save_path = expand_path( - evolution.get_value_from_config(parse_config(population[i]), - evolution.path_to_models_save_path)) - - save_path.mkdir(parents=True, exist_ok=True) - f_name = save_path / "config.json" - save_json(population[i], f_name) - - with save_path.joinpath('out.txt').open('w', encoding='utf8') as outlog, \ - save_path.joinpath('err.txt').open('w', encoding='utf8') as errlog: - env = dict(os.environ) - if len(gpus) > 1 or gpus[0] != -1: - env['CUDA_VISIBLE_DEVICES'] = str(gpus[j]) - - procs.append(Popen("{} -m deeppavlov train {}".format(sys.executable, str(f_name)), - shell=True, stdout=outlog, stderr=errlog, env=env)) - for j, proc in enumerate(procs): - i = k * len(gpus) + j - log.info(f'Waiting on {i}th proc') - if proc.wait() != 0: - save_path = expand_path( - evolution.get_value_from_config(parse_config(population[i]), - evolution.path_to_models_save_path)) - with save_path.joinpath('err.txt').open(encoding='utf8') as errlog: - log.warning(f'Population {i} returned an error code {proc.returncode} and an error log:\n' + - errlog.read()) - return None - - -def results_to_table(population, evolution, considered_metrics, result_file, result_table_columns): - population_size = len(population) - train_config = evolution.basic_config.get('train', {}) - - if 'evaluation_targets' in train_config: - evaluation_targets = train_config['evaluation_targets'] - else: - evaluation_targets = [] - if train_config.get('validate_best', True): - evaluation_targets.append('valid') - elif train_config.get('test_best', True): - evaluation_targets.append('test') - - if 'valid' in evaluation_targets: - target = 'valid' - elif 'test' in evaluation_targets: - target = 'test' - elif 'train' in evaluation_targets: - target = 'train' - else: - raise ConfigError('evaluation_targets are empty. 
Can not evolve') - - if target != 'valid': - log.info(f"Tuning parameters on {target}") - - population_metrics = {} - for m in considered_metrics: - population_metrics[m] = [] - for i in range(population_size): - log_path = expand_path(evolution.get_value_from_config(parse_config(population[i]), - evolution.path_to_models_save_path) - ) / "out.txt" - - report = {} - with log_path.open(encoding='utf8') as f: - for line in f: - try: - report.update(json.loads(line)) - except: - pass - - result_table_dict = defaultdict(list) - - for m in considered_metrics: - for data_type in evaluation_targets: - result_table_dict[f'{m}_{data_type}'].append(report[data_type]['metrics'][m]) - if data_type == target: - population_metrics[m].append(report[data_type]['metrics'][m]) - - result_table_dict[result_table_columns[-1]] = [json.dumps(population[i])] - result_table = pd.DataFrame(result_table_dict) - result_table.loc[:, result_table_columns].to_csv(result_file, index=False, sep='\t', mode='a', header=None) - - return population_metrics - - -if __name__ == "__main__": - main() diff --git a/deeppavlov/models/bert/bert_as_summarizer.py b/deeppavlov/models/bert/bert_as_summarizer.py deleted file mode 100644 index 66efdad7a4..0000000000 --- a/deeppavlov/models/bert/bert_as_summarizer.py +++ /dev/null @@ -1,216 +0,0 @@ -# Copyright 2017 Neural Networks and Deep Learning lab, MIPT -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import re -from logging import getLogger -from typing import List, Optional - -import numpy as np -import tensorflow.compat.v1 as tf - -from bert_dp.modeling import BertConfig, BertModel, create_initializer, get_assignment_map_from_checkpoint -from deeppavlov.core.commands.utils import expand_path -from deeppavlov.core.common.registry import register -from deeppavlov.core.models.tf_model import TFModel -from deeppavlov.models.preprocessors.bert_preprocessor import BertPreprocessor - -logger = getLogger(__name__) - - -@register('bert_as_summarizer') -class BertAsSummarizer(TFModel): - """Naive Extractive Summarization model based on BERT. - BERT model was trained on Masked Language Modeling (MLM) and Next Sentence Prediction (NSP) tasks. - NSP head was trained to detect in ``[CLS] text_a [SEP] text_b [SEP]`` if text_b follows text_a in original document. - - This NSP head can be used to stack sentences from a long document, based on a initial sentence: - - summary_0 = init_sentence - - summary_1 = summary_0 + argmax(nsp_score(candidates)) - - summary_2 = summary_1 + argmax(nsp_score(candidates)) - - ... - - , where candidates are all sentences from a document. - - Args: - bert_config_file: path to Bert configuration file - pretrained_bert: path to pretrained Bert checkpoint - vocab_file: path to Bert vocabulary - max_summary_length: limit on summary length, number of sentences is used if ``max_summary_length_in_tokens`` - is set to False, else number of tokens is used. - max_summary_length_in_tokens: Use number of tokens as length of summary. - Defaults to ``False``. 
- max_seq_length: max sequence length in subtokens, including ``[SEP]`` and ``[CLS]`` tokens. - `max_seq_length` is used in Bert to compute NSP scores. Defaults to ``128``. - do_lower_case: set ``True`` if lowercasing is needed. Defaults to ``False``. - lang: use ru_sent_tokenizer for 'ru' and ntlk.sent_tokener for other languages. - Defaults to ``'ru'``. - """ - - def __init__(self, bert_config_file: str, - pretrained_bert: str, - vocab_file: str, - max_summary_length: int, - max_summary_length_in_tokens: Optional[bool] = False, - max_seq_length: Optional[int] = 128, - do_lower_case: Optional[bool] = False, - lang: Optional[str] = 'ru', - **kwargs) -> None: - - self.max_summary_length = max_summary_length - self.max_summary_length_in_tokens = max_summary_length_in_tokens - self.bert_config = BertConfig.from_json_file(str(expand_path(bert_config_file))) - - self.bert_preprocessor = BertPreprocessor(vocab_file=vocab_file, do_lower_case=do_lower_case, - max_seq_length=max_seq_length) - - self.tokenize_reg = re.compile(r"[\w']+|[^\w ]") - - if lang == 'ru': - from ru_sent_tokenize import ru_sent_tokenize - self.sent_tokenizer = ru_sent_tokenize - else: - from nltk import sent_tokenize - self.sent_tokenizer = sent_tokenize - - self.sess_config = tf.ConfigProto(allow_soft_placement=True) - self.sess_config.gpu_options.allow_growth = True - self.sess = tf.Session(config=self.sess_config) - - self._init_graph() - - self.sess.run(tf.global_variables_initializer()) - - if pretrained_bert is not None: - pretrained_bert = str(expand_path(pretrained_bert)) - - if tf.train.checkpoint_exists(pretrained_bert): - logger.info('[initializing model with Bert from {}]'.format(pretrained_bert)) - tvars = tf.trainable_variables() - assignment_map, _ = get_assignment_map_from_checkpoint(tvars, pretrained_bert) - tf.train.init_from_checkpoint(pretrained_bert, assignment_map) - - def _init_graph(self): - self._init_placeholders() - - self.bert = BertModel(config=self.bert_config, - is_training=self.is_train_ph, - input_ids=self.input_ids_ph, - input_mask=self.input_masks_ph, - token_type_ids=self.token_types_ph, - use_one_hot_embeddings=False, - ) - # next sentence prediction head - with tf.variable_scope("cls/seq_relationship"): - output_weights = tf.get_variable( - "output_weights", - shape=[2, self.bert_config.hidden_size], - initializer=create_initializer(self.bert_config.initializer_range)) - output_bias = tf.get_variable( - "output_bias", shape=[2], initializer=tf.zeros_initializer()) - - nsp_logits = tf.matmul(self.bert.get_pooled_output(), output_weights, transpose_b=True) - nsp_logits = tf.nn.bias_add(nsp_logits, output_bias) - self.nsp_probs = tf.nn.softmax(nsp_logits, axis=-1) - - def _init_placeholders(self): - self.input_ids_ph = tf.placeholder(shape=(None, None), dtype=tf.int32, name='ids_ph') - self.input_masks_ph = tf.placeholder(shape=(None, None), dtype=tf.int32, name='masks_ph') - self.token_types_ph = tf.placeholder(shape=(None, None), dtype=tf.int32, name='token_types_ph') - - self.is_train_ph = tf.placeholder_with_default(False, shape=[], name='is_train_ph') - - def _build_feed_dict(self, input_ids, input_masks, token_types): - feed_dict = { - self.input_ids_ph: input_ids, - self.input_masks_ph: input_masks, - self.token_types_ph: token_types, - } - return feed_dict - - def _get_nsp_predictions(self, sentences: List[str], candidates: List[str]): - """Compute NextSentence probability for every (sentence_i, candidate_i) pair. 
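With `bert_as_summarizer` gone, equivalent NSP scoring can be rebuilt on Hugging Face `transformers`; the sketch below is ours, not a DeepPavlov API. Index 0 of BERT's NSP head is the "is next" class, matching the `nsp_probs[:, 0]` slice above:

```python
import torch
from transformers import BertForNextSentencePrediction, BertTokenizer

tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
model = BertForNextSentencePrediction.from_pretrained('bert-base-uncased').eval()


def nsp_scores(summary: str, candidates: list) -> list:
    """P(candidate follows summary) for each candidate sentence."""
    scores = []
    for cand in candidates:
        inputs = tokenizer(summary, cand, return_tensors='pt', truncation=True)
        with torch.no_grad():
            logits = model(**inputs).logits  # shape (1, 2)
        scores.append(torch.softmax(logits, dim=-1)[0, 0].item())  # class 0 = "is next"
    return scores


def greedy_summary(sentences: list, init_sentence: str, max_sentences: int = 3) -> list:
    """Greedily grow a summary by appending the best-scoring candidate each step."""
    summary = [init_sentence]
    pool = [s for s in dict.fromkeys(sentences) if s != init_sentence]
    while pool and len(summary) < max_sentences:
        scores = nsp_scores(' '.join(summary), pool)
        summary.append(pool.pop(max(range(len(pool)), key=scores.__getitem__)))
    return summary
```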
- - [CLS] sentence_i [SEP] candidate_i [SEP] - - Args: - sentences: list of sentences - candidates: list of candidates to be the next sentence - - Returns: - probabilities that candidate is a next sentence - """ - features = self.bert_preprocessor(texts_a=sentences, texts_b=candidates) - input_ids = [f.input_ids for f in features] - input_masks = [f.input_mask for f in features] - input_type_ids = [f.input_type_ids for f in features] - feed_dict = self._build_feed_dict(input_ids, input_masks, input_type_ids) - nsp_probs = self.sess.run(self.nsp_probs, feed_dict=feed_dict) - return nsp_probs[:, 0] - - def __call__(self, texts: List[str], init_sentences: Optional[List[str]] = None) -> List[List[str]]: - """Builds summary for text from `texts` - - Args: - texts: texts to build summaries for - init_sentences: ``init_sentence`` is used as the first sentence in summary. - Defaults to None. - - Returns: - List[List[str]]: summaries tokenized on sentences - """ - summaries = [] - # build summaries for each text, init_sentence pair - if init_sentences is None: - init_sentences = [None] * len(texts) - - for text, init_sentence in zip(texts, init_sentences): - text_sentences = self.sent_tokenizer(text) - - if init_sentence is None: - init_sentence = text_sentences[0] - text_sentences = text_sentences[1:] - - # remove duplicates - text_sentences = list(set(text_sentences)) - # remove init_sentence from text sentences - text_sentences = [sent for sent in text_sentences if sent != init_sentence] - - summary = [init_sentence] - if self.max_summary_length_in_tokens: - # get length in tokens - def get_length(x): - return len(self.tokenize_reg.findall(' '.join(x))) - else: - # get length as number of sentences - get_length = len - - candidates = text_sentences[:] - while len(candidates) > 0: - # todo: use batches - candidates_scores = [self._get_nsp_predictions([' '.join(summary)], [cand]) for cand in candidates] - best_candidate_idx = np.argmax(candidates_scores) - best_candidate = candidates[best_candidate_idx] - del candidates[best_candidate_idx] - if get_length(summary + [best_candidate]) > self.max_summary_length: - break - summary = summary + [best_candidate] - summaries += [summary] - return summaries - - def train_on_batch(self, **kwargs): - raise NotImplementedError diff --git a/deeppavlov/models/evolution/Results_analysis.ipynb b/deeppavlov/models/evolution/Results_analysis.ipynb deleted file mode 100644 index 1eff4ff590..0000000000 --- a/deeppavlov/models/evolution/Results_analysis.ipynb +++ /dev/null @@ -1,347 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import pandas as pd\n", - "import numpy as np\n", - "from pathlib import Path\n", - "import matplotlib.pyplot as plt\n", - "import copy\n", - "import json\n", - "%matplotlib inline\n", - "\n", - "from deeppavlov.core.commands.utils import expand_path\n", - "from deeppavlov.models.evolution.evolution_param_generator import ParamsEvolution" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Set here path to your config file, key main model and population size" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "scrolled": true - }, - "outputs": [], - "source": [ - "CONFIG_FILE = \"../../configs/evolution/evolve_intents_snips.json\"\n", - "KEY_MAIN_MODEL = \"main\"\n", - "POPULATION_SIZE = 2\n", - " \n", - "with open(CONFIG_FILE, \"r\", encoding='utf8') as f:\n", - " basic_params = json.load(f)\n", - "\n", - 
"print(\"Considered basic config:\\n{}\".format(json.dumps(basic_params, indent=2)))" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "evolution = ParamsEvolution(population_size=POPULATION_SIZE,\n", - " key_main_model=KEY_MAIN_MODEL,\n", - " **basic_params)\n", - "\n", - "validate_best = evolution.get_value_from_config(\n", - " evolution.basic_config, list(evolution.find_model_path(\n", - " evolution.basic_config, \"validate_best\"))[0] + [\"validate_best\"])\n", - "test_best = evolution.get_value_from_config(\n", - " evolution.basic_config, list(evolution.find_model_path(\n", - " evolution.basic_config, \"test_best\"))[0] + [\"test_best\"])\n", - "\n", - "TITLE = str(Path(evolution.get_value_from_config(\n", - " evolution.basic_config, evolution.main_model_path + [\"save_path\"])).stem)\n", - "print(\"Title name for the considered evolution is `{}`.\".format(TITLE))\n", - "\n", - "data = pd.read_csv(str(expand_path(Path(evolution.get_value_from_config(\n", - " evolution.basic_config, evolution.main_model_path + [\"save_path\"])).joinpath(\n", - " \"result_table.tsv\"))), sep='\\t')\n", - "print(\"Number of populations: {}.\".format(int(data.shape[0] / POPULATION_SIZE)))\n", - "data.fillna(0., inplace=True)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "MEASURES = evolution.get_value_from_config(\n", - " evolution.basic_config, list(evolution.find_model_path(\n", - " evolution.basic_config, \"metrics\"))[0] + [\"metrics\"])\n", - "\n", - "for measure in MEASURES:\n", - " print(\"\\nMeasure: {}\".format(measure))\n", - " for data_type in [\"valid\", \"test\"]:\n", - " print(\"{}:\".format(data_type))\n", - " argmin = data[measure + \"_\" + data_type].argmin()\n", - " argmax = data[measure + \"_\" + data_type].argmax()\n", - " print(\"min for\\t{} model on\\t{} population\".format(argmin % POPULATION_SIZE,\n", - " argmin // POPULATION_SIZE))\n", - " print(\"max for\\t{} model on\\t{} population\".format(argmax % POPULATION_SIZE,\n", - " argmax // POPULATION_SIZE))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## If you want to plot measures depending on population colored by evolved measure value" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "scrolled": false - }, - "outputs": [], - "source": [ - "path_to_pics = expand_path(Path(evolution.get_value_from_config(\n", - " evolution.basic_config, evolution.main_model_path + [\"save_path\"])).joinpath(\"pics\"))\n", - "path_to_pics.mkdir(exist_ok=True, parents=True)\n", - "\n", - "if validate_best:\n", - " evolve_metric = MEASURES[0] + \"_valid\"\n", - "elif test_best:\n", - " evolve_metric = MEASURES[0] + \"_test\"\n", - " \n", - "cmap = plt.get_cmap('rainbow')\n", - "colors = [cmap(i) for i in np.linspace(0, 1, data.shape[0])]\n", - "color_ids = np.argsort(data.loc[:, evolve_metric].values)\n", - "\n", - "ylims = [(0., 1)] * len(MEASURES)\n", - "\n", - "for metric, ylim in zip(MEASURES, ylims):\n", - " plt.figure(figsize=(12,6))\n", - " if validate_best:\n", - " for i in range(data.shape[0]):\n", - " plt.scatter(i // POPULATION_SIZE, \n", - " data.loc[:, metric + \"_valid\"].values[i], \n", - " c=colors[np.where(color_ids == i)[0][0]], alpha=0.5, marker='o')\n", - " plt.plot(np.arange(data.shape[0]//POPULATION_SIZE), \n", - " data.loc[:, metric + \"_valid\"].max() * np.ones(data.shape[0]//POPULATION_SIZE), \n", - " c=colors[-1])\n", - " 
plt.plot(np.arange(data.shape[0]//POPULATION_SIZE), \n", - " data.loc[:, metric + \"_valid\"].min() * np.ones(data.shape[0]//POPULATION_SIZE), \n", - " c=colors[0])\n", - " if test_best:\n", - " for i in range(data.shape[0]):\n", - " plt.scatter(i // POPULATION_SIZE, \n", - " data.loc[:, metric + \"_test\"].values[i], \n", - " c=colors[np.where(color_ids == i)[0][0]], alpha=0.5, marker='+', s=200)\n", - " plt.plot(np.arange(data.shape[0]//POPULATION_SIZE), \n", - " data.loc[:, metric + \"_test\"].max() * np.ones(data.shape[0]//POPULATION_SIZE), \"--\",\n", - " c=colors[-1])\n", - " plt.plot(np.arange(data.shape[0]//POPULATION_SIZE), \n", - " data.loc[:, metric + \"_test\"].min() * np.ones(data.shape[0]//POPULATION_SIZE), \"--\",\n", - " c=colors[0])\n", - " \n", - "\n", - " plt.ylabel(metric, fontsize=20)\n", - " plt.xlabel(\"population\", fontsize=20)\n", - " plt.title(TITLE, fontsize=20)\n", - " plt.ylim(ylim[0], ylim[1])\n", - " plt.xticks(fontsize=20)\n", - " plt.yticks(fontsize=20)\n", - " plt.savefig(path_to_pics.joinpath(metric + \".png\"))\n", - " plt.show()" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "collapsed": true - }, - "source": [ - "## If you want to plot measures depending on population colored by `evolution_model_id`\n", - "\n", - "#### That means model of the same `id` are of the same color." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "params_dictionaries = []\n", - "models_ids = []\n", - "\n", - "for i in range(data.shape[0]):\n", - " data.loc[i, \"params\"] = data.loc[i, \"params\"].replace(\"False\", \"false\")\n", - " data.loc[i, \"params\"] = data.loc[i, \"params\"].replace(\"True\", \"true\")\n", - " json_acceptable_string = data.loc[i, \"params\"].replace(\"'\", \"\\\"\")\n", - " d = json.loads(json_acceptable_string)\n", - " params_dictionaries.append(d)\n", - " models_ids.append(d[\"evolution_model_id\"])\n", - "\n", - "models_ids = np.array(models_ids)\n", - "models_ids" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "scrolled": false - }, - "outputs": [], - "source": [ - "cmap = plt.get_cmap('rainbow')\n", - "colors = [cmap(i) for i in np.linspace(0, 1, len(np.unique(models_ids)))]\n", - "\n", - "ylims = [(0., 1)] * len(MEASURES)\n", - "\n", - "for metric, ylim in zip(MEASURES, ylims):\n", - " plt.figure(figsize=(12,6))\n", - " if validate_best:\n", - " for i in range(data.shape[0]):\n", - " plt.scatter(i // POPULATION_SIZE, \n", - " data.loc[:, metric + \"_valid\"].values[i], \n", - "# c=colors[models_ids[i]], alpha=0.5, marker='o')\n", - " c=colors[np.where(models_ids[i] == np.unique(models_ids))[0][0]], alpha=0.5, marker='o')\n", - " \n", - " plt.plot(np.arange(data.shape[0]//POPULATION_SIZE), \n", - " data.loc[:, metric + \"_valid\"].max() * np.ones(data.shape[0]//POPULATION_SIZE), \n", - " c=colors[-1])\n", - " plt.plot(np.arange(data.shape[0]//POPULATION_SIZE), \n", - " data.loc[:, metric + \"_valid\"].min() * np.ones(data.shape[0]//POPULATION_SIZE), \n", - " c=colors[0])\n", - " if test_best:\n", - " for i in range(data.shape[0]):\n", - " plt.scatter(i // POPULATION_SIZE, \n", - " data.loc[:, metric + \"_test\"].values[i], \n", - " c=colors[np.where(models_ids[i] == np.unique(models_ids))[0][0]], alpha=0.5, marker='+', s=200)\n", - " plt.plot(np.arange(data.shape[0]//POPULATION_SIZE), \n", - " data.loc[:, metric + \"_test\"].max() * np.ones(data.shape[0]//POPULATION_SIZE), \"--\",\n", - " c=colors[-1])\n", - " 
plt.plot(np.arange(data.shape[0]//POPULATION_SIZE), \n", - " data.loc[:, metric + \"_test\"].min() * np.ones(data.shape[0]//POPULATION_SIZE), \"--\",\n", - " c=colors[0])\n", - " \n", - "\n", - " plt.ylabel(metric, fontsize=20)\n", - " plt.xlabel(\"population\", fontsize=20)\n", - " plt.title(TITLE, fontsize=20)\n", - " plt.ylim(ylim[0], ylim[1])\n", - " plt.xticks(fontsize=20)\n", - " plt.yticks(fontsize=20)\n", - " plt.savefig(path_to_pics.joinpath(metric + \"_colored_ids.png\"))\n", - " plt.show()\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "scrolled": false - }, - "outputs": [], - "source": [ - "cmap = plt.get_cmap('rainbow')\n", - "colors = [cmap(i) for i in np.linspace(0, 1, data.shape[0])]\n", - "color_ids = np.argsort(data.loc[:, evolve_metric].values)\n", - "\n", - "for param_path in evolution.paths_to_evolving_params:\n", - " param_name = param_path[-1]\n", - " print(param_path, param_name)\n", - " \n", - " plt.figure(figsize=(12,12))\n", - " for i in range(data.shape[0]):\n", - " param_dict = evolution.get_value_from_config(evolution.basic_config, param_path)\n", - " if param_dict.get(\"evolve_range\") and param_dict.get(\"discrete\"):\n", - " plt.scatter(i // POPULATION_SIZE, \n", - " evolution.get_value_from_config(params_dictionaries[i], param_path),\n", - "# + (np.random.random() - 0.5) / 2,\n", - " c=colors[np.where(color_ids == i)[0][0]], alpha=0.5)\n", - " elif param_dict.get(\"evolve_range\"):\n", - " plt.scatter(i // POPULATION_SIZE, \n", - " evolution.get_value_from_config(params_dictionaries[i], param_path),\n", - " c=colors[np.where(color_ids == i)[0][0]], alpha=0.5)\n", - " elif param_dict.get(\"evolve_choice\"):\n", - " values = np.array(param_dict.get(\"evolve_choice\"))\n", - " plt.scatter(i // POPULATION_SIZE, \n", - " np.where(values == evolution.get_value_from_config(\n", - " params_dictionaries[i], param_path))[0][0],\n", - " c=colors[np.where(color_ids == i)[0][0]], alpha=0.5)\n", - " plt.yticks(np.arange(len(values)), values, fontsize=20)\n", - " elif param_dict.get(\"evolve_bool\"):\n", - " values = np.array([False, True])\n", - " plt.scatter(i // POPULATION_SIZE, \n", - " np.where(values == evolution.get_value_from_config(\n", - " params_dictionaries[i], param_path))[0][0],\n", - " c=colors[np.where(color_ids == i)[0][0]], alpha=0.5)\n", - " plt.yticks(np.arange(len(values)), [\"False\", \"True\"], fontsize=20)\n", - "\n", - " plt.ylabel(param_name, fontsize=20)\n", - " plt.xlabel(\"population\", fontsize=20)\n", - " plt.title(TITLE, fontsize=20)\n", - " plt.xticks(fontsize=20)\n", - " plt.yticks(fontsize=20)\n", - " plt.savefig(path_to_pics.joinpath(param_name + \".png\"))\n", - " plt.show()\n", - " " - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python-deep36", - "language": "python", - "name": "deep36" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.6.5" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} 
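For anyone who depended on the deleted notebook, a minimal standalone sketch of its core analysis: read the result_table.tsv written during an evolution run and report which model of which population scored best on every validation metric. The file location, POPULATION_SIZE and the "<metric>_valid" column naming are assumptions carried over from the notebook above, not guaranteed paths.

from pathlib import Path

import pandas as pd

POPULATION_SIZE = 2
table = Path("models/evolution/result_table.tsv")  # hypothetical location

# rows are appended run by run, POPULATION_SIZE rows per population
data = pd.read_csv(table, sep="\t").fillna(0.0)
for column in [c for c in data.columns if c.endswith("_valid")]:
    best = data[column].idxmax()
    print(f"{column}: best is model {best % POPULATION_SIZE} "
          f"of population {best // POPULATION_SIZE}")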
diff --git a/deeppavlov/models/evolution/__init__.py b/deeppavlov/models/evolution/__init__.py deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/deeppavlov/models/evolution/evolution_param_generator.py b/deeppavlov/models/evolution/evolution_param_generator.py deleted file mode 100644 index cd40027ec9..0000000000 --- a/deeppavlov/models/evolution/evolution_param_generator.py +++ /dev/null @@ -1,420 +0,0 @@ -# Copyright 2017 Neural Networks and Deep Learning lab, MIPT -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from copy import deepcopy -from logging import getLogger -from pathlib import Path -from typing import List, Any - -import numpy as np - -from deeppavlov.core.common.params_search import ParamsSearch -from deeppavlov.core.common.registry import register - -log = getLogger(__name__) - - -@register('params_evolution') -class ParamsEvolution(ParamsSearch): - """ - Class performs full evolutionary process (task scores -> max): - 1. initializes random population - 2. makes replacement to get next generation: - a. selection according to obtained scores - b. crossover (recombination) with given probability p_crossover - c. mutation with given mutation rate p_mutation (probability to mutate) - according to given mutation power sigma - (current mutation power is randomly from -sigma to sigma) - - Args: - population_size: number of individuums per generation - p_crossover: probability to cross over for current replacement - crossover_power: part of EVOLVING parents parameters to exchange for offsprings - p_mutation: probability of mutation for current replacement - mutation_power: allowed percentage of mutation - key_model_to_evolve: binary flag that should be inserted into the dictionary - with main model in the basic config (to determine save and load paths that will be changed) - seed: random seed for initialization - train_partition: integer number of train data parts - elitism_with_weights: whether to save elite models with weigths or without - prefix: prefix to determine special keys like `PREFIX_range`, `PREFIX_bool`, `PREFIX_choice` - **kwargs: basic config with parameters - - Attributes: - basic_config: dictionary with initial evolutionary config - main_model_path: list of keys and/or integers (for list) with relative path to main model (subdictionary) - prefix: prefix to determine special keys like `PREFIX_range`, `PREFIX_bool`, `PREFIX_choice` - population_size: number of individuums per generation - p_crossover: probability to cross over for current replacement - p_mutation: probability of mutation for current replacement - mutation_power: allowed percentage of mutation - crossover_power: part of EVOLVING parents parameters to exchange for offsprings - elitism_with_weights: whether to save elite models with weigths or without - n_saved_best_pretrained: number of saved models per current generation - train_partition: integer number of train data parts - paths_to_params: list of lists of keys and/or integers (for list) - with relative paths to evolving parameters 
- n_params: number of evolving parameters - evolution_model_id: identity number of model (the same for loaded pre-trained models) - models_path: path to models given in config variable `MODEL_PATH`. This variable \ - should be used as prefix to all fitted and trained model in config~ - eps: EPS value - paths_to_fiton_dicts: list of lists of keys and/or integers (for list)\ - with relative paths to dictionaries that can be "fitted on" - n_fiton_dicts: number of dictionaries that can be "fitted on" - evolve_metric_optimization: whether to maximize or minimize considered metric \ - Set of Values: ``"maximize", "minimize"`` - """ - - def __init__(self, - population_size: int, - p_crossover: float = 0.5, crossover_power: float = 0.5, - p_mutation: float = 0.5, mutation_power: float = 0.1, - key_main_model: str = "main", - seed: int = None, - train_partition: int = 1, - elitism_with_weights: bool = False, - prefix: str = "evolve", - models_path_variable: str = "MODEL_PATH", - **kwargs): - """ - Initialize evolution with random population - """ - super().__init__(prefix=prefix, seed=seed, **kwargs) - - self.main_model_path = list(self.find_model_path(self.basic_config, key_main_model))[0] - log.info("Main model path in config: {}".format(self.main_model_path)) - - self.population_size = population_size - self.p_crossover = p_crossover - self.p_mutation = p_mutation - self.mutation_power = mutation_power - self.crossover_power = crossover_power - self.elitism_with_weights = elitism_with_weights - - self.n_saved_best_pretrained = 0 - self.train_partition = train_partition - self.evolution_model_id = 0 - self.basic_config, self.models_path = self.remove_key_from_config( - self.basic_config, ["metadata", "variables", models_path_variable]) - self.models_path = Path(self.models_path) - for path_name in ["save_path", "load_path"]: - occured_mpaths = list(self.find_model_path(self.basic_config, path_name)) - for ppath in occured_mpaths: - new_path = self.get_value_from_config( - self.basic_config, - ppath + [path_name]).replace(models_path_variable, "MODEL_" + path_name.upper()) - self.insert_value_or_dict_into_config(self.basic_config, ppath + [path_name], new_path) - - self.path_to_models_save_path = ["metadata", "variables", "MODEL_SAVE_PATH"] - self.path_to_models_load_path = ["metadata", "variables", "MODEL_LOAD_PATH"] - self.insert_value_or_dict_into_config(self.basic_config, self.path_to_models_save_path, str(self.models_path)) - self.insert_value_or_dict_into_config(self.basic_config, self.path_to_models_load_path, str(self.models_path)) - - try: - self.evolve_metric_optimization = self.get_value_from_config( - self.basic_config, list(self.find_model_path( - self.basic_config, "metric_optimization"))[0] + ["metric_optimization"]) - except: - self.evolve_metric_optimization = "maximize" - - def first_generation(self, iteration: int = 0) -> List[dict]: - """ - Initialize first generation randomly according to the given constraints is self.params - - Args: - iteration: number of iteration - - Returns: - first generation that consists of self.population_size individuums - """ - population = [] - for i in range(self.population_size): - config = self.initialize_params_in_config(self.basic_config, self.paths_to_params) - - self.insert_value_or_dict_into_config(config, self.path_to_models_save_path, - str(self.models_path / f"population_{iteration}" / f"model_{i}")) - self.insert_value_or_dict_into_config(config, self.path_to_models_load_path, - str(self.models_path / f"population_{iteration}" / 
f"model_{i}")) - # set model_id - config["evolution_model_id"] = self.evolution_model_id - # next id available - self.evolution_model_id += 1 - population.append(config) - - return population - - def next_generation(self, generation: List[dict], scores: List[float], iteration: int) -> List[dict]: - """ - Provide replacement - - Args: - generation: current generation (set of self.population_size configs - scores: corresponding scores that should be maximized - iteration: iteration number - - Returns: - the next generation according to the given scores of current generation - """ - - next_population = self.selection_of_best_with_weights(generation, scores) - log.info("Saved with weights: {} models".format(self.n_saved_best_pretrained)) - offsprings = self.crossover(generation, scores) - - changable_next = self.mutation(offsprings) - - next_population.extend(changable_next) - - for i in range(self.n_saved_best_pretrained): - # if several train files: - if self.train_partition != 1: - file_ext = str(Path(next_population[i]["dataset_reader"]["train"]).suffix) - next_population[i]["dataset_reader"]["train"] = "_".join( - Path(next_population[i]["dataset_reader"]["train"]).stem.split("_")[:-1] - ) + "_" + str(iteration % self.train_partition) + file_ext - # load_paths - if self.elitism_with_weights: - # if elite models are saved with weights - self.insert_value_or_dict_into_config( - next_population[i], self.path_to_models_load_path, - self.get_value_from_config(next_population[i], self.path_to_models_save_path)) - else: - # if elite models are saved only as configurations and trained again - self.insert_value_or_dict_into_config( - next_population[i], self.path_to_models_load_path, - str(self.models_path / f"population_{iteration}" / f"model_{i}")) - - self.insert_value_or_dict_into_config( - next_population[i], self.path_to_models_save_path, - str(self.models_path / f"population_{iteration}" / f"model_{i}")) - - for i in range(self.n_saved_best_pretrained, self.population_size): - # if several train files - if self.train_partition != 1: - file_ext = str(Path(next_population[i]["dataset_reader"]["train"]).suffix) - next_population[i]["dataset_reader"]["train"] = "_".join( - [str(p) for p in Path(next_population[i]["dataset_reader"]["train"]).stem.split("_")[:-1]]) \ - + "_" + str(iteration % self.train_partition) + file_ext - self.insert_value_or_dict_into_config( - next_population[i], self.path_to_models_save_path, - str(self.models_path / f"population_{iteration}" / f"model_{i}")) - self.insert_value_or_dict_into_config( - next_population[i], self.path_to_models_load_path, - str(self.models_path / f"population_{iteration}" / f"model_{i}")) - - next_population[i]["evolution_model_id"] = self.evolution_model_id - self.evolution_model_id += 1 - - return next_population - - def selection_of_best_with_weights(self, population: List[dict], scores: List[float]) -> List[dict]: - """ - Select individuums to save with weights for the next generation from given population. - Range is an order of an individuum within sorted scores (1 range = max-score, self.population_size = min-score) - Individuum with the best score has probability equal to 1 (100%). - Individuum with the worst score has probability equal to 0 (0%). - Probability of i-th individuum to be selected with weights is (a * range_i + b) - where a = 1. / (1. - self.population_size), and - b = self.population_size / (self.population_size - 1.) 
- - Args: - population: self.population_size individuums - scores: list of corresponding scores - - Returns: - selected self.n_saved_best_pretrained (changable) individuums - """ - ranges = self.range_scores(scores) - a = 1. / (1. - self.population_size) - b = self.population_size / (self.population_size - 1.) - probas_to_be_selected = a * ranges + b - - selected = [] - for i in range(self.population_size): - if self.decision(probas_to_be_selected[i]): - selected.append(deepcopy(population[i])) - - self.n_saved_best_pretrained = len(selected) - return selected - - def range_scores(self, scores: List[float]) -> np.ndarray: - """ - Ranges scores, - range 1 corresponds to the best score, - range self.population_size corresponds to the worst score. - - Args: - scores: list of corresponding scores of population - - Returns: - ranges - """ - not_none_scores = np.array([x for x in scores if x is not None]) - if len(not_none_scores) == 0: - not_none_scores = np.array([0]) - min_score = np.min(not_none_scores) - max_score = np.max(not_none_scores) - for i in range(self.population_size): - if scores[i] is None: - if self.evolve_metric_optimization == "maximize": - scores[i] = min_score - self.eps - else: - scores[i] = max_score + self.eps - scores = np.array(scores, dtype='float') - - sorted_ids = np.argsort(scores) - if self.evolve_metric_optimization == "minimize": - sorted_ids = sorted_ids[::-1] - ranges = np.array([self.population_size - np.where(i == sorted_ids)[0][0] - for i in np.arange(self.population_size)]) - return ranges - - def crossover(self, population: List[dict], scores: List[float]) -> List[dict]: - """ - Recombine randomly population in pairs and cross over them with given probability. - Cross over from two parents produces two offsprings - each of which contains crossover_power portion of the parameter values from one parent, - and the other (1 - crossover_power portion) from the other parent - - Args: - population: self.population_size individuums - scores: list of corresponding scores - - Returns: - (self.population_size - self.n_saved_best_pretained) offsprings - """ - offsprings = [] - - ranges = self.range_scores(scores) - a = 1. / (1. - self.population_size) - b = self.population_size / (self.population_size - 1.) 
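        # Editor's illustration (not part of the original source): with
        # population_size = 4 the constants are a = -1/3 and b = 4/3, so the
        # best-ranked individuum (range 1) gets weight a * 1 + b = 1.0 while
        # the worst (range 4) gets a * 4 + b = 0.0; the next line normalises
        # these weights into the roulette-wheel distribution used to draw parents.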
- probas_to_be_parent = (a * ranges + b) / np.sum(a * ranges + b) - intervals = np.array([np.sum(probas_to_be_parent[:i]) for i in range(self.population_size)]) - - for i in range(self.population_size - self.n_saved_best_pretrained): - rs = np.random.random(2) - parents = population[np.where(rs[0] > intervals)[0][-1]], population[np.where(rs[1] > intervals)[0][-1]] - - if self.decision(self.p_crossover): - params_perm = np.random.permutation(self.n_params) - - curr_offsprings = [deepcopy(parents[0]), - deepcopy(parents[1])] - - part = int(self.crossover_power * self.n_params) - - for j in range(self.n_params - part, self.n_params): - self.insert_value_or_dict_into_config(curr_offsprings[0], - self.paths_to_params[ - params_perm[j]], - self.get_value_from_config( - parents[1], - self.paths_to_params[ - params_perm[j]])) - - self.insert_value_or_dict_into_config(curr_offsprings[1], - self.paths_to_params[ - params_perm[j]], - self.get_value_from_config( - parents[0], - self.paths_to_params[ - params_perm[j]])) - offsprings.append(deepcopy(curr_offsprings[0])) - else: - offsprings.append(deepcopy(parents[0])) - - return offsprings - - def mutation(self, population: List[dict]) -> List[dict]: - """ - Mutate each parameter of each individuum in population - - Args: - population: self.population_size individuums - - Returns: - mutated population - """ - mutated = [] - - for individuum in population: - mutated_individuum = deepcopy(individuum) - for path_ in self.paths_to_params: - param_value = self.get_value_from_config(individuum, path_) - self.insert_value_or_dict_into_config( - mutated_individuum, path_, - self.mutation_of_param(path_, param_value)) - mutated.append(mutated_individuum) - - return mutated - - def mutation_of_param(self, param_path: list, - param_value: [int, float, str, list, dict, bool, np.ndarray]) -> Any: - """ - Mutate particular parameter separately - - Args: - param_path: path to parameter in basic config - param_value: current parameter valuer - - Returns: - mutated parameter value - """ - if self.decision(self.p_mutation): - param_name = param_path[-1] - basic_value = self.get_value_from_config(self.basic_config, param_path) - if isinstance(basic_value, dict): - if basic_value.get('discrete', False): - val = round(param_value + - ((2 * np.random.random() - 1.) * self.mutation_power - * self.sample_params(**{param_name: basic_value})[param_name])) - val = min(max(basic_value["evolve_range"][0], val), - basic_value["evolve_range"][1]) - new_mutated_value = val - elif 'evolve_range' in basic_value.keys(): - val = param_value + \ - ((2 * np.random.random() - 1.) * self.mutation_power - * self.sample_params(**{param_name: basic_value})[param_name]) - val = min(max(basic_value["evolve_range"][0], val), - basic_value["evolve_range"][1]) - new_mutated_value = val - elif basic_value.get("evolve_choice"): - new_mutated_value = self.sample_params(**{param_name: basic_value})[param_name] - elif basic_value.get("evolve_bool"): - new_mutated_value = self.sample_params(**{param_name: basic_value})[param_name] - else: - new_mutated_value = param_value - else: - new_mutated_value = param_value - else: - new_mutated_value = param_value - - return new_mutated_value - - @staticmethod - def decision(probability: float = 1.) 
-> bool: - """ - Make decision whether to do action or not with given probability - - Args: - probability: probability whether to do action or not - - Returns: - bool decision - """ - r = np.random.random() - if r < probability: - return True - else: - return False diff --git a/deeppavlov/models/go_bot/wrapper.py b/deeppavlov/models/go_bot/wrapper.py index 7099f17206..2a61f6ba5a 100644 --- a/deeppavlov/models/go_bot/wrapper.py +++ b/deeppavlov/models/go_bot/wrapper.py @@ -14,9 +14,11 @@ from typing import Iterable +from deeppavlov.core.common.registry import register from deeppavlov.core.models.component import Component +@register('dialog_component_wrapper') class DialogComponentWrapper(Component): def __init__(self, component: Component, **kwargs): diff --git a/deeppavlov/models/kbqa/sentence_answer.py b/deeppavlov/models/kbqa/sentence_answer.py index 28eda7ac44..ea7042c819 100644 --- a/deeppavlov/models/kbqa/sentence_answer.py +++ b/deeppavlov/models/kbqa/sentence_answer.py @@ -15,7 +15,6 @@ import re from logging import getLogger -import pyinflect import spacy log = getLogger(__name__) diff --git a/deeppavlov/models/preprocessors/ecommerce_preprocess.py b/deeppavlov/models/preprocessors/ecommerce_preprocess.py deleted file mode 100644 index 8dfecc2281..0000000000 --- a/deeppavlov/models/preprocessors/ecommerce_preprocess.py +++ /dev/null @@ -1,131 +0,0 @@ -# Copyright 2018 Neural Networks and Deep Learning lab, MIPT -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# http://www.apache.org/licenses/LICENSE-2.0 -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-import math -import re -from typing import List, Any, Dict, Iterable, Optional, Tuple - -import spacy -from spacy.matcher import Matcher - -from deeppavlov.core.common.registry import register -from deeppavlov.core.models.component import Component -from deeppavlov.models.tokenizers.spacy_tokenizer import _try_load_spacy_model - - -@register('ecommerce_preprocess') -class EcommercePreprocess(Component): - """Class to process strings for EcommerceBot skill - - Parameters: - spacy_model: SpaCy model name - disable: SpaCy pipeline to disable - """ - - def __init__(self, spacy_model: str = 'en_core_web_sm', disable: Optional[Iterable[str]] = None, **kwargs): - if disable is None: - disable = ['parser', 'ner'] - - self.model = _try_load_spacy_model(spacy_model, disable=disable) - - below = lambda text: bool(re.compile(r'below|cheap').match(text)) - BELOW = self.model.vocab.add_flag(below) - - above = lambda text: bool(re.compile(r'above|start').match(text)) - ABOVE = self.model.vocab.add_flag(above) - - self.matcher = Matcher(self.model.vocab) - - self.matcher.add('below', None, [{BELOW: True}, {'LOWER': 'than', 'OP': '?'}, - {'LOWER': 'from', 'OP': '?'}, {'ORTH': '$', 'OP': '?'}, - {'ENT_TYPE': 'MONEY', 'LIKE_NUM': True}]) - - self.matcher.add('above', None, [{ABOVE: True}, {'LOWER': 'than', 'OP': '?'}, - {'LOWER': 'from', 'OP': '?'}, {'ORTH': '$', 'OP': '?'}, - {'ENT_TYPE': 'MONEY', 'LIKE_NUM': True}]) - - def __call__(self, **kwargs): - pass - - def extract_money(self, doc: spacy.tokens.Doc) -> Tuple[List, Tuple[float, float]]: - """Extract money entities and money related tokens from `doc`. - - Parameters: - doc: a list of tokens with corresponding tags, lemmas, etc. - - Returns: - doc_no_money: doc with no money related tokens. - money_range: money range from `money_range[0]` to `money_range[1]` extracted from the doc. 
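        Intended behaviour (illustrative example, not from the original
        source): a query such as "show me laptops below $500" should fire
        the 'below' pattern and produce money_range == (0, 500.0), with the
        matched money tokens removed from doc_no_money.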
- """ - - matches = self.matcher(doc) - money_range: Tuple = () - doc_no_money = list(doc) - negated = False - - for match_id, start, end in matches: - string_id = self.model.vocab.strings[match_id] - span = doc[start:end] - for child in doc[start].children: - if child.dep_ == 'neg': - negated = True - - num_token = [token for token in span if token.like_num == True] - if (string_id == 'below' and negated == False) or (string_id == 'above' and negated == True): - money_range = (0, float(num_token[0].text)) - - if (string_id == 'above' and negated == False) or (string_id == 'below' and negated == True): - money_range = (float(num_token[0].text), float(math.inf)) - - del doc_no_money[start:end + 1] - return doc_no_money, money_range - - def analyze(self, text: str) -> Iterable: - """SpaCy `text` preprocessing""" - return self.model(text) - - def spacy2dict(self, doc: spacy.tokens.Doc, fields: List[str] = None) -> List[Dict[Any, Any]]: - """Convert SpaCy doc into list of tokens with `fields` properties only""" - if fields is None: - fields = ['tag_', 'like_num', 'lemma_', 'text'] - return [{field: getattr(token, field) for field in fields} for token in doc] - - def filter_nlp(self, tokens: Iterable) -> List[Any]: - """Filter tokens according to the POS tags""" - res = [] - for word in tokens: - if word.tag_ not in ['MD', 'SP', 'DT', 'TO']: - res.append(word) - return res - - def filter_nlp_title(self, doc: Iterable) -> List[Any]: - """Filter item titles according to the POS tags""" - return [w for w in doc if w.tag_ in ['NNP', 'NN', 'PROPN', 'JJ'] and not w.like_num] - - def lemmas(self, doc: Iterable) -> List[str]: - """Return lemma of `doc`""" - return [w.get('lemma_') if isinstance(w, dict) else w.lemma_ for w in doc] - - def price(self, item: Dict[Any, Any]) -> float: - """Return price of item in a proper format""" - if 'ListPrice' in item: - return float(item['ListPrice'].split('$')[1].replace(",", "")) - return 0 - - def parse_input(self, inp: str) -> Dict[Any, Any]: - """Convert space-delimited string into dialog state""" - state: List = [] - for i in range(len(inp.split()) // 2, 0, -1): - state.append([inp.split(None, 1)[0], inp.split(None, 1)[1].split()[0]]) - - if i > 1: - inp = inp.split(None, 2)[2] - - return dict(state) diff --git a/deeppavlov/models/preprocessors/torch_transformers_preprocessor.py b/deeppavlov/models/preprocessors/torch_transformers_preprocessor.py index 2623842031..ca28bf2dec 100644 --- a/deeppavlov/models/preprocessors/torch_transformers_preprocessor.py +++ b/deeppavlov/models/preprocessors/torch_transformers_preprocessor.py @@ -107,8 +107,8 @@ def __call__(self, texts_a: List[str], texts_b: Optional[List[str]] = None) -> U return input_features -@register('torch_bert_ner_preprocessor') -class TorchBertNerPreprocessor(Component): +@register('torch_transformers_ner_preprocessor') +class TorchTransformersNerPreprocessor(Component): """Takes tokens and splits them into bert subtokens, encodes subtokens with their indices. Creates a mask of subtokens (one for the first subtoken, zero for the others). 
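The renamed preprocessor aligns word-level tags with BERT subtokens by marking only the first subtoken of each word, as the docstring above describes. A minimal sketch of that alignment using the transformers tokenizer (the sample words and helper lists are illustrative, not DeepPavlov's internals):

from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("bert-base-cased")

tokens = ["Johanson", "lives", "in", "Stockholm"]
subtokens, startofword = ["[CLS]"], [0]
for token in tokens:
    pieces = tokenizer.tokenize(token)  # e.g. ["Johan", "##son"]
    subtokens.extend(pieces)
    # only the first subtoken of each original word carries its tag
    startofword.extend([1] + [0] * (len(pieces) - 1))
subtokens.append("[SEP]")
startofword.append(0)

print(list(zip(subtokens, startofword)))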
@@ -186,6 +186,7 @@ def __call__(self, f"length of sow_marker({len(sw_marker)}), tokens({len(sw_toks)})," \ f" token ids({len(subword_tok_ids[-1])}) and ys({len(ys)})" \ f" for tokens = `{toks}` should match" + subword_tok_ids = zero_pad(subword_tok_ids, dtype=int, padding=0) startofword_markers = zero_pad(startofword_markers, dtype=int, padding=0) attention_mask = Mask()(subword_tokens) diff --git a/deeppavlov/models/seq2seq_go_bot/__init__.py b/deeppavlov/models/seq2seq_go_bot/__init__.py deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/deeppavlov/models/seq2seq_go_bot/bot.py b/deeppavlov/models/seq2seq_go_bot/bot.py deleted file mode 100644 index 9a309c0dfd..0000000000 --- a/deeppavlov/models/seq2seq_go_bot/bot.py +++ /dev/null @@ -1,229 +0,0 @@ -# Copyright 2017 Neural Networks and Deep Learning lab, MIPT -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from logging import getLogger -from typing import Dict - -import numpy as np - -from deeppavlov.core.common.registry import register -from deeppavlov.core.models.component import Component -from deeppavlov.core.models.nn_model import NNModel -from deeppavlov.models.seq2seq_go_bot.network import Seq2SeqGoalOrientedBotNetwork - -log = getLogger(__name__) - - -@register("seq2seq_go_bot") -class Seq2SeqGoalOrientedBot(NNModel): - """ - A goal-oriented bot based on a sequence-to-sequence rnn. For implementation details see - :class:`~deeppavlov.models.seq2seq_go_bot.network.Seq2SeqGoalOrientedBotNetwork`. - Pretrained for :class:`~deeppavlov.dataset_readers.kvret_reader.KvretDatasetReader` dataset. - - Parameters: - network_parameters: parameters passed to object of - :class:`~deeppavlov.models.seq2seq_go_bot.network.Seq2SeqGoalOrientedBotNetwork` class. - embedder: word embeddings model, see - :doc:`deeppavlov.models.embedders `. - source_vocab: vocabulary of input tokens. - target_vocab: vocabulary of bot response tokens. - start_of_sequence_token: token that defines start of input sequence. - end_of_sequence_token: token that defines end of input sequence and start of - output sequence. - debug: whether to display debug output. - **kwargs: parameters passed to parent - :class:`~deeppavlov.core.models.nn_model.NNModel` class. 
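        Example (illustrative, not from the original source): with
        len(target_vocab) == 1000, a response token equal to the third
        knowledge-base key is encoded as index 1000 + 2 = 1002, and any
        predicted index >= 1000 is decoded back through knowledge_base_keys.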
- """ - - def __init__(self, - network_parameters: Dict, - embedder: Component, - source_vocab: Component, - target_vocab: Component, - start_of_sequence_token: str, - end_of_sequence_token: str, - knowledge_base_keys, - save_path: str, - load_path: str = None, - debug: bool = False, - **kwargs) -> None: - super().__init__(save_path=save_path, load_path=load_path, **kwargs) - - self.embedder = embedder - self.embedding_size = embedder.dim - self.src_vocab = source_vocab - self.tgt_vocab = target_vocab - self.tgt_vocab_size = len(target_vocab) - self.kb_keys = knowledge_base_keys - self.kb_size = len(self.kb_keys) - self.sos_token = start_of_sequence_token - self.eos_token = end_of_sequence_token - self.debug = debug - - network_parameters['load_path'] = load_path - network_parameters['save_path'] = save_path - self.network = self._init_network(network_parameters) - - def _init_network(self, params): - if 'target_start_of_sequence_index' not in params: - params['target_start_of_sequence_index'] = self.tgt_vocab[self.sos_token] - if 'target_end_of_sequence_index' not in params: - params['target_end_of_sequence_index'] = self.tgt_vocab[self.eos_token] - if 'source_vocab_size' not in params: - params['source_vocab_size'] = len(self.src_vocab) - if 'target_vocab_size' not in params: - params['target_vocab_size'] = len(self.tgt_vocab) - # contruct matrix of knowledge bases values embeddings - params['knowledge_base_entry_embeddings'] = \ - [self._embed_kb_key(val) for val in self.kb_keys] - # contrcust matrix of decoder input token embeddings (zeros for sos_token) - dec_embs = self.embedder([[self.tgt_vocab[idx] - for idx in range(self.tgt_vocab_size)]])[0] - dec_embs[self.tgt_vocab[self.sos_token]][:] = 0. - params['decoder_embeddings'] = dec_embs - return Seq2SeqGoalOrientedBotNetwork(**params) - - def _embed_kb_key(self, key): - # TODO: fasttext embedder to work with tokens - emb = np.array(self.embedder([key.split('_')], mean=True)[0]) - if self.debug: - log.debug("embedding key tokens='{}', embedding shape = {}" - .format(key.split('_'), emb.shape)) - return emb - - def train_on_batch(self, utters, history_list, kb_entry_list, responses): - b_enc_ins, b_src_lens = [], [] - b_dec_ins, b_dec_outs, b_tgt_lens = [], [], [] - for x_tokens, history, y_tokens in zip(utters, history_list, responses): - x_tokens = history + x_tokens - enc_in = self._encode_context(x_tokens) - b_enc_ins.append(enc_in) - b_src_lens.append(len(enc_in)) - - dec_in, dec_out = self._encode_response(y_tokens) - b_dec_ins.append(dec_in) - b_dec_outs.append(dec_out) - b_tgt_lens.append(len(dec_out)) - - # Sequence padding - batch_size = len(b_enc_ins) - max_src_len = max(b_src_lens) - max_tgt_len = max(b_tgt_lens) - # b_enc_ins_np = self.src_vocab[self.sos_token] *\ - # np.ones((batch_size, max_src_len), dtype=np.float32) - b_enc_ins_np = np.zeros((batch_size, max_src_len, self.embedding_size), - dtype=np.float32) - b_dec_ins_np = self.tgt_vocab[self.eos_token] * \ - np.ones((batch_size, max_tgt_len), dtype=np.float32) - b_dec_outs_np = self.tgt_vocab[self.eos_token] * \ - np.ones((batch_size, max_tgt_len), dtype=np.float32) - b_tgt_weights_np = np.zeros((batch_size, max_tgt_len), dtype=np.float32) - b_kb_masks_np = np.zeros((batch_size, self.kb_size), np.float32) - for i, (src_len, tgt_len, kb_entries) in \ - enumerate(zip(b_src_lens, b_tgt_lens, kb_entry_list)): - b_enc_ins_np[i, :src_len] = b_enc_ins[i] - b_dec_ins_np[i, :tgt_len] = b_dec_ins[i] - b_dec_outs_np[i, :tgt_len] = b_dec_outs[i] - b_tgt_weights_np[i, :tgt_len] 
= 1. - if self.debug: - if len(kb_entries) != len(set([e[0] for e in kb_entries])): - log.debug("Duplicates in kb_entries = {}".format(kb_entries)) - for k, v in kb_entries: - b_kb_masks_np[i, self.kb_keys.index(k)] = 1. - - """if self.debug: - log.debug("b_enc_ins = {}".format(b_enc_ins)) - log.debug("b_dec_ins = {}".format(b_dec_ins)) - log.debug("b_dec_outs = {}".format(b_dec_outs)) - log.debug("b_src_lens = {}".format(b_src_lens)) - log.debug("b_tgt_lens = {}".format(b_tgt_lens)) - log.debug("b_tgt_weights = {}".format(b_tgt_weights))""" - - return self.network.train_on_batch(b_enc_ins_np, b_dec_ins_np, b_dec_outs_np, - b_src_lens, b_tgt_lens, b_tgt_weights_np, - b_kb_masks_np) - - def _encode_context(self, tokens): - if self.debug: - log.debug("Context tokens = \"{}\"".format(tokens)) - # token_idxs = self.src_vocab([tokens])[0] - # return token_idxs - return np.array(self.embedder([tokens])[0]) - - def _encode_response(self, tokens): - if self.debug: - log.debug("Response tokens = \"{}\"".format(tokens)) - token_idxs = [] - for token in tokens: - if token in self.kb_keys: - token_idxs.append(self.tgt_vocab_size + self.kb_keys.index(token)) - else: - token_idxs.append(self.tgt_vocab[token]) - # token_idxs = self.tgt_vocab([tokens])[0] - return ([self.tgt_vocab[self.sos_token]] + token_idxs, - token_idxs + [self.tgt_vocab[self.eos_token]]) - - def _decode_response(self, token_idxs): - def _idx2token(idxs): - for idx in idxs: - if idx < self.tgt_vocab_size: - token = self.tgt_vocab([[idx]])[0][0] - if token == self.eos_token: - break - yield token - else: - yield self.kb_keys[idx - self.tgt_vocab_size] - - return [list(_idx2token(utter_idxs)) for utter_idxs in token_idxs] - - def __call__(self, *batch): - return self._infer_on_batch(*batch) - - # def _infer_on_batch(self, utters, kb_entry_list=itertools.repeat([])): - def _infer_on_batch(self, utters, history_list, kb_entry_list): - b_enc_ins, b_src_lens = [], [] - if (len(utters) == 1) and not utters[0]: - utters = [['hi']] - for utter, history in zip(utters, history_list): - utter = history + utter - enc_in = self._encode_context(utter) - - b_enc_ins.append(enc_in) - b_src_lens.append(len(enc_in)) - - # Sequence padding - batch_size = len(b_enc_ins) - max_src_len = max(b_src_lens) - b_enc_ins_np = np.zeros((batch_size, max_src_len, self.embedding_size), - dtype=np.float32) - b_kb_masks_np = np.zeros((batch_size, self.kb_size), dtype=np.float32) - for i, (src_len, kb_entries) in enumerate(zip(b_src_lens, kb_entry_list)): - b_enc_ins_np[i, :src_len] = b_enc_ins[i] - if self.debug: - log.debug("infer: kb_entries = {}".format(kb_entries)) - for k, v in kb_entries: - b_kb_masks_np[i, self.kb_keys.index(k)] = 1. - - pred_idxs = self.network(b_enc_ins_np, b_src_lens, b_kb_masks_np) - preds = self._decode_response(pred_idxs) - if self.debug: - log.debug("Dialog prediction = \"{}\"".format(preds[-1])) - return preds - - def save(self): - self.network.save() - - def load(self): - pass diff --git a/deeppavlov/models/seq2seq_go_bot/dialog_state.py b/deeppavlov/models/seq2seq_go_bot/dialog_state.py deleted file mode 100644 index a316250ea4..0000000000 --- a/deeppavlov/models/seq2seq_go_bot/dialog_state.py +++ /dev/null @@ -1,30 +0,0 @@ -# Copyright 2017 Neural Networks and Deep Learning lab, MIPT -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from deeppavlov.core.common.registry import register -from deeppavlov.core.models.component import Component - - -@register("dialog_state") -class DialogState(Component): - def __init__(self, *args, **kwargs): - self.states = {} - - def __call__(self, user_ids, utterances=None, *args, **kwargs): - if utterances is None: - return [self.states.get(u, []) for u in user_ids] - - for user, utter in zip(user_ids, utterances): - self.states[user] = self.states.get(user, []) + utter - return diff --git a/deeppavlov/models/seq2seq_go_bot/kb.py b/deeppavlov/models/seq2seq_go_bot/kb.py deleted file mode 100644 index aecde5626b..0000000000 --- a/deeppavlov/models/seq2seq_go_bot/kb.py +++ /dev/null @@ -1,206 +0,0 @@ -# Copyright 2017 Neural Networks and Deep Learning lab, MIPT -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import itertools -import json -import re -from collections import defaultdict -from logging import getLogger -from typing import Callable, List, Tuple - -from deeppavlov.core.common.registry import register -from deeppavlov.core.models.component import Component -from deeppavlov.core.models.estimator import Estimator - -log = getLogger(__name__) - - -@register("knowledge_base") -class KnowledgeBase(Estimator): - """ - A custom dictionary that encodes knowledge facts from - :class:`~deeppavlov.dataset_readers.kvret_reader.KvretDatasetReader` data. - - Example: - .. code:: python - - >>> from deeppavlov.models.seq2seq_go_bot.kb import KnowledgeBase - >>> kb = KnowledgeBase(save_path="kb.json", load_path="kb.json") - >>> kb.fit(['person1'], [['name', 'hair', 'eyes']], [[{'name': 'Sasha', 'hair': 'long dark', 'eyes': 'light blue '}]]) - - >>> kb(['person1']) - [[('sasha_name', 'Sasha'), ('sasha_hair', 'long dark'), ('sasha_eyes', 'light blue ')]] - - >>> kb(['person_that_doesnt_exist']) - [[]] - - Parameters: - save_path: path to save the dictionary with knowledge. - load_path: path to load the json with knowledge. - tokenizer: tokenizer used to split entity values into tokens (inputs batch - of strings and outputs batch of lists of tokens). - **kwargs: parameters passed to parent - :class:`~deeppavlov.core.models.estimator.Estimator`. 
- """ - - def __init__(self, - save_path: str, - load_path: str = None, - tokenizer: Callable = None, - *args, **kwargs) -> None: - super().__init__(save_path=save_path, - load_path=load_path, - *args, **kwargs) - self.tokenizer = tokenizer - self.kb = defaultdict(lambda: []) - self.primary_keys = [] - if self.load_path and self.load_path.is_file(): - self.load() - - def fit(self, *args): - self.reset() - self._update(*args) - - def _update(self, keys, kb_columns_list, kb_items_list, update_primary_keys=True): - for key, cols, items in zip(keys, kb_columns_list, kb_items_list): - if (None not in (key, items, cols)) and (key not in self.kb): - kv_entry_list = (self._key_value_entries(item, cols, - update=update_primary_keys) - for item in items) - self.kb[key] = list(itertools.chain(*kv_entry_list)) - - def _key_value_entries(self, kb_item, kb_columns, update=True): - def _format(s): - return re.sub('\s+', '_', s.lower().strip()) - - first_key = _format(kb_item[kb_columns[0]]) - for col in kb_columns: - key = first_key + '_' + _format(col) - if update and (key not in self.primary_keys): - self.primary_keys.append(key) - if col in kb_item: - if self.tokenizer is not None: - yield (key, self.tokenizer([kb_item[col]])[0]) - else: - yield (key, kb_item[col]) - - def __call__(self, keys, kb_columns_list=None, kb_items_list=None): - if None not in (kb_columns_list, kb_items_list): - self._update(keys, kb_columns_list, kb_items_list, update_primary_keys=False) - res = [] - for key in keys: - res.append(self.kb[key]) - for k, value in res[-1]: - if k not in self.primary_keys: - raise ValueError("Primary key `{}` is not present in knowledge base" - .format(k)) - return res - - def __len__(self): - return len(self.kb) - - def keys(self): - return self.kb.keys() - - def reset(self): - self.kb = defaultdict(lambda: []) - self.primary_keys = [] - - def save(self): - log.info("[saving knowledge base to {}]".format(self.save_path)) - json.dump(self.kb, self.save_path.open('wt')) - json.dump(self.primary_keys, self.save_path.with_suffix('.keys.json').open('wt')) - - def load(self): - log.info("[loading knowledge base from {}]".format(self.load_path)) - self.kb.update(json.load(self.load_path.open('rt')), primary_keys=False) - self.primary_keys = json.load(self.load_path.with_suffix('.keys.json').open('rt')) - - -@register("knowledge_base_entity_normalizer") -class KnowledgeBaseEntityNormalizer(Component): - """ - Uses instance of :class:`~deeppavlov.models.seq2seq_go_bot.kb.KnowledgeBase` - to normalize or to undo normalization of entities in the input utterance. - - To normalize is to substitute all mentions of database entities with their - normalized form. - - To undo normalization is to substitute all mentions of database normalized entities - with their original form. - - Example: - .. 
code:: python - - >>> from deeppavlov.models.seq2seq_go_bot.kb import KnowledgeBase - >>> kb = KnowledgeBase(save_path="kb.json", load_path="kb.json", tokenizer=lambda strings: [s.split() for s in strings]) - >>> kb.fit(['person1'], [['name', 'hair', 'eyes']], [[{'name': 'Sasha', 'hair': 'long dark', 'eyes': 'light blue '}]]) - >>> kb(['person1']) - [[('sasha_name', ['Sasha']), ('sasha_hair', ['long', 'dark']), ('sasha_eyes', ['light','blue'])]] - - >>> from deeppavlov.models.seq2seq_go_bot.kb import KnowledgeBaseEntityNormalizer - >>> normalizer = KnowledgeBaseEntityNormalizer(denormalize=False, remove=False) - >>> normalizer([["some", "guy", "with", "long", "dark", "hair", "said", "hi"]], kb(['person1'])) - [['some', 'guy', 'with', 'sasha_hair', 'hair', 'said', 'hi']] - - >>> denormalizer = KnowledgeBaseEntityNormalizer(denormalize=True) - >>> denormalizer([['some', 'guy', 'with', 'sasha_hair', 'hair', 'said', 'hi']], kb(['person1'])) - [['some', 'guy', 'with', 'long', 'dark', 'hair', 'said', 'hi']] - - >>> remover = KnowledgeBaseEntityNormalizer(denormalize=False, remove=True) - >>> remover([["some", "guy", "with", "long", "dark", "hair", "said", "hi"]], kb(['person1'])) - [['some', 'guy', 'with', 'hair', 'said', 'hi'] - - - Parameters: - denormalize: flag indicates whether to normalize or to undo normalization - ("denormalize"). - remove: flag indicates whether to remove entities or not while normalizing - (``denormalize=False``). Is ignored for ``denormalize=True``. - **kwargs: parameters passed to parent - :class:`~deeppavlov.core.models.component.Component` class. - """ - - def __init__(self, - remove: bool = False, - denormalize: bool = False, - **kwargs): - self.denormalize_flag = denormalize - self.remove = remove - - def normalize(self, tokens, entries): - for entity, ent_tokens in sorted(entries, key=lambda e: -len(e[1])): - ent_num_tokens = len(ent_tokens) - if ' '.join(ent_tokens).strip(): - for i in range(len(tokens)): - if tokens[i:i + ent_num_tokens] == ent_tokens: - if self.remove: - tokens = tokens[:i] + tokens[i + ent_num_tokens:] - else: - tokens = tokens[:i] + [entity] + tokens[i + ent_num_tokens:] - return tokens - - def denormalize(self, tokens, entries): - for entity, ent_tokens in entries: - while (entity in tokens): - ent_pos = tokens.index(entity) - tokens = tokens[:ent_pos] + ent_tokens + tokens[ent_pos + 1:] - return tokens - - def __call__(self, - tokens_list: List[List[str]], - entries_list: List[Tuple[str, List[str]]]) -> List[List[str]]: - if self.denormalize_flag: - return [self.denormalize(t, e) for t, e in zip(tokens_list, entries_list)] - return [self.normalize(t, e) for t, e in zip(tokens_list, entries_list)] diff --git a/deeppavlov/models/seq2seq_go_bot/kb_attn_layer.py b/deeppavlov/models/seq2seq_go_bot/kb_attn_layer.py deleted file mode 100644 index 66dbff0d66..0000000000 --- a/deeppavlov/models/seq2seq_go_bot/kb_attn_layer.py +++ /dev/null @@ -1,178 +0,0 @@ -# Copyright 2017 Neural Networks and Deep Learning lab, MIPT -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. - -import tensorflow as tf -from tensorflow.python.framework import tensor_shape -from tensorflow.python.layers import base -from tensorflow.python.ops import init_ops - - -class KBAttention(base.Layer): - # TODO: update class doc - """Densely-connected layer class. - Arguments: - units: Integer or Long, dimensionality of the output space. - activation: Activation function (callable). Set it to None to maintain a - linear activation. - use_bias: Boolean, whether the layer uses a bias. - kernel_initializer: Initializer function for the weight matrix. - If ``None`` (default), weights are initialized using the default - initializer used by `tf.get_variable`. - bias_initializer: Initializer function for the bias. - kernel_regularizer: Regularizer function for the weight matrix. - bias_regularizer: Regularizer function for the bias. - activity_regularizer: Regularizer function for the output. - kernel_constraint: An optional projection function to be applied to the - kernel after being updated by an `Optimizer` (e.g. used to implement - norm constraints or value constraints for layer weights). The function - must take as input the unprojected variable and must return the - projected variable (which must have the same shape). Constraints are - not safe to use when doing asynchronous distributed training. - bias_constraint: An optional projection function to be applied to the - bias after being updated by an `Optimizer`. - trainable: Boolean, if `True` also add variables to the graph collection - `GraphKeys.TRAINABLE_VARIABLES` (see `tf.Variable`). - name: String, the name of the layer. Layers with the same name will - share weights, but to avoid mistakes we require reuse=True in such cases. - reuse: Boolean, whether to reuse the weights of a previous layer - by the same name. - Properties: - units: Python integer, dimensionality of the output space. - activation: Activation function (callable). - use_bias: Boolean, whether the layer uses a bias. - kernel_initializer: Initializer instance (or name) for the kernel matrix. - bias_initializer: Initializer instance (or name) for the bias. - kernel_regularizer: Regularizer instance for the kernel matrix (callable) - bias_regularizer: Regularizer instance for the bias (callable). - activity_regularizer: Regularizer instance for the output (callable) - kernel_constraint: Constraint function for the kernel matrix. - bias_constraint: Constraint function for the bias. - kernel: Weight matrix (TensorFlow variable or tensor). - bias: Bias vector, if applicable (TensorFlow variable or tensor). 
- """ - - def __init__(self, units, hidden_sizes, - kb_inputs, - kb_mask, - activation=None, - use_bias=True, - kernel_initializer=None, - bias_initializer=init_ops.zeros_initializer(), - kernel_regularizer=None, - bias_regularizer=None, - activity_regularizer=None, - kernel_constraint=None, - bias_constraint=None, - trainable=True, - name=None, - reuse=None, - **kwargs): - super(KBAttention, self).__init__(trainable=trainable, name=name, - activity_regularizer=activity_regularizer, - *kwargs) - self.units = units - self.hidden_sizes = hidden_sizes - self.kb_inputs = kb_inputs - self.kb_mask = kb_mask - self.kb_input_shape = kb_inputs.get_shape().as_list() - self.dense_name = name or "mlp" - self.dense_params = { - "activation": activation, - "use_bias": use_bias, - "kernel_initializer": kernel_initializer, - "bias_initializer": bias_initializer, - "kernel_regularizer": kernel_regularizer, - "bias_regularizer": bias_regularizer, - "activity_regularizer": activity_regularizer, - "kernel_constraint": kernel_constraint, - "bias_constraint": bias_constraint, - "trainable": trainable, - "dtype": self.kb_inputs.dtype.base_dtype, - "_reuse": reuse - } - # print("KB shape =", self.kb_input_shape) - - def build(self, input_shape): - # if in_shape[:-1] != self.kb_inputs.shape - # TODO: check input shape - # print("in build") - in_shape = input_shape[:1].concatenate(self.kb_input_shape) - in_shape = in_shape[:-1].concatenate(in_shape[-1] + input_shape[-1]) - # print("first in_shape =", in_shape) - self.layers = [] - for i, size in enumerate(self.hidden_sizes): - name = self.dense_name - if name is not None: - name = name + '{:d}'.format(i) - layer = tf.layers.Dense(size, name=name, _scope=name, **self.dense_params) - layer.build(in_shape) - in_shape = layer.compute_output_shape(in_shape) - - self.layers.append(layer) - - # print("input_shape =", input_shape) - # print("last in_shape =", in_shape) - # in_shape = in_shape[:-2].concatenate(in_shape[-2] + input_shape[-1]) - # print("last in_shape =", in_shape) - self.output_layer = tf.layers.Dense(self.units, **self.dense_params) - self.output_layer.build(input_shape) - # print("build = True") - self.built = True - - def call(self, inputs): - # print("in call") - # TODO: check input dtype - - # Tile kb_inputs - kb_inputs = self.kb_inputs - for i in range(inputs.shape.ndims - 1): - kb_inputs = tf.expand_dims(kb_inputs, 0) - kb_inputs = tf.tile(kb_inputs, tf.concat((tf.shape(inputs)[:-1], [1, 1]), 0)) - - # Expand kb_mask - kb_mask = self.kb_mask - for i in range(inputs.shape.ndims - 2): - kb_mask = tf.expand_dims(kb_mask, 1) - kb_mask = tf.expand_dims(kb_mask, -1) - - # Tile inputs - kb_size = tf.shape(self.kb_inputs)[0] - tiling = tf.concat(([1] * (inputs.shape.ndims - 1), [kb_size], [1]), 0) - cell_inputs = tf.tile(tf.expand_dims(inputs, -2), tiling) - - outputs = tf.concat([kb_inputs, cell_inputs], -1) - outputs = tf.multiply(outputs, kb_mask) - for layer in self.layers: - outputs = layer.call(outputs) - # outputs = tf.Print(outputs, [outputs], "KB attention pre-last layer output =") - outputs = tf.squeeze(outputs, [-1]) - # print("inputs shape =", inputs.shape) - # print("outputs shape =", outputs.shape) - outputs = tf.concat([self.output_layer(inputs), outputs], -1) - # print("out of call") - return outputs - - def _compute_output_shape(self, input_shape): - input_shape = tensor_shape.TensorShape(input_shape) - input_shape = input_shape.with_rank_at_least(2) - if input_shape[-1].value is None: - raise ValueError( - 'The innermost dimension of 
input_shape must be defined, but saw: %s' - % input_shape) - output_shape = input_shape[:-1].concatenate(self.units + self.kb_input_shape[0]) - # print("computed output shape is", output_shape) - return output_shape - - def compute_output_shape(self, input_shape): - return self._compute_output_shape(input_shape) diff --git a/deeppavlov/models/seq2seq_go_bot/network.py b/deeppavlov/models/seq2seq_go_bot/network.py deleted file mode 100644 index 758006e804..0000000000 --- a/deeppavlov/models/seq2seq_go_bot/network.py +++ /dev/null @@ -1,492 +0,0 @@ -# Copyright 2017 Neural Networks and Deep Learning lab, MIPT -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import json -import math -from logging import getLogger -from typing import List - -import numpy as np -import tensorflow as tf - -from deeppavlov.core.common.errors import ConfigError -from deeppavlov.core.common.registry import register -from deeppavlov.core.models.tf_model import TFModel -from deeppavlov.models.seq2seq_go_bot.kb_attn_layer import KBAttention - -log = getLogger(__name__) - - -@register("seq2seq_go_bot_nn") -class Seq2SeqGoalOrientedBotNetwork(TFModel): - """ - The :class:`~deeppavlov.models.seq2seq_go_bot.bot.GoalOrientedBotNetwork` - is a recurrent network that encodes user utterance and generates response - in a sequence-to-sequence manner. - - For network architecture is similar to https://arxiv.org/abs/1705.05414 . - - Parameters: - hidden_size: RNN hidden layer size. - source_vocab_size: size of a vocabulary of encoder tokens. - target_vocab_size: size of a vocabulary of decoder tokens. - target_start_of_sequence_index: index of a start of sequence token during - decoding. - target_end_of_sequence_index: index of an end of sequence token during decoding. - knowledge_base_entry_embeddings: matrix with embeddings of knowledge base entries, - size is (number of entries, embedding size). - kb_attention_hidden_sizes: list of sizes for attention hidden units. - decoder_embeddings: matrix with embeddings for decoder output tokens, size is - (`targer_vocab_size` + number of knowledge base entries, embedding size). - beam_width: width of beam search decoding. - learning_rate: learning rate during training. - end_learning_rate: if set, learning rate starts from ``learning_rate`` value - and decays polynomially to the value of ``end_learning_rate``. - decay_steps: number of steps of learning rate decay. - decay_power: power used to calculate learning rate decay for polynomial strategy. - dropout_rate: probability of weights' dropout. - state_dropout_rate: probability of rnn state dropout. - optimizer: one of tf.train.Optimizer subclasses as a string. - **kwargs: parameters passed to a parent - :class:`~deeppavlov.core.models.tf_model.TFModel` class. 
- """ - - GRAPH_PARAMS = ['knowledge_base_size', 'source_vocab_size', - 'target_vocab_size', 'hidden_size', 'embedding_size', - 'kb_embedding_control_sum', 'kb_attention_hidden_sizes'] - - def __init__(self, - hidden_size: int, - source_vocab_size: int, - target_vocab_size: int, - target_start_of_sequence_index: int, - target_end_of_sequence_index: int, - knowledge_base_entry_embeddings: np.ndarray, - kb_attention_hidden_sizes: List[int], - decoder_embeddings: np.ndarray, - learning_rate: float, - beam_width: int = 1, - end_learning_rate: float = None, - decay_steps: int = 1000, - decay_power: float = 1.0, - dropout_rate: float = 0.0, - state_dropout_rate: float = 0.0, - optimizer: str = 'AdamOptimizer', - **kwargs) -> None: - end_learning_rate = end_learning_rate or learning_rate - - # initialize knowledge base embeddings - self.kb_embedding = np.array(knowledge_base_entry_embeddings) - log.debug("recieved knowledge_base_entry_embeddings with shape = {}" - .format(self.kb_embedding.shape)) - # initialize decoder embeddings - self.decoder_embedding = np.array(decoder_embeddings) - if self.kb_embedding.shape[1] != self.decoder_embedding.shape[1]: - raise ValueError("decoder embeddings should have the same dimension" - " as knowledge base entries' embeddings") - - # specify model options - self.opt = { - 'hidden_size': hidden_size, - 'source_vocab_size': source_vocab_size, - 'target_vocab_size': target_vocab_size, - 'target_start_of_sequence_index': target_start_of_sequence_index, - 'target_end_of_sequence_index': target_end_of_sequence_index, - 'kb_attention_hidden_sizes': kb_attention_hidden_sizes, - 'kb_embedding_control_sum': float(np.sum(self.kb_embedding)), - 'knowledge_base_size': self.kb_embedding.shape[0], - 'embedding_size': self.kb_embedding.shape[1], - 'learning_rate': learning_rate, - 'beam_width': beam_width, - 'end_learning_rate': end_learning_rate, - 'decay_steps': decay_steps, - 'decay_power': decay_power, - 'dropout_rate': dropout_rate, - 'state_dropout_rate': state_dropout_rate, - 'optimizer': optimizer - } - - # initialize other parameters - self._init_params() - # build computational graph - self._build_graph() - # initialize session - self.sess = tf.Session() - # from tensorflow.python import debug as tf_debug - # self.sess = tf_debug.TensorBoardDebugWrapperSession(self.sess, "vimary-pc:7019") - self.global_step = 0 - - self.sess.run(tf.global_variables_initializer()) - - super().__init__(**kwargs) - - if tf.train.checkpoint_exists(str(self.load_path.resolve())): - log.info("[initializing `{}` from saved]".format(self.__class__.__name__)) - self.load() - else: - log.info("[initializing `{}` from scratch]".format(self.__class__.__name__)) - - def _init_params(self): - self.hidden_size = self.opt['hidden_size'] - self.src_vocab_size = self.opt['source_vocab_size'] - self.tgt_vocab_size = self.opt['target_vocab_size'] - self.tgt_sos_id = self.opt['target_start_of_sequence_index'] - self.tgt_eos_id = self.opt['target_end_of_sequence_index'] - self.learning_rate = self.opt['learning_rate'] - self.kb_attn_hidden_sizes = self.opt['kb_attention_hidden_sizes'] - self.embedding_size = self.opt['embedding_size'] - self.kb_size = self.opt['knowledge_base_size'] - self.beam_width = self.opt['beam_width'] - self.learning_rate = self.opt['learning_rate'] - self.end_learning_rate = self.opt['end_learning_rate'] - self.dropout_rate = self.opt['dropout_rate'] - self.state_dropout_rate = self.opt['state_dropout_rate'] - self.decay_steps = self.opt['decay_steps'] - self.decay_power = 
-
-        self._optimizer = None
-        if hasattr(tf.train, self.opt['optimizer']):
-            self._optimizer = getattr(tf.train, self.opt['optimizer'])
-        if self._optimizer is None or not issubclass(self._optimizer, tf.train.Optimizer):
-            raise ConfigError("`optimizer` parameter should be a name of"
-                              " tf.train.Optimizer subclass")
-
-    def _build_graph(self):
-
-        self._add_placeholders()
-
-        _logits, self._predictions = self._build_body()
-
-        _weights = tf.expand_dims(self._tgt_weights, -1)
-        _loss_tensor = \
-            tf.losses.sparse_softmax_cross_entropy(logits=_logits,
-                                                   labels=self._decoder_outputs,
-                                                   weights=_weights,
-                                                   reduction=tf.losses.Reduction.NONE)
-        # normalize loss by batch_size
-        _loss_tensor = \
-            tf.verify_tensor_all_finite(_loss_tensor, "Non-finite values in loss tensor.")
-        self._loss = tf.reduce_sum(_loss_tensor) / tf.cast(self._batch_size, tf.float32)
-        # self._loss = tf.reduce_mean(_loss_tensor, name='loss')
-        # TODO: tune clip_norm
-        self._train_op = \
-            self.get_train_op(self._loss,
-                              learning_rate=self._learning_rate,
-                              optimizer=self._optimizer,
-                              clip_norm=2.)
-        # log.info("Trainable variables")
-        # for v in tf.trainable_variables():
-        #     log.info(v)
-        # self.print_number_of_parameters()
-
-    def _add_placeholders(self):
-        self._dropout_keep_prob = tf.placeholder_with_default(
-            1.0, shape=[], name='dropout_keep_prob')
-        self._state_dropout_keep_prob = tf.placeholder_with_default(
-            1.0, shape=[], name='state_dropout_keep_prob')
-        self._learning_rate = tf.placeholder(tf.float32,
-                                             shape=[],
-                                             name='learning_rate')
-        # _encoder_inputs: [batch_size, max_input_time, embedding_size]
-        self._encoder_inputs = tf.placeholder(tf.float32,
-                                              [None, None, self.embedding_size],
-                                              name='encoder_inputs')
-        self._batch_size = tf.shape(self._encoder_inputs)[0]
-        # _decoder_inputs: [batch_size, max_output_time]
-        self._decoder_inputs = tf.placeholder(tf.int32,
-                                              [None, None],
-                                              name='decoder_inputs')
-        # _decoder_embedding: [tgt_vocab_size + kb_size, embedding_size]
-        self._decoder_embedding = \
-            tf.get_variable("decoder_embedding",
-                            shape=(self.tgt_vocab_size + self.kb_size,
-                                   self.embedding_size),
-                            dtype=tf.float32,
-                            initializer=tf.constant_initializer(self.decoder_embedding),
-                            trainable=False)
-        # _decoder_outputs: [batch_size, max_output_time]
-        self._decoder_outputs = tf.placeholder(tf.int32,
-                                               [None, None],
-                                               name='decoder_outputs')
-        # _kb_embedding: [kb_size, embedding_size]
-        # TODO: try training embeddings
-        kb_W = np.array(self.kb_embedding)[:, :self.embedding_size]
-        self._kb_embedding = tf.get_variable("kb_embedding",
-                                             shape=(kb_W.shape[0], kb_W.shape[1]),
-                                             dtype=tf.float32,
-                                             initializer=tf.constant_initializer(kb_W),
-                                             trainable=True)
-        # _kb_mask: [batch_size, kb_size]
-        self._kb_mask = tf.placeholder(tf.float32, [None, None], name='kb_mask')
-
-        # TODO: compute sequence lengths on the go
-        # _src_sequence_lengths, _tgt_sequence_lengths: [batch_size]
-        self._src_sequence_lengths = tf.placeholder(tf.int32,
-                                                    [None],
-                                                    name='input_sequence_lengths')
-        self._tgt_sequence_lengths = tf.placeholder(tf.int32,
-                                                    [None],
-                                                    name='output_sequence_lengths')
-        # _tgt_weights: [batch_size, max_output_time]
-        self._tgt_weights = tf.placeholder(tf.int32,
-                                           [None, None],
-                                           name='target_weights')
-
-    def _build_body(self):
-        self._build_encoder()
-        self._build_decoder()
-        return self._logits, self._predictions
-
-    def _build_encoder(self):
-        with tf.variable_scope("Encoder"):
-            # Encoder embedding
-            # _encoder_embedding = tf.get_variable(
-            #     "encoder_embedding", [self.src_vocab_size, self.embedding_size])
-            # _encoder_emb_inp = tf.nn.embedding_lookup(_encoder_embedding,
-            #                                           self._encoder_inputs)
-            # _encoder_emb_inp = tf.one_hot(self._encoder_inputs, self.src_vocab_size)
-            _encoder_emb_inp = self._encoder_inputs
-
-            _encoder_cell = tf.nn.rnn_cell.LSTMCell(self.hidden_size,
-                                                    name='basic_lstm_cell')
-            _encoder_cell = tf.contrib.rnn.DropoutWrapper(
-                _encoder_cell,
-                input_size=self.embedding_size,
-                dtype=tf.float32,
-                input_keep_prob=self._dropout_keep_prob,
-                output_keep_prob=self._dropout_keep_prob,
-                state_keep_prob=self._state_dropout_keep_prob,
-                variational_recurrent=True)
-            # Run Dynamic RNN
-            # _encoder_outputs: [batch_size, max_time, hidden_size]
-            # _encoder_state: [batch_size, hidden_size]
-            # input_states?
-            _encoder_outputs, _encoder_state = tf.nn.dynamic_rnn(
-                _encoder_cell, _encoder_emb_inp, dtype=tf.float32,
-                sequence_length=self._src_sequence_lengths, time_major=False)
-
-            self._encoder_outputs = _encoder_outputs
-            self._encoder_state = _encoder_state
-
-    def _build_decoder(self):
-        with tf.variable_scope("Decoder"):
-            # Decoder embedding
-            # _decoder_embedding = tf.get_variable(
-            #     "decoder_embedding", [self.tgt_vocab_size + self.kb_size,
-            #                           self.embedding_size])
-            # _decoder_emb_inp = tf.one_hot(self._decoder_inputs,
-            #                               self.tgt_vocab_size + self.kb_size)
-            _decoder_emb_inp = tf.nn.embedding_lookup(self._decoder_embedding,
                                                       self._decoder_inputs)
-
-            # Tiling outputs, states, sequence lengths
-            _tiled_encoder_outputs = tf.contrib.seq2seq.tile_batch(
-                self._encoder_outputs, multiplier=self.beam_width)
-            _tiled_encoder_state = tf.contrib.seq2seq.tile_batch(
-                self._encoder_state, multiplier=self.beam_width)
-            _tiled_src_sequence_lengths = tf.contrib.seq2seq.tile_batch(
-                self._src_sequence_lengths, multiplier=self.beam_width)
-
-            with tf.variable_scope("AttentionOverKB"):
-                _kb_attn_layer = KBAttention(self.tgt_vocab_size,
-                                             self.kb_attn_hidden_sizes + [1],
-                                             self._kb_embedding,
-                                             self._kb_mask,
-                                             activation=tf.nn.relu,
-                                             use_bias=False)
-            # Output dense layer
-            # _projection_layer = \
-            #     tf.layers.Dense(self.tgt_vocab_size, use_bias=False, _reuse=reuse)
-
-            # Decoder Cell
-            _decoder_cell = tf.nn.rnn_cell.LSTMCell(self.hidden_size,
-                                                    name='basic_lstm_cell')
-            _decoder_cell = tf.contrib.rnn.DropoutWrapper(
-                _decoder_cell,
-                input_size=self.embedding_size + self.hidden_size,
-                dtype=tf.float32,
-                input_keep_prob=self._dropout_keep_prob,
-                output_keep_prob=self._dropout_keep_prob,
-                state_keep_prob=self._state_dropout_keep_prob,
-                variational_recurrent=True)
-
-            def build_dec_cell(enc_out, enc_seq_len, reuse=None):
-                with tf.variable_scope("dec_cell_attn", reuse=reuse):
-                    # Create an attention mechanism
-                    # _attention_mechanism = tf.contrib.seq2seq.BahdanauAttention(
-                    _attention_mechanism = tf.contrib.seq2seq.LuongAttention(
-                        self.hidden_size,
-                        memory=enc_out,
-                        memory_sequence_length=enc_seq_len)
-                    _cell = tf.contrib.seq2seq.AttentionWrapper(
-                        _decoder_cell,
-                        _attention_mechanism,
-                        attention_layer_size=self.hidden_size)
-                return _cell
-
-            # TRAIN MODE
-            _decoder_cell_tr = build_dec_cell(self._encoder_outputs,
-                                              self._src_sequence_lengths)
-            self._decoder_cell_tr = _decoder_cell_tr
-            # Train Helper to feed inputs for training:
-            # read inputs from dense ground truth vectors
-            _helper_tr = tf.contrib.seq2seq.TrainingHelper(
-                _decoder_emb_inp, self._tgt_sequence_lengths, time_major=False)
-            # Copy encoder hidden state to decoder initial state
-            _decoder_init_state = \
-                _decoder_cell_tr.zero_state(self._batch_size, dtype=tf.float32) \
-                .clone(cell_state=self._encoder_state)
-            _decoder_tr = \
-                tf.contrib.seq2seq.BasicDecoder(_decoder_cell_tr, _helper_tr,
-                                                initial_state=_decoder_init_state,
-                                                output_layer=_kb_attn_layer)
-            # Wrap into variable scope to share attention parameters
-            # Required!
-            with tf.variable_scope('decode_with_shared_attention'):
-                _outputs_tr, _, _ = \
-                    tf.contrib.seq2seq.dynamic_decode(_decoder_tr,
-                                                      impute_finished=False,
-                                                      output_time_major=False)
-            # _logits = decode(_helper, "decode").beam_search_decoder_output.scores
-            _logits = _outputs_tr.rnn_output
-
-            # INFER MODE
-            _decoder_cell_inf = build_dec_cell(_tiled_encoder_outputs,
-                                               _tiled_src_sequence_lengths,
-                                               reuse=True)
-            self._decoder_cell_inf = _decoder_cell_inf
-            # Infer Helper
-            _max_iters = tf.round(tf.reduce_max(self._src_sequence_lengths) * 2)
-            # NOTE: helper is not needed?
-            # _helper_inf = tf.contrib.seq2seq.GreedyEmbeddingHelper(
-            #     self._decoder_embedding,
-            #     tf.fill([self._batch_size], self.tgt_sos_id), self.tgt_eos_id)
-            #     lambda d: tf.one_hot(d, self.tgt_vocab_size + self.kb_size),
-            # Decoder Init State
-            _decoder_init_state = \
-                _decoder_cell_inf.zero_state(tf.shape(_tiled_encoder_outputs)[0],
-                                             dtype=tf.float32) \
-                .clone(cell_state=_tiled_encoder_state)
-            # Define a beam-search decoder
-            _start_tokens = tf.tile(tf.constant([self.tgt_sos_id], tf.int32),
-                                    [self._batch_size])
-            # _start_tokens = tf.fill([self._batch_size], self.tgt_sos_id)
-            _decoder_inf = tf.contrib.seq2seq.BeamSearchDecoder(
-                cell=_decoder_cell_inf,
-                embedding=self._decoder_embedding,
-                start_tokens=_start_tokens,
-                end_token=self.tgt_eos_id,
-                initial_state=_decoder_init_state,
-                beam_width=self.beam_width,
-                output_layer=_kb_attn_layer,
-                length_penalty_weight=0.0)
-
-            # Wrap into variable scope to share attention parameters
-            # Required!
- with tf.variable_scope("decode_with_shared_attention", reuse=True): - # TODO: try impute_finished = True, - _outputs_inf, _, _ = \ - tf.contrib.seq2seq.dynamic_decode(_decoder_inf, - impute_finished=False, - maximum_iterations=_max_iters, - output_time_major=False) - _predictions = _outputs_inf.predicted_ids[:, :, 0] - # TODO: rm indexing - # _predictions = \ - # decode(_helper_infer, "decode", _max_iters, reuse=True).sample_id - self._logits = _logits - self._predictions = _predictions - - def __call__(self, enc_inputs, src_seq_lengths, kb_masks, prob=False): - predictions = self.sess.run( - self._predictions, - feed_dict={ - self._dropout_keep_prob: 1., - self._state_dropout_keep_prob: 1., - self._learning_rate: 1., - self._encoder_inputs: enc_inputs, - self._src_sequence_lengths: src_seq_lengths, - self._kb_mask: kb_masks - } - ) - # TODO: implement infer probabilities - if prob: - raise NotImplementedError("Probs not available for now.") - return predictions - - def train_on_batch(self, enc_inputs, dec_inputs, dec_outputs, - src_seq_lengths, tgt_seq_lengths, tgt_weights, kb_masks): - _, loss_value = self.sess.run( - [self._train_op, self._loss], - feed_dict={ - self._dropout_keep_prob: 1 - self.dropout_rate, - self._state_dropout_keep_prob: 1 - self.state_dropout_rate, - self._learning_rate: self.get_learning_rate(), - self._encoder_inputs: enc_inputs, - self._decoder_inputs: dec_inputs, - self._decoder_outputs: dec_outputs, - self._src_sequence_lengths: src_seq_lengths, - self._tgt_sequence_lengths: tgt_seq_lengths, - self._tgt_weights: tgt_weights, - self._kb_mask: kb_masks - } - ) - return {'loss': loss_value, 'learning_rate': self.get_learning_rate()} - - def get_learning_rate(self): - # polynomial decay - global_step = min(self.global_step, self.decay_steps) - decayed_learning_rate = \ - (self.learning_rate - self.end_learning_rate) * \ - (1 - global_step / self.decay_steps) ** self.decay_power + \ - self.end_learning_rate - return decayed_learning_rate - - def load(self, *args, **kwargs): - self.load_params() - super().load(*args, **kwargs) - - def load_params(self): - path = str(self.load_path.with_suffix('.json').resolve()) - log.info('[loading parameters from {}]'.format(path)) - with open(path, 'r', encoding='utf8') as fp: - params = json.load(fp) - for p in self.GRAPH_PARAMS: - if self.opt.get(p) != params.get(p): - if p in ('kb_embedding_control_sum') and \ - (math.abs(self.opt.get(p, 0.) - params.get(p, 0.)) < 1e-3): - continue - raise ConfigError("`{}` parameter must be equal to saved model" - " parameter value `{}`, but is equal to `{}`" - .format(p, params.get(p), self.opt.get(p))) - - def save(self, *args, **kwargs): - super().save(*args, **kwargs) - self.save_params() - - def save_params(self): - path = str(self.save_path.with_suffix('.json').resolve()) - log.info('[saving parameters to {}]'.format(path)) - with open(path, 'w', encoding='utf8') as fp: - json.dump(self.opt, fp) - - def process_event(self, event_name, data): - if event_name == 'after_epoch': - log.info("Updating global step, learning rate = {:.6f}." 
diff --git a/deeppavlov/models/torch_bert/torch_bert_as_summarizer.py b/deeppavlov/models/torch_bert/torch_bert_as_summarizer.py
deleted file mode 100644
index 2e75e6be5e..0000000000
--- a/deeppavlov/models/torch_bert/torch_bert_as_summarizer.py
+++ /dev/null
@@ -1,194 +0,0 @@
-# Copyright 2017 Neural Networks and Deep Learning lab, MIPT
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-import re
-from logging import getLogger
-from pathlib import Path
-from typing import List, Optional
-
-import numpy as np
-import torch
-from overrides import overrides
-from transformers import BertForNextSentencePrediction, BertConfig
-
-from deeppavlov.core.common.errors import ConfigError
-from deeppavlov.core.commands.utils import expand_path
-from deeppavlov.core.common.registry import register
-from deeppavlov.core.models.torch_model import TorchModel
-from deeppavlov.models.preprocessors.torch_transformers_preprocessor import TorchTransformersPreprocessor
-
-logger = getLogger(__name__)
-
-
-@register('torch_bert_as_summarizer')
-class TorchBertAsSummarizer(TorchModel):
-    """Naive extractive summarization model based on BERT, in PyTorch.
-    The BERT model was trained on the Masked Language Modeling (MLM) and Next Sentence Prediction (NSP) tasks.
-    The NSP head was trained to predict, given ``[CLS] text_a [SEP] text_b [SEP]``, whether text_b follows
-    text_a in the original document.
-
-    This NSP head can be used to stack sentences from a long document, starting from an initial sentence:
-
-    summary_0 = init_sentence
-
-    summary_1 = summary_0 + argmax(nsp_score(candidates))
-
-    summary_2 = summary_1 + argmax(nsp_score(candidates))
-
-    ...
-
-    , where candidates are all sentences from a document.
-
-    Args:
-        pretrained_bert: pretrained BERT checkpoint path or key title (e.g. "bert-base-uncased")
-        bert_config_file: path to BERT configuration file (not used if pretrained_bert is a key title)
-        vocab_file: path to BERT vocabulary
-        max_summary_length: limit on summary length; the number of sentences is used if
-            ``max_summary_length_in_tokens`` is set to False, else the number of tokens is used.
-        max_summary_length_in_tokens: use the number of tokens as the length of a summary.
-            Defaults to ``False``.
-        max_seq_length: max sequence length in subtokens, including ``[SEP]`` and ``[CLS]`` tokens.
-            ``max_seq_length`` is used in BERT to compute NSP scores. Defaults to ``128``.
-        do_lower_case: set ``True`` if lowercasing is needed. Defaults to ``False``.
-        lang: use ru_sent_tokenize for 'ru' and nltk.sent_tokenize for other languages.
-            Defaults to ``'ru'``.
-    """
-
-    def __init__(self, pretrained_bert: str,
-                 vocab_file: str,
-                 max_summary_length: int,
-                 bert_config_file: Optional[str] = None,
-                 max_summary_length_in_tokens: bool = False,
-                 max_seq_length: int = 128,
-                 do_lower_case: bool = False,
-                 lang: str = 'ru',
-                 save_path: Optional[str] = None,
-                 **kwargs) -> None:
-
-        self.max_summary_length = max_summary_length
-        self.max_summary_length_in_tokens = max_summary_length_in_tokens
-        self.pretrained_bert = pretrained_bert
-        self.bert_config_file = bert_config_file
-        self.bert_preprocessor = TorchTransformersPreprocessor(vocab_file=vocab_file, do_lower_case=do_lower_case,
-                                                               max_seq_length=max_seq_length)
-
-        self.tokenize_reg = re.compile(r"[\w']+|[^\w ]")
-
-        if lang == 'ru':
-            from ru_sent_tokenize import ru_sent_tokenize
-            self.sent_tokenizer = ru_sent_tokenize
-        else:
-            from nltk import sent_tokenize
-            self.sent_tokenizer = sent_tokenize
-
-        super().__init__(save_path=save_path, **kwargs)
-
-    @overrides
-    def load(self, fname=None):
-        if fname is not None:
-            self.load_path = fname
-
-        if self.pretrained_bert and not Path(self.pretrained_bert).is_file():
-            self.model = BertForNextSentencePrediction.from_pretrained(
-                self.pretrained_bert, output_attentions=False, output_hidden_states=False)
-        elif self.bert_config_file and Path(self.bert_config_file).is_file():
-            self.bert_config = BertConfig.from_json_file(str(expand_path(self.bert_config_file)))
-
-            if self.attention_probs_keep_prob is not None:
-                self.bert_config.attention_probs_dropout_prob = 1.0 - self.attention_probs_keep_prob
-            if self.hidden_keep_prob is not None:
-                self.bert_config.hidden_dropout_prob = 1.0 - self.hidden_keep_prob
-            self.model = BertForNextSentencePrediction(config=self.bert_config)
-        else:
-            raise ConfigError("No pre-trained BERT model is given.")
-
-        self.model.to(self.device)
-
-    def _get_nsp_predictions(self, sentences: List[str], candidates: List[str]):
-        """Compute the NextSentence probability for every (sentence_i, candidate_i) pair.
-
-        [CLS] sentence_i [SEP] candidate_i [SEP]
-
-        Args:
-            sentences: list of sentences
-            candidates: list of candidates to be the next sentence
-
-        Returns:
-            probabilities that a candidate is the next sentence
-        """
-        features = self.bert_preprocessor(texts_a=sentences, texts_b=candidates)
-
-        input_ids = [f.input_ids for f in features]
-        input_masks = [f.attention_mask for f in features]
-        input_type_ids = [f.token_type_ids for f in features]
-
-        b_input_ids = torch.cat(input_ids, dim=0).to(self.device)
-        b_input_masks = torch.cat(input_masks, dim=0).to(self.device)
-        b_input_type_ids = torch.cat(input_type_ids, dim=0).to(self.device)
-
-        pred = self.model(input_ids=b_input_ids, attention_mask=b_input_masks, token_type_ids=b_input_type_ids)[0]
-        nsp_probs = torch.nn.functional.softmax(pred, dim=-1)
-        return nsp_probs[:, 0]
-
-    def __call__(self, texts: List[str], init_sentences: Optional[List[str]] = None) -> List[List[str]]:
-        """Build a summary for each text in ``texts``.
-
-        Args:
-            texts: texts to build summaries for
-            init_sentences: ``init_sentence`` is used as the first sentence of a summary.
-                Defaults to None.
-
-        Returns:
-            List[List[str]]: summaries tokenized into sentences
-        """
-        summaries = []
-        # build a summary for each (text, init_sentence) pair
-        if init_sentences is None:
-            init_sentences = [None] * len(texts)
-
-        for text, init_sentence in zip(texts, init_sentences):
-            text_sentences = self.sent_tokenizer(text)
-
-            if init_sentence is None:
-                init_sentence = text_sentences[0]
-                text_sentences = text_sentences[1:]
-
-            # remove duplicates
-            text_sentences = list(set(text_sentences))
-            # remove init_sentence from text sentences
-            text_sentences = [sent for sent in text_sentences if sent != init_sentence]
-
-            summary = [init_sentence]
-            if self.max_summary_length_in_tokens:
-                # get length in tokens
-                def get_length(x):
-                    return len(self.tokenize_reg.findall(' '.join(x)))
-            else:
-                # get length as the number of sentences
-                get_length = len
-
-            candidates = text_sentences[:]
-            while len(candidates) > 0:
-                # TODO: use batches
-                candidates_scores = [self._get_nsp_predictions([' '.join(summary)], [cand]) for cand in candidates]
-                best_candidate_idx = np.argmax(candidates_scores)
-                best_candidate = candidates[best_candidate_idx]
-                del candidates[best_candidate_idx]
-                if get_length(summary + [best_candidate]) > self.max_summary_length:
-                    break
-                summary = summary + [best_candidate]
-            summaries += [summary]
-        return summaries
-
-    def train_on_batch(self, **kwargs):
-        raise NotImplementedError
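# The greedy loop at the heart of the removed summarizer, distilled into a
# standalone sketch; nsp_score stands in for the BERT next-sentence probability
# and is a hypothetical callable, not DeepPavlov API:
from typing import Callable, List

def greedy_summary(sentences: List[str], init: str,
                   nsp_score: Callable[[str, str], float],
                   max_sentences: int) -> List[str]:
    """Grow a summary by repeatedly appending the best-scoring candidate sentence."""
    summary = [init]
    candidates = [s for s in sentences if s != init]
    while candidates and len(summary) < max_sentences:
        best = max(candidates, key=lambda cand: nsp_score(' '.join(summary), cand))
        candidates.remove(best)
        summary.append(best)
    return summary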
diff --git a/deeppavlov/models/torch_bert/torch_bert_sequence_tagger.py b/deeppavlov/models/torch_bert/torch_transformers_sequence_tagger.py
similarity index 93%
rename from deeppavlov/models/torch_bert/torch_bert_sequence_tagger.py
rename to deeppavlov/models/torch_bert/torch_transformers_sequence_tagger.py
index 5a6859b042..9c1959a3ac 100644
--- a/deeppavlov/models/torch_bert/torch_bert_sequence_tagger.py
+++ b/deeppavlov/models/torch_bert/torch_transformers_sequence_tagger.py
@@ -19,10 +19,10 @@
 import numpy as np
 import torch
 from overrides import overrides
-from transformers import BertForTokenClassification, BertConfig
+from transformers import AutoModelForTokenClassification, AutoConfig
 
-from deeppavlov.core.common.errors import ConfigError
 from deeppavlov.core.commands.utils import expand_path
+from deeppavlov.core.common.errors import ConfigError
 from deeppavlov.core.common.registry import register
 from deeppavlov.core.models.torch_model import TorchModel
 
@@ -192,10 +192,10 @@ def token_labels_to_subtoken_labels(labels, y_mask, input_mask):
     return subtoken_labels
 
 
-@register('torch_bert_sequence_tagger')
-class TorchBertSequenceTagger(TorchModel):
-    """BERT-based model on PyTorch for text tagging. It predicts a label for every token (not subtoken) in the text.
-    You can use it for sequence labeling tasks, such as morphological tagging or named entity recognition.
+@register('torch_transformers_sequence_tagger')
+class TorchTransformersSequenceTagger(TorchModel):
+    """Transformer-based model on PyTorch for text tagging. It predicts a label for every token (not subtoken)
+    in the text. You can use it for sequence labeling tasks, such as morphological tagging or named entity recognition.
 
     Args:
         n_tags: number of distinct tags
@@ -276,7 +276,7 @@ def train_on_batch(self,
         b_labels = torch.from_numpy(np.array(subtoken_labels)).to(torch.int64).to(self.device)
 
         self.optimizer.zero_grad()
-        loss, logits = self.model(input_ids=b_input_ids, token_type_ids=None, attention_mask=b_input_masks,
+        loss, logits = self.model(input_ids=b_input_ids, attention_mask=b_input_masks,
                                   labels=b_labels)
         loss.backward()
         # Clip the norm of the gradients to 1.0.
@@ -310,7 +310,7 @@ def __call__(self,
 
         with torch.no_grad():
             # Forward pass, calculate logit predictions
-            logits = self.model(b_input_ids, token_type_ids=None, attention_mask=b_input_masks)
+            logits = self.model(b_input_ids, attention_mask=b_input_masks)
 
         # Move logits and labels to CPU and to numpy arrays
         logits = token_from_subtoken(logits[0].detach().cpu(), torch.from_numpy(y_masks))
@@ -331,18 +331,18 @@ def load(self, fname=None):
         if fname is not None:
             self.load_path = fname
 
-        if self.pretrained_bert and not Path(self.pretrained_bert).is_file():
-            self.model = BertForTokenClassification.from_pretrained(
-                self.pretrained_bert, num_labels=self.n_classes,
-                output_attentions=False, output_hidden_states=False)
+        if self.pretrained_bert:
+            config = AutoConfig.from_pretrained(self.pretrained_bert, num_labels=self.n_classes,
+                                                output_attentions=False, output_hidden_states=False)
+            self.model = AutoModelForTokenClassification.from_pretrained(self.pretrained_bert, config=config)
         elif self.bert_config_file and Path(self.bert_config_file).is_file():
-            self.bert_config = BertConfig.from_json_file(str(expand_path(self.bert_config_file)))
+            self.bert_config = AutoConfig.from_json_file(str(expand_path(self.bert_config_file)))
 
             if self.attention_probs_keep_prob is not None:
                 self.bert_config.attention_probs_dropout_prob = 1.0 - self.attention_probs_keep_prob
             if self.hidden_keep_prob is not None:
                 self.bert_config.hidden_dropout_prob = 1.0 - self.hidden_keep_prob
-            self.model = BertForTokenClassification(config=self.bert_config)
+            self.model = AutoModelForTokenClassification(config=self.bert_config)
         else:
             raise ConfigError("No pre-trained BERT model is given.")
 
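# The rename above switches from BERT-specific classes to the transformers Auto*
# API, so any architecture with a token-classification head can be plugged in.
# A minimal sketch of the new loading pattern (model name and label count are
# illustrative, not values from this PR):
from transformers import AutoConfig, AutoModelForTokenClassification

config = AutoConfig.from_pretrained('bert-base-cased', num_labels=4,
                                    output_attentions=False, output_hidden_states=False)
model = AutoModelForTokenClassification.from_pretrained('bert-base-cased', config=config)
# Swapping in e.g. 'roberta-base' or 'distilbert-base-cased' requires no code changes.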
diff --git a/deeppavlov/requirements/datasets.txt b/deeppavlov/requirements/datasets.txt
new file mode 100644
index 0000000000..f24eead325
--- /dev/null
+++ b/deeppavlov/requirements/datasets.txt
@@ -0,0 +1 @@
+datasets==1.1.2
\ No newline at end of file
diff --git a/deeppavlov/requirements/hdt.txt b/deeppavlov/requirements/hdt.txt
index 12a932617d..ffd5cad4b0 100644
--- a/deeppavlov/requirements/hdt.txt
+++ b/deeppavlov/requirements/hdt.txt
@@ -1,2 +1 @@
-pybind11==2.2.4
 hdt==2.3
diff --git a/deeppavlov/requirements/lxml.txt b/deeppavlov/requirements/lxml.txt
new file mode 100644
index 0000000000..eaf4d54c65
--- /dev/null
+++ b/deeppavlov/requirements/lxml.txt
@@ -0,0 +1 @@
+lxml==4.4.2
diff --git a/deeppavlov/requirements/nemo-asr.txt b/deeppavlov/requirements/nemo-asr.txt
index 63d50bc4c8..1a072b36b7 100644
--- a/deeppavlov/requirements/nemo-asr.txt
+++ b/deeppavlov/requirements/nemo-asr.txt
@@ -1,4 +1,3 @@
-nemo-toolkit==0.10.0
 frozendict==1.2
 kaldi-io==0.9.4
 inflect==4.1.0
diff --git a/deeppavlov/requirements/nemo-tts.txt b/deeppavlov/requirements/nemo-tts.txt
index 80f13f45dd..a0f3139b34 100644
--- a/deeppavlov/requirements/nemo-tts.txt
+++ b/deeppavlov/requirements/nemo-tts.txt
@@ -1,4 +1,3 @@
 matplotlib==3.2.1
 sentencepiece==0.1.85
-transformers==2.8.0
 youtokentome==1.0.6
\ No newline at end of file
diff --git a/deeppavlov/requirements/nemo.txt b/deeppavlov/requirements/nemo.txt
new file mode 100644
index 0000000000..e6f8ff402a
--- /dev/null
+++ b/deeppavlov/requirements/nemo.txt
@@ -0,0 +1 @@
+nemo-toolkit==0.10.0
\ No newline at end of file
diff --git a/deeppavlov/requirements/pyinflect.txt b/deeppavlov/requirements/pyinflect.txt
deleted file mode 100644
index 080de04034..0000000000
--- a/deeppavlov/requirements/pyinflect.txt
+++ /dev/null
@@ -1 +0,0 @@
-pyinflect==0.5.1
diff --git a/deeppavlov/requirements/pytorch.txt b/deeppavlov/requirements/pytorch.txt
deleted file mode 100644
index b84ccad8dc..0000000000
--- a/deeppavlov/requirements/pytorch.txt
+++ /dev/null
@@ -1,4 +0,0 @@
-torch==1.6.0
-torchvision==0.7.0
-torchtext==0.6.0
-torchsummary==1.5.1
diff --git a/deeppavlov/requirements/nemo-pytorch.txt b/deeppavlov/requirements/pytorch14.txt
similarity index 100%
rename from deeppavlov/requirements/nemo-pytorch.txt
rename to deeppavlov/requirements/pytorch14.txt
diff --git a/deeppavlov/requirements/pytorch16.txt b/deeppavlov/requirements/pytorch16.txt
new file mode 100644
index 0000000000..0d41debc01
--- /dev/null
+++ b/deeppavlov/requirements/pytorch16.txt
@@ -0,0 +1,2 @@
+torch==1.6.0
+torchvision==0.7.0
\ No newline at end of file
diff --git a/deeppavlov/requirements/sortedcontainers.txt b/deeppavlov/requirements/sortedcontainers.txt
new file mode 100644
index 0000000000..ecb69929c1
--- /dev/null
+++ b/deeppavlov/requirements/sortedcontainers.txt
@@ -0,0 +1 @@
+sortedcontainers==2.1.0
\ No newline at end of file
diff --git a/deeppavlov/requirements/sparqlwrapper.txt b/deeppavlov/requirements/sparqlwrapper.txt
deleted file mode 100644
index fb9a9fb1dc..0000000000
--- a/deeppavlov/requirements/sparqlwrapper.txt
+++ /dev/null
@@ -1 +0,0 @@
-SPARQLWrapper==1.8.5
diff --git a/deeppavlov/requirements/spelling.txt b/deeppavlov/requirements/spelling.txt
deleted file mode 100644
index 16c4b9c093..0000000000
--- a/deeppavlov/requirements/spelling.txt
+++ /dev/null
@@ -1,3 +0,0 @@
-lxml==4.4.2
-python-Levenshtein==0.12.0
-sortedcontainers==2.1.0
\ No newline at end of file
diff --git a/deeppavlov/requirements/torchtext.txt b/deeppavlov/requirements/torchtext.txt
new file mode 100644
index 0000000000..766718b628
--- /dev/null
+++ b/deeppavlov/requirements/torchtext.txt
@@ -0,0 +1 @@
+torchtext==0.6.0
\ No newline at end of file
diff --git a/deeppavlov/requirements/transformers.txt b/deeppavlov/requirements/transformers.txt
index 7523410ab8..ec122c087f 100644
--- a/deeppavlov/requirements/transformers.txt
+++ b/deeppavlov/requirements/transformers.txt
@@ -1,2 +1 @@
-transformers==2.8.0
-datasets==1.1.2
\ No newline at end of file
+transformers==2.8.0
\ No newline at end of file
diff --git a/deeppavlov/requirements/whapi.txt b/deeppavlov/requirements/whapi.txt
new file mode 100644
index 0000000000..8637c13b43
--- /dev/null
+++ b/deeppavlov/requirements/whapi.txt
@@ -0,0 +1 @@
+whapi==0.6.2
\ No newline at end of file
diff --git a/deeppavlov/requirements/wikihow.txt b/deeppavlov/requirements/wikihow.txt
deleted file mode 100644
index fb90705f32..0000000000
--- a/deeppavlov/requirements/wikihow.txt
+++ /dev/null
@@ -1,2 +0,0 @@
-whapi==0.6.2
-beautifulsoup4==4.9.0
diff --git a/docs/_templates/footer.html b/docs/_templates/footer.html
index c2b05c38af..30f37e7371 100644
--- a/docs/_templates/footer.html
+++ b/docs/_templates/footer.html
@@ -1,6 +1,20 @@
 {#{% extends '!footer.html' %}#}