From 7fe272c91c10a0c1b7edb4023bb734e3784cfa18 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Mon, 26 Feb 2024 18:39:56 -0700 Subject: [PATCH] Add dep notice for notebooks (#8522) (#8523) * add dep notice * revert --------- Signed-off-by: eharper Co-authored-by: Eric Harper Signed-off-by: Pablo Garay --- tutorials/asr/Offline_ASR.ipynb | 228 ++++++++++++------------ tutorials/nlp/MegatronBert_export.ipynb | 8 +- tutorials/nlp/Question_Answering.ipynb | 6 + 3 files changed, 130 insertions(+), 112 deletions(-) diff --git a/tutorials/asr/Offline_ASR.ipynb b/tutorials/asr/Offline_ASR.ipynb index 41cc79ff6783..d879fd4718a3 100644 --- a/tutorials/asr/Offline_ASR.ipynb +++ b/tutorials/asr/Offline_ASR.ipynb @@ -1,17 +1,4 @@ { - "nbformat": 4, - "nbformat_minor": 0, - "metadata": { - "colab": { - "provenance": [], - "toc_visible": true - }, - "kernelspec": { - "name": "python3", - "display_name": "Python 3" - }, - "accelerator": "GPU" - }, "cells": [ { "cell_type": "markdown", @@ -19,6 +6,12 @@ "id": "_wIWPxBVc3_O" }, "source": [ + "### Deprecation Notice\n", + "\n", + "This tutorial is deprecated as of r1.23.0 and will be removed in the next release.\n", + "\n", + "---\n", + "\n", "# NeMo offline ASR\n", "\n", "This notebook demonstrates how to \n", @@ -48,9 +41,11 @@ }, { "cell_type": "code", + "execution_count": null, "metadata": { "id": "I9eIxAyKHREB" }, + "outputs": [], "source": [ "BRANCH = 'main'\n", "try:\n", @@ -85,24 +80,22 @@ " print('Restarting Colab runtime to successfully import built module.')\n", " print('Please re-run the notebook.')\n", " os.kill(os.getpid(), 9)" - ], - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "code", + "execution_count": null, "metadata": { "id": "-X2OyAxreGfl" }, + "outputs": [], "source": [ "import numpy as np\n", "# Import audio processing library\n", "import librosa\n", "# We'll use this to listen to audio\n", "from IPython.display import Audio, display" - ], - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "markdown", @@ -120,14 +113,14 @@ }, { "cell_type": "code", + "execution_count": null, "metadata": { "id": "f_J9cuU1H6Bn" }, + "outputs": [], "source": [ "nemo_asr.models.EncDecCTCModel.list_available_models()" - ], - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "markdown", @@ -140,14 +133,14 @@ }, { "cell_type": "code", + "execution_count": null, "metadata": { "id": "ZhWmR7lbvwSm" }, + "outputs": [], "source": [ "asr_model = nemo_asr.models.EncDecCTCModel.from_pretrained(model_name='QuartzNet15x5Base-En', strict=False)" - ], - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "markdown", @@ -169,9 +162,11 @@ }, { "cell_type": "code", + "execution_count": null, "metadata": { "id": "02gDfK7czSVV" }, + "outputs": [], "source": [ "# Download audio sample which we'll try\n", "# This is a sample from LibriSpeech dev clean subset - the model hasn't seen it before\n", @@ -233,9 +228,7 @@ " }\n", ")\n", "fig_spectrum.show()" - ], - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "markdown", @@ -251,17 +244,17 @@ }, { "cell_type": "code", + "execution_count": null, "metadata": { "id": "s0ERrXIzKpwu" }, + "outputs": [], "source": [ "# Convert our audio sample to text\n", "files = [AUDIO_FILENAME]\n", "transcript = asr_model.transcribe(paths2audio_files=files)[0]\n", "print(f'Transcript: \"{transcript}\"')" - ], - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "markdown", @@ -275,9 +268,11 @@ }, { 
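The cells above assume a 16 kHz mono WAV (the downloaded LibriSpeech sample already is one). A minimal sketch for feeding your own recording through the same pipeline, assuming `librosa` and `soundfile` are installed and using a hypothetical input path; the `transcribe()` call mirrors the tutorial's:

```python
import librosa
import soundfile as sf

# Hypothetical input path -- replace with your own recording.
INPUT_AUDIO = 'my_recording.mp3'
RESAMPLED_AUDIO = 'my_recording_16k_mono.wav'

# QuartzNet expects 16 kHz mono audio; librosa downmixes and resamples on load.
signal, sample_rate = librosa.load(INPUT_AUDIO, sr=16000, mono=True)
sf.write(RESAMPLED_AUDIO, signal, sample_rate)

# Same API as the transcription cell above.
transcript = asr_model.transcribe(paths2audio_files=[RESAMPLED_AUDIO])[0]
print(f'Transcript: "{transcript}"')
```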
"cell_type": "code", + "execution_count": null, "metadata": { "id": "-0Sk0C9-LmAR" }, + "outputs": [], "source": [ "# softmax implementation in NumPy\n", "def softmax(logits):\n", @@ -315,9 +310,7 @@ " }\n", ")\n", "fig_probs.show()" - ], - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "markdown", @@ -330,9 +323,11 @@ }, { "cell_type": "code", + "execution_count": null, "metadata": { "id": "32aaW3HEJ89l" }, + "outputs": [], "source": [ "# get timestamps for space symbols\n", "spaces = []\n", @@ -355,9 +350,7 @@ "\n", "if state == 'space':\n", " spaces.append([idx_state, len(pred)-1])" - ], - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "markdown", @@ -370,9 +363,11 @@ }, { "cell_type": "code", + "execution_count": null, "metadata": { "id": "a-LSg9dSL_O1" }, + "outputs": [], "source": [ "# calibration offset for timestamps: 180 ms\n", "offset = -0.18\n", @@ -392,9 +387,7 @@ "display(words[j+1])\n", "display(Audio(signal[int(pos_prev*sample_rate):],\n", " rate=sample_rate))" - ], - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "markdown", @@ -411,9 +404,11 @@ }, { "cell_type": "code", + "execution_count": null, "metadata": { "id": "EIh8wTVs5uH7" }, + "outputs": [], "source": [ "import gzip\n", "import os, shutil, wget\n", @@ -443,9 +438,7 @@ " for line in f_upper:\n", " f_lower.write(line.lower())\n", "print('Converted language model file to lowercase.')" - ], - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "markdown", @@ -458,9 +451,11 @@ }, { "cell_type": "code", + "execution_count": null, "metadata": { "id": "_qgKa9L954bJ" }, + "outputs": [], "source": [ "beam_search_lm = nemo_asr.modules.BeamSearchDecoderWithLM(\n", " vocab=list(asr_model.decoder.vocabulary),\n", @@ -469,9 +464,7 @@ " lm_path=lm_path,\n", " num_cpus=max(os.cpu_count(), 1),\n", " input_tensor=False)" - ], - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "markdown", @@ -484,17 +477,20 @@ }, { "cell_type": "code", + "execution_count": null, "metadata": { "id": "nV1CAy0Dit-g" }, + "outputs": [], "source": [ "beam_search_lm.forward(log_probs = np.expand_dims(probs, axis=0), log_probs_length=None)" - ], - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "markdown", + "metadata": { + "id": "LPtMzLE4T7T-" + }, "source": [ "# Greedy Decoding Time Stamps\n", "\n", @@ -503,134 +499,136 @@ "We therefore provide a simple way to obtain greedy decoding word time stamps directly using the familiar \"model.transcribe()\" method, which works quite well for character and subword models.\n", "\n", "**Note**: We find that larger models that have converged to strong scores on the dataset usually have better word alignments. If evaluated on a completely out of domain audio sample, it might produce very poor time stamps." - ], - "metadata": { - "id": "LPtMzLE4T7T-" - } + ] }, { "cell_type": "code", - "source": [ - "from omegaconf import OmegaConf, open_dict" - ], + "execution_count": null, "metadata": { "id": "z_0pO-TaUIHU" }, - "execution_count": null, - "outputs": [] + "outputs": [], + "source": [ + "from omegaconf import OmegaConf, open_dict" + ] }, { "cell_type": "markdown", - "source": [ - "For the purposes of this demonstration, we will use Conformer CTC Large, a 120 M parameter model trained on thousands of hours of English speech." 
- ], "metadata": { "id": "i0Epb8D-rW3-" - } + }, + "source": [ + "For the purposes of this demonstration, we will use Conformer CTC Large, a 120 M parameter model trained on thousands of hours of English speech." + ] }, { "cell_type": "code", - "source": [ - "asr_model_subword = nemo_asr.models.ASRModel.from_pretrained(\"stt_en_conformer_ctc_large\")" - ], + "execution_count": null, "metadata": { "id": "Ky7OpuikbBTb" }, - "execution_count": null, - "outputs": [] + "outputs": [], + "source": [ + "asr_model_subword = nemo_asr.models.ASRModel.from_pretrained(\"stt_en_conformer_ctc_large\")" + ] }, { "cell_type": "markdown", + "metadata": { + "id": "vwN6wddTrhno" + }, "source": [ "## CTC Decoding Strategy\n", "\n", "NeMo CTC models have an internal decoding strategy that can be updated after training. In our case, we will enable the greedy decoding step to compute word time stamps, as well as preserve the log probability predictions." - ], - "metadata": { - "id": "vwN6wddTrhno" - } + ] }, { "cell_type": "code", + "execution_count": null, "metadata": { "id": "ubpcxp6z3ZF-" }, + "outputs": [], "source": [ "decoding_cfg = asr_model_subword.cfg.decoding\n", "print(OmegaConf.to_yaml(decoding_cfg))" - ], - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "pKUsMlUbUAxv" + }, + "outputs": [], "source": [ "decoding_cfg.preserve_alignments = True\n", "decoding_cfg.compute_timestamps = True\n", "asr_model_subword.change_decoding_strategy(decoding_cfg)" - ], - "metadata": { - "id": "pKUsMlUbUAxv" - }, - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "markdown", - "source": [ - "Next, we simply transcribe the audio file, and pass the flag `return_hypotheses=True`. This will return a list of `Hypothesis` objects instead of the predicted text." - ], "metadata": { "id": "EdX0Drncr8Yl" - } + }, + "source": [ + "Next, we simply transcribe the audio file, and pass the flag `return_hypotheses=True`. This will return a list of `Hypothesis` objects instead of the predicted text." + ] }, { "cell_type": "code", - "source": [ - "hypothesis = asr_model_subword.transcribe([AUDIO_FILENAME], return_hypotheses=True)[0]" - ], + "execution_count": null, "metadata": { "id": "SUkfIyYzUbaB" }, - "execution_count": null, - "outputs": [] + "outputs": [], + "source": [ + "hypothesis = asr_model_subword.transcribe([AUDIO_FILENAME], return_hypotheses=True)[0]" + ] }, { "cell_type": "code", - "source": [ - "print(\"Greedy prediction :\", hypothesis.text)" - ], + "execution_count": null, "metadata": { "id": "duaxOSPXUmQ0" }, - "execution_count": null, - "outputs": [] + "outputs": [], + "source": [ + "print(\"Greedy prediction :\", hypothesis.text)" + ] }, { "cell_type": "markdown", + "metadata": { + "id": "_5hfsiDGsM19" + }, "source": [ "## Hypothesis - Time Stamps\n", "\n", "Since we previously set the flag for `decoding_cfg.compute_timestamps`, the hypothesis now contains a dictionary in it, accessed via `hypothesis.timestep`. This dictionary contains multiple useful lists, detailing the time step at which some token was predicted, the character / subword / word time stamps." 
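One caveat worth a sketch: `change_decoding_strategy()` mutates the model's decoding config in place, so every later `transcribe()` call keeps emitting alignments and timestamps. A minimal way to snapshot and restore the original strategy, assuming only the APIs already used above:

```python
import copy

# Snapshot the decoding config before enabling timestamps.
original_decoding_cfg = copy.deepcopy(asr_model_subword.cfg.decoding)

decoding_cfg = copy.deepcopy(original_decoding_cfg)
decoding_cfg.preserve_alignments = True
decoding_cfg.compute_timestamps = True
asr_model_subword.change_decoding_strategy(decoding_cfg)

# ... transcribe with return_hypotheses=True as above ...

# Restore the defaults so subsequent transcribe() calls are unaffected.
asr_model_subword.change_decoding_strategy(original_decoding_cfg)
```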
- ], - "metadata": { - "id": "_5hfsiDGsM19" - } + ] }, { "cell_type": "code", - "source": [ - "timestamp_dict = hypothesis.timestep\n", - "print(\"Hypothesis contains following timestep information :\", list(timestamp_dict.keys()))" - ], + "execution_count": null, "metadata": { "id": "vh7K_9D1UrQp" }, - "execution_count": null, - "outputs": [] + "outputs": [], + "source": [ + "timestamp_dict = hypothesis.timestep\n", + "print(\"Hypothesis contains following timestep information :\", list(timestamp_dict.keys()))" + ] }, { "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "fogttpCTVTEZ" + }, + "outputs": [], "source": [ "# 40ms is duration of a timestep at output of the Conformer\n", "time_stride = 4 * asr_model_subword.cfg.preprocessor.window_stride\n", @@ -646,12 +644,20 @@ "\n", " print(f\"Time : {start:0.2f} - {end:0.2f} - {word}\")\n", " display(Audio(signal[int(start * sample_rate) : int(end * sample_rate)], rate=sample_rate))" - ], - "metadata": { - "id": "fogttpCTVTEZ" - }, - "execution_count": null, - "outputs": [] + ] } - ] + ], + "metadata": { + "accelerator": "GPU", + "colab": { + "provenance": [], + "toc_visible": true + }, + "kernelspec": { + "display_name": "Python 3", + "name": "python3" + } + }, + "nbformat": 4, + "nbformat_minor": 0 } diff --git a/tutorials/nlp/MegatronBert_export.ipynb b/tutorials/nlp/MegatronBert_export.ipynb index f925d2bc59b0..c19c07b67005 100644 --- a/tutorials/nlp/MegatronBert_export.ipynb +++ b/tutorials/nlp/MegatronBert_export.ipynb @@ -51,6 +51,12 @@ "id": "e9fb1a66", "metadata": {}, "source": [ + "### Deprecation Notice\n", + "\n", + "This tutorial is deprecated as of r1.23.0 and will be removed in the next release.\n", + "\n", + "---\n", + "\n", "# Task Description\n", "In this tutorial, we are going to describe how to export NeMo NLP models with BERT based models as the pre-trained model." ] @@ -271,4 +277,4 @@ }, "nbformat": 4, "nbformat_minor": 5 -} \ No newline at end of file +} diff --git a/tutorials/nlp/Question_Answering.ipynb b/tutorials/nlp/Question_Answering.ipynb index a211c8320d51..054928245d9d 100644 --- a/tutorials/nlp/Question_Answering.ipynb +++ b/tutorials/nlp/Question_Answering.ipynb @@ -15,6 +15,12 @@ "id": "PucJwfbhVC3L" }, "source": [ + "### Deprecation Notice\n", + "\n", + "This tutorial is deprecated as of r1.23.0 and will be removed in the next release.\n", + "\n", + "---\n", + "\n", "This tutorial will demonstrate how to train, evaluate, and test three types of models for Question-Answering -\n", "1. BERT-like models for Extractive Question-Answering\n", "2. Sequence-to-Sequence (S2S) models for Generative Question-Answering (ex. T5/BART-like)\n",
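Returning to the word time stamps computed in Offline_ASR.ipynb above: a short sketch that persists them, assuming `word_timestamps` and `time_stride` from that cell and that each entry carries `word`, `start_offset`, and `end_offset` keys, as in NeMo's hypothesis timesteps:

```python
import csv

# Write word-level timings (in seconds) to a CSV for downstream use.
with open('word_timestamps.csv', 'w', newline='') as f:
    writer = csv.writer(f)
    writer.writerow(['word', 'start_sec', 'end_sec'])
    for stamp in word_timestamps:
        start = stamp['start_offset'] * time_stride
        end = stamp['end_offset'] * time_stride
        writer.writerow([stamp['word'], f'{start:.2f}', f'{end:.2f}'])
```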