Skip to content

Commit

Permalink
cherry pick of NVIDIA#9266 (NVIDIA#9411)
Browse files Browse the repository at this point in the history
* add deprecation warnings for non-mcore models

Signed-off-by: dimapihtar <[email protected]>

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* change warning default time

Signed-off-by: dimapihtar <[email protected]>

* remove unused import

Signed-off-by: dimapihtar <[email protected]>

* Apply isort and black reformatting

Signed-off-by: dimapihtar <[email protected]>

* remove deprecated tests

Signed-off-by: dimapihtar <[email protected]>

* set mcore_gpt to True

Signed-off-by: dimapihtar <[email protected]>

* set mcore_bert to True

Signed-off-by: dimapihtar <[email protected]>

* remove deprecated tests

Signed-off-by: dimapihtar <[email protected]>

* remove deprecated unit tests

Signed-off-by: dimapihtar <[email protected]>

* add deprecation warning

Signed-off-by: dimapihtar <[email protected]>

* Apply isort and black reformatting

Signed-off-by: dimapihtar <[email protected]>

* remove deprecated playbook

Signed-off-by: dimapihtar <[email protected]>

* remove deprecated tutorial

Signed-off-by: dimapihtar <[email protected]>

* turn off FA for Bert

Signed-off-by: dimapihtar <[email protected]>

* turn off FA for Bert

Signed-off-by: dimapihtar <[email protected]>

* change mcore commit

Signed-off-by: dimapihtar <[email protected]>

* adjustments

* update TE commit

Signed-off-by: dimapihtar <[email protected]>

* fix mcore precision issue

Signed-off-by: dimapihtar <[email protected]>

* change precision for bert

Signed-off-by: dimapihtar <[email protected]>

* change precision for fine-tuning

Signed-off-by: dimapihtar <[email protected]>

* turn off fused attention for bert

Signed-off-by: dimapihtar <[email protected]>

* fix bert test

Signed-off-by: dimapihtar <[email protected]>

* revert tests

Signed-off-by: dimapihtar <[email protected]>

* fix typo

Signed-off-by: dimapihtar <[email protected]>

* remove unnecessary

Signed-off-by: dimapihtar <[email protected]>

---------

Signed-off-by: dimapihtar <[email protected]>
Signed-off-by: dimapihtar <[email protected]>
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Co-authored-by: dimapihtar <[email protected]>
Co-authored-by: Pablo Garay <[email protected]>
  • Loading branch information
4 people authored and JesusPaz committed Jun 18, 2024
1 parent 74a89d1 commit 839beac
Show file tree
Hide file tree
Showing 58 changed files with 1,252 additions and 8,709 deletions.
2,065 changes: 649 additions & 1,416 deletions .github/workflows/cicd-main.yml

Large diffs are not rendered by default.

8 changes: 4 additions & 4 deletions examples/nlp/language_modeling/conf/megatron_bert_config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ trainer:
devices: 1
num_nodes: 1
accelerator: gpu
precision: 16
precision: bf16
logger: False # logger provided by exp_manager
enable_checkpointing: False
use_distributed_sampler: False
Expand Down Expand Up @@ -41,7 +41,7 @@ exp_manager:

model:
# model parallelism
mcore_bert: False
mcore_bert: True
micro_batch_size: 4
global_batch_size: 8
tensor_model_parallel_size: 1
Expand Down Expand Up @@ -85,7 +85,7 @@ model:
fp16_lm_cross_entropy: False # Move the cross entropy unreduced loss calculation for lm head to fp16

# Megatron O2-style half-precision
megatron_amp_O2: False # Enable O2-level automatic mixed precision using main parameters
megatron_amp_O2: True # Enable O2-level automatic mixed precision using main parameters
grad_allreduce_chunk_size_mb: 125
grad_div_ar_fusion: False

Expand Down Expand Up @@ -158,4 +158,4 @@ model:
name: CosineAnnealing
warmup_steps: 500
constant_steps: 50000
min_lr: 2e-5
min_lr: 2e-5
6 changes: 3 additions & 3 deletions examples/nlp/language_modeling/conf/megatron_gpt_config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ trainer:
devices: 1
num_nodes: 1
accelerator: gpu
precision: 16
precision: bf16
logger: False # logger provided by exp_manager
enable_checkpointing: False
use_distributed_sampler: False
Expand Down Expand Up @@ -56,7 +56,7 @@ exp_manager:

model:
# use GPTModel from megatron.core
mcore_gpt: False
mcore_gpt: True

# specify micro_batch_size, global_batch_size, and model parallelism
# gradient accumulation will be done automatically based on data_parallel_size
Expand Down Expand Up @@ -121,7 +121,7 @@ model:
fp16_lm_cross_entropy: False # Move the cross entropy unreduced loss calculation for lm head to fp16

# Megatron O2-style half-precision
megatron_amp_O2: False # Enable O2-level automatic mixed precision using main parameters
megatron_amp_O2: True # Enable O2-level automatic mixed precision using main parameters
grad_allreduce_chunk_size_mb: 125

# Fusion
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@

from nemo.collections.nlp.data.dialogue.data_processor.data_processor import DialogueDataProcessor
from nemo.collections.nlp.data.dialogue.input_example.input_example import DialogueInputExample
from nemo.utils.decorators import deprecated_warning

__all__ = ['DialogueAssistantDataProcessor']

Expand All @@ -31,6 +32,9 @@ def __init__(self, data_dir: str, tokenizer: object, cfg):
data_dir: path to data directory
tokenizer: tokenizer object
"""
# deprecation warning
deprecated_warning("DialogueAssistantDataProcessor")

self.data_dir = data_dir
self._tokenizer = tokenizer
self.cfg = cfg
Expand Down Expand Up @@ -69,16 +73,15 @@ def open_file(self, filename):

@staticmethod
def get_continuous_slots(slot_ids, empty_slot_id, bio_slot_ids_to_unified_slot_ids):

"""
Extract continuous spans of slot_ids
To accommodate slots with distinct labels for B-label1 and I-label1,
To accommodate slots with distinct labels for B-label1 and I-label1,
slot_id = self.bio_slot_ids_to_unified_slot_ids[slot_id] is called to map them both to label1
Args:
Slot: list of int representing slot of each word token
For instance, 54 54 54 54 54 54 54 54 18 54 44 44 54 46 46 54 12
For instance, 54 54 54 54 54 54 54 54 18 54 44 44 54 46 46 54 12
Corresponds to "please set an alarm clock for my next meeting with the team at three pm next friday"
Except for the empty_slot_id (54 in this case), we hope to extract the continuous spans of tokens,
each containing a start position and an exclusive end position
Expand Down Expand Up @@ -124,7 +127,7 @@ def map_bio_format_slots_to_unified_slots(slots):
def get_dialog_examples(self, dataset_split: str):
"""
Process raw files into DialogueInputExample
Args:
Args:
dataset_split: {train, dev, test}
For the assistant dataset, there is no explicit dev set (instead uses the test set as the dev set)
Therefore, this function creates a dev set and a new train set from the train set.
Expand Down Expand Up @@ -177,7 +180,11 @@ def get_dialog_examples(self, dataset_split: str):
"labels": {"service": intent.split('_')[0], "intent": intent, "slots": slot_to_words},
"label_positions": {
"slots": {
slot: {"start": position[0], "exclusive_end": position[1], "slot": slot,}
slot: {
"start": position[0],
"exclusive_end": position[1],
"slot": slot,
}
for slot, position in slot_to_start_and_exclusive_end.items()
}
},
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
import random

from nemo.collections.nlp.data.data_utils.data_preprocessing import DataProcessor
from nemo.utils.decorators import deprecated_warning

__all__ = ['DialogueDataProcessor']

Expand All @@ -40,6 +41,9 @@ class DialogueDataProcessor(DataProcessor):
"""

def __init__(self):
# deprecation warning
deprecated_warning("DialogueDataProcessor")

raise NotImplementedError()

def get_train_examples(self):
Expand All @@ -58,8 +62,8 @@ def get_test_examples(self):
def get_relevant_idxs(dataset_split, n_samples, dev_proportion):
"""
Obtain indexes for each dataset_split, when train and dev sets are not in separate files
Args:
Args:
dataset_split: train, dev or test
n_samples: total number of samples
dev_proportion: value from 1 to 99 that represents the proportion of data in the dev set
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@

from nemo.collections.nlp.data.dialogue.data_processor.data_processor import DialogueDataProcessor
from nemo.collections.nlp.data.dialogue.input_example.input_example import DialogueInputExample
from nemo.utils.decorators import deprecated_warning

__all__ = ['DialogueDesignDataProcessor']

Expand All @@ -34,6 +35,9 @@ def __init__(self, data_dir: str, tokenizer: object, cfg=None):
tokenizer: tokenizer object
cfg: cfg container for dataset
"""
# deprecation warning
deprecated_warning("DialogueDesignDataProcessor")

self.data_dir = data_dir
self._tokenizer = tokenizer
self.cfg = cfg
Expand All @@ -50,7 +54,7 @@ def open_csv(self, filename):
def get_dialog_examples(self, dataset_split: str):
"""
Process raw files into DialogueInputExample
Args:
Args:
dataset_split: {train, dev, test}
Dev set contains self.cfg.dev_proportion % of samples with the rest going into the train set
Test set contains the whole dataset (Dev + Train) as this dataset is small (~100) and primarily used in a zero shot setting
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,13 +19,13 @@

from nemo.collections.nlp.data.dialogue.data_processor.data_processor import DialogueDataProcessor
from nemo.collections.nlp.data.dialogue.input_example.input_example import DialogueInputExample
from nemo.utils.decorators import deprecated_warning

__all__ = ['DialogueMellonQADataProcessor']


class DialogueMellonQADataProcessor(DialogueDataProcessor):
"""Data Processor for Mellon QA dialogues.
"""
"""Data Processor for Mellon QA dialogues."""

def __init__(self, data_dir: str, tokenizer: object, cfg=None):
"""
Expand All @@ -35,6 +35,9 @@ def __init__(self, data_dir: str, tokenizer: object, cfg=None):
tokenizer: tokenizer object
cfg: cfg container for dataset
"""
# deprecation warning
deprecated_warning("DialogueMellonQADataProcessor")

self.data_dir = data_dir
self._tokenizer = tokenizer
self.cfg = cfg
Expand All @@ -51,7 +54,7 @@ def open_csv(self, filename):
def get_dialog_examples(self, dataset_split: str):
"""
Process raw files into DialogueInputExample
Args:
Args:
dataset_split: {train, dev, test}
For the Mellon QA dataset, there is no explicit dev set (instead uses the test set as the dev set)
Therefore, this function creates a dev set and a new train set from the train set.
Expand Down Expand Up @@ -82,7 +85,11 @@ def get_dialog_examples(self, dataset_split: str):
input_example = {
"utterance": utterance,
"example_id": i,
"labels": {"response": answer, "fluent_response": well_formed_answer, "passage": passage,},
"labels": {
"response": answer,
"fluent_response": well_formed_answer,
"passage": passage,
},
}
example = DialogueInputExample(input_example)
examples.append(example)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,15 +19,16 @@

from nemo.collections.nlp.data.dialogue.data_processor.data_processor import DialogueDataProcessor
from nemo.collections.nlp.data.dialogue.input_example.input_example import DialogueInputExample
from nemo.utils.decorators import deprecated_warning

__all__ = ['DialogueMSMarcoDataProcessor']


class DialogueMSMarcoDataProcessor(DialogueDataProcessor):
"""Data Processor for MS Marco dialogues. (https://github.com/microsoft/MSMARCO-Question-Answering)
Please agree to the Terms of Use before downloading data at
https://msmarco.blob.core.windows.net/msmarco/train_v2.1.json.gz
https://msmarco.blob.core.windows.net/msmarco/dev_v2.1.json.gz
Please agree to the Terms of Use before downloading data at
https://msmarco.blob.core.windows.net/msmarco/train_v2.1.json.gz
https://msmarco.blob.core.windows.net/msmarco/dev_v2.1.json.gz
"""

def __init__(self, data_dir: str, tokenizer: object, cfg=None):
Expand All @@ -39,6 +40,9 @@ def __init__(self, data_dir: str, tokenizer: object, cfg=None):
debug_mode: reduce number of samples to load in order to increase speed of processing
cfg: cfg container for dataset
"""
# deprecation warning
deprecated_warning("DialogueMSMarcoDataProcessor")

self.data_dir = data_dir
self._tokenizer = tokenizer
self.cfg = cfg
Expand All @@ -55,7 +59,7 @@ def open_json(self, filename):
def get_dialog_examples(self, dataset_split: str):
"""
Process raw files into DialogueInputExample
Args:
Args:
dataset_split: {train, dev, test}
For the MS Marco dataset, there is no explicit dev set (instead uses the test set as the dev set)
Therefore, this function creates a dev set and a new train set from the train set.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
from nemo.collections.nlp.data.dialogue.input_example.input_example import DialogueInputExample
from nemo.collections.nlp.data.dialogue.sgd.schema import Schema
from nemo.utils import logging
from nemo.utils.decorators import deprecated_warning
from nemo.utils.get_rank import is_global_rank_zero

__all__ = ['DialogueSGDDataProcessor']
Expand All @@ -51,7 +52,7 @@ class DialogueSGDDataProcessor(DialogueDataProcessor):
# git clone https://github.com/google-research-datasets/dstc8-schema-guided-dialogue.git
***Data format***
SGD data comes with a JSON schema file and dialogue files for each dataset split.
SGD data comes with a JSON schema file and dialogue files for each dataset split.
In the following we will show an example for a service entry in the schema file.
* service_name
Expand All @@ -70,7 +71,7 @@ class DialogueSGDDataProcessor(DialogueDataProcessor):
* result_slots (not used)
In the following we will show an example for a dialogue.
In the following we will show an example for a dialogue.
* dialogue_id
* services
* turns
Expand All @@ -87,14 +88,18 @@ class DialogueSGDDataProcessor(DialogueDataProcessor):
* state
* active_intent
* requested_slots
* slot_values
* slot_values
* speaker - [USER, SYSTEM]
* utterance
"""

def __init__(
self, data_dir: str, dialogues_example_dir: str, tokenizer: object, cfg=None,
self,
data_dir: str,
dialogues_example_dir: str,
tokenizer: object,
cfg=None,
):
"""
Constructs DialogueSGDDataProcessor
Expand All @@ -104,6 +109,9 @@ def __init__(
tokenizer: tokenizer object
cfg: cfg container for dataset
"""
# deprecation warning
deprecated_warning("DialogueSGDDataProcessor")

self.data_dir = data_dir
self.cfg = cfg

Expand Down Expand Up @@ -213,7 +221,7 @@ def get_labels(self):

def get_dialog_examples(self, dataset_split: str) -> List[object]:
"""
Loads preprocessed dialogue examples from disk.
Loads preprocessed dialogue examples from disk.
Args:
dataset_split: dataset split
Returns:
Expand Down Expand Up @@ -260,7 +268,7 @@ def _generate_dialog_examples(self, dataset_split: str, schemas: object, subsamp
Returns a list of `InputExample`s of the data splits' dialogues.
Args:
dataset_split: data split, can be "train", "dev", or "test".
schemas: schema for all services of all datasets
schemas: schema for all services of all datasets
subsample: whether to balance positive and negative samples in the dataset
Returns:
examples: a list of `InputExample`s.
Expand Down Expand Up @@ -447,9 +455,9 @@ def _create_examples_from_turn(
"example_id_num": example_id_num,
"utterance": user_utterance,
"system_utterance": system_utterance,
"system_slots": {slot["slot"]: slot for slot in system_frame["slots"]}
if system_frame is not None
else None,
"system_slots": (
{slot["slot"]: slot for slot in system_frame["slots"]} if system_frame is not None else None
),
"system_actions": system_frame["actions"] if system_frame is not None else None,
"labels": {
"service": service,
Expand All @@ -464,9 +472,11 @@ def _create_examples_from_turn(
for intent in schemas.get_service_schema(service).intents
],
"slots": {
slot: schemas.get_service_schema(service).get_categorical_slot_values(slot)
if slot in categorical_slots
else []
slot: (
schemas.get_service_schema(service).get_categorical_slot_values(slot)
if slot in categorical_slots
else []
)
for slot in all_possible_slots
},
},
Expand Down
Loading

0 comments on commit 839beac

Please sign in to comment.