Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

cherry pick of #9266 #9411

Merged
merged 29 commits into from
Jun 10, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
29 commits
Select commit Hold shift + click to select a range
93cedad
add deprecation warnings for non-mcore models
dimapihtar Apr 29, 2024
db0148b
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Apr 29, 2024
26fc6c5
change warning default time
dimapihtar Apr 29, 2024
e317c98
remove unused import
dimapihtar Apr 29, 2024
1687f4d
Apply isort and black reformatting
dimapihtar May 14, 2024
7840c34
remove deprecated tests
dimapihtar May 14, 2024
4a35d54
set mcore_gpt to True
dimapihtar May 14, 2024
8729470
set mcore_bert to True
dimapihtar May 14, 2024
7e881f3
remove deprecated tests
dimapihtar May 14, 2024
28ee1df
remove deprecated unit tests
dimapihtar May 14, 2024
e294e32
add deprecation warning
dimapihtar May 20, 2024
140b464
Apply isort and black reformatting
dimapihtar May 20, 2024
6510d32
remove deprecated playbook
dimapihtar May 21, 2024
d057a9c
remove deprecated tutorial
dimapihtar May 21, 2024
1964d60
turn off FA for Bert
dimapihtar May 22, 2024
e0e3add
turn of FA for Bert
dimapihtar May 22, 2024
0a54b6f
change mcore commit
dimapihtar May 23, 2024
fc38067
adjustments
pablo-garay May 24, 2024
79c61dc
update TE commit
dimapihtar May 24, 2024
965df31
fix mcore precision issue
dimapihtar May 24, 2024
1e7a4b4
change precision for bert
dimapihtar May 24, 2024
928b717
change precision for fine-tuning
dimapihtar May 24, 2024
0702d47
turn off fused attention for bert
dimapihtar May 25, 2024
9d41fbd
fix bert test
dimapihtar May 25, 2024
6e6d5fb
revert tests
dimapihtar Jun 7, 2024
cb7e882
Merge branch 'main' into dpykhtar/deprecate_non_mcore_cherry_pick
dimapihtar Jun 7, 2024
6f77b18
fix typo
dimapihtar Jun 7, 2024
1a9df1b
remove unnecessary
dimapihtar Jun 7, 2024
4796b6c
Merge branch 'main' into dpykhtar/deprecate_non_mcore_cherry_pick
dimapihtar Jun 10, 2024
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2,065 changes: 649 additions & 1,416 deletions .github/workflows/cicd-main.yml

Large diffs are not rendered by default.

8 changes: 4 additions & 4 deletions examples/nlp/language_modeling/conf/megatron_bert_config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ trainer:
devices: 1
num_nodes: 1
accelerator: gpu
precision: 16
precision: bf16
logger: False # logger provided by exp_manager
enable_checkpointing: False
use_distributed_sampler: False
Expand Down Expand Up @@ -41,7 +41,7 @@ exp_manager:

model:
# model parallelism
mcore_bert: False
mcore_bert: True
micro_batch_size: 4
global_batch_size: 8
tensor_model_parallel_size: 1
Expand Down Expand Up @@ -85,7 +85,7 @@ model:
fp16_lm_cross_entropy: False # Move the cross entropy unreduced loss calculation for lm head to fp16

# Megatron O2-style half-precision
megatron_amp_O2: False # Enable O2-level automatic mixed precision using main parameters
megatron_amp_O2: True # Enable O2-level automatic mixed precision using main parameters
grad_allreduce_chunk_size_mb: 125
grad_div_ar_fusion: False

Expand Down Expand Up @@ -158,4 +158,4 @@ model:
name: CosineAnnealing
warmup_steps: 500
constant_steps: 50000
min_lr: 2e-5
min_lr: 2e-5
6 changes: 3 additions & 3 deletions examples/nlp/language_modeling/conf/megatron_gpt_config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ trainer:
devices: 1
num_nodes: 1
accelerator: gpu
precision: 16
precision: bf16
logger: False # logger provided by exp_manager
enable_checkpointing: False
use_distributed_sampler: False
Expand Down Expand Up @@ -56,7 +56,7 @@ exp_manager:

model:
# use GPTModel from megatron.core
mcore_gpt: False
mcore_gpt: True

# specify micro_batch_size, global_batch_size, and model parallelism
# gradient accumulation will be done automatically based on data_parallel_size
Expand Down Expand Up @@ -121,7 +121,7 @@ model:
fp16_lm_cross_entropy: False # Move the cross entropy unreduced loss calculation for lm head to fp16

# Megatron O2-style half-precision
megatron_amp_O2: False # Enable O2-level automatic mixed precision using main parameters
megatron_amp_O2: True # Enable O2-level automatic mixed precision using main parameters
grad_allreduce_chunk_size_mb: 125

# Fusion
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@

from nemo.collections.nlp.data.dialogue.data_processor.data_processor import DialogueDataProcessor
from nemo.collections.nlp.data.dialogue.input_example.input_example import DialogueInputExample
from nemo.utils.decorators import deprecated_warning

__all__ = ['DialogueAssistantDataProcessor']

Expand All @@ -31,6 +32,9 @@ def __init__(self, data_dir: str, tokenizer: object, cfg):
data_dir: path to data directory
tokenizer: tokenizer object
"""
# deprecation warning
deprecated_warning("DialogueAssistantDataProcessor")

self.data_dir = data_dir
self._tokenizer = tokenizer
self.cfg = cfg
Expand Down Expand Up @@ -69,16 +73,15 @@ def open_file(self, filename):

@staticmethod
def get_continuous_slots(slot_ids, empty_slot_id, bio_slot_ids_to_unified_slot_ids):

"""
Extract continuous spans of slot_ids
To accommodate slots with distinct labels for B-label1 and I-label1,
To accommodate slots with distinct labels for B-label1 and I-label1,
slot_id = self.bio_slot_ids_to_unified_slot_ids[slot_id] is called to map them both to label1
Args:
Slot: list of int representing slot of each word token
For instance, 54 54 54 54 54 54 54 54 18 54 44 44 54 46 46 54 12
For instance, 54 54 54 54 54 54 54 54 18 54 44 44 54 46 46 54 12
Corresponds to "please set an alarm clock for my next meeting with the team at three pm next friday"
Except for the empty_slot_id (54 in this case), we hope to extract the continuous spans of tokens,
each containing a start position and an exclusive end position
Expand Down Expand Up @@ -124,7 +127,7 @@ def map_bio_format_slots_to_unified_slots(slots):
def get_dialog_examples(self, dataset_split: str):
"""
Process raw files into DialogueInputExample
Args:
Args:
dataset_split: {train, dev, test}
For the assistant dataset, there is no explicit dev set (instead uses the test set as the dev set)
Therefore, this function creates a dev set and a new train set from the train set.
Expand Down Expand Up @@ -177,7 +180,11 @@ def get_dialog_examples(self, dataset_split: str):
"labels": {"service": intent.split('_')[0], "intent": intent, "slots": slot_to_words},
"label_positions": {
"slots": {
slot: {"start": position[0], "exclusive_end": position[1], "slot": slot,}
slot: {
"start": position[0],
"exclusive_end": position[1],
"slot": slot,
}
for slot, position in slot_to_start_and_exclusive_end.items()
}
},
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
import random

from nemo.collections.nlp.data.data_utils.data_preprocessing import DataProcessor
from nemo.utils.decorators import deprecated_warning

__all__ = ['DialogueDataProcessor']

Expand All @@ -40,6 +41,9 @@ class DialogueDataProcessor(DataProcessor):
"""

def __init__(self):
# deprecation warning
deprecated_warning("DialogueDataProcessor")

raise NotImplementedError()

def get_train_examples(self):
Expand All @@ -58,8 +62,8 @@ def get_test_examples(self):
def get_relevant_idxs(dataset_split, n_samples, dev_proportion):
"""
Obtain indexes for each dataset_split, when train and dev sets are not in separate files
Args:

Args:
dataset_split: train, dev or test
n_samples: total number of samples
dev_proportion: value from 1 to 99 that represent proportion of data in dev set
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@

from nemo.collections.nlp.data.dialogue.data_processor.data_processor import DialogueDataProcessor
from nemo.collections.nlp.data.dialogue.input_example.input_example import DialogueInputExample
from nemo.utils.decorators import deprecated_warning

__all__ = ['DialogueDesignDataProcessor']

Expand All @@ -34,6 +35,9 @@ def __init__(self, data_dir: str, tokenizer: object, cfg=None):
tokenizer: tokenizer object
cfg: cfg container for dataset
"""
# deprecation warning
deprecated_warning("DialogueDesignDataProcessor")

self.data_dir = data_dir
self._tokenizer = tokenizer
self.cfg = cfg
Expand All @@ -50,7 +54,7 @@ def open_csv(self, filename):
def get_dialog_examples(self, dataset_split: str):
"""
Process raw files into DialogueInputExample
Args:
Args:
dataset_split: {train, dev, test}
Dev set contains self.cfg.dev_proportion % of samples with the rest going into the train set
Test set contains the whole dataset (Dev + Train) as this dataset is small (~100) and primarily used in a zero shot setting
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,13 +19,13 @@

from nemo.collections.nlp.data.dialogue.data_processor.data_processor import DialogueDataProcessor
from nemo.collections.nlp.data.dialogue.input_example.input_example import DialogueInputExample
from nemo.utils.decorators import deprecated_warning

__all__ = ['DialogueMellonQADataProcessor']


class DialogueMellonQADataProcessor(DialogueDataProcessor):
"""Data Processor for Mellon QA dialogues.
"""
"""Data Processor for Mellon QA dialogues."""

def __init__(self, data_dir: str, tokenizer: object, cfg=None):
"""
Expand All @@ -35,6 +35,9 @@ def __init__(self, data_dir: str, tokenizer: object, cfg=None):
tokenizer: tokenizer object
cfg: cfg container for dataset
"""
# deprecation warning
deprecated_warning("DialogueMellonQADataProcessor")

self.data_dir = data_dir
self._tokenizer = tokenizer
self.cfg = cfg
Expand All @@ -51,7 +54,7 @@ def open_csv(self, filename):
def get_dialog_examples(self, dataset_split: str):
"""
Process raw files into DialogueInputExample
Args:
Args:
dataset_split: {train, dev, test}
For the Mellon QA dataset, there is no explicit dev set (instead uses the test set as the dev set)
Therefore, this function creates a dev set and a new train set from the train set.
Expand Down Expand Up @@ -82,7 +85,11 @@ def get_dialog_examples(self, dataset_split: str):
input_example = {
"utterance": utterance,
"example_id": i,
"labels": {"response": answer, "fluent_response": well_formed_answer, "passage": passage,},
"labels": {
"response": answer,
"fluent_response": well_formed_answer,
"passage": passage,
},
}
example = DialogueInputExample(input_example)
examples.append(example)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,15 +19,16 @@

from nemo.collections.nlp.data.dialogue.data_processor.data_processor import DialogueDataProcessor
from nemo.collections.nlp.data.dialogue.input_example.input_example import DialogueInputExample
from nemo.utils.decorators import deprecated_warning

__all__ = ['DialogueMSMarcoDataProcessor']


class DialogueMSMarcoDataProcessor(DialogueDataProcessor):
"""Data Processor for MS Marco dialogues. (https://github.com/microsoft/MSMARCO-Question-Answering)
Please agree to the Terms of Use before downloading data at
https://msmarco.blob.core.windows.net/msmarco/train_v2.1.json.gz
https://msmarco.blob.core.windows.net/msmarco/dev_v2.1.json.gz
Please agree to the Terms of Use before downloading data at
https://msmarco.blob.core.windows.net/msmarco/train_v2.1.json.gz
https://msmarco.blob.core.windows.net/msmarco/dev_v2.1.json.gz
"""

def __init__(self, data_dir: str, tokenizer: object, cfg=None):
Expand All @@ -39,6 +40,9 @@ def __init__(self, data_dir: str, tokenizer: object, cfg=None):
debug_mode: reduce number of samples to load in order to increase speed of processing
cfg: cfg container for dataset
"""
# deprecation warning
deprecated_warning("DialogueMSMarcoDataProcessor")

self.data_dir = data_dir
self._tokenizer = tokenizer
self.cfg = cfg
Expand All @@ -55,7 +59,7 @@ def open_json(self, filename):
def get_dialog_examples(self, dataset_split: str):
"""
Process raw files into DialogueInputExample
Args:
Args:
dataset_split: {train, dev, test}
For the MS Marco dataset, there is no explicit dev set (instead uses the test set as the dev set)
Therefore, this function creates a dev set and a new train set from the train set.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
from nemo.collections.nlp.data.dialogue.input_example.input_example import DialogueInputExample
from nemo.collections.nlp.data.dialogue.sgd.schema import Schema
from nemo.utils import logging
from nemo.utils.decorators import deprecated_warning
from nemo.utils.get_rank import is_global_rank_zero

__all__ = ['DialogueSGDDataProcessor']
Expand All @@ -51,7 +52,7 @@ class DialogueSGDDataProcessor(DialogueDataProcessor):
# git clone https://github.com/google-research-datasets/dstc8-schema-guided-dialogue.git
***Data format***
SGD data comes with a JSON schema file and dialogue files for each dataset split.
SGD data comes with a JSON schema file and dialogue files for each dataset split.
In the following we will show an example for a service entry in the schema file.
* service_name
Expand All @@ -70,7 +71,7 @@ class DialogueSGDDataProcessor(DialogueDataProcessor):
* result_slots (not used)
In the following we will show an example for a dialogue.
In the following we will show an example for a dialogue.
* dialogue_id
* services
* turns
Expand All @@ -87,14 +88,18 @@ class DialogueSGDDataProcessor(DialogueDataProcessor):
* state
* active_intent
* requested_slots
* slot_values
* slot_values
* speaker - [USER, SYSTEM]
* utterance
"""

def __init__(
self, data_dir: str, dialogues_example_dir: str, tokenizer: object, cfg=None,
self,
data_dir: str,
dialogues_example_dir: str,
tokenizer: object,
cfg=None,
):
"""
Constructs DialogueSGDDataProcessor
Expand All @@ -104,6 +109,9 @@ def __init__(
tokenizer: tokenizer object
cfg: cfg container for dataset
"""
# deprecation warning
deprecated_warning("DialogueSGDDataProcessor")

self.data_dir = data_dir
self.cfg = cfg

Expand Down Expand Up @@ -213,7 +221,7 @@ def get_labels(self):

def get_dialog_examples(self, dataset_split: str) -> List[object]:
"""
Loads preprocessed dialogue examples from disk.
Loads preprocessed dialogue examples from disk.
Args:
dataset_split: dataset split
Returns:
Expand Down Expand Up @@ -260,7 +268,7 @@ def _generate_dialog_examples(self, dataset_split: str, schemas: object, subsamp
Returns a list of `InputExample`s of the data splits' dialogues.
Args:
dataset_split: data split, can be "train", "dev", or "test".
schemas: schema for all services of all datasets
schemas: schema for all services of all datasets
subsample: whether to balance positive and negative samples in the dataset
Returns:
examples: a list of `InputExample`s.
Expand Down Expand Up @@ -447,9 +455,9 @@ def _create_examples_from_turn(
"example_id_num": example_id_num,
"utterance": user_utterance,
"system_utterance": system_utterance,
"system_slots": {slot["slot"]: slot for slot in system_frame["slots"]}
if system_frame is not None
else None,
"system_slots": (
{slot["slot"]: slot for slot in system_frame["slots"]} if system_frame is not None else None
),
"system_actions": system_frame["actions"] if system_frame is not None else None,
"labels": {
"service": service,
Expand All @@ -464,9 +472,11 @@ def _create_examples_from_turn(
for intent in schemas.get_service_schema(service).intents
],
"slots": {
slot: schemas.get_service_schema(service).get_categorical_slot_values(slot)
if slot in categorical_slots
else []
slot: (
schemas.get_service_schema(service).get_categorical_slot_values(slot)
if slot in categorical_slots
else []
)
for slot in all_possible_slots
},
},
Expand Down
Loading
Loading