
Commit

Merge pull request #161 from kermitt2/features/learning_rate_param
add learning rate visualisation and manual parameter
kermitt2 authored Jul 16, 2023
2 parents bc19e28 + 5b3ea93 commit 2f8976c
Showing 10 changed files with 123 additions and 57 deletions.
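The visible hunks thread an optional learning_rate argument from each application's command line (--learning-rate, a float defaulting to None) through its train and train_eval functions and into the Sequence wrapper, so an initial learning rate can be set manually instead of relying on the per-architecture default. Below is a minimal sketch of the resulting call, assuming the delft.sequenceLabelling.Sequence API used in these files; the model name and toy data are placeholders, not part of this commit.

# Sketch only: toy data, model name and rate are illustrative placeholders.
from delft.sequenceLabelling import Sequence

x_train = [["This", "is", "a", "test"]]   # toy token sequences
y_train = [["O", "O", "O", "O"]]          # matching label sequences

model = Sequence("toy-model",
                 architecture="BidLSTM_CRF",
                 embeddings_name="glove-840B",
                 max_epoch=1,
                 batch_size=1,
                 learning_rate=0.0001)    # keyword added by this commit; None keeps the default
model.train(x_train, y_train, x_valid=x_train, y_valid=y_train)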
20 changes: 14 additions & 6 deletions delft/applications/datasetTagger.py
@@ -66,7 +66,9 @@ def configure(architecture, output_path=None, max_sequence_length=-1, batch_size
# train a model with all available data
def train(embeddings_name=None, architecture='BidLSTM_CRF', transformer=None,
input_path=None, output_path=None, fold_count=1,
features_indices=None, max_sequence_length=-1, batch_size=-1, max_epoch=-1, use_ELMo=False, patience=-1):
features_indices=None, max_sequence_length=-1,
batch_size=-1, max_epoch=-1, use_ELMo=False, patience=-1,
learning_rate=None):
print('Loading data...')
if input_path is None:
x_all1 = y_all1 = x_all2 = y_all2 = x_all3 = y_all3 = []
@@ -110,7 +112,8 @@ def train(embeddings_name=None, architecture='BidLSTM_CRF', transformer=None,
use_ELMo=use_ELMo,
multiprocessing=multiprocessing,
early_stop=early_stop,
patience=patience)
patience=patience,
learning_rate=learning_rate)

start_time = time.time()
model.train(x_train, y_train, x_valid=x_valid, y_valid=y_valid)
@@ -129,7 +132,7 @@ def train(embeddings_name=None, architecture='BidLSTM_CRF', transformer=None,
def train_eval(embeddings_name=None, architecture='BidLSTM_CRF', transformer=None,
input_path=None, output_path=None, fold_count=1,
features_indices=None, max_sequence_length=-1, batch_size=-1, max_epoch=-1, use_ELMo=False,
patience=-1):
patience=-1, learning_rate=None):
print('Loading data...')
if input_path is None:
x_all1 = y_all1 = x_all2 = y_all2 = x_all3 = y_all3 = []
@@ -175,7 +178,8 @@ def train_eval(embeddings_name=None, architecture='BidLSTM_CRF', transformer=Non
use_ELMo=use_ELMo,
multiprocessing=multiprocessing,
early_stop=early_stop,
patience=patience)
patience=patience,
learning_rate=learning_rate)

start_time = time.time()

@@ -280,6 +284,7 @@ def annotate_text(texts, output_format, architecture='BidLSTM_CRF', features=Non
parser.add_argument("--batch-size", type=int, default=-1, help="batch-size parameter to be used.")
parser.add_argument("--patience", type=int, default=-1, help="patience, number of extra epochs to perform after "
"the best epoch before stopping a training.")
parser.add_argument("--learning-rate", type=float, default=None, help="Initial learning rate")

args = parser.parse_args()

@@ -293,6 +298,7 @@ def annotate_text(texts, output_format, architecture='BidLSTM_CRF', features=Non
transformer = args.transformer
use_ELMo = args.use_ELMo
patience = args.patience
learning_rate = args.learning_rate

if transformer is None and embeddings_name is None:
# default word embeddings
@@ -307,7 +313,8 @@ def annotate_text(texts, output_format, architecture='BidLSTM_CRF', features=Non
max_sequence_length=max_sequence_length,
batch_size=batch_size,
use_ELMo=use_ELMo,
patience=patience)
patience=patience,
learning_rate=learning_rate)

if action == "eval":
if args.fold_count is not None and args.fold_count > 1:
@@ -329,7 +336,8 @@ def annotate_text(texts, output_format, architecture='BidLSTM_CRF', features=Non
max_sequence_length=max_sequence_length,
batch_size=batch_size,
use_ELMo=use_ELMo,
patience=patience)
patience=patience,
learning_rate=learning_rate)

if action == "tag":
someTexts = []
19 changes: 13 additions & 6 deletions delft/applications/grobidTagger.py
@@ -137,7 +137,7 @@ def configure(model, architecture, output_path=None, max_sequence_length=-1, bat
# train a GROBID model with all available data
def train(model, embeddings_name=None, architecture=None, transformer=None, input_path=None,
output_path=None, features_indices=None, max_sequence_length=-1, batch_size=-1, max_epoch=-1,
use_ELMo=False, incremental=False, input_model_path=None, patience=-1):
use_ELMo=False, incremental=False, input_model_path=None, patience=-1, learning_rate=None):

print('Loading data...')
if input_path == None:
@@ -176,7 +176,8 @@ def train(model, embeddings_name=None, architecture=None, transformer=None, inpu
use_ELMo=use_ELMo,
multiprocessing=multiprocessing,
early_stop=early_stop,
patience=patience)
patience=patience,
learning_rate=learning_rate)

if incremental:
if input_model_path != None:
@@ -202,7 +203,7 @@ def train(model, embeddings_name=None, architecture=None, transformer=None, inpu
def train_eval(model, embeddings_name=None, architecture='BidLSTM_CRF', transformer=None,
input_path=None, output_path=None, fold_count=1,
features_indices=None, max_sequence_length=-1, batch_size=-1, max_epoch=-1,
use_ELMo=False, incremental=False, input_model_path=None, patience=-1):
use_ELMo=False, incremental=False, input_model_path=None, patience=-1, learning_rate=None):
print('Loading data...')
if input_path is None:
x_all, y_all, f_all = load_data_and_labels_crf_file('data/sequenceLabelling/grobid/'+model+'/'+model+'-060518.train')
@@ -242,7 +243,8 @@ def train_eval(model, embeddings_name=None, architecture='BidLSTM_CRF', transfor
use_ELMo=use_ELMo,
multiprocessing=multiprocessing,
early_stop=early_stop,
patience=patience)
patience=patience,
learning_rate=learning_rate)

if incremental:
if input_model_path != None:
@@ -336,6 +338,7 @@ class Tasks:
EVAL = 'eval'
TAG = 'tag'


if __name__ == "__main__":
parser = argparse.ArgumentParser(description = "Trainer for GROBID models using the DeLFT library")

@@ -392,6 +395,7 @@ class Tasks:
parser.add_argument("--batch-size", type=int, default=-1, help="batch-size parameter to be used.")
parser.add_argument("--patience", type=int, default=-1, help="patience, number of extra epochs to perform after "
"the best epoch before stopping a training.")
parser.add_argument("--learning-rate", type=float, default=None, help="Initial learning rate")



@@ -410,6 +414,7 @@ class Tasks:
use_ELMo = args.use_ELMo
incremental = args.incremental
patience = args.patience
learning_rate = args.learning_rate

if architecture is None:
raise ValueError("A model architecture has to be specified: " + str(architectures))
@@ -430,7 +435,8 @@ class Tasks:
use_ELMo=use_ELMo,
incremental=incremental,
input_model_path=input_model_path,
patience=patience)
patience=patience,
learning_rate=learning_rate)

if action == Tasks.EVAL:
if args.fold_count is not None and args.fold_count > 1:
@@ -454,7 +460,8 @@ class Tasks:
batch_size=batch_size,
use_ELMo=use_ELMo,
incremental=incremental,
input_model_path=input_model_path)
input_model_path=input_model_path,
learning_rate=learning_rate)

if action == Tasks.TAG:
someTexts = []
10 changes: 6 additions & 4 deletions delft/applications/insultTagger.py
@@ -21,7 +21,7 @@ def configure(architecture, embeddings_name):

return batch_size, maxlen, patience, early_stop, max_epoch, embeddings_name

def train(embeddings_name=None, architecture='BidLSTM_CRF', transformer=None, use_ELMo=False):
def train(embeddings_name=None, architecture='BidLSTM_CRF', transformer=None, use_ELMo=False, learning_rate=None):
batch_size, maxlen, patience, early_stop, max_epoch, embeddings_name = configure(architecture, embeddings_name)

root = 'data/sequenceLabelling/toxic/'
@@ -41,7 +41,7 @@ def train(embeddings_name=None, architecture='BidLSTM_CRF', transformer=None, us

model = Sequence(model_name, max_epoch=max_epoch, batch_size=batch_size, max_sequence_length=maxlen,
embeddings_name=embeddings_name, architecture=architecture, patience=patience, early_stop=early_stop,
transformer_name=transformer, use_ELMo=use_ELMo)
transformer_name=transformer, use_ELMo=use_ELMo, learning_rate=learning_rate)
model.train(x_train, y_train, x_valid=x_valid, y_valid=y_valid)
print('training done')

@@ -113,7 +113,8 @@ def annotate(texts, output_format, architecture='BidLSTM_CRF', transformer=None,
"HuggingFace transformers hub will be used otherwise to fetch the model, see https://huggingface.co/models " + \
"for model names"
)
parser.add_argument("--use-ELMo", action="store_true", help="Use ELMo contextual embeddings")
parser.add_argument("--use-ELMo", action="store_true", help="Use ELMo contextual embeddings")
parser.add_argument("--learning-rate", type=float, default=None, help="Initial learning rate")

args = parser.parse_args()

@@ -124,13 +125,14 @@ def annotate(texts, output_format, architecture='BidLSTM_CRF', transformer=None,
architecture = args.architecture
transformer = args.transformer
use_ELMo = args.use_ELMo
learning_rate = args.learning_rate

if transformer == None and embeddings_name == None:
# default word embeddings
embeddings_name = "glove-840B"

if args.action == 'train':
train(embeddings_name=embeddings_name, architecture=architecture, transformer=transformer, use_ELMo=use_ELMo)
train(embeddings_name=embeddings_name, architecture=architecture, transformer=transformer, use_ELMo=use_ELMo, learning_rate=learning_rate)

if args.action == 'tag':
someTexts = ['This is a gentle test.',
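With the same wiring in insultTagger.py, a manual rate can be passed from the shell; an illustrative invocation, assuming the usual DeLFT setup and leaving other options at their defaults:

python3 delft/applications/insultTagger.py train --architecture BidLSTM_CRF --learning-rate 0.0001

Omitting --learning-rate passes None, which presumably leaves the wrapper's existing default rate in place.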
53 changes: 36 additions & 17 deletions delft/applications/nerTagger.py
@@ -67,7 +67,7 @@ def configure(architecture, dataset_type, lang, embeddings_name, use_ELMo, max_s

# train a model with all available for a given dataset
def train(dataset_type='conll2003', lang='en', embeddings_name=None, architecture='BidLSTM_CRF',
transformer=None, data_path=None, use_ELMo=False, max_sequence_length=-1, batch_size=-1, patience=-1):
transformer=None, data_path=None, use_ELMo=False, max_sequence_length=-1, batch_size=-1, patience=-1, learning_rate=None):

batch_size, max_sequence_length, patience, recurrent_dropout, early_stop, max_epoch, embeddings_name, word_lstm_units, multiprocessing = \
configure(architecture, dataset_type, lang, embeddings_name, use_ELMo, max_sequence_length, batch_size, patience)
@@ -102,7 +102,9 @@ def train(dataset_type='conll2003', lang='en', embeddings_name=None, architectur
patience=patience,
max_sequence_length=max_sequence_length,
use_ELMo=use_ELMo,
multiprocessing=multiprocessing)
multiprocessing=multiprocessing,
learning_rate=learning_rate)

elif (dataset_type == 'conll2012') and (lang == 'en'):
print('Loading Ontonotes 5.0 CoNLL-2012 NER data...')

@@ -134,7 +136,8 @@ def train(dataset_type='conll2003', lang='en', embeddings_name=None, architectur
patience=patience,
max_sequence_length=max_sequence_length,
use_ELMo=use_ELMo,
multiprocessing=multiprocessing)
multiprocessing=multiprocessing,
learning_rate=learning_rate)
elif (lang == 'fr'):
print('Loading data...')
dataset_type = 'lemonde'
@@ -159,7 +162,8 @@ def train(dataset_type='conll2003', lang='en', embeddings_name=None, architectur
patience=patience,
max_sequence_length=max_sequence_length,
use_ELMo=use_ELMo,
multiprocessing=multiprocessing)
multiprocessing=multiprocessing,
learning_rate=learning_rate)
else:
print("dataset/language combination is not supported:", dataset_type, lang)
return
@@ -190,7 +194,8 @@ def train_eval(embeddings_name=None,
use_ELMo=False,
patience=-1,
batch_size=-1,
max_sequence_length=-1):
max_sequence_length=-1,
learning_rate=None):

batch_size, max_sequence_length, patience, recurrent_dropout, early_stop, max_epoch, embeddings_name, word_lstm_units, multiprocessing = \
configure(architecture, dataset_type, lang, embeddings_name, use_ELMo,
@@ -222,7 +227,8 @@ def train_eval(embeddings_name=None,
patience=patience,
max_sequence_length=max_sequence_length,
use_ELMo=use_ELMo,
multiprocessing=multiprocessing)
multiprocessing=multiprocessing,
learning_rate=learning_rate)
else:
# also use validation set to train (no early stop, hyperparmeters must be set preliminarly),
# as (Chui & Nochols, 2016) and (Peters and al., 2017)
@@ -240,7 +246,8 @@ def train_eval(embeddings_name=None,
patience=patience,
max_sequence_length=max_sequence_length,
use_ELMo=use_ELMo,
multiprocessing=multiprocessing)
multiprocessing=multiprocessing,
learning_rate=learning_rate)

elif (dataset_type == 'ontonotes-all') and (lang == 'en'):
print("Loading all Ontonotes 5.0 XML data, evaluation will be on 10\% random partition")
@@ -266,7 +273,8 @@ def train_eval(embeddings_name=None,
patience=patience,
max_sequence_length=max_sequence_length,
use_ELMo=use_ELMo,
multiprocessing=multiprocessing)
multiprocessing=multiprocessing,
learning_rate=learning_rate)

elif (dataset_type == 'conll2012') and (lang == 'en'):
print('Loading Ontonotes 5.0 CoNLL-2012 NER data...')
@@ -294,7 +302,8 @@ def train_eval(embeddings_name=None,
patience=patience,
max_sequence_length=max_sequence_length,
use_ELMo=use_ELMo,
multiprocessing=multiprocessing)
multiprocessing=multiprocessing,
learning_rate=learning_rate)
else:
# also use validation set to train (no early stop, hyperparameters must be set preliminarly),
# as (Chui & Nochols, 2016) and (Peters and al., 2017)
@@ -312,7 +321,8 @@ def train_eval(embeddings_name=None,
patience=patience,
max_sequence_length=max_sequence_length,
use_ELMo=use_ELMo,
multiprocessing=multiprocessing)
multiprocessing=multiprocessing,
learning_rate=learning_rate)

elif (lang == 'fr') and (dataset_type == 'ftb' or dataset_type is None):
print('Loading data for ftb...')
@@ -339,7 +349,8 @@ def train_eval(embeddings_name=None,
patience=patience,
max_sequence_length=max_sequence_length,
use_ELMo=use_ELMo,
multiprocessing=multiprocessing)
multiprocessing=multiprocessing,
learning_rate=learning_rate)
elif (lang == 'fr') and (dataset_type == 'ftb_force_split'):
print('Loading data for ftb_force_split...')
x_train, y_train = load_data_and_labels_conll('data/sequenceLabelling/leMonde/ftb6_train.conll')
@@ -367,7 +378,8 @@ def train_eval(embeddings_name=None,
patience=patience,
max_sequence_length=max_sequence_length,
use_ELMo=use_ELMo,
multiprocessing=multiprocessing)
multiprocessing=multiprocessing,
learning_rate=learning_rate)
else:
# also use validation set to train (no early stop, hyperparmeters must be set preliminarly),
# as (Chui & Nochols, 2016) and (Peters and al., 2017)
@@ -385,7 +397,8 @@ def train_eval(embeddings_name=None,
patience=patience,
max_sequence_length=max_sequence_length,
use_ELMo=use_ELMo,
multiprocessing=multiprocessing)
multiprocessing=multiprocessing,
learning_rate=learning_rate)
elif (lang == 'fr') and (dataset_type == 'ftb_force_split_xml'):
print('Loading data for ftb_force_split_xml...')
x_train, y_train = load_data_and_labels_lemonde('data/sequenceLabelling/leMonde/ftb6_ALL.EN.docs.relinked.train.xml')
@@ -413,7 +426,8 @@ def train_eval(embeddings_name=None,
patience=patience,
max_sequence_length=max_sequence_length,
use_ELMo=use_ELMo,
multiprocessing=multiprocessing)
multiprocessing=multiprocessing,
learning_rate=learning_rate)
else:
# also use validation set to train (no early stop, hyperparmeters must be set preliminarly),
# as (Chui & Nochols, 2016) and (Peters and al., 2017)
@@ -431,7 +445,8 @@ def train_eval(embeddings_name=None,
patience=patience,
max_sequence_length=max_sequence_length,
use_ELMo=use_ELMo,
multiprocessing=multiprocessing)
multiprocessing=multiprocessing,
learning_rate=learning_rate)
else:
print("dataset/language combination is not supported:", dataset_type, lang)
return
@@ -597,6 +612,7 @@ def annotate(output_format,
parser.add_argument("--batch-size", type=int, default=-1, help="batch-size parameter to be used.")
parser.add_argument("--patience", type=int, default=-1, help="patience, number of extra epochs to perform after "
"the best epoch before stopping a training.")
parser.add_argument("--learning-rate", type=float, default=None, help="Initial learning rate")

args = parser.parse_args()

@@ -617,6 +633,7 @@ def annotate(output_format,
patience = args.patience
max_sequence_length = args.max_sequence_length
batch_size = args.batch_size
learning_rate = args.learning_rate

# name of embeddings refers to the file delft/resources-registry.json
# be sure to use here the same name as in the registry ('glove-840B', 'fasttext-crawl', 'word2vec'),
@@ -635,7 +652,8 @@ def annotate(output_format,
use_ELMo=use_ELMo,
max_sequence_length=max_sequence_length,
batch_size=batch_size,
patience=patience
patience=patience,
learning_rate=learning_rate
)

if action == 'train_eval':
@@ -653,7 +671,8 @@ def annotate(output_format,
use_ELMo=use_ELMo,
max_sequence_length=max_sequence_length,
batch_size=batch_size,
patience=patience
patience=patience,
learning_rate=learning_rate
)

if action == 'eval':
