add learning rate visualisation and manual parameter #161

Merged
11 commits merged on Jul 16, 2023
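This PR threads an optional learning_rate parameter from the application scripts (datasetTagger, grobidTagger, insultTagger, nerTagger) through to the Sequence wrapper and exposes it on each command line as --learning-rate. At the API level the resulting call looks roughly like the sketch below; the model name and rate value are hypothetical, the import is the one the application scripts already use, and when learning_rate is left as None the architecture's default initial rate is presumably kept.

    from delft.sequenceLabelling import Sequence

    # hypothetical values: pass an explicit initial learning rate,
    # or leave learning_rate=None to keep the architecture default
    model = Sequence('insult-example',
                     architecture='BidLSTM_CRF',
                     embeddings_name='glove-840B',
                     learning_rate=0.0001)
    # model.train(x_train, y_train, x_valid=x_valid, y_valid=y_valid)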
20 changes: 14 additions & 6 deletions delft/applications/datasetTagger.py
@@ -66,7 +66,9 @@ def configure(architecture, output_path=None, max_sequence_length=-1, batch_size
# train a model with all available data
def train(embeddings_name=None, architecture='BidLSTM_CRF', transformer=None,
input_path=None, output_path=None, fold_count=1,
features_indices=None, max_sequence_length=-1, batch_size=-1, max_epoch=-1, use_ELMo=False, patience=-1):
features_indices=None, max_sequence_length=-1,
batch_size=-1, max_epoch=-1, use_ELMo=False, patience=-1,
learning_rate=None):
print('Loading data...')
if input_path is None:
x_all1 = y_all1 = x_all2 = y_all2 = x_all3 = y_all3 = []
@@ -110,7 +112,8 @@ def train(embeddings_name=None, architecture='BidLSTM_CRF', transformer=None,
use_ELMo=use_ELMo,
multiprocessing=multiprocessing,
early_stop=early_stop,
patience=patience)
patience=patience,
learning_rate=learning_rate)

start_time = time.time()
model.train(x_train, y_train, x_valid=x_valid, y_valid=y_valid)
@@ -129,7 +132,7 @@ def train_eval(embeddings_name=None, architecture='BidLSTM_CRF', transformer=Non
def train_eval(embeddings_name=None, architecture='BidLSTM_CRF', transformer=None,
input_path=None, output_path=None, fold_count=1,
features_indices=None, max_sequence_length=-1, batch_size=-1, max_epoch=-1, use_ELMo=False,
patience=-1):
patience=-1, learning_rate=None):
print('Loading data...')
if input_path is None:
x_all1 = y_all1 = x_all2 = y_all2 = x_all3 = y_all3 = []
@@ -175,7 +178,8 @@ def train_eval(embeddings_name=None, architecture='BidLSTM_CRF', transformer=Non
use_ELMo=use_ELMo,
multiprocessing=multiprocessing,
early_stop=early_stop,
patience=patience)
patience=patience,
learning_rate=learning_rate)

start_time = time.time()

@@ -280,6 +284,7 @@ def annotate_text(texts, output_format, architecture='BidLSTM_CRF', features=Non
parser.add_argument("--batch-size", type=int, default=-1, help="batch-size parameter to be used.")
parser.add_argument("--patience", type=int, default=-1, help="patience, number of extra epochs to perform after "
"the best epoch before stopping a training.")
parser.add_argument("--learning-rate", type=float, default=None, help="Initial learning rate")

args = parser.parse_args()

@@ -293,6 +298,7 @@ def annotate_text(texts, output_format, architecture='BidLSTM_CRF', features=Non
transformer = args.transformer
use_ELMo = args.use_ELMo
patience = args.patience
learning_rate = args.learning_rate

if transformer is None and embeddings_name is None:
# default word embeddings
@@ -307,7 +313,8 @@ def annotate_text(texts, output_format, architecture='BidLSTM_CRF', features=Non
max_sequence_length=max_sequence_length,
batch_size=batch_size,
use_ELMo=use_ELMo,
patience=patience)
patience=patience,
learning_rate=learning_rate)

if action == "eval":
if args.fold_count is not None and args.fold_count > 1:
@@ -329,7 +336,8 @@ def annotate_text(texts, output_format, architecture='BidLSTM_CRF', features=Non
max_sequence_length=max_sequence_length,
batch_size=batch_size,
use_ELMo=use_ELMo,
patience=patience)
patience=patience,
learning_rate=learning_rate)

if action == "tag":
someTexts = []
19 changes: 13 additions & 6 deletions delft/applications/grobidTagger.py
@@ -137,7 +137,7 @@ def configure(model, architecture, output_path=None, max_sequence_length=-1, bat
# train a GROBID model with all available data
def train(model, embeddings_name=None, architecture=None, transformer=None, input_path=None,
output_path=None, features_indices=None, max_sequence_length=-1, batch_size=-1, max_epoch=-1,
use_ELMo=False, incremental=False, input_model_path=None, patience=-1):
use_ELMo=False, incremental=False, input_model_path=None, patience=-1, learning_rate=None):

print('Loading data...')
if input_path == None:
@@ -176,7 +178,8 @@ def train(model, embeddings_name=None, architecture=None, transformer=None, inpu
use_ELMo=use_ELMo,
multiprocessing=multiprocessing,
early_stop=early_stop,
patience=patience)
patience=patience,
learning_rate=learning_rate)

if incremental:
if input_model_path != None:
@@ -202,7 +203,7 @@ def train(model, embeddings_name=None, architecture=None, transformer=None, inpu
def train_eval(model, embeddings_name=None, architecture='BidLSTM_CRF', transformer=None,
input_path=None, output_path=None, fold_count=1,
features_indices=None, max_sequence_length=-1, batch_size=-1, max_epoch=-1,
use_ELMo=False, incremental=False, input_model_path=None, patience=-1):
use_ELMo=False, incremental=False, input_model_path=None, patience=-1, learning_rate=None):
print('Loading data...')
if input_path is None:
x_all, y_all, f_all = load_data_and_labels_crf_file('data/sequenceLabelling/grobid/'+model+'/'+model+'-060518.train')
@@ -242,7 +243,8 @@ def train_eval(model, embeddings_name=None, architecture='BidLSTM_CRF', transfor
use_ELMo=use_ELMo,
multiprocessing=multiprocessing,
early_stop=early_stop,
patience=patience)
patience=patience,
learning_rate=learning_rate)

if incremental:
if input_model_path != None:
@@ -336,6 +338,7 @@ class Tasks:
EVAL = 'eval'
TAG = 'tag'


if __name__ == "__main__":
parser = argparse.ArgumentParser(description = "Trainer for GROBID models using the DeLFT library")

@@ -392,6 +395,7 @@ class Tasks:
parser.add_argument("--batch-size", type=int, default=-1, help="batch-size parameter to be used.")
parser.add_argument("--patience", type=int, default=-1, help="patience, number of extra epochs to perform after "
"the best epoch before stopping a training.")
parser.add_argument("--learning-rate", type=float, default=None, help="Initial learning rate")



@@ -410,6 +414,7 @@ class Tasks:
use_ELMo = args.use_ELMo
incremental = args.incremental
patience = args.patience
learning_rate = args.learning_rate

if architecture is None:
raise ValueError("A model architecture has to be specified: " + str(architectures))
@@ -430,7 +435,8 @@ class Tasks:
use_ELMo=use_ELMo,
incremental=incremental,
input_model_path=input_model_path,
patience=patience)
patience=patience,
learning_rate=learning_rate)

if action == Tasks.EVAL:
if args.fold_count is not None and args.fold_count > 1:
@@ -454,7 +460,8 @@ class Tasks:
batch_size=batch_size,
use_ELMo=use_ELMo,
incremental=incremental,
input_model_path=input_model_path)
input_model_path=input_model_path,
learning_rate=learning_rate)

if action == Tasks.TAG:
someTexts = []
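With the new flag, a training run can be given an explicit initial learning rate from the command line; a hypothetical invocation (model name and rate value are placeholders) would be something like:

    python3 delft/applications/grobidTagger.py date train --architecture BidLSTM_CRF --learning-rate 0.00005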
10 changes: 6 additions & 4 deletions delft/applications/insultTagger.py
@@ -21,7 +21,7 @@ def configure(architecture, embeddings_name):

return batch_size, maxlen, patience, early_stop, max_epoch, embeddings_name

def train(embeddings_name=None, architecture='BidLSTM_CRF', transformer=None, use_ELMo=False):
def train(embeddings_name=None, architecture='BidLSTM_CRF', transformer=None, use_ELMo=False, learning_rate=None):
batch_size, maxlen, patience, early_stop, max_epoch, embeddings_name = configure(architecture, embeddings_name)

root = 'data/sequenceLabelling/toxic/'
@@ -41,7 +41,7 @@ def train(embeddings_name=None, architecture='BidLSTM_CRF', transformer=None, us

model = Sequence(model_name, max_epoch=max_epoch, batch_size=batch_size, max_sequence_length=maxlen,
embeddings_name=embeddings_name, architecture=architecture, patience=patience, early_stop=early_stop,
transformer_name=transformer, use_ELMo=use_ELMo)
transformer_name=transformer, use_ELMo=use_ELMo, learning_rate=learning_rate)
model.train(x_train, y_train, x_valid=x_valid, y_valid=y_valid)
print('training done')

@@ -113,7 +113,8 @@ def annotate(texts, output_format, architecture='BidLSTM_CRF', transformer=None,
"HuggingFace transformers hub will be used otherwise to fetch the model, see https://huggingface.co/models " + \
"for model names"
)
parser.add_argument("--use-ELMo", action="store_true", help="Use ELMo contextual embeddings")
parser.add_argument("--use-ELMo", action="store_true", help="Use ELMo contextual embeddings")
parser.add_argument("--learning-rate", type=float, default=None, help="Initial learning rate")

args = parser.parse_args()

@@ -124,13 +125,14 @@ def annotate(texts, output_format, architecture='BidLSTM_CRF', transformer=None,
architecture = args.architecture
transformer = args.transformer
use_ELMo = args.use_ELMo
learning_rate = args.learning_rate

if transformer == None and embeddings_name == None:
# default word embeddings
embeddings_name = "glove-840B"

if args.action == 'train':
train(embeddings_name=embeddings_name, architecture=architecture, transformer=transformer, use_ELMo=use_ELMo)
train(embeddings_name=embeddings_name, architecture=architecture, transformer=transformer, use_ELMo=use_ELMo, learning_rate=learning_rate)

if args.action == 'tag':
someTexts = ['This is a gentle test.',
53 changes: 36 additions & 17 deletions delft/applications/nerTagger.py
@@ -67,7 +67,7 @@ def configure(architecture, dataset_type, lang, embeddings_name, use_ELMo, max_s

# train a model with all available for a given dataset
def train(dataset_type='conll2003', lang='en', embeddings_name=None, architecture='BidLSTM_CRF',
transformer=None, data_path=None, use_ELMo=False, max_sequence_length=-1, batch_size=-1, patience=-1):
transformer=None, data_path=None, use_ELMo=False, max_sequence_length=-1, batch_size=-1, patience=-1, learning_rate=None):

batch_size, max_sequence_length, patience, recurrent_dropout, early_stop, max_epoch, embeddings_name, word_lstm_units, multiprocessing = \
configure(architecture, dataset_type, lang, embeddings_name, use_ELMo, max_sequence_length, batch_size, patience)
@@ -102,7 +102,9 @@ def train(dataset_type='conll2003', lang='en', embeddings_name=None, architectur
patience=patience,
max_sequence_length=max_sequence_length,
use_ELMo=use_ELMo,
multiprocessing=multiprocessing)
multiprocessing=multiprocessing,
learning_rate=learning_rate)

elif (dataset_type == 'conll2012') and (lang == 'en'):
print('Loading Ontonotes 5.0 CoNLL-2012 NER data...')

@@ -134,7 +136,8 @@ def train(dataset_type='conll2003', lang='en', embeddings_name=None, architectur
patience=patience,
max_sequence_length=max_sequence_length,
use_ELMo=use_ELMo,
multiprocessing=multiprocessing)
multiprocessing=multiprocessing,
learning_rate=learning_rate)
elif (lang == 'fr'):
print('Loading data...')
dataset_type = 'lemonde'
@@ -159,7 +162,8 @@ def train(dataset_type='conll2003', lang='en', embeddings_name=None, architectur
patience=patience,
max_sequence_length=max_sequence_length,
use_ELMo=use_ELMo,
multiprocessing=multiprocessing)
multiprocessing=multiprocessing,
learning_rate=learning_rate)
else:
print("dataset/language combination is not supported:", dataset_type, lang)
return
@@ -190,7 +194,8 @@ def train_eval(embeddings_name=None,
use_ELMo=False,
patience=-1,
batch_size=-1,
max_sequence_length=-1):
max_sequence_length=-1,
learning_rate=None):

batch_size, max_sequence_length, patience, recurrent_dropout, early_stop, max_epoch, embeddings_name, word_lstm_units, multiprocessing = \
configure(architecture, dataset_type, lang, embeddings_name, use_ELMo,
@@ -222,7 +227,8 @@ def train_eval(embeddings_name=None,
patience=patience,
max_sequence_length=max_sequence_length,
use_ELMo=use_ELMo,
multiprocessing=multiprocessing)
multiprocessing=multiprocessing,
learning_rate=learning_rate)
else:
# also use validation set to train (no early stop, hyperparmeters must be set preliminarly),
# as (Chui & Nochols, 2016) and (Peters and al., 2017)
@@ -240,7 +246,8 @@ def train_eval(embeddings_name=None,
patience=patience,
max_sequence_length=max_sequence_length,
use_ELMo=use_ELMo,
multiprocessing=multiprocessing)
multiprocessing=multiprocessing,
learning_rate=learning_rate)

elif (dataset_type == 'ontonotes-all') and (lang == 'en'):
print("Loading all Ontonotes 5.0 XML data, evaluation will be on 10\% random partition")
@@ -266,7 +273,8 @@ def train_eval(embeddings_name=None,
patience=patience,
max_sequence_length=max_sequence_length,
use_ELMo=use_ELMo,
multiprocessing=multiprocessing)
multiprocessing=multiprocessing,
learning_rate=learning_rate)

elif (dataset_type == 'conll2012') and (lang == 'en'):
print('Loading Ontonotes 5.0 CoNLL-2012 NER data...')
@@ -294,7 +302,8 @@ def train_eval(embeddings_name=None,
patience=patience,
max_sequence_length=max_sequence_length,
use_ELMo=use_ELMo,
multiprocessing=multiprocessing)
multiprocessing=multiprocessing,
learning_rate=learning_rate)
else:
# also use validation set to train (no early stop, hyperparameters must be set preliminarly),
# as (Chui & Nochols, 2016) and (Peters and al., 2017)
@@ -312,7 +321,8 @@ def train_eval(embeddings_name=None,
patience=patience,
max_sequence_length=max_sequence_length,
use_ELMo=use_ELMo,
multiprocessing=multiprocessing)
multiprocessing=multiprocessing,
learning_rate=learning_rate)

elif (lang == 'fr') and (dataset_type == 'ftb' or dataset_type is None):
print('Loading data for ftb...')
@@ -339,7 +349,8 @@ def train_eval(embeddings_name=None,
patience=patience,
max_sequence_length=max_sequence_length,
use_ELMo=use_ELMo,
multiprocessing=multiprocessing)
multiprocessing=multiprocessing,
learning_rate=learning_rate)
elif (lang == 'fr') and (dataset_type == 'ftb_force_split'):
print('Loading data for ftb_force_split...')
x_train, y_train = load_data_and_labels_conll('data/sequenceLabelling/leMonde/ftb6_train.conll')
@@ -367,7 +378,8 @@ def train_eval(embeddings_name=None,
patience=patience,
max_sequence_length=max_sequence_length,
use_ELMo=use_ELMo,
multiprocessing=multiprocessing)
multiprocessing=multiprocessing,
learning_rate=learning_rate)
else:
# also use validation set to train (no early stop, hyperparmeters must be set preliminarly),
# as (Chui & Nochols, 2016) and (Peters and al., 2017)
@@ -385,7 +397,8 @@ def train_eval(embeddings_name=None,
patience=patience,
max_sequence_length=max_sequence_length,
use_ELMo=use_ELMo,
multiprocessing=multiprocessing)
multiprocessing=multiprocessing,
learning_rate=learning_rate)
elif (lang == 'fr') and (dataset_type == 'ftb_force_split_xml'):
print('Loading data for ftb_force_split_xml...')
x_train, y_train = load_data_and_labels_lemonde('data/sequenceLabelling/leMonde/ftb6_ALL.EN.docs.relinked.train.xml')
@@ -413,7 +426,8 @@ def train_eval(embeddings_name=None,
patience=patience,
max_sequence_length=max_sequence_length,
use_ELMo=use_ELMo,
multiprocessing=multiprocessing)
multiprocessing=multiprocessing,
learning_rate=learning_rate)
else:
# also use validation set to train (no early stop, hyperparmeters must be set preliminarly),
# as (Chui & Nochols, 2016) and (Peters and al., 2017)
@@ -431,7 +445,8 @@ def train_eval(embeddings_name=None,
patience=patience,
max_sequence_length=max_sequence_length,
use_ELMo=use_ELMo,
multiprocessing=multiprocessing)
multiprocessing=multiprocessing,
learning_rate=learning_rate)
else:
print("dataset/language combination is not supported:", dataset_type, lang)
return
@@ -597,6 +612,7 @@ def annotate(output_format,
parser.add_argument("--batch-size", type=int, default=-1, help="batch-size parameter to be used.")
parser.add_argument("--patience", type=int, default=-1, help="patience, number of extra epochs to perform after "
"the best epoch before stopping a training.")
parser.add_argument("--learning-rate", type=float, default=None, help="Initial learning rate")

args = parser.parse_args()

@@ -617,6 +633,7 @@ def annotate(output_format,
patience = args.patience
max_sequence_length = args.max_sequence_length
batch_size = args.batch_size
learning_rate = args.learning_rate

# name of embeddings refers to the file delft/resources-registry.json
# be sure to use here the same name as in the registry ('glove-840B', 'fasttext-crawl', 'word2vec'),
@@ -635,7 +652,8 @@ def annotate(output_format,
use_ELMo=use_ELMo,
max_sequence_length=max_sequence_length,
batch_size=batch_size,
patience=patience
patience=patience,
learning_rate=learning_rate
)

if action == 'train_eval':
Expand All @@ -653,7 +671,8 @@ def annotate(output_format,
use_ELMo=use_ELMo,
max_sequence_length=max_sequence_length,
batch_size=batch_size,
patience=patience
patience=patience,
learning_rate=learning_rate
)

if action == 'eval':
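The learning-rate visualisation half of the PR is not part of the diffs shown in this excerpt. A minimal, generic sketch of the underlying idea, recording the optimizer's learning rate after every epoch with a Keras callback so it can be plotted later, is given below; this is an illustration under that assumption, not the PR's actual implementation.

    import tensorflow as tf

    class LearningRateLogger(tf.keras.callbacks.Callback):
        """Collect the optimizer's learning rate at the end of each epoch."""

        def __init__(self):
            super().__init__()
            self.rates = []

        def on_epoch_end(self, epoch, logs=None):
            # works for optimizers configured with a plain float learning rate
            lr = tf.keras.backend.get_value(self.model.optimizer.learning_rate)
            self.rates.append(float(lr))

The collected values can then be plotted against the epoch index with any plotting library.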