config_args.py
#!/usr/bin/env python
# encoding: utf-8
'''
#-------------------------------------------------------------------#
# CONFIDENTIAL --- CUSTOM STUDIOS #
#-------------------------------------------------------------------#
# #
# @Project Name : xlnet_classification #
# #
# @File Name : config_args.py #
# #
# @Programmer : Jeffrey #
# #
# @Start Date : 2020/11/16 10:04 #
# #
# @Last Update : 2020/11/16 10:04 #
# #
#-------------------------------------------------------------------#
# Classes: #
# #
#-------------------------------------------------------------------#
'''
import argparse
import os
import torch
from transformers import TrainingArguments
def deal_parser():
    parser = argparse.ArgumentParser()
    parser.add_argument("--data_dir", default=None, type=str, required=True,
                        help="The input data dir. Should contain the .tsv files (or other data files) for the task.")
    parser.add_argument("--model_type", default=None, type=str, required=True,
                        help="Model type selected in the list: ")
    parser.add_argument("--model_name", default=None, type=str, required=True,
                        help="Path to pre-trained model or shortcut name selected in the list")
    parser.add_argument("--output_dir", default=None, type=str, required=True,
                        help="The output directory where the model predictions and checkpoints will be written.")
    ## Other parameters
    parser.add_argument("--num_labels", default=5, type=int,
                        help="The number of labels.")
    parser.add_argument("--config_name", default="", type=str,
                        help="Pretrained config name or path if not the same as model_name")
    parser.add_argument("--tokenizer_name", default="", type=str,
                        help="Pretrained tokenizer name or path if not the same as model_name")
    parser.add_argument("--cache_dir", default="", type=str,
                        help="Where do you want to store the pre-trained models downloaded from s3")
    parser.add_argument("--logging_dir", default="", type=str,
                        help="Directory for the training logs.")
    parser.add_argument("--max_seq_length", default=512, type=int,
                        help="The maximum total input sequence length after tokenization. Sequences longer "
                             "than this will be truncated, sequences shorter will be padded.")
    parser.add_argument("--predict_all_checkpoints", action='store_true',
                        help="Predict with all checkpoints starting with the same prefix as model_name and ending with a step number.")
    parser.add_argument("--task_name", default=None, type=str, required=True,
                        help="The name of the task to train selected in the list: ")
    parser.add_argument("--per_gpu_train_batch_size", default=2, type=int,
                        help="Batch size per GPU/CPU for training.")
    parser.add_argument("--per_gpu_eval_batch_size", default=2, type=int,
                        help="Batch size per GPU/CPU for evaluation.")
    parser.add_argument('--gradient_accumulation_steps', type=int, default=1,
                        help="Number of update steps to accumulate before performing a backward/update pass.")
    parser.add_argument("--learning_rate", default=5e-5, type=float,
                        help="The initial learning rate for Adam.")
    parser.add_argument("--weight_decay", default=0.0, type=float,
                        help="Weight decay if we apply some.")
    parser.add_argument("--adam_epsilon", default=1e-6, type=float,
                        help="Epsilon for Adam optimizer.")
    parser.add_argument("--max_grad_norm", default=1.0, type=float,
                        help="Max gradient norm.")
    parser.add_argument("--num_train_epochs", default=5.0, type=float,
                        help="Total number of training epochs to perform.")
    parser.add_argument("--max_steps", default=-1, type=int,
                        help="If > 0: set total number of training steps to perform. Overrides num_train_epochs.")
    parser.add_argument("--warmup_proportion", default=0.1, type=float,
                        help="Proportion of training to perform linear learning rate warmup for. E.g., 0.1 = 10%% of training.")
    parser.add_argument('--logging_steps', type=int, default=10,
                        help="Log every X update steps.")
    parser.add_argument('--save_strategy', type=str, default="epoch",
                        help="Save strategy; it can be 'epoch' or 'steps'.")
    parser.add_argument('--save_steps', type=int, default=1000,
                        help="Save a checkpoint every X update steps.")
    parser.add_argument("--eval_all_checkpoints", action='store_true',
                        help="Evaluate all checkpoints starting with the same prefix as model_name and ending with a step number.")
    parser.add_argument("--no_cuda", action='store_true',
                        help="Avoid using CUDA when available.")
    parser.add_argument("--evaluate", action='store_true',
                        help="Run evaluation during training.")
    parser.add_argument('--overwrite_output_dir', action='store_true',
                        help="Overwrite the content of the output directory.")
    parser.add_argument('--overwrite_cache', action='store_true',
                        help="Overwrite the cached training and evaluation sets.")
    parser.add_argument('--seed', type=int, default=42,
                        help="Random seed for initialization.")
    parser.add_argument('--fp16', action='store_true',
                        help="Whether to use 16-bit (mixed) precision (through NVIDIA apex) instead of 32-bit.")
    parser.add_argument('--fp16_opt_level', type=str, default='O1',
                        help="For fp16: Apex AMP optimization level selected in ['O0', 'O1', 'O2', 'O3']. "
                             "See details at https://nvidia.github.io/apex/amp.html")
    parser.add_argument("--local_rank", type=int, default=-1,
                        help="For distributed training: local_rank")
    parser.add_argument('--server_ip', type=str, default='', help="For distant debugging.")
    parser.add_argument('--server_port', type=str, default='', help="For distant debugging.")
    args = parser.parse_args()
    return args
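# Illustrative invocation (not from the original repository): the script name
# "run_classifier.py" and the paths below are placeholders; only flags defined
# in deal_parser() above are used.
#
#   python run_classifier.py \
#       --data_dir ./data \
#       --model_type xlnet \
#       --model_name xlnet-base-cased \
#       --task_name classification \
#       --output_dir ./outputs \
#       --num_labels 5 \
#       --evaluate \
#       --overwrite_output_dir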
def set_args_again(args):
    """
    Set args with training args.
    :param args: the original args.
    :return: args, training_args
    """
    if not os.path.exists(args.output_dir):
        os.mkdir(args.output_dir)
    # Append the model type as a sub-directory of the output directory.
    args.output_dir = os.path.join(args.output_dir, '{}'.format(args.model_type))
    if not os.path.exists(args.output_dir):
        os.mkdir(args.output_dir)
    if os.path.exists(args.output_dir) and os.listdir(
            args.output_dir) and not args.overwrite_output_dir:
        raise ValueError(
            "Output directory ({}) already exists and is not empty. Use --overwrite_output_dir to overcome.".format(
                args.output_dir))
    # Setup distant debugging if needed
    if args.server_ip and args.server_port:
        # Distant debugging - see https://code.visualstudio.com/docs/python/debugging#_attach-to-a-local-script
        import ptvsd
        print("Waiting for debugger attach")
        ptvsd.enable_attach(address=(args.server_ip, args.server_port), redirect_output=True)
        ptvsd.wait_for_attach()
    # Setup CUDA, GPU & distributed training
    if args.local_rank == -1 or args.no_cuda:
        device = torch.device("cuda" if torch.cuda.is_available() and not args.no_cuda else "cpu")
        args.n_gpu = torch.cuda.device_count()
    else:  # Initializes the distributed backend which will take care of synchronizing nodes/GPUs
        torch.cuda.set_device(args.local_rank)
        device = torch.device("cuda", args.local_rank)
        torch.distributed.init_process_group(backend='nccl')
        args.n_gpu = 1
    args.device = device
    training_args = TrainingArguments(
        output_dir=args.output_dir,
        num_train_epochs=args.num_train_epochs,
        per_device_train_batch_size=args.per_gpu_train_batch_size,
        per_device_eval_batch_size=args.per_gpu_eval_batch_size,
        warmup_steps=500,    # note: fixed here rather than derived from --warmup_proportion
        weight_decay=0.01,   # note: fixed here rather than taken from --weight_decay
        logging_dir=args.logging_dir,
        logging_steps=args.logging_steps,
        save_strategy=args.save_strategy,
        save_steps=args.save_steps,
        learning_rate=args.learning_rate,
    )
    return args, training_args
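# Usage sketch (illustrative, not part of the original file), assuming a
# hypothetical train.py that imports these helpers and hands the resulting
# TrainingArguments to a transformers Trainer; the model and dataset objects
# are placeholders prepared elsewhere:
#
#   from transformers import Trainer
#   from config_args import deal_parser, set_args_again
#
#   args = deal_parser()
#   args, training_args = set_args_again(args)
#   trainer = Trainer(model=model, args=training_args,
#                     train_dataset=train_dataset, eval_dataset=eval_dataset)
#   trainer.train()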