support log_interval for TS
changdazhou committed Oct 17, 2024
1 parent 9f23500 commit 913c800
Showing 5 changed files with 69 additions and 42 deletions.
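In short, the commit adds a log_interval setting that throttles the per-batch training log line (previously emitted on every batch), plus mechanical yapf-style reflows across five files. The new user-facing switch lives in paddlets/utils/utils.py; a minimal usage sketch, assuming a paddlets checkout that includes this commit:

from paddlets.utils.utils import set_log_interval  # added by this commit

set_log_interval(10)  # emit the batch-end log line only every 10th batch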
45 changes: 25 additions & 20 deletions paddlets/models/classify/dl/paddle_base.py
@@ -75,18 +75,18 @@ class PaddleBaseClassifier(BaseClassifier):
         _callback_container(CallbackContainer): Container holding a list of callbacks.
     """
 
-    def __init__(
-            self,
-            loss_fn: Callable[..., paddle.Tensor]=F.cross_entropy,
-            optimizer_fn: Callable[..., Optimizer]=paddle.optimizer.Adam,
-            optimizer_params: Dict[str, Any]=dict(learning_rate=1e-3),
-            eval_metrics: List[str]=[],
-            callbacks: List[Callback]=[],
-            batch_size: int=32,
-            max_epochs: int=10,
-            verbose: int=1,
-            patience: int=4,
-            seed: Optional[int]=None, ):
+    def __init__(self,
+                 loss_fn: Callable[..., paddle.Tensor]=F.cross_entropy,
+                 optimizer_fn: Callable[..., Optimizer]=paddle.optimizer.Adam,
+                 optimizer_params: Dict[str, Any]=dict(learning_rate=1e-3),
+                 eval_metrics: List[str]=[],
+                 callbacks: List[Callback]=[],
+                 batch_size: int=32,
+                 max_epochs: int=10,
+                 verbose: int=1,
+                 patience: int=4,
+                 seed: Optional[int]=None,
+                 config: Dict[str, Any]=None):
         super(PaddleBaseClassifier, self).__init__()
         self._loss_fn = loss_fn
         self._optimizer_fn = optimizer_fn
@@ -112,6 +112,7 @@ def __init__(
         self._classes_ = []
         self._n_class = 0
         self.start_epoch = 1
+        self._config = config
         if optimizer_params is not None and optimizer_params.get('start_epoch',
                                                                   None):
             self.start_epoch = int(optimizer_params.pop('start_epoch'))
@@ -146,7 +147,8 @@ def _check_params(self):
         if not self._eval_metrics:
             self._eval_metrics = ["acc"]
 
-    def _check_tsdatasets(self, tsdatasets: List[TSDataset],
+    def _check_tsdatasets(self,
+                          tsdatasets: List[TSDataset],
                           labels: np.ndarray):
         """Ensure the robustness of input data (consistent feature order), at the same time,
         check whether the data types are compatible. If not, the processing logic is as follows.
@@ -214,7 +216,8 @@ def _init_optimizer(self) -> Optimizer:
 
         else:
             return self._optimizer_fn(
-                **self._optimizer_params, parameters=self._network.parameters())
+                **self._optimizer_params,
+                parameters=self._network.parameters())
 
     def _init_fit_dataloaders(
             self,
@@ -259,8 +262,8 @@ def _init_fit_dataloaders(
                     valid_tsdatasets, valid_labels,
                     self._fit_params['input_lens'])
             else:
-                valid_dataset = data_adapter.to_paddle_dataset(valid_tsdatasets,
-                                                               valid_labels)
+                valid_dataset = data_adapter.to_paddle_dataset(
+                    valid_tsdatasets, valid_labels)
             valid_dataloader = data_adapter.to_paddle_dataloader(
                 valid_dataset, self._batch_size, shuffle=False)
 
@@ -282,8 +285,8 @@ def _init_predict_dataloader(
             tsdatasets = [tsdatasets]
         self._check_tsdatasets(tsdatasets, labels)
         data_adapter = ClassifyDataAdapter()
-        dataset = data_adapter.to_paddle_dataset(tsdatasets, labels,
-                                                 self._fit_params['input_lens'])
+        dataset = data_adapter.to_paddle_dataset(
+            tsdatasets, labels, self._fit_params['input_lens'])
         dataloader = data_adapter.to_paddle_dataloader(
             dataset, self._batch_size, shuffle=False)
         return dataloader
@@ -324,7 +327,8 @@ def _init_callbacks(self) -> Tuple[History, CallbackContainer]:
         early_stopping_metric = (self._metrics_names[-1]
                                  if len(self._metrics_names) > 0 else None)
         # Set callback functions, including history, early stopping, etc..
-        history, callbacks = History(self._verbose), []  # nqa
+        print(self._config)
+        history, callbacks = History(self._verbose, self._config), []  # nqa
         callbacks.append(history)
         if (early_stopping_metric is not None) and (self._patience > 0):
             early_stopping = EarlyStopping(
@@ -420,7 +424,8 @@ def predict(
         # np.save('probs',probs)
         rng = check_random_state(self._seed)
         return np.array([
-            self._classes_[int(rng.choice(np.flatnonzero(prob == prob.max())))]
+            self._classes_[int(
+                rng.choice(np.flatnonzero(prob == prob.max())))]
             for prob in probs
         ])
11 changes: 4 additions & 7 deletions paddlets/models/common/callbacks/callbacks.py
@@ -375,14 +375,11 @@ def on_batch_end(self, batch: int, logs: Optional[Dict[str, Any]]=None):
         max_mem_reserved_str = ""
         max_mem_allocated_str = ""
         if paddle.device.is_compiled_with_cuda() and utils.print_mem_info:
-            if paddle.device.cuda.max_memory_reserved() / (1024**2) < 1:
-                max_mem_reserved_str = f", max_mem_reserved: {paddle.device.cuda.max_memory_reserved() // 1024} KB"
-                max_mem_allocated_str = f", max_mem_allocated: {paddle.device.cuda.max_memory_allocated() // 1024} KB"
-            else:
-                max_mem_reserved_str = f", max_mem_reserved: {paddle.device.cuda.max_memory_reserved() // (1024 ** 2)} MB"
-                max_mem_allocated_str = f", max_mem_allocated: {paddle.device.cuda.max_memory_allocated() // (1024 ** 2)} MB"
+            max_mem_reserved_str = f", max_mem_reserved: {paddle.device.cuda.max_memory_reserved() // (1024 ** 2)} MB"
+            max_mem_allocated_str = f", max_mem_allocated: {paddle.device.cuda.max_memory_allocated() // (1024 ** 2)} MB"
 
         msg += f"{max_mem_reserved_str}{max_mem_allocated_str}"
         total_time = int(time.time() - self._start_time)
         msg += f" | {str(datetime.timedelta(seconds=total_time)) + 's':<6}"
-        logger.info(msg)
+        if batch % utils.log_interval == 0:
+            logger.info(msg)
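Two behavioral changes land here: the KB/MB branching for CUDA memory stats collapses into an MB-only report, and the batch-end log line is emitted only when batch % utils.log_interval == 0. A self-contained sketch of the gating pattern (log_interval is a module-level global in paddlets.utils.utils; a local stand-in here):

import datetime
import time

log_interval = 5  # stand-in for paddlets.utils.utils.log_interval
start_time = time.time()

for batch in range(1, 16):
    msg = f"batch {batch:>3}"
    total_time = int(time.time() - start_time)
    msg += f" | {str(datetime.timedelta(seconds=total_time)) + 's':<6}"
    if batch % log_interval == 0:  # mirrors the new guard in on_batch_end
        print(msg)  # the real callback calls logger.info(msg)

Note that with a modulo guard, the last batch of an epoch goes unlogged whenever the epoch length is not a multiple of log_interval.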
13 changes: 10 additions & 3 deletions paddlets/utils/config.py
@@ -89,6 +89,14 @@ def seq_len(self) -> int:
     def epoch(self) -> int:
         return self.dic.get('epoch')
 
+    @property
+    def log_interval(self) -> int:
+        return self.dic.get('log_interval', 1)
+
+    @property
+    def print_mem_info(self) -> bool:
+        return self.dic.get('print_mem_info', False)
+
     @property
     def model(self) -> Dict:
         return self.dic.get('model', {}).copy()
@@ -157,9 +165,8 @@ def update_config_dict(
     if learning_rate:
         dic['model']['model_cfg']['optimizer_params'][
             'learning_rate'] = learning_rate
-    dic['model']['model_cfg']['optimizer_params'][
-        'learning_rate'] = float(dic['model']['model_cfg']['optimizer_params'][
-            'learning_rate'])
+    dic['model']['model_cfg']['optimizer_params']['learning_rate'] = float(dic[
+        'model']['model_cfg']['optimizer_params']['learning_rate'])
     if batch_size:
         dic['batch_size'] = batch_size
     if epoch:
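Both new properties read optional keys from the parsed config dict with conservative defaults, so existing YAML files keep working unchanged. A minimal stand-in mirroring the lookup logic (the real class is paddlets.utils.config.Config):

from typing import Any, Dict


class MiniConfig:
    # Illustrative stand-in for Config, reduced to the two new properties.
    def __init__(self, dic: Dict[str, Any]):
        self.dic = dic

    @property
    def log_interval(self) -> int:
        # Absent key -> 1, i.e. log every batch (the pre-commit behavior).
        return self.dic.get('log_interval', 1)

    @property
    def print_mem_info(self) -> bool:
        return self.dic.get('print_mem_info', False)


cfg = MiniConfig({'log_interval': 50})
assert cfg.log_interval == 50
assert cfg.print_mem_info is False

Note a small inconsistency in the commit: the property defaults print_mem_info to False, while tools/train.py reads the same key with a default of True, so the effective default depends on which code path consults it.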
20 changes: 15 additions & 5 deletions paddlets/utils/utils.py
@@ -21,13 +21,19 @@
 logger = Logger(__name__)
 # Compatible with earlier versions
 print_mem_info = True
+log_interval = 1
 
 
 def set_print_mem_info(flag: bool):
     global print_mem_info
     print_mem_info = flag
 
 
+def set_log_interval(interval: int):
+    global log_interval
+    log_interval = interval
+
+
 def check_model_fitted(model: Trainable, msg: str=None):
     """
     check if model has fitted, Raise Exception if not fitted
@@ -182,7 +188,8 @@ def check_train_valid_continuity(train_data: TSDataset,
             pd.to_timedelta(train_index.freq))
     elif isinstance(train_index, pd.RangeIndex):
         if isinstance(valid_index, pd.RangeIndex):
-            continuious = (valid_index[0] - train_index[-1] == train_index.step)
+            continuious = (
+                valid_index[0] - train_index[-1] == train_index.step)
         else:
             raise_log("Unsupport data index format")
 
@@ -317,7 +324,8 @@ def get_tsdataset_max_len(dataset: TSDataset) -> int:
     return len(all_index)
 
 
-def repr_results_to_tsdataset(reprs: np.array, dataset: TSDataset) -> TSDataset:
+def repr_results_to_tsdataset(reprs: np.array,
+                              dataset: TSDataset) -> TSDataset:
     """
     Convert representation model output to a TSDataset
@@ -447,8 +455,9 @@ def build_ts_infer_input(tsdataset: TSDataset,
     #build sample base on DataAdapter
     data_adapter = DataAdapter()
     if json_data['model_type'] == 'forecasting':
-        raise_if_not(tsdataset.get_target() is not None,
-                     "The target of tsdataset can not be None for forecasting!")
+        raise_if_not(
+            tsdataset.get_target() is not None,
+            "The target of tsdataset can not be None for forecasting!")
         size_keys = ['in_chunk_len', 'out_chunk_len', 'skip_chunk_len']
         for key in size_keys:
             raise_if_not(
@@ -471,7 +480,8 @@
             raise_if_not(
                 key in json_data['size'],
                 f"The {key} in json_data['size'] can not be None for anomaly!")
-        dataset = data_adapter.to_sample_dataset(tsdataset, **json_data['size'])
+        dataset = data_adapter.to_sample_dataset(tsdataset,
+                                                 **json_data['size'])
     else:
         raise_log(ValueError(f"Invalid model_type: {json_data['model_type']}"))
22 changes: 15 additions & 7 deletions tools/train.py
@@ -17,7 +17,7 @@
 from paddlets.utils import backtest
 from paddlets.utils.utils import convert_and_remove_types
 from paddlets.logger import Logger
-from paddlets.utils.utils import set_print_mem_info, update_train_results
+from paddlets.utils.utils import set_print_mem_info, set_log_interval, update_train_results
 from export import export
 
 logger = Logger(__name__)
@@ -45,7 +45,8 @@ def parse_args():
         type=str,
         default=None)
     # Runntime params
-    parser.add_argument('--seq_len', help='input length in training.', type=int)
+    parser.add_argument(
+        '--seq_len', help='input length in training.', type=int)
     parser.add_argument(
         '--predict_len', help='output length in training.', type=int)
     parser.add_argument('--epoch', help='Iterations in training.', type=int)
@@ -55,7 +56,10 @@
 
     # Other params
     parser.add_argument(
-        '--seed', help='Set the random seed in training.', default=42, type=int)
+        '--seed',
+        help='Set the random seed in training.',
+        default=42,
+        type=int)
     parser.add_argument(
         '--opts', help='Update the key-value pairs of all options.', nargs='+')
 
@@ -84,6 +88,8 @@ def main(args):
 
     print_mem_info = cfg.dic.get('print_mem_info', True)
     set_print_mem_info(print_mem_info)
+    log_interval = cfg.dic.get('log_interval', 1)
+    set_log_interval(log_interval)
     batch_size = cfg.batch_size
     dataset = cfg.dataset
     predict_len = cfg.predict_len
@@ -231,7 +237,8 @@ def main(args):
         ts_train, ts_val, ts_test = get_dataset(dataset['name'], split,
                                                 seq_len, info_params)
     else:
-        ts_train = get_dataset(dataset['name'], split, seq_len, info_params)
+        ts_train = get_dataset(dataset['name'], split, seq_len,
+                               info_params)
 
     if cfg.model['name'] in ['TimesNetModel', 'Nonstationary_Transformer'
                              ] and args.device == 'xpu':
@@ -240,7 +247,8 @@ def main(args):
     if cfg.model['name'] == 'PP-TS':
         from paddlets.ensemble import WeightingEnsembleForecaster
         estimators = []
-        for model_name, model_cfg in cfg.model['model_cfg']['Ensemble'].items():
+        for model_name, model_cfg in cfg.model['model_cfg']['Ensemble'].items(
+        ):
             model_cfg = Config(
                 model_cfg,
                 seq_len=seq_len,
@@ -311,8 +319,8 @@ def main(args):
     if dataset['name'] != 'TSDataset':
         ts_all = get_dataset(dataset['name'])
         ts_all = time_feature_generator.fit_transform(ts_all)
-        ts_train._known_cov = ts_all._known_cov[split['train'][0]:split[
-            'train'][1]]
+        ts_train._known_cov = ts_all._known_cov[split['train'][0]:
+                                                split['train'][1]]
         if ts_val is not None:
             ts_val._known_cov = ts_all._known_cov[split['val'][
                 0] - seq_len:split['val'][1]]
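With the wiring above, main() pushes both values from the training YAML into the module-level switches before fitting starts. A hedged end-to-end sketch; the YAML layout is illustrative, the two keys match the cfg.dic.get(...) calls in main(), and the Config-takes-a-path usage is assumed from tools/train.py:

# config.yaml (illustrative fragment):
#     batch_size: 32
#     log_interval: 20      # optional; default 1 (log every batch)
#     print_mem_info: false # optional; default True on this code path
from paddlets.utils.config import Config
from paddlets.utils.utils import set_log_interval, set_print_mem_info

cfg = Config('config.yaml')  # assumed constructor usage, as in tools/train.py
set_print_mem_info(cfg.dic.get('print_mem_info', True))
set_log_interval(cfg.dic.get('log_interval', 1))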
