support log_interval for TS #535

Merged · 1 commit · Oct 22, 2024

41 changes: 22 additions & 19 deletions paddlets/models/classify/dl/paddle_base.py
@@ -75,18 +75,18 @@ class PaddleBaseClassifier(BaseClassifier):
_callback_container(CallbackContainer): Container holding a list of callbacks.
"""

-def __init__(
-self,
-loss_fn: Callable[..., paddle.Tensor]=F.cross_entropy,
-optimizer_fn: Callable[..., Optimizer]=paddle.optimizer.Adam,
-optimizer_params: Dict[str, Any]=dict(learning_rate=1e-3),
-eval_metrics: List[str]=[],
-callbacks: List[Callback]=[],
-batch_size: int=32,
-max_epochs: int=10,
-verbose: int=1,
-patience: int=4,
-seed: Optional[int]=None, ):
+def __init__(self,
+loss_fn: Callable[..., paddle.Tensor]=F.cross_entropy,
+optimizer_fn: Callable[..., Optimizer]=paddle.optimizer.Adam,
+optimizer_params: Dict[str, Any]=dict(learning_rate=1e-3),
+eval_metrics: List[str]=[],
+callbacks: List[Callback]=[],
+batch_size: int=32,
+max_epochs: int=10,
+verbose: int=1,
+patience: int=4,
+seed: Optional[int]=None,
+config: Dict[str, Any]=None):
super(PaddleBaseClassifier, self).__init__()
self._loss_fn = loss_fn
self._optimizer_fn = optimizer_fn
@@ -146,7 +146,8 @@ def _check_params(self):
if not self._eval_metrics:
self._eval_metrics = ["acc"]

-def _check_tsdatasets(self, tsdatasets: List[TSDataset],
+def _check_tsdatasets(self,
+tsdatasets: List[TSDataset],
labels: np.ndarray):
"""Ensure the robustness of input data (consistent feature order), at the same time,
check whether the data types are compatible. If not, the processing logic is as follows.
@@ -214,7 +215,8 @@ def _init_optimizer(self) -> Optimizer:

else:
return self._optimizer_fn(
-**self._optimizer_params, parameters=self._network.parameters())
+**self._optimizer_params,
+parameters=self._network.parameters())

def _init_fit_dataloaders(
self,
@@ -259,8 +261,8 @@ def _init_fit_dataloaders(
valid_tsdatasets, valid_labels,
self._fit_params['input_lens'])
else:
-valid_dataset = data_adapter.to_paddle_dataset(valid_tsdatasets,
-valid_labels)
+valid_dataset = data_adapter.to_paddle_dataset(
+valid_tsdatasets, valid_labels)
valid_dataloader = data_adapter.to_paddle_dataloader(
valid_dataset, self._batch_size, shuffle=False)

@@ -282,8 +284,8 @@ def _init_predict_dataloader(
tsdatasets = [tsdatasets]
self._check_tsdatasets(tsdatasets, labels)
data_adapter = ClassifyDataAdapter()
-dataset = data_adapter.to_paddle_dataset(tsdatasets, labels,
-self._fit_params['input_lens'])
+dataset = data_adapter.to_paddle_dataset(
+tsdatasets, labels, self._fit_params['input_lens'])
dataloader = data_adapter.to_paddle_dataloader(
dataset, self._batch_size, shuffle=False)
return dataloader
@@ -420,7 +422,8 @@ def predict(
# np.save('probs',probs)
rng = check_random_state(self._seed)
return np.array([
-self._classes_[int(rng.choice(np.flatnonzero(prob == prob.max())))]
+self._classes_[int(
+rng.choice(np.flatnonzero(prob == prob.max())))]
for prob in probs
])

11 changes: 4 additions & 7 deletions paddlets/models/common/callbacks/callbacks.py
@@ -375,14 +375,11 @@ def on_batch_end(self, batch: int, logs: Optional[Dict[str, Any]]=None):
max_mem_reserved_str = ""
max_mem_allocated_str = ""
if paddle.device.is_compiled_with_cuda() and utils.print_mem_info:
-if paddle.device.cuda.max_memory_reserved() / (1024**2) < 1:
-max_mem_reserved_str = f", max_mem_reserved: {paddle.device.cuda.max_memory_reserved() // 1024} KB"
-max_mem_allocated_str = f", max_mem_allocated: {paddle.device.cuda.max_memory_allocated() // 1024} KB"
-else:
-max_mem_reserved_str = f", max_mem_reserved: {paddle.device.cuda.max_memory_reserved() // (1024 ** 2)} MB"
-max_mem_allocated_str = f", max_mem_allocated: {paddle.device.cuda.max_memory_allocated() // (1024 ** 2)} MB"
+max_mem_reserved_str = f", max_mem_reserved: {paddle.device.cuda.max_memory_reserved() // (1024 ** 2)} MB"
+max_mem_allocated_str = f", max_mem_allocated: {paddle.device.cuda.max_memory_allocated() // (1024 ** 2)} MB"

msg += f"{max_mem_reserved_str}{max_mem_allocated_str}"
total_time = int(time.time() - self._start_time)
msg += f" | {str(datetime.timedelta(seconds=total_time)) + 's':<6}"
-logger.info(msg)
+if batch % utils.log_interval == 0:
+logger.info(msg)
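
Net effect of this hunk: memory is now always reported in MB (the old KB/MB switch is gone), and the log line is emitted only on every log_interval-th batch. A minimal sketch of the gate, with the module-level utils.log_interval stood in by a local variable:

log_interval = 5  # e.g. configured via paddlets.utils.utils.set_log_interval(5)

for batch in range(1, 21):
    msg = f"batch {batch:>2} | loss: ..."  # placeholder for the real metrics line
    if batch % log_interval == 0:          # same gate as in on_batch_end above
        print(msg)                         # prints batches 5, 10, 15 and 20 only
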
13 changes: 10 additions & 3 deletions paddlets/utils/config.py
@@ -89,6 +89,14 @@ def seq_len(self) -> int:
def epoch(self) -> int:
return self.dic.get('epoch')

+@property
+def log_interval(self) -> int:
+return self.dic.get('log_interval', 1)
+
+@property
+def print_mem_info(self) -> bool:
+return self.dic.get('print_mem_info', False)
+
@property
def model(self) -> Dict:
return self.dic.get('model', {}).copy()
@@ -157,9 +165,8 @@ def update_config_dict(
if learning_rate:
dic['model']['model_cfg']['optimizer_params'][
'learning_rate'] = learning_rate
-dic['model']['model_cfg']['optimizer_params'][
-'learning_rate'] = float(dic['model']['model_cfg']['optimizer_params'][
-'learning_rate'])
+dic['model']['model_cfg']['optimizer_params']['learning_rate'] = float(dic[
+'model']['model_cfg']['optimizer_params']['learning_rate'])
if batch_size:
dic['batch_size'] = batch_size
if epoch:
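With these two properties, a training config can opt into the new behavior without code changes. An illustrative config dict as Config would consume it (only keys visible in this diff; values made up):

cfg_dict = {
    "batch_size": 32,
    "epoch": 10,
    "log_interval": 50,      # Config.log_interval falls back to 1 when absent
    "print_mem_info": True,  # Config.print_mem_info falls back to False when absent
}
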
20 changes: 15 additions & 5 deletions paddlets/utils/utils.py
@@ -21,13 +21,19 @@
logger = Logger(__name__)
# Compatible with earlier versions
print_mem_info = True
+log_interval = 1


def set_print_mem_info(flag: bool):
global print_mem_info
print_mem_info = flag
+
+
+def set_log_interval(interval: int):
+global log_interval
+log_interval = interval


def check_model_fitted(model: Trainable, msg: str=None):
"""
check if model has fitted, Raise Exception if not fitted
@@ -182,7 +188,8 @@ def check_train_valid_continuity(train_data: TSDataset,
pd.to_timedelta(train_index.freq))
elif isinstance(train_index, pd.RangeIndex):
if isinstance(valid_index, pd.RangeIndex):
-continuious = (valid_index[0] - train_index[-1] == train_index.step)
+continuious = (
+valid_index[0] - train_index[-1] == train_index.step)
else:
raise_log("Unsupport data index format")

@@ -317,7 +324,8 @@ def get_tsdataset_max_len(dataset: TSDataset) -> int:
return len(all_index)


-def repr_results_to_tsdataset(reprs: np.array, dataset: TSDataset) -> TSDataset:
+def repr_results_to_tsdataset(reprs: np.array,
+dataset: TSDataset) -> TSDataset:
"""
Convert representation model output to a TSDataset
@@ -447,8 +455,9 @@ def build_ts_infer_input(tsdataset: TSDataset,
#build sample base on DataAdapter
data_adapter = DataAdapter()
if json_data['model_type'] == 'forecasting':
-raise_if_not(tsdataset.get_target() is not None,
-"The target of tsdataset can not be None for forecasting!")
+raise_if_not(
+tsdataset.get_target() is not None,
+"The target of tsdataset can not be None for forecasting!")
size_keys = ['in_chunk_len', 'out_chunk_len', 'skip_chunk_len']
for key in size_keys:
raise_if_not(
@@ -471,7 +480,8 @@
raise_if_not(
key in json_data['size'],
f"The {key} in json_data['size'] can not be None for anomaly!")
-dataset = data_adapter.to_sample_dataset(tsdataset, **json_data['size'])
+dataset = data_adapter.to_sample_dataset(tsdataset,
+**json_data['size'])
else:
raise_log(ValueError(f"Invalid model_type: {json_data['model_type']}"))

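set_log_interval mirrors the existing set_print_mem_info pattern: a module-level variable plus a setter, so the callback above can read utils.log_interval while callers adjust it from outside. A short usage sketch (import path as used in tools/train.py below):

from paddlets.utils.utils import set_log_interval, set_print_mem_info

set_log_interval(10)       # the batch callback now logs every 10th batch
set_print_mem_info(False)  # drop the max_mem_* fields from the log line
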
22 changes: 15 additions & 7 deletions tools/train.py
@@ -17,7 +17,7 @@
from paddlets.utils import backtest
from paddlets.utils.utils import convert_and_remove_types
from paddlets.logger import Logger
-from paddlets.utils.utils import set_print_mem_info, update_train_results
+from paddlets.utils.utils import set_print_mem_info, set_log_interval, update_train_results
from export import export

logger = Logger(__name__)
@@ -45,7 +45,8 @@ def parse_args():
type=str,
default=None)
# Runntime params
-parser.add_argument('--seq_len', help='input length in training.', type=int)
+parser.add_argument(
+'--seq_len', help='input length in training.', type=int)
parser.add_argument(
'--predict_len', help='output length in training.', type=int)
parser.add_argument('--epoch', help='Iterations in training.', type=int)
@@ -55,7 +56,10 @@

# Other params
parser.add_argument(
-'--seed', help='Set the random seed in training.', default=42, type=int)
+'--seed',
+help='Set the random seed in training.',
+default=42,
+type=int)
parser.add_argument(
'--opts', help='Update the key-value pairs of all options.', nargs='+')

@@ -84,6 +88,8 @@ def main(args):

print_mem_info = cfg.dic.get('print_mem_info', True)
set_print_mem_info(print_mem_info)
+log_interval = cfg.dic.get('log_interval', 1)
+set_log_interval(log_interval)
batch_size = cfg.batch_size
dataset = cfg.dataset
predict_len = cfg.predict_len
@@ -231,7 +237,8 @@ def main(args):
ts_train, ts_val, ts_test = get_dataset(dataset['name'], split,
seq_len, info_params)
else:
-ts_train = get_dataset(dataset['name'], split, seq_len, info_params)
+ts_train = get_dataset(dataset['name'], split, seq_len,
+info_params)

if cfg.model['name'] in ['TimesNetModel', 'Nonstationary_Transformer'
] and args.device == 'xpu':
@@ -240,7 +247,8 @@
if cfg.model['name'] == 'PP-TS':
from paddlets.ensemble import WeightingEnsembleForecaster
estimators = []
-for model_name, model_cfg in cfg.model['model_cfg']['Ensemble'].items():
+for model_name, model_cfg in cfg.model['model_cfg']['Ensemble'].items(
+):
model_cfg = Config(
model_cfg,
seq_len=seq_len,
@@ -311,8 +319,8 @@ def main(args):
if dataset['name'] != 'TSDataset':
ts_all = get_dataset(dataset['name'])
ts_all = time_feature_generator.fit_transform(ts_all)
-ts_train._known_cov = ts_all._known_cov[split['train'][0]:split[
-'train'][1]]
+ts_train._known_cov = ts_all._known_cov[split['train'][0]:
+split['train'][1]]
if ts_val is not None:
ts_val._known_cov = ts_all._known_cov[split['val'][
0] - seq_len:split['val'][1]]
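
End to end, the wiring is: config key -> setter -> callback gate. Note the default asymmetry: tools/train.py reads print_mem_info with a default of True, while the new Config.print_mem_info property defaults to False; log_interval defaults to 1 on both paths. A condensed sketch of the flow (cfg stands for a loaded Config instance):

from paddlets.utils.utils import set_print_mem_info, set_log_interval

set_print_mem_info(cfg.dic.get('print_mem_info', True))  # train.py keeps True as the default
set_log_interval(cfg.dic.get('log_interval', 1))         # batch logging every N-th batch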