The following error occurs when using the DEVISIGN dataset for training #41

Open · lanlanguai opened this issue Mar 2, 2023 · 0 comments

lanlanguai commented Mar 2, 2023

To verify that the code runs end to end, we trained on a small subset of the DEVISIGN dataset (about 20 samples), and the run failed with the error below. Could you please suggest a fix? Thank you very much.

---------------------------------------------------------------------------
IndexError                                Traceback (most recent call last)
Input In [3], in <cell line: 5>()
      3 model = ClassificationModel(cfg=cfg, trainer=trainer)
      4 model.init_from_checkpoint_if_available()
----> 5 model.fit()

File ~/OpenHands/openhands/apis/classification_model.py:108, in ClassificationModel.fit(self)
    104 def fit(self):
    105     """
    106     Method to be called to start the training.
    107     """
--> 108     self.trainer.fit(self, self.datamodule)

File ~/miniconda3/lib/python3.8/site-packages/pytorch_lightning/trainer/trainer.py:552, in Trainer.fit(self, model, train_dataloaders, val_dataloaders, datamodule, train_dataloader)
    546 self.data_connector.attach_data(
    547     model, train_dataloaders=train_dataloaders, val_dataloaders=val_dataloaders, datamodule=datamodule
    548 )
    550 self.checkpoint_connector.resume_start()
--> 552 self._run(model)
    554 assert self.state.stopped
    555 self.training = False

File ~/miniconda3/lib/python3.8/site-packages/pytorch_lightning/trainer/trainer.py:917, in Trainer._run(self, model)
    914 self.checkpoint_connector.restore_training_state()
    916 # dispatch `start_training` or `start_evaluating` or `start_predicting`
--> 917 self._dispatch()
    919 # plugin will finalized fitting (e.g. ddp_spawn will load trained model)
    920 self._post_dispatch()

File ~/miniconda3/lib/python3.8/site-packages/pytorch_lightning/trainer/trainer.py:985, in Trainer._dispatch(self)
    983     self.accelerator.start_predicting(self)
    984 else:
--> 985     self.accelerator.start_training(self)

File ~/miniconda3/lib/python3.8/site-packages/pytorch_lightning/accelerators/accelerator.py:92, in Accelerator.start_training(self, trainer)
     91 def start_training(self, trainer: "pl.Trainer") -> None:
---> 92     self.training_type_plugin.start_training(trainer)

File ~/miniconda3/lib/python3.8/site-packages/pytorch_lightning/plugins/training_type/training_type_plugin.py:161, in TrainingTypePlugin.start_training(self, trainer)
    159 def start_training(self, trainer: "pl.Trainer") -> None:
    160     # double dispatch to initiate the training loop
--> 161     self._results = trainer.run_stage()

File ~/miniconda3/lib/python3.8/site-packages/pytorch_lightning/trainer/trainer.py:995, in Trainer.run_stage(self)
    993 if self.predicting:
    994     return self._run_predict()
--> 995 return self._run_train()

File ~/miniconda3/lib/python3.8/site-packages/pytorch_lightning/trainer/trainer.py:1030, in Trainer._run_train(self)
   1027 if not self.is_global_zero and self.progress_bar_callback is not None:
   1028     self.progress_bar_callback.disable()
-> 1030 self._run_sanity_check(self.lightning_module)
   1032 # enable train mode
   1033 self.model.train()

File ~/miniconda3/lib/python3.8/site-packages/pytorch_lightning/trainer/trainer.py:1114, in Trainer._run_sanity_check(self, ref_model)
   1112 # run eval step
   1113 with torch.no_grad():
-> 1114     self._evaluation_loop.run()
   1116 self.on_sanity_check_end()
   1118 # reset validation metrics

File ~/miniconda3/lib/python3.8/site-packages/pytorch_lightning/loops/base.py:111, in Loop.run(self, *args, **kwargs)
    109 try:
    110     self.on_advance_start(*args, **kwargs)
--> 111     self.advance(*args, **kwargs)
    112     self.on_advance_end()
    113     self.iteration_count += 1

File ~/miniconda3/lib/python3.8/site-packages/pytorch_lightning/loops/dataloader/evaluation_loop.py:109, in EvaluationLoop.advance(self, *args, **kwargs)
    106 dataloader_iter = enumerate(dataloader)
    107 dl_max_batches = self._max_batches[self.current_dataloader_idx]
--> 109 dl_outputs = self.epoch_loop.run(
    110     dataloader_iter, self.current_dataloader_idx, dl_max_batches, self.num_dataloaders
    111 )
    113 # store batch level output per dataloader
    114 self.outputs.append(dl_outputs)

File ~/miniconda3/lib/python3.8/site-packages/pytorch_lightning/loops/base.py:111, in Loop.run(self, *args, **kwargs)
    109 try:
    110     self.on_advance_start(*args, **kwargs)
--> 111     self.advance(*args, **kwargs)
    112     self.on_advance_end()
    113     self.iteration_count += 1

File ~/miniconda3/lib/python3.8/site-packages/pytorch_lightning/loops/epoch/evaluation_epoch_loop.py:111, in EvaluationEpochLoop.advance(self, dataloader_iter, dataloader_idx, dl_max_batches, num_dataloaders)
    109 # lightning module methods
    110 with self.trainer.profiler.profile("evaluation_step_and_end"):
--> 111     output = self.evaluation_step(batch, batch_idx, dataloader_idx)
    112     output = self.evaluation_step_end(output)
    114 self.batch_progress.increment_processed()

File ~/miniconda3/lib/python3.8/site-packages/pytorch_lightning/loops/epoch/evaluation_epoch_loop.py:158, in EvaluationEpochLoop.evaluation_step(self, batch, batch_idx, dataloader_idx)
    156     self.trainer.lightning_module._current_fx_name = "validation_step"
    157     with self.trainer.profiler.profile("validation_step"):
--> 158         output = self.trainer.accelerator.validation_step(step_kwargs)
    160 return output

File ~/miniconda3/lib/python3.8/site-packages/pytorch_lightning/accelerators/accelerator.py:211, in Accelerator.validation_step(self, step_kwargs)
    199 """The actual validation step.
    200 
    201 Args:
   (...)
    208           (only if multiple val dataloaders used)
    209 """
    210 with self.precision_plugin.val_step_context(), self.training_type_plugin.val_step_context():
--> 211     return self.training_type_plugin.validation_step(*step_kwargs.values())

File ~/miniconda3/lib/python3.8/site-packages/pytorch_lightning/plugins/training_type/training_type_plugin.py:178, in TrainingTypePlugin.validation_step(self, *args, **kwargs)
    177 def validation_step(self, *args, **kwargs):
--> 178     return self.model.validation_step(*args, **kwargs)

File ~/OpenHands/openhands/apis/classification_model.py:43, in ClassificationModel.validation_step(self, batch, batch_idx)
     38 """
     39 Lightning calls this inside the training loop with the data from the validation dataloader
     40 passed in as `batch` and calculates the loss and the accuracy.
     41 """
     42 y_hat = self.model(batch["frames"])
---> 43 loss = self.loss(y_hat, batch["labels"])
     44 preds = F.softmax(y_hat, dim=-1)
     45 acc_top1 = self.accuracy_metric(preds, batch["labels"])

File ~/miniconda3/lib/python3.8/site-packages/torch/nn/modules/module.py:889, in Module._call_impl(self, *input, **kwargs)
    887     result = self._slow_forward(*input, **kwargs)
    888 else:
--> 889     result = self.forward(*input, **kwargs)
    890 for hook in itertools.chain(
    891         _global_forward_hooks.values(),
    892         self._forward_hooks.values()):
    893     hook_result = hook(self, input, result)

File ~/miniconda3/lib/python3.8/site-packages/torch/nn/modules/loss.py:1047, in CrossEntropyLoss.forward(self, input, target)
   1045 def forward(self, input: Tensor, target: Tensor) -> Tensor:
   1046     assert self.weight is None or isinstance(self.weight, Tensor)
-> 1047     return F.cross_entropy(input, target, weight=self.weight,
   1048                            ignore_index=self.ignore_index, reduction=self.reduction)

File ~/miniconda3/lib/python3.8/site-packages/torch/nn/functional.py:2693, in cross_entropy(input, target, weight, size_average, ignore_index, reduce, reduction)
   2691 if size_average is not None or reduce is not None:
   2692     reduction = _Reduction.legacy_get_string(size_average, reduce)
-> 2693 return nll_loss(log_softmax(input, 1), target, weight, None, ignore_index, None, reduction)

File ~/miniconda3/lib/python3.8/site-packages/torch/nn/functional.py:2388, in nll_loss(input, target, weight, size_average, ignore_index, reduce, reduction)
   2384     raise ValueError(
   2385         "Expected input batch_size ({}) to match target batch_size ({}).".format(input.size(0), target.size(0))
   2386     )
   2387 if dim == 2:
-> 2388     ret = torch._C._nn.nll_loss(input, target, weight, _Reduction.get_enum(reduction), ignore_index)
   2389 elif dim == 4:
   2390     ret = torch._C._nn.nll_loss2d(input, target, weight, _Reduction.get_enum(reduction), ignore_index)

IndexError: Target 11 is out of bounds.
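For reference, the same IndexError can be reproduced with plain PyTorch, independently of OpenHands; the shapes and label values below are only illustrative assumptions, not taken from our actual run:

import torch
import torch.nn.functional as F

# Suppose the classification head is configured for 10 classes,
# so valid target indices are 0..9.
logits = torch.randn(4, 10)

# A label index of 11 is >= the number of output classes (10).
targets = torch.tensor([0, 3, 11, 5])

# Raises: IndexError: Target 11 is out of bounds.
loss = F.cross_entropy(logits, targets)

So cross_entropy is rejecting a label (11) that is greater than or equal to the number of classes the model outputs. Our guess is that the small subset keeps the original DEVISIGN class IDs, which exceed the class count set in the model config; should the labels be remapped to a contiguous 0..K-1 range, or should the configured number of classes (we are unsure of the exact config key) be raised to cover the largest label?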

Looking forward to your reply. Best wishes!
