diff --git a/paddlespeech/cli/asr/infer.py b/paddlespeech/cli/asr/infer.py index 7296776f9c5..0c794a00105 100644 --- a/paddlespeech/cli/asr/infer.py +++ b/paddlespeech/cli/asr/infer.py @@ -12,6 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. import argparse +import io import os import sys import time @@ -229,6 +230,8 @@ def preprocess(self, model_type: str, input: Union[str, os.PathLike]): audio_file = input if isinstance(audio_file, (str, os.PathLike)): logger.debug("Preprocess audio_file:" + audio_file) + elif isinstance(audio_file, io.BytesIO): + audio_file.seek(0) # Get the object for feature extraction if "deepspeech2" in model_type or "conformer" in model_type or "transformer" in model_type: @@ -352,6 +355,8 @@ def _check(self, audio_file: str, sample_rate: int, force_yes: bool=False): if not os.path.isfile(audio_file): logger.error("Please input the right audio file path") return False + elif isinstance(audio_file, io.BytesIO): + audio_file.seek(0) logger.debug("checking the audio file format......") try: