Error when running a model inference
When executing the following code, an error appears that prevents the model from being used:
from transformers import pipeline
import torch

pipe = pipeline("automatic-speech-recognition", model="openai/whisper-large-v3")

audio_file_path = "/kaggle/input/audios3/6d1a6405e10d3a882536bd8363ace064e6cd80236e49d7e8a574c392ecc3ae05.wav"

with open(audio_file_path, "rb") as audio_file:
    audio_content = audio_file.read()

transcription = pipe(audio_content, lang="es")
print(transcription)
The complete traceback is shown below:
ValueError Traceback (most recent call last)
Cell In[1], line 4
1 from transformers import pipeline
2 import torch
----> 4 pipe = pipeline("automatic-speech-recognition", model="openai/whisper-large-v3")
6 audio_file_path = "/kaggle/input/audios3/6d1a6405e10d3a882536bd8363ace064e6cd80236e49d7e8a574c392ecc3ae05.wav"
8 with open(audio_file_path, "rb") as audio_file:
File /opt/conda/lib/python3.10/site-packages/transformers/pipelines/__init__.py:921, in pipeline(task, model, config, tokenizer, feature_extractor, image_processor, framework, revision, use_fast, token, device, device_map, torch_dtype, trust_remote_code, model_kwargs, pipeline_class, **kwargs)
918 tokenizer_kwargs = model_kwargs.copy()
919 tokenizer_kwargs.pop("torch_dtype", None)
--> 921 tokenizer = AutoTokenizer.from_pretrained(
922 tokenizer_identifier, use_fast=use_fast, _from_pipeline=task, **hub_kwargs, **tokenizer_kwargs
923 )
925 if load_image_processor:
926 # Try to infer image processor from model or config name (if provided as str)
927 if image_processor is None:
File /opt/conda/lib/python3.10/site-packages/transformers/models/auto/tokenization_auto.py:736, in AutoTokenizer.from_pretrained(cls, pretrained_model_name_or_path, *inputs, **kwargs)
732 if tokenizer_class is None:
733 raise ValueError(
734 f"Tokenizer class {tokenizer_class_candidate} does not exist or is not currently imported."
735 )
--> 736 return tokenizer_class.from_pretrained(pretrained_model_name_or_path, *inputs, **kwargs)
738 # Otherwise we have to be creative.
739 # if model is an encoder decoder, the encoder tokenizer class is used by default
740 if isinstance(config, EncoderDecoderConfig):
File /opt/conda/lib/python3.10/site-packages/transformers/tokenization_utils_base.py:1854, in PreTrainedTokenizerBase.from_pretrained(cls, pretrained_model_name_or_path, cache_dir, force_download, local_files_only, token, revision, *init_inputs, **kwargs)
1851 else:
1852 logger.info(f"loading file {file_path} from cache at {resolved_vocab_files[file_id]}")
-> 1854 return cls._from_pretrained(
1855 resolved_vocab_files,
1856 pretrained_model_name_or_path,
1857 init_configuration,
1858 *init_inputs,
1859 token=token,
1860 cache_dir=cache_dir,
1861 local_files_only=local_files_only,
1862 _commit_hash=commit_hash,
1863 _is_local=is_local,
1864 **kwargs,
1865 )
File /opt/conda/lib/python3.10/site-packages/transformers/tokenization_utils_base.py:1886, in PreTrainedTokenizerBase._from_pretrained(cls, resolved_vocab_files, pretrained_model_name_or_path, init_configuration, token, cache_dir, local_files_only, _commit_hash, _is_local, *init_inputs, **kwargs)
1884 has_tokenizer_file = resolved_vocab_files.get("tokenizer_file", None) is not None
1885 if (from_slow or not has_tokenizer_file) and cls.slow_tokenizer_class is not None:
-> 1886 slow_tokenizer = (cls.slow_tokenizer_class)._from_pretrained(
1887 copy.deepcopy(resolved_vocab_files),
1888 pretrained_model_name_or_path,
1889 copy.deepcopy(init_configuration),
1890 *init_inputs,
1891 token=token,
1892 cache_dir=cache_dir,
1893 local_files_only=local_files_only,
1894 _commit_hash=_commit_hash,
1895 **(copy.deepcopy(kwargs)),
1896 )
1897 else:
1898 slow_tokenizer = None
File /opt/conda/lib/python3.10/site-packages/transformers/tokenization_utils_base.py:2073, in PreTrainedTokenizerBase._from_pretrained(cls, resolved_vocab_files, pretrained_model_name_or_path, init_configuration, token, cache_dir, local_files_only, _commit_hash, _is_local, *init_inputs, **kwargs)
2066 raise ValueError(
2067 f"Wrong index found for {token}: should be {tokenizer.convert_tokens_to_ids(token)} but found "
2068 f"{index}."
2069 )
2070 elif not has_tokenizer_file and index != current_index:
2071 # Tokenizer slow: added token cannot already be in the vocabulary so its index needs to be the
2072 # current length of the tokenizer.
-> 2073 raise ValueError(
2074 f"Non-consecutive added token '{token}' found. "
2075 f"Should have index {current_index} but has index {index} in saved vocabulary."
2076 )
2078 is_special = bool(token in special_tokens)
2079 if is_last_special is None or is_last_special == is_special:
ValueError: Non-consecutive added token '<|0.02|>' found. Should have index 50365 but has index 50366 in saved vocabulary.
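The message indicates that the added-token indices in the locally cached vocabulary are out of step with what the tokenizer rebuilds, so a stale or corrupted cache is worth ruling out first. A minimal sketch, assuming the cache is the culprit (force_download is a standard from_pretrained argument that bypasses the local cache):

from transformers import AutoTokenizer

# Re-fetch the tokenizer files from the Hub, ignoring whatever is cached locally.
tokenizer = AutoTokenizer.from_pretrained("openai/whisper-large-v3", force_download=True)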
I am also getting the same error.
Hey @agonben23 and @Usman8433 ! Could you both confirm that you're running the latest version of the transformers package? i.e. the version you get with:
pip install -U transformers
I am not able to reproduce the error with the latest version of the package. If it still persists after update, could you post the output of the following command please:
transformers-cli env
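For a quick check from within Python, this prints the installed version:

import transformers
print(transformers.__version__)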
- transformers version: 4.36.0.dev0
- Platform: Windows-10-10.0.22631-SP0
- Python version: 3.11.5
- Huggingface_hub version: 0.19.4
- Safetensors version: 0.4.0
- Accelerate version: 0.24.1
- Accelerate config: not found
- PyTorch version (GPU?): 2.1.1 (True)
- Tensorflow version (GPU?): not installed (NA)
- Flax version (CPU?/GPU?/TPU?): not installed (NA)
- Jax version: not installed
- JaxLib version: not installed
- Using GPU in script?:
- Using distributed or parallel set-up in script?:
Traceback (most recent call last):
File "C:\Users\l't's\.conda\envs\py311\Lib\site-packages\urllib3\connectionpool.py", line 715, in urlopen
httplib_response = self._make_request(
^^^^^^^^^^^^^^^^^^^
File "C:\Users\l't's\.conda\envs\py311\Lib\site-packages\urllib3\connectionpool.py", line 404, in _make_request
self._validate_conn(conn)
File "C:\Users\l't's\.conda\envs\py311\Lib\site-packages\urllib3\connectionpool.py", line 1058, in _validate_conn
conn.connect()
File "C:\Users\l't's\.conda\envs\py311\Lib\site-packages\urllib3\connection.py", line 419, in connect
self.sock = ssl_wrap_socket(
^^^^^^^^^^^^^^^^
File "C:\Users\l't's\.conda\envs\py311\Lib\site-packages\urllib3\util\ssl_.py", line 449, in ssl_wrap_socket
ssl_sock = _ssl_wrap_socket_impl(
^^^^^^^^^^^^^^^^^^^^^^
File "C:\Users\l't's\.conda\envs\py311\Lib\site-packages\urllib3\util\ssl_.py", line 493, in _ssl_wrap_socket_impl
return ssl_context.wrap_socket(sock, server_hostname=server_hostname)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\Users\l't's\.conda\envs\py311\Lib\ssl.py", line 517, in wrap_socket
return self.sslsocket_class._create(
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\Users\l't's\.conda\envs\py311\Lib\ssl.py", line 1108, in _create
self.do_handshake()
File "C:\Users\l't's\.conda\envs\py311\Lib\ssl.py", line 1379, in do_handshake
self._sslobj.do_handshake()
ssl.SSLEOFError: [SSL: UNEXPECTED_EOF_WHILE_READING] EOF occurred in violation of protocol (_ssl.c:1006)
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "C:\Users\l't's\.conda\envs\py311\Lib\site-packages\requests\adapters.py", line 486, in send
resp = conn.urlopen(
^^^^^^^^^^^^^
File "C:\Users\l't's\.conda\envs\py311\Lib\site-packages\urllib3\connectionpool.py", line 799, in urlopen
retries = retries.increment(
^^^^^^^^^^^^^^^^^^
File "C:\Users\l't's\.conda\envs\py311\Lib\site-packages\urllib3\util\retry.py", line 592, in increment
raise MaxRetryError(_pool, url, error or ResponseError(cause))
urllib3.exceptions.MaxRetryError: HTTPSConnectionPool(host='huggingface.co', port=443): Max retries exceeded with url: /openai/whisper-large-v3/resolve/main/config.json (Caused by SSLError(SSLEOFError(8, '[SSL: UNEXPECTED_EOF_WHILE_READING] EOF occurred in violation of protocol (_ssl.c:1006)')))
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "C:\Users\l't's\code\whisper\fast_tr.py", line 12, in <module>
model = AutoModelForSpeechSeq2Seq.from_pretrained(
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\Users\l't's\.conda\envs\py311\Lib\site-packages\transformers\models\auto\auto_factory.py", line 488, in from_pretrained
resolved_config_file = cached_file(
^^^^^^^^^^^^
File "C:\Users\l't's\.conda\envs\py311\Lib\site-packages\transformers\utils\hub.py", line 389, in cached_file
resolved_file = hf_hub_download(
^^^^^^^^^^^^^^^^
File "C:\Users\l't's\.conda\envs\py311\Lib\site-packages\huggingface_hub\utils\_validators.py", line 118, in _inner_fn
return fn(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^
File "C:\Users\l't's\.conda\envs\py311\Lib\site-packages\huggingface_hub\file_download.py", line 1247, in hf_hub_download
metadata = get_hf_file_metadata(
^^^^^^^^^^^^^^^^^^^^^
File "C:\Users\l't's\.conda\envs\py311\Lib\site-packages\huggingface_hub\utils\_validators.py", line 118, in _inner_fn
return fn(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^
File "C:\Users\l't's\.conda\envs\py311\Lib\site-packages\huggingface_hub\file_download.py", line 1624, in get_hf_file_metadata
r = _request_wrapper(
^^^^^^^^^^^^^^^^^
File "C:\Users\l't's\.conda\envs\py311\Lib\site-packages\huggingface_hub\file_download.py", line 402, in _request_wrapper
response = _request_wrapper(
^^^^^^^^^^^^^^^^^
File "C:\Users\l't's\.conda\envs\py311\Lib\site-packages\huggingface_hub\file_download.py", line 425, in _request_wrapper
response = get_session().request(method=method, url=url, **params)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\Users\l't's\.conda\envs\py311\Lib\site-packages\requests\sessions.py", line 589, in request
resp = self.send(prep, **send_kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\Users\l't's\.conda\envs\py311\Lib\site-packages\requests\sessions.py", line 703, in send
r = adapter.send(request, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\Users\l't's\.conda\envs\py311\Lib\site-packages\huggingface_hub\utils\_http.py", line 63, in send
return super().send(request, *args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\Users\l't's\.conda\envs\py311\Lib\site-packages\requests\adapters.py", line 517, in send
raise SSLError(e, request=request)
requests.exceptions.SSLError: (MaxRetryError("HTTPSConnectionPool(host='huggingface.co', port=443): Max retries exceeded with url: /openai/whisper-large-v3/resolve/main/config.json (Caused by SSLError(SSLEOFError(8, '[SSL: UNEXPECTED_EOF_WHILE_READING] EOF occurred in violation of protocol (_ssl.c:1006)')))"), '(Request ID: 2871eed3-0659-4e0a-93ba-2c8d16c599af)')
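Note that this second traceback is a network/SSL failure while fetching config.json from huggingface.co, not the tokenizer error above. If the connection is flaky (e.g. behind a proxy), one workaround is to fetch the files once with huggingface_hub, which can resume interrupted downloads, and then load from the local path. A sketch, not a guaranteed fix for the underlying SSL problem:

from huggingface_hub import snapshot_download
from transformers import AutoModelForSpeechSeq2Seq

# Download (or resume downloading) all files of the repo into the local cache
# and return the directory they ended up in.
local_path = snapshot_download("openai/whisper-large-v3")

# From here on, loading happens entirely from disk.
model = AutoModelForSpeechSeq2Seq.from_pretrained(local_path)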
Try this:
import torch
from transformers import AutoModelForSpeechSeq2Seq, AutoProcessor, pipeline
import soundfile as sf
import shutup; shutup.please()  # optional: suppresses warning output

device = "cuda:0" if torch.cuda.is_available() else "cpu"
torch_dtype = torch.float16 if torch.cuda.is_available() else torch.float32

model_id = "openai/whisper-large-v3"

model = AutoModelForSpeechSeq2Seq.from_pretrained(
    model_id, torch_dtype=torch_dtype, low_cpu_mem_usage=True, use_safetensors=True
)
model.to(device)

processor = AutoProcessor.from_pretrained(model_id)

pipe = pipeline(
    "automatic-speech-recognition",
    model=model,
    tokenizer=processor.tokenizer,
    feature_extractor=processor.feature_extractor,
    max_new_tokens=128,
    chunk_length_s=30,
    batch_size=16,
    return_timestamps=True,
    torch_dtype=torch_dtype,
    device=device,
)
# Don't pass the file path directly (result = pipe("your_file.wav")); without
# ffmpeg installed this fails with "ffmpeg was not found but is required to
# load audio files from filename". Read the audio with soundfile instead and
# hand the pipeline the raw array together with its sampling rate:
data, samplerate = sf.read("your_file.wav")
result = pipe({"raw": data, "sampling_rate": samplerate})
print(result["text"])
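One more note on the snippet from the original question: lang is not a recognized pipeline argument, so pipe(audio_content, lang="es") would fail even once the loading error is solved. For Whisper checkpoints the language is passed through generate_kwargs instead, e.g.:

result = pipe({"raw": data, "sampling_rate": samplerate}, generate_kwargs={"language": "spanish"})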