(APIServer pid=19) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ (APIServer pid=19) File "uvloop/loop.pyx", line 1518, in uvloop.loop.Loop.run_until_complete (APIServer pid=19) File "/usr/local/lib/python3.12/dist-packages/uvloop/__init__.py", line 61, in wrapper (APIServer pid=19) return await main (APIServer pid=19) ^^^^^^^^^^ (APIServer pid=19) File "/usr/local/lib/python3.12/dist-packages/vllm/entrypoints/openai/api_server.py", line 1884, in run_server (APIServer pid=19) await run_server_worker(listen_address, sock, args, **uvicorn_kwargs) (APIServer pid=19) File "/usr/local/lib/python3.12/dist-packages/vllm/entrypoints/openai/api_server.py", line 1902, in run_server_worker (APIServer pid=19) async with build_async_engine_client( (APIServer pid=19) ^^^^^^^^^^^^^^^^^^^^^^^^^^ (APIServer pid=19) File "/usr/lib/python3.12/contextlib.py", line 210, in __aenter__ (APIServer pid=19) return await anext(self.gen) (APIServer pid=19) ^^^^^^^^^^^^^^^^^^^^^ (APIServer pid=19) File "/usr/local/lib/python3.12/dist-packages/vllm/entrypoints/openai/api_server.py", line 180, in build_async_engine_client (APIServer pid=19) async with build_async_engine_client_from_engine_args( (APIServer pid=19) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ (APIServer pid=19) File "/usr/lib/python3.12/contextlib.py", line 210, in __aenter__ (APIServer pid=19) return await anext(self.gen) (APIServer pid=19) ^^^^^^^^^^^^^^^^^^^^^ (APIServer pid=19) File "/usr/local/lib/python3.12/dist-packages/vllm/entrypoints/openai/api_server.py", line 206, in build_async_engine_client_from_engine_args (APIServer pid=19) vllm_config = engine_args.create_engine_config(usage_context=usage_context) (APIServer pid=19) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ (APIServer pid=19) File "/usr/local/lib/python3.12/dist-packages/vllm/engine/arg_utils.py", line 1135, in create_engine_config (APIServer pid=19) self.speculative_config) = maybe_override_with_speculators( (APIServer pid=19) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ (APIServer pid=19) File "/usr/local/lib/python3.12/dist-packages/vllm/transformers_utils/config.py", line 501, in maybe_override_with_speculators (APIServer pid=19) config_dict, _ = PretrainedConfig.get_config_dict( (APIServer pid=19) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ (APIServer pid=19) File "/usr/local/lib/python3.12/dist-packages/transformers/configuration_utils.py", line 662, in get_config_dict (APIServer pid=19) config_dict, kwargs = cls._get_config_dict(pretrained_model_name_or_path, **kwargs) (APIServer pid=19) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ (APIServer pid=19) File "/usr/local/lib/python3.12/dist-packages/transformers/configuration_utils.py", line 721, in _get_config_dict (APIServer pid=19) resolved_config_file = cached_file( (APIServer pid=19) ^^^^^^^^^^^^ (APIServer pid=19) File "/usr/local/lib/python3.12/dist-packages/transformers/utils/hub.py", line 322, in cached_file (APIServer pid=19) file = cached_files(path_or_repo_id=path_or_repo_id, filenames=[filename], **kwargs) (APIServer pid=19) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ (APIServer pid=19) File "/usr/local/lib/python3.12/dist-packages/transformers/utils/hub.py", line 563, in cached_files (APIServer pid=19) raise OSError(f"There was a specific connection error when trying to load {path_or_repo_id}:\n{e}") from e (APIServer pid=19) OSError: There was a specific connection error when trying to load meta-llama/Meta-Llama-3.1-8B-Instruct: (APIServer pid=19) 401 Client Error: Unauthorized for url: https://huggingface.co/meta-llama/Meta-Llama-3.1-8B-Instruct/resolve/main/config.json (Request ID: Root=1-68f09e81-0e9d307178aa94ad289f4e6f;bd63fa06-0c90-4930-b0cd-41baebe71ecb) (APIServer pid=19) (APIServer pid=19) Invalid credentials in Authorization header