"""
Launch a vLLM OpenAI-compatible server for google/gemma-3n-E4B-it in venv.

Reads configuration from the environment (optionally populated from a .env
file via python-dotenv):
  VLLM_PORT — port to serve on (default "8000")
  HF_TOKEN  — Hugging Face token used by vLLM to download the gated model
"""
from dotenv import load_dotenv

# Load .env before reading os.environ below so .env values are visible.
load_dotenv()

import os
import subprocess
import sys

MODEL = "google/gemma-3n-E4B-it"
PORT = os.environ.get("VLLM_PORT", "8000")
HF_TOKEN = os.environ.get("HF_TOKEN")


def _build_command(model: str, port: str) -> list[str]:
    """Return the argv list that starts the vLLM OpenAI-compatible server.

    NOTE: the api_server CLI has no ``--token`` flag — passing one makes the
    server exit with "unrecognized arguments". The Hugging Face token is read
    from the HF_TOKEN environment variable instead (see ``main``), which also
    avoids leaking the secret on the process command line.
    """
    return [
        sys.executable, "-m", "vllm.entrypoints.openai.api_server",
        "--model", model,
        "--port", port,
        "--host", "0.0.0.0",
    ]


def main() -> int:
    """Validate configuration, launch the server, and return its exit code."""
    if not HF_TOKEN:
        print("[ERROR] Please set the HF_TOKEN environment variable for model download.")
        return 1

    cmd = _build_command(MODEL, PORT)
    print(f"[INFO] Launching vLLM server for {MODEL} on port {PORT}...")
    # Pass the token through the environment; vLLM / huggingface_hub pick up
    # HF_TOKEN from there when downloading the model.
    env = {**os.environ, "HF_TOKEN": HF_TOKEN}
    # Propagate the server's exit status instead of always exiting 0.
    return subprocess.run(cmd, env=env).returncode


if __name__ == "__main__":
    sys.exit(main())
# NOTE(review): this file previously contained a second, byte-identical copy
# of the launcher script above — an accidental paste duplication. Because
# subprocess.run() blocks, the duplicate would silently re-launch the server
# a second time after the first server process exited. The duplicate has been
# removed.
|