|
|
#!/bin/bash
#
# Project setup script: fetches a GGUF model, writes a default prompt
# template, prepares a Python virtual environment and compiles the C++
# engine library.
#
# Usage: <this script> [MODEL_URL]

# Stop at the first command that exits with a non-zero status.
set -o errexit
|
|
|
|
|
|
|
|
|
|
|
# Model fetched when no URL is given on the command line.
# The path must use /resolve/ (raw file download); the /blob/ form of a
# Hugging Face URL serves the HTML file-viewer page instead of the model.
DEFAULT_URL="https://huggingface.co/microsoft/Phi-3-mini-4k-instruct-gguf/resolve/main/Phi-3-mini-4k-instruct-q4.gguf"

# First positional argument overrides the default model URL.
MODEL_URL="${1:-$DEFAULT_URL}"

# Directory that receives the model file and the prompt template.
MODEL_DIR="model"

# Directory holding the C++ engine sources.
ENGINE_DIR="engine"
|
|
|
|
|
# Announce the start of the setup run.
printf '%s\n' ">>> Setting up Ultra-Fast LLM..."

# Create the model directory (and any missing parents) unless it is
# already present.
[ -d "$MODEL_DIR" ] || mkdir -p "$MODEL_DIR"
|
|
|
|
|
|
|
|
# Download the model unless a copy is already present.
# The download goes to a temporary ".part" file that is renamed only after
# wget succeeds, so an interrupted or failed transfer can never leave a
# truncated model.gguf that a later run would mistake for a valid model.
model_path="$MODEL_DIR/model.gguf"
if [ ! -f "$model_path" ]; then
  echo ">>> Downloading model from $MODEL_URL..."
  partial_path="$model_path.part"
  if ! wget -O "$partial_path" --show-progress "$MODEL_URL"; then
    # Remove the incomplete download before aborting.
    rm -f -- "$partial_path"
    echo "Error: failed to download model from $MODEL_URL" >&2
    exit 1
  fi
  mv -- "$partial_path" "$model_path"
else
  echo ">>> Model already exists using existing..."
fi
|
|
|
|
|
|
|
|
# Write the default Phi-3 chat template unless one already exists.
# printf is used instead of echo because bash's echo does not expand "\n"
# without -e, which would put a literal backslash-n into the file instead
# of the line breaks between the chat markers.
if [ ! -f "$MODEL_DIR/template.txt" ]; then
  printf '<|user|>\n{{prompt}}<|end|>\n<|assistant|>\n' > "$MODEL_DIR/template.txt"
  echo ">>> Created default Phi-3 template."
fi
|
|
|
|
|
|
|
|
# Prepare the Python virtual environment and install its dependencies.
# Dependency installation is tracked with a stamp file rather than by the
# existence of .venv: if pip fails or is interrupted on the first run, the
# next run retries the install instead of silently skipping it.
deps_stamp=".venv/.requirements-installed"

if [ ! -d ".venv" ]; then
  echo ">>> Creating Python virtual environment..."
  python3 -m venv .venv
fi

# shellcheck disable=SC1091  # activate script is generated at runtime
source .venv/bin/activate

if [ ! -f "$deps_stamp" ]; then
  pip install -r requirements.txt
  # Record success only after pip has finished without error.
  touch "$deps_stamp"
fi
|
|
|
|
|
|
|
|
# Build the C++ engine as a shared library linked against libllama from the
# llama_cpp package installed in the virtual environment.
echo ">>> Compiling C++ Optimization Engine..."

if [ ! -d "$ENGINE_DIR" ]; then
  echo "Error: engine directory not found!" >&2
  exit 1
fi

# Ask the venv interpreter where its site-packages live instead of
# hard-coding "python3.12": the venv is created with whatever python3 is
# installed, so the version directory can differ between machines.
site_packages=$(.venv/bin/python -c 'import sysconfig; print(sysconfig.get_path("platlib"))') || {
  echo "Error: could not determine the virtualenv site-packages directory" >&2
  exit 1
}
llama_lib_dir="$site_packages/llama_cpp/lib"

# Compile inside a subshell so the working directory change is undone
# automatically, even if the compiler fails.
# The rpath is absolute: a relative rpath is resolved against the working
# directory of the process loading the library, not the library location.
(
  cd "$ENGINE_DIR"
  g++ -O2 -march=native -shared -fPIC -o libbatch.so batch_server.cpp \
    -I"$site_packages/include" \
    -L"$llama_lib_dir" \
    -lllama -Wl,-rpath,"$llama_lib_dir"
)
|
|
|
|
|
# Report success and show the command that starts the API server.
printf '%s\n' \
  ">>> Setup Complete!" \
  "Run server with: cd api && ../.venv/bin/uvicorn main:app --host 0.0.0.0 --port 8000"
|
|
|