Tim Luka Horstmann
committed on
Commit
·
c2b5e84
1
Parent(s):
f32e6b8
Fix
Browse files
- .gitignore +0 -0
- Dockerfile +3 -1
.gitignore
ADDED
|
File without changes
|
Dockerfile
CHANGED
|
@@ -47,7 +47,9 @@ RUN pip install --no-cache-dir huggingface_hub
|
|
| 47 |
|
| 48 |
# 9) Clone llama.cpp for quantize tool
|
| 49 |
RUN git clone https://github.com/ggerganov/llama.cpp.git /tmp/llama.cpp && \
|
| 50 |
-
cd /tmp/llama.cpp &&
|
|
|
|
|
|
|
| 51 |
|
| 52 |
# 10) Download & pre-quantize model (no runtime repack)
|
| 53 |
RUN python3 - <<EOF
|
|
|
|
| 47 |
|
| 48 |
# 9) Clone llama.cpp for quantize tool
|
| 49 |
RUN git clone https://github.com/ggerganov/llama.cpp.git /tmp/llama.cpp && \
|
| 50 |
+
cd /tmp/llama.cpp && \
|
| 51 |
+
cmake -B build -DLLAMA_BUILD_QUANTIZE=ON . && \
|
| 52 |
+
cmake --build build --target quantize
|
| 53 |
|
| 54 |
# 10) Download & pre-quantize model (no runtime repack)
|
| 55 |
RUN python3 - <<EOF
|