Spaces:
Paused
Paused
Update app.py
Browse files
app.py
CHANGED
|
@@ -27,10 +27,10 @@ os.makedirs(OUTPUT_DIR, exist_ok=True)
|
|
| 27 |
|
| 28 |
# --- LLAMA.CPP SETUP ---
|
| 29 |
LLAMA_CPP_DIR = Path("llama.cpp")
|
| 30 |
-
|
| 31 |
-
|
|
|
|
| 32 |
|
| 33 |
-
## FIXED FUNCTION: Replaced 'make' with 'cmake' for the build process.
|
| 34 |
def setup_llama_cpp():
|
| 35 |
"""Clones and builds llama.cpp if not already present."""
|
| 36 |
if not LLAMA_CPP_DIR.exists():
|
|
@@ -43,22 +43,11 @@ def setup_llama_cpp():
|
|
| 43 |
logging.error(error_msg, exc_info=True)
|
| 44 |
raise RuntimeError(error_msg)
|
| 45 |
|
| 46 |
-
# If the binary doesn't exist, try to build it with CMake.
|
| 47 |
if not LLAMA_CPP_QUANTIZE_SCRIPT.exists():
|
| 48 |
logging.info("llama.cpp 'quantize' binary not found. Building with CMake...")
|
| 49 |
try:
|
| 50 |
-
|
| 51 |
-
subprocess.run(
|
| 52 |
-
["cmake", "."],
|
| 53 |
-
cwd=str(LLAMA_CPP_DIR), # Run command inside the llama.cpp directory
|
| 54 |
-
check=True, capture_output=True, text=True
|
| 55 |
-
)
|
| 56 |
-
# Step 2: Build the 'quantize' target
|
| 57 |
-
subprocess.run(
|
| 58 |
-
["cmake", "--build", ".", "--target", "quantize"],
|
| 59 |
-
cwd=str(LLAMA_CPP_DIR),
|
| 60 |
-
check=True, capture_output=True, text=True
|
| 61 |
-
)
|
| 62 |
logging.info("'quantize' binary built successfully with CMake.")
|
| 63 |
except subprocess.CalledProcessError as e:
|
| 64 |
error_msg = f"Failed to build llama.cpp with CMake. Error: {e.stderr}"
|
|
@@ -144,7 +133,7 @@ def stage_3_4_gguf_quantize(model_path_or_id: str, original_model_id: str, quant
|
|
| 144 |
quantized_gguf_path = os.path.join(gguf_path, "model.gguf")
|
| 145 |
|
| 146 |
try:
|
| 147 |
-
log_stream += f"Executing llama.cpp
|
| 148 |
convert_command = ["python3", str(LLAMA_CPP_CONVERT_SCRIPT), model_path_or_id, "--outfile", f16_gguf_path, "--outtype", "f16"]
|
| 149 |
process = subprocess.run(convert_command, check=True, capture_output=True, text=True)
|
| 150 |
log_stream += process.stdout
|
|
|
|
| 27 |
|
| 28 |
# --- LLAMA.CPP SETUP ---
|
| 29 |
LLAMA_CPP_DIR = Path("llama.cpp")
|
| 30 |
+
## FIX: The conversion script was renamed in the llama.cpp repository.
|
| 31 |
+
LLAMA_CPP_CONVERT_SCRIPT = LLAMA_CPP_DIR / "convert-hf-to-gguf.py"
|
| 32 |
+
LLAMA_CPP_QUANTIZE_SCRIPT = LLAMA_CPP_DIR / "quantize"
|
| 33 |
|
|
|
|
| 34 |
def setup_llama_cpp():
|
| 35 |
"""Clones and builds llama.cpp if not already present."""
|
| 36 |
if not LLAMA_CPP_DIR.exists():
|
|
|
|
| 43 |
logging.error(error_msg, exc_info=True)
|
| 44 |
raise RuntimeError(error_msg)
|
| 45 |
|
|
|
|
| 46 |
if not LLAMA_CPP_QUANTIZE_SCRIPT.exists():
|
| 47 |
logging.info("llama.cpp 'quantize' binary not found. Building with CMake...")
|
| 48 |
try:
|
| 49 |
+
subprocess.run(["cmake", "."], cwd=str(LLAMA_CPP_DIR), check=True, capture_output=True, text=True)
|
| 50 |
+
subprocess.run(["cmake", "--build", ".", "--target", "quantize"], cwd=str(LLAMA_CPP_DIR), check=True, capture_output=True, text=True)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 51 |
logging.info("'quantize' binary built successfully with CMake.")
|
| 52 |
except subprocess.CalledProcessError as e:
|
| 53 |
error_msg = f"Failed to build llama.cpp with CMake. Error: {e.stderr}"
|
|
|
|
| 133 |
quantized_gguf_path = os.path.join(gguf_path, "model.gguf")
|
| 134 |
|
| 135 |
try:
|
| 136 |
+
log_stream += f"Executing llama.cpp conversion script on '{model_path_or_id}'...\n"
|
| 137 |
convert_command = ["python3", str(LLAMA_CPP_CONVERT_SCRIPT), model_path_or_id, "--outfile", f16_gguf_path, "--outtype", "f16"]
|
| 138 |
process = subprocess.run(convert_command, check=True, capture_output=True, text=True)
|
| 139 |
log_stream += process.stdout
|