broadfield-dev committed
Commit 8a1a33a · verified · 1 Parent(s): dc35602

Update app.py

Files changed (1)
  1. app.py +10 -8
app.py CHANGED
@@ -21,13 +21,14 @@ if not HF_TOKEN:
     logging.warning("HF_TOKEN environment variable not set. Packaging and uploading will fail.")
 
 api = HfApi()
-# Use the /tmp directory which is always writable in a container environment
 OUTPUT_DIR = "/tmp/optimized_models"
 os.makedirs(OUTPUT_DIR, exist_ok=True)
 
-LLAMA_CPP_DIR = Path("llama.cpp")
+# Use an absolute path to the pre-built location in /opt
+LLAMA_CPP_DIR = Path("/opt/llama.cpp")
+# Binaries are in the 'build/bin' subdirectory from our out-of-source build
+LLAMA_CPP_QUANTIZE_SCRIPT = LLAMA_CPP_DIR / "build" / "bin" / "quantize"
 LLAMA_CPP_CONVERT_SCRIPT = LLAMA_CPP_DIR / "convert.py"
-LLAMA_CPP_QUANTIZE_SCRIPT = LLAMA_CPP_DIR / "quantize"
 
 if not LLAMA_CPP_QUANTIZE_SCRIPT.exists():
     error_msg = "FATAL ERROR: llama.cpp binaries not found. The Docker build may have failed."
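
This hunk stops resolving the llama.cpp checkout relative to the process working directory: the repo is now expected at a fixed location under /opt, with the compiled `quantize` binary in the `build/bin` subdirectory that a CMake out-of-source build produces. A minimal fail-fast sketch over that assumed layout, extending the existing existence check to also cover the convert script (paths are assumptions taken from the constants above):

```python
from pathlib import Path

# Assumed layout from the constants above: repo at /opt/llama.cpp,
# binaries under build/bin from an out-of-source CMake build.
LLAMA_CPP_DIR = Path("/opt/llama.cpp")
REQUIRED_ARTIFACTS = [
    LLAMA_CPP_DIR / "build" / "bin" / "quantize",  # compiled quantizer binary
    LLAMA_CPP_DIR / "convert.py",                  # conversion script at the repo root
]

missing = [str(p) for p in REQUIRED_ARTIFACTS if not p.exists()]
if missing:
    # Fail fast at startup rather than midway through a pipeline run.
    raise RuntimeError(f"llama.cpp artifacts not found (Docker build may have failed): {missing}")
```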
@@ -99,8 +100,9 @@ def stage_3_4_gguf_quantize(model_path_or_id: str, original_model_id: str, quant
     quantized_gguf_path = os.path.join(gguf_path, "model.gguf")
     absolute_model_path = os.path.abspath(model_path_or_id) if os.path.exists(model_path_or_id) else model_path_or_id
     try:
-        convert_command = ["python3", "convert.py", absolute_model_path, "--outfile", f16_gguf_path, "--outtype", "f16"]
-        process = subprocess.run(convert_command, check=True, capture_output=True, text=True, cwd=str(LLAMA_CPP_DIR))
+        # The python script can be called directly using its absolute path.
+        convert_command = ["python3", str(LLAMA_CPP_CONVERT_SCRIPT), absolute_model_path, "--outfile", f16_gguf_path, "--outtype", "f16"]
+        process = subprocess.run(convert_command, check=True, capture_output=True, text=True)
        log_stream += f"Executing llama.cpp conversion script...\n{process.stdout}\n"
        if process.stderr: log_stream += f"[STDERR]\n{process.stderr}\n"
        quantize_map = {"q4_k_m": "Q4_K_M", "q5_k_m": "Q5_K_M", "q8_0": "Q8_0", "f16": "F16"}
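
With `convert.py` addressed by absolute path, the `cwd=` argument to `subprocess.run` can be dropped: the child process no longer needs to start inside the checkout, and `--outfile` no longer depends on the child's working directory, assuming `f16_gguf_path` is itself absolute (it is presumably built from the absolute `OUTPUT_DIR`). A sketch of the run-and-log pattern this hunk uses, with the `CalledProcessError` that `check=True` raises folded into the error message (the helper name is hypothetical):

```python
import subprocess

def run_logged(command: list[str]) -> str:
    """Hypothetical helper mirroring the pattern above: run a command and
    return its combined output for the pipeline log."""
    try:
        process = subprocess.run(command, check=True, capture_output=True, text=True)
    except subprocess.CalledProcessError as err:
        # check=True raises on a non-zero exit; stderr is still captured on err.
        raise RuntimeError(f"{command[0]} failed:\n{err.stderr}") from err
    log = process.stdout or ""
    if process.stderr:
        log += f"[STDERR]\n{process.stderr}\n"
    return log
```

Both subprocess calls in this file follow this shape, so a helper like this would also keep the conversion and quantization hunks in sync.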
@@ -110,8 +112,8 @@ def stage_3_4_gguf_quantize(model_path_or_id: str, original_model_id: str, quant
             os.rename(f16_gguf_path, quantized_gguf_path)
         else:
             log_stream += f"Quantizing FP16 GGUF to {target_quant_name}...\n"
-            quantize_command = ["./quantize", f16_gguf_path, quantized_gguf_path, target_quant_name]
-            process = subprocess.run(quantize_command, check=True, capture_output=True, text=True, cwd=str(LLAMA_CPP_DIR))
+            quantize_command = [str(LLAMA_CPP_QUANTIZE_SCRIPT), f16_gguf_path, quantized_gguf_path, target_quant_name]
+            process = subprocess.run(quantize_command, check=True, capture_output=True, text=True)
             log_stream += f"{process.stdout}\n"
             if process.stderr: log_stream += f"[STDERR]\n{process.stderr}\n"
             os.remove(f16_gguf_path)
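
The quantize step gets the same treatment: `./quantize` only resolved while `cwd=` pointed into the checkout, whereas the absolute `LLAMA_CPP_QUANTIZE_SCRIPT` works from any working directory. A usage sketch (the binary location is assumed from the constants above; `Q4_K_M` is one of the types in `quantize_map`):

```python
import subprocess
from pathlib import Path

LLAMA_CPP_QUANTIZE = Path("/opt/llama.cpp/build/bin/quantize")  # assumed location

def quantize_gguf(f16_path: str, out_path: str, quant_type: str = "Q4_K_M") -> None:
    # str() keeps the argument list plain strings; the absolute path means
    # no cwd= is needed regardless of where the app process was launched.
    subprocess.run(
        [str(LLAMA_CPP_QUANTIZE), f16_path, out_path, quant_type],
        check=True, capture_output=True, text=True,
    )
```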
@@ -179,7 +181,7 @@ def run_amop_pipeline(model_id: str, pipeline_type: str, do_prune: bool, prune_p
             raise ValueError("Invalid pipeline type selected.")
         full_log += log
         yield {final_output: "Packaging & Uploading (4/5)", log_output: full_log}
-        final_message, log = stage_5_package_and_upload(model_id, optimized_path, full_log, options)
+        final_message, log = stage_5_package_and_upload(model_id, optimized_model_path, full_log, options)
         full_log += log
         yield {final_output: gr.update(value="SUCCESS", label="Status"), log_output: full_log, success_box: gr.Markdown(f"✅ **Success!** Model available: [{repo_id_for_link}](https://huggingface.co/{repo_id_for_link})", visible=True), run_button: gr.Button(interactive=True, value="Run Optimization Pipeline", variant="primary"), analyze_button: gr.Button(interactive=True, value="Analyze Model")}
     except Exception as e:
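
The final hunk is a one-word bug fix: `optimized_path` is never assigned in `run_amop_pipeline`, so the packaging step would have raised a `NameError`. Because the function is a generator, the error only surfaces once the pipeline actually reaches stage 4, which is easy to miss in testing. A minimal sketch of that failure mode (names are illustrative):

```python
def pipeline():
    optimized_model_path = "/tmp/optimized_models/demo"  # set by an earlier stage
    yield "Packaging & Uploading (4/5)"
    # Referencing the misspelled `optimized_path` here would raise NameError,
    # but only once the generator is advanced to this step:
    yield f"uploading from {optimized_model_path}"

for status in pipeline():
    print(status)
```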
 