broadfield-dev committed on
Commit c2a4575 · verified · 1 Parent(s): 22406c4

Update app.py

Files changed (1): app.py (+5 -5)
app.py CHANGED
@@ -124,8 +124,8 @@ def stage_3_4_gguf_quantize(model_path_or_id: str, original_model_id: str, quant
     f16_gguf_path = os.path.join(gguf_path, "model-f16.gguf")
     quantized_gguf_path = os.path.join(gguf_path, "model.gguf")
     try:
-        convert_command = ["python3", str(LLAMA_CPP_CONVERT_SCRIPT), model_path_or_id, "--outfile", f16_gguf_path, "--outtype", "f16"]
-        process = subprocess.run(convert_command, check=True, capture_output=True, text=True)
+        convert_command = ["python3", "convert.py", model_path_or_id, "--outfile", f16_gguf_path, "--outtype", "f16"]
+        process = subprocess.run(convert_command, check=True, capture_output=True, text=True, cwd=str(LLAMA_CPP_DIR))
         log_stream += f"Executing llama.cpp conversion script on '{model_path_or_id}'...\n{process.stdout}\n"
         if process.stderr: log_stream += f"[STDERR]\n{process.stderr}\n"
         quantize_map = {"q4_k_m": "Q4_K_M", "q5_k_m": "Q5_K_M", "q8_0": "Q8_0", "f16": "F16"}
@@ -135,8 +135,8 @@ def stage_3_4_gguf_quantize(model_path_or_id: str, original_model_id: str, quant
             os.rename(f16_gguf_path, quantized_gguf_path)
         else:
             log_stream += f"Quantizing FP16 GGUF to {target_quant_name}...\n"
-            quantize_command = [str(LLAMA_CPP_QUANTIZE_SCRIPT), f16_gguf_path, quantized_gguf_path, target_quant_name]
-            process = subprocess.run(quantize_command, check=True, capture_output=True, text=True)
+            quantize_command = ["./quantize", f16_gguf_path, quantized_gguf_path, target_quant_name]
+            process = subprocess.run(quantize_command, check=True, capture_output=True, text=True, cwd=str(LLAMA_CPP_DIR))
             log_stream += f"{process.stdout}\n"
             if process.stderr: log_stream += f"[STDERR]\n{process.stderr}\n"
             os.remove(f16_gguf_path)
@@ -204,7 +204,7 @@ def run_amop_pipeline(model_id: str, pipeline_type: str, do_prune: bool, prune_p
             raise ValueError("Invalid pipeline type selected.")
         full_log += log
         yield {final_output: "Packaging & Uploading (4/5)", log_output: full_log}
-        final_message, log = stage_5_package_and_upload(model_id, optimized_path, full_log, options)
+        final_message, log = stage_5_package_and_upload(model_id, optimized_model_path, full_log, options)
         full_log += log
         yield {final_output: gr.update(value="SUCCESS", label="Status"), log_output: full_log, success_box: gr.Markdown(f"✅ **Success!** Model available: [{repo_id_for_link}](https://huggingface.co/{repo_id_for_link})", visible=True), run_button: gr.Button(interactive=True, value="Run Optimization Pipeline", variant="primary"), analyze_button: gr.Button(interactive=True, value="Analyze Model")}
     except Exception as e:
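
The commit switches both subprocess calls to repo-relative names ("convert.py" and "./quantize") and sets cwd to the llama.cpp checkout so those relative paths resolve. A minimal sketch of that invocation pattern is below; it assumes LLAMA_CPP_DIR points to a local llama.cpp checkout containing convert.py and a built quantize binary, and the model/output paths are illustrative placeholders, not values from app.py.

# Sketch: HF checkpoint -> FP16 GGUF -> quantized GGUF, running llama.cpp
# tools from inside the checkout (cwd) so their relative paths resolve.
# LLAMA_CPP_DIR and the paths below are assumed placeholders.
import os
import subprocess
from pathlib import Path

LLAMA_CPP_DIR = Path("/opt/llama.cpp")      # assumed checkout with convert.py and ./quantize
model_dir = "/tmp/my-hf-model"              # Hugging Face model directory to convert
f16_gguf = "/tmp/gguf/model-f16.gguf"       # absolute paths so cwd does not affect them
quantized_gguf = "/tmp/gguf/model.gguf"

os.makedirs(os.path.dirname(f16_gguf), exist_ok=True)

# Step 1: convert the HF checkpoint to an FP16 GGUF with the repo-local convert.py.
convert_cmd = ["python3", "convert.py", model_dir, "--outfile", f16_gguf, "--outtype", "f16"]
proc = subprocess.run(convert_cmd, check=True, capture_output=True, text=True, cwd=str(LLAMA_CPP_DIR))
print(proc.stdout)

# Step 2: quantize the FP16 GGUF with the built ./quantize binary (here to Q4_K_M).
quant_cmd = ["./quantize", f16_gguf, quantized_gguf, "Q4_K_M"]
proc = subprocess.run(quant_cmd, check=True, capture_output=True, text=True, cwd=str(LLAMA_CPP_DIR))
print(proc.stdout)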