odyssey-coder / app.py
os-odyssey's picture
Create app.py
57160bf verified
raw
history blame
6.86 kB
# app.py
# Simple coding assistant with analyze-and-rewrite workflow
# Brand: odyssey
import os
import requests
import gradio as gr
# ===== DEFAULTS =====
# Initial values shown in the UI controls and reused by the API helpers below.
# Model id pre-filled in the "Hugging Face model name" textbox.
DEFAULT_MODEL = "Qwen/Qwen2.5-Coder-32B-Instruct"
# Starting position of the max_new_tokens slider.
DEFAULT_MAX_NEW_TOKENS = 512
# Starting position of the temperature slider.
DEFAULT_TEMPERATURE = 0.2
# Starting top_p slider value; also sent with every analyze-and-rewrite request.
DEFAULT_TOP_P = 0.95
# Starting top_k slider value; also sent with every analyze-and-rewrite request.
DEFAULT_TOP_K = 50
# URL template for the Inference API; "{}" is filled with the model name via str.format.
HF_INFERENCE_URL = "https://api-inference.huggingface.co/models/{}"
# ===== Helper: call Hugging Face text-generation inference API =====
def hf_text_generation(model: str, prompt: str, hf_token: str, params: dict):
    """Send a text-generation request to the Hugging Face Inference API.

    Args:
        model: Model name substituted into ``HF_INFERENCE_URL``.
        prompt: Text sent as the ``inputs`` field of the JSON payload.
        hf_token: API token used as a Bearer credential. When empty or
            ``None`` no ``Authorization`` header is sent at all.
        params: Generation parameters sent as the ``parameters`` field.

    Returns:
        The ``generated_text`` of the first list item when the response is a
        list of dicts carrying that key; otherwise ``str()`` of the first
        item, or ``str()`` of the whole decoded body.

    Raises:
        requests.HTTPError: On a non-2xx status. When the response body is
            JSON with an ``"error"`` field, that detail is appended to the
            exception message.
        requests.RequestException: On connection failures or timeouts.
        RuntimeError: When a 2xx response body is a dict with an ``"error"``
            key.
    """
    url = HF_INFERENCE_URL.format(model)
    # Formatting a missing token would transmit the literal "Bearer None",
    # so the header is only added when a token is actually available.
    headers = {"Authorization": f"Bearer {hf_token}"} if hf_token else {}
    payload = {"inputs": prompt, "parameters": params}
    r = requests.post(url, headers=headers, json=payload, timeout=120)

    try:
        r.raise_for_status()
    except requests.HTTPError as err:
        # The status line alone rarely explains the failure; surface the
        # API's own error text when the body provides one, keeping the same
        # exception type so existing handlers continue to work.
        detail = _hf_error_detail(r)
        if detail is None:
            raise
        raise requests.HTTPError(
            f"{err} | Hugging Face error: {detail}", response=r
        ) from err

    data = r.json()
    if isinstance(data, dict) and "error" in data:
        raise RuntimeError(f"Hugging Face error: {data['error']}")

    # Common response shapes:
    # 1) [{"generated_text": "..."}]
    # 2) [{"some_key": ...}] or string-like results
    if isinstance(data, list) and data:
        first = data[0]
        if isinstance(first, dict) and "generated_text" in first:
            return first["generated_text"]
        return str(first)
    return str(data)


def _hf_error_detail(response):
    """Return the ``"error"`` field of a JSON response body, or ``None``."""
    try:
        body = response.json()
    except ValueError:
        # Body is not JSON (e.g. an HTML error page); nothing to extract.
        return None
    if isinstance(body, dict):
        return body.get("error")
    return None
# ===== Analysis & rewrite instruction prompt =====
# Prompt prefix placed before the user's input by analyze_and_rewrite_via_api.
# It asks the model to answer in two sections introduced by the literal
# markers "===ANALYSIS===" and "===REWRITTEN==="; the response parser in
# analyze_and_rewrite_via_api looks for exactly those marker strings, so the
# markers here and there must be kept in sync.
ANALYSIS_INSTRUCTION = """
You are a senior code reviewer and refactorer.
Step 1: Analyze the following input (it can be source code or a prompt). Provide concise bullet points: bugs, edge-cases, security concerns, performance issues, missing pieces, and concrete suggestions.
Step 2: Produce an optimized, rewritten version of the code or a clarified prompt that is ready-to-run or ready-to-feed into a code generation model.
Return the output in two clearly marked sections:
===ANALYSIS===
- bullet points...
===REWRITTEN===
<optimized code or clarified prompt>
Return only these two sections and nothing else.
"""
def analyze_and_rewrite_via_api(model, user_input, hf_token, analysis_max_tokens=512, gen_max_tokens=512, temperature=0.15):
    """Ask the model to review the input and produce a rewritten version.

    A single request is made whose prompt is ``ANALYSIS_INSTRUCTION``
    followed by the user's input. The reply is expected to contain an
    ``===ANALYSIS===`` section followed by an ``===REWRITTEN===`` section.

    Args:
        model: Model name passed through to ``hf_text_generation``.
        user_input: Code or prompt text to analyze.
        hf_token: API token passed through to ``hf_text_generation``.
        analysis_max_tokens: Token budget intended for the analysis section.
        gen_max_tokens: Token budget intended for the rewritten section.
        temperature: Sampling temperature for the request.

    Returns:
        A tuple ``(analysis, rewritten, raw)``. When both markers are present
        in the expected order, ``analysis`` and ``rewritten`` are the stripped
        section bodies. Otherwise ``analysis`` is the full raw reply and
        ``rewritten`` is the empty string. ``raw`` is always the unmodified
        reply.

    Raises:
        Whatever ``hf_text_generation`` raises.
    """
    prompt = ANALYSIS_INSTRUCTION + "\n\nINPUT:\n" + user_input + "\n\n"
    params = {
        # Both sections come back in the same completion, so the request
        # needs the combined budget; using only the analysis budget would
        # leave gen_max_tokens ignored and tend to truncate the rewrite.
        "max_new_tokens": int(analysis_max_tokens) + int(gen_max_tokens),
        "temperature": temperature,
        "top_p": DEFAULT_TOP_P,
        "top_k": DEFAULT_TOP_K,
        "return_full_text": False,
    }
    raw = hf_text_generation(model, prompt, hf_token, params)

    # partition() splits on the first occurrence only, so the rewritten
    # section is kept whole even if the marker text is repeated later, and a
    # REWRITTEN marker that precedes ANALYSIS is treated as malformed output.
    _, analysis_marker, after_analysis = raw.partition("===ANALYSIS===")
    analysis, rewritten_marker, rewritten = after_analysis.partition("===REWRITTEN===")
    if analysis_marker and rewritten_marker:
        return analysis.strip(), rewritten.strip(), raw

    # Fallback: return full raw as analysis if markers are missing
    return raw, "", raw
def generate_code_via_api(model, prompt, hf_token, max_new_tokens=256, temperature=0.2, top_p=0.95, top_k=50):
    """Run a plain generation request and return the model's text.

    The sampling arguments are coerced to the numeric types placed in the
    request (``int`` for token/top-k counts, ``float`` for temperature and
    top-p) before being handed to ``hf_text_generation``. The request sets
    ``return_full_text`` to ``False``.
    """
    sampling = dict(
        max_new_tokens=int(max_new_tokens),
        temperature=float(temperature),
        top_p=float(top_p),
        top_k=int(top_k),
        return_full_text=False,
    )
    completion = hf_text_generation(model, prompt, hf_token, sampling)
    return completion
# ===== Gradio UI =====
# Two-column layout: inputs and generation settings on the left, the three
# result boxes on the right. The click handler is defined and wired inside
# the Blocks context.
with gr.Blocks(title="odyssey — Coding Assistant & Rewriter") as demo:
    gr.Markdown("## odyssey — Coding Assistant and Rewriter (Qwen Coder)")
    gr.Markdown(
        "This tool lets you customize model, token, temperature and output length. "
        "Enable 'Analyze → Rewrite' to have the model first analyze the input and then produce an optimized rewrite."
    )
    with gr.Row():
        with gr.Column(scale=2):
            user_input = gr.Textbox(
                label="Input (code or prompt)",
                lines=8,
                placeholder="Paste the code you want reviewed/refactored or the prompt you want optimized..."
            )
            thinking_toggle = gr.Checkbox(label="Analyze → Rewrite (thinking mode)", value=True)
            model_input = gr.Textbox(label="Hugging Face model name", value=DEFAULT_MODEL)
            use_remote = gr.Checkbox(label="Use Hugging Face Inference API (recommended)", value=True)
            hf_token_input = gr.Textbox(
                label="Hugging Face API token (leave empty to use HF_API_TOKEN secret)",
                type="password"
            )
            max_tokens_input = gr.Slider(
                minimum=64, maximum=2048, step=16,
                label="max_new_tokens", value=DEFAULT_MAX_NEW_TOKENS
            )
            temp_input = gr.Slider(
                minimum=0.0, maximum=1.0, step=0.01,
                label="temperature", value=DEFAULT_TEMPERATURE
            )
            top_p_input = gr.Slider(minimum=0.1, maximum=1.0, step=0.05, label="top_p", value=DEFAULT_TOP_P)
            top_k_input = gr.Slider(minimum=0, maximum=200, step=1, label="top_k", value=DEFAULT_TOP_K)
            generate_btn = gr.Button("Run")
        with gr.Column(scale=2):
            analysis_out = gr.Textbox(label="Model Analysis", lines=8)
            rewritten_out = gr.Textbox(label="Rewritten Code / Prompt", lines=12)
            normal_out = gr.Textbox(label="Normal Generation Output", lines=12)

    def on_generate(inp, thinking, model_name, use_remote_flag, token, max_new_toks, temp, top_p, top_k):
        """Handle a click on "Run".

        Returns a 3-tuple of strings for the analysis, rewritten and normal
        generation boxes, in that order. Problems (missing token, empty
        input, API failures) are reported as a message in the first box
        rather than raised, so the UI always receives three values.
        """
        # A token typed in the UI wins over the HF_API_TOKEN environment
        # variable; surrounding whitespace from copy/paste is dropped.
        hf_token = (token or "").strip() or os.environ.get("HF_API_TOKEN")
        # NOTE: use_remote_flag only gates this token check; both branches
        # below always go through the remote API helpers.
        if use_remote_flag and not hf_token:
            return "ERROR: Please provide a Hugging Face API token or set HF_API_TOKEN in Secrets.", "", ""
        # Avoid spending an API call on a blank prompt.
        if not (inp or "").strip():
            return "ERROR: Please enter some code or a prompt first.", "", ""
        try:
            # Coerce before the floor division so a float slider value can
            # never end up as a float in the max_new_tokens payload field.
            max_new_toks = int(max_new_toks)
            if thinking:
                analysis, rewritten, raw = analyze_and_rewrite_via_api(
                    model_name, inp, hf_token,
                    analysis_max_tokens=min(1024, max_new_toks // 2),
                    gen_max_tokens=max_new_toks,
                    temperature=temp
                )
                # Generate from the rewrite when one was produced, otherwise
                # fall back to the user's original input.
                final_source = rewritten if rewritten.strip() else inp
                normal_generation = generate_code_via_api(
                    model_name, final_source, hf_token,
                    max_new_tokens=max_new_toks,
                    temperature=temp,
                    top_p=top_p, top_k=top_k
                )
                return analysis, rewritten, normal_generation
            out = generate_code_via_api(
                model_name, inp, hf_token,
                max_new_tokens=max_new_toks, temperature=temp,
                top_p=top_p, top_k=top_k
            )
            return "", "", out
        except Exception as e:
            # UI boundary: show the failure in the analysis box instead of
            # letting the exception escape the event handler.
            return f"Model execution error: {e}", "", ""

    generate_btn.click(
        on_generate,
        inputs=[user_input, thinking_toggle, model_input, use_remote, hf_token_input, max_tokens_input, temp_input, top_p_input, top_k_input],
        outputs=[analysis_out, rewritten_out, normal_out]
    )
# Start the Gradio app only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()