# app.py
# Simple coding assistant with analyze-and-rewrite workflow
# Brand: odyssey

import os

import requests
import gradio as gr

# ===== DEFAULTS =====
DEFAULT_MODEL = "Qwen/Qwen2.5-Coder-32B-Instruct"
DEFAULT_MAX_NEW_TOKENS = 512
DEFAULT_TEMPERATURE = 0.2
DEFAULT_TOP_P = 0.95
DEFAULT_TOP_K = 50

HF_INFERENCE_URL = "https://api-inference.huggingface.co/models/{}"

# Section markers the model is asked to emit; shared by the prompt and the parser.
ANALYSIS_MARKER = "===ANALYSIS==="
REWRITTEN_MARKER = "===REWRITTEN==="


# ===== Helper: call Hugging Face text-generation inference API =====
def hf_text_generation(model: str, prompt: str, hf_token: str, params: dict):
    """Send *prompt* to the Hugging Face Inference API and return the generated text.

    Args:
        model: Model repository name, substituted into ``HF_INFERENCE_URL``.
        prompt: Text sent as the ``inputs`` field of the request.
        hf_token: API token. When empty/``None`` no ``Authorization`` header
            is sent (instead of the literal string ``"Bearer None"``).
        params: Generation parameters sent as the ``parameters`` field.

    Returns:
        The ``generated_text`` of the first result when present, otherwise the
        string form of the first list element or of the whole payload.

    Raises:
        RuntimeError: The response body carries an ``error`` field, or a
            successful response is not valid JSON.
        requests.RequestException: Transport failure, timeout, or an HTTP
            error status whose body has no ``error`` field.
    """
    url = HF_INFERENCE_URL.format(model.strip())
    headers = {"Authorization": f"Bearer {hf_token}"} if hf_token else {}
    payload = {"inputs": prompt, "parameters": params}

    r = requests.post(url, headers=headers, json=payload, timeout=120)

    # Decode the body *before* checking the status code: error responses
    # usually carry a JSON {"error": ...} message that is far more useful
    # than the generic HTTP status text.
    try:
        data = r.json()
    except ValueError:
        data = None

    if isinstance(data, dict) and "error" in data:
        raise RuntimeError(f"Hugging Face error: {data['error']}")
    r.raise_for_status()
    if data is None:
        raise RuntimeError("Hugging Face error: response was not valid JSON")

    # Common response shapes:
    # 1) [{"generated_text": "..."}]
    # 2) [{"some_key": ...}] or string-like results
    if isinstance(data, list) and data:
        first = data[0]
        if isinstance(first, dict) and "generated_text" in first:
            return first["generated_text"]
        return str(first)
    return str(data)


def _build_generation_params(max_new_tokens, temperature, top_p, top_k):
    """Build the ``parameters`` dict, leaving out values that mean "disabled".

    The UI sliders allow temperature=0, top_k=0 and top_p=1.0. Those values
    mean "no sampling" / "no filtering", but text-generation backends
    validate them as out of range, so they are translated instead of being
    forwarded verbatim.
    """
    params = {
        "max_new_tokens": max(1, int(max_new_tokens)),
        "return_full_text": False,
    }

    temperature = float(temperature)
    if temperature <= 0.0:
        # Temperature 0 is the conventional request for greedy decoding.
        params["do_sample"] = False
        return params

    params["temperature"] = temperature
    top_p = float(top_p)
    if 0.0 < top_p < 1.0:
        params["top_p"] = top_p
    top_k = int(top_k)
    if top_k > 0:
        params["top_k"] = top_k
    return params


def _split_sections(raw: str):
    """Split marker-delimited model output into ``(analysis, rewritten)``.

    Falls back to ``(raw, "")`` when the analysis marker is missing. When
    only the rewritten marker is missing (typically because the output was
    cut off by the token limit), the text after the analysis marker is
    returned as the analysis with an empty rewrite.
    """
    _, analysis_found, after_analysis = raw.partition(ANALYSIS_MARKER)
    if not analysis_found:
        return raw, ""

    analysis, rewritten_found, rewritten = after_analysis.partition(REWRITTEN_MARKER)
    if not rewritten_found:
        return analysis.strip(), ""
    return analysis.strip(), rewritten.strip()


# ===== Analysis & rewrite instruction prompt =====
ANALYSIS_INSTRUCTION = """
You are a senior code reviewer and refactorer.
Step 1: Analyze the following input (it can be source code or a prompt).
Provide concise bullet points: bugs, edge-cases, security concerns, performance issues, missing pieces, and concrete suggestions.
Step 2: Produce an optimized, rewritten version of the code or a clarified prompt that is ready-to-run or ready-to-feed into a code generation model.
Return the output in two clearly marked sections:
===ANALYSIS===
- bullet points...
===REWRITTEN===
Return only these two sections and nothing else.
"""


def analyze_and_rewrite_via_api(model, user_input, hf_token, analysis_max_tokens=512,
                                gen_max_tokens=512, temperature=0.15,
                                top_p=DEFAULT_TOP_P, top_k=DEFAULT_TOP_K):
    """Ask the model to review *user_input* and produce a rewritten version.

    Args:
        model: Model repository name.
        user_input: Code or prompt to analyze.
        hf_token: Hugging Face API token (may be empty).
        analysis_max_tokens: Token budget for the single analysis+rewrite call.
        gen_max_tokens: Unused; kept so existing callers keep working.
        temperature: Sampling temperature; ``0`` selects greedy decoding.
        top_p: Nucleus sampling threshold (defaults to ``DEFAULT_TOP_P``).
        top_k: Top-k cutoff (defaults to ``DEFAULT_TOP_K``).

    Returns:
        ``(analysis, rewritten, raw)``. ``rewritten`` is ``""`` when the model
        did not emit a ``===REWRITTEN===`` section.
    """
    prompt = ANALYSIS_INSTRUCTION + "\n\nINPUT:\n" + user_input + "\n\n"
    params = _build_generation_params(analysis_max_tokens, temperature, top_p, top_k)
    raw = hf_text_generation(model, prompt, hf_token, params)
    analysis, rewritten = _split_sections(raw)
    return analysis, rewritten, raw


def generate_code_via_api(model, prompt, hf_token, max_new_tokens=256, temperature=0.2,
                          top_p=0.95, top_k=50):
    """Run a plain text-generation request for *prompt* and return the output text."""
    params = _build_generation_params(max_new_tokens, temperature, top_p, top_k)
    return hf_text_generation(model, prompt, hf_token, params)


# ===== Gradio UI =====
with gr.Blocks(title="odyssey — Coding Assistant & Rewriter") as demo:
    gr.Markdown("## odyssey — Coding Assistant and Rewriter (Qwen Coder)")
    gr.Markdown(
        "This tool lets you customize model, token, temperature and output length. "
        "Enable 'Analyze → Rewrite' to have the model first analyze the input and then produce an optimized rewrite."
    )

    with gr.Row():
        with gr.Column(scale=2):
            user_input = gr.Textbox(
                label="Input (code or prompt)",
                lines=8,
                placeholder="Paste the code you want reviewed/refactored or the prompt you want optimized..."
            )
            thinking_toggle = gr.Checkbox(label="Analyze → Rewrite (thinking mode)", value=True)
            model_input = gr.Textbox(label="Hugging Face model name", value=DEFAULT_MODEL)
            use_remote = gr.Checkbox(label="Use Hugging Face Inference API (recommended)", value=True)
            hf_token_input = gr.Textbox(
                label="Hugging Face API token (leave empty to use HF_API_TOKEN secret)",
                type="password"
            )
            max_tokens_input = gr.Slider(
                minimum=64, maximum=2048, step=16,
                label="max_new_tokens", value=DEFAULT_MAX_NEW_TOKENS
            )
            temp_input = gr.Slider(
                minimum=0.0, maximum=1.0, step=0.01,
                label="temperature", value=DEFAULT_TEMPERATURE
            )
            top_p_input = gr.Slider(minimum=0.1, maximum=1.0, step=0.05, label="top_p", value=DEFAULT_TOP_P)
            top_k_input = gr.Slider(minimum=0, maximum=200, step=1, label="top_k", value=DEFAULT_TOP_K)
            generate_btn = gr.Button("Run")

        with gr.Column(scale=2):
            analysis_out = gr.Textbox(label="Model Analysis", lines=8)
            rewritten_out = gr.Textbox(label="Rewritten Code / Prompt", lines=12)
            normal_out = gr.Textbox(label="Normal Generation Output", lines=12)

    def on_generate(inp, thinking, model_name, use_remote_flag, token, max_new_toks, temp, top_p, top_k):
        """Handle the Run button; returns (analysis, rewritten, generation) texts.

        Errors are reported in the first output box rather than raised, so
        the UI always receives three strings.
        """
        hf_token = (token or "").strip() or os.environ.get("HF_API_TOKEN")
        # NOTE: the checkbox only controls whether a token is required; every
        # request goes through the Inference API regardless of its state.
        if use_remote_flag and not hf_token:
            return "ERROR: Please provide a Hugging Face API token or set HF_API_TOKEN in Secrets.", "", ""
        if not (inp or "").strip():
            return "ERROR: Please enter some code or a prompt.", "", ""

        try:
            # Sliders may deliver floats; the API expects an integer budget.
            max_new_toks = int(max_new_toks)

            if not thinking:
                out = generate_code_via_api(
                    model_name, inp, hf_token,
                    max_new_tokens=max_new_toks, temperature=temp, top_p=top_p, top_k=top_k
                )
                return "", "", out

            analysis, rewritten, _raw = analyze_and_rewrite_via_api(
                model_name,
                inp,
                hf_token,
                analysis_max_tokens=max(1, min(1024, max_new_toks // 2)),
                gen_max_tokens=max_new_toks,
                temperature=temp,
                top_p=top_p,
                top_k=top_k
            )
            # Feed the rewrite into the second pass; fall back to the
            # original input when the model produced no rewrite section.
            final_source = rewritten if rewritten.strip() else inp
            normal_generation = generate_code_via_api(
                model_name,
                final_source,
                hf_token,
                max_new_tokens=max_new_toks,
                temperature=temp,
                top_p=top_p,
                top_k=top_k
            )
            return analysis, rewritten, normal_generation
        except Exception as e:  # UI boundary: surface any failure as text
            return f"Model execution error: {e}", "", ""

    generate_btn.click(
        on_generate,
        inputs=[user_input, thinking_toggle, model_input, use_remote, hf_token_input,
                max_tokens_input, temp_input, top_p_input, top_k_input],
        outputs=[analysis_out, rewritten_out, normal_out]
    )

if __name__ == "__main__":
    demo.launch()