import gradio as gr
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
from peft import PeftModel

# Model names
base_model_name = "unsloth/qwen2.5-coder-3b-instruct-bnb-4bit"
lora_model_name = "MarioCap/OCodeR_500-Qwen-2.5-Code-3B"

# Load tokenizer and 4-bit base model, then attach the LoRA adapter
tokenizer = AutoTokenizer.from_pretrained(base_model_name)
base_model = AutoModelForCausalLM.from_pretrained(
    base_model_name, device_map="auto", trust_remote_code=True
)
model = PeftModel.from_pretrained(base_model, lora_model_name)

# Create a text-generation pipeline around the adapted model
pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)

# Chat/inference function
def generate(prompt, max_new_tokens=200, temperature=0.7):
    response = pipe(
        prompt,
        max_new_tokens=int(max_new_tokens),  # Gradio sliders may pass floats
        temperature=float(temperature),
        do_sample=True,
    )
    return response[0]["generated_text"]

# Gradio UI
with gr.Blocks() as demo:
    gr.Markdown("### 💡 Code Assistant - OCodeR + Qwen 2.5")
    with gr.Row():
        prompt = gr.Textbox(
            label="Enter your coding prompt",
            placeholder="Write a Python function to reverse a string...",
        )
    with gr.Row():
        max_tokens = gr.Slider(50, 1024, value=200, step=50, label="Max New Tokens")
        temp = gr.Slider(0.1, 1.5, value=0.7, step=0.1, label="Temperature")
    with gr.Row():
        output = gr.Textbox(label="Generated Output")
    with gr.Row():
        submit = gr.Button("Generate")
    submit.click(fn=generate, inputs=[prompt, max_tokens, temp], outputs=output)

demo.launch()
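
# --- Environment note (assumption, not stated in the original script) ---
# A working environment for this app would typically need gradio, transformers,
# peft, accelerate, and bitsandbytes, since the base checkpoint is a bnb 4-bit
# model (a CUDA-capable GPU with bitsandbytes support is assumed). A plausible
# install line, versions unpinned:
#   pip install gradio transformers peft accelerate bitsandbytes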