# ansphare / app.py
# marttinbell's picture
# Update app.py
# da07111 verified
# app.py
from awq import AutoAWQForCausalLM
from transformers import AutoTokenizer
import torch
import gradio as gr
# Model name from Hugging Face
MODEL_NAME = "TheBloke/Mistral-7B-v0.1-AWQ"

# Load the quantized model and its tokenizer once, at import time, so every
# call to generate_text reuses the same instances instead of reloading them.
# The status messages previously contained mis-decoded (mojibake) emoji; they
# are restored to the intended characters here.
print("🚀 Loading Mistral 7B v0.1 AWQ model...")
model = AutoAWQForCausalLM.from_quantized(
    MODEL_NAME,
    fuse_layers=True,
    trust_remote_code=False,  # never run custom code shipped with the repo
    safetensors=True,
)
# The tokenizer comes from the same repository so it matches the checkpoint.
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, trust_remote_code=False)
print("✅ Model loaded successfully!")
# Text generation function
def generate_text(prompt, temperature, max_tokens):
    """Generate a sampled continuation of ``prompt`` with the loaded model.

    Args:
        prompt: Text to continue. A blank prompt (``None``, empty, or
            whitespace-only) short-circuits to an empty response instead of
            being sent to the model.
        temperature: Sampling temperature forwarded to ``model.generate``.
        max_tokens: Upper bound on the number of newly generated tokens.
            Coerced to ``int`` in case the caller passes a float.

    Returns:
        The decoded continuation only (the prompt is excluded), with
        surrounding whitespace stripped.
    """
    if not prompt or not prompt.strip():
        return ""

    # NOTE(review): the AutoAWQ wrapper may not expose a ``device`` attribute
    # in every version -- TODO confirm. Fall back to the device of the first
    # parameter when it is missing.
    device = getattr(model, "device", None)
    if device is None:
        device = next(model.parameters()).device

    inputs = tokenizer(prompt, return_tensors="pt").to(device)
    prompt_length = inputs.input_ids.shape[-1]

    with torch.no_grad():
        outputs = model.generate(
            inputs.input_ids,
            # Forward the mask explicitly: pad and EOS share an id here, so it
            # cannot be inferred reliably from the token ids alone.
            attention_mask=inputs.get("attention_mask"),
            max_new_tokens=int(max_tokens),
            temperature=float(temperature),
            top_p=0.9,
            do_sample=True,
            pad_token_id=tokenizer.eos_token_id,
        )

    # Decode only the tokens produced after the prompt. Slicing the decoded
    # string by len(prompt) is unsafe because decoding does not always
    # reproduce the prompt text exactly as a prefix.
    new_tokens = outputs[0][prompt_length:]
    return tokenizer.decode(new_tokens, skip_special_tokens=True).strip()
# Gradio Interface
# The input widgets are listed in the same order as generate_text's
# parameters: prompt, temperature, max_tokens.
interface = gr.Interface(
    title="🧠 Mistral 7B v0.1 AWQ",
    description="Run the quantized Mistral 7B v0.1 model locally or on Google Colab using Gradio.",
    theme="default",
    fn=generate_text,
    inputs=[
        gr.Textbox(label="Prompt", placeholder="Ask Mistral something...", lines=3),
        gr.Slider(minimum=0.1, maximum=1.0, step=0.1, value=0.7, label="Temperature"),
        gr.Slider(minimum=50, maximum=1024, step=10, value=512, label="Max Tokens"),
    ],
    outputs=gr.Textbox(label="Response", lines=10),
)

# Start the web UI only when this file is run as a script, not on import.
if __name__ == "__main__":
    interface.launch(share=True)