Spaces:

thisisam
/

fara-7b-chat-test

Runtime error

File size: 2,610 Bytes

4fe284b
2cddae5
 
faf508c
4fe284b
2cddae5
 
 
 
 
 
 
 
 
 
 
 
 
491fbd9
2cddae5
faf508c
2cddae5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
faf508c
491fbd9
2cddae5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
491fbd9
 
2cddae5
3a32d35
2cddae5
 
 
 
493fc32
 
faf508c
2cddae5
faf508c
2cddae5
 
 
 
 
 
4fe284b
907a197
7274fb9

import gradio as gr
from transformers import AutoProcessor, AutoModelForVision2Seq
import torch
from PIL import Image

# Instantiate the processor and the model once, at import time.
# device_map="auto" is used to handle GPU/CPU placement automatically.
print("Loading Fara-7B model...")

_MODEL_ID = "microsoft/Fara-7B"

# Half precision when CUDA is available, full precision otherwise.
if torch.cuda.is_available():
    _model_dtype = torch.float16
else:
    _model_dtype = torch.float32

processor = AutoProcessor.from_pretrained(_MODEL_ID, trust_remote_code=True)
model = AutoModelForVision2Seq.from_pretrained(
    _MODEL_ID,
    trust_remote_code=True,
    device_map="auto",
    torch_dtype=_model_dtype,
)

print("Model loaded successfully!")

def _history_to_messages(history):
    """Convert Gradio "messages"-style history into chat-template turns.

    Only user/assistant entries whose content is a non-blank string are kept.
    Each kept turn is wrapped in the same list-of-parts shape that ``chat``
    uses for the new user turn. Anything else (non-dict entries, file
    payloads, components, other roles) is skipped.
    """
    turns = []
    for entry in history or []:
        if not isinstance(entry, dict):
            continue
        role = entry.get("role")
        text = entry.get("content")
        if role in ("user", "assistant") and isinstance(text, str) and text.strip():
            turns.append({"role": role, "content": [{"type": "text", "text": text}]})
    return turns


def chat(message, history, image):
    """Answer one chat turn using the local Fara-7B model.

    Args:
        message: Text typed by the user. ``None``, empty, or whitespace-only
            text is treated as "no text".
        history: Earlier turns as a list of ``{"role", "content"}`` dicts
            (Gradio ``type="messages"``). Plain-text turns are forwarded to
            the model so replies keep conversational context; may be ``None``
            or empty.
        image: Optional image to attach to the new user turn, or ``None``.

    Returns:
        The decoded model reply, a prompt asking for input when neither text
        nor image was supplied, or an ``"Error generating response: ..."``
        string if processing or generation raised.
    """
    # Treat whitespace-only text as empty so a blank prompt never reaches the model.
    text = message if isinstance(message, str) and message.strip() else ""
    # Explicit None check: do not rely on the truthiness of an image object.
    has_image = image is not None

    if not text and not has_image:
        return "Please provide text or an image."

    # Build the parts of the new user turn: optional image first, then text.
    content = []
    if has_image:
        content.append({"type": "image", "image": image})
    # With an image but no text, fall back to a default description request.
    content.append(
        {
            "type": "text",
            "text": text or "Describe this image and what actions I can take.",
        }
    )

    # Prior text turns first, then the new user turn.
    messages = _history_to_messages(history)
    messages.append({"role": "user", "content": content})

    try:
        # The processor applies the chat template and tokenizes text + image.
        inputs = processor.apply_chat_template(
            messages,
            add_generation_prompt=True,
            tokenize=True,
            return_dict=True,
            return_tensors="pt",
        ).to(model.device)

        outputs = model.generate(**inputs, max_new_tokens=500)

        # Slice off the prompt tokens so only the newly generated text is decoded.
        prompt_length = inputs["input_ids"].shape[-1]
        return processor.decode(outputs[0][prompt_length:], skip_special_tokens=True)
    except Exception as e:
        # UI boundary: report the failure in the chat instead of raising.
        return f"Error generating response: {str(e)}"

# Build the Gradio UI: an optional image upload on the left, the chat on the right.
demo = gr.Blocks(title="Fara-7B Simple Chat")

with demo:
    gr.Markdown("# 🤖 Fara-7B Simple Chat")
    gr.Markdown("Running microsoft/Fara-7B directly using transformers.")

    with gr.Row():
        # Narrow column: image that is passed to `chat` as its extra input.
        with gr.Column(scale=1):
            image_input = gr.Image(
                label="Upload Screenshot (Optional)",
                type="pil",
            )

        # Wide column: chat interface backed by `chat`.
        with gr.Column(scale=2):
            chatbot = gr.ChatInterface(
                fn=chat,
                type="messages",
                additional_inputs=[image_input],
            )

# Start the app only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()