Spaces:

red-rectangle
/

black-box

Runtime error

App Files Files Community

Joel Lundgren committed on Sep 16

Commit

f32efcc

1 Parent(s): 64a7b3c

test with new layout

Browse files

Files changed (2) hide show

app.py +146 -3
requirements.txt +5 -0

app.py CHANGED Viewed

@@ -1,7 +1,150 @@
 import gradio as gr
-def greet(name):
-    return "Hello " + name + "!!"
-demo = gr.Interface(fn=greet, inputs="text", outputs="text")
 demo.launch()

 import gradio as gr
+from PIL import Image, ImageDraw
+from ultralytics import YOLO
+import torch
+from transformers import AutoTokenizer, AutoModelForCausalLM
+# Load the pre-trained YOLO weights ('yolov8n.pt') once at module import;
+# detect_objects() below reuses this single instance for every request.
+model = YOLO('yolov8n.pt')
+def detect_objects(image):
+    """
+    Run YOLO object detection on an image and annotate the detections.
+
+    Args:
+        image (PIL.Image.Image | None): The input image. ``None`` (no image
+            supplied) is accepted and short-circuits without running the model.
+    Returns:
+        tuple: A tuple containing:
+            - PIL.Image.Image | None: A copy of the input with a red box and a
+              "label (confidence)" caption drawn per detection, or ``None``
+              when no image was supplied.
+            - str: Comma-separated unique labels in first-detected order, or
+              "No objects detected." when there are none.
+    """
+    # Guard against a missing image so the handler does not crash on .copy().
+    if image is None:
+        return None, "No objects detected."
+    # A single image is passed in, so only the first result is relevant.
+    result = model(image)[0]
+    # Draw on a copy so the caller's image is left untouched.
+    annotated_image = image.copy()
+    draw = ImageDraw.Draw(annotated_image)
+    detected_objects = []
+    for box in result.boxes:
+        xyxy = box.xyxy[0].tolist()
+        label = result.names[int(box.cls[0])]
+        confidence = box.conf[0].item()
+        detected_objects.append(label)
+        # Bounding box, then the caption anchored at the box's top-left corner.
+        draw.rectangle(xyxy, outline="red", width=2)
+        draw.text((xyxy[0], xyxy[1]), f"{label} ({confidence:.2f})", fill="red")
+    # dict.fromkeys de-duplicates while keeping first-seen order; a set would
+    # let the label order change from one run to the next.
+    unique_labels = list(dict.fromkeys(detected_objects))
+    if not unique_labels:
+        return annotated_image, "No objects detected."
+    return annotated_image, ", ".join(unique_labels)
+# Cache of loaded (model, tokenizer) pairs keyed by the UI model name, so each
+# checkpoint is loaded at most once per process.
+llm_cache = {}
+def get_llm(model_name):
+    """
+    Return the (model, tokenizer) pair for a supported LLM, loading it on first use.
+
+    Args:
+        model_name (str): Short UI name of the model ("qwen3:0.6b" or "gemma3:1b").
+    Returns:
+        tuple: ``(model, tokenizer)`` as returned by ``AutoModelForCausalLM`` and
+        ``AutoTokenizer``; the same objects are returned on later calls.
+    Raises:
+        ValueError: If ``model_name`` is not one of the supported names
+            (including ``None`` when nothing was selected).
+    """
+    if model_name in llm_cache:
+        return llm_cache[model_name]
+    # Maps the short names shown in the UI to Hugging Face Hub repo ids.
+    model_map = {
+        "qwen3:0.6b": "Qwen/Qwen3-0.6B",
+        "gemma3:1b": "google/gemma-3-1b-it"
+    }
+    # Fail with an actionable message instead of a bare KeyError.
+    if model_name not in model_map:
+        supported = ", ".join(sorted(model_map))
+        raise ValueError(
+            f"Unsupported model {model_name!r}; choose one of: {supported}"
+        )
+    hf_model_name = model_map[model_name]
+    tokenizer = AutoTokenizer.from_pretrained(hf_model_name)
+    # Named `llm` so it does not shadow the module-level YOLO `model`.
+    llm = AutoModelForCausalLM.from_pretrained(hf_model_name)
+    llm_cache[model_name] = (llm, tokenizer)
+    return llm, tokenizer
+def update_user_prompt(detected_objects, current_prompt):
+    """
+    Merge the detected-object list into the user prompt.
+
+    Args:
+        detected_objects (str | None): Text from the detection output box.
+        current_prompt (str): The prompt currently in the user prompt box.
+    Returns:
+        str: ``current_prompt`` unchanged when there is nothing to add (empty,
+        blank, ``None``, or the "No objects detected" placeholder); otherwise
+        the detections appended after ", ", or a fresh
+        "Objects detected in the image: ..." prompt when the box was empty.
+    """
+    # A cleared or missing detection box must not leave a dangling ", " or an
+    # empty "Objects detected in the image: " prompt.
+    if not detected_objects or not detected_objects.strip():
+        return current_prompt
+    if "No objects detected" in detected_objects:
+        return current_prompt
+    if current_prompt:
+        return f"{current_prompt}, {detected_objects}"
+    return f"Objects detected in the image: {detected_objects}"
+def generate_text(model_name, system_prompt, user_prompt):
+    """
+    Generate a chat response from the selected LLM.
+
+    Args:
+        model_name (str): Short UI name of the model, resolved by ``get_llm``.
+        system_prompt (str): System instruction; omitted from the chat when blank.
+        user_prompt (str): The user's message.
+    Returns:
+        str: The decoded completion only (prompt tokens and special tokens
+        are stripped), limited to 512 new tokens.
+    """
+    # Named `llm` so it does not shadow the module-level YOLO `model`.
+    llm, tokenizer = get_llm(model_name)
+    messages = []
+    # Skip an empty system turn rather than sending a blank instruction.
+    if system_prompt and system_prompt.strip():
+        messages.append({"role": "system", "content": system_prompt})
+    messages.append({"role": "user", "content": user_prompt})
+    text = tokenizer.apply_chat_template(
+        messages,
+        tokenize=False,
+        add_generation_prompt=True
+    )
+    # Keep the tensors on the same device as the model weights.
+    model_inputs = tokenizer([text], return_tensors="pt").to(llm.device)
+    # Pass the whole encoding so the attention mask reaches generate(),
+    # not just the input ids.
+    generated_ids = llm.generate(
+        **model_inputs,
+        max_new_tokens=512
+    )
+    # generate() returns prompt + completion; slice off the prompt tokens.
+    completion_ids = [
+        output_ids[len(input_ids):]
+        for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
+    ]
+    response = tokenizer.batch_decode(completion_ids, skip_special_tokens=True)[0]
+    return response
+# Two-tab UI: object detection feeds its label list into the LLM chat prompt.
+with gr.Blocks() as demo:
+    gr.Markdown("# Black Box: Object Detection and LLM Chat")
+    with gr.Tab("Object Detection"):
+        with gr.Row():
+            image_input = gr.Image(type="pil", label="Upload Image or Use Webcam", sources=["upload", "webcam"])
+            detected_image_output = gr.Image(label="Detected Objects")
+        object_detection_button = gr.Button("Detect Objects")
+        detected_objects_output = gr.Textbox(label="Detected Objects")
+    with gr.Tab("LLM Chat"):
+        # The choices must be exactly the keys of model_map in get_llm();
+        # any other value makes the lookup there fail. A default is set so
+        # that "Generate" never passes None as the model name.
+        model_selector = gr.Dropdown(
+            choices=["qwen3:0.6b", "gemma3:1b"],
+            value="qwen3:0.6b",
+            label="Select LLM Model",
+        )
+        system_prompt_input = gr.Textbox(label="System Prompt", value="You are a helpful assistant.")
+        user_prompt_input = gr.Textbox(label="User Prompt")
+        llm_output = gr.Textbox(label="LLM Response")
+        llm_button = gr.Button("Generate")
+    # Connect object detection components
+    object_detection_button.click(
+        fn=detect_objects,
+        inputs=image_input,
+        outputs=[detected_image_output, detected_objects_output]
+    )
+    # Connect LLM components
+    llm_button.click(
+        fn=generate_text,
+        inputs=[model_selector, system_prompt_input, user_prompt_input],
+        outputs=llm_output
+    )
+    # Whenever the detected-objects text changes, fold it into the user prompt
+    detected_objects_output.change(
+        fn=update_user_prompt,
+        inputs=[detected_objects_output, user_prompt_input],
+        outputs=user_prompt_input
+    )
 demo.launch()

requirements.txt ADDED Viewed

	@@ -0,0 +1,5 @@

+gradio
+ultralytics
+torch
+transformers
+pillow