Spaces:
Sleeping
Sleeping
| import os | |
| import re | |
| import io | |
| import streamlit as st | |
| from PIL import Image, ImageDraw | |
| from google import genai | |
| from google.genai import types | |
| # Hilfsfunktionen | |
def parse_list_boxes(text):
    """Extract bounding boxes from the model's answer text.

    Every ``[a, b, c, d]`` group of four non-negative decimal numbers found
    in ``text`` becomes one box; anything else in the text is ignored.

    Args:
        text: Raw response text. ``None`` or an empty string yields no boxes.

    Returns:
        A list of ``[float, float, float, float]`` lists, in order of
        appearance. The values are returned exactly as written in the text
        (no clamping or reordering).
    """
    if not text:
        return []
    # A strict number token: "12", "0.5", "1." or ".5". A loose class such as
    # [\d.]+ would also capture "1.2.3" or "...", which float() cannot parse.
    number = r'(\d+(?:\.\d*)?|\.\d+)'
    pattern = r'\[\s*' + r'\s*,\s*'.join([number] * 4) + r'\s*\]'
    return [[float(value) for value in match] for match in re.findall(pattern, text)]
def draw_bounding_boxes(image, boxes, *, outline="#00FF00", line_width=7):
    """Draw bounding boxes onto ``image`` in place and return it.

    Args:
        image: PIL image to draw on. It is modified in place, so pass a copy
            if the original must stay untouched.
        boxes: Iterable of ``[ymin, xmin, ymax, xmax]`` sequences whose
            values are fractions of the image height/width. Values outside
            0-1 are clamped; swapped min/max pairs are put in order.
        outline: Outline colour handed to ``ImageDraw.rectangle``
            (default: neon green).
        line_width: Outline thickness in pixels (default: a thick 7 px).

    Returns:
        The same ``image`` object with the rectangles drawn on it.
    """
    draw = ImageDraw.Draw(image)
    img_w, img_h = image.size

    def clamp(value):
        # Keep every coordinate inside the normalised 0-1 range.
        return max(0.0, min(1.0, value))

    for box in boxes:
        y_a, x_a, y_b, x_b = (clamp(box[i]) for i in range(4))
        # Pillow requires x1 >= x0 and y1 >= y0 for rectangles, so order each
        # axis instead of trusting the producer of the box.
        top, bottom = sorted((y_a, y_b))
        left, right = sorted((x_a, x_b))
        draw.rectangle(
            [left * img_w, top * img_h, right * img_w, bottom * img_h],
            outline=outline,
            width=line_width,
        )
    return image
# Streamlit UI
# Inputs (upload + object name) live in the left column; analysis results are
# rendered into the right column once the "Analysieren" button is pressed.
# NOTE(review): the nesting below was reconstructed from a source whose
# indentation had been flattened - confirm it matches the deployed app.
st.title("Objekterkennung mit Gemini")
col1, col2 = st.columns(2)

with col1:
    uploaded_file = st.file_uploader("Bild hochladen", type=["jpg", "png", "jpeg"])
    object_name = st.text_input("Objekt zur Erkennung", placeholder="z.B. 'Auto', 'Person'")

    if uploaded_file and object_name:
        image = Image.open(uploaded_file)
        width, height = image.size
        st.image(image, caption="Hochgeladenes Bild", use_container_width=True)

        if st.button("Analysieren"):
            with st.spinner("Analysiere Bild..."):
                try:
                    # Image preparation: forward the uploaded bytes unchanged.
                    # Re-saving through Pillow would recompress the image, and
                    # deriving the MIME type from image.format yields invalid
                    # types for formats such as MPO (multi-picture JPEG).
                    mime_type = (
                        uploaded_file.type
                        or f"image/{(image.format or 'jpeg').lower()}"
                    )
                    image_part = types.Part.from_bytes(
                        data=uploaded_file.getvalue(),
                        mime_type=mime_type,
                    )

                    # API client; the key is read from the KEY environment variable.
                    client = genai.Client(api_key=os.getenv("KEY"))

                    # Image description
                    desc_response = client.models.generate_content(
                        model="gemini-2.0-flash-exp",
                        contents=["Beschreibe dieses Bild detailliert.", image_part]
                    )

                    # Object detection
                    detection_prompt = (
                        f"Gib exakt 4 Dezimalzahlen pro Box für alle {object_name} im Format "
                        "[ymin, xmin, ymax, xmax] als reine Python-Liste ohne weiteren Text. "
                        "Beispiel: [[0.1, 0.2, 0.3, 0.4], [0.5, 0.6, 0.7, 0.8]]"
                    )
                    box_response = client.models.generate_content(
                        model="gemini-2.0-flash-exp",
                        contents=[detection_prompt, image_part]
                    )

                    # Processing: a parsing failure is reported but does not
                    # abort the run; the description is still shown.
                    try:
                        boxes = parse_list_boxes(box_response.text)
                        st.write("**Parsed Boxes:**", boxes)
                    except Exception as e:
                        st.error(f"Parsing Error: {str(e)}")
                        boxes = []

                    # Draw on a copy so the uploaded image stays unmodified.
                    annotated_image = image.copy()
                    if boxes:
                        annotated_image = draw_bounding_boxes(annotated_image, boxes)
                        result_text = f"{len(boxes)} {object_name} erkannt"

                        # Zoom onto the first box. Clamp to the 0-1 range (as
                        # draw_bounding_boxes does) and order each axis so the
                        # crop box can never be inverted or leave the image.
                        y_a, x_a, y_b, x_b = (
                            max(0.0, min(1.0, value)) for value in boxes[0]
                        )
                        ymin, ymax = sorted((y_a, y_b))
                        xmin, xmax = sorted((x_a, x_b))
                        # 50 px of context around the box, limited to the image.
                        zoom_area = (
                            max(0, int(xmin * width - 50)),
                            max(0, int(ymin * height - 50)),
                            min(width, int(xmax * width + 50)),
                            min(height, int(ymax * height + 50))
                        )
                        zoomed_image = annotated_image.crop(zoom_area)
                    else:
                        result_text = "Keine Objekte gefunden"
                        zoomed_image = None

                    # Show results
                    with col2:
                        st.write("## Objekterkennung:")
                        st.write(result_text)
                        if boxes:
                            st.image(
                                [annotated_image, zoomed_image],
                                caption=["Gesamtbild", "Zoom auf Erkennung"],
                                width=400
                            )
                        else:
                            st.image(annotated_image, caption="Keine Objekte erkannt", width=400)
                        st.write("## Beschreibung:")
                        st.write(desc_response.text)
                except Exception as e:
                    # Top-level boundary: surface any API/processing failure in the UI.
                    st.error(f"Fehler: {str(e)}")