gemini_vision_objects

Sleeping

App Files Files Community

Sebbe33 committed on Feb 12, 2025

Commit

b908919

verified ·

1 Parent(s): cdb1e78

Update app.py

Browse files

Files changed (1) hide show

app.py +39 -24

app.py CHANGED Viewed

@@ -13,20 +13,6 @@ def parse_list_boxes(text):
     matches = re.findall(pattern, text)
     return [[float(m) for m in match] for match in matches]
-'''def draw_bounding_boxes(image, boxes):
-    """Zeichnet Bounding Boxes auf das Bild"""
-    draw = ImageDraw.Draw(image)
-    width, height = image.size
-    for box in boxes:
-        ymin, xmin, ymax, xmax = box
-        draw.rectangle([
-            xmin * width,
-            ymin * height,
-            xmax * width,
-            ymax * height
-        ], outline="red", width=3)
-    return image'''
 def draw_bounding_boxes(image, boxes):
     """Zeichnet Bounding Boxes auf das Bild"""
     draw = ImageDraw.Draw(image)
@@ -39,15 +25,15 @@ def draw_bounding_boxes(image, boxes):
         ymax = max(0.0, min(1.0, box[2]))
         xmax = max(0.0, min(1.0, box[3]))
         draw.rectangle([
             xmin * width,
             ymin * height,
             xmax * width,
             ymax * height
-        ], outline="red", width=5)  # Dickere Linien
     return image
 # Streamlit UI
 st.title("Bildanalyse mit Gemini")
 col1, col2 = st.columns(2)
@@ -58,6 +44,7 @@ with col1:
     if uploaded_file and object_name:
         image = Image.open(uploaded_file)
         st.image(image, caption="Hochgeladenes Bild", use_container_width=True)
         if st.button("Analysieren"):
@@ -82,34 +69,62 @@ with col1:
                     # Objekterkennung
                     detection_prompt = (
-                        f"Gib alle Bounding Boxes für {object_name} im Format "
-                        "[ymin, xmin, ymax, xmax] als Liste. Nur die Liste zurückgeben!"
                     )
                     box_response = client.models.generate_content(
                         model="gemini-2.0-flash-exp",
                         contents=[detection_prompt, image_part]
                     )
-                    st.write("Raw API Response:", box_response.text)
                     # Verarbeitung
-                    boxes = parse_list_boxes(box_response.text)
                     annotated_image = image.copy()
                     if boxes:
                         annotated_image = draw_bounding_boxes(annotated_image, boxes)
                         result_text = f"{len(boxes)} {object_name} erkannt"
                     else:
                         result_text = "Keine Objekte gefunden"
                     # Ergebnisse anzeigen
                     with col2:
-                        st.write("## Objekterkennung:")
-                        st.write(result_text)
-                        st.image(annotated_image, caption="Erkannte Objekte", use_container_width=True)
                         st.write("## Beschreibung:")
                         st.write(desc_response.text)
                 except Exception as e:
                     st.error(f"Fehler: {str(e)}")

     matches = re.findall(pattern, text)
     return [[float(m) for m in match] for match in matches]
 def draw_bounding_boxes(image, boxes):
     """Zeichnet Bounding Boxes auf das Bild"""
     draw = ImageDraw.Draw(image)
         ymax = max(0.0, min(1.0, box[2]))
         xmax = max(0.0, min(1.0, box[3]))
+        # Zeichne den Rahmen
         draw.rectangle([
             xmin * width,
             ymin * height,
             xmax * width,
             ymax * height
+        ], outline="#00FF00", width=7)  # Neon green mit dicken Linien
     return image
 # Streamlit UI
 st.title("Bildanalyse mit Gemini")
 col1, col2 = st.columns(2)
     if uploaded_file and object_name:
         image = Image.open(uploaded_file)
+        width, height = image.size
         st.image(image, caption="Hochgeladenes Bild", use_container_width=True)
         if st.button("Analysieren"):
                     # Objekterkennung
                     detection_prompt = (
+                        f"Gib exakt 4 Dezimalzahlen pro Box für alle {object_name} im Format "
+                        "[ymin, xmin, ymax, xmax] als reine Python-Liste ohne weiteren Text. "
+                        "Beispiel: [[0.1, 0.2, 0.3, 0.4], [0.5, 0.6, 0.7, 0.8]]"
                     )
                     box_response = client.models.generate_content(
                         model="gemini-2.0-flash-exp",
                         contents=[detection_prompt, image_part]
                     )
+                    # Debug-Ausgaben
+                    st.write("**Raw API Response:**", box_response.text)
                     # Verarbeitung
+                    try:
+                        boxes = parse_list_boxes(box_response.text)
+                        st.write("**Parsed Boxes:**", boxes)
+                    except Exception as e:
+                        st.error(f"Parsing Error: {str(e)}")
+                        boxes = []
                     annotated_image = image.copy()
                     if boxes:
                         annotated_image = draw_bounding_boxes(annotated_image, boxes)
                         result_text = f"{len(boxes)} {object_name} erkannt"
+                        # Zoom auf erste Box
+                        ymin, xmin, ymax, xmax = boxes[0]
+                        zoom_area = (
+                            max(0, int(xmin * width - 50)),
+                            max(0, int(ymin * height - 50)),
+                            min(width, int(xmax * width + 50)),
+                            min(height, int(ymax * height + 50))
+                        )
+                        zoomed_image = annotated_image.crop(zoom_area)
                     else:
                         result_text = "Keine Objekte gefunden"
+                        zoomed_image = None
                     # Ergebnisse anzeigen
                     with col2:
                         st.write("## Beschreibung:")
                         st.write(desc_response.text)
+                        st.write("## Objekterkennung:")
+                        st.write(result_text)
+                        if boxes:
+                            st.image(
+                                [annotated_image, zoomed_image],
+                                caption=["Gesamtbild", "Zoom auf Erkennung"],
+                                width=400
+                            )
+                        else:
+                            st.image(annotated_image, caption="Keine Objekte erkannt", width=400)
                 except Exception as e:
                     st.error(f"Fehler: {str(e)}")