Spaces:

doinglean
/

florence2-chart-analysis

Runtime error

App Files Files Community

doinglean committed on Sep 4

Commit

82434dd

verified ·

1 Parent(s): 207c330

Update app.py

Browse files

Files changed (1) hide show

app.py +23 -28

app.py CHANGED Viewed

@@ -10,8 +10,14 @@ logging.basicConfig(level=logging.INFO)
 logger = logging.getLogger(__name__)
 # Lade das Modell und den Processor
-model = AutoModelForCausalLM.from_pretrained("microsoft/florence-2-large", trust_remote_code=True)
-processor = AutoProcessor.from_pretrained("microsoft/florence-2-large", trust_remote_code=True)
 def analyze_image(image, prompt):
     logger.info("Starting image analysis with prompt: %s", prompt)
@@ -21,6 +27,12 @@ def analyze_image(image, prompt):
     image_cv = cv2.cvtColor(image_np, cv2.COLOR_RGB2BGR)
     logger.info("Image shape: %s", image_np.shape)
     # Allgemeine Bildbeschreibung
     if "what do you see" in prompt.lower() or "was siehst du" in prompt.lower():
         inputs = processor(text=prompt, images=image_np, return_tensors="pt")
@@ -34,9 +46,8 @@ def analyze_image(image, prompt):
         description = processor.batch_decode(outputs, skip_special_tokens=True)[0]
         return {"prompt": prompt, "description": description}
-    # Spezifische Kerzen-Analyse
     elif "last 8 candles" in prompt.lower() or "letzte 8 kerzen" in prompt.lower():
-        # Objekterkennung mit Florence-2
         task_prompt = "<OD>"  # Objekterkennung
         inputs = processor(text=task_prompt, images=image_np, return_tensors="pt")
         with torch.no_grad():
@@ -49,11 +60,11 @@ def analyze_image(image, prompt):
         predictions = processor.post_process_generation(outputs, task=task_prompt, image_size=(image_np.shape[1], image_np.shape[0]))
         logger.info("Detected objects: %s", predictions)
-        # Extrahiere Kerzen
         detections = []
         if "<OD>" in predictions:
             for i, (bbox, label) in enumerate(zip(predictions["<OD>"]["bboxes"], predictions["<OD>"]["labels"])):
-                if "candle" not in label.lower():  # Filtere nur Kerzen
                     continue
                 xmin, ymin, xmax, ymax = map(int, bbox)
@@ -65,11 +76,10 @@ def analyze_image(image, prompt):
                 mean_color = np.mean(candle_roi, axis=(0, 1)).astype(int)
                 color_rgb = f"RGB({mean_color[2]},{mean_color[1]},{mean_color[0]})"
-                # OCR für Preise (Achsen oder Kerzenregion)
-                price_roi = image_cv[max(0, ymin-50):min(image_np.shape[0], ymax+50),
-                                    max(0, xmin-50):min(image_np.shape[1], xmax+50)]
-                price_task = "<OCR>"
-                ocr_inputs = processor(text=price_task, images=price_roi, return_tensors="pt")
                 with torch.no_grad():
                     ocr_outputs = model.generate(
                         input_ids=ocr_inputs["input_ids"],
@@ -91,24 +101,9 @@ def analyze_image(image, prompt):
         if not detections:
             logger.warning("No candlesticks detected. Ensure clear image with visible candles.")
-            return {"prompt": prompt, "description": "No candlesticks detected. Try a clearer screenshot."}
         return {"prompt": prompt, "detections": detections}
-    # Fallback für unbekannte Prompts
     else:
-        return {"prompt": prompt, "description": "Unsupported prompt. Use 'Was siehst du auf dem Bild?' or 'List last 8 candles with their colors'."}
-# Erstelle Gradio-Schnittstelle
-iface = gr.Interface(
-    fn=analyze_image,
-    inputs=[
-        gr.Image(type="pil", label="Upload an Image"),
-        gr.Textbox(label="Prompt", placeholder="Enter your prompt, e.g., 'Was siehst du auf dem Bild?' or 'List last 8 candles with their colors'")
-    ],
-    outputs="json",
-    title="Image Analysis with Florence-2-large",
-    description="Upload an image and provide a prompt to get a description or analyze candlesticks."
-)
-iface.launch()

 logger = logging.getLogger(__name__)
 # Lade das Modell und den Processor
+try:
+    logger.info("Loading model: microsoft/florence-2-base")
+    model = AutoModelForCausalLM.from_pretrained("microsoft/florence-2-base", trust_remote_code=True)
+    processor = AutoProcessor.from_pretrained("microsoft/florence-2-base", trust_remote_code=True)
+    logger.info("Model and processor loaded successfully")
+except Exception as e:
+    logger.error("Failed to load model: %s", str(e))
+    raise
 def analyze_image(image, prompt):
     logger.info("Starting image analysis with prompt: %s", prompt)
     image_cv = cv2.cvtColor(image_np, cv2.COLOR_RGB2BGR)
     logger.info("Image shape: %s", image_np.shape)
+    # Bildvorverarbeitung: Kontrast erhöhen
+    clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
+    gray = cv2.cvtColor(image_cv, cv2.COLOR_BGR2GRAY)
+    enhanced = clahe.apply(gray)
+    image_cv = cv2.cvtColor(enhanced, cv2.COLOR_GRAY2BGR)
     # Allgemeine Bildbeschreibung
     if "what do you see" in prompt.lower() or "was siehst du" in prompt.lower():
         inputs = processor(text=prompt, images=image_np, return_tensors="pt")
         description = processor.batch_decode(outputs, skip_special_tokens=True)[0]
         return {"prompt": prompt, "description": description}
+    # Kerzen-Analyse
     elif "last 8 candles" in prompt.lower() or "letzte 8 kerzen" in prompt.lower():
         task_prompt = "<OD>"  # Objekterkennung
         inputs = processor(text=task_prompt, images=image_np, return_tensors="pt")
         with torch.no_grad():
         predictions = processor.post_process_generation(outputs, task=task_prompt, image_size=(image_np.shape[1], image_np.shape[0]))
         logger.info("Detected objects: %s", predictions)
         detections = []
         if "<OD>" in predictions:
             for i, (bbox, label) in enumerate(zip(predictions["<OD>"]["bboxes"], predictions["<OD>"]["labels"])):
+                # Erweitere Filter für Kerzen
+                if "candle" not in label.lower() and "bar" not in label.lower() and "chart" not in label.lower():
                     continue
                 xmin, ymin, xmax, ymax = map(int, bbox)
                 mean_color = np.mean(candle_roi, axis=(0, 1)).astype(int)
                 color_rgb = f"RGB({mean_color[2]},{mean_color[1]},{mean_color[0]})"
+                # OCR für Preise (erweiterte ROI)
+                price_roi = image_cv[max(0, ymin-200):min(image_np.shape[0], ymax+200),
+                                    max(0, xmin-200):min(image_np.shape[1], xmax+200)]
+                ocr_inputs = processor(text="<OCR>", images=price_roi, return_tensors="pt")
                 with torch.no_grad():
                     ocr_outputs = model.generate(
                         input_ids=ocr_inputs["input_ids"],
         if not detections:
             logger.warning("No candlesticks detected. Ensure clear image with visible candles.")
+            return {"prompt": prompt, "description": "No candlesticks detected. Try a clearer screenshot with visible candles and prices."}
         return {"prompt": prompt, "detections": detections}
     else:
+        return {"prompt": prompt, "description": "Unsupported prompt