Spaces:

Tonic
/

l-operator-demo

Running on Zero

App Files Files Community

Joseph Pollack committed on Aug 29

Commit

81e328a

unverified ·

1 Parent(s): 7dfb388

adds examples

Browse files

Files changed (1) hide show

app.py +82 -14

app.py CHANGED Viewed

@@ -3,6 +3,8 @@ import torch
 from PIL import Image
 import json
 import os
 from transformers import AutoProcessor, AutoModelForImageTextToText
 from typing import List, Dict, Any
 import logging
@@ -130,7 +132,7 @@ class LOperatorDemo:
             return f"❌ Error generating action: {str(e)}"
     @spaces.GPU(duration=90)  # 1.5 minutes for chat responses
-    def chat_with_model(self, message: str, history: List[Dict[str, str]], image: Image.Image = None) -> List[Dict[str, str]]:
         """Chat interface function for Gradio"""
         if not self.is_loaded:
             return history + [{"role": "user", "content": message}, {"role": "assistant", "content": "❌ Model not loaded. Please load the model first."}]
@@ -139,6 +141,19 @@ class LOperatorDemo:
             return history + [{"role": "user", "content": message}, {"role": "assistant", "content": "❌ Please upload an Android screenshot image."}]
         try:
             # Extract goal and instruction from message
             if "Goal:" in message and "Step:" in message:
                 # Parse structured input
@@ -160,7 +175,7 @@ class LOperatorDemo:
                 instruction = message
             # Generate action
-            response = self.generate_action(image, goal, instruction)
             return history + [{"role": "user", "content": message}, {"role": "assistant", "content": response}]
         except Exception as e:
@@ -181,7 +196,42 @@ def load_model():
         logger.error(f"Error loading model: {str(e)}")
         return f"❌ Error loading model: {str(e)}"
-# Load example episodes (lazy loading to avoid startup timeout)
 def load_example_episodes():
     """Load example episodes from the extracted data - properly load images for Gradio"""
     examples = []
@@ -197,23 +247,28 @@ def load_example_episodes():
                 # Check if both files exist
                 if os.path.exists(metadata_path) and os.path.exists(image_path):
                     with open(metadata_path, "r") as f:
                         metadata = json.load(f)
                     # Load the image using PIL
                     image = Image.open(image_path)
-                    # Ensure image is in RGB mode
-                    if image.mode != "RGB":
-                        image = image.convert("RGB")
-                    episode_num = episode_dir.split('_')[1]
-                    goal_text = metadata.get('goal', f'Episode {episode_num} example')
-                    examples.append([
-                        image,  # Use PIL Image object instead of file path
-                        f"Episode {episode_num}: {goal_text[:50]}..."
-                    ])
             except Exception as e:
                 logger.warning(f"Could not load example for {episode_dir}: {str(e)}")
@@ -341,7 +396,20 @@ def create_demo():
             if not goal or not step:
                 return {"error": "Please provide both goal and step"}
-            response = demo_instance.generate_action(image, goal, step)
             try:
                 # Try to parse as JSON

 from PIL import Image
 import json
 import os
+import base64
+import io
 from transformers import AutoProcessor, AutoModelForImageTextToText
 from typing import List, Dict, Any
 import logging
             return f"❌ Error generating action: {str(e)}"
     @spaces.GPU(duration=90)  # 1.5 minutes for chat responses
+    def chat_with_model(self, message: str, history: List[Dict[str, str]], image=None) -> List[Dict[str, str]]:
         """Chat interface function for Gradio"""
         if not self.is_loaded:
             return history + [{"role": "user", "content": message}, {"role": "assistant", "content": "❌ Model not loaded. Please load the model first."}]
             return history + [{"role": "user", "content": message}, {"role": "assistant", "content": "❌ Please upload an Android screenshot image."}]
         try:
+            # Handle different image formats
+            pil_image = None
+            if isinstance(image, str) and image.startswith('data:image/'):
+                # Handle base64 image
+                pil_image = base64_to_pil(image)
+            elif hasattr(image, 'mode'):  # PIL Image object
+                pil_image = image
+            else:
+                return history + [{"role": "user", "content": message}, {"role": "assistant", "content": "❌ Invalid image format. Please upload a valid image."}]
+            if pil_image is None:
+                return history + [{"role": "user", "content": message}, {"role": "assistant", "content": "❌ Failed to process image. Please try again."}]
             # Extract goal and instruction from message
             if "Goal:" in message and "Step:" in message:
                 # Parse structured input
                 instruction = message
             # Generate action
+            response = self.generate_action(pil_image, goal, instruction)
             return history + [{"role": "user", "content": message}, {"role": "assistant", "content": response}]
         except Exception as e:
         logger.error(f"Error loading model: {str(e)}")
         return f"❌ Error loading model: {str(e)}"
+def pil_to_base64(image):
+    """Convert PIL image to base64 string for Gradio examples"""
+    try:
+        # Convert to RGB if needed
+        if image.mode != "RGB":
+            image = image.convert("RGB")
+        # Save to bytes buffer
+        buffer = io.BytesIO()
+        image.save(buffer, format="PNG")
+        buffer.seek(0)
+        # Convert to base64
+        img_str = base64.b64encode(buffer.getvalue()).decode()
+        return f"data:image/png;base64,{img_str}"
+    except Exception as e:
+        logger.error(f"Error converting image to base64: {str(e)}")
+        return None
+def base64_to_pil(base64_string):
+    """Convert base64 string to PIL image"""
+    try:
+        # Remove data URL prefix if present
+        if base64_string.startswith('data:image/'):
+            base64_string = base64_string.split(',')[1]
+        # Decode base64
+        image_data = base64.b64decode(base64_string)
+        # Create PIL image from bytes
+        image = Image.open(io.BytesIO(image_data))
+        return image
+    except Exception as e:
+        logger.error(f"Error converting base64 to PIL image: {str(e)}")
+        return None
 def load_example_episodes():
     """Load example episodes from the extracted data - properly load images for Gradio"""
     examples = []
                 # Check if both files exist
                 if os.path.exists(metadata_path) and os.path.exists(image_path):
+                    logger.info(f"Loading example from {episode_dir}")
                     with open(metadata_path, "r") as f:
                         metadata = json.load(f)
                     # Load the image using PIL
                     image = Image.open(image_path)
+                    # Convert to base64 for Gradio examples
+                    base64_image = pil_to_base64(image)
+                    if base64_image:
+                        episode_num = episode_dir.split('_')[1]
+                        goal_text = metadata.get('goal', f'Episode {episode_num} example')
+                        examples.append([
+                            base64_image,  # Use base64 encoded image
+                            f"Episode {episode_num}: {goal_text[:50]}..."
+                        ])
+                        logger.info(f"Successfully loaded example for Episode {episode_num}")
+                    else:
+                        logger.warning(f"Failed to convert image to base64 for {episode_dir}")
             except Exception as e:
                 logger.warning(f"Could not load example for {episode_dir}: {str(e)}")
             if not goal or not step:
                 return {"error": "Please provide both goal and step"}
+            # Handle different image formats
+            pil_image = None
+            if isinstance(image, str) and image.startswith('data:image/'):
+                # Handle base64 image
+                pil_image = base64_to_pil(image)
+            elif hasattr(image, 'mode'):  # PIL Image object
+                pil_image = image
+            else:
+                return {"error": "Invalid image format. Please upload a valid image."}
+            if pil_image is None:
+                return {"error": "Failed to process image. Please try again."}
+            response = demo_instance.generate_action(pil_image, goal, step)
             try:
                 # Try to parse as JSON