Joseph Pollack committed on
Commit
d3f57e1
·
unverified ·
1 Parent(s): ff310d7

attempts to add simpler example loading and image processing

Browse files
Files changed (1) hide show
  1. app.py +11 -14
app.py CHANGED
@@ -3,8 +3,6 @@ import torch
3
  from PIL import Image
4
  import json
5
  import os
6
- import shutil
7
- import tempfile
8
  from transformers import AutoProcessor, AutoModelForImageTextToText
9
  from typing import List, Dict, Any
10
  import logging
@@ -148,6 +146,9 @@ class LOperatorDemo:
148
  elif isinstance(image, str) and os.path.exists(image):
149
  # Handle file path (from examples)
150
  pil_image = Image.open(image)
 
 
 
151
  else:
152
  return history + [{"role": "user", "content": message}, {"role": "assistant", "content": "❌ Invalid image format. Please upload a valid image."}]
153
 
@@ -183,14 +184,10 @@ def load_model():
183
 
184
 
185
  def load_example_episodes():
186
- """Load example episodes using shutil to copy files to temp location"""
187
  examples = []
188
 
189
  try:
190
- # Create temporary directory for examples
191
- temp_dir = tempfile.mkdtemp()
192
- logger.info(f"Created temporary directory for examples: {temp_dir}")
193
-
194
  episode_dirs = ["episode_13", "episode_53", "episode_73"]
195
 
196
  for episode_dir in episode_dirs:
@@ -205,16 +202,15 @@ def load_example_episodes():
205
  with open(metadata_path, "r") as f:
206
  metadata = json.load(f)
207
 
208
- # Copy image to temp directory
209
- temp_image_path = os.path.join(temp_dir, f"{episode_dir}_screenshot.png")
210
- shutil.copy2(image_path, temp_image_path)
211
 
212
  episode_num = episode_dir.split('_')[1]
213
  goal_text = metadata.get('goal', f'Episode {episode_num} example')
214
 
215
  examples.append([
216
- temp_image_path, # Use temp file path
217
- goal_text # Just the goal text, no additional formatting
218
  ])
219
  logger.info(f"Successfully loaded example for Episode {episode_num}")
220
 
@@ -226,7 +222,7 @@ def load_example_episodes():
226
  logger.error(f"Error loading examples: {str(e)}")
227
  examples = []
228
 
229
- logger.info(f"Loaded {len(examples)} examples using shutil")
230
  return examples
231
 
232
  # Create Gradio interface
@@ -306,7 +302,8 @@ def create_demo():
306
  textbox=gr.Textbox(
307
  label="Goal",
308
  placeholder="e.g., Open the Settings app and navigate to Display settings",
309
- lines=2
 
310
  )
311
  )
312
 
 
3
  from PIL import Image
4
  import json
5
  import os
 
 
6
  from transformers import AutoProcessor, AutoModelForImageTextToText
7
  from typing import List, Dict, Any
8
  import logging
 
146
  elif isinstance(image, str) and os.path.exists(image):
147
  # Handle file path (from examples)
148
  pil_image = Image.open(image)
149
+ elif hasattr(image, 'name') and os.path.exists(image.name):
150
+ # Handle Gradio file object
151
+ pil_image = Image.open(image.name)
152
  else:
153
  return history + [{"role": "user", "content": message}, {"role": "assistant", "content": "❌ Invalid image format. Please upload a valid image."}]
154
 
 
184
 
185
 
186
  def load_example_episodes():
187
+ """Load example episodes using PIL to load images directly"""
188
  examples = []
189
 
190
  try:
 
 
 
 
191
  episode_dirs = ["episode_13", "episode_53", "episode_73"]
192
 
193
  for episode_dir in episode_dirs:
 
202
  with open(metadata_path, "r") as f:
203
  metadata = json.load(f)
204
 
205
+ # Load image directly with PIL
206
+ pil_image = Image.open(image_path)
 
207
 
208
  episode_num = episode_dir.split('_')[1]
209
  goal_text = metadata.get('goal', f'Episode {episode_num} example')
210
 
211
  examples.append([
212
+ pil_image, # Use PIL Image object directly
213
+ goal_text # Use the goal text from metadata
214
  ])
215
  logger.info(f"Successfully loaded example for Episode {episode_num}")
216
 
 
222
  logger.error(f"Error loading examples: {str(e)}")
223
  examples = []
224
 
225
+ logger.info(f"Loaded {len(examples)} examples using PIL")
226
  return examples
227
 
228
  # Create Gradio interface
 
302
  textbox=gr.Textbox(
303
  label="Goal",
304
  placeholder="e.g., Open the Settings app and navigate to Display settings",
305
+ lines=2,
306
+ show_label=True
307
  )
308
  )
309