chris-propeller committed on
Commit a36d7fa · 1 Parent(s): 334daaa
Files changed (3)
  1. app-bak.py +342 -0
  2. app.py +51 -295
  3. test_minimal.py +15 -0
app-bak.py ADDED
@@ -0,0 +1,342 @@
+ import spaces
+ import gradio as gr
+ import numpy as np
+ from PIL import Image
+ import base64
+ import io
+ from typing import Dict, Any
+ import warnings
+ warnings.filterwarnings("ignore")
+
+ @spaces.GPU
+ def sam3_inference(image, text_prompt, confidence_threshold=0.5):
+     """
+     Standalone GPU function with model initialization for Spaces Stateless GPU
+     All CUDA operations and related imports must happen inside this decorated function
+     """
+     try:
+         # Import torch and transformers inside GPU function to avoid main process CUDA init
+         import torch
+         from transformers import Sam3Model, Sam3Processor
+
+         # Initialize model and processor inside GPU function (required for Stateless GPU)
+         device = "cuda" if torch.cuda.is_available() else "cpu"
+         model = Sam3Model.from_pretrained(
+             "facebook/sam3",
+             torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32
+         ).to(device)
+         processor = Sam3Processor.from_pretrained("facebook/sam3")
+         print(f"Model loaded on device: {device}")
+
+         # Handle base64 input (for API)
+         if isinstance(image, str):
+             if image.startswith('data:image'):
+                 image = image.split(',')[1]
+             image_bytes = base64.b64decode(image)
+             image = Image.open(io.BytesIO(image_bytes)).convert("RGB")
+
+         # Process with SAM3
+         inputs = processor(
+             images=image,
+             text=text_prompt.strip(),
+             return_tensors="pt"
+         ).to(device)
+
+         # Convert dtype to match model
+         for key in inputs:
+             if inputs[key].dtype == torch.float32:
+                 inputs[key] = inputs[key].to(model.dtype)
+
+         with torch.no_grad():
+             outputs = model(**inputs)
+
+         # Use proper SAM3 post-processing
+         results = processor.post_process_instance_segmentation(
+             outputs,
+             threshold=confidence_threshold,
+             mask_threshold=0.5,
+             target_sizes=inputs.get("original_sizes").tolist()
+         )[0]
+
+         return results
+
+     except Exception as e:
+         raise Exception(f"SAM3 inference error: {str(e)}")
+
+ class SAM3Handler:
+     """SAM3 handler for both UI and API access"""
+
+     def __init__(self):
+         print("SAM3 handler initialized (models will be loaded lazily)")
+
+     def predict(self, image, text_prompt, confidence_threshold=0.5):
+         """
+         Main prediction function for both UI and API
+
+         Args:
+             image: PIL Image or base64 string
+             text_prompt: String describing what to segment
+             confidence_threshold: Minimum confidence for masks
+
+         Returns:
+             Dict with masks, scores, and metadata
+         """
+         try:
+             # Call the standalone GPU function
+             results = sam3_inference(image, text_prompt, confidence_threshold)
+
+             # Prepare response
+             response = {
+                 "masks": [],
+                 "scores": [],
+                 "prompt_type": "text",
+                 "prompt_value": text_prompt,
+                 "num_masks": len(results["masks"])
+             }
+
+             # Process each mask
+             for i in range(len(results["masks"])):
+                 mask_np = results["masks"][i].cpu().numpy().astype(np.uint8) * 255
+                 score = results["scores"][i].item()
+
+                 if score >= confidence_threshold:
+                     # Convert mask to base64 for API response
+                     mask_image = Image.fromarray(mask_np, mode='L')
+                     buffer = io.BytesIO()
+                     mask_image.save(buffer, format='PNG')
+                     mask_b64 = base64.b64encode(buffer.getvalue()).decode('utf-8')
+
+                     response["masks"].append(mask_b64)
+                     response["scores"].append(score)
+
+             return response
+
+         except Exception as e:
+             return {"error": str(e)}
+
+ # Initialize the handler
+ handler = SAM3Handler()
+
+ def gradio_interface(image, text_prompt, confidence_threshold):
+     """Gradio interface wrapper"""
+     result = handler.predict(image, text_prompt, confidence_threshold)
+
+     if "error" in result:
+         return f"Error: {result['error']}", None
+
+     # For UI, show the first mask as an example
+     if result["masks"]:
+         first_mask_b64 = result["masks"][0]
+         first_score = result["scores"][0]
+
+         # Decode first mask for display
+         mask_bytes = base64.b64decode(first_mask_b64)
+         mask_image = Image.open(io.BytesIO(mask_bytes))
+
+         info = f"Found {result['num_masks']} masks. First mask score: {first_score:.3f}"
+         return info, mask_image
+     else:
+         return "No masks found above confidence threshold", None
+
+ def api_predict(data: Dict[str, Any]) -> Dict[str, Any]:
+     """
+     API function matching SAM2 inference endpoint format
+
+     Expected input format (matching SAM2 + SAM3 extensions):
+     {
+         "inputs": {
+             "image": "base64_encoded_image_string",
+
+             # SAM3 NEW: Text-based prompts
+             "text_prompts": ["person", "car"],  # List of text descriptions
+
+             # SAM2 compatible: Point-based prompts
+             "points": [[[x1, y1]], [[x2, y2]]],  # Points for each object
+             "labels": [[1], [1]],  # Labels for each point (1=foreground, 0=background)
+
+             # SAM2 compatible: Bounding box prompts
+             "boxes": [[x1, y1, x2, y2], [x1, y1, x2, y2]],  # Bounding boxes
+
+             "multimask_output": false,  # Optional, defaults to False
+             "confidence_threshold": 0.5  # Optional, minimum confidence for returned masks
+         }
+     }
+
+     Returns (matching SAM2 format):
+     {
+         "masks": [base64_encoded_mask_1, base64_encoded_mask_2, ...],
+         "scores": [score1, score2, ...],
+         "num_objects": int,
+         "sam_version": "3.0",
+         "success": true
+     }
+     """
+     try:
+         inputs_data = data.get("inputs", {})
+
+         # Extract inputs
+         image_b64 = inputs_data.get("image")
+         text_prompts = inputs_data.get("text_prompts", [])
+         input_points = inputs_data.get("points", [])
+         input_labels = inputs_data.get("labels", [])
+         input_boxes = inputs_data.get("boxes", [])
+         multimask_output = inputs_data.get("multimask_output", False)
+         confidence_threshold = inputs_data.get("confidence_threshold", 0.5)
+
+         # Validate inputs
+         if not image_b64:
+             return {"error": "No image provided", "success": False}
+
+         has_text = bool(text_prompts)
+         has_points = bool(input_points and input_labels)
+         has_boxes = bool(input_boxes)
+
+         if not (has_text or has_points or has_boxes):
+             return {"error": "Must provide at least one prompt type: text_prompts, points+labels, or boxes", "success": False}
+
+         if has_points and len(input_points) != len(input_labels):
+             return {"error": "Number of points and labels must match", "success": False}
+
+         # Decode image
+         if image_b64.startswith('data:image'):
+             image_b64 = image_b64.split(',')[1]
+         image_bytes = base64.b64decode(image_b64)
+         image = Image.open(io.BytesIO(image_bytes)).convert("RGB")
+
+         all_masks = []
+         all_scores = []
+
+         # Process text prompts (SAM3 feature)
+         if has_text:
+             for text_prompt in text_prompts:
+                 result = handler.predict(image, text_prompt, confidence_threshold)
+                 if "error" not in result:
+                     all_masks.extend(result["masks"])
+                     all_scores.extend(result["scores"])
+
+         # Process visual prompts (SAM2 compatibility) - basic implementation
+         # Note: this is a simplified version. Full SAM2 compatibility would require
+         # implementing the visual prompt processing in the handler
+         if has_boxes or has_points:
+             # For now, fall back to a generic prompt if no text provided
+             if not has_text:
+                 result = handler.predict(image, "object", confidence_threshold)
+                 if "error" not in result and result["masks"]:
+                     # Take only the number of masks requested
+                     num_requested = len(input_boxes) if has_boxes else len(input_points)
+                     all_masks.extend(result["masks"][:num_requested])
+                     all_scores.extend(result["scores"][:num_requested])
+
+         # Build SAM2-compatible response
+         return {
+             "masks": all_masks,
+             "scores": all_scores,
+             "num_objects": len(all_masks),
+             "sam_version": "3.0",
+             "success": True
+         }
+
+     except Exception as e:
+         return {"error": str(e), "success": False, "sam_version": "3.0"}
+
+ # Create Gradio interface
+ with gr.Blocks(title="SAM3 Inference API") as demo:
+     gr.HTML("<h1>SAM3 Promptable Concept Segmentation</h1>")
+     gr.HTML("<p>This Space provides both a UI and API for SAM3 inference. Use the interface below or call the API programmatically.</p>")
+
+     with gr.Row():
+         with gr.Column():
+             image_input = gr.Image(type="pil", label="Input Image")
+             text_input = gr.Textbox(label="Text Prompt", placeholder="Enter what you want to segment (e.g., 'cat', 'person', 'car')")
+             confidence_slider = gr.Slider(minimum=0.1, maximum=1.0, value=0.5, step=0.1, label="Confidence Threshold")
+             predict_btn = gr.Button("Segment", variant="primary")
+
+         with gr.Column():
+             info_output = gr.Textbox(label="Results Info")
+             mask_output = gr.Image(label="Sample Mask")
+
+     # API endpoint - this creates /api/predict/
+     predict_btn.click(
+         gradio_interface,
+         inputs=[image_input, text_input, confidence_slider],
+         outputs=[info_output, mask_output],
+         api_name="predict"  # This creates the API endpoint
+     )
+
+     # SAM2-compatible API endpoint - this creates /api/sam2_compatible/
+     gr.Interface(
+         fn=api_predict,
+         inputs=gr.JSON(label="SAM2/SAM3 Compatible Input"),
+         outputs=gr.JSON(label="SAM2/SAM3 Compatible Output"),
+         title="SAM2/SAM3 Compatible API",
+         description="API endpoint that matches SAM2 inference endpoint format with SAM3 extensions",
+         api_name="sam2_compatible"
+     )
+
+     # Add API documentation
+     gr.HTML("""
+     <h2>API Usage</h2>
+
+     <h3>1. Simple Text API (Gradio format)</h3>
+     <pre>
+ import requests
+ import base64
+
+ # Encode your image to base64
+ with open("image.jpg", "rb") as f:
+     image_b64 = base64.b64encode(f.read()).decode()
+
+ # Make API request
+ response = requests.post(
+     "https://your-username-sam3-api.hf.space/api/predict",
+     json={
+         "data": [image_b64, "kitten", 0.5]
+     }
+ )
+
+ result = response.json()
+     </pre>
+
+     <h3>2. SAM2/SAM3 Compatible API (Inference Endpoint format)</h3>
+     <pre>
+ import requests
+ import base64
+
+ # Encode your image to base64
+ with open("image.jpg", "rb") as f:
+     image_b64 = base64.b64encode(f.read()).decode()
+
+ # SAM3 Text Prompts (NEW)
+ response = requests.post(
+     "https://your-username-sam3-api.hf.space/api/sam2_compatible",
+     json={
+         "data": [{
+             "inputs": {
+                 "image": image_b64,
+                 "text_prompts": ["kitten", "toy"],
+                 "confidence_threshold": 0.5
+             }
+         }]
+     }
+ )
+
+ # SAM2 Compatible (Points/Boxes)
+ response = requests.post(
+     "https://your-username-sam3-api.hf.space/api/sam2_compatible",
+     json={
+         "data": [{
+             "inputs": {
+                 "image": image_b64,
+                 "boxes": [[100, 100, 200, 200]],
+                 "confidence_threshold": 0.5
+             }
+         }]
+     }
+ )
+
+ result = response.json()
+     </pre>
+     """)
+
+ if __name__ == "__main__":
+     demo.launch(server_name="0.0.0.0", server_port=7860)
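
Note that api_predict above returns each mask as a base64-encoded single-channel PNG (pixel values 0 or 255), so a client has to decode the strings back into arrays before using them. A minimal client-side sketch of that step, assuming result is a successful response dict in the documented format (the helper name decode_masks is illustrative, not part of this Space):

import base64
import io

import numpy as np
from PIL import Image

def decode_masks(result):
    """Decode the base64 PNG masks from an api_predict response into boolean arrays."""
    masks = []
    for mask_b64 in result["masks"]:
        mask_bytes = base64.b64decode(mask_b64)
        mask_image = Image.open(io.BytesIO(mask_bytes))  # mode 'L', values 0 or 255
        masks.append(np.array(mask_image) > 127)  # one boolean mask per detected object
    return masks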
app.py CHANGED
@@ -1,41 +1,37 @@
  import spaces
  import gradio as gr
- import numpy as np
- from PIL import Image
- import base64
- import io
- from typing import Dict, Any
- import warnings
- warnings.filterwarnings("ignore")

  @spaces.GPU
- def sam3_inference(image, text_prompt, confidence_threshold=0.5):
+ def sam3_predict(image, text_prompt, confidence_threshold=0.5):
      """
-     Standalone GPU function with model initialization for Spaces Stateless GPU
-     All CUDA operations and related imports must happen inside this decorated function
+     SAM3 prediction function for Stateless GPU environment
+     All imports and CUDA operations happen here
      """
+     # Import everything inside the GPU function
+     import torch
+     import numpy as np
+     from PIL import Image
+     import base64
+     import io
+     from transformers import Sam3Model, Sam3Processor
+
      try:
-         # Import torch and transformers inside GPU function to avoid main process CUDA init
-         import torch
-         from transformers import Sam3Model, Sam3Processor
-
-         # Initialize model and processor inside GPU function (required for Stateless GPU)
+         # Handle base64 input if needed
+         if isinstance(image, str):
+             if image.startswith('data:image'):
+                 image = image.split(',')[1]
+             image_bytes = base64.b64decode(image)
+             image = Image.open(io.BytesIO(image_bytes)).convert("RGB")
+
+         # Initialize model and processor
          device = "cuda" if torch.cuda.is_available() else "cpu"
          model = Sam3Model.from_pretrained(
              "facebook/sam3",
              torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32
          ).to(device)
          processor = Sam3Processor.from_pretrained("facebook/sam3")
-         print(f"Model loaded on device: {device}")

-         # Handle base64 input (for API)
-         if isinstance(image, str):
-             if image.startswith('data:image'):
-                 image = image.split(',')[1]
-             image_bytes = base64.b64decode(image)
-             image = Image.open(io.BytesIO(image_bytes)).convert("RGB")
-
-         # Process with SAM3
+         # Process input
          inputs = processor(
              images=image,
              text=text_prompt.strip(),
@@ -47,10 +43,11 @@ def sam3_inference(image, text_prompt, confidence_threshold=0.5):
              if inputs[key].dtype == torch.float32:
                  inputs[key] = inputs[key].to(model.dtype)

+         # Run inference
          with torch.no_grad():
              outputs = model(**inputs)

-         # Use proper SAM3 post-processing
+         # Post-process
          results = processor.post_process_instance_segmentation(
              outputs,
              threshold=confidence_threshold,
@@ -58,285 +55,44 @@
              target_sizes=inputs.get("original_sizes").tolist()
          )[0]

-         return results
-
-     except Exception as e:
-         raise Exception(f"SAM3 inference error: {str(e)}")
-
  [173 deleted lines omitted (old lines 66-238): the SAM3Handler class, gradio_interface, and api_predict, all preserved verbatim in app-bak.py above]
+         # Return results for UI
+         if len(results["masks"]) > 0:
+             # Convert first mask for display
+             mask_np = results["masks"][0].cpu().numpy().astype(np.uint8) * 255
+             score = results["scores"][0].item()
+             mask_image = Image.fromarray(mask_np, mode='L')
+
+             return f"Found {len(results['masks'])} masks. Best score: {score:.3f}", mask_image
+         else:
+             return "No masks found above confidence threshold", None

      except Exception as e:
-         return {"error": str(e), "success": False, "sam_version": "3.0"}
-
  [98 deleted lines omitted (old lines 242-339): the gr.Blocks UI with both API endpoints and the HTML API documentation, all preserved verbatim in app-bak.py above]
+         return f"Error: {str(e)}", None
+
+ # Simple gradio interface - no class, no global state
+ def create_interface():
+     with gr.Blocks(title="SAM3 Inference") as demo:
+         gr.HTML("<h1>SAM3 Promptable Concept Segmentation</h1>")
+
+         with gr.Row():
+             with gr.Column():
+                 image_input = gr.Image(type="pil", label="Input Image")
+                 text_input = gr.Textbox(label="Text Prompt", placeholder="Enter what to segment (e.g., 'cat', 'person', 'car')")
+                 confidence_slider = gr.Slider(minimum=0.1, maximum=1.0, value=0.5, step=0.1, label="Confidence Threshold")
+                 predict_btn = gr.Button("Segment", variant="primary")
+
+             with gr.Column():
+                 info_output = gr.Textbox(label="Results Info")
+                 mask_output = gr.Image(label="Sample Mask")
+
+         predict_btn.click(
+             sam3_predict,
+             inputs=[image_input, text_input, confidence_slider],
+             outputs=[info_output, mask_output]
+         )
+
+     return demo

  if __name__ == "__main__":
+     demo = create_interface()
      demo.launch(server_name="0.0.0.0", server_port=7860)
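
After this change the Space exposes only the UI click handler, and it is registered without an api_name, so programmatic access goes through the auto-generated endpoint rather than /api/predict. A minimal client sketch using gradio_client, addressing the handler by fn_index; the Space id is hypothetical and handle_file assumes gradio_client 1.x:

from gradio_client import Client, handle_file

client = Client("your-username/sam3-api")  # hypothetical Space id
info, mask_path = client.predict(
    handle_file("image.jpg"),  # input image
    "kitten",                  # text prompt
    0.5,                       # confidence threshold
    fn_index=0,                # the click handler has no api_name, so address it by index
)
print(info)  # e.g. "Found N masks. Best score: ..."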
test_minimal.py ADDED
@@ -0,0 +1,15 @@
+ import spaces
+
+ @spaces.GPU
+ def test_gpu():
+     import torch
+     from transformers import Sam3Model, Sam3Processor
+
+     device = "cuda" if torch.cuda.is_available() else "cpu"
+     print(f"Test GPU function works! Device: {device}")
+     return f"GPU test successful on {device}"
+
+ if __name__ == "__main__":
+     print("Starting minimal test...")
+     result = test_gpu()
+     print(result)
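
For reference, running this script on a GPU worker should print the three lines below, reconstructed from the print statements above (assuming torch.cuda.is_available() returns True inside the @spaces.GPU function; on CPU the device string would be "cpu" instead):

Starting minimal test...
Test GPU function works! Device: cuda
GPU test successful on cuda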