Spaces:

chris-propeller
/

sam3-test

Running on L4

App Files Files Community

chris-propeller committed on Nov 24

Commit

050a111

1 Parent(s): a36d7fa

add back in api

Browse files

Files changed (1) hide show

app.py +284 -7

app.py CHANGED Viewed

@@ -2,10 +2,10 @@ import spaces
 import gradio as gr
 @spaces.GPU
-def sam3_predict(image, text_prompt, confidence_threshold=0.5):
     """
-    SAM3 prediction function for Stateless GPU environment
-    All imports and CUDA operations happen here
     """
     # Import everything inside the GPU function
     import torch
@@ -55,6 +55,20 @@ def sam3_predict(image, text_prompt, confidence_threshold=0.5):
             target_sizes=inputs.get("original_sizes").tolist()
         )[0]
         # Return results for UI
         if len(results["masks"]) > 0:
             # Convert first mask for display
@@ -69,10 +83,148 @@ def sam3_predict(image, text_prompt, confidence_threshold=0.5):
     except Exception as e:
         return f"Error: {str(e)}", None
-# Simple gradio interface - no class, no global state
 def create_interface():
-    with gr.Blocks(title="SAM3 Inference") as demo:
         gr.HTML("<h1>SAM3 Promptable Concept Segmentation</h1>")
         with gr.Row():
             with gr.Column():
@@ -85,12 +237,137 @@ def create_interface():
                 info_output = gr.Textbox(label="Results Info")
                 mask_output = gr.Image(label="Sample Mask")
         predict_btn.click(
-            sam3_predict,
             inputs=[image_input, text_input, confidence_slider],
-            outputs=[info_output, mask_output]
         )
     return demo
 if __name__ == "__main__":

 import gradio as gr
 @spaces.GPU
+def sam3_inference(image, text_prompt, confidence_threshold=0.5):
     """
+    Core SAM3 inference function for Stateless GPU environment
+    Returns raw results for both UI and API use
     """
     # Import everything inside the GPU function
     import torch
             target_sizes=inputs.get("original_sizes").tolist()
         )[0]
+        return results
+    except Exception as e:
+        raise Exception(f"SAM3 inference error: {str(e)}")
+def gradio_interface(image, text_prompt, confidence_threshold):
+    """Gradio interface wrapper for UI"""
+    import numpy as np
+    from PIL import Image
+    import io
+    try:
+        results = sam3_inference(image, text_prompt, confidence_threshold)
         # Return results for UI
         if len(results["masks"]) > 0:
             # Convert first mask for display
     except Exception as e:
         return f"Error: {str(e)}", None
+def api_predict(image, text_prompt, confidence_threshold):
+    """API prediction function for simple Gradio API"""
+    import numpy as np
+    from PIL import Image
+    import base64
+    import io
+    try:
+        results = sam3_inference(image, text_prompt, confidence_threshold)
+        # Prepare API response
+        response = {
+            "masks": [],
+            "scores": [],
+            "prompt_type": "text",
+            "prompt_value": text_prompt,
+            "num_masks": len(results["masks"])
+        }
+        # Process each mask
+        for i in range(len(results["masks"])):
+            mask_np = results["masks"][i].cpu().numpy().astype(np.uint8) * 255
+            score = results["scores"][i].item()
+            if score >= confidence_threshold:
+                # Convert mask to base64 for API response
+                mask_image = Image.fromarray(mask_np, mode='L')
+                buffer = io.BytesIO()
+                mask_image.save(buffer, format='PNG')
+                mask_b64 = base64.b64encode(buffer.getvalue()).decode('utf-8')
+                response["masks"].append(mask_b64)
+                response["scores"].append(score)
+        return response
+    except Exception as e:
+        return {"error": str(e)}
+def sam2_compatible_api(data):
+    """
+    SAM2-compatible API endpoint with SAM3 extensions
+    Supports text prompts (SAM3), points, and boxes (SAM2 compatible)
+    """
+    import numpy as np
+    from PIL import Image
+    import base64
+    import io
+    try:
+        inputs_data = data.get("inputs", {})
+        # Extract inputs
+        image_b64 = inputs_data.get("image")
+        text_prompts = inputs_data.get("text_prompts", [])
+        input_points = inputs_data.get("points", [])
+        input_labels = inputs_data.get("labels", [])
+        input_boxes = inputs_data.get("boxes", [])
+        confidence_threshold = inputs_data.get("confidence_threshold", 0.5)
+        # Validate inputs
+        if not image_b64:
+            return {"error": "No image provided", "success": False}
+        has_text = bool(text_prompts)
+        has_points = bool(input_points and input_labels)
+        has_boxes = bool(input_boxes)
+        if not (has_text or has_points or has_boxes):
+            return {"error": "Must provide at least one prompt type: text_prompts, points+labels, or boxes", "success": False}
+        if has_points and len(input_points) != len(input_labels):
+            return {"error": "Number of points and labels must match", "success": False}
+        # Decode image
+        if image_b64.startswith('data:image'):
+            image_b64 = image_b64.split(',')[1]
+        image_bytes = base64.b64decode(image_b64)
+        image = Image.open(io.BytesIO(image_bytes)).convert("RGB")
+        all_masks = []
+        all_scores = []
+        # Process text prompts (SAM3 feature)
+        if has_text:
+            for text_prompt in text_prompts:
+                results = sam3_inference(image, text_prompt, confidence_threshold)
+                if results and len(results["masks"]) > 0:
+                    for i in range(len(results["masks"])):
+                        mask_np = results["masks"][i].cpu().numpy().astype(np.uint8) * 255
+                        score = results["scores"][i].item()
+                        if score >= confidence_threshold:
+                            # Convert mask to base64
+                            mask_image = Image.fromarray(mask_np, mode='L')
+                            buffer = io.BytesIO()
+                            mask_image.save(buffer, format='PNG')
+                            mask_b64 = base64.b64encode(buffer.getvalue()).decode('utf-8')
+                            all_masks.append(mask_b64)
+                            all_scores.append(score)
+        # Process visual prompts (SAM2 compatibility) - Basic implementation
+        if has_boxes or has_points:
+            # For visual prompts, use a generic prompt to get masks
+            # This is a simplified implementation - full SAM2 compatibility would require
+            # implementing visual prompt processing in the core function
+            if not has_text:
+                results = sam3_inference(image, "object", confidence_threshold)
+                if results and len(results["masks"]) > 0:
+                    # Take only the number of masks requested
+                    num_requested = len(input_boxes) if has_boxes else len(input_points)
+                    for i in range(min(num_requested, len(results["masks"]))):
+                        mask_np = results["masks"][i].cpu().numpy().astype(np.uint8) * 255
+                        score = results["scores"][i].item()
+                        # Convert mask to base64
+                        mask_image = Image.fromarray(mask_np, mode='L')
+                        buffer = io.BytesIO()
+                        mask_image.save(buffer, format='PNG')
+                        mask_b64 = base64.b64encode(buffer.getvalue()).decode('utf-8')
+                        all_masks.append(mask_b64)
+                        all_scores.append(score)
+        # Build SAM2-compatible response
+        return {
+            "masks": all_masks,
+            "scores": all_scores,
+            "num_objects": len(all_masks),
+            "sam_version": "3.0",
+            "success": True
+        }
+    except Exception as e:
+        return {"error": str(e), "success": False, "sam_version": "3.0"}
+# Create comprehensive Gradio interface with API endpoints
 def create_interface():
+    with gr.Blocks(title="SAM3 Inference API") as demo:
         gr.HTML("<h1>SAM3 Promptable Concept Segmentation</h1>")
+        gr.HTML("<p>This Space provides both a UI and API for SAM3 inference with SAM2 compatibility. Use the interface below or call the API programmatically.</p>")
         with gr.Row():
             with gr.Column():
                 info_output = gr.Textbox(label="Results Info")
                 mask_output = gr.Image(label="Sample Mask")
+        # Main UI prediction with API endpoint
         predict_btn.click(
+            gradio_interface,
             inputs=[image_input, text_input, confidence_slider],
+            outputs=[info_output, mask_output],
+            api_name="predict"  # Creates /api/predict endpoint
+        )
+        # Simple API endpoint for Gradio format
+        gr.Interface(
+            fn=api_predict,
+            inputs=[
+                gr.Image(type="pil", label="Image"),
+                gr.Textbox(label="Text Prompt"),
+                gr.Slider(minimum=0.1, maximum=1.0, value=0.5, label="Confidence Threshold")
+            ],
+            outputs=gr.JSON(label="API Response"),
+            title="Simple API",
+            description="Returns structured JSON response with base64 encoded masks",
+            api_name="simple_api"
+        )
+        # SAM2-compatible API endpoint
+        gr.Interface(
+            fn=sam2_compatible_api,
+            inputs=gr.JSON(label="SAM2/SAM3 Compatible Input"),
+            outputs=gr.JSON(label="SAM2/SAM3 Compatible Output"),
+            title="SAM2/SAM3 Compatible API",
+            description="API endpoint that matches SAM2 inference endpoint format with SAM3 extensions",
+            api_name="sam2_compatible"
         )
+        # Add comprehensive API documentation
+        gr.HTML("""
+        <h2>API Usage</h2>
+        <h3>1. Simple Text API (Gradio format)</h3>
+        <pre>
+import requests
+import base64
+# Encode your image to base64
+with open("image.jpg", "rb") as f:
+    image_b64 = base64.b64encode(f.read()).decode()
+# Make API request
+response = requests.post(
+    "https://your-username-sam3-api.hf.space/api/predict",
+    json={
+        "data": [image_b64, "kitten", 0.5]
+    }
+)
+result = response.json()
+        </pre>
+        <h3>2. SAM2/SAM3 Compatible API (Inference Endpoint format)</h3>
+        <pre>
+import requests
+import base64
+# Encode your image to base64
+with open("image.jpg", "rb") as f:
+    image_b64 = base64.b64encode(f.read()).decode()
+# SAM3 Text Prompts (NEW)
+response = requests.post(
+    "https://your-username-sam3-api.hf.space/api/sam2_compatible",
+    json={
+        "data": [{
+            "inputs": {
+                "image": image_b64,
+                "text_prompts": ["kitten", "toy"],
+                "confidence_threshold": 0.5
+            }
+        }]
+    }
+)
+# SAM2 Compatible (Points/Boxes)
+response = requests.post(
+    "https://your-username-sam3-api.hf.space/api/sam2_compatible",
+    json={
+        "data": [{
+            "inputs": {
+                "image": image_b64,
+                "boxes": [[100, 100, 200, 200]],
+                "confidence_threshold": 0.5
+            }
+        }]
+    }
+)
+result = response.json()
+        </pre>
+        <h3>3. API Parameters</h3>
+        <h4>SAM2-Compatible API Input</h4>
+        <pre>
+{
+  "inputs": {
+    "image": "base64_encoded_image_string",
+    // SAM3 NEW: Text-based prompts
+    "text_prompts": ["person", "car"],  // List of text descriptions
+    // SAM2 COMPATIBLE: Point-based prompts
+    "points": [[[x1, y1]], [[x2, y2]]],  // Points for each object
+    "labels": [[1], [1]],  // Labels for each point (1=foreground, 0=background)
+    // SAM2 COMPATIBLE: Bounding box prompts
+    "boxes": [[x1, y1, x2, y2], [x1, y1, x2, y2]],  // Bounding boxes
+    "multimask_output": false,  // Optional, defaults to False
+    "confidence_threshold": 0.5  // Optional, minimum confidence for returned masks
+  }
+}
+        </pre>
+        <h4>API Response</h4>
+        <pre>
+{
+  "masks": ["base64_encoded_mask_1", "base64_encoded_mask_2"],
+  "scores": [0.95, 0.87],
+  "num_objects": 2,
+  "sam_version": "3.0",
+  "success": true
+}
+        </pre>
+        """)
     return demo
 if __name__ == "__main__":