Spaces:

JacobLinCool
/

tmp-service

Sleeping

App Files Files Community

JacobLinCool committed about 1 month ago

Commit

7c94b61

verified ·

1 Parent(s): 59122de

Create app.py

Browse files

Files changed (1) hide show

app.py +243 -0

app.py ADDED Viewed

	@@ -0,0 +1,243 @@

+import spaces
+from transformers import (
+    AutoImageProcessor,
+    AutoModelForCausalLM,
+)
+import gradio as gr
+import torch
+from accelerate import Accelerator
+import numpy as np
+import cv2
+from PIL import Image
+import zipfile
+import io
+import tempfile
+import os
+DEVICE = Accelerator().device
+MODEL_NAME = "qihoo360/fg-clip2-so400m"
+model = AutoModelForCausalLM.from_pretrained(MODEL_NAME, trust_remote_code=True).to(
+    DEVICE
+)
+image_processor = AutoImageProcessor.from_pretrained(MODEL_NAME)
+def determine_max_value(image):
+    """Determine max_num_patches based on image size."""
+    w, h = image.size
+    max_val = (w // 16) * (h // 16)
+    if max_val > 784:
+        return 1024
+    elif max_val > 576:
+        return 784
+    elif max_val > 256:
+        return 576
+    elif max_val > 128:
+        return 256
+    else:
+        return 128
+@spaces.GPU
+def generate_image_embeddings(zip_file):
+    """
+    Generate embeddings from images in a zip file.
+    Args:
+        zip_file: Uploaded zip file containing images
+    Returns:
+        Tuple of (embeddings as numpy file, status message)
+    """
+    try:
+        # Extract images from zip
+        images = []
+        with zipfile.ZipFile(zip_file.name, "r") as zip_ref:
+            for file_info in zip_ref.filelist:
+                if file_info.filename.lower().endswith(
+                    (".png", ".jpg", ".jpeg", ".bmp", ".webp")
+                ):
+                    with zip_ref.open(file_info) as img_file:
+                        img = Image.open(io.BytesIO(img_file.read())).convert("RGB")
+                        images.append(img)
+        if len(images) == 0:
+            return None, "❌ No valid images found in the zip file"
+        # Generate embeddings
+        embeddings = []
+        with torch.no_grad():
+            for i, image in enumerate(images):
+                image_input = image_processor(
+                    images=image,
+                    max_num_patches=determine_max_value(image),
+                    return_tensors="pt",
+                ).to(DEVICE)
+                image_feature = model.get_image_features(**image_input)
+                # Normalize the embedding
+                normalized_features = image_feature / image_feature.norm(
+                    dim=-1, keepdim=True
+                )
+                embeddings.append(normalized_features.cpu().numpy())
+        embeddings = np.vstack(embeddings)
+        # Save embeddings to a temporary file
+        with tempfile.NamedTemporaryFile(delete=False, suffix=".npy") as tmp:
+            np.save(tmp.name, embeddings)
+            output_path = tmp.name
+        message = f"✅ Successfully generated embeddings for {len(images)} images\nShape: {embeddings.shape}"
+        return output_path, message
+    except Exception as e:
+        return None, f"❌ Error: {str(e)}"
+def extract_frames(video_path: str, fps: int = 4):
+    """
+    Extract frames from video at specified fps.
+    Args:
+        video_path: Path to the video file
+        fps: Frames per second to sample
+    Returns:
+        List of PIL Images
+    """
+    cap = cv2.VideoCapture(video_path)
+    video_fps = cap.get(cv2.CAP_PROP_FPS)
+    frame_interval = int(round(video_fps) / fps)
+    frames = []
+    frame_count = 0
+    while True:
+        ret, frame = cap.read()
+        if not ret:
+            break
+        if frame_count % frame_interval == 0:
+            # Convert BGR to RGB
+            frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
+            pil_image = Image.fromarray(frame_rgb)
+            frames.append(pil_image)
+        frame_count += 1
+    cap.release()
+    return frames
+@spaces.GPU
+def generate_video_embeddings(video_file, fps):
+    """
+    Generate embeddings from video frames.
+    Args:
+        video_file: Uploaded video file
+        fps: Frames per second to extract
+    Returns:
+        Tuple of (embeddings as numpy file, status message)
+    """
+    try:
+        # Extract frames
+        frames = extract_frames(video_file.name, fps)
+        if len(frames) == 0:
+            return None, "❌ No frames could be extracted from the video"
+        # Generate embeddings
+        embeddings = []
+        with torch.no_grad():
+            for i, frame in enumerate(frames):
+                image_input = image_processor(
+                    images=frame,
+                    max_num_patches=determine_max_value(frame),
+                    return_tensors="pt",
+                ).to(DEVICE)
+                image_feature = model.get_image_features(**image_input)
+                # Normalize the embedding
+                normalized_features = image_feature / image_feature.norm(
+                    dim=-1, keepdim=True
+                )
+                embeddings.append(normalized_features.cpu().numpy())
+        embeddings = np.vstack(embeddings)
+        # Save embeddings to a temporary file
+        with tempfile.NamedTemporaryFile(delete=False, suffix=".npy") as tmp:
+            np.save(tmp.name, embeddings)
+            output_path = tmp.name
+        message = f"✅ Successfully generated embeddings for {len(frames)} frames (extracted at {fps} fps)\nShape: {embeddings.shape}"
+        return output_path, message
+    except Exception as e:
+        return None, f"❌ Error: {str(e)}"
+# Create Gradio interface: a two-tab Blocks app, one tab per embedding source
+# (images inside a ZIP archive, or frames sampled from a video).
+with gr.Blocks(title="Video & Image Embedding Generator") as demo:
+    gr.Markdown("# 🎬 Video & Image Embedding Generator")
+    gr.Markdown(f"Generate embeddings using **{MODEL_NAME}** model")
+    # Tab 1: ZIP upload on the left, downloadable .npy and status on the right.
+    with gr.Tab("📦 Images from ZIP"):
+        gr.Markdown("Upload a ZIP file containing images to generate embeddings")
+        with gr.Row():
+            with gr.Column():
+                zip_input = gr.File(label="Upload ZIP file", file_types=[".zip"])
+                img_submit_btn = gr.Button("Generate Embeddings", variant="primary")
+            with gr.Column():
+                img_output = gr.File(label="Download Embeddings (.npy)")
+                img_status = gr.Textbox(label="Status", lines=3)
+        # The callback returns (file path or None, status message), matching
+        # the two output components in order.
+        img_submit_btn.click(
+            fn=generate_image_embeddings,
+            inputs=[zip_input],
+            outputs=[img_output, img_status],
+        )
+    # Tab 2: video upload plus sampling-rate slider on the left, results on
+    # the right.
+    with gr.Tab("🎥 Video Frames"):
+        gr.Markdown(
+            "Upload a video and specify FPS to extract frames and generate embeddings"
+        )
+        with gr.Row():
+            with gr.Column():
+                video_input = gr.Video(label="Upload Video")
+                # Sampling rate handed to generate_video_embeddings as `fps`.
+                fps_input = gr.Slider(
+                    minimum=1,
+                    maximum=30,
+                    value=4,
+                    step=1,
+                    label="Frames per Second (FPS)",
+                )
+                vid_submit_btn = gr.Button("Generate Embeddings", variant="primary")
+            with gr.Column():
+                vid_output = gr.File(label="Download Embeddings (.npy)")
+                vid_status = gr.Textbox(label="Status", lines=3)
+        # The callback returns (file path or None, status message), matching
+        # the two output components in order.
+        vid_submit_btn.click(
+            fn=generate_video_embeddings,
+            inputs=[video_input, fps_input],
+            outputs=[vid_output, vid_status],
+        )
+    # Usage notes rendered below both tabs.
+    gr.Markdown(
+        """
+    ### 📝 Notes:
+    - Images in ZIP: Supports PNG, JPG, JPEG, BMP, WEBP formats
+    - Video: Supports common video formats (MP4, AVI, MOV, etc.)
+    - Output: NumPy array file (.npy) containing normalized embeddings
+    - Load embeddings: `embeddings = np.load('embeddings.npy')`
+    """
+    )
+# Start the web server only when the file is executed as a script.
+if __name__ == "__main__":
+    demo.launch()