import torch
import av
import numpy as np
from PIL import Image
import comfy.model_management
from comfy.comfy_types import IO, ComfyNodeABC, InputTypeDict
from comfy_api.latest import io


class LoadVideoFrame(ComfyNodeABC):
    """
    Extract a single frame from a video input and return it as an image.

    The ``video`` input is an object exposing ``get_stream_source()``; that
    source is opened with PyAV and the frame at the requested 0-based index
    is decoded and converted to a float image tensor with a leading batch
    dimension of 1.
    """

    @classmethod
    def INPUT_TYPES(cls) -> InputTypeDict:
        """Declare the node inputs: the video and the 0-based frame index."""
        return {
            "required": {
                "video": (IO.VIDEO, {
                    "tooltip": "Video tensor [B, T, H, W, C]"
                }),
                "frame_index": (IO.INT, {
                    "default": 0,
                    "min": 0,
                    "max": 10000,
                    "step": 1,
                    "tooltip": "Frame index to extract (0 = first frame)"
                }),
            }
        }

    RETURN_TYPES = (IO.IMAGE,)
    RETURN_NAMES = ("image",)
    FUNCTION = "load_video_frame"
    CATEGORY = "image/loaders"
    DESCRIPTION = "Extract a specific frame from a video tensor as an image"

    @staticmethod
    def _decode_frame_at(container, frame_index):
        """
        Return the ``frame_index``-th frame decoded from the first video stream.

        Args:
            container: An open PyAV input container.
            frame_index: 0-based index of the frame to return.

        Returns:
            The decoded PyAV video frame at that position.

        Raises:
            ValueError: If there is no video stream, nothing can be decoded,
                or the index is past the end of the stream.
        """
        if not container.streams.video:
            raise ValueError("No video stream found in the input")

        # The container-level frame count is only a hint: PyAV reports 0 when
        # it is unknown. Use it solely to reject obviously bad indices early,
        # without having to decode the whole stream first.
        reported_frames = container.streams.video[0].frames
        if reported_frames and frame_index >= reported_frames:
            raise ValueError(f"Frame index {frame_index} is out of range. Video has {reported_frames} frames.")

        # Decode sequentially rather than seeking: Container.seek() takes a
        # timestamp (not a frame number) and lands on a keyframe, so seeking
        # with the raw index would return the wrong frame.
        decoded = 0
        for frame in container.decode(video=0):
            if decoded == frame_index:
                return frame
            decoded += 1

        if decoded == 0:
            raise ValueError(f"Could not decode frame {frame_index}")
        raise ValueError(f"Frame index {frame_index} is out of range. Video has {decoded} frames.")

    def load_video_frame(self, video, frame_index):
        """
        Extract a specific frame from a video.

        Args:
            video: Object providing ``get_stream_source()``, whose result is
                passed to ``av.open``.
            frame_index: Index of the frame to extract (0-based).

        Returns:
            A 1-tuple holding the frame as a float32 tensor scaled to
            [0, 1] with shape [1, H, W, C], placed on the device returned by
            ``comfy.model_management.get_torch_device()``.

        Raises:
            RuntimeError: If the source cannot be opened or decoded, or if
                ``frame_index`` is negative or out of range. The underlying
                exception is chained as the cause.
        """
        try:
            if frame_index < 0:
                raise ValueError(f"Frame index must be non-negative, got {frame_index}")

            video_source = video.get_stream_source()

            # The context manager guarantees the container is closed even
            # when validation or decoding fails part-way through.
            with av.open(video_source) as container:
                frame = self._decode_frame_at(container, frame_index)
                pil_image = frame.to_image()

            # Scale 8-bit pixel values to [0, 1] floats and add the batch axis.
            image_array = np.array(pil_image).astype(np.float32) / 255.0
            image_tensor = torch.from_numpy(image_array).unsqueeze(0)

            # NOTE(review): kept from the original; confirm that downstream
            # nodes accept image tensors on the compute device.
            device = comfy.model_management.get_torch_device()
            image_tensor = image_tensor.to(device)

            return (image_tensor,)

        except Exception as e:
            raise RuntimeError(f"Error extracting video frame: {str(e)}") from e
    

# Node registration tables exported by this module.
# Node identifier -> class implementing the node.
NODE_CLASS_MAPPINGS = {"LoadVideoFrame": LoadVideoFrame}

# Node identifier -> human-readable display name.
NODE_DISPLAY_NAME_MAPPINGS = {"LoadVideoFrame": "Load Video Frame"}