Spaces:

TH9817
/

aa

Runtime error

App Files Files Community

TH9817 committed on Oct 31, 2024

Commit

7741a53

verified ·

1 Parent(s): 7c8d6c5

Update app.py

Browse files

Files changed (1) hide show

app.py +22 -12

app.py CHANGED Viewed

@@ -3,6 +3,7 @@ import torch
 import numpy as np
 from huggingface_hub import hf_hub_download
 from transformers import BitsAndBytesConfig, LlavaNextVideoForConditionalGeneration, LlavaNextVideoProcessor
 quantization_config = BitsAndBytesConfig(
@@ -40,11 +41,10 @@ def read_video_pyav(container, indices):
             frames.append(frame)
     return np.stack([x.to_ndarray(format="rgb24") for x in frames])
-from huggingface_hub import hf_hub_download
 # Download video from the hub
 video_path_1 = hf_hub_download(repo_id="raushan-testing-hf/videos-test", filename="sample_demo_1.mp4", repo_type="dataset")
-video_path_2 = hf_hub_download(repo_id="raushan-testing-hf/videos-test", filename="karate.mp4", repo_type="dataset")
 container = av.open(video_path_1)
@@ -54,12 +54,12 @@ indices = np.arange(0, total_frames, total_frames / 8).astype(int)
 clip_baby = read_video_pyav(container, indices)
-container = av.open(video_path_2)
 # sample uniformly 8 frames from the video (we can sample more for longer videos)
-total_frames = container.streams.video[0].frames
-indices = np.arange(0, total_frames, total_frames / 8).astype(int)
-clip_karate = read_video_pyav(container, indices)
 # Each "content" is a list of dicts and you can add image/video/text modalities
 conversation = [
@@ -83,13 +83,23 @@ conversation_2 = [
 ]
 prompt = processor.apply_chat_template(conversation, add_generation_prompt=True)
-prompt_2 = processor.apply_chat_template(conversation_2, add_generation_prompt=True)
-inputs = processor([prompt, prompt_2], videos=[clip_baby, clip_karate], padding=True, return_tensors="pt").to(model.device)
-generate_kwargs = {"max_new_tokens": 100, "do_sample": True, "top_p": 0.9}
-output = model.generate(**inputs, **generate_kwargs)
-generated_text = processor.batch_decode(output, skip_special_tokens=True)
-print(generated_text)

 import numpy as np
 from huggingface_hub import hf_hub_download
 from transformers import BitsAndBytesConfig, LlavaNextVideoForConditionalGeneration, LlavaNextVideoProcessor
+import gradio as gr
 quantization_config = BitsAndBytesConfig(
             frames.append(frame)
     return np.stack([x.to_ndarray(format="rgb24") for x in frames])
 # Download video from the hub
 video_path_1 = hf_hub_download(repo_id="raushan-testing-hf/videos-test", filename="sample_demo_1.mp4", repo_type="dataset")
+#video_path_2 = hf_hub_download(repo_id="raushan-testing-hf/videos-test", filename="karate.mp4", repo_type="dataset")
 container = av.open(video_path_1)
 clip_baby = read_video_pyav(container, indices)
+#container = av.open(video_path_2)
 # sample uniformly 8 frames from the video (we can sample more for longer videos)
+#total_frames = container.streams.video[0].frames
+#indices = np.arange(0, total_frames, total_frames / 8).astype(int)
+#clip_karate = read_video_pyav(container, indices)
 # Each "content" is a list of dicts and you can add image/video/text modalities
 conversation = [
 ]
 prompt = processor.apply_chat_template(conversation, add_generation_prompt=True)
+#prompt_2 = processor.apply_chat_template(conversation_2, add_generation_prompt=True)
+inputs = processor(prompt, videos=clip_baby, padding=True, return_tensors="pt").to(model.device)
+def chat(i):
+    """Generate a sampled answer for the preloaded video prompt.
+
+    Args:
+        i: Upper bound on the number of newly generated tokens; this is
+            the value supplied by the Gradio slider input.
+
+    Returns:
+        The decoded generation as a single string prefixed with "answer".
+    """
+    # The slider may deliver a float; generation needs an integer count.
+    generate_kwargs = {"max_new_tokens": int(i), "do_sample": True, "top_p": 0.9}
+    output = model.generate(**inputs, **generate_kwargs)
+    # batch_decode returns a list of strings (one per generated sequence),
+    # so join it before concatenating: "answer" + list raises TypeError.
+    generated_text = processor.batch_decode(output, skip_special_tokens=True)
+    return "answer" + "\n".join(generated_text)
+# Build a Gradio UI around chat: a single slider (range 100-300) supplies
+# its argument, and the returned value is shown in a text output.
+demo = gr.Interface(
+    fn=chat,
+    inputs=[gr.Slider(100,300)],
+    outputs=["text"],
+)
+# Launch the app
+demo.launch()