Update app.py
app.py
CHANGED
@@ -17,7 +17,10 @@ zephyr_model = "HuggingFaceH4/zephyr-7b-beta"
 pipe = pipeline("text-generation", model=zephyr_model, torch_dtype=torch.bfloat16, device_map="auto")

 standard_sys = f"""
-
+You will be provided with a list of visual events and an audio description. All of this information comes from a single video.
+The visual events are images extracted from this video every 12 frames.
+The audio description comes from the audio track of the video.
+Your job is to use this information to give a short summary of what is happening in the video.
 """

 def extract_frames(video_in, interval=24, output_format='.jpg'):
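The new system prompt is consumed when the formatted captions are sent to the zephyr pipeline via llm_process (see the last hunk). The body of llm_process is not part of this diff, so the following is only a minimal sketch of how standard_sys might be combined with pipe, assuming a standard chat-template call; the message structure and decoding parameters are illustrative, not taken from the repository.

def llm_process(formatted_captions):
    # Chat-style conversation: standard_sys as the system message,
    # the combined frame + audio captions as the user message.
    messages = [
        {"role": "system", "content": standard_sys},
        {"role": "user", "content": formatted_captions},
    ]
    prompt = pipe.tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
    outputs = pipe(prompt, max_new_tokens=256, do_sample=True, temperature=0.7, top_p=0.95)
    # Strip the prompt prefix so only the generated summary is returned.
    return outputs[0]["generated_text"][len(prompt):].strip()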
@@ -83,7 +86,7 @@ def extract_audio(video_path):
     return "output_audio.mp3"

 def get_salmonn(audio_in):
-    salmonn_prompt = "
+    salmonn_prompt = "Please describe the audio"
     client = Client("fffiloni/SALMONN-7B-gradio")
     result = client.predict(
         audio_in,    # filepath in 'Audio' Audio component
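The hunk above is cut off in the middle of the client.predict(...) call. For context, here is a sketch of what the full get_salmonn could look like with gradio_client; the argument order and api_name are defined by the fffiloni/SALMONN-7B-gradio Space and are assumptions here, not part of this change.

from gradio_client import Client

def get_salmonn(audio_in):
    salmonn_prompt = "Please describe the audio"
    client = Client("fffiloni/SALMONN-7B-gradio")
    result = client.predict(
        audio_in,        # filepath in 'Audio' Audio component
        salmonn_prompt,  # text instruction sent with the audio (assumed parameter)
        api_name="/predict",  # assumed endpoint name
    )
    # result is expected to be SALMONN's textual description of the audio.
    return result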
@@ -141,9 +144,9 @@ def infer(video_in):
     print(formatted_captions)

     # Send formatted captions to LLM
-
+    video_description_from_llm = llm_process(formatted_captions)

-    return
+    return video_description_from_llm

 with gr.Blocks() as demo :
     with gr.Column(elem_id="col-container"):
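With this change, infer returns the LLM summary instead of a bare return, so it can feed a UI component directly. The Blocks layout continues past the lines shown; below is a minimal sketch of how infer might be wired up, with component names chosen here for illustration.

with gr.Blocks() as demo:
    with gr.Column(elem_id="col-container"):
        video_in = gr.Video(label="Video input")
        submit_btn = gr.Button("Submit")
        video_description = gr.Textbox(label="Video description")
    # Run the full pipeline (frames -> captions -> audio description -> LLM summary)
    # and show the returned summary in the textbox.
    submit_btn.click(fn=infer, inputs=[video_in], outputs=[video_description])

demo.launch()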