ankandrew
committed on
Commit
·
f17ef4c
1
Parent(s):
c4f32fc
Add Info toast message w/ time taken
Browse files
app.py
CHANGED
|
@@ -1,4 +1,8 @@
|
|
|
|
|
| 1 |
import subprocess
|
|
|
|
|
|
|
|
|
|
| 2 |
import gradio as gr
|
| 3 |
import spaces
|
| 4 |
from transformers import Qwen2_5_VLForConditionalGeneration, AutoProcessor
|
|
@@ -20,6 +24,21 @@ MODEL_NAMES = {
|
|
| 20 |
}
|
| 21 |
|
| 22 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 23 |
@spaces.GPU(duration=300)
|
| 24 |
def run_inference(model_key, input_type, text, image, video, fps, system_prompt, add_vision_id):
|
| 25 |
"""
|
|
@@ -78,10 +97,14 @@ def run_inference(model_key, input_type, text, image, video, fps, system_prompt,
|
|
| 78 |
)
|
| 79 |
inputs = inputs.to(model.device)
|
| 80 |
|
| 81 |
-
|
| 82 |
-
|
| 83 |
-
|
| 84 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 85 |
|
| 86 |
|
| 87 |
# Build Gradio interface
|
|
|
|
| 1 |
+
import contextlib
|
| 2 |
import subprocess
|
| 3 |
+
import time
|
| 4 |
+
from typing import Iterator, Callable
|
| 5 |
+
|
| 6 |
import gradio as gr
|
| 7 |
import spaces
|
| 8 |
from transformers import Qwen2_5_VLForConditionalGeneration, AutoProcessor
|
|
|
|
| 24 |
}
|
| 25 |
|
| 26 |
|
| 27 |
+
@contextlib.contextmanager
def measure_time() -> Iterator[Callable[[], float]]:
    """
    Context manager that measures wall-clock execution time (in seconds)
    of the code inside its ``with`` block.

    Usage:
        with measure_time() as timer:
            ...  # code snippet to be timed
        print(f"Code took: {timer()} seconds")

    Yields:
        A zero-argument callable returning the elapsed time in seconds.
        While the block is still running it returns 0.0; after the block
        exits (normally or via an exception) it returns the total elapsed
        time.
    """
    # perf_counter is a monotonic high-resolution clock, appropriate for
    # interval measurement (unaffected by system clock adjustments).
    start_time = end_time = time.perf_counter()
    try:
        # The lambda closes over the local `end_time`, so callers see the
        # value assigned in the `finally` clause once the block finishes.
        yield lambda: end_time - start_time
    finally:
        # `finally` ensures the stop time is recorded even if the managed
        # block raises (contextmanager re-raises at the yield point, which
        # would otherwise skip this assignment and leave the timer at 0).
        end_time = time.perf_counter()
|
| 40 |
+
|
| 41 |
+
|
| 42 |
@spaces.GPU(duration=300)
|
| 43 |
def run_inference(model_key, input_type, text, image, video, fps, system_prompt, add_vision_id):
|
| 44 |
"""
|
|
|
|
| 97 |
)
|
| 98 |
inputs = inputs.to(model.device)
|
| 99 |
|
| 100 |
+
with measure_time() as timer:
|
| 101 |
+
gen_ids = model.generate(**inputs, max_new_tokens=512)
|
| 102 |
+
# Trim the prompt tokens
|
| 103 |
+
trimmed = [out_ids[len(inp_ids):] for inp_ids, out_ids in zip(inputs.input_ids, gen_ids)]
|
| 104 |
+
result = processor.batch_decode(trimmed, skip_special_tokens=True)[0]
|
| 105 |
+
|
| 106 |
+
gr.Info(f"Finished in {timer():.2f}s", title="Success", duration=5)  # green-style info toast
|
| 107 |
+
return result
|
| 108 |
|
| 109 |
|
| 110 |
# Build Gradio interface
|