ankandrew
committed on
Commit
·
cece0ec
1
Parent(s):
e8a0cf2
Minor fix
Browse files
app.py
CHANGED
|
@@ -7,10 +7,9 @@ import gradio as gr
|
|
| 7 |
import spaces
|
| 8 |
from transformers import Qwen2_5_VLForConditionalGeneration, AutoProcessor
|
| 9 |
from qwen_vl_utils import process_vision_info
|
| 10 |
-
from transformers.utils import is_flash_attn_2_available
|
| 11 |
|
| 12 |
subprocess.run(
|
| 13 |
-
"pip install
|
| 14 |
env={"FLASH_ATTENTION_SKIP_CUDA_BUILD": "TRUE"},
|
| 15 |
shell=True,
|
| 16 |
)
|
|
@@ -49,7 +48,6 @@ def run_inference(model_key, input_type, text, image, video, fps, system_prompt,
|
|
| 49 |
model_id,
|
| 50 |
torch_dtype="auto",
|
| 51 |
device_map="auto",
|
| 52 |
-
attn_implementation="flash_attention_2" if is_flash_attn_2_available() else None,
|
| 53 |
)
|
| 54 |
processor = AutoProcessor.from_pretrained(model_id)
|
| 55 |
|
|
|
|
| 7 |
import spaces
|
| 8 |
from transformers import Qwen2_5_VLForConditionalGeneration, AutoProcessor
|
| 9 |
from qwen_vl_utils import process_vision_info
|
|
|
|
| 10 |
|
| 11 |
subprocess.run(
|
| 12 |
+
"pip install flash-attn --no-build-isolation",
|
| 13 |
env={"FLASH_ATTENTION_SKIP_CUDA_BUILD": "TRUE"},
|
| 14 |
shell=True,
|
| 15 |
)
|
|
|
|
| 48 |
model_id,
|
| 49 |
torch_dtype="auto",
|
| 50 |
device_map="auto",
|
|
|
|
| 51 |
)
|
| 52 |
processor = AutoProcessor.from_pretrained(model_id)
|
| 53 |
|