Derr11 committed (verified)
Commit 5d70b88 · 1 parent: ecbe307

Update app.py

Files changed (1): app.py (+69 −66)
app.py CHANGED
@@ -1,81 +1,84 @@
- import gradio as gr
- from transformers import AutoModel, AutoTokenizer, BitsAndBytesConfig
  import torch
- import spaces  # ZeroGPU library

- # 1. Model settings (Qwen3-Omni-Thinking)
- MODEL_ID = "Qwen/Qwen3-Omni-30B-A3B-Thinking"

- print(f"Loading the giant model {MODEL_ID}... this will take a few minutes.")
-
- # 4-bit quantization config so the model fits in ZeroGPU memory
- nf4_config = BitsAndBytesConfig(
-     load_in_4bit=True,
-     bnb_4bit_quant_type="nf4",
-     bnb_4bit_use_double_quant=True,
-     bnb_4bit_compute_dtype=torch.bfloat16
  )

- # Load the tokenizer
- tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
-
- # Load the model with quantization
- # The change here: use AutoModel instead of AutoModelForCausalLM
- model = AutoModel.from_pretrained(
-     MODEL_ID,
-     quantization_config=nf4_config,
-     device_map="auto",
      trust_remote_code=True
  )

- print("Model loaded successfully! The tutor is ready.")

- # 2. Thinking-and-reply function
- @spaces.GPU(duration=120)
- def chat_with_thinking_model(message, history):
-     messages = []
-
-     for user_msg, bot_msg in history:
-         messages.append({"role": "user", "content": user_msg})
-         messages.append({"role": "assistant", "content": bot_msg})
-
-     messages.append({"role": "user", "content": message})

-     text = tokenizer.apply_chat_template(
-         messages,
-         tokenize=False,
-         add_generation_prompt=True
-     )
-
-     model_inputs = tokenizer([text], return_tensors="pt").to(model.device)

-     generated_ids = model.generate(
-         **model_inputs,
-         max_new_tokens=1024,
-         temperature=0.7
-     )
-
-     generated_ids = [
-         output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
-     ]

-     response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
-     return response

- # 3. User interface
- custom_css = """
- #chatbot {min-height: 400px;}
- """

- with gr.Blocks(css=custom_css, theme=gr.themes.Soft()) as demo:
-     gr.Markdown("# 🧠 Nasaq AI Tutor (Thinking Mode)")
-     gr.Markdown("This model uses **Qwen3-Omni-Thinking**. You may notice that it writes out its thinking steps before the final answer.")
-
-     chatbot = gr.ChatInterface(
-         fn=chat_with_thinking_model,
-         examples=["Explain relativity to me as if I were a five-year-old", "Solve the equation x^2 + 5x + 6 = 0 and explain the steps"],
-         cache_examples=False,
-     )

- if __name__ == "__main__":
-     demo.launch()
+ import soundfile as sf
  import torch
+ from transformers import AutoModelForCausalLM, AutoProcessor
+ # Make sure the file qwen_omni_utils.py is in the same folder
+ from qwen_omni_utils import process_mm_info

+ MODEL_PATH = "Qwen/Qwen3-Omni-30B-A3B-Instruct"

+ # 1. Use the Auto class (AutoModelForCausalLM) instead of the model-specific name, and enable trust_remote_code
+ print("Loading model...")
+ model = AutoModelForCausalLM.from_pretrained(
+     MODEL_PATH,
+     dtype="auto",
+     device_map="auto",
+     attn_implementation="flash_attention_2",
+     trust_remote_code=True  # essential for newly released models
  )

+ # 2. Use AutoProcessor and enable trust_remote_code
+ print("Loading processor...")
+ processor = AutoProcessor.from_pretrained(
+     MODEL_PATH,
      trust_remote_code=True
  )

+ conversation = [
+     {
+         "role": "user",
+         "content": [
+             {"type": "image", "image": "https://qianwen-res.oss-cn-beijing.aliyuncs.com/Qwen3-Omni/demo/cars.jpg"},
+             {"type": "audio", "audio": "https://qianwen-res.oss-cn-beijing.aliyuncs.com/Qwen3-Omni/demo/cough.wav"},
+             {"type": "text", "text": "What can you see and hear? Answer in one short sentence."}
+         ],
+     },
+ ]

+ USE_AUDIO_IN_VIDEO = True

+ # Preparation for inference
+ print("Processing inputs...")
+ text = processor.apply_chat_template(conversation, add_generation_prompt=True, tokenize=False)

+ # Make sure the process_mm_info function handles the URLs correctly
+ audios, images, videos = process_mm_info(conversation, use_audio_in_video=USE_AUDIO_IN_VIDEO)

+ inputs = processor(
+     text=text,
+     audio=audios,
+     images=images,
+     videos=videos,
+     return_tensors="pt",
+     padding=True,
+     use_audio_in_video=USE_AUDIO_IN_VIDEO
+ )

+ inputs = inputs.to(model.device).to(model.dtype)

+ # Inference
+ print("Generating...")
+ text_ids, audio = model.generate(
+     **inputs,
+     speaker="Ethan",
+     thinker_return_dict_in_generate=True,
+     use_audio_in_video=USE_AUDIO_IN_VIDEO
+ )
+
+ # Decoding text
+ output_text = processor.batch_decode(
+     text_ids.sequences[:, inputs["input_ids"].shape[1]:],
+     skip_special_tokens=True,
+     clean_up_tokenization_spaces=False
+ )
+
+ print(f"Output Text: {output_text}")

+ # Saving audio
+ if audio is not None:
+     print("Saving audio to output.wav...")
+     sf.write(
+         "output.wav",
+         audio.reshape(-1).detach().cpu().numpy(),
+         samplerate=24000,
+     )
+ print("Done.")