Spaces:

baidu
/

ERNIE-4.5-VL-28B-A3B-Thinking

Running

App Files Community

LokeZhou committed 29 days ago

Commit

72ae5ae

1 Parent(s): 6d89bbe

use openai client

Browse files

Files changed (2) hide show

app.py +81 -214
requirements.txt +1 -4

app.py CHANGED Viewed

@@ -1,231 +1,98 @@
-import gradio as gr
-import torch
-from transformers import AutoModelForCausalLM, AutoProcessor,TextStreamer,TextIteratorStreamer
-from PIL import Image
 import base64
-import io
-import re
-from typing import Generator, List, Tuple, Optional
-import spaces
-import threading
-MAX_HISTORY=5
-model_path = 'baidu/ERNIE-4.5-VL-28B-A3B-Thinking'
-model = AutoModelForCausalLM.from_pretrained(
-    model_path,
-    device_map="auto",
-    torch_dtype=torch.bfloat16,
-    trust_remote_code=True
-)
-processor = AutoProcessor.from_pretrained(model_path, trust_remote_code=True)
-processor.eval()
-model.add_image_preprocess(processor)
-def encode_image(image: Image.Image) -> str:
-    if image is None:
-        return ""
-    buffer = io.BytesIO()
-    image.save(buffer, format="PNG")
-    return base64.b64encode(buffer.getvalue()).decode("utf-8")
-def extract_text_from_html(html: str) -> str:
-    text = re.sub(r'<img.*?>', '', html)
-    text = re.sub(r'<.*?>', '', text)
-    if text.startswith("user: "):
-        return text[6:].strip()
-    elif text.startswith("assistant: "):
-        return text[8:].strip()
-    return text.strip()
-@spaces.GPU(duration=120)
-def process_chat(
-    message: str,
-    image: Optional[Image.Image],
-    chat_history: List[Tuple[str, str, Optional[str]]],
-    max_new_tokens: int,
-    temperature: float
-) -> Generator[List[Tuple[str, str]], None, None]:
-    """处理聊天输入，流式生成回应"""
-    current_image_b64 = encode_image(image) if image else None
-    image_html = ""
-    if current_image_b64:
-        image_html = f'<br><img src="data:image/png;base64,{current_image_b64}" style="max-width:300px; border-radius:4px;">'
-    user_text = message
-    user_message_html = f"user: {user_text}{image_html}"
-    temp_history = chat_history + [(user_message_html, "", current_image_b64)]
-    model_messages = []
-    for hist in temp_history[:-1]:
-        user_html, assistant_text, hist_image_b64 = hist
-        user_text_clean = extract_text_from_html(user_html)
-        user_content=[]
-        if hist_image_b64:
-            user_content.insert(0, {"type": "image_url","image_url": {"url": hist_image_b64}})
-        else:
-            user_content.append({"type": "text", "text": ""})
-        model_messages.append({"role": "user", "content": user_content})
-        assistant_content=[{"type": "text", "text": assistant_text}]
-        model_messages.append({"role": "bot", "content": assistant_content})
-    current_user_content = [{"type": "text", "text": user_text}]
-    if current_image_b64:
-        current_user_content.insert(0, {"type": "image_url", "image_url": {"url":current_image_b64}})
-    model_messages.append({"role": "user", "content": current_user_content})
-    text = processor.tokenizer.apply_chat_template(
-        model_messages, tokenize=False, add_generation_prompt=True, enable_thinking=False
-    )
-    image_inputs, video_inputs = processor.process_vision_info(model_messages)
-    inputs = processor(
-        text=[text],
-        images=image_inputs,
-        videos=video_inputs,
-        padding=True,
-        return_tensors="pt",
-    )
-    device = next(model.parameters()).device
-    inputs = inputs.to(device)
-    streamer = TextIteratorStreamer(processor.tokenizer, skip_prompt=True, skip_special_tokens=True)
-    generation_kwargs = {
-        **inputs,
-        "streamer": streamer,
-        "max_new_tokens": max_new_tokens,
-        "temperature": temperature,
-        "use_cache": False
-    }
-    thread = threading.Thread(target=model.generate, kwargs=generation_kwargs)
-    thread.start()
-    generated_text = ""
-    for new_token in streamer:
-        generated_text += new_token
-        temp_history[-1] = (user_message_html, f"assistant: {generated_text}", current_image_b64)
-        display_history = [(h[0], h[1]) for h in temp_history[-MAX_HISTORY:]]
-        yield display_history
-    thread.join()
-def chat_interface(
-    message: str,
-    image: Optional[Image.Image],
-    chat_history: List[Tuple[str, str, Optional[str]]],
-    max_new_tokens: int,
-    temperature: float
-) -> Generator[tuple, None, None]:
-    for updated_display_history in process_chat(message, image, chat_history, max_new_tokens, temperature):
-        updated_full_history = []
-        for i, display_item in enumerate(updated_display_history):
-            full_item = next((h for h in chat_history if h[0] == display_item[0] and h[1] == display_item[1]), None)
-            if full_item:
-                updated_full_history.append(full_item)
-            else:
-                if i == len(updated_display_history) - 1:
-                    img_b64 = encode_image(image) if image else None
-                    updated_full_history.append((display_item[0], display_item[1], img_b64))
-                else:
-                    updated_full_history.append((display_item[0], display_item[1], None))
-        yield "", None, updated_full_history, updated_display_history
-with gr.Blocks(title="ERNIE-4.5-VL-28B-A3B-Thinking", theme=gr.themes.Soft()) as demo:
-    full_chat_history = gr.State([])
-    with gr.Row():
-        with gr.Column(scale=3):
-            chat_display = gr.Chatbot(
-                label="chat_bot",
-                height=500,
-                bubble_full_width=False
-            )
-        with gr.Column(scale=1):
-            gr.Markdown("generation kwargs")
-            max_new_tokens = gr.Slider(
-                minimum=128, maximum=32768, value=8192, step=255,
-                label="max_new_token"
-            )
-            temperature = gr.Slider(
-                minimum=0.1, maximum=2.0, value=0.7, step=0.1,
-                label="temperature"
-            )
-            clear_btn = gr.Button("clear", variant="destructive")
-    with gr.Row():
-        text_input = gr.Textbox(
-            label="input text",
-            placeholder="input text messages...",
-            lines=2
         )
-        image_input = gr.Image(
-            label="input image",
-            placeholder="upload image...",
-            type="pil",
-            height=100
-        )
-        submit_btn = gr.Button("submit", variant="primary")
-    submit_btn.click(
-        fn=chat_interface,
-        inputs=[text_input, image_input, full_chat_history, max_new_tokens, temperature],
-        outputs=[text_input, image_input, full_chat_history, chat_display]
     )
-    text_input.submit(
-        fn=chat_interface,
-        inputs=[text_input, image_input, full_chat_history, max_new_tokens, temperature],
-        outputs=[text_input, image_input, full_chat_history, chat_display]
-    )
-    def clear_chat():
-        return [], []
-    clear_btn.click(
-        fn=clear_chat,
-        inputs=[],
-        outputs=[full_chat_history, chat_display]
-    )
 if __name__ == "__main__":
-    demo.launch()

 import base64
+import mimetypes
+import os
+from pathlib import Path
+from typing import Any, Dict, List
+import gradio as gr
+from openai import OpenAI
+DEFAULT_MODEL = os.getenv("DEFAULT_MODEL", "ERNIE-4.5-VL-28B-A3B-Thinking")
+api_key = os.getenv("OPENAI_API_KEY","")
+_client = OpenAI(
+    base_url="https://9d4as2f4m0e8f0a6.aistudio-app.com/v1/chat/completions",
+    api_key=api_key,
+)
+def _data_url(path: str) -> str:
+    mime, _ = mimetypes.guess_type(path)
+    mime = mime or "application/octet-stream"
+    data = base64.b64encode(Path(path).read_bytes()).decode("utf-8")
+    return f"data:{mime};base64,{data}"
+def _image_content(path: str) -> Dict[str, Any]:
+    return {"type": "image_url", "image_url": {"url": _data_url(path)}}
+def _text_content(text: str) -> Dict[str, Any]:
+    return {"type": "text", "text": text}
+def _message(role: str, content: Any) -> Dict[str, Any]:
+    return {"role": role, "content": content}
+def _build_user_message(message: Dict[str, Any]) -> Dict[str, Any]:
+    files = message.get("files") or []
+    text = (message.get("text") or "").strip()
+    content: List[Dict[str, Any]] = [_image_content(p) for p in files]
+    if text:
+        content.append(_text_content(text))
+    return _message("user", content)
+def _convert_history(history: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
+    msgs: List[Dict[str, Any]] = []
+    user_content: List[Dict[str, Any]] = []
+    for turn in history or []:
+        role, content = turn.get("role"), turn.get("content")
+        if role == "user":
+            if isinstance(content, str):
+                user_content.append(_text_content(content))
+            elif isinstance(content, tuple):
+                user_content.extend(_image_content(path) for path in content if path)
+        elif role == "bot" or role == "assistant":
+            msgs.append(_message("user", user_content.copy()))
+            user_content.clear()
+            content = [{"type": "text", "text": content}]
+            msgs.append(_message("bot", content))
+    return msgs
+def stream_response(message: Dict[str, Any], history: List[Dict[str, Any]], model_name: str = DEFAULT_MODEL):
+    messages = _convert_history(history)
+    messages.append(_build_user_message(message))
+    try:
+        stream = _client.chat.completions.create(
+            model=model_name,
+            messages=messages,
+            stream=True
         )
+        partial = ""
+        for chunk in stream:
+            delta = chunk.choices[0].delta.content
+            if delta:
+                partial += delta
+                yield partial
+    except Exception as e:
+        yield f"Failed to get response: {e}"
+def build_demo() -> gr.Blocks:
+    chatbot = gr.Chatbot(type="messages", allow_tags=["think"])
+    textbox = gr.MultimodalTextbox(
+        show_label=False,
+        placeholder="Enter text, or upload one or more images...",
+        file_types=["image"],
+        file_count="multiple"
     )
+    return gr.ChatInterface(
+        fn=stream_response,
+        type="messages",
+        multimodal=True,
+        chatbot=chatbot,
+        textbox=textbox,
+        title="ERNIE-4.5-VL-28B-A3B-Thinking",
+    ).queue(default_concurrency_limit=8)
 if __name__ == "__main__":
+    build_demo().launch(server_name="0.0.0.0", server_port=8100,share=False)

requirements.txt CHANGED Viewed

@@ -1,4 +1 @@
-transformers==4.57.1
-decord
-sentencepiece
-accelerate


+openai