|
|
import base64 |
|
|
import html |
|
|
import mimetypes |
|
|
import os |
|
|
from pathlib import Path |
|
|
from typing import Any, Dict, List |
|
|
|
|
|
import gradio as gr |
|
|
from openai import OpenAI |
|
|
|
|
|
# Runtime configuration, read from the process environment once at import time.

# Default model identifier; override with the DEFAULT_MODEL environment variable.
DEFAULT_MODEL = os.environ.get("DEFAULT_MODEL", "ERNIE-4.5-VL-28B-A3B-Thinking")
# Base URL handed to the OpenAI client; an empty string when BASE_URL is unset.
BASE_URL = os.environ.get("BASE_URL", "")
# API key handed to the OpenAI client; an empty string when ERNIE_API_KEY is unset.
api_key = os.environ.get("ERNIE_API_KEY", "")
|
|
|
|
|
|
|
|
# Stylesheet passed to gr.Blocks(css=...) in build_demo.
# - The #ernie-hero, #model-link, #examples-panel, #examples-grid and
#   #chat-wrapper selectors match elem_id values / markup ids used in build_demo.
# - The .ernie-section* / .ernie-thinking / .ernie-answer classes match the HTML
#   emitted by _format_sections; .ernie-section-body uses "white-space: pre-wrap"
#   so line breaks in the model output stay visible.
# - The trailing @media block overrides colors when the browser reports
#   prefers-color-scheme: dark.
CUSTOM_CSS = """
body {
background: radial-gradient(circle at top, #fdfbff 0%, #e7ecf7 45%, #dfe6f5 100%);
font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Arial, sans-serif;
color: #0f172a;
}
.gradio-container {
max-width: 1200px !important;
margin: 0 auto;
}
#ernie-hero {
padding: 12px 0 4px;
}
#ernie-hero h1 {
font-size: 1.85rem;
margin-bottom: 0;
font-weight: 500;
}
#model-link {
margin-top: 6px;
font-size: 0.95rem;
}
#model-link a {
color: #4c1d95;
text-decoration: none;
font-weight: 500;
}
#model-link a:hover {
text-decoration: underline;
}
#examples-panel {
margin-top: 20px;
padding: 18px 22px;
border-radius: 18px;
border: 1px solid rgba(15, 23, 42, 0.12);
background: rgba(255, 255, 255, 0.92);
box-shadow: 0 15px 35px rgba(15, 23, 42, 0.08);
gap: 18px;
}
#examples-panel h4 {
margin: 0 0 8px;
font-size: 1.1rem;
font-weight: 500;
}
#examples-panel p {
margin: 0;
color: rgba(15, 23, 42, 0.7);
font-size: 0.95rem;
}
#examples-grid table {
width: 100%;
}
#examples-grid table tbody {
display: grid;
grid-template-columns: repeat(auto-fit, minmax(220px, 1fr));
gap: 12px;
}
#examples-grid table tr {
display: block;
background: #f7f9ff;
border-radius: 14px;
border: 1px solid rgba(15, 23, 42, 0.08);
padding: 14px;
box-shadow: 0 10px 28px rgba(15, 23, 42, 0.08);
}
#examples-grid table td {
display: block;
padding: 0;
}
#chat-wrapper {
margin-top: 32px;
border-radius: 24px;
padding: 18px;
background: rgba(255, 255, 255, 0.95);
border: 1px solid rgba(15, 23, 42, 0.1);
box-shadow: 0 25px 60px rgba(15, 23, 42, 0.12);
}
.ernie-section {
border-radius: 18px;
margin-bottom: 14px;
padding: 16px 18px;
border: 1px solid rgba(15, 23, 42, 0.1);
background: rgba(255, 255, 255, 0.95);
box-shadow: 0 10px 24px rgba(15, 23, 42, 0.08);
}
.ernie-section-header {
font-size: 0.85rem;
text-transform: uppercase;
letter-spacing: 0.08em;
font-weight: 600;
color: rgba(15, 23, 42, 0.65);
display: flex;
align-items: center;
gap: 6px;
}
.ernie-section-body {
margin-top: 10px;
font-size: 1rem;
color: rgba(15, 23, 42, 0.92);
white-space: pre-wrap;
line-height: 1.55;
}
.ernie-thinking {
border-color: rgba(79, 70, 229, 0.35);
background: rgba(129, 140, 248, 0.08);
}
.ernie-answer {
border-color: rgba(16, 185, 129, 0.35);
background: rgba(110, 231, 183, 0.08);
}

@media (prefers-color-scheme: dark) {
body {
background: radial-gradient(circle at top, #1f264b 0%, #0f172a 45%, #040713 100%);
color: #ecf2ff;
}
#model-link a {
color: #a5b4fc;
}
#examples-panel {
border: 1px solid rgba(255, 255, 255, 0.05);
background: rgba(8, 13, 30, 0.85);
box-shadow: 0 15px 45px rgba(3, 7, 18, 0.55);
}
#examples-panel p {
color: rgba(236, 242, 255, 0.75);
}
#examples-grid table tr {
background: rgba(15, 23, 42, 0.7);
border: 1px solid rgba(255, 255, 255, 0.04);
box-shadow: 0 10px 30px rgba(4, 6, 15, 0.45);
}
#chat-wrapper {
background: rgba(2, 6, 23, 0.78);
border: 1px solid rgba(99, 102, 241, 0.25);
box-shadow: 0 25px 70px rgba(2, 6, 23, 0.7);
}
.ernie-section {
border: 1px solid rgba(255, 255, 255, 0.08);
background: rgba(15, 23, 42, 0.85);
box-shadow: 0 10px 30px rgba(2, 6, 23, 0.55);
}
.ernie-section-header {
color: rgba(236, 242, 255, 0.75);
}
.ernie-section-body {
color: rgba(248, 250, 255, 0.95);
}
.ernie-answer {
border-color: rgba(45, 212, 191, 0.45);
background: rgba(8, 47, 56, 0.65);
}
.ernie-thinking {
border-color: rgba(165, 180, 252, 0.4);
background: rgba(30, 27, 75, 0.65);
}
}
"""
|
|
|
|
|
# Module-wide OpenAI-compatible client, constructed once at import time from
# BASE_URL and api_key and shared by every request.
_client = OpenAI(base_url=BASE_URL, api_key=api_key)
|
|
|
|
|
def _data_url(path: str) -> str:
    """Return the file at ``path`` encoded as a base64 ``data:`` URL.

    The MIME type is guessed from the file name; when no type can be guessed,
    ``application/octet-stream`` is used.
    """
    guessed_type = mimetypes.guess_type(path)[0]
    if not guessed_type:
        guessed_type = "application/octet-stream"
    raw_bytes = Path(path).read_bytes()
    encoded = base64.b64encode(raw_bytes).decode("utf-8")
    return "data:" + guessed_type + ";base64," + encoded
|
|
|
|
|
def _media_content(path: str) -> Dict[str, Any]:
    """Build a chat content part for a media file (image or video).

    A file whose guessed MIME type starts with ``video`` becomes a
    ``video_url`` part; every other file becomes an ``image_url`` part. In
    both cases the URL is the file inlined as a base64 data URL.
    """
    guessed_type = mimetypes.guess_type(path)[0]
    is_video = bool(guessed_type) and guessed_type.startswith("video")
    part_kind = "video_url" if is_video else "image_url"
    return {"type": part_kind, part_kind: {"url": _data_url(path)}}
|
|
|
|
|
def _text_content(text: str) -> Dict[str, Any]:
    """Wrap ``text`` as a ``text`` chat content part."""
    return dict(type="text", text=text)
|
|
|
|
|
def _message(role: str, content: Any) -> Dict[str, Any]:
    """Build a chat message dict with the given ``role`` and ``content``."""
    return dict(role=role, content=content)
|
|
|
|
|
def _format_sections(thinking: str, answer: str | None = None) -> str: |
|
|
"""Render Thinking/Answer blocks with HTML so the chatbot can style them.""" |
|
|
def _build_block(kind: str, label: str, text: str, icon: str) -> str: |
|
|
text = (text or "").strip() |
|
|
if not text: |
|
|
return "" |
|
|
escaped = html.escape(text) |
|
|
return ( |
|
|
f'<div class="ernie-section ernie-{kind}">' |
|
|
f'<div class="ernie-section-header">{icon} {label}</div>' |
|
|
f'<div class="ernie-section-body">{escaped}</div>' |
|
|
"</div>" |
|
|
) |
|
|
|
|
|
sections = [ |
|
|
_build_block("thinking", "Thinking", thinking, "🧠"), |
|
|
_build_block("answer", "Answer", answer, "✨") if answer is not None else "", |
|
|
] |
|
|
rendered = "".join(section for section in sections if section) |
|
|
return rendered |
|
|
|
|
|
def _build_user_message(message: Dict[str, Any]) -> Dict[str, Any]:
    """Convert a multimodal textbox value into a ``user`` chat message.

    ``message`` is read through its ``"files"`` and ``"text"`` keys. Every
    file becomes a media part (in order), followed by one text part when the
    stripped text is non-empty.
    """
    attachments = message.get("files") or []
    prompt = (message.get("text") or "").strip()

    parts: List[Dict[str, Any]] = []
    for file_path in attachments:
        parts.append(_media_content(file_path))
    if prompt:
        parts.append(_text_content(prompt))
    return _message("user", parts)
|
|
|
|
|
def _extract_answer(content: Any) -> str:
    """Return only the final-answer text of a stored assistant message.

    Tried in order:

    1. HTML produced by ``_format_sections``: the body of the
       ``ernie-answer`` section, HTML-unescaped.
    2. The legacy plain-text layout: everything after the ``Answer:`` line
       marker.
    3. Otherwise the stripped content as-is.

    Non-string content yields an empty string instead of raising.
    """
    if not isinstance(content, str):
        return ""

    section_open = '<div class="ernie-section ernie-answer">'
    body_open = '<div class="ernie-section-body">'
    _, has_section, after_section = content.partition(section_open)
    if has_section:
        _, has_body, after_body = after_section.partition(body_open)
        if has_body:
            # The body was written with html.escape, so it cannot contain "<";
            # the first closing tag therefore ends the answer text.
            escaped_body = after_body.partition("</div>")[0]
            return html.unescape(escaped_body).strip()

    if "Answer:\n" in content:
        return content.split("Answer:\n", 1)[1].strip()
    return content.strip()


def _convert_history(history: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
    """Convert chatbot history (role/content dicts) into API chat messages.

    Consecutive ``user`` entries are merged into a single user message: string
    content becomes a text part and tuple content is treated as file paths and
    becomes media parts. Each ``assistant`` entry first flushes the pending
    user parts and is then added with its answer text only, so the rendered
    Thinking section and the HTML markup are not sent back to the model.

    User parts that are not followed by an assistant entry are not emitted
    (unchanged from the previous behavior).

    Args:
        history: Prior turns as dicts with ``"role"`` and ``"content"``; may
            be empty or ``None``.

    Returns:
        A list of ``{"role", "content"}`` messages whose content is a list of
        content parts.
    """
    msgs: List[Dict[str, Any]] = []
    pending_user: List[Dict[str, Any]] = []

    for turn in history or []:
        role, content = turn.get("role"), turn.get("content")
        if role == "user":
            if isinstance(content, str):
                pending_user.append(_text_content(content))
            elif isinstance(content, tuple):
                pending_user.extend(_media_content(path) for path in content if path)
        elif role == "assistant":
            answer_only = _extract_answer(content)

            if pending_user:
                msgs.append(_message("user", pending_user))
                # Start a fresh list so the message just appended is not mutated.
                pending_user = []

            msgs.append(_message("assistant", [_text_content(answer_only)]))

    return msgs
|
|
|
|
|
|
|
|
def stream_response(message: Dict[str, Any], history: List[Dict[str, Any]], model_name: str = DEFAULT_MODEL):
    """Stream a chat completion and yield the cumulative HTML rendering.

    The prior ``history`` and the new ``message`` are converted to API
    messages and sent as a streaming request. After every chunk that carries
    a choice, the reasoning and answer text received so far are rendered with
    ``_format_sections`` and yielded when non-empty, so each yielded value
    supersedes the previous one.

    Args:
        message: Multimodal textbox value (read via ``"text"`` / ``"files"``).
        history: Prior turns as role/content dicts.
        model_name: Accepted for interface compatibility; see the note below.

    Yields:
        HTML strings, or a single ``"Failed to get response: ..."`` string if
        the request or the stream raises.
    """
    messages = _convert_history(history)
    messages.append(_build_user_message(message))

    try:
        # NOTE(review): the request always uses the literal model id "default";
        # model_name is not forwarded. Confirm what the backend expects before
        # changing this.
        stream = _client.chat.completions.create(
            model="default",
            messages=messages,
            stream=True,
        )
        thinking_parts: List[str] = []
        answer_parts: List[str] = []

        for chunk in stream:
            # A chunk may arrive with an empty ``choices`` list (for example a
            # usage-only chunk); indexing it would raise and the except branch
            # would replace the answer with an error message.
            if not getattr(chunk, "choices", None):
                continue
            delta = chunk.choices[0].delta

            reasoning_piece = getattr(delta, "reasoning_content", None)
            if reasoning_piece:
                thinking_parts.append(reasoning_piece)

            answer_piece = getattr(delta, "content", None)
            if answer_piece:
                answer_parts.append(answer_piece)

            # Until answer text arrives, pass None so only Thinking is rendered.
            rendered = _format_sections(
                "".join(thinking_parts),
                "".join(answer_parts) if answer_parts else None,
            )
            if rendered:
                yield rendered
    except Exception as e:
        # UI boundary: surface the failure in the chat instead of crashing.
        yield f"Failed to get response: {e}"
|
|
|
|
|
def run_example(message: Dict[str, Any], history: List[Dict[str, Any]] | None = None):
    """Send a clicked example straight to the model and stream the chat.

    - ``message`` is the same dict used by the chat input:
      ``{"text": ..., "files": [...]}``.
    - ``history`` is the chatbot's current message list (``type="messages"``).
    - Each yielded value is a full message list for the chatbot:
      ``[{role, content}, ...]``.
    """
    prior_turns = history or []
    # Only the text is echoed as the user turn; a placeholder is shown when
    # the example has no text.
    shown_prompt = (message.get("text") or "").strip() or "[Example]"

    for rendered in stream_response(message, prior_turns):
        user_turn = {"role": "user", "content": shown_prompt}
        assistant_turn = {"role": "assistant", "content": rendered}
        yield prior_turns + [user_turn, assistant_turn]
|
|
|
|
|
def build_demo() -> gr.Blocks:
    """Assemble the Gradio UI and return it with queuing enabled.

    The page is built in creation order: a header (title and model link), a
    multimodal textbox and a chatbot, a panel of clickable examples that run
    ``run_example``, and a ``gr.ChatInterface`` that reuses the textbox and
    chatbot and streams from ``stream_response``.

    Returns:
        The result of ``demo.queue(default_concurrency_limit=8)``.
    """
    theme = gr.themes.Soft(primary_hue="violet", secondary_hue="cyan", neutral_hue="slate")

    with gr.Blocks(
        title="ERNIE-4.5-VL-28B-A3B-Thinking",
        theme=theme,
        css=CUSTOM_CSS,
    ) as demo:
        # Header: page title plus a link to the model repository.
        with gr.Column(elem_id="ernie-hero"):
            gr.Markdown(
                """
<h1>Chat with ERNIE-4.5-VL-28B-A3B-Thinking</h1>
""",
                elem_id="hero-text",
            )
            gr.Markdown(
                """
<p id="model-link">
Model Repository:
<a href="https://huggingface.co/baidu/ERNIE-4.5-VL-28B-A3B-Thinking" target="_blank" rel="noopener">
ERNIE-4.5-VL-28B-A3B-Thinking
</a>
</p>
"""
            )

        # Input and output components are created here so that both the
        # examples panel and the ChatInterface below can share them.
        # NOTE(review): the placeholder mentions only images although
        # file_types also allows "video".
        textbox = gr.MultimodalTextbox(
            show_label=False,
            placeholder="Enter text, or upload one or more images...",
            file_types=["image","video"],
            file_count="multiple"
        )
        chatbot = gr.Chatbot(
            type="messages",
            allow_tags=["think"],
            height=560,
            render_markdown=True,
            show_copy_button=True,
        )

        # Each example is a dict with "text" and "files" keys that fills the
        # textbox. The file paths are relative — presumably resolved against
        # the working directory; verify when deploying.
        examples = [
            {
                "text": "这道题怎么解",
                "files": ["examples/case1.png"]
            },
            {
                "text": "How many real people are actually in the picture?",
                "files": ["examples/case2.png"]
            },
        ]

        # Clicking an example calls run_example (run_on_click=True) and sends
        # its output to the chatbot.
        with gr.Column(elem_id="examples-panel"):
            gr.Examples(
                examples=examples,
                inputs=textbox,
                label=None,
                examples_per_page=4,
                elem_id="examples-grid",
                fn=run_example,
                outputs=chatbot,
                run_on_click=True,
            )

        # Regular chat flow: stream_response receives the multimodal message
        # and the history in "messages" format.
        # NOTE(review): chat_interface is assigned but not used afterwards in
        # this function.
        with gr.Column(elem_id="chat-wrapper"):
            chat_interface = gr.ChatInterface(
                fn=stream_response,
                type="messages",
                multimodal=True,
                chatbot=chatbot,
                textbox=textbox,
            )


    return demo.queue(default_concurrency_limit=8)
|
|
|
|
|
|
|
|
|
|
|
# Script entry point: build the UI and start the Gradio server.
if __name__ == "__main__":
    demo_app = build_demo()
    demo_app.launch()
|
|
|