Spaces:

baidu
/

ERNIE-4.5-VL-28B-A3B-Thinking

Running

App Files Files Community

ERNIE-4.5-VL-28B-A3B-Thinking / app.py

jzhang533

update ui (#1)

5ea0a9a verified 28 days ago

raw

history blame contribute delete

12.3 kB

	import base64
	import html
	import mimetypes
	import os
	from pathlib import Path
	from typing import Any, Dict, List

	import gradio as gr
	from openai import OpenAI

	# Settings read once from the environment when the module is imported.
	# DEFAULT_MODEL is the default for stream_response's `model_name` parameter.
	DEFAULT_MODEL = os.environ.get("DEFAULT_MODEL", "ERNIE-4.5-VL-28B-A3B-Thinking")
	# Endpoint and credential passed to the OpenAI client; "" when the variable is unset.
	BASE_URL = os.environ.get("BASE_URL", "")
	api_key = os.environ.get("ERNIE_API_KEY", "")


	# Stylesheet handed to gr.Blocks(css=...) in build_demo. It styles the page
	# body, the #ernie-hero header, the #examples-panel / #examples-grid cards,
	# the #chat-wrapper container and the .ernie-section Thinking/Answer blocks
	# emitted by _format_sections. The trailing @media block overrides the
	# colours when the browser prefers a dark colour scheme.
	CUSTOM_CSS = """
	body {
	background: radial-gradient(circle at top, #fdfbff 0%, #e7ecf7 45%, #dfe6f5 100%);
	font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Arial, sans-serif;
	color: #0f172a;
	}
	.gradio-container {
	max-width: 1200px !important;
	margin: 0 auto;
	}
	#ernie-hero {
	padding: 12px 0 4px;
	}
	#ernie-hero h1 {
	font-size: 1.85rem;
	margin-bottom: 0;
	font-weight: 500;
	}
	#model-link {
	margin-top: 6px;
	font-size: 0.95rem;
	}
	#model-link a {
	color: #4c1d95;
	text-decoration: none;
	font-weight: 500;
	}
	#model-link a:hover {
	text-decoration: underline;
	}
	#examples-panel {
	margin-top: 20px;
	padding: 18px 22px;
	border-radius: 18px;
	border: 1px solid rgba(15, 23, 42, 0.12);
	background: rgba(255, 255, 255, 0.92);
	box-shadow: 0 15px 35px rgba(15, 23, 42, 0.08);
	gap: 18px;
	}
	#examples-panel h4 {
	margin: 0 0 8px;
	font-size: 1.1rem;
	font-weight: 500;
	}
	#examples-panel p {
	margin: 0;
	color: rgba(15, 23, 42, 0.7);
	font-size: 0.95rem;
	}
	#examples-grid table {
	width: 100%;
	}
	#examples-grid table tbody {
	display: grid;
	grid-template-columns: repeat(auto-fit, minmax(220px, 1fr));
	gap: 12px;
	}
	#examples-grid table tr {
	display: block;
	background: #f7f9ff;
	border-radius: 14px;
	border: 1px solid rgba(15, 23, 42, 0.08);
	padding: 14px;
	box-shadow: 0 10px 28px rgba(15, 23, 42, 0.08);
	}
	#examples-grid table td {
	display: block;
	padding: 0;
	}
	#chat-wrapper {
	margin-top: 32px;
	border-radius: 24px;
	padding: 18px;
	background: rgba(255, 255, 255, 0.95);
	border: 1px solid rgba(15, 23, 42, 0.1);
	box-shadow: 0 25px 60px rgba(15, 23, 42, 0.12);
	}
	.ernie-section {
	border-radius: 18px;
	margin-bottom: 14px;
	padding: 16px 18px;
	border: 1px solid rgba(15, 23, 42, 0.1);
	background: rgba(255, 255, 255, 0.95);
	box-shadow: 0 10px 24px rgba(15, 23, 42, 0.08);
	}
	.ernie-section-header {
	font-size: 0.85rem;
	text-transform: uppercase;
	letter-spacing: 0.08em;
	font-weight: 600;
	color: rgba(15, 23, 42, 0.65);
	display: flex;
	align-items: center;
	gap: 6px;
	}
	.ernie-section-body {
	margin-top: 10px;
	font-size: 1rem;
	color: rgba(15, 23, 42, 0.92);
	white-space: pre-wrap;
	line-height: 1.55;
	}
	.ernie-thinking {
	border-color: rgba(79, 70, 229, 0.35);
	background: rgba(129, 140, 248, 0.08);
	}
	.ernie-answer {
	border-color: rgba(16, 185, 129, 0.35);
	background: rgba(110, 231, 183, 0.08);
	}

	@media (prefers-color-scheme: dark) {
	body {
	background: radial-gradient(circle at top, #1f264b 0%, #0f172a 45%, #040713 100%);
	color: #ecf2ff;
	}
	#model-link a {
	color: #a5b4fc;
	}
	#examples-panel {
	border: 1px solid rgba(255, 255, 255, 0.05);
	background: rgba(8, 13, 30, 0.85);
	box-shadow: 0 15px 45px rgba(3, 7, 18, 0.55);
	}
	#examples-panel p {
	color: rgba(236, 242, 255, 0.75);
	}
	#examples-grid table tr {
	background: rgba(15, 23, 42, 0.7);
	border: 1px solid rgba(255, 255, 255, 0.04);
	box-shadow: 0 10px 30px rgba(4, 6, 15, 0.45);
	}
	#chat-wrapper {
	background: rgba(2, 6, 23, 0.78);
	border: 1px solid rgba(99, 102, 241, 0.25);
	box-shadow: 0 25px 70px rgba(2, 6, 23, 0.7);
	}
	.ernie-section {
	border: 1px solid rgba(255, 255, 255, 0.08);
	background: rgba(15, 23, 42, 0.85);
	box-shadow: 0 10px 30px rgba(2, 6, 23, 0.55);
	}
	.ernie-section-header {
	color: rgba(236, 242, 255, 0.75);
	}
	.ernie-section-body {
	color: rgba(248, 250, 255, 0.95);
	}
	.ernie-answer {
	border-color: rgba(45, 212, 191, 0.45);
	background: rgba(8, 47, 56, 0.65);
	}
	.ernie-thinking {
	border-color: rgba(165, 180, 252, 0.4);
	background: rgba(30, 27, 75, 0.65);
	}
	}
	"""

	# Module-wide OpenAI client, created once at import time from the BASE_URL
	# and api_key settings and used by stream_response for every request.
	_client = OpenAI(base_url=BASE_URL, api_key=api_key)

	def _data_url(path: str) -> str:
	mime, _ = mimetypes.guess_type(path)
	mime = mime or "application/octet-stream"
	data = base64.b64encode(Path(path).read_bytes()).decode("utf-8")
	return f"data:{mime};base64,{data}"

	def _media_content(path: str) -> Dict[str, Any]:
	    """Build the message content part for a media file (image or video).

	    A file whose guessed MIME type starts with ``video`` becomes a
	    ``video_url`` part; anything else, including files whose type cannot
	    be guessed, is sent as an ``image_url`` part. The file itself is
	    embedded as a data URL produced by ``_data_url``.
	    """
	    guessed_type = mimetypes.guess_type(path)[0]
	    is_video = bool(guessed_type) and guessed_type.startswith("video")
	    # Images are the default; only an explicit video type switches the key.
	    part_key = "video_url" if is_video else "image_url"
	    return {"type": part_key, part_key: {"url": _data_url(path)}}

	def _text_content(text: str) -> Dict[str, Any]:
	return {"type": "text", "text": text}

	def _message(role: str, content: Any) -> Dict[str, Any]:
	return {"role": role, "content": content}

	def _format_sections(thinking: str, answer: str \| None = None) -> str:
	"""Render Thinking/Answer blocks with HTML so the chatbot can style them."""
	def _build_block(kind: str, label: str, text: str, icon: str) -> str:
	text = (text or "").strip()
	if not text:
	return ""
	escaped = html.escape(text)
	return (
	f'<div class="ernie-section ernie-{kind}">'
	f'<div class="ernie-section-header">{icon} {label}</div>'
	f'<div class="ernie-section-body">{escaped}</div>'
	"</div>"
	)

	sections = [
	_build_block("thinking", "Thinking", thinking, "🧠"),
	_build_block("answer", "Answer", answer, "✨") if answer is not None else "",
	]
	rendered = "".join(section for section in sections if section)
	return rendered

	def _build_user_message(message: Dict[str, Any]) -> Dict[str, Any]:
	    """Turn a multimodal textbox payload into a ``user`` chat message.

	    Every entry of ``message["files"]`` becomes a media part, in order,
	    followed by one text part when the stripped ``message["text"]`` is
	    non-empty. Missing or ``None`` values count as no files / no text.
	    """
	    attachments = message.get("files") or []
	    prompt = (message.get("text") or "").strip()

	    parts: List[Dict[str, Any]] = []
	    for file_path in attachments:
	        parts.append(_media_content(file_path))
	    if prompt:
	        parts.append(_text_content(prompt))
	    return _message("user", parts)

	def _convert_history(history: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
	msgs: List[Dict[str, Any]] = []
	user_content: List[Dict[str, Any]] = []

	for turn in history or []:
	role, content = turn.get("role"), turn.get("content")
	if role == "user":
	if isinstance(content, str):
	user_content.append(_text_content(content))
	elif isinstance(content, tuple):
	user_content.extend(_media_content(path) for path in content if path)
	elif role == "assistant":
	if "Answer:\n" in content:
	# 分割并仅保留Answer部分
	answer_only = content.split("Answer:\n", 1)[1].strip()
	else:
	# 兼容没有Thinking的情况
	answer_only = content.strip()

	if user_content:
	msgs.append(_message("user", user_content.copy()))
	user_content.clear()

	msgs.append(_message("assistant", [{"type": "text", "text": answer_only}]))

	return msgs


	def stream_response(message: Dict[str, Any], history: List[Dict[str, Any]], model_name: str = DEFAULT_MODEL):
	    """Stream the model's reply as progressively updated HTML.

	    Args:
	        message: Multimodal textbox payload with optional ``"text"`` and
	            ``"files"`` entries.
	        history: Prior chat turns as role/content dicts.
	        model_name: Currently unused; the request is always sent with
	            ``model="default"``.

	    Yields:
	        The HTML from ``_format_sections`` for everything received so far.
	        If the request or the stream raises, a single
	        ``"Failed to get response: ..."`` string is yielded instead.
	    """
	    messages = _convert_history(history)
	    messages.append(_build_user_message(message))

	    try:
	        stream = _client.chat.completions.create(
	            model="default",
	            messages=messages,
	            stream=True,
	        )
	        thinking_parts: List[str] = []
	        answer_parts: List[str] = []

	        for chunk in stream:
	            # A chunk may carry an empty `choices` list (for example a
	            # usage-only chunk). Skip it instead of raising IndexError and
	            # replacing the partial reply with an error message.
	            if not chunk.choices:
	                continue
	            delta = chunk.choices[0].delta

	            reasoning = getattr(delta, "reasoning_content", None)
	            if reasoning:
	                thinking_parts.append(reasoning)

	            text = getattr(delta, "content", None)
	            if text:
	                answer_parts.append(text)

	            # The Answer section only appears once answer text has arrived.
	            rendered = _format_sections(
	                "".join(thinking_parts),
	                "".join(answer_parts) if answer_parts else None,
	            )
	            if rendered:
	                yield rendered

	        if not answer_parts and thinking_parts:
	            # The stream ended without an answer: make sure the complete
	            # Thinking text is still shown.
	            rendered = _format_sections("".join(thinking_parts))
	            if rendered:
	                yield rendered
	    except Exception as e:
	        yield f"Failed to get response: {e}"

	def run_example(message: Dict[str, Any], history: List[Dict[str, Any]] | None = None):
	    """Send a clicked example straight to the model and stream the chat.

	    Args:
	        message: Payload in the multimodal textbox format,
	            ``{"text": ..., "files": [...]}``.
	        history: Current chatbot messages as role/content dicts; ``None``
	            is treated as an empty history.

	    Yields:
	        The complete message list for the chatbot: ``history`` followed by
	        a user message (the example text, or ``"[Example]"`` when it has
	        none) and an assistant message holding the HTML rendered so far.
	    """
	    history = history or []

	    # Only the text is shown for the user turn. Attached files still reach
	    # the model through `message`, but are not rendered in the chat.
	    user_text = (message.get("text") or "").strip() or "[Example]"

	    # Reuse the streaming function and wrap each HTML snapshot in the
	    # message list the chatbot expects.
	    for rendered in stream_response(message, history):
	        yield history + [
	            {"role": "user", "content": user_text},
	            {"role": "assistant", "content": rendered},
	        ]

	def build_demo() -> gr.Blocks:
	    """Assemble the Gradio UI and return it with queuing enabled.

	    Components are created in this order: a hero column with the heading
	    and the model repository link, the shared multimodal textbox and
	    chatbot, an examples panel whose clicks run ``run_example`` into the
	    chatbot, and a ``ChatInterface`` wired to ``stream_response`` that
	    reuses the same textbox and chatbot.

	    NOTE(review): block nesting was reconstructed from a copy of the file
	    with flattened indentation - confirm against the deployed app.py.

	    Returns:
	        The result of ``demo.queue(default_concurrency_limit=8)``.
	    """
	    with gr.Blocks(
	        title="ERNIE-4.5-VL-28B-A3B-Thinking",
	        theme=gr.themes.Soft(primary_hue="violet", secondary_hue="cyan", neutral_hue="slate"),
	        css=CUSTOM_CSS,
	    ) as demo:
	        with gr.Column(elem_id="ernie-hero"):
	            gr.Markdown(
	                """
	<h1>Chat with ERNIE-4.5-VL-28B-A3B-Thinking</h1>
	""",
	                elem_id="hero-text",
	            )
	            gr.Markdown(
	                """
	<p id="model-link">
	Model Repository:
	<a href="https://huggingface.co/baidu/ERNIE-4.5-VL-28B-A3B-Thinking" target="_blank" rel="noopener">
	ERNIE-4.5-VL-28B-A3B-Thinking
	</a>
	</p>
	"""
	            )

	        # Input box and chat window are created here and shared by both the
	        # examples panel and the ChatInterface below.
	        textbox = gr.MultimodalTextbox(
	            show_label=False,
	            placeholder="Enter text, or upload one or more images...",
	            file_types=["image","video"],
	            file_count="multiple",
	        )
	        chatbot = gr.Chatbot(
	            type="messages",
	            allow_tags=["think"],
	            height=560,
	            render_markdown=True,
	            show_copy_button=True,
	        )

	        example_inputs = [
	            {"text": "这道题怎么解", "files": ["examples/case1.png"]},
	            {"text": "How many real people are actually in the picture?", "files": ["examples/case2.png"]},
	        ]

	        with gr.Column(elem_id="examples-panel"):
	            gr.Examples(
	                examples=example_inputs,
	                inputs=textbox,
	                label=None,
	                examples_per_page=4,
	                elem_id="examples-grid",
	                fn=run_example,  # clicking an example sends it straight to the model
	                outputs=chatbot,
	                run_on_click=True,
	            )

	        with gr.Column(elem_id="chat-wrapper"):
	            gr.ChatInterface(
	                fn=stream_response,
	                type="messages",
	                multimodal=True,
	                chatbot=chatbot,
	                textbox=textbox,
	            )

	    return demo.queue(default_concurrency_limit=8)



	if __name__ == "__main__":
	    # Script entry point: build the queued UI, then start the Gradio server.
	    demo_app = build_demo()
	    demo_app.launch()