% PaperShow / posterbuilder /latex_proj /poster_output.tex
% JaceWei's picture
% update: push latest content
% 930133a
% raw
% history blame
% 8.3 kB
% Unofficial University of Cambridge Poster Template
% https://github.com/andiac/gemini-cam
% a fork of https://github.com/anishathalye/gemini
% also refer to https://github.com/k4rtik/uchicago-poster
% Document class: beamer with the `final` option.
\documentclass[final]{beamer}
% ====================
% Packages
% ====================
% Font setup: T1 font encoding with the Latin Modern family.
\usepackage[T1]{fontenc}
\usepackage{lmodern}
% Poster geometry: custom 120 x 72 page at scale 1.0.
% NOTE(review): beamerposter is expected to read width/height as centimetres -- confirm against its documentation.
\usepackage[size=custom,width=120,height=72,scale=1.0]{beamerposter}
% Theme and colour theme. Neither is defined in this file, so the gemini and cam
% theme files have to be findable by TeX when compiling.
\usetheme{gemini}
\usecolortheme{cam}
% graphicx provides \includegraphics, used for the logos and every figure below.
\usepackage{graphicx}
% booktabs and natbib (numeric citation style) are loaded here; the visible body
% contains no tabular material and no \cite.
\usepackage{booktabs}
\usepackage[numbers]{natbib}
% tikz is used by the headline overlay inside the document body.
\usepackage{tikz}
% pgfplots is loaded with compatibility level 1.14; no plot appears in the visible body.
\usepackage{pgfplots}
\pgfplotsset{compat=1.14}
\usepackage{anyfontsize}
% Accent colour used for block titles.
\definecolor{nipspurple}{RGB}{94,46,145}
% Headline: black on white. Block titles: white on the accent colour.
\setbeamercolor{headline}{bg=white, fg=black}
\setbeamercolor{block title}{bg=nipspurple, fg=white}
% Prepend two length assignments to the "block begin" template (the second
% argument, the appended part, is left empty).
% NOTE(review): \textpaddingtop and \textpaddingbottom are not declared in this
% file -- presumably the theme provides them; confirm.
\addtobeamertemplate{block begin}{
\setlength{\textpaddingtop}{0.2em}%
\setlength{\textpaddingbottom}{0.2em}%
}{}
% ====================
% Lengths
% ====================
% If you have N columns, choose \sepwidth and \colwidth such that
% (N+1)*\sepwidth + N*\colwidth = \paperwidth
% Gutter width: declared and sized as a fraction of the page width.
\newlength{\sepwidth}
\setlength{\sepwidth}{0.025\paperwidth}
% Content column width: with three columns, 4 * 0.025 + 3 * 0.3 = 1.0 of \paperwidth.
\newlength{\colwidth}
\setlength{\colwidth}{0.3\paperwidth}
% An empty column of width \sepwidth, placed between (and around) content columns.
\newcommand{\separatorcolumn}{%
  \begin{column}{\sepwidth}%
  \end{column}%
}
% ====================
% Title
% ====================
% Poster title.
\title{Paper2Poster: Towards Multimodal Poster}
% Authors, one per line; the superscript after each name is the affiliation key
% that is resolved in \institute below.
\author{Wei Pang\textsuperscript{1},
  Kevin Qinghong Lin\textsuperscript{2},
  Xiangru Jian\textsuperscript{1},
  Xi He\textsuperscript{1},
  Philip Torr\textsuperscript{3}}
% Affiliations. The keys are typeset as superscripts so they match the marks
% used in \author instead of reading as plain leading digits.
\institute[shortinst]{\textsuperscript{1} University of Waterloo; \textsuperscript{2} National University of Singapore; \textsuperscript{3} University of Oxford}
% ====================
% Footer (optional)
% ====================
% Footer: project link first, then the attribution, separated by \hfill.
% Nothing follows the final \hfill.
\footercontent{
  \href{https://paper2poster.github.io/}{https://paper2poster.github.io/}
  \hfill
  Generated by Paper2Poster
  \hfill
}
% (can be left out to remove footer)
% ====================
% Logo (optional)
% ====================
% use this to include logos on the left and/or right side of the header:
% Right-hand header logo, 5cm tall.
\logoright{%
  \includegraphics[height=5cm]{logos/right_logo.png}%
}
% Left-hand header logo, 4cm tall.
\logoleft{%
  \includegraphics[height=4cm]{logos/left_logo.png}%
}
% ====================
% Body
% ====================
% --- injected font tweaks ---
% Size overrides for the header fonts (title, author, institute), issued before
% \begin{document}.
\setbeamerfont{title}{size=\Huge}
\setbeamerfont{author}{size=\Large}
\setbeamerfont{institute}{size=\large}
% Size overrides for block titles and block body text.
\setbeamerfont{block title}{size=\Large}
\setbeamerfont{block body}{size=\large}
\begin{document}
% Refer to https://github.com/k4rtik/uchicago-poster
% logo: https://www.cam.ac.uk/brand-resources/about-the-logo/logo-downloads
% Append a remember-picture overlay to the headline template (the first
% argument, the prepended part, is empty).
% The node is anchored at the page's top-left corner and carries no content.
% A TikZ node still needs its text argument and a terminating semicolon, so an
% empty `{}` followed by `;` is supplied; without them the path is never closed
% and compilation fails.
\addtobeamertemplate{headline}{}
{
\begin{tikzpicture}[remember picture,overlay]
\node [anchor=north west, inner sep=3cm] at ([xshift=0.0cm,yshift=1.0cm]current page.north west) {};
\end{tikzpicture}
}
\begin{frame}[t]
\begin{columns}[t]
\separatorcolumn
\begin{column}{\colwidth}
\begin{block}{Why Posters Are Hard}
  % Motivation panel: one sentence per source line, followed by the figure.
  We target \textbf{single-page, multimodal compression} of \textit{20K+ tokens} into clear panels.
  Posters demand \textcolor{blue}{tight text–visual coupling}, \textbf{layout balance}, and \textit{readable density}.
  Pure LLM/VLM approaches \textcolor{red}{miss spatial feedback}, causing overflow and incoherence.
  We reveal that \textbf{visual-in-the-loop planning} is essential to preserve reading order, keep figures relevant, and sustain \textit{engagement} within hard space limits.
  \begin{figure}
    \centering
    \includegraphics[width=0.80\linewidth]{figures/paper-picture-1.png}
  \end{figure}
\end{block}
\begin{block}{Benchmark and Data}
  % Benchmark panel: dataset statistics and the four evaluation axes.
  We launch the \textbf{Paper2Poster Benchmark}: \textcolor{blue}{100 paper–poster pairs} spanning \textit{280 topics}.
  Average input: \textcolor{blue}{20,370 tokens, 22.6 pages}.
  Output posters compress text by \textcolor{blue}{14.4×} and figures by \textcolor{blue}{2.6×}.
  Evaluation covers \textbf{Visual Quality}, \textbf{Textual Coherence}, \textbf{VLM-as-Judge}, and \textbf{PaperQuiz}.
  This suite spotlights \textit{semantic alignment}, \textbf{fluency}, and \textcolor{blue}{reader comprehension}.
  \begin{figure}
    \centering
    \includegraphics[width=0.80\linewidth]{figures/paper-picture-6.png}
  \end{figure}
\end{block}
\begin{block}{PaperQuiz: What Matters}
  % PaperQuiz panel: how the quiz is built and scored.
  We generate \textcolor{blue}{100 MCQs/paper}: \textbf{50 verbatim} + \textbf{50 interpretive}.
  Multiple VLM readers simulate \textit{novice-to-expert} audiences and answer from the poster only.
  Scores are length-penalized to reward \textbf{dense clarity}.
  Results \textbf{correlate with human judgment}, proving PaperQuiz captures \textcolor{blue}{information delivery} beyond surface visuals and discourages \textcolor{red}{verbose, unfocused designs}.
  \begin{figure}
    \centering
    \includegraphics[width=0.80\linewidth]{figures/paper-picture-7.png}
  \end{figure}
\end{block}
\end{column}
\separatorcolumn
\begin{column}{\colwidth}
\begin{block}{PosterAgent Pipeline}
  % Pipeline overview: the three stages are run-in bullet items within one paragraph.
  Our \textbf{top-down, visual-in-the-loop} agent compresses long papers into coherent posters.
  • \textbf{Parser} builds a structured asset library.
  • \textbf{Planner} aligns text–visual pairs and produces a \textcolor{blue}{binary-tree layout}.
  • \textbf{Painter–Commenter} renders panels via code and uses VLM feedback to fix \textcolor{red}{overflow} and misalignment.
  The result: \textbf{balanced, legible}, editable posters.
  \begin{figure}
    \centering
    \includegraphics[width=0.80\linewidth]{figures/paper-picture-8.png}
  \end{figure}
\end{block}
\begin{block}{Parser: Structured Assets}
  % Parser panel: text only, no figure.
  We distill PDFs into \textbf{section synopses} and \textit{figure/table assets} using \textcolor{blue}{MARKER} and \textcolor{blue}{DOCLING}, then LLM summarization.
  The asset library preserves \textbf{hierarchy} and \textit{semantics} while shrinking context for efficient planning.
  This step boosts \textbf{visual-semantic matching} and reduces \textcolor{red}{noise}, enabling reliable downstream \textit{layout reasoning}.
\end{block}
\begin{block}{Planner: Layout Mastery}
  % Planner panel: section-to-figure matching and the binary-tree layout.
  % The two-way arrow is written as math-mode \leftrightarrow rather than the raw
  % Unicode character U+2194, which has no default pdfLaTeX UTF-8 mapping and
  % would stop compilation.
  We semantically match \textbf{sections $\leftrightarrow$ figures} and allocate space via a \textcolor{blue}{binary-tree layout} that preserves \textit{reading order}, aspect ratios, and \textbf{content length} estimates.
  Panels are populated iteratively, ensuring \textbf{text brevity} and \textit{visual balance}.
  This strategy stabilizes coordinates and avoids \textcolor{red}{LLM numeric drift} in absolute placements.
  \begin{figure}
    \centering
    \includegraphics[width=0.80\linewidth]{figures/paper-picture-30.png}
  \end{figure}
\end{block}
\end{column}
\separatorcolumn
\begin{column}{\colwidth}
\begin{block}{Painter–Commenter Loop}
  % Painter/Commenter panel: render, critique, and iterate.
  The \textbf{Painter} turns section–figure pairs into crisp bullets and executable \textcolor{blue}{python-pptx} code, rendering draft panels.
  The \textbf{Commenter} VLM zooms into panels, using \textit{in-context examples} to flag \textcolor{red}{overflow} or \textcolor{red}{blankness}.
  Iterations continue until \textbf{fit and alignment} are achieved, producing \textit{readable, compact} panels with minimal revision cycles.
  \begin{figure}
    \centering
    \includegraphics[width=0.80\linewidth]{figures/paper-picture-61.png}
  \end{figure}
\end{block}
\begin{block}{Results: Stronger, Leaner}
  % Results panel: headline numbers, then the results table rendered as an image.
  Our open-source variants beat \textcolor{blue}{4o-driven multi-agents} on most metrics, with \textcolor{blue}{87\% fewer tokens}.
  We hit \textbf{state-of-the-art figure relevance}, near-\textit{GT} visual similarity, and \textbf{high VLM-as-Judge} scores.
  PaperQuiz confirms \textbf{better knowledge transfer}.
  Cost is tiny: \textcolor{blue}{\$0.0045–\$0.55/poster}.
  Key bottleneck remains \textcolor{red}{Engagement}, guiding future design.
  \begin{figure}
    \centering
    \includegraphics[width=0.80\linewidth]{figures/paper-table-1.png}
  \end{figure}
\end{block}
\begin{block}{Limits and Next Steps}
  % Limitations panel: text only, no figure.
  Current bottleneck: \textbf{sequential panel refinement} slows throughput (\textasciitilde{}\textcolor{blue}{4.5 min/doc}).
  We plan \textbf{panel-level parallelism}, \textit{external knowledge} integration (e.g., OpenReview), and \textbf{human-in-the-loop} editing for higher \textcolor{blue}{engagement}.
  These upgrades aim to boost \textbf{runtime, interactivity}, and \textit{visual storytelling}, pushing toward fully automated \textbf{author-grade posters}.
\end{block}
\end{column}
\separatorcolumn
\end{columns}
\end{frame}
\end{document}