<!-- Stray capture text that preceded the markup, kept here for reference: "Spaces: Running Running" -->
<!doctype html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>JarvisArt: Tech Poster</title>
<!-- Font Awesome 6.4.0 stylesheet; the page body uses its "fas fa-*" icon classes. -->
<link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.4.0/css/all.min.css">
<style>
  /* Shared colour palette, exposed as custom properties. */
  :root {
    --primary: #6e48aa;
    --secondary: #9d50bb;
    --accent: #4776e6;
    --dark: #1a1a2e;
    --light: #f8f9fa;
    --text: #333;
  }

  /* Global reset: no default margins/padding, border-box sizing, one font stack. */
  * {
    margin: 0;
    padding: 0;
    box-sizing: border-box;
    font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;
  }

  body {
    background-color: var(--dark);
    color: var(--light);
    line-height: 1.6;
  }

  /* ---------- Poster card ---------- */

  .poster {
    max-width: 1200px;
    margin: 2rem auto;
    background: linear-gradient(135deg, #1a1a2e 0%, #16213e 100%);
    border-radius: 15px;
    overflow: hidden;
    box-shadow: 0 20px 40px rgba(0, 0, 0, 0.3);
    position: relative;
    border: 1px solid rgba(255, 255, 255, 0.1);
  }

  /* Two faint radial glows layered over the whole card; pointer-events is
     disabled so the overlay never intercepts clicks. */
  .poster::before {
    content: '';
    position: absolute;
    top: 0;
    left: 0;
    width: 100%;
    height: 100%;
    background:
      radial-gradient(circle at 20% 30%, rgba(110, 72, 170, 0.15) 0%, transparent 30%),
      radial-gradient(circle at 80% 70%, rgba(157, 80, 187, 0.15) 0%, transparent 30%);
    pointer-events: none;
  }

  /* ---------- Header ---------- */

  .header {
    padding: 3rem 4rem 2rem;
    background: linear-gradient(to right, var(--primary), var(--secondary));
    position: relative;
    overflow: hidden;
  }

  /* Skewed dark strip along the bottom edge of the header. It sits at
     z-index 1; the header's text elements use z-index 2 to stay above it. */
  .header::after {
    content: '';
    position: absolute;
    bottom: -50px;
    left: 0;
    width: 100%;
    height: 100px;
    background: var(--dark);
    transform: skewY(-3deg);
    z-index: 1;
  }

  .title {
    font-size: 3.5rem;
    font-weight: 800;
    margin-bottom: 1rem;
    line-height: 1.1;
    text-shadow: 2px 2px 4px rgba(0, 0, 0, 0.3);
    position: relative;
    z-index: 2;
  }

  .subtitle {
    font-size: 1.5rem;
    font-weight: 300;
    opacity: 0.9;
    margin-bottom: 1.5rem;
    position: relative;
    z-index: 2;
  }

  .authors {
    display: flex;
    flex-wrap: wrap;
    gap: 0.5rem 1.5rem;
    margin-bottom: 1rem;
    position: relative;
    z-index: 2;
  }

  .author {
    font-size: 1rem;
    font-weight: 500;
  }

  .affiliations {
    display: flex;
    flex-wrap: wrap;
    gap: 0.5rem 1.5rem;
    font-size: 0.9rem;
    opacity: 0.8;
    margin-bottom: 1rem;
    position: relative;
    z-index: 2;
  }

  /* Pill-shaped link to the project page. */
  .project-link {
    display: inline-flex;
    align-items: center;
    gap: 0.5rem;
    background: rgba(255, 255, 255, 0.1);
    padding: 0.5rem 1rem;
    border-radius: 50px;
    text-decoration: none;
    color: white;
    font-weight: 500;
    transition: all 0.3s ease;
    position: relative;
    z-index: 2;
  }

  .project-link:hover {
    background: rgba(255, 255, 255, 0.2);
    transform: translateY(-2px);
  }

  /* Keyboard focus gets the hover treatment plus an explicit outline. Kept as
     its own rule so a browser that does not know :focus-visible only drops
     this rule and not the hover rule above. */
  .project-link:focus-visible {
    background: rgba(255, 255, 255, 0.2);
    outline: 2px solid var(--light);
    outline-offset: 2px;
  }

  /* ---------- Content grid ---------- */

  .content {
    display: grid;
    grid-template-columns: 1fr 1fr;
    gap: 2rem;
    padding: 3rem 4rem;
    position: relative;
    z-index: 2;
  }

  /* The abstract spans every column of the content grid. */
  .abstract {
    grid-column: 1 / -1;
    background: rgba(255, 255, 255, 0.05);
    padding: 2rem;
    border-radius: 10px;
    border-left: 4px solid var(--accent);
  }

  .abstract-title {
    font-size: 1.5rem;
    margin-bottom: 1rem;
    color: var(--accent);
  }

  .abstract-text {
    font-size: 1.1rem;
    line-height: 1.7;
  }

  .highlight {
    color: var(--accent);
    font-weight: 600;
  }

  /* ---------- Feature cards ---------- */

  .features {
    display: grid;
    grid-template-columns: 1fr;
    gap: 1.5rem;
  }

  .feature {
    background: rgba(255, 255, 255, 0.05);
    padding: 1.5rem;
    border-radius: 10px;
    transition: all 0.3s ease;
  }

  .feature:hover {
    transform: translateY(-5px);
    box-shadow: 0 10px 20px rgba(0, 0, 0, 0.2);
  }

  .feature-title {
    font-size: 1.2rem;
    margin-bottom: 0.5rem;
    color: var(--accent);
    display: flex;
    align-items: center;
    gap: 0.5rem;
  }

  .feature-icon {
    font-size: 1.5rem;
  }

  .feature-text {
    font-size: 1rem;
    opacity: 0.9;
  }

  /* ---------- Figure ---------- */

  .image-container {
    position: relative;
    border-radius: 10px;
    overflow: hidden;
    box-shadow: 0 10px 20px rgba(0, 0, 0, 0.3);
    height: 100%;
    display: flex;
    align-items: center;
    justify-content: center;
  }

  .image-container img {
    width: 100%;
    height: auto;
    object-fit: cover;
    transition: transform 0.5s ease;
  }

  .image-container:hover img {
    transform: scale(1.03);
  }

  /* Caption bar pinned to the bottom edge of the image container. */
  .image-caption {
    position: absolute;
    bottom: 0;
    left: 0;
    right: 0;
    background: rgba(0, 0, 0, 0.7);
    padding: 1rem;
    font-size: 0.9rem;
    text-align: center;
  }

  /* ---------- Stat tiles ---------- */

  .stats {
    display: grid;
    grid-template-columns: repeat(2, 1fr);
    gap: 1.5rem;
    margin-top: 1.5rem;
  }

  .stat {
    background: rgba(255, 255, 255, 0.05);
    padding: 1.5rem;
    border-radius: 10px;
    text-align: center;
  }

  .stat-value {
    font-size: 2.5rem;
    font-weight: 700;
    color: var(--accent);
    margin-bottom: 0.5rem;
  }

  .stat-label {
    font-size: 0.9rem;
    opacity: 0.8;
  }

  /* ---------- Footer ---------- */

  .footer {
    padding: 2rem 4rem;
    background: rgba(0, 0, 0, 0.3);
    text-align: center;
    font-size: 0.9rem;
    opacity: 0.7;
    position: relative;
    z-index: 2;
  }

  /* ---------- Responsive overrides ---------- */

  /* Up to 992px: single-column content, smaller headings, tighter padding. */
  @media (max-width: 992px) {
    .content {
      grid-template-columns: 1fr;
      padding: 2rem;
    }

    .title {
      font-size: 2.5rem;
    }

    .subtitle {
      font-size: 1.2rem;
    }

    .header {
      padding: 2rem;
    }
  }

  /* Up to 576px: smallest type scale and padding; stat tiles stack. */
  @media (max-width: 576px) {
    .title {
      font-size: 2rem;
    }

    .subtitle {
      font-size: 1rem;
    }

    .header {
      padding: 1.5rem;
    }

    .content {
      padding: 1.5rem;
    }

    .abstract {
      padding: 1.5rem;
    }

    .stats {
      grid-template-columns: 1fr;
    }
  }

  /* Honour the reduced-motion preference: switch off the hover transitions
     and the movement/zoom they animate. */
  @media (prefers-reduced-motion: reduce) {
    .project-link,
    .feature,
    .image-container img {
      transition: none;
    }

    .project-link:hover,
    .feature:hover,
    .image-container:hover img {
      transform: none;
    }
  }
</style>
</head>
<body>
<!-- Poster card: the single top-level container for the page content. -->
<div class="poster">
<!-- Masthead: title, tagline, author list, affiliations and the project link. -->
<header class="header">
  <h1 class="title">JarvisArt</h1>
  <p class="subtitle">Liberating Human Artistic Creativity via an Intelligent Photo Retouching Agent</p>
  <div class="authors">
    <span class="author">Yunlong Lin*</span>
    <span class="author">Zixu Lin*</span>
    <span class="author">Kunjie Lin*</span>
    <span class="author">Jinbin Bai</span>
    <span class="author">Panwang Pan</span>
    <span class="author">Chenxin Li</span>
    <span class="author">Haoyu Chen</span>
    <span class="author">Zhongdao Wang</span>
    <span class="author">Xinghao Ding†</span>
    <span class="author">Wenbo Li♣</span>
    <span class="author">Shuicheng Yan†</span>
  </div>
  <div class="affiliations">
    <span>Xiamen University</span>
    <span>HKUST(GZ)</span>
    <span>CUHK</span>
    <span>Bytedance</span>
    <span>NUS</span>
    <span>Tsinghua University</span>
  </div>
  <!-- Opens in a new tab, so rel="noopener noreferrer" keeps the opened page
       from reaching back into this one through window.opener. -->
  <a class="project-link" href="https://jarvisart.vercel.app/" target="_blank" rel="noopener noreferrer">
    <!-- Decorative icon: hidden from assistive technology; the link text names the link. -->
    <i class="fas fa-external-link-alt" aria-hidden="true"></i>
    Project Page
  </a>
</header>
<!-- Primary content grid: abstract, feature cards, figure and stat tiles.
     Class names are unchanged so the existing stylesheet rules still apply. -->
<main class="content">
  <section class="abstract">
    <h2 class="abstract-title">Abstract</h2>
    <p class="abstract-text">
      We introduce <span class="highlight">JarvisArt</span>, a multi-modal large language model (MLLM)-driven agent that understands user intent, mimics professional artists' reasoning, and intelligently coordinates over <span class="highlight">200 retouching tools</span> within Lightroom. JarvisArt undergoes a two-stage training process and demonstrates <span class="highlight">user-friendly interaction</span>, superior generalization, and fine-grained control over both global and local adjustments. Notably, it outperforms GPT-4o with a <span class="highlight">60% improvement</span> in average pixel-level metrics on our MMArt-Bench benchmark while maintaining comparable instruction-following capabilities.
    </p>
  </section>

  <!-- Feature cards. Each icon is decorative (the heading text carries the
       meaning), so it is hidden from assistive technology. -->
  <div class="features">
    <div class="feature">
      <h3 class="feature-title">
        <i class="fas fa-brain feature-icon" aria-hidden="true"></i>
        Professional Reasoning
      </h3>
      <p class="feature-text">
        Mimics the reasoning process of professional artists through Chain-of-Thought supervised fine-tuning and GRPO-R optimization.
      </p>
    </div>
    <div class="feature">
      <h3 class="feature-title">
        <i class="fas fa-tools feature-icon" aria-hidden="true"></i>
        Comprehensive Toolset
      </h3>
      <p class="feature-text">
        Intelligently coordinates over 200 retouching tools within Lightroom for both global and local adjustments.
      </p>
    </div>
    <div class="feature">
      <h3 class="feature-title">
        <i class="fas fa-user-astronaut feature-icon" aria-hidden="true"></i>
        User-Friendly Interaction
      </h3>
      <p class="feature-text">
        Supports intuitive, free-form edits through natural inputs like text prompts, bounding boxes, or brushstrokes.
      </p>
    </div>
  </div>

  <!-- Image and caption are tied together as a figure. -->
  <figure class="image-container">
    <img src="https://cdn.vansin.top/papers/2506.17612/images/b1b85618f6f156005dab98b71efac19eb8379eb249721ebe9c59ebcd55ca3412.jpg" alt="JarvisArt Interface" decoding="async">
    <figcaption class="image-caption">
      Figure 1: JarvisArt supports multi-granularity retouching through natural inputs and edits any-resolution images.
    </figcaption>
  </figure>

  <!-- Headline numbers, one tile per value/label pair. -->
  <div class="stats">
    <div class="stat">
      <div class="stat-value">200+</div>
      <div class="stat-label">Retouching Tools</div>
    </div>
    <div class="stat">
      <div class="stat-value">60%</div>
      <div class="stat-label">Improvement over GPT-4o</div>
    </div>
    <div class="stat">
      <div class="stat-value">55K</div>
      <div class="stat-label">Training Samples</div>
    </div>
    <div class="stat">
      <div class="stat-value">∞</div>
      <div class="stat-label">Image Resolution Support</div>
    </div>
  </div>
</main>
<!-- Poster footer: tagline and the project URL as plain text. -->
<footer class="footer">
  JarvisArt: A new paradigm for intelligent photo retouching | https://jarvisart.vercel.app/
</footer>
</div>
</body>
</html>