ranbac committed on
Commit
727bcf9
·
verified ·
1 Parent(s): 0a6d72e

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +410 -0
app.py ADDED
@@ -0,0 +1,410 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import torch
3
+ import spaces
4
+ import os
5
+ import tempfile
6
+ from PIL import Image, ImageOps
7
+ from typing import Iterable
8
+
9
+ from transformers import AutoProcessor, AutoModelForImageTextToText
10
+ from gradio.themes import Soft
11
+ from gradio.themes.utils import colors, fonts, sizes
12
+
13
# Hot-pink palette, listed from the lightest shade (c50) to the darkest (c950).
_HOT_PINK_SHADES = {
    "c50": "#FFF0F5",
    "c100": "#FFE4EC",
    "c200": "#FFC0D9",
    "c300": "#FF99C4",
    "c400": "#FF7EB8",
    "c500": "#FF69B4",
    "c600": "#E55AA0",
    "c700": "#CC4C8C",
    "c800": "#B33D78",
    "c900": "#992F64",
    "c950": "#802050",
}

# Attach the palette to gradio's colors module so it can be referenced as
# `colors.hot_pink`, the same way the built-in hues are.
colors.hot_pink = colors.Color(name="hot_pink", **_HOT_PINK_SHADES)
27
+
28
+
29
class HotPinkTheme(Soft):
    """Variant of the ``Soft`` theme using gray primaries and hot-pink accents."""

    def __init__(
        self,
        *,
        primary_hue: colors.Color | str = colors.gray,
        secondary_hue: colors.Color | str = colors.hot_pink,
        neutral_hue: colors.Color | str = colors.slate,
        text_size: sizes.Size | str = sizes.text_lg,
        font: fonts.Font | str | Iterable[fonts.Font | str] = (
            fonts.GoogleFont("Outfit"),
            "Arial",
            "sans-serif",
        ),
        font_mono: fonts.Font | str | Iterable[fonts.Font | str] = (
            fonts.GoogleFont("IBM Plex Mono"),
            "ui-monospace",
            "monospace",
        ),
    ):
        """Initialise the base theme, then apply the pink-specific overrides.

        All arguments are keyword-only and are forwarded unchanged to
        ``Soft.__init__``.
        """
        base_options = {
            "primary_hue": primary_hue,
            "secondary_hue": secondary_hue,
            "neutral_hue": neutral_hue,
            "text_size": text_size,
            "font": font,
            "font_mono": font_mono,
        }
        super().__init__(**base_options)

        # Style overrides layered on top of the Soft defaults.
        overrides = {
            # Page and block backgrounds.
            "background_fill_primary": "*primary_50",
            "background_fill_primary_dark": "*primary_900",
            "body_background_fill": "linear-gradient(135deg, *primary_200, *primary_100)",
            "body_background_fill_dark": "linear-gradient(135deg, *primary_900, *primary_800)",
            # Primary buttons use the secondary (hot pink) hue.
            "button_primary_text_color": "white",
            "button_primary_text_color_hover": "white",
            "button_primary_background_fill": "linear-gradient(90deg, *secondary_500, *secondary_600)",
            "button_primary_background_fill_hover": "linear-gradient(90deg, *secondary_600, *secondary_700)",
            "button_primary_background_fill_dark": "linear-gradient(90deg, *secondary_600, *secondary_700)",
            "button_primary_background_fill_hover_dark": "linear-gradient(90deg, *secondary_500, *secondary_600)",
            # Secondary buttons use the primary (gray) hue.
            "button_secondary_text_color": "black",
            "button_secondary_text_color_hover": "white",
            "button_secondary_background_fill": "linear-gradient(90deg, *primary_300, *primary_300)",
            "button_secondary_background_fill_hover": "linear-gradient(90deg, *primary_400, *primary_400)",
            "button_secondary_background_fill_dark": "linear-gradient(90deg, *primary_500, *primary_600)",
            "button_secondary_background_fill_hover_dark": "linear-gradient(90deg, *primary_500, *primary_500)",
            # Sliders, blocks and miscellaneous accents.
            "slider_color": "*secondary_500",
            "slider_color_dark": "*secondary_600",
            "block_title_text_weight": "600",
            "block_border_width": "3px",
            "block_shadow": "*shadow_drop_lg",
            "button_primary_shadow": "*shadow_drop_lg",
            "button_large_padding": "11px",
            "color_accent_soft": "*primary_100",
            "block_label_background_fill": "*primary_200",
        }
        super().set(**overrides)


# Shared theme instance handed to `launch()` at the bottom of the file.
hot_pink_theme = HotPinkTheme()
86
+
87
+ css = """
88
+ @import url('https://fonts.googleapis.com/css2?family=Outfit:wght@400;500;600;700&family=IBM+Plex+Mono:wght@400;500;600&display=swap');
89
+
90
+ /* Grid background */
91
+ body, .gradio-container {
92
+ background-color: #FFF0F5 !important;
93
+ background-image:
94
+ linear-gradient(#FFC0D9 1px, transparent 1px),
95
+ linear-gradient(90deg, #FFC0D9 1px, transparent 1px) !important;
96
+ background-size: 40px 40px !important;
97
+ font-family: 'Outfit', sans-serif !important;
98
+ }
99
+
100
+ .dark body, .dark .gradio-container {
101
+ background-color: #1a1a1a !important;
102
+ background-image:
103
+ linear-gradient(rgba(255, 105, 180, 0.1) 1px, transparent 1px),
104
+ linear-gradient(90deg, rgba(255, 105, 180, 0.1) 1px, transparent 1px) !important;
105
+ background-size: 40px 40px !important;
106
+ }
107
+
108
+ /* Sidebar width */
109
+ .gradio-sidebar {
110
+ min-width: 420px !important;
111
+ max-width: 480px !important;
112
+ }
113
+
114
+ /* Titles */
115
+ #main-title h1 {
116
+ font-size: 2.5em !important;
117
+ font-weight: 700 !important;
118
+ background: linear-gradient(135deg, #FF69B4 0%, #FF99C4 50%, #E55AA0 100%);
119
+ background-size: 200% 200%;
120
+ -webkit-background-clip: text;
121
+ -webkit-text-fill-color: transparent;
122
+ background-clip: text;
123
+ animation: gradient-shift 4s ease infinite;
124
+ letter-spacing: -0.02em;
125
+ }
126
+
127
+ #output-title h2 {
128
+ font-size: 2.2em !important;
129
+ }
130
+
131
+ @keyframes gradient-shift {
132
+ 0%, 100% { background-position: 0% 50%; }
133
+ 50% { background-position: 100% 50%; }
134
+ }
135
+
136
+ /* Card styling */
137
+ .gradio-group {
138
+ background: rgba(255, 255, 255, 0.9) !important;
139
+ border: 2px solid #FFC0D9 !important;
140
+ border-radius: 12px !important;
141
+ box-shadow: 0 4px 24px rgba(255, 105, 180, 0.08) !important;
142
+ backdrop-filter: blur(10px);
143
+ transition: all 0.3s ease;
144
+ }
145
+
146
+ .gradio-group:hover {
147
+ box-shadow: 0 8px 32px rgba(255, 105, 180, 0.12) !important;
148
+ border-color: #FF99C4 !important;
149
+ }
150
+
151
+ .dark .gradio-group {
152
+ background: rgba(30, 30, 30, 0.9) !important;
153
+ border-color: rgba(255, 105, 180, 0.3) !important;
154
+ }
155
+
156
+ /* Image upload */
157
+ .gradio-image {
158
+ border-radius: 10px !important;
159
+ overflow: hidden;
160
+ border: 2px dashed #FF99C4 !important;
161
+ transition: all 0.3s ease;
162
+ }
163
+
164
+ .gradio-image:hover {
165
+ border-color: #FF69B4 !important;
166
+ background: rgba(255, 105, 180, 0.02) !important;
167
+ }
168
+
169
+ /* Radio buttons */
170
+ .gradio-radio label {
171
+ border-radius: 6px !important;
172
+ transition: all 0.2s ease !important;
173
+ border: 1px solid transparent !important;
174
+ }
175
+
176
+ .gradio-radio label:hover {
177
+ background: rgba(255, 105, 180, 0.05) !important;
178
+ }
179
+
180
+ .gradio-radio label.selected {
181
+ background: rgba(255, 105, 180, 0.1) !important;
182
+ border-color: #FF69B4 !important;
183
+ }
184
+
185
+ /* Primary button */
186
+ .primary {
187
+ border-radius: 8px !important;
188
+ font-weight: 600 !important;
189
+ letter-spacing: 0.05em !important;
190
+ transition: all 0.3s ease !important;
191
+ }
192
+
193
+ .primary:hover {
194
+ transform: translateY(-2px) !important;
195
+ }
196
+
197
+ /* Output textbox */
198
+ .gradio-textbox textarea {
199
+ font-family: 'IBM Plex Mono', monospace !important;
200
+ font-size: 0.95rem !important;
201
+ line-height: 1.7 !important;
202
+ background: rgba(255, 255, 255, 0.95) !important;
203
+ border: 1px solid #FFC0D9 !important;
204
+ border-radius: 8px !important;
205
+ }
206
+
207
+ .dark .gradio-textbox textarea {
208
+ background: rgba(30, 30, 30, 0.95) !important;
209
+ border-color: rgba(255, 105, 180, 0.2) !important;
210
+ }
211
+
212
+ /* Markdown output */
213
+ .gradio-markdown {
214
+ font-family: 'Outfit', sans-serif !important;
215
+ line-height: 1.7 !important;
216
+ }
217
+
218
+ .gradio-markdown code {
219
+ font-family: 'IBM Plex Mono', monospace !important;
220
+ background: rgba(255, 105, 180, 0.08) !important;
221
+ padding: 2px 6px !important;
222
+ border-radius: 4px !important;
223
+ color: #CC4C8C !important;
224
+ }
225
+
226
+ .gradio-markdown pre {
227
+ background: rgba(255, 105, 180, 0.05) !important;
228
+ border: 1px solid #FFC0D9 !important;
229
+ border-radius: 8px !important;
230
+ padding: 1rem !important;
231
+ }
232
+
233
+ /* Examples */
234
+ .gradio-examples .gallery-item {
235
+ border: 2px solid #FFC0D9 !important;
236
+ border-radius: 8px !important;
237
+ transition: all 0.2s ease !important;
238
+ }
239
+
240
+ .gradio-examples .gallery-item:hover {
241
+ border-color: #FF69B4 !important;
242
+ transform: translateY(-2px) !important;
243
+ box-shadow: 0 4px 12px rgba(255, 105, 180, 0.15) !important;
244
+ }
245
+
246
+ /* Scrollbar */
247
+ ::-webkit-scrollbar { width: 8px; height: 8px; }
248
+ ::-webkit-scrollbar-track { background: rgba(255,105,180,0.05); border-radius: 4px; }
249
+ ::-webkit-scrollbar-thumb { background: linear-gradient(135deg, #FF69B4, #FF99C4); border-radius: 4px; }
250
+ ::-webkit-scrollbar-thumb:hover { background: linear-gradient(135deg, #E55AA0, #FF69B4); }
251
+
252
+ /* Accordion */
253
+ .gradio-accordion {
254
+ border-radius: 10px !important;
255
+ border: 1px solid #FFC0D9 !important;
256
+ }
257
+
258
+ .gradio-accordion > .label-wrap {
259
+ background: rgba(255, 105, 180, 0.03) !important;
260
+ border-radius: 10px !important;
261
+ }
262
+
263
+ /* Animations */
264
+ @keyframes fadeIn {
265
+ from { opacity: 0; transform: translateY(10px); }
266
+ to { opacity: 1; transform: translateY(0); }
267
+ }
268
+
269
+ .gradio-row { animation: fadeIn 0.4s ease-out; }
270
+
271
+ label { font-weight: 600 !important; color: #333 !important; }
272
+ .dark label { color: #eee !important; }
273
+
274
+ footer { display: none !important; }
275
+
276
+
277
+ /* Wider sidebar */
278
+ .sidebar {
279
+ min-width: 420px !important;
280
+ max-width: 480px !important;
281
+ }
282
+ """
283
+
284
# Identifier of the OCR checkpoint passed to both `from_pretrained` calls.
MODEL_PATH = "zai-org/GLM-OCR"

# Processor and model are created once, at import time, and reused by every
# request handled in `process_image`.
processor = AutoProcessor.from_pretrained(MODEL_PATH, trust_remote_code=True)
model = AutoModelForImageTextToText.from_pretrained(
    MODEL_PATH,
    trust_remote_code=True,
    device_map="auto",
    torch_dtype=torch.bfloat16,
)

# Recognition type shown in the UI -> prompt text sent alongside the image.
# Insertion order is the order of the radio choices.
TASK_PROMPTS = {
    "Text": "Text Recognition:",
    "Formula": "Formula Recognition:",
    "Table": "Table Recognition:",
}
299
+
300
@spaces.GPU
def process_image(image, task):
    """Run OCR on the uploaded image with the selected recognition type.

    Args:
        image: PIL image supplied by the upload widget, or ``None`` when
            nothing has been uploaded.
        task: Key of ``TASK_PROMPTS``; an unknown value falls back to the
            text-recognition prompt.

    Returns:
        A 2-tuple containing the same string twice: one copy for the raw
        textbox and one for the rendered-markdown view.
    """
    if image is None:
        return "Please upload an image first.", "Please upload an image first."

    # Read the EXIF orientation from the original image before any conversion,
    # then normalise to RGB. Converting every non-RGB mode (not only
    # RGBA/LA/P) keeps modes that PNG cannot store, such as CMYK, from
    # failing in `save` below.
    image = ImageOps.exif_transpose(image)
    if image.mode != "RGB":
        image = image.convert("RGB")

    prompt = TASK_PROMPTS.get(task, "Text Recognition:")

    # Reserve a path and release the handle straight away, so the image is
    # never written by name while another handle to the file is still open.
    tmp = tempfile.NamedTemporaryFile(delete=False, suffix=".png")
    tmp.close()
    try:
        image.save(tmp.name, "PNG")

        messages = [
            {
                "role": "user",
                "content": [
                    {"type": "image", "url": tmp.name},
                    {"type": "text", "text": prompt},
                ],
            }
        ]

        inputs = processor.apply_chat_template(
            messages,
            tokenize=True,
            add_generation_prompt=True,
            return_dict=True,
            return_tensors="pt",
        ).to(model.device)

        # Dropped before generation, exactly as the original code did.
        inputs.pop("token_type_ids", None)

        generated_ids = model.generate(**inputs, max_new_tokens=8192)

        # Decode only the tokens that follow the prompt.
        prompt_length = inputs["input_ids"].shape[1]
        output_text = processor.decode(
            generated_ids[0][prompt_length:],
            skip_special_tokens=True,
        )
    finally:
        # Always remove the temp file, including when preprocessing or
        # generation raises; previously it leaked on any error.
        os.unlink(tmp.name)

    result = output_text.strip()
    return result, result
346
+
347
+ with gr.Blocks(fill_height=True) as demo:
348
+
349
+ with gr.Sidebar(width=450):
350
+
351
+ gr.Markdown("# **GLM-OCR**", elem_id="main-title")
352
+
353
+ image_input = gr.Image(
354
+ type="pil",
355
+ label="Upload Image",
356
+ sources=["upload", "clipboard"],
357
+ height=300,
358
+ )
359
+
360
+ task = gr.Radio(
361
+ choices=list(TASK_PROMPTS.keys()),
362
+ value="Text",
363
+ label="Recognition Type",
364
+ )
365
+
366
+ btn = gr.Button("Perform OCR", variant="primary")
367
+
368
+ gr.Examples(
369
+ examples=[
370
+ "examples/1.jpg",
371
+ "examples/4.jpg",
372
+ "examples/5.webp",
373
+ "examples/2.jpg",
374
+ "examples/3.jpg",
375
+ ],
376
+ inputs=image_input,
377
+ label="Examples",
378
+ )
379
+
380
+ gr.Markdown("## Output", elem_id="output-title")
381
+
382
+ output_text = gr.Textbox(
383
+ label="Raw Output Stream",
384
+ interactive=True,
385
+ lines=22,
386
+ )
387
+
388
+ with gr.Accordion("(Result.md)", open=False):
389
+ output_md = gr.Markdown(label="Rendered Markdown")
390
+
391
+ btn.click(
392
+ fn=process_image,
393
+ inputs=[image_input, task],
394
+ outputs=[output_text, output_md],
395
+ )
396
+
397
+ image_input.change(
398
+ fn=lambda: ("", ""),
399
+ inputs=None,
400
+ outputs=[output_text, output_md],
401
+ )
402
+
403
if __name__ == "__main__":
    # Enable the request queue (max_size=50) and launch from what it returns.
    queued_demo = demo.queue(max_size=50)
    queued_demo.launch(
        theme=hot_pink_theme,
        css=css,
        show_error=True,
        ssr_mode=False,
        mcp_server=True,
    )