Spaces:
Runtime error
Runtime error
| import gradio as gr | |
| import numpy as np | |
| import torch | |
| from PIL import Image | |
| from gradio_image_prompter import ImagePrompter | |
| from transformers import AutoProcessor, UdopForConditionalGeneration | |
| import easyocr | |
| from PIL import Image | |
| import spaces | |
# Hub checkpoint shared by the processor and the model.
_UDOP_CHECKPOINT = "microsoft/udop-large"

# apply_ocr=False: the processor is not asked to run OCR itself; the
# inference function in this file passes words and boxes explicitly.
processor = AutoProcessor.from_pretrained(_UDOP_CHECKPOINT, apply_ocr=False)
model = UdopForConditionalGeneration.from_pretrained(_UDOP_CHECKPOINT)

# CUDA when available, CPU otherwise.
# NOTE(review): nothing in this listing moves the model or its inputs to
# `device`, so inference currently runs wherever from_pretrained put the model.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
# EasyOCR reader, created on first use and then reused by later calls so the
# OCR models are not reloaded for every request.
_OCR_READER = None


def _get_ocr_reader():
    """Return the shared EasyOCR reader, building it on first use."""
    global _OCR_READER
    if _OCR_READER is None:
        _OCR_READER = easyocr.Reader(['en'])
    return _OCR_READER


def udop_box_inference(image, text_prompt, box_coordinates):
    """Run UDOP on ``image`` using only the text found inside a drawn box.

    The boxed region is OCR'd with EasyOCR; the recognised words and their
    boxes (expressed in full-image coordinates and normalised with
    ``normalize_bbox``) are passed to the UDOP processor together with the
    full image and the prompt.

    Args:
        image: PIL image (its ``size``, ``convert`` and ``crop`` are used).
        text_prompt: Prompt string handed to the processor.
        box_coordinates: Indexable prompt entry; positions 0, 1 are read as
            one corner (x, y) and positions 3, 4 as the opposite corner.
            Presumably the ``[x1, y1, type, x2, y2, type]`` layout emitted
            by ImagePrompter -- TODO confirm against the component docs.

    Returns:
        The first decoded sequence generated by the model.

    Raises:
        gr.Error: If the box has no area once clipped to the image.
    """
    width, height = image.size
    image = image.convert("RGB")

    x_a, y_a, x_b, y_b = (box_coordinates[i] for i in (0, 1, 3, 4))
    # A box dragged right-to-left or bottom-to-top arrives with swapped
    # corners; order them, then clip to the image.
    left, right = sorted((x_a, x_b))
    top, bottom = sorted((y_a, y_b))
    left = max(0, int(round(left)))
    top = max(0, int(round(top)))
    right = min(width, int(round(right)))
    bottom = min(height, int(round(bottom)))
    if right <= left or bottom <= top:
        raise gr.Error("Please draw a box with a non-zero area.")

    # Crop in memory and hand EasyOCR an array instead of going through a
    # fixed file name that concurrent requests would overwrite.
    cropped = image.crop((left, top, right, bottom))
    result = _get_ocr_reader().readtext(np.array(cropped))

    texts = []
    norm_boxes = []
    for bbox, text, _prob in result:
        texts.append(text)
        # OCR corners are relative to the crop; shift them by the crop
        # origin so they line up with the full image given to the processor.
        full_image_box = [
            bbox[0][0] + left,
            bbox[0][1] + top,
            bbox[2][0] + left,
            bbox[2][1] + top,
        ]
        norm_boxes.append(normalize_bbox(full_image_box, width, height))

    encoding = processor(image, text_prompt, texts, boxes=norm_boxes, return_tensors="pt")
    # Keep the inputs on the same device as the model weights.
    encoding = encoding.to(model.device)
    predicted_ids = model.generate(**encoding)
    return processor.batch_decode(predicted_ids, skip_special_tokens=True)[0]
def normalize_bbox(bbox, width, height):
    """Scale a pixel-space box to integer coordinates on a 0-1000 grid.

    Args:
        bbox: ``[x0, y0, x1, y1]`` in pixels.
        width: Width, in pixels, that the x coordinates are relative to.
        height: Height, in pixels, that the y coordinates are relative to.

    Returns:
        A list ``[x0, y0, x1, y1]`` of ints, each clamped to ``0..1000``.
    """
    extents = (width, height, width, height)
    return [
        # Clamp so a box that pokes slightly outside the page cannot
        # produce a value outside the 0-1000 grid.
        min(1000, max(0, int(1000 * (coord / extent))))
        for coord, extent in zip(bbox, extents)
    ]
def extract_box(image_path, coordinates):
    """Crop a rectangular region out of an image.

    Args:
        image_path: Either something ``Image.open`` accepts (a path or file
            object) or an already-decoded image exposing ``crop``.
        coordinates: ``(x, y, x2, y2)`` corners in pixels; the two corners
            may be given in either order.

    Returns:
        The cropped image.
    """
    x, y, x2, y2 = coordinates
    # Order the corners so a box dragged right-to-left or bottom-to-top is
    # still a valid (left, upper, right, lower) rectangle.
    left, right = sorted((x, x2))
    top, bottom = sorted((y, y2))
    region = (left, top, right, bottom)

    if hasattr(image_path, "crop"):
        # Already an image object: nothing to open.
        return image_path.crop(region)

    # Close the underlying file once the region has been copied out.
    with Image.open(image_path) as source:
        return source.crop(region)
def infer_box(prompts, text_prompts):
    """Gradio click handler: validate the prompt and run box inference.

    Args:
        prompts: Value of the ImagePrompter component; read as a mapping
            with an ``"image"`` entry and a ``"points"`` list.
            NOTE(review): a ``None`` value is also tolerated in case the
            component sends nothing -- confirm against the component docs.
        text_prompts: Text typed into the prompt textbox.

    Returns:
        The string produced by ``udop_box_inference`` for the first prompt.

    Raises:
        gr.Error: If no image was uploaded or no box was drawn.
    """
    if prompts is None or prompts["image"] is None:
        # The error must be raised for Gradio to show it; merely
        # constructing it has no effect.
        raise gr.Error("Please upload an image and draw a box before submitting")

    points = prompts["points"]
    # Check for a missing/empty list before indexing into it.
    if not points or points[0] is None:
        raise gr.Error("Please draw a box before submitting.")

    return udop_box_inference(prompts["image"], text_prompts, points[0])
# Gradio UI: a heading, then an image prompter + text prompt on the left
# and the model output on the right.
with gr.Blocks(title="UDOP") as demo:
    gr.Markdown("# UDOP")
    gr.Markdown("UDOP is a cutting-edge foundation model for document understanding and generation.")
    gr.Markdown("Try UDOP in this demo.")
    with gr.Row():
        with gr.Column(scale=1):
            # Usage hint
            gr.Markdown("To try box prompting, simply upload an image and draw a box on it.")
    with gr.Row():
        with gr.Column():
            im = ImagePrompter(type="pil")
            text_prompt = gr.Textbox()
            btn = gr.Button("Submit")
        with gr.Column():
            output = gr.Textbox(label="UDOP Output")
    # Event listeners have to be registered while the Blocks context is open.
    btn.click(infer_box, inputs=[im, text_prompt], outputs=[output])

demo.launch(debug=True)