| | |
| |
|
import base64
import json
import mimetypes
import os
from pathlib import Path

import gradio as gr
from openai import OpenAI
| |
|
# SECURITY: credentials must never be committed to source control. The key is
# read from the environment instead; any key that was previously embedded in
# this file should be treated as leaked and revoked.
API_KEY = os.environ.get("OPENAI_API_KEY")

# Model used for every extraction request.
MODEL = "gpt-5.1"

# Shared OpenAI client used by the upload and extraction helpers.
client = OpenAI(api_key=API_KEY)
| |
|
| |
|
def upload_pdf(path):
    """Upload the file at *path* through the OpenAI client and return its id.

    The file is opened in binary mode and closed as soon as the upload call
    returns (or raises), so no file handle is leaked.
    """
    with open(path, "rb") as fh:
        return client.files.create(file=fh, purpose="assistants").id
| |
|
| |
|
| | |
def prompt():
    """Return the instruction text sent to the model with every document.

    The text consists of the target JSON schema followed by the rule sections
    (ship-from, carrier/equipment, inventory, total quantity, custom fields).
    The separators in the ship-from priority line are plain ASCII ``->`` so
    the text cannot be corrupted by an encoding round-trip.
    """
    return (
        "Extract structured JSON from the attached logistics document. Return ONLY valid JSON.\n"
        "{\n"
        ' "po_number": string|null,\n'
        ' "ship_from_name": string|null,\n'
        ' "ship_from_email": string|null,\n'
        ' "carrier_type": string|null,\n'
        ' "rail_car_number": string|null,\n'
        ' "total_quantity": number|null,\n'
        ' "inventories": [\n'
        " {\n"
        ' "productName": string|null,\n'
        ' "productCode": string|null,\n'
        ' "variants": [\n'
        " {\n"
        ' "dimensions": string|null,\n'
        ' "pcs_per_pkg": number|null,\n'
        ' "length_ft": number|null,\n'
        ' "width": number|null,\n'
        ' "packages": number|null,\n'
        ' "pieces": number|null,\n'
        ' "fbm": number|string|null\n'
        " }\n"
        " ],\n"
        ' "total_pcs": number|null,\n'
        ' "total_fbm": number|string|null\n'
        " }\n"
        " ],\n"
        ' "custom_fields": {}\n'
        "}\n\n"
        "SHIP FROM RULES:\n"
        "- If explicit fields like 'Origin', 'Ship From' exist, extract that value.\n"
        "- If the document is an email-style inbound notice (header block) and shows:\n"
        " From: Name <email>\n"
        " then ship_from_name = Name, ship_from_email = email.\n"
        "- If only an email exists and no human name, set both fields to that email.\n"
        "- If both Origin and an email sender exist, use Origin for ship_from_name and still capture the email under ship_from_email.\n"
        # The separators below were mis-encoded characters; restored as arrows.
        "- Priority: Origin -> Email Name -> Mill -> Sender block -> null.\n\n"
        "CARRIER / EQUIPMENT RULE:\n"
        "- If the table contains:\n"
        " Equipment id = <value>\n"
        " Mark = <value>\n"
        " then ALWAYS treat 'Equipment id' as the railcar number.\n"
        "- NEVER use 'Mark' as railcar number.\n"
        "- Carrier type must match the carrier text exactly (e.g., CHICAGO RAIL LINK).\n\n"
        "INVENTORY RULES:\n"
        "- Do not merge length groups. Each unique length or dimension is its own variant.\n"
        "- Extract pcs_per_pkg, packages, pieces, fbm exactly as written.\n"
        "- total_pcs = sum of pieces.\n"
        "- total_fbm = sum of fbm.\n\n"
        "TOTAL QUANTITY RULE:\n"
        "- Use explicit totals if they appear.\n"
        "- If no explicit total quantity appears, leave null.\n\n"
        "CUSTOM FIELDS RULE:\n"
        "- Capture all meaningful leftover fields not part of main schema.\n\n"
        "Return ONLY the JSON."
    )
| |
|
| |
|
| |
|
| | |
def _json_object_span(text):
    """Return *text* from its first "{" to its last "}", or "{}" if there is none."""
    start = text.find("{")
    end = text.rfind("}")
    if start == -1 or end < start:
        # No JSON object in the reply: hand back an empty object, which is
        # still valid JSON, rather than an empty string.
        return "{}"
    return text[start:end + 1]


def extract(path):
    """Send the document at *path* to the model and return its JSON reply text.

    A ``.pdf`` file is uploaded with ``upload_pdf`` and attached by file id.
    Any other file is treated as an image and embedded as a base64 data URL.

    Args:
        path: Filesystem path of the PDF or image to process.

    Returns:
        The part of the model's reply spanning the first "{" through the
        last "}", or "{}" when the reply is empty or contains no such span.
    """
    source = Path(path)
    suffix = source.suffix.lower()

    if suffix == ".pdf":
        attachment = {"type": "file", "file": {"file_id": upload_pdf(path)}}
    else:
        # Use the registered MIME type (".jpg" -> "image/jpeg"); building it
        # from the raw extension yields non-standard types such as "image/jpg".
        mime, _ = mimetypes.guess_type(source.name)
        if not mime or not mime.startswith("image/"):
            mime = f"image/{suffix[1:]}"
        b64 = base64.b64encode(source.read_bytes()).decode()
        attachment = {
            "type": "image_url",
            "image_url": {"url": f"data:{mime};base64,{b64}"},
        }

    response = client.chat.completions.create(
        model=MODEL,
        messages=[
            {
                "role": "user",
                "content": [{"type": "text", "text": prompt()}, attachment],
            }
        ],
    )

    # The message content can be None; normalise it to an empty string.
    text = response.choices[0].message.content or ""
    return _json_object_span(text)
| |
|
| |
|
def ui(image_input, pdf_input):
    """Run extraction on whichever upload is present, preferring the image.

    Args:
        image_input: Value of the image component; used when truthy.
        pdf_input: Value of the file component; its ``.name`` attribute is
            used as the path when no image was supplied.

    Returns:
        The result of ``extract`` for the chosen upload, or "{}" when neither
        input is provided.
    """
    if not image_input:
        if not pdf_input:
            return "{}"
        return extract(pdf_input.name)
    return extract(image_input)
| |
|
| |
|
| | |
| |
|
# Gradio front end: one image upload and one PDF upload feed `ui`, whose
# result is shown in a JSON viewer.
with gr.Blocks() as demo:
    gr.Markdown("# **Logistics OCR Data Extractor (GPT-5.1)**")

    with gr.Row():
        img = gr.Image(label="Upload Image", type="filepath")
        # Extension filters need the leading dot; a bare "pdf" is neither an
        # extension nor one of Gradio's named file categories.
        pdf = gr.File(label="Upload PDF", file_types=[".pdf"])

    out = gr.JSON(label="Extracted JSON")
    btn = gr.Button("Submit")

    btn.click(fn=ui, inputs=[img, pdf], outputs=out)

    gr.Examples(
        examples=[
            ["IMG_0001.jpg", None],
            ["IMG_0002.jpg", None],
        ],
        inputs=[img, pdf],
        label="Sample Images",
    )

# NOTE(review): share=True requests a public share link — confirm this
# exposure is intended before deploying.
demo.launch(share=True)
| |
|