Spaces:

IFMedTechdemo
/

medibotOCR

Paused

App Files Files Community

IFMedTechdemo committed on Nov 16

Commit

a779cac

verified ·

1 Parent(s): 40cc5c9

Update app.py

Browse files

Overall code changes in version 2:
The generate_image signature now exactly matches the inputs=[...] order (including the gr.State(...) for text).

The function always yields two values, and the UI now defines outputs=[output, markdown_output] to receive them, so Gradio no longer raises an error about mismatched output counts.

ClinicalNER is only invoked when model_name == "Dots.OCR". For any other model, or when NER returns no medications, the medication list falls back to the non-empty lines of the OCR text.

Spell-check suggests up to 5 matches per medication (the exact number depends on your private matcher implementation), and each suggestion line shows both its score and its CER (character error rate).

Files changed (1) hide show

app.py +223 -97

app.py CHANGED Viewed

@@ -1,10 +1,7 @@
-######################################  version  2   ########################################################
 import os
 import time
 from threading import Thread
 from typing import Iterable, Dict, Any, Optional, List
-import pandas as pd  # For reading Excel file
 import gradio as gr
 import spaces
@@ -21,14 +18,11 @@ from transformers import (
 from gradio.themes import Soft
 from gradio.themes.utils import colors, fonts, sizes
-MAX_MAX_NEW_TOKENS = 4096
-DEFAULT_MAX_NEW_TOKENS = 2048
 # -----------------------------
-# Character Error Rate (CER) Calculation
 # -----------------------------
 def levenshtein(a: str, b: str) -> int:
     """Levenshtein distance to calculate CER."""
     a, b = a.lower(), b.lower()
@@ -45,14 +39,17 @@ def levenshtein(a: str, b: str) -> int:
         for j, cb in enumerate(b, 1):
             cur = dp[j]
             cost = 0 if ca == cb else 1
-            dp[j] = min(dp[j] + 1, dp[j-1] + 1, prev + cost)
             prev = cur
     return dp[-1]
 def character_error_rate(pred: str, target: str) -> float:
-    """Calculate the Character Error Rate (CER)."""
     distance = levenshtein(pred, target)
-    return (distance / len(target)) * 100 if len(target) > 0 else 0
 # -----------------------------
 # Private repo: dynamic import
@@ -64,14 +61,15 @@ REPO_ID = "IFMedTech/Medibot_OCR_model"  # private backend repo
 # Map filenames to exported class names
 PY_MODULES = {
-    "ner.py": "ClinicalNER",  # NER is only applied for Dots.OCR output
     "tfidf_phonetic.py": "TfidfPhoneticMatcher",
     "symspell_matcher.py": "SymSpellMatcher",
     "rapidfuzz_matcher.py": "RapidFuzzMatcher",
-    # 'drug_dictionary.xlsx' is data, not a module
 }
-HF_TOKEN = os.environ.get("HUGGINGFACE_TOKEN")
 def _dynamic_import(module_path: str, class_name: str):
     spec = importlib.util.spec_from_file_location(class_name, module_path)
@@ -79,7 +77,8 @@ def _dynamic_import(module_path: str, class_name: str):
     spec.loader.exec_module(module)  # type: ignore
     return getattr(module, class_name)
-# Load private classes and Excel dictionary
 priv_classes: Dict[str, Any] = {}
 drug_xlsx_path: Optional[str] = None
 try:
@@ -91,7 +90,11 @@ try:
             if cls:
                 priv_classes[cls] = _dynamic_import(path, cls)
                 print(f"[Private] Loaded class: {cls} from {fname}")
-        drug_xlsx_path = hf_hub_download(repo_id=REPO_ID, filename="Medibot_Drugs_Cleaned_Updated.xlsx", token=HF_TOKEN)
         print(f"[Private] Downloaded Excel at: {drug_xlsx_path}")
 except Exception as e:
     print(f"[Private] ERROR loading private backend: {e}")
@@ -116,6 +119,7 @@ colors.steel_blue = colors.Color(
     c950="#1E3450",
 )
 class SteelBlueTheme(Soft):
     def __init__(
         self,
@@ -125,10 +129,14 @@ class SteelBlueTheme(Soft):
         neutral_hue: colors.Color | str = colors.slate,
         text_size: sizes.Size | str = sizes.text_lg,
         font: fonts.Font | str | Iterable[fonts.Font | str] = (
-            fonts.GoogleFont("Outfit"), "Arial", "sans-serif",
         ),
         font_mono: fonts.Font | str | Iterable[fonts.Font | str] = (
-            fonts.GoogleFont("IBM Plex Mono"), "ui-monospace", "monospace",
         ),
     ):
         super().__init__(
@@ -167,6 +175,7 @@ class SteelBlueTheme(Soft):
             block_label_background_fill="*primary_200",
         )
 steel_blue_theme = SteelBlueTheme()
 css = """
@@ -177,11 +186,7 @@ css = """
 # ----------------------------
 # RUNTIME / DEVICE
 # ----------------------------
-# Ensure CUDA_VISIBLE_DEVICES is set correctly to use GPU
 os.environ.setdefault("CUDA_VISIBLE_DEVICES", "0")
-# Check if CUDA is available and print relevant information
 print("CUDA_VISIBLE_DEVICES =", os.environ.get("CUDA_VISIBLE_DEVICES"))
 print("torch.__version__     =", torch.__version__)
 print("torch.version.cuda    =", torch.version.cuda)
@@ -214,6 +219,7 @@ processor_d = AutoProcessor.from_pretrained(MODEL_PATH_D, trust_remote_code=True
 attn_impl = "sdpa"
 try:
     import flash_attn  # noqa: F401
     if use_cuda:
         attn_impl = "flash_attention_2"
 except Exception:
@@ -224,31 +230,41 @@ model_d = AutoModelForCausalLM.from_pretrained(
     attn_implementation=attn_impl,
     torch_dtype=DTYPE_BF16,
     device_map="auto" if use_cuda else None,
-    trust_remote_code=True
 ).eval()
 if not use_cuda:
     model_d.to(device)
 # ----------------------------
-# GENERATION (OCR → Spell-check)
 # ----------------------------
-@spaces.GPU
-def generate_image(model_name: str,
-                   text: str,
-                   image: Image.Image,
-                   max_new_tokens: int,
-                   temperature: float,
-                   top_p: float,
-                   top_k: int,
-                   repetition_penalty: float,
-                   spell_algo: str):
     """
-    1) Stream OCR tokens to Raw output.
-    2) Directly apply spell-check algorithms (TF-IDF+Phonetic, SymSpell, or RapidFuzz).
-    3) Only apply Clinical NER to Dots.OCR output, then apply spell-check on the result.
     """
     if image is None:
         yield "Please upload an image.", "Please upload an image."
         return
@@ -260,23 +276,31 @@ def generate_image(model_name: str,
         yield "Invalid model selected.", "Invalid model selected."
         return
-    # Build prompt
-    messages = [{
-        "role": "user",
-        "content": [
-            {"type": "image"},
-            {"type": "text", "text": text},
-        ]
-    }]
-    prompt_full = processor.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
     # Preprocess
-    inputs = processor(text=[prompt_full], images=[image], return_tensors="pt", padding=True)
     inputs = {k: (v.to(device) if hasattr(v, "to") else v) for k, v in inputs.items()}
     # Streamer
     tokenizer = getattr(processor, "tokenizer", None) or processor
-    streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
     gen_kwargs = dict(
         **inputs,
@@ -289,132 +313,234 @@ def generate_image(model_name: str,
         repetition_penalty=repetition_penalty,
     )
-    # Start generation
     thread = Thread(target=model.generate, kwargs=gen_kwargs)
     thread.start()
-    # 1) Live OCR streaming to Raw
     buffer = ""
     for new_text in streamer:
         buffer += new_text.replace("<|im_end|>", "")
         time.sleep(0.01)
         yield buffer, buffer
-    # Final raw OCR output (buffer)
     final_ocr_text = buffer.strip()
-    # 2) Apply Clinical NER ONLY for Dots.OCR output
-    meds = []
     if model_name == "Dots.OCR":
         try:
-            if "ClinicalNER" in priv_classes:
                 ClinicalNER = priv_classes["ClinicalNER"]
-                ner = ClinicalNER(token=HF_TOKEN)  # pass model_id=... if using your own model
-                meds = ner(final_ocr_text) or []
-                print("Extracted meds:", meds)  # Print extracted meds
             else:
-                print("[NER] ClinicalNER not available.")
         except Exception as e:
             print(f"[NER] Error running ClinicalNER: {e}")
-    # 3) Apply selected spell-check algorithm (directly on raw OCR output or NER output)
     spell_section = "\n---\n### Spell-check suggestions (" + spell_algo + ")\n"
     corr: Dict[str, List] = {}
     try:
-        if final_ocr_text and drug_xlsx_path:
-            # Print meds and the number of rows in the drug_xlsx_path
-            print("Meds:", meds)
-            print("Rows in drug_xlsx_path:", len(pd.read_excel(drug_xlsx_path)))
-            if spell_algo == "TF-IDF + Phonetic" and "TfidfPhoneticMatcher" in priv_classes:
                 Cls = priv_classes["TfidfPhoneticMatcher"]
-                checker = Cls(xlsx_path=drug_xlsx_path, column="Combined_Drugs", ngram_size=3, phonetic_weight=0.4)
-                corr = checker.match_list([final_ocr_text], top_k=5, tfidf_threshold=0.15)
             elif spell_algo == "SymSpell" and "SymSpellMatcher" in priv_classes:
                 Cls = priv_classes["SymSpellMatcher"]
-                checker = Cls(xlsx_path=drug_xlsx_path, column="Combined_Drugs", max_edit=2, prefix_len=7)
-                corr = checker.match_list([final_ocr_text], top_k=5, min_score=0.4)
-            elif spell_algo == "RapidFuzz" and "RapidFuzzMatcher" in priv_classes:
                 Cls = priv_classes["RapidFuzzMatcher"]
                 checker = Cls(xlsx_path=drug_xlsx_path, column="Combined_Drugs")
-                corr = checker.match_list([final_ocr_text], top_k=5, threshold=70.0)
             else:
                 spell_section += "- Spell-check backend unavailable.\n"
         else:
-            spell_section += "- No OCR output or Excel dictionary missing.\n"
     except Exception as e:
         spell_section += f"- Spell-check error: {e}\n"
-    # Format spell-check suggestions (top-5 with CER)
     if corr:
-        for raw in [final_ocr_text]:
             suggestions = corr.get(raw, [])
             if suggestions:
                 spell_section += f"- **{raw}**\n"
                 for cand, score in suggestions:
-                    cer = character_error_rate(cand, raw)  # Calculate CER
-                    spell_section += f"  - {cand} (score={score:.3f}, CER={cer:.3f}%)\n"
             else:
                 spell_section += f"- **{raw}**\n  - (no suggestions)\n"
-    final_md = spell_section  # Only spell-check suggestions
-    # 4) Final yield: raw unchanged; Markdown with spell-check
     yield final_ocr_text, final_md
 # ----------------------------
 # UI
 # ----------------------------
 image_examples = [
-    ["OCR the content perfectly.", "examples/3.jpg"],
-    ["Perform OCR on the image.", "examples/1.jpg"],
-    ["Extract the contents. [page].", "examples/2.jpg"],
 ]
 with gr.Blocks(css=css, theme=steel_blue_theme) as demo:
-    gr.Markdown("# **Handwritten Doctor's Prescription Reading**", elem_id="main-title")
     with gr.Row():
         with gr.Column(scale=2):
-            image_upload = gr.Image(type="pil", label="Upload Image", height=290)
             image_submit = gr.Button("Submit", variant="primary")
-            gr.Examples(examples=image_examples, inputs=[image_upload])
             # Spell-check selection
             spell_choice = gr.Radio(
                 choices=["TF-IDF + Phonetic", "SymSpell", "RapidFuzz"],
                 label="Select Spell-check Approach",
-                value="TF-IDF + Phonetic"
             )
             with gr.Accordion("Advanced options", open=False):
-                max_new_tokens = gr.Slider(label="Max new tokens", minimum=1, maximum=MAX_MAX_NEW_TOKENS, step=1, value=DEFAULT_MAX_NEW_TOKENS)
-                temperature = gr.Slider(label="Temperature", minimum=0.1, maximum=4.0, step=0.1, value=0.7)
-                top_p = gr.Slider(label="Top-p (nucleus sampling)", minimum=0.05, maximum=1.0, step=0.05, value=0.9)
-                top_k = gr.Slider(label="Top-k", minimum=1, maximum=1000, step=1, value=50)
-                repetition_penalty = gr.Slider(label="Repetition penalty", minimum=1.0, maximum=2.0, step=0.05, value=1.1)
         with gr.Column(scale=3):
             gr.Markdown("## Output", elem_id="output-title")
-            output = gr.Textbox(label="Raw Output Stream", interactive=False, lines=11, show_copy_button=True)
             model_choice = gr.Radio(
                 choices=["Chandra-OCR", "Dots.OCR"],
                 label="Select OCR Model",
-                value="Chandra-OCR"
             )
     image_submit.click(
         fn=generate_image,
-        inputs=[model_choice, image_upload, max_new_tokens, temperature, top_p, top_k, repetition_penalty, spell_choice],
-        outputs=[output]
     )
 if __name__ == "__main__":
-    demo.queue(max_size=50).launch(mcp_server=True, ssr_mode=False, show_error=True)

 import os
 import time
 from threading import Thread
 from typing import Iterable, Dict, Any, Optional, List
 import gradio as gr
 import spaces
 from gradio.themes import Soft
 from gradio.themes.utils import colors, fonts, sizes
 # -----------------------------
+# Character Error Rate (CER)
 # -----------------------------
 def levenshtein(a: str, b: str) -> int:
     """Levenshtein distance to calculate CER."""
     a, b = a.lower(), b.lower()
         for j, cb in enumerate(b, 1):
             cur = dp[j]
             cost = 0 if ca == cb else 1
+            dp[j] = min(dp[j] + 1, dp[j - 1] + 1, prev + cost)
             prev = cur
     return dp[-1]
 def character_error_rate(pred: str, target: str) -> float:
+    """Calculate the Character Error Rate (CER) in percent."""
+    target = target or ""
     distance = levenshtein(pred, target)
+    return (distance / len(target)) * 100 if len(target) > 0 else 0.0
 # -----------------------------
 # Private repo: dynamic import
 # Map filenames to exported class names
 PY_MODULES = {
+    "ner.py": "ClinicalNER",              # NER is only applied for Dots.OCR output
     "tfidf_phonetic.py": "TfidfPhoneticMatcher",
     "symspell_matcher.py": "SymSpellMatcher",
     "rapidfuzz_matcher.py": "RapidFuzzMatcher",
+    # 'Medibot_Drugs_Cleaned_Updated.xlsx' is data, not a module
 }
+HF_TOKEN = os.environ.get("HUGGINGFACE_TOKEN")
 def _dynamic_import(module_path: str, class_name: str):
     spec = importlib.util.spec_from_file_location(class_name, module_path)
     spec.loader.exec_module(module)  # type: ignore
     return getattr(module, class_name)
+# Load private classes and Excel dictionary (once at import time)
 priv_classes: Dict[str, Any] = {}
 drug_xlsx_path: Optional[str] = None
 try:
             if cls:
                 priv_classes[cls] = _dynamic_import(path, cls)
                 print(f"[Private] Loaded class: {cls} from {fname}")
+        drug_xlsx_path = hf_hub_download(
+            repo_id=REPO_ID,
+            filename="Medibot_Drugs_Cleaned_Updated.xlsx",
+            token=HF_TOKEN,
+        )
         print(f"[Private] Downloaded Excel at: {drug_xlsx_path}")
 except Exception as e:
     print(f"[Private] ERROR loading private backend: {e}")
     c950="#1E3450",
 )
 class SteelBlueTheme(Soft):
     def __init__(
         self,
         neutral_hue: colors.Color | str = colors.slate,
         text_size: sizes.Size | str = sizes.text_lg,
         font: fonts.Font | str | Iterable[fonts.Font | str] = (
+            fonts.GoogleFont("Outfit"),
+            "Arial",
+            "sans-serif",
         ),
         font_mono: fonts.Font | str | Iterable[fonts.Font | str] = (
+            fonts.GoogleFont("IBM Plex Mono"),
+            "ui-monospace",
+            "monospace",
         ),
     ):
         super().__init__(
             block_label_background_fill="*primary_200",
         )
 steel_blue_theme = SteelBlueTheme()
 css = """
 # ----------------------------
 # RUNTIME / DEVICE
 # ----------------------------
 os.environ.setdefault("CUDA_VISIBLE_DEVICES", "0")
 print("CUDA_VISIBLE_DEVICES =", os.environ.get("CUDA_VISIBLE_DEVICES"))
 print("torch.__version__     =", torch.__version__)
 print("torch.version.cuda    =", torch.version.cuda)
 attn_impl = "sdpa"
 try:
     import flash_attn  # noqa: F401
     if use_cuda:
         attn_impl = "flash_attention_2"
 except Exception:
     attn_implementation=attn_impl,
     torch_dtype=DTYPE_BF16,
     device_map="auto" if use_cuda else None,
+    trust_remote_code=True,
 ).eval()
 if not use_cuda:
     model_d.to(device)
 # ----------------------------
+# GENERATION (OCR → NER (Dots only) → Spell-check + CER)
 # ----------------------------
+MAX_MAX_NEW_TOKENS = 4096
+DEFAULT_MAX_NEW_TOKENS = 2048
+@spaces.GPU  # you can add duration=... if needed, e.g. @spaces.GPU(duration=240)
+def generate_image(
+    model_name: str,
+    text: str,
+    image: Image.Image,
+    max_new_tokens: int,
+    temperature: float,
+    top_p: float,
+    top_k: int,
+    repetition_penalty: float,
+    spell_algo: str,
+):
     """
+    1) Stream OCR tokens to Raw output (unchanged).
+    2) If model_name == 'Dots.OCR', run ClinicalNER → list[str] meds.
+       For Chandra-OCR, skip NER.
+    3) Apply selected spell-check (TF-IDF+Phonetic / SymSpell / RapidFuzz)
+       using Excel dict, and compute CER for each suggestion.
+    4) Markdown shows OCR text, NER list (if any), and spell-check top-5
+       suggestions with scores and CER.
     """
     if image is None:
+        # Two outputs: raw textbox + markdown
         yield "Please upload an image.", "Please upload an image."
         return
         yield "Invalid model selected.", "Invalid model selected."
         return
+    # Build prompt from text parameter (kept via gr.State)
+    messages = [
+        {
+            "role": "user",
+            "content": [
+                {"type": "image"},
+                {"type": "text", "text": text},
+            ],
+        }
+    ]
+    prompt_full = processor.apply_chat_template(
+        messages, tokenize=False, add_generation_prompt=True
+    )
     # Preprocess
+    inputs = processor(
+        text=[prompt_full], images=[image], return_tensors="pt", padding=True
+    )
     inputs = {k: (v.to(device) if hasattr(v, "to") else v) for k, v in inputs.items()}
     # Streamer
     tokenizer = getattr(processor, "tokenizer", None) or processor
+    streamer = TextIteratorStreamer(
+        tokenizer, skip_prompt=True, skip_special_tokens=True
+    )
     gen_kwargs = dict(
         **inputs,
         repetition_penalty=repetition_penalty,
     )
+    # Start generation in background thread
     thread = Thread(target=model.generate, kwargs=gen_kwargs)
     thread.start()
+    # 1) Live OCR streaming to Raw (and mirror to Markdown during stream)
     buffer = ""
     for new_text in streamer:
         buffer += new_text.replace("<|im_end|>", "")
         time.sleep(0.01)
+        # During streaming, just show the raw text in both components
         yield buffer, buffer
+    # Final raw text
     final_ocr_text = buffer.strip()
+    # -------------------------
+    # 2) Clinical NER (Dots.OCR only)
+    # -------------------------
+    meds: List[str] = []
     if model_name == "Dots.OCR":
         try:
+            if "ClinicalNER" in priv_classes and HF_TOKEN is not None:
                 ClinicalNER = priv_classes["ClinicalNER"]
+                ner = ClinicalNER(token=HF_TOKEN)  # model_id can be passed if needed
+                ner_output = ner(final_ocr_text) or []
+                # Expecting list[str]; be robust:
+                meds = [m.strip() for m in ner_output if isinstance(m, str) and m.strip()]
+                print("[NER] Extracted meds:", meds)
             else:
+                print("[NER] ClinicalNER not available or no HF token.")
         except Exception as e:
             print(f"[NER] Error running ClinicalNER: {e}")
+    # Fallback: if no meds found (or Chandra-OCR), derive meds from OCR lines
+    if not meds:
+        meds = [line.strip() for line in final_ocr_text.splitlines() if line.strip()]
+        print("[NER] Using line-based meds fallback, count:", len(meds))
+    # -------------------------
+    # Build Markdown: OCR text + NER section
+    # -------------------------
+    md = "### Raw OCR Output\n"
+    md += "```\n" + (final_ocr_text or "(empty)") + "\n```\n"
+    md += "\n---\n### Clinical NER (Medications)\n"
+    if meds:
+        for m in meds:
+            md += f"- {m}\n"
+    else:
+        md += "- None detected\n"
+    # -------------------------
+    # 3) Spell-check (med list) with CER
+    # -------------------------
     spell_section = "\n---\n### Spell-check suggestions (" + spell_algo + ")\n"
     corr: Dict[str, List] = {}
     try:
+        if meds and drug_xlsx_path:
+            if (
+                spell_algo == "TF-IDF + Phonetic"
+                and "TfidfPhoneticMatcher" in priv_classes
+            ):
                 Cls = priv_classes["TfidfPhoneticMatcher"]
+                checker = Cls(
+                    xlsx_path=drug_xlsx_path,
+                    column="Combined_Drugs",
+                    ngram_size=3,
+                    phonetic_weight=0.4,
+                )
+                corr = checker.match_list(meds, top_k=5, tfidf_threshold=0.15)
             elif spell_algo == "SymSpell" and "SymSpellMatcher" in priv_classes:
                 Cls = priv_classes["SymSpellMatcher"]
+                checker = Cls(
+                    xlsx_path=drug_xlsx_path,
+                    column="Combined_Drugs",
+                    max_edit=2,
+                    prefix_len=7,
+                )
+                corr = checker.match_list(meds, top_k=5, min_score=0.4)
+            elif (
+                spell_algo == "RapidFuzz" and "RapidFuzzMatcher" in priv_classes
+            ):
                 Cls = priv_classes["RapidFuzzMatcher"]
                 checker = Cls(xlsx_path=drug_xlsx_path, column="Combined_Drugs")
+                corr = checker.match_list(meds, top_k=5, threshold=70.0)
             else:
                 spell_section += "- Spell-check backend unavailable.\n"
         else:
+            spell_section += "- No NER/med list or Excel dictionary missing.\n"
     except Exception as e:
         spell_section += f"- Spell-check error: {e}\n"
+    # Format suggestions (top-5 per med, with scores + CER)
     if corr:
+        for raw in meds:
             suggestions = corr.get(raw, [])
             if suggestions:
                 spell_section += f"- **{raw}**\n"
                 for cand, score in suggestions:
+                    cer = character_error_rate(cand, raw)
+                    spell_section += (
+                        f"  - {cand} "
+                        f"(score={score:.3f}, CER={cer:.3f}%)\n"
+                    )
             else:
                 spell_section += f"- **{raw}**\n  - (no suggestions)\n"
+    final_md = md + spell_section
+    # 4) Final yield: raw unchanged; Markdown with NER + spell-check + CER
     yield final_ocr_text, final_md
 # ----------------------------
 # UI
 # ----------------------------
+# IMPORTANT: examples must match the number of inputs (here: only image)
 image_examples = [
+    ["examples/3.jpg"],
+    ["examples/1.jpg"],
+    ["examples/2.jpg"],
 ]
 with gr.Blocks(css=css, theme=steel_blue_theme) as demo:
+    gr.Markdown(
+        "# **Handwritten Doctor's Prescription Reading**", elem_id="main-title"
+    )
     with gr.Row():
         with gr.Column(scale=2):
+            image_upload = gr.Image(
+                type="pil", label="Upload Image", height=290
+            )
             image_submit = gr.Button("Submit", variant="primary")
+            gr.Examples(
+                examples=image_examples,
+                inputs=[image_upload],
+                label="Example Images",
+            )
             # Spell-check selection
             spell_choice = gr.Radio(
                 choices=["TF-IDF + Phonetic", "SymSpell", "RapidFuzz"],
                 label="Select Spell-check Approach",
+                value="TF-IDF + Phonetic",
             )
             with gr.Accordion("Advanced options", open=False):
+                max_new_tokens = gr.Slider(
+                    label="Max new tokens",
+                    minimum=1,
+                    maximum=MAX_MAX_NEW_TOKENS,
+                    step=1,
+                    value=DEFAULT_MAX_NEW_TOKENS,
+                )
+                temperature = gr.Slider(
+                    label="Temperature",
+                    minimum=0.1,
+                    maximum=4.0,
+                    step=0.1,
+                    value=0.7,
+                )
+                top_p = gr.Slider(
+                    label="Top-p (nucleus sampling)",
+                    minimum=0.05,
+                    maximum=1.0,
+                    step=0.05,
+                    value=0.9,
+                )
+                top_k = gr.Slider(
+                    label="Top-k",
+                    minimum=1,
+                    maximum=1000,
+                    step=1,
+                    value=50,
+                )
+                repetition_penalty = gr.Slider(
+                    label="Repetition penalty",
+                    minimum=1.0,
+                    maximum=2.0,
+                    step=0.05,
+                    value=1.1,
+                )
         with gr.Column(scale=3):
             gr.Markdown("## Output", elem_id="output-title")
+            output = gr.Textbox(
+                label="Raw Output Stream",
+                interactive=False,
+                lines=11,
+                show_copy_button=True,
+            )
+            with gr.Accordion("(Result.md)", open=False):
+                markdown_output = gr.Markdown(label="(Result.Md)")
             model_choice = gr.Radio(
                 choices=["Chandra-OCR", "Dots.OCR"],
                 label="Select OCR Model",
+                value="Chandra-OCR",
             )
+    # Hard-coded instruction text, passed as gr.State to match the 'text' parameter
+    query_state = gr.State(
+        "Extract medicine or drugs names along with dosage amount or quantity"
+    )
     image_submit.click(
         fn=generate_image,
+        inputs=[
+            model_choice,
+            query_state,
+            image_upload,
+            max_new_tokens,
+            temperature,
+            top_p,
+            top_k,
+            repetition_penalty,
+            spell_choice,
+        ],
+        outputs=[output, markdown_output],
     )
 if __name__ == "__main__":
+    demo.queue(max_size=50).launch(
+        mcp_server=True, ssr_mode=False, show_error=True
+    )