IFMedTechdemo committed
Commit 7b1f46b · verified · 1 Parent(s): 8763814

Update app.py


Created and separated two versions of the code.

Files changed (1)
  1. app.py +472 -54
app.py CHANGED
@@ -1,7 +1,11 @@
 import os
 import time
 from threading import Thread
 from typing import Iterable, Dict, Any, Optional, List

 import gradio as gr
 import spaces
@@ -18,6 +22,35 @@ from transformers import (
 from gradio.themes import Soft
 from gradio.themes.utils import colors, fonts, sizes

 # -----------------------------
 # Private repo: dynamic import
 # -----------------------------
@@ -28,8 +61,7 @@ REPO_ID = "IFMedTech/Medibot_OCR_model" # private backend repo

 # Map filenames to exported class names
 PY_MODULES = {
-
-     "ner.py": "ClinicalNER",
     "tfidf_phonetic.py": "TfidfPhoneticMatcher",
     "symspell_matcher.py": "SymSpellMatcher",
     "rapidfuzz_matcher.py": "RapidFuzzMatcher",
@@ -191,10 +223,8 @@ if not use_cuda:
     model_d.to(device)

 # ----------------------------
- # GENERATION (OCR → NER → Spell-check)
 # ----------------------------
- MAX_MAX_NEW_TOKENS = 4096
- DEFAULT_MAX_NEW_TOKENS = 2048

 @spaces.GPU
 def generate_image(model_name: str,
@@ -207,10 +237,9 @@ def generate_image(model_name: str,
                    repetition_penalty: float,
                    spell_algo: str):
     """
-     1) Stream OCR tokens to Raw output (unchanged).
-     2) After stream completes, run ClinicalNER on final raw text → list[str] meds.
-     3) Apply selected spell-check (TF-IDF+Phonetic / SymSpell / RapidFuzz) using Excel dict.
-     4) Markdown shows OCR + NER list + spell-check top-5 suggestions with scores.
     """
     if image is None:
         yield "Please upload an image.", "Please upload an image."
@@ -257,87 +286,82 @@
     thread = Thread(target=model.generate, kwargs=gen_kwargs)
     thread.start()

-     # 1) Live OCR streaming to Raw (and mirror to Markdown during stream)
     buffer = ""
     for new_text in streamer:
         buffer += new_text.replace("<|im_end|>", "")
         time.sleep(0.01)
         yield buffer, buffer

-     # Final raw text for downstream processing
-     final_ocr_text = buffer
-
-     # 2) Clinical NER (from private repo)
-     # meds: List[str] = []
-     # try:
-     #     if "ClinicalNER" in priv_classes:
-     #         ClinicalNER = priv_classes["ClinicalNER"]
-     #         ner = ClinicalNER(token=HF_TOKEN)  # pass model_id=... if using your own model
-     #         meds = ner(final_ocr_text) or []
-     #     else:
-     #         print("[NER] ClinicalNER not available.")
-     # except Exception as e:
-     #     print(f"[NER] Error running ClinicalNER: {e}")
-
-     raw_ocr_text = buffer.strip()
-     meds = [line.strip() for line in raw_ocr_text.split('\n') if line.strip()]
-
-     # Build Markdown with OCR + NER section
-     md = final_ocr_text
-     md += "\n\n---\n### Clinical NER (Medications)\n"
-     if meds:
-         for m in meds:
-             md += f"- {m}\n"
-     else:
-         md += "- None detected\n"

-     # 3) Spell-check on NER output using selected approach + Excel dict
     spell_section = "\n---\n### Spell-check suggestions (" + spell_algo + ")\n"
     corr: Dict[str, List] = {}

     try:
-         if meds and drug_xlsx_path:
             if spell_algo == "TF-IDF + Phonetic" and "TfidfPhoneticMatcher" in priv_classes:
                 Cls = priv_classes["TfidfPhoneticMatcher"]
                 checker = Cls(xlsx_path=drug_xlsx_path, column="Combined_Drugs", ngram_size=3, phonetic_weight=0.4)
-                 corr = checker.match_list(meds, top_k=5, tfidf_threshold=0.15)

             elif spell_algo == "SymSpell" and "SymSpellMatcher" in priv_classes:
                 Cls = priv_classes["SymSpellMatcher"]
                 checker = Cls(xlsx_path=drug_xlsx_path, column="Combined_Drugs", max_edit=2, prefix_len=7)
-                 corr = checker.match_list(meds, top_k=5, min_score=0.4)

             elif spell_algo == "RapidFuzz" and "RapidFuzzMatcher" in priv_classes:
                 Cls = priv_classes["RapidFuzzMatcher"]
                 checker = Cls(xlsx_path=drug_xlsx_path, column="Combined_Drugs")
-                 corr = checker.match_list(meds, top_k=5, threshold=70.0)
             else:
                 spell_section += "- Spell-check backend unavailable.\n"
         else:
-             spell_section += "- No NER output or Excel dictionary missing.\n"
     except Exception as e:
         spell_section += f"- Spell-check error: {e}\n"

-     # Format suggestions (top-5 with scores)
     if corr:
-         for raw in meds:
             suggestions = corr.get(raw, [])
             if suggestions:
                 spell_section += f"- **{raw}**\n"
                 for cand, score in suggestions:
-                     spell_section += f" - {cand} (score={score:.3f})\n"
             else:
                 spell_section += f"- **{raw}**\n - (no suggestions)\n"

-     final_md = md + spell_section

-     # 4) Final yield: raw unchanged; Markdown with NER + spell-check
     yield final_ocr_text, final_md

 # ----------------------------
 # UI
 # ----------------------------

 image_examples = [
     ["OCR the content perfectly.", "examples/3.jpg"],
     ["Perform OCR on the image.", "examples/1.jpg"],
@@ -345,10 +369,9 @@ image_examples = [
 ]

 with gr.Blocks(css=css, theme=steel_blue_theme) as demo:
-     gr.Markdown("# **Handwritten Doctor's Prescription Reading**", elem_id="main-title")
     with gr.Row():
         with gr.Column(scale=2):
-             #image_query = gr.Textbox(label="Query Input", placeholder="Enter your query here...")
             image_upload = gr.Image(type="pil", label="Upload Image", height=290)
             image_submit = gr.Button("Submit", variant="primary")
             gr.Examples(examples=image_examples, inputs=[image_upload])
@@ -370,8 +393,6 @@ with gr.Blocks(css=css, theme=steel_blue_theme) as demo:
         with gr.Column(scale=3):
             gr.Markdown("## Output", elem_id="output-title")
             output = gr.Textbox(label="Raw Output Stream", interactive=False, lines=11, show_copy_button=True)
-             with gr.Accordion("(Result.md)", open=False):
-                 markdown_output = gr.Markdown(label="(Result.Md)")

     model_choice = gr.Radio(
         choices=["Chandra-OCR", "Dots.OCR"],
@@ -381,9 +402,406 @@ with gr.Blocks(css=css, theme=steel_blue_theme) as demo:

     image_submit.click(
         fn=generate_image,
-         inputs=[model_choice, gr.State("Extract medicine or drugs names along with dosage amount or quantity"), image_upload, max_new_tokens, temperature, top_p, top_k, repetition_penalty, spell_choice],
-         outputs=[output, markdown_output]
     )

 if __name__ == "__main__":
     demo.queue(max_size=50).launch(mcp_server=True, ssr_mode=False, show_error=True)
+ ###################################### version 2 ########################################################
+
+
 import os
 import time
 from threading import Thread
 from typing import Iterable, Dict, Any, Optional, List
+ import pandas as pd  # For reading Excel file

 import gradio as gr
 import spaces

 from gradio.themes import Soft
 from gradio.themes.utils import colors, fonts, sizes

+ # -----------------------------
+ # Character Error Rate (CER) Calculation
+ # -----------------------------
+
+ def levenshtein(a: str, b: str) -> int:
+     """Levenshtein distance to calculate CER."""
+     a, b = a.lower(), b.lower()
+     if a == b:
+         return 0
+     if not a:
+         return len(b)
+     if not b:
+         return len(a)
+     dp = list(range(len(b) + 1))
+     for i, ca in enumerate(a, 1):
+         prev = dp[0]
+         dp[0] = i
+         for j, cb in enumerate(b, 1):
+             cur = dp[j]
+             cost = 0 if ca == cb else 1
+             dp[j] = min(dp[j] + 1, dp[j-1] + 1, prev + cost)
+             prev = cur
+     return dp[-1]
+
+ def character_error_rate(pred: str, target: str) -> float:
+     """Calculate the Character Error Rate (CER)."""
+     distance = levenshtein(pred, target)
+     return (distance / len(target)) * 100 if len(target) > 0 else 0
+
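A quick sanity check of the CER helpers added above (a minimal sketch; it assumes levenshtein and character_error_rate are in scope exactly as defined in app.py, and the expected values follow from the Levenshtein definition: one edit over an 11-character target is roughly 9.09%):

    # Illustrative only: one missing 'l' in the prediction
    print(levenshtein("Amoxicilin", "Amoxicillin"))           # 1
    print(character_error_rate("Amoxicilin", "Amoxicillin"))  # ~9.09 (percent)
    print(character_error_rate("abc", "abc"))                 # 0.0
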
 # -----------------------------
 # Private repo: dynamic import
 # -----------------------------

 # Map filenames to exported class names
 PY_MODULES = {
+     "ner.py": "ClinicalNER",  # NER is only applied for Dots.OCR output
     "tfidf_phonetic.py": "TfidfPhoneticMatcher",
     "symspell_matcher.py": "SymSpellMatcher",
     "rapidfuzz_matcher.py": "RapidFuzzMatcher",

     model_d.to(device)

 # ----------------------------
+ # GENERATION (OCR → Spell-check)
 # ----------------------------

 @spaces.GPU
 def generate_image(model_name: str,
                    repetition_penalty: float,
                    spell_algo: str):
     """
+     1) Stream OCR tokens to Raw output.
+     2) Directly apply spell-check algorithms (TF-IDF+Phonetic, SymSpell, or RapidFuzz).
+     3) Only apply Clinical NER to Dots.OCR output, then apply spell-check on the result.
     """
     if image is None:
         yield "Please upload an image.", "Please upload an image."

     thread = Thread(target=model.generate, kwargs=gen_kwargs)
     thread.start()

+     # 1) Live OCR streaming to Raw
     buffer = ""
     for new_text in streamer:
         buffer += new_text.replace("<|im_end|>", "")
         time.sleep(0.01)
         yield buffer, buffer

+     # Final raw OCR output (buffer)
+     final_ocr_text = buffer.strip()
+
+     # 2) Apply Clinical NER ONLY for Dots.OCR output
+     meds = []
+     if model_name == "Dots.OCR":
+         try:
+             if "ClinicalNER" in priv_classes:
+                 ClinicalNER = priv_classes["ClinicalNER"]
+                 ner = ClinicalNER(token=HF_TOKEN)  # pass model_id=... if using your own model
+                 meds = ner(final_ocr_text) or []
+                 print("Extracted meds:", meds)  # Print extracted meds
+             else:
+                 print("[NER] ClinicalNER not available.")
+         except Exception as e:
+             print(f"[NER] Error running ClinicalNER: {e}")

+     # 3) Apply selected spell-check algorithm (directly on raw OCR output or NER output)
     spell_section = "\n---\n### Spell-check suggestions (" + spell_algo + ")\n"
     corr: Dict[str, List] = {}

     try:
+         if final_ocr_text and drug_xlsx_path:
+             # Print meds and the number of rows in the drug_xlsx_path
+             print("Meds:", meds)
+             print("Rows in drug_xlsx_path:", len(pd.read_excel(drug_xlsx_path)))
+
             if spell_algo == "TF-IDF + Phonetic" and "TfidfPhoneticMatcher" in priv_classes:
                 Cls = priv_classes["TfidfPhoneticMatcher"]
                 checker = Cls(xlsx_path=drug_xlsx_path, column="Combined_Drugs", ngram_size=3, phonetic_weight=0.4)
+                 corr = checker.match_list([final_ocr_text], top_k=5, tfidf_threshold=0.15)

             elif spell_algo == "SymSpell" and "SymSpellMatcher" in priv_classes:
                 Cls = priv_classes["SymSpellMatcher"]
                 checker = Cls(xlsx_path=drug_xlsx_path, column="Combined_Drugs", max_edit=2, prefix_len=7)
+                 corr = checker.match_list([final_ocr_text], top_k=5, min_score=0.4)

             elif spell_algo == "RapidFuzz" and "RapidFuzzMatcher" in priv_classes:
                 Cls = priv_classes["RapidFuzzMatcher"]
                 checker = Cls(xlsx_path=drug_xlsx_path, column="Combined_Drugs")
+                 corr = checker.match_list([final_ocr_text], top_k=5, threshold=70.0)
             else:
                 spell_section += "- Spell-check backend unavailable.\n"
         else:
+             spell_section += "- No OCR output or Excel dictionary missing.\n"
     except Exception as e:
         spell_section += f"- Spell-check error: {e}\n"

+     # Format spell-check suggestions (top-5 with CER)
     if corr:
+         for raw in [final_ocr_text]:
             suggestions = corr.get(raw, [])
             if suggestions:
                 spell_section += f"- **{raw}**\n"
                 for cand, score in suggestions:
+                     cer = character_error_rate(cand, raw)  # Calculate CER
+                     spell_section += f" - {cand} (score={score:.3f}, CER={cer:.3f}%)\n"
             else:
                 spell_section += f"- **{raw}**\n - (no suggestions)\n"

+     final_md = spell_section  # Only spell-check suggestions

+     # 4) Final yield: raw unchanged; Markdown with spell-check
     yield final_ocr_text, final_md
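For reference, a minimal sketch of how the suggestion section above gets assembled. The return shape of match_list (a dict mapping each query string to a list of (candidate, score) pairs) is inferred from the loop above, and the dictionary below is hand-rolled stand-in data, not output from the private matchers:

    # Assumed shape of the matcher result: {query: [(candidate, score), ...]}
    raw = "Amoxicilin 500mg"                                   # hypothetical OCR text
    corr = {raw: [("Amoxicillin", 0.912), ("Amoxapine", 0.401)]}  # made-up scores

    section = "\n---\n### Spell-check suggestions (TF-IDF + Phonetic)\n"
    for query, suggestions in corr.items():
        section += f"- **{query}**\n"
        for cand, score in suggestions:
            cer = character_error_rate(cand, query)  # CER helper defined earlier
            section += f" - {cand} (score={score:.3f}, CER={cer:.3f}%)\n"
    print(section)
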

 # ----------------------------
 # UI
 # ----------------------------
+
 image_examples = [
     ["OCR the content perfectly.", "examples/3.jpg"],
     ["Perform OCR on the image.", "examples/1.jpg"],

 ]

 with gr.Blocks(css=css, theme=steel_blue_theme) as demo:
+     gr.Markdown("# **Handwritten Doctor's Prescription Reading V2**", elem_id="main-title")
     with gr.Row():
         with gr.Column(scale=2):
             image_upload = gr.Image(type="pil", label="Upload Image", height=290)
             image_submit = gr.Button("Submit", variant="primary")
             gr.Examples(examples=image_examples, inputs=[image_upload])

         with gr.Column(scale=3):
             gr.Markdown("## Output", elem_id="output-title")
             output = gr.Textbox(label="Raw Output Stream", interactive=False, lines=11, show_copy_button=True)

     model_choice = gr.Radio(
         choices=["Chandra-OCR", "Dots.OCR"],

     image_submit.click(
         fn=generate_image,
+         inputs=[model_choice, image_upload, max_new_tokens, temperature, top_p, top_k, repetition_penalty, spell_choice],
+         outputs=[output]
     )

 if __name__ == "__main__":
     demo.queue(max_size=50).launch(mcp_server=True, ssr_mode=False, show_error=True)
+
+
+ ##################################### version 1 #######################################################
+
+ # import os
+ # import time
+ # from threading import Thread
+ # from typing import Iterable, Dict, Any, Optional, List
+
+ # import gradio as gr
+ # import spaces
+ # import torch
+ # from PIL import Image
+
+ # from transformers import (
+ #     Qwen3VLForConditionalGeneration,
+ #     AutoModelForCausalLM,
+ #     AutoProcessor,
+ #     TextIteratorStreamer,
+ # )
+
+ # from gradio.themes import Soft
+ # from gradio.themes.utils import colors, fonts, sizes
+
+ # # -----------------------------
+ # # Private repo: dynamic import
+ # # -----------------------------
+ # import importlib.util
+ # from huggingface_hub import hf_hub_download
+
+ # REPO_ID = "IFMedTech/Medibot_OCR_model"  # private backend repo
+
+ # # Map filenames to exported class names
+ # PY_MODULES = {
+
+ #     "ner.py": "ClinicalNER",
+ #     "tfidf_phonetic.py": "TfidfPhoneticMatcher",
+ #     "symspell_matcher.py": "SymSpellMatcher",
+ #     "rapidfuzz_matcher.py": "RapidFuzzMatcher",
+ #     # 'drug_dictionary.xlsx' is data, not a module
+ # }
+
+ # HF_TOKEN = os.environ.get("HUGGINGFACE_TOKEN")
+
+ # def _dynamic_import(module_path: str, class_name: str):
+ #     spec = importlib.util.spec_from_file_location(class_name, module_path)
+ #     module = importlib.util.module_from_spec(spec)
+ #     spec.loader.exec_module(module)  # type: ignore
+ #     return getattr(module, class_name)
+
+ # # Load private classes and Excel dictionary
+ # priv_classes: Dict[str, Any] = {}
+ # drug_xlsx_path: Optional[str] = None
+ # try:
+ #     if HF_TOKEN is None:
+ #         print("[Private] WARNING: HUGGINGFACE_TOKEN not set; NER/Spell-check will be unavailable.")
+ #     else:
+ #         for fname, cls in PY_MODULES.items():
+ #             path = hf_hub_download(repo_id=REPO_ID, filename=fname, token=HF_TOKEN)
+ #             if cls:
+ #                 priv_classes[cls] = _dynamic_import(path, cls)
+ #                 print(f"[Private] Loaded class: {cls} from {fname}")
+ #         drug_xlsx_path = hf_hub_download(repo_id=REPO_ID, filename="Medibot_Drugs_Cleaned_Updated.xlsx", token=HF_TOKEN)
+ #         print(f"[Private] Downloaded Excel at: {drug_xlsx_path}")
+ # except Exception as e:
+ #     print(f"[Private] ERROR loading private backend: {e}")
+ #     priv_classes = {}
+ #     drug_xlsx_path = None
+
+ # # ----------------------------
+ # # THEME
+ # # ----------------------------
+ # colors.steel_blue = colors.Color(
+ #     name="steel_blue",
+ #     c50="#EBF3F8",
+ #     c100="#D3E5F0",
+ #     c200="#A8CCE1",
+ #     c300="#7DB3D2",
+ #     c400="#529AC3",
+ #     c500="#4682B4",
+ #     c600="#3E72A0",
+ #     c700="#36638C",
+ #     c800="#2E5378",
+ #     c900="#264364",
+ #     c950="#1E3450",
+ # )
+
+ # class SteelBlueTheme(Soft):
+ #     def __init__(
+ #         self,
+ #         *,
+ #         primary_hue: colors.Color | str = colors.gray,
+ #         secondary_hue: colors.Color | str = colors.steel_blue,
+ #         neutral_hue: colors.Color | str = colors.slate,
+ #         text_size: sizes.Size | str = sizes.text_lg,
+ #         font: fonts.Font | str | Iterable[fonts.Font | str] = (
+ #             fonts.GoogleFont("Outfit"), "Arial", "sans-serif",
+ #         ),
+ #         font_mono: fonts.Font | str | Iterable[fonts.Font | str] = (
+ #             fonts.GoogleFont("IBM Plex Mono"), "ui-monospace", "monospace",
+ #         ),
+ #     ):
+ #         super().__init__(
+ #             primary_hue=primary_hue,
+ #             secondary_hue=secondary_hue,
+ #             neutral_hue=neutral_hue,
+ #             text_size=text_size,
+ #             font=font,
+ #             font_mono=font_mono,
+ #         )
+ #         super().set(
+ #             background_fill_primary="*primary_50",
+ #             background_fill_primary_dark="*primary_900",
+ #             body_background_fill="linear-gradient(135deg, *primary_200, *primary_100)",
+ #             body_background_fill_dark="linear-gradient(135deg, *primary_900, *primary_800)",
+ #             button_primary_text_color="white",
+ #             button_primary_text_color_hover="white",
+ #             button_primary_background_fill="linear-gradient(90deg, *secondary_500, *secondary_600)",
+ #             button_primary_background_fill_hover="linear-gradient(90deg, *secondary_600, *secondary_700)",
+ #             button_primary_background_fill_dark="linear-gradient(90deg, *secondary_600, *secondary_800)",
+ #             button_primary_background_fill_hover_dark="linear-gradient(90deg, *secondary_500, *secondary_500)",
+ #             button_secondary_text_color="black",
+ #             button_secondary_text_color_hover="white",
+ #             button_secondary_background_fill="linear-gradient(90deg, *primary_300, *primary_300)",
+ #             button_secondary_background_fill_hover="linear-gradient(90deg, *primary_400, *primary_400)",
+ #             button_secondary_background_fill_dark="linear-gradient(90deg, *primary_500, *primary_600)",
+ #             button_secondary_background_fill_hover_dark="linear-gradient(90deg, *primary_500, *primary_500)",
+ #             slider_color="*secondary_500",
+ #             slider_color_dark="*secondary_600",
+ #             block_title_text_weight="600",
+ #             block_border_width="3px",
+ #             block_shadow="*shadow_drop_lg",
+ #             button_primary_shadow="*shadow_drop_lg",
+ #             button_large_padding="11px",
+ #             color_accent_soft="*primary_100",
+ #             block_label_background_fill="*primary_200",
+ #         )
+
+ # steel_blue_theme = SteelBlueTheme()
+
+ # css = """
+ # #main-title h1 { font-size: 2.3em !important; }
+ # #output-title h2 { font-size: 2.1em !important; }
+ # """
+
+ # # ----------------------------
+ # # RUNTIME / DEVICE
+ # # ----------------------------
+ # os.environ.setdefault("CUDA_VISIBLE_DEVICES", "0")
+ # print("CUDA_VISIBLE_DEVICES =", os.environ.get("CUDA_VISIBLE_DEVICES"))
+ # print("torch.__version__ =", torch.__version__)
+ # print("torch.version.cuda =", torch.version.cuda)
+ # print("cuda available =", torch.cuda.is_available())
+ # print("cuda device count =", torch.cuda.device_count())
+ # if torch.cuda.is_available():
+ #     print("using device =", torch.cuda.get_device_name(0))
+
+ # use_cuda = torch.cuda.is_available()
+ # device = torch.device("cuda:0" if use_cuda else "cpu")
+ # if use_cuda:
+ #     torch.backends.cudnn.benchmark = True
+
+ # DTYPE_FP16 = torch.float16 if use_cuda else torch.float32
+ # DTYPE_BF16 = torch.bfloat16 if use_cuda else torch.float32
+
+ # # ----------------------------
+ # # OCR MODELS: Chandra-OCR + Dots.OCR
+ # # ----------------------------
+ # # 1) Chandra-OCR (Qwen3VL)
+ # MODEL_ID_V = "datalab-to/chandra"
+ # processor_v = AutoProcessor.from_pretrained(MODEL_ID_V, trust_remote_code=True)
+ # model_v = Qwen3VLForConditionalGeneration.from_pretrained(
+ #     MODEL_ID_V, trust_remote_code=True, torch_dtype=DTYPE_FP16
+ # ).to(device).eval()
+
+ # # 2) Dots.OCR (flash_attn2 if available, else SDPA)
+ # MODEL_PATH_D = "prithivMLmods/Dots.OCR-Latest-BF16"
+ # processor_d = AutoProcessor.from_pretrained(MODEL_PATH_D, trust_remote_code=True)
+ # attn_impl = "sdpa"
+ # try:
+ #     import flash_attn  # noqa: F401
+ #     if use_cuda:
+ #         attn_impl = "flash_attention_2"
+ # except Exception:
+ #     attn_impl = "sdpa"
+
+ # model_d = AutoModelForCausalLM.from_pretrained(
+ #     MODEL_PATH_D,
+ #     attn_implementation=attn_impl,
+ #     torch_dtype=DTYPE_BF16,
+ #     device_map="auto" if use_cuda else None,
+ #     trust_remote_code=True
+ # ).eval()
+ # if not use_cuda:
+ #     model_d.to(device)
+
+ # # ----------------------------
+ # # GENERATION (OCR → NER → Spell-check)
+ # # ----------------------------
+ # MAX_MAX_NEW_TOKENS = 4096
+ # DEFAULT_MAX_NEW_TOKENS = 2048
+
+ # @spaces.GPU
+ # def generate_image(model_name: str,
+ #                    text: str,
+ #                    image: Image.Image,
+ #                    max_new_tokens: int,
+ #                    temperature: float,
+ #                    top_p: float,
+ #                    top_k: int,
+ #                    repetition_penalty: float,
+ #                    spell_algo: str):
+ #     """
+ #     1) Stream OCR tokens to Raw output (unchanged).
+ #     2) After stream completes, run ClinicalNER on final raw text → list[str] meds.
+ #     3) Apply selected spell-check (TF-IDF+Phonetic / SymSpell / RapidFuzz) using Excel dict.
+ #     4) Markdown shows OCR + NER list + spell-check top-5 suggestions with scores.
+ #     """
+ #     if image is None:
+ #         yield "Please upload an image.", "Please upload an image."
+ #         return
+
+ #     if model_name == "Chandra-OCR":
+ #         processor, model = processor_v, model_v
+ #     elif model_name == "Dots.OCR":
+ #         processor, model = processor_d, model_d
+ #     else:
+ #         yield "Invalid model selected.", "Invalid model selected."
+ #         return
+
+ #     # Build prompt
+ #     messages = [{
+ #         "role": "user",
+ #         "content": [
+ #             {"type": "image"},
+ #             {"type": "text", "text": text},
+ #         ]
+ #     }]
+ #     prompt_full = processor.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
+
+ #     # Preprocess
+ #     inputs = processor(text=[prompt_full], images=[image], return_tensors="pt", padding=True)
+ #     inputs = {k: (v.to(device) if hasattr(v, "to") else v) for k, v in inputs.items()}
+
+ #     # Streamer
+ #     tokenizer = getattr(processor, "tokenizer", None) or processor
+ #     streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
+
+ #     gen_kwargs = dict(
+ #         **inputs,
+ #         streamer=streamer,
+ #         max_new_tokens=max_new_tokens,
+ #         do_sample=True,
+ #         temperature=temperature,
+ #         top_p=top_p,
+ #         top_k=top_k,
+ #         repetition_penalty=repetition_penalty,
+ #     )
+
+ #     # Start generation
+ #     thread = Thread(target=model.generate, kwargs=gen_kwargs)
+ #     thread.start()
+
+ #     # 1) Live OCR streaming to Raw (and mirror to Markdown during stream)
+ #     buffer = ""
+ #     for new_text in streamer:
+ #         buffer += new_text.replace("<|im_end|>", "")
+ #         time.sleep(0.01)
+ #         yield buffer, buffer
+
+ #     # Final raw text for downstream processing
+ #     final_ocr_text = buffer
+
+ #     # 2) Clinical NER (from private repo)
+ #     # meds: List[str] = []
+ #     # try:
+ #     #     if "ClinicalNER" in priv_classes:
+ #     #         ClinicalNER = priv_classes["ClinicalNER"]
+ #     #         ner = ClinicalNER(token=HF_TOKEN)  # pass model_id=... if using your own model
+ #     #         meds = ner(final_ocr_text) or []
+ #     #     else:
+ #     #         print("[NER] ClinicalNER not available.")
+ #     # except Exception as e:
+ #     #     print(f"[NER] Error running ClinicalNER: {e}")
+
+ #     raw_ocr_text = buffer.strip()
+ #     meds = [line.strip() for line in raw_ocr_text.split('\n') if line.strip()]
+
+
+ #     # Build Markdown with OCR + NER section
+ #     md = final_ocr_text
+ #     md += "\n\n---\n### Clinical NER (Medications)\n"
+ #     if meds:
+ #         for m in meds:
+ #             md += f"- {m}\n"
+ #     else:
+ #         md += "- None detected\n"
+
+ #     # 3) Spell-check on NER output using selected approach + Excel dict
+ #     spell_section = "\n---\n### Spell-check suggestions (" + spell_algo + ")\n"
+ #     corr: Dict[str, List] = {}
+
+ #     try:
+ #         if meds and drug_xlsx_path:
+ #             if spell_algo == "TF-IDF + Phonetic" and "TfidfPhoneticMatcher" in priv_classes:
+ #                 Cls = priv_classes["TfidfPhoneticMatcher"]
+ #                 checker = Cls(xlsx_path=drug_xlsx_path, column="Combined_Drugs", ngram_size=3, phonetic_weight=0.4)
+ #                 corr = checker.match_list(meds, top_k=5, tfidf_threshold=0.15)
+
+ #             elif spell_algo == "SymSpell" and "SymSpellMatcher" in priv_classes:
+ #                 Cls = priv_classes["SymSpellMatcher"]
+ #                 checker = Cls(xlsx_path=drug_xlsx_path, column="Combined_Drugs", max_edit=2, prefix_len=7)
+ #                 corr = checker.match_list(meds, top_k=5, min_score=0.4)
+
+ #             elif spell_algo == "RapidFuzz" and "RapidFuzzMatcher" in priv_classes:
+ #                 Cls = priv_classes["RapidFuzzMatcher"]
+ #                 checker = Cls(xlsx_path=drug_xlsx_path, column="Combined_Drugs")
+ #                 corr = checker.match_list(meds, top_k=5, threshold=70.0)
+ #             else:
+ #                 spell_section += "- Spell-check backend unavailable.\n"
+ #         else:
+ #             spell_section += "- No NER output or Excel dictionary missing.\n"
+ #     except Exception as e:
+ #         spell_section += f"- Spell-check error: {e}\n"
+
+ #     # Format suggestions (top-5 with scores)
+ #     if corr:
+ #         for raw in meds:
+ #             suggestions = corr.get(raw, [])
+ #             if suggestions:
+ #                 spell_section += f"- **{raw}**\n"
+ #                 for cand, score in suggestions:
+ #                     spell_section += f" - {cand} (score={score:.3f})\n"
+ #             else:
+ #                 spell_section += f"- **{raw}**\n - (no suggestions)\n"
+
+ #     final_md = md + spell_section
+
+ #     # 4) Final yield: raw unchanged; Markdown with NER + spell-check
+ #     yield final_ocr_text, final_md
+
+ # # ----------------------------
+ # # UI
+ # # ----------------------------
+ # image_examples = [
+ #     ["OCR the content perfectly.", "examples/3.jpg"],
+ #     ["Perform OCR on the image.", "examples/1.jpg"],
+ #     ["Extract the contents. [page].", "examples/2.jpg"],
+ # ]
+
+ # with gr.Blocks(css=css, theme=steel_blue_theme) as demo:
+ #     gr.Markdown("# **Handwritten Doctor's Prescription Reading**", elem_id="main-title")
+ #     with gr.Row():
+ #         with gr.Column(scale=2):
+ #             #image_query = gr.Textbox(label="Query Input", placeholder="Enter your query here...")
+ #             image_upload = gr.Image(type="pil", label="Upload Image", height=290)
+ #             image_submit = gr.Button("Submit", variant="primary")
+ #             gr.Examples(examples=image_examples, inputs=[image_upload])
+
+ #             # Spell-check selection
+ #             spell_choice = gr.Radio(
+ #                 choices=["TF-IDF + Phonetic", "SymSpell", "RapidFuzz"],
+ #                 label="Select Spell-check Approach",
+ #                 value="TF-IDF + Phonetic"
+ #             )
+
+ #             with gr.Accordion("Advanced options", open=False):
+ #                 max_new_tokens = gr.Slider(label="Max new tokens", minimum=1, maximum=MAX_MAX_NEW_TOKENS, step=1, value=DEFAULT_MAX_NEW_TOKENS)
+ #                 temperature = gr.Slider(label="Temperature", minimum=0.1, maximum=4.0, step=0.1, value=0.7)
+ #                 top_p = gr.Slider(label="Top-p (nucleus sampling)", minimum=0.05, maximum=1.0, step=0.05, value=0.9)
+ #                 top_k = gr.Slider(label="Top-k", minimum=1, maximum=1000, step=1, value=50)
+ #                 repetition_penalty = gr.Slider(label="Repetition penalty", minimum=1.0, maximum=2.0, step=0.05, value=1.1)
+
+ #         with gr.Column(scale=3):
+ #             gr.Markdown("## Output", elem_id="output-title")
+ #             output = gr.Textbox(label="Raw Output Stream", interactive=False, lines=11, show_copy_button=True)
+ #             with gr.Accordion("(Result.md)", open=False):
+ #                 markdown_output = gr.Markdown(label="(Result.Md)")
+
+ #     model_choice = gr.Radio(
+ #         choices=["Chandra-OCR", "Dots.OCR"],
+ #         label="Select OCR Model",
+ #         value="Chandra-OCR"
+ #     )
+
+ #     image_submit.click(
+ #         fn=generate_image,
+ #         inputs=[model_choice, gr.State("Extract medicine or drugs names along with dosage amount or quantity"), image_upload, max_new_tokens, temperature, top_p, top_k, repetition_penalty, spell_choice],
+ #         outputs=[output, markdown_output]
+ #     )
+
+ # if __name__ == "__main__":
+ #     demo.queue(max_size=50).launch(mcp_server=True, ssr_mode=False, show_error=True)