feat: add medication analysis with easy explanations

- OCR extraction followed by medication info analysis
- Explain medication name, effects, and side effects
- User-friendly explanations for elderly and children
- Two-panel UI: OCR results + easy explanations
- Single model (Qwen2.5-VL-7B) handles both tasks
🤖 Generated with [Claude Code](https://claude.com/claude-code)
Co-Authored-By: Claude <noreply@anthropic.com>
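The bullets describe a two-pass, single-model pipeline: pass 1 sends the photo to Qwen2.5-VL for OCR, and pass 2 feeds the extracted text back to the same model for a plain-language explanation. A minimal sketch of that pattern, assuming the standard `transformers` / `qwen_vl_utils` API (the `run_turn` helper and the model-load placement are illustrative, not part of the diff):

```python
import torch
from transformers import AutoProcessor, Qwen2_5_VLForConditionalGeneration
from qwen_vl_utils import process_vision_info

MODEL_ID = "Qwen/Qwen2.5-VL-7B-Instruct"

model = Qwen2_5_VLForConditionalGeneration.from_pretrained(MODEL_ID, torch_dtype="auto")
processor = AutoProcessor.from_pretrained(MODEL_ID)

def run_turn(messages: list, max_new_tokens: int = 2048) -> str:
    """One chat turn against the same model; usable for both the OCR and explanation passes."""
    text = processor.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
    # Assumption: process_vision_info returns (None, None) for text-only turns.
    image_inputs, video_inputs = process_vision_info(messages)
    inputs = processor(text=[text], images=image_inputs, videos=video_inputs,
                       padding=True, return_tensors="pt").to(model.device)
    with torch.no_grad():
        generated = model.generate(**inputs, max_new_tokens=max_new_tokens)
    trimmed = [out[len(inp):] for inp, out in zip(inputs.input_ids, generated)]
    return processor.batch_decode(trimmed, skip_special_tokens=True)[0]
```

Loading once and generating twice is what keeps this a single-model Space: the second pass is an ordinary text-only chat turn against the same weights.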
app.py CHANGED
@@ -31,9 +31,9 @@ def _extract_json_block(text: str) -> Optional[str]:
         return match.group(0)
 
 
-@spaces.GPU(duration=…)
-def extract_text_from_image(image: Image.Image) -> str:
-    """…"""
+@spaces.GPU(duration=180)
+def analyze_medication_image(image: Image.Image) -> Tuple[str, str]:
+    """Extract text from the image with OCR, then analyze the medication info."""
     try:
         # Load the Qwen2.5-VL model
         model = Qwen2_5_VLForConditionalGeneration.from_pretrained(
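A note on the decorator this hunk introduces: on ZeroGPU Spaces, `@spaces.GPU` attaches a GPU only while the decorated function runs, and `duration` declares the expected time budget (180 s here, from the diff). A minimal, illustrative shape:

```python
import spaces  # Hugging Face Spaces helper

@spaces.GPU(duration=180)  # GPU is attached for the call and released afterwards
def gpu_bound_task(image):
    # On ZeroGPU, all CUDA work must happen inside the decorated function.
    ...
```

Because the model load also sits inside the decorated function, the weights are re-initialized on every request; hoisting the load to module scope is a common optimization, though not what this commit does.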
@@ -43,8 +43,8 @@ def extract_text_from_image(image: Image.Image) -> str:
         )
         processor = AutoProcessor.from_pretrained(MODEL_ID)
 
-        # OCR …
-        …
+        # Step 1: OCR - extract the text from the image
+        ocr_messages = [
             {
                 "role": "user",
                 "content": [
@@ -54,9 +54,8 @@ def extract_text_from_image(image: Image.Image) -> str:
             }
         ]
 
-        …
-        …
-        image_inputs, video_inputs = process_vision_info(messages)
+        text = processor.apply_chat_template(ocr_messages, tokenize=False, add_generation_prompt=True)
+        image_inputs, video_inputs = process_vision_info(ocr_messages)
         inputs = processor(
             text=[text],
             images=image_inputs,
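The context lines above only show the skeleton of `ocr_messages` (`role` / `content`); the actual content items sit outside the hunk. For reference, a typical Qwen2.5-VL vision turn looks like this (the prompt text is illustrative, not the one in app.py):

```python
ocr_messages = [
    {
        "role": "user",
        "content": [
            {"type": "image", "image": image},  # a PIL.Image instance works here
            {"type": "text", "text": "Read and transcribe all text in this image."},
        ],
    }
]
```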
@@ -66,7 +65,6 @@ def extract_text_from_image(image: Image.Image) -> str:
         )
         inputs = inputs.to(model.device)
 
-        # Inference
         with torch.no_grad():
             generated_ids = model.generate(**inputs, max_new_tokens=2048)
 
@@ -74,14 +72,59 @@ def extract_text_from_image(image: Image.Image) -> str:
             out_ids[len(in_ids):] for in_ids, out_ids in zip(inputs.input_ids, generated_ids)
         ]
 
-        …
+        ocr_text = processor.batch_decode(
+            generated_ids_trimmed, skip_special_tokens=True, clean_up_tokenization_spaces=False
+        )[0]
+
+        if not ocr_text or ocr_text.strip() == "":
+            return "No text could be found.", ""
+
+        # Step 2: medication analysis - hand the OCR text back to the LLM
+        analysis_messages = [
+            {
+                "role": "user",
+                "content": [
+                    {"type": "text", "text": f"""The following text was extracted from a medication pouch or prescription:
+
+{ocr_text}
+
+Find the medication names in the text above and, for each medication, explain the following in a fun, friendly way that **both seniors and children can easily understand**:
+
+1. **Medication name**: the exact name of the medication
+2. **Effects**: what the medication treats and how it helps
+3. **Side effects**: side effects to watch out for
+
+Use an emoji for each medication and explain in easy words, so that grandmothers, grandfathers, and elementary-school students can all understand.
+Write your answer in Markdown format."""},
+                ],
+            }
+        ]
+
+        text = processor.apply_chat_template(analysis_messages, tokenize=False, add_generation_prompt=True)
+        inputs = processor(
+            text=[text],
+            images=None,
+            videos=None,
+            padding=True,
+            return_tensors="pt",
+        )
+        inputs = inputs.to(model.device)
+
+        with torch.no_grad():
+            generated_ids = model.generate(**inputs, max_new_tokens=3072, temperature=0.7)
+
+        generated_ids_trimmed = [
+            out_ids[len(in_ids):] for in_ids, out_ids in zip(inputs.input_ids, generated_ids)
+        ]
+
+        analysis_text = processor.batch_decode(
             generated_ids_trimmed, skip_special_tokens=True, clean_up_tokenization_spaces=False
         )[0]
 
-        return …
+        return ocr_text.strip(), analysis_text.strip()
 
     except Exception as e:
-        raise Exception(f"…")
+        raise Exception(f"Analysis error: {str(e)}")
 
 
 def extract_medications_from_text(text: str) -> List[str]:
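With `analyze_medication_image` returning an `(ocr_text, analysis)` pair, it can be smoke-tested outside Gradio; a hypothetical check (the filename is illustrative, and a GPU session is assumed):

```python
from PIL import Image

# Hypothetical local smoke test - not part of the commit.
img = Image.open("sample_prescription.jpg")  # illustrative path
ocr_text, explanation = analyze_medication_image(img)
print("--- OCR ---")
print(ocr_text)
print("--- Explanation ---")
print(explanation)
```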
@@ -173,18 +216,23 @@ def format_results(extracted_text: str, medications: List[str]) -> Tuple[str, str]:
 
 
 def run_analysis(image: Optional[Image.Image], progress=gr.Progress()):
-    """Main analysis pipeline: OCR …"""
+    """Main analysis pipeline: OCR + medication info analysis."""
     if image is None:
-        return "📷 Please upload a photo of a medication pouch or prescription."
+        return "📷 Please upload a photo of a medication pouch or prescription.", ""
 
-    progress(0.…)
+    progress(0.3, desc="📸 Step 1: extracting text with OCR...")
+    progress(0.6, desc="🤖 Step 2: analyzing the medication info...")
 
     try:
-        …
+        ocr_text, analysis = analyze_medication_image(image)
         progress(1.0, desc="✅ Done!")
-        …
+
+        ocr_output = f"### 📋 Extracted text\n\n```\n{ocr_text}\n```"
+        analysis_output = f"### 💊 Medication info explained\n\n{analysis}"
+
+        return ocr_output, analysis_output
     except Exception as e:
-        return f"### ⚠️ An error occurred\n\n{str(e)}"
+        return f"### ⚠️ An error occurred\n\n{str(e)}", ""
 
 
 # Simple CSS
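`run_analysis` now returns a 2-tuple on every path (including the no-image and error branches), which is what lets one click handler feed two panels: Gradio maps the n-th returned value to the n-th component in `outputs`. A self-contained sketch of that contract (component names are illustrative):

```python
import gradio as gr

def two_panel(text: str):
    # A tuple return fills `outputs` positionally: first item -> left, second -> right.
    return f"**Left:** {text}", f"**Right:** {text}"

with gr.Blocks() as demo:
    box = gr.Textbox(label="Input")
    left, right = gr.Markdown(), gr.Markdown()
    gr.Button("Run").click(two_panel, inputs=box, outputs=[left, right])

demo.launch()
```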
@@ -270,8 +318,8 @@ body {
 
 HERO_HTML = """
 <div class="hero">
-    <h1>💊 Medication…</h1>
-    <p>From your medication pouch/prescription photo…</p>
+    <h1>💊 Our Family Medication Helper</h1>
+    <p>Easy, fun explanations of the medication info in your pouch or prescription photo!</p>
 </div>
 """
 
@@ -282,24 +330,37 @@ with gr.Blocks(theme=gr.themes.Soft(), css=CUSTOM_CSS) as demo:
         with gr.Column(elem_classes=["upload-section"]):
             gr.Markdown("### 📸 Upload a photo")
             image_input = gr.Image(type="pil", label="Medication pouch or prescription photo", height=350)
-            analyze_button = gr.Button("🔍 …")
+            analyze_button = gr.Button("🔍 Analyze medication info", elem_classes=["analyze-btn"], size="lg")
+
+    with gr.Row():
+        with gr.Column(elem_classes=["result-section"]):
+            gr.Markdown("### 📋 Step 1: extracted text")
+            ocr_output = gr.Markdown("The text extracted by OCR will appear here.")
 
+        with gr.Column(elem_classes=["result-section"]):
+            gr.Markdown("### 💊 Step 2: easy medication explanation")
+            analysis_output = gr.Markdown("Medication info that seniors and children can easily understand will appear here.")
 
-    …
-    …
-    …
     analyze_button.click(
         run_analysis,
         inputs=image_input,
-        outputs=…,
+        outputs=[ocr_output, analysis_output],
    )
 
     gr.Markdown("""
     ---
 
-    **ℹ️ …**
-    …
-    …
+    **ℹ️ How to use**
+    1. Upload a photo of a medication pouch or prescription
+    2. Click the 'Analyze medication info' button
+    3. The extracted text appears on the left, and an easy explanation on the right!
+
+    **⚠️ Caution**
+    - This app is for reference only; for actual medication use, always follow your doctor's or pharmacist's instructions
+    - The information is AI-generated and may not be accurate
+
+    **🤖 Tech stack**
+    - Qwen2.5-VL-7B-Instruct (OCR + medication info analysis)
     """)
 
 if __name__ == "__main__":
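The final hunk ends at the entry-point guard, so the launch call itself is outside the diff; a typical Gradio Space closes like this (an assumption, since the body isn't shown):

```python
if __name__ == "__main__":
    demo.launch()
```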