LLDDWW Claude committed on
Commit
0e6a905
·
1 Parent(s): 8a7e01c

feat: add medication analysis with easy explanations

Browse files

- OCR extraction followed by medication info analysis
- Explain medication name, effects, and side effects
- User-friendly explanations for elderly and children
- Two-panel UI: OCR results + easy explanations
- Single model (Qwen2.5-VL-7B) handles both tasks

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>

Files changed (1) hide show
  1. app.py +89 -28
app.py CHANGED
@@ -31,9 +31,9 @@ def _extract_json_block(text: str) -> Optional[str]:
31
  return match.group(0)
32
 
33
 
34
- @spaces.GPU(duration=120)
35
- def extract_text_from_image(image: Image.Image) -> str:
36
- """Qwen2-VL๋กœ ์ด๋ฏธ์ง€์—์„œ ํ…์ŠคํŠธ ์ถ”์ถœ"""
37
  try:
38
  # Qwen2.5-VL ๋ชจ๋ธ ๋กœ๋“œ
39
  model = Qwen2_5_VLForConditionalGeneration.from_pretrained(
@@ -43,8 +43,8 @@ def extract_text_from_image(image: Image.Image) -> str:
43
  )
44
  processor = AutoProcessor.from_pretrained(MODEL_ID)
45
 
46
- # OCR ํ”„๋กฌํ”„ํŠธ ๊ตฌ์„ฑ
47
- messages = [
48
  {
49
  "role": "user",
50
  "content": [
@@ -54,9 +54,8 @@ def extract_text_from_image(image: Image.Image) -> str:
54
  }
55
  ]
56
 
57
- # ์ž…๋ ฅ ์ค€๋น„
58
- text = processor.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
59
- image_inputs, video_inputs = process_vision_info(messages)
60
  inputs = processor(
61
  text=[text],
62
  images=image_inputs,
@@ -66,7 +65,6 @@ def extract_text_from_image(image: Image.Image) -> str:
66
  )
67
  inputs = inputs.to(model.device)
68
 
69
- # ์ถ”๋ก 
70
  with torch.no_grad():
71
  generated_ids = model.generate(**inputs, max_new_tokens=2048)
72
 
@@ -74,14 +72,59 @@ def extract_text_from_image(image: Image.Image) -> str:
74
  out_ids[len(in_ids):] for in_ids, out_ids in zip(inputs.input_ids, generated_ids)
75
  ]
76
 
77
- output_text = processor.batch_decode(
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
78
  generated_ids_trimmed, skip_special_tokens=True, clean_up_tokenization_spaces=False
79
  )[0]
80
 
81
- return output_text.strip() if output_text else "ํ…์ŠคํŠธ๋ฅผ ์ฐพ์„ ์ˆ˜ ์—†์Šต๋‹ˆ๋‹ค."
82
 
83
  except Exception as e:
84
- raise Exception(f"OCR ์˜ค๋ฅ˜: {str(e)}")
85
 
86
 
87
  def extract_medications_from_text(text: str) -> List[str]:
@@ -173,18 +216,23 @@ def format_results(extracted_text: str, medications: List[str]) -> Tuple[str, st
173
 
174
 
175
  def run_analysis(image: Optional[Image.Image], progress=gr.Progress()):
176
- """๋ฉ”์ธ ๋ถ„์„ ํŒŒ์ดํ”„๋ผ์ธ: OCR๋งŒ ์‹คํ–‰"""
177
  if image is None:
178
- return "๐Ÿ“ท ์•ฝ ๋ด‰ํˆฌ๋‚˜ ์ฒ˜๋ฐฉ์ „ ์‚ฌ์ง„์„ ์—…๋กœ๋“œํ•ด์ฃผ์„ธ์š”."
179
 
180
- progress(0.5, desc="๐Ÿ“ธ OCR ํ…์ŠคํŠธ ์ถ”์ถœ ์ค‘...")
 
181
 
182
  try:
183
- extracted_text = extract_text_from_image(image)
184
  progress(1.0, desc="โœ… ์™„๋ฃŒ!")
185
- return f"### ๐Ÿ“„ OCR ์ถ”์ถœ ๊ฒฐ๊ณผ\n\n```\n{extracted_text}\n```"
 
 
 
 
186
  except Exception as e:
187
- return f"### โš ๏ธ ์˜ค๋ฅ˜ ๋ฐœ์ƒ\n\n{str(e)}"
188
 
189
 
190
  # ์‹ฌํ”Œํ•œ CSS
@@ -270,8 +318,8 @@ body {
270
 
271
  HERO_HTML = """
272
  <div class="hero">
273
- <h1>๐Ÿ’Š ์•ฝ ์ด๋ฆ„ ์ถ”์ถœ๊ธฐ</h1>
274
- <p>์•ฝ๋ด‰ํˆฌ/์ฒ˜๋ฐฉ์ „ ์‚ฌ์ง„์—์„œ ์•ฝ ์ด๋ฆ„์„ ์ž๋™์œผ๋กœ ์ถ”์ถœํ•ฉ๋‹ˆ๋‹ค</p>
275
  </div>
276
  """
277
 
@@ -282,24 +330,37 @@ with gr.Blocks(theme=gr.themes.Soft(), css=CUSTOM_CSS) as demo:
282
  with gr.Column(elem_classes=["upload-section"]):
283
  gr.Markdown("### ๐Ÿ“ธ ์‚ฌ์ง„ ์—…๋กœ๋“œ")
284
  image_input = gr.Image(type="pil", label="์•ฝ๋ด‰ํˆฌ ๋˜๋Š” ์ฒ˜๋ฐฉ์ „ ์‚ฌ์ง„", height=350)
285
- analyze_button = gr.Button("๐Ÿ” OCR ํ…์ŠคํŠธ ์ถ”์ถœ", elem_classes=["analyze-btn"], size="lg")
 
 
 
 
 
286
 
287
- with gr.Column(elem_classes=["result-section"]):
288
- gr.Markdown("### ๐Ÿ“‹ OCR ์ถ”์ถœ ๊ฒฐ๊ณผ")
289
- text_output = gr.Markdown("OCR๋กœ ์ถ”์ถœ๋œ ์ „์ฒด ํ…์ŠคํŠธ๊ฐ€ ์—ฌ๊ธฐ ํ‘œ์‹œ๋ฉ๋‹ˆ๋‹ค.")
290
 
291
  analyze_button.click(
292
  run_analysis,
293
  inputs=image_input,
294
- outputs=text_output,
295
  )
296
 
297
  gr.Markdown("""
298
  ---
299
 
300
- **โ„น๏ธ OCR ๋ชจ๋ธ**
301
- - Qwen2.5-VL-7B-Instruct - ์ตœ์ฒจ๋‹จ ๋น„์ „-์–ธ์–ด ๋ชจ๋ธ ๊ธฐ๋ฐ˜ OCR (GPT-4o ์ˆ˜์ค€)
302
- - ํ•œ๊ตญ์–ด, ์˜์–ด ๋“ฑ ๋‹ค๊ตญ์–ด ์ง€์›
 
 
 
 
 
 
 
 
303
  """)
304
 
305
  if __name__ == "__main__":
 
31
  return match.group(0)
32
 
33
 
34
+ @spaces.GPU(duration=180)
35
+ def analyze_medication_image(image: Image.Image) -> Tuple[str, str]:
36
+ """์ด๋ฏธ์ง€์—์„œ OCR ์ถ”์ถœ ํ›„ ์•ฝ ์ •๋ณด ๋ถ„์„"""
37
  try:
38
  # Qwen2.5-VL ๋ชจ๋ธ ๋กœ๋“œ
39
  model = Qwen2_5_VLForConditionalGeneration.from_pretrained(
 
43
  )
44
  processor = AutoProcessor.from_pretrained(MODEL_ID)
45
 
46
+ # Step 1: OCR - ์ด๋ฏธ์ง€์—์„œ ํ…์ŠคํŠธ ์ถ”์ถœ
47
+ ocr_messages = [
48
  {
49
  "role": "user",
50
  "content": [
 
54
  }
55
  ]
56
 
57
+ text = processor.apply_chat_template(ocr_messages, tokenize=False, add_generation_prompt=True)
58
+ image_inputs, video_inputs = process_vision_info(ocr_messages)
 
59
  inputs = processor(
60
  text=[text],
61
  images=image_inputs,
 
65
  )
66
  inputs = inputs.to(model.device)
67
 
 
68
  with torch.no_grad():
69
  generated_ids = model.generate(**inputs, max_new_tokens=2048)
70
 
 
72
  out_ids[len(in_ids):] for in_ids, out_ids in zip(inputs.input_ids, generated_ids)
73
  ]
74
 
75
+ ocr_text = processor.batch_decode(
76
+ generated_ids_trimmed, skip_special_tokens=True, clean_up_tokenization_spaces=False
77
+ )[0]
78
+
79
+ if not ocr_text or ocr_text.strip() == "":
80
+ return "ํ…์ŠคํŠธ๋ฅผ ์ฐพ์„ ์ˆ˜ ์—†์Šต๋‹ˆ๋‹ค.", ""
81
+
82
+ # Step 2: ์•ฝ ์ •๋ณด ๋ถ„์„ - OCR ํ…์ŠคํŠธ๋ฅผ LLM์—๊ฒŒ ์ „๋‹ฌ
83
+ analysis_messages = [
84
+ {
85
+ "role": "user",
86
+ "content": [
87
+ {"type": "text", "text": f"""๋‹ค์Œ์€ ์•ฝ ๋ด‰ํˆฌ๋‚˜ ์ฒ˜๋ฐฉ์ „์—์„œ ์ถ”์ถœํ•œ ํ…์ŠคํŠธ์ž…๋‹ˆ๋‹ค:
88
+
89
+ {ocr_text}
90
+
91
+ ์œ„ ํ…์ŠคํŠธ์—์„œ ์•ฝ ์ด๋ฆ„์„ ์ฐพ์•„์„œ, ๊ฐ ์•ฝ์— ๋Œ€ํ•ด ๋‹ค์Œ ์ •๋ณด๋ฅผ **๋…ธ์ธ๊ณผ ์–ด๋ฆฐ์ด ๋ชจ๋‘ ์‰ฝ๊ฒŒ ์ดํ•ดํ•  ์ˆ˜ ์žˆ๋„๋ก** ์žฌ๋ฏธ์žˆ๊ณ  ์นœ๊ทผํ•˜๊ฒŒ ์„ค๋ช…ํ•ด์ฃผ์„ธ์š”:
92
+
93
+ 1. **์•ฝ ์ด๋ฆ„**: ์ •ํ™•ํ•œ ์•ฝ ์ด๋ฆ„
94
+ 2. **ํšจ๋Šฅ**: ์ด ์•ฝ์ด ๋ฌด์—‡์„ ์น˜๋ฃŒํ•˜๊ณ  ์–ด๋–ป๊ฒŒ ๋„์›€์ด ๋˜๋Š”์ง€
95
+ 3. **๋ถ€์ž‘์šฉ**: ์ฃผ์˜ํ•ด์•ผ ํ•  ๋ถ€์ž‘์šฉ๋“ค
96
+
97
+ ๊ฐ ์•ฝ๋งˆ๋‹ค ์ด๋ชจ์ง€๋ฅผ ์‚ฌ์šฉํ•˜๊ณ , ์‰ฌ์šด ๋‹จ์–ด๋กœ ์„ค๋ช…ํ•ด์ฃผ์„ธ์š”. ํ• ๋จธ๋‹ˆ ํ• ์•„๋ฒ„์ง€๋‚˜ ์ดˆ๋“ฑํ•™์ƒ๋„ ์ดํ•ดํ•  ์ˆ˜ ์žˆ๊ฒŒ ์ž‘์„ฑํ•ด์ฃผ์„ธ์š”.
98
+ ๋งˆํฌ๋‹ค์šด ํ˜•์‹์œผ๋กœ ์ž‘์„ฑํ•ด์ฃผ์„ธ์š”."""},
99
+ ],
100
+ }
101
+ ]
102
+
103
+ text = processor.apply_chat_template(analysis_messages, tokenize=False, add_generation_prompt=True)
104
+ inputs = processor(
105
+ text=[text],
106
+ images=None,
107
+ videos=None,
108
+ padding=True,
109
+ return_tensors="pt",
110
+ )
111
+ inputs = inputs.to(model.device)
112
+
113
+ with torch.no_grad():
114
+ generated_ids = model.generate(**inputs, max_new_tokens=3072, temperature=0.7)
115
+
116
+ generated_ids_trimmed = [
117
+ out_ids[len(in_ids):] for in_ids, out_ids in zip(inputs.input_ids, generated_ids)
118
+ ]
119
+
120
+ analysis_text = processor.batch_decode(
121
  generated_ids_trimmed, skip_special_tokens=True, clean_up_tokenization_spaces=False
122
  )[0]
123
 
124
+ return ocr_text.strip(), analysis_text.strip()
125
 
126
  except Exception as e:
127
+ raise Exception(f"๋ถ„์„ ์˜ค๋ฅ˜: {str(e)}")
128
 
129
 
130
  def extract_medications_from_text(text: str) -> List[str]:
 
216
 
217
 
218
  def run_analysis(image: Optional[Image.Image], progress=gr.Progress()):
219
+ """๋ฉ”์ธ ๋ถ„์„ ํŒŒ์ดํ”„๋ผ์ธ: OCR + ์•ฝ ์ •๋ณด ๋ถ„์„"""
220
  if image is None:
221
+ return "๐Ÿ“ท ์•ฝ ๋ด‰ํˆฌ๋‚˜ ์ฒ˜๋ฐฉ์ „ ์‚ฌ์ง„์„ ์—…๋กœ๋“œํ•ด์ฃผ์„ธ์š”.", ""
222
 
223
+ progress(0.3, desc="๐Ÿ“ธ 1๋‹จ๊ณ„: OCR ํ…์ŠคํŠธ ์ถ”์ถœ ์ค‘...")
224
+ progress(0.6, desc="๐Ÿค– 2๋‹จ๊ณ„: ์•ฝ ์ •๋ณด ๋ถ„์„ ์ค‘...")
225
 
226
  try:
227
+ ocr_text, analysis = analyze_medication_image(image)
228
  progress(1.0, desc="โœ… ์™„๋ฃŒ!")
229
+
230
+ ocr_output = f"### ๐Ÿ“„ ์ถ”์ถœ๋œ ํ…์ŠคํŠธ\n\n```\n{ocr_text}\n```"
231
+ analysis_output = f"### ๐Ÿ’Š ์•ฝ ์ •๋ณด ์„ค๋ช…\n\n{analysis}"
232
+
233
+ return ocr_output, analysis_output
234
  except Exception as e:
235
+ return f"### โš ๏ธ ์˜ค๋ฅ˜ ๋ฐœ์ƒ\n\n{str(e)}", ""
236
 
237
 
238
  # ์‹ฌํ”Œํ•œ CSS
 
318
 
319
  HERO_HTML = """
320
  <div class="hero">
321
+ <h1>๐Ÿ’Š ์šฐ๋ฆฌ ๊ฐ€์กฑ ์•ฝ ๋„์šฐ๋ฏธ</h1>
322
+ <p>์•ฝ๋ด‰ํˆฌ/์ฒ˜๋ฐฉ์ „ ์‚ฌ์ง„์—์„œ ์•ฝ ์ •๋ณด๋ฅผ ์‰ฝ๊ณ  ์žฌ๋ฏธ์žˆ๊ฒŒ ์•Œ๋ ค๋“œ๋ ค์š”!</p>
323
  </div>
324
  """
325
 
 
330
  with gr.Column(elem_classes=["upload-section"]):
331
  gr.Markdown("### ๐Ÿ“ธ ์‚ฌ์ง„ ์—…๋กœ๋“œ")
332
  image_input = gr.Image(type="pil", label="์•ฝ๋ด‰ํˆฌ ๋˜๋Š” ์ฒ˜๋ฐฉ์ „ ์‚ฌ์ง„", height=350)
333
+ analyze_button = gr.Button("๐Ÿ” ์•ฝ ์ •๋ณด ๋ถ„์„ํ•˜๊ธฐ", elem_classes=["analyze-btn"], size="lg")
334
+
335
+ with gr.Row():
336
+ with gr.Column(elem_classes=["result-section"]):
337
+ gr.Markdown("### ๐Ÿ“‹ 1๋‹จ๊ณ„: ์ถ”์ถœ๋œ ํ…์ŠคํŠธ")
338
+ ocr_output = gr.Markdown("OCR๋กœ ์ถ”์ถœ๋œ ํ…์ŠคํŠธ๊ฐ€ ์—ฌ๊ธฐ ํ‘œ์‹œ๋ฉ๋‹ˆ๋‹ค.")
339
 
340
+ with gr.Column(elem_classes=["result-section"]):
341
+ gr.Markdown("### ๐Ÿ“‹ 2๋‹จ๊ณ„: ์‰ฌ์šด ์•ฝ ์„ค๋ช…")
342
+ analysis_output = gr.Markdown("๋…ธ์ธ๊ณผ ์–ด๋ฆฐ์ด๋„ ์ดํ•ดํ•˜๊ธฐ ์‰ฌ์šด ์•ฝ ์ •๋ณด๊ฐ€ ์—ฌ๊ธฐ ํ‘œ์‹œ๋ฉ๋‹ˆ๋‹ค.")
343
 
344
  analyze_button.click(
345
  run_analysis,
346
  inputs=image_input,
347
+ outputs=[ocr_output, analysis_output],
348
  )
349
 
350
  gr.Markdown("""
351
  ---
352
 
353
+ **โ„น๏ธ ์‚ฌ์šฉ ๋ฐฉ๋ฒ•**
354
+ 1. ์•ฝ ๋ด‰ํˆฌ๋‚˜ ์ฒ˜๋ฐฉ์ „ ์‚ฌ์ง„์„ ์—…๋กœ๋“œํ•˜์„ธ์š”
355
+ 2. '์•ฝ ์ •๋ณด ๋ถ„์„ํ•˜๊ธฐ' ๋ฒ„ํŠผ์„ ํด๋ฆญํ•˜์„ธ์š”
356
+ 3. ์™ผ์ชฝ์—๋Š” ์ถ”์ถœ๋œ ํ…์ŠคํŠธ, ์˜ค๋ฅธ์ชฝ์—๋Š” ์‰ฌ์šด ์„ค๋ช…์ด ๋‚˜ํƒ€๋‚ฉ๋‹ˆ๋‹ค!
357
+
358
+ **โš ๏ธ ์ฃผ์˜์‚ฌํ•ญ**
359
+ - ์ด ์•ฑ์€ ์ฐธ๊ณ ์šฉ์ด๋ฉฐ, ์‹ค์ œ ๋ณต์•ฝ์€ ๋ฐ˜๋“œ์‹œ ์˜์‚ฌ๋‚˜ ์•ฝ์‚ฌ์˜ ์ง€์‹œ๋ฅผ ๋”ฐ๋ฅด์„ธ์š”
360
+ - AI๊ฐ€ ์ƒ์„ฑํ•œ ์ •๋ณด์ด๋ฏ€๋กœ ์ •ํ™•ํ•˜์ง€ ์•Š์„ ์ˆ˜ ์žˆ์Šต๋‹ˆ๋‹ค
361
+
362
+ **๐Ÿค– ๊ธฐ์ˆ  ์Šคํƒ**
363
+ - Qwen2.5-VL-7B-Instruct (OCR + ์•ฝ ์ •๋ณด ๋ถ„์„)
364
  """)
365
 
366
  if __name__ == "__main__":