LLDDWW Claude committed on
Commit
0e6a905
·
1 Parent(s): 8a7e01c

feat: add medication analysis with easy explanations

Browse files

- OCR extraction followed by medication info analysis
- Explain medication name, effects, and side effects
- User-friendly explanations for elderly and children
- Two-panel UI: OCR results + easy explanations
- Single model (Qwen2.5-VL-7B) handles both tasks

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>

Files changed (1) hide show
  1. app.py +89 -28
app.py CHANGED
@@ -31,9 +31,9 @@ def _extract_json_block(text: str) -> Optional[str]:
31
  return match.group(0)
32
 
33
 
34
- @spaces.GPU(duration=120)
35
- def extract_text_from_image(image: Image.Image) -> str:
36
- """Qwen2-VL๋กœ ์ด๋ฏธ์ง€์—์„œ ํ…์ŠคํŠธ ์ถ”์ถœ"""
37
  try:
38
  # Qwen2.5-VL ๋ชจ๋ธ ๋กœ๋“œ
39
  model = Qwen2_5_VLForConditionalGeneration.from_pretrained(
@@ -43,8 +43,8 @@ def extract_text_from_image(image: Image.Image) -> str:
43
  )
44
  processor = AutoProcessor.from_pretrained(MODEL_ID)
45
 
46
- # OCR ํ”„๋กฌํ”„ํŠธ ๊ตฌ์„ฑ
47
- messages = [
48
  {
49
  "role": "user",
50
  "content": [
@@ -54,9 +54,8 @@ def extract_text_from_image(image: Image.Image) -> str:
54
  }
55
  ]
56
 
57
- # ์ž…๋ ฅ ์ค€๋น„
58
- text = processor.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
59
- image_inputs, video_inputs = process_vision_info(messages)
60
  inputs = processor(
61
  text=[text],
62
  images=image_inputs,
@@ -66,7 +65,6 @@ def extract_text_from_image(image: Image.Image) -> str:
66
  )
67
  inputs = inputs.to(model.device)
68
 
69
- # ์ถ”๋ก 
70
  with torch.no_grad():
71
  generated_ids = model.generate(**inputs, max_new_tokens=2048)
72
 
@@ -74,14 +72,59 @@ def extract_text_from_image(image: Image.Image) -> str:
74
  out_ids[len(in_ids):] for in_ids, out_ids in zip(inputs.input_ids, generated_ids)
75
  ]
76
 
77
- output_text = processor.batch_decode(
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
78
  generated_ids_trimmed, skip_special_tokens=True, clean_up_tokenization_spaces=False
79
  )[0]
80
 
81
- return output_text.strip() if output_text else "ํ…์ŠคํŠธ๋ฅผ ์ฐพ์„ ์ˆ˜ ์—†์Šต๋‹ˆ๋‹ค."
82
 
83
  except Exception as e:
84
- raise Exception(f"OCR ์˜ค๋ฅ˜: {str(e)}")
85
 
86
 
87
  def extract_medications_from_text(text: str) -> List[str]:
@@ -173,18 +216,23 @@ def format_results(extracted_text: str, medications: List[str]) -> Tuple[str, st
173
 
174
 
175
  def run_analysis(image: Optional[Image.Image], progress=gr.Progress()):
176
- """๋ฉ”์ธ ๋ถ„์„ ํŒŒ์ดํ”„๋ผ์ธ: OCR๋งŒ ์‹คํ–‰"""
177
  if image is None:
178
- return "๐Ÿ“ท ์•ฝ ๋ด‰ํˆฌ๋‚˜ ์ฒ˜๋ฐฉ์ „ ์‚ฌ์ง„์„ ์—…๋กœ๋“œํ•ด์ฃผ์„ธ์š”."
179
 
180
- progress(0.5, desc="๐Ÿ“ธ OCR ํ…์ŠคํŠธ ์ถ”์ถœ ์ค‘...")
 
181
 
182
  try:
183
- extracted_text = extract_text_from_image(image)
184
  progress(1.0, desc="โœ… ์™„๋ฃŒ!")
185
- return f"### ๐Ÿ“„ OCR ์ถ”์ถœ ๊ฒฐ๊ณผ\n\n```\n{extracted_text}\n```"
 
 
 
 
186
  except Exception as e:
187
- return f"### โš ๏ธ ์˜ค๋ฅ˜ ๋ฐœ์ƒ\n\n{str(e)}"
188
 
189
 
190
  # ์‹ฌํ”Œํ•œ CSS
@@ -270,8 +318,8 @@ body {
270
 
271
  HERO_HTML = """
272
  <div class="hero">
273
- <h1>๐Ÿ’Š ์•ฝ ์ด๋ฆ„ ์ถ”์ถœ๊ธฐ</h1>
274
- <p>์•ฝ๋ด‰ํˆฌ/์ฒ˜๋ฐฉ์ „ ์‚ฌ์ง„์—์„œ ์•ฝ ์ด๋ฆ„์„ ์ž๋™์œผ๋กœ ์ถ”์ถœํ•ฉ๋‹ˆ๋‹ค</p>
275
  </div>
276
  """
277
 
@@ -282,24 +330,37 @@ with gr.Blocks(theme=gr.themes.Soft(), css=CUSTOM_CSS) as demo:
282
  with gr.Column(elem_classes=["upload-section"]):
283
  gr.Markdown("### ๐Ÿ“ธ ์‚ฌ์ง„ ์—…๋กœ๋“œ")
284
  image_input = gr.Image(type="pil", label="์•ฝ๋ด‰ํˆฌ ๋˜๋Š” ์ฒ˜๋ฐฉ์ „ ์‚ฌ์ง„", height=350)
285
- analyze_button = gr.Button("๐Ÿ” OCR ํ…์ŠคํŠธ ์ถ”์ถœ", elem_classes=["analyze-btn"], size="lg")
 
 
 
 
 
286
 
287
- with gr.Column(elem_classes=["result-section"]):
288
- gr.Markdown("### ๐Ÿ“‹ OCR ์ถ”์ถœ ๊ฒฐ๊ณผ")
289
- text_output = gr.Markdown("OCR๋กœ ์ถ”์ถœ๋œ ์ „์ฒด ํ…์ŠคํŠธ๊ฐ€ ์—ฌ๊ธฐ ํ‘œ์‹œ๋ฉ๋‹ˆ๋‹ค.")
290
 
291
  analyze_button.click(
292
  run_analysis,
293
  inputs=image_input,
294
- outputs=text_output,
295
  )
296
 
297
  gr.Markdown("""
298
  ---
299
 
300
- **โ„น๏ธ OCR ๋ชจ๋ธ**
301
- - Qwen2.5-VL-7B-Instruct - ์ตœ์ฒจ๋‹จ ๋น„์ „-์–ธ์–ด ๋ชจ๋ธ ๊ธฐ๋ฐ˜ OCR (GPT-4o ์ˆ˜์ค€)
302
- - ํ•œ๊ตญ์–ด, ์˜์–ด ๋“ฑ ๋‹ค๊ตญ์–ด ์ง€์›
 
 
 
 
 
 
 
 
303
  """)
304
 
305
  if __name__ == "__main__":
 
31
  return match.group(0)
32
 
33
 
34
+ @spaces.GPU(duration=180)
35
+ def analyze_medication_image(image: Image.Image) -> Tuple[str, str]:
36
+ """์ด๋ฏธ์ง€์—์„œ OCR ์ถ”์ถœ ํ›„ ์•ฝ ์ •๋ณด ๋ถ„์„"""
37
  try:
38
  # Qwen2.5-VL ๋ชจ๋ธ ๋กœ๋“œ
39
  model = Qwen2_5_VLForConditionalGeneration.from_pretrained(
 
43
  )
44
  processor = AutoProcessor.from_pretrained(MODEL_ID)
45
 
46
+ # Step 1: OCR - ์ด๋ฏธ์ง€์—์„œ ํ…์ŠคํŠธ ์ถ”์ถœ
47
+ ocr_messages = [
48
  {
49
  "role": "user",
50
  "content": [
 
54
  }
55
  ]
56
 
57
+ text = processor.apply_chat_template(ocr_messages, tokenize=False, add_generation_prompt=True)
58
+ image_inputs, video_inputs = process_vision_info(ocr_messages)
 
59
  inputs = processor(
60
  text=[text],
61
  images=image_inputs,
 
65
  )
66
  inputs = inputs.to(model.device)
67
 
 
68
  with torch.no_grad():
69
  generated_ids = model.generate(**inputs, max_new_tokens=2048)
70
 
 
72
  out_ids[len(in_ids):] for in_ids, out_ids in zip(inputs.input_ids, generated_ids)
73
  ]
74
 
75
+ ocr_text = processor.batch_decode(
76
+ generated_ids_trimmed, skip_special_tokens=True, clean_up_tokenization_spaces=False
77
+ )[0]
78
+
79
+ if not ocr_text or ocr_text.strip() == "":
80
+ return "ํ…์ŠคํŠธ๋ฅผ ์ฐพ์„ ์ˆ˜ ์—†์Šต๋‹ˆ๋‹ค.", ""
81
+
82
+ # Step 2: ์•ฝ ์ •๋ณด ๋ถ„์„ - OCR ํ…์ŠคํŠธ๋ฅผ LLM์—๊ฒŒ ์ „๋‹ฌ
83
+ analysis_messages = [
84
+ {
85
+ "role": "user",
86
+ "content": [
87
+ {"type": "text", "text": f"""๋‹ค์Œ์€ ์•ฝ ๋ด‰ํˆฌ๋‚˜ ์ฒ˜๋ฐฉ์ „์—์„œ ์ถ”์ถœํ•œ ํ…์ŠคํŠธ์ž…๋‹ˆ๋‹ค:
88
+
89
+ {ocr_text}
90
+
91
+ ์œ„ ํ…์ŠคํŠธ์—์„œ ์•ฝ ์ด๋ฆ„์„ ์ฐพ์•„์„œ, ๊ฐ ์•ฝ์— ๋Œ€ํ•ด ๋‹ค์Œ ์ •๋ณด๋ฅผ **๋…ธ์ธ๊ณผ ์–ด๋ฆฐ์ด ๋ชจ๋‘ ์‰ฝ๊ฒŒ ์ดํ•ดํ•  ์ˆ˜ ์žˆ๋„๋ก** ์žฌ๋ฏธ์žˆ๊ณ  ์นœ๊ทผํ•˜๊ฒŒ ์„ค๋ช…ํ•ด์ฃผ์„ธ์š”:
92
+
93
+ 1. **์•ฝ ์ด๋ฆ„**: ์ •ํ™•ํ•œ ์•ฝ ์ด๋ฆ„
94
+ 2. **ํšจ๋Šฅ**: ์ด ์•ฝ์ด ๋ฌด์—‡์„ ์น˜๋ฃŒํ•˜๊ณ  ์–ด๋–ป๊ฒŒ ๋„์›€์ด ๋˜๋Š”์ง€
95
+ 3. **๋ถ€์ž‘์šฉ**: ์ฃผ์˜ํ•ด์•ผ ํ•  ๋ถ€์ž‘์šฉ๋“ค
96
+
97
+ ๊ฐ ์•ฝ๋งˆ๋‹ค ์ด๋ชจ์ง€๋ฅผ ์‚ฌ์šฉํ•˜๊ณ , ์‰ฌ์šด ๋‹จ์–ด๋กœ ์„ค๋ช…ํ•ด์ฃผ์„ธ์š”. ํ• ๋จธ๋‹ˆ ํ• ์•„๋ฒ„์ง€๋‚˜ ์ดˆ๋“ฑํ•™์ƒ๋„ ์ดํ•ดํ•  ์ˆ˜ ์žˆ๊ฒŒ ์ž‘์„ฑํ•ด์ฃผ์„ธ์š”.
98
+ ๋งˆํฌ๋‹ค์šด ํ˜•์‹์œผ๋กœ ์ž‘์„ฑํ•ด์ฃผ์„ธ์š”."""},
99
+ ],
100
+ }
101
+ ]
102
+
103
+ text = processor.apply_chat_template(analysis_messages, tokenize=False, add_generation_prompt=True)
104
+ inputs = processor(
105
+ text=[text],
106
+ images=None,
107
+ videos=None,
108
+ padding=True,
109
+ return_tensors="pt",
110
+ )
111
+ inputs = inputs.to(model.device)
112
+
113
+ with torch.no_grad():
114
+ generated_ids = model.generate(**inputs, max_new_tokens=3072, temperature=0.7)
115
+
116
+ generated_ids_trimmed = [
117
+ out_ids[len(in_ids):] for in_ids, out_ids in zip(inputs.input_ids, generated_ids)
118
+ ]
119
+
120
+ analysis_text = processor.batch_decode(
121
  generated_ids_trimmed, skip_special_tokens=True, clean_up_tokenization_spaces=False
122
  )[0]
123
 
124
+ return ocr_text.strip(), analysis_text.strip()
125
 
126
  except Exception as e:
127
+ raise Exception(f"๋ถ„์„ ์˜ค๋ฅ˜: {str(e)}")
128
 
129
 
130
  def extract_medications_from_text(text: str) -> List[str]:
 
216
 
217
 
218
  def run_analysis(image: Optional[Image.Image], progress=gr.Progress()):
219
+ """๋ฉ”์ธ ๋ถ„์„ ํŒŒ์ดํ”„๋ผ์ธ: OCR + ์•ฝ ์ •๋ณด ๋ถ„์„"""
220
  if image is None:
221
+ return "๐Ÿ“ท ์•ฝ ๋ด‰ํˆฌ๋‚˜ ์ฒ˜๋ฐฉ์ „ ์‚ฌ์ง„์„ ์—…๋กœ๋“œํ•ด์ฃผ์„ธ์š”.", ""
222
 
223
+ progress(0.3, desc="๐Ÿ“ธ 1๋‹จ๊ณ„: OCR ํ…์ŠคํŠธ ์ถ”์ถœ ์ค‘...")
224
+ progress(0.6, desc="๐Ÿค– 2๋‹จ๊ณ„: ์•ฝ ์ •๋ณด ๋ถ„์„ ์ค‘...")
225
 
226
  try:
227
+ ocr_text, analysis = analyze_medication_image(image)
228
  progress(1.0, desc="โœ… ์™„๋ฃŒ!")
229
+
230
+ ocr_output = f"### ๐Ÿ“„ ์ถ”์ถœ๋œ ํ…์ŠคํŠธ\n\n```\n{ocr_text}\n```"
231
+ analysis_output = f"### ๐Ÿ’Š ์•ฝ ์ •๋ณด ์„ค๋ช…\n\n{analysis}"
232
+
233
+ return ocr_output, analysis_output
234
  except Exception as e:
235
+ return f"### โš ๏ธ ์˜ค๋ฅ˜ ๋ฐœ์ƒ\n\n{str(e)}", ""
236
 
237
 
238
  # ์‹ฌํ”Œํ•œ CSS
 
318
 
319
  HERO_HTML = """
320
  <div class="hero">
321
+ <h1>๐Ÿ’Š ์šฐ๋ฆฌ ๊ฐ€์กฑ ์•ฝ ๋„์šฐ๋ฏธ</h1>
322
+ <p>์•ฝ๋ด‰ํˆฌ/์ฒ˜๋ฐฉ์ „ ์‚ฌ์ง„์—์„œ ์•ฝ ์ •๋ณด๋ฅผ ์‰ฝ๊ณ  ์žฌ๋ฏธ์žˆ๊ฒŒ ์•Œ๋ ค๋“œ๋ ค์š”!</p>
323
  </div>
324
  """
325
 
 
330
  with gr.Column(elem_classes=["upload-section"]):
331
  gr.Markdown("### ๐Ÿ“ธ ์‚ฌ์ง„ ์—…๋กœ๋“œ")
332
  image_input = gr.Image(type="pil", label="์•ฝ๋ด‰ํˆฌ ๋˜๋Š” ์ฒ˜๋ฐฉ์ „ ์‚ฌ์ง„", height=350)
333
+ analyze_button = gr.Button("๐Ÿ” ์•ฝ ์ •๋ณด ๋ถ„์„ํ•˜๊ธฐ", elem_classes=["analyze-btn"], size="lg")
334
+
335
+ with gr.Row():
336
+ with gr.Column(elem_classes=["result-section"]):
337
+ gr.Markdown("### ๐Ÿ“‹ 1๋‹จ๊ณ„: ์ถ”์ถœ๋œ ํ…์ŠคํŠธ")
338
+ ocr_output = gr.Markdown("OCR๋กœ ์ถ”์ถœ๋œ ํ…์ŠคํŠธ๊ฐ€ ์—ฌ๊ธฐ ํ‘œ์‹œ๋ฉ๋‹ˆ๋‹ค.")
339
 
340
+ with gr.Column(elem_classes=["result-section"]):
341
+ gr.Markdown("### ๐Ÿ“‹ 2๋‹จ๊ณ„: ์‰ฌ์šด ์•ฝ ์„ค๋ช…")
342
+ analysis_output = gr.Markdown("๋…ธ์ธ๊ณผ ์–ด๋ฆฐ์ด๋„ ์ดํ•ดํ•˜๊ธฐ ์‰ฌ์šด ์•ฝ ์ •๋ณด๊ฐ€ ์—ฌ๊ธฐ ํ‘œ์‹œ๋ฉ๋‹ˆ๋‹ค.")
343
 
344
  analyze_button.click(
345
  run_analysis,
346
  inputs=image_input,
347
+ outputs=[ocr_output, analysis_output],
348
  )
349
 
350
  gr.Markdown("""
351
  ---
352
 
353
+ **โ„น๏ธ ์‚ฌ์šฉ ๋ฐฉ๋ฒ•**
354
+ 1. ์•ฝ ๋ด‰ํˆฌ๋‚˜ ์ฒ˜๋ฐฉ์ „ ์‚ฌ์ง„์„ ์—…๋กœ๋“œํ•˜์„ธ์š”
355
+ 2. '์•ฝ ์ •๋ณด ๋ถ„์„ํ•˜๊ธฐ' ๋ฒ„ํŠผ์„ ํด๋ฆญํ•˜์„ธ์š”
356
+ 3. ์™ผ์ชฝ์—๋Š” ์ถ”์ถœ๋œ ํ…์ŠคํŠธ, ์˜ค๋ฅธ์ชฝ์—๋Š” ์‰ฌ์šด ์„ค๋ช…์ด ๋‚˜ํƒ€๋‚ฉ๋‹ˆ๋‹ค!
357
+
358
+ **โš ๏ธ ์ฃผ์˜์‚ฌํ•ญ**
359
+ - ์ด ์•ฑ์€ ์ฐธ๊ณ ์šฉ์ด๋ฉฐ, ์‹ค์ œ ๋ณต์•ฝ์€ ๋ฐ˜๋“œ์‹œ ์˜์‚ฌ๋‚˜ ์•ฝ์‚ฌ์˜ ์ง€์‹œ๋ฅผ ๋”ฐ๋ฅด์„ธ์š”
360
+ - AI๊ฐ€ ์ƒ์„ฑํ•œ ์ •๋ณด์ด๋ฏ€๋กœ ์ •ํ™•ํ•˜์ง€ ์•Š์„ ์ˆ˜ ์žˆ์Šต๋‹ˆ๋‹ค
361
+
362
+ **๐Ÿค– ๊ธฐ์ˆ  ์Šคํƒ**
363
+ - Qwen2.5-VL-7B-Instruct (OCR + ์•ฝ ์ •๋ณด ๋ถ„์„)
364
  """)
365
 
366
  if __name__ == "__main__":