Spaces:

joeee2321512
/

Basira

Runtime error

App Files Files Community

joeee2321512 committed on Jul 25

Commit

f814852

verified ·

1 Parent(s): f6d3976

Upload 2 files

Browse files

Files changed (2) hide show

app.py +106 -0
requirements.txt +11 -0

app.py ADDED Viewed

	@@ -0,0 +1,106 @@

+import gradio as gr
+from PIL import Image
+import torch
+from transformers import Qwen2_5_VLForConditionalGeneration, AutoProcessor
+from peft import PeftModel
+import gc
+import os
+# Debugging aid: CUDA_LAUNCH_BLOCKING=1 makes CUDA kernel launches synchronous,
+# so a device-side error is reported at the call that triggered it. It is set
+# here, before the model is loaded.
+# NOTE(review): this is normally a debugging setting and slows GPU execution;
+# confirm it is still wanted in the deployed app.
+os.environ['CUDA_LAUNCH_BLOCKING'] = '1'
+# --- Configuration ---
+# Identifier passed to `from_pretrained` for both the base model and the processor.
+base_model_id = "joeee2321512/Qwen2.5-VL-3B-Instruct-finetuned"
+# Identifier of the PEFT adapter that is applied on top of the base model.
+adapter_id = "joeee2321512/Basira"
+# --- Model Loading ---
+# Everything below runs once at import time. The resulting `model` and
+# `processor` globals are what the inference function uses.
+# The access token is read from the `token_HF` environment variable
+# (`os.getenv` yields None when it is unset).
+print("Loading base model...")
+model = Qwen2_5_VLForConditionalGeneration.from_pretrained(
+    base_model_id,
+    torch_dtype=torch.float16,
+    device_map="auto",
+    token=os.getenv("token_HF")
+)
+print("Loading processor...")
+processor = AutoProcessor.from_pretrained(
+    base_model_id,
+    token=os.getenv("token_HF")
+)
+# NOTE(review): the inference function in this file processes a single,
+# un-batched sample per call, so the padding side should not matter here —
+# confirm before adding batched generation.
+processor.tokenizer.padding_side = "right"
+print("Loading and applying adapter...")
+# Wrap the base model with the adapter weights; `model` is rebound to the wrapper.
+model = PeftModel.from_pretrained(model, adapter_id)
+print("Model loaded successfully!")
+# --- The Inference Function ---
+def perform_ocr_on_image(image_input: Image.Image) -> str:
+    """
+    This is the core function that Gradio will call.
+    It takes a PIL image and returns the transcribed text string.
+    """
+    if image_input is None:
+        return "Please upload an image."
+    try:
+        # Format the prompt using the chat template
+        messages = [
+            {
+                "role": "user",
+                "content": [
+                    {"type": "image", "image": image_input},
+                    {"type": "text", "text": (
+                        "Analyze the input image and detect all Arabic text. "
+                        "Output only the extracted text—verbatim and in its original script—"
+                        "without any added commentary, translation, punctuation or formatting. "
+                        "Present each line of text as plain UTF-8 strings, with no extra characters or words."
+                    )},
+                ],
+            }
+        ]
+        text = processor.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
+        # Prepare inputs for the model
+        inputs = processor(text=text, images=image_input, return_tensors="pt").to(model.device)
+        # Generate prediction
+        with torch.no_grad():
+            generated_ids = model.generate(**inputs, max_new_tokens=512)
+        # Decode the output
+        full_response = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
+        # --- FIX: Post-process the response to remove the prompt ---
+        # The model's actual output starts after the "assistant" marker.
+        # We split the full response by this marker and take the last part.
+        parts = full_response.split("assistant")
+        if len(parts) > 1:
+            # Take the last part and remove any leading/trailing whitespace
+            cleaned_response = parts[-1].strip()
+        else:
+            # If the marker isn't found, return the full response as a fallback
+            cleaned_response = full_response
+        # --- END OF FIX ---
+        # Clean up memory
+        gc.collect()
+        torch.cuda.empty_cache()
+        return cleaned_response
+    except Exception as e:
+        print(f"An error occurred during inference: {e}")
+        return f"An error occurred: {str(e)}"
+# --- Create and Launch the Gradio Interface ---
+demo = gr.Interface(
+    fn=perform_ocr_on_image,
+    inputs=gr.Image(type="pil", label="Upload Arabic Document Image"),
+    outputs=gr.Textbox(label="Transcription", lines=10, show_copy_button=True),
+    title="Basira: Fine-Tuned Qwen-VL for Arabic OCR",
+    description="A demo for the Qwen-VL 2.5 (3B) model, fine-tuned for enhanced Arabic OCR. Upload an image to see the transcription.",
+    allow_flagging="never"
+)
+if _name_ == "_main_":
+    demo.launch()

requirements.txt ADDED Viewed

	@@ -0,0 +1,11 @@

+# requirements.txt
+torch
+transformers
+peft
+accelerate
+bitsandbytes
+Pillow
+gradio
+sentencepiece
+qwen-vl-utils
+torchvision