# --- Earlier version (kept commented out): fine-tuned BLIP captioning model ---
# import gradio as gr
# from transformers import BlipProcessor, BlipForConditionalGeneration
# from PIL import Image
# import torch

# # Load model and processor from your Hugging Face repo
# model_id = "khalednabawi11/blip-roco-model"
# processor = BlipProcessor.from_pretrained(model_id)
# model = BlipForConditionalGeneration.from_pretrained(model_id)
# model.eval()

# device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# model.to(device)

# def generate_caption(image):
#     # Preprocess
#     inputs = processor(image, return_tensors="pt").to(device)
#     # Generate caption
#     with torch.no_grad():
#         output = model.generate(**inputs, max_new_tokens=250, num_beams=5)
#     # Decode
#     caption = processor.decode(output[0], skip_special_tokens=True)
#     return caption

# # Alternative caption function with a text prompt and different decoding settings:
# # def generate_caption(image):
# #     prompt = "Radiology report:"
# #     inputs = processor(images=image, text=prompt, return_tensors="pt").to(device)
# #     output = model.generate(
# #         **inputs,
# #         max_length=250,
# #         num_beams=3,
# #         repetition_penalty=1.2,
# #         length_penalty=0.0,
# #         early_stopping=True,
# #         # truncation=True
# #     )
# #     caption = processor.batch_decode(output, skip_special_tokens=True)[0]
# #     return caption.strip()

# # Gradio UI
# demo = gr.Interface(
#     fn=generate_caption,
#     inputs=gr.Image(type="pil", label="Upload an Image"),
#     outputs=gr.Textbox(label="Generated Caption"),
#     title="BLIP Medical Caption Generator",
#     description="Upload an image and get a caption generated by your fine-tuned BLIP model.",
# )

# if __name__ == "__main__":
#     demo.launch()

# --- Earlier version (kept commented out): MedGemma / Gemma 3n loaded directly ---
# import os
# import gradio as gr
# from transformers import AutoProcessor, AutoModelForImageTextToText
# from PIL import Image
# import torch
# from huggingface_hub import login

# hf_token = os.getenv("hf_token")
# login(token=hf_token)

# # Load one model/processor pair (the Gemma 3n preview is an alternative to MedGemma)
# processor = AutoProcessor.from_pretrained("google/medgemma-4b-it")
# model = AutoModelForImageTextToText.from_pretrained("google/medgemma-4b-it", device_map="cpu")
# # processor = AutoProcessor.from_pretrained("google/gemma-3n-E4B-it-litert-preview")
# # model = AutoModelForImageTextToText.from_pretrained("google/gemma-3n-E4B-it-litert-preview", device_map="cpu")
# model.eval()

# # Inference function
# def generate_caption(image, prompt):
#     inputs = processor(images=image, text=prompt, return_tensors="pt")
#     with torch.no_grad():
#         outputs = model.generate(
#             **inputs,
#             max_new_tokens=256,
#             num_beams=4,
#             early_stopping=True
#         )
#     caption = processor.decode(outputs[0], skip_special_tokens=True)
#     return caption.strip()

# # Gradio UI
# demo = gr.Interface(
#     fn=generate_caption,
#     inputs=[
#         gr.Image(type="pil", label="Upload Medical Image"),
#         gr.Textbox(label="Prompt", value="Radiology report:")
#     ],
#     outputs=gr.Textbox(label="Generated Caption"),
#     title="Medical Scan Report Generator",
#     description="Upload a medical image and enter a prompt (e.g. 'Radiology report:') to generate a diagnostic caption.",
# )

# if __name__ == "__main__":
#     demo.launch()

import os
import torch
from transformers import pipeline
from PIL import Image
import gradio as gr
from huggingface_hub import login

# Authenticate with the Hugging Face Hub (MedGemma is a gated model);
# the token is read from the Space secret / environment variable "hf_token".
hf_token = os.getenv("hf_token")
if hf_token:
    login(token=hf_token)

# model_id = "google/gemma-3n-E4B-it-litert-preview"  # alternative model
model_id = "google/medgemma-4b-it"

# Load the MedGemma pipeline
pipe = pipeline(
    "image-text-to-text",
    model=model_id,
    torch_dtype=torch.bfloat16,
    device="cuda" if torch.cuda.is_available() else "cpu",
)
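
# For reference, a minimal sketch of the chat-style input/output this pipeline
# works with (structure assumed from the transformers "image-text-to-text" task;
# field names may vary slightly across versions):
#
#   messages = [{"role": "user",
#                "content": [{"type": "text", "text": "Describe this scan."},
#                            {"type": "image", "image": pil_image}]}]
#   out = pipe(text=messages, max_new_tokens=100)
#   # out[0]["generated_text"] holds the full conversation; the final entry is
#   # the assistant turn whose "content" field contains the generated text.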

# Inference function
def analyze_scan(image):
    # Build a chat-style request: a system turn setting the radiologist persona
    # and a user turn containing the instruction plus the uploaded image.
    messages = [
        {
            "role": "system",
            "content": [{"type": "text", "text": "You are an expert radiologist."}]
        },
        {
            "role": "user",
            "content": [
                {"type": "text", "text": "Describe this medical scan image, giving a full, detailed report."},
                {"type": "image", "image": image},
            ]
        }
    ]
    output = pipe(text=messages, max_new_tokens=200)
    # The pipeline returns the whole conversation; the last message is the
    # model's reply, whose "content" field holds the generated report.
    return output[0]["generated_text"][-1]["content"]
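
# Quick local check (illustrative sketch; "sample_scan.png" is a placeholder
# path, not a file shipped with this Space):
#     print(analyze_scan(Image.open("sample_scan.png")))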

# Gradio Interface
demo = gr.Interface(
    fn=analyze_scan,
    inputs=gr.Image(type="pil", label="Upload Medical Scan"),
    outputs=gr.Textbox(label="Scanalyze Medical Scan Report"),
    title="Medical Scan Analyzer (MedGemma)",
    description="Upload a medical scan image to get an AI-generated diagnostic report using Google's MedGemma model.",
    allow_flagging="never",
)

if __name__ == "__main__":
    demo.launch()