Spaces:

m7mdal7aj
/

KB-VQA

Sleeping

App Files Files Community

KB-VQA / app.py

m7mdal7aj

Update app.py

a650af8 verified almost 2 years ago

raw

history blame

6.35 kB

	import streamlit as st
	import torch
	import bitsandbytes
	import accelerate
	import scipy
	import copy
	from PIL import Image
	import torch.nn as nn
	from my_model.object_detection import detect_and_draw_objects
	from my_model.captioner.image_captioning import get_caption
	from my_model.utilities import free_gpu_resources
	from my_model.KBVQA import KBVQA, prepare_kbvqa_model
	import my_model.utilities.st_config as st_config




	class ImageHandler:
	    """Stateless helpers for running the vision side of the pipeline."""

	    @staticmethod
	    def analyze_image(image, model, show_processed_image=False):
	        """Caption an image and list the objects detected in it.

	        The input is deep-copied first, so ``model`` only ever receives the
	        copy and the caller's ``image`` object is never handed to it.

	        Args:
	            image: The image to analyse.
	            model: Object exposing ``get_caption(img)`` and
	                ``detect_objects(img)``; the latter must return a pair
	                ``(image_with_boxes, detected_objects_str)``.
	            show_processed_image: When true, the annotated image returned by
	                ``detect_objects`` is rendered with ``st.image``.

	        Returns:
	            A ``(caption, detected_objects_str)`` tuple.
	        """
	        working_copy = copy.deepcopy(image)
	        caption_text = model.get_caption(working_copy)
	        detection_result = model.detect_objects(working_copy)
	        annotated_image, objects_text = detection_result

	        if show_processed_image:
	            st.image(annotated_image)

	        return caption_text, objects_text

	    @staticmethod
	    def free_gpu_resources():
	        """Delegate to the module-level ``free_gpu_resources`` utility."""
	        # Inside a method body this name resolves to the imported module-level
	        # function, not to this static method, so there is no recursion.
	        free_gpu_resources()

	class QuestionAnswering:
	    """Stateless helper that produces an answer and echoes its context."""

	    @staticmethod
	    def answer_question(image, question, caption, detected_objects_str, model):
	        """Generate an answer and display the inputs that produced it.

	        The answer is generated first; afterwards the image, the caption, a
	        separator line and the detected-objects string are written to the
	        page, in that order.

	        Args:
	            image: Image shown with ``st.image``.
	            question: The user's question, forwarded to the model.
	            caption: Caption text forwarded to the model and displayed.
	            detected_objects_str: Detected-objects text forwarded to the
	                model and displayed.
	            model: Object exposing
	                ``generate_answer(question, caption, detected_objects_str)``.

	        Returns:
	            Whatever ``model.generate_answer`` returns.
	        """
	        generated = model.generate_answer(question, caption, detected_objects_str)

	        st.image(image)
	        for piece in (caption, "----------------", detected_objects_str):
	            st.write(piece)

	        return generated

	class UIComponents:
	    """Reusable Streamlit widgets for the inference page."""

	    @staticmethod
	    def display_image_selection(sample_images):
	        """Show the sample images side by side, each with a select button.

	        Clicking a button stores the opened image in
	        ``st.session_state['current_image']`` and resets the per-image state
	        (``qa_history``, ``analysis_done``, ``answer_in_progress``).

	        Args:
	            sample_images: Sequence of paths (anything ``PIL.Image.open``
	                accepts). May be empty or ``None``, in which case nothing is
	                rendered.
	        """
	        if not sample_images:
	            # st.columns() rejects a column count of zero, so with no samples
	            # configured there is simply nothing to draw.
	            return

	        cols = st.columns(len(sample_images))
	        for idx, (col, sample_image_path) in enumerate(zip(cols, sample_images)):
	            with col:
	                image = Image.open(sample_image_path)
	                st.image(image, use_column_width=True)
	                if st.button(f'Select Sample Image {idx + 1}', key=f'sample_{idx}'):
	                    # A new image invalidates any previous analysis and Q&A.
	                    st.session_state['current_image'] = image
	                    st.session_state['qa_history'] = []
	                    st.session_state['analysis_done'] = False
	                    st.session_state['answer_in_progress'] = False

	def load_kbvqa_model(detection_model):
	    """Make sure a KBVQA model is available in the Streamlit session.

	    If ``st.session_state['kbvqa']`` already holds a model it is reused;
	    otherwise ``prepare_kbvqa_model(detection_model)`` is called and its
	    result is stored under that key.

	    Args:
	        detection_model: Name of the object-detection backend, forwarded to
	            ``prepare_kbvqa_model``.

	    Returns:
	        True when a model is available after the call (already loaded or
	        freshly prepared), False when preparation produced a falsy result.
	    """
	    if st.session_state.get('kbvqa') is not None:
	        st.write("Model already loaded.")
	        # An existing model is ready for use; report success so the caller
	        # goes on to render the inference UI.
	        return True

	    model = prepare_kbvqa_model(detection_model)
	    st.session_state['kbvqa'] = model
	    if model:
	        st.write("Model is ready for inference.")
	        return True
	    return False

	def set_model_confidence(detection_model):
	    """Render the confidence slider and apply its value to the loaded model.

	    The slider starts at 0.2 when ``detection_model`` is ``"yolov5"`` and at
	    0.4 for any other value. The chosen value is written to the
	    ``detection_confidence`` attribute of ``st.session_state['kbvqa']``, so a
	    model must already be stored under that key.

	    Args:
	        detection_model: Name of the selected object-detection backend.
	    """
	    if detection_model == "yolov5":
	        initial_value = 0.2
	    else:
	        initial_value = 0.4

	    slider_settings = dict(
	        min_value=0.1,
	        max_value=0.9,
	        value=initial_value,
	        step=0.1,
	    )
	    chosen_level = st.slider("Select Detection Confidence Level", **slider_settings)

	    st.session_state['kbvqa'].detection_confidence = chosen_level

	def image_qa_app(kbvqa_model):
	    """Streamlit app interface for image QA.

	    Lets the user pick a sample image or upload one, analyse it (caption plus
	    detected objects), then ask questions about it. All per-image state lives
	    in ``st.session_state`` (``current_image``, ``caption``,
	    ``detected_objects_str``, ``analysis_done``, ``qa_history``).

	    Args:
	        kbvqa_model: The loaded model, passed to
	            ``ImageHandler.analyze_image`` and
	            ``QuestionAnswering.answer_question``.
	    """
	    sample_images = st_config.SAMPLE_IMAGES
	    UIComponents.display_image_selection(sample_images)

	    uploaded_image = st.file_uploader("Or upload an Image", type=["png", "jpg", "jpeg"])
	    if uploaded_image is None:
	        # Uploader is empty: forget the last upload so that the same file can
	        # be submitted again later and still be treated as new.
	        st.session_state['uploaded_image_id'] = None
	    else:
	        # The uploader keeps returning the same file on every rerun. Resetting
	        # state unconditionally would wipe 'analysis_done' each time a button
	        # is clicked, so only react when the upload actually changed.
	        upload_id = getattr(uploaded_image, 'file_id', None) or (
	            uploaded_image.name,
	            uploaded_image.size,
	        )
	        if st.session_state.get('uploaded_image_id') != upload_id:
	            st.session_state['uploaded_image_id'] = upload_id
	            st.session_state['current_image'] = Image.open(uploaded_image)
	            st.session_state['qa_history'] = []
	            st.session_state['analysis_done'] = False
	            st.session_state['answer_in_progress'] = False

	    has_image = st.session_state.get('current_image') is not None
	    if has_image and not st.session_state.get('analysis_done', False):
	        if st.button('Analyze Image'):
	            caption, detected_objects_str = ImageHandler.analyze_image(
	                st.session_state['current_image'], kbvqa_model
	            )
	            st.session_state['caption'] = caption
	            st.session_state['detected_objects_str'] = detected_objects_str
	            st.session_state['analysis_done'] = True

	    if st.session_state.get('analysis_done', False):
	        question = st.text_input("Ask a question about this image:")
	        if st.button('Get Answer'):
	            answer = QuestionAnswering.answer_question(
	                st.session_state['current_image'],
	                question,
	                st.session_state.get('caption', ''),
	                st.session_state.get('detected_objects_str', ''),
	                kbvqa_model
	            )
	            # Tolerate a session in which the history list was never created.
	            if 'qa_history' not in st.session_state:
	                st.session_state['qa_history'] = []
	            st.session_state['qa_history'].append((question, answer))

	    for q, a in st.session_state.get('qa_history', []):
	        st.text(f"Q: {q}\nA: {a}\n")

	def run_inference():
	    """Main function to run inference based on the selected method.

	    Renders the method and detection-model selectors. For the fine-tuned
	    method it (re)loads the KBVQA model when none has been loaded yet or when
	    the detection model changed, then renders the confidence slider and the
	    image QA interface on every script run for which a model is available.
	    The in-context-learning method currently renders nothing further.
	    """
	    st.title("Run Inference")

	    method = st.selectbox(
	        "Choose a method:",
	        ["Fine-Tuned Model", "In-Context Learning (n-shots)"],
	        index=0
	    )

	    if method == "Fine-Tuned Model":
	        detection_model = st.selectbox(
	            "Choose a model for object detection:",
	            ["yolov5", "detic"],
	            index=0
	        )

	        # .get() avoids a KeyError if 'detection_model' was never recorded.
	        needs_load = (
	            'kbvqa' not in st.session_state
	            or st.session_state.get('detection_model') != detection_model
	        )
	        if needs_load:
	            # Drop any model built for a different detector first; otherwise
	            # load_kbvqa_model sees the existing one and skips loading.
	            st.session_state['kbvqa'] = None
	            st.session_state['detection_model'] = detection_model
	            load_kbvqa_model(detection_model)

	        # Streamlit re-executes the script on every widget interaction, so the
	        # UI must be drawn whenever a model is available, not only on the run
	        # in which it was loaded.
	        if st.session_state.get('kbvqa'):
	            set_model_confidence(detection_model)
	            image_qa_app(st.session_state['kbvqa'])



	def main():
	    """Render the sidebar navigation and the page selected in it."""
	    st.sidebar.title("Navigation")
	    selection = st.sidebar.radio("Go to", ["Home", "Dataset Analysis", "Evaluation Results", "Run Inference", "Dissertation Report"])

	    if selection == "Home":
	        st.title("MultiModal Learning for Knowledge-Based Visual Question Answering")
	        st.write("Home page content goes here...")

	    elif selection == "Dissertation Report":
	        st.title("Dissertation Report")
	        st.write("Click the link below to view the PDF.")
	        # Read the bytes inside a context manager so the file handle is closed
	        # on every script run instead of being leaked to the widget.
	        try:
	            with open("Files/Dissertation Report.pdf", "rb") as pdf_file:
	                pdf_bytes = pdf_file.read()
	        except FileNotFoundError:
	            st.error("The dissertation report file could not be found.")
	        else:
	            st.download_button(
	                label="Download PDF",
	                data=pdf_bytes,
	                file_name="example.pdf",
	                mime="application/octet-stream"
	            )

	    elif selection == "Evaluation Results":
	        st.title("Evaluation Results")
	        st.write("This is a Place Holder until the contents are uploaded.")

	    elif selection == "Dataset Analysis":
	        st.title("OK-VQA Dataset Analysis")
	        st.write("This is a Place Holder until the contents are uploaded.")

	    elif selection == "Run Inference":
	        run_inference()







	# Build the UI only when this file is executed as the entry script, not when
	# it is imported as a module.
	if __name__ == "__main__":
	    main()