Spaces:

KRLabsOrg
/

LettuceDetect-Multilingual

Sleeping

Kovács Ádám

Changed

811e6a2 9 months ago

11.2 kB

	import streamlit as st
	import streamlit.components.v1 as components

	from lettucedetect.models.inference import HallucinationDetector


	def create_interactive_text(text: str, spans: list[dict[str, int \| float]]) -> str:
	"""Create interactive HTML with highlighting and hover effects.

	:param text: The text to create the interactive text for.
	:param spans: The spans to highlight.
	:return: The interactive text.
	"""
	html_text = text

	for span in sorted(spans, key=lambda x: x["start"], reverse=True):
	span_text = text[span["start"] : span["end"]]
	highlighted_span = f'<span class="hallucination" title="Confidence: {span["confidence"]:.3f}">{span_text}</span>'
	html_text = html_text[: span["start"]] + highlighted_span + html_text[span["end"] :]

	return f"""
	<style>
	.container {{
	font-family: Arial, sans-serif;
	font-size: 16px;
	line-height: 1.6;
	padding: 20px;
	}}
	.hallucination {{
	background-color: rgba(255, 99, 71, 0.3);
	padding: 2px;
	border-radius: 3px;
	cursor: help;
	}}
	.hallucination:hover {{
	background-color: rgba(255, 99, 71, 0.5);
	}}
	</style>
	<div class="container">{html_text}</div>
	"""


	# Sample context/question/answer in English. It is shared by the English
	# transformer entry and the LLM-based entry, which use identical text.
	_ENGLISH_EXAMPLE_TEXT = {
	    "context": "France is a country in Europe. The capital of France is Paris. The population of France is 67 million.",
	    "question": "What is the capital of France? What is the population of France?",
	    "answer": "The capital of France is Paris. The population of France is 69 million.",
	}

	# One demo entry per selectable option, keyed by the label shown in the UI.
	# Transformer entries carry a "model_path"; the LLM entry carries a "method"
	# instead. Every entry provides "lang", the pre-filled "context", "question"
	# and "answer" texts, and a localized "output_label".
	LANGUAGE_EXAMPLES = {
	    "English (en)": {
	        "model_path": "KRLabsOrg/lettucedect-base-modernbert-en-v1",
	        "lang": "en",
	        **_ENGLISH_EXAMPLE_TEXT,
	        "output_label": "Predictions",
	    },
	    "German (de)": {
	        "model_path": "KRLabsOrg/lettucedect-210m-eurobert-de-v1",
	        "lang": "de",
	        "context": "Frankreich ist ein Land in Europa. Die Hauptstadt von Frankreich ist Paris. Die Bevölkerung Frankreichs beträgt 67 Millionen.",
	        "question": "Was ist die Hauptstadt von Frankreich? Wie groß ist die Bevölkerung Frankreichs?",
	        "answer": "Die Hauptstadt von Frankreich ist Paris. Die Bevölkerung Frankreichs beträgt 69 Millionen.",
	        "output_label": "Vorhersagen",
	    },
	    "French (fr)": {
	        "model_path": "KRLabsOrg/lettucedect-210m-eurobert-fr-v1",
	        "lang": "fr",
	        "context": "La France est un pays d'Europe. La capitale de la France est Paris. La population de la France est de 67 millions.",
	        "question": "Quelle est la capitale de la France? Quelle est la population de la France?",
	        "answer": "La capitale de la France est Paris. La population de la France est de 69 millions.",
	        "output_label": "Prédictions",
	    },
	    "Spanish (es)": {
	        "model_path": "KRLabsOrg/lettucedect-210m-eurobert-es-v1",
	        "lang": "es",
	        "context": "Francia es un país de Europa. La capital de Francia es París. La población de Francia es de 67 millones.",
	        "question": "¿Cuál es la capital de Francia? ¿Cuál es la población de Francia?",
	        "answer": "La capital de Francia es París. La población de Francia es de 69 millones.",
	        "output_label": "Predicciones",
	    },
	    "Italian (it)": {
	        "model_path": "KRLabsOrg/lettucedect-210m-eurobert-it-v1",
	        "lang": "it",
	        "context": "La Francia è un paese in Europa. La capitale della Francia è Parigi. La popolazione della Francia è di 67 milioni.",
	        "question": "Qual è la capitale della Francia? Qual è la popolazione della Francia?",
	        "answer": "La capitale della Francia è Parigi. La popolazione della Francia è di 69 milioni.",
	        "output_label": "Previsioni",
	    },
	    "Polish (pl)": {
	        "model_path": "KRLabsOrg/lettucedect-210m-eurobert-pl-v1",
	        "lang": "pl",
	        "context": "Kopernikanizm to teoria astronomiczna opracowana przez Mikołaja Kopernika, zgodnie z którą Słońce znajduje się w centrum Układu Słonecznego, a Ziemia i inne planety krążą wokół niego. Teoria ta została opublikowana w dziele 'O obrotach sfer niebieskich' w 1543 roku.",
	        "question": "Na czym polega teoria kopernikańska i kiedy została opublikowana?",
	        "answer": "Teoria kopernikańska zakłada, że Ziemia jest jednym z wielu ciał niebieskich krążących wokół Słońca. Kopernik opracował również zaawansowane równania matematyczne opisujące ruch satelitów, które zostały wykorzystane w XX wieku w programie kosmicznym NASA. Teoria została opublikowana w 1543 roku.",
	        "output_label": "Przewidywania",
	    },
	    "Chinese (cn)": {
	        "model_path": "KRLabsOrg/lettucedect-210m-eurobert-cn-v1",
	        "lang": "cn",
	        "context": "长城是中国古代的伟大防御工程，全长超过21,000公里。它的建造始于公元前7世纪，历经多个朝代。",
	        "question": "长城有多长？它是什么时候建造的？",
	        "answer": "长城全长约50,000公里。它的建造始于公元前3世纪，仅在秦朝时期。",
	        "output_label": "预测",
	    },
	    "LLM-Based": {
	        "method": "llm",
	        "lang": "en",
	        **_ENGLISH_EXAMPLE_TEXT,
	        "output_label": "LLM Predictions",
	    },
	}


	def _resolve_model_path(example, model_method, model_size):
	    """Return the model path to load for the current sidebar selection.

	    :param example: The selected ``LANGUAGE_EXAMPLES`` entry.
	    :param model_method: ``"transformer"`` or the entry's ``"method"`` value.
	    :param model_size: The selected "Model Size" radio label; only read for
	        transformer entries.
	    :return: The model path, or ``None`` for non-transformer methods.
	    """
	    if model_method != "transformer":
	        # For LLM-based method, no model path needed
	        return None

	    model_path = example["model_path"]
	    lowered = model_path.lower()
	    if "base" in lowered or "large" in lowered:
	        # Paths named base/large are left untouched; only the numerically
	        # sized paths (210m/610m) are switched.
	        return model_path
	    if "210m" in lowered and "Large" in model_size:
	        return model_path.replace("210m", "610m")
	    if "610m" in lowered and "Base" in model_size:
	        return model_path.replace("610m", "210m")
	    return model_path


	def main():
	    """Render the LettuceDetect multilingual Streamlit demo.

	    Builds the sidebar (language / model size / API key), loads the matching
	    detector, shows editable context, question and answer fields, and on
	    request runs span-level hallucination detection and highlights the result.
	    """
	    import os

	    st.set_page_config(page_title="Lettuce Detective", page_icon="🥬", layout="wide")

	    st.image(
	        "https://github.com/KRLabsOrg/LettuceDetect/blob/main/assets/lettuce_detective.png?raw=true",
	        width=600,
	    )

	    st.title("LettuceDetect Multilingual Demo 🌍")
	    st.markdown("### Detect hallucinations in 7 languages")

	    # Defaults for the values that only one of the two methods sets.
	    model_size = None
	    openai_api_key = None

	    # Create a sidebar for language selection and model options
	    with st.sidebar:
	        st.header("Settings")
	        selected_language = st.selectbox(
	            "Select Language",
	            list(LANGUAGE_EXAMPLES.keys()),
	        )

	        # Example data for the selected language
	        example = LANGUAGE_EXAMPLES[selected_language]

	        # Only show model size option for transformer-based models
	        model_method = example.get("method", "transformer")

	        if model_method == "transformer":
	            model_size = st.radio(
	                "Model Size",
	                ["Base (210M)", "Large (610M)"],
	                index=0,
	                help="Base models are faster, large models are more accurate.",
	            )
	        else:
	            # For LLM-based method
	            st.info("LLM-based detection requires an OpenAI API key")
	            openai_api_key = st.text_input("OpenAI API Key", type="password")

	        st.markdown("---")
	        st.markdown("### About")
	        st.markdown(
	            "LettuceDetect identifies hallucinations by comparing answers to provided context. "
	            "Highlighted text indicates content not supported by the source material."
	        )
	        st.markdown("[GitHub](https://github.com/KRLabsOrg/LettuceDetect) | [HuggingFace](https://huggingface.co/collections/KRLabsOrg/multilingual-hallucination-detection-682a2549c18ecd32689231ce)")

	    # Adjust model path based on selected size if needed
	    model_path = _resolve_model_path(example, model_method, model_size)

	    @st.cache_resource
	    def load_detector(method, model_path=None, lang=None, api_key=None):
	        # Exceptions are deliberately NOT caught here: returning None from a
	        # cached function would cache the failure, so a transient download
	        # error would stick until the cache is cleared. The caller handles it.
	        if api_key:
	            # NOTE(review): os.environ is process-wide, so on a shared
	            # deployment this key is visible to every session of the app.
	            os.environ["OPENAI_API_KEY"] = api_key

	        if method == "transformer":
	            return HallucinationDetector(
	                method=method,
	                model_path=model_path,
	                lang=lang,
	                trust_remote_code=True,
	            )
	        # LLM-based method
	        return HallucinationDetector(method=method)

	    # The LLM-based method cannot work without a key; a key already present in
	    # the environment counts as available.
	    missing_api_key = (
	        model_method != "transformer"
	        and not openai_api_key
	        and not os.environ.get("OPENAI_API_KEY")
	    )

	    # Load detector for the selected language
	    detector = None
	    if not missing_api_key:
	        with st.spinner(f"Loading {selected_language} model..."):
	            try:
	                detector = load_detector(
	                    method=model_method,
	                    model_path=model_path,
	                    lang=example["lang"],
	                    api_key=openai_api_key,
	                )
	            except Exception as e:  # UI boundary: report instead of crashing
	                st.error(f"Error loading model: {e}")

	    # Create a two-column layout
	    col1, col2 = st.columns(2)

	    with col1:
	        st.subheader("Input")
	        context = st.text_area("Context", example["context"], height=150)
	        question = st.text_area("Question", example["question"], height=80)
	        answer = st.text_area("Answer", example["answer"], height=100)

	    with col2:
	        st.subheader("Results")
	        if detector:
	            if st.button("Detect Hallucinations", type="primary"):
	                with st.spinner("Analyzing..."):
	                    try:
	                        predictions = detector.predict(
	                            context=[context], question=question, answer=answer, output_format="spans"
	                        )
	                    except Exception as e:  # UI boundary: report instead of crashing
	                        st.error(f"Error during detection: {e}")
	                    else:
	                        if predictions:
	                            st.success(f"Found {len(predictions)} hallucination(s)")
	                            st.markdown(f"{example['output_label']}:")
	                            html_content = create_interactive_text(answer, predictions)
	                            # Scrolling keeps long answers readable inside the
	                            # fixed-height frame instead of clipping them.
	                            components.html(html_content, height=200, scrolling=True)

	                            # Display raw predictions in a collapsible section
	                            with st.expander("Raw prediction data"):
	                                st.json(predictions)
	                        else:
	                            st.info("No hallucinations detected")
	        elif missing_api_key:
	            st.warning("Enter an OpenAI API key in the sidebar to use LLM-based detection.")
	        else:
	            st.error("Model not loaded. Please check your internet connection or try a different language.")

	    # Show information about current model
	    st.markdown("---")
	    if model_method == "transformer":
	        st.markdown(f"Current Model: {model_path}")
	    else:
	        st.markdown("Method: LLM-based hallucination detection")
	    st.markdown(f"Language: {example['lang']}")


	if __name__ == "__main__":
	    main()