# chatbotO3 / app.py
# irhamni's picture
# Update app.py
# c828795 verified
# -*- coding: utf-8 -*-
"""app.ipynb
Automatically generated by Colab.
Original file is located at
https://colab.research.google.com/drive/1iEIEymiaYLbhMx5NqPutAwwI2rMCyIOQ
"""
import pandas as pd
import gradio as gr
from transformers import AutoTokenizer, AutoModel
import torch
import torch.nn.functional as F
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np
import os
import sys
# --- 1. Model and data configuration ---
# IndoBERT-Large-P1 checkpoint (stronger than MiniLM).
MODEL_NAME = "indobenchmark/indobert-large-p1"

# Load the tokenizer and the encoder once at start-up; nothing else in the
# script can work without them, so any failure aborts with exit status 1.
try:
    print(f"Memuat Model Kuat: {MODEL_NAME}...")
    tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
    model = AutoModel.from_pretrained(MODEL_NAME)
    print("Model berhasil dimuat.")
except Exception as load_error:
    print(f"FATAL ERROR: Gagal memuat model IndoBERT-Large. Detail: {load_error}")
    raise SystemExit(1)
# Mean pooling (required for checkpoints that are not Sentence-BERT models).
def mean_pooling(model_output, attention_mask):
    """Average the token embeddings of each sequence, skipping masked tokens.

    Args:
        model_output: Model output whose first element holds the per-token
            embeddings.
        attention_mask: Mask with one entry per token; positions with 0 are
            excluded from the average.

    Returns:
        Tensor with one pooled embedding per sequence.
    """
    hidden_states = model_output[0]
    # Broadcast the mask over the embedding dimension so it can weight tokens.
    mask = attention_mask.unsqueeze(-1).expand(hidden_states.size()).float()
    masked_total = (hidden_states * mask).sum(dim=1)
    # Clamp the denominator so a fully masked sequence does not divide by zero.
    token_counts = mask.sum(dim=1).clamp(min=1e-9)
    return masked_total / token_counts
# Sentence-embedding helper.
def get_embeddings(texts, batch_size=32):
    """Return L2-normalised sentence embeddings for the given texts.

    The texts are tokenised (padded per batch, truncated to 512 tokens), run
    through the module-level ``model`` without gradient tracking, mean-pooled
    over the attention mask and normalised to unit length.

    Args:
        texts: A single string or an iterable of strings.
        batch_size: Maximum number of texts sent through the model at once.
            Batching keeps peak memory bounded when many texts are embedded.

    Returns:
        A 2-D NumPy array with one unit-length row per input text.

    Raises:
        ValueError: If ``texts`` is empty or ``batch_size`` is smaller than 1.
    """
    # A bare string must be wrapped, otherwise slicing below would cut it
    # into character chunks instead of treating it as one text.
    if isinstance(texts, str):
        texts = [texts]
    else:
        texts = list(texts)

    if not texts:
        raise ValueError("texts must contain at least one item")
    if batch_size < 1:
        raise ValueError("batch_size must be at least 1")

    chunks = []
    for start in range(0, len(texts), batch_size):
        batch = texts[start:start + batch_size]
        encoded_input = tokenizer(
            batch,
            padding=True,
            truncation=True,
            return_tensors='pt',
            max_length=512,
        )
        # Inference only: no gradients are needed.
        with torch.no_grad():
            model_output = model(**encoded_input)
        sentence_embeddings = mean_pooling(model_output, encoded_input['attention_mask'])
        chunks.append(F.normalize(sentence_embeddings, p=2, dim=1).numpy())

    return np.concatenate(chunks, axis=0)
# --- Load the FAQ data from CSV ---
FILE_PATH = "perpustakaan_faq.csv"
df_faq = None
try:
    print(f"Mencoba memuat file: {FILE_PATH}...")
    df_faq = pd.read_csv(FILE_PATH)
    # Normalise the source column names to the ones used by the rest of the script.
    df_faq = df_faq.rename(columns={'user_query': 'question', 'chatbot_response': 'answer'})

    # rename() silently ignores absent columns, so verify the schema here
    # instead of failing later with an unhandled KeyError.
    missing_columns = [col for col in ('question', 'answer') if col not in df_faq.columns]
    if missing_columns:
        raise ValueError(
            f"Kolom wajib tidak ditemukan: {missing_columns}. "
            f"Kolom yang tersedia: {list(df_faq.columns)}"
        )

    # Rows without a question or an answer cannot be embedded or returned.
    # Reset the index afterwards so row labels match row positions.
    df_faq = df_faq.dropna(subset=['question', 'answer']).reset_index(drop=True)
    print(f"File '{FILE_PATH}' berhasil dimuat dengan {len(df_faq)} baris data.")
except Exception as e:
    print(f"\nFATAL ERROR: Gagal memuat data CSV. Pastikan file ada di path. Detail: {e}")
    sys.exit(1)
# --- 2. "Training" step: pre-compute the FAQ embeddings ---
faq_embeddings = None

# Abort early when there is nothing to embed.
if df_faq is None or df_faq.empty:
    print("FATAL ERROR: Data tidak ditemukan atau kosong.")
    sys.exit(1)

# Embed every FAQ question once so each user query only needs a single
# forward pass at answer time.
print("Menghitung embeddings untuk data FAQ...")
faq_questions = df_faq['question'].tolist()
faq_embeddings = get_embeddings(faq_questions)
print("Penghitungan embeddings selesai.")
# --- 3. Chatbot logic (retrieval) ---
# The default similarity threshold was lowered to 0.60.
def library_chatbot(user_query, threshold=0.60):
    """Answer a user question with the most similar FAQ entry.

    The query is embedded, compared against the pre-computed FAQ question
    embeddings with cosine similarity, and the answer of the best match is
    returned when its score reaches ``threshold``.

    Args:
        user_query: The question typed by the user. ``None``, an empty string
            and whitespace-only input all yield the greeting message.
        threshold: Minimum cosine similarity for a match to be accepted.

    Returns:
        The greeting, the matched FAQ answer, or a fallback message that
        reports the highest similarity score found.
    """
    # Whitespace-only input carries no question; treat it like an empty one
    # rather than matching it against the FAQ.
    query = user_query.strip() if user_query else ""
    if not query:
        return "Halo! Silakan ajukan pertanyaan seputar perpustakaan."

    user_embedding = get_embeddings([query])
    # One query row against all FAQ rows -> take the single row of scores.
    similarities = cosine_similarity(user_embedding, faq_embeddings)[0]
    best_match_index = int(np.argmax(similarities))
    max_similarity = float(similarities[best_match_index])

    if max_similarity >= threshold:
        # argmax yields a row position, so use positional (iloc) lookup; a
        # label lookup would be wrong if the index is not 0..n-1.
        return df_faq['answer'].iloc[best_match_index]

    return (
        "Mohon maaf, saya belum dapat menemukan jawaban yang spesifik untuk pertanyaan Anda "
        "dalam data yang saya miliki saat ini (Skor Kemiripan Tertinggi: {:.4f}). "
        "Silakan ajukan pertanyaan dengan kata kunci yang berbeda.".format(max_similarity)
    )
# --- 4. Present the chatbot (Gradio interface) ---
title = "Perpustakaan BI Kantor Sumatera Selatan"
# User-facing description. The leading emoji was mis-encoded and the text
# named the wrong architecture: IndoBERT is BERT-based, not RoBERTa.
description = (
    "🤖 Chatbot ini menggunakan Model **IndoBERT-Large-P1** yang lebih kuat dengan arsitektur BERT. "
    "Ambang batas (Threshold) diturunkan menjadi **0.60**."
)

# Build the Gradio interface: one text box in, one text box out.
iface = gr.Interface(
    fn=library_chatbot,
    inputs=gr.Textbox(lines=2, placeholder="Ketik pertanyaan Anda di sini...", label="Pertanyaan Pengguna"),
    outputs=gr.Textbox(label="Jawaban Chatbot"),
    title=title,
    description=description,
    theme=gr.themes.Soft(),
    allow_flagging='never'
)

# Start the Gradio app; share=True requests a public URL.
print("\n--- Menjalankan Gradio ---")
print("Tunggu sebentar hingga link publik muncul (Running on public URL: ...)")
iface.launch(share=True)