# model / app.py
# admin08077's picture
# Update app.py
# 7c5377d verified
import os
import torch
import torch.nn as nn
import torch.optim as optim
import streamlit as st
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
# === Neural Network for Chatbot ===
class ChatBotNN(nn.Module):
    """Small feed-forward network: Linear -> ReLU -> Linear.

    Args:
        input_dim: Size of each input feature vector.
        hidden_dim: Width of the single hidden layer.
        output_dim: Size of each output vector.
    """

    def __init__(self, input_dim, hidden_dim, output_dim):
        super().__init__()
        # Submodules are registered in the order fc1, fc2, activation; the
        # attribute names double as the state_dict key prefixes.
        self.fc1 = nn.Linear(input_dim, hidden_dim)
        self.fc2 = nn.Linear(hidden_dim, output_dim)
        self.activation = nn.ReLU()

    def forward(self, x):
        """Project ``x`` to the hidden layer, apply ReLU, then project out."""
        hidden = self.fc1(x)
        hidden = self.activation(hidden)
        return self.fc2(hidden)
# === Helper Functions ===
def process_text_file(file_path):
    """Read a plain text file and return its non-blank lines as sentences.

    Each line is treated as one sentence. Surrounding whitespace is stripped
    and lines that are empty after stripping are dropped.

    Args:
        file_path: Path of the text file to read.

    Returns:
        A list of stripped, non-empty lines in file order.

    Raises:
        OSError: If the file cannot be opened.
        UnicodeDecodeError: If the file is not valid UTF-8.
    """
    # "utf-8-sig" decodes ordinary UTF-8 and additionally drops a leading
    # byte-order mark, which would otherwise become part of the first
    # sentence. An explicit encoding also avoids depending on the platform's
    # default locale encoding.
    with open(file_path, "r", encoding="utf-8-sig") as f:
        # Iterate the file lazily instead of materialising it via readlines().
        stripped_lines = (line.strip() for line in f)
        return [line for line in stripped_lines if line]
def process_csv_file(file_path):
    """Read the 'text' column of a CSV file and return it as sentences.

    Missing cells are skipped. Every remaining value is converted to ``str``
    and stripped, and values that are empty after stripping are dropped, so
    the result has the same shape as the plain-text loader's output: a list
    of non-empty strings.

    Args:
        file_path: Path of the CSV file to read.

    Returns:
        A list of non-empty strings taken from the 'text' column, in row order.

    Raises:
        ValueError: If the CSV has no column named 'text'.
    """
    data = pd.read_csv(file_path)
    if "text" not in data.columns:
        raise ValueError("CSV file must have a 'text' column.")

    # pandas infers numeric dtypes for all-numeric columns; coerce to str so
    # downstream text processing always receives strings.
    cleaned = (str(value).strip() for value in data["text"].dropna().tolist())
    return [sentence for sentence in cleaned if sentence]
# === Training Data ===
# Module-level state shared by train_bot() and generate_response().
# corpus: the training sentences loaded so far; train_bot() appends to it and
# generate_response() picks its reply from it.
corpus = []
# TF-IDF vectorizer; train_bot() refits it on the whole corpus after each
# file is added, and generate_response() uses it to vectorize text.
vectorizer = TfidfVectorizer()
def train_bot(file_path, file_type):
    """Add the sentences from a file to the corpus and refit the vectorizer.

    Args:
        file_path: Path of the file to learn from.
        file_type: File extension, "txt" or "csv". Matching ignores case and
            a leading dot, so "TXT" and ".csv" are accepted too.

    Raises:
        ValueError: If ``file_type`` is not a supported type, or if the
            loader or the vectorizer rejects the content.
    """
    normalized_type = file_type.lower().lstrip(".")
    if normalized_type == "txt":
        new_sentences = process_text_file(file_path)
    elif normalized_type == "csv":
        new_sentences = process_csv_file(file_path)
    else:
        raise ValueError("Unsupported file type. Use .txt or .csv.")

    # In-place extend mutates the shared module-level list, so no `global`
    # declaration is required.
    corpus.extend(new_sentences)

    # Fitting TF-IDF on zero documents raises "empty vocabulary"; with nothing
    # learned yet there is nothing to fit, and generate_response() already
    # handles an empty corpus.
    if not corpus:
        return

    # Refit on the full, updated corpus.
    vectorizer.fit(corpus)
def generate_response(user_input):
    """Return the corpus sentence most similar to ``user_input``.

    Similarity is the cosine similarity between the TF-IDF vector of the
    input and the TF-IDF vector of each corpus sentence.

    Args:
        user_input: The text typed by the user.

    Returns:
        The best-matching corpus sentence, or a fallback message when the
        corpus is empty or nothing in it overlaps with the input.
    """
    if not corpus:
        return "I don't know much yet. Please upload some files to teach me!"

    # Vectorize the query and every known sentence with the fitted vocabulary.
    user_vector = vectorizer.transform([user_input])
    corpus_vectors = vectorizer.transform(corpus)

    # One query row against len(corpus) columns; with a single row the
    # flattened argmax is the corpus index.
    similarities = cosine_similarity(user_vector, corpus_vectors)
    best_idx = int(similarities.argmax())

    # If the input shares no vocabulary with the corpus every score is 0 and
    # argmax falls back to index 0, which would return an arbitrary sentence.
    if similarities[0, best_idx] <= 0:
        return "I'm not sure how to respond to that yet. Try rephrasing or upload more files to teach me!"

    return corpus[best_idx]
# === Streamlit App ===
# Page header and usage instructions.
st.title("Chatbot Trainer with File Uploads")
st.write("""
### How it Works:
1. Upload `.txt` or `.csv` files to teach the chatbot.
- **.txt**: Each line represents one training sentence.
- **.csv**: Must have a column named `text` for training sentences.
2. Interact with the chatbot in real-time.
3. Watch the chatbot improve as you train it with more files!
""")

uploaded_file = st.file_uploader("Upload a file (.txt or .csv)", type=["txt", "csv"])
# NOTE(review): Streamlit re-runs this script on each interaction, so this
# branch retrains whenever a file is present in the uploader -- confirm that
# the module-level training state behaves as intended across reruns.
if uploaded_file is not None:
    # The file name is supplied by the client; keep only its final path
    # component so the upload cannot be written outside "uploads".
    safe_name = os.path.basename(uploaded_file.name)

    # Save the uploaded file locally.
    os.makedirs("uploads", exist_ok=True)
    file_path = os.path.join("uploads", safe_name)
    with open(file_path, "wb") as f:
        f.write(uploaded_file.getbuffer())

    # Train the chatbot on the uploaded file.
    st.write("Training the chatbot with the uploaded file...")
    # Lower-case the extension so "NOTES.TXT" is handled like "notes.txt".
    file_extension = safe_name.rsplit(".", 1)[-1].lower()
    try:
        train_bot(file_path, file_extension)
    except ValueError as exc:
        # Bad uploads (unsupported type, CSV without a 'text' column,
        # undecodable text) are reported in the page instead of surfacing as
        # a traceback; the run halts here just as the uncaught error did.
        st.error(f"Could not train on {safe_name}: {exc}")
        st.stop()
    st.success("Training complete!")

# Chat Interface
st.write("### Chat with the Bot!")
user_input = st.text_input("You:", placeholder="Type something to chat...")
if user_input:
    response = generate_response(user_input)
    st.write(f"**Bot:** {response}")