# Streamlit chatbot trainer: a retrieval-based bot taught from uploaded .txt / .csv files.
| import os | |
| import torch | |
| import torch.nn as nn | |
| import torch.optim as optim | |
| import streamlit as st | |
| import pandas as pd | |
| from sklearn.feature_extraction.text import TfidfVectorizer | |
| from sklearn.metrics.pairwise import cosine_similarity | |
# === Neural Network for Chatbot ===
class ChatBotNN(nn.Module):
    """Minimal two-layer feed-forward network (Linear -> ReLU -> Linear).

    NOTE(review): this model is defined but never trained or queried by the
    retrieval-based chat logic below — presumably reserved for future use.
    """

    def __init__(self, input_dim, hidden_dim, output_dim):
        """Build the two linear layers sized input_dim -> hidden_dim -> output_dim."""
        super().__init__()
        self.fc1 = nn.Linear(input_dim, hidden_dim)
        self.fc2 = nn.Linear(hidden_dim, output_dim)
        self.activation = nn.ReLU()

    def forward(self, x):
        """Return the logits for input batch ``x``."""
        hidden = self.activation(self.fc1(x))
        return self.fc2(hidden)
# === Helper Functions ===
def process_text_file(file_path):
    """
    Process a plain text file into a list of sentences.

    Each non-blank line in the text file is treated as one sentence;
    surrounding whitespace is stripped.

    Parameters
    ----------
    file_path : str
        Path to the text file on disk.

    Returns
    -------
    list[str]
        The non-empty, stripped lines of the file, in order.
    """
    # Explicit UTF-8: the platform default encoding (e.g. cp1252 on Windows)
    # would silently mangle or reject non-ASCII uploaded text.
    with open(file_path, "r", encoding="utf-8") as f:
        # Iterate the file directly instead of readlines() — no need to
        # materialize the whole file as a list first.
        return [line.strip() for line in f if line.strip()]
def process_csv_file(file_path):
    """
    Process a CSV file into a list of sentences.

    Assumes the CSV has a column named 'text'.

    Parameters
    ----------
    file_path : str
        Path to the CSV file on disk.

    Returns
    -------
    list[str]
        Non-null values of the 'text' column, coerced to str.

    Raises
    ------
    ValueError
        If the CSV has no 'text' column.
    """
    data = pd.read_csv(file_path)
    # Guard clause: fail fast with a clear message on a malformed upload.
    if 'text' not in data.columns:
        raise ValueError("CSV file must have a 'text' column.")
    # Coerce to str: a purely numeric 'text' column is parsed as ints/floats,
    # which would later break TfidfVectorizer (it expects string documents).
    return data['text'].dropna().astype(str).tolist()
# === Training Data ===
# Module-level state shared by train_bot() and generate_response():
# `corpus` accumulates every training sentence seen so far; `vectorizer`
# is refit on the full corpus after each upload.
corpus = []
vectorizer = TfidfVectorizer()
def train_bot(file_path, file_type):
    """
    Train the chatbot by adding content from the uploaded file to the corpus.

    Parameters
    ----------
    file_path : str
        Path of the saved upload on disk.
    file_type : str
        File extension, e.g. "txt", ".TXT", "csv" — case-insensitive,
        a leading dot is tolerated.

    Raises
    ------
    ValueError
        If the file type is unsupported, or the upload contributes no
        training sentences.
    """
    global corpus
    # Normalize so "TXT", ".txt", ".CSV", etc. are all accepted.
    normalized = file_type.lower().lstrip(".")
    if normalized == "txt":
        corpus += process_text_file(file_path)
    elif normalized == "csv":
        corpus += process_csv_file(file_path)
    else:
        raise ValueError("Unsupported file type. Use .txt or .csv.")
    if not corpus:
        # TfidfVectorizer.fit raises an opaque "empty vocabulary" error on an
        # empty corpus; fail with a clearer message instead.
        raise ValueError("No training sentences found in the uploaded file.")
    # Refit the vectorizer on the updated corpus so new vocabulary is indexed.
    vectorizer.fit(corpus)
def generate_response(user_input):
    """
    Generate a chatbot response based on the trained corpus using cosine similarity.

    Parameters
    ----------
    user_input : str
        The user's chat message.

    Returns
    -------
    str
        The most similar corpus sentence, or a fallback message when the
        corpus is empty or shares no vocabulary with the input.
    """
    if not corpus:
        return "I don't know much yet. Please upload some files to teach me!"
    # Vectorize user input and the corpus with the already-fitted vectorizer.
    user_vector = vectorizer.transform([user_input])
    corpus_vectors = vectorizer.transform(corpus)
    # Compute cosine similarity between the input and every corpus sentence.
    similarities = cosine_similarity(user_vector, corpus_vectors)
    # When the input shares no vocabulary with the corpus every similarity is
    # zero and argmax would arbitrarily pick the first sentence — answer
    # honestly instead of returning a random-looking match.
    if similarities.max() == 0:
        return "I don't know much yet. Please upload some files to teach me!"
    most_similar_idx = similarities.argmax()
    return corpus[most_similar_idx]
# === Streamlit App ===
# Top-level script body: Streamlit re-executes this on every user interaction,
# so module state (corpus/vectorizer) persists only within one server process.
st.title("Chatbot Trainer with File Uploads")
st.write("""
### How it Works:
1. Upload `.txt` or `.csv` files to teach the chatbot.
- **.txt**: Each line represents one training sentence.
- **.csv**: Must have a column named `text` for training sentences.
2. Interact with the chatbot in real-time.
3. Watch the chatbot improve as you train it with more files!
""")
uploaded_file = st.file_uploader("Upload a file (.txt or .csv)", type=["txt", "csv"])
if uploaded_file is not None:
    # Save the uploaded file locally so the training helpers can read it by path.
    file_path = os.path.join("uploads", uploaded_file.name)
    os.makedirs("uploads", exist_ok=True)
    with open(file_path, "wb") as f:
        f.write(uploaded_file.getbuffer())
    # Train the chatbot on the uploaded file.
    st.write("Training the chatbot with the uploaded file...")
    # Derive the extension from the original filename ("txt" or "csv").
    file_extension = uploaded_file.name.split(".")[-1]
    train_bot(file_path, file_extension)
    st.success("Training complete!")
# Chat Interface
st.write("### Chat with the Bot!")
user_input = st.text_input("You:", placeholder="Type something to chat...")
if user_input:
    response = generate_response(user_input)
    st.write(f"**Bot:** {response}")