# Streamlit chatbot trainer: a retrieval-based bot taught from uploaded .txt / .csv files.
| import os | |
| import torch | |
| import torch.nn as nn | |
| import torch.optim as optim | |
| import streamlit as st | |
| import pandas as pd | |
| from sklearn.feature_extraction.text import TfidfVectorizer | |
| from sklearn.metrics.pairwise import cosine_similarity | |
# === Neural Network for Chatbot ===
class ChatBotNN(nn.Module):
    """Minimal two-layer feed-forward network (Linear -> ReLU -> Linear).

    NOTE(review): this model is defined but never trained or queried by the
    retrieval-based chat logic below — presumably reserved for future use.
    """

    def __init__(self, input_dim, hidden_dim, output_dim):
        """Build the two linear layers sized input_dim -> hidden_dim -> output_dim."""
        super().__init__()
        self.fc1 = nn.Linear(input_dim, hidden_dim)
        self.fc2 = nn.Linear(hidden_dim, output_dim)
        self.activation = nn.ReLU()

    def forward(self, x):
        """Return the logits for input batch ``x``."""
        hidden = self.activation(self.fc1(x))
        return self.fc2(hidden)
# === Helper Functions ===
def process_text_file(file_path):
    """
    Process a plain text file into a list of sentences.

    Each non-blank line in the text file is treated as one sentence;
    surrounding whitespace is stripped.

    Parameters
    ----------
    file_path : str
        Path to the text file on disk.

    Returns
    -------
    list[str]
        The non-empty, stripped lines of the file, in order.
    """
    # Explicit UTF-8: the platform default encoding (e.g. cp1252 on Windows)
    # would silently mangle or reject non-ASCII uploaded text.
    with open(file_path, "r", encoding="utf-8") as f:
        # Iterate the file directly instead of readlines() — no need to
        # materialize the whole file as a list first.
        return [line.strip() for line in f if line.strip()]
def process_csv_file(file_path):
    """
    Process a CSV file into a list of sentences.

    Assumes the CSV has a column named 'text'.

    Parameters
    ----------
    file_path : str
        Path to the CSV file on disk.

    Returns
    -------
    list[str]
        Non-null values of the 'text' column, coerced to str.

    Raises
    ------
    ValueError
        If the CSV has no 'text' column.
    """
    data = pd.read_csv(file_path)
    # Guard clause: fail fast with a clear message on a malformed upload.
    if 'text' not in data.columns:
        raise ValueError("CSV file must have a 'text' column.")
    # Coerce to str: a purely numeric 'text' column is parsed as ints/floats,
    # which would later break TfidfVectorizer (it expects string documents).
    return data['text'].dropna().astype(str).tolist()
# === Training Data ===
# Module-level state shared by train_bot() and generate_response():
# `corpus` accumulates every training sentence seen so far; `vectorizer`
# is refit on the full corpus after each upload.
corpus = []
vectorizer = TfidfVectorizer()
def train_bot(file_path, file_type):
    """
    Train the chatbot by adding content from the uploaded file to the corpus.

    Parameters
    ----------
    file_path : str
        Path of the saved upload on disk.
    file_type : str
        File extension, e.g. "txt", ".TXT", "csv" — case-insensitive,
        a leading dot is tolerated.

    Raises
    ------
    ValueError
        If the file type is unsupported, or the upload contributes no
        training sentences.
    """
    global corpus
    # Normalize so "TXT", ".txt", ".CSV", etc. are all accepted.
    normalized = file_type.lower().lstrip(".")
    if normalized == "txt":
        corpus += process_text_file(file_path)
    elif normalized == "csv":
        corpus += process_csv_file(file_path)
    else:
        raise ValueError("Unsupported file type. Use .txt or .csv.")
    if not corpus:
        # TfidfVectorizer.fit raises an opaque "empty vocabulary" error on an
        # empty corpus; fail with a clearer message instead.
        raise ValueError("No training sentences found in the uploaded file.")
    # Refit the vectorizer on the updated corpus so new vocabulary is indexed.
    vectorizer.fit(corpus)
def generate_response(user_input):
    """
    Generate a chatbot response based on the trained corpus using cosine similarity.

    Parameters
    ----------
    user_input : str
        The user's chat message.

    Returns
    -------
    str
        The most similar corpus sentence, or a fallback message when the
        corpus is empty or shares no vocabulary with the input.
    """
    if not corpus:
        return "I don't know much yet. Please upload some files to teach me!"
    # Vectorize user input and the corpus with the already-fitted vectorizer.
    user_vector = vectorizer.transform([user_input])
    corpus_vectors = vectorizer.transform(corpus)
    # Compute cosine similarity between the input and every corpus sentence.
    similarities = cosine_similarity(user_vector, corpus_vectors)
    # When the input shares no vocabulary with the corpus every similarity is
    # zero and argmax would arbitrarily pick the first sentence — answer
    # honestly instead of returning a random-looking match.
    if similarities.max() == 0:
        return "I don't know much yet. Please upload some files to teach me!"
    most_similar_idx = similarities.argmax()
    return corpus[most_similar_idx]
# === Streamlit App ===
# Top-level script body: Streamlit re-executes this on every user interaction,
# so module state (corpus/vectorizer) persists only within one server process.
st.title("Chatbot Trainer with File Uploads")
st.write("""
### How it Works:
1. Upload `.txt` or `.csv` files to teach the chatbot.
- **.txt**: Each line represents one training sentence.
- **.csv**: Must have a column named `text` for training sentences.
2. Interact with the chatbot in real-time.
3. Watch the chatbot improve as you train it with more files!
""")
uploaded_file = st.file_uploader("Upload a file (.txt or .csv)", type=["txt", "csv"])
if uploaded_file is not None:
    # Save the uploaded file locally so the training helpers can read it by path.
    file_path = os.path.join("uploads", uploaded_file.name)
    os.makedirs("uploads", exist_ok=True)
    with open(file_path, "wb") as f:
        f.write(uploaded_file.getbuffer())
    # Train the chatbot on the uploaded file.
    st.write("Training the chatbot with the uploaded file...")
    # Derive the extension from the original filename ("txt" or "csv").
    file_extension = uploaded_file.name.split(".")[-1]
    train_bot(file_path, file_extension)
    st.success("Training complete!")
# Chat Interface
st.write("### Chat with the Bot!")
user_input = st.text_input("You:", placeholder="Type something to chat...")
if user_input:
    response = generate_response(user_input)
    st.write(f"**Bot:** {response}")