Update app.py
app.py CHANGED
@@ -1,108 +1,34 @@
-
-from …
-from …
+import streamlit as st
+from transformers import AutoModelForSequenceClassification, AutoTokenizer, pipeline
+from keybert import KeyBERT
 import numpy as np
 import torch
+from evaluate import load
 
-#
-# Replace with your actual dataset
-train_data = {
-    "Review": [
-        "This product is excellent, I love it!",
-        "Terrible experience, would not recommend.",
-        "It's okay, not great, but not bad either."
-    ],
-    "labels": [2, 0, 1]  # Assuming 0=Very Negative, 1=Negative, 2=Neutral, 3=Positive, 4=Very Positive
-}
-eval_data = {
-    "Review": [
-        "Amazing quality, worth the price!",
-        "Awful, completely disappointed."
-    ],
-    "labels": [4, 0]
-}
-
-# Convert datasets to Hugging Face Dataset format
-small_train_dataset = Dataset.from_dict(train_data)
-small_eval_dataset = Dataset.from_dict(eval_data)
-
-# Step 2: Load the model and tokenizer
-model = AutoModelForSequenceClassification.from_pretrained(
-    "tabularisai/multilingual-sentiment-analysis",
-    num_labels=5  # Ensure this matches the number of labels in your dataset
-)
-tokenizer = AutoTokenizer.from_pretrained(
-    "tabularisai/multilingual-sentiment-analysis"
-)
-
-# Step 3: Tokenize the datasets
-def tokenize_function(examples):
-    return tokenizer(examples["Review"], padding="max_length", truncation=True, max_length=128)
-
-tokenized_train = small_train_dataset.map(tokenize_function, batched=True)
-tokenized_eval = small_eval_dataset.map(tokenize_function, batched=True)
-
-# Ensure the datasets have the "labels" column renamed correctly
-tokenized_train = tokenized_train.rename_column("labels", "label")
-tokenized_eval = tokenized_eval.rename_column("labels", "label")
-
-# Step 4: Define the compute_metrics function
-accuracy_metric = load("accuracy")
-def compute_metrics(eval_pred):
-    logits, labels = eval_pred
-    predictions = np.argmax(logits, axis=-1)
-    return accuracy_metric.compute(predictions=predictions, references=labels)
-
-# Step 5: Configure training arguments
-training_args = TrainingArguments(
-    output_dir="test_trainer",
-    num_train_epochs=1,  # Increased epochs for better learning
-    per_device_train_batch_size=4,  # Adjust batch size based on available GPU memory
-    evaluation_strategy="epoch",  # Evaluate after each epoch
-    save_strategy="no",  # Avoid saving checkpoints for simplicity
-    learning_rate=5e-5,  # Fine-tuned learning rate
-    logging_dir="logs",  # Log directory
-    seed=42  # Ensure reproducibility
-)
-
-# Step 6: Set up the Trainer
-trainer = Trainer(
-    model=model,
-    args=training_args,
-    train_dataset=tokenized_train,
-    eval_dataset=tokenized_eval,
-    compute_metrics=compute_metrics
-)
-
-# Debug: Ensure reproducibility
-np.random.seed(42)
-torch.manual_seed(42)
-if torch.cuda.is_available():
-    torch.cuda.manual_seed(42)
-
-# Step 7: Train and Evaluate
+# Load the sentiment analysis model and tokenizer
 try:
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+    model = AutoModelForSequenceClassification.from_pretrained(
+        "tabularisai/multilingual-sentiment-analysis",
+        num_labels=5
+    )
+    tokenizer = AutoTokenizer.from_pretrained(
+        "tabularisai/multilingual-sentiment-analysis"
+    )
+    sentiment_pipeline = pipeline(
+        "text-classification",
+        model=model,
+        tokenizer=tokenizer
+    )
+except Exception as e:
+    st.error(f"Error loading the sentiment analysis model: {e}")
+
+# Load the keyword extraction model
+try:
+    kw_model = KeyBERT()
+except Exception as e:
+    st.error(f"Error loading the keyword extraction model: {e}")
 
-#
+# Define the label mapping
 label_map = {
     "Very Negative": 0,
     "Negative": 1,
@@ -111,115 +37,32 @@ label_map = {
     "Very Positive": 4
 }
 
-#
-
-confidence = result[0]['score']
-
-print(f"Text: {text}")
-print(f"Predicted label: {predicted_label} ({result[0]['label']})")
-print(f"Confidence: {confidence:.4f}")
-
-# Batch Testing
-examples = [
-    {"text": "The stock market showed a strong recovery today.", "label": 4},
-    {"text": "The company's performance is a disaster!", "label": 0},
-    {"text": "It's a stable investment with consistent returns.", "label": 2}
-]
-
-print("\nBatch Testing:")
-for example in examples:
-    result = sentiment_pipeline(example["text"])
-    predicted_label = label_map[result[0]['label']]  # Map the model's output
-    print(f"Text: {example['text'][:50]}...")
-    print(f"True: {example['label']} | Predicted: {predicted_label} ({result[0]['label']}) | Confidence: {result[0]['score']:.2f}")
-    print("-" * 60)
+# Streamlit app title
+st.title("Text Sentiment and Keyword Analysis")
 
+# Create the text input box
+input_text = st.text_area("Please enter review text", "")
 
-
-
-
-
-
-
-
-
-# Initialize the models (the first run will download them automatically)
-kw_model = KeyBERT()  # keyword extraction
-classifier = pipeline("zero-shot-classification", model="facebook/bart-large-mnli")  # requirement classification
-
-# Sample Spotify review data
-reviews = [
-    {"text": "Love the Discover Weekly feature but ads are too frequent.", "rating": 4},
-    {"text": "App crashes every time I save a playlist.", "rating": 1},
-    {"text": "Please add a sleep timer option!", "rating": 5},
-    {"text": "Lyrics are out of sync with the music.", "rating": 2},
-]
-
-# Predefined requirement category tags
-demand_labels = [
-    "feature request",   # feature requests
-    "bug report",        # problem feedback
-    "content issue",     # content issues (e.g., lyrics)
-    "subscription",      # subscription related
-    "general feedback"   # general feedback
-]
-
-def analyze_reviews(reviews):
-    results = []
-    for review in reviews:
-        text = review["text"]
-        rating = review["rating"]
-
-        # 1. Keyword extraction (KeyBERT)
+if input_text:
+    try:
+        # Sentiment analysis
+        result = sentiment_pipeline(input_text)
+        predicted_label = label_map[result[0]['label']]
+        rating = predicted_label + 1  # convert the 0-4 label to a 1-5 rating
+        confidence = result[0]['score']
+
+        # Keyword extraction
         keywords = kw_model.extract_keywords(
-            text,
-            keyphrase_ngram_range=(1, 2),
-            stop_words="english",
-            top_n=3
+            input_text,
+            keyphrase_ngram_range=(1, 2),
+            stop_words="english",
+            top_n=3
         )
-
-
-        #
-
-
-
-
-
-        if rating <= 2:
-            urgency = "high"
-        elif rating <= 4:
-            urgency = "medium"
-
-        # Structured results
-        results.append({
-            "text": text,
-            "rating": rating,
-            "keywords": keywords,
-            "demand_type": primary_demand,
-            "urgency": urgency
-        })
-    return results
-
-# Run the analysis
-analysis_results = analyze_reviews(reviews)
-
-# Print the results
-for i, result in enumerate(analysis_results, 1):
-    print(f"\nReview {i}:")
-    print(f"Text: {result['text']}")
-    print(f"Rating: {result['rating']}/5")
-    print(f"Keywords: {', '.join(result['keywords'])}")
-    print(f"Demand Type: {result['demand_type']}")
-    print(f"Urgency: {result['urgency']}")
-
-
-
-import streamlit as st
-
-# Create a text input box
-input_text = st.text_input("Please enter text")
-
-# Display the entered text
-if input_text:
-    st.write(f"The text you entered is: {input_text}")
+        keyword_text = [kw[0] for kw in keywords]
+
+        # Display the results
+        st.write(f"Sentiment rating: {rating}/5")
+        st.write(f"Confidence: {confidence:.2f}")
+        st.write(f"Keywords: {', '.join(keyword_text)}")
+    except Exception as e:
+        st.error(f"Error analyzing text: {e}")
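Note on the removed side: the run of deleted lines inside the try: under "# Step 7: Train and Evaluate" comes through empty in this view, so that part of the old script is not recoverable (the deleted test code below it already uses sentiment_pipeline, result, and text). A hypothetical sketch of what the lost block plausibly did, assuming the standard transformers Trainer API and the objects defined in Steps 2-6; every line is an assumption, not recovered source:

# Hypothetical reconstruction of the lost block inside the old try: (illustrative only)
try:
    trainer.train()               # fine-tune on the toy training set
    metrics = trainer.evaluate()  # runs compute_metrics on the eval set
    print(metrics)
    # The deleted tests below the gap expect a text-classification pipeline:
    sentiment_pipeline = pipeline("text-classification", model=model, tokenizer=tokenizer)
except Exception as e:
    print(f"Training failed: {e}")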
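For reference, the rewritten app.py reduces to the flow below. A minimal sketch without the Streamlit layer, assuming the model emits exactly the five labels listed in label_map; the sample text and commented values are illustrative:

# Minimal sketch of the new app's logic, outside Streamlit
from transformers import pipeline
from keybert import KeyBERT

sentiment_pipeline = pipeline(
    "text-classification",
    model="tabularisai/multilingual-sentiment-analysis"
)
kw_model = KeyBERT()

label_map = {
    "Very Negative": 0,
    "Negative": 1,
    "Neutral": 2,
    "Positive": 3,
    "Very Positive": 4,
}

text = "Amazing quality, worth the price!"  # illustrative sample input
result = sentiment_pipeline(text)[0]        # e.g. {'label': 'Very Positive', 'score': ...}
rating = label_map[result["label"]] + 1     # map the 0-4 label index to a 1-5 rating
keywords = [kw for kw, _ in kw_model.extract_keywords(
    text, keyphrase_ngram_range=(1, 2), stop_words="english", top_n=3
)]
print(f"Rating: {rating}/5 | Confidence: {result['score']:.2f} | Keywords: {', '.join(keywords)}")

In the Space itself the same logic runs behind st.text_area and st.write; locally the app would be launched with streamlit run app.py.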