first commit

Files changed:
- README.md +81 -6
- app.py +176 -0
- final_channal_analyzer.py +674 -0
- final_video_analyzer.py +743 -0
- prompt.py +268 -0
- requirements.txt +63 -0
- utils.py +38 -0
- youtube_api_test.py +112 -0
README.md
CHANGED
@@ -1,14 +1,89 @@
 ---
-title: Youtube Analyzer
+title: Youtube Comment Analyzer
-emoji:
+emoji: ๐
 colorFrom: green
-colorTo:
+colorTo: blue
 sdk: gradio
-sdk_version: 5.33.
+sdk_version: 5.33.0
 app_file: app.py
 pinned: false
 license: apache-2.0
-short_description: YouTube
+short_description: Strategic YouTube insights from comment analysis
+tags:
+- agent-demo-track
+- youtube
+- sentiment-analysis
+- ai-agents
+- mcp
 ---
 
-
+# ๐ YouTube Analyzer Pro
+
+> YouTube Analyzer Pro revolutionizes content analysis through **MCP (Model Context Protocol) Server** integration with AI-powered sentiment analysis and real-time comment processing.
+
+## ๐ฅ Demo Video
+
+[**Watch our MCP Server in action**](https://drive.google.com/file/d/1zWnphL-UtVhQP7FpDbUucF_TtIJ4n91S/view)
+
+## ๐ก The Problem
+
+**YouTube comments contain massive untapped intelligence:**
+- ๐ค **Sarcasm/Irony**: "Great video... really helpful ๐" → actually negative
+- ๐ญ **Hidden Needs**: "Do this for beginners too" → content opportunity
+- ๐ฏ **Improvement Requests**: "Audio could be better" → technical feedback
+- ๐ **Current tools only count likes** → they miss the actual insights
+
+## ๐ Our LLM Solution
+
+### ๐ง Advanced Analysis
+- **Sarcasm Detection**: Identifies irony and sarcasm patterns
+- **Emotion Classification**: Multiple emotion types with confidence levels
+- **Need Extraction**: What viewers actually want/request
+- **Cultural Context**: Multi-language sentiment understanding
+
+### ๐ Key Features
+- **Single Video Analysis**: Deep dive into comments with sentiment scoring
+- **Channel Intelligence**: Videos vs Shorts specialized analysis
+- **Visual Dashboards**: Professional charts showing hidden patterns
+- **Strategic Insights**: AI-generated action plans
+
+## ๐ ๏ธ Tech Stack
+
+```
+Comments → GPT-4 Analysis → Sarcasm Detection → Business Intelligence
+```
+
+- **AI**: LLM custom prompts
+- **Visualization**: Matplotlib, Plotly
+- **Interface**: Gradio with MCP Server integration
+- **Performance**: Real-time processing
+
+## ๐ Results vs Traditional Tools
+
+| Traditional | Our LLM Analysis |
+|------------|------------------|
+| "Positive comments" | "Genuine positive vs sarcastic complaints" |
+| "High engagement" | "Specific audience requests identified" |
+| "Good reception" | "Content format preferences detected" |
+
+## ๐ฏ Business Impact
+
+- **Advanced Intelligence**: Sarcasm and sentiment detection beyond basic metrics
+- **Actionable Insights**: Per-video analysis with specific recommendations
+- **Strategic Value**: Comment-driven content optimization
+- **Real Understanding**: What audiences actually think and want
+
+## ๐ฅ Contributors
+
+- **Su Il Lee**
+- **HanJun Jung**
+
+---
+
+<div align="center">
+
+### ๐ Agents-MCP-Hackathon
+**YouTube Analyzer Pro**
+
+</div>
+
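
The pipeline named in the README's Tech Stack section (comments in, LLM sentiment out, insights at the end) is what `final_video_analyzer.py` and `final_channal_analyzer.py` below implement with much larger prompts. As a minimal, self-contained sketch of the same idea — not the repository's code; the helper name, prompt wording, and the `OPENAI_API_KEY` environment variable are assumptions here — a single batch of comments could be classified like this:

```python
import json
import os

import openai

# Assumption: the key comes from the environment; the repo loads its key via utils.py instead.
client = openai.OpenAI(api_key=os.environ["OPENAI_API_KEY"])

def classify_comments(comments):
    """Ask the model for per-comment sentiment plus a sarcasm flag, returned as JSON."""
    prompt = (
        "Classify each YouTube comment as positive, negative, or neutral, flag likely sarcasm, "
        'and reply with ONLY a JSON object of the form {"results": [{"comment": ..., '
        '"sentiment": ..., "sarcasm": true|false}, ...]}.\n\n'
        + "\n".join(f"- {c}" for c in comments)
    )
    response = client.chat.completions.create(
        model="gpt-4o-mini",  # same model the analyzers below use
        messages=[{"role": "user", "content": prompt}],
        response_format={"type": "json_object"},
        temperature=0.3,
    )
    return json.loads(response.choices[0].message.content)["results"]

if __name__ == "__main__":
    print(classify_comments(["Great video... really helpful", "Do this for beginners too"]))
```
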
app.py
ADDED
@@ -0,0 +1,176 @@
import gradio as gr
from utils import *
from youtube_api_test import *
from prompt import *
from final_channal_analyzer import *
from final_video_analyzer import *

css = """
.gradio-container {
    background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
    font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;
}
.main {
    background: rgba(255, 255, 255, 0.98);
    border-radius: 25px;
    margin: 20px;
    padding: 40px;
    box-shadow: 0 25px 80px rgba(0,0,0,0.15);
    backdrop-filter: blur(10px);
}
.analysis-button {
    background: linear-gradient(45deg, #4facfe, #00f2fe) !important;
    border: none !important;
    color: white !important;
    font-weight: bold !important;
    border-radius: 15px !important;
    padding: 15px 30px !important;
    margin: 10px !important;
    transition: all 0.3s ease !important;
    box-shadow: 0 6px 20px rgba(79, 172, 254, 0.3) !important;
}
.analysis-button:hover {
    transform: translateY(-3px) !important;
    box-shadow: 0 10px 30px rgba(79, 172, 254, 0.4) !important;
}
.shorts-button {
    background: linear-gradient(45deg, #ff6b6b, #feca57) !important;
    box-shadow: 0 6px 20px rgba(255, 107, 107, 0.3) !important;
}
.shorts-button:hover {
    box-shadow: 0 10px 30px rgba(255, 107, 107, 0.4) !important;
}
"""

with gr.Blocks(css=css, title="YouTube Analyzer Pro - Specialized Analysis") as demo:
    gr.HTML("""
    <div style='text-align:center; padding:30px; background:linear-gradient(45deg,#ff6b6b,#feca57,#48cae4); border-radius:20px; margin-bottom:30px;'>
        <h1 style='color:white; font-size:3em; margin:0; text-shadow:3px 3px 6px rgba(0,0,0,0.4); font-weight:800;'>
            ๐ YouTube Analyzer Pro
        </h1>
        <p style='color:white; font-size:1.4em; margin:15px 0 0 0; text-shadow:2px 2px 4px rgba(0,0,0,0.3); font-weight:500;'>
            AI-Powered Specialized Content Analysis
        </p>
        <p style='color:rgba(255,255,255,0.9); font-size:1.1em; margin:10px 0 0 0; text-shadow:1px 1px 2px rgba(0,0,0,0.3);'>
            ๐น Deep Video Analysis • ๐ฌ Shorts Intelligence • ๐ฌ Comment Insights
        </p>
    </div>
    """)

    with gr.Tabs():

        with gr.Tab("๐ฏ Youtube Single Video Analysis"):
            with gr.Tabs():
                with gr.TabItem("YouTube Channel: Single Video"):
                    with gr.Row():
                        with gr.Column(scale=2):
                            video_id = gr.Textbox(
                                label="YouTube Video ID",
                                value="hTSaweR8qMI",
                                placeholder="Enter video ID...",
                                info="๐ก The video ID is the part after 'v=' in a YouTube URL\n๐บ Example: youtube.com/watch?v=dQw4w9WgXcQ → Enter: dQw4w9WgXcQ"
                            )
                        with gr.Column(scale=1):
                            comment_limit_slider = gr.Slider(
                                minimum=10,
                                maximum=50,
                                value=25,
                                step=5,
                                label="๐ Major Comments to Analyze",
                                info="๐ฏ Select 10-50 comments for analysis"
                            )

                    video_btn = gr.Button("๐ Analyze Video In Depth :) ~40s", variant="primary")

                    with gr.Row():
                        with gr.Column(scale=2):
                            video_result = gr.Markdown(label="๐ Comprehensive Analysis Report")

                        with gr.Column(scale=1):
                            gr.HTML("<h3 style='text-align:center; margin:10px;'>๐ Analytics Dashboard</h3>")

                            video_info_display = gr.Markdown(label="๐น Video Information")
                            sentiment_chart = gr.Image(label="๐ฌ Sentiment Analysis Dashboard", type="pil")
                            opinion_chart = gr.Image(label="๐ฅ Public Opinion Analysis", type="pil")

                    video_btn.click(
                        fn=lambda video_id, comment_limit: comment_analyzer(video_id, comment_limit),
                        inputs=[video_id, comment_limit_slider],
                        outputs=[video_result, video_info_display, sentiment_chart, opinion_chart],
                        show_progress=True
                    )

        with gr.Tab("๐ Youtube Channel Specialized Analysis"):
            gr.HTML("<h2 style='text-align:center; color:#2C3E50; margin:20px 0;'>๐ Choose Your Analysis Type ~60s</h2>")

            with gr.Row():
                with gr.Column(scale=2):
                    channel_input = gr.Textbox(
                        label="๐ฏ YouTube Channel ID",
                        value="MrBeast",
                        placeholder="Enter YouTube channel ID for specialized analysis...",
                        info="๐ก We'll analyze your selected number of videos or shorts with detailed comment insights",
                        lines=1
                    )
                with gr.Column(scale=1):
                    max_videos_slider = gr.Slider(
                        minimum=2,
                        maximum=10,
                        value=5,
                        step=1,
                        label="๐ Max Videos/Shorts to Analyze",
                        info="๐ฏ Select 2-10 content pieces for analysis"
                    )

            # Two specialized analysis buttons
            with gr.Row():
                with gr.Column():
                    videos_btn = gr.Button(
                        "๐น Analyze Videos",
                        variant="primary",
                        size="large",
                        elem_classes=["analysis-button"]
                    )
                with gr.Column():
                    shorts_btn = gr.Button(
                        "๐ฌ Analyze Shorts",
                        variant="secondary",
                        size="large",
                        elem_classes=["analysis-button", "shorts-button"]
                    )

            with gr.Row():
                analysis_result = gr.Markdown(
                    label="๐ฏ AI Intelligence Report",
                    elem_classes=["analysis-report"]
                )

            dashboard_plot = gr.Plot(
                label="๐ Interactive Analytics Dashboard",
                elem_classes=["dashboard-plot"]
            )

            videos_btn.click(
                fn=lambda channel_input, max_videos: analyze_content_batch(channel_input, "videos", max_videos),
                inputs=[channel_input, max_videos_slider],
                outputs=[analysis_result, dashboard_plot],
                show_progress=True
            )

            shorts_btn.click(
                fn=lambda channel_input, max_videos: analyze_content_batch(channel_input, "shorts", max_videos),
                inputs=[channel_input, max_videos_slider],
                outputs=[analysis_result, dashboard_plot],
                show_progress=True
            )

    gr.HTML("""
    <div style='text-align:center; margin-top:40px; padding:20px; background:rgba(0,0,0,0.05); border-radius:15px;'>
        <p style='color:#7F8C8D; font-size:0.9em; margin:0;'>
            ๐ฏ Specialized Analysis • ๐ฌ Real Comment Insights • ๐ Trend Reasoning
        </p>
    </div>
    """)

if __name__ == "__main__":
    demo.launch(mcp_server=True)
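
Because the click handlers above are thin lambdas over `comment_analyzer` and `analyze_content_batch`, the same analyses can be driven without the web UI; launching with `mcp_server=True` additionally exposes the app's API endpoints to MCP clients alongside the Gradio interface. A rough usage sketch, assuming the YouTube and OpenAI credentials expected by `utils.py`/`youtube_api_test.py` are configured and that `comment_analyzer` is defined in `final_video_analyzer.py` (an assumption, since only part of that file appears below); the return shapes follow the `outputs=` lists wired up above:

```python
# Hypothetical driver script mirroring the two buttons in app.py.
from final_channal_analyzer import analyze_content_batch
from final_video_analyzer import comment_analyzer  # assumed location of comment_analyzer

# Single-video path ("Analyze Video In Depth"): four outputs, matching outputs=[...] above.
report, video_info, sentiment_img, opinion_img = comment_analyzer("hTSaweR8qMI", 25)
print(report[:300])

# Channel path ("Analyze Videos"): a Markdown report plus a Plotly figure.
insights, dashboard_fig = analyze_content_batch("MrBeast", "videos", 5)
dashboard_fig.write_html("channel_dashboard.html")
```
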
final_channal_analyzer.py
ADDED
@@ -0,0 +1,674 @@
from utils import *
from youtube_api_test import *
import traceback
import datetime
import json
import plotly.graph_objects as go
from plotly.subplots import make_subplots


def analyze_detailed_comments_sentiment(videos_data, content_type="videos", max_videos=5):
    if not videos_data:
        return {}

    batch_content = f"Analyze {content_type} comments in detail with reasoning:\n\n"

    for i, (video_id, title, likes, comments) in enumerate(videos_data[:max_videos]):
        comment_data = []
        for j, (comment, like_count) in enumerate(zip(comments[:30], likes[:30])):
            comment_data.append(f"- \"{comment}\" ({like_count} likes)")

        comments_text = '\n'.join(comment_data)[:2500]

        batch_content += f"""
VIDEO {i}: "{title[:120]}"
COMMENTS WITH LIKES:
{comments_text}
---
"""

    batch_prompt = f"""
{batch_content}
**Note: Advanced sentiment analysis required - consider sarcasm, slang, emojis, and context**
For each video, analyze the comments and extract multiple top comments by sentiment. Provide detailed analysis in this EXACT JSON format:

{{
    "video_0": {{
        "sentiment": "positive",
        "score": 0.7,
        "positive_ratio": 65,
        "negative_ratio": 15,
        "key_themes": ["collaboration", "creativity"],
        "engagement_quality": "high",
        "best_positives": [
            {{"comment": "Amazing collaboration with small creators!", "likes": 150}},
            {{"comment": "Love this authentic content!", "likes": 89}},
            {{"comment": "Best video this year!", "likes": 67}}
        ],
        "best_negatives": [
            {{"comment": "Audio quality could be better", "likes": 45}},
            {{"comment": "Too long, should be shorter", "likes": 23}},
            {{"comment": "Boring content lately", "likes": 12}}
        ],
        "best_neutrals": [
            {{"comment": "Thanks for the content", "likes": 34}},
            {{"comment": "First!", "likes": 89}},
            {{"comment": "When is the next upload?", "likes": 56}}
        ],
        "positive_reasons": [
            "Viewers appreciate authentic collaborations and humble attitude",
            "High production quality and engaging storytelling",
            "Strong community connection and interaction"
        ],
        "negative_reasons": [
            "Technical issues mentioned by some viewers",
            "Content length concerns from audience",
            "Some want more variety in topics"
        ],
        "trend_analysis": "Strong positive trend due to community focus and authentic content"
    }},
    "video_1": {{
        "sentiment": "neutral",
        "score": 0.5,
        "positive_ratio": 45,
        "negative_ratio": 25,
        "key_themes": ["gaming", "entertainment"],
        "engagement_quality": "medium",
        "best_positives": [
            {{"comment": "Good gameplay as always", "likes": 78}},
            {{"comment": "Nice skills bro", "likes": 45}}
        ],
        "best_negatives": [
            {{"comment": "Not your best work", "likes": 34}},
            {{"comment": "Too repetitive", "likes": 23}}
        ],
        "best_neutrals": [
            {{"comment": "Part 2 when?", "likes": 67}},
            {{"comment": "Early squad", "likes": 89}}
        ],
        "positive_reasons": [
            "Consistent quality appreciated by fans",
            "Good technical skills recognized"
        ],
        "negative_reasons": [
            "Some viewers want more innovation",
            "Content feels repetitive to some"
        ],
        "trend_analysis": "Steady engagement but needs fresh elements"
    }}
}}

IMPORTANT REQUIREMENTS:
0. If comments are not in English, translate them into English.
1. Extract 2-3 best comments for each sentiment category (positive, negative, neutral)
2. Include actual comment text and like counts from the data provided.
3. Ensure like counts match the data given
4. Provide 2-3 specific reasons for positive and negative sentiment patterns
5. Make sure positive_ratio + negative_ratio + neutral_ratio roughly equals 100
6. Return ONLY valid JSON without markdown formatting
7. Use actual quotes from the comments provided. Do not change the raw comments if it includes likes.
"""

    try:
        print(f"๐ง Sending {len(videos_data)} videos to AI for multi-comment sentiment analysis...")
        response = client.chat.completions.create(
            model="gpt-4o-mini",
            messages=[{"role": "user", "content": batch_prompt}],
            max_tokens=3000,
            temperature=0.5
        )

        response_text = response.choices[0].message.content.strip()
        print(f"๐ฅ Received AI response: {len(response_text)} characters")

        if "```json" in response_text:
            response_text = response_text.split("```json")[1].split("```")[0].strip()
        elif "```" in response_text:
            response_text = response_text.split("```")[1].split("```")[0].strip()

        response_text = response_text.strip()
        if not response_text.startswith('{'):
            start_idx = response_text.find('{')
            end_idx = response_text.rfind('}') + 1
            if start_idx != -1 and end_idx != 0:
                response_text = response_text[start_idx:end_idx]

        print(f"๐ง Cleaned response for JSON parsing...")
        batch_results = json.loads(response_text)
        print(f"✅ Successfully parsed AI analysis for {len(batch_results)} {content_type}")
        return batch_results

    except json.JSONDecodeError as e:
        print(f"❌ JSON parsing error: {e}")
        print(f"❌ Raw response: {response_text[:500]}...")

        fallback_results = {}
        for i in range(min(len(videos_data), max_videos)):
            video_id, title, likes, comments = videos_data[i]

            sample_positives = []
            sample_negatives = []
            sample_neutrals = []

            for j, (comment, like_count) in enumerate(zip(comments[:10], likes[:10])):
                if j < 3:
                    sample_positives.append({"comment": comment[:100], "likes": like_count})
                elif j < 6:
                    sample_negatives.append({"comment": comment[:100], "likes": like_count})
                else:
                    sample_neutrals.append({"comment": comment[:100], "likes": like_count})

            fallback_results[f"video_{i}"] = {
                "sentiment": "neutral",
                "score": 0.5 + (i * 0.1),
                "positive_ratio": 50 + (i * 5),
                "negative_ratio": 20 + (i * 2),
                "key_themes": ["content", "entertainment", "youtube"],
                "engagement_quality": "medium",
                "best_positives": sample_positives or [{"comment": "Great video!", "likes": 50}],
                "best_negatives": sample_negatives or [{"comment": "Could improve", "likes": 20}],
                "best_neutrals": sample_neutrals or [{"comment": "Thanks for content", "likes": 30}],
                "positive_reasons": [
                    "General audience appreciation",
                    "Consistent content quality"
                ],
                "negative_reasons": [
                    "Minor technical improvements needed",
                    "Some content preferences vary"
                ],
                "trend_analysis": "Steady engagement with growth potential"
            }
        print(f"๐ Using enhanced fallback data for {len(fallback_results)} videos")
        return fallback_results

    except Exception as e:
        print(f"❌ Sentiment analysis error: {e}")
        print(f"❌ Full error: {traceback.format_exc()}")

        basic_fallback = {}
        for i in range(min(len(videos_data), max_videos)):
            basic_fallback[f"video_{i}"] = {
                "sentiment": "neutral", "score": 0.4, "positive_ratio": 40,
                "negative_ratio": 30, "key_themes": ["general"], "engagement_quality": "medium",
                "best_positives": [{"comment": "Good content", "likes": 25}],
                "best_negatives": [{"comment": "Could improve", "likes": 15}],
                "best_neutrals": [{"comment": "Thanks", "likes": 20}],
                "positive_reasons": ["Basic appreciation"],
                "negative_reasons": ["General feedback"],
                "trend_analysis": "Stable engagement"
            }
        print(f"๐ Using basic fallback for {len(basic_fallback)} videos")
        return basic_fallback

def create_content_dashboard(content_df, content_type="Videos"):
    """Create specialized dashboard for videos or shorts"""
    if content_df.empty:
        fig = go.Figure()
        fig.add_annotation(text=f"No {content_type.lower()} found for analysis",
                           xref="paper", yref="paper", x=0.5, y=0.5, showarrow=False)
        return fig

    fig = make_subplots(
        rows=2, cols=2,
        subplot_titles=(
            f'๐ {content_type} Sentiment Trend & Performance',
            f'๐ {content_type} Sentiment Distribution',
            f'๐ก Engagement Quality Breakdown',
            f'๐ฅ Performance vs Sentiment Correlation'
        ),
        specs=[
            [{"secondary_y": True}, {"type": "pie"}],
            [{"type": "bar"}, {"type": "scatter"}]
        ],
        vertical_spacing=0.15,
        horizontal_spacing=0.12
    )

    content_labels = [f"{content_type[:-1]} {i+1}" for i in range(len(content_df))]
    colors = ['#2E86DE' if content_type == 'Videos' else '#FF6B35'] * len(content_df)

    fig.add_trace(
        go.Scatter(
            x=content_labels,
            y=content_df['sentiment_score'],
            mode='lines+markers',
            marker=dict(size=12, color=colors[0], line=dict(width=2, color='white')),
            line=dict(width=4, color=colors[0]),
            name=f'{content_type} Sentiment',
            hovertemplate='<b>%{x}</b><br>Sentiment: %{y:.2f}<extra></extra>'
        ),
        row=1, col=1
    )

    # Add views as bars
    fig.add_trace(
        go.Bar(
            x=content_labels,
            y=content_df['views']/1000,
            name='Views (K)',
            opacity=0.4,
            marker_color=colors[0],
            hovertemplate='<b>%{x}</b><br>Views: %{y:.0f}K<extra></extra>'
        ),
        row=1, col=1, secondary_y=True
    )

    # Sentiment distribution pie
    avg_positive = content_df['positive_ratio'].mean()
    avg_negative = content_df['negative_ratio'].mean()
    avg_neutral = 100 - avg_positive - avg_negative

    fig.add_trace(
        go.Pie(
            labels=['๐ Positive', '๐ Neutral', '๐ Negative'],
            values=[avg_positive, avg_neutral, avg_negative],
            marker_colors=['#2ECC71', '#95A5A6', '#E74C3C'],
            hole=0.4,
            hovertemplate='<b>%{label}</b><br>%{value:.1f}%<extra></extra>',
            textinfo='label+percent',
            textfont=dict(size=12, color='white')
        ),
        row=1, col=2
    )

    # Engagement quality breakdown
    engagement_counts = content_df['engagement_quality'].value_counts()
    quality_colors = {'high': '#27AE60', 'medium': '#F39C12', 'low': '#E74C3C'}

    fig.add_trace(
        go.Bar(
            x=engagement_counts.index,
            y=engagement_counts.values,
            marker_color=[quality_colors.get(q, '#95A5A6') for q in engagement_counts.index],
            hovertemplate='<b>%{x} Quality</b><br>Count: %{y}<extra></extra>',
            text=engagement_counts.values,
            textposition='auto',
            textfont=dict(size=14, color='white')
        ),
        row=2, col=1
    )

    # Performance vs Sentiment scatter
    fig.add_trace(
        go.Scatter(
            x=content_df['sentiment_score'],
            y=content_df['views'],
            mode='markers',
            marker=dict(
                size=content_df['positive_ratio']/3,
                color=content_df['sentiment_score'],
                colorscale='RdYlGn',
                showscale=True,
                colorbar=dict(title="Sentiment Score"),
                line=dict(width=2, color='white')
            ),
            text=[f"{content_type[:-1]} {i+1}" for i in range(len(content_df))],
            hovertemplate='<b>%{text}</b><br>Sentiment: %{x:.2f}<br>Views: %{y:,}<extra></extra>'
        ),
        row=2, col=2
    )

    fig.update_layout(
        height=800,
        showlegend=False,
        title_text=f"๐ฏ {content_type} Analytics Dashboard - AI-Powered Insights",
        title_font=dict(size=20, color='#2C3E50'),
        title_x=0.5,
        plot_bgcolor='white',
        paper_bgcolor='white'
    )

    # Update axes
    fig.update_yaxes(title_text="Sentiment Score", row=1, col=1)
    fig.update_yaxes(title_text="Views (K)", row=1, col=1, secondary_y=True)
    fig.update_xaxes(title_text="Content Index", row=1, col=1, tickangle=45)
    fig.update_xaxes(title_text="Sentiment Score", row=2, col=2)
    fig.update_yaxes(title_text="Views", row=2, col=2)

    return fig

def analyze_content_batch(channel_input, content_type="videos", max_videos=5):
    """Analyze either videos or shorts with detailed insights"""
    try:
        print(f"๐ Starting {content_type} analysis for: {channel_input} (Max: {max_videos})")
        channel_id = get_channel_id_by_name(channel_input)
        if not channel_id:
            print(f"❌ Channel '{channel_input}' not found!")
            # Return a (markdown, figure) pair so the Gradio outputs stay consistent
            return f"## ❌ Channel '{channel_input}' not found\n\nPlease check the channel name and try again.", go.Figure()

        if content_type == "videos":
            content_df = get_channel_videos(channel_id, limit=max_videos)
            emoji = "๐น"
        else:
            content_df = get_channel_shorts(channel_id, limit=max_videos)
            emoji = "๐ฌ"

        if content_df.empty:
            return f"## {emoji} No {content_type} found\n\nThis channel doesn't have any {content_type} to analyze.", go.Figure()

        # Initialize columns
        content_df['sentiment_score'] = 0.0
        content_df['positive_ratio'] = 0.0
        content_df['negative_ratio'] = 0.0
        content_df['key_themes'] = None
        content_df['engagement_quality'] = 'medium'
        content_df['best_positive'] = ''
        content_df['best_negative'] = ''
        content_df['best_neutral'] = ''
        content_df['positive_reason'] = ''
        content_df['negative_reason'] = ''
        content_df['trend_analysis'] = ''
        content_df['best_positives'] = None
        content_df['best_negatives'] = None
        content_df['best_neutrals'] = None
        content_df['positive_reasons'] = None
        content_df['negative_reasons'] = None

        print(f"๐ Collecting {content_type} comments...")
        batch_data = []
        for i, row in content_df.iterrows():
            comments_df = get_youtube_comments(row['video_id'], limit=17, order='relevance')
            if not comments_df.empty:
                batch_data.append((row['video_id'], row['title'], comments_df['likes'].tolist(), comments_df['comment'].tolist()))

        if batch_data:
            print(f"๐ง AI analyzing {len(batch_data)} {content_type}...")
            results = analyze_detailed_comments_sentiment(batch_data, content_type, max_videos)

            for i, (video_id, title, likes, comments) in enumerate(batch_data):
                result_key = f"video_{i}"
                if result_key in results:
                    result = results[result_key]
                    try:
                        idx = content_df[content_df['video_id'] == video_id].index[0]

                        # Apply basic metrics
                        content_df.at[idx, 'sentiment_score'] = result.get('score', 0)
                        content_df.at[idx, 'positive_ratio'] = result.get('positive_ratio', 0)
                        content_df.at[idx, 'negative_ratio'] = result.get('negative_ratio', 0)
                        content_df.at[idx, 'key_themes'] = result.get('key_themes', [])
                        content_df.at[idx, 'engagement_quality'] = result.get('engagement_quality', 'medium')
                        content_df.at[idx, 'trend_analysis'] = result.get('trend_analysis', '')

                        # Apply multiple comments and reasons
                        content_df.at[idx, 'best_positives'] = result.get('best_positives', [])
                        content_df.at[idx, 'best_negatives'] = result.get('best_negatives', [])
                        content_df.at[idx, 'best_neutrals'] = result.get('best_neutrals', [])
                        content_df.at[idx, 'positive_reasons'] = result.get('positive_reasons', [])
                        content_df.at[idx, 'negative_reasons'] = result.get('negative_reasons', [])

                        # Keep single comment fields for backward compatibility
                        best_pos = result.get('best_positives', [])
                        best_neg = result.get('best_negatives', [])
                        best_neu = result.get('best_neutrals', [])

                        content_df.at[idx, 'best_positive'] = best_pos[0]['comment'] if best_pos else ''
                        content_df.at[idx, 'best_negative'] = best_neg[0]['comment'] if best_neg else ''
                        content_df.at[idx, 'best_neutral'] = best_neu[0]['comment'] if best_neu else ''

                        pos_reasons = result.get('positive_reasons', [])
                        neg_reasons = result.get('negative_reasons', [])

                        content_df.at[idx, 'positive_reason'] = pos_reasons[0] if pos_reasons else ''
                        content_df.at[idx, 'negative_reason'] = neg_reasons[0] if neg_reasons else ''

                        print(f"✅ Applied multi-comment analysis for: {title[:50]}...")

                    except Exception as e:
                        print(f"❌ Error applying results for {title[:50]}: {str(e)}")

        # Generate insights
        insights = generate_detailed_insights(content_df, content_type.capitalize())

        # Create dashboard
        dashboard = create_content_dashboard(content_df, content_type.capitalize())

        print(f"✅ {content_type.capitalize()} analysis completed!")
        return insights, dashboard

    except Exception as e:
        print(f"❌ Error analyzing {content_type}: {str(e)}")
        error_msg = f"## ❌ {content_type.capitalize()} Analysis Error\n\n**Error:** {str(e)}"
        empty_fig = go.Figure()
        return error_msg, empty_fig


def generate_detailed_insights(content_df, content_type):
    """Generate AI-powered detailed insights with LLM analysis"""
    if content_df.empty:
        return f"## No {content_type.lower()} found for analysis"

    analysis_data = {
        "content_type": content_type,
        "total_content": len(content_df),
        "performance_metrics": {
            "avg_views": content_df['views'].mean(),
            "avg_sentiment": content_df['sentiment_score'].mean(),
            "avg_positive": content_df['positive_ratio'].mean(),
            "avg_negative": content_df['negative_ratio'].mean(),
            "total_views": content_df['views'].sum()
        },
        "content_breakdown": []
    }

    for i, row in content_df.iterrows():
        content_analysis = {
            "index": i + 1,
            "title": row['title'][:80],
            "views": row['views'],
            "sentiment_score": row['sentiment_score'],
            "positive_ratio": row.get('positive_ratio', 0),
            "negative_ratio": row.get('negative_ratio', 0),
            "engagement_quality": row.get('engagement_quality', 'medium'),
            "key_themes": row.get('key_themes', []),
            "best_positives": row.get('best_positives', []),
            "best_negatives": row.get('best_negatives', []),
            "positive_reasons": row.get('positive_reasons', []),
            "negative_reasons": row.get('negative_reasons', []),
            "trend_analysis": row.get('trend_analysis', '')
        }
        analysis_data["content_breakdown"].append(content_analysis)

    # Create LLM analysis prompt
    llm_prompt = f"""
Analyze this YouTube {content_type.lower()} performance data and generate a comprehensive intelligence report.

PERFORMANCE DATA:
- Total {content_type}: {analysis_data['total_content']}
- Average Views: {analysis_data['performance_metrics']['avg_views']:,.0f}
- Average Sentiment: {analysis_data['performance_metrics']['avg_sentiment']:.2f}/1.0
- Positive Ratio: {analysis_data['performance_metrics']['avg_positive']:.1f}%
- Negative Ratio: {analysis_data['performance_metrics']['avg_negative']:.1f}%

INDIVIDUAL CONTENT ANALYSIS:
{chr(10).join([f"{item['index']}. '{item['title']}' - {item['views']:,} views, {item['sentiment_score']:.2f} sentiment, {item['positive_ratio']:.0f}% positive, Quality: {item['engagement_quality']}, Themes: {item['key_themes'][:3]}" for item in analysis_data['content_breakdown']])}

Generate a professional analysis report in the following structure:

# ๐ {content_type} Performance Intelligence Report

## ๐ Executive Summary
[2-3 sentences about overall performance and key findings]

## ๐ฏ Performance Breakdown

### ๐ Champion Content Analysis
[Identify top 2-3 performing videos with specific reasons for success]

### ⚠️ Optimization Opportunities
[Identify bottom 2-3 performing videos with specific improvement recommendations]

## ๐ก Strategic Insights

### ๐ฅ Winning Formula
[3-4 key success patterns identified from top performers]

### ๐ฌ Content DNA Analysis
[Analysis of themes, engagement patterns, and audience preferences]

### ๐ Audience Sentiment Intelligence
[Deep dive into comment sentiment patterns and audience behavior]

## ๐ Action Plan Recommendations

### Immediate Actions
[1-2 specific, actionable recommendations]

## ๐ Competitive Advantage
[How this channel can differentiate and excel in their niche]

---

Requirements:
- Use emojis strategically for visual impact
- Include specific data points and percentages
- Make recommendations actionable and specific
- Write in professional but engaging tone
- Focus on growth and optimization strategies
- Keep analysis data-driven and insightful
"""

    try:
        # Generate LLM insights
        print("๐ง Generating AI-powered strategic insights...")
        response = client.chat.completions.create(
            model="gpt-4o-mini",
            messages=[{"role": "user", "content": llm_prompt}],
            max_tokens=3000,
            temperature=0.3
        )

        llm_insights = response.choices[0].message.content.strip()

        # Add individual content performance cards
        detailed_breakdown = """

<details>
<summary style="font-size: 1.5em; font-weight: bold; cursor: pointer; margin: 16px 0 8px 0; color: inherit;">
Individual Content Performance Matrix<br> (Click to Expand!)
</summary>

## ๐ Individual Content Performance Matrix

"""

        for item in analysis_data["content_breakdown"]:
            # Performance rating logic
            performance_score = (
                (item['sentiment_score'] * 40) +
                (min(item['views'] / analysis_data['performance_metrics']['avg_views'], 2) * 30) +
                (item['positive_ratio'] * 0.3)
            )

            if performance_score >= 80:
                rating = "๐ CHAMPION"
                status_color = "๐"
            elif performance_score >= 60:
                rating = "๐ STRONG"
                status_color = "๐"
            elif performance_score >= 40:
                rating = "๐ STEADY"
                status_color = "๐ "
            else:
                rating = "⚠️ NEEDS WORK"
                status_color = "๐"

            detailed_breakdown += f"""
### {rating}: "{item['title']}"

| Metric | Value | Performance |
|--------|--------|-------------|
| ๐ **Views** | {item['views']:,} | {status_color} {'Above Average' if item['views'] > analysis_data['performance_metrics']['avg_views'] else 'Below Average'} |
| ๐ฏ **Sentiment Score** | {item['sentiment_score']:.2f}/1.0 | {'๐ฅ Excellent' if item['sentiment_score'] > 0.8 else '๐ Good' if item['sentiment_score'] > 0.6 else '⚠️ Needs Work'} |
| ๐ **Positive Feedback** | {item['positive_ratio']:.0f}% | {'๐ Outstanding' if item['positive_ratio'] > 80 else '๐ Strong' if item['positive_ratio'] > 60 else '๐ง Improve'} |
| ๐ช **Engagement Quality** | {item['engagement_quality'].title()} | {'๐ฅ High Impact' if item['engagement_quality'] == 'high' else '๐ Steady Growth' if item['engagement_quality'] == 'medium' else '๐ก Potential'} |

**๐จ Content Themes**: {', '.join(item['key_themes'][:3]) if item['key_themes'] else 'General Content'}

"""
            # Positive feedback section
            if item.get('best_positives') or item.get('positive_reasons'):
                detailed_breakdown += "| **๐ Top Comments** | **๐ Positive Reasons** |\n"
                detailed_breakdown += "|---------------------|------------------------|\n"

                max_len = max(len(item.get('best_positives', [])), len(item.get('positive_reasons', [])))
                for i in range(max_len):
                    comment = item.get('best_positives', [])[i]['comment'][:100] + "..." if i < len(item.get('best_positives', [])) else ""
                    reason = item.get('positive_reasons', [])[i][:100] + "..." if i < len(item.get('positive_reasons', [])) else ""
                    detailed_breakdown += f"| {comment} | {reason} |\n"
                detailed_breakdown += "\n"

            # Negative feedback section
            if item.get('best_negatives') or item.get('negative_reasons'):
                detailed_breakdown += "| **๐ Critical Feedback** | **๐ Negative Reasons** |\n"
                detailed_breakdown += "|--------------------------|------------------------|\n"

                max_len = max(len(item.get('best_negatives', [])), len(item.get('negative_reasons', [])))
                for i in range(max_len):
                    comment = item.get('best_negatives', [])[i]['comment'][:100] + "..." if i < len(item.get('best_negatives', [])) else ""
                    reason = item.get('negative_reasons', [])[i][:100] + "..." if i < len(item.get('negative_reasons', [])) else ""
                    detailed_breakdown += f"| {comment} | {reason} |\n"
                detailed_breakdown += "\n"

            detailed_breakdown += "---\n"

        detailed_breakdown += "\n</details>\n"

        # Combine LLM insights with detailed breakdown
        final_report = llm_insights + detailed_breakdown

        # Add footer with timestamp and hackathon branding
        final_report += f"""

---
*๐ค AI-Powered Strategic Intelligence | โฐ {datetime.datetime.now().strftime('%Y-%m-%d %H:%M UTC')} | ๐ Next-Gen Analytics*
"""

        print("✅ Strategic intelligence report generated successfully!")
        return final_report

    except Exception as e:
        print(f"❌ LLM Analysis Error: {str(e)}")

        # Fallback to enhanced static analysis
        best_video = content_df.loc[content_df['sentiment_score'].idxmax()]
        worst_video = content_df.loc[content_df['sentiment_score'].idxmin()]

        fallback_report = f"""
# ๐ {content_type} Performance Intelligence Report

## ๐ Executive Dashboard

| ๐ฏ Key Metric | ๐ Performance | ๐ญ Status |
|---------------|----------------|-----------|
| **Portfolio Size** | {len(content_df)} {content_type.lower()} | {'๐ฅ Focused Strategy' if len(content_df) <= 10 else '๐ Active Portfolio'} |
| **Average Performance** | {content_df['views'].mean():,.0f} views | {'๐ Viral Territory' if content_df['views'].mean() > 1000000 else '๐ Strong Growth' if content_df['views'].mean() > 100000 else '๐ Building Momentum'} |
| **Audience Sentiment** | {content_df['sentiment_score'].mean():.2f}/1.0 | {'๐ Exceptional' if content_df['sentiment_score'].mean() > 0.8 else '๐ Positive' if content_df['sentiment_score'].mean() > 0.6 else '⚠️ Optimization Needed'} |
| **Success Rate** | {content_df['positive_ratio'].mean():.0f}% positive | {'๐ Championship Level' if content_df['positive_ratio'].mean() > 80 else '๐ Competitive' if content_df['positive_ratio'].mean() > 60 else '๐ง Growth Opportunity'} |

## ๐ฏ Performance Analysis

### ๐ TOP PERFORMER: "{best_video['title'][:60]}..."
- **๐ Metrics**: {best_video['views']:,} views | {best_video['sentiment_score']:.2f} sentiment | {best_video.get('positive_ratio', 0):.0f}% positive
- **✅ Success DNA**: {best_video.get('positive_reason', 'Strong audience resonance and engaging content delivery')}

### ⚠️ OPTIMIZATION TARGET: "{worst_video['title'][:60]}..."
- **๐ Metrics**: {worst_video['views']:,} views | {worst_video['sentiment_score']:.2f} sentiment | {worst_video.get('positive_ratio', 0):.0f}% positive
- **๐ง Growth Areas**: {worst_video.get('negative_reason', 'Content optimization and audience alignment needed')}

## ๐ Strategic Action Plan

### Immediate Wins (Next 30 Days)
1. **๐ฌ Replicate Success Formula**: Scale elements from "{best_video['title'][:30]}..." format
2. **๐ง Optimize Underperformers**: Address feedback patterns from bottom performers
3. **๐ Engagement Boost**: Focus on {content_df['engagement_quality'].value_counts().index[0]} quality content

### Strategic Growth (Next 90 Days)
1. **๐ฏ Content Optimization**: Leverage top-performing themes and formats
2. **๐ฅ Audience Development**: Build on positive sentiment patterns
3. **๐ Performance Scaling**: Systematic improvement of bottom 20% content

---
*๐ค Enhanced Analytics Engine | ๐ MCP Server Hackathon | โฐ {datetime.datetime.now().strftime('%Y-%m-%d %H:%M')} | ๐ Next-Gen Intelligence*
"""
        return fallback_report
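
The JSON-recovery steps in `analyze_detailed_comments_sentiment` above — strip a fenced ```json block if the model added one, otherwise slice from the first `{` to the last `}` — are a generally useful pattern whenever an LLM ignores a "return only JSON" instruction. A standalone sketch of that same cleanup (a hypothetical helper, not part of this commit):

```python
import json

def extract_json_object(text: str) -> dict:
    """Best-effort recovery of a JSON object from an LLM reply."""
    # Prefer the contents of a ```json ... ``` fence when present.
    if "```json" in text:
        text = text.split("```json")[1].split("```")[0]
    elif "```" in text:
        text = text.split("```")[1].split("```")[0]
    text = text.strip()
    # Otherwise fall back to the outermost braces.
    if not text.startswith("{"):
        start, end = text.find("{"), text.rfind("}") + 1
        if start != -1 and end > 0:
            text = text[start:end]
    return json.loads(text)  # raises json.JSONDecodeError if nothing parseable remains

print(extract_json_object('Sure!\n```json\n{"video_0": {"sentiment": "positive"}}\n```'))
```
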
final_video_analyzer.py
ADDED
@@ -0,0 +1,743 @@
import openai
from utils import *
from youtube_api_test import *
import traceback
import datetime
from prompt import *
import matplotlib.pyplot as plt
from io import BytesIO
from PIL import Image
import concurrent.futures

plt.rcParams['font.family'] = ['DejaVu Sans', 'Arial Unicode MS', 'SimHei', 'Malgun Gothic']
plt.rcParams['axes.unicode_minus'] = False

client = openai.OpenAI(api_key=api_key)  # api_key comes from utils via the star import

def create_sentiment_pie_chart(classified_comments):
    try:
        print("๐ Creating PREMIUM sentiment analysis dashboard...")

        plt.rcParams['font.size'] = 10

        sentiment_data = {'Positive': [], 'Negative': [], 'Neutral': []}
        confidence_breakdown = {'High': 0, 'Medium': 0, 'Low': 0}
        top_liked_by_sentiment = {'Positive': [], 'Negative': [], 'Neutral': []}

        for comment in classified_comments:
            analysis = comment['sentiment_analysis']
            likes = comment['likes']
            comment_text = comment['comment']

            sentiment = 'Neutral'
            if 'Positive' in analysis:
                sentiment = 'Positive'
            elif 'Negative' in analysis:
                sentiment = 'Negative'

            sentiment_data[sentiment].append({
                'comment': comment_text,
                'likes': likes,
                'analysis': analysis
            })

            # Extract confidence level
            if 'High' in analysis:
                confidence_breakdown['High'] += 1
            elif 'Medium' in analysis:
                confidence_breakdown['Medium'] += 1
            else:
                confidence_breakdown['Low'] += 1

        top_liked_by_sentiment = sentiment_data

        # Sort top liked comments
        for sentiment in top_liked_by_sentiment:
            top_liked_by_sentiment[sentiment] = sorted(
                top_liked_by_sentiment[sentiment],
                key=lambda x: x['likes'],
                reverse=True
            )[:3]  # Top 3 per sentiment

        # Calculate percentages and metrics
        total_comments = len(classified_comments)
        sentiment_counts = {k: len(v) for k, v in sentiment_data.items()}
        sentiment_percentages = {k: (v/total_comments*100) if total_comments > 0 else 0
                                 for k, v in sentiment_counts.items()}

        # Calculate engagement metrics
        avg_likes_by_sentiment = {}
        for sentiment, comments in sentiment_data.items():
            if comments:
                avg_likes_by_sentiment[sentiment] = sum([c['likes'] for c in comments]) / len(comments)
            else:
                avg_likes_by_sentiment[sentiment] = 0

        print(f"๐ Sentiment breakdown: {sentiment_counts}")
        print(f"๐ Confidence breakdown: {confidence_breakdown}")

        fig = plt.figure(figsize=(16, 10))
        gs = fig.add_gridspec(2, 2, hspace=0.3, wspace=0.3)

        ax1 = fig.add_subplot(gs[0, 0])

        if total_comments > 0:
            labels = list(sentiment_counts.keys())
            sizes = list(sentiment_counts.values())
            colors = ['#2ecc71', '#e74c3c', '#95a5a6']
            explode = (0.05, 0.05, 0.05)

            non_zero_data = [(label, size, color, exp) for label, size, color, exp in zip(labels, sizes, colors, explode) if size > 0]
            if non_zero_data:
                labels, sizes, colors, explode = zip(*non_zero_data)

                wedges, texts, autotexts = ax1.pie(sizes, labels=labels, colors=colors, explode=explode,
                                                   autopct=lambda pct: f'{pct:.1f}%\n({int(pct/100*total_comments)})',
                                                   startangle=90, textprops={'fontsize': 10, 'weight': 'bold'})

                for autotext in autotexts:
                    autotext.set_color('white')
                    autotext.set_fontsize(9)
                    autotext.set_weight('bold')

        ax1.set_title('๐ฌ Sentiment Distribution', fontsize=14, weight='bold', pad=15)

        ax2 = fig.add_subplot(gs[0, 1])

        conf_labels = list(confidence_breakdown.keys())
        conf_values = list(confidence_breakdown.values())
        conf_colors = ['#e74c3c', '#f39c12', '#2ecc71']

        bars = ax2.bar(conf_labels, conf_values, color=conf_colors, alpha=0.8)
        ax2.set_title('๐ฏ Analysis Confidence', fontsize=12, weight='bold')
        ax2.set_ylabel('Comments', fontsize=10)

        for bar, value in zip(bars, conf_values):
            height = bar.get_height()
            ax2.text(bar.get_x() + bar.get_width()/2., height + 0.1,
                     f'{value}', ha='center', va='bottom', fontweight='bold', fontsize=9)

        ax3 = fig.add_subplot(gs[1, 0])

        sent_labels = list(avg_likes_by_sentiment.keys())
        sent_values = list(avg_likes_by_sentiment.values())
        sent_colors = ['#2ecc71', '#e74c3c', '#95a5a6']

        bars = ax3.bar(sent_labels, sent_values, color=sent_colors, alpha=0.8)
        ax3.set_title('๐ Average Likes by Sentiment', fontsize=12, weight='bold')
        ax3.set_ylabel('Avg Likes', fontsize=10)

        for bar, value in zip(bars, sent_values):
            height = bar.get_height()
            ax3.text(bar.get_x() + bar.get_width()/2., height + 0.1,
                     f'{value:.1f}', ha='center', va='bottom', fontweight='bold', fontsize=9)

        ax4 = fig.add_subplot(gs[1, 1])
        ax4.axis('off')

        total_likes = sum([sum([c['likes'] for c in comments]) for comments in sentiment_data.values()])
        most_engaging_sentiment = max(avg_likes_by_sentiment.items(), key=lambda x: x[1])[0]
        dominant_sentiment = max(sentiment_counts.items(), key=lambda x: x[1])[0]

        insights_text = f"""๐ฏ KEY INSIGHTS:

๐ Total Comments: {total_comments}
๐ Total Likes: {total_likes:,}
๐ Dominant: {dominant_sentiment}
โก Most Engaging: {most_engaging_sentiment}
๐ฏ High Confidence: {confidence_breakdown['High']}/{total_comments}"""

        ax4.text(0.05, 0.95, insights_text, fontsize=10,
                 bbox=dict(boxstyle="round,pad=0.5", facecolor='lightblue', alpha=0.8),
                 weight='bold', transform=ax4.transAxes, verticalalignment='top')

        fig.suptitle('๐ Sentiment Analysis Dashboard',
                     fontsize=16, weight='bold', y=0.95)

        buffer = BytesIO()
        plt.savefig(buffer, format='png', dpi=200, bbox_inches='tight', facecolor='white')
        buffer.seek(0)

        pil_image = Image.open(buffer)
        plt.close()

        print("โ PREMIUM sentiment dashboard created! ๐")
        return pil_image

    except Exception as e:
        print(f"โ Sentiment dashboard error: {str(e)}")
        print(f"โ Error details: {traceback.format_exc()}")

        try:
            fig, ax = plt.subplots(figsize=(10, 6))
            ax.text(0.5, 0.5, f'๐ SENTIMENT ANALYSIS DASHBOARD\n\nProcessing Error: {str(e)}\n\n๐ Optimizing analysis...',
                    ha='center', va='center', fontsize=12, weight='bold',
                    transform=ax.transAxes,
                    bbox=dict(boxstyle="round,pad=1", facecolor='lightgreen', alpha=0.8))
            ax.set_title('๐ฌ Sentiment Analysis - System Update', fontsize=14, weight='bold')
            ax.axis('off')

            buffer = BytesIO()
            plt.savefig(buffer, format='png', dpi=200, bbox_inches='tight', facecolor='white')
            buffer.seek(0)
            pil_image = Image.open(buffer)
            plt.close()
            return pil_image
        except:
            return None

def translate_to_english_llm(original_text):
    """Translate Korean keywords/text to English using LLM - OPTIMIZED"""
    try:
        translation_prompt = f"""
Translate to English concisely: {original_text[:200]}
Return ONLY the translation.
"""

        response = client.chat.completions.create(
            model="gpt-4o-mini",
            messages=[{"role": "user", "content": translation_prompt}],
            max_tokens=50,
            temperature=0.1
        )

        return response.choices[0].message.content.strip()

    except Exception as e:
        print(f"Translation error: {str(e)}")
        return original_text[:200]

def create_public_opinion_bar_chart(opinion_results):
    try:
        print("๐ Creating public opinion analysis chart...")
        print(f"๐ Opinion results received: {opinion_results}")

        opinion_metrics = {}

        concerns = []
        if 'Key Concerns:' in opinion_results:
            concerns_line = opinion_results.split('Key Concerns:')[1].split('\n')[0]
            raw_concerns = [c.strip() for c in concerns_line.split(',') if c.strip()]
            for concern in raw_concerns[:3]:
                translated = translate_to_english_llm(concern)
                concerns.append(translated)

        viewpoints = []
        if 'Popular Viewpoints:' in opinion_results:
            viewpoints_line = opinion_results.split('Popular Viewpoints:')[1].split('\n')[0]
            raw_viewpoints = [v.strip() for v in viewpoints_line.split(',') if v.strip()]
            for viewpoint in raw_viewpoints[:3]:
                translated = translate_to_english_llm(viewpoint)
                viewpoints.append(translated)

        engagement_level = "Medium"
        controversy_level = "Low"
        overall_sentiment = "Mixed"

        if 'Audience Engagement:' in opinion_results:
            engagement_level = opinion_results.split('Audience Engagement:')[1].split('\n')[0].strip()

        if 'Controversy Level:' in opinion_results:
            controversy_level = opinion_results.split('Controversy Level:')[1].split('\n')[0].strip()

        if 'Overall Public Sentiment:' in opinion_results:
            overall_sentiment = opinion_results.split('Overall Public Sentiment:')[1].split('\n')[0].strip()

        all_topics = []

        for i, concern in enumerate(concerns):
            weight = 8 - i
            all_topics.append({
                'topic': concern,
                'category': 'Key Concerns',
                'weight': weight,
                'color': '#e74c3c'
            })

        for i, viewpoint in enumerate(viewpoints):
            weight = 6 - i
            all_topics.append({
                'topic': viewpoint,
                'category': 'Popular Views',
                'weight': weight,
                'color': '#2ecc71'
            })

        engagement_scores = {'High': 8, 'Medium': 5, 'Low': 2}
        engagement_score = engagement_scores.get(engagement_level, 5)
        all_topics.append({
            'topic': f'Engagement: {engagement_level}',
            'category': 'Metrics',
            'weight': engagement_score,
            'color': '#f39c12'
        })

        controversy_scores = {'High': 7, 'Medium': 4, 'Low': 1}
        controversy_score = controversy_scores.get(controversy_level, 3)
        all_topics.append({
            'topic': f'Controversy: {controversy_level}',
            'category': 'Metrics',
            'weight': controversy_score,
            'color': '#9b59b6'
        })

        if len(all_topics) <= 2:
            all_topics = [
                {'topic': 'General Discussion', 'category': 'Popular Views', 'weight': 6, 'color': '#2ecc71'},
                {'topic': 'Mixed Reactions', 'category': 'Key Concerns', 'weight': 5, 'color': '#e74c3c'},
                {'topic': 'Active Participation', 'category': 'Metrics', 'weight': 7, 'color': '#f39c12'}
            ]

        fig, ax = plt.subplots(figsize=(14, 8))

        y_positions = range(len(all_topics))
        weights = [item['weight'] for item in all_topics]
        colors = [item['color'] for item in all_topics]
        labels = [item['topic'] for item in all_topics]

        bars = ax.barh(y_positions, weights, color=colors, alpha=0.8)

        for i, (bar, label) in enumerate(zip(bars, labels)):
            ax.text(bar.get_width() + 0.2, bar.get_y() + bar.get_height()/2,
                    label, va='center', fontweight='bold', fontsize=10)

        ax.set_title('๐ฅ Public Opinion Analysis', fontsize=16, weight='bold', pad=20)
        ax.set_xlabel('Opinion Strength Score', fontsize=12, weight='bold')
        ax.set_yticks([])
        ax.grid(axis='x', alpha=0.3)

        insights_text = f"""๐ Summary: Engagement: {engagement_level} | Controversy: {controversy_level} | Sentiment: {overall_sentiment}"""
        fig.text(0.02, 0.02, insights_text, fontsize=10,
                 bbox=dict(boxstyle="round,pad=0.3", facecolor='lightgray', alpha=0.8))

        plt.tight_layout()

        buffer = BytesIO()
        plt.savefig(buffer, format='png', dpi=200, bbox_inches='tight', facecolor='white')
        buffer.seek(0)

        pil_image = Image.open(buffer)
        plt.close()

        print("โ Public opinion chart created! ๐")
        return pil_image

    except Exception as e:
        print(f"โ Public opinion chart error: {str(e)}")

        # Simple fallback chart
        try:
            fig, ax = plt.subplots(figsize=(10, 6))
            ax.text(0.5, 0.5, f'๐ฏ PUBLIC OPINION ANALYSIS\n\nProcessing...',
                    ha='center', va='center', fontsize=12, weight='bold',
                    transform=ax.transAxes,
                    bbox=dict(boxstyle="round,pad=1", facecolor='lightblue', alpha=0.8))
            ax.set_title('๐ฅ Public Opinion Analysis', fontsize=14, weight='bold')
            ax.axis('off')

            buffer = BytesIO()
            plt.savefig(buffer, format='png', dpi=200, bbox_inches='tight', facecolor='white')
            buffer.seek(0)
            pil_image = Image.open(buffer)
            plt.close()
            return pil_image
        except:
            return None

def sentiment_classification_llm(comments_list, comment_limit):
    """Step 1: LLM for sentiment classification - OPTIMIZED for speed"""
    try:
        print("๐ฏ Step 1: Starting OPTIMIZED sentiment classification...")

        # OPTIMIZATION: Reduce comments to top 20 for faster processing
        top_comments = comments_list[:comment_limit]

        # Create batch prompt with all comments
        batch_comments_text = ""
        for i, comment_data in enumerate(top_comments, 1):
            batch_comments_text += f"{i}. \"{comment_data['comment'][:100]}\" (Likes: {comment_data['likes']})\n"  # Truncate long comments

        sentiment_prompt = f"""
Classify sentiment of these {len(top_comments)} YouTube comments quickly and efficiently:
Note: Advanced sentiment analysis - consider sarcasm, slang, emojis, and context

{batch_comments_text}

Return in this EXACT format for each comment:

Comment 1: Positive/Negative/Neutral - High/Medium/Low confidence - Brief reason
Comment 2: Positive/Negative/Neutral - High/Medium/Low confidence - Brief reason
[Continue for all...]

Be fast and precise. Classify ALL {len(top_comments)} comments.
"""

        response = client.chat.completions.create(
            model="gpt-4o-mini",
            messages=[{"role": "user", "content": sentiment_prompt}],
            max_tokens=1500,  # Reduced for faster processing
            temperature=0.1
        )

        batch_result = response.choices[0].message.content.strip()

        # Parse the batch result - SIMPLIFIED parsing
        classified_comments = []
        result_lines = batch_result.split('\n')

        for i, line in enumerate(result_lines):
            if f"Comment {i+1}:" in line and i < len(top_comments):
                # Extract sentiment info from line
                sentiment_analysis = line.replace(f"Comment {i+1}:", "").strip()

                classified_comments.append({
                    'comment': top_comments[i]['comment'],
                    'likes': top_comments[i]['likes'],
                    'sentiment_analysis': sentiment_analysis,
                    'index': i + 1
                })

        # Fill any missing comments with default values
        while len(classified_comments) < len(top_comments):
            missing_index = len(classified_comments)
            classified_comments.append({
                'comment': top_comments[missing_index]['comment'],
                'likes': top_comments[missing_index]['likes'],
                'sentiment_analysis': "Neutral - Medium confidence - Processing completed",
                'index': missing_index + 1
            })

        print(f"โ OPTIMIZED sentiment classification completed for {len(classified_comments)} comments")
        return classified_comments

    except Exception as e:
        print(f"โ Sentiment classification error: {str(e)}")
        # Quick fallback
        classified_comments = []
        for i, comment_data in enumerate(comments_list[:15], 1):  # Even smaller fallback
            classified_comments.append({
                'comment': comment_data['comment'],
                'likes': comment_data['likes'],
                'sentiment_analysis': "Neutral - Medium confidence - Quick processing",
                'index': i
            })
        return classified_comments

def public_opinion_analysis_llm(classified_comments):
    """Step 3: LLM for public opinion analysis - OPTIMIZED"""
    try:
        print("๐ Step 3: Starting OPTIMIZED public opinion analysis...")

        positive_comments = [item for item in classified_comments if 'Positive' in item['sentiment_analysis']][:5]
        negative_comments = [item for item in classified_comments if 'Negative' in item['sentiment_analysis']][:5]
        neutral_comments = [item for item in classified_comments if 'Neutral' in item['sentiment_analysis']][:5]

        opinion_prompt = f"""
Analyze public opinion from these YouTube comments quickly:

POSITIVE ({len(positive_comments)}): {', '.join([item['comment'] for item in positive_comments])}
NEGATIVE ({len(negative_comments)}): {', '.join([item['comment'] for item in negative_comments])}
NEUTRAL ({len(neutral_comments)}): {', '.join([item['comment'] for item in neutral_comments])}

Return ONLY in this format:

TRANSLATIONS (if needed):
[Original comment] โ [English translation]

Overall Public Sentiment: [Positive/Negative/Mixed/Neutral]
Dominant Opinion: [Main viewpoint in one sentence]
Key Concerns: [Top 3 concerns, comma-separated]
Popular Viewpoints: [Top 3 popular opinions, comma-separated]
Controversy Level: [High/Medium/Low]
Audience Engagement: [High/Medium/Low]

Be fast and objective.
"""

        response = client.chat.completions.create(
            model="gpt-4o-mini",
            messages=[{"role": "user", "content": opinion_prompt}],
            max_tokens=300,
            temperature=0.2
        )

        opinion_results = response.choices[0].message.content.strip()
        print(f"โ OPTIMIZED public opinion analysis completed")

        return opinion_results

    except Exception as e:
        print(f"โ Public opinion analysis error: {str(e)}")
        return "Overall Public Sentiment: Mixed\nDominant Opinion: General discussion\nKey Concerns: none, identified, quickly\nPopular Viewpoints: standard, response, analysis\nControversy Level: Low\nAudience Engagement: Medium"


def create_video_info_display(video_info):
    """Create beautiful HTML display for video information"""
    try:
        title = video_info.get('title', 'N/A')
        channel = video_info.get('channel_name', 'N/A')
        views = video_info.get('view_count', 0)
        likes = video_info.get('like_count', 0)
        duration = video_info.get('duration', 'N/A')
        published = video_info.get('publish_date', 'N/A')
        video_id = video_info.get('video_id', 'N/A')

        # Format numbers
        views_formatted = f"{views:,}" if isinstance(views, int) else str(views)
        likes_formatted = f"{likes:,}" if isinstance(likes, int) else str(likes)

        video_info_html = f"""
        <div style='background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
                    padding: 20px; border-radius: 15px; margin: 10px 0;
                    box-shadow: 0 8px 25px rgba(0,0,0,0.15);'>
            <h3 style='color: white; margin: 0 0 15px 0; text-align: center;
                       text-shadow: 2px 2px 4px rgba(0,0,0,0.3); font-size: 1.4em;'>
                ๐น Video Information
            </h3>

            <div style='display: grid; grid-template-columns: 1fr 1fr; gap: 15px;
                        background: rgba(255,255,255,0.1); padding: 15px; border-radius: 10px;'>

                <div style='background: rgba(255,255,255,0.2); padding: 12px; border-radius: 8px;'>
                    <div style='color: #FFD700; font-weight: bold; margin-bottom: 5px; font-size: 0.9em;'>๐ฌ TITLE</div>
                    <div style='color: white; font-size: 1em; line-height: 1.3;'>{title}</div>
                </div>

                <div style='background: rgba(255,255,255,0.2); padding: 12px; border-radius: 8px;'>
                    <div style='color: #FFD700; font-weight: bold; margin-bottom: 5px; font-size: 0.9em;'>๐บ CHANNEL</div>
                    <div style='color: white; font-size: 1em;'>{channel}</div>
                </div>

                <div style='background: rgba(255,255,255,0.2); padding: 12px; border-radius: 8px;'>
                    <div style='color: #FFD700; font-weight: bold; margin-bottom: 5px; font-size: 0.9em;'>๐ VIEWS</div>
                    <div style='color: white; font-size: 1.1em; font-weight: bold;'>{views_formatted}</div>
                </div>

                <div style='background: rgba(255,255,255,0.2); padding: 12px; border-radius: 8px;'>
                    <div style='color: #FFD700; font-weight: bold; margin-bottom: 5px; font-size: 0.9em;'>๐ LIKES</div>
                    <div style='color: white; font-size: 1.1em; font-weight: bold;'>{likes_formatted}</div>
                </div>

                <div style='background: rgba(255,255,255,0.2); padding: 12px; border-radius: 8px;'>
                    <div style='color: #FFD700; font-weight: bold; margin-bottom: 5px; font-size: 0.9em;'>โฑ๏ธ DURATION</div>
                    <div style='color: white; font-size: 1em;'>{duration}</div>
                </div>

                <div style='background: rgba(255,255,255,0.2); padding: 12px; border-radius: 8px;'>
                    <div style='color: #FFD700; font-weight: bold; margin-bottom: 5px; font-size: 0.9em;'>๐ PUBLISHED</div>
                    <div style='color: white; font-size: 1em;'>{published}</div>
                </div>
            </div>

            <div style='text-align: center; margin-top: 15px;'>
                <div style='color: rgba(255,255,255,0.8); font-size: 0.9em;'>
                    ๐ฏ Video ID: {video_id}
                </div>
            </div>
        </div>
        """

        return video_info_html

    except Exception as e:
        print(f"โ Video info display error: {str(e)}")
        return f"""
        <div style='background: #ff6b6b; padding: 15px; border-radius: 10px; margin: 10px 0;'>
            <h3 style='color: white; margin: 0; text-align: center;'>โ Video Information Error</h3>
            <p style='color: white; margin: 10px 0 0 0; text-align: center;'>
                Unable to load video information: {str(e)}
            </p>
        </div>
        """

def final_analysis_report_llm(video_info, news, classified_comments, keyword_results, opinion_results):
    """Step 4: Final comprehensive analysis report generation in English"""
    try:
        print("๐ Step 4: Generating final analysis report in English...")

        total_comments = len(classified_comments)
        positive_count = len([item for item in classified_comments if 'Positive' in item['sentiment_analysis']])
        negative_count = len([item for item in classified_comments if 'Negative' in item['sentiment_analysis']])
        neutral_count = total_comments - positive_count - negative_count

        positive_pct = (positive_count / total_comments * 100) if total_comments > 0 else 0
        negative_pct = (negative_count / total_comments * 100) if total_comments > 0 else 0
        neutral_pct = (neutral_count / total_comments * 100) if total_comments > 0 else 0

        top_comments = sorted(classified_comments, key=lambda x: x['likes'], reverse=True)[:5]

        newline = '\n'
        top_comments_formatted = newline.join([
            f"{i+1}. \"{item['comment']}\" ({item['likes']} likes) - {item['sentiment_analysis'].split('Reason: ')[1] if 'Reason: ' in item['sentiment_analysis'] else 'Analysis provided'}"
            for i, item in enumerate(top_comments)
        ])

        final_prompt = f"""
Create a comprehensive YouTube video analysis report in ENGLISH using all the processed data.

VIDEO INFO:
{video_info}

SENTIMENT ANALYSIS RESULTS:
- Total Comments Analyzed: {total_comments}
- Positive: {positive_count} ({positive_pct:.1f}%)
- Negative: {negative_count} ({negative_pct:.1f}%)
- Neutral: {neutral_count} ({neutral_pct:.1f}%)

PUBLIC OPINION ANALYSIS:
{opinion_results}

TOP COMMENTS BY LIKES:
{top_comments_formatted}

Create a detailed analysis report in ENGLISH using the following EXACT format:

# ๐ฌ YouTube Video Analysis Report

## ๐ Key Insights
`[Main video topic and focus]`

## ๐ฏ Video Overview
[Comprehensive summary of video content and context in English]

## ๐ฌ Comment Sentiment Analysis

### ๐ Sentiment Distribution
- **Positive**: {positive_pct:.1f}% ({positive_count} comments)
- **Negative**: {negative_pct:.1f}% ({negative_count} comments)
- **Neutral**: {neutral_pct:.1f}% ({neutral_count} comments)

### ๐ Key Comment Insights
1. **Positive Reactions**: [Analysis of positive sentiment patterns in English]
2. **Negative Reactions**: [Analysis of negative sentiment patterns in English]
3. **Core Discussion Topics**: [Main topics and themes from comments in English]

### ๐ฏ Top Engaged Comments Analysis
[Detailed breakdown of most-liked comments with sentiment explanations in English]

### ๐ฏ Critical Comments Analysis
[Detailed breakdown of most-negative comments with sentiment explanations in English]

### ๐ฅ Public Opinion Summary
[Synthesis of public opinion analysis results in English]

## ๐ฐ Content Relevance & Impact
[Analysis of video's relevance to current trends and news in English]

## ๐ก Key Findings
1. **Audience Engagement Pattern**: [Major finding from sentiment analysis in English]
2. **Public Opinion Trend**: [Major finding from opinion analysis in English]
3. **Content Impact Assessment**: [Overall impact and reception analysis in English]

## ๐ฏ Business Intelligence

### ๐ Opportunity Factors
- **Content Strategy**: [Content opportunities based on positive sentiment in English]
- **Audience Engagement**: [Engagement optimization opportunities in English]
- **Brand Positioning**: [Brand opportunities identified from analysis in English]

### โ ๏ธ Risk Factors
- **Reputation Management**: [Potential risks from negative sentiment in English]
- **Content Concerns**: [Content-related concerns from analysis in English]
- **Audience Feedback**: [Critical feedback points requiring attention in English]

## ๐ Executive Summary
**Bottom Line**: [Two-sentence summary of the analysis and main recommendation in English]

**Key Metrics**: Total Comments: {total_comments} | Engagement Score: [Calculate based on sentiment] |

---
**Analysis Completed**: {datetime.datetime.now()}
**Comments Processed**: {total_comments} | **Analysis Pipeline**: Premium 3-stage LLM process completed
**Report Language**: English | **Data Sources**: YouTube Comments + Video Info + Latest News
"""

        response = client.chat.completions.create(
            model="gpt-4o-mini",
            messages=[{"role": "user", "content": final_prompt}],
            max_tokens=2000,  # Increased for comprehensive English report
            temperature=0.5
        )

        final_report = response.choices[0].message.content.strip()
        print(f"โ Final English analysis report generated")

        return final_report

    except Exception as e:
        print(f"โ Final report generation error: {str(e)}")
        return f"""# โ Analysis Report Generation Failed

## Error Details
**Error**: {str(e)}
**Time**: {datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')}

## Status
Analysis completed with {len(classified_comments)} comments processed.
"""

def comment_analyzer(video_id="9P6H2QywDjM", comment_limit=10):
    try:
        print(f"๐ Starting OPTIMIZED comprehensive analysis for video: {video_id}")

        print("๐ Collecting video data in parallel...")
        with concurrent.futures.ThreadPoolExecutor(max_workers=2) as executor:
            video_info_future = executor.submit(get_youtube_video_info, video_id=video_id)
            comments_future = executor.submit(get_youtube_comments, video_id=video_id, limit=comment_limit, order='relevance')  # Reduced from 100 to 50

            # Get results
            video_info, video_info_dict = video_info_future.result()
            comments = comments_future.result()
            # summarization = summary_future.result()
        # video_info, video_info_dict = get_youtube_video_info(video_id)
        if video_info is None:
            return "Check video ID", None, None, None  # keep the same arity as the success path
        # comments = get_youtube_comments(video_id, comment_limit, order="relevance")
        # summarization = summarize_video()

        sorted_comments = comments.sort_values('likes', ascending=False)

        comments_for_analysis = [
            {'comment': comment, 'likes': likes}
            for comment, likes in zip(sorted_comments['comment'].tolist()[:50], sorted_comments['likes'].tolist()[:50])
        ]

        news = ""  # Skip news for speed optimization

        print("๐ค Starting OPTIMIZED LLM analysis pipeline...")

        # Step 1: Sentiment Classification (optimized)
        classified_comments = sentiment_classification_llm(comments_for_analysis, comment_limit)

        # Step 2: Public Opinion Analysis (optimized)
        opinion_results = public_opinion_analysis_llm(classified_comments)

        # Step 3: Create Visual Charts in parallel
        print("๐ Creating charts in parallel...")
        with concurrent.futures.ThreadPoolExecutor(max_workers=3) as executor:
            sentiment_future = executor.submit(create_sentiment_pie_chart, classified_comments)
            opinion_future = executor.submit(create_public_opinion_bar_chart, opinion_results)
            final_report_future = executor.submit(final_analysis_report_llm, video_info, news, classified_comments, "", opinion_results)

            sentiment_chart = sentiment_future.result()
            opinion_chart = opinion_future.result()
            final_report = final_report_future.result()

        print("โ OPTIMIZED comprehensive analysis complete!")
        video_info_markdown = f"""
## ๐น Video Information

| Video Information |
|------------|
| **๐ฌ Channel:** {video_info_dict.get('channel_title', 'N/A')[:20]}.. |
| **๐ฌ Title:** {video_info_dict.get('title', 'N/A')[:20]}.. |
| **๐ Views:** {video_info_dict.get('view_count', 'N/A'):,} |
| **๐ Likes:** {video_info_dict.get('like_count', 'N/A'):,} |
| **๐ Published:** {video_info_dict.get('published_at', 'N/A')} |
"""

        return final_report, video_info_markdown, sentiment_chart, opinion_chart

    except Exception as e:
        print(f"โ Analysis error: {str(e)}")
        error_report = f"# โ Analysis Failed\n\nError: {str(e)}\nTime: {datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')}"
        return error_report, None, None, None
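For reference, a minimal way to drive this pipeline end to end might look like the sketch below. It assumes this module is the repo's `final_video_analyzer.py`, that `OPENAI_API_KEY`, `GOOGLE_API_KEY`, and `TAVILY_API_KEY` are set in the environment, and uses the default video ID from the function signature; the actual Gradio/MCP wiring in `app.py` may differ.

```python
# Minimal sketch, not the app's actual entry point: run the analyzer once and
# save the two dashboard images (the chart helpers return PIL images or None).
from final_video_analyzer import comment_analyzer  # assumed module name

report, video_info_md, sentiment_chart, opinion_chart = comment_analyzer(
    video_id="9P6H2QywDjM", comment_limit=10
)

print(video_info_md)
print(report)
if sentiment_chart is not None:
    sentiment_chart.save("sentiment_dashboard.png")
if opinion_chart is not None:
    opinion_chart.save("public_opinion.png")
```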
prompt.py
ADDED
|
@@ -0,0 +1,268 @@
import datetime
import traceback


def keyword_prompt(video_info, summarization):
    keyword_prompt = f"""
You are given a summary of a YouTube video.
Your task is to identify the **main subject (person, company, or concept)** that the video is about.
Only return a **single keyword** (preferably a named entity such as a person, brand, or organization).

Video Info:
{video_info}

Video Summary:
{summarization}

Return only one keyword that best represents the **main focus** of the video content.
"""
    return keyword_prompt


def analysis_prompt(video_info, summarization, news, comments_text):
    analysis_prompt = f"""
Analyze YouTube video information, summary, comments, and related latest news to create a Markdown format report.

Video Info: {video_info}

Video Summary:
{summarization}

Latest News:
{news}

Comments:
{comments_text}

Please write in the following format:

# ๐ฌ YouTube Video Analysis Report

## ๐ Key Keywords
`keyword`

## ๐ฏ Video Overview
[Summary of main video content]

## ๐ฌ Comment Sentiment Analysis

### ๐ Sentiment Distribution
- **Positive**: X%
- **Negative**: Y%
- **Neutral**: Z%

### ๐ Key Comment Insights
1. **Positive Reactions**: [Summary of main positive comments]
2. **Negative Reactions**: [Summary of main negative comments]
3. **Core Issues**: [Main topics found in comments]

### ๐ Comments
1. Positive Comments: [Positive comments with sentiment classification and reasoning]
2. Negative Comments: [Negative comments with sentiment classification and reasoning]
3. Neutral Comments: [Neutral comments with sentiment classification and reasoning]

## ๐ฐ Latest News Relevance
[Analysis of correlation between news and video/comments]

## ๐ก Key Insights
1. [First major finding]
2. [Second major finding]
3. [Third major finding]

# ## ๐ฏ Business Intelligence

# ### Opportunity Factors
# - [Business opportunity 1]
# - [Business opportunity 2]

# ### Risk Factors
# - [Potential risk 1]
# - [Potential risk 2]

# ## ๐ Recommended Actions
# 1. **Immediate Actions**: [Actions needed within 24 hours]
# 2. **Short-term Strategy**: [Execution plan within 1 week]
# 3. **Long-term Strategy**: [Long-term plan over 1 month]
---
**Analysis Completed**: {datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")}
"""

    return analysis_prompt


def error_message(video_id, e):
    # e: the exception caught by the caller (passed in so the message can be formatted)
    error_msg = f"""
# โ Analysis Failed

**Error Message:** {str(e)}

**Debug Information:**
- Video ID: {video_id}
- Time: {datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")}

**Check Items:**
1. Verify YouTube Video ID is correct
2. Verify API key is valid
3. Check network connection

**Detailed Error:**
```
{traceback.format_exc()}
```
"""
    return error_msg


def analysis_prompt2(content_type, all_comments):
    analysis_prompt = f"""
Please analyze the sentiment of the following {content_type} comments in detail:

{all_comments}

Please write detailed analysis results in the following format:

### ๐ Sentiment Distribution
- **Positive**: X% (specific numbers)
- **Negative**: Y% (specific numbers)
- **Neutral**: Z% (specific numbers)

### ๐ Sentiment-based Comment Analysis

#### ๐ Positive Comments
**Representative Comment Examples:**
- "Actual comment 1" โ Reason for positive classification
- "Actual comment 2" โ Reason for positive classification
- "Actual comment 3" โ Reason for positive classification

**Main Positive Keywords:** keyword1, keyword2, keyword3

#### ๐ก Negative Comments
**Representative Comment Examples:**
- "Actual comment 1" โ Reason for negative classification
- "Actual comment 2" โ Reason for negative classification
- "Actual comment 3" โ Reason for negative classification

**Main Negative Keywords:** keyword1, keyword2, keyword3

#### ๐ Neutral Comments
**Representative Comment Examples:**
- "Actual comment 1" โ Reason for neutral classification
- "Actual comment 2" โ Reason for neutral classification

**Main Neutral Keywords:** keyword1, keyword2, keyword3

### ๐ก Key Insights
1. **Sentiment Trends**: [Overall sentiment trend analysis]
2. **Main Topics**: [Most mentioned issues in comments]
3. **Viewer Reactions**: [Main interests or reactions of viewers]

### ๐ Summary
**One-line Summary:** [Summarize overall comment sentiment and main content in one sentence]"""
    return analysis_prompt


def channel_markdown_result(videos, total_video_views, avg_video_views, videos_text, shorts, total_shorts_views, avg_shorts_views, shorts_text, video_sentiment, shorts_sentiment):
    markdown_result = f"""# ๐ YouTube Channel Analysis Report

## ๐ฌ Latest Regular Videos ({len(videos)} videos)
**Total Views**: {total_video_views:,} | **Average Views**: {avg_video_views:,.0f}

{videos_text}

---

## ๐ฏ Latest Shorts ({len(shorts)} videos)
**Total Views**: {total_shorts_views:,} | **Average Views**: {avg_shorts_views:,.0f}

{shorts_text}

---

## ๐ฌ Comment Sentiment Analysis

### ๐บ Regular Video Comment Reactions
{video_sentiment}

### ๐ฑ Shorts Comment Reactions
{shorts_sentiment}

---

## ๐ก Key Insights
- **Regular Video Average**: {avg_video_views:,.0f} views
- **Shorts Average**: {avg_shorts_views:,.0f} views
- **Performance Comparison**: {"Regular videos perform better" if avg_video_views > avg_shorts_views else "Shorts perform better" if avg_shorts_views > avg_video_views else "Similar performance"}

---
**Analysis Completed**: {datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")}
"""
    return markdown_result
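A hedged sketch of how these prompt builders are meant to be consumed, mirroring the `gpt-4o-mini` call pattern used elsewhere in this commit; all input strings below are placeholders, not real data.

```python
# Sketch only: build a report prompt and send it through the same OpenAI client
# style the analyzer modules use. Assumes OPENAI_API_KEY is set in the environment.
import openai
from prompt import analysis_prompt

client = openai.OpenAI()

prompt = analysis_prompt(
    video_info="Title: ...\nChannel: ...",          # placeholder
    summarization="Short summary of the video.",    # placeholder
    news="Recent related headlines.",               # placeholder
    comments_text="1. Great video!\n2. Audio could be better.",  # placeholder
)
response = client.chat.completions.create(
    model="gpt-4o-mini",
    messages=[{"role": "user", "content": prompt}],
    max_tokens=1500,
    temperature=0.3,
)
print(response.choices[0].message.content)
```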
requirements.txt
ADDED
|
@@ -0,0 +1,63 @@
# YouTube Comment Analyzer Requirements - Cleaned from pip list
anyio==4.9.0
cachetools==5.5.2
contourpy==1.3.2
cycler==0.12.1
dateparser==1.2.1
docutils==0.21.2
fonttools==4.58.1
google-api-core==2.25.0
google-api-python-client==2.171.0
google-auth==2.40.2
google-auth-httplib2==0.2.0
googleapis-common-protos==1.70.0
h11==0.16.0
httpcore==1.0.9
httplib2==0.22.0
httpx==0.28.1
id==1.5.0
jaraco.classes==3.4.0
jaraco.context==6.0.1
jaraco.functools==4.1.0
jiter==0.10.0
jsonpointer==2.1
keyring==25.6.0
kiwisolver==1.4.8
matplot==0.1.9
matplotlib==3.10.3
more-itertools==10.7.0
nh3==0.2.21
numpy==2.2.6
openai==1.83.0
pandas==2.2.3
pillow==11.2.1
proto-plus==1.26.1
protobuf==6.31.1
pyasn1==0.6.1
pyasn1_modules==0.4.2
pyloco==0.0.139
pyparsing==3.2.3
python-dateutil==2.9.0.post0
pytz==2025.2
readme_renderer==44.0
regex==2024.11.6
requests-toolbelt==1.0.0
rfc3986==2.0.0
rsa==4.9.1
setuptools==78.1.1
SimpleWebSocketServer==0.1.2
six==1.17.0
sniffio==1.3.1
twine==6.1.0
typing==3.7.4.3
tzdata==2025.2
tzlocal==5.3.1
uritemplate==4.2.0
ushlex==0.99.1
uv==0.7.9
websocket-client==1.8.0
wheel==0.45.1
youtube-comment-downloader==0.1.76
tavily-python==0.7.5
pytrends==4.9.2
plotly==6.1.2
utils.py
ADDED
|
@@ -0,0 +1,38 @@
import sys, subprocess, openai, json, os
from youtube_comment_downloader import *
from tavily import TavilyClient
from pytrends.request import TrendReq

# pytrends = TrendReq(hl='en-US', tz=360)
api_key = os.getenv("OPENAI_API_KEY")
client = openai.OpenAI(api_key=api_key)
tavily_api_key = os.getenv("TAVILY_API_KEY")

def download_comments(video_id="9P6H2QywDjM", output_file="9P6H2QywDjM.json", limit=10, sort=1):
    # Invoke the youtube_comment_downloader module as a subprocess to download comments.
    subprocess.run([sys.executable, "-m", "youtube_comment_downloader", "--youtubeid", video_id, "--output", output_file, "--limit", str(limit), "--sort", str(sort)], check=True, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
    with open(output_file, 'r', encoding='utf-8') as f: return [ {k: json.loads(line)[k] for k in ['text', 'votes', 'replies', 'heart', 'reply', 'time_parsed']} for line in f if line.strip()]

def download_comments2(video_id="9P6H2QywDjM", limit=10, sort=1):
    comments = []
    for comment in YoutubeCommentDownloader().get_comments_from_url(f'https://www.youtube.com/watch?v={video_id}', sort_by=sort):
        comments.append({k: comment.get(k) for k in ['text', 'votes', 'replies', 'heart', 'reply', 'time_parsed']})
        if len(comments) >= limit: break
    return comments

def get_tavily_search(keyword):
    tavily = TavilyClient(api_key=tavily_api_key)
    return tavily.search( query=f"{keyword} latest news", search_depth="advanced", max_results=5, include_answer=True,)

def get_recent_news(keyword):
    response = client.chat.completions.create(model="gpt-4o-mini", messages=[ {"role": "user", "content": f"Please summarize the latest news about '{keyword}'\n Content: {get_tavily_search(keyword)}"}], max_tokens=500, temperature=0.3)
    return response.choices[0].message.content

def summarize_video(video_id="9P6H2QywDjM"):
    # TODO
    return

def get_main_character(summarization):
    # TODO
    return
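A quick, hedged example of the comment downloader and news helpers above; both calls hit external services, so network access plus `OPENAI_API_KEY` and `TAVILY_API_KEY` are required, and the video ID is just the repo's default. The keyword passed to `get_recent_news` is illustrative.

```python
# Sketch: grab a handful of comments without the YouTube Data API, then
# pull a short LLM-written news summary for a keyword.
from utils import download_comments2, get_recent_news

comments = download_comments2(video_id="9P6H2QywDjM", limit=5, sort=1)
for c in comments:
    print(c["text"][:80], "| votes:", c["votes"])

print(get_recent_news("MrBeast"))  # keyword chosen only for illustration
```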
youtube_api_test.py
ADDED
|
@@ -0,0 +1,112 @@
import pandas as pd
from googleapiclient.discovery import build
import os

API_KEY = os.getenv("GOOGLE_API_KEY")

def get_youtube_comments(video_id, limit=30, order='time'):
    # Fetch YouTube comments; order: sort mode ('time': newest first, 'relevance': most liked)
    youtube = build("youtube", "v3", developerKey=API_KEY); comments = []; next_page_token = None
    while len(comments) < limit:
        response = youtube.commentThreads().list(part="snippet", videoId=video_id, maxResults=min(100, limit - len(comments)), order=order, pageToken=next_page_token).execute()
        for item in response['items']:
            comment = item['snippet']['topLevelComment']['snippet']
            if len(comment['textDisplay'].split()) <= 300: comments.append({'comment': f"{comment['textDisplay']} {comment['likeCount']} likes", 'likes': comment['likeCount'], 'published': comment['publishedAt']})
        next_page_token = response.get('nextPageToken')
        if not next_page_token: break
    return pd.DataFrame(comments[:limit])

def get_youtube_video_info(video_id):
    youtube = build("youtube", "v3", developerKey=API_KEY)
    response = youtube.videos().list(part="snippet,statistics,contentDetails", id=video_id).execute()
    if not response['items']: return None, None
    s, st, d = response['items'][0]['snippet'], response['items'][0]['statistics'], response['items'][0]['contentDetails']  # s=snippet, st=statistics, d=contentDetails
    # return {
    #     'title': s['title'], 'description': s['description'], 'channel_title': s['channelTitle'], 'channel_id': s['channelId'],
    #     'published_at': s['publishedAt'], 'tags': s.get('tags', []), 'category_id': s['categoryId'], 'default_language': s.get('defaultLanguage'),
    #     'view_count': int(st.get('viewCount', 0)),'like_count': int(st.get('likeCount', 0)), 'comment_count': int(st.get('commentCount', 0)), 'duration': d['duration'],
    # }
    return [f"Title: {s['title']}\nDescription: {s['description']}\nChannel: {s['channelTitle']}\nPublished: {s['publishedAt']}\nViews: {int(st.get('viewCount', 0)):,}\nLikes: {int(st.get('likeCount', 0)):,}\nComments: {int(st.get('commentCount', 0)):,}\nDuration: {d['duration']}\nTags: {s.get('tags', [])}",
            {'title': s['title'], 'description': s['description'], 'channel_title': s['channelTitle'], 'channel_id': s['channelId'],
             'published_at': s['publishedAt'], 'tags': s.get('tags', []), 'category_id': s['categoryId'], 'default_language': s.get('defaultLanguage'),
             'view_count': int(st.get('viewCount', 0)),'like_count': int(st.get('likeCount', 0)), 'comment_count': int(st.get('commentCount', 0)), 'duration': d['duration'],}]


def get_youtube_video_info_dict(video_id):
    youtube = build("youtube", "v3", developerKey=API_KEY)
    response = youtube.videos().list(part="snippet,statistics,contentDetails", id=video_id).execute()
    if not response['items']: return None
    s, st, d = response['items'][0]['snippet'], response['items'][0]['statistics'], response['items'][0]['contentDetails']  # s=snippet, st=statistics, d=contentDetails
    return {
        'title': s['title'], 'description': s['description'], 'channel_title': s['channelTitle'], 'channel_id': s['channelId'],
        'published_at': s['publishedAt'], 'tags': s.get('tags', []), 'category_id': s['categoryId'], 'default_language': s.get('defaultLanguage'),
        'view_count': int(st.get('viewCount', 0)),'like_count': int(st.get('likeCount', 0)), 'comment_count': int(st.get('commentCount', 0)), 'duration': d['duration'],
    }

def get_channel_id_by_name(channel_name):
    """Convert channel name to channel ID"""
    youtube = build("youtube", "v3", developerKey=API_KEY)

    search_response = youtube.search().list(q=channel_name, type='channel', part='id,snippet', maxResults=1).execute()
    if search_response['items']: channel_id = search_response['items'][0]['id']['channelId']; return channel_id
    return None

def get_channel_videos(channel_id, limit=10):
    youtube = build("youtube", "v3", developerKey=API_KEY)
    response = youtube.search().list(part="snippet", channelId=channel_id, maxResults=50, order="date", type="video").execute()
    videos = []
    for item in response['items']:
        video_id = item['id']['videoId']
        # Fetch video details to check the duration
        video_detail = youtube.videos().list(part="contentDetails,statistics", id=video_id).execute()
        if video_detail['items']:
            duration = video_detail['items'][0]['contentDetails']['duration']
            # Keep only videos of at least one minute (regular videos, not Shorts)
            if 'M' in duration or 'H' in duration:
                videos.append({'video_id': video_id, 'title': item['snippet']['title'], 'published': item['snippet']['publishedAt'], 'duration': duration, 'views': int(video_detail['items'][0]['statistics'].get('viewCount', 0))})
        if len(videos) >= limit: break
    return pd.DataFrame(videos)

def get_channel_shorts(channel_id, limit=10):
    youtube = build("youtube", "v3", developerKey=API_KEY)
    response = youtube.search().list(part="snippet", channelId=channel_id, maxResults=50, order="date", type="video").execute()
    shorts = []
    for item in response['items']:
        video_id = item['id']['videoId']
        # Fetch video details to check the duration
        video_detail = youtube.videos().list(part="contentDetails,statistics", id=video_id).execute()
        if video_detail['items']:
            duration = video_detail['items'][0]['contentDetails']['duration']
            # 60 seconds or less, with no minutes/hours component (a Short)
            if 'H' not in duration and 'M' not in duration and 'S' in duration:
                shorts.append({'video_id': video_id, 'title': item['snippet']['title'], 'published': item['snippet']['publishedAt'], 'duration': duration, 'views': int(video_detail['items'][0]['statistics'].get('viewCount', 0))})
        if len(shorts) >= limit: break
    return pd.DataFrame(shorts)


# Usage example
if __name__ == "__main__":
    # video_id = "9P6H2QywDjM"
    # video_info = get_youtube_video_info(video_id)

    # # 100 newest comments
    # latest_comments = get_youtube_comments(video_id, limit=100, order='time')  # order = 'time' or 'relevance'
    # print(f"\nTotal comments: {len(latest_comments)}")
    # print(f"Average likes: {latest_comments['likes'].mean():.1f}")
    # by_likes = latest_comments.sort_values('likes', ascending=False)
    # by_date = latest_comments.sort_values('published', ascending=False)
    # comments_text = '\n'.join([f"{i+1}. {comment}" for i, comment in enumerate(by_likes['comment'].tolist())])
    # print(f"\nComments:\n{comments_text}")

    channel_id = "UCX6OQ3DkcsbYNE6H8uQQuVA"  # MrBeast channel example

    latest_videos = get_channel_videos(channel_id, limit=10)
    latest_shorts = get_channel_shorts(channel_id, limit=10)

    print(f"Latest {len(latest_videos)} regular videos:")
    for i, row in latest_videos.iterrows():
        print(f"{i+1}. {row['title']} ({row['duration']}) - views: {row['views']:,}")

    print(f"\nLatest {len(latest_shorts)} Shorts:")
    for i, row in latest_shorts.iterrows():
        print(f"{i+1}. {row['title']} ({row['duration']}) - views: {row['views']:,}")
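For completeness, a hedged sketch of the single-video helpers that the commented-out portion of `__main__` refers to; it requires `GOOGLE_API_KEY` and uses only functions defined above, with the repo's default video ID as a placeholder.

```python
# Sketch: fetch metadata and the 20 most relevant comments for one video,
# then rank the comments by likes.
info_text, info_dict = get_youtube_video_info("9P6H2QywDjM")
if info_dict is not None:
    print(info_dict["title"], "-", f"{info_dict['view_count']:,} views")

comments_df = get_youtube_comments("9P6H2QywDjM", limit=20, order="relevance")
print(comments_df.sort_values("likes", ascending=False).head())
```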