import openai from utils import * from youtube_api_test import * import traceback import datetime from prompt import * import matplotlib.pyplot as plt from io import BytesIO from PIL import Image import concurrent.futures plt.rcParams['font.family'] = ['DejaVu Sans', 'Arial Unicode MS', 'SimHei', 'Malgun Gothic'] plt.rcParams['axes.unicode_minus'] = False client = openai.OpenAI(api_key=api_key) def create_sentiment_pie_chart(classified_comments): try: print("š Creating PREMIUM sentiment analysis dashboard...") plt.rcParams['font.size'] = 10 sentiment_data = {'Positive': [], 'Negative': [], 'Neutral': []} confidence_breakdown = {'High': 0, 'Medium': 0, 'Low': 0} top_liked_by_sentiment = {'Positive': [], 'Negative': [], 'Neutral': []} for comment in classified_comments: analysis = comment['sentiment_analysis'] likes = comment['likes'] comment_text = comment['comment'] sentiment = 'Neutral' if 'Positive' in analysis: sentiment = 'Positive' elif 'Negative' in analysis: sentiment = 'Negative' sentiment_data[sentiment].append({ 'comment': comment_text, 'likes': likes, 'analysis': analysis }) # Extract confidence level if 'High' in analysis: confidence_breakdown['High'] += 1 elif 'Medium' in analysis: confidence_breakdown['Medium'] += 1 else: confidence_breakdown['Low'] += 1 top_liked_by_sentiment = sentiment_data # Sort top liked comments for sentiment in top_liked_by_sentiment: top_liked_by_sentiment[sentiment] = sorted( top_liked_by_sentiment[sentiment], key=lambda x: x['likes'], reverse=True )[:3] # Top 3 per sentiment # Calculate percentages and metrics total_comments = len(classified_comments) sentiment_counts = {k: len(v) for k, v in sentiment_data.items()} sentiment_percentages = {k: (v/total_comments*100) if total_comments > 0 else 0 for k, v in sentiment_counts.items()} # Calculate engagement metrics avg_likes_by_sentiment = {} for sentiment, comments in sentiment_data.items(): if comments: avg_likes_by_sentiment[sentiment] = sum([c['likes'] for c in comments]) / len(comments) else: avg_likes_by_sentiment[sentiment] = 0 print(f"š Sentiment breakdown: {sentiment_counts}") print(f"š Confidence breakdown: {confidence_breakdown}") fig = plt.figure(figsize=(16, 10)) gs = fig.add_gridspec(2, 2, hspace=0.3, wspace=0.3) ax1 = fig.add_subplot(gs[0, 0]) if total_comments > 0: labels = list(sentiment_counts.keys()) sizes = list(sentiment_counts.values()) colors = ['#2ecc71', '#e74c3c', '#95a5a6'] explode = (0.05, 0.05, 0.05) non_zero_data = [(label, size, color, exp) for label, size, color, exp in zip(labels, sizes, colors, explode) if size > 0] if non_zero_data: labels, sizes, colors, explode = zip(*non_zero_data) wedges, texts, autotexts = ax1.pie(sizes, labels=labels, colors=colors, explode=explode, autopct=lambda pct: f'{pct:.1f}%\n({int(pct/100*total_comments)})', startangle=90, textprops={'fontsize': 10, 'weight': 'bold'}) for autotext in autotexts: autotext.set_color('white') autotext.set_fontsize(9) autotext.set_weight('bold') ax1.set_title('š¬ Sentiment Distribution', fontsize=14, weight='bold', pad=15) ax2 = fig.add_subplot(gs[0, 1]) conf_labels = list(confidence_breakdown.keys()) conf_values = list(confidence_breakdown.values()) conf_colors = ['#e74c3c', '#f39c12', '#2ecc71'] bars = ax2.bar(conf_labels, conf_values, color=conf_colors, alpha=0.8) ax2.set_title('šÆ Analysis Confidence', fontsize=12, weight='bold') ax2.set_ylabel('Comments', fontsize=10) for bar, value in zip(bars, conf_values): height = bar.get_height() ax2.text(bar.get_x() + bar.get_width()/2., height + 0.1, f'{value}', ha='center', va='bottom', fontweight='bold', fontsize=9) ax3 = fig.add_subplot(gs[1, 0]) sent_labels = list(avg_likes_by_sentiment.keys()) sent_values = list(avg_likes_by_sentiment.values()) sent_colors = ['#2ecc71', '#e74c3c', '#95a5a6'] bars = ax3.bar(sent_labels, sent_values, color=sent_colors, alpha=0.8) ax3.set_title('š Average Likes by Sentiment', fontsize=12, weight='bold') ax3.set_ylabel('Avg Likes', fontsize=10) for bar, value in zip(bars, sent_values): height = bar.get_height() ax3.text(bar.get_x() + bar.get_width()/2., height + 0.1, f'{value:.1f}', ha='center', va='bottom', fontweight='bold', fontsize=9) ax4 = fig.add_subplot(gs[1, 1]) ax4.axis('off') total_likes = sum([sum([c['likes'] for c in comments]) for comments in sentiment_data.values()]) most_engaging_sentiment = max(avg_likes_by_sentiment.items(), key=lambda x: x[1])[0] dominant_sentiment = max(sentiment_counts.items(), key=lambda x: x[1])[0] insights_text = f"""šÆ KEY INSIGHTS: š Total Comments: {total_comments} š Total Likes: {total_likes:,} š Dominant: {dominant_sentiment} ā” Most Engaging: {most_engaging_sentiment} šÆ High Confidence: {confidence_breakdown['High']}/{total_comments}""" ax4.text(0.05, 0.95, insights_text, fontsize=10, bbox=dict(boxstyle="round,pad=0.5", facecolor='lightblue', alpha=0.8), weight='bold', transform=ax4.transAxes, verticalalignment='top') fig.suptitle('š Sentiment Analysis Dashboard', fontsize=16, weight='bold', y=0.95) buffer = BytesIO() plt.savefig(buffer, format='png', dpi=200, bbox_inches='tight', facecolor='white') buffer.seek(0) pil_image = Image.open(buffer) plt.close() print("ā PREMIUM sentiment dashboard created! š") return pil_image except Exception as e: print(f"ā Sentiment dashboard error: {str(e)}") print(f"ā Error details: {traceback.format_exc()}") try: fig, ax = plt.subplots(figsize=(10, 6)) ax.text(0.5, 0.5, f'š SENTIMENT ANALYSIS DASHBOARD\n\nProcessing Error: {str(e)}\n\nš Optimizing analysis...', ha='center', va='center', fontsize=12, weight='bold', transform=ax.transAxes, bbox=dict(boxstyle="round,pad=1", facecolor='lightgreen', alpha=0.8)) ax.set_title('š¬ Sentiment Analysis - System Update', fontsize=14, weight='bold') ax.axis('off') buffer = BytesIO() plt.savefig(buffer, format='png', dpi=200, bbox_inches='tight', facecolor='white') buffer.seek(0) pil_image = Image.open(buffer) plt.close() return pil_image except: return None def translate_to_english_llm(original_text): """Translate Korean keywords/text to English using LLM - OPTIMIZED""" try: translation_prompt = f""" Translate to English concisely: {original_text[:200]} Return ONLY the translation. """ response = client.chat.completions.create( model="gpt-4o-mini", messages=[{"role": "user", "content": translation_prompt}], max_tokens=50, temperature=0.1 ) return response.choices[0].message.content.strip() except Exception as e: print(f"Translation error: {str(e)}") return original_text[:200] def create_public_opinion_bar_chart(opinion_results): try: print("š Creating public opinion analysis chart...") print(f"š Opinion results received: {opinion_results}") opinion_metrics = {} concerns = [] if 'Key Concerns:' in opinion_results: concerns_line = opinion_results.split('Key Concerns:')[1].split('\n')[0] raw_concerns = [c.strip() for c in concerns_line.split(',') if c.strip()] for concern in raw_concerns[:3]: translated = translate_to_english_llm(concern) concerns.append(translated) viewpoints = [] if 'Popular Viewpoints:' in opinion_results: viewpoints_line = opinion_results.split('Popular Viewpoints:')[1].split('\n')[0] raw_viewpoints = [v.strip() for v in viewpoints_line.split(',') if v.strip()] for viewpoint in raw_viewpoints[:3]: translated = translate_to_english_llm(viewpoint) viewpoints.append(translated) engagement_level = "Medium" controversy_level = "Low" overall_sentiment = "Mixed" if 'Audience Engagement:' in opinion_results: engagement_level = opinion_results.split('Audience Engagement:')[1].split('\n')[0].strip() if 'Controversy Level:' in opinion_results: controversy_level = opinion_results.split('Controversy Level:')[1].split('\n')[0].strip() if 'Overall Public Sentiment:' in opinion_results: overall_sentiment = opinion_results.split('Overall Public Sentiment:')[1].split('\n')[0].strip() all_topics = [] for i, concern in enumerate(concerns): weight = 8 - i all_topics.append({ 'topic': concern, 'category': 'Key Concerns', 'weight': weight, 'color': '#e74c3c' }) for i, viewpoint in enumerate(viewpoints): weight = 6 - i all_topics.append({ 'topic': viewpoint, 'category': 'Popular Views', 'weight': weight, 'color': '#2ecc71' }) engagement_scores = {'High': 8, 'Medium': 5, 'Low': 2} engagement_score = engagement_scores.get(engagement_level, 5) all_topics.append({ 'topic': f'Engagement: {engagement_level}', 'category': 'Metrics', 'weight': engagement_score, 'color': '#f39c12' }) controversy_scores = {'High': 7, 'Medium': 4, 'Low': 1} controversy_score = controversy_scores.get(controversy_level, 3) all_topics.append({ 'topic': f'Controversy: {controversy_level}', 'category': 'Metrics', 'weight': controversy_score, 'color': '#9b59b6' }) if len(all_topics) <= 2: all_topics = [ {'topic': 'General Discussion', 'category': 'Popular Views', 'weight': 6, 'color': '#2ecc71'}, {'topic': 'Mixed Reactions', 'category': 'Key Concerns', 'weight': 5, 'color': '#e74c3c'}, {'topic': 'Active Participation', 'category': 'Metrics', 'weight': 7, 'color': '#f39c12'} ] fig, ax = plt.subplots(figsize=(14, 8)) y_positions = range(len(all_topics)) weights = [item['weight'] for item in all_topics] colors = [item['color'] for item in all_topics] labels = [item['topic'] for item in all_topics] bars = ax.barh(y_positions, weights, color=colors, alpha=0.8) for i, (bar, label) in enumerate(zip(bars, labels)): ax.text(bar.get_width() + 0.2, bar.get_y() + bar.get_height()/2, label, va='center', fontweight='bold', fontsize=10) ax.set_title('š„ Public Opinion Analysis', fontsize=16, weight='bold', pad=20) ax.set_xlabel('Opinion Strength Score', fontsize=12, weight='bold') ax.set_yticks([]) ax.grid(axis='x', alpha=0.3) insights_text = f"""š Summary: Engagement: {engagement_level} | Controversy: {controversy_level} | Sentiment: {overall_sentiment}""" fig.text(0.02, 0.02, insights_text, fontsize=10, bbox=dict(boxstyle="round,pad=0.3", facecolor='lightgray', alpha=0.8)) plt.tight_layout() buffer = BytesIO() plt.savefig(buffer, format='png', dpi=200, bbox_inches='tight', facecolor='white') buffer.seek(0) pil_image = Image.open(buffer) plt.close() print("ā Public opinion chart created! š") return pil_image except Exception as e: print(f"ā Public opinion chart error: {str(e)}") # Simple fallback chart try: fig, ax = plt.subplots(figsize=(10, 6)) ax.text(0.5, 0.5, f'šÆ PUBLIC OPINION ANALYSIS\n\nProcessing...', ha='center', va='center', fontsize=12, weight='bold', transform=ax.transAxes, bbox=dict(boxstyle="round,pad=1", facecolor='lightblue', alpha=0.8)) ax.set_title('š„ Public Opinion Analysis', fontsize=14, weight='bold') ax.axis('off') buffer = BytesIO() plt.savefig(buffer, format='png', dpi=200, bbox_inches='tight', facecolor='white') buffer.seek(0) pil_image = Image.open(buffer) plt.close() return pil_image except: return None def sentiment_classification_llm(comments_list, comment_limit): """Step 1: LLM for sentiment classification - OPTIMIZED for speed""" try: print("šÆ Step 1: Starting OPTIMIZED sentiment classification...") # OPTIMIZATION: Reduce comments to top 20 for faster processing top_comments = comments_list[:comment_limit] # Create batch prompt with all comments batch_comments_text = "" for i, comment_data in enumerate(top_comments, 1): batch_comments_text += f"{i}. \"{comment_data['comment'][:100]}\" (Likes: {comment_data['likes']})\n" # Truncate long comments sentiment_prompt = f""" Classify sentiment of these {len(top_comments)} YouTube comments quickly and efficiently: Note: Advanced sentiment analysis - consider sarcasm, slang, emojis, and context {batch_comments_text} Return in this EXACT format for each comment: Comment 1: Positive/Negative/Neutral - High/Medium/Low confidence - Brief reason Comment 2: Positive/Negative/Neutral - High/Medium/Low confidence - Brief reason [Continue for all...] Be fast and precise. Classify ALL {len(top_comments)} comments. """ response = client.chat.completions.create( model="gpt-4o-mini", messages=[{"role": "user", "content": sentiment_prompt}], max_tokens=1500, # Reduced for faster processing temperature=0.1 ) batch_result = response.choices[0].message.content.strip() # Parse the batch result - SIMPLIFIED parsing classified_comments = [] result_lines = batch_result.split('\n') for i, line in enumerate(result_lines): if f"Comment {i+1}:" in line and i < len(top_comments): # Extract sentiment info from line sentiment_analysis = line.replace(f"Comment {i+1}:", "").strip() classified_comments.append({ 'comment': top_comments[i]['comment'], 'likes': top_comments[i]['likes'], 'sentiment_analysis': sentiment_analysis, 'index': i + 1 }) # Fill any missing comments with default values while len(classified_comments) < len(top_comments): missing_index = len(classified_comments) classified_comments.append({ 'comment': top_comments[missing_index]['comment'], 'likes': top_comments[missing_index]['likes'], 'sentiment_analysis': "Neutral - Medium confidence - Processing completed", 'index': missing_index + 1 }) print(f"ā OPTIMIZED sentiment classification completed for {len(classified_comments)} comments") return classified_comments except Exception as e: print(f"ā Sentiment classification error: {str(e)}") # Quick fallback classified_comments = [] for i, comment_data in enumerate(comments_list[:15], 1): # Even smaller fallback classified_comments.append({ 'comment': comment_data['comment'], 'likes': comment_data['likes'], 'sentiment_analysis': "Neutral - Medium confidence - Quick processing", 'index': i }) return classified_comments def public_opinion_analysis_llm(classified_comments): """Step 3: LLM for public opinion analysis - OPTIMIZED""" try: print("š Step 3: Starting OPTIMIZED public opinion analysis...") positive_comments = [item for item in classified_comments if 'Positive' in item['sentiment_analysis']][:5] negative_comments = [item for item in classified_comments if 'Negative' in item['sentiment_analysis']][:5] neutral_comments = [item for item in classified_comments if 'Neutral' in item['sentiment_analysis']][:5] opinion_prompt = f""" Analyze public opinion from these YouTube comments quickly: POSITIVE ({len(positive_comments)}): {', '.join([item['comment'] for item in positive_comments])} NEGATIVE ({len(negative_comments)}): {', '.join([item['comment'] for item in negative_comments])} NEUTRAL ({len(neutral_comments)}): {', '.join([item['comment'] for item in neutral_comments])} Return ONLY in this format: TRANSLATIONS (if needed): [Original comment] ā [English translation] Overall Public Sentiment: [Positive/Negative/Mixed/Neutral] Dominant Opinion: [Main viewpoint in one sentence] Key Concerns: [Top 3 concerns, comma-separated] Popular Viewpoints: [Top 3 popular opinions, comma-separated] Controversy Level: [High/Medium/Low] Audience Engagement: [High/Medium/Low] Be fast and objective. """ response = client.chat.completions.create( model="gpt-4o-mini", messages=[{"role": "user", "content": opinion_prompt}], max_tokens=300, temperature=0.2 ) opinion_results = response.choices[0].message.content.strip() print(f"ā OPTIMIZED public opinion analysis completed") return opinion_results except Exception as e: print(f"ā Public opinion analysis error: {str(e)}") return "Overall Public Sentiment: Mixed\nDominant Opinion: General discussion\nKey Concerns: none, identified, quickly\nPopular Viewpoints: standard, response, analysis\nControversy Level: Low\nAudience Engagement: Medium" def create_video_info_display(video_info): """Create beautiful HTML display for video information""" try: title = video_info.get('title', 'N/A') channel = video_info.get('channel_name', 'N/A') views = video_info.get('view_count', 0) likes = video_info.get('like_count', 0) duration = video_info.get('duration', 'N/A') published = video_info.get('publish_date', 'N/A') video_id = video_info.get('video_id', 'N/A') # Format numbers views_formatted = f"{views:,}" if isinstance(views, int) else str(views) likes_formatted = f"{likes:,}" if isinstance(likes, int) else str(likes) video_info_html = f"""
Unable to load video information: {str(e)}