Spaces:
Sleeping
Sleeping
| import os | |
| import re | |
| import streamlit as st | |
| import googleapiclient.discovery | |
| import pandas as pd | |
| from transformers import pipeline | |
| import matplotlib.pyplot as plt | |
| import seaborn as sns | |
# Page heading rendered by Streamlit; the text uses Streamlit's
# colour (:red[...]) and emoji (:sunglasses:) shortcodes.
page_title = 'Анализатор комментариев :red[YouTube] :sunglasses:'
st.title(page_title)
# Initialise the Hugging Face model used for sentiment analysis of text.
# The resource is cached so the model is loaded once for all sessions.
# NOTE: st.cache_resource requires Streamlit >= 1.18.
@st.cache_resource
def load_model():
    """
    Loads the 'blanchefort/rubert-base-cased-sentiment' model from HuggingFace
    and saves to cache for consecutive loads.

    The ``st.cache_resource`` decorator keeps the returned pipeline object
    cached, so repeated calls reuse it instead of constructing the
    pipeline again.

    Returns: the "sentiment-analysis" pipeline object built by
    ``transformers.pipeline``.
    """
    return pipeline(
        "sentiment-analysis",
        "blanchefort/rubert-base-cased-sentiment",
    )
def extract_video_id(url: str) -> str:
    """
    Extracts the video ID from a YouTube video URL.

    Recognised forms:
      * ``...watch?v=<id>`` (the ``v`` query parameter, in any position),
      * ``youtu.be/<id>``,
      * ``youtube.com/shorts/<id>``, ``/embed/<id>``, ``/live/<id>``,
        ``/v/<id>`` (also on ``youtube-nocookie.com``).

    Args: url (str): The YouTube video URL.
    Returns: str: The extracted video ID,
    or an empty string if the URL is not valid.
    """
    if not url:
        return ""

    patterns = (
        # ``v=`` must start a parameter name: the negative lookbehind stops
        # "rev=123" or "nav=x" from being mistaken for the video id.
        # Capturing the whole [\w-]+ run keeps ids that end with "-".
        r"(?<!\w)v=([\w-]+)",
        # Path-based URL shapes (short links, shorts, embeds, live streams).
        r"(?:youtu\.be/|youtube(?:-nocookie)?\.com/(?:shorts|embed|live|v)/)"
        r"([\w-]+)",
    )
    for pattern in patterns:
        match = re.search(pattern, url)
        if match:
            return match.group(1)
    return ""
def download_comments(video_id: str, max_comments: int = 100) -> pd.DataFrame:
    """
    Downloads comments from a YouTube video based on the provided video ID
    and returns them as a DataFrame.

    Only top-level comments are collected (replies are not requested).
    Result pages are followed until ``max_comments`` rows are gathered or
    the API reports no further page.

    Args: video_id (str): The video ID of the YouTube video.
          max_comments (int): Upper bound on the number of comments to
          return. The default of 100 matches a single API page.
    Returns: DataFrame: A DataFrame containing the downloaded comments
    with columns 'author', 'published_at', 'updated_at', 'like_count', 'text'.
    Raises: RuntimeError: if the API_KEY_YOUTUBE environment variable
    is not set.
    """
    columns = ['author', 'published_at', 'updated_at', 'like_count', 'text']

    dev_key = os.getenv('API_KEY_YOUTUBE')
    if not dev_key:
        # Fail early with an actionable message instead of sending a request
        # without a key.
        raise RuntimeError(
            "Environment variable API_KEY_YOUTUBE is not set; "
            "a YouTube Data API key is required to download comments."
        )

    youtube = googleapiclient.discovery.build("youtube",
                                              "v3",
                                              developerKey=dev_key)
    threads = youtube.commentThreads()
    request = threads.list(
        part="snippet",
        videoId=video_id,
        # The API accepts page sizes from 1 to 100.
        maxResults=max(1, min(max_comments, 100)),
        # Plain text keeps HTML markup and entities out of the text that is
        # later fed to the sentiment model.
        textFormat="plainText",
    )

    comments = []
    while request is not None and len(comments) < max_comments:
        response = request.execute()
        for item in response.get('items', []):
            comment = item['snippet']['topLevelComment']['snippet']
            comments.append([comment['authorDisplayName'],
                             comment['publishedAt'],
                             comment['updatedAt'],
                             comment['likeCount'],
                             comment['textDisplay']])
        # list_next() returns None once there is no next page.
        request = threads.list_next(request, response)

    return pd.DataFrame(comments[:max_comments], columns=columns)
def analyze_emotions_in_comments(df: pd.DataFrame,
                                 max_comments: int = 513) -> tuple:
    """
    Takes a DataFrame with comments and runs the sentiment model over the
    'text' column of (at most) the first ``max_comments`` rows.

    Args: df (pandas.DataFrame): DataFrame containing comments to analyze;
          must provide the 'text', 'author' and 'published_at' columns.
          max_comments (int): Maximum number of comments sent to the model.
    Returns: tuple: the DataFrame with the model's 'label' and 'score'
    columns placed before 'text', 'author' and 'published_at',
    and the total count of processed comments.
    Rows beyond ``max_comments`` are kept with empty 'label'/'score'.
    """
    selected_columns = ['text', 'author', 'published_at']
    df = df[selected_columns]
    texts = df['text'][:max_comments]

    if texts.empty:
        # Nothing to classify: skip loading the model.
        res_list = []
    else:
        model = load_model()
        # Truncate long comments to the 512 tokens a BERT-style model
        # accepts; without this the pipeline can fail on lengthy input.
        res_list = model(texts.to_list(), truncation=True, max_length=512)

    # Reuse the index of the analysed rows so the column-wise concat lines
    # up with ``df`` even when its index is not 0..n-1, and always create
    # the 'label'/'score' columns, even for an empty result.
    results = pd.DataFrame(res_list,
                           columns=['label', 'score'],
                           index=texts.index)
    full_df = pd.concat([results, df], axis=1)
    return (full_df, len(res_list))
def plot_heatmap_from_dataframe(df: pd.DataFrame) -> plt:
    """
    Draws a heatmap of comment counts by hour (rows) and date (columns)
    and returns the matplotlib ``pyplot`` module holding the figure.

    The input DataFrame is not modified: the date/hour helper columns are
    added to a copy.

    Args: df (DataFrame): The input DataFrame; must provide the
          'published_at' and 'text' columns.
    Returns: plt: The ``matplotlib.pyplot`` module, whose current figure is
    the heatmap that was just drawn.
    """
    published = pd.to_datetime(df['published_at'])
    # assign() returns a new frame, so the caller's DataFrame keeps its
    # original columns and dtypes.
    timeline = df.assign(published_at=published,
                         Date=published.dt.date,
                         Hour=published.dt.hour)
    pivot_table = timeline.pivot_table(index='Hour',
                                       columns='Date',
                                       values='text',
                                       aggfunc='count')
    plt.figure(figsize=(10, 6))
    sns.heatmap(pivot_table,
                cmap='YlGnBu')
    plt.title('Количество комментариев по часам и датам')
    plt.xlabel('Дата')
    plt.ylabel('Час')
    return plt
def visualize_data(df: pd.DataFrame):
    """
    Builds a pie chart of how often each value occurs in the 'label' column.

    Args: df (DataFrame): The input DataFrame; must provide a 'label' column.
    Returns: fig: A matplotlib figure object containing the pie chart.
    """
    label_counts = df['label'].value_counts()
    fig, ax = plt.subplots()
    # The axes created by subplots() is the current one, so setting the
    # title on it directly is the same as calling plt.title().
    ax.set_title("Эмоциональная окраска комментариев на YouTube")
    ax.pie(label_counts, labels=label_counts.index, autopct='%1.1f%%')
    return fig
def change_url():
    """
    Callback for the URL input's ``on_change`` hook: clears the ``start``
    flag in the session state.
    """
    st.session_state["start"] = False
# Make sure the 'start' flag exists before it is read below.
if "start" not in st.session_state:
    st.session_state.start = False

# Get the video id from the URL; it is needed for the API request.
url = st.text_input(label="Enter URL from YouTube", on_change=change_url)
video_id = extract_video_id(url)

# The button is only rendered for a URL that yielded a video id.
if video_id != "" and st.button('Загрузить комментарии'):
    st.session_state.start = True

if st.session_state.start:
    try:
        comments_df = download_comments(video_id)
    except googleapiclient.errors.HttpError as err:
        # E.g. comments disabled, unknown video or quota exceeded: show a
        # message in the page instead of a raw traceback.
        st.error(f'Не удалось загрузить комментарии: {err}')
        st.stop()

    if comments_df.empty:
        # Nothing to analyse or plot; the heatmap cannot be drawn from an
        # empty table.
        st.warning('Комментарии не найдены.')
    else:
        # Show the results table on the page.
        with st.spinner('Analyzing comments...'):
            full_df, num_comments = analyze_emotions_in_comments(comments_df)
        st.success(f'Готово! Обработано {num_comments} комментариев.')
        st.write(full_df)
        st.markdown('***')
        # Show the heatmap of comments by hour and date.
        st.pyplot(plot_heatmap_from_dataframe(full_df))
        st.markdown('***')
        # Show the pie chart.
        st.pyplot(visualize_data(full_df))