""" app.py ------ Aplikasi berbasis web ABSA IndoBERT menggunakan Streamlit untuk analisis sentimen berbasis aspek dari kritik dan saran mahasiswa. UPDATED: Dengan Batch + Chunked Processing + Session-based Cache untuk multi-user UPDATED: Visualisasi dinamis yang menyesuaikan dengan kolom yang tersedia """ import os import time import gc import uuid import shutil from io import BytesIO from pathlib import Path import pandas as pd import streamlit as st import torch import torch.nn.functional as F from torch.utils.data import DataLoader from config import CONFIG, ASPEK_COLUMNS from model_utils import load_model_and_tokenizer, ABSADataset from visualization import ( show_sentiment_bar_chart, show_sentiment_pie_chart, show_year_distribution, show_semester_distribution, show_prodi_distribution, show_top10_matkul_distribution, show_sentiment_by_year, show_sentiment_by_semester, show_sentiment_by_prodi, show_sentiment_by_top10_matkul, ) from preprocessing import text_preprocessing_pipeline # Konfigurasi untuk chunked processing (membagi data besar menjadi bagian kecil) CHUNK_SIZE = 2500 ENABLE_CHUNKED = True CACHE_EXPIRY_HOURS = 24 # Buat direktori untuk menyimpan cache file os.makedirs("chache_file", exist_ok=True) os.makedirs("chache_file/sessions", exist_ok=True) # Konfigurasi halaman st.set_page_config( page_title="ABSA IndoBERT", layout="wide", page_icon="💬" ) # Load custom CSS with open(os.path.join("assets", "style.css"), encoding="utf-8") as f: st.markdown(f"", unsafe_allow_html=True) st.markdown('', unsafe_allow_html=True) def get_session_id(): """Generate atau ambil session ID untuk user - tetap ada meski refresh halaman""" query_params = st.query_params # Cek apakah session ID sudah ada di URL parameter if "sid" in query_params: sid = query_params["sid"] st.session_state.session_id = sid return sid # Jika belum ada, buat session ID baru if "session_id" not in st.session_state: new_session_id = str(uuid.uuid4()) st.session_state.session_id = new_session_id st.query_params["sid"] = new_session_id return new_session_id # Jika sudah ada di session state, gunakan yang existing existing_id = st.session_state.session_id st.query_params["sid"] = existing_id return existing_id def get_session_cache_dir(): """Dapatkan direktori cache khusus untuk session ini""" sid = get_session_id() cache_dir = Path(f"chache_file/sessions/{sid}") cache_dir.mkdir(parents=True, exist_ok=True) return cache_dir def get_session_chunks_dir(): """Dapatkan direktori chunks khusus untuk session ini""" chunks_dir = get_session_cache_dir() / "chunks" chunks_dir.mkdir(parents=True, exist_ok=True) return chunks_dir def cleanup_old_sessions(): """Hapus cache session yang sudah expired (lebih dari 24 jam)""" sessions_dir = Path("chache_file/sessions") if not sessions_dir.exists(): return current_time = time.time() for session_dir in sessions_dir.iterdir(): if session_dir.is_dir(): mod_time = session_dir.stat().st_mtime age_hours = (current_time - mod_time) / 3600 # Hapus jika sudah lebih dari CACHE_EXPIRY_HOURS if age_hours > CACHE_EXPIRY_HOURS: try: shutil.rmtree(session_dir) print(f"Deleted expired session: {session_dir.name}") except OSError as e: print(f"Error deleting session {session_dir.name}: {e}") # Jalankan cleanup saat aplikasi dimulai cleanup_old_sessions() @st.cache_resource(show_spinner=False) def get_model_resources(): """Memuat model dan tokenizer IndoBERT (di-cache agar tidak reload terus)""" return load_model_and_tokenizer() # Load model dan tokenizer dengan spinner with st.spinner("Sedang memuat model IndoBERT dan tokenizer... Harap tunggu sebentar!"): model, tokenizer, le, device = get_model_resources() # Tampilkan notifikasi sukses sementara success_placeholder = st.empty() success_placeholder.success("Model dan tokenizer berhasil dimuat!") time.sleep(1) success_placeholder.empty() def convert_df_to_excel(df): """Mengubah DataFrame menjadi file Excel dalam bentuk byte stream untuk download""" output = BytesIO() with pd.ExcelWriter(output, engine="openpyxl") as writer: df.to_excel(writer, index=False) return output.getvalue() def clear_memory(): """Bersihkan memory cache untuk optimasi performa""" gc.collect() if torch.cuda.is_available(): torch.cuda.empty_cache() def process_chunk_batch(chunk_dataframe, chunk_num, total_chunk_count, progress_bar, status_text): """ Memproses satu chunk data dengan batch processing. Progress bar menunjukkan: Preprocessing 0-100%, lalu Predicting 0-100% """ # STEP 1: Preprocessing teks (0-100%) cleaned_text_list = [] total_rows = len(chunk_dataframe) for idx, raw_text in enumerate(chunk_dataframe["kritik_saran"]): clean_text = text_preprocessing_pipeline(str(raw_text)) cleaned_text_list.append(clean_text) # Update progress bar setiap 50 baris if idx % 50 == 0 or idx == total_rows - 1: progress = (idx + 1) / total_rows progress_bar.progress(progress) status_text.text( f"Chunk {chunk_num}/{total_chunk_count} | Preprocessing: {idx+1}/{total_rows} rows") # Reset progress bar untuk tahap prediksi progress_bar.progress(0) status_text.text( f"Chunk {chunk_num}/{total_chunk_count} | Memulai prediksi...") time.sleep(0.2) # STEP 2: Batch Prediction dengan model (0-100%) batch_sz = CONFIG.get("batch_size", 32) num_sents = len(cleaned_text_list) num_asps = len(ASPEK_COLUMNS) # Siapkan dataset dan dataloader ds = ABSADataset(cleaned_text_list, ASPEK_COLUMNS, tokenizer, CONFIG["max_len"]) dl = DataLoader( ds, batch_size=batch_sz, shuffle=False, num_workers=0 ) # Matrix untuk menyimpan hasil prediksi predictions_matrix = [[None] * num_asps for _ in range(num_sents)] batch_counter = 0 total_batch_count = len(dl) # Proses prediksi batch demi batch model.eval() with torch.no_grad(): for batch_data in dl: inp_ids = batch_data['input_ids'].to(device) attn_mask = batch_data['attention_mask'].to(device) sent_idxs = batch_data['sent_idx'].numpy() asp_idxs = batch_data['aspect_idx'].numpy() # Prediksi dan konversi ke label model_outputs = model(inp_ids, attn_mask) probabilities = F.softmax(model_outputs, dim=1) predicted_indices = torch.argmax( probabilities, dim=1).cpu().numpy() pred_labels = le.inverse_transform(predicted_indices) # Simpan hasil prediksi ke matrix for s_idx, a_idx, lbl in zip(sent_idxs, asp_idxs, pred_labels): predictions_matrix[s_idx][a_idx] = lbl # Update progress bar batch_counter += 1 progress = batch_counter / total_batch_count progress_bar.progress(progress) status_text.text( f"Chunk {chunk_num}/{total_chunk_count} | Predicting: {batch_counter}/{total_batch_count} batches") # STEP 3: Gabungkan hasil prediksi dengan data asli result_list = [] for idx, (_, data_row) in enumerate(chunk_dataframe.iterrows()): row_dict = data_row.to_dict() row_dict["kritik_saran"] = cleaned_text_list[idx] for asp_idx, asp_name in enumerate(ASPEK_COLUMNS): row_dict[asp_name] = predictions_matrix[idx][asp_idx] result_list.append(row_dict) result_dataframe = pd.DataFrame(result_list) # Simpan hasil chunk ke file CSV chunks_directory = get_session_chunks_dir() chunk_filepath = chunks_directory / f"chunk_{chunk_num}.csv" result_dataframe.to_csv(chunk_filepath, index=False) # Progress selesai progress_bar.progress(1.0) status_text.text(f"Chunk {chunk_num}/{total_chunk_count} | Selesai!") clear_memory() return result_dataframe def get_available_columns(df): """Deteksi kolom-kolom yang tersedia dalam dataframe untuk filter dan visualisasi dinamis""" available = { 'has_tahun': 'tahun' in df.columns or 'tanggal' in df.columns, 'has_semester': 'semester' in df.columns, 'has_prodi': 'nama_prodi' in df.columns, 'has_matkul': 'nama_matakuliah' in df.columns and 'kode_matakuliah' in df.columns, } return available # Judul aplikasi st.markdown("""

ABSA IndoBERT

Analisis otomatis kritik dan saran berdasarkan aspek tertentu dan sentimen dengan model IndoBERT.

""", unsafe_allow_html=True) st.markdown(" ") st.markdown(" ") st.markdown(" ") # Panduan pengunaan aplikasi steps = [ {"icon": "bi bi-cloud-arrow-up", "title": "1. Upload File Excel", "description": "Siapkan dan upload file Excel kritik dan saran yang wajib memiliki kolom `kritik_saran`."}, {"icon": "bi bi-hourglass-split", "title": "2. Proses Data Otomatis", "description": "Website akan melakukan preprocessing dan menjalankan model prediksi sentimen."}, {"icon": "bi bi-bar-chart", "title": "3. Analisis & Visualisasi", "description": "Lihat hasil lengkap berupa tabel data terprediksi, metrik, dan visualisasi sentimen."}, {"icon": "bi bi-cloud-arrow-down", "title": "4. Unduh Hasil Analisis", "description": "Unduh hasil analisis lengkap Anda dalam format file Excel untuk laporan lebih lanjut."} ] cols = st.columns(len(steps)) for i, step in enumerate(steps): with cols[i]: st.markdown(f"""
{step['title']}

{step['description']}

""", unsafe_allow_html=True) st.markdown("") st.markdown("") # Upload file Excel uploaded_file = st.file_uploader( " Upload Data Kritik & Saran", type=["xlsx"], help="File maksimal 200MB dengan format .xlsx" ) # Tombol untuk hapus cache - KHUSUS PER SESSION session_cache_dir = get_session_cache_dir() session_result_file = session_cache_dir / "temp_predicted.csv" session_chunks_dir = get_session_chunks_dir() # Tombol hapus cache data hasil prediksi if session_result_file.exists(): if st.button("Hapus Cache Data"): session_result_file.unlink() st.success("Cache Data dihapus, silahkan refresh!") time.sleep(1) st.rerun() # Tombol hapus cache chunks if session_chunks_dir.exists(): chunk_files = list(session_chunks_dir.glob("*.csv")) if chunk_files: if st.button(f"Hapus {len(chunk_files)} Chunk Cache", key="delete_chunks"): for chunk_f in chunk_files: chunk_f.unlink() st.success( f"{len(chunk_files)} Chunk cache berhasil dihapus, silahkan refresh!") time.sleep(1) st.rerun() # Tampilkan info file yang di-cache jika ada if session_result_file.exists() or (session_chunks_dir.exists() and list(session_chunks_dir.glob("*.csv"))): if not uploaded_file: metadata_file = session_cache_dir / "metadata.txt" cached_filename = None if metadata_file.exists(): try: with open(metadata_file, "r", encoding="utf-8") as f: cached_filename = f.read().strip() except Exception: if "uploaded_filename" in st.session_state: cached_filename = st.session_state.uploaded_filename if cached_filename and cached_filename != "": st.caption(f"File Sebelumnya: {cached_filename}") else: st.caption("Cache dari upload sebelumnya") else: st.caption(" ") # Inisialisasi session state untuk menyimpan hasil prediksi if "df_predicted" not in st.session_state: st.session_state.df_predicted = None # Load dari cache jika tersedia if st.session_state.df_predicted is None and session_result_file.exists(): try: df_cached = pd.read_csv(session_result_file) if "tahun" in df_cached.columns: df_cached["tahun"] = pd.to_numeric( df_cached["tahun"], errors='coerce').astype('Int64') st.session_state.df_predicted = df_cached st.info("Loaded from your session cache!") except (pd.errors.EmptyDataError, FileNotFoundError) as e: st.warning(f"Gagal memuat cache: {e}") # Proses file yang di-upload if uploaded_file: file_bytes = uploaded_file.getvalue() # Cek apakah ini file baru atau file yang sama if "last_uploaded_file" not in st.session_state or st.session_state.last_uploaded_file != file_bytes: st.session_state.last_uploaded_file = file_bytes st.session_state.uploaded_filename = uploaded_file.name try: df_uploaded = pd.read_excel(BytesIO(file_bytes)) # Konversi kolom tahun jika ada if "tahun" in df_uploaded.columns: df_uploaded["tahun"] = pd.to_numeric( df_uploaded["tahun"], errors='coerce').astype('Int64') except ValueError as err: st.error(f"Gagal membaca file: {err}") else: # Validasi kolom kritik_saran wajib ada if "kritik_saran" not in df_uploaded.columns: st.error("Kolom 'kritik_saran' tidak ditemukan.") else: # Hapus duplikasi berdasarkan kolom kritik_saran df_uploaded = df_uploaded.drop_duplicates( subset=["kritik_saran"]) # Tambahkan kolom aspek jika belum ada for aspect_col in ASPEK_COLUMNS: if aspect_col not in df_uploaded.columns: df_uploaded[aspect_col] = None st.markdown("### Preprocessing dan Prediksi") total_rows = len(df_uploaded) # Tentukan apakah menggunakan chunked processing atau tidak use_chunked = ENABLE_CHUNKED and total_rows > CHUNK_SIZE if use_chunked: # MODE CHUNKED PROCESSING untuk dataset besar num_chunks = (total_rows + CHUNK_SIZE - 1) // CHUNK_SIZE info_col1, info_col2, info_col3 = st.columns(3) with info_col1: st.info(f"**Total data:** {total_rows:,} rows") with info_col2: st.warning( f"**Mode:** Chunked Processing ({CHUNK_SIZE:,} rows/chunk)") with info_col3: st.info(f"**Total chunks:** {num_chunks}") start_time = time.time() all_chunk_results = [] chunk_progress_bar = st.progress(0) chunk_status_text = st.empty() overall_status = st.empty() # Proses setiap chunk for start_idx in range(0, total_rows, CHUNK_SIZE): current_chunk_number = (start_idx // CHUNK_SIZE) + 1 current_chunk_df = df_uploaded.iloc[start_idx:start_idx+CHUNK_SIZE].copy( ) current_chunk_file = session_chunks_dir / \ f"chunk_{current_chunk_number}.csv" # Cek apakah chunk sudah pernah diproses (ada di cache) if current_chunk_file.exists(): chunk_result = pd.read_csv(current_chunk_file) all_chunk_results.append(chunk_result) processed = min(start_idx + CHUNK_SIZE, total_rows) progress_pct = (processed / total_rows) * 100 chunk_progress_bar.progress(1.0) chunk_status_text.text( f"Chunk {current_chunk_number}/{num_chunks} | Loaded from cache" ) overall_status.success( f"✅ Chunk {current_chunk_number}/{num_chunks} loaded from cache | " f"Progress: {processed:,}/{total_rows:,} ({progress_pct:.1f}%)" ) time.sleep(0.3) continue # Proses chunk baru chunk_progress_bar.progress(0) chunk_result = process_chunk_batch( current_chunk_df, current_chunk_number, num_chunks, chunk_progress_bar, chunk_status_text ) all_chunk_results.append(chunk_result) # Hitung estimasi waktu tersisa processed = min(start_idx + CHUNK_SIZE, total_rows) progress_pct = (processed / total_rows) * 100 elapsed = time.time() - start_time est_total = (elapsed / processed) * total_rows est_remaining = est_total - elapsed overall_status.success( f"✅ Chunk {current_chunk_number}/{num_chunks} selesai | " f"Progress: {processed:,}/{total_rows:,} ({progress_pct:.1f}%) | " f"Elapsed: {elapsed:.0f}s | ETA: {est_remaining:.0f}s" ) time.sleep(0.3) # Gabungkan semua hasil chunk chunk_status_text.empty() overall_status.info("🔄 Menggabungkan semua chunks...") df_session = pd.concat( all_chunk_results, ignore_index=True) overall_status.empty() end_time = time.time() duration = end_time - start_time else: # MODE BATCH PROCESSING untuk dataset kecil st.info( f"**Total data:** {total_rows:,} rows | **Mode:** Batch Processing") start_time = time.time() progress_bar = st.progress(0) status_text = st.empty() # STEP 1: Preprocessing cleaned_text_list = [] total_preprocessing = len(df_uploaded) for idx, raw_text in enumerate(df_uploaded["kritik_saran"]): clean_text = text_preprocessing_pipeline(str(raw_text)) cleaned_text_list.append(clean_text) if idx % 50 == 0 or idx == total_preprocessing - 1: progress = (idx + 1) / total_preprocessing progress_bar.progress(progress) status_text.text( f"Preprocessing: {idx+1}/{total_preprocessing} rows") # STEP 2: Prediksi progress_bar.progress(0) status_text.text("Memulai prediksi...") time.sleep(0.3) batch_sz = CONFIG.get("batch_size", 32) num_sents = len(cleaned_text_list) num_asps = len(ASPEK_COLUMNS) ds = ABSADataset( cleaned_text_list, ASPEK_COLUMNS, tokenizer, CONFIG["max_len"]) dl = DataLoader( ds, batch_size=batch_sz, shuffle=False, num_workers=0) predictions_matrix = [ [None] * num_asps for _ in range(num_sents)] batch_counter = 0 total_batch_count = len(dl) model.eval() with torch.no_grad(): for batch_data in dl: inp_ids = batch_data['input_ids'].to(device) attn_mask = batch_data['attention_mask'].to(device) sent_idxs = batch_data['sent_idx'].numpy() asp_idxs = batch_data['aspect_idx'].numpy() model_outputs = model(inp_ids, attn_mask) probabilities = F.softmax(model_outputs, dim=1) predicted_indices = torch.argmax( probabilities, dim=1).cpu().numpy() pred_labels = le.inverse_transform( predicted_indices) for s_idx, a_idx, lbl in zip(sent_idxs, asp_idxs, pred_labels): predictions_matrix[s_idx][a_idx] = lbl batch_counter += 1 progress = batch_counter / total_batch_count progress_bar.progress(progress) status_text.text( f"Predicting: {batch_counter}/{total_batch_count} batches") # STEP 3: Gabungkan hasil result_list = [] for idx, (_, data_row) in enumerate(df_uploaded.iterrows()): row_dict = data_row.to_dict() row_dict["kritik_saran"] = cleaned_text_list[idx] for asp_idx, asp_name in enumerate(ASPEK_COLUMNS): row_dict[asp_name] = predictions_matrix[idx][asp_idx] result_list.append(row_dict) df_session = pd.DataFrame(result_list) progress_bar.progress(1.0) status_text.text("Selesai!") time.sleep(0.5) progress_bar.empty() status_text.empty() end_time = time.time() duration = end_time - start_time # Simpan hasil ke session state dan cache file st.session_state.df_predicted = df_session df_session.to_csv(session_result_file, index=False) # Simpan metadata nama file metadata_file = session_cache_dir / "metadata.txt" with open(metadata_file, "w", encoding="utf-8") as f: f.write(uploaded_file.name) # Hitung performa processing total_items = total_rows * len(ASPEK_COLUMNS) items_per_second = total_items / duration if duration > 0 else 0 # Tampilkan ringkasan hasil processing if use_chunked: st.success( f"✅ **Chunked + Batch Processing selesai!**\n\n" f"- **{total_rows:,}** ulasan diproses\n" f"- **{len(ASPEK_COLUMNS)}** aspek per ulasan\n" f"- **{total_items:,}** total prediksi\n" f"- Diproses dalam **{num_chunks}** chunk\n" f"- Waktu: **{duration:.2f}** detik (~{items_per_second:.1f} prediksi/detik)\n" f"- Optimized untuk dataset besar!" ) else: st.success( f"✅ **Batch Processing selesai!**\n\n" f"- **{total_rows:,}** ulasan diproses\n" f"- **{len(ASPEK_COLUMNS)}** aspek per ulasan\n" f"- **{total_items:,}** total prediksi\n" f"- Waktu: **{duration:.2f}** detik (~{items_per_second:.1f} prediksi/detik)" ) # Tampilan hasil prediksi dan visualisasi if st.session_state.df_predicted is not None: df_predicted = st.session_state.df_predicted # Deteksi kolom yang tersedia untuk filter dinamis available_cols = get_available_columns(df_predicted) # Sidebar filter dengan pengecekan kolom dinamis st.sidebar.header("Filter Data") df_clean = df_predicted.copy() # Cek apakah ada filter yang tersedia has_any_filter = any(available_cols.values()) if not has_any_filter: st.sidebar.info( "Tidak ada kolom yang dapat difilter. Pastikan file memiliki kolom seperti: nama_matakuliah, nama_prodi, tahun/tanggal, atau semester.") # Filter Mata Kuliah (jika kolom tersedia) selected_matkul = [] if available_cols['has_matkul']: matkul_options = sorted( [x for x in df_clean["nama_matakuliah"].dropna().unique() if x]) if matkul_options: selected_matkul = st.sidebar.multiselect( "Nama Mata Kuliah", matkul_options, default=matkul_options) # Filter Program Studi (jika kolom tersedia) selected_prodi = [] if available_cols['has_prodi']: prodi_options = sorted( [x for x in df_clean["nama_prodi"].dropna().unique() if x]) if prodi_options: selected_prodi = st.sidebar.multiselect( "Program Studi", prodi_options, default=prodi_options) # Filter Tahun (jika kolom tersedia) selected_tahun = [] if available_cols['has_tahun']: if 'tanggal' in df_clean.columns and 'tahun' not in df_clean.columns: df_clean['tahun'] = pd.to_datetime( df_clean['tanggal'], errors='coerce').dt.year if 'tahun' in df_clean.columns: tahun_options = sorted( [x for x in df_clean["tahun"].dropna().unique() if pd.notna(x)]) if tahun_options: selected_tahun = st.sidebar.multiselect( "Tahun", tahun_options, default=tahun_options) # Filter Semester (jika kolom tersedia) selected_semester = [] if available_cols['has_semester']: semester_options = sorted( [x for x in df_clean["semester"].dropna().unique() if pd.notna(x)]) if semester_options: selected_semester = st.sidebar.multiselect( "Semester", semester_options, default=semester_options) # Terapkan semua filter yang dipilih df_filtered = df_clean.copy() if selected_matkul and available_cols['has_matkul']: df_filtered = df_filtered[df_filtered["nama_matakuliah"].isin( selected_matkul)] if selected_prodi and available_cols['has_prodi']: df_filtered = df_filtered[df_filtered["nama_prodi"].isin( selected_prodi)] if selected_tahun and available_cols['has_tahun']: df_filtered = df_filtered[df_filtered["tahun"].isin(selected_tahun)] if selected_semester and available_cols['has_semester']: df_filtered = df_filtered[df_filtered["semester"].isin( selected_semester)] # Tampilkan tabel hasil prediksi st.markdown("### Tabel Data Hasil Prediksi") st.dataframe(df_filtered, width='stretch') # Tombol download untuk data terfilter dan semua data col_dl1, col_dl2 = st.columns(2) with col_dl1: st.download_button( label="Unduh Data Terfilter", data=convert_df_to_excel(df_filtered), file_name="hasil_prediksi_absa_filtered.xlsx", mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", use_container_width=True ) with col_dl2: st.download_button( label="Unduh Semua Data", data=convert_df_to_excel(df_predicted), file_name="hasil_prediksi_absa_all.xlsx", mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", use_container_width=True ) st.info( f"Menampilkan {len(df_filtered):,} dari {len(df_predicted):,} data ulasan setelah difilter." ) # Ringkasan Cepat st.markdown("") st.markdown("### Ringkasan Cepat") st.markdown("") # Hitung total sentimen dari semua aspek total_pos = (df_filtered[ASPEK_COLUMNS] == "positif").sum().sum() total_net = (df_filtered[ASPEK_COLUMNS] == "netral").sum().sum() total_neg = (df_filtered[ASPEK_COLUMNS] == "negatif").sum().sum() # Tentukan kolom ringkasan berdasarkan data yang tersedia summary_cols = [] # Kolom dasar (selalu ada) summary_cols.extend(['ulasan', 'aspek']) # Kolom opsional berdasarkan ketersediaan data if available_cols['has_matkul']: summary_cols.append('matkul') if available_cols['has_prodi']: summary_cols.append('prodi') if available_cols['has_semester']: summary_cols.append('semester') # Buat kolom dinamis untuk menampilkan metrik num_cols = len(summary_cols) cols = st.columns(num_cols) col_idx = 0 # Metrik dasar: Jumlah Ulasan & Aspek cols[col_idx].metric("Jumlah Ulasan", f"{len(df_filtered):,}") col_idx += 1 cols[col_idx].metric("Jumlah Aspek", len(ASPEK_COLUMNS)) col_idx += 1 # Metrik Mata Kuliah (jika tersedia) if available_cols['has_matkul']: matkul_count = df_filtered['nama_matakuliah'].nunique() cols[col_idx].metric("Jumlah Mata Kuliah", f"{matkul_count:,}") col_idx += 1 # Metrik Prodi (jika tersedia) if available_cols['has_prodi']: prodi_count = df_filtered['nama_prodi'].nunique() cols[col_idx].metric("Jumlah Prodi", f"{prodi_count:,}") col_idx += 1 # Metrik Semester (jika tersedia) if available_cols['has_semester']: semester_count = df_filtered['semester'].nunique() cols[col_idx].metric("Jumlah Semester", f"{semester_count:,}") col_idx += 1 st.markdown("") # Baris kedua: Metrik Sentimen dan info tambahan summary_cols2 = ['positif', 'netral', 'negatif'] if available_cols['has_tahun']: summary_cols2.append('tahun') if 'kritik_saran' in df_filtered.columns: summary_cols2.append('kata') cols2 = st.columns(len(summary_cols2)) col_idx2 = 0 # Metrik untuk masing-masing jenis sentimen cols2[col_idx2].metric("Sentimen Positif", f"{total_pos:,}") col_idx2 += 1 cols2[col_idx2].metric("Sentimen Netral", f"{total_net:,}") col_idx2 += 1 cols2[col_idx2].metric("Sentimen Negatif", f"{total_neg:,}") col_idx2 += 1 # Metrik Rentang Tahun (jika tersedia) if available_cols['has_tahun']: if 'tahun' in df_filtered.columns: tahun_valid = df_filtered['tahun'].dropna() if len(tahun_valid) > 0: tahun_min = int(tahun_valid.min()) tahun_max = int(tahun_valid.max()) if tahun_min == tahun_max: cols2[col_idx2].metric("Tahun", f"{tahun_min}") else: cols2[col_idx2].metric( "Rentang Tahun", f"{tahun_min} - {tahun_max}") else: cols2[col_idx2].metric("Rentang Tahun", "N/A") else: cols2[col_idx2].metric("Rentang Tahun", "N/A") col_idx2 += 1 # Metrik Rata-rata Panjang Kata (jika tersedia) if 'kritik_saran' in df_filtered.columns and len(df_filtered) > 0: try: word_counts = df_filtered['kritik_saran'].astype( str).str.split().str.len() avg_word_count = round(word_counts.mean(), 1) cols2[col_idx2].metric( "Rata-rata Panjang Kata", f"{avg_word_count} kata") except Exception: cols2[col_idx2].metric("Rata-rata Panjang Kata", "N/A") st.markdown("---") st.markdown("### Visualisasi Data") # Visualisasi Sentimen (selalu ditampilkan) col1, col2 = st.columns(2) with col1: show_sentiment_bar_chart(df_filtered, ASPEK_COLUMNS) with col2: show_sentiment_pie_chart(df_filtered, ASPEK_COLUMNS) # Visualisasi distribusi berdasarkan kolom yang tersedia viz_shown = False # Visualisasi Tahun dan Semester (jika tersedia) if available_cols['has_tahun'] or available_cols['has_semester']: col1, col2 = st.columns(2) with col1: if available_cols['has_tahun']: result = show_year_distribution(df_filtered) if result: viz_shown = True with col2: if available_cols['has_semester']: result = show_semester_distribution(df_filtered) if result: viz_shown = True # Visualisasi Program Studi (jika tersedia) if available_cols['has_prodi']: st.markdown("---") result = show_prodi_distribution(df_filtered) if result: viz_shown = True # Visualisasi Top 10 Mata Kuliah (jika tersedia) if available_cols['has_matkul']: st.markdown("---") result = show_top10_matkul_distribution(df_filtered) if result: viz_shown = True # Visualisasi Sentimen per Tahun/Semester (jika tersedia) if available_cols['has_tahun'] or available_cols['has_semester']: st.markdown("---") col1, col2 = st.columns(2) with col1: if available_cols['has_tahun']: result = show_sentiment_by_year(df_filtered, ASPEK_COLUMNS) if result: viz_shown = True with col2: if available_cols['has_semester']: result = show_sentiment_by_semester(df_filtered, ASPEK_COLUMNS) if result: viz_shown = True # Visualisasi Sentimen per Program Studi (jika tersedia) if available_cols['has_prodi']: st.markdown("---") result = show_sentiment_by_prodi(df_filtered, ASPEK_COLUMNS) if result: viz_shown = True # Visualisasi Sentimen per Top 10 Mata Kuliah (jika tersedia) if available_cols['has_matkul']: st.markdown("---") result = show_sentiment_by_top10_matkul(df_filtered, ASPEK_COLUMNS) if result: viz_shown = True # Footer aplikasi st.caption(""" """, unsafe_allow_html=True)