Spaces:
Sleeping
Sleeping
Commit
·
43aec53
0
Parent(s):
first commit
Browse files- .gitignore +24 -0
- README.md +2 -0
- app.py +353 -0
- assets/style.css +46 -0
- clean_reqs.py +47 -0
- config.py +14 -0
- model_utils.py +118 -0
- preprocessing.py +111 -0
- requirements.txt +9 -0
- visualization.py +331 -0
.gitignore
ADDED
|
@@ -0,0 +1,24 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Python cache
|
| 2 |
+
__pycache__/
|
| 3 |
+
*.py[cod]
|
| 4 |
+
*.pyo
|
| 5 |
+
*.pyd
|
| 6 |
+
|
| 7 |
+
# Streamlit cache
|
| 8 |
+
.streamlit/
|
| 9 |
+
.cache/
|
| 10 |
+
chache_file/
|
| 11 |
+
|
| 12 |
+
# Model files
|
| 13 |
+
assets/model/
|
| 14 |
+
*.pth
|
| 15 |
+
*.pt
|
| 16 |
+
*.joblib
|
| 17 |
+
*.bin
|
| 18 |
+
*.safetensors
|
| 19 |
+
|
| 20 |
+
# Jupyter Notebook checkpoints
|
| 21 |
+
.ipynb_checkpoints/
|
| 22 |
+
|
| 23 |
+
# Logs
|
| 24 |
+
*.log
|
README.md
ADDED
|
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# absa-indobert-web
|
| 2 |
+
Analisis otomatis kritik dan saran berdasarkan aspek tertentu dan sentimen dengan model IndoBERT.
|
app.py
ADDED
|
@@ -0,0 +1,353 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
app.py
|
| 3 |
+
------
|
| 4 |
+
Aplikasi berbasis web ABSA IndoBERT menggunakan Streamlit untuk analisis sentimen
|
| 5 |
+
berbasis aspek dari kritik dan saran mahasiswa.
|
| 6 |
+
"""
|
| 7 |
+
import os
|
| 8 |
+
import time
|
| 9 |
+
from io import BytesIO
|
| 10 |
+
import pandas as pd
|
| 11 |
+
import streamlit as st
|
| 12 |
+
from config import CONFIG, ASPEK_COLUMNS
|
| 13 |
+
from model_utils import load_model_and_tokenizer, predict_multi_aspect
|
| 14 |
+
from visualization import (
|
| 15 |
+
show_sentiment_bar_chart,
|
| 16 |
+
show_sentiment_pie_chart,
|
| 17 |
+
show_year_distribution,
|
| 18 |
+
show_semester_distribution,
|
| 19 |
+
show_prodi_distribution,
|
| 20 |
+
show_top10_matkul_distribution,
|
| 21 |
+
show_sentiment_by_year,
|
| 22 |
+
show_sentiment_by_semester,
|
| 23 |
+
show_sentiment_by_prodi,
|
| 24 |
+
show_sentiment_by_top10_matkul,
|
| 25 |
+
show_sentiment_stacked_percentage,
|
| 26 |
+
)
|
| 27 |
+
from preprocessing import text_preprocessing_pipeline
|
| 28 |
+
|
| 29 |
+
os.makedirs("chache_file", exist_ok=True)
|
| 30 |
+
# # 🔒 Opsional: Bersihkan cache hasil prediksi saat aplikasi dimulai ulang
|
| 31 |
+
# if os.path.exists("chache_file/temp_predicted.csv"):
|
| 32 |
+
# os.remove("chache_file/temp_predicted.csv")
|
| 33 |
+
|
| 34 |
+
# Konfigurasi halaman
|
| 35 |
+
st.set_page_config(
|
| 36 |
+
page_title="ABSA IndoBERT",
|
| 37 |
+
layout="wide",
|
| 38 |
+
page_icon="💬"
|
| 39 |
+
)
|
| 40 |
+
|
| 41 |
+
# Load custom CSS
|
| 42 |
+
with open(os.path.join("assets", "style.css"), encoding="utf-8") as f:
|
| 43 |
+
st.markdown(f"<style>{f.read()}</style>", unsafe_allow_html=True)
|
| 44 |
+
|
| 45 |
+
|
| 46 |
+
# Fungsi cache untuk loading model
|
| 47 |
+
@st.cache_resource(show_spinner=False)
|
| 48 |
+
def get_model_resources():
|
| 49 |
+
"""Memuat model dan tokenizer IndoBERT."""
|
| 50 |
+
return load_model_and_tokenizer()
|
| 51 |
+
|
| 52 |
+
|
| 53 |
+
# Spinner loading model
|
| 54 |
+
with st.spinner("Sedang memuat model IndoBERT dan tokenizer... Harap tunggu sebentar!"):
|
| 55 |
+
model, tokenizer, le, device = get_model_resources()
|
| 56 |
+
|
| 57 |
+
success_placeholder = st.empty()
|
| 58 |
+
success_placeholder.success("✅ Model dan tokenizer berhasil dimuat!")
|
| 59 |
+
time.sleep(1)
|
| 60 |
+
success_placeholder.empty()
|
| 61 |
+
|
| 62 |
+
|
| 63 |
+
def convert_df_to_excel(df):
|
| 64 |
+
"""Mengubah DataFrame menjadi file Excel dalam bentuk byte stream."""
|
| 65 |
+
output = BytesIO()
|
| 66 |
+
with pd.ExcelWriter(output, engine="openpyxl") as writer:
|
| 67 |
+
df.to_excel(writer, index=False)
|
| 68 |
+
return output.getvalue()
|
| 69 |
+
|
| 70 |
+
|
| 71 |
+
# Judul aplikasi
|
| 72 |
+
st.markdown("""
|
| 73 |
+
<h1 class='title-center'>💬 ABSA IndoBERT</h1>
|
| 74 |
+
<p style='text-align: center; font-size: 16px;'>
|
| 75 |
+
Analisis otomatis kritik dan saran berdasarkan aspek tertentu dan sentimen dengan model IndoBERT.
|
| 76 |
+
</p>
|
| 77 |
+
""", unsafe_allow_html=True)
|
| 78 |
+
|
| 79 |
+
# Upload file
|
| 80 |
+
st.markdown("## 🧾 Unggah File Data")
|
| 81 |
+
col1, col2 = st.columns([5, 1])
|
| 82 |
+
with col1:
|
| 83 |
+
uploaded_file = st.file_uploader(" ", type=["xlsx"])
|
| 84 |
+
|
| 85 |
+
if os.path.exists("chache_file/temp_predicted.csv"):
|
| 86 |
+
if st.button("🧹 Hapus Cache Prediksi"):
|
| 87 |
+
os.remove("chache_file/temp_predicted.csv")
|
| 88 |
+
st.markdown("""
|
| 89 |
+
<span style="color: green;">✅ Cache berhasil dihapus. Silahkan Refresh!</span>
|
| 90 |
+
<script>
|
| 91 |
+
setTimeout(function() {
|
| 92 |
+
window.location.reload();
|
| 93 |
+
}, 100);
|
| 94 |
+
</script>
|
| 95 |
+
""", unsafe_allow_html=True)
|
| 96 |
+
|
| 97 |
+
with col2:
|
| 98 |
+
st.markdown("")
|
| 99 |
+
st.markdown("")
|
| 100 |
+
with st.expander("📘 Petunjuk Penggunaan"):
|
| 101 |
+
st.markdown("""
|
| 102 |
+
- Unggah file `.xlsx` dengan kolom `kritik_saran`
|
| 103 |
+
- Sistem akan melakukan _preprocessing_ dan prediksi otomatis
|
| 104 |
+
- Hasil akhir dapat diunduh dalam bentuk file Excel dan divisualisasikan
|
| 105 |
+
""")
|
| 106 |
+
|
| 107 |
+
# Inisialisasi session state
|
| 108 |
+
if "df_predicted" not in st.session_state:
|
| 109 |
+
st.session_state.df_predicted = None
|
| 110 |
+
|
| 111 |
+
# ✅ Jika belum ada df_predicted tapi ada file cache, load dari file
|
| 112 |
+
if st.session_state.df_predicted is None and os.path.exists("chache_file/temp_predicted.csv"):
|
| 113 |
+
try:
|
| 114 |
+
df_cached = pd.read_csv("chache_file/temp_predicted.csv")
|
| 115 |
+
# Konversi kolom tahun ke integer jika ada
|
| 116 |
+
if "tahun" in df_cached.columns:
|
| 117 |
+
df_cached["tahun"] = pd.to_numeric(
|
| 118 |
+
df_cached["tahun"], errors='coerce').astype('Int64')
|
| 119 |
+
st.session_state.df_predicted = df_cached
|
| 120 |
+
except (pd.errors.ParserError, FileNotFoundError) as e:
|
| 121 |
+
st.warning(f"Gagal memuat cache: {e}")
|
| 122 |
+
|
| 123 |
+
|
| 124 |
+
# Proses prediksi
|
| 125 |
+
if uploaded_file:
|
| 126 |
+
file_bytes = uploaded_file.getvalue()
|
| 127 |
+
if "last_uploaded_file" not in st.session_state or st.session_state.last_uploaded_file != file_bytes:
|
| 128 |
+
st.session_state.last_uploaded_file = file_bytes
|
| 129 |
+
try:
|
| 130 |
+
df_uploaded = pd.read_excel(BytesIO(file_bytes))
|
| 131 |
+
|
| 132 |
+
# Konversi kolom tahun ke integer jika ada
|
| 133 |
+
if "tahun" in df_uploaded.columns:
|
| 134 |
+
df_uploaded["tahun"] = pd.to_numeric(
|
| 135 |
+
df_uploaded["tahun"], errors='coerce').astype('Int64')
|
| 136 |
+
|
| 137 |
+
except ValueError as err:
|
| 138 |
+
st.error(f"❌ Gagal membaca file: {err}")
|
| 139 |
+
else:
|
| 140 |
+
if "kritik_saran" not in df_uploaded.columns:
|
| 141 |
+
st.error("❌ Kolom 'kritik_saran' tidak ditemukan.")
|
| 142 |
+
else:
|
| 143 |
+
df_uploaded = df_uploaded.drop_duplicates(
|
| 144 |
+
subset=["kritik_saran"])
|
| 145 |
+
for aspek in ASPEK_COLUMNS:
|
| 146 |
+
if aspek not in df_uploaded.columns:
|
| 147 |
+
df_uploaded[aspek] = None
|
| 148 |
+
|
| 149 |
+
st.markdown("## ⚙️ Preprocessing dan Prediksi")
|
| 150 |
+
progress = st.progress(1, text="Menyiapkan...")
|
| 151 |
+
|
| 152 |
+
pred_results = []
|
| 153 |
+
with st.spinner("Sedang memproses data..."):
|
| 154 |
+
for i, (_, row) in enumerate(df_uploaded.iterrows()):
|
| 155 |
+
cleaned_text = text_preprocessing_pipeline(
|
| 156 |
+
str(row["kritik_saran"]))
|
| 157 |
+
hasil = predict_multi_aspect(
|
| 158 |
+
model, tokenizer, cleaned_text, ASPEK_COLUMNS, le, device, CONFIG[
|
| 159 |
+
"max_len"]
|
| 160 |
+
)
|
| 161 |
+
result_row = row.to_dict()
|
| 162 |
+
result_row["kritik_saran"] = cleaned_text
|
| 163 |
+
result_row.update(hasil)
|
| 164 |
+
pred_results.append(result_row)
|
| 165 |
+
progress.progress((i + 1) / len(df_uploaded),
|
| 166 |
+
text=f"Memproses baris ke-{i + 1} dari {len(df_uploaded)}")
|
| 167 |
+
# ✅ Simpan ke session dan file sementara
|
| 168 |
+
df_session = pd.DataFrame(pred_results)
|
| 169 |
+
st.session_state.df_predicted = df_session
|
| 170 |
+
df_session.to_csv(
|
| 171 |
+
"chache_file/temp_predicted.csv", index=False)
|
| 172 |
+
st.success("✅ Preprocessing & Prediksi selesai!")
|
| 173 |
+
|
| 174 |
+
# Setelah prediksi selesai
|
| 175 |
+
if st.session_state.df_predicted is not None:
|
| 176 |
+
df_predicted = st.session_state.df_predicted
|
| 177 |
+
|
| 178 |
+
# Sidebar filter - Pindahkan ke atas sebelum menampilkan tabel
|
| 179 |
+
st.sidebar.header("🔍 Filter Data")
|
| 180 |
+
|
| 181 |
+
# Pastikan kolom yang difilter ada dan bersihkan nilai NaN
|
| 182 |
+
df_clean = df_predicted.copy()
|
| 183 |
+
|
| 184 |
+
# Filter Mata Kuliah
|
| 185 |
+
if "nama_matakuliah" in df_clean.columns:
|
| 186 |
+
matkul_options = sorted(
|
| 187 |
+
[x for x in df_clean["nama_matakuliah"].dropna().unique() if x])
|
| 188 |
+
selected_matkul = st.sidebar.multiselect(
|
| 189 |
+
"Nama Mata Kuliah", matkul_options, default=matkul_options)
|
| 190 |
+
else:
|
| 191 |
+
selected_matkul = []
|
| 192 |
+
|
| 193 |
+
# Filter Program Studi
|
| 194 |
+
if "nama_prodi" in df_clean.columns:
|
| 195 |
+
prodi_options = sorted(
|
| 196 |
+
[x for x in df_clean["nama_prodi"].dropna().unique() if x])
|
| 197 |
+
selected_prodi = st.sidebar.multiselect(
|
| 198 |
+
"Program Studi", prodi_options, default=prodi_options)
|
| 199 |
+
else:
|
| 200 |
+
selected_prodi = []
|
| 201 |
+
|
| 202 |
+
# Filter Tahun
|
| 203 |
+
if "tahun" in df_clean.columns:
|
| 204 |
+
tahun_options = sorted(
|
| 205 |
+
[x for x in df_clean["tahun"].dropna().unique() if pd.notna(x)])
|
| 206 |
+
selected_tahun = st.sidebar.multiselect(
|
| 207 |
+
"Tahun", tahun_options, default=tahun_options)
|
| 208 |
+
else:
|
| 209 |
+
selected_tahun = []
|
| 210 |
+
|
| 211 |
+
# Filter Semester
|
| 212 |
+
if "semester" in df_clean.columns:
|
| 213 |
+
semester_options = sorted(
|
| 214 |
+
[x for x in df_clean["semester"].dropna().unique() if pd.notna(x)])
|
| 215 |
+
selected_semester = st.sidebar.multiselect(
|
| 216 |
+
"Semester", semester_options, default=semester_options)
|
| 217 |
+
else:
|
| 218 |
+
selected_semester = []
|
| 219 |
+
|
| 220 |
+
# Apply filters dengan pengecekan kolom yang ada
|
| 221 |
+
df_filtered = df_clean.copy()
|
| 222 |
+
|
| 223 |
+
if selected_matkul and "nama_matakuliah" in df_filtered.columns:
|
| 224 |
+
df_filtered = df_filtered[df_filtered["nama_matakuliah"].isin(
|
| 225 |
+
selected_matkul)]
|
| 226 |
+
|
| 227 |
+
if selected_prodi and "nama_prodi" in df_filtered.columns:
|
| 228 |
+
df_filtered = df_filtered[df_filtered["nama_prodi"].isin(
|
| 229 |
+
selected_prodi)]
|
| 230 |
+
|
| 231 |
+
if selected_tahun and "tahun" in df_filtered.columns:
|
| 232 |
+
df_filtered = df_filtered[df_filtered["tahun"].isin(selected_tahun)]
|
| 233 |
+
|
| 234 |
+
if selected_semester and "semester" in df_filtered.columns:
|
| 235 |
+
df_filtered = df_filtered[df_filtered["semester"].isin(
|
| 236 |
+
selected_semester)]
|
| 237 |
+
|
| 238 |
+
# Tampilkan tabel hasil prediksi yang sudah difilter
|
| 239 |
+
st.markdown("### 📄 Tabel Hasil Prediksi")
|
| 240 |
+
st.dataframe(df_filtered, width="stretch")
|
| 241 |
+
|
| 242 |
+
# Download buttons dalam satu kolom
|
| 243 |
+
st.download_button(
|
| 244 |
+
label="⬇️ Unduh Hasil Excel (Data Terfilter)",
|
| 245 |
+
data=convert_df_to_excel(df_filtered),
|
| 246 |
+
file_name="hasil_prediksi_absa_filtered.xlsx",
|
| 247 |
+
mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
|
| 248 |
+
)
|
| 249 |
+
|
| 250 |
+
st.download_button(
|
| 251 |
+
label="⬇️ Unduh Semua Data Excel",
|
| 252 |
+
data=convert_df_to_excel(df_predicted),
|
| 253 |
+
file_name="hasil_prediksi_absa_all.xlsx",
|
| 254 |
+
mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
|
| 255 |
+
)
|
| 256 |
+
|
| 257 |
+
st.info(
|
| 258 |
+
f"Menampilkan {len(df_filtered)} dari {len(df_predicted)} data ulasan setelah difilter."
|
| 259 |
+
)
|
| 260 |
+
|
| 261 |
+
# Ringkasan Cepat - menggunakan data yang sudah difilter
|
| 262 |
+
st.markdown("### 📌 Ringkasan Cepat")
|
| 263 |
+
|
| 264 |
+
total_pos = (df_filtered[ASPEK_COLUMNS] == "positif").sum().sum()
|
| 265 |
+
total_net = (df_filtered[ASPEK_COLUMNS] == "netral").sum().sum()
|
| 266 |
+
total_neg = (df_filtered[ASPEK_COLUMNS] == "negatif").sum().sum()
|
| 267 |
+
|
| 268 |
+
col1, col2, col3, col4, col5 = st.columns(5)
|
| 269 |
+
col1.metric("Jumlah Ulasan", len(df_filtered))
|
| 270 |
+
col2.metric("Jumlah Aspek", len(ASPEK_COLUMNS))
|
| 271 |
+
|
| 272 |
+
# Tambahkan pengecekan untuk kolom yang mungkin tidak ada
|
| 273 |
+
matkul_count = df_filtered['nama_matakuliah'].nunique(
|
| 274 |
+
) if 'nama_matakuliah' in df_filtered.columns else 0
|
| 275 |
+
prodi_count = df_filtered['nama_prodi'].nunique(
|
| 276 |
+
) if 'nama_prodi' in df_filtered.columns else 0
|
| 277 |
+
semester_count = df_filtered['semester'].nunique(
|
| 278 |
+
) if 'semester' in df_filtered.columns else 0
|
| 279 |
+
|
| 280 |
+
col3.metric("Jumlah Mata Kuliah", matkul_count)
|
| 281 |
+
col4.metric("Jumlah Prodi", prodi_count)
|
| 282 |
+
col5.metric("Jumlah Semester", semester_count)
|
| 283 |
+
|
| 284 |
+
col6, col7, col8, col9, col10 = st.columns(5)
|
| 285 |
+
col6.metric("Sentimen Positif", f"{total_pos}")
|
| 286 |
+
col7.metric("Sentimen Netral", f"{total_net}")
|
| 287 |
+
col8.metric("Sentimen Negatif", f"{total_neg}")
|
| 288 |
+
|
| 289 |
+
# Tampilkan rentang tahun jika kolom tahun ada
|
| 290 |
+
if 'tahun' in df_filtered.columns and len(df_filtered) > 0:
|
| 291 |
+
tahun_min = int(df_filtered['tahun'].min())
|
| 292 |
+
tahun_max = int(df_filtered['tahun'].max())
|
| 293 |
+
col9.metric("Rentang Tahun", f"{tahun_min} - {tahun_max}")
|
| 294 |
+
else:
|
| 295 |
+
col9.metric("Rentang Tahun", "N/A")
|
| 296 |
+
|
| 297 |
+
# Tambahkan rata-rata panjang kata dalam kritik saran
|
| 298 |
+
if len(df_filtered) > 0 and 'kritik_saran' in df_filtered.columns:
|
| 299 |
+
word_counts = df_filtered['kritik_saran'].astype(
|
| 300 |
+
str).str.split().str.len()
|
| 301 |
+
avg_word_count = round(word_counts.mean(), 1)
|
| 302 |
+
col10.metric("Rata-rata Panjang Kata", f"{avg_word_count} kata")
|
| 303 |
+
else:
|
| 304 |
+
col10.metric("Rata-rata Panjang Kata", "0 kata")
|
| 305 |
+
|
| 306 |
+
st.markdown("---")
|
| 307 |
+
st.markdown("### 📊 Visualisasi Data")
|
| 308 |
+
|
| 309 |
+
col1, col2 = st.columns(2)
|
| 310 |
+
with col1:
|
| 311 |
+
show_sentiment_bar_chart(df_filtered, ASPEK_COLUMNS)
|
| 312 |
+
with col2:
|
| 313 |
+
show_sentiment_pie_chart(df_filtered, ASPEK_COLUMNS)
|
| 314 |
+
|
| 315 |
+
col1, col2 = st.columns(2)
|
| 316 |
+
with col1:
|
| 317 |
+
show_year_distribution(df_filtered)
|
| 318 |
+
with col2:
|
| 319 |
+
show_semester_distribution(df_filtered)
|
| 320 |
+
|
| 321 |
+
st.markdown("---")
|
| 322 |
+
show_prodi_distribution(df_filtered)
|
| 323 |
+
|
| 324 |
+
st.markdown("---")
|
| 325 |
+
show_top10_matkul_distribution(df_filtered)
|
| 326 |
+
|
| 327 |
+
st.markdown("---")
|
| 328 |
+
col1, col2 = st.columns(2)
|
| 329 |
+
with col1:
|
| 330 |
+
show_sentiment_by_year(df_filtered, ASPEK_COLUMNS)
|
| 331 |
+
with col2:
|
| 332 |
+
show_sentiment_by_semester(df_filtered, ASPEK_COLUMNS)
|
| 333 |
+
|
| 334 |
+
st.markdown("---")
|
| 335 |
+
show_sentiment_by_prodi(df_filtered, ASPEK_COLUMNS)
|
| 336 |
+
|
| 337 |
+
st.markdown("---")
|
| 338 |
+
show_sentiment_by_top10_matkul(df_filtered, ASPEK_COLUMNS)
|
| 339 |
+
|
| 340 |
+
st.markdown("---")
|
| 341 |
+
show_sentiment_stacked_percentage(df_filtered, ASPEK_COLUMNS)
|
| 342 |
+
|
| 343 |
+
#
|
| 344 |
+
|
| 345 |
+
else:
|
| 346 |
+
st.info("ℹ️ Silakan unggah file Excel untuk memulai proses")
|
| 347 |
+
|
| 348 |
+
# Footer
|
| 349 |
+
st.markdown("""
|
| 350 |
+
<div class='footer'>
|
| 351 |
+
© 2025 Darmawan Jiddan – ABSA IndoBERT | Dibuat dengan ❤️ menggunakan Streamlit
|
| 352 |
+
</div>
|
| 353 |
+
""", unsafe_allow_html=True)
|
assets/style.css
ADDED
|
@@ -0,0 +1,46 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
/* Gaya Umum untuk Light & Dark Mode */
|
| 2 |
+
/* .block-container {
|
| 3 |
+
padding-top: 2rem;
|
| 4 |
+
} */
|
| 5 |
+
|
| 6 |
+
/* Heading */
|
| 7 |
+
h1, h2, h3, h4, h5, h6, p,
|
| 8 |
+
.stMarkdown h2,
|
| 9 |
+
.stMarkdown h3 {
|
| 10 |
+
color: var(--text-color);
|
| 11 |
+
}
|
| 12 |
+
|
| 13 |
+
/* Ukuran font tabel */
|
| 14 |
+
.stDataFrame div {
|
| 15 |
+
font-size: 14px;
|
| 16 |
+
}
|
| 17 |
+
|
| 18 |
+
/* Background elemen tertentu */
|
| 19 |
+
.css-1cpxqw2 {
|
| 20 |
+
background-color: var(--secondary-background-color);
|
| 21 |
+
}
|
| 22 |
+
|
| 23 |
+
/* Teks kecil */
|
| 24 |
+
.small-text {
|
| 25 |
+
font-size: 0.9em;
|
| 26 |
+
color: var(--text-color-secondary);
|
| 27 |
+
}
|
| 28 |
+
|
| 29 |
+
/* Judul utama di tengah */
|
| 30 |
+
h1.title-center {
|
| 31 |
+
text-align: center;
|
| 32 |
+
margin-bottom: 10px;
|
| 33 |
+
}
|
| 34 |
+
|
| 35 |
+
.footer {
|
| 36 |
+
position: relative;
|
| 37 |
+
bottom: 0;
|
| 38 |
+
width: 100%;
|
| 39 |
+
padding: 10px 0;
|
| 40 |
+
background-color: var(--background-color);
|
| 41 |
+
color: var(--text-color);
|
| 42 |
+
text-align: center;
|
| 43 |
+
font-size: 14px;
|
| 44 |
+
border-top: 1px solid var(--secondary-background-color);
|
| 45 |
+
margin-top: 50px;
|
| 46 |
+
}
|
clean_reqs.py
ADDED
|
@@ -0,0 +1,47 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Clean_reqs.py
|
| 3 |
+
-------------
|
| 4 |
+
Script untuk membersihkan file requirements.txt dengan cara:
|
| 5 |
+
- Menghapus versi duplikat paket.
|
| 6 |
+
- Menyimpan versi terbaru untuk setiap paket.
|
| 7 |
+
"""
|
| 8 |
+
|
| 9 |
+
import re
|
| 10 |
+
from packaging import version
|
| 11 |
+
|
| 12 |
+
|
| 13 |
+
def clean_requirements(file_path="requirements.txt"):
|
| 14 |
+
"""
|
| 15 |
+
Membersihkan file requirements.txt.
|
| 16 |
+
|
| 17 |
+
Args:
|
| 18 |
+
file_path (str): Path ke file requirements.txt yang akan dibersihkan.
|
| 19 |
+
|
| 20 |
+
Proses:
|
| 21 |
+
- Melewatkan komentar dan baris kosong.
|
| 22 |
+
- Menyimpan versi terbaru untuk setiap paket.
|
| 23 |
+
- Menulis ulang file requirements.txt yang sudah dibersihkan.
|
| 24 |
+
"""
|
| 25 |
+
packages = {}
|
| 26 |
+
|
| 27 |
+
with open(file_path, "r", encoding="utf-8") as f:
|
| 28 |
+
for line in f:
|
| 29 |
+
line = line.strip()
|
| 30 |
+
if not line or line.startswith("#"):
|
| 31 |
+
continue
|
| 32 |
+
|
| 33 |
+
match = re.match(r"([a-zA-Z0-9_\-]+)==(.+)", line)
|
| 34 |
+
if match:
|
| 35 |
+
name, ver = match.groups()
|
| 36 |
+
if name not in packages or version.parse(ver) > version.parse(packages[name]):
|
| 37 |
+
packages[name] = ver
|
| 38 |
+
|
| 39 |
+
with open(file_path, "w", encoding="utf-8") as f:
|
| 40 |
+
for name, ver in sorted(packages.items()):
|
| 41 |
+
f.write(f"{name}=={ver}\n")
|
| 42 |
+
|
| 43 |
+
print(f"✅ Cleaned requirements saved to {file_path}")
|
| 44 |
+
|
| 45 |
+
|
| 46 |
+
if __name__ == "__main__":
|
| 47 |
+
clean_requirements()
|
config.py
ADDED
|
@@ -0,0 +1,14 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
config.py
|
| 3 |
+
---------
|
| 4 |
+
Berisi konfigurasi utama untuk model ABSA (Aspect-Based Sentiment Analysis)
|
| 5 |
+
menggunakan IndoBERT, termasuk parameter model dan daftar kolom aspek.
|
| 6 |
+
"""
|
| 7 |
+
|
| 8 |
+
CONFIG = {
|
| 9 |
+
"model_name": "indobenchmark/indobert-base-p1",
|
| 10 |
+
"dropout_rate": 0.3,
|
| 11 |
+
"max_len": 128
|
| 12 |
+
}
|
| 13 |
+
|
| 14 |
+
ASPEK_COLUMNS = ["tugas", "pengajaran", "materi", "metode", "interaksi"]
|
model_utils.py
ADDED
|
@@ -0,0 +1,118 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
model_utils.py
|
| 3 |
+
--------------
|
| 4 |
+
Berisi utilitas untuk memuat model IndoBERT ABSA, tokenizer, dan label encoder,
|
| 5 |
+
serta fungsi untuk melakukan prediksi multi-aspek.
|
| 6 |
+
"""
|
| 7 |
+
|
| 8 |
+
import os
|
| 9 |
+
import torch
|
| 10 |
+
import torch.nn as nn
|
| 11 |
+
import torch.nn.functional as F
|
| 12 |
+
import joblib
|
| 13 |
+
from transformers import AutoModel, AutoTokenizer
|
| 14 |
+
from config import CONFIG
|
| 15 |
+
|
| 16 |
+
|
| 17 |
+
def load_model_and_tokenizer():
|
| 18 |
+
"""
|
| 19 |
+
Memuat model IndoBERT ABSA, tokenizer, dan label encoder.
|
| 20 |
+
|
| 21 |
+
Returns:
|
| 22 |
+
model (nn.Module): Model ABSA yang sudah diload.
|
| 23 |
+
tokenizer (AutoTokenizer): Tokenizer untuk IndoBERT.
|
| 24 |
+
label_encoder (LabelEncoder): Encoder untuk label sentimen.
|
| 25 |
+
device (torch.device): Device (cuda/cpu) yang digunakan.
|
| 26 |
+
"""
|
| 27 |
+
tokenizer = AutoTokenizer.from_pretrained(CONFIG["model_name"])
|
| 28 |
+
|
| 29 |
+
base_path = os.path.abspath(os.path.dirname(__file__))
|
| 30 |
+
model_dir = os.path.join(base_path, "assets", "model")
|
| 31 |
+
model_path = os.path.join(model_dir, "indobert_absa_model.pth")
|
| 32 |
+
label_path = os.path.join(model_dir, "label_encoder.joblib")
|
| 33 |
+
|
| 34 |
+
if not os.path.exists(model_path) or not os.path.exists(label_path):
|
| 35 |
+
raise FileNotFoundError("Model atau Label Encoder tidak ditemukan.")
|
| 36 |
+
|
| 37 |
+
label_encoder = joblib.load(label_path)
|
| 38 |
+
|
| 39 |
+
class IndoBertForABSA(nn.Module):
|
| 40 |
+
"""
|
| 41 |
+
Model klasifikasi aspek berbasis IndoBERT untuk ABSA.
|
| 42 |
+
"""
|
| 43 |
+
|
| 44 |
+
def __init__(self, num_labels):
|
| 45 |
+
super().__init__()
|
| 46 |
+
self.bert = AutoModel.from_pretrained(
|
| 47 |
+
CONFIG["model_name"], trust_remote_code=True, use_safetensors=True
|
| 48 |
+
)
|
| 49 |
+
self.norm = nn.LayerNorm(self.bert.config.hidden_size)
|
| 50 |
+
self.dropout = nn.Dropout(CONFIG["dropout_rate"])
|
| 51 |
+
self.classifier = nn.Linear(
|
| 52 |
+
self.bert.config.hidden_size, num_labels)
|
| 53 |
+
|
| 54 |
+
def forward(self, input_ids, attention_mask):
|
| 55 |
+
"""
|
| 56 |
+
Forward pass untuk model ABSA.
|
| 57 |
+
|
| 58 |
+
Args:
|
| 59 |
+
input_ids (torch.Tensor): Tensor input token IDs.
|
| 60 |
+
attention_mask (torch.Tensor): Tensor mask perhatian.
|
| 61 |
+
|
| 62 |
+
Returns:
|
| 63 |
+
torch.Tensor: Logit prediksi.
|
| 64 |
+
"""
|
| 65 |
+
output = self.bert(input_ids=input_ids,
|
| 66 |
+
attention_mask=attention_mask)
|
| 67 |
+
pooled = output.pooler_output
|
| 68 |
+
normed = self.norm(pooled)
|
| 69 |
+
dropped = self.dropout(normed)
|
| 70 |
+
return self.classifier(dropped)
|
| 71 |
+
|
| 72 |
+
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
|
| 73 |
+
model = IndoBertForABSA(num_labels=len(label_encoder.classes_))
|
| 74 |
+
model.load_state_dict(torch.load(model_path, map_location=device))
|
| 75 |
+
model.to(device)
|
| 76 |
+
model.eval()
|
| 77 |
+
|
| 78 |
+
return model, tokenizer, label_encoder, device
|
| 79 |
+
|
| 80 |
+
|
| 81 |
+
def predict_multi_aspect(model, tokenizer, sentence, aspek_list, label_encoder, device, max_len):
|
| 82 |
+
"""
|
| 83 |
+
Melakukan prediksi sentimen untuk setiap aspek pada satu kalimat.
|
| 84 |
+
|
| 85 |
+
Args:
|
| 86 |
+
model (nn.Module): Model ABSA yang sudah diload.
|
| 87 |
+
tokenizer (AutoTokenizer): Tokenizer IndoBERT.
|
| 88 |
+
sentence (str): Kalimat input.
|
| 89 |
+
aspek_list (list): Daftar aspek yang ingin diprediksi.
|
| 90 |
+
label_encoder (LabelEncoder): Encoder label.
|
| 91 |
+
device (torch.device): Device (cuda/cpu).
|
| 92 |
+
max_len (int): Panjang maksimum token.
|
| 93 |
+
|
| 94 |
+
Returns:
|
| 95 |
+
dict: Hasil prediksi berupa {aspek: label_sentimen}.
|
| 96 |
+
"""
|
| 97 |
+
results = {}
|
| 98 |
+
for aspek in aspek_list:
|
| 99 |
+
combined = f"[ASPEK] {aspek} [TEXT] {sentence}"
|
| 100 |
+
encoded = tokenizer.encode_plus(
|
| 101 |
+
combined,
|
| 102 |
+
add_special_tokens=True,
|
| 103 |
+
padding="max_length",
|
| 104 |
+
max_length=max_len,
|
| 105 |
+
truncation=True,
|
| 106 |
+
return_attention_mask=True,
|
| 107 |
+
return_tensors="pt",
|
| 108 |
+
)
|
| 109 |
+
input_ids = encoded["input_ids"].to(device)
|
| 110 |
+
attention_mask = encoded["attention_mask"].to(device)
|
| 111 |
+
|
| 112 |
+
with torch.no_grad():
|
| 113 |
+
outputs = model(input_ids, attention_mask)
|
| 114 |
+
probs = F.softmax(outputs, dim=1).squeeze()
|
| 115 |
+
idx = torch.argmax(probs).item()
|
| 116 |
+
label = label_encoder.inverse_transform([idx])[0]
|
| 117 |
+
results[aspek] = label
|
| 118 |
+
return results
|
preprocessing.py
ADDED
|
@@ -0,0 +1,111 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
preprocessing.py
|
| 3 |
+
----------------
|
| 4 |
+
Modul preprocessing teks untuk ABSA IndoBERT, termasuk pembersihan teks,
|
| 5 |
+
normalisasi slang, tokenisasi, dan penghapusan emoji.
|
| 6 |
+
"""
|
| 7 |
+
|
| 8 |
+
import re
|
| 9 |
+
import os
|
| 10 |
+
import nltk
|
| 11 |
+
from nltk.tokenize import word_tokenize
|
| 12 |
+
from Sastrawi.Stemmer.StemmerFactory import StemmerFactory
|
| 13 |
+
|
| 14 |
+
# Tentukan folder NLTK di project atau environment
|
| 15 |
+
NLTK_DATA_DIR = os.path.join(os.path.dirname(__file__), "nltk_data")
|
| 16 |
+
os.makedirs(NLTK_DATA_DIR, exist_ok=True)
|
| 17 |
+
|
| 18 |
+
# Tambahkan path NLTK supaya bisa menemukan data
|
| 19 |
+
nltk.data.path.append(NLTK_DATA_DIR)
|
| 20 |
+
|
| 21 |
+
# Download punkt jika belum ada
|
| 22 |
+
try:
|
| 23 |
+
nltk.data.find("tokenizers/punkt")
|
| 24 |
+
except LookupError:
|
| 25 |
+
nltk.download("punkt", download_dir=NLTK_DATA_DIR)
|
| 26 |
+
|
| 27 |
+
# Stemmer Sastrawi
|
| 28 |
+
stemmer = StemmerFactory().create_stemmer()
|
| 29 |
+
|
| 30 |
+
slang_dict = {
|
| 31 |
+
'sy': 'saya', 'aku': 'saya', 'aq': 'saya', 'gua': 'saya', 'gue': 'saya',
|
| 32 |
+
'km': 'kamu', 'kmu': 'kamu', 'lu': 'kamu', 'loe': 'kamu', 'lo': 'kamu',
|
| 33 |
+
'gk': 'tidak', 'ga': 'tidak', 'nggak': 'tidak', 'tdk': 'tidak', 'enggak': 'tidak', 'g': 'tidak',
|
| 34 |
+
'bgt': 'banget', 'bangt': 'banget', 'bngt': 'banget',
|
| 35 |
+
'bnyk': 'banyak', 'smw': 'semua', 'bbrp': 'beberapa',
|
| 36 |
+
'udh': 'sudah', 'sdh': 'sudah', 'dah': 'sudah', 'td': 'tadi',
|
| 37 |
+
'bsk': 'besok', 'kmrn': 'kemarin', 'skrng': 'sekarang', 'skrg': 'sekarang', 'nnti': 'nanti',
|
| 38 |
+
'mlm': 'malam', 'pgi': 'pagi',
|
| 39 |
+
'jg': 'juga', 'aja': 'saja', 'aj': 'saja', 'jd': 'jadi', 'lg': 'lagi', 'lgi': 'lagi',
|
| 40 |
+
'tp': 'tapi', 'tpi': 'tapi', 'tpnya': 'tapi', 'trs': 'terus', 'trus': 'terus', 'trsnya': 'terusnya',
|
| 41 |
+
'krn': 'karena', 'karana': 'karena', 'utk': 'untuk', 'bsa': 'bisa',
|
| 42 |
+
'dr': 'dari', 'dpn': 'depan', 'blkg': 'belakang', 'dkt': 'dekat',
|
| 43 |
+
'ngajar': 'mengajar', 'ngasih': 'memberi', 'ngerti': 'mengerti', 'ngumpul': 'mengumpulkan', 'ngulang': 'mengulang',
|
| 44 |
+
'bikin': 'membuat', 'ajar': 'mengajar', 'ajarkan': 'mengajarkan', 'diajar': 'diajarkan', 'mengampu': 'mengajar', 'diampu': 'diajarkan',
|
| 45 |
+
'dosen2': 'dosen', 'mhs': 'mahasiswa', 'mhsw': 'mahasiswa', 'nilai2': 'nilai', 'mantul': 'mantap betul',
|
| 46 |
+
'ny': 'nya', 'gitu': 'seperti itu', 'bgtu': 'begitu',
|
| 47 |
+
'sm': 'sama', 'dtg': 'datang', 'mnt': 'menit', 'cepet': 'cepat', 'cpt': 'cepat',
|
| 48 |
+
'kayak': 'seperti', 'kyk': 'seperti', 'dpt': 'dapat', 'masingmasing': 'masing-masing',
|
| 49 |
+
'terimakasih': 'terima kasih', 'terimaksih': 'terima kasih', 'terimakasi': 'terima kasih', 'makasi': 'terima kasih', 'kasih': 'terima kasih',
|
| 50 |
+
'sebaiknya': 'sebaik nya', 'akan tetapi': 'tetapi',
|
| 51 |
+
'diperjelas': 'dijelaskan', 'diperbaiki': 'dibetulkan',
|
| 52 |
+
'diparkan': 'dipaparkan', 'refrensi': 'referensi',
|
| 53 |
+
'pemblajaran': 'pembelajaran', 'pemebelajaran': 'pembelajaran', 'pembelajran': 'pembelajaran', 'pembelajara': 'pembelajaran', 'palajaran': 'pelajaran',
|
| 54 |
+
'pendikan': 'pendidikan', 'matkul': 'matakuliah',
|
| 55 |
+
'ofline': 'offline', 'dapatkan': 'diperoleh', 'tatap': 'tatap muka',
|
| 56 |
+
'kouta': 'kuota', 'sekira': 'kira-kira', 'selow': 'santai', 'penyampain': 'penyampaian',
|
| 57 |
+
'membing': 'membimbing', 'memaksimalkan': 'maksimal',
|
| 58 |
+
'tap': 'tatap', 'wasan': 'wawasan', 'pelan': 'lambat', 'iya': 'ya', 'ijin': 'izin',
|
| 59 |
+
'seperti itu': '', 'dik': '', 'mpk': '', 'pu': '', 'ta': '', 'the': '',
|
| 60 |
+
'mendik': 'mendidik', 'efesien': 'efisien', 'menuhi': 'memenuhi', 'tep': 'tepat',
|
| 61 |
+
'pendik': 'pendidikan', 'semster': 'semester', 'vidio': 'video',
|
| 62 |
+
'asinkronus': 'asyncronous', 'sinkronus': 'syncronous', 'mahasiswai': 'mahasiswa',
|
| 63 |
+
'perkulihan': 'perkuliahan', 'kedepa': 'kedepan', 'projek': 'proyek', 'bertap': 'bertatap',
|
| 64 |
+
'perkuliah': 'perkuliahan', 'bajk': 'baik', 'diskus': 'diskusi', 'praktek': 'praktik',
|
| 65 |
+
}
|
| 66 |
+
|
| 67 |
+
pattern_slang = re.compile(r'\b(' + '|'.join(re.escape(k) for k in slang_dict) + r')\b', flags=re.IGNORECASE) # noqa: C0301, C0201
|
| 68 |
+
|
| 69 |
+
|
| 70 |
+
def cleaning_text(text: str) -> str:  # noqa: C0103
    """Clean raw feedback text.

    Removes HTML tags and stray angle brackets, splits run-together
    sentences (camelCase joins), drops unusual symbols, and collapses
    repeated punctuation and whitespace. Non-string input yields ''.
    """
    if not isinstance(text, str):
        return ''
    # Ordered substitutions; order matters and mirrors the cleaning steps.
    for pattern, repl in (
        (r'<[^>]+>', ''),                # strip HTML tags
        (r'>|<', ' '),                   # leftover angle brackets -> space
        (r'([a-z])([A-Z])', r'\1. \2'),  # "endStart" -> "end. Start"
    ):
        text = re.sub(pattern, repl, text)
    text = text.replace('\n', ' ')
    text = re.sub(r'[^a-zA-Z0-9\s.,!?\-:;\'"()]', '', text)  # drop odd symbols
    text = re.sub(r'([!?.,])\1{2,}', r'\1\1', text)          # "!!!!" -> "!!"
    text = re.sub(r'([.!?])(\w)', r'\1 \2', text)            # space after punctuation
    return re.sub(r'\s+', ' ', text).strip()
|
| 83 |
+
|
| 84 |
+
|
| 85 |
+
def remove_emoji(text):
    """Strip common emoji codepoints from *text*."""
    ranges = (
        u"\U0001F600-\U0001F64F"  # emoticons
        u"\U0001F300-\U0001F5FF"  # symbols & pictographs
        u"\U0001F680-\U0001F6FF"  # transport & map symbols
        u"\U0001F1E0-\U0001F1FF"  # regional indicators (flags)
        u"\U00002702-\U000027B0"  # dingbats
        u"\U000024C2-\U0001F251"  # enclosed characters
    )
    pattern = re.compile("[" + ranges + "]+", flags=re.UNICODE)
    return pattern.sub('', text)
|
| 98 |
+
|
| 99 |
+
|
| 100 |
+
def normalize_text(tokens):  # noqa: C0103
    """Lowercase each token and map slang forms to their standard words."""
    normalized = []
    for token in tokens:
        lowered = token.lower()
        normalized.append(slang_dict.get(lowered, lowered))
    return normalized
|
| 103 |
+
|
| 104 |
+
|
| 105 |
+
def text_preprocessing_pipeline(text):
    """Full preprocessing pipeline: clean, strip emoji, tokenize, normalize."""
    cleaned = remove_emoji(cleaning_text(text))
    tokens = normalize_text(word_tokenize(cleaned))
    return " ".join(tokens)
|
requirements.txt
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
joblib==1.5.2
|
| 2 |
+
nltk==3.9.1
|
| 3 |
+
packaging==25.0
|
| 4 |
+
pandas==2.3.2
|
| 5 |
+
plotly==6.3.0
|
| 6 |
+
Sastrawi==1.0.1
|
| 7 |
+
streamlit==1.50.0
|
| 8 |
+
torch==2.7.1
|
| 9 |
+
transformers==4.56.2
|
visualization.py
ADDED
|
@@ -0,0 +1,331 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Modul visualization.py
|
| 3 |
+
----------------------
|
| 4 |
+
Berisi fungsi-fungsi untuk menampilkan berbagai visualisasi data kritik dan saran
|
| 5 |
+
dalam bentuk bar chart, pie chart, serta distribusi berdasarkan tahun, semester,
|
| 6 |
+
program studi, dan mata kuliah menggunakan Streamlit & Plotly.
|
| 7 |
+
"""
|
| 8 |
+
|
| 9 |
+
import streamlit as st
|
| 10 |
+
import pandas as pd
|
| 11 |
+
import plotly.express as px
|
| 12 |
+
from config import ASPEK_COLUMNS
|
| 13 |
+
|
| 14 |
+
|
| 15 |
+
# Custom color palette: one fixed hex color per sentiment label, shared
# by every chart so colors stay consistent across views.
sentimen_palette = {
    "netral": "#FFE24C",
    "positif": "#4CFF72",
    "negatif": "#FF4C4C"
}
# Canonical display order for sentiment categories in legends and bars.
category_order = ["netral", "positif", "negatif"]

# Plotly config shared by every chart: static view, no mode bar.
config_options = {
    "scrollZoom": False,
    "displayModeBar": False
}
|
| 28 |
+
|
| 29 |
+
|
| 30 |
+
def show_sentiment_bar_chart(df_predicted, aspek_columns):
    """Render a grouped bar chart of sentiment counts for each aspect."""
    columns_missing = not set(aspek_columns).issubset(df_predicted.columns)
    if df_predicted.empty or columns_missing:
        st.warning("Data atau kolom aspek tidak tersedia untuk ditampilkan.")
        return

    melted = df_predicted.melt(
        value_vars=aspek_columns, var_name="aspek", value_name="sentimen"
    )
    # Categorical dtype pins bar/legend ordering to category_order.
    melted["sentimen"] = pd.Categorical(
        melted["sentimen"], categories=category_order, ordered=True
    )
    counts = (
        melted.groupby(["aspek", "sentimen"], observed=False)
        .size()
        .reset_index(name="jumlah")
    )
    fig = px.bar(
        counts,
        x="aspek",
        y="jumlah",
        color="sentimen",
        barmode="group",
        color_discrete_map=sentimen_palette,
        category_orders={"sentimen": category_order}
    )
    fig.update_layout(title="Distribusi Sentimen per Aspek")
    st.plotly_chart(fig, use_container_width=True, config=config_options)
|
| 60 |
+
|
| 61 |
+
|
| 62 |
+
def show_sentiment_pie_chart(df_predicted, aspek_columns):
    """Render a donut chart of the overall sentiment composition.

    Pools every aspect column into one flat series, counts each sentiment
    label, and plots the shares as a donut (hole=0.3).
    """
    # Input guard consistent with show_sentiment_bar_chart: previously a
    # missing aspect column raised a KeyError from the selection below.
    if df_predicted.empty or not set(aspek_columns).issubset(df_predicted.columns):
        st.warning("Data atau kolom aspek tidak tersedia untuk ditampilkan.")
        return

    # Flatten all aspect columns into a single 1-D array of labels.
    sentimen_total = df_predicted[aspek_columns].values.ravel()
    sentimen_counts = pd.Series(sentimen_total).value_counts().reset_index()
    sentimen_counts.columns = ["sentimen", "jumlah"]
    sentimen_counts = sentimen_counts.sort_values("jumlah", ascending=False)
    fig = px.pie(sentimen_counts, names="sentimen", values="jumlah",
                 color="sentimen", color_discrete_map=sentimen_palette,
                 hole=0.3)
    fig.update_layout(title="Total Komposisi Sentimen")
    fig.update_traces(textposition='inside', textinfo='percent+label')
    st.plotly_chart(fig, use_container_width=True, config=config_options)
|
| 74 |
+
|
| 75 |
+
|
| 76 |
+
def show_year_distribution(df):
    """Display the number of feedback entries per year as a bar chart."""
    # NOTE(review): this mutates the caller's DataFrame in place by adding
    # a 'tahun' column; later views (e.g. show_sentiment_by_year) appear to
    # rely on that column existing — confirm before making this pure.
    if 'tanggal' in df.columns:
        df['tahun'] = pd.to_datetime(df['tanggal'], errors='coerce').dt.year

    if 'tahun' in df.columns:
        # Drop rows whose date failed to parse (NaT -> NaN year).
        df_tahun = df.dropna(subset=['tahun']).copy()
        df_tahun['tahun'] = df_tahun['tahun'].astype(int)

        year_counts = df_tahun['tahun'].value_counts().reset_index()
        year_counts.columns = ['tahun', 'jumlah']
        year_counts = year_counts.sort_values('jumlah', ascending=False)

        fig = px.bar(year_counts, x='tahun', y='jumlah',
                     color='tahun', title="Distribusi Kritik/Saran per Tahun")
        # Treat years as discrete labels instead of a continuous axis.
        fig.update_layout(xaxis=dict(type='category'))
        st.plotly_chart(fig, use_container_width=True, config=config_options)
|
| 93 |
+
|
| 94 |
+
|
| 95 |
+
def show_semester_distribution(df):
    """Display how many feedback entries fall in each semester."""
    if 'semester' not in df.columns:
        return
    counts = df['semester'].value_counts().reset_index()
    counts.columns = ['semester', 'jumlah']
    counts = counts.sort_values('jumlah', ascending=False)
    fig = px.bar(counts, x='semester', y='jumlah',
                 color='semester', title="Distribusi Kritik/Saran per Semester")
    # Order semester categories by total bar height, descending.
    fig.update_layout(xaxis=dict(categoryorder='total descending'))
    st.plotly_chart(fig, use_container_width=True, config=config_options)
|
| 106 |
+
|
| 107 |
+
|
| 108 |
+
def show_prodi_distribution(df):
    """Display feedback counts per study program as a horizontal bar chart."""
    if 'nama_prodi' not in df.columns:
        return
    counts = df['nama_prodi'].value_counts().reset_index()
    counts.columns = ['nama_prodi', 'jumlah']
    # Ascending sort puts the largest bar at the top of a horizontal chart.
    counts = counts.sort_values(by='jumlah', ascending=True)
    fig = px.bar(
        counts,
        x='jumlah',
        y='nama_prodi',
        orientation='h',
        color='jumlah',
        title="Jumlah Kritik/Saran per Program Studi"
    )
    st.plotly_chart(fig, use_container_width=True, config=config_options)
|
| 123 |
+
|
| 124 |
+
|
| 125 |
+
def show_top10_matkul_distribution(df):
    """Display the ten courses that received the most feedback."""
    if 'nama_matakuliah' not in df.columns or 'kode_matakuliah' not in df.columns:
        return
    top10 = (
        df.groupby(['kode_matakuliah', 'nama_matakuliah'], observed=False)
        .size()
        .reset_index(name='jumlah')
        .sort_values(by='jumlah', ascending=False)
        .head(10)
    )
    # "CODE - Name" labels for the y axis.
    top10['label'] = (
        top10['kode_matakuliah'] + " - " +
        top10['nama_matakuliah']
    )
    # Ascending sort puts the busiest course at the top of the chart.
    top10 = top10.sort_values(by='jumlah', ascending=True)

    fig = px.bar(
        top10,
        x='jumlah',
        y='label',
        orientation='h',
        title="Top 10 Mata Kuliah Berdasarkan Kritik/Saran",
        color='jumlah'
    )
    st.plotly_chart(fig, use_container_width=True, config=config_options)
|
| 150 |
+
|
| 151 |
+
|
| 152 |
+
def show_sentiment_by_year(df, aspek_columns):
    """Display sentiment counts per year as a grouped bar chart."""
    # 'tahun' is presumably added earlier by show_year_distribution —
    # nothing happens if it is absent.
    if 'tahun' not in df.columns:
        return
    melted = df.melt(id_vars=['tahun'],
                     value_vars=aspek_columns,
                     var_name='aspek',
                     value_name='sentimen')
    counts = (
        melted.groupby(['tahun', 'sentimen'], observed=False)
        .size()
        .reset_index(name='jumlah')
        .sort_values('jumlah', ascending=False)
    )
    fig = px.bar(counts, x='tahun', y='jumlah', color='sentimen',
                 barmode='group', color_discrete_map=sentimen_palette)
    fig.update_layout(title="Distribusi Sentimen Kritik/Saran per Tahun")
    st.plotly_chart(fig, use_container_width=True, config=config_options)
|
| 167 |
+
|
| 168 |
+
|
| 169 |
+
def show_sentiment_by_semester(df, aspek_columns):
    """Display sentiment counts per semester as a grouped bar chart."""
    if 'semester' not in df.columns:
        return
    melted = df.melt(id_vars=['semester'],
                     value_vars=aspek_columns,
                     var_name='aspek',
                     value_name='sentimen')
    counts = (
        melted.groupby(['semester', 'sentimen'], observed=False)
        .size()
        .reset_index(name='jumlah')
        .sort_values('jumlah', ascending=False)
    )
    fig = px.bar(counts, x='semester', y='jumlah', color='sentimen',
                 barmode='group', color_discrete_map=sentimen_palette)
    fig.update_layout(
        title="Distribusi Sentimen Kritik/Saran per Semester")
    st.plotly_chart(fig, use_container_width=True, config=config_options)
|
| 186 |
+
|
| 187 |
+
|
| 188 |
+
def show_sentiment_by_prodi(df, aspek_columns):
    """Display sentiment counts per study program, ordered by volume."""
    if 'nama_prodi' not in df.columns:
        return

    melted = df.melt(
        id_vars=['nama_prodi'],
        value_vars=aspek_columns,
        var_name='aspek',
        value_name='sentimen'
    )

    counts = (
        melted.groupby(['nama_prodi', 'sentimen'], observed=False)
        .size()
        .reset_index(name='jumlah')
    )

    # Rank programs by total feedback volume; the list is reversed because
    # the busiest program should sit at the top of a horizontal chart.
    ordered = (
        counts.groupby('nama_prodi')['jumlah']
        .sum()
        .sort_values(ascending=False)
        .index.tolist()[::-1]
    )

    counts['nama_prodi'] = pd.Categorical(
        counts['nama_prodi'],
        categories=ordered,
        ordered=True
    )

    fig = px.bar(
        counts,
        y='nama_prodi',
        x='jumlah',
        color='sentimen',
        barmode='group',
        orientation='h',
        color_discrete_map=sentimen_palette
    )
    fig.update_layout(
        title="Distribusi Sentimen per Program Studi",
        yaxis={'categoryorder': 'array',
               'categoryarray': ordered}
    )
    st.plotly_chart(fig, use_container_width=True, config=config_options)
|
| 232 |
+
|
| 233 |
+
|
| 234 |
+
def show_sentiment_by_top10_matkul(df, aspek_columns):
    """Display sentiment distribution for the ten most-discussed courses.

    Picks the 10 (kode, nama) course pairs with the most rows, melts their
    aspect columns into long form, and draws a grouped horizontal bar chart
    ordered by total feedback volume.
    """
    # Column guard consistent with show_top10_matkul_distribution:
    # previously a missing course column raised a KeyError in the groupby.
    if 'nama_matakuliah' not in df.columns or 'kode_matakuliah' not in df.columns:
        return

    # MultiIndex of the 10 busiest (kode, nama) pairs.
    df_top10 = (
        df.groupby(['kode_matakuliah', 'nama_matakuliah'], observed=False)
        .size()
        .sort_values(ascending=False)
        .head(10)
        .index
    )

    # Keep only rows belonging to those top-10 courses.
    df_filtered = df[df.set_index(
        ['kode_matakuliah', 'nama_matakuliah']).index.isin(df_top10)]

    df_long = df_filtered.melt(
        id_vars=['kode_matakuliah', 'nama_matakuliah'],
        value_vars=aspek_columns,
        var_name='aspek',
        value_name='sentimen'
    )

    # "CODE - Name" labels for the y axis.
    df_long['label'] = (
        df_long['kode_matakuliah'] + " - " + df_long['nama_matakuliah']
    )

    matkul_sentiment = (
        df_long.groupby(['label', 'sentimen'], observed=False)
        .size()
        .reset_index(name='jumlah')
    )

    # Order courses by total volume; reversed so the busiest course sits
    # at the top of the horizontal chart.
    total_per_label = (
        matkul_sentiment.groupby('label')['jumlah']
        .sum()
        .sort_values(ascending=False)
    )
    ordered_labels = total_per_label.index.tolist()[::-1]

    matkul_sentiment['label'] = pd.Categorical(
        matkul_sentiment['label'],
        categories=ordered_labels,
        ordered=True
    )

    fig = px.bar(
        matkul_sentiment,
        y='label',
        x='jumlah',
        color='sentimen',
        barmode='group',
        orientation='h',
        color_discrete_map=sentimen_palette
    )
    fig.update_layout(
        title="Distribusi Sentimen pada Top 10 Mata Kuliah",
        yaxis={'categoryorder': 'array', 'categoryarray': ordered_labels}
    )
    st.plotly_chart(fig, use_container_width=True, config=config_options)
|
| 291 |
+
|
| 292 |
+
|
| 293 |
+
def show_sentiment_stacked_percentage(df, aspek_columns):
    """Display a stacked bar chart of per-aspect sentiment percentages.

    Each aspect's bar is split by sentiment share (summing to 100%),
    with aspects ordered per ASPEK_COLUMNS from config.
    """

    if df.empty or not set(aspek_columns).issubset(df.columns):
        st.warning("Data atau kolom aspek tidak tersedia.")
        return

    df_long = df.melt(
        value_vars=aspek_columns,
        var_name="aspek",
        value_name="sentimen"
    )

    # Count per (aspek, sentimen); observed=False matches every other
    # groupby in this module and keeps results stable if the sentiment
    # column ever becomes categorical.
    count_data = df_long.groupby(
        ['aspek', 'sentimen'], observed=False).size().reset_index(name='jumlah')
    total_per_aspek = count_data.groupby(
        'aspek', observed=False)['jumlah'].sum().reset_index()
    total_per_aspek.columns = ['aspek', 'total']
    count_data = count_data.merge(total_per_aspek, on='aspek')
    count_data['persentase'] = (
        count_data['jumlah'] / count_data['total']) * 100

    fig = px.bar(
        count_data,
        x="aspek",
        y="persentase",
        color="sentimen",
        title="Persentase Distribusi Sentimen per Aspek",
        color_discrete_map=sentimen_palette,
        category_orders={
            "sentimen": category_order,
            "aspek": ASPEK_COLUMNS  # fixed aspect order from config
        }
    )
    fig.update_layout(
        yaxis_title="Persentase (%)",
        xaxis_title="Aspek"
    )
    st.plotly_chart(fig, use_container_width=True, config=config_options)
|