zdannn2808 committed
Commit 43aec53 · 0 Parent(s)

first commit

Files changed (10)
  1. .gitignore +24 -0
  2. README.md +2 -0
  3. app.py +353 -0
  4. assets/style.css +46 -0
  5. clean_reqs.py +47 -0
  6. config.py +14 -0
  7. model_utils.py +118 -0
  8. preprocessing.py +111 -0
  9. requirements.txt +9 -0
  10. visualization.py +331 -0
.gitignore ADDED
@@ -0,0 +1,24 @@
+ # Python cache
+ __pycache__/
+ *.py[cod]
+ *.pyo
+ *.pyd
+
+ # Streamlit cache
+ .streamlit/
+ .cache/
+ cache_file/
+
+ # Model files
+ assets/model/
+ *.pth
+ *.pt
+ *.joblib
+ *.bin
+ *.safetensors
+
+ # Jupyter Notebook checkpoints
+ .ipynb_checkpoints/
+
+ # Logs
+ *.log
README.md ADDED
@@ -0,0 +1,2 @@
+ # absa-indobert-web
+ Automatic analysis of student criticism and suggestions by aspect and sentiment with an IndoBERT model.
app.py ADDED
@@ -0,0 +1,353 @@
+ """
+ app.py
+ ------
+ IndoBERT ABSA web application built with Streamlit for aspect-based
+ sentiment analysis of student criticism and suggestions.
+ """
+ import os
+ import time
+ from io import BytesIO
+ import pandas as pd
+ import streamlit as st
+ from config import CONFIG, ASPEK_COLUMNS
+ from model_utils import load_model_and_tokenizer, predict_multi_aspect
+ from visualization import (
+     show_sentiment_bar_chart,
+     show_sentiment_pie_chart,
+     show_year_distribution,
+     show_semester_distribution,
+     show_prodi_distribution,
+     show_top10_matkul_distribution,
+     show_sentiment_by_year,
+     show_sentiment_by_semester,
+     show_sentiment_by_prodi,
+     show_sentiment_by_top10_matkul,
+     show_sentiment_stacked_percentage,
+ )
+ from preprocessing import text_preprocessing_pipeline
+
+ os.makedirs("cache_file", exist_ok=True)
+ # # 🔒 Optional: clear the cached predictions whenever the app restarts
+ # if os.path.exists("cache_file/temp_predicted.csv"):
+ #     os.remove("cache_file/temp_predicted.csv")
+
+ # Page configuration
+ st.set_page_config(
+     page_title="ABSA IndoBERT",
+     layout="wide",
+     page_icon="💬"
+ )
+
+ # Load custom CSS
+ with open(os.path.join("assets", "style.css"), encoding="utf-8") as f:
+     st.markdown(f"<style>{f.read()}</style>", unsafe_allow_html=True)
+
+
+ # Cached model loader
+ @st.cache_resource(show_spinner=False)
+ def get_model_resources():
+     """Load the IndoBERT model and tokenizer."""
+     return load_model_and_tokenizer()
+
+
+ # Show a spinner while the model loads
+ with st.spinner("Sedang memuat model IndoBERT dan tokenizer... Harap tunggu sebentar!"):
+     model, tokenizer, le, device = get_model_resources()
+
+ success_placeholder = st.empty()
+ success_placeholder.success("✅ Model dan tokenizer berhasil dimuat!")
+ time.sleep(1)
+ success_placeholder.empty()
+
+
+ def convert_df_to_excel(df):
+     """Convert a DataFrame into an Excel file as a byte stream."""
+     output = BytesIO()
+     with pd.ExcelWriter(output, engine="openpyxl") as writer:
+         df.to_excel(writer, index=False)
+     return output.getvalue()
+
+
+ # Application title
+ st.markdown("""
+ <h1 class='title-center'>💬 ABSA IndoBERT</h1>
+ <p style='text-align: center; font-size: 16px;'>
+ Analisis otomatis kritik dan saran berdasarkan aspek tertentu dan sentimen dengan model IndoBERT.
+ </p>
+ """, unsafe_allow_html=True)
+
+ # File upload
+ st.markdown("## 🧾 Unggah File Data")
+ col1, col2 = st.columns([5, 1])
+ with col1:
+     uploaded_file = st.file_uploader(" ", type=["xlsx"])
+
+ if os.path.exists("cache_file/temp_predicted.csv"):
+     if st.button("🧹 Hapus Cache Prediksi"):
+         os.remove("cache_file/temp_predicted.csv")
+         # Clear the session copy as well. This replaces the original
+         # <script> reload hack: scripts injected through st.markdown with
+         # unsafe_allow_html are not executed by Streamlit, so the page
+         # never actually refreshed.
+         st.session_state.df_predicted = None
+         st.success("✅ Cache berhasil dihapus.")
+
+ with col2:
+     st.markdown("")
+     st.markdown("")
+     with st.expander("📘 Petunjuk Penggunaan"):
+         st.markdown("""
+         - Unggah file `.xlsx` dengan kolom `kritik_saran`
+         - Sistem akan melakukan _preprocessing_ dan prediksi otomatis
+         - Hasil akhir dapat diunduh dalam bentuk file Excel dan divisualisasikan
+         """)
+
+ # Initialize session state
+ if "df_predicted" not in st.session_state:
+     st.session_state.df_predicted = None
+
+ # ✅ If df_predicted is not set but a cache file exists, load it from disk
+ if st.session_state.df_predicted is None and os.path.exists("cache_file/temp_predicted.csv"):
+     try:
+         df_cached = pd.read_csv("cache_file/temp_predicted.csv")
+         # Convert the year column to integers when present
+         if "tahun" in df_cached.columns:
+             df_cached["tahun"] = pd.to_numeric(
+                 df_cached["tahun"], errors='coerce').astype('Int64')
+         st.session_state.df_predicted = df_cached
+     except (pd.errors.ParserError, FileNotFoundError) as e:
+         st.warning(f"Gagal memuat cache: {e}")
+
+
+ # Prediction workflow
+ if uploaded_file:
+     file_bytes = uploaded_file.getvalue()
+     if "last_uploaded_file" not in st.session_state or st.session_state.last_uploaded_file != file_bytes:
+         st.session_state.last_uploaded_file = file_bytes
+         try:
+             df_uploaded = pd.read_excel(BytesIO(file_bytes))
+
+             # Convert the year column to integers when present
+             if "tahun" in df_uploaded.columns:
+                 df_uploaded["tahun"] = pd.to_numeric(
+                     df_uploaded["tahun"], errors='coerce').astype('Int64')
+
+         except ValueError as err:
+             st.error(f"❌ Gagal membaca file: {err}")
+         else:
+             if "kritik_saran" not in df_uploaded.columns:
+                 st.error("❌ Kolom 'kritik_saran' tidak ditemukan.")
+             else:
+                 df_uploaded = df_uploaded.drop_duplicates(subset=["kritik_saran"])
+                 for aspek in ASPEK_COLUMNS:
+                     if aspek not in df_uploaded.columns:
+                         df_uploaded[aspek] = None
+
+                 st.markdown("## ⚙️ Preprocessing dan Prediksi")
+                 progress = st.progress(0, text="Menyiapkan...")
+
+                 pred_results = []
+                 with st.spinner("Sedang memproses data..."):
+                     for i, (_, row) in enumerate(df_uploaded.iterrows()):
+                         cleaned_text = text_preprocessing_pipeline(
+                             str(row["kritik_saran"]))
+                         hasil = predict_multi_aspect(
+                             model, tokenizer, cleaned_text,
+                             ASPEK_COLUMNS, le, device, CONFIG["max_len"]
+                         )
+                         result_row = row.to_dict()
+                         result_row["kritik_saran"] = cleaned_text
+                         result_row.update(hasil)
+                         pred_results.append(result_row)
+                         progress.progress(
+                             (i + 1) / len(df_uploaded),
+                             text=f"Memproses baris ke-{i + 1} dari {len(df_uploaded)}")
+                 # ✅ Save to the session and to a temporary cache file
+                 df_session = pd.DataFrame(pred_results)
+                 st.session_state.df_predicted = df_session
+                 df_session.to_csv("cache_file/temp_predicted.csv", index=False)
+                 st.success("✅ Preprocessing & Prediksi selesai!")
+
+ # After prediction has finished
+ if st.session_state.df_predicted is not None:
+     df_predicted = st.session_state.df_predicted
+
+     # Sidebar filters - built before the results table is rendered
+     st.sidebar.header("🔍 Filter Data")
+
+     # Make sure the filtered columns exist and drop NaN values
+     df_clean = df_predicted.copy()
+
+     # Course filter
+     if "nama_matakuliah" in df_clean.columns:
+         matkul_options = sorted(
+             [x for x in df_clean["nama_matakuliah"].dropna().unique() if x])
+         selected_matkul = st.sidebar.multiselect(
+             "Nama Mata Kuliah", matkul_options, default=matkul_options)
+     else:
+         selected_matkul = []
+
+     # Study-program filter
+     if "nama_prodi" in df_clean.columns:
+         prodi_options = sorted(
+             [x for x in df_clean["nama_prodi"].dropna().unique() if x])
+         selected_prodi = st.sidebar.multiselect(
+             "Program Studi", prodi_options, default=prodi_options)
+     else:
+         selected_prodi = []
+
+     # Year filter
+     if "tahun" in df_clean.columns:
+         tahun_options = sorted(
+             [x for x in df_clean["tahun"].dropna().unique() if pd.notna(x)])
+         selected_tahun = st.sidebar.multiselect(
+             "Tahun", tahun_options, default=tahun_options)
+     else:
+         selected_tahun = []
+
+     # Semester filter
+     if "semester" in df_clean.columns:
+         semester_options = sorted(
+             [x for x in df_clean["semester"].dropna().unique() if pd.notna(x)])
+         selected_semester = st.sidebar.multiselect(
+             "Semester", semester_options, default=semester_options)
+     else:
+         selected_semester = []
+
+     # Apply the filters, checking that each column exists
+     df_filtered = df_clean.copy()
+
+     if selected_matkul and "nama_matakuliah" in df_filtered.columns:
+         df_filtered = df_filtered[df_filtered["nama_matakuliah"].isin(selected_matkul)]
+
+     if selected_prodi and "nama_prodi" in df_filtered.columns:
+         df_filtered = df_filtered[df_filtered["nama_prodi"].isin(selected_prodi)]
+
+     if selected_tahun and "tahun" in df_filtered.columns:
+         df_filtered = df_filtered[df_filtered["tahun"].isin(selected_tahun)]
+
+     if selected_semester and "semester" in df_filtered.columns:
+         df_filtered = df_filtered[df_filtered["semester"].isin(selected_semester)]
+
+     # Show the filtered prediction table
+     st.markdown("### 📄 Tabel Hasil Prediksi")
+     st.dataframe(df_filtered, width="stretch")
+
+     # Download buttons in one column
+     st.download_button(
+         label="⬇️ Unduh Hasil Excel (Data Terfilter)",
+         data=convert_df_to_excel(df_filtered),
+         file_name="hasil_prediksi_absa_filtered.xlsx",
+         mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
+     )
+
+     st.download_button(
+         label="⬇️ Unduh Semua Data Excel",
+         data=convert_df_to_excel(df_predicted),
+         file_name="hasil_prediksi_absa_all.xlsx",
+         mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
+     )
+
+     st.info(
+         f"Menampilkan {len(df_filtered)} dari {len(df_predicted)} data ulasan setelah difilter."
+     )
+
+     # Quick summary - uses the filtered data
+     st.markdown("### 📌 Ringkasan Cepat")
+
+     total_pos = (df_filtered[ASPEK_COLUMNS] == "positif").sum().sum()
+     total_net = (df_filtered[ASPEK_COLUMNS] == "netral").sum().sum()
+     total_neg = (df_filtered[ASPEK_COLUMNS] == "negatif").sum().sum()
+
+     col1, col2, col3, col4, col5 = st.columns(5)
+     col1.metric("Jumlah Ulasan", len(df_filtered))
+     col2.metric("Jumlah Aspek", len(ASPEK_COLUMNS))
+
+     # Guard against columns that may be missing
+     matkul_count = (df_filtered['nama_matakuliah'].nunique()
+                     if 'nama_matakuliah' in df_filtered.columns else 0)
+     prodi_count = (df_filtered['nama_prodi'].nunique()
+                    if 'nama_prodi' in df_filtered.columns else 0)
+     semester_count = (df_filtered['semester'].nunique()
+                       if 'semester' in df_filtered.columns else 0)
+
+     col3.metric("Jumlah Mata Kuliah", matkul_count)
+     col4.metric("Jumlah Prodi", prodi_count)
+     col5.metric("Jumlah Semester", semester_count)
+
+     col6, col7, col8, col9, col10 = st.columns(5)
+     col6.metric("Sentimen Positif", f"{total_pos}")
+     col7.metric("Sentimen Netral", f"{total_net}")
+     col8.metric("Sentimen Negatif", f"{total_neg}")
+
+     # Show the year range when a year column exists.
+     # dropna() first, so int() cannot fail on missing years.
+     tahun_valid = (df_filtered['tahun'].dropna()
+                    if 'tahun' in df_filtered.columns else pd.Series(dtype='Int64'))
+     if len(tahun_valid) > 0:
+         tahun_min = int(tahun_valid.min())
+         tahun_max = int(tahun_valid.max())
+         col9.metric("Rentang Tahun", f"{tahun_min} - {tahun_max}")
+     else:
+         col9.metric("Rentang Tahun", "N/A")
+
+     # Average word count of the kritik_saran texts
+     if len(df_filtered) > 0 and 'kritik_saran' in df_filtered.columns:
+         word_counts = df_filtered['kritik_saran'].astype(str).str.split().str.len()
+         avg_word_count = round(word_counts.mean(), 1)
+         col10.metric("Rata-rata Panjang Kata", f"{avg_word_count} kata")
+     else:
+         col10.metric("Rata-rata Panjang Kata", "0 kata")
+
+     st.markdown("---")
+     st.markdown("### 📊 Visualisasi Data")
+
+     col1, col2 = st.columns(2)
+     with col1:
+         show_sentiment_bar_chart(df_filtered, ASPEK_COLUMNS)
+     with col2:
+         show_sentiment_pie_chart(df_filtered, ASPEK_COLUMNS)
+
+     col1, col2 = st.columns(2)
+     with col1:
+         show_year_distribution(df_filtered)
+     with col2:
+         show_semester_distribution(df_filtered)
+
+     st.markdown("---")
+     show_prodi_distribution(df_filtered)
+
+     st.markdown("---")
+     show_top10_matkul_distribution(df_filtered)
+
+     st.markdown("---")
+     col1, col2 = st.columns(2)
+     with col1:
+         show_sentiment_by_year(df_filtered, ASPEK_COLUMNS)
+     with col2:
+         show_sentiment_by_semester(df_filtered, ASPEK_COLUMNS)
+
+     st.markdown("---")
+     show_sentiment_by_prodi(df_filtered, ASPEK_COLUMNS)
+
+     st.markdown("---")
+     show_sentiment_by_top10_matkul(df_filtered, ASPEK_COLUMNS)
+
+     st.markdown("---")
+     show_sentiment_stacked_percentage(df_filtered, ASPEK_COLUMNS)
+
+ else:
+     st.info("ℹ️ Silakan unggah file Excel untuk memulai proses")
+
+ # Footer
+ st.markdown("""
+ <div class='footer'>
+     © 2025 Darmawan Jiddan – ABSA IndoBERT | Dibuat dengan ❤️ menggunakan Streamlit
+ </div>
+ """, unsafe_allow_html=True)
assets/style.css ADDED
@@ -0,0 +1,46 @@
+ /* General styles for light & dark mode */
+ /* .block-container {
+     padding-top: 2rem;
+ } */
+
+ /* Headings */
+ h1, h2, h3, h4, h5, h6, p,
+ .stMarkdown h2,
+ .stMarkdown h3 {
+     color: var(--text-color);
+ }
+
+ /* Table font size */
+ .stDataFrame div {
+     font-size: 14px;
+ }
+
+ /* Background for specific elements */
+ .css-1cpxqw2 {
+     background-color: var(--secondary-background-color);
+ }
+
+ /* Small text */
+ .small-text {
+     font-size: 0.9em;
+     color: var(--text-color-secondary);
+ }
+
+ /* Centered main title */
+ h1.title-center {
+     text-align: center;
+     margin-bottom: 10px;
+ }
+
+ .footer {
+     position: relative;
+     bottom: 0;
+     width: 100%;
+     padding: 10px 0;
+     background-color: var(--background-color);
+     color: var(--text-color);
+     text-align: center;
+     font-size: 14px;
+     border-top: 1px solid var(--secondary-background-color);
+     margin-top: 50px;
+ }
clean_reqs.py ADDED
@@ -0,0 +1,47 @@
+ """
+ clean_reqs.py
+ -------------
+ Script that cleans a requirements.txt file by:
+ - Removing duplicate package entries.
+ - Keeping the newest version of each package.
+ """
+
+ import re
+ from packaging import version
+
+
+ def clean_requirements(file_path="requirements.txt"):
+     """
+     Clean a requirements.txt file.
+
+     Args:
+         file_path (str): Path to the requirements.txt file to clean.
+
+     Process:
+         - Skip comments and blank lines.
+         - Keep the newest version of each package.
+         - Rewrite the cleaned requirements.txt.
+     """
+     packages = {}
+
+     with open(file_path, "r", encoding="utf-8") as f:
+         for line in f:
+             line = line.strip()
+             if not line or line.startswith("#"):
+                 continue
+
+             match = re.match(r"([a-zA-Z0-9_\-]+)==(.+)", line)
+             if match:
+                 name, ver = match.groups()
+                 if name not in packages or version.parse(ver) > version.parse(packages[name]):
+                     packages[name] = ver
+
+     with open(file_path, "w", encoding="utf-8") as f:
+         for name, ver in sorted(packages.items()):
+             f.write(f"{name}=={ver}\n")
+
+     print(f"✅ Cleaned requirements saved to {file_path}")
+
+
+ if __name__ == "__main__":
+     clean_requirements()
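For illustration, given duplicate pins the script keeps the highest version per package and drops comments. A small sketch — the `reqs_demo.txt` file and its contents are made up:

```python
# Sketch: clean_requirements keeps the newest pin per package.
from clean_reqs import clean_requirements

with open("reqs_demo.txt", "w", encoding="utf-8") as f:
    f.write("pandas==2.0.0\npandas==2.3.2\n# a comment\nnltk==3.9.1\n")

clean_requirements("reqs_demo.txt")
# reqs_demo.txt now reads:
#   nltk==3.9.1
#   pandas==2.3.2
```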
config.py ADDED
@@ -0,0 +1,14 @@
+ """
+ config.py
+ ---------
+ Core configuration for the ABSA (Aspect-Based Sentiment Analysis) model
+ built on IndoBERT: model parameters and the list of aspect columns.
+ """
+
+ CONFIG = {
+     "model_name": "indobenchmark/indobert-base-p1",
+     "dropout_rate": 0.3,
+     "max_len": 128
+ }
+
+ ASPEK_COLUMNS = ["tugas", "pengajaran", "materi", "metode", "interaksi"]
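These two values are consumed by model_utils.py below: the model runs once per aspect, with the aspect name and the sentence packed into a single input string. A sketch of the inputs it builds (the example sentence is invented):

```python
# Sketch: the per-aspect model inputs that predict_multi_aspect constructs.
from config import ASPEK_COLUMNS

sentence = "dosen menjelaskan materi dengan baik"  # made-up example
for aspek in ASPEK_COLUMNS:
    print(f"[ASPEK] {aspek} [TEXT] {sentence}")
# -> [ASPEK] tugas [TEXT] dosen menjelaskan materi dengan baik
# -> [ASPEK] pengajaran [TEXT] dosen menjelaskan materi dengan baik
# ... one input per aspect
```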
model_utils.py ADDED
@@ -0,0 +1,118 @@
+ """
+ model_utils.py
+ --------------
+ Utilities for loading the IndoBERT ABSA model, tokenizer, and label encoder,
+ plus a function for multi-aspect prediction.
+ """
+
+ import os
+ import torch
+ import torch.nn as nn
+ import torch.nn.functional as F
+ import joblib
+ from transformers import AutoModel, AutoTokenizer
+ from config import CONFIG
+
+
+ def load_model_and_tokenizer():
+     """
+     Load the IndoBERT ABSA model, tokenizer, and label encoder.
+
+     Returns:
+         model (nn.Module): The loaded ABSA model.
+         tokenizer (AutoTokenizer): Tokenizer for IndoBERT.
+         label_encoder (LabelEncoder): Encoder for the sentiment labels.
+         device (torch.device): Device (cuda/cpu) in use.
+     """
+     tokenizer = AutoTokenizer.from_pretrained(CONFIG["model_name"])
+
+     base_path = os.path.abspath(os.path.dirname(__file__))
+     model_dir = os.path.join(base_path, "assets", "model")
+     model_path = os.path.join(model_dir, "indobert_absa_model.pth")
+     label_path = os.path.join(model_dir, "label_encoder.joblib")
+
+     if not os.path.exists(model_path) or not os.path.exists(label_path):
+         raise FileNotFoundError("Model atau Label Encoder tidak ditemukan.")
+
+     label_encoder = joblib.load(label_path)
+
+     class IndoBertForABSA(nn.Module):
+         """IndoBERT-based aspect classification model for ABSA."""
+
+         def __init__(self, num_labels):
+             super().__init__()
+             self.bert = AutoModel.from_pretrained(
+                 CONFIG["model_name"], trust_remote_code=True, use_safetensors=True
+             )
+             self.norm = nn.LayerNorm(self.bert.config.hidden_size)
+             self.dropout = nn.Dropout(CONFIG["dropout_rate"])
+             self.classifier = nn.Linear(self.bert.config.hidden_size, num_labels)
+
+         def forward(self, input_ids, attention_mask):
+             """
+             Forward pass of the ABSA model.
+
+             Args:
+                 input_ids (torch.Tensor): Token ID tensor.
+                 attention_mask (torch.Tensor): Attention mask tensor.
+
+             Returns:
+                 torch.Tensor: Prediction logits.
+             """
+             output = self.bert(input_ids=input_ids,
+                                attention_mask=attention_mask)
+             pooled = output.pooler_output
+             normed = self.norm(pooled)
+             dropped = self.dropout(normed)
+             return self.classifier(dropped)
+
+     device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+     model = IndoBertForABSA(num_labels=len(label_encoder.classes_))
+     model.load_state_dict(torch.load(model_path, map_location=device))
+     model.to(device)
+     model.eval()
+
+     return model, tokenizer, label_encoder, device
+
+
+ def predict_multi_aspect(model, tokenizer, sentence, aspek_list, label_encoder, device, max_len):
+     """
+     Predict the sentiment of each aspect for a single sentence.
+
+     Args:
+         model (nn.Module): The loaded ABSA model.
+         tokenizer (AutoTokenizer): IndoBERT tokenizer.
+         sentence (str): Input sentence.
+         aspek_list (list): Aspects to predict.
+         label_encoder (LabelEncoder): Label encoder.
+         device (torch.device): Device (cuda/cpu).
+         max_len (int): Maximum token length.
+
+     Returns:
+         dict: Predictions as {aspect: sentiment_label}.
+     """
+     results = {}
+     for aspek in aspek_list:
+         combined = f"[ASPEK] {aspek} [TEXT] {sentence}"
+         encoded = tokenizer.encode_plus(
+             combined,
+             add_special_tokens=True,
+             padding="max_length",
+             max_length=max_len,
+             truncation=True,
+             return_attention_mask=True,
+             return_tensors="pt",
+         )
+         input_ids = encoded["input_ids"].to(device)
+         attention_mask = encoded["attention_mask"].to(device)
+
+         with torch.no_grad():
+             outputs = model(input_ids, attention_mask)
+             probs = F.softmax(outputs, dim=1).squeeze()
+             idx = torch.argmax(probs).item()
+             label = label_encoder.inverse_transform([idx])[0]
+             results[aspek] = label
+     return results
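A one-off usage sketch outside Streamlit, assuming the weight files that .gitignore excludes (assets/model/indobert_absa_model.pth and label_encoder.joblib) are present locally; the input sentence and the printed labels are illustrative:

```python
# Sketch: run a single prediction without the Streamlit app.
from config import CONFIG, ASPEK_COLUMNS
from model_utils import load_model_and_tokenizer, predict_multi_aspect

model, tokenizer, label_encoder, device = load_model_and_tokenizer()
hasil = predict_multi_aspect(
    model, tokenizer,
    "tugas terlalu banyak tetapi materi mudah dipahami",  # made-up input
    ASPEK_COLUMNS, label_encoder, device, CONFIG["max_len"],
)
print(hasil)  # e.g. {'tugas': 'negatif', 'materi': 'positif', ...}
```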
preprocessing.py ADDED
@@ -0,0 +1,111 @@
+ """
+ preprocessing.py
+ ----------------
+ Text-preprocessing module for ABSA IndoBERT: text cleaning,
+ slang normalization, tokenization, and emoji removal.
+ """
+
+ import re
+ import os
+ import nltk
+ from nltk.tokenize import word_tokenize
+ from Sastrawi.Stemmer.StemmerFactory import StemmerFactory
+
+ # Keep NLTK data inside the project (or environment) folder
+ NLTK_DATA_DIR = os.path.join(os.path.dirname(__file__), "nltk_data")
+ os.makedirs(NLTK_DATA_DIR, exist_ok=True)
+
+ # Register the path so NLTK can find the data
+ nltk.data.path.append(NLTK_DATA_DIR)
+
+ # Download punkt if missing. On newer NLTK releases (including the pinned
+ # 3.9.1), word_tokenize also needs the "punkt_tab" resource, so fetch both.
+ for resource in ("punkt", "punkt_tab"):
+     try:
+         nltk.data.find(f"tokenizers/{resource}")
+     except LookupError:
+         nltk.download(resource, download_dir=NLTK_DATA_DIR)
+
+ # Sastrawi stemmer (not used by the pipeline below yet)
+ stemmer = StemmerFactory().create_stemmer()
+
+ slang_dict = {
+     'sy': 'saya', 'aku': 'saya', 'aq': 'saya', 'gua': 'saya', 'gue': 'saya',
+     'km': 'kamu', 'kmu': 'kamu', 'lu': 'kamu', 'loe': 'kamu', 'lo': 'kamu',
+     'gk': 'tidak', 'ga': 'tidak', 'nggak': 'tidak', 'tdk': 'tidak', 'enggak': 'tidak', 'g': 'tidak',
+     'bgt': 'banget', 'bangt': 'banget', 'bngt': 'banget',
+     'bnyk': 'banyak', 'smw': 'semua', 'bbrp': 'beberapa',
+     'udh': 'sudah', 'sdh': 'sudah', 'dah': 'sudah', 'td': 'tadi',
+     'bsk': 'besok', 'kmrn': 'kemarin', 'skrng': 'sekarang', 'skrg': 'sekarang', 'nnti': 'nanti',
+     'mlm': 'malam', 'pgi': 'pagi',
+     'jg': 'juga', 'aja': 'saja', 'aj': 'saja', 'jd': 'jadi', 'lg': 'lagi', 'lgi': 'lagi',
+     'tp': 'tapi', 'tpi': 'tapi', 'tpnya': 'tapi', 'trs': 'terus', 'trus': 'terus', 'trsnya': 'terusnya',
+     'krn': 'karena', 'karana': 'karena', 'utk': 'untuk', 'bsa': 'bisa',
+     'dr': 'dari', 'dpn': 'depan', 'blkg': 'belakang', 'dkt': 'dekat',
+     'ngajar': 'mengajar', 'ngasih': 'memberi', 'ngerti': 'mengerti', 'ngumpul': 'mengumpulkan', 'ngulang': 'mengulang',
+     'bikin': 'membuat', 'ajar': 'mengajar', 'ajarkan': 'mengajarkan', 'diajar': 'diajarkan', 'mengampu': 'mengajar', 'diampu': 'diajarkan',
+     'dosen2': 'dosen', 'mhs': 'mahasiswa', 'mhsw': 'mahasiswa', 'nilai2': 'nilai', 'mantul': 'mantap betul',
+     'ny': 'nya', 'gitu': 'seperti itu', 'bgtu': 'begitu',
+     'sm': 'sama', 'dtg': 'datang', 'mnt': 'menit', 'cepet': 'cepat', 'cpt': 'cepat',
+     'kayak': 'seperti', 'kyk': 'seperti', 'dpt': 'dapat', 'masingmasing': 'masing-masing',
+     'terimakasih': 'terima kasih', 'terimaksih': 'terima kasih', 'terimakasi': 'terima kasih', 'makasi': 'terima kasih', 'kasih': 'terima kasih',
+     'sebaiknya': 'sebaik nya', 'akan tetapi': 'tetapi',
+     'diperjelas': 'dijelaskan', 'diperbaiki': 'dibetulkan',
+     'diparkan': 'dipaparkan', 'refrensi': 'referensi',
+     'pemblajaran': 'pembelajaran', 'pemebelajaran': 'pembelajaran', 'pembelajran': 'pembelajaran', 'pembelajara': 'pembelajaran', 'palajaran': 'pelajaran',
+     'pendikan': 'pendidikan', 'matkul': 'matakuliah',
+     'ofline': 'offline', 'dapatkan': 'diperoleh', 'tatap': 'tatap muka',
+     'kouta': 'kuota', 'sekira': 'kira-kira', 'selow': 'santai', 'penyampain': 'penyampaian',
+     'membing': 'membimbing', 'memaksimalkan': 'maksimal',
+     'tap': 'tatap', 'wasan': 'wawasan', 'pelan': 'lambat', 'iya': 'ya', 'ijin': 'izin',
+     'seperti itu': '', 'dik': '', 'mpk': '', 'pu': '', 'ta': '', 'the': '',
+     'mendik': 'mendidik', 'efesien': 'efisien', 'menuhi': 'memenuhi', 'tep': 'tepat',
+     'pendik': 'pendidikan', 'semster': 'semester', 'vidio': 'video',
+     'asinkronus': 'asyncronous', 'sinkronus': 'syncronous', 'mahasiswai': 'mahasiswa',
+     'perkulihan': 'perkuliahan', 'kedepa': 'kedepan', 'projek': 'proyek', 'bertap': 'bertatap',
+     'perkuliah': 'perkuliahan', 'bajk': 'baik', 'diskus': 'diskusi', 'praktek': 'praktik',
+ }
+
+ # Compiled once; normalize_text below uses dict lookups per token instead,
+ # so this pattern is kept for whole-string, word-boundary replacements.
+ pattern_slang = re.compile(
+     r'\b(' + '|'.join(re.escape(k) for k in slang_dict) + r')\b',
+     flags=re.IGNORECASE
+ )
+
+
+ def cleaning_text(text: str) -> str:
+     """Clean text: strip HTML, symbols, stray characters, and tidy punctuation."""
+     if not isinstance(text, str):
+         return ''
+     text = re.sub(r'<[^>]+>', '', text)
+     text = re.sub(r'&(?:gt|lt);?', ' ', text)  # escaped angle brackets, with or without ';'
+     text = re.sub(r'([a-z])([A-Z])', r'\1. \2', text)
+     text = text.replace('\n', ' ')
+     text = re.sub(r'[^a-zA-Z0-9\s.,!?\-:;\'"()]', '', text)
+     text = re.sub(r'([!?.,])\1{2,}', r'\1\1', text)
+     text = re.sub(r'([.!?])(\w)', r'\1 \2', text)
+     text = re.sub(r'\s+', ' ', text).strip()
+     return text
+
+
+ def remove_emoji(text):
+     """Remove emoji from the text."""
+     emoji_pattern = re.compile(
+         "["
+         u"\U0001F600-\U0001F64F"
+         u"\U0001F300-\U0001F5FF"
+         u"\U0001F680-\U0001F6FF"
+         u"\U0001F1E0-\U0001F1FF"
+         u"\U00002702-\U000027B0"
+         u"\U000024C2-\U0001F251"
+         "]+", flags=re.UNICODE
+     )
+     return emoji_pattern.sub(r'', text)
+
+
+ def normalize_text(tokens):
+     """Normalize words against the slang dictionary."""
+     return [slang_dict.get(w.lower(), w.lower()) for w in tokens]
+
+
+ def text_preprocessing_pipeline(text):
+     """Full preprocessing pipeline: cleaning, emoji removal, tokenization, normalization."""
+     text = cleaning_text(text)
+     text = remove_emoji(text)
+     tokens = word_tokenize(text)
+     tokens = normalize_text(tokens)
+     return " ".join(tokens)
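To see what the pipeline actually does, a sketch with an invented slang-heavy input; the output shown in the comment is approximate:

```python
# Sketch: the pipeline on a noisy input (output shown is approximate).
from preprocessing import text_preprocessing_pipeline

raw = "Dosen ngajar bgt cepet!!! 😅 materi gk jelas"
print(text_preprocessing_pipeline(raw))
# roughly: "dosen mengajar banget cepat ! ! materi tidak jelas"
```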
requirements.txt ADDED
@@ -0,0 +1,9 @@
+ joblib==1.5.2
+ nltk==3.9.1
+ openpyxl  # used by pandas Excel I/O in app.py; pin a version as needed
+ packaging==25.0
+ pandas==2.3.2
+ plotly==6.3.0
+ Sastrawi==1.0.1
+ streamlit==1.50.0
+ # +cu118 wheels install from the PyTorch index, not PyPI
+ torch==2.7.1+cu118
+ transformers==4.56.2
visualization.py ADDED
@@ -0,0 +1,331 @@
+ """
+ visualization.py
+ ----------------
+ Functions that render the criticism/suggestion data as bar charts, pie
+ charts, and distributions by year, semester, study program, and course,
+ using Streamlit & Plotly.
+ """
+
+ import streamlit as st
+ import pandas as pd
+ import plotly.express as px
+ from config import ASPEK_COLUMNS
+
+
+ # Custom color palette
+ sentimen_palette = {
+     "netral": "#FFE24C",
+     "positif": "#4CFF72",
+     "negatif": "#FF4C4C"
+ }
+ category_order = ["netral", "positif", "negatif"]
+
+ # Plotly configuration
+ config_options = {
+     "scrollZoom": False,
+     "displayModeBar": False
+ }
+
+
+ def show_sentiment_bar_chart(df_predicted, aspek_columns):
+     """Show a bar chart of the sentiment distribution per aspect."""
+     if df_predicted.empty or not set(aspek_columns).issubset(df_predicted.columns):
+         st.warning("Data atau kolom aspek tidak tersedia untuk ditampilkan.")
+         return
+
+     df_long = df_predicted.melt(
+         value_vars=aspek_columns,
+         var_name="aspek",
+         value_name="sentimen"
+     )
+     df_long["sentimen"] = pd.Categorical(
+         df_long["sentimen"],
+         categories=category_order,
+         ordered=True
+     )
+     count_data = df_long.groupby(
+         ["aspek", "sentimen"], observed=False
+     ).size().reset_index(name="jumlah")
+     fig = px.bar(
+         count_data,
+         x="aspek",
+         y="jumlah",
+         color="sentimen",
+         barmode="group",
+         color_discrete_map=sentimen_palette,
+         category_orders={"sentimen": category_order}
+     )
+     fig.update_layout(title="Distribusi Sentimen per Aspek")
+     st.plotly_chart(fig, use_container_width=True, config=config_options)
+
+
+ def show_sentiment_pie_chart(df_predicted, aspek_columns):
+     """Show a pie chart of the overall sentiment composition."""
+     sentimen_total = df_predicted[aspek_columns].values.ravel()
+     sentimen_counts = pd.Series(sentimen_total).value_counts().reset_index()
+     sentimen_counts.columns = ["sentimen", "jumlah"]
+     sentimen_counts = sentimen_counts.sort_values("jumlah", ascending=False)
+     fig = px.pie(sentimen_counts, names="sentimen", values="jumlah",
+                  color="sentimen", color_discrete_map=sentimen_palette,
+                  hole=0.3)
+     fig.update_layout(title="Total Komposisi Sentimen")
+     fig.update_traces(textposition='inside', textinfo='percent+label')
+     st.plotly_chart(fig, use_container_width=True, config=config_options)
+
+
+ def show_year_distribution(df):
+     """Show how many criticisms/suggestions were submitted per year."""
+     df = df.copy()  # avoid mutating the caller's DataFrame
+     if 'tanggal' in df.columns:
+         df['tahun'] = pd.to_datetime(df['tanggal'], errors='coerce').dt.year
+
+     if 'tahun' in df.columns:
+         df_tahun = df.dropna(subset=['tahun']).copy()
+         df_tahun['tahun'] = df_tahun['tahun'].astype(int)
+
+         year_counts = df_tahun['tahun'].value_counts().reset_index()
+         year_counts.columns = ['tahun', 'jumlah']
+         year_counts = year_counts.sort_values('jumlah', ascending=False)
+
+         fig = px.bar(year_counts, x='tahun', y='jumlah',
+                      color='tahun', title="Distribusi Kritik/Saran per Tahun")
+         fig.update_layout(xaxis=dict(type='category'))
+         st.plotly_chart(fig, use_container_width=True, config=config_options)
+
+
+ def show_semester_distribution(df):
+     """Show how many criticisms/suggestions were submitted per semester."""
+     if 'semester' in df.columns:
+         semester_counts = df['semester'].value_counts().reset_index()
+         semester_counts.columns = ['semester', 'jumlah']
+         semester_counts = semester_counts.sort_values('jumlah', ascending=False)
+         fig = px.bar(semester_counts, x='semester', y='jumlah',
+                      color='semester', title="Distribusi Kritik/Saran per Semester")
+         fig.update_layout(xaxis=dict(categoryorder='total descending'))
+         st.plotly_chart(fig, use_container_width=True, config=config_options)
+
+
+ def show_prodi_distribution(df):
+     """Show the number of criticisms/suggestions per study program."""
+     if 'nama_prodi' in df.columns:
+         prodi_counts = df['nama_prodi'].value_counts().reset_index()
+         prodi_counts.columns = ['nama_prodi', 'jumlah']
+         prodi_counts = prodi_counts.sort_values(by='jumlah', ascending=True)
+         fig = px.bar(
+             prodi_counts,
+             x='jumlah',
+             y='nama_prodi',
+             orientation='h',
+             color='jumlah',
+             title="Jumlah Kritik/Saran per Program Studi"
+         )
+         st.plotly_chart(fig, use_container_width=True, config=config_options)
+
+
+ def show_top10_matkul_distribution(df):
+     """Show the 10 courses with the most criticisms/suggestions."""
+     if 'nama_matakuliah' in df.columns and 'kode_matakuliah' in df.columns:
+         matkul_counts = (
+             df.groupby(['kode_matakuliah', 'nama_matakuliah'], observed=False)
+             .size()
+             .reset_index(name='jumlah')
+             .sort_values(by='jumlah', ascending=False)
+             .head(10)
+         )
+         matkul_counts['label'] = (
+             matkul_counts['kode_matakuliah'] + " - " +
+             matkul_counts['nama_matakuliah']
+         )
+         matkul_counts = matkul_counts.sort_values(by='jumlah', ascending=True)
+
+         fig = px.bar(
+             matkul_counts,
+             x='jumlah',
+             y='label',
+             orientation='h',
+             title="Top 10 Mata Kuliah Berdasarkan Kritik/Saran",
+             color='jumlah'
+         )
+         st.plotly_chart(fig, use_container_width=True, config=config_options)
+
+
+ def show_sentiment_by_year(df, aspek_columns):
+     """Show the sentiment distribution per year."""
+     if 'tahun' in df.columns:
+         df_long = df.melt(id_vars=['tahun'],
+                           value_vars=aspek_columns,
+                           var_name='aspek',
+                           value_name='sentimen')
+         year_sentiment = df_long.groupby(
+             ['tahun', 'sentimen'], observed=False
+         ).size().reset_index(name='jumlah')
+         year_sentiment = year_sentiment.sort_values('jumlah', ascending=False)
+         fig = px.bar(year_sentiment, x='tahun', y='jumlah', color='sentimen',
+                      barmode='group', color_discrete_map=sentimen_palette)
+         fig.update_layout(title="Distribusi Sentimen Kritik/Saran per Tahun")
+         st.plotly_chart(fig, use_container_width=True, config=config_options)
+
+
+ def show_sentiment_by_semester(df, aspek_columns):
+     """Show the sentiment distribution per semester."""
+     if 'semester' in df.columns:
+         df_long = df.melt(id_vars=['semester'],
+                           value_vars=aspek_columns,
+                           var_name='aspek',
+                           value_name='sentimen')
+         semester_sentiment = df_long.groupby(
+             ['semester', 'sentimen'], observed=False
+         ).size().reset_index(name='jumlah')
+         semester_sentiment = semester_sentiment.sort_values('jumlah', ascending=False)
+         fig = px.bar(semester_sentiment, x='semester', y='jumlah', color='sentimen',
+                      barmode='group', color_discrete_map=sentimen_palette)
+         fig.update_layout(title="Distribusi Sentimen Kritik/Saran per Semester")
+         st.plotly_chart(fig, use_container_width=True, config=config_options)
+
+
+ def show_sentiment_by_prodi(df, aspek_columns):
+     """Show the sentiment distribution per study program."""
+     if 'nama_prodi' in df.columns:
+         df_long = df.melt(
+             id_vars=['nama_prodi'],
+             value_vars=aspek_columns,
+             var_name='aspek',
+             value_name='sentimen'
+         )
+
+         prodi_sentiment = (
+             df_long.groupby(['nama_prodi', 'sentimen'], observed=False)
+             .size()
+             .reset_index(name='jumlah')
+         )
+
+         total_per_prodi = (
+             prodi_sentiment.groupby('nama_prodi')['jumlah']
+             .sum()
+             .sort_values(ascending=False)
+         )
+         ordered_categories = total_per_prodi.index.tolist()[::-1]
+
+         prodi_sentiment['nama_prodi'] = pd.Categorical(
+             prodi_sentiment['nama_prodi'],
+             categories=ordered_categories,
+             ordered=True
+         )
+
+         fig = px.bar(
+             prodi_sentiment,
+             y='nama_prodi',
+             x='jumlah',
+             color='sentimen',
+             barmode='group',
+             orientation='h',
+             color_discrete_map=sentimen_palette
+         )
+         fig.update_layout(
+             title="Distribusi Sentimen per Program Studi",
+             yaxis={'categoryorder': 'array',
+                    'categoryarray': ordered_categories}
+         )
+         st.plotly_chart(fig, use_container_width=True, config=config_options)
+
+
+ def show_sentiment_by_top10_matkul(df, aspek_columns):
+     """Show the sentiment distribution for the top 10 courses."""
+     # Guard against missing columns, consistent with the other helpers
+     if 'kode_matakuliah' not in df.columns or 'nama_matakuliah' not in df.columns:
+         return
+
+     df_top10 = (
+         df.groupby(['kode_matakuliah', 'nama_matakuliah'], observed=False)
+         .size()
+         .sort_values(ascending=False)
+         .head(10)
+         .index
+     )
+
+     df_filtered = df[df.set_index(
+         ['kode_matakuliah', 'nama_matakuliah']).index.isin(df_top10)]
+
+     df_long = df_filtered.melt(
+         id_vars=['kode_matakuliah', 'nama_matakuliah'],
+         value_vars=aspek_columns,
+         var_name='aspek',
+         value_name='sentimen'
+     )
+
+     df_long['label'] = (
+         df_long['kode_matakuliah'] + " - " + df_long['nama_matakuliah']
+     )
+
+     matkul_sentiment = (
+         df_long.groupby(['label', 'sentimen'], observed=False)
+         .size()
+         .reset_index(name='jumlah')
+     )
+
+     total_per_label = (
+         matkul_sentiment.groupby('label')['jumlah']
+         .sum()
+         .sort_values(ascending=False)
+     )
+     ordered_labels = total_per_label.index.tolist()[::-1]
+
+     matkul_sentiment['label'] = pd.Categorical(
+         matkul_sentiment['label'],
+         categories=ordered_labels,
+         ordered=True
+     )
+
+     fig = px.bar(
+         matkul_sentiment,
+         y='label',
+         x='jumlah',
+         color='sentimen',
+         barmode='group',
+         orientation='h',
+         color_discrete_map=sentimen_palette
+     )
+     fig.update_layout(
+         title="Distribusi Sentimen pada Top 10 Mata Kuliah",
+         yaxis={'categoryorder': 'array', 'categoryarray': ordered_labels}
+     )
+     st.plotly_chart(fig, use_container_width=True, config=config_options)
+
+
+ def show_sentiment_stacked_percentage(df, aspek_columns):
+     """Show a stacked bar chart of sentiment percentages per aspect."""
+
+     if df.empty or not set(aspek_columns).issubset(df.columns):
+         st.warning("Data atau kolom aspek tidak tersedia.")
+         return
+
+     df_long = df.melt(
+         value_vars=aspek_columns,
+         var_name="aspek",
+         value_name="sentimen"
+     )
+
+     # Compute percentages
+     count_data = df_long.groupby(
+         ['aspek', 'sentimen']).size().reset_index(name='jumlah')
+     total_per_aspek = count_data.groupby('aspek')['jumlah'].sum().reset_index()
+     total_per_aspek.columns = ['aspek', 'total']
+     count_data = count_data.merge(total_per_aspek, on='aspek')
+     count_data['persentase'] = (count_data['jumlah'] / count_data['total']) * 100
+
+     fig = px.bar(
+         count_data,
+         x="aspek",
+         y="persentase",
+         color="sentimen",
+         title="Persentase Distribusi Sentimen per Aspek",
+         color_discrete_map=sentimen_palette,
+         category_orders={
+             "sentimen": category_order,
+             "aspek": ASPEK_COLUMNS  # keep the aspect order from config.py
+         }
+     )
+     fig.update_layout(
+         yaxis_title="Persentase (%)",
+         xaxis_title="Aspek"
+     )
+     st.plotly_chart(fig, use_container_width=True, config=config_options)
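The chart helpers only need a DataFrame containing the aspect columns (plus the optional metadata columns each one guards for). A toy rendering sketch — save it as a separate script (the `demo_viz.py` name and the rows are invented) and launch it with Streamlit:

```python
# Sketch: render one chart from a toy DataFrame. Run with:
#   streamlit run demo_viz.py
import pandas as pd
from config import ASPEK_COLUMNS
from visualization import show_sentiment_bar_chart

df = pd.DataFrame([
    {a: "positif" for a in ASPEK_COLUMNS},
    {a: "negatif" for a in ASPEK_COLUMNS},
    {a: "netral" for a in ASPEK_COLUMNS},
])
show_sentiment_bar_chart(df, ASPEK_COLUMNS)
```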