zdannn2808 committed on
Commit
5bea1cc
·
1 Parent(s): d7ef155

Update requirements.txt dan Tambahkan handler NLTK di preprocessing.py

Browse files
Files changed (2) hide show
  1. preprocessing.py +12 -0
  2. requirements.txt +2 -2
preprocessing.py CHANGED
@@ -7,6 +7,18 @@ Modul untuk preprocessing teks sebelum prediksi ABSA
7
  import re
8
  import string
9
 
 
 
 
 
 
 
 
 
 
 
 
 
10
  # Stopwords Indonesia
11
  INDONESIAN_STOPWORDS = set([
12
  'ada', 'adalah', 'adanya', 'adapun', 'agak', 'agaknya', 'agar', 'akan', 'akankah', 'akhir',
 
7
  import re
8
  import string
9
 
10
+ # ✅ TAMBAHKAN INI - Download NLTK data jika diperlukan
11
+ try:
12
+ import nltk
13
+ try:
14
+ nltk.data.find('tokenizers/punkt_tab')
15
+ except LookupError:
16
+ print("📥 Downloading NLTK punkt_tab...")
17
+ nltk.download('punkt_tab', quiet=True)
18
+ print("✅ NLTK punkt_tab downloaded")
19
+ except ImportError:
20
+ print("⚠️ NLTK tidak terinstall, menggunakan tokenizer sederhana")
21
+
22
  # Stopwords Indonesia
23
  INDONESIAN_STOPWORDS = set([
24
  'ada', 'adalah', 'adanya', 'adapun', 'agak', 'agaknya', 'agar', 'akan', 'akankah', 'akhir',
requirements.txt CHANGED
@@ -3,10 +3,10 @@ joblib==1.5.2
3
  packaging==25.0
4
  pandas==2.3.3
5
  plotly==6.3.0
6
- Sastrawi==1.0.1
7
  streamlit==1.50.0
8
  torch==2.8.0
9
  transformers==4.56.2
10
  scikit-learn
11
  openpyxl
12
- plotly
 
 
3
  packaging==25.0
4
  pandas==2.3.3
5
  plotly==6.3.0
 
6
  streamlit==1.50.0
7
  torch==2.8.0
8
  transformers==4.56.2
9
  scikit-learn
10
  openpyxl
11
+ plotly
12
+ nltk