Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
|
@@ -14,7 +14,6 @@ import pkg_resources
|
|
| 14 |
current_dir = os.path.dirname(os.path.abspath(__file__))
|
| 15 |
font_path = os.path.join(current_dir, "ArabicR2013-J25x.ttf")
|
| 16 |
|
| 17 |
-
# Add Arabic stop words
|
| 18 |
ARABIC_STOP_WORDS = {
|
| 19 |
'في', 'من', 'إلى', 'على', 'عن', 'مع', 'خلال', 'حتى', 'إذا', 'ثم',
|
| 20 |
'أو', 'و', 'ف', 'ل', 'ب', 'ك', 'لل', 'ال', 'هذا', 'هذه', 'ذلك',
|
|
@@ -31,7 +30,7 @@ ARABIC_STOP_WORDS = {
|
|
| 31 |
'اول', 'ضمن', 'انها', 'جميع', 'الذي', 'قبل', 'بعد', 'حول', 'ايضا',
|
| 32 |
'لازم', 'حاجة', 'علي', 'يجب', 'صار', 'صارت', 'تحت', 'ضد'
|
| 33 |
}
|
| 34 |
-
|
| 35 |
st.set_page_config(
|
| 36 |
page_title="Arabic Poem Analysis",
|
| 37 |
page_icon="📚",
|
|
@@ -250,7 +249,6 @@ def process_and_summarize(df, bert_tokenizer, bert_model, emotion_classifier, to
|
|
| 250 |
embedding_model=bert_model,
|
| 251 |
**topic_model_params)
|
| 252 |
|
| 253 |
-
# Create vectorizer with stop words
|
| 254 |
vectorizer = CountVectorizer(stop_words=list(ARABIC_STOP_WORDS),
|
| 255 |
min_df=1,
|
| 256 |
max_df=1.0)
|
|
@@ -316,7 +314,7 @@ def process_and_summarize(df, bert_tokenizer, bert_model, emotion_classifier, to
|
|
| 316 |
continue
|
| 317 |
|
| 318 |
return summaries, topic_model
|
| 319 |
-
|
| 320 |
try:
|
| 321 |
bert_tokenizer, bert_model, emotion_classifier = load_models()
|
| 322 |
st.success("Models loaded successfully!")
|
|
@@ -328,28 +326,23 @@ except Exception as e:
|
|
| 328 |
st.title("📚 Arabic Poem Analysis")
|
| 329 |
st.write("Upload a CSV or Excel file containing Arabic poems with columns `country` and `poem`.")
|
| 330 |
|
| 331 |
-
# File upload
|
| 332 |
uploaded_file = st.file_uploader("Choose a file", type=["csv", "xlsx"])
|
| 333 |
|
| 334 |
if uploaded_file is not None:
|
| 335 |
try:
|
| 336 |
-
# Read the file
|
| 337 |
if uploaded_file.name.endswith('.csv'):
|
| 338 |
df = pd.read_csv(uploaded_file)
|
| 339 |
else:
|
| 340 |
df = pd.read_excel(uploaded_file)
|
| 341 |
|
| 342 |
-
# Validate columns
|
| 343 |
required_columns = ['country', 'poem']
|
| 344 |
if not all(col in df.columns for col in required_columns):
|
| 345 |
st.error("File must contain 'country' and 'poem' columns.")
|
| 346 |
st.stop()
|
| 347 |
|
| 348 |
-
# Clean data
|
| 349 |
df['country'] = df['country'].str.strip()
|
| 350 |
df = df.dropna(subset=['country', 'poem'])
|
| 351 |
|
| 352 |
-
# Add topic modeling controls
|
| 353 |
st.subheader("Topic Modeling Settings")
|
| 354 |
col1, col2 = st.columns(2)
|
| 355 |
|
|
@@ -404,7 +397,6 @@ if uploaded_file is not None:
|
|
| 404 |
if summaries:
|
| 405 |
st.success("Analysis complete!")
|
| 406 |
|
| 407 |
-
# Display results in tabs
|
| 408 |
tab1, tab2 = st.tabs(["Country Summaries", "Global Topics"])
|
| 409 |
|
| 410 |
with tab1:
|
|
@@ -445,7 +437,6 @@ if uploaded_file is not None:
|
|
| 445 |
else:
|
| 446 |
st.info("👆 Upload a file to get started!")
|
| 447 |
|
| 448 |
-
# Example format
|
| 449 |
st.write("### Expected File Format:")
|
| 450 |
example_df = pd.DataFrame({
|
| 451 |
'country': ['Egypt', 'Palestine'],
|
|
|
|
| 14 |
current_dir = os.path.dirname(os.path.abspath(__file__))
|
| 15 |
font_path = os.path.join(current_dir, "ArabicR2013-J25x.ttf")
|
| 16 |
|
|
|
|
| 17 |
ARABIC_STOP_WORDS = {
|
| 18 |
'في', 'من', 'إلى', 'على', 'عن', 'مع', 'خلال', 'حتى', 'إذا', 'ثم',
|
| 19 |
'أو', 'و', 'ف', 'ل', 'ب', 'ك', 'لل', 'ال', 'هذا', 'هذه', 'ذلك',
|
|
|
|
| 30 |
'اول', 'ضمن', 'انها', 'جميع', 'الذي', 'قبل', 'بعد', 'حول', 'ايضا',
|
| 31 |
'لازم', 'حاجة', 'علي', 'يجب', 'صار', 'صارت', 'تحت', 'ضد'
|
| 32 |
}
|
| 33 |
+
|
| 34 |
st.set_page_config(
|
| 35 |
page_title="Arabic Poem Analysis",
|
| 36 |
page_icon="📚",
|
|
|
|
| 249 |
embedding_model=bert_model,
|
| 250 |
**topic_model_params)
|
| 251 |
|
|
|
|
| 252 |
vectorizer = CountVectorizer(stop_words=list(ARABIC_STOP_WORDS),
|
| 253 |
min_df=1,
|
| 254 |
max_df=1.0)
|
|
|
|
| 314 |
continue
|
| 315 |
|
| 316 |
return summaries, topic_model
|
| 317 |
+
|
| 318 |
try:
|
| 319 |
bert_tokenizer, bert_model, emotion_classifier = load_models()
|
| 320 |
st.success("Models loaded successfully!")
|
|
|
|
| 326 |
st.title("📚 Arabic Poem Analysis")
|
| 327 |
st.write("Upload a CSV or Excel file containing Arabic poems with columns `country` and `poem`.")
|
| 328 |
|
|
|
|
| 329 |
uploaded_file = st.file_uploader("Choose a file", type=["csv", "xlsx"])
|
| 330 |
|
| 331 |
if uploaded_file is not None:
|
| 332 |
try:
|
|
|
|
| 333 |
if uploaded_file.name.endswith('.csv'):
|
| 334 |
df = pd.read_csv(uploaded_file)
|
| 335 |
else:
|
| 336 |
df = pd.read_excel(uploaded_file)
|
| 337 |
|
|
|
|
| 338 |
required_columns = ['country', 'poem']
|
| 339 |
if not all(col in df.columns for col in required_columns):
|
| 340 |
st.error("File must contain 'country' and 'poem' columns.")
|
| 341 |
st.stop()
|
| 342 |
|
|
|
|
| 343 |
df['country'] = df['country'].str.strip()
|
| 344 |
df = df.dropna(subset=['country', 'poem'])
|
| 345 |
|
|
|
|
| 346 |
st.subheader("Topic Modeling Settings")
|
| 347 |
col1, col2 = st.columns(2)
|
| 348 |
|
|
|
|
| 397 |
if summaries:
|
| 398 |
st.success("Analysis complete!")
|
| 399 |
|
|
|
|
| 400 |
tab1, tab2 = st.tabs(["Country Summaries", "Global Topics"])
|
| 401 |
|
| 402 |
with tab1:
|
|
|
|
| 437 |
else:
|
| 438 |
st.info("👆 Upload a file to get started!")
|
| 439 |
|
|
|
|
| 440 |
st.write("### Expected File Format:")
|
| 441 |
example_df = pd.DataFrame({
|
| 442 |
'country': ['Egypt', 'Palestine'],
|