AIEcosystem committed on
Commit
d4fcd59
·
verified ·
1 Parent(s): 2431b4e

Update src/streamlit_app.py

Browse files
Files changed (1) hide show
  1. src/streamlit_app.py +15 -72
src/streamlit_app.py CHANGED
@@ -12,8 +12,6 @@ from streamlit_extras.stylable_container import stylable_container
12
  from typing import Optional
13
  from gliner import GLiNER
14
  from comet_ml import Experiment
15
-
16
-
17
  st.markdown(
18
  """
19
  <style>
@@ -57,18 +55,13 @@ st.markdown(
57
  }
58
  </style>
59
  """,
60
- unsafe_allow_html=True
61
- )
62
-
63
-
64
  # --- Page Configuration and UI Elements ---
65
  st.set_page_config(layout="wide", page_title="Named Entity Recognition App")
66
  st.subheader("Multilingual", divider="green")
67
  st.link_button("by nlpblogs", "https://nlpblogs.com", type="tertiary")
68
  expander = st.expander("**Important notes**")
69
-
70
- expander.write("""**Named Entities:** This Multilingual web app predicts fourteen (14) labels:
71
- "Person", "First_name", "Last_name", "Title", "Job_title", "Affiliation", "Gender", "Age", "Date", "Nationality", "Location", "Country", "Role", "Relationship"
72
 
73
  Results are presented in easy-to-read tables, visualized in an interactive tree map, pie chart and bar chart, and are available for download along with a Glossary of tags.
74
 
@@ -83,17 +76,11 @@ Results are presented in easy-to-read tables, visualized in an interactive tree
83
  **Technical issues:** If your connection times out, please refresh the page or reopen the app's URL.
84
 
85
  For any errors or inquiries, please contact us at info@nlpblogs.com""")
86
-
87
  with st.sidebar:
88
  st.write("Use the following code to embed the Multilingual web app on your website. Feel free to adjust the width and height values to fit your page.")
89
  code = '''
90
- <iframe
91
- src="https://aiecosystem-multilingual.hf.space"
92
- frameborder="0"
93
- width="850"
94
- height="450"
95
  ></iframe>
96
-
97
  '''
98
  st.code(code, language="html")
99
  st.text("")
@@ -101,44 +88,28 @@ with st.sidebar:
101
  st.divider()
102
  st.subheader("🚀 Ready to build your own AI Web App?", divider="orange")
103
  st.link_button("AI Web App Builder", "https://nlpblogs.com/build-your-named-entity-recognition-app/", type="primary")
104
-
105
  # --- Comet ML Setup ---
106
  COMET_API_KEY = os.environ.get("COMET_API_KEY")
107
  COMET_WORKSPACE = os.environ.get("COMET_WORKSPACE")
108
  COMET_PROJECT_NAME = os.environ.get("COMET_PROJECT_NAME")
109
  comet_initialized = bool(COMET_API_KEY and COMET_WORKSPACE and COMET_PROJECT_NAME)
110
-
111
  if not comet_initialized:
112
  st.warning("Comet ML not initialized. Check environment variables.")
113
-
114
  # --- Label Definitions ---
115
-
116
- labels = [
117
- "PERSON",
118
  "FIRST_NAME",
119
-
120
  "LAST_NAME",
121
-
122
  "TITLE", "JOB_TITLE",
123
  "AFFILIATION", "GENDER",
124
  "AGE",
125
  "DATE",
126
-
127
  "NATIONALITY", "LOCATION","COUNTRY", "ROLE",
128
- "RELATIONSHIP"
129
-
130
- ]
131
-
132
-
133
  # Create a mapping dictionary for labels to categories
134
-
135
- category_mapping = {
136
- "Identity": [
137
  "PERSON",
138
  "FIRST_NAME",
139
-
140
  "LAST_NAME",
141
-
142
  "TITLE"
143
  ],
144
  "Professional": [
@@ -149,23 +120,13 @@ category_mapping = {
149
  "GENDER",
150
  "AGE",
151
  "DATE",
152
-
153
  "NATIONALITY",
154
  "LOCATION","COUNTRY"
155
  ],
156
  "Relational": [
157
  "ROLE",
158
  "RELATIONSHIP"
159
- ]
160
- }
161
-
162
-
163
-
164
-
165
-
166
-
167
-
168
-
169
  # --- Model Loading ---
170
  @st.cache_resource
171
  def load_ner_model():
@@ -176,30 +137,28 @@ def load_ner_model():
176
  st.error(f"Failed to load NER model. Please check your internet connection or model availability: {e}")
177
  st.stop()
178
  model = load_ner_model()
179
-
180
  # Flatten the mapping to a single dictionary
181
  reverse_category_mapping = {label: category for category, label_list in category_mapping.items() for label in label_list}
182
-
183
  # --- Text Input and Clear Button ---
184
- text = st.text_area("Type or paste your text below, and then press Ctrl + Enter", height=250, key='my_text_area')
185
-
 
 
186
  def clear_text():
187
  """Clears the text area."""
188
  st.session_state['my_text_area'] = ""
189
-
190
  st.button("Clear text", on_click=clear_text)
191
-
192
-
193
  # --- Results Section ---
194
  if st.button("Results"):
195
  start_time = time.time()
196
  if not text.strip():
197
  st.warning("Please enter some text to extract entities.")
 
 
198
  else:
199
  with st.spinner("Extracting entities...", show_time=True):
200
  entities = model.predict_entities(text, labels)
201
  df = pd.DataFrame(entities)
202
-
203
  if not df.empty:
204
  df['category'] = df['label'].map(reverse_category_mapping)
205
  if comet_initialized:
@@ -210,13 +169,10 @@ if st.button("Results"):
210
  )
211
  experiment.log_parameter("input_text", text)
212
  experiment.log_table("predicted_entities", df)
213
-
214
  st.subheader("Grouped Entities by Category", divider = "green")
215
-
216
  # Create tabs for each category
217
  category_names = sorted(list(category_mapping.keys()))
218
  category_tabs = st.tabs(category_names)
219
-
220
  for i, category_name in enumerate(category_names):
221
  with category_tabs[i]:
222
  df_category_filtered = df[df['category'] == category_name]
@@ -224,9 +180,6 @@ if st.button("Results"):
224
  st.dataframe(df_category_filtered.drop(columns=['category']), use_container_width=True)
225
  else:
226
  st.info(f"No entities found for the '{category_name}' category.")
227
-
228
-
229
-
230
  with st.expander("See Glossary of tags"):
231
  st.write('''
232
  - **text**: ['entity extracted from your text data']
@@ -237,18 +190,15 @@ if st.button("Results"):
237
  - **end**: ['index of the end of the corresponding entity']
238
  ''')
239
  st.divider()
240
-
241
  # Tree map
242
  st.subheader("Tree map", divider = "green")
243
  fig_treemap = px.treemap(df, path=[px.Constant("all"), 'category', 'label', 'text'], values='score', color='category')
244
  fig_treemap.update_layout(margin=dict(t=50, l=25, r=25, b=25), paper_bgcolor='#F0F2F5', plot_bgcolor='#F0F2F5')
245
  st.plotly_chart(fig_treemap)
246
-
247
  # Pie and Bar charts
248
  grouped_counts = df['category'].value_counts().reset_index()
249
  grouped_counts.columns = ['category', 'count']
250
  col1, col2 = st.columns(2)
251
-
252
  with col1:
253
  st.subheader("Pie chart", divider = "green")
254
  fig_pie = px.pie(grouped_counts, values='count', names='category', hover_data=['count'], labels={'count': 'count'}, title='Percentage of predicted categories')
@@ -258,7 +208,6 @@ if st.button("Results"):
258
  plot_bgcolor='#F0F2F5'
259
  )
260
  st.plotly_chart(fig_pie)
261
-
262
  with col2:
263
  st.subheader("Bar chart", divider = "green")
264
  fig_bar = px.bar(grouped_counts, x="count", y="category", color="category", text_auto=True, title='Occurrences of predicted categories')
@@ -267,7 +216,6 @@ if st.button("Results"):
267
  plot_bgcolor='#F0F2F5'
268
  )
269
  st.plotly_chart(fig_bar)
270
-
271
  # Most Frequent Entities
272
  st.subheader("Most Frequent Entities", divider="green")
273
  word_counts = df['text'].value_counts().reset_index()
@@ -282,10 +230,8 @@ if st.button("Results"):
282
  st.plotly_chart(fig_repeating_bar)
283
  else:
284
  st.warning("No entities were found that occur more than once.")
285
-
286
  # Download Section
287
  st.divider()
288
-
289
  dfa = pd.DataFrame(
290
  data={
291
  'Column Name': ['text', 'label', 'score', 'start', 'end', 'category'],
@@ -303,7 +249,6 @@ if st.button("Results"):
303
  with zipfile.ZipFile(buf, "w") as myzip:
304
  myzip.writestr("Summary of the results.csv", df.to_csv(index=False))
305
  myzip.writestr("Glossary of tags.csv", dfa.to_csv(index=False))
306
-
307
  with stylable_container(
308
  key="download_button",
309
  css_styles="""button { background-color: red; border: 1px solid black; padding: 5px; color: white; }""",
@@ -314,15 +259,13 @@ if st.button("Results"):
314
  file_name="nlpblogs_results.zip",
315
  mime="application/zip",
316
  )
317
-
318
  if comet_initialized:
319
  experiment.log_figure(figure=fig_treemap, figure_name="entity_treemap_categories")
320
  experiment.end()
321
  else: # If df is empty
322
  st.warning("No entities were found in the provided text.")
323
-
324
- end_time = time.time()
325
  elapsed_time = end_time - start_time
326
  st.text("")
327
  st.text("")
328
- st.info(f"Results processed in **{elapsed_time:.2f} seconds**.")
 
12
  from typing import Optional
13
  from gliner import GLiNER
14
  from comet_ml import Experiment
 
 
15
  st.markdown(
16
  """
17
  <style>
 
55
  }
56
  </style>
57
  """,
58
+ unsafe_allow_html=True)
 
 
 
59
  # --- Page Configuration and UI Elements ---
60
  st.set_page_config(layout="wide", page_title="Named Entity Recognition App")
61
  st.subheader("Multilingual", divider="green")
62
  st.link_button("by nlpblogs", "https://nlpblogs.com", type="tertiary")
63
  expander = st.expander("**Important notes**")
64
+ expander.write("""**Named Entities:** This Multilingual web app predicts fourteen (14) labels: "Person", "First_name", "Last_name", "Title", "Job_title", "Affiliation", "Gender", "Age", "Date", "Nationality", "Location", "Country", "Role", "Relationship"
 
 
65
 
66
  Results are presented in easy-to-read tables, visualized in an interactive tree map, pie chart and bar chart, and are available for download along with a Glossary of tags.
67
 
 
76
  **Technical issues:** If your connection times out, please refresh the page or reopen the app's URL.
77
 
78
  For any errors or inquiries, please contact us at info@nlpblogs.com""")
 
79
  with st.sidebar:
80
  st.write("Use the following code to embed the Multilingual web app on your website. Feel free to adjust the width and height values to fit your page.")
81
  code = '''
82
+ <iframe src="https://aiecosystem-multilingual.hf.space" frameborder="0" width="850" height="450"
 
 
 
 
83
  ></iframe>
 
84
  '''
85
  st.code(code, language="html")
86
  st.text("")
 
88
  st.divider()
89
  st.subheader("🚀 Ready to build your own AI Web App?", divider="orange")
90
  st.link_button("AI Web App Builder", "https://nlpblogs.com/build-your-named-entity-recognition-app/", type="primary")
 
91
  # --- Comet ML Setup ---
92
  COMET_API_KEY = os.environ.get("COMET_API_KEY")
93
  COMET_WORKSPACE = os.environ.get("COMET_WORKSPACE")
94
  COMET_PROJECT_NAME = os.environ.get("COMET_PROJECT_NAME")
95
  comet_initialized = bool(COMET_API_KEY and COMET_WORKSPACE and COMET_PROJECT_NAME)
 
96
  if not comet_initialized:
97
  st.warning("Comet ML not initialized. Check environment variables.")
 
98
  # --- Label Definitions ---
99
+ labels = [ "PERSON",
 
 
100
  "FIRST_NAME",
 
101
  "LAST_NAME",
 
102
  "TITLE", "JOB_TITLE",
103
  "AFFILIATION", "GENDER",
104
  "AGE",
105
  "DATE",
 
106
  "NATIONALITY", "LOCATION","COUNTRY", "ROLE",
107
+ "RELATIONSHIP"]
 
 
 
 
108
  # Create a mapping dictionary for labels to categories
109
+ category_mapping = { "Identity": [
 
 
110
  "PERSON",
111
  "FIRST_NAME",
 
112
  "LAST_NAME",
 
113
  "TITLE"
114
  ],
115
  "Professional": [
 
120
  "GENDER",
121
  "AGE",
122
  "DATE",
 
123
  "NATIONALITY",
124
  "LOCATION","COUNTRY"
125
  ],
126
  "Relational": [
127
  "ROLE",
128
  "RELATIONSHIP"
129
+ ]}
 
 
 
 
 
 
 
 
 
130
  # --- Model Loading ---
131
  @st.cache_resource
132
  def load_ner_model():
 
137
  st.error(f"Failed to load NER model. Please check your internet connection or model availability: {e}")
138
  st.stop()
139
  model = load_ner_model()
 
140
  # Flatten the mapping to a single dictionary
141
  reverse_category_mapping = {label: category for category, label_list in category_mapping.items() for label in label_list}
 
142
  # --- Text Input and Clear Button ---
143
+ word_limit = 200
144
+ text = st.text_area(f"Type or paste your text below (max {word_limit} words), and then press Ctrl + Enter", height=250, key='my_text_area')
145
+ word_count = len(text.split())
146
+ st.markdown(f"**Word count:** {word_count}/{word_limit}")
147
  def clear_text():
148
  """Clears the text area."""
149
  st.session_state['my_text_area'] = ""
 
150
  st.button("Clear text", on_click=clear_text)
 
 
151
  # --- Results Section ---
152
  if st.button("Results"):
153
  start_time = time.time()
154
  if not text.strip():
155
  st.warning("Please enter some text to extract entities.")
156
+ elif word_count > word_limit:
157
+ st.warning(f"Your text exceeds the {word_limit} word limit. Please shorten it to continue.")
158
  else:
159
  with st.spinner("Extracting entities...", show_time=True):
160
  entities = model.predict_entities(text, labels)
161
  df = pd.DataFrame(entities)
 
162
  if not df.empty:
163
  df['category'] = df['label'].map(reverse_category_mapping)
164
  if comet_initialized:
 
169
  )
170
  experiment.log_parameter("input_text", text)
171
  experiment.log_table("predicted_entities", df)
 
172
  st.subheader("Grouped Entities by Category", divider = "green")
 
173
  # Create tabs for each category
174
  category_names = sorted(list(category_mapping.keys()))
175
  category_tabs = st.tabs(category_names)
 
176
  for i, category_name in enumerate(category_names):
177
  with category_tabs[i]:
178
  df_category_filtered = df[df['category'] == category_name]
 
180
  st.dataframe(df_category_filtered.drop(columns=['category']), use_container_width=True)
181
  else:
182
  st.info(f"No entities found for the '{category_name}' category.")
 
 
 
183
  with st.expander("See Glossary of tags"):
184
  st.write('''
185
  - **text**: ['entity extracted from your text data']
 
190
  - **end**: ['index of the end of the corresponding entity']
191
  ''')
192
  st.divider()
 
193
  # Tree map
194
  st.subheader("Tree map", divider = "green")
195
  fig_treemap = px.treemap(df, path=[px.Constant("all"), 'category', 'label', 'text'], values='score', color='category')
196
  fig_treemap.update_layout(margin=dict(t=50, l=25, r=25, b=25), paper_bgcolor='#F0F2F5', plot_bgcolor='#F0F2F5')
197
  st.plotly_chart(fig_treemap)
 
198
  # Pie and Bar charts
199
  grouped_counts = df['category'].value_counts().reset_index()
200
  grouped_counts.columns = ['category', 'count']
201
  col1, col2 = st.columns(2)
 
202
  with col1:
203
  st.subheader("Pie chart", divider = "green")
204
  fig_pie = px.pie(grouped_counts, values='count', names='category', hover_data=['count'], labels={'count': 'count'}, title='Percentage of predicted categories')
 
208
  plot_bgcolor='#F0F2F5'
209
  )
210
  st.plotly_chart(fig_pie)
 
211
  with col2:
212
  st.subheader("Bar chart", divider = "green")
213
  fig_bar = px.bar(grouped_counts, x="count", y="category", color="category", text_auto=True, title='Occurrences of predicted categories')
 
216
  plot_bgcolor='#F0F2F5'
217
  )
218
  st.plotly_chart(fig_bar)
 
219
  # Most Frequent Entities
220
  st.subheader("Most Frequent Entities", divider="green")
221
  word_counts = df['text'].value_counts().reset_index()
 
230
  st.plotly_chart(fig_repeating_bar)
231
  else:
232
  st.warning("No entities were found that occur more than once.")
 
233
  # Download Section
234
  st.divider()
 
235
  dfa = pd.DataFrame(
236
  data={
237
  'Column Name': ['text', 'label', 'score', 'start', 'end', 'category'],
 
249
  with zipfile.ZipFile(buf, "w") as myzip:
250
  myzip.writestr("Summary of the results.csv", df.to_csv(index=False))
251
  myzip.writestr("Glossary of tags.csv", dfa.to_csv(index=False))
 
252
  with stylable_container(
253
  key="download_button",
254
  css_styles="""button { background-color: red; border: 1px solid black; padding: 5px; color: white; }""",
 
259
  file_name="nlpblogs_results.zip",
260
  mime="application/zip",
261
  )
 
262
  if comet_initialized:
263
  experiment.log_figure(figure=fig_treemap, figure_name="entity_treemap_categories")
264
  experiment.end()
265
  else: # If df is empty
266
  st.warning("No entities were found in the provided text.")
267
+ end_time = time.time()
 
268
  elapsed_time = end_time - start_time
269
  st.text("")
270
  st.text("")
271
+ st.info(f"Results processed in **{elapsed_time:.2f} seconds**.")