Allow free input and some fixes
Files changed:
- app.py (+185, -149)
- custom_renderer.py (+4, -125)
- requirements.txt (+3, -2)
app.py
CHANGED

@@ -1,5 +1,5 @@
 import random
-from typing import AnyStr
+from typing import AnyStr, List, Dict
 # import tensorflow_hub as hub
 
 import itertools
@@ -42,8 +42,8 @@ def get_sentence_embedding_model():
 
 @st.experimental_singleton
 def get_spacy():
-    #nlp = spacy.load('en_core_web_lg')
-    nlp =
+    # nlp = spacy.load('en_core_web_lg')
+    nlp = en_core_web_lg.load()
     return nlp
 
 
@@ -62,6 +62,15 @@ def get_transformer_pipeline():
     return pipeline("ner", model=model, tokenizer=tokenizer, grouped_entities=True)
 
 
+@st.experimental_singleton
+def get_summarizer_model():
+    model_name = 'google/pegasus-cnn_dailymail'
+    summarizer_model = pipeline("summarization", model=model_name, tokenizer=model_name,
+                                device=0 if torch.cuda.is_available() else -1)
+
+    return summarizer_model
+
+
 # Page setup
 st.set_page_config(
     page_title="Post-processing summarization fact checker",
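
The new `get_summarizer_model` helper follows the same pattern as the existing `get_spacy` and `get_transformer_pipeline`: the heavyweight model is wrapped in `@st.experimental_singleton` so it is constructed once per server process instead of on every Streamlit rerun. A minimal sketch of that caching behaviour, with a toy resource standing in for the Pegasus pipeline (the `get_expensive_resource` name is illustrative, not part of the app):

```python
import streamlit as st


@st.experimental_singleton  # newer Streamlit releases expose this as st.cache_resource
def get_expensive_resource():
    # The body runs only on the first call; later script reruns reuse the cached object.
    print("loading model ...")
    return object()


a = get_expensive_resource()
b = get_expensive_resource()
assert a is b  # the same instance is returned on every rerun
```
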
@@ -81,10 +90,14 @@ def list_all_article_names() -> list:
     for file in sorted(os.listdir('./sample-articles/')):
         if file.endswith('.txt'):
             filenames.append(file.replace('.txt', ''))
+    # Append free use possibility:
+    filenames.append("Provide your own input")
     return filenames
 
 
 def fetch_article_contents(filename: str) -> AnyStr:
+    if (filename == "Provide your own input"):
+        return " "
     with open(f'./sample-articles/{filename.lower()}.txt', 'r') as f:
         data = f.read()
     return data
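
The free-input feature is implemented with a sentinel entry: "Provide your own input" is appended to the list of sample articles, and `fetch_article_contents` returns a blank string for it so the text area starts empty. A reduced sketch of how the sentinel flows through the article selector (the `st.selectbox` wiring and the in-memory sample dict below are illustrative; that part of app.py is unchanged and not shown in this diff):

```python
import streamlit as st

FREE_INPUT = "Provide your own input"
SAMPLE_ARTICLES = {"article11": "Sample article text ..."}  # stand-in for ./sample-articles/*.txt


def list_all_article_names() -> list:
    filenames = sorted(SAMPLE_ARTICLES)
    filenames.append(FREE_INPUT)  # sentinel that enables free-form input
    return filenames


def fetch_article_contents(filename: str) -> str:
    if filename == FREE_INPUT:
        return " "  # blank placeholder, as in the diff
    return SAMPLE_ARTICLES[filename]


selected_article = st.selectbox("Select an article", list_all_article_names())
article_text = st.text_area("Article content", value=fetch_article_contents(selected_article), height=150)
```
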
@@ -110,13 +123,12 @@ def fetch_dependency_specific_contents(filename: str) -> AnyStr:
 
 def fetch_dependency_svg(filename: str) -> AnyStr:
     with open(f'./dependency-images/{filename.lower()}.txt', 'r') as f:
-        #data = f.read()
-        lines=[line.rstrip() for line in f]
+        # data = f.read()
+        lines = [line.rstrip() for line in f]
     return lines
 
 
-def display_summary(article_name: str):
-    summary_content = fetch_summary_contents(article_name)
+def display_summary(summary_content: str):
     st.session_state.summary_output = summary_content
     soup = BeautifulSoup(summary_content, features="html.parser")
     HTML_WRAPPER = """<div style="overflow-x: auto; border: 1px solid #e6e9ef; border-radius: 0.25rem; padding: 1rem; margin-bottom: 2.5rem">{}</div>"""
@@ -161,13 +173,15 @@ def get_all_entities(text):
 
 
 # TODO: this functionality can be cached (e.g. by storing html file output) if wanted (or just store list of entities idk)
-def get_and_compare_entities(article_name: str):
-    article_content = fetch_article_contents(article_name)
+def get_and_compare_entities():
+    #article_content = fetch_article_contents(article_name)
+    article_content = st.session_state.article_text
     all_entities_per_sentence = get_all_entities_per_sentence(article_content)
     # st.session_state.entities_per_sentence_article = all_entities_per_sentence
     entities_article = list(itertools.chain.from_iterable(all_entities_per_sentence))
 
-    summary_content = fetch_summary_contents(article_name)
+    #summary_content = fetch_summary_contents(article_name)
+    summary_content = st.session_state.summary_output
     all_entities_per_sentence = get_all_entities_per_sentence(summary_content)
     # st.session_state.entities_per_sentence_summary = all_entities_per_sentence
     entities_summary = list(itertools.chain.from_iterable(all_entities_per_sentence))
@@ -179,7 +193,7 @@ def get_and_compare_entities(article_name: str):
         if any(entity.lower() in substring_entity.lower() for substring_entity in entities_article):
             matched_entities.append(entity)
         elif any(
-                np.inner(sentence_embedding_model.encode(entity), sentence_embedding_model.encode(art_entity)) > 0.9 for
+                np.inner(sentence_embedding_model.encode(entity, show_progress_bar=False), sentence_embedding_model.encode(art_entity, show_progress_bar=False)) > 0.9 for
                 art_entity in entities_article):
             matched_entities.append(entity)
         else:
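
The fuzzy branch above counts a summary entity as matched when the inner product of its sentence embedding with any article entity's embedding exceeds 0.9; the only change in this commit is `show_progress_bar=False`, which silences the encoder's progress bars on every call. One thing worth noting is that `np.inner` on raw embeddings equals cosine similarity only if the vectors are unit-normalised. A small self-contained sketch of both scores (the MiniLM model name is an assumption; `get_sentence_embedding_model` is not shown in this diff):

```python
import numpy as np
from sentence_transformers import SentenceTransformer, util

# Assumed embedding model; the app's get_sentence_embedding_model() is defined elsewhere.
model = SentenceTransformer("all-MiniLM-L6-v2")

entity = "Kyiv"
article_entities = ["Kiev", "Ukraine", "the United Nations"]

ent_emb = model.encode(entity, show_progress_bar=False)
art_embs = model.encode(article_entities, show_progress_bar=False)

# Raw inner product, as used in app.py: its scale depends on the vector norms.
inner_scores = [float(np.inner(ent_emb, e)) for e in art_embs]

# Cosine similarity is scale-invariant, which is usually what a fixed 0.9 threshold assumes.
cosine_scores = util.cos_sim(ent_emb, art_embs)[0].tolist()

matched = any(score > 0.9 for score in cosine_scores)
print(inner_scores, cosine_scores, matched)
```
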
@@ -187,14 +201,14 @@ def get_and_compare_entities(article_name: str):
     return matched_entities, unmatched_entities
 
 
-def highlight_entities(article_name: str):
-    summary_content = fetch_summary_contents(article_name)
-
+def highlight_entities():
+    #summary_content = fetch_summary_contents(article_name)
+    summary_content = st.session_state.summary_output
     markdown_start_red = "<mark class=\"entity\" style=\"background: rgb(238, 135, 135);\">"
     markdown_start_green = "<mark class=\"entity\" style=\"background: rgb(121, 236, 121);\">"
     markdown_end = "</mark>"
 
-    matched_entities, unmatched_entities = get_and_compare_entities(article_name)
+    matched_entities, unmatched_entities = get_and_compare_entities()
 
     for entity in matched_entities:
         summary_content = summary_content.replace(entity, markdown_start_green + entity + markdown_end)
@@ -209,10 +223,9 @@ def highlight_entities(article_name: str):
     return HTML_WRAPPER.format(soup)
 
 
-def render_dependency_parsing(text:
-    html = render_sentence_custom(text)
+def render_dependency_parsing(text: Dict):
+    html = render_sentence_custom(text, nlp)
     html = html.replace("\n\n", "\n")
-    print(get_svg(html))
     st.write(get_svg(html), unsafe_allow_html=True)
 
 
@@ -237,7 +250,6 @@ def check_dependency(article: bool):
         start_id = sentence.start
         end_id = sentence.end
         for t in tok_l:
-            # print(t)
             if t["id"] < start_id or t["id"] > end_id:
                 continue
             head = tok_l[t['head']]
@@ -261,7 +273,6 @@ def check_dependency(article: bool):
                 "identifier": identifier, "sentence": str(sentence)})
             else:
                 continue
-    # print(f'NOW TEST LIST DICT: {test_list_dict_output}')
     return test_list_dict_output
     # return all_deps
 
@@ -273,6 +284,38 @@ def is_valid_url(url: str) -> bool:
     return True
 
 
+def render_svg(svg_file):
+    with open(svg_file, "r") as f:
+        lines = f.readlines()
+        svg = "".join(lines)
+
+    # """Renders the given svg string."""
+    b64 = base64.b64encode(svg.encode("utf-8")).decode("utf-8")
+    html = r'<img src="data:image/svg+xml;base64,%s"/>' % b64
+    return html
+
+
+def generate_abstractive_summary(text, type, min_len=120, max_len=512, **kwargs):
+    summarization_model = get_summarizer_model()
+    text = text.strip().replace("\n", " ")
+    if type == "top_p":
+        text = summarization_model(text, min_length=min_len,
+                                   max_length=max_len,
+                                   top_k=50, top_p=0.95, clean_up_tokenization_spaces=True)
+    elif type == "greedy":
+        text = summarization_model(text, min_length=min_len,
+                                   max_length=max_len, clean_up_tokenization_spaces=True)
+    elif type == "top_k":
+        text = summarization_model(text, min_length=min_len, max_length=max_len, top_k=50,
+                                   clean_up_tokenization_spaces=True)
+    elif type == "beam":
+        text = summarization_model(text, min_length=min_len,
+                                   max_length=max_len,
+                                   clean_up_tokenization_spaces=True, **kwargs)
+    summary = text[0]['summary_text'].replace("<n>", " ")
+    return summary
+
+
 # Start session
 if 'results' not in st.session_state:
     st.session_state.results = []
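
`render_svg` inlines an SVG file as a base64-encoded `<img>` tag that `st.write` can render, and `generate_abstractive_summary` drives the cached Pegasus pipeline with one of four decoding presets (greedy, top-k, top-p, beam). Later in the script the app calls the beam preset with sampling, 15 beams and 4-gram repetition blocking; a hedged, self-contained sketch of that call using the pipeline directly (the toy article text is made up, and the shorter length limits are only for the example, since the helper defaults to min_len=120, max_len=512):

```python
import torch
from transformers import pipeline

model_name = "google/pegasus-cnn_dailymail"
summarizer = pipeline("summarization", model=model_name, tokenizer=model_name,
                      device=0 if torch.cuda.is_available() else -1)

article_text = "The borders of Ukraine were discussed at the summit in Brussels on Monday."  # made-up input

# Same decoding settings the app passes for its "beam" preset, with smaller length limits.
result = summarizer(article_text.strip().replace("\n", " "),
                    min_length=20, max_length=64,
                    do_sample=True, num_beams=15, no_repeat_ngram_size=4,
                    clean_up_tokenization_spaces=True)

# Pegasus marks sentence boundaries with "<n>"; the app replaces them with spaces.
summary = result[0]["summary_text"].replace("<n>", " ")
print(summary)
```
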
@@ -298,12 +341,11 @@ metric, indicating the trustworthiness of the generated summary. Throughout this
 results for some methods on specific examples. These text blocks will be indicated and they change according to the
 currently selected article.""")
 
-
 sentence_embedding_model = get_sentence_embedding_model()
 # tagger = get_flair_tagger()
 ner_model = get_transformer_pipeline()
 nlp = get_spacy()
-#nlp = en_core_web_sm.load()
+# nlp = en_core_web_sm.load()
 
 # GENERATING SUMMARIES PART
 st.header("Generating summaries")
@@ -321,131 +363,125 @@ article_text = st.text_area(
     height=150
 )
 
-st.
-[... old lines 325-343, also removed; their content is not legible in this view ...]
-    svg = "".join(lines)
-
-    # """Renders the given svg string."""
-    b64 = base64.b64encode(svg.encode("utf-8")).decode("utf-8")
-    html = r'<img src="data:image/svg+xml;base64,%s"/>' % b64
-    return html
-
-
-# ENTITY MATCHING PART
-st.header("Entity matching")
-st.markdown("The first method we will discuss is called **Named Entity Recognition** (NER). NER is the task of "
-            "identifying and categorising key information (entities) in text. An entity can be a singular word or a "
-            "series of words that consistently refers to the same thing. Common entity classes are person names, "
-            "organisations, locations and so on. By applying NER to both the article and its summary, we can spot "
-            "possible **hallucinations**. Hallucinations are words generated by the model that are not supported by "
-            "the source input. In theory all entities in the summary (such as dates, locations and so on), "
-            "should also be present in the article. Thus we can extract all entities from the summary and compare "
-            "them to the entities of the original article, spotting potential hallucinations. The more unmatched "
-            "entities we find, the lower the factualness score of the summary. ")
-with st.spinner("Calculating and matching entities..."):
-    entity_match_html = highlight_entities(selected_article)
-    st.write(entity_match_html, unsafe_allow_html=True)
-    red_text = """<font color="black"><span style="background-color: rgb(238, 135, 135); opacity:
-    1;">red</span></font> """
-    green_text = """<font color="black">
-    <span style="background-color: rgb(121, 236, 121); opacity: 1;">green</span>
-    </font>"""
-
-    markdown_start_red = "<mark class=\"entity\" style=\"background: rgb(238, 135, 135);\">"
-    markdown_start_green = "<mark class=\"entity\" style=\"background: rgb(121, 236, 121);\">"
-    st.markdown("We call this technique “entity matching” and here you can see what this looks like when we apply "
-                "this method on the summary. Entities in the summary are marked " + green_text + " when the entity "
-                "also exists in the "
-                "article, "
-                "while unmatched "
-                "entities are "
-                "marked " +
-                red_text + ". Several of the example articles and their summaries indicate different errors we find "
-                "by using this technique. Based on which article you choose, we provide a short "
-                "explanation of the results below.",
-                unsafe_allow_html=True)
-    entity_specific_text = fetch_entity_specific_contents(selected_article)
-    soup = BeautifulSoup(entity_specific_text, features="html.parser")
-    HTML_WRAPPER = """<div style="overflow-x: auto; border: 1px solid #e6e9ef; border-radius: 0.25rem; padding: 1rem;
-    margin-bottom: 2.5rem">{}</div> """
-    st.write("💡👇 **Specific example explanation** 👇💡", HTML_WRAPPER.format(soup), unsafe_allow_html=True)
-
-# DEPENDENCY PARSING PART
-st.header("Dependency comparison")
-st.markdown("The second method we use for post-processing is called **Dependency parsing**: the process in which the "
-            "grammatical structure in a sentence is analysed, to find out related words as well as the type of the "
-            "relationship between them. For the sentence “Jan’s wife is called Sarah” you would get the following "
-            "dependency graph:")
-
-# TODO: I wonder why the first doesn't work but the second does (it doesn't show deps otherwise)
-# st.image("ExampleParsing.svg")
-st.write(render_svg('ExampleParsing.svg'), unsafe_allow_html=True)
-st.markdown("Here, “Jan” is the “poss” (possession modifier) of “wife”. If suddenly the summary would read “Jan’s "
-            "husband…”, there would be a dependency in the summary that is non-existent in the article itself (namely "
-            "“Jan” is the “poss” of “husband”). However, often new dependencies are introduced in the summary that "
-            "are still correct. “The borders of Ukraine” have a different dependency between “borders” and “Ukraine” "
-            "than “Ukraine’s borders”, while both descriptions have the same meaning. So just matching all "
-            "dependencies between article and summary (as we did with entity matching) would not be a robust method.")
-st.markdown("However, by empirical testing, we have found that there are certain dependencies which can be used for "
-            "such matching techniques. When unmatched, these specific dependencies are often an indication of a "
-            "wrongly constructed sentence. **Should I explain this more/better or is it enough that I explain by "
-            "example specific run throughs?**. We found 2(/3 TODO) common dependencies which, when present in the "
-            "summary but not in the article, are highly indicative of factualness errors. Furthermore, we only check "
-            "dependencies between an existing **entity** and its direct connections. Below we highlight all unmatched "
-            "dependencies that satisfy the discussed constraints. We also discuss the specific results for the "
-            "currently selected article.")
-with st.spinner("Doing dependency parsing..."):
-    # TODO RIGHT IF FUNCTION (IF EXAMPLE AND IF INPUT UNCHANGED)
-    #if selected_article == 'article11':
-    if True:
-        for cur_svg_image in fetch_dependency_svg(selected_article):
-            st.write(cur_svg_image, unsafe_allow_html=True)
+summarize_button = st.button(label='Process article content', help="Generates summary and applies entity matching and dependency parsing for given article")
+
+if summarize_button:
+    st.session_state.article_text = article_text
+    st.markdown("Below you can find the generated summary for the article. Based on empirical research, we will discuss "
+                "two main methods that detect some common errors. We can then score different summaries, to indicate how "
+                "factual a summary is for a given article. The idea is that in production, you could generate a set of "
+                "summaries for the same article, with different parameters (or even different models). By using "
+                "post-processing error detection, we can then select the best possible summary.")
+    if st.session_state.article_text:
+        with st.spinner('Generating summary...'):
+            # classify_comment(article_text, selected_model)
+            if selected_article != "Provide your own input" and article_text == fetch_article_contents(selected_article):
+                st.session_state.unchanged_text = True
+                summary_content = fetch_summary_contents(selected_article)
+            else:
+                summary_content = generate_abstractive_summary(article_text, type="beam", do_sample=True, num_beams=15, no_repeat_ngram_size=4)
+                st.session_state.unchanged_text = False
+            summary_displayed = display_summary(summary_content)
+            st.write("**Generated summary:**", summary_displayed, unsafe_allow_html=True)
     else:
-[... old lines 422-451, also removed; their content is not legible in this view ...]
+        st.error('**Error**: No comment to classify. Please provide a comment.')
+
+    # ENTITY MATCHING PART
+    st.header("Entity matching")
+    st.markdown("The first method we will discuss is called **Named Entity Recognition** (NER). NER is the task of "
+                "identifying and categorising key information (entities) in text. An entity can be a singular word or a "
+                "series of words that consistently refers to the same thing. Common entity classes are person names, "
+                "organisations, locations and so on. By applying NER to both the article and its summary, we can spot "
+                "possible **hallucinations**. Hallucinations are words generated by the model that are not supported by "
+                "the source input. In theory all entities in the summary (such as dates, locations and so on), "
+                "should also be present in the article. Thus we can extract all entities from the summary and compare "
+                "them to the entities of the original article, spotting potential hallucinations. The more unmatched "
+                "entities we find, the lower the factualness score of the summary. ")
+    with st.spinner("Calculating and matching entities..."):
+        entity_match_html = highlight_entities()
+        st.write(entity_match_html, unsafe_allow_html=True)
+        red_text = """<font color="black"><span style="background-color: rgb(238, 135, 135); opacity:
+        1;">red</span></font> """
+        green_text = """<font color="black">
+        <span style="background-color: rgb(121, 236, 121); opacity: 1;">green</span>
+        </font>"""
+
+        markdown_start_red = "<mark class=\"entity\" style=\"background: rgb(238, 135, 135);\">"
+        markdown_start_green = "<mark class=\"entity\" style=\"background: rgb(121, 236, 121);\">"
+        st.markdown("We call this technique “entity matching” and here you can see what this looks like when we apply "
+                    "this method on the summary. Entities in the summary are marked " + green_text + " when the entity "
+                    "also exists in the "
+                    "article, "
+                    "while unmatched "
+                    "entities are "
+                    "marked " +
+                    red_text + ". Several of the example articles and their summaries indicate different errors we find "
+                    "by using this technique. Based on which article you choose, we provide a short "
+                    "explanation of the results below.",
+                    unsafe_allow_html=True)
+        if st.session_state.unchanged_text:
+            entity_specific_text = fetch_entity_specific_contents(selected_article)
+            soup = BeautifulSoup(entity_specific_text, features="html.parser")
+            HTML_WRAPPER = """<div style="overflow-x: auto; border: 1px solid #e6e9ef; border-radius: 0.25rem; padding: 1rem;
+            margin-bottom: 2.5rem">{}</div> """
+            st.write("💡👇 **Specific example explanation** 👇💡", HTML_WRAPPER.format(soup), unsafe_allow_html=True)
+
+    # DEPENDENCY PARSING PART
+    st.header("Dependency comparison")
+    st.markdown("The second method we use for post-processing is called **Dependency parsing**: the process in which the "
+                "grammatical structure in a sentence is analysed, to find out related words as well as the type of the "
+                "relationship between them. For the sentence “Jan’s wife is called Sarah” you would get the following "
+                "dependency graph:")
+
+    # TODO: I wonder why the first doesn't work but the second does (it doesn't show deps otherwise)
+    # st.image("ExampleParsing.svg")
+    st.write(render_svg('ExampleParsing.svg'), unsafe_allow_html=True)
+    st.markdown("Here, “Jan” is the “poss” (possession modifier) of “wife”. If suddenly the summary would read “Jan’s "
+                "husband…”, there would be a dependency in the summary that is non-existent in the article itself (namely "
+                "“Jan” is the “poss” of “husband”). However, often new dependencies are introduced in the summary that "
+                "are still correct. “The borders of Ukraine” have a different dependency between “borders” and “Ukraine” "
+                "than “Ukraine’s borders”, while both descriptions have the same meaning. So just matching all "
+                "dependencies between article and summary (as we did with entity matching) would not be a robust method.")
+    st.markdown("However, by empirical testing, we have found that there are certain dependencies which can be used for "
+                "such matching techniques. When unmatched, these specific dependencies are often an indication of a "
+                "wrongly constructed sentence. **Should I explain this more/better or is it enough that I explain by "
+                "example specific run throughs?**. We found 2(/3 TODO) common dependencies which, when present in the "
+                "summary but not in the article, are highly indicative of factualness errors. Furthermore, we only check "
+                "dependencies between an existing **entity** and its direct connections. Below we highlight all unmatched "
+                "dependencies that satisfy the discussed constraints. We also discuss the specific results for the "
+                "currently selected article.")
+    with st.spinner("Doing dependency parsing..."):
+        # TODO RIGHT IF FUNCTION (IF EXAMPLE AND IF INPUT UNCHANGED)
+        # if selected_article == 'article11':
+        if st.session_state.unchanged_text:
+            for cur_svg_image in fetch_dependency_svg(selected_article):
+                st.write(cur_svg_image, unsafe_allow_html=True)
+            dep_specific_text = fetch_dependency_specific_contents(selected_article)
+            soup = BeautifulSoup(dep_specific_text, features="html.parser")
+            HTML_WRAPPER = """<div style="overflow-x: auto; border: 1px solid #e6e9ef; border-radius: 0.25rem; padding: 1rem;
+            margin-bottom: 2.5rem">{}</div> """
+            st.write("💡👇 **Specific example explanation** 👇💡", HTML_WRAPPER.format(soup), unsafe_allow_html=True)
+        else:
+            summary_deps = check_dependency(False)
+            article_deps = check_dependency(True)
+            total_unmatched_deps = []
+            for summ_dep in summary_deps:
+                if not any(summ_dep['identifier'] in art_dep['identifier'] for art_dep in article_deps):
+                    total_unmatched_deps.append(summ_dep)
+            if total_unmatched_deps:
+                for current_drawing_list in total_unmatched_deps:
+                    render_dependency_parsing(current_drawing_list)
+
+    # OUTRO/CONCLUSION
+    st.header("Wrapping up")
+    st.markdown("We have presented 2 methods that try to improve summaries via post-processing steps. Entity matching can "
+                "be used to solve hallucinations, while dependency comparison can be used to filter out some bad "
+                "sentences (and thus worse summaries). These methods highlight the possibilities of post-processing "
+                "AI-made summaries, but are only a basic introduction. As the methods were empirically tested they are "
+                "definitely not sufficiently robust for general use-cases. (something about that we tested also RE and "
+                "maybe other things).")
+    st.markdown("####")
+    st.markdown("Below we generated 5 different kind of summaries from the article in which their ranks are estimated, "
+                "and hopefully the best summary (read: the one that a human would prefer or indicate as the best one) "
+                "will be at the top. TODO: implement this (at the end I think) and also put something in the text with "
+                "the actual parameters or something? ")
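
The rewritten page flow above hinges on one button and two session-state entries: `st.session_state.article_text` stores whatever is currently in the text area, and `st.session_state.unchanged_text` records whether that text still matches the selected sample article, in which case the precomputed summary, explanations and dependency SVGs are reused instead of recomputed. A reduced sketch of that gating logic (the `SAMPLES` dict and the placeholder summary functions are illustrative, not code from the app):

```python
import streamlit as st

FREE_INPUT = "Provide your own input"
SAMPLES = {"article11": "Full text of sample article 11 ..."}  # placeholder corpus


def compute_summary(text: str) -> str:
    return "(freshly generated summary)"  # stands in for the Pegasus call


selected = st.selectbox("Article", list(SAMPLES) + [FREE_INPUT])
article_text = st.text_area("Article content", value=SAMPLES.get(selected, " "), height=150)

if st.button("Process article content"):
    st.session_state.article_text = article_text
    if st.session_state.article_text.strip():
        # Unmodified sample article -> reuse precomputed outputs; anything else -> recompute.
        if selected != FREE_INPUT and article_text == SAMPLES[selected]:
            st.session_state.unchanged_text = True
            summary = f"(precomputed summary for {selected})"
        else:
            st.session_state.unchanged_text = False
            summary = compute_summary(article_text)
        st.write("**Generated summary:**", summary)
    else:
        st.error("**Error**: No comment to classify. Please provide a comment.")
```
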
custom_renderer.py
CHANGED

@@ -1,10 +1,8 @@
-from typing import Dict, Any
+from typing import Dict
 
-import numpy as np
 import spacy
 from PIL import ImageFont
 
-from spacy.tokens import Doc
 
 
 def get_pil_text_size(text, font_size, font_name):
@@ -78,8 +76,7 @@ def get_arrowhead(direction: str, x: int, y: int, end: int) -> str:
     return f"M{p1},{y + 2} L{p2},{y - arrow_width} {p3},{y - arrow_width}"
 
 
-
-def render_sentence_custom(unmatched_list: Dict):
+def render_sentence_custom(unmatched_list: Dict, nlp):
     TPL_DEP_WORDS = """
     <text class="displacy-token" fill="currentColor" text-anchor="start" y="{y}">
         <tspan class="displacy-word" fill="currentColor" x="{x}">{text}</tspan>
|
|
| 91 |
<svg xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" xml:lang="{lang}" id="{id}" class="displacy" width="{width}" height="{height}" direction="{dir}" style="max-width: none; height: {height}px; color: {color}; background: {bg}; font-family: {font}; direction: {dir}">{content}</svg>
|
| 92 |
"""
|
| 93 |
arcs_svg = []
|
| 94 |
-
nlp = spacy.load('en_core_web_lg')
|
| 95 |
doc = nlp(unmatched_list["sentence"])
|
| 96 |
-
# words = {}
|
| 97 |
-
# unmatched_list = [parse_deps(doc)]
|
| 98 |
-
# #print(parsed)
|
| 99 |
-
# for i, p in enumerate(unmatched_list):
|
| 100 |
-
# arcs = p["arcs"]
|
| 101 |
-
# words = p["words"]
|
| 102 |
-
# for i, a in enumerate(arcs):
|
| 103 |
-
# #CHECK CERTAIN DEPS (ALSO ADD/CHANGE BELOW WHEN CHANGING HERE)
|
| 104 |
-
# if a["label"] == "amod":
|
| 105 |
-
# couples = (a["start"], a["end"])
|
| 106 |
-
# elif a["label"] == "pobj":
|
| 107 |
-
# couples = (a["start"], a["end"])
|
| 108 |
-
# #couples = (3,5)
|
| 109 |
-
#
|
| 110 |
-
# x_value_counter = 10
|
| 111 |
-
# index_counter = 0
|
| 112 |
-
# svg_words = []
|
| 113 |
-
# coords_test = []
|
| 114 |
-
# for i, word in enumerate(words):
|
| 115 |
-
# word = word["text"]
|
| 116 |
-
# word = word + " "
|
| 117 |
-
# pixel_x_length = get_pil_text_size(word, 16, 'arial.ttf')[0]
|
| 118 |
-
# svg_words.append(TPL_DEP_WORDS.format(text=word, tag="", x=x_value_counter, y=70))
|
| 119 |
-
# if index_counter >= couples[0] and index_counter <= couples[1]:
|
| 120 |
-
# coords_test.append(x_value_counter)
|
| 121 |
-
# x_value_counter += 50
|
| 122 |
-
# index_counter += 1
|
| 123 |
-
# x_value_counter += pixel_x_length + 4
|
| 124 |
-
# for i, a in enumerate(arcs):
|
| 125 |
-
# if a["label"] == "amod":
|
| 126 |
-
# arcs_svg.append(render_arrow(a["label"], coords_test[0], coords_test[-1], a["dir"], i))
|
| 127 |
-
# elif a["label"] == "pobj":
|
| 128 |
-
# arcs_svg.append(render_arrow(a["label"], coords_test[0], coords_test[-1], a["dir"], i))
|
| 129 |
-
#
|
| 130 |
-
# content = "".join(svg_words) + "".join(arcs_svg)
|
| 131 |
-
#
|
| 132 |
-
# full_svg = TPL_DEP_SVG.format(
|
| 133 |
-
# id=0,
|
| 134 |
-
# width=1200, #600
|
| 135 |
-
# height=250, #125
|
| 136 |
-
# color="#00000",
|
| 137 |
-
# bg="#ffffff",
|
| 138 |
-
# font="Arial",
|
| 139 |
-
# content=content,
|
| 140 |
-
# dir="ltr",
|
| 141 |
-
# lang="en",
|
| 142 |
-
# )
|
| 143 |
|
| 144 |
x_value_counter = 10
|
| 145 |
index_counter = 0
|
| 146 |
svg_words = []
|
| 147 |
-
words = unmatched_list["sentence"].split(" ")
|
| 148 |
coords_test = []
|
| 149 |
-
#print(unmatched_list)
|
| 150 |
-
#print(words)
|
| 151 |
-
#print("NOW")
|
| 152 |
direction_current = "rtl"
|
| 153 |
if unmatched_list["cur_word_index"] < unmatched_list["target_word_index"]:
|
| 154 |
min_index = unmatched_list["cur_word_index"]
|
|
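
The substantive change to `render_sentence_custom` is that the spaCy pipeline is now passed in as an argument instead of being re-created with `spacy.load('en_core_web_lg')` inside the function, which was slow and memory-hungry on every Streamlit rerun; the dead commented-out rendering experiment is dropped at the same time. A minimal sketch of the pass-the-pipeline pattern (the function body is reduced to the parse step; the real function builds an SVG):

```python
import spacy


def render_sentence_custom(unmatched_list: dict, nlp) -> str:
    # The caller owns the (cached) pipeline; this function only uses it.
    doc = nlp(unmatched_list["sentence"])
    return " ".join(f"{token.text}/{token.dep_}" for token in doc)


nlp = spacy.load("en_core_web_sm")  # loaded once by the caller; the app caches it with a singleton
print(render_sentence_custom({"sentence": "Jan's wife is called Sarah."}, nlp))
```
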
@@ -169,8 +116,6 @@ def render_sentence_custom(unmatched_list: Dict):
         index_counter += 1
         x_value_counter += pixel_x_length + 4
 
-    # TODO: DYNAMIC DIRECTION MAKING (SHOULD GIVE WITH DICT I THINK)
-    #print(coords_test)
     arcs_svg.append(render_arrow(unmatched_list['dep'], coords_test[0], coords_test[-1], direction_current, i))
 
     content = "".join(svg_words) + "".join(arcs_svg)
@@ -189,69 +134,3 @@ def render_sentence_custom(unmatched_list: Dict):
     return full_svg
 
 
-def parse_deps(orig_doc: Doc, options: Dict[str, Any] = {}) -> Dict[str, Any]:
-    """Generate dependency parse in {'words': [], 'arcs': []} format.
-
-    doc (Doc): Document do parse.
-    RETURNS (dict): Generated dependency parse keyed by words and arcs.
-    """
-    doc = Doc(orig_doc.vocab).from_bytes(orig_doc.to_bytes(exclude=["user_data"]))
-    if not doc.has_annotation("DEP"):
-        print("WARNING")
-    if options.get("collapse_phrases", False):
-        with doc.retokenize() as retokenizer:
-            for np in list(doc.noun_chunks):
-                attrs = {
-                    "tag": np.root.tag_,
-                    "lemma": np.root.lemma_,
-                    "ent_type": np.root.ent_type_,
-                }
-                retokenizer.merge(np, attrs=attrs)
-    if options.get("collapse_punct", True):
-        spans = []
-        for word in doc[:-1]:
-            if word.is_punct or not word.nbor(1).is_punct:
-                continue
-            start = word.i
-            end = word.i + 1
-            while end < len(doc) and doc[end].is_punct:
-                end += 1
-            span = doc[start:end]
-            spans.append((span, word.tag_, word.lemma_, word.ent_type_))
-        with doc.retokenize() as retokenizer:
-            for span, tag, lemma, ent_type in spans:
-                attrs = {"tag": tag, "lemma": lemma, "ent_type": ent_type}
-                retokenizer.merge(span, attrs=attrs)
-    fine_grained = options.get("fine_grained")
-    add_lemma = options.get("add_lemma")
-    words = [
-        {
-            "text": w.text,
-            "tag": w.tag_ if fine_grained else w.pos_,
-            "lemma": w.lemma_ if add_lemma else None,
-        }
-        for w in doc
-    ]
-    arcs = []
-    for word in doc:
-        if word.i < word.head.i:
-            arcs.append(
-                {"start": word.i, "end": word.head.i, "label": word.dep_, "dir": "left"}
-            )
-        elif word.i > word.head.i:
-            arcs.append(
-                {
-                    "start": word.head.i,
-                    "end": word.i,
-                    "label": word.dep_,
-                    "dir": "right",
-                }
-            )
-    return {"words": words, "arcs": arcs, "settings": get_doc_settings(orig_doc)}
-
-
-def get_doc_settings(doc: Doc) -> Dict[str, Any]:
-    return {
-        "lang": doc.lang_,
-        "direction": doc.vocab.writing_system.get("direction", "ltr"),
-    }
requirements.txt
CHANGED

@@ -5,5 +5,6 @@ transformers-interpret==0.5.2
 sentence-transformers==2.2.0
 spacy==3.0.0
 spacy_streamlit==1.0.3
-en_core_web_lg @ https://github.com/explosion/spacy-models/releases/download/en_core_web_lg-3.0.0/en_core_web_lg-3.0.0.tar.gz
-en_core_web_sm @ https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.0.0/en_core_web_sm-3.0.0.tar.gz
+###### en_core_web_lg @ https://github.com/explosion/spacy-models/releases/download/en_core_web_lg-3.0.0/en_core_web_lg-3.0.0.tar.gz
+en_core_web_sm @ https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.0.0/en_core_web_sm-3.0.0.tar.gz
+en_core_web_lg @ https://huggingface.co/spacy/en_core_web_lg/resolve/main/en_core_web_lg-any-py3-none-any.whl