Spaces:
Build error
Build error
| import random | |
| from typing import AnyStr | |
| # import tensorflow_hub as hub | |
| import itertools | |
| import streamlit as st | |
| import torch.nn.parameter | |
| from bs4 import BeautifulSoup | |
| import numpy as np | |
| import base64 | |
| import validators | |
| from spacy_streamlit.util import get_svg | |
| from validators import ValidationFailure | |
| from custom_renderer import render_sentence_custom | |
| # from flair.data import Sentence | |
| # from flair.models import SequenceTagger | |
| from sentence_transformers import SentenceTransformer | |
| import spacy | |
| from spacy import displacy | |
| from spacy_streamlit import visualize_parser | |
| from transformers import AutoTokenizer, AutoModelForSequenceClassification, AutoModelForTokenClassification | |
| from transformers import pipeline | |
| import os | |
| from transformers_interpret import SequenceClassificationExplainer | |
| # USE_model = hub.load("https://tfhub.dev/google/universal-sentence-encoder/4") | |
# Use the GPU when CUDA is usable, otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
def get_sentence_embedding_model():
    """Load the sentence-embedding model.

    Returns:
        A ``SentenceTransformer`` built from the
        ``sentence-transformers/all-MiniLM-L6-v2`` checkpoint.
    """
    model_name = 'sentence-transformers/all-MiniLM-L6-v2'
    embedding_model = SentenceTransformer(model_name)
    return embedding_model
def get_spacy():
    """Load and return the spaCy ``en_core_web_lg`` pipeline."""
    return spacy.load('en_core_web_lg')
| # TODO: might look into which one is the best here | |
| # TODO: might be useful to make an ml6 preloaded model for flair as this takes ridiculously long to load the first time | |
| # @st.experimental_singleton | |
| # @st.cache(suppress_st_warning=True, allow_output_mutation=True) | |
| # def get_flair_tagger(): | |
| # return SequenceTagger.load("flair/ner-english-ontonotes-fast") | |
def get_transformer_pipeline():
    """Build the transformer-based NER pipeline.

    Returns:
        A Hugging Face ``"ner"`` pipeline around the
        ``xlm-roberta-large-finetuned-conll03-english`` checkpoint, created
        with ``grouped_entities=True``.
    """
    checkpoint = "xlm-roberta-large-finetuned-conll03-english"
    ner_tokenizer = AutoTokenizer.from_pretrained(checkpoint)
    ner_network = AutoModelForTokenClassification.from_pretrained(checkpoint)
    return pipeline("ner", model=ner_network, tokenizer=ner_tokenizer, grouped_entities=True)
# Page setup: configure the browser tab and layout, and blank out the three
# standard menu entries.
_page_menu = {
    'Get help': None,
    'Report a bug': None,
    'About': None,
}
st.set_page_config(
    page_title="Post-processing summarization fact checker",
    page_icon="",
    layout="centered",
    initial_sidebar_state="auto",
    menu_items=_page_menu,
)
def list_all_article_names() -> list:
    """Return the names of all bundled sample articles.

    Scans ``./sample-articles/`` and returns, in sorted file-name order, the
    name of every ``.txt`` file with that extension removed.

    Returns:
        A list of article names (file names without the ``.txt`` extension).
    """
    suffix = '.txt'
    # Slice off only the trailing extension: ``str.replace`` would also delete
    # any earlier ".txt" that happens to occur inside the file name.
    return [
        entry[:-len(suffix)]
        for entry in sorted(os.listdir('./sample-articles/'))
        if entry.endswith(suffix)
    ]
def fetch_article_contents(filename: str) -> str:
    """Read the full text of a sample article.

    Args:
        filename: article name without extension; it is lower-cased before
            the file ``./sample-articles/<name>.txt`` is opened.

    Returns:
        The complete file contents as a string.

    Raises:
        OSError: if the file cannot be opened.
    """
    # Decode explicitly as UTF-8 so the result does not depend on the
    # platform's default locale encoding.
    with open(f'./sample-articles/{filename.lower()}.txt', 'r', encoding='utf-8') as f:
        return f.read()
def fetch_summary_contents(filename: str) -> str:
    """Read the stored summary that belongs to a sample article.

    Args:
        filename: article name without extension; it is lower-cased before
            the file ``./sample-summaries/<name>.txt`` is opened.

    Returns:
        The complete file contents as a string.

    Raises:
        OSError: if the file cannot be opened.
    """
    # Decode explicitly as UTF-8 so the result does not depend on the
    # platform's default locale encoding.
    with open(f'./sample-summaries/{filename.lower()}.txt', 'r', encoding='utf-8') as f:
        return f.read()
def fetch_entity_specific_contents(filename: str) -> str:
    """Read the entity-matching explanation written for a sample article.

    Args:
        filename: article name without extension; it is lower-cased before
            the file ``./entity-specific-text/<name>.txt`` is opened.

    Returns:
        The complete file contents as a string.

    Raises:
        OSError: if the file cannot be opened.
    """
    # Decode explicitly as UTF-8 so the result does not depend on the
    # platform's default locale encoding.
    with open(f'./entity-specific-text/{filename.lower()}.txt', 'r', encoding='utf-8') as f:
        return f.read()
def fetch_dependency_specific_contents(filename: str) -> str:
    """Read the dependency-comparison explanation written for a sample article.

    Args:
        filename: article name without extension; it is lower-cased before
            the file ``./dependency-specific-text/<name>.txt`` is opened.

    Returns:
        The complete file contents as a string.

    Raises:
        OSError: if the file cannot be opened.
    """
    # Decode explicitly as UTF-8 so the result does not depend on the
    # platform's default locale encoding.
    with open(f'./dependency-specific-text/{filename.lower()}.txt', 'r', encoding='utf-8') as f:
        return f.read()
def fetch_dependency_svg(filename: str) -> list:
    """Read the stored dependency renderings of a sample article, line by line.

    Args:
        filename: article name without extension; it is lower-cased before
            the file ``./dependency-images/<name>.txt`` is opened.

    Returns:
        A list with one string per line of the file, each with trailing
        whitespace (including the newline) stripped.

    Raises:
        OSError: if the file cannot be opened.
    """
    # Decode explicitly as UTF-8 so the result does not depend on the
    # platform's default locale encoding.
    with open(f'./dependency-images/{filename.lower()}.txt', 'r', encoding='utf-8') as f:
        return [line.rstrip() for line in f]
def display_summary(article_name: str):
    """Return the stored summary of an article wrapped in a bordered HTML box.

    The raw summary text is also saved to ``st.session_state.summary_output``.
    """
    box_template = """<div style="overflow-x: auto; border: 1px solid #e6e9ef; border-radius: 0.25rem; padding: 1rem; margin-bottom: 2.5rem">{}</div>"""
    raw_summary = fetch_summary_contents(article_name)
    st.session_state.summary_output = raw_summary
    parsed_summary = BeautifulSoup(raw_summary, features="html.parser")
    return box_template.format(parsed_summary)
def get_all_entities_per_sentence(text):
    """Collect named entities for every sentence of ``text``.

    The text is split into sentences by the module-level spaCy pipeline
    ``nlp``. For each sentence the spaCy entities come first, followed by the
    ``"word"`` of every result the module-level ``ner_model`` returns for that
    sentence. Nothing is de-duplicated.

    Returns:
        A list with one list of entity strings per sentence, in document order.
    """
    per_sentence_entities = []
    for sent in nlp(text).sents:
        spacy_hits = [str(ent) for ent in sent.ents]
        xlm_hits = [str(hit["word"]) for hit in ner_model(str(sent))]
        per_sentence_entities.append(spacy_hits + xlm_hits)
    return per_sentence_entities
def get_all_entities(text):
    """Return every entity found in ``text`` as one flat list, in sentence order."""
    flat_entities = []
    for sentence_entities in get_all_entities_per_sentence(text):
        flat_entities.extend(sentence_entities)
    return flat_entities
| # TODO: this functionality can be cached (e.g. by storing html file output) if wanted (or just store list of entities idk) | |
def get_and_compare_entities(article_name: str):
    """Split the summary's entities into those supported by the article and those not.

    A summary entity counts as matched when it is a case-insensitive substring
    of some article entity or, failing that, when the inner product of its
    sentence embedding with the embedding of some article entity exceeds 0.9.

    Args:
        article_name: name of the sample article; its stored article text and
            stored summary are both loaded and run through entity extraction.

    Returns:
        A tuple ``(matched_entities, unmatched_entities)``. Every entity
        occurrence from the summary ends up in exactly one of the two lists,
        in the order the entities were found.
    """
    article_content = fetch_article_contents(article_name)
    entities_article = list(itertools.chain.from_iterable(get_all_entities_per_sentence(article_content)))
    summary_content = fetch_summary_contents(article_name)
    entities_summary = list(itertools.chain.from_iterable(get_all_entities_per_sentence(summary_content)))

    # Lower-case the article entities once instead of once per comparison.
    article_entities_lower = [candidate.lower() for candidate in entities_article]
    # The article-side embeddings are computed at most once, and only when a
    # summary entity actually fails the cheap substring test.
    embedding_state = {"article": None}
    # The same entity string always gets the same verdict, so remember it;
    # duplicates are common because two NER models contribute entities.
    verdicts = {}

    def is_supported(entity):
        entity_lower = entity.lower()
        if any(entity_lower in candidate for candidate in article_entities_lower):
            return True
        if not entities_article:
            return False
        if embedding_state["article"] is None:
            embedding_state["article"] = sentence_embedding_model.encode(entities_article)
        similarities = np.inner(embedding_state["article"], sentence_embedding_model.encode(entity))
        return bool(np.any(similarities > 0.9))

    matched_entities = []
    unmatched_entities = []
    for entity in entities_summary:
        if entity not in verdicts:
            verdicts[entity] = is_supported(entity)
        if verdicts[entity]:
            matched_entities.append(entity)
        else:
            unmatched_entities.append(entity)
    return matched_entities, unmatched_entities
def highlight_entities(article_name: str):
    """Return the article's summary as HTML with every entity colour-marked.

    Entities that ``get_and_compare_entities`` reports as matched are wrapped
    in a green ``<mark>``; unmatched ones in a red ``<mark>``. The result is
    placed inside a bordered ``<div>``.

    Args:
        article_name: name of the sample article whose stored summary is used.

    Returns:
        The HTML string for the highlighted summary box.
    """
    import re  # only this function needs it

    summary_content = fetch_summary_contents(article_name)
    markdown_start_red = "<mark class=\"entity\" style=\"background: rgb(238, 135, 135);\">"
    markdown_start_green = "<mark class=\"entity\" style=\"background: rgb(121, 236, 121);\">"
    markdown_end = "</mark>"
    matched_entities, unmatched_entities = get_and_compare_entities(article_name)

    # One opening tag per distinct, non-empty entity string. Matched entries
    # are written last so they take precedence should a string be in both lists.
    opening_tag = {entity: markdown_start_red for entity in unmatched_entities if entity}
    opening_tag.update({entity: markdown_start_green for entity in matched_entities if entity})

    if opening_tag:
        # Substitute in a single pass. Replacing entity by entity would wrap a
        # duplicated or overlapping entity more than once and could even match
        # inside the <mark> markup inserted by an earlier replacement.
        # Longest alternatives go first so a longer entity wins over one that
        # is merely a prefix of it.
        alternatives = sorted(opening_tag, key=len, reverse=True)
        pattern = re.compile("|".join(re.escape(entity) for entity in alternatives))
        summary_content = pattern.sub(
            lambda hit: opening_tag[hit.group(0)] + hit.group(0) + markdown_end,
            summary_content,
        )

    soup = BeautifulSoup(summary_content, features="html.parser")
    HTML_WRAPPER = """<div style="overflow-x: auto; border: 1px solid #e6e9ef; border-radius: 0.25rem; padding: 1rem;
    margin-bottom: 2.5rem">{}</div> """
    return HTML_WRAPPER.format(soup)
def render_dependency_parsing(text: str):
    """Draw one dependency rendering on the Streamlit page.

    ``text`` is handed to ``render_sentence_custom``; double newlines in the
    returned markup are collapsed to single ones, and the result of
    ``get_svg`` on that markup is printed to stdout and written to the page
    as raw HTML.

    NOTE(review): the only call site in this file passes one of the dicts
    produced by ``check_dependency`` rather than a ``str`` - confirm the
    annotation against ``render_sentence_custom``.
    """
    markup = render_sentence_custom(text).replace("\n\n", "\n")
    print(get_svg(markup))
    st.write(get_svg(markup), unsafe_allow_html=True)
| # If deps for article: True, otherwise deps for summary calc | |
def check_dependency(article: bool):
    """Collect the entity-related ``amod`` / ``pobj`` dependencies of a text.

    A dependency is kept when its label is ``amod``, or ``pobj`` with a head
    token whose text is "in" (case-insensitive), and either the token itself
    or its head is one of the entities found in the same sentence.

    Args:
        article: ``True`` to analyse ``st.session_state.article_text``,
            ``False`` to analyse ``st.session_state.summary_output``.

    Returns:
        A list of dicts, one per retained dependency, with the keys ``dep``
        (the label), ``cur_word_index`` and ``target_word_index`` (positions
        of the token and its head relative to the start of the sentence),
        ``identifier`` (token text + label + head text) and ``sentence``.
    """
    text = st.session_state.article_text if article else st.session_state.summary_output
    all_entities = get_all_entities_per_sentence(text)
    doc = nlp(text)
    tok_l = doc.to_json()['tokens']
    test_list_dict_output = []
    for i, sentence in enumerate(doc.sents):
        sentence_entities = all_entities[i]
        # ``sentence.end`` is exclusive, so the slice stops right before the
        # first token of the next sentence. The earlier ``id > end`` filter let
        # that token through and compared it with this sentence's entities.
        # Token ids double as list positions, as the head lookup below also
        # relies on.
        for t in tok_l[sentence.start:sentence.end]:
            dep = t['dep']
            if dep not in ('amod', 'pobj'):
                continue
            head = tok_l[t['head']]
            object_here = text[t['start']:t['end']]
            object_target = text[head['start']:head['end']]
            if dep == 'pobj' and object_target.lower() != 'in':
                continue
            # At least one side of the dependency has to be an entity.
            if object_here in sentence_entities or object_target in sentence_entities:
                test_list_dict_output.append({
                    "dep": dep,
                    "cur_word_index": t['id'] - sentence.start,
                    "target_word_index": t['head'] - sentence.start,
                    "identifier": object_here + dep + object_target,
                    "sentence": str(sentence),
                })
    return test_list_dict_output
def is_valid_url(url: str) -> bool:
    """Return ``False`` when ``validators.url`` yields a ``ValidationFailure``, else ``True``."""
    return not isinstance(validators.url(url), ValidationFailure)
# Start session: make sure the per-session ``results`` list exists.
if 'results' not in st.session_state:
    st.session_state['results'] = []
# Page
st.title('Summarization fact checker')

# INTRODUCTION
st.header("Introduction")
st.markdown("""Recent work using transformers on large text corpora has shown great success when fine-tuned on
several different downstream NLP tasks. One such task is that of text summarization. The goal of text summarization
is to generate concise and accurate summaries from input document(s). There are 2 types of summarization: extractive
and abstractive. **Extractive summarization** merely copies informative fragments from the input,
whereas **abstractive summarization** may generate novel words. A good abstractive summary should cover principal
information in the input and has to be linguistically fluent. This blogpost will focus on this more difficult task of
abstractive summary generation.""")

# The link text and its URL must be adjacent: with a space between "]" and "("
# markdown does not render a hyperlink.
st.markdown("""To generate summaries we will use the [PEGASUS](https://huggingface.co/google/pegasus-cnn_dailymail)
model, producing abstractive summaries from large articles. These summaries often contain sentences with different
kinds of errors. Rather than improving the core model, we will look into possible post-processing steps to improve
the generated summaries. By comparing contents of the summary with the source text, we come up with a factualness
metric, indicating the trustworthiness of the generated summary. Throughout this blog, we will also explain the
results for some methods on specific examples. These text blocks will be indicated and they change according to the
currently selected article.""")
# Instantiate the models the analysis functions read as module-level globals:
# the sentence-embedding model, the transformer NER pipeline and the spaCy pipeline.
# tagger = get_flair_tagger()
sentence_embedding_model, ner_model, nlp = (
    get_sentence_embedding_model(),
    get_transformer_pipeline(),
    get_spacy(),
)
# GENERATING SUMMARIES PART
st.header("Generating summaries")
st.markdown("Let’s start by selecting an article text for which we want to generate a summary, or you can provide "
            "text yourself. Note that it’s suggested to provide a sufficiently large text, as otherwise the summary "
            "generated from it might not be optimal, leading to suboptimal performance of the post-processing steps.")

# TODO: NEED TO CHECK ARTICLE TEXT INSTEAD OF ARTICLE NAME ALSO FREE INPUT OPTION
selected_article = st.selectbox('Select an article or provide your own:',
                                list_all_article_names())
# The stored article text is what every later step analyses; the text area
# below shows it, but its (possibly edited) value is not used afterwards.
st.session_state.article_text = fetch_article_contents(selected_article)
article_text = st.text_area(
    label='Full article text',
    value=st.session_state.article_text,
    height=150
)

st.markdown("Below you can find the generated summary for the article. Based on empirical research, we will discuss "
            "two main methods that detect some common errors. We can then score different summaries, to indicate how "
            "factual a summary is for a given article. The idea is that in production, you could generate a set of "
            "summaries for the same article, with different parameters (or even different models). By using "
            "post-processing error detection, we can then select the best possible summary.")

if st.session_state.article_text:
    with st.spinner('Generating summary...'):
        summary_displayed = display_summary(selected_article)
        st.write("**Generated summary:**", summary_displayed, unsafe_allow_html=True)
else:
    # ``st.error`` takes no ``help`` argument, so passing one would raise a
    # TypeError instead of showing the message. The wording now refers to the
    # article text this page works with.
    st.error('**Error**: No article text to summarize. Please provide an article.')
def render_svg(svg_file):
    """Return an ``<img>`` tag that embeds an SVG file as a base64 data URI.

    Args:
        svg_file: path of the SVG file to embed.

    Returns:
        An HTML ``<img>`` element whose ``src`` is a
        ``data:image/svg+xml;base64,...`` URI holding the file's contents.

    Raises:
        OSError: if the file cannot be opened.
    """
    # Read as UTF-8, the same encoding the text is re-encoded with below, so
    # the embedded bytes do not depend on the platform's default encoding.
    with open(svg_file, "r", encoding="utf-8") as f:
        svg = f.read()
    b64 = base64.b64encode(svg.encode("utf-8")).decode("utf-8")
    return f'<img src="data:image/svg+xml;base64,{b64}"/>'
# ENTITY MATCHING PART
st.header("Entity matching")
entity_matching_intro = (
    "The first method we will discuss is called **Named Entity Recognition** (NER). NER is the task of "
    "identifying and categorising key information (entities) in text. An entity can be a singular word or a "
    "series of words that consistently refers to the same thing. Common entity classes are person names, "
    "organisations, locations and so on. By applying NER to both the article and its summary, we can spot "
    "possible **hallucinations**. Hallucinations are words generated by the model that are not supported by "
    "the source input. In theory all entities in the summary (such as dates, locations and so on), "
    "should also be present in the article. Thus we can extract all entities from the summary and compare "
    "them to the entities of the original article, spotting potential hallucinations. The more unmatched "
    "entities we find, the lower the factualness score of the summary. "
)
st.markdown(entity_matching_intro)

# Show the summary with matched entities in green and unmatched ones in red.
with st.spinner("Calculating and matching entities..."):
    entity_match_html = highlight_entities(selected_article)
    st.write(entity_match_html, unsafe_allow_html=True)

# Inline colour swatches used in the legend sentence below.
red_text = """<font color="black"><span style="background-color: rgb(238, 135, 135); opacity:
1;">red</span></font> """
green_text = """<font color="black">
<span style="background-color: rgb(121, 236, 121); opacity: 1;">green</span>
</font>"""

markdown_start_red = "<mark class=\"entity\" style=\"background: rgb(238, 135, 135);\">"
markdown_start_green = "<mark class=\"entity\" style=\"background: rgb(121, 236, 121);\">"

entity_matching_legend = "".join([
    "We call this technique “entity matching” and here you can see what this looks like when we apply "
    "this method on the summary. Entities in the summary are marked ",
    green_text,
    " when the entity also exists in the article, while unmatched entities are marked ",
    red_text,
    ". Several of the example articles and their summaries indicate different errors we find "
    "by using this technique. Based on which article you choose, we provide a short "
    "explanation of the results below.",
])
st.markdown(entity_matching_legend, unsafe_allow_html=True)

# Article-specific explanation, rendered inside a bordered box.
entity_specific_text = fetch_entity_specific_contents(selected_article)
soup = BeautifulSoup(entity_specific_text, features="html.parser")
HTML_WRAPPER = """<div style="overflow-x: auto; border: 1px solid #e6e9ef; border-radius: 0.25rem; padding: 1rem;
margin-bottom: 2.5rem">{}</div> """
st.write("💡👇 **Specific example explanation** 👇💡", HTML_WRAPPER.format(soup), unsafe_allow_html=True)
# DEPENDENCY PARSING PART
st.header("Dependency comparison")
dependency_intro = (
    "The second method we use for post-processing is called **Dependency parsing**: the process in which the "
    "grammatical structure in a sentence is analysed, to find out related words as well as the type of the "
    "relationship between them. For the sentence “Jan’s wife is called Sarah” you would get the following "
    "dependency graph:"
)
st.markdown(dependency_intro)

# TODO: I wonder why the first doesn't work but the second does (it doesn't show deps otherwise)
# st.image("ExampleParsing.svg")
st.write(render_svg('ExampleParsing.svg'), unsafe_allow_html=True)

dependency_example = (
    "Here, “Jan” is the “poss” (possession modifier) of “wife”. If suddenly the summary would read “Jan’s "
    "husband…”, there would be a dependency in the summary that is non-existent in the article itself (namely "
    "“Jan” is the “poss” of “husband”). However, often new dependencies are introduced in the summary that "
    "are still correct. “The borders of Ukraine” have a different dependency between “borders” and “Ukraine” "
    "than “Ukraine’s borders”, while both descriptions have the same meaning. So just matching all "
    "dependencies between article and summary (as we did with entity matching) would not be a robust method."
)
st.markdown(dependency_example)

dependency_method = (
    "However, by empirical testing, we have found that there are certain dependencies which can be used for "
    "such matching techniques. When unmatched, these specific dependencies are often an indication of a "
    "wrongly constructed sentence. **Should I explain this more/better or is it enough that I explain by "
    "example specific run throughs?**. We found 2(/3 TODO) common dependencies which, when present in the "
    "summary but not in the article, are highly indicative of factualness errors. Furthermore, we only check "
    "dependencies between an existing **entity** and its direct connections. Below we highlight all unmatched "
    "dependencies that satisfy the discussed constraints. We also discuss the specific results for the "
    "currently selected article."
)
st.markdown(dependency_method)

# TODO RIGHT IF FUNCTION (IF EXAMPLE AND IF INPUT UNCHANGED)
# The stored renderings are always used for now; the live computation in the
# ``else`` branch is kept but never runs while this flag is True.
use_stored_renderings = True
with st.spinner("Doing dependency parsing..."):
    if use_stored_renderings:
        for cur_svg_image in fetch_dependency_svg(selected_article):
            st.write(cur_svg_image, unsafe_allow_html=True)
    else:
        summary_deps = check_dependency(False)
        article_deps = check_dependency(True)
        # Keep the summary dependencies whose identifier occurs in no article identifier.
        total_unmatched_deps = [
            summ_dep for summ_dep in summary_deps
            if all(summ_dep['identifier'] not in art_dep['identifier'] for art_dep in article_deps)
        ]
        for current_drawing_list in total_unmatched_deps:
            render_dependency_parsing(current_drawing_list)

# Article-specific explanation, rendered inside a bordered box.
dep_specific_text = fetch_dependency_specific_contents(selected_article)
soup = BeautifulSoup(dep_specific_text, features="html.parser")
HTML_WRAPPER = """<div style="overflow-x: auto; border: 1px solid #e6e9ef; border-radius: 0.25rem; padding: 1rem;
margin-bottom: 2.5rem">{}</div> """
st.write("💡👇 **Specific example explanation** 👇💡", HTML_WRAPPER.format(soup), unsafe_allow_html=True)
# OUTRO/CONCLUSION
st.header("Wrapping up")
wrap_up_text = (
    "We have presented 2 methods that try to improve summaries via post-processing steps. Entity matching can "
    "be used to solve hallucinations, while dependency comparison can be used to filter out some bad "
    "sentences (and thus worse summaries). These methods highlight the possibilities of post-processing "
    "AI-made summaries, but are only a basic introduction. As the methods were empirically tested they are "
    "definitely not sufficiently robust for general use-cases. (something about that we tested also RE and "
    "maybe other things)."
)
ranking_text = (
    "Below we generated 5 different kind of summaries from the article in which their ranks are estimated, "
    "and hopefully the best summary (read: the one that a human would prefer or indicate as the best one) "
    "will be at the top. TODO: implement this (at the end I think) and also put something in the text with "
    "the actual parameters or something? "
)
# The "####" markdown between the two paragraphs is an empty heading.
for paragraph in (wrap_up_text, "####", ranking_text):
    st.markdown(paragraph)