Spaces:
Sleeping
Sleeping
| from pandas import DataFrame | |
| from src.application.config import WORD_BREAK | |
| from src.application.formatting import ( | |
| color_text, | |
| format_entity_count, | |
| ) | |
| from src.application.image.helper import encode_image | |
| from src.application.image.image import ImageDetector | |
| from src.application.text.entity import apply_highlight | |
| from src.application.text.helper import ( | |
| extract_equal_text, | |
| replace_leading_spaces, | |
| ) | |
| from src.application.text.text import TextDetector | |
def create_governor_table(
    aligned_sentences_df: DataFrame,
    text: TextDetector,
    image: ImageDetector,
):
    """Build the HTML comparison table shown in the governor view.

    The table gets one row for the image (when ``image.input`` is set)
    followed by one row for the text (when ``text.input`` is set).

    Side effect: every aligned sentence whose ``"input"`` is not ``None``
    is appended to ``text.governor_table`` as
    ``[row, equal_idx_input, equal_idx_source, row["entities"]]`` before
    the text row is rendered.

    Args:
        aligned_sentences_df: Frame iterated row by row; each row is read
            through its ``"input"``, ``"source"`` and ``"entities"`` keys.
        text: Text detector whose ``input`` gates the text row and whose
            ``governor_table`` list is extended in place.
        image: Image detector whose ``input`` gates the image row.

    Returns:
        An HTML fragment: a heading, then a four-column table whose body
        holds the generated rows joined by newlines.
    """
    rows = []

    if image.input is not None:
        rows.append(format_image_governor_row(image))

    if text.input is not None:
        for _, row in aligned_sentences_df.iterrows():
            if row["input"] is None:
                continue

            if row["source"] is None:
                # No source sentence, so nothing can match. Use two
                # *separate* empty lists: both end up stored in
                # ``text.governor_table`` and must not alias each other.
                equal_idx_1, equal_idx_2 = [], []
            else:
                # Indexes of the phrases shared by the input and source
                # sentences.
                equal_idx_1, equal_idx_2 = extract_equal_text(
                    row["input"],
                    row["source"],
                )

            text.governor_table.append(
                [
                    row,
                    equal_idx_1,
                    equal_idx_2,
                    row["entities"],
                ],
            )

        # All text sentences are rendered together as a single table row.
        rows.append(format_text_governor_row(text))

    table = "\n".join(rows)
    # NOTE(review): the fragment ends with an opening <style> tag that is
    # never closed here. It is kept unchanged in case a caller appends CSS
    # and the closing tag; if nothing does, it should be removed.
    return f"""
<h5>Comparison between input news and source news:</h5>
<table border="1" style="width:100%; text-align:left;">
<col style="width: 170px;">
<col style="width: 170px;">
<col style="width: 30px;">
<col style="width: 75px;">
<thead>
<tr>
<th>Input news</th>
<th>Source (URL in Originality)</th>
<th>Forensic</th>
<th>Originality</th>
</tr>
</thead>
<tbody>
{table}
</tbody>
</table>
<style>
"""
def _restore_span_markup(html):
    """Replace the ``span_style`` / ``1px_4px`` tokens with spaced forms.

    NOTE(review): presumably the underscores keep the highlight markup
    intact while ``color_text`` processes the sentence -- confirm against
    ``apply_highlight`` / ``color_text``.
    """
    return html.replace("span_style", "span style").replace(
        "1px_4px",
        "1px 4px",
    )


def format_text_governor_row(text):
    """Render every entry of ``text.governor_table`` as one HTML table row.

    Each entry is indexed as ``[aligned_row, equal_idx_input,
    equal_idx_source, entities]``. Entries whose ``"input"`` is ``None``
    are skipped. When a source sentence exists, both sentences are passed
    through ``apply_highlight`` and ``color_text``; otherwise the input is
    shown as-is next to an empty source cell.

    Args:
        text: Object providing ``governor_table``, ``prediction_label`` and
            ``prediction_score`` (only index 0 of the latter two is read).

    Returns:
        A ``<tr>`` string with four cells: the input sentences, the source
        sentences, the label with its score (multiplied by 100) plus the
        entity-count text, and the de-duplicated source links.
    """
    input_parts = []
    source_parts = []
    url_links = []
    seen_urls = set()
    # Starts with two zeros so that ``entity_count[-2]`` is valid from the
    # very first row.
    entity_count = [0, 0]

    for row in text.governor_table:
        aligned = row[0]
        if aligned["input"] is None:
            continue

        entities = row[3]

        if aligned["source"] is not None:
            # NOTE(review): ``entity_count[-2]`` is the entity total of the
            # row *before* the previous one, not a running total -- confirm
            # this is the offset ``apply_highlight`` expects.
            offset = entity_count[-2]

            # Highlight entities in both sentences.
            input_sentence, highlight_idx_input = apply_highlight(
                aligned["input"],
                entities,
                "input",
                offset,
            )
            source_sentence, highlight_idx_source = apply_highlight(
                aligned["source"],
                entities,
                "source",
                offset,
            )

            # Color the words shared by input and source.
            input_sentence = _restore_span_markup(
                color_text(input_sentence, row[1], highlight_idx_input),
            )
            source_sentence = _restore_span_markup(
                color_text(source_sentence, row[2], highlight_idx_source),
            )
        else:
            # No source sentence for this input: nothing to compare.
            input_sentence = aligned["input"]
            source_sentence = ""

        input_parts.append(replace_leading_spaces(input_sentence) + "<br>")
        source_parts.append(replace_leading_spaces(source_sentence) + "<br>")

        # List each source URL once; skip missing ones so the cell never
        # shows a literal "None" link.
        url = aligned["url"]
        if url is not None and url != "" and url not in seen_urls:
            seen_urls.add(url)
            url_links.append(f"""<a href="{url}">{url}</a><br><br>""")

        if entities is not None:
            entity_count.append(len(entities))

    input_sentences = "".join(input_parts)
    source_sentences = "".join(source_parts)
    source_text_urls = "".join(url_links)
    entity_count_text = format_entity_count(sum(entity_count))

    # The style attribute is closed right after WORD_BREAK; a ';' placed
    # after the closing quote would be parsed as a separate attribute.
    return f"""
<tr>
<td>{input_sentences}</td>
<td>{source_sentences}</td>
<td>{text.prediction_label[0]}<br>
({text.prediction_score[0] * 100:.2f}%)<br><br>
{entity_count_text}</td>
<td style="{WORD_BREAK}">{source_text_urls}</td>
</tr>
"""
def format_image_governor_row(image: ImageDetector):
    """Render the image comparison as one HTML table row.

    Args:
        image: Detector providing ``input``, ``referent_url``,
            ``prediction_label`` and ``prediction_score``.

    Returns:
        An empty string when ``image.input`` is ``None``; otherwise a
        ``<tr>`` string with four cells: the input image (a link when the
        input contains ``"http"``, an inline base64 ``<img>`` otherwise),
        the source image or ``"Image not found"``, the label with its
        score, and a link to the source image.
    """
    if image.input is None:
        return ""

    # The input cell does not depend on whether a source image was found,
    # so build it first; this keeps it defined on the "not found" path.
    if "http" in image.input:
        input_image = f"""<a href="{image.input}">{image.input}</a>"""
    else:
        base64_image = encode_image(image.input)
        input_image = f"""<img src="data:image/jpeg;base64,{base64_image}" width="100" height="150">"""  # noqa: E501

    # A source exists only when the URL is neither None nor empty. The
    # earlier ``is not None or != ""`` test was true for every value.
    if image.referent_url:
        source_image_url = f"""<a href="{image.referent_url}">{image.referent_url}</a>"""  # noqa: E501
        source_image = f"""<img src="{image.referent_url}" width="100" height="150">"""  # noqa: E501
    else:
        source_image = "Image not found"
        source_image_url = ""

    # NOTE(review): the score is printed without scaling, whereas
    # format_text_governor_row multiplies its score by 100 -- confirm the
    # two scores use the scales these formats assume.
    # The style attribute is closed right after WORD_BREAK; a ';' placed
    # after the closing quote would be parsed as a separate attribute.
    return f"""
<tr>
<td>{input_image}</td>
<td>{source_image}</td>
<td>{image.prediction_label}<br>({image.prediction_score:.2f}%)</td>
<td style="{WORD_BREAK}">{source_image_url}</td>
</tr>"""