Spaces:
Runtime error
Runtime error
| import spacy | |
| from spacy import displacy | |
| import random | |
| from spacy.tokens import Span | |
| import gradio as gr | |
# Model-name prefix pre-selected in the model dropdown. The handlers below
# append a size suffix ("_sm" or "_md") before loading the pipeline.
DEFAULT_MODEL = "en_core_web"

# Sample sentence pre-filled in the shared input textbox.
DEFAULT_TEXT = (
    "David Bowie moved to the US in 1974, initially staying in "
    "New York City before settling in Los Angeles."
)

# Token attribute names offered as checkboxes on the "Tokens" tab and used
# as the column headers of its output table.
DEFAULT_TOK_ATTR = [
    "idx",
    "text",
    "pos_",
    "lemma_",
    "shape_",
    "dep_",
]

# Entity labels offered (and ticked by default) on the "Entity" tab.
DEFAULT_ENTS = [
    "CARDINAL",
    "DATE",
    "EVENT",
    "FAC",
    "GPE",
    "LANGUAGE",
    "LAW",
    "LOC",
    "MONEY",
    "NORP",
    "ORDINAL",
    "ORG",
    "PERCENT",
    "PERSON",
    "PRODUCT",
    "QUANTITY",
    "TIME",
    "WORK_OF_ART",
]
def get_all_models():
    """Collect the model-name prefixes referenced in ``requirements.txt``.

    Every line that mentions ``huggingface.co`` is split on ``/`` and its
    fifth field is taken as the package name (for a URL such as
    ``https://huggingface.co/<org>/<package>/...`` that is ``<package>``).
    The package name is then reduced to its first three ``_``-separated
    parts, e.g. ``en_core_web_sm`` -> ``en_core_web``.

    Lines that mention the host but do not have five ``/``-separated fields
    (a comment, for instance) are skipped rather than raising ``IndexError``.

    Returns:
        list[str]: Unique prefixes, in order of first appearance.

    Raises:
        OSError: If ``requirements.txt`` cannot be opened in the current
            working directory.
    """
    models = []
    with open("requirements.txt", encoding="utf-8") as f:
        for line in f:
            if "huggingface.co" not in line:
                continue
            # Strip first so a URL ending at the package name does not carry
            # its trailing newline into the model name.
            parts = line.strip().split("/")
            if len(parts) < 5:
                continue
            model = "_".join(parts[4].split("_")[:3])
            # A list (not a set) keeps first-appearance order for the UI.
            if model and model not in models:
                models.append(model)
    return models
| # Model-name prefixes read from requirements.txt once, when this module is executed; used as the choices of the model dropdown. | |
| models = get_all_models() | |
def dependency(text, col_punct, col_phrase, compact, model):
    """Render the dependency parse of ``text`` with displaCy.

    Args:
        text: Text to run through the pipeline.
        col_punct: Forwarded to displaCy as the ``collapse_punct`` option.
        col_phrase: Forwarded to displaCy as the ``collapse_phrases`` option.
        compact: Forwarded to displaCy as the ``compact`` option.
        model: Model-name prefix; ``"_sm"`` is appended before loading.

    Returns:
        Whatever ``displacy.render(..., style="dep")`` returns for the
        parsed document.
    """
    pipeline = spacy.load(model + "_sm")
    parsed = pipeline(text)
    render_options = dict(
        compact=compact,
        collapse_phrases=col_phrase,
        collapse_punct=col_punct,
    )
    return displacy.render(parsed, style="dep", options=render_options)
def entity(text, ents, model):
    """Render the named entities of ``text`` with displaCy.

    Args:
        text: Text to run through the pipeline.
        ents: Entity labels forwarded to displaCy as the ``ents`` option.
        model: Model-name prefix; ``"_sm"`` is appended before loading.

    Returns:
        Whatever ``displacy.render(..., style="ent")`` returns for the
        processed document.
    """
    pipeline = spacy.load(model + "_sm")
    processed = pipeline(text)
    return displacy.render(processed, style="ent", options=dict(ents=ents))
def token(text, attributes, model):
    """Tabulate the requested attributes for every token of ``text``.

    Args:
        text: Text to run through the pipeline.
        attributes: Attribute names looked up on each token with ``getattr``.
        model: Model-name prefix; ``"_sm"`` is appended before loading.

    Returns:
        list[list]: One row per token, holding the attribute values in the
        same order as ``attributes``.
    """
    pipeline = spacy.load(model + "_sm")
    return [
        [getattr(tok, name) for name in attributes]
        for tok in pipeline(text)
    ]
def vectors(text, model):
    """Score the similarity of two randomly chosen pieces of ``text``.

    Candidates are the document's noun chunks plus every token that is not
    a stop word and is not tagged ``PUNCT`` or ``PROPN``. Two candidates are
    drawn at random and compared with their ``similarity`` method.

    Args:
        text: Text to run through the pipeline.
        model: Model-name prefix; ``"_md"`` is appended before loading.

    Returns:
        tuple: ``(score, first_text, second_text)`` where ``score`` is the
        similarity rounded to two decimals.

    Raises:
        ValueError: If ``text`` yields no candidate at all.
    """
    nlp = spacy.load(model + "_md")
    doc = nlp(text)
    candidates = list(doc.noun_chunks)
    candidates.extend(
        tok for tok in doc
        if not tok.is_stop and tok.pos_ not in ("PUNCT", "PROPN")
    )
    if not candidates:
        # random.choices/sample would fail here with an opaque error.
        raise ValueError(
            "No noun chunks or content words found to compare in the text."
        )
    if len(candidates) == 1:
        # Only one option: comparing it with itself is all that is possible.
        first = second = candidates[0]
    else:
        # Sample without replacement so an item is never compared with
        # itself, which would only report a trivial score.
        first, second = random.sample(candidates, 2)
    return round(first.similarity(second), 2), first.text, second.text
def _locate_words(token_texts, words):
    """Return ``(start, end)`` of the first contiguous run of ``words``.

    ``end`` is exclusive. ``(0, 0)`` is returned when ``words`` is empty or
    does not occur in ``token_texts``.
    """
    width = len(words)
    if width:
        for start in range(len(token_texts) - width + 1):
            if token_texts[start:start + width] == words:
                return start, start + width
    return 0, 0


def span(text, span1, span2, label1, label2, model):
    """Render two labelled spans of ``text`` with displaCy's span style.

    Each span string is split on whitespace and located in the document as
    a contiguous sequence of token texts. A span that cannot be found is
    stored as an empty span at the start of the document.

    Args:
        text: Text to run through the pipeline.
        span1: Words of the first span, separated by whitespace.
        span2: Words of the second span, separated by whitespace.
        label1: Label attached to the first span.
        label2: Label attached to the second span.
        model: Model-name prefix; ``"_sm"`` is appended before loading.

    Returns:
        Whatever ``displacy.render(doc, style="span")`` returns.
    """
    nlp = spacy.load(model + "_sm")
    doc = nlp(text)
    token_texts = [tok.text for tok in doc]
    # Match the whole word sequence at once. Matching the first and last
    # word independently lets a repeated word (e.g. "in") put the start
    # after the end, which is not a valid Span range.
    start1, end1 = _locate_words(token_texts, span1.split())
    start2, end2 = _locate_words(token_texts, span2.split())
    doc.spans["sc"] = [
        Span(doc, start1, end1, label1),
        Span(doc, start2, end2, label2),
    ]
    return displacy.render(doc, style="span")
| # Gradio UI: a shared text box and model dropdown, followed by one tab per visualisation. | |
| # NOTE(review): indentation was lost in this copy, so which widgets sit inside which `with` context (Tabs / TabItem / Row / Column) cannot be confirmed from here. | |
| demo = gr.Blocks() | |
| with demo: | |
| # Inputs shared by every handler wired up below. | |
| text_input = gr.Textbox(value=DEFAULT_TEXT, interactive=True) | |
| model_input = gr.Dropdown( | |
| choices=models, value=DEFAULT_MODEL, interactive=True) | |
| with gr.Tabs(): | |
| with gr.TabItem("Dependency"): | |
| col_punct = gr.Checkbox(label="Collapse Punctuation", value=True) | |
| col_phrase = gr.Checkbox(label="Collapse Phrases", value=True) | |
| compact = gr.Checkbox(label="Compact", value=True) | |
| depen_output = gr.HTML() | |
| depen_button = gr.Button("Generate") | |
| with gr.TabItem("Entity"): | |
| entity_input = gr.CheckboxGroup(DEFAULT_ENTS, value=DEFAULT_ENTS) | |
| entity_output = gr.HTML() | |
| entity_button = gr.Button("Generate") | |
| with gr.TabItem("Tokens"): | |
| with gr.Column(): | |
| tok_input = gr.CheckboxGroup( | |
| DEFAULT_TOK_ATTR, value=DEFAULT_TOK_ATTR) | |
| # NOTE(review): headers are always DEFAULT_TOK_ATTR, while token() returns only the ticked attributes — confirm the columns still line up when some are unticked. | |
| tok_output = gr.Dataframe( | |
| headers=DEFAULT_TOK_ATTR, overflow_row_behaviour="paginate") | |
| tok_button = gr.Button("Generate") | |
| with gr.TabItem("Similarity"): | |
| # These three boxes are the outputs of the Similarity button, so the values given here are only what is shown before the first click. | |
| sim_text1 = gr.Textbox(value="David Bowie", label="Chosen") | |
| sim_text2 = gr.Textbox(value="the US", label="Chosen") | |
| sim_output = gr.Textbox(value="0.09", label="Similarity Score") | |
| sim_button = gr.Button("Generate") | |
| with gr.TabItem("Spans"): | |
| with gr.Row(): | |
| span1 = gr.Textbox(value="David Bowie", label="Span 1") | |
| label1 = gr.Textbox(value="Name", | |
| label="Label for Span 1") | |
| with gr.Row(): | |
| span2 = gr.Textbox(value="David", label="Span 2") | |
| label2 = gr.Textbox(value="First", | |
| label="Label for Span 2") | |
| span_output = gr.HTML() | |
| span_button = gr.Button("Generate") | |
| # Wire each tab's button to its handler; every handler receives the shared text and model inputs. | |
| depen_button.click(dependency, inputs=[ | |
| text_input, col_punct, col_phrase, compact, model_input], outputs=depen_output) | |
| entity_button.click( | |
| entity, inputs=[text_input, entity_input, model_input], outputs=entity_output) | |
| tok_button.click( | |
| token, inputs=[text_input, tok_input, model_input], outputs=tok_output) | |
| # vectors() returns (score, first text, second text), matching the order of the outputs list. | |
| sim_button.click(vectors, inputs=[text_input, model_input], outputs=[ | |
| sim_output, sim_text1, sim_text2]) | |
| span_button.click( | |
| span, inputs=[text_input, span1, span2, label1, label2, model_input], outputs=span_output) | |
| # Launched unconditionally: there is no `if __name__ == "__main__"` guard, so importing this module also starts the app. | |
| demo.launch() | |