File size: 6,192 Bytes
434c713 4b8857b 434c713 4b8857b 434c713 4b8857b 434c713 a94db0c 580e226 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 |
import gradio as gr
from transformers import pipeline
# Fill-mask pipeline used by predict(); the model weights and the tokenizer
# are both loaded from the same Hugging Face checkpoint id.
# NOTE(review): the original line carried an unexplained "# 1788?" marker.
_CHECKPOINT = 'davmel/ka_homonym_disambiguation_FM'
fill_mask = pipeline('fill-mask', model=_CHECKPOINT, tokenizer=_CHECKPOINT)
# Tokens that predict() compares with the model's top prediction:
# a match with entry 0 is reported as "Shovel", a match with entry 1 as
# "Lowland", and anything else (entry 2 included) as "Cafe".
# NOTE(review): these literals look mis-encoded in this copy of the file;
# confirm them against the model's vocabulary before editing.
masker = {0: 'αα', 1: 'ααα', 2: 'ααα€α'}
# Word forms that getMasked() replaces by default. Built once at import time
# (instead of on every call) and stored as a frozenset for O(1) membership.
# NOTE(review): these literals look mis-encoded in this copy of the file;
# they are reproduced unchanged — confirm against the original source.
_HOMONYM_FORMS = frozenset((
    'ααα αα', 'ααα ααααα‘', 'ααα αα', 'ααα ααα', 'ααα αα‘αααα‘ααͺ', 'ααα αααα', 'ααα ααα‘ααͺ', 'ααα αααα¨α', 'ααα αααααͺ',
    'ααα αααα',
    'ααα ααͺαα', 'ααα ααααα', 'ααα ααͺ', 'ααα ααααα', 'ααα ααααͺ', 'ααα αααα', 'ααα αα‘αααͺααα', 'ααα ααα¨α', 'ααα ααα‘',
    'ααα αααα‘α', 'ααα ααααα', 'ααα αα‘ααα£α α', 'ααα αααα‘αααα‘α', 'ααα αα‘ααααα‘', 'ααα ααα', 'ααα ααα', 'ααα α¨ααα',
    'ααα αα‘αα',
    'ααα αα‘ααα', 'ααα αααααα', 'ααα ααα', 'ααα ααα', 'ααα α‘', 'ααα αα', 'ααα ααααα‘', 'ααα αααα', 'ααα αααα', 'ααα αααα‘',
    'ααα αα‘ααα', 'ααα α¨αα', 'ααα αααα‘ααα', 'ααα αααααα', 'ααα ααααα', 'ααα αααα‘α', 'ααα ααα‘ααα', 'ααα ααα‘ααα',
    'ααα αααααα',
    'ααα ααααͺ', 'ααα αααααα', 'ααα α¨ααα', 'ααα αααα', 'ααα ααα', 'ααα ααααααͺ', 'ααα αααα‘', 'ααα ααααα‘', 'ααα ααα‘',
    'ααα αααααα', 'ααα αααα', 'ααα ααα', 'ααα αααα‘ααα', 'ααα ααα‘αα‘', 'ααα ααα¨αα', 'ααα αααα', 'ααα ααα', 'ααα αααα‘',
    'ααα ααααα¨α', 'ααα ααͺα', 'ααα αααα', 'ααα α¨αα', 'ααα αααα', 'ααα ααα', 'ααα α', 'ααα αααα‘α', 'ααα αααα',
    'ααα αααα‘ααααα‘',
    'ααα αααα', 'ααα ααα¨αα', 'ααα αα‘', 'ααα αα‘αααα‘α', 'ααα αααα', 'ααα α¨αααα', 'ααα ααα‘ααͺ', 'ααα αααααα', 'ααα αααα',
    'ααα αα', 'ααα αααααα', 'ααα αα‘αα', 'ααα αααͺ', 'ααα α¨ααͺ', 'ααα αααααα', 'ααα αα', 'ααα ααααααͺ', 'ααα αα‘αααααα ',
    'ααα α¨α',
    'ααα αααα‘αααα‘ααͺ', 'ααα αααα', 'ααα α¨ααͺαα', 'ααα αα‘αα', 'ααα αα‘αααα‘α', 'ααα αααͺ', 'ααα αααα', 'ααα αα', 'ααα ααααααα',
    'ααα αααααα', 'ααα αα‘ααͺ', 'ααα αα‘ααα£α ', 'ααα αααααα‘', 'ααα αααα', 'ααα αα‘α', 'ααα αααααͺ', 'ααα αααα', 'ααα ααααα',
    'ααα αααα‘αααα‘', 'ααα ααα‘', 'ααα ', 'ααα ααααͺ', 'ααα αααααͺ', 'ααα αααα‘αααα‘', 'ααα ααα‘ααͺ', 'ααα ααααα', 'ααα αα‘αα',
    'ααα αα‘ααα αα', 'ααα αααααα', 'ααα αααααͺ', 'ααα α', 'ααα αα‘αααα', 'ααα αααα', 'ααα αα', 'ααα αααα', 'ααα ααααα',
    'ααα αααα‘ααααα‘', 'ααα ααααα', 'ααα ααα¨α', 'ααα ααα', 'ααα ααααα', 'ααα αα‘αααα', 'ααα ααααααα', 'ααα αα‘ααα αα‘',
    'ααα αααα',
    'ααα ααα', 'ααα ααα¨αααα', 'ααα ααα‘ααα', 'ααα αααααααͺ', 'ααα ααααα', 'ααα αα‘α', 'ααα ααα¨ααͺ', 'ααα αααααα', 'ααα αα‘α',
    'ααα ααααα', 'ααα ααα‘', 'ααα αααα', 'ααα αα', 'ααα αααααͺ', 'ααα ααα', 'ααα αα‘αααα‘', 'ααα ααααα', 'ααα ααα‘α',
    'ααα αα‘αα',
    'ααα ααααα', 'ααα ααα‘', 'ααα αα‘αααααα α', 'ααα ααααα', 'ααα ααααα', 'ααα α‘α', 'ααα α£αα', 'ααα α‘ααͺ', 'α‘αααα α',
    'ααα ααααα',
    'ααα α¨αααͺ', 'ααα ααα', 'ααα αα‘αα‘',
))


def getMasked(sent, forms=None, *, first_only=False):
    """Replace known homonym forms in *sent* with the ``[MASK]`` token.

    The sentence is split on single ASCII spaces and each piece is compared
    for exact equality with the known forms, so runs of spaces are preserved
    and a form with punctuation attached to it is not matched.

    Args:
        sent: The input sentence.
        forms: Optional iterable of exact word forms to mask. When omitted,
            the built-in table of homonym forms is used.
        first_only: When true, only the first matching word is masked and
            later matches are kept as written. Defaults to False, which masks
            every match (the behaviour of the original implementation).

    Returns:
        The sentence with matching words replaced by ``"[MASK]"``.
    """
    targets = _HOMONYM_FORMS if forms is None else frozenset(forms)
    pieces = []
    already_masked = False
    for word in sent.split(" "):
        if word in targets and not (first_only and already_masked):
            pieces.append("[MASK]")
            already_masked = True
        else:
            pieces.append(word)
    return " ".join(pieces)
def predict(text):
    """Report which sense of the homonym the model predicts for *text*.

    The homonym forms in the sentence are replaced with ``[MASK]`` by
    getMasked(), the fill-mask pipeline is run on the result, and the
    top-ranked token is compared with the entries of ``masker``.

    Args:
        text: The sentence entered by the user.

    Returns:
        A human-readable message naming the predicted sense, or a notice
        when the sentence contains no known form of the homonym.
    """
    # Mask the homonym forms in the input text
    text_with_mask = getMasked(text)
    if "[MASK]" not in text_with_mask:
        # The fill-mask pipeline raises when the input has no mask token,
        # so answer with a message instead of letting the request crash.
        return "No form of the homonym was found in the sentence."

    # Use the model to predict the masked word
    predictions = fill_mask(text_with_mask)
    if predictions and isinstance(predictions[0], list):
        # With several masks the pipeline returns one candidate list per
        # mask; use the candidates for the first masked occurrence.
        predictions = predictions[0]

    # Map the top-ranked token to a sense
    context = predictions[0]["token_str"]
    if context == masker[0]:
        return 'Homonym is used as a "Shovel"'
    if context == masker[1]:
        return 'Homonym is used as a "Lowland"'
    # Every other token is reported as the third sense.
    return 'Homonym is used as a "Cafe"'
# Create the Gradio interface: one text box in, one text box out, with
# predict() as the handler.
iface = gr.Interface(
    fn=predict,
    inputs="text",
    outputs="text",
    title="Homonym disambiguation in Georgian",
    description="Enter a sentence with the homonym \"ααα α\" (for the current purposes, please include the homonym once in the sentence).",
)
# Start the web server; share=True additionally requests a public share link.
iface.launch(share=True)