# davmel's picture
# Update app.py
# a94db0c verified
import gradio as gr
from transformers import pipeline
# Load the fill-mask pipeline; the same Hub repository id is passed as both
# the model and the tokenizer.
# NOTE(review): the original line carried an unexplained "1788?" remark.
_MODEL_REPO = 'davmel/ka_homonym_disambiguation_FM'
fill_mask = pipeline('fill-mask', model=_MODEL_REPO, tokenizer=_MODEL_REPO)

# Token strings that predict() compares with the model's top prediction,
# keyed by sense index (0, 1, 2).
# NOTE(review): these literals may be mis-encoded in this copy of the file;
# confirm them against the tokenizer vocabulary.
masker = {
    0: 'თო',
    1: 'დაბ',
    2: 'კაჀე',
}
# Surface forms of the homonym that getMasked() replaces with "[MASK]".
# Stored as a frozenset so the table is built once at import time and each
# membership test is a hash lookup rather than a scan of a list that is
# rebuilt on every call.
# NOTE(review): many of these literals look mis-encoded in this copy of the
# file; they are reproduced unchanged -- verify against the original source.
HOMONYM_FORMS = frozenset({
    'αƒ‘αƒαƒ αƒ˜αƒ—', 'αƒ‘αƒαƒ αƒ˜αƒαƒœαƒ˜αƒ‘', 'ბარზე', 'αƒ‘αƒαƒ αƒ”αƒ‘αƒ˜', 'αƒ‘αƒαƒ αƒ˜αƒ‘αƒ—αƒ•αƒ˜αƒ‘αƒαƒͺ', 'αƒ‘αƒαƒ αƒ”αƒ‘αƒ˜αƒ', 'ბარეთბაαƒͺ', 'αƒ‘αƒαƒ αƒαƒ˜αƒœαƒ¨αƒ˜', 'ბარეთმაαƒͺ',
    'αƒ‘αƒαƒ αƒ˜αƒαƒœαƒ˜',
    'αƒ‘αƒαƒ αƒ˜αƒͺაა', 'αƒ‘αƒαƒ αƒ˜αƒ“αƒαƒœαƒ', 'αƒ‘αƒαƒ αƒ˜αƒͺ', 'ბარვაზედ', 'αƒ‘αƒαƒ αƒ”αƒšαƒ˜αƒͺ', 'αƒ‘αƒαƒ αƒαƒ•αƒ“αƒ˜', 'αƒ‘αƒαƒ αƒ˜αƒ‘αƒ™αƒαƒͺαƒ”αƒ‘αƒ˜', 'αƒ‘αƒαƒ αƒ•αƒαƒ¨αƒ˜', 'ბარვაბ',
    'αƒ‘αƒαƒ αƒ”αƒ—αƒ˜αƒ‘αƒ', 'ბარავოოო', 'αƒ‘αƒαƒ αƒ˜αƒ‘αƒ”αƒ‘αƒ£αƒ αƒ˜', 'αƒ‘αƒαƒ αƒ”αƒ‘αƒ˜αƒ‘αƒ—αƒ•αƒ˜αƒ‘αƒ', 'αƒ‘αƒαƒ αƒ˜αƒ‘αƒαƒ—αƒ•αƒ˜αƒ‘', 'ბარზეა', 'αƒ‘αƒαƒ αƒ”αƒšαƒ˜', 'αƒ‘αƒαƒ αƒ¨αƒ˜αƒ•αƒ”',
    'αƒ‘αƒαƒ αƒ˜αƒ‘αƒαƒ',
    'αƒ‘αƒαƒ αƒ˜αƒ‘αƒ™αƒ”αƒœ', 'αƒ‘αƒαƒ αƒ”αƒšαƒ”αƒ‘αƒ˜αƒ—', 'ბარადა', 'ბარვაა', 'ბარბ', 'αƒ‘αƒαƒ αƒ˜αƒ', 'ბარავდებ', 'ბარებმა', 'αƒ‘αƒαƒ αƒ”αƒ‘αƒ˜αƒ—', 'αƒ‘αƒαƒ αƒ”αƒ‘αƒ˜αƒ‘',
    'αƒ‘αƒαƒ αƒ˜αƒ‘αƒ’αƒαƒœ', 'αƒ‘αƒαƒ αƒ¨αƒ˜αƒ', 'αƒ‘αƒαƒ αƒ”αƒ‘αƒ˜αƒ‘αƒ™αƒ”αƒœ', 'αƒ‘αƒαƒ αƒ˜αƒ“αƒαƒœαƒαƒ', 'αƒ‘αƒαƒ αƒ”αƒ‘αƒ˜αƒœαƒ', 'αƒ‘αƒαƒ αƒ”αƒ‘αƒ˜αƒ‘αƒ', 'αƒ‘αƒαƒ αƒ•αƒ˜αƒ‘αƒ’αƒαƒœ', 'αƒ‘αƒαƒ αƒ•αƒ˜αƒ‘αƒ—αƒαƒœ',
    'αƒ‘αƒαƒ αƒαƒ•αƒ“αƒœαƒ”αƒœ',
    'ბარადაαƒͺ', 'αƒ‘αƒαƒ αƒ”αƒšαƒαƒ›αƒ“αƒ”', 'αƒ‘αƒαƒ αƒ¨αƒ˜αƒαƒ', 'ბარადაა', 'αƒ‘αƒαƒ αƒ˜αƒ—αƒ', 'αƒ‘αƒαƒ αƒ”αƒšαƒ”αƒ‘αƒ˜αƒͺ', 'αƒ‘αƒαƒ αƒ”αƒšαƒ˜αƒ‘', 'αƒ‘αƒαƒ αƒ”αƒšαƒ”αƒ‘αƒ‘', 'αƒ‘αƒαƒ αƒ•αƒ˜αƒ‘',
    'αƒ‘αƒαƒ αƒ”αƒšαƒ”αƒ‘αƒ›αƒ', 'αƒ‘αƒαƒ αƒ”αƒšαƒ›αƒ', 'ბარადო', 'αƒ‘αƒαƒ αƒ”αƒ‘αƒ˜αƒ‘αƒ’αƒαƒœ', 'αƒ‘αƒαƒ αƒ•αƒ˜αƒ‘αƒαƒ‘', 'αƒ‘αƒαƒ αƒ”αƒ‘αƒ¨αƒ˜αƒ', 'αƒ‘αƒαƒ αƒαƒ•αƒ”αƒœ', 'αƒ‘αƒαƒ αƒ•αƒ˜αƒ—', 'αƒ‘αƒαƒ αƒ˜αƒαƒœαƒ‘',
    'αƒ‘αƒαƒ αƒ”αƒšαƒ”αƒ‘αƒ¨αƒ˜', 'αƒ‘αƒαƒ αƒ˜αƒͺო', 'ბარავდა', 'αƒ‘αƒαƒ αƒ¨αƒ˜αƒ', 'ბარვაზე', 'ბარავთ', 'ბარო', 'αƒ‘αƒαƒ αƒ”αƒšαƒ˜αƒ‘αƒ', 'αƒ‘αƒαƒ αƒαƒ˜αƒœαƒ˜',
    'αƒ‘αƒαƒ αƒ”αƒšαƒ˜αƒ‘αƒαƒ—αƒ•αƒ˜αƒ‘',
    'αƒ‘αƒαƒ αƒ”αƒšαƒ˜αƒ', 'αƒ‘αƒαƒ αƒ”αƒ‘αƒ¨αƒ˜αƒ', 'αƒ‘αƒαƒ αƒ˜αƒ‘', 'αƒ‘αƒαƒ αƒ˜αƒ‘αƒ—αƒ•αƒ˜αƒ‘αƒ', 'ბარებად', 'αƒ‘αƒαƒ αƒ¨αƒ˜αƒ’αƒαƒœ', 'ბარებბაαƒͺ', 'αƒ‘αƒαƒ αƒ”αƒ‘αƒ˜αƒ—αƒ“αƒ', 'ბარებზე',
    'αƒ‘αƒαƒ αƒ˜αƒ', 'αƒ‘αƒαƒ αƒ”αƒ‘αƒ˜αƒ“αƒαƒœ', 'αƒ‘αƒαƒ αƒ˜αƒ‘αƒαƒ“', 'ბარზეαƒͺ', 'αƒ‘αƒαƒ αƒ¨αƒ˜αƒͺ', 'αƒ‘αƒαƒ αƒ”αƒ‘αƒ˜αƒ—αƒαƒ', 'ბარად', 'αƒ‘αƒαƒ αƒ˜αƒ“αƒαƒœαƒαƒͺ', 'αƒ‘αƒαƒ αƒ˜αƒ‘αƒ›αƒαƒ’αƒ•αƒαƒ ',
    'αƒ‘αƒαƒ αƒ¨αƒ˜',
    'αƒ‘αƒαƒ αƒ”αƒ‘αƒ˜αƒ‘αƒ—αƒ•αƒ˜αƒ‘αƒαƒͺ', 'αƒ‘αƒαƒ αƒ—αƒαƒœαƒ', 'αƒ‘αƒαƒ αƒ¨αƒ˜αƒͺაა', 'αƒ‘αƒαƒ αƒ˜αƒ‘αƒ•αƒ”', 'αƒ‘αƒαƒ αƒ˜αƒ‘αƒ—αƒ•αƒ˜αƒ‘αƒ', 'ბარმაαƒͺ', 'αƒ‘αƒαƒ αƒ”αƒšαƒ˜αƒ—', 'ბარვა', 'αƒ‘αƒαƒ αƒ”αƒšαƒ”αƒ‘αƒ—αƒαƒœ',
    'αƒ‘αƒαƒ αƒ”αƒšαƒ”αƒ‘αƒ–αƒ”', 'αƒ‘αƒαƒ αƒ˜αƒ‘αƒαƒͺ', 'αƒ‘αƒαƒ αƒ˜αƒ‘αƒ”αƒ‘αƒ£αƒ ', 'αƒ‘αƒαƒ αƒ”αƒšαƒ”αƒ‘αƒ˜αƒ‘', 'αƒ‘αƒαƒ αƒ˜αƒ“αƒαƒœ', 'αƒ‘αƒαƒ αƒ˜αƒ‘αƒ˜', 'ბარებმაαƒͺ', 'αƒ‘αƒαƒ αƒ˜αƒ•αƒ˜αƒ—', 'αƒ‘αƒαƒ αƒ˜αƒαƒœαƒ›αƒ',
    'αƒ‘αƒαƒ αƒ”αƒšαƒ˜αƒ‘αƒ—αƒ•αƒ˜αƒ‘', 'ბარებბ', 'ბარ', 'αƒ‘αƒαƒ αƒ”αƒ‘αƒ˜αƒͺ', 'ბარებზეαƒͺ', 'αƒ‘αƒαƒ αƒ”αƒ‘αƒ˜αƒ‘αƒ—αƒ•αƒ˜αƒ‘', 'αƒ‘αƒαƒ αƒ”αƒšαƒ‘αƒαƒͺ', 'αƒ‘αƒαƒ αƒ”αƒšαƒ”αƒ‘αƒ˜', 'αƒ‘αƒαƒ αƒ˜αƒ‘αƒ˜αƒ',
    'αƒ‘αƒαƒ αƒ˜αƒ‘αƒžαƒ˜αƒ αƒ˜αƒ—', 'ბარაამდეე', 'ბარამდეαƒͺ', 'αƒ‘αƒαƒ αƒ˜', 'αƒ‘αƒαƒ αƒ˜αƒ‘αƒαƒ™αƒ”αƒœ', 'αƒ‘αƒαƒ αƒ”αƒšαƒαƒ“', 'αƒ‘αƒαƒ αƒ”αƒš', 'ბარამდე', 'αƒ‘αƒαƒ αƒ˜αƒαƒœαƒαƒ“',
    'αƒ‘αƒαƒ αƒ”αƒ‘αƒ˜αƒ‘αƒαƒ—αƒ•αƒ˜αƒ‘', 'αƒ‘αƒαƒ αƒ˜αƒαƒœαƒ˜αƒ', 'αƒ‘αƒαƒ αƒ”αƒ‘αƒ¨αƒ˜', 'αƒ‘αƒαƒ αƒ—αƒαƒœ', 'αƒ‘αƒαƒ αƒ”αƒ—αƒ—αƒαƒœ', 'αƒ‘αƒαƒ αƒ˜αƒ‘αƒαƒ’αƒαƒœ', 'αƒ‘αƒαƒ αƒ”αƒ‘αƒ—αƒαƒ’αƒαƒœ', 'αƒ‘αƒαƒ αƒ˜αƒ‘αƒžαƒ˜αƒ αƒ˜αƒ‘',
    'ბარზედა',
    'ბარვაო', 'αƒ‘αƒαƒ αƒ”αƒ‘αƒ¨αƒ˜αƒ“αƒαƒ', 'ბარებბადა', 'αƒ‘αƒαƒ αƒ”αƒšαƒαƒ›αƒ“αƒ”αƒͺ', 'αƒ‘αƒαƒ αƒ”αƒ‘αƒ˜αƒ—αƒ', 'αƒ‘αƒαƒ αƒ˜αƒ‘αƒ', 'αƒ‘αƒαƒ αƒ”αƒ‘αƒ¨αƒ˜αƒͺ', 'αƒ‘αƒαƒ αƒ˜αƒ“αƒαƒœαƒ•αƒ”', 'αƒ‘αƒαƒ αƒ˜αƒ‘αƒ',
    'αƒ‘αƒαƒ αƒ˜αƒ“αƒαƒœαƒœ', 'αƒ‘αƒαƒ αƒ”αƒšαƒ‘', 'αƒ‘αƒαƒ αƒ”αƒšαƒ—αƒ', 'ბართა', 'αƒ‘αƒαƒ αƒ—αƒαƒœαƒαƒͺ', 'ბარავო', 'αƒ‘αƒαƒ αƒ˜αƒ‘αƒ—αƒ•αƒ˜αƒ‘', 'αƒ‘αƒαƒ αƒ”αƒšαƒ–αƒ”αƒ', 'ბარებბა',
    'αƒ‘αƒαƒ αƒ˜αƒ‘αƒαƒ',
    'αƒ‘αƒαƒ αƒ”αƒ‘αƒ˜αƒ–αƒ”', 'ბარავბ', 'αƒ‘αƒαƒ αƒ˜αƒ‘αƒ›αƒαƒ’αƒ•αƒαƒ αƒ˜', 'αƒ‘αƒαƒ αƒ”αƒ‘αƒ—αƒαƒœ', 'αƒ‘αƒαƒ αƒ”αƒ‘αƒ˜αƒ—αƒ', 'ბარბა', 'αƒ‘αƒαƒ αƒ£αƒšαƒ˜', 'ბარბაαƒͺ', 'ბაბარო',
    'αƒ‘αƒαƒ αƒ”αƒ‘αƒ˜αƒ—αƒ',
    'αƒ‘αƒαƒ αƒ¨αƒ˜αƒαƒͺ', 'ბარობა', 'αƒ‘αƒαƒ αƒ˜αƒ‘αƒαƒ‘',
})


def getMasked(sent):
    """Replace every known homonym form in *sent* with ``[MASK]``.

    The sentence is split on single space characters only, so runs of
    spaces are preserved and a word with attached punctuation is not
    treated as a homonym form. Every matching word is masked, not just
    the first one.

    Args:
        sent: Input sentence as a string.

    Returns:
        The sentence with each word found in ``HOMONYM_FORMS`` replaced by
        the literal ``[MASK]``; all other words are kept as they were.
    """
    return " ".join(
        "[MASK]" if word in HOMONYM_FORMS else word
        for word in sent.split(" ")
    )
def predict(text):
    """Report which sense of the homonym is used in *text*.

    The sentence is masked with ``getMasked``, passed to the ``fill_mask``
    pipeline, and the top-ranked token for the first mask is compared with
    the entries of ``masker`` to pick a sense label.

    Args:
        text: Sentence entered by the user.

    Returns:
        A message naming the detected sense, or a hint when no known form
        of the homonym occurs in the sentence.
    """
    text_with_mask = getMasked(text)

    # The fill-mask pipeline raises when its input has no mask token, so
    # answer with a hint instead of surfacing an error in the UI.
    if "[MASK]" not in text_with_mask:
        return "No known form of the homonym was found in the sentence."

    predictions = fill_mask(text_with_mask)

    # With a single mask the pipeline returns a flat list of candidate
    # dicts; with several masks it returns one such list per mask. Use the
    # best candidate for the first mask in either case.
    best = predictions[0]
    if isinstance(best, list):
        best = best[0]
    context = best["token_str"]

    # Any token other than the first two masker entries falls back to
    # "Cafe", as in the original if/elif/else chain.
    senses = {masker[0]: "Shovel", masker[1]: "Lowland"}
    sense = senses.get(context, "Cafe")
    return "Homonym is used as a \"" + sense + "\""
# Build the Gradio interface: one text input, one text output, backed by
# predict().
iface = gr.Interface(
    fn=predict,
    inputs="text",
    outputs="text",
    title="Homonym disambiguation in Georgian",
    # The homonym itself is written in Georgian script.
    description=(
        'Enter a sentence with the homonym "ბარი" (for the current purposes, '
        "please include the homonym once in the sentence)."
    ),
)
# share=True additionally requests a public share link from Gradio.
iface.launch(share=True)