File size: 6,192 Bytes
434c713
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4b8857b
434c713
4b8857b
434c713
4b8857b
434c713
 
 
a94db0c
580e226
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
import gradio as gr
from transformers import pipeline
# Fill-mask pipeline created once at import time; the same model identifier is
# used for both the weights and the tokenizer.
# NOTE(review): the original line carried an unexplained "1788?" marker.
_MODEL_ID = 'davmel/ka_homonym_disambiguation_FM'
fill_mask = pipeline('fill-mask', model=_MODEL_ID, tokenizer=_MODEL_ID)

# Tokens that predict() compares with the top prediction (entries 0 and 1 are
# checked explicitly; anything else falls through to the last label).
masker = dict(enumerate(('თო', 'დაბ', 'კაჀე')))

# Known surface forms of the homonym. Stored as a frozenset built once at
# import time so each membership test is O(1) instead of scanning a list that
# was rebuilt on every call.
# NOTE(review): many of these literals look mis-encoded (mixed scripts);
# verify them against the upstream file before relying on them.
_HOMONYM_FORMS = frozenset({
    'αƒ‘αƒαƒ αƒ˜αƒ—', 'αƒ‘αƒαƒ αƒ˜αƒαƒœαƒ˜αƒ‘', 'ბარზე', 'αƒ‘αƒαƒ αƒ”αƒ‘αƒ˜', 'αƒ‘αƒαƒ αƒ˜αƒ‘αƒ—αƒ•αƒ˜αƒ‘αƒαƒͺ', 'αƒ‘αƒαƒ αƒ”αƒ‘αƒ˜αƒ', 'ბარეთბაαƒͺ', 'αƒ‘αƒαƒ αƒαƒ˜αƒœαƒ¨αƒ˜', 'ბარეთმაαƒͺ',
    'αƒ‘αƒαƒ αƒ˜αƒαƒœαƒ˜',
    'αƒ‘αƒαƒ αƒ˜αƒͺაა', 'αƒ‘αƒαƒ αƒ˜αƒ“αƒαƒœαƒ', 'αƒ‘αƒαƒ αƒ˜αƒͺ', 'ბარვაზედ', 'αƒ‘αƒαƒ αƒ”αƒšαƒ˜αƒͺ', 'αƒ‘αƒαƒ αƒαƒ•αƒ“αƒ˜', 'αƒ‘αƒαƒ αƒ˜αƒ‘αƒ™αƒαƒͺαƒ”αƒ‘αƒ˜', 'αƒ‘αƒαƒ αƒ•αƒαƒ¨αƒ˜', 'ბარვაბ',
    'αƒ‘αƒαƒ αƒ”αƒ—αƒ˜αƒ‘αƒ', 'ბარავოოო', 'αƒ‘αƒαƒ αƒ˜αƒ‘αƒ”αƒ‘αƒ£αƒ αƒ˜', 'αƒ‘αƒαƒ αƒ”αƒ‘αƒ˜αƒ‘αƒ—αƒ•αƒ˜αƒ‘αƒ', 'αƒ‘αƒαƒ αƒ˜αƒ‘αƒαƒ—αƒ•αƒ˜αƒ‘', 'ბარზეა', 'αƒ‘αƒαƒ αƒ”αƒšαƒ˜', 'αƒ‘αƒαƒ αƒ¨αƒ˜αƒ•αƒ”',
    'αƒ‘αƒαƒ αƒ˜αƒ‘αƒαƒ',
    'αƒ‘αƒαƒ αƒ˜αƒ‘αƒ™αƒ”αƒœ', 'αƒ‘αƒαƒ αƒ”αƒšαƒ”αƒ‘αƒ˜αƒ—', 'ბარადა', 'ბარვაა', 'ბარბ', 'αƒ‘αƒαƒ αƒ˜αƒ', 'ბარავდებ', 'ბარებმა', 'αƒ‘αƒαƒ αƒ”αƒ‘αƒ˜αƒ—', 'αƒ‘αƒαƒ αƒ”αƒ‘αƒ˜αƒ‘',
    'αƒ‘αƒαƒ αƒ˜αƒ‘αƒ’αƒαƒœ', 'αƒ‘αƒαƒ αƒ¨αƒ˜αƒ', 'αƒ‘αƒαƒ αƒ”αƒ‘αƒ˜αƒ‘αƒ™αƒ”αƒœ', 'αƒ‘αƒαƒ αƒ˜αƒ“αƒαƒœαƒαƒ', 'αƒ‘αƒαƒ αƒ”αƒ‘αƒ˜αƒœαƒ', 'αƒ‘αƒαƒ αƒ”αƒ‘αƒ˜αƒ‘αƒ', 'αƒ‘αƒαƒ αƒ•αƒ˜αƒ‘αƒ’αƒαƒœ', 'αƒ‘αƒαƒ αƒ•αƒ˜αƒ‘αƒ—αƒαƒœ',
    'αƒ‘αƒαƒ αƒαƒ•αƒ“αƒœαƒ”αƒœ',
    'ბარადაαƒͺ', 'αƒ‘αƒαƒ αƒ”αƒšαƒαƒ›αƒ“αƒ”', 'αƒ‘αƒαƒ αƒ¨αƒ˜αƒαƒ', 'ბარადაა', 'αƒ‘αƒαƒ αƒ˜αƒ—αƒ', 'αƒ‘αƒαƒ αƒ”αƒšαƒ”αƒ‘αƒ˜αƒͺ', 'αƒ‘αƒαƒ αƒ”αƒšαƒ˜αƒ‘', 'αƒ‘αƒαƒ αƒ”αƒšαƒ”αƒ‘αƒ‘', 'αƒ‘αƒαƒ αƒ•αƒ˜αƒ‘',
    'αƒ‘αƒαƒ αƒ”αƒšαƒ”αƒ‘αƒ›αƒ', 'αƒ‘αƒαƒ αƒ”αƒšαƒ›αƒ', 'ბარადო', 'αƒ‘αƒαƒ αƒ”αƒ‘αƒ˜αƒ‘αƒ’αƒαƒœ', 'αƒ‘αƒαƒ αƒ•αƒ˜αƒ‘αƒαƒ‘', 'αƒ‘αƒαƒ αƒ”αƒ‘αƒ¨αƒ˜αƒ', 'αƒ‘αƒαƒ αƒαƒ•αƒ”αƒœ', 'αƒ‘αƒαƒ αƒ•αƒ˜αƒ—', 'αƒ‘αƒαƒ αƒ˜αƒαƒœαƒ‘',
    'αƒ‘αƒαƒ αƒ”αƒšαƒ”αƒ‘αƒ¨αƒ˜', 'αƒ‘αƒαƒ αƒ˜αƒͺო', 'ბარავდა', 'αƒ‘αƒαƒ αƒ¨αƒ˜αƒ', 'ბარვაზე', 'ბარავთ', 'ბარო', 'αƒ‘αƒαƒ αƒ”αƒšαƒ˜αƒ‘αƒ', 'αƒ‘αƒαƒ αƒαƒ˜αƒœαƒ˜',
    'αƒ‘αƒαƒ αƒ”αƒšαƒ˜αƒ‘αƒαƒ—αƒ•αƒ˜αƒ‘',
    'αƒ‘αƒαƒ αƒ”αƒšαƒ˜αƒ', 'αƒ‘αƒαƒ αƒ”αƒ‘αƒ¨αƒ˜αƒ', 'αƒ‘αƒαƒ αƒ˜αƒ‘', 'αƒ‘αƒαƒ αƒ˜αƒ‘αƒ—αƒ•αƒ˜αƒ‘αƒ', 'ბარებად', 'αƒ‘αƒαƒ αƒ¨αƒ˜αƒ’αƒαƒœ', 'ბარებბაαƒͺ', 'αƒ‘αƒαƒ αƒ”αƒ‘αƒ˜αƒ—αƒ“αƒ', 'ბარებზე',
    'αƒ‘αƒαƒ αƒ˜αƒ', 'αƒ‘αƒαƒ αƒ”αƒ‘αƒ˜αƒ“αƒαƒœ', 'αƒ‘αƒαƒ αƒ˜αƒ‘αƒαƒ“', 'ბარზეαƒͺ', 'αƒ‘αƒαƒ αƒ¨αƒ˜αƒͺ', 'αƒ‘αƒαƒ αƒ”αƒ‘αƒ˜αƒ—αƒαƒ', 'ბარად', 'αƒ‘αƒαƒ αƒ˜αƒ“αƒαƒœαƒαƒͺ', 'αƒ‘αƒαƒ αƒ˜αƒ‘αƒ›αƒαƒ’αƒ•αƒαƒ ',
    'αƒ‘αƒαƒ αƒ¨αƒ˜',
    'αƒ‘αƒαƒ αƒ”αƒ‘αƒ˜αƒ‘αƒ—αƒ•αƒ˜αƒ‘αƒαƒͺ', 'αƒ‘αƒαƒ αƒ—αƒαƒœαƒ', 'αƒ‘αƒαƒ αƒ¨αƒ˜αƒͺაა', 'αƒ‘αƒαƒ αƒ˜αƒ‘αƒ•αƒ”', 'αƒ‘αƒαƒ αƒ˜αƒ‘αƒ—αƒ•αƒ˜αƒ‘αƒ', 'ბარმაαƒͺ', 'αƒ‘αƒαƒ αƒ”αƒšαƒ˜αƒ—', 'ბარვა', 'αƒ‘αƒαƒ αƒ”αƒšαƒ”αƒ‘αƒ—αƒαƒœ',
    'αƒ‘αƒαƒ αƒ”αƒšαƒ”αƒ‘αƒ–αƒ”', 'αƒ‘αƒαƒ αƒ˜αƒ‘αƒαƒͺ', 'αƒ‘αƒαƒ αƒ˜αƒ‘αƒ”αƒ‘αƒ£αƒ ', 'αƒ‘αƒαƒ αƒ”αƒšαƒ”αƒ‘αƒ˜αƒ‘', 'αƒ‘αƒαƒ αƒ˜αƒ“αƒαƒœ', 'αƒ‘αƒαƒ αƒ˜αƒ‘αƒ˜', 'ბარებმაαƒͺ', 'αƒ‘αƒαƒ αƒ˜αƒ•αƒ˜αƒ—', 'αƒ‘αƒαƒ αƒ˜αƒαƒœαƒ›αƒ',
    'αƒ‘αƒαƒ αƒ”αƒšαƒ˜αƒ‘αƒ—αƒ•αƒ˜αƒ‘', 'ბარებბ', 'ბარ', 'αƒ‘αƒαƒ αƒ”αƒ‘αƒ˜αƒͺ', 'ბარებზეαƒͺ', 'αƒ‘αƒαƒ αƒ”αƒ‘αƒ˜αƒ‘αƒ—αƒ•αƒ˜αƒ‘', 'αƒ‘αƒαƒ αƒ”αƒšαƒ‘αƒαƒͺ', 'αƒ‘αƒαƒ αƒ”αƒšαƒ”αƒ‘αƒ˜', 'αƒ‘αƒαƒ αƒ˜αƒ‘αƒ˜αƒ',
    'αƒ‘αƒαƒ αƒ˜αƒ‘αƒžαƒ˜αƒ αƒ˜αƒ—', 'ბარაამდეე', 'ბარამდეαƒͺ', 'αƒ‘αƒαƒ αƒ˜', 'αƒ‘αƒαƒ αƒ˜αƒ‘αƒαƒ™αƒ”αƒœ', 'αƒ‘αƒαƒ αƒ”αƒšαƒαƒ“', 'αƒ‘αƒαƒ αƒ”αƒš', 'ბარამდე', 'αƒ‘αƒαƒ αƒ˜αƒαƒœαƒαƒ“',
    'αƒ‘αƒαƒ αƒ”αƒ‘αƒ˜αƒ‘αƒαƒ—αƒ•αƒ˜αƒ‘', 'αƒ‘αƒαƒ αƒ˜αƒαƒœαƒ˜αƒ', 'αƒ‘αƒαƒ αƒ”αƒ‘αƒ¨αƒ˜', 'αƒ‘αƒαƒ αƒ—αƒαƒœ', 'αƒ‘αƒαƒ αƒ”αƒ—αƒ—αƒαƒœ', 'αƒ‘αƒαƒ αƒ˜αƒ‘αƒαƒ’αƒαƒœ', 'αƒ‘αƒαƒ αƒ”αƒ‘αƒ—αƒαƒ’αƒαƒœ', 'αƒ‘αƒαƒ αƒ˜αƒ‘αƒžαƒ˜αƒ αƒ˜αƒ‘',
    'ბარზედა',
    'ბარვაო', 'αƒ‘αƒαƒ αƒ”αƒ‘αƒ¨αƒ˜αƒ“αƒαƒ', 'ბარებბადა', 'αƒ‘αƒαƒ αƒ”αƒšαƒαƒ›αƒ“αƒ”αƒͺ', 'αƒ‘αƒαƒ αƒ”αƒ‘αƒ˜αƒ—αƒ', 'αƒ‘αƒαƒ αƒ˜αƒ‘αƒ', 'αƒ‘αƒαƒ αƒ”αƒ‘αƒ¨αƒ˜αƒͺ', 'αƒ‘αƒαƒ αƒ˜αƒ“αƒαƒœαƒ•αƒ”', 'αƒ‘αƒαƒ αƒ˜αƒ‘αƒ',
    'αƒ‘αƒαƒ αƒ˜αƒ“αƒαƒœαƒœ', 'αƒ‘αƒαƒ αƒ”αƒšαƒ‘', 'αƒ‘αƒαƒ αƒ”αƒšαƒ—αƒ', 'ბართა', 'αƒ‘αƒαƒ αƒ—αƒαƒœαƒαƒͺ', 'ბარავო', 'αƒ‘αƒαƒ αƒ˜αƒ‘αƒ—αƒ•αƒ˜αƒ‘', 'αƒ‘αƒαƒ αƒ”αƒšαƒ–αƒ”αƒ', 'ბარებბა',
    'αƒ‘αƒαƒ αƒ˜αƒ‘αƒαƒ',
    'αƒ‘αƒαƒ αƒ”αƒ‘αƒ˜αƒ–αƒ”', 'ბარავბ', 'αƒ‘αƒαƒ αƒ˜αƒ‘αƒ›αƒαƒ’αƒ•αƒαƒ αƒ˜', 'αƒ‘αƒαƒ αƒ”αƒ‘αƒ—αƒαƒœ', 'αƒ‘αƒαƒ αƒ”αƒ‘αƒ˜αƒ—αƒ', 'ბარბა', 'αƒ‘αƒαƒ αƒ£αƒšαƒ˜', 'ბარბაαƒͺ', 'ბაბარო',
    'αƒ‘αƒαƒ αƒ”αƒ‘αƒ˜αƒ—αƒ',
    'αƒ‘αƒαƒ αƒ¨αƒ˜αƒαƒͺ', 'ბარობა', 'αƒ‘αƒαƒ αƒ˜αƒ‘αƒαƒ‘',
})


def getMasked(sent):
    """Return *sent* with every known homonym form replaced by "[MASK]".

    The sentence is split on single spaces and each piece is compared
    verbatim against the known forms, so a word with attached punctuation
    (for example a trailing comma) is left untouched. Spacing is preserved
    because the pieces are re-joined with the same single-space separator.

    Every matching word is masked, not just the first one: the
    first-occurrence-only guard in the earlier implementation was disabled,
    and that behaviour is kept here.

    Args:
        sent: The input sentence.

    Returns:
        The sentence with each matching word swapped for "[MASK]".
    """
    return " ".join(
        "[MASK]" if word in _HOMONYM_FORMS else word
        for word in sent.split(" ")
    )

def predict(text):
    """Classify which sense of the homonym is used in *text*.

    The homonym is masked with getMasked(), the masked sentence is passed to
    the fill-mask pipeline, and the top predicted token is mapped to a sense
    label.

    Args:
        text: The sentence entered by the user.

    Returns:
        A human-readable message naming the detected sense, or a message
        saying that no homonym form was found in the input.
    """
    # Mask the homonym in the input text.
    text_with_mask = getMasked(text)
    if "[MASK]" not in text_with_mask:
        # Without a mask token the fill-mask pipeline raises instead of
        # predicting, so answer with a readable message instead.
        return "No known form of the homonym was found in the sentence."

    # Ask the model to fill in the masked position(s).
    predictions = fill_mask(text_with_mask)
    # With several masks the pipeline returns one candidate list per mask;
    # only the first masked position is classified.
    if predictions and isinstance(predictions[0], list):
        predictions = predictions[0]

    # Top-ranked candidate token for the mask.
    context = predictions[0]["token_str"]
    if context == masker[0]:
        return "Homonym is used as a \"Shovel\""
    if context == masker[1]:
        return "Homonym is used as a \"Lowland\""
    # Any other token falls through to the remaining sense.
    return "Homonym is used as a \"Cafe\""

# Gradio UI: a single text input wired to predict(), with a single text output.
iface = gr.Interface(
    fn=predict,
    inputs="text",
    outputs="text",
    title="Homonym disambiguation in Georgian",
    description="Enter a sentence with the homonym \"αƒ‘αƒαƒ αƒ˜\" (for the current purposes, please include the homonym once in the sentence).",
)
# Start the app; share=True additionally requests a public share link.
iface.launch(share=True)