Spaces:
Runtime error
Runtime error
Final update for version 2 application
Browse files
app.py
CHANGED
|
@@ -1,22 +1,22 @@
|
|
| 1 |
-
|
| 2 |
import gradio as gr
|
| 3 |
from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM
|
| 4 |
|
| 5 |
# initialize the environment
|
| 6 |
model_name = 'anugrahap/gpt2-indo-textgen'
|
| 7 |
HF_TOKEN = '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'
|
| 8 |
-
|
| 9 |
|
| 10 |
-
# define the tokenization method
|
| 11 |
-
tokenizer = AutoTokenizer.from_pretrained(model_name,
|
| 12 |
-
|
| 13 |
-
|
| 14 |
-
|
| 15 |
|
| 16 |
-
# add the EOS token as PAD token to avoid warnings
|
| 17 |
-
model = AutoModelForCausalLM.from_pretrained(model_name, pad_token_id=tokenizer.eos_token_id)
|
| 18 |
|
| 19 |
-
generator = pipeline('text-generation', model=
|
| 20 |
|
| 21 |
# create the decoder parameter to generate the text
|
| 22 |
def single_generation(text,min_length,max_length,temperature,top_k,top_p,num_beams,repetition_penalty,do_sample):
|
|
@@ -66,56 +66,20 @@ def single_generation(text,min_length,max_length,temperature,top_k,top_p,num_bea
|
|
| 66 |
else:
|
| 67 |
return error_unknown
|
| 68 |
|
| 69 |
-
# create the decoder parameter to generate the text
|
| 70 |
-
def multiple_generation(text,min_length,max_length,temperature,top_k,top_p,num_beams,repetition_penalty,do_sample):
|
| 71 |
-
# create local variable for error parameter
|
| 72 |
-
error_rep=ValueError(f"ERROR: repetition penalty cannot be lower than one! Given rep penalty = {repetition_penalty}")
|
| 73 |
-
error_temp=ValueError(f"ERROR: temperature cannot be zero or lower! Given temperature = {temperature}")
|
| 74 |
-
error_minmax=ValueError(f"ERROR: min length must be lower than or equal to max length! Given min length = {min_length}")
|
| 75 |
-
error_numbeams_type=TypeError(f"ERROR: number of beams must be an integer not {type(num_beams)}")
|
| 76 |
-
error_topk_type=TypeError(f"ERROR: top k must be an integer not {type(top_k)}")
|
| 77 |
-
error_minmax_type=TypeError(f"ERROR: min length and max length must be an integer not {type(min_length)} and {type(max_length)}")
|
| 78 |
-
error_empty=ValueError("ERROR: Input Text cannot be empty!")
|
| 79 |
-
error_unknown=TypeError("Unknown Error.")
|
| 80 |
|
| 81 |
-
|
| 82 |
-
|
| 83 |
-
|
| 84 |
-
|
| 85 |
-
|
| 86 |
-
|
| 87 |
-
|
| 88 |
-
|
| 89 |
-
|
| 90 |
-
|
| 91 |
-
temperature=temperature,
|
| 92 |
-
top_k=top_k,
|
| 93 |
-
top_p=top_p,
|
| 94 |
-
num_beams=num_beams,
|
| 95 |
-
repetition_penalty=repetition_penalty,
|
| 96 |
-
do_sample=do_sample,
|
| 97 |
-
no_repeat_ngram_size=2,
|
| 98 |
-
num_return_sequences=3)
|
| 99 |
-
return result[0]["generated_text"], result[1]["generated_text"], result[2]["generated_text"],
|
| 100 |
-
elif repetition_penalty < 1:
|
| 101 |
-
return error_rep,error_rep,error_rep
|
| 102 |
-
elif temperature <= 0:
|
| 103 |
-
return error_temp,error_temp,error_temp
|
| 104 |
-
elif min_length > max_length:
|
| 105 |
-
return error_minmax,error_minmax,error_minmax
|
| 106 |
-
elif type(num_beams) != int:
|
| 107 |
-
return error_numbeams_type,error_numbeams_type,error_numbeams_type
|
| 108 |
-
elif type(top_k) != int:
|
| 109 |
-
return error_topk_type,error_topk_type,error_topk_type
|
| 110 |
-
elif type(min_length) != int or type(max_length) != int:
|
| 111 |
-
return error_minmax_type,error_minmax_type,error_minmax_type
|
| 112 |
-
elif text == '':
|
| 113 |
-
return error_empty,error_empty,error_empty
|
| 114 |
-
else:
|
| 115 |
-
return error_unknown,error_unknown,error_unknown
|
| 116 |
|
|
|
|
| 117 |
|
| 118 |
-
# create the baseline examples
|
| 119 |
examples = [
|
| 120 |
["Indonesia adalah negara kepulauan", 10, 30, 1.0, 25, 0.92, 5, 2.0, True],
|
| 121 |
["Indonesia adalah negara kepulauan", 10, 30, 1.0, 25, 0.92, 5, 1.0, False],
|
|
@@ -124,9 +88,7 @@ examples = [
|
|
| 124 |
["Pemandangan di pantai kuta Bali sangatlah indah.", 30, 50, 0.5, 40, 0.98, 10, 1.0, True],
|
| 125 |
["Pemandangan di pantai kuta Bali sangatlah indah.", 10, 30, 1.5, 30, 0.93, 5, 2.0, True]]
|
| 126 |
|
| 127 |
-
|
| 128 |
-
with gr.Blocks(title="GPT-2 Indonesian Text Generation Playground", theme='Default') as app:
|
| 129 |
-
gr.Markdown("""
|
| 130 |
<style>
|
| 131 |
.center {
|
| 132 |
display: block;
|
|
@@ -147,66 +109,31 @@ with gr.Blocks(title="GPT-2 Indonesian Text Generation Playground", theme='Defau
|
|
| 147 |
border="0"
|
| 148 |
class="center"
|
| 149 |
style="height: 100px; width: 100px;"/>
|
| 150 |
-
<h1>GPT-2 Indonesian Text Generation Playground</h1>"""
|
| 151 |
-
|
| 152 |
-
|
| 153 |
-
|
| 154 |
-
|
| 155 |
-
#single generation
|
| 156 |
-
with gr.TabItem("Single Generation"):
|
| 157 |
-
with gr.Row():
|
| 158 |
-
with gr.Column():
|
| 159 |
-
input1=[gr.Textbox(lines=5, label="Input Text"),
|
| 160 |
-
gr.Slider(label="Min Length", minimum=10, maximum=50, value=10, step=5),
|
| 161 |
-
gr.Slider(label="Max Length", minimum=10, maximum=100, value=30, step=10),
|
| 162 |
-
gr.Number(label="Temperature Sampling", value=1.5),
|
| 163 |
-
gr.Slider(label="Top K Sampling", minimum=0, maximum=100, value=30, step=5),
|
| 164 |
-
gr.Slider(label="Top P Sampling", minimum=0.01, maximum=1, value=0.93),
|
| 165 |
-
gr.Slider(label="Number of Beams", minimum=1, maximum=10, value=5, step=1),
|
| 166 |
-
gr.Number(label="Rep Penalty", value=2.0),
|
| 167 |
-
gr.Dropdown(label="Do Sample?", choices=[True,False], value=True, multiselect=False)]
|
| 168 |
-
|
| 169 |
-
with gr.Column():
|
| 170 |
-
output1=gr.Textbox(lines=5, max_lines=50, label="Generated Text with Greedy/Beam Search Decoding")
|
| 171 |
-
button1=gr.Button("Run the model")
|
| 172 |
-
button1.click(fn=single_generation, inputs=input1, outputs=output1, show_progress=True)
|
| 173 |
-
flag_btn = gr.Button("Flag")
|
| 174 |
-
|
| 175 |
-
callback.setup([input1,output1],"Flagged Data Points")
|
| 176 |
-
flag_btn.click(lambda *args: callback.flag(args), input1, output1, preprocess=False)
|
| 177 |
-
gr.Examples(examples, inputs=input1)
|
| 178 |
-
|
| 179 |
-
#multiple generation
|
| 180 |
-
with gr.TabItem("Multiple Generation"):
|
| 181 |
-
with gr.Row():
|
| 182 |
-
with gr.Column():
|
| 183 |
-
input2=[gr.Textbox(lines=5, label="Input Text"),
|
| 184 |
-
gr.Slider(label="Min Length", minimum=10, maximum=50, value=10, step=5),
|
| 185 |
-
gr.Slider(label="Max Length", minimum=10, maximum=100, value=30, step=10),
|
| 186 |
-
gr.Number(label="Temperature Sampling", value=1.5),
|
| 187 |
-
gr.Slider(label="Top K Sampling", minimum=0, maximum=100, value=30, step=5),
|
| 188 |
-
gr.Slider(label="Top P Sampling", minimum=0.01, maximum=1, value=0.93),
|
| 189 |
-
gr.Slider(label="Number of Beams", minimum=1, maximum=10, value=5, step=1),
|
| 190 |
-
gr.Number(label="Rep Penalty", value=2.0),
|
| 191 |
-
gr.Dropdown(label="Do Sample?", choices=[True,False], value=True, multiselect=False)]
|
| 192 |
-
with gr.Column():
|
| 193 |
-
output2=[gr.Textbox(lines=5, max_lines=50, label="#1 Generated Text with Greedy/Beam Search Decoding"),
|
| 194 |
-
gr.Textbox(lines=5, max_lines=50, label="#2 Generated Text with Greedy/Beam Search Decoding"),
|
| 195 |
-
gr.Textbox(lines=5, max_lines=50, label="#3 Generated Text with Greedy/Beam Search Decoding")]
|
| 196 |
-
button2=gr.Button("Run the model")
|
| 197 |
-
button2.click(fn=multiple_generation, inputs=input2, outputs=output2, show_progress=True)
|
| 198 |
-
flag_btn = gr.Button("Flag")
|
| 199 |
-
|
| 200 |
-
callback.setup([input2,output2],"Flagged Data Points")
|
| 201 |
-
flag_btn.click(lambda *args: callback.flag(args), input2, output2, preprocess=False)
|
| 202 |
-
gr.Examples(examples, inputs=input2)
|
| 203 |
-
|
| 204 |
-
gr.Markdown("""<p style='text-align: center'>Copyright Anugrah Akbar Praramadhan 2023 <br>
|
| 205 |
<p style='text-align: center'> Trained on Indo4B Benchmark Dataset of Indonesian language Wikipedia with a Causal Language Modeling (CLM) objective <br>
|
| 206 |
<p style='text-align: center'><a href='https://huggingface.co/anugrahap/gpt2-indo-textgen' target='_blank'>Link to the Trained Model</a><br>
|
| 207 |
<p style='text-align: center'><a href='https://huggingface.co/spaces/anugrahap/gpt2-indo-text-gen/tree/main' target='_blank'>Link to the Project Repository</a><br>
|
|
|
|
| 208 |
<p style='text-align: center'><a href='https://d4mucfpksywv.cloudfront.net/better-language-models/language_models_are_unsupervised_multitask_learners.pdf' target='_blank'>Original Paper</a>
|
| 209 |
-
"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 210 |
|
| 211 |
if __name__=='__main__':
|
| 212 |
app.launch()
|
|
|
|
# this is version two with flagging features
import os

import gradio as gr
from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM

# initialize the environment
model_name = 'anugrahap/gpt2-indo-textgen'

# SECURITY: never hardcode API tokens in source. The token previously inlined
# here was committed to a public repo and must be revoked. Configure HF_TOKEN
# as a secret in the Space settings; it is only needed by the flagging saver.
HF_TOKEN = os.environ.get('HF_TOKEN', '')
hf_writer = gr.HuggingFaceDatasetSaver(HF_TOKEN, "output-gpt2-indo-textgen")

# NOTE: explicit AutoTokenizer/AutoModelForCausalLM construction was dropped;
# the pipeline below loads both tokenizer and model from the hub by name.
generator = pipeline('text-generation', model=model_name)
| 20 |
|
| 21 |
# create the decoder parameter to generate the text
|
| 22 |
def single_generation(text,min_length,max_length,temperature,top_k,top_p,num_beams,repetition_penalty,do_sample):
|
|
|
|
| 66 |
else:
|
| 67 |
return error_unknown
|
| 68 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 69 |
|
| 70 |
# UI widgets for the gradio app. The list order must line up one-to-one with
# single_generation's positional parameters.
_text_input = gr.Textbox(lines=5, label="Input Text")
_min_len = gr.Slider(label="Min Length", minimum=10, maximum=50, value=10, step=5)
_max_len = gr.Slider(label="Max Length", minimum=10, maximum=100, value=30, step=10)
_temperature = gr.Number(label="Temperature Sampling", value=1.5)
_top_k = gr.Slider(label="Top K Sampling", minimum=0, maximum=100, value=30, step=5)
_top_p = gr.Slider(label="Top P Sampling", minimum=0.01, maximum=1, value=0.93)
_num_beams = gr.Slider(label="Number of Beams", minimum=1, maximum=10, value=5, step=1)
_rep_penalty = gr.Number(label="Rep Penalty", value=2.0)
_do_sample = gr.Dropdown(label="Do Sample?", choices=[True,False], value=True, multiselect=False)

forinput = [_text_input, _min_len, _max_len, _temperature, _top_k,
            _top_p, _num_beams, _rep_penalty, _do_sample]

# Single output textbox that shows the generated continuation.
foroutput = gr.Textbox(lines=5, max_lines=50, label="Generated Text with Greedy/Beam Search Decoding")
| 82 |
|
|
|
|
| 83 |
examples = [
|
| 84 |
["Indonesia adalah negara kepulauan", 10, 30, 1.0, 25, 0.92, 5, 2.0, True],
|
| 85 |
["Indonesia adalah negara kepulauan", 10, 30, 1.0, 25, 0.92, 5, 1.0, False],
|
|
|
|
| 88 |
["Pemandangan di pantai kuta Bali sangatlah indah.", 30, 50, 0.5, 40, 0.98, 10, 1.0, True],
|
| 89 |
["Pemandangan di pantai kuta Bali sangatlah indah.", 10, 30, 1.5, 30, 0.93, 5, 2.0, True]]
|
| 90 |
|
| 91 |
+
title = """
|
|
|
|
|
|
|
| 92 |
<style>
|
| 93 |
.center {
|
| 94 |
display: block;
|
|
|
|
| 109 |
border="0"
|
| 110 |
class="center"
|
| 111 |
style="height: 100px; width: 100px;"/>
|
| 112 |
+
<h1>GPT-2 Indonesian Text Generation Playground</h1>"""
|
| 113 |
+
|
| 114 |
+
description = "<p><i>This project is a part of thesis requirement of Anugrah Akbar Praramadhan</i></p>"
|
| 115 |
+
|
| 116 |
+
article = """<p style='text-align: center'>Copyright Anugrah Akbar Praramadhan 2023 <br>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 117 |
<p style='text-align: center'> Trained on Indo4B Benchmark Dataset of Indonesian language Wikipedia with a Causal Language Modeling (CLM) objective <br>
|
| 118 |
<p style='text-align: center'><a href='https://huggingface.co/anugrahap/gpt2-indo-textgen' target='_blank'>Link to the Trained Model</a><br>
|
| 119 |
<p style='text-align: center'><a href='https://huggingface.co/spaces/anugrahap/gpt2-indo-text-gen/tree/main' target='_blank'>Link to the Project Repository</a><br>
|
| 120 |
+
<p style='text-align: center'><a href='https://huggingface.co/datasets/anugrahap/output-gpt2-indo-textgen/' target='_blank'>Link to the Autosaved Generated Output</a><br>
|
| 121 |
<p style='text-align: center'><a href='https://d4mucfpksywv.cloudfront.net/better-language-models/language_models_are_unsupervised_multitask_learners.pdf' target='_blank'>Original Paper</a>
|
| 122 |
+
"""
|
# Assemble the Gradio Interface. Manual flagging is enabled and flagged
# samples are persisted to the HF dataset through hf_writer.
_interface_kwargs = dict(
    fn=single_generation,
    inputs=forinput,
    outputs=foroutput,
    examples=examples,
    title=title,
    description=description,
    article=article,
    allow_flagging='manual',
    flagging_options=['Well Performed', 'Inappropriate Word Selection',
                      'Wordy', 'Strange Word', 'Others'],
    flagging_callback=hf_writer,
)
app = gr.Interface(**_interface_kwargs)
|
| 136 |
+
|
| 137 |
|
# Script entry point: start the Gradio server only when run directly.
if __name__ == '__main__':
    app.launch()