Update app.py
Browse files
app.py
CHANGED
|
@@ -325,6 +325,10 @@ with gr.Blocks() as demo:
|
|
| 325 |
""")
|
| 326 |
with gr.Row():
|
| 327 |
with gr.Column():
|
|
|
|
|
|
|
|
|
|
|
|
|
| 328 |
vocab_size = gr.Number(
|
| 329 |
label="Vocab Size",
|
| 330 |
value=51200,
|
|
@@ -350,25 +354,10 @@ with gr.Blocks() as demo:
|
|
| 350 |
value=1.0,
|
| 351 |
info="Ratio of kv heads to query heads used in model. 1.0 for MHA"
|
| 352 |
)
|
| 353 |
-
|
| 354 |
-
label="
|
| 355 |
-
value=
|
| 356 |
-
info="
|
| 357 |
-
)
|
| 358 |
-
moe = gr.Checkbox(
|
| 359 |
-
label="Mixture of Experts (MoE)",
|
| 360 |
-
value=False,
|
| 361 |
-
info="Whether the model uses Mixture of Experts"
|
| 362 |
-
)
|
| 363 |
-
num_experts = gr.Number(
|
| 364 |
-
label="Number of Experts",
|
| 365 |
-
value=128,
|
| 366 |
-
info="Number of experts for Mixture of Experts (MoE)"
|
| 367 |
-
)
|
| 368 |
-
expert_interval = gr.Number(
|
| 369 |
-
label="Expert Interval",
|
| 370 |
-
value=2,
|
| 371 |
-
info="Expert interval for Mixture of Experts (MoE)"
|
| 372 |
)
|
| 373 |
batch_size = gr.Number(
|
| 374 |
label="Batch Size",
|
|
@@ -385,17 +374,35 @@ with gr.Blocks() as demo:
|
|
| 385 |
value=True,
|
| 386 |
info="Whether Megatron-style activation checkpointing is being used"
|
| 387 |
)
|
| 388 |
-
ffn_expansion_factor = gr.Number(
|
| 389 |
-
label="FFN Expansion Factor",
|
| 390 |
-
value=4,
|
| 391 |
-
info="How much the MLP hidden size expands"
|
| 392 |
-
)
|
| 393 |
infer = gr.Checkbox(
|
| 394 |
label="Inference-Only",
|
| 395 |
value=False,
|
| 396 |
info="Whether the model is being used for inference-only"
|
| 397 |
)
|
| 398 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 399 |
calc_flops_button = gr.Button("Calculate FLOPs")
|
| 400 |
flops_result = gr.JSON(label="FLOP Calculation Result", interactive=False)
|
| 401 |
calc_flops_button.click(
|
|
@@ -404,8 +411,8 @@ with gr.Blocks() as demo:
|
|
| 404 |
outputs=flops_result
|
| 405 |
)
|
| 406 |
|
| 407 |
-
hf_model_name_or_path
|
| 408 |
-
|
| 409 |
-
|
| 410 |
|
| 411 |
demo.launch()
|
|
|
|
| 325 |
""")
|
| 326 |
with gr.Row():
|
| 327 |
with gr.Column():
|
| 328 |
+
hf_model_name_or_path = gr.Textbox(
|
| 329 |
+
label="HuggingFace Model Name or Path",
|
| 330 |
+
info="Name of the HuggingFace Hub repository or the local file path for it"
|
| 331 |
+
)
|
| 332 |
vocab_size = gr.Number(
|
| 333 |
label="Vocab Size",
|
| 334 |
value=51200,
|
|
|
|
| 354 |
value=1.0,
|
| 355 |
info="Ratio of kv heads to query heads used in model. 1.0 for MHA"
|
| 356 |
)
|
| 357 |
+
ffn_expansion_factor = gr.Number(
|
| 358 |
+
label="FFN Expansion Factor",
|
| 359 |
+
value=4,
|
| 360 |
+
info="How much the MLP hidden size expands"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 361 |
)
|
| 362 |
batch_size = gr.Number(
|
| 363 |
label="Batch Size",
|
|
|
|
| 374 |
value=True,
|
| 375 |
info="Whether Megatron-style activation checkpointing is being used"
|
| 376 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 377 |
infer = gr.Checkbox(
|
| 378 |
label="Inference-Only",
|
| 379 |
value=False,
|
| 380 |
info="Whether the model is being used for inference-only"
|
| 381 |
)
|
| 382 |
|
| 383 |
+
# MoE parameters hidden in accordion
|
| 384 |
+
with gr.Accordion("Mixture of Experts (MoE)", open=False):
|
| 385 |
+
moe = gr.Checkbox(
|
| 386 |
+
label="Mixture of Experts (MoE)",
|
| 387 |
+
value=False,
|
| 388 |
+
info="Whether the model uses Mixture of Experts"
|
| 389 |
+
)
|
| 390 |
+
num_experts = gr.Number(
|
| 391 |
+
label="Number of Experts",
|
| 392 |
+
value=128,
|
| 393 |
+
info="Number of experts for Mixture of Experts (MoE)"
|
| 394 |
+
)
|
| 395 |
+
expert_interval = gr.Number(
|
| 396 |
+
label="Expert Interval",
|
| 397 |
+
value=2,
|
| 398 |
+
info="Expert interval for Mixture of Experts (MoE)"
|
| 399 |
+
)
|
| 400 |
+
topk = gr.Number(
|
| 401 |
+
label="Top K Routing for MoE",
|
| 402 |
+
value=1,
|
| 403 |
+
info="Top k routing for Mixture of Experts (MoE)"
|
| 404 |
+
)
|
| 405 |
+
|
| 406 |
calc_flops_button = gr.Button("Calculate FLOPs")
|
| 407 |
flops_result = gr.JSON(label="FLOP Calculation Result", interactive=False)
|
| 408 |
calc_flops_button.click(
|
|
|
|
| 411 |
outputs=flops_result
|
| 412 |
)
|
| 413 |
|
| 414 |
+
hf_model_name_or_path.change(fn=update_from_hf_model,
|
| 415 |
+
inputs=[hf_model_name_or_path],
|
| 416 |
+
outputs=[num_layers, hidden_size, vocab_size, sequence_length])
|
| 417 |
|
| 418 |
demo.launch()
|