Spaces:
Running
on
CPU Upgrade
Running
on
CPU Upgrade
feat: improve the submitting process
Browse files
app.py
CHANGED
|
@@ -12,7 +12,7 @@ from src.display.css_html_js import custom_css
|
|
| 12 |
from src.leaderboard.read_evals import get_raw_eval_results, get_leaderboard_df
|
| 13 |
|
| 14 |
from src.envs import API, EVAL_RESULTS_PATH, REPO_ID, RESULTS_REPO, TOKEN
|
| 15 |
-
from utils import update_table, update_metric, update_table_long_doc, upload_file, get_default_cols
|
| 16 |
from src.benchmarks import DOMAIN_COLS_QA, LANG_COLS_QA, DOMAIN_COLS_LONG_DOC, LANG_COLS_LONG_DOC, metric_list
|
| 17 |
from src.display.utils import TYPES_QA, TYPES_LONG_DOC
|
| 18 |
|
|
@@ -306,10 +306,14 @@ with demo:
|
|
| 306 |
model_name = gr.Textbox(label="Model name")
|
| 307 |
with gr.Column():
|
| 308 |
model_url = gr.Textbox(label="Model URL")
|
|
|
|
|
|
|
| 309 |
with gr.Row():
|
| 310 |
file_output = gr.File()
|
| 311 |
with gr.Row():
|
| 312 |
-
|
|
|
|
|
|
|
| 313 |
upload_button.upload(
|
| 314 |
upload_file,
|
| 315 |
[
|
|
@@ -319,6 +323,16 @@ with demo:
|
|
| 319 |
benchmark_version,
|
| 320 |
],
|
| 321 |
file_output)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 322 |
|
| 323 |
with gr.TabItem("📝 About", elem_id="llm-benchmark-tab-table", id=3):
|
| 324 |
gr.Markdown(BENCHMARKS_TEXT, elem_classes="markdown-text")
|
|
|
|
| 12 |
from src.leaderboard.read_evals import get_raw_eval_results, get_leaderboard_df
|
| 13 |
|
| 14 |
from src.envs import API, EVAL_RESULTS_PATH, REPO_ID, RESULTS_REPO, TOKEN
|
| 15 |
+
from utils import update_table, update_metric, update_table_long_doc, upload_file, get_default_cols, submit_results
|
| 16 |
from src.benchmarks import DOMAIN_COLS_QA, LANG_COLS_QA, DOMAIN_COLS_LONG_DOC, LANG_COLS_LONG_DOC, metric_list
|
| 17 |
from src.display.utils import TYPES_QA, TYPES_LONG_DOC
|
| 18 |
|
|
|
|
| 306 |
model_name = gr.Textbox(label="Model name")
|
| 307 |
with gr.Column():
|
| 308 |
model_url = gr.Textbox(label="Model URL")
|
| 309 |
+
with gr.Row():
|
| 310 |
+
upload_button = gr.UploadButton("Upload search results", file_count="single")
|
| 311 |
with gr.Row():
|
| 312 |
file_output = gr.File()
|
| 313 |
with gr.Row():
|
| 314 |
+
submit_button = gr.Button("Submit")
|
| 315 |
+
with gr.Row():
|
| 316 |
+
submission_result = gr.Markdown()
|
| 317 |
upload_button.upload(
|
| 318 |
upload_file,
|
| 319 |
[
|
|
|
|
| 323 |
benchmark_version,
|
| 324 |
],
|
| 325 |
file_output)
|
| 326 |
+
submit_button.click(
|
| 327 |
+
submit_results,
|
| 328 |
+
[
|
| 329 |
+
file_output,
|
| 330 |
+
model_name,
|
| 331 |
+
model_url
|
| 332 |
+
],
|
| 333 |
+
submission_result,
|
| 334 |
+
show_progress="hidden"
|
| 335 |
+
)
|
| 336 |
|
| 337 |
with gr.TabItem("📝 About", elem_id="llm-benchmark-tab-table", id=3):
|
| 338 |
gr.Markdown(BENCHMARKS_TEXT, elem_classes="markdown-text")
|
utils.py
CHANGED
|
@@ -12,6 +12,7 @@ from src.benchmarks import BENCHMARK_COLS_QA, BENCHMARK_COLS_LONG_DOC, Benchmark
|
|
| 12 |
from src.display.utils import AutoEvalColumnQA, AutoEvalColumnLongDoc, COLS_QA, COLS_LONG_DOC, COL_NAME_RANK, COL_NAME_AVG, COL_NAME_RERANKING_MODEL, COL_NAME_RETRIEVAL_MODEL
|
| 13 |
from src.leaderboard.read_evals import FullEvalResult, get_leaderboard_df
|
| 14 |
from src.envs import API, SEARCH_RESULTS_REPO, CACHE_PATH
|
|
|
|
| 15 |
|
| 16 |
|
| 17 |
def filter_models(df: pd.DataFrame, reranking_query: list) -> pd.DataFrame:
|
|
@@ -149,8 +150,34 @@ def upload_file(
|
|
| 149 |
print(f"file uploading aborted. wrong file type: {filepath}")
|
| 150 |
return filepath
|
| 151 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 152 |
# rename the uploaded file
|
| 153 |
input_fp = Path(filepath)
|
|
|
|
| 154 |
timezone = pytz.timezone('UTC')
|
| 155 |
timestamp = datetime.now(timezone).strftime('%Y%m%d%H%M%S')
|
| 156 |
output_fn = f"{timestamp}-{input_fp.name}"
|
|
@@ -166,7 +193,9 @@ def upload_file(
|
|
| 166 |
output_config = {
|
| 167 |
"model_name": f"{model}",
|
| 168 |
"model_url": f"{model_url}",
|
| 169 |
-
"version": f"{version}"
|
|
|
|
|
|
|
| 170 |
}
|
| 171 |
with open(input_folder_path / output_config_fn, "w") as f:
|
| 172 |
json.dump(output_config, f, ensure_ascii=False)
|
|
@@ -176,4 +205,6 @@ def upload_file(
|
|
| 176 |
repo_id=SEARCH_RESULTS_REPO,
|
| 177 |
repo_type="dataset",
|
| 178 |
commit_message=f"feat: submit {model} config")
|
| 179 |
-
return
|
|
|
|
|
|
|
|
|
| 12 |
from src.display.utils import AutoEvalColumnQA, AutoEvalColumnLongDoc, COLS_QA, COLS_LONG_DOC, COL_NAME_RANK, COL_NAME_AVG, COL_NAME_RERANKING_MODEL, COL_NAME_RETRIEVAL_MODEL
|
| 13 |
from src.leaderboard.read_evals import FullEvalResult, get_leaderboard_df
|
| 14 |
from src.envs import API, SEARCH_RESULTS_REPO, CACHE_PATH
|
| 15 |
+
from src.display.formatting import styled_message, styled_error
|
| 16 |
|
| 17 |
|
| 18 |
def filter_models(df: pd.DataFrame, reranking_query: list) -> pd.DataFrame:
|
|
|
|
| 150 |
print(f"file uploading aborted. wrong file type: {filepath}")
|
| 151 |
return filepath
|
| 152 |
|
| 153 |
+
return filepath
|
| 154 |
+
|
| 155 |
+
from huggingface_hub import ModelCard
|
| 156 |
+
from huggingface_hub.utils import EntryNotFoundError
|
| 157 |
+
|
| 158 |
+
def submit_results(filepath: str, model: str, model_url: str, version: str="AIR-Bench_24.04"):
|
| 159 |
+
if not filepath.endswith(".zip"):
|
| 160 |
+
return styled_error(f"file uploading aborted. wrong file type: {filepath}")
|
| 161 |
+
|
| 162 |
+
# validate model
|
| 163 |
+
if not model:
|
| 164 |
+
return styled_error("failed to submit. Model name can not be empty.")
|
| 165 |
+
|
| 166 |
+
# validate model url
|
| 167 |
+
if not model_url.startswith("https://huggingface.co/"):
|
| 168 |
+
return styled_error(f"failed to submit. Model url must be a link to a valid HuggingFace model on HuggingFace space. Illegal model url: {model_url}")
|
| 169 |
+
|
| 170 |
+
# validate model card
|
| 171 |
+
repo_id=model_url.removeprefix("https://huggingface.co/")
|
| 172 |
+
try:
|
| 173 |
+
card = ModelCard.load(repo_id)
|
| 174 |
+
except EntryNotFoundError as e:
|
| 175 |
+
print(e)
|
| 176 |
+
return styled_error(f"failed to submit. Model url must be a link to a valid HuggingFace model on HuggingFace space. Could not get model {repo_id}")
|
| 177 |
+
|
| 178 |
# rename the uploaded file
|
| 179 |
input_fp = Path(filepath)
|
| 180 |
+
revision = input_fp.name.removesuffix(".zip")
|
| 181 |
timezone = pytz.timezone('UTC')
|
| 182 |
timestamp = datetime.now(timezone).strftime('%Y%m%d%H%M%S')
|
| 183 |
output_fn = f"{timestamp}-{input_fp.name}"
|
|
|
|
| 193 |
output_config = {
|
| 194 |
"model_name": f"{model}",
|
| 195 |
"model_url": f"{model_url}",
|
| 196 |
+
"version": f"{version}",
|
| 197 |
+
"revision": f"{revision}",
|
| 198 |
+
"timestamp": f"{timestamp}"
|
| 199 |
}
|
| 200 |
with open(input_folder_path / output_config_fn, "w") as f:
|
| 201 |
json.dump(output_config, f, ensure_ascii=False)
|
|
|
|
| 205 |
repo_id=SEARCH_RESULTS_REPO,
|
| 206 |
repo_type="dataset",
|
| 207 |
commit_message=f"feat: submit {model} config")
|
| 208 |
+
return styled_message(
|
| 209 |
+
f"Thanks for submission!\nSubmission revision: {revision}"
|
| 210 |
+
)
|