jing084 committed on
Commit
5e18985
·
verified ·
1 Parent(s): 700e606

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +30 -35
app.py CHANGED
@@ -1,17 +1,16 @@
1
  #!/usr/bin/env python
2
  import os
3
- os.environ["GRADIO_LANGUAGE"] = "en"
 
4
 
 
5
 
6
  RESULT_DIR = os.environ.get("MOECAP_RESULT_DIR")
7
  if not RESULT_DIR:
8
  raise RuntimeError(
9
- "MOECAP_RESULT_DIR is not set. Please set MOECAP_RESULT_DIR before running app.py"
10
  )
11
 
12
- import json
13
- from typing import List, Tuple
14
-
15
  import gradio as gr
16
  import pandas as pd
17
  from datasets import load_dataset
@@ -77,8 +76,6 @@ def json_to_row(path: str, metrics: dict) -> dict:
77
  return row
78
 
79
 
80
- # uoload
81
-
82
  def build_leaderboard_from_files(files: List[gr.File], prev_rows: list | None = None):
83
  if prev_rows is None:
84
  prev_rows = []
@@ -103,7 +100,7 @@ def build_leaderboard_from_files(files: List[gr.File], prev_rows: list | None =
103
  else:
104
  links.append(str(name))
105
  models_str = ", ".join(links)
106
- summary_md = f"**Loaded {len(prev_rows)} result files.** \n**Models:** {models_str}"
107
  table_html = df.to_html(escape=False, index=False, classes="metrics-table")
108
  return summary_md, table_html, prev_rows
109
 
@@ -144,20 +141,31 @@ def build_leaderboard_from_files(files: List[gr.File], prev_rows: list | None =
144
  else:
145
  links.append(str(name))
146
  models_str = ", ".join(links)
147
- summary_md = f"**Loaded {len(all_rows)} result files.** \n**Models:** {models_str}"
148
 
149
  table_html = df.to_html(escape=False, index=False, classes="metrics-table")
150
 
151
  return summary_md, table_html, all_rows
152
 
153
 
154
- def load_from_dir(dir_path: str):
 
 
155
  try:
156
  pattern = f"hf://datasets/{dir_path}/**/*.json"
157
- ds = load_dataset("json", data_files={"train": pattern}, split="train")
 
 
 
 
 
 
 
 
 
158
  except Exception as e:
159
- empty_html = "<p>No files loaded.</p>"
160
- return f"Failed to load dataset `{dir_path}`: {e}", empty_html
161
 
162
  rows = []
163
  for i, example in enumerate(ds):
@@ -194,7 +202,7 @@ def load_from_dir(dir_path: str):
194
  models_str = ", ".join(links)
195
 
196
  summary_md = (
197
- f"**Loaded {len(rows)} result files from dataset `{dir_path}`.** \n"
198
  f"**Models:** {models_str}"
199
  )
200
 
@@ -203,6 +211,11 @@ def load_from_dir(dir_path: str):
203
  return summary_md, table_html
204
 
205
 
 
 
 
 
 
206
  # Gradio UI
207
 
208
  def build_app() -> gr.Blocks:
@@ -250,41 +263,23 @@ def build_app() -> gr.Blocks:
250
  )
251
 
252
  with gr.Column(scale=1):
253
- # manual upload
254
- # files_input = gr.Files(
255
- # label="Upload `cap_metrics_*.json` files",
256
- # file_types=[".json"],
257
- # file_count="multiple",
258
- # )
259
- # run_button = gr.Button("Parse Uploaded Files")
260
-
261
  dir_path = gr.Textbox(
262
- label="Load from output directory",
263
  value=RESULT_DIR,
264
  lines=1,
 
265
  )
266
- load_dir_button = gr.Button("Load from directory")
267
-
268
- # upload_summary = gr.Markdown(label="Upload Summary")
269
- # upload_table = gr.HTML(label="Upload Metrics")
270
 
271
  summary_output = gr.Markdown(label="Directory Summary")
272
  leaderboard_output = gr.HTML(label="Directory Metrics")
273
 
274
- # run_button.click(
275
- # fn=build_leaderboard_from_files,
276
- # inputs=files_input,
277
- # outputs=[upload_summary, upload_table],
278
- # )
279
-
280
  load_dir_button.click(
281
  fn=load_from_dir,
282
  inputs=dir_path,
283
  outputs=[summary_output, leaderboard_output],
284
  )
285
 
286
-
287
-
288
  timer = gr.Timer(5.0)
289
  timer.tick(
290
  fn=auto_refresh_from_dir,
 
1
  #!/usr/bin/env python
2
  import os
3
+ import json
4
+ from typing import List, Tuple
5
 
6
+ os.environ["GRADIO_LANGUAGE"] = "en"
7
 
8
  RESULT_DIR = os.environ.get("MOECAP_RESULT_DIR")
9
  if not RESULT_DIR:
10
  raise RuntimeError(
11
+ "MOECAP_RESULT_DIR is not set. Please set MOECAP_RESULT_DIR (HF Repo ID) before running app.py"
12
  )
13
 
 
 
 
14
  import gradio as gr
15
  import pandas as pd
16
  from datasets import load_dataset
 
76
  return row
77
 
78
 
 
 
79
  def build_leaderboard_from_files(files: List[gr.File], prev_rows: list | None = None):
80
  if prev_rows is None:
81
  prev_rows = []
 
100
  else:
101
  links.append(str(name))
102
  models_str = ", ".join(links)
103
+ summary_md = f"**Loaded {len(prev_rows)} result files.** \n**Models:** {models_str}"
104
  table_html = df.to_html(escape=False, index=False, classes="metrics-table")
105
  return summary_md, table_html, prev_rows
106
 
 
141
  else:
142
  links.append(str(name))
143
  models_str = ", ".join(links)
144
+ summary_md = f"**Loaded {len(all_rows)} result files.** \n**Models:** {models_str}"
145
 
146
  table_html = df.to_html(escape=False, index=False, classes="metrics-table")
147
 
148
  return summary_md, table_html, all_rows
149
 
150
 
151
+
152
+ def load_from_dir(dir_path: str, force_refresh: bool = False):
153
+
154
  try:
155
  pattern = f"hf://datasets/{dir_path}/**/*.json"
156
+
157
+ dl_mode = "force_redownload" if force_refresh else None
158
+
159
+ print(f"Fetching from {pattern} (mode={dl_mode})...")
160
+ ds = load_dataset(
161
+ "json",
162
+ data_files={"train": pattern},
163
+ split="train",
164
+ download_mode=dl_mode
165
+ )
166
  except Exception as e:
167
+ empty_html = "<p>No files loaded or Dataset not found.</p>"
168
+ return f"Failed to load dataset `{dir_path}`: {str(e)}", empty_html
169
 
170
  rows = []
171
  for i, example in enumerate(ds):
 
202
  models_str = ", ".join(links)
203
 
204
  summary_md = (
205
+ f"**Loaded {len(rows)} result files from dataset `{dir_path}`.** \n"
206
  f"**Models:** {models_str}"
207
  )
208
 
 
211
  return summary_md, table_html
212
 
213
 
214
+ def auto_refresh_from_dir(dir_path: str):
215
+ return load_from_dir(dir_path, force_refresh=True)
216
+
217
+
218
+
219
  # Gradio UI
220
 
221
  def build_app() -> gr.Blocks:
 
263
  )
264
 
265
  with gr.Column(scale=1):
 
 
 
 
 
 
 
 
266
  dir_path = gr.Textbox(
267
+ label="HF Dataset Repo ID",
268
  value=RESULT_DIR,
269
  lines=1,
270
+ placeholder="username/dataset-name"
271
  )
272
+ load_dir_button = gr.Button("Load from Dataset")
 
 
 
273
 
274
  summary_output = gr.Markdown(label="Directory Summary")
275
  leaderboard_output = gr.HTML(label="Directory Metrics")
276
 
 
 
 
 
 
 
277
  load_dir_button.click(
278
  fn=load_from_dir,
279
  inputs=dir_path,
280
  outputs=[summary_output, leaderboard_output],
281
  )
282
 
 
 
283
  timer = gr.Timer(5.0)
284
  timer.tick(
285
  fn=auto_refresh_from_dir,