Spaces:
Runtime error
Runtime error
Commit
·
cf3b6c5
1
Parent(s):
2f09d3b
initialize
Browse files- app.py +65 -0
- humaneval_v_test_hf/data-00000-of-00001.arrow +3 -0
- humaneval_v_test_hf/dataset_info.json +57 -0
- humaneval_v_test_hf/state.json +13 -0
app.py
ADDED
|
@@ -0,0 +1,65 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import streamlit as st
|
| 2 |
+
import datasets
|
| 3 |
+
|
| 4 |
+
def _rerun():
    """Restart the current script run on both old and new Streamlit versions."""
    # `st.experimental_rerun` no longer exists in recent Streamlit releases
    # (replaced by `st.rerun`); only fall back to the old name when the new
    # one is missing so the app keeps working on older pinned versions too.
    rerun = getattr(st, "rerun", None) or st.experimental_rerun
    rerun()


def _show_code(title, code):
    """Render a level-3 heading, a spacer, and *code* in a Python code fence."""
    st.markdown(f"### {title}:")
    st.markdown("")
    st.markdown(f"```python\n{code}\n```")


# Load the dataset that was saved next to this script with `save_to_disk`.
humaneval_v_data = datasets.load_from_disk("humaneval_v_test_hf")

# Page-level configuration, followed by a horizontal rule at the top.
st.set_page_config(layout="wide", page_title="HumanEval-V Viewer")
st.markdown("---")

# Upper bound of the 1-based index shown to the user. Derived from the
# dataset so the label and the widget limit can never drift apart.
max_index = len(humaneval_v_data)

# Initialize session state for index if not present
if "index" not in st.session_state:
    st.session_state.index = 1

buttons = st.columns([2, 1, 6])

with buttons[1]:
    # Number input for navigation
    index_input = st.number_input(
        f"Go to index (1-{max_index}):",
        min_value=1,
        max_value=max_index,
        value=st.session_state.index,
        key="index_input",
        help="Enter an index and jump to that index.",
        step=1,  # Increment by 1
    )

with buttons[0]:
    st.markdown("# HumanEval-V Viewer")

# Check if the input differs from the current session state and update it
if index_input != st.session_state.index:
    st.session_state.index = index_input
    _rerun()

# The UI index is 1-based; dataset rows are 0-based.
coding_task = humaneval_v_data[st.session_state.index - 1]
qid = coding_task["qid"]
image = coding_task["image"]
function_signature = coding_task["function_signature"]
ground_truth = coding_task["ground_truth_solution"]
test_script = coding_task["test_script"]

upper_columns = st.columns([2, 7])

with upper_columns[0]:
    st.markdown(f"### Question ID: {qid}")
    # NOTE(review): `use_column_width` is reported as deprecated in newer
    # Streamlit releases; confirm against the pinned version before changing.
    st.image(image, use_column_width=True)
    st.markdown("---")

with upper_columns[1]:
    _show_code("Function Signature", function_signature)
    _show_code("Test Script", test_script)
    _show_code("Ground Truth Solution", ground_truth)
|
humaneval_v_test_hf/data-00000-of-00001.arrow
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:774e4cacfc259917fb5a5e8433e6cacbcac01063cb30fd3560170b3a0a9fa76e
|
| 3 |
+
size 12842912
|
humaneval_v_test_hf/dataset_info.json
ADDED
|
@@ -0,0 +1,57 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"builder_name": "parquet",
|
| 3 |
+
"citation": "",
|
| 4 |
+
"config_name": "default",
|
| 5 |
+
"dataset_name": "human_eval-v-benchmark",
|
| 6 |
+
"dataset_size": 12841384,
|
| 7 |
+
"description": "",
|
| 8 |
+
"download_checksums": {
|
| 9 |
+
"hf://datasets/HumanEval-V/HumanEval-V-Benchmark@50af2be232641ca618f6aecce901ca5e5a83b20e/data/test-00000-of-00001.parquet": {
|
| 10 |
+
"num_bytes": 12571814,
|
| 11 |
+
"checksum": null
|
| 12 |
+
}
|
| 13 |
+
},
|
| 14 |
+
"download_size": 12571814,
|
| 15 |
+
"features": {
|
| 16 |
+
"qid": {
|
| 17 |
+
"dtype": "string",
|
| 18 |
+
"_type": "Value"
|
| 19 |
+
},
|
| 20 |
+
"ground_truth_solution": {
|
| 21 |
+
"dtype": "string",
|
| 22 |
+
"_type": "Value"
|
| 23 |
+
},
|
| 24 |
+
"image_description": {
|
| 25 |
+
"dtype": "string",
|
| 26 |
+
"_type": "Value"
|
| 27 |
+
},
|
| 28 |
+
"test_script": {
|
| 29 |
+
"dtype": "string",
|
| 30 |
+
"_type": "Value"
|
| 31 |
+
},
|
| 32 |
+
"function_signature": {
|
| 33 |
+
"dtype": "string",
|
| 34 |
+
"_type": "Value"
|
| 35 |
+
},
|
| 36 |
+
"image": {
|
| 37 |
+
"_type": "Image"
|
| 38 |
+
}
|
| 39 |
+
},
|
| 40 |
+
"homepage": "",
|
| 41 |
+
"license": "",
|
| 42 |
+
"size_in_bytes": 25413198,
|
| 43 |
+
"splits": {
|
| 44 |
+
"test": {
|
| 45 |
+
"name": "test",
|
| 46 |
+
"num_bytes": 12841384,
|
| 47 |
+
"num_examples": 108,
|
| 48 |
+
"dataset_name": "human_eval-v-benchmark"
|
| 49 |
+
}
|
| 50 |
+
},
|
| 51 |
+
"version": {
|
| 52 |
+
"version_str": "0.0.0",
|
| 53 |
+
"major": 0,
|
| 54 |
+
"minor": 0,
|
| 55 |
+
"patch": 0
|
| 56 |
+
}
|
| 57 |
+
}
|
humaneval_v_test_hf/state.json
ADDED
|
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"_data_files": [
|
| 3 |
+
{
|
| 4 |
+
"filename": "data-00000-of-00001.arrow"
|
| 5 |
+
}
|
| 6 |
+
],
|
| 7 |
+
"_fingerprint": "d8ffc8935ede93f4",
|
| 8 |
+
"_format_columns": null,
|
| 9 |
+
"_format_kwargs": {},
|
| 10 |
+
"_format_type": null,
|
| 11 |
+
"_output_all_columns": false,
|
| 12 |
+
"_split": "test"
|
| 13 |
+
}
|