Spaces:
Sleeping
Sleeping
jjyang77
committed on
Commit
·
da384b4
1
Parent(s):
0f87dc1
update samples input from file to data list
Browse files
- .gitignore +2 -0
- Dockerfile +1 -1
- api/app.py +14 -3
- api/bigcodebench_data.py +7 -23
.gitignore
CHANGED
|
@@ -2,6 +2,8 @@
|
|
| 2 |
**.pyc
|
| 3 |
**/__pycache__
|
| 4 |
|
|
|
|
|
|
|
| 5 |
# Testing data
|
| 6 |
/data
|
| 7 |
|
|
|
|
| 2 |
**.pyc
|
| 3 |
**/__pycache__
|
| 4 |
|
| 5 |
+
.hypothesis/
|
| 6 |
+
|
| 7 |
# Testing data
|
| 8 |
/data
|
| 9 |
|
Dockerfile
CHANGED
|
@@ -21,7 +21,7 @@ RUN pip install --upgrade pip
|
|
| 21 |
# Pre-install the dataset
|
| 22 |
#RUN python3 -c "from bigcodebench.data import get_bigcodebench; get_bigcodebench()"
|
| 23 |
|
| 24 |
-
RUN pip install fastapi gunicorn uvicorn[standard] httpx
|
| 25 |
|
| 26 |
RUN pip install -I --timeout 2000 -r https://github.com/bigcode-project/bigcodebench-annotation/releases/download/v0.1.0/requirements.txt
|
| 27 |
|
|
|
|
| 21 |
# Pre-install the dataset
|
| 22 |
#RUN python3 -c "from bigcodebench.data import get_bigcodebench; get_bigcodebench()"
|
| 23 |
|
| 24 |
+
RUN pip install fastapi gunicorn uvicorn[standard] httpx pydantic==2.*
|
| 25 |
|
| 26 |
RUN pip install -I --timeout 2000 -r https://github.com/bigcode-project/bigcodebench-annotation/releases/download/v0.1.0/requirements.txt
|
| 27 |
|
api/app.py
CHANGED
|
@@ -7,6 +7,8 @@ from concurrent.futures import ProcessPoolExecutor, as_completed
|
|
| 7 |
from typing import Dict, List, Tuple
|
| 8 |
import gc
|
| 9 |
|
|
|
|
|
|
|
| 10 |
from fastapi import FastAPI
|
| 11 |
from fastapi.responses import RedirectResponse
|
| 12 |
|
|
@@ -15,6 +17,14 @@ from api.code_execution import untrusted_check
|
|
| 15 |
|
| 16 |
Result = Tuple[str, List[bool]]
|
| 17 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 18 |
def create_app() -> FastAPI:
|
| 19 |
|
| 20 |
level = os.environ.get("LOG_LEVEL", default=logging.INFO)
|
|
@@ -33,7 +43,8 @@ def create_app() -> FastAPI:
|
|
| 33 |
|
| 34 |
@app.post("/evaluate/")
|
| 35 |
async def evaluate(
|
| 36 |
-
samples:
|
|
|
|
| 37 |
parallel: int = -1,
|
| 38 |
min_time_limit: float = 1,
|
| 39 |
max_as_limit: int = 30 * 1024,
|
|
@@ -42,7 +53,7 @@ def create_app() -> FastAPI:
|
|
| 42 |
no_gt: bool = True,
|
| 43 |
) -> dict:
|
| 44 |
"""
|
| 45 |
-
Evaluate the correctness of the solutions in the given samples
|
| 46 |
"""
|
| 47 |
if parallel < 1:
|
| 48 |
n_workers = max(1, multiprocessing.cpu_count() // 2)
|
|
@@ -71,7 +82,7 @@ def create_app() -> FastAPI:
|
|
| 71 |
|
| 72 |
solution = sample["solution"]
|
| 73 |
|
| 74 |
-
if
|
| 75 |
solution = sample["code_prompt"] + "\n pass\n" + solution
|
| 76 |
remainings.add(sample["_identifier"])
|
| 77 |
args = (
|
|
|
|
| 7 |
from typing import Dict, List, Tuple
|
| 8 |
import gc
|
| 9 |
|
| 10 |
+
from pydantic import BaseModel
|
| 11 |
+
|
| 12 |
from fastapi import FastAPI
|
| 13 |
from fastapi.responses import RedirectResponse
|
| 14 |
|
|
|
|
| 17 |
|
| 18 |
Result = Tuple[str, List[bool]]
|
| 19 |
|
| 20 |
+
class SampleDate(BaseModel):
|
| 21 |
+
task_id: str
|
| 22 |
+
solution: str
|
| 23 |
+
code_prompt: str
|
| 24 |
+
test: str
|
| 25 |
+
entry_point: str
|
| 26 |
+
res_id: int
|
| 27 |
+
|
| 28 |
def create_app() -> FastAPI:
|
| 29 |
|
| 30 |
level = os.environ.get("LOG_LEVEL", default=logging.INFO)
|
|
|
|
| 43 |
|
| 44 |
@app.post("/evaluate/")
|
| 45 |
async def evaluate(
|
| 46 |
+
samples: List[SampleDate],
|
| 47 |
+
calibrate: bool = True,
|
| 48 |
parallel: int = -1,
|
| 49 |
min_time_limit: float = 1,
|
| 50 |
max_as_limit: int = 30 * 1024,
|
|
|
|
| 53 |
no_gt: bool = True,
|
| 54 |
) -> dict:
|
| 55 |
"""
|
| 56 |
+
Evaluate the correctness of the solutions in the given samples data.
|
| 57 |
"""
|
| 58 |
if parallel < 1:
|
| 59 |
n_workers = max(1, multiprocessing.cpu_count() // 2)
|
|
|
|
| 82 |
|
| 83 |
solution = sample["solution"]
|
| 84 |
|
| 85 |
+
if calibrate:
|
| 86 |
solution = sample["code_prompt"] + "\n pass\n" + solution
|
| 87 |
remainings.add(sample["_identifier"])
|
| 88 |
args = (
|
api/bigcodebench_data.py
CHANGED
|
@@ -20,27 +20,11 @@ def stream_jsonl(filename: str) -> Iterable[Dict]:
|
|
| 20 |
yield json.loads(line)
|
| 21 |
|
| 22 |
|
| 23 |
-
def load_solutions(
|
| 24 |
-
"""We accept two formats of inputs.
|
| 25 |
-
+ `sample.jsonl` which is the format from BigCodeBench, i.e., {task_id, completion or solution}.
|
| 26 |
-
+ A folder which contains sub-folders named after the task_id. Each sub-folder
|
| 27 |
-
contains samples named in `[?].py` where `?` is the solution id starting with 0.
|
| 28 |
-
Different from `sample.jsonl`, the solutions must be complete (with prompt prefix).
|
| 29 |
"""
|
| 30 |
-
|
| 31 |
-
|
| 32 |
-
|
| 33 |
-
|
| 34 |
-
|
| 35 |
-
|
| 36 |
-
assert "solution" in sample, "No solution found in sample!"
|
| 37 |
-
assert isinstance(
|
| 38 |
-
sample["solution"], str
|
| 39 |
-
), "Solution must be a string! If you have multiple solutions, please repeat the task_id."
|
| 40 |
-
|
| 41 |
-
sample["_identifier"] = (
|
| 42 |
-
sample["task_id"] + f" (line {i+1} in {sample_path})"
|
| 43 |
-
)
|
| 44 |
-
yield sample
|
| 45 |
-
else:
|
| 46 |
-
raise NotImplementedError("Only jsonl solution output file is supported for now.")
|
|
|
|
| 20 |
yield json.loads(line)
|
| 21 |
|
| 22 |
|
| 23 |
+
def load_solutions(samples) -> Iterable[Dict]:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 24 |
"""
|
| 25 |
+
"""
|
| 26 |
+
for i, sample in enumerate(samples):
|
| 27 |
+
sample["_identifier"] = (
|
| 28 |
+
sample["task_id"] + f" (line {i+1} )"
|
| 29 |
+
)
|
| 30 |
+
yield sample
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|