Spaces:
Runtime error
Runtime error
cyberosa
committed on
Commit
·
d269dc6
1
Parent(s):
d599f4f
Clean up and add the main benchmark repo
Browse files
- automate/run_benchmark.py +10 -4
- start.py +6 -6
automate/run_benchmark.py
CHANGED
|
@@ -36,10 +36,15 @@ def tool_map(tool):
|
|
| 36 |
|
| 37 |
def prepare_questions(kwargs):
|
| 38 |
test_questions = json.load(
|
| 39 |
-
open(
|
|
|
|
|
|
|
|
|
|
| 40 |
)
|
| 41 |
with open(
|
| 42 |
-
this_dir
|
|
|
|
|
|
|
| 43 |
) as f:
|
| 44 |
url_to_content = pickle.load(f)
|
| 45 |
num_questions = kwargs.pop("num_questions", len(test_questions))
|
|
@@ -73,7 +78,7 @@ def parse_response(response, test_q):
|
|
| 73 |
test_q["p_no"] = float(result["p_no"])
|
| 74 |
else:
|
| 75 |
test_q["p_no"] = None
|
| 76 |
-
|
| 77 |
if "confidence" in result.keys():
|
| 78 |
test_q["confidence"] = float(result["confidence"])
|
| 79 |
else:
|
|
@@ -277,6 +282,7 @@ if __name__ == "__main__":
|
|
| 277 |
kwargs["model"] = [
|
| 278 |
"gpt-3.5-turbo-0125",
|
| 279 |
]
|
|
|
|
| 280 |
kwargs["api_keys"] = {}
|
| 281 |
kwargs["api_keys"]["openai"] = os.getenv("OPENAI_API_KEY")
|
| 282 |
kwargs["api_keys"]["anthropic"] = os.getenv("ANTHROPIC_API_KEY")
|
|
@@ -285,4 +291,4 @@ if __name__ == "__main__":
|
|
| 285 |
kwargs["num_urls"] = 3
|
| 286 |
kwargs["num_words"] = 300
|
| 287 |
kwargs["provide_source_links"] = True
|
| 288 |
-
run_benchmark(kwargs)
|
|
|
|
| 36 |
|
| 37 |
def prepare_questions(kwargs):
|
| 38 |
test_questions = json.load(
|
| 39 |
+
open(
|
| 40 |
+
this_dir
|
| 41 |
+
/ "olas-predict-benchmark/benchmark/data/autocast/autocast_questions_filtered.json"
|
| 42 |
+
)
|
| 43 |
)
|
| 44 |
with open(
|
| 45 |
+
this_dir
|
| 46 |
+
/ "olas-predict-benchmark/benchmark/data/autocast/autocast_questions_filtered.pkl",
|
| 47 |
+
"rb",
|
| 48 |
) as f:
|
| 49 |
url_to_content = pickle.load(f)
|
| 50 |
num_questions = kwargs.pop("num_questions", len(test_questions))
|
|
|
|
| 78 |
test_q["p_no"] = float(result["p_no"])
|
| 79 |
else:
|
| 80 |
test_q["p_no"] = None
|
| 81 |
+
|
| 82 |
if "confidence" in result.keys():
|
| 83 |
test_q["confidence"] = float(result["confidence"])
|
| 84 |
else:
|
|
|
|
| 282 |
kwargs["model"] = [
|
| 283 |
"gpt-3.5-turbo-0125",
|
| 284 |
]
|
| 285 |
+
|
| 286 |
kwargs["api_keys"] = {}
|
| 287 |
kwargs["api_keys"]["openai"] = os.getenv("OPENAI_API_KEY")
|
| 288 |
kwargs["api_keys"]["anthropic"] = os.getenv("ANTHROPIC_API_KEY")
|
|
|
|
| 291 |
kwargs["num_urls"] = 3
|
| 292 |
kwargs["num_words"] = 300
|
| 293 |
kwargs["provide_source_links"] = True
|
| 294 |
+
run_benchmark(kwargs)
|
start.py
CHANGED
|
@@ -53,15 +53,15 @@ def start():
|
|
| 53 |
# no updates
|
| 54 |
# ("git submodule update --init --recursive", base_dir),
|
| 55 |
# ("git submodule update --remote --recursive", base_dir),
|
| 56 |
-
|
| 57 |
-
|
| 58 |
-
|
| 59 |
-
|
| 60 |
# no updates
|
| 61 |
("git remote update", olas_dir),
|
| 62 |
("git fetch --all", olas_dir),
|
| 63 |
-
("git checkout
|
| 64 |
-
|
| 65 |
("git checkout 56ecf18a982c4548feac5efe787690a3ec37c835", mech_dir),
|
| 66 |
# ("git pull origin main", mech_dir),
|
| 67 |
("pip install -e .", os.path.join(olas_dir, "benchmark")),
|
|
|
|
| 53 |
# no updates
|
| 54 |
# ("git submodule update --init --recursive", base_dir),
|
| 55 |
# ("git submodule update --remote --recursive", base_dir),
|
| 56 |
+
(
|
| 57 |
+
'git config remote.origin.fetch "+refs/heads/*:refs/remotes/origin/*"',
|
| 58 |
+
olas_dir,
|
| 59 |
+
),
|
| 60 |
# no updates
|
| 61 |
("git remote update", olas_dir),
|
| 62 |
("git fetch --all", olas_dir),
|
| 63 |
+
("git checkout main", olas_dir),
|
| 64 |
+
("git pull origin main", olas_dir),
|
| 65 |
("git checkout 56ecf18a982c4548feac5efe787690a3ec37c835", mech_dir),
|
| 66 |
# ("git pull origin main", mech_dir),
|
| 67 |
("pip install -e .", os.path.join(olas_dir, "benchmark")),
|