dataset-builder / data3 /quick_batch_start.sh
SunDou's picture
Upload data3/quick_batch_start.sh with huggingface_hub
115f0b5 verified
#!/bin/bash
# Quick start for a Batch API job: prepare the request file and submit it,
# then exit without waiting for the batch to complete.
#
# generate_problems_batch.py and the input CSV are referenced by relative
# path, so run this script from the directory that contains them.
#
# Files written (relative to the current directory):
#   batch_requests_full.jsonl  - requests produced by the "prepare" step
#   batch_submit_output.txt    - stdout of the "submit" step (via tee)
#   batch_id.txt               - the extracted Batch ID, on success
#
# Exit status: 0 when a Batch ID was extracted and saved; non-zero when a
# step fails or no Batch ID can be found in the submit output.

# -e: stop on the first failing command.
# -u: treat unset variables as errors.
# pipefail: without it, the exit status of "python3 ... | tee" is tee's,
#           so a failed submission would go unnoticed.
set -euo pipefail

echo "🚀 快速启动 Batch API 任务"
echo "========================================"

# Configuration
readonly MIN_SCORE=60
readonly MODEL="gpt-4o-mini"
readonly INPUT_FILE="function_dataset_v2.csv"
readonly BATCH_REQUESTS_FILE="batch_requests_full.jsonl"
readonly SUBMIT_OUTPUT_FILE="batch_submit_output.txt"
readonly BATCH_ID_FILE="batch_id.txt"

# Step 1: prepare the batch requests
echo "📋 准备批量请求..."
python3 generate_problems_batch.py prepare \
  --input "$INPUT_FILE" \
  --output "$BATCH_REQUESTS_FILE" \
  --min-score "$MIN_SCORE" \
  --model "$MODEL"

# One request per line in the JSONL file. Some wc implementations pad the
# count with leading blanks, so strip all whitespace before printing it.
REQUEST_COUNT=$(wc -l < "$BATCH_REQUESTS_FILE")
REQUEST_COUNT=${REQUEST_COUNT//[[:space:]]/}
echo ""
echo "✅ 已准备 $REQUEST_COUNT 个请求"

# Step 2: submit
echo ""
echo "🚀 提交批处理任务..."
python3 generate_problems_batch.py submit \
  --input "$BATCH_REQUESTS_FILE" \
  --model "$MODEL" \
  --description "Scientific computing problems - $REQUEST_COUNT samples" \
  | tee "$SUBMIT_OUTPUT_FILE"

# Extract the Batch ID: the first token in the submit output that looks like
# "batch_<alnum/underscore>". The pattern is plain ERE, so -E is used instead
# of -P (PCRE is not available in every grep).
# "|| true": under pipefail a no-match grep (or grep killed by SIGPIPE once
# head exits) would abort the script here; the empty-ID case is handled
# explicitly below instead.
# NOTE(review): this pattern also matches strings such as
# "batch_requests_full"; if the submit step prints the input file name before
# the real ID, the wrong token is picked up - confirm against actual output.
BATCH_ID=$(grep -oE 'batch_[a-zA-Z0-9_]+' "$SUBMIT_OUTPUT_FILE" | head -n 1 || true)

if [[ -z "$BATCH_ID" ]]; then
  # Previously this case ended silently with status 0, leaving no batch_id.txt
  # and no hint about what went wrong.
  echo "❌ 未能从 $SUBMIT_OUTPUT_FILE 中提取 Batch ID" >&2
  exit 1
fi

printf '%s\n' "$BATCH_ID" > "$BATCH_ID_FILE"
echo ""
echo "✅ 批处理已提交!"
echo "📝 Batch ID: $BATCH_ID (已保存到 batch_id.txt)"
echo ""
echo "监控命令:"
echo " python3 generate_problems_batch.py status $BATCH_ID"
echo ""
echo "或运行监控脚本:"
echo " ./monitor_batch.sh"