#!/usr/bin/python3
# -*- coding: utf-8 -*-
"""
Evaluate a Qwen chat model (Aliyun Model Studio, OpenAI-compatible endpoint)
on a multiple-choice intent-classification dataset stored as JSONL.

For every dataset row the script builds a system message (instructions plus
few-shot examples) and a user message (the conversation), requests a
completion, compares it with the expected answer by exact string match, and
appends one JSON line with the result and running accuracy to the output file.
Rows already present in the output file are skipped, so a run can be resumed.

References:
https://help.aliyun.com/zh/model-studio/qwen-api-reference
https://help.aliyun.com/zh/model-studio/models
https://help.aliyun.com/zh/model-studio/models?spm=a2c4g.11186623.0.i4#d4ccf72f23jh9
https://help.aliyun.com/zh/model-studio/text-generation?spm=a2c4g.11186623.0.0.6b772e068nnT1J#24e54b27d4agt

Deep-Thinking
https://help.aliyun.com/zh/model-studio/deep-thinking?spm=a2c4g.11186623.0.0.56076f58IJd4mP
"""
import argparse
from datetime import datetime
import json
import os
from pathlib import Path
import sys
import time
from zoneinfo import ZoneInfo  # bundled with Python 3.9+, no install needed

# Make the parent directory importable so that `project_settings` resolves.
pwd = os.path.abspath(os.path.dirname(__file__))
sys.path.append(os.path.join(pwd, "../"))

from openai import OpenAI

from project_settings import environment, project_path


def get_args():
    """Parse and return the command-line arguments of the evaluation run."""
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--model_name",
        # Other model names that were used with this script:
        #   qwen3-max-2025-09-23, qwen3-max-preview,
        #   qwen-plus-2025-12-01, qwen-turbo-2025-07-15
        default="qwen-flash-2025-07-28",
        type=str,
        help="model name passed to the chat completions API",
    )
    parser.add_argument(
        "--eval_dataset_name",
        default="agent-nxcloud-zh-375-choice.jsonl",
        type=str,
        help="JSONL file name inside --eval_dataset_dir",
    )
    parser.add_argument(
        "--eval_dataset_dir",
        default=(project_path / "data/dataset").as_posix(),
        type=str,
    )
    parser.add_argument(
        "--eval_data_dir",
        default=(project_path / "data/eval_data").as_posix(),
        type=str,
        help="root directory under which result files are written",
    )
    parser.add_argument(
        "--client",
        default="shenzhen_sase",
        type=str,
        help="label used as a component of the output path",
    )
    parser.add_argument(
        "--service",
        # Alternative: aliyun_api_key_sgp
        default="aliyun_api_key_bj",
        type=str,
        help="settings key of the API key; also selects the endpoint "
             "(aliyun_api_key_bj or aliyun_api_key_sgp)",
    )
    parser.add_argument(
        "--create_time_str",
        # Pass "null" to start a fresh run stamped with the current time.
        default="20251209_140530",
        type=str,
        help='run identifier used in the output path; "null" means "now" '
             "(Asia/Shanghai, formatted as YYYYmmdd_HHMMSS)",
    )
    parser.add_argument(
        "--interval",
        default=1,
        type=int,
        help="seconds to sleep before each request",
    )
    args = parser.parse_args()
    return args


def conversation_to_str(conversation: list) -> str:
    """Render a list of ``{"role", "content"}`` turns as ``role: content`` lines.

    Every turn is terminated by a newline; an empty list yields ``""``.
    """
    return "".join(
        f"{turn['role']}: {turn['content']}\n"
        for turn in conversation
    )


# System instructions (in Chinese): classify the customer's intent in a call
# transcript and answer with exactly one capital letter, A to F.
system_prompt = """
你是一位专业的电话对话分析专家,负责根据客服与客户之间的通话内容判断客户意图类别。
请仔细分析用户提供的完整对话,并严格按照以下规则进行分类:

- **A**:客户**明确同意参加试听课**(如“好啊,安排一下”)。仅询问细节、模糊回应(如“嗯嗯”“好的”)不算。
- **B**:客户**投诉、辱骂、或明确要求停止拨打此类电话**(如“别再打了!”)。仅拒绝试听(如“不用了”)不属于 B。
- **C**:客户表示**当前时刻不方便通话,例如提到“在开车”、“不方便”等**。
- **D**:对话为**语音留言/自动应答**,或包含“留言”“voicemail”“message”“已录音”等关键词,或出现**逐字念出的数字串**(如“九零九五……”)。
- **E**:客服**完成两次独立推销后**,客户**两次都表达了明确拒绝,仅一次不算做E分类**。
- **F**:客户未表达明确意愿,或以上情况均不符合(默认类别)。

**输出要求:**
- 仅输出一个大写字母(A、B、C、D、E 或 F);
- 不要任何解释、标点、空格、换行、JSON、引号或其他字符;
- 输出必须且只能是单个字母。
"""


def _examples_to_str(examples: list) -> str:
    """Render few-shot examples: each conversation followed by its expected output."""
    parts = []
    for example in examples:
        parts.append(conversation_to_str(example["conversation"]))
        parts.append(f"\nOutput: {example['outputs']['output']}\n\n")
    return "".join(parts)


def _load_progress(output_file: Path) -> tuple:
    """Read an existing result file and return ``(finished_idx_set, total, total_correct)``.

    The counters are taken from the last row read, which holds the running
    totals of the previous run. A missing file yields ``(set(), 0, 0)``.
    """
    finished_idx_set = set()
    total = 0
    total_correct = 0
    if output_file.exists():
        with output_file.open("r", encoding="utf-8") as f:
            for line in f:
                # Tolerate blank lines instead of failing in json.loads.
                if not line.strip():
                    continue
                row = json.loads(line)
                finished_idx_set.add(row["idx"])
                total = row["total"]
                total_correct = row["total_correct"]
    return finished_idx_set, total, total_correct


def main():
    """Run the evaluation described in the module docstring.

    Raises:
        AssertionError: if ``--service`` is not one of the known services.
    """
    args = get_args()

    eval_dataset_dir = Path(args.eval_dataset_dir)
    eval_dataset_dir.mkdir(parents=True, exist_ok=True)
    eval_data_dir = Path(args.eval_data_dir)
    eval_data_dir.mkdir(parents=True, exist_ok=True)

    if args.create_time_str == "null":
        create_time_str = datetime.now(ZoneInfo("Asia/Shanghai")).strftime("%Y%m%d_%H%M%S")
    else:
        create_time_str = args.create_time_str

    eval_dataset = eval_dataset_dir / args.eval_dataset_name

    # "/" in a model name would otherwise create an extra directory level.
    model_name_ = args.model_name.replace("/", "#")
    output_file = eval_data_dir / f"aliyun_nxcloud_choice/aliyun/{model_name_}/{args.client}/{args.service}/{create_time_str}/{args.eval_dataset_name}"
    output_file.parent.mkdir(parents=True, exist_ok=True)

    # The API key is looked up in the project settings under the service name.
    api_key = environment.get(args.service, dtype=str)
    if args.service == "aliyun_api_key_bj":
        base_url = "https://dashscope.aliyuncs.com/compatible-mode/v1"
    elif args.service == "aliyun_api_key_sgp":
        base_url = "https://dashscope-intl.aliyuncs.com/compatible-mode/v1"
    else:
        raise AssertionError(f"invalid service: {args.service}")

    client = OpenAI(
        base_url=base_url,
        api_key=api_key,
    )

    # Resume support: skip rows already written and continue the counters.
    finished_idx_set, total, total_correct = _load_progress(output_file)
    print(f"finished count: {len(finished_idx_set)}")

    with eval_dataset.open("r", encoding="utf-8") as fin, \
            output_file.open("a+", encoding="utf-8") as fout:
        for line in fin:
            # Tolerate blank lines (e.g. a trailing newline at end of file).
            if not line.strip():
                continue
            row = json.loads(line)

            idx = row["idx"]
            if idx in finished_idx_set:
                continue

            conversation = row["conversation"]
            examples = row["examples"]
            response = row["response"]

            conversation_str = conversation_to_str(conversation)
            examples_str = _examples_to_str(examples)

            prompt1 = f"{system_prompt}\n\n**Examples**\n{examples_str}"
            prompt2 = f"**Conversation**\n{conversation_str}\n\nOutput:"

            messages = [
                {"role": "system", "content": prompt1},
                {"role": "user", "content": prompt2},
            ]

            # Throttle requests.
            time.sleep(args.interval)
            print(f"sleep: {args.interval}")

            time_begin = time.time()
            try:
                completion = client.chat.completions.create(
                    model=args.model_name,
                    messages=messages,
                    temperature=0.01,
                    # enable_thinking is not a standard OpenAI parameter,
                    # so it has to be passed through extra_body.
                    extra_body={"enable_thinking": False},
                    stream=False,
                )
            except Exception as e:
                # Best effort: a failed row is not written to the output file,
                # so it is retried when the run is resumed.
                print(f"request failed, error type: {type(e)}, error text: {str(e)}")
                continue
            time_cost = time.time() - time_begin
            print(f"time_cost: {time_cost}")

            prediction = completion.choices[0].message.content
            rid = completion.id

            # Exact match: any extra character in the prediction counts as wrong.
            correct = 1 if prediction == response else 0

            total += 1
            total_correct += correct
            score = total_correct / total

            row_ = {
                "idx": idx,
                "rid": rid,
                "messages": messages,
                "response": response,
                "prediction": prediction,
                "correct": correct,
                "total": total,
                "total_correct": total_correct,
                "score": score,
                "time_cost": time_cost,
            }
            row_ = json.dumps(row_, ensure_ascii=False)
            fout.write(f"{row_}\n")
            # Flush per row so progress survives an interrupted run.
            fout.flush()

    return


if __name__ == "__main__":
    main()