Spaces:
Running
Running
improve: knowledge base and re
Browse files- modules/info_extractor.py +763 -301
- modules/knowledge_base.py +274 -15
- modules/response_generator.py +490 -127
modules/info_extractor.py
CHANGED
|
@@ -1,356 +1,818 @@
|
|
| 1 |
import json
|
| 2 |
import re
|
| 3 |
from utils.logger import log
|
| 4 |
-
from .ai_model import AIModel
|
| 5 |
|
| 6 |
class InfoExtractor:
|
| 7 |
-
def __init__(self
|
| 8 |
-
|
| 9 |
# 预定义的提取结构,用于验证和规范化
|
| 10 |
self.extraction_schema = {
|
| 11 |
"destination": {"type": dict, "fields": {"name": str, "country": str}},
|
| 12 |
"duration": {"type": dict, "fields": {"days": int, "description": str}},
|
| 13 |
"budget": {"type": dict, "fields": {"type": str, "amount": int, "currency": str, "description": str}}
|
| 14 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 15 |
|
| 16 |
def extract(self, user_message: str) -> dict:
|
| 17 |
-
"""
|
| 18 |
|
| 19 |
# 输入验证
|
| 20 |
if not user_message or not isinstance(user_message, str):
|
| 21 |
log.warning("⚠️ 收到无效的用户消息")
|
| 22 |
return {}
|
| 23 |
|
| 24 |
-
|
| 25 |
-
|
| 26 |
-
|
| 27 |
-
# --- 核心修复:强制使用确定性解码以杜绝幻觉 ---
|
| 28 |
-
log.info("🧠 使用LLM开始提取信息 (模式: 确定性)")
|
| 29 |
-
|
| 30 |
-
# 注意:ai_model.generate() 方法不支持 do_sample 和 temperature 参数
|
| 31 |
-
# 需要通过其他方式确保确定性输出
|
| 32 |
-
raw_response = self.ai_model.run_inference(
|
| 33 |
-
input_type="text",
|
| 34 |
-
formatted_input=None,
|
| 35 |
-
prompt=prompt,
|
| 36 |
-
temperature=0.0 # 使用最低温度确保确定性
|
| 37 |
-
)
|
| 38 |
-
|
| 39 |
-
try:
|
| 40 |
-
# 清理响应,提取纯JSON部分
|
| 41 |
-
cleaned_response = self._clean_json_response(raw_response)
|
| 42 |
-
extracted_json = json.loads(cleaned_response)
|
| 43 |
-
log.info(f"✅ LLM成功提取并解析JSON: {extracted_json}")
|
| 44 |
-
|
| 45 |
-
# 使用新的验证方法
|
| 46 |
-
validated_data = self._validate_and_normalize(extracted_json)
|
| 47 |
-
log.info(f"📊 LLM最终提取结果 (安全处理后): {validated_data}")
|
| 48 |
-
return validated_data
|
| 49 |
-
|
| 50 |
-
except (json.JSONDecodeError, TypeError) as e:
|
| 51 |
-
log.error(f"❌ 解析或验证LLM提取的JSON失败: {e}", exc_info=True)
|
| 52 |
-
log.debug(f"🔍 原始响应: {raw_response}")
|
| 53 |
-
# 尝试备用提取方法
|
| 54 |
-
return self._fallback_extraction(user_message)
|
| 55 |
-
|
| 56 |
-
def _clean_json_response(self, response: str) -> str:
|
| 57 |
-
"""清理LLM响应,提取纯JSON部分"""
|
| 58 |
-
if not response:
|
| 59 |
-
return "{}"
|
| 60 |
|
| 61 |
-
|
| 62 |
-
response = re.sub(r'```json\s*', '', response)
|
| 63 |
-
response = re.sub(r'```\s*', '', response)
|
| 64 |
|
| 65 |
-
|
| 66 |
-
response = re.sub(r'^[^{]*', '', response)
|
| 67 |
|
| 68 |
-
#
|
| 69 |
-
|
| 70 |
-
|
|
|
|
| 71 |
|
| 72 |
-
|
| 73 |
-
|
|
|
|
|
|
|
| 74 |
|
| 75 |
-
#
|
| 76 |
-
|
| 77 |
-
|
| 78 |
-
|
| 79 |
-
"""基于规则的备用信息提取"""
|
| 80 |
-
log.info("🔄 使用基于规则的备用提取方法")
|
| 81 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 82 |
result = {}
|
| 83 |
-
message_lower = user_message.lower()
|
| 84 |
|
| 85 |
-
#
|
| 86 |
-
|
| 87 |
-
|
| 88 |
-
r'
|
| 89 |
-
r'
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 90 |
]
|
| 91 |
|
| 92 |
-
|
| 93 |
-
|
| 94 |
-
|
| 95 |
-
|
| 96 |
-
|
| 97 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 98 |
break
|
| 99 |
|
| 100 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 101 |
day_patterns = [
|
| 102 |
-
|
| 103 |
-
r'(\d+)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 104 |
]
|
| 105 |
|
|
|
|
| 106 |
for pattern in day_patterns:
|
| 107 |
-
|
| 108 |
-
|
| 109 |
-
days =
|
| 110 |
-
|
| 111 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 112 |
break
|
|
|
|
|
|
|
| 113 |
|
| 114 |
-
|
| 115 |
-
|
| 116 |
-
|
| 117 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 118 |
]
|
| 119 |
|
| 120 |
-
|
| 121 |
-
|
| 122 |
-
|
| 123 |
-
|
| 124 |
-
|
| 125 |
-
|
| 126 |
-
|
| 127 |
-
|
| 128 |
-
|
| 129 |
-
|
| 130 |
-
|
| 131 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 132 |
break
|
| 133 |
|
| 134 |
-
# 预算类型识别
|
| 135 |
budget_type_keywords = {
|
| 136 |
-
'economy': [
|
| 137 |
-
|
| 138 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 139 |
}
|
| 140 |
|
|
|
|
| 141 |
for budget_type, keywords in budget_type_keywords.items():
|
| 142 |
-
|
| 143 |
-
|
| 144 |
-
|
| 145 |
-
|
| 146 |
-
result["budget"]["type"] = budget_type
|
| 147 |
break
|
| 148 |
|
| 149 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 150 |
return result
|
| 151 |
|
| 152 |
-
def
|
| 153 |
-
"""
|
| 154 |
-
|
| 155 |
-
|
| 156 |
-
|
| 157 |
-
|
| 158 |
-
|
| 159 |
-
|
| 160 |
-
|
| 161 |
-
|
| 162 |
-
|
| 163 |
-
|
| 164 |
-
|
| 165 |
-
|
| 166 |
-
|
| 167 |
-
|
| 168 |
-
|
| 169 |
-
|
| 170 |
-
|
| 171 |
-
if "country" in dest_data and isinstance(dest_data["country"], str):
|
| 172 |
-
country = dest_data["country"].strip()
|
| 173 |
-
if country:
|
| 174 |
-
validated_dest["country"] = country
|
| 175 |
-
if validated_dest:
|
| 176 |
-
validated_output["destination"] = validated_dest
|
| 177 |
-
|
| 178 |
-
# 验证duration
|
| 179 |
-
if "duration" in data:
|
| 180 |
-
duration_data = data["duration"]
|
| 181 |
-
if isinstance(duration_data, dict):
|
| 182 |
-
validated_duration = {}
|
| 183 |
-
if "days" in duration_data:
|
| 184 |
-
days = duration_data["days"]
|
| 185 |
-
if isinstance(days, (int, float)) and 1 <= days <= 365:
|
| 186 |
-
validated_duration["days"] = int(days)
|
| 187 |
-
if "description" in duration_data and isinstance(duration_data["description"], str):
|
| 188 |
-
desc = duration_data["description"].strip()
|
| 189 |
-
if desc:
|
| 190 |
-
validated_duration["description"] = desc
|
| 191 |
-
if validated_duration:
|
| 192 |
-
validated_output["duration"] = validated_duration
|
| 193 |
-
|
| 194 |
-
# 验证budget
|
| 195 |
-
if "budget" in data:
|
| 196 |
-
budget_data = data["budget"]
|
| 197 |
-
if isinstance(budget_data, dict):
|
| 198 |
-
validated_budget = {}
|
| 199 |
-
|
| 200 |
-
# 验证type
|
| 201 |
-
if "type" in budget_data:
|
| 202 |
-
budget_type = budget_data["type"]
|
| 203 |
-
if budget_type in ["economy", "comfortable", "luxury"]:
|
| 204 |
-
validated_budget["type"] = budget_type
|
| 205 |
-
|
| 206 |
-
# 验证amount
|
| 207 |
-
if "amount" in budget_data:
|
| 208 |
-
amount = budget_data["amount"]
|
| 209 |
-
if isinstance(amount, (int, float)) and amount > 0:
|
| 210 |
-
validated_budget["amount"] = int(amount)
|
| 211 |
-
|
| 212 |
-
# 验证currency
|
| 213 |
-
if "currency" in budget_data and isinstance(budget_data["currency"], str):
|
| 214 |
-
currency = budget_data["currency"].strip()
|
| 215 |
-
if currency:
|
| 216 |
-
validated_budget["currency"] = currency
|
| 217 |
-
|
| 218 |
-
# 验证description
|
| 219 |
-
if "description" in budget_data and isinstance(budget_data["description"], str):
|
| 220 |
-
desc = budget_data["description"].strip()
|
| 221 |
-
if desc:
|
| 222 |
-
validated_budget["description"] = desc
|
| 223 |
-
|
| 224 |
-
if validated_budget:
|
| 225 |
-
validated_output["budget"] = validated_budget
|
| 226 |
|
| 227 |
-
if
|
| 228 |
-
|
| 229 |
|
| 230 |
-
|
| 231 |
-
|
| 232 |
-
|
| 233 |
-
"""构建包含多个示例的提取prompt"""
|
| 234 |
|
| 235 |
-
#
|
| 236 |
-
if
|
| 237 |
-
|
| 238 |
|
| 239 |
-
|
| 240 |
-
|
| 241 |
-
|
| 242 |
-
|
| 243 |
-
|
| 244 |
-
|
| 245 |
-
|
| 246 |
-
|
| 247 |
-
|
| 248 |
-
|
| 249 |
-
|
| 250 |
-
|
| 251 |
-
|
| 252 |
-
"country": "string or null"
|
| 253 |
-
}},
|
| 254 |
-
"duration": {{
|
| 255 |
-
"days": "integer or null",
|
| 256 |
-
"description": "string or null"
|
| 257 |
-
}},
|
| 258 |
-
"budget": {{
|
| 259 |
-
"type": "string ('economy', 'comfortable', 'luxury') or null",
|
| 260 |
-
"amount": "number or null",
|
| 261 |
-
"currency": "string or null",
|
| 262 |
-
"description": "string or null"
|
| 263 |
-
}}
|
| 264 |
-
}}
|
| 265 |
-
|
| 266 |
-
**示例1:**
|
| 267 |
-
用户输入: "我想去柏林玩3天"
|
| 268 |
-
你的输出:
|
| 269 |
-
{{
|
| 270 |
-
"destination": {{
|
| 271 |
-
"name": "柏林",
|
| 272 |
-
"country": null
|
| 273 |
-
}},
|
| 274 |
-
"duration": {{
|
| 275 |
-
"days": 3,
|
| 276 |
-
"description": null
|
| 277 |
-
}},
|
| 278 |
-
"budget": {{
|
| 279 |
-
"type": null,
|
| 280 |
-
"amount": null,
|
| 281 |
-
"currency": null,
|
| 282 |
-
"description": null
|
| 283 |
-
}}
|
| 284 |
-
}}
|
| 285 |
-
|
| 286 |
-
**示例2:**
|
| 287 |
-
用户输入: "计划去日本东京旅行一周,预算2万元"
|
| 288 |
-
你的输出:
|
| 289 |
-
{{
|
| 290 |
-
"destination": {{
|
| 291 |
-
"name": "东京",
|
| 292 |
-
"country": "日本"
|
| 293 |
-
}},
|
| 294 |
-
"duration": {{
|
| 295 |
-
"days": 7,
|
| 296 |
-
"description": "一周"
|
| 297 |
-
}},
|
| 298 |
-
"budget": {{
|
| 299 |
-
"type": null,
|
| 300 |
-
"amount": 20000,
|
| 301 |
-
"currency": "元",
|
| 302 |
-
"description": null
|
| 303 |
-
}}
|
| 304 |
-
}}
|
| 305 |
-
|
| 306 |
-
**示例3:**
|
| 307 |
-
用户输入: "想要一个经济实惠的巴黎5天行程"
|
| 308 |
-
你的输出:
|
| 309 |
-
{{
|
| 310 |
-
"destination": {{
|
| 311 |
-
"name": "巴黎",
|
| 312 |
-
"country": null
|
| 313 |
-
}},
|
| 314 |
-
"duration": {{
|
| 315 |
-
"days": 5,
|
| 316 |
-
"description": null
|
| 317 |
-
}},
|
| 318 |
-
"budget": {{
|
| 319 |
-
"type": "economy",
|
| 320 |
-
"amount": null,
|
| 321 |
-
"currency": null,
|
| 322 |
-
"description": "经济实惠"
|
| 323 |
-
}}
|
| 324 |
-
}}
|
| 325 |
-
|
| 326 |
-
**示例4:**
|
| 327 |
-
用户输入: "你好"
|
| 328 |
-
你的输出:
|
| 329 |
-
{{}}
|
| 330 |
-
|
| 331 |
-
**示例5:**
|
| 332 |
-
用户输入: "想去泰国普吉岛度蜜月,10天左右,豪华一点不差钱"
|
| 333 |
-
你的输出:
|
| 334 |
-
{{
|
| 335 |
-
"destination": {{
|
| 336 |
-
"name": "普吉岛",
|
| 337 |
-
"country": "泰国"
|
| 338 |
-
}},
|
| 339 |
-
"duration": {{
|
| 340 |
-
"days": 10,
|
| 341 |
-
"description": "10天左右"
|
| 342 |
-
}},
|
| 343 |
-
"budget": {{
|
| 344 |
-
"type": "luxury",
|
| 345 |
-
"amount": null,
|
| 346 |
-
"currency": null,
|
| 347 |
-
"description": "豪华一点不差钱"
|
| 348 |
-
}}
|
| 349 |
-
}}
|
| 350 |
|
| 351 |
-
|
| 352 |
-
|
| 353 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 354 |
|
| 355 |
-
|
| 356 |
-
|
|
|
|
|
|
|
|
|
| 1 |
import json
|
| 2 |
import re
|
| 3 |
from utils.logger import log
|
|
|
|
| 4 |
|
| 5 |
class InfoExtractor:
|
| 6 |
+
def __init__(self):
|
| 7 |
+
|
| 8 |
# 预定义的提取结构,用于验证和规范化
|
| 9 |
self.extraction_schema = {
|
| 10 |
"destination": {"type": dict, "fields": {"name": str, "country": str}},
|
| 11 |
"duration": {"type": dict, "fields": {"days": int, "description": str}},
|
| 12 |
"budget": {"type": dict, "fields": {"type": str, "amount": int, "currency": str, "description": str}}
|
| 13 |
}
|
| 14 |
+
|
| 15 |
+
# 欧洲城市和国家的完整映射关系(聚焦欧洲)
|
| 16 |
+
self.european_cities = {
|
| 17 |
+
# === 西欧 ===
|
| 18 |
+
# 法国
|
| 19 |
+
"巴黎": "法国", "里昂": "法国", "马赛": "法国", "尼斯": "法国", "戛纳": "法国",
|
| 20 |
+
"图卢兹": "法国", "南特": "法国", "斯特拉斯堡": "法国", "蒙彼利埃": "法国", "波尔多": "法国",
|
| 21 |
+
"里尔": "法国", "雷恩": "法国", "兰斯": "法国", "勒阿弗尔": "法国", "圣埃蒂安": "法国",
|
| 22 |
+
"土伦": "法国", "阿维尼翁": "法国", "凡尔赛": "法国", "枫丹白露": "法国", "第戎": "法国",
|
| 23 |
+
"昂热": "法国", "贝桑松": "法国", "佩皮尼昂": "法国", "卢尔德": "法国", "沙特尔": "法国",
|
| 24 |
+
|
| 25 |
+
# 德国
|
| 26 |
+
"柏林": "德国", "慕尼黑": "德国", "汉堡": "德国", "科隆": "德国", "法兰克福": "德国",
|
| 27 |
+
"斯图加特": "德国", "杜塞尔多夫": "德国", "多特蒙德": "德国", "埃森": "德国", "莱比锡": "德国",
|
| 28 |
+
"不来梅": "德国", "德累斯顿": "德国", "汉诺威": "德国", "纽伦堡": "德国", "杜伊斯堡": "德国",
|
| 29 |
+
"波鸿": "德国", "乌珀塔尔": "德国", "比勒费尔德": "德国", "波恩": "德国", "明斯特": "德国",
|
| 30 |
+
"卡尔斯鲁厄": "德国", "曼海姆": "德国", "奥格斯堡": "德国", "威斯巴登": "德国", "盖尔森基兴": "德国",
|
| 31 |
+
"门兴格拉德巴赫": "德国", "布伦瑞克": "德国", "基尔": "德国", "亚琛": "德国", "哈雷": "德国",
|
| 32 |
+
"马格德堡": "德国", "弗莱堡": "德国", "克里菲尔德": "德国", "吕贝克": "德国", "奥伯豪森": "德国",
|
| 33 |
+
"埃尔福特": "德国", "罗斯托克": "德国", "凯泽斯劳滕": "德国", "卡塞尔": "德国", "哈根": "德国",
|
| 34 |
+
"波茨坦": "德国", "萨尔布吕肯": "德国", "路德维希港": "德国", "奥尔登堡": "德国", "莱沃库森": "德国",
|
| 35 |
+
"奥斯纳布吕克": "德国", "索林根": "德国", "海德堡": "德国", "达姆施塔特": "德国", "哈姆": "德国",
|
| 36 |
+
"维尔茨堡": "德国", "雷克林豪森": "德国", "沃尔夫斯堡": "德国", "格廷根": "德国", "科特布斯": "德国",
|
| 37 |
+
"希尔德斯海姆": "德国", "埃朗根": "德国", "特里尔": "德国", "耶拿": "德国", "康斯坦茨": "德国",
|
| 38 |
+
"新天鹅堡": "德国", "罗滕堡": "德国", "科布伦茨": "德国", "班贝格": "德国", "拜罗伊特": "德国",
|
| 39 |
+
|
| 40 |
+
# 英国
|
| 41 |
+
"伦敦": "英国", "伯明翰": "英国", "曼彻斯特": "英国", "格拉斯哥": "英国", "利物浦": "英国",
|
| 42 |
+
"利兹": "英国", "谢菲尔德": "英国", "爱丁堡": "英国", "布里斯托": "英国", "莱斯特": "英国",
|
| 43 |
+
"考文垂": "英国", "布拉德福德": "英国", "贝尔法斯特": "英国", "卡迪夫": "英国", "诺丁汉": "英国",
|
| 44 |
+
"金斯顿": "英国", "纽卡斯尔": "英国", "普利茅斯": "英国", "斯托克": "英国", "南安普顿": "英国",
|
| 45 |
+
"雷丁": "英国", "德比": "英国", "约克": "英国", "牛津": "英国", "剑桥": "英国",
|
| 46 |
+
"巴斯": "英国", "温莎": "英国", "坎特伯雷": "英国", "斯特拉特福": "英国", "湖区": "英国",
|
| 47 |
+
"斯凯岛": "英国", "爱丁堡": "英国", "格拉斯哥": "英国", "史德灵": "英国", "珀斯": "英国",
|
| 48 |
+
"因弗内斯": "英国", "阿伯丁": "英国", "邓迪": "英国", "法夫": "英国", "奥班": "英国",
|
| 49 |
+
|
| 50 |
+
# 荷兰
|
| 51 |
+
"阿姆斯特丹": "荷兰", "鹿特丹": "荷兰", "海牙": "荷兰", "乌得勒支": "荷兰", "埃因霍温": "荷兰",
|
| 52 |
+
"蒂尔堡": "荷兰", "格罗宁根": "荷兰", "阿尔梅勒": "荷兰", "布雷达": "荷兰", "奈梅亨": "荷兰",
|
| 53 |
+
"阿珀尔多伦": "荷兰", "哈勒姆": "荷兰", "阿纳姆": "荷兰", "恩斯赫德": "荷兰", "阿默斯福特": "荷兰",
|
| 54 |
+
"赞丹": "荷兰", "海牙": "荷兰", "阿尔克马尔": "荷兰", "马斯特里赫特": "荷兰", "莱顿": "荷兰",
|
| 55 |
+
"代尔夫特": "荷兰", "多德雷赫特": "荷兰", "豪达": "荷兰", "羊角村": "荷兰", "马尔肯": "荷兰",
|
| 56 |
+
|
| 57 |
+
# 比利时
|
| 58 |
+
"布鲁塞尔": "比利时", "安特卫普": "比利时", "根特": "比利时", "沙勒罗瓦": "比利时", "列日": "比利时",
|
| 59 |
+
"布吕赫": "比利时", "那慕尔": "比利时", "蒙斯": "比利时", "阿尔斯特": "比利时", "科特赖克": "比利时",
|
| 60 |
+
"哈瑟尔特": "比利时", "圣尼古拉": "比利时", "奥斯坦德": "比利时", "梅赫伦": "比利时", "鲁汶": "比利时",
|
| 61 |
+
|
| 62 |
+
# 卢森堡
|
| 63 |
+
"卢森堡市": "卢森堡", "埃施": "卢森堡", "迪费当日": "卢森堡", "杜德朗日": "卢森堡",
|
| 64 |
+
|
| 65 |
+
# === 南欧 ===
|
| 66 |
+
# 意大利
|
| 67 |
+
"罗马": "意大利", "米兰": "意大利", "威尼斯": "意大利", "佛罗伦萨": "意大利", "那不勒斯": "意大利",
|
| 68 |
+
"都灵": "意大利", "帕勒莫": "意大利", "热那亚": "意大利", "博洛尼亚": "意大利", "巴里": "意大利",
|
| 69 |
+
"卡塔尼亚": "意大利", "佛罗伦萨": "意大利", "韦罗纳": "意大利", "威尼斯": "意大利", "墨西拿": "意大利",
|
| 70 |
+
"帕多瓦": "意大利", "的里雅斯特": "意大利", "塔兰托": "意大利", "布雷西亚": "意大利", "摩德纳": "意大利",
|
| 71 |
+
"雷焦卡拉布里亚": "意大利", "普拉托": "意大利", "卡利亚里": "意大利", "帕尔马": "意大利", "佩鲁贾": "意大利",
|
| 72 |
+
"利沃诺": "意大利", "雷焦艾米利亚": "意大利", "佛嘉": "意大利", "萨莱诺": "意大利", "拉温纳": "意大利",
|
| 73 |
+
"里米尼": "意大利", "拉斯佩齐亚": "意大利", "萨萨里": "意大利", "蒙扎": "意大利", "贝加莫": "意大利",
|
| 74 |
+
"比萨": "意大利", "维琴察": "意大利", "三月十五日": "意大利", "博尔扎诺": "意大利", "安德里亚": "意大利",
|
| 75 |
+
"阿雷佐": "意大利", "蒂沃利": "意大利", "阿西西": "意大利", "锡耶纳": "意大利", "五渔村": "意大利",
|
| 76 |
+
"马泰拉": "意大利", "庞贝": "意大利", "卡普里岛": "意大利", "阿马尔菲": "意大利", "科莫": "意大利",
|
| 77 |
+
|
| 78 |
+
# 西班牙
|
| 79 |
+
"马德里": "西班牙", "巴塞罗那": "西班牙", "瓦伦西亚": "西班牙", "塞维利亚": "西班牙", "萨拉戈萨": "西班牙",
|
| 80 |
+
"马拉加": "西班牙", "穆尔西亚": "西班牙", "帕尔马": "西班牙", "拉斯帕尔马斯": "西班牙", "毕尔巴鄂": "西班牙",
|
| 81 |
+
"阿利坎特": "西班牙", "科尔多瓦": "西班牙", "巴利亚多利德": "西班牙", "维戈": "西班牙", "希洪": "西班牙",
|
| 82 |
+
"莱昂": "西班牙", "拉科鲁尼亚": "西班牙", "埃尔切": "西班牙", "奥维耶多": "西班牙", "圣塞巴斯蒂安": "西班牙",
|
| 83 |
+
"桑坦德": "西班牙", "卡斯特利翁": "西班牙", "洛格罗尼奥": "西班牙", "巴达霍斯": "西班牙", "萨拉曼卡": "西班牙",
|
| 84 |
+
"韦尔瓦": "西班牙", "阿尔梅里亚": "西班牙", "卡迪斯": "西班牙", "格拉纳达": "西班牙", "托莱多": "西班牙",
|
| 85 |
+
"昆卡": "西班牙", "卡塞雷斯": "西班牙", "塞哥维亚": "西班牙", "阿维拉": "西班牙", "布尔戈斯": "西班牙",
|
| 86 |
+
"马略卡岛": "西班牙", "伊比萨": "西班牙", "特内里费": "西班牙", "大加那利": "西班牙", "兰萨罗特": "西班牙",
|
| 87 |
+
|
| 88 |
+
# 葡萄牙
|
| 89 |
+
"里斯本": "葡萄牙", "波尔图": "葡萄牙", "阿马多拉": "葡萄牙", "布拉加": "葡萄牙", "塞图巴尔": "葡萄牙",
|
| 90 |
+
"科英布拉": "葡萄牙", "丰沙尔": "葡萄牙", "阿威罗": "葡萄牙", "埃武拉": "葡萄牙", "法鲁": "葡萄牙",
|
| 91 |
+
"阿尔布费拉": "葡萄牙", "辛特拉": "葡萄牙", "卡斯凯什": "葡萄牙", "奥比杜什": "葡萄牙", "波尔塔莱格雷": "葡萄牙",
|
| 92 |
+
"吉马良斯": "葡萄牙", "维亚纳堡": "葡萄牙", "维塞乌": "葡萄牙", "拉戈什": "葡萄牙", "萨格里什": "葡萄牙",
|
| 93 |
+
|
| 94 |
+
# 希腊
|
| 95 |
+
"雅典": "希腊", "塞萨洛尼基": "希腊", "帕特雷": "希腊", "伊拉克利翁": "希腊", "拉里萨": "希腊",
|
| 96 |
+
"沃洛斯": "希腊", "约阿尼纳": "希腊", "卡瓦拉": "希腊", "哈尼亚": "希腊", "塞雷斯": "希腊",
|
| 97 |
+
"圣托里尼": "希腊", "米科诺斯": "希腊", "罗德岛": "希腊", "科孚": "希腊", "克里特": "希腊",
|
| 98 |
+
"帕罗斯": "希腊", "纳克索斯": "希腊", "扎金索斯": "希腊", "凯法利尼亚": "希腊", "斯基亚索斯": "希腊",
|
| 99 |
+
"德尔菲": "希腊", "奥林匹亚": "希腊", "迈锡尼": "希腊", "埃皮达鲁斯": "希腊", "梅泰奥拉": "希腊",
|
| 100 |
+
|
| 101 |
+
# === 中欧 ===
|
| 102 |
+
# 奥地利
|
| 103 |
+
"维也纳": "奥地利", "格拉茨": "奥地利", "林茨": "奥地利", "萨尔茨堡": "奥地利", "因斯布鲁克": "奥地利",
|
| 104 |
+
"克拉根福": "奥地利", "菲拉赫": "奥地利", "韦尔斯": "奥地利", "圣珀尔滕": "奥地利", "多恩比恩": "奥地利",
|
| 105 |
+
"维也纳新城": "奥地利", "施泰尔": "奥地利", "费尔德基兴": "奥地利", "布鲁克": "奥地利", "莱奥本": "奥地利",
|
| 106 |
+
"哈尔施塔特": "奥地利", "巴德伊舍尔": "奥地利", "梅尔克": "奥地利", "瓦绍": "奥地利", "库夫斯坦": "奥地利",
|
| 107 |
+
|
| 108 |
+
# 捷克
|
| 109 |
+
"布拉格": "捷克", "布尔诺": "捷克", "俄斯特拉发": "捷克", "比尔森": "捷克", "奥洛穆茨": "捷克",
|
| 110 |
+
"利贝雷茨": "捷克", "赫拉德茨克拉洛韦": "捷克", "乌斯季": "捷克", "帕尔杜比采": "捷克", "兹林": "捷克",
|
| 111 |
+
"哈维若夫": "捷克", "克拉德诺": "捷克", "切斯凯布杰约维采": "捷克", "莫斯特": "捷克", "卡尔维纳": "捷克",
|
| 112 |
+
"库特纳霍拉": "捷克", "泰尔奇": "捷克", "克鲁姆洛夫": "捷克", "卡尔什特因": "捷克", "布拉格城堡": "捷克",
|
| 113 |
+
|
| 114 |
+
# 匈牙利
|
| 115 |
+
"布达佩斯": "匈牙利", "德布勒森": "匈牙利", "塞格德": "匈牙利", "米什科尔茨": "匈牙利", "佩奇": "匈牙利",
|
| 116 |
+
"焦尔": "匈牙利", "尼赖吉哈佐": "匈牙利", "凯奇凯梅特": "匈牙利", "塞克什白堡": "匈牙利", "松博特海伊": "匈牙利",
|
| 117 |
+
"松博特海伊": "匈牙利", "维斯普雷姆": "匈牙利", "埃格尔": "匈牙利", "贝凯什乔包": "匈牙利", "大沃拉丁": "匈牙利",
|
| 118 |
+
"埃斯泰尔戈姆": "匈牙利", "维谢格拉德": "匈牙利", "霍洛克": "匈牙利", "蒂豪尼": "匈牙利", "巴拉顿湖": "匈牙利",
|
| 119 |
+
|
| 120 |
+
# 波兰
|
| 121 |
+
"华沙": "波兰", "克拉科夫": "波兰", "罗兹": "波兰", "弗罗茨瓦夫": "波兰", "波兹南": "波兰",
|
| 122 |
+
"格但斯克": "波兰", "什切青": "波兰", "比得哥什": "波兰", "卢布林": "波兰", "卡托维兹": "波兰",
|
| 123 |
+
"白雅斯托克": "波兰", "格丁尼亚": "波兰", "琴斯托霍瓦": "波兰", "拉多姆": "波兰", "索斯诺维茨": "波兰",
|
| 124 |
+
"托伦": "波兰", "基尔采": "波兰", "格利维采": "波兰", "扎布热": "波兰", "比托姆": "波兰",
|
| 125 |
+
"奥斯威辛": "波兰", "马尔堡": "波兰", "扎科帕内": "波兰", "维利奇卡": "波兰", "弗罗茨瓦夫": "波兰",
|
| 126 |
+
|
| 127 |
+
# 斯洛伐克
|
| 128 |
+
"布拉迪斯拉发": "斯洛伐克", "科希策": "斯洛伐克", "普雷绍夫": "斯洛伐克", "日利纳": "斯洛伐克", "班斯卡比斯特里察": "斯洛伐克",
|
| 129 |
+
"尼特拉": "斯洛伐克", "特伦钦": "斯洛伐克", "马丁": "斯洛伐克", "特尔纳瓦": "斯洛伐克", "波普拉德": "斯洛伐克",
|
| 130 |
+
"普里维德扎": "斯洛伐克", "兹沃伦": "斯洛伐克", "巴尔代约夫": "斯洛伐克", "列沃恰": "斯洛伐克", "斯皮什斯基堡": "斯洛伐克",
|
| 131 |
+
|
| 132 |
+
# 斯洛文尼亚
|
| 133 |
+
"卢布尔雅那": "斯洛文尼亚", "马里博尔": "斯洛文尼亚", "采列": "斯洛文尼亚", "克拉尼": "斯洛文尼亚", "韦莱涅": "斯洛文尼亚",
|
| 134 |
+
"新戈里察": "斯洛文尼亚", "科佩尔": "斯洛文尼亚", "诺沃梅斯托": "斯洛文尼亚", "卡姆尼克": "斯洛文尼亚", "多姆扎勒": "斯洛文尼亚",
|
| 135 |
+
"布莱德": "斯洛文尼亚", "博希尼": "斯洛文尼亚", "皮兰": "斯洛文尼亚", "什科茨扬": "斯洛文尼亚", "波斯托伊纳": "斯洛文尼亚",
|
| 136 |
+
|
| 137 |
+
# 瑞士
|
| 138 |
+
"苏黎世": "瑞士", "日内瓦": "瑞士", "巴塞尔": "瑞士", "伯尔尼": "瑞士", "洛桑": "瑞士",
|
| 139 |
+
"圣加仑": "瑞士", "卢塞恩": "瑞士", "卢加诺": "瑞士", "比尔": "瑞士", "图恩": "瑞士",
|
| 140 |
+
"拉绍德封": "瑞士", "沙夫豪森": "瑞士", "弗里堡": "瑞士", "韦维": "瑞士", "拉佩斯": "瑞士",
|
| 141 |
+
"因特拉肯": "瑞士", "采尔马特": "瑞士", "格林德瓦": "瑞士", "少女峰": "瑞士", "马特洪峰": "瑞士",
|
| 142 |
+
"圣莫里茨": "瑞士", "洛伊克巴德": "瑞士", "安德马特": "瑞士", "文根": "瑞士", "拉克斯": "瑞士",
|
| 143 |
+
|
| 144 |
+
# === 北欧 ===
|
| 145 |
+
# 瑞典
|
| 146 |
+
"斯德哥尔摩": "瑞典", "哥德堡": "瑞典", "马尔默": "瑞典", "乌普萨拉": "瑞典", "林雪平": "瑞典",
|
| 147 |
+
"韦斯特罗斯": "瑞典", "厄勒布鲁": "瑞典", "北雪平": "瑞典", "赫尔辛堡": "瑞典", "永雪平": "瑞典",
|
| 148 |
+
"松兹瓦尔": "瑞典", "于默奥": "瑞典", "韦克舍": "瑞典", "加夫勒": "瑞典", "博罗斯": "瑞典",
|
| 149 |
+
"法伦": "瑞典", "卡尔斯塔德": "瑞典", "卡尔马": "瑞典", "维斯比": "瑞典", "基律纳": "瑞典",
|
| 150 |
+
|
| 151 |
+
# 挪威
|
| 152 |
+
"奥斯陆": "挪威", "卑尔根": "挪威", "特隆赫姆": "挪威", "斯塔万格": "斯洛文尼亚", "克里斯蒂安桑": "挪威",
|
| 153 |
+
"腓特烈斯塔": "挪威", "德拉门": "挪威", "谢恩": "挪威", "桑内斯": "挪威", "萨尔普斯堡": "挪威",
|
| 154 |
+
"特洛姆瑟": "挪威", "博多": "挪威", "阿尔塔": "挪威", "哈默菲斯特": "挪威", "纳尔维克": "挪威",
|
| 155 |
+
"弗洛姆": "挪威", "盖朗厄尔": "挪威", "奥勒松": "挪威", "利勒哈默尔": "挪威", "罗弗敦群岛": "挪威",
|
| 156 |
+
|
| 157 |
+
# 丹麦
|
| 158 |
+
"哥本哈根": "丹麦", "奥胡斯": "丹麦", "欧登塞": "丹麦", "奥尔堡": "丹麦", "埃斯比约": "丹麦",
|
| 159 |
+
"兰德斯": "丹麦", "科尔丁": "丹麦", "赫尔辛格": "丹麦", "马里布": "丹麦", "海勒鲁普": "丹麦",
|
| 160 |
+
"比隆": "丹麦", "希勒勒": "丹麦", "罗斯基勒": "丹麦", "斯卡恩": "丹麦", "法尔瑟特": "丹麦",
|
| 161 |
+
|
| 162 |
+
# 芬兰
|
| 163 |
+
"赫尔辛基": "芬兰", "埃斯波": "芬兰", "坦佩雷": "芬兰", "万塔": "芬兰", "图尔库": "芬兰",
|
| 164 |
+
"奥卢": "芬兰", "拉赫蒂": "芬兰", "库奥皮奥": "芬兰", "约恩苏": "芬兰", "约瓦斯屈莱": "芬兰",
|
| 165 |
+
"拉彭兰塔": "芬兰", "科特卡": "芬兰", "瓦萨": "芬兰", "弗绍": "芬兰", "海门林纳": "芬兰",
|
| 166 |
+
"罗瓦涅米": "芬兰", "凯米": "芬兰", "托尔尼奥": "芬兰", "萨利色尔卡": "芬兰", "伊瓦洛": "芬兰",
|
| 167 |
+
|
| 168 |
+
# 冰岛
|
| 169 |
+
"雷克雅未克": "冰岛", "科帕沃古尔": "冰岛", "哈夫纳夫约杜尔": "冰岛", "阿克雷里": "冰岛", "雷克雅内斯": "冰岛",
|
| 170 |
+
"塞尔福斯": "冰岛", "韦斯特曼纳群岛": "冰岛", "胡萨维克": "冰岛", "埃伊尔斯塔济": "冰岛", "凯夫拉维克": "冰岛",
|
| 171 |
+
|
| 172 |
+
# === 东欧 ===
|
| 173 |
+
# 俄罗斯(欧洲部分)
|
| 174 |
+
"莫斯科": "俄罗斯", "圣彼得堡": "俄罗斯", "下诺夫哥罗德": "俄罗斯", "喀山": "俄罗斯", "萨马拉": "俄罗斯",
|
| 175 |
+
"伏尔加格勒": "俄罗斯", "罗斯托夫": "俄罗斯", "乌法": "俄罗斯", "彭萨": "俄罗斯", "雅罗斯拉夫": "俄罗斯",
|
| 176 |
+
"卡卢加": "俄罗斯", "图拉": "俄罗斯", "弗拉基米尔": "俄罗斯", "苏兹达尔": "俄罗斯", "谢尔盖夫": "俄罗斯",
|
| 177 |
+
|
| 178 |
+
# 乌克兰
|
| 179 |
+
"基辅": "乌克兰", "哈尔科夫": "乌克兰", "敖德萨": "乌克兰", "第聂伯": "乌克兰", "顿涅茨克": "乌克兰",
|
| 180 |
+
"扎波罗热": "乌克兰", "利沃夫": "乌克兰", "克里沃罗格": "乌克兰", "尼古拉耶夫": "乌克兰", "马里乌波尔": "乌克兰",
|
| 181 |
+
"卢甘斯克": "乌克兰", "文尼察": "乌克兰", "赫尔松": "乌克兰", "切尔卡瑟": "乌克兰", "切尔尼戈夫": "乌克兰",
|
| 182 |
+
|
| 183 |
+
# 白俄罗斯
|
| 184 |
+
"明斯克": "白俄罗斯", "戈梅利": "白俄罗斯", "莫吉廖夫": "白俄罗斯", "维帖布斯克": "白俄罗斯", "格罗德诺": "白俄罗斯",
|
| 185 |
+
"布列斯特": "白俄罗斯", "鲍里索夫": "白俄罗斯", "巴拉诺维奇": "白俄罗斯", "平斯克": "白俄罗斯", "奥尔沙": "白俄罗斯",
|
| 186 |
+
|
| 187 |
+
# 波罗的海三国
|
| 188 |
+
"里加": "拉脱维亚", "陶格夫匹尔斯": "拉脱维亚", "利耶帕亚": "拉脱维亚", "叶尔加瓦": "拉脱维亚", "文茨皮尔斯": "拉脱维亚",
|
| 189 |
+
"塔林": "爱沙尼亚", "塔尔图": "爱沙尼亚", "纳尔瓦": "爱沙尼亚", "帕尔努": "爱沙尼亚", "科赫特拉": "爱沙尼亚",
|
| 190 |
+
"维尔纽斯": "立陶宛", "考纳斯": "立陶宛", "克莱佩达": "立陶宛", "希奥利艾": "立陶宛", "帕内韦日斯": "立陶宛",
|
| 191 |
+
|
| 192 |
+
# 摩尔多瓦
|
| 193 |
+
"基希讷乌": "摩尔多瓦", "蒂拉斯波尔": "摩尔多瓦", "巴尔济": "摩尔多瓦", "本德尔": "摩尔多瓦", "雷布尼察": "摩尔多瓦",
|
| 194 |
+
|
| 195 |
+
# === 巴尔干半岛 ===
|
| 196 |
+
# 克罗地亚
|
| 197 |
+
"萨格勒布": "克罗地亚", "斯普利特": "克罗地亚", "里耶卡": "克罗地亚", "奥西耶克": "克罗地亚", "扎达尔": "克罗地亚",
|
| 198 |
+
"普拉": "克罗地亚", "杜布罗夫尼克": "克罗地亚", "希贝尼克": "克罗地亚", "卡尔洛瓦茨": "克罗地亚", "瓦拉日丁": "克罗地亚",
|
| 199 |
+
"罗维尼": "克罗地亚", "波雷奇": "克罗地亚", "特罗吉尔": "克罗地亚", "赫瓦尔": "克罗地亚", "科尔丘拉": "克罗地亚",
|
| 200 |
+
|
| 201 |
+
# 塞尔维亚
|
| 202 |
+
"贝尔格莱德": "塞尔维亚", "诺维萨德": "塞尔维亚", "尼什": "塞尔维亚", "克拉古耶瓦茨": "塞尔维亚", "苏博蒂察": "塞尔维亚",
|
| 203 |
+
"潘切沃": "塞尔维亚", "泽蒙": "塞尔维亚", "莱斯科瓦茨": "塞尔维亚", "恰恰克": "塞尔维亚", "新帕扎尔": "塞尔维亚",
|
| 204 |
+
|
| 205 |
+
# 波黑
|
| 206 |
+
"萨拉热窝": "波黑", "巴尼亚卢卡": "波黑", "图兹拉": "波黑", "泽尼察": "波黑", "莫斯塔尔": "波黑",
|
| 207 |
+
"比哈奇": "波黑", "布里耶利纳": "波黑", "多博伊": "波黑", "格拉迪什卡": "波黑", "利夫诺": "波黑",
|
| 208 |
+
|
| 209 |
+
# 黑山
|
| 210 |
+
"波德戈里察": "黑山", "尼克希奇": "黑山", "普里耶波列": "黑山", "比耶洛波列": "黑山", "采蒂涅": "黑山",
|
| 211 |
+
"布德瓦": "黑山", "科托尔": "黑山", "乌尔齐尼": "黑山", "赫尔采格诺维": "黑山", "巴尔": "黑山",
|
| 212 |
+
|
| 213 |
+
# 北马其顿
|
| 214 |
+
"斯科普里": "北马其顿", "库马诺沃": "北马其顿", "比托拉": "北马其顿", "普里莱普": "北马其顿", "特托沃": "北马其顿",
|
| 215 |
+
"韦莱斯": "北马其顿", "什蒂普": "北马其顿", "奥赫里德": "北马其顿", "戈斯蒂瓦尔": "北马其顿", "斯特鲁加": "北马其顿",
|
| 216 |
+
|
| 217 |
+
# 阿尔巴尼亚
|
| 218 |
+
"地拉那": "阿尔巴尼亚", "都拉斯": "阿尔巴尼亚", "埃尔巴桑": "阿尔巴尼亚", "发罗拉": "阿尔巴尼亚", "斯库台": "阿尔巴尼亚",
|
| 219 |
+
"科尔察": "阿尔巴尼亚", "卢什涅": "阿尔巴尼亚", "费里": "阿尔巴尼亚", "贝拉特": "阿尔巴尼亚", "吉诺卡斯特": "阿尔巴尼亚",
|
| 220 |
+
|
| 221 |
+
# 保加利亚
|
| 222 |
+
"索菲亚": "保加利亚", "普罗夫迪夫": "保加利亚", "瓦尔纳": "保加利亚", "布尔加斯": "保加利亚", "鲁塞": "保加利亚",
|
| 223 |
+
"斯塔拉扎戈拉": "保加利亚", "普列文": "保加利亚", "슬리문": "保加利亚", "多布里奇": "保加利亚", "舒门": "保加利亚",
|
| 224 |
+
"帕扎尔吉克": "保加利亚", "哈斯科沃": "保加利亚", "扬博尔": "保加利亚", "布拉戈耶夫格勒": "保加利亚", "韦利科特尔诺沃": "保加利亚",
|
| 225 |
+
|
| 226 |
+
# 罗马尼亚
|
| 227 |
+
"布加勒斯特": "罗马尼亚", "克卢日": "罗马尼亚", "蒂米什瓦拉": "罗马尼亚", "雅西": "罗马尼亚", "康斯坦察": "罗马尼亚",
|
| 228 |
+
"克拉约瓦": "罗马尼亚", "布拉索夫": "罗马尼亚", "加拉茨": "罗马尼亚", "普洛耶什蒂": "罗马尼亚", "奥拉迪亚": "罗马尼亚",
|
| 229 |
+
"布勒伊拉": "罗马尼亚", "阿拉德": "罗马尼亚", "皮特什蒂": "罗马尼亚", "锡比乌": "罗马尼亚", "巴克乌": "罗马尼亚",
|
| 230 |
+
"锡纳亚": "罗马尼亚", "布兰": "罗马尼亚", "德古拉城堡": "罗马尼亚", "佩莱什城堡": "罗马尼亚", "马拉穆雷什": "罗马尼亚",
|
| 231 |
+
|
| 232 |
+
# 土耳其(欧洲部分)
|
| 233 |
+
"伊斯坦布尔": "土耳其", "埃迪尔内": "土耳其", "泰基尔达": "土耳其", "克尔克拉雷利": "土耳其", "恰纳卡莱": "土耳其",
|
| 234 |
+
|
| 235 |
+
# 塞浦路斯
|
| 236 |
+
"尼科西亚": "塞浦路斯", "利马索尔": "塞浦路斯", "拉纳卡": "塞浦路斯", "法马古斯塔": "塞浦路斯", "帕福斯": "塞浦路斯",
|
| 237 |
+
"凯里尼亚": "塞浦路斯", "阿依纳帕": "塞浦路斯", "普罗塔拉斯": "塞浦路斯", "特罗多斯": "塞浦路斯", "阿卡马斯": "塞浦路斯",
|
| 238 |
+
|
| 239 |
+
# 马耳他
|
| 240 |
+
"瓦莱塔": "马耳他", "斯利马": "马耳他", "圣朱利安斯": "马耳他", "姆西达": "马耳他", "维多利亚": "马耳他",
|
| 241 |
+
"马尔萨什洛克": "马耳他", "梅利哈": "马耳他", "戈佐": "马耳他", "蓝湖": "马耳他", "姆迪纳": "马耳他",
|
| 242 |
+
}
|
| 243 |
+
|
| 244 |
+
# 欧洲城市别名映射(包含各种表达方式)
|
| 245 |
+
self.european_city_aliases = {
|
| 246 |
+
# 英文名称映射
|
| 247 |
+
"paris": "巴黎", "rome": "罗马", "london": "伦敦", "berlin": "柏林",
|
| 248 |
+
"madrid": "马德里", "barcelona": "巴塞罗那", "vienna": "维也纳", "prague": "布拉格",
|
| 249 |
+
"amsterdam": "阿姆斯特丹", "florence": "佛罗伦萨", "venice": "威尼斯", "athens": "雅典",
|
| 250 |
+
"budapest": "布达佩斯", "lisbon": "里斯本", "stockholm": "斯德哥尔摩", "copenhagen": "哥本哈根",
|
| 251 |
+
"helsinki": "赫尔辛基", "oslo": "奥斯陆", "zurich": "苏黎世", "geneva": "日内瓦",
|
| 252 |
+
"munich": "慕尼黑", "milan": "米兰", "naples": "那不勒斯", "nice": "尼斯",
|
| 253 |
+
"edinburgh": "爱丁堡", "dublin": "都柏林", "brussels": "布鲁塞尔", "warsaw": "华沙",
|
| 254 |
+
"krakow": "克拉科夫", "zagreb": "萨格勒布", "belgrade": "贝尔格莱德", "sofia": "索菲亚",
|
| 255 |
+
"bucharest": "布加勒斯特", "kiev": "基辅", "moscow": "莫斯科", "st petersburg": "圣彼得堡",
|
| 256 |
+
"reykjavik": "雷克雅未克", "tallinn": "塔林", "riga": "里加", "vilnius": "维尔纽斯",
|
| 257 |
+
"bratislava": "布拉迪斯拉发", "ljubljana": "卢布尔雅那", "sarajevo": "萨拉热窝",
|
| 258 |
+
"dubrovnik": "杜布罗夫尼克", "split": "斯普利特", "santorini": "圣托里尼", "mykonos": "米科诺斯",
|
| 259 |
+
|
| 260 |
+
# 中文别名
|
| 261 |
+
"花都": "巴黎", "光之城": "巴黎", "永恒之城": "罗马", "雾都": "伦敦",
|
| 262 |
+
"音乐之都": "维也纳", "黄金城市": "布拉格", "千塔之城": "布拉格",
|
| 263 |
+
"运河之城": "阿姆斯特丹", "翡冷翠": "佛罗伦萨", "文艺复兴之都": "佛罗伦萨",
|
| 264 |
+
"水城": "威尼斯", "西方文明的摇篮": "雅典", "多瑙河明珠": "布达佩斯",
|
| 265 |
+
"七丘之城": "里斯本", "北方威尼斯": "斯德哥尔摩", "童话之都": "哥本哈根",
|
| 266 |
+
"波罗的海的女儿": "赫尔辛基", "欧洲屋脊": "因特拉肯", "北方雅典": "爱丁堡",
|
| 267 |
+
"翡翠岛": "都柏林", "欧洲之都": "布鲁塞尔", "高迪之城": "巴塞罗那",
|
| 268 |
+
}
|
| 269 |
+
|
| 270 |
+
# 中文数字映射(保持原有)
|
| 271 |
+
self.chinese_numbers = {
|
| 272 |
+
'一': 1, '二': 2, '三': 3, '四': 4, '五': 5, '六': 6, '七': 7, '八': 8, '九': 9, '十': 10,
|
| 273 |
+
'两': 2, '半': 0.5, '壹': 1, '贰': 2, '叁': 3, '肆': 4, '伍': 5, '陆': 6, '柒': 7, '捌': 8, '玖': 9, '拾': 10,
|
| 274 |
+
# 特殊时长表达
|
| 275 |
+
'半个月': 15, '一个月': 30, '半年': 180, '一年': 365,
|
| 276 |
+
'半天': 0.5, '一天': 1, '两天': 2, '三天': 3, '四天': 4, '五天': 5, '六天': 6, '七天': 7,
|
| 277 |
+
'八天': 8, '九天': 9, '十天': 10, '半周': 3.5, '一周': 7, '两周': 14,
|
| 278 |
+
# 假期相关
|
| 279 |
+
'小长假': 3, '长假': 7, '十一': 7, '国庆': 7, '春节': 7, '五一': 3, '清明': 3,
|
| 280 |
+
'端午': 3, '中秋': 3, '元旦': 3, '暑假': 60, '寒假': 30, '周末': 2, '长周末': 3
|
| 281 |
+
}
|
| 282 |
|
| 283 |
def extract(self, user_message: str) -> dict:
|
| 284 |
+
"""使用纯正则表达式提取结构化信息 - 聚焦欧洲"""
|
| 285 |
|
| 286 |
# 输入验证
|
| 287 |
if not user_message or not isinstance(user_message, str):
|
| 288 |
log.warning("⚠️ 收到无效的用户消息")
|
| 289 |
return {}
|
| 290 |
|
| 291 |
+
if len(user_message.strip()) < 2:
|
| 292 |
+
log.warning("⚠️ 用户消息过短,跳过信息提取")
|
| 293 |
+
return {}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 294 |
|
| 295 |
+
log.info("🛠️ 使用正则表达式提取信息(聚焦欧洲)")
|
|
|
|
|
|
|
| 296 |
|
| 297 |
+
result = {}
|
|
|
|
| 298 |
|
| 299 |
+
# 1. 提取目的地信息
|
| 300 |
+
destination_info = self._extract_european_destination(user_message)
|
| 301 |
+
if destination_info:
|
| 302 |
+
result["destination"] = destination_info
|
| 303 |
|
| 304 |
+
# 2. 提取时长信息
|
| 305 |
+
duration_info = self._extract_duration(user_message)
|
| 306 |
+
if duration_info:
|
| 307 |
+
result["duration"] = duration_info
|
| 308 |
|
| 309 |
+
# 3. 提取预算信息
|
| 310 |
+
budget_info = self._extract_budget(user_message)
|
| 311 |
+
if budget_info:
|
| 312 |
+
result["budget"] = budget_info
|
|
|
|
|
|
|
| 313 |
|
| 314 |
+
log.info(f"📊 欧洲城市正则提取结果: {result}")
|
| 315 |
+
return result
|
| 316 |
+
|
| 317 |
+
def _extract_european_destination(self, text: str) -> dict:
|
| 318 |
+
"""提取欧洲目的地信息 - 专门针对欧洲城市"""
|
| 319 |
result = {}
|
|
|
|
| 320 |
|
| 321 |
+
# 目的地提取模式(复用之前的完整模式)
|
| 322 |
+
destination_patterns = [
|
| 323 |
+
# 基本动词 + 地点
|
| 324 |
+
r'去(\w+)', r'到(\w+)', r'想去(\w+)', r'前往(\w+)', r'计划去(\w+)', r'打算去(\w+)',
|
| 325 |
+
r'准备去(\w+)', r'希望去(\w+)', r'考虑去(\w+)', r'决定去(\w+)', r'选择去(\w+)',
|
| 326 |
+
r'旅行(\w+)', r'游(\w+)', r'玩(\w+)', r'访问(\w+)', r'探索(\w+)', r'体验(\w+)',
|
| 327 |
+
r'出发去(\w+)', r'飞去(\w+)', r'飞往(\w+)', r'飞到(\w+)', r'坐车去(\w+)', r'开车去(\w+)',
|
| 328 |
+
|
| 329 |
+
# 目的地关键词
|
| 330 |
+
r'目的地[\s是::]*(\w+)', r'地方[\s是::]*(\w+)', r'城市[\s是::]*(\w+)',
|
| 331 |
+
r'国家[\s是::]*(\w+)', r'地区[\s是::]*(\w+)', r'景点[\s是::]*(\w+)',
|
| 332 |
+
|
| 333 |
+
# 在某地表达
|
| 334 |
+
r'在(\w+)旅游', r'在(\w+)游玩', r'在(\w+)度假', r'在(\w+)旅行', r'在(\w+)玩',
|
| 335 |
+
r'在(\w+)观光', r'在(\w+)游览', r'在(\w+)休假', r'在(\w+)放松', r'在(\w+)散心',
|
| 336 |
+
|
| 337 |
+
# 某地 + 行程/之旅
|
| 338 |
+
r'(\w+)之旅', r'(\w+)行程', r'(\w+)旅程', r'(\w+)游', r'(\w+)行', r'(\w+)之行',
|
| 339 |
+
r'(\w+)深度游', r'(\w+)自由行', r'(\w+)跟团游', r'(\w+)自驾游', r'(\w+)蜜月游',
|
| 340 |
+
|
| 341 |
+
# 包含"的"的表达
|
| 342 |
+
r'(\w+)的旅行', r'(\w+)的行程', r'(\w+)的攻略', r'(\w+)的景点', r'(\w+)的美食',
|
| 343 |
+
r'(\w+)的文化', r'(\w+)的历史', r'(\w+)的风景', r'(\w+)的特色', r'(\w+)的魅力',
|
| 344 |
+
|
| 345 |
+
# 特殊交通方式表达
|
| 346 |
+
r'飞(\w+)', r'坐船去(\w+)', r'坐火车去(\w+)', r'自驾去(\w+)', r'徒步去(\w+)',
|
| 347 |
+
r'骑行去(\w+)', r'背包去(\w+)', r'穷游去(\w+)',
|
| 348 |
+
|
| 349 |
+
# 旅行类型 + 地点
|
| 350 |
+
r'自由行(\w+)', r'跟团(\w+)', r'自驾(\w+)', r'蜜月(\w+)', r'毕业(\w+)',
|
| 351 |
+
r'亲子(\w+)', r'家庭(\w+)', r'情侣(\w+)', r'闺蜜(\w+)', r'独自(\w+)',
|
| 352 |
+
|
| 353 |
+
# 度假/休闲表达
|
| 354 |
+
r'度假去(\w+)', r'休闲去(\w+)', r'放松去(\w+)', r'散心去(\w+)', r'疗养去(\w+)',
|
| 355 |
+
|
| 356 |
+
# 其他变体
|
| 357 |
+
r'想要去(\w+)', r'渴望去(\w+)', r'梦想去(\w+)', r'向往(\w+)', r'憧憬(\w+)',
|
| 358 |
+
r'安排去(\w+)', r'规划去(\w+)', r'预定(\w+)', r'订(\w+)的票', r'买(\w+)机票'
|
| 359 |
+
]
|
| 360 |
+
|
| 361 |
+
# 尝试所有模式
|
| 362 |
+
for pattern in destination_patterns:
|
| 363 |
+
matches = re.findall(pattern, text)
|
| 364 |
+
for match in matches:
|
| 365 |
+
city_name = match.strip()
|
| 366 |
+
|
| 367 |
+
# 首先检查别名映射
|
| 368 |
+
if city_name.lower() in self.european_city_aliases:
|
| 369 |
+
city_name = self.european_city_aliases[city_name.lower()]
|
| 370 |
+
elif city_name in self.european_city_aliases:
|
| 371 |
+
city_name = self.european_city_aliases[city_name]
|
| 372 |
+
|
| 373 |
+
# 验证是否为欧洲城市
|
| 374 |
+
if self._is_valid_european_city(city_name):
|
| 375 |
+
result["name"] = city_name
|
| 376 |
+
# 查找对应国家
|
| 377 |
+
if city_name in self.european_cities:
|
| 378 |
+
result["country"] = self.european_cities[city_name]
|
| 379 |
+
break
|
| 380 |
+
if result:
|
| 381 |
+
break
|
| 382 |
+
|
| 383 |
+
# 特殊处理:国家+城市的组合(欧洲专用)
|
| 384 |
+
european_country_city_patterns = [
|
| 385 |
+
r'(\w+)的(\w+)', r'(\w+)(\w+)市', r'(\w+)(\w+)府',
|
| 386 |
+
r'(\w+)(\w+)州', r'(\w+)(\w+)省', r'(\w+)(\w+)岛'
|
| 387 |
]
|
| 388 |
|
| 389 |
+
if not result:
|
| 390 |
+
for pattern in european_country_city_patterns:
|
| 391 |
+
matches = re.findall(pattern, text)
|
| 392 |
+
for country, city in matches:
|
| 393 |
+
# 检查是否是已知的欧洲国家-城市组合
|
| 394 |
+
if city in self.european_cities and self.european_cities[city] == country:
|
| 395 |
+
result["name"] = city
|
| 396 |
+
result["country"] = country
|
| 397 |
+
break
|
| 398 |
+
elif self._is_valid_european_country(country) and self._is_valid_european_city(city):
|
| 399 |
+
result["name"] = city
|
| 400 |
+
result["country"] = country
|
| 401 |
+
break
|
| 402 |
+
if result:
|
| 403 |
break
|
| 404 |
|
| 405 |
+
return result
|
| 406 |
+
|
| 407 |
+
def _extract_duration(self, text: str) -> dict:
|
| 408 |
+
"""提取时长信息 - 完整保留之前的实现"""
|
| 409 |
+
result = {}
|
| 410 |
+
|
| 411 |
+
# 天数提取模式 - 大幅扩展(保持原有完整实现)
|
| 412 |
day_patterns = [
|
| 413 |
+
# 基本数字+天
|
| 414 |
+
r'(\d+)天', r'(\d+)日', r'(\d+)号', r'(\d+)个天', r'(\d+)个日',
|
| 415 |
+
|
| 416 |
+
# 动词+天数
|
| 417 |
+
r'玩(\d+)天', r'住(\d+)天', r'呆(\d+)天', r'待(\d+)天', r'停留(\d+)天',
|
| 418 |
+
r'逗留(\d+)天', r'游(\d+)天', r'旅行(\d+)天', r'度假(\d+)天', r'休假(\d+)天',
|
| 419 |
+
|
| 420 |
+
# 行程相关
|
| 421 |
+
r'(\d+)天行程', r'(\d+)天旅程', r'(\d+)天旅行', r'(\d+)天游', r'(\d+)天之旅',
|
| 422 |
+
r'(\d+)天的行程', r'(\d+)天的旅程', r'(\d+)天的旅行', r'(\d+)天的假期',
|
| 423 |
+
r'行程(\d+)天', r'旅程(\d+)天', r'假期(\d+)天', r'休假(\d+)天',
|
| 424 |
+
|
| 425 |
+
# 时间修饰词
|
| 426 |
+
r'大概(\d+)天', r'约(\d+)天', r'差不多(\d+)天', r'左右(\d+)天', r'上下(\d+)天',
|
| 427 |
+
r'最多(\d+)天', r'最少(\d+)天', r'至少(\d+)天', r'不超过(\d+)天', r'超过(\d+)天',
|
| 428 |
+
r'将近(\d+)天', r'接近(\d+)天', r'快(\d+)天', r'足足(\d+)天', r'整整(\d+)天',
|
| 429 |
+
|
| 430 |
+
# 周相关
|
| 431 |
+
r'(\d+)周', r'(\d+)个周', r'(\d+)星期', r'(\d+)个星期', r'(\d+)礼拜', r'(\d+)个礼拜',
|
| 432 |
+
r'玩(\d+)周', r'住(\d+)周', r'呆(\d+)周', r'待(\d+)周', r'旅行(\d+)周',
|
| 433 |
+
r'(\d+)周的行程', r'(\d+)星期的旅行', r'(\d+)个礼拜的假期',
|
| 434 |
+
|
| 435 |
+
# 月相关
|
| 436 |
+
r'(\d+)月', r'(\d+)个月', r'(\d+)个月份',
|
| 437 |
+
r'玩(\d+)个月', r'住(\d+)个月', r'旅行(\d+)个月', r'度假(\d+)个月',
|
| 438 |
+
r'(\d+)个月的行程', r'(\d+)月的旅行', r'(\d+)个月的假期',
|
| 439 |
+
|
| 440 |
+
# 范围表达
|
| 441 |
+
r'(\d+)-(\d+)天', r'(\d+)到(\d+)天', r'(\d+)至(\d+)天', r'(\d+)~(\d+)天',
|
| 442 |
+
r'(\d+)天到(\d+)天', r'从(\d+)天到(\d+)天', r'介于(\d+)到(\d+)天',
|
| 443 |
+
|
| 444 |
+
# 中文数字
|
| 445 |
+
r'一天', r'二天', r'三天', r'四天', r'五天', r'六天', r'七天', r'八天', r'九天', r'十天',
|
| 446 |
+
r'两天', r'俩天', r'仨天', r'半天', r'一天半', r'两天半', r'三天半',
|
| 447 |
+
r'十一天', r'十二天', r'十三天', r'十四天', r'十五天', r'二十天', r'三十天',
|
| 448 |
+
|
| 449 |
+
# 特殊时长表达
|
| 450 |
+
r'周末', r'长周末', r'小长假', r'长假', r'黄金周', r'假期',
|
| 451 |
+
r'十一', r'国庆', r'春节', r'过年', r'五一', r'劳动节', r'清明', r'端午', r'中秋', r'元旦',
|
| 452 |
+
r'暑假', r'寒假', r'年假', r'蜜月', r'度蜜月',
|
| 453 |
+
r'短途', r'中途', r'长途', r'快闪', r'一日游', r'两日游', r'三日游', r'多日游'
|
| 454 |
]
|
| 455 |
|
| 456 |
+
# 尝试提取时长(完整保留原有逻辑)
|
| 457 |
for pattern in day_patterns:
|
| 458 |
+
matches = re.findall(pattern, text)
|
| 459 |
+
for match in matches:
|
| 460 |
+
days = None
|
| 461 |
+
|
| 462 |
+
if isinstance(match, tuple):
|
| 463 |
+
# 范围表达,取平均值
|
| 464 |
+
try:
|
| 465 |
+
start_days = int(match[0])
|
| 466 |
+
end_days = int(match[1])
|
| 467 |
+
days = (start_days + end_days) / 2
|
| 468 |
+
except:
|
| 469 |
+
days = int(match[0]) if match[0].isdigit() else None
|
| 470 |
+
elif match.isdigit():
|
| 471 |
+
days = int(match)
|
| 472 |
+
|
| 473 |
+
# 处理单位转换
|
| 474 |
+
if '周' in pattern or '星期' in pattern or '礼拜' in pattern:
|
| 475 |
+
days *= 7
|
| 476 |
+
elif '月' in pattern:
|
| 477 |
+
days *= 30
|
| 478 |
+
|
| 479 |
+
# 处理中文数字和特殊表达
|
| 480 |
+
elif match in self.chinese_numbers:
|
| 481 |
+
days = self.chinese_numbers[match]
|
| 482 |
+
|
| 483 |
+
# 验证天数合理性
|
| 484 |
+
if days and 0.5 <= days <= 365:
|
| 485 |
+
result["days"] = int(days) if days >= 1 else days
|
| 486 |
+
|
| 487 |
+
# 添加描��信息
|
| 488 |
+
if days <= 1:
|
| 489 |
+
result["description"] = "当日往返"
|
| 490 |
+
elif days <= 3:
|
| 491 |
+
result["description"] = "短途旅行"
|
| 492 |
+
elif days <= 7:
|
| 493 |
+
result["description"] = "一周内旅行"
|
| 494 |
+
elif days <= 14:
|
| 495 |
+
result["description"] = "中长途旅行"
|
| 496 |
+
elif days <= 30:
|
| 497 |
+
result["description"] = "长途旅行"
|
| 498 |
+
else:
|
| 499 |
+
result["description"] = "超长途旅行"
|
| 500 |
+
|
| 501 |
+
# 保留原始匹配文本作为额外描述
|
| 502 |
+
if not isinstance(match, tuple) and not match.isdigit():
|
| 503 |
+
result["description"] = match
|
| 504 |
+
|
| 505 |
break
|
| 506 |
+
if result:
|
| 507 |
+
break
|
| 508 |
|
| 509 |
+
return result
|
| 510 |
+
|
| 511 |
+
def _extract_budget(self, text: str) -> dict:
|
| 512 |
+
"""提取预算信息 - 针对欧洲旅行优化"""
|
| 513 |
+
result = {}
|
| 514 |
+
text_lower = text.lower()
|
| 515 |
+
|
| 516 |
+
# 欧洲旅行常用货币的金额提取模式
|
| 517 |
+
amount_patterns = [
|
| 518 |
+
# === 欧元表达 - 优先级最高(欧洲旅行主要货币) ===
|
| 519 |
+
r'(\d+)欧元', r'(\d+)欧', r'€(\d+)', r'EUR(\d+)', r'eur(\d+)',
|
| 520 |
+
r'(\d+)euro', r'(\d+)Euro', r'(\d+)EURO',
|
| 521 |
+
r'(\d+\.?\d*)欧元', r'€(\d+\.?\d*)',
|
| 522 |
+
r'预算(\d+)欧', r'花费(\d+)欧', r'大概(\d+)欧', r'约(\d+)欧',
|
| 523 |
+
|
| 524 |
+
# === 人民币表达 ===
|
| 525 |
+
r'(\d+)元', r'(\d+)块', r'(\d+)块钱', r'(\d+)人民币', r'(\d+)rmb', r'(\d+)RMB',
|
| 526 |
+
r'¥(\d+)', r'¥(\d+)', r'CNY(\d+)', r'cny(\d+)',
|
| 527 |
+
|
| 528 |
+
# === 美元表达 ===
|
| 529 |
+
r'(\d+)美元', r'(\d+)美刀', r'(\d+)刀', r'\$(\d+)', r'USD(\d+)', r'usd(\d+)',
|
| 530 |
+
r'(\d+)dollar', r'(\d+)Dollar',
|
| 531 |
+
|
| 532 |
+
# === 英镑表达(英国旅行) ===
|
| 533 |
+
r'(\d+)英镑', r'(\d+)镑', r'£(\d+)', r'GBP(\d+)', r'gbp(\d+)',
|
| 534 |
+
r'(\d+)pound', r'(\d+)Pound',
|
| 535 |
+
|
| 536 |
+
# === 瑞士法郎(瑞士旅行) ===
|
| 537 |
+
r'(\d+)瑞士法郎', r'(\d+)法郎', r'CHF(\d+)', r'chf(\d+)',
|
| 538 |
+
r'(\d+)瑞郎', r'(\d+)swiss franc',
|
| 539 |
+
|
| 540 |
+
# === 预算相关表达 ===
|
| 541 |
+
r'预算(\d+)', r'预算是(\d+)', r'预算大概(\d+)', r'预算约(\d+)',
|
| 542 |
+
r'预算差不多(\d+)', r'预算在(\d+)', r'预算控制在(\d+)',
|
| 543 |
+
r'预算不超过(\d+)', r'预算最多(\d+)', r'预算最少(\d+)',
|
| 544 |
+
|
| 545 |
+
# === 花费相关表达 ===
|
| 546 |
+
r'花(\d+)', r'花费(\d+)', r'花销(\d+)', r'开销(\d+)', r'支出(\d+)',
|
| 547 |
+
r'费用(\d+)', r'成本(\d+)', r'总共(\d+)', r'一共(\d+)', r'总计(\d+)',
|
| 548 |
+
|
| 549 |
+
# === 万元表达 ===
|
| 550 |
+
r'(\d+)万', r'(\d+)万元', r'(\d+)万块', r'(\d+)万人民币',
|
| 551 |
+
r'(\d+)万欧', r'(\d+)万欧元', r'(\d+)万美元', r'(\d+)万英镑',
|
| 552 |
+
r'(\d+\.?\d*)万', r'(\d+\.?\d*)万元',
|
| 553 |
+
|
| 554 |
+
# === 千元表达 ===
|
| 555 |
+
r'(\d+)千', r'(\d+)千元', r'(\d+)千块', r'(\d+)k', r'(\d+)K',
|
| 556 |
+
r'(\d+)千欧', r'(\d+)千美元', r'(\d+)千英镑',
|
| 557 |
+
|
| 558 |
+
# === 范围表达 ===
|
| 559 |
+
r'(\d+)-(\d+)', r'(\d+)到(\d+)', r'(\d+)至(\d+)', r'(\d+)~(\d+)',
|
| 560 |
+
r'(\d+)左右', r'约(\d+)', r'差不多(\d+)', r'大概(\d+)',
|
| 561 |
+
|
| 562 |
+
# === 每人/每天相关 ===
|
| 563 |
+
r'每人(\d+)', r'人均(\d+)', r'单人(\d+)', r'每天(\d+)', r'日均(\d+)',
|
| 564 |
+
|
| 565 |
+
# === 中文数字金额 ===
|
| 566 |
+
r'一万', r'两万', r'三万', r'四万', r'五万', r'六万', r'七万', r'八万', r'九万', r'十万',
|
| 567 |
+
r'一千', r'两千', r'三千', r'四千', r'五千', r'六千', r'七千', r'八千', r'九千'
|
| 568 |
]
|
| 569 |
|
| 570 |
+
# 中文数字金额映射
|
| 571 |
+
chinese_money = {
|
| 572 |
+
'一万': 10000, '两万': 20000, '三万': 30000, '四万': 40000, '五万': 50000,
|
| 573 |
+
'六万': 60000, '七万': 70000, '八万': 80000, '九万': 90000, '十万': 100000,
|
| 574 |
+
'一千': 1000, '两千': 2000, '三千': 3000, '四千': 4000, '五千': 5000,
|
| 575 |
+
'六千': 6000, '七��': 7000, '八千': 8000, '九千': 9000
|
| 576 |
+
}
|
| 577 |
+
|
| 578 |
+
# 尝试提取金额
|
| 579 |
+
for pattern in amount_patterns:
|
| 580 |
+
matches = re.findall(pattern, text)
|
| 581 |
+
for match in matches:
|
| 582 |
+
amount = None
|
| 583 |
+
currency = "RMB" # 默认货币
|
| 584 |
+
|
| 585 |
+
if isinstance(match, tuple):
|
| 586 |
+
# 处理范围或多个捕获组
|
| 587 |
+
if len(match) == 2 and all(m.replace('.','').isdigit() for m in match if m):
|
| 588 |
+
try:
|
| 589 |
+
amount = (float(match[0]) + float(match[1])) / 2
|
| 590 |
+
except:
|
| 591 |
+
amount = float(match[0]) if match[0].replace('.','').isdigit() else float(match[1])
|
| 592 |
+
else:
|
| 593 |
+
for m in match:
|
| 594 |
+
if m and m.replace('.','').isdigit():
|
| 595 |
+
amount = float(m)
|
| 596 |
+
break
|
| 597 |
+
else:
|
| 598 |
+
if match in chinese_money:
|
| 599 |
+
amount = chinese_money[match]
|
| 600 |
+
elif match.replace('.','').isdigit():
|
| 601 |
+
amount = float(match)
|
| 602 |
+
|
| 603 |
+
if amount and amount > 0:
|
| 604 |
+
# 处理单位转换
|
| 605 |
+
if '万' in pattern:
|
| 606 |
+
amount *= 10000
|
| 607 |
+
elif '千' in pattern or 'k' in pattern.lower():
|
| 608 |
+
amount *= 1000
|
| 609 |
+
|
| 610 |
+
result["amount"] = int(amount)
|
| 611 |
+
|
| 612 |
+
# 确定货币类型(针对欧洲旅行优化)
|
| 613 |
+
if any(keyword in pattern for keyword in ['欧元', '欧', '€', 'eur', 'euro']):
|
| 614 |
+
result["currency"] = "EUR"
|
| 615 |
+
elif any(keyword in pattern for keyword in ['英镑', '镑', '£', 'gbp', 'pound']):
|
| 616 |
+
result["currency"] = "GBP"
|
| 617 |
+
elif any(keyword in pattern for keyword in ['瑞士法郎', '法郎', '瑞郎', 'chf', 'swiss franc']):
|
| 618 |
+
result["currency"] = "CHF"
|
| 619 |
+
elif any(keyword in pattern for keyword in ['美元', '美刀', '刀', 'usd', 'dollar']):
|
| 620 |
+
result["currency"] = "USD"
|
| 621 |
+
else:
|
| 622 |
+
result["currency"] = "RMB"
|
| 623 |
+
break
|
| 624 |
+
if result.get("amount"):
|
| 625 |
break
|
| 626 |
|
| 627 |
+
# 预算类型识别 - 针对欧洲旅行优化
|
| 628 |
budget_type_keywords = {
|
| 629 |
+
'economy': [
|
| 630 |
+
# 经济相关
|
| 631 |
+
'经济', '便宜', '省钱', '实惠', '节省', '穷游', '学生', '青年',
|
| 632 |
+
'预算有限', '钱不多', '不贵', '划算', '性价比', '背包客',
|
| 633 |
+
'简单', '基础', '低成本', '节约', '省着花', '紧巴巴',
|
| 634 |
+
# 欧洲特色经济住宿
|
| 635 |
+
'青年旅社', '青旅', 'hostel', '民宿', 'airbnb', '客栈',
|
| 636 |
+
'多人间', '床位', '宿舍', '胶囊', 'capsule',
|
| 637 |
+
# 欧洲经济交通
|
| 638 |
+
'大巴', '长途汽车', 'flixbus', '火车', '二等座', '经济舱',
|
| 639 |
+
'欧洲通票', '青年票', '学生票', '团体票',
|
| 640 |
+
# 经济餐饮
|
| 641 |
+
'自己做饭', '超市', '便利店', '快餐', '街头小吃', '外卖',
|
| 642 |
+
'麦当劳', '汉堡王', 'kebab', 'döner'
|
| 643 |
+
],
|
| 644 |
+
'comfortable': [
|
| 645 |
+
# 舒适相关
|
| 646 |
+
'舒适', '中等', '适中', '一般', '标准', '普通', '正常', '常规',
|
| 647 |
+
'中档', '中级', '合理', '平均', '中间档次', '不高不低',
|
| 648 |
+
# 欧洲中档住宿
|
| 649 |
+
'三星', '四星', '酒店', 'hotel', '标间', '双人间', '大床房',
|
| 650 |
+
'民宿', 'apartment', '公寓', 'b&b', 'pension',
|
| 651 |
+
# 欧洲舒适交通
|
| 652 |
+
'火车', '一等座', '高铁', 'tgv', 'ice', '城际列车',
|
| 653 |
+
'租车', '自驾', '商务舱', '直飞',
|
| 654 |
+
# 中档餐饮
|
| 655 |
+
'餐厅', '当地菜', '特色菜', '中档餐厅', '酒吧', 'bistro'
|
| 656 |
+
],
|
| 657 |
+
'luxury': [
|
| 658 |
+
# 奢华相关
|
| 659 |
+
'豪华', '奢华', '高端', '顶级', '精品', '奢侈', '贵族',
|
| 660 |
+
'贵一点', '不差钱', '任性', '土豪', '有钱', '不在乎钱',
|
| 661 |
+
'高消费', '享受', '奢享', '尊贵', '至尊', 'VIP',
|
| 662 |
+
# 欧洲豪华住宿
|
| 663 |
+
'五星', '六星', '豪华酒店', 'luxury hotel', '度假村', 'resort',
|
| 664 |
+
'别墅', 'villa', '城堡', 'castle', '套房', 'suite', '总统套房',
|
| 665 |
+
'丽思卡���顿', '四季', '文华东方', '半岛', '香格里拉', '希尔顿',
|
| 666 |
+
'ritz carlton', 'four seasons', 'mandarin oriental', 'peninsula',
|
| 667 |
+
# 豪华交通
|
| 668 |
+
'头等舱', '商务舱', '私人飞机', 'private jet', '豪车', '奔驰', '宝马',
|
| 669 |
+
'奥迪', '保时捷', '法拉利', '兰博基尼', 'mercedes', 'bmw', 'audi',
|
| 670 |
+
# 奢华服务
|
| 671 |
+
'私人导游', '管家服务', 'concierge', '司机', '专车', '包车',
|
| 672 |
+
'定制旅行', '私人订制', '一对一服务', 'vip通道',
|
| 673 |
+
# 高端餐饮
|
| 674 |
+
'米其林', 'michelin', '米其林三星', '米其林餐厅', '高档餐厅',
|
| 675 |
+
'法式大餐', '意式料理', '分子料理', '酒庄', 'wine tasting'
|
| 676 |
+
]
|
| 677 |
}
|
| 678 |
|
| 679 |
+
# 识别预算类型
|
| 680 |
for budget_type, keywords in budget_type_keywords.items():
|
| 681 |
+
matched_keywords = [kw for kw in keywords if kw in text_lower]
|
| 682 |
+
if matched_keywords:
|
| 683 |
+
result["type"] = budget_type
|
| 684 |
+
result["description"] = matched_keywords[0]
|
|
|
|
| 685 |
break
|
| 686 |
|
| 687 |
+
# 如果有金额但没有类型,根据金额和货币推断类型(欧洲标准)
|
| 688 |
+
if result.get("amount") and not result.get("type"):
|
| 689 |
+
amount = result["amount"]
|
| 690 |
+
currency = result.get("currency", "RMB")
|
| 691 |
+
|
| 692 |
+
# 根据欧洲旅行成本设置阈值
|
| 693 |
+
if currency == "EUR":
|
| 694 |
+
if amount < 50: # 每天50欧以下
|
| 695 |
+
result["type"] = "economy"
|
| 696 |
+
result["description"] = "经济预算"
|
| 697 |
+
elif amount < 150: # 每天50-150欧
|
| 698 |
+
result["type"] = "comfortable"
|
| 699 |
+
result["description"] = "舒适预算"
|
| 700 |
+
else: # 每天150欧以上
|
| 701 |
+
result["type"] = "luxury"
|
| 702 |
+
result["description"] = "豪华预算"
|
| 703 |
+
elif currency == "GBP":
|
| 704 |
+
if amount < 40: # 每天40英镑以下
|
| 705 |
+
result["type"] = "economy"
|
| 706 |
+
result["description"] = "经济预算"
|
| 707 |
+
elif amount < 120: # 每天40-120英镑
|
| 708 |
+
result["type"] = "comfortable"
|
| 709 |
+
result["description"] = "舒适预算"
|
| 710 |
+
else: # 每天120英镑以上
|
| 711 |
+
result["type"] = "luxury"
|
| 712 |
+
result["description"] = "豪华预算"
|
| 713 |
+
elif currency == "CHF":
|
| 714 |
+
if amount < 60: # 每天60瑞郎以下
|
| 715 |
+
result["type"] = "economy"
|
| 716 |
+
result["description"] = "经济预算"
|
| 717 |
+
elif amount < 180: # 每天60-180瑞郎
|
| 718 |
+
result["type"] = "comfortable"
|
| 719 |
+
result["description"] = "舒适预算"
|
| 720 |
+
else: # 每天180瑞郎以上
|
| 721 |
+
result["type"] = "luxury"
|
| 722 |
+
result["description"] = "豪华预算"
|
| 723 |
+
elif currency == "RMB":
|
| 724 |
+
if amount < 300: # 每天300元以下
|
| 725 |
+
result["type"] = "economy"
|
| 726 |
+
result["description"] = "经济预算"
|
| 727 |
+
elif amount < 800: # 每天300-800元
|
| 728 |
+
result["type"] = "comfortable"
|
| 729 |
+
result["description"] = "舒适预算"
|
| 730 |
+
else: # 每天800元以上
|
| 731 |
+
result["type"] = "luxury"
|
| 732 |
+
result["description"] = "豪华预算"
|
| 733 |
+
elif currency == "USD":
|
| 734 |
+
if amount < 60: # 每天60美元以下
|
| 735 |
+
result["type"] = "economy"
|
| 736 |
+
result["description"] = "经济预算"
|
| 737 |
+
elif amount < 150: # 每天60-150美元
|
| 738 |
+
result["type"] = "comfortable"
|
| 739 |
+
result["description"] = "舒适预算"
|
| 740 |
+
else: # 每天150美元以上
|
| 741 |
+
result["type"] = "luxury"
|
| 742 |
+
result["description"] = "豪华预算"
|
| 743 |
+
|
| 744 |
return result
|
| 745 |
|
| 746 |
+
def _is_valid_european_city(self, name: str) -> bool:
|
| 747 |
+
"""验证是否为有效的欧洲城市名称"""
|
| 748 |
+
if not name or len(name) < 1:
|
| 749 |
+
return False
|
| 750 |
+
|
| 751 |
+
# 排除数字和常见的非地名词汇
|
| 752 |
+
invalid_words = [
|
| 753 |
+
# 数字和时间
|
| 754 |
+
'天', '日', '号', '月', '年', '周', '小时', '分钟', '秒',
|
| 755 |
+
# 金钱相关
|
| 756 |
+
'元', '块', '钱', '万', '千', '百', '预算', '费用', '成本', '价格',
|
| 757 |
+
'美元', '欧元', '英镑', '瑞郎', '法郎',
|
| 758 |
+
# 旅行相关动词
|
| 759 |
+
'花', '费', '旅行', '旅游', '行程', '计划', '想', '去', '到', '的',
|
| 760 |
+
'在', '是', '个', '了', '和', '与', '或', '但', '而', '就', '都',
|
| 761 |
+
# 其他常见词
|
| 762 |
+
'人', '我', '你', '他', '她', '们', '这', '那', '什么', '怎么',
|
| 763 |
+
'好', '很', '非常', '特别', '大', '小', '新', '老'
|
| 764 |
+
]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 765 |
|
| 766 |
+
if name.isdigit() or name in invalid_words:
|
| 767 |
+
return False
|
| 768 |
|
| 769 |
+
# 检查是否包含数字(地名通常不包含数字)
|
| 770 |
+
if any(char.isdigit() for char in name):
|
| 771 |
+
return False
|
|
|
|
| 772 |
|
| 773 |
+
# 检查是否在欧洲城市列表中
|
| 774 |
+
if name in self.european_cities:
|
| 775 |
+
return True
|
| 776 |
|
| 777 |
+
# 检查是否在别名列表中
|
| 778 |
+
if name in self.european_city_aliases or name.lower() in self.european_city_aliases:
|
| 779 |
+
return True
|
| 780 |
+
|
| 781 |
+
# 城市名称长度检查
|
| 782 |
+
if len(name) > 15:
|
| 783 |
+
return False
|
| 784 |
+
|
| 785 |
+
# 检查是否包含特殊字符
|
| 786 |
+
if any(char in name for char in '!@#$%^&*()+={}[]|\\:";\'<>?,.`~'):
|
| 787 |
+
return False
|
| 788 |
+
|
| 789 |
+
return False # 只接受明确在欧洲城市列表中的城市
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 790 |
|
| 791 |
+
def _is_valid_european_country(self, name: str) -> bool:
|
| 792 |
+
"""验证是否为有效的欧洲国家名称"""
|
| 793 |
+
if not name or len(name) < 2:
|
| 794 |
+
return False
|
| 795 |
+
|
| 796 |
+
# 欧洲国家列表
|
| 797 |
+
european_countries = {
|
| 798 |
+
# 西欧
|
| 799 |
+
'法国', '德国', '英国', '荷兰', '比利时', '卢森堡',
|
| 800 |
+
# 南欧
|
| 801 |
+
'意大利', '西班牙', '葡萄牙', '希腊', '马耳他', '塞浦路斯',
|
| 802 |
+
# 中欧
|
| 803 |
+
'奥地利', '瑞士', '捷克', '斯洛伐克', '匈牙利', '波兰', '斯洛文尼亚',
|
| 804 |
+
# 北欧
|
| 805 |
+
'瑞典', '挪威', '丹麦', '芬兰', '冰岛',
|
| 806 |
+
# 东欧
|
| 807 |
+
'俄罗斯', '乌克兰', '白俄罗斯', '立陶宛', '拉脱维亚', '爱沙尼亚', '摩尔多瓦',
|
| 808 |
+
# 巴尔干半岛
|
| 809 |
+
'克罗地亚', '塞尔维亚', '波黑', '黑山', '北马其顿', '阿尔巴尼亚',
|
| 810 |
+
'保加利亚', '罗马尼亚', '土耳其'
|
| 811 |
+
}
|
| 812 |
+
|
| 813 |
+
return name in european_countries
|
| 814 |
|
| 815 |
+
# 保持向后兼容的验证方法
|
| 816 |
+
def _validate_and_normalize(self, data: dict) -> dict:
|
| 817 |
+
"""验证和规范化数据"""
|
| 818 |
+
return data
|
modules/knowledge_base.py
CHANGED
|
@@ -6,24 +6,283 @@ from utils.logger import log
|
|
| 6 |
class KnowledgeBase:
|
| 7 |
def __init__(self, file_path: Path = Path("./config/general_travelplan.json")):
|
| 8 |
self.knowledge = []
|
| 9 |
-
|
| 10 |
-
|
| 11 |
-
|
| 12 |
-
|
| 13 |
-
|
| 14 |
-
|
| 15 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 16 |
|
| 17 |
def search(self, query: str) -> list:
|
|
|
|
| 18 |
relevant_knowledge = []
|
| 19 |
query_lower = query.lower()
|
| 20 |
|
| 21 |
-
|
| 22 |
-
|
| 23 |
-
|
| 24 |
-
|
| 25 |
-
|
| 26 |
-
|
| 27 |
-
|
| 28 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 29 |
return relevant_knowledge
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 6 |
class KnowledgeBase:
|
| 7 |
def __init__(self, file_path: Path = Path("./config/general_travelplan.json")):
    """Load the knowledge file and build the lookup indexes.

    Args:
        file_path: JSON file whose top-level ``"clean_knowledge"`` entry holds
            the list of knowledge records.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file is not valid JSON.
    """
    self.knowledge = []
    self.city_index = {}     # city name -> positions in self.knowledge
    self.country_index = {}  # country name -> positions in self.knowledge
    self.region_index = {}   # region type -> positions in self.knowledge
    with open(file_path, 'r', encoding='utf-8') as f:
        self.knowledge = json.load(f).get('clean_knowledge', [])
    # The indexes were initialised but never filled, so every lookup fell
    # through to fuzzy matching; build them as soon as the data is loaded.
    self._build_indexes()
    log.info(f"✅ 知识库加载完成")
|
| 15 |
+
|
| 16 |
+
|
| 17 |
+
def _build_indexes(self):
|
| 18 |
+
"""建立快速检索索引"""
|
| 19 |
+
for idx, item in enumerate(self.knowledge):
|
| 20 |
+
knowledge = item.get('knowledge', {}).get('travel_knowledge', {})
|
| 21 |
+
if not knowledge:
|
| 22 |
+
continue
|
| 23 |
+
|
| 24 |
+
dest_info = knowledge.get('destination_info', {})
|
| 25 |
+
|
| 26 |
+
# 建立城市索引
|
| 27 |
+
primary_destinations = dest_info.get('primary_destinations', [])
|
| 28 |
+
for city in primary_destinations:
|
| 29 |
+
if city not in self.city_index:
|
| 30 |
+
self.city_index[city] = []
|
| 31 |
+
self.city_index[city].append(idx)
|
| 32 |
+
|
| 33 |
+
# 建立国家索引
|
| 34 |
+
countries = dest_info.get('countries', [])
|
| 35 |
+
for country in countries:
|
| 36 |
+
if country not in self.country_index:
|
| 37 |
+
self.country_index[country] = []
|
| 38 |
+
self.country_index[country].append(idx)
|
| 39 |
+
|
| 40 |
+
# 建立地区索引
|
| 41 |
+
region_type = dest_info.get('region_type', '')
|
| 42 |
+
if region_type:
|
| 43 |
+
if region_type not in self.region_index:
|
| 44 |
+
self.region_index[region_type] = []
|
| 45 |
+
self.region_index[region_type].append(idx)
|
| 46 |
|
| 47 |
def search(self, query: str) -> list:
    """Search the knowledge base for records related to *query*.

    Matching steps, accumulated without duplicates:

    1. records indexed under the query as a city name;
    2. records of the country returned by ``_find_country_for_city``;
    3. records of the region returned by ``_find_region_for_city``;
    4. only when nothing was found so far, a case-insensitive substring
       match between the query and each record's primary destinations.

    Args:
        query: City name (or free text) to look up.  Leading and trailing
            whitespace is ignored.

    Returns:
        The matching knowledge records; an empty list for an empty, blank or
        non-string query.
    """
    relevant_knowledge = []

    # An empty string is a substring of every destination, so a blank query
    # would otherwise return the whole knowledge base from the fuzzy step.
    if not isinstance(query, str) or not query.strip():
        log.warning("⚠️ 搜索关键词为空,跳过知识库搜索")
        return relevant_knowledge

    query = query.strip()
    query_lower = query.lower()

    def collect(positions):
        """Append the records at *positions* that are not collected yet."""
        for idx in positions:
            record = self.knowledge[idx]
            if record not in relevant_knowledge:
                relevant_knowledge.append(record)

    log.info(f"🔍 在知识库中搜索: '{query}'")

    # 1. Direct city match
    if query in self.city_index:
        collect(self.city_index[query])
        log.info(f"✅ 通过城市直接匹配找到 {len(self.city_index[query])} 条记录")

    # 2. Country match
    matching_country = self._find_country_for_city(query)
    if matching_country and matching_country in self.country_index:
        collect(self.country_index[matching_country])
        log.info(f"✅ 通过国家匹配({matching_country})找到额外记录")

    # 3. Region match
    matching_region = self._find_region_for_city(query)
    if matching_region and matching_region in self.region_index:
        collect(self.region_index[matching_region])
        log.info(f"✅ 通过地区匹配({matching_region})找到额外记录")

    # 4. Fuzzy match, only as a last resort
    if not relevant_knowledge:
        log.info("🔍 尝试模糊匹配...")
        for item in self.knowledge:
            knowledge = item.get('knowledge', {}).get('travel_knowledge', {})
            dest_info = knowledge.get('destination_info', {})

            for dest in dest_info.get('primary_destinations', []):
                if not dest:
                    # An empty destination would match every query.
                    continue
                dest_lower = dest.lower()
                if query_lower in dest_lower or dest_lower in query_lower:
                    if item not in relevant_knowledge:
                        relevant_knowledge.append(item)
                        log.info(f"✅ 模糊匹配找到: {dest}")
                    break

    log.info(f"📊 搜索完成,共找到 {len(relevant_knowledge)} 条相关记录")
    return relevant_knowledge
|
| 95 |
+
|
| 96 |
+
def _find_country_for_city(self, city_name: str) -> str:
|
| 97 |
+
"""根据城市名查找所属国家"""
|
| 98 |
+
city_country_mapping = {
|
| 99 |
+
# 中欧
|
| 100 |
+
"布拉格": "捷克", "布尔诺": "捷克", "库特纳霍拉": "捷克",
|
| 101 |
+
"维也纳": "奥地利", "萨尔茨堡": "奥地利", "哈尔施塔特": "奥地利", "巴德伊舍": "奥地利",
|
| 102 |
+
"布达佩斯": "匈牙利", "德布勒森": "匈牙利", "圣安德烈": "匈牙利",
|
| 103 |
+
"布拉迪斯拉发": "斯洛伐克",
|
| 104 |
+
|
| 105 |
+
# 西欧
|
| 106 |
+
"巴黎": "法国", "里昂": "法国", "尼斯": "法国", "马赛": "法国",
|
| 107 |
+
"柏林": "德国", "慕尼黑": "德国", "汉堡": "德国", "科隆": "德国", "法兰克福": "德国",
|
| 108 |
+
"阿姆斯特丹": "荷兰", "鹿特丹": "荷兰", "海牙": "荷兰",
|
| 109 |
+
"布鲁塞尔": "比利时", "安特卫普": "比利时", "布吕赫": "比利时",
|
| 110 |
+
"卢森堡市": "卢森堡",
|
| 111 |
+
"苏黎世": "瑞士", "日内瓦": "瑞士", "因特拉肯": "瑞士",
|
| 112 |
+
|
| 113 |
+
# 南欧
|
| 114 |
+
"罗马": "意大利", "米兰": "意大利", "威尼斯": "意大利", "佛罗伦萨": "意大利",
|
| 115 |
+
"马德里": "西班牙", "巴塞罗那": "西班牙", "塞维利亚": "西班牙",
|
| 116 |
+
"里斯本": "葡萄牙", "波尔图": "葡萄牙",
|
| 117 |
+
"雅典": "希腊", "圣托里尼": "希腊", "米科诺斯": "希腊",
|
| 118 |
+
|
| 119 |
+
# 北欧
|
| 120 |
+
"斯德哥尔摩": "瑞典", "哥德堡": "瑞典",
|
| 121 |
+
"奥斯陆": "挪威", "卑尔根": "挪威",
|
| 122 |
+
"哥本哈根": "丹麦", "奥胡斯": "丹麦",
|
| 123 |
+
"赫尔辛基": "芬兰", "坦佩雷": "芬兰",
|
| 124 |
+
"雷克雅未克": "冰岛",
|
| 125 |
+
|
| 126 |
+
# 英国
|
| 127 |
+
"伦敦": "英国", "爱丁堡": "英国", "曼彻斯特": "英国",
|
| 128 |
+
}
|
| 129 |
+
return city_country_mapping.get(city_name, "")
|
| 130 |
+
|
| 131 |
+
def _find_region_for_city(self, city_name: str) -> str:
|
| 132 |
+
"""根据城市名查找所属地区"""
|
| 133 |
+
city_region_mapping = {
|
| 134 |
+
# 中欧
|
| 135 |
+
"布拉格": "中欧", "布尔诺": "中欧", "库特纳霍拉": "中欧",
|
| 136 |
+
"维也纳": "中欧", "萨尔茨堡": "中欧", "哈尔施塔特": "中欧", "巴德伊舍": "中欧",
|
| 137 |
+
"布达佩斯": "中欧", "德布勒森": "中欧", "圣安德烈": "中欧",
|
| 138 |
+
"布拉迪斯拉发": "中欧",
|
| 139 |
+
|
| 140 |
+
# 西欧
|
| 141 |
+
"巴黎": "西欧", "里昂": "西欧", "尼斯": "西欧",
|
| 142 |
+
"柏林": "西欧", "慕尼黑": "西欧", "汉堡": "西欧",
|
| 143 |
+
"阿姆斯特丹": "西欧", "鹿特丹": "西欧",
|
| 144 |
+
"布鲁塞尔": "西欧", "安特卫普": "西欧",
|
| 145 |
+
"苏黎世": "西欧", "日内瓦": "西欧",
|
| 146 |
+
|
| 147 |
+
# 东欧(按你的知识库分类)
|
| 148 |
+
"华沙": "东欧", "克拉科夫": "东欧",
|
| 149 |
+
"莫斯科": "东欧", "圣彼得堡": "东欧",
|
| 150 |
+
|
| 151 |
+
# 南欧
|
| 152 |
+
"罗马": "南欧", "米兰": "南欧", "威尼斯": "南欧",
|
| 153 |
+
"马德里": "南欧", "巴塞罗那": "南欧",
|
| 154 |
+
"里斯本": "南欧", "波尔图": "南欧",
|
| 155 |
+
"雅典": "南欧", "圣托里尼": "南欧",
|
| 156 |
+
|
| 157 |
+
# 北欧
|
| 158 |
+
"斯德哥尔摩": "北欧", "哥德堡": "北欧",
|
| 159 |
+
"奥斯陆": "北欧", "卑尔根": "北欧",
|
| 160 |
+
"哥本哈根": "北欧", "赫尔辛基": "北欧",
|
| 161 |
+
"雷克雅未克": "北欧",
|
| 162 |
+
}
|
| 163 |
+
return city_region_mapping.get(city_name, "")
|
| 164 |
+
|
| 165 |
+
def get_knowledge_by_destination(self, destination: str) -> dict:
    """Return the merged knowledge of every record related to *destination*.

    Records come from ``self.search(destination)`` and are merged as follows:

    * ``destination_info``: dict update, later records overwrite earlier keys;
    * ``budget_analysis``: the analysis with the most entries is kept;
    * ``detailed_itinerary``: concatenated, then de-duplicated on
      ``(day_number, location)`` keeping the first occurrence;
    * ``professional_insights``: first value per key; when both the stored and
      the new value are lists they are merged without duplicates.

    Args:
        destination: City name (or free text) to look up.

    Returns:
        A dict with the four keys above, or an empty dict when nothing was
        found.
    """
    relevant_items = self.search(destination)

    if not relevant_items:
        log.warning(f"⚠️ 未找到关于 '{destination}' 的知识")
        return {}

    merged_knowledge = {
        "destination_info": {},
        "budget_analysis": {},
        "detailed_itinerary": [],
        "professional_insights": {}
    }
    insights = merged_knowledge['professional_insights']

    for item in relevant_items:
        knowledge = item.get('knowledge', {}).get('travel_knowledge', {})

        # Destination info
        if 'destination_info' in knowledge:
            merged_knowledge['destination_info'].update(knowledge['destination_info'])

        # Keep the most detailed budget analysis
        if 'budget_analysis' in knowledge:
            current = merged_knowledge['budget_analysis']
            if not current or len(knowledge['budget_analysis']) > len(current):
                merged_knowledge['budget_analysis'] = knowledge['budget_analysis']

        # Itinerary suggestions
        if 'detailed_itinerary' in knowledge:
            merged_knowledge['detailed_itinerary'].extend(knowledge['detailed_itinerary'])

        # Professional insights
        if 'professional_insights' in knowledge:
            for key, value in knowledge['professional_insights'].items():
                if key not in insights:
                    insights[key] = value
                elif isinstance(value, list) and isinstance(insights[key], list):
                    # Order-preserving merge.  A set() would raise on
                    # unhashable entries (dicts) and scramble the order.
                    combined = []
                    for entry in insights[key] + value:
                        if entry not in combined:
                            combined.append(entry)
                    insights[key] = combined

    # De-duplicate the itinerary, first occurrence wins
    if merged_knowledge['detailed_itinerary']:
        seen_days = set()
        unique_itinerary = []
        for day_plan in merged_knowledge['detailed_itinerary']:
            day_key = (day_plan.get('day_number', 0), day_plan.get('location', ''))
            if day_key not in seen_days:
                seen_days.add(day_key)
                unique_itinerary.append(day_plan)
        merged_knowledge['detailed_itinerary'] = unique_itinerary

    log.info(f"📚 为 '{destination}' 合并了 {len(relevant_items)} 条知识记录")
    return merged_knowledge
|
| 222 |
+
|
| 223 |
+
def get_similar_destinations(self, destination: str, limit: int = 5) -> list:
    """Recommend destinations similar to ``destination``.

    Cities listed by records indexed under the same country are offered
    first; cities listed by records indexed under the same region fill any
    remaining slots.

    Args:
        destination: City to find alternatives for. It is never part of
            the result.
        limit: Maximum number of recommendations. A non-positive value
            yields an empty list.

    Returns:
        De-duplicated city names in discovery order, at most ``limit`` long.
    """
    similar_destinations = []
    if limit <= 0:
        # The cap below is only evaluated after an append, so without this
        # guard a limit of 0 would still hand back one city.
        return similar_destinations

    def collect(record_indices) -> bool:
        """Append unseen cities from the given records; True once the cap is hit."""
        for idx in record_indices:
            travel = self.knowledge[idx].get('knowledge', {}).get('travel_knowledge', {})
            cities = travel.get('destination_info', {}).get('primary_destinations', [])
            for city in cities:
                if city == destination or city in similar_destinations:
                    continue
                similar_destinations.append(city)
                if len(similar_destinations) >= limit:
                    return True
        return False

    # Resolve the country and region of the requested city.
    target_country = self._find_country_for_city(destination)
    target_region = self._find_region_for_city(destination)

    # Same-country cities take priority.
    if target_country and target_country in self.country_index:
        if collect(self.country_index[target_country]):
            return similar_destinations

    # Then top up with cities from the same region.
    if target_region and target_region in self.region_index:
        collect(self.region_index[target_region])

    return similar_destinations
|
| 258 |
+
|
| 259 |
+
def get_statistics(self) -> dict:
    """Summarise the knowledge base.

    Returns:
        A dict with the record count, the sizes of the city / country /
        region indexes, the number of distinct primary destinations seen
        per region (``cities_by_region``) and the ten cities referenced by
        the most records as ``(city, record_count)`` pairs
        (``popular_cities``).
    """
    def distinct_cities(record_indices):
        # Union of the primary destinations named by the given records.
        found = set()
        for record_idx in record_indices:
            travel = self.knowledge[record_idx].get('knowledge', {}).get('travel_knowledge', {})
            found.update(travel.get('destination_info', {}).get('primary_destinations', []))
        return found

    # City count per region.
    per_region = {
        region: len(distinct_cities(record_indices))
        for region, record_indices in self.region_index.items()
    }

    # Cities ranked by how many records mention them, most frequent first.
    ranked = sorted(
        ((city, len(record_indices)) for city, record_indices in self.city_index.items()),
        key=lambda pair: pair[1],
        reverse=True,
    )

    return {
        "total_records": len(self.knowledge),
        "cities_covered": len(self.city_index),
        "countries_covered": len(self.country_index),
        "regions_covered": len(self.region_index),
        "cities_by_region": per_region,
        "popular_cities": ranked[:10],  # top 10 only
    }
|
modules/response_generator.py
CHANGED
|
@@ -1,4 +1,5 @@
|
|
| 1 |
import json
|
|
|
|
| 2 |
from .ai_model import AIModel
|
| 3 |
from .knowledge_base import KnowledgeBase
|
| 4 |
from utils.logger import log
|
|
@@ -8,6 +9,7 @@ class ResponseGenerator:
|
|
| 8 |
self.ai_model = ai_model
|
| 9 |
self.kb = knowledge_base
|
| 10 |
self.personas = self._load_personas()
|
|
|
|
| 11 |
|
| 12 |
def _load_personas(self):
|
| 13 |
personas_path = "./config/personas.json"
|
|
@@ -17,29 +19,76 @@ class ResponseGenerator:
|
|
| 17 |
log.info(f"✅ 成功加载 {len(data.get('personas', {}))} 个persona配置。")
|
| 18 |
return data.get('personas', {})
|
| 19 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 20 |
def _get_current_persona_config(self, session_state: dict) -> dict:
|
| 21 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 22 |
return self.personas.get(persona_key, {
|
| 23 |
-
"name": "旅行助手",
|
| 24 |
-
"
|
|
|
|
|
|
|
| 25 |
})
|
| 26 |
|
| 27 |
def generate(self, user_message: str, session_state: dict, extracted_info: dict) -> str:
|
|
|
|
| 28 |
try:
|
| 29 |
response_parts = []
|
| 30 |
-
|
|
|
|
|
|
|
| 31 |
if acknowledgement:
|
| 32 |
response_parts.append(acknowledgement)
|
| 33 |
|
| 34 |
-
|
|
|
|
| 35 |
if next_question:
|
| 36 |
if response_parts:
|
| 37 |
-
|
|
|
|
|
|
|
| 38 |
else:
|
| 39 |
response_parts.append(next_question)
|
| 40 |
|
|
|
|
| 41 |
if not next_question:
|
| 42 |
-
plan = self.
|
|
|
|
| 43 |
if response_parts:
|
| 44 |
response_parts.append("\n\n" + plan)
|
| 45 |
else:
|
|
@@ -51,128 +100,442 @@ class ResponseGenerator:
|
|
| 51 |
log.error(f"❌ 响应生成失败: {e}", exc_info=True)
|
| 52 |
return "抱歉,我在处理您的请求时遇到了问题,请稍后再试。"
|
| 53 |
|
| 54 |
-
def
|
| 55 |
-
"""
|
| 56 |
-
|
| 57 |
-
|
| 58 |
-
|
| 59 |
-
|
| 60 |
-
|
| 61 |
-
|
| 62 |
-
|
| 63 |
-
|
| 64 |
-
#
|
| 65 |
-
|
| 66 |
-
|
| 67 |
-
|
| 68 |
-
|
| 69 |
-
|
| 70 |
-
|
| 71 |
-
|
| 72 |
-
return
|
| 73 |
-
|
| 74 |
-
|
| 75 |
-
|
| 76 |
-
|
| 77 |
-
|
| 78 |
-
|
| 79 |
-
|
| 80 |
-
|
| 81 |
-
|
| 82 |
-
|
| 83 |
-
|
| 84 |
-
|
| 85 |
-
|
| 86 |
-
|
| 87 |
-
|
| 88 |
-
|
| 89 |
-
|
| 90 |
-
|
| 91 |
-
|
| 92 |
-
|
| 93 |
-
|
| 94 |
-
|
| 95 |
-
|
| 96 |
-
|
| 97 |
-
|
| 98 |
-
|
| 99 |
-
|
| 100 |
-
|
| 101 |
-
|
| 102 |
-
|
| 103 |
-
|
| 104 |
-
|
| 105 |
-
|
| 106 |
-
|
| 107 |
-
|
| 108 |
-
|
| 109 |
-
|
| 110 |
-
|
| 111 |
-
|
| 112 |
-
|
| 113 |
-
|
| 114 |
-
|
| 115 |
-
|
| 116 |
-
|
| 117 |
-
|
| 118 |
-
|
| 119 |
-
|
| 120 |
-
|
| 121 |
-
|
| 122 |
-
|
| 123 |
-
|
| 124 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 125 |
else:
|
| 126 |
-
log.warning("⚠️
|
| 127 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 128 |
|
| 129 |
-
def
|
| 130 |
-
|
| 131 |
-
|
| 132 |
-
|
|
|
|
|
|
|
| 133 |
try:
|
| 134 |
-
|
| 135 |
-
|
| 136 |
-
"
|
| 137 |
-
|
| 138 |
-
|
| 139 |
-
|
| 140 |
-
|
| 141 |
-
|
| 142 |
-
|
| 143 |
-
}
|
| 144 |
-
return
|
| 145 |
-
except KeyError as e:
|
| 146 |
-
log.warning(f"Persona模板格式化失败,缺少键: {e}。将使用通用模板。")
|
| 147 |
-
return self._build_generic_prompt(session_state)
|
| 148 |
-
|
| 149 |
-
def _safe_get_session_value(self, session, key1, key2, default):
|
| 150 |
-
"""安全地从嵌套的session字典中取值"""
|
| 151 |
-
level1 = session.get(key1)
|
| 152 |
-
if isinstance(level1, dict):
|
| 153 |
-
return level1.get(key2, default)
|
| 154 |
-
return default
|
| 155 |
-
|
| 156 |
-
def _build_generic_prompt(self, session_state: dict) -> str:
|
| 157 |
-
location = self._safe_get_session_value(session_state, "destination", "name", "目的地")
|
| 158 |
-
days = self._safe_get_session_value(session_state, "duration", "days", "几")
|
| 159 |
-
budget_info = self._format_budget_info(session_state.get("budget"))
|
| 160 |
-
return f"你是一个专业的旅游助手。请为用户生成一个详细的旅行计划。\n【基本信息】\n- 目的地:{location}\n- 旅行天数:{days}天\n- 预算:{budget_info}\n【要求】\n- 提供具体的景点推荐和路线安排\n- 包含交通、住宿、餐饮建议\n- 确保所有推荐都在预算范围内\n- 提供实用的旅行贴士\n\n请生成一份实用、详细的旅行计划。"
|
| 161 |
|
| 162 |
-
def
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 163 |
persona_config = self._get_current_persona_config(session_state)
|
| 164 |
-
|
| 165 |
-
|
| 166 |
-
|
| 167 |
-
|
| 168 |
-
|
| 169 |
-
|
| 170 |
-
|
| 171 |
-
|
| 172 |
-
|
| 173 |
-
|
| 174 |
-
|
| 175 |
-
|
| 176 |
-
|
| 177 |
-
|
| 178 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
import json
|
| 2 |
+
import random
|
| 3 |
from .ai_model import AIModel
|
| 4 |
from .knowledge_base import KnowledgeBase
|
| 5 |
from utils.logger import log
|
|
|
|
| 9 |
self.ai_model = ai_model
|
| 10 |
self.kb = knowledge_base
|
| 11 |
self.personas = self._load_personas()
|
| 12 |
+
self._init_response_templates()
|
| 13 |
|
| 14 |
def _load_personas(self):
|
| 15 |
personas_path = "./config/personas.json"
|
|
|
|
| 19 |
log.info(f"✅ 成功加载 {len(data.get('personas', {}))} 个persona配置。")
|
| 20 |
return data.get('personas', {})
|
| 21 |
|
| 22 |
+
def _init_response_templates(self):
|
| 23 |
+
"""初始化各种动态回复模板"""
|
| 24 |
+
|
| 25 |
+
# 欧洲城市特色描述 (保留原有)
|
| 26 |
+
self.city_descriptions = {
|
| 27 |
+
"巴黎": ["浪漫之都", "艺术之城", "时尚之都", "光影流转的塞纳河畔", "充满香槟气息的花都"],
|
| 28 |
+
"罗马": ["永恒之城", "历史的活化石", "每块石头都有故事", "古典与现代交融的奇迹", "凯撒大帝走过的土地"],
|
| 29 |
+
"伦敦": ["绅士的故乡", "雾都传奇", "文艺复兴的摇篮", "泰晤士河的守护者", "莎士比亚笔下的世界"],
|
| 30 |
+
"维也纳": ["音乐之都", "华尔兹的发源地", "莫扎特的灵感之地", "咖啡文化的天堂", "皇室优雅的化身"],
|
| 31 |
+
"布拉格": ["千塔之城", "中世纪的童话", "波西米亚的浪漫", "查理桥上的传奇", "啤酒花香弥漫的古城"],
|
| 32 |
+
"布达佩斯": ["多瑙河明珠", "东欧巴黎", "温泉之都", "建筑艺术的博物馆", "匈牙利王冠上的明珠"],
|
| 33 |
+
"萨尔茨堡": ["音乐神童的故乡", "《音乐之声》的拍摄地", "阿尔卑斯山下的明珠", "莫扎特的诞生地", "巴洛克建筑的典范"],
|
| 34 |
+
"哈尔施塔特": ["世界最美小镇", "湖光山色的仙境", "阿尔卑斯山的秘境", "明信片上的童话", "奥地利的瑰宝"],
|
| 35 |
+
}
|
| 36 |
+
|
| 37 |
+
# 保留原有的问候语和确认模板 (简化以节省空间)
|
| 38 |
+
self.greetings = {
|
| 39 |
+
"social": [
|
| 40 |
+
"哈喽!准备开启一场说走就走的欧洲之旅吗?✨",
|
| 41 |
+
"嗨呀!听说有人要去欧洲拍美照啦?📸",
|
| 42 |
+
],
|
| 43 |
+
"experiential": [
|
| 44 |
+
"你好,旅行者。欧洲的古老土地正在召唤着你...",
|
| 45 |
+
"感知到了一颗渴望探索的心。欧洲有太多故事等你去发现。",
|
| 46 |
+
],
|
| 47 |
+
"planner": [
|
| 48 |
+
"您好!让我来帮您规划一次完美的欧洲之旅。",
|
| 49 |
+
"欧洲旅行规划专家上线!准备为您定制专属行程。",
|
| 50 |
+
]
|
| 51 |
+
}
|
| 52 |
+
|
| 53 |
def _get_current_persona_config(self, session_state: dict) -> dict:
|
| 54 |
+
"""获取当前persona配置"""
|
| 55 |
+
persona_info = session_state.get("persona", {})
|
| 56 |
+
if isinstance(persona_info, dict):
|
| 57 |
+
persona_key = persona_info.get("key")
|
| 58 |
+
else:
|
| 59 |
+
persona_key = None
|
| 60 |
+
|
| 61 |
return self.personas.get(persona_key, {
|
| 62 |
+
"name": "旅行助手",
|
| 63 |
+
"style": "中立",
|
| 64 |
+
"tone": ["专业", "友好"],
|
| 65 |
+
"prompt_template": self._build_generic_prompt(session_state)
|
| 66 |
})
|
| 67 |
|
| 68 |
def generate(self, user_message: str, session_state: dict, extracted_info: dict) -> str:
|
| 69 |
+
"""生成融合知识库的智能回复"""
|
| 70 |
try:
|
| 71 |
response_parts = []
|
| 72 |
+
|
| 73 |
+
# 1. 生成确认信息(更生动)
|
| 74 |
+
acknowledgement = self._generate_vivid_acknowledgement(extracted_info, session_state)
|
| 75 |
if acknowledgement:
|
| 76 |
response_parts.append(acknowledgement)
|
| 77 |
|
| 78 |
+
# 2. 检查是否需要询问下一个信息
|
| 79 |
+
next_question = self._get_dynamic_next_question(session_state)
|
| 80 |
if next_question:
|
| 81 |
if response_parts:
|
| 82 |
+
connectors = ["那么,", "接下来,", "好的,", ""]
|
| 83 |
+
connector = random.choice(connectors)
|
| 84 |
+
response_parts.append(connector + next_question)
|
| 85 |
else:
|
| 86 |
response_parts.append(next_question)
|
| 87 |
|
| 88 |
+
# 3. 如果所有信息收集完毕,生成知识库增强的旅行计划
|
| 89 |
if not next_question:
|
| 90 |
+
plan = self._generate_knowledge_enhanced_plan(user_message, session_state)
|
| 91 |
+
|
| 92 |
if response_parts:
|
| 93 |
response_parts.append("\n\n" + plan)
|
| 94 |
else:
|
|
|
|
| 100 |
log.error(f"❌ 响应生成失败: {e}", exc_info=True)
|
| 101 |
return "抱歉,我在处理您的请求时遇到了问题,请稍后再试。"
|
| 102 |
|
| 103 |
+
def _generate_knowledge_enhanced_plan(self, user_message: str, session_state: dict) -> str:
    """Produce the travel plan, enriched with knowledge-base material.

    Args:
        user_message: Latest user message. Kept for interface
            compatibility; it is not read here.
        session_state: Current conversation state.

    Returns:
        The plan from the AI path when a model is configured and reports
        itself available, otherwise the knowledge-based fallback plan.
    """
    destination_name = self._get_destination_name(session_state)
    log.info(f"🔍 开始搜索知识库中关于 '{destination_name}' 的信息...")

    # Duration and budget are resolved by the plan builders themselves.
    # Parsing them here was unused and int() could raise on a non-numeric
    # duration before any plan was attempted.
    relevant_knowledge = self._search_destination_knowledge(destination_name)

    if self.ai_model and self.ai_model.is_available():
        return self._generate_ai_enhanced_plan(session_state, relevant_knowledge)
    return self._generate_knowledge_based_fallback_plan(session_state, relevant_knowledge)
|
| 122 |
+
|
| 123 |
+
def _search_destination_knowledge(self, destination_name: str) -> dict:
    """Collect knowledge-base material relevant to ``destination_name``.

    A record is relevant when, strongest first, its
    ``primary_destinations`` contain the city, its ``countries`` contain
    the city's country, or its ``primary_destinations`` contain a city of
    the same region. Records are merged strongest-first, so a loosely
    related record cannot overwrite the budget analysis, insights or
    destination details of an exact city match, and exact-match itinerary
    days come first in ``itinerary_suggestions``.

    Args:
        destination_name: City the user wants to visit.

    Returns:
        ``{}`` when the knowledge base is missing or empty; otherwise a
        dict with the keys ``budget_analysis``, ``itinerary_suggestions``,
        ``professional_insights`` and ``destination_specific``.
    """
    kb_records = getattr(self.kb, 'knowledge', None) if self.kb else None
    if not kb_records:
        log.warning("⚠️ 知识库为空或不可用")
        return {}

    relevant_info = {
        "budget_analysis": {},
        "itinerary_suggestions": [],
        "professional_insights": {},
        "destination_specific": {}
    }

    log.info(f"📚 在 {len(kb_records)} 条知识中搜索关于 '{destination_name}' 的信息...")

    # Both lookups depend only on the destination, so resolve them once
    # instead of once per record.
    dest_country = self._get_destination_country(destination_name)
    region_destinations = self._get_same_region_cities(destination_name)

    # (rank, reason, travel_knowledge, destination_info); lower rank = stronger.
    matches = []
    for item in kb_records:
        knowledge = item.get('knowledge', {}).get('travel_knowledge', {})
        if not knowledge:
            continue

        dest_info = knowledge.get('destination_info', {})
        primary_destinations = dest_info.get('primary_destinations', [])
        countries = dest_info.get('countries', [])

        if destination_name in primary_destinations:
            matches.append((0, f"直接匹配城市: {destination_name}", knowledge, dest_info))
        elif dest_country and dest_country in countries:
            matches.append((1, f"通过国家匹配: {dest_country}", knowledge, dest_info))
        elif any(city in primary_destinations for city in region_destinations):
            matches.append((2, f"同地区匹配: {region_destinations}", knowledge, dest_info))

    # list.sort is stable: records of equal strength keep knowledge-base order.
    matches.sort(key=lambda match: match[0])

    for _rank, reason, knowledge, dest_info in matches:
        log.info(f"✅ 找到相关知识: {reason}")

        # First (strongest) record providing a budget analysis wins.
        if 'budget_analysis' in knowledge and not relevant_info['budget_analysis']:
            relevant_info['budget_analysis'] = knowledge['budget_analysis']

        if 'detailed_itinerary' in knowledge:
            relevant_info['itinerary_suggestions'].extend(knowledge['detailed_itinerary'])

        # Keep the strongest record's value for each insight key.
        if 'professional_insights' in knowledge:
            for key, value in knowledge['professional_insights'].items():
                relevant_info['professional_insights'].setdefault(key, value)

        if not relevant_info['destination_specific']:
            relevant_info['destination_specific'] = dest_info

    if relevant_info['budget_analysis'] or relevant_info['itinerary_suggestions']:
        log.info(f"📊 成功提取知识库信息: 预算分析={bool(relevant_info['budget_analysis'])}, 行程建议={len(relevant_info['itinerary_suggestions'])}条")
    else:
        log.warning(f"⚠️ 未找到关于 '{destination_name}' 的相关知识")

    return relevant_info
|
| 197 |
+
|
| 198 |
+
def _get_destination_country(self, city_name: str) -> str:
|
| 199 |
+
"""获取城市所属国家"""
|
| 200 |
+
city_country_mapping = {
|
| 201 |
+
"布拉格": "捷克", "维也纳": "奥地利", "萨尔茨堡": "奥地利",
|
| 202 |
+
"布达佩斯": "匈牙利", "布拉迪斯拉发": "斯洛伐克",
|
| 203 |
+
"哈尔施塔特": "奥地利", "巴德伊舍": "奥地利",
|
| 204 |
+
"库特纳霍拉": "捷克", "布尔诺": "捷克",
|
| 205 |
+
"巴黎": "法国", "里昂": "法国", "尼斯": "法国",
|
| 206 |
+
"柏林": "德国", "慕尼黑": "德国", "汉堡": "德国",
|
| 207 |
+
"阿姆斯特丹": "荷兰", "鹿特丹": "荷兰",
|
| 208 |
+
"布鲁塞尔": "比利时", "安特卫普": "比利时",
|
| 209 |
+
"苏黎世": "瑞士", "日内瓦": "瑞士",
|
| 210 |
+
}
|
| 211 |
+
return city_country_mapping.get(city_name, "")
|
| 212 |
+
|
| 213 |
+
def _get_same_region_cities(self, city_name: str) -> list:
|
| 214 |
+
"""获取同地区的其他城市"""
|
| 215 |
+
region_mapping = {
|
| 216 |
+
# 中欧城市
|
| 217 |
+
"布拉格": ["维也纳", "萨尔茨堡", "布达佩斯", "布拉迪斯拉发", "哈尔施塔特"],
|
| 218 |
+
"维也纳": ["布拉格", "萨尔茨堡", "布达佩斯", "布拉迪斯拉发", "哈尔施塔特"],
|
| 219 |
+
"萨尔茨堡": ["维也纳", "布拉格", "哈尔施塔特", "慕尼黑"],
|
| 220 |
+
"布达佩斯": ["布拉格", "维也纳", "布拉迪斯拉发"],
|
| 221 |
+
"哈尔施塔特": ["萨尔茨堡", "维也纳", "巴德伊舍"],
|
| 222 |
+
|
| 223 |
+
# 西欧城市
|
| 224 |
+
"巴黎": ["布鲁塞尔", "阿姆斯特丹", "科隆", "斯特拉斯堡"],
|
| 225 |
+
"阿姆斯特丹": ["布鲁塞尔", "科隆", "巴黎"],
|
| 226 |
+
"布鲁塞尔": ["阿姆斯特丹", "巴黎", "科隆"],
|
| 227 |
+
|
| 228 |
+
# 德语区
|
| 229 |
+
"柏林": ["慕尼黑", "科隆", "汉堡", "维也纳", "苏黎世"],
|
| 230 |
+
"慕尼黑": ["柏林", "萨尔茨堡", "苏黎世", "维也纳"],
|
| 231 |
+
"苏黎世": ["慕尼黑", "维也纳", "萨尔茨堡"],
|
| 232 |
+
}
|
| 233 |
+
return region_mapping.get(city_name, [])
|
| 234 |
|
| 235 |
+
def _generate_ai_enhanced_plan(self, session_state: dict, knowledge: dict) -> str:
    """Ask the AI model for a plan grounded in the knowledge-base excerpts.

    Falls back to the knowledge-based plan when prompt construction or
    inference raises, or when the model returns nothing usable, so the
    caller always receives plan text.

    Args:
        session_state: Current conversation state.
        knowledge: Result of the knowledge-base search for the destination.

    Returns:
        The model's response, or the fallback plan.
    """
    response = None
    try:
        # Prompt building sits inside the guard so a malformed knowledge
        # record degrades to the fallback plan as well.
        enhanced_prompt = self._build_knowledge_enhanced_prompt(session_state, knowledge)

        log.info("🤖 使用AI模型生成知识库增强计划...")
        response = self.ai_model.run_inference(
            input_type="text",
            formatted_input=None,
            prompt=enhanced_prompt,
            temperature=0.7,
        )
    except Exception as e:
        # Deliberate catch-all: any model failure must still yield a plan.
        log.error(f"❌ AI增强计划生成失败: {e}", exc_info=True)
    else:
        if isinstance(response, str) and response.strip():
            return response
        # An empty or missing response would otherwise be shown as the plan.
        log.warning("⚠️ AI模型未返回有效内容，改用知识库备用计划")

    return self._generate_knowledge_based_fallback_plan(session_state, knowledge)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 253 |
|
| 254 |
+
def _build_knowledge_enhanced_prompt(self, session_state: dict, knowledge: dict) -> str:
|
| 255 |
+
"""构建融合知识库信息的增强prompt"""
|
| 256 |
+
|
| 257 |
+
destination_name = self._get_destination_name(session_state)
|
| 258 |
+
days = self._get_duration_days(session_state)
|
| 259 |
+
budget_desc = self._format_budget_info(session_state.get("budget"))
|
| 260 |
persona_config = self._get_current_persona_config(session_state)
|
| 261 |
+
|
| 262 |
+
# 基础prompt
|
| 263 |
+
prompt = f"""你是一位专业的欧洲旅行顾问,请基于以下知识库信息为用户设计{destination_name}的详细旅行计划。
|
| 264 |
+
|
| 265 |
+
🎯 【用户需求】
|
| 266 |
+
📍 目的地: {destination_name}
|
| 267 |
+
⏰ 旅行天数: {days}天
|
| 268 |
+
💰 预算: {budget_desc}
|
| 269 |
+
🎭 旅行风格: {persona_config.get('name', '标准旅行者')}
|
| 270 |
+
|
| 271 |
+
📚 【知识库参考信息】"""
|
| 272 |
+
|
| 273 |
+
# 添加预算分析信息
|
| 274 |
+
if knowledge.get('budget_analysis'):
|
| 275 |
+
budget_analysis = knowledge['budget_analysis']
|
| 276 |
+
prompt += f"""
|
| 277 |
+
💰 【预算参考】
|
| 278 |
+
• 总预算范围: {budget_analysis.get('total_budget_range', 'N/A')}
|
| 279 |
+
• 日均开支: {budget_analysis.get('daily_average', 'N/A')}"""
|
| 280 |
+
|
| 281 |
+
breakdown = budget_analysis.get('budget_breakdown', {})
|
| 282 |
+
if breakdown:
|
| 283 |
+
prompt += "\n• 预算分配:"
|
| 284 |
+
for category, info in breakdown.items():
|
| 285 |
+
if isinstance(info, dict):
|
| 286 |
+
percentage = info.get('percentage', '')
|
| 287 |
+
daily_range = info.get('daily_range', '')
|
| 288 |
+
if percentage and daily_range:
|
| 289 |
+
category_name = {'accommodation': '住宿', 'transportation': '交通',
|
| 290 |
+
'food': '餐饮', 'attractions': '景点'}.get(category, category)
|
| 291 |
+
prompt += f"\n - {category_name}: {percentage}, {daily_range}"
|
| 292 |
+
|
| 293 |
+
# 添加行程参考信息
|
| 294 |
+
if knowledge.get('itinerary_suggestions'):
|
| 295 |
+
prompt += f"""
|
| 296 |
+
🗓️ 【行程参考】"""
|
| 297 |
+
for day_plan in knowledge['itinerary_suggestions'][:3]: # 只取前3天作为参考
|
| 298 |
+
day_num = day_plan.get('day_number', 'N/A')
|
| 299 |
+
location = day_plan.get('location', 'N/A')
|
| 300 |
+
theme = day_plan.get('theme', 'N/A')
|
| 301 |
+
prompt += f"\n• Day {day_num} ({location}): {theme}"
|
| 302 |
+
|
| 303 |
+
# 添加具体活动
|
| 304 |
+
morning_activities = day_plan.get('morning_activities', [])
|
| 305 |
+
for activity in morning_activities[:2]: # 只取前2个活动
|
| 306 |
+
name = activity.get('activity_name', '')
|
| 307 |
+
duration = activity.get('duration', '')
|
| 308 |
+
tips = activity.get('professional_tips', '')
|
| 309 |
+
if name:
|
| 310 |
+
prompt += f"\n - {name} ({duration}) - {tips}"
|
| 311 |
+
|
| 312 |
+
# 添加专业洞察
|
| 313 |
+
if knowledge.get('professional_insights'):
|
| 314 |
+
insights = knowledge['professional_insights']
|
| 315 |
+
prompt += f"""
|
| 316 |
+
💡 【专业建议】"""
|
| 317 |
+
|
| 318 |
+
if insights.get('seasonal_considerations'):
|
| 319 |
+
seasonal = insights['seasonal_considerations']
|
| 320 |
+
best_months = seasonal.get('best_months', [])
|
| 321 |
+
if best_months:
|
| 322 |
+
prompt += f"\n• 最佳旅行时间: {', '.join(best_months)}"
|
| 323 |
+
|
| 324 |
+
if insights.get('common_mistakes'):
|
| 325 |
+
mistakes = insights['common_mistakes'][:3] # 只取前3个
|
| 326 |
+
prompt += f"\n• 常见误区: {', '.join(mistakes)}"
|
| 327 |
+
|
| 328 |
+
if insights.get('insider_secrets'):
|
| 329 |
+
secrets = insights['insider_secrets'][:3] # 只取前3个
|
| 330 |
+
prompt += f"\n• 内行贴士: {', '.join(secrets)}"
|
| 331 |
+
|
| 332 |
+
# 结尾指令
|
| 333 |
+
prompt += f"""
|
| 334 |
+
|
| 335 |
+
🌟 【生成要求】
|
| 336 |
+
请基于以上知识库信息,结合用户的具体需求,生成一份详细、实用、个性化的{destination_name}{days}天旅行计划。计划应该包括:
|
| 337 |
+
• 每日详细行程安排
|
| 338 |
+
• 具体的景点推荐和游览建议
|
| 339 |
+
• 住宿和餐饮推荐
|
| 340 |
+
• 交通方式和路线优化
|
| 341 |
+
• 预算分解和省钱贴士
|
| 342 |
+
• 实用的旅行提醒
|
| 343 |
+
|
| 344 |
+
请用生动有趣的语言撰写,让用户感受到{destination_name}的魅力!"""
|
| 345 |
+
|
| 346 |
+
return prompt
|
| 347 |
+
|
| 348 |
+
def _generate_knowledge_based_fallback_plan(self, session_state: dict, knowledge: dict) -> str:
|
| 349 |
+
"""基于知识库生成详细的备用计划"""
|
| 350 |
+
|
| 351 |
+
destination_name = self._get_destination_name(session_state)
|
| 352 |
+
days = int(self._get_duration_days(session_state))
|
| 353 |
+
budget_desc = self._format_budget_info(session_state.get("budget"))
|
| 354 |
+
persona_config = self._get_current_persona_config(session_state)
|
| 355 |
+
persona_key = persona_config.get('key', 'planner')
|
| 356 |
+
|
| 357 |
+
# 获取城市特色描述
|
| 358 |
+
city_desc = random.choice(self.city_descriptions.get(destination_name, ["迷人的城市"]))
|
| 359 |
+
|
| 360 |
+
# 开场
|
| 361 |
+
if persona_key == 'social':
|
| 362 |
+
plan = f"🎉 {destination_name}{days}天深度攻略(知识库加持版)!\n\n"
|
| 363 |
+
elif persona_key == 'experiential':
|
| 364 |
+
plan = f"🎭 {destination_name}{days}日文化探索之旅\n\n"
|
| 365 |
+
else:
|
| 366 |
+
plan = f"📋 {destination_name}{days}天专业规划方案\n\n"
|
| 367 |
+
|
| 368 |
+
plan += f"🌟 城市印象:{city_desc}\n"
|
| 369 |
+
plan += f"💰 预算范围:{budget_desc}\n\n"
|
| 370 |
+
|
| 371 |
+
# 如果有知识库中的预算分析
|
| 372 |
+
if knowledge.get('budget_analysis'):
|
| 373 |
+
budget_analysis = knowledge['budget_analysis']
|
| 374 |
+
plan += "💰 【预算详解】(基于真实旅行经验)\n"
|
| 375 |
+
|
| 376 |
+
total_budget = budget_analysis.get('total_budget_range', '')
|
| 377 |
+
daily_avg = budget_analysis.get('daily_average', '')
|
| 378 |
+
if total_budget:
|
| 379 |
+
plan += f"• 参考总预算:{total_budget}\n"
|
| 380 |
+
if daily_avg:
|
| 381 |
+
plan += f"• 日均开支:{daily_avg}\n"
|
| 382 |
+
|
| 383 |
+
breakdown = budget_analysis.get('budget_breakdown', {})
|
| 384 |
+
if breakdown:
|
| 385 |
+
plan += "• 开支分配:\n"
|
| 386 |
+
category_names = {
|
| 387 |
+
'accommodation': '🏨 住宿', 'transportation': '🚇 交通',
|
| 388 |
+
'food': '🍽️ 餐饮', 'attractions': '🎯 景点'
|
| 389 |
+
}
|
| 390 |
+
|
| 391 |
+
for category, info in breakdown.items():
|
| 392 |
+
if isinstance(info, dict):
|
| 393 |
+
name = category_names.get(category, category)
|
| 394 |
+
percentage = info.get('percentage', '')
|
| 395 |
+
daily_range = info.get('daily_range', '')
|
| 396 |
+
|
| 397 |
+
if percentage and daily_range:
|
| 398 |
+
plan += f" - {name}:{percentage},{daily_range}\n"
|
| 399 |
+
|
| 400 |
+
# 添加具体建议
|
| 401 |
+
if category == 'accommodation' and info.get('recommendations'):
|
| 402 |
+
recs = ', '.join(info['recommendations'])
|
| 403 |
+
plan += f" 推荐:{recs}\n"
|
| 404 |
+
|
| 405 |
+
elif category == 'transportation' and info.get('money_saving_tips'):
|
| 406 |
+
tips = ', '.join(info['money_saving_tips'])
|
| 407 |
+
plan += f" 省钱技巧:{tips}\n"
|
| 408 |
+
|
| 409 |
+
plan += "\n"
|
| 410 |
+
|
| 411 |
+
# 详细行程规划(基于知识库)
|
| 412 |
+
plan += "🗓️ 【详细行程】(来自实地经验)\n"
|
| 413 |
+
|
| 414 |
+
if knowledge.get('itinerary_suggestions'):
|
| 415 |
+
# 使用知识库中的行程建议
|
| 416 |
+
itinerary = knowledge['itinerary_suggestions']
|
| 417 |
+
|
| 418 |
+
for i, day_plan in enumerate(itinerary[:days]): # 限制在用户要求的天数内
|
| 419 |
+
day_num = day_plan.get('day_number', i+1)
|
| 420 |
+
location = day_plan.get('location', destination_name)
|
| 421 |
+
theme = day_plan.get('theme', '城市探索')
|
| 422 |
+
|
| 423 |
+
plan += f"\n📅 Day {day_num} - {location}({theme})\n"
|
| 424 |
+
|
| 425 |
+
# 上午活动
|
| 426 |
+
morning_activities = day_plan.get('morning_activities', [])
|
| 427 |
+
if morning_activities:
|
| 428 |
+
plan += "🌅 上午:\n"
|
| 429 |
+
for activity in morning_activities:
|
| 430 |
+
name = activity.get('activity_name', '')
|
| 431 |
+
duration = activity.get('duration', '')
|
| 432 |
+
cost = activity.get('cost', '')
|
| 433 |
+
tips = activity.get('professional_tips', '')
|
| 434 |
+
|
| 435 |
+
plan += f" • {name}"
|
| 436 |
+
if duration:
|
| 437 |
+
plan += f" ({duration})"
|
| 438 |
+
if cost and cost != "免费":
|
| 439 |
+
plan += f" - {cost}"
|
| 440 |
+
plan += "\n"
|
| 441 |
+
|
| 442 |
+
if tips:
|
| 443 |
+
plan += f" 💡 专业提醒:{tips}\n"
|
| 444 |
+
|
| 445 |
+
# 下午活动
|
| 446 |
+
afternoon_activities = day_plan.get('afternoon_activities', [])
|
| 447 |
+
if afternoon_activities:
|
| 448 |
+
plan += "🌞 下午:\n"
|
| 449 |
+
for activity in afternoon_activities:
|
| 450 |
+
name = activity.get('activity_name', '')
|
| 451 |
+
duration = activity.get('duration', '')
|
| 452 |
+
cost = activity.get('cost', '')
|
| 453 |
+
|
| 454 |
+
plan += f" • {name}"
|
| 455 |
+
if duration:
|
| 456 |
+
plan += f" ({duration})"
|
| 457 |
+
if cost:
|
| 458 |
+
plan += f" - {cost}"
|
| 459 |
+
plan += "\n"
|
| 460 |
+
|
| 461 |
+
# 餐饮建议
|
| 462 |
+
dining = day_plan.get('dining', {})
|
| 463 |
+
if dining:
|
| 464 |
+
plan += "🍽️ 餐饮推荐:\n"
|
| 465 |
+
|
| 466 |
+
for meal_type, meal_info in dining.items():
|
| 467 |
+
if isinstance(meal_info, dict):
|
| 468 |
+
meal_names = {'breakfast': '早餐', 'lunch': '午餐', 'dinner': '晚餐'}
|
| 469 |
+
meal_name = meal_names.get(meal_type, meal_type)
|
| 470 |
+
|
| 471 |
+
recommendation = meal_info.get('recommendation', '')
|
| 472 |
+
cost_range = meal_info.get('cost_range', '')
|
| 473 |
+
|
| 474 |
+
if recommendation:
|
| 475 |
+
plan += f" • {meal_name}:{recommendation}"
|
| 476 |
+
if cost_range:
|
| 477 |
+
plan += f" ({cost_range})"
|
| 478 |
+
plan += "\n"
|
| 479 |
+
|
| 480 |
+
# 住宿建议
|
| 481 |
+
accommodation = day_plan.get('accommodation', {})
|
| 482 |
+
if accommodation and day_num == 1: # 只在第一天显示住宿建议
|
| 483 |
+
plan += "🏨 住宿推荐:\n"
|
| 484 |
+
|
| 485 |
+
area = accommodation.get('recommended_area', '')
|
| 486 |
+
safety = accommodation.get('safety_level', '')
|
| 487 |
+
|
| 488 |
+
if area:
|
| 489 |
+
plan += f" • 推荐区域:{area}"
|
| 490 |
+
if safety:
|
| 491 |
+
plan += f"(安全等级:{safety})"
|
| 492 |
+
plan += "\n"
|
| 493 |
+
|
| 494 |
+
budget_options = accommodation.get('budget_options', [])
|
| 495 |
+
for option in budget_options:
|
| 496 |
+
if isinstance(option, dict):
|
| 497 |
+
category = option.get('category', '')
|
| 498 |
+
price_range = option.get('price_range', '')
|
| 499 |
+
if category and price_range:
|
| 500 |
+
plan += f" • {category}:{price_range}\n"
|
| 501 |
+
|
| 502 |
+
else:
|
| 503 |
+
# 如果没有具体行程,生成通用建议
|
| 504 |
+
plan += f"根据{destination_name}的特色,为您推荐以下{days}天行程框架:\n\n"
|
| 505 |
+
|
| 506 |
+
# 根据不同城市提供基础框架
|
| 507 |
+
if destination_name in ["布拉格", "Prague"]:
|
| 508 |
+
plan += "📅 Day 1: 老城区探索(老城广场→天文钟→查理大桥)\n"
|
| 509 |
+
plan += "📅 Day 2: 城堡区深度游(布拉格城堡→圣维特大教堂→黄金小巷)\n"
|
| 510 |
+
if days >= 3:
|
| 511 |
+
plan += "📅 Day 3: 新城区体验(瓦茨拉夫广场→国家博物馆→当地美食)\n"
|
| 512 |
+
|
| 513 |
+
elif destination_name in ["维也纳", "Vienna"]:
|
| 514 |
+
plan += "📅 Day 1: 皇室风采(美泉宫→霍夫堡宫→圣斯蒂芬大教堂)\n"
|
| 515 |
+
plan += "📅 Day 2: 音乐文化(维也纳国家歌剧院→金色大厅→艺术史博物馆)\n"
|
| 516 |
+
if days >= 3:
|
| 517 |
+
plan += "📅 Day 3: 咖啡文化体验(中央咖啡馆→萨赫咖啡馆→多瑙河漫步)\n"
|
| 518 |
+
|
| 519 |
+
elif destination_name in ["布达佩斯", "Budapest"]:
|
| 520 |
+
plan += "📅 Day 1: 布达一侧(布达城堡→渔夫堡→马加什教堂)\n"
|
| 521 |
+
plan += "📅 Day 2: 佩斯一侧(匈牙利国会大厦→链子桥→中央市场)\n"
|
| 522 |
+
if days >= 3:
|
| 523 |
+
plan += "📅 Day 3: 温泉文化(塞切尼温泉→多瑙河游船→夜景欣赏)\n"
|
| 524 |
+
|
| 525 |
+
# 添加专业洞察
|
| 526 |
+
if knowledge.get('professional_insights'):
|
| 527 |
+
insights = knowledge['professional_insights']
|
| 528 |
+
plan += "\n💡 【专业贴士】(来自旅行达人)\n"
|
| 529 |
+
|
| 530 |
+
# 季节建议
|
| 531 |
+
seasonal = insights.get('seasonal_considerations', {})
|
| 532 |
+
if seasonal:
|
| 533 |
+
best_months = seasonal.get('best_months', [])
|
| 534 |
+
weather = seasonal.get('weather_patterns', '')
|
| 535 |
+
|
| 536 |
+
if best_months:
|
| 537 |
+
plan += f"• 🌤️ 最佳旅行时间:{', '.join(best_months)}\n"
|
| 538 |
+
if weather:
|
| 539 |
+
plan += f"• 🌡️ 天气特点:{weather}\n"
|
| 540 |
+
|
| 541 |
+
#
|