Eliot0110 committed on
Commit
05b4419
·
1 Parent(s): 502ec94

improve: knowledge base and re

Browse files
modules/info_extractor.py CHANGED
@@ -1,356 +1,818 @@
1
  import json
2
  import re
3
  from utils.logger import log
4
- from .ai_model import AIModel
5
 
6
  class InfoExtractor:
7
- def __init__(self, ai_model: AIModel):
8
- self.ai_model = ai_model
9
  # 预定义的提取结构,用于验证和规范化
10
  self.extraction_schema = {
11
  "destination": {"type": dict, "fields": {"name": str, "country": str}},
12
  "duration": {"type": dict, "fields": {"days": int, "description": str}},
13
  "budget": {"type": dict, "fields": {"type": str, "amount": int, "currency": str, "description": str}}
14
  }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
15
 
16
  def extract(self, user_message: str) -> dict:
17
- """从用户消息中提取结构化信息,确保使用确定性解码。"""
18
 
19
  # 输入验证
20
  if not user_message or not isinstance(user_message, str):
21
  log.warning("⚠️ 收到无效的用户消息")
22
  return {}
23
 
24
- # 构建prompt
25
- prompt = self._build_prompt_template(user_message)
26
-
27
- # --- 核心修复:强制使用确定性解码以杜绝幻觉 ---
28
- log.info("🧠 使用LLM开始提取信息 (模式: 确定性)")
29
-
30
- # 注意:ai_model.generate() 方法不支持 do_sample 和 temperature 参数
31
- # 需要通过其他方式确保确定性输出
32
- raw_response = self.ai_model.run_inference(
33
- input_type="text",
34
- formatted_input=None,
35
- prompt=prompt,
36
- temperature=0.0 # 使用最低温度确保确定性
37
- )
38
-
39
- try:
40
- # 清理响应,提取纯JSON部分
41
- cleaned_response = self._clean_json_response(raw_response)
42
- extracted_json = json.loads(cleaned_response)
43
- log.info(f"✅ LLM成功提取并解析JSON: {extracted_json}")
44
-
45
- # 使用新的验证方法
46
- validated_data = self._validate_and_normalize(extracted_json)
47
- log.info(f"📊 LLM最终提取结果 (安全处理后): {validated_data}")
48
- return validated_data
49
-
50
- except (json.JSONDecodeError, TypeError) as e:
51
- log.error(f"❌ 解析或验证LLM提取的JSON失败: {e}", exc_info=True)
52
- log.debug(f"🔍 原始响应: {raw_response}")
53
- # 尝试备用提取方法
54
- return self._fallback_extraction(user_message)
55
-
56
- def _clean_json_response(self, response: str) -> str:
57
- """清理LLM响应,提取纯JSON部分"""
58
- if not response:
59
- return "{}"
60
 
61
- # 移除可能的markdown代码块标记
62
- response = re.sub(r'```json\s*', '', response)
63
- response = re.sub(r'```\s*', '', response)
64
 
65
- # 移除可能的前导文字
66
- response = re.sub(r'^[^{]*', '', response)
67
 
68
- # 查找第一个{和最后一个}
69
- start_idx = response.find('{')
70
- end_idx = response.rfind('}')
 
71
 
72
- if start_idx != -1 and end_idx != -1 and end_idx > start_idx:
73
- return response[start_idx:end_idx+1].strip()
 
 
74
 
75
- # 如果找不到有效的JSON结构,返回空对象
76
- return "{}"
77
-
78
- def _fallback_extraction(self, user_message: str) -> dict:
79
- """基于规则的备用信息提取"""
80
- log.info("🔄 使用基于规则的备用提取方法")
81
 
 
 
 
 
 
82
  result = {}
83
- message_lower = user_message.lower()
84
 
85
- # 目的地提取 - 更全面的模式
86
- city_patterns = [
87
- r'去(\w+)', r'到(\w+)', r'想去(\w+)', r'前往(\w+)',
88
- r'旅行(\w+)', r'(\w+)', r'(\w+)', r'访问(\w+)',
89
- r'目的地[\s是::]*(\w+)', r'地方[\s是::]*(\w+)'
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
90
  ]
91
 
92
- for pattern in city_patterns:
93
- match = re.search(pattern, user_message)
94
- if match:
95
- city_name = match.group(1)
96
- if len(city_name) >= 2 and not city_name.isdigit():
97
- result["destination"] = {"name": city_name}
 
 
 
 
 
 
 
 
98
  break
99
 
100
- # 天数提取 - 更全面的模式
 
 
 
 
 
 
101
  day_patterns = [
102
- r'(\d+)天', r'(\d+)日', r'玩(\d+)天', r'住(\d+)天',
103
- r'(\d+)个天', r'(\d+)', r'(\d+)', r'(\d+)天行程'
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
104
  ]
105
 
 
106
  for pattern in day_patterns:
107
- match = re.search(pattern, user_message)
108
- if match:
109
- days = int(match.group(1))
110
- if 1 <= days <= 365: # 合理范围检查
111
- result["duration"] = {"days": days}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
112
  break
 
 
113
 
114
- # 预算提取 - 更全面的模式
115
- budget_patterns = [
116
- r'(\d+)元', r'(\d+)块', r'预算(\d+)', r'(\d+)rmb',
117
- r'(\d+)人民币', r'花(\d+)', r'费用(\d+)', r'(\d+)万'
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
118
  ]
119
 
120
- for pattern in budget_patterns:
121
- match = re.search(pattern, user_message)
122
- if match:
123
- amount = int(match.group(1))
124
- # 处理"万"的情况
125
- if '万' in pattern:
126
- amount *= 10000
127
- result["budget"] = {
128
- "type": None,
129
- "amount": amount,
130
- "currency": "RMB"
131
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
132
  break
133
 
134
- # 预算类型识别
135
  budget_type_keywords = {
136
- 'economy': ['经济', '便宜', '省钱', '实惠', '节省'],
137
- 'comfortable': ['舒适', '中等', '适中', '一般'],
138
- 'luxury': ['豪华', '奢华', '高端', '贵一点', '不差钱']
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
139
  }
140
 
 
141
  for budget_type, keywords in budget_type_keywords.items():
142
- if any(keyword in message_lower for keyword in keywords):
143
- if "budget" not in result:
144
- result["budget"] = {"type": budget_type, "amount": None, "currency": None}
145
- else:
146
- result["budget"]["type"] = budget_type
147
  break
148
 
149
- log.info(f"🛠️ 备用提取结果: {result}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
150
  return result
151
 
152
- def _validate_and_normalize(self, data: dict) -> dict:
153
- """
154
- 根据预定义schema验证并规范化提取的数据。
155
- 这取代了之前返回列表的逻辑,只返回符合结构的键值对。
156
- """
157
- if not isinstance(data, dict):
158
- return {}
159
-
160
- validated_output = {}
161
-
162
- # 验证destination
163
- if "destination" in data:
164
- dest_data = data["destination"]
165
- if isinstance(dest_data, dict):
166
- validated_dest = {}
167
- if "name" in dest_data and isinstance(dest_data["name"], str):
168
- name = dest_data["name"].strip()
169
- if name:
170
- validated_dest["name"] = name
171
- if "country" in dest_data and isinstance(dest_data["country"], str):
172
- country = dest_data["country"].strip()
173
- if country:
174
- validated_dest["country"] = country
175
- if validated_dest:
176
- validated_output["destination"] = validated_dest
177
-
178
- # 验证duration
179
- if "duration" in data:
180
- duration_data = data["duration"]
181
- if isinstance(duration_data, dict):
182
- validated_duration = {}
183
- if "days" in duration_data:
184
- days = duration_data["days"]
185
- if isinstance(days, (int, float)) and 1 <= days <= 365:
186
- validated_duration["days"] = int(days)
187
- if "description" in duration_data and isinstance(duration_data["description"], str):
188
- desc = duration_data["description"].strip()
189
- if desc:
190
- validated_duration["description"] = desc
191
- if validated_duration:
192
- validated_output["duration"] = validated_duration
193
-
194
- # 验证budget
195
- if "budget" in data:
196
- budget_data = data["budget"]
197
- if isinstance(budget_data, dict):
198
- validated_budget = {}
199
-
200
- # 验证type
201
- if "type" in budget_data:
202
- budget_type = budget_data["type"]
203
- if budget_type in ["economy", "comfortable", "luxury"]:
204
- validated_budget["type"] = budget_type
205
-
206
- # 验证amount
207
- if "amount" in budget_data:
208
- amount = budget_data["amount"]
209
- if isinstance(amount, (int, float)) and amount > 0:
210
- validated_budget["amount"] = int(amount)
211
-
212
- # 验证currency
213
- if "currency" in budget_data and isinstance(budget_data["currency"], str):
214
- currency = budget_data["currency"].strip()
215
- if currency:
216
- validated_budget["currency"] = currency
217
-
218
- # 验证description
219
- if "description" in budget_data and isinstance(budget_data["description"], str):
220
- desc = budget_data["description"].strip()
221
- if desc:
222
- validated_budget["description"] = desc
223
-
224
- if validated_budget:
225
- validated_output["budget"] = validated_budget
226
 
227
- if not validated_output:
228
- log.warning(f"⚠️ 提取的数据 {data} 未通过验证,未发现任何有效字段。")
229
 
230
- return validated_output
231
-
232
- def _build_prompt_template(self, user_message: str) -> str:
233
- """构建包含多个示例的提取prompt"""
234
 
235
- # 输入长度控制
236
- if len(user_message) > 300:
237
- user_message = user_message[:300] + "..."
238
 
239
- return f"""你的任务是且仅是作为文本解析器。
240
- 严格分析用户输入,并以一个纯净、无注释的JSON对象格式返回。
241
-
242
- **核心规则:**
243
- 1. **绝对禁止** 在JSON之外添加任何文本、注释、解释或Markdown标记。你的输出必须从 `{{` 开始,到 `}}` 结束。
244
- 2. **必须严格遵守** 下方定义的嵌套JSON结构。不要创造新的键,也不要改变层级。
245
- 3. 如果信息未提供,对应的键值必须为 `null`,而不是省略该键。
246
- 4. 如果用户输入与旅行无关(如 "你好"),必须返回一个空的JSON对象: `{{}}`。
247
-
248
- **强制JSON输出结构:**
249
- {{
250
- "destination": {{
251
- "name": "string or null",
252
- "country": "string or null"
253
- }},
254
- "duration": {{
255
- "days": "integer or null",
256
- "description": "string or null"
257
- }},
258
- "budget": {{
259
- "type": "string ('economy', 'comfortable', 'luxury') or null",
260
- "amount": "number or null",
261
- "currency": "string or null",
262
- "description": "string or null"
263
- }}
264
- }}
265
-
266
- **示例1:**
267
- 用户输入: "我想去柏林玩3天"
268
- 你的输出:
269
- {{
270
- "destination": {{
271
- "name": "柏林",
272
- "country": null
273
- }},
274
- "duration": {{
275
- "days": 3,
276
- "description": null
277
- }},
278
- "budget": {{
279
- "type": null,
280
- "amount": null,
281
- "currency": null,
282
- "description": null
283
- }}
284
- }}
285
-
286
- **示例2:**
287
- 用户输入: "计划去日本东京旅行一周,预算2万元"
288
- 你的输出:
289
- {{
290
- "destination": {{
291
- "name": "东京",
292
- "country": "日本"
293
- }},
294
- "duration": {{
295
- "days": 7,
296
- "description": "一周"
297
- }},
298
- "budget": {{
299
- "type": null,
300
- "amount": 20000,
301
- "currency": "元",
302
- "description": null
303
- }}
304
- }}
305
-
306
- **示例3:**
307
- 用户输入: "想要一个经济实惠的巴黎5天行程"
308
- 你的输出:
309
- {{
310
- "destination": {{
311
- "name": "巴黎",
312
- "country": null
313
- }},
314
- "duration": {{
315
- "days": 5,
316
- "description": null
317
- }},
318
- "budget": {{
319
- "type": "economy",
320
- "amount": null,
321
- "currency": null,
322
- "description": "经济实惠"
323
- }}
324
- }}
325
-
326
- **示例4:**
327
- 用户输入: "你好"
328
- 你的输出:
329
- {{}}
330
-
331
- **示例5:**
332
- 用户输入: "想去泰国普吉岛度蜜月,10天左右,豪华一点不差钱"
333
- 你的输出:
334
- {{
335
- "destination": {{
336
- "name": "普吉岛",
337
- "country": "泰国"
338
- }},
339
- "duration": {{
340
- "days": 10,
341
- "description": "10天左右"
342
- }},
343
- "budget": {{
344
- "type": "luxury",
345
- "amount": null,
346
- "currency": null,
347
- "description": "豪华一点不差钱"
348
- }}
349
- }}
350
 
351
- ---
352
- **用户输入:**
353
- {user_message}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
354
 
355
- **你的输出 (必须是纯JSON):**
356
- """
 
 
 
1
  import json
2
  import re
3
  from utils.logger import log
 
4
 
5
  class InfoExtractor:
6
def __init__(self):
    """Build the static lookup tables used by the regex-based extractors.

    Attributes created:
        extraction_schema: expected shape of an extraction result.
        european_cities: maps a Chinese city/sight name to its country.
        european_city_aliases: maps English names and Chinese nicknames to
            the canonical Chinese city name used in ``european_cities``.
        chinese_numbers: maps Chinese numerals, duration phrases and
            holiday names to a number of days.
    """
    # Predefined extraction structure, used for validation/normalisation.
    self.extraction_schema = {
        "destination": {"type": dict, "fields": {"name": str, "country": str}},
        "duration": {"type": dict, "fields": {"days": int, "description": str}},
        "budget": {"type": dict, "fields": {"type": str, "amount": int, "currency": str, "description": str}}
    }

    # City names grouped by country. Writing the table this way (instead of
    # one flat "city": "country" literal) makes it impossible to attach the
    # wrong country to a single entry and makes repeated names harmless.
    # Fixes relative to the previous flat literal:
    #   * "斯塔万格" (Stavanger) was mapped to 斯洛文尼亚; it is in 挪威.
    #   * Sliven was keyed in Hangul ("슬리문"); it is now "斯利文".
    #   * the mis-encoded country value for "多恩比恩" is restored to 奥地利.
    #   * "三月十五日" (a date, not a place) was listed under 意大利; removed.
    #   * "帕尔马" appeared under both 意大利 (Parma) and 西班牙 (Palma), so
    #     the Spanish entry silently won; Palma is now "马略卡帕尔马".
    #   * 爱尔兰 added, because the alias table points at "都柏林".
    cities_by_country = {
        # === Western Europe ===
        "法国": (
            "巴黎", "里昂", "马赛", "尼斯", "戛纳",
            "图卢兹", "南特", "斯特拉斯堡", "蒙彼利埃", "波尔多",
            "里尔", "雷恩", "兰斯", "勒阿弗尔", "圣埃蒂安",
            "土伦", "阿维尼翁", "凡尔赛", "枫丹白露", "第戎",
            "昂热", "贝桑松", "佩皮尼昂", "卢尔德", "沙特尔",
        ),
        "德国": (
            "柏林", "慕尼黑", "汉堡", "科隆", "法兰克福",
            "斯图加特", "杜塞尔多夫", "多特蒙德", "埃森", "莱比锡",
            "不来梅", "德累斯顿", "汉诺威", "纽伦堡", "杜伊斯堡",
            "波鸿", "乌珀塔尔", "比勒费尔德", "波恩", "明斯特",
            "卡尔斯鲁厄", "曼海姆", "奥格斯堡", "威斯巴登", "盖尔森基兴",
            "门兴格拉德巴赫", "布伦瑞克", "基尔", "亚琛", "哈雷",
            "马格德堡", "弗莱堡", "克里菲尔德", "吕贝克", "奥伯豪森",
            "埃尔福特", "罗斯托克", "凯泽斯劳滕", "卡塞尔", "哈根",
            "波茨坦", "萨尔布吕肯", "路德维希港", "奥尔登堡", "莱沃库森",
            "奥斯纳布吕克", "索林根", "海德堡", "达姆施塔特", "哈姆",
            "维尔茨堡", "雷克林豪森", "沃尔夫斯堡", "格廷根", "科特布斯",
            "希尔德斯海姆", "埃朗根", "特里尔", "耶拿", "康斯坦茨",
            "新天鹅堡", "罗滕堡", "科布伦茨", "班贝格", "拜罗伊特",
        ),
        "英国": (
            "伦敦", "伯明翰", "曼彻斯特", "格拉斯哥", "利物浦",
            "利兹", "谢菲尔德", "爱丁堡", "布里斯托", "莱斯特",
            "考文垂", "布拉德福德", "贝尔法斯特", "卡迪夫", "诺丁汉",
            "金斯顿", "纽卡斯尔", "普利茅斯", "斯托克", "南安普顿",
            "雷丁", "德比", "约克", "牛津", "剑桥",
            "巴斯", "温莎", "坎特伯雷", "斯特拉特福", "湖区",
            "斯凯岛", "史德灵", "珀斯",
            "因弗内斯", "阿伯丁", "邓迪", "法夫", "奥班",
        ),
        "爱尔兰": (
            "都柏林", "科克", "戈尔韦", "利默里克",
        ),
        "荷兰": (
            "阿姆斯特丹", "鹿特丹", "海牙", "乌得勒支", "埃因霍温",
            "蒂尔堡", "格罗宁根", "阿尔梅勒", "布雷达", "奈梅亨",
            "阿珀尔多伦", "哈勒姆", "阿纳姆", "恩斯赫德", "阿默斯福特",
            "赞丹", "阿尔克马尔", "马斯特里赫特", "莱顿",
            "代尔夫特", "多德雷赫特", "豪达", "羊角村", "马尔肯",
        ),
        "比利时": (
            "布鲁塞尔", "安特卫普", "根特", "沙勒罗瓦", "列日",
            "布吕赫", "那慕尔", "蒙斯", "阿尔斯特", "科特赖克",
            "哈瑟尔特", "圣尼古拉", "奥斯坦德", "梅赫伦", "鲁汶",
        ),
        "卢森堡": (
            "卢森堡市", "埃施", "迪费当日", "杜德朗日",
        ),

        # === Southern Europe ===
        "意大利": (
            "罗马", "米兰", "威尼斯", "佛罗伦萨", "那不勒斯",
            "都灵", "帕勒莫", "热那亚", "博洛尼亚", "巴里",
            "卡塔尼亚", "韦罗纳", "墨西拿",
            "帕多瓦", "的里雅斯特", "塔兰托", "布雷西亚", "摩德纳",
            "雷焦卡拉布里亚", "普拉托", "卡利亚里", "帕尔马", "佩鲁贾",
            "利沃诺", "雷焦艾米利亚", "佛嘉", "萨莱诺", "拉温纳",
            "里米尼", "拉斯佩齐亚", "萨萨里", "蒙扎", "贝加莫",
            "比萨", "维琴察", "博尔扎诺", "安德里亚",
            "阿雷佐", "蒂沃利", "阿西西", "锡耶纳", "五渔村",
            "马泰拉", "庞贝", "卡普里岛", "阿马尔菲", "科莫",
        ),
        "西班牙": (
            "马德里", "巴塞罗那", "瓦伦西亚", "塞维利亚", "萨拉戈萨",
            "马拉加", "穆尔西亚", "马略卡帕尔马", "拉斯帕尔马斯", "毕尔巴鄂",
            "阿利坎特", "科尔多瓦", "巴利亚多利德", "维戈", "希洪",
            "莱昂", "拉科鲁尼亚", "埃尔切", "奥维耶多", "圣塞巴斯蒂安",
            "桑坦德", "卡斯特利翁", "洛格罗尼奥", "巴达霍斯", "萨拉曼卡",
            "韦尔瓦", "阿尔梅里亚", "卡迪斯", "格拉纳达", "托莱多",
            "昆卡", "卡塞雷斯", "塞哥维亚", "阿维拉", "布尔戈斯",
            "马略卡岛", "伊比萨", "特内里费", "大加那利", "兰萨罗特",
        ),
        "葡萄牙": (
            "里斯本", "波尔图", "阿马多拉", "布拉加", "塞图巴尔",
            "科英布拉", "丰沙尔", "阿威罗", "埃武拉", "法鲁",
            "阿尔布费拉", "辛特拉", "卡斯凯什", "奥比杜什", "波尔塔莱格雷",
            "吉马良斯", "维亚纳堡", "维塞乌", "拉戈什", "萨格里什",
        ),
        "希腊": (
            "雅典", "塞萨洛尼基", "帕特雷", "伊拉克利翁", "拉里萨",
            "沃洛斯", "约阿尼纳", "卡瓦拉", "哈尼亚", "塞雷斯",
            "圣托里尼", "米科诺斯", "罗德岛", "科孚", "克里特",
            "帕罗斯", "纳克索斯", "扎金索斯", "凯法利尼亚", "斯基亚索斯",
            "德尔菲", "奥林匹亚", "迈锡尼", "埃皮达鲁斯", "梅泰奥拉",
        ),

        # === Central Europe ===
        "奥地利": (
            "维也纳", "格拉茨", "林茨", "萨尔茨堡", "因斯布鲁克",
            "克拉根福", "菲拉赫", "韦尔斯", "圣珀尔滕", "多恩比恩",
            "维也纳新城", "施泰尔", "费尔德基兴", "布鲁克", "莱奥本",
            "哈尔施塔特", "巴德伊舍尔", "梅尔克", "瓦绍", "库夫斯坦",
        ),
        "捷克": (
            "布拉格", "布尔诺", "俄斯特拉发", "比尔森", "奥洛穆茨",
            "利贝雷茨", "赫拉德茨克拉洛韦", "乌斯季", "帕尔杜比采", "兹林",
            "哈维若夫", "克拉德诺", "切斯凯布杰约维采", "莫斯特", "卡尔维纳",
            "库特纳霍拉", "泰尔奇", "克鲁姆洛夫", "卡尔什特因", "布拉格城堡",
        ),
        "匈牙利": (
            "布达佩斯", "德布勒森", "塞格德", "米什科尔茨", "佩奇",
            "焦尔", "尼赖吉哈佐", "凯奇凯梅特", "塞克什白堡", "松博特海伊",
            "维斯普雷姆", "埃格尔", "贝凯什乔包", "大沃拉丁",
            "埃斯泰尔戈姆", "维谢格拉德", "霍洛克", "蒂豪尼", "巴拉顿湖",
        ),
        "波兰": (
            "华沙", "克拉科夫", "罗兹", "弗罗茨瓦夫", "波兹南",
            "格但斯克", "什切青", "比得哥什", "卢布林", "卡托维兹",
            "白雅斯托克", "格丁尼亚", "琴斯托霍瓦", "拉多姆", "索斯诺维茨",
            "托伦", "基尔采", "格利维采", "扎布热", "比托姆",
            "奥斯威辛", "马尔堡", "扎科帕内", "维利奇卡",
        ),
        "斯洛伐克": (
            "布拉迪斯拉发", "科希策", "普雷绍夫", "日利纳", "班斯卡比斯特里察",
            "尼特拉", "特伦钦", "马丁", "特尔纳瓦", "波普拉德",
            "普里维德扎", "兹沃伦", "巴尔代约夫", "列沃恰", "斯皮什斯基堡",
        ),
        "斯洛文尼亚": (
            "卢布尔雅那", "马里博尔", "采列", "克拉尼", "韦莱涅",
            "新戈里察", "科佩尔", "诺沃梅斯托", "卡姆尼克", "多姆扎勒",
            "布莱德", "博希尼", "皮兰", "什科茨扬", "波斯托伊纳",
        ),
        "瑞士": (
            "苏黎世", "日内瓦", "巴塞尔", "伯尔尼", "洛桑",
            "圣加仑", "卢塞恩", "卢加诺", "比尔", "图恩",
            "拉绍德封", "沙夫豪森", "弗里堡", "韦维", "拉佩斯",
            "因特拉肯", "采尔马特", "格林德瓦", "少女峰", "马特洪峰",
            "圣莫里茨", "洛伊克巴德", "安德马特", "文根", "拉克斯",
        ),

        # === Northern Europe ===
        "瑞典": (
            "斯德哥尔摩", "哥德堡", "马尔默", "乌普萨拉", "林雪平",
            "韦斯特罗斯", "厄勒布鲁", "北雪平", "赫尔辛堡", "永雪平",
            "松兹瓦尔", "于默奥", "韦克舍", "加夫勒", "博罗斯",
            "法伦", "卡尔斯塔德", "卡尔马", "维斯比", "基律纳",
        ),
        "挪威": (
            "奥斯陆", "卑尔根", "特隆赫姆", "斯塔万格", "克里斯蒂安桑",
            "腓特烈斯塔", "德拉门", "谢恩", "桑内斯", "萨尔普斯堡",
            "特洛姆瑟", "博多", "阿尔塔", "哈默菲斯特", "纳尔维克",
            "弗洛姆", "盖朗厄尔", "奥勒松", "利勒哈默尔", "罗弗敦群岛",
        ),
        "丹麦": (
            "哥本哈根", "奥胡斯", "欧登塞", "奥尔堡", "埃斯比约",
            "兰德斯", "科尔丁", "赫尔辛格", "马里布", "海勒鲁普",
            "比隆", "希勒勒", "罗斯基勒", "斯卡恩", "法尔瑟特",
        ),
        "芬兰": (
            "赫尔辛基", "埃斯波", "坦佩雷", "万塔", "图尔库",
            "奥卢", "拉赫蒂", "库奥皮奥", "约恩苏", "约瓦斯屈莱",
            "拉彭兰塔", "科特卡", "瓦萨", "弗绍", "海门林纳",
            "罗瓦涅米", "凯米", "托尔尼奥", "萨利色尔卡", "伊瓦洛",
        ),
        "冰岛": (
            "雷克雅未克", "科帕沃古尔", "哈夫纳夫约杜尔", "阿克雷里", "雷克雅内斯",
            "塞尔福斯", "韦斯特曼纳群岛", "胡萨维克", "埃伊尔斯塔济", "凯夫拉维克",
        ),

        # === Eastern Europe ===
        # Russia: European part only.
        "俄罗斯": (
            "莫斯科", "圣彼得堡", "下诺夫哥罗德", "喀山", "萨马拉",
            "伏尔加格勒", "罗斯托夫", "乌法", "彭萨", "雅罗斯拉夫",
            "卡卢加", "图拉", "弗拉基米尔", "苏兹达尔", "谢尔盖夫",
        ),
        "乌克兰": (
            "基辅", "哈尔科夫", "敖德萨", "第聂伯", "顿涅茨克",
            "扎波罗热", "利沃夫", "克里沃罗格", "尼古拉耶夫", "马里乌波尔",
            "卢甘斯克", "文尼察", "赫尔松", "切尔卡瑟", "切尔尼戈夫",
        ),
        "白俄罗斯": (
            "明斯克", "戈梅利", "莫吉廖夫", "维帖布斯克", "格罗德诺",
            "布列斯特", "鲍里索夫", "巴拉诺维奇", "平斯克", "奥尔沙",
        ),
        # Baltic states.
        "拉脱维亚": (
            "里加", "陶格夫匹尔斯", "利耶帕亚", "叶尔加瓦", "文茨皮尔斯",
        ),
        "爱沙尼亚": (
            "塔林", "塔尔图", "纳尔瓦", "帕尔努", "科赫特拉",
        ),
        "立陶宛": (
            "维尔纽斯", "考纳斯", "克莱佩达", "希奥利艾", "帕内韦日斯",
        ),
        "摩尔多瓦": (
            "基希讷乌", "蒂拉斯波尔", "巴尔济", "本德尔", "雷布尼察",
        ),

        # === Balkans ===
        "克罗地亚": (
            "萨格勒布", "斯普利特", "里耶卡", "奥西耶克", "扎达尔",
            "普拉", "杜布罗夫尼克", "希贝尼克", "卡尔洛瓦茨", "瓦拉日丁",
            "罗维尼", "波雷奇", "特罗吉尔", "赫瓦尔", "科尔丘拉",
        ),
        "塞尔维亚": (
            "贝尔格莱德", "诺维萨德", "尼什", "克拉古耶瓦茨", "苏博蒂察",
            "潘切沃", "泽蒙", "莱斯科瓦茨", "恰恰克", "新帕扎尔",
        ),
        "波黑": (
            "萨拉热窝", "巴尼亚卢卡", "图兹拉", "泽尼察", "莫斯塔尔",
            "比哈奇", "布里耶利纳", "多博伊", "格拉迪什卡", "利夫诺",
        ),
        "黑山": (
            "波德戈里察", "尼克希奇", "普里耶波列", "比耶洛波列", "采蒂涅",
            "布德瓦", "科托尔", "乌尔齐尼", "赫尔采格诺维", "巴尔",
        ),
        "北马其顿": (
            "斯科普里", "库马诺沃", "比托拉", "普里莱普", "特托沃",
            "韦莱斯", "什蒂普", "奥赫里德", "戈斯蒂瓦尔", "斯特鲁加",
        ),
        "阿尔巴尼亚": (
            "地拉那", "都拉斯", "埃尔巴桑", "发罗拉", "斯库台",
            "科尔察", "卢什涅", "费里", "贝拉特", "吉诺卡斯特",
        ),
        "保加利亚": (
            "索菲亚", "普罗夫迪夫", "瓦尔纳", "布尔加斯", "鲁塞",
            "斯塔拉扎戈拉", "普列文", "斯利文", "多布里奇", "舒门",
            "帕扎尔吉克", "哈斯科沃", "扬博尔", "布拉戈耶夫格勒", "韦利科特尔诺沃",
        ),
        "罗马尼亚": (
            "布加勒斯特", "克卢日", "蒂米什瓦拉", "雅西", "康斯坦察",
            "克拉约瓦", "布拉索夫", "加拉茨", "普洛耶什蒂", "奥拉迪亚",
            "布勒伊拉", "阿拉德", "皮特什蒂", "锡比乌", "巴克乌",
            "锡纳亚", "布兰", "德古拉城堡", "佩莱什城堡", "马拉穆雷什",
        ),
        # Turkey: European part only.
        "土耳其": (
            "伊斯坦布尔", "埃迪尔内", "泰基尔达", "克尔克拉雷利", "恰纳卡莱",
        ),
        "塞浦路斯": (
            "尼科西亚", "利马索尔", "拉纳卡", "法马古斯塔", "帕福斯",
            "凯里尼亚", "阿依纳帕", "普罗塔拉斯", "特罗多斯", "阿卡马斯",
        ),
        "马耳他": (
            "瓦莱塔", "斯利马", "圣朱利安斯", "姆西达", "维多利亚",
            "马尔萨什洛克", "梅利哈", "戈佐", "蓝湖", "姆迪纳",
        ),
    }

    # Flatten to the city -> country mapping that the extractors consume.
    self.european_cities = {
        city: country
        for country, city_names in cities_by_country.items()
        for city in city_names
    }

    # Alternative spellings that resolve to a canonical key of
    # ``european_cities``. English keys are lower-case because callers
    # lower-case the candidate before looking it up.
    self.european_city_aliases = {
        # English names
        "paris": "巴黎", "rome": "罗马", "london": "伦敦", "berlin": "柏林",
        "madrid": "马德里", "barcelona": "巴塞罗那", "vienna": "维也纳", "prague": "布拉格",
        "amsterdam": "阿姆斯特丹", "florence": "佛罗伦萨", "venice": "威尼斯", "athens": "雅典",
        "budapest": "布达佩斯", "lisbon": "里斯本", "stockholm": "斯德哥尔摩", "copenhagen": "哥本哈根",
        "helsinki": "赫尔辛基", "oslo": "奥斯陆", "zurich": "苏黎世", "geneva": "日内瓦",
        "munich": "慕尼黑", "milan": "米兰", "naples": "那不勒斯", "nice": "尼斯",
        "edinburgh": "爱丁堡", "dublin": "都柏林", "brussels": "布鲁塞尔", "warsaw": "华沙",
        "krakow": "克拉科夫", "zagreb": "萨格勒布", "belgrade": "贝尔格莱德", "sofia": "索菲亚",
        "bucharest": "布加勒斯特", "kiev": "基辅", "moscow": "莫斯科", "st petersburg": "圣彼得堡",
        "reykjavik": "雷克雅未克", "tallinn": "塔林", "riga": "里加", "vilnius": "维尔纽斯",
        "bratislava": "布拉迪斯拉发", "ljubljana": "卢布尔雅那", "sarajevo": "萨拉热窝",
        "dubrovnik": "杜布罗夫尼克", "split": "斯普利特", "santorini": "圣托里尼", "mykonos": "米科诺斯",

        # Chinese nicknames
        "花都": "巴黎", "光之城": "巴黎", "永恒之城": "罗马", "雾都": "伦敦",
        "音乐之都": "维也纳", "黄金城市": "布拉格", "千塔之城": "布拉格",
        "运河之城": "阿姆斯特丹", "翡冷翠": "佛罗伦萨", "文艺复兴之都": "佛罗伦萨",
        "水城": "威尼斯", "西方文明的摇篮": "雅典", "多瑙河明珠": "布达佩斯",
        "七丘之城": "里斯本", "北方威尼斯": "斯德哥尔摩", "童话之都": "哥本哈根",
        "波罗的海的女儿": "赫尔辛基", "欧洲屋脊": "因特拉肯", "北方雅典": "爱丁堡",
        "翡翠岛": "都柏林", "欧洲之都": "布鲁塞尔", "高迪之城": "巴塞罗那",
    }

    # Chinese numerals and duration phrases, expressed in days.
    self.chinese_numbers = {
        '一': 1, '二': 2, '三': 3, '四': 4, '五': 5, '六': 6, '七': 7, '八': 8, '九': 9, '十': 10,
        '两': 2, '半': 0.5, '壹': 1, '贰': 2, '叁': 3, '肆': 4, '伍': 5, '陆': 6, '柒': 7, '捌': 8, '玖': 9, '拾': 10,
        # Fixed duration phrases
        '半个月': 15, '一个月': 30, '半年': 180, '一年': 365,
        '半天': 0.5, '一天': 1, '两天': 2, '三天': 3, '四天': 4, '五天': 5, '六天': 6, '七天': 7,
        '八天': 8, '九天': 9, '十天': 10, '半周': 3.5, '一周': 7, '两周': 14,
        # Holidays, mapped to their usual length
        '小长假': 3, '长假': 7, '十一': 7, '国庆': 7, '春节': 7, '五一': 3, '清明': 3,
        '端午': 3, '中秋': 3, '元旦': 3, '暑假': 60, '寒假': 30, '周末': 2, '长周末': 3
    }
282
 
283
def extract(self, user_message: str) -> dict:
    """Extract structured travel information from a message using regexes only.

    Args:
        user_message: Raw text typed by the user.

    Returns:
        A dict holding any of the keys ``"destination"``, ``"duration"`` and
        ``"budget"`` for which the matching helper returned a non-empty
        result. An empty dict is returned when the input is empty, is not a
        string, or has fewer than two characters once stripped.
    """
    # Input validation.
    if not user_message or not isinstance(user_message, str):
        log.warning("⚠️ 收到无效的用户消息")
        return {}

    if len(user_message.strip()) < 2:
        log.warning("⚠️ 用户消息过短,跳过信息提取")
        return {}

    log.info("🛠️ 使用正则表达式提取信息(聚焦欧洲)")

    result = {}

    # 1. Destination.
    destination_info = self._extract_european_destination(user_message)
    if destination_info:
        result["destination"] = destination_info

    # 2. Trip length.
    duration_info = self._extract_duration(user_message)
    if duration_info:
        result["duration"] = duration_info

    # 3. Budget.
    budget_info = self._extract_budget(user_message)
    if budget_info:
        result["budget"] = budget_info

    # The message text previously contained U+FFFD replacement characters
    # where "洲" had been mis-encoded; the intended wording is restored.
    log.info(f"📊 欧洲城市正则提取结果: {result}")
    return result
316
+
317
def _extract_european_destination(self, text: str) -> dict:
    """Find the European destination mentioned in ``text``.

    The text is scanned with keyword patterns ("去X", "X之旅", ...). Chinese
    has no word separators, so the ``\\w+`` capture normally contains more
    than the place name ("巴黎玩3天"). Each captured fragment is therefore
    tried as-is first and, failing that, searched for a name known to
    ``self.european_cities`` / ``self.european_city_aliases``.

    Args:
        text: User message to analyse.

    Returns:
        ``{"name": city}`` plus ``"country"`` when the city is in
        ``self.european_cities``; an empty dict when nothing is recognised.
        Every accepted name has passed ``self._is_valid_european_city``.
    """
    result = {}
    if not text:
        return result

    cities = self.european_cities
    aliases = self.european_city_aliases
    known_names = [name for name in set(cities) | set(aliases) if name]

    def locate(name, haystack, prefer_end):
        """Return the index of ``name`` in ``haystack`` or -1."""
        start = haystack.rfind(name) if prefer_end else haystack.find(name)
        if start < 0:
            return -1
        if name.isascii():
            # English aliases must not match inside a longer English word
            # ("nice" in "nicely"). Only the chosen occurrence is checked.
            before = haystack[start - 1:start] if start else ''
            after = haystack[start + len(name):start + len(name) + 1]
            if any(ch.isascii() and ch.isalpha() for ch in (before, after) if ch):
                return -1
        return start

    resolved = {}  # (fragment, prefer_end) -> canonical name or None

    def resolve(fragment, prefer_end):
        """Map a captured fragment to a validated canonical city name."""
        key = (fragment, prefer_end)
        if key in resolved:
            return resolved[key]
        resolved[key] = None

        fragment = fragment.strip()
        if not fragment:
            return None
        folded = fragment.lower()

        # The whole fragment is a city or an alias.
        exact = aliases.get(folded, aliases.get(fragment, fragment))
        if self._is_valid_european_city(exact):
            resolved[key] = exact
            return exact

        # Otherwise look for a known name inside the fragment. For
        # "keyword + place" patterns the place starts the fragment, for
        # "place + keyword" patterns it ends it; ties go to the longer name.
        best_rank, best_name = None, None
        for name in known_names:
            start = locate(name, folded, prefer_end)
            if start < 0:
                continue
            if prefer_end:
                rank = (start + len(name), len(name))
            else:
                rank = (-start, len(name))
            if best_rank is None or rank > best_rank:
                best_rank, best_name = rank, name
        if best_name is None:
            return None
        canonical = aliases.get(best_name, best_name)
        if self._is_valid_european_city(canonical):
            resolved[key] = canonical
        return resolved[key]

    destination_patterns = [
        # Basic verb + place. The first two patterns had lost their verb
        # and were a bare (\w+), which matches any word at all.
        r'去(\w+)', r'到(\w+)', r'想去(\w+)', r'前往(\w+)', r'计划去(\w+)', r'打算去(\w+)',
        r'准备去(\w+)', r'希望去(\w+)', r'考虑去(\w+)', r'决定去(\w+)', r'选择去(\w+)',
        r'旅行(\w+)', r'游(\w+)', r'玩(\w+)', r'访问(\w+)', r'探索(\w+)', r'体验(\w+)',
        r'出发去(\w+)', r'飞去(\w+)', r'飞往(\w+)', r'飞到(\w+)', r'坐车去(\w+)', r'开车去(\w+)',

        # Destination keywords
        r'目的地[\s是::]*(\w+)', r'地方[\s是::]*(\w+)', r'城市[\s是::]*(\w+)',
        r'国家[\s是::]*(\w+)', r'地区[\s是::]*(\w+)', r'景点[\s是::]*(\w+)',

        # "in <place>" expressions
        r'在(\w+)旅游', r'在(\w+)游玩', r'在(\w+)度假', r'在(\w+)旅行', r'在(\w+)玩',
        r'在(\w+)观光', r'在(\w+)游览', r'在(\w+)休假', r'在(\w+)放松', r'在(\w+)散心',

        # <place> + trip/itinerary
        r'(\w+)之旅', r'(\w+)行程', r'(\w+)旅程', r'(\w+)游', r'(\w+)行', r'(\w+)之行',
        r'(\w+)深度游', r'(\w+)自由行', r'(\w+)跟团游', r'(\w+)自驾游', r'(\w+)蜜月游',

        # Expressions containing "的"
        r'(\w+)的旅行', r'(\w+)的行程', r'(\w+)的攻略', r'(\w+)的景点', r'(\w+)的美食',
        r'(\w+)的文化', r'(\w+)的历史', r'(\w+)的风景', r'(\w+)的特色', r'(\w+)的魅力',

        # Means of transport
        r'飞(\w+)', r'坐船去(\w+)', r'坐火车去(\w+)', r'自驾去(\w+)', r'徒步去(\w+)',
        r'骑行去(\w+)', r'背包去(\w+)', r'穷游去(\w+)',

        # Trip type + place
        r'自由行(\w+)', r'跟团(\w+)', r'自驾(\w+)', r'蜜月(\w+)', r'毕业(\w+)',
        r'亲子(\w+)', r'家庭(\w+)', r'情侣(\w+)', r'闺蜜(\w+)', r'独自(\w+)',

        # Holiday / leisure expressions
        r'度假去(\w+)', r'休闲去(\w+)', r'放松去(\w+)', r'散心去(\w+)', r'疗养去(\w+)',

        # Other variants
        r'想要去(\w+)', r'渴望去(\w+)', r'梦想去(\w+)', r'向往(\w+)', r'憧憬(\w+)',
        r'安排去(\w+)', r'规划去(\w+)', r'预定(\w+)', r'订(\w+)的票', r'买(\w+)机票'
    ]

    for pattern in destination_patterns:
        # Patterns that start with the capture group put the place at the
        # end of the captured fragment.
        prefer_end = pattern.startswith('(')
        for fragment in re.findall(pattern, text):
            city = resolve(fragment, prefer_end)
            if city:
                result["name"] = city
                if city in cities:
                    result["country"] = cities[city]
                return result

    # Fallback: "<country>的<city>" and "<country><city>市/府/州/省/岛".
    european_country_city_patterns = [
        r'(\w+)的(\w+)', r'(\w+)(\w+)市', r'(\w+)(\w+)府',
        r'(\w+)(\w+)州', r'(\w+)(\w+)省', r'(\w+)(\w+)岛'
    ]

    for pattern in european_country_city_patterns:
        for country_part, city_part in re.findall(pattern, text):
            # The two groups are exactly a known country/city pair.
            if cities.get(city_part) == country_part:
                return {"name": city_part, "country": country_part}
            if (self._is_valid_european_country(country_part)
                    and self._is_valid_european_city(city_part)):
                return {"name": city_part, "country": country_part}

            # The greedy groups rarely split on the real boundary, so look
            # for a known city in the capture and require its own country
            # to appear in the text in front of it.
            city = (resolve(city_part, False)
                    or resolve(country_part + city_part, True))
            country = cities.get(city) if city else None
            if country and country in country_part:
                return {"name": city, "country": country}

    return result
406
+
407
+ def _extract_duration(self, text: str) -> dict:
408
+ """提取时长信息 - 完整保留之前的实现"""
409
+ result = {}
410
+
411
+ # 天数提取模式 - 大幅扩展(保持原有完整实现)
412
  day_patterns = [
413
+ # 基本数字+天
414
+ r'(\d+)', r'(\d+)', r'(\d+)', r'(\d+)个天', r'(\d+)个日',
415
+
416
+ # 动词+天数
417
+ r'玩(\d+)天', r'住(\d+)天', r'呆(\d+)天', r'待(\d+)天', r'停留(\d+)天',
418
+ r'逗留(\d+)天', r'游(\d+)天', r'旅行(\d+)天', r'度假(\d+)天', r'休假(\d+)天',
419
+
420
+ # 行程相关
421
+ r'(\d+)天行程', r'(\d+)天旅程', r'(\d+)天旅行', r'(\d+)天游', r'(\d+)天之旅',
422
+ r'(\d+)天的行程', r'(\d+)天的旅程', r'(\d+)天的旅行', r'(\d+)天的假期',
423
+ r'行程(\d+)天', r'旅程(\d+)天', r'假期(\d+)天', r'休假(\d+)天',
424
+
425
+ # 时间修饰词
426
+ r'大概(\d+)天', r'约(\d+)天', r'差不多(\d+)天', r'左右(\d+)天', r'上下(\d+)天',
427
+ r'最多(\d+)天', r'最少(\d+)天', r'至少(\d+)天', r'不超过(\d+)天', r'超过(\d+)天',
428
+ r'将近(\d+)天', r'接近(\d+)天', r'快(\d+)天', r'足足(\d+)天', r'整整(\d+)天',
429
+
430
+ # 周相关
431
+ r'(\d+)周', r'(\d+)个周', r'(\d+)星期', r'(\d+)个星期', r'(\d+)礼拜', r'(\d+)个礼拜',
432
+ r'玩(\d+)周', r'住(\d+)周', r'呆(\d+)周', r'待(\d+)周', r'旅行(\d+)周',
433
+ r'(\d+)周的行程', r'(\d+)星期的旅行', r'(\d+)个礼拜的假期',
434
+
435
+ # 月相关
436
+ r'(\d+)月', r'(\d+)个月', r'(\d+)个月份',
437
+ r'玩(\d+)个月', r'住(\d+)个月', r'旅行(\d+)个月', r'度假(\d+)个月',
438
+ r'(\d+)个月的行程', r'(\d+)月的旅行', r'(\d+)个月的假期',
439
+
440
+ # 范围表达
441
+ r'(\d+)-(\d+)天', r'(\d+)到(\d+)天', r'(\d+)至(\d+)天', r'(\d+)~(\d+)天',
442
+ r'(\d+)天到(\d+)天', r'从(\d+)天到(\d+)天', r'介于(\d+)到(\d+)天',
443
+
444
+ # 中文数字
445
+ r'一天', r'二天', r'三天', r'四天', r'五天', r'六天', r'七天', r'八天', r'九天', r'十天',
446
+ r'两天', r'俩天', r'仨天', r'半天', r'一天半', r'两天半', r'三天半',
447
+ r'十一天', r'十二天', r'十三天', r'十四天', r'十五天', r'二十天', r'三十天',
448
+
449
+ # 特殊时长表达
450
+ r'周末', r'长周末', r'小长假', r'长假', r'黄金周', r'假期',
451
+ r'十一', r'国庆', r'春节', r'过年', r'五一', r'劳动节', r'清明', r'端午', r'中秋', r'元旦',
452
+ r'暑假', r'寒假', r'年假', r'蜜月', r'度蜜月',
453
+ r'短途', r'中途', r'长途', r'快闪', r'一日游', r'两日游', r'三日游', r'多日游'
454
  ]
455
 
456
+ # 尝试提取时长(完整保留原有逻辑)
457
  for pattern in day_patterns:
458
+ matches = re.findall(pattern, text)
459
+ for match in matches:
460
+ days = None
461
+
462
+ if isinstance(match, tuple):
463
+ # 范围表达,取平均值
464
+ try:
465
+ start_days = int(match[0])
466
+ end_days = int(match[1])
467
+ days = (start_days + end_days) / 2
468
+ except:
469
+ days = int(match[0]) if match[0].isdigit() else None
470
+ elif match.isdigit():
471
+ days = int(match)
472
+
473
+ # 处理单位转换
474
+ if '周' in pattern or '星期' in pattern or '礼拜' in pattern:
475
+ days *= 7
476
+ elif '月' in pattern:
477
+ days *= 30
478
+
479
+ # 处理中文数字和特殊表达
480
+ elif match in self.chinese_numbers:
481
+ days = self.chinese_numbers[match]
482
+
483
+ # 验证天数合理性
484
+ if days and 0.5 <= days <= 365:
485
+ result["days"] = int(days) if days >= 1 else days
486
+
487
+ # 添加描述信息
488
+ if days <= 1:
489
+ result["description"] = "当日往返"
490
+ elif days <= 3:
491
+ result["description"] = "短途旅行"
492
+ elif days <= 7:
493
+ result["description"] = "一周内旅行"
494
+ elif days <= 14:
495
+ result["description"] = "中长途旅行"
496
+ elif days <= 30:
497
+ result["description"] = "长途旅行"
498
+ else:
499
+ result["description"] = "超长途旅行"
500
+
501
+ # 保留原始匹配文本作为额外描述
502
+ if not isinstance(match, tuple) and not match.isdigit():
503
+ result["description"] = match
504
+
505
  break
506
+ if result:
507
+ break
508
 
509
+ return result
510
+
511
+ def _extract_budget(self, text: str) -> dict:
512
+ """提取预算信息 - 针对欧洲旅行优化"""
513
+ result = {}
514
+ text_lower = text.lower()
515
+
516
+ # 欧洲旅行常用货币的金额提取模式
517
+ amount_patterns = [
518
+ # === 欧元表达 - 优先级最高(欧洲旅行主要货币) ===
519
+ r'(\d+)欧元', r'(\d+)欧', r'€(\d+)', r'EUR(\d+)', r'eur(\d+)',
520
+ r'(\d+)euro', r'(\d+)Euro', r'(\d+)EURO',
521
+ r'(\d+\.?\d*)欧元', r'€(\d+\.?\d*)',
522
+ r'预算(\d+)欧', r'花费(\d+)欧', r'大概(\d+)欧', r'约(\d+)欧',
523
+
524
+ # === 人民币表达 ===
525
+ r'(\d+)元', r'(\d+)块', r'(\d+)块钱', r'(\d+)人民币', r'(\d+)rmb', r'(\d+)RMB',
526
+ r'¥(\d+)', r'¥(\d+)', r'CNY(\d+)', r'cny(\d+)',
527
+
528
+ # === 美元表达 ===
529
+ r'(\d+)美元', r'(\d+)美刀', r'(\d+)刀', r'\$(\d+)', r'USD(\d+)', r'usd(\d+)',
530
+ r'(\d+)dollar', r'(\d+)Dollar',
531
+
532
+ # === 英镑表达(英国旅行) ===
533
+ r'(\d+)英镑', r'(\d+)镑', r'£(\d+)', r'GBP(\d+)', r'gbp(\d+)',
534
+ r'(\d+)pound', r'(\d+)Pound',
535
+
536
+ # === 瑞士法郎(瑞士旅行) ===
537
+ r'(\d+)瑞士法郎', r'(\d+)法郎', r'CHF(\d+)', r'chf(\d+)',
538
+ r'(\d+)瑞郎', r'(\d+)swiss franc',
539
+
540
+ # === 预算相关表达 ===
541
+ r'预算(\d+)', r'预算是(\d+)', r'预算大概(\d+)', r'预算约(\d+)',
542
+ r'预算差不多(\d+)', r'预算在(\d+)', r'预算控制在(\d+)',
543
+ r'预算不超过(\d+)', r'预算最多(\d+)', r'预算最少(\d+)',
544
+
545
+ # === 花费相关表达 ===
546
+ r'花(\d+)', r'花费(\d+)', r'花销(\d+)', r'开销(\d+)', r'支出(\d+)',
547
+ r'费用(\d+)', r'成本(\d+)', r'总共(\d+)', r'一共(\d+)', r'总计(\d+)',
548
+
549
+ # === 万元表达 ===
550
+ r'(\d+)万', r'(\d+)万元', r'(\d+)万块', r'(\d+)万人民币',
551
+ r'(\d+)万欧', r'(\d+)万欧元', r'(\d+)万美元', r'(\d+)万英镑',
552
+ r'(\d+\.?\d*)万', r'(\d+\.?\d*)万元',
553
+
554
+ # === 千元表达 ===
555
+ r'(\d+)千', r'(\d+)千元', r'(\d+)千块', r'(\d+)k', r'(\d+)K',
556
+ r'(\d+)千欧', r'(\d+)千美元', r'(\d+)千英镑',
557
+
558
+ # === 范围表达 ===
559
+ r'(\d+)-(\d+)', r'(\d+)到(\d+)', r'(\d+)至(\d+)', r'(\d+)~(\d+)',
560
+ r'(\d+)左右', r'约(\d+)', r'差不多(\d+)', r'大概(\d+)',
561
+
562
+ # === 每人/每天相关 ===
563
+ r'每人(\d+)', r'人均(\d+)', r'单人(\d+)', r'每天(\d+)', r'日均(\d+)',
564
+
565
+ # === 中文数字金额 ===
566
+ r'一万', r'两万', r'三万', r'四万', r'五万', r'六万', r'七万', r'八万', r'九万', r'十万',
567
+ r'一千', r'两千', r'三千', r'四千', r'五千', r'六千', r'七千', r'八千', r'九千'
568
  ]
569
 
570
+ # 中文数字金额映射
571
+ chinese_money = {
572
+ '一万': 10000, '两万': 20000, '三万': 30000, '四万': 40000, '五万': 50000,
573
+ '六万': 60000, '七万': 70000, '八万': 80000, '九万': 90000, '十万': 100000,
574
+ '一千': 1000, '两千': 2000, '三千': 3000, '四千': 4000, '五千': 5000,
575
+ '六千': 6000, '七��': 7000, '八千': 8000, '九千': 9000
576
+ }
577
+
578
+ # 尝试提取金额
579
+ for pattern in amount_patterns:
580
+ matches = re.findall(pattern, text)
581
+ for match in matches:
582
+ amount = None
583
+ currency = "RMB" # 默认货币
584
+
585
+ if isinstance(match, tuple):
586
+ # 处理范围或多个捕获组
587
+ if len(match) == 2 and all(m.replace('.','').isdigit() for m in match if m):
588
+ try:
589
+ amount = (float(match[0]) + float(match[1])) / 2
590
+ except:
591
+ amount = float(match[0]) if match[0].replace('.','').isdigit() else float(match[1])
592
+ else:
593
+ for m in match:
594
+ if m and m.replace('.','').isdigit():
595
+ amount = float(m)
596
+ break
597
+ else:
598
+ if match in chinese_money:
599
+ amount = chinese_money[match]
600
+ elif match.replace('.','').isdigit():
601
+ amount = float(match)
602
+
603
+ if amount and amount > 0:
604
+ # 处理单位转换
605
+ if '万' in pattern:
606
+ amount *= 10000
607
+ elif '千' in pattern or 'k' in pattern.lower():
608
+ amount *= 1000
609
+
610
+ result["amount"] = int(amount)
611
+
612
+ # 确定货币类型(针对欧洲旅行优化)
613
+ if any(keyword in pattern for keyword in ['欧元', '欧', '€', 'eur', 'euro']):
614
+ result["currency"] = "EUR"
615
+ elif any(keyword in pattern for keyword in ['英镑', '镑', '£', 'gbp', 'pound']):
616
+ result["currency"] = "GBP"
617
+ elif any(keyword in pattern for keyword in ['瑞士法郎', '法郎', '瑞郎', 'chf', 'swiss franc']):
618
+ result["currency"] = "CHF"
619
+ elif any(keyword in pattern for keyword in ['美元', '美刀', '刀', 'usd', 'dollar']):
620
+ result["currency"] = "USD"
621
+ else:
622
+ result["currency"] = "RMB"
623
+ break
624
+ if result.get("amount"):
625
  break
626
 
627
+ # 预算类型识别 - 针对欧洲旅行优化
628
  budget_type_keywords = {
629
+ 'economy': [
630
+ # 经济相关
631
+ '经济', '便宜', '省钱', '实惠', '节省', '穷游', '学生', '青年',
632
+ '预算有限', '钱不多', '不贵', '划算', '性价比', '背包客',
633
+ '简单', '基础', '低成本', '节约', '省着花', '紧巴巴',
634
+ # 欧洲特色经济住宿
635
+ '青年旅社', '青旅', 'hostel', '民宿', 'airbnb', '客栈',
636
+ '多人间', '床位', '宿舍', '胶囊', 'capsule',
637
+ # 欧洲经济交通
638
+ '大巴', '长途汽车', 'flixbus', '火车', '二等座', '经济舱',
639
+ '欧洲通票', '青年票', '学生票', '团体票',
640
+ # 经济餐饮
641
+ '自己做饭', '超市', '便利店', '快餐', '街头小吃', '外卖',
642
+ '麦当劳', '汉堡王', 'kebab', 'döner'
643
+ ],
644
+ 'comfortable': [
645
+ # 舒适相关
646
+ '舒适', '中等', '适中', '一般', '标准', '普通', '正常', '常规',
647
+ '中档', '中级', '合理', '平均', '中间档次', '不高不低',
648
+ # 欧洲中档住宿
649
+ '三星', '四星', '酒店', 'hotel', '标间', '双人间', '大床房',
650
+ '民宿', 'apartment', '公寓', 'b&b', 'pension',
651
+ # 欧洲舒适交通
652
+ '火车', '一等座', '高铁', 'tgv', 'ice', '城际列车',
653
+ '租车', '自驾', '商务舱', '直飞',
654
+ # 中档餐饮
655
+ '餐厅', '当地菜', '特色菜', '中档餐厅', '酒吧', 'bistro'
656
+ ],
657
+ 'luxury': [
658
+ # 奢华相关
659
+ '豪华', '奢华', '高端', '顶级', '精品', '奢侈', '贵族',
660
+ '贵一点', '不差钱', '任性', '土豪', '有钱', '不在乎钱',
661
+ '高消费', '享受', '奢享', '尊贵', '至尊', 'VIP',
662
+ # 欧洲豪华住宿
663
+ '五星', '六星', '豪华酒店', 'luxury hotel', '度假村', 'resort',
664
+ '别墅', 'villa', '城堡', 'castle', '套房', 'suite', '总统套房',
665
+ '丽思卡���顿', '四季', '文华东方', '半岛', '香格里拉', '希尔顿',
666
+ 'ritz carlton', 'four seasons', 'mandarin oriental', 'peninsula',
667
+ # 豪华交通
668
+ '头等舱', '商务舱', '私人飞机', 'private jet', '豪车', '奔驰', '宝马',
669
+ '奥迪', '保时捷', '法拉利', '兰博基尼', 'mercedes', 'bmw', 'audi',
670
+ # 奢华服务
671
+ '私人导游', '管家服务', 'concierge', '司机', '专车', '包车',
672
+ '定制旅行', '私人订制', '一对一服务', 'vip通道',
673
+ # 高端餐饮
674
+ '米其林', 'michelin', '米其林三星', '米其林餐厅', '高档餐厅',
675
+ '法式大餐', '意式料理', '分子料理', '酒庄', 'wine tasting'
676
+ ]
677
  }
678
 
679
+ # 识别预算类型
680
  for budget_type, keywords in budget_type_keywords.items():
681
+ matched_keywords = [kw for kw in keywords if kw in text_lower]
682
+ if matched_keywords:
683
+ result["type"] = budget_type
684
+ result["description"] = matched_keywords[0]
 
685
  break
686
 
687
+ # 如果有金额但没有类型,根据金额和货币推断类型(欧洲标准)
688
+ if result.get("amount") and not result.get("type"):
689
+ amount = result["amount"]
690
+ currency = result.get("currency", "RMB")
691
+
692
+ # 根据欧洲旅行成本设置阈值
693
+ if currency == "EUR":
694
+ if amount < 50: # 每天50欧以下
695
+ result["type"] = "economy"
696
+ result["description"] = "经济预算"
697
+ elif amount < 150: # 每天50-150欧
698
+ result["type"] = "comfortable"
699
+ result["description"] = "舒适预算"
700
+ else: # 每天150欧以上
701
+ result["type"] = "luxury"
702
+ result["description"] = "豪华预算"
703
+ elif currency == "GBP":
704
+ if amount < 40: # 每天40英镑以下
705
+ result["type"] = "economy"
706
+ result["description"] = "经济预算"
707
+ elif amount < 120: # 每天40-120英镑
708
+ result["type"] = "comfortable"
709
+ result["description"] = "舒适预算"
710
+ else: # 每天120英镑以上
711
+ result["type"] = "luxury"
712
+ result["description"] = "豪华预算"
713
+ elif currency == "CHF":
714
+ if amount < 60: # 每天60瑞郎以下
715
+ result["type"] = "economy"
716
+ result["description"] = "经济预算"
717
+ elif amount < 180: # 每天60-180瑞郎
718
+ result["type"] = "comfortable"
719
+ result["description"] = "舒适预算"
720
+ else: # 每天180瑞郎以上
721
+ result["type"] = "luxury"
722
+ result["description"] = "豪华预算"
723
+ elif currency == "RMB":
724
+ if amount < 300: # 每天300元以下
725
+ result["type"] = "economy"
726
+ result["description"] = "经济预算"
727
+ elif amount < 800: # 每天300-800元
728
+ result["type"] = "comfortable"
729
+ result["description"] = "舒适预算"
730
+ else: # 每天800元以上
731
+ result["type"] = "luxury"
732
+ result["description"] = "豪华预算"
733
+ elif currency == "USD":
734
+ if amount < 60: # 每天60美元以下
735
+ result["type"] = "economy"
736
+ result["description"] = "经济预算"
737
+ elif amount < 150: # 每天60-150美元
738
+ result["type"] = "comfortable"
739
+ result["description"] = "舒适预算"
740
+ else: # 每天150美元以上
741
+ result["type"] = "luxury"
742
+ result["description"] = "豪华预算"
743
+
744
  return result
745
 
746
+ def _is_valid_european_city(self, name: str) -> bool:
747
+ """验证是否为有效的欧洲城市名称"""
748
+ if not name or len(name) < 1:
749
+ return False
750
+
751
+ # 排除数字和常见的非地名词汇
752
+ invalid_words = [
753
+ # 数字和时间
754
+ '天', '日', '号', '月', '年', '周', '小时', '分钟', '秒',
755
+ # 金钱相关
756
+ '元', '块', '钱', '万', '千', '百', '预算', '费用', '成本', '价格',
757
+ '美元', '欧元', '英镑', '瑞郎', '法郎',
758
+ # 旅行相关动词
759
+ '花', '费', '旅行', '旅游', '行程', '计划', '想', '去', '到', '的',
760
+ '在', '是', '个', '了', '和', '与', '或', '但', '而', '就', '都',
761
+ # 其他常见词
762
+ '人', '我', '你', '他', '她', '们', '这', '那', '什么', '怎么',
763
+ '好', '很', '非常', '特别', '大', '小', '新', '老'
764
+ ]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
765
 
766
+ if name.isdigit() or name in invalid_words:
767
+ return False
768
 
769
+ # 检查是否包含数字(地名通常不包含数字)
770
+ if any(char.isdigit() for char in name):
771
+ return False
 
772
 
773
+ # 检查是否在欧洲城市列表中
774
+ if name in self.european_cities:
775
+ return True
776
 
777
+ # 检查是否在别名列表中
778
+ if name in self.european_city_aliases or name.lower() in self.european_city_aliases:
779
+ return True
780
+
781
+ # 城市名称长度检查
782
+ if len(name) > 15:
783
+ return False
784
+
785
+ # 检查是否包含特殊字符
786
+ if any(char in name for char in '!@#$%^&*()+={}[]|\\:";\'<>?,.`~'):
787
+ return False
788
+
789
+ return False # 只接受明确在欧洲城市列表中的城市
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
790
 
791
+ def _is_valid_european_country(self, name: str) -> bool:
792
+ """验证是否为有效的欧洲国家名称"""
793
+ if not name or len(name) < 2:
794
+ return False
795
+
796
+ # 欧洲国家列表
797
+ european_countries = {
798
+ # 西欧
799
+ '法国', '德国', '英国', '荷兰', '比利时', '卢森堡',
800
+ # 南欧
801
+ '意大利', '西班牙', '葡萄牙', '希腊', '马耳他', '塞浦路斯',
802
+ # 中欧
803
+ '奥地利', '瑞士', '捷克', '斯洛伐克', '匈牙利', '波兰', '斯洛文尼亚',
804
+ # 北欧
805
+ '瑞典', '挪威', '丹麦', '芬兰', '冰岛',
806
+ # 东欧
807
+ '俄罗斯', '乌克兰', '白俄罗斯', '立陶宛', '拉脱维亚', '爱沙尼亚', '摩尔多瓦',
808
+ # 巴尔干半岛
809
+ '克罗地亚', '塞尔维亚', '波黑', '黑山', '北马其顿', '阿尔巴尼亚',
810
+ '保加利亚', '罗马尼亚', '土耳其'
811
+ }
812
+
813
+ return name in european_countries
814
 
815
+ # 保持向后兼容的验证方法
816
+ def _validate_and_normalize(self, data: dict) -> dict:
817
+ """验证和规范化数据"""
818
+ return data
modules/knowledge_base.py CHANGED
@@ -6,24 +6,283 @@ from utils.logger import log
6
  class KnowledgeBase:
7
  def __init__(self, file_path: Path = Path("./config/general_travelplan.json")):
8
  self.knowledge = []
9
- try:
10
- with open(file_path, 'r', encoding='utf-8') as f:
11
- self.knowledge = json.load(f).get('clean_knowledge', [])
12
- log.info(f"✅ 知识库加载完成")
13
- except Exception as e:
14
- log.error(f" 知识库加载失败: {e}", exc_info=True)
15
- raise
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
16
 
17
  def search(self, query: str) -> list:
 
18
  relevant_knowledge = []
19
  query_lower = query.lower()
20
 
21
- for item in self.knowledge:
22
- # 简单实现:如果查询的城市在知识库的目的地中,则返回该知识
23
- destinations = item.get('knowledge', {}).get('travel_knowledge', {}).get('destination_info', {}).get('primary_destinations', [])
24
- for dest in destinations:
25
- if dest.lower() in query_lower:
26
- if item not in relevant_knowledge:
27
- relevant_knowledge.append(item)
28
- break
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
29
  return relevant_knowledge
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6
class KnowledgeBase:
    """In-memory travel knowledge base with city, country and region indexes.

    Records come from the ``clean_knowledge`` list of a JSON file. Each record
    is expected to keep its data under ``knowledge -> travel_knowledge``; the
    ``destination_info`` section of that data feeds the three indexes, which
    map a name to the positions of the matching records in ``self.knowledge``.
    """

    # City -> country lookup used to widen a city query to its country.
    _CITY_COUNTRY = {
        # Central Europe
        "布拉格": "捷克", "布尔诺": "捷克", "库特纳霍拉": "捷克",
        "维也纳": "奥地利", "萨尔茨堡": "奥地利", "哈尔施塔特": "奥地利", "巴德伊舍": "奥地利",
        "布达佩斯": "匈牙利", "德布勒森": "匈牙利", "圣安德烈": "匈牙利",
        "布拉迪斯拉发": "斯洛伐克",

        # Western Europe
        "巴黎": "法国", "里昂": "法国", "尼斯": "法国", "马赛": "法国",
        "柏林": "德国", "慕尼黑": "德国", "汉堡": "德国", "科隆": "德国", "法兰克福": "德国",
        "阿姆斯特丹": "荷兰", "鹿特丹": "荷兰", "海牙": "荷兰",
        "布鲁塞尔": "比利时", "安特卫普": "比利时", "布吕赫": "比利时",
        "卢森堡市": "卢森堡",
        "苏黎世": "瑞士", "日内瓦": "瑞士", "因特拉肯": "瑞士",

        # Southern Europe
        "罗马": "意大利", "米兰": "意大利", "威尼斯": "意大利", "佛罗伦萨": "意大利",
        "马德里": "西班牙", "巴塞罗那": "西班牙", "塞维利亚": "西班牙",
        "里斯本": "葡萄牙", "波尔图": "葡萄牙",
        "雅典": "希腊", "圣托里尼": "希腊", "米科诺斯": "希腊",

        # Northern Europe
        "斯德哥尔摩": "瑞典", "哥德堡": "瑞典",
        "奥斯陆": "挪威", "卑尔根": "挪威",
        "哥本哈根": "丹麦", "奥胡斯": "丹麦",
        "赫尔辛基": "芬兰", "坦佩雷": "芬兰",
        "雷克雅未克": "冰岛",

        # United Kingdom
        "伦敦": "英国", "爱丁堡": "英国", "曼彻斯特": "英国",
    }

    # City -> region lookup used to widen a city query to its region.
    _CITY_REGION = {
        # Central Europe
        "布拉格": "中欧", "布尔诺": "中欧", "库特纳霍拉": "中欧",
        "维也纳": "中欧", "萨尔茨堡": "中欧", "哈尔施塔特": "中欧", "巴德伊舍": "中欧",
        "布达佩斯": "中欧", "德布勒森": "中欧", "圣安德烈": "中欧",
        "布拉迪斯拉发": "中欧",

        # Western Europe
        "巴黎": "西欧", "里昂": "西欧", "尼斯": "西欧",
        "柏林": "西欧", "慕尼黑": "西欧", "汉堡": "西欧",
        "阿姆斯特丹": "西欧", "鹿特丹": "西欧",
        "布鲁塞尔": "西欧", "安特卫普": "西欧",
        "苏黎世": "西欧", "日内瓦": "西欧",

        # Eastern Europe (following the knowledge base's own classification)
        "华沙": "东欧", "克拉科夫": "东欧",
        "莫斯科": "东欧", "圣彼得堡": "东欧",

        # Southern Europe
        "罗马": "南欧", "米兰": "南欧", "威尼斯": "南欧",
        "马德里": "南欧", "巴塞罗那": "南欧",
        "里斯本": "南欧", "波尔图": "南欧",
        "雅典": "南欧", "圣托里尼": "南欧",

        # Northern Europe
        "斯德哥尔摩": "北欧", "哥德堡": "北欧",
        "奥斯陆": "北欧", "卑尔根": "北欧",
        "哥本哈根": "北欧", "赫尔辛基": "北欧",
        "雷克雅未克": "北欧",
    }

    def __init__(self, file_path: Path = Path("./config/general_travelplan.json")):
        """Load the knowledge file and build the lookup indexes.

        Args:
            file_path: JSON file whose top-level ``clean_knowledge`` key holds
                the list of records.

        Errors from opening or parsing the file propagate to the caller.
        """
        self.knowledge = []
        self.city_index = {}     # city name -> record positions
        self.country_index = {}  # country name -> record positions
        self.region_index = {}   # region type -> record positions
        with open(file_path, 'r', encoding='utf-8') as f:
            self.knowledge = json.load(f).get('clean_knowledge', [])
        # Without this call every index stays empty and only fuzzy matching
        # can ever return a result.
        self._build_indexes()
        log.info(" 知识库加载完成")

    @staticmethod
    def _travel_knowledge(item: dict) -> dict:
        """Return the ``travel_knowledge`` section of a record (or ``{}``)."""
        return item.get('knowledge', {}).get('travel_knowledge', {})

    def _build_indexes(self):
        """(Re)build the city, country and region indexes from ``self.knowledge``."""
        # Start from scratch so that calling this twice does not duplicate entries.
        self.city_index = {}
        self.country_index = {}
        self.region_index = {}

        for idx, item in enumerate(self.knowledge):
            knowledge = self._travel_knowledge(item)
            if not knowledge:
                continue

            dest_info = knowledge.get('destination_info', {})

            for city in dest_info.get('primary_destinations', []):
                self.city_index.setdefault(city, []).append(idx)

            for country in dest_info.get('countries', []):
                self.country_index.setdefault(country, []).append(idx)

            region_type = dest_info.get('region_type', '')
            if region_type:
                self.region_index.setdefault(region_type, []).append(idx)

    def _collect(self, indices, into: list) -> None:
        """Append the records at *indices* to *into*, skipping equal records."""
        for idx in indices:
            record = self.knowledge[idx]
            if record not in into:
                into.append(record)

    def search(self, query: str) -> list:
        """Find records relevant to *query* (normally a city name).

        Exact city matches come first, then records for the city's country and
        region. Substring matching against destination names is used only when
        those steps find nothing.

        Returns:
            Matching records without duplicates; ``[]`` for an empty or
            non-string query.
        """
        # An empty string is a substring of every destination, so it would
        # otherwise "match" the entire knowledge base.
        if not query or not isinstance(query, str):
            return []

        relevant_knowledge = []
        query_lower = query.lower()

        log.info(f"🔍 在知识库中搜索: '{query}'")

        # 1. Exact city match
        if query in self.city_index:
            self._collect(self.city_index[query], relevant_knowledge)
            log.info(f"✅ 通过城市直接匹配找到 {len(self.city_index[query])} 条记录")

        # 2. Country of the queried city
        matching_country = self._find_country_for_city(query)
        if matching_country and matching_country in self.country_index:
            self._collect(self.country_index[matching_country], relevant_knowledge)
            log.info(f"✅ 通过国家匹配({matching_country})找到额外记录")

        # 3. Region of the queried city
        matching_region = self._find_region_for_city(query)
        if matching_region and matching_region in self.region_index:
            self._collect(self.region_index[matching_region], relevant_knowledge)
            log.info(f"✅ 通过地区匹配({matching_region})找到额外记录")

        # 4. Fuzzy (substring) match as a last resort
        if not relevant_knowledge:
            log.info("🔍 尝试模糊匹配...")
            for item in self.knowledge:
                dest_info = self._travel_knowledge(item).get('destination_info', {})
                for dest in dest_info.get('primary_destinations', []):
                    dest_lower = dest.lower()
                    if query_lower in dest_lower or dest_lower in query_lower:
                        if item not in relevant_knowledge:
                            relevant_knowledge.append(item)
                            log.info(f"✅ 模糊匹配找到: {dest}")
                        break

        log.info(f"📊 搜索完成,共找到 {len(relevant_knowledge)} 条相关记录")
        return relevant_knowledge

    def _find_country_for_city(self, city_name: str) -> str:
        """Return the country of *city_name*, or ``""`` when it is unknown."""
        return self._CITY_COUNTRY.get(city_name, "")

    def _find_region_for_city(self, city_name: str) -> str:
        """Return the region of *city_name*, or ``""`` when it is unknown."""
        return self._CITY_REGION.get(city_name, "")

    def get_knowledge_by_destination(self, destination: str) -> dict:
        """Merge every record relevant to *destination* into one structure.

        Returns:
            ``{}`` when nothing is found, otherwise a dict with the keys
            ``destination_info`` (later records override earlier keys),
            ``budget_analysis`` (the entry with the most keys),
            ``detailed_itinerary`` (concatenated, de-duplicated by day number
            and location) and ``professional_insights`` (list values merged in
            first-seen order without duplicates).
        """
        relevant_items = self.search(destination)

        if not relevant_items:
            log.warning(f"⚠️ 未找到关于 '{destination}' 的知识")
            return {}

        merged_knowledge = {
            "destination_info": {},
            "budget_analysis": {},
            "detailed_itinerary": [],
            "professional_insights": {}
        }
        insights = merged_knowledge['professional_insights']

        for item in relevant_items:
            knowledge = self._travel_knowledge(item)

            if 'destination_info' in knowledge:
                merged_knowledge['destination_info'].update(knowledge['destination_info'])

            # Keep the most detailed budget analysis seen so far.
            if 'budget_analysis' in knowledge:
                candidate = knowledge['budget_analysis']
                current = merged_knowledge['budget_analysis']
                if not current or len(candidate) > len(current):
                    merged_knowledge['budget_analysis'] = candidate

            if 'detailed_itinerary' in knowledge:
                merged_knowledge['detailed_itinerary'].extend(knowledge['detailed_itinerary'])

            if 'professional_insights' in knowledge:
                for key, value in knowledge['professional_insights'].items():
                    if key not in insights:
                        # Copy lists so later merging never touches the
                        # knowledge base's own data.
                        insights[key] = list(value) if isinstance(value, list) else value
                    elif isinstance(value, list) and isinstance(insights[key], list):
                        # Order-preserving de-duplication; entries may be
                        # unhashable (dicts), so a set cannot be used here.
                        combined = insights[key]
                        for entry in value:
                            if entry not in combined:
                                combined.append(entry)

        # Drop itinerary days that repeat the same (day number, location).
        if merged_knowledge['detailed_itinerary']:
            seen_days = set()
            unique_itinerary = []
            for day_plan in merged_knowledge['detailed_itinerary']:
                day_key = (day_plan.get('day_number', 0), day_plan.get('location', ''))
                if day_key not in seen_days:
                    seen_days.add(day_key)
                    unique_itinerary.append(day_plan)
            merged_knowledge['detailed_itinerary'] = unique_itinerary

        log.info(f"📚 为 '{destination}' 合并了 {len(relevant_items)} 条知识记录")
        return merged_knowledge

    def get_similar_destinations(self, destination: str, limit: int = 5) -> list:
        """Suggest up to *limit* other destinations near *destination*.

        Cities indexed under the same country are listed first, followed by
        cities indexed under the same region.
        """
        similar_destinations = []
        if limit <= 0:
            return similar_destinations

        target_country = self._find_country_for_city(destination)
        target_region = self._find_region_for_city(destination)

        # Same-country records first, then same-region records.
        index_groups = []
        if target_country and target_country in self.country_index:
            index_groups.append(self.country_index[target_country])
        if target_region and target_region in self.region_index:
            index_groups.append(self.region_index[target_region])

        for indices in index_groups:
            for idx in indices:
                dest_info = self._travel_knowledge(self.knowledge[idx]).get('destination_info', {})
                for dest in dest_info.get('primary_destinations', []):
                    if dest != destination and dest not in similar_destinations:
                        similar_destinations.append(dest)
                        if len(similar_destinations) >= limit:
                            return similar_destinations

        return similar_destinations

    def get_statistics(self) -> dict:
        """Summarise the knowledge base.

        Returns:
            A dict with record / city / country / region counts, the number of
            distinct cities per region, and the ten cities referenced by the
            most records as ``(city, record_count)`` tuples.
        """
        cities_by_region = {}
        for region, indices in self.region_index.items():
            cities_in_region = set()
            for idx in indices:
                dest_info = self._travel_knowledge(self.knowledge[idx]).get('destination_info', {})
                cities_in_region.update(dest_info.get('primary_destinations', []))
            cities_by_region[region] = len(cities_in_region)

        # Most frequently referenced cities first (stable for equal counts).
        city_frequency = [(city, len(indices)) for city, indices in self.city_index.items()]
        city_frequency.sort(key=lambda pair: pair[1], reverse=True)

        return {
            "total_records": len(self.knowledge),
            "cities_covered": len(self.city_index),
            "countries_covered": len(self.country_index),
            "regions_covered": len(self.region_index),
            "cities_by_region": cities_by_region,
            "popular_cities": city_frequency[:10],
        }
modules/response_generator.py CHANGED
@@ -1,4 +1,5 @@
1
  import json
 
2
  from .ai_model import AIModel
3
  from .knowledge_base import KnowledgeBase
4
  from utils.logger import log
@@ -8,6 +9,7 @@ class ResponseGenerator:
8
  self.ai_model = ai_model
9
  self.kb = knowledge_base
10
  self.personas = self._load_personas()
 
11
 
12
  def _load_personas(self):
13
  personas_path = "./config/personas.json"
@@ -17,29 +19,76 @@ class ResponseGenerator:
17
  log.info(f"✅ 成功加载 {len(data.get('personas', {}))} 个persona配置。")
18
  return data.get('personas', {})
19
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
20
  def _get_current_persona_config(self, session_state: dict) -> dict:
21
- persona_key = session_state.get("persona", {}).get("key") if isinstance(session_state.get("persona"), dict) else None
 
 
 
 
 
 
22
  return self.personas.get(persona_key, {
23
- "name": "旅行助手", "style": "中立",
24
- "tone": ["专业", "友好"], "prompt_template": self._build_generic_prompt(session_state)
 
 
25
  })
26
 
27
  def generate(self, user_message: str, session_state: dict, extracted_info: dict) -> str:
 
28
  try:
29
  response_parts = []
30
- acknowledgement = self._generate_acknowledgement(extracted_info, session_state)
 
 
31
  if acknowledgement:
32
  response_parts.append(acknowledgement)
33
 
34
- next_question = self._get_next_question(session_state)
 
35
  if next_question:
36
  if response_parts:
37
- response_parts.append("那么," + next_question[0].lower() + next_question[1:])
 
 
38
  else:
39
  response_parts.append(next_question)
40
 
 
41
  if not next_question:
42
- plan = self._generate_persona_enhanced_plan(user_message, session_state)
 
43
  if response_parts:
44
  response_parts.append("\n\n" + plan)
45
  else:
@@ -51,128 +100,442 @@ class ResponseGenerator:
51
  log.error(f"❌ 响应生成失败: {e}", exc_info=True)
52
  return "抱歉,我在处理您的请求时遇到了问题,请稍后再试。"
53
 
54
- def _get_next_question(self, session_state: dict) -> str:
55
- """根据Persona风格,健壮地获取下一个问题,绝不崩溃。"""
56
- persona_config = self._get_current_persona_config(session_state)
57
- persona_style = persona_config.get("style", "中立")
58
-
59
- # --- 核心修复:借鉴session_manager的健壮性检查逻辑 ---
60
- destination_info = session_state.get("destination") # 可能为 None
61
- duration_info = session_state.get("duration") # 可能为 None
62
- budget_info = session_state.get("budget") # 可能为 None
63
-
64
- # 只有当info是字典时才尝试获取深层数据,否则使用安全默认值
65
- destination_name = destination_info.get('name', '那里') if isinstance(destination_info, dict) else '那里'
66
- days = duration_info.get('days', '几') if isinstance(duration_info, dict) else '几'
67
-
68
- # 依次检查核心信息是否缺失
69
- if not destination_info:
70
- if "社交" in persona_style: return "哈喽!准备好去哪里嗨皮了吗?告诉我想去哪个城市,我们来一场刷爆朋友圈的旅行吧!✨"
71
- if "体验" in persona_style: return "你好,旅行者。为了开启一段独特的深度体验,你心中的目的地是哪里?"
72
- return "您好!为了高效地开始规划,请首先明确您的目的地城市。"
73
-
74
- if not duration_info:
75
- if "社交" in persona_style: return f"{destination_name}超棒的!打算和小伙伴们在那玩几天呀?"
76
- if "体验" in persona_style: return f"感知到了,{destination_name}。你希望在这片土地上沉浸多少个日夜?"
77
- return f"目的地已锁定:{destination_name}。请提供计划的旅行天数。"
78
-
79
- if not budget_info:
80
- if "社交" in persona_style: return f"太棒啦,{days}天的行程!这次出去玩,预算大概是多少呀?是经济实惠,还是想来个轻奢体验呢?"
81
- if "体验" in persona_style: return f"{days}天的探索之旅,听起来很不错。对于这次旅行的开销,你有什么样的构想?"
82
- return f"已记录:行程共{days}天。请明确您的预算范围(例如:经济型、舒适型,或具体金额)。"
83
-
84
- return "" # 所有信息已收集完毕
85
-
86
- def _generate_acknowledgement(self, extracted_info: dict, session_state: dict) -> str:
87
- # ... (此部分及以下方法与上一版健壮版本相同,无需修改) ...
88
- if not extracted_info: return ""
89
- persona_config = self._get_current_persona_config(session_state)
90
- persona_style = persona_config.get("style", "")
91
- ack_parts = []
92
- if "destination" in extracted_info:
93
- name = extracted_info['destination'].get('name', '目的地')
94
- if "社交" in persona_style: ack_parts.append(f"目的地锁定{name}!已经开始期待啦!💖")
95
- elif "体验" in persona_style: ack_parts.append(f"我感知到了,{name},一个充满故事的地方")
96
- else: ack_parts.append(f"确认:目的地已记录为{name}")
97
- if "duration" in extracted_info:
98
- days = extracted_info['duration'].get('days', '几')
99
- if "社交" in persona_style: ack_parts.append(f"玩{days}天,时间超充裕的")
100
- elif "体验" in persona_style: ack_parts.append(f"{days}个日夜,足够深入探索了")
101
- else: ack_parts.append(f"行程时长已设定为{days}天")
102
- if "budget" in extracted_info:
103
- budget_desc = self._format_budget_info(extracted_info.get('budget'))
104
- if "社交" in persona_style: ack_parts.append(f"{budget_desc}的预算,妥妥的")
105
- elif "体验" in persona_style: ack_parts.append(f"了解,{budget_desc}的投入,追求的是价值而非价格")
106
- else: ack_parts.append(f"预算已明确为{budget_desc}")
107
- return ",".join(ack_parts) + "。" if ack_parts else ""
108
-
109
- def _format_budget_info(self, budget: dict) -> str:
110
- if not budget or not isinstance(budget, dict): return "未指定"
111
- if budget.get('amount') and budget.get('currency'): return f"{budget['amount']}{budget['currency']}"
112
- if budget.get('description'): return budget['description']
113
- if budget.get('type'):
114
- type_map = {'economy': '经济型', 'comfortable': '舒适型', 'luxury': '豪华型'}
115
- return type_map.get(budget['type'], budget['type'])
116
- return "未指定"
117
-
118
- # --- 以下方法保持不变 ---
119
- def _generate_persona_enhanced_plan(self, user_message: str, session_state: dict) -> str:
120
- persona_config = self._get_current_persona_config(session_state)
121
- if self.ai_model.is_available():
122
- prompt = self._build_prompt(session_state, persona_config)
123
- log.info(f"🚀 使用Persona '{persona_config.get('name')}' 构建的Prompt进行生成。")
124
- return self.ai_model.generate(user_message, prompt)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
125
  else:
126
- log.warning("⚠️ AI模型不可用,生成备用计划。")
127
- return self._generate_fallback_plan(session_state)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
128
 
129
- def _build_prompt(self, session_state: dict, persona_config: dict) -> str:
130
- template = persona_config.get('prompt_template')
131
- if not template:
132
- return self._build_generic_prompt(session_state)
 
 
133
  try:
134
- format_args = {
135
- "location": self._safe_get_session_value(session_state, "destination", "name", "未指定"),
136
- "days": self._safe_get_session_value(session_state, "duration", "days", "未指定"),
137
- "budget": self._format_budget_info(session_state.get("budget")),
138
- "date": session_state.get('date', '近期'),
139
- "user_tags": ", ".join(session_state.get('user_tags', [])),
140
- "commercial_preference": session_state.get('commercial_preference', '适中'),
141
- "group_description": session_state.get('group_description', '个人'),
142
- "tags": ", ".join(session_state.get('tags', []))
143
- }
144
- return template.format(**format_args)
145
- except KeyError as e:
146
- log.warning(f"Persona模板格式化失败,缺少键: {e}。将使用通用模板。")
147
- return self._build_generic_prompt(session_state)
148
-
149
- def _safe_get_session_value(self, session, key1, key2, default):
150
- """安全地从嵌套的session字典中取值"""
151
- level1 = session.get(key1)
152
- if isinstance(level1, dict):
153
- return level1.get(key2, default)
154
- return default
155
-
156
- def _build_generic_prompt(self, session_state: dict) -> str:
157
- location = self._safe_get_session_value(session_state, "destination", "name", "目的地")
158
- days = self._safe_get_session_value(session_state, "duration", "days", "几")
159
- budget_info = self._format_budget_info(session_state.get("budget"))
160
- return f"你是一个专业的旅游助手。请为用户生成一个详细的旅行计划。\n【基本信息】\n- 目的地:{location}\n- 旅行天数:{days}天\n- 预算:{budget_info}\n【要求】\n- 提供具体的景点推荐和路线安排\n- 包含交通、住宿、餐饮建议\n- 确保所有推荐都在预算范围内\n- 提供实用的旅行贴士\n\n请生成一份实用、详细的旅行计划。"
161
 
162
- def _generate_fallback_plan(self, session_state: dict) -> str:
 
 
 
 
 
163
  persona_config = self._get_current_persona_config(session_state)
164
- location = self._safe_get_session_value(session_state, "destination", "name", "目的地")
165
- days = self._safe_get_session_value(session_state, "duration", "days", "几")
166
- budget_info = self._format_budget_info(session_state.get("budget"))
167
- persona_name = persona_config.get('name', '旅行者')
168
- plan = f"为您推荐 {location} {days}天旅行计划:\n\n"
169
- plan += f"👤 旅行者类型:{persona_name}\n"
170
- plan += f"💰 预算范围:{budget_info}\n\n"
171
- highlights = self._safe_get_session_value(session_state, "destination", "highlights", "精彩景点等待您的探索")
172
- plan += f"🎯 主要景点:{highlights}\n\n"
173
- persona_key = self._safe_get_session_value(session_state, "persona", "key", None)
174
- if persona_key == 'planner': plan += "📋 建议制定详细的每日行程表。\n"
175
- elif persona_key == 'social': plan += "📸 推荐寻找热门打卡点!\n"
176
- elif persona_key == 'experiential': plan += "🎨 建议深入当地社区,寻找地道体验。\n"
177
- plan += "\n如需更详细的个性化规划,请告诉我您的具体需求!"
178
- return plan
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import json
2
+ import random
3
  from .ai_model import AIModel
4
  from .knowledge_base import KnowledgeBase
5
  from utils.logger import log
 
9
  self.ai_model = ai_model
10
  self.kb = knowledge_base
11
  self.personas = self._load_personas()
12
+ self._init_response_templates()
13
 
14
  def _load_personas(self):
15
  personas_path = "./config/personas.json"
 
19
  log.info(f"✅ 成功加载 {len(data.get('personas', {}))} 个persona配置。")
20
  return data.get('personas', {})
21
 
22
+ def _init_response_templates(self):
23
+ """初始化各种动态回复模板"""
24
+
25
+ # 欧洲城市特色描述 (保留原有)
26
+ self.city_descriptions = {
27
+ "巴黎": ["浪漫之都", "艺术之城", "时尚之都", "光影流转的塞纳河畔", "充满香槟气息的花都"],
28
+ "罗马": ["永恒之城", "历史的活化石", "每块石头都有故事", "古典与现代交融的奇迹", "凯撒大帝走过的土地"],
29
+ "伦敦": ["绅士的故乡", "雾都传奇", "文艺复兴的摇篮", "泰晤士河的守护者", "莎士比亚笔下的世界"],
30
+ "维也纳": ["音乐之都", "华尔兹的发源地", "莫扎特的灵感之地", "咖啡文化的天堂", "皇室优雅的化身"],
31
+ "布拉格": ["千塔之城", "中世纪的童话", "波西米亚的浪漫", "查理桥上的传奇", "啤酒花香弥漫的古城"],
32
+ "布达佩斯": ["多瑙河明珠", "东欧巴黎", "温泉之都", "建筑艺术的博物馆", "匈牙利王冠上的明珠"],
33
+ "萨尔茨堡": ["音乐神童的故乡", "《音乐之声》的拍摄地", "阿尔卑斯山下的明珠", "莫扎特的诞生地", "巴洛克建筑的典范"],
34
+ "哈尔施塔特": ["世界最美小镇", "湖光山色的仙境", "阿尔卑斯山的秘境", "明信片上的童话", "奥地利的瑰宝"],
35
+ }
36
+
37
+ # 保留原有的问候语和确认模板 (简化以节省空间)
38
+ self.greetings = {
39
+ "social": [
40
+ "哈喽!准备开启一场说走就走的欧洲之旅吗?✨",
41
+ "嗨呀!听说有人要去欧洲拍美照啦?📸",
42
+ ],
43
+ "experiential": [
44
+ "你好,旅行者。欧洲的古老土地正在召唤着你...",
45
+ "感知到了一颗渴望探索的心。欧洲有太多故事等你去发现。",
46
+ ],
47
+ "planner": [
48
+ "您好!让我来帮您规划一次完美的欧洲之旅。",
49
+ "欧洲旅行规划专家上线!准备为您定制专属行程。",
50
+ ]
51
+ }
52
+
53
  def _get_current_persona_config(self, session_state: dict) -> dict:
54
+ """获取当前persona配置"""
55
+ persona_info = session_state.get("persona", {})
56
+ if isinstance(persona_info, dict):
57
+ persona_key = persona_info.get("key")
58
+ else:
59
+ persona_key = None
60
+
61
  return self.personas.get(persona_key, {
62
+ "name": "旅行助手",
63
+ "style": "中立",
64
+ "tone": ["专业", "友好"],
65
+ "prompt_template": self._build_generic_prompt(session_state)
66
  })
67
 
68
  def generate(self, user_message: str, session_state: dict, extracted_info: dict) -> str:
69
+ """生成融合知识库的智能回复"""
70
  try:
71
  response_parts = []
72
+
73
+ # 1. 生成确认信息(更生动)
74
+ acknowledgement = self._generate_vivid_acknowledgement(extracted_info, session_state)
75
  if acknowledgement:
76
  response_parts.append(acknowledgement)
77
 
78
+ # 2. 检查是否需要询问下一个信息
79
+ next_question = self._get_dynamic_next_question(session_state)
80
  if next_question:
81
  if response_parts:
82
+ connectors = ["那么,", "接下来,", "好的,", ""]
83
+ connector = random.choice(connectors)
84
+ response_parts.append(connector + next_question)
85
  else:
86
  response_parts.append(next_question)
87
 
88
+ # 3. 如果所有信息收集完毕,生成知识库增强的旅行计划
89
  if not next_question:
90
+ plan = self._generate_knowledge_enhanced_plan(user_message, session_state)
91
+
92
  if response_parts:
93
  response_parts.append("\n\n" + plan)
94
  else:
 
100
  log.error(f"❌ 响应生成失败: {e}", exc_info=True)
101
  return "抱歉,我在处理您的请求时遇到了问题,请稍后再试。"
102
 
103
def _generate_knowledge_enhanced_plan(self, user_message: str, session_state: dict) -> str:
    """Generate a travel plan enriched with knowledge-base information.

    Looks up knowledge related to the session's destination, then delegates
    to the AI-backed generator when ``self.ai_model`` is set and reports
    itself available, otherwise to the knowledge-based fallback generator.

    The trip length and budget are not resolved here: both downstream
    generators read them from ``session_state`` themselves, and converting
    an unused value with ``int()`` at this point could only abort plan
    generation without contributing to the result.

    Args:
        user_message: The user's latest message (currently unused; kept for
            interface compatibility).
        session_state: Conversation state describing the trip.

    Returns:
        The generated plan text.
    """
    # 1. Resolve the destination.
    destination_name = self._get_destination_name(session_state)

    log.info(f"🔍 开始搜索知识库中关于 '{destination_name}' 的信息...")

    # 2. Collect the knowledge-base entries related to it.
    relevant_knowledge = self._search_destination_knowledge(destination_name)

    # 3. Prefer the AI model when one is available.
    if self.ai_model and self.ai_model.is_available():
        return self._generate_ai_enhanced_plan(session_state, relevant_knowledge)

    # 4. Otherwise build the detailed knowledge-based fallback plan.
    return self._generate_knowledge_based_fallback_plan(session_state, relevant_knowledge)
122
+
123
def _search_destination_knowledge(self, destination_name: str) -> dict:
    """Collect knowledge-base content related to ``destination_name``.

    An entry is considered related when, in this order, the destination is
    listed among its primary destinations, the destination's country is
    listed among its countries, or one of the destination's same-region
    cities is listed among its primary destinations.

    Args:
        destination_name: City name to search for.

    Returns:
        An empty dict when the knowledge base is missing or empty; otherwise
        a dict with the keys ``budget_analysis``, ``itinerary_suggestions``,
        ``professional_insights`` and ``destination_specific``. Itinerary
        entries and insights are accumulated over all related entries, the
        other two keys hold the values of the last related entry.
    """
    kb = self.kb
    if not kb or not hasattr(kb, 'knowledge') or not kb.knowledge:
        log.warning("⚠️ 知识库为空或不可用")
        return {}

    collected = {
        "budget_analysis": {},
        "itinerary_suggestions": [],
        "professional_insights": {},
        "destination_specific": {}
    }

    log.info(f"📚 {len(kb.knowledge)} 条知识中搜索关于 '{destination_name}' 的信息...")

    for entry in kb.knowledge:
        travel = entry.get('knowledge', {}).get('travel_knowledge', {})
        if not travel:
            continue

        dest_info = travel.get('destination_info', {})
        primary = dest_info.get('primary_destinations', [])
        countries = dest_info.get('countries', [])

        # Work out why (if at all) this entry is related; the cheaper
        # checks run first and later ones only when earlier ones fail.
        reason = None
        if destination_name in primary:
            reason = f"直接匹配城市: {destination_name}"
        else:
            country = self._get_destination_country(destination_name)
            if country and country in countries:
                reason = f"通过国家匹配: {country}"
            else:
                nearby = self._get_same_region_cities(destination_name)
                if any(city in primary for city in nearby):
                    reason = f"同地区匹配: {nearby}"

        if reason is None:
            continue

        log.info(f"✅ 找到相关知识: {reason}")

        # Budget analysis: the last related entry wins.
        if 'budget_analysis' in travel:
            collected['budget_analysis'] = travel['budget_analysis']

        # Itinerary suggestions: accumulated across entries.
        if 'detailed_itinerary' in travel:
            collected['itinerary_suggestions'].extend(travel['detailed_itinerary'])

        # Professional insights: merged across entries.
        if 'professional_insights' in travel:
            collected['professional_insights'].update(travel['professional_insights'])

        # Destination details: the last related entry wins.
        collected['destination_specific'] = dest_info

    found_budget = bool(collected['budget_analysis'])
    itinerary_count = len(collected['itinerary_suggestions'])
    if not found_budget and not itinerary_count:
        log.warning(f"⚠️ 未找到关于 '{destination_name}' 的相关知识")
    else:
        log.info(f"📊 成功提取知识库信息: 预算分析={found_budget}, 行程建议={itinerary_count}条")

    return collected
197
+
198
+ def _get_destination_country(self, city_name: str) -> str:
199
+ """获取城市所属国家"""
200
+ city_country_mapping = {
201
+ "布拉格": "捷克", "维也纳": "奥地利", "萨尔茨堡": "奥地利",
202
+ "布达佩斯": "匈牙利", "布拉迪斯拉发": "斯洛伐克",
203
+ "哈尔施塔特": "奥地利", "巴德伊舍": "奥地利",
204
+ "库特纳霍拉": "捷克", "布尔诺": "捷克",
205
+ "巴黎": "法国", "里昂": "法国", "尼斯": "法国",
206
+ "柏林": "德国", "慕尼黑": "德国", "汉堡": "德国",
207
+ "阿姆斯特丹": "荷兰", "鹿特丹": "荷兰",
208
+ "布鲁塞尔": "比利时", "安特卫普": "比利时",
209
+ "苏黎世": "瑞士", "日内瓦": "瑞士",
210
+ }
211
+ return city_country_mapping.get(city_name, "")
212
+
213
+ def _get_same_region_cities(self, city_name: str) -> list:
214
+ """获取同地区的其他城市"""
215
+ region_mapping = {
216
+ # 中欧城市
217
+ "布拉格": ["维也纳", "萨尔茨堡", "布达佩斯", "布拉迪斯拉发", "哈尔施塔特"],
218
+ "维也纳": ["布拉格", "萨尔茨堡", "布达佩斯", "布拉迪斯拉发", "哈尔施塔特"],
219
+ "萨尔茨堡": ["维也纳", "布拉格", "哈尔施塔特", "慕尼黑"],
220
+ "布达佩斯": ["布拉格", "维也纳", "布拉迪斯拉发"],
221
+ "哈尔施塔特": ["萨尔茨堡", "维也纳", "巴德伊舍"],
222
+
223
+ # 西欧城市
224
+ "巴黎": ["布鲁塞尔", "阿姆斯特丹", "科隆", "斯特拉斯堡"],
225
+ "阿姆斯特丹": ["布鲁塞尔", "科隆", "巴黎"],
226
+ "布鲁塞尔": ["阿姆斯特丹", "巴黎", "科隆"],
227
+
228
+ # 德语区
229
+ "柏林": ["慕尼黑", "科隆", "汉堡", "维也纳", "苏黎世"],
230
+ "慕尼黑": ["柏林", "萨尔茨堡", "苏黎世", "维也纳"],
231
+ "苏黎世": ["慕尼黑", "维也纳", "萨尔茨堡"],
232
+ }
233
+ return region_mapping.get(city_name, [])
234
 
235
def _generate_ai_enhanced_plan(self, session_state: dict, knowledge: dict) -> str:
    """Ask the AI model for a plan based on the knowledge-enhanced prompt.

    Args:
        session_state: Conversation state describing the trip.
        knowledge: Knowledge-base content related to the destination.

    Returns:
        The value returned by the model's ``run_inference``; when logging or
        inference raises, the knowledge-based fallback plan instead.
    """
    # The prompt is built outside the guarded section, so a failure while
    # building it propagates to the caller.
    inference_args = {
        "input_type": "text",
        "formatted_input": None,
        "prompt": self._build_knowledge_enhanced_prompt(session_state, knowledge),
        "temperature": 0.7,
    }

    try:
        log.info("🤖 使用AI模型生成知识库增强计划...")
        return self.ai_model.run_inference(**inference_args)
    except Exception as exc:
        log.error(f"❌ AI增强计划生成失败: {exc}")
        return self._generate_knowledge_based_fallback_plan(session_state, knowledge)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
253
 
254
+ def _build_knowledge_enhanced_prompt(self, session_state: dict, knowledge: dict) -> str:
255
+ """构建融合知识库信息的增强prompt"""
256
+
257
+ destination_name = self._get_destination_name(session_state)
258
+ days = self._get_duration_days(session_state)
259
+ budget_desc = self._format_budget_info(session_state.get("budget"))
260
  persona_config = self._get_current_persona_config(session_state)
261
+
262
+ # 基础prompt
263
+ prompt = f"""你是一位专业的欧洲旅行顾问,请基于以下知识库信息为用户设计{destination_name}的详细旅行计划。
264
+
265
+ 🎯 【用户需求】
266
+ 📍 目的地: {destination_name}
267
+ 旅行天数: {days}
268
+ 💰 预算: {budget_desc}
269
+ 🎭 旅行风格: {persona_config.get('name', '标准旅行者')}
270
+
271
+ 📚 【知识库参考信息】"""
272
+
273
+ # 添加预算分析信息
274
+ if knowledge.get('budget_analysis'):
275
+ budget_analysis = knowledge['budget_analysis']
276
+ prompt += f"""
277
+ 💰 【预算参考】
278
+ • 总预算范围: {budget_analysis.get('total_budget_range', 'N/A')}
279
+ • 日均开支: {budget_analysis.get('daily_average', 'N/A')}"""
280
+
281
+ breakdown = budget_analysis.get('budget_breakdown', {})
282
+ if breakdown:
283
+ prompt += "\n• 预算分配:"
284
+ for category, info in breakdown.items():
285
+ if isinstance(info, dict):
286
+ percentage = info.get('percentage', '')
287
+ daily_range = info.get('daily_range', '')
288
+ if percentage and daily_range:
289
+ category_name = {'accommodation': '住宿', 'transportation': '交通',
290
+ 'food': '餐饮', 'attractions': '景点'}.get(category, category)
291
+ prompt += f"\n - {category_name}: {percentage}, {daily_range}"
292
+
293
+ # 添加行程参考信息
294
+ if knowledge.get('itinerary_suggestions'):
295
+ prompt += f"""
296
+ 🗓️ 【行程参考】"""
297
+ for day_plan in knowledge['itinerary_suggestions'][:3]: # 只取前3天作为参考
298
+ day_num = day_plan.get('day_number', 'N/A')
299
+ location = day_plan.get('location', 'N/A')
300
+ theme = day_plan.get('theme', 'N/A')
301
+ prompt += f"\n• Day {day_num} ({location}): {theme}"
302
+
303
+ # 添加具体活动
304
+ morning_activities = day_plan.get('morning_activities', [])
305
+ for activity in morning_activities[:2]: # 只取前2个活动
306
+ name = activity.get('activity_name', '')
307
+ duration = activity.get('duration', '')
308
+ tips = activity.get('professional_tips', '')
309
+ if name:
310
+ prompt += f"\n - {name} ({duration}) - {tips}"
311
+
312
+ # 添加专业洞察
313
+ if knowledge.get('professional_insights'):
314
+ insights = knowledge['professional_insights']
315
+ prompt += f"""
316
+ 💡 【专业建议】"""
317
+
318
+ if insights.get('seasonal_considerations'):
319
+ seasonal = insights['seasonal_considerations']
320
+ best_months = seasonal.get('best_months', [])
321
+ if best_months:
322
+ prompt += f"\n• 最佳旅行时间: {', '.join(best_months)}"
323
+
324
+ if insights.get('common_mistakes'):
325
+ mistakes = insights['common_mistakes'][:3] # 只取前3个
326
+ prompt += f"\n• 常见误区: {', '.join(mistakes)}"
327
+
328
+ if insights.get('insider_secrets'):
329
+ secrets = insights['insider_secrets'][:3] # 只取前3个
330
+ prompt += f"\n• 内行贴士: {', '.join(secrets)}"
331
+
332
+ # 结尾指令
333
+ prompt += f"""
334
+
335
+ 🌟 【生成要求】
336
+ 请基于以上知识库信息,结合用户的具体需求,生成一份详细、实用、个性化的{destination_name}{days}天旅行计划。计划应该包括:
337
+ • 每日详细行程安排
338
+ • 具体的景点推荐和游览建议
339
+ • 住宿和餐饮推荐
340
+ • 交通方式和路线优化
341
+ • 预算分解和省钱贴士
342
+ • 实用的旅行提醒
343
+
344
+ 请用生动有趣的语言撰写,让用户感受到{destination_name}的魅力!"""
345
+
346
+ return prompt
347
+
348
+ def _generate_knowledge_based_fallback_plan(self, session_state: dict, knowledge: dict) -> str:
349
+ """基于知识库生成详细的备用计划"""
350
+
351
+ destination_name = self._get_destination_name(session_state)
352
+ days = int(self._get_duration_days(session_state))
353
+ budget_desc = self._format_budget_info(session_state.get("budget"))
354
+ persona_config = self._get_current_persona_config(session_state)
355
+ persona_key = persona_config.get('key', 'planner')
356
+
357
+ # 获取城市特色描述
358
+ city_desc = random.choice(self.city_descriptions.get(destination_name, ["迷人的城市"]))
359
+
360
+ # 开场
361
+ if persona_key == 'social':
362
+ plan = f"🎉 {destination_name}{days}天深度攻略(知识库加持版)!\n\n"
363
+ elif persona_key == 'experiential':
364
+ plan = f"🎭 {destination_name}{days}日文化探索之旅\n\n"
365
+ else:
366
+ plan = f"📋 {destination_name}{days}天专业规划方案\n\n"
367
+
368
+ plan += f"🌟 城市印象:{city_desc}\n"
369
+ plan += f"💰 预算范围:{budget_desc}\n\n"
370
+
371
+ # 如果有知识库中的预算分析
372
+ if knowledge.get('budget_analysis'):
373
+ budget_analysis = knowledge['budget_analysis']
374
+ plan += "💰 【预算详解】(基于真实旅行经验)\n"
375
+
376
+ total_budget = budget_analysis.get('total_budget_range', '')
377
+ daily_avg = budget_analysis.get('daily_average', '')
378
+ if total_budget:
379
+ plan += f"• 参考总预算:{total_budget}\n"
380
+ if daily_avg:
381
+ plan += f"• 日均开支:{daily_avg}\n"
382
+
383
+ breakdown = budget_analysis.get('budget_breakdown', {})
384
+ if breakdown:
385
+ plan += "• 开支分配:\n"
386
+ category_names = {
387
+ 'accommodation': '🏨 住宿', 'transportation': '🚇 交通',
388
+ 'food': '🍽️ 餐饮', 'attractions': '🎯 景点'
389
+ }
390
+
391
+ for category, info in breakdown.items():
392
+ if isinstance(info, dict):
393
+ name = category_names.get(category, category)
394
+ percentage = info.get('percentage', '')
395
+ daily_range = info.get('daily_range', '')
396
+
397
+ if percentage and daily_range:
398
+ plan += f" - {name}:{percentage},{daily_range}\n"
399
+
400
+ # 添加具体建议
401
+ if category == 'accommodation' and info.get('recommendations'):
402
+ recs = ', '.join(info['recommendations'])
403
+ plan += f" 推荐:{recs}\n"
404
+
405
+ elif category == 'transportation' and info.get('money_saving_tips'):
406
+ tips = ', '.join(info['money_saving_tips'])
407
+ plan += f" 省钱技巧:{tips}\n"
408
+
409
+ plan += "\n"
410
+
411
+ # 详细行程规划(基于知识库)
412
+ plan += "🗓️ 【详细行程】(来自实地经验)\n"
413
+
414
+ if knowledge.get('itinerary_suggestions'):
415
+ # 使用知识库中的行程建议
416
+ itinerary = knowledge['itinerary_suggestions']
417
+
418
+ for i, day_plan in enumerate(itinerary[:days]): # 限制在用户要求的天数内
419
+ day_num = day_plan.get('day_number', i+1)
420
+ location = day_plan.get('location', destination_name)
421
+ theme = day_plan.get('theme', '城市探索')
422
+
423
+ plan += f"\n📅 Day {day_num} - {location}({theme})\n"
424
+
425
+ # 上午活动
426
+ morning_activities = day_plan.get('morning_activities', [])
427
+ if morning_activities:
428
+ plan += "🌅 上午:\n"
429
+ for activity in morning_activities:
430
+ name = activity.get('activity_name', '')
431
+ duration = activity.get('duration', '')
432
+ cost = activity.get('cost', '')
433
+ tips = activity.get('professional_tips', '')
434
+
435
+ plan += f" • {name}"
436
+ if duration:
437
+ plan += f" ({duration})"
438
+ if cost and cost != "免费":
439
+ plan += f" - {cost}"
440
+ plan += "\n"
441
+
442
+ if tips:
443
+ plan += f" 💡 专业提醒:{tips}\n"
444
+
445
+ # 下午活动
446
+ afternoon_activities = day_plan.get('afternoon_activities', [])
447
+ if afternoon_activities:
448
+ plan += "🌞 下午:\n"
449
+ for activity in afternoon_activities:
450
+ name = activity.get('activity_name', '')
451
+ duration = activity.get('duration', '')
452
+ cost = activity.get('cost', '')
453
+
454
+ plan += f" • {name}"
455
+ if duration:
456
+ plan += f" ({duration})"
457
+ if cost:
458
+ plan += f" - {cost}"
459
+ plan += "\n"
460
+
461
+ # 餐饮建议
462
+ dining = day_plan.get('dining', {})
463
+ if dining:
464
+ plan += "🍽️ 餐饮推荐:\n"
465
+
466
+ for meal_type, meal_info in dining.items():
467
+ if isinstance(meal_info, dict):
468
+ meal_names = {'breakfast': '早餐', 'lunch': '午餐', 'dinner': '晚餐'}
469
+ meal_name = meal_names.get(meal_type, meal_type)
470
+
471
+ recommendation = meal_info.get('recommendation', '')
472
+ cost_range = meal_info.get('cost_range', '')
473
+
474
+ if recommendation:
475
+ plan += f" • {meal_name}:{recommendation}"
476
+ if cost_range:
477
+ plan += f" ({cost_range})"
478
+ plan += "\n"
479
+
480
+ # 住宿建议
481
+ accommodation = day_plan.get('accommodation', {})
482
+ if accommodation and day_num == 1: # 只在第一天显示住宿建议
483
+ plan += "🏨 住宿推荐:\n"
484
+
485
+ area = accommodation.get('recommended_area', '')
486
+ safety = accommodation.get('safety_level', '')
487
+
488
+ if area:
489
+ plan += f" • 推荐区域:{area}"
490
+ if safety:
491
+ plan += f"(安全等级:{safety})"
492
+ plan += "\n"
493
+
494
+ budget_options = accommodation.get('budget_options', [])
495
+ for option in budget_options:
496
+ if isinstance(option, dict):
497
+ category = option.get('category', '')
498
+ price_range = option.get('price_range', '')
499
+ if category and price_range:
500
+ plan += f" • {category}:{price_range}\n"
501
+
502
+ else:
503
+ # 如果没有具体行程,生成通用建议
504
+ plan += f"根据{destination_name}的特色,为您推荐以下{days}天行程框架:\n\n"
505
+
506
+ # 根据不同城市提供基础框架
507
+ if destination_name in ["布拉格", "Prague"]:
508
+ plan += "📅 Day 1: 老城区探索(老城广场→天文钟→查理大桥)\n"
509
+ plan += "📅 Day 2: 城堡区深度游(布拉格城堡→圣维特大教堂→黄金小巷)\n"
510
+ if days >= 3:
511
+ plan += "📅 Day 3: 新城区体验(瓦茨拉夫广场→国家博物馆→当地美食)\n"
512
+
513
+ elif destination_name in ["维也纳", "Vienna"]:
514
+ plan += "📅 Day 1: 皇室风采(美泉宫→霍夫堡宫→圣斯蒂芬大教堂)\n"
515
+ plan += "📅 Day 2: 音乐文化(维也纳国家歌剧院→金色大厅→艺术史博物馆)\n"
516
+ if days >= 3:
517
+ plan += "📅 Day 3: 咖啡文化体验(中央咖啡馆→萨赫咖啡馆→多瑙河漫步)\n"
518
+
519
+ elif destination_name in ["布达佩斯", "Budapest"]:
520
+ plan += "📅 Day 1: 布达一侧(布达城堡→渔夫堡→马加什教堂)\n"
521
+ plan += "📅 Day 2: 佩斯一侧(匈牙利国会大厦→链子桥→中央市场)\n"
522
+ if days >= 3:
523
+ plan += "📅 Day 3: 温泉文化(塞切尼温泉→多瑙河游船→夜景欣赏)\n"
524
+
525
+ # 添加专业洞察
526
+ if knowledge.get('professional_insights'):
527
+ insights = knowledge['professional_insights']
528
+ plan += "\n💡 【专业贴士】(来自旅行达人)\n"
529
+
530
+ # 季节建议
531
+ seasonal = insights.get('seasonal_considerations', {})
532
+ if seasonal:
533
+ best_months = seasonal.get('best_months', [])
534
+ weather = seasonal.get('weather_patterns', '')
535
+
536
+ if best_months:
537
+ plan += f"• 🌤️ 最佳旅行时间:{', '.join(best_months)}\n"
538
+ if weather:
539
+ plan += f"• 🌡️ 天气特点:{weather}\n"
540
+
541
+ #