fix: greedy decoding

modules/ai_model.py  CHANGED  (+24 -11)

@@ -143,43 +143,56 @@ class AIModel:
Before (old lines 143-185; removed lines marked "-"):

     def run_inference(self, input_type: str, formatted_input: Union[str, Image.Image], prompt: str,temperature: float = 0.7) -> str:

         try:
             if len(prompt) > 500:
                 prompt = prompt[:500] + "..."
-

             if input_type == "image" and isinstance(formatted_input, Image.Image):
-
                 image_token = getattr(self.processor.tokenizer, 'image_token', '<image>')
                 if image_token not in prompt:
-                    prompt = f"{image_token}\n{prompt}"
-
                 inputs = self.processor(
                     text=prompt,
                     images=formatted_input,
                     return_tensors="pt"
                 ).to(self.model.device, dtype=torch.bfloat16)
             else:
-
                 inputs = self.processor(
                     text=prompt,
                     return_tensors="pt"
                 ).to(self.model.device, dtype=torch.bfloat16)

             if hasattr(inputs, 'input_ids') and inputs.input_ids.shape[-1] > 512:
                 log.warning(f"⚠️ Truncating overly long input: {inputs.input_ids.shape[-1]} -> 512")
                 inputs.input_ids = inputs.input_ids[:, :512]
                 if hasattr(inputs, 'attention_mask'):
                     inputs.attention_mask = inputs.attention_mask[:, :512]

             with torch.inference_mode():
                 outputs = self.model.generate(
                     **inputs,
-
-                    do_sample=True,
-                    temperature=temperature,
-                    top_p=0.9,
-                    pad_token_id=self.processor.tokenizer.eos_token_id,
-                    use_cache=True
                 )

             decoded = self.processor.tokenizer.decode(outputs[0], skip_special_tokens=True).strip()
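The problem with the old version: sampling is always on, so a caller who needs repeatable output (for example for classification) has no way to get it; passing temperature=0.0 makes recent transformers releases raise a ValueError, and any small positive temperature still samples. A minimal sketch of that failure mode, assuming a generic causal LM (the gpt2 checkpoint and the surrounding setup are placeholders, not part of this Space):

# Sketch only; assumed setup, not code from this repo.
from transformers import AutoModelForCausalLM, AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("gpt2")   # placeholder checkpoint
model = AutoModelForCausalLM.from_pretrained("gpt2")

inputs = tokenizer("Sentiment of 'great movie', one word:", return_tensors="pt")

try:
    # Old code path: do_sample=True is hard-coded, so temperature=0.0 is invalid;
    # recent transformers versions reject it with a ValueError at generate time.
    model.generate(**inputs, do_sample=True, temperature=0.0, top_p=0.9, max_new_tokens=8)
except ValueError as err:
    print(f"sampling with temperature=0.0 rejected: {err}")

# Greedy decoding, which the fix selects when temperature ~ 0, is deterministic.
out = model.generate(**inputs, do_sample=False, max_new_tokens=8,
                     pad_token_id=tokenizer.eos_token_id)
print(tokenizer.decode(out[0], skip_special_tokens=True))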
After (new lines 143-198; added lines marked "+"):

     def run_inference(self, input_type: str, formatted_input: Union[str, Image.Image], prompt: str,temperature: float = 0.7) -> str:

         try:
+            # Truncate an overly long prompt
             if len(prompt) > 500:
                 prompt = prompt[:500] + "..."

+            # Prepare the inputs (image or text)
             if input_type == "image" and isinstance(formatted_input, Image.Image):
                 image_token = getattr(self.processor.tokenizer, 'image_token', '<image>')
                 if image_token not in prompt:
+                    prompt = f"{image_token}\n{prompt}"

                 inputs = self.processor(
                     text=prompt,
                     images=formatted_input,
                     return_tensors="pt"
                 ).to(self.model.device, dtype=torch.bfloat16)
             else:
                 inputs = self.processor(
                     text=prompt,
                     return_tensors="pt"
                 ).to(self.model.device, dtype=torch.bfloat16)

+            # Truncate an overly long token sequence
             if hasattr(inputs, 'input_ids') and inputs.input_ids.shape[-1] > 512:
                 log.warning(f"⚠️ Truncating overly long input: {inputs.input_ids.shape[-1]} -> 512")
                 inputs.input_ids = inputs.input_ids[:, :512]
                 if hasattr(inputs, 'attention_mask'):
                     inputs.attention_mask = inputs.attention_mask[:, :512]

+            # --- This is the key change ---
             with torch.inference_mode():
+                generation_args = {
+                    "max_new_tokens": 256,
+                    "pad_token_id": self.processor.tokenizer.eos_token_id,
+                    "use_cache": True
+                }
+
+                # If temperature is close to 0, use greedy decoding (for deterministic tasks such as classification)
+                if temperature < 1e-6:  # compare against a tiny epsilon instead of float equality
+                    log.info("▶️ Using greedy decoding (do_sample=False) for deterministic output.")
+                    generation_args["do_sample"] = False
+                # Otherwise, use sampling (for creative generation tasks)
+                else:
+                    log.info(f"▶️ Using sampling (do_sample=True) with temperature={temperature}.")
+                    generation_args["do_sample"] = True
+                    generation_args["temperature"] = temperature
+                    generation_args["top_p"] = 0.9  # top_p only has an effect when sampling
+
+                # Call generate with the assembled argument dict
                 outputs = self.model.generate(
                     **inputs,
+                    **generation_args
                 )

             decoded = self.processor.tokenizer.decode(outputs[0], skip_special_tokens=True).strip()
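After the change, the decoding mode is selected entirely by the temperature argument. A rough usage sketch (how AIModel is constructed is outside this diff, so the setup below is an assumption):

# Rough usage sketch; the AIModel constructor and file paths are assumptions.
from PIL import Image
from modules.ai_model import AIModel

model = AIModel()  # assumed constructor; not shown in this diff

# temperature ~ 0 -> greedy decoding: repeatable answers for classification-style prompts
label = model.run_inference(
    input_type="text",
    formatted_input="",          # unused on the text path in the code above
    prompt="Answer with one word, positive or negative: I loved this film.",
    temperature=0.0,
)

# temperature > 0 -> sampling with top_p=0.9: varied output for open-ended prompts
caption = model.run_inference(
    input_type="image",
    formatted_input=Image.open("photo.jpg"),   # any PIL image
    prompt="Describe this image in one sentence.",
    temperature=0.7,
)
print(label)
print(caption)

Comparing against 1e-6 rather than testing temperature == 0.0 sidesteps float-equality pitfalls, while any meaningfully positive temperature keeps the previous sampling behaviour (do_sample=True, top_p=0.9).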