Bläddra i källkod

增加阿里云 qwen TTS, 修复在多任务合集时候的 多频次推理

hwt 1 vecka sedan
förälder
incheckning
c36aac1fd7

+ 3 - 3
brain/PlannerNode2/config_node/config_node/config_node.py

@@ -157,13 +157,13 @@ class ConfigNode(Node):
                     "tongyi_app_id": "6ed9f00173214e7883af7310731a5d7b",
 
                     # 多模态模型
-                    "multimodel": "qwen-vl-max-2025-04-08",
+                    "multimodel": "qwen-vl-max",
 
                     # TTS 配置
                     "tts_supplier": "aliyun",
                     "tts_language": "zh",
-                    "oline_tts_model": "cosyvoice-v2",
-                    "voice_tone": "longwan_v2",
+                    "oline_tts_model": "qwen-tts",
+                    "voice_tone": "Cherry",
 
                     # ASR 配置
                     "oline_asr_sample_rate": 16000,

+ 4 - 4
brain/PlannerNode2/environment_node/environment_node/environment_node.py

@@ -166,10 +166,10 @@ class EnvironmentNode(Node):
         """生成地图导航点模拟数据"""
         self.map_data = {
             "points": [
-                {"id": "A", "name": "办公室(室内)", "position": {"x": 1.633, "y": 3.490, "z": 0.0}},
-                {"id": "B", "name": "酒店大堂(室内)", "position": {"x": 2.436, "y": -0.574, "z": 0.0}},
-                {"id": "C", "name": "园区(室外)", "position": {"x": 0.024, "y": -1.820, "z": 0.0}},
-                {"id": "D", "name": "充电点(室内)", "position": {"x": 0.014, "y": -2.820, "z": 0.0}}
+                {"id": "A", "name": "办公室(室内)", "position": {"x": 3.633, "y": 9.490, "z": 0.0}},
+                {"id": "B", "name": "酒店大堂(室内)", "position": {"x": 6.436, "y": -0.574, "z": 0.0}},
+                {"id": "C", "name": "园区(室外)", "position": {"x": 0.024, "y": -3.820, "z": 0.0}},
+                {"id": "D", "name": "充电点(室内)", "position": {"x": 0.014, "y": -6.820, "z": 0.0}}
             ]
         }
 

+ 3 - 2
brain/PlannerNode2/largemodel/config/large_model_interface.yaml

@@ -16,8 +16,9 @@ oline_asr_model : 'paraformer-realtime-v2'                                     #
 tts_supplier :  "aliyun"                                                        #tts语音合成模型供应商:aliyun/baidu 目前提供两个平台的接口,详见large_model_interface.py接口程序
 
 #通义千问平台语音合成配置
-oline_tts_model : "cosyvoice-v2"                                                #语音模型
-voice_tone : "longwan_v2"
+#oline_tts_model : "cosyvoice-v2"     
+oline_tts_model : "qwen-tts-2025-05-22"                                             #语音模型
+voice_tone : "Cherry"
 
 #百度智能云平台语音合成模型
 baidu_API_KEY : 'Ppprf0XqOyQ6uOv2rGg34oR7'                                      #百度平台语音合成API_KEY

+ 221 - 39
brain/PlannerNode2/largemodel/largemodel/action_service.py

@@ -3,6 +3,8 @@ import re
 import rclpy
 import subprocess
 import json
+import signal
+import psutil
 from rclpy.action import ActionServer
 from rclpy.node import Node
 from geometry_msgs.msg import Twist
@@ -46,8 +48,21 @@ class CustomActionServer(Node):
         # self.arm_grasp_init()
         # 配置标志:等待 /ai/config 到达后再初始化声音和语言
         self.config_ready = False
+        self.config_init_done = False  # 确保初始化只执行一次
+        self._config_debounce_timer = None  # 防抖定时器
+        self._config_debounce_delay = 0.5  # 防抖延迟(秒)
         self.get_logger().info("action service started, waiting for /ai/config...")
 
+        # 注册 shutdown 回调,确保退出时清理子进程
+        self.get_logger().info("[Shutdown] Registered shutdown handler")
+
+    def destroy_node(self):
+        """
+        Destroy the node, cleaning up managed child processes first.
+
+        Overrides ``Node.destroy_node`` so that ``_on_shutdown`` runs before the
+        base-class teardown.
+        """
+        try:
+            self._on_shutdown()
+        finally:
+            # Always run the base-class teardown, even when the child-process
+            # cleanup raises; the exception still propagates afterwards.
+            super().destroy_node()
+
     def init_param_config(self):
         """
         初始化参数配置 / Initialize parameter configuration
@@ -210,32 +225,38 @@ class CustomActionServer(Node):
                     )
                     self.get_logger().debug(f'[配置] image_topic 已更新并重建订阅: {self.image_topic}')
 
-                # useolinetts(需要重新初始化 TTS
+                # useolinetts(配置变化不触发初始化,由外部重启节点处理
                 new_useolinetts = action_service_cfg.get('useolinetts', self.useolinetts)
                 if new_useolinetts != self.useolinetts:
                     self.useolinetts = new_useolinetts
-                    self.get_logger().debug(f'[配置] useolinetts 已更新: {self.useolinetts},重新初始化 TTS')
-                    # 重新初始化 TTS 模型
-                    self._init_tts_model()
+                    self.get_logger().warn(f'[配置] useolinetts 已更新: {self.useolinetts},需要重启节点以生效')
 
-                # language(可能需要重新初始化)
+                # language(配置变化不触发初始化)
                 new_language = action_service_cfg.get('language', self.language)
                 if new_language != self.language:
                     self.language = new_language
-                    self.get_logger().debug(f'[配置] language 已更新: {self.language}')
+                    self.get_logger().warn(f'[配置] language 已更新: {self.language},需要重启节点以生效')
 
-                # regional_setting(可能需要重新初始化)
+                # regional_setting(配置变化不触发初始化)
                 new_regional_setting = action_service_cfg.get('regional_setting', self.regional_setting)
                 if new_regional_setting != self.regional_setting:
                     self.regional_setting = new_regional_setting
-                    self.get_logger().info(f'[配置] regional_setting 已更新: {self.regional_setting}')
+                    self.get_logger().warn(f'[配置] regional_setting 已更新: {self.regional_setting},需要重启节点以生效')
+
+            # --- large_model 配置处理 ---
+            large_model_cfg = config_root.get('large_model', {})
+            if large_model_cfg:
+                # 调用 model_client 的 update_config 方法更新配置
+                self.model_client.update_config(large_model_cfg)
+                self.get_logger().debug(
+                    f'[配置] large_model 已更新: oline_tts_model={large_model_cfg.get("oline_tts_model")}, '
+                    f'voice_tone={large_model_cfg.get("voice_tone")}, tts_supplier={large_model_cfg.get("tts_supplier")}'
+                )
 
-            # --- 首次配置到达:完成延迟初始化的模块 ---
+            # --- 首次配置到达:防抖执行一次性初始化 ---
             if not self.config_ready:
-                self.system_sound_init()
-                self.init_language()
                 self.config_ready = True
-                self.get_logger().debug('[配置] 首次配置已到达,声音和语言模块初始化完成')
+                self._schedule_config_init()  # 防抖延迟执行初始化
 
             # --- topics 配置处理 ---
             topics = config_root.get('topics', {})
@@ -254,18 +275,44 @@ class CustomActionServer(Node):
         except Exception as e:
             self.get_logger().warn(f'解析配置数据失败: {e}')
 
+    def _on_config_ready(self):
+        """
+        Debounce callback: perform the one-time sound and language setup.
+
+        Does nothing when ``config_init_done`` is already set; otherwise sets it
+        and runs ``system_sound_init`` followed by ``init_language``.
+        """
+        if not self.config_init_done:
+            # Flip the flag before doing any work so later invocations bail out.
+            self.config_init_done = True
+            logger = self.get_logger()
+            logger.info('[配置] 开始执行一次性初始化...')
+            self.system_sound_init()
+            self.init_language()
+            logger.info('[配置] 一次性初始化完成')
+
+    def _schedule_config_init(self):
+        """
+        Arm (or re-arm) the debounce timer for the one-time initialization.
+
+        Any timer already stored in ``_config_debounce_timer`` is cancelled, then
+        a new ``threading.Timer`` is started that calls ``_on_config_ready`` after
+        ``_config_debounce_delay`` seconds.
+        """
+        previous = self._config_debounce_timer
+        if previous:
+            previous.cancel()
+
+        timer = threading.Timer(self._config_debounce_delay, self._on_config_ready)
+        self._config_debounce_timer = timer
+        timer.start()
+
     def _init_tts_model(self):
         """
-        根据当前 useolinetts / regional_setting 重新初始化 TTS 模型
-        供首次配置到达和 config 动态更新时调用
+        根据当前 useolinetts / regional_setting 初始化 TTS 模型
         """
         pkg_path = get_package_share_directory("largemodel")
 
         if self.regional_setting == "China":
             if self.useolinetts:
                 model_type = "oline"
+                # Qwen-TTS 只返回 WAV 格式,不支持 MP3
                 self.tts_out_path = os.path.join(
-                    pkg_path, "resources_file", "tts_output.mp3"
+                    pkg_path, "resources_file", "tts_output.wav"
                 )
             else:
                 model_type = "local"
@@ -312,7 +359,7 @@ class CustomActionServer(Node):
                 "navigation_2": "机器人反馈:执行navigation({point_name})完成",
                 "navigation_3": "机器人反馈:执行navigation({point_name})失败,目标点不存在",
                 "navigation_4": "机器人反馈:执行navigation({point_name})失败",
-                "get_current_pose_success": "机器人反馈:get_current_pose()成功",
+                "get_current_pose_success": "机器人反馈:get_current_pose({point_name})成功",
                 "arm_up_done": "机器人反馈:执行arm_up()完成",
                 "arm_down_done": "机器人反馈:执行arm_down()完成",
                 "drift_done": "机器人反馈:执行drift()完成",
@@ -351,7 +398,7 @@ class CustomActionServer(Node):
                 "navigation_2": "Robot feedback: Execute navigation({point_name}) completed",
                 "navigation_3": "Robot feedback: Execute navigation({point_name}) failed, target does not exist",
                 "navigation_4": "Robot feedback: Execute navigation({point_name}) failed",
-                "get_current_pose_success": "Robot feedback: get_current_pose() succeeded",
+                "get_current_pose_success": "Robot feedback: get_current_pose({point_name}) succeeded",
                 "arm_up_done": "Robot feedback: Execute arm_up() completed",
                 "arm_down_done": "Robot feedback: Execute arm_down() completed",
                 "drift_done": "Robot feedback: Execute drift() completed",
@@ -616,7 +663,7 @@ class CustomActionServer(Node):
     def person_approach_event_callback(self, msg):
         """
         人物靠近事件回调函数 / Person approach event callback
-        收到事件后播放欢迎语
+        收到事件后播放欢迎语,并停止人物靠近检测
         """
         if not self.welcome_mode:
             return
@@ -626,29 +673,53 @@ class CustomActionServer(Node):
             if data.get('event') == 'person_approach':
                 self.get_logger().info(f"Person approach detected: {data}")
 
+                # 停止人物靠近检测进程 / Stop person_approach detection process
+                if self.process_map['person_approach']['running']:
+                    pid = self.process_map['person_approach']['pid']
+                    try:
+                        # 使用 psutil 杀掉整个进程树(包括子进程)
+                        parent = psutil.Process(pid)
+                        children = parent.children(recursive=True)
+                        for child in children:
+                            try:
+                                child.terminate()
+                            except psutil.NoSuchProcess:
+                                pass
+                        parent.terminate()
+                        self.process_map['person_approach']['running'] = False
+                        self.get_logger().info(f"Stopped person_approach node, PID: {pid}, children: {len(children)}")
+                    except Exception as e:
+                        self.get_logger().warn(f"Failed to stop person_approach: {e}")
+
                 # 停止上一个 TTS 播放
                 self.stop_event.set()
                 time.sleep(0.1)
 
                 # 欢迎语内容(后续可修改)
-                welcome_text = "欢迎光临"
+                welcome_text = "欢迎光临,有什么可以帮助您的呢?"
 
                 # TTS 合成
                 self.model_client.voice_synthesis(
                     welcome_text, self.tts_out_path
                 )
-                # 异步播放
-                self.play_audio_async(self.tts_out_path)
+                # 同步播放欢迎语,等待播放完成后启动 ASR 监听
+                self.play_audio(self.tts_out_path)
                 self.get_logger().info(f"Playing welcome TTS: {welcome_text}")
+                # 播放完成,启动 ASR 监听用户对话
+                # 将 welcome_mode 设置为 False,允许对话结束后重新启动迎宾模式
+                self.welcome_mode = False
+                self.asr_control_pub.publish(String(data="start_listen"))
+                self.get_logger().info("Welcome TTS finished, started ASR listening")
 
         except json.JSONDecodeError:
             self.get_logger().error("Failed to parse person_approach event data")
         except Exception as e:
             self.get_logger().error(f"Error in person_approach_event_callback: {e}")
 
-    def get_current_pose(self):
+    def get_current_pose(self, point_name="zero"):
         """
         获取当前在全局地图坐标系下的位置 /Get the current position in the global map coordinate system
+        :param point_name: 点位名称,用于保存到 navpose_dict,默认为 "zero"
         """
         # 获取当前目标点坐标
         transform = self.tf_buffer.lookup_transform(
@@ -661,16 +732,16 @@ class CustomActionServer(Node):
         pose.pose.position.y = transform.transform.translation.y
         pose.pose.position.z = 0.0
         pose.pose.orientation = transform.transform.rotation
-        self.navpose_dict["zero"] = pose
+        self.navpose_dict[point_name] = pose
         # 打印记录的坐标
         position = pose.pose.position
         orientation = pose.pose.orientation
         self.get_logger().info(
-            f"Recorded Pose - Position: x={position.x}, y={position.y},\
+            f"Recorded Pose [{point_name}] - Position: x={position.x}, y={position.y},\
                                 z={position.z},Orientation: x={orientation.x}, y={orientation.y}, z={orientation.z}, w={orientation.w}"
         )
-        if not self.interrupt_flag:
-            self.action_status_pub("get_current_pose_success")
+        if not self.combination_mode and not self.interrupt_flag:
+            self.action_status_pub("get_current_pose_success", point_name=point_name)
 
     def action_status_pub(self, key, **kwargs):
         """
@@ -741,9 +812,10 @@ class CustomActionServer(Node):
                 self.result = future_result.result()
                 self.navigation_finish_flag = True
                 if self.result.status == 4:
-                    self.action_status_pub(
-                        "navigation_2", point_name=point_name
-                    )  # 执行导航成功 /execute navigation success
+                    if not self.combination_mode:
+                        self.action_status_pub(
+                            "navigation_2", point_name=point_name
+                        )  # 执行导航成功 /execute navigation success
 
                 elif self.result.status == 5:
                     self.get_logger().info("Cancel navigation")
@@ -1137,11 +1209,14 @@ class CustomActionServer(Node):
             self.action_status_pub("dance_done")
 
     def stop(self):  # 停止
-        twist = Twist()
-        twist.linear.x = 0.0
-        twist.linear.y = 0.0
-        twist.angular.z = 0.0
-        self.publisher.publish(twist)
+        try:
+            twist = Twist()
+            twist.linear.x = 0.0
+            twist.linear.y = 0.0
+            twist.angular.z = 0.0
+            self.publisher.publish(twist)
+        except Exception as e:
+            self.get_logger().warn(f"stop() failed to publish twist: {e}")
 
     def _execute_action(self, twist, num=1, durationtime=3.0):
         for _ in range(num):
@@ -1414,9 +1489,11 @@ class CustomActionServer(Node):
                 goal_handle.request.llm_response is not None
                 or goal_handle.request.text_response != ""
             ):  # 语音模式,播放对话  # Voice mode, play dialogue
-                self.model_client.voice_synthesis(
+                self.get_logger().info(f"[TTS] 开始合成: {goal_handle.request.llm_response[:50]}...")
+                result = self.model_client.voice_synthesis(
                     goal_handle.request.llm_response, self.tts_out_path
                 )
+                self.get_logger().info(f"[TTS] 合成完成, result={result}, 路径={self.tts_out_path}")
                 self.play_audio(self.tts_out_path, feedback=True)
             else:
                 self.action_status_pub("response_done")
@@ -1524,13 +1601,23 @@ class CustomActionServer(Node):
         """
         发布 continue_listen 指令,让 ASR 继续监听
         """
+        # 关闭人物靠近检测,避免在多轮对话期间重复触发
+        if self.process_map['person_approach']['running']:
+            pid = self.process_map['person_approach']['pid']
+            try:
+                import os
+                os.kill(pid, signal.SIGTERM)
+                self.process_map['person_approach']['running'] = False
+                self.get_logger().info(f"Stopped person_approach node for dialogue, PID: {pid}")
+            except Exception as e:
+                self.get_logger().warn(f"Failed to stop person_approach: {e}")
+
         msg = String()
         msg.data = "continue_listen"
         self.asr_control_pub.publish(msg)
         self.get_logger().info("[多轮对话] ask_user 播放完成,已发布 /asr/control: continue_listen")
 
-    @staticmethod
-    def kill_process_tree(pid):
+    def kill_process_tree(self, pid):
         try:
             parent = psutil.Process(pid)
             children = parent.children(recursive=True)
@@ -1564,6 +1651,101 @@ class CustomActionServer(Node):
         except psutil.NoSuchProcess:
             pass
 
+    def _on_shutdown(self):
+        """
+        Clean up before the node is destroyed.
+
+        Cancels a pending config-debounce timer and kills every process in
+        ``process_map`` that still has a recorded PID. Safe to call more than
+        once: entries whose PID has already been cleared are skipped.
+        """
+        self.get_logger().info("[Shutdown] Cleaning up child processes...")
+
+        # Cancel the debounce timer so the one-time init cannot fire during teardown.
+        if self._config_debounce_timer:
+            self._config_debounce_timer.cancel()
+            self._config_debounce_timer = None
+
+        # Kill every managed child process that still has a PID on record.
+        for process_name, process_info in self.process_map.items():
+            pid = process_info['pid']
+            if pid is None:
+                continue
+            self.get_logger().info(f"[Shutdown] Killing {process_name} (PID: {pid})")
+            self.kill_process_tree(pid)
+            process_info['pid'] = None
+            # Keep the bookkeeping consistent with the other stop paths, which
+            # reset 'running' when a managed process is stopped.
+            if 'running' in process_info:
+                process_info['running'] = False
+
+        self.get_logger().info("[Shutdown] All child processes cleaned up")
+
+    def kill_process_tree(self, pid):
+        """
+        Terminate a process and all of its descendants, escalating as needed.
+
+        Steps: SIGTERM to every descendant and to the process itself, wait up to
+        0.5 s, SIGKILL whatever is still alive, SIGKILL the target's process
+        group (unless that is this node's own group), and finally attempt
+        ``ros2 node kill /person_approach_node``.
+
+        NOTE(review): an earlier ``kill_process_tree`` definition appears in this
+        class; this later definition is the one that ends up bound. Consider
+        removing the earlier one.
+
+        :param pid: PID of the root process to stop.
+        """
+        import os
+
+        try:
+            parent = psutil.Process(pid)
+            children = parent.children(recursive=True)
+            self.get_logger().info(f"[Shutdown] Found {len(children)} children for PID {pid}")
+
+            # 1. Ask every descendant to exit.
+            for child in children:
+                try:
+                    child.terminate()
+                    self.get_logger().info(f"[Shutdown] Sent SIGTERM to child PID {child.pid}")
+                except psutil.NoSuchProcess:
+                    pass
+
+            # 2. Ask the root process to exit as well.
+            try:
+                parent.terminate()
+                self.get_logger().info(f"[Shutdown] Sent SIGTERM to parent PID {pid}")
+            except psutil.NoSuchProcess:
+                pass
+
+            # 3. Give the tree up to 0.5 s to exit. wait_procs returns as soon as
+            #    everything is gone, and does not count exited-but-unreaped
+            #    (zombie) processes as alive the way is_running() does.
+            _, alive = psutil.wait_procs(children + [parent], timeout=0.5)
+
+            # 4. Force-kill whatever ignored SIGTERM.
+            for p in alive:
+                try:
+                    p.kill()
+                    self.get_logger().info(f"[Shutdown] Sent SIGKILL to PID {p.pid}")
+                except psutil.NoSuchProcess:
+                    pass
+
+        except psutil.NoSuchProcess:
+            self.get_logger().info(f"[Shutdown] PID {pid} already terminated")
+        except Exception as e:
+            self.get_logger().warn(f"[Shutdown] Error killing process tree: {e}")
+
+        # 5. Extra measure: kill the target's process group (for ros2 launch).
+        try:
+            pgid = os.getpgid(pid)
+            if pgid == os.getpgrp():
+                # The target shares this node's process group; killpg would
+                # SIGKILL this node too and abort the rest of the shutdown.
+                self.get_logger().info(
+                    f"[Shutdown] PID {pid} shares this node's process group {pgid}, skipping killpg"
+                )
+            else:
+                os.killpg(pgid, signal.SIGKILL)
+                self.get_logger().info(f"[Shutdown] Killed process group {pgid}")
+        except ProcessLookupError:
+            self.get_logger().info(f"[Shutdown] Process group already terminated")
+        except Exception as e:
+            self.get_logger().warn(f"[Shutdown] killpg failed: {e}")
+
+        # 6. Last resort: ask the ros2 CLI to stop the person-approach node.
+        # NOTE(review): this runs for every PID passed in, not only the
+        # person_approach process, and the stock ros2 CLI appears to offer only
+        # `ros2 node list` / `ros2 node info` -- confirm `kill` exists in this
+        # deployment. The exit code is reported so a failure is no longer
+        # logged as a success.
+        try:
+            result = subprocess.run(
+                ["ros2", "node", "kill", "/person_approach_node"],
+                capture_output=True, timeout=2,
+            )
+            if result.returncode == 0:
+                self.get_logger().info("[Shutdown] Sent ros2 node kill for person_approach_node")
+            else:
+                self.get_logger().debug(
+                    f"[Shutdown] ros2 node kill exited with code {result.returncode}"
+                )
+        except Exception as e:
+            self.get_logger().debug(f"[Shutdown] ros2 node kill failed: {e}")
+
     def play_audio(self, file_path: str, feedback: Bool = False) -> None:
         """
         同步方式播放音频函数The function for playing audio in synchronous mode
@@ -1575,6 +1757,7 @@ class CustomActionServer(Node):
         if pygame.mixer.music.get_busy():
             pygame.mixer.music.stop()
         self.stop_event.clear()
+        self.get_logger().info(f"play_audio: loading {file_path}")
         pygame.mixer.music.load(file_path)
         pygame.mixer.music.play()
         self.get_logger().info(f"play_audio: started playing {file_path}")
@@ -1660,10 +1843,9 @@ def main(args=None):
     try:
         executor.spin()
     except KeyboardInterrupt:
-        custom_action_server.stop()
         pass
     finally:
-        custom_action_server.stop()
+        # 先调用 destroy_node 清理子进程(这会触发 _on_shutdown)
         custom_action_server.destroy_node()
         executor.shutdown()
         rclpy.shutdown()

+ 45 - 27
brain/PlannerNode2/largemodel/largemodel/asr.py

@@ -147,37 +147,55 @@ class ASRNode(Node):
                 self.current_thread.start()
             rclpy.spin_once(self, timeout_sec=0.1)
 
-    def kws_handler(self, play_error_response=True) -> None:
+    def kws_handler(self, play_error_response=True, max_retry=2) -> None:
         if self.stop_event.is_set():
             return
 
-        # 清空 buffer 中已有的旧帧,确保从"当前时刻"开始录音
-        while not self.audio_buffer.empty():
-            try:
-                self.audio_buffer.get_nowait()
-            except queue.Empty:
-                break
-
-        if self.listen_for_speech(self.mic_index):
-            asr_text = self.ASR_conversion(
-                self.user_speechdir
-            )  # 进行 ASR 转换 / Perform ASR conversion
-            if (
-                asr_text == "error"
-            ):  # 检查 ASR 结果长度是否小于4个字符 / Check if ASR result length is less than 4 characters
-                self.get_logger().warn(
-                    "I still don't understand what you mean. Please try again"
-                )
-                if play_error_response:
-                    playsound(
-                        self.audio_dict[self.error_response]
-                    )  # 错误响应 / Error response
+        retry_count = 0
+
+        while True:
+            if self.stop_event.is_set():
+                return
+
+            # 清空 buffer 中已有的旧帧,确保从"当前时刻"开始录音
+            while not self.audio_buffer.empty():
+                try:
+                    self.audio_buffer.get_nowait()
+                except queue.Empty:
+                    break
+
+            if self.listen_for_speech(self.mic_index):
+                asr_text = self.ASR_conversion(
+                    self.user_speechdir
+                )  # 进行 ASR 转换 / Perform ASR conversion
+                if asr_text == "error":
+                    retry_count += 1
+                    if retry_count <= max_retry:
+                        self.get_logger().warn(
+                            f"[ASR] 没有听清楚,这是第 {retry_count} 次重试(共 {max_retry} 次)"
+                        )
+                        if play_error_response:
+                            playsound(self.audio_dict[self.error_response])
+                        time.sleep(0.3)  # 短暂等待后重试
+                        continue  # 重试录音
+                    else:
+                        self.get_logger().warn(
+                            "[ASR] 重试次数用完,没有听清楚,发布失败反馈"
+                        )
+                        if play_error_response:
+                            playsound(self.audio_dict[self.error_response])
+                        self.asr_pub_result("asr_error")
+                        return
+                else:
+                    self.get_logger().info(asr_text)
+                    self.get_logger().info("😀okay, let me think for a moment...")
+                    self.asr_pub_result(asr_text)  # 发布 ASR结果 / Publish ASR result
+                    return  # 成功,退出
             else:
-                self.get_logger().info(asr_text)
-                self.get_logger().info("😀okay, let me think for a moment...")
-                self.asr_pub_result(asr_text)  # 发布 ASR结果 / Publish ASR result
-        else:
-            return
+                # 超时无响应,发布超时反馈(让上层触发空推理)
+                self.get_logger().warn("[多轮对话] ask_user 超时,发布超时反馈")
+                self.asr_pub_result("ask_user_timeout")
+                return
 
     def asr_control_callback(self, msg):
         """

+ 7 - 0
brain/PlannerNode2/largemodel/largemodel/model_service.py

@@ -167,6 +167,13 @@ class LargeModelService(Node):
             self.get_logger().info(
                 "The current instruction cycle has ended"
             )  # 当前指令周期已结束...
+        elif msg.data == "ask_user_timeout":
+            # ask_user 超时,触发空推理
+            self.get_logger().warn("[多轮对话] ask_user 超时,触发空推理")
+            if self.regional_setting == "China":
+                self.dual_large_model_mode(type="text", prompt="ask_user_timeout")
+            else:
+                self.dual_large_model_international_model(type="text", prompt="ask_user_timeout")
         else:  # 向指令执行层大模型反馈动作执行结果 / Feedback action execution results to the large model in the command execution layer
             if self.regional_setting == "China":
                 self.dual_large_model_mode(type="text", prompt=msg.data)

+ 51 - 8
brain/PlannerNode2/largemodel/utils/large_model_interface.py

@@ -362,6 +362,7 @@ class model_interface:
         self.voice_tone = config_param.get("voice_tone")
         self.local_asr_model = config_param.get("local_asr_model")
         self.tts_supplier = config_param.get("tts_supplier")
+        self.tts_language = config_param.get("tts_language", "zh")
         self.baidu_API_KEY = config_param.get("baidu_API_KEY")
         self.baidu_SECRET_KEY = config_param.get("baidu_SECRET_KEY")
         self.CUID = config_param.get("CUID")
@@ -404,6 +405,8 @@ class model_interface:
                 self.voice_tone = config.get('voice_tone')
             if config.get('tts_supplier'):
                 self.tts_supplier = config.get('tts_supplier')
+            if config.get('tts_language'):
+                self.tts_language = config.get('tts_language')
             if config.get('baidu_API_KEY'):
                 self.baidu_API_KEY = config.get('baidu_API_KEY')
             if config.get('baidu_SECRET_KEY'):
@@ -882,16 +885,56 @@ class model_interface:
                 """
                 阿里通义语音合成接口
                 """
-                self.synthesizer = SpeechSynthesizer(
-                    model=self.oline_tts_model, voice=self.voice_tone, volume=100
+                is_qwen_tts = self.oline_tts_model and (
+                    'qwen-tts' in self.oline_tts_model.lower() or
+                    'qwen3-tts' in self.oline_tts_model.lower()
                 )
-                audio = self.synthesizer.call(text)
-                if audio is None:
-                    return 1
+                
+                if is_qwen_tts:
+                    # Qwen-TTS 模型使用 MultiModalConversation 接口(非流式)
+                    if self.tts_language == "en":
+                        language_type = "English"
+                    else:
+                        language_type = "Chinese"
+                    
+                    response = dashscope.MultiModalConversation.call(
+                        model=self.oline_tts_model,
+                        text=text,
+                        voice=self.voice_tone,
+                        language_type=language_type,
+                        stream=False
+                    )
+                    if self.logger:
+                        self.logger.info(f"[TTS] Qwen-TTS 响应: status_code={response.status_code}")
+                    if response.output is None or not hasattr(response.output, 'audio') or response.output.audio is None:
+                        if self.logger:
+                            self.logger.error(f'[TTS] Qwen-TTS 合成失败: {response}')
+                        return 1
+                    audio_url = response.output.audio.url
+                    # 下载音频文件
+                    try:
+                        audio_data = requests.get(audio_url, timeout=30).content
+                        with open(path, "wb") as f:
+                            f.write(audio_data)
+                        if self.logger:
+                            self.logger.info(f"[TTS] Qwen-TTS 音频已保存: {path}, 大小: {len(audio_data)} bytes")
+                        return 0
+                    except Exception as e:
+                        if self.logger:
+                            self.logger.error(f'[TTS] 音频下载失败: {e}')
+                        return 1
                 else:
-                    with open(path, "wb") as f:
-                        f.write(audio)
-                    return 0
+                    # CosyVoice 系列使用 SpeechSynthesizer 接口
+                    self.synthesizer = SpeechSynthesizer(
+                        model=self.oline_tts_model, voice=self.voice_tone, volume=100
+                    )
+                    audio = self.synthesizer.call(text)
+                    if audio is None:
+                        return 1
+                    else:
+                        with open(path, "wb") as f:
+                            f.write(audio)
+                        return 0
         elif self.model_type == "local":
             with wave.open(path, "wb") as wav_file:
                 wav_file.setnchannels(1)  # 单声道

+ 20 - 7
brain/PlannerNode2/largemodel/utils/promot.py

@@ -137,7 +137,7 @@ default_prompt = '''
 1. **接收任务**:接收用户指令和决策层AI生成的任务步骤,决策层AI生成的步骤是辅助你理解指令,以用户指令为最终参考,任务步骤格式类似“1.xxxx,2.xxxx,3.xxxx”,每个序号代表一个步骤。
 2. **处理反馈与指令**:接收机器人执行动作的反馈,若反馈成功,按任务步骤生成新的动作并回复。
 3. **生成内容**:生成动作列表和聊天内容,保证任务能按照任务步骤顺利推进。
-4. **完成任务**:当执行完最后一个任务步骤,回复用户同时调用“finishtask()”函数;
+4. **完成任务**:当执行完最后一个任务步骤,回复用户同时调用“ask_user()”函数;
 
 ## 输出格式:
 - 输出为JSON格式,不要包含 ```json 开头或结尾标识
@@ -149,7 +149,7 @@ default_prompt = '''
 - 地图映射中每个目标点都有“室内”或“室外”属性。
 - 当天气情况包含“小雨、雨、中雨、大雨、暴雨、雷雨、下雨”等任意雨天状态时,禁止导航到属性为“室外”的目标点。
 - 如果用户要求前往室外目标点,必须拒绝执行导航,不允许输出 navigation()、set_cmdvel()、move_left()、move_right() 等任何移动动作。
-- 拒绝时,action 必须输出 ["finish_dialogue()"],response 中说明当前天气为雨天,目标点属于室外,不能前往。
+- 拒绝时,action 必须输出 ["ask_user()"],response 中说明当前天气为雨天,目标点属于室外,不能前往,并且询问是否去其他地方
 - 该规则优先级高于用户指令、任务步骤、地图导航规则和训练样例。
 
 ## 执行动作前强制检查
@@ -170,6 +170,14 @@ default_prompt = '''
 - 若连续2次或以上收到:"机器人反馈:回复用户完成",立即调用"finishtask() 函数,让机器人停止重复反馈
 - 除非用户明确说"结束、退下、休息、不用了",否则不要调用 finish_dialogue()。
 - 若某个动作执行失败,最多重试一次,若再次失败,调用 "finish_dialogue()" 结束当前任务,并告知用户遇到困难。
+
+## 迎宾模式(重要)
+- 当前用户启动迎宾模式,需要导航到启动迎宾模式的位置,并记录当前位置,然后开启迎宾功能。
+- 执行完用户任务后,询问客户还需要帮助什么,如果客户没有需求,那么返回初始位置的动作:`navigation(welcome)`
+- 返回初始位置后,必须重新启动迎宾模式:,action 应输出 '["welcome()"]'
+### 示例 
+- 用户说"带我去会议室",action 应输出 `["navigation(x)"]`
+- 用户说"去XXX开启迎宾模式",action 应输出 `["navigation(x)","get_current_pose(welcome)","welcome()"]`
 ## 输出限制
 - 严格遵循规定的输出格式。
 - 调用的动作函数只能从动作函数库中选取,禁止不存在的编造函数
@@ -203,14 +211,15 @@ action_function_library='''
   - 说明:导航至目标点,`x`根据地图映射中的符号(如:茶水间→`A`,会议室→`C`)。  
 - **返回初始位置**:`navigation(zero)`  
   - 相近语义:回到初始位置、返回起点。   
-- **记录当前位置**:`get_current_pose()`    
+- **记录当前位置**:`get_current_pose(point_name)`  - 说明:记录当前位置到指定名称的导航点位,`point_name`默认为"zero",可设置为任意名称(如迎宾业务设置"welcome")。    
 ### 示例  
-- 导航去茶水间:`navigation(A)`  、回到初始位置:`navigation(zero)` 、记录当前位置:`get_current_pose()`  
-
+  - 导航去茶水间:  `navigation(A)` 
+  - 去茶水间看看有没有人回来告诉我: action 应输出 `["get_current_pose(zero)","navigation(x)","get_current_pose(zero)"]`
+  - 说明:需要返回的任务,都需要先记录当前位置,然后导航到目标位置执行任务,最后回到初始位置。 
 ## 对话控制类
 - **询问用户**:`ask_user()`
   - 说明:向用户提出问题,并等待用户继续回答。
-  - 使用场景:用户指令不完整、目标地点不明确、参数缺失、需要确认。
+  - 使用场景:用户指令不完整、目标地点不明确、参数缺失、需要确认,到达地点后询问用户还有什么需求
   - 播放完成后系统会自动继续录音,用户无需再次说唤醒词。
   - ask_user() 不是物理动作,不会移动机器人。
 - **结束对话**:`finish_dialogue()`
@@ -228,7 +237,10 @@ action_function_library='''
   - 说明:清空上下文,结束任务(如用户指令“退下”“休息”)。  
 - **等待一段时间**:`wait(x)`  
   - 说明:暂停x秒
-- **最后一个动作步骤时完成时调用**:`finishtask()` 
+- **最后一个动作步骤时完成时调用**:`ask_user()`
+  - 说明:询问用户还有什么需求
+- **如果用户确定没有任何需求并且没有可以要执行的任务时调用**:`finishtask()`
+  - 说明:结束当前任务周期,并告知用户已经完成所有任务  
 '''
 
 sample_library='''
@@ -238,6 +250,7 @@ sample_library='''
 {"action": ["ask_user()"], "response": "哎呀,你这是在考我吗?不过我不能乱猜路线哦。请告诉我你想去哪里,我马上帮你规划。"}
 {"action": ["ask_user()"], "response": "没问题,不过你还没告诉我具体目的地呢。你想让我去哪里?"}
 {"action": ["ask_user()"], "response": "这个我可不能替你乱选哦。你想去办公室、酒店大堂、园区还是充电点呢?"}
+{"action": ["navigation(x)","get_current_pose(welcome)","welcome()"], "response": "好的,我现在就去酒店大堂,并启动迎宾模式。"}
 '''
 
 def get_prompt():