large_model_interface.py 38 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012
  1. from dashscope import Application
  2. import dashscope
  3. from openai import OpenAI
  4. import os
  5. import piper
  6. import wave
  7. from http import HTTPStatus
  8. from dashscope.audio.asr import Recognition
  9. from funasr import AutoModel
  10. from dashscope.audio.tts_v2 import *
  11. from dashscope.audio.asr import *
  12. from ament_index_python.packages import get_package_share_directory
  13. from dify_client2 import CompletionClient, ChatClient
  14. from promot import get_prompt, get_large_model_config, get_model_paths, get_system_config
  15. import yaml
  16. import base64
  17. import requests
  18. import json
  19. import netifaces
  20. from urllib.request import urlopen
  21. from urllib.request import Request
  22. from urllib.error import URLError
  23. from urllib.parse import urlencode
  24. from urllib.parse import quote_plus
  25. import websocket
  26. import datetime
  27. import hashlib
  28. import base64
  29. import hmac
  30. from urllib.parse import urlencode
  31. import time
  32. import ssl
  33. from wsgiref.handlers import format_date_time
  34. from datetime import datetime
  35. from time import mktime
  36. import _thread as thread
  37. from subprocess import Popen
  38. import functools
  39. def measure_execution_time(func):
  40. """
  41. 装饰器:测量函数执行时间并使用 ROS 日志打印结果
  42. """
  43. @functools.wraps(func)
  44. def wrapper(self, *args, **kwargs):
  45. start_time = time.time()
  46. result = func(self, *args, **kwargs)
  47. end_time = time.time()
  48. execution_time = end_time - start_time
  49. # 使用 ROS 日志系统记录执行时间
  50. if hasattr(self, 'get_logger'):
  51. self.get_logger().info(f"[性能统计] {func.__name__} 函数执行时间: {execution_time:.4f} 秒")
  52. else:
  53. print(f"[性能统计] {func.__name__} 函数执行时间: {execution_time:.4f} 秒")
  54. return result
  55. return wrapper
# Text accumulated by on_message() while an ASR request is running.
xufei = ""
# Placeholder binding; the name is rebound by the ``Ws_Param`` class below.
Ws_Param = ""
STATUS_FIRST_FRAME = 0  # marker for the first audio frame
STATUS_CONTINUE_FRAME = 1  # marker for an intermediate audio frame
STATUS_LAST_FRAME = 2  # marker for the last audio frame
# Recording that rec_wav_music_en() hands to the ASR request.
record_speech_file = os.path.join(
    get_package_share_directory("largemodel"), "resources_file", "user_speech.wav"
)
  64. class Ws_Param(object):
  65. # 初始化
  66. def __init__(self, APPID, APIKey, APISecret, AudioFile):
  67. self.APPID = APPID
  68. self.APIKey = APIKey
  69. self.APISecret = APISecret
  70. self.AudioFile = AudioFile
  71. # 公共参数(common)
  72. self.CommonArgs = {"app_id": self.APPID}
  73. # 业务参数(business),更多个性化参数可在官网查看
  74. self.BusinessArgs = {
  75. "domain": "iat",
  76. "language": "en_us",
  77. "accent": "mandarin",
  78. "vinfo": 1,
  79. "vad_eos": 10000,
  80. }
  81. # 生成url
  82. def create_url(self):
  83. url = "wss://ws-api.xfyun.cn/v2/iat"
  84. # 生成RFC1123格式的时间戳
  85. now = datetime.now()
  86. date = format_date_time(mktime(now.timetuple()))
  87. # 拼接字符串
  88. signature_origin = "host: " + "ws-api.xfyun.cn" + "\n"
  89. signature_origin += "date: " + date + "\n"
  90. signature_origin += "GET " + "/v2/iat " + "HTTP/1.1"
  91. # 进行hmac-sha256进行加密
  92. signature_sha = hmac.new(
  93. self.APISecret.encode("utf-8"),
  94. signature_origin.encode("utf-8"),
  95. digestmod=hashlib.sha256,
  96. ).digest()
  97. signature_sha = base64.b64encode(signature_sha).decode(encoding="utf-8")
  98. authorization_origin = (
  99. 'api_key="%s", algorithm="%s", headers="%s", signature="%s"'
  100. % (self.APIKey, "hmac-sha256", "host date request-line", signature_sha)
  101. )
  102. authorization = base64.b64encode(authorization_origin.encode("utf-8")).decode(
  103. encoding="utf-8"
  104. )
  105. # 将请求的鉴权参数组合为字典
  106. v = {"authorization": authorization, "date": date, "host": "ws-api.xfyun.cn"}
  107. # 拼接鉴权参数,生成url
  108. url = url + "?" + urlencode(v)
  109. return url
  110. # 收到websocket消息的处理
  111. def on_message(ws, message):
  112. try:
  113. code = json.loads(message)["code"]
  114. sid = json.loads(message)["sid"]
  115. if code != 0:
  116. errMsg = json.loads(message)["message"]
  117. # print("sid:%s call error:%s code is:%s" % (sid, errMsg, code))
  118. else:
  119. data = json.loads(message)["data"]["result"]["ws"]
  120. result = ""
  121. for i in data:
  122. for w in i["cw"]:
  123. result += w["w"]
  124. global xufei
  125. xufei += result
  126. except Exception as e:
  127. print("receive msg,but parse exception:", e)
  128. # 收到websocket错误的处理
  129. def on_error(ws, error):
  130. print("### error:", error)
  131. # 收到websocket关闭的处理
  132. def on_close(ws, a, b):
  133. # print("###speak iat closed ###")
  134. return
  135. # 收到websocket连接建立的处理
  136. def on_open(ws):
  137. def run(*args):
  138. frameSize = 8000 # 每一帧的音频大小
  139. intervel = 0.04 # 发送音频间隔(单位:s)
  140. status = (
  141. STATUS_FIRST_FRAME # 音频的状态信息,标识音频是第一帧,还是中间帧、最后一帧
  142. )
  143. with open(wsParam.AudioFile, "rb") as fp:
  144. while True:
  145. buf = fp.read(frameSize)
  146. # 文件结束
  147. if not buf:
  148. status = STATUS_LAST_FRAME
  149. # 第一帧处理
  150. # 发送第一帧音频,带business 参数
  151. # appid 必须带上,只需第一帧发送
  152. if status == STATUS_FIRST_FRAME:
  153. d = {
  154. "common": wsParam.CommonArgs,
  155. "business": wsParam.BusinessArgs,
  156. "data": {
  157. "status": 0,
  158. "format": "audio/L16;rate=16000",
  159. "audio": str(base64.b64encode(buf), "utf-8"),
  160. "encoding": "raw",
  161. },
  162. }
  163. d = json.dumps(d)
  164. ws.send(d)
  165. status = STATUS_CONTINUE_FRAME
  166. # 中间帧处理
  167. elif status == STATUS_CONTINUE_FRAME:
  168. d = {
  169. "data": {
  170. "status": 1,
  171. "format": "audio/L16;rate=16000",
  172. "audio": str(base64.b64encode(buf), "utf-8"),
  173. "encoding": "raw",
  174. }
  175. }
  176. ws.send(json.dumps(d))
  177. # 最后一帧处理
  178. elif status == STATUS_LAST_FRAME:
  179. d = {
  180. "data": {
  181. "status": 2,
  182. "format": "audio/L16;rate=16000",
  183. "audio": str(base64.b64encode(buf), "utf-8"),
  184. "encoding": "raw",
  185. }
  186. }
  187. ws.send(json.dumps(d))
  188. time.sleep(1)
  189. break
  190. # 模拟音频采样间隔
  191. time.sleep(intervel)
  192. ws.close()
  193. thread.start_new_thread(run, ())
# Parameter object of the request in flight; rec_wav_music_en() and
# Xinghou_speaktts() rebind it before opening a connection.
wsParam = ""
# File that on_message_1() appends the synthesised audio to.
XUNFEI_TTS_FILE = os.path.join(
    get_package_share_directory("largemodel"), "resources_file", "XUNFEI_TTS.mp3"
)
  198. class Ws_Param_1(object):
  199. # 初始化 initialization
  200. def __init__(self, APPID, APIKey, APISecret, Text):
  201. self.APPID = APPID
  202. self.APIKey = APIKey
  203. self.APISecret = APISecret
  204. self.Text = Text
  205. # 公共参数(common)
  206. self.CommonArgs = {"app_id": self.APPID}
  207. # 业务参数(business),更多个性化参数可在官网查看
  208. self.BusinessArgs = {
  209. "aue": "lame",
  210. "sfl": 1,
  211. "auf": "audio/L16;rate=16000",
  212. "vcn": "x4_xiaoyan",
  213. "tte": "utf8",
  214. "speed": 50,
  215. "pitch": 50,
  216. }
  217. self.Data = {
  218. "status": 2,
  219. "text": str(base64.b64encode(self.Text.encode("utf-8")), "UTF8"),
  220. }
  221. # 使用小语种须使用以下方式,此处的unicode指的是 utf16小端的编码方式,即"UTF-16LE"”
  222. # self.Data = {"status": 2, "text": str(base64.b64encode(self.Text.encode('utf-16')), "UTF8")}
  223. # 生成url Generate URL
  224. def create_url_1(self):
  225. url = "wss://tts-api.xfyun.cn/v2/tts"
  226. # 生成RFC1123格式的时间戳 Generate timestamp in RFC1123 format
  227. now = datetime.now()
  228. date = format_date_time(mktime(now.timetuple()))
  229. # 拼接字符串 Splicing strings
  230. signature_origin = "host: " + "ws-api.xfyun.cn" + "\n"
  231. signature_origin += "date: " + date + "\n"
  232. signature_origin += "GET " + "/v2/tts " + "HTTP/1.1"
  233. # 进行hmac-sha256进行加密 Encrypt hmac-sha256
  234. signature_sha = hmac.new(
  235. self.APISecret.encode("utf-8"),
  236. signature_origin.encode("utf-8"),
  237. digestmod=hashlib.sha256,
  238. ).digest()
  239. signature_sha = base64.b64encode(signature_sha).decode(encoding="utf-8")
  240. authorization_origin = (
  241. 'api_key="%s", algorithm="%s", headers="%s", signature="%s"'
  242. % (self.APIKey, "hmac-sha256", "host date request-line", signature_sha)
  243. )
  244. authorization = base64.b64encode(authorization_origin.encode("utf-8")).decode(
  245. encoding="utf-8"
  246. )
  247. # 将请求的鉴权参数组合为字典 Combine the requested authentication parameters into a dictionary
  248. v = {"authorization": authorization, "date": date, "host": "ws-api.xfyun.cn"}
  249. # 拼接鉴权参数,生成url Splicing authentication parameters and generating URLs
  250. url = url + "?" + urlencode(v)
  251. return url
  252. def on_message_1(ws, message):
  253. try:
  254. message = json.loads(message)
  255. code = message["code"]
  256. sid = message["sid"]
  257. audio = message["data"]["audio"]
  258. audio = base64.b64decode(audio)
  259. status = message["data"]["status"]
  260. # print(message)
  261. if status == 2:
  262. # print("ws is closed")
  263. ws.close()
  264. if code != 0:
  265. errMsg = message["message"]
  266. print("sid:%s call error:%s code is:%s" % (sid, errMsg, code))
  267. else:
  268. with open(XUNFEI_TTS_FILE, "ab") as f:
  269. f.write(audio)
  270. except Exception as e:
  271. print("receive msg,but parse exception:", e)
  272. # 收到websocket错误的处理 Handling of websocket errors received
  273. def on_error_1(ws, error):
  274. print("### error:", error)
  275. def on_close_1(ws, close_status_code, close_msg):
  276. return
  277. # 收到websocket连接建立的处理 Received processing for establishing websocket connection
  278. def on_open_1(ws):
  279. def run(*args):
  280. d = {
  281. "common": wsParam.CommonArgs,
  282. "business": wsParam.BusinessArgs,
  283. "data": wsParam.Data,
  284. }
  285. d = json.dumps(d)
  286. # print("------>开始发送文本数据")
  287. ws.send(d)
  288. if os.path.exists(XUNFEI_TTS_FILE):
  289. os.remove(XUNFEI_TTS_FILE)
  290. thread.start_new_thread(run, ())
  291. class model_interface:
  292. def __init__(self, logger=None):
  293. self.logger = logger # 可选的 logger 用于打印调试信息
  294. self.init_config_param()
  295. dashscope.api_key = self.tongyi_api_key
  296. def init_config_param(self):
  297. self.pkg_path = get_package_share_directory("largemodel")
  298. config_param_file = os.path.join(
  299. self.pkg_path, "config", "large_model_interface.yaml"
  300. )
  301. with open(config_param_file, "r") as file:
  302. config_param = yaml.safe_load(file)
  303. self.tongyi_api_key = config_param.get("tongyi_api_key")
  304. self.tongyi_base_url = config_param.get("tongyi_base_url")
  305. self.tongyi_app_id = config_param.get("tongyi_app_id")
  306. self.oline_asr_model = config_param.get("oline_asr_model")
  307. self.zh_tts_model = config_param.get("zh_tts_model")
  308. self.zh_tts_json = config_param.get("zh_tts_json")
  309. self.en_tts_model = config_param.get("en_tts_model")
  310. self.en_tts_json = config_param.get("en_tts_json")
  311. self.multimodel = config_param.get("multimodel")
  312. self.ANYTHINGLLM_BASE_URL = config_param.get("ANYTHINGLLM_BASE_URL")
  313. self.API_KEY = config_param.get("API_KEY")
  314. self.WORKSPACE_SLUG = config_param.get("WORKSPACE_SLUG")
  315. self.oline_asr_sample_rate = config_param.get("oline_asr_sample_rate")
  316. self.oline_tts_model = config_param.get("oline_tts_model")
  317. self.voice_tone = config_param.get("voice_tone")
  318. self.local_asr_model = config_param.get("local_asr_model")
  319. self.tts_supplier = config_param.get("tts_supplier")
  320. self.baidu_API_KEY = config_param.get("baidu_API_KEY")
  321. self.baidu_SECRET_KEY = config_param.get("baidu_SECRET_KEY")
  322. self.CUID = config_param.get("CUID")
  323. self.PER = config_param.get("PER")
  324. self.SPD = config_param.get("SPD")
  325. self.PIT = config_param.get("PIT")
  326. self.VOL = config_param.get("VOL")
  327. self.decision_AI_api_key = config_param.get("decision_AI_api_key")
  328. self.execution_AI_api_key = config_param.get("execution_AI_api_key")
  329. self.network_adapter = config_param.get("network_adapter")
  330. self.decision_id = None # dify决策层id
  331. self.execution_id = None # dify执行层id
  332. self.international_mode = False # 是否启用国际模式,默认为国内模式
  333. # 从缓存更新配置(如果缓存中有配置的话)
  334. self.update_config_from_cache()
  335. def update_config_from_cache(self):
  336. """从缓存更新配置(从 config_node 订阅获取)"""
  337. # 获取大模型配置缓存
  338. config = get_large_model_config()
  339. if config:
  340. if config.get('tongyi_api_key'):
  341. self.tongyi_api_key = config.get('tongyi_api_key')
  342. dashscope.api_key = self.tongyi_api_key
  343. if config.get('tongyi_base_url'):
  344. self.tongyi_base_url = config.get('tongyi_base_url')
  345. if config.get('tongyi_app_id'):
  346. self.tongyi_app_id = config.get('tongyi_app_id')
  347. if config.get('multimodel'):
  348. self.multimodel = config.get('multimodel')
  349. if config.get('oline_asr_model'):
  350. self.oline_asr_model = config.get('oline_asr_model')
  351. if config.get('oline_asr_sample_rate'):
  352. self.oline_asr_sample_rate = config.get('oline_asr_sample_rate')
  353. if config.get('oline_tts_model'):
  354. self.oline_tts_model = config.get('oline_tts_model')
  355. if config.get('voice_tone'):
  356. self.voice_tone = config.get('voice_tone')
  357. if config.get('tts_supplier'):
  358. self.tts_supplier = config.get('tts_supplier')
  359. if config.get('baidu_API_KEY'):
  360. self.baidu_API_KEY = config.get('baidu_API_KEY')
  361. if config.get('baidu_SECRET_KEY'):
  362. self.baidu_SECRET_KEY = config.get('baidu_SECRET_KEY')
  363. if config.get('CUID'):
  364. self.CUID = config.get('CUID')
  365. if config.get('PER'):
  366. self.PER = config.get('PER')
  367. if config.get('SPD'):
  368. self.SPD = config.get('SPD')
  369. if config.get('PIT'):
  370. self.PIT = config.get('PIT')
  371. if config.get('VOL'):
  372. self.VOL = config.get('VOL')
  373. if config.get('decision_AI_api_key'):
  374. self.decision_AI_api_key = config.get('decision_AI_api_key')
  375. if config.get('execution_AI_api_key'):
  376. self.execution_AI_api_key = config.get('execution_AI_api_key')
  377. if config.get('network_adapter'):
  378. self.network_adapter = config.get('network_adapter')
  379. # 获取模型路径缓存
  380. paths = get_model_paths()
  381. if paths:
  382. if paths.get('zh_tts_model'):
  383. self.zh_tts_model = paths.get('zh_tts_model')
  384. if paths.get('zh_tts_json'):
  385. self.zh_tts_json = paths.get('zh_tts_json')
  386. if paths.get('en_tts_model'):
  387. self.en_tts_model = paths.get('en_tts_model')
  388. if paths.get('en_tts_json'):
  389. self.en_tts_json = paths.get('en_tts_json')
  390. if paths.get('local_asr_model'):
  391. self.local_asr_model = paths.get('local_asr_model')
  392. # 获取系统配置缓存
  393. system = get_system_config()
  394. if system:
  395. if system.get('tongyi_base_url'):
  396. self.tongyi_base_url = system.get('tongyi_base_url')
  397. def update_config(self, config):
  398. """
  399. 动态更新配置(供外部调用)
  400. 当 config_node 发布新配置时会调用此方法
  401. """
  402. if config.get('tongyi_api_key'):
  403. self.tongyi_api_key = config.get('tongyi_api_key')
  404. dashscope.api_key = self.tongyi_api_key
  405. if config.get('tongyi_base_url'):
  406. self.tongyi_base_url = config.get('tongyi_base_url')
  407. if config.get('tongyi_app_id'):
  408. self.tongyi_app_id = config.get('tongyi_app_id')
  409. if config.get('multimodel'):
  410. self.multimodel = config.get('multimodel')
  411. if config.get('oline_asr_model'):
  412. self.oline_asr_model = config.get('oline_asr_model')
  413. if config.get('oline_tts_model'):
  414. self.oline_tts_model = config.get('oline_tts_model')
  415. if config.get('voice_tone'):
  416. self.voice_tone = config.get('voice_tone')
  417. if config.get('tts_supplier'):
  418. self.tts_supplier = config.get('tts_supplier')
  419. if config.get('baidu_API_KEY'):
  420. self.baidu_API_KEY = config.get('baidu_API_KEY')
  421. if config.get('baidu_SECRET_KEY'):
  422. self.baidu_SECRET_KEY = config.get('baidu_SECRET_KEY')
  423. if config.get('CUID'):
  424. self.CUID = config.get('CUID')
  425. if config.get('PER'):
  426. self.PER = config.get('PER')
  427. if config.get('SPD'):
  428. self.SPD = config.get('SPD')
  429. if config.get('PIT'):
  430. self.PIT = config.get('PIT')
  431. if config.get('VOL'):
  432. self.VOL = config.get('VOL')
  433. if config.get('decision_AI_api_key'):
  434. self.decision_AI_api_key = config.get('decision_AI_api_key')
  435. if config.get('execution_AI_api_key'):
  436. self.execution_AI_api_key = config.get('execution_AI_api_key')
  437. if config.get('network_adapter'):
  438. self.network_adapter = config.get('network_adapter')
  439. def init_dify_client(self):
  440. self.international_mode = True
  441. self.user = "yahboom"
  442. self.decision_client = ChatClient(
  443. self.decision_AI_api_key, base_url="http://localhost/v1"
  444. )
  445. self.execution_client = ChatClient(
  446. self.execution_AI_api_key, base_url="http://localhost/v1"
  447. )
  448. if self.decision_client is not None:
  449. return True
  450. else:
  451. return False
  452. def init_Multimodal(self):
  453. self.multimodal_client = OpenAI(
  454. api_key=self.tongyi_api_key, base_url=self.tongyi_base_url
  455. )
  456. self.init_Multimodal_history(get_prompt())
  457. def init_Multimodal_history(self, system_prompt):
  458. self.Multimodalmessages = []
  459. self.Multimodalmessages.append(
  460. {"role": "user", "content": [{"type": "text", "text": system_prompt}]}
  461. )
  462. self.Multimodalmessages.append(
  463. {
  464. "role": "assistant",
  465. "content": [
  466. {
  467. "type": "text",
  468. "text": "我已经记住所有规则、动作函数和案例了,请开始您的指令吧",
  469. }
  470. ],
  471. }
  472. )
  473. def init_oline_asr(self, language):
  474. self.language = language
  475. return self.oline_asr_model
  476. def multimodalinfer(self, prompt, image_path=None):
  477. """version: 2.0
  478. 通用多模态接口,适用于通义千问平台的多模态模型
  479. """
  480. if image_path:
  481. image_data = self.encode_image(image_path)
  482. conversation_entry = {
  483. "role": "user",
  484. "content": [
  485. {
  486. "type": "image_url",
  487. "image_url": {"url": f"data:image/png;base64,{image_data}"},
  488. },
  489. {"type": "text", "text": "机器人反馈:执行seewhat()完成"},
  490. ],
  491. }
  492. else:
  493. conversation_entry = {
  494. "role": "user",
  495. "content": [{"type": "text", "text": prompt}],
  496. }
  497. self.Multimodalmessages.append(conversation_entry)
  498. completion = self.multimodal_client.chat.completions.create(
  499. model=self.multimodel, messages=self.Multimodalmessages
  500. )
  501. self.Multimodalmessages.append(
  502. {
  503. "role": "assistant",
  504. "content": [
  505. {"type": "text", "text": completion.choices[0].message.content}
  506. ],
  507. }
  508. )
  509. return completion.choices[0].message.content
  510. def TaskDecision(self, input: str) -> list: # 任务决策规划
  511. """
  512. 决策层模型接口
  513. input: 用户输入
  514. """
  515. if self.international_mode: # 国际版,调用本地dify应用API
  516. try:
  517. # 打印发送给 Dify 决策层的请求信息
  518. if self.logger:
  519. self.logger.info(f"[决策层-Dify] 发送请求: query={input}")
  520. chat_response = self.decision_client.create_chat_message(
  521. inputs={},
  522. query=input,
  523. user=self.user,
  524. response_mode="blocking",
  525. )
  526. chat_response.raise_for_status()
  527. result = chat_response.json()
  528. # 打印 Dify 返回结果
  529. if self.logger:
  530. self.logger.info(f"[决策层-Dify] 返回结果: {result}")
  531. if result.get("answer") is not None:
  532. output = [True, result.get("answer"), result.get("conversation_id")]
  533. else:
  534. output = [
  535. False,
  536. "The model service is abnormal. Check the large model account or configuration options",
  537. None,
  538. ]
  539. except Exception as e:
  540. if self.logger:
  541. self.logger.error(f"[决策层-Dify] 调用异常: {e}")
  542. output = [
  543. False,
  544. "The model service is abnormal. Check the large model account or configuration options",
  545. None,
  546. ]
  547. else: # 国内版,调用百炼大模型平台应用API
  548. try:
  549. # 打印发送给百炼的请求信息
  550. if self.logger:
  551. self.logger.info(f"[决策层-百炼] 发送请求:")
  552. self.logger.info(f" - api_key: {self.tongyi_api_key[:10]}...")
  553. self.logger.info(f" - app_id: {self.tongyi_app_id}")
  554. self.logger.info(f" - prompt: {input}")
  555. response = Application.call(
  556. api_key=self.tongyi_api_key, app_id=self.tongyi_app_id, prompt=input
  557. )
  558. # 打印百炼返回结果
  559. if self.logger:
  560. self.logger.info(f"[决策层-百炼] 返回结果: {response}")
  561. if hasattr(response, 'output') and response.output:
  562. self.logger.info(f"[决策层-百炼] output.text: {response.output.text}")
  563. if hasattr(response, 'usage'):
  564. self.logger.info(f"[决策层-百炼] usage: {response.usage}")
  565. if hasattr(response, 'request_id'):
  566. self.logger.info(f"[决策层-百炼] request_id: {response.request_id}")
  567. if response.output.text is not None:
  568. output = [True, response.output.text, None]
  569. else:
  570. output = [
  571. False,
  572. "The model service is abnormal. Check the large model account or configuration options",
  573. None,
  574. ]
  575. except Exception as e:
  576. if self.logger:
  577. self.logger.error(f"[决策层-百炼] 调用异常: {e}")
  578. output = [
  579. False,
  580. "The model service is abnormal. Check the large model account or configuration options",
  581. None,
  582. ]
  583. return output
  584. def TaskExecution(
  585. self,
  586. input: str,
  587. map_mapping: str,
  588. language: str,
  589. image_path=None,
  590. conversation_id=None,
  591. ) -> list: # 执行层模型接口
  592. """
  593. 执行层模型接口,适用于dify
  594. input: 用户输入
  595. map_mapping: 地图映射
  596. language: 回复语言
  597. image_path: 图片路径
  598. conversation_id: 会话id
  599. return:list
  600. """
  601. if image_path is not None:
  602. with open(image_path, "rb") as file: # 上传图片
  603. files = {"file": ("robot-perspective-picture", file, "image/png")}
  604. response = self.execution_client.file_upload("yahboom", files)
  605. file_id = response.json().get("id")
  606. image = [
  607. {
  608. "type": "image",
  609. "transfer_method": "local_file",
  610. "upload_file_id": file_id,
  611. }
  612. ]
  613. try:
  614. chat_response = self.execution_client.create_chat_message(
  615. inputs={"map_mapping": map_mapping, "language": language},
  616. query=input,
  617. user=self.user,
  618. response_mode="blocking",
  619. conversation_id=conversation_id,
  620. files=image,
  621. )
  622. chat_response.raise_for_status()
  623. result = chat_response.json()
  624. if result.get("answer") is not None:
  625. output = [True, result.get("answer"), result.get("conversation_id")]
  626. else:
  627. output = [
  628. False,
  629. "The model service is abnormal. Check the large model account or configuration options",
  630. None,
  631. ]
  632. except Exception as e:
  633. output = [
  634. False,
  635. "The model service is abnormal. Check the large model account or configuration options",
  636. None,
  637. ]
  638. else:
  639. try:
  640. chat_response = self.execution_client.create_chat_message(
  641. inputs={"map_mapping": map_mapping, "language": language},
  642. query=input,
  643. user=self.user,
  644. response_mode="blocking",
  645. conversation_id=conversation_id,
  646. )
  647. chat_response.raise_for_status()
  648. result = chat_response.json()
  649. if result.get("answer") is not None:
  650. output = [True, result.get("answer"), result.get("conversation_id")]
  651. else:
  652. output = [
  653. False,
  654. "The model service is abnormal. Check the large model account or configuration options",
  655. None,
  656. ]
  657. except Exception as e:
  658. output = [
  659. False,
  660. "The model service is abnormal. Check the large model account or configuration options",
  661. None,
  662. ]
  663. return output
  664. def oline_asr(self, input_file):
  665. """
  666. 语音识别接口,兼容通义千问平台paraformer、gummy系列模型
  667. """
  668. if self.oline_asr_model in [
  669. "paraformer-realtime-v2",
  670. "paraformer-realtime-v1",
  671. "paraformer-realtime-8k-v2",
  672. "paraformer-realtime-8k-v1",
  673. ]:
  674. output = self.paraformer_asr_inferce(input_file)
  675. return output
  676. elif self.oline_asr_model in ["gummy-realtime-v1", "gummy-chat-v1"]:
  677. output = self.gummy_asr_inferce(input_file)
  678. return output
  679. def paraformer_asr_inferce(self, input_file):
  680. """
  681. 通义千问平台paraformer模型接口
  682. """
  683. recognition = Recognition(
  684. model=self.oline_asr_model,
  685. format="wav",
  686. sample_rate=self.oline_asr_sample_rate,
  687. callback=None,
  688. )
  689. result = recognition.call(input_file)
  690. if result.status_code == HTTPStatus.OK:
  691. sentences = result.get_sentence()
  692. if sentences and isinstance(sentences, list):
  693. return ["ok", sentences[0].get("text", "")]
  694. else:
  695. return [
  696. "error",
  697. "ASR Error: The large model returns an empty result. Please check the account balance or parameter configuration",
  698. ]
  699. else:
  700. return ["error", "ASR Error:" + result.message]
  701. def gummy_asr_inferce(self, input_file):
  702. """
  703. 通义千问平台gummy模型接口
  704. """
  705. translator = TranslationRecognizerRealtime(
  706. model=self.oline_asr_model,
  707. format="wav",
  708. sample_rate=self.oline_asr_sample_rate,
  709. translation_target_languages=[self.language],
  710. translation_enabled=True,
  711. callback=None,
  712. )
  713. result = translator.call(input_file)
  714. if not result.error_message:
  715. output = ""
  716. for transcription_result in result.transcription_result_list:
  717. output += transcription_result.text
  718. return ["ok", output]
  719. else:
  720. return ["error", result.error_message]
  721. def init_local_asr_model(self):
  722. self.model_senceVoice = AutoModel(
  723. model=self.local_asr_model, trust_remote_code=False, disable_update=True
  724. )
  725. def tts_model_init(self, model_type="oline", language="zh"):
  726. if model_type == "oline":
  727. if self.tts_supplier == "baidu":
  728. self.token = self.fetch_token()
  729. self.model_type = "oline"
  730. elif model_type == "local":
  731. self.model_type = "local"
  732. # 初始化Piper语音合成模型
  733. if language == "zh":
  734. tts_model = self.zh_tts_model
  735. tts_json = self.zh_tts_json
  736. elif language == "en":
  737. tts_model = self.en_tts_model
  738. tts_json = self.en_tts_json
  739. self.synthesizer = piper.PiperVoice.load(
  740. tts_model, config_path=tts_json, use_cuda=False
  741. )
  742. elif model_type == "XUNFEI_FOR_INTERNATIONAL":
  743. self.model_type = "XUNFEI_FOR_INTERNATIONAL"
  744. def SenseVoiceSmall_ASR(self, input_file, language="zn"):
  745. res = self.model_senceVoice.generate(
  746. input=input_file,
  747. cache={},
  748. language=language, # "zn", "en", "yue", "ja", "ko", "nospeech"
  749. use_itn=False,
  750. )
  751. prompt = res[0]["text"].split(">")[-1]
  752. return ["ok", prompt]
  753. @measure_execution_time
  754. def voice_synthesis(self, text, path):
  755. """
  756. 语音合成
  757. text:合成的文本
  758. path:保存路径
  759. 返回1:失败 返回0:成功
  760. """
  761. if self.model_type == "oline":
  762. if self.tts_supplier == "baidu":
  763. """
  764. 百度智能云平台语音合成模型接口
  765. """
  766. # print('baiduhecheng')
  767. TTS_URL = "http://tsn.baidu.com/text2audio"
  768. tex = quote_plus(text)
  769. params = {
  770. "tok": self.token,
  771. "tex": tex,
  772. "per": self.PER,
  773. "spd": self.SPD,
  774. "pit": self.PIT,
  775. "vol": self.VOL,
  776. "aue": 3,
  777. "cuid": self.CUID,
  778. "lan": "zh",
  779. "ctp": 1,
  780. } # lan ctp 固定参数
  781. data = urlencode(params)
  782. req = Request(TTS_URL, data.encode("utf-8"))
  783. # has_error = False
  784. try:
  785. f = urlopen(req)
  786. result_str = f.read()
  787. # headers = dict((name.lower(), value) for name, value in f.headers.items())
  788. except URLError as err:
  789. print("asr http response http code : " + str(err.code))
  790. result_str = err.read()
  791. # has_error = True
  792. return 1
  793. with open(path, "wb") as of:
  794. of.write(result_str)
  795. return 0
  796. elif self.tts_supplier == "aliyun":
  797. """
  798. 阿里通义语音合成接口
  799. """
  800. self.synthesizer = SpeechSynthesizer(
  801. model=self.oline_tts_model, voice=self.voice_tone, volume=100
  802. )
  803. audio = self.synthesizer.call(text)
  804. if audio is None:
  805. return 1
  806. else:
  807. with open(path, "wb") as f:
  808. f.write(audio)
  809. return 0
  810. elif self.model_type == "local":
  811. with wave.open(path, "wb") as wav_file:
  812. wav_file.setnchannels(1) # 单声道
  813. wav_file.setsampwidth(2) # 16位采样
  814. wav_file.setframerate(self.synthesizer.config.sample_rate) # 设置采样率
  815. # 进行文本转语音
  816. self.synthesizer.synthesize(text, wav_file)
  817. elif self.model_type == "XUNFEI_FOR_INTERNATIONAL":
  818. Xinghou_speaktts(text)
  819. def openrouter_model_infer(self, prompt, image_path=None):
  820. """
  821. 使用anythingllm连接openrouter平台大模型:已弃用
  822. Connect the large model of the openrouter platform using anythingllm
  823. """
  824. if image_path:
  825. image_data = self.encode_image(image_path)
  826. data = {
  827. "message": self.system_text["text1"],
  828. "mode": "chat",
  829. "attachments": [
  830. {
  831. "name": "image.png",
  832. "mime": "image/png",
  833. "contentString": f"data:image/png;base64,{image_data}",
  834. }
  835. ],
  836. "reset": False,
  837. }
  838. else:
  839. data = {"message": prompt, "mode": "chat"}
  840. # --- 发送 POST 请求 ---
  841. response = requests.post(self.chat_endpoint, headers=self.headers, json=data)
  842. response.raise_for_status() # 如果请求失败 (状态码 >= 400),则抛出异常
  843. # --- 处理响应 ---
  844. result = response.json()
  845. return result["textResponse"]
  846. def fetch_token(self):
  847. """
  848. 专用于百度语音合成的token生成方法,百度平台有专有的token生成工具
  849. """
  850. TOKEN_URL = "http://aip.baidubce.com/oauth/2.0/token"
  851. SCOPE = "audio_tts_post" # 有此scope表示有tts能力,没有请在网页里勾选
  852. params = {
  853. "grant_type": "client_credentials",
  854. "client_id": self.baidu_API_KEY,
  855. "client_secret": self.baidu_SECRET_KEY,
  856. }
  857. post_data = urlencode(params)
  858. post_data = post_data.encode("utf-8")
  859. req = Request(TOKEN_URL, post_data)
  860. try:
  861. f = urlopen(req, timeout=5)
  862. result_str = f.read()
  863. except URLError as err:
  864. print("token http response http code : " + str(err.code))
  865. result_str = err.read()
  866. result_str = result_str.decode()
  867. result = json.loads(result_str)
  868. if "access_token" in result.keys() and "scope" in result.keys():
  869. return result["access_token"]
  870. @staticmethod
  871. def encode_image(image_path):
  872. with open(image_path, "rb") as image_file:
  873. return base64.b64encode(image_file.read()).decode("utf-8")
  874. @staticmethod
  875. def get_ip(network_interface):
  876. addresses = netifaces.ifaddresses(network_interface)
  877. if netifaces.AF_INET in addresses:
  878. for info in addresses[netifaces.AF_INET]:
  879. if "addr" in info:
  880. return info["addr"]
  881. # 录完音,可以直接调用去识别 After recording the audio, it can be directly called for recognition
  882. def rec_wav_music_en():
  883. global xufei, wsParam
  884. xufei = ""
  885. # time1 = datetime.now()
  886. wsParam = Ws_Param(
  887. APPID="f12672f1",
  888. APISecret="NmUyYTRmNTM2MjE3OWJkMDczYzlhZDgz",
  889. APIKey="8c7b9858dc5e11e8490ce0d09879ad1e",
  890. AudioFile=record_speech_file,
  891. )
  892. websocket.enableTrace(False)
  893. wsUrl = wsParam.create_url()
  894. ws = websocket.WebSocketApp(
  895. wsUrl, on_message=on_message, on_error=on_error, on_close=on_close
  896. )
  897. ws.on_open = on_open
  898. ws.run_forever(sslopt={"cert_reqs": ssl.CERT_NONE})
  899. return xufei
  900. def Xinghou_speaktts(context):
  901. global wsParam
  902. # 测试时候在此处正确填写相关信息即可运行 Fill in the relevant information correctly here during testing to run
  903. wsParam = Ws_Param_1(
  904. APPID="f12672f1",
  905. APISecret="NmUyYTRmNTM2MjE3OWJkMDczYzlhZDgz",
  906. APIKey="8c7b9858dc5e11e8490ce0d09879ad1e",
  907. Text=context,
  908. )
  909. websocket.enableTrace(False)
  910. wsUrl = wsParam.create_url_1()
  911. ws = websocket.WebSocketApp(
  912. wsUrl, on_message=on_message_1, on_error=on_error_1, on_close=on_close_1
  913. )
  914. ws.on_open = on_open_1
  915. ws.run_forever(sslopt={"cert_reqs": ssl.CERT_NONE})