TtsUtil.java 21 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679
  1. package com.ruoyi.common.utils;
  2. import com.alibaba.dashscope.utils.Constants;
  3. import com.alibaba.dashscope.utils.JsonUtils;
  4. import com.ruoyi.common.config.DashScopeProperties;
  5. import com.ruoyi.common.config.RuoYiConfig;
  6. import com.ruoyi.common.utils.spring.SpringUtils;
  7. import com.ruoyi.common.utils.uuid.IdUtils;
  8. import org.apache.commons.lang3.StringUtils;
  9. import org.slf4j.Logger;
  10. import org.slf4j.LoggerFactory;
  11. import java.io.*;
  12. import java.net.HttpURLConnection;
  13. import java.net.URL;
  14. import java.nio.charset.StandardCharsets;
  15. import java.nio.file.Files;
  16. import java.nio.file.Path;
  17. import java.nio.file.Paths;
  18. import java.util.HashMap;
  19. import java.util.Map;
  20. /**
  21. * 阿里云百炼 Qwen-TTS (Text-to-Speech) 工具类
  22. * 使用 DashScope HTTP API 直接调用
  23. *
  24. * @author ruoyi
  25. * @date 2026-05-19
  26. */
  27. public class TtsUtil
  28. {
  29. private static final Logger log = LoggerFactory.getLogger(TtsUtil.class);
  30. /**
  31. * 音频输出目录(相对于 profile 路径)
  32. */
  33. private static final String AUDIO_OUTPUT_DIR = "audio/tts";
  34. /**
  35. * 音频格式
  36. */
  37. private static final String FORMAT = "wav";
  38. /**
  39. * Qwen-TTS 模型名称
  40. */
  41. private static final String MODEL = "qwen3-tts-flash";
  42. /**
  43. * 默认音色
  44. */
  45. private static final String DEFAULT_VOICE = "Cherry";
  46. /**
  47. * 默认语种
  48. */
  49. private static final String LANGUAGE_TYPE = "Chinese";
  50. /**
  51. * API URL
  52. */
  53. private static final String API_URL = "https://dashscope.aliyuncs.com/api/v1/services/aigc/multimodal-generation/generation";
  54. /**
  55. * 音频合成结果
  56. */
  57. public static class SynthesisResult
  58. {
  59. private String audioPath;
  60. private Integer audioDuration;
  61. private String errorMessage;
  62. public SynthesisResult() { }
  63. public SynthesisResult(String audioPath, Integer audioDuration)
  64. {
  65. this.audioPath = audioPath;
  66. this.audioDuration = audioDuration;
  67. }
  68. public SynthesisResult(String errorMessage)
  69. {
  70. this.errorMessage = errorMessage;
  71. }
  72. public String getAudioPath() { return audioPath; }
  73. public void setAudioPath(String audioPath) { this.audioPath = audioPath; }
  74. public Integer getAudioDuration() { return audioDuration; }
  75. public void setAudioDuration(Integer audioDuration) { this.audioDuration = audioDuration; }
  76. public String getErrorMessage() { return errorMessage; }
  77. public void setErrorMessage(String errorMessage) { this.errorMessage = errorMessage; }
  78. public boolean isSuccess() { return audioPath != null && !audioPath.isEmpty(); }
  79. }
  /*
   * Runs once when this class is loaded: assigns the DashScope SDK's global
   * Constants.baseHttpApiUrl.
   */
  static
  {
    Constants.baseHttpApiUrl = "https://dashscope.aliyuncs.com/api/v1";
  }
  87. /**
  88. * 获取 API Key
  89. */
  90. private static String getApiKey()
  91. {
  92. // 优先从环境变量获取
  93. String envKey = System.getenv("DASHSCOPE_API_KEY");
  94. if (StringUtils.isNotBlank(envKey))
  95. {
  96. return envKey;
  97. }
  98. // 从 Spring 配置获取
  99. try
  100. {
  101. DashScopeProperties properties = SpringUtils.getBean(DashScopeProperties.class);
  102. if (properties != null && StringUtils.isNotBlank(properties.getApiKey()))
  103. {
  104. return properties.getApiKey();
  105. }
  106. }
  107. catch (Exception e)
  108. {
  109. // Spring 上下文不可用,忽略
  110. }
  111. // 从系统属性获取
  112. try
  113. {
  114. return System.getProperty("alibaba.dashscope.api-key", "");
  115. }
  116. catch (Exception e)
  117. {
  118. return "";
  119. }
  120. }
  121. /**
  122. * 同步合成语音(使用 HTTP API)
  123. *
  124. * @param text 待合成的文本
  125. * @return 音频文件相对路径(如 audio/tts/xxx.wav),失败返回 null
  126. */
  127. public static String synthesizeSpeech(String text)
  128. {
  129. SynthesisResult result = synthesize(text, DEFAULT_VOICE, LANGUAGE_TYPE);
  130. return result.isSuccess() ? result.getAudioPath() : null;
  131. }
  132. /**
  133. * 同步合成语音(指定音色)
  134. *
  135. * @param text 待合成的文本
  136. * @param voice 音色名称
  137. * @return 音频文件相对路径
  138. */
  139. public static String synthesizeSpeech(String text, String voice)
  140. {
  141. SynthesisResult result = synthesize(text, voice, LANGUAGE_TYPE);
  142. return result.isSuccess() ? result.getAudioPath() : null;
  143. }
  144. /**
  145. * 同步合成语音(指定音色和语种)
  146. *
  147. * @param text 待合成的文本
  148. * @param voice 音色名称
  149. * @param languageType 语种
  150. * @return 音频文件相对路径
  151. */
  152. public static String synthesizeSpeech(String text, String voice, String languageType)
  153. {
  154. SynthesisResult result = synthesize(text, voice, languageType);
  155. return result.isSuccess() ? result.getAudioPath() : null;
  156. }
  157. /**
  158. * 同步合成语音,返回完整结果(包含音频路径和时长)
  159. *
  160. * @param text 待合成的文本
  161. * @param voice 音色名称
  162. * @param languageType 语种
  163. * @return 合成结果
  164. */
  165. public static SynthesisResult synthesize(String text, String voice, String languageType)
  166. {
  167. if (StringUtils.isBlank(text))
  168. {
  169. log.error("[TtsUtil] 文本不能为空");
  170. return new SynthesisResult("文本不能为空");
  171. }
  172. String apiKey = getApiKey();
  173. if (StringUtils.isBlank(apiKey))
  174. {
  175. log.error("[TtsUtil] 阿里云百炼 API Key 未配置,请检查配置或设置 DASHSCOPE_API_KEY 环境变量");
  176. return new SynthesisResult("API Key 未配置");
  177. }
  178. try
  179. {
  180. log.info("[TtsUtil] 开始语音合成,文本长度: {}, 音色: {}, 语种: {}", text.length(), voice, languageType);
  181. // 构建请求体
  182. Map<String, Object> input = new HashMap<>();
  183. input.put("text", text);
  184. input.put("voice", StringUtils.isNotBlank(voice) ? voice : DEFAULT_VOICE);
  185. input.put("language_type", languageType);
  186. Map<String, Object> requestBody = new HashMap<>();
  187. requestBody.put("model", MODEL);
  188. requestBody.put("input", input);
  189. // 发送 HTTP 请求
  190. String jsonResponse = httpPost(API_URL, apiKey, JsonUtils.toJson(requestBody));
  191. if (StringUtils.isBlank(jsonResponse))
  192. {
  193. log.error("[TtsUtil] API 返回为空");
  194. return new SynthesisResult("API 返回为空");
  195. }
  196. log.debug("[TtsUtil] API 响应: {}", jsonResponse);
  197. // 解析响应获取音频 URL
  198. String audioUrl = extractAudioUrl(jsonResponse);
  199. if (StringUtils.isBlank(audioUrl))
  200. {
  201. log.error("[TtsUtil] 未能从响应中提取音频 URL,响应: {}", jsonResponse);
  202. return new SynthesisResult("未能获取音频 URL");
  203. }
  204. // 下载音频文件并获取时长
  205. String localPath = downloadAudio(audioUrl);
  206. if (StringUtils.isBlank(localPath))
  207. {
  208. return new SynthesisResult("下载音频失败");
  209. }
  210. // 计算音频时长
  211. int duration = calculateAudioDuration(localPath);
  212. log.info("[TtsUtil] 语音合成成功,文件路径: {}, 时长: {} 秒", localPath, duration);
  213. return new SynthesisResult(localPath, duration);
  214. }
  215. catch (Exception e)
  216. {
  217. log.error("[TtsUtil] 语音合成异常: {}", e.getMessage(), e);
  218. return new SynthesisResult(e.getMessage());
  219. }
  220. }
  221. /**
  222. * 计算音频时长(秒)
  223. * 根据 WAV 文件头计算时长
  224. */
  225. private static int calculateAudioDuration(String localPath)
  226. {
  227. FileInputStream fis = null;
  228. try
  229. {
  230. File file = new File(RuoYiConfig.getProfile() + File.separator + localPath);
  231. if (!file.exists())
  232. {
  233. return 0;
  234. }
  235. fis = new FileInputStream(file);
  236. byte[] header = new byte[44];
  237. if (fis.read(header) != 44)
  238. {
  239. log.warn("[TtsUtil] WAV 文件头读取失败");
  240. return 0;
  241. }
  242. // 验证 RIFF 标识
  243. if (header[0] != 'R' || header[1] != 'I' || header[2] != 'F' || header[3] != 'F')
  244. {
  245. log.warn("[TtsUtil] 不是有效的 WAV 文件");
  246. return 0;
  247. }
  248. // 提取采样率(offset 24-27,小端序)
  249. int sampleRate = ((header[27] & 0xFF) << 24) | ((header[26] & 0xFF) << 16) |
  250. ((header[25] & 0xFF) << 8) | (header[24] & 0xFF);
  251. // 提取通道数(offset 22-23,小端序)
  252. short numChannels = (short) (((header[23] & 0xFF) << 8) | (header[22] & 0xFF));
  253. // 提取比特率(offset 34-35,小端序)
  254. short bitsPerSample = (short) (((header[35] & 0xFF) << 8) | (header[34] & 0xFF));
  255. // 提取数据大小(offset 40-43,小端序)
  256. int dataSize = ((header[43] & 0xFF) << 24) | ((header[42] & 0xFF) << 16) |
  257. ((header[41] & 0xFF) << 8) | (header[40] & 0xFF);
  258. if (sampleRate <= 0 || numChannels <= 0 || bitsPerSample <= 0 || dataSize <= 0)
  259. {
  260. log.warn("[TtsUtil] WAV 文件参数无效,sampleRate={}, channels={}, bits={}, dataSize={}",
  261. sampleRate, numChannels, bitsPerSample, dataSize);
  262. return 0;
  263. }
  264. // 计算时长(秒)
  265. int bytesPerSample = bitsPerSample / 8;
  266. int bytesPerSecond = numChannels * sampleRate * bytesPerSample;
  267. int duration = (int) Math.ceil((double) dataSize / bytesPerSecond);
  268. log.debug("[TtsUtil] 音频时长计算:采样率={}, 通道数={}, 比特率={}, 数据大小={}, 时长={}秒",
  269. sampleRate, numChannels, bitsPerSample, dataSize, duration);
  270. return duration > 0 ? duration : 0;
  271. }
  272. catch (Exception e)
  273. {
  274. log.warn("[TtsUtil] 计算音频时长失败: {}", e.getMessage());
  275. return 0;
  276. }
  277. finally
  278. {
  279. if (fis != null)
  280. {
  281. try { fis.close(); } catch (IOException e) { }
  282. }
  283. }
  284. }
  285. /**
  286. * 使用指令控制语音合成
  287. *
  288. * @param text 待合成的文本
  289. * @param instruction 语音控制指令
  290. * @return 音频文件相对路径
  291. */
  292. public static String synthesizeSpeechWithInstruction(String text, String instruction)
  293. {
  294. return synthesizeSpeechWithInstruction(text, instruction, false);
  295. }
  296. /**
  297. * 使用指令控制语音合成
  298. *
  299. * @param text 待合成的文本
  300. * @param instruction 语音控制指令
  301. * @param optimizeInstructions 是否优化指令
  302. * @return 音频文件相对路径
  303. */
  304. public static String synthesizeSpeechWithInstruction(String text, String instruction, boolean optimizeInstructions)
  305. {
  306. if (StringUtils.isBlank(text))
  307. {
  308. log.error("[TtsUtil] 文本不能为空");
  309. return null;
  310. }
  311. String apiKey = getApiKey();
  312. if (StringUtils.isBlank(apiKey))
  313. {
  314. log.error("[TtsUtil] 阿里云百炼 API Key 未配置");
  315. return null;
  316. }
  317. log.info("[TtsUtil] 使用指令控制模式,模型: qwen3-tts-instruct-flash");
  318. try
  319. {
  320. Map<String, Object> input = new HashMap<>();
  321. input.put("text", text);
  322. input.put("voice", DEFAULT_VOICE);
  323. input.put("language_type", LANGUAGE_TYPE);
  324. Map<String, Object> parameters = new HashMap<>();
  325. parameters.put("instructions", instruction);
  326. parameters.put("optimize_instructions", optimizeInstructions);
  327. Map<String, Object> requestBody = new HashMap<>();
  328. requestBody.put("model", "qwen3-tts-instruct-flash");
  329. requestBody.put("input", input);
  330. requestBody.put("parameters", parameters);
  331. String jsonResponse = httpPost(API_URL, apiKey, JsonUtils.toJson(requestBody));
  332. if (StringUtils.isBlank(jsonResponse))
  333. {
  334. log.error("[TtsUtil] API 返回为空");
  335. return null;
  336. }
  337. String audioUrl = extractAudioUrl(jsonResponse);
  338. if (StringUtils.isBlank(audioUrl))
  339. {
  340. log.error("[TtsUtil] 未能提取音频 URL");
  341. return null;
  342. }
  343. return downloadAudio(audioUrl);
  344. }
  345. catch (Exception e)
  346. {
  347. log.error("[TtsUtil] 语音合成异常: {}", e.getMessage(), e);
  348. return null;
  349. }
  350. }
  351. /**
  352. * 发送 HTTP POST 请求
  353. */
  354. private static String httpPost(String urlStr, String apiKey, String jsonBody) throws Exception
  355. {
  356. HttpURLConnection conn = null;
  357. BufferedReader reader = null;
  358. StringBuilder response = new StringBuilder();
  359. try
  360. {
  361. URL url = new URL(urlStr);
  362. conn = (HttpURLConnection) url.openConnection();
  363. conn.setRequestMethod("POST");
  364. conn.setDoOutput(true);
  365. conn.setDoInput(true);
  366. conn.setConnectTimeout(60000);
  367. conn.setReadTimeout(120000);
  368. // 设置请求头
  369. conn.setRequestProperty("Authorization", "Bearer " + apiKey);
  370. conn.setRequestProperty("Content-Type", "application/json");
  371. // 发送请求体
  372. try (OutputStream os = conn.getOutputStream())
  373. {
  374. byte[] input = jsonBody.getBytes(StandardCharsets.UTF_8);
  375. os.write(input, 0, input.length);
  376. }
  377. // 读取响应
  378. int responseCode = conn.getResponseCode();
  379. if (responseCode != 200)
  380. {
  381. log.error("[TtsUtil] HTTP 请求失败,状态码: {}", responseCode);
  382. try (BufferedReader errorReader = new BufferedReader(
  383. new InputStreamReader(conn.getErrorStream(), StandardCharsets.UTF_8)))
  384. {
  385. String line;
  386. while ((line = errorReader.readLine()) != null)
  387. {
  388. response.append(line);
  389. }
  390. }
  391. log.error("[TtsUtil] 错误响应: {}", response);
  392. return null;
  393. }
  394. reader = new BufferedReader(new InputStreamReader(conn.getInputStream(), StandardCharsets.UTF_8));
  395. String line;
  396. while ((line = reader.readLine()) != null)
  397. {
  398. response.append(line);
  399. }
  400. return response.toString();
  401. }
  402. finally
  403. {
  404. if (reader != null)
  405. {
  406. try { reader.close(); } catch (IOException e) { }
  407. }
  408. if (conn != null)
  409. {
  410. conn.disconnect();
  411. }
  412. }
  413. }
  414. /**
  415. * 从 JSON 响应中提取音频 URL
  416. */
  417. private static String extractAudioUrl(String jsonResponse)
  418. {
  419. try
  420. {
  421. // 简单解析 JSON(避免引入 Gson 依赖)
  422. // 查找 "url": "..." 模式
  423. int urlIndex = jsonResponse.indexOf("\"url\"");
  424. if (urlIndex == -1)
  425. {
  426. return null;
  427. }
  428. int colonIndex = jsonResponse.indexOf(":", urlIndex);
  429. if (colonIndex == -1)
  430. {
  431. return null;
  432. }
  433. // 找到值的开始位置(跳过引号和可能的空格)
  434. int valueStart = colonIndex + 1;
  435. while (valueStart < jsonResponse.length() &&
  436. (jsonResponse.charAt(valueStart) == ' ' ||
  437. jsonResponse.charAt(valueStart) == '"'))
  438. {
  439. valueStart++;
  440. }
  441. // 找到值的结束位置(下一个引号)
  442. int valueEnd = valueStart;
  443. while (valueEnd < jsonResponse.length() && jsonResponse.charAt(valueEnd) != '"')
  444. {
  445. valueEnd++;
  446. }
  447. return jsonResponse.substring(valueStart, valueEnd);
  448. }
  449. catch (Exception e)
  450. {
  451. log.error("[TtsUtil] 解析音频 URL 失败: {}", e.getMessage());
  452. return null;
  453. }
  454. }
  455. /**
  456. * 下载音频文件到本地
  457. */
  458. private static String downloadAudio(String audioUrl)
  459. {
  460. InputStream inputStream = null;
  461. OutputStream outputStream = null;
  462. HttpURLConnection conn = null;
  463. try
  464. {
  465. // 创建输出目录
  466. String outputDir = RuoYiConfig.getProfile() + File.separator + AUDIO_OUTPUT_DIR;
  467. Path dirPath = Paths.get(outputDir);
  468. if (!Files.exists(dirPath))
  469. {
  470. Files.createDirectories(dirPath);
  471. }
  472. // 生成唯一的文件名
  473. String fileName = IdUtils.fastSimpleUUID() + "." + FORMAT;
  474. String localFilePath = outputDir + File.separator + fileName;
  475. // 下载文件
  476. URL url = new URL(audioUrl);
  477. conn = (HttpURLConnection) url.openConnection();
  478. conn.setConnectTimeout(60000);
  479. conn.setReadTimeout(60000);
  480. int responseCode = conn.getResponseCode();
  481. if (responseCode != 200)
  482. {
  483. log.error("[TtsUtil] 下载音频失败,状态码: {}", responseCode);
  484. return null;
  485. }
  486. inputStream = conn.getInputStream();
  487. outputStream = new FileOutputStream(localFilePath);
  488. byte[] buffer = new byte[8192];
  489. int bytesRead;
  490. while ((bytesRead = inputStream.read(buffer)) != -1)
  491. {
  492. outputStream.write(buffer, 0, bytesRead);
  493. }
  494. outputStream.flush();
  495. return AUDIO_OUTPUT_DIR + File.separator + fileName;
  496. }
  497. catch (Exception e)
  498. {
  499. log.error("[TtsUtil] 下载音频异常: {}", e.getMessage(), e);
  500. return null;
  501. }
  502. finally
  503. {
  504. if (inputStream != null)
  505. {
  506. try { inputStream.close(); } catch (IOException e) { }
  507. }
  508. if (outputStream != null)
  509. {
  510. try { outputStream.close(); } catch (IOException e) { }
  511. }
  512. if (conn != null)
  513. {
  514. conn.disconnect();
  515. }
  516. }
  517. }
  518. /**
  519. * 删除本地音频文件
  520. */
  521. public static boolean deleteAudioFile(String audioPath)
  522. {
  523. if (StringUtils.isBlank(audioPath))
  524. {
  525. return false;
  526. }
  527. try
  528. {
  529. String fullPath = RuoYiConfig.getProfile() + File.separator + audioPath;
  530. Path path = Paths.get(fullPath);
  531. if (Files.exists(path))
  532. {
  533. Files.delete(path);
  534. log.info("[TtsUtil] 音频文件已删除: {}", fullPath);
  535. return true;
  536. }
  537. else
  538. {
  539. log.warn("[TtsUtil] 音频文件不存在: {}", fullPath);
  540. return false;
  541. }
  542. }
  543. catch (Exception e)
  544. {
  545. log.error("[TtsUtil] 删除音频文件失败: {}", e.getMessage(), e);
  546. return false;
  547. }
  548. }
  549. /**
  550. * 生成输出文件路径
  551. */
  552. public static String generateOutputPath()
  553. {
  554. try
  555. {
  556. String outputDir = RuoYiConfig.getProfile() + File.separator + AUDIO_OUTPUT_DIR;
  557. Path dirPath = Paths.get(outputDir);
  558. if (!Files.exists(dirPath))
  559. {
  560. Files.createDirectories(dirPath);
  561. }
  562. String fileName = IdUtils.fastSimpleUUID() + "." + FORMAT;
  563. return outputDir + File.separator + fileName;
  564. }
  565. catch (IOException e)
  566. {
  567. log.error("[TtsUtil] 生成输出路径失败: {}", e.getMessage());
  568. return null;
  569. }
  570. }
  571. /**
  572. * 获取相对路径
  573. */
  574. public static String getRelativePath(String fullPath)
  575. {
  576. if (StringUtils.isBlank(fullPath))
  577. {
  578. return null;
  579. }
  580. String fileName = new File(fullPath).getName();
  581. return AUDIO_OUTPUT_DIR + File.separator + fileName;
  582. }
  583. /**
  584. * 获取音频访问 URL
  585. */
  586. public static String getAudioUrl(String relativePath)
  587. {
  588. if (StringUtils.isBlank(relativePath))
  589. {
  590. return null;
  591. }
  592. return "/profile/" + relativePath;
  593. }
  594. }