package com.ruoyi.common.utils; import com.alibaba.dashscope.utils.Constants; import com.alibaba.dashscope.utils.JsonUtils; import com.ruoyi.common.config.DashScopeProperties; import com.ruoyi.common.config.RuoYiConfig; import com.ruoyi.common.utils.spring.SpringUtils; import com.ruoyi.common.utils.uuid.IdUtils; import org.apache.commons.lang3.StringUtils; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import java.io.*; import java.net.HttpURLConnection; import java.net.URL; import java.nio.charset.StandardCharsets; import java.nio.file.Files; import java.nio.file.Path; import java.nio.file.Paths; import java.util.HashMap; import java.util.Map; /** * 阿里云百炼 Qwen-TTS (Text-to-Speech) 工具类 * 使用 DashScope HTTP API 直接调用 * * @author ruoyi * @date 2026-05-19 */ public class TtsUtil { private static final Logger log = LoggerFactory.getLogger(TtsUtil.class); /** * 音频输出目录(相对于 profile 路径) */ private static final String AUDIO_OUTPUT_DIR = "audio/tts"; /** * 音频格式 */ private static final String FORMAT = "wav"; /** * Qwen-TTS 模型名称 */ private static final String MODEL = "qwen3-tts-flash"; /** * 默认音色 */ private static final String DEFAULT_VOICE = "Cherry"; /** * 默认语种 */ private static final String LANGUAGE_TYPE = "Chinese"; /** * API URL */ private static final String API_URL = "https://dashscope.aliyuncs.com/api/v1/services/aigc/multimodal-generation/generation"; /** * 音频合成结果 */ public static class SynthesisResult { private String audioPath; private Integer audioDuration; private String errorMessage; public SynthesisResult() { } public SynthesisResult(String audioPath, Integer audioDuration) { this.audioPath = audioPath; this.audioDuration = audioDuration; } public SynthesisResult(String errorMessage) { this.errorMessage = errorMessage; } public String getAudioPath() { return audioPath; } public void setAudioPath(String audioPath) { this.audioPath = audioPath; } public Integer getAudioDuration() { return audioDuration; } public void setAudioDuration(Integer audioDuration) { this.audioDuration = audioDuration; } public String getErrorMessage() { return errorMessage; } public void setErrorMessage(String errorMessage) { this.errorMessage = errorMessage; } public boolean isSuccess() { return audioPath != null && !audioPath.isEmpty(); } } /** * 初始化 DashScope API URL */ static { Constants.baseHttpApiUrl = "https://dashscope.aliyuncs.com/api/v1"; } /** * 获取 API Key */ private static String getApiKey() { // 优先从环境变量获取 String envKey = System.getenv("DASHSCOPE_API_KEY"); if (StringUtils.isNotBlank(envKey)) { return envKey; } // 从 Spring 配置获取 try { DashScopeProperties properties = SpringUtils.getBean(DashScopeProperties.class); if (properties != null && StringUtils.isNotBlank(properties.getApiKey())) { return properties.getApiKey(); } } catch (Exception e) { // Spring 上下文不可用,忽略 } // 从系统属性获取 try { return System.getProperty("alibaba.dashscope.api-key", ""); } catch (Exception e) { return ""; } } /** * 同步合成语音(使用 HTTP API) * * @param text 待合成的文本 * @return 音频文件相对路径(如 audio/tts/xxx.wav),失败返回 null */ public static String synthesizeSpeech(String text) { SynthesisResult result = synthesize(text, DEFAULT_VOICE, LANGUAGE_TYPE); return result.isSuccess() ? result.getAudioPath() : null; } /** * 同步合成语音(指定音色) * * @param text 待合成的文本 * @param voice 音色名称 * @return 音频文件相对路径 */ public static String synthesizeSpeech(String text, String voice) { SynthesisResult result = synthesize(text, voice, LANGUAGE_TYPE); return result.isSuccess() ? result.getAudioPath() : null; } /** * 同步合成语音(指定音色和语种) * * @param text 待合成的文本 * @param voice 音色名称 * @param languageType 语种 * @return 音频文件相对路径 */ public static String synthesizeSpeech(String text, String voice, String languageType) { SynthesisResult result = synthesize(text, voice, languageType); return result.isSuccess() ? result.getAudioPath() : null; } /** * 同步合成语音,返回完整结果(包含音频路径和时长) * * @param text 待合成的文本 * @param voice 音色名称 * @param languageType 语种 * @return 合成结果 */ public static SynthesisResult synthesize(String text, String voice, String languageType) { if (StringUtils.isBlank(text)) { log.error("[TtsUtil] 文本不能为空"); return new SynthesisResult("文本不能为空"); } String apiKey = getApiKey(); if (StringUtils.isBlank(apiKey)) { log.error("[TtsUtil] 阿里云百炼 API Key 未配置,请检查配置或设置 DASHSCOPE_API_KEY 环境变量"); return new SynthesisResult("API Key 未配置"); } try { log.info("[TtsUtil] 开始语音合成,文本长度: {}, 音色: {}, 语种: {}", text.length(), voice, languageType); // 构建请求体 Map input = new HashMap<>(); input.put("text", text); input.put("voice", StringUtils.isNotBlank(voice) ? voice : DEFAULT_VOICE); input.put("language_type", languageType); Map requestBody = new HashMap<>(); requestBody.put("model", MODEL); requestBody.put("input", input); // 发送 HTTP 请求 String jsonResponse = httpPost(API_URL, apiKey, JsonUtils.toJson(requestBody)); if (StringUtils.isBlank(jsonResponse)) { log.error("[TtsUtil] API 返回为空"); return new SynthesisResult("API 返回为空"); } log.debug("[TtsUtil] API 响应: {}", jsonResponse); // 解析响应获取音频 URL String audioUrl = extractAudioUrl(jsonResponse); if (StringUtils.isBlank(audioUrl)) { log.error("[TtsUtil] 未能从响应中提取音频 URL,响应: {}", jsonResponse); return new SynthesisResult("未能获取音频 URL"); } // 下载音频文件并获取时长 String localPath = downloadAudio(audioUrl); if (StringUtils.isBlank(localPath)) { return new SynthesisResult("下载音频失败"); } // 计算音频时长 int duration = calculateAudioDuration(localPath); log.info("[TtsUtil] 语音合成成功,文件路径: {}, 时长: {} 秒", localPath, duration); return new SynthesisResult(localPath, duration); } catch (Exception e) { log.error("[TtsUtil] 语音合成异常: {}", e.getMessage(), e); return new SynthesisResult(e.getMessage()); } } /** * 计算音频时长(秒) * 根据 WAV 文件头计算时长 */ private static int calculateAudioDuration(String localPath) { FileInputStream fis = null; try { File file = new File(RuoYiConfig.getProfile() + File.separator + localPath); if (!file.exists()) { return 0; } fis = new FileInputStream(file); byte[] header = new byte[44]; if (fis.read(header) != 44) { log.warn("[TtsUtil] WAV 文件头读取失败"); return 0; } // 验证 RIFF 标识 if (header[0] != 'R' || header[1] != 'I' || header[2] != 'F' || header[3] != 'F') { log.warn("[TtsUtil] 不是有效的 WAV 文件"); return 0; } // 提取采样率(offset 24-27,小端序) int sampleRate = ((header[27] & 0xFF) << 24) | ((header[26] & 0xFF) << 16) | ((header[25] & 0xFF) << 8) | (header[24] & 0xFF); // 提取通道数(offset 22-23,小端序) short numChannels = (short) (((header[23] & 0xFF) << 8) | (header[22] & 0xFF)); // 提取比特率(offset 34-35,小端序) short bitsPerSample = (short) (((header[35] & 0xFF) << 8) | (header[34] & 0xFF)); // 提取数据大小(offset 40-43,小端序) int dataSize = ((header[43] & 0xFF) << 24) | ((header[42] & 0xFF) << 16) | ((header[41] & 0xFF) << 8) | (header[40] & 0xFF); if (sampleRate <= 0 || numChannels <= 0 || bitsPerSample <= 0 || dataSize <= 0) { log.warn("[TtsUtil] WAV 文件参数无效,sampleRate={}, channels={}, bits={}, dataSize={}", sampleRate, numChannels, bitsPerSample, dataSize); return 0; } // 计算时长(秒) int bytesPerSample = bitsPerSample / 8; int bytesPerSecond = numChannels * sampleRate * bytesPerSample; int duration = (int) Math.ceil((double) dataSize / bytesPerSecond); log.debug("[TtsUtil] 音频时长计算:采样率={}, 通道数={}, 比特率={}, 数据大小={}, 时长={}秒", sampleRate, numChannels, bitsPerSample, dataSize, duration); return duration > 0 ? duration : 0; } catch (Exception e) { log.warn("[TtsUtil] 计算音频时长失败: {}", e.getMessage()); return 0; } finally { if (fis != null) { try { fis.close(); } catch (IOException e) { } } } } /** * 使用指令控制语音合成 * * @param text 待合成的文本 * @param instruction 语音控制指令 * @return 音频文件相对路径 */ public static String synthesizeSpeechWithInstruction(String text, String instruction) { return synthesizeSpeechWithInstruction(text, instruction, false); } /** * 使用指令控制语音合成 * * @param text 待合成的文本 * @param instruction 语音控制指令 * @param optimizeInstructions 是否优化指令 * @return 音频文件相对路径 */ public static String synthesizeSpeechWithInstruction(String text, String instruction, boolean optimizeInstructions) { if (StringUtils.isBlank(text)) { log.error("[TtsUtil] 文本不能为空"); return null; } String apiKey = getApiKey(); if (StringUtils.isBlank(apiKey)) { log.error("[TtsUtil] 阿里云百炼 API Key 未配置"); return null; } log.info("[TtsUtil] 使用指令控制模式,模型: qwen3-tts-instruct-flash"); try { Map input = new HashMap<>(); input.put("text", text); input.put("voice", DEFAULT_VOICE); input.put("language_type", LANGUAGE_TYPE); Map parameters = new HashMap<>(); parameters.put("instructions", instruction); parameters.put("optimize_instructions", optimizeInstructions); Map requestBody = new HashMap<>(); requestBody.put("model", "qwen3-tts-instruct-flash"); requestBody.put("input", input); requestBody.put("parameters", parameters); String jsonResponse = httpPost(API_URL, apiKey, JsonUtils.toJson(requestBody)); if (StringUtils.isBlank(jsonResponse)) { log.error("[TtsUtil] API 返回为空"); return null; } String audioUrl = extractAudioUrl(jsonResponse); if (StringUtils.isBlank(audioUrl)) { log.error("[TtsUtil] 未能提取音频 URL"); return null; } return downloadAudio(audioUrl); } catch (Exception e) { log.error("[TtsUtil] 语音合成异常: {}", e.getMessage(), e); return null; } } /** * 发送 HTTP POST 请求 */ private static String httpPost(String urlStr, String apiKey, String jsonBody) throws Exception { HttpURLConnection conn = null; BufferedReader reader = null; StringBuilder response = new StringBuilder(); try { URL url = new URL(urlStr); conn = (HttpURLConnection) url.openConnection(); conn.setRequestMethod("POST"); conn.setDoOutput(true); conn.setDoInput(true); conn.setConnectTimeout(60000); conn.setReadTimeout(120000); // 设置请求头 conn.setRequestProperty("Authorization", "Bearer " + apiKey); conn.setRequestProperty("Content-Type", "application/json"); // 发送请求体 try (OutputStream os = conn.getOutputStream()) { byte[] input = jsonBody.getBytes(StandardCharsets.UTF_8); os.write(input, 0, input.length); } // 读取响应 int responseCode = conn.getResponseCode(); if (responseCode != 200) { log.error("[TtsUtil] HTTP 请求失败,状态码: {}", responseCode); try (BufferedReader errorReader = new BufferedReader( new InputStreamReader(conn.getErrorStream(), StandardCharsets.UTF_8))) { String line; while ((line = errorReader.readLine()) != null) { response.append(line); } } log.error("[TtsUtil] 错误响应: {}", response); return null; } reader = new BufferedReader(new InputStreamReader(conn.getInputStream(), StandardCharsets.UTF_8)); String line; while ((line = reader.readLine()) != null) { response.append(line); } return response.toString(); } finally { if (reader != null) { try { reader.close(); } catch (IOException e) { } } if (conn != null) { conn.disconnect(); } } } /** * 从 JSON 响应中提取音频 URL */ private static String extractAudioUrl(String jsonResponse) { try { // 简单解析 JSON(避免引入 Gson 依赖) // 查找 "url": "..." 模式 int urlIndex = jsonResponse.indexOf("\"url\""); if (urlIndex == -1) { return null; } int colonIndex = jsonResponse.indexOf(":", urlIndex); if (colonIndex == -1) { return null; } // 找到值的开始位置(跳过引号和可能的空格) int valueStart = colonIndex + 1; while (valueStart < jsonResponse.length() && (jsonResponse.charAt(valueStart) == ' ' || jsonResponse.charAt(valueStart) == '"')) { valueStart++; } // 找到值的结束位置(下一个引号) int valueEnd = valueStart; while (valueEnd < jsonResponse.length() && jsonResponse.charAt(valueEnd) != '"') { valueEnd++; } return jsonResponse.substring(valueStart, valueEnd); } catch (Exception e) { log.error("[TtsUtil] 解析音频 URL 失败: {}", e.getMessage()); return null; } } /** * 下载音频文件到本地 */ private static String downloadAudio(String audioUrl) { InputStream inputStream = null; OutputStream outputStream = null; HttpURLConnection conn = null; try { // 创建输出目录 String outputDir = RuoYiConfig.getProfile() + File.separator + AUDIO_OUTPUT_DIR; Path dirPath = Paths.get(outputDir); if (!Files.exists(dirPath)) { Files.createDirectories(dirPath); } // 生成唯一的文件名 String fileName = IdUtils.fastSimpleUUID() + "." + FORMAT; String localFilePath = outputDir + File.separator + fileName; // 下载文件 URL url = new URL(audioUrl); conn = (HttpURLConnection) url.openConnection(); conn.setConnectTimeout(60000); conn.setReadTimeout(60000); int responseCode = conn.getResponseCode(); if (responseCode != 200) { log.error("[TtsUtil] 下载音频失败,状态码: {}", responseCode); return null; } inputStream = conn.getInputStream(); outputStream = new FileOutputStream(localFilePath); byte[] buffer = new byte[8192]; int bytesRead; while ((bytesRead = inputStream.read(buffer)) != -1) { outputStream.write(buffer, 0, bytesRead); } outputStream.flush(); return AUDIO_OUTPUT_DIR + File.separator + fileName; } catch (Exception e) { log.error("[TtsUtil] 下载音频异常: {}", e.getMessage(), e); return null; } finally { if (inputStream != null) { try { inputStream.close(); } catch (IOException e) { } } if (outputStream != null) { try { outputStream.close(); } catch (IOException e) { } } if (conn != null) { conn.disconnect(); } } } /** * 删除本地音频文件 */ public static boolean deleteAudioFile(String audioPath) { if (StringUtils.isBlank(audioPath)) { return false; } try { String fullPath = RuoYiConfig.getProfile() + File.separator + audioPath; Path path = Paths.get(fullPath); if (Files.exists(path)) { Files.delete(path); log.info("[TtsUtil] 音频文件已删除: {}", fullPath); return true; } else { log.warn("[TtsUtil] 音频文件不存在: {}", fullPath); return false; } } catch (Exception e) { log.error("[TtsUtil] 删除音频文件失败: {}", e.getMessage(), e); return false; } } /** * 生成输出文件路径 */ public static String generateOutputPath() { try { String outputDir = RuoYiConfig.getProfile() + File.separator + AUDIO_OUTPUT_DIR; Path dirPath = Paths.get(outputDir); if (!Files.exists(dirPath)) { Files.createDirectories(dirPath); } String fileName = IdUtils.fastSimpleUUID() + "." + FORMAT; return outputDir + File.separator + fileName; } catch (IOException e) { log.error("[TtsUtil] 生成输出路径失败: {}", e.getMessage()); return null; } } /** * 获取相对路径 */ public static String getRelativePath(String fullPath) { if (StringUtils.isBlank(fullPath)) { return null; } String fileName = new File(fullPath).getName(); return AUDIO_OUTPUT_DIR + File.separator + fileName; } /** * 获取音频访问 URL */ public static String getAudioUrl(String relativePath) { if (StringUtils.isBlank(relativePath)) { return null; } return "/profile/" + relativePath; } }