| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679 |
- package com.ruoyi.common.utils;
- import com.alibaba.dashscope.utils.Constants;
- import com.alibaba.dashscope.utils.JsonUtils;
- import com.ruoyi.common.config.DashScopeProperties;
- import com.ruoyi.common.config.RuoYiConfig;
- import com.ruoyi.common.utils.spring.SpringUtils;
- import com.ruoyi.common.utils.uuid.IdUtils;
- import org.apache.commons.lang3.StringUtils;
- import org.slf4j.Logger;
- import org.slf4j.LoggerFactory;
- import java.io.*;
- import java.net.HttpURLConnection;
- import java.net.URL;
- import java.nio.charset.StandardCharsets;
- import java.nio.file.Files;
- import java.nio.file.Path;
- import java.nio.file.Paths;
- import java.util.HashMap;
- import java.util.Map;
- /**
- * 阿里云百炼 Qwen-TTS (Text-to-Speech) 工具类
- * 使用 DashScope HTTP API 直接调用
- *
- * @author ruoyi
- * @date 2026-05-19
- */
- public class TtsUtil
- {
- private static final Logger log = LoggerFactory.getLogger(TtsUtil.class);
- /**
- * 音频输出目录(相对于 profile 路径)
- */
- private static final String AUDIO_OUTPUT_DIR = "audio/tts";
- /**
- * 音频格式
- */
- private static final String FORMAT = "wav";
- /**
- * Qwen-TTS 模型名称
- */
- private static final String MODEL = "qwen3-tts-flash";
- /**
- * 默认音色
- */
- private static final String DEFAULT_VOICE = "Cherry";
- /**
- * 默认语种
- */
- private static final String LANGUAGE_TYPE = "Chinese";
- /**
- * API URL
- */
- private static final String API_URL = "https://dashscope.aliyuncs.com/api/v1/services/aigc/multimodal-generation/generation";
- /**
- * 音频合成结果
- */
- public static class SynthesisResult
- {
- private String audioPath;
- private Integer audioDuration;
- private String errorMessage;
- public SynthesisResult() { }
- public SynthesisResult(String audioPath, Integer audioDuration)
- {
- this.audioPath = audioPath;
- this.audioDuration = audioDuration;
- }
- public SynthesisResult(String errorMessage)
- {
- this.errorMessage = errorMessage;
- }
- public String getAudioPath() { return audioPath; }
- public void setAudioPath(String audioPath) { this.audioPath = audioPath; }
- public Integer getAudioDuration() { return audioDuration; }
- public void setAudioDuration(Integer audioDuration) { this.audioDuration = audioDuration; }
- public String getErrorMessage() { return errorMessage; }
- public void setErrorMessage(String errorMessage) { this.errorMessage = errorMessage; }
- public boolean isSuccess() { return audioPath != null && !audioPath.isEmpty(); }
- }
- /**
- * 初始化 DashScope API URL
- */
- static
- {
- Constants.baseHttpApiUrl = "https://dashscope.aliyuncs.com/api/v1";
- }
- /**
- * 获取 API Key
- */
- private static String getApiKey()
- {
- // 优先从环境变量获取
- String envKey = System.getenv("DASHSCOPE_API_KEY");
- if (StringUtils.isNotBlank(envKey))
- {
- return envKey;
- }
- // 从 Spring 配置获取
- try
- {
- DashScopeProperties properties = SpringUtils.getBean(DashScopeProperties.class);
- if (properties != null && StringUtils.isNotBlank(properties.getApiKey()))
- {
- return properties.getApiKey();
- }
- }
- catch (Exception e)
- {
- // Spring 上下文不可用,忽略
- }
- // 从系统属性获取
- try
- {
- return System.getProperty("alibaba.dashscope.api-key", "");
- }
- catch (Exception e)
- {
- return "";
- }
- }
- /**
- * 同步合成语音(使用 HTTP API)
- *
- * @param text 待合成的文本
- * @return 音频文件相对路径(如 audio/tts/xxx.wav),失败返回 null
- */
- public static String synthesizeSpeech(String text)
- {
- SynthesisResult result = synthesize(text, DEFAULT_VOICE, LANGUAGE_TYPE);
- return result.isSuccess() ? result.getAudioPath() : null;
- }
- /**
- * 同步合成语音(指定音色)
- *
- * @param text 待合成的文本
- * @param voice 音色名称
- * @return 音频文件相对路径
- */
- public static String synthesizeSpeech(String text, String voice)
- {
- SynthesisResult result = synthesize(text, voice, LANGUAGE_TYPE);
- return result.isSuccess() ? result.getAudioPath() : null;
- }
- /**
- * 同步合成语音(指定音色和语种)
- *
- * @param text 待合成的文本
- * @param voice 音色名称
- * @param languageType 语种
- * @return 音频文件相对路径
- */
- public static String synthesizeSpeech(String text, String voice, String languageType)
- {
- SynthesisResult result = synthesize(text, voice, languageType);
- return result.isSuccess() ? result.getAudioPath() : null;
- }
- /**
- * 同步合成语音,返回完整结果(包含音频路径和时长)
- *
- * @param text 待合成的文本
- * @param voice 音色名称
- * @param languageType 语种
- * @return 合成结果
- */
- public static SynthesisResult synthesize(String text, String voice, String languageType)
- {
- if (StringUtils.isBlank(text))
- {
- log.error("[TtsUtil] 文本不能为空");
- return new SynthesisResult("文本不能为空");
- }
- String apiKey = getApiKey();
- if (StringUtils.isBlank(apiKey))
- {
- log.error("[TtsUtil] 阿里云百炼 API Key 未配置,请检查配置或设置 DASHSCOPE_API_KEY 环境变量");
- return new SynthesisResult("API Key 未配置");
- }
- try
- {
- log.info("[TtsUtil] 开始语音合成,文本长度: {}, 音色: {}, 语种: {}", text.length(), voice, languageType);
- // 构建请求体
- Map<String, Object> input = new HashMap<>();
- input.put("text", text);
- input.put("voice", StringUtils.isNotBlank(voice) ? voice : DEFAULT_VOICE);
- input.put("language_type", languageType);
- Map<String, Object> requestBody = new HashMap<>();
- requestBody.put("model", MODEL);
- requestBody.put("input", input);
- // 发送 HTTP 请求
- String jsonResponse = httpPost(API_URL, apiKey, JsonUtils.toJson(requestBody));
- if (StringUtils.isBlank(jsonResponse))
- {
- log.error("[TtsUtil] API 返回为空");
- return new SynthesisResult("API 返回为空");
- }
- log.debug("[TtsUtil] API 响应: {}", jsonResponse);
- // 解析响应获取音频 URL
- String audioUrl = extractAudioUrl(jsonResponse);
- if (StringUtils.isBlank(audioUrl))
- {
- log.error("[TtsUtil] 未能从响应中提取音频 URL,响应: {}", jsonResponse);
- return new SynthesisResult("未能获取音频 URL");
- }
- // 下载音频文件并获取时长
- String localPath = downloadAudio(audioUrl);
- if (StringUtils.isBlank(localPath))
- {
- return new SynthesisResult("下载音频失败");
- }
- // 计算音频时长
- int duration = calculateAudioDuration(localPath);
- log.info("[TtsUtil] 语音合成成功,文件路径: {}, 时长: {} 秒", localPath, duration);
- return new SynthesisResult(localPath, duration);
- }
- catch (Exception e)
- {
- log.error("[TtsUtil] 语音合成异常: {}", e.getMessage(), e);
- return new SynthesisResult(e.getMessage());
- }
- }
- /**
- * 计算音频时长(秒)
- * 根据 WAV 文件头计算时长
- */
- private static int calculateAudioDuration(String localPath)
- {
- FileInputStream fis = null;
- try
- {
- File file = new File(RuoYiConfig.getProfile() + File.separator + localPath);
- if (!file.exists())
- {
- return 0;
- }
- fis = new FileInputStream(file);
- byte[] header = new byte[44];
- if (fis.read(header) != 44)
- {
- log.warn("[TtsUtil] WAV 文件头读取失败");
- return 0;
- }
- // 验证 RIFF 标识
- if (header[0] != 'R' || header[1] != 'I' || header[2] != 'F' || header[3] != 'F')
- {
- log.warn("[TtsUtil] 不是有效的 WAV 文件");
- return 0;
- }
- // 提取采样率(offset 24-27,小端序)
- int sampleRate = ((header[27] & 0xFF) << 24) | ((header[26] & 0xFF) << 16) |
- ((header[25] & 0xFF) << 8) | (header[24] & 0xFF);
- // 提取通道数(offset 22-23,小端序)
- short numChannels = (short) (((header[23] & 0xFF) << 8) | (header[22] & 0xFF));
- // 提取比特率(offset 34-35,小端序)
- short bitsPerSample = (short) (((header[35] & 0xFF) << 8) | (header[34] & 0xFF));
- // 提取数据大小(offset 40-43,小端序)
- int dataSize = ((header[43] & 0xFF) << 24) | ((header[42] & 0xFF) << 16) |
- ((header[41] & 0xFF) << 8) | (header[40] & 0xFF);
- if (sampleRate <= 0 || numChannels <= 0 || bitsPerSample <= 0 || dataSize <= 0)
- {
- log.warn("[TtsUtil] WAV 文件参数无效,sampleRate={}, channels={}, bits={}, dataSize={}",
- sampleRate, numChannels, bitsPerSample, dataSize);
- return 0;
- }
- // 计算时长(秒)
- int bytesPerSample = bitsPerSample / 8;
- int bytesPerSecond = numChannels * sampleRate * bytesPerSample;
- int duration = (int) Math.ceil((double) dataSize / bytesPerSecond);
- log.debug("[TtsUtil] 音频时长计算:采样率={}, 通道数={}, 比特率={}, 数据大小={}, 时长={}秒",
- sampleRate, numChannels, bitsPerSample, dataSize, duration);
- return duration > 0 ? duration : 0;
- }
- catch (Exception e)
- {
- log.warn("[TtsUtil] 计算音频时长失败: {}", e.getMessage());
- return 0;
- }
- finally
- {
- if (fis != null)
- {
- try { fis.close(); } catch (IOException e) { }
- }
- }
- }
- /**
- * 使用指令控制语音合成
- *
- * @param text 待合成的文本
- * @param instruction 语音控制指令
- * @return 音频文件相对路径
- */
- public static String synthesizeSpeechWithInstruction(String text, String instruction)
- {
- return synthesizeSpeechWithInstruction(text, instruction, false);
- }
- /**
- * 使用指令控制语音合成
- *
- * @param text 待合成的文本
- * @param instruction 语音控制指令
- * @param optimizeInstructions 是否优化指令
- * @return 音频文件相对路径
- */
- public static String synthesizeSpeechWithInstruction(String text, String instruction, boolean optimizeInstructions)
- {
- if (StringUtils.isBlank(text))
- {
- log.error("[TtsUtil] 文本不能为空");
- return null;
- }
- String apiKey = getApiKey();
- if (StringUtils.isBlank(apiKey))
- {
- log.error("[TtsUtil] 阿里云百炼 API Key 未配置");
- return null;
- }
- log.info("[TtsUtil] 使用指令控制模式,模型: qwen3-tts-instruct-flash");
- try
- {
- Map<String, Object> input = new HashMap<>();
- input.put("text", text);
- input.put("voice", DEFAULT_VOICE);
- input.put("language_type", LANGUAGE_TYPE);
- Map<String, Object> parameters = new HashMap<>();
- parameters.put("instructions", instruction);
- parameters.put("optimize_instructions", optimizeInstructions);
- Map<String, Object> requestBody = new HashMap<>();
- requestBody.put("model", "qwen3-tts-instruct-flash");
- requestBody.put("input", input);
- requestBody.put("parameters", parameters);
- String jsonResponse = httpPost(API_URL, apiKey, JsonUtils.toJson(requestBody));
- if (StringUtils.isBlank(jsonResponse))
- {
- log.error("[TtsUtil] API 返回为空");
- return null;
- }
- String audioUrl = extractAudioUrl(jsonResponse);
- if (StringUtils.isBlank(audioUrl))
- {
- log.error("[TtsUtil] 未能提取音频 URL");
- return null;
- }
- return downloadAudio(audioUrl);
- }
- catch (Exception e)
- {
- log.error("[TtsUtil] 语音合成异常: {}", e.getMessage(), e);
- return null;
- }
- }
- /**
- * 发送 HTTP POST 请求
- */
- private static String httpPost(String urlStr, String apiKey, String jsonBody) throws Exception
- {
- HttpURLConnection conn = null;
- BufferedReader reader = null;
- StringBuilder response = new StringBuilder();
- try
- {
- URL url = new URL(urlStr);
- conn = (HttpURLConnection) url.openConnection();
- conn.setRequestMethod("POST");
- conn.setDoOutput(true);
- conn.setDoInput(true);
- conn.setConnectTimeout(60000);
- conn.setReadTimeout(120000);
- // 设置请求头
- conn.setRequestProperty("Authorization", "Bearer " + apiKey);
- conn.setRequestProperty("Content-Type", "application/json");
- // 发送请求体
- try (OutputStream os = conn.getOutputStream())
- {
- byte[] input = jsonBody.getBytes(StandardCharsets.UTF_8);
- os.write(input, 0, input.length);
- }
- // 读取响应
- int responseCode = conn.getResponseCode();
- if (responseCode != 200)
- {
- log.error("[TtsUtil] HTTP 请求失败,状态码: {}", responseCode);
- try (BufferedReader errorReader = new BufferedReader(
- new InputStreamReader(conn.getErrorStream(), StandardCharsets.UTF_8)))
- {
- String line;
- while ((line = errorReader.readLine()) != null)
- {
- response.append(line);
- }
- }
- log.error("[TtsUtil] 错误响应: {}", response);
- return null;
- }
- reader = new BufferedReader(new InputStreamReader(conn.getInputStream(), StandardCharsets.UTF_8));
- String line;
- while ((line = reader.readLine()) != null)
- {
- response.append(line);
- }
- return response.toString();
- }
- finally
- {
- if (reader != null)
- {
- try { reader.close(); } catch (IOException e) { }
- }
- if (conn != null)
- {
- conn.disconnect();
- }
- }
- }
- /**
- * 从 JSON 响应中提取音频 URL
- */
- private static String extractAudioUrl(String jsonResponse)
- {
- try
- {
- // 简单解析 JSON(避免引入 Gson 依赖)
- // 查找 "url": "..." 模式
- int urlIndex = jsonResponse.indexOf("\"url\"");
- if (urlIndex == -1)
- {
- return null;
- }
- int colonIndex = jsonResponse.indexOf(":", urlIndex);
- if (colonIndex == -1)
- {
- return null;
- }
- // 找到值的开始位置(跳过引号和可能的空格)
- int valueStart = colonIndex + 1;
- while (valueStart < jsonResponse.length() &&
- (jsonResponse.charAt(valueStart) == ' ' ||
- jsonResponse.charAt(valueStart) == '"'))
- {
- valueStart++;
- }
- // 找到值的结束位置(下一个引号)
- int valueEnd = valueStart;
- while (valueEnd < jsonResponse.length() && jsonResponse.charAt(valueEnd) != '"')
- {
- valueEnd++;
- }
- return jsonResponse.substring(valueStart, valueEnd);
- }
- catch (Exception e)
- {
- log.error("[TtsUtil] 解析音频 URL 失败: {}", e.getMessage());
- return null;
- }
- }
- /**
- * 下载音频文件到本地
- */
- private static String downloadAudio(String audioUrl)
- {
- InputStream inputStream = null;
- OutputStream outputStream = null;
- HttpURLConnection conn = null;
- try
- {
- // 创建输出目录
- String outputDir = RuoYiConfig.getProfile() + File.separator + AUDIO_OUTPUT_DIR;
- Path dirPath = Paths.get(outputDir);
- if (!Files.exists(dirPath))
- {
- Files.createDirectories(dirPath);
- }
- // 生成唯一的文件名
- String fileName = IdUtils.fastSimpleUUID() + "." + FORMAT;
- String localFilePath = outputDir + File.separator + fileName;
- // 下载文件
- URL url = new URL(audioUrl);
- conn = (HttpURLConnection) url.openConnection();
- conn.setConnectTimeout(60000);
- conn.setReadTimeout(60000);
- int responseCode = conn.getResponseCode();
- if (responseCode != 200)
- {
- log.error("[TtsUtil] 下载音频失败,状态码: {}", responseCode);
- return null;
- }
- inputStream = conn.getInputStream();
- outputStream = new FileOutputStream(localFilePath);
- byte[] buffer = new byte[8192];
- int bytesRead;
- while ((bytesRead = inputStream.read(buffer)) != -1)
- {
- outputStream.write(buffer, 0, bytesRead);
- }
- outputStream.flush();
- return AUDIO_OUTPUT_DIR + File.separator + fileName;
- }
- catch (Exception e)
- {
- log.error("[TtsUtil] 下载音频异常: {}", e.getMessage(), e);
- return null;
- }
- finally
- {
- if (inputStream != null)
- {
- try { inputStream.close(); } catch (IOException e) { }
- }
- if (outputStream != null)
- {
- try { outputStream.close(); } catch (IOException e) { }
- }
- if (conn != null)
- {
- conn.disconnect();
- }
- }
- }
- /**
- * 删除本地音频文件
- */
- public static boolean deleteAudioFile(String audioPath)
- {
- if (StringUtils.isBlank(audioPath))
- {
- return false;
- }
- try
- {
- String fullPath = RuoYiConfig.getProfile() + File.separator + audioPath;
- Path path = Paths.get(fullPath);
- if (Files.exists(path))
- {
- Files.delete(path);
- log.info("[TtsUtil] 音频文件已删除: {}", fullPath);
- return true;
- }
- else
- {
- log.warn("[TtsUtil] 音频文件不存在: {}", fullPath);
- return false;
- }
- }
- catch (Exception e)
- {
- log.error("[TtsUtil] 删除音频文件失败: {}", e.getMessage(), e);
- return false;
- }
- }
- /**
- * 生成输出文件路径
- */
- public static String generateOutputPath()
- {
- try
- {
- String outputDir = RuoYiConfig.getProfile() + File.separator + AUDIO_OUTPUT_DIR;
- Path dirPath = Paths.get(outputDir);
- if (!Files.exists(dirPath))
- {
- Files.createDirectories(dirPath);
- }
- String fileName = IdUtils.fastSimpleUUID() + "." + FORMAT;
- return outputDir + File.separator + fileName;
- }
- catch (IOException e)
- {
- log.error("[TtsUtil] 生成输出路径失败: {}", e.getMessage());
- return null;
- }
- }
- /**
- * 获取相对路径
- */
- public static String getRelativePath(String fullPath)
- {
- if (StringUtils.isBlank(fullPath))
- {
- return null;
- }
- String fileName = new File(fullPath).getName();
- return AUDIO_OUTPUT_DIR + File.separator + fileName;
- }
- /**
- * 获取音频访问 URL
- */
- public static String getAudioUrl(String relativePath)
- {
- if (StringUtils.isBlank(relativePath))
- {
- return null;
- }
- return "/profile/" + relativePath;
- }
- }
|