Browse Source

chore: Audio Handle

master
wangxiang 2 years ago
parent
commit
2fbc058d27
No known key found for this signature in database
GPG Key ID: 1BA7946AB6B232E4
  1. 10
      kicc-common/kicc-common-bom/pom.xml
  2. 4
      kicc-platform/kicc-platform-biz/kicc-system-biz/pom.xml
  3. 3
      kicc-platform/kicc-platform-biz/kicc-system-biz/src/main/java/com/cloud/kicc/system/config/OpenAiConfigProperties.java
  4. 112
      kicc-platform/kicc-platform-biz/kicc-system-biz/src/main/java/com/cloud/kicc/system/service/impl/ImContentServiceImpl.java

10
kicc-common/kicc-common-bom/pom.xml

@@ -33,7 +33,7 @@
<liquibase.version>4.22.0</liquibase.version> <liquibase.version>4.22.0</liquibase.version>
<ureport2.version>2.2.9</ureport2.version> <ureport2.version>2.2.9</ureport2.version>
<openai-gpt-java.api.version>0.17.0</openai-gpt-java.api.version> <openai-gpt-java.api.version>0.17.0</openai-gpt-java.api.version>
<javacv.version>1.5.9</javacv.version> <jaudiotagger.version>3.0.1</jaudiotagger.version>
</properties> </properties>
<!-- 定义全局jar版本,模块使用需要再次引入但不用写版本号--> <!-- 定义全局jar版本,模块使用需要再次引入但不用写版本号-->
@@ -270,11 +270,11 @@
<artifactId>service</artifactId> <artifactId>service</artifactId>
<version>${openai-gpt-java.api.version}</version> <version>${openai-gpt-java.api.version}</version>
</dependency> </dependency>
<!--计算机视觉相关处理库(目前用于处理音频但支持更多功能): https://github.com/bytedeco/javacv --> <!--音频文件相关处理库(目前用于处理音频但支持更多功能例如修复音频元文件): https://bitbucket.org/ijabz/jaudiotagger/src/master -->
<dependency> <dependency>
<groupId>org.bytedeco</groupId> <groupId>net.jthink</groupId>
<artifactId>javacv-platform</artifactId> <artifactId>jaudiotagger</artifactId>
<version>${javacv.version}</version> <version>${jaudiotagger.version}</version>
</dependency> </dependency>
</dependencies> </dependencies>
</dependencyManagement> </dependencyManagement>

4
kicc-platform/kicc-platform-biz/kicc-system-biz/pom.xml

@@ -93,8 +93,8 @@
<artifactId>service</artifactId> <artifactId>service</artifactId>
</dependency> </dependency>
<dependency> <dependency>
<groupId>org.bytedeco</groupId> <groupId>net.jthink</groupId>
<artifactId>javacv-platform</artifactId> <artifactId>jaudiotagger</artifactId>
</dependency> </dependency>
</dependencies> </dependencies>

3
kicc-platform/kicc-platform-biz/kicc-system-biz/src/main/java/com/cloud/kicc/system/config/OpenAiConfigProperties.java

@@ -35,4 +35,7 @@ public class OpenAiConfigProperties {
/** timeout time unit seconds */ /** timeout time unit seconds */
private int timeout = 10; private int timeout = 10;
/** OpenAI API Voice - <a href="https://platform.openai.com/docs/guides/text-to-speech/voice-options">...</a> */
private String voice;
} }

112
kicc-platform/kicc-platform-biz/kicc-system-biz/src/main/java/com/cloud/kicc/system/service/impl/ImContentServiceImpl.java

@@ -13,6 +13,7 @@ import com.cloud.kicc.common.security.util.SecurityUtils;
import com.cloud.kicc.system.api.entity.ImContent; import com.cloud.kicc.system.api.entity.ImContent;
import com.cloud.kicc.system.api.entity.OssFile; import com.cloud.kicc.system.api.entity.OssFile;
import com.cloud.kicc.system.api.enums.ImMessageTypeEnum; import com.cloud.kicc.system.api.enums.ImMessageTypeEnum;
import com.cloud.kicc.system.config.OpenAiConfigProperties;
import com.cloud.kicc.system.mapper.ImContentMapper; import com.cloud.kicc.system.mapper.ImContentMapper;
import com.cloud.kicc.system.service.FileService; import com.cloud.kicc.system.service.FileService;
import com.cloud.kicc.system.service.IImContentService; import com.cloud.kicc.system.service.IImContentService;
@@ -30,7 +31,8 @@ import com.theokanning.openai.service.OpenAiService;
import lombok.RequiredArgsConstructor; import lombok.RequiredArgsConstructor;
import lombok.SneakyThrows; import lombok.SneakyThrows;
import okhttp3.ResponseBody; import okhttp3.ResponseBody;
import org.bytedeco.javacv.FFmpegFrameGrabber; import org.jaudiotagger.audio.AudioFile;
import org.jaudiotagger.audio.AudioFileIO;
import org.springframework.stereotype.Service; import org.springframework.stereotype.Service;
import org.springframework.transaction.annotation.Transactional; import org.springframework.transaction.annotation.Transactional;
import org.springframework.web.multipart.MultipartFile; import org.springframework.web.multipart.MultipartFile;
@@ -61,6 +63,8 @@ public class ImContentServiceImpl extends ServiceImpl<ImContentMapper, ImContent
private final OssTemplate ossTemplate; private final OssTemplate ossTemplate;
private final OpenAiConfigProperties openAiConfigProperties;
@Override @Override
public IPage<Map<String, Object>> listHistoryMessage(Page page, ImContent imContent) { public IPage<Map<String, Object>> listHistoryMessage(Page page, ImContent imContent) {
return baseMapper.listHistoryMessage(page, imContent); return baseMapper.listHistoryMessage(page, imContent);
@@ -92,64 +96,74 @@ public class ImContentServiceImpl extends ServiceImpl<ImContentMapper, ImContent
fileService.save(sendOssOssFile); fileService.save(sendOssOssFile);
imContent.setFiles(sendOssOssFile.getId()); imContent.setFiles(sendOssOssFile.getId());
// 语音转文字 File inputVoiceFile = File.createTempFile(sendFileName, StrUtil.DOT + FileUtil.extName(file.getOriginalFilename()));
File voiceFile = File.createTempFile(sendFileName, StrUtil.DOT + FileUtil.extName(file.getOriginalFilename())); File outputVoiceFile = File.createTempFile(sendFileName, StrUtil.DOT + FileUtil.extName(file.getOriginalFilename()));
CreateTranscriptionRequest request = CreateTranscriptionRequest.builder() ImContent receiveContent;
.model("whisper-1") OssFile receiveOssOssFile;
.language("zh") try {
.build();
FileUtil.writeBytes(file.getBytes(), voiceFile); // 语音转文字
TranscriptionResult transcriptionResult = openAiService.createTranscription(request, voiceFile); CreateTranscriptionRequest request = CreateTranscriptionRequest.builder()
if (voiceFile.delete()) { .model("whisper-1")
System.out.println("已成功删除临时文件!"); .language("zh")
} .build();
FileUtil.writeBytes(file.getBytes(), inputVoiceFile);
imContent.setContent(transcriptionResult.getText()); TranscriptionResult transcriptionResult = openAiService.createTranscription(request, inputVoiceFile);
ImContent content = askChatCompletion(imContent); imContent.setContent(transcriptionResult.getText());
content.setContentType(ImMessageTypeEnum.AUDIO.getValue());
// ai涡轮增压
// 文字转语音 receiveContent = askChatCompletion(imContent);
CreateSpeechRequest createSpeechRequest = CreateSpeechRequest.builder() receiveContent.setContentType(ImMessageTypeEnum.AUDIO.getValue());
.model("tts-1")
.input(content.getContent()) // 文字转语音
.voice("nova") CreateSpeechRequest createSpeechRequest = CreateSpeechRequest.builder()
.responseFormat(FileUtil.extName(speechName)) .model("tts-1")
.speed(1.0) .input(receiveContent.getContent())
.build(); .voice(openAiConfigProperties.getVoice())
ResponseBody responseBody = openAiService.createSpeech(createSpeechRequest); .responseFormat(FileUtil.extName(speechName))
.speed(1.0)
// 获取音频时长 .build();
FFmpegFrameGrabber grabber = new FFmpegFrameGrabber(responseBody.byteStream()); ResponseBody responseBody = openAiService.createSpeech(createSpeechRequest);
grabber.start(); FileUtil.writeBytes(responseBody.bytes(), outputVoiceFile);
long durationInSec = grabber.getFormatContext().duration() / 1000000;
grabber.close(); // 获取输出音频时长
AudioFile outputAudioFile = AudioFileIO.read(outputVoiceFile);
// 构建发送文件信息进行OSS存储 long outputVoiceDuration = outputAudioFile.getAudioHeader().getTrackLength();
OssFile receiveOssOssFile = new OssFile()
// 构建发送文件信息进行OSS存储
receiveOssOssFile = new OssFile()
.setFileName(receiveFileName) .setFileName(receiveFileName)
.setBucketName(ossProperties.getBucketName()) .setBucketName(ossProperties.getBucketName())
.setOriginal(speechName) .setOriginal(speechName)
.setType(FileUtil.extName(speechName)) .setType(FileUtil.extName(speechName))
.setFileSize(responseBody.contentLength()) .setFileSize(responseBody.contentLength())
.setDuration(durationInSec) .setDuration(outputVoiceDuration)
.setMimeType(Objects.requireNonNull(responseBody.contentType()).toString()); .setMimeType(Objects.requireNonNull(responseBody.contentType()).toString());
ossTemplate.putObject(ossProperties.getBucketName(), receiveFileName, Objects.requireNonNull(responseBody.contentType()).toString(), responseBody.byteStream()); ossTemplate.putObject(ossProperties.getBucketName(), receiveFileName, Objects.requireNonNull(responseBody.contentType()).toString(), FileUtil.getInputStream(outputVoiceFile));
String receiveVoiceUrl = ossTemplate.getObjectURL(ossProperties.getBucketName(), receiveFileName); String receiveVoiceUrl = ossTemplate.getObjectURL(ossProperties.getBucketName(), receiveFileName);
receiveOssOssFile.setAvailablePath(receiveVoiceUrl); receiveOssOssFile.setAvailablePath(receiveVoiceUrl);
fileService.save(receiveOssOssFile); fileService.save(receiveOssOssFile);
} catch (Exception e) {
// 保存AI回复聊天记录 throw new CheckedException(e.getLocalizedMessage());
content.setFiles(receiveOssOssFile.getId()); } finally {
super.save(content); if (inputVoiceFile.delete())
System.out.println("已成功删除临时输入语音文件!");
if (outputVoiceFile.delete())
System.out.println("已成功删除临时输出语音文件!");
}
Map<String, Object> result = Convert.toMap(String.class, Object.class, content); // ai chat message build
receiveContent.setFiles(receiveOssOssFile.getId());
super.save(receiveContent);
Map<String, Object> result = Convert.toMap(String.class, Object.class, receiveContent);
result.putAll(Convert.toMap(String.class, Object.class, receiveOssOssFile)); result.putAll(Convert.toMap(String.class, Object.class, receiveOssOssFile));
return result; return result;
} else { } else {
// 保存AI回复聊天记录
ImContent content = askChatCompletion(imContent); // ai chat message build
super.save(content); ImContent receiveContent = askChatCompletion(imContent);
return Convert.toMap(String.class, Object.class, content); super.save(receiveContent);
return Convert.toMap(String.class, Object.class, receiveContent);
} }
} }

Loading…
Cancel
Save