Browse Source

chore: Audio Handle

master
wangxiang 2 years ago
parent
commit
2fbc058d27
No known key found for this signature in database
GPG Key ID: 1BA7946AB6B232E4
  1. 10
      kicc-common/kicc-common-bom/pom.xml
  2. 4
      kicc-platform/kicc-platform-biz/kicc-system-biz/pom.xml
  3. 3
      kicc-platform/kicc-platform-biz/kicc-system-biz/src/main/java/com/cloud/kicc/system/config/OpenAiConfigProperties.java
  4. 112
      kicc-platform/kicc-platform-biz/kicc-system-biz/src/main/java/com/cloud/kicc/system/service/impl/ImContentServiceImpl.java

10
kicc-common/kicc-common-bom/pom.xml

@@ -33,7 +33,7 @@
<liquibase.version>4.22.0</liquibase.version>
<ureport2.version>2.2.9</ureport2.version>
<openai-gpt-java.api.version>0.17.0</openai-gpt-java.api.version>
<javacv.version>1.5.9</javacv.version>
<jaudiotagger.version>3.0.1</jaudiotagger.version>
</properties>
<!-- 定义全局jar版本,模块使用需要再次引入但不用写版本号-->
@@ -270,11 +270,11 @@
<artifactId>service</artifactId>
<version>${openai-gpt-java.api.version}</version>
</dependency>
<!--计算机视觉相关处理库(目前用于处理音频但支持更多功能): https://github.com/bytedeco/javacv -->
<!--音频文件相关处理库(目前用于处理音频但支持更多功能例如修复音频元文件): https://bitbucket.org/ijabz/jaudiotagger/src/master -->
<dependency>
<groupId>org.bytedeco</groupId>
<artifactId>javacv-platform</artifactId>
<version>${javacv.version}</version>
<groupId>net.jthink</groupId>
<artifactId>jaudiotagger</artifactId>
<version>${jaudiotagger.version}</version>
</dependency>
</dependencies>
</dependencyManagement>

4
kicc-platform/kicc-platform-biz/kicc-system-biz/pom.xml

@@ -93,8 +93,8 @@
<artifactId>service</artifactId>
</dependency>
<dependency>
<groupId>org.bytedeco</groupId>
<artifactId>javacv-platform</artifactId>
<groupId>net.jthink</groupId>
<artifactId>jaudiotagger</artifactId>
</dependency>
</dependencies>

3
kicc-platform/kicc-platform-biz/kicc-system-biz/src/main/java/com/cloud/kicc/system/config/OpenAiConfigProperties.java

@@ -35,4 +35,7 @@ public class OpenAiConfigProperties {
/** Timeout, in seconds */
private int timeout = 10;
/** OpenAI API Voice - <a href="https://platform.openai.com/docs/guides/text-to-speech/voice-options">...</a> */
private String voice;
}

112
kicc-platform/kicc-platform-biz/kicc-system-biz/src/main/java/com/cloud/kicc/system/service/impl/ImContentServiceImpl.java

@@ -13,6 +13,7 @@ import com.cloud.kicc.common.security.util.SecurityUtils;
import com.cloud.kicc.system.api.entity.ImContent;
import com.cloud.kicc.system.api.entity.OssFile;
import com.cloud.kicc.system.api.enums.ImMessageTypeEnum;
import com.cloud.kicc.system.config.OpenAiConfigProperties;
import com.cloud.kicc.system.mapper.ImContentMapper;
import com.cloud.kicc.system.service.FileService;
import com.cloud.kicc.system.service.IImContentService;
@@ -30,7 +31,8 @@ import com.theokanning.openai.service.OpenAiService;
import lombok.RequiredArgsConstructor;
import lombok.SneakyThrows;
import okhttp3.ResponseBody;
import org.bytedeco.javacv.FFmpegFrameGrabber;
import org.jaudiotagger.audio.AudioFile;
import org.jaudiotagger.audio.AudioFileIO;
import org.springframework.stereotype.Service;
import org.springframework.transaction.annotation.Transactional;
import org.springframework.web.multipart.MultipartFile;
@@ -61,6 +63,8 @@ public class ImContentServiceImpl extends ServiceImpl<ImContentMapper, ImContent
private final OssTemplate ossTemplate;
private final OpenAiConfigProperties openAiConfigProperties;
@Override
public IPage<Map<String, Object>> listHistoryMessage(Page page, ImContent imContent) {
return baseMapper.listHistoryMessage(page, imContent);
@@ -92,64 +96,74 @@ public class ImContentServiceImpl extends ServiceImpl<ImContentMapper, ImContent
fileService.save(sendOssOssFile);
imContent.setFiles(sendOssOssFile.getId());
// 语音转文字
File voiceFile = File.createTempFile(sendFileName, StrUtil.DOT + FileUtil.extName(file.getOriginalFilename()));
CreateTranscriptionRequest request = CreateTranscriptionRequest.builder()
.model("whisper-1")
.language("zh")
.build();
FileUtil.writeBytes(file.getBytes(), voiceFile);
TranscriptionResult transcriptionResult = openAiService.createTranscription(request, voiceFile);
if (voiceFile.delete()) {
System.out.println("已成功删除临时文件!");
}
imContent.setContent(transcriptionResult.getText());
ImContent content = askChatCompletion(imContent);
content.setContentType(ImMessageTypeEnum.AUDIO.getValue());
// 文字转语音
CreateSpeechRequest createSpeechRequest = CreateSpeechRequest.builder()
.model("tts-1")
.input(content.getContent())
.voice("nova")
.responseFormat(FileUtil.extName(speechName))
.speed(1.0)
.build();
ResponseBody responseBody = openAiService.createSpeech(createSpeechRequest);
// 获取音频时长
FFmpegFrameGrabber grabber = new FFmpegFrameGrabber(responseBody.byteStream());
grabber.start();
long durationInSec = grabber.getFormatContext().duration() / 1000000;
grabber.close();
// 构建发送文件信息进行OSS存储
OssFile receiveOssOssFile = new OssFile()
File inputVoiceFile = File.createTempFile(sendFileName, StrUtil.DOT + FileUtil.extName(file.getOriginalFilename()));
File outputVoiceFile = File.createTempFile(sendFileName, StrUtil.DOT + FileUtil.extName(file.getOriginalFilename()));
ImContent receiveContent;
OssFile receiveOssOssFile;
try {
// 语音转文字
CreateTranscriptionRequest request = CreateTranscriptionRequest.builder()
.model("whisper-1")
.language("zh")
.build();
FileUtil.writeBytes(file.getBytes(), inputVoiceFile);
TranscriptionResult transcriptionResult = openAiService.createTranscription(request, inputVoiceFile);
imContent.setContent(transcriptionResult.getText());
// ai涡轮增压
receiveContent = askChatCompletion(imContent);
receiveContent.setContentType(ImMessageTypeEnum.AUDIO.getValue());
// 文字转语音
CreateSpeechRequest createSpeechRequest = CreateSpeechRequest.builder()
.model("tts-1")
.input(receiveContent.getContent())
.voice(openAiConfigProperties.getVoice())
.responseFormat(FileUtil.extName(speechName))
.speed(1.0)
.build();
ResponseBody responseBody = openAiService.createSpeech(createSpeechRequest);
FileUtil.writeBytes(responseBody.bytes(), outputVoiceFile);
// 获取输出音频时长
AudioFile outputAudioFile = AudioFileIO.read(outputVoiceFile);
long outputVoiceDuration = outputAudioFile.getAudioHeader().getTrackLength();
// 构建发送文件信息进行OSS存储
receiveOssOssFile = new OssFile()
.setFileName(receiveFileName)
.setBucketName(ossProperties.getBucketName())
.setOriginal(speechName)
.setType(FileUtil.extName(speechName))
.setFileSize(responseBody.contentLength())
.setDuration(durationInSec)
.setDuration(outputVoiceDuration)
.setMimeType(Objects.requireNonNull(responseBody.contentType()).toString());
ossTemplate.putObject(ossProperties.getBucketName(), receiveFileName, Objects.requireNonNull(responseBody.contentType()).toString(), responseBody.byteStream());
String receiveVoiceUrl = ossTemplate.getObjectURL(ossProperties.getBucketName(), receiveFileName);
receiveOssOssFile.setAvailablePath(receiveVoiceUrl);
fileService.save(receiveOssOssFile);
// 保存AI回复聊天记录
content.setFiles(receiveOssOssFile.getId());
super.save(content);
ossTemplate.putObject(ossProperties.getBucketName(), receiveFileName, Objects.requireNonNull(responseBody.contentType()).toString(), FileUtil.getInputStream(outputVoiceFile));
String receiveVoiceUrl = ossTemplate.getObjectURL(ossProperties.getBucketName(), receiveFileName);
receiveOssOssFile.setAvailablePath(receiveVoiceUrl);
fileService.save(receiveOssOssFile);
} catch (Exception e) {
throw new CheckedException(e.getLocalizedMessage());
} finally {
if (inputVoiceFile.delete())
System.out.println("已成功删除临时输入语音文件!");
if (outputVoiceFile.delete())
System.out.println("已成功删除临时输输出语音文件!");
}
Map<String, Object> result = Convert.toMap(String.class, Object.class, content);
// ai chat message build
receiveContent.setFiles(receiveOssOssFile.getId());
super.save(receiveContent);
Map<String, Object> result = Convert.toMap(String.class, Object.class, receiveContent);
result.putAll(Convert.toMap(String.class, Object.class, receiveOssOssFile));
return result;
} else {
// 保存AI回复聊天记录
ImContent content = askChatCompletion(imContent);
super.save(content);
return Convert.toMap(String.class, Object.class, content);
// ai chat message build
ImContent receiveContent = askChatCompletion(imContent);
super.save(receiveContent);
return Convert.toMap(String.class, Object.class, receiveContent);
}
}

Loading…
Cancel
Save