4cfad5ae0f
- 全新ui - 全面优化websocket逻辑,提高数字人和ui连接的稳定性及资源开销 - 全面优化唤醒逻辑,提供稳定的普通唤醒模式和前置词唤醒模式 - 优化拾音质量,支持多声道麦克风拾音 - 优化自动播放服务器的对接机制,提供稳定和兼容旧版ue工程的对接模式 - 数字人接口输出机器人表情,以适应新fay ui及单片机的数字人表情输出 - 使用更高级的音频时长计算方式,可以更精准控制音频播放完成后的逻辑 - 修复点击关闭按钮会导致程序退出的bug - 修复没有麦克风的设备开启麦克风会出错的问题 - 为服务器主机地址提供配置项,以方便服务器部署
92 lines
2.9 KiB
Python
92 lines
2.9 KiB
Python
import base64
|
|
import json
|
|
import uuid
|
|
import requests
|
|
import time
|
|
from utils import util, config_util
|
|
from utils import config_util as cfg
|
|
import wave
|
|
|
|
|
|
class Speech:
    """Text-to-speech client for the Volcano TTS HTTP endpoint.

    Converts text into a WAV file under ``./samples`` by posting a synthesis
    request to ``https://openspeech.bytedance.com/api/v1/tts``.  Successful
    results are remembered per ``(voice, style, text)`` so that repeating the
    same utterance does not trigger another network round trip.
    """

    # (connect, read) timeout in seconds for the synthesis request.  Without
    # one, ``requests`` waits forever on a stalled connection.
    _REQUEST_TIMEOUT = (5, 30)

    # Upper bound on remembered utterances so a long-running process does not
    # accumulate history without limit.
    _HISTORY_LIMIT = 128

    def __init__(self):
        """Read the Volcano TTS credentials from the project configuration."""
        self.appid = cfg.volcano_tts_appid
        self.access_token = cfg.volcano_tts_access_token
        self.cluster = cfg.volcano_tts_cluster
        # List of (voice_name, style, text, file_url) tuples, oldest first.
        self.__history_data = []

    def connect(self):
        """No-op; every request is a standalone HTTP call.

        NOTE(review): presumably kept so this class offers the same methods as
        other TTS backends in the project - confirm against callers.
        """
        pass

    def __get_history(self, voice_name, style, text):
        """Return the cached file path for this utterance, or ``None``."""
        for cached_voice, cached_style, cached_text, cached_file in self.__history_data:
            if cached_voice == voice_name and cached_style == style and cached_text == text:
                return cached_file
        return None

    def __add_history(self, voice_name, style, text, file_url):
        """Remember ``file_url`` as the synthesis result for this utterance."""
        key = (voice_name, style, text)
        # Drop any earlier entry for the same utterance; otherwise the lookup
        # would keep returning the older (possibly deleted) file.
        self.__history_data = [
            entry for entry in self.__history_data if entry[:3] != key
        ]
        self.__history_data.append((voice_name, style, text, file_url))
        # Evict the oldest entries once the bound is exceeded.
        del self.__history_data[:-self._HISTORY_LIMIT]

    def to_sample(self, text, style):
        """Synthesize ``text`` and return the path of the resulting WAV file.

        Args:
            text: The plain text to speak.
            style: Used only as part of the cache key; it is not sent to the
                service.

        Returns:
            The path of the generated (or previously cached) ``.wav`` file, or
            ``None`` when the request fails or the response carries no audio.
            Failures inside the request/write phase are logged, not raised.
        """
        # Function-scope import: the module header does not import ``os``.
        import os

        # Voice resolution stays outside the try block, as before, so a broken
        # configuration still surfaces to the caller instead of being logged
        # as a synthesis failure.
        configured_voice = cfg.volcano_tts_voice_type
        if configured_voice is not None and configured_voice != '':
            voice = configured_voice
        else:
            voice = config_util.config["attribute"]["voice"]

        try:
            cached = self.__get_history(voice, style, text)
            # Only trust the cache while the file is still on disk.
            if cached is not None and os.path.isfile(cached):
                return cached

            host = "openspeech.bytedance.com"
            api_url = f"https://{host}/api/v1/tts"
            # The real credential travels in this header.
            header = {"Authorization": f"Bearer;{self.access_token}"}

            request_json = {
                "app": {
                    "appid": self.appid,
                    # NOTE(review): literal placeholder, not the real token -
                    # presumably the service only needs a non-empty value
                    # here; confirm against the API documentation.
                    "token": "access_token",
                    "cluster": self.cluster,
                },
                "user": {
                    "uid": "388808087185088",
                },
                "audio": {
                    "voice_type": voice,
                    "encoding": "wav",
                    "speed_ratio": 1.0,
                    "volume_ratio": 1.0,
                    "pitch_ratio": 1.0,
                },
                "request": {
                    "reqid": str(uuid.uuid4()),
                    "text": text,
                    "text_type": "plain",
                    "operation": "query",
                    "with_frontend": 1,
                    "frontend_type": "unitTson",
                },
            }

            response = requests.post(
                api_url,
                json.dumps(request_json),
                headers=header,
                timeout=self._REQUEST_TIMEOUT,
            )
            # Parse the body once instead of once per access.
            payload = response.json()
            if "data" not in payload:
                util.log(1, "[x] 语音转换失败!")
                # The body has no audio in it, so it is small enough to log
                # and usually explains why the service refused the request.
                util.log(1, "[x] 原因: " + str(payload))
                return None

            # Decode before creating the file so that malformed base64 does
            # not leave an empty sample behind.
            audio_bytes = base64.b64decode(payload["data"])

            file_url = './samples/sample-' + str(int(time.time() * 1000)) + '.wav'
            # NOTE(review): the request asks for "wav" encoding and the result
            # is wrapped in a fresh WAV container here.  If the service already
            # returns a RIFF header, that header ends up inside the audio
            # frames - verify against the API documentation.
            with wave.open(file_url, 'wb') as wf:
                wf.setnchannels(1)
                wf.setsampwidth(2)
                wf.setframerate(24000)
                wf.writeframes(audio_bytes)

            self.__add_history(voice, style, text, file_url)
            return file_url

        except Exception as e:
            # Best-effort boundary: callers receive None instead of an
            # exception, and the cause is written to the log.
            util.log(1, "[x] 语音转换失败!")
            util.log(1, "[x] 原因: " + str(e))
            return None

    def close(self):
        """No-op; there is no persistent connection to release."""
        pass
|
|
|
|
|