olivebot/tts/volcano_tts.py

import base64
import json
import os
import time
import uuid
import wave

import requests

from utils import util, config_util
from utils import config_util as cfg


class Speech:
    """Text-to-speech client for the ``openspeech.bytedance.com`` HTTP API.

    ``to_sample`` posts the text to ``/api/v1/tts``, base64-decodes the
    ``data`` field of the JSON reply and stores it as a WAV file under
    ``./samples``. Successful results are remembered per
    (voice, style, text) so a repeated utterance reuses the existing file.
    """

    _API_URL = "https://openspeech.bytedance.com/api/v1/tts"
    _SAMPLE_DIR = "./samples"
    # (connect, read) timeout in seconds; without one a stalled connection
    # would block the caller forever.
    _REQUEST_TIMEOUT = (10, 60)

    def __init__(self):
        self.appid = cfg.volcano_tts_appid
        self.access_token = cfg.volcano_tts_access_token
        self.cluster = cfg.volcano_tts_cluster
        # Cache of (voice, style, text, file path) tuples.
        self.__history_data = []

    def connect(self):
        """Do nothing; this backend needs no persistent connection."""
        pass

    def __get_history(self, voice_name, style, text):
        """Return the cached file path for this request, or None."""
        for data in self.__history_data:
            if data[0] == voice_name and data[1] == style and data[2] == text:
                return data[3]
        return None

    def _build_request(self, voice, text):
        """Build the JSON body for one synthesis request."""
        return {
            "app": {
                "appid": self.appid,
                # NOTE(review): this is the literal string, not
                # self.access_token; the real token is sent in the
                # Authorization header. Presumably a placeholder the
                # service accepts - confirm against the API docs.
                "token": "access_token",
                "cluster": self.cluster,
            },
            "user": {
                "uid": "388808087185088",
            },
            "audio": {
                "voice_type": voice,
                "encoding": "wav",
                "speed_ratio": 1.0,
                "volume_ratio": 1.0,
                "pitch_ratio": 1.0,
            },
            "request": {
                "reqid": str(uuid.uuid4()),
                "text": text,
                "text_type": "plain",
                "operation": "query",
                "with_frontend": 1,
                "frontend_type": "unitTson",
            },
        }

    @staticmethod
    def _write_wav(file_url, audio_bytes):
        """Write ``audio_bytes`` as mono 16-bit 24 kHz frames to ``file_url``."""
        # NOTE(review): the request asks for "wav" encoding and sets no
        # sample rate, so 24000 relies on the service default, and the
        # payload may already carry its own WAV header - confirm.
        with wave.open(file_url, 'wb') as wf:
            wf.setnchannels(1)
            wf.setsampwidth(2)
            wf.setframerate(24000)
            wf.writeframes(audio_bytes)

    def to_sample(self, text, style):
        """Synthesize ``text`` and return the path of the resulting WAV file.

        Args:
            text: Text to synthesize.
            style: Only used as part of the cache key; it is not sent to
                the service.

        Returns:
            Path of the WAV file, or None when synthesis fails (the failure
            is logged through ``util.log``).
        """
        configured_voice = cfg.volcano_tts_voice_type
        if configured_voice is not None and configured_voice != '':
            voice = configured_voice
        else:
            voice = config_util.config["attribute"]["voice"]
        try:
            cached = self.__get_history(voice, style, text)
            if cached is not None:
                if os.path.isfile(cached):
                    return cached
                # The file was removed behind our back: forget the stale
                # entry and synthesize again.
                self.__history_data.remove((voice, style, text, cached))

            response = requests.post(
                self._API_URL,
                json.dumps(self._build_request(voice, text)),
                headers={"Authorization": f"Bearer;{self.access_token}"},
                timeout=self._REQUEST_TIMEOUT,
            )
            payload = response.json()
            if "data" not in payload:
                util.log(1, "[x] 语音转换失败！")
                # Surface the service's own error payload for diagnosis.
                util.log(1, "[x] 原因: " + str(payload))
                return None

            os.makedirs(self._SAMPLE_DIR, exist_ok=True)
            file_url = './samples/sample-' + str(int(time.time() * 1000)) + '.wav'
            self._write_wav(file_url, base64.b64decode(payload["data"]))
            self.__history_data.append((voice, style, text, file_url))
            return file_url
        except Exception as e:
            # Best-effort API: callers receive None instead of an exception.
            util.log(1, "[x] 语音转换失败！")
            util.log(1, "[x] 原因: " + str(e))
            return None

    def close(self):
        """Do nothing; there is no connection or resource to release."""
        pass
年翻更新
- 全新ui
- 全面优化websocket逻辑，提高数字人和ui连接的稳定性并降低资源开销
- 全面优化唤醒逻辑，提供稳定的普通唤醒模式和前置词唤醒模式
- 优化拾音质量，支持多声道麦克风拾音
- 优化自动播放服务器的对接机制，提供稳定和兼容旧版ue工程的对接模式
- 数字人接口输出机器人表情，以适应新fay ui及单片机的数字人表情输出
- 使用更高级的音频时长计算方式，可以更精准控制音频播放完成后的逻辑
- 修复点击关闭按钮会导致程序退出的bug
- 修复没有麦克风的设备开启麦克风会出错的问题
- 为服务器主机地址提供配置项，以方便服务器部署
2024-10-26 11:34:55 +08:00
			`import base64`
			`import json`
			`import uuid`
			`import requests`
			`import time`
			`from utils import util, config_util`
			`from utils import config_util as cfg`
			`import wave`


			`class Speech:`
			`def __init__(self):`
			`self.appid = cfg.volcano_tts_appid`
			`self.access_token = cfg.volcano_tts_access_token`
			`self.cluster = cfg.volcano_tts_cluster`
			`self.__history_data = []`

			`def connect(self):`
			`pass`

			`def __get_history(self, voice_name, style, text):`
			`for data in self.__history_data:`
			`if data[0] == voice_name and data[1] == style and data[2] == text:`
			`return data[3]`
			`return None`

			`def to_sample(self, text, style) :`
			`if cfg.volcano_tts_voice_type != None and cfg.volcano_tts_voice_type != '':`
			`voice = cfg.volcano_tts_voice_type`
			`else:`
			`voice = config_util.config["attribute"]["voice"]`
			`try:`
			`history = self.__get_history(voice, style, text)`
			`if history is not None:`
			`return history`
			`host = "openspeech.bytedance.com"`
			`api_url = f"https://{host}/api/v1/tts"`
			`header = {"Authorization": f"Bearer;{self.access_token}"}`

			`request_json = {`
			`"app": {`
			`"appid": self.appid,`
			`"token": "access_token",`
			`"cluster": self.cluster`
			`},`
			`"user": {`
			`"uid": "388808087185088"`
			`},`
			`"audio": {`
			`"voice_type": voice,`
			`"encoding": "wav",`
			`"speed_ratio": 1.0,`
			`"volume_ratio": 1.0,`
			`"pitch_ratio": 1.0,`
			`},`
			`"request": {`
			`"reqid": str(uuid.uuid4()),`
			`"text": text,`
			`"text_type": "plain",`
			`"operation": "query",`
			`"with_frontend": 1,`
			`"frontend_type": "unitTson"`

			`}`
			`}`
			`response = requests.post(api_url, json.dumps(request_json), headers=header)`
			`if "data" in response.json():`
			`data = response.json()["data"]`
			`file_url = './samples/sample-' + str(int(time.time() * 1000)) + '.wav'`
			`with wave.open(file_url, 'wb') as wf:`
			`wf.setnchannels(1)`
			`wf.setsampwidth(2)`
			`wf.setframerate(24000)`
			`wf.writeframes(base64.b64decode(data))`
			`else :`
			`util.log(1, "[x] 语音转换失败！")`
			`file_url = None`
			`return file_url`
			`return file_url`

			`except Exception as e :`
			`util.log(1, "[x] 语音转换失败！")`
			`util.log(1, "[x] 原因: " + str(str(e)))`
			`file_url = None`
			`return file_url`


			`def close(self):`
			`pass`