import time
import azure.cognitiveservices.speech as speechsdk
import asyncio
import sys
sys.path.append("E:\\GitHub\\Fay\\")
from core import tts_voice
from core.tts_voice import EnumVoice
from utils import util, config_util
from utils import config_util as cfg
import pygame
import edge_tts
class Speech:
def __init__(self):
self.ms_tts = False
if config_util.key_ms_tts_key and config_util.key_ms_tts_key is not None and config_util.key_ms_tts_key.strip() != "":
self.__speech_config = speechsdk.SpeechConfig(subscription=cfg.key_ms_tts_key, region=cfg.key_ms_tts_region)
self.__speech_config.speech_recognition_language = "zh-CN"
self.__speech_config.speech_synthesis_voice_name = "zh-CN-XiaoxiaoNeural"
self.__speech_config.set_speech_synthesis_output_format(speechsdk.SpeechSynthesisOutputFormat.Audio16Khz32KBitRateMonoMp3)
self.__synthesizer = speechsdk.SpeechSynthesizer(speech_config=self.__speech_config, audio_config=None)
self.ms_tts = True
self.__connection = None
self.__history_data = []
def __get_history(self, voice_name, style, text):
for data in self.__history_data:
if data[0] == voice_name and data[1] == style and data[2] == text:
return data[3]
return None
def connect(self):
if self.ms_tts:
self.__connection = speechsdk.Connection.from_speech_synthesizer(self.__synthesizer)
self.__connection.open(True)
util.log(1, "TTS 服务已经连接!")
def close(self):
if self.__connection is not None:
self.__connection.close()
#生成mp3音频
async def get_edge_tts(self,text,voice,file_url) -> None:
communicate = edge_tts.Communicate(text, voice)
await communicate.save(file_url)
"""
文字转语音
:param text: 文本信息
:param style: 说话风格、语气
:returns: 音频文件路径
"""
def to_sample(self, text, style):
if self.ms_tts:
voice_type = tts_voice.get_voice_of(config_util.config["attribute"]["voice"])
voice_name = EnumVoice.XIAO_XIAO.value["voiceName"]
if voice_type is not None:
voice_name = voice_type.value["voiceName"]
history = self.__get_history(voice_name, style, text)
if history is not None:
return history
ssml = '' \
'' \
'' \
'{}' \
'' \
'' \
''.format(voice_name, style, 1.8, text)
result = self.__synthesizer.speak_ssml(ssml)
audio_data_stream = speechsdk.AudioDataStream(result)
file_url = './samples/sample-' + str(int(time.time() * 1000)) + '.mp3'
audio_data_stream.save_to_wav_file(file_url)
if result.reason == speechsdk.ResultReason.SynthesizingAudioCompleted:
self.__history_data.append((voice_name, style, text, file_url))
return file_url
else:
util.log(1, "[x] 语音转换失败!")
util.log(1, "[x] 原因: " + str(result.reason))
return None
else:
voice_type = tts_voice.get_voice_of(config_util.config["attribute"]["voice"])
voice_name = EnumVoice.XIAO_XIAO.value["voiceName"]
if voice_type is not None:
voice_name = voice_type.value["voiceName"]
history = self.__get_history(voice_name, style, text)
if history is not None:
return history
ssml = '' \
'' \
'' \
'{}' \
'' \
'' \
''.format(voice_name, style, 1.8, text)
try:
file_url = './samples/sample-' + str(int(time.time() * 1000)) + '.mp3'
asyncio.new_event_loop().run_until_complete(self.get_edge_tts(text,voice_name,file_url))
self.__history_data.append((voice_name, style, text, file_url))
except Exception as e :
util.log(1, "[x] 语音转换失败!")
util.log(1, "[x] 原因: " + str(str(e)))
file_url = None
return file_url
if __name__ == '__main__':
cfg.load_config()
sp = Speech()
sp.connect()
text = """这是一段音频,测试一下3"""
s = sp.to_sample(text, "cheerful")
print(s)
sp.close()