# olivebot/ai_module/ms_tts_sdk.py

import time

import azure.cognitiveservices.speech as speechsdk
import asyncio
import sys
sys.path.append("E:\\GitHub\\Fay\\")
from core import tts_voice
from core.tts_voice import EnumVoice
from utils import util, config_util
from utils import config_util as cfg
import pygame
import edge_tts


class Speech:
    """Text-to-speech front end that writes synthesized audio to ./samples/.

    The Azure Speech SDK backend is used when ``config_util.key_ms_tts_key``
    holds a non-blank value; otherwise the ``edge_tts`` backend is used.
    Successful results are cached in memory per (voice name, style, text), so
    repeating a request returns the previously generated file path.
    """

    def __init__(self):
        # True only when an Azure key is configured; selects the backend used
        # by connect() and to_sample().
        self.ms_tts = False
        self.__synthesizer = None
        key = config_util.key_ms_tts_key
        if key and key.strip():
            self.__speech_config = speechsdk.SpeechConfig(subscription=cfg.key_ms_tts_key, region=cfg.key_ms_tts_region)
            self.__speech_config.speech_recognition_language = "zh-CN"
            self.__speech_config.speech_synthesis_voice_name = "zh-CN-XiaoxiaoNeural"
            self.__speech_config.set_speech_synthesis_output_format(speechsdk.SpeechSynthesisOutputFormat.Audio16Khz32KBitRateMonoMp3)
            # audio_config=None: the result is handled in to_sample() and
            # written to a file there.
            self.__synthesizer = speechsdk.SpeechSynthesizer(speech_config=self.__speech_config, audio_config=None)
            self.ms_tts = True
        self.__connection = None
        # Maps (voice_name, style, text) -> path of the generated audio file.
        self.__history_data = {}

    def __get_history(self, voice_name, style, text):
        """Return the cached file path for this request, or None if absent."""
        return self.__history_data.get((voice_name, style, text))

    @staticmethod
    def _resolve_voice_name():
        """Return the configured voice name, falling back to XIAO_XIAO."""
        voice_type = tts_voice.get_voice_of(config_util.config["attribute"]["voice"])
        if voice_type is not None:
            return voice_type.value["voiceName"]
        return EnumVoice.XIAO_XIAO.value["voiceName"]

    @staticmethod
    def _build_ssml(voice_name, style, text, style_degree=1.8):
        """Build the SSML request document with all inserted values escaped.

        :param voice_name: value of the ``<voice name>`` attribute
        :param style: value of the ``express-as`` ``style`` attribute
        :param text: text to be spoken
        :param style_degree: value of the ``styledegree`` attribute
        :returns: the SSML document as a string
        """
        from xml.sax.saxutils import escape

        # Attribute values additionally need the double quote escaped.
        attr_entities = {'"': '&quot;'}
        return ('<speak version="1.0" xmlns="http://www.w3.org/2001/10/synthesis" xmlns:mstts="https://www.w3.org/2001/mstts" xml:lang="zh-CN">'
                '<voice name="{}">'
                '<mstts:express-as style="{}" styledegree="{}">'
                '{}'
                '</mstts:express-as>'
                '</voice>'
                '</speak>').format(escape(str(voice_name), attr_entities),
                                   escape(str(style), attr_entities),
                                   style_degree,
                                   escape(str(text)))

    @staticmethod
    def _new_sample_path():
        """Return a sample file path named after the current time in ms."""
        return './samples/sample-' + str(int(time.time() * 1000)) + '.mp3'

    def connect(self):
        """Open the Azure synthesizer connection when that backend is active."""
        if self.ms_tts:
            self.__connection = speechsdk.Connection.from_speech_synthesizer(self.__synthesizer)
            self.__connection.open(True)
        util.log(1, "TTS 服务已经连接！")

    def close(self):
        """Close the connection opened by connect(); safe to call repeatedly."""
        if self.__connection is not None:
            self.__connection.close()
            # Drop the handle so a second close() is a no-op.
            self.__connection = None

    # Generate MP3 audio
    async def get_edge_tts(self, text, voice, file_url) -> None:
        """Synthesize ``text`` with edge_tts and save it to ``file_url``."""
        communicate = edge_tts.Communicate(text, voice)
        await communicate.save(file_url)

    def __synthesize_azure(self, voice_name, style, text):
        """Synthesize through the Azure SDK; return the file path or None."""
        result = self.__synthesizer.speak_ssml(self._build_ssml(voice_name, style, text))
        # Check the outcome first so a failed request never leaves a file behind.
        if result.reason != speechsdk.ResultReason.SynthesizingAudioCompleted:
            util.log(1, "[x] 语音转换失败！")
            util.log(1, "[x] 原因: " + str(result.reason))
            return None
        file_url = self._new_sample_path()
        speechsdk.AudioDataStream(result).save_to_wav_file(file_url)
        return file_url

    def __synthesize_edge(self, voice_name, text):
        """Synthesize through edge_tts; return the file path or None."""
        try:
            file_url = self._new_sample_path()
            loop = asyncio.new_event_loop()
            try:
                loop.run_until_complete(self.get_edge_tts(text, voice_name, file_url))
            finally:
                # A fresh loop is created per call, so it must be released.
                loop.close()
        except Exception as e:
            # Best effort: any backend failure is logged and reported as None.
            util.log(1, "[x] 语音转换失败！")
            util.log(1, "[x] 原因: " + str(e))
            return None
        return file_url

    def to_sample(self, text, style):
        """Convert text to speech and return the path of the audio file.

        :param text: text to synthesize
        :param style: speaking style / tone; sent to the Azure backend via
            SSML (the edge_tts backend is called with text and voice only)
        :returns: path of the generated or cached audio file, or None when
            synthesis fails
        """
        voice_name = self._resolve_voice_name()
        history = self.__get_history(voice_name, style, text)
        if history is not None:
            return history
        if self.ms_tts:
            file_url = self.__synthesize_azure(voice_name, style, text)
        else:
            file_url = self.__synthesize_edge(voice_name, text)
        if file_url is not None:
            self.__history_data[(voice_name, style, text)] = file_url
        return file_url


if __name__ == '__main__':
    # Manual smoke test: load the config, synthesize one sentence and print
    # the resulting file path.
    cfg.load_config()
    sp = Speech()
    try:
        sp.connect()
        text = """这是一段音频，测试一下3"""
        s = sp.to_sample(text, "cheerful")

        print(s)
    finally:
        # Always release the connection, even when synthesis raises.
        sp.close()
-												Update

											
										
										
											2022-06-20 11:05:10 +08:00
+								import time
 								import azure.cognitiveservices.speech as speechsdk
-												20230315

1、增加edge-tts语音合成（免费）可替换azure-tts(支持情绪化语音)；
2、替换flask发行版运行方式。

											
										
										
											2023-03-15 02:22:50 +08:00
+								import asyncio
 								import sys
 								sys.path.append("E:\\GitHub\\Fay\\")
-												Update

											
										
										
											2022-06-20 11:05:10 +08:00
+								from core import tts_voice
 								from core.tts_voice import EnumVoice
 								from utils import util, config_util
 								from utils import config_util as cfg
-.01

Fay2.0:
1、控制器pc内网穿透，音频输入输出设备远程直连；
2、提供android 音频输入输出工程示例代码；
3、提供python音频输入输出工程示例代码（远程PC、树莓派等可用）；
4、补传1.0语音指令音乐播放模块（暂不支持远程播放）；
5、重构及补充若干工具模块：websocket、多线程、缓冲器、音频流录制器等；
6、修复1.x版本的多个bug。

											
										
										
											2023-01-31 12:40:36 +08:00
+								import pygame
-												20230315

1、增加edge-tts语音合成（免费）可替换azure-tts(支持情绪化语音)；
2、替换flask发行版运行方式。

											
										
										
											2023-03-15 02:22:50 +08:00
+								import edge_tts
-.01

Fay2.0:
1、控制器pc内网穿透，音频输入输出设备远程直连；
2、提供android 音频输入输出工程示例代码；
3、提供python音频输入输出工程示例代码（远程PC、树莓派等可用）；
4、补传1.0语音指令音乐播放模块（暂不支持远程播放）；
5、重构及补充若干工具模块：websocket、多线程、缓冲器、音频流录制器等；
6、修复1.x版本的多个bug。

											
										
										
											2023-01-31 12:40:36 +08:00
-												Update

											
										
										
											2022-06-20 11:05:10 +08:00
 								class Speech:
 								    def __init__(self):
-												20230315

1、增加edge-tts语音合成（免费）可替换azure-tts(支持情绪化语音)；
2、替换flask发行版运行方式。

											
										
										
											2023-03-15 02:22:50 +08:00
+								        self.ms_tts = False
 								        if config_util.key_ms_tts_key and config_util.key_ms_tts_key is not None and config_util.key_ms_tts_key.strip() != "":
 								            self.__speech_config = speechsdk.SpeechConfig(subscription=cfg.key_ms_tts_key, region=cfg.key_ms_tts_region)
 								            self.__speech_config.speech_recognition_language = "zh-CN"
 								            self.__speech_config.speech_synthesis_voice_name = "zh-CN-XiaoxiaoNeural"
 								            self.__speech_config.set_speech_synthesis_output_format(speechsdk.SpeechSynthesisOutputFormat.Audio16Khz32KBitRateMonoMp3)
 								            self.__synthesizer = speechsdk.SpeechSynthesizer(speech_config=self.__speech_config, audio_config=None)
 								            self.ms_tts = True
-												Update

											
										
										
											2022-06-20 11:05:10 +08:00
+								        self.__connection = None
 								        self.__history_data = []
-												20230315

1、增加edge-tts语音合成（免费）可替换azure-tts(支持情绪化语音)；
2、替换flask发行版运行方式。

											
										
										
											2023-03-15 02:22:50 +08:00
-												Update

											
										
										
											2022-06-20 11:05:10 +08:00
+								    def __get_history(self, voice_name, style, text):
 								        for data in self.__history_data:
 								            if data[0] == voice_name and data[1] == style and data[2] == text:
 								                return data[3]
 								        return None
 								    def connect(self):
-												20230315

1、增加edge-tts语音合成（免费）可替换azure-tts(支持情绪化语音)；
2、替换flask发行版运行方式。

											
										
										
											2023-03-15 02:22:50 +08:00
+								        if self.ms_tts:
 								            self.__connection = speechsdk.Connection.from_speech_synthesizer(self.__synthesizer)
 								            self.__connection.open(True)
-												Update

											
										
										
											2022-06-20 11:05:10 +08:00
+								        util.log(1, "TTS 服务已经连接！")
 								    def close(self):
 								        if self.__connection is not None:
 								            self.__connection.close()
-												20230315

1、增加edge-tts语音合成（免费）可替换azure-tts(支持情绪化语音)；
2、替换flask发行版运行方式。

											
										
										
											2023-03-15 02:22:50 +08:00
+								    #生成mp3音频
 								    async def get_edge_tts(self,text,voice,file_url) -> None:
 								        communicate = edge_tts.Communicate(text, voice)
 								        await communicate.save(file_url)
-												Update

											
										
										
											2022-06-20 11:05:10 +08:00
+								    """
 								    文字转语音
 								    :param text: 文本信息
 								    :param style: 说话风格、语气
 								    :returns: 音频文件路径
 								    """
 								    def to_sample(self, text, style):
-												20230315

1、增加edge-tts语音合成（免费）可替换azure-tts(支持情绪化语音)；
2、替换flask发行版运行方式。

											
										
										
											2023-03-15 02:22:50 +08:00
+								        if self.ms_tts:
 								            voice_type = tts_voice.get_voice_of(config_util.config["attribute"]["voice"])
 								            voice_name = EnumVoice.XIAO_XIAO.value["voiceName"]
 								            if voice_type is not None:
 								                voice_name = voice_type.value["voiceName"]
 								            history = self.__get_history(voice_name, style, text)
 								            if history is not None:
 								                return history
 								            ssml = '<speak version="1.0" xmlns="http://www.w3.org/2001/10/synthesis" xmlns:mstts="https://www.w3.org/2001/mstts" xml:lang="zh-CN">' \
 								                   '<voice name="{}">' \
 								                   '<mstts:express-as style="{}" styledegree="{}">' \
 								                   '{}' \
 								                   '</mstts:express-as>' \
 								                   '</voice>' \
 								                   '</speak>'.format(voice_name, style, 1.8, text)
 								            result = self.__synthesizer.speak_ssml(ssml)
 								            audio_data_stream = speechsdk.AudioDataStream(result)
 								            file_url = './samples/sample-' + str(int(time.time() * 1000)) + '.mp3'
 								            audio_data_stream.save_to_wav_file(file_url)
 								            if result.reason == speechsdk.ResultReason.SynthesizingAudioCompleted:
 								                self.__history_data.append((voice_name, style, text, file_url))
 								                return file_url
 								            else:
 								                util.log(1, "[x] 语音转换失败！")
 								                util.log(1, "[x] 原因: " + str(result.reason))
 								                return None
-												Update

											
										
										
											2022-06-20 11:05:10 +08:00
+								        else:
-												20230315

1、增加edge-tts语音合成（免费）可替换azure-tts(支持情绪化语音)；
2、替换flask发行版运行方式。

											
										
										
											2023-03-15 02:22:50 +08:00
+								            voice_type = tts_voice.get_voice_of(config_util.config["attribute"]["voice"])
 								            voice_name = EnumVoice.XIAO_XIAO.value["voiceName"]
 								            if voice_type is not None:
 								                voice_name = voice_type.value["voiceName"]
 								            history = self.__get_history(voice_name, style, text)
 								            if history is not None:
 								                return history
 								            ssml = '<speak version="1.0" xmlns="http://www.w3.org/2001/10/synthesis" xmlns:mstts="https://www.w3.org/2001/mstts" xml:lang="zh-CN">' \
 								                   '<voice name="{}">' \
 								                   '<mstts:express-as style="{}" styledegree="{}">' \
 								                   '{}' \
 								                   '</mstts:express-as>' \
 								                   '</voice>' \
 								                   '</speak>'.format(voice_name, style, 1.8, text)
 								            try:
 								                file_url = './samples/sample-' + str(int(time.time() * 1000)) + '.mp3'
 								                asyncio.new_event_loop().run_until_complete(self.get_edge_tts(text,voice_name,file_url))
 								                self.__history_data.append((voice_name, style, text, file_url))
 								            except Exception as e :
 								                util.log(1, "[x] 语音转换失败！")
 								                util.log(1, "[x] 原因: " + str(str(e)))
 								                file_url = None
 								            return file_url
-.01

Fay2.0:
1、控制器pc内网穿透，音频输入输出设备远程直连；
2、提供android 音频输入输出工程示例代码；
3、提供python音频输入输出工程示例代码（远程PC、树莓派等可用）；
4、补传1.0语音指令音乐播放模块（暂不支持远程播放）；
5、重构及补充若干工具模块：websocket、多线程、缓冲器、音频流录制器等；
6、修复1.x版本的多个bug。

											
										
										
											2023-01-31 12:40:36 +08:00
+								if __name__ == '__main__':
 								    cfg.load_config()
 								    sp = Speech()
 								    sp.connect()
-												20230315

1、增加edge-tts语音合成（免费）可替换azure-tts(支持情绪化语音)；
2、替换flask发行版运行方式。

											
										
										
											2023-03-15 02:22:50 +08:00
+								    text = """这是一段音频，测试一下3"""
-.01

Fay2.0:
1、控制器pc内网穿透，音频输入输出设备远程直连；
2、提供android 音频输入输出工程示例代码；
3、提供python音频输入输出工程示例代码（远程PC、树莓派等可用）；
4、补传1.0语音指令音乐播放模块（暂不支持远程播放）；
5、重构及补充若干工具模块：websocket、多线程、缓冲器、音频流录制器等；
6、修复1.x版本的多个bug。

											
										
										
											2023-01-31 12:40:36 +08:00
+								    s = sp.to_sample(text, "cheerful")
-												20230315

1、增加edge-tts语音合成（免费）可替换azure-tts(支持情绪化语音)；
2、替换flask发行版运行方式。

											
										
										
											2023-03-15 02:22:50 +08:00
-.01

Fay2.0:
1、控制器pc内网穿透，音频输入输出设备远程直连；
2、提供android 音频输入输出工程示例代码；
3、提供python音频输入输出工程示例代码（远程PC、树莓派等可用）；
4、补传1.0语音指令音乐播放模块（暂不支持远程播放）；
5、重构及补充若干工具模块：websocket、多线程、缓冲器、音频流录制器等；
6、修复1.x版本的多个bug。

											
										
										
											2023-01-31 12:40:36 +08:00
+								    print(s)
-												20230315

1、增加edge-tts语音合成（免费）可替换azure-tts(支持情绪化语音)；
2、替换flask发行版运行方式。

											
										
										
											2023-03-15 02:22:50 +08:00
+								    sp.close()