4cfad5ae0f
- Brand-new UI
- Fully reworked WebSocket logic, improving the stability and resource overhead of the connection between the digital human and the UI
- Fully reworked wake-up logic, providing a stable normal wake mode and a wake-word (leading-phrase) wake mode
- Improved audio capture quality, with support for multi-channel microphone pickup
- Improved the integration mechanism for the auto-play server, providing a mode that is stable and compatible with older UE projects
- The digital-human interface now outputs robot facial expressions, to support expression output in the new Fay UI and on microcontroller devices
- Switched to a more precise audio-duration calculation, so the logic that runs after playback finishes can be controlled more accurately (a short sketch follows this list)
- Fixed a bug where clicking the close button caused the program to exit
- Fixed an error when enabling the microphone on devices that have none
- Added a configuration option for the server host address, to simplify server deployment
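As a rough illustration of the audio-duration item above: one way to measure a clip's length is pydub, which the lip-sync test script below also imports. This is only a sketch under that assumption, not necessarily the exact calculation Fay uses, and the file path is a placeholder.

from pydub import AudioSegment

def audio_duration_ms(path):
    # pydub defines len(AudioSegment) as the clip duration in milliseconds.
    return len(AudioSegment.from_file(path))

# Placeholder path; ffmpeg must be on PATH to decode non-wav formats.
print(audio_duration_ms("samples/sample.wav"))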
import subprocess
import time
import os

# Make the bundled ffmpeg available on PATH (pydub needs it to decode non-wav files).
os.environ['PATH'] += os.pathsep + os.path.join(os.getcwd(), "test", "ovr_lipsync", "ffmpeg", "bin")
from pydub import AudioSegment
import json


def list_files(dir_path):
    # Debug helper: recursively print every file under dir_path.
    for root, dirs, files in os.walk(dir_path):
        for file in files:
            print(os.path.join(root, file))


class LipSyncGenerator:
    def __init__(self):
        # The 15 viseme names emitted by OVR LipSync.
        self.viseme_em = [
            "sil", "PP", "FF", "TH", "DD",
            "kk", "CH", "SS", "nn", "RR",
            "aa", "E", "ih", "oh", "ou"]
        self.viseme = []
        self.exe_path = os.path.join(os.getcwd(), "test", "ovr_lipsync", "ovr_lipsync_exe", "ProcessWAV.exe")

    def run_exe_and_get_output(self, arguments):
        # Run ProcessWAV.exe and collect its stdout line by line into self.viseme.
        process = subprocess.Popen([self.exe_path] + arguments, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)

        while True:
            output = process.stdout.readline()
            if output == b'' and process.poll() is not None:
                break
            if output:
                self.viseme.append(output.strip().decode())
        rc = process.poll()
        return rc

    def filter(self, viseme):
        # Keep only lines that are valid viseme names, dropping any other tool output.
        new_viseme = []
        for v in viseme:
            if v in self.viseme_em:
                new_viseme.append(v)
        return new_viseme

    def generate_visemes(self, wav_filepath):
        arguments = ["--print-viseme-name", wav_filepath]
        self.run_exe_and_get_output(arguments)

        return self.filter(self.viseme)

    def consolidate_visemes(self, viseme_list):
        # Run-length encode the per-frame viseme stream into {"Lip", "Time"} entries.
        if not viseme_list:
            return []

        result = []
        current_viseme = viseme_list[0]
        count = 1

        for viseme in viseme_list[1:]:
            if viseme == current_viseme:
                count += 1
            else:
                result.append({"Lip": current_viseme, "Time": count * 33})  # duration in ms, assuming ~33 ms per frame
                current_viseme = viseme
                count = 1

        # Add the last viseme to the result
        result.append({"Lip": current_viseme, "Time": count * 33})  # duration in ms, assuming ~33 ms per frame

        # Merge entries shorter than 30 ms into the previous entry.
        new_data = []
        for i in range(len(result)):
            if result[i]['Time'] < 30:
                if len(new_data) > 0:
                    new_data[-1]['Time'] += result[i]['Time']
            else:
                new_data.append(result[i])
        return new_data


if __name__ == "__main__":
    start_time = time.time()
    lip_sync_generator = LipSyncGenerator()
    viseme_list = lip_sync_generator.generate_visemes("E:\\github\\Fay\\samples\\fay-man.mp3")
    print(viseme_list)
    consolidated_visemes = lip_sync_generator.consolidate_visemes(viseme_list)
    print(json.dumps(consolidated_visemes))
    print(time.time() - start_time)
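For reference, a small illustrative run of consolidate_visemes on a hand-made frame list (hypothetical input; assumes the LipSyncGenerator class above is already defined in the session):

gen = LipSyncGenerator()
# Two "sil" frames followed by three "PP" frames, each counted as ~33 ms.
print(gen.consolidate_visemes(["sil", "sil", "PP", "PP", "PP"]))
# prints: [{'Lip': 'sil', 'Time': 66}, {'Lip': 'PP', 'Time': 99}]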