2023.02.10

1. Added a choice of one of three NLP backends: xfaiui, yuan, chatgpt;
2. Fixed a recording error caused by a pyaudio bug;
3. Remote audio input/output devices are now limited to one connection at a time;
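For reference, the backend selection added in core/fay_core.py (shown later in this diff) boils down to the sketch below; the wrapper function name is illustrative, while the module, config and function names are taken from this commit.

from ai_module import xf_aiui, yuan_1_0, chatgpt
from utils import config_util as cfg

def answer(question_text):
    # Hypothetical wrapper: dispatch to whichever NLP backend chat_module selects in system.conf.
    cfg.load_config()
    if cfg.key_chat_module == 'xfaiui':
        return xf_aiui.question(question_text)
    elif cfg.key_chat_module == 'yuan':
        return yuan_1_0.question(question_text)
    elif cfg.key_chat_module == 'chatgpt':
        return chatgpt.question(question_text)
    raise RuntimeError('chat_module must be one of: xfaiui, yuan, chatgpt')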
This commit is contained in:
xszyou 2023-02-10 19:50:47 +08:00
parent cfd4a3fbbd
commit 77b2c0a479
16 changed files with 447 additions and 22 deletions

View File

@ -10,7 +10,7 @@
Digital avatars built with 3D engines and tools such as UE, C4D, DAZ, and LIVE2D can be connected to this "digital human controller" to create virtual streamers, digital tour guides, digital assistants, and more. We provide a UE4 integration demo, but we encourage users to implement their own preferred avatars.
Of course, if appearance does not matter to you, the "digital human controller" can also be used on its own as a voice assistant.
Of course, if appearance does not matter to you, the "digital human controller" can also be used on its own as a voice assistant. The NLP backend can be switched freely among AIUI, ChatGPT, and Yuan1.0.
<img src="images/5.png" alt="QA">
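The backend is chosen with the new chat_module entry in system.conf (see the config changes further down in this diff). A minimal sketch of reading that entry, assuming the same ConfigParser layout used by utils/config_util.py:

from configparser import ConfigParser

parser = ConfigParser()
parser.read('system.conf', encoding='UTF-8')
chat_module = parser.get('key', 'chat_module')  # expected to be one of: xfaiui, yuan, chatgpt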
@ -30,6 +30,9 @@
The latest version is 2.0. It introduces a brand-new architecture in which everyone can run the Fay controller on their own PC (in the future we may provide terminals so that your computer becomes the host of your digital assistant). All of your devices (watch, phone, glasses, laptop) can communicate with your digital assistant at any time, and the assistant will handle everything in the digital world for you through your computer. Jarvis? Her?
![](images/20230122074644.png)
2023.02
1. Added chatgpt and yuan1.0 as selectable NLP backends.
2023.01
1. Intranet tunneling for the controller PC, so remote audio input/output devices can connect directly;
2. Provided Android example project code for audio input/output;
@ -37,7 +40,8 @@
4. Restored the missing 1.0 voice-command music playback module (remote playback not yet supported);
5. Refactored and added several utility modules: websocket, multithreading, buffer, audio stream recorder, etc.;
6. Fixed multiple bugs from the 1.x releases;
7. Integrated the Live2D mascot (kanban-musume).
7. Integrated the Live2D mascot (kanban-musume);
2022.12
1. Uploaded the bin directory (for livestream interaction)
@ -68,7 +72,7 @@
取消静音
你在哪呢?
你可以说话了
- **播放歌曲**
- **播放歌曲(currently unavailable; the music library needs to be replaced)**
播放歌曲
播放音乐
唱首歌
@ -198,9 +202,11 @@ python main.py
| ------------------------- | -------------------------- | ------------------------------------------------------------ |
| ./ai_module/ali_nls.py | Alibaba Cloud real-time speech recognition | https://ai.aliyun.com/nls/trans |
| ./ai_module/ms_tts_sdk.py | Microsoft text-to-speech (SDK-based) | https://azure.microsoft.com/zh-cn/services/cognitive-services/text-to-speech/ |
| ./ai_module/xf_aiui.py | iFLYTEK human-machine interaction / natural language processing | https://aiui.xfyun.cn/solution/webapi |
| ./ai_module/xf_ltp.py | iFLYTEK sentiment analysis | https://www.xfyun.cn/service/emotion-analysis |
| ./utils/ngrok_util.py | ngrok.cc tunneling | http://ngrok.cc |
| ./utils/ngrok_util.py | ngrok.cc tunneling (optional) | http://ngrok.cc |
| ./ai_module/yuan_1_0.py | Inspur Yuan large model (NLP, 1 of 3) | https://air.inspur.com/ |
| ./ai_module/chatgpt.py | ChatGPT (NLP, 1 of 3) | ******* |
| ./ai_module/xf_aiui.py | iFLYTEK natural language processing (NLP, 1 of 3) | https://aiui.xfyun.cn/solution/webapi |

10
ai_module/chatgpt.py Normal file
View File

@ -0,0 +1,10 @@
from utils import config_util as cfg
import openai


def question(text):
    # Reload the configuration so that a freshly edited API key is picked up.
    cfg.load_config()
    openai.api_key = cfg.key_chatgpt_api_key
    prompt = text
    # Legacy completions endpoint of the openai 0.x SDK.
    completions = openai.Completion.create(engine="text-davinci-002", prompt=prompt, max_tokens=1024)
    a_msg = completions.choices[0].text
    return a_msg
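A quick usage sketch for the module above, assuming chatgpt_api_key has been filled in system.conf:

from ai_module import chatgpt

reply = chatgpt.question('你好')  # returns the raw completion text from text-davinci-002
print(reply)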

View File

@ -0,0 +1,194 @@
import os
import uuid

from ai_module.yuan1_0.url_config import submit_request, reply_request


def set_yuan_account(user, phone):
    os.environ['YUAN_ACCOUNT'] = user + '||' + phone


class Example:
    """Store some examples (input/output pairs and formats) used as few-shots to prime the model."""

    def __init__(self, inp, out):
        self.input = inp
        self.output = out
        self.id = uuid.uuid4().hex

    def get_input(self):
        """Return the input of the example."""
        return self.input

    def get_output(self):
        """Return the output of the example."""
        return self.output

    def get_id(self):
        """Return the unique ID of the example."""
        return self.id

    def as_dict(self):
        return {
            "input": self.get_input(),
            "output": self.get_output(),
            "id": self.get_id(),
        }


class Yuan:
    """The main class for a user to interface with the Inspur Yuan API.
    A user can set account info and add examples of the API request.
    """

    def __init__(self,
                 engine='base_10B',
                 temperature=0.9,
                 max_tokens=100,
                 input_prefix='',
                 input_suffix='\n',
                 output_prefix='答:',
                 output_suffix='\n\n',
                 append_output_prefix_to_query=False,
                 topK=1,
                 topP=0.9,
                 frequencyPenalty=1.2,
                 responsePenalty=1.2,
                 noRepeatNgramSize=2):
        self.examples = {}
        self.engine = engine
        self.temperature = temperature
        self.max_tokens = max_tokens
        self.topK = topK
        self.topP = topP
        self.frequencyPenalty = frequencyPenalty
        self.responsePenalty = responsePenalty
        self.noRepeatNgramSize = noRepeatNgramSize
        self.input_prefix = input_prefix
        self.input_suffix = input_suffix
        self.output_prefix = output_prefix
        self.output_suffix = output_suffix
        self.append_output_prefix_to_query = append_output_prefix_to_query
        self.stop = (output_suffix + input_prefix).strip()
        # if self.engine not in ['base_10B','translate','dialog']:
        #     raise Exception('engine must be one of [\'base_10B\',\'translate\',\'dialog\'] ')

    def add_example(self, ex):
        """Add an example to the object.
        Example must be an instance of the Example class."""
        assert isinstance(ex, Example), "Please create an Example object."
        self.examples[ex.get_id()] = ex

    def delete_example(self, id):
        """Delete example with the specific id."""
        if id in self.examples:
            del self.examples[id]

    def get_example(self, id):
        """Get a single example."""
        return self.examples.get(id, None)

    def get_all_examples(self):
        """Returns all examples as a list of dicts."""
        return {k: v.as_dict() for k, v in self.examples.items()}

    def get_prime_text(self):
        """Formats all examples to prime the model."""
        return "".join(
            [self.format_example(ex) for ex in self.examples.values()])

    def get_engine(self):
        """Returns the engine specified for the API."""
        return self.engine

    def get_temperature(self):
        """Returns the temperature specified for the API."""
        return self.temperature

    def get_max_tokens(self):
        """Returns the max tokens specified for the API."""
        return self.max_tokens

    def craft_query(self, prompt):
        """Creates the query for the API request."""
        q = self.get_prime_text(
        ) + self.input_prefix + prompt + self.input_suffix
        if self.append_output_prefix_to_query:
            q = q + self.output_prefix
        return q

    def format_example(self, ex):
        """Formats the input, output pair."""
        return self.input_prefix + ex.get_input(
        ) + self.input_suffix + self.output_prefix + ex.get_output(
        ) + self.output_suffix

    def response(self,
                 query,
                 engine='base_10B',
                 max_tokens=20,
                 temperature=0.9,
                 topP=0.1,
                 topK=1,
                 frequencyPenalty=1.0,
                 responsePenalty=1.0,
                 noRepeatNgramSize=0):
        """Obtains the original result returned by the API."""
        try:
            # requestId = submit_request(query, temperature, topP, topK, max_tokens, engine)
            requestId = submit_request(query, temperature, topP, topK, max_tokens, engine, frequencyPenalty,
                                       responsePenalty, noRepeatNgramSize)
            response_text = reply_request(requestId)
        except Exception as e:
            raise e
        return response_text

    def del_special_chars(self, msg):
        special_chars = ['<unk>', '<eod>', '#', '', '', '', ' ']
        for char in special_chars:
            msg = msg.replace(char, '')
        return msg

    def submit_API(self, prompt, trun=[]):
        """Submit prompt to the yuan API interface and obtain a pure text reply.
        :prompt: Question or any content a user may input.
        :return: pure text response."""
        query = self.craft_query(prompt)
        res = self.response(query, engine=self.engine,
                            max_tokens=self.max_tokens,
                            temperature=self.temperature,
                            topP=self.topP,
                            topK=self.topK,
                            frequencyPenalty=self.frequencyPenalty,
                            responsePenalty=self.responsePenalty,
                            noRepeatNgramSize=self.noRepeatNgramSize)
        if 'resData' in res and res['resData'] is not None:
            txt = res['resData']
        else:
            txt = '模型返回为空,请尝试修改输入'
        # Post-processing that only applies to the translation model.
        if self.engine == 'translate':
            txt = txt.replace(' ##', '').replace(' "', '"').replace(": ", ":").replace(" ,", ",") \
                .replace('英文:', '').replace('文:', '').replace("( ", "(").replace(" )", ")")
        else:
            txt = txt.replace(' ', '')
        txt = self.del_special_chars(txt)
        # trun: one or more stop strings used to truncate the model output.
        if isinstance(trun, str):
            trun = [trun]
        try:
            if trun is not None and isinstance(trun, list) and trun != []:
                for tr in trun:
                    if tr in txt and tr != "":
                        txt = txt[:txt.index(tr)]
                    else:
                        continue
        except:
            return txt
        return txt
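A minimal usage sketch of the classes above; the account and phone values are placeholders, and the few-shot Example is optional:

from ai_module.yuan1_0.inspurai import Yuan, Example, set_yuan_account

set_yuan_account('your_account', 'your_phone')            # placeholder credentials
yuan = Yuan(engine='dialog', max_tokens=30, append_output_prefix_to_query=True)
yuan.add_example(Example(inp='你是谁?', out='我是Fay。'))  # optional few-shot priming
print(yuan.submit_API(prompt='你好', trun=''))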

View File

@ -0,0 +1,72 @@
import requests
import hashlib
import time
from datetime import datetime
import pytz
import json
import os

ACCOUNT = ''
PHONE = ''

SUBMIT_URL = "http://api-air.inspur.com:32102/v1/interface/api/infer/getRequestId?"
REPLY_URL = "http://api-air.inspur.com:32102/v1/interface/api/result?"


def code_md5(str):
    code = str.encode("utf-8")
    m = hashlib.md5()
    m.update(code)
    result = m.hexdigest()
    return result


def rest_get(url, header, timeout, show_error=False):
    '''Call rest get method'''
    try:
        response = requests.get(url, headers=header, timeout=timeout, verify=False)
        return response
    except Exception as exception:
        if show_error:
            print(exception)
        return None


def header_generation():
    """Generate header for API request."""
    t = datetime.now(pytz.timezone("Asia/Shanghai")).strftime("%Y-%m-%d")
    global ACCOUNT, PHONE
    ACCOUNT, PHONE = os.environ.get('YUAN_ACCOUNT').split('||')
    token = code_md5(ACCOUNT + PHONE + t)
    headers = {'token': token}
    return headers


def submit_request(query, temperature, topP, topK, max_tokens, engine, frequencyPenalty, responsePenalty, noRepeatNgramSize):
    """Submit query to the backend server and get requestID."""
    headers = header_generation()
    # url = SUBMIT_URL + "account={0}&data={1}&temperature={2}&topP={3}&topK={4}&tokensToGenerate={5}&type={6}".format(ACCOUNT, query, temperature, topP, topK, max_tokens, "api")
    # url = SUBMIT_URL + "engine={0}&account={1}&data={2}&temperature={3}&topP={4}&topK={5}&tokensToGenerate={6}" \
    #       "&type={7}".format(engine, ACCOUNT, query, temperature, topP, topK, max_tokens, "api")
    url = SUBMIT_URL + "engine={0}&account={1}&data={2}&temperature={3}&topP={4}&topK={5}&tokensToGenerate={6}" \
                       "&type={7}&frequencyPenalty={8}&responsePenalty={9}&noRepeatNgramSize={10}". \
        format(engine, ACCOUNT, query, temperature, topP, topK, max_tokens, "api", frequencyPenalty, responsePenalty, noRepeatNgramSize)
    response = rest_get(url, headers, 30)
    response_text = json.loads(response.text)
    if response_text["flag"]:
        requestId = response_text["resData"]
        return requestId
    else:
        raise RuntimeWarning(response_text)


def reply_request(requestId, cycle_count=5):
    """Check reply API to get the inference response."""
    url = REPLY_URL + "account={0}&requestId={1}".format(ACCOUNT, requestId)
    headers = header_generation()
    response_text = {"flag": True, "resData": None}
    for i in range(cycle_count):
        response = rest_get(url, headers, 30, show_error=True)
        response_text = json.loads(response.text)
        if response_text["resData"] != None:
            return response_text
        if response_text["flag"] == False and i == cycle_count - 1:
            raise RuntimeWarning(response_text)
        time.sleep(3)
    return response_text
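As header_generation shows, the request token is simply the MD5 hex digest of account + phone + the current date in the Asia/Shanghai time zone; the same computation in isolation (with placeholder credentials):

import hashlib
from datetime import datetime
import pytz

day = datetime.now(pytz.timezone('Asia/Shanghai')).strftime('%Y-%m-%d')
token = hashlib.md5(('my_account' + 'my_phone' + day).encode('utf-8')).hexdigest()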

View File

@ -0,0 +1,92 @@
from simhash import Simhash
from ai_module.yuan1_0.inspurai import Yuan, set_yuan_account, Example
import heapq
import sys
from utils import config_util as cfg


class Yuan1Dialog:
    def __init__(self, account, phone) -> None:
        self.account = account
        self.phone = phone
        set_yuan_account(account, phone)
        self.yuan = Yuan(engine='dialog',
                         input_prefix="问:“",
                         input_suffix="",
                         output_prefix="答:“",
                         output_suffix="",
                         max_tokens=30,
                         append_output_prefix_to_query=True)
        self.h_dialog = []

    def get_relative_qa(self, prompt, h_dialog, topN=2):
        """
        A proper relevance computation could be plugged in here; for now we simply use the most recent dialog turns.
        :topN: number of related dialog turns to return
        """
        def simhash(query, text):
            """
            Use a locality-sensitive hash value to represent the semantics.
            """
            q_simhash = Simhash(query)
            t_simhash = Simhash(text)
            max_hashbit = max(len(bin(q_simhash.value)), len(bin(t_simhash.value)))
            distance = q_simhash.distance(t_simhash)
            # print(distance)
            similar = 1 - distance / max_hashbit
            return similar

        h_num = len(h_dialog)
        sim_values = []
        tm_effs = []
        rel_effs = []
        gamma = 0.8  # time effect coefficient
        if not h_dialog:
            return []
        else:
            for indx, dialog in enumerate(h_dialog):
                text = '|'.join((dialog.input, dialog.output))
                sim_value = simhash(prompt, text)
                tm_eff = gamma ** ((h_num - indx) / h_num)
                rel_eff = sim_value * tm_eff
                sim_values.append(sim_value)
                tm_effs.append(tm_eff)
                rel_effs.append(rel_eff)
            top_idx = heapq.nlargest(topN, range(len(rel_effs)), rel_effs.__getitem__)
            mst_dialog = [h_dialog[idx] for idx in top_idx]
            mst_dialog.reverse()
            return mst_dialog

    def update_example(self, yuan, exs):
        ex_ids = []
        for ex in exs:
            ex_ids.append(ex.get_id())
            yuan.add_example(ex)
        return yuan, ex_ids

    def dialog(self, prompt):
        yuan = self.yuan
        h_dialog = self.h_dialog
        exs = self.get_relative_qa(prompt, h_dialog)
        yuan, ex_ids = self.update_example(yuan, exs)
        response = yuan.submit_API(prompt=prompt, trun="")
        if len(h_dialog) < 10:  # keep at most the 10 most recent dialog turns
            h_dialog.append(Example(inp=prompt, out=response))
        else:
            del h_dialog[0]
            h_dialog.append(Example(inp=prompt, out=response))
        for ex_id in ex_ids:
            yuan.delete_example(ex_id)
        return response


if __name__ == "__main__":
    cfg.load_config()
    account = cfg.key_yuan_1_0_account
    phone = cfg.key_yuan_1_0_phone
    yuan1_dialog = Yuan1Dialog(account, phone)
    prompt = "你好"
    print(yuan1_dialog.dialog(prompt))

10
ai_module/yuan_1_0.py Normal file
View File

@ -0,0 +1,10 @@
from utils import config_util as cfg
from ai_module.yuan1_0.yuan1_0_dialog import Yuan1Dialog


def question(text):
    account = cfg.key_yuan_1_0_account
    phone = cfg.key_yuan_1_0_phone
    yuan1_dialog = Yuan1Dialog(account, phone)
    prompt = text
    a_msg = yuan1_dialog.dialog(prompt)
    return a_msg

View File

@ -126,7 +126,7 @@ public class FayConnectorService extends Service {
            }
            try {
                socket = new Socket("5gzvip.91tunnel.com", 10001);
                socket = new Socket("192.168.1.101", 10001);
                in = socket.getInputStream();
                out = socket.getOutputStream();
                Log.d("fay", "fay控制器连接成功");

View File

@ -86,7 +86,7 @@ public class MainActivity extends AppCompatActivity {
            }
            try {
                socket = new Socket("5gzvip.91tunnel.com", 10001);
                socket = new Socket("192.168.1.101", 10001);
                in = socket.getInputStream();
                out = socket.getOutputStream();
                Snackbar.make(view, "fay控制器连接成功", Snackbar.LENGTH_SHORT)

View File

@ -45,8 +45,8 @@
"url": "https://v.douyin.com/hL6ehu8/"
},
"record": {
"device": "\u9470\u866b\u6e80 (BT-50 PRO Hands-Free AG Aud",
"enabled": false
"device": "",
"enabled": true
}
}
}

View File

@ -21,8 +21,10 @@ from core.interact import Interact
from core.tts_voice import EnumVoice
from scheduler.thread_manager import MyThread
from utils import util, storer, config_util
from ai_module import yuan_1_0
from ai_module import chatgpt
import pygame
from utils import config_util as cfg
class FeiFei:
@ -243,7 +245,15 @@ class FeiFei:
            wsa_server.get_web_instance().add_cmd({"panelMsg": "思考中..."})
            util.log(1, '自然语言处理...')
            tm = time.time()
            cfg.load_config()
            if cfg.key_chat_module == 'xfaiui':
                text = xf_aiui.question(self.q_msg)
            elif cfg.key_chat_module == 'yuan':
                text = yuan_1_0.question(self.q_msg)
            elif cfg.key_chat_module == 'chatgpt':
                text = chatgpt.question(self.q_msg)
            else:
                raise RuntimeError('讯飞key、yuan key、chatgpt key都没有配置')
            util.log(1, '自然语言处理完成. 耗时: {} ms'.format(math.floor((time.time() - tm) * 1000)))
            if text == '哎呀,你这么说我也不懂,详细点呗' or text == '':
                util.log(1, '[!] 自然语言无语了!')
@ -536,8 +546,10 @@ class FeiFei:
        try:
            while True:
                self.deviceConnect, addr = self.deviceSocket.accept()  # accept a TCP connection; returns a new socket and the client address
                MyThread(target=self.__device_socket_keep_alive).start()
                MyThread(target=self.__device_socket_keep_alive).start()  # start the heartbeat keep-alive check
                util.log(1, "远程音频输入输出设备连接上:{}".format(addr))
                while self.deviceConnect:  # only one device connection is allowed at a time
                    time.sleep(1)
        except Exception as err:
            pass

View File

@ -97,7 +97,7 @@ class Recorder:
        last_mute_time = time.time()
        last_speaking_time = time.time()
        while self.__running:
            data = stream.read(1024)
            data = stream.read(1024, exception_on_overflow=False)
            if not data:
                continue
            else:
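Passing exception_on_overflow=False makes pyaudio return whatever frames are available instead of raising an IOError when the input buffer overflows, which appears to be the recording bug mentioned in the commit message. A standalone sketch of the same read pattern (format, rate and buffer size are illustrative, not taken from this file):

import pyaudio

pa = pyaudio.PyAudio()
stream = pa.open(format=pyaudio.paInt16, channels=1, rate=16000,
                 input=True, frames_per_buffer=1024)
data = stream.read(1024, exception_on_overflow=False)  # do not raise on input overflow
stream.stop_stream()
stream.close()
pa.terminate()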

View File

@ -16,7 +16,7 @@ def get_stream():
def send_audio(client):
    stream = get_stream()
    while stream:
        data = stream.read(1024)
        data = stream.read(1024, exception_on_overflow=False)
        client.send(data)
        time.sleep(0.005)
        print(".", end="")
@ -47,7 +47,7 @@ def receive_audio(client):
if __name__ == "__main__":
    client = socket.socket()
    client.connect(("5gzvip.91tunnel.com", 10001))
    client.connect(("192.168.1.101", 10001))
    pygame.init()
    thread_manager.MyThread(target=send_audio, args=(client,)).start()
    thread_manager.MyThread(target=receive_audio, args=(client,)).start()

View File

@ -16,3 +16,10 @@ websocket-client~=1.3.2
azure-cognitiveservices-speech~=1.21.0
aliyun-python-sdk-core==2.13.3
scipy~=1.10.0
openai~=0.26.5
simhash
pytz
# uuid, hashlib, datetime and json are Python standard-library modules and do not need to be (and should not be) pip-installed

View File

@ -12,13 +12,23 @@ ali_nls_app_key=
ms_tts_key=
ms_tts_region=
# iFLYTEK natural language processing service keys (required)
xf_aiui_app_id=
xf_aiui_api_key=
# iFLYTEK sentiment analysis service keys (required)
# iFLYTEK sentiment analysis service keys
xf_ltp_app_id=
xf_ltp_api_key=
# NLP backend, choose one of three: xfaiui, yuan, chatgpt
chat_module=xfaiui
# iFLYTEK natural language processing service keys (NLP option, 1 of 3)
xf_aiui_app_id=
xf_aiui_api_key=
# yuan_1_0 dialog bot service credentials (NLP option, 1 of 3)
yuan_1_0_account=
yuan_1_0_phone=
# chatgpt dialog bot service key (NLP option, 1 of 3)
chatgpt_api_key=
# ngrok.cc tunnel id (optional)
ngrok_cc_id=

View File

@ -17,6 +17,10 @@ key_xf_aiui_api_key = None
key_xf_ltp_app_id = None
key_xf_ltp_api_key = None
key_ngrok_cc_id = None
key_yuan_1_0_account = None
key_yuan_1_0_phone = None
key_chatgpt_api_key = None
key_chat_module = None
def load_config():
    global config
@ -32,6 +36,10 @@ def load_config():
    global key_xf_ltp_app_id
    global key_xf_ltp_api_key
    global key_ngrok_cc_id
    global key_yuan_1_0_account
    global key_yuan_1_0_phone
    global key_chatgpt_api_key
    global key_chat_module
    system_config = ConfigParser()
    system_config.read('system.conf', encoding='UTF-8')
@ -46,6 +54,10 @@ def load_config():
    key_xf_ltp_app_id = system_config.get('key', 'xf_ltp_app_id')
    key_xf_ltp_api_key = system_config.get('key', 'xf_ltp_api_key')
    key_ngrok_cc_id = system_config.get('key', 'ngrok_cc_id')
    key_yuan_1_0_account = system_config.get('key', 'yuan_1_0_account')
    key_yuan_1_0_phone = system_config.get('key', 'yuan_1_0_phone')
    key_chatgpt_api_key = system_config.get('key', 'chatgpt_api_key')
    key_chat_module = system_config.get('key', 'chat_module')
    config = json.load(codecs.open('config.json', encoding='utf-8'))

View File

@ -37,7 +37,7 @@ class StreamCache:
    @synchronized
    def read(self, length):
    def read(self, length, exception_on_overflow=False):  # extra argument kept for signature compatibility with pyaudio Stream.read; it is ignored here
        if self.idle < length:
            return None
        # print("read: {}".format(length), end=' ')