diff --git a/README.md b/README.md index a60c196..4e275d7 100644 --- a/README.md +++ b/README.md @@ -10,8 +10,7 @@ - -**12月迟来的报到,Fay数字人 AI Agent版(含智慧农业箱的操作demo代码,如果你需要完整代码可以公众号留言申请获取)第5版正式上传!** +**请先想明白** 如果你需要是一个线上线下的销售员,请移步[`带货完整版`](https://github.com/TheRamU/Fay/tree/fay-sales-edition) @@ -19,13 +18,25 @@ -***“优秀的产品都值得用数字人从新做一遍”*** +***然后,“优秀的产品都值得用数字人从新做一遍”*** +1、基于日程维护的助理模式:执行及维护你的日程,绝不是一个简单的闹钟 +Fay -亮点:计划任务主动执行,无需一问一答,自动规划及调用agent tool去完成工作;使用open ai tts;使用向量数据库实现永久记忆及记忆检索; +2、强大的规划执行(ReAct)能力:规划->执行<->反思->总结 +Fay -![](images/agent_demo.gif) +3、LLM Chain与React Agent自动切换:保留规划执行能力的同时兼顾聊天能力(还需优化) +Fay + +4、双记忆机制:斯坦福AI小镇的记忆流(时间、重要性、相关度)实现长时记忆,邻近对话记忆实现连贯对话 +Fay + +5、易于扩展的agent 工具 +Fay + +6、配套24小时后台运行的android 连接器 +Fay -​ (上图:实测ReAct能力) ## **安装说明** @@ -62,20 +73,23 @@ python main.py + 仓库地址:https://github.com/xszyou/fay-android ### **更新日志** +2024.01.01: +openai token计算✓ +优化ReAct Agent 与 LLM Chain自动切换逻辑✓ +*添加双记忆机制:长时记忆流及短时聊天记忆✓ +修复record.py asr bug✓ +提高远程音频(android 连接器)的稳定性✓ +修复执行时间计算bug✓ +优化语音输出逻辑✓ + 2023.12.25: *实现agent ReAct与LLM chain自动切换逻辑✓ - 聊天窗区分任务消息✓ - 修复删除日程bug✓ - 优化远程音频逻辑✓ - 等待处理引入加载中效果✓ - 优化prompt以解决日程任务递归调用问题✓ - 修复一次性日程清除的bug✓ diff --git a/README_EN.md b/README_EN.md index 85457ae..9e28e5e 100644 --- a/README_EN.md +++ b/README_EN.md @@ -10,20 +10,30 @@ - -**Belated December announcement, the 5th edition of Fay Digital Human AI Agent Version (complete code for smart agriculture box can be requested via our public channel) is officially uploaded!** +**Please Understand First** If you need an online and offline salesperson, please go to [`Complete Retail Version`](https://github.com/TheRamU/Fay/tree/fay-sales-edition) If you need a digital human assistant for human-computer interaction (and yes, you can command it to switch devices on and off), please go to [`Complete Assistant Version`](https://github.com/TheRamU/Fay/tree/fay-assistant-edition) -***“Exceptional products deserve to be reimagined with digital humans”*** +**"Excellent products deserve to be redone with digital humans."** +1.Assistant mode based on schedule maintenance: Managing and maintaining your schedule, not just a simple alarm clock. +Fay -Highlights: Proactive execution of planned tasks without the need for question-and-answer interactions, automatic planning and use of the agent tool to complete tasks; use of OpenAI TTS; use of a vector database for permanent memory and memory retrieval; +2.Powerful planning and execution (ReAct) capability: Plan -> Execute <-> Reflect -> Summarize. +Fay -![](images/agent_demo.gif) +3.Automatic switching between LLM Chain and React Agent: Retains planning and execution capabilities while considering chatting abilities (still needs optimization). +Fay -​ (Above image: Testing ReAct capabilities) +4.Dual memory mechanism: Stanford AI Town's memory stream (time, importance, relevance) for long-term memory, and adjacent conversation memory for coherent conversations. +Fay + +5.Easily expandable agent tools. +Fay + +6.Accompanying 24-hour background running Android connector. +Fay ## **Installation Instructions** @@ -60,19 +70,24 @@ Repository URL: https://github.com/xszyou/fay-android ### **Changelog** +2024.01.01: + +OpenAI token calculation ✓ +Optimized ReAct Agent and LLM Chain auto-switching logic ✓ +*Added dual memory mechanism: long-term memory stream and short-term chat memory ✓ +Fixed record.py ASR bug ✓ +Improved stability of remote audio (Android connector) ✓ +Fixed execution time calculation bug ✓ +Optimized voice output logic ✓ + 2023.12.25: Implemented the automatic switching logic between agent ReAct and LLM chain ✓ Distinguished task messages in the chat window ✓ - Fixed the bug in deleting schedules ✓ - Optimized remote audio logic ✓ - Introduced loading effects for pending processes ✓ - Optimized prompts to resolve recursive calling issues in schedule tasks ✓ - Fixed the bug in clearing one-time schedules ✓ diff --git a/agent/agent_service.py b/agent/agent_service.py index d006538..1eef704 100644 --- a/agent/agent_service.py +++ b/agent/agent_service.py @@ -3,6 +3,7 @@ import threading import datetime import time from agent.fay_agent import FayAgentCore +from core import fay_core scheduled_tasks = {} agent_running = False @@ -52,8 +53,10 @@ def parse_repeat_rule(rule, task_time): # 执行任务 def execute_task(task_time, id, content): - agent.run("执行任务->立刻" + content) - del scheduled_tasks[id] + agent.is_chat = False + fay_core.send_for_answer("执行任务->立刻" + content) + if id in scheduled_tasks: + del scheduled_tasks[id] # 如果不重复,执行后删除记录 conn = sqlite3.connect('timer.db') cursor = conn.cursor() @@ -89,24 +92,24 @@ def agent_start(): global agent agent_running = True - init_db() + # init_db() check_and_execute_thread = threading.Thread(target=check_and_execute) check_and_execute_thread.start() #初始计划 - agent.run("""执行任务--> - 你是一个智慧农业实验箱里的ai,你的责任是陪伴主人生活、工作,以及协助主人打理好农业种植箱里的农作物: - 1、在每天早上8点、中午12点、晚上10点检查农业种植箱的状态是否附合设定的预期执行,如果不符合请告知我调整。 - 2、每天12点“语音提醒主人吃饭”; - 3、在星期一到星期五13:30“语音提醒主人开始工作”; - 4、在星期一到星期五15:15“语音提醒主人冲咖啡”; - 5、在星期一、星期三的11:15“语音提醒主人开会”; - 6、在星期五17:30“语音提醒主人开会”; - 7、在星期一到星期五18:00“语音提醒主人下班”; - 8、在每天21点陪主人聊聊天; - 9、在每天晚上10:30会跟据第二天的天气预报信息和当天的运行情况,检查iotm系统当天的控制规则; + # fay_core.send_for_answer("""执行任务--> + # 你是一个智慧农业实验箱里的ai,你的责任是陪伴主人生活、工作,以及协助主人打理好农业种植箱里的农作物: + # 1、在每天早上8点、中午12点、晚上10点检查农业种植箱的状态是否附合设定的预期执行,如果不符合请告知我调整。 + # 2、每天12点“语音提醒主人吃饭”; + # 3、在星期一到星期五13:30“语音提醒主人开始工作”; + # 4、在星期一到星期五15:15“语音提醒主人冲咖啡”; + # 5、在星期一、星期三的11:15“语音提醒主人开会”; + # 6、在星期五17:30“语音提醒主人开会”; + # 7、在星期一到星期五18:00“语音提醒主人下班”; + # 8、在每天21点陪主人聊聊天; + # 9、在每天晚上10:30会跟据第二天的天气预报信息和当天的运行情况,检查iotm系统当天的控制规则; - """) + # """) def agent_stop(): global agent_running diff --git a/agent/fay_agent.py b/agent/fay_agent.py index 71b3f2d..d52680d 100644 --- a/agent/fay_agent.py +++ b/agent/fay_agent.py @@ -1,3 +1,7 @@ +import os +import time +import math + from langchain.embeddings.openai import OpenAIEmbeddings from langchain.chat_models import ChatOpenAI from langchain.memory import VectorStoreRetrieverMemory @@ -21,18 +25,18 @@ from agent.tools.DeleteTimer import DeleteTimer from agent.tools.GetSwitchLog import GetSwitchLog from agent.tools.getOnRunLinkage import getOnRunLinkage from agent.tools.SetChatStatus import SetChatStatus - +from langchain.callbacks import get_openai_callback +from langchain.retrievers import TimeWeightedVectorStoreRetriever +from langchain.memory import ConversationBufferWindowMemory import utils.config_util as utils -from core.content_db import Content_Db from core import wsa_server -import os - +import fay_booter +from utils import util class FayAgentCore(): def __init__(self): - utils.load_config() os.environ['OPENAI_API_KEY'] = utils.key_gpt_api_key #使用open ai embedding @@ -41,19 +45,24 @@ class FayAgentCore(): embedding_fn = OpenAIEmbeddings() #创建llm - llm = ChatOpenAI(model="gpt-4-1106-preview", verbose=True) + self.llm = ChatOpenAI(model="gpt-4-1106-preview", verbose=True) #创建向量数据库 - vectorstore = FAISS(embedding_fn, index, InMemoryDocstore({}), {}) + def relevance_score_fn(self, score: float) -> float: + return 1.0 - score / math.sqrt(2) + vectorstore = FAISS(embedding_fn, index, InMemoryDocstore({}), {}, relevance_score_fn=relevance_score_fn) - # 创建记忆 - retriever = vectorstore.as_retriever(search_kwargs=dict(k=2)) - memory = VectorStoreRetrieverMemory(memory_key="chat_history", retriever=retriever) + # 创建记忆(斯坦福小镇同款记忆检索机制:时间、相关性、重要性三个维度) + retriever = TimeWeightedVectorStoreRetriever(vectorstore=vectorstore, other_score_keys=["importance"], k=3) + self.agent_memory = VectorStoreRetrieverMemory(memory_key="history", retriever=retriever) # 保存基本信息到记忆 utils.load_config() attr_info = ", ".join(f"{key}: {value}" for key, value in utils.config["attribute"].items()) - memory.save_context({"input": "我的基本信息是?"}, {"output": attr_info}) + self.agent_memory.save_context({"input": "我的基本信息是?"}, {"output": attr_info}) + + #内存保存聊天历史 + self.chat_history = [] #创建agent chain my_timer = MyTimer() @@ -70,7 +79,7 @@ class FayAgentCore(): get_on_run_linkage = getOnRunLinkage() set_chat_status_tool = SetChatStatus() - tools = [ + self.tools = [ Tool( name=my_timer.name, func=my_timer.run, @@ -140,58 +149,136 @@ class FayAgentCore(): #agent用于执行任务 self.agent = initialize_agent(agent_types=agent_types.AgentType.CHAT_CONVERSATIONAL_REACT_DESCRIPTION, - tools=tools, llm=llm, verbose=True, - max_history=5, - memory=memory, handle_parsing_errors=True) + tools=self.tools, llm=self.llm, verbose=True, + max_history=5, handle_parsing_errors=True) - #创建llm chain用于聊天 - tools_prompt = "[" - tool_names = [tool.name for tool in tools if tool.name != set_chat_status_tool.name and tool.name != say_tool.name] - tools_prompt += "、".join(tool_names) + "]" - template = """你是一个智慧农业实验箱里的ai,你的责任是陪伴主人生活、工作,以及协助主人打理好农业种植箱里的农作物.现在主人正在和你聊天,若你在聊天过程中感觉到主人想使用以下工具,请按“agent:'主人刚刚的说话'”这样的格式回复,否则请直接回复我文字内容。工具如下: - """ + tools_prompt +""" - {chat_history} - Human: {human_input} - AI:""" - prompt = PromptTemplate( - input_variables=["chat_history", "human_input"], template=template - ) - self.llm_chain = LLMChain( - llm=llm, - prompt=prompt, - verbose=True, - memory=memory - ) - + #llm chain 用于聊天 self.is_chat = False#聊天状态 + #记录一轮执行有无调用过say tool + self.is_use_say_tool = False + self.say_tool_text = "" - def run(self, input_text): - #消息保存 - contentdb = Content_Db() - contentdb.add_content('member', 'agent', input_text.replace('主人语音说了:', '').replace('主人文字说了:', '')) - wsa_server.get_web_instance().add_cmd({"panelReply": {"type":"member","content":input_text.replace('主人语音说了:', '').replace('主人文字说了:', '')}}) + self.total_tokens = 0 + self.total_cost = 0 + + + def format_history_str(self, str): result = "" + history_string = str['history'] + + # Split the string into lines + lines = history_string.split('input:') + + # Initialize an empty list to store the formatted history + formatted_history = [] + + #处理记忆流格式 + for line in lines: + if "output" in line: + input_line = line.split("output:")[0].strip() + output_line = line.split("output:")[1].strip() + formatted_history.append({"input": input_line, "output": output_line}) + + + # 记忆流转换成字符串 + result += "-以下是与用户说话关连度最高的记忆:\n" + for i in range(len(formatted_history)): + if i >= 3: + break + line = formatted_history[i] + result += f"--input:{line['input']}\n--output:{line['output']}\n" + if len(formatted_history) == 0: + result += "--没有记录\n" + + + #添加内存记忆 + formatted_history = [] + for line in self.chat_history: + formatted_history.append({"input": line[0], "output": line[1]}) + + #格式化内存记忆字符串 + result += "\n-以下刚刚的对话:\n" + for i in range(len(formatted_history)): + line = formatted_history[i] + result += f"--input:{line['input']}\n--output:{line['output']}\n" + if len(formatted_history) == 0: + result += "--没有记录\n" + + return result + + + def get_llm_chain(self, history): + tools_prompt = "[" + tool_names = [tool.name for tool in self.tools if tool.name != SetChatStatus().name and tool.name != Say().name] + tools_prompt += "、".join(tool_names) + "]" + template = """ +你是一个智能家居系统中的AI,负责协助主人处理日常事务和智能设备的操作。当主人提出要求时,如果需要使用特定的工具或执行特定的操作,请严格回复“agent: {human_input}”字符串。如果主人只是进行普通对话或询问信息,直接以文本内容回答即可。你可以使用的工具或执行的任务包括:。 +""" + tools_prompt + "等。" +""" +现在时间是:now_time +请依据以下信息回复主人: +chat_history + +input: +{human_input} +output:""".replace("chat_history", history).replace("now_time", QueryTime().run("")) + prompt = PromptTemplate( + input_variables=["human_input"], template=template + ) + + llm_chain = LLMChain( + llm=self.llm, + prompt=prompt, + verbose=True + ) + return llm_chain + + def run(self, input_text): + self.is_use_say_tool = False + self.say_tool_text = "" + + result = "" + history = self.agent_memory.load_memory_variables({"input":input_text.replace('主人语音说了:', '').replace('主人文字说了:', '')}) + history = self.format_history_str(history) try: #判断执行聊天模式还是agent模式,双模式在运行过程中会主动切换 if self.is_chat: - result = self.llm_chain.predict(human_input=input_text.replace('主人语音说了:', '').replace('主人文字说了:', '')) + llm_chain = self.get_llm_chain(history) + with get_openai_callback() as cb: + result = llm_chain.predict(human_input=input_text.replace('主人语音说了:', '').replace('主人文字说了:', '')) + self.total_tokens = self.total_tokens + cb.total_tokens + self.total_cost = self.total_cost + cb.total_cost + util.log(1, "本次消耗token:{}, Cost (USD):{},共消耗token:{}, Cost (USD):{}".format(cb.total_tokens, cb.total_cost, self.total_tokens, self.total_cost)) + if "agent:" in result.lower() or not self.is_chat: - print(result) - print(self.is_chat) self.is_chat = False - input_text = result if result.lower().replace("agent:", "") else input_text - result = self.agent.run(input_text) + input_text = result.lower().replace("agent:", "") if "agent:" in result.lower() else input_text.replace('主人语音说了:', '').replace('主人文字说了:', '') + agent_prompt = """ +现在时间是:{now_time}。请依据以下信息为主人服务 : +{history} +input:{input_text} +output: +""".format(history=history, input_text=input_text, now_time=QueryTime().run("")) + print(agent_prompt) + with get_openai_callback() as cb: + result = self.agent.run(agent_prompt) + self.total_tokens = self.total_tokens + cb.total_tokens + self.total_cost = self.total_cost + cb.total_cost + util.log(1, "本次消耗token:{}, Cost (USD):{},共消耗token:{}, Cost (USD):{}".format(cb.total_tokens, cb.total_cost, self.total_tokens, self.total_cost)) + except Exception as e: print(e) + result = "执行完毕" if result is None or result == "N/A" else result - + chat_text = self.say_tool_text if self.is_use_say_tool else result - #消息保存 - contentdb.add_content('fay','agent', result) - wsa_server.get_web_instance().add_cmd({"panelReply": {"type":"fay","content":result}}) - - return result + #保存到记忆流和聊天对话 + self.agent_memory.save_context({"input": input_text.replace('主人语音说了:', '').replace('主人文字说了:', '')},{"output": result}) + self.chat_history.append((input_text.replace('主人语音说了:', '').replace('主人文字说了:', ''), chat_text)) + if len(self.chat_history) > 5: + self.chat_history.pop(0) + + return self.is_use_say_tool, chat_text if __name__ == "__main__": agent = FayAgentCore() diff --git a/agent/tools/Say.py b/agent/tools/Say.py index ae59bdf..66868aa 100644 --- a/agent/tools/Say.py +++ b/agent/tools/Say.py @@ -21,9 +21,11 @@ class Say(BaseTool): def _run(self, para: str) -> str: + agent_service.agent.is_chat = True + agent_service.agent.is_use_say_tool = True + agent_service.agent.say_tool_text = para interact = Interact("audio", 1, {'user': '', 'msg': para}) fay_booter.feiFei.on_interact(interact) - agent_service.agent.is_chat = True return "语音输出了:" + para diff --git a/core/fay_core.py b/core/fay_core.py index eb75723..ba90676 100644 --- a/core/fay_core.py +++ b/core/fay_core.py @@ -10,10 +10,8 @@ import logging # 适应模型使用 import numpy as np -# import tensorflow as tf import fay_booter from ai_module import xf_ltp -# from ai_module.ms_tts_sdk import Speech from ai_module.openai_tts import Speech from core import wsa_server from core.interact import Interact @@ -27,6 +25,7 @@ import platform from ai_module import yolov8 from agent import agent_service import fay_booter +from core.content_db import Content_Db if platform.system() == "Windows": import sys sys.path.append("test/ovr_lipsync") @@ -38,6 +37,11 @@ def send_for_answer(msg): #记录运行时间 fay_booter.feiFei.last_quest_time = time.time() + #消息保存 + contentdb = Content_Db() + contentdb.add_content('member', 'agent', msg.replace('主人语音说了:', '').replace('主人文字说了:', '')) + wsa_server.get_web_instance().add_cmd({"panelReply": {"type":"member","content":msg.replace('主人语音说了:', '').replace('主人文字说了:', '')}}) + # 发送给数字人端 if not config_util.config["interact"]["playSound"]: content = {'Topic': 'Unreal', 'Data': {'Key': 'question', 'Value': msg}} @@ -49,14 +53,19 @@ def send_for_answer(msg): content = {'Topic': 'Unreal', 'Data': {'Key': 'log', 'Value': "思考中..."}} wsa_server.get_instance().add_cmd(content) - #agent 处理 - text = agent_service.agent.run(msg) + #agent 或llm chain处理 + is_use_say_tool, text = agent_service.agent.run(msg) - #聊天模式语音输入语音输出 - if text and "语音" in msg and agent_service.agent.is_chat: + #语音输入强制语音输出 + if text and "语音说了" in msg and not is_use_say_tool: interact = Interact("audio", 1, {'user': '', 'msg': text}) fay_booter.feiFei.on_interact(interact) + #消息保存 + contentdb.add_content('fay','agent', text) + wsa_server.get_web_instance().add_cmd({"panelReply": {"type":"fay","content":text}}) + util.log(1, 'ReAct Agent或LLM Chain处理总时长:{} ms'.format(math.floor((time.time() - fay_booter.feiFei.last_quest_time) * 1000))) + #推送数字人 if not cfg.config["interact"]["playSound"]: content = {'Topic': 'Unreal', 'Data': {'Key': 'log', 'Value': text}} @@ -294,7 +303,7 @@ class FeiFei: except Exception as serr: util.log(1,"远程音频输入输出设备已经断开:{}".format(serr)) self.deviceConnect = None - time.sleep(1) + time.sleep(5) def __accept_audio_device_output_connect(self): self.deviceSocket = socket.socket(socket.AF_INET,socket.SOCK_STREAM) diff --git a/core/recorder.py b/core/recorder.py index b83b10e..b65b968 100644 --- a/core/recorder.py +++ b/core/recorder.py @@ -42,16 +42,14 @@ class Recorder: if cfg.config['source']['wake_word_enabled']: self.timer = threading.Timer(60, self.reset_wakeup_status) # 60秒后执行reset_wakeup_status方法 - def asrclient(self): + asrcli = None if self.ASRMode == "ali": asrcli = ALiNls() elif self.ASRMode == "funasr": asrcli = FunASR() return asrcli - - def __get_history_average(self, number): total = 0 num = 0 diff --git a/images/you1.png b/images/you1.png new file mode 100644 index 0000000..15cdfda Binary files /dev/null and b/images/you1.png differ diff --git a/images/you2.png b/images/you2.png new file mode 100644 index 0000000..5e626cb Binary files /dev/null and b/images/you2.png differ diff --git a/images/you3.png b/images/you3.png new file mode 100644 index 0000000..5611b13 Binary files /dev/null and b/images/you3.png differ diff --git a/images/you4.png b/images/you4.png new file mode 100644 index 0000000..c533a31 Binary files /dev/null and b/images/you4.png differ diff --git a/images/you5.png b/images/you5.png new file mode 100644 index 0000000..f869550 Binary files /dev/null and b/images/you5.png differ diff --git a/images/you6.png b/images/you6.png new file mode 100644 index 0000000..9ac3e47 Binary files /dev/null and b/images/you6.png differ