# olivebot/llm/nlp_privategpt.py

import hashlib
import os
from pgpt_python.client import PrivateGPTApi

# Client for the PrivateGPT HTTP API; the base URL points at a server on the
# local machine (127.0.0.1, port 8001).
client = PrivateGPTApi(base_url="http://127.0.0.1:8001")

# NOTE(review): index_name is not referenced anywhere in the visible part of
# this file — confirm whether it is still needed.
index_name = "knowledge_data"
# Directory scanned for PDF files to upload (relative path, so it is resolved
# against the process's current working directory).
folder_path = "llm/privategpt/knowledge_base"
# Directory in which the MD5 record file below is stored.
local_persist_path = "llm/privategpt"
# Text file with one "<file name>,<md5>" line per PDF that has been uploaded.
md5_file_path = os.path.join(local_persist_path, "pdf_md5.txt")

def generate_file_md5(file_path, chunk_size=1024 * 1024):
    """Return the hexadecimal MD5 digest of the file at *file_path*.

    The file is read in fixed-size chunks so that large files are hashed
    without loading their whole content into memory at once.

    Args:
        file_path: Path of the file to hash.
        chunk_size: Number of bytes read per iteration; must be positive.

    Returns:
        The 32-character lowercase hex digest of the file's bytes.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    hasher = hashlib.md5()
    with open(file_path, 'rb') as afile:
        # iter() with a sentinel stops at the first empty read, i.e. at EOF.
        for chunk in iter(lambda: afile.read(chunk_size), b""):
            hasher.update(chunk)
    return hasher.hexdigest()

def load_md5_list():
    """Load the recorded ``file name -> MD5`` mapping from ``md5_file_path``.

    Each line of the record file has the form ``<file name>,<md5>``. The line
    is split on its LAST comma so that file names which themselves contain
    commas are read back intact. Blank lines and lines without a comma are
    skipped instead of raising.

    Returns:
        A dict mapping file name to MD5 hex string; empty when the record
        file does not exist.
    """
    records = {}
    try:
        with open(md5_file_path, 'r') as file:
            for line in file:
                name, sep, md5_value = line.rstrip("\r\n").rpartition(",")
                if not sep or not name:
                    # Malformed or empty line: nothing usable to record.
                    continue
                records[name] = md5_value.strip()
    except FileNotFoundError:
        # No record file yet means nothing has been uploaded so far.
        return {}
    return records

def update_md5_list(file_name, md5_value):
    """Record *md5_value* for *file_name* in the MD5 record file.

    The existing records are loaded, the entry for *file_name* is added or
    overwritten, and the complete mapping is written back as
    ``<file name>,<md5>`` lines.

    The new content is written to a sibling temporary file and then moved
    over the record file with ``os.replace``, so an interrupted write cannot
    leave a truncated record file behind. The containing directory is created
    when it is missing.

    Args:
        file_name: Name of the file whose digest is being recorded.
        md5_value: MD5 hex digest to store for that file.

    Raises:
        OSError: If the directory or the record file cannot be written.
    """
    md5_list = load_md5_list()
    md5_list[file_name] = md5_value

    target_dir = os.path.dirname(md5_file_path)
    if target_dir:
        # os.makedirs("") raises, so only create a non-empty directory part.
        os.makedirs(target_dir, exist_ok=True)

    # Same directory as the target so that os.replace stays on one filesystem.
    tmp_path = md5_file_path + ".tmp"
    with open(tmp_path, 'w') as file:
        file.writelines(f"{name},{md5}\n" for name, md5 in md5_list.items())
    os.replace(tmp_path, md5_file_path)

def load_all_pdfs(folder_path):
    """Upload every new or changed PDF in *folder_path* to the server.

    A PDF is uploaded when its name has no recorded MD5 or when its current
    MD5 differs from the recorded one. After a successful upload the new MD5
    is recorded. A failure while handling one file is printed and does not
    stop the remaining files from being processed.

    Only regular files whose name ends in ``.pdf`` (case-insensitive) are
    considered; sub-directories are not descended into. A missing folder is
    reported and treated as "nothing to upload".

    Args:
        folder_path: Directory to scan for PDF files.
    """
    if not os.path.isdir(folder_path):
        print(f"知识库目录不存在，跳过上传: {folder_path}")
        return

    # Sorted so that uploads happen in a stable order (os.listdir order is arbitrary).
    pdf_names = sorted(
        name
        for name in os.listdir(folder_path)
        if name.lower().endswith(".pdf")
        and os.path.isfile(os.path.join(folder_path, name))
    )
    if not pdf_names:
        return

    md5_list = load_md5_list()
    for file_name in pdf_names:
        file_path = os.path.join(folder_path, file_name)
        file_md5 = generate_file_md5(file_path)
        if md5_list.get(file_name) == file_md5:
            # Unchanged since the last recorded upload.
            continue

        print(f"正在上传 {file_name} 到服务器...")
        try:
            with open(file_path, "rb") as f:
                ingested_file_doc_id = client.ingestion.ingest_file(file=f).data[0].doc_id
            print(f"Ingested file doc id: {ingested_file_doc_id}")
            update_md5_list(file_name, file_md5)
        except Exception as e:
            # Best effort: report the failure and continue with the next file.
            print(f"上传 {file_name} 失败: {e}")


def question(cont, uid=0, observation=""):
    """Return the server's completion text for the prompt *cont*.

    Before asking, the PDF folder is synchronised via ``load_all_pdfs`` so
    that new or changed files are uploaded first.

    Args:
        cont: Prompt text sent to the completion endpoint.
        uid: Accepted but not used by this function.
        observation: Accepted but not used by this function.

    Returns:
        The ``message.content`` of the first choice in the response.
    """
    load_all_pdfs(folder_path)
    completion = client.contextual_completions.prompt_completion(prompt=cont)
    first_choice = completion.choices[0]
    return first_choice.message.content


def save_all():
    """Upload all new or changed PDFs from the configured ``folder_path``."""
    load_all_pdfs(folder_path=folder_path)

if __name__ == "__main__":
    # Manual smoke test: ask one sample question and print the answer.
    answer = question("土豆怎么做")
    print(answer)
年翻更新 - 全新ui - 全面优化websocket逻辑，提高数字人和ui连接的稳定性及资源开销 - 全面优化唤醒逻辑，提供稳定的普通唤醒模式和前置词唤醒模式 - 优化拾音质量，支持多声道麦克风拾音 - 优化自动播放服务器的对接机制，提供稳定和兼容旧版ue工程的对接模式 - 数字人接口输出机器人表情，以适应新fay ui及单片机的数字人表情输出 - 使用更高级的音频时长计算方式，可以更精准控制音频播放完成后的逻辑 - 修复点击关闭按钮会导致程序退出的bug - 修复没有麦克风的设备开启麦克风会出错的问题 - 为服务器主机地址提供配置项，以方便服务器部署 2024-10-26 11:34:55 +08:00			`import hashlib`
			`import os`
			`from pgpt_python.client import PrivateGPTApi`

			`client = PrivateGPTApi(base_url="http://127.0.0.1:8001")`

			`index_name = "knowledge_data"`
			`folder_path = "llm/privategpt/knowledge_base"`
			`local_persist_path = "llm/privategpt"`
			`md5_file_path = os.path.join(local_persist_path, "pdf_md5.txt")`

			`def generate_file_md5(file_path):`
			`hasher = hashlib.md5()`
			`with open(file_path, 'rb') as afile:`
			`buf = afile.read()`
			`hasher.update(buf)`
			`return hasher.hexdigest()`

			`def load_md5_list():`
			`if os.path.exists(md5_file_path):`
			`with open(md5_file_path, 'r') as file:`
			`return {line.split(",")[0]: line.split(",")[1].strip() for line in file}`
			`return {}`

			`def update_md5_list(file_name, md5_value):`
			`md5_list = load_md5_list()`
			`md5_list[file_name] = md5_value`
			`with open(md5_file_path, 'w') as file:`
			`for name, md5 in md5_list.items():`
			`file.write(f"{name},{md5}\n")`

			`def load_all_pdfs(folder_path):`
			`md5_list = load_md5_list()`
			`for file_name in os.listdir(folder_path):`
			`if file_name.endswith(".pdf"):`
			`file_path = os.path.join(folder_path, file_name)`
			`file_md5 = generate_file_md5(file_path)`
			`if file_name not in md5_list or md5_list[file_name] != file_md5:`
			`print(f"正在上传 {file_name} 到服务器...")`
			`with open(file_path, "rb") as f:`
			`try:`
			`ingested_file_doc_id = client.ingestion.ingest_file(file=f).data[0].doc_id`
			`print(f"Ingested file doc id: {ingested_file_doc_id}")`
			`update_md5_list(file_name, file_md5)`
			`except Exception as e:`
			`print(f"上传 {file_name} 失败: {e}")`


年翻更新 1、修复服务器ip配置，配置页没替换问题； 2、修复开启状态偶尔没对齐问题； 3、修复关闭时关闭按钮停留在关闭中问题； 4、修复星座读取错误问题； 5、修复刷新重复提醒开启问题； 6、新增是否进行语音合成的选择； 7、文字沟通接口加入“观察描述”； 8、聊天记录时间改为毫秒级； 9、补充数字人和远程音频的连接状态显示； 10、修复备注填写无法保存问题。 2024-10-30 19:11:15 +08:00			`def question(cont, uid=0, observation=""):`
年翻更新 - 全新ui - 全面优化websocket逻辑，提高数字人和ui连接的稳定性及资源开销 - 全面优化唤醒逻辑，提供稳定的普通唤醒模式和前置词唤醒模式 - 优化拾音质量，支持多声道麦克风拾音 - 优化自动播放服务器的对接机制，提供稳定和兼容旧版ue工程的对接模式 - 数字人接口输出机器人表情，以适应新fay ui及单片机的数字人表情输出 - 使用更高级的音频时长计算方式，可以更精准控制音频播放完成后的逻辑 - 修复点击关闭按钮会导致程序退出的bug - 修复没有麦克风的设备开启麦克风会出错的问题 - 为服务器主机地址提供配置项，以方便服务器部署 2024-10-26 11:34:55 +08:00			`load_all_pdfs(folder_path)`
			`text = client.contextual_completions.prompt_completion(`
			`prompt=cont`
			`).choices[0].message.content`
			`return text`


			`def save_all():`
			`load_all_pdfs(folder_path)`

			`if __name__ == "__main__":`
			`print(question("土豆怎么做"))`