olivebot/llm/agent/tools/WebPageScraper.py
xszyou 87ed1c4425 Fay年翻更新
- 升级Agent(chat_module=agent切换):升级到langgraph react agent逻辑、集成到主分支fay中、基于自动决策工具调用机制、基于日程跟踪的主动沟通、支持外部观测数据传入;
- 修复因线程同步问题导致的配置文件读写不稳定
- 聊天采纳功能的bug修复
2024-11-20 23:44:47 +08:00

35 lines
1.3 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

from bs4 import BeautifulSoup
import abc
from typing import Any
from langchain.tools import BaseTool
import requests
class WebPageScraper(BaseTool, abc.ABC):
    """LangChain tool that downloads a web page and returns its HTML as text.

    The tool input is the URL of the page to fetch. Failures are reported to
    the caller as a short message string rather than as a raised exception.
    """

    name: str = "WebPageScraper"
    description: str = "此工具用于获取网页内容使用时请传入需要查询的网页地址作为参数https://www.baidu.com/。"

    def __init__(self):
        super().__init__()

    async def _arun(self, *args: Any, **kwargs: Any) -> Any:
        """Async entry point; intentionally a no-op that returns ``None``."""
        # The async path is not used by this project, so it is left
        # unimplemented on purpose.
        pass

    def _run(self, para) -> str:
        """Fetch the page at ``para`` and return its parsed HTML as a string.

        Args:
            para: URL of the page to download.

        Returns:
            The markup of the page as parsed by BeautifulSoup's
            ``html.parser``, or a short error message when the request fails
            (SSL failure, timeout, or any other error).
        """
        # Present a browser-like User-Agent instead of the default
        # python-requests one.
        headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3'}
        try:
            response = requests.get(para, headers=headers, timeout=10, verify=True)
            soup = BeautifulSoup(response.text, 'html.parser')
            # The method is declared to return ``str``; hand back the rendered
            # markup rather than the BeautifulSoup object itself.
            return str(soup)
        except requests.exceptions.SSLError:
            # requests reports certificate problems as SSLError; the
            # SSLCertVerificationError name lives in the stdlib ``ssl`` module,
            # not in ``requests.exceptions``, and referencing it there raises
            # AttributeError while the handler is being matched.
            return 'SSL证书验证失败'
        except requests.exceptions.Timeout:
            return '请求超时'
        except Exception as e:
            # Best-effort tool: report the problem and return a fallback
            # message instead of propagating the error to the agent.
            print("Http Error:", e)
            return '无法获取该网页内容'
if __name__ == "__main__":
    # Manual smoke test: fetch a sample page through the tool's public
    # ``run`` entry point and print whatever it returns.
    scraper = WebPageScraper()
    print(scraper.run("https://book.douban.com/review/14636204"))