57 lines
1.8 KiB
Python
57 lines
1.8 KiB
Python
|
import json
|
||
|
import requests
|
||
|
# from core import content_db
|
||
|
|
||
|
class VllmGPT:
    """Small HTTP client for a text-generation server (vLLM, per the class name).

    ``question`` posts to ``/v1/completions`` and ``question2`` posts to
    ``/v1/chat/completions``. Both use the host, port, model and token limit
    given to the constructor.
    """

    def __init__(self, host="127.0.0.1",
                 port="8000",
                 model="THUDM/chatglm3-6b",
                 max_tokens="1024"):
        """Store the connection settings and derive both endpoint URLs.

        Args:
            host: Host name or IP address of the server.
            port: Port the server listens on.
            model: Identifier sent in the ``model`` field of every request.
            max_tokens: Generation length limit. May be an int or a numeric
                string; it is converted with ``int()`` when a request body
                is built.
        """
        self.host = host
        self.port = port
        self.model = model
        self.max_tokens = max_tokens
        self.__URL = "http://{}:{}/v1/completions".format(self.host, self.port)
        self.__URL2 = "http://{}:{}/v1/chat/completions".format(self.host, self.port)

    def _completion_payload(self, cont):
        """Return the request body for ``/v1/completions`` with ``cont`` as prompt."""
        return {
            "model": self.model,
            "prompt": cont,
            # The constructor default is a string, the request needs a number.
            "max_tokens": int(self.max_tokens),
            "temperature": 0,
        }

    def _chat_payload(self, cont):
        """Return the request body for ``/v1/chat/completions`` with one user turn."""
        return {
            "model": self.model,
            "messages": [{"role": "user", "content": cont}],
            "max_tokens": int(self.max_tokens),
            "temperature": 0.3,
            "user": "live-virtual-digital-person",
        }

    def question(self, cont):
        """Send ``cont`` as a plain prompt and return the generated text.

        Args:
            cont: Prompt text.

        Returns:
            The ``text`` field of the first entry in the response's
            ``choices`` list.

        Raises:
            ValueError: If ``max_tokens`` cannot be converted to an int.
            KeyError, IndexError: If the response JSON has no usable
                ``choices`` entry (for example an error payload).
        """
        # Use the configured endpoint and model instead of hard-coded values,
        # so the constructor arguments actually take effect here.
        url = self.__URL
        req = json.dumps(self._completion_payload(cont))
        # Trace output kept from the original implementation.
        print(url)
        print(req)

        headers = {'content-type': 'application/json'}
        r = requests.post(url, headers=headers, data=req)
        res = r.json()

        return res['choices'][0]['text']

    def question2(self, cont):
        """Send ``cont`` as a single user chat message and return the reply.

        Args:
            cont: Content of the user message.

        Returns:
            The ``message.content`` field of the first entry in the
            response's ``choices`` list.

        Raises:
            ValueError: If ``max_tokens`` cannot be converted to an int.
            KeyError, IndexError: If the response JSON has no usable
                ``choices`` entry (for example an error payload).
        """
        content = self._chat_payload(cont)
        url = self.__URL2
        # The Authorization header is sent with an empty bearer token, as before.
        headers = {'content-type': 'application/json', 'Authorization': 'Bearer '}
        r = requests.post(url, headers=headers, json=content)
        res = r.json()

        return res['choices'][0]['message']['content']
|
||
|
|
||
|
if __name__ == "__main__":
    # Manual smoke test: send one chat message to a local server on port 8101
    # and print whatever reply comes back.
    client = VllmGPT(host='127.0.0.1', port='8101', model='Qwen-7B-Chat')
    answer = client.question2("你叫什么名字啊今年多大了")
    print(answer)
|