OliveSensorAPI/deploy/api-file.py
এ許我辞忧࿐♡ 8d8cb07c59
Add files via upload
Add local FastAPI deployment
2024-03-14 23:56:18 +08:00


from fastapi import FastAPI, Request
from transformers import AutoTokenizer, AutoModelForCausalLM, GenerationConfig
import uvicorn
import datetime
import torch
# Device settings
DEVICE = "cuda"  # use CUDA
DEVICE_ID = "0"  # CUDA device ID; empty if unset
CUDA_DEVICE = f"{DEVICE}:{DEVICE_ID}" if DEVICE_ID else DEVICE  # full CUDA device string
# Download the model
from transformers.utils import logging
from openxlab.model import download
logger = logging.get_logger(__name__)
# Change the repo as needed
download(model_repo='ajupyter/EmoLLM_aiwei', output='model')
# Helper to clean up GPU memory
def torch_gc():
    if torch.cuda.is_available():  # check that CUDA is available
        with torch.cuda.device(CUDA_DEVICE):  # select the CUDA device
            torch.cuda.empty_cache()  # release the CUDA cache
            torch.cuda.ipc_collect()  # collect CUDA IPC memory
# Create the FastAPI app
app = FastAPI()
# 处理POST请求的端点
@app.post("/")
async def create_item(request: Request):
global model, tokenizer # 声明全局变量以便在函数内部使用模型和分词器
json_post_raw = await request.json() # 获取POST请求的JSON数据
json_post = json.dumps(json_post_raw) # 将JSON数据转换为字符串
json_post_list = json.loads(json_post) # 将字符串转换为Python对象
prompt = json_post_list.get('prompt') # 获取请求中的提示
history = json_post_list.get('history') # 获取请求中的历史记录
max_length = json_post_list.get('max_length') # 获取请求中的最大长度
top_p = json_post_list.get('top_p') # 获取请求中的top_p参数
temperature = json_post_list.get('temperature') # 获取请求中的温度参数
# 调用模型进行对话生成
response, history = model.chat(
tokenizer,
prompt,
history=history,
max_length=max_length if max_length else 2048, # 如果未提供最大长度默认使用2048
top_p=top_p if top_p else 0.7, # 如果未提供top_p参数默认使用0.7
temperature=temperature if temperature else 0.95 # 如果未提供温度参数默认使用0.95
)
now = datetime.datetime.now() # 获取当前时间
time = now.strftime("%Y-%m-%d %H:%M:%S") # 格式化时间为字符串
# 构建响应JSON
answer = {
"response": response,
"history": history,
"status": 200,
"time": time
}
# 构建日志信息
log = "[" + time + "] " + '", prompt:"' + prompt + '", response:"' + repr(response) + '"'
print(log) # 打印日志
torch_gc() # 执行GPU内存清理
return answer # 返回响应
# Main entry point
if __name__ == '__main__':
    # Load the pretrained tokenizer and model
    tokenizer = AutoTokenizer.from_pretrained("model", trust_remote_code=True)
    model = (
        AutoModelForCausalLM.from_pretrained("model", device_map="auto", trust_remote_code=True)
        .to(torch.bfloat16)
        .cuda()
    )
    # model = AutoModelForCausalLM.from_pretrained("model", device_map="auto", trust_remote_code=True).eval()
    model.generation_config = GenerationConfig(max_length=2048, top_p=0.7, temperature=0.95)  # adjustable
    model.eval()  # put the model in evaluation mode
    # Launch the FastAPI app
    # Port 6006 lets you map the AutoDL instance's port to your local machine and use the API locally
    uvicorn.run(app, host='127.0.0.1', port=6006, workers=1)  # serve on the given host and port
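
For reference, a minimal client sketch for this endpoint, assuming the server is running on 127.0.0.1:6006 as configured above. The get_completion helper and the exact parameter values shown are illustrative, not part of the file; the field names match what create_item reads from the request body.

import requests

# Hypothetical client for the API above; assumes the server is reachable
# at http://127.0.0.1:6006 (e.g. via an AutoDL port mapping).
def get_completion(prompt, history=None):
    payload = {
        "prompt": prompt,
        "history": history or [],  # prior conversation turns, as model.chat expects
        "max_length": 2048,         # optional; the server falls back to 2048
        "top_p": 0.7,               # optional; the server falls back to 0.7
        "temperature": 0.95,        # optional; the server falls back to 0.95
    }
    resp = requests.post("http://127.0.0.1:6006/", json=payload, timeout=300)
    resp.raise_for_status()
    return resp.json()  # {"response": ..., "history": ..., "status": 200, "time": ...}

if __name__ == "__main__":
    print(get_completion("Hello")["response"])

The same request can be made from the shell, e.g. curl -X POST http://127.0.0.1:6006 -H "Content-Type: application/json" -d '{"prompt": "Hello", "history": []}'.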