# OliveSensorAPI/web_internlm2.py

"""
This script is adapted from the Streamlit chat example and from the interactive generation code of ChatGLM2 and Transformers.
We mainly modified part of the code logic to adapt it to generation with our model.
Please refer to the links below for more information:
1. streamlit chat example: https://docs.streamlit.io/knowledge-base/tutorials/build-conversational-apps
2. chatglm2: https://github.com/THUDM/ChatGLM2-6B
3. transformers: https://github.com/huggingface/transformers
Please run with the command `streamlit run path/to/web_internlm2.py --server.address=0.0.0.0 --server.port 7860`.
Using `python path/to/web_internlm2.py` may cause unknown problems.
"""
import copy
# Web-view blame residue: commit "Update code (#8)", 2024-04-14 10:09:17 +08:00.
import os
import warnings
from dataclasses import asdict, dataclass
# Web-view blame residue: commit dated 2024-04-14 12:22:35 +08:00.
from rag.src.pipeline import EmoLLMRAG
from typing import Callable, List, Optional
import streamlit as st
import torch
from torch import nn
from transformers.generation.utils import LogitsProcessorList, StoppingCriteriaList
from transformers.utils import logging
from transformers import AutoTokenizer, AutoModelForCausalLM # isort: skip
from openxlab.model import download
logger = logging.get_logger(__name__)
# Web-view blame residue: commit "Update code (#8)", 2024-04-14 10:09:17 +08:00.
# The model and tokenizer are loaded from the relative "model" directory,
# so stop immediately when that directory does not exist.
if not os.path.isdir("model"):
    print("[ERROR] not find model dir")
    # NOTE(review): this exits with status 0 although it is an error path.
    exit(0)
@dataclass
class GenerationConfig:
    """Sampling settings used for chat generation to provide more diversity."""

    # Upper bound on the total sequence length.
    max_length: int = 32768
    # Nucleus-sampling probability mass.
    top_p: float = 0.8
    # Softmax temperature applied before sampling.
    temperature: float = 0.8
    # Sample from the distribution instead of taking the arg-max token.
    do_sample: bool = True
    # Penalty applied to already generated tokens.
    repetition_penalty: float = 1.005
@torch.inference_mode()
def generate_interactive(
    model,
    tokenizer,
    prompt,
    generation_config: Optional[GenerationConfig] = None,
    logits_processor: Optional[LogitsProcessorList] = None,
    stopping_criteria: Optional[StoppingCriteriaList] = None,
    prefix_allowed_tokens_fn: Optional[Callable[[int, torch.Tensor], List[int]]] = None,
    additional_eos_token_id: Optional[int] = None,
    **kwargs,
):
    """Generate a reply for ``prompt`` token by token, yielding the text decoded so far.

    Args:
        model: Causal LM. It must provide ``generation_config``,
            ``prepare_inputs_for_generation``, ``_get_logits_processor``,
            ``_get_stopping_criteria``, ``_get_logits_warper`` and
            ``_update_model_kwargs_for_generation``.
        tokenizer: Tokenizer used to encode ``prompt`` and decode the output.
        prompt: The fully formatted prompt string (a single sequence).
        generation_config: Configuration to start from; defaults to
            ``model.generation_config``. It is deep-copied before being changed.
            NOTE(review): the object must provide ``update()``, ``eos_token_id``,
            ``max_length`` and ``max_new_tokens``; the dataclass of the same
            name in this file has no ``update()`` method.
        logits_processor: Extra logits processors merged with the model defaults.
        stopping_criteria: Extra stopping criteria merged with the model defaults.
        prefix_allowed_tokens_fn: Forwarded to ``model._get_logits_processor``.
        additional_eos_token_id: Extra token id that also terminates generation.
        **kwargs: Passed to ``generation_config.update``; whatever that call
            returns is used as the model keyword arguments.

    Yields:
        str: The decoded response generated so far, without the prompt and
        without a trailing end-of-sequence token.
    """
    inputs = tokenizer([prompt], padding=True, return_tensors="pt")
    for k, v in inputs.items():
        inputs[k] = v.cuda()
    input_ids = inputs["input_ids"]
    # Prompt length; also the offset at which generated tokens start.
    input_ids_seq_length = input_ids.shape[-1]

    if generation_config is None:
        generation_config = model.generation_config
    # Work on a copy so per-call overrides never leak into the model's config.
    generation_config = copy.deepcopy(generation_config)
    model_kwargs = generation_config.update(**kwargs)

    # Normalise the end-of-sequence ids to a list (possibly empty).
    eos_token_id = generation_config.eos_token_id
    if eos_token_id is None:
        eos_token_id = []
    elif isinstance(eos_token_id, int):
        eos_token_id = [eos_token_id]
    if additional_eos_token_id is not None:
        eos_token_id.append(additional_eos_token_id)

    has_default_max_length = kwargs.get("max_length") is None and generation_config.max_length is not None
    if has_default_max_length and generation_config.max_new_tokens is None:
        warnings.warn(
            f"Using `max_length`'s default ({generation_config.max_length}) to control the generation length. "
            "This behaviour is deprecated and will be removed from the config in v5 of Transformers -- we"
            " recommend using `max_new_tokens` to control the maximum length of the generation.",
            UserWarning,
        )
    elif generation_config.max_new_tokens is not None:
        generation_config.max_length = generation_config.max_new_tokens + input_ids_seq_length
        if not has_default_max_length:
            # `logger.warning` takes %-format arguments after the message, so
            # no warning category is passed here (it would break formatting).
            logger.warning(
                f"Both `max_new_tokens` (={generation_config.max_new_tokens}) and `max_length`(="
                f"{generation_config.max_length}) seem to have been set. `max_new_tokens` will take precedence. "
                "Please refer to the documentation for more information. "
                "(https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)"
            )

    if input_ids_seq_length >= generation_config.max_length:
        input_ids_string = "input_ids"
        logger.warning(
            f"Input length of {input_ids_string} is {input_ids_seq_length}, but `max_length` is set to"
            f" {generation_config.max_length}. This can lead to unexpected behavior. You should consider"
            " increasing `max_new_tokens`."
        )

    # 2. Set generation parameters if not already defined
    logits_processor = logits_processor if logits_processor is not None else LogitsProcessorList()
    stopping_criteria = stopping_criteria if stopping_criteria is not None else StoppingCriteriaList()

    logits_processor = model._get_logits_processor(
        generation_config=generation_config,
        input_ids_seq_length=input_ids_seq_length,
        encoder_input_ids=input_ids,
        prefix_allowed_tokens_fn=prefix_allowed_tokens_fn,
        logits_processor=logits_processor,
    )
    stopping_criteria = model._get_stopping_criteria(
        generation_config=generation_config, stopping_criteria=stopping_criteria
    )
    logits_warper = model._get_logits_warper(generation_config)

    # One flag per sequence: 1 while it is still generating, 0 once it hit EOS.
    unfinished_sequences = input_ids.new(input_ids.shape[0]).fill_(1)
    scores = None
    while True:
        model_inputs = model.prepare_inputs_for_generation(input_ids, **model_kwargs)
        # forward pass to get next token
        outputs = model(
            **model_inputs,
            return_dict=True,
            output_attentions=False,
            output_hidden_states=False,
        )
        next_token_logits = outputs.logits[:, -1, :]

        # pre-process distribution
        next_token_scores = logits_processor(input_ids, next_token_logits)
        next_token_scores = logits_warper(input_ids, next_token_scores)

        # sample
        probs = nn.functional.softmax(next_token_scores, dim=-1)
        if generation_config.do_sample:
            next_tokens = torch.multinomial(probs, num_samples=1).squeeze(1)
        else:
            next_tokens = torch.argmax(probs, dim=-1)

        # update generated ids, model inputs, and length for next step
        input_ids = torch.cat([input_ids, next_tokens[:, None]], dim=-1)
        model_kwargs = model._update_model_kwargs_for_generation(outputs, model_kwargs, is_encoder_decoder=False)

        # Mark every sequence whose new token equals any EOS id as finished.
        hit_eos = torch.zeros_like(next_tokens, dtype=torch.bool)
        for each_eos_token_id in eos_token_id:
            hit_eos |= next_tokens == each_eos_token_id
        unfinished_sequences = unfinished_sequences.mul((~hit_eos).long())

        # Decode only the generated part of the first (and only) sequence.
        output_token_ids = input_ids[0].cpu().tolist()[input_ids_seq_length:]
        # Drop a trailing EOS token; guard against the list being empty.
        if output_token_ids and output_token_ids[-1] in eos_token_id:
            output_token_ids = output_token_ids[:-1]
        response = tokenizer.decode(output_token_ids)

        yield response
        # stop when each sentence is finished, or if we exceed the maximum length
        if unfinished_sequences.max() == 0 or stopping_criteria(input_ids, scores):
            break
def on_btn_click():
    """Clear the chat history stored in the Streamlit session state.

    Used as the ``on_click`` callback of the "Clear Chat History" button.
    Does nothing when no history is stored, so a click that arrives before
    the history has been (re)initialised does not raise.
    """
    if "messages" in st.session_state:
        del st.session_state.messages
@st.cache_resource
def load_model():
    """Load the causal LM and its tokenizer from the local ``model`` directory.

    Returns:
        tuple: ``(model, tokenizer)``; the model is converted to bfloat16 and
        moved to the CUDA device.
    """
    causal_lm = AutoModelForCausalLM.from_pretrained("model", trust_remote_code=True)
    causal_lm = causal_lm.to(torch.bfloat16)
    causal_lm = causal_lm.cuda()
    text_tokenizer = AutoTokenizer.from_pretrained("model", trust_remote_code=True)
    return causal_lm, text_tokenizer
def prepare_generation_config():
    """Draw the sidebar controls and return the sampling settings they select.

    The sidebar holds sliders for the maximum length, top-p and temperature,
    plus the "Clear Chat History" button.

    Returns:
        GenerationConfig: Settings built from the current slider values.
    """
    with st.sidebar:
        chosen_max_length = st.slider("Max Length", min_value=8, max_value=32768, value=32768)
        chosen_top_p = st.slider("Top P", 0.0, 1.0, 0.8, step=0.01)
        chosen_temperature = st.slider("Temperature", 0.0, 1.0, 0.7, step=0.01)
        st.button("Clear Chat History", on_click=on_btn_click)

    return GenerationConfig(
        max_length=chosen_max_length,
        top_p=chosen_top_p,
        temperature=chosen_temperature,
    )
# Templates for one finished user turn and one finished assistant turn.
user_prompt = "<|im_start|>user\n{user}<|im_end|>\n"
robot_prompt = "<|im_start|>assistant\n{robot}<|im_end|>\n"
# The pending query: a user turn followed by an opened assistant turn that
# the model is expected to complete.
cur_query_prompt = user_prompt + "<|im_start|>assistant\n"
# Web-view blame residue: commit dated 2024-04-14 12:22:35 +08:00.
def combine_history(prompt, retrieval_content=''):
    """Build the full model prompt from the system text, chat history and new query.

    Args:
        prompt: The user's new question.
        retrieval_content: Knowledge retrieved for the question. When it is
            empty, the question is used as-is instead of being wrapped in the
            "answer based on the retrieved knowledge" instruction.

    Returns:
        str: System instruction, every stored history turn, then the new
        query followed by an opened assistant turn.

    Raises:
        RuntimeError: If a stored message has a role other than ``"user"``
            or ``"robot"``.
    """
    if retrieval_content:
        # Only mention retrieved knowledge when there actually is some.
        prompt = f"你需要根据以下从书本中检索到的专业知识:`{retrieval_content}`。从一个心理专家的专业角度来回答后续提问:{prompt}"
    meta_instruction = (
        "你是一个由aJupyter、Farewell、jujimeizuo、Smiling&Weeping研发排名按字母顺序排序不分先后、散步提供技术支持、上海人工智能实验室提供支持开发的心理健康大模型。现在你是一个心理专家我有一些心理问题请你用专业的知识帮我解决。"
    )

    # Collect the pieces and join once instead of repeated string concatenation.
    segments = [f"<s><|im_start|>system\n{meta_instruction}<|im_end|>\n"]
    for message in st.session_state.messages:
        cur_content = message["content"]
        role = message["role"]
        if role == "user":
            segments.append(user_prompt.format(user=cur_content))
        elif role == "robot":
            segments.append(robot_prompt.format(robot=cur_content))
        else:
            raise RuntimeError(f"Unsupported chat role in history: {role!r}")
    segments.append(cur_query_prompt.format(user=prompt))
    return "".join(segments)
def main():
    """Render the EmoLLM chat page and answer the submitted prompt, if any.

    Loads the model and tokenizer, builds the retrieval helper, draws the
    sidebar and the stored conversation, and, when the user submits a
    question, streams the model's answer into the page and stores both turns
    in ``st.session_state.messages``.
    """
    print("load model begin.")
    model, tokenizer = load_model()
    # NOTE(review): the retrieval helper is rebuilt on every script run;
    # consider caching it if its construction turns out to be expensive.
    rag_obj = EmoLLMRAG(model)
    print("load model end.")

    user_avator = "assets/user.png"
    robot_avator = "assets/robot.jpeg"

    st.title("EmoLLM")

    generation_config = prepare_generation_config()

    # Initialize chat history
    if "messages" not in st.session_state:
        st.session_state.messages = []

    # Display chat messages from history on app rerun
    for message in st.session_state.messages:
        with st.chat_message(message["role"], avatar=message.get("avatar")):
            st.markdown(message["content"])

    # Accept user input
    if prompt := st.chat_input("What is up?"):
        # Display user message in chat message container first, so it is
        # visible while retrieval and generation are still running.
        with st.chat_message("user", avatar=user_avator):
            st.markdown(prompt)

        retrieval_content = rag_obj.get_retrieval_content(prompt)
        # The history is combined before the new user message is stored, so
        # the current question appears only once in the prompt.
        real_prompt = combine_history(prompt, retrieval_content)
        # Add user message to chat history
        st.session_state.messages.append({"role": "user", "content": prompt, "avatar": user_avator})

        with st.chat_message("robot", avatar=robot_avator):
            message_placeholder = st.empty()
            # Defined up front so the code below is safe even if the
            # generator yields nothing.
            cur_response = ""
            for cur_response in generate_interactive(
                model=model,
                tokenizer=tokenizer,
                prompt=real_prompt,
                additional_eos_token_id=92542,
                **asdict(generation_config),
            ):
                # Display robot response in chat message container
                # NOTE(review): the original appended an empty string here,
                # presumably a lost typing-cursor glyph -- confirm.
                message_placeholder.markdown(cur_response)
            message_placeholder.markdown(cur_response)
        # Add robot response to chat history
        st.session_state.messages.append(
            {
                "role": "robot",
                "content": cur_response,
                "avatar": robot_avator,
            }
        )
        torch.cuda.empty_cache()
# Script entry point: build the page only when this file is executed as the
# main script, not when it is imported.
if __name__ == "__main__":
    main()